Regex to match opening Tags
While fiddling around in RegexSnippets tonight I concocted this regex to match opening tags and to capture the various elements:
(?'openTag'<)
\s*?
(?'tagName'\??\w+)
(?:
\s*?
(?'attribName'\w+)
(?:\s*(?'attribSign'=)\s*)
(?'attribValue'
(?:\'[^\']*\'|\"[^\"]*\"|\w+)
)
)*
\s*?
(?'closeTag'[\/\?]?>)
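...and here is the script I wrote to test it (the pattern is collapsed onto a single line, with an extra 'attribute' group wrapped around each name/value pair):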
using System ;
using System.Text.RegularExpressions ;
namespace RegexSnippets.Tests
{
    /// <summary>
    /// Console harness that runs the opening-tag regex over a hard-coded
    /// sample string and prints every piece it captured.
    /// </summary>
    public class Foo
    {
        /// <summary>
        /// Matches each opening tag in the sample, writes the tag delimiter,
        /// tag name, attribute name/sign/value triples and closing delimiter
        /// to the console, then waits for a line of input before exiting.
        /// </summary>
        public static void Main()
        {
            // In a verbatim (@"...") string a double quote is written as "",
            // and a backslash does NOT escape it; a bare " ends the literal.
            string source = @"<A href=""foo""><TD><A> <TABLE id=1 foo=""bar"">" ;

            // Single-line form of the tag regex. The attribute group is
            // quantified with * (zero or more) so that attribute-less tags
            // such as <TD> and <A> in the sample are matched as well.
            // \"" in this verbatim string yields \" in the regex itself.
            string pattern = @"(?'openTag'<)\s*?(?'tagName'\??\w+)(?:\s*?(?'attribute'(?'attribName'\w+)(?:\s*(?'attribSign'=)\s*)(?'attribValue'(?:\'[^\']*\'|\""[^\""]*\""|\w+))))*\s*?(?'closeTag'[\/\?]?>)" ;

            Regex re = new Regex(
                pattern,
                RegexOptions.IgnoreCase|RegexOptions.Multiline|RegexOptions.Singleline
            ) ;

            for( Match m = re.Match( source ); m.Success; m = m.NextMatch() )
            {
                Console.WriteLine( "Open tag = {0}", m.Groups["openTag"].Value ) ;
                Console.WriteLine( "Tagname = {0}", m.Groups["tagName"].Value ) ;

                // Each pass through the attribute group pushes exactly one
                // capture onto each of these three groups, so the same index
                // addresses one attribute. For a tag without attributes the
                // collections are empty and the loop body is skipped.
                CaptureCollection names = m.Groups["attribName"].Captures ;
                CaptureCollection signs = m.Groups["attribSign"].Captures ;
                CaptureCollection values = m.Groups["attribValue"].Captures ;
                for( int i = 0; i < names.Count; i++ )
                {
                    Console.WriteLine( "Attrib Name = {0}", names[i].Value ) ;
                    Console.WriteLine( "Attrib Sign = {0}", signs[i].Value ) ;
                    Console.WriteLine( "Attrib Value = {0}", values[i].Value ) ;
                }

                Console.WriteLine( "Close tag = {0}", m.Groups["closeTag"].Value ) ;
                Console.Write( "{0}**********************************{0}", Environment.NewLine ) ;
            }

            // Keep the console window open until the user presses Enter.
            Console.ReadLine() ;
        }
    }
}