Match vs MatchSparse, a regular expression implementation detail that may surprise you.
So out of the blue I printed the type of a Match and found it wasn't a Match, but a MatchSparse instead. Okay, now I have to go look up the difference. It appears that a MatchSparse is created when you don't have sequentially ordered numeric capture groups (example: (?<10>\\d+)). Now, what happens when you are using a sparse versus a non-sparse match? Well, hell, it uses a Hashtable instead of an array. So I got to thinking: under what circumstances would the Hashtable perform better than the array? In the Framework, the HybridDictionary switches over to a Hashtable after only about 10 items. So 10 items might be good, but it didn't really make a difference, so I went ahead and bumped it up to 500 items. 500 seems to do the job and gets you approximately a 7-8% gain in performance. Want to see what I mean? Follow the code and see for yourself. The moral of the story: sometimes it is good to know implementation details so you can merge those details with your own knowledge of performance characteristics.
using System;
using System.Text;
using System.Text.RegularExpressions;
/// <summary>
/// Micro-benchmark that times <see cref="Regex.Match(string)"/> for two patterns with the
/// same number of single-digit capture groups: one whose groups are numbered implicitly
/// (1, 2, 3, ...) and one whose groups carry explicit, non-contiguous numbers (5, 10, 15, ...).
/// </summary>
public class MatchVsMatchSparse {
    // Number of capture groups in each pattern; also the length of the input string,
    // so the input is always exactly long enough for the whole pattern to match.
    private const int GroupCount = 500;

    // Number of timed Match calls per pattern.
    private const int Iterations = 100000;

    /// <summary>
    /// Builds both patterns, runs each against the same digit string and prints, per
    /// pattern, the runtime type of the match object and the elapsed time.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    private static void Main(string[] args) {
        Regex matchUsingMatch = BuildRegex(GroupCount, false);
        Regex matchUsingMatchSparse = BuildRegex(GroupCount, true);
        string strNumbers = BuildDigits(GroupCount);

        RunBenchmark(matchUsingMatch, strNumbers);
        RunBenchmark(matchUsingMatchSparse, strNumbers);
    }

    /// <summary>
    /// Times <see cref="Iterations"/> calls of <paramref name="regex"/> against
    /// <paramref name="input"/> and writes one result line to the console.
    /// </summary>
    /// <param name="regex">The pattern to benchmark.</param>
    /// <param name="input">The text to match against.</param>
    private static void RunBenchmark(Regex regex, string input) {
        // One untimed call keeps first-call overhead out of the measurement and gives us
        // a match object whose runtime type labels the output. Printing regex.GetType()
        // would yield the same Regex type name for both runs.
        Match sample = regex.Match(input);

        // Stopwatch is a monotonic elapsed-time counter; DateTime.Now is a wall clock
        // and is not intended for measuring intervals.
        System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();
        for (int i = 0; i < Iterations; i++) {
            regex.Match(input);
        }
        timer.Stop();

        Console.WriteLine("Timing using {0}: {1}", sample.GetType().ToString(), timer.Elapsed);
    }

    /// <summary>
    /// Creates a string of <paramref name="length"/> characters cycling through the
    /// digits 0-9 ("0123456789012...").
    /// </summary>
    /// <param name="length">Number of digits to produce.</param>
    /// <returns>The generated digit string.</returns>
    private static string BuildDigits(int length) {
        StringBuilder sb = new StringBuilder(length);
        for (int i = 0; i < length; i++) {
            sb.Append((char)('0' + (i % 10)));
        }
        return sb.ToString();
    }

    /// <summary>
    /// Builds a compiled regex made of <paramref name="slots"/> consecutive capture
    /// groups, each matching exactly one digit.
    /// </summary>
    /// <param name="slots">Number of capture groups to emit.</param>
    /// <param name="sparse">
    /// When true, group i (1-based) is written as <c>(?&lt;i*5&gt;\d)</c>, giving the groups
    /// explicit numbers 5, 10, 15, ...; when false, each group is a plain <c>(\d)</c>.
    /// </param>
    /// <returns>The regex, constructed with <see cref="RegexOptions.Compiled"/>.</returns>
    private static Regex BuildRegex(int slots, bool sparse) {
        StringBuilder sb = new StringBuilder();
        for (int i = 1; i <= slots; i++) {
            sb.Append("(");
            if (sparse) {
                // Explicit group number with gaps between consecutive groups.
                sb.Append("?<");
                sb.Append((i * 5).ToString());
                sb.Append(">");
            }
            sb.Append("\\d)");
        }
        return new Regex(sb.ToString(), RegexOptions.Compiled);
    }
}