...

Source file src/github.com/dlclark/regexp2/regexp_performance_test.go

Documentation: github.com/dlclark/regexp2

     1  package regexp2
     2  
     3  import (
     4  	"strings"
     5  	"testing"
     6  	"time"
     7  )
     8  
     9  func BenchmarkLiteral(b *testing.B) {
    10  	x := strings.Repeat("x", 50) + "y"
    11  	b.StopTimer()
    12  	re := MustCompile("y", 0)
    13  	b.StartTimer()
    14  	for i := 0; i < b.N; i++ {
    15  		if m, err := re.MatchString(x); !m || err != nil {
    16  			b.Fatalf("no match or error! %v", err)
    17  		}
    18  	}
    19  }
    20  
    21  func BenchmarkNotLiteral(b *testing.B) {
    22  	x := strings.Repeat("x", 50) + "y"
    23  	b.StopTimer()
    24  	re := MustCompile(".y", 0)
    25  	b.StartTimer()
    26  	for i := 0; i < b.N; i++ {
    27  		if m, err := re.MatchString(x); !m || err != nil {
    28  			b.Fatalf("no match or error! %v", err)
    29  		}
    30  	}
    31  }
    32  
    33  func BenchmarkMatchClass(b *testing.B) {
    34  	b.StopTimer()
    35  	x := strings.Repeat("xxxx", 20) + "w"
    36  	re := MustCompile("[abcdw]", 0)
    37  	b.StartTimer()
    38  	for i := 0; i < b.N; i++ {
    39  		if m, err := re.MatchString(x); !m || err != nil {
    40  			b.Fatalf("no match or error! %v", err)
    41  		}
    42  
    43  	}
    44  }
    45  
    46  func BenchmarkMatchClass_InRange(b *testing.B) {
    47  	b.StopTimer()
    48  	// 'b' is between 'a' and 'c', so the charclass
    49  	// range checking is no help here.
    50  	x := strings.Repeat("bbbb", 20) + "c"
    51  	re := MustCompile("[ac]", 0)
    52  	b.StartTimer()
    53  	for i := 0; i < b.N; i++ {
    54  		if m, err := re.MatchString(x); !m || err != nil {
    55  			b.Fatalf("no match or error! %v", err)
    56  		}
    57  	}
    58  }
    59  
    60  /*
    61  func BenchmarkReplaceAll(b *testing.B) {
    62  
    63  	x := "abcdefghijklmnopqrstuvwxyz"
    64  	b.StopTimer()
    65  	re := MustCompile("[cjrw]", 0)
    66  	b.StartTimer()
    67  	for i := 0; i < b.N; i++ {
    68  		re.ReplaceAllString(x, "")
    69  	}
    70  
    71  }
    72  */
    73  func BenchmarkAnchoredLiteralShortNonMatch(b *testing.B) {
    74  	b.StopTimer()
    75  	x := "abcdefghijklmnopqrstuvwxyz"
    76  	re := MustCompile("^zbc(d|e)", 0)
    77  	b.StartTimer()
    78  	for i := 0; i < b.N; i++ {
    79  		if m, err := re.MatchString(x); m || err != nil {
    80  			b.Fatalf("unexpected match or error! %v", err)
    81  		}
    82  	}
    83  }
    84  
    85  func BenchmarkAnchoredLiteralLongNonMatch(b *testing.B) {
    86  	b.StopTimer()
    87  
    88  	data := "abcdefghijklmnopqrstuvwxyz"
    89  	x := make([]rune, 32768*len(data))
    90  	for i := 0; i < 32768; /*(2^15)*/ i++ {
    91  		for j := 0; j < len(data); j++ {
    92  			x[i*len(data)+j] = rune(data[j])
    93  		}
    94  	}
    95  
    96  	re := MustCompile("^zbc(d|e)", 0)
    97  	b.StartTimer()
    98  	for i := 0; i < b.N; i++ {
    99  		if m, err := re.MatchRunes(x); m || err != nil {
   100  			b.Fatalf("unexpected match or error! %v", err)
   101  		}
   102  	}
   103  }
   104  
   105  func BenchmarkAnchoredShortMatch(b *testing.B) {
   106  	b.StopTimer()
   107  	x := "abcdefghijklmnopqrstuvwxyz"
   108  	re := MustCompile("^.bc(d|e)", 0)
   109  	b.StartTimer()
   110  	for i := 0; i < b.N; i++ {
   111  		if m, err := re.MatchString(x); !m || err != nil {
   112  			b.Fatalf("no match or error! %v", err)
   113  		}
   114  	}
   115  }
   116  
   117  func BenchmarkAnchoredLongMatch(b *testing.B) {
   118  	b.StopTimer()
   119  	data := "abcdefghijklmnopqrstuvwxyz"
   120  	x := make([]rune, 32768*len(data))
   121  	for i := 0; i < 32768; /*(2^15)*/ i++ {
   122  		for j := 0; j < len(data); j++ {
   123  			x[i*len(data)+j] = rune(data[j])
   124  		}
   125  	}
   126  
   127  	re := MustCompile("^.bc(d|e)", 0)
   128  	b.StartTimer()
   129  	for i := 0; i < b.N; i++ {
   130  		if m, err := re.MatchRunes(x); !m || err != nil {
   131  			b.Fatalf("no match or error! %v", err)
   132  		}
   133  	}
   134  }
   135  
   136  func BenchmarkOnePassShortA(b *testing.B) {
   137  	b.StopTimer()
   138  	x := "abcddddddeeeededd"
   139  	re := MustCompile("^.bc(d|e)*$", 0)
   140  	b.StartTimer()
   141  	for i := 0; i < b.N; i++ {
   142  		if m, err := re.MatchString(x); !m || err != nil {
   143  			b.Fatalf("no match or error! %v", err)
   144  		}
   145  	}
   146  }
   147  
   148  func BenchmarkNotOnePassShortA(b *testing.B) {
   149  	b.StopTimer()
   150  	x := "abcddddddeeeededd"
   151  	re := MustCompile(".bc(d|e)*$", 0)
   152  	b.StartTimer()
   153  	for i := 0; i < b.N; i++ {
   154  		if m, err := re.MatchString(x); !m || err != nil {
   155  			b.Fatalf("no match or error! %v", err)
   156  		}
   157  	}
   158  }
   159  
   160  func BenchmarkOnePassShortB(b *testing.B) {
   161  	b.StopTimer()
   162  	x := "abcddddddeeeededd"
   163  	re := MustCompile("^.bc(?:d|e)*$", 0)
   164  	b.StartTimer()
   165  	for i := 0; i < b.N; i++ {
   166  		if m, err := re.MatchString(x); !m || err != nil {
   167  			b.Fatalf("no match or error! %v", err)
   168  		}
   169  	}
   170  }
   171  
   172  func BenchmarkNotOnePassShortB(b *testing.B) {
   173  	b.StopTimer()
   174  	x := "abcddddddeeeededd"
   175  	re := MustCompile(".bc(?:d|e)*$", 0)
   176  	b.StartTimer()
   177  	for i := 0; i < b.N; i++ {
   178  		if m, err := re.MatchString(x); !m || err != nil {
   179  			b.Fatalf("no match or error! %v", err)
   180  		}
   181  	}
   182  }
   183  
   184  func BenchmarkOnePassLongPrefix(b *testing.B) {
   185  	b.StopTimer()
   186  	x := "abcdefghijklmnopqrstuvwxyz"
   187  	re := MustCompile("^abcdefghijklmnopqrstuvwxyz.*$", 0)
   188  	b.StartTimer()
   189  	for i := 0; i < b.N; i++ {
   190  		if m, err := re.MatchString(x); !m || err != nil {
   191  			b.Fatalf("no match or error! %v", err)
   192  		}
   193  	}
   194  }
   195  
   196  func BenchmarkOnePassLongNotPrefix(b *testing.B) {
   197  	b.StopTimer()
   198  	x := "abcdefghijklmnopqrstuvwxyz"
   199  	re := MustCompile("^.bcdefghijklmnopqrstuvwxyz.*$", 0)
   200  	b.StartTimer()
   201  	for i := 0; i < b.N; i++ {
   202  		if m, err := re.MatchString(x); !m || err != nil {
   203  			b.Fatalf("no match or error! %v", err)
   204  		}
   205  	}
   206  }
   207  
   208  var text []rune
   209  
   210  func makeText(n int) []rune {
   211  	if len(text) >= n {
   212  		return text[:n]
   213  	}
   214  	text = make([]rune, n)
   215  	x := ^uint32(0)
   216  	for i := range text {
   217  		x += x
   218  		x ^= 1
   219  		if int32(x) < 0 {
   220  			x ^= 0x88888eef
   221  		}
   222  		if x%31 == 0 {
   223  			text[i] = '\n'
   224  		} else {
   225  			text[i] = rune(x%(0x7E+1-0x20) + 0x20)
   226  		}
   227  	}
   228  	return text
   229  }
   230  
   231  func benchmark(b *testing.B, re string, n int) {
   232  	r := MustCompile(re, 0)
   233  	t := makeText(n)
   234  	b.ResetTimer()
   235  	b.SetBytes(int64(n))
   236  	for i := 0; i < b.N; i++ {
   237  		if m, err := r.MatchRunes(t); m {
   238  			b.Fatal("match!")
   239  		} else if err != nil {
   240  			b.Fatalf("Err %v", err)
   241  		}
   242  	}
   243  }
   244  
   245  const (
   246  	easy0  = "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
   247  	easy1  = "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"
   248  	medium = "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
   249  	hard   = "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
   250  	hard1  = "ABCD|CDEF|EFGH|GHIJ|IJKL|KLMN|MNOP|OPQR|QRST|STUV|UVWX|WXYZ"
   251  	parens = "([ -~])*(A)(B)(C)(D)(E)(F)(G)(H)(I)(J)(K)(L)(M)" +
   252  		"(N)(O)(P)(Q)(R)(S)(T)(U)(V)(W)(X)(Y)(Z)$"
   253  )
   254  
   255  func BenchmarkMatchEasy0_32(b *testing.B)   { benchmark(b, easy0, 32<<0) }
   256  func BenchmarkMatchEasy0_1K(b *testing.B)   { benchmark(b, easy0, 1<<10) }
   257  func BenchmarkMatchEasy0_32K(b *testing.B)  { benchmark(b, easy0, 32<<10) }
   258  func BenchmarkMatchEasy0_1M(b *testing.B)   { benchmark(b, easy0, 1<<20) }
   259  func BenchmarkMatchEasy0_32M(b *testing.B)  { benchmark(b, easy0, 32<<20) }
   260  func BenchmarkMatchEasy1_32(b *testing.B)   { benchmark(b, easy1, 32<<0) }
   261  func BenchmarkMatchEasy1_1K(b *testing.B)   { benchmark(b, easy1, 1<<10) }
   262  func BenchmarkMatchEasy1_32K(b *testing.B)  { benchmark(b, easy1, 32<<10) }
   263  func BenchmarkMatchEasy1_1M(b *testing.B)   { benchmark(b, easy1, 1<<20) }
   264  func BenchmarkMatchEasy1_32M(b *testing.B)  { benchmark(b, easy1, 32<<20) }
   265  func BenchmarkMatchMedium_32(b *testing.B)  { benchmark(b, medium, 32<<0) }
   266  func BenchmarkMatchMedium_1K(b *testing.B)  { benchmark(b, medium, 1<<10) }
   267  func BenchmarkMatchMedium_32K(b *testing.B) { benchmark(b, medium, 32<<10) }
   268  func BenchmarkMatchMedium_1M(b *testing.B)  { benchmark(b, medium, 1<<20) }
   269  func BenchmarkMatchMedium_32M(b *testing.B) { benchmark(b, medium, 32<<20) }
   270  func BenchmarkMatchHard_32(b *testing.B)    { benchmark(b, hard, 32<<0) }
   271  func BenchmarkMatchHard_1K(b *testing.B)    { benchmark(b, hard, 1<<10) }
   272  func BenchmarkMatchHard_32K(b *testing.B)   { benchmark(b, hard, 32<<10) }
   273  func BenchmarkMatchHard_1M(b *testing.B)    { benchmark(b, hard, 1<<20) }
   274  func BenchmarkMatchHard_32M(b *testing.B)   { benchmark(b, hard, 32<<20) }
   275  func BenchmarkMatchHard1_32(b *testing.B)   { benchmark(b, hard1, 32<<0) }
   276  func BenchmarkMatchHard1_1K(b *testing.B)   { benchmark(b, hard1, 1<<10) }
   277  func BenchmarkMatchHard1_32K(b *testing.B)  { benchmark(b, hard1, 32<<10) }
   278  func BenchmarkMatchHard1_1M(b *testing.B)   { benchmark(b, hard1, 1<<20) }
   279  func BenchmarkMatchHard1_32M(b *testing.B)  { benchmark(b, hard1, 32<<20) }
   280  
   281  // TestProgramTooLongForBacktrack tests that a regex which is too long
   282  // for the backtracker still executes properly.
   283  func TestProgramTooLongForBacktrack(t *testing.T) {
   284  	longRegex := MustCompile(`(one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve|thirteen|fourteen|fifteen|sixteen|seventeen|eighteen|nineteen|twenty|twentyone|twentytwo|twentythree|twentyfour|twentyfive|twentysix|twentyseven|twentyeight|twentynine|thirty|thirtyone|thirtytwo|thirtythree|thirtyfour|thirtyfive|thirtysix|thirtyseven|thirtyeight|thirtynine|forty|fortyone|fortytwo|fortythree|fortyfour|fortyfive|fortysix|fortyseven|fortyeight|fortynine|fifty|fiftyone|fiftytwo|fiftythree|fiftyfour|fiftyfive|fiftysix|fiftyseven|fiftyeight|fiftynine|sixty|sixtyone|sixtytwo|sixtythree|sixtyfour|sixtyfive|sixtysix|sixtyseven|sixtyeight|sixtynine|seventy|seventyone|seventytwo|seventythree|seventyfour|seventyfive|seventysix|seventyseven|seventyeight|seventynine|eighty|eightyone|eightytwo|eightythree|eightyfour|eightyfive|eightysix|eightyseven|eightyeight|eightynine|ninety|ninetyone|ninetytwo|ninetythree|ninetyfour|ninetyfive|ninetysix|ninetyseven|ninetyeight|ninetynine|onehundred)`, 0)
   285  
   286  	if m, err := longRegex.MatchString("two"); !m {
   287  		t.Errorf("longRegex.MatchString(\"two\") was false, want true")
   288  	} else if err != nil {
   289  		t.Errorf("Error: %v", err)
   290  	}
   291  	if m, err := longRegex.MatchString("xxx"); m {
   292  		t.Errorf("longRegex.MatchString(\"xxx\") was true, want false")
   293  	} else if err != nil {
   294  		t.Errorf("Error: %v", err)
   295  	}
   296  }
   297  
   298  func BenchmarkLeading(b *testing.B) {
   299  	b.StopTimer()
   300  	r := MustCompile("[a-q][^u-z]{13}x", 0)
   301  	inp := makeText(1000000)
   302  	b.StartTimer()
   303  	for i := 0; i < b.N; i++ {
   304  		if m, err := r.MatchRunes(inp); !m {
   305  			b.Errorf("Expected match")
   306  		} else if err != nil {
   307  			b.Errorf("Error: %v", err)
   308  		}
   309  	}
   310  }
   311  
   312  func BenchmarkShortSearch(b *testing.B) {
   313  	// set to default check period for benchmarking purposes
   314  	// unit tests tend to send this low
   315  	SetTimeoutCheckPeriod(DefaultClockPeriod)
   316  	type benchmark struct {
   317  		name     string
   318  		parallel bool // Run in parallel?
   319  		timeout  time.Duration
   320  		increase time.Duration // timeout increase per match
   321  	}
   322  	for _, mode := range []benchmark{
   323  		{"serial-no-timeout", false, DefaultMatchTimeout, 0},
   324  		{"serial-fixed-timeout", false, time.Second, 0},
   325  		{"serial-increasing-timeout", false, time.Second, time.Second},
   326  		{"parallel-no-timeout", true, DefaultMatchTimeout, 0},
   327  		{"parallel-fixed-timeout", true, time.Second, 0},
   328  		{"parallel-increasing-timeout", true, time.Second, time.Second},
   329  	} {
   330  		b.Run(mode.name, func(b *testing.B) {
   331  			t := makeText(100)
   332  			b.SetBytes(int64(len(t)))
   333  			matchOnce := func(r *Regexp) {
   334  				if m, err := r.MatchRunes(t); m {
   335  					b.Fatal("match!")
   336  				} else if err != nil {
   337  					b.Fatalf("Err %v", err)
   338  				}
   339  			}
   340  
   341  			if !mode.parallel {
   342  				r := MustCompile(easy0, 0)
   343  				r.MatchTimeout = mode.timeout
   344  				for i := 0; i < b.N; i++ {
   345  					matchOnce(r)
   346  					r.MatchTimeout += mode.increase
   347  				}
   348  			} else {
   349  				b.RunParallel(func(pb *testing.PB) {
   350  					r := MustCompile(easy0, 0)
   351  					r.MatchTimeout = mode.timeout
   352  					for pb.Next() {
   353  						matchOnce(r)
   354  						r.MatchTimeout += mode.increase
   355  					}
   356  				})
   357  			}
   358  		})
   359  	}
   360  }
   361  

View as plain text