Source file src/regexp/find_test.go

     1  // Copyright 2010 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package regexp
     6  
     7  import (
     8  	"fmt"
     9  	"strings"
    10  	"testing"
    11  )
    12  
    13  // For each pattern/text pair, what is the expected output of each function?
    14  // We can derive the textual results from the indexed results, the non-submatch
    15  // results from the submatched results, the single results from the 'all' results,
    16  // and the byte results from the string results. Therefore the table includes
    17  // only the FindAllStringSubmatchIndex result.
    18  type FindTest struct {
    19  	pat     string
    20  	text    string
    21  	matches [][]int
    22  }
    23  
    24  func (t FindTest) String() string {
    25  	return fmt.Sprintf("pat: %#q text: %#q", t.pat, t.text)
    26  }
    27  
// findTests is the table shared by every test in this file. Each entry holds
// a pattern, a text, and the expected matches as produced by build; a nil
// matches field means the pattern is expected not to match the text.
var findTests = []FindTest{
	{``, ``, build(1, 0, 0)},
	{`^abcdefg`, "abcdefg", build(1, 0, 7)},
	{`a+`, "baaab", build(1, 1, 4)},
	{"abcd..", "abcdef", build(1, 0, 6)},
	{`a`, "a", build(1, 0, 1)},
	{`x`, "y", nil},
	{`b`, "abc", build(1, 1, 2)},
	{`.`, "a", build(1, 0, 1)},
	{`.*`, "abcdef", build(1, 0, 6)},
	{`^`, "abcde", build(1, 0, 0)},
	{`$`, "abcde", build(1, 5, 5)},
	{`^abcd$`, "abcd", build(1, 0, 4)},
	{`^bcd'`, "abcdef", nil},
	{`^abcd$`, "abcde", nil},
	{`a+`, "baaab", build(1, 1, 4)},
	{`a*`, "baaab", build(3, 0, 0, 1, 4, 5, 5)},
	{`[a-z]+`, "abcd", build(1, 0, 4)},
	{`[^a-z]+`, "ab1234cd", build(1, 2, 6)},
	{`[a\-\]z]+`, "az]-bcz", build(2, 0, 4, 6, 7)},
	{`[^\n]+`, "abcd\n", build(1, 0, 4)},
	// The expected offsets below are byte offsets: each of these
	// characters advances the index by 3.
	{`[日本語]+`, "日本語日本語", build(1, 0, 18)},
	{`日本語+`, "日本語", build(1, 0, 9)},
	{`日本語+`, "日本語語語語", build(1, 0, 18)},
	// Entries with capture groups list the whole-match pair first,
	// followed by one pair per group; -1, -1 marks a group that did
	// not participate in the match.
	{`()`, "", build(1, 0, 0, 0, 0)},
	{`(a)`, "a", build(1, 0, 1, 0, 1)},
	{`(.)(.)`, "日a", build(1, 0, 4, 0, 3, 3, 4)},
	{`(.*)`, "", build(1, 0, 0, 0, 0)},
	{`(.*)`, "abcd", build(1, 0, 4, 0, 4)},
	{`(..)(..)`, "abcd", build(1, 0, 4, 0, 2, 2, 4)},
	{`(([^xyz]*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 3, 4)},
	{`((a|b|c)*(d))`, "abcd", build(1, 0, 4, 0, 4, 2, 3, 3, 4)},
	{`(((a|b|c)*)(d))`, "abcd", build(1, 0, 4, 0, 4, 0, 3, 2, 3, 3, 4)},
	{`\a\f\n\r\t\v`, "\a\f\n\r\t\v", build(1, 0, 6)},
	{`[\a\f\n\r\t\v]+`, "\a\f\n\r\t\v", build(1, 0, 6)},

	{`a*(|(b))c*`, "aacc", build(1, 0, 4, 2, 2, -1, -1)},
	{`(.*).*`, "ab", build(1, 0, 2, 0, 2)},
	{`[.]`, ".", build(1, 0, 1)},
	{`/$`, "/abc/", build(1, 4, 5)},
	{`/$`, "/abc", nil},

	// multiple matches
	{`.`, "abc", build(3, 0, 1, 1, 2, 2, 3)},
	{`(.)`, "abc", build(3, 0, 1, 0, 1, 1, 2, 1, 2, 2, 3, 2, 3)},
	{`.(.)`, "abcd", build(2, 0, 2, 1, 2, 2, 4, 3, 4)},
	{`ab*`, "abbaab", build(3, 0, 3, 3, 4, 4, 6)},
	{`a(b*)`, "abbaab", build(3, 0, 3, 1, 3, 3, 4, 4, 4, 4, 6, 5, 6)},

	// fixed bugs
	{`ab$`, "cab", build(1, 1, 3)},
	{`axxb$`, "axxcb", nil},
	{`data`, "daXY data", build(1, 5, 9)},
	{`da(.)a$`, "daXY data", build(1, 5, 9, 7, 8)},
	{`zx+`, "zzx", build(1, 1, 3)},
	{`ab$`, "abcab", build(1, 3, 5)},
	{`(aa)*$`, "a", build(1, 1, 1, -1, -1)},
	{`(?:.|(?:.a))`, "", nil},
	{`(?:A(?:A|a))`, "Aa", build(1, 0, 2)},
	{`(?:A|(?:A|a))`, "a", build(1, 0, 1)},
	{`(a){0}`, "", build(1, 0, 0, -1, -1)},
	{`(?-s)(?:(?:^).)`, "\n", nil},
	{`(?s)(?:(?:^).)`, "\n", build(1, 0, 1)},
	{`(?:(?:^).)`, "\n", nil},
	{`\b`, "x", build(2, 0, 0, 1, 1)},
	{`\b`, "xx", build(2, 0, 0, 2, 2)},
	{`\b`, "x y", build(4, 0, 0, 1, 1, 2, 2, 3, 3)},
	{`\b`, "xx yy", build(4, 0, 0, 2, 2, 3, 3, 5, 5)},
	{`\B`, "x", nil},
	{`\B`, "xx", build(1, 1, 1)},
	{`\B`, "x y", nil},
	{`\B`, "xx yy", build(2, 1, 1, 4, 4)},
	{`(|a)*`, "aa", build(3, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2)},
	{`0A|0[aA]`, "0a", build(1, 0, 2)},
	{`0[aA]|0A`, "0a", build(1, 0, 2)},

	// RE2 tests
	{`[^\S\s]`, "abcd", nil},
	{`[^\S[:space:]]`, "abcd", nil},
	{`[^\D\d]`, "abcd", nil},
	{`[^\D[:digit:]]`, "abcd", nil},
	{`(?i)\W`, "x", nil},
	{`(?i)\W`, "k", nil},
	{`(?i)\W`, "s", nil},

	// can backslash-escape any punctuation
	{`\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~`,
		`!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31)},
	{`[\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\{\|\}\~]+`,
		`!"#$%&'()*+,-./:;<=>?@[\]^_{|}~`, build(1, 0, 31)},
	{"\\`", "`", build(1, 0, 1)},
	{"[\\`]+", "`", build(1, 0, 1)},

	// The texts below contain bytes that are not valid UTF-8; the
	// entries expect U+FFFD patterns to match a single such byte.
	{"\ufffd", "\xff", build(1, 0, 1)},
	{"\ufffd", "hello\xffworld", build(1, 5, 6)},
	{`.*`, "hello\xffworld", build(1, 0, 11)},
	{`\x{fffd}`, "\xc2\x00", build(1, 0, 1)},
	{"[\ufffd]", "\xff", build(1, 0, 1)},
	{`[\x{fffd}]`, "\xc2\x00", build(1, 0, 1)},

	// long set of matches (longer than startSize)
	{
		".",
		"qwertyuiopasdfghjklzxcvbnm1234567890",
		build(36, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10,
			10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20,
			20, 21, 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 30,
			30, 31, 31, 32, 32, 33, 33, 34, 34, 35, 35, 36),
	},
}
   138  
   139  // build is a helper to construct a [][]int by extracting n sequences from x.
   140  // This represents n matches with len(x)/n submatches each.
   141  func build(n int, x ...int) [][]int {
   142  	ret := make([][]int, n)
   143  	runLength := len(x) / n
   144  	j := 0
   145  	for i := range ret {
   146  		ret[i] = make([]int, runLength)
   147  		copy(ret[i], x[j:])
   148  		j += runLength
   149  		if j > len(x) {
   150  			panic("invalid build entry")
   151  		}
   152  	}
   153  	return ret
   154  }
   155  
   156  // First the simple cases.
   157  
   158  func TestFind(t *testing.T) {
   159  	for _, test := range findTests {
   160  		re := MustCompile(test.pat)
   161  		if re.String() != test.pat {
   162  			t.Errorf("String() = `%s`; should be `%s`", re.String(), test.pat)
   163  		}
   164  		result := re.Find([]byte(test.text))
   165  		switch {
   166  		case len(test.matches) == 0 && len(result) == 0:
   167  			// ok
   168  		case test.matches == nil && result != nil:
   169  			t.Errorf("expected no match; got one: %s", test)
   170  		case test.matches != nil && result == nil:
   171  			t.Errorf("expected match; got none: %s", test)
   172  		case test.matches != nil && result != nil:
   173  			expect := test.text[test.matches[0][0]:test.matches[0][1]]
   174  			if len(result) != cap(result) {
   175  				t.Errorf("expected capacity %d got %d: %s", len(result), cap(result), test)
   176  			}
   177  			if expect != string(result) {
   178  				t.Errorf("expected %q got %q: %s", expect, result, test)
   179  			}
   180  		}
   181  	}
   182  }
   183  
   184  func TestFindString(t *testing.T) {
   185  	for _, test := range findTests {
   186  		result := MustCompile(test.pat).FindString(test.text)
   187  		switch {
   188  		case len(test.matches) == 0 && len(result) == 0:
   189  			// ok
   190  		case test.matches == nil && result != "":
   191  			t.Errorf("expected no match; got one: %s", test)
   192  		case test.matches != nil && result == "":
   193  			// Tricky because an empty result has two meanings: no match or empty match.
   194  			if test.matches[0][0] != test.matches[0][1] {
   195  				t.Errorf("expected match; got none: %s", test)
   196  			}
   197  		case test.matches != nil && result != "":
   198  			expect := test.text[test.matches[0][0]:test.matches[0][1]]
   199  			if expect != result {
   200  				t.Errorf("expected %q got %q: %s", expect, result, test)
   201  			}
   202  		}
   203  	}
   204  }
   205  
   206  func testFindIndex(test *FindTest, result []int, t *testing.T) {
   207  	switch {
   208  	case len(test.matches) == 0 && len(result) == 0:
   209  		// ok
   210  	case test.matches == nil && result != nil:
   211  		t.Errorf("expected no match; got one: %s", test)
   212  	case test.matches != nil && result == nil:
   213  		t.Errorf("expected match; got none: %s", test)
   214  	case test.matches != nil && result != nil:
   215  		expect := test.matches[0]
   216  		if expect[0] != result[0] || expect[1] != result[1] {
   217  			t.Errorf("expected %v got %v: %s", expect, result, test)
   218  		}
   219  	}
   220  }
   221  
   222  func TestFindIndex(t *testing.T) {
   223  	for _, test := range findTests {
   224  		testFindIndex(&test, MustCompile(test.pat).FindIndex([]byte(test.text)), t)
   225  	}
   226  }
   227  
   228  func TestFindStringIndex(t *testing.T) {
   229  	for _, test := range findTests {
   230  		testFindIndex(&test, MustCompile(test.pat).FindStringIndex(test.text), t)
   231  	}
   232  }
   233  
   234  func TestFindReaderIndex(t *testing.T) {
   235  	for _, test := range findTests {
   236  		testFindIndex(&test, MustCompile(test.pat).FindReaderIndex(strings.NewReader(test.text)), t)
   237  	}
   238  }
   239  
   240  // Now come the simple All cases.
   241  
   242  func TestFindAll(t *testing.T) {
   243  	for _, test := range findTests {
   244  		result := MustCompile(test.pat).FindAll([]byte(test.text), -1)
   245  		switch {
   246  		case test.matches == nil && result == nil:
   247  			// ok
   248  		case test.matches == nil && result != nil:
   249  			t.Errorf("expected no match; got one: %s", test)
   250  		case test.matches != nil && result == nil:
   251  			t.Fatalf("expected match; got none: %s", test)
   252  		case test.matches != nil && result != nil:
   253  			if len(test.matches) != len(result) {
   254  				t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
   255  				continue
   256  			}
   257  			for k, e := range test.matches {
   258  				got := result[k]
   259  				if len(got) != cap(got) {
   260  					t.Errorf("match %d: expected capacity %d got %d: %s", k, len(got), cap(got), test)
   261  				}
   262  				expect := test.text[e[0]:e[1]]
   263  				if expect != string(got) {
   264  					t.Errorf("match %d: expected %q got %q: %s", k, expect, got, test)
   265  				}
   266  			}
   267  		}
   268  	}
   269  }
   270  
   271  func TestFindAllString(t *testing.T) {
   272  	for _, test := range findTests {
   273  		result := MustCompile(test.pat).FindAllString(test.text, -1)
   274  		switch {
   275  		case test.matches == nil && result == nil:
   276  			// ok
   277  		case test.matches == nil && result != nil:
   278  			t.Errorf("expected no match; got one: %s", test)
   279  		case test.matches != nil && result == nil:
   280  			t.Errorf("expected match; got none: %s", test)
   281  		case test.matches != nil && result != nil:
   282  			if len(test.matches) != len(result) {
   283  				t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
   284  				continue
   285  			}
   286  			for k, e := range test.matches {
   287  				expect := test.text[e[0]:e[1]]
   288  				if expect != result[k] {
   289  					t.Errorf("expected %q got %q: %s", expect, result, test)
   290  				}
   291  			}
   292  		}
   293  	}
   294  }
   295  
   296  func testFindAllIndex(test *FindTest, result [][]int, t *testing.T) {
   297  	switch {
   298  	case test.matches == nil && result == nil:
   299  		// ok
   300  	case test.matches == nil && result != nil:
   301  		t.Errorf("expected no match; got one: %s", test)
   302  	case test.matches != nil && result == nil:
   303  		t.Errorf("expected match; got none: %s", test)
   304  	case test.matches != nil && result != nil:
   305  		if len(test.matches) != len(result) {
   306  			t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
   307  			return
   308  		}
   309  		for k, e := range test.matches {
   310  			if e[0] != result[k][0] || e[1] != result[k][1] {
   311  				t.Errorf("match %d: expected %v got %v: %s", k, e, result[k], test)
   312  			}
   313  		}
   314  	}
   315  }
   316  
   317  func TestFindAllIndex(t *testing.T) {
   318  	for _, test := range findTests {
   319  		testFindAllIndex(&test, MustCompile(test.pat).FindAllIndex([]byte(test.text), -1), t)
   320  	}
   321  }
   322  
   323  func TestFindAllStringIndex(t *testing.T) {
   324  	for _, test := range findTests {
   325  		testFindAllIndex(&test, MustCompile(test.pat).FindAllStringIndex(test.text, -1), t)
   326  	}
   327  }
   328  
   329  // Now come the Submatch cases.
   330  
   331  func testSubmatchBytes(test *FindTest, n int, submatches []int, result [][]byte, t *testing.T) {
   332  	if len(submatches) != len(result)*2 {
   333  		t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test)
   334  		return
   335  	}
   336  	for k := 0; k < len(submatches); k += 2 {
   337  		if submatches[k] == -1 {
   338  			if result[k/2] != nil {
   339  				t.Errorf("match %d: expected nil got %q: %s", n, result, test)
   340  			}
   341  			continue
   342  		}
   343  		got := result[k/2]
   344  		if len(got) != cap(got) {
   345  			t.Errorf("match %d: expected capacity %d got %d: %s", n, len(got), cap(got), test)
   346  			return
   347  		}
   348  		expect := test.text[submatches[k]:submatches[k+1]]
   349  		if expect != string(got) {
   350  			t.Errorf("match %d: expected %q got %q: %s", n, expect, got, test)
   351  			return
   352  		}
   353  	}
   354  }
   355  
   356  func TestFindSubmatch(t *testing.T) {
   357  	for _, test := range findTests {
   358  		result := MustCompile(test.pat).FindSubmatch([]byte(test.text))
   359  		switch {
   360  		case test.matches == nil && result == nil:
   361  			// ok
   362  		case test.matches == nil && result != nil:
   363  			t.Errorf("expected no match; got one: %s", test)
   364  		case test.matches != nil && result == nil:
   365  			t.Errorf("expected match; got none: %s", test)
   366  		case test.matches != nil && result != nil:
   367  			testSubmatchBytes(&test, 0, test.matches[0], result, t)
   368  		}
   369  	}
   370  }
   371  
   372  func testSubmatchString(test *FindTest, n int, submatches []int, result []string, t *testing.T) {
   373  	if len(submatches) != len(result)*2 {
   374  		t.Errorf("match %d: expected %d submatches; got %d: %s", n, len(submatches)/2, len(result), test)
   375  		return
   376  	}
   377  	for k := 0; k < len(submatches); k += 2 {
   378  		if submatches[k] == -1 {
   379  			if result[k/2] != "" {
   380  				t.Errorf("match %d: expected nil got %q: %s", n, result, test)
   381  			}
   382  			continue
   383  		}
   384  		expect := test.text[submatches[k]:submatches[k+1]]
   385  		if expect != result[k/2] {
   386  			t.Errorf("match %d: expected %q got %q: %s", n, expect, result, test)
   387  			return
   388  		}
   389  	}
   390  }
   391  
   392  func TestFindStringSubmatch(t *testing.T) {
   393  	for _, test := range findTests {
   394  		result := MustCompile(test.pat).FindStringSubmatch(test.text)
   395  		switch {
   396  		case test.matches == nil && result == nil:
   397  			// ok
   398  		case test.matches == nil && result != nil:
   399  			t.Errorf("expected no match; got one: %s", test)
   400  		case test.matches != nil && result == nil:
   401  			t.Errorf("expected match; got none: %s", test)
   402  		case test.matches != nil && result != nil:
   403  			testSubmatchString(&test, 0, test.matches[0], result, t)
   404  		}
   405  	}
   406  }
   407  
   408  func testSubmatchIndices(test *FindTest, n int, expect, result []int, t *testing.T) {
   409  	if len(expect) != len(result) {
   410  		t.Errorf("match %d: expected %d matches; got %d: %s", n, len(expect)/2, len(result)/2, test)
   411  		return
   412  	}
   413  	for k, e := range expect {
   414  		if e != result[k] {
   415  			t.Errorf("match %d: submatch error: expected %v got %v: %s", n, expect, result, test)
   416  		}
   417  	}
   418  }
   419  
   420  func testFindSubmatchIndex(test *FindTest, result []int, t *testing.T) {
   421  	switch {
   422  	case test.matches == nil && result == nil:
   423  		// ok
   424  	case test.matches == nil && result != nil:
   425  		t.Errorf("expected no match; got one: %s", test)
   426  	case test.matches != nil && result == nil:
   427  		t.Errorf("expected match; got none: %s", test)
   428  	case test.matches != nil && result != nil:
   429  		testSubmatchIndices(test, 0, test.matches[0], result, t)
   430  	}
   431  }
   432  
   433  func TestFindSubmatchIndex(t *testing.T) {
   434  	for _, test := range findTests {
   435  		testFindSubmatchIndex(&test, MustCompile(test.pat).FindSubmatchIndex([]byte(test.text)), t)
   436  	}
   437  }
   438  
   439  func TestFindStringSubmatchIndex(t *testing.T) {
   440  	for _, test := range findTests {
   441  		testFindSubmatchIndex(&test, MustCompile(test.pat).FindStringSubmatchIndex(test.text), t)
   442  	}
   443  }
   444  
   445  func TestFindReaderSubmatchIndex(t *testing.T) {
   446  	for _, test := range findTests {
   447  		testFindSubmatchIndex(&test, MustCompile(test.pat).FindReaderSubmatchIndex(strings.NewReader(test.text)), t)
   448  	}
   449  }
   450  
   451  // Now come the monster AllSubmatch cases.
   452  
   453  func TestFindAllSubmatch(t *testing.T) {
   454  	for _, test := range findTests {
   455  		result := MustCompile(test.pat).FindAllSubmatch([]byte(test.text), -1)
   456  		switch {
   457  		case test.matches == nil && result == nil:
   458  			// ok
   459  		case test.matches == nil && result != nil:
   460  			t.Errorf("expected no match; got one: %s", test)
   461  		case test.matches != nil && result == nil:
   462  			t.Errorf("expected match; got none: %s", test)
   463  		case len(test.matches) != len(result):
   464  			t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
   465  		case test.matches != nil && result != nil:
   466  			for k, match := range test.matches {
   467  				testSubmatchBytes(&test, k, match, result[k], t)
   468  			}
   469  		}
   470  	}
   471  }
   472  
   473  func TestFindAllStringSubmatch(t *testing.T) {
   474  	for _, test := range findTests {
   475  		result := MustCompile(test.pat).FindAllStringSubmatch(test.text, -1)
   476  		switch {
   477  		case test.matches == nil && result == nil:
   478  			// ok
   479  		case test.matches == nil && result != nil:
   480  			t.Errorf("expected no match; got one: %s", test)
   481  		case test.matches != nil && result == nil:
   482  			t.Errorf("expected match; got none: %s", test)
   483  		case len(test.matches) != len(result):
   484  			t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
   485  		case test.matches != nil && result != nil:
   486  			for k, match := range test.matches {
   487  				testSubmatchString(&test, k, match, result[k], t)
   488  			}
   489  		}
   490  	}
   491  }
   492  
   493  func testFindAllSubmatchIndex(test *FindTest, result [][]int, t *testing.T) {
   494  	switch {
   495  	case test.matches == nil && result == nil:
   496  		// ok
   497  	case test.matches == nil && result != nil:
   498  		t.Errorf("expected no match; got one: %s", test)
   499  	case test.matches != nil && result == nil:
   500  		t.Errorf("expected match; got none: %s", test)
   501  	case len(test.matches) != len(result):
   502  		t.Errorf("expected %d matches; got %d: %s", len(test.matches), len(result), test)
   503  	case test.matches != nil && result != nil:
   504  		for k, match := range test.matches {
   505  			testSubmatchIndices(test, k, match, result[k], t)
   506  		}
   507  	}
   508  }
   509  
   510  func TestFindAllSubmatchIndex(t *testing.T) {
   511  	for _, test := range findTests {
   512  		testFindAllSubmatchIndex(&test, MustCompile(test.pat).FindAllSubmatchIndex([]byte(test.text), -1), t)
   513  	}
   514  }
   515  
   516  func TestFindAllStringSubmatchIndex(t *testing.T) {
   517  	for _, test := range findTests {
   518  		testFindAllSubmatchIndex(&test, MustCompile(test.pat).FindAllStringSubmatchIndex(test.text, -1), t)
   519  	}
   520  }
   521  

View as plain text