...

Source file src/golang.org/x/text/cases/context_test.go

Documentation: golang.org/x/text/cases

     1  // Copyright 2014 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package cases
     6  
     7  import (
     8  	"strings"
     9  	"testing"
    10  	"unicode"
    11  
    12  	"golang.org/x/text/language"
    13  	"golang.org/x/text/transform"
    14  	"golang.org/x/text/unicode/norm"
    15  	"golang.org/x/text/unicode/rangetable"
    16  )
    17  
    18  // The following definitions are taken directly from Chapter 3 of The Unicode
    19  // Standard.
    20  
    21  func propCased(r rune) bool {
    22  	return propLower(r) || propUpper(r) || unicode.IsTitle(r)
    23  }
    24  
    25  func propLower(r rune) bool {
    26  	return unicode.IsLower(r) || unicode.Is(unicode.Other_Lowercase, r)
    27  }
    28  
    29  func propUpper(r rune) bool {
    30  	return unicode.IsUpper(r) || unicode.Is(unicode.Other_Uppercase, r)
    31  }
    32  
    33  func propIgnore(r rune) bool {
    34  	if unicode.In(r, unicode.Mn, unicode.Me, unicode.Cf, unicode.Lm, unicode.Sk) {
    35  		return true
    36  	}
    37  	return caseIgnorable[r]
    38  }
    39  
    40  func hasBreakProp(r rune) bool {
    41  	// binary search over ranges
    42  	lo := 0
    43  	hi := len(breakProp)
    44  	for lo < hi {
    45  		m := lo + (hi-lo)/2
    46  		bp := &breakProp[m]
    47  		if bp.lo <= r && r <= bp.hi {
    48  			return true
    49  		}
    50  		if r < bp.lo {
    51  			hi = m
    52  		} else {
    53  			lo = m + 1
    54  		}
    55  	}
    56  	return false
    57  }
    58  
    59  func contextFromRune(r rune) *context {
    60  	c := context{dst: make([]byte, 128), src: []byte(string(r)), atEOF: true}
    61  	c.next()
    62  	return &c
    63  }
    64  
    65  func TestCaseProperties(t *testing.T) {
    66  	if unicode.Version != UnicodeVersion {
    67  		// Properties of existing code points may change by Unicode version, so
    68  		// we need to skip.
    69  		t.Skipf("Skipping as core Unicode version %s different than %s", unicode.Version, UnicodeVersion)
    70  	}
    71  	assigned := rangetable.Assigned(UnicodeVersion)
    72  	coreVersion := rangetable.Assigned(unicode.Version)
    73  	for r := rune(0); r <= lastRuneForTesting; r++ {
    74  		if !unicode.In(r, assigned) || !unicode.In(r, coreVersion) {
    75  			continue
    76  		}
    77  		c := contextFromRune(r)
    78  		if got, want := c.info.isCaseIgnorable(), propIgnore(r); got != want {
    79  			t.Errorf("caseIgnorable(%U): got %v; want %v (%x)", r, got, want, c.info)
    80  		}
    81  		// New letters may change case types, but existing case pairings should
    82  		// not change. See Case Pair Stability in
    83  		// https://unicode.org/policies/stability_policy.html.
    84  		if rf := unicode.SimpleFold(r); rf != r && unicode.In(rf, assigned) {
    85  			if got, want := c.info.isCased(), propCased(r); got != want {
    86  				t.Errorf("cased(%U): got %v; want %v (%x)", r, got, want, c.info)
    87  			}
    88  			if got, want := c.caseType() == cUpper, propUpper(r); got != want {
    89  				t.Errorf("upper(%U): got %v; want %v (%x)", r, got, want, c.info)
    90  			}
    91  			if got, want := c.caseType() == cLower, propLower(r); got != want {
    92  				t.Errorf("lower(%U): got %v; want %v (%x)", r, got, want, c.info)
    93  			}
    94  		}
    95  		if got, want := c.info.isBreak(), hasBreakProp(r); got != want {
    96  			t.Errorf("isBreak(%U): got %v; want %v (%x)", r, got, want, c.info)
    97  		}
    98  	}
    99  	// TODO: get title case from unicode file.
   100  }
   101  
   102  func TestMapping(t *testing.T) {
   103  	assigned := rangetable.Assigned(UnicodeVersion)
   104  	coreVersion := rangetable.Assigned(unicode.Version)
   105  	if coreVersion == nil {
   106  		coreVersion = assigned
   107  	}
   108  	apply := func(r rune, f func(c *context) bool) string {
   109  		c := contextFromRune(r)
   110  		f(c)
   111  		return string(c.dst[:c.pDst])
   112  	}
   113  
   114  	for r, tt := range special {
   115  		if got, want := apply(r, lower), tt.toLower; got != want {
   116  			t.Errorf("lowerSpecial:(%U): got %+q; want %+q", r, got, want)
   117  		}
   118  		if got, want := apply(r, title), tt.toTitle; got != want {
   119  			t.Errorf("titleSpecial:(%U): got %+q; want %+q", r, got, want)
   120  		}
   121  		if got, want := apply(r, upper), tt.toUpper; got != want {
   122  			t.Errorf("upperSpecial:(%U): got %+q; want %+q", r, got, want)
   123  		}
   124  	}
   125  
   126  	for r := rune(0); r <= lastRuneForTesting; r++ {
   127  		if !unicode.In(r, assigned) || !unicode.In(r, coreVersion) {
   128  			continue
   129  		}
   130  		if rf := unicode.SimpleFold(r); rf == r || !unicode.In(rf, assigned) {
   131  			continue
   132  		}
   133  		if _, ok := special[r]; ok {
   134  			continue
   135  		}
   136  		want := string(unicode.ToLower(r))
   137  		if got := apply(r, lower); got != want {
   138  			t.Errorf("lower:%q (%U): got %q %U; want %q %U", r, r, got, []rune(got), want, []rune(want))
   139  		}
   140  
   141  		want = string(unicode.ToUpper(r))
   142  		if got := apply(r, upper); got != want {
   143  			t.Errorf("upper:%q (%U): got %q %U; want %q %U", r, r, got, []rune(got), want, []rune(want))
   144  		}
   145  
   146  		want = string(unicode.ToTitle(r))
   147  		if got := apply(r, title); got != want {
   148  			t.Errorf("title:%q (%U): got %q %U; want %q %U", r, r, got, []rune(got), want, []rune(want))
   149  		}
   150  	}
   151  }
   152  
   153  func runeFoldData(r rune) (x struct{ simple, full, special string }) {
   154  	x = foldMap[r]
   155  	if x.simple == "" {
   156  		x.simple = string(unicode.ToLower(r))
   157  	}
   158  	if x.full == "" {
   159  		x.full = string(unicode.ToLower(r))
   160  	}
   161  	if x.special == "" {
   162  		x.special = x.full
   163  	}
   164  	return
   165  }
   166  
   167  func TestFoldData(t *testing.T) {
   168  	assigned := rangetable.Assigned(UnicodeVersion)
   169  	coreVersion := rangetable.Assigned(unicode.Version)
   170  	if coreVersion == nil {
   171  		coreVersion = assigned
   172  	}
   173  	apply := func(r rune, f func(c *context) bool) (string, info) {
   174  		c := contextFromRune(r)
   175  		f(c)
   176  		return string(c.dst[:c.pDst]), c.info.cccType()
   177  	}
   178  	for r := rune(0); r <= lastRuneForTesting; r++ {
   179  		if !unicode.In(r, assigned) || !unicode.In(r, coreVersion) {
   180  			continue
   181  		}
   182  		x := runeFoldData(r)
   183  		if got, info := apply(r, foldFull); got != x.full {
   184  			t.Errorf("full:%q (%U): got %q %U; want %q %U (ccc=%x)", r, r, got, []rune(got), x.full, []rune(x.full), info)
   185  		}
   186  		// TODO: special and simple.
   187  	}
   188  }
   189  
   190  func TestCCC(t *testing.T) {
   191  	assigned := rangetable.Assigned(UnicodeVersion)
   192  	normVersion := rangetable.Assigned(norm.Version)
   193  	for r := rune(0); r <= lastRuneForTesting; r++ {
   194  		if !unicode.In(r, assigned) || !unicode.In(r, normVersion) {
   195  			continue
   196  		}
   197  		c := contextFromRune(r)
   198  
   199  		p := norm.NFC.PropertiesString(string(r))
   200  		want := cccOther
   201  		switch p.CCC() {
   202  		case 0:
   203  			want = cccZero
   204  		case above:
   205  			want = cccAbove
   206  		}
   207  		if got := c.info.cccType(); got != want {
   208  			t.Errorf("%U: got %x; want %x", r, got, want)
   209  		}
   210  	}
   211  }
   212  
   213  func TestWordBreaks(t *testing.T) {
   214  	for _, tt := range breakTest {
   215  		desc := norm.NFC.String(tt)
   216  		t.Run(desc, func(t *testing.T) {
   217  			parts := strings.Split(tt, "|")
   218  			want := ""
   219  			for _, s := range parts {
   220  				found := false
   221  				// This algorithm implements title casing given word breaks
   222  				// as defined in the Unicode standard 3.13 R3.
   223  				for _, r := range s {
   224  					title := unicode.ToTitle(r)
   225  					lower := unicode.ToLower(r)
   226  					if !found && title != lower {
   227  						found = true
   228  						want += string(title)
   229  					} else {
   230  						want += string(lower)
   231  					}
   232  				}
   233  			}
   234  			src := strings.Join(parts, "")
   235  			got := Title(language.Und).String(src)
   236  			if got != want {
   237  				t.Errorf("got %q; want %q", got, want)
   238  			}
   239  		})
   240  	}
   241  }
   242  
   243  func TestContext(t *testing.T) {
   244  	tests := []struct {
   245  		desc       string
   246  		dstSize    int
   247  		atEOF      bool
   248  		src        string
   249  		out        string
   250  		nSrc       int
   251  		err        error
   252  		ops        string
   253  		prefixArg  string
   254  		prefixWant bool
   255  	}{{
   256  		desc:    "next: past end, atEOF, no checkpoint",
   257  		dstSize: 10,
   258  		atEOF:   true,
   259  		src:     "12",
   260  		out:     "",
   261  		nSrc:    2,
   262  		ops:     "next;next;next",
   263  		// Test that calling prefix with a non-empty argument when the buffer
   264  		// is depleted returns false.
   265  		prefixArg:  "x",
   266  		prefixWant: false,
   267  	}, {
   268  		desc:       "next: not at end, atEOF, no checkpoint",
   269  		dstSize:    10,
   270  		atEOF:      false,
   271  		src:        "12",
   272  		out:        "",
   273  		nSrc:       0,
   274  		err:        transform.ErrShortSrc,
   275  		ops:        "next;next",
   276  		prefixArg:  "",
   277  		prefixWant: true,
   278  	}, {
   279  		desc:       "next: past end, !atEOF, no checkpoint",
   280  		dstSize:    10,
   281  		atEOF:      false,
   282  		src:        "12",
   283  		out:        "",
   284  		nSrc:       0,
   285  		err:        transform.ErrShortSrc,
   286  		ops:        "next;next;next",
   287  		prefixArg:  "",
   288  		prefixWant: true,
   289  	}, {
   290  		desc:       "next: past end, !atEOF, checkpoint",
   291  		dstSize:    10,
   292  		atEOF:      false,
   293  		src:        "12",
   294  		out:        "",
   295  		nSrc:       2,
   296  		ops:        "next;next;checkpoint;next",
   297  		prefixArg:  "",
   298  		prefixWant: true,
   299  	}, {
   300  		desc:       "copy: exact count, atEOF, no checkpoint",
   301  		dstSize:    2,
   302  		atEOF:      true,
   303  		src:        "12",
   304  		out:        "12",
   305  		nSrc:       2,
   306  		ops:        "next;copy;next;copy;next",
   307  		prefixArg:  "",
   308  		prefixWant: true,
   309  	}, {
   310  		desc:       "copy: past end, !atEOF, no checkpoint",
   311  		dstSize:    2,
   312  		atEOF:      false,
   313  		src:        "12",
   314  		out:        "",
   315  		nSrc:       0,
   316  		err:        transform.ErrShortSrc,
   317  		ops:        "next;copy;next;copy;next",
   318  		prefixArg:  "",
   319  		prefixWant: true,
   320  	}, {
   321  		desc:       "copy: past end, !atEOF, checkpoint",
   322  		dstSize:    2,
   323  		atEOF:      false,
   324  		src:        "12",
   325  		out:        "12",
   326  		nSrc:       2,
   327  		ops:        "next;copy;next;copy;checkpoint;next",
   328  		prefixArg:  "",
   329  		prefixWant: true,
   330  	}, {
   331  		desc:       "copy: short dst",
   332  		dstSize:    1,
   333  		atEOF:      false,
   334  		src:        "12",
   335  		out:        "",
   336  		nSrc:       0,
   337  		err:        transform.ErrShortDst,
   338  		ops:        "next;copy;next;copy;checkpoint;next",
   339  		prefixArg:  "12",
   340  		prefixWant: false,
   341  	}, {
   342  		desc:       "copy: short dst, checkpointed",
   343  		dstSize:    1,
   344  		atEOF:      false,
   345  		src:        "12",
   346  		out:        "1",
   347  		nSrc:       1,
   348  		err:        transform.ErrShortDst,
   349  		ops:        "next;copy;checkpoint;next;copy;next",
   350  		prefixArg:  "",
   351  		prefixWant: true,
   352  	}, {
   353  		desc:       "writeString: simple",
   354  		dstSize:    3,
   355  		atEOF:      true,
   356  		src:        "1",
   357  		out:        "1ab",
   358  		nSrc:       1,
   359  		ops:        "next;copy;writeab;next",
   360  		prefixArg:  "",
   361  		prefixWant: true,
   362  	}, {
   363  		desc:       "writeString: short dst",
   364  		dstSize:    2,
   365  		atEOF:      true,
   366  		src:        "12",
   367  		out:        "",
   368  		nSrc:       0,
   369  		err:        transform.ErrShortDst,
   370  		ops:        "next;copy;writeab;next",
   371  		prefixArg:  "2",
   372  		prefixWant: true,
   373  	}, {
   374  		desc:       "writeString: simple",
   375  		dstSize:    3,
   376  		atEOF:      true,
   377  		src:        "12",
   378  		out:        "1ab",
   379  		nSrc:       2,
   380  		ops:        "next;copy;next;writeab;next",
   381  		prefixArg:  "",
   382  		prefixWant: true,
   383  	}, {
   384  		desc:       "writeString: short dst",
   385  		dstSize:    2,
   386  		atEOF:      true,
   387  		src:        "12",
   388  		out:        "",
   389  		nSrc:       0,
   390  		err:        transform.ErrShortDst,
   391  		ops:        "next;copy;next;writeab;next",
   392  		prefixArg:  "1",
   393  		prefixWant: false,
   394  	}, {
   395  		desc:    "prefix",
   396  		dstSize: 2,
   397  		atEOF:   true,
   398  		src:     "12",
   399  		out:     "",
   400  		nSrc:    0,
   401  		// Context will assign an ErrShortSrc if the input wasn't exhausted.
   402  		err:        transform.ErrShortSrc,
   403  		prefixArg:  "12",
   404  		prefixWant: true,
   405  	}}
   406  	for _, tt := range tests {
   407  		c := context{dst: make([]byte, tt.dstSize), src: []byte(tt.src), atEOF: tt.atEOF}
   408  
   409  		for _, op := range strings.Split(tt.ops, ";") {
   410  			switch op {
   411  			case "next":
   412  				c.next()
   413  			case "checkpoint":
   414  				c.checkpoint()
   415  			case "writeab":
   416  				c.writeString("ab")
   417  			case "copy":
   418  				c.copy()
   419  			case "":
   420  			default:
   421  				t.Fatalf("unknown op %q", op)
   422  			}
   423  		}
   424  		if got := c.hasPrefix(tt.prefixArg); got != tt.prefixWant {
   425  			t.Errorf("%s:\nprefix was %v; want %v", tt.desc, got, tt.prefixWant)
   426  		}
   427  		nDst, nSrc, err := c.ret()
   428  		if err != tt.err {
   429  			t.Errorf("%s:\nerror was %v; want %v", tt.desc, err, tt.err)
   430  		}
   431  		if out := string(c.dst[:nDst]); out != tt.out {
   432  			t.Errorf("%s:\nout was %q; want %q", tt.desc, out, tt.out)
   433  		}
   434  		if nSrc != tt.nSrc {
   435  			t.Errorf("%s:\nnSrc was %d; want %d", tt.desc, nSrc, tt.nSrc)
   436  		}
   437  	}
   438  }
   439  

View as plain text