acc_test.go

Documentation: golang.org/x/image/vector

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package vector
     6  
     7  import (
     8  	"bytes"
     9  	"fmt"
    10  	"math"
    11  	"math/rand"
    12  	"runtime"
    13  	"testing"
    14  )
    15  
    16  // TestDivideByFFFF tests that dividing by 0xffff is equivalent to multiplying
    17  // and then shifting by magic constants. The Go compiler itself issues this
    18  // multiply-and-shift for a division by the constant value 0xffff. This trick
    19  // is used in the asm code as the GOARCH=amd64 SIMD instructions have parallel
    20  // multiply but not parallel divide.
    21  //
    22  // There's undoubtedly a justification somewhere in Hacker's Delight chapter 10
    23  // "Integer Division by Constants", but I don't have a more specific link.
    24  //
    25  // http://www.hackersdelight.org/divcMore.pdf and
    26  // http://www.hackersdelight.org/magic.htm
    27  func TestDivideByFFFF(t *testing.T) {
    28  	const mul, shift = 0x80008001, 47
    29  	rng := rand.New(rand.NewSource(1))
    30  	for i := 0; i < 20000; i++ {
    31  		u := rng.Uint32()
    32  		got := uint32((uint64(u) * mul) >> shift)
    33  		want := u / 0xffff
    34  		if got != want {
    35  			t.Fatalf("i=%d, u=%#08x: got %#08x, want %#08x", i, u, got, want)
    36  		}
    37  	}
    38  }
    39  
    40  // TestXxxSIMDUnaligned tests that unaligned SIMD loads/stores don't crash.
    41  
    42  func TestFixedAccumulateSIMDUnaligned(t *testing.T) {
    43  	if !haveAccumulateSIMD {
    44  		t.Skip("No SIMD implemention")
    45  	}
    46  
    47  	dst := make([]uint8, 64)
    48  	src := make([]uint32, 64)
    49  	for d := 0; d < 16; d++ {
    50  		for s := 0; s < 16; s++ {
    51  			fixedAccumulateOpSrcSIMD(dst[d:d+32], src[s:s+32])
    52  		}
    53  	}
    54  }
    55  
    56  func TestFloatingAccumulateSIMDUnaligned(t *testing.T) {
    57  	if !haveAccumulateSIMD {
    58  		t.Skip("No SIMD implemention")
    59  	}
    60  
    61  	dst := make([]uint8, 64)
    62  	src := make([]float32, 64)
    63  	for d := 0; d < 16; d++ {
    64  		for s := 0; s < 16; s++ {
    65  			floatingAccumulateOpSrcSIMD(dst[d:d+32], src[s:s+32])
    66  		}
    67  	}
    68  }
    69  
    70  // TestXxxSIMDShortDst tests that the SIMD implementations don't write past the
    71  // end of the dst buffer.
    72  
    73  func TestFixedAccumulateSIMDShortDst(t *testing.T) {
    74  	if !haveAccumulateSIMD {
    75  		t.Skip("No SIMD implemention")
    76  	}
    77  
    78  	const oneQuarter = uint32(int2ϕ(fxOne*fxOne)) / 4
    79  	src := []uint32{oneQuarter, oneQuarter, oneQuarter, oneQuarter}
    80  	for i := 0; i < 4; i++ {
    81  		dst := make([]uint8, 4)
    82  		fixedAccumulateOpSrcSIMD(dst[:i], src[:i])
    83  		for j := range dst {
    84  			if j < i {
    85  				if got := dst[j]; got == 0 {
    86  					t.Errorf("i=%d, j=%d: got %#02x, want non-zero", i, j, got)
    87  				}
    88  			} else {
    89  				if got := dst[j]; got != 0 {
    90  					t.Errorf("i=%d, j=%d: got %#02x, want zero", i, j, got)
    91  				}
    92  			}
    93  		}
    94  	}
    95  }
    96  
    97  func TestFloatingAccumulateSIMDShortDst(t *testing.T) {
    98  	if !haveAccumulateSIMD {
    99  		t.Skip("No SIMD implemention")
   100  	}
   101  
   102  	const oneQuarter = 0.25
   103  	src := []float32{oneQuarter, oneQuarter, oneQuarter, oneQuarter}
   104  	for i := 0; i < 4; i++ {
   105  		dst := make([]uint8, 4)
   106  		floatingAccumulateOpSrcSIMD(dst[:i], src[:i])
   107  		for j := range dst {
   108  			if j < i {
   109  				if got := dst[j]; got == 0 {
   110  					t.Errorf("i=%d, j=%d: got %#02x, want non-zero", i, j, got)
   111  				}
   112  			} else {
   113  				if got := dst[j]; got != 0 {
   114  					t.Errorf("i=%d, j=%d: got %#02x, want zero", i, j, got)
   115  				}
   116  			}
   117  		}
   118  	}
   119  }
   120  
   121  func TestFixedAccumulateOpOverShort(t *testing.T)    { testAcc(t, fxInShort, fxMaskShort, "over") }
   122  func TestFixedAccumulateOpSrcShort(t *testing.T)     { testAcc(t, fxInShort, fxMaskShort, "src") }
   123  func TestFixedAccumulateMaskShort(t *testing.T)      { testAcc(t, fxInShort, fxMaskShort, "mask") }
   124  func TestFloatingAccumulateOpOverShort(t *testing.T) { testAcc(t, flInShort, flMaskShort, "over") }
   125  func TestFloatingAccumulateOpSrcShort(t *testing.T)  { testAcc(t, flInShort, flMaskShort, "src") }
   126  func TestFloatingAccumulateMaskShort(t *testing.T)   { testAcc(t, flInShort, flMaskShort, "mask") }
   127  
   128  func TestFixedAccumulateOpOver16(t *testing.T)    { testAcc(t, fxIn16, fxMask16, "over") }
   129  func TestFixedAccumulateOpSrc16(t *testing.T)     { testAcc(t, fxIn16, fxMask16, "src") }
   130  func TestFixedAccumulateMask16(t *testing.T)      { testAcc(t, fxIn16, fxMask16, "mask") }
   131  func TestFloatingAccumulateOpOver16(t *testing.T) { testAcc(t, flIn16, flMask16, "over") }
   132  func TestFloatingAccumulateOpSrc16(t *testing.T)  { testAcc(t, flIn16, flMask16, "src") }
   133  func TestFloatingAccumulateMask16(t *testing.T)   { testAcc(t, flIn16, flMask16, "mask") }
   134  
   135  func testAcc(t *testing.T, in interface{}, mask []uint32, op string) {
   136  	for _, simd := range []bool{false, true} {
   137  		maxN := 0
   138  		switch in := in.(type) {
   139  		case []uint32:
   140  			if simd && !haveAccumulateSIMD {
   141  				continue
   142  			}
   143  			maxN = len(in)
   144  		case []float32:
   145  			if simd && !haveAccumulateSIMD {
   146  				continue
   147  			}
   148  			maxN = len(in)
   149  		}
   150  
   151  		for _, n := range []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
   152  			33, 55, 79, 96, 120, 165, 256, maxN} {
   153  
   154  			if n > maxN {
   155  				continue
   156  			}
   157  
   158  			var (
   159  				got8, want8   []uint8
   160  				got32, want32 []uint32
   161  			)
   162  			switch op {
   163  			case "over":
   164  				const background = 0x40
   165  				got8 = make([]uint8, n)
   166  				for i := range got8 {
   167  					got8[i] = background
   168  				}
   169  				want8 = make([]uint8, n)
   170  				for i := range want8 {
   171  					dstA := uint32(background * 0x101)
   172  					maskA := mask[i]
   173  					outA := dstA*(0xffff-maskA)/0xffff + maskA
   174  					want8[i] = uint8(outA >> 8)
   175  				}
   176  
   177  			case "src":
   178  				got8 = make([]uint8, n)
   179  				want8 = make([]uint8, n)
   180  				for i := range want8 {
   181  					want8[i] = uint8(mask[i] >> 8)
   182  				}
   183  
   184  			case "mask":
   185  				got32 = make([]uint32, n)
   186  				want32 = mask[:n]
   187  			}
   188  
   189  			switch in := in.(type) {
   190  			case []uint32:
   191  				switch op {
   192  				case "over":
   193  					if simd {
   194  						fixedAccumulateOpOverSIMD(got8, in[:n])
   195  					} else {
   196  						fixedAccumulateOpOver(got8, in[:n])
   197  					}
   198  				case "src":
   199  					if simd {
   200  						fixedAccumulateOpSrcSIMD(got8, in[:n])
   201  					} else {
   202  						fixedAccumulateOpSrc(got8, in[:n])
   203  					}
   204  				case "mask":
   205  					copy(got32, in[:n])
   206  					if simd {
   207  						fixedAccumulateMaskSIMD(got32)
   208  					} else {
   209  						fixedAccumulateMask(got32)
   210  					}
   211  				}
   212  			case []float32:
   213  				switch op {
   214  				case "over":
   215  					if simd {
   216  						floatingAccumulateOpOverSIMD(got8, in[:n])
   217  					} else {
   218  						floatingAccumulateOpOver(got8, in[:n])
   219  					}
   220  				case "src":
   221  					if simd {
   222  						floatingAccumulateOpSrcSIMD(got8, in[:n])
   223  					} else {
   224  						floatingAccumulateOpSrc(got8, in[:n])
   225  					}
   226  				case "mask":
   227  					if simd {
   228  						floatingAccumulateMaskSIMD(got32, in[:n])
   229  					} else {
   230  						floatingAccumulateMask(got32, in[:n])
   231  					}
   232  				}
   233  			}
   234  
   235  			if op != "mask" {
   236  				if !bytes.Equal(got8, want8) {
   237  					t.Errorf("simd=%t, n=%d:\ngot:  % x\nwant: % x", simd, n, got8, want8)
   238  				}
   239  			} else {
   240  				if !uint32sMatch(got32, want32) {
   241  					t.Errorf("simd=%t, n=%d:\ngot:  % x\nwant: % x", simd, n, got32, want32)
   242  				}
   243  			}
   244  		}
   245  	}
   246  }
   247  
   248  // This package contains multiple implementations of the same algorithm, e.g.
   249  // there are both SIMD and non-SIMD (vanilla) implementations on GOARCH=amd64.
   250  // In general, the tests in this file check that the output is *exactly* the
   251  // same, regardless of implementation.
   252  //
   253  // On GOARCH=wasm, float32 arithmetic is done with 64 bit precision. This is
   254  // allowed by the Go specification: only explicit conversions to float32 have
   255  // to round to 32 bit precision. However, the vanilla implementation therefore
   256  // produces different output for GOARCH=wasm than on other GOARCHes.
   257  //
   258  // We therefore treat GOARCH=wasm as a special case, where the tests check that
   259  // the output is only *approximately* the same (within a 0.1% tolerance).
   260  //
   261  // It's not that, on GOARCH=wasm, we produce the "wrong" answer. In fact, the
   262  // computation is more, not less, accurate on GOARCH=wasm. It's that the golden
   263  // outputs that the tests compare to were, for historical reasons, produced on
   264  // GOARCH=amd64 and so done with less accuracy (where float32 arithmetic is
   265  // performed entirely with 32 bits, not with 64 bits and then rounded back to
   266  // 32 bits). Furthermore, on amd64, we still want to test that SIMD and
   267  // non-SIMD produce exactly the same (albeit less accurate) output. The SIMD
   268  // implementation in particular is limited by what the underlying hardware
   269  // instructions provide, which often favors speed over accuracy.
   270  
   271  // approxEquals returns whether got is within 0.1% of want.
   272  func approxEquals(got, want float64) bool {
   273  	const tolerance = 0.001
   274  	return math.Abs(got-want) <= math.Abs(want)*tolerance
   275  }
   276  
   277  // sixteen is used by TestFloat32ArithmeticWithinTolerance, below. It needs to
   278  // be a package-level variable so that the compiler does not replace the
   279  // calculation with a single constant.
   280  var sixteen float32 = 16
   281  
   282  // TestFloat32ArithmeticWithinTolerance checks that approxEquals' tolerance is
   283  // sufficiently high so that the results of two separate ways of computing the
   284  // arbitrary fraction 16 / 1122 are deemed "approximately equal" even if they
   285  // aren't "exactly equal".
   286  //
   287  // We're not testing whether the computation on amd64 or wasm is "right" or
   288  // "wrong". We're testing that we cope with them being different.
   289  //
   290  // On GOARCH=amd64, printing x and y gives:
   291  //
   292  //	0.0142602495543672
   293  //	0.014260249212384224
   294  //
   295  // On GOARCH=wasm, printing x and y gives:
   296  //
   297  //	0.0142602495543672
   298  //	0.0142602495543672
   299  //
   300  // The infinitely precise (mathematical) answer is:
   301  //
   302  //	0.014260249554367201426024955436720142602495543672recurring...
   303  //
   304  // See https://play.golang.org/p/RxzKSdD_suE
   305  //
   306  // This test establishes a lower bound on approxEquals' tolerance constant.
   307  // Passing this one test (on all of the various supported GOARCH's) is a
   308  // necessary but not a sufficient condition on that value. Other tests in this
   309  // package that call uint32sMatch or float32sMatch (such as TestMakeFxInXxx,
   310  // TestMakeFlInXxx or anything calling testAcc) also require a sufficiently
   311  // large tolerance. But those tests are more complicated, and if there is a
   312  // problem with the tolerance constant, debugging this test can be simpler.
   313  func TestFloat32ArithmeticWithinTolerance(t *testing.T) {
   314  	x := float64(sixteen) / 1122 // Always use 64-bit division.
   315  	y := float64(sixteen / 1122) // Use 32- or 64-bit division (GOARCH dependent).
   316  	if !approxEquals(x, y) {
   317  		t.Errorf("x and y were not approximately equal:\nx = %v\ny = %v", x, y)
   318  	}
   319  }
   320  
   321  func uint32sMatch(xs, ys []uint32) bool {
   322  	if len(xs) != len(ys) {
   323  		return false
   324  	}
   325  	if runtime.GOARCH == "wasm" {
   326  		for i := range xs {
   327  			if !approxEquals(float64(xs[i]), float64(ys[i])) {
   328  				return false
   329  			}
   330  		}
   331  	} else {
   332  		for i := range xs {
   333  			if xs[i] != ys[i] {
   334  				return false
   335  			}
   336  		}
   337  	}
   338  	return true
   339  }
   340  
   341  func float32sMatch(xs, ys []float32) bool {
   342  	if len(xs) != len(ys) {
   343  		return false
   344  	}
   345  	if runtime.GOARCH == "wasm" {
   346  		for i := range xs {
   347  			if !approxEquals(float64(xs[i]), float64(ys[i])) {
   348  				return false
   349  			}
   350  		}
   351  	} else {
   352  		for i := range xs {
   353  			if xs[i] != ys[i] {
   354  				return false
   355  			}
   356  		}
   357  	}
   358  	return true
   359  }
   360  
   361  func BenchmarkFixedAccumulateOpOver16(b *testing.B)        { benchAcc(b, fxIn16, "over", false) }
   362  func BenchmarkFixedAccumulateOpOverSIMD16(b *testing.B)    { benchAcc(b, fxIn16, "over", true) }
   363  func BenchmarkFixedAccumulateOpSrc16(b *testing.B)         { benchAcc(b, fxIn16, "src", false) }
   364  func BenchmarkFixedAccumulateOpSrcSIMD16(b *testing.B)     { benchAcc(b, fxIn16, "src", true) }
   365  func BenchmarkFixedAccumulateMask16(b *testing.B)          { benchAcc(b, fxIn16, "mask", false) }
   366  func BenchmarkFixedAccumulateMaskSIMD16(b *testing.B)      { benchAcc(b, fxIn16, "mask", true) }
   367  func BenchmarkFloatingAccumulateOpOver16(b *testing.B)     { benchAcc(b, flIn16, "over", false) }
   368  func BenchmarkFloatingAccumulateOpOverSIMD16(b *testing.B) { benchAcc(b, flIn16, "over", true) }
   369  func BenchmarkFloatingAccumulateOpSrc16(b *testing.B)      { benchAcc(b, flIn16, "src", false) }
   370  func BenchmarkFloatingAccumulateOpSrcSIMD16(b *testing.B)  { benchAcc(b, flIn16, "src", true) }
   371  func BenchmarkFloatingAccumulateMask16(b *testing.B)       { benchAcc(b, flIn16, "mask", false) }
   372  func BenchmarkFloatingAccumulateMaskSIMD16(b *testing.B)   { benchAcc(b, flIn16, "mask", true) }
   373  
   374  func BenchmarkFixedAccumulateOpOver64(b *testing.B)        { benchAcc(b, fxIn64, "over", false) }
   375  func BenchmarkFixedAccumulateOpOverSIMD64(b *testing.B)    { benchAcc(b, fxIn64, "over", true) }
   376  func BenchmarkFixedAccumulateOpSrc64(b *testing.B)         { benchAcc(b, fxIn64, "src", false) }
   377  func BenchmarkFixedAccumulateOpSrcSIMD64(b *testing.B)     { benchAcc(b, fxIn64, "src", true) }
   378  func BenchmarkFixedAccumulateMask64(b *testing.B)          { benchAcc(b, fxIn64, "mask", false) }
   379  func BenchmarkFixedAccumulateMaskSIMD64(b *testing.B)      { benchAcc(b, fxIn64, "mask", true) }
   380  func BenchmarkFloatingAccumulateOpOver64(b *testing.B)     { benchAcc(b, flIn64, "over", false) }
   381  func BenchmarkFloatingAccumulateOpOverSIMD64(b *testing.B) { benchAcc(b, flIn64, "over", true) }
   382  func BenchmarkFloatingAccumulateOpSrc64(b *testing.B)      { benchAcc(b, flIn64, "src", false) }
   383  func BenchmarkFloatingAccumulateOpSrcSIMD64(b *testing.B)  { benchAcc(b, flIn64, "src", true) }
   384  func BenchmarkFloatingAccumulateMask64(b *testing.B)       { benchAcc(b, flIn64, "mask", false) }
   385  func BenchmarkFloatingAccumulateMaskSIMD64(b *testing.B)   { benchAcc(b, flIn64, "mask", true) }
   386  
   387  func benchAcc(b *testing.B, in interface{}, op string, simd bool) {
   388  	var f func()
   389  
   390  	switch in := in.(type) {
   391  	case []uint32:
   392  		if simd && !haveAccumulateSIMD {
   393  			b.Skip("No SIMD implemention")
   394  		}
   395  
   396  		switch op {
   397  		case "over":
   398  			dst := make([]uint8, len(in))
   399  			if simd {
   400  				f = func() { fixedAccumulateOpOverSIMD(dst, in) }
   401  			} else {
   402  				f = func() { fixedAccumulateOpOver(dst, in) }
   403  			}
   404  		case "src":
   405  			dst := make([]uint8, len(in))
   406  			if simd {
   407  				f = func() { fixedAccumulateOpSrcSIMD(dst, in) }
   408  			} else {
   409  				f = func() { fixedAccumulateOpSrc(dst, in) }
   410  			}
   411  		case "mask":
   412  			buf := make([]uint32, len(in))
   413  			copy(buf, in)
   414  			if simd {
   415  				f = func() { fixedAccumulateMaskSIMD(buf) }
   416  			} else {
   417  				f = func() { fixedAccumulateMask(buf) }
   418  			}
   419  		}
   420  
   421  	case []float32:
   422  		if simd && !haveAccumulateSIMD {
   423  			b.Skip("No SIMD implemention")
   424  		}
   425  
   426  		switch op {
   427  		case "over":
   428  			dst := make([]uint8, len(in))
   429  			if simd {
   430  				f = func() { floatingAccumulateOpOverSIMD(dst, in) }
   431  			} else {
   432  				f = func() { floatingAccumulateOpOver(dst, in) }
   433  			}
   434  		case "src":
   435  			dst := make([]uint8, len(in))
   436  			if simd {
   437  				f = func() { floatingAccumulateOpSrcSIMD(dst, in) }
   438  			} else {
   439  				f = func() { floatingAccumulateOpSrc(dst, in) }
   440  			}
   441  		case "mask":
   442  			dst := make([]uint32, len(in))
   443  			if simd {
   444  				f = func() { floatingAccumulateMaskSIMD(dst, in) }
   445  			} else {
   446  				f = func() { floatingAccumulateMask(dst, in) }
   447  			}
   448  		}
   449  	}
   450  
   451  	b.ResetTimer()
   452  	for i := 0; i < b.N; i++ {
   453  		f()
   454  	}
   455  }
   456  
   457  // itou exists because "uint32(int2ϕ(-1))" doesn't compile: constant -1
   458  // overflows uint32.
   459  func itou(i int2ϕ) uint32 {
   460  	return uint32(i)
   461  }
   462  
// fxInShort is a short, hand-written fixed point accumulator input, used by
// the TestFixedAccumulateXxxShort tests. It is the fixed point counterpart of
// flInShort: the trailing comment on each entry gives the equivalent floating
// point value (0x40000 corresponds to 1.0) followed by the running sum of all
// entries up to and including that one. Negative entries go through itou.
var fxInShort = []uint32{
	itou(+0x08000), // +0.125, // Running sum: +0.125
	itou(-0x20000), // -0.500, // Running sum: -0.375
	itou(+0x10000), // +0.250, // Running sum: -0.125
	itou(+0x18000), // +0.375, // Running sum: +0.250
	itou(+0x08000), // +0.125, // Running sum: +0.375
	itou(+0x00000), // +0.000, // Running sum: +0.375
	itou(-0x40000), // -1.000, // Running sum: -0.625
	itou(-0x20000), // -0.500, // Running sum: -1.125
	itou(+0x10000), // +0.250, // Running sum: -0.875
	itou(+0x38000), // +0.875, // Running sum: +0.000
	itou(+0x10000), // +0.250, // Running sum: +0.250
	itou(+0x30000), // +0.750, // Running sum: +1.000
}
   477  
// flInShort is a short, hand-written floating point accumulator input, used by
// the TestFloatingAccumulateXxxShort tests. The trailing comment on each entry
// gives the running sum of all entries up to and including that one. The
// sequence includes running sums that are negative, zero, exactly one and
// beyond one in magnitude.
var flInShort = []float32{
	+0.125, // Running sum: +0.125
	-0.500, // Running sum: -0.375
	+0.250, // Running sum: -0.125
	+0.375, // Running sum: +0.250
	+0.125, // Running sum: +0.375
	+0.000, // Running sum: +0.375
	-1.000, // Running sum: -0.625
	-0.500, // Running sum: -1.125
	+0.250, // Running sum: -0.875
	+0.875, // Running sum: +0.000
	+0.250, // Running sum: +0.250
	+0.750, // Running sum: +1.000
}
   492  
   493  // It's OK for fxMaskShort and flMaskShort to have slightly different values.
   494  // Both the fixed and floating point implementations already have (different)
   495  // rounding errors in the xxxLineTo methods before we get to accumulation. It's
   496  // OK for 50% coverage (in ideal math) to be approximated by either 0x7fff or
   497  // 0x8000. Both slices do contain checks that 0% and 100% map to 0x0000 and
   498  // 0xffff, as does checkCornersCenter in vector_test.go.
   499  //
   500  // It is important, though, for the SIMD and non-SIMD fixed point
   501  // implementations to give the exact same output, and likewise for the floating
   502  // point implementations.
   503  
// fxMaskShort is the golden 16-bit mask for fxInShort, passed to testAcc by
// the TestFixedAccumulateXxxShort tests. Entry i corresponds to the magnitude
// of the running sum at fxInShort[i] (see the comments there): a magnitude of
// 0.125 appears as 0x2000, a zero sum as 0x0000, and a magnitude of 1 or more
// as 0xffff.
var fxMaskShort = []uint32{
	0x2000,
	0x6000,
	0x2000,
	0x4000,
	0x6000,
	0x6000,
	0xa000,
	0xffff,
	0xe000,
	0x0000,
	0x4000,
	0xffff,
}
   518  
// flMaskShort is the golden 16-bit mask for flInShort, passed to testAcc by
// the TestFloatingAccumulateXxxShort tests. Entry i corresponds to the
// magnitude of the running sum at flInShort[i]. Apart from the 0x0000 and
// 0xffff entries, each value is one less than its fxMaskShort counterpart
// (e.g. 0x1fff versus 0x2000).
var flMaskShort = []uint32{
	0x1fff,
	0x5fff,
	0x1fff,
	0x3fff,
	0x5fff,
	0x5fff,
	0x9fff,
	0xffff,
	0xdfff,
	0x0000,
	0x3fff,
	0xffff,
}
   533  
   534  func TestMakeFxInXxx(t *testing.T) {
   535  	dump := func(us []uint32) string {
   536  		var b bytes.Buffer
   537  		for i, u := range us {
   538  			if i%8 == 0 {
   539  				b.WriteByte('\n')
   540  			}
   541  			fmt.Fprintf(&b, "%#08x, ", u)
   542  		}
   543  		return b.String()
   544  	}
   545  
   546  	if !uint32sMatch(fxIn16, hardCodedFxIn16) {
   547  		t.Errorf("height 16: got:%v\nwant:%v", dump(fxIn16), dump(hardCodedFxIn16))
   548  	}
   549  }
   550  
   551  func TestMakeFlInXxx(t *testing.T) {
   552  	dump := func(fs []float32) string {
   553  		var b bytes.Buffer
   554  		for i, f := range fs {
   555  			if i%8 == 0 {
   556  				b.WriteByte('\n')
   557  			}
   558  			fmt.Fprintf(&b, "%v, ", f)
   559  		}
   560  		return b.String()
   561  	}
   562  
   563  	if !float32sMatch(flIn16, hardCodedFlIn16) {
   564  		t.Errorf("height 16: got:%v\nwant:%v", dump(flIn16), dump(hardCodedFlIn16))
   565  	}
   566  }
   567  
   568  func makeInXxx(height int, useFloatingPointMath bool) *Rasterizer {
   569  	width, data := scaledBenchmarkGlyphData(height)
   570  	z := NewRasterizer(width, height)
   571  	z.setUseFloatingPointMath(useFloatingPointMath)
   572  	for _, d := range data {
   573  		switch d.n {
   574  		case 0:
   575  			z.MoveTo(d.px, d.py)
   576  		case 1:
   577  			z.LineTo(d.px, d.py)
   578  		case 2:
   579  			z.QuadTo(d.px, d.py, d.qx, d.qy)
   580  		}
   581  	}
   582  	return z
   583  }
   584  
   585  func makeFxInXxx(height int) []uint32 {
   586  	z := makeInXxx(height, false)
   587  	return z.bufU32
   588  }
   589  
   590  func makeFlInXxx(height int) []float32 {
   591  	z := makeInXxx(height, true)
   592  	return z.bufF32
   593  }
   594  
   595  // fxInXxx and flInXxx are the z.bufU32 and z.bufF32 inputs to the accumulate
   596  // functions when rasterizing benchmarkGlyphData at a height of Xxx pixels.
   597  //
   598  // fxMaskXxx and flMaskXxx are the corresponding golden outputs of those
   599  // accumulateMask functions.
   600  //
   601  // The hardCodedEtc versions are a sanity check for unexpected changes in the
   602  // rasterization implementations up to but not including accumulation.
   603  
   604  var (
   605  	fxIn16 = makeFxInXxx(16)
   606  	fxIn64 = makeFxInXxx(64)
   607  	flIn16 = makeFlInXxx(16)
   608  	flIn64 = makeFlInXxx(64)
   609  )
   610  
// hardCodedFxIn16 is the expected value of fxIn16. TestMakeFxInXxx compares
// the two with uint32sMatch.
//
// The 208 values are laid out eight per row, in the same format that
// TestMakeFxInXxx's dump helper prints, so that a dump can be compared with
// (or pasted over) this table directly. Values such as 0xffffe91d are
// presumably negative int2ϕ values converted to uint32 (compare itou) —
// confirm against the rasterizer before relying on that.
var hardCodedFxIn16 = []uint32{
	0x00000000, 0x00000000, 0xffffe91d, 0xfffe7c4a, 0xfffeaa9f, 0xffff4e33, 0xffffc1c5, 0x00007782,
	0x00009619, 0x0001a857, 0x000129e9, 0x00000028, 0x00000000, 0x00000000, 0xffff6e70, 0xfffd3199,
	0xffff5ff8, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00014b29,
	0x0002acf3, 0x000007e2, 0xffffca5a, 0xfffcab73, 0xffff8a34, 0x00001b55, 0x0001b334, 0x0001449e,
	0x0000434d, 0xffff62ec, 0xfffe1443, 0xffff325d, 0x00000000, 0x0002234a, 0x0001dcb6, 0xfffe2948,
	0xfffdd6b8, 0x00000000, 0x00028cc0, 0x00017340, 0x00000000, 0x00000000, 0x00000000, 0xffffd2d6,
	0xfffcadd0, 0xffff7f5c, 0x00007400, 0x00038c00, 0xfffe9260, 0xffff2da0, 0x0000023a, 0x0002259b,
	0x0000182a, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xfffdc600, 0xfffe3a00, 0x00000059,
	0x0003a44d, 0x00005b59, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
	0x00000000, 0x00000000, 0xfffe33f3, 0xfffdcc0d, 0x00000000, 0x00033c02, 0x0000c3fe, 0x00000000,
	0x00000000, 0xffffa13d, 0xfffeeec8, 0xffff8c02, 0xffff8c48, 0xffffc7b5, 0x00000000, 0xffff5b68,
	0xffff3498, 0x00000000, 0x00033c00, 0x0000c400, 0xffff9bc4, 0xfffdf4a3, 0xfffe8df3, 0xffffe1a8,
	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00033c00,
	0x000092c7, 0xfffcf373, 0xffff3dc7, 0x00000fcc, 0x00011ae7, 0x000130c3, 0x0000680d, 0x00004a59,
	0x00000a20, 0xfffe9dc4, 0xfffe4a3c, 0x00000000, 0x00033c00, 0xfffe87ef, 0xfffe3c11, 0x0000105e,
	0x0002b9c4, 0x000135dc, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0xfffe3600, 0xfffdca00,
	0x00000000, 0x00033c00, 0xfffd9000, 0xffff3400, 0x0000e400, 0x00031c00, 0x00000000, 0x00000000,
	0x00000000, 0x00000000, 0x00000000, 0xfffe3600, 0xfffdca00, 0x00000000, 0x00033c00, 0xfffcf9a5,
	0xffffca5b, 0x000120e6, 0x0002df1a, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
	0xfffdb195, 0xfffe4e6b, 0x00000000, 0x00033c00, 0xfffd9e00, 0xffff2600, 0x00002f0e, 0x00033ea3,
	0x0000924d, 0x00000000, 0x00000000, 0x00000000, 0xfffe83b3, 0xfffd881d, 0xfffff431, 0x00000000,
	0x00031f60, 0xffff297a, 0xfffdb726, 0x00000000, 0x000053a7, 0x0001b506, 0x0000a24b, 0xffffa32d,
	0xfffead9b, 0xffff0479, 0xffffffc9, 0x00000000, 0x00000000, 0x0002d800, 0x0001249d, 0xfffd67bb,
	0xfffe9baa, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x0000ac03, 0x0001448b,
	0xfffe0f70, 0x00000000, 0x000229ea, 0x0001d616, 0xffffff8c, 0xfffebf76, 0xfffe54d9, 0xffff5d9e,
	0xffffd3eb, 0x0000c65e, 0x0000fc15, 0x0001d491, 0xffffb566, 0xfffd9433, 0x00000000, 0x0000e4ec,
}
   639  
// hardCodedFlIn16 is the expected value of flIn16. TestMakeFlInXxx compares
// the two with float32sMatch.
//
// The 208 values are laid out eight per row, in the same format that
// TestMakeFlInXxx's dump helper prints, so that a dump can be compared with
// (or pasted over) this table directly.
var hardCodedFlIn16 = []float32{
	0, 0, -0.022306755, -0.3782405, -0.33334962, -0.1741521, -0.0607556, 0.11660573,
	0.14664596, 0.41462868, 0.2907673, 0.0001568835, 0, 0, -0.14239307, -0.7012868,
	-0.15632017, 0, 0, 0, 0, 0, 0, 0.3230303,
	0.6690931, 0.007876594, -0.05189419, -0.832786, -0.11531975, 0.026225802, 0.42518616, 0.3154636,
	0.06598757, -0.15304244, -0.47969276, -0.20012794, 0, 0.5327272, 0.46727282, -0.45950258,
	-0.5404974, 0, 0.63484025, 0.36515975, 0, 0, 0, -0.04351709,
	-0.8293345, -0.12714837, 0.11087036, 0.88912964, -0.35792422, -0.2053554, 0.0022513224, 0.5374398,
	0.023588525, 0, 0, 0, 0, -0.55346966, -0.44653034, 0.0002531938,
	0.9088273, 0.090919495, 0, 0, 0, 0, 0, 0,
	0, 0, -0.44745448, -0.5525455, 0, 0.80748945, 0.19251058, 0,
	0, -0.092476256, -0.2661464, -0.11322958, -0.11298219, -0.055094406, 0, -0.16045958,
	-0.1996116, 0, 0.80748653, 0.19251347, -0.09804727, -0.51129663, -0.3610403, -0.029615778,
	0, 0, 0, 0, 0, 0, 0, 0.80748653,
	0.14411622, -0.76251525, -0.1890875, 0.01527351, 0.27528667, 0.29730347, 0.101477206, 0.07259522,
	0.009900213, -0.34395567, -0.42788061, 0, 0.80748653, -0.3648737, -0.44261283, 0.015778137,
	0.6826565, 0.30156538, 0, 0, 0, 0, -0.44563293, -0.55436707,
	0, 0.80748653, -0.60703933, -0.20044717, 0.22371745, 0.77628255, 0, 0,
	0, 0, 0, -0.44563293, -0.55436707, 0, 0.80748653, -0.7550391,
	-0.05244744, 0.2797074, 0.72029257, 0, 0, 0, 0, 0,
	-0.57440215, -0.42559785, 0, 0.80748653, -0.59273535, -0.21475118, 0.04544862, 0.81148535,
	0.14306602, 0, 0, 0, -0.369642, -0.61841226, -0.011945802, 0,
	0.7791623, -0.20691396, -0.57224834, 0, 0.08218567, 0.42637306, 0.1586175, -0.089709565,
	-0.32935485, -0.24788953, -0.00022224105, 0, 0, 0.7085409, 0.28821066, -0.64765793,
	-0.34909368, 0, 0, 0, 0, 0, 0.16679136, 0.31914657,
	-0.48593786, 0, 0.537915, 0.462085, -0.00041967133, -0.3120329, -0.41914812, -0.15886839,
	-0.042683028, 0.19370951, 0.24624406, 0.45803425, -0.07049577, -0.6091341, 0, 0.22253075,
}
   668  
// fxMask16 is the golden 16-bit mask for the fixed point input fxIn16. It is
// passed to testAcc by the TestFixedAccumulateXxx16 tests, and compared
// against flMask16 by TestFixedFloatingCloseness.
//
// The 208 values are laid out thirteen per row, in sixteen rows.
// NOTE(review): presumably one row per scanline of a 13 pixel wide, 16 pixel
// high raster — confirm against scaledBenchmarkGlyphData.
var fxMask16 = []uint32{
	0x0000, 0x0000, 0x05b8, 0x66a6, 0xbbfe, 0xe871, 0xf800, 0xda20, 0xb499, 0x4a84, 0x0009, 0x0000, 0x0000,
	0x0000, 0x2463, 0xd7fd, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xad35, 0x01f8, 0x0000,
	0x0d69, 0xe28c, 0xffff, 0xf92a, 0x8c5d, 0x3b36, 0x2a62, 0x51a7, 0xcc97, 0xffff, 0xffff, 0x772d, 0x0000,
	0x75ad, 0xffff, 0xffff, 0x5ccf, 0x0000, 0x0000, 0x0000, 0x0000, 0x0b4a, 0xdfd6, 0xffff, 0xe2ff, 0x0000,
	0x5b67, 0x8fff, 0x8f70, 0x060a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x8e7f, 0xffff, 0xffe9, 0x16d6,
	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x7303, 0xffff, 0xffff, 0x30ff,
	0x0000, 0x0000, 0x0000, 0x17b0, 0x5bfe, 0x78fe, 0x95ec, 0xa3fe, 0xa3fe, 0xcd24, 0xfffe, 0xfffe, 0x30fe,
	0x0001, 0x190d, 0x9be5, 0xf868, 0xfffe, 0xfffe, 0xfffe, 0xfffe, 0xfffe, 0xfffe, 0xfffe, 0xfffe, 0x30fe,
	0x0c4c, 0xcf6f, 0xfffe, 0xfc0b, 0xb551, 0x6920, 0x4f1d, 0x3c87, 0x39ff, 0x928e, 0xffff, 0xffff, 0x30ff,
	0x8f03, 0xffff, 0xfbe7, 0x4d76, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x727f, 0xffff, 0xffff, 0x30ff,
	0xccff, 0xffff, 0xc6ff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x727f, 0xffff, 0xffff, 0x30ff,
	0xf296, 0xffff, 0xb7c6, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x939a, 0xffff, 0xffff, 0x30ff,
	0xc97f, 0xffff, 0xf43c, 0x2493, 0x0000, 0x0000, 0x0000, 0x0000, 0x5f13, 0xfd0c, 0xffff, 0xffff, 0x3827,
	0x6dc9, 0xffff, 0xffff, 0xeb16, 0x7dd4, 0x5541, 0x6c76, 0xc10f, 0xfff1, 0xffff, 0xffff, 0xffff, 0x49ff,
	0x00d8, 0xa6e9, 0xfffe, 0xfffe, 0xfffe, 0xfffe, 0xfffe, 0xfffe, 0xd4fe, 0x83db, 0xffff, 0xffff, 0x7584,
	0x0000, 0x001c, 0x503e, 0xbb08, 0xe3a1, 0xeea6, 0xbd0e, 0x7e09, 0x08e5, 0x1b8b, 0xb67f, 0xb67f, 0x7d44,
}
   687  
// flMask16 is the golden 16-bit mask for the floating point input flIn16. It
// is passed to testAcc by the TestFloatingAccumulateXxx16 tests, and compared
// against fxMask16 by TestFixedFloatingCloseness.
//
// The 208 values are laid out thirteen per row, in sixteen rows.
// NOTE(review): presumably one row per scanline of a 13 pixel wide, 16 pixel
// high raster — confirm against scaledBenchmarkGlyphData.
var flMask16 = []uint32{
	0x0000, 0x0000, 0x05b5, 0x668a, 0xbbe0, 0xe875, 0xf803, 0xda29, 0xb49f, 0x4a7a, 0x000a, 0x0000, 0x0000,
	0x0000, 0x2473, 0xd7fb, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xad4d, 0x0204, 0x0000,
	0x0d48, 0xe27a, 0xffff, 0xf949, 0x8c70, 0x3bae, 0x2ac9, 0x51f7, 0xccc4, 0xffff, 0xffff, 0x779f, 0x0000,
	0x75a1, 0xffff, 0xffff, 0x5d7b, 0x0000, 0x0000, 0x0000, 0x0000, 0x0b23, 0xdf73, 0xffff, 0xe39d, 0x0000,
	0x5ba0, 0x9033, 0x8f9f, 0x0609, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x8db0, 0xffff, 0xffef, 0x1746,
	0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x728c, 0xffff, 0xffff, 0x3148,
	0x0000, 0x0000, 0x0000, 0x17ac, 0x5bce, 0x78cb, 0x95b7, 0xa3d2, 0xa3d2, 0xcce6, 0xffff, 0xffff, 0x3148,
	0x0000, 0x1919, 0x9bfd, 0xf86b, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x3148,
	0x0c63, 0xcf97, 0xffff, 0xfc17, 0xb59d, 0x6981, 0x4f87, 0x3cf1, 0x3a68, 0x9276, 0xffff, 0xffff, 0x3148,
	0x8eb0, 0xffff, 0xfbf5, 0x4d33, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x7214, 0xffff, 0xffff, 0x3148,
	0xccaf, 0xffff, 0xc6ba, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x7214, 0xffff, 0xffff, 0x3148,
	0xf292, 0xffff, 0xb865, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x930c, 0xffff, 0xffff, 0x3148,
	0xc906, 0xffff, 0xf45d, 0x249f, 0x0000, 0x0000, 0x0000, 0x0000, 0x5ea0, 0xfcf1, 0xffff, 0xffff, 0x3888,
	0x6d81, 0xffff, 0xffff, 0xeaf5, 0x7dcf, 0x5533, 0x6c2b, 0xc07b, 0xfff1, 0xffff, 0xffff, 0xffff, 0x4a9d,
	0x00d4, 0xa6a1, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xd54d, 0x8399, 0xffff, 0xffff, 0x764b,
	0x0000, 0x001b, 0x4ffc, 0xbb4a, 0xe3f5, 0xeee3, 0xbd4c, 0x7e42, 0x0900, 0x1b0c, 0xb6fc, 0xb6fc, 0x7e04,
}
   706  
   707  // TestFixedFloatingCloseness compares the closeness of the fixed point and
   708  // floating point rasterizer.
   709  func TestFixedFloatingCloseness(t *testing.T) {
   710  	if len(fxMask16) != len(flMask16) {
   711  		t.Fatalf("len(fxMask16) != len(flMask16)")
   712  	}
   713  
   714  	total := uint32(0)
   715  	for i := range fxMask16 {
   716  		a := fxMask16[i]
   717  		b := flMask16[i]
   718  		if a > b {
   719  			total += a - b
   720  		} else {
   721  			total += b - a
   722  		}
   723  	}
   724  	n := len(fxMask16)
   725  
   726  	// This log message is useful when changing the fixed point rasterizer
   727  	// implementation, such as by changing ϕ. Assuming that the floating point
   728  	// rasterizer is accurate, the average difference is a measure of how
   729  	// inaccurate the (faster) fixed point rasterizer is.
   730  	//
   731  	// Smaller is better.
   732  	percent := float64(total*100) / float64(n*65535)
   733  	t.Logf("Comparing closeness of the fixed point and floating point rasterizer.\n"+
   734  		"Specifically, the elements of fxMask16 and flMask16.\n"+
   735  		"Total diff = %d, n = %d, avg = %.5f out of 65535, or %.5f%%.\n",
   736  		total, n, float64(total)/float64(n), percent)
   737  
   738  	const thresholdPercent = 1.0
   739  	if percent > thresholdPercent {
   740  		t.Errorf("average difference: got %.5f%%, want <= %.5f%%", percent, thresholdPercent)
   741  	}
   742  }
   743
View as plain text