...

Source file src/github.com/decred/dcrd/dcrec/secp256k1/v4/field.go

Documentation: github.com/decred/dcrd/dcrec/secp256k1/v4

     1  // Copyright (c) 2013-2014 The btcsuite developers
     2  // Copyright (c) 2015-2023 The Decred developers
     3  // Copyright (c) 2013-2023 Dave Collins
     4  // Use of this source code is governed by an ISC
     5  // license that can be found in the LICENSE file.
     6  
     7  package secp256k1
     8  
     9  // References:
    10  //   [HAC]: Handbook of Applied Cryptography Menezes, van Oorschot, Vanstone.
    11  //     http://cacr.uwaterloo.ca/hac/
    12  
    13  // All elliptic curve operations for secp256k1 are done in a finite field
    14  // characterized by a 256-bit prime.  Given this precision is larger than the
    15  // biggest available native type, obviously some form of bignum math is needed.
    16  // This package implements specialized fixed-precision field arithmetic rather
    17  // than relying on an arbitrary-precision arithmetic package such as math/big
    18  // for dealing with the field math since the size is known.  As a result, rather
    19  // large performance gains are achieved by taking advantage of many
    20  // optimizations not available to arbitrary-precision arithmetic and generic
    21  // modular arithmetic algorithms.
    22  //
    23  // There are various ways to internally represent each finite field element.
    24  // For example, the most obvious representation would be to use an array of 4
    25  // uint64s (64 bits * 4 = 256 bits).  However, that representation suffers from
    26  // a couple of issues.  First, there is no native Go type large enough to handle
    27  // the intermediate results while adding or multiplying two 64-bit numbers, and
    28  // second there is no space left for overflows when performing the intermediate
    29  // arithmetic between each array element which would lead to expensive carry
    30  // propagation.
    31  //
    32  // Given the above, this implementation represents the field elements as
    33  // 10 uint32s with each word (array entry) treated as base 2^26.  This was
    34  // chosen for the following reasons:
    35  // 1) Most systems at the current time are 64-bit (or at least have 64-bit
    36  //    registers available for specialized purposes such as MMX) so the
    37  //    intermediate results can typically be done using a native register (and
    38  //    using uint64s to avoid the need for additional half-word arithmetic)
    39  // 2) In order to allow addition of the internal words without having to
    40  //    propagate the carry, the max normalized value for each register must
    41  //    be less than the number of bits available in the register
    42  // 3) Since we're dealing with 32-bit values, 64-bits of overflow is a
    43  //    reasonable choice for #2
    44  // 4) Given the need for 256-bits of precision and the properties stated in #1,
    45  //    #2, and #3, the representation which best accommodates this is 10 uint32s
    46  //    with base 2^26 (26 bits * 10 = 260 bits, so the final word only needs 22
    47  //    bits) which leaves the desired 64 bits (32 * 10 = 320, 320 - 256 = 64) for
    48  //    overflow
    49  //
    50  // Since it is so important that the field arithmetic is extremely fast for high
    51  // performance crypto, this type does not perform any validation where it
    52  // ordinarily would.  See the documentation for FieldVal for more details.
    53  
    54  import (
    55  	"encoding/hex"
    56  )
    57  
    58  // Constants used to make the code more readable.
    59  const (
    60  	twoBitsMask   = 0x3
    61  	fourBitsMask  = 0xf
    62  	sixBitsMask   = 0x3f
    63  	eightBitsMask = 0xff
    64  )
    65  
    66  // Constants related to the field representation.
    67  const (
    68  	// fieldWords is the number of words used to internally represent the
    69  	// 256-bit value.
    70  	fieldWords = 10
    71  
    72  	// fieldBase is the exponent used to form the numeric base of each word.
    73  	// 2^(fieldBase*i) where i is the word position.
    74  	fieldBase = 26
    75  
    76  	// fieldBaseMask is the mask for the bits in each word needed to
    77  	// represent the numeric base of each word (except the most significant
    78  	// word).
    79  	fieldBaseMask = (1 << fieldBase) - 1
    80  
    81  	// fieldMSBBits is the number of bits in the most significant word used
    82  	// to represent the value.
    83  	fieldMSBBits = 256 - (fieldBase * (fieldWords - 1))
    84  
    85  	// fieldMSBMask is the mask for the bits in the most significant word
    86  	// needed to represent the value.
    87  	fieldMSBMask = (1 << fieldMSBBits) - 1
    88  
    89  	// These fields provide convenient access to each of the words of the
    90  	// secp256k1 prime in the internal field representation to improve code
    91  	// readability.
    92  	fieldPrimeWordZero  = 0x03fffc2f
    93  	fieldPrimeWordOne   = 0x03ffffbf
    94  	fieldPrimeWordTwo   = 0x03ffffff
    95  	fieldPrimeWordThree = 0x03ffffff
    96  	fieldPrimeWordFour  = 0x03ffffff
    97  	fieldPrimeWordFive  = 0x03ffffff
    98  	fieldPrimeWordSix   = 0x03ffffff
    99  	fieldPrimeWordSeven = 0x03ffffff
   100  	fieldPrimeWordEight = 0x03ffffff
   101  	fieldPrimeWordNine  = 0x003fffff
   102  )
   103  
   104  // FieldVal implements optimized fixed-precision arithmetic over the
   105  // secp256k1 finite field.  This means all arithmetic is performed modulo
   106  //
   107  //	0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffc2f.
   108  //
   109  // WARNING: Since it is so important for the field arithmetic to be extremely
   110  // fast for high performance crypto, this type does not perform any validation
   111  // of documented preconditions where it ordinarily would.  As a result, it is
   112  // IMPERATIVE for callers to understand some key concepts that are described
   113  // below and ensure the methods are called with the necessary preconditions that
   114  // each method is documented with.  For example, some methods only give the
   115  // correct result if the field value is normalized and others require the field
   116  // values involved to have a maximum magnitude and THERE ARE NO EXPLICIT CHECKS
   117  // TO ENSURE THOSE PRECONDITIONS ARE SATISFIED.  This does, unfortunately, make
   118  // the type more difficult to use correctly and while I typically prefer to
   119  // ensure all state and input is valid for most code, this is a bit of an
   120  // exception because those extra checks really add up in what ends up being
   121  // critical hot paths.
   122  //
   123  // The first key concept when working with this type is normalization.  In order
   124  // to avoid the need to propagate a ton of carries, the internal representation
   125  // provides additional overflow bits for each word of the overall 256-bit value.
   126  // This means that there are multiple internal representations for the same
   127  // value and, as a result, any methods that rely on comparison of the value,
   128  // such as equality and oddness determination, require the caller to provide a
   129  // normalized value.
   130  //
   131  // The second key concept when working with this type is magnitude.  As
   132  // previously mentioned, the internal representation provides additional
   133  // overflow bits which means that the more math operations that are performed on
   134  // the field value between normalizations, the more those overflow bits
   135  // accumulate.  The magnitude is effectively that maximum possible number of
   136  // those overflow bits that could possibly be required as a result of a given
   137  // operation.  Since there are only a limited number of overflow bits available,
   138  // this implies that the max possible magnitude MUST be tracked by the caller
   139  // and the caller MUST normalize the field value if a given operation would
   140  // cause the magnitude of the result to exceed the max allowed value.
   141  //
   142  // IMPORTANT: The max allowed magnitude of a field value is 64.
   143  type FieldVal struct {
   144  	// Each 256-bit value is represented as 10 32-bit integers in base 2^26.
   145  	// This provides 6 bits of overflow in each word (10 bits in the most
   146  	// significant word) for a total of 64 bits of overflow (9*6 + 10 = 64).  It
   147  	// only implements the arithmetic needed for elliptic curve operations.
   148  	//
   149  	// The following depicts the internal representation:
   150  	// 	 -----------------------------------------------------------------
   151  	// 	|        n[9]       |        n[8]       | ... |        n[0]       |
   152  	// 	| 32 bits available | 32 bits available | ... | 32 bits available |
   153  	// 	| 22 bits for value | 26 bits for value | ... | 26 bits for value |
   154  	// 	| 10 bits overflow  |  6 bits overflow  | ... |  6 bits overflow  |
   155  	// 	| Mult: 2^(26*9)    | Mult: 2^(26*8)    | ... | Mult: 2^(26*0)    |
   156  	// 	 -----------------------------------------------------------------
   157  	//
   158  	// For example, consider the number 2^49 + 1.  It would be represented as:
   159  	// 	n[0] = 1
   160  	// 	n[1] = 2^23
   161  	// 	n[2..9] = 0
   162  	//
   163  	// The full 256-bit value is then calculated by looping i from 9..0 and
   164  	// doing sum(n[i] * 2^(26i)) like so:
   165  	// 	n[9] * 2^(26*9) = 0    * 2^234 = 0
   166  	// 	n[8] * 2^(26*8) = 0    * 2^208 = 0
   167  	// 	...
   168  	// 	n[1] * 2^(26*1) = 2^23 * 2^26  = 2^49
   169  	// 	n[0] * 2^(26*0) = 1    * 2^0   = 1
   170  	// 	Sum: 0 + 0 + ... + 2^49 + 1 = 2^49 + 1
   171  	n [10]uint32
   172  }
   173  
   174  // String returns the field value as a normalized human-readable hex string.
   175  //
   176  //	Preconditions: None
   177  //	Output Normalized: Field is not modified -- same as input value
   178  //	Output Max Magnitude: Field is not modified -- same as input value
   179  func (f FieldVal) String() string {
   180  	// f is a copy, so it's safe to normalize it without mutating the original.
   181  	f.Normalize()
   182  	return hex.EncodeToString(f.Bytes()[:])
   183  }
   184  
   185  // Zero sets the field value to zero in constant time.  A newly created field
   186  // value is already set to zero.  This function can be useful to clear an
   187  // existing field value for reuse.
   188  //
   189  //	Preconditions: None
   190  //	Output Normalized: Yes
   191  //	Output Max Magnitude: 1
   192  func (f *FieldVal) Zero() {
   193  	f.n[0] = 0
   194  	f.n[1] = 0
   195  	f.n[2] = 0
   196  	f.n[3] = 0
   197  	f.n[4] = 0
   198  	f.n[5] = 0
   199  	f.n[6] = 0
   200  	f.n[7] = 0
   201  	f.n[8] = 0
   202  	f.n[9] = 0
   203  }
   204  
   205  // Set sets the field value equal to the passed value in constant time.  The
   206  // normalization and magnitude of the two fields will be identical.
   207  //
   208  // The field value is returned to support chaining.  This enables syntax like:
   209  // f := new(FieldVal).Set(f2).Add(1) so that f = f2 + 1 where f2 is not
   210  // modified.
   211  //
   212  //	Preconditions: None
   213  //	Output Normalized: Same as input value
   214  //	Output Max Magnitude: Same as input value
   215  func (f *FieldVal) Set(val *FieldVal) *FieldVal {
   216  	*f = *val
   217  	return f
   218  }
   219  
   220  // SetInt sets the field value to the passed integer in constant time.  This is
   221  // a convenience function since it is fairly common to perform some arithmetic
   222  // with small native integers.
   223  //
   224  // The field value is returned to support chaining.  This enables syntax such
   225  // as f := new(FieldVal).SetInt(2).Mul(f2) so that f = 2 * f2.
   226  //
   227  //	Preconditions: None
   228  //	Output Normalized: Yes
   229  //	Output Max Magnitude: 1
   230  func (f *FieldVal) SetInt(ui uint16) *FieldVal {
   231  	f.Zero()
   232  	f.n[0] = uint32(ui)
   233  	return f
   234  }
   235  
   236  // SetBytes packs the passed 32-byte big-endian value into the internal field
   237  // value representation in constant time.  SetBytes interprets the provided
   238  // array as a 256-bit big-endian unsigned integer, packs it into the internal
   239  // field value representation, and returns either 1 if it is greater than or
   240  // equal to the field prime (aka it overflowed) or 0 otherwise in constant time.
   241  //
   242  // Note that a bool is not used here because it is not possible in Go to convert
   243  // from a bool to numeric value in constant time and many constant-time
   244  // operations require a numeric value.
   245  //
   246  //	Preconditions: None
   247  //	Output Normalized: Yes if no overflow, no otherwise
   248  //	Output Max Magnitude: 1
   249  func (f *FieldVal) SetBytes(b *[32]byte) uint32 {
   250  	// Pack the 256 total bits across the 10 uint32 words with a max of
   251  	// 26-bits per word.  This could be done with a couple of for loops,
   252  	// but this unrolled version is significantly faster.  Benchmarks show
   253  	// this is about 34 times faster than the variant which uses loops.
   254  	f.n[0] = uint32(b[31]) | uint32(b[30])<<8 | uint32(b[29])<<16 |
   255  		(uint32(b[28])&twoBitsMask)<<24
   256  	f.n[1] = uint32(b[28])>>2 | uint32(b[27])<<6 | uint32(b[26])<<14 |
   257  		(uint32(b[25])&fourBitsMask)<<22
   258  	f.n[2] = uint32(b[25])>>4 | uint32(b[24])<<4 | uint32(b[23])<<12 |
   259  		(uint32(b[22])&sixBitsMask)<<20
   260  	f.n[3] = uint32(b[22])>>6 | uint32(b[21])<<2 | uint32(b[20])<<10 |
   261  		uint32(b[19])<<18
   262  	f.n[4] = uint32(b[18]) | uint32(b[17])<<8 | uint32(b[16])<<16 |
   263  		(uint32(b[15])&twoBitsMask)<<24
   264  	f.n[5] = uint32(b[15])>>2 | uint32(b[14])<<6 | uint32(b[13])<<14 |
   265  		(uint32(b[12])&fourBitsMask)<<22
   266  	f.n[6] = uint32(b[12])>>4 | uint32(b[11])<<4 | uint32(b[10])<<12 |
   267  		(uint32(b[9])&sixBitsMask)<<20
   268  	f.n[7] = uint32(b[9])>>6 | uint32(b[8])<<2 | uint32(b[7])<<10 |
   269  		uint32(b[6])<<18
   270  	f.n[8] = uint32(b[5]) | uint32(b[4])<<8 | uint32(b[3])<<16 |
   271  		(uint32(b[2])&twoBitsMask)<<24
   272  	f.n[9] = uint32(b[2])>>2 | uint32(b[1])<<6 | uint32(b[0])<<14
   273  
   274  	// The intuition here is that the field value is greater than the prime if
   275  	// one of the higher individual words is greater than corresponding word of
   276  	// the prime and all higher words in the field value are equal to their
   277  	// corresponding word of the prime.  Since this type is modulo the prime,
   278  	// being equal is also an overflow back to 0.
   279  	//
   280  	// Note that because the input is 32 bytes and it was just packed into the
   281  	// field representation, the only words that can possibly be greater are
   282  	// zero and one, because ceil(log_2(2^256 - 1 - P)) = 33 bits max and the
   283  	// internal field representation encodes 26 bits with each word.
   284  	//
   285  	// Thus, there is no need to test if the upper words of the field value
   286  	// exceeds them, hence, only equality is checked for them.
   287  	highWordsEq := constantTimeEq(f.n[9], fieldPrimeWordNine)
   288  	highWordsEq &= constantTimeEq(f.n[8], fieldPrimeWordEight)
   289  	highWordsEq &= constantTimeEq(f.n[7], fieldPrimeWordSeven)
   290  	highWordsEq &= constantTimeEq(f.n[6], fieldPrimeWordSix)
   291  	highWordsEq &= constantTimeEq(f.n[5], fieldPrimeWordFive)
   292  	highWordsEq &= constantTimeEq(f.n[4], fieldPrimeWordFour)
   293  	highWordsEq &= constantTimeEq(f.n[3], fieldPrimeWordThree)
   294  	highWordsEq &= constantTimeEq(f.n[2], fieldPrimeWordTwo)
   295  	overflow := highWordsEq & constantTimeGreater(f.n[1], fieldPrimeWordOne)
   296  	highWordsEq &= constantTimeEq(f.n[1], fieldPrimeWordOne)
   297  	overflow |= highWordsEq & constantTimeGreaterOrEq(f.n[0], fieldPrimeWordZero)
   298  
   299  	return overflow
   300  }
   301  
   302  // SetByteSlice interprets the provided slice as a 256-bit big-endian unsigned
   303  // integer (meaning it is truncated to the first 32 bytes), packs it into the
   304  // internal field value representation, and returns whether or not the resulting
   305  // truncated 256-bit integer is greater than or equal to the field prime (aka it
   306  // overflowed) in constant time.
   307  //
   308  // Note that since passing a slice with more than 32 bytes is truncated, it is
   309  // possible that the truncated value is less than the field prime and hence it
   310  // will not be reported as having overflowed in that case.  It is up to the
   311  // caller to decide whether it needs to provide numbers of the appropriate size
   312  // or it if is acceptable to use this function with the described truncation and
   313  // overflow behavior.
   314  //
   315  //	Preconditions: None
   316  //	Output Normalized: Yes if no overflow, no otherwise
   317  //	Output Max Magnitude: 1
   318  func (f *FieldVal) SetByteSlice(b []byte) bool {
   319  	var b32 [32]byte
   320  	b = b[:constantTimeMin(uint32(len(b)), 32)]
   321  	copy(b32[:], b32[:32-len(b)])
   322  	copy(b32[32-len(b):], b)
   323  	result := f.SetBytes(&b32)
   324  	zeroArray32(&b32)
   325  	return result != 0
   326  }
   327  
   328  // Normalize normalizes the internal field words into the desired range and
   329  // performs fast modular reduction over the secp256k1 prime by making use of the
   330  // special form of the prime in constant time.
   331  //
   332  //	Preconditions: None
   333  //	Output Normalized: Yes
   334  //	Output Max Magnitude: 1
   335  func (f *FieldVal) Normalize() *FieldVal {
   336  	// The field representation leaves 6 bits of overflow in each word so
   337  	// intermediate calculations can be performed without needing to
   338  	// propagate the carry to each higher word during the calculations.  In
   339  	// order to normalize, we need to "compact" the full 256-bit value to
   340  	// the right while propagating any carries through to the high order
   341  	// word.
   342  	//
   343  	// Since this field is doing arithmetic modulo the secp256k1 prime, we
   344  	// also need to perform modular reduction over the prime.
   345  	//
   346  	// Per [HAC] section 14.3.4: Reduction method of moduli of special form,
   347  	// when the modulus is of the special form m = b^t - c, highly efficient
   348  	// reduction can be achieved.
   349  	//
   350  	// The secp256k1 prime is equivalent to 2^256 - 4294968273, so it fits
   351  	// this criteria.
   352  	//
   353  	// 4294968273 in field representation (base 2^26) is:
   354  	// n[0] = 977
   355  	// n[1] = 64
   356  	// That is to say (2^26 * 64) + 977 = 4294968273
   357  	//
   358  	// The algorithm presented in the referenced section typically repeats
   359  	// until the quotient is zero.  However, due to our field representation
   360  	// we already know to within one reduction how many times we would need
   361  	// to repeat as it's the uppermost bits of the high order word.  Thus we
   362  	// can simply multiply the magnitude by the field representation of the
   363  	// prime and do a single iteration.  After this step there might be an
   364  	// additional carry to bit 256 (bit 22 of the high order word).
   365  	t9 := f.n[9]
   366  	m := t9 >> fieldMSBBits
   367  	t9 = t9 & fieldMSBMask
   368  	t0 := f.n[0] + m*977
   369  	t1 := (t0 >> fieldBase) + f.n[1] + (m << 6)
   370  	t0 = t0 & fieldBaseMask
   371  	t2 := (t1 >> fieldBase) + f.n[2]
   372  	t1 = t1 & fieldBaseMask
   373  	t3 := (t2 >> fieldBase) + f.n[3]
   374  	t2 = t2 & fieldBaseMask
   375  	t4 := (t3 >> fieldBase) + f.n[4]
   376  	t3 = t3 & fieldBaseMask
   377  	t5 := (t4 >> fieldBase) + f.n[5]
   378  	t4 = t4 & fieldBaseMask
   379  	t6 := (t5 >> fieldBase) + f.n[6]
   380  	t5 = t5 & fieldBaseMask
   381  	t7 := (t6 >> fieldBase) + f.n[7]
   382  	t6 = t6 & fieldBaseMask
   383  	t8 := (t7 >> fieldBase) + f.n[8]
   384  	t7 = t7 & fieldBaseMask
   385  	t9 = (t8 >> fieldBase) + t9
   386  	t8 = t8 & fieldBaseMask
   387  
   388  	// At this point, the magnitude is guaranteed to be one, however, the
   389  	// value could still be greater than the prime if there was either a
   390  	// carry through to bit 256 (bit 22 of the higher order word) or the
   391  	// value is greater than or equal to the field characteristic.  The
   392  	// following determines if either or these conditions are true and does
   393  	// the final reduction in constant time.
   394  	//
   395  	// Also note that 'm' will be zero when neither of the aforementioned
   396  	// conditions are true and the value will not be changed when 'm' is zero.
   397  	m = constantTimeEq(t9, fieldMSBMask)
   398  	m &= constantTimeEq(t8&t7&t6&t5&t4&t3&t2, fieldBaseMask)
   399  	m &= constantTimeGreater(t1+64+((t0+977)>>fieldBase), fieldBaseMask)
   400  	m |= t9 >> fieldMSBBits
   401  	t0 = t0 + m*977
   402  	t1 = (t0 >> fieldBase) + t1 + (m << 6)
   403  	t0 = t0 & fieldBaseMask
   404  	t2 = (t1 >> fieldBase) + t2
   405  	t1 = t1 & fieldBaseMask
   406  	t3 = (t2 >> fieldBase) + t3
   407  	t2 = t2 & fieldBaseMask
   408  	t4 = (t3 >> fieldBase) + t4
   409  	t3 = t3 & fieldBaseMask
   410  	t5 = (t4 >> fieldBase) + t5
   411  	t4 = t4 & fieldBaseMask
   412  	t6 = (t5 >> fieldBase) + t6
   413  	t5 = t5 & fieldBaseMask
   414  	t7 = (t6 >> fieldBase) + t7
   415  	t6 = t6 & fieldBaseMask
   416  	t8 = (t7 >> fieldBase) + t8
   417  	t7 = t7 & fieldBaseMask
   418  	t9 = (t8 >> fieldBase) + t9
   419  	t8 = t8 & fieldBaseMask
   420  	t9 = t9 & fieldMSBMask // Remove potential multiple of 2^256.
   421  
   422  	// Finally, set the normalized and reduced words.
   423  	f.n[0] = t0
   424  	f.n[1] = t1
   425  	f.n[2] = t2
   426  	f.n[3] = t3
   427  	f.n[4] = t4
   428  	f.n[5] = t5
   429  	f.n[6] = t6
   430  	f.n[7] = t7
   431  	f.n[8] = t8
   432  	f.n[9] = t9
   433  	return f
   434  }
   435  
   436  // PutBytesUnchecked unpacks the field value to a 32-byte big-endian value
   437  // directly into the passed byte slice in constant time.  The target slice must
   438  // have at least 32 bytes available or it will panic.
   439  //
   440  // There is a similar function, PutBytes, which unpacks the field value into a
   441  // 32-byte array directly.  This version is provided since it can be useful
   442  // to write directly into part of a larger buffer without needing a separate
   443  // allocation.
   444  //
   445  //	Preconditions:
   446  //	  - The field value MUST be normalized
   447  //	  - The target slice MUST have at least 32 bytes available
   448  func (f *FieldVal) PutBytesUnchecked(b []byte) {
   449  	// Unpack the 256 total bits from the 10 uint32 words with a max of
   450  	// 26-bits per word.  This could be done with a couple of for loops,
   451  	// but this unrolled version is a bit faster.  Benchmarks show this is
   452  	// about 10 times faster than the variant which uses loops.
   453  	b[31] = byte(f.n[0] & eightBitsMask)
   454  	b[30] = byte((f.n[0] >> 8) & eightBitsMask)
   455  	b[29] = byte((f.n[0] >> 16) & eightBitsMask)
   456  	b[28] = byte((f.n[0]>>24)&twoBitsMask | (f.n[1]&sixBitsMask)<<2)
   457  	b[27] = byte((f.n[1] >> 6) & eightBitsMask)
   458  	b[26] = byte((f.n[1] >> 14) & eightBitsMask)
   459  	b[25] = byte((f.n[1]>>22)&fourBitsMask | (f.n[2]&fourBitsMask)<<4)
   460  	b[24] = byte((f.n[2] >> 4) & eightBitsMask)
   461  	b[23] = byte((f.n[2] >> 12) & eightBitsMask)
   462  	b[22] = byte((f.n[2]>>20)&sixBitsMask | (f.n[3]&twoBitsMask)<<6)
   463  	b[21] = byte((f.n[3] >> 2) & eightBitsMask)
   464  	b[20] = byte((f.n[3] >> 10) & eightBitsMask)
   465  	b[19] = byte((f.n[3] >> 18) & eightBitsMask)
   466  	b[18] = byte(f.n[4] & eightBitsMask)
   467  	b[17] = byte((f.n[4] >> 8) & eightBitsMask)
   468  	b[16] = byte((f.n[4] >> 16) & eightBitsMask)
   469  	b[15] = byte((f.n[4]>>24)&twoBitsMask | (f.n[5]&sixBitsMask)<<2)
   470  	b[14] = byte((f.n[5] >> 6) & eightBitsMask)
   471  	b[13] = byte((f.n[5] >> 14) & eightBitsMask)
   472  	b[12] = byte((f.n[5]>>22)&fourBitsMask | (f.n[6]&fourBitsMask)<<4)
   473  	b[11] = byte((f.n[6] >> 4) & eightBitsMask)
   474  	b[10] = byte((f.n[6] >> 12) & eightBitsMask)
   475  	b[9] = byte((f.n[6]>>20)&sixBitsMask | (f.n[7]&twoBitsMask)<<6)
   476  	b[8] = byte((f.n[7] >> 2) & eightBitsMask)
   477  	b[7] = byte((f.n[7] >> 10) & eightBitsMask)
   478  	b[6] = byte((f.n[7] >> 18) & eightBitsMask)
   479  	b[5] = byte(f.n[8] & eightBitsMask)
   480  	b[4] = byte((f.n[8] >> 8) & eightBitsMask)
   481  	b[3] = byte((f.n[8] >> 16) & eightBitsMask)
   482  	b[2] = byte((f.n[8]>>24)&twoBitsMask | (f.n[9]&sixBitsMask)<<2)
   483  	b[1] = byte((f.n[9] >> 6) & eightBitsMask)
   484  	b[0] = byte((f.n[9] >> 14) & eightBitsMask)
   485  }
   486  
   487  // PutBytes unpacks the field value to a 32-byte big-endian value using the
   488  // passed byte array in constant time.
   489  //
   490  // There is a similar function, PutBytesUnchecked, which unpacks the field value
   491  // into a slice that must have at least 32 bytes available.  This version is
   492  // provided since it can be useful to write directly into an array that is type
   493  // checked.
   494  //
   495  // Alternatively, there is also Bytes, which unpacks the field value into a new
   496  // array and returns that which can sometimes be more ergonomic in applications
   497  // that aren't concerned about an additional copy.
   498  //
   499  //	Preconditions:
   500  //	  - The field value MUST be normalized
   501  func (f *FieldVal) PutBytes(b *[32]byte) {
   502  	f.PutBytesUnchecked(b[:])
   503  }
   504  
   505  // Bytes unpacks the field value to a 32-byte big-endian value in constant time.
   506  //
   507  // See PutBytes and PutBytesUnchecked for variants that allow an array or slice
   508  // to be passed which can be useful to cut down on the number of allocations by
   509  // allowing the caller to reuse a buffer or write directly into part of a larger
   510  // buffer.
   511  //
   512  //	Preconditions:
   513  //	  - The field value MUST be normalized
   514  func (f *FieldVal) Bytes() *[32]byte {
   515  	b := new([32]byte)
   516  	f.PutBytesUnchecked(b[:])
   517  	return b
   518  }
   519  
   520  // IsZeroBit returns 1 when the field value is equal to zero or 0 otherwise in
   521  // constant time.
   522  //
   523  // Note that a bool is not used here because it is not possible in Go to convert
   524  // from a bool to numeric value in constant time and many constant-time
   525  // operations require a numeric value.  See IsZero for the version that returns
   526  // a bool.
   527  //
   528  //	Preconditions:
   529  //	  - The field value MUST be normalized
   530  func (f *FieldVal) IsZeroBit() uint32 {
   531  	// The value can only be zero if no bits are set in any of the words.
   532  	// This is a constant time implementation.
   533  	bits := f.n[0] | f.n[1] | f.n[2] | f.n[3] | f.n[4] |
   534  		f.n[5] | f.n[6] | f.n[7] | f.n[8] | f.n[9]
   535  
   536  	return constantTimeEq(bits, 0)
   537  }
   538  
   539  // IsZero returns whether or not the field value is equal to zero in constant
   540  // time.
   541  //
   542  //	Preconditions:
   543  //	  - The field value MUST be normalized
   544  func (f *FieldVal) IsZero() bool {
   545  	// The value can only be zero if no bits are set in any of the words.
   546  	// This is a constant time implementation.
   547  	bits := f.n[0] | f.n[1] | f.n[2] | f.n[3] | f.n[4] |
   548  		f.n[5] | f.n[6] | f.n[7] | f.n[8] | f.n[9]
   549  
   550  	return bits == 0
   551  }
   552  
   553  // IsOneBit returns 1 when the field value is equal to one or 0 otherwise in
   554  // constant time.
   555  //
   556  // Note that a bool is not used here because it is not possible in Go to convert
   557  // from a bool to numeric value in constant time and many constant-time
   558  // operations require a numeric value.  See IsOne for the version that returns a
   559  // bool.
   560  //
   561  //	Preconditions:
   562  //	   - The field value MUST be normalized
   563  func (f *FieldVal) IsOneBit() uint32 {
   564  	// The value can only be one if the single lowest significant bit is set in
   565  	// the first word and no other bits are set in any of the other words.
   566  	// This is a constant time implementation.
   567  	bits := (f.n[0] ^ 1) | f.n[1] | f.n[2] | f.n[3] | f.n[4] | f.n[5] |
   568  		f.n[6] | f.n[7] | f.n[8] | f.n[9]
   569  
   570  	return constantTimeEq(bits, 0)
   571  }
   572  
   573  // IsOne returns whether or not the field value is equal to one in constant
   574  // time.
   575  //
   576  //	Preconditions:
   577  //	  - The field value MUST be normalized
   578  func (f *FieldVal) IsOne() bool {
   579  	// The value can only be one if the single lowest significant bit is set in
   580  	// the first word and no other bits are set in any of the other words.
   581  	// This is a constant time implementation.
   582  	bits := (f.n[0] ^ 1) | f.n[1] | f.n[2] | f.n[3] | f.n[4] | f.n[5] |
   583  		f.n[6] | f.n[7] | f.n[8] | f.n[9]
   584  
   585  	return bits == 0
   586  }
   587  
   588  // IsOddBit returns 1 when the field value is an odd number or 0 otherwise in
   589  // constant time.
   590  //
   591  // Note that a bool is not used here because it is not possible in Go to convert
   592  // from a bool to numeric value in constant time and many constant-time
   593  // operations require a numeric value.  See IsOdd for the version that returns a
   594  // bool.
   595  //
   596  //	Preconditions:
   597  //	  - The field value MUST be normalized
   598  func (f *FieldVal) IsOddBit() uint32 {
   599  	// Only odd numbers have the bottom bit set.
   600  	return f.n[0] & 1
   601  }
   602  
   603  // IsOdd returns whether or not the field value is an odd number in constant
   604  // time.
   605  //
   606  //	Preconditions:
   607  //	  - The field value MUST be normalized
   608  func (f *FieldVal) IsOdd() bool {
   609  	// Only odd numbers have the bottom bit set.
   610  	return f.n[0]&1 == 1
   611  }
   612  
   613  // Equals returns whether or not the two field values are the same in constant
   614  // time.
   615  //
   616  //	Preconditions:
   617  //	  - Both field values being compared MUST be normalized
   618  func (f *FieldVal) Equals(val *FieldVal) bool {
   619  	// Xor only sets bits when they are different, so the two field values
   620  	// can only be the same if no bits are set after xoring each word.
   621  	// This is a constant time implementation.
   622  	bits := (f.n[0] ^ val.n[0]) | (f.n[1] ^ val.n[1]) | (f.n[2] ^ val.n[2]) |
   623  		(f.n[3] ^ val.n[3]) | (f.n[4] ^ val.n[4]) | (f.n[5] ^ val.n[5]) |
   624  		(f.n[6] ^ val.n[6]) | (f.n[7] ^ val.n[7]) | (f.n[8] ^ val.n[8]) |
   625  		(f.n[9] ^ val.n[9])
   626  
   627  	return bits == 0
   628  }
   629  
   630  // NegateVal negates the passed value and stores the result in f in constant
   631  // time.  The caller must provide the magnitude of the passed value for a
   632  // correct result.
   633  //
   634  // The field value is returned to support chaining.  This enables syntax like:
   635  // f.NegateVal(f2).AddInt(1) so that f = -f2 + 1.
   636  //
   637  //	Preconditions:
   638  //	  - The max magnitude MUST be 63
   639  //	Output Normalized: No
   640  //	Output Max Magnitude: Input magnitude + 1
   641  func (f *FieldVal) NegateVal(val *FieldVal, magnitude uint32) *FieldVal {
   642  	// Negation in the field is just the prime minus the value.  However,
   643  	// in order to allow negation against a field value without having to
   644  	// normalize/reduce it first, multiply by the magnitude (that is how
   645  	// "far" away it is from the normalized value) to adjust.  Also, since
   646  	// negating a value pushes it one more order of magnitude away from the
   647  	// normalized range, add 1 to compensate.
   648  	//
   649  	// For some intuition here, imagine you're performing mod 12 arithmetic
   650  	// (picture a clock) and you are negating the number 7.  So you start at
   651  	// 12 (which is of course 0 under mod 12) and count backwards (left on
   652  	// the clock) 7 times to arrive at 5.  Notice this is just 12-7 = 5.
   653  	// Now, assume you're starting with 19, which is a number that is
   654  	// already larger than the modulus and congruent to 7 (mod 12).  When a
   655  	// value is already in the desired range, its magnitude is 1.  Since 19
   656  	// is an additional "step", its magnitude (mod 12) is 2.  Since any
   657  	// multiple of the modulus is congruent to zero (mod m), the answer can
   658  	// be shortcut by simply multiplying the magnitude by the modulus and
   659  	// subtracting.  Keeping with the example, this would be (2*12)-19 = 5.
   660  	f.n[0] = (magnitude+1)*fieldPrimeWordZero - val.n[0]
   661  	f.n[1] = (magnitude+1)*fieldPrimeWordOne - val.n[1]
   662  	f.n[2] = (magnitude+1)*fieldBaseMask - val.n[2]
   663  	f.n[3] = (magnitude+1)*fieldBaseMask - val.n[3]
   664  	f.n[4] = (magnitude+1)*fieldBaseMask - val.n[4]
   665  	f.n[5] = (magnitude+1)*fieldBaseMask - val.n[5]
   666  	f.n[6] = (magnitude+1)*fieldBaseMask - val.n[6]
   667  	f.n[7] = (magnitude+1)*fieldBaseMask - val.n[7]
   668  	f.n[8] = (magnitude+1)*fieldBaseMask - val.n[8]
   669  	f.n[9] = (magnitude+1)*fieldMSBMask - val.n[9]
   670  
   671  	return f
   672  }
   673  
   674  // Negate negates the field value in constant time.  The existing field value is
   675  // modified.  The caller must provide the magnitude of the field value for a
   676  // correct result.
   677  //
   678  // The field value is returned to support chaining.  This enables syntax like:
   679  // f.Negate().AddInt(1) so that f = -f + 1.
   680  //
   681  //	Preconditions:
   682  //	  - The max magnitude MUST be 63
   683  //	Output Normalized: No
   684  //	Output Max Magnitude: Input magnitude + 1
   685  func (f *FieldVal) Negate(magnitude uint32) *FieldVal {
   686  	return f.NegateVal(f, magnitude)
   687  }
   688  
   689  // AddInt adds the passed integer to the existing field value and stores the
   690  // result in f in constant time.  This is a convenience function since it is
   691  // fairly common to perform some arithmetic with small native integers.
   692  //
   693  // The field value is returned to support chaining.  This enables syntax like:
   694  // f.AddInt(1).Add(f2) so that f = f + 1 + f2.
   695  //
   696  //	Preconditions:
   697  //	  - The field value MUST have a max magnitude of 63
   698  //	Output Normalized: No
   699  //	Output Max Magnitude: Existing field magnitude + 1
   700  func (f *FieldVal) AddInt(ui uint16) *FieldVal {
   701  	// Since the field representation intentionally provides overflow bits,
   702  	// it's ok to use carryless addition as the carry bit is safely part of
   703  	// the word and will be normalized out.
   704  	f.n[0] += uint32(ui)
   705  
   706  	return f
   707  }
   708  
   709  // Add adds the passed value to the existing field value and stores the result
   710  // in f in constant time.
   711  //
   712  // The field value is returned to support chaining.  This enables syntax like:
   713  // f.Add(f2).AddInt(1) so that f = f + f2 + 1.
   714  //
   715  //	Preconditions:
   716  //	  - The sum of the magnitudes of the two field values MUST be a max of 64
   717  //	Output Normalized: No
   718  //	Output Max Magnitude: Sum of the magnitude of the two individual field values
   719  func (f *FieldVal) Add(val *FieldVal) *FieldVal {
   720  	// Since the field representation intentionally provides overflow bits,
   721  	// it's ok to use carryless addition as the carry bit is safely part of
   722  	// each word and will be normalized out.  This could obviously be done
   723  	// in a loop, but the unrolled version is faster.
   724  	f.n[0] += val.n[0]
   725  	f.n[1] += val.n[1]
   726  	f.n[2] += val.n[2]
   727  	f.n[3] += val.n[3]
   728  	f.n[4] += val.n[4]
   729  	f.n[5] += val.n[5]
   730  	f.n[6] += val.n[6]
   731  	f.n[7] += val.n[7]
   732  	f.n[8] += val.n[8]
   733  	f.n[9] += val.n[9]
   734  
   735  	return f
   736  }
   737  
   738  // Add2 adds the passed two field values together and stores the result in f in
   739  // constant time.
   740  //
   741  // The field value is returned to support chaining.  This enables syntax like:
   742  // f3.Add2(f, f2).AddInt(1) so that f3 = f + f2 + 1.
   743  //
   744  //	Preconditions:
   745  //	  - The sum of the magnitudes of the two field values MUST be a max of 64
   746  //	Output Normalized: No
   747  //	Output Max Magnitude: Sum of the magnitude of the two field values
   748  func (f *FieldVal) Add2(val *FieldVal, val2 *FieldVal) *FieldVal {
   749  	// Since the field representation intentionally provides overflow bits,
   750  	// it's ok to use carryless addition as the carry bit is safely part of
   751  	// each word and will be normalized out.  This could obviously be done
   752  	// in a loop, but the unrolled version is faster.
   753  	f.n[0] = val.n[0] + val2.n[0]
   754  	f.n[1] = val.n[1] + val2.n[1]
   755  	f.n[2] = val.n[2] + val2.n[2]
   756  	f.n[3] = val.n[3] + val2.n[3]
   757  	f.n[4] = val.n[4] + val2.n[4]
   758  	f.n[5] = val.n[5] + val2.n[5]
   759  	f.n[6] = val.n[6] + val2.n[6]
   760  	f.n[7] = val.n[7] + val2.n[7]
   761  	f.n[8] = val.n[8] + val2.n[8]
   762  	f.n[9] = val.n[9] + val2.n[9]
   763  
   764  	return f
   765  }
   766  
   767  // MulInt multiplies the field value by the passed int and stores the result in
   768  // f in constant time.  Note that this function can overflow if multiplying the
   769  // value by any of the individual words exceeds a max uint32.  Therefore it is
   770  // important that the caller ensures no overflows will occur before using this
   771  // function.
   772  //
   773  // The field value is returned to support chaining.  This enables syntax like:
   774  // f.MulInt(2).Add(f2) so that f = 2 * f + f2.
   775  //
   776  //	Preconditions:
   777  //	  - The field value magnitude multiplied by given val MUST be a max of 64
   778  //	Output Normalized: No
   779  //	Output Max Magnitude: Existing field magnitude times the provided integer val
   780  func (f *FieldVal) MulInt(val uint8) *FieldVal {
   781  	// Since each word of the field representation can hold up to
   782  	// 32 - fieldBase extra bits which will be normalized out, it's safe
   783  	// to multiply each word without using a larger type or carry
   784  	// propagation so long as the values won't overflow a uint32.  This
   785  	// could obviously be done in a loop, but the unrolled version is
   786  	// faster.
   787  	ui := uint32(val)
   788  	f.n[0] *= ui
   789  	f.n[1] *= ui
   790  	f.n[2] *= ui
   791  	f.n[3] *= ui
   792  	f.n[4] *= ui
   793  	f.n[5] *= ui
   794  	f.n[6] *= ui
   795  	f.n[7] *= ui
   796  	f.n[8] *= ui
   797  	f.n[9] *= ui
   798  
   799  	return f
   800  }
   801  
   802  // Mul multiplies the passed value to the existing field value and stores the
   803  // result in f in constant time.  Note that this function can overflow if
   804  // multiplying any of the individual words exceeds a max uint32.  In practice,
   805  // this means the magnitude of either value involved in the multiplication must
   806  // be a max of 8.
   807  //
   808  // The field value is returned to support chaining.  This enables syntax like:
   809  // f.Mul(f2).AddInt(1) so that f = (f * f2) + 1.
   810  //
   811  //	Preconditions:
   812  //	  - Both field values MUST have a max magnitude of 8
   813  //	Output Normalized: No
   814  //	Output Max Magnitude: 1
   815  func (f *FieldVal) Mul(val *FieldVal) *FieldVal {
   816  	return f.Mul2(f, val)
   817  }
   818  
   819  // Mul2 multiplies the passed two field values together and stores the result in
   820  // f in constant time.  Note that this function can overflow if multiplying any
   821  // of the individual words exceeds a max uint32.  In practice, this means the
   822  // magnitude of either value involved in the multiplication must be a max of 8.
   823  //
   824  // The field value is returned to support chaining.  This enables syntax like:
   825  // f3.Mul2(f, f2).AddInt(1) so that f3 = (f * f2) + 1.
   826  //
   827  //	Preconditions:
   828  //	  - Both input field values MUST have a max magnitude of 8
   829  //	Output Normalized: No
   830  //	Output Max Magnitude: 1
   831  func (f *FieldVal) Mul2(val *FieldVal, val2 *FieldVal) *FieldVal {
   832  	// This could be done with a couple of for loops and an array to store
   833  	// the intermediate terms, but this unrolled version is significantly
   834  	// faster.
   835  
   836  	// Terms for 2^(fieldBase*0).
   837  	m := uint64(val.n[0]) * uint64(val2.n[0])
   838  	t0 := m & fieldBaseMask
   839  
   840  	// Terms for 2^(fieldBase*1).
   841  	m = (m >> fieldBase) +
   842  		uint64(val.n[0])*uint64(val2.n[1]) +
   843  		uint64(val.n[1])*uint64(val2.n[0])
   844  	t1 := m & fieldBaseMask
   845  
   846  	// Terms for 2^(fieldBase*2).
   847  	m = (m >> fieldBase) +
   848  		uint64(val.n[0])*uint64(val2.n[2]) +
   849  		uint64(val.n[1])*uint64(val2.n[1]) +
   850  		uint64(val.n[2])*uint64(val2.n[0])
   851  	t2 := m & fieldBaseMask
   852  
   853  	// Terms for 2^(fieldBase*3).
   854  	m = (m >> fieldBase) +
   855  		uint64(val.n[0])*uint64(val2.n[3]) +
   856  		uint64(val.n[1])*uint64(val2.n[2]) +
   857  		uint64(val.n[2])*uint64(val2.n[1]) +
   858  		uint64(val.n[3])*uint64(val2.n[0])
   859  	t3 := m & fieldBaseMask
   860  
   861  	// Terms for 2^(fieldBase*4).
   862  	m = (m >> fieldBase) +
   863  		uint64(val.n[0])*uint64(val2.n[4]) +
   864  		uint64(val.n[1])*uint64(val2.n[3]) +
   865  		uint64(val.n[2])*uint64(val2.n[2]) +
   866  		uint64(val.n[3])*uint64(val2.n[1]) +
   867  		uint64(val.n[4])*uint64(val2.n[0])
   868  	t4 := m & fieldBaseMask
   869  
   870  	// Terms for 2^(fieldBase*5).
   871  	m = (m >> fieldBase) +
   872  		uint64(val.n[0])*uint64(val2.n[5]) +
   873  		uint64(val.n[1])*uint64(val2.n[4]) +
   874  		uint64(val.n[2])*uint64(val2.n[3]) +
   875  		uint64(val.n[3])*uint64(val2.n[2]) +
   876  		uint64(val.n[4])*uint64(val2.n[1]) +
   877  		uint64(val.n[5])*uint64(val2.n[0])
   878  	t5 := m & fieldBaseMask
   879  
   880  	// Terms for 2^(fieldBase*6).
   881  	m = (m >> fieldBase) +
   882  		uint64(val.n[0])*uint64(val2.n[6]) +
   883  		uint64(val.n[1])*uint64(val2.n[5]) +
   884  		uint64(val.n[2])*uint64(val2.n[4]) +
   885  		uint64(val.n[3])*uint64(val2.n[3]) +
   886  		uint64(val.n[4])*uint64(val2.n[2]) +
   887  		uint64(val.n[5])*uint64(val2.n[1]) +
   888  		uint64(val.n[6])*uint64(val2.n[0])
   889  	t6 := m & fieldBaseMask
   890  
   891  	// Terms for 2^(fieldBase*7).
   892  	m = (m >> fieldBase) +
   893  		uint64(val.n[0])*uint64(val2.n[7]) +
   894  		uint64(val.n[1])*uint64(val2.n[6]) +
   895  		uint64(val.n[2])*uint64(val2.n[5]) +
   896  		uint64(val.n[3])*uint64(val2.n[4]) +
   897  		uint64(val.n[4])*uint64(val2.n[3]) +
   898  		uint64(val.n[5])*uint64(val2.n[2]) +
   899  		uint64(val.n[6])*uint64(val2.n[1]) +
   900  		uint64(val.n[7])*uint64(val2.n[0])
   901  	t7 := m & fieldBaseMask
   902  
   903  	// Terms for 2^(fieldBase*8).
   904  	m = (m >> fieldBase) +
   905  		uint64(val.n[0])*uint64(val2.n[8]) +
   906  		uint64(val.n[1])*uint64(val2.n[7]) +
   907  		uint64(val.n[2])*uint64(val2.n[6]) +
   908  		uint64(val.n[3])*uint64(val2.n[5]) +
   909  		uint64(val.n[4])*uint64(val2.n[4]) +
   910  		uint64(val.n[5])*uint64(val2.n[3]) +
   911  		uint64(val.n[6])*uint64(val2.n[2]) +
   912  		uint64(val.n[7])*uint64(val2.n[1]) +
   913  		uint64(val.n[8])*uint64(val2.n[0])
   914  	t8 := m & fieldBaseMask
   915  
   916  	// Terms for 2^(fieldBase*9).
   917  	m = (m >> fieldBase) +
   918  		uint64(val.n[0])*uint64(val2.n[9]) +
   919  		uint64(val.n[1])*uint64(val2.n[8]) +
   920  		uint64(val.n[2])*uint64(val2.n[7]) +
   921  		uint64(val.n[3])*uint64(val2.n[6]) +
   922  		uint64(val.n[4])*uint64(val2.n[5]) +
   923  		uint64(val.n[5])*uint64(val2.n[4]) +
   924  		uint64(val.n[6])*uint64(val2.n[3]) +
   925  		uint64(val.n[7])*uint64(val2.n[2]) +
   926  		uint64(val.n[8])*uint64(val2.n[1]) +
   927  		uint64(val.n[9])*uint64(val2.n[0])
   928  	t9 := m & fieldBaseMask
   929  
   930  	// Terms for 2^(fieldBase*10).
   931  	m = (m >> fieldBase) +
   932  		uint64(val.n[1])*uint64(val2.n[9]) +
   933  		uint64(val.n[2])*uint64(val2.n[8]) +
   934  		uint64(val.n[3])*uint64(val2.n[7]) +
   935  		uint64(val.n[4])*uint64(val2.n[6]) +
   936  		uint64(val.n[5])*uint64(val2.n[5]) +
   937  		uint64(val.n[6])*uint64(val2.n[4]) +
   938  		uint64(val.n[7])*uint64(val2.n[3]) +
   939  		uint64(val.n[8])*uint64(val2.n[2]) +
   940  		uint64(val.n[9])*uint64(val2.n[1])
   941  	t10 := m & fieldBaseMask
   942  
   943  	// Terms for 2^(fieldBase*11).
   944  	m = (m >> fieldBase) +
   945  		uint64(val.n[2])*uint64(val2.n[9]) +
   946  		uint64(val.n[3])*uint64(val2.n[8]) +
   947  		uint64(val.n[4])*uint64(val2.n[7]) +
   948  		uint64(val.n[5])*uint64(val2.n[6]) +
   949  		uint64(val.n[6])*uint64(val2.n[5]) +
   950  		uint64(val.n[7])*uint64(val2.n[4]) +
   951  		uint64(val.n[8])*uint64(val2.n[3]) +
   952  		uint64(val.n[9])*uint64(val2.n[2])
   953  	t11 := m & fieldBaseMask
   954  
   955  	// Terms for 2^(fieldBase*12).
   956  	m = (m >> fieldBase) +
   957  		uint64(val.n[3])*uint64(val2.n[9]) +
   958  		uint64(val.n[4])*uint64(val2.n[8]) +
   959  		uint64(val.n[5])*uint64(val2.n[7]) +
   960  		uint64(val.n[6])*uint64(val2.n[6]) +
   961  		uint64(val.n[7])*uint64(val2.n[5]) +
   962  		uint64(val.n[8])*uint64(val2.n[4]) +
   963  		uint64(val.n[9])*uint64(val2.n[3])
   964  	t12 := m & fieldBaseMask
   965  
   966  	// Terms for 2^(fieldBase*13).
   967  	m = (m >> fieldBase) +
   968  		uint64(val.n[4])*uint64(val2.n[9]) +
   969  		uint64(val.n[5])*uint64(val2.n[8]) +
   970  		uint64(val.n[6])*uint64(val2.n[7]) +
   971  		uint64(val.n[7])*uint64(val2.n[6]) +
   972  		uint64(val.n[8])*uint64(val2.n[5]) +
   973  		uint64(val.n[9])*uint64(val2.n[4])
   974  	t13 := m & fieldBaseMask
   975  
   976  	// Terms for 2^(fieldBase*14).
   977  	m = (m >> fieldBase) +
   978  		uint64(val.n[5])*uint64(val2.n[9]) +
   979  		uint64(val.n[6])*uint64(val2.n[8]) +
   980  		uint64(val.n[7])*uint64(val2.n[7]) +
   981  		uint64(val.n[8])*uint64(val2.n[6]) +
   982  		uint64(val.n[9])*uint64(val2.n[5])
   983  	t14 := m & fieldBaseMask
   984  
   985  	// Terms for 2^(fieldBase*15).
   986  	m = (m >> fieldBase) +
   987  		uint64(val.n[6])*uint64(val2.n[9]) +
   988  		uint64(val.n[7])*uint64(val2.n[8]) +
   989  		uint64(val.n[8])*uint64(val2.n[7]) +
   990  		uint64(val.n[9])*uint64(val2.n[6])
   991  	t15 := m & fieldBaseMask
   992  
   993  	// Terms for 2^(fieldBase*16).
   994  	m = (m >> fieldBase) +
   995  		uint64(val.n[7])*uint64(val2.n[9]) +
   996  		uint64(val.n[8])*uint64(val2.n[8]) +
   997  		uint64(val.n[9])*uint64(val2.n[7])
   998  	t16 := m & fieldBaseMask
   999  
  1000  	// Terms for 2^(fieldBase*17).
  1001  	m = (m >> fieldBase) +
  1002  		uint64(val.n[8])*uint64(val2.n[9]) +
  1003  		uint64(val.n[9])*uint64(val2.n[8])
  1004  	t17 := m & fieldBaseMask
  1005  
  1006  	// Terms for 2^(fieldBase*18).
  1007  	m = (m >> fieldBase) + uint64(val.n[9])*uint64(val2.n[9])
  1008  	t18 := m & fieldBaseMask
  1009  
  1010  	// What's left is for 2^(fieldBase*19).
  1011  	t19 := m >> fieldBase
  1012  
  1013  	// At this point, all of the terms are grouped into their respective
  1014  	// base.
  1015  	//
  1016  	// Per [HAC] section 14.3.4: Reduction method of moduli of special form,
  1017  	// when the modulus is of the special form m = b^t - c, highly efficient
  1018  	// reduction can be achieved per the provided algorithm.
  1019  	//
  1020  	// The secp256k1 prime is equivalent to 2^256 - 4294968273, so it fits
  1021  	// this criteria.
  1022  	//
  1023  	// 4294968273 in field representation (base 2^26) is:
  1024  	// n[0] = 977
  1025  	// n[1] = 64
  1026  	// That is to say (2^26 * 64) + 977 = 4294968273
  1027  	//
  1028  	// Since each word is in base 26, the upper terms (t10 and up) start
  1029  	// at 260 bits (versus the final desired range of 256 bits), so the
  1030  	// field representation of 'c' from above needs to be adjusted for the
  1031  	// extra 4 bits by multiplying it by 2^4 = 16.  4294968273 * 16 =
  1032  	// 68719492368.  Thus, the adjusted field representation of 'c' is:
  1033  	// n[0] = 977 * 16 = 15632
  1034  	// n[1] = 64 * 16 = 1024
  1035  	// That is to say (2^26 * 1024) + 15632 = 68719492368
  1036  	//
  1037  	// To reduce the final term, t19, the entire 'c' value is needed instead
  1038  	// of only n[0] because there are no more terms left to handle n[1].
  1039  	// This means there might be some magnitude left in the upper bits that
  1040  	// is handled below.
  1041  	m = t0 + t10*15632
  1042  	t0 = m & fieldBaseMask
  1043  	m = (m >> fieldBase) + t1 + t10*1024 + t11*15632
  1044  	t1 = m & fieldBaseMask
  1045  	m = (m >> fieldBase) + t2 + t11*1024 + t12*15632
  1046  	t2 = m & fieldBaseMask
  1047  	m = (m >> fieldBase) + t3 + t12*1024 + t13*15632
  1048  	t3 = m & fieldBaseMask
  1049  	m = (m >> fieldBase) + t4 + t13*1024 + t14*15632
  1050  	t4 = m & fieldBaseMask
  1051  	m = (m >> fieldBase) + t5 + t14*1024 + t15*15632
  1052  	t5 = m & fieldBaseMask
  1053  	m = (m >> fieldBase) + t6 + t15*1024 + t16*15632
  1054  	t6 = m & fieldBaseMask
  1055  	m = (m >> fieldBase) + t7 + t16*1024 + t17*15632
  1056  	t7 = m & fieldBaseMask
  1057  	m = (m >> fieldBase) + t8 + t17*1024 + t18*15632
  1058  	t8 = m & fieldBaseMask
  1059  	m = (m >> fieldBase) + t9 + t18*1024 + t19*68719492368
  1060  	t9 = m & fieldMSBMask
  1061  	m = m >> fieldMSBBits
  1062  
  1063  	// At this point, if the magnitude is greater than 0, the overall value
  1064  	// is greater than the max possible 256-bit value.  In particular, it is
  1065  	// "how many times larger" than the max value it is.
  1066  	//
  1067  	// The algorithm presented in [HAC] section 14.3.4 repeats until the
  1068  	// quotient is zero.  However, due to the above, we already know at
  1069  	// least how many times we would need to repeat as it's the value
  1070  	// currently in m.  Thus we can simply multiply the magnitude by the
  1071  	// field representation of the prime and do a single iteration.  Notice
  1072  	// that nothing will be changed when the magnitude is zero, so we could
  1073  	// skip this in that case, however always running regardless allows it
  1074  	// to run in constant time.  The final result will be in the range
  1075  	// 0 <= result <= prime + (2^64 - c), so it is guaranteed to have a
  1076  	// magnitude of 1, but it is denormalized.
  1077  	d := t0 + m*977
  1078  	f.n[0] = uint32(d & fieldBaseMask)
  1079  	d = (d >> fieldBase) + t1 + m*64
  1080  	f.n[1] = uint32(d & fieldBaseMask)
  1081  	f.n[2] = uint32((d >> fieldBase) + t2)
  1082  	f.n[3] = uint32(t3)
  1083  	f.n[4] = uint32(t4)
  1084  	f.n[5] = uint32(t5)
  1085  	f.n[6] = uint32(t6)
  1086  	f.n[7] = uint32(t7)
  1087  	f.n[8] = uint32(t8)
  1088  	f.n[9] = uint32(t9)
  1089  
  1090  	return f
  1091  }
  1092  
  1093  // SquareRootVal either calculates the square root of the passed value when it
  1094  // exists or the square root of the negation of the value when it does not exist
  1095  // and stores the result in f in constant time.  The return flag is true when
  1096  // the calculated square root is for the passed value itself and false when it
  1097  // is for its negation.
  1098  //
  1099  // Note that this function can overflow if multiplying any of the individual
  1100  // words exceeds a max uint32.  In practice, this means the magnitude of the
  1101  // field must be a max of 8 to prevent overflow.  The magnitude of the result
  1102  // will be 1.
  1103  //
  1104  //	Preconditions:
  1105  //	  - The input field value MUST have a max magnitude of 8
  1106  //	Output Normalized: No
  1107  //	Output Max Magnitude: 1
  1108  func (f *FieldVal) SquareRootVal(val *FieldVal) bool {
  1109  	// This uses the Tonelli-Shanks method for calculating the square root of
  1110  	// the value when it exists.  The key principles of the method follow.
  1111  	//
  1112  	// Fermat's little theorem states that for a nonzero number 'a' and prime
  1113  	// 'p', a^(p-1) ≡ 1 (mod p).
  1114  	//
  1115  	// Further, Euler's criterion states that an integer 'a' has a square root
  1116  	// (aka is a quadratic residue) modulo a prime if a^((p-1)/2) ≡ 1 (mod p)
  1117  	// and, conversely, when it does NOT have a square root (aka 'a' is a
  1118  	// non-residue) a^((p-1)/2) ≡ -1 (mod p).
  1119  	//
  1120  	// This can be seen by considering that Fermat's little theorem can be
  1121  	// written as (a^((p-1)/2) - 1)(a^((p-1)/2) + 1) ≡ 0 (mod p).  Therefore,
  1122  	// one of the two factors must be 0.  Then, when a ≡ x^2 (aka 'a' is a
  1123  	// quadratic residue), (x^2)^((p-1)/2) ≡ x^(p-1) ≡ 1 (mod p) which implies
  1124  	// the first factor must be zero.  Finally, per Lagrange's theorem, the
  1125  	// non-residues are the only remaining possible solutions and thus must make
  1126  	// the second factor zero to satisfy Fermat's little theorem implying that
  1127  	// a^((p-1)/2) ≡ -1 (mod p) for that case.
  1128  	//
  1129  	// The Tonelli-Shanks method uses these facts along with factoring out
  1130  	// powers of two to solve a congruence that results in either the solution
  1131  	// when the square root exists or the square root of the negation of the
  1132  	// value when it does not.  In the case of primes that are ≡ 3 (mod 4), the
  1133  	// possible solutions are r = ±a^((p+1)/4) (mod p).  Therefore, either r^2 ≡
  1134  	// a (mod p) is true in which case ±r are the two solutions, or r^2 ≡ -a
  1135  	// (mod p) in which case 'a' is a non-residue and there are no solutions.
  1136  	//
  1137  	// The secp256k1 prime is ≡ 3 (mod 4), so this result applies.
  1138  	//
  1139  	// In other words, calculate a^((p+1)/4) and then square it and check it
  1140  	// against the original value to determine if it is actually the square
  1141  	// root.
  1142  	//
  1143  	// In order to efficiently compute a^((p+1)/4), (p+1)/4 needs to be split
  1144  	// into a sequence of squares and multiplications that minimizes the number
  1145  	// of multiplications needed (since they are more costly than squarings).
  1146  	//
  1147  	// The secp256k1 prime + 1 / 4 is 2^254 - 2^30 - 244.  In binary, that is:
  1148  	//
  1149  	// 00111111 11111111 11111111 11111111
  1150  	// 11111111 11111111 11111111 11111111
  1151  	// 11111111 11111111 11111111 11111111
  1152  	// 11111111 11111111 11111111 11111111
  1153  	// 11111111 11111111 11111111 11111111
  1154  	// 11111111 11111111 11111111 11111111
  1155  	// 11111111 11111111 11111111 11111111
  1156  	// 10111111 11111111 11111111 00001100
  1157  	//
  1158  	// Notice that can be broken up into three windows of consecutive 1s (in
  1159  	// order of least to most significant) as:
  1160  	//
  1161  	//   6-bit window with two bits set (bits 4, 5, 6, 7 unset)
  1162  	//   23-bit window with 22 bits set (bit 30 unset)
  1163  	//   223-bit window with all 223 bits set
  1164  	//
  1165  	// Thus, the groups of 1 bits in each window forms the set:
  1166  	// S = {2, 22, 223}.
  1167  	//
  1168  	// The strategy is to calculate a^(2^n - 1) for each grouping via an
  1169  	// addition chain with a sliding window.
  1170  	//
  1171  	// The addition chain used is (credits to Peter Dettman):
  1172  	// (0,0),(1,0),(2,2),(3,2),(4,1),(5,5),(6,6),(7,7),(8,8),(9,7),(10,2)
  1173  	// => 2^1 2^[2] 2^3 2^6 2^9 2^11 2^[22] 2^44 2^88 2^176 2^220 2^[223]
  1174  	//
  1175  	// This has a cost of 254 field squarings and 13 field multiplications.
  1176  	var a, a2, a3, a6, a9, a11, a22, a44, a88, a176, a220, a223 FieldVal
  1177  	a.Set(val)
  1178  	a2.SquareVal(&a).Mul(&a)                                  // a2 = a^(2^2 - 1)
  1179  	a3.SquareVal(&a2).Mul(&a)                                 // a3 = a^(2^3 - 1)
  1180  	a6.SquareVal(&a3).Square().Square()                       // a6 = a^(2^6 - 2^3)
  1181  	a6.Mul(&a3)                                               // a6 = a^(2^6 - 1)
  1182  	a9.SquareVal(&a6).Square().Square()                       // a9 = a^(2^9 - 2^3)
  1183  	a9.Mul(&a3)                                               // a9 = a^(2^9 - 1)
  1184  	a11.SquareVal(&a9).Square()                               // a11 = a^(2^11 - 2^2)
  1185  	a11.Mul(&a2)                                              // a11 = a^(2^11 - 1)
  1186  	a22.SquareVal(&a11).Square().Square().Square().Square()   // a22 = a^(2^16 - 2^5)
  1187  	a22.Square().Square().Square().Square().Square()          // a22 = a^(2^21 - 2^10)
  1188  	a22.Square()                                              // a22 = a^(2^22 - 2^11)
  1189  	a22.Mul(&a11)                                             // a22 = a^(2^22 - 1)
  1190  	a44.SquareVal(&a22).Square().Square().Square().Square()   // a44 = a^(2^27 - 2^5)
  1191  	a44.Square().Square().Square().Square().Square()          // a44 = a^(2^32 - 2^10)
  1192  	a44.Square().Square().Square().Square().Square()          // a44 = a^(2^37 - 2^15)
  1193  	a44.Square().Square().Square().Square().Square()          // a44 = a^(2^42 - 2^20)
  1194  	a44.Square().Square()                                     // a44 = a^(2^44 - 2^22)
  1195  	a44.Mul(&a22)                                             // a44 = a^(2^44 - 1)
  1196  	a88.SquareVal(&a44).Square().Square().Square().Square()   // a88 = a^(2^49 - 2^5)
  1197  	a88.Square().Square().Square().Square().Square()          // a88 = a^(2^54 - 2^10)
  1198  	a88.Square().Square().Square().Square().Square()          // a88 = a^(2^59 - 2^15)
  1199  	a88.Square().Square().Square().Square().Square()          // a88 = a^(2^64 - 2^20)
  1200  	a88.Square().Square().Square().Square().Square()          // a88 = a^(2^69 - 2^25)
  1201  	a88.Square().Square().Square().Square().Square()          // a88 = a^(2^74 - 2^30)
  1202  	a88.Square().Square().Square().Square().Square()          // a88 = a^(2^79 - 2^35)
  1203  	a88.Square().Square().Square().Square().Square()          // a88 = a^(2^84 - 2^40)
  1204  	a88.Square().Square().Square().Square()                   // a88 = a^(2^88 - 2^44)
  1205  	a88.Mul(&a44)                                             // a88 = a^(2^88 - 1)
  1206  	a176.SquareVal(&a88).Square().Square().Square().Square()  // a176 = a^(2^93 - 2^5)
  1207  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^98 - 2^10)
  1208  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^103 - 2^15)
  1209  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^108 - 2^20)
  1210  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^113 - 2^25)
  1211  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^118 - 2^30)
  1212  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^123 - 2^35)
  1213  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^128 - 2^40)
  1214  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^133 - 2^45)
  1215  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^138 - 2^50)
  1216  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^143 - 2^55)
  1217  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^148 - 2^60)
  1218  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^153 - 2^65)
  1219  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^158 - 2^70)
  1220  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^163 - 2^75)
  1221  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^168 - 2^80)
  1222  	a176.Square().Square().Square().Square().Square()         // a176 = a^(2^173 - 2^85)
  1223  	a176.Square().Square().Square()                           // a176 = a^(2^176 - 2^88)
  1224  	a176.Mul(&a88)                                            // a176 = a^(2^176 - 1)
  1225  	a220.SquareVal(&a176).Square().Square().Square().Square() // a220 = a^(2^181 - 2^5)
  1226  	a220.Square().Square().Square().Square().Square()         // a220 = a^(2^186 - 2^10)
  1227  	a220.Square().Square().Square().Square().Square()         // a220 = a^(2^191 - 2^15)
  1228  	a220.Square().Square().Square().Square().Square()         // a220 = a^(2^196 - 2^20)
  1229  	a220.Square().Square().Square().Square().Square()         // a220 = a^(2^201 - 2^25)
  1230  	a220.Square().Square().Square().Square().Square()         // a220 = a^(2^206 - 2^30)
  1231  	a220.Square().Square().Square().Square().Square()         // a220 = a^(2^211 - 2^35)
  1232  	a220.Square().Square().Square().Square().Square()         // a220 = a^(2^216 - 2^40)
  1233  	a220.Square().Square().Square().Square()                  // a220 = a^(2^220 - 2^44)
  1234  	a220.Mul(&a44)                                            // a220 = a^(2^220 - 1)
  1235  	a223.SquareVal(&a220).Square().Square()                   // a223 = a^(2^223 - 2^3)
  1236  	a223.Mul(&a3)                                             // a223 = a^(2^223 - 1)
  1237  
  1238  	f.SquareVal(&a223).Square().Square().Square().Square() // f = a^(2^228 - 2^5)
  1239  	f.Square().Square().Square().Square().Square()         // f = a^(2^233 - 2^10)
  1240  	f.Square().Square().Square().Square().Square()         // f = a^(2^238 - 2^15)
  1241  	f.Square().Square().Square().Square().Square()         // f = a^(2^243 - 2^20)
  1242  	f.Square().Square().Square()                           // f = a^(2^246 - 2^23)
  1243  	f.Mul(&a22)                                            // f = a^(2^246 - 2^22 - 1)
  1244  	f.Square().Square().Square().Square().Square()         // f = a^(2^251 - 2^27 - 2^5)
  1245  	f.Square()                                             // f = a^(2^252 - 2^28 - 2^6)
  1246  	f.Mul(&a2)                                             // f = a^(2^252 - 2^28 - 2^6 - 2^1 - 1)
  1247  	f.Square().Square()                                    // f = a^(2^254 - 2^30 - 2^8 - 2^3 - 2^2)
  1248  	//                                                     //   = a^(2^254 - 2^30 - 244)
  1249  	//                                                     //   = a^((p+1)/4)
  1250  
  1251  	// Ensure the calculated result is actually the square root by squaring it
  1252  	// and checking against the original value.
  1253  	var sqr FieldVal
  1254  	return sqr.SquareVal(f).Normalize().Equals(val.Normalize())
  1255  }
  1256  
  1257  // Square squares the field value in constant time.  The existing field value is
  1258  // modified.  Note that this function can overflow if multiplying any of the
  1259  // individual words exceeds a max uint32.  In practice, this means the magnitude
  1260  // of the field must be a max of 8 to prevent overflow.
  1261  //
  1262  // The field value is returned to support chaining.  This enables syntax like:
  1263  // f.Square().Mul(f2) so that f = f^2 * f2.
  1264  //
  1265  //	Preconditions:
  1266  //	  - The field value MUST have a max magnitude of 8
  1267  //	Output Normalized: No
  1268  //	Output Max Magnitude: 1
  1269  func (f *FieldVal) Square() *FieldVal {
  1270  	return f.SquareVal(f)
  1271  }
  1272  
  1273  // SquareVal squares the passed value and stores the result in f in constant
  1274  // time.  Note that this function can overflow if multiplying any of the
  1275  // individual words exceeds a max uint32.  In practice, this means the magnitude
  1276  // of the field being squared must be a max of 8 to prevent overflow.
  1277  //
  1278  // The field value is returned to support chaining.  This enables syntax like:
  1279  // f3.SquareVal(f).Mul(f) so that f3 = f^2 * f = f^3.
  1280  //
  1281  //	Preconditions:
  1282  //	  - The input field value MUST have a max magnitude of 8
  1283  //	Output Normalized: No
  1284  //	Output Max Magnitude: 1
  1285  func (f *FieldVal) SquareVal(val *FieldVal) *FieldVal {
  1286  	// This could be done with a couple of for loops and an array to store
  1287  	// the intermediate terms, but this unrolled version is significantly
  1288  	// faster.
  1289  
  1290  	// Terms for 2^(fieldBase*0).
  1291  	m := uint64(val.n[0]) * uint64(val.n[0])
  1292  	t0 := m & fieldBaseMask
  1293  
  1294  	// Terms for 2^(fieldBase*1).
  1295  	m = (m >> fieldBase) + 2*uint64(val.n[0])*uint64(val.n[1])
  1296  	t1 := m & fieldBaseMask
  1297  
  1298  	// Terms for 2^(fieldBase*2).
  1299  	m = (m >> fieldBase) +
  1300  		2*uint64(val.n[0])*uint64(val.n[2]) +
  1301  		uint64(val.n[1])*uint64(val.n[1])
  1302  	t2 := m & fieldBaseMask
  1303  
  1304  	// Terms for 2^(fieldBase*3).
  1305  	m = (m >> fieldBase) +
  1306  		2*uint64(val.n[0])*uint64(val.n[3]) +
  1307  		2*uint64(val.n[1])*uint64(val.n[2])
  1308  	t3 := m & fieldBaseMask
  1309  
  1310  	// Terms for 2^(fieldBase*4).
  1311  	m = (m >> fieldBase) +
  1312  		2*uint64(val.n[0])*uint64(val.n[4]) +
  1313  		2*uint64(val.n[1])*uint64(val.n[3]) +
  1314  		uint64(val.n[2])*uint64(val.n[2])
  1315  	t4 := m & fieldBaseMask
  1316  
  1317  	// Terms for 2^(fieldBase*5).
  1318  	m = (m >> fieldBase) +
  1319  		2*uint64(val.n[0])*uint64(val.n[5]) +
  1320  		2*uint64(val.n[1])*uint64(val.n[4]) +
  1321  		2*uint64(val.n[2])*uint64(val.n[3])
  1322  	t5 := m & fieldBaseMask
  1323  
  1324  	// Terms for 2^(fieldBase*6).
  1325  	m = (m >> fieldBase) +
  1326  		2*uint64(val.n[0])*uint64(val.n[6]) +
  1327  		2*uint64(val.n[1])*uint64(val.n[5]) +
  1328  		2*uint64(val.n[2])*uint64(val.n[4]) +
  1329  		uint64(val.n[3])*uint64(val.n[3])
  1330  	t6 := m & fieldBaseMask
  1331  
  1332  	// Terms for 2^(fieldBase*7).
  1333  	m = (m >> fieldBase) +
  1334  		2*uint64(val.n[0])*uint64(val.n[7]) +
  1335  		2*uint64(val.n[1])*uint64(val.n[6]) +
  1336  		2*uint64(val.n[2])*uint64(val.n[5]) +
  1337  		2*uint64(val.n[3])*uint64(val.n[4])
  1338  	t7 := m & fieldBaseMask
  1339  
  1340  	// Terms for 2^(fieldBase*8).
  1341  	m = (m >> fieldBase) +
  1342  		2*uint64(val.n[0])*uint64(val.n[8]) +
  1343  		2*uint64(val.n[1])*uint64(val.n[7]) +
  1344  		2*uint64(val.n[2])*uint64(val.n[6]) +
  1345  		2*uint64(val.n[3])*uint64(val.n[5]) +
  1346  		uint64(val.n[4])*uint64(val.n[4])
  1347  	t8 := m & fieldBaseMask
  1348  
  1349  	// Terms for 2^(fieldBase*9).
  1350  	m = (m >> fieldBase) +
  1351  		2*uint64(val.n[0])*uint64(val.n[9]) +
  1352  		2*uint64(val.n[1])*uint64(val.n[8]) +
  1353  		2*uint64(val.n[2])*uint64(val.n[7]) +
  1354  		2*uint64(val.n[3])*uint64(val.n[6]) +
  1355  		2*uint64(val.n[4])*uint64(val.n[5])
  1356  	t9 := m & fieldBaseMask
  1357  
  1358  	// Terms for 2^(fieldBase*10).
  1359  	m = (m >> fieldBase) +
  1360  		2*uint64(val.n[1])*uint64(val.n[9]) +
  1361  		2*uint64(val.n[2])*uint64(val.n[8]) +
  1362  		2*uint64(val.n[3])*uint64(val.n[7]) +
  1363  		2*uint64(val.n[4])*uint64(val.n[6]) +
  1364  		uint64(val.n[5])*uint64(val.n[5])
  1365  	t10 := m & fieldBaseMask
  1366  
  1367  	// Terms for 2^(fieldBase*11).
  1368  	m = (m >> fieldBase) +
  1369  		2*uint64(val.n[2])*uint64(val.n[9]) +
  1370  		2*uint64(val.n[3])*uint64(val.n[8]) +
  1371  		2*uint64(val.n[4])*uint64(val.n[7]) +
  1372  		2*uint64(val.n[5])*uint64(val.n[6])
  1373  	t11 := m & fieldBaseMask
  1374  
  1375  	// Terms for 2^(fieldBase*12).
  1376  	m = (m >> fieldBase) +
  1377  		2*uint64(val.n[3])*uint64(val.n[9]) +
  1378  		2*uint64(val.n[4])*uint64(val.n[8]) +
  1379  		2*uint64(val.n[5])*uint64(val.n[7]) +
  1380  		uint64(val.n[6])*uint64(val.n[6])
  1381  	t12 := m & fieldBaseMask
  1382  
  1383  	// Terms for 2^(fieldBase*13).
  1384  	m = (m >> fieldBase) +
  1385  		2*uint64(val.n[4])*uint64(val.n[9]) +
  1386  		2*uint64(val.n[5])*uint64(val.n[8]) +
  1387  		2*uint64(val.n[6])*uint64(val.n[7])
  1388  	t13 := m & fieldBaseMask
  1389  
  1390  	// Terms for 2^(fieldBase*14).
  1391  	m = (m >> fieldBase) +
  1392  		2*uint64(val.n[5])*uint64(val.n[9]) +
  1393  		2*uint64(val.n[6])*uint64(val.n[8]) +
  1394  		uint64(val.n[7])*uint64(val.n[7])
  1395  	t14 := m & fieldBaseMask
  1396  
  1397  	// Terms for 2^(fieldBase*15).
  1398  	m = (m >> fieldBase) +
  1399  		2*uint64(val.n[6])*uint64(val.n[9]) +
  1400  		2*uint64(val.n[7])*uint64(val.n[8])
  1401  	t15 := m & fieldBaseMask
  1402  
  1403  	// Terms for 2^(fieldBase*16).
  1404  	m = (m >> fieldBase) +
  1405  		2*uint64(val.n[7])*uint64(val.n[9]) +
  1406  		uint64(val.n[8])*uint64(val.n[8])
  1407  	t16 := m & fieldBaseMask
  1408  
  1409  	// Terms for 2^(fieldBase*17).
  1410  	m = (m >> fieldBase) + 2*uint64(val.n[8])*uint64(val.n[9])
  1411  	t17 := m & fieldBaseMask
  1412  
  1413  	// Terms for 2^(fieldBase*18).
  1414  	m = (m >> fieldBase) + uint64(val.n[9])*uint64(val.n[9])
  1415  	t18 := m & fieldBaseMask
  1416  
  1417  	// What's left is for 2^(fieldBase*19).
  1418  	t19 := m >> fieldBase
  1419  
  1420  	// At this point, all of the terms are grouped into their respective
  1421  	// base.
  1422  	//
  1423  	// Per [HAC] section 14.3.4: Reduction method of moduli of special form,
  1424  	// when the modulus is of the special form m = b^t - c, highly efficient
  1425  	// reduction can be achieved per the provided algorithm.
  1426  	//
  1427  	// The secp256k1 prime is equivalent to 2^256 - 4294968273, so it fits
  1428  	// this criteria.
  1429  	//
  1430  	// 4294968273 in field representation (base 2^26) is:
  1431  	// n[0] = 977
  1432  	// n[1] = 64
  1433  	// That is to say (2^26 * 64) + 977 = 4294968273
  1434  	//
  1435  	// Since each word is in base 26, the upper terms (t10 and up) start
  1436  	// at 260 bits (versus the final desired range of 256 bits), so the
  1437  	// field representation of 'c' from above needs to be adjusted for the
  1438  	// extra 4 bits by multiplying it by 2^4 = 16.  4294968273 * 16 =
  1439  	// 68719492368.  Thus, the adjusted field representation of 'c' is:
  1440  	// n[0] = 977 * 16 = 15632
  1441  	// n[1] = 64 * 16 = 1024
  1442  	// That is to say (2^26 * 1024) + 15632 = 68719492368
  1443  	//
  1444  	// To reduce the final term, t19, the entire 'c' value is needed instead
  1445  	// of only n[0] because there are no more terms left to handle n[1].
  1446  	// This means there might be some magnitude left in the upper bits that
  1447  	// is handled below.
  1448  	m = t0 + t10*15632
  1449  	t0 = m & fieldBaseMask
  1450  	m = (m >> fieldBase) + t1 + t10*1024 + t11*15632
  1451  	t1 = m & fieldBaseMask
  1452  	m = (m >> fieldBase) + t2 + t11*1024 + t12*15632
  1453  	t2 = m & fieldBaseMask
  1454  	m = (m >> fieldBase) + t3 + t12*1024 + t13*15632
  1455  	t3 = m & fieldBaseMask
  1456  	m = (m >> fieldBase) + t4 + t13*1024 + t14*15632
  1457  	t4 = m & fieldBaseMask
  1458  	m = (m >> fieldBase) + t5 + t14*1024 + t15*15632
  1459  	t5 = m & fieldBaseMask
  1460  	m = (m >> fieldBase) + t6 + t15*1024 + t16*15632
  1461  	t6 = m & fieldBaseMask
  1462  	m = (m >> fieldBase) + t7 + t16*1024 + t17*15632
  1463  	t7 = m & fieldBaseMask
  1464  	m = (m >> fieldBase) + t8 + t17*1024 + t18*15632
  1465  	t8 = m & fieldBaseMask
  1466  	m = (m >> fieldBase) + t9 + t18*1024 + t19*68719492368
  1467  	t9 = m & fieldMSBMask
  1468  	m = m >> fieldMSBBits
  1469  
  1470  	// At this point, if the magnitude is greater than 0, the overall value
  1471  	// is greater than the max possible 256-bit value.  In particular, it is
  1472  	// "how many times larger" than the max value it is.
  1473  	//
  1474  	// The algorithm presented in [HAC] section 14.3.4 repeats until the
  1475  	// quotient is zero.  However, due to the above, we already know at
  1476  	// least how many times we would need to repeat as it's the value
  1477  	// currently in m.  Thus we can simply multiply the magnitude by the
  1478  	// field representation of the prime and do a single iteration.  Notice
  1479  	// that nothing will be changed when the magnitude is zero, so we could
  1480  	// skip this in that case, however always running regardless allows it
  1481  	// to run in constant time.  The final result will be in the range
  1482  	// 0 <= result <= prime + (2^64 - c), so it is guaranteed to have a
  1483  	// magnitude of 1, but it is denormalized.
  1484  	n := t0 + m*977
  1485  	f.n[0] = uint32(n & fieldBaseMask)
  1486  	n = (n >> fieldBase) + t1 + m*64
  1487  	f.n[1] = uint32(n & fieldBaseMask)
  1488  	f.n[2] = uint32((n >> fieldBase) + t2)
  1489  	f.n[3] = uint32(t3)
  1490  	f.n[4] = uint32(t4)
  1491  	f.n[5] = uint32(t5)
  1492  	f.n[6] = uint32(t6)
  1493  	f.n[7] = uint32(t7)
  1494  	f.n[8] = uint32(t8)
  1495  	f.n[9] = uint32(t9)
  1496  
  1497  	return f
  1498  }
  1499  
  1500  // Inverse finds the modular multiplicative inverse of the field value in
  1501  // constant time.  The existing field value is modified.
  1502  //
  1503  // The field value is returned to support chaining.  This enables syntax like:
  1504  // f.Inverse().Mul(f2) so that f = f^-1 * f2.
  1505  //
  1506  //	Preconditions:
  1507  //	  - The field value MUST have a max magnitude of 8
  1508  //	Output Normalized: No
  1509  //	Output Max Magnitude: 1
  1510  func (f *FieldVal) Inverse() *FieldVal {
  1511  	// Fermat's little theorem states that for a nonzero number a and prime
  1512  	// p, a^(p-1) ≡ 1 (mod p).  Since the multiplicative inverse is
  1513  	// a*b ≡ 1 (mod p), it follows that b ≡ a*a^(p-2) ≡ a^(p-1) ≡ 1 (mod p).
  1514  	// Thus, a^(p-2) is the multiplicative inverse.
  1515  	//
  1516  	// In order to efficiently compute a^(p-2), p-2 needs to be split into
  1517  	// a sequence of squares and multiplications that minimizes the number
  1518  	// of multiplications needed (since they are more costly than
  1519  	// squarings). Intermediate results are saved and reused as well.
  1520  	//
  1521  	// The secp256k1 prime - 2 is 2^256 - 4294968275.
  1522  	//
  1523  	// This has a cost of 258 field squarings and 33 field multiplications.
  1524  	var a2, a3, a4, a10, a11, a21, a42, a45, a63, a1019, a1023 FieldVal
  1525  	a2.SquareVal(f)
  1526  	a3.Mul2(&a2, f)
  1527  	a4.SquareVal(&a2)
  1528  	a10.SquareVal(&a4).Mul(&a2)
  1529  	a11.Mul2(&a10, f)
  1530  	a21.Mul2(&a10, &a11)
  1531  	a42.SquareVal(&a21)
  1532  	a45.Mul2(&a42, &a3)
  1533  	a63.Mul2(&a42, &a21)
  1534  	a1019.SquareVal(&a63).Square().Square().Square().Mul(&a11)
  1535  	a1023.Mul2(&a1019, &a4)
  1536  	f.Set(&a63)                                    // f = a^(2^6 - 1)
  1537  	f.Square().Square().Square().Square().Square() // f = a^(2^11 - 32)
  1538  	f.Square().Square().Square().Square().Square() // f = a^(2^16 - 1024)
  1539  	f.Mul(&a1023)                                  // f = a^(2^16 - 1)
  1540  	f.Square().Square().Square().Square().Square() // f = a^(2^21 - 32)
  1541  	f.Square().Square().Square().Square().Square() // f = a^(2^26 - 1024)
  1542  	f.Mul(&a1023)                                  // f = a^(2^26 - 1)
  1543  	f.Square().Square().Square().Square().Square() // f = a^(2^31 - 32)
  1544  	f.Square().Square().Square().Square().Square() // f = a^(2^36 - 1024)
  1545  	f.Mul(&a1023)                                  // f = a^(2^36 - 1)
  1546  	f.Square().Square().Square().Square().Square() // f = a^(2^41 - 32)
  1547  	f.Square().Square().Square().Square().Square() // f = a^(2^46 - 1024)
  1548  	f.Mul(&a1023)                                  // f = a^(2^46 - 1)
  1549  	f.Square().Square().Square().Square().Square() // f = a^(2^51 - 32)
  1550  	f.Square().Square().Square().Square().Square() // f = a^(2^56 - 1024)
  1551  	f.Mul(&a1023)                                  // f = a^(2^56 - 1)
  1552  	f.Square().Square().Square().Square().Square() // f = a^(2^61 - 32)
  1553  	f.Square().Square().Square().Square().Square() // f = a^(2^66 - 1024)
  1554  	f.Mul(&a1023)                                  // f = a^(2^66 - 1)
  1555  	f.Square().Square().Square().Square().Square() // f = a^(2^71 - 32)
  1556  	f.Square().Square().Square().Square().Square() // f = a^(2^76 - 1024)
  1557  	f.Mul(&a1023)                                  // f = a^(2^76 - 1)
  1558  	f.Square().Square().Square().Square().Square() // f = a^(2^81 - 32)
  1559  	f.Square().Square().Square().Square().Square() // f = a^(2^86 - 1024)
  1560  	f.Mul(&a1023)                                  // f = a^(2^86 - 1)
  1561  	f.Square().Square().Square().Square().Square() // f = a^(2^91 - 32)
  1562  	f.Square().Square().Square().Square().Square() // f = a^(2^96 - 1024)
  1563  	f.Mul(&a1023)                                  // f = a^(2^96 - 1)
  1564  	f.Square().Square().Square().Square().Square() // f = a^(2^101 - 32)
  1565  	f.Square().Square().Square().Square().Square() // f = a^(2^106 - 1024)
  1566  	f.Mul(&a1023)                                  // f = a^(2^106 - 1)
  1567  	f.Square().Square().Square().Square().Square() // f = a^(2^111 - 32)
  1568  	f.Square().Square().Square().Square().Square() // f = a^(2^116 - 1024)
  1569  	f.Mul(&a1023)                                  // f = a^(2^116 - 1)
  1570  	f.Square().Square().Square().Square().Square() // f = a^(2^121 - 32)
  1571  	f.Square().Square().Square().Square().Square() // f = a^(2^126 - 1024)
  1572  	f.Mul(&a1023)                                  // f = a^(2^126 - 1)
  1573  	f.Square().Square().Square().Square().Square() // f = a^(2^131 - 32)
  1574  	f.Square().Square().Square().Square().Square() // f = a^(2^136 - 1024)
  1575  	f.Mul(&a1023)                                  // f = a^(2^136 - 1)
  1576  	f.Square().Square().Square().Square().Square() // f = a^(2^141 - 32)
  1577  	f.Square().Square().Square().Square().Square() // f = a^(2^146 - 1024)
  1578  	f.Mul(&a1023)                                  // f = a^(2^146 - 1)
  1579  	f.Square().Square().Square().Square().Square() // f = a^(2^151 - 32)
  1580  	f.Square().Square().Square().Square().Square() // f = a^(2^156 - 1024)
  1581  	f.Mul(&a1023)                                  // f = a^(2^156 - 1)
  1582  	f.Square().Square().Square().Square().Square() // f = a^(2^161 - 32)
  1583  	f.Square().Square().Square().Square().Square() // f = a^(2^166 - 1024)
  1584  	f.Mul(&a1023)                                  // f = a^(2^166 - 1)
  1585  	f.Square().Square().Square().Square().Square() // f = a^(2^171 - 32)
  1586  	f.Square().Square().Square().Square().Square() // f = a^(2^176 - 1024)
  1587  	f.Mul(&a1023)                                  // f = a^(2^176 - 1)
  1588  	f.Square().Square().Square().Square().Square() // f = a^(2^181 - 32)
  1589  	f.Square().Square().Square().Square().Square() // f = a^(2^186 - 1024)
  1590  	f.Mul(&a1023)                                  // f = a^(2^186 - 1)
  1591  	f.Square().Square().Square().Square().Square() // f = a^(2^191 - 32)
  1592  	f.Square().Square().Square().Square().Square() // f = a^(2^196 - 1024)
  1593  	f.Mul(&a1023)                                  // f = a^(2^196 - 1)
  1594  	f.Square().Square().Square().Square().Square() // f = a^(2^201 - 32)
  1595  	f.Square().Square().Square().Square().Square() // f = a^(2^206 - 1024)
  1596  	f.Mul(&a1023)                                  // f = a^(2^206 - 1)
  1597  	f.Square().Square().Square().Square().Square() // f = a^(2^211 - 32)
  1598  	f.Square().Square().Square().Square().Square() // f = a^(2^216 - 1024)
  1599  	f.Mul(&a1023)                                  // f = a^(2^216 - 1)
  1600  	f.Square().Square().Square().Square().Square() // f = a^(2^221 - 32)
  1601  	f.Square().Square().Square().Square().Square() // f = a^(2^226 - 1024)
  1602  	f.Mul(&a1019)                                  // f = a^(2^226 - 5)
  1603  	f.Square().Square().Square().Square().Square() // f = a^(2^231 - 160)
  1604  	f.Square().Square().Square().Square().Square() // f = a^(2^236 - 5120)
  1605  	f.Mul(&a1023)                                  // f = a^(2^236 - 4097)
  1606  	f.Square().Square().Square().Square().Square() // f = a^(2^241 - 131104)
  1607  	f.Square().Square().Square().Square().Square() // f = a^(2^246 - 4195328)
  1608  	f.Mul(&a1023)                                  // f = a^(2^246 - 4194305)
  1609  	f.Square().Square().Square().Square().Square() // f = a^(2^251 - 134217760)
  1610  	f.Square().Square().Square().Square().Square() // f = a^(2^256 - 4294968320)
  1611  	return f.Mul(&a45)                             // f = a^(2^256 - 4294968275) = a^(p-2)
  1612  }
  1613  
  1614  // IsGtOrEqPrimeMinusOrder returns whether or not the field value exceeds the
  1615  // group order divided by 2 in constant time.
  1616  //
  1617  //	Preconditions:
  1618  //	  - The field value MUST be normalized
  1619  func (f *FieldVal) IsGtOrEqPrimeMinusOrder() bool {
  1620  	// The secp256k1 prime is equivalent to 2^256 - 4294968273 and the group
  1621  	// order is 2^256 - 432420386565659656852420866394968145599.  Thus,
  1622  	// the prime minus the group order is:
  1623  	// 432420386565659656852420866390673177326
  1624  	//
  1625  	// In hex that is:
  1626  	// 0x00000000 00000000 00000000 00000001 45512319 50b75fc4 402da172 2fc9baee
  1627  	//
  1628  	// Converting that to field representation (base 2^26) is:
  1629  	//
  1630  	// n[0] = 0x03c9baee
  1631  	// n[1] = 0x03685c8b
  1632  	// n[2] = 0x01fc4402
  1633  	// n[3] = 0x006542dd
  1634  	// n[4] = 0x01455123
  1635  	//
  1636  	// This can be verified with the following test code:
  1637  	//   pMinusN := new(big.Int).Sub(curveParams.P, curveParams.N)
  1638  	//   var fv FieldVal
  1639  	//   fv.SetByteSlice(pMinusN.Bytes())
  1640  	//   t.Logf("%x", fv.n)
  1641  	//
  1642  	//   Outputs: [3c9baee 3685c8b 1fc4402 6542dd 1455123 0 0 0 0 0]
  1643  	const (
  1644  		pMinusNWordZero  = 0x03c9baee
  1645  		pMinusNWordOne   = 0x03685c8b
  1646  		pMinusNWordTwo   = 0x01fc4402
  1647  		pMinusNWordThree = 0x006542dd
  1648  		pMinusNWordFour  = 0x01455123
  1649  		pMinusNWordFive  = 0x00000000
  1650  		pMinusNWordSix   = 0x00000000
  1651  		pMinusNWordSeven = 0x00000000
  1652  		pMinusNWordEight = 0x00000000
  1653  		pMinusNWordNine  = 0x00000000
  1654  	)
  1655  
  1656  	// The intuition here is that the value is greater than field prime minus
  1657  	// the group order if one of the higher individual words is greater than the
  1658  	// corresponding word and all higher words in the value are equal.
  1659  	result := constantTimeGreater(f.n[9], pMinusNWordNine)
  1660  	highWordsEqual := constantTimeEq(f.n[9], pMinusNWordNine)
  1661  	result |= highWordsEqual & constantTimeGreater(f.n[8], pMinusNWordEight)
  1662  	highWordsEqual &= constantTimeEq(f.n[8], pMinusNWordEight)
  1663  	result |= highWordsEqual & constantTimeGreater(f.n[7], pMinusNWordSeven)
  1664  	highWordsEqual &= constantTimeEq(f.n[7], pMinusNWordSeven)
  1665  	result |= highWordsEqual & constantTimeGreater(f.n[6], pMinusNWordSix)
  1666  	highWordsEqual &= constantTimeEq(f.n[6], pMinusNWordSix)
  1667  	result |= highWordsEqual & constantTimeGreater(f.n[5], pMinusNWordFive)
  1668  	highWordsEqual &= constantTimeEq(f.n[5], pMinusNWordFive)
  1669  	result |= highWordsEqual & constantTimeGreater(f.n[4], pMinusNWordFour)
  1670  	highWordsEqual &= constantTimeEq(f.n[4], pMinusNWordFour)
  1671  	result |= highWordsEqual & constantTimeGreater(f.n[3], pMinusNWordThree)
  1672  	highWordsEqual &= constantTimeEq(f.n[3], pMinusNWordThree)
  1673  	result |= highWordsEqual & constantTimeGreater(f.n[2], pMinusNWordTwo)
  1674  	highWordsEqual &= constantTimeEq(f.n[2], pMinusNWordTwo)
  1675  	result |= highWordsEqual & constantTimeGreater(f.n[1], pMinusNWordOne)
  1676  	highWordsEqual &= constantTimeEq(f.n[1], pMinusNWordOne)
  1677  	result |= highWordsEqual & constantTimeGreaterOrEq(f.n[0], pMinusNWordZero)
  1678  
  1679  	return result != 0
  1680  }
  1681  

View as plain text