...

Source file src/github.com/decred/dcrd/dcrec/secp256k1/v4/curve.go

Documentation: github.com/decred/dcrd/dcrec/secp256k1/v4

     1  // Copyright (c) 2015-2024 The Decred developers
     2  // Copyright 2013-2014 The btcsuite developers
     3  // Use of this source code is governed by an ISC
     4  // license that can be found in the LICENSE file.
     5  
     6  package secp256k1
     7  
     8  import (
     9  	"encoding/hex"
    10  	"math/bits"
    11  )
    12  
    13  // References:
    14  //   [SECG]: Recommended Elliptic Curve Domain Parameters
    15  //     https://www.secg.org/sec2-v2.pdf
    16  //
    17  //   [GECC]: Guide to Elliptic Curve Cryptography (Hankerson, Menezes, Vanstone)
    18  //
    19  //   [BRID]: On Binary Representations of Integers with Digits -1, 0, 1
    20  //           (Prodinger, Helmut)
    21  //
    22  //   [STWS]: Secure-TWS: Authenticating Node to Multi-user Communication in
    23  //           Shared Sensor Networks (Oliveira, Leonardo B. et al)
    24  
    25  // All group operations are performed using Jacobian coordinates.  For a given
    26  // (x, y) position on the curve, the Jacobian coordinates are (x1, y1, z1)
    27  // where x = x1/z1^2 and y = y1/z1^3.
    28  
    29  // hexToFieldVal converts the passed hex string into a FieldVal and will panic
    30  // if there is an error.  This is only provided for the hard-coded constants so
    31  // errors in the source code can be detected. It will only (and must only) be
    32  // called with hard-coded values.
    33  func hexToFieldVal(s string) *FieldVal {
    34  	b, err := hex.DecodeString(s)
    35  	if err != nil {
    36  		panic("invalid hex in source file: " + s)
    37  	}
    38  	var f FieldVal
    39  	if overflow := f.SetByteSlice(b); overflow {
    40  		panic("hex in source file overflows mod P: " + s)
    41  	}
    42  	return &f
    43  }
    44  
    45  // hexToModNScalar converts the passed hex string into a ModNScalar and will
    46  // panic if there is an error.  This is only provided for the hard-coded
    47  // constants so errors in the source code can be detected. It will only (and
    48  // must only) be called with hard-coded values.
    49  func hexToModNScalar(s string) *ModNScalar {
    50  	var isNegative bool
    51  	if len(s) > 0 && s[0] == '-' {
    52  		isNegative = true
    53  		s = s[1:]
    54  	}
    55  	if len(s)%2 != 0 {
    56  		s = "0" + s
    57  	}
    58  	b, err := hex.DecodeString(s)
    59  	if err != nil {
    60  		panic("invalid hex in source file: " + s)
    61  	}
    62  	var scalar ModNScalar
    63  	if overflow := scalar.SetByteSlice(b); overflow {
    64  		panic("hex in source file overflows mod N scalar: " + s)
    65  	}
    66  	if isNegative {
    67  		scalar.Negate()
    68  	}
    69  	return &scalar
    70  }
    71  
    72  var (
    73  	// The following constants are used to accelerate scalar point
    74  	// multiplication through the use of the endomorphism:
    75  	//
    76  	// φ(Q) ⟼ λ*Q = (β*Q.x mod p, Q.y)
    77  	//
    78  	// See the code in the deriveEndomorphismParams function in genprecomps.go
    79  	// for details on their derivation.
    80  	//
    81  	// Additionally, see the scalar multiplication function in this file for
    82  	// details on how they are used.
    83  	endoNegLambda = hexToModNScalar("-5363ad4cc05c30e0a5261c028812645a122e22ea20816678df02967c1b23bd72")
    84  	endoBeta      = hexToFieldVal("7ae96a2b657c07106e64479eac3434e99cf0497512f58995c1396c28719501ee")
    85  	endoNegB1     = hexToModNScalar("e4437ed6010e88286f547fa90abfe4c3")
    86  	endoNegB2     = hexToModNScalar("-3086d221a7d46bcde86c90e49284eb15")
    87  	endoZ1        = hexToModNScalar("3086d221a7d46bcde86c90e49284eb153daa8a1471e8ca7f")
    88  	endoZ2        = hexToModNScalar("e4437ed6010e88286f547fa90abfe4c4221208ac9df506c6")
    89  
    90  	// Alternatively, the following parameters are valid as well, however,
    91  	// benchmarks show them to be about 2% slower in practice.
    92  	// endoNegLambda = hexToModNScalar("-ac9c52b33fa3cf1f5ad9e3fd77ed9ba4a880b9fc8ec739c2e0cfc810b51283ce")
    93  	// endoBeta      = hexToFieldVal("851695d49a83f8ef919bb86153cbcb16630fb68aed0a766a3ec693d68e6afa40")
    94  	// endoNegB1     = hexToModNScalar("3086d221a7d46bcde86c90e49284eb15")
    95  	// endoNegB2     = hexToModNScalar("-114ca50f7a8e2f3f657c1108d9d44cfd8")
    96  	// endoZ1        = hexToModNScalar("114ca50f7a8e2f3f657c1108d9d44cfd95fbc92c10fddd145")
    97  	// endoZ2        = hexToModNScalar("3086d221a7d46bcde86c90e49284eb153daa8a1471e8ca7f")
    98  )
    99  
   100  // JacobianPoint is an element of the group formed by the secp256k1 curve in
   101  // Jacobian projective coordinates and thus represents a point on the curve.
   102  type JacobianPoint struct {
   103  	// The X coordinate in Jacobian projective coordinates.  The affine point is
   104  	// X/z^2.
   105  	X FieldVal
   106  
   107  	// The Y coordinate in Jacobian projective coordinates.  The affine point is
   108  	// Y/z^3.
   109  	Y FieldVal
   110  
   111  	// The Z coordinate in Jacobian projective coordinates.
   112  	Z FieldVal
   113  }
   114  
   115  // MakeJacobianPoint returns a Jacobian point with the provided X, Y, and Z
   116  // coordinates.
   117  func MakeJacobianPoint(x, y, z *FieldVal) JacobianPoint {
   118  	var p JacobianPoint
   119  	p.X.Set(x)
   120  	p.Y.Set(y)
   121  	p.Z.Set(z)
   122  	return p
   123  }
   124  
   125  // Set sets the Jacobian point to the provided point.
   126  func (p *JacobianPoint) Set(other *JacobianPoint) {
   127  	p.X.Set(&other.X)
   128  	p.Y.Set(&other.Y)
   129  	p.Z.Set(&other.Z)
   130  }
   131  
   132  // ToAffine reduces the Z value of the existing point to 1 effectively
   133  // making it an affine coordinate in constant time.  The point will be
   134  // normalized.
   135  func (p *JacobianPoint) ToAffine() {
   136  	// Inversions are expensive and both point addition and point doubling
   137  	// are faster when working with points that have a z value of one.  So,
   138  	// if the point needs to be converted to affine, go ahead and normalize
   139  	// the point itself at the same time as the calculation is the same.
   140  	var zInv, tempZ FieldVal
   141  	zInv.Set(&p.Z).Inverse()  // zInv = Z^-1
   142  	tempZ.SquareVal(&zInv)    // tempZ = Z^-2
   143  	p.X.Mul(&tempZ)           // X = X/Z^2 (mag: 1)
   144  	p.Y.Mul(tempZ.Mul(&zInv)) // Y = Y/Z^3 (mag: 1)
   145  	p.Z.SetInt(1)             // Z = 1 (mag: 1)
   146  
   147  	// Normalize the x and y values.
   148  	p.X.Normalize()
   149  	p.Y.Normalize()
   150  }
   151  
   152  // addZ1AndZ2EqualsOne adds two Jacobian points that are already known to have
   153  // z values of 1 and stores the result in the provided result param.  That is to
   154  // say result = p1 + p2.  It performs faster addition than the generic add
   155  // routine since less arithmetic is needed due to the ability to avoid the z
   156  // value multiplications.
   157  //
   158  // NOTE: The points must be normalized for this function to return the correct
   159  // result.  The resulting point will be normalized.
   160  func addZ1AndZ2EqualsOne(p1, p2, result *JacobianPoint) {
   161  	// To compute the point addition efficiently, this implementation splits
   162  	// the equation into intermediate elements which are used to minimize
   163  	// the number of field multiplications using the method shown at:
   164  	// https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-mmadd-2007-bl
   165  	//
   166  	// In particular it performs the calculations using the following:
   167  	// H = X2-X1, HH = H^2, I = 4*HH, J = H*I, r = 2*(Y2-Y1), V = X1*I
   168  	// X3 = r^2-J-2*V, Y3 = r*(V-X3)-2*Y1*J, Z3 = 2*H
   169  	//
   170  	// This results in a cost of 4 field multiplications, 2 field squarings,
   171  	// 6 field additions, and 5 integer multiplications.
   172  	x1, y1 := &p1.X, &p1.Y
   173  	x2, y2 := &p2.X, &p2.Y
   174  	x3, y3, z3 := &result.X, &result.Y, &result.Z
   175  
   176  	// When the x coordinates are the same for two points on the curve, the
   177  	// y coordinates either must be the same, in which case it is point
   178  	// doubling, or they are opposite and the result is the point at
   179  	// infinity per the group law for elliptic curve cryptography.
   180  	if x1.Equals(x2) {
   181  		if y1.Equals(y2) {
   182  			// Since x1 == x2 and y1 == y2, point doubling must be
   183  			// done, otherwise the addition would end up dividing
   184  			// by zero.
   185  			DoubleNonConst(p1, result)
   186  			return
   187  		}
   188  
   189  		// Since x1 == x2 and y1 == -y2, the sum is the point at
   190  		// infinity per the group law.
   191  		x3.SetInt(0)
   192  		y3.SetInt(0)
   193  		z3.SetInt(0)
   194  		return
   195  	}
   196  
   197  	// Calculate X3, Y3, and Z3 according to the intermediate elements
   198  	// breakdown above.
   199  	var h, i, j, r, v FieldVal
   200  	var negJ, neg2V, negX3 FieldVal
   201  	h.Set(x1).Negate(1).Add(x2)                // H = X2-X1 (mag: 3)
   202  	i.SquareVal(&h).MulInt(4)                  // I = 4*H^2 (mag: 4)
   203  	j.Mul2(&h, &i)                             // J = H*I (mag: 1)
   204  	r.Set(y1).Negate(1).Add(y2).MulInt(2)      // r = 2*(Y2-Y1) (mag: 6)
   205  	v.Mul2(x1, &i)                             // V = X1*I (mag: 1)
   206  	negJ.Set(&j).Negate(1)                     // negJ = -J (mag: 2)
   207  	neg2V.Set(&v).MulInt(2).Negate(2)          // neg2V = -(2*V) (mag: 3)
   208  	x3.Set(&r).Square().Add(&negJ).Add(&neg2V) // X3 = r^2-J-2*V (mag: 6)
   209  	negX3.Set(x3).Negate(6)                    // negX3 = -X3 (mag: 7)
   210  	j.Mul(y1).MulInt(2).Negate(2)              // J = -(2*Y1*J) (mag: 3)
   211  	y3.Set(&v).Add(&negX3).Mul(&r).Add(&j)     // Y3 = r*(V-X3)-2*Y1*J (mag: 4)
   212  	z3.Set(&h).MulInt(2)                       // Z3 = 2*H (mag: 6)
   213  
   214  	// Normalize the resulting field values as needed.
   215  	x3.Normalize()
   216  	y3.Normalize()
   217  	z3.Normalize()
   218  }
   219  
   220  // addZ1EqualsZ2 adds two Jacobian points that are already known to have the
   221  // same z value and stores the result in the provided result param.  That is to
   222  // say result = p1 + p2.  It performs faster addition than the generic add
   223  // routine since less arithmetic is needed due to the known equivalence.
   224  //
   225  // NOTE: The points must be normalized for this function to return the correct
   226  // result.  The resulting point will be normalized.
   227  func addZ1EqualsZ2(p1, p2, result *JacobianPoint) {
   228  	// To compute the point addition efficiently, this implementation splits
   229  	// the equation into intermediate elements which are used to minimize
   230  	// the number of field multiplications using a slightly modified version
   231  	// of the method shown at:
   232  	// https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-zadd-2007-m
   233  	//
   234  	// In particular it performs the calculations using the following:
   235  	// A = X2-X1, B = A^2, C=Y2-Y1, D = C^2, E = X1*B, F = X2*B
   236  	// X3 = D-E-F, Y3 = C*(E-X3)-Y1*(F-E), Z3 = Z1*A
   237  	//
   238  	// This results in a cost of 5 field multiplications, 2 field squarings,
   239  	// 9 field additions, and 0 integer multiplications.
   240  	x1, y1, z1 := &p1.X, &p1.Y, &p1.Z
   241  	x2, y2 := &p2.X, &p2.Y
   242  	x3, y3, z3 := &result.X, &result.Y, &result.Z
   243  
   244  	// When the x coordinates are the same for two points on the curve, the
   245  	// y coordinates either must be the same, in which case it is point
   246  	// doubling, or they are opposite and the result is the point at
   247  	// infinity per the group law for elliptic curve cryptography.
   248  	if x1.Equals(x2) {
   249  		if y1.Equals(y2) {
   250  			// Since x1 == x2 and y1 == y2, point doubling must be
   251  			// done, otherwise the addition would end up dividing
   252  			// by zero.
   253  			DoubleNonConst(p1, result)
   254  			return
   255  		}
   256  
   257  		// Since x1 == x2 and y1 == -y2, the sum is the point at
   258  		// infinity per the group law.
   259  		x3.SetInt(0)
   260  		y3.SetInt(0)
   261  		z3.SetInt(0)
   262  		return
   263  	}
   264  
   265  	// Calculate X3, Y3, and Z3 according to the intermediate elements
   266  	// breakdown above.
   267  	var a, b, c, d, e, f FieldVal
   268  	var negX1, negY1, negE, negX3 FieldVal
   269  	negX1.Set(x1).Negate(1)                // negX1 = -X1 (mag: 2)
   270  	negY1.Set(y1).Negate(1)                // negY1 = -Y1 (mag: 2)
   271  	a.Set(&negX1).Add(x2)                  // A = X2-X1 (mag: 3)
   272  	b.SquareVal(&a)                        // B = A^2 (mag: 1)
   273  	c.Set(&negY1).Add(y2)                  // C = Y2-Y1 (mag: 3)
   274  	d.SquareVal(&c)                        // D = C^2 (mag: 1)
   275  	e.Mul2(x1, &b)                         // E = X1*B (mag: 1)
   276  	negE.Set(&e).Negate(1)                 // negE = -E (mag: 2)
   277  	f.Mul2(x2, &b)                         // F = X2*B (mag: 1)
   278  	x3.Add2(&e, &f).Negate(2).Add(&d)      // X3 = D-E-F (mag: 4)
   279  	negX3.Set(x3).Negate(4)                // negX3 = -X3 (mag: 5)
   280  	y3.Set(y1).Mul(f.Add(&negE)).Negate(1) // Y3 = -(Y1*(F-E)) (mag: 2)
   281  	y3.Add(e.Add(&negX3).Mul(&c))          // Y3 = C*(E-X3)+Y3 (mag: 3)
   282  	z3.Mul2(z1, &a)                        // Z3 = Z1*A (mag: 1)
   283  
   284  	// Normalize the resulting field values as needed.
   285  	x3.Normalize()
   286  	y3.Normalize()
   287  	z3.Normalize()
   288  }
   289  
   290  // addZ2EqualsOne adds two Jacobian points when the second point is already
   291  // known to have a z value of 1 (and the z value for the first point is not 1)
   292  // and stores the result in the provided result param.  That is to say result =
   293  // p1 + p2.  It performs faster addition than the generic add routine since
   294  // less arithmetic is needed due to the ability to avoid multiplications by the
   295  // second point's z value.
   296  //
   297  // NOTE: The points must be normalized for this function to return the correct
   298  // result.  The resulting point will be normalized.
   299  func addZ2EqualsOne(p1, p2, result *JacobianPoint) {
   300  	// To compute the point addition efficiently, this implementation splits
   301  	// the equation into intermediate elements which are used to minimize
   302  	// the number of field multiplications using the method shown at:
   303  	// https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-madd-2007-bl
   304  	//
   305  	// In particular it performs the calculations using the following:
   306  	// Z1Z1 = Z1^2, U2 = X2*Z1Z1, S2 = Y2*Z1*Z1Z1, H = U2-X1, HH = H^2,
   307  	// I = 4*HH, J = H*I, r = 2*(S2-Y1), V = X1*I
   308  	// X3 = r^2-J-2*V, Y3 = r*(V-X3)-2*Y1*J, Z3 = (Z1+H)^2-Z1Z1-HH
   309  	//
   310  	// This results in a cost of 7 field multiplications, 4 field squarings,
   311  	// 9 field additions, and 4 integer multiplications.
   312  	x1, y1, z1 := &p1.X, &p1.Y, &p1.Z
   313  	x2, y2 := &p2.X, &p2.Y
   314  	x3, y3, z3 := &result.X, &result.Y, &result.Z
   315  
   316  	// When the x coordinates are the same for two points on the curve, the
   317  	// y coordinates either must be the same, in which case it is point
   318  	// doubling, or they are opposite and the result is the point at
   319  	// infinity per the group law for elliptic curve cryptography.  Since
   320  	// any number of Jacobian coordinates can represent the same affine
   321  	// point, the x and y values need to be converted to like terms.  Due to
   322  	// the assumption made for this function that the second point has a z
   323  	// value of 1 (z2=1), the first point is already "converted".
   324  	var z1z1, u2, s2 FieldVal
   325  	z1z1.SquareVal(z1)                        // Z1Z1 = Z1^2 (mag: 1)
   326  	u2.Set(x2).Mul(&z1z1).Normalize()         // U2 = X2*Z1Z1 (mag: 1)
   327  	s2.Set(y2).Mul(&z1z1).Mul(z1).Normalize() // S2 = Y2*Z1*Z1Z1 (mag: 1)
   328  	if x1.Equals(&u2) {
   329  		if y1.Equals(&s2) {
   330  			// Since x1 == x2 and y1 == y2, point doubling must be
   331  			// done, otherwise the addition would end up dividing
   332  			// by zero.
   333  			DoubleNonConst(p1, result)
   334  			return
   335  		}
   336  
   337  		// Since x1 == x2 and y1 == -y2, the sum is the point at
   338  		// infinity per the group law.
   339  		x3.SetInt(0)
   340  		y3.SetInt(0)
   341  		z3.SetInt(0)
   342  		return
   343  	}
   344  
   345  	// Calculate X3, Y3, and Z3 according to the intermediate elements
   346  	// breakdown above.
   347  	var h, hh, i, j, r, rr, v FieldVal
   348  	var negX1, negY1, negX3 FieldVal
   349  	negX1.Set(x1).Negate(1)                // negX1 = -X1 (mag: 2)
   350  	h.Add2(&u2, &negX1)                    // H = U2-X1 (mag: 3)
   351  	hh.SquareVal(&h)                       // HH = H^2 (mag: 1)
   352  	i.Set(&hh).MulInt(4)                   // I = 4 * HH (mag: 4)
   353  	j.Mul2(&h, &i)                         // J = H*I (mag: 1)
   354  	negY1.Set(y1).Negate(1)                // negY1 = -Y1 (mag: 2)
   355  	r.Set(&s2).Add(&negY1).MulInt(2)       // r = 2*(S2-Y1) (mag: 6)
   356  	rr.SquareVal(&r)                       // rr = r^2 (mag: 1)
   357  	v.Mul2(x1, &i)                         // V = X1*I (mag: 1)
   358  	x3.Set(&v).MulInt(2).Add(&j).Negate(3) // X3 = -(J+2*V) (mag: 4)
   359  	x3.Add(&rr)                            // X3 = r^2+X3 (mag: 5)
   360  	negX3.Set(x3).Negate(5)                // negX3 = -X3 (mag: 6)
   361  	y3.Set(y1).Mul(&j).MulInt(2).Negate(2) // Y3 = -(2*Y1*J) (mag: 3)
   362  	y3.Add(v.Add(&negX3).Mul(&r))          // Y3 = r*(V-X3)+Y3 (mag: 4)
   363  	z3.Add2(z1, &h).Square()               // Z3 = (Z1+H)^2 (mag: 1)
   364  	z3.Add(z1z1.Add(&hh).Negate(2))        // Z3 = Z3-(Z1Z1+HH) (mag: 4)
   365  
   366  	// Normalize the resulting field values as needed.
   367  	x3.Normalize()
   368  	y3.Normalize()
   369  	z3.Normalize()
   370  }
   371  
   372  // addGeneric adds two Jacobian points without any assumptions about the z
   373  // values of the two points and stores the result in the provided result param.
   374  // That is to say result = p1 + p2.  It is the slowest of the add routines due
   375  // to requiring the most arithmetic.
   376  //
   377  // NOTE: The points must be normalized for this function to return the correct
   378  // result.  The resulting point will be normalized.
   379  func addGeneric(p1, p2, result *JacobianPoint) {
   380  	// To compute the point addition efficiently, this implementation splits
   381  	// the equation into intermediate elements which are used to minimize
   382  	// the number of field multiplications using the method shown at:
   383  	// https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-add-2007-bl
   384  	//
   385  	// In particular it performs the calculations using the following:
   386  	// Z1Z1 = Z1^2, Z2Z2 = Z2^2, U1 = X1*Z2Z2, U2 = X2*Z1Z1, S1 = Y1*Z2*Z2Z2
   387  	// S2 = Y2*Z1*Z1Z1, H = U2-U1, I = (2*H)^2, J = H*I, r = 2*(S2-S1)
   388  	// V = U1*I
   389  	// X3 = r^2-J-2*V, Y3 = r*(V-X3)-2*S1*J, Z3 = ((Z1+Z2)^2-Z1Z1-Z2Z2)*H
   390  	//
   391  	// This results in a cost of 11 field multiplications, 5 field squarings,
   392  	// 9 field additions, and 4 integer multiplications.
   393  	x1, y1, z1 := &p1.X, &p1.Y, &p1.Z
   394  	x2, y2, z2 := &p2.X, &p2.Y, &p2.Z
   395  	x3, y3, z3 := &result.X, &result.Y, &result.Z
   396  
   397  	// When the x coordinates are the same for two points on the curve, the
   398  	// y coordinates either must be the same, in which case it is point
   399  	// doubling, or they are opposite and the result is the point at
   400  	// infinity.  Since any number of Jacobian coordinates can represent the
   401  	// same affine point, the x and y values need to be converted to like
   402  	// terms.
   403  	var z1z1, z2z2, u1, u2, s1, s2 FieldVal
   404  	z1z1.SquareVal(z1)                        // Z1Z1 = Z1^2 (mag: 1)
   405  	z2z2.SquareVal(z2)                        // Z2Z2 = Z2^2 (mag: 1)
   406  	u1.Set(x1).Mul(&z2z2).Normalize()         // U1 = X1*Z2Z2 (mag: 1)
   407  	u2.Set(x2).Mul(&z1z1).Normalize()         // U2 = X2*Z1Z1 (mag: 1)
   408  	s1.Set(y1).Mul(&z2z2).Mul(z2).Normalize() // S1 = Y1*Z2*Z2Z2 (mag: 1)
   409  	s2.Set(y2).Mul(&z1z1).Mul(z1).Normalize() // S2 = Y2*Z1*Z1Z1 (mag: 1)
   410  	if u1.Equals(&u2) {
   411  		if s1.Equals(&s2) {
   412  			// Since x1 == x2 and y1 == y2, point doubling must be
   413  			// done, otherwise the addition would end up dividing
   414  			// by zero.
   415  			DoubleNonConst(p1, result)
   416  			return
   417  		}
   418  
   419  		// Since x1 == x2 and y1 == -y2, the sum is the point at
   420  		// infinity per the group law.
   421  		x3.SetInt(0)
   422  		y3.SetInt(0)
   423  		z3.SetInt(0)
   424  		return
   425  	}
   426  
   427  	// Calculate X3, Y3, and Z3 according to the intermediate elements
   428  	// breakdown above.
   429  	var h, i, j, r, rr, v FieldVal
   430  	var negU1, negS1, negX3 FieldVal
   431  	negU1.Set(&u1).Negate(1)               // negU1 = -U1 (mag: 2)
   432  	h.Add2(&u2, &negU1)                    // H = U2-U1 (mag: 3)
   433  	i.Set(&h).MulInt(2).Square()           // I = (2*H)^2 (mag: 1)
   434  	j.Mul2(&h, &i)                         // J = H*I (mag: 1)
   435  	negS1.Set(&s1).Negate(1)               // negS1 = -S1 (mag: 2)
   436  	r.Set(&s2).Add(&negS1).MulInt(2)       // r = 2*(S2-S1) (mag: 6)
   437  	rr.SquareVal(&r)                       // rr = r^2 (mag: 1)
   438  	v.Mul2(&u1, &i)                        // V = U1*I (mag: 1)
   439  	x3.Set(&v).MulInt(2).Add(&j).Negate(3) // X3 = -(J+2*V) (mag: 4)
   440  	x3.Add(&rr)                            // X3 = r^2+X3 (mag: 5)
   441  	negX3.Set(x3).Negate(5)                // negX3 = -X3 (mag: 6)
   442  	y3.Mul2(&s1, &j).MulInt(2).Negate(2)   // Y3 = -(2*S1*J) (mag: 3)
   443  	y3.Add(v.Add(&negX3).Mul(&r))          // Y3 = r*(V-X3)+Y3 (mag: 4)
   444  	z3.Add2(z1, z2).Square()               // Z3 = (Z1+Z2)^2 (mag: 1)
   445  	z3.Add(z1z1.Add(&z2z2).Negate(2))      // Z3 = Z3-(Z1Z1+Z2Z2) (mag: 4)
   446  	z3.Mul(&h)                             // Z3 = Z3*H (mag: 1)
   447  
   448  	// Normalize the resulting field values as needed.
   449  	x3.Normalize()
   450  	y3.Normalize()
   451  	z3.Normalize()
   452  }
   453  
   454  // AddNonConst adds the passed Jacobian points together and stores the result in
   455  // the provided result param in *non-constant* time.
   456  //
   457  // NOTE: The points must be normalized for this function to return the correct
   458  // result.  The resulting point will be normalized.
   459  func AddNonConst(p1, p2, result *JacobianPoint) {
   460  	// The point at infinity is the identity according to the group law for
   461  	// elliptic curve cryptography.  Thus, ∞ + P = P and P + ∞ = P.
   462  	if (p1.X.IsZero() && p1.Y.IsZero()) || p1.Z.IsZero() {
   463  		result.Set(p2)
   464  		return
   465  	}
   466  	if (p2.X.IsZero() && p2.Y.IsZero()) || p2.Z.IsZero() {
   467  		result.Set(p1)
   468  		return
   469  	}
   470  
   471  	// Faster point addition can be achieved when certain assumptions are
   472  	// met.  For example, when both points have the same z value, arithmetic
   473  	// on the z values can be avoided.  This section thus checks for these
   474  	// conditions and calls an appropriate add function which is accelerated
   475  	// by using those assumptions.
   476  	isZ1One := p1.Z.IsOne()
   477  	isZ2One := p2.Z.IsOne()
   478  	switch {
   479  	case isZ1One && isZ2One:
   480  		addZ1AndZ2EqualsOne(p1, p2, result)
   481  		return
   482  	case p1.Z.Equals(&p2.Z):
   483  		addZ1EqualsZ2(p1, p2, result)
   484  		return
   485  	case isZ2One:
   486  		addZ2EqualsOne(p1, p2, result)
   487  		return
   488  	}
   489  
   490  	// None of the above assumptions are true, so fall back to generic
   491  	// point addition.
   492  	addGeneric(p1, p2, result)
   493  }
   494  
   495  // doubleZ1EqualsOne performs point doubling on the passed Jacobian point when
   496  // the point is already known to have a z value of 1 and stores the result in
   497  // the provided result param.  That is to say result = 2*p.  It performs faster
   498  // point doubling than the generic routine since less arithmetic is needed due
   499  // to the ability to avoid multiplication by the z value.
   500  //
   501  // NOTE: The resulting point will be normalized.
   502  func doubleZ1EqualsOne(p, result *JacobianPoint) {
   503  	// This function uses the assumptions that z1 is 1, thus the point
   504  	// doubling formulas reduce to:
   505  	//
   506  	// X3 = (3*X1^2)^2 - 8*X1*Y1^2
   507  	// Y3 = (3*X1^2)*(4*X1*Y1^2 - X3) - 8*Y1^4
   508  	// Z3 = 2*Y1
   509  	//
   510  	// To compute the above efficiently, this implementation splits the
   511  	// equation into intermediate elements which are used to minimize the
   512  	// number of field multiplications in favor of field squarings which
   513  	// are roughly 35% faster than field multiplications with the current
   514  	// implementation at the time this was written.
   515  	//
   516  	// This uses a slightly modified version of the method shown at:
   517  	// https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#doubling-mdbl-2007-bl
   518  	//
   519  	// In particular it performs the calculations using the following:
   520  	// A = X1^2, B = Y1^2, C = B^2, D = 2*((X1+B)^2-A-C)
   521  	// E = 3*A, F = E^2, X3 = F-2*D, Y3 = E*(D-X3)-8*C
   522  	// Z3 = 2*Y1
   523  	//
   524  	// This results in a cost of 1 field multiplication, 5 field squarings,
   525  	// 6 field additions, and 5 integer multiplications.
   526  	x1, y1 := &p.X, &p.Y
   527  	x3, y3, z3 := &result.X, &result.Y, &result.Z
   528  	var a, b, c, d, e, f FieldVal
   529  	z3.Set(y1).MulInt(2)                     // Z3 = 2*Y1 (mag: 2)
   530  	a.SquareVal(x1)                          // A = X1^2 (mag: 1)
   531  	b.SquareVal(y1)                          // B = Y1^2 (mag: 1)
   532  	c.SquareVal(&b)                          // C = B^2 (mag: 1)
   533  	b.Add(x1).Square()                       // B = (X1+B)^2 (mag: 1)
   534  	d.Set(&a).Add(&c).Negate(2)              // D = -(A+C) (mag: 3)
   535  	d.Add(&b).MulInt(2)                      // D = 2*(B+D)(mag: 8)
   536  	e.Set(&a).MulInt(3)                      // E = 3*A (mag: 3)
   537  	f.SquareVal(&e)                          // F = E^2 (mag: 1)
   538  	x3.Set(&d).MulInt(2).Negate(16)          // X3 = -(2*D) (mag: 17)
   539  	x3.Add(&f)                               // X3 = F+X3 (mag: 18)
   540  	f.Set(x3).Negate(18).Add(&d).Normalize() // F = D-X3 (mag: 1)
   541  	y3.Set(&c).MulInt(8).Negate(8)           // Y3 = -(8*C) (mag: 9)
   542  	y3.Add(f.Mul(&e))                        // Y3 = E*F+Y3 (mag: 10)
   543  
   544  	// Normalize the resulting field values as needed.
   545  	x3.Normalize()
   546  	y3.Normalize()
   547  	z3.Normalize()
   548  }
   549  
   550  // doubleGeneric performs point doubling on the passed Jacobian point without
   551  // any assumptions about the z value and stores the result in the provided
   552  // result param.  That is to say result = 2*p.  It is the slowest of the point
   553  // doubling routines due to requiring the most arithmetic.
   554  //
   555  // NOTE: The resulting point will be normalized.
   556  func doubleGeneric(p, result *JacobianPoint) {
   557  	// Point doubling formula for Jacobian coordinates for the secp256k1
   558  	// curve:
   559  	//
   560  	// X3 = (3*X1^2)^2 - 8*X1*Y1^2
   561  	// Y3 = (3*X1^2)*(4*X1*Y1^2 - X3) - 8*Y1^4
   562  	// Z3 = 2*Y1*Z1
   563  	//
   564  	// To compute the above efficiently, this implementation splits the
   565  	// equation into intermediate elements which are used to minimize the
   566  	// number of field multiplications in favor of field squarings which
   567  	// are roughly 35% faster than field multiplications with the current
   568  	// implementation at the time this was written.
   569  	//
   570  	// This uses a slightly modified version of the method shown at:
   571  	// https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#doubling-dbl-2009-l
   572  	//
   573  	// In particular it performs the calculations using the following:
   574  	// A = X1^2, B = Y1^2, C = B^2, D = 2*((X1+B)^2-A-C)
   575  	// E = 3*A, F = E^2, X3 = F-2*D, Y3 = E*(D-X3)-8*C
   576  	// Z3 = 2*Y1*Z1
   577  	//
   578  	// This results in a cost of 1 field multiplication, 5 field squarings,
   579  	// 6 field additions, and 5 integer multiplications.
   580  	x1, y1, z1 := &p.X, &p.Y, &p.Z
   581  	x3, y3, z3 := &result.X, &result.Y, &result.Z
   582  	var a, b, c, d, e, f FieldVal
   583  	z3.Mul2(y1, z1).MulInt(2)                // Z3 = 2*Y1*Z1 (mag: 2)
   584  	a.SquareVal(x1)                          // A = X1^2 (mag: 1)
   585  	b.SquareVal(y1)                          // B = Y1^2 (mag: 1)
   586  	c.SquareVal(&b)                          // C = B^2 (mag: 1)
   587  	b.Add(x1).Square()                       // B = (X1+B)^2 (mag: 1)
   588  	d.Set(&a).Add(&c).Negate(2)              // D = -(A+C) (mag: 3)
   589  	d.Add(&b).MulInt(2)                      // D = 2*(B+D)(mag: 8)
   590  	e.Set(&a).MulInt(3)                      // E = 3*A (mag: 3)
   591  	f.SquareVal(&e)                          // F = E^2 (mag: 1)
   592  	x3.Set(&d).MulInt(2).Negate(16)          // X3 = -(2*D) (mag: 17)
   593  	x3.Add(&f)                               // X3 = F+X3 (mag: 18)
   594  	f.Set(x3).Negate(18).Add(&d).Normalize() // F = D-X3 (mag: 1)
   595  	y3.Set(&c).MulInt(8).Negate(8)           // Y3 = -(8*C) (mag: 9)
   596  	y3.Add(f.Mul(&e))                        // Y3 = E*F+Y3 (mag: 10)
   597  
   598  	// Normalize the resulting field values as needed.
   599  	x3.Normalize()
   600  	y3.Normalize()
   601  	z3.Normalize()
   602  }
   603  
   604  // DoubleNonConst doubles the passed Jacobian point and stores the result in the
   605  // provided result parameter in *non-constant* time.
   606  //
   607  // NOTE: The point must be normalized for this function to return the correct
   608  // result.  The resulting point will be normalized.
   609  func DoubleNonConst(p, result *JacobianPoint) {
   610  	// Doubling the point at infinity is still infinity.
   611  	if p.Y.IsZero() || p.Z.IsZero() {
   612  		result.X.SetInt(0)
   613  		result.Y.SetInt(0)
   614  		result.Z.SetInt(0)
   615  		return
   616  	}
   617  
   618  	// Slightly faster point doubling can be achieved when the z value is 1
   619  	// by avoiding the multiplication on the z value.  This section calls
   620  	// a point doubling function which is accelerated by using that
   621  	// assumption when possible.
   622  	if p.Z.IsOne() {
   623  		doubleZ1EqualsOne(p, result)
   624  		return
   625  	}
   626  
   627  	// Fall back to generic point doubling which works with arbitrary z
   628  	// values.
   629  	doubleGeneric(p, result)
   630  }
   631  
   632  // mulAdd64 multiplies the two passed base 2^64 digits together, adds the given
   633  // value to the result, and returns the 128-bit result via a (hi, lo) tuple
   634  // where the upper half of the bits are returned in hi and the lower half in lo.
   635  func mulAdd64(digit1, digit2, m uint64) (hi, lo uint64) {
   636  	// Note the carry on the final add is safe to discard because the maximum
   637  	// possible value is:
   638  	//   (2^64 - 1)(2^64 - 1) + (2^64 - 1) = 2^128 - 2^64
   639  	// and:
   640  	//   2^128 - 2^64 < 2^128.
   641  	var c uint64
   642  	hi, lo = bits.Mul64(digit1, digit2)
   643  	lo, c = bits.Add64(lo, m, 0)
   644  	hi, _ = bits.Add64(hi, 0, c)
   645  	return hi, lo
   646  }
   647  
   648  // mulAdd64Carry multiplies the two passed base 2^64 digits together, adds both
   649  // the given value and carry to the result, and returns the 128-bit result via a
   650  // (hi, lo) tuple where the upper half of the bits are returned in hi and the
   651  // lower half in lo.
   652  func mulAdd64Carry(digit1, digit2, m, c uint64) (hi, lo uint64) {
   653  	// Note the carry on the high order add is safe to discard because the
   654  	// maximum possible value is:
   655  	//   (2^64 - 1)(2^64 - 1) + 2*(2^64 - 1) = 2^128 - 1
   656  	// and:
   657  	//   2^128 - 1 < 2^128.
   658  	var c2 uint64
   659  	hi, lo = mulAdd64(digit1, digit2, m)
   660  	lo, c2 = bits.Add64(lo, c, 0)
   661  	hi, _ = bits.Add64(hi, 0, c2)
   662  	return hi, lo
   663  }
   664  
   665  // mul512Rsh320Round computes the full 512-bit product of the two given scalars,
   666  // right shifts the result by 320 bits, rounds to the nearest integer, and
   667  // returns the result in constant time.
   668  //
   669  // Note that despite the inputs and output being mod n scalars, the 512-bit
   670  // product is NOT reduced mod N prior to the right shift.  This is intentional
   671  // because it is used for replacing division with multiplication and thus the
   672  // intermediate results must be done via a field extension to a larger field.
   673  func mul512Rsh320Round(n1, n2 *ModNScalar) ModNScalar {
   674  	// Convert n1 and n2 to base 2^64 digits.
   675  	n1Digit0 := uint64(n1.n[0]) | uint64(n1.n[1])<<32
   676  	n1Digit1 := uint64(n1.n[2]) | uint64(n1.n[3])<<32
   677  	n1Digit2 := uint64(n1.n[4]) | uint64(n1.n[5])<<32
   678  	n1Digit3 := uint64(n1.n[6]) | uint64(n1.n[7])<<32
   679  	n2Digit0 := uint64(n2.n[0]) | uint64(n2.n[1])<<32
   680  	n2Digit1 := uint64(n2.n[2]) | uint64(n2.n[3])<<32
   681  	n2Digit2 := uint64(n2.n[4]) | uint64(n2.n[5])<<32
   682  	n2Digit3 := uint64(n2.n[6]) | uint64(n2.n[7])<<32
   683  
   684  	// Compute the full 512-bit product n1*n2.
   685  	var r0, r1, r2, r3, r4, r5, r6, r7, c uint64
   686  
   687  	// Terms resulting from the product of the first digit of the second number
   688  	// by all digits of the first number.
   689  	//
   690  	// Note that r0 is ignored because it is not needed to compute the higher
   691  	// terms and it is shifted out below anyway.
   692  	c, _ = bits.Mul64(n2Digit0, n1Digit0)
   693  	c, r1 = mulAdd64(n2Digit0, n1Digit1, c)
   694  	c, r2 = mulAdd64(n2Digit0, n1Digit2, c)
   695  	r4, r3 = mulAdd64(n2Digit0, n1Digit3, c)
   696  
   697  	// Terms resulting from the product of the second digit of the second number
   698  	// by all digits of the first number.
   699  	//
   700  	// Note that r1 is ignored because it is no longer needed to compute the
   701  	// higher terms and it is shifted out below anyway.
   702  	c, _ = mulAdd64(n2Digit1, n1Digit0, r1)
   703  	c, r2 = mulAdd64Carry(n2Digit1, n1Digit1, r2, c)
   704  	c, r3 = mulAdd64Carry(n2Digit1, n1Digit2, r3, c)
   705  	r5, r4 = mulAdd64Carry(n2Digit1, n1Digit3, r4, c)
   706  
   707  	// Terms resulting from the product of the third digit of the second number
   708  	// by all digits of the first number.
   709  	//
   710  	// Note that r2 is ignored because it is no longer needed to compute the
   711  	// higher terms and it is shifted out below anyway.
   712  	c, _ = mulAdd64(n2Digit2, n1Digit0, r2)
   713  	c, r3 = mulAdd64Carry(n2Digit2, n1Digit1, r3, c)
   714  	c, r4 = mulAdd64Carry(n2Digit2, n1Digit2, r4, c)
   715  	r6, r5 = mulAdd64Carry(n2Digit2, n1Digit3, r5, c)
   716  
   717  	// Terms resulting from the product of the fourth digit of the second number
   718  	// by all digits of the first number.
   719  	//
   720  	// Note that r3 is ignored because it is no longer needed to compute the
   721  	// higher terms and it is shifted out below anyway.
   722  	c, _ = mulAdd64(n2Digit3, n1Digit0, r3)
   723  	c, r4 = mulAdd64Carry(n2Digit3, n1Digit1, r4, c)
   724  	c, r5 = mulAdd64Carry(n2Digit3, n1Digit2, r5, c)
   725  	r7, r6 = mulAdd64Carry(n2Digit3, n1Digit3, r6, c)
   726  
   727  	// At this point the upper 256 bits of the full 512-bit product n1*n2 are in
   728  	// r4..r7 (recall the low order results were discarded as noted above).
   729  	//
   730  	// Right shift the result 320 bits.  Note that the MSB of r4 determines
   731  	// whether or not to round because it is the final bit that is shifted out.
   732  	//
   733  	// Also, notice that r3..r7 would also ordinarily be set to 0 as well for
   734  	// the full shift, but that is skipped since they are no longer used as
   735  	// their values are known to be zero.
   736  	roundBit := r4 >> 63
   737  	r2, r1, r0 = r7, r6, r5
   738  
   739  	// Conditionally add 1 depending on the round bit in constant time.
   740  	r0, c = bits.Add64(r0, roundBit, 0)
   741  	r1, c = bits.Add64(r1, 0, c)
   742  	r2, r3 = bits.Add64(r2, 0, c)
   743  
   744  	// Finally, convert the result to a mod n scalar.
   745  	//
   746  	// No modular reduction is needed because the result is guaranteed to be
   747  	// less than the group order given the group order is > 2^255 and the
   748  	// maximum possible value of the result is 2^192.
   749  	var result ModNScalar
   750  	result.n[0] = uint32(r0)
   751  	result.n[1] = uint32(r0 >> 32)
   752  	result.n[2] = uint32(r1)
   753  	result.n[3] = uint32(r1 >> 32)
   754  	result.n[4] = uint32(r2)
   755  	result.n[5] = uint32(r2 >> 32)
   756  	result.n[6] = uint32(r3)
   757  	result.n[7] = uint32(r3 >> 32)
   758  	return result
   759  }
   760  
   761  // splitK returns two scalars (k1 and k2) that are a balanced length-two
   762  // representation of the provided scalar such that k ≡ k1 + k2*λ (mod N), where
   763  // N is the secp256k1 group order.
   764  func splitK(k *ModNScalar) (ModNScalar, ModNScalar) {
   765  	// The ultimate goal is to decompose k into two scalars that are around
   766  	// half the bit length of k such that the following equation is satisfied:
   767  	//
   768  	// k1 + k2*λ ≡ k (mod n)
   769  	//
   770  	// The strategy used here is based on algorithm 3.74 from [GECC] with a few
   771  	// modifications to make use of the more efficient mod n scalar type, avoid
   772  	// some costly long divisions, and minimize the number of calculations.
   773  	//
   774  	// Start by defining a function that takes a vector v = <a,b> ∈ ℤ⨯ℤ:
   775  	//
   776  	// f(v) = a + bλ (mod n)
   777  	//
   778  	// Then, find two vectors, v1 = <a1,b1>, and v2 = <a2,b2> in ℤ⨯ℤ such that:
   779  	// 1) v1 and v2 are linearly independent
   780  	// 2) f(v1) = f(v2) = 0
   781  	// 3) v1 and v2 have small Euclidean norm
   782  	//
   783  	// The vectors that satisfy these properties are found via the Euclidean
   784  	// algorithm and are precomputed since both n and λ are fixed values for the
   785  	// secp256k1 curve.  See genprecomps.go for derivation details.
   786  	//
   787  	// Next, consider k as a vector <k, 0> in ℚ⨯ℚ and by linear algebra write:
   788  	//
   789  	// <k, 0> = g1*v1 + g2*v2, where g1, g2 ∈ ℚ
   790  	//
   791  	// Note that, per above, the components of vector v1 are a1 and b1 while the
   792  	// components of vector v2 are a2 and b2.  Given the vectors v1 and v2 were
   793  	// generated such that a1*b2 - a2*b1 = n, solving the equation for g1 and g2
   794  	// yields:
   795  	//
   796  	// g1 = b2*k / n
   797  	// g2 = -b1*k / n
   798  	//
   799  	// Observe:
   800  	// <k, 0> = g1*v1 + g2*v2
   801  	//        = (b2*k/n)*<a1,b1> + (-b1*k/n)*<a2,b2>              | substitute
   802  	//        = <a1*b2*k/n, b1*b2*k/n> + <-a2*b1*k/n, -b2*b1*k/n> | scalar mul
   803  	//        = <a1*b2*k/n - a2*b1*k/n, b1*b2*k/n - b2*b1*k/n>    | vector add
   804  	//        = <[a1*b2*k - a2*b1*k]/n, 0>                        | simplify
   805  	//        = <k*[a1*b2 - a2*b1]/n, 0>                          | factor out k
   806  	//        = <k*n/n, 0>                                        | substitute
   807  	//        = <k, 0>                                            | simplify
   808  	//
   809  	// Now, consider an integer-valued vector v:
   810  	//
   811  	// v = c1*v1 + c2*v2, where c1, c2 ∈ ℤ (mod n)
   812  	//
   813  	// Since vectors v1 and v2 are linearly independent and were generated such
   814  	// that f(v1) = f(v2) = 0, all possible scalars c1 and c2 also produce a
   815  	// vector v such that f(v) = 0.
   816  	//
   817  	// In other words, c1 and c2 can be any integers and the resulting
   818  	// decomposition will still satisfy the required equation.  However, since
   819  	// the goal is to produce a balanced decomposition that provides a
   820  	// performance advantage by minimizing max(k1, k2), c1 and c2 need to be
   821  	// integers close to g1 and g2, respectively, so the resulting vector v is
   822  	// an integer-valued vector that is close to <k, 0>.
   823  	//
   824  	// Finally, consider the vector u:
   825  	//
   826  	// u  = <k, 0> - v
   827  	//
   828  	// It follows that f(u) = k and thus the two components of vector u satisfy
   829  	// the required equation:
   830  	//
   831  	// k1 + k2*λ ≡ k (mod n)
   832  	//
   833  	// Choosing c1 and c2:
   834  	// -------------------
   835  	//
   836  	// As mentioned above, c1 and c2 need to be integers close to g1 and g2,
   837  	// respectively.  The algorithm in [GECC] chooses the following values:
   838  	//
   839  	// c1 = round(g1) = round(b2*k / n)
   840  	// c2 = round(g2) = round(-b1*k / n)
   841  	//
   842  	// However, as section 3.4.2 of [STWS] notes, the aforementioned approach
   843  	// requires costly long divisions that can be avoided by precomputing
   844  	// rounded estimates as follows:
   845  	//
   846  	// t = bitlen(n) + 1
   847  	// z1 = round(2^t * b2 / n)
   848  	// z2 = round(2^t * -b1 / n)
   849  	//
   850  	// Then, use those precomputed estimates to perform a multiplication by k
   851  	// along with a floored division by 2^t, which is a simple right shift by t:
   852  	//
   853  	// c1 = floor(k * z1 / 2^t) = (k * z1) >> t
   854  	// c2 = floor(k * z2 / 2^t) = (k * z2) >> t
   855  	//
   856  	// Finally, round up if last bit discarded in the right shift by t is set by
   857  	// adding 1.
   858  	//
   859  	// As a further optimization, rather than setting t = bitlen(n) + 1 = 257 as
   860  	// stated by [STWS], this implementation uses a higher precision estimate of
   861  	// t = bitlen(n) + 64 = 320 because it allows simplification of the shifts
   862  	// in the internal calculations that are done via uint64s and also allows
   863  	// the use of floor in the precomputations.
   864  	//
   865  	// Thus, the calculations this implementation uses are:
   866  	//
   867  	// z1 = floor(b2<<320 / n)                                     | precomputed
   868  	// z2 = floor((-b1)<<320) / n)                                 | precomputed
   869  	// c1 = ((k * z1) >> 320) + (((k * z1) >> 319) & 1)
   870  	// c2 = ((k * z2) >> 320) + (((k * z2) >> 319) & 1)
   871  	//
   872  	// Putting it all together:
   873  	// ------------------------
   874  	//
   875  	// Calculate the following vectors using the values discussed above:
   876  	//
   877  	// v = c1*v1 + c2*v2
   878  	// u = <k, 0> - v
   879  	//
   880  	// The two components of the resulting vector v are:
   881  	// va = c1*a1 + c2*a2
   882  	// vb = c1*b1 + c2*b2
   883  	//
   884  	// Thus, the two components of the resulting vector u are:
   885  	// k1 = k - va
   886  	// k2 = 0 - vb = -vb
   887  	//
   888  	// As some final optimizations:
   889  	//
   890  	// 1) Note that k1 + k2*λ ≡ k (mod n) means that k1 ≡ k - k2*λ (mod n).
   891  	//    Therefore, the computation of va can be avoided to save two
   892  	//    field multiplications and a field addition.
   893  	//
   894  	// 2) Since k1 = k - k2*λ = k + k2*(-λ), an additional field negation is
   895  	//    saved by storing and using the negative version of λ.
   896  	//
   897  	// 3) Since k2 = -vb = -(c1*b1 + c2*b2) = c1*(-b1) + c2*(-b2), one more
   898  	//    field negation is saved by storing and using the negative versions of
   899  	//    b1 and b2.
   900  	//
   901  	// k2 = c1*(-b1) + c2*(-b2)
   902  	// k1 = k + k2*(-λ)
   903  	var k1, k2 ModNScalar
   904  	c1 := mul512Rsh320Round(k, endoZ1)
   905  	c2 := mul512Rsh320Round(k, endoZ2)
   906  	k2.Add2(c1.Mul(endoNegB1), c2.Mul(endoNegB2))
   907  	k1.Mul2(&k2, endoNegLambda).Add(k)
   908  	return k1, k2
   909  }
   910  
   911  // nafScalar represents a positive integer up to a maximum value of 2^256 - 1
   912  // encoded in non-adjacent form.
   913  //
   914  // NAF is a signed-digit representation where each digit can be +1, 0, or -1.
   915  //
   916  // In order to efficiently encode that information, this type uses two arrays, a
   917  // "positive" array where set bits represent the +1 signed digits and a
   918  // "negative" array where set bits represent the -1 signed digits.  0 is
   919  // represented by neither array having a bit set in that position.
   920  //
   921  // The Pos and Neg methods return the aforementioned positive and negative
   922  // arrays, respectively.
   923  type nafScalar struct {
   924  	// pos houses the positive portion of the representation.  An additional
   925  	// byte is required for the positive portion because the NAF encoding can be
   926  	// up to 1 bit longer than the normal binary encoding of the value.
   927  	//
   928  	// neg houses the negative portion of the representation.  Even though the
   929  	// additional byte is not required for the negative portion, since it can
   930  	// never exceed the length of the normal binary encoding of the value,
   931  	// keeping the same length for positive and negative portions simplifies
   932  	// working with the representation and allows extra conditional branches to
   933  	// be avoided.
   934  	//
   935  	// start and end specify the starting and ending index to use within the pos
   936  	// and neg arrays, respectively.  This allows fixed size arrays to be used
   937  	// versus needing to dynamically allocate space on the heap.
   938  	//
   939  	// NOTE: The fields are defined in the order that they are to minimize the
   940  	// padding on 32-bit and 64-bit platforms.
   941  	pos        [33]byte
   942  	start, end uint8
   943  	neg        [33]byte
   944  }
   945  
   946  // Pos returns the bytes of the encoded value with bits set in the positions
   947  // that represent a signed digit of +1.
   948  func (s *nafScalar) Pos() []byte {
   949  	return s.pos[s.start:s.end]
   950  }
   951  
   952  // Neg returns the bytes of the encoded value with bits set in the positions
   953  // that represent a signed digit of -1.
   954  func (s *nafScalar) Neg() []byte {
   955  	return s.neg[s.start:s.end]
   956  }
   957  
   958  // naf takes a positive integer up to a maximum value of 2^256 - 1 and returns
   959  // its non-adjacent form (NAF), which is a unique signed-digit representation
   960  // such that no two consecutive digits are nonzero.  See the documentation for
   961  // the returned type for details on how the representation is encoded
   962  // efficiently and how to interpret it
   963  //
   964  // NAF is useful in that it has the fewest nonzero digits of any signed digit
   965  // representation, only 1/3rd of its digits are nonzero on average, and at least
   966  // half of the digits will be 0.
   967  //
   968  // The aforementioned properties are particularly beneficial for optimizing
   969  // elliptic curve point multiplication because they effectively minimize the
   970  // number of required point additions in exchange for needing to perform a mix
   971  // of fewer point additions and subtractions and possibly one additional point
   972  // doubling.  This is an excellent tradeoff because subtraction of points has
   973  // the same computational complexity as addition of points and point doubling is
   974  // faster than both.
   975  func naf(k []byte) nafScalar {
   976  	// Strip leading zero bytes.
   977  	for len(k) > 0 && k[0] == 0x00 {
   978  		k = k[1:]
   979  	}
   980  
   981  	// The non-adjacent form (NAF) of a positive integer k is an expression
   982  	// k = ∑_(i=0, l-1) k_i * 2^i where k_i ∈ {0,±1}, k_(l-1) != 0, and no two
   983  	// consecutive digits k_i are nonzero.
   984  	//
   985  	// The traditional method of computing the NAF of a positive integer is
   986  	// given by algorithm 3.30 in [GECC].  It consists of repeatedly dividing k
   987  	// by 2 and choosing the remainder so that the quotient (k−r)/2 is even
   988  	// which ensures the next NAF digit is 0.  This requires log_2(k) steps.
   989  	//
   990  	// However, in [BRID], Prodinger notes that a closed form expression for the
   991  	// NAF representation is the bitwise difference 3k/2 - k/2.  This is more
   992  	// efficient as it can be computed in O(1) versus the O(log(n)) of the
   993  	// traditional approach.
   994  	//
   995  	// The following code makes use of that formula to compute the NAF more
   996  	// efficiently.
   997  	//
   998  	// To understand the logic here, observe that the only way the NAF has a
   999  	// nonzero digit at a given bit is when either 3k/2 or k/2 has a bit set in
  1000  	// that position, but not both.  In other words, the result of a bitwise
  1001  	// xor.  This can be seen simply by considering that when the bits are the
  1002  	// same, the subtraction is either 0-0 or 1-1, both of which are 0.
  1003  	//
  1004  	// Further, observe that the "+1" digits in the result are contributed by
  1005  	// 3k/2 while the "-1" digits are from k/2.  So, they can be determined by
  1006  	// taking the bitwise and of each respective value with the result of the
  1007  	// xor which identifies which bits are nonzero.
  1008  	//
  1009  	// Using that information, this loops backwards from the least significant
  1010  	// byte to the most significant byte while performing the aforementioned
  1011  	// calculations by propagating the potential carry and high order bit from
  1012  	// the next word during the right shift.
  1013  	kLen := len(k)
  1014  	var result nafScalar
  1015  	var carry uint8
  1016  	for byteNum := kLen - 1; byteNum >= 0; byteNum-- {
  1017  		// Calculate k/2.  Notice the carry from the previous word is added and
  1018  		// the low order bit from the next word is shifted in accordingly.
  1019  		kc := uint16(k[byteNum]) + uint16(carry)
  1020  		var nextWord uint8
  1021  		if byteNum > 0 {
  1022  			nextWord = k[byteNum-1]
  1023  		}
  1024  		halfK := kc>>1 | uint16(nextWord<<7)
  1025  
  1026  		// Calculate 3k/2 and determine the non-zero digits in the result.
  1027  		threeHalfK := kc + halfK
  1028  		nonZeroResultDigits := threeHalfK ^ halfK
  1029  
  1030  		// Determine the signed digits {0, ±1}.
  1031  		result.pos[byteNum+1] = uint8(threeHalfK & nonZeroResultDigits)
  1032  		result.neg[byteNum+1] = uint8(halfK & nonZeroResultDigits)
  1033  
  1034  		// Propagate the potential carry from the 3k/2 calculation.
  1035  		carry = uint8(threeHalfK >> 8)
  1036  	}
  1037  	result.pos[0] = carry
  1038  
  1039  	// Set the starting and ending positions within the fixed size arrays to
  1040  	// identify the bytes that are actually used.  This is important since the
  1041  	// encoding is big endian and thus trailing zero bytes changes its value.
  1042  	result.start = 1 - carry
  1043  	result.end = uint8(kLen + 1)
  1044  	return result
  1045  }
  1046  
  1047  // ScalarMultNonConst multiplies k*P where k is a scalar modulo the curve order
  1048  // and P is a point in Jacobian projective coordinates and stores the result in
  1049  // the provided Jacobian point.
  1050  //
  1051  // NOTE: The point must be normalized for this function to return the correct
  1052  // result.  The resulting point will be normalized.
  1053  func ScalarMultNonConst(k *ModNScalar, point, result *JacobianPoint) {
  1054  	// -------------------------------------------------------------------------
  1055  	// This makes use of the following efficiently-computable endomorphism to
  1056  	// accelerate the computation:
  1057  	//
  1058  	// φ(P) ⟼ λ*P = (β*P.x mod p, P.y)
  1059  	//
  1060  	// In other words, there is a special scalar λ that every point on the
  1061  	// elliptic curve can be multiplied by that will result in the same point as
  1062  	// performing a single field multiplication of the point's X coordinate by
  1063  	// the special value β.
  1064  	//
  1065  	// This is useful because scalar point multiplication is significantly more
  1066  	// expensive than a single field multiplication given the former involves a
  1067  	// series of point doublings and additions which themselves consist of a
  1068  	// combination of several field multiplications, squarings, and additions.
  1069  	//
  1070  	// So, the idea behind making use of the endomorphism is thus to decompose
  1071  	// the scalar into two scalars that are each about half the bit length of
  1072  	// the original scalar such that:
  1073  	//
  1074  	// k ≡ k1 + k2*λ (mod n)
  1075  	//
  1076  	// This in turn allows the scalar point multiplication to be performed as a
  1077  	// sum of two smaller half-length multiplications as follows:
  1078  	//
  1079  	// k*P = (k1 + k2*λ)*P
  1080  	//     = k1*P + k2*λ*P
  1081  	//     = k1*P + k2*φ(P)
  1082  	//
  1083  	// Thus, a speedup is achieved so long as it's faster to decompose the
  1084  	// scalar, compute φ(P), and perform a simultaneous multiply of the
  1085  	// half-length point multiplications than it is to compute a full width
  1086  	// point multiplication.
  1087  	//
  1088  	// In practice, benchmarks show the current implementation provides a
  1089  	// speedup of around 30-35% versus not using the endomorphism.
  1090  	//
  1091  	// See section 3.5 in [GECC] for a more rigorous treatment.
  1092  	// -------------------------------------------------------------------------
  1093  
  1094  	// Per above, the main equation here to remember is:
  1095  	//   k*P = k1*P + k2*φ(P)
  1096  	//
  1097  	// p1 below is P in the equation while p2 is φ(P) in the equation.
  1098  	//
  1099  	// NOTE: φ(x,y) = (β*x,y).  The Jacobian z coordinates are the same, so this
  1100  	// math goes through.
  1101  	//
  1102  	// Also, calculate -p1 and -p2 for use in the NAF optimization.
  1103  	p1, p1Neg := new(JacobianPoint), new(JacobianPoint)
  1104  	p1.Set(point)
  1105  	p1Neg.Set(p1)
  1106  	p1Neg.Y.Negate(1).Normalize()
  1107  	p2, p2Neg := new(JacobianPoint), new(JacobianPoint)
  1108  	p2.Set(p1)
  1109  	p2.X.Mul(endoBeta).Normalize()
  1110  	p2Neg.Set(p2)
  1111  	p2Neg.Y.Negate(1).Normalize()
  1112  
  1113  	// Decompose k into k1 and k2 such that k = k1 + k2*λ (mod n) where k1 and
  1114  	// k2 are around half the bit length of k in order to halve the number of EC
  1115  	// operations.
  1116  	//
  1117  	// Notice that this also flips the sign of the scalars and points as needed
  1118  	// to minimize the bit lengths of the scalars k1 and k2.
  1119  	//
  1120  	// This is done because the scalars are operating modulo the group order
  1121  	// which means that when they would otherwise be a small negative magnitude
  1122  	// they will instead be a large positive magnitude.  Since the goal is for
  1123  	// the scalars to have a small magnitude to achieve a performance boost, use
  1124  	// their negation when they are greater than the half order of the group and
  1125  	// flip the positive and negative values of the corresponding point that
  1126  	// will be multiplied by to compensate.
  1127  	//
  1128  	// In other words, transform the calc when k1 is over the half order to:
  1129  	//   k1*P = -k1*-P
  1130  	//
  1131  	// Similarly, transform the calc when k2 is over the half order to:
  1132  	//   k2*φ(P) = -k2*-φ(P)
  1133  	k1, k2 := splitK(k)
  1134  	if k1.IsOverHalfOrder() {
  1135  		k1.Negate()
  1136  		p1, p1Neg = p1Neg, p1
  1137  	}
  1138  	if k2.IsOverHalfOrder() {
  1139  		k2.Negate()
  1140  		p2, p2Neg = p2Neg, p2
  1141  	}
  1142  
  1143  	// Convert k1 and k2 into their NAF representations since NAF has a lot more
  1144  	// zeros overall on average which minimizes the number of required point
  1145  	// additions in exchange for a mix of fewer point additions and subtractions
  1146  	// at the cost of one additional point doubling.
  1147  	//
  1148  	// This is an excellent tradeoff because subtraction of points has the same
  1149  	// computational complexity as addition of points and point doubling is
  1150  	// faster than both.
  1151  	//
  1152  	// Concretely, on average, 1/2 of all bits will be non-zero with the normal
  1153  	// binary representation whereas only 1/3rd of the bits will be non-zero
  1154  	// with NAF.
  1155  	//
  1156  	// The Pos version of the bytes contain the +1s and the Neg versions contain
  1157  	// the -1s.
  1158  	k1Bytes, k2Bytes := k1.Bytes(), k2.Bytes()
  1159  	k1NAF, k2NAF := naf(k1Bytes[:]), naf(k2Bytes[:])
  1160  	k1PosNAF, k1NegNAF := k1NAF.Pos(), k1NAF.Neg()
  1161  	k2PosNAF, k2NegNAF := k2NAF.Pos(), k2NAF.Neg()
  1162  	k1Len, k2Len := len(k1PosNAF), len(k2PosNAF)
  1163  
  1164  	// Add left-to-right using the NAF optimization.  See algorithm 3.77 from
  1165  	// [GECC].
  1166  	//
  1167  	// Point Q = ∞ (point at infinity).
  1168  	var q JacobianPoint
  1169  	m := k1Len
  1170  	if m < k2Len {
  1171  		m = k2Len
  1172  	}
  1173  	for i := 0; i < m; i++ {
  1174  		// Since k1 and k2 are potentially different lengths and the calculation
  1175  		// is being done left to right, pad the front of the shorter one with
  1176  		// 0s.
  1177  		var k1BytePos, k1ByteNeg, k2BytePos, k2ByteNeg byte
  1178  		if i >= m-k1Len {
  1179  			k1BytePos, k1ByteNeg = k1PosNAF[i-(m-k1Len)], k1NegNAF[i-(m-k1Len)]
  1180  		}
  1181  		if i >= m-k2Len {
  1182  			k2BytePos, k2ByteNeg = k2PosNAF[i-(m-k2Len)], k2NegNAF[i-(m-k2Len)]
  1183  		}
  1184  
  1185  		for mask := uint8(1 << 7); mask > 0; mask >>= 1 {
  1186  			// Q = 2 * Q
  1187  			DoubleNonConst(&q, &q)
  1188  
  1189  			// Add or subtract the first point based on the signed digit of the
  1190  			// NAF representation of k1 at this bit position.
  1191  			//
  1192  			// +1: Q = Q + p1
  1193  			// -1: Q = Q - p1
  1194  			//  0: Q = Q (no change)
  1195  			if k1BytePos&mask == mask {
  1196  				AddNonConst(&q, p1, &q)
  1197  			} else if k1ByteNeg&mask == mask {
  1198  				AddNonConst(&q, p1Neg, &q)
  1199  			}
  1200  
  1201  			// Add or subtract the second point based on the signed digit of the
  1202  			// NAF representation of k2 at this bit position.
  1203  			//
  1204  			// +1: Q = Q + p2
  1205  			// -1: Q = Q - p2
  1206  			//  0: Q = Q (no change)
  1207  			if k2BytePos&mask == mask {
  1208  				AddNonConst(&q, p2, &q)
  1209  			} else if k2ByteNeg&mask == mask {
  1210  				AddNonConst(&q, p2Neg, &q)
  1211  			}
  1212  		}
  1213  	}
  1214  
  1215  	result.Set(&q)
  1216  }
  1217  
  1218  // ScalarBaseMultNonConst multiplies k*G where k is a scalar modulo the curve
  1219  // order and G is the base point of the group and stores the result in the
  1220  // provided Jacobian point.
  1221  //
  1222  // NOTE: The resulting point will be normalized.
  1223  func ScalarBaseMultNonConst(k *ModNScalar, result *JacobianPoint) {
  1224  	scalarBaseMultNonConst(k, result)
  1225  }
  1226  
  1227  // jacobianG is the secp256k1 base point converted to Jacobian coordinates and
  1228  // is defined here to avoid repeatedly converting it.
  1229  var jacobianG = func() JacobianPoint {
  1230  	var G JacobianPoint
  1231  	bigAffineToJacobian(curveParams.Gx, curveParams.Gy, &G)
  1232  	return G
  1233  }()
  1234  
  1235  // scalarBaseMultNonConstSlow computes k*G through ScalarMultNonConst.
  1236  func scalarBaseMultNonConstSlow(k *ModNScalar, result *JacobianPoint) {
  1237  	ScalarMultNonConst(k, &jacobianG, result)
  1238  }
  1239  
  1240  // scalarBaseMultNonConstFast computes k*G through the precomputed lookup
  1241  // tables.
  1242  func scalarBaseMultNonConstFast(k *ModNScalar, result *JacobianPoint) {
  1243  	bytePoints := s256BytePoints()
  1244  
  1245  	// Start with the point at infinity.
  1246  	result.X.Zero()
  1247  	result.Y.Zero()
  1248  	result.Z.Zero()
  1249  
  1250  	// bytePoints has all 256 byte points for each 8-bit window.  The strategy
  1251  	// is to add up the byte points.  This is best understood by expressing k in
  1252  	// base-256 which it already sort of is.  Each "digit" in the 8-bit window
  1253  	// can be looked up using bytePoints and added together.
  1254  	kb := k.Bytes()
  1255  	for i := 0; i < len(kb); i++ {
  1256  		pt := &bytePoints[i][kb[i]]
  1257  		AddNonConst(result, pt, result)
  1258  	}
  1259  }
  1260  
  1261  // isOnCurve returns whether or not the affine point (x,y) is on the curve.
  1262  func isOnCurve(fx, fy *FieldVal) bool {
  1263  	// Elliptic curve equation for secp256k1 is: y^2 = x^3 + 7
  1264  	y2 := new(FieldVal).SquareVal(fy).Normalize()
  1265  	result := new(FieldVal).SquareVal(fx).Mul(fx).AddInt(7).Normalize()
  1266  	return y2.Equals(result)
  1267  }
  1268  
  1269  // DecompressY attempts to calculate the Y coordinate for the given X coordinate
  1270  // such that the result pair is a point on the secp256k1 curve.  It adjusts Y
  1271  // based on the desired oddness and returns whether or not it was successful
  1272  // since not all X coordinates are valid.
  1273  //
  1274  // The magnitude of the provided X coordinate field val must be a max of 8 for a
  1275  // correct result.  The resulting Y field val will have a max magnitude of 2.
  1276  func DecompressY(x *FieldVal, odd bool, resultY *FieldVal) bool {
  1277  	// The curve equation for secp256k1 is: y^2 = x^3 + 7.  Thus
  1278  	// y = +-sqrt(x^3 + 7).
  1279  	//
  1280  	// The x coordinate must be invalid if there is no square root for the
  1281  	// calculated rhs because it means the X coordinate is not for a point on
  1282  	// the curve.
  1283  	x3PlusB := new(FieldVal).SquareVal(x).Mul(x).AddInt(7)
  1284  	if hasSqrt := resultY.SquareRootVal(x3PlusB); !hasSqrt {
  1285  		return false
  1286  	}
  1287  	if resultY.Normalize().IsOdd() != odd {
  1288  		resultY.Negate(1)
  1289  	}
  1290  	return true
  1291  }
  1292  

View as plain text