1 // Copyright (c) 2013-2014 The btcsuite developers 2 // Copyright (c) 2015-2023 The Decred developers 3 // Copyright (c) 2013-2023 Dave Collins 4 // Use of this source code is governed by an ISC 5 // license that can be found in the LICENSE file. 6 7 package secp256k1 8 9 // References: 10 // [HAC]: Handbook of Applied Cryptography Menezes, van Oorschot, Vanstone. 11 // http://cacr.uwaterloo.ca/hac/ 12 13 // All elliptic curve operations for secp256k1 are done in a finite field 14 // characterized by a 256-bit prime. Given this precision is larger than the 15 // biggest available native type, obviously some form of bignum math is needed. 16 // This package implements specialized fixed-precision field arithmetic rather 17 // than relying on an arbitrary-precision arithmetic package such as math/big 18 // for dealing with the field math since the size is known. As a result, rather 19 // large performance gains are achieved by taking advantage of many 20 // optimizations not available to arbitrary-precision arithmetic and generic 21 // modular arithmetic algorithms. 22 // 23 // There are various ways to internally represent each finite field element. 24 // For example, the most obvious representation would be to use an array of 4 25 // uint64s (64 bits * 4 = 256 bits). However, that representation suffers from 26 // a couple of issues. First, there is no native Go type large enough to handle 27 // the intermediate results while adding or multiplying two 64-bit numbers, and 28 // second there is no space left for overflows when performing the intermediate 29 // arithmetic between each array element which would lead to expensive carry 30 // propagation. 31 // 32 // Given the above, this implementation represents the field elements as 33 // 10 uint32s with each word (array entry) treated as base 2^26. This was 34 // chosen for the following reasons: 35 // 1) Most systems at the current time are 64-bit (or at least have 64-bit 36 // registers available for specialized purposes such as MMX) so the 37 // intermediate results can typically be done using a native register (and 38 // using uint64s to avoid the need for additional half-word arithmetic) 39 // 2) In order to allow addition of the internal words without having to 40 // propagate the carry, the max normalized value for each register must 41 // be less than the number of bits available in the register 42 // 3) Since we're dealing with 32-bit values, 64-bits of overflow is a 43 // reasonable choice for #2 44 // 4) Given the need for 256-bits of precision and the properties stated in #1, 45 // #2, and #3, the representation which best accommodates this is 10 uint32s 46 // with base 2^26 (26 bits * 10 = 260 bits, so the final word only needs 22 47 // bits) which leaves the desired 64 bits (32 * 10 = 320, 320 - 256 = 64) for 48 // overflow 49 // 50 // Since it is so important that the field arithmetic is extremely fast for high 51 // performance crypto, this type does not perform any validation where it 52 // ordinarily would. See the documentation for FieldVal for more details. 53 54 import ( 55 "encoding/hex" 56 ) 57 58 // Constants used to make the code more readable. 59 const ( 60 twoBitsMask = 0x3 61 fourBitsMask = 0xf 62 sixBitsMask = 0x3f 63 eightBitsMask = 0xff 64 ) 65 66 // Constants related to the field representation. 67 const ( 68 // fieldWords is the number of words used to internally represent the 69 // 256-bit value. 70 fieldWords = 10 71 72 // fieldBase is the exponent used to form the numeric base of each word. 73 // 2^(fieldBase*i) where i is the word position. 74 fieldBase = 26 75 76 // fieldBaseMask is the mask for the bits in each word needed to 77 // represent the numeric base of each word (except the most significant 78 // word). 79 fieldBaseMask = (1 << fieldBase) - 1 80 81 // fieldMSBBits is the number of bits in the most significant word used 82 // to represent the value. 83 fieldMSBBits = 256 - (fieldBase * (fieldWords - 1)) 84 85 // fieldMSBMask is the mask for the bits in the most significant word 86 // needed to represent the value. 87 fieldMSBMask = (1 << fieldMSBBits) - 1 88 89 // These fields provide convenient access to each of the words of the 90 // secp256k1 prime in the internal field representation to improve code 91 // readability. 92 fieldPrimeWordZero = 0x03fffc2f 93 fieldPrimeWordOne = 0x03ffffbf 94 fieldPrimeWordTwo = 0x03ffffff 95 fieldPrimeWordThree = 0x03ffffff 96 fieldPrimeWordFour = 0x03ffffff 97 fieldPrimeWordFive = 0x03ffffff 98 fieldPrimeWordSix = 0x03ffffff 99 fieldPrimeWordSeven = 0x03ffffff 100 fieldPrimeWordEight = 0x03ffffff 101 fieldPrimeWordNine = 0x003fffff 102 ) 103 104 // FieldVal implements optimized fixed-precision arithmetic over the 105 // secp256k1 finite field. This means all arithmetic is performed modulo 106 // 107 // 0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffefffffc2f. 108 // 109 // WARNING: Since it is so important for the field arithmetic to be extremely 110 // fast for high performance crypto, this type does not perform any validation 111 // of documented preconditions where it ordinarily would. As a result, it is 112 // IMPERATIVE for callers to understand some key concepts that are described 113 // below and ensure the methods are called with the necessary preconditions that 114 // each method is documented with. For example, some methods only give the 115 // correct result if the field value is normalized and others require the field 116 // values involved to have a maximum magnitude and THERE ARE NO EXPLICIT CHECKS 117 // TO ENSURE THOSE PRECONDITIONS ARE SATISFIED. This does, unfortunately, make 118 // the type more difficult to use correctly and while I typically prefer to 119 // ensure all state and input is valid for most code, this is a bit of an 120 // exception because those extra checks really add up in what ends up being 121 // critical hot paths. 122 // 123 // The first key concept when working with this type is normalization. In order 124 // to avoid the need to propagate a ton of carries, the internal representation 125 // provides additional overflow bits for each word of the overall 256-bit value. 126 // This means that there are multiple internal representations for the same 127 // value and, as a result, any methods that rely on comparison of the value, 128 // such as equality and oddness determination, require the caller to provide a 129 // normalized value. 130 // 131 // The second key concept when working with this type is magnitude. As 132 // previously mentioned, the internal representation provides additional 133 // overflow bits which means that the more math operations that are performed on 134 // the field value between normalizations, the more those overflow bits 135 // accumulate. The magnitude is effectively that maximum possible number of 136 // those overflow bits that could possibly be required as a result of a given 137 // operation. Since there are only a limited number of overflow bits available, 138 // this implies that the max possible magnitude MUST be tracked by the caller 139 // and the caller MUST normalize the field value if a given operation would 140 // cause the magnitude of the result to exceed the max allowed value. 141 // 142 // IMPORTANT: The max allowed magnitude of a field value is 64. 143 type FieldVal struct { 144 // Each 256-bit value is represented as 10 32-bit integers in base 2^26. 145 // This provides 6 bits of overflow in each word (10 bits in the most 146 // significant word) for a total of 64 bits of overflow (9*6 + 10 = 64). It 147 // only implements the arithmetic needed for elliptic curve operations. 148 // 149 // The following depicts the internal representation: 150 // ----------------------------------------------------------------- 151 // | n[9] | n[8] | ... | n[0] | 152 // | 32 bits available | 32 bits available | ... | 32 bits available | 153 // | 22 bits for value | 26 bits for value | ... | 26 bits for value | 154 // | 10 bits overflow | 6 bits overflow | ... | 6 bits overflow | 155 // | Mult: 2^(26*9) | Mult: 2^(26*8) | ... | Mult: 2^(26*0) | 156 // ----------------------------------------------------------------- 157 // 158 // For example, consider the number 2^49 + 1. It would be represented as: 159 // n[0] = 1 160 // n[1] = 2^23 161 // n[2..9] = 0 162 // 163 // The full 256-bit value is then calculated by looping i from 9..0 and 164 // doing sum(n[i] * 2^(26i)) like so: 165 // n[9] * 2^(26*9) = 0 * 2^234 = 0 166 // n[8] * 2^(26*8) = 0 * 2^208 = 0 167 // ... 168 // n[1] * 2^(26*1) = 2^23 * 2^26 = 2^49 169 // n[0] * 2^(26*0) = 1 * 2^0 = 1 170 // Sum: 0 + 0 + ... + 2^49 + 1 = 2^49 + 1 171 n [10]uint32 172 } 173 174 // String returns the field value as a normalized human-readable hex string. 175 // 176 // Preconditions: None 177 // Output Normalized: Field is not modified -- same as input value 178 // Output Max Magnitude: Field is not modified -- same as input value 179 func (f FieldVal) String() string { 180 // f is a copy, so it's safe to normalize it without mutating the original. 181 f.Normalize() 182 return hex.EncodeToString(f.Bytes()[:]) 183 } 184 185 // Zero sets the field value to zero in constant time. A newly created field 186 // value is already set to zero. This function can be useful to clear an 187 // existing field value for reuse. 188 // 189 // Preconditions: None 190 // Output Normalized: Yes 191 // Output Max Magnitude: 1 192 func (f *FieldVal) Zero() { 193 f.n[0] = 0 194 f.n[1] = 0 195 f.n[2] = 0 196 f.n[3] = 0 197 f.n[4] = 0 198 f.n[5] = 0 199 f.n[6] = 0 200 f.n[7] = 0 201 f.n[8] = 0 202 f.n[9] = 0 203 } 204 205 // Set sets the field value equal to the passed value in constant time. The 206 // normalization and magnitude of the two fields will be identical. 207 // 208 // The field value is returned to support chaining. This enables syntax like: 209 // f := new(FieldVal).Set(f2).Add(1) so that f = f2 + 1 where f2 is not 210 // modified. 211 // 212 // Preconditions: None 213 // Output Normalized: Same as input value 214 // Output Max Magnitude: Same as input value 215 func (f *FieldVal) Set(val *FieldVal) *FieldVal { 216 *f = *val 217 return f 218 } 219 220 // SetInt sets the field value to the passed integer in constant time. This is 221 // a convenience function since it is fairly common to perform some arithmetic 222 // with small native integers. 223 // 224 // The field value is returned to support chaining. This enables syntax such 225 // as f := new(FieldVal).SetInt(2).Mul(f2) so that f = 2 * f2. 226 // 227 // Preconditions: None 228 // Output Normalized: Yes 229 // Output Max Magnitude: 1 230 func (f *FieldVal) SetInt(ui uint16) *FieldVal { 231 f.Zero() 232 f.n[0] = uint32(ui) 233 return f 234 } 235 236 // SetBytes packs the passed 32-byte big-endian value into the internal field 237 // value representation in constant time. SetBytes interprets the provided 238 // array as a 256-bit big-endian unsigned integer, packs it into the internal 239 // field value representation, and returns either 1 if it is greater than or 240 // equal to the field prime (aka it overflowed) or 0 otherwise in constant time. 241 // 242 // Note that a bool is not used here because it is not possible in Go to convert 243 // from a bool to numeric value in constant time and many constant-time 244 // operations require a numeric value. 245 // 246 // Preconditions: None 247 // Output Normalized: Yes if no overflow, no otherwise 248 // Output Max Magnitude: 1 249 func (f *FieldVal) SetBytes(b *[32]byte) uint32 { 250 // Pack the 256 total bits across the 10 uint32 words with a max of 251 // 26-bits per word. This could be done with a couple of for loops, 252 // but this unrolled version is significantly faster. Benchmarks show 253 // this is about 34 times faster than the variant which uses loops. 254 f.n[0] = uint32(b[31]) | uint32(b[30])<<8 | uint32(b[29])<<16 | 255 (uint32(b[28])&twoBitsMask)<<24 256 f.n[1] = uint32(b[28])>>2 | uint32(b[27])<<6 | uint32(b[26])<<14 | 257 (uint32(b[25])&fourBitsMask)<<22 258 f.n[2] = uint32(b[25])>>4 | uint32(b[24])<<4 | uint32(b[23])<<12 | 259 (uint32(b[22])&sixBitsMask)<<20 260 f.n[3] = uint32(b[22])>>6 | uint32(b[21])<<2 | uint32(b[20])<<10 | 261 uint32(b[19])<<18 262 f.n[4] = uint32(b[18]) | uint32(b[17])<<8 | uint32(b[16])<<16 | 263 (uint32(b[15])&twoBitsMask)<<24 264 f.n[5] = uint32(b[15])>>2 | uint32(b[14])<<6 | uint32(b[13])<<14 | 265 (uint32(b[12])&fourBitsMask)<<22 266 f.n[6] = uint32(b[12])>>4 | uint32(b[11])<<4 | uint32(b[10])<<12 | 267 (uint32(b[9])&sixBitsMask)<<20 268 f.n[7] = uint32(b[9])>>6 | uint32(b[8])<<2 | uint32(b[7])<<10 | 269 uint32(b[6])<<18 270 f.n[8] = uint32(b[5]) | uint32(b[4])<<8 | uint32(b[3])<<16 | 271 (uint32(b[2])&twoBitsMask)<<24 272 f.n[9] = uint32(b[2])>>2 | uint32(b[1])<<6 | uint32(b[0])<<14 273 274 // The intuition here is that the field value is greater than the prime if 275 // one of the higher individual words is greater than corresponding word of 276 // the prime and all higher words in the field value are equal to their 277 // corresponding word of the prime. Since this type is modulo the prime, 278 // being equal is also an overflow back to 0. 279 // 280 // Note that because the input is 32 bytes and it was just packed into the 281 // field representation, the only words that can possibly be greater are 282 // zero and one, because ceil(log_2(2^256 - 1 - P)) = 33 bits max and the 283 // internal field representation encodes 26 bits with each word. 284 // 285 // Thus, there is no need to test if the upper words of the field value 286 // exceeds them, hence, only equality is checked for them. 287 highWordsEq := constantTimeEq(f.n[9], fieldPrimeWordNine) 288 highWordsEq &= constantTimeEq(f.n[8], fieldPrimeWordEight) 289 highWordsEq &= constantTimeEq(f.n[7], fieldPrimeWordSeven) 290 highWordsEq &= constantTimeEq(f.n[6], fieldPrimeWordSix) 291 highWordsEq &= constantTimeEq(f.n[5], fieldPrimeWordFive) 292 highWordsEq &= constantTimeEq(f.n[4], fieldPrimeWordFour) 293 highWordsEq &= constantTimeEq(f.n[3], fieldPrimeWordThree) 294 highWordsEq &= constantTimeEq(f.n[2], fieldPrimeWordTwo) 295 overflow := highWordsEq & constantTimeGreater(f.n[1], fieldPrimeWordOne) 296 highWordsEq &= constantTimeEq(f.n[1], fieldPrimeWordOne) 297 overflow |= highWordsEq & constantTimeGreaterOrEq(f.n[0], fieldPrimeWordZero) 298 299 return overflow 300 } 301 302 // SetByteSlice interprets the provided slice as a 256-bit big-endian unsigned 303 // integer (meaning it is truncated to the first 32 bytes), packs it into the 304 // internal field value representation, and returns whether or not the resulting 305 // truncated 256-bit integer is greater than or equal to the field prime (aka it 306 // overflowed) in constant time. 307 // 308 // Note that since passing a slice with more than 32 bytes is truncated, it is 309 // possible that the truncated value is less than the field prime and hence it 310 // will not be reported as having overflowed in that case. It is up to the 311 // caller to decide whether it needs to provide numbers of the appropriate size 312 // or it if is acceptable to use this function with the described truncation and 313 // overflow behavior. 314 // 315 // Preconditions: None 316 // Output Normalized: Yes if no overflow, no otherwise 317 // Output Max Magnitude: 1 318 func (f *FieldVal) SetByteSlice(b []byte) bool { 319 var b32 [32]byte 320 b = b[:constantTimeMin(uint32(len(b)), 32)] 321 copy(b32[:], b32[:32-len(b)]) 322 copy(b32[32-len(b):], b) 323 result := f.SetBytes(&b32) 324 zeroArray32(&b32) 325 return result != 0 326 } 327 328 // Normalize normalizes the internal field words into the desired range and 329 // performs fast modular reduction over the secp256k1 prime by making use of the 330 // special form of the prime in constant time. 331 // 332 // Preconditions: None 333 // Output Normalized: Yes 334 // Output Max Magnitude: 1 335 func (f *FieldVal) Normalize() *FieldVal { 336 // The field representation leaves 6 bits of overflow in each word so 337 // intermediate calculations can be performed without needing to 338 // propagate the carry to each higher word during the calculations. In 339 // order to normalize, we need to "compact" the full 256-bit value to 340 // the right while propagating any carries through to the high order 341 // word. 342 // 343 // Since this field is doing arithmetic modulo the secp256k1 prime, we 344 // also need to perform modular reduction over the prime. 345 // 346 // Per [HAC] section 14.3.4: Reduction method of moduli of special form, 347 // when the modulus is of the special form m = b^t - c, highly efficient 348 // reduction can be achieved. 349 // 350 // The secp256k1 prime is equivalent to 2^256 - 4294968273, so it fits 351 // this criteria. 352 // 353 // 4294968273 in field representation (base 2^26) is: 354 // n[0] = 977 355 // n[1] = 64 356 // That is to say (2^26 * 64) + 977 = 4294968273 357 // 358 // The algorithm presented in the referenced section typically repeats 359 // until the quotient is zero. However, due to our field representation 360 // we already know to within one reduction how many times we would need 361 // to repeat as it's the uppermost bits of the high order word. Thus we 362 // can simply multiply the magnitude by the field representation of the 363 // prime and do a single iteration. After this step there might be an 364 // additional carry to bit 256 (bit 22 of the high order word). 365 t9 := f.n[9] 366 m := t9 >> fieldMSBBits 367 t9 = t9 & fieldMSBMask 368 t0 := f.n[0] + m*977 369 t1 := (t0 >> fieldBase) + f.n[1] + (m << 6) 370 t0 = t0 & fieldBaseMask 371 t2 := (t1 >> fieldBase) + f.n[2] 372 t1 = t1 & fieldBaseMask 373 t3 := (t2 >> fieldBase) + f.n[3] 374 t2 = t2 & fieldBaseMask 375 t4 := (t3 >> fieldBase) + f.n[4] 376 t3 = t3 & fieldBaseMask 377 t5 := (t4 >> fieldBase) + f.n[5] 378 t4 = t4 & fieldBaseMask 379 t6 := (t5 >> fieldBase) + f.n[6] 380 t5 = t5 & fieldBaseMask 381 t7 := (t6 >> fieldBase) + f.n[7] 382 t6 = t6 & fieldBaseMask 383 t8 := (t7 >> fieldBase) + f.n[8] 384 t7 = t7 & fieldBaseMask 385 t9 = (t8 >> fieldBase) + t9 386 t8 = t8 & fieldBaseMask 387 388 // At this point, the magnitude is guaranteed to be one, however, the 389 // value could still be greater than the prime if there was either a 390 // carry through to bit 256 (bit 22 of the higher order word) or the 391 // value is greater than or equal to the field characteristic. The 392 // following determines if either or these conditions are true and does 393 // the final reduction in constant time. 394 // 395 // Also note that 'm' will be zero when neither of the aforementioned 396 // conditions are true and the value will not be changed when 'm' is zero. 397 m = constantTimeEq(t9, fieldMSBMask) 398 m &= constantTimeEq(t8&t7&t6&t5&t4&t3&t2, fieldBaseMask) 399 m &= constantTimeGreater(t1+64+((t0+977)>>fieldBase), fieldBaseMask) 400 m |= t9 >> fieldMSBBits 401 t0 = t0 + m*977 402 t1 = (t0 >> fieldBase) + t1 + (m << 6) 403 t0 = t0 & fieldBaseMask 404 t2 = (t1 >> fieldBase) + t2 405 t1 = t1 & fieldBaseMask 406 t3 = (t2 >> fieldBase) + t3 407 t2 = t2 & fieldBaseMask 408 t4 = (t3 >> fieldBase) + t4 409 t3 = t3 & fieldBaseMask 410 t5 = (t4 >> fieldBase) + t5 411 t4 = t4 & fieldBaseMask 412 t6 = (t5 >> fieldBase) + t6 413 t5 = t5 & fieldBaseMask 414 t7 = (t6 >> fieldBase) + t7 415 t6 = t6 & fieldBaseMask 416 t8 = (t7 >> fieldBase) + t8 417 t7 = t7 & fieldBaseMask 418 t9 = (t8 >> fieldBase) + t9 419 t8 = t8 & fieldBaseMask 420 t9 = t9 & fieldMSBMask // Remove potential multiple of 2^256. 421 422 // Finally, set the normalized and reduced words. 423 f.n[0] = t0 424 f.n[1] = t1 425 f.n[2] = t2 426 f.n[3] = t3 427 f.n[4] = t4 428 f.n[5] = t5 429 f.n[6] = t6 430 f.n[7] = t7 431 f.n[8] = t8 432 f.n[9] = t9 433 return f 434 } 435 436 // PutBytesUnchecked unpacks the field value to a 32-byte big-endian value 437 // directly into the passed byte slice in constant time. The target slice must 438 // have at least 32 bytes available or it will panic. 439 // 440 // There is a similar function, PutBytes, which unpacks the field value into a 441 // 32-byte array directly. This version is provided since it can be useful 442 // to write directly into part of a larger buffer without needing a separate 443 // allocation. 444 // 445 // Preconditions: 446 // - The field value MUST be normalized 447 // - The target slice MUST have at least 32 bytes available 448 func (f *FieldVal) PutBytesUnchecked(b []byte) { 449 // Unpack the 256 total bits from the 10 uint32 words with a max of 450 // 26-bits per word. This could be done with a couple of for loops, 451 // but this unrolled version is a bit faster. Benchmarks show this is 452 // about 10 times faster than the variant which uses loops. 453 b[31] = byte(f.n[0] & eightBitsMask) 454 b[30] = byte((f.n[0] >> 8) & eightBitsMask) 455 b[29] = byte((f.n[0] >> 16) & eightBitsMask) 456 b[28] = byte((f.n[0]>>24)&twoBitsMask | (f.n[1]&sixBitsMask)<<2) 457 b[27] = byte((f.n[1] >> 6) & eightBitsMask) 458 b[26] = byte((f.n[1] >> 14) & eightBitsMask) 459 b[25] = byte((f.n[1]>>22)&fourBitsMask | (f.n[2]&fourBitsMask)<<4) 460 b[24] = byte((f.n[2] >> 4) & eightBitsMask) 461 b[23] = byte((f.n[2] >> 12) & eightBitsMask) 462 b[22] = byte((f.n[2]>>20)&sixBitsMask | (f.n[3]&twoBitsMask)<<6) 463 b[21] = byte((f.n[3] >> 2) & eightBitsMask) 464 b[20] = byte((f.n[3] >> 10) & eightBitsMask) 465 b[19] = byte((f.n[3] >> 18) & eightBitsMask) 466 b[18] = byte(f.n[4] & eightBitsMask) 467 b[17] = byte((f.n[4] >> 8) & eightBitsMask) 468 b[16] = byte((f.n[4] >> 16) & eightBitsMask) 469 b[15] = byte((f.n[4]>>24)&twoBitsMask | (f.n[5]&sixBitsMask)<<2) 470 b[14] = byte((f.n[5] >> 6) & eightBitsMask) 471 b[13] = byte((f.n[5] >> 14) & eightBitsMask) 472 b[12] = byte((f.n[5]>>22)&fourBitsMask | (f.n[6]&fourBitsMask)<<4) 473 b[11] = byte((f.n[6] >> 4) & eightBitsMask) 474 b[10] = byte((f.n[6] >> 12) & eightBitsMask) 475 b[9] = byte((f.n[6]>>20)&sixBitsMask | (f.n[7]&twoBitsMask)<<6) 476 b[8] = byte((f.n[7] >> 2) & eightBitsMask) 477 b[7] = byte((f.n[7] >> 10) & eightBitsMask) 478 b[6] = byte((f.n[7] >> 18) & eightBitsMask) 479 b[5] = byte(f.n[8] & eightBitsMask) 480 b[4] = byte((f.n[8] >> 8) & eightBitsMask) 481 b[3] = byte((f.n[8] >> 16) & eightBitsMask) 482 b[2] = byte((f.n[8]>>24)&twoBitsMask | (f.n[9]&sixBitsMask)<<2) 483 b[1] = byte((f.n[9] >> 6) & eightBitsMask) 484 b[0] = byte((f.n[9] >> 14) & eightBitsMask) 485 } 486 487 // PutBytes unpacks the field value to a 32-byte big-endian value using the 488 // passed byte array in constant time. 489 // 490 // There is a similar function, PutBytesUnchecked, which unpacks the field value 491 // into a slice that must have at least 32 bytes available. This version is 492 // provided since it can be useful to write directly into an array that is type 493 // checked. 494 // 495 // Alternatively, there is also Bytes, which unpacks the field value into a new 496 // array and returns that which can sometimes be more ergonomic in applications 497 // that aren't concerned about an additional copy. 498 // 499 // Preconditions: 500 // - The field value MUST be normalized 501 func (f *FieldVal) PutBytes(b *[32]byte) { 502 f.PutBytesUnchecked(b[:]) 503 } 504 505 // Bytes unpacks the field value to a 32-byte big-endian value in constant time. 506 // 507 // See PutBytes and PutBytesUnchecked for variants that allow an array or slice 508 // to be passed which can be useful to cut down on the number of allocations by 509 // allowing the caller to reuse a buffer or write directly into part of a larger 510 // buffer. 511 // 512 // Preconditions: 513 // - The field value MUST be normalized 514 func (f *FieldVal) Bytes() *[32]byte { 515 b := new([32]byte) 516 f.PutBytesUnchecked(b[:]) 517 return b 518 } 519 520 // IsZeroBit returns 1 when the field value is equal to zero or 0 otherwise in 521 // constant time. 522 // 523 // Note that a bool is not used here because it is not possible in Go to convert 524 // from a bool to numeric value in constant time and many constant-time 525 // operations require a numeric value. See IsZero for the version that returns 526 // a bool. 527 // 528 // Preconditions: 529 // - The field value MUST be normalized 530 func (f *FieldVal) IsZeroBit() uint32 { 531 // The value can only be zero if no bits are set in any of the words. 532 // This is a constant time implementation. 533 bits := f.n[0] | f.n[1] | f.n[2] | f.n[3] | f.n[4] | 534 f.n[5] | f.n[6] | f.n[7] | f.n[8] | f.n[9] 535 536 return constantTimeEq(bits, 0) 537 } 538 539 // IsZero returns whether or not the field value is equal to zero in constant 540 // time. 541 // 542 // Preconditions: 543 // - The field value MUST be normalized 544 func (f *FieldVal) IsZero() bool { 545 // The value can only be zero if no bits are set in any of the words. 546 // This is a constant time implementation. 547 bits := f.n[0] | f.n[1] | f.n[2] | f.n[3] | f.n[4] | 548 f.n[5] | f.n[6] | f.n[7] | f.n[8] | f.n[9] 549 550 return bits == 0 551 } 552 553 // IsOneBit returns 1 when the field value is equal to one or 0 otherwise in 554 // constant time. 555 // 556 // Note that a bool is not used here because it is not possible in Go to convert 557 // from a bool to numeric value in constant time and many constant-time 558 // operations require a numeric value. See IsOne for the version that returns a 559 // bool. 560 // 561 // Preconditions: 562 // - The field value MUST be normalized 563 func (f *FieldVal) IsOneBit() uint32 { 564 // The value can only be one if the single lowest significant bit is set in 565 // the first word and no other bits are set in any of the other words. 566 // This is a constant time implementation. 567 bits := (f.n[0] ^ 1) | f.n[1] | f.n[2] | f.n[3] | f.n[4] | f.n[5] | 568 f.n[6] | f.n[7] | f.n[8] | f.n[9] 569 570 return constantTimeEq(bits, 0) 571 } 572 573 // IsOne returns whether or not the field value is equal to one in constant 574 // time. 575 // 576 // Preconditions: 577 // - The field value MUST be normalized 578 func (f *FieldVal) IsOne() bool { 579 // The value can only be one if the single lowest significant bit is set in 580 // the first word and no other bits are set in any of the other words. 581 // This is a constant time implementation. 582 bits := (f.n[0] ^ 1) | f.n[1] | f.n[2] | f.n[3] | f.n[4] | f.n[5] | 583 f.n[6] | f.n[7] | f.n[8] | f.n[9] 584 585 return bits == 0 586 } 587 588 // IsOddBit returns 1 when the field value is an odd number or 0 otherwise in 589 // constant time. 590 // 591 // Note that a bool is not used here because it is not possible in Go to convert 592 // from a bool to numeric value in constant time and many constant-time 593 // operations require a numeric value. See IsOdd for the version that returns a 594 // bool. 595 // 596 // Preconditions: 597 // - The field value MUST be normalized 598 func (f *FieldVal) IsOddBit() uint32 { 599 // Only odd numbers have the bottom bit set. 600 return f.n[0] & 1 601 } 602 603 // IsOdd returns whether or not the field value is an odd number in constant 604 // time. 605 // 606 // Preconditions: 607 // - The field value MUST be normalized 608 func (f *FieldVal) IsOdd() bool { 609 // Only odd numbers have the bottom bit set. 610 return f.n[0]&1 == 1 611 } 612 613 // Equals returns whether or not the two field values are the same in constant 614 // time. 615 // 616 // Preconditions: 617 // - Both field values being compared MUST be normalized 618 func (f *FieldVal) Equals(val *FieldVal) bool { 619 // Xor only sets bits when they are different, so the two field values 620 // can only be the same if no bits are set after xoring each word. 621 // This is a constant time implementation. 622 bits := (f.n[0] ^ val.n[0]) | (f.n[1] ^ val.n[1]) | (f.n[2] ^ val.n[2]) | 623 (f.n[3] ^ val.n[3]) | (f.n[4] ^ val.n[4]) | (f.n[5] ^ val.n[5]) | 624 (f.n[6] ^ val.n[6]) | (f.n[7] ^ val.n[7]) | (f.n[8] ^ val.n[8]) | 625 (f.n[9] ^ val.n[9]) 626 627 return bits == 0 628 } 629 630 // NegateVal negates the passed value and stores the result in f in constant 631 // time. The caller must provide the magnitude of the passed value for a 632 // correct result. 633 // 634 // The field value is returned to support chaining. This enables syntax like: 635 // f.NegateVal(f2).AddInt(1) so that f = -f2 + 1. 636 // 637 // Preconditions: 638 // - The max magnitude MUST be 63 639 // Output Normalized: No 640 // Output Max Magnitude: Input magnitude + 1 641 func (f *FieldVal) NegateVal(val *FieldVal, magnitude uint32) *FieldVal { 642 // Negation in the field is just the prime minus the value. However, 643 // in order to allow negation against a field value without having to 644 // normalize/reduce it first, multiply by the magnitude (that is how 645 // "far" away it is from the normalized value) to adjust. Also, since 646 // negating a value pushes it one more order of magnitude away from the 647 // normalized range, add 1 to compensate. 648 // 649 // For some intuition here, imagine you're performing mod 12 arithmetic 650 // (picture a clock) and you are negating the number 7. So you start at 651 // 12 (which is of course 0 under mod 12) and count backwards (left on 652 // the clock) 7 times to arrive at 5. Notice this is just 12-7 = 5. 653 // Now, assume you're starting with 19, which is a number that is 654 // already larger than the modulus and congruent to 7 (mod 12). When a 655 // value is already in the desired range, its magnitude is 1. Since 19 656 // is an additional "step", its magnitude (mod 12) is 2. Since any 657 // multiple of the modulus is congruent to zero (mod m), the answer can 658 // be shortcut by simply multiplying the magnitude by the modulus and 659 // subtracting. Keeping with the example, this would be (2*12)-19 = 5. 660 f.n[0] = (magnitude+1)*fieldPrimeWordZero - val.n[0] 661 f.n[1] = (magnitude+1)*fieldPrimeWordOne - val.n[1] 662 f.n[2] = (magnitude+1)*fieldBaseMask - val.n[2] 663 f.n[3] = (magnitude+1)*fieldBaseMask - val.n[3] 664 f.n[4] = (magnitude+1)*fieldBaseMask - val.n[4] 665 f.n[5] = (magnitude+1)*fieldBaseMask - val.n[5] 666 f.n[6] = (magnitude+1)*fieldBaseMask - val.n[6] 667 f.n[7] = (magnitude+1)*fieldBaseMask - val.n[7] 668 f.n[8] = (magnitude+1)*fieldBaseMask - val.n[8] 669 f.n[9] = (magnitude+1)*fieldMSBMask - val.n[9] 670 671 return f 672 } 673 674 // Negate negates the field value in constant time. The existing field value is 675 // modified. The caller must provide the magnitude of the field value for a 676 // correct result. 677 // 678 // The field value is returned to support chaining. This enables syntax like: 679 // f.Negate().AddInt(1) so that f = -f + 1. 680 // 681 // Preconditions: 682 // - The max magnitude MUST be 63 683 // Output Normalized: No 684 // Output Max Magnitude: Input magnitude + 1 685 func (f *FieldVal) Negate(magnitude uint32) *FieldVal { 686 return f.NegateVal(f, magnitude) 687 } 688 689 // AddInt adds the passed integer to the existing field value and stores the 690 // result in f in constant time. This is a convenience function since it is 691 // fairly common to perform some arithmetic with small native integers. 692 // 693 // The field value is returned to support chaining. This enables syntax like: 694 // f.AddInt(1).Add(f2) so that f = f + 1 + f2. 695 // 696 // Preconditions: 697 // - The field value MUST have a max magnitude of 63 698 // Output Normalized: No 699 // Output Max Magnitude: Existing field magnitude + 1 700 func (f *FieldVal) AddInt(ui uint16) *FieldVal { 701 // Since the field representation intentionally provides overflow bits, 702 // it's ok to use carryless addition as the carry bit is safely part of 703 // the word and will be normalized out. 704 f.n[0] += uint32(ui) 705 706 return f 707 } 708 709 // Add adds the passed value to the existing field value and stores the result 710 // in f in constant time. 711 // 712 // The field value is returned to support chaining. This enables syntax like: 713 // f.Add(f2).AddInt(1) so that f = f + f2 + 1. 714 // 715 // Preconditions: 716 // - The sum of the magnitudes of the two field values MUST be a max of 64 717 // Output Normalized: No 718 // Output Max Magnitude: Sum of the magnitude of the two individual field values 719 func (f *FieldVal) Add(val *FieldVal) *FieldVal { 720 // Since the field representation intentionally provides overflow bits, 721 // it's ok to use carryless addition as the carry bit is safely part of 722 // each word and will be normalized out. This could obviously be done 723 // in a loop, but the unrolled version is faster. 724 f.n[0] += val.n[0] 725 f.n[1] += val.n[1] 726 f.n[2] += val.n[2] 727 f.n[3] += val.n[3] 728 f.n[4] += val.n[4] 729 f.n[5] += val.n[5] 730 f.n[6] += val.n[6] 731 f.n[7] += val.n[7] 732 f.n[8] += val.n[8] 733 f.n[9] += val.n[9] 734 735 return f 736 } 737 738 // Add2 adds the passed two field values together and stores the result in f in 739 // constant time. 740 // 741 // The field value is returned to support chaining. This enables syntax like: 742 // f3.Add2(f, f2).AddInt(1) so that f3 = f + f2 + 1. 743 // 744 // Preconditions: 745 // - The sum of the magnitudes of the two field values MUST be a max of 64 746 // Output Normalized: No 747 // Output Max Magnitude: Sum of the magnitude of the two field values 748 func (f *FieldVal) Add2(val *FieldVal, val2 *FieldVal) *FieldVal { 749 // Since the field representation intentionally provides overflow bits, 750 // it's ok to use carryless addition as the carry bit is safely part of 751 // each word and will be normalized out. This could obviously be done 752 // in a loop, but the unrolled version is faster. 753 f.n[0] = val.n[0] + val2.n[0] 754 f.n[1] = val.n[1] + val2.n[1] 755 f.n[2] = val.n[2] + val2.n[2] 756 f.n[3] = val.n[3] + val2.n[3] 757 f.n[4] = val.n[4] + val2.n[4] 758 f.n[5] = val.n[5] + val2.n[5] 759 f.n[6] = val.n[6] + val2.n[6] 760 f.n[7] = val.n[7] + val2.n[7] 761 f.n[8] = val.n[8] + val2.n[8] 762 f.n[9] = val.n[9] + val2.n[9] 763 764 return f 765 } 766 767 // MulInt multiplies the field value by the passed int and stores the result in 768 // f in constant time. Note that this function can overflow if multiplying the 769 // value by any of the individual words exceeds a max uint32. Therefore it is 770 // important that the caller ensures no overflows will occur before using this 771 // function. 772 // 773 // The field value is returned to support chaining. This enables syntax like: 774 // f.MulInt(2).Add(f2) so that f = 2 * f + f2. 775 // 776 // Preconditions: 777 // - The field value magnitude multiplied by given val MUST be a max of 64 778 // Output Normalized: No 779 // Output Max Magnitude: Existing field magnitude times the provided integer val 780 func (f *FieldVal) MulInt(val uint8) *FieldVal { 781 // Since each word of the field representation can hold up to 782 // 32 - fieldBase extra bits which will be normalized out, it's safe 783 // to multiply each word without using a larger type or carry 784 // propagation so long as the values won't overflow a uint32. This 785 // could obviously be done in a loop, but the unrolled version is 786 // faster. 787 ui := uint32(val) 788 f.n[0] *= ui 789 f.n[1] *= ui 790 f.n[2] *= ui 791 f.n[3] *= ui 792 f.n[4] *= ui 793 f.n[5] *= ui 794 f.n[6] *= ui 795 f.n[7] *= ui 796 f.n[8] *= ui 797 f.n[9] *= ui 798 799 return f 800 } 801 802 // Mul multiplies the passed value to the existing field value and stores the 803 // result in f in constant time. Note that this function can overflow if 804 // multiplying any of the individual words exceeds a max uint32. In practice, 805 // this means the magnitude of either value involved in the multiplication must 806 // be a max of 8. 807 // 808 // The field value is returned to support chaining. This enables syntax like: 809 // f.Mul(f2).AddInt(1) so that f = (f * f2) + 1. 810 // 811 // Preconditions: 812 // - Both field values MUST have a max magnitude of 8 813 // Output Normalized: No 814 // Output Max Magnitude: 1 815 func (f *FieldVal) Mul(val *FieldVal) *FieldVal { 816 return f.Mul2(f, val) 817 } 818 819 // Mul2 multiplies the passed two field values together and stores the result in 820 // f in constant time. Note that this function can overflow if multiplying any 821 // of the individual words exceeds a max uint32. In practice, this means the 822 // magnitude of either value involved in the multiplication must be a max of 8. 823 // 824 // The field value is returned to support chaining. This enables syntax like: 825 // f3.Mul2(f, f2).AddInt(1) so that f3 = (f * f2) + 1. 826 // 827 // Preconditions: 828 // - Both input field values MUST have a max magnitude of 8 829 // Output Normalized: No 830 // Output Max Magnitude: 1 831 func (f *FieldVal) Mul2(val *FieldVal, val2 *FieldVal) *FieldVal { 832 // This could be done with a couple of for loops and an array to store 833 // the intermediate terms, but this unrolled version is significantly 834 // faster. 835 836 // Terms for 2^(fieldBase*0). 837 m := uint64(val.n[0]) * uint64(val2.n[0]) 838 t0 := m & fieldBaseMask 839 840 // Terms for 2^(fieldBase*1). 841 m = (m >> fieldBase) + 842 uint64(val.n[0])*uint64(val2.n[1]) + 843 uint64(val.n[1])*uint64(val2.n[0]) 844 t1 := m & fieldBaseMask 845 846 // Terms for 2^(fieldBase*2). 847 m = (m >> fieldBase) + 848 uint64(val.n[0])*uint64(val2.n[2]) + 849 uint64(val.n[1])*uint64(val2.n[1]) + 850 uint64(val.n[2])*uint64(val2.n[0]) 851 t2 := m & fieldBaseMask 852 853 // Terms for 2^(fieldBase*3). 854 m = (m >> fieldBase) + 855 uint64(val.n[0])*uint64(val2.n[3]) + 856 uint64(val.n[1])*uint64(val2.n[2]) + 857 uint64(val.n[2])*uint64(val2.n[1]) + 858 uint64(val.n[3])*uint64(val2.n[0]) 859 t3 := m & fieldBaseMask 860 861 // Terms for 2^(fieldBase*4). 862 m = (m >> fieldBase) + 863 uint64(val.n[0])*uint64(val2.n[4]) + 864 uint64(val.n[1])*uint64(val2.n[3]) + 865 uint64(val.n[2])*uint64(val2.n[2]) + 866 uint64(val.n[3])*uint64(val2.n[1]) + 867 uint64(val.n[4])*uint64(val2.n[0]) 868 t4 := m & fieldBaseMask 869 870 // Terms for 2^(fieldBase*5). 871 m = (m >> fieldBase) + 872 uint64(val.n[0])*uint64(val2.n[5]) + 873 uint64(val.n[1])*uint64(val2.n[4]) + 874 uint64(val.n[2])*uint64(val2.n[3]) + 875 uint64(val.n[3])*uint64(val2.n[2]) + 876 uint64(val.n[4])*uint64(val2.n[1]) + 877 uint64(val.n[5])*uint64(val2.n[0]) 878 t5 := m & fieldBaseMask 879 880 // Terms for 2^(fieldBase*6). 881 m = (m >> fieldBase) + 882 uint64(val.n[0])*uint64(val2.n[6]) + 883 uint64(val.n[1])*uint64(val2.n[5]) + 884 uint64(val.n[2])*uint64(val2.n[4]) + 885 uint64(val.n[3])*uint64(val2.n[3]) + 886 uint64(val.n[4])*uint64(val2.n[2]) + 887 uint64(val.n[5])*uint64(val2.n[1]) + 888 uint64(val.n[6])*uint64(val2.n[0]) 889 t6 := m & fieldBaseMask 890 891 // Terms for 2^(fieldBase*7). 892 m = (m >> fieldBase) + 893 uint64(val.n[0])*uint64(val2.n[7]) + 894 uint64(val.n[1])*uint64(val2.n[6]) + 895 uint64(val.n[2])*uint64(val2.n[5]) + 896 uint64(val.n[3])*uint64(val2.n[4]) + 897 uint64(val.n[4])*uint64(val2.n[3]) + 898 uint64(val.n[5])*uint64(val2.n[2]) + 899 uint64(val.n[6])*uint64(val2.n[1]) + 900 uint64(val.n[7])*uint64(val2.n[0]) 901 t7 := m & fieldBaseMask 902 903 // Terms for 2^(fieldBase*8). 904 m = (m >> fieldBase) + 905 uint64(val.n[0])*uint64(val2.n[8]) + 906 uint64(val.n[1])*uint64(val2.n[7]) + 907 uint64(val.n[2])*uint64(val2.n[6]) + 908 uint64(val.n[3])*uint64(val2.n[5]) + 909 uint64(val.n[4])*uint64(val2.n[4]) + 910 uint64(val.n[5])*uint64(val2.n[3]) + 911 uint64(val.n[6])*uint64(val2.n[2]) + 912 uint64(val.n[7])*uint64(val2.n[1]) + 913 uint64(val.n[8])*uint64(val2.n[0]) 914 t8 := m & fieldBaseMask 915 916 // Terms for 2^(fieldBase*9). 917 m = (m >> fieldBase) + 918 uint64(val.n[0])*uint64(val2.n[9]) + 919 uint64(val.n[1])*uint64(val2.n[8]) + 920 uint64(val.n[2])*uint64(val2.n[7]) + 921 uint64(val.n[3])*uint64(val2.n[6]) + 922 uint64(val.n[4])*uint64(val2.n[5]) + 923 uint64(val.n[5])*uint64(val2.n[4]) + 924 uint64(val.n[6])*uint64(val2.n[3]) + 925 uint64(val.n[7])*uint64(val2.n[2]) + 926 uint64(val.n[8])*uint64(val2.n[1]) + 927 uint64(val.n[9])*uint64(val2.n[0]) 928 t9 := m & fieldBaseMask 929 930 // Terms for 2^(fieldBase*10). 931 m = (m >> fieldBase) + 932 uint64(val.n[1])*uint64(val2.n[9]) + 933 uint64(val.n[2])*uint64(val2.n[8]) + 934 uint64(val.n[3])*uint64(val2.n[7]) + 935 uint64(val.n[4])*uint64(val2.n[6]) + 936 uint64(val.n[5])*uint64(val2.n[5]) + 937 uint64(val.n[6])*uint64(val2.n[4]) + 938 uint64(val.n[7])*uint64(val2.n[3]) + 939 uint64(val.n[8])*uint64(val2.n[2]) + 940 uint64(val.n[9])*uint64(val2.n[1]) 941 t10 := m & fieldBaseMask 942 943 // Terms for 2^(fieldBase*11). 944 m = (m >> fieldBase) + 945 uint64(val.n[2])*uint64(val2.n[9]) + 946 uint64(val.n[3])*uint64(val2.n[8]) + 947 uint64(val.n[4])*uint64(val2.n[7]) + 948 uint64(val.n[5])*uint64(val2.n[6]) + 949 uint64(val.n[6])*uint64(val2.n[5]) + 950 uint64(val.n[7])*uint64(val2.n[4]) + 951 uint64(val.n[8])*uint64(val2.n[3]) + 952 uint64(val.n[9])*uint64(val2.n[2]) 953 t11 := m & fieldBaseMask 954 955 // Terms for 2^(fieldBase*12). 956 m = (m >> fieldBase) + 957 uint64(val.n[3])*uint64(val2.n[9]) + 958 uint64(val.n[4])*uint64(val2.n[8]) + 959 uint64(val.n[5])*uint64(val2.n[7]) + 960 uint64(val.n[6])*uint64(val2.n[6]) + 961 uint64(val.n[7])*uint64(val2.n[5]) + 962 uint64(val.n[8])*uint64(val2.n[4]) + 963 uint64(val.n[9])*uint64(val2.n[3]) 964 t12 := m & fieldBaseMask 965 966 // Terms for 2^(fieldBase*13). 967 m = (m >> fieldBase) + 968 uint64(val.n[4])*uint64(val2.n[9]) + 969 uint64(val.n[5])*uint64(val2.n[8]) + 970 uint64(val.n[6])*uint64(val2.n[7]) + 971 uint64(val.n[7])*uint64(val2.n[6]) + 972 uint64(val.n[8])*uint64(val2.n[5]) + 973 uint64(val.n[9])*uint64(val2.n[4]) 974 t13 := m & fieldBaseMask 975 976 // Terms for 2^(fieldBase*14). 977 m = (m >> fieldBase) + 978 uint64(val.n[5])*uint64(val2.n[9]) + 979 uint64(val.n[6])*uint64(val2.n[8]) + 980 uint64(val.n[7])*uint64(val2.n[7]) + 981 uint64(val.n[8])*uint64(val2.n[6]) + 982 uint64(val.n[9])*uint64(val2.n[5]) 983 t14 := m & fieldBaseMask 984 985 // Terms for 2^(fieldBase*15). 986 m = (m >> fieldBase) + 987 uint64(val.n[6])*uint64(val2.n[9]) + 988 uint64(val.n[7])*uint64(val2.n[8]) + 989 uint64(val.n[8])*uint64(val2.n[7]) + 990 uint64(val.n[9])*uint64(val2.n[6]) 991 t15 := m & fieldBaseMask 992 993 // Terms for 2^(fieldBase*16). 994 m = (m >> fieldBase) + 995 uint64(val.n[7])*uint64(val2.n[9]) + 996 uint64(val.n[8])*uint64(val2.n[8]) + 997 uint64(val.n[9])*uint64(val2.n[7]) 998 t16 := m & fieldBaseMask 999 1000 // Terms for 2^(fieldBase*17). 1001 m = (m >> fieldBase) + 1002 uint64(val.n[8])*uint64(val2.n[9]) + 1003 uint64(val.n[9])*uint64(val2.n[8]) 1004 t17 := m & fieldBaseMask 1005 1006 // Terms for 2^(fieldBase*18). 1007 m = (m >> fieldBase) + uint64(val.n[9])*uint64(val2.n[9]) 1008 t18 := m & fieldBaseMask 1009 1010 // What's left is for 2^(fieldBase*19). 1011 t19 := m >> fieldBase 1012 1013 // At this point, all of the terms are grouped into their respective 1014 // base. 1015 // 1016 // Per [HAC] section 14.3.4: Reduction method of moduli of special form, 1017 // when the modulus is of the special form m = b^t - c, highly efficient 1018 // reduction can be achieved per the provided algorithm. 1019 // 1020 // The secp256k1 prime is equivalent to 2^256 - 4294968273, so it fits 1021 // this criteria. 1022 // 1023 // 4294968273 in field representation (base 2^26) is: 1024 // n[0] = 977 1025 // n[1] = 64 1026 // That is to say (2^26 * 64) + 977 = 4294968273 1027 // 1028 // Since each word is in base 26, the upper terms (t10 and up) start 1029 // at 260 bits (versus the final desired range of 256 bits), so the 1030 // field representation of 'c' from above needs to be adjusted for the 1031 // extra 4 bits by multiplying it by 2^4 = 16. 4294968273 * 16 = 1032 // 68719492368. Thus, the adjusted field representation of 'c' is: 1033 // n[0] = 977 * 16 = 15632 1034 // n[1] = 64 * 16 = 1024 1035 // That is to say (2^26 * 1024) + 15632 = 68719492368 1036 // 1037 // To reduce the final term, t19, the entire 'c' value is needed instead 1038 // of only n[0] because there are no more terms left to handle n[1]. 1039 // This means there might be some magnitude left in the upper bits that 1040 // is handled below. 1041 m = t0 + t10*15632 1042 t0 = m & fieldBaseMask 1043 m = (m >> fieldBase) + t1 + t10*1024 + t11*15632 1044 t1 = m & fieldBaseMask 1045 m = (m >> fieldBase) + t2 + t11*1024 + t12*15632 1046 t2 = m & fieldBaseMask 1047 m = (m >> fieldBase) + t3 + t12*1024 + t13*15632 1048 t3 = m & fieldBaseMask 1049 m = (m >> fieldBase) + t4 + t13*1024 + t14*15632 1050 t4 = m & fieldBaseMask 1051 m = (m >> fieldBase) + t5 + t14*1024 + t15*15632 1052 t5 = m & fieldBaseMask 1053 m = (m >> fieldBase) + t6 + t15*1024 + t16*15632 1054 t6 = m & fieldBaseMask 1055 m = (m >> fieldBase) + t7 + t16*1024 + t17*15632 1056 t7 = m & fieldBaseMask 1057 m = (m >> fieldBase) + t8 + t17*1024 + t18*15632 1058 t8 = m & fieldBaseMask 1059 m = (m >> fieldBase) + t9 + t18*1024 + t19*68719492368 1060 t9 = m & fieldMSBMask 1061 m = m >> fieldMSBBits 1062 1063 // At this point, if the magnitude is greater than 0, the overall value 1064 // is greater than the max possible 256-bit value. In particular, it is 1065 // "how many times larger" than the max value it is. 1066 // 1067 // The algorithm presented in [HAC] section 14.3.4 repeats until the 1068 // quotient is zero. However, due to the above, we already know at 1069 // least how many times we would need to repeat as it's the value 1070 // currently in m. Thus we can simply multiply the magnitude by the 1071 // field representation of the prime and do a single iteration. Notice 1072 // that nothing will be changed when the magnitude is zero, so we could 1073 // skip this in that case, however always running regardless allows it 1074 // to run in constant time. The final result will be in the range 1075 // 0 <= result <= prime + (2^64 - c), so it is guaranteed to have a 1076 // magnitude of 1, but it is denormalized. 1077 d := t0 + m*977 1078 f.n[0] = uint32(d & fieldBaseMask) 1079 d = (d >> fieldBase) + t1 + m*64 1080 f.n[1] = uint32(d & fieldBaseMask) 1081 f.n[2] = uint32((d >> fieldBase) + t2) 1082 f.n[3] = uint32(t3) 1083 f.n[4] = uint32(t4) 1084 f.n[5] = uint32(t5) 1085 f.n[6] = uint32(t6) 1086 f.n[7] = uint32(t7) 1087 f.n[8] = uint32(t8) 1088 f.n[9] = uint32(t9) 1089 1090 return f 1091 } 1092 1093 // SquareRootVal either calculates the square root of the passed value when it 1094 // exists or the square root of the negation of the value when it does not exist 1095 // and stores the result in f in constant time. The return flag is true when 1096 // the calculated square root is for the passed value itself and false when it 1097 // is for its negation. 1098 // 1099 // Note that this function can overflow if multiplying any of the individual 1100 // words exceeds a max uint32. In practice, this means the magnitude of the 1101 // field must be a max of 8 to prevent overflow. The magnitude of the result 1102 // will be 1. 1103 // 1104 // Preconditions: 1105 // - The input field value MUST have a max magnitude of 8 1106 // Output Normalized: No 1107 // Output Max Magnitude: 1 1108 func (f *FieldVal) SquareRootVal(val *FieldVal) bool { 1109 // This uses the Tonelli-Shanks method for calculating the square root of 1110 // the value when it exists. The key principles of the method follow. 1111 // 1112 // Fermat's little theorem states that for a nonzero number 'a' and prime 1113 // 'p', a^(p-1) ≡ 1 (mod p). 1114 // 1115 // Further, Euler's criterion states that an integer 'a' has a square root 1116 // (aka is a quadratic residue) modulo a prime if a^((p-1)/2) ≡ 1 (mod p) 1117 // and, conversely, when it does NOT have a square root (aka 'a' is a 1118 // non-residue) a^((p-1)/2) ≡ -1 (mod p). 1119 // 1120 // This can be seen by considering that Fermat's little theorem can be 1121 // written as (a^((p-1)/2) - 1)(a^((p-1)/2) + 1) ≡ 0 (mod p). Therefore, 1122 // one of the two factors must be 0. Then, when a ≡ x^2 (aka 'a' is a 1123 // quadratic residue), (x^2)^((p-1)/2) ≡ x^(p-1) ≡ 1 (mod p) which implies 1124 // the first factor must be zero. Finally, per Lagrange's theorem, the 1125 // non-residues are the only remaining possible solutions and thus must make 1126 // the second factor zero to satisfy Fermat's little theorem implying that 1127 // a^((p-1)/2) ≡ -1 (mod p) for that case. 1128 // 1129 // The Tonelli-Shanks method uses these facts along with factoring out 1130 // powers of two to solve a congruence that results in either the solution 1131 // when the square root exists or the square root of the negation of the 1132 // value when it does not. In the case of primes that are ≡ 3 (mod 4), the 1133 // possible solutions are r = ±a^((p+1)/4) (mod p). Therefore, either r^2 ≡ 1134 // a (mod p) is true in which case ±r are the two solutions, or r^2 ≡ -a 1135 // (mod p) in which case 'a' is a non-residue and there are no solutions. 1136 // 1137 // The secp256k1 prime is ≡ 3 (mod 4), so this result applies. 1138 // 1139 // In other words, calculate a^((p+1)/4) and then square it and check it 1140 // against the original value to determine if it is actually the square 1141 // root. 1142 // 1143 // In order to efficiently compute a^((p+1)/4), (p+1)/4 needs to be split 1144 // into a sequence of squares and multiplications that minimizes the number 1145 // of multiplications needed (since they are more costly than squarings). 1146 // 1147 // The secp256k1 prime + 1 / 4 is 2^254 - 2^30 - 244. In binary, that is: 1148 // 1149 // 00111111 11111111 11111111 11111111 1150 // 11111111 11111111 11111111 11111111 1151 // 11111111 11111111 11111111 11111111 1152 // 11111111 11111111 11111111 11111111 1153 // 11111111 11111111 11111111 11111111 1154 // 11111111 11111111 11111111 11111111 1155 // 11111111 11111111 11111111 11111111 1156 // 10111111 11111111 11111111 00001100 1157 // 1158 // Notice that can be broken up into three windows of consecutive 1s (in 1159 // order of least to most significant) as: 1160 // 1161 // 6-bit window with two bits set (bits 4, 5, 6, 7 unset) 1162 // 23-bit window with 22 bits set (bit 30 unset) 1163 // 223-bit window with all 223 bits set 1164 // 1165 // Thus, the groups of 1 bits in each window forms the set: 1166 // S = {2, 22, 223}. 1167 // 1168 // The strategy is to calculate a^(2^n - 1) for each grouping via an 1169 // addition chain with a sliding window. 1170 // 1171 // The addition chain used is (credits to Peter Dettman): 1172 // (0,0),(1,0),(2,2),(3,2),(4,1),(5,5),(6,6),(7,7),(8,8),(9,7),(10,2) 1173 // => 2^1 2^[2] 2^3 2^6 2^9 2^11 2^[22] 2^44 2^88 2^176 2^220 2^[223] 1174 // 1175 // This has a cost of 254 field squarings and 13 field multiplications. 1176 var a, a2, a3, a6, a9, a11, a22, a44, a88, a176, a220, a223 FieldVal 1177 a.Set(val) 1178 a2.SquareVal(&a).Mul(&a) // a2 = a^(2^2 - 1) 1179 a3.SquareVal(&a2).Mul(&a) // a3 = a^(2^3 - 1) 1180 a6.SquareVal(&a3).Square().Square() // a6 = a^(2^6 - 2^3) 1181 a6.Mul(&a3) // a6 = a^(2^6 - 1) 1182 a9.SquareVal(&a6).Square().Square() // a9 = a^(2^9 - 2^3) 1183 a9.Mul(&a3) // a9 = a^(2^9 - 1) 1184 a11.SquareVal(&a9).Square() // a11 = a^(2^11 - 2^2) 1185 a11.Mul(&a2) // a11 = a^(2^11 - 1) 1186 a22.SquareVal(&a11).Square().Square().Square().Square() // a22 = a^(2^16 - 2^5) 1187 a22.Square().Square().Square().Square().Square() // a22 = a^(2^21 - 2^10) 1188 a22.Square() // a22 = a^(2^22 - 2^11) 1189 a22.Mul(&a11) // a22 = a^(2^22 - 1) 1190 a44.SquareVal(&a22).Square().Square().Square().Square() // a44 = a^(2^27 - 2^5) 1191 a44.Square().Square().Square().Square().Square() // a44 = a^(2^32 - 2^10) 1192 a44.Square().Square().Square().Square().Square() // a44 = a^(2^37 - 2^15) 1193 a44.Square().Square().Square().Square().Square() // a44 = a^(2^42 - 2^20) 1194 a44.Square().Square() // a44 = a^(2^44 - 2^22) 1195 a44.Mul(&a22) // a44 = a^(2^44 - 1) 1196 a88.SquareVal(&a44).Square().Square().Square().Square() // a88 = a^(2^49 - 2^5) 1197 a88.Square().Square().Square().Square().Square() // a88 = a^(2^54 - 2^10) 1198 a88.Square().Square().Square().Square().Square() // a88 = a^(2^59 - 2^15) 1199 a88.Square().Square().Square().Square().Square() // a88 = a^(2^64 - 2^20) 1200 a88.Square().Square().Square().Square().Square() // a88 = a^(2^69 - 2^25) 1201 a88.Square().Square().Square().Square().Square() // a88 = a^(2^74 - 2^30) 1202 a88.Square().Square().Square().Square().Square() // a88 = a^(2^79 - 2^35) 1203 a88.Square().Square().Square().Square().Square() // a88 = a^(2^84 - 2^40) 1204 a88.Square().Square().Square().Square() // a88 = a^(2^88 - 2^44) 1205 a88.Mul(&a44) // a88 = a^(2^88 - 1) 1206 a176.SquareVal(&a88).Square().Square().Square().Square() // a176 = a^(2^93 - 2^5) 1207 a176.Square().Square().Square().Square().Square() // a176 = a^(2^98 - 2^10) 1208 a176.Square().Square().Square().Square().Square() // a176 = a^(2^103 - 2^15) 1209 a176.Square().Square().Square().Square().Square() // a176 = a^(2^108 - 2^20) 1210 a176.Square().Square().Square().Square().Square() // a176 = a^(2^113 - 2^25) 1211 a176.Square().Square().Square().Square().Square() // a176 = a^(2^118 - 2^30) 1212 a176.Square().Square().Square().Square().Square() // a176 = a^(2^123 - 2^35) 1213 a176.Square().Square().Square().Square().Square() // a176 = a^(2^128 - 2^40) 1214 a176.Square().Square().Square().Square().Square() // a176 = a^(2^133 - 2^45) 1215 a176.Square().Square().Square().Square().Square() // a176 = a^(2^138 - 2^50) 1216 a176.Square().Square().Square().Square().Square() // a176 = a^(2^143 - 2^55) 1217 a176.Square().Square().Square().Square().Square() // a176 = a^(2^148 - 2^60) 1218 a176.Square().Square().Square().Square().Square() // a176 = a^(2^153 - 2^65) 1219 a176.Square().Square().Square().Square().Square() // a176 = a^(2^158 - 2^70) 1220 a176.Square().Square().Square().Square().Square() // a176 = a^(2^163 - 2^75) 1221 a176.Square().Square().Square().Square().Square() // a176 = a^(2^168 - 2^80) 1222 a176.Square().Square().Square().Square().Square() // a176 = a^(2^173 - 2^85) 1223 a176.Square().Square().Square() // a176 = a^(2^176 - 2^88) 1224 a176.Mul(&a88) // a176 = a^(2^176 - 1) 1225 a220.SquareVal(&a176).Square().Square().Square().Square() // a220 = a^(2^181 - 2^5) 1226 a220.Square().Square().Square().Square().Square() // a220 = a^(2^186 - 2^10) 1227 a220.Square().Square().Square().Square().Square() // a220 = a^(2^191 - 2^15) 1228 a220.Square().Square().Square().Square().Square() // a220 = a^(2^196 - 2^20) 1229 a220.Square().Square().Square().Square().Square() // a220 = a^(2^201 - 2^25) 1230 a220.Square().Square().Square().Square().Square() // a220 = a^(2^206 - 2^30) 1231 a220.Square().Square().Square().Square().Square() // a220 = a^(2^211 - 2^35) 1232 a220.Square().Square().Square().Square().Square() // a220 = a^(2^216 - 2^40) 1233 a220.Square().Square().Square().Square() // a220 = a^(2^220 - 2^44) 1234 a220.Mul(&a44) // a220 = a^(2^220 - 1) 1235 a223.SquareVal(&a220).Square().Square() // a223 = a^(2^223 - 2^3) 1236 a223.Mul(&a3) // a223 = a^(2^223 - 1) 1237 1238 f.SquareVal(&a223).Square().Square().Square().Square() // f = a^(2^228 - 2^5) 1239 f.Square().Square().Square().Square().Square() // f = a^(2^233 - 2^10) 1240 f.Square().Square().Square().Square().Square() // f = a^(2^238 - 2^15) 1241 f.Square().Square().Square().Square().Square() // f = a^(2^243 - 2^20) 1242 f.Square().Square().Square() // f = a^(2^246 - 2^23) 1243 f.Mul(&a22) // f = a^(2^246 - 2^22 - 1) 1244 f.Square().Square().Square().Square().Square() // f = a^(2^251 - 2^27 - 2^5) 1245 f.Square() // f = a^(2^252 - 2^28 - 2^6) 1246 f.Mul(&a2) // f = a^(2^252 - 2^28 - 2^6 - 2^1 - 1) 1247 f.Square().Square() // f = a^(2^254 - 2^30 - 2^8 - 2^3 - 2^2) 1248 // // = a^(2^254 - 2^30 - 244) 1249 // // = a^((p+1)/4) 1250 1251 // Ensure the calculated result is actually the square root by squaring it 1252 // and checking against the original value. 1253 var sqr FieldVal 1254 return sqr.SquareVal(f).Normalize().Equals(val.Normalize()) 1255 } 1256 1257 // Square squares the field value in constant time. The existing field value is 1258 // modified. Note that this function can overflow if multiplying any of the 1259 // individual words exceeds a max uint32. In practice, this means the magnitude 1260 // of the field must be a max of 8 to prevent overflow. 1261 // 1262 // The field value is returned to support chaining. This enables syntax like: 1263 // f.Square().Mul(f2) so that f = f^2 * f2. 1264 // 1265 // Preconditions: 1266 // - The field value MUST have a max magnitude of 8 1267 // Output Normalized: No 1268 // Output Max Magnitude: 1 1269 func (f *FieldVal) Square() *FieldVal { 1270 return f.SquareVal(f) 1271 } 1272 1273 // SquareVal squares the passed value and stores the result in f in constant 1274 // time. Note that this function can overflow if multiplying any of the 1275 // individual words exceeds a max uint32. In practice, this means the magnitude 1276 // of the field being squared must be a max of 8 to prevent overflow. 1277 // 1278 // The field value is returned to support chaining. This enables syntax like: 1279 // f3.SquareVal(f).Mul(f) so that f3 = f^2 * f = f^3. 1280 // 1281 // Preconditions: 1282 // - The input field value MUST have a max magnitude of 8 1283 // Output Normalized: No 1284 // Output Max Magnitude: 1 1285 func (f *FieldVal) SquareVal(val *FieldVal) *FieldVal { 1286 // This could be done with a couple of for loops and an array to store 1287 // the intermediate terms, but this unrolled version is significantly 1288 // faster. 1289 1290 // Terms for 2^(fieldBase*0). 1291 m := uint64(val.n[0]) * uint64(val.n[0]) 1292 t0 := m & fieldBaseMask 1293 1294 // Terms for 2^(fieldBase*1). 1295 m = (m >> fieldBase) + 2*uint64(val.n[0])*uint64(val.n[1]) 1296 t1 := m & fieldBaseMask 1297 1298 // Terms for 2^(fieldBase*2). 1299 m = (m >> fieldBase) + 1300 2*uint64(val.n[0])*uint64(val.n[2]) + 1301 uint64(val.n[1])*uint64(val.n[1]) 1302 t2 := m & fieldBaseMask 1303 1304 // Terms for 2^(fieldBase*3). 1305 m = (m >> fieldBase) + 1306 2*uint64(val.n[0])*uint64(val.n[3]) + 1307 2*uint64(val.n[1])*uint64(val.n[2]) 1308 t3 := m & fieldBaseMask 1309 1310 // Terms for 2^(fieldBase*4). 1311 m = (m >> fieldBase) + 1312 2*uint64(val.n[0])*uint64(val.n[4]) + 1313 2*uint64(val.n[1])*uint64(val.n[3]) + 1314 uint64(val.n[2])*uint64(val.n[2]) 1315 t4 := m & fieldBaseMask 1316 1317 // Terms for 2^(fieldBase*5). 1318 m = (m >> fieldBase) + 1319 2*uint64(val.n[0])*uint64(val.n[5]) + 1320 2*uint64(val.n[1])*uint64(val.n[4]) + 1321 2*uint64(val.n[2])*uint64(val.n[3]) 1322 t5 := m & fieldBaseMask 1323 1324 // Terms for 2^(fieldBase*6). 1325 m = (m >> fieldBase) + 1326 2*uint64(val.n[0])*uint64(val.n[6]) + 1327 2*uint64(val.n[1])*uint64(val.n[5]) + 1328 2*uint64(val.n[2])*uint64(val.n[4]) + 1329 uint64(val.n[3])*uint64(val.n[3]) 1330 t6 := m & fieldBaseMask 1331 1332 // Terms for 2^(fieldBase*7). 1333 m = (m >> fieldBase) + 1334 2*uint64(val.n[0])*uint64(val.n[7]) + 1335 2*uint64(val.n[1])*uint64(val.n[6]) + 1336 2*uint64(val.n[2])*uint64(val.n[5]) + 1337 2*uint64(val.n[3])*uint64(val.n[4]) 1338 t7 := m & fieldBaseMask 1339 1340 // Terms for 2^(fieldBase*8). 1341 m = (m >> fieldBase) + 1342 2*uint64(val.n[0])*uint64(val.n[8]) + 1343 2*uint64(val.n[1])*uint64(val.n[7]) + 1344 2*uint64(val.n[2])*uint64(val.n[6]) + 1345 2*uint64(val.n[3])*uint64(val.n[5]) + 1346 uint64(val.n[4])*uint64(val.n[4]) 1347 t8 := m & fieldBaseMask 1348 1349 // Terms for 2^(fieldBase*9). 1350 m = (m >> fieldBase) + 1351 2*uint64(val.n[0])*uint64(val.n[9]) + 1352 2*uint64(val.n[1])*uint64(val.n[8]) + 1353 2*uint64(val.n[2])*uint64(val.n[7]) + 1354 2*uint64(val.n[3])*uint64(val.n[6]) + 1355 2*uint64(val.n[4])*uint64(val.n[5]) 1356 t9 := m & fieldBaseMask 1357 1358 // Terms for 2^(fieldBase*10). 1359 m = (m >> fieldBase) + 1360 2*uint64(val.n[1])*uint64(val.n[9]) + 1361 2*uint64(val.n[2])*uint64(val.n[8]) + 1362 2*uint64(val.n[3])*uint64(val.n[7]) + 1363 2*uint64(val.n[4])*uint64(val.n[6]) + 1364 uint64(val.n[5])*uint64(val.n[5]) 1365 t10 := m & fieldBaseMask 1366 1367 // Terms for 2^(fieldBase*11). 1368 m = (m >> fieldBase) + 1369 2*uint64(val.n[2])*uint64(val.n[9]) + 1370 2*uint64(val.n[3])*uint64(val.n[8]) + 1371 2*uint64(val.n[4])*uint64(val.n[7]) + 1372 2*uint64(val.n[5])*uint64(val.n[6]) 1373 t11 := m & fieldBaseMask 1374 1375 // Terms for 2^(fieldBase*12). 1376 m = (m >> fieldBase) + 1377 2*uint64(val.n[3])*uint64(val.n[9]) + 1378 2*uint64(val.n[4])*uint64(val.n[8]) + 1379 2*uint64(val.n[5])*uint64(val.n[7]) + 1380 uint64(val.n[6])*uint64(val.n[6]) 1381 t12 := m & fieldBaseMask 1382 1383 // Terms for 2^(fieldBase*13). 1384 m = (m >> fieldBase) + 1385 2*uint64(val.n[4])*uint64(val.n[9]) + 1386 2*uint64(val.n[5])*uint64(val.n[8]) + 1387 2*uint64(val.n[6])*uint64(val.n[7]) 1388 t13 := m & fieldBaseMask 1389 1390 // Terms for 2^(fieldBase*14). 1391 m = (m >> fieldBase) + 1392 2*uint64(val.n[5])*uint64(val.n[9]) + 1393 2*uint64(val.n[6])*uint64(val.n[8]) + 1394 uint64(val.n[7])*uint64(val.n[7]) 1395 t14 := m & fieldBaseMask 1396 1397 // Terms for 2^(fieldBase*15). 1398 m = (m >> fieldBase) + 1399 2*uint64(val.n[6])*uint64(val.n[9]) + 1400 2*uint64(val.n[7])*uint64(val.n[8]) 1401 t15 := m & fieldBaseMask 1402 1403 // Terms for 2^(fieldBase*16). 1404 m = (m >> fieldBase) + 1405 2*uint64(val.n[7])*uint64(val.n[9]) + 1406 uint64(val.n[8])*uint64(val.n[8]) 1407 t16 := m & fieldBaseMask 1408 1409 // Terms for 2^(fieldBase*17). 1410 m = (m >> fieldBase) + 2*uint64(val.n[8])*uint64(val.n[9]) 1411 t17 := m & fieldBaseMask 1412 1413 // Terms for 2^(fieldBase*18). 1414 m = (m >> fieldBase) + uint64(val.n[9])*uint64(val.n[9]) 1415 t18 := m & fieldBaseMask 1416 1417 // What's left is for 2^(fieldBase*19). 1418 t19 := m >> fieldBase 1419 1420 // At this point, all of the terms are grouped into their respective 1421 // base. 1422 // 1423 // Per [HAC] section 14.3.4: Reduction method of moduli of special form, 1424 // when the modulus is of the special form m = b^t - c, highly efficient 1425 // reduction can be achieved per the provided algorithm. 1426 // 1427 // The secp256k1 prime is equivalent to 2^256 - 4294968273, so it fits 1428 // this criteria. 1429 // 1430 // 4294968273 in field representation (base 2^26) is: 1431 // n[0] = 977 1432 // n[1] = 64 1433 // That is to say (2^26 * 64) + 977 = 4294968273 1434 // 1435 // Since each word is in base 26, the upper terms (t10 and up) start 1436 // at 260 bits (versus the final desired range of 256 bits), so the 1437 // field representation of 'c' from above needs to be adjusted for the 1438 // extra 4 bits by multiplying it by 2^4 = 16. 4294968273 * 16 = 1439 // 68719492368. Thus, the adjusted field representation of 'c' is: 1440 // n[0] = 977 * 16 = 15632 1441 // n[1] = 64 * 16 = 1024 1442 // That is to say (2^26 * 1024) + 15632 = 68719492368 1443 // 1444 // To reduce the final term, t19, the entire 'c' value is needed instead 1445 // of only n[0] because there are no more terms left to handle n[1]. 1446 // This means there might be some magnitude left in the upper bits that 1447 // is handled below. 1448 m = t0 + t10*15632 1449 t0 = m & fieldBaseMask 1450 m = (m >> fieldBase) + t1 + t10*1024 + t11*15632 1451 t1 = m & fieldBaseMask 1452 m = (m >> fieldBase) + t2 + t11*1024 + t12*15632 1453 t2 = m & fieldBaseMask 1454 m = (m >> fieldBase) + t3 + t12*1024 + t13*15632 1455 t3 = m & fieldBaseMask 1456 m = (m >> fieldBase) + t4 + t13*1024 + t14*15632 1457 t4 = m & fieldBaseMask 1458 m = (m >> fieldBase) + t5 + t14*1024 + t15*15632 1459 t5 = m & fieldBaseMask 1460 m = (m >> fieldBase) + t6 + t15*1024 + t16*15632 1461 t6 = m & fieldBaseMask 1462 m = (m >> fieldBase) + t7 + t16*1024 + t17*15632 1463 t7 = m & fieldBaseMask 1464 m = (m >> fieldBase) + t8 + t17*1024 + t18*15632 1465 t8 = m & fieldBaseMask 1466 m = (m >> fieldBase) + t9 + t18*1024 + t19*68719492368 1467 t9 = m & fieldMSBMask 1468 m = m >> fieldMSBBits 1469 1470 // At this point, if the magnitude is greater than 0, the overall value 1471 // is greater than the max possible 256-bit value. In particular, it is 1472 // "how many times larger" than the max value it is. 1473 // 1474 // The algorithm presented in [HAC] section 14.3.4 repeats until the 1475 // quotient is zero. However, due to the above, we already know at 1476 // least how many times we would need to repeat as it's the value 1477 // currently in m. Thus we can simply multiply the magnitude by the 1478 // field representation of the prime and do a single iteration. Notice 1479 // that nothing will be changed when the magnitude is zero, so we could 1480 // skip this in that case, however always running regardless allows it 1481 // to run in constant time. The final result will be in the range 1482 // 0 <= result <= prime + (2^64 - c), so it is guaranteed to have a 1483 // magnitude of 1, but it is denormalized. 1484 n := t0 + m*977 1485 f.n[0] = uint32(n & fieldBaseMask) 1486 n = (n >> fieldBase) + t1 + m*64 1487 f.n[1] = uint32(n & fieldBaseMask) 1488 f.n[2] = uint32((n >> fieldBase) + t2) 1489 f.n[3] = uint32(t3) 1490 f.n[4] = uint32(t4) 1491 f.n[5] = uint32(t5) 1492 f.n[6] = uint32(t6) 1493 f.n[7] = uint32(t7) 1494 f.n[8] = uint32(t8) 1495 f.n[9] = uint32(t9) 1496 1497 return f 1498 } 1499 1500 // Inverse finds the modular multiplicative inverse of the field value in 1501 // constant time. The existing field value is modified. 1502 // 1503 // The field value is returned to support chaining. This enables syntax like: 1504 // f.Inverse().Mul(f2) so that f = f^-1 * f2. 1505 // 1506 // Preconditions: 1507 // - The field value MUST have a max magnitude of 8 1508 // Output Normalized: No 1509 // Output Max Magnitude: 1 1510 func (f *FieldVal) Inverse() *FieldVal { 1511 // Fermat's little theorem states that for a nonzero number a and prime 1512 // p, a^(p-1) ≡ 1 (mod p). Since the multiplicative inverse is 1513 // a*b ≡ 1 (mod p), it follows that b ≡ a*a^(p-2) ≡ a^(p-1) ≡ 1 (mod p). 1514 // Thus, a^(p-2) is the multiplicative inverse. 1515 // 1516 // In order to efficiently compute a^(p-2), p-2 needs to be split into 1517 // a sequence of squares and multiplications that minimizes the number 1518 // of multiplications needed (since they are more costly than 1519 // squarings). Intermediate results are saved and reused as well. 1520 // 1521 // The secp256k1 prime - 2 is 2^256 - 4294968275. 1522 // 1523 // This has a cost of 258 field squarings and 33 field multiplications. 1524 var a2, a3, a4, a10, a11, a21, a42, a45, a63, a1019, a1023 FieldVal 1525 a2.SquareVal(f) 1526 a3.Mul2(&a2, f) 1527 a4.SquareVal(&a2) 1528 a10.SquareVal(&a4).Mul(&a2) 1529 a11.Mul2(&a10, f) 1530 a21.Mul2(&a10, &a11) 1531 a42.SquareVal(&a21) 1532 a45.Mul2(&a42, &a3) 1533 a63.Mul2(&a42, &a21) 1534 a1019.SquareVal(&a63).Square().Square().Square().Mul(&a11) 1535 a1023.Mul2(&a1019, &a4) 1536 f.Set(&a63) // f = a^(2^6 - 1) 1537 f.Square().Square().Square().Square().Square() // f = a^(2^11 - 32) 1538 f.Square().Square().Square().Square().Square() // f = a^(2^16 - 1024) 1539 f.Mul(&a1023) // f = a^(2^16 - 1) 1540 f.Square().Square().Square().Square().Square() // f = a^(2^21 - 32) 1541 f.Square().Square().Square().Square().Square() // f = a^(2^26 - 1024) 1542 f.Mul(&a1023) // f = a^(2^26 - 1) 1543 f.Square().Square().Square().Square().Square() // f = a^(2^31 - 32) 1544 f.Square().Square().Square().Square().Square() // f = a^(2^36 - 1024) 1545 f.Mul(&a1023) // f = a^(2^36 - 1) 1546 f.Square().Square().Square().Square().Square() // f = a^(2^41 - 32) 1547 f.Square().Square().Square().Square().Square() // f = a^(2^46 - 1024) 1548 f.Mul(&a1023) // f = a^(2^46 - 1) 1549 f.Square().Square().Square().Square().Square() // f = a^(2^51 - 32) 1550 f.Square().Square().Square().Square().Square() // f = a^(2^56 - 1024) 1551 f.Mul(&a1023) // f = a^(2^56 - 1) 1552 f.Square().Square().Square().Square().Square() // f = a^(2^61 - 32) 1553 f.Square().Square().Square().Square().Square() // f = a^(2^66 - 1024) 1554 f.Mul(&a1023) // f = a^(2^66 - 1) 1555 f.Square().Square().Square().Square().Square() // f = a^(2^71 - 32) 1556 f.Square().Square().Square().Square().Square() // f = a^(2^76 - 1024) 1557 f.Mul(&a1023) // f = a^(2^76 - 1) 1558 f.Square().Square().Square().Square().Square() // f = a^(2^81 - 32) 1559 f.Square().Square().Square().Square().Square() // f = a^(2^86 - 1024) 1560 f.Mul(&a1023) // f = a^(2^86 - 1) 1561 f.Square().Square().Square().Square().Square() // f = a^(2^91 - 32) 1562 f.Square().Square().Square().Square().Square() // f = a^(2^96 - 1024) 1563 f.Mul(&a1023) // f = a^(2^96 - 1) 1564 f.Square().Square().Square().Square().Square() // f = a^(2^101 - 32) 1565 f.Square().Square().Square().Square().Square() // f = a^(2^106 - 1024) 1566 f.Mul(&a1023) // f = a^(2^106 - 1) 1567 f.Square().Square().Square().Square().Square() // f = a^(2^111 - 32) 1568 f.Square().Square().Square().Square().Square() // f = a^(2^116 - 1024) 1569 f.Mul(&a1023) // f = a^(2^116 - 1) 1570 f.Square().Square().Square().Square().Square() // f = a^(2^121 - 32) 1571 f.Square().Square().Square().Square().Square() // f = a^(2^126 - 1024) 1572 f.Mul(&a1023) // f = a^(2^126 - 1) 1573 f.Square().Square().Square().Square().Square() // f = a^(2^131 - 32) 1574 f.Square().Square().Square().Square().Square() // f = a^(2^136 - 1024) 1575 f.Mul(&a1023) // f = a^(2^136 - 1) 1576 f.Square().Square().Square().Square().Square() // f = a^(2^141 - 32) 1577 f.Square().Square().Square().Square().Square() // f = a^(2^146 - 1024) 1578 f.Mul(&a1023) // f = a^(2^146 - 1) 1579 f.Square().Square().Square().Square().Square() // f = a^(2^151 - 32) 1580 f.Square().Square().Square().Square().Square() // f = a^(2^156 - 1024) 1581 f.Mul(&a1023) // f = a^(2^156 - 1) 1582 f.Square().Square().Square().Square().Square() // f = a^(2^161 - 32) 1583 f.Square().Square().Square().Square().Square() // f = a^(2^166 - 1024) 1584 f.Mul(&a1023) // f = a^(2^166 - 1) 1585 f.Square().Square().Square().Square().Square() // f = a^(2^171 - 32) 1586 f.Square().Square().Square().Square().Square() // f = a^(2^176 - 1024) 1587 f.Mul(&a1023) // f = a^(2^176 - 1) 1588 f.Square().Square().Square().Square().Square() // f = a^(2^181 - 32) 1589 f.Square().Square().Square().Square().Square() // f = a^(2^186 - 1024) 1590 f.Mul(&a1023) // f = a^(2^186 - 1) 1591 f.Square().Square().Square().Square().Square() // f = a^(2^191 - 32) 1592 f.Square().Square().Square().Square().Square() // f = a^(2^196 - 1024) 1593 f.Mul(&a1023) // f = a^(2^196 - 1) 1594 f.Square().Square().Square().Square().Square() // f = a^(2^201 - 32) 1595 f.Square().Square().Square().Square().Square() // f = a^(2^206 - 1024) 1596 f.Mul(&a1023) // f = a^(2^206 - 1) 1597 f.Square().Square().Square().Square().Square() // f = a^(2^211 - 32) 1598 f.Square().Square().Square().Square().Square() // f = a^(2^216 - 1024) 1599 f.Mul(&a1023) // f = a^(2^216 - 1) 1600 f.Square().Square().Square().Square().Square() // f = a^(2^221 - 32) 1601 f.Square().Square().Square().Square().Square() // f = a^(2^226 - 1024) 1602 f.Mul(&a1019) // f = a^(2^226 - 5) 1603 f.Square().Square().Square().Square().Square() // f = a^(2^231 - 160) 1604 f.Square().Square().Square().Square().Square() // f = a^(2^236 - 5120) 1605 f.Mul(&a1023) // f = a^(2^236 - 4097) 1606 f.Square().Square().Square().Square().Square() // f = a^(2^241 - 131104) 1607 f.Square().Square().Square().Square().Square() // f = a^(2^246 - 4195328) 1608 f.Mul(&a1023) // f = a^(2^246 - 4194305) 1609 f.Square().Square().Square().Square().Square() // f = a^(2^251 - 134217760) 1610 f.Square().Square().Square().Square().Square() // f = a^(2^256 - 4294968320) 1611 return f.Mul(&a45) // f = a^(2^256 - 4294968275) = a^(p-2) 1612 } 1613 1614 // IsGtOrEqPrimeMinusOrder returns whether or not the field value exceeds the 1615 // group order divided by 2 in constant time. 1616 // 1617 // Preconditions: 1618 // - The field value MUST be normalized 1619 func (f *FieldVal) IsGtOrEqPrimeMinusOrder() bool { 1620 // The secp256k1 prime is equivalent to 2^256 - 4294968273 and the group 1621 // order is 2^256 - 432420386565659656852420866394968145599. Thus, 1622 // the prime minus the group order is: 1623 // 432420386565659656852420866390673177326 1624 // 1625 // In hex that is: 1626 // 0x00000000 00000000 00000000 00000001 45512319 50b75fc4 402da172 2fc9baee 1627 // 1628 // Converting that to field representation (base 2^26) is: 1629 // 1630 // n[0] = 0x03c9baee 1631 // n[1] = 0x03685c8b 1632 // n[2] = 0x01fc4402 1633 // n[3] = 0x006542dd 1634 // n[4] = 0x01455123 1635 // 1636 // This can be verified with the following test code: 1637 // pMinusN := new(big.Int).Sub(curveParams.P, curveParams.N) 1638 // var fv FieldVal 1639 // fv.SetByteSlice(pMinusN.Bytes()) 1640 // t.Logf("%x", fv.n) 1641 // 1642 // Outputs: [3c9baee 3685c8b 1fc4402 6542dd 1455123 0 0 0 0 0] 1643 const ( 1644 pMinusNWordZero = 0x03c9baee 1645 pMinusNWordOne = 0x03685c8b 1646 pMinusNWordTwo = 0x01fc4402 1647 pMinusNWordThree = 0x006542dd 1648 pMinusNWordFour = 0x01455123 1649 pMinusNWordFive = 0x00000000 1650 pMinusNWordSix = 0x00000000 1651 pMinusNWordSeven = 0x00000000 1652 pMinusNWordEight = 0x00000000 1653 pMinusNWordNine = 0x00000000 1654 ) 1655 1656 // The intuition here is that the value is greater than field prime minus 1657 // the group order if one of the higher individual words is greater than the 1658 // corresponding word and all higher words in the value are equal. 1659 result := constantTimeGreater(f.n[9], pMinusNWordNine) 1660 highWordsEqual := constantTimeEq(f.n[9], pMinusNWordNine) 1661 result |= highWordsEqual & constantTimeGreater(f.n[8], pMinusNWordEight) 1662 highWordsEqual &= constantTimeEq(f.n[8], pMinusNWordEight) 1663 result |= highWordsEqual & constantTimeGreater(f.n[7], pMinusNWordSeven) 1664 highWordsEqual &= constantTimeEq(f.n[7], pMinusNWordSeven) 1665 result |= highWordsEqual & constantTimeGreater(f.n[6], pMinusNWordSix) 1666 highWordsEqual &= constantTimeEq(f.n[6], pMinusNWordSix) 1667 result |= highWordsEqual & constantTimeGreater(f.n[5], pMinusNWordFive) 1668 highWordsEqual &= constantTimeEq(f.n[5], pMinusNWordFive) 1669 result |= highWordsEqual & constantTimeGreater(f.n[4], pMinusNWordFour) 1670 highWordsEqual &= constantTimeEq(f.n[4], pMinusNWordFour) 1671 result |= highWordsEqual & constantTimeGreater(f.n[3], pMinusNWordThree) 1672 highWordsEqual &= constantTimeEq(f.n[3], pMinusNWordThree) 1673 result |= highWordsEqual & constantTimeGreater(f.n[2], pMinusNWordTwo) 1674 highWordsEqual &= constantTimeEq(f.n[2], pMinusNWordTwo) 1675 result |= highWordsEqual & constantTimeGreater(f.n[1], pMinusNWordOne) 1676 highWordsEqual &= constantTimeEq(f.n[1], pMinusNWordOne) 1677 result |= highWordsEqual & constantTimeGreaterOrEq(f.n[0], pMinusNWordZero) 1678 1679 return result != 0 1680 } 1681