diff options
author | Matt A. Tobin <mattatobin@localhost.localdomain> | 2018-02-02 04:16:08 -0500 |
---|---|---|
committer | Matt A. Tobin <mattatobin@localhost.localdomain> | 2018-02-02 04:16:08 -0500 |
commit | 5f8de423f190bbb79a62f804151bc24824fa32d8 (patch) | |
tree | 10027f336435511475e392454359edea8e25895d /security/nss/lib/freebl/ecl | |
parent | 49ee0794b5d912db1f95dce6eb52d781dc210db5 (diff) | |
download | UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.gz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.lz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.xz UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.zip |
Add m-esr52 at 52.6.0
Diffstat (limited to 'security/nss/lib/freebl/ecl')
26 files changed, 7970 insertions, 0 deletions
diff --git a/security/nss/lib/freebl/ecl/README b/security/nss/lib/freebl/ecl/README new file mode 100644 index 000000000..04a8b3b01 --- /dev/null +++ b/security/nss/lib/freebl/ecl/README @@ -0,0 +1,267 @@ +This Source Code Form is subject to the terms of the Mozilla Public +License, v. 2.0. If a copy of the MPL was not distributed with this +file, You can obtain one at http://mozilla.org/MPL/2.0/. + +The ECL exposes routines for constructing and converting curve +parameters for internal use. + + +HEADER FILES +============ + +ecl-exp.h - Exports data structures and curve names. For use by code +that does not have access to mp_ints. + +ecl-curve.h - Provides hex encodings (in the form of ECCurveParams +structs) of standardizes elliptic curve domain parameters and mappings +from ECCurveName to ECCurveParams. For use by code that does not have +access to mp_ints. + +ecl.h - Interface to constructors for curve parameters and group object, +and point multiplication operations. Used by higher level algorithms +(like ECDH and ECDSA) to actually perform elliptic curve cryptography. + +ecl-priv.h - Data structures and functions for internal use within the +library. + +ecp.h - Internal header file that contains all functions for point +arithmetic over prime fields. + +DATA STRUCTURES AND TYPES +========================= + +ECCurveName (from ecl-exp.h) - Opaque name for standardized elliptic +curve domain parameters. + +ECCurveParams (from ecl-exp.h) - Provides hexadecimal encoding +of elliptic curve domain parameters. Can be generated by a user +and passed to ECGroup_fromHex or can be generated from a name by +EC_GetNamedCurveParams. ecl-curve.h contains ECCurveParams structs for +the standardized curves defined by ECCurveName. + +ECGroup (from ecl.h and ecl-priv.h) - Opaque data structure that +represents a group of elliptic curve points for a particular set of +elliptic curve domain parameters. Contains all domain parameters (curve +a and b, field, base point) as well as pointers to the functions that +should be used for point arithmetic and the underlying field GFMethod. +Generated by either ECGroup_fromHex or ECGroup_fromName. + +GFMethod (from ecl-priv.h) - Represents a field underlying a set of +elliptic curve domain parameters. Contains the irreducible that defines +the field (either the prime or the binary polynomial) as well as +pointers to the functions that should be used for field arithmetic. + +ARITHMETIC FUNCTIONS +==================== + +Higher-level algorithms (like ECDH and ECDSA) should call ECPoint_mul +or ECPoints_mul (from ecl.h) to do point arithmetic. These functions +will choose which underlying algorithms to use, based on the ECGroup +structure. + +Point Multiplication +-------------------- + +ecl_mult.c provides the ECPoints_mul and ECPoint_mul wrappers. +It also provides two implementations for the pts_mul operation - +ec_pts_mul_basic (which computes kP, lQ, and then adds kP + lQ) and +ec_pts_mul_simul_w2 (which does a simultaneous point multiplication +using a table with window size 2*2). + +ec_naf.c provides an implementation of an algorithm to calculate a +non-adjacent form of a scalar, minimizing the number of point +additions that need to be done in a point multiplication. + +Point Arithmetic over Prime Fields +---------------------------------- + +ecp_aff.c provides point arithmetic using affine coordinates. + +ecp_jac.c provides point arithmetic using Jacobian projective +coordinates and mixed Jacobian-affine coordinates. (Jacobian projective +coordinates represent a point (x, y) as (X, Y, Z), where x=X/Z^2, +y=Y/Z^3). + +ecp_jm.c provides point arithmetic using Modified Jacobian +coordinates and mixed Modified_Jacobian-affine coordinates. +(Modified Jacobian coordinates represent a point (x, y) +as (X, Y, Z, a*Z^4), where x=X/Z^2, y=Y/Z^3, and a is +the linear coefficient in the curve defining equation). + +ecp_192.c and ecp_224.c provide optimized field arithmetic. + +Point Arithmetic over Binary Polynomial Fields +---------------------------------------------- + +ec2_aff.c provides point arithmetic using affine coordinates. + +ec2_proj.c provides point arithmetic using projective coordinates. +(Projective coordinates represent a point (x, y) as (X, Y, Z), where +x=X/Z, y=Y/Z^2). + +ec2_mont.c provides point multiplication using Montgomery projective +coordinates. + +ec2_163.c, ec2_193.c, and ec2_233.c provide optimized field arithmetic. + +Field Arithmetic +---------------- + +ecl_gf.c provides constructors for field objects (GFMethod) with the +functions GFMethod_cons*. It also provides wrappers around the basic +field operations. + +Prime Field Arithmetic +---------------------- + +The mpi library provides the basic prime field arithmetic. + +ecp_mont.c provides wrappers around the Montgomery multiplication +functions from the mpi library and adds encoding and decoding functions. +It also provides the function to construct a GFMethod object using +Montgomery multiplication. + +ecp_192.c and ecp_224.c provide optimized modular reduction for the +fields defined by nistp192 and nistp224 primes. + +ecl_gf.c provides wrappers around the basic field operations. + +Binary Polynomial Field Arithmetic +---------------------------------- + +../mpi/mp_gf2m.c provides basic binary polynomial field arithmetic, +including addition, multiplication, squaring, mod, and division, as well +as conversion ob polynomial representations between bitstring and int[]. + +ec2_163.c, ec2_193.c, and ec2_233.c provide optimized field mod, mul, +and sqr operations. + +ecl_gf.c provides wrappers around the basic field operations. + +Field Encoding +-------------- + +By default, field elements are encoded in their basic form. It is +possible to use an alternative encoding, however. For example, it is +possible to Montgomery representation of prime field elements and +take advantage of the fast modular multiplication that Montgomery +representation provides. The process of converting from basic form to +Montgomery representation is called field encoding, and the opposite +process would be field decoding. All internal point operations assume +that the operands are field encoded as appropriate. By rewiring the +underlying field arithmetic to perform operations on these encoded +values, the same overlying point arithmetic operations can be used +regardless of field representation. + +ALGORITHM WIRING +================ + +The EC library allows point and field arithmetic algorithms to be +substituted ("wired-in") on a fine-grained basis. This allows for +generic algorithms and algorithms that are optimized for a particular +curve, field, or architecture, to coexist and to be automatically +selected at runtime. + +Wiring Mechanism +---------------- + +The ECGroup and GFMethod structure contain pointers to the point and +field arithmetic functions, respectively, that are to be used in +operations. + +The selection of algorithms to use is handled in the function +ecgroup_fromNameAndHex in ecl.c. + +Default Wiring +-------------- + +Curves over prime fields by default use montgomery field arithmetic, +point multiplication using 5-bit window non-adjacent-form with +Modified Jacobian coordinates, and 2*2-bit simultaneous point +multiplication using Jacobian coordinates. +(Wiring in function ECGroup_consGFp_mont in ecl.c.) + +Curves over prime fields that have optimized modular reduction (i.e., +secp160r1, nistp192, and nistp224) do not use Montgomery field +arithmetic. Instead, they use basic field arithmetic with their +optimized reduction (as in ecp_192.c and ecp_224.c). They +use the same point multiplication and simultaneous point multiplication +algorithms as other curves over prime fields. + +Curves over binary polynomial fields by default use generic field +arithmetic with montgomery point multiplication and basic kP + lQ +computation (multiply, multiply, and add). (Wiring in function +ECGroup_cons_GF2m in ecl.c.) + +Curves over binary polynomial fields that have optimized field +arithmetic (i.e., any 163-, 193, or 233-bit field) use their optimized +field arithmetic. They use the same point multiplication and +simultaneous point multiplication algorithms as other curves over binary +fields. + +Example +------- + +We provide an example for plugging in an optimized implementation for +the Koblitz curve nistk163. + +Suppose the file ec2_k163.c contains the optimized implementation. In +particular it contains a point multiplication function: + + mp_err ec_GF2m_nistk163_pt_mul(const mp_int *n, const mp_int *px, + const mp_int *py, mp_int *rx, mp_int *ry, const ECGroup *group); + +Since only a pt_mul function is provided, the generic pt_add function +will be used. + +There are two options for handling the optimized field arithmetic used +by the ..._pt_mul function. Say the optimized field arithmetic includes +the following functions: + + mp_err ec_GF2m_nistk163_add(const mp_int *a, const mp_int *b, + mp_int *r, const GFMethod *meth); + mp_err ec_GF2m_nistk163_mul(const mp_int *a, const mp_int *b, + mp_int *r, const GFMethod *meth); + mp_err ec_GF2m_nistk163_sqr(const mp_int *a, const mp_int *b, + mp_int *r, const GFMethod *meth); + mp_err ec_GF2m_nistk163_div(const mp_int *a, const mp_int *b, + mp_int *r, const GFMethod *meth); + +First, the optimized field arithmetic could simply be called directly +by the ..._pt_mul function. This would be accomplished by changing +the ecgroup_fromNameAndHex function in ecl.c to include the following +statements: + + if (name == ECCurve_NIST_K163) { + group = ECGroup_consGF2m(&irr, NULL, &curvea, &curveb, &genx, + &geny, &order, params->cofactor); + if (group == NULL) { res = MP_UNDEF; goto CLEANUP; } + MP_CHECKOK( ec_group_set_nistk163(group) ); + } + +and including in ec2_k163.c the following function: + + mp_err ec_group_set_nistk163(ECGroup *group) { + group->point_mul = &ec_GF2m_nistk163_pt_mul; + return MP_OKAY; + } + +As a result, ec_GF2m_pt_add and similar functions would use the +basic binary polynomial field arithmetic ec_GF2m_add, ec_GF2m_mul, +ec_GF2m_sqr, and ec_GF2m_div. + +Alternatively, the optimized field arithmetic could be wired into the +group's GFMethod. This would be accomplished by putting the following +function in ec2_k163.c: + + mp_err ec_group_set_nistk163(ECGroup *group) { + group->meth->field_add = &ec_GF2m_nistk163_add; + group->meth->field_mul = &ec_GF2m_nistk163_mul; + group->meth->field_sqr = &ec_GF2m_nistk163_sqr; + group->meth->field_div = &ec_GF2m_nistk163_div; + group->point_mul = &ec_GF2m_nistk163_pt_mul; + return MP_OKAY; + } + +For an example of functions that use special field encodings, take a +look at ecp_mont.c. diff --git a/security/nss/lib/freebl/ecl/curve25519_32.c b/security/nss/lib/freebl/ecl/curve25519_32.c new file mode 100644 index 000000000..0122961e6 --- /dev/null +++ b/security/nss/lib/freebl/ecl/curve25519_32.c @@ -0,0 +1,390 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * Derived from public domain code by Matthew Dempsky and D. J. Bernstein. + */ + +#include "ecl-priv.h" +#include "mpi.h" + +#include <stdint.h> +#include <stdio.h> + +typedef uint32_t elem[32]; + +/* + * Add two field elements. + * out = a + b + */ +static void +add(elem out, const elem a, const elem b) +{ + uint32_t j; + uint32_t u = 0; + for (j = 0; j < 31; ++j) { + u += a[j] + b[j]; + out[j] = u & 0xFF; + u >>= 8; + } + u += a[31] + b[31]; + out[31] = u; +} + +/* + * Subtract two field elements. + * out = a - b + */ +static void +sub(elem out, const elem a, const elem b) +{ + uint32_t j; + uint32_t u; + u = 218; + for (j = 0; j < 31; ++j) { + u += a[j] + 0xFF00 - b[j]; + out[j] = u & 0xFF; + u >>= 8; + } + u += a[31] - b[31]; + out[31] = u; +} + +/* + * "Squeeze" an element after multiplication (and square). + */ +static void +squeeze(elem a) +{ + uint32_t j; + uint32_t u; + u = 0; + for (j = 0; j < 31; ++j) { + u += a[j]; + a[j] = u & 0xFF; + u >>= 8; + } + u += a[31]; + a[31] = u & 0x7F; + u = 19 * (u >> 7); + for (j = 0; j < 31; ++j) { + u += a[j]; + a[j] = u & 0xFF; + u >>= 8; + } + a[31] += u; +} + +static const elem minusp = { 19, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 128 }; + +/* + * Reduce point a by 2^255-19 + */ +static void +reduce(elem a) +{ + elem aorig; + uint32_t j; + uint32_t negative; + + for (j = 0; j < 32; ++j) { + aorig[j] = a[j]; + } + add(a, a, minusp); + negative = 1 + ~((a[31] >> 7) & 1); + for (j = 0; j < 32; ++j) { + a[j] ^= negative & (aorig[j] ^ a[j]); + } +} + +/* + * Multiplication and squeeze + * out = a * b + */ +static void +mult(elem out, const elem a, const elem b) +{ + uint32_t i; + uint32_t j; + uint32_t u; + + for (i = 0; i < 32; ++i) { + u = 0; + for (j = 0; j <= i; ++j) { + u += a[j] * b[i - j]; + } + for (j = i + 1; j < 32; ++j) { + u += 38 * a[j] * b[i + 32 - j]; + } + out[i] = u; + } + squeeze(out); +} + +/* + * Multiplication + * out = 121665 * a + */ +static void +mult121665(elem out, const elem a) +{ + uint32_t j; + uint32_t u; + + u = 0; + for (j = 0; j < 31; ++j) { + u += 121665 * a[j]; + out[j] = u & 0xFF; + u >>= 8; + } + u += 121665 * a[31]; + out[31] = u & 0x7F; + u = 19 * (u >> 7); + for (j = 0; j < 31; ++j) { + u += out[j]; + out[j] = u & 0xFF; + u >>= 8; + } + u += out[j]; + out[j] = u; +} + +/* + * Square a and squeeze the result. + * out = a * a + */ +static void +square(elem out, const elem a) +{ + uint32_t i; + uint32_t j; + uint32_t u; + + for (i = 0; i < 32; ++i) { + u = 0; + for (j = 0; j < i - j; ++j) { + u += a[j] * a[i - j]; + } + for (j = i + 1; j < i + 32 - j; ++j) { + u += 38 * a[j] * a[i + 32 - j]; + } + u *= 2; + if ((i & 1) == 0) { + u += a[i / 2] * a[i / 2]; + u += 38 * a[i / 2 + 16] * a[i / 2 + 16]; + } + out[i] = u; + } + squeeze(out); +} + +/* + * Constant time swap between r and s depending on b + */ +static void +cswap(uint32_t p[64], uint32_t q[64], uint32_t b) +{ + uint32_t j; + uint32_t swap = 1 + ~b; + + for (j = 0; j < 64; ++j) { + const uint32_t t = swap & (p[j] ^ q[j]); + p[j] ^= t; + q[j] ^= t; + } +} + +/* + * Montgomery ladder + */ +static void +monty(elem x_2_out, elem z_2_out, + const elem point, const elem scalar) +{ + uint32_t x_3[64] = { 0 }; + uint32_t x_2[64] = { 0 }; + uint32_t a0[64]; + uint32_t a1[64]; + uint32_t b0[64]; + uint32_t b1[64]; + uint32_t c1[64]; + uint32_t r[32]; + uint32_t s[32]; + uint32_t t[32]; + uint32_t u[32]; + uint32_t swap = 0; + uint32_t k_t = 0; + int j; + + for (j = 0; j < 32; ++j) { + x_3[j] = point[j]; + } + x_3[32] = 1; + x_2[0] = 1; + + for (j = 254; j >= 0; --j) { + k_t = (scalar[j >> 3] >> (j & 7)) & 1; + swap ^= k_t; + cswap(x_2, x_3, swap); + swap = k_t; + add(a0, x_2, x_2 + 32); + sub(a0 + 32, x_2, x_2 + 32); + add(a1, x_3, x_3 + 32); + sub(a1 + 32, x_3, x_3 + 32); + square(b0, a0); + square(b0 + 32, a0 + 32); + mult(b1, a1, a0 + 32); + mult(b1 + 32, a1 + 32, a0); + add(c1, b1, b1 + 32); + sub(c1 + 32, b1, b1 + 32); + square(r, c1 + 32); + sub(s, b0, b0 + 32); + mult121665(t, s); + add(u, t, b0); + mult(x_2, b0, b0 + 32); + mult(x_2 + 32, s, u); + square(x_3, c1); + mult(x_3 + 32, r, point); + } + + cswap(x_2, x_3, swap); + for (j = 0; j < 32; ++j) { + x_2_out[j] = x_2[j]; + } + for (j = 0; j < 32; ++j) { + z_2_out[j] = x_2[j + 32]; + } +} + +static void +recip(elem out, const elem z) +{ + elem z2; + elem z9; + elem z11; + elem z2_5_0; + elem z2_10_0; + elem z2_20_0; + elem z2_50_0; + elem z2_100_0; + elem t0; + elem t1; + int i; + + /* 2 */ square(z2, z); + /* 4 */ square(t1, z2); + /* 8 */ square(t0, t1); + /* 9 */ mult(z9, t0, z); + /* 11 */ mult(z11, z9, z2); + /* 22 */ square(t0, z11); + /* 2^5 - 2^0 = 31 */ mult(z2_5_0, t0, z9); + + /* 2^6 - 2^1 */ square(t0, z2_5_0); + /* 2^7 - 2^2 */ square(t1, t0); + /* 2^8 - 2^3 */ square(t0, t1); + /* 2^9 - 2^4 */ square(t1, t0); + /* 2^10 - 2^5 */ square(t0, t1); + /* 2^10 - 2^0 */ mult(z2_10_0, t0, z2_5_0); + + /* 2^11 - 2^1 */ square(t0, z2_10_0); + /* 2^12 - 2^2 */ square(t1, t0); + /* 2^20 - 2^10 */ + for (i = 2; i < 10; i += 2) { + square(t0, t1); + square(t1, t0); + } + /* 2^20 - 2^0 */ mult(z2_20_0, t1, z2_10_0); + + /* 2^21 - 2^1 */ square(t0, z2_20_0); + /* 2^22 - 2^2 */ square(t1, t0); + /* 2^40 - 2^20 */ + for (i = 2; i < 20; i += 2) { + square(t0, t1); + square(t1, t0); + } + /* 2^40 - 2^0 */ mult(t0, t1, z2_20_0); + + /* 2^41 - 2^1 */ square(t1, t0); + /* 2^42 - 2^2 */ square(t0, t1); + /* 2^50 - 2^10 */ + for (i = 2; i < 10; i += 2) { + square(t1, t0); + square(t0, t1); + } + /* 2^50 - 2^0 */ mult(z2_50_0, t0, z2_10_0); + + /* 2^51 - 2^1 */ square(t0, z2_50_0); + /* 2^52 - 2^2 */ square(t1, t0); + /* 2^100 - 2^50 */ + for (i = 2; i < 50; i += 2) { + square(t0, t1); + square(t1, t0); + } + /* 2^100 - 2^0 */ mult(z2_100_0, t1, z2_50_0); + + /* 2^101 - 2^1 */ square(t1, z2_100_0); + /* 2^102 - 2^2 */ square(t0, t1); + /* 2^200 - 2^100 */ + for (i = 2; i < 100; i += 2) { + square(t1, t0); + square(t0, t1); + } + /* 2^200 - 2^0 */ mult(t1, t0, z2_100_0); + + /* 2^201 - 2^1 */ square(t0, t1); + /* 2^202 - 2^2 */ square(t1, t0); + /* 2^250 - 2^50 */ + for (i = 2; i < 50; i += 2) { + square(t0, t1); + square(t1, t0); + } + /* 2^250 - 2^0 */ mult(t0, t1, z2_50_0); + + /* 2^251 - 2^1 */ square(t1, t0); + /* 2^252 - 2^2 */ square(t0, t1); + /* 2^253 - 2^3 */ square(t1, t0); + /* 2^254 - 2^4 */ square(t0, t1); + /* 2^255 - 2^5 */ square(t1, t0); + /* 2^255 - 21 */ mult(out, t1, z11); +} + +/* + * Computes q = Curve25519(p, s) + */ +SECStatus +ec_Curve25519_mul(PRUint8 *q, const PRUint8 *s, const PRUint8 *p) +{ + elem point = { 0 }; + elem x_2 = { 0 }; + elem z_2 = { 0 }; + elem X = { 0 }; + elem scalar = { 0 }; + uint32_t i; + + /* read and mask scalar */ + for (i = 0; i < 32; ++i) { + scalar[i] = s[i]; + } + scalar[0] &= 0xF8; + scalar[31] &= 0x7F; + scalar[31] |= 64; + + /* read and mask point */ + for (i = 0; i < 32; ++i) { + point[i] = p[i]; + } + point[31] &= 0x7F; + + monty(x_2, z_2, point, scalar); + recip(z_2, z_2); + mult(X, x_2, z_2); + reduce(X); + for (i = 0; i < 32; ++i) { + q[i] = X[i]; + } + return 0; +} diff --git a/security/nss/lib/freebl/ecl/curve25519_64.c b/security/nss/lib/freebl/ecl/curve25519_64.c new file mode 100644 index 000000000..89327ad1c --- /dev/null +++ b/security/nss/lib/freebl/ecl/curve25519_64.c @@ -0,0 +1,514 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * Derived from public domain C code by Adan Langley and Daniel J. Bernstein + */ + +#include "uint128.h" + +#include "ecl-priv.h" +#include "mpi.h" + +#include <stdint.h> +#include <stdio.h> +#include <string.h> + +typedef uint8_t u8; +typedef uint64_t felem; + +/* Sum two numbers: output += in */ +static void +fsum(felem *output, const felem *in) +{ + unsigned i; + for (i = 0; i < 5; ++i) { + output[i] += in[i]; + } +} + +/* Find the difference of two numbers: output = in - output + * (note the order of the arguments!) + */ +static void +fdifference_backwards(felem *ioutput, const felem *iin) +{ + static const int64_t twotothe51 = ((int64_t)1l << 51); + const int64_t *in = (const int64_t *)iin; + int64_t *out = (int64_t *)ioutput; + + out[0] = in[0] - out[0]; + out[1] = in[1] - out[1]; + out[2] = in[2] - out[2]; + out[3] = in[3] - out[3]; + out[4] = in[4] - out[4]; + + // An arithmetic shift right of 63 places turns a positive number to 0 and a + // negative number to all 1's. This gives us a bitmask that lets us avoid + // side-channel prone branches. + int64_t t; + +#define NEGCHAIN(a, b) \ + t = out[a] >> 63; \ + out[a] += twotothe51 & t; \ + out[b] -= 1 & t; + +#define NEGCHAIN19(a, b) \ + t = out[a] >> 63; \ + out[a] += twotothe51 & t; \ + out[b] -= 19 & t; + + NEGCHAIN(0, 1); + NEGCHAIN(1, 2); + NEGCHAIN(2, 3); + NEGCHAIN(3, 4); + NEGCHAIN19(4, 0); + NEGCHAIN(0, 1); + NEGCHAIN(1, 2); + NEGCHAIN(2, 3); + NEGCHAIN(3, 4); +} + +/* Multiply a number by a scalar: output = in * scalar */ +static void +fscalar_product(felem *output, const felem *in, + const felem scalar) +{ + uint128_t tmp, tmp2; + + tmp = mul6464(in[0], scalar); + output[0] = mask51(tmp); + + tmp2 = mul6464(in[1], scalar); + tmp = add128(tmp2, rshift128(tmp, 51)); + output[1] = mask51(tmp); + + tmp2 = mul6464(in[2], scalar); + tmp = add128(tmp2, rshift128(tmp, 51)); + output[2] = mask51(tmp); + + tmp2 = mul6464(in[3], scalar); + tmp = add128(tmp2, rshift128(tmp, 51)); + output[3] = mask51(tmp); + + tmp2 = mul6464(in[4], scalar); + tmp = add128(tmp2, rshift128(tmp, 51)); + output[4] = mask51(tmp); + + output[0] += mask_lower(rshift128(tmp, 51)) * 19; +} + +/* Multiply two numbers: output = in2 * in + * + * output must be distinct to both inputs. The inputs are reduced coefficient + * form, the output is not. + */ +static void +fmul(felem *output, const felem *in2, const felem *in) +{ + uint128_t t0, t1, t2, t3, t4, t5, t6, t7, t8; + + t0 = mul6464(in[0], in2[0]); + t1 = add128(mul6464(in[1], in2[0]), mul6464(in[0], in2[1])); + t2 = add128(add128(mul6464(in[0], in2[2]), + mul6464(in[2], in2[0])), + mul6464(in[1], in2[1])); + t3 = add128(add128(add128(mul6464(in[0], in2[3]), + mul6464(in[3], in2[0])), + mul6464(in[1], in2[2])), + mul6464(in[2], in2[1])); + t4 = add128(add128(add128(add128(mul6464(in[0], in2[4]), + mul6464(in[4], in2[0])), + mul6464(in[3], in2[1])), + mul6464(in[1], in2[3])), + mul6464(in[2], in2[2])); + t5 = add128(add128(add128(mul6464(in[4], in2[1]), + mul6464(in[1], in2[4])), + mul6464(in[2], in2[3])), + mul6464(in[3], in2[2])); + t6 = add128(add128(mul6464(in[4], in2[2]), + mul6464(in[2], in2[4])), + mul6464(in[3], in2[3])); + t7 = add128(mul6464(in[3], in2[4]), mul6464(in[4], in2[3])); + t8 = mul6464(in[4], in2[4]); + + t0 = add128(t0, mul12819(t5)); + t1 = add128(t1, mul12819(t6)); + t2 = add128(t2, mul12819(t7)); + t3 = add128(t3, mul12819(t8)); + + t1 = add128(t1, rshift128(t0, 51)); + t0 = mask51full(t0); + t2 = add128(t2, rshift128(t1, 51)); + t1 = mask51full(t1); + t3 = add128(t3, rshift128(t2, 51)); + t4 = add128(t4, rshift128(t3, 51)); + t0 = add128(t0, mul12819(rshift128(t4, 51))); + t1 = add128(t1, rshift128(t0, 51)); + t2 = mask51full(t2); + t2 = add128(t2, rshift128(t1, 51)); + + output[0] = mask51(t0); + output[1] = mask51(t1); + output[2] = mask_lower(t2); + output[3] = mask51(t3); + output[4] = mask51(t4); +} + +static void +fsquare(felem *output, const felem *in) +{ + uint128_t t0, t1, t2, t3, t4, t5, t6, t7, t8; + + t0 = mul6464(in[0], in[0]); + t1 = lshift128(mul6464(in[0], in[1]), 1); + t2 = add128(lshift128(mul6464(in[0], in[2]), 1), + mul6464(in[1], in[1])); + t3 = add128(lshift128(mul6464(in[0], in[3]), 1), + lshift128(mul6464(in[1], in[2]), 1)); + t4 = add128(add128(lshift128(mul6464(in[0], in[4]), 1), + lshift128(mul6464(in[3], in[1]), 1)), + mul6464(in[2], in[2])); + t5 = add128(lshift128(mul6464(in[4], in[1]), 1), + lshift128(mul6464(in[2], in[3]), 1)); + t6 = add128(lshift128(mul6464(in[4], in[2]), 1), + mul6464(in[3], in[3])); + t7 = lshift128(mul6464(in[3], in[4]), 1); + t8 = mul6464(in[4], in[4]); + + t0 = add128(t0, mul12819(t5)); + t1 = add128(t1, mul12819(t6)); + t2 = add128(t2, mul12819(t7)); + t3 = add128(t3, mul12819(t8)); + + t1 = add128(t1, rshift128(t0, 51)); + t0 = mask51full(t0); + t2 = add128(t2, rshift128(t1, 51)); + t1 = mask51full(t1); + t3 = add128(t3, rshift128(t2, 51)); + t4 = add128(t4, rshift128(t3, 51)); + t0 = add128(t0, mul12819(rshift128(t4, 51))); + t1 = add128(t1, rshift128(t0, 51)); + + output[0] = mask51(t0); + output[1] = mask_lower(t1); + output[2] = mask51(t2); + output[3] = mask51(t3); + output[4] = mask51(t4); +} + +/* Take a 32-byte number and expand it into polynomial form */ +static void NO_SANITIZE_ALIGNMENT +fexpand(felem *output, const u8 *in) +{ + output[0] = *((const uint64_t *)(in)) & MASK51; + output[1] = (*((const uint64_t *)(in + 6)) >> 3) & MASK51; + output[2] = (*((const uint64_t *)(in + 12)) >> 6) & MASK51; + output[3] = (*((const uint64_t *)(in + 19)) >> 1) & MASK51; + output[4] = (*((const uint64_t *)(in + 25)) >> 4) & MASK51; +} + +/* Take a fully reduced polynomial form number and contract it into a + * 32-byte array + */ +static void +fcontract(u8 *output, const felem *input) +{ + uint128_t t0 = init128x(input[0]); + uint128_t t1 = init128x(input[1]); + uint128_t t2 = init128x(input[2]); + uint128_t t3 = init128x(input[3]); + uint128_t t4 = init128x(input[4]); + uint128_t tmp = init128x(19); + + t1 = add128(t1, rshift128(t0, 51)); + t0 = mask51full(t0); + t2 = add128(t2, rshift128(t1, 51)); + t1 = mask51full(t1); + t3 = add128(t3, rshift128(t2, 51)); + t2 = mask51full(t2); + t4 = add128(t4, rshift128(t3, 51)); + t3 = mask51full(t3); + t0 = add128(t0, mul12819(rshift128(t4, 51))); + t4 = mask51full(t4); + + t1 = add128(t1, rshift128(t0, 51)); + t0 = mask51full(t0); + t2 = add128(t2, rshift128(t1, 51)); + t1 = mask51full(t1); + t3 = add128(t3, rshift128(t2, 51)); + t2 = mask51full(t2); + t4 = add128(t4, rshift128(t3, 51)); + t3 = mask51full(t3); + t0 = add128(t0, mul12819(rshift128(t4, 51))); + t4 = mask51full(t4); + + /* now t is between 0 and 2^255-1, properly carried. */ + /* case 1: between 0 and 2^255-20. case 2: between 2^255-19 and 2^255-1. */ + + t0 = add128(t0, tmp); + + t1 = add128(t1, rshift128(t0, 51)); + t0 = mask51full(t0); + t2 = add128(t2, rshift128(t1, 51)); + t1 = mask51full(t1); + t3 = add128(t3, rshift128(t2, 51)); + t2 = mask51full(t2); + t4 = add128(t4, rshift128(t3, 51)); + t3 = mask51full(t3); + t0 = add128(t0, mul12819(rshift128(t4, 51))); + t4 = mask51full(t4); + + /* now between 19 and 2^255-1 in both cases, and offset by 19. */ + + t0 = add128(t0, init128x(0x8000000000000 - 19)); + tmp = init128x(0x8000000000000 - 1); + t1 = add128(t1, tmp); + t2 = add128(t2, tmp); + t3 = add128(t3, tmp); + t4 = add128(t4, tmp); + + /* now between 2^255 and 2^256-20, and offset by 2^255. */ + + t1 = add128(t1, rshift128(t0, 51)); + t0 = mask51full(t0); + t2 = add128(t2, rshift128(t1, 51)); + t1 = mask51full(t1); + t3 = add128(t3, rshift128(t2, 51)); + t2 = mask51full(t2); + t4 = add128(t4, rshift128(t3, 51)); + t3 = mask51full(t3); + t4 = mask51full(t4); + + *((uint64_t *)(output)) = mask_lower(t0) | mask_lower(t1) << 51; + *((uint64_t *)(output + 8)) = (mask_lower(t1) >> 13) | (mask_lower(t2) << 38); + *((uint64_t *)(output + 16)) = (mask_lower(t2) >> 26) | (mask_lower(t3) << 25); + *((uint64_t *)(output + 24)) = (mask_lower(t3) >> 39) | (mask_lower(t4) << 12); +} + +/* Input: Q, Q', Q-Q' + * Output: 2Q, Q+Q' + * + * x2 z3: long form + * x3 z3: long form + * x z: short form, destroyed + * xprime zprime: short form, destroyed + * qmqp: short form, preserved + */ +static void +fmonty(felem *x2, felem *z2, /* output 2Q */ + felem *x3, felem *z3, /* output Q + Q' */ + felem *x, felem *z, /* input Q */ + felem *xprime, felem *zprime, /* input Q' */ + const felem *qmqp /* input Q - Q' */) +{ + felem origx[5], origxprime[5], zzz[5], xx[5], zz[5], xxprime[5], zzprime[5], + zzzprime[5]; + + memcpy(origx, x, 5 * sizeof(felem)); + fsum(x, z); + fdifference_backwards(z, origx); // does x - z + + memcpy(origxprime, xprime, sizeof(felem) * 5); + fsum(xprime, zprime); + fdifference_backwards(zprime, origxprime); + fmul(xxprime, xprime, z); + fmul(zzprime, x, zprime); + memcpy(origxprime, xxprime, sizeof(felem) * 5); + fsum(xxprime, zzprime); + fdifference_backwards(zzprime, origxprime); + fsquare(x3, xxprime); + fsquare(zzzprime, zzprime); + fmul(z3, zzzprime, qmqp); + + fsquare(xx, x); + fsquare(zz, z); + fmul(x2, xx, zz); + fdifference_backwards(zz, xx); // does zz = xx - zz + fscalar_product(zzz, zz, 121665); + fsum(zzz, xx); + fmul(z2, zz, zzz); +} + +// ----------------------------------------------------------------------------- +// Maybe swap the contents of two felem arrays (@a and @b), each @len elements +// long. Perform the swap iff @swap is non-zero. +// +// This function performs the swap without leaking any side-channel +// information. +// ----------------------------------------------------------------------------- +static void +swap_conditional(felem *a, felem *b, unsigned len, felem iswap) +{ + unsigned i; + const felem swap = 1 + ~iswap; + + for (i = 0; i < len; ++i) { + const felem x = swap & (a[i] ^ b[i]); + a[i] ^= x; + b[i] ^= x; + } +} + +/* Calculates nQ where Q is the x-coordinate of a point on the curve + * + * resultx/resultz: the x coordinate of the resulting curve point (short form) + * n: a 32-byte number + * q: a point of the curve (short form) + */ +static void +cmult(felem *resultx, felem *resultz, const u8 *n, const felem *q) +{ + felem a[5] = { 0 }, b[5] = { 1 }, c[5] = { 1 }, d[5] = { 0 }; + felem *nqpqx = a, *nqpqz = b, *nqx = c, *nqz = d, *t; + felem e[5] = { 0 }, f[5] = { 1 }, g[5] = { 0 }, h[5] = { 1 }; + felem *nqpqx2 = e, *nqpqz2 = f, *nqx2 = g, *nqz2 = h; + + unsigned i, j; + + memcpy(nqpqx, q, sizeof(felem) * 5); + + for (i = 0; i < 32; ++i) { + u8 byte = n[31 - i]; + for (j = 0; j < 8; ++j) { + const felem bit = byte >> 7; + + swap_conditional(nqx, nqpqx, 5, bit); + swap_conditional(nqz, nqpqz, 5, bit); + fmonty(nqx2, nqz2, nqpqx2, nqpqz2, nqx, nqz, nqpqx, nqpqz, q); + swap_conditional(nqx2, nqpqx2, 5, bit); + swap_conditional(nqz2, nqpqz2, 5, bit); + + t = nqx; + nqx = nqx2; + nqx2 = t; + t = nqz; + nqz = nqz2; + nqz2 = t; + t = nqpqx; + nqpqx = nqpqx2; + nqpqx2 = t; + t = nqpqz; + nqpqz = nqpqz2; + nqpqz2 = t; + + byte <<= 1; + } + } + + memcpy(resultx, nqx, sizeof(felem) * 5); + memcpy(resultz, nqz, sizeof(felem) * 5); +} + +// ----------------------------------------------------------------------------- +// Shamelessly copied from djb's code +// ----------------------------------------------------------------------------- +static void +crecip(felem *out, const felem *z) +{ + felem z2[5]; + felem z9[5]; + felem z11[5]; + felem z2_5_0[5]; + felem z2_10_0[5]; + felem z2_20_0[5]; + felem z2_50_0[5]; + felem z2_100_0[5]; + felem t0[5]; + felem t1[5]; + int i; + + /* 2 */ fsquare(z2, z); + /* 4 */ fsquare(t1, z2); + /* 8 */ fsquare(t0, t1); + /* 9 */ fmul(z9, t0, z); + /* 11 */ fmul(z11, z9, z2); + /* 22 */ fsquare(t0, z11); + /* 2^5 - 2^0 = 31 */ fmul(z2_5_0, t0, z9); + + /* 2^6 - 2^1 */ fsquare(t0, z2_5_0); + /* 2^7 - 2^2 */ fsquare(t1, t0); + /* 2^8 - 2^3 */ fsquare(t0, t1); + /* 2^9 - 2^4 */ fsquare(t1, t0); + /* 2^10 - 2^5 */ fsquare(t0, t1); + /* 2^10 - 2^0 */ fmul(z2_10_0, t0, z2_5_0); + + /* 2^11 - 2^1 */ fsquare(t0, z2_10_0); + /* 2^12 - 2^2 */ fsquare(t1, t0); + /* 2^20 - 2^10 */ for (i = 2; i < 10; i += 2) { + fsquare(t0, t1); + fsquare(t1, t0); + } + /* 2^20 - 2^0 */ fmul(z2_20_0, t1, z2_10_0); + + /* 2^21 - 2^1 */ fsquare(t0, z2_20_0); + /* 2^22 - 2^2 */ fsquare(t1, t0); + /* 2^40 - 2^20 */ for (i = 2; i < 20; i += 2) { + fsquare(t0, t1); + fsquare(t1, t0); + } + /* 2^40 - 2^0 */ fmul(t0, t1, z2_20_0); + + /* 2^41 - 2^1 */ fsquare(t1, t0); + /* 2^42 - 2^2 */ fsquare(t0, t1); + /* 2^50 - 2^10 */ for (i = 2; i < 10; i += 2) { + fsquare(t1, t0); + fsquare(t0, t1); + } + /* 2^50 - 2^0 */ fmul(z2_50_0, t0, z2_10_0); + + /* 2^51 - 2^1 */ fsquare(t0, z2_50_0); + /* 2^52 - 2^2 */ fsquare(t1, t0); + /* 2^100 - 2^50 */ for (i = 2; i < 50; i += 2) { + fsquare(t0, t1); + fsquare(t1, t0); + } + /* 2^100 - 2^0 */ fmul(z2_100_0, t1, z2_50_0); + + /* 2^101 - 2^1 */ fsquare(t1, z2_100_0); + /* 2^102 - 2^2 */ fsquare(t0, t1); + /* 2^200 - 2^100 */ for (i = 2; i < 100; i += 2) { + fsquare(t1, t0); + fsquare(t0, t1); + } + /* 2^200 - 2^0 */ fmul(t1, t0, z2_100_0); + + /* 2^201 - 2^1 */ fsquare(t0, t1); + /* 2^202 - 2^2 */ fsquare(t1, t0); + /* 2^250 - 2^50 */ for (i = 2; i < 50; i += 2) { + fsquare(t0, t1); + fsquare(t1, t0); + } + /* 2^250 - 2^0 */ fmul(t0, t1, z2_50_0); + + /* 2^251 - 2^1 */ fsquare(t1, t0); + /* 2^252 - 2^2 */ fsquare(t0, t1); + /* 2^253 - 2^3 */ fsquare(t1, t0); + /* 2^254 - 2^4 */ fsquare(t0, t1); + /* 2^255 - 2^5 */ fsquare(t1, t0); + /* 2^255 - 21 */ fmul(out, t1, z11); +} + +SECStatus +ec_Curve25519_mul(uint8_t *mypublic, const uint8_t *secret, + const uint8_t *basepoint) +{ + felem bp[5], x[5], z[5], zmone[5]; + uint8_t e[32]; + int i; + + for (i = 0; i < 32; ++i) { + e[i] = secret[i]; + } + e[0] &= 248; + e[31] &= 127; + e[31] |= 64; + fexpand(bp, basepoint); + cmult(x, z, e, bp); + crecip(zmone, z); + fmul(z, x, zmone); + fcontract(mypublic, z); + + return 0; +} diff --git a/security/nss/lib/freebl/ecl/ec_naf.c b/security/nss/lib/freebl/ecl/ec_naf.c new file mode 100644 index 000000000..cad08cb27 --- /dev/null +++ b/security/nss/lib/freebl/ecl/ec_naf.c @@ -0,0 +1,68 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "ecl-priv.h" + +/* Returns 2^e as an integer. This is meant to be used for small powers of + * two. */ +int +ec_twoTo(int e) +{ + int a = 1; + int i; + + for (i = 0; i < e; i++) { + a *= 2; + } + return a; +} + +/* Computes the windowed non-adjacent-form (NAF) of a scalar. Out should + * be an array of signed char's to output to, bitsize should be the number + * of bits of out, in is the original scalar, and w is the window size. + * NAF is discussed in the paper: D. Hankerson, J. Hernandez and A. + * Menezes, "Software implementation of elliptic curve cryptography over + * binary fields", Proc. CHES 2000. */ +mp_err +ec_compute_wNAF(signed char *out, int bitsize, const mp_int *in, int w) +{ + mp_int k; + mp_err res = MP_OKAY; + int i, twowm1, mask; + + twowm1 = ec_twoTo(w - 1); + mask = 2 * twowm1 - 1; + + MP_DIGITS(&k) = 0; + MP_CHECKOK(mp_init_copy(&k, in)); + + i = 0; + /* Compute wNAF form */ + while (mp_cmp_z(&k) > 0) { + if (mp_isodd(&k)) { + out[i] = MP_DIGIT(&k, 0) & mask; + if (out[i] >= twowm1) + out[i] -= 2 * twowm1; + + /* Subtract off out[i]. Note mp_sub_d only works with + * unsigned digits */ + if (out[i] >= 0) { + MP_CHECKOK(mp_sub_d(&k, out[i], &k)); + } else { + MP_CHECKOK(mp_add_d(&k, -(out[i]), &k)); + } + } else { + out[i] = 0; + } + MP_CHECKOK(mp_div_2(&k, &k)); + i++; + } + /* Zero out the remaining elements of the out array. */ + for (; i < bitsize + 1; i++) { + out[i] = 0; + } +CLEANUP: + mp_clear(&k); + return res; +} diff --git a/security/nss/lib/freebl/ecl/ecl-curve.h b/security/nss/lib/freebl/ecl/ecl-curve.h new file mode 100644 index 000000000..df061396c --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecl-curve.h @@ -0,0 +1,123 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "ecl-exp.h" +#include <stdlib.h> + +#ifndef __ecl_curve_h_ +#define __ecl_curve_h_ + +/* copied from certt.h */ +#define KU_DIGITAL_SIGNATURE (0x80) /* bit 0 */ +#define KU_KEY_AGREEMENT (0x08) /* bit 4 */ + +static const ECCurveParams ecCurve_NIST_P256 = { + "NIST-P256", ECField_GFp, 256, + "FFFFFFFF00000001000000000000000000000000FFFFFFFFFFFFFFFFFFFFFFFF", + "FFFFFFFF00000001000000000000000000000000FFFFFFFFFFFFFFFFFFFFFFFC", + "5AC635D8AA3A93E7B3EBBD55769886BC651D06B0CC53B0F63BCE3C3E27D2604B", + "6B17D1F2E12C4247F8BCE6E563A440F277037D812DEB33A0F4A13945D898C296", + "4FE342E2FE1A7F9B8EE7EB4A7C0F9E162BCE33576B315ECECBB6406837BF51F5", + "FFFFFFFF00000000FFFFFFFFFFFFFFFFBCE6FAADA7179E84F3B9CAC2FC632551", + 1, 128, 65, KU_DIGITAL_SIGNATURE | KU_KEY_AGREEMENT +}; + +static const ECCurveParams ecCurve_NIST_P384 = { + "NIST-P384", ECField_GFp, 384, + "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFF0000000000000000FFFFFFFF", + "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFF0000000000000000FFFFFFFC", + "B3312FA7E23EE7E4988E056BE3F82D19181D9C6EFE8141120314088F5013875AC656398D8A2ED19D2A85C8EDD3EC2AEF", + "AA87CA22BE8B05378EB1C71EF320AD746E1D3B628BA79B9859F741E082542A385502F25DBF55296C3A545E3872760AB7", + "3617DE4A96262C6F5D9E98BF9292DC29F8F41DBD289A147CE9DA3113B5F0B8C00A60B1CE1D7E819D7A431D7C90EA0E5F", + "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFC7634D81F4372DDF581A0DB248B0A77AECEC196ACCC52973", + 1, 192, 97, KU_DIGITAL_SIGNATURE | KU_KEY_AGREEMENT +}; + +static const ECCurveParams ecCurve_NIST_P521 = { + "NIST-P521", ECField_GFp, 521, + "01FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF", + "01FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFC", + "0051953EB9618E1C9A1F929A21A0B68540EEA2DA725B99B315F3B8B489918EF109E156193951EC7E937B1652C0BD3BB1BF073573DF883D2C34F1EF451FD46B503F00", + "00C6858E06B70404E9CD9E3ECB662395B4429C648139053FB521F828AF606B4D3DBAA14B5E77EFE75928FE1DC127A2FFA8DE3348B3C1856A429BF97E7E31C2E5BD66", + "011839296A789A3BC0045C8A5FB42C7D1BD998F54449579B446817AFBD17273E662C97EE72995EF42640C550B9013FAD0761353C7086A272C24088BE94769FD16650", + "01FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFA51868783BF2F966B7FCC0148F709A5D03BB5C9B8899C47AEBB6FB71E91386409", + 1, 256, 133, KU_DIGITAL_SIGNATURE | KU_KEY_AGREEMENT +}; + +static const ECCurveParams ecCurve25519 = { + "Curve25519", ECField_GFp, 255, + "7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffed", + "076D06", + "00", + "0900000000000000000000000000000000000000000000000000000000000000", + "20AE19A1B8A086B4E01EDD2C7748D14C923D4D7E6D7C61B229E9C5A27ECED3D9", + "1000000000000000000000000000000014def9dea2f79cd65812631a5cf5d3ed", + 8, 128, 32, KU_KEY_AGREEMENT +}; + +/* mapping between ECCurveName enum and pointers to ECCurveParams */ +static const ECCurveParams *ecCurve_map[] = { + NULL, /* ECCurve_noName */ + NULL, /* ECCurve_NIST_P192 */ + NULL, /* ECCurve_NIST_P224 */ + &ecCurve_NIST_P256, /* ECCurve_NIST_P256 */ + &ecCurve_NIST_P384, /* ECCurve_NIST_P384 */ + &ecCurve_NIST_P521, /* ECCurve_NIST_P521 */ + NULL, /* ECCurve_NIST_K163 */ + NULL, /* ECCurve_NIST_B163 */ + NULL, /* ECCurve_NIST_K233 */ + NULL, /* ECCurve_NIST_B233 */ + NULL, /* ECCurve_NIST_K283 */ + NULL, /* ECCurve_NIST_B283 */ + NULL, /* ECCurve_NIST_K409 */ + NULL, /* ECCurve_NIST_B409 */ + NULL, /* ECCurve_NIST_K571 */ + NULL, /* ECCurve_NIST_B571 */ + NULL, /* ECCurve_X9_62_PRIME_192V2 */ + NULL, /* ECCurve_X9_62_PRIME_192V3 */ + NULL, /* ECCurve_X9_62_PRIME_239V1 */ + NULL, /* ECCurve_X9_62_PRIME_239V2 */ + NULL, /* ECCurve_X9_62_PRIME_239V3 */ + NULL, /* ECCurve_X9_62_CHAR2_PNB163V1 */ + NULL, /* ECCurve_X9_62_CHAR2_PNB163V2 */ + NULL, /* ECCurve_X9_62_CHAR2_PNB163V3 */ + NULL, /* ECCurve_X9_62_CHAR2_PNB176V1 */ + NULL, /* ECCurve_X9_62_CHAR2_TNB191V1 */ + NULL, /* ECCurve_X9_62_CHAR2_TNB191V2 */ + NULL, /* ECCurve_X9_62_CHAR2_TNB191V3 */ + NULL, /* ECCurve_X9_62_CHAR2_PNB208W1 */ + NULL, /* ECCurve_X9_62_CHAR2_TNB239V1 */ + NULL, /* ECCurve_X9_62_CHAR2_TNB239V2 */ + NULL, /* ECCurve_X9_62_CHAR2_TNB239V3 */ + NULL, /* ECCurve_X9_62_CHAR2_PNB272W1 */ + NULL, /* ECCurve_X9_62_CHAR2_PNB304W1 */ + NULL, /* ECCurve_X9_62_CHAR2_TNB359V1 */ + NULL, /* ECCurve_X9_62_CHAR2_PNB368W1 */ + NULL, /* ECCurve_X9_62_CHAR2_TNB431R1 */ + NULL, /* ECCurve_SECG_PRIME_112R1 */ + NULL, /* ECCurve_SECG_PRIME_112R2 */ + NULL, /* ECCurve_SECG_PRIME_128R1 */ + NULL, /* ECCurve_SECG_PRIME_128R2 */ + NULL, /* ECCurve_SECG_PRIME_160K1 */ + NULL, /* ECCurve_SECG_PRIME_160R1 */ + NULL, /* ECCurve_SECG_PRIME_160R2 */ + NULL, /* ECCurve_SECG_PRIME_192K1 */ + NULL, /* ECCurve_SECG_PRIME_224K1 */ + NULL, /* ECCurve_SECG_PRIME_256K1 */ + NULL, /* ECCurve_SECG_CHAR2_113R1 */ + NULL, /* ECCurve_SECG_CHAR2_113R2 */ + NULL, /* ECCurve_SECG_CHAR2_131R1 */ + NULL, /* ECCurve_SECG_CHAR2_131R2 */ + NULL, /* ECCurve_SECG_CHAR2_163R1 */ + NULL, /* ECCurve_SECG_CHAR2_193R1 */ + NULL, /* ECCurve_SECG_CHAR2_193R2 */ + NULL, /* ECCurve_SECG_CHAR2_239K1 */ + NULL, /* ECCurve_WTLS_1 */ + NULL, /* ECCurve_WTLS_8 */ + NULL, /* ECCurve_WTLS_9 */ + &ecCurve25519, /* ECCurve25519 */ + NULL /* ECCurve_pastLastCurve */ +}; + +#endif diff --git a/security/nss/lib/freebl/ecl/ecl-exp.h b/security/nss/lib/freebl/ecl/ecl-exp.h new file mode 100644 index 000000000..44adb8a1c --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecl-exp.h @@ -0,0 +1,167 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef __ecl_exp_h_ +#define __ecl_exp_h_ + +/* Curve field type */ +typedef enum { + ECField_GFp, + ECField_GF2m +} ECField; + +/* Hexadecimal encoding of curve parameters */ +struct ECCurveParamsStr { + char *text; + ECField field; + unsigned int size; + char *irr; + char *curvea; + char *curveb; + char *genx; + char *geny; + char *order; + int cofactor; + int security; + int pointSize; + unsigned int usage; +}; +typedef struct ECCurveParamsStr ECCurveParams; + +/* Named curve parameters */ +typedef enum { + + ECCurve_noName = 0, + + /* NIST prime curves */ + ECCurve_NIST_P192, /* not supported */ + ECCurve_NIST_P224, /* not supported */ + ECCurve_NIST_P256, + ECCurve_NIST_P384, + ECCurve_NIST_P521, + + /* NIST binary curves */ + ECCurve_NIST_K163, /* not supported */ + ECCurve_NIST_B163, /* not supported */ + ECCurve_NIST_K233, /* not supported */ + ECCurve_NIST_B233, /* not supported */ + ECCurve_NIST_K283, /* not supported */ + ECCurve_NIST_B283, /* not supported */ + ECCurve_NIST_K409, /* not supported */ + ECCurve_NIST_B409, /* not supported */ + ECCurve_NIST_K571, /* not supported */ + ECCurve_NIST_B571, /* not supported */ + + /* ANSI X9.62 prime curves */ + /* ECCurve_X9_62_PRIME_192V1 == ECCurve_NIST_P192 */ + ECCurve_X9_62_PRIME_192V2, /* not supported */ + ECCurve_X9_62_PRIME_192V3, /* not supported */ + ECCurve_X9_62_PRIME_239V1, /* not supported */ + ECCurve_X9_62_PRIME_239V2, /* not supported */ + ECCurve_X9_62_PRIME_239V3, /* not supported */ + /* ECCurve_X9_62_PRIME_256V1 == ECCurve_NIST_P256 */ + + /* ANSI X9.62 binary curves */ + ECCurve_X9_62_CHAR2_PNB163V1, /* not supported */ + ECCurve_X9_62_CHAR2_PNB163V2, /* not supported */ + ECCurve_X9_62_CHAR2_PNB163V3, /* not supported */ + ECCurve_X9_62_CHAR2_PNB176V1, /* not supported */ + ECCurve_X9_62_CHAR2_TNB191V1, /* not supported */ + ECCurve_X9_62_CHAR2_TNB191V2, /* not supported */ + ECCurve_X9_62_CHAR2_TNB191V3, /* not supported */ + ECCurve_X9_62_CHAR2_PNB208W1, /* not supported */ + ECCurve_X9_62_CHAR2_TNB239V1, /* not supported */ + ECCurve_X9_62_CHAR2_TNB239V2, /* not supported */ + ECCurve_X9_62_CHAR2_TNB239V3, /* not supported */ + ECCurve_X9_62_CHAR2_PNB272W1, /* not supported */ + ECCurve_X9_62_CHAR2_PNB304W1, /* not supported */ + ECCurve_X9_62_CHAR2_TNB359V1, /* not supported */ + ECCurve_X9_62_CHAR2_PNB368W1, /* not supported */ + ECCurve_X9_62_CHAR2_TNB431R1, /* not supported */ + + /* SEC2 prime curves */ + ECCurve_SECG_PRIME_112R1, /* not supported */ + ECCurve_SECG_PRIME_112R2, /* not supported */ + ECCurve_SECG_PRIME_128R1, /* not supported */ + ECCurve_SECG_PRIME_128R2, /* not supported */ + ECCurve_SECG_PRIME_160K1, /* not supported */ + ECCurve_SECG_PRIME_160R1, /* not supported */ + ECCurve_SECG_PRIME_160R2, /* not supported */ + ECCurve_SECG_PRIME_192K1, /* not supported */ + /* ECCurve_SECG_PRIME_192R1 == ECCurve_NIST_P192 */ + ECCurve_SECG_PRIME_224K1, /* not supported */ + /* ECCurve_SECG_PRIME_224R1 == ECCurve_NIST_P224 */ + ECCurve_SECG_PRIME_256K1, /* not supported */ + /* ECCurve_SECG_PRIME_256R1 == ECCurve_NIST_P256 */ + /* ECCurve_SECG_PRIME_384R1 == ECCurve_NIST_P384 */ + /* ECCurve_SECG_PRIME_521R1 == ECCurve_NIST_P521 */ + + /* SEC2 binary curves */ + ECCurve_SECG_CHAR2_113R1, /* not supported */ + ECCurve_SECG_CHAR2_113R2, /* not supported */ + ECCurve_SECG_CHAR2_131R1, /* not supported */ + ECCurve_SECG_CHAR2_131R2, /* not supported */ + /* ECCurve_SECG_CHAR2_163K1 == ECCurve_NIST_K163 */ + ECCurve_SECG_CHAR2_163R1, /* not supported */ + /* ECCurve_SECG_CHAR2_163R2 == ECCurve_NIST_B163 */ + ECCurve_SECG_CHAR2_193R1, /* not supported */ + ECCurve_SECG_CHAR2_193R2, /* not supported */ + /* ECCurve_SECG_CHAR2_233K1 == ECCurve_NIST_K233 */ + /* ECCurve_SECG_CHAR2_233R1 == ECCurve_NIST_B233 */ + ECCurve_SECG_CHAR2_239K1, /* not supported */ + /* ECCurve_SECG_CHAR2_283K1 == ECCurve_NIST_K283 */ + /* ECCurve_SECG_CHAR2_283R1 == ECCurve_NIST_B283 */ + /* ECCurve_SECG_CHAR2_409K1 == ECCurve_NIST_K409 */ + /* ECCurve_SECG_CHAR2_409R1 == ECCurve_NIST_B409 */ + /* ECCurve_SECG_CHAR2_571K1 == ECCurve_NIST_K571 */ + /* ECCurve_SECG_CHAR2_571R1 == ECCurve_NIST_B571 */ + + /* WTLS curves */ + ECCurve_WTLS_1, /* not supported */ + /* there is no WTLS 2 curve */ + /* ECCurve_WTLS_3 == ECCurve_NIST_K163 */ + /* ECCurve_WTLS_4 == ECCurve_SECG_CHAR2_113R1 */ + /* ECCurve_WTLS_5 == ECCurve_X9_62_CHAR2_PNB163V1 */ + /* ECCurve_WTLS_6 == ECCurve_SECG_PRIME_112R1 */ + /* ECCurve_WTLS_7 == ECCurve_SECG_PRIME_160R1 */ + ECCurve_WTLS_8, /* not supported */ + ECCurve_WTLS_9, /* not supported */ + /* ECCurve_WTLS_10 == ECCurve_NIST_K233 */ + /* ECCurve_WTLS_11 == ECCurve_NIST_B233 */ + /* ECCurve_WTLS_12 == ECCurve_NIST_P224 */ + + ECCurve25519, + + ECCurve_pastLastCurve +} ECCurveName; + +/* Aliased named curves */ + +#define ECCurve_X9_62_PRIME_192V1 ECCurve_NIST_P192 /* not supported */ +#define ECCurve_X9_62_PRIME_256V1 ECCurve_NIST_P256 +#define ECCurve_SECG_PRIME_192R1 ECCurve_NIST_P192 /* not supported */ +#define ECCurve_SECG_PRIME_224R1 ECCurve_NIST_P224 /* not supported */ +#define ECCurve_SECG_PRIME_256R1 ECCurve_NIST_P256 +#define ECCurve_SECG_PRIME_384R1 ECCurve_NIST_P384 +#define ECCurve_SECG_PRIME_521R1 ECCurve_NIST_P521 +#define ECCurve_SECG_CHAR2_163K1 ECCurve_NIST_K163 /* not supported */ +#define ECCurve_SECG_CHAR2_163R2 ECCurve_NIST_B163 /* not supported */ +#define ECCurve_SECG_CHAR2_233K1 ECCurve_NIST_K233 /* not supported */ +#define ECCurve_SECG_CHAR2_233R1 ECCurve_NIST_B233 /* not supported */ +#define ECCurve_SECG_CHAR2_283K1 ECCurve_NIST_K283 /* not supported */ +#define ECCurve_SECG_CHAR2_283R1 ECCurve_NIST_B283 /* not supported */ +#define ECCurve_SECG_CHAR2_409K1 ECCurve_NIST_K409 /* not supported */ +#define ECCurve_SECG_CHAR2_409R1 ECCurve_NIST_B409 /* not supported */ +#define ECCurve_SECG_CHAR2_571K1 ECCurve_NIST_K571 /* not supported */ +#define ECCurve_SECG_CHAR2_571R1 ECCurve_NIST_B571 /* not supported */ +#define ECCurve_WTLS_3 ECCurve_NIST_K163 /* not supported */ +#define ECCurve_WTLS_4 ECCurve_SECG_CHAR2_113R1 /* not supported */ +#define ECCurve_WTLS_5 ECCurve_X9_62_CHAR2_PNB163V1 /* not supported */ +#define ECCurve_WTLS_6 ECCurve_SECG_PRIME_112R1 /* not supported */ +#define ECCurve_WTLS_7 ECCurve_SECG_PRIME_160R1 /* not supported */ +#define ECCurve_WTLS_10 ECCurve_NIST_K233 /* not supported */ +#define ECCurve_WTLS_11 ECCurve_NIST_B233 /* not supported */ +#define ECCurve_WTLS_12 ECCurve_NIST_P224 /* not supported */ + +#endif /* __ecl_exp_h_ */ diff --git a/security/nss/lib/freebl/ecl/ecl-priv.h b/security/nss/lib/freebl/ecl/ecl-priv.h new file mode 100644 index 000000000..f43f19327 --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecl-priv.h @@ -0,0 +1,257 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef __ecl_priv_h_ +#define __ecl_priv_h_ + +#include "ecl.h" +#include "mpi.h" +#include "mplogic.h" +#include "../blapii.h" + +/* MAX_FIELD_SIZE_DIGITS is the maximum size of field element supported */ +/* the following needs to go away... */ +#if defined(MP_USE_LONG_LONG_DIGIT) || defined(MP_USE_LONG_DIGIT) +#define ECL_SIXTY_FOUR_BIT +#else +#define ECL_THIRTY_TWO_BIT +#endif + +#define ECL_CURVE_DIGITS(curve_size_in_bits) \ + (((curve_size_in_bits) + (sizeof(mp_digit) * 8 - 1)) / (sizeof(mp_digit) * 8)) +#define ECL_BITS (sizeof(mp_digit) * 8) +#define ECL_MAX_FIELD_SIZE_DIGITS (80 / sizeof(mp_digit)) + +/* Gets the i'th bit in the binary representation of a. If i >= length(a), + * then return 0. (The above behaviour differs from mpl_get_bit, which + * causes an error if i >= length(a).) */ +#define MP_GET_BIT(a, i) \ + ((i) >= mpl_significant_bits((a))) ? 0 : mpl_get_bit((a), (i)) + +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD) +#define MP_ADD_CARRY(a1, a2, s, carry) \ + { \ + mp_word w; \ + w = ((mp_word)carry) + (a1) + (a2); \ + s = ACCUM(w); \ + carry = CARRYOUT(w); \ + } + +#define MP_SUB_BORROW(a1, a2, s, borrow) \ + { \ + mp_word w; \ + w = ((mp_word)(a1)) - (a2)-borrow; \ + s = ACCUM(w); \ + borrow = (w >> MP_DIGIT_BIT) & 1; \ + } + +#else +/* NOTE, + * carry and borrow are both read and written. + * a1 or a2 and s could be the same variable. + * don't trash those outputs until their respective inputs have + * been read. */ +#define MP_ADD_CARRY(a1, a2, s, carry) \ + { \ + mp_digit tmp, sum; \ + tmp = (a1); \ + sum = tmp + (a2); \ + tmp = (sum < tmp); /* detect overflow */ \ + s = sum += carry; \ + carry = tmp + (sum < carry); \ + } + +#define MP_SUB_BORROW(a1, a2, s, borrow) \ + { \ + mp_digit tmp; \ + tmp = (a1); \ + s = tmp - (a2); \ + tmp = (s > tmp); /* detect borrow */ \ + if (borrow && !s--) \ + tmp++; \ + borrow = tmp; \ + } +#endif + +struct GFMethodStr; +typedef struct GFMethodStr GFMethod; +struct GFMethodStr { + /* Indicates whether the structure was constructed from dynamic memory + * or statically created. */ + int constructed; + /* Irreducible that defines the field. For prime fields, this is the + * prime p. For binary polynomial fields, this is the bitstring + * representation of the irreducible polynomial. */ + mp_int irr; + /* For prime fields, the value irr_arr[0] is the number of bits in the + * field. For binary polynomial fields, the irreducible polynomial + * f(t) is represented as an array of unsigned int[], where f(t) is + * of the form: f(t) = t^p[0] + t^p[1] + ... + t^p[4] where m = p[0] + * > p[1] > ... > p[4] = 0. */ + unsigned int irr_arr[5]; + /* Field arithmetic methods. All methods (except field_enc and + * field_dec) are assumed to take field-encoded parameters and return + * field-encoded values. All methods (except field_enc and field_dec) + * are required to be implemented. */ + mp_err (*field_add)(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); + mp_err (*field_neg)(const mp_int *a, mp_int *r, const GFMethod *meth); + mp_err (*field_sub)(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); + mp_err (*field_mod)(const mp_int *a, mp_int *r, const GFMethod *meth); + mp_err (*field_mul)(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); + mp_err (*field_sqr)(const mp_int *a, mp_int *r, const GFMethod *meth); + mp_err (*field_div)(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); + mp_err (*field_enc)(const mp_int *a, mp_int *r, const GFMethod *meth); + mp_err (*field_dec)(const mp_int *a, mp_int *r, const GFMethod *meth); + /* Extra storage for implementation-specific data. Any memory + * allocated to these extra fields will be cleared by extra_free. */ + void *extra1; + void *extra2; + void (*extra_free)(GFMethod *meth); +}; + +/* Construct generic GFMethods. */ +GFMethod *GFMethod_consGFp(const mp_int *irr); +GFMethod *GFMethod_consGFp_mont(const mp_int *irr); + +/* Free the memory allocated (if any) to a GFMethod object. */ +void GFMethod_free(GFMethod *meth); + +struct ECGroupStr { + /* Indicates whether the structure was constructed from dynamic memory + * or statically created. */ + int constructed; + /* Field definition and arithmetic. */ + GFMethod *meth; + /* Textual representation of curve name, if any. */ + char *text; + /* Curve parameters, field-encoded. */ + mp_int curvea, curveb; + /* x and y coordinates of the base point, field-encoded. */ + mp_int genx, geny; + /* Order and cofactor of the base point. */ + mp_int order; + int cofactor; + /* Point arithmetic methods. All methods are assumed to take + * field-encoded parameters and return field-encoded values. All + * methods (except base_point_mul and points_mul) are required to be + * implemented. */ + mp_err (*point_add)(const mp_int *px, const mp_int *py, + const mp_int *qx, const mp_int *qy, mp_int *rx, + mp_int *ry, const ECGroup *group); + mp_err (*point_sub)(const mp_int *px, const mp_int *py, + const mp_int *qx, const mp_int *qy, mp_int *rx, + mp_int *ry, const ECGroup *group); + mp_err (*point_dbl)(const mp_int *px, const mp_int *py, mp_int *rx, + mp_int *ry, const ECGroup *group); + mp_err (*point_mul)(const mp_int *n, const mp_int *px, + const mp_int *py, mp_int *rx, mp_int *ry, + const ECGroup *group); + mp_err (*base_point_mul)(const mp_int *n, mp_int *rx, mp_int *ry, + const ECGroup *group); + mp_err (*points_mul)(const mp_int *k1, const mp_int *k2, + const mp_int *px, const mp_int *py, mp_int *rx, + mp_int *ry, const ECGroup *group); + mp_err (*validate_point)(const mp_int *px, const mp_int *py, const ECGroup *group); + /* Extra storage for implementation-specific data. Any memory + * allocated to these extra fields will be cleared by extra_free. */ + void *extra1; + void *extra2; + void (*extra_free)(ECGroup *group); +}; + +/* Wrapper functions for generic prime field arithmetic. */ +mp_err ec_GFp_add(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); +mp_err ec_GFp_neg(const mp_int *a, mp_int *r, const GFMethod *meth); +mp_err ec_GFp_sub(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); + +/* fixed length in-line adds. Count is in words */ +mp_err ec_GFp_add_3(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); +mp_err ec_GFp_add_4(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); +mp_err ec_GFp_add_5(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); +mp_err ec_GFp_add_6(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); +mp_err ec_GFp_sub_3(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); +mp_err ec_GFp_sub_4(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); +mp_err ec_GFp_sub_5(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); +mp_err ec_GFp_sub_6(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); + +mp_err ec_GFp_mod(const mp_int *a, mp_int *r, const GFMethod *meth); +mp_err ec_GFp_mul(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); +mp_err ec_GFp_sqr(const mp_int *a, mp_int *r, const GFMethod *meth); +mp_err ec_GFp_div(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); +/* Wrapper functions for generic binary polynomial field arithmetic. */ +mp_err ec_GF2m_add(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); +mp_err ec_GF2m_neg(const mp_int *a, mp_int *r, const GFMethod *meth); +mp_err ec_GF2m_mod(const mp_int *a, mp_int *r, const GFMethod *meth); +mp_err ec_GF2m_mul(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); +mp_err ec_GF2m_sqr(const mp_int *a, mp_int *r, const GFMethod *meth); +mp_err ec_GF2m_div(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); + +/* Montgomery prime field arithmetic. */ +mp_err ec_GFp_mul_mont(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); +mp_err ec_GFp_sqr_mont(const mp_int *a, mp_int *r, const GFMethod *meth); +mp_err ec_GFp_div_mont(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); +mp_err ec_GFp_enc_mont(const mp_int *a, mp_int *r, const GFMethod *meth); +mp_err ec_GFp_dec_mont(const mp_int *a, mp_int *r, const GFMethod *meth); +void ec_GFp_extra_free_mont(GFMethod *meth); + +/* point multiplication */ +mp_err ec_pts_mul_basic(const mp_int *k1, const mp_int *k2, + const mp_int *px, const mp_int *py, mp_int *rx, + mp_int *ry, const ECGroup *group); +mp_err ec_pts_mul_simul_w2(const mp_int *k1, const mp_int *k2, + const mp_int *px, const mp_int *py, mp_int *rx, + mp_int *ry, const ECGroup *group); + +/* Computes the windowed non-adjacent-form (NAF) of a scalar. Out should + * be an array of signed char's to output to, bitsize should be the number + * of bits of out, in is the original scalar, and w is the window size. + * NAF is discussed in the paper: D. Hankerson, J. Hernandez and A. + * Menezes, "Software implementation of elliptic curve cryptography over + * binary fields", Proc. CHES 2000. */ +mp_err ec_compute_wNAF(signed char *out, int bitsize, const mp_int *in, + int w); + +/* Optimized field arithmetic */ +mp_err ec_group_set_gfp192(ECGroup *group, ECCurveName); +mp_err ec_group_set_gfp224(ECGroup *group, ECCurveName); +mp_err ec_group_set_gfp256(ECGroup *group, ECCurveName); +mp_err ec_group_set_gfp384(ECGroup *group, ECCurveName); +mp_err ec_group_set_gfp521(ECGroup *group, ECCurveName); +mp_err ec_group_set_gf2m163(ECGroup *group, ECCurveName name); +mp_err ec_group_set_gf2m193(ECGroup *group, ECCurveName name); +mp_err ec_group_set_gf2m233(ECGroup *group, ECCurveName name); + +/* Optimized point multiplication */ +mp_err ec_group_set_gfp256_32(ECGroup *group, ECCurveName name); + +/* Optimized floating-point arithmetic */ +#ifdef ECL_USE_FP +mp_err ec_group_set_secp160r1_fp(ECGroup *group); +mp_err ec_group_set_nistp192_fp(ECGroup *group); +mp_err ec_group_set_nistp224_fp(ECGroup *group); +#endif + +SECStatus ec_Curve25519_mul(PRUint8 *q, const PRUint8 *s, const PRUint8 *p); +#endif /* __ecl_priv_h_ */ diff --git a/security/nss/lib/freebl/ecl/ecl.c b/security/nss/lib/freebl/ecl/ecl.c new file mode 100644 index 000000000..3540af781 --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecl.c @@ -0,0 +1,301 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mpi.h" +#include "mplogic.h" +#include "ecl.h" +#include "ecl-priv.h" +#include "ecp.h" +#include <stdlib.h> +#include <string.h> + +/* Allocate memory for a new ECGroup object. */ +ECGroup * +ECGroup_new() +{ + mp_err res = MP_OKAY; + ECGroup *group; + group = (ECGroup *)malloc(sizeof(ECGroup)); + if (group == NULL) + return NULL; + group->constructed = MP_YES; + group->meth = NULL; + group->text = NULL; + MP_DIGITS(&group->curvea) = 0; + MP_DIGITS(&group->curveb) = 0; + MP_DIGITS(&group->genx) = 0; + MP_DIGITS(&group->geny) = 0; + MP_DIGITS(&group->order) = 0; + group->base_point_mul = NULL; + group->points_mul = NULL; + group->validate_point = NULL; + group->extra1 = NULL; + group->extra2 = NULL; + group->extra_free = NULL; + MP_CHECKOK(mp_init(&group->curvea)); + MP_CHECKOK(mp_init(&group->curveb)); + MP_CHECKOK(mp_init(&group->genx)); + MP_CHECKOK(mp_init(&group->geny)); + MP_CHECKOK(mp_init(&group->order)); + +CLEANUP: + if (res != MP_OKAY) { + ECGroup_free(group); + return NULL; + } + return group; +} + +/* Construct a generic ECGroup for elliptic curves over prime fields. */ +ECGroup * +ECGroup_consGFp(const mp_int *irr, const mp_int *curvea, + const mp_int *curveb, const mp_int *genx, + const mp_int *geny, const mp_int *order, int cofactor) +{ + mp_err res = MP_OKAY; + ECGroup *group = NULL; + + group = ECGroup_new(); + if (group == NULL) + return NULL; + + group->meth = GFMethod_consGFp(irr); + if (group->meth == NULL) { + res = MP_MEM; + goto CLEANUP; + } + MP_CHECKOK(mp_copy(curvea, &group->curvea)); + MP_CHECKOK(mp_copy(curveb, &group->curveb)); + MP_CHECKOK(mp_copy(genx, &group->genx)); + MP_CHECKOK(mp_copy(geny, &group->geny)); + MP_CHECKOK(mp_copy(order, &group->order)); + group->cofactor = cofactor; + group->point_add = &ec_GFp_pt_add_aff; + group->point_sub = &ec_GFp_pt_sub_aff; + group->point_dbl = &ec_GFp_pt_dbl_aff; + group->point_mul = &ec_GFp_pt_mul_jm_wNAF; + group->base_point_mul = NULL; + group->points_mul = &ec_GFp_pts_mul_jac; + group->validate_point = &ec_GFp_validate_point; + +CLEANUP: + if (res != MP_OKAY) { + ECGroup_free(group); + return NULL; + } + return group; +} + +/* Construct a generic ECGroup for elliptic curves over prime fields with + * field arithmetic implemented in Montgomery coordinates. */ +ECGroup * +ECGroup_consGFp_mont(const mp_int *irr, const mp_int *curvea, + const mp_int *curveb, const mp_int *genx, + const mp_int *geny, const mp_int *order, int cofactor) +{ + mp_err res = MP_OKAY; + ECGroup *group = NULL; + + group = ECGroup_new(); + if (group == NULL) + return NULL; + + group->meth = GFMethod_consGFp_mont(irr); + if (group->meth == NULL) { + res = MP_MEM; + goto CLEANUP; + } + MP_CHECKOK(group->meth->field_enc(curvea, &group->curvea, group->meth)); + MP_CHECKOK(group->meth->field_enc(curveb, &group->curveb, group->meth)); + MP_CHECKOK(group->meth->field_enc(genx, &group->genx, group->meth)); + MP_CHECKOK(group->meth->field_enc(geny, &group->geny, group->meth)); + MP_CHECKOK(mp_copy(order, &group->order)); + group->cofactor = cofactor; + group->point_add = &ec_GFp_pt_add_aff; + group->point_sub = &ec_GFp_pt_sub_aff; + group->point_dbl = &ec_GFp_pt_dbl_aff; + group->point_mul = &ec_GFp_pt_mul_jm_wNAF; + group->base_point_mul = NULL; + group->points_mul = &ec_GFp_pts_mul_jac; + group->validate_point = &ec_GFp_validate_point; + +CLEANUP: + if (res != MP_OKAY) { + ECGroup_free(group); + return NULL; + } + return group; +} + +/* Construct ECGroup from hex parameters and name, if any. Called by + * ECGroup_fromHex and ECGroup_fromName. */ +ECGroup * +ecgroup_fromNameAndHex(const ECCurveName name, + const ECCurveParams *params) +{ + mp_int irr, curvea, curveb, genx, geny, order; + int bits; + ECGroup *group = NULL; + mp_err res = MP_OKAY; + + /* initialize values */ + MP_DIGITS(&irr) = 0; + MP_DIGITS(&curvea) = 0; + MP_DIGITS(&curveb) = 0; + MP_DIGITS(&genx) = 0; + MP_DIGITS(&geny) = 0; + MP_DIGITS(&order) = 0; + MP_CHECKOK(mp_init(&irr)); + MP_CHECKOK(mp_init(&curvea)); + MP_CHECKOK(mp_init(&curveb)); + MP_CHECKOK(mp_init(&genx)); + MP_CHECKOK(mp_init(&geny)); + MP_CHECKOK(mp_init(&order)); + MP_CHECKOK(mp_read_radix(&irr, params->irr, 16)); + MP_CHECKOK(mp_read_radix(&curvea, params->curvea, 16)); + MP_CHECKOK(mp_read_radix(&curveb, params->curveb, 16)); + MP_CHECKOK(mp_read_radix(&genx, params->genx, 16)); + MP_CHECKOK(mp_read_radix(&geny, params->geny, 16)); + MP_CHECKOK(mp_read_radix(&order, params->order, 16)); + + /* determine number of bits */ + bits = mpl_significant_bits(&irr) - 1; + if (bits < MP_OKAY) { + res = bits; + goto CLEANUP; + } + + /* determine which optimizations (if any) to use */ + if (params->field == ECField_GFp) { + switch (name) { + case ECCurve_SECG_PRIME_256R1: + group = + ECGroup_consGFp(&irr, &curvea, &curveb, &genx, &geny, + &order, params->cofactor); + if (group == NULL) { + res = MP_UNDEF; + goto CLEANUP; + } + MP_CHECKOK(ec_group_set_gfp256(group, name)); + MP_CHECKOK(ec_group_set_gfp256_32(group, name)); + break; + case ECCurve_SECG_PRIME_521R1: + group = + ECGroup_consGFp(&irr, &curvea, &curveb, &genx, &geny, + &order, params->cofactor); + if (group == NULL) { + res = MP_UNDEF; + goto CLEANUP; + } + MP_CHECKOK(ec_group_set_gfp521(group, name)); + break; + default: + /* use generic arithmetic */ + group = + ECGroup_consGFp_mont(&irr, &curvea, &curveb, &genx, &geny, + &order, params->cofactor); + if (group == NULL) { + res = MP_UNDEF; + goto CLEANUP; + } + } + } else { + res = MP_UNDEF; + goto CLEANUP; + } + + /* set name, if any */ + if ((group != NULL) && (params->text != NULL)) { + group->text = strdup(params->text); + if (group->text == NULL) { + res = MP_MEM; + } + } + +CLEANUP: + mp_clear(&irr); + mp_clear(&curvea); + mp_clear(&curveb); + mp_clear(&genx); + mp_clear(&geny); + mp_clear(&order); + if (res != MP_OKAY) { + ECGroup_free(group); + return NULL; + } + return group; +} + +/* Construct ECGroup from hexadecimal representations of parameters. */ +ECGroup * +ECGroup_fromHex(const ECCurveParams *params) +{ + return ecgroup_fromNameAndHex(ECCurve_noName, params); +} + +/* Construct ECGroup from named parameters. */ +ECGroup * +ECGroup_fromName(const ECCurveName name) +{ + ECGroup *group = NULL; + ECCurveParams *params = NULL; + mp_err res = MP_OKAY; + + params = EC_GetNamedCurveParams(name); + if (params == NULL) { + res = MP_UNDEF; + goto CLEANUP; + } + + /* construct actual group */ + group = ecgroup_fromNameAndHex(name, params); + if (group == NULL) { + res = MP_UNDEF; + goto CLEANUP; + } + +CLEANUP: + EC_FreeCurveParams(params); + if (res != MP_OKAY) { + ECGroup_free(group); + return NULL; + } + return group; +} + +/* Validates an EC public key as described in Section 5.2.2 of X9.62. */ +mp_err +ECPoint_validate(const ECGroup *group, const mp_int *px, const mp_int *py) +{ + /* 1: Verify that publicValue is not the point at infinity */ + /* 2: Verify that the coordinates of publicValue are elements + * of the field. + */ + /* 3: Verify that publicValue is on the curve. */ + /* 4: Verify that the order of the curve times the publicValue + * is the point at infinity. + */ + return group->validate_point(px, py, group); +} + +/* Free the memory allocated (if any) to an ECGroup object. */ +void +ECGroup_free(ECGroup *group) +{ + if (group == NULL) + return; + GFMethod_free(group->meth); + if (group->constructed == MP_NO) + return; + mp_clear(&group->curvea); + mp_clear(&group->curveb); + mp_clear(&group->genx); + mp_clear(&group->geny); + mp_clear(&group->order); + if (group->text != NULL) + free(group->text); + if (group->extra_free != NULL) + group->extra_free(group); + free(group); +} diff --git a/security/nss/lib/freebl/ecl/ecl.h b/security/nss/lib/freebl/ecl/ecl.h new file mode 100644 index 000000000..ddcbb1f3a --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecl.h @@ -0,0 +1,60 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* Although this is not an exported header file, code which uses elliptic + * curve point operations will need to include it. */ + +#ifndef __ecl_h_ +#define __ecl_h_ + +#include "blapi.h" +#include "ecl-exp.h" +#include "mpi.h" + +struct ECGroupStr; +typedef struct ECGroupStr ECGroup; + +/* Construct ECGroup from hexadecimal representations of parameters. */ +ECGroup *ECGroup_fromHex(const ECCurveParams *params); + +/* Construct ECGroup from named parameters. */ +ECGroup *ECGroup_fromName(const ECCurveName name); + +/* Free an allocated ECGroup. */ +void ECGroup_free(ECGroup *group); + +/* Construct ECCurveParams from an ECCurveName */ +ECCurveParams *EC_GetNamedCurveParams(const ECCurveName name); + +/* Duplicates an ECCurveParams */ +ECCurveParams *ECCurveParams_dup(const ECCurveParams *params); + +/* Free an allocated ECCurveParams */ +void EC_FreeCurveParams(ECCurveParams *params); + +/* Elliptic curve scalar-point multiplication. Computes Q(x, y) = k * P(x, + * y). If x, y = NULL, then P is assumed to be the generator (base point) + * of the group of points on the elliptic curve. Input and output values + * are assumed to be NOT field-encoded. */ +mp_err ECPoint_mul(const ECGroup *group, const mp_int *k, const mp_int *px, + const mp_int *py, mp_int *qx, mp_int *qy); + +/* Elliptic curve scalar-point multiplication. Computes Q(x, y) = k1 * G + + * k2 * P(x, y), where G is the generator (base point) of the group of + * points on the elliptic curve. Input and output values are assumed to + * be NOT field-encoded. */ +mp_err ECPoints_mul(const ECGroup *group, const mp_int *k1, + const mp_int *k2, const mp_int *px, const mp_int *py, + mp_int *qx, mp_int *qy); + +/* Validates an EC public key as described in Section 5.2.2 of X9.62. + * Returns MP_YES if the public key is valid, MP_NO if the public key + * is invalid, or an error code if the validation could not be + * performed. */ +mp_err ECPoint_validate(const ECGroup *group, const mp_int *px, const mp_int *py); + +SECStatus ec_Curve25519_pt_mul(SECItem *X, SECItem *k, SECItem *P); +SECStatus ec_Curve25519_pt_validate(const SECItem *px); + +#endif /* __ecl_h_ */ diff --git a/security/nss/lib/freebl/ecl/ecl_curve.c b/security/nss/lib/freebl/ecl/ecl_curve.c new file mode 100644 index 000000000..cf090cfc3 --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecl_curve.c @@ -0,0 +1,93 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "ecl.h" +#include "ecl-curve.h" +#include "ecl-priv.h" +#include <stdlib.h> +#include <string.h> + +#define CHECK(func) \ + if ((func) == NULL) { \ + res = 0; \ + goto CLEANUP; \ + } + +/* Duplicates an ECCurveParams */ +ECCurveParams * +ECCurveParams_dup(const ECCurveParams *params) +{ + int res = 1; + ECCurveParams *ret = NULL; + + CHECK(ret = (ECCurveParams *)calloc(1, sizeof(ECCurveParams))); + if (params->text != NULL) { + CHECK(ret->text = strdup(params->text)); + } + ret->field = params->field; + ret->size = params->size; + if (params->irr != NULL) { + CHECK(ret->irr = strdup(params->irr)); + } + if (params->curvea != NULL) { + CHECK(ret->curvea = strdup(params->curvea)); + } + if (params->curveb != NULL) { + CHECK(ret->curveb = strdup(params->curveb)); + } + if (params->genx != NULL) { + CHECK(ret->genx = strdup(params->genx)); + } + if (params->geny != NULL) { + CHECK(ret->geny = strdup(params->geny)); + } + if (params->order != NULL) { + CHECK(ret->order = strdup(params->order)); + } + ret->cofactor = params->cofactor; + +CLEANUP: + if (res != 1) { + EC_FreeCurveParams(ret); + return NULL; + } + return ret; +} + +#undef CHECK + +/* Construct ECCurveParams from an ECCurveName */ +ECCurveParams * +EC_GetNamedCurveParams(const ECCurveName name) +{ + if ((name <= ECCurve_noName) || (ECCurve_pastLastCurve <= name) || + (ecCurve_map[name] == NULL)) { + return NULL; + } else { + return ECCurveParams_dup(ecCurve_map[name]); + } +} + +/* Free the memory allocated (if any) to an ECCurveParams object. */ +void +EC_FreeCurveParams(ECCurveParams *params) +{ + if (params == NULL) + return; + if (params->text != NULL) + free(params->text); + if (params->irr != NULL) + free(params->irr); + if (params->curvea != NULL) + free(params->curvea); + if (params->curveb != NULL) + free(params->curveb); + if (params->genx != NULL) + free(params->genx); + if (params->geny != NULL) + free(params->geny); + if (params->order != NULL) + free(params->order); + free(params); +} diff --git a/security/nss/lib/freebl/ecl/ecl_gf.c b/security/nss/lib/freebl/ecl/ecl_gf.c new file mode 100644 index 000000000..81b007705 --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecl_gf.c @@ -0,0 +1,958 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mpi.h" +#include "mp_gf2m.h" +#include "ecl-priv.h" +#include "mpi-priv.h" +#include <stdlib.h> + +/* Allocate memory for a new GFMethod object. */ +GFMethod * +GFMethod_new() +{ + mp_err res = MP_OKAY; + GFMethod *meth; + meth = (GFMethod *)malloc(sizeof(GFMethod)); + if (meth == NULL) + return NULL; + meth->constructed = MP_YES; + MP_DIGITS(&meth->irr) = 0; + meth->extra_free = NULL; + MP_CHECKOK(mp_init(&meth->irr)); + +CLEANUP: + if (res != MP_OKAY) { + GFMethod_free(meth); + return NULL; + } + return meth; +} + +/* Construct a generic GFMethod for arithmetic over prime fields with + * irreducible irr. */ +GFMethod * +GFMethod_consGFp(const mp_int *irr) +{ + mp_err res = MP_OKAY; + GFMethod *meth = NULL; + + meth = GFMethod_new(); + if (meth == NULL) + return NULL; + + MP_CHECKOK(mp_copy(irr, &meth->irr)); + meth->irr_arr[0] = mpl_significant_bits(irr); + meth->irr_arr[1] = meth->irr_arr[2] = meth->irr_arr[3] = + meth->irr_arr[4] = 0; + switch (MP_USED(&meth->irr)) { + /* maybe we need 1 and 2 words here as well?*/ + case 3: + meth->field_add = &ec_GFp_add_3; + meth->field_sub = &ec_GFp_sub_3; + break; + case 4: + meth->field_add = &ec_GFp_add_4; + meth->field_sub = &ec_GFp_sub_4; + break; + case 5: + meth->field_add = &ec_GFp_add_5; + meth->field_sub = &ec_GFp_sub_5; + break; + case 6: + meth->field_add = &ec_GFp_add_6; + meth->field_sub = &ec_GFp_sub_6; + break; + default: + meth->field_add = &ec_GFp_add; + meth->field_sub = &ec_GFp_sub; + } + meth->field_neg = &ec_GFp_neg; + meth->field_mod = &ec_GFp_mod; + meth->field_mul = &ec_GFp_mul; + meth->field_sqr = &ec_GFp_sqr; + meth->field_div = &ec_GFp_div; + meth->field_enc = NULL; + meth->field_dec = NULL; + meth->extra1 = NULL; + meth->extra2 = NULL; + meth->extra_free = NULL; + +CLEANUP: + if (res != MP_OKAY) { + GFMethod_free(meth); + return NULL; + } + return meth; +} + +/* Free the memory allocated (if any) to a GFMethod object. */ +void +GFMethod_free(GFMethod *meth) +{ + if (meth == NULL) + return; + if (meth->constructed == MP_NO) + return; + mp_clear(&meth->irr); + if (meth->extra_free != NULL) + meth->extra_free(meth); + free(meth); +} + +/* Wrapper functions for generic prime field arithmetic. */ + +/* Add two field elements. Assumes that 0 <= a, b < meth->irr */ +mp_err +ec_GFp_add(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth) +{ + /* PRE: 0 <= a, b < p = meth->irr POST: 0 <= r < p, r = a + b (mod p) */ + mp_err res; + + if ((res = mp_add(a, b, r)) != MP_OKAY) { + return res; + } + if (mp_cmp(r, &meth->irr) >= 0) { + return mp_sub(r, &meth->irr, r); + } + return res; +} + +/* Negates a field element. Assumes that 0 <= a < meth->irr */ +mp_err +ec_GFp_neg(const mp_int *a, mp_int *r, const GFMethod *meth) +{ + /* PRE: 0 <= a < p = meth->irr POST: 0 <= r < p, r = -a (mod p) */ + + if (mp_cmp_z(a) == 0) { + mp_zero(r); + return MP_OKAY; + } + return mp_sub(&meth->irr, a, r); +} + +/* Subtracts two field elements. Assumes that 0 <= a, b < meth->irr */ +mp_err +ec_GFp_sub(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth) +{ + mp_err res = MP_OKAY; + + /* PRE: 0 <= a, b < p = meth->irr POST: 0 <= r < p, r = a - b (mod p) */ + res = mp_sub(a, b, r); + if (res == MP_RANGE) { + MP_CHECKOK(mp_sub(b, a, r)); + if (mp_cmp_z(r) < 0) { + MP_CHECKOK(mp_add(r, &meth->irr, r)); + } + MP_CHECKOK(ec_GFp_neg(r, r, meth)); + } + if (mp_cmp_z(r) < 0) { + MP_CHECKOK(mp_add(r, &meth->irr, r)); + } +CLEANUP: + return res; +} +/* + * Inline adds for small curve lengths. + */ +/* 3 words */ +mp_err +ec_GFp_add_3(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth) +{ + mp_err res = MP_OKAY; + mp_digit a0 = 0, a1 = 0, a2 = 0; + mp_digit r0 = 0, r1 = 0, r2 = 0; + mp_digit carry; + + switch (MP_USED(a)) { + case 3: + a2 = MP_DIGIT(a, 2); + case 2: + a1 = MP_DIGIT(a, 1); + case 1: + a0 = MP_DIGIT(a, 0); + } + switch (MP_USED(b)) { + case 3: + r2 = MP_DIGIT(b, 2); + case 2: + r1 = MP_DIGIT(b, 1); + case 1: + r0 = MP_DIGIT(b, 0); + } + +#ifndef MPI_AMD64_ADD + carry = 0; + MP_ADD_CARRY(a0, r0, r0, carry); + MP_ADD_CARRY(a1, r1, r1, carry); + MP_ADD_CARRY(a2, r2, r2, carry); +#else + __asm__( + "xorq %3,%3 \n\t" + "addq %4,%0 \n\t" + "adcq %5,%1 \n\t" + "adcq %6,%2 \n\t" + "adcq $0,%3 \n\t" + : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(carry) + : "r"(a0), "r"(a1), "r"(a2), + "0"(r0), "1"(r1), "2"(r2) + : "%cc"); +#endif + + MP_CHECKOK(s_mp_pad(r, 3)); + MP_DIGIT(r, 2) = r2; + MP_DIGIT(r, 1) = r1; + MP_DIGIT(r, 0) = r0; + MP_SIGN(r) = MP_ZPOS; + MP_USED(r) = 3; + + /* Do quick 'subract' if we've gone over + * (add the 2's complement of the curve field) */ + a2 = MP_DIGIT(&meth->irr, 2); + if (carry || r2 > a2 || + ((r2 == a2) && mp_cmp(r, &meth->irr) != MP_LT)) { + a1 = MP_DIGIT(&meth->irr, 1); + a0 = MP_DIGIT(&meth->irr, 0); +#ifndef MPI_AMD64_ADD + carry = 0; + MP_SUB_BORROW(r0, a0, r0, carry); + MP_SUB_BORROW(r1, a1, r1, carry); + MP_SUB_BORROW(r2, a2, r2, carry); +#else + __asm__( + "subq %3,%0 \n\t" + "sbbq %4,%1 \n\t" + "sbbq %5,%2 \n\t" + : "=r"(r0), "=r"(r1), "=r"(r2) + : "r"(a0), "r"(a1), "r"(a2), + "0"(r0), "1"(r1), "2"(r2) + : "%cc"); +#endif + MP_DIGIT(r, 2) = r2; + MP_DIGIT(r, 1) = r1; + MP_DIGIT(r, 0) = r0; + } + + s_mp_clamp(r); + +CLEANUP: + return res; +} + +/* 4 words */ +mp_err +ec_GFp_add_4(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth) +{ + mp_err res = MP_OKAY; + mp_digit a0 = 0, a1 = 0, a2 = 0, a3 = 0; + mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0; + mp_digit carry; + + switch (MP_USED(a)) { + case 4: + a3 = MP_DIGIT(a, 3); + case 3: + a2 = MP_DIGIT(a, 2); + case 2: + a1 = MP_DIGIT(a, 1); + case 1: + a0 = MP_DIGIT(a, 0); + } + switch (MP_USED(b)) { + case 4: + r3 = MP_DIGIT(b, 3); + case 3: + r2 = MP_DIGIT(b, 2); + case 2: + r1 = MP_DIGIT(b, 1); + case 1: + r0 = MP_DIGIT(b, 0); + } + +#ifndef MPI_AMD64_ADD + carry = 0; + MP_ADD_CARRY(a0, r0, r0, carry); + MP_ADD_CARRY(a1, r1, r1, carry); + MP_ADD_CARRY(a2, r2, r2, carry); + MP_ADD_CARRY(a3, r3, r3, carry); +#else + __asm__( + "xorq %4,%4 \n\t" + "addq %5,%0 \n\t" + "adcq %6,%1 \n\t" + "adcq %7,%2 \n\t" + "adcq %8,%3 \n\t" + "adcq $0,%4 \n\t" + : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(carry) + : "r"(a0), "r"(a1), "r"(a2), "r"(a3), + "0"(r0), "1"(r1), "2"(r2), "3"(r3) + : "%cc"); +#endif + + MP_CHECKOK(s_mp_pad(r, 4)); + MP_DIGIT(r, 3) = r3; + MP_DIGIT(r, 2) = r2; + MP_DIGIT(r, 1) = r1; + MP_DIGIT(r, 0) = r0; + MP_SIGN(r) = MP_ZPOS; + MP_USED(r) = 4; + + /* Do quick 'subract' if we've gone over + * (add the 2's complement of the curve field) */ + a3 = MP_DIGIT(&meth->irr, 3); + if (carry || r3 > a3 || + ((r3 == a3) && mp_cmp(r, &meth->irr) != MP_LT)) { + a2 = MP_DIGIT(&meth->irr, 2); + a1 = MP_DIGIT(&meth->irr, 1); + a0 = MP_DIGIT(&meth->irr, 0); +#ifndef MPI_AMD64_ADD + carry = 0; + MP_SUB_BORROW(r0, a0, r0, carry); + MP_SUB_BORROW(r1, a1, r1, carry); + MP_SUB_BORROW(r2, a2, r2, carry); + MP_SUB_BORROW(r3, a3, r3, carry); +#else + __asm__( + "subq %4,%0 \n\t" + "sbbq %5,%1 \n\t" + "sbbq %6,%2 \n\t" + "sbbq %7,%3 \n\t" + : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3) + : "r"(a0), "r"(a1), "r"(a2), "r"(a3), + "0"(r0), "1"(r1), "2"(r2), "3"(r3) + : "%cc"); +#endif + MP_DIGIT(r, 3) = r3; + MP_DIGIT(r, 2) = r2; + MP_DIGIT(r, 1) = r1; + MP_DIGIT(r, 0) = r0; + } + + s_mp_clamp(r); + +CLEANUP: + return res; +} + +/* 5 words */ +mp_err +ec_GFp_add_5(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth) +{ + mp_err res = MP_OKAY; + mp_digit a0 = 0, a1 = 0, a2 = 0, a3 = 0, a4 = 0; + mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0; + mp_digit carry; + + switch (MP_USED(a)) { + case 5: + a4 = MP_DIGIT(a, 4); + case 4: + a3 = MP_DIGIT(a, 3); + case 3: + a2 = MP_DIGIT(a, 2); + case 2: + a1 = MP_DIGIT(a, 1); + case 1: + a0 = MP_DIGIT(a, 0); + } + switch (MP_USED(b)) { + case 5: + r4 = MP_DIGIT(b, 4); + case 4: + r3 = MP_DIGIT(b, 3); + case 3: + r2 = MP_DIGIT(b, 2); + case 2: + r1 = MP_DIGIT(b, 1); + case 1: + r0 = MP_DIGIT(b, 0); + } + + carry = 0; + MP_ADD_CARRY(a0, r0, r0, carry); + MP_ADD_CARRY(a1, r1, r1, carry); + MP_ADD_CARRY(a2, r2, r2, carry); + MP_ADD_CARRY(a3, r3, r3, carry); + MP_ADD_CARRY(a4, r4, r4, carry); + + MP_CHECKOK(s_mp_pad(r, 5)); + MP_DIGIT(r, 4) = r4; + MP_DIGIT(r, 3) = r3; + MP_DIGIT(r, 2) = r2; + MP_DIGIT(r, 1) = r1; + MP_DIGIT(r, 0) = r0; + MP_SIGN(r) = MP_ZPOS; + MP_USED(r) = 5; + + /* Do quick 'subract' if we've gone over + * (add the 2's complement of the curve field) */ + a4 = MP_DIGIT(&meth->irr, 4); + if (carry || r4 > a4 || + ((r4 == a4) && mp_cmp(r, &meth->irr) != MP_LT)) { + a3 = MP_DIGIT(&meth->irr, 3); + a2 = MP_DIGIT(&meth->irr, 2); + a1 = MP_DIGIT(&meth->irr, 1); + a0 = MP_DIGIT(&meth->irr, 0); + carry = 0; + MP_SUB_BORROW(r0, a0, r0, carry); + MP_SUB_BORROW(r1, a1, r1, carry); + MP_SUB_BORROW(r2, a2, r2, carry); + MP_SUB_BORROW(r3, a3, r3, carry); + MP_SUB_BORROW(r4, a4, r4, carry); + MP_DIGIT(r, 4) = r4; + MP_DIGIT(r, 3) = r3; + MP_DIGIT(r, 2) = r2; + MP_DIGIT(r, 1) = r1; + MP_DIGIT(r, 0) = r0; + } + + s_mp_clamp(r); + +CLEANUP: + return res; +} + +/* 6 words */ +mp_err +ec_GFp_add_6(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth) +{ + mp_err res = MP_OKAY; + mp_digit a0 = 0, a1 = 0, a2 = 0, a3 = 0, a4 = 0, a5 = 0; + mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0, r5 = 0; + mp_digit carry; + + switch (MP_USED(a)) { + case 6: + a5 = MP_DIGIT(a, 5); + case 5: + a4 = MP_DIGIT(a, 4); + case 4: + a3 = MP_DIGIT(a, 3); + case 3: + a2 = MP_DIGIT(a, 2); + case 2: + a1 = MP_DIGIT(a, 1); + case 1: + a0 = MP_DIGIT(a, 0); + } + switch (MP_USED(b)) { + case 6: + r5 = MP_DIGIT(b, 5); + case 5: + r4 = MP_DIGIT(b, 4); + case 4: + r3 = MP_DIGIT(b, 3); + case 3: + r2 = MP_DIGIT(b, 2); + case 2: + r1 = MP_DIGIT(b, 1); + case 1: + r0 = MP_DIGIT(b, 0); + } + + carry = 0; + MP_ADD_CARRY(a0, r0, r0, carry); + MP_ADD_CARRY(a1, r1, r1, carry); + MP_ADD_CARRY(a2, r2, r2, carry); + MP_ADD_CARRY(a3, r3, r3, carry); + MP_ADD_CARRY(a4, r4, r4, carry); + MP_ADD_CARRY(a5, r5, r5, carry); + + MP_CHECKOK(s_mp_pad(r, 6)); + MP_DIGIT(r, 5) = r5; + MP_DIGIT(r, 4) = r4; + MP_DIGIT(r, 3) = r3; + MP_DIGIT(r, 2) = r2; + MP_DIGIT(r, 1) = r1; + MP_DIGIT(r, 0) = r0; + MP_SIGN(r) = MP_ZPOS; + MP_USED(r) = 6; + + /* Do quick 'subract' if we've gone over + * (add the 2's complement of the curve field) */ + a5 = MP_DIGIT(&meth->irr, 5); + if (carry || r5 > a5 || + ((r5 == a5) && mp_cmp(r, &meth->irr) != MP_LT)) { + a4 = MP_DIGIT(&meth->irr, 4); + a3 = MP_DIGIT(&meth->irr, 3); + a2 = MP_DIGIT(&meth->irr, 2); + a1 = MP_DIGIT(&meth->irr, 1); + a0 = MP_DIGIT(&meth->irr, 0); + carry = 0; + MP_SUB_BORROW(r0, a0, r0, carry); + MP_SUB_BORROW(r1, a1, r1, carry); + MP_SUB_BORROW(r2, a2, r2, carry); + MP_SUB_BORROW(r3, a3, r3, carry); + MP_SUB_BORROW(r4, a4, r4, carry); + MP_SUB_BORROW(r5, a5, r5, carry); + MP_DIGIT(r, 5) = r5; + MP_DIGIT(r, 4) = r4; + MP_DIGIT(r, 3) = r3; + MP_DIGIT(r, 2) = r2; + MP_DIGIT(r, 1) = r1; + MP_DIGIT(r, 0) = r0; + } + + s_mp_clamp(r); + +CLEANUP: + return res; +} + +/* + * The following subraction functions do in-line subractions based + * on our curve size. + * + * ... 3 words + */ +mp_err +ec_GFp_sub_3(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth) +{ + mp_err res = MP_OKAY; + mp_digit b0 = 0, b1 = 0, b2 = 0; + mp_digit r0 = 0, r1 = 0, r2 = 0; + mp_digit borrow; + + switch (MP_USED(a)) { + case 3: + r2 = MP_DIGIT(a, 2); + case 2: + r1 = MP_DIGIT(a, 1); + case 1: + r0 = MP_DIGIT(a, 0); + } + switch (MP_USED(b)) { + case 3: + b2 = MP_DIGIT(b, 2); + case 2: + b1 = MP_DIGIT(b, 1); + case 1: + b0 = MP_DIGIT(b, 0); + } + +#ifndef MPI_AMD64_ADD + borrow = 0; + MP_SUB_BORROW(r0, b0, r0, borrow); + MP_SUB_BORROW(r1, b1, r1, borrow); + MP_SUB_BORROW(r2, b2, r2, borrow); +#else + __asm__( + "xorq %3,%3 \n\t" + "subq %4,%0 \n\t" + "sbbq %5,%1 \n\t" + "sbbq %6,%2 \n\t" + "adcq $0,%3 \n\t" + : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(borrow) + : "r"(b0), "r"(b1), "r"(b2), + "0"(r0), "1"(r1), "2"(r2) + : "%cc"); +#endif + + /* Do quick 'add' if we've gone under 0 + * (subtract the 2's complement of the curve field) */ + if (borrow) { + b2 = MP_DIGIT(&meth->irr, 2); + b1 = MP_DIGIT(&meth->irr, 1); + b0 = MP_DIGIT(&meth->irr, 0); +#ifndef MPI_AMD64_ADD + borrow = 0; + MP_ADD_CARRY(b0, r0, r0, borrow); + MP_ADD_CARRY(b1, r1, r1, borrow); + MP_ADD_CARRY(b2, r2, r2, borrow); +#else + __asm__( + "addq %3,%0 \n\t" + "adcq %4,%1 \n\t" + "adcq %5,%2 \n\t" + : "=r"(r0), "=r"(r1), "=r"(r2) + : "r"(b0), "r"(b1), "r"(b2), + "0"(r0), "1"(r1), "2"(r2) + : "%cc"); +#endif + } + +#ifdef MPI_AMD64_ADD + /* compiler fakeout? */ + if ((r2 == b0) && (r1 == b0) && (r0 == b0)) { + MP_CHECKOK(s_mp_pad(r, 4)); + } +#endif + MP_CHECKOK(s_mp_pad(r, 3)); + MP_DIGIT(r, 2) = r2; + MP_DIGIT(r, 1) = r1; + MP_DIGIT(r, 0) = r0; + MP_SIGN(r) = MP_ZPOS; + MP_USED(r) = 3; + s_mp_clamp(r); + +CLEANUP: + return res; +} + +/* 4 words */ +mp_err +ec_GFp_sub_4(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth) +{ + mp_err res = MP_OKAY; + mp_digit b0 = 0, b1 = 0, b2 = 0, b3 = 0; + mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0; + mp_digit borrow; + + switch (MP_USED(a)) { + case 4: + r3 = MP_DIGIT(a, 3); + case 3: + r2 = MP_DIGIT(a, 2); + case 2: + r1 = MP_DIGIT(a, 1); + case 1: + r0 = MP_DIGIT(a, 0); + } + switch (MP_USED(b)) { + case 4: + b3 = MP_DIGIT(b, 3); + case 3: + b2 = MP_DIGIT(b, 2); + case 2: + b1 = MP_DIGIT(b, 1); + case 1: + b0 = MP_DIGIT(b, 0); + } + +#ifndef MPI_AMD64_ADD + borrow = 0; + MP_SUB_BORROW(r0, b0, r0, borrow); + MP_SUB_BORROW(r1, b1, r1, borrow); + MP_SUB_BORROW(r2, b2, r2, borrow); + MP_SUB_BORROW(r3, b3, r3, borrow); +#else + __asm__( + "xorq %4,%4 \n\t" + "subq %5,%0 \n\t" + "sbbq %6,%1 \n\t" + "sbbq %7,%2 \n\t" + "sbbq %8,%3 \n\t" + "adcq $0,%4 \n\t" + : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(borrow) + : "r"(b0), "r"(b1), "r"(b2), "r"(b3), + "0"(r0), "1"(r1), "2"(r2), "3"(r3) + : "%cc"); +#endif + + /* Do quick 'add' if we've gone under 0 + * (subtract the 2's complement of the curve field) */ + if (borrow) { + b3 = MP_DIGIT(&meth->irr, 3); + b2 = MP_DIGIT(&meth->irr, 2); + b1 = MP_DIGIT(&meth->irr, 1); + b0 = MP_DIGIT(&meth->irr, 0); +#ifndef MPI_AMD64_ADD + borrow = 0; + MP_ADD_CARRY(b0, r0, r0, borrow); + MP_ADD_CARRY(b1, r1, r1, borrow); + MP_ADD_CARRY(b2, r2, r2, borrow); + MP_ADD_CARRY(b3, r3, r3, borrow); +#else + __asm__( + "addq %4,%0 \n\t" + "adcq %5,%1 \n\t" + "adcq %6,%2 \n\t" + "adcq %7,%3 \n\t" + : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3) + : "r"(b0), "r"(b1), "r"(b2), "r"(b3), + "0"(r0), "1"(r1), "2"(r2), "3"(r3) + : "%cc"); +#endif + } +#ifdef MPI_AMD64_ADD + /* compiler fakeout? */ + if ((r3 == b0) && (r1 == b0) && (r0 == b0)) { + MP_CHECKOK(s_mp_pad(r, 4)); + } +#endif + MP_CHECKOK(s_mp_pad(r, 4)); + MP_DIGIT(r, 3) = r3; + MP_DIGIT(r, 2) = r2; + MP_DIGIT(r, 1) = r1; + MP_DIGIT(r, 0) = r0; + MP_SIGN(r) = MP_ZPOS; + MP_USED(r) = 4; + s_mp_clamp(r); + +CLEANUP: + return res; +} + +/* 5 words */ +mp_err +ec_GFp_sub_5(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth) +{ + mp_err res = MP_OKAY; + mp_digit b0 = 0, b1 = 0, b2 = 0, b3 = 0, b4 = 0; + mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0; + mp_digit borrow; + + switch (MP_USED(a)) { + case 5: + r4 = MP_DIGIT(a, 4); + case 4: + r3 = MP_DIGIT(a, 3); + case 3: + r2 = MP_DIGIT(a, 2); + case 2: + r1 = MP_DIGIT(a, 1); + case 1: + r0 = MP_DIGIT(a, 0); + } + switch (MP_USED(b)) { + case 5: + b4 = MP_DIGIT(b, 4); + case 4: + b3 = MP_DIGIT(b, 3); + case 3: + b2 = MP_DIGIT(b, 2); + case 2: + b1 = MP_DIGIT(b, 1); + case 1: + b0 = MP_DIGIT(b, 0); + } + + borrow = 0; + MP_SUB_BORROW(r0, b0, r0, borrow); + MP_SUB_BORROW(r1, b1, r1, borrow); + MP_SUB_BORROW(r2, b2, r2, borrow); + MP_SUB_BORROW(r3, b3, r3, borrow); + MP_SUB_BORROW(r4, b4, r4, borrow); + + /* Do quick 'add' if we've gone under 0 + * (subtract the 2's complement of the curve field) */ + if (borrow) { + b4 = MP_DIGIT(&meth->irr, 4); + b3 = MP_DIGIT(&meth->irr, 3); + b2 = MP_DIGIT(&meth->irr, 2); + b1 = MP_DIGIT(&meth->irr, 1); + b0 = MP_DIGIT(&meth->irr, 0); + borrow = 0; + MP_ADD_CARRY(b0, r0, r0, borrow); + MP_ADD_CARRY(b1, r1, r1, borrow); + MP_ADD_CARRY(b2, r2, r2, borrow); + MP_ADD_CARRY(b3, r3, r3, borrow); + MP_ADD_CARRY(b4, r4, r4, borrow); + } + MP_CHECKOK(s_mp_pad(r, 5)); + MP_DIGIT(r, 4) = r4; + MP_DIGIT(r, 3) = r3; + MP_DIGIT(r, 2) = r2; + MP_DIGIT(r, 1) = r1; + MP_DIGIT(r, 0) = r0; + MP_SIGN(r) = MP_ZPOS; + MP_USED(r) = 5; + s_mp_clamp(r); + +CLEANUP: + return res; +} + +/* 6 words */ +mp_err +ec_GFp_sub_6(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth) +{ + mp_err res = MP_OKAY; + mp_digit b0 = 0, b1 = 0, b2 = 0, b3 = 0, b4 = 0, b5 = 0; + mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0, r5 = 0; + mp_digit borrow; + + switch (MP_USED(a)) { + case 6: + r5 = MP_DIGIT(a, 5); + case 5: + r4 = MP_DIGIT(a, 4); + case 4: + r3 = MP_DIGIT(a, 3); + case 3: + r2 = MP_DIGIT(a, 2); + case 2: + r1 = MP_DIGIT(a, 1); + case 1: + r0 = MP_DIGIT(a, 0); + } + switch (MP_USED(b)) { + case 6: + b5 = MP_DIGIT(b, 5); + case 5: + b4 = MP_DIGIT(b, 4); + case 4: + b3 = MP_DIGIT(b, 3); + case 3: + b2 = MP_DIGIT(b, 2); + case 2: + b1 = MP_DIGIT(b, 1); + case 1: + b0 = MP_DIGIT(b, 0); + } + + borrow = 0; + MP_SUB_BORROW(r0, b0, r0, borrow); + MP_SUB_BORROW(r1, b1, r1, borrow); + MP_SUB_BORROW(r2, b2, r2, borrow); + MP_SUB_BORROW(r3, b3, r3, borrow); + MP_SUB_BORROW(r4, b4, r4, borrow); + MP_SUB_BORROW(r5, b5, r5, borrow); + + /* Do quick 'add' if we've gone under 0 + * (subtract the 2's complement of the curve field) */ + if (borrow) { + b5 = MP_DIGIT(&meth->irr, 5); + b4 = MP_DIGIT(&meth->irr, 4); + b3 = MP_DIGIT(&meth->irr, 3); + b2 = MP_DIGIT(&meth->irr, 2); + b1 = MP_DIGIT(&meth->irr, 1); + b0 = MP_DIGIT(&meth->irr, 0); + borrow = 0; + MP_ADD_CARRY(b0, r0, r0, borrow); + MP_ADD_CARRY(b1, r1, r1, borrow); + MP_ADD_CARRY(b2, r2, r2, borrow); + MP_ADD_CARRY(b3, r3, r3, borrow); + MP_ADD_CARRY(b4, r4, r4, borrow); + MP_ADD_CARRY(b5, r5, r5, borrow); + } + + MP_CHECKOK(s_mp_pad(r, 6)); + MP_DIGIT(r, 5) = r5; + MP_DIGIT(r, 4) = r4; + MP_DIGIT(r, 3) = r3; + MP_DIGIT(r, 2) = r2; + MP_DIGIT(r, 1) = r1; + MP_DIGIT(r, 0) = r0; + MP_SIGN(r) = MP_ZPOS; + MP_USED(r) = 6; + s_mp_clamp(r); + +CLEANUP: + return res; +} + +/* Reduces an integer to a field element. */ +mp_err +ec_GFp_mod(const mp_int *a, mp_int *r, const GFMethod *meth) +{ + return mp_mod(a, &meth->irr, r); +} + +/* Multiplies two field elements. */ +mp_err +ec_GFp_mul(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth) +{ + return mp_mulmod(a, b, &meth->irr, r); +} + +/* Squares a field element. */ +mp_err +ec_GFp_sqr(const mp_int *a, mp_int *r, const GFMethod *meth) +{ + return mp_sqrmod(a, &meth->irr, r); +} + +/* Divides two field elements. If a is NULL, then returns the inverse of + * b. */ +mp_err +ec_GFp_div(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth) +{ + mp_err res = MP_OKAY; + mp_int t; + + /* If a is NULL, then return the inverse of b, otherwise return a/b. */ + if (a == NULL) { + return mp_invmod(b, &meth->irr, r); + } else { + /* MPI doesn't support divmod, so we implement it using invmod and + * mulmod. */ + MP_CHECKOK(mp_init(&t)); + MP_CHECKOK(mp_invmod(b, &meth->irr, &t)); + MP_CHECKOK(mp_mulmod(a, &t, &meth->irr, r)); + CLEANUP: + mp_clear(&t); + return res; + } +} + +/* Wrapper functions for generic binary polynomial field arithmetic. */ + +/* Adds two field elements. */ +mp_err +ec_GF2m_add(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth) +{ + return mp_badd(a, b, r); +} + +/* Negates a field element. Note that for binary polynomial fields, the + * negation of a field element is the field element itself. */ +mp_err +ec_GF2m_neg(const mp_int *a, mp_int *r, const GFMethod *meth) +{ + if (a == r) { + return MP_OKAY; + } else { + return mp_copy(a, r); + } +} + +/* Reduces a binary polynomial to a field element. */ +mp_err +ec_GF2m_mod(const mp_int *a, mp_int *r, const GFMethod *meth) +{ + return mp_bmod(a, meth->irr_arr, r); +} + +/* Multiplies two field elements. */ +mp_err +ec_GF2m_mul(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth) +{ + return mp_bmulmod(a, b, meth->irr_arr, r); +} + +/* Squares a field element. */ +mp_err +ec_GF2m_sqr(const mp_int *a, mp_int *r, const GFMethod *meth) +{ + return mp_bsqrmod(a, meth->irr_arr, r); +} + +/* Divides two field elements. If a is NULL, then returns the inverse of + * b. */ +mp_err +ec_GF2m_div(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth) +{ + mp_err res = MP_OKAY; + mp_int t; + + /* If a is NULL, then return the inverse of b, otherwise return a/b. */ + if (a == NULL) { + /* The GF(2^m) portion of MPI doesn't support invmod, so we + * compute 1/b. */ + MP_CHECKOK(mp_init(&t)); + MP_CHECKOK(mp_set_int(&t, 1)); + MP_CHECKOK(mp_bdivmod(&t, b, &meth->irr, meth->irr_arr, r)); + CLEANUP: + mp_clear(&t); + return res; + } else { + return mp_bdivmod(a, b, &meth->irr, meth->irr_arr, r); + } +} diff --git a/security/nss/lib/freebl/ecl/ecl_mult.c b/security/nss/lib/freebl/ecl/ecl_mult.c new file mode 100644 index 000000000..ffbcbf1d9 --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecl_mult.c @@ -0,0 +1,305 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mpi.h" +#include "mplogic.h" +#include "ecl.h" +#include "ecl-priv.h" +#include <stdlib.h> + +/* Elliptic curve scalar-point multiplication. Computes R(x, y) = k * P(x, + * y). If x, y = NULL, then P is assumed to be the generator (base point) + * of the group of points on the elliptic curve. Input and output values + * are assumed to be NOT field-encoded. */ +mp_err +ECPoint_mul(const ECGroup *group, const mp_int *k, const mp_int *px, + const mp_int *py, mp_int *rx, mp_int *ry) +{ + mp_err res = MP_OKAY; + mp_int kt; + + ARGCHK((k != NULL) && (group != NULL), MP_BADARG); + MP_DIGITS(&kt) = 0; + + /* want scalar to be less than or equal to group order */ + if (mp_cmp(k, &group->order) > 0) { + MP_CHECKOK(mp_init(&kt)); + MP_CHECKOK(mp_mod(k, &group->order, &kt)); + } else { + MP_SIGN(&kt) = MP_ZPOS; + MP_USED(&kt) = MP_USED(k); + MP_ALLOC(&kt) = MP_ALLOC(k); + MP_DIGITS(&kt) = MP_DIGITS(k); + } + + if ((px == NULL) || (py == NULL)) { + if (group->base_point_mul) { + MP_CHECKOK(group->base_point_mul(&kt, rx, ry, group)); + } else { + MP_CHECKOK(group->point_mul(&kt, &group->genx, &group->geny, rx, ry, + group)); + } + } else { + if (group->meth->field_enc) { + MP_CHECKOK(group->meth->field_enc(px, rx, group->meth)); + MP_CHECKOK(group->meth->field_enc(py, ry, group->meth)); + MP_CHECKOK(group->point_mul(&kt, rx, ry, rx, ry, group)); + } else { + MP_CHECKOK(group->point_mul(&kt, px, py, rx, ry, group)); + } + } + if (group->meth->field_dec) { + MP_CHECKOK(group->meth->field_dec(rx, rx, group->meth)); + MP_CHECKOK(group->meth->field_dec(ry, ry, group->meth)); + } + +CLEANUP: + if (MP_DIGITS(&kt) != MP_DIGITS(k)) { + mp_clear(&kt); + } + return res; +} + +/* Elliptic curve scalar-point multiplication. Computes R(x, y) = k1 * G + + * k2 * P(x, y), where G is the generator (base point) of the group of + * points on the elliptic curve. Allows k1 = NULL or { k2, P } = NULL. + * Input and output values are assumed to be NOT field-encoded. */ +mp_err +ec_pts_mul_basic(const mp_int *k1, const mp_int *k2, const mp_int *px, + const mp_int *py, mp_int *rx, mp_int *ry, + const ECGroup *group) +{ + mp_err res = MP_OKAY; + mp_int sx, sy; + + ARGCHK(group != NULL, MP_BADARG); + ARGCHK(!((k1 == NULL) && ((k2 == NULL) || (px == NULL) || (py == NULL))), MP_BADARG); + + /* if some arguments are not defined used ECPoint_mul */ + if (k1 == NULL) { + return ECPoint_mul(group, k2, px, py, rx, ry); + } else if ((k2 == NULL) || (px == NULL) || (py == NULL)) { + return ECPoint_mul(group, k1, NULL, NULL, rx, ry); + } + + MP_DIGITS(&sx) = 0; + MP_DIGITS(&sy) = 0; + MP_CHECKOK(mp_init(&sx)); + MP_CHECKOK(mp_init(&sy)); + + MP_CHECKOK(ECPoint_mul(group, k1, NULL, NULL, &sx, &sy)); + MP_CHECKOK(ECPoint_mul(group, k2, px, py, rx, ry)); + + if (group->meth->field_enc) { + MP_CHECKOK(group->meth->field_enc(&sx, &sx, group->meth)); + MP_CHECKOK(group->meth->field_enc(&sy, &sy, group->meth)); + MP_CHECKOK(group->meth->field_enc(rx, rx, group->meth)); + MP_CHECKOK(group->meth->field_enc(ry, ry, group->meth)); + } + + MP_CHECKOK(group->point_add(&sx, &sy, rx, ry, rx, ry, group)); + + if (group->meth->field_dec) { + MP_CHECKOK(group->meth->field_dec(rx, rx, group->meth)); + MP_CHECKOK(group->meth->field_dec(ry, ry, group->meth)); + } + +CLEANUP: + mp_clear(&sx); + mp_clear(&sy); + return res; +} + +/* Elliptic curve scalar-point multiplication. Computes R(x, y) = k1 * G + + * k2 * P(x, y), where G is the generator (base point) of the group of + * points on the elliptic curve. Allows k1 = NULL or { k2, P } = NULL. + * Input and output values are assumed to be NOT field-encoded. Uses + * algorithm 15 (simultaneous multiple point multiplication) from Brown, + * Hankerson, Lopez, Menezes. Software Implementation of the NIST + * Elliptic Curves over Prime Fields. */ +mp_err +ec_pts_mul_simul_w2(const mp_int *k1, const mp_int *k2, const mp_int *px, + const mp_int *py, mp_int *rx, mp_int *ry, + const ECGroup *group) +{ + mp_err res = MP_OKAY; + mp_int precomp[4][4][2]; + const mp_int *a, *b; + unsigned int i, j; + int ai, bi, d; + + ARGCHK(group != NULL, MP_BADARG); + ARGCHK(!((k1 == NULL) && ((k2 == NULL) || (px == NULL) || (py == NULL))), MP_BADARG); + + /* if some arguments are not defined used ECPoint_mul */ + if (k1 == NULL) { + return ECPoint_mul(group, k2, px, py, rx, ry); + } else if ((k2 == NULL) || (px == NULL) || (py == NULL)) { + return ECPoint_mul(group, k1, NULL, NULL, rx, ry); + } + + /* initialize precomputation table */ + for (i = 0; i < 4; i++) { + for (j = 0; j < 4; j++) { + MP_DIGITS(&precomp[i][j][0]) = 0; + MP_DIGITS(&precomp[i][j][1]) = 0; + } + } + for (i = 0; i < 4; i++) { + for (j = 0; j < 4; j++) { + MP_CHECKOK(mp_init_size(&precomp[i][j][0], + ECL_MAX_FIELD_SIZE_DIGITS)); + MP_CHECKOK(mp_init_size(&precomp[i][j][1], + ECL_MAX_FIELD_SIZE_DIGITS)); + } + } + + /* fill precomputation table */ + /* assign {k1, k2} = {a, b} such that len(a) >= len(b) */ + if (mpl_significant_bits(k1) < mpl_significant_bits(k2)) { + a = k2; + b = k1; + if (group->meth->field_enc) { + MP_CHECKOK(group->meth->field_enc(px, &precomp[1][0][0], group->meth)); + MP_CHECKOK(group->meth->field_enc(py, &precomp[1][0][1], group->meth)); + } else { + MP_CHECKOK(mp_copy(px, &precomp[1][0][0])); + MP_CHECKOK(mp_copy(py, &precomp[1][0][1])); + } + MP_CHECKOK(mp_copy(&group->genx, &precomp[0][1][0])); + MP_CHECKOK(mp_copy(&group->geny, &precomp[0][1][1])); + } else { + a = k1; + b = k2; + MP_CHECKOK(mp_copy(&group->genx, &precomp[1][0][0])); + MP_CHECKOK(mp_copy(&group->geny, &precomp[1][0][1])); + if (group->meth->field_enc) { + MP_CHECKOK(group->meth->field_enc(px, &precomp[0][1][0], group->meth)); + MP_CHECKOK(group->meth->field_enc(py, &precomp[0][1][1], group->meth)); + } else { + MP_CHECKOK(mp_copy(px, &precomp[0][1][0])); + MP_CHECKOK(mp_copy(py, &precomp[0][1][1])); + } + } + /* precompute [*][0][*] */ + mp_zero(&precomp[0][0][0]); + mp_zero(&precomp[0][0][1]); + MP_CHECKOK(group->point_dbl(&precomp[1][0][0], &precomp[1][0][1], + &precomp[2][0][0], &precomp[2][0][1], group)); + MP_CHECKOK(group->point_add(&precomp[1][0][0], &precomp[1][0][1], + &precomp[2][0][0], &precomp[2][0][1], + &precomp[3][0][0], &precomp[3][0][1], group)); + /* precompute [*][1][*] */ + for (i = 1; i < 4; i++) { + MP_CHECKOK(group->point_add(&precomp[0][1][0], &precomp[0][1][1], + &precomp[i][0][0], &precomp[i][0][1], + &precomp[i][1][0], &precomp[i][1][1], group)); + } + /* precompute [*][2][*] */ + MP_CHECKOK(group->point_dbl(&precomp[0][1][0], &precomp[0][1][1], + &precomp[0][2][0], &precomp[0][2][1], group)); + for (i = 1; i < 4; i++) { + MP_CHECKOK(group->point_add(&precomp[0][2][0], &precomp[0][2][1], + &precomp[i][0][0], &precomp[i][0][1], + &precomp[i][2][0], &precomp[i][2][1], group)); + } + /* precompute [*][3][*] */ + MP_CHECKOK(group->point_add(&precomp[0][1][0], &precomp[0][1][1], + &precomp[0][2][0], &precomp[0][2][1], + &precomp[0][3][0], &precomp[0][3][1], group)); + for (i = 1; i < 4; i++) { + MP_CHECKOK(group->point_add(&precomp[0][3][0], &precomp[0][3][1], + &precomp[i][0][0], &precomp[i][0][1], + &precomp[i][3][0], &precomp[i][3][1], group)); + } + + d = (mpl_significant_bits(a) + 1) / 2; + + /* R = inf */ + mp_zero(rx); + mp_zero(ry); + + for (i = d; i-- > 0;) { + ai = MP_GET_BIT(a, 2 * i + 1); + ai <<= 1; + ai |= MP_GET_BIT(a, 2 * i); + bi = MP_GET_BIT(b, 2 * i + 1); + bi <<= 1; + bi |= MP_GET_BIT(b, 2 * i); + /* R = 2^2 * R */ + MP_CHECKOK(group->point_dbl(rx, ry, rx, ry, group)); + MP_CHECKOK(group->point_dbl(rx, ry, rx, ry, group)); + /* R = R + (ai * A + bi * B) */ + MP_CHECKOK(group->point_add(rx, ry, &precomp[ai][bi][0], + &precomp[ai][bi][1], rx, ry, group)); + } + + if (group->meth->field_dec) { + MP_CHECKOK(group->meth->field_dec(rx, rx, group->meth)); + MP_CHECKOK(group->meth->field_dec(ry, ry, group->meth)); + } + +CLEANUP: + for (i = 0; i < 4; i++) { + for (j = 0; j < 4; j++) { + mp_clear(&precomp[i][j][0]); + mp_clear(&precomp[i][j][1]); + } + } + return res; +} + +/* Elliptic curve scalar-point multiplication. Computes R(x, y) = k1 * G + + * k2 * P(x, y), where G is the generator (base point) of the group of + * points on the elliptic curve. Allows k1 = NULL or { k2, P } = NULL. + * Input and output values are assumed to be NOT field-encoded. */ +mp_err +ECPoints_mul(const ECGroup *group, const mp_int *k1, const mp_int *k2, + const mp_int *px, const mp_int *py, mp_int *rx, mp_int *ry) +{ + mp_err res = MP_OKAY; + mp_int k1t, k2t; + const mp_int *k1p, *k2p; + + MP_DIGITS(&k1t) = 0; + MP_DIGITS(&k2t) = 0; + + ARGCHK(group != NULL, MP_BADARG); + + /* want scalar to be less than or equal to group order */ + if (k1 != NULL) { + if (mp_cmp(k1, &group->order) >= 0) { + MP_CHECKOK(mp_init(&k1t)); + MP_CHECKOK(mp_mod(k1, &group->order, &k1t)); + k1p = &k1t; + } else { + k1p = k1; + } + } else { + k1p = k1; + } + if (k2 != NULL) { + if (mp_cmp(k2, &group->order) >= 0) { + MP_CHECKOK(mp_init(&k2t)); + MP_CHECKOK(mp_mod(k2, &group->order, &k2t)); + k2p = &k2t; + } else { + k2p = k2; + } + } else { + k2p = k2; + } + + /* if points_mul is defined, then use it */ + if (group->points_mul) { + res = group->points_mul(k1p, k2p, px, py, rx, ry, group); + } else { + res = ec_pts_mul_simul_w2(k1p, k2p, px, py, rx, ry, group); + } + +CLEANUP: + mp_clear(&k1t); + mp_clear(&k2t); + return res; +} diff --git a/security/nss/lib/freebl/ecl/ecp.h b/security/nss/lib/freebl/ecl/ecp.h new file mode 100644 index 000000000..7e54e4e07 --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecp.h @@ -0,0 +1,106 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef __ecp_h_ +#define __ecp_h_ + +#include "ecl-priv.h" + +/* Checks if point P(px, py) is at infinity. Uses affine coordinates. */ +mp_err ec_GFp_pt_is_inf_aff(const mp_int *px, const mp_int *py); + +/* Sets P(px, py) to be the point at infinity. Uses affine coordinates. */ +mp_err ec_GFp_pt_set_inf_aff(mp_int *px, mp_int *py); + +/* Computes R = P + Q where R is (rx, ry), P is (px, py) and Q is (qx, + * qy). Uses affine coordinates. */ +mp_err ec_GFp_pt_add_aff(const mp_int *px, const mp_int *py, + const mp_int *qx, const mp_int *qy, mp_int *rx, + mp_int *ry, const ECGroup *group); + +/* Computes R = P - Q. Uses affine coordinates. */ +mp_err ec_GFp_pt_sub_aff(const mp_int *px, const mp_int *py, + const mp_int *qx, const mp_int *qy, mp_int *rx, + mp_int *ry, const ECGroup *group); + +/* Computes R = 2P. Uses affine coordinates. */ +mp_err ec_GFp_pt_dbl_aff(const mp_int *px, const mp_int *py, mp_int *rx, + mp_int *ry, const ECGroup *group); + +/* Validates a point on a GFp curve. */ +mp_err ec_GFp_validate_point(const mp_int *px, const mp_int *py, const ECGroup *group); + +#ifdef ECL_ENABLE_GFP_PT_MUL_AFF +/* Computes R = nP where R is (rx, ry) and P is (px, py). The parameters + * a, b and p are the elliptic curve coefficients and the prime that + * determines the field GFp. Uses affine coordinates. */ +mp_err ec_GFp_pt_mul_aff(const mp_int *n, const mp_int *px, + const mp_int *py, mp_int *rx, mp_int *ry, + const ECGroup *group); +#endif + +/* Converts a point P(px, py) from affine coordinates to Jacobian + * projective coordinates R(rx, ry, rz). */ +mp_err ec_GFp_pt_aff2jac(const mp_int *px, const mp_int *py, mp_int *rx, + mp_int *ry, mp_int *rz, const ECGroup *group); + +/* Converts a point P(px, py, pz) from Jacobian projective coordinates to + * affine coordinates R(rx, ry). */ +mp_err ec_GFp_pt_jac2aff(const mp_int *px, const mp_int *py, + const mp_int *pz, mp_int *rx, mp_int *ry, + const ECGroup *group); + +/* Checks if point P(px, py, pz) is at infinity. Uses Jacobian + * coordinates. */ +mp_err ec_GFp_pt_is_inf_jac(const mp_int *px, const mp_int *py, + const mp_int *pz); + +/* Sets P(px, py, pz) to be the point at infinity. Uses Jacobian + * coordinates. */ +mp_err ec_GFp_pt_set_inf_jac(mp_int *px, mp_int *py, mp_int *pz); + +/* Computes R = P + Q where R is (rx, ry, rz), P is (px, py, pz) and Q is + * (qx, qy, qz). Uses Jacobian coordinates. */ +mp_err ec_GFp_pt_add_jac_aff(const mp_int *px, const mp_int *py, + const mp_int *pz, const mp_int *qx, + const mp_int *qy, mp_int *rx, mp_int *ry, + mp_int *rz, const ECGroup *group); + +/* Computes R = 2P. Uses Jacobian coordinates. */ +mp_err ec_GFp_pt_dbl_jac(const mp_int *px, const mp_int *py, + const mp_int *pz, mp_int *rx, mp_int *ry, + mp_int *rz, const ECGroup *group); + +#ifdef ECL_ENABLE_GFP_PT_MUL_JAC +/* Computes R = nP where R is (rx, ry) and P is (px, py). The parameters + * a, b and p are the elliptic curve coefficients and the prime that + * determines the field GFp. Uses Jacobian coordinates. */ +mp_err ec_GFp_pt_mul_jac(const mp_int *n, const mp_int *px, + const mp_int *py, mp_int *rx, mp_int *ry, + const ECGroup *group); +#endif + +/* Computes R(x, y) = k1 * G + k2 * P(x, y), where G is the generator + * (base point) of the group of points on the elliptic curve. Allows k1 = + * NULL or { k2, P } = NULL. Implemented using mixed Jacobian-affine + * coordinates. Input and output values are assumed to be NOT + * field-encoded and are in affine form. */ +mp_err +ec_GFp_pts_mul_jac(const mp_int *k1, const mp_int *k2, const mp_int *px, + const mp_int *py, mp_int *rx, mp_int *ry, + const ECGroup *group); + +/* Computes R = nP where R is (rx, ry) and P is the base point. Elliptic + * curve points P and R can be identical. Uses mixed Modified-Jacobian + * co-ordinates for doubling and Chudnovsky Jacobian coordinates for + * additions. Assumes input is already field-encoded using field_enc, and + * returns output that is still field-encoded. Uses 5-bit window NAF + * method (algorithm 11) for scalar-point multiplication from Brown, + * Hankerson, Lopez, Menezes. Software Implementation of the NIST Elliptic + * Curves Over Prime Fields. */ +mp_err +ec_GFp_pt_mul_jm_wNAF(const mp_int *n, const mp_int *px, const mp_int *py, + mp_int *rx, mp_int *ry, const ECGroup *group); + +#endif /* __ecp_h_ */ diff --git a/security/nss/lib/freebl/ecl/ecp_25519.c b/security/nss/lib/freebl/ecl/ecp_25519.c new file mode 100644 index 000000000..a8d41520e --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecp_25519.c @@ -0,0 +1,120 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* curve 25519 https://www.rfc-editor.org/rfc/rfc7748.txt */ + +#ifdef FREEBL_NO_DEPEND +#include "../stubs.h" +#endif + +#include "ecl-priv.h" +#include "ecp.h" +#include "mpi.h" +#include "mplogic.h" +#include "mpi-priv.h" +#include "secmpi.h" +#include "secitem.h" +#include "secport.h" +#include <stdlib.h> +#include <stdio.h> + +/* + * point validation is not necessary in general. But this checks a point (px) + * against some known bad values. + */ +SECStatus +ec_Curve25519_pt_validate(const SECItem *px) +{ + PRUint8 *p; + int i; + PRUint8 forbiddenValues[12][32] = { + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0xe0, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae, + 0x16, 0x56, 0xe3, 0xfa, 0xf1, 0x9f, 0xc4, 0x6a, + 0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32, 0xb1, 0xfd, + 0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8, 0x00 }, + { 0x5f, 0x9c, 0x95, 0xbc, 0xa3, 0x50, 0x8c, 0x24, + 0xb1, 0xd0, 0xb1, 0x55, 0x9c, 0x83, 0xef, 0x5b, + 0x04, 0x44, 0x5c, 0xc4, 0x58, 0x1c, 0x8e, 0x86, + 0xd8, 0x22, 0x4e, 0xdd, 0xd0, 0x9f, 0x11, 0x57 }, + { 0xec, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + { 0xed, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + { 0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + { 0xcd, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae, + 0x16, 0x56, 0xe3, 0xfa, 0xf1, 0x9f, 0xc4, 0x6a, + 0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32, 0xb1, 0xfd, + 0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8, 0x80 }, + { 0x4c, 0x9c, 0x95, 0xbc, 0xa3, 0x50, 0x8c, 0x24, + 0xb1, 0xd0, 0xb1, 0x55, 0x9c, 0x83, 0xef, 0x5b, + 0x04, 0x44, 0x5c, 0xc4, 0x58, 0x1c, 0x8e, 0x86, + 0xd8, 0x22, 0x4e, 0xdd, 0xd0, 0x9f, 0x11, 0xd7 }, + { 0xd9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + { 0xda, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + { 0xdb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + }; + + /* The point must not be longer than 32 (it can be smaller). */ + if (px->len <= 32) { + p = px->data; + } else { + return SECFailure; + } + + for (i = 0; i < PR_ARRAY_SIZE(forbiddenValues); ++i) { + if (NSS_SecureMemcmp(p, forbiddenValues[i], px->len) == 0) { + return SECFailure; + } + } + + return SECSuccess; +} + +/* + * Scalar multiplication for Curve25519. + * If P == NULL, the base point is used. + * Returns X = k*P + */ +SECStatus +ec_Curve25519_pt_mul(SECItem *X, SECItem *k, SECItem *P) +{ + PRUint8 *px; + PRUint8 basePoint[32] = { 9 }; + + if (!P) { + px = basePoint; + } else { + PORT_Assert(P->len == 32); + if (P->len != 32) { + return SECFailure; + } + px = P->data; + } + + return ec_Curve25519_mul(X->data, k->data, px); +} diff --git a/security/nss/lib/freebl/ecl/ecp_256.c b/security/nss/lib/freebl/ecl/ecp_256.c new file mode 100644 index 000000000..ad4e630c1 --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecp_256.c @@ -0,0 +1,401 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "ecp.h" +#include "mpi.h" +#include "mplogic.h" +#include "mpi-priv.h" + +/* Fast modular reduction for p256 = 2^256 - 2^224 + 2^192+ 2^96 - 1. a can be r. + * Uses algorithm 2.29 from Hankerson, Menezes, Vanstone. Guide to + * Elliptic Curve Cryptography. */ +static mp_err +ec_GFp_nistp256_mod(const mp_int *a, mp_int *r, const GFMethod *meth) +{ + mp_err res = MP_OKAY; + mp_size a_used = MP_USED(a); + int a_bits = mpl_significant_bits(a); + mp_digit carry; + +#ifdef ECL_THIRTY_TWO_BIT + mp_digit a8 = 0, a9 = 0, a10 = 0, a11 = 0, a12 = 0, a13 = 0, a14 = 0, a15 = 0; + mp_digit r0, r1, r2, r3, r4, r5, r6, r7; + int r8; /* must be a signed value ! */ +#else + mp_digit a4 = 0, a5 = 0, a6 = 0, a7 = 0; + mp_digit a4h, a4l, a5h, a5l, a6h, a6l, a7h, a7l; + mp_digit r0, r1, r2, r3; + int r4; /* must be a signed value ! */ +#endif + /* for polynomials larger than twice the field size + * use regular reduction */ + if (a_bits < 256) { + if (a == r) + return MP_OKAY; + return mp_copy(a, r); + } + if (a_bits > 512) { + MP_CHECKOK(mp_mod(a, &meth->irr, r)); + } else { + +#ifdef ECL_THIRTY_TWO_BIT + switch (a_used) { + case 16: + a15 = MP_DIGIT(a, 15); + case 15: + a14 = MP_DIGIT(a, 14); + case 14: + a13 = MP_DIGIT(a, 13); + case 13: + a12 = MP_DIGIT(a, 12); + case 12: + a11 = MP_DIGIT(a, 11); + case 11: + a10 = MP_DIGIT(a, 10); + case 10: + a9 = MP_DIGIT(a, 9); + case 9: + a8 = MP_DIGIT(a, 8); + } + + r0 = MP_DIGIT(a, 0); + r1 = MP_DIGIT(a, 1); + r2 = MP_DIGIT(a, 2); + r3 = MP_DIGIT(a, 3); + r4 = MP_DIGIT(a, 4); + r5 = MP_DIGIT(a, 5); + r6 = MP_DIGIT(a, 6); + r7 = MP_DIGIT(a, 7); + + /* sum 1 */ + carry = 0; + MP_ADD_CARRY(r3, a11, r3, carry); + MP_ADD_CARRY(r4, a12, r4, carry); + MP_ADD_CARRY(r5, a13, r5, carry); + MP_ADD_CARRY(r6, a14, r6, carry); + MP_ADD_CARRY(r7, a15, r7, carry); + r8 = carry; + carry = 0; + MP_ADD_CARRY(r3, a11, r3, carry); + MP_ADD_CARRY(r4, a12, r4, carry); + MP_ADD_CARRY(r5, a13, r5, carry); + MP_ADD_CARRY(r6, a14, r6, carry); + MP_ADD_CARRY(r7, a15, r7, carry); + r8 += carry; + carry = 0; + /* sum 2 */ + MP_ADD_CARRY(r3, a12, r3, carry); + MP_ADD_CARRY(r4, a13, r4, carry); + MP_ADD_CARRY(r5, a14, r5, carry); + MP_ADD_CARRY(r6, a15, r6, carry); + MP_ADD_CARRY(r7, 0, r7, carry); + r8 += carry; + carry = 0; + /* combine last bottom of sum 3 with second sum 2 */ + MP_ADD_CARRY(r0, a8, r0, carry); + MP_ADD_CARRY(r1, a9, r1, carry); + MP_ADD_CARRY(r2, a10, r2, carry); + MP_ADD_CARRY(r3, a12, r3, carry); + MP_ADD_CARRY(r4, a13, r4, carry); + MP_ADD_CARRY(r5, a14, r5, carry); + MP_ADD_CARRY(r6, a15, r6, carry); + MP_ADD_CARRY(r7, a15, r7, carry); /* from sum 3 */ + r8 += carry; + carry = 0; + /* sum 3 (rest of it)*/ + MP_ADD_CARRY(r6, a14, r6, carry); + MP_ADD_CARRY(r7, 0, r7, carry); + r8 += carry; + carry = 0; + /* sum 4 (rest of it)*/ + MP_ADD_CARRY(r0, a9, r0, carry); + MP_ADD_CARRY(r1, a10, r1, carry); + MP_ADD_CARRY(r2, a11, r2, carry); + MP_ADD_CARRY(r3, a13, r3, carry); + MP_ADD_CARRY(r4, a14, r4, carry); + MP_ADD_CARRY(r5, a15, r5, carry); + MP_ADD_CARRY(r6, a13, r6, carry); + MP_ADD_CARRY(r7, a8, r7, carry); + r8 += carry; + carry = 0; + /* diff 5 */ + MP_SUB_BORROW(r0, a11, r0, carry); + MP_SUB_BORROW(r1, a12, r1, carry); + MP_SUB_BORROW(r2, a13, r2, carry); + MP_SUB_BORROW(r3, 0, r3, carry); + MP_SUB_BORROW(r4, 0, r4, carry); + MP_SUB_BORROW(r5, 0, r5, carry); + MP_SUB_BORROW(r6, a8, r6, carry); + MP_SUB_BORROW(r7, a10, r7, carry); + r8 -= carry; + carry = 0; + /* diff 6 */ + MP_SUB_BORROW(r0, a12, r0, carry); + MP_SUB_BORROW(r1, a13, r1, carry); + MP_SUB_BORROW(r2, a14, r2, carry); + MP_SUB_BORROW(r3, a15, r3, carry); + MP_SUB_BORROW(r4, 0, r4, carry); + MP_SUB_BORROW(r5, 0, r5, carry); + MP_SUB_BORROW(r6, a9, r6, carry); + MP_SUB_BORROW(r7, a11, r7, carry); + r8 -= carry; + carry = 0; + /* diff 7 */ + MP_SUB_BORROW(r0, a13, r0, carry); + MP_SUB_BORROW(r1, a14, r1, carry); + MP_SUB_BORROW(r2, a15, r2, carry); + MP_SUB_BORROW(r3, a8, r3, carry); + MP_SUB_BORROW(r4, a9, r4, carry); + MP_SUB_BORROW(r5, a10, r5, carry); + MP_SUB_BORROW(r6, 0, r6, carry); + MP_SUB_BORROW(r7, a12, r7, carry); + r8 -= carry; + carry = 0; + /* diff 8 */ + MP_SUB_BORROW(r0, a14, r0, carry); + MP_SUB_BORROW(r1, a15, r1, carry); + MP_SUB_BORROW(r2, 0, r2, carry); + MP_SUB_BORROW(r3, a9, r3, carry); + MP_SUB_BORROW(r4, a10, r4, carry); + MP_SUB_BORROW(r5, a11, r5, carry); + MP_SUB_BORROW(r6, 0, r6, carry); + MP_SUB_BORROW(r7, a13, r7, carry); + r8 -= carry; + + /* reduce the overflows */ + while (r8 > 0) { + mp_digit r8_d = r8; + carry = 0; + MP_ADD_CARRY(r0, r8_d, r0, carry); + MP_ADD_CARRY(r1, 0, r1, carry); + MP_ADD_CARRY(r2, 0, r2, carry); + MP_ADD_CARRY(r3, 0 - r8_d, r3, carry); + MP_ADD_CARRY(r4, MP_DIGIT_MAX, r4, carry); + MP_ADD_CARRY(r5, MP_DIGIT_MAX, r5, carry); + MP_ADD_CARRY(r6, 0 - (r8_d + 1), r6, carry); + MP_ADD_CARRY(r7, (r8_d - 1), r7, carry); + r8 = carry; + } + + /* reduce the underflows */ + while (r8 < 0) { + mp_digit r8_d = -r8; + carry = 0; + MP_SUB_BORROW(r0, r8_d, r0, carry); + MP_SUB_BORROW(r1, 0, r1, carry); + MP_SUB_BORROW(r2, 0, r2, carry); + MP_SUB_BORROW(r3, 0 - r8_d, r3, carry); + MP_SUB_BORROW(r4, MP_DIGIT_MAX, r4, carry); + MP_SUB_BORROW(r5, MP_DIGIT_MAX, r5, carry); + MP_SUB_BORROW(r6, 0 - (r8_d + 1), r6, carry); + MP_SUB_BORROW(r7, (r8_d - 1), r7, carry); + r8 = 0 - carry; + } + if (a != r) { + MP_CHECKOK(s_mp_pad(r, 8)); + } + MP_SIGN(r) = MP_ZPOS; + MP_USED(r) = 8; + + MP_DIGIT(r, 7) = r7; + MP_DIGIT(r, 6) = r6; + MP_DIGIT(r, 5) = r5; + MP_DIGIT(r, 4) = r4; + MP_DIGIT(r, 3) = r3; + MP_DIGIT(r, 2) = r2; + MP_DIGIT(r, 1) = r1; + MP_DIGIT(r, 0) = r0; + + /* final reduction if necessary */ + if ((r7 == MP_DIGIT_MAX) && + ((r6 > 1) || ((r6 == 1) && + (r5 || r4 || r3 || + ((r2 == MP_DIGIT_MAX) && (r1 == MP_DIGIT_MAX) && (r0 == MP_DIGIT_MAX)))))) { + MP_CHECKOK(mp_sub(r, &meth->irr, r)); + } + + s_mp_clamp(r); +#else + switch (a_used) { + case 8: + a7 = MP_DIGIT(a, 7); + case 7: + a6 = MP_DIGIT(a, 6); + case 6: + a5 = MP_DIGIT(a, 5); + case 5: + a4 = MP_DIGIT(a, 4); + } + a7l = a7 << 32; + a7h = a7 >> 32; + a6l = a6 << 32; + a6h = a6 >> 32; + a5l = a5 << 32; + a5h = a5 >> 32; + a4l = a4 << 32; + a4h = a4 >> 32; + r3 = MP_DIGIT(a, 3); + r2 = MP_DIGIT(a, 2); + r1 = MP_DIGIT(a, 1); + r0 = MP_DIGIT(a, 0); + + /* sum 1 */ + carry = 0; + MP_ADD_CARRY(r1, a5h << 32, r1, carry); + MP_ADD_CARRY(r2, a6, r2, carry); + MP_ADD_CARRY(r3, a7, r3, carry); + r4 = carry; + carry = 0; + MP_ADD_CARRY(r1, a5h << 32, r1, carry); + MP_ADD_CARRY(r2, a6, r2, carry); + MP_ADD_CARRY(r3, a7, r3, carry); + r4 += carry; + /* sum 2 */ + carry = 0; + MP_ADD_CARRY(r1, a6l, r1, carry); + MP_ADD_CARRY(r2, a6h | a7l, r2, carry); + MP_ADD_CARRY(r3, a7h, r3, carry); + r4 += carry; + carry = 0; + MP_ADD_CARRY(r1, a6l, r1, carry); + MP_ADD_CARRY(r2, a6h | a7l, r2, carry); + MP_ADD_CARRY(r3, a7h, r3, carry); + r4 += carry; + + /* sum 3 */ + carry = 0; + MP_ADD_CARRY(r0, a4, r0, carry); + MP_ADD_CARRY(r1, a5l >> 32, r1, carry); + MP_ADD_CARRY(r2, 0, r2, carry); + MP_ADD_CARRY(r3, a7, r3, carry); + r4 += carry; + /* sum 4 */ + carry = 0; + MP_ADD_CARRY(r0, a4h | a5l, r0, carry); + MP_ADD_CARRY(r1, a5h | (a6h << 32), r1, carry); + MP_ADD_CARRY(r2, a7, r2, carry); + MP_ADD_CARRY(r3, a6h | a4l, r3, carry); + r4 += carry; + /* diff 5 */ + carry = 0; + MP_SUB_BORROW(r0, a5h | a6l, r0, carry); + MP_SUB_BORROW(r1, a6h, r1, carry); + MP_SUB_BORROW(r2, 0, r2, carry); + MP_SUB_BORROW(r3, (a4l >> 32) | a5l, r3, carry); + r4 -= carry; + /* diff 6 */ + carry = 0; + MP_SUB_BORROW(r0, a6, r0, carry); + MP_SUB_BORROW(r1, a7, r1, carry); + MP_SUB_BORROW(r2, 0, r2, carry); + MP_SUB_BORROW(r3, a4h | (a5h << 32), r3, carry); + r4 -= carry; + /* diff 7 */ + carry = 0; + MP_SUB_BORROW(r0, a6h | a7l, r0, carry); + MP_SUB_BORROW(r1, a7h | a4l, r1, carry); + MP_SUB_BORROW(r2, a4h | a5l, r2, carry); + MP_SUB_BORROW(r3, a6l, r3, carry); + r4 -= carry; + /* diff 8 */ + carry = 0; + MP_SUB_BORROW(r0, a7, r0, carry); + MP_SUB_BORROW(r1, a4h << 32, r1, carry); + MP_SUB_BORROW(r2, a5, r2, carry); + MP_SUB_BORROW(r3, a6h << 32, r3, carry); + r4 -= carry; + + /* reduce the overflows */ + while (r4 > 0) { + mp_digit r4_long = r4; + mp_digit r4l = (r4_long << 32); + carry = 0; + MP_ADD_CARRY(r0, r4_long, r0, carry); + MP_ADD_CARRY(r1, 0 - r4l, r1, carry); + MP_ADD_CARRY(r2, MP_DIGIT_MAX, r2, carry); + MP_ADD_CARRY(r3, r4l - r4_long - 1, r3, carry); + r4 = carry; + } + + /* reduce the underflows */ + while (r4 < 0) { + mp_digit r4_long = -r4; + mp_digit r4l = (r4_long << 32); + carry = 0; + MP_SUB_BORROW(r0, r4_long, r0, carry); + MP_SUB_BORROW(r1, 0 - r4l, r1, carry); + MP_SUB_BORROW(r2, MP_DIGIT_MAX, r2, carry); + MP_SUB_BORROW(r3, r4l - r4_long - 1, r3, carry); + r4 = 0 - carry; + } + + if (a != r) { + MP_CHECKOK(s_mp_pad(r, 4)); + } + MP_SIGN(r) = MP_ZPOS; + MP_USED(r) = 4; + + MP_DIGIT(r, 3) = r3; + MP_DIGIT(r, 2) = r2; + MP_DIGIT(r, 1) = r1; + MP_DIGIT(r, 0) = r0; + + /* final reduction if necessary */ + if ((r3 > 0xFFFFFFFF00000001ULL) || + ((r3 == 0xFFFFFFFF00000001ULL) && + (r2 || (r1 >> 32) || + (r1 == 0xFFFFFFFFULL && r0 == MP_DIGIT_MAX)))) { + /* very rare, just use mp_sub */ + MP_CHECKOK(mp_sub(r, &meth->irr, r)); + } + + s_mp_clamp(r); +#endif + } + +CLEANUP: + return res; +} + +/* Compute the square of polynomial a, reduce modulo p256. Store the + * result in r. r could be a. Uses optimized modular reduction for p256. + */ +static mp_err +ec_GFp_nistp256_sqr(const mp_int *a, mp_int *r, const GFMethod *meth) +{ + mp_err res = MP_OKAY; + + MP_CHECKOK(mp_sqr(a, r)); + MP_CHECKOK(ec_GFp_nistp256_mod(r, r, meth)); +CLEANUP: + return res; +} + +/* Compute the product of two polynomials a and b, reduce modulo p256. + * Store the result in r. r could be a or b; a could be b. Uses + * optimized modular reduction for p256. */ +static mp_err +ec_GFp_nistp256_mul(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth) +{ + mp_err res = MP_OKAY; + + MP_CHECKOK(mp_mul(a, b, r)); + MP_CHECKOK(ec_GFp_nistp256_mod(r, r, meth)); +CLEANUP: + return res; +} + +/* Wire in fast field arithmetic and precomputation of base point for + * named curves. */ +mp_err +ec_group_set_gfp256(ECGroup *group, ECCurveName name) +{ + if (name == ECCurve_NIST_P256) { + group->meth->field_mod = &ec_GFp_nistp256_mod; + group->meth->field_mul = &ec_GFp_nistp256_mul; + group->meth->field_sqr = &ec_GFp_nistp256_sqr; + } + return MP_OKAY; +} diff --git a/security/nss/lib/freebl/ecl/ecp_256_32.c b/security/nss/lib/freebl/ecl/ecp_256_32.c new file mode 100644 index 000000000..515f6f731 --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecp_256_32.c @@ -0,0 +1,1535 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* A 32-bit implementation of the NIST P-256 elliptic curve. */ + +#include <string.h> + +#include "prtypes.h" +#include "mpi.h" +#include "mpi-priv.h" +#include "ecp.h" + +typedef PRUint8 u8; +typedef PRUint32 u32; +typedef PRUint64 u64; + +/* Our field elements are represented as nine, unsigned 32-bit words. Freebl's + * MPI library calls them digits, but here they are called limbs, which is + * GMP's terminology. + * + * The value of an felem (field element) is: + * x[0] + (x[1] * 2**29) + (x[2] * 2**57) + ... + (x[8] * 2**228) + * + * That is, each limb is alternately 29 or 28-bits wide in little-endian + * order. + * + * This means that an felem hits 2**257, rather than 2**256 as we would like. A + * 28, 29, ... pattern would cause us to hit 2**256, but that causes problems + * when multiplying as terms end up one bit short of a limb which would require + * much bit-shifting to correct. + * + * Finally, the values stored in an felem are in Montgomery form. So the value + * |y| is stored as (y*R) mod p, where p is the P-256 prime and R is 2**257. + */ +typedef u32 limb; +#define NLIMBS 9 +typedef limb felem[NLIMBS]; + +static const limb kBottom28Bits = 0xfffffff; +static const limb kBottom29Bits = 0x1fffffff; + +/* kOne is the number 1 as an felem. It's 2**257 mod p split up into 29 and + * 28-bit words. + */ +static const felem kOne = { + 2, 0, 0, 0xffff800, + 0x1fffffff, 0xfffffff, 0x1fbfffff, 0x1ffffff, + 0 +}; +static const felem kZero = { 0 }; +static const felem kP = { + 0x1fffffff, 0xfffffff, 0x1fffffff, 0x3ff, + 0, 0, 0x200000, 0xf000000, + 0xfffffff +}; +static const felem k2P = { + 0x1ffffffe, 0xfffffff, 0x1fffffff, 0x7ff, + 0, 0, 0x400000, 0xe000000, + 0x1fffffff +}; + +/* kPrecomputed contains precomputed values to aid the calculation of scalar + * multiples of the base point, G. It's actually two, equal length, tables + * concatenated. + * + * The first table contains (x,y) felem pairs for 16 multiples of the base + * point, G. + * + * Index | Index (binary) | Value + * 0 | 0000 | 0G (all zeros, omitted) + * 1 | 0001 | G + * 2 | 0010 | 2**64G + * 3 | 0011 | 2**64G + G + * 4 | 0100 | 2**128G + * 5 | 0101 | 2**128G + G + * 6 | 0110 | 2**128G + 2**64G + * 7 | 0111 | 2**128G + 2**64G + G + * 8 | 1000 | 2**192G + * 9 | 1001 | 2**192G + G + * 10 | 1010 | 2**192G + 2**64G + * 11 | 1011 | 2**192G + 2**64G + G + * 12 | 1100 | 2**192G + 2**128G + * 13 | 1101 | 2**192G + 2**128G + G + * 14 | 1110 | 2**192G + 2**128G + 2**64G + * 15 | 1111 | 2**192G + 2**128G + 2**64G + G + * + * The second table follows the same style, but the terms are 2**32G, + * 2**96G, 2**160G, 2**224G. + * + * This is ~2KB of data. + */ +static const limb kPrecomputed[NLIMBS * 2 * 15 * 2] = { + 0x11522878, 0xe730d41, 0xdb60179, 0x4afe2ff, 0x12883add, 0xcaddd88, 0x119e7edc, 0xd4a6eab, 0x3120bee, + 0x1d2aac15, 0xf25357c, 0x19e45cdd, 0x5c721d0, 0x1992c5a5, 0xa237487, 0x154ba21, 0x14b10bb, 0xae3fe3, + 0xd41a576, 0x922fc51, 0x234994f, 0x60b60d3, 0x164586ae, 0xce95f18, 0x1fe49073, 0x3fa36cc, 0x5ebcd2c, + 0xb402f2f, 0x15c70bf, 0x1561925c, 0x5a26704, 0xda91e90, 0xcdc1c7f, 0x1ea12446, 0xe1ade1e, 0xec91f22, + 0x26f7778, 0x566847e, 0xa0bec9e, 0x234f453, 0x1a31f21a, 0xd85e75c, 0x56c7109, 0xa267a00, 0xb57c050, + 0x98fb57, 0xaa837cc, 0x60c0792, 0xcfa5e19, 0x61bab9e, 0x589e39b, 0xa324c5, 0x7d6dee7, 0x2976e4b, + 0x1fc4124a, 0xa8c244b, 0x1ce86762, 0xcd61c7e, 0x1831c8e0, 0x75774e1, 0x1d96a5a9, 0x843a649, 0xc3ab0fa, + 0x6e2e7d5, 0x7673a2a, 0x178b65e8, 0x4003e9b, 0x1a1f11c2, 0x7816ea, 0xf643e11, 0x58c43df, 0xf423fc2, + 0x19633ffa, 0x891f2b2, 0x123c231c, 0x46add8c, 0x54700dd, 0x59e2b17, 0x172db40f, 0x83e277d, 0xb0dd609, + 0xfd1da12, 0x35c6e52, 0x19ede20c, 0xd19e0c0, 0x97d0f40, 0xb015b19, 0x449e3f5, 0xe10c9e, 0x33ab581, + 0x56a67ab, 0x577734d, 0x1dddc062, 0xc57b10d, 0x149b39d, 0x26a9e7b, 0xc35df9f, 0x48764cd, 0x76dbcca, + 0xca4b366, 0xe9303ab, 0x1a7480e7, 0x57e9e81, 0x1e13eb50, 0xf466cf3, 0x6f16b20, 0x4ba3173, 0xc168c33, + 0x15cb5439, 0x6a38e11, 0x73658bd, 0xb29564f, 0x3f6dc5b, 0x53b97e, 0x1322c4c0, 0x65dd7ff, 0x3a1e4f6, + 0x14e614aa, 0x9246317, 0x1bc83aca, 0xad97eed, 0xd38ce4a, 0xf82b006, 0x341f077, 0xa6add89, 0x4894acd, + 0x9f162d5, 0xf8410ef, 0x1b266a56, 0xd7f223, 0x3e0cb92, 0xe39b672, 0x6a2901a, 0x69a8556, 0x7e7c0, + 0x9b7d8d3, 0x309a80, 0x1ad05f7f, 0xc2fb5dd, 0xcbfd41d, 0x9ceb638, 0x1051825c, 0xda0cf5b, 0x812e881, + 0x6f35669, 0x6a56f2c, 0x1df8d184, 0x345820, 0x1477d477, 0x1645db1, 0xbe80c51, 0xc22be3e, 0xe35e65a, + 0x1aeb7aa0, 0xc375315, 0xf67bc99, 0x7fdd7b9, 0x191fc1be, 0x61235d, 0x2c184e9, 0x1c5a839, 0x47a1e26, + 0xb7cb456, 0x93e225d, 0x14f3c6ed, 0xccc1ac9, 0x17fe37f3, 0x4988989, 0x1a90c502, 0x2f32042, 0xa17769b, + 0xafd8c7c, 0x8191c6e, 0x1dcdb237, 0x16200c0, 0x107b32a1, 0x66c08db, 0x10d06a02, 0x3fc93, 0x5620023, + 0x16722b27, 0x68b5c59, 0x270fcfc, 0xfad0ecc, 0xe5de1c2, 0xeab466b, 0x2fc513c, 0x407f75c, 0xbaab133, + 0x9705fe9, 0xb88b8e7, 0x734c993, 0x1e1ff8f, 0x19156970, 0xabd0f00, 0x10469ea7, 0x3293ac0, 0xcdc98aa, + 0x1d843fd, 0xe14bfe8, 0x15be825f, 0x8b5212, 0xeb3fb67, 0x81cbd29, 0xbc62f16, 0x2b6fcc7, 0xf5a4e29, + 0x13560b66, 0xc0b6ac2, 0x51ae690, 0xd41e271, 0xf3e9bd4, 0x1d70aab, 0x1029f72, 0x73e1c35, 0xee70fbc, + 0xad81baf, 0x9ecc49a, 0x86c741e, 0xfe6be30, 0x176752e7, 0x23d416, 0x1f83de85, 0x27de188, 0x66f70b8, + 0x181cd51f, 0x96b6e4c, 0x188f2335, 0xa5df759, 0x17a77eb6, 0xfeb0e73, 0x154ae914, 0x2f3ec51, 0x3826b59, + 0xb91f17d, 0x1c72949, 0x1362bf0a, 0xe23fddf, 0xa5614b0, 0xf7d8f, 0x79061, 0x823d9d2, 0x8213f39, + 0x1128ae0b, 0xd095d05, 0xb85c0c2, 0x1ecb2ef, 0x24ddc84, 0xe35e901, 0x18411a4a, 0xf5ddc3d, 0x3786689, + 0x52260e8, 0x5ae3564, 0x542b10d, 0x8d93a45, 0x19952aa4, 0x996cc41, 0x1051a729, 0x4be3499, 0x52b23aa, + 0x109f307e, 0x6f5b6bb, 0x1f84e1e7, 0x77a0cfa, 0x10c4df3f, 0x25a02ea, 0xb048035, 0xe31de66, 0xc6ecaa3, + 0x28ea335, 0x2886024, 0x1372f020, 0xf55d35, 0x15e4684c, 0xf2a9e17, 0x1a4a7529, 0xcb7beb1, 0xb2a78a1, + 0x1ab21f1f, 0x6361ccf, 0x6c9179d, 0xb135627, 0x1267b974, 0x4408bad, 0x1cbff658, 0xe3d6511, 0xc7d76f, + 0x1cc7a69, 0xe7ee31b, 0x54fab4f, 0x2b914f, 0x1ad27a30, 0xcd3579e, 0xc50124c, 0x50daa90, 0xb13f72, + 0xb06aa75, 0x70f5cc6, 0x1649e5aa, 0x84a5312, 0x329043c, 0x41c4011, 0x13d32411, 0xb04a838, 0xd760d2d, + 0x1713b532, 0xbaa0c03, 0x84022ab, 0x6bcf5c1, 0x2f45379, 0x18ae070, 0x18c9e11e, 0x20bca9a, 0x66f496b, + 0x3eef294, 0x67500d2, 0xd7f613c, 0x2dbbeb, 0xb741038, 0xe04133f, 0x1582968d, 0xbe985f7, 0x1acbc1a, + 0x1a6a939f, 0x33e50f6, 0xd665ed4, 0xb4b7bd6, 0x1e5a3799, 0x6b33847, 0x17fa56ff, 0x65ef930, 0x21dc4a, + 0x2b37659, 0x450fe17, 0xb357b65, 0xdf5efac, 0x15397bef, 0x9d35a7f, 0x112ac15f, 0x624e62e, 0xa90ae2f, + 0x107eecd2, 0x1f69bbe, 0x77d6bce, 0x5741394, 0x13c684fc, 0x950c910, 0x725522b, 0xdc78583, 0x40eeabb, + 0x1fde328a, 0xbd61d96, 0xd28c387, 0x9e77d89, 0x12550c40, 0x759cb7d, 0x367ef34, 0xae2a960, 0x91b8bdc, + 0x93462a9, 0xf469ef, 0xb2e9aef, 0xd2ca771, 0x54e1f42, 0x7aaa49, 0x6316abb, 0x2413c8e, 0x5425bf9, + 0x1bed3e3a, 0xf272274, 0x1f5e7326, 0x6416517, 0xea27072, 0x9cedea7, 0x6e7633, 0x7c91952, 0xd806dce, + 0x8e2a7e1, 0xe421e1a, 0x418c9e1, 0x1dbc890, 0x1b395c36, 0xa1dc175, 0x1dc4ef73, 0x8956f34, 0xe4b5cf2, + 0x1b0d3a18, 0x3194a36, 0x6c2641f, 0xe44124c, 0xa2f4eaa, 0xa8c25ba, 0xf927ed7, 0x627b614, 0x7371cca, + 0xba16694, 0x417bc03, 0x7c0a7e3, 0x9c35c19, 0x1168a205, 0x8b6b00d, 0x10e3edc9, 0x9c19bf2, 0x5882229, + 0x1b2b4162, 0xa5cef1a, 0x1543622b, 0x9bd433e, 0x364e04d, 0x7480792, 0x5c9b5b3, 0xe85ff25, 0x408ef57, + 0x1814cfa4, 0x121b41b, 0xd248a0f, 0x3b05222, 0x39bb16a, 0xc75966d, 0xa038113, 0xa4a1769, 0x11fbc6c, + 0x917e50e, 0xeec3da8, 0x169d6eac, 0x10c1699, 0xa416153, 0xf724912, 0x15cd60b7, 0x4acbad9, 0x5efc5fa, + 0xf150ed7, 0x122b51, 0x1104b40a, 0xcb7f442, 0xfbb28ff, 0x6ac53ca, 0x196142cc, 0x7bf0fa9, 0x957651, + 0x4e0f215, 0xed439f8, 0x3f46bd5, 0x5ace82f, 0x110916b6, 0x6db078, 0xffd7d57, 0xf2ecaac, 0xca86dec, + 0x15d6b2da, 0x965ecc9, 0x1c92b4c2, 0x1f3811, 0x1cb080f5, 0x2d8b804, 0x19d1c12d, 0xf20bd46, 0x1951fa7, + 0xa3656c3, 0x523a425, 0xfcd0692, 0xd44ddc8, 0x131f0f5b, 0xaf80e4a, 0xcd9fc74, 0x99bb618, 0x2db944c, + 0xa673090, 0x1c210e1, 0x178c8d23, 0x1474383, 0x10b8743d, 0x985a55b, 0x2e74779, 0x576138, 0x9587927, + 0x133130fa, 0xbe05516, 0x9f4d619, 0xbb62570, 0x99ec591, 0xd9468fe, 0x1d07782d, 0xfc72e0b, 0x701b298, + 0x1863863b, 0x85954b8, 0x121a0c36, 0x9e7fedf, 0xf64b429, 0x9b9d71e, 0x14e2f5d8, 0xf858d3a, 0x942eea8, + 0xda5b765, 0x6edafff, 0xa9d18cc, 0xc65e4ba, 0x1c747e86, 0xe4ea915, 0x1981d7a1, 0x8395659, 0x52ed4e2, + 0x87d43b7, 0x37ab11b, 0x19d292ce, 0xf8d4692, 0x18c3053f, 0x8863e13, 0x4c146c0, 0x6bdf55a, 0x4e4457d, + 0x16152289, 0xac78ec2, 0x1a59c5a2, 0x2028b97, 0x71c2d01, 0x295851f, 0x404747b, 0x878558d, 0x7d29aa4, + 0x13d8341f, 0x8daefd7, 0x139c972d, 0x6b7ea75, 0xd4a9dde, 0xff163d8, 0x81d55d7, 0xa5bef68, 0xb7b30d8, + 0xbe73d6f, 0xaa88141, 0xd976c81, 0x7e7a9cc, 0x18beb771, 0xd773cbd, 0x13f51951, 0x9d0c177, 0x1c49a78, +}; + +/* Field element operations: + */ + +/* NON_ZERO_TO_ALL_ONES returns: + * 0xffffffff for 0 < x <= 2**31 + * 0 for x == 0 or x > 2**31. + * + * x must be a u32 or an equivalent type such as limb. + */ +#define NON_ZERO_TO_ALL_ONES(x) ((((u32)(x)-1) >> 31) - 1) + +/* felem_reduce_carry adds a multiple of p in order to cancel |carry|, + * which is a term at 2**257. + * + * On entry: carry < 2**3, inout[0,2,...] < 2**29, inout[1,3,...] < 2**28. + * On exit: inout[0,2,..] < 2**30, inout[1,3,...] < 2**29. + */ +static void +felem_reduce_carry(felem inout, limb carry) +{ + const u32 carry_mask = NON_ZERO_TO_ALL_ONES(carry); + + inout[0] += carry << 1; + inout[3] += 0x10000000 & carry_mask; + /* carry < 2**3 thus (carry << 11) < 2**14 and we added 2**28 in the + * previous line therefore this doesn't underflow. + */ + inout[3] -= carry << 11; + inout[4] += (0x20000000 - 1) & carry_mask; + inout[5] += (0x10000000 - 1) & carry_mask; + inout[6] += (0x20000000 - 1) & carry_mask; + inout[6] -= carry << 22; + /* This may underflow if carry is non-zero but, if so, we'll fix it in the + * next line. + */ + inout[7] -= 1 & carry_mask; + inout[7] += carry << 25; +} + +/* felem_sum sets out = in+in2. + * + * On entry, in[i]+in2[i] must not overflow a 32-bit word. + * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29 + */ +static void +felem_sum(felem out, const felem in, const felem in2) +{ + limb carry = 0; + unsigned int i; + for (i = 0;; i++) { + out[i] = in[i] + in2[i]; + out[i] += carry; + carry = out[i] >> 29; + out[i] &= kBottom29Bits; + + i++; + if (i == NLIMBS) + break; + + out[i] = in[i] + in2[i]; + out[i] += carry; + carry = out[i] >> 28; + out[i] &= kBottom28Bits; + } + + felem_reduce_carry(out, carry); +} + +#define two31m3 (((limb)1) << 31) - (((limb)1) << 3) +#define two30m2 (((limb)1) << 30) - (((limb)1) << 2) +#define two30p13m2 (((limb)1) << 30) + (((limb)1) << 13) - (((limb)1) << 2) +#define two31m2 (((limb)1) << 31) - (((limb)1) << 2) +#define two31p24m2 (((limb)1) << 31) + (((limb)1) << 24) - (((limb)1) << 2) +#define two30m27m2 (((limb)1) << 30) - (((limb)1) << 27) - (((limb)1) << 2) + +/* zero31 is 0 mod p. + */ +static const felem zero31 = { + two31m3, two30m2, two31m2, two30p13m2, + two31m2, two30m2, two31p24m2, two30m27m2, + two31m2 +}; + +/* felem_diff sets out = in-in2. + * + * On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29 and + * in2[0,2,...] < 2**30, in2[1,3,...] < 2**29. + * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29. + */ +static void +felem_diff(felem out, const felem in, const felem in2) +{ + limb carry = 0; + unsigned int i; + + for (i = 0;; i++) { + out[i] = in[i] - in2[i]; + out[i] += zero31[i]; + out[i] += carry; + carry = out[i] >> 29; + out[i] &= kBottom29Bits; + + i++; + if (i == NLIMBS) + break; + + out[i] = in[i] - in2[i]; + out[i] += zero31[i]; + out[i] += carry; + carry = out[i] >> 28; + out[i] &= kBottom28Bits; + } + + felem_reduce_carry(out, carry); +} + +/* felem_reduce_degree sets out = tmp/R mod p where tmp contains 64-bit words + * with the same 29,28,... bit positions as an felem. + * + * The values in felems are in Montgomery form: x*R mod p where R = 2**257. + * Since we just multiplied two Montgomery values together, the result is + * x*y*R*R mod p. We wish to divide by R in order for the result also to be + * in Montgomery form. + * + * On entry: tmp[i] < 2**64 + * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29 + */ +static void +felem_reduce_degree(felem out, u64 tmp[17]) +{ + /* The following table may be helpful when reading this code: + * + * Limb number: 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10... + * Width (bits): 29| 28| 29| 28| 29| 28| 29| 28| 29| 28| 29 + * Start bit: 0 | 29| 57| 86|114|143|171|200|228|257|285 + * (odd phase): 0 | 28| 57| 85|114|142|171|199|228|256|285 + */ + limb tmp2[18], carry, x, xMask; + unsigned int i; + + /* tmp contains 64-bit words with the same 29,28,29-bit positions as an + * felem. So the top of an element of tmp might overlap with another + * element two positions down. The following loop eliminates this + * overlap. + */ + tmp2[0] = tmp[0] & kBottom29Bits; + + /* In the following we use "(limb) tmp[x]" and "(limb) (tmp[x]>>32)" to try + * and hint to the compiler that it can do a single-word shift by selecting + * the right register rather than doing a double-word shift and truncating + * afterwards. + */ + tmp2[1] = ((limb)tmp[0]) >> 29; + tmp2[1] |= (((limb)(tmp[0] >> 32)) << 3) & kBottom28Bits; + tmp2[1] += ((limb)tmp[1]) & kBottom28Bits; + carry = tmp2[1] >> 28; + tmp2[1] &= kBottom28Bits; + + for (i = 2; i < 17; i++) { + tmp2[i] = ((limb)(tmp[i - 2] >> 32)) >> 25; + tmp2[i] += ((limb)(tmp[i - 1])) >> 28; + tmp2[i] += (((limb)(tmp[i - 1] >> 32)) << 4) & kBottom29Bits; + tmp2[i] += ((limb)tmp[i]) & kBottom29Bits; + tmp2[i] += carry; + carry = tmp2[i] >> 29; + tmp2[i] &= kBottom29Bits; + + i++; + if (i == 17) + break; + tmp2[i] = ((limb)(tmp[i - 2] >> 32)) >> 25; + tmp2[i] += ((limb)(tmp[i - 1])) >> 29; + tmp2[i] += (((limb)(tmp[i - 1] >> 32)) << 3) & kBottom28Bits; + tmp2[i] += ((limb)tmp[i]) & kBottom28Bits; + tmp2[i] += carry; + carry = tmp2[i] >> 28; + tmp2[i] &= kBottom28Bits; + } + + tmp2[17] = ((limb)(tmp[15] >> 32)) >> 25; + tmp2[17] += ((limb)(tmp[16])) >> 29; + tmp2[17] += (((limb)(tmp[16] >> 32)) << 3); + tmp2[17] += carry; + + /* Montgomery elimination of terms: + * + * Since R is 2**257, we can divide by R with a bitwise shift if we can + * ensure that the right-most 257 bits are all zero. We can make that true + * by adding multiplies of p without affecting the value. + * + * So we eliminate limbs from right to left. Since the bottom 29 bits of p + * are all ones, then by adding tmp2[0]*p to tmp2 we'll make tmp2[0] == 0. + * We can do that for 8 further limbs and then right shift to eliminate the + * extra factor of R. + */ + for (i = 0;; i += 2) { + tmp2[i + 1] += tmp2[i] >> 29; + x = tmp2[i] & kBottom29Bits; + xMask = NON_ZERO_TO_ALL_ONES(x); + tmp2[i] = 0; + + /* The bounds calculations for this loop are tricky. Each iteration of + * the loop eliminates two words by adding values to words to their + * right. + * + * The following table contains the amounts added to each word (as an + * offset from the value of i at the top of the loop). The amounts are + * accounted for from the first and second half of the loop separately + * and are written as, for example, 28 to mean a value <2**28. + * + * Word: 3 4 5 6 7 8 9 10 + * Added in top half: 28 11 29 21 29 28 + * 28 29 + * 29 + * Added in bottom half: 29 10 28 21 28 28 + * 29 + * + * The value that is currently offset 7 will be offset 5 for the next + * iteration and then offset 3 for the iteration after that. Therefore + * the total value added will be the values added at 7, 5 and 3. + * + * The following table accumulates these values. The sums at the bottom + * are written as, for example, 29+28, to mean a value < 2**29+2**28. + * + * Word: 3 4 5 6 7 8 9 10 11 12 13 + * 28 11 10 29 21 29 28 28 28 28 28 + * 29 28 11 28 29 28 29 28 29 28 + * 29 28 21 21 29 21 29 21 + * 10 29 28 21 28 21 28 + * 28 29 28 29 28 29 28 + * 11 10 29 10 29 10 + * 29 28 11 28 11 + * 29 29 + * -------------------------------------------- + * 30+ 31+ 30+ 31+ 30+ + * 28+ 29+ 28+ 29+ 21+ + * 21+ 28+ 21+ 28+ 10 + * 10 21+ 10 21+ + * 11 11 + * + * So the greatest amount is added to tmp2[10] and tmp2[12]. If + * tmp2[10/12] has an initial value of <2**29, then the maximum value + * will be < 2**31 + 2**30 + 2**28 + 2**21 + 2**11, which is < 2**32, + * as required. + */ + tmp2[i + 3] += (x << 10) & kBottom28Bits; + tmp2[i + 4] += (x >> 18); + + tmp2[i + 6] += (x << 21) & kBottom29Bits; + tmp2[i + 7] += x >> 8; + + /* At position 200, which is the starting bit position for word 7, we + * have a factor of 0xf000000 = 2**28 - 2**24. + */ + tmp2[i + 7] += 0x10000000 & xMask; + /* Word 7 is 28 bits wide, so the 2**28 term exactly hits word 8. */ + tmp2[i + 8] += (x - 1) & xMask; + tmp2[i + 7] -= (x << 24) & kBottom28Bits; + tmp2[i + 8] -= x >> 4; + + tmp2[i + 8] += 0x20000000 & xMask; + tmp2[i + 8] -= x; + tmp2[i + 8] += (x << 28) & kBottom29Bits; + tmp2[i + 9] += ((x >> 1) - 1) & xMask; + + if (i + 1 == NLIMBS) + break; + tmp2[i + 2] += tmp2[i + 1] >> 28; + x = tmp2[i + 1] & kBottom28Bits; + xMask = NON_ZERO_TO_ALL_ONES(x); + tmp2[i + 1] = 0; + + tmp2[i + 4] += (x << 11) & kBottom29Bits; + tmp2[i + 5] += (x >> 18); + + tmp2[i + 7] += (x << 21) & kBottom28Bits; + tmp2[i + 8] += x >> 7; + + /* At position 199, which is the starting bit of the 8th word when + * dealing with a context starting on an odd word, we have a factor of + * 0x1e000000 = 2**29 - 2**25. Since we have not updated i, the 8th + * word from i+1 is i+8. + */ + tmp2[i + 8] += 0x20000000 & xMask; + tmp2[i + 9] += (x - 1) & xMask; + tmp2[i + 8] -= (x << 25) & kBottom29Bits; + tmp2[i + 9] -= x >> 4; + + tmp2[i + 9] += 0x10000000 & xMask; + tmp2[i + 9] -= x; + tmp2[i + 10] += (x - 1) & xMask; + } + + /* We merge the right shift with a carry chain. The words above 2**257 have + * widths of 28,29,... which we need to correct when copying them down. + */ + carry = 0; + for (i = 0; i < 8; i++) { + /* The maximum value of tmp2[i + 9] occurs on the first iteration and + * is < 2**30+2**29+2**28. Adding 2**29 (from tmp2[i + 10]) is + * therefore safe. + */ + out[i] = tmp2[i + 9]; + out[i] += carry; + out[i] += (tmp2[i + 10] << 28) & kBottom29Bits; + carry = out[i] >> 29; + out[i] &= kBottom29Bits; + + i++; + out[i] = tmp2[i + 9] >> 1; + out[i] += carry; + carry = out[i] >> 28; + out[i] &= kBottom28Bits; + } + + out[8] = tmp2[17]; + out[8] += carry; + carry = out[8] >> 29; + out[8] &= kBottom29Bits; + + felem_reduce_carry(out, carry); +} + +/* felem_square sets out=in*in. + * + * On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29. + * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29. + */ +static void +felem_square(felem out, const felem in) +{ + u64 tmp[17]; + + tmp[0] = ((u64)in[0]) * in[0]; + tmp[1] = ((u64)in[0]) * (in[1] << 1); + tmp[2] = ((u64)in[0]) * (in[2] << 1) + + ((u64)in[1]) * (in[1] << 1); + tmp[3] = ((u64)in[0]) * (in[3] << 1) + + ((u64)in[1]) * (in[2] << 1); + tmp[4] = ((u64)in[0]) * (in[4] << 1) + + ((u64)in[1]) * (in[3] << 2) + + ((u64)in[2]) * in[2]; + tmp[5] = ((u64)in[0]) * (in[5] << 1) + + ((u64)in[1]) * (in[4] << 1) + + ((u64)in[2]) * (in[3] << 1); + tmp[6] = ((u64)in[0]) * (in[6] << 1) + + ((u64)in[1]) * (in[5] << 2) + + ((u64)in[2]) * (in[4] << 1) + + ((u64)in[3]) * (in[3] << 1); + tmp[7] = ((u64)in[0]) * (in[7] << 1) + + ((u64)in[1]) * (in[6] << 1) + + ((u64)in[2]) * (in[5] << 1) + + ((u64)in[3]) * (in[4] << 1); + /* tmp[8] has the greatest value of 2**61 + 2**60 + 2**61 + 2**60 + 2**60, + * which is < 2**64 as required. + */ + tmp[8] = ((u64)in[0]) * (in[8] << 1) + + ((u64)in[1]) * (in[7] << 2) + + ((u64)in[2]) * (in[6] << 1) + + ((u64)in[3]) * (in[5] << 2) + + ((u64)in[4]) * in[4]; + tmp[9] = ((u64)in[1]) * (in[8] << 1) + + ((u64)in[2]) * (in[7] << 1) + + ((u64)in[3]) * (in[6] << 1) + + ((u64)in[4]) * (in[5] << 1); + tmp[10] = ((u64)in[2]) * (in[8] << 1) + + ((u64)in[3]) * (in[7] << 2) + + ((u64)in[4]) * (in[6] << 1) + + ((u64)in[5]) * (in[5] << 1); + tmp[11] = ((u64)in[3]) * (in[8] << 1) + + ((u64)in[4]) * (in[7] << 1) + + ((u64)in[5]) * (in[6] << 1); + tmp[12] = ((u64)in[4]) * (in[8] << 1) + + ((u64)in[5]) * (in[7] << 2) + + ((u64)in[6]) * in[6]; + tmp[13] = ((u64)in[5]) * (in[8] << 1) + + ((u64)in[6]) * (in[7] << 1); + tmp[14] = ((u64)in[6]) * (in[8] << 1) + + ((u64)in[7]) * (in[7] << 1); + tmp[15] = ((u64)in[7]) * (in[8] << 1); + tmp[16] = ((u64)in[8]) * in[8]; + + felem_reduce_degree(out, tmp); +} + +/* felem_mul sets out=in*in2. + * + * On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29 and + * in2[0,2,...] < 2**30, in2[1,3,...] < 2**29. + * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29. + */ +static void +felem_mul(felem out, const felem in, const felem in2) +{ + u64 tmp[17]; + + tmp[0] = ((u64)in[0]) * in2[0]; + tmp[1] = ((u64)in[0]) * (in2[1] << 0) + + ((u64)in[1]) * (in2[0] << 0); + tmp[2] = ((u64)in[0]) * (in2[2] << 0) + + ((u64)in[1]) * (in2[1] << 1) + + ((u64)in[2]) * (in2[0] << 0); + tmp[3] = ((u64)in[0]) * (in2[3] << 0) + + ((u64)in[1]) * (in2[2] << 0) + + ((u64)in[2]) * (in2[1] << 0) + + ((u64)in[3]) * (in2[0] << 0); + tmp[4] = ((u64)in[0]) * (in2[4] << 0) + + ((u64)in[1]) * (in2[3] << 1) + + ((u64)in[2]) * (in2[2] << 0) + + ((u64)in[3]) * (in2[1] << 1) + + ((u64)in[4]) * (in2[0] << 0); + tmp[5] = ((u64)in[0]) * (in2[5] << 0) + + ((u64)in[1]) * (in2[4] << 0) + + ((u64)in[2]) * (in2[3] << 0) + + ((u64)in[3]) * (in2[2] << 0) + + ((u64)in[4]) * (in2[1] << 0) + + ((u64)in[5]) * (in2[0] << 0); + tmp[6] = ((u64)in[0]) * (in2[6] << 0) + + ((u64)in[1]) * (in2[5] << 1) + + ((u64)in[2]) * (in2[4] << 0) + + ((u64)in[3]) * (in2[3] << 1) + + ((u64)in[4]) * (in2[2] << 0) + + ((u64)in[5]) * (in2[1] << 1) + + ((u64)in[6]) * (in2[0] << 0); + tmp[7] = ((u64)in[0]) * (in2[7] << 0) + + ((u64)in[1]) * (in2[6] << 0) + + ((u64)in[2]) * (in2[5] << 0) + + ((u64)in[3]) * (in2[4] << 0) + + ((u64)in[4]) * (in2[3] << 0) + + ((u64)in[5]) * (in2[2] << 0) + + ((u64)in[6]) * (in2[1] << 0) + + ((u64)in[7]) * (in2[0] << 0); + /* tmp[8] has the greatest value but doesn't overflow. See logic in + * felem_square. + */ + tmp[8] = ((u64)in[0]) * (in2[8] << 0) + + ((u64)in[1]) * (in2[7] << 1) + + ((u64)in[2]) * (in2[6] << 0) + + ((u64)in[3]) * (in2[5] << 1) + + ((u64)in[4]) * (in2[4] << 0) + + ((u64)in[5]) * (in2[3] << 1) + + ((u64)in[6]) * (in2[2] << 0) + + ((u64)in[7]) * (in2[1] << 1) + + ((u64)in[8]) * (in2[0] << 0); + tmp[9] = ((u64)in[1]) * (in2[8] << 0) + + ((u64)in[2]) * (in2[7] << 0) + + ((u64)in[3]) * (in2[6] << 0) + + ((u64)in[4]) * (in2[5] << 0) + + ((u64)in[5]) * (in2[4] << 0) + + ((u64)in[6]) * (in2[3] << 0) + + ((u64)in[7]) * (in2[2] << 0) + + ((u64)in[8]) * (in2[1] << 0); + tmp[10] = ((u64)in[2]) * (in2[8] << 0) + + ((u64)in[3]) * (in2[7] << 1) + + ((u64)in[4]) * (in2[6] << 0) + + ((u64)in[5]) * (in2[5] << 1) + + ((u64)in[6]) * (in2[4] << 0) + + ((u64)in[7]) * (in2[3] << 1) + + ((u64)in[8]) * (in2[2] << 0); + tmp[11] = ((u64)in[3]) * (in2[8] << 0) + + ((u64)in[4]) * (in2[7] << 0) + + ((u64)in[5]) * (in2[6] << 0) + + ((u64)in[6]) * (in2[5] << 0) + + ((u64)in[7]) * (in2[4] << 0) + + ((u64)in[8]) * (in2[3] << 0); + tmp[12] = ((u64)in[4]) * (in2[8] << 0) + + ((u64)in[5]) * (in2[7] << 1) + + ((u64)in[6]) * (in2[6] << 0) + + ((u64)in[7]) * (in2[5] << 1) + + ((u64)in[8]) * (in2[4] << 0); + tmp[13] = ((u64)in[5]) * (in2[8] << 0) + + ((u64)in[6]) * (in2[7] << 0) + + ((u64)in[7]) * (in2[6] << 0) + + ((u64)in[8]) * (in2[5] << 0); + tmp[14] = ((u64)in[6]) * (in2[8] << 0) + + ((u64)in[7]) * (in2[7] << 1) + + ((u64)in[8]) * (in2[6] << 0); + tmp[15] = ((u64)in[7]) * (in2[8] << 0) + + ((u64)in[8]) * (in2[7] << 0); + tmp[16] = ((u64)in[8]) * (in2[8] << 0); + + felem_reduce_degree(out, tmp); +} + +static void +felem_assign(felem out, const felem in) +{ + memcpy(out, in, sizeof(felem)); +} + +/* felem_inv calculates |out| = |in|^{-1} + * + * Based on Fermat's Little Theorem: + * a^p = a (mod p) + * a^{p-1} = 1 (mod p) + * a^{p-2} = a^{-1} (mod p) + */ +static void +felem_inv(felem out, const felem in) +{ + felem ftmp, ftmp2; + /* each e_I will hold |in|^{2^I - 1} */ + felem e2, e4, e8, e16, e32, e64; + unsigned int i; + + felem_square(ftmp, in); /* 2^1 */ + felem_mul(ftmp, in, ftmp); /* 2^2 - 2^0 */ + felem_assign(e2, ftmp); + felem_square(ftmp, ftmp); /* 2^3 - 2^1 */ + felem_square(ftmp, ftmp); /* 2^4 - 2^2 */ + felem_mul(ftmp, ftmp, e2); /* 2^4 - 2^0 */ + felem_assign(e4, ftmp); + felem_square(ftmp, ftmp); /* 2^5 - 2^1 */ + felem_square(ftmp, ftmp); /* 2^6 - 2^2 */ + felem_square(ftmp, ftmp); /* 2^7 - 2^3 */ + felem_square(ftmp, ftmp); /* 2^8 - 2^4 */ + felem_mul(ftmp, ftmp, e4); /* 2^8 - 2^0 */ + felem_assign(e8, ftmp); + for (i = 0; i < 8; i++) { + felem_square(ftmp, ftmp); + } /* 2^16 - 2^8 */ + felem_mul(ftmp, ftmp, e8); /* 2^16 - 2^0 */ + felem_assign(e16, ftmp); + for (i = 0; i < 16; i++) { + felem_square(ftmp, ftmp); + } /* 2^32 - 2^16 */ + felem_mul(ftmp, ftmp, e16); /* 2^32 - 2^0 */ + felem_assign(e32, ftmp); + for (i = 0; i < 32; i++) { + felem_square(ftmp, ftmp); + } /* 2^64 - 2^32 */ + felem_assign(e64, ftmp); + felem_mul(ftmp, ftmp, in); /* 2^64 - 2^32 + 2^0 */ + for (i = 0; i < 192; i++) { + felem_square(ftmp, ftmp); + } /* 2^256 - 2^224 + 2^192 */ + + felem_mul(ftmp2, e64, e32); /* 2^64 - 2^0 */ + for (i = 0; i < 16; i++) { + felem_square(ftmp2, ftmp2); + } /* 2^80 - 2^16 */ + felem_mul(ftmp2, ftmp2, e16); /* 2^80 - 2^0 */ + for (i = 0; i < 8; i++) { + felem_square(ftmp2, ftmp2); + } /* 2^88 - 2^8 */ + felem_mul(ftmp2, ftmp2, e8); /* 2^88 - 2^0 */ + for (i = 0; i < 4; i++) { + felem_square(ftmp2, ftmp2); + } /* 2^92 - 2^4 */ + felem_mul(ftmp2, ftmp2, e4); /* 2^92 - 2^0 */ + felem_square(ftmp2, ftmp2); /* 2^93 - 2^1 */ + felem_square(ftmp2, ftmp2); /* 2^94 - 2^2 */ + felem_mul(ftmp2, ftmp2, e2); /* 2^94 - 2^0 */ + felem_square(ftmp2, ftmp2); /* 2^95 - 2^1 */ + felem_square(ftmp2, ftmp2); /* 2^96 - 2^2 */ + felem_mul(ftmp2, ftmp2, in); /* 2^96 - 3 */ + + felem_mul(out, ftmp2, ftmp); /* 2^256 - 2^224 + 2^192 + 2^96 - 3 */ +} + +/* felem_scalar_3 sets out=3*out. + * + * On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29. + * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29. + */ +static void +felem_scalar_3(felem out) +{ + limb carry = 0; + unsigned int i; + + for (i = 0;; i++) { + out[i] *= 3; + out[i] += carry; + carry = out[i] >> 29; + out[i] &= kBottom29Bits; + + i++; + if (i == NLIMBS) + break; + + out[i] *= 3; + out[i] += carry; + carry = out[i] >> 28; + out[i] &= kBottom28Bits; + } + + felem_reduce_carry(out, carry); +} + +/* felem_scalar_4 sets out=4*out. + * + * On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29. + * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29. + */ +static void +felem_scalar_4(felem out) +{ + limb carry = 0, next_carry; + unsigned int i; + + for (i = 0;; i++) { + next_carry = out[i] >> 27; + out[i] <<= 2; + out[i] &= kBottom29Bits; + out[i] += carry; + carry = next_carry + (out[i] >> 29); + out[i] &= kBottom29Bits; + + i++; + if (i == NLIMBS) + break; + next_carry = out[i] >> 26; + out[i] <<= 2; + out[i] &= kBottom28Bits; + out[i] += carry; + carry = next_carry + (out[i] >> 28); + out[i] &= kBottom28Bits; + } + + felem_reduce_carry(out, carry); +} + +/* felem_scalar_8 sets out=8*out. + * + * On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29. + * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29. + */ +static void +felem_scalar_8(felem out) +{ + limb carry = 0, next_carry; + unsigned int i; + + for (i = 0;; i++) { + next_carry = out[i] >> 26; + out[i] <<= 3; + out[i] &= kBottom29Bits; + out[i] += carry; + carry = next_carry + (out[i] >> 29); + out[i] &= kBottom29Bits; + + i++; + if (i == NLIMBS) + break; + next_carry = out[i] >> 25; + out[i] <<= 3; + out[i] &= kBottom28Bits; + out[i] += carry; + carry = next_carry + (out[i] >> 28); + out[i] &= kBottom28Bits; + } + + felem_reduce_carry(out, carry); +} + +/* felem_is_zero_vartime returns 1 iff |in| == 0. It takes a variable amount of + * time depending on the value of |in|. + */ +static char +felem_is_zero_vartime(const felem in) +{ + limb carry; + int i; + limb tmp[NLIMBS]; + felem_assign(tmp, in); + + /* First, reduce tmp to a minimal form. + */ + do { + carry = 0; + for (i = 0;; i++) { + tmp[i] += carry; + carry = tmp[i] >> 29; + tmp[i] &= kBottom29Bits; + + i++; + if (i == NLIMBS) + break; + + tmp[i] += carry; + carry = tmp[i] >> 28; + tmp[i] &= kBottom28Bits; + } + + felem_reduce_carry(tmp, carry); + } while (carry); + + /* tmp < 2**257, so the only possible zero values are 0, p and 2p. + */ + return memcmp(tmp, kZero, sizeof(tmp)) == 0 || + memcmp(tmp, kP, sizeof(tmp)) == 0 || + memcmp(tmp, k2P, sizeof(tmp)) == 0; +} + +/* Group operations: + * + * Elements of the elliptic curve group are represented in Jacobian + * coordinates: (x, y, z). An affine point (x', y') is x'=x/z**2, y'=y/z**3 in + * Jacobian form. + */ + +/* point_double sets {x_out,y_out,z_out} = 2*{x,y,z}. + * + * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#doubling-dbl-2009-l + */ +static void +point_double(felem x_out, felem y_out, felem z_out, + const felem x, const felem y, const felem z) +{ + felem delta, gamma, alpha, beta, tmp, tmp2; + + felem_square(delta, z); + felem_square(gamma, y); + felem_mul(beta, x, gamma); + + felem_sum(tmp, x, delta); + felem_diff(tmp2, x, delta); + felem_mul(alpha, tmp, tmp2); + felem_scalar_3(alpha); + + felem_sum(tmp, y, z); + felem_square(tmp, tmp); + felem_diff(tmp, tmp, gamma); + felem_diff(z_out, tmp, delta); + + felem_scalar_4(beta); + felem_square(x_out, alpha); + felem_diff(x_out, x_out, beta); + felem_diff(x_out, x_out, beta); + + felem_diff(tmp, beta, x_out); + felem_mul(tmp, alpha, tmp); + felem_square(tmp2, gamma); + felem_scalar_8(tmp2); + felem_diff(y_out, tmp, tmp2); +} + +/* point_add_mixed sets {x_out,y_out,z_out} = {x1,y1,z1} + {x2,y2,1}. + * (i.e. the second point is affine.) + * + * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-add-2007-bl + * + * Note that this function does not handle P+P, infinity+P nor P+infinity + * correctly. + */ +static void +point_add_mixed(felem x_out, felem y_out, felem z_out, + const felem x1, const felem y1, const felem z1, + const felem x2, const felem y2) +{ + felem z1z1, z1z1z1, s2, u2, h, i, j, r, rr, v, tmp; + + felem_square(z1z1, z1); + felem_sum(tmp, z1, z1); + + felem_mul(u2, x2, z1z1); + felem_mul(z1z1z1, z1, z1z1); + felem_mul(s2, y2, z1z1z1); + felem_diff(h, u2, x1); + felem_sum(i, h, h); + felem_square(i, i); + felem_mul(j, h, i); + felem_diff(r, s2, y1); + felem_sum(r, r, r); + felem_mul(v, x1, i); + + felem_mul(z_out, tmp, h); + felem_square(rr, r); + felem_diff(x_out, rr, j); + felem_diff(x_out, x_out, v); + felem_diff(x_out, x_out, v); + + felem_diff(tmp, v, x_out); + felem_mul(y_out, tmp, r); + felem_mul(tmp, y1, j); + felem_diff(y_out, y_out, tmp); + felem_diff(y_out, y_out, tmp); +} + +/* point_add sets {x_out,y_out,z_out} = {x1,y1,z1} + {x2,y2,z2}. + * + * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-add-2007-bl + * + * Note that this function does not handle P+P, infinity+P nor P+infinity + * correctly. + */ +static void +point_add(felem x_out, felem y_out, felem z_out, + const felem x1, const felem y1, const felem z1, + const felem x2, const felem y2, const felem z2) +{ + felem z1z1, z1z1z1, z2z2, z2z2z2, s1, s2, u1, u2, h, i, j, r, rr, v, tmp; + + felem_square(z1z1, z1); + felem_square(z2z2, z2); + felem_mul(u1, x1, z2z2); + + felem_sum(tmp, z1, z2); + felem_square(tmp, tmp); + felem_diff(tmp, tmp, z1z1); + felem_diff(tmp, tmp, z2z2); + + felem_mul(z2z2z2, z2, z2z2); + felem_mul(s1, y1, z2z2z2); + + felem_mul(u2, x2, z1z1); + felem_mul(z1z1z1, z1, z1z1); + felem_mul(s2, y2, z1z1z1); + felem_diff(h, u2, u1); + felem_sum(i, h, h); + felem_square(i, i); + felem_mul(j, h, i); + felem_diff(r, s2, s1); + felem_sum(r, r, r); + felem_mul(v, u1, i); + + felem_mul(z_out, tmp, h); + felem_square(rr, r); + felem_diff(x_out, rr, j); + felem_diff(x_out, x_out, v); + felem_diff(x_out, x_out, v); + + felem_diff(tmp, v, x_out); + felem_mul(y_out, tmp, r); + felem_mul(tmp, s1, j); + felem_diff(y_out, y_out, tmp); + felem_diff(y_out, y_out, tmp); +} + +/* point_add_or_double_vartime sets {x_out,y_out,z_out} = {x1,y1,z1} + + * {x2,y2,z2}. + * + * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-add-2007-bl + * + * This function handles the case where {x1,y1,z1}={x2,y2,z2}. + */ +static void +point_add_or_double_vartime( + felem x_out, felem y_out, felem z_out, + const felem x1, const felem y1, const felem z1, + const felem x2, const felem y2, const felem z2) +{ + felem z1z1, z1z1z1, z2z2, z2z2z2, s1, s2, u1, u2, h, i, j, r, rr, v, tmp; + char x_equal, y_equal; + + felem_square(z1z1, z1); + felem_square(z2z2, z2); + felem_mul(u1, x1, z2z2); + + felem_sum(tmp, z1, z2); + felem_square(tmp, tmp); + felem_diff(tmp, tmp, z1z1); + felem_diff(tmp, tmp, z2z2); + + felem_mul(z2z2z2, z2, z2z2); + felem_mul(s1, y1, z2z2z2); + + felem_mul(u2, x2, z1z1); + felem_mul(z1z1z1, z1, z1z1); + felem_mul(s2, y2, z1z1z1); + felem_diff(h, u2, u1); + x_equal = felem_is_zero_vartime(h); + felem_sum(i, h, h); + felem_square(i, i); + felem_mul(j, h, i); + felem_diff(r, s2, s1); + y_equal = felem_is_zero_vartime(r); + if (x_equal && y_equal) { + point_double(x_out, y_out, z_out, x1, y1, z1); + return; + } + felem_sum(r, r, r); + felem_mul(v, u1, i); + + felem_mul(z_out, tmp, h); + felem_square(rr, r); + felem_diff(x_out, rr, j); + felem_diff(x_out, x_out, v); + felem_diff(x_out, x_out, v); + + felem_diff(tmp, v, x_out); + felem_mul(y_out, tmp, r); + felem_mul(tmp, s1, j); + felem_diff(y_out, y_out, tmp); + felem_diff(y_out, y_out, tmp); +} + +/* copy_conditional sets out=in if mask = 0xffffffff in constant time. + * + * On entry: mask is either 0 or 0xffffffff. + */ +static void +copy_conditional(felem out, const felem in, limb mask) +{ + int i; + + for (i = 0; i < NLIMBS; i++) { + const limb tmp = mask & (in[i] ^ out[i]); + out[i] ^= tmp; + } +} + +/* select_affine_point sets {out_x,out_y} to the index'th entry of table. + * On entry: index < 16, table[0] must be zero. + */ +static void +select_affine_point(felem out_x, felem out_y, + const limb *table, limb index) +{ + limb i, j; + + memset(out_x, 0, sizeof(felem)); + memset(out_y, 0, sizeof(felem)); + + for (i = 1; i < 16; i++) { + limb mask = i ^ index; + mask |= mask >> 2; + mask |= mask >> 1; + mask &= 1; + mask--; + for (j = 0; j < NLIMBS; j++, table++) { + out_x[j] |= *table & mask; + } + for (j = 0; j < NLIMBS; j++, table++) { + out_y[j] |= *table & mask; + } + } +} + +/* select_jacobian_point sets {out_x,out_y,out_z} to the index'th entry of + * table. On entry: index < 16, table[0] must be zero. + */ +static void +select_jacobian_point(felem out_x, felem out_y, felem out_z, + const limb *table, limb index) +{ + limb i, j; + + memset(out_x, 0, sizeof(felem)); + memset(out_y, 0, sizeof(felem)); + memset(out_z, 0, sizeof(felem)); + + /* The implicit value at index 0 is all zero. We don't need to perform that + * iteration of the loop because we already set out_* to zero. + */ + table += 3 * NLIMBS; + + for (i = 1; i < 16; i++) { + limb mask = i ^ index; + mask |= mask >> 2; + mask |= mask >> 1; + mask &= 1; + mask--; + for (j = 0; j < NLIMBS; j++, table++) { + out_x[j] |= *table & mask; + } + for (j = 0; j < NLIMBS; j++, table++) { + out_y[j] |= *table & mask; + } + for (j = 0; j < NLIMBS; j++, table++) { + out_z[j] |= *table & mask; + } + } +} + +/* get_bit returns the bit'th bit of scalar. */ +static char +get_bit(const u8 scalar[32], int bit) +{ + return ((scalar[bit >> 3]) >> (bit & 7)) & 1; +} + +/* scalar_base_mult sets {nx,ny,nz} = scalar*G where scalar is a little-endian + * number. Note that the value of scalar must be less than the order of the + * group. + */ +static void +scalar_base_mult(felem nx, felem ny, felem nz, const u8 scalar[32]) +{ + int i, j; + limb n_is_infinity_mask = -1, p_is_noninfinite_mask, mask; + u32 table_offset; + + felem px, py; + felem tx, ty, tz; + + memset(nx, 0, sizeof(felem)); + memset(ny, 0, sizeof(felem)); + memset(nz, 0, sizeof(felem)); + + /* The loop adds bits at positions 0, 64, 128 and 192, followed by + * positions 32,96,160 and 224 and does this 32 times. + */ + for (i = 0; i < 32; i++) { + if (i) { + point_double(nx, ny, nz, nx, ny, nz); + } + table_offset = 0; + for (j = 0; j <= 32; j += 32) { + char bit0 = get_bit(scalar, 31 - i + j); + char bit1 = get_bit(scalar, 95 - i + j); + char bit2 = get_bit(scalar, 159 - i + j); + char bit3 = get_bit(scalar, 223 - i + j); + limb index = bit0 | (bit1 << 1) | (bit2 << 2) | (bit3 << 3); + + select_affine_point(px, py, kPrecomputed + table_offset, index); + table_offset += 30 * NLIMBS; + + /* Since scalar is less than the order of the group, we know that + * {nx,ny,nz} != {px,py,1}, unless both are zero, which we handle + * below. + */ + point_add_mixed(tx, ty, tz, nx, ny, nz, px, py); + /* The result of point_add_mixed is incorrect if {nx,ny,nz} is zero + * (a.k.a. the point at infinity). We handle that situation by + * copying the point from the table. + */ + copy_conditional(nx, px, n_is_infinity_mask); + copy_conditional(ny, py, n_is_infinity_mask); + copy_conditional(nz, kOne, n_is_infinity_mask); + + /* Equally, the result is also wrong if the point from the table is + * zero, which happens when the index is zero. We handle that by + * only copying from {tx,ty,tz} to {nx,ny,nz} if index != 0. + */ + p_is_noninfinite_mask = NON_ZERO_TO_ALL_ONES(index); + mask = p_is_noninfinite_mask & ~n_is_infinity_mask; + copy_conditional(nx, tx, mask); + copy_conditional(ny, ty, mask); + copy_conditional(nz, tz, mask); + /* If p was not zero, then n is now non-zero. */ + n_is_infinity_mask &= ~p_is_noninfinite_mask; + } + } +} + +/* point_to_affine converts a Jacobian point to an affine point. If the input + * is the point at infinity then it returns (0, 0) in constant time. + */ +static void +point_to_affine(felem x_out, felem y_out, + const felem nx, const felem ny, const felem nz) +{ + felem z_inv, z_inv_sq; + felem_inv(z_inv, nz); + felem_square(z_inv_sq, z_inv); + felem_mul(x_out, nx, z_inv_sq); + felem_mul(z_inv, z_inv, z_inv_sq); + felem_mul(y_out, ny, z_inv); +} + +/* scalar_mult sets {nx,ny,nz} = scalar*{x,y}. */ +static void +scalar_mult(felem nx, felem ny, felem nz, + const felem x, const felem y, const u8 scalar[32]) +{ + int i; + felem px, py, pz, tx, ty, tz; + felem precomp[16][3]; + limb n_is_infinity_mask, index, p_is_noninfinite_mask, mask; + + /* We precompute 0,1,2,... times {x,y}. */ + memset(precomp, 0, sizeof(felem) * 3); + memcpy(&precomp[1][0], x, sizeof(felem)); + memcpy(&precomp[1][1], y, sizeof(felem)); + memcpy(&precomp[1][2], kOne, sizeof(felem)); + + for (i = 2; i < 16; i += 2) { + point_double(precomp[i][0], precomp[i][1], precomp[i][2], + precomp[i / 2][0], precomp[i / 2][1], precomp[i / 2][2]); + + point_add_mixed(precomp[i + 1][0], precomp[i + 1][1], precomp[i + 1][2], + precomp[i][0], precomp[i][1], precomp[i][2], x, y); + } + + memset(nx, 0, sizeof(felem)); + memset(ny, 0, sizeof(felem)); + memset(nz, 0, sizeof(felem)); + n_is_infinity_mask = -1; + + /* We add in a window of four bits each iteration and do this 64 times. */ + for (i = 0; i < 64; i++) { + if (i) { + point_double(nx, ny, nz, nx, ny, nz); + point_double(nx, ny, nz, nx, ny, nz); + point_double(nx, ny, nz, nx, ny, nz); + point_double(nx, ny, nz, nx, ny, nz); + } + + index = scalar[31 - i / 2]; + if ((i & 1) == 1) { + index &= 15; + } else { + index >>= 4; + } + + /* See the comments in scalar_base_mult about handling infinities. */ + select_jacobian_point(px, py, pz, precomp[0][0], index); + point_add(tx, ty, tz, nx, ny, nz, px, py, pz); + copy_conditional(nx, px, n_is_infinity_mask); + copy_conditional(ny, py, n_is_infinity_mask); + copy_conditional(nz, pz, n_is_infinity_mask); + + p_is_noninfinite_mask = NON_ZERO_TO_ALL_ONES(index); + mask = p_is_noninfinite_mask & ~n_is_infinity_mask; + copy_conditional(nx, tx, mask); + copy_conditional(ny, ty, mask); + copy_conditional(nz, tz, mask); + n_is_infinity_mask &= ~p_is_noninfinite_mask; + } +} + +/* Interface with Freebl: */ + +/* BYTESWAP_MP_DIGIT_TO_LE swaps the bytes of a mp_digit to + * little-endian order. + */ +#ifdef IS_BIG_ENDIAN +#ifdef __APPLE__ +#include <libkern/OSByteOrder.h> +#define BYTESWAP32(x) OSSwapInt32(x) +#define BYTESWAP64(x) OSSwapInt64(x) +#else +#define BYTESWAP32(x) \ + (((x) >> 24) | (((x) >> 8) & 0xff00) | (((x)&0xff00) << 8) | ((x) << 24)) +#define BYTESWAP64(x) \ + (((x) >> 56) | (((x) >> 40) & 0xff00) | \ + (((x) >> 24) & 0xff0000) | (((x) >> 8) & 0xff000000) | \ + (((x)&0xff000000) << 8) | (((x)&0xff0000) << 24) | \ + (((x)&0xff00) << 40) | ((x) << 56)) +#endif + +#ifdef MP_USE_UINT_DIGIT +#define BYTESWAP_MP_DIGIT_TO_LE(x) BYTESWAP32(x) +#else +#define BYTESWAP_MP_DIGIT_TO_LE(x) BYTESWAP64(x) +#endif +#endif /* IS_BIG_ENDIAN */ + +#ifdef MP_USE_UINT_DIGIT +static const mp_digit kRInvDigits[8] = { + 0x80000000, 1, 0xffffffff, 0, + 0x80000001, 0xfffffffe, 1, 0x7fffffff +}; +#else +static const mp_digit kRInvDigits[4] = { + PR_UINT64(0x180000000), 0xffffffff, + PR_UINT64(0xfffffffe80000001), PR_UINT64(0x7fffffff00000001) +}; +#endif +#define MP_DIGITS_IN_256_BITS (32 / sizeof(mp_digit)) +static const mp_int kRInv = { + MP_ZPOS, + MP_DIGITS_IN_256_BITS, + MP_DIGITS_IN_256_BITS, + (mp_digit *)kRInvDigits +}; + +static const limb kTwo28 = 0x10000000; +static const limb kTwo29 = 0x20000000; + +/* to_montgomery sets out = R*in. */ +static mp_err +to_montgomery(felem out, const mp_int *in, const ECGroup *group) +{ + /* There are no MPI functions for bitshift operations and we wish to shift + * in 257 bits left so we move the digits 256-bits left and then multiply + * by two. + */ + mp_int in_shifted; + int i; + mp_err res; + + MP_CHECKOK(mp_init(&in_shifted)); + MP_CHECKOK(s_mp_pad(&in_shifted, MP_USED(in) + MP_DIGITS_IN_256_BITS)); + memcpy(&MP_DIGIT(&in_shifted, MP_DIGITS_IN_256_BITS), + MP_DIGITS(in), + MP_USED(in) * sizeof(mp_digit)); + MP_CHECKOK(mp_mul_2(&in_shifted, &in_shifted)); + MP_CHECKOK(group->meth->field_mod(&in_shifted, &in_shifted, group->meth)); + + for (i = 0;; i++) { + out[i] = MP_DIGIT(&in_shifted, 0) & kBottom29Bits; + MP_CHECKOK(mp_div_d(&in_shifted, kTwo29, &in_shifted, NULL)); + + i++; + if (i == NLIMBS) + break; + out[i] = MP_DIGIT(&in_shifted, 0) & kBottom28Bits; + MP_CHECKOK(mp_div_d(&in_shifted, kTwo28, &in_shifted, NULL)); + } + +CLEANUP: + mp_clear(&in_shifted); + return res; +} + +/* from_montgomery sets out=in/R. */ +static mp_err +from_montgomery(mp_int *out, const felem in, + const ECGroup *group) +{ + mp_int result, tmp; + mp_err res; + int i; + + MP_CHECKOK(mp_init(&result)); + MP_CHECKOK(mp_init(&tmp)); + + MP_CHECKOK(mp_add_d(&tmp, in[NLIMBS - 1], &result)); + for (i = NLIMBS - 2; i >= 0; i--) { + if ((i & 1) == 0) { + MP_CHECKOK(mp_mul_d(&result, kTwo29, &tmp)); + } else { + MP_CHECKOK(mp_mul_d(&result, kTwo28, &tmp)); + } + MP_CHECKOK(mp_add_d(&tmp, in[i], &result)); + } + + MP_CHECKOK(mp_mul(&result, &kRInv, out)); + MP_CHECKOK(group->meth->field_mod(out, out, group->meth)); + +CLEANUP: + mp_clear(&result); + mp_clear(&tmp); + return res; +} + +/* scalar_from_mp_int sets out_scalar=n, where n < the group order. */ +static void +scalar_from_mp_int(u8 out_scalar[32], const mp_int *n) +{ + /* We require that |n| is less than the order of the group and therefore it + * will fit into |out_scalar|. However, these is a timing side-channel here + * that we cannot avoid: if |n| is sufficiently small it may be one or more + * words too short and we'll copy less data. + */ + memset(out_scalar, 0, 32); +#ifdef IS_LITTLE_ENDIAN + memcpy(out_scalar, MP_DIGITS(n), MP_USED(n) * sizeof(mp_digit)); +#else + { + mp_size i; + mp_digit swapped[MP_DIGITS_IN_256_BITS]; + for (i = 0; i < MP_USED(n); i++) { + swapped[i] = BYTESWAP_MP_DIGIT_TO_LE(MP_DIGIT(n, i)); + } + memcpy(out_scalar, swapped, MP_USED(n) * sizeof(mp_digit)); + } +#endif +} + +/* ec_GFp_nistp256_base_point_mul sets {out_x,out_y} = nG, where n is < the + * order of the group. + */ +static mp_err +ec_GFp_nistp256_base_point_mul(const mp_int *n, + mp_int *out_x, mp_int *out_y, + const ECGroup *group) +{ + u8 scalar[32]; + felem x, y, z, x_affine, y_affine; + mp_err res; + + /* FIXME(agl): test that n < order. */ + + scalar_from_mp_int(scalar, n); + scalar_base_mult(x, y, z, scalar); + point_to_affine(x_affine, y_affine, x, y, z); + MP_CHECKOK(from_montgomery(out_x, x_affine, group)); + MP_CHECKOK(from_montgomery(out_y, y_affine, group)); + +CLEANUP: + return res; +} + +/* ec_GFp_nistp256_point_mul sets {out_x,out_y} = n*{in_x,in_y}, where n is < + * the order of the group. + */ +static mp_err +ec_GFp_nistp256_point_mul(const mp_int *n, + const mp_int *in_x, const mp_int *in_y, + mp_int *out_x, mp_int *out_y, + const ECGroup *group) +{ + u8 scalar[32]; + felem x, y, z, x_affine, y_affine, px, py; + mp_err res; + + scalar_from_mp_int(scalar, n); + + MP_CHECKOK(to_montgomery(px, in_x, group)); + MP_CHECKOK(to_montgomery(py, in_y, group)); + + scalar_mult(x, y, z, px, py, scalar); + point_to_affine(x_affine, y_affine, x, y, z); + MP_CHECKOK(from_montgomery(out_x, x_affine, group)); + MP_CHECKOK(from_montgomery(out_y, y_affine, group)); + +CLEANUP: + return res; +} + +/* ec_GFp_nistp256_point_mul_vartime sets {out_x,out_y} = n1*G + + * n2*{in_x,in_y}, where n1 and n2 are < the order of the group. + * + * As indicated by the name, this function operates in variable time. This + * is safe because it's used for signature validation which doesn't deal + * with secrets. + */ +static mp_err +ec_GFp_nistp256_points_mul_vartime( + const mp_int *n1, const mp_int *n2, + const mp_int *in_x, const mp_int *in_y, + mp_int *out_x, mp_int *out_y, + const ECGroup *group) +{ + u8 scalar1[32], scalar2[32]; + felem x1, y1, z1, x2, y2, z2, x_affine, y_affine, px, py; + mp_err res = MP_OKAY; + + /* If n2 == NULL, this is just a base-point multiplication. */ + if (n2 == NULL) { + return ec_GFp_nistp256_base_point_mul(n1, out_x, out_y, group); + } + + /* If n1 == nULL, this is just an arbitary-point multiplication. */ + if (n1 == NULL) { + return ec_GFp_nistp256_point_mul(n2, in_x, in_y, out_x, out_y, group); + } + + /* If both scalars are zero, then the result is the point at infinity. */ + if (mp_cmp_z(n1) == 0 && mp_cmp_z(n2) == 0) { + mp_zero(out_x); + mp_zero(out_y); + return res; + } + + scalar_from_mp_int(scalar1, n1); + scalar_from_mp_int(scalar2, n2); + + MP_CHECKOK(to_montgomery(px, in_x, group)); + MP_CHECKOK(to_montgomery(py, in_y, group)); + scalar_base_mult(x1, y1, z1, scalar1); + scalar_mult(x2, y2, z2, px, py, scalar2); + + if (mp_cmp_z(n2) == 0) { + /* If n2 == 0, then {x2,y2,z2} is zero and the result is just + * {x1,y1,z1}. */ + } else if (mp_cmp_z(n1) == 0) { + /* If n1 == 0, then {x1,y1,z1} is zero and the result is just + * {x2,y2,z2}. */ + memcpy(x1, x2, sizeof(x2)); + memcpy(y1, y2, sizeof(y2)); + memcpy(z1, z2, sizeof(z2)); + } else { + /* This function handles the case where {x1,y1,z1} == {x2,y2,z2}. */ + point_add_or_double_vartime(x1, y1, z1, x1, y1, z1, x2, y2, z2); + } + + point_to_affine(x_affine, y_affine, x1, y1, z1); + MP_CHECKOK(from_montgomery(out_x, x_affine, group)); + MP_CHECKOK(from_montgomery(out_y, y_affine, group)); + +CLEANUP: + return res; +} + +/* Wire in fast point multiplication for named curves. */ +mp_err +ec_group_set_gfp256_32(ECGroup *group, ECCurveName name) +{ + if (name == ECCurve_NIST_P256) { + group->base_point_mul = &ec_GFp_nistp256_base_point_mul; + group->point_mul = &ec_GFp_nistp256_point_mul; + group->points_mul = &ec_GFp_nistp256_points_mul_vartime; + } + return MP_OKAY; +} diff --git a/security/nss/lib/freebl/ecl/ecp_384.c b/security/nss/lib/freebl/ecl/ecp_384.c new file mode 100644 index 000000000..702fd976e --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecp_384.c @@ -0,0 +1,258 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "ecp.h" +#include "mpi.h" +#include "mplogic.h" +#include "mpi-priv.h" + +/* Fast modular reduction for p384 = 2^384 - 2^128 - 2^96 + 2^32 - 1. a can be r. + * Uses algorithm 2.30 from Hankerson, Menezes, Vanstone. Guide to + * Elliptic Curve Cryptography. */ +static mp_err +ec_GFp_nistp384_mod(const mp_int *a, mp_int *r, const GFMethod *meth) +{ + mp_err res = MP_OKAY; + int a_bits = mpl_significant_bits(a); + int i; + + /* m1, m2 are statically-allocated mp_int of exactly the size we need */ + mp_int m[10]; + +#ifdef ECL_THIRTY_TWO_BIT + mp_digit s[10][12]; + for (i = 0; i < 10; i++) { + MP_SIGN(&m[i]) = MP_ZPOS; + MP_ALLOC(&m[i]) = 12; + MP_USED(&m[i]) = 12; + MP_DIGITS(&m[i]) = s[i]; + } +#else + mp_digit s[10][6]; + for (i = 0; i < 10; i++) { + MP_SIGN(&m[i]) = MP_ZPOS; + MP_ALLOC(&m[i]) = 6; + MP_USED(&m[i]) = 6; + MP_DIGITS(&m[i]) = s[i]; + } +#endif + +#ifdef ECL_THIRTY_TWO_BIT + /* for polynomials larger than twice the field size or polynomials + * not using all words, use regular reduction */ + if ((a_bits > 768) || (a_bits <= 736)) { + MP_CHECKOK(mp_mod(a, &meth->irr, r)); + } else { + for (i = 0; i < 12; i++) { + s[0][i] = MP_DIGIT(a, i); + } + s[1][0] = 0; + s[1][1] = 0; + s[1][2] = 0; + s[1][3] = 0; + s[1][4] = MP_DIGIT(a, 21); + s[1][5] = MP_DIGIT(a, 22); + s[1][6] = MP_DIGIT(a, 23); + s[1][7] = 0; + s[1][8] = 0; + s[1][9] = 0; + s[1][10] = 0; + s[1][11] = 0; + for (i = 0; i < 12; i++) { + s[2][i] = MP_DIGIT(a, i + 12); + } + s[3][0] = MP_DIGIT(a, 21); + s[3][1] = MP_DIGIT(a, 22); + s[3][2] = MP_DIGIT(a, 23); + for (i = 3; i < 12; i++) { + s[3][i] = MP_DIGIT(a, i + 9); + } + s[4][0] = 0; + s[4][1] = MP_DIGIT(a, 23); + s[4][2] = 0; + s[4][3] = MP_DIGIT(a, 20); + for (i = 4; i < 12; i++) { + s[4][i] = MP_DIGIT(a, i + 8); + } + s[5][0] = 0; + s[5][1] = 0; + s[5][2] = 0; + s[5][3] = 0; + s[5][4] = MP_DIGIT(a, 20); + s[5][5] = MP_DIGIT(a, 21); + s[5][6] = MP_DIGIT(a, 22); + s[5][7] = MP_DIGIT(a, 23); + s[5][8] = 0; + s[5][9] = 0; + s[5][10] = 0; + s[5][11] = 0; + s[6][0] = MP_DIGIT(a, 20); + s[6][1] = 0; + s[6][2] = 0; + s[6][3] = MP_DIGIT(a, 21); + s[6][4] = MP_DIGIT(a, 22); + s[6][5] = MP_DIGIT(a, 23); + s[6][6] = 0; + s[6][7] = 0; + s[6][8] = 0; + s[6][9] = 0; + s[6][10] = 0; + s[6][11] = 0; + s[7][0] = MP_DIGIT(a, 23); + for (i = 1; i < 12; i++) { + s[7][i] = MP_DIGIT(a, i + 11); + } + s[8][0] = 0; + s[8][1] = MP_DIGIT(a, 20); + s[8][2] = MP_DIGIT(a, 21); + s[8][3] = MP_DIGIT(a, 22); + s[8][4] = MP_DIGIT(a, 23); + s[8][5] = 0; + s[8][6] = 0; + s[8][7] = 0; + s[8][8] = 0; + s[8][9] = 0; + s[8][10] = 0; + s[8][11] = 0; + s[9][0] = 0; + s[9][1] = 0; + s[9][2] = 0; + s[9][3] = MP_DIGIT(a, 23); + s[9][4] = MP_DIGIT(a, 23); + s[9][5] = 0; + s[9][6] = 0; + s[9][7] = 0; + s[9][8] = 0; + s[9][9] = 0; + s[9][10] = 0; + s[9][11] = 0; + + MP_CHECKOK(mp_add(&m[0], &m[1], r)); + MP_CHECKOK(mp_add(r, &m[1], r)); + MP_CHECKOK(mp_add(r, &m[2], r)); + MP_CHECKOK(mp_add(r, &m[3], r)); + MP_CHECKOK(mp_add(r, &m[4], r)); + MP_CHECKOK(mp_add(r, &m[5], r)); + MP_CHECKOK(mp_add(r, &m[6], r)); + MP_CHECKOK(mp_sub(r, &m[7], r)); + MP_CHECKOK(mp_sub(r, &m[8], r)); + MP_CHECKOK(mp_submod(r, &m[9], &meth->irr, r)); + s_mp_clamp(r); + } +#else + /* for polynomials larger than twice the field size or polynomials + * not using all words, use regular reduction */ + if ((a_bits > 768) || (a_bits <= 736)) { + MP_CHECKOK(mp_mod(a, &meth->irr, r)); + } else { + for (i = 0; i < 6; i++) { + s[0][i] = MP_DIGIT(a, i); + } + s[1][0] = 0; + s[1][1] = 0; + s[1][2] = (MP_DIGIT(a, 10) >> 32) | (MP_DIGIT(a, 11) << 32); + s[1][3] = MP_DIGIT(a, 11) >> 32; + s[1][4] = 0; + s[1][5] = 0; + for (i = 0; i < 6; i++) { + s[2][i] = MP_DIGIT(a, i + 6); + } + s[3][0] = (MP_DIGIT(a, 10) >> 32) | (MP_DIGIT(a, 11) << 32); + s[3][1] = (MP_DIGIT(a, 11) >> 32) | (MP_DIGIT(a, 6) << 32); + for (i = 2; i < 6; i++) { + s[3][i] = (MP_DIGIT(a, i + 4) >> 32) | (MP_DIGIT(a, i + 5) << 32); + } + s[4][0] = (MP_DIGIT(a, 11) >> 32) << 32; + s[4][1] = MP_DIGIT(a, 10) << 32; + for (i = 2; i < 6; i++) { + s[4][i] = MP_DIGIT(a, i + 4); + } + s[5][0] = 0; + s[5][1] = 0; + s[5][2] = MP_DIGIT(a, 10); + s[5][3] = MP_DIGIT(a, 11); + s[5][4] = 0; + s[5][5] = 0; + s[6][0] = (MP_DIGIT(a, 10) << 32) >> 32; + s[6][1] = (MP_DIGIT(a, 10) >> 32) << 32; + s[6][2] = MP_DIGIT(a, 11); + s[6][3] = 0; + s[6][4] = 0; + s[6][5] = 0; + s[7][0] = (MP_DIGIT(a, 11) >> 32) | (MP_DIGIT(a, 6) << 32); + for (i = 1; i < 6; i++) { + s[7][i] = (MP_DIGIT(a, i + 5) >> 32) | (MP_DIGIT(a, i + 6) << 32); + } + s[8][0] = MP_DIGIT(a, 10) << 32; + s[8][1] = (MP_DIGIT(a, 10) >> 32) | (MP_DIGIT(a, 11) << 32); + s[8][2] = MP_DIGIT(a, 11) >> 32; + s[8][3] = 0; + s[8][4] = 0; + s[8][5] = 0; + s[9][0] = 0; + s[9][1] = (MP_DIGIT(a, 11) >> 32) << 32; + s[9][2] = MP_DIGIT(a, 11) >> 32; + s[9][3] = 0; + s[9][4] = 0; + s[9][5] = 0; + + MP_CHECKOK(mp_add(&m[0], &m[1], r)); + MP_CHECKOK(mp_add(r, &m[1], r)); + MP_CHECKOK(mp_add(r, &m[2], r)); + MP_CHECKOK(mp_add(r, &m[3], r)); + MP_CHECKOK(mp_add(r, &m[4], r)); + MP_CHECKOK(mp_add(r, &m[5], r)); + MP_CHECKOK(mp_add(r, &m[6], r)); + MP_CHECKOK(mp_sub(r, &m[7], r)); + MP_CHECKOK(mp_sub(r, &m[8], r)); + MP_CHECKOK(mp_submod(r, &m[9], &meth->irr, r)); + s_mp_clamp(r); + } +#endif + +CLEANUP: + return res; +} + +/* Compute the square of polynomial a, reduce modulo p384. Store the + * result in r. r could be a. Uses optimized modular reduction for p384. + */ +static mp_err +ec_GFp_nistp384_sqr(const mp_int *a, mp_int *r, const GFMethod *meth) +{ + mp_err res = MP_OKAY; + + MP_CHECKOK(mp_sqr(a, r)); + MP_CHECKOK(ec_GFp_nistp384_mod(r, r, meth)); +CLEANUP: + return res; +} + +/* Compute the product of two polynomials a and b, reduce modulo p384. + * Store the result in r. r could be a or b; a could be b. Uses + * optimized modular reduction for p384. */ +static mp_err +ec_GFp_nistp384_mul(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth) +{ + mp_err res = MP_OKAY; + + MP_CHECKOK(mp_mul(a, b, r)); + MP_CHECKOK(ec_GFp_nistp384_mod(r, r, meth)); +CLEANUP: + return res; +} + +/* Wire in fast field arithmetic and precomputation of base point for + * named curves. */ +mp_err +ec_group_set_gfp384(ECGroup *group, ECCurveName name) +{ + if (name == ECCurve_NIST_P384) { + group->meth->field_mod = &ec_GFp_nistp384_mod; + group->meth->field_mul = &ec_GFp_nistp384_mul; + group->meth->field_sqr = &ec_GFp_nistp384_sqr; + } + return MP_OKAY; +} diff --git a/security/nss/lib/freebl/ecl/ecp_521.c b/security/nss/lib/freebl/ecl/ecp_521.c new file mode 100644 index 000000000..6ca0dbb11 --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecp_521.c @@ -0,0 +1,137 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "ecp.h" +#include "mpi.h" +#include "mplogic.h" +#include "mpi-priv.h" + +#define ECP521_DIGITS ECL_CURVE_DIGITS(521) + +/* Fast modular reduction for p521 = 2^521 - 1. a can be r. Uses + * algorithm 2.31 from Hankerson, Menezes, Vanstone. Guide to + * Elliptic Curve Cryptography. */ +static mp_err +ec_GFp_nistp521_mod(const mp_int *a, mp_int *r, const GFMethod *meth) +{ + mp_err res = MP_OKAY; + int a_bits = mpl_significant_bits(a); + unsigned int i; + + /* m1, m2 are statically-allocated mp_int of exactly the size we need */ + mp_int m1; + + mp_digit s1[ECP521_DIGITS] = { 0 }; + + MP_SIGN(&m1) = MP_ZPOS; + MP_ALLOC(&m1) = ECP521_DIGITS; + MP_USED(&m1) = ECP521_DIGITS; + MP_DIGITS(&m1) = s1; + + if (a_bits < 521) { + if (a == r) + return MP_OKAY; + return mp_copy(a, r); + } + /* for polynomials larger than twice the field size or polynomials + * not using all words, use regular reduction */ + if (a_bits > (521 * 2)) { + MP_CHECKOK(mp_mod(a, &meth->irr, r)); + } else { +#define FIRST_DIGIT (ECP521_DIGITS - 1) + for (i = FIRST_DIGIT; i < MP_USED(a) - 1; i++) { + s1[i - FIRST_DIGIT] = (MP_DIGIT(a, i) >> 9) | (MP_DIGIT(a, 1 + i) << (MP_DIGIT_BIT - 9)); + } + s1[i - FIRST_DIGIT] = MP_DIGIT(a, i) >> 9; + + if (a != r) { + MP_CHECKOK(s_mp_pad(r, ECP521_DIGITS)); + for (i = 0; i < ECP521_DIGITS; i++) { + MP_DIGIT(r, i) = MP_DIGIT(a, i); + } + } + MP_USED(r) = ECP521_DIGITS; + MP_DIGIT(r, FIRST_DIGIT) &= 0x1FF; + + MP_CHECKOK(s_mp_add(r, &m1)); + if (MP_DIGIT(r, FIRST_DIGIT) & 0x200) { + MP_CHECKOK(s_mp_add_d(r, 1)); + MP_DIGIT(r, FIRST_DIGIT) &= 0x1FF; + } else if (s_mp_cmp(r, &meth->irr) == 0) { + mp_zero(r); + } + s_mp_clamp(r); + } + +CLEANUP: + return res; +} + +/* Compute the square of polynomial a, reduce modulo p521. Store the + * result in r. r could be a. Uses optimized modular reduction for p521. + */ +static mp_err +ec_GFp_nistp521_sqr(const mp_int *a, mp_int *r, const GFMethod *meth) +{ + mp_err res = MP_OKAY; + + MP_CHECKOK(mp_sqr(a, r)); + MP_CHECKOK(ec_GFp_nistp521_mod(r, r, meth)); +CLEANUP: + return res; +} + +/* Compute the product of two polynomials a and b, reduce modulo p521. + * Store the result in r. r could be a or b; a could be b. Uses + * optimized modular reduction for p521. */ +static mp_err +ec_GFp_nistp521_mul(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth) +{ + mp_err res = MP_OKAY; + + MP_CHECKOK(mp_mul(a, b, r)); + MP_CHECKOK(ec_GFp_nistp521_mod(r, r, meth)); +CLEANUP: + return res; +} + +/* Divides two field elements. If a is NULL, then returns the inverse of + * b. */ +static mp_err +ec_GFp_nistp521_div(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth) +{ + mp_err res = MP_OKAY; + mp_int t; + + /* If a is NULL, then return the inverse of b, otherwise return a/b. */ + if (a == NULL) { + return mp_invmod(b, &meth->irr, r); + } else { + /* MPI doesn't support divmod, so we implement it using invmod and + * mulmod. */ + MP_CHECKOK(mp_init(&t)); + MP_CHECKOK(mp_invmod(b, &meth->irr, &t)); + MP_CHECKOK(mp_mul(a, &t, r)); + MP_CHECKOK(ec_GFp_nistp521_mod(r, r, meth)); + CLEANUP: + mp_clear(&t); + return res; + } +} + +/* Wire in fast field arithmetic and precomputation of base point for + * named curves. */ +mp_err +ec_group_set_gfp521(ECGroup *group, ECCurveName name) +{ + if (name == ECCurve_NIST_P521) { + group->meth->field_mod = &ec_GFp_nistp521_mod; + group->meth->field_mul = &ec_GFp_nistp521_mul; + group->meth->field_sqr = &ec_GFp_nistp521_sqr; + group->meth->field_div = &ec_GFp_nistp521_div; + } + return MP_OKAY; +} diff --git a/security/nss/lib/freebl/ecl/ecp_aff.c b/security/nss/lib/freebl/ecl/ecp_aff.c new file mode 100644 index 000000000..47fb27326 --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecp_aff.c @@ -0,0 +1,308 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "ecp.h" +#include "mplogic.h" +#include <stdlib.h> + +/* Checks if point P(px, py) is at infinity. Uses affine coordinates. */ +mp_err +ec_GFp_pt_is_inf_aff(const mp_int *px, const mp_int *py) +{ + + if ((mp_cmp_z(px) == 0) && (mp_cmp_z(py) == 0)) { + return MP_YES; + } else { + return MP_NO; + } +} + +/* Sets P(px, py) to be the point at infinity. Uses affine coordinates. */ +mp_err +ec_GFp_pt_set_inf_aff(mp_int *px, mp_int *py) +{ + mp_zero(px); + mp_zero(py); + return MP_OKAY; +} + +/* Computes R = P + Q based on IEEE P1363 A.10.1. Elliptic curve points P, + * Q, and R can all be identical. Uses affine coordinates. Assumes input + * is already field-encoded using field_enc, and returns output that is + * still field-encoded. */ +mp_err +ec_GFp_pt_add_aff(const mp_int *px, const mp_int *py, const mp_int *qx, + const mp_int *qy, mp_int *rx, mp_int *ry, + const ECGroup *group) +{ + mp_err res = MP_OKAY; + mp_int lambda, temp, tempx, tempy; + + MP_DIGITS(&lambda) = 0; + MP_DIGITS(&temp) = 0; + MP_DIGITS(&tempx) = 0; + MP_DIGITS(&tempy) = 0; + MP_CHECKOK(mp_init(&lambda)); + MP_CHECKOK(mp_init(&temp)); + MP_CHECKOK(mp_init(&tempx)); + MP_CHECKOK(mp_init(&tempy)); + /* if P = inf, then R = Q */ + if (ec_GFp_pt_is_inf_aff(px, py) == 0) { + MP_CHECKOK(mp_copy(qx, rx)); + MP_CHECKOK(mp_copy(qy, ry)); + res = MP_OKAY; + goto CLEANUP; + } + /* if Q = inf, then R = P */ + if (ec_GFp_pt_is_inf_aff(qx, qy) == 0) { + MP_CHECKOK(mp_copy(px, rx)); + MP_CHECKOK(mp_copy(py, ry)); + res = MP_OKAY; + goto CLEANUP; + } + /* if px != qx, then lambda = (py-qy) / (px-qx) */ + if (mp_cmp(px, qx) != 0) { + MP_CHECKOK(group->meth->field_sub(py, qy, &tempy, group->meth)); + MP_CHECKOK(group->meth->field_sub(px, qx, &tempx, group->meth)); + MP_CHECKOK(group->meth->field_div(&tempy, &tempx, &lambda, group->meth)); + } else { + /* if py != qy or qy = 0, then R = inf */ + if (((mp_cmp(py, qy) != 0)) || (mp_cmp_z(qy) == 0)) { + mp_zero(rx); + mp_zero(ry); + res = MP_OKAY; + goto CLEANUP; + } + /* lambda = (3qx^2+a) / (2qy) */ + MP_CHECKOK(group->meth->field_sqr(qx, &tempx, group->meth)); + MP_CHECKOK(mp_set_int(&temp, 3)); + if (group->meth->field_enc) { + MP_CHECKOK(group->meth->field_enc(&temp, &temp, group->meth)); + } + MP_CHECKOK(group->meth->field_mul(&tempx, &temp, &tempx, group->meth)); + MP_CHECKOK(group->meth->field_add(&tempx, &group->curvea, &tempx, group->meth)); + MP_CHECKOK(mp_set_int(&temp, 2)); + if (group->meth->field_enc) { + MP_CHECKOK(group->meth->field_enc(&temp, &temp, group->meth)); + } + MP_CHECKOK(group->meth->field_mul(qy, &temp, &tempy, group->meth)); + MP_CHECKOK(group->meth->field_div(&tempx, &tempy, &lambda, group->meth)); + } + /* rx = lambda^2 - px - qx */ + MP_CHECKOK(group->meth->field_sqr(&lambda, &tempx, group->meth)); + MP_CHECKOK(group->meth->field_sub(&tempx, px, &tempx, group->meth)); + MP_CHECKOK(group->meth->field_sub(&tempx, qx, &tempx, group->meth)); + /* ry = (x1-x2) * lambda - y1 */ + MP_CHECKOK(group->meth->field_sub(qx, &tempx, &tempy, group->meth)); + MP_CHECKOK(group->meth->field_mul(&tempy, &lambda, &tempy, group->meth)); + MP_CHECKOK(group->meth->field_sub(&tempy, qy, &tempy, group->meth)); + MP_CHECKOK(mp_copy(&tempx, rx)); + MP_CHECKOK(mp_copy(&tempy, ry)); + +CLEANUP: + mp_clear(&lambda); + mp_clear(&temp); + mp_clear(&tempx); + mp_clear(&tempy); + return res; +} + +/* Computes R = P - Q. Elliptic curve points P, Q, and R can all be + * identical. Uses affine coordinates. Assumes input is already + * field-encoded using field_enc, and returns output that is still + * field-encoded. */ +mp_err +ec_GFp_pt_sub_aff(const mp_int *px, const mp_int *py, const mp_int *qx, + const mp_int *qy, mp_int *rx, mp_int *ry, + const ECGroup *group) +{ + mp_err res = MP_OKAY; + mp_int nqy; + + MP_DIGITS(&nqy) = 0; + MP_CHECKOK(mp_init(&nqy)); + /* nqy = -qy */ + MP_CHECKOK(group->meth->field_neg(qy, &nqy, group->meth)); + res = group->point_add(px, py, qx, &nqy, rx, ry, group); +CLEANUP: + mp_clear(&nqy); + return res; +} + +/* Computes R = 2P. Elliptic curve points P and R can be identical. Uses + * affine coordinates. Assumes input is already field-encoded using + * field_enc, and returns output that is still field-encoded. */ +mp_err +ec_GFp_pt_dbl_aff(const mp_int *px, const mp_int *py, mp_int *rx, + mp_int *ry, const ECGroup *group) +{ + return ec_GFp_pt_add_aff(px, py, px, py, rx, ry, group); +} + +/* by default, this routine is unused and thus doesn't need to be compiled */ +#ifdef ECL_ENABLE_GFP_PT_MUL_AFF +/* Computes R = nP based on IEEE P1363 A.10.3. Elliptic curve points P and + * R can be identical. Uses affine coordinates. Assumes input is already + * field-encoded using field_enc, and returns output that is still + * field-encoded. */ +mp_err +ec_GFp_pt_mul_aff(const mp_int *n, const mp_int *px, const mp_int *py, + mp_int *rx, mp_int *ry, const ECGroup *group) +{ + mp_err res = MP_OKAY; + mp_int k, k3, qx, qy, sx, sy; + int b1, b3, i, l; + + MP_DIGITS(&k) = 0; + MP_DIGITS(&k3) = 0; + MP_DIGITS(&qx) = 0; + MP_DIGITS(&qy) = 0; + MP_DIGITS(&sx) = 0; + MP_DIGITS(&sy) = 0; + MP_CHECKOK(mp_init(&k)); + MP_CHECKOK(mp_init(&k3)); + MP_CHECKOK(mp_init(&qx)); + MP_CHECKOK(mp_init(&qy)); + MP_CHECKOK(mp_init(&sx)); + MP_CHECKOK(mp_init(&sy)); + + /* if n = 0 then r = inf */ + if (mp_cmp_z(n) == 0) { + mp_zero(rx); + mp_zero(ry); + res = MP_OKAY; + goto CLEANUP; + } + /* Q = P, k = n */ + MP_CHECKOK(mp_copy(px, &qx)); + MP_CHECKOK(mp_copy(py, &qy)); + MP_CHECKOK(mp_copy(n, &k)); + /* if n < 0 then Q = -Q, k = -k */ + if (mp_cmp_z(n) < 0) { + MP_CHECKOK(group->meth->field_neg(&qy, &qy, group->meth)); + MP_CHECKOK(mp_neg(&k, &k)); + } +#ifdef ECL_DEBUG /* basic double and add method */ + l = mpl_significant_bits(&k) - 1; + MP_CHECKOK(mp_copy(&qx, &sx)); + MP_CHECKOK(mp_copy(&qy, &sy)); + for (i = l - 1; i >= 0; i--) { + /* S = 2S */ + MP_CHECKOK(group->point_dbl(&sx, &sy, &sx, &sy, group)); + /* if k_i = 1, then S = S + Q */ + if (mpl_get_bit(&k, i) != 0) { + MP_CHECKOK(group->point_add(&sx, &sy, &qx, &qy, &sx, &sy, group)); + } + } +#else /* double and add/subtract method from \ + * standard */ + /* k3 = 3 * k */ + MP_CHECKOK(mp_set_int(&k3, 3)); + MP_CHECKOK(mp_mul(&k, &k3, &k3)); + /* S = Q */ + MP_CHECKOK(mp_copy(&qx, &sx)); + MP_CHECKOK(mp_copy(&qy, &sy)); + /* l = index of high order bit in binary representation of 3*k */ + l = mpl_significant_bits(&k3) - 1; + /* for i = l-1 downto 1 */ + for (i = l - 1; i >= 1; i--) { + /* S = 2S */ + MP_CHECKOK(group->point_dbl(&sx, &sy, &sx, &sy, group)); + b3 = MP_GET_BIT(&k3, i); + b1 = MP_GET_BIT(&k, i); + /* if k3_i = 1 and k_i = 0, then S = S + Q */ + if ((b3 == 1) && (b1 == 0)) { + MP_CHECKOK(group->point_add(&sx, &sy, &qx, &qy, &sx, &sy, group)); + /* if k3_i = 0 and k_i = 1, then S = S - Q */ + } else if ((b3 == 0) && (b1 == 1)) { + MP_CHECKOK(group->point_sub(&sx, &sy, &qx, &qy, &sx, &sy, group)); + } + } +#endif + /* output S */ + MP_CHECKOK(mp_copy(&sx, rx)); + MP_CHECKOK(mp_copy(&sy, ry)); + +CLEANUP: + mp_clear(&k); + mp_clear(&k3); + mp_clear(&qx); + mp_clear(&qy); + mp_clear(&sx); + mp_clear(&sy); + return res; +} +#endif + +/* Validates a point on a GFp curve. */ +mp_err +ec_GFp_validate_point(const mp_int *px, const mp_int *py, const ECGroup *group) +{ + mp_err res = MP_NO; + mp_int accl, accr, tmp, pxt, pyt; + + MP_DIGITS(&accl) = 0; + MP_DIGITS(&accr) = 0; + MP_DIGITS(&tmp) = 0; + MP_DIGITS(&pxt) = 0; + MP_DIGITS(&pyt) = 0; + MP_CHECKOK(mp_init(&accl)); + MP_CHECKOK(mp_init(&accr)); + MP_CHECKOK(mp_init(&tmp)); + MP_CHECKOK(mp_init(&pxt)); + MP_CHECKOK(mp_init(&pyt)); + + /* 1: Verify that publicValue is not the point at infinity */ + if (ec_GFp_pt_is_inf_aff(px, py) == MP_YES) { + res = MP_NO; + goto CLEANUP; + } + /* 2: Verify that the coordinates of publicValue are elements + * of the field. + */ + if ((MP_SIGN(px) == MP_NEG) || (mp_cmp(px, &group->meth->irr) >= 0) || + (MP_SIGN(py) == MP_NEG) || (mp_cmp(py, &group->meth->irr) >= 0)) { + res = MP_NO; + goto CLEANUP; + } + /* 3: Verify that publicValue is on the curve. */ + if (group->meth->field_enc) { + group->meth->field_enc(px, &pxt, group->meth); + group->meth->field_enc(py, &pyt, group->meth); + } else { + MP_CHECKOK(mp_copy(px, &pxt)); + MP_CHECKOK(mp_copy(py, &pyt)); + } + /* left-hand side: y^2 */ + MP_CHECKOK(group->meth->field_sqr(&pyt, &accl, group->meth)); + /* right-hand side: x^3 + a*x + b = (x^2 + a)*x + b by Horner's rule */ + MP_CHECKOK(group->meth->field_sqr(&pxt, &tmp, group->meth)); + MP_CHECKOK(group->meth->field_add(&tmp, &group->curvea, &tmp, group->meth)); + MP_CHECKOK(group->meth->field_mul(&tmp, &pxt, &accr, group->meth)); + MP_CHECKOK(group->meth->field_add(&accr, &group->curveb, &accr, group->meth)); + /* check LHS - RHS == 0 */ + MP_CHECKOK(group->meth->field_sub(&accl, &accr, &accr, group->meth)); + if (mp_cmp_z(&accr) != 0) { + res = MP_NO; + goto CLEANUP; + } + /* 4: Verify that the order of the curve times the publicValue + * is the point at infinity. + */ + MP_CHECKOK(ECPoint_mul(group, &group->order, px, py, &pxt, &pyt)); + if (ec_GFp_pt_is_inf_aff(&pxt, &pyt) != MP_YES) { + res = MP_NO; + goto CLEANUP; + } + + res = MP_YES; + +CLEANUP: + mp_clear(&accl); + mp_clear(&accr); + mp_clear(&tmp); + mp_clear(&pxt); + mp_clear(&pyt); + return res; +} diff --git a/security/nss/lib/freebl/ecl/ecp_jac.c b/security/nss/lib/freebl/ecl/ecp_jac.c new file mode 100644 index 000000000..535e75903 --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecp_jac.c @@ -0,0 +1,513 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "ecp.h" +#include "mplogic.h" +#include <stdlib.h> +#ifdef ECL_DEBUG +#include <assert.h> +#endif + +/* Converts a point P(px, py) from affine coordinates to Jacobian + * projective coordinates R(rx, ry, rz). Assumes input is already + * field-encoded using field_enc, and returns output that is still + * field-encoded. */ +mp_err +ec_GFp_pt_aff2jac(const mp_int *px, const mp_int *py, mp_int *rx, + mp_int *ry, mp_int *rz, const ECGroup *group) +{ + mp_err res = MP_OKAY; + + if (ec_GFp_pt_is_inf_aff(px, py) == MP_YES) { + MP_CHECKOK(ec_GFp_pt_set_inf_jac(rx, ry, rz)); + } else { + MP_CHECKOK(mp_copy(px, rx)); + MP_CHECKOK(mp_copy(py, ry)); + MP_CHECKOK(mp_set_int(rz, 1)); + if (group->meth->field_enc) { + MP_CHECKOK(group->meth->field_enc(rz, rz, group->meth)); + } + } +CLEANUP: + return res; +} + +/* Converts a point P(px, py, pz) from Jacobian projective coordinates to + * affine coordinates R(rx, ry). P and R can share x and y coordinates. + * Assumes input is already field-encoded using field_enc, and returns + * output that is still field-encoded. */ +mp_err +ec_GFp_pt_jac2aff(const mp_int *px, const mp_int *py, const mp_int *pz, + mp_int *rx, mp_int *ry, const ECGroup *group) +{ + mp_err res = MP_OKAY; + mp_int z1, z2, z3; + + MP_DIGITS(&z1) = 0; + MP_DIGITS(&z2) = 0; + MP_DIGITS(&z3) = 0; + MP_CHECKOK(mp_init(&z1)); + MP_CHECKOK(mp_init(&z2)); + MP_CHECKOK(mp_init(&z3)); + + /* if point at infinity, then set point at infinity and exit */ + if (ec_GFp_pt_is_inf_jac(px, py, pz) == MP_YES) { + MP_CHECKOK(ec_GFp_pt_set_inf_aff(rx, ry)); + goto CLEANUP; + } + + /* transform (px, py, pz) into (px / pz^2, py / pz^3) */ + if (mp_cmp_d(pz, 1) == 0) { + MP_CHECKOK(mp_copy(px, rx)); + MP_CHECKOK(mp_copy(py, ry)); + } else { + MP_CHECKOK(group->meth->field_div(NULL, pz, &z1, group->meth)); + MP_CHECKOK(group->meth->field_sqr(&z1, &z2, group->meth)); + MP_CHECKOK(group->meth->field_mul(&z1, &z2, &z3, group->meth)); + MP_CHECKOK(group->meth->field_mul(px, &z2, rx, group->meth)); + MP_CHECKOK(group->meth->field_mul(py, &z3, ry, group->meth)); + } + +CLEANUP: + mp_clear(&z1); + mp_clear(&z2); + mp_clear(&z3); + return res; +} + +/* Checks if point P(px, py, pz) is at infinity. Uses Jacobian + * coordinates. */ +mp_err +ec_GFp_pt_is_inf_jac(const mp_int *px, const mp_int *py, const mp_int *pz) +{ + return mp_cmp_z(pz); +} + +/* Sets P(px, py, pz) to be the point at infinity. Uses Jacobian + * coordinates. */ +mp_err +ec_GFp_pt_set_inf_jac(mp_int *px, mp_int *py, mp_int *pz) +{ + mp_zero(pz); + return MP_OKAY; +} + +/* Computes R = P + Q where R is (rx, ry, rz), P is (px, py, pz) and Q is + * (qx, qy, 1). Elliptic curve points P, Q, and R can all be identical. + * Uses mixed Jacobian-affine coordinates. Assumes input is already + * field-encoded using field_enc, and returns output that is still + * field-encoded. Uses equation (2) from Brown, Hankerson, Lopez, and + * Menezes. Software Implementation of the NIST Elliptic Curves Over Prime + * Fields. */ +mp_err +ec_GFp_pt_add_jac_aff(const mp_int *px, const mp_int *py, const mp_int *pz, + const mp_int *qx, const mp_int *qy, mp_int *rx, + mp_int *ry, mp_int *rz, const ECGroup *group) +{ + mp_err res = MP_OKAY; + mp_int A, B, C, D, C2, C3; + + MP_DIGITS(&A) = 0; + MP_DIGITS(&B) = 0; + MP_DIGITS(&C) = 0; + MP_DIGITS(&D) = 0; + MP_DIGITS(&C2) = 0; + MP_DIGITS(&C3) = 0; + MP_CHECKOK(mp_init(&A)); + MP_CHECKOK(mp_init(&B)); + MP_CHECKOK(mp_init(&C)); + MP_CHECKOK(mp_init(&D)); + MP_CHECKOK(mp_init(&C2)); + MP_CHECKOK(mp_init(&C3)); + + /* If either P or Q is the point at infinity, then return the other + * point */ + if (ec_GFp_pt_is_inf_jac(px, py, pz) == MP_YES) { + MP_CHECKOK(ec_GFp_pt_aff2jac(qx, qy, rx, ry, rz, group)); + goto CLEANUP; + } + if (ec_GFp_pt_is_inf_aff(qx, qy) == MP_YES) { + MP_CHECKOK(mp_copy(px, rx)); + MP_CHECKOK(mp_copy(py, ry)); + MP_CHECKOK(mp_copy(pz, rz)); + goto CLEANUP; + } + + /* A = qx * pz^2, B = qy * pz^3 */ + MP_CHECKOK(group->meth->field_sqr(pz, &A, group->meth)); + MP_CHECKOK(group->meth->field_mul(&A, pz, &B, group->meth)); + MP_CHECKOK(group->meth->field_mul(&A, qx, &A, group->meth)); + MP_CHECKOK(group->meth->field_mul(&B, qy, &B, group->meth)); + + /* C = A - px, D = B - py */ + MP_CHECKOK(group->meth->field_sub(&A, px, &C, group->meth)); + MP_CHECKOK(group->meth->field_sub(&B, py, &D, group->meth)); + + if (mp_cmp_z(&C) == 0) { + /* P == Q or P == -Q */ + if (mp_cmp_z(&D) == 0) { + /* P == Q */ + /* It is cheaper to double (qx, qy, 1) than (px, py, pz). */ + MP_DIGIT(&D, 0) = 1; /* Set D to 1. */ + MP_CHECKOK(ec_GFp_pt_dbl_jac(qx, qy, &D, rx, ry, rz, group)); + } else { + /* P == -Q */ + MP_CHECKOK(ec_GFp_pt_set_inf_jac(rx, ry, rz)); + } + goto CLEANUP; + } + + /* C2 = C^2, C3 = C^3 */ + MP_CHECKOK(group->meth->field_sqr(&C, &C2, group->meth)); + MP_CHECKOK(group->meth->field_mul(&C, &C2, &C3, group->meth)); + + /* rz = pz * C */ + MP_CHECKOK(group->meth->field_mul(pz, &C, rz, group->meth)); + + /* C = px * C^2 */ + MP_CHECKOK(group->meth->field_mul(px, &C2, &C, group->meth)); + /* A = D^2 */ + MP_CHECKOK(group->meth->field_sqr(&D, &A, group->meth)); + + /* rx = D^2 - (C^3 + 2 * (px * C^2)) */ + MP_CHECKOK(group->meth->field_add(&C, &C, rx, group->meth)); + MP_CHECKOK(group->meth->field_add(&C3, rx, rx, group->meth)); + MP_CHECKOK(group->meth->field_sub(&A, rx, rx, group->meth)); + + /* C3 = py * C^3 */ + MP_CHECKOK(group->meth->field_mul(py, &C3, &C3, group->meth)); + + /* ry = D * (px * C^2 - rx) - py * C^3 */ + MP_CHECKOK(group->meth->field_sub(&C, rx, ry, group->meth)); + MP_CHECKOK(group->meth->field_mul(&D, ry, ry, group->meth)); + MP_CHECKOK(group->meth->field_sub(ry, &C3, ry, group->meth)); + +CLEANUP: + mp_clear(&A); + mp_clear(&B); + mp_clear(&C); + mp_clear(&D); + mp_clear(&C2); + mp_clear(&C3); + return res; +} + +/* Computes R = 2P. Elliptic curve points P and R can be identical. Uses + * Jacobian coordinates. + * + * Assumes input is already field-encoded using field_enc, and returns + * output that is still field-encoded. + * + * This routine implements Point Doubling in the Jacobian Projective + * space as described in the paper "Efficient elliptic curve exponentiation + * using mixed coordinates", by H. Cohen, A Miyaji, T. Ono. + */ +mp_err +ec_GFp_pt_dbl_jac(const mp_int *px, const mp_int *py, const mp_int *pz, + mp_int *rx, mp_int *ry, mp_int *rz, const ECGroup *group) +{ + mp_err res = MP_OKAY; + mp_int t0, t1, M, S; + + MP_DIGITS(&t0) = 0; + MP_DIGITS(&t1) = 0; + MP_DIGITS(&M) = 0; + MP_DIGITS(&S) = 0; + MP_CHECKOK(mp_init(&t0)); + MP_CHECKOK(mp_init(&t1)); + MP_CHECKOK(mp_init(&M)); + MP_CHECKOK(mp_init(&S)); + + /* P == inf or P == -P */ + if (ec_GFp_pt_is_inf_jac(px, py, pz) == MP_YES || mp_cmp_z(py) == 0) { + MP_CHECKOK(ec_GFp_pt_set_inf_jac(rx, ry, rz)); + goto CLEANUP; + } + + if (mp_cmp_d(pz, 1) == 0) { + /* M = 3 * px^2 + a */ + MP_CHECKOK(group->meth->field_sqr(px, &t0, group->meth)); + MP_CHECKOK(group->meth->field_add(&t0, &t0, &M, group->meth)); + MP_CHECKOK(group->meth->field_add(&t0, &M, &t0, group->meth)); + MP_CHECKOK(group->meth->field_add(&t0, &group->curvea, &M, group->meth)); + } else if (MP_SIGN(&group->curvea) == MP_NEG && + MP_USED(&group->curvea) == 1 && + MP_DIGIT(&group->curvea, 0) == 3) { + /* M = 3 * (px + pz^2) * (px - pz^2) */ + MP_CHECKOK(group->meth->field_sqr(pz, &M, group->meth)); + MP_CHECKOK(group->meth->field_add(px, &M, &t0, group->meth)); + MP_CHECKOK(group->meth->field_sub(px, &M, &t1, group->meth)); + MP_CHECKOK(group->meth->field_mul(&t0, &t1, &M, group->meth)); + MP_CHECKOK(group->meth->field_add(&M, &M, &t0, group->meth)); + MP_CHECKOK(group->meth->field_add(&t0, &M, &M, group->meth)); + } else { + /* M = 3 * (px^2) + a * (pz^4) */ + MP_CHECKOK(group->meth->field_sqr(px, &t0, group->meth)); + MP_CHECKOK(group->meth->field_add(&t0, &t0, &M, group->meth)); + MP_CHECKOK(group->meth->field_add(&t0, &M, &t0, group->meth)); + MP_CHECKOK(group->meth->field_sqr(pz, &M, group->meth)); + MP_CHECKOK(group->meth->field_sqr(&M, &M, group->meth)); + MP_CHECKOK(group->meth->field_mul(&M, &group->curvea, &M, group->meth)); + MP_CHECKOK(group->meth->field_add(&M, &t0, &M, group->meth)); + } + + /* rz = 2 * py * pz */ + /* t0 = 4 * py^2 */ + if (mp_cmp_d(pz, 1) == 0) { + MP_CHECKOK(group->meth->field_add(py, py, rz, group->meth)); + MP_CHECKOK(group->meth->field_sqr(rz, &t0, group->meth)); + } else { + MP_CHECKOK(group->meth->field_add(py, py, &t0, group->meth)); + MP_CHECKOK(group->meth->field_mul(&t0, pz, rz, group->meth)); + MP_CHECKOK(group->meth->field_sqr(&t0, &t0, group->meth)); + } + + /* S = 4 * px * py^2 = px * (2 * py)^2 */ + MP_CHECKOK(group->meth->field_mul(px, &t0, &S, group->meth)); + + /* rx = M^2 - 2 * S */ + MP_CHECKOK(group->meth->field_add(&S, &S, &t1, group->meth)); + MP_CHECKOK(group->meth->field_sqr(&M, rx, group->meth)); + MP_CHECKOK(group->meth->field_sub(rx, &t1, rx, group->meth)); + + /* ry = M * (S - rx) - 8 * py^4 */ + MP_CHECKOK(group->meth->field_sqr(&t0, &t1, group->meth)); + if (mp_isodd(&t1)) { + MP_CHECKOK(mp_add(&t1, &group->meth->irr, &t1)); + } + MP_CHECKOK(mp_div_2(&t1, &t1)); + MP_CHECKOK(group->meth->field_sub(&S, rx, &S, group->meth)); + MP_CHECKOK(group->meth->field_mul(&M, &S, &M, group->meth)); + MP_CHECKOK(group->meth->field_sub(&M, &t1, ry, group->meth)); + +CLEANUP: + mp_clear(&t0); + mp_clear(&t1); + mp_clear(&M); + mp_clear(&S); + return res; +} + +/* by default, this routine is unused and thus doesn't need to be compiled */ +#ifdef ECL_ENABLE_GFP_PT_MUL_JAC +/* Computes R = nP where R is (rx, ry) and P is (px, py). The parameters + * a, b and p are the elliptic curve coefficients and the prime that + * determines the field GFp. Elliptic curve points P and R can be + * identical. Uses mixed Jacobian-affine coordinates. Assumes input is + * already field-encoded using field_enc, and returns output that is still + * field-encoded. Uses 4-bit window method. */ +mp_err +ec_GFp_pt_mul_jac(const mp_int *n, const mp_int *px, const mp_int *py, + mp_int *rx, mp_int *ry, const ECGroup *group) +{ + mp_err res = MP_OKAY; + mp_int precomp[16][2], rz; + int i, ni, d; + + MP_DIGITS(&rz) = 0; + for (i = 0; i < 16; i++) { + MP_DIGITS(&precomp[i][0]) = 0; + MP_DIGITS(&precomp[i][1]) = 0; + } + + ARGCHK(group != NULL, MP_BADARG); + ARGCHK((n != NULL) && (px != NULL) && (py != NULL), MP_BADARG); + + /* initialize precomputation table */ + for (i = 0; i < 16; i++) { + MP_CHECKOK(mp_init(&precomp[i][0])); + MP_CHECKOK(mp_init(&precomp[i][1])); + } + + /* fill precomputation table */ + mp_zero(&precomp[0][0]); + mp_zero(&precomp[0][1]); + MP_CHECKOK(mp_copy(px, &precomp[1][0])); + MP_CHECKOK(mp_copy(py, &precomp[1][1])); + for (i = 2; i < 16; i++) { + MP_CHECKOK(group->point_add(&precomp[1][0], &precomp[1][1], + &precomp[i - 1][0], &precomp[i - 1][1], + &precomp[i][0], &precomp[i][1], group)); + } + + d = (mpl_significant_bits(n) + 3) / 4; + + /* R = inf */ + MP_CHECKOK(mp_init(&rz)); + MP_CHECKOK(ec_GFp_pt_set_inf_jac(rx, ry, &rz)); + + for (i = d - 1; i >= 0; i--) { + /* compute window ni */ + ni = MP_GET_BIT(n, 4 * i + 3); + ni <<= 1; + ni |= MP_GET_BIT(n, 4 * i + 2); + ni <<= 1; + ni |= MP_GET_BIT(n, 4 * i + 1); + ni <<= 1; + ni |= MP_GET_BIT(n, 4 * i); + /* R = 2^4 * R */ + MP_CHECKOK(ec_GFp_pt_dbl_jac(rx, ry, &rz, rx, ry, &rz, group)); + MP_CHECKOK(ec_GFp_pt_dbl_jac(rx, ry, &rz, rx, ry, &rz, group)); + MP_CHECKOK(ec_GFp_pt_dbl_jac(rx, ry, &rz, rx, ry, &rz, group)); + MP_CHECKOK(ec_GFp_pt_dbl_jac(rx, ry, &rz, rx, ry, &rz, group)); + /* R = R + (ni * P) */ + MP_CHECKOK(ec_GFp_pt_add_jac_aff(rx, ry, &rz, &precomp[ni][0], &precomp[ni][1], rx, ry, + &rz, group)); + } + + /* convert result S to affine coordinates */ + MP_CHECKOK(ec_GFp_pt_jac2aff(rx, ry, &rz, rx, ry, group)); + +CLEANUP: + mp_clear(&rz); + for (i = 0; i < 16; i++) { + mp_clear(&precomp[i][0]); + mp_clear(&precomp[i][1]); + } + return res; +} +#endif + +/* Elliptic curve scalar-point multiplication. Computes R(x, y) = k1 * G + + * k2 * P(x, y), where G is the generator (base point) of the group of + * points on the elliptic curve. Allows k1 = NULL or { k2, P } = NULL. + * Uses mixed Jacobian-affine coordinates. Input and output values are + * assumed to be NOT field-encoded. Uses algorithm 15 (simultaneous + * multiple point multiplication) from Brown, Hankerson, Lopez, Menezes. + * Software Implementation of the NIST Elliptic Curves over Prime Fields. */ +mp_err +ec_GFp_pts_mul_jac(const mp_int *k1, const mp_int *k2, const mp_int *px, + const mp_int *py, mp_int *rx, mp_int *ry, + const ECGroup *group) +{ + mp_err res = MP_OKAY; + mp_int precomp[4][4][2]; + mp_int rz; + const mp_int *a, *b; + unsigned int i, j; + int ai, bi, d; + + for (i = 0; i < 4; i++) { + for (j = 0; j < 4; j++) { + MP_DIGITS(&precomp[i][j][0]) = 0; + MP_DIGITS(&precomp[i][j][1]) = 0; + } + } + MP_DIGITS(&rz) = 0; + + ARGCHK(group != NULL, MP_BADARG); + ARGCHK(!((k1 == NULL) && ((k2 == NULL) || (px == NULL) || (py == NULL))), MP_BADARG); + + /* if some arguments are not defined used ECPoint_mul */ + if (k1 == NULL) { + return ECPoint_mul(group, k2, px, py, rx, ry); + } else if ((k2 == NULL) || (px == NULL) || (py == NULL)) { + return ECPoint_mul(group, k1, NULL, NULL, rx, ry); + } + + /* initialize precomputation table */ + for (i = 0; i < 4; i++) { + for (j = 0; j < 4; j++) { + MP_CHECKOK(mp_init(&precomp[i][j][0])); + MP_CHECKOK(mp_init(&precomp[i][j][1])); + } + } + + /* fill precomputation table */ + /* assign {k1, k2} = {a, b} such that len(a) >= len(b) */ + if (mpl_significant_bits(k1) < mpl_significant_bits(k2)) { + a = k2; + b = k1; + if (group->meth->field_enc) { + MP_CHECKOK(group->meth->field_enc(px, &precomp[1][0][0], group->meth)); + MP_CHECKOK(group->meth->field_enc(py, &precomp[1][0][1], group->meth)); + } else { + MP_CHECKOK(mp_copy(px, &precomp[1][0][0])); + MP_CHECKOK(mp_copy(py, &precomp[1][0][1])); + } + MP_CHECKOK(mp_copy(&group->genx, &precomp[0][1][0])); + MP_CHECKOK(mp_copy(&group->geny, &precomp[0][1][1])); + } else { + a = k1; + b = k2; + MP_CHECKOK(mp_copy(&group->genx, &precomp[1][0][0])); + MP_CHECKOK(mp_copy(&group->geny, &precomp[1][0][1])); + if (group->meth->field_enc) { + MP_CHECKOK(group->meth->field_enc(px, &precomp[0][1][0], group->meth)); + MP_CHECKOK(group->meth->field_enc(py, &precomp[0][1][1], group->meth)); + } else { + MP_CHECKOK(mp_copy(px, &precomp[0][1][0])); + MP_CHECKOK(mp_copy(py, &precomp[0][1][1])); + } + } + /* precompute [*][0][*] */ + mp_zero(&precomp[0][0][0]); + mp_zero(&precomp[0][0][1]); + MP_CHECKOK(group->point_dbl(&precomp[1][0][0], &precomp[1][0][1], + &precomp[2][0][0], &precomp[2][0][1], group)); + MP_CHECKOK(group->point_add(&precomp[1][0][0], &precomp[1][0][1], + &precomp[2][0][0], &precomp[2][0][1], + &precomp[3][0][0], &precomp[3][0][1], group)); + /* precompute [*][1][*] */ + for (i = 1; i < 4; i++) { + MP_CHECKOK(group->point_add(&precomp[0][1][0], &precomp[0][1][1], + &precomp[i][0][0], &precomp[i][0][1], + &precomp[i][1][0], &precomp[i][1][1], group)); + } + /* precompute [*][2][*] */ + MP_CHECKOK(group->point_dbl(&precomp[0][1][0], &precomp[0][1][1], + &precomp[0][2][0], &precomp[0][2][1], group)); + for (i = 1; i < 4; i++) { + MP_CHECKOK(group->point_add(&precomp[0][2][0], &precomp[0][2][1], + &precomp[i][0][0], &precomp[i][0][1], + &precomp[i][2][0], &precomp[i][2][1], group)); + } + /* precompute [*][3][*] */ + MP_CHECKOK(group->point_add(&precomp[0][1][0], &precomp[0][1][1], + &precomp[0][2][0], &precomp[0][2][1], + &precomp[0][3][0], &precomp[0][3][1], group)); + for (i = 1; i < 4; i++) { + MP_CHECKOK(group->point_add(&precomp[0][3][0], &precomp[0][3][1], + &precomp[i][0][0], &precomp[i][0][1], + &precomp[i][3][0], &precomp[i][3][1], group)); + } + + d = (mpl_significant_bits(a) + 1) / 2; + + /* R = inf */ + MP_CHECKOK(mp_init(&rz)); + MP_CHECKOK(ec_GFp_pt_set_inf_jac(rx, ry, &rz)); + + for (i = d; i-- > 0;) { + ai = MP_GET_BIT(a, 2 * i + 1); + ai <<= 1; + ai |= MP_GET_BIT(a, 2 * i); + bi = MP_GET_BIT(b, 2 * i + 1); + bi <<= 1; + bi |= MP_GET_BIT(b, 2 * i); + /* R = 2^2 * R */ + MP_CHECKOK(ec_GFp_pt_dbl_jac(rx, ry, &rz, rx, ry, &rz, group)); + MP_CHECKOK(ec_GFp_pt_dbl_jac(rx, ry, &rz, rx, ry, &rz, group)); + /* R = R + (ai * A + bi * B) */ + MP_CHECKOK(ec_GFp_pt_add_jac_aff(rx, ry, &rz, &precomp[ai][bi][0], &precomp[ai][bi][1], + rx, ry, &rz, group)); + } + + MP_CHECKOK(ec_GFp_pt_jac2aff(rx, ry, &rz, rx, ry, group)); + + if (group->meth->field_dec) { + MP_CHECKOK(group->meth->field_dec(rx, rx, group->meth)); + MP_CHECKOK(group->meth->field_dec(ry, ry, group->meth)); + } + +CLEANUP: + mp_clear(&rz); + for (i = 0; i < 4; i++) { + for (j = 0; j < 4; j++) { + mp_clear(&precomp[i][j][0]); + mp_clear(&precomp[i][j][1]); + } + } + return res; +} diff --git a/security/nss/lib/freebl/ecl/ecp_jm.c b/security/nss/lib/freebl/ecl/ecp_jm.c new file mode 100644 index 000000000..a1106cea8 --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecp_jm.c @@ -0,0 +1,283 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "ecp.h" +#include "ecl-priv.h" +#include "mplogic.h" +#include <stdlib.h> + +#define MAX_SCRATCH 6 + +/* Computes R = 2P. Elliptic curve points P and R can be identical. Uses + * Modified Jacobian coordinates. + * + * Assumes input is already field-encoded using field_enc, and returns + * output that is still field-encoded. + * + */ +static mp_err +ec_GFp_pt_dbl_jm(const mp_int *px, const mp_int *py, const mp_int *pz, + const mp_int *paz4, mp_int *rx, mp_int *ry, mp_int *rz, + mp_int *raz4, mp_int scratch[], const ECGroup *group) +{ + mp_err res = MP_OKAY; + mp_int *t0, *t1, *M, *S; + + t0 = &scratch[0]; + t1 = &scratch[1]; + M = &scratch[2]; + S = &scratch[3]; + +#if MAX_SCRATCH < 4 +#error "Scratch array defined too small " +#endif + + /* Check for point at infinity */ + if (ec_GFp_pt_is_inf_jac(px, py, pz) == MP_YES) { + /* Set r = pt at infinity by setting rz = 0 */ + + MP_CHECKOK(ec_GFp_pt_set_inf_jac(rx, ry, rz)); + goto CLEANUP; + } + + /* M = 3 (px^2) + a*(pz^4) */ + MP_CHECKOK(group->meth->field_sqr(px, t0, group->meth)); + MP_CHECKOK(group->meth->field_add(t0, t0, M, group->meth)); + MP_CHECKOK(group->meth->field_add(t0, M, t0, group->meth)); + MP_CHECKOK(group->meth->field_add(t0, paz4, M, group->meth)); + + /* rz = 2 * py * pz */ + MP_CHECKOK(group->meth->field_mul(py, pz, S, group->meth)); + MP_CHECKOK(group->meth->field_add(S, S, rz, group->meth)); + + /* t0 = 2y^2 , t1 = 8y^4 */ + MP_CHECKOK(group->meth->field_sqr(py, t0, group->meth)); + MP_CHECKOK(group->meth->field_add(t0, t0, t0, group->meth)); + MP_CHECKOK(group->meth->field_sqr(t0, t1, group->meth)); + MP_CHECKOK(group->meth->field_add(t1, t1, t1, group->meth)); + + /* S = 4 * px * py^2 = 2 * px * t0 */ + MP_CHECKOK(group->meth->field_mul(px, t0, S, group->meth)); + MP_CHECKOK(group->meth->field_add(S, S, S, group->meth)); + + /* rx = M^2 - 2S */ + MP_CHECKOK(group->meth->field_sqr(M, rx, group->meth)); + MP_CHECKOK(group->meth->field_sub(rx, S, rx, group->meth)); + MP_CHECKOK(group->meth->field_sub(rx, S, rx, group->meth)); + + /* ry = M * (S - rx) - t1 */ + MP_CHECKOK(group->meth->field_sub(S, rx, S, group->meth)); + MP_CHECKOK(group->meth->field_mul(S, M, ry, group->meth)); + MP_CHECKOK(group->meth->field_sub(ry, t1, ry, group->meth)); + + /* ra*z^4 = 2*t1*(apz4) */ + MP_CHECKOK(group->meth->field_mul(paz4, t1, raz4, group->meth)); + MP_CHECKOK(group->meth->field_add(raz4, raz4, raz4, group->meth)); + +CLEANUP: + return res; +} + +/* Computes R = P + Q where R is (rx, ry, rz), P is (px, py, pz) and Q is + * (qx, qy, 1). Elliptic curve points P, Q, and R can all be identical. + * Uses mixed Modified_Jacobian-affine coordinates. Assumes input is + * already field-encoded using field_enc, and returns output that is still + * field-encoded. */ +static mp_err +ec_GFp_pt_add_jm_aff(const mp_int *px, const mp_int *py, const mp_int *pz, + const mp_int *paz4, const mp_int *qx, + const mp_int *qy, mp_int *rx, mp_int *ry, mp_int *rz, + mp_int *raz4, mp_int scratch[], const ECGroup *group) +{ + mp_err res = MP_OKAY; + mp_int *A, *B, *C, *D, *C2, *C3; + + A = &scratch[0]; + B = &scratch[1]; + C = &scratch[2]; + D = &scratch[3]; + C2 = &scratch[4]; + C3 = &scratch[5]; + +#if MAX_SCRATCH < 6 +#error "Scratch array defined too small " +#endif + + /* If either P or Q is the point at infinity, then return the other + * point */ + if (ec_GFp_pt_is_inf_jac(px, py, pz) == MP_YES) { + MP_CHECKOK(ec_GFp_pt_aff2jac(qx, qy, rx, ry, rz, group)); + MP_CHECKOK(group->meth->field_sqr(rz, raz4, group->meth)); + MP_CHECKOK(group->meth->field_sqr(raz4, raz4, group->meth)); + MP_CHECKOK(group->meth->field_mul(raz4, &group->curvea, raz4, group->meth)); + goto CLEANUP; + } + if (ec_GFp_pt_is_inf_aff(qx, qy) == MP_YES) { + MP_CHECKOK(mp_copy(px, rx)); + MP_CHECKOK(mp_copy(py, ry)); + MP_CHECKOK(mp_copy(pz, rz)); + MP_CHECKOK(mp_copy(paz4, raz4)); + goto CLEANUP; + } + + /* A = qx * pz^2, B = qy * pz^3 */ + MP_CHECKOK(group->meth->field_sqr(pz, A, group->meth)); + MP_CHECKOK(group->meth->field_mul(A, pz, B, group->meth)); + MP_CHECKOK(group->meth->field_mul(A, qx, A, group->meth)); + MP_CHECKOK(group->meth->field_mul(B, qy, B, group->meth)); + + /* C = A - px, D = B - py */ + MP_CHECKOK(group->meth->field_sub(A, px, C, group->meth)); + MP_CHECKOK(group->meth->field_sub(B, py, D, group->meth)); + + /* C2 = C^2, C3 = C^3 */ + MP_CHECKOK(group->meth->field_sqr(C, C2, group->meth)); + MP_CHECKOK(group->meth->field_mul(C, C2, C3, group->meth)); + + /* rz = pz * C */ + MP_CHECKOK(group->meth->field_mul(pz, C, rz, group->meth)); + + /* C = px * C^2 */ + MP_CHECKOK(group->meth->field_mul(px, C2, C, group->meth)); + /* A = D^2 */ + MP_CHECKOK(group->meth->field_sqr(D, A, group->meth)); + + /* rx = D^2 - (C^3 + 2 * (px * C^2)) */ + MP_CHECKOK(group->meth->field_add(C, C, rx, group->meth)); + MP_CHECKOK(group->meth->field_add(C3, rx, rx, group->meth)); + MP_CHECKOK(group->meth->field_sub(A, rx, rx, group->meth)); + + /* C3 = py * C^3 */ + MP_CHECKOK(group->meth->field_mul(py, C3, C3, group->meth)); + + /* ry = D * (px * C^2 - rx) - py * C^3 */ + MP_CHECKOK(group->meth->field_sub(C, rx, ry, group->meth)); + MP_CHECKOK(group->meth->field_mul(D, ry, ry, group->meth)); + MP_CHECKOK(group->meth->field_sub(ry, C3, ry, group->meth)); + + /* raz4 = a * rz^4 */ + MP_CHECKOK(group->meth->field_sqr(rz, raz4, group->meth)); + MP_CHECKOK(group->meth->field_sqr(raz4, raz4, group->meth)); + MP_CHECKOK(group->meth->field_mul(raz4, &group->curvea, raz4, group->meth)); +CLEANUP: + return res; +} + +/* Computes R = nP where R is (rx, ry) and P is the base point. Elliptic + * curve points P and R can be identical. Uses mixed Modified-Jacobian + * co-ordinates for doubling and Chudnovsky Jacobian coordinates for + * additions. Assumes input is already field-encoded using field_enc, and + * returns output that is still field-encoded. Uses 5-bit window NAF + * method (algorithm 11) for scalar-point multiplication from Brown, + * Hankerson, Lopez, Menezes. Software Implementation of the NIST Elliptic + * Curves Over Prime Fields. */ +mp_err +ec_GFp_pt_mul_jm_wNAF(const mp_int *n, const mp_int *px, const mp_int *py, + mp_int *rx, mp_int *ry, const ECGroup *group) +{ + mp_err res = MP_OKAY; + mp_int precomp[16][2], rz, tpx, tpy; + mp_int raz4; + mp_int scratch[MAX_SCRATCH]; + signed char *naf = NULL; + int i, orderBitSize; + + MP_DIGITS(&rz) = 0; + MP_DIGITS(&raz4) = 0; + MP_DIGITS(&tpx) = 0; + MP_DIGITS(&tpy) = 0; + for (i = 0; i < 16; i++) { + MP_DIGITS(&precomp[i][0]) = 0; + MP_DIGITS(&precomp[i][1]) = 0; + } + for (i = 0; i < MAX_SCRATCH; i++) { + MP_DIGITS(&scratch[i]) = 0; + } + + ARGCHK(group != NULL, MP_BADARG); + ARGCHK((n != NULL) && (px != NULL) && (py != NULL), MP_BADARG); + + /* initialize precomputation table */ + MP_CHECKOK(mp_init(&tpx)); + MP_CHECKOK(mp_init(&tpy)); + ; + MP_CHECKOK(mp_init(&rz)); + MP_CHECKOK(mp_init(&raz4)); + + for (i = 0; i < 16; i++) { + MP_CHECKOK(mp_init(&precomp[i][0])); + MP_CHECKOK(mp_init(&precomp[i][1])); + } + for (i = 0; i < MAX_SCRATCH; i++) { + MP_CHECKOK(mp_init(&scratch[i])); + } + + /* Set out[8] = P */ + MP_CHECKOK(mp_copy(px, &precomp[8][0])); + MP_CHECKOK(mp_copy(py, &precomp[8][1])); + + /* Set (tpx, tpy) = 2P */ + MP_CHECKOK(group->point_dbl(&precomp[8][0], &precomp[8][1], &tpx, &tpy, + group)); + + /* Set 3P, 5P, ..., 15P */ + for (i = 8; i < 15; i++) { + MP_CHECKOK(group->point_add(&precomp[i][0], &precomp[i][1], &tpx, &tpy, + &precomp[i + 1][0], &precomp[i + 1][1], + group)); + } + + /* Set -15P, -13P, ..., -P */ + for (i = 0; i < 8; i++) { + MP_CHECKOK(mp_copy(&precomp[15 - i][0], &precomp[i][0])); + MP_CHECKOK(group->meth->field_neg(&precomp[15 - i][1], &precomp[i][1], + group->meth)); + } + + /* R = inf */ + MP_CHECKOK(ec_GFp_pt_set_inf_jac(rx, ry, &rz)); + + orderBitSize = mpl_significant_bits(&group->order); + + /* Allocate memory for NAF */ + naf = (signed char *)malloc(sizeof(signed char) * (orderBitSize + 1)); + if (naf == NULL) { + res = MP_MEM; + goto CLEANUP; + } + + /* Compute 5NAF */ + ec_compute_wNAF(naf, orderBitSize, n, 5); + + /* wNAF method */ + for (i = orderBitSize; i >= 0; i--) { + /* R = 2R */ + ec_GFp_pt_dbl_jm(rx, ry, &rz, &raz4, rx, ry, &rz, + &raz4, scratch, group); + if (naf[i] != 0) { + ec_GFp_pt_add_jm_aff(rx, ry, &rz, &raz4, + &precomp[(naf[i] + 15) / 2][0], + &precomp[(naf[i] + 15) / 2][1], rx, ry, + &rz, &raz4, scratch, group); + } + } + + /* convert result S to affine coordinates */ + MP_CHECKOK(ec_GFp_pt_jac2aff(rx, ry, &rz, rx, ry, group)); + +CLEANUP: + for (i = 0; i < MAX_SCRATCH; i++) { + mp_clear(&scratch[i]); + } + for (i = 0; i < 16; i++) { + mp_clear(&precomp[i][0]); + mp_clear(&precomp[i][1]); + } + mp_clear(&tpx); + mp_clear(&tpy); + mp_clear(&rz); + mp_clear(&raz4); + free(naf); + return res; +} diff --git a/security/nss/lib/freebl/ecl/ecp_mont.c b/security/nss/lib/freebl/ecl/ecp_mont.c new file mode 100644 index 000000000..779685b4d --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecp_mont.c @@ -0,0 +1,154 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* Uses Montgomery reduction for field arithmetic. See mpi/mpmontg.c for + * code implementation. */ + +#include "mpi.h" +#include "mplogic.h" +#include "mpi-priv.h" +#include "ecl-priv.h" +#include "ecp.h" +#include <stdlib.h> +#include <stdio.h> + +/* Construct a generic GFMethod for arithmetic over prime fields with + * irreducible irr. */ +GFMethod * +GFMethod_consGFp_mont(const mp_int *irr) +{ + mp_err res = MP_OKAY; + GFMethod *meth = NULL; + mp_mont_modulus *mmm; + + meth = GFMethod_consGFp(irr); + if (meth == NULL) + return NULL; + + mmm = (mp_mont_modulus *)malloc(sizeof(mp_mont_modulus)); + if (mmm == NULL) { + res = MP_MEM; + goto CLEANUP; + } + + meth->field_mul = &ec_GFp_mul_mont; + meth->field_sqr = &ec_GFp_sqr_mont; + meth->field_div = &ec_GFp_div_mont; + meth->field_enc = &ec_GFp_enc_mont; + meth->field_dec = &ec_GFp_dec_mont; + meth->extra1 = mmm; + meth->extra2 = NULL; + meth->extra_free = &ec_GFp_extra_free_mont; + + mmm->N = meth->irr; + mmm->n0prime = 0 - s_mp_invmod_radix(MP_DIGIT(&meth->irr, 0)); + +CLEANUP: + if (res != MP_OKAY) { + GFMethod_free(meth); + return NULL; + } + return meth; +} + +/* Wrapper functions for generic prime field arithmetic. */ + +/* Field multiplication using Montgomery reduction. */ +mp_err +ec_GFp_mul_mont(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth) +{ + mp_err res = MP_OKAY; + +#ifdef MP_MONT_USE_MP_MUL + /* if MP_MONT_USE_MP_MUL is defined, then the function s_mp_mul_mont + * is not implemented and we have to use mp_mul and s_mp_redc directly + */ + MP_CHECKOK(mp_mul(a, b, r)); + MP_CHECKOK(s_mp_redc(r, (mp_mont_modulus *)meth->extra1)); +#else + mp_int s; + + MP_DIGITS(&s) = 0; + /* s_mp_mul_mont doesn't allow source and destination to be the same */ + if ((a == r) || (b == r)) { + MP_CHECKOK(mp_init(&s)); + MP_CHECKOK(s_mp_mul_mont(a, b, &s, (mp_mont_modulus *)meth->extra1)); + MP_CHECKOK(mp_copy(&s, r)); + mp_clear(&s); + } else { + return s_mp_mul_mont(a, b, r, (mp_mont_modulus *)meth->extra1); + } +#endif +CLEANUP: + return res; +} + +/* Field squaring using Montgomery reduction. */ +mp_err +ec_GFp_sqr_mont(const mp_int *a, mp_int *r, const GFMethod *meth) +{ + return ec_GFp_mul_mont(a, a, r, meth); +} + +/* Field division using Montgomery reduction. */ +mp_err +ec_GFp_div_mont(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth) +{ + mp_err res = MP_OKAY; + + /* if A=aZ represents a encoded in montgomery coordinates with Z and # + * and \ respectively represent multiplication and division in + * montgomery coordinates, then A\B = (a/b)Z = (A/B)Z and Binv = + * (1/b)Z = (1/B)(Z^2) where B # Binv = Z */ + MP_CHECKOK(ec_GFp_div(a, b, r, meth)); + MP_CHECKOK(ec_GFp_enc_mont(r, r, meth)); + if (a == NULL) { + MP_CHECKOK(ec_GFp_enc_mont(r, r, meth)); + } +CLEANUP: + return res; +} + +/* Encode a field element in Montgomery form. See s_mp_to_mont in + * mpi/mpmontg.c */ +mp_err +ec_GFp_enc_mont(const mp_int *a, mp_int *r, const GFMethod *meth) +{ + mp_mont_modulus *mmm; + mp_err res = MP_OKAY; + + mmm = (mp_mont_modulus *)meth->extra1; + MP_CHECKOK(mp_copy(a, r)); + MP_CHECKOK(s_mp_lshd(r, MP_USED(&mmm->N))); + MP_CHECKOK(mp_mod(r, &mmm->N, r)); +CLEANUP: + return res; +} + +/* Decode a field element from Montgomery form. */ +mp_err +ec_GFp_dec_mont(const mp_int *a, mp_int *r, const GFMethod *meth) +{ + mp_err res = MP_OKAY; + + if (a != r) { + MP_CHECKOK(mp_copy(a, r)); + } + MP_CHECKOK(s_mp_redc(r, (mp_mont_modulus *)meth->extra1)); +CLEANUP: + return res; +} + +/* Free the memory allocated to the extra fields of Montgomery GFMethod + * object. */ +void +ec_GFp_extra_free_mont(GFMethod *meth) +{ + if (meth->extra1 != NULL) { + free(meth->extra1); + meth->extra1 = NULL; + } +} diff --git a/security/nss/lib/freebl/ecl/tests/ec_naft.c b/security/nss/lib/freebl/ecl/tests/ec_naft.c new file mode 100644 index 000000000..61ef15c36 --- /dev/null +++ b/security/nss/lib/freebl/ecl/tests/ec_naft.c @@ -0,0 +1,121 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mpi.h" +#include "mplogic.h" +#include "ecl.h" +#include "ecp.h" +#include "ecl-priv.h" + +#include <sys/types.h> +#include <stdio.h> +#include <time.h> +#include <sys/time.h> +#include <sys/resource.h> + +/* Returns 2^e as an integer. This is meant to be used for small powers of + * two. */ +int ec_twoTo(int e); + +/* Number of bits of scalar to test */ +#define BITSIZE 160 + +/* Time k repetitions of operation op. */ +#define M_TimeOperation(op, k) \ + { \ + double dStart, dNow, dUserTime; \ + struct rusage ru; \ + int i; \ + getrusage(RUSAGE_SELF, &ru); \ + dStart = (double)ru.ru_utime.tv_sec + (double)ru.ru_utime.tv_usec * 0.000001; \ + for (i = 0; i < k; i++) { \ + { \ + op; \ + } \ + }; \ + getrusage(RUSAGE_SELF, &ru); \ + dNow = (double)ru.ru_utime.tv_sec + (double)ru.ru_utime.tv_usec * 0.000001; \ + dUserTime = dNow - dStart; \ + if (dUserTime) \ + printf(" %-45s\n k: %6i, t: %6.2f sec\n", #op, k, dUserTime); \ + } + +/* Tests wNAF computation. Non-adjacent-form is discussed in the paper: D. + * Hankerson, J. Hernandez and A. Menezes, "Software implementation of + * elliptic curve cryptography over binary fields", Proc. CHES 2000. */ + +mp_err +main(void) +{ + signed char naf[BITSIZE + 1]; + ECGroup *group = NULL; + mp_int k; + mp_int *scalar; + int i, count; + int res; + int w = 5; + char s[1000]; + + /* Get a 160 bit scalar to compute wNAF from */ + group = ECGroup_fromName(ECCurve_SECG_PRIME_160R1); + scalar = &group->genx; + + /* Compute wNAF representation of scalar */ + ec_compute_wNAF(naf, BITSIZE, scalar, w); + + /* Verify correctness of representation */ + mp_init(&k); /* init k to 0 */ + + for (i = BITSIZE; i >= 0; i--) { + mp_add(&k, &k, &k); + /* digits in mp_???_d are unsigned */ + if (naf[i] >= 0) { + mp_add_d(&k, naf[i], &k); + } else { + mp_sub_d(&k, -naf[i], &k); + } + } + + if (mp_cmp(&k, scalar) != 0) { + printf("Error: incorrect NAF value.\n"); + MP_CHECKOK(mp_toradix(&k, s, 16)); + printf("NAF value %s\n", s); + MP_CHECKOK(mp_toradix(scalar, s, 16)); + printf("original value %s\n", s); + goto CLEANUP; + } + + /* Verify digits of representation are valid */ + for (i = 0; i <= BITSIZE; i++) { + if (naf[i] % 2 == 0 && naf[i] != 0) { + printf("Error: Even non-zero digit found.\n"); + goto CLEANUP; + } + if (naf[i] < -(ec_twoTo(w - 1)) || naf[i] >= ec_twoTo(w - 1)) { + printf("Error: Magnitude of naf digit too large.\n"); + goto CLEANUP; + } + } + + /* Verify sparsity of representation */ + count = w - 1; + for (i = 0; i <= BITSIZE; i++) { + if (naf[i] != 0) { + if (count < w - 1) { + printf("Error: Sparsity failed.\n"); + goto CLEANUP; + } + count = 0; + } else + count++; + } + + /* Check timing */ + M_TimeOperation(ec_compute_wNAF(naf, BITSIZE, scalar, w), 10000); + + printf("Test passed.\n"); +CLEANUP: + ECGroup_free(group); + return MP_OKAY; +} diff --git a/security/nss/lib/freebl/ecl/tests/ecp_test.c b/security/nss/lib/freebl/ecl/tests/ecp_test.c new file mode 100644 index 000000000..dcec4d747 --- /dev/null +++ b/security/nss/lib/freebl/ecl/tests/ecp_test.c @@ -0,0 +1,409 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mpi.h" +#include "mplogic.h" +#include "mpprime.h" +#include "ecl.h" +#include "ecl-curve.h" +#include "ecp.h" +#include <stdio.h> +#include <strings.h> +#include <assert.h> + +#include <time.h> +#include <sys/time.h> +#include <sys/resource.h> + +/* Time k repetitions of operation op. */ +#define M_TimeOperation(op, k) \ + { \ + double dStart, dNow, dUserTime; \ + struct rusage ru; \ + int i; \ + getrusage(RUSAGE_SELF, &ru); \ + dStart = (double)ru.ru_utime.tv_sec + (double)ru.ru_utime.tv_usec * 0.000001; \ + for (i = 0; i < k; i++) { \ + { \ + op; \ + } \ + }; \ + getrusage(RUSAGE_SELF, &ru); \ + dNow = (double)ru.ru_utime.tv_sec + (double)ru.ru_utime.tv_usec * 0.000001; \ + dUserTime = dNow - dStart; \ + if (dUserTime) \ + printf(" %-45s k: %6i, t: %6.2f sec\n", #op, k, dUserTime); \ + } + +/* Test curve using generic field arithmetic. */ +#define ECTEST_GENERIC_GFP(name_c, name) \ + printf("Testing %s using generic implementation...\n", name_c); \ + params = EC_GetNamedCurveParams(name); \ + if (params == NULL) { \ + printf(" Error: could not construct params.\n"); \ + res = MP_NO; \ + goto CLEANUP; \ + } \ + ECGroup_free(group); \ + group = ECGroup_fromHex(params); \ + if (group == NULL) { \ + printf(" Error: could not construct group.\n"); \ + res = MP_NO; \ + goto CLEANUP; \ + } \ + MP_CHECKOK(ectest_curve_GFp(group, ectestPrint, ectestTime, 1)); \ + printf("... okay.\n"); + +/* Test curve using specific field arithmetic. */ +#define ECTEST_NAMED_GFP(name_c, name) \ + printf("Testing %s using specific implementation...\n", name_c); \ + ECGroup_free(group); \ + group = ECGroup_fromName(name); \ + if (group == NULL) { \ + printf(" Warning: could not construct group.\n"); \ + printf("... failed; continuing with remaining tests.\n"); \ + } else { \ + MP_CHECKOK(ectest_curve_GFp(group, ectestPrint, ectestTime, 0)); \ + printf("... okay.\n"); \ + } + +/* Performs basic tests of elliptic curve cryptography over prime fields. + * If tests fail, then it prints an error message, aborts, and returns an + * error code. Otherwise, returns 0. */ +int +ectest_curve_GFp(ECGroup *group, int ectestPrint, int ectestTime, + int generic) +{ + + mp_int one, order_1, gx, gy, rx, ry, n; + int size; + mp_err res; + char s[1000]; + + /* initialize values */ + MP_CHECKOK(mp_init(&one)); + MP_CHECKOK(mp_init(&order_1)); + MP_CHECKOK(mp_init(&gx)); + MP_CHECKOK(mp_init(&gy)); + MP_CHECKOK(mp_init(&rx)); + MP_CHECKOK(mp_init(&ry)); + MP_CHECKOK(mp_init(&n)); + + MP_CHECKOK(mp_set_int(&one, 1)); + MP_CHECKOK(mp_sub(&group->order, &one, &order_1)); + + /* encode base point */ + if (group->meth->field_dec) { + MP_CHECKOK(group->meth->field_dec(&group->genx, &gx, group->meth)); + MP_CHECKOK(group->meth->field_dec(&group->geny, &gy, group->meth)); + } else { + MP_CHECKOK(mp_copy(&group->genx, &gx)); + MP_CHECKOK(mp_copy(&group->geny, &gy)); + } + if (ectestPrint) { + /* output base point */ + printf(" base point P:\n"); + MP_CHECKOK(mp_toradix(&gx, s, 16)); + printf(" %s\n", s); + MP_CHECKOK(mp_toradix(&gy, s, 16)); + printf(" %s\n", s); + if (group->meth->field_enc) { + printf(" base point P (encoded):\n"); + MP_CHECKOK(mp_toradix(&group->genx, s, 16)); + printf(" %s\n", s); + MP_CHECKOK(mp_toradix(&group->geny, s, 16)); + printf(" %s\n", s); + } + } + +#ifdef ECL_ENABLE_GFP_PT_MUL_AFF + /* multiply base point by order - 1 and check for negative of base + * point */ + MP_CHECKOK(ec_GFp_pt_mul_aff(&order_1, &group->genx, &group->geny, &rx, &ry, group)); + if (ectestPrint) { + printf(" (order-1)*P (affine):\n"); + MP_CHECKOK(mp_toradix(&rx, s, 16)); + printf(" %s\n", s); + MP_CHECKOK(mp_toradix(&ry, s, 16)); + printf(" %s\n", s); + } + MP_CHECKOK(group->meth->field_neg(&ry, &ry, group->meth)); + if ((mp_cmp(&rx, &group->genx) != 0) || (mp_cmp(&ry, &group->geny) != 0)) { + printf(" Error: invalid result (expected (- base point)).\n"); + res = MP_NO; + goto CLEANUP; + } +#endif + +#ifdef ECL_ENABLE_GFP_PT_MUL_AFF + /* multiply base point by order - 1 and check for negative of base + * point */ + MP_CHECKOK(ec_GFp_pt_mul_jac(&order_1, &group->genx, &group->geny, &rx, &ry, group)); + if (ectestPrint) { + printf(" (order-1)*P (jacobian):\n"); + MP_CHECKOK(mp_toradix(&rx, s, 16)); + printf(" %s\n", s); + MP_CHECKOK(mp_toradix(&ry, s, 16)); + printf(" %s\n", s); + } + MP_CHECKOK(group->meth->field_neg(&ry, &ry, group->meth)); + if ((mp_cmp(&rx, &group->genx) != 0) || (mp_cmp(&ry, &group->geny) != 0)) { + printf(" Error: invalid result (expected (- base point)).\n"); + res = MP_NO; + goto CLEANUP; + } +#endif + + /* multiply base point by order - 1 and check for negative of base + * point */ + MP_CHECKOK(ECPoint_mul(group, &order_1, NULL, NULL, &rx, &ry)); + if (ectestPrint) { + printf(" (order-1)*P (ECPoint_mul):\n"); + MP_CHECKOK(mp_toradix(&rx, s, 16)); + printf(" %s\n", s); + MP_CHECKOK(mp_toradix(&ry, s, 16)); + printf(" %s\n", s); + } + MP_CHECKOK(mp_submod(&group->meth->irr, &ry, &group->meth->irr, &ry)); + if ((mp_cmp(&rx, &gx) != 0) || (mp_cmp(&ry, &gy) != 0)) { + printf(" Error: invalid result (expected (- base point)).\n"); + res = MP_NO; + goto CLEANUP; + } + + /* multiply base point by order - 1 and check for negative of base + * point */ + MP_CHECKOK(ECPoint_mul(group, &order_1, &gx, &gy, &rx, &ry)); + if (ectestPrint) { + printf(" (order-1)*P (ECPoint_mul):\n"); + MP_CHECKOK(mp_toradix(&rx, s, 16)); + printf(" %s\n", s); + MP_CHECKOK(mp_toradix(&ry, s, 16)); + printf(" %s\n", s); + } + MP_CHECKOK(mp_submod(&group->meth->irr, &ry, &group->meth->irr, &ry)); + if ((mp_cmp(&rx, &gx) != 0) || (mp_cmp(&ry, &gy) != 0)) { + printf(" Error: invalid result (expected (- base point)).\n"); + res = MP_NO; + goto CLEANUP; + } + +#ifdef ECL_ENABLE_GFP_PT_MUL_AFF + /* multiply base point by order and check for point at infinity */ + MP_CHECKOK(ec_GFp_pt_mul_aff(&group->order, &group->genx, &group->geny, &rx, &ry, + group)); + if (ectestPrint) { + printf(" (order)*P (affine):\n"); + MP_CHECKOK(mp_toradix(&rx, s, 16)); + printf(" %s\n", s); + MP_CHECKOK(mp_toradix(&ry, s, 16)); + printf(" %s\n", s); + } + if (ec_GFp_pt_is_inf_aff(&rx, &ry) != MP_YES) { + printf(" Error: invalid result (expected point at infinity).\n"); + res = MP_NO; + goto CLEANUP; + } +#endif + +#ifdef ECL_ENABLE_GFP_PT_MUL_JAC + /* multiply base point by order and check for point at infinity */ + MP_CHECKOK(ec_GFp_pt_mul_jac(&group->order, &group->genx, &group->geny, &rx, &ry, + group)); + if (ectestPrint) { + printf(" (order)*P (jacobian):\n"); + MP_CHECKOK(mp_toradix(&rx, s, 16)); + printf(" %s\n", s); + MP_CHECKOK(mp_toradix(&ry, s, 16)); + printf(" %s\n", s); + } + if (ec_GFp_pt_is_inf_aff(&rx, &ry) != MP_YES) { + printf(" Error: invalid result (expected point at infinity).\n"); + res = MP_NO; + goto CLEANUP; + } +#endif + + /* multiply base point by order and check for point at infinity */ + MP_CHECKOK(ECPoint_mul(group, &group->order, NULL, NULL, &rx, &ry)); + if (ectestPrint) { + printf(" (order)*P (ECPoint_mul):\n"); + MP_CHECKOK(mp_toradix(&rx, s, 16)); + printf(" %s\n", s); + MP_CHECKOK(mp_toradix(&ry, s, 16)); + printf(" %s\n", s); + } + if (ec_GFp_pt_is_inf_aff(&rx, &ry) != MP_YES) { + printf(" Error: invalid result (expected point at infinity).\n"); + res = MP_NO; + goto CLEANUP; + } + + /* multiply base point by order and check for point at infinity */ + MP_CHECKOK(ECPoint_mul(group, &group->order, &gx, &gy, &rx, &ry)); + if (ectestPrint) { + printf(" (order)*P (ECPoint_mul):\n"); + MP_CHECKOK(mp_toradix(&rx, s, 16)); + printf(" %s\n", s); + MP_CHECKOK(mp_toradix(&ry, s, 16)); + printf(" %s\n", s); + } + if (ec_GFp_pt_is_inf_aff(&rx, &ry) != MP_YES) { + printf(" Error: invalid result (expected point at infinity).\n"); + res = MP_NO; + goto CLEANUP; + } + + /* check that (order-1)P + (order-1)P + P == (order-1)P */ + MP_CHECKOK(ECPoints_mul(group, &order_1, &order_1, &gx, &gy, &rx, &ry)); + MP_CHECKOK(ECPoints_mul(group, &one, &one, &rx, &ry, &rx, &ry)); + if (ectestPrint) { + printf(" (order-1)*P + (order-1)*P + P == (order-1)*P (ECPoints_mul):\n"); + MP_CHECKOK(mp_toradix(&rx, s, 16)); + printf(" %s\n", s); + MP_CHECKOK(mp_toradix(&ry, s, 16)); + printf(" %s\n", s); + } + MP_CHECKOK(mp_submod(&group->meth->irr, &ry, &group->meth->irr, &ry)); + if ((mp_cmp(&rx, &gx) != 0) || (mp_cmp(&ry, &gy) != 0)) { + printf(" Error: invalid result (expected (- base point)).\n"); + res = MP_NO; + goto CLEANUP; + } + + /* test validate_point function */ + if (ECPoint_validate(group, &gx, &gy) != MP_YES) { + printf(" Error: validate point on base point failed.\n"); + res = MP_NO; + goto CLEANUP; + } + MP_CHECKOK(mp_add_d(&gy, 1, &ry)); + if (ECPoint_validate(group, &gx, &ry) != MP_NO) { + printf(" Error: validate point on invalid point passed.\n"); + res = MP_NO; + goto CLEANUP; + } + + if (ectestTime) { + /* compute random scalar */ + size = mpl_significant_bits(&group->meth->irr); + if (size < MP_OKAY) { + goto CLEANUP; + } + MP_CHECKOK(mpp_random_size(&n, (size + ECL_BITS - 1) / ECL_BITS)); + MP_CHECKOK(group->meth->field_mod(&n, &n, group->meth)); + /* timed test */ + if (generic) { +#ifdef ECL_ENABLE_GFP_PT_MUL_AFF + M_TimeOperation(MP_CHECKOK(ec_GFp_pt_mul_aff(&n, &group->genx, &group->geny, &rx, &ry, + group)), + 100); +#endif + M_TimeOperation(MP_CHECKOK(ECPoint_mul(group, &n, NULL, NULL, &rx, &ry)), + 100); + M_TimeOperation(MP_CHECKOK(ECPoints_mul(group, &n, &n, &gx, &gy, &rx, &ry)), 100); + } else { + M_TimeOperation(MP_CHECKOK(ECPoint_mul(group, &n, NULL, NULL, &rx, &ry)), + 100); + M_TimeOperation(MP_CHECKOK(ECPoint_mul(group, &n, &gx, &gy, &rx, &ry)), + 100); + M_TimeOperation(MP_CHECKOK(ECPoints_mul(group, &n, &n, &gx, &gy, &rx, &ry)), 100); + } + } + +CLEANUP: + mp_clear(&one); + mp_clear(&order_1); + mp_clear(&gx); + mp_clear(&gy); + mp_clear(&rx); + mp_clear(&ry); + mp_clear(&n); + if (res != MP_OKAY) { + printf(" Error: exiting with error value %i\n", res); + } + return res; +} + +/* Prints help information. */ +void +printUsage() +{ + printf("Usage: ecp_test [--print] [--time]\n"); + printf(" --print Print out results of each point arithmetic test.\n"); + printf(" --time Benchmark point operations and print results.\n"); +} + +/* Performs tests of elliptic curve cryptography over prime fields If + * tests fail, then it prints an error message, aborts, and returns an + * error code. Otherwise, returns 0. */ +int +main(int argv, char **argc) +{ + + int ectestTime = 0; + int ectestPrint = 0; + int i; + ECGroup *group = NULL; + ECCurveParams *params = NULL; + mp_err res; + + /* read command-line arguments */ + for (i = 1; i < argv; i++) { + if ((strcasecmp(argc[i], "time") == 0) || (strcasecmp(argc[i], "-time") == 0) || (strcasecmp(argc[i], "--time") == 0)) { + ectestTime = 1; + } else if ((strcasecmp(argc[i], "print") == 0) || (strcasecmp(argc[i], "-print") == 0) || (strcasecmp(argc[i], "--print") == 0)) { + ectestPrint = 1; + } else { + printUsage(); + return 0; + } + } + + /* generic arithmetic tests */ + ECTEST_GENERIC_GFP("SECP-160R1", ECCurve_SECG_PRIME_160R1); + + /* specific arithmetic tests */ + ECTEST_NAMED_GFP("NIST-P192", ECCurve_NIST_P192); + ECTEST_NAMED_GFP("NIST-P224", ECCurve_NIST_P224); + ECTEST_NAMED_GFP("NIST-P256", ECCurve_NIST_P256); + ECTEST_NAMED_GFP("NIST-P384", ECCurve_NIST_P384); + ECTEST_NAMED_GFP("NIST-P521", ECCurve_NIST_P521); + ECTEST_NAMED_GFP("ANSI X9.62 PRIME192v1", ECCurve_X9_62_PRIME_192V1); + ECTEST_NAMED_GFP("ANSI X9.62 PRIME192v2", ECCurve_X9_62_PRIME_192V2); + ECTEST_NAMED_GFP("ANSI X9.62 PRIME192v3", ECCurve_X9_62_PRIME_192V3); + ECTEST_NAMED_GFP("ANSI X9.62 PRIME239v1", ECCurve_X9_62_PRIME_239V1); + ECTEST_NAMED_GFP("ANSI X9.62 PRIME239v2", ECCurve_X9_62_PRIME_239V2); + ECTEST_NAMED_GFP("ANSI X9.62 PRIME239v3", ECCurve_X9_62_PRIME_239V3); + ECTEST_NAMED_GFP("ANSI X9.62 PRIME256v1", ECCurve_X9_62_PRIME_256V1); + ECTEST_NAMED_GFP("SECP-112R1", ECCurve_SECG_PRIME_112R1); + ECTEST_NAMED_GFP("SECP-112R2", ECCurve_SECG_PRIME_112R2); + ECTEST_NAMED_GFP("SECP-128R1", ECCurve_SECG_PRIME_128R1); + ECTEST_NAMED_GFP("SECP-128R2", ECCurve_SECG_PRIME_128R2); + ECTEST_NAMED_GFP("SECP-160K1", ECCurve_SECG_PRIME_160K1); + ECTEST_NAMED_GFP("SECP-160R1", ECCurve_SECG_PRIME_160R1); + ECTEST_NAMED_GFP("SECP-160R2", ECCurve_SECG_PRIME_160R2); + ECTEST_NAMED_GFP("SECP-192K1", ECCurve_SECG_PRIME_192K1); + ECTEST_NAMED_GFP("SECP-192R1", ECCurve_SECG_PRIME_192R1); + ECTEST_NAMED_GFP("SECP-224K1", ECCurve_SECG_PRIME_224K1); + ECTEST_NAMED_GFP("SECP-224R1", ECCurve_SECG_PRIME_224R1); + ECTEST_NAMED_GFP("SECP-256K1", ECCurve_SECG_PRIME_256K1); + ECTEST_NAMED_GFP("SECP-256R1", ECCurve_SECG_PRIME_256R1); + ECTEST_NAMED_GFP("SECP-384R1", ECCurve_SECG_PRIME_384R1); + ECTEST_NAMED_GFP("SECP-521R1", ECCurve_SECG_PRIME_521R1); + ECTEST_NAMED_GFP("WTLS-6 (112)", ECCurve_WTLS_6); + ECTEST_NAMED_GFP("WTLS-7 (160)", ECCurve_WTLS_7); + ECTEST_NAMED_GFP("WTLS-8 (112)", ECCurve_WTLS_8); + ECTEST_NAMED_GFP("WTLS-9 (160)", ECCurve_WTLS_9); + ECTEST_NAMED_GFP("WTLS-12 (224)", ECCurve_WTLS_12); + ECTEST_NAMED_GFP("Curve25519", ECCurve25519); + +CLEANUP: + EC_FreeCurveParams(params); + ECGroup_free(group); + if (res != MP_OKAY) { + printf("Error: exiting with error value %i\n", res); + } + return res; +} diff --git a/security/nss/lib/freebl/ecl/uint128.c b/security/nss/lib/freebl/ecl/uint128.c new file mode 100644 index 000000000..22cbd023c --- /dev/null +++ b/security/nss/lib/freebl/ecl/uint128.c @@ -0,0 +1,87 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "uint128.h" + +/* helper functions */ +uint64_t +mask51(uint128_t x) +{ + return x.lo & MASK51; +} + +uint64_t +mask_lower(uint128_t x) +{ + return x.lo; +} + +uint128_t +mask51full(uint128_t x) +{ + uint128_t ret = { x.lo & MASK51, 0 }; + return ret; +} + +uint128_t +init128x(uint64_t x) +{ + uint128_t ret = { x, 0 }; + return ret; +} + +/* arithmetic */ + +uint128_t +add128(uint128_t a, uint128_t b) +{ + uint128_t ret; + ret.lo = a.lo + b.lo; + ret.hi = a.hi + b.hi + (ret.lo < b.lo); + return ret; +} + +/* out = 19 * a */ +uint128_t +mul12819(uint128_t a) +{ + uint128_t ret = lshift128(a, 4); + ret = add128(ret, a); + ret = add128(ret, a); + ret = add128(ret, a); + return ret; +} + +uint128_t +mul6464(uint64_t a, uint64_t b) +{ + uint128_t ret; + uint64_t t0 = ((uint64_t)(uint32_t)a) * ((uint64_t)(uint32_t)b); + uint64_t t1 = (a >> 32) * ((uint64_t)(uint32_t)b) + (t0 >> 32); + uint64_t t2 = (b >> 32) * ((uint64_t)(uint32_t)a) + ((uint32_t)t1); + ret.lo = (((uint64_t)((uint32_t)t2)) << 32) + ((uint32_t)t0); + ret.hi = (a >> 32) * (b >> 32); + ret.hi += (t2 >> 32) + (t1 >> 32); + return ret; +} + +/* only defined for n < 64 */ +uint128_t +rshift128(uint128_t x, uint8_t n) +{ + uint128_t ret; + ret.lo = (x.lo >> n) + (x.hi << (64 - n)); + ret.hi = x.hi >> n; + return ret; +} + +/* only defined for n < 64 */ +uint128_t +lshift128(uint128_t x, uint8_t n) +{ + uint128_t ret; + ret.hi = (x.hi << n) + (x.lo >> (64 - n)); + ret.lo = x.lo << n; + return ret; +} diff --git a/security/nss/lib/freebl/ecl/uint128.h b/security/nss/lib/freebl/ecl/uint128.h new file mode 100644 index 000000000..a3a71e6e7 --- /dev/null +++ b/security/nss/lib/freebl/ecl/uint128.h @@ -0,0 +1,35 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <stdint.h> + +#define MASK51 0x7ffffffffffffULL + +#ifdef HAVE_INT128_SUPPORT +typedef unsigned __int128 uint128_t; +#define add128(a, b) (a) + (b) +#define mul6464(a, b) (uint128_t)(a) * (uint128_t)(b) +#define mul12819(a) (uint128_t)(a) * 19 +#define rshift128(x, n) (x) >> (n) +#define lshift128(x, n) (x) << (n) +#define mask51(x) (x) & 0x7ffffffffffff +#define mask_lower(x) (uint64_t)(x) +#define mask51full(x) (x) & 0x7ffffffffffff +#define init128x(x) (x) +#else /* uint128_t for Windows and 32 bit intel systems */ +struct uint128_t_str { + uint64_t lo; + uint64_t hi; +}; +typedef struct uint128_t_str uint128_t; +uint128_t add128(uint128_t a, uint128_t b); +uint128_t mul6464(uint64_t a, uint64_t b); +uint128_t mul12819(uint128_t a); +uint128_t rshift128(uint128_t x, uint8_t n); +uint128_t lshift128(uint128_t x, uint8_t n); +uint64_t mask51(uint128_t x); +uint64_t mask_lower(uint128_t x); +uint128_t mask51full(uint128_t x); +uint128_t init128x(uint64_t x); +#endif |