diff options
author | wolfbeast <mcwerewolf@gmail.com> | 2018-06-06 21:27:04 +0200 |
---|---|---|
committer | wolfbeast <mcwerewolf@gmail.com> | 2018-06-06 21:27:04 +0200 |
commit | 4a71b30364a4b6d1eaf16fcfdc8e873e6697f293 (patch) | |
tree | a47014077c14579249859ad34afcc5a8f2f0730a /security/nss/lib/freebl/ecl/curve25519_64.c | |
parent | d7da72799521386c110dbba73b1e483b00a0a56a (diff) | |
parent | 2dad0ec41d0b69c0a815012e6ea4bdde81b2875b (diff) | |
download | UXP-4a71b30364a4b6d1eaf16fcfdc8e873e6697f293.tar UXP-4a71b30364a4b6d1eaf16fcfdc8e873e6697f293.tar.gz UXP-4a71b30364a4b6d1eaf16fcfdc8e873e6697f293.tar.lz UXP-4a71b30364a4b6d1eaf16fcfdc8e873e6697f293.tar.xz UXP-4a71b30364a4b6d1eaf16fcfdc8e873e6697f293.zip |
Merge branch 'NSS-335'
Diffstat (limited to 'security/nss/lib/freebl/ecl/curve25519_64.c')
-rw-r--r-- | security/nss/lib/freebl/ecl/curve25519_64.c | 508 |
1 files changed, 4 insertions, 504 deletions
diff --git a/security/nss/lib/freebl/ecl/curve25519_64.c b/security/nss/lib/freebl/ecl/curve25519_64.c index 65f6bd41b..a2e4296bb 100644 --- a/security/nss/lib/freebl/ecl/curve25519_64.c +++ b/security/nss/lib/freebl/ecl/curve25519_64.c @@ -2,513 +2,13 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -/* - * Derived from public domain C code by Adan Langley and Daniel J. Bernstein - */ - -#include "uint128.h" - #include "ecl-priv.h" -#include "mpi.h" - -#include <stdint.h> -#include <stdio.h> -#include <string.h> - -typedef uint8_t u8; -typedef uint64_t felem; - -/* Sum two numbers: output += in */ -static void -fsum(felem *output, const felem *in) -{ - unsigned i; - for (i = 0; i < 5; ++i) { - output[i] += in[i]; - } -} - -/* Find the difference of two numbers: output = in - output - * (note the order of the arguments!) - */ -static void -fdifference_backwards(felem *ioutput, const felem *iin) -{ - static const int64_t twotothe51 = ((int64_t)1l << 51); - const int64_t *in = (const int64_t *)iin; - int64_t *out = (int64_t *)ioutput; - - out[0] = in[0] - out[0]; - out[1] = in[1] - out[1]; - out[2] = in[2] - out[2]; - out[3] = in[3] - out[3]; - out[4] = in[4] - out[4]; - - // An arithmetic shift right of 63 places turns a positive number to 0 and a - // negative number to all 1's. This gives us a bitmask that lets us avoid - // side-channel prone branches. - int64_t t; - -#define NEGCHAIN(a, b) \ - t = out[a] >> 63; \ - out[a] += twotothe51 & t; \ - out[b] -= 1 & t; - -#define NEGCHAIN19(a, b) \ - t = out[a] >> 63; \ - out[a] += twotothe51 & t; \ - out[b] -= 19 & t; - - NEGCHAIN(0, 1); - NEGCHAIN(1, 2); - NEGCHAIN(2, 3); - NEGCHAIN(3, 4); - NEGCHAIN19(4, 0); - NEGCHAIN(0, 1); - NEGCHAIN(1, 2); - NEGCHAIN(2, 3); - NEGCHAIN(3, 4); -} - -/* Multiply a number by a scalar: output = in * scalar */ -static void -fscalar_product(felem *output, const felem *in, - const felem scalar) -{ - uint128_t tmp, tmp2; - - tmp = mul6464(in[0], scalar); - output[0] = mask51(tmp); - - tmp2 = mul6464(in[1], scalar); - tmp = add128(tmp2, rshift128(tmp, 51)); - output[1] = mask51(tmp); - - tmp2 = mul6464(in[2], scalar); - tmp = add128(tmp2, rshift128(tmp, 51)); - output[2] = mask51(tmp); - - tmp2 = mul6464(in[3], scalar); - tmp = add128(tmp2, rshift128(tmp, 51)); - output[3] = mask51(tmp); - - tmp2 = mul6464(in[4], scalar); - tmp = add128(tmp2, rshift128(tmp, 51)); - output[4] = mask51(tmp); - - output[0] += mask_lower(rshift128(tmp, 51)) * 19; -} - -/* Multiply two numbers: output = in2 * in - * - * output must be distinct to both inputs. The inputs are reduced coefficient - * form, the output is not. - */ -static void -fmul(felem *output, const felem *in2, const felem *in) -{ - uint128_t t0, t1, t2, t3, t4, t5, t6, t7, t8; - - t0 = mul6464(in[0], in2[0]); - t1 = add128(mul6464(in[1], in2[0]), mul6464(in[0], in2[1])); - t2 = add128(add128(mul6464(in[0], in2[2]), - mul6464(in[2], in2[0])), - mul6464(in[1], in2[1])); - t3 = add128(add128(add128(mul6464(in[0], in2[3]), - mul6464(in[3], in2[0])), - mul6464(in[1], in2[2])), - mul6464(in[2], in2[1])); - t4 = add128(add128(add128(add128(mul6464(in[0], in2[4]), - mul6464(in[4], in2[0])), - mul6464(in[3], in2[1])), - mul6464(in[1], in2[3])), - mul6464(in[2], in2[2])); - t5 = add128(add128(add128(mul6464(in[4], in2[1]), - mul6464(in[1], in2[4])), - mul6464(in[2], in2[3])), - mul6464(in[3], in2[2])); - t6 = add128(add128(mul6464(in[4], in2[2]), - mul6464(in[2], in2[4])), - mul6464(in[3], in2[3])); - t7 = add128(mul6464(in[3], in2[4]), mul6464(in[4], in2[3])); - t8 = mul6464(in[4], in2[4]); - - t0 = add128(t0, mul12819(t5)); - t1 = add128(t1, mul12819(t6)); - t2 = add128(t2, mul12819(t7)); - t3 = add128(t3, mul12819(t8)); - - t1 = add128(t1, rshift128(t0, 51)); - t0 = mask51full(t0); - t2 = add128(t2, rshift128(t1, 51)); - t1 = mask51full(t1); - t3 = add128(t3, rshift128(t2, 51)); - t4 = add128(t4, rshift128(t3, 51)); - t0 = add128(t0, mul12819(rshift128(t4, 51))); - t1 = add128(t1, rshift128(t0, 51)); - t2 = mask51full(t2); - t2 = add128(t2, rshift128(t1, 51)); - - output[0] = mask51(t0); - output[1] = mask51(t1); - output[2] = mask_lower(t2); - output[3] = mask51(t3); - output[4] = mask51(t4); -} - -static void -fsquare(felem *output, const felem *in) -{ - uint128_t t0, t1, t2, t3, t4, t5, t6, t7, t8; - - t0 = mul6464(in[0], in[0]); - t1 = lshift128(mul6464(in[0], in[1]), 1); - t2 = add128(lshift128(mul6464(in[0], in[2]), 1), - mul6464(in[1], in[1])); - t3 = add128(lshift128(mul6464(in[0], in[3]), 1), - lshift128(mul6464(in[1], in[2]), 1)); - t4 = add128(add128(lshift128(mul6464(in[0], in[4]), 1), - lshift128(mul6464(in[3], in[1]), 1)), - mul6464(in[2], in[2])); - t5 = add128(lshift128(mul6464(in[4], in[1]), 1), - lshift128(mul6464(in[2], in[3]), 1)); - t6 = add128(lshift128(mul6464(in[4], in[2]), 1), - mul6464(in[3], in[3])); - t7 = lshift128(mul6464(in[3], in[4]), 1); - t8 = mul6464(in[4], in[4]); - - t0 = add128(t0, mul12819(t5)); - t1 = add128(t1, mul12819(t6)); - t2 = add128(t2, mul12819(t7)); - t3 = add128(t3, mul12819(t8)); - - t1 = add128(t1, rshift128(t0, 51)); - t0 = mask51full(t0); - t2 = add128(t2, rshift128(t1, 51)); - t1 = mask51full(t1); - t3 = add128(t3, rshift128(t2, 51)); - t4 = add128(t4, rshift128(t3, 51)); - t0 = add128(t0, mul12819(rshift128(t4, 51))); - t1 = add128(t1, rshift128(t0, 51)); - - output[0] = mask51(t0); - output[1] = mask_lower(t1); - output[2] = mask51(t2); - output[3] = mask51(t3); - output[4] = mask51(t4); -} - -/* Take a 32-byte number and expand it into polynomial form */ -static void NO_SANITIZE_ALIGNMENT -fexpand(felem *output, const u8 *in) -{ - output[0] = *((const uint64_t *)(in)) & MASK51; - output[1] = (*((const uint64_t *)(in + 6)) >> 3) & MASK51; - output[2] = (*((const uint64_t *)(in + 12)) >> 6) & MASK51; - output[3] = (*((const uint64_t *)(in + 19)) >> 1) & MASK51; - output[4] = (*((const uint64_t *)(in + 24)) >> 12) & MASK51; -} - -/* Take a fully reduced polynomial form number and contract it into a - * 32-byte array - */ -static void -fcontract(u8 *output, const felem *input) -{ - uint128_t t0 = init128x(input[0]); - uint128_t t1 = init128x(input[1]); - uint128_t t2 = init128x(input[2]); - uint128_t t3 = init128x(input[3]); - uint128_t t4 = init128x(input[4]); - uint128_t tmp = init128x(19); - - t1 = add128(t1, rshift128(t0, 51)); - t0 = mask51full(t0); - t2 = add128(t2, rshift128(t1, 51)); - t1 = mask51full(t1); - t3 = add128(t3, rshift128(t2, 51)); - t2 = mask51full(t2); - t4 = add128(t4, rshift128(t3, 51)); - t3 = mask51full(t3); - t0 = add128(t0, mul12819(rshift128(t4, 51))); - t4 = mask51full(t4); - - t1 = add128(t1, rshift128(t0, 51)); - t0 = mask51full(t0); - t2 = add128(t2, rshift128(t1, 51)); - t1 = mask51full(t1); - t3 = add128(t3, rshift128(t2, 51)); - t2 = mask51full(t2); - t4 = add128(t4, rshift128(t3, 51)); - t3 = mask51full(t3); - t0 = add128(t0, mul12819(rshift128(t4, 51))); - t4 = mask51full(t4); - - /* now t is between 0 and 2^255-1, properly carried. */ - /* case 1: between 0 and 2^255-20. case 2: between 2^255-19 and 2^255-1. */ - - t0 = add128(t0, tmp); - - t1 = add128(t1, rshift128(t0, 51)); - t0 = mask51full(t0); - t2 = add128(t2, rshift128(t1, 51)); - t1 = mask51full(t1); - t3 = add128(t3, rshift128(t2, 51)); - t2 = mask51full(t2); - t4 = add128(t4, rshift128(t3, 51)); - t3 = mask51full(t3); - t0 = add128(t0, mul12819(rshift128(t4, 51))); - t4 = mask51full(t4); - - /* now between 19 and 2^255-1 in both cases, and offset by 19. */ - - t0 = add128(t0, init128x(0x8000000000000 - 19)); - tmp = init128x(0x8000000000000 - 1); - t1 = add128(t1, tmp); - t2 = add128(t2, tmp); - t3 = add128(t3, tmp); - t4 = add128(t4, tmp); - - /* now between 2^255 and 2^256-20, and offset by 2^255. */ - - t1 = add128(t1, rshift128(t0, 51)); - t0 = mask51full(t0); - t2 = add128(t2, rshift128(t1, 51)); - t1 = mask51full(t1); - t3 = add128(t3, rshift128(t2, 51)); - t2 = mask51full(t2); - t4 = add128(t4, rshift128(t3, 51)); - t3 = mask51full(t3); - t4 = mask51full(t4); - - *((uint64_t *)(output)) = mask_lower(t0) | mask_lower(t1) << 51; - *((uint64_t *)(output + 8)) = (mask_lower(t1) >> 13) | (mask_lower(t2) << 38); - *((uint64_t *)(output + 16)) = (mask_lower(t2) >> 26) | (mask_lower(t3) << 25); - *((uint64_t *)(output + 24)) = (mask_lower(t3) >> 39) | (mask_lower(t4) << 12); -} - -/* Input: Q, Q', Q-Q' - * Output: 2Q, Q+Q' - * - * x2 z3: long form - * x3 z3: long form - * x z: short form, destroyed - * xprime zprime: short form, destroyed - * qmqp: short form, preserved - */ -static void -fmonty(felem *x2, felem *z2, /* output 2Q */ - felem *x3, felem *z3, /* output Q + Q' */ - felem *x, felem *z, /* input Q */ - felem *xprime, felem *zprime, /* input Q' */ - const felem *qmqp /* input Q - Q' */) -{ - felem origx[5], origxprime[5], zzz[5], xx[5], zz[5], xxprime[5], zzprime[5], - zzzprime[5]; - - memcpy(origx, x, 5 * sizeof(felem)); - fsum(x, z); - fdifference_backwards(z, origx); // does x - z - - memcpy(origxprime, xprime, sizeof(felem) * 5); - fsum(xprime, zprime); - fdifference_backwards(zprime, origxprime); - fmul(xxprime, xprime, z); - fmul(zzprime, x, zprime); - memcpy(origxprime, xxprime, sizeof(felem) * 5); - fsum(xxprime, zzprime); - fdifference_backwards(zzprime, origxprime); - fsquare(x3, xxprime); - fsquare(zzzprime, zzprime); - fmul(z3, zzzprime, qmqp); - - fsquare(xx, x); - fsquare(zz, z); - fmul(x2, xx, zz); - fdifference_backwards(zz, xx); // does zz = xx - zz - fscalar_product(zzz, zz, 121665); - fsum(zzz, xx); - fmul(z2, zz, zzz); -} - -// ----------------------------------------------------------------------------- -// Maybe swap the contents of two felem arrays (@a and @b), each @len elements -// long. Perform the swap iff @swap is non-zero. -// -// This function performs the swap without leaking any side-channel -// information. -// ----------------------------------------------------------------------------- -static void -swap_conditional(felem *a, felem *b, unsigned len, felem iswap) -{ - unsigned i; - const felem swap = 1 + ~iswap; - - for (i = 0; i < len; ++i) { - const felem x = swap & (a[i] ^ b[i]); - a[i] ^= x; - b[i] ^= x; - } -} - -/* Calculates nQ where Q is the x-coordinate of a point on the curve - * - * resultx/resultz: the x coordinate of the resulting curve point (short form) - * n: a 32-byte number - * q: a point of the curve (short form) - */ -static void -cmult(felem *resultx, felem *resultz, const u8 *n, const felem *q) -{ - felem a[5] = { 0 }, b[5] = { 1 }, c[5] = { 1 }, d[5] = { 0 }; - felem *nqpqx = a, *nqpqz = b, *nqx = c, *nqz = d, *t; - felem e[5] = { 0 }, f[5] = { 1 }, g[5] = { 0 }, h[5] = { 1 }; - felem *nqpqx2 = e, *nqpqz2 = f, *nqx2 = g, *nqz2 = h; - - unsigned i, j; - - memcpy(nqpqx, q, sizeof(felem) * 5); - - for (i = 0; i < 32; ++i) { - u8 byte = n[31 - i]; - for (j = 0; j < 8; ++j) { - const felem bit = byte >> 7; - - swap_conditional(nqx, nqpqx, 5, bit); - swap_conditional(nqz, nqpqz, 5, bit); - fmonty(nqx2, nqz2, nqpqx2, nqpqz2, nqx, nqz, nqpqx, nqpqz, q); - swap_conditional(nqx2, nqpqx2, 5, bit); - swap_conditional(nqz2, nqpqz2, 5, bit); - - t = nqx; - nqx = nqx2; - nqx2 = t; - t = nqz; - nqz = nqz2; - nqz2 = t; - t = nqpqx; - nqpqx = nqpqx2; - nqpqx2 = t; - t = nqpqz; - nqpqz = nqpqz2; - nqpqz2 = t; - - byte <<= 1; - } - } - - memcpy(resultx, nqx, sizeof(felem) * 5); - memcpy(resultz, nqz, sizeof(felem) * 5); -} - -// ----------------------------------------------------------------------------- -// Shamelessly copied from djb's code -// ----------------------------------------------------------------------------- -static void -crecip(felem *out, const felem *z) -{ - felem z2[5]; - felem z9[5]; - felem z11[5]; - felem z2_5_0[5]; - felem z2_10_0[5]; - felem z2_20_0[5]; - felem z2_50_0[5]; - felem z2_100_0[5]; - felem t0[5]; - felem t1[5]; - int i; - - /* 2 */ fsquare(z2, z); - /* 4 */ fsquare(t1, z2); - /* 8 */ fsquare(t0, t1); - /* 9 */ fmul(z9, t0, z); - /* 11 */ fmul(z11, z9, z2); - /* 22 */ fsquare(t0, z11); - /* 2^5 - 2^0 = 31 */ fmul(z2_5_0, t0, z9); - - /* 2^6 - 2^1 */ fsquare(t0, z2_5_0); - /* 2^7 - 2^2 */ fsquare(t1, t0); - /* 2^8 - 2^3 */ fsquare(t0, t1); - /* 2^9 - 2^4 */ fsquare(t1, t0); - /* 2^10 - 2^5 */ fsquare(t0, t1); - /* 2^10 - 2^0 */ fmul(z2_10_0, t0, z2_5_0); - - /* 2^11 - 2^1 */ fsquare(t0, z2_10_0); - /* 2^12 - 2^2 */ fsquare(t1, t0); - /* 2^20 - 2^10 */ for (i = 2; i < 10; i += 2) { - fsquare(t0, t1); - fsquare(t1, t0); - } - /* 2^20 - 2^0 */ fmul(z2_20_0, t1, z2_10_0); - - /* 2^21 - 2^1 */ fsquare(t0, z2_20_0); - /* 2^22 - 2^2 */ fsquare(t1, t0); - /* 2^40 - 2^20 */ for (i = 2; i < 20; i += 2) { - fsquare(t0, t1); - fsquare(t1, t0); - } - /* 2^40 - 2^0 */ fmul(t0, t1, z2_20_0); - - /* 2^41 - 2^1 */ fsquare(t1, t0); - /* 2^42 - 2^2 */ fsquare(t0, t1); - /* 2^50 - 2^10 */ for (i = 2; i < 10; i += 2) { - fsquare(t1, t0); - fsquare(t0, t1); - } - /* 2^50 - 2^0 */ fmul(z2_50_0, t0, z2_10_0); - - /* 2^51 - 2^1 */ fsquare(t0, z2_50_0); - /* 2^52 - 2^2 */ fsquare(t1, t0); - /* 2^100 - 2^50 */ for (i = 2; i < 50; i += 2) { - fsquare(t0, t1); - fsquare(t1, t0); - } - /* 2^100 - 2^0 */ fmul(z2_100_0, t1, z2_50_0); - - /* 2^101 - 2^1 */ fsquare(t1, z2_100_0); - /* 2^102 - 2^2 */ fsquare(t0, t1); - /* 2^200 - 2^100 */ for (i = 2; i < 100; i += 2) { - fsquare(t1, t0); - fsquare(t0, t1); - } - /* 2^200 - 2^0 */ fmul(t1, t0, z2_100_0); - - /* 2^201 - 2^1 */ fsquare(t0, t1); - /* 2^202 - 2^2 */ fsquare(t1, t0); - /* 2^250 - 2^50 */ for (i = 2; i < 50; i += 2) { - fsquare(t0, t1); - fsquare(t1, t0); - } - /* 2^250 - 2^0 */ fmul(t0, t1, z2_50_0); - - /* 2^251 - 2^1 */ fsquare(t1, t0); - /* 2^252 - 2^2 */ fsquare(t0, t1); - /* 2^253 - 2^3 */ fsquare(t1, t0); - /* 2^254 - 2^4 */ fsquare(t0, t1); - /* 2^255 - 2^5 */ fsquare(t1, t0); - /* 2^255 - 21 */ fmul(out, t1, z11); -} +#include "../verified/Hacl_Curve25519.h" SECStatus -ec_Curve25519_mul(uint8_t *mypublic, const uint8_t *secret, - const uint8_t *basepoint) +ec_Curve25519_mul(uint8_t *mypublic, const uint8_t *secret, const uint8_t *basepoint) { - felem bp[5], x[5], z[5], zmone[5]; - uint8_t e[32]; - int i; - - for (i = 0; i < 32; ++i) { - e[i] = secret[i]; - } - e[0] &= 248; - e[31] &= 127; - e[31] |= 64; - fexpand(bp, basepoint); - cmult(x, z, e, bp); - crecip(zmone, z); - fmul(z, x, zmone); - fcontract(mypublic, z); - + // Note: this cast is safe because HaCl* state has a post-condition that only "mypublic" changed. + Hacl_Curve25519_crypto_scalarmult(mypublic, (uint8_t *)secret, (uint8_t *)basepoint); return 0; } |