Diffstat (limited to 'security/nss/lib/freebl')
-rw-r--r-- | security/nss/lib/freebl/Makefile | 10
-rw-r--r-- | security/nss/lib/freebl/blake2b.c | 2
-rw-r--r-- | security/nss/lib/freebl/chacha20poly1305.c | 88
-rw-r--r-- | security/nss/lib/freebl/dsa.c | 37
-rw-r--r-- | security/nss/lib/freebl/ec.c | 29
-rw-r--r-- | security/nss/lib/freebl/freebl.gyp | 36
-rw-r--r-- | security/nss/lib/freebl/freebl_base.gypi | 15
-rw-r--r-- | security/nss/lib/freebl/loader.c | 4
-rw-r--r-- | security/nss/lib/freebl/mpi/mpi.c | 13
-rw-r--r-- | security/nss/lib/freebl/poly1305-donna-x64-sse2-incremental-source.c | 881
-rw-r--r-- | security/nss/lib/freebl/poly1305.c | 314
-rw-r--r-- | security/nss/lib/freebl/poly1305.h | 30
-rw-r--r-- | security/nss/lib/freebl/unix_urandom.c | 33
-rw-r--r-- | security/nss/lib/freebl/verified/Hacl_Poly1305_32.c | 578
-rw-r--r-- | security/nss/lib/freebl/verified/Hacl_Poly1305_32.h | 103
15 files changed, 846 insertions, 1327 deletions
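
Note on the dsa.c and ec.c hunks below: both signing paths now draw an extra random blinding value (ar) and rework the computation of s so that mp_invmod and the private-key addition never operate directly on the secret nonce or key. The following is a minimal sketch of that pattern, assuming the usual freebl mpi/secmpi conventions (CHECK_MPI_OK sets err and jumps to cleanup); the helper name and simplified argument handling are mine, and the real logic is the in-line code in dsa_SignDigest() and ECDSA_SignDigestWithSeed():

#include "mpi.h"
#include "secmpi.h" /* CHECK_MPI_OK(expr): records err and jumps to cleanup on failure */

/* Computes s = k^-1 * (h + x*r) mod q without running mp_invmod or the
 * addition on the bare secrets.  On entry s holds the digest h; t and ar
 * are fresh random values in Z_q; k and x are clobbered. */
static mp_err
blinded_sig_s(mp_int *k, mp_int *x, mp_int *s, mp_int *r, mp_int *t,
              mp_int *ar, mp_int *q)
{
    mp_err err = MP_OKAY;
    mp_int tmp;
    MP_DIGITS(&tmp) = 0;
    CHECK_MPI_OK(mp_init(&tmp));
    /* Invert a blinded copy of k: ((k*ar*t)^-1 * t) mod q = (k*ar)^-1 mod q. */
    CHECK_MPI_OK(mp_mul(k, ar, k));          /* k = k * ar                */
    CHECK_MPI_OK(mp_mulmod(k, t, q, k));     /* k = k * t mod q           */
    CHECK_MPI_OK(mp_invmod(k, q, k));        /* k = k^-1 mod q            */
    CHECK_MPI_OK(mp_mulmod(k, t, q, k));     /* k = (k_orig * ar)^-1 mod q */
    /* Blind the addition: s = ar*h + ar*x*r (reduced by the final step). */
    CHECK_MPI_OK(mp_mul(x, ar, x));          /* x = x * ar                */
    CHECK_MPI_OK(mp_mulmod(x, r, q, x));     /* x = x * r mod q           */
    CHECK_MPI_OK(mp_mulmod(s, ar, q, &tmp)); /* tmp = h * ar mod q        */
    CHECK_MPI_OK(mp_add(&tmp, x, s));        /* s = tmp + x               */
    /* The ar factors cancel: s = k_orig^-1 * (h + x_orig*r) mod q. */
    CHECK_MPI_OK(mp_mulmod(s, k, q, s));
cleanup:
    mp_clear(&tmp);
    return err;
}

In the patch itself the blinding value comes from DSA_NewRandom() in dsa.c and RNG_GenerateGlobalRandomBytes() in ec.c, and the intermediate products reuse the existing locals rather than a separate temporary.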
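The chacha20poly1305.c hunk below also collapses the two previous Poly1305Do() bodies into one by aliasing the HACL* 32-bit and 64-bit entry points; the only per-variant differences left are the size of the stack-allocated state and the offset that splits it into r and the accumulator. As a usage illustration only (the helper and variable names are mine, not part of the patch), this is roughly how the verified 32-bit API added in verified/Hacl_Poly1305_32.c is driven; the zero-padded tail mirrors the RFC 7539 AEAD padding that Poly1305PadUpdate() applies, not plain Poly1305 over an unpadded message:

#include <stdint.h>
#include <string.h>
#include "verified/Hacl_Poly1305_32.h"

static void
poly1305_32_padded_tag(uint8_t tag[16], const uint8_t *msg, unsigned int len,
                       const uint8_t key[32])
{
    /* 5 limbs for r and 5 for the accumulator h, carved from one stack
     * buffer, mirroring stateStack/offset in the new Poly1305Do(). */
    uint32_t stateStack[10U] = { 0U };
    Hacl_Impl_Poly1305_32_State_poly1305_state st =
        Hacl_Poly1305_32_mk_state(stateStack, stateStack + 5);
    uint8_t block[16] = { 0 };
    unsigned int rem = len % 16;

    Hacl_Poly1305_32_init(st, (uint8_t *)key);
    /* update() consumes whole 16-byte blocks; the third argument is a block count. */
    Hacl_Poly1305_32_update(st, (uint8_t *)msg, len / 16);
    if (rem > 0) {
        memcpy(block, msg + (len - rem), rem);
        Hacl_Poly1305_32_update(st, block, 1); /* zero-padded final block */
    }
    /* finish() takes the second half of the one-time key as the pad. */
    Hacl_Poly1305_32_finish(st, tag, (uint8_t *)(key + 16));
}

The 64-bit variant is driven identically, except that the state is a uint64_t stateStack[6] split at offset 3.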
diff --git a/security/nss/lib/freebl/Makefile b/security/nss/lib/freebl/Makefile index a4b1a86ae..bff11c7c8 100644 --- a/security/nss/lib/freebl/Makefile +++ b/security/nss/lib/freebl/Makefile @@ -517,13 +517,13 @@ ifndef NSS_DISABLE_CHACHAPOLY ifdef HAVE_INT128_SUPPORT EXTRA_SRCS += Hacl_Poly1305_64.c else - EXTRA_SRCS += poly1305.c + EXTRA_SRCS += Hacl_Poly1305_32.c endif else ifeq ($(CPU_ARCH),aarch64) EXTRA_SRCS += Hacl_Poly1305_64.c else - EXTRA_SRCS += poly1305.c + EXTRA_SRCS += Hacl_Poly1305_32.c endif endif # x86_64 @@ -535,12 +535,16 @@ ifeq (,$(filter-out i386 x386 x86 x86_64 aarch64,$(CPU_ARCH))) # All intel architectures get the 64 bit version # With custom uint128 if necessary (faster than generic 32 bit version). ECL_SRCS += curve25519_64.c - VERIFIED_SRCS += Hacl_Curve25519.c FStar.c + VERIFIED_SRCS += Hacl_Curve25519.c else # All non intel architectures get the generic 32 bit implementation (slow!) ECL_SRCS += curve25519_32.c endif +ifndef HAVE_INT128_SUPPORT + VERIFIED_SRCS += FStar.c +endif + ####################################################################### # (5) Execute "global" rules. (OPTIONAL) # ####################################################################### diff --git a/security/nss/lib/freebl/blake2b.c b/security/nss/lib/freebl/blake2b.c index 4099c67e0..b4a0442c9 100644 --- a/security/nss/lib/freebl/blake2b.c +++ b/security/nss/lib/freebl/blake2b.c @@ -180,7 +180,7 @@ blake2b_Begin(BLAKE2BContext* ctx, uint8_t outlen, const uint8_t* key, return SECSuccess; failure: - PORT_Memset(&ctx, 0, sizeof(ctx)); + PORT_Memset(ctx, 0, sizeof(*ctx)); PORT_SetError(SEC_ERROR_INVALID_ARGS); return SECFailure; } diff --git a/security/nss/lib/freebl/chacha20poly1305.c b/security/nss/lib/freebl/chacha20poly1305.c index 859d05316..302f0db9e 100644 --- a/security/nss/lib/freebl/chacha20poly1305.c +++ b/security/nss/lib/freebl/chacha20poly1305.c @@ -24,36 +24,60 @@ extern void Hacl_Chacha20_Vec128_chacha20(uint8_t *output, uint8_t *plain, extern void Hacl_Chacha20_chacha20(uint8_t *output, uint8_t *plain, uint32_t len, uint8_t *k, uint8_t *n1, uint32_t ctr); -/* Poly1305Do writes the Poly1305 authenticator of the given additional data - * and ciphertext to |out|. */ #if defined(HAVE_INT128_SUPPORT) && (defined(NSS_X86_OR_X64) || defined(__aarch64__)) /* Use HACL* Poly1305 on 64-bit Intel and ARM */ #include "verified/Hacl_Poly1305_64.h" +#define NSS_POLY1305_64 1 +#define Hacl_Poly1305_update Hacl_Poly1305_64_update +#define Hacl_Poly1305_mk_state Hacl_Poly1305_64_mk_state +#define Hacl_Poly1305_init Hacl_Poly1305_64_init +#define Hacl_Poly1305_finish Hacl_Poly1305_64_finish +typedef Hacl_Impl_Poly1305_64_State_poly1305_state Hacl_Impl_Poly1305_State_poly1305_state; +#else +/* All other platforms get the 32-bit poly1305 HACL* implementation. 
*/ +#include "verified/Hacl_Poly1305_32.h" +#define NSS_POLY1305_32 1 +#define Hacl_Poly1305_update Hacl_Poly1305_32_update +#define Hacl_Poly1305_mk_state Hacl_Poly1305_32_mk_state +#define Hacl_Poly1305_init Hacl_Poly1305_32_init +#define Hacl_Poly1305_finish Hacl_Poly1305_32_finish +typedef Hacl_Impl_Poly1305_32_State_poly1305_state Hacl_Impl_Poly1305_State_poly1305_state; +#endif /* HAVE_INT128_SUPPORT */ static void -Poly1305PadUpdate(Hacl_Impl_Poly1305_64_State_poly1305_state state, +Poly1305PadUpdate(Hacl_Impl_Poly1305_State_poly1305_state state, unsigned char *block, const unsigned char *p, const unsigned int pLen) { unsigned int pRemLen = pLen % 16; - Hacl_Poly1305_64_update(state, (uint8_t *)p, (pLen / 16)); + Hacl_Poly1305_update(state, (uint8_t *)p, (pLen / 16)); if (pRemLen > 0) { memcpy(block, p + (pLen - pRemLen), pRemLen); - Hacl_Poly1305_64_update(state, block, 1); + Hacl_Poly1305_update(state, block, 1); } } +/* Poly1305Do writes the Poly1305 authenticator of the given additional data + * and ciphertext to |out|. */ static void Poly1305Do(unsigned char *out, const unsigned char *ad, unsigned int adLen, const unsigned char *ciphertext, unsigned int ciphertextLen, const unsigned char key[32]) { - uint64_t tmp1[6U] = { 0U }; - Hacl_Impl_Poly1305_64_State_poly1305_state state = - Hacl_Poly1305_64_mk_state(tmp1, tmp1 + 3); +#ifdef NSS_POLY1305_64 + uint64_t stateStack[6U] = { 0U }; + size_t offset = 3; +#elif defined NSS_POLY1305_32 + uint32_t stateStack[10U] = { 0U }; + size_t offset = 5; +#else +#error "This can't happen." +#endif + Hacl_Impl_Poly1305_State_poly1305_state state = + Hacl_Poly1305_mk_state(stateStack, stateStack + offset); unsigned char block[16] = { 0 }; - Hacl_Poly1305_64_init(state, (uint8_t *)key); + Hacl_Poly1305_init(state, (uint8_t *)key); Poly1305PadUpdate(state, block, ad, adLen); memset(block, 0, 16); @@ -68,49 +92,11 @@ Poly1305Do(unsigned char *out, const unsigned char *ad, unsigned int adLen, block[i] = j; } - Hacl_Poly1305_64_update(state, block, 1); - Hacl_Poly1305_64_finish(state, out, (uint8_t *)(key + 16)); + Hacl_Poly1305_update(state, block, 1); + Hacl_Poly1305_finish(state, out, (uint8_t *)(key + 16)); +#undef NSS_POLY1305_64 +#undef NSS_POLY1305_32 } -#else -/* All other platforms get the 32-bit poly1305 reference implementation. 
*/ -#include "poly1305.h" - -static void -Poly1305Do(unsigned char *out, const unsigned char *ad, unsigned int adLen, - const unsigned char *ciphertext, unsigned int ciphertextLen, - const unsigned char key[32]) -{ - poly1305_state state; - unsigned int j; - unsigned char lengthBytes[8]; - static const unsigned char zeros[15]; - unsigned int i; - - Poly1305Init(&state, key); - Poly1305Update(&state, ad, adLen); - if (adLen % 16 > 0) { - Poly1305Update(&state, zeros, 16 - adLen % 16); - } - Poly1305Update(&state, ciphertext, ciphertextLen); - if (ciphertextLen % 16 > 0) { - Poly1305Update(&state, zeros, 16 - ciphertextLen % 16); - } - j = adLen; - for (i = 0; i < sizeof(lengthBytes); i++) { - lengthBytes[i] = j; - j >>= 8; - } - Poly1305Update(&state, lengthBytes, sizeof(lengthBytes)); - j = ciphertextLen; - for (i = 0; i < sizeof(lengthBytes); i++) { - lengthBytes[i] = j; - j >>= 8; - } - Poly1305Update(&state, lengthBytes, sizeof(lengthBytes)); - Poly1305Finish(&state, out); -} - -#endif /* HAVE_INT128_SUPPORT */ #endif /* NSS_DISABLE_CHACHAPOLY */ SECStatus diff --git a/security/nss/lib/freebl/dsa.c b/security/nss/lib/freebl/dsa.c index 9324d306b..aef353967 100644 --- a/security/nss/lib/freebl/dsa.c +++ b/security/nss/lib/freebl/dsa.c @@ -16,14 +16,11 @@ #include "blapi.h" #include "nssilock.h" #include "secitem.h" -#include "blapi.h" +#include "blapit.h" #include "mpi.h" #include "secmpi.h" #include "pqg.h" -/* XXX to be replaced by define in blapit.h */ -#define NSS_FREEBL_DSA_DEFAULT_CHUNKSIZE 2048 - /* * FIPS 186-2 requires result from random output to be reduced mod q when * generating random numbers for DSA. @@ -168,7 +165,7 @@ dsa_NewKeyExtended(const PQGParams *params, const SECItem *seed, return SECFailure; } /* Initialize an arena for the DSA key. */ - arena = PORT_NewArena(NSS_FREEBL_DSA_DEFAULT_CHUNKSIZE); + arena = PORT_NewArena(NSS_FREEBL_DEFAULT_CHUNKSIZE); if (!arena) { PORT_SetError(SEC_ERROR_NO_MEMORY); return SECFailure; @@ -213,8 +210,9 @@ cleanup: mp_clear(&g); mp_clear(&x); mp_clear(&y); - if (key) + if (key) { PORT_FreeArena(key->params.arena, PR_TRUE); + } if (err) { translate_mpi_error(err); return SECFailure; @@ -321,6 +319,7 @@ dsa_SignDigest(DSAPrivateKey *key, SECItem *signature, const SECItem *digest, mp_int x, k; /* private key & pseudo-random integer */ mp_int r, s; /* tuple (r, s) is signature) */ mp_int t; /* holding tmp values */ + mp_int ar; /* holding blinding values */ mp_err err = MP_OKAY; SECStatus rv = SECSuccess; unsigned int dsa_subprime_len, dsa_signature_len, offset; @@ -364,6 +363,7 @@ dsa_SignDigest(DSAPrivateKey *key, SECItem *signature, const SECItem *digest, MP_DIGITS(&r) = 0; MP_DIGITS(&s) = 0; MP_DIGITS(&t) = 0; + MP_DIGITS(&ar) = 0; CHECK_MPI_OK(mp_init(&p)); CHECK_MPI_OK(mp_init(&q)); CHECK_MPI_OK(mp_init(&g)); @@ -372,6 +372,7 @@ dsa_SignDigest(DSAPrivateKey *key, SECItem *signature, const SECItem *digest, CHECK_MPI_OK(mp_init(&r)); CHECK_MPI_OK(mp_init(&s)); CHECK_MPI_OK(mp_init(&t)); + CHECK_MPI_OK(mp_init(&ar)); /* ** Convert stored PQG and private key into MPI integers. 
*/ @@ -397,14 +398,28 @@ dsa_SignDigest(DSAPrivateKey *key, SECItem *signature, const SECItem *digest, rv = SECFailure; goto cleanup; } - SECITEM_TO_MPINT(t2, &t); /* t <-$ Zq */ + SECITEM_TO_MPINT(t2, &t); /* t <-$ Zq */ + SECITEM_FreeItem(&t2, PR_FALSE); + if (DSA_NewRandom(NULL, &key->params.subPrime, &t2) != SECSuccess) { + PORT_SetError(SEC_ERROR_NEED_RANDOM); + rv = SECFailure; + goto cleanup; + } + SECITEM_TO_MPINT(t2, &ar); /* ar <-$ Zq */ + SECITEM_FreeItem(&t2, PR_FALSE); + + /* Using mp_invmod on k directly would leak bits from k. */ + CHECK_MPI_OK(mp_mul(&k, &ar, &k)); /* k = k * ar */ CHECK_MPI_OK(mp_mulmod(&k, &t, &q, &k)); /* k = k * t mod q */ CHECK_MPI_OK(mp_invmod(&k, &q, &k)); /* k = k**-1 mod q */ CHECK_MPI_OK(mp_mulmod(&k, &t, &q, &k)); /* k = k * t mod q */ SECITEM_TO_MPINT(localDigest, &s); /* s = HASH(M) */ - CHECK_MPI_OK(mp_mulmod(&x, &r, &q, &x)); /* x = x * r mod q */ - CHECK_MPI_OK(mp_addmod(&s, &x, &q, &s)); /* s = s + x mod q */ - CHECK_MPI_OK(mp_mulmod(&s, &k, &q, &s)); /* s = s * k mod q */ + /* To avoid leaking secret bits here the addition is blinded. */ + CHECK_MPI_OK(mp_mul(&x, &ar, &x)); /* x = x * ar */ + CHECK_MPI_OK(mp_mulmod(&x, &r, &q, &x)); /* x = x * r mod q */ + CHECK_MPI_OK(mp_mulmod(&s, &ar, &q, &t)); /* t = s * ar mod q */ + CHECK_MPI_OK(mp_add(&t, &x, &s)); /* s = t + x */ + CHECK_MPI_OK(mp_mulmod(&s, &k, &q, &s)); /* s = s * k mod q */ /* ** verify r != 0 and s != 0 ** mentioned as optional in FIPS 186-1. @@ -438,7 +453,7 @@ cleanup: mp_clear(&r); mp_clear(&s); mp_clear(&t); - SECITEM_FreeItem(&t2, PR_FALSE); + mp_clear(&ar); if (err) { translate_mpi_error(err); rv = SECFailure; diff --git a/security/nss/lib/freebl/ec.c b/security/nss/lib/freebl/ec.c index b28815ade..6468a10d6 100644 --- a/security/nss/lib/freebl/ec.c +++ b/security/nss/lib/freebl/ec.c @@ -653,6 +653,7 @@ ECDSA_SignDigestWithSeed(ECPrivateKey *key, SECItem *signature, mp_int r, s; /* tuple (r, s) is the signature */ mp_int t; /* holding tmp values */ mp_int n; + mp_int ar; /* blinding value */ mp_err err = MP_OKAY; ECParams *ecParams = NULL; SECItem kGpoint = { siBuffer, NULL, 0 }; @@ -674,6 +675,7 @@ ECDSA_SignDigestWithSeed(ECPrivateKey *key, SECItem *signature, MP_DIGITS(&s) = 0; MP_DIGITS(&n) = 0; MP_DIGITS(&t) = 0; + MP_DIGITS(&ar) = 0; /* Check args */ if (!key || !signature || !digest || !kb || (kblen < 0)) { @@ -700,6 +702,7 @@ ECDSA_SignDigestWithSeed(ECPrivateKey *key, SECItem *signature, CHECK_MPI_OK(mp_init(&s)); CHECK_MPI_OK(mp_init(&n)); CHECK_MPI_OK(mp_init(&t)); + CHECK_MPI_OK(mp_init(&ar)); SECITEM_TO_MPINT(ecParams->order, &n); SECITEM_TO_MPINT(key->privateValue, &d); @@ -815,12 +818,25 @@ ECDSA_SignDigestWithSeed(ECPrivateKey *key, SECItem *signature, goto cleanup; } CHECK_MPI_OK(mp_read_unsigned_octets(&t, t2, 2 * ecParams->order.len)); /* t <-$ Zn */ - CHECK_MPI_OK(mp_mulmod(&k, &t, &n, &k)); /* k = k * t mod n */ - CHECK_MPI_OK(mp_invmod(&k, &n, &k)); /* k = k**-1 mod n */ - CHECK_MPI_OK(mp_mulmod(&k, &t, &n, &k)); /* k = k * t mod n */ - CHECK_MPI_OK(mp_mulmod(&d, &r, &n, &d)); /* d = d * r mod n */ - CHECK_MPI_OK(mp_addmod(&s, &d, &n, &s)); /* s = s + d mod n */ - CHECK_MPI_OK(mp_mulmod(&s, &k, &n, &s)); /* s = s * k mod n */ + PORT_Memset(t2, 0, 2 * ecParams->order.len); + if (RNG_GenerateGlobalRandomBytes(t2, 2 * ecParams->order.len) != SECSuccess) { + PORT_SetError(SEC_ERROR_NEED_RANDOM); + rv = SECFailure; + goto cleanup; + } + CHECK_MPI_OK(mp_read_unsigned_octets(&ar, t2, 2 * ecParams->order.len)); /* ar <-$ Zn */ + + /* Using mp_invmod on k 
directly would leak bits from k. */ + CHECK_MPI_OK(mp_mul(&k, &ar, &k)); /* k = k * ar */ + CHECK_MPI_OK(mp_mulmod(&k, &t, &n, &k)); /* k = k * t mod n */ + CHECK_MPI_OK(mp_invmod(&k, &n, &k)); /* k = k**-1 mod n */ + CHECK_MPI_OK(mp_mulmod(&k, &t, &n, &k)); /* k = k * t mod n */ + /* To avoid leaking secret bits here the addition is blinded. */ + CHECK_MPI_OK(mp_mul(&d, &ar, &t)); /* t = d * ar */ + CHECK_MPI_OK(mp_mulmod(&t, &r, &n, &d)); /* d = t * r mod n */ + CHECK_MPI_OK(mp_mulmod(&s, &ar, &n, &t)); /* t = s * ar mod n */ + CHECK_MPI_OK(mp_add(&t, &d, &s)); /* s = t + d */ + CHECK_MPI_OK(mp_mulmod(&s, &k, &n, &s)); /* s = s * k mod n */ #if EC_DEBUG mp_todecimal(&s, mpstr); @@ -858,6 +874,7 @@ cleanup: mp_clear(&s); mp_clear(&n); mp_clear(&t); + mp_clear(&ar); if (t2) { PORT_Free(t2); diff --git a/security/nss/lib/freebl/freebl.gyp b/security/nss/lib/freebl/freebl.gyp index fae56f709..004807483 100644 --- a/security/nss/lib/freebl/freebl.gyp +++ b/security/nss/lib/freebl/freebl.gyp @@ -272,28 +272,15 @@ }, }], [ 'cc_use_gnu_ld==1 and OS=="win" and target_arch=="x64"', { + # mingw x64 'defines': [ 'MP_IS_LITTLE_ENDIAN', - 'NSS_BEVAND_ARCFOUR', - 'MPI_AMD64', - 'MP_ASSEMBLY_MULTIPLY', - 'NSS_USE_COMBA', - 'USE_HW_AES', - 'INTEL_GCM', ], }], - [ 'OS!="win"', { - 'conditions': [ - [ 'target_arch=="x64" or target_arch=="arm64" or target_arch=="aarch64"', { - 'defines': [ - # The Makefile does version-tests on GCC, but we're not doing that here. - 'HAVE_INT128_SUPPORT', - ], - }, { - 'defines': [ - 'KRML_NOUINT128', - ], - }], + [ 'have_int128_support==1', { + 'defines': [ + # The Makefile does version-tests on GCC, but we're not doing that here. + 'HAVE_INT128_SUPPORT', ], }, { 'defines': [ @@ -355,5 +342,18 @@ }, 'variables': { 'module': 'nss', + 'conditions': [ + [ 'OS!="win"', { + 'conditions': [ + [ 'target_arch=="x64" or target_arch=="arm64" or target_arch=="aarch64"', { + 'have_int128_support%': 1, + }, { + 'have_int128_support%': 0, + }], + ], + }, { + 'have_int128_support%': 0, + }], + ], } } diff --git a/security/nss/lib/freebl/freebl_base.gypi b/security/nss/lib/freebl/freebl_base.gypi index ebd1018d8..1372994f4 100644 --- a/security/nss/lib/freebl/freebl_base.gypi +++ b/security/nss/lib/freebl/freebl_base.gypi @@ -59,7 +59,7 @@ 'sha_fast.c', 'shvfy.c', 'sysrand.c', - 'tlsprfalg.c' + 'tlsprfalg.c', ], 'conditions': [ [ 'OS=="linux" or OS=="android"', { @@ -122,6 +122,11 @@ 'intel-gcm-x86-masm.asm', ], }], + [ 'cc_use_gnu_ld==1', { + # mingw + 'sources': [ + ], + }], [ 'cc_is_clang!=1', { # MSVC 'sources': [ @@ -135,7 +140,6 @@ # All intel and 64-bit ARM architectures get the 64 bit version. 
'ecl/curve25519_64.c', 'verified/Hacl_Curve25519.c', - 'verified/FStar.c', ], }, { 'sources': [ @@ -167,7 +171,7 @@ }, { # !Windows & !x64 & !arm64 & !aarch64 'sources': [ - 'poly1305.c', + 'verified/Hacl_Poly1305_32.c', ], }], ], @@ -176,7 +180,7 @@ }, { # Windows 'sources': [ - 'poly1305.c', + 'verified/Hacl_Poly1305_32.c', ], }], ], @@ -215,6 +219,9 @@ }], ], }], + [ 'have_int128_support==0', { + 'sources': [ 'verified/FStar.c' ], + }], ], 'ldflags': [ '-Wl,-Bsymbolic' diff --git a/security/nss/lib/freebl/loader.c b/security/nss/lib/freebl/loader.c index fe5e0a668..6d200e6dd 100644 --- a/security/nss/lib/freebl/loader.c +++ b/security/nss/lib/freebl/loader.c @@ -2164,12 +2164,12 @@ BLAKE2B_NewContext(void) } void -BLAKE2B_DestroyContext(BLAKE2BContext *BLAKE2BContext, PRBool freeit) +BLAKE2B_DestroyContext(BLAKE2BContext *ctx, PRBool freeit) { if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) { return; } - (vector->p_BLAKE2B_DestroyContext)(BLAKE2BContext, freeit); + (vector->p_BLAKE2B_DestroyContext)(ctx, freeit); } SECStatus diff --git a/security/nss/lib/freebl/mpi/mpi.c b/security/nss/lib/freebl/mpi/mpi.c index ae404019d..8c893fb5f 100644 --- a/security/nss/lib/freebl/mpi/mpi.c +++ b/security/nss/lib/freebl/mpi/mpi.c @@ -2657,10 +2657,10 @@ mp_toradix(mp_int *mp, char *str, int radix) /* Reverse the digits and sign indicator */ ix = 0; while (ix < pos) { - char tmp = str[ix]; + char tmpc = str[ix]; str[ix] = str[pos]; - str[pos] = tmp; + str[pos] = tmpc; ++ix; --pos; } @@ -3313,13 +3313,14 @@ s_mp_div_d(mp_int *mp, mp_digit d, mp_digit *r) /* could check for power of 2 here, but mp_div_d does that. */ if (MP_USED(mp) == 1) { mp_digit n = MP_DIGIT(mp, 0); - mp_digit rem; + mp_digit remdig; q = n / d; - rem = n % d; + remdig = n % d; MP_DIGIT(mp, 0) = q; - if (r) - *r = rem; + if (r) { + *r = remdig; + } return MP_OKAY; } diff --git a/security/nss/lib/freebl/poly1305-donna-x64-sse2-incremental-source.c b/security/nss/lib/freebl/poly1305-donna-x64-sse2-incremental-source.c deleted file mode 100644 index 3c803c167..000000000 --- a/security/nss/lib/freebl/poly1305-donna-x64-sse2-incremental-source.c +++ /dev/null @@ -1,881 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -/* This implementation of poly1305 is by Andrew Moon - * (https://github.com/floodyberry/poly1305-donna) and released as public - * domain. It implements SIMD vectorization based on the algorithm described in - * http://cr.yp.to/papers.html#neoncrypto. Unrolled to 2 powers, i.e. 64 byte - * block size. 
*/ - -#include <emmintrin.h> -#include <stdint.h> - -#include "poly1305.h" -#include "blapii.h" - -#define ALIGN(x) __attribute__((aligned(x))) -#define INLINE inline -#define U8TO64_LE(m) (*(uint64_t *)(m)) -#define U8TO32_LE(m) (*(uint32_t *)(m)) -#define U64TO8_LE(m, v) (*(uint64_t *)(m)) = v - -typedef __m128i xmmi; -typedef unsigned __int128 uint128_t; - -static const uint32_t ALIGN(16) poly1305_x64_sse2_message_mask[4] = { (1 << 26) - 1, 0, (1 << 26) - 1, 0 }; -static const uint32_t ALIGN(16) poly1305_x64_sse2_5[4] = { 5, 0, 5, 0 }; -static const uint32_t ALIGN(16) poly1305_x64_sse2_1shl128[4] = { (1 << 24), 0, (1 << 24), 0 }; - -static uint128_t INLINE -add128(uint128_t a, uint128_t b) -{ - return a + b; -} - -static uint128_t INLINE -add128_64(uint128_t a, uint64_t b) -{ - return a + b; -} - -static uint128_t INLINE -mul64x64_128(uint64_t a, uint64_t b) -{ - return (uint128_t)a * b; -} - -static uint64_t INLINE -lo128(uint128_t a) -{ - return (uint64_t)a; -} - -static uint64_t INLINE -shr128(uint128_t v, const int shift) -{ - return (uint64_t)(v >> shift); -} - -static uint64_t INLINE -shr128_pair(uint64_t hi, uint64_t lo, const int shift) -{ - return (uint64_t)((((uint128_t)hi << 64) | lo) >> shift); -} - -typedef struct poly1305_power_t { - union { - xmmi v; - uint64_t u[2]; - uint32_t d[4]; - } R20, R21, R22, R23, R24, S21, S22, S23, S24; -} poly1305_power; - -typedef struct poly1305_state_internal_t { - poly1305_power P[2]; /* 288 bytes, top 32 bit halves unused = 144 bytes of free storage */ - union { - xmmi H[5]; /* 80 bytes */ - uint64_t HH[10]; - }; - /* uint64_t r0,r1,r2; [24 bytes] */ - /* uint64_t pad0,pad1; [16 bytes] */ - uint64_t started; /* 8 bytes */ - uint64_t leftover; /* 8 bytes */ - uint8_t buffer[64]; /* 64 bytes */ -} poly1305_state_internal; /* 448 bytes total + 63 bytes for alignment = 511 bytes raw */ - -static poly1305_state_internal INLINE - * - poly1305_aligned_state(poly1305_state *state) -{ - return (poly1305_state_internal *)(((uint64_t)state + 63) & ~63); -} - -/* copy 0-63 bytes */ -static void INLINE NO_SANITIZE_ALIGNMENT -poly1305_block_copy(uint8_t *dst, const uint8_t *src, size_t bytes) -{ - size_t offset = src - dst; - if (bytes & 32) { - _mm_storeu_si128((xmmi *)(dst + 0), _mm_loadu_si128((xmmi *)(dst + offset + 0))); - _mm_storeu_si128((xmmi *)(dst + 16), _mm_loadu_si128((xmmi *)(dst + offset + 16))); - dst += 32; - } - if (bytes & 16) { - _mm_storeu_si128((xmmi *)dst, _mm_loadu_si128((xmmi *)(dst + offset))); - dst += 16; - } - if (bytes & 8) { - *(uint64_t *)dst = *(uint64_t *)(dst + offset); - dst += 8; - } - if (bytes & 4) { - *(uint32_t *)dst = *(uint32_t *)(dst + offset); - dst += 4; - } - if (bytes & 2) { - *(uint16_t *)dst = *(uint16_t *)(dst + offset); - dst += 2; - } - if (bytes & 1) { - *(uint8_t *)dst = *(uint8_t *)(dst + offset); - } -} - -/* zero 0-15 bytes */ -static void INLINE -poly1305_block_zero(uint8_t *dst, size_t bytes) -{ - if (bytes & 8) { - *(uint64_t *)dst = 0; - dst += 8; - } - if (bytes & 4) { - *(uint32_t *)dst = 0; - dst += 4; - } - if (bytes & 2) { - *(uint16_t *)dst = 0; - dst += 2; - } - if (bytes & 1) { - *(uint8_t *)dst = 0; - } -} - -static size_t INLINE -poly1305_min(size_t a, size_t b) -{ - return (a < b) ? 
a : b; -} - -void -Poly1305Init(poly1305_state *state, const unsigned char key[32]) -{ - poly1305_state_internal *st = poly1305_aligned_state(state); - poly1305_power *p; - uint64_t r0, r1, r2; - uint64_t t0, t1; - - /* clamp key */ - t0 = U8TO64_LE(key + 0); - t1 = U8TO64_LE(key + 8); - r0 = t0 & 0xffc0fffffff; - t0 >>= 44; - t0 |= t1 << 20; - r1 = t0 & 0xfffffc0ffff; - t1 >>= 24; - r2 = t1 & 0x00ffffffc0f; - - /* store r in un-used space of st->P[1] */ - p = &st->P[1]; - p->R20.d[1] = (uint32_t)(r0); - p->R20.d[3] = (uint32_t)(r0 >> 32); - p->R21.d[1] = (uint32_t)(r1); - p->R21.d[3] = (uint32_t)(r1 >> 32); - p->R22.d[1] = (uint32_t)(r2); - p->R22.d[3] = (uint32_t)(r2 >> 32); - - /* store pad */ - p->R23.d[1] = U8TO32_LE(key + 16); - p->R23.d[3] = U8TO32_LE(key + 20); - p->R24.d[1] = U8TO32_LE(key + 24); - p->R24.d[3] = U8TO32_LE(key + 28); - - /* H = 0 */ - st->H[0] = _mm_setzero_si128(); - st->H[1] = _mm_setzero_si128(); - st->H[2] = _mm_setzero_si128(); - st->H[3] = _mm_setzero_si128(); - st->H[4] = _mm_setzero_si128(); - - st->started = 0; - st->leftover = 0; -} - -static void -poly1305_first_block(poly1305_state_internal *st, const uint8_t *m) -{ - const xmmi MMASK = _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask); - const xmmi FIVE = _mm_load_si128((xmmi *)poly1305_x64_sse2_5); - const xmmi HIBIT = _mm_load_si128((xmmi *)poly1305_x64_sse2_1shl128); - xmmi T5, T6; - poly1305_power *p; - uint128_t d[3]; - uint64_t r0, r1, r2; - uint64_t r20, r21, r22, s22; - uint64_t pad0, pad1; - uint64_t c; - uint64_t i; - - /* pull out stored info */ - p = &st->P[1]; - - r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1]; - r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1]; - r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1]; - pad0 = ((uint64_t)p->R23.d[3] << 32) | (uint64_t)p->R23.d[1]; - pad1 = ((uint64_t)p->R24.d[3] << 32) | (uint64_t)p->R24.d[1]; - - /* compute powers r^2,r^4 */ - r20 = r0; - r21 = r1; - r22 = r2; - for (i = 0; i < 2; i++) { - s22 = r22 * (5 << 2); - - d[0] = add128(mul64x64_128(r20, r20), mul64x64_128(r21 * 2, s22)); - d[1] = add128(mul64x64_128(r22, s22), mul64x64_128(r20 * 2, r21)); - d[2] = add128(mul64x64_128(r21, r21), mul64x64_128(r22 * 2, r20)); - - r20 = lo128(d[0]) & 0xfffffffffff; - c = shr128(d[0], 44); - d[1] = add128_64(d[1], c); - r21 = lo128(d[1]) & 0xfffffffffff; - c = shr128(d[1], 44); - d[2] = add128_64(d[2], c); - r22 = lo128(d[2]) & 0x3ffffffffff; - c = shr128(d[2], 42); - r20 += c * 5; - c = (r20 >> 44); - r20 = r20 & 0xfffffffffff; - r21 += c; - - p->R20.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)(r20)&0x3ffffff), _MM_SHUFFLE(1, 0, 1, 0)); - p->R21.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r20 >> 26) | (r21 << 18)) & 0x3ffffff), _MM_SHUFFLE(1, 0, 1, 0)); - p->R22.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r21 >> 8)) & 0x3ffffff), _MM_SHUFFLE(1, 0, 1, 0)); - p->R23.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r21 >> 34) | (r22 << 10)) & 0x3ffffff), _MM_SHUFFLE(1, 0, 1, 0)); - p->R24.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r22 >> 16))), _MM_SHUFFLE(1, 0, 1, 0)); - p->S21.v = _mm_mul_epu32(p->R21.v, FIVE); - p->S22.v = _mm_mul_epu32(p->R22.v, FIVE); - p->S23.v = _mm_mul_epu32(p->R23.v, FIVE); - p->S24.v = _mm_mul_epu32(p->R24.v, FIVE); - p--; - } - - /* put saved info back */ - p = &st->P[1]; - p->R20.d[1] = (uint32_t)(r0); - p->R20.d[3] = (uint32_t)(r0 >> 32); - p->R21.d[1] = (uint32_t)(r1); - p->R21.d[3] = (uint32_t)(r1 >> 32); - p->R22.d[1] = (uint32_t)(r2); - p->R22.d[3] = 
(uint32_t)(r2 >> 32); - p->R23.d[1] = (uint32_t)(pad0); - p->R23.d[3] = (uint32_t)(pad0 >> 32); - p->R24.d[1] = (uint32_t)(pad1); - p->R24.d[3] = (uint32_t)(pad1 >> 32); - - /* H = [Mx,My] */ - T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)), _mm_loadl_epi64((xmmi *)(m + 16))); - T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)), _mm_loadl_epi64((xmmi *)(m + 24))); - st->H[0] = _mm_and_si128(MMASK, T5); - st->H[1] = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); - T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12)); - st->H[2] = _mm_and_si128(MMASK, T5); - st->H[3] = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); - st->H[4] = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT); -} - -static void -poly1305_blocks(poly1305_state_internal *st, const uint8_t *m, size_t bytes) -{ - const xmmi MMASK = _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask); - const xmmi FIVE = _mm_load_si128((xmmi *)poly1305_x64_sse2_5); - const xmmi HIBIT = _mm_load_si128((xmmi *)poly1305_x64_sse2_1shl128); - - poly1305_power *p; - xmmi H0, H1, H2, H3, H4; - xmmi T0, T1, T2, T3, T4, T5, T6; - xmmi M0, M1, M2, M3, M4; - xmmi C1, C2; - - H0 = st->H[0]; - H1 = st->H[1]; - H2 = st->H[2]; - H3 = st->H[3]; - H4 = st->H[4]; - - while (bytes >= 64) { - /* H *= [r^4,r^4] */ - p = &st->P[0]; - T0 = _mm_mul_epu32(H0, p->R20.v); - T1 = _mm_mul_epu32(H0, p->R21.v); - T2 = _mm_mul_epu32(H0, p->R22.v); - T3 = _mm_mul_epu32(H0, p->R23.v); - T4 = _mm_mul_epu32(H0, p->R24.v); - T5 = _mm_mul_epu32(H1, p->S24.v); - T6 = _mm_mul_epu32(H1, p->R20.v); - T0 = _mm_add_epi64(T0, T5); - T1 = _mm_add_epi64(T1, T6); - T5 = _mm_mul_epu32(H2, p->S23.v); - T6 = _mm_mul_epu32(H2, p->S24.v); - T0 = _mm_add_epi64(T0, T5); - T1 = _mm_add_epi64(T1, T6); - T5 = _mm_mul_epu32(H3, p->S22.v); - T6 = _mm_mul_epu32(H3, p->S23.v); - T0 = _mm_add_epi64(T0, T5); - T1 = _mm_add_epi64(T1, T6); - T5 = _mm_mul_epu32(H4, p->S21.v); - T6 = _mm_mul_epu32(H4, p->S22.v); - T0 = _mm_add_epi64(T0, T5); - T1 = _mm_add_epi64(T1, T6); - T5 = _mm_mul_epu32(H1, p->R21.v); - T6 = _mm_mul_epu32(H1, p->R22.v); - T2 = _mm_add_epi64(T2, T5); - T3 = _mm_add_epi64(T3, T6); - T5 = _mm_mul_epu32(H2, p->R20.v); - T6 = _mm_mul_epu32(H2, p->R21.v); - T2 = _mm_add_epi64(T2, T5); - T3 = _mm_add_epi64(T3, T6); - T5 = _mm_mul_epu32(H3, p->S24.v); - T6 = _mm_mul_epu32(H3, p->R20.v); - T2 = _mm_add_epi64(T2, T5); - T3 = _mm_add_epi64(T3, T6); - T5 = _mm_mul_epu32(H4, p->S23.v); - T6 = _mm_mul_epu32(H4, p->S24.v); - T2 = _mm_add_epi64(T2, T5); - T3 = _mm_add_epi64(T3, T6); - T5 = _mm_mul_epu32(H1, p->R23.v); - T4 = _mm_add_epi64(T4, T5); - T5 = _mm_mul_epu32(H2, p->R22.v); - T4 = _mm_add_epi64(T4, T5); - T5 = _mm_mul_epu32(H3, p->R21.v); - T4 = _mm_add_epi64(T4, T5); - T5 = _mm_mul_epu32(H4, p->R20.v); - T4 = _mm_add_epi64(T4, T5); - - /* H += [Mx,My]*[r^2,r^2] */ - T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)), _mm_loadl_epi64((xmmi *)(m + 16))); - T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)), _mm_loadl_epi64((xmmi *)(m + 24))); - M0 = _mm_and_si128(MMASK, T5); - M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); - T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12)); - M2 = _mm_and_si128(MMASK, T5); - M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); - M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT); - - p = &st->P[1]; - T5 = _mm_mul_epu32(M0, p->R20.v); - T6 = _mm_mul_epu32(M0, p->R21.v); - T0 = _mm_add_epi64(T0, T5); - T1 = _mm_add_epi64(T1, T6); - T5 = _mm_mul_epu32(M1, p->S24.v); - T6 = _mm_mul_epu32(M1, p->R20.v); - T0 = 
_mm_add_epi64(T0, T5); - T1 = _mm_add_epi64(T1, T6); - T5 = _mm_mul_epu32(M2, p->S23.v); - T6 = _mm_mul_epu32(M2, p->S24.v); - T0 = _mm_add_epi64(T0, T5); - T1 = _mm_add_epi64(T1, T6); - T5 = _mm_mul_epu32(M3, p->S22.v); - T6 = _mm_mul_epu32(M3, p->S23.v); - T0 = _mm_add_epi64(T0, T5); - T1 = _mm_add_epi64(T1, T6); - T5 = _mm_mul_epu32(M4, p->S21.v); - T6 = _mm_mul_epu32(M4, p->S22.v); - T0 = _mm_add_epi64(T0, T5); - T1 = _mm_add_epi64(T1, T6); - T5 = _mm_mul_epu32(M0, p->R22.v); - T6 = _mm_mul_epu32(M0, p->R23.v); - T2 = _mm_add_epi64(T2, T5); - T3 = _mm_add_epi64(T3, T6); - T5 = _mm_mul_epu32(M1, p->R21.v); - T6 = _mm_mul_epu32(M1, p->R22.v); - T2 = _mm_add_epi64(T2, T5); - T3 = _mm_add_epi64(T3, T6); - T5 = _mm_mul_epu32(M2, p->R20.v); - T6 = _mm_mul_epu32(M2, p->R21.v); - T2 = _mm_add_epi64(T2, T5); - T3 = _mm_add_epi64(T3, T6); - T5 = _mm_mul_epu32(M3, p->S24.v); - T6 = _mm_mul_epu32(M3, p->R20.v); - T2 = _mm_add_epi64(T2, T5); - T3 = _mm_add_epi64(T3, T6); - T5 = _mm_mul_epu32(M4, p->S23.v); - T6 = _mm_mul_epu32(M4, p->S24.v); - T2 = _mm_add_epi64(T2, T5); - T3 = _mm_add_epi64(T3, T6); - T5 = _mm_mul_epu32(M0, p->R24.v); - T4 = _mm_add_epi64(T4, T5); - T5 = _mm_mul_epu32(M1, p->R23.v); - T4 = _mm_add_epi64(T4, T5); - T5 = _mm_mul_epu32(M2, p->R22.v); - T4 = _mm_add_epi64(T4, T5); - T5 = _mm_mul_epu32(M3, p->R21.v); - T4 = _mm_add_epi64(T4, T5); - T5 = _mm_mul_epu32(M4, p->R20.v); - T4 = _mm_add_epi64(T4, T5); - - /* H += [Mx,My] */ - T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 32)), _mm_loadl_epi64((xmmi *)(m + 48))); - T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 40)), _mm_loadl_epi64((xmmi *)(m + 56))); - M0 = _mm_and_si128(MMASK, T5); - M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); - T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12)); - M2 = _mm_and_si128(MMASK, T5); - M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); - M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT); - - T0 = _mm_add_epi64(T0, M0); - T1 = _mm_add_epi64(T1, M1); - T2 = _mm_add_epi64(T2, M2); - T3 = _mm_add_epi64(T3, M3); - T4 = _mm_add_epi64(T4, M4); - - /* reduce */ - C1 = _mm_srli_epi64(T0, 26); - C2 = _mm_srli_epi64(T3, 26); - T0 = _mm_and_si128(T0, MMASK); - T3 = _mm_and_si128(T3, MMASK); - T1 = _mm_add_epi64(T1, C1); - T4 = _mm_add_epi64(T4, C2); - C1 = _mm_srli_epi64(T1, 26); - C2 = _mm_srli_epi64(T4, 26); - T1 = _mm_and_si128(T1, MMASK); - T4 = _mm_and_si128(T4, MMASK); - T2 = _mm_add_epi64(T2, C1); - T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE)); - C1 = _mm_srli_epi64(T2, 26); - C2 = _mm_srli_epi64(T0, 26); - T2 = _mm_and_si128(T2, MMASK); - T0 = _mm_and_si128(T0, MMASK); - T3 = _mm_add_epi64(T3, C1); - T1 = _mm_add_epi64(T1, C2); - C1 = _mm_srli_epi64(T3, 26); - T3 = _mm_and_si128(T3, MMASK); - T4 = _mm_add_epi64(T4, C1); - - /* H = (H*[r^4,r^4] + [Mx,My]*[r^2,r^2] + [Mx,My]) */ - H0 = T0; - H1 = T1; - H2 = T2; - H3 = T3; - H4 = T4; - - m += 64; - bytes -= 64; - } - - st->H[0] = H0; - st->H[1] = H1; - st->H[2] = H2; - st->H[3] = H3; - st->H[4] = H4; -} - -static size_t -poly1305_combine(poly1305_state_internal *st, const uint8_t *m, size_t bytes) -{ - const xmmi MMASK = _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask); - const xmmi HIBIT = _mm_load_si128((xmmi *)poly1305_x64_sse2_1shl128); - const xmmi FIVE = _mm_load_si128((xmmi *)poly1305_x64_sse2_5); - - poly1305_power *p; - xmmi H0, H1, H2, H3, H4; - xmmi M0, M1, M2, M3, M4; - xmmi T0, T1, T2, T3, T4, T5, T6; - xmmi C1, C2; - - uint64_t r0, r1, r2; - uint64_t t0, t1, t2, t3, t4; - uint64_t c; - size_t 
consumed = 0; - - H0 = st->H[0]; - H1 = st->H[1]; - H2 = st->H[2]; - H3 = st->H[3]; - H4 = st->H[4]; - - /* p = [r^2,r^2] */ - p = &st->P[1]; - - if (bytes >= 32) { - /* H *= [r^2,r^2] */ - T0 = _mm_mul_epu32(H0, p->R20.v); - T1 = _mm_mul_epu32(H0, p->R21.v); - T2 = _mm_mul_epu32(H0, p->R22.v); - T3 = _mm_mul_epu32(H0, p->R23.v); - T4 = _mm_mul_epu32(H0, p->R24.v); - T5 = _mm_mul_epu32(H1, p->S24.v); - T6 = _mm_mul_epu32(H1, p->R20.v); - T0 = _mm_add_epi64(T0, T5); - T1 = _mm_add_epi64(T1, T6); - T5 = _mm_mul_epu32(H2, p->S23.v); - T6 = _mm_mul_epu32(H2, p->S24.v); - T0 = _mm_add_epi64(T0, T5); - T1 = _mm_add_epi64(T1, T6); - T5 = _mm_mul_epu32(H3, p->S22.v); - T6 = _mm_mul_epu32(H3, p->S23.v); - T0 = _mm_add_epi64(T0, T5); - T1 = _mm_add_epi64(T1, T6); - T5 = _mm_mul_epu32(H4, p->S21.v); - T6 = _mm_mul_epu32(H4, p->S22.v); - T0 = _mm_add_epi64(T0, T5); - T1 = _mm_add_epi64(T1, T6); - T5 = _mm_mul_epu32(H1, p->R21.v); - T6 = _mm_mul_epu32(H1, p->R22.v); - T2 = _mm_add_epi64(T2, T5); - T3 = _mm_add_epi64(T3, T6); - T5 = _mm_mul_epu32(H2, p->R20.v); - T6 = _mm_mul_epu32(H2, p->R21.v); - T2 = _mm_add_epi64(T2, T5); - T3 = _mm_add_epi64(T3, T6); - T5 = _mm_mul_epu32(H3, p->S24.v); - T6 = _mm_mul_epu32(H3, p->R20.v); - T2 = _mm_add_epi64(T2, T5); - T3 = _mm_add_epi64(T3, T6); - T5 = _mm_mul_epu32(H4, p->S23.v); - T6 = _mm_mul_epu32(H4, p->S24.v); - T2 = _mm_add_epi64(T2, T5); - T3 = _mm_add_epi64(T3, T6); - T5 = _mm_mul_epu32(H1, p->R23.v); - T4 = _mm_add_epi64(T4, T5); - T5 = _mm_mul_epu32(H2, p->R22.v); - T4 = _mm_add_epi64(T4, T5); - T5 = _mm_mul_epu32(H3, p->R21.v); - T4 = _mm_add_epi64(T4, T5); - T5 = _mm_mul_epu32(H4, p->R20.v); - T4 = _mm_add_epi64(T4, T5); - - /* H += [Mx,My] */ - T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)), _mm_loadl_epi64((xmmi *)(m + 16))); - T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)), _mm_loadl_epi64((xmmi *)(m + 24))); - M0 = _mm_and_si128(MMASK, T5); - M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); - T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12)); - M2 = _mm_and_si128(MMASK, T5); - M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); - M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT); - - T0 = _mm_add_epi64(T0, M0); - T1 = _mm_add_epi64(T1, M1); - T2 = _mm_add_epi64(T2, M2); - T3 = _mm_add_epi64(T3, M3); - T4 = _mm_add_epi64(T4, M4); - - /* reduce */ - C1 = _mm_srli_epi64(T0, 26); - C2 = _mm_srli_epi64(T3, 26); - T0 = _mm_and_si128(T0, MMASK); - T3 = _mm_and_si128(T3, MMASK); - T1 = _mm_add_epi64(T1, C1); - T4 = _mm_add_epi64(T4, C2); - C1 = _mm_srli_epi64(T1, 26); - C2 = _mm_srli_epi64(T4, 26); - T1 = _mm_and_si128(T1, MMASK); - T4 = _mm_and_si128(T4, MMASK); - T2 = _mm_add_epi64(T2, C1); - T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE)); - C1 = _mm_srli_epi64(T2, 26); - C2 = _mm_srli_epi64(T0, 26); - T2 = _mm_and_si128(T2, MMASK); - T0 = _mm_and_si128(T0, MMASK); - T3 = _mm_add_epi64(T3, C1); - T1 = _mm_add_epi64(T1, C2); - C1 = _mm_srli_epi64(T3, 26); - T3 = _mm_and_si128(T3, MMASK); - T4 = _mm_add_epi64(T4, C1); - - /* H = (H*[r^2,r^2] + [Mx,My]) */ - H0 = T0; - H1 = T1; - H2 = T2; - H3 = T3; - H4 = T4; - - consumed = 32; - } - - /* finalize, H *= [r^2,r] */ - r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1]; - r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1]; - r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1]; - - p->R20.d[2] = (uint32_t)(r0)&0x3ffffff; - p->R21.d[2] = (uint32_t)((r0 >> 26) | (r1 << 18)) & 0x3ffffff; - p->R22.d[2] = (uint32_t)((r1 >> 8)) & 0x3ffffff; - p->R23.d[2] = 
(uint32_t)((r1 >> 34) | (r2 << 10)) & 0x3ffffff; - p->R24.d[2] = (uint32_t)((r2 >> 16)); - p->S21.d[2] = p->R21.d[2] * 5; - p->S22.d[2] = p->R22.d[2] * 5; - p->S23.d[2] = p->R23.d[2] * 5; - p->S24.d[2] = p->R24.d[2] * 5; - - /* H *= [r^2,r] */ - T0 = _mm_mul_epu32(H0, p->R20.v); - T1 = _mm_mul_epu32(H0, p->R21.v); - T2 = _mm_mul_epu32(H0, p->R22.v); - T3 = _mm_mul_epu32(H0, p->R23.v); - T4 = _mm_mul_epu32(H0, p->R24.v); - T5 = _mm_mul_epu32(H1, p->S24.v); - T6 = _mm_mul_epu32(H1, p->R20.v); - T0 = _mm_add_epi64(T0, T5); - T1 = _mm_add_epi64(T1, T6); - T5 = _mm_mul_epu32(H2, p->S23.v); - T6 = _mm_mul_epu32(H2, p->S24.v); - T0 = _mm_add_epi64(T0, T5); - T1 = _mm_add_epi64(T1, T6); - T5 = _mm_mul_epu32(H3, p->S22.v); - T6 = _mm_mul_epu32(H3, p->S23.v); - T0 = _mm_add_epi64(T0, T5); - T1 = _mm_add_epi64(T1, T6); - T5 = _mm_mul_epu32(H4, p->S21.v); - T6 = _mm_mul_epu32(H4, p->S22.v); - T0 = _mm_add_epi64(T0, T5); - T1 = _mm_add_epi64(T1, T6); - T5 = _mm_mul_epu32(H1, p->R21.v); - T6 = _mm_mul_epu32(H1, p->R22.v); - T2 = _mm_add_epi64(T2, T5); - T3 = _mm_add_epi64(T3, T6); - T5 = _mm_mul_epu32(H2, p->R20.v); - T6 = _mm_mul_epu32(H2, p->R21.v); - T2 = _mm_add_epi64(T2, T5); - T3 = _mm_add_epi64(T3, T6); - T5 = _mm_mul_epu32(H3, p->S24.v); - T6 = _mm_mul_epu32(H3, p->R20.v); - T2 = _mm_add_epi64(T2, T5); - T3 = _mm_add_epi64(T3, T6); - T5 = _mm_mul_epu32(H4, p->S23.v); - T6 = _mm_mul_epu32(H4, p->S24.v); - T2 = _mm_add_epi64(T2, T5); - T3 = _mm_add_epi64(T3, T6); - T5 = _mm_mul_epu32(H1, p->R23.v); - T4 = _mm_add_epi64(T4, T5); - T5 = _mm_mul_epu32(H2, p->R22.v); - T4 = _mm_add_epi64(T4, T5); - T5 = _mm_mul_epu32(H3, p->R21.v); - T4 = _mm_add_epi64(T4, T5); - T5 = _mm_mul_epu32(H4, p->R20.v); - T4 = _mm_add_epi64(T4, T5); - - C1 = _mm_srli_epi64(T0, 26); - C2 = _mm_srli_epi64(T3, 26); - T0 = _mm_and_si128(T0, MMASK); - T3 = _mm_and_si128(T3, MMASK); - T1 = _mm_add_epi64(T1, C1); - T4 = _mm_add_epi64(T4, C2); - C1 = _mm_srli_epi64(T1, 26); - C2 = _mm_srli_epi64(T4, 26); - T1 = _mm_and_si128(T1, MMASK); - T4 = _mm_and_si128(T4, MMASK); - T2 = _mm_add_epi64(T2, C1); - T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE)); - C1 = _mm_srli_epi64(T2, 26); - C2 = _mm_srli_epi64(T0, 26); - T2 = _mm_and_si128(T2, MMASK); - T0 = _mm_and_si128(T0, MMASK); - T3 = _mm_add_epi64(T3, C1); - T1 = _mm_add_epi64(T1, C2); - C1 = _mm_srli_epi64(T3, 26); - T3 = _mm_and_si128(T3, MMASK); - T4 = _mm_add_epi64(T4, C1); - - /* H = H[0]+H[1] */ - H0 = _mm_add_epi64(T0, _mm_srli_si128(T0, 8)); - H1 = _mm_add_epi64(T1, _mm_srli_si128(T1, 8)); - H2 = _mm_add_epi64(T2, _mm_srli_si128(T2, 8)); - H3 = _mm_add_epi64(T3, _mm_srli_si128(T3, 8)); - H4 = _mm_add_epi64(T4, _mm_srli_si128(T4, 8)); - - t0 = _mm_cvtsi128_si32(H0); - c = (t0 >> 26); - t0 &= 0x3ffffff; - t1 = _mm_cvtsi128_si32(H1) + c; - c = (t1 >> 26); - t1 &= 0x3ffffff; - t2 = _mm_cvtsi128_si32(H2) + c; - c = (t2 >> 26); - t2 &= 0x3ffffff; - t3 = _mm_cvtsi128_si32(H3) + c; - c = (t3 >> 26); - t3 &= 0x3ffffff; - t4 = _mm_cvtsi128_si32(H4) + c; - c = (t4 >> 26); - t4 &= 0x3ffffff; - t0 = t0 + (c * 5); - c = (t0 >> 26); - t0 &= 0x3ffffff; - t1 = t1 + c; - - st->HH[0] = ((t0) | (t1 << 26)) & 0xfffffffffffull; - st->HH[1] = ((t1 >> 18) | (t2 << 8) | (t3 << 34)) & 0xfffffffffffull; - st->HH[2] = ((t3 >> 10) | (t4 << 16)) & 0x3ffffffffffull; - - return consumed; -} - -void -Poly1305Update(poly1305_state *state, const unsigned char *m, size_t bytes) -{ - poly1305_state_internal *st = poly1305_aligned_state(state); - size_t want; - - /* need at least 32 initial bytes to start the 
accelerated branch */ - if (!st->started) { - if ((st->leftover == 0) && (bytes > 32)) { - poly1305_first_block(st, m); - m += 32; - bytes -= 32; - } else { - want = poly1305_min(32 - st->leftover, bytes); - poly1305_block_copy(st->buffer + st->leftover, m, want); - bytes -= want; - m += want; - st->leftover += want; - if ((st->leftover < 32) || (bytes == 0)) - return; - poly1305_first_block(st, st->buffer); - st->leftover = 0; - } - st->started = 1; - } - - /* handle leftover */ - if (st->leftover) { - want = poly1305_min(64 - st->leftover, bytes); - poly1305_block_copy(st->buffer + st->leftover, m, want); - bytes -= want; - m += want; - st->leftover += want; - if (st->leftover < 64) - return; - poly1305_blocks(st, st->buffer, 64); - st->leftover = 0; - } - - /* process 64 byte blocks */ - if (bytes >= 64) { - want = (bytes & ~63); - poly1305_blocks(st, m, want); - m += want; - bytes -= want; - } - - if (bytes) { - poly1305_block_copy(st->buffer + st->leftover, m, bytes); - st->leftover += bytes; - } -} - -void -Poly1305Finish(poly1305_state *state, unsigned char mac[16]) -{ - poly1305_state_internal *st = poly1305_aligned_state(state); - size_t leftover = st->leftover; - uint8_t *m = st->buffer; - uint128_t d[3]; - uint64_t h0, h1, h2; - uint64_t t0, t1; - uint64_t g0, g1, g2, c, nc; - uint64_t r0, r1, r2, s1, s2; - poly1305_power *p; - - if (st->started) { - size_t consumed = poly1305_combine(st, m, leftover); - leftover -= consumed; - m += consumed; - } - - /* st->HH will either be 0 or have the combined result */ - h0 = st->HH[0]; - h1 = st->HH[1]; - h2 = st->HH[2]; - - p = &st->P[1]; - r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1]; - r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1]; - r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1]; - s1 = r1 * (5 << 2); - s2 = r2 * (5 << 2); - - if (leftover < 16) - goto poly1305_donna_atmost15bytes; - -poly1305_donna_atleast16bytes: - t0 = U8TO64_LE(m + 0); - t1 = U8TO64_LE(m + 8); - h0 += t0 & 0xfffffffffff; - t0 = shr128_pair(t1, t0, 44); - h1 += t0 & 0xfffffffffff; - h2 += (t1 >> 24) | ((uint64_t)1 << 40); - -poly1305_donna_mul: - d[0] = add128(add128(mul64x64_128(h0, r0), mul64x64_128(h1, s2)), mul64x64_128(h2, s1)); - d[1] = add128(add128(mul64x64_128(h0, r1), mul64x64_128(h1, r0)), mul64x64_128(h2, s2)); - d[2] = add128(add128(mul64x64_128(h0, r2), mul64x64_128(h1, r1)), mul64x64_128(h2, r0)); - h0 = lo128(d[0]) & 0xfffffffffff; - c = shr128(d[0], 44); - d[1] = add128_64(d[1], c); - h1 = lo128(d[1]) & 0xfffffffffff; - c = shr128(d[1], 44); - d[2] = add128_64(d[2], c); - h2 = lo128(d[2]) & 0x3ffffffffff; - c = shr128(d[2], 42); - h0 += c * 5; - - m += 16; - leftover -= 16; - if (leftover >= 16) - goto poly1305_donna_atleast16bytes; - -/* final bytes */ -poly1305_donna_atmost15bytes: - if (!leftover) - goto poly1305_donna_finish; - - m[leftover++] = 1; - poly1305_block_zero(m + leftover, 16 - leftover); - leftover = 16; - - t0 = U8TO64_LE(m + 0); - t1 = U8TO64_LE(m + 8); - h0 += t0 & 0xfffffffffff; - t0 = shr128_pair(t1, t0, 44); - h1 += t0 & 0xfffffffffff; - h2 += (t1 >> 24); - - goto poly1305_donna_mul; - -poly1305_donna_finish: - c = (h0 >> 44); - h0 &= 0xfffffffffff; - h1 += c; - c = (h1 >> 44); - h1 &= 0xfffffffffff; - h2 += c; - c = (h2 >> 42); - h2 &= 0x3ffffffffff; - h0 += c * 5; - - g0 = h0 + 5; - c = (g0 >> 44); - g0 &= 0xfffffffffff; - g1 = h1 + c; - c = (g1 >> 44); - g1 &= 0xfffffffffff; - g2 = h2 + c - ((uint64_t)1 << 42); - - c = (g2 >> 63) - 1; - nc = ~c; - h0 = (h0 & nc) | (g0 & c); - h1 = (h1 & 
nc) | (g1 & c); - h2 = (h2 & nc) | (g2 & c); - - /* pad */ - t0 = ((uint64_t)p->R23.d[3] << 32) | (uint64_t)p->R23.d[1]; - t1 = ((uint64_t)p->R24.d[3] << 32) | (uint64_t)p->R24.d[1]; - h0 += (t0 & 0xfffffffffff); - c = (h0 >> 44); - h0 &= 0xfffffffffff; - t0 = shr128_pair(t1, t0, 44); - h1 += (t0 & 0xfffffffffff) + c; - c = (h1 >> 44); - h1 &= 0xfffffffffff; - t1 = (t1 >> 24); - h2 += (t1) + c; - - U64TO8_LE(mac + 0, ((h0) | (h1 << 44))); - U64TO8_LE(mac + 8, ((h1 >> 20) | (h2 << 24))); -} diff --git a/security/nss/lib/freebl/poly1305.c b/security/nss/lib/freebl/poly1305.c deleted file mode 100644 index eb3e3cd55..000000000 --- a/security/nss/lib/freebl/poly1305.c +++ /dev/null @@ -1,314 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -/* This implementation of poly1305 is by Andrew Moon - * (https://github.com/floodyberry/poly1305-donna) and released as public - * domain. */ - -#include <string.h> - -#include "poly1305.h" - -#if defined(_MSC_VER) && _MSC_VER < 1600 -#include "prtypes.h" -typedef PRUint32 uint32_t; -typedef PRUint64 uint64_t; -#else -#include <stdint.h> -#endif - -#if defined(NSS_X86) || defined(NSS_X64) -/* We can assume little-endian. */ -static uint32_t -U8TO32_LE(const unsigned char *m) -{ - uint32_t r; - memcpy(&r, m, sizeof(r)); - return r; -} - -static void -U32TO8_LE(unsigned char *m, uint32_t v) -{ - memcpy(m, &v, sizeof(v)); -} -#else -static uint32_t -U8TO32_LE(const unsigned char *m) -{ - return (uint32_t)m[0] | - (uint32_t)m[1] << 8 | - (uint32_t)m[2] << 16 | - (uint32_t)m[3] << 24; -} - -static void -U32TO8_LE(unsigned char *m, uint32_t v) -{ - m[0] = v; - m[1] = v >> 8; - m[2] = v >> 16; - m[3] = v >> 24; -} -#endif - -static uint64_t -mul32x32_64(uint32_t a, uint32_t b) -{ - return (uint64_t)a * b; -} - -struct poly1305_state_st { - uint32_t r0, r1, r2, r3, r4; - uint32_t s1, s2, s3, s4; - uint32_t h0, h1, h2, h3, h4; - unsigned char buf[16]; - unsigned int buf_used; - unsigned char key[16]; -}; - -/* update updates |state| given some amount of input data. This function may - * only be called with a |len| that is not a multiple of 16 at the end of the - * data. Otherwise the input must be buffered into 16 byte blocks. 
*/ -static void -update(struct poly1305_state_st *state, const unsigned char *in, - size_t len) -{ - uint32_t t0, t1, t2, t3; - uint64_t t[5]; - uint32_t b; - uint64_t c; - size_t j; - unsigned char mp[16]; - - if (len < 16) - goto poly1305_donna_atmost15bytes; - -poly1305_donna_16bytes: - t0 = U8TO32_LE(in); - t1 = U8TO32_LE(in + 4); - t2 = U8TO32_LE(in + 8); - t3 = U8TO32_LE(in + 12); - - in += 16; - len -= 16; - - state->h0 += t0 & 0x3ffffff; - state->h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff; - state->h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff; - state->h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff; - state->h4 += (t3 >> 8) | (1 << 24); - -poly1305_donna_mul: - t[0] = mul32x32_64(state->h0, state->r0) + - mul32x32_64(state->h1, state->s4) + - mul32x32_64(state->h2, state->s3) + - mul32x32_64(state->h3, state->s2) + - mul32x32_64(state->h4, state->s1); - t[1] = mul32x32_64(state->h0, state->r1) + - mul32x32_64(state->h1, state->r0) + - mul32x32_64(state->h2, state->s4) + - mul32x32_64(state->h3, state->s3) + - mul32x32_64(state->h4, state->s2); - t[2] = mul32x32_64(state->h0, state->r2) + - mul32x32_64(state->h1, state->r1) + - mul32x32_64(state->h2, state->r0) + - mul32x32_64(state->h3, state->s4) + - mul32x32_64(state->h4, state->s3); - t[3] = mul32x32_64(state->h0, state->r3) + - mul32x32_64(state->h1, state->r2) + - mul32x32_64(state->h2, state->r1) + - mul32x32_64(state->h3, state->r0) + - mul32x32_64(state->h4, state->s4); - t[4] = mul32x32_64(state->h0, state->r4) + - mul32x32_64(state->h1, state->r3) + - mul32x32_64(state->h2, state->r2) + - mul32x32_64(state->h3, state->r1) + - mul32x32_64(state->h4, state->r0); - - state->h0 = (uint32_t)t[0] & 0x3ffffff; - c = (t[0] >> 26); - t[1] += c; - state->h1 = (uint32_t)t[1] & 0x3ffffff; - b = (uint32_t)(t[1] >> 26); - t[2] += b; - state->h2 = (uint32_t)t[2] & 0x3ffffff; - b = (uint32_t)(t[2] >> 26); - t[3] += b; - state->h3 = (uint32_t)t[3] & 0x3ffffff; - b = (uint32_t)(t[3] >> 26); - t[4] += b; - state->h4 = (uint32_t)t[4] & 0x3ffffff; - b = (uint32_t)(t[4] >> 26); - state->h0 += b * 5; - - if (len >= 16) - goto poly1305_donna_16bytes; - -/* final bytes */ -poly1305_donna_atmost15bytes: - if (!len) - return; - - for (j = 0; j < len; j++) - mp[j] = in[j]; - mp[j++] = 1; - for (; j < 16; j++) - mp[j] = 0; - len = 0; - - t0 = U8TO32_LE(mp + 0); - t1 = U8TO32_LE(mp + 4); - t2 = U8TO32_LE(mp + 8); - t3 = U8TO32_LE(mp + 12); - - state->h0 += t0 & 0x3ffffff; - state->h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff; - state->h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff; - state->h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff; - state->h4 += (t3 >> 8); - - goto poly1305_donna_mul; -} - -void -Poly1305Init(poly1305_state *statep, const unsigned char key[32]) -{ - struct poly1305_state_st *state = (struct poly1305_state_st *)statep; - uint32_t t0, t1, t2, t3; - - t0 = U8TO32_LE(key + 0); - t1 = U8TO32_LE(key + 4); - t2 = U8TO32_LE(key + 8); - t3 = U8TO32_LE(key + 12); - - /* precompute multipliers */ - state->r0 = t0 & 0x3ffffff; - t0 >>= 26; - t0 |= t1 << 6; - state->r1 = t0 & 0x3ffff03; - t1 >>= 20; - t1 |= t2 << 12; - state->r2 = t1 & 0x3ffc0ff; - t2 >>= 14; - t2 |= t3 << 18; - state->r3 = t2 & 0x3f03fff; - t3 >>= 8; - state->r4 = t3 & 0x00fffff; - - state->s1 = state->r1 * 5; - state->s2 = state->r2 * 5; - state->s3 = state->r3 * 5; - state->s4 = state->r4 * 5; - - /* init state */ - state->h0 = 0; - state->h1 = 0; - state->h2 = 0; - state->h3 = 0; - state->h4 = 0; - - state->buf_used = 0; 
- memcpy(state->key, key + 16, sizeof(state->key)); -} - -void -Poly1305Update(poly1305_state *statep, const unsigned char *in, - size_t in_len) -{ - unsigned int i; - struct poly1305_state_st *state = (struct poly1305_state_st *)statep; - - if (state->buf_used) { - unsigned int todo = 16 - state->buf_used; - if (todo > in_len) - todo = in_len; - for (i = 0; i < todo; i++) - state->buf[state->buf_used + i] = in[i]; - state->buf_used += todo; - in_len -= todo; - in += todo; - - if (state->buf_used == 16) { - update(state, state->buf, 16); - state->buf_used = 0; - } - } - - if (in_len >= 16) { - size_t todo = in_len & ~0xf; - update(state, in, todo); - in += todo; - in_len &= 0xf; - } - - if (in_len) { - for (i = 0; i < in_len; i++) - state->buf[i] = in[i]; - state->buf_used = in_len; - } -} - -void -Poly1305Finish(poly1305_state *statep, unsigned char mac[16]) -{ - struct poly1305_state_st *state = (struct poly1305_state_st *)statep; - uint64_t f0, f1, f2, f3; - uint32_t g0, g1, g2, g3, g4; - uint32_t b, nb; - - if (state->buf_used) - update(state, state->buf, state->buf_used); - - b = state->h0 >> 26; - state->h0 = state->h0 & 0x3ffffff; - state->h1 += b; - b = state->h1 >> 26; - state->h1 = state->h1 & 0x3ffffff; - state->h2 += b; - b = state->h2 >> 26; - state->h2 = state->h2 & 0x3ffffff; - state->h3 += b; - b = state->h3 >> 26; - state->h3 = state->h3 & 0x3ffffff; - state->h4 += b; - b = state->h4 >> 26; - state->h4 = state->h4 & 0x3ffffff; - state->h0 += b * 5; - - g0 = state->h0 + 5; - b = g0 >> 26; - g0 &= 0x3ffffff; - g1 = state->h1 + b; - b = g1 >> 26; - g1 &= 0x3ffffff; - g2 = state->h2 + b; - b = g2 >> 26; - g2 &= 0x3ffffff; - g3 = state->h3 + b; - b = g3 >> 26; - g3 &= 0x3ffffff; - g4 = state->h4 + b - (1 << 26); - - b = (g4 >> 31) - 1; - nb = ~b; - state->h0 = (state->h0 & nb) | (g0 & b); - state->h1 = (state->h1 & nb) | (g1 & b); - state->h2 = (state->h2 & nb) | (g2 & b); - state->h3 = (state->h3 & nb) | (g3 & b); - state->h4 = (state->h4 & nb) | (g4 & b); - - f0 = ((state->h0) | (state->h1 << 26)) + (uint64_t)U8TO32_LE(&state->key[0]); - f1 = ((state->h1 >> 6) | (state->h2 << 20)) + (uint64_t)U8TO32_LE(&state->key[4]); - f2 = ((state->h2 >> 12) | (state->h3 << 14)) + (uint64_t)U8TO32_LE(&state->key[8]); - f3 = ((state->h3 >> 18) | (state->h4 << 8)) + (uint64_t)U8TO32_LE(&state->key[12]); - - U32TO8_LE(&mac[0], (uint32_t)f0); - f1 += (f0 >> 32); - U32TO8_LE(&mac[4], (uint32_t)f1); - f2 += (f1 >> 32); - U32TO8_LE(&mac[8], (uint32_t)f2); - f3 += (f2 >> 32); - U32TO8_LE(&mac[12], (uint32_t)f3); -} diff --git a/security/nss/lib/freebl/poly1305.h b/security/nss/lib/freebl/poly1305.h deleted file mode 100644 index 125f49b3b..000000000 --- a/security/nss/lib/freebl/poly1305.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * poly1305.h - header file for Poly1305 implementation. - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -#ifndef FREEBL_POLY1305_H_ -#define FREEBL_POLY1305_H_ - -#include "stddef.h" - -typedef unsigned char poly1305_state[512]; - -/* Poly1305Init sets up |state| so that it can be used to calculate an - * authentication tag with the one-time key |key|. Note that |key| is a - * one-time key and therefore there is no `reset' method because that would - * enable several messages to be authenticated with the same key. 
*/ -extern void Poly1305Init(poly1305_state* state, const unsigned char key[32]); - -/* Poly1305Update processes |in_len| bytes from |in|. It can be called zero or - * more times after poly1305_init. */ -extern void Poly1305Update(poly1305_state* state, const unsigned char* in, - size_t inLen); - -/* Poly1305Finish completes the poly1305 calculation and writes a 16 byte - * authentication tag to |mac|. */ -extern void Poly1305Finish(poly1305_state* state, unsigned char mac[16]); - -#endif /* FREEBL_POLY1305_H_ */ diff --git a/security/nss/lib/freebl/unix_urandom.c b/security/nss/lib/freebl/unix_urandom.c index 25e6ad91c..869a5ed8c 100644 --- a/security/nss/lib/freebl/unix_urandom.c +++ b/security/nss/lib/freebl/unix_urandom.c @@ -4,10 +4,14 @@ #include <fcntl.h> #include <unistd.h> +#include <errno.h> #include "secerr.h" #include "secrng.h" #include "prprf.h" +/* syscall getentropy() is limited to retrieving 256 bytes */ +#define GETENTROPY_MAX_BYTES 256 + void RNG_SystemInfoForRNG(void) { @@ -28,6 +32,35 @@ RNG_SystemRNG(void *dest, size_t maxLen) size_t fileBytes = 0; unsigned char *buffer = dest; +#if defined(__OpenBSD__) || (defined(LINUX) && defined(__GLIBC__) && ((__GLIBC__ > 2) || ((__GLIBC__ == 2) && (__GLIBC_MINOR__ >= 25)))) + int result; + + while (fileBytes < maxLen) { + size_t getBytes = maxLen - fileBytes; + if (getBytes > GETENTROPY_MAX_BYTES) { + getBytes = GETENTROPY_MAX_BYTES; + } + result = getentropy(buffer, getBytes); + if (result == 0) { /* success */ + fileBytes += getBytes; + buffer += getBytes; + } else { + break; + } + } + if (fileBytes == maxLen) { /* success */ + return maxLen; + } + /* If we failed with an error other than ENOSYS, it means the destination + * buffer is not writeable. We don't need to try writing to it again. */ + if (errno != ENOSYS) { + PORT_SetError(SEC_ERROR_NEED_RANDOM); + return 0; + } + /* ENOSYS means the kernel doesn't support getentropy()/getrandom(). + * Reset the number of bytes to get and fall back to /dev/urandom. */ + fileBytes = 0; +#endif fd = open("/dev/urandom", O_RDONLY); if (fd < 0) { PORT_SetError(SEC_ERROR_NEED_RANDOM); diff --git a/security/nss/lib/freebl/verified/Hacl_Poly1305_32.c b/security/nss/lib/freebl/verified/Hacl_Poly1305_32.c new file mode 100644 index 000000000..246a41af3 --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Poly1305_32.c @@ -0,0 +1,578 @@ +/* Copyright 2016-2017 INRIA and Microsoft Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Hacl_Poly1305_32.h" + +inline static void +Hacl_Bignum_Modulo_reduce(uint32_t *b) +{ + uint32_t b0 = b[0U]; + b[0U] = (b0 << (uint32_t)2U) + b0; +} + +inline static void +Hacl_Bignum_Modulo_carry_top(uint32_t *b) +{ + uint32_t b4 = b[4U]; + uint32_t b0 = b[0U]; + uint32_t b4_26 = b4 >> (uint32_t)26U; + b[4U] = b4 & (uint32_t)0x3ffffffU; + b[0U] = (b4_26 << (uint32_t)2U) + b4_26 + b0; +} + +inline static void +Hacl_Bignum_Modulo_carry_top_wide(uint64_t *b) +{ + uint64_t b4 = b[4U]; + uint64_t b0 = b[0U]; + uint64_t b4_ = b4 & (uint64_t)(uint32_t)0x3ffffffU; + uint32_t b4_26 = (uint32_t)(b4 >> (uint32_t)26U); + uint64_t b0_ = b0 + (uint64_t)((b4_26 << (uint32_t)2U) + b4_26); + b[4U] = b4_; + b[0U] = b0_; +} + +inline static void +Hacl_Bignum_Fproduct_copy_from_wide_(uint32_t *output, uint64_t *input) +{ + for (uint32_t i = (uint32_t)0U; i < (uint32_t)5U; i = i + (uint32_t)1U) { + uint64_t xi = input[i]; + output[i] = (uint32_t)xi; + } +} + +inline static void +Hacl_Bignum_Fproduct_sum_scalar_multiplication_(uint64_t *output, uint32_t *input, uint32_t s) +{ + for (uint32_t i = (uint32_t)0U; i < (uint32_t)5U; i = i + (uint32_t)1U) { + uint64_t xi = output[i]; + uint32_t yi = input[i]; + uint64_t x_wide = (uint64_t)yi; + uint64_t y_wide = (uint64_t)s; + output[i] = xi + x_wide * y_wide; + } +} + +inline static void +Hacl_Bignum_Fproduct_carry_wide_(uint64_t *tmp) +{ + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i = i + (uint32_t)1U) { + uint32_t ctr = i; + uint64_t tctr = tmp[ctr]; + uint64_t tctrp1 = tmp[ctr + (uint32_t)1U]; + uint32_t r0 = (uint32_t)tctr & (uint32_t)0x3ffffffU; + uint64_t c = tctr >> (uint32_t)26U; + tmp[ctr] = (uint64_t)r0; + tmp[ctr + (uint32_t)1U] = tctrp1 + c; + } +} + +inline static void +Hacl_Bignum_Fproduct_carry_limb_(uint32_t *tmp) +{ + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i = i + (uint32_t)1U) { + uint32_t ctr = i; + uint32_t tctr = tmp[ctr]; + uint32_t tctrp1 = tmp[ctr + (uint32_t)1U]; + uint32_t r0 = tctr & (uint32_t)0x3ffffffU; + uint32_t c = tctr >> (uint32_t)26U; + tmp[ctr] = r0; + tmp[ctr + (uint32_t)1U] = tctrp1 + c; + } +} + +inline static void +Hacl_Bignum_Fmul_shift_reduce(uint32_t *output) +{ + uint32_t tmp = output[4U]; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i = i + (uint32_t)1U) { + uint32_t ctr = (uint32_t)5U - i - (uint32_t)1U; + uint32_t z = output[ctr - (uint32_t)1U]; + output[ctr] = z; + } + output[0U] = tmp; + Hacl_Bignum_Modulo_reduce(output); +} + +static void +Hacl_Bignum_Fmul_mul_shift_reduce_(uint64_t *output, uint32_t *input, uint32_t *input2) +{ + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i = i + (uint32_t)1U) { + uint32_t input2i = input2[i]; + Hacl_Bignum_Fproduct_sum_scalar_multiplication_(output, input, input2i); + Hacl_Bignum_Fmul_shift_reduce(input); + } + uint32_t i = (uint32_t)4U; + uint32_t input2i = input2[i]; + Hacl_Bignum_Fproduct_sum_scalar_multiplication_(output, input, input2i); +} + +inline static void +Hacl_Bignum_Fmul_fmul(uint32_t *output, uint32_t *input, uint32_t *input2) +{ + uint32_t tmp[5U] = { 0U }; + memcpy(tmp, input, (uint32_t)5U * sizeof input[0U]); + uint64_t t[5U] = { 0U }; + Hacl_Bignum_Fmul_mul_shift_reduce_(t, tmp, input2); + Hacl_Bignum_Fproduct_carry_wide_(t); + Hacl_Bignum_Modulo_carry_top_wide(t); + Hacl_Bignum_Fproduct_copy_from_wide_(output, t); + uint32_t i0 = output[0U]; + uint32_t i1 = output[1U]; + uint32_t i0_ = i0 & (uint32_t)0x3ffffffU; + uint32_t i1_ = i1 + (i0 >> (uint32_t)26U); + output[0U] = i0_; + output[1U] = i1_; +} + +inline 
static void +Hacl_Bignum_AddAndMultiply_add_and_multiply(uint32_t *acc, uint32_t *block, uint32_t *r) +{ + for (uint32_t i = (uint32_t)0U; i < (uint32_t)5U; i = i + (uint32_t)1U) { + uint32_t xi = acc[i]; + uint32_t yi = block[i]; + acc[i] = xi + yi; + } + Hacl_Bignum_Fmul_fmul(acc, acc, r); +} + +inline static void +Hacl_Impl_Poly1305_32_poly1305_update( + Hacl_Impl_Poly1305_32_State_poly1305_state st, + uint8_t *m) +{ + Hacl_Impl_Poly1305_32_State_poly1305_state scrut0 = st; + uint32_t *h = scrut0.h; + uint32_t *acc = h; + Hacl_Impl_Poly1305_32_State_poly1305_state scrut = st; + uint32_t *r = scrut.r; + uint32_t *r5 = r; + uint32_t tmp[5U] = { 0U }; + uint8_t *s0 = m; + uint8_t *s1 = m + (uint32_t)3U; + uint8_t *s2 = m + (uint32_t)6U; + uint8_t *s3 = m + (uint32_t)9U; + uint8_t *s4 = m + (uint32_t)12U; + uint32_t i0 = load32_le(s0); + uint32_t i1 = load32_le(s1); + uint32_t i2 = load32_le(s2); + uint32_t i3 = load32_le(s3); + uint32_t i4 = load32_le(s4); + uint32_t r0 = i0 & (uint32_t)0x3ffffffU; + uint32_t r1 = i1 >> (uint32_t)2U & (uint32_t)0x3ffffffU; + uint32_t r2 = i2 >> (uint32_t)4U & (uint32_t)0x3ffffffU; + uint32_t r3 = i3 >> (uint32_t)6U & (uint32_t)0x3ffffffU; + uint32_t r4 = i4 >> (uint32_t)8U; + tmp[0U] = r0; + tmp[1U] = r1; + tmp[2U] = r2; + tmp[3U] = r3; + tmp[4U] = r4; + uint32_t b4 = tmp[4U]; + uint32_t b4_ = (uint32_t)0x1000000U | b4; + tmp[4U] = b4_; + Hacl_Bignum_AddAndMultiply_add_and_multiply(acc, tmp, r5); +} + +inline static void +Hacl_Impl_Poly1305_32_poly1305_process_last_block_( + uint8_t *block, + Hacl_Impl_Poly1305_32_State_poly1305_state st, + uint8_t *m, + uint64_t rem_) +{ + uint32_t tmp[5U] = { 0U }; + uint8_t *s0 = block; + uint8_t *s1 = block + (uint32_t)3U; + uint8_t *s2 = block + (uint32_t)6U; + uint8_t *s3 = block + (uint32_t)9U; + uint8_t *s4 = block + (uint32_t)12U; + uint32_t i0 = load32_le(s0); + uint32_t i1 = load32_le(s1); + uint32_t i2 = load32_le(s2); + uint32_t i3 = load32_le(s3); + uint32_t i4 = load32_le(s4); + uint32_t r0 = i0 & (uint32_t)0x3ffffffU; + uint32_t r1 = i1 >> (uint32_t)2U & (uint32_t)0x3ffffffU; + uint32_t r2 = i2 >> (uint32_t)4U & (uint32_t)0x3ffffffU; + uint32_t r3 = i3 >> (uint32_t)6U & (uint32_t)0x3ffffffU; + uint32_t r4 = i4 >> (uint32_t)8U; + tmp[0U] = r0; + tmp[1U] = r1; + tmp[2U] = r2; + tmp[3U] = r3; + tmp[4U] = r4; + Hacl_Impl_Poly1305_32_State_poly1305_state scrut0 = st; + uint32_t *h = scrut0.h; + Hacl_Impl_Poly1305_32_State_poly1305_state scrut = st; + uint32_t *r = scrut.r; + Hacl_Bignum_AddAndMultiply_add_and_multiply(h, tmp, r); +} + +inline static void +Hacl_Impl_Poly1305_32_poly1305_process_last_block( + Hacl_Impl_Poly1305_32_State_poly1305_state st, + uint8_t *m, + uint64_t rem_) +{ + uint8_t zero1 = (uint8_t)0U; + KRML_CHECK_SIZE(zero1, (uint32_t)16U); + uint8_t block[16U]; + for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i) + block[_i] = zero1; + uint32_t i0 = (uint32_t)rem_; + uint32_t i = (uint32_t)rem_; + memcpy(block, m, i * sizeof m[0U]); + block[i0] = (uint8_t)1U; + Hacl_Impl_Poly1305_32_poly1305_process_last_block_(block, st, m, rem_); +} + +static void +Hacl_Impl_Poly1305_32_poly1305_last_pass(uint32_t *acc) +{ + Hacl_Bignum_Fproduct_carry_limb_(acc); + Hacl_Bignum_Modulo_carry_top(acc); + uint32_t t0 = acc[0U]; + uint32_t t10 = acc[1U]; + uint32_t t20 = acc[2U]; + uint32_t t30 = acc[3U]; + uint32_t t40 = acc[4U]; + uint32_t t1_ = t10 + (t0 >> (uint32_t)26U); + uint32_t mask_261 = (uint32_t)0x3ffffffU; + uint32_t t0_ = t0 & mask_261; + uint32_t t2_ = t20 + (t1_ >> (uint32_t)26U); + uint32_t t1__ = 
t1_ & mask_261; + uint32_t t3_ = t30 + (t2_ >> (uint32_t)26U); + uint32_t t2__ = t2_ & mask_261; + uint32_t t4_ = t40 + (t3_ >> (uint32_t)26U); + uint32_t t3__ = t3_ & mask_261; + acc[0U] = t0_; + acc[1U] = t1__; + acc[2U] = t2__; + acc[3U] = t3__; + acc[4U] = t4_; + Hacl_Bignum_Modulo_carry_top(acc); + uint32_t t00 = acc[0U]; + uint32_t t1 = acc[1U]; + uint32_t t2 = acc[2U]; + uint32_t t3 = acc[3U]; + uint32_t t4 = acc[4U]; + uint32_t t1_0 = t1 + (t00 >> (uint32_t)26U); + uint32_t t0_0 = t00 & (uint32_t)0x3ffffffU; + uint32_t t2_0 = t2 + (t1_0 >> (uint32_t)26U); + uint32_t t1__0 = t1_0 & (uint32_t)0x3ffffffU; + uint32_t t3_0 = t3 + (t2_0 >> (uint32_t)26U); + uint32_t t2__0 = t2_0 & (uint32_t)0x3ffffffU; + uint32_t t4_0 = t4 + (t3_0 >> (uint32_t)26U); + uint32_t t3__0 = t3_0 & (uint32_t)0x3ffffffU; + acc[0U] = t0_0; + acc[1U] = t1__0; + acc[2U] = t2__0; + acc[3U] = t3__0; + acc[4U] = t4_0; + Hacl_Bignum_Modulo_carry_top(acc); + uint32_t i0 = acc[0U]; + uint32_t i1 = acc[1U]; + uint32_t i0_ = i0 & (uint32_t)0x3ffffffU; + uint32_t i1_ = i1 + (i0 >> (uint32_t)26U); + acc[0U] = i0_; + acc[1U] = i1_; + uint32_t a0 = acc[0U]; + uint32_t a1 = acc[1U]; + uint32_t a2 = acc[2U]; + uint32_t a3 = acc[3U]; + uint32_t a4 = acc[4U]; + uint32_t mask0 = FStar_UInt32_gte_mask(a0, (uint32_t)0x3fffffbU); + uint32_t mask1 = FStar_UInt32_eq_mask(a1, (uint32_t)0x3ffffffU); + uint32_t mask2 = FStar_UInt32_eq_mask(a2, (uint32_t)0x3ffffffU); + uint32_t mask3 = FStar_UInt32_eq_mask(a3, (uint32_t)0x3ffffffU); + uint32_t mask4 = FStar_UInt32_eq_mask(a4, (uint32_t)0x3ffffffU); + uint32_t mask = (((mask0 & mask1) & mask2) & mask3) & mask4; + uint32_t a0_ = a0 - ((uint32_t)0x3fffffbU & mask); + uint32_t a1_ = a1 - ((uint32_t)0x3ffffffU & mask); + uint32_t a2_ = a2 - ((uint32_t)0x3ffffffU & mask); + uint32_t a3_ = a3 - ((uint32_t)0x3ffffffU & mask); + uint32_t a4_ = a4 - ((uint32_t)0x3ffffffU & mask); + acc[0U] = a0_; + acc[1U] = a1_; + acc[2U] = a2_; + acc[3U] = a3_; + acc[4U] = a4_; +} + +static Hacl_Impl_Poly1305_32_State_poly1305_state +Hacl_Impl_Poly1305_32_mk_state(uint32_t *r, uint32_t *h) +{ + return ((Hacl_Impl_Poly1305_32_State_poly1305_state){.r = r, .h = h }); +} + +static void +Hacl_Standalone_Poly1305_32_poly1305_blocks( + Hacl_Impl_Poly1305_32_State_poly1305_state st, + uint8_t *m, + uint64_t len1) +{ + if (!(len1 == (uint64_t)0U)) { + uint8_t *block = m; + uint8_t *tail1 = m + (uint32_t)16U; + Hacl_Impl_Poly1305_32_poly1305_update(st, block); + uint64_t len2 = len1 - (uint64_t)1U; + Hacl_Standalone_Poly1305_32_poly1305_blocks(st, tail1, len2); + } +} + +static void +Hacl_Standalone_Poly1305_32_poly1305_partial( + Hacl_Impl_Poly1305_32_State_poly1305_state st, + uint8_t *input, + uint64_t len1, + uint8_t *kr) +{ + Hacl_Impl_Poly1305_32_State_poly1305_state scrut = st; + uint32_t *r = scrut.r; + uint32_t *x0 = r; + FStar_UInt128_t k1 = load128_le(kr); + FStar_UInt128_t + k_clamped = + FStar_UInt128_logand(k1, + FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128((uint64_t)0x0ffffffc0ffffffcU), + (uint32_t)64U), + FStar_UInt128_uint64_to_uint128((uint64_t)0x0ffffffc0fffffffU))); + uint32_t r0 = (uint32_t)FStar_UInt128_uint128_to_uint64(k_clamped) & (uint32_t)0x3ffffffU; + uint32_t + r1 = + (uint32_t)FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(k_clamped, (uint32_t)26U)) & (uint32_t)0x3ffffffU; + uint32_t + r2 = + (uint32_t)FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(k_clamped, (uint32_t)52U)) & (uint32_t)0x3ffffffU; + uint32_t + r3 = + 
(uint32_t)FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(k_clamped, (uint32_t)78U)) & (uint32_t)0x3ffffffU; + uint32_t + r4 = + (uint32_t)FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(k_clamped, (uint32_t)104U)) & (uint32_t)0x3ffffffU; + x0[0U] = r0; + x0[1U] = r1; + x0[2U] = r2; + x0[3U] = r3; + x0[4U] = r4; + Hacl_Impl_Poly1305_32_State_poly1305_state scrut0 = st; + uint32_t *h = scrut0.h; + uint32_t *x00 = h; + x00[0U] = (uint32_t)0U; + x00[1U] = (uint32_t)0U; + x00[2U] = (uint32_t)0U; + x00[3U] = (uint32_t)0U; + x00[4U] = (uint32_t)0U; + Hacl_Standalone_Poly1305_32_poly1305_blocks(st, input, len1); +} + +static void +Hacl_Standalone_Poly1305_32_poly1305_complete( + Hacl_Impl_Poly1305_32_State_poly1305_state st, + uint8_t *m, + uint64_t len1, + uint8_t *k1) +{ + uint8_t *kr = k1; + uint64_t len16 = len1 >> (uint32_t)4U; + uint64_t rem16 = len1 & (uint64_t)0xfU; + uint8_t *part_input = m; + uint8_t *last_block = m + (uint32_t)((uint64_t)16U * len16); + Hacl_Standalone_Poly1305_32_poly1305_partial(st, part_input, len16, kr); + if (!(rem16 == (uint64_t)0U)) + Hacl_Impl_Poly1305_32_poly1305_process_last_block(st, last_block, rem16); + Hacl_Impl_Poly1305_32_State_poly1305_state scrut = st; + uint32_t *h = scrut.h; + uint32_t *acc = h; + Hacl_Impl_Poly1305_32_poly1305_last_pass(acc); +} + +static void +Hacl_Standalone_Poly1305_32_crypto_onetimeauth_( + uint8_t *output, + uint8_t *input, + uint64_t len1, + uint8_t *k1) +{ + uint32_t buf[10U] = { 0U }; + uint32_t *r = buf; + uint32_t *h = buf + (uint32_t)5U; + Hacl_Impl_Poly1305_32_State_poly1305_state st = Hacl_Impl_Poly1305_32_mk_state(r, h); + uint8_t *key_s = k1 + (uint32_t)16U; + Hacl_Standalone_Poly1305_32_poly1305_complete(st, input, len1, k1); + Hacl_Impl_Poly1305_32_State_poly1305_state scrut = st; + uint32_t *h5 = scrut.h; + uint32_t *acc = h5; + FStar_UInt128_t k_ = load128_le(key_s); + uint32_t h0 = acc[0U]; + uint32_t h1 = acc[1U]; + uint32_t h2 = acc[2U]; + uint32_t h3 = acc[3U]; + uint32_t h4 = acc[4U]; + FStar_UInt128_t + acc_ = + FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128((uint64_t)h4), + (uint32_t)104U), + FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128((uint64_t)h3), + (uint32_t)78U), + FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128((uint64_t)h2), + (uint32_t)52U), + FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128((uint64_t)h1), + (uint32_t)26U), + FStar_UInt128_uint64_to_uint128((uint64_t)h0))))); + FStar_UInt128_t mac_ = FStar_UInt128_add_mod(acc_, k_); + store128_le(output, mac_); +} + +static void +Hacl_Standalone_Poly1305_32_crypto_onetimeauth( + uint8_t *output, + uint8_t *input, + uint64_t len1, + uint8_t *k1) +{ + Hacl_Standalone_Poly1305_32_crypto_onetimeauth_(output, input, len1, k1); +} + +void * +Hacl_Poly1305_32_op_String_Access(FStar_Monotonic_HyperStack_mem h, uint8_t *b) +{ + return (void *)(uint8_t)0U; +} + +Hacl_Impl_Poly1305_32_State_poly1305_state +Hacl_Poly1305_32_mk_state(uint32_t *r, uint32_t *acc) +{ + return Hacl_Impl_Poly1305_32_mk_state(r, acc); +} + +void +Hacl_Poly1305_32_init(Hacl_Impl_Poly1305_32_State_poly1305_state st, uint8_t *k1) +{ + Hacl_Impl_Poly1305_32_State_poly1305_state scrut = st; + uint32_t *r = scrut.r; + uint32_t *x0 = r; + FStar_UInt128_t k10 = load128_le(k1); + FStar_UInt128_t + k_clamped = + FStar_UInt128_logand(k10, + FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128((uint64_t)0x0ffffffc0ffffffcU), + 
(uint32_t)64U), + FStar_UInt128_uint64_to_uint128((uint64_t)0x0ffffffc0fffffffU))); + uint32_t r0 = (uint32_t)FStar_UInt128_uint128_to_uint64(k_clamped) & (uint32_t)0x3ffffffU; + uint32_t + r1 = + (uint32_t)FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(k_clamped, (uint32_t)26U)) & (uint32_t)0x3ffffffU; + uint32_t + r2 = + (uint32_t)FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(k_clamped, (uint32_t)52U)) & (uint32_t)0x3ffffffU; + uint32_t + r3 = + (uint32_t)FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(k_clamped, (uint32_t)78U)) & (uint32_t)0x3ffffffU; + uint32_t + r4 = + (uint32_t)FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(k_clamped, (uint32_t)104U)) & (uint32_t)0x3ffffffU; + x0[0U] = r0; + x0[1U] = r1; + x0[2U] = r2; + x0[3U] = r3; + x0[4U] = r4; + Hacl_Impl_Poly1305_32_State_poly1305_state scrut0 = st; + uint32_t *h = scrut0.h; + uint32_t *x00 = h; + x00[0U] = (uint32_t)0U; + x00[1U] = (uint32_t)0U; + x00[2U] = (uint32_t)0U; + x00[3U] = (uint32_t)0U; + x00[4U] = (uint32_t)0U; +} + +void *Hacl_Poly1305_32_empty_log = (void *)(uint8_t)0U; + +void +Hacl_Poly1305_32_update_block(Hacl_Impl_Poly1305_32_State_poly1305_state st, uint8_t *m) +{ + Hacl_Impl_Poly1305_32_poly1305_update(st, m); +} + +void +Hacl_Poly1305_32_update( + Hacl_Impl_Poly1305_32_State_poly1305_state st, + uint8_t *m, + uint32_t len1) +{ + if (!(len1 == (uint32_t)0U)) { + uint8_t *block = m; + uint8_t *m_ = m + (uint32_t)16U; + uint32_t len2 = len1 - (uint32_t)1U; + Hacl_Poly1305_32_update_block(st, block); + Hacl_Poly1305_32_update(st, m_, len2); + } +} + +void +Hacl_Poly1305_32_update_last( + Hacl_Impl_Poly1305_32_State_poly1305_state st, + uint8_t *m, + uint32_t len1) +{ + if (!((uint64_t)len1 == (uint64_t)0U)) + Hacl_Impl_Poly1305_32_poly1305_process_last_block(st, m, (uint64_t)len1); + Hacl_Impl_Poly1305_32_State_poly1305_state scrut = st; + uint32_t *h = scrut.h; + uint32_t *acc = h; + Hacl_Impl_Poly1305_32_poly1305_last_pass(acc); +} + +void +Hacl_Poly1305_32_finish( + Hacl_Impl_Poly1305_32_State_poly1305_state st, + uint8_t *mac, + uint8_t *k1) +{ + Hacl_Impl_Poly1305_32_State_poly1305_state scrut = st; + uint32_t *h = scrut.h; + uint32_t *acc = h; + FStar_UInt128_t k_ = load128_le(k1); + uint32_t h0 = acc[0U]; + uint32_t h1 = acc[1U]; + uint32_t h2 = acc[2U]; + uint32_t h3 = acc[3U]; + uint32_t h4 = acc[4U]; + FStar_UInt128_t + acc_ = + FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128((uint64_t)h4), + (uint32_t)104U), + FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128((uint64_t)h3), + (uint32_t)78U), + FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128((uint64_t)h2), + (uint32_t)52U), + FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128((uint64_t)h1), + (uint32_t)26U), + FStar_UInt128_uint64_to_uint128((uint64_t)h0))))); + FStar_UInt128_t mac_ = FStar_UInt128_add_mod(acc_, k_); + store128_le(mac, mac_); +} + +void +Hacl_Poly1305_32_crypto_onetimeauth( + uint8_t *output, + uint8_t *input, + uint64_t len1, + uint8_t *k1) +{ + Hacl_Standalone_Poly1305_32_crypto_onetimeauth(output, input, len1, k1); +} diff --git a/security/nss/lib/freebl/verified/Hacl_Poly1305_32.h b/security/nss/lib/freebl/verified/Hacl_Poly1305_32.h new file mode 100644 index 000000000..4dd070026 --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Poly1305_32.h @@ -0,0 +1,103 @@ +/* Copyright 2016-2017 INRIA and Microsoft Corporation + * + * Licensed under the Apache License, Version 2.0 
(the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kremlib.h" +#ifndef __Hacl_Poly1305_32_H +#define __Hacl_Poly1305_32_H + +typedef uint32_t Hacl_Bignum_Constants_limb; + +typedef uint64_t Hacl_Bignum_Constants_wide; + +typedef uint64_t Hacl_Bignum_Wide_t; + +typedef uint32_t Hacl_Bignum_Limb_t; + +typedef void *Hacl_Impl_Poly1305_32_State_log_t; + +typedef uint8_t *Hacl_Impl_Poly1305_32_State_uint8_p; + +typedef uint32_t *Hacl_Impl_Poly1305_32_State_bigint; + +typedef void *Hacl_Impl_Poly1305_32_State_seqelem; + +typedef uint32_t *Hacl_Impl_Poly1305_32_State_elemB; + +typedef uint8_t *Hacl_Impl_Poly1305_32_State_wordB; + +typedef uint8_t *Hacl_Impl_Poly1305_32_State_wordB_16; + +typedef struct +{ + uint32_t *r; + uint32_t *h; +} Hacl_Impl_Poly1305_32_State_poly1305_state; + +typedef void *Hacl_Impl_Poly1305_32_log_t; + +typedef uint32_t *Hacl_Impl_Poly1305_32_bigint; + +typedef uint8_t *Hacl_Impl_Poly1305_32_uint8_p; + +typedef uint32_t *Hacl_Impl_Poly1305_32_elemB; + +typedef uint8_t *Hacl_Impl_Poly1305_32_wordB; + +typedef uint8_t *Hacl_Impl_Poly1305_32_wordB_16; + +typedef uint8_t *Hacl_Poly1305_32_uint8_p; + +typedef uint64_t Hacl_Poly1305_32_uint64_t; + +void *Hacl_Poly1305_32_op_String_Access(FStar_Monotonic_HyperStack_mem h, uint8_t *b); + +typedef uint8_t *Hacl_Poly1305_32_key; + +typedef Hacl_Impl_Poly1305_32_State_poly1305_state Hacl_Poly1305_32_state; + +Hacl_Impl_Poly1305_32_State_poly1305_state +Hacl_Poly1305_32_mk_state(uint32_t *r, uint32_t *acc); + +void Hacl_Poly1305_32_init(Hacl_Impl_Poly1305_32_State_poly1305_state st, uint8_t *k1); + +extern void *Hacl_Poly1305_32_empty_log; + +void Hacl_Poly1305_32_update_block(Hacl_Impl_Poly1305_32_State_poly1305_state st, uint8_t *m); + +void +Hacl_Poly1305_32_update( + Hacl_Impl_Poly1305_32_State_poly1305_state st, + uint8_t *m, + uint32_t len1); + +void +Hacl_Poly1305_32_update_last( + Hacl_Impl_Poly1305_32_State_poly1305_state st, + uint8_t *m, + uint32_t len1); + +void +Hacl_Poly1305_32_finish( + Hacl_Impl_Poly1305_32_State_poly1305_state st, + uint8_t *mac, + uint8_t *k1); + +void +Hacl_Poly1305_32_crypto_onetimeauth( + uint8_t *output, + uint8_t *input, + uint64_t len1, + uint8_t *k1); +#endif |
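To make the new 32-bit API concrete, here is a minimal usage sketch (not part of the patch) showing how a caller could compute a Poly1305 tag with the functions declared in Hacl_Poly1305_32.h above. The wrapper name poly1305_32_mac and the local buffer layout are illustrative assumptions, not NSS code.

#include <stdint.h>
#include "Hacl_Poly1305_32.h"

static void
poly1305_32_mac(uint8_t *mac /* 16 bytes */, uint8_t *msg, uint32_t msgLen,
                uint8_t *key /* 32 bytes */)
{
    /* The state is two caller-owned 5-limb bignums: r (the clamped key part)
     * and the accumulator h. mk_state only wraps the pointers. */
    uint32_t buf[10U] = { 0U };
    Hacl_Impl_Poly1305_32_State_poly1305_state st =
        Hacl_Poly1305_32_mk_state(buf, buf + (uint32_t)5U);

    /* init reads the first 16 key bytes, clamps them, and stores them as
     * five 26-bit limbs in r; it also zeroes the accumulator. */
    Hacl_Poly1305_32_init(st, key);

    /* update takes the number of complete 16-byte blocks, not a byte count. */
    uint32_t blocks = msgLen >> 4U;
    uint32_t rem = msgLen & (uint32_t)0xfU;
    Hacl_Poly1305_32_update(st, msg, blocks);

    /* update_last takes the leftover byte count (0..15); it pads the final
     * partial block if there is one and performs the final reduction. */
    Hacl_Poly1305_32_update_last(st, msg + (uint32_t)16U * blocks, rem);

    /* finish adds the second 16 key bytes (s) to the encoded accumulator
     * and writes the 16-byte tag. */
    Hacl_Poly1305_32_finish(st, mac, key + (uint32_t)16U);
}

This mirrors the one-shot Hacl_Standalone_Poly1305_32_crypto_onetimeauth_ path shown above, but drives the incremental update/update_last entry points instead.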