Diffstat (limited to 'security/nss/lib/freebl')
-rw-r--r--  security/nss/lib/freebl/Makefile                                      |   10
-rw-r--r--  security/nss/lib/freebl/blake2b.c                                     |    2
-rw-r--r--  security/nss/lib/freebl/chacha20poly1305.c                            |   88
-rw-r--r--  security/nss/lib/freebl/dsa.c                                         |   37
-rw-r--r--  security/nss/lib/freebl/ec.c                                          |   29
-rw-r--r--  security/nss/lib/freebl/freebl.gyp                                    |   36
-rw-r--r--  security/nss/lib/freebl/freebl_base.gypi                              |   15
-rw-r--r--  security/nss/lib/freebl/loader.c                                      |    4
-rw-r--r--  security/nss/lib/freebl/mpi/mpi.c                                     |   13
-rw-r--r--  security/nss/lib/freebl/poly1305-donna-x64-sse2-incremental-source.c  |  881
-rw-r--r--  security/nss/lib/freebl/poly1305.c                                    |  314
-rw-r--r--  security/nss/lib/freebl/poly1305.h                                    |   30
-rw-r--r--  security/nss/lib/freebl/unix_urandom.c                                |   33
-rw-r--r--  security/nss/lib/freebl/verified/Hacl_Poly1305_32.c                   |  578
-rw-r--r--  security/nss/lib/freebl/verified/Hacl_Poly1305_32.h                   |  103
15 files changed, 846 insertions(+), 1327 deletions(-)
diff --git a/security/nss/lib/freebl/Makefile b/security/nss/lib/freebl/Makefile
index a4b1a86ae..bff11c7c8 100644
--- a/security/nss/lib/freebl/Makefile
+++ b/security/nss/lib/freebl/Makefile
@@ -517,13 +517,13 @@ ifndef NSS_DISABLE_CHACHAPOLY
ifdef HAVE_INT128_SUPPORT
EXTRA_SRCS += Hacl_Poly1305_64.c
else
- EXTRA_SRCS += poly1305.c
+ EXTRA_SRCS += Hacl_Poly1305_32.c
endif
else
ifeq ($(CPU_ARCH),aarch64)
EXTRA_SRCS += Hacl_Poly1305_64.c
else
- EXTRA_SRCS += poly1305.c
+ EXTRA_SRCS += Hacl_Poly1305_32.c
endif
endif # x86_64
@@ -535,12 +535,16 @@ ifeq (,$(filter-out i386 x386 x86 x86_64 aarch64,$(CPU_ARCH)))
# All intel architectures get the 64 bit version
# With custom uint128 if necessary (faster than generic 32 bit version).
ECL_SRCS += curve25519_64.c
- VERIFIED_SRCS += Hacl_Curve25519.c FStar.c
+ VERIFIED_SRCS += Hacl_Curve25519.c
else
# All non intel architectures get the generic 32 bit implementation (slow!)
ECL_SRCS += curve25519_32.c
endif
+ifndef HAVE_INT128_SUPPORT
+ VERIFIED_SRCS += FStar.c
+endif
+
#######################################################################
# (5) Execute "global" rules. (OPTIONAL) #
#######################################################################
diff --git a/security/nss/lib/freebl/blake2b.c b/security/nss/lib/freebl/blake2b.c
index 4099c67e0..b4a0442c9 100644
--- a/security/nss/lib/freebl/blake2b.c
+++ b/security/nss/lib/freebl/blake2b.c
@@ -180,7 +180,7 @@ blake2b_Begin(BLAKE2BContext* ctx, uint8_t outlen, const uint8_t* key,
return SECSuccess;
failure:
- PORT_Memset(&ctx, 0, sizeof(ctx));
+ PORT_Memset(ctx, 0, sizeof(*ctx));
PORT_SetError(SEC_ERROR_INVALID_ARGS);
return SECFailure;
}
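
The blake2b.c fix above is a pointer/size mix-up worth spelling out: with a pointer parameter, sizeof(ctx) is the size of the pointer itself and &ctx is the address of the local variable, so the old call scrubbed at most a few stack bytes and never cleared the real context. A minimal illustration (the struct name is a stand-in, not the NSS type):

    #include <string.h>

    typedef struct { unsigned char state[64]; } Ctx; /* stand-in for BLAKE2BContext */

    static void
    wipe(Ctx *ctx)
    {
        /* Wrong: zeroes sizeof(Ctx *) bytes at the address of the local
         * pointer variable; the pointed-to context is left untouched. */
        /* memset(&ctx, 0, sizeof(ctx)); */

        /* Right: zeroes the whole structure the pointer refers to. */
        memset(ctx, 0, sizeof(*ctx));
    }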
diff --git a/security/nss/lib/freebl/chacha20poly1305.c b/security/nss/lib/freebl/chacha20poly1305.c
index 859d05316..302f0db9e 100644
--- a/security/nss/lib/freebl/chacha20poly1305.c
+++ b/security/nss/lib/freebl/chacha20poly1305.c
@@ -24,36 +24,60 @@ extern void Hacl_Chacha20_Vec128_chacha20(uint8_t *output, uint8_t *plain,
extern void Hacl_Chacha20_chacha20(uint8_t *output, uint8_t *plain, uint32_t len,
uint8_t *k, uint8_t *n1, uint32_t ctr);
-/* Poly1305Do writes the Poly1305 authenticator of the given additional data
- * and ciphertext to |out|. */
#if defined(HAVE_INT128_SUPPORT) && (defined(NSS_X86_OR_X64) || defined(__aarch64__))
/* Use HACL* Poly1305 on 64-bit Intel and ARM */
#include "verified/Hacl_Poly1305_64.h"
+#define NSS_POLY1305_64 1
+#define Hacl_Poly1305_update Hacl_Poly1305_64_update
+#define Hacl_Poly1305_mk_state Hacl_Poly1305_64_mk_state
+#define Hacl_Poly1305_init Hacl_Poly1305_64_init
+#define Hacl_Poly1305_finish Hacl_Poly1305_64_finish
+typedef Hacl_Impl_Poly1305_64_State_poly1305_state Hacl_Impl_Poly1305_State_poly1305_state;
+#else
+/* All other platforms get the 32-bit poly1305 HACL* implementation. */
+#include "verified/Hacl_Poly1305_32.h"
+#define NSS_POLY1305_32 1
+#define Hacl_Poly1305_update Hacl_Poly1305_32_update
+#define Hacl_Poly1305_mk_state Hacl_Poly1305_32_mk_state
+#define Hacl_Poly1305_init Hacl_Poly1305_32_init
+#define Hacl_Poly1305_finish Hacl_Poly1305_32_finish
+typedef Hacl_Impl_Poly1305_32_State_poly1305_state Hacl_Impl_Poly1305_State_poly1305_state;
+#endif /* HAVE_INT128_SUPPORT */
static void
-Poly1305PadUpdate(Hacl_Impl_Poly1305_64_State_poly1305_state state,
+Poly1305PadUpdate(Hacl_Impl_Poly1305_State_poly1305_state state,
unsigned char *block, const unsigned char *p,
const unsigned int pLen)
{
unsigned int pRemLen = pLen % 16;
- Hacl_Poly1305_64_update(state, (uint8_t *)p, (pLen / 16));
+ Hacl_Poly1305_update(state, (uint8_t *)p, (pLen / 16));
if (pRemLen > 0) {
memcpy(block, p + (pLen - pRemLen), pRemLen);
- Hacl_Poly1305_64_update(state, block, 1);
+ Hacl_Poly1305_update(state, block, 1);
}
}
+/* Poly1305Do writes the Poly1305 authenticator of the given additional data
+ * and ciphertext to |out|. */
static void
Poly1305Do(unsigned char *out, const unsigned char *ad, unsigned int adLen,
const unsigned char *ciphertext, unsigned int ciphertextLen,
const unsigned char key[32])
{
- uint64_t tmp1[6U] = { 0U };
- Hacl_Impl_Poly1305_64_State_poly1305_state state =
- Hacl_Poly1305_64_mk_state(tmp1, tmp1 + 3);
+#ifdef NSS_POLY1305_64
+ uint64_t stateStack[6U] = { 0U };
+ size_t offset = 3;
+#elif defined NSS_POLY1305_32
+ uint32_t stateStack[10U] = { 0U };
+ size_t offset = 5;
+#else
+#error "This can't happen."
+#endif
+ Hacl_Impl_Poly1305_State_poly1305_state state =
+ Hacl_Poly1305_mk_state(stateStack, stateStack + offset);
unsigned char block[16] = { 0 };
- Hacl_Poly1305_64_init(state, (uint8_t *)key);
+ Hacl_Poly1305_init(state, (uint8_t *)key);
Poly1305PadUpdate(state, block, ad, adLen);
memset(block, 0, 16);
@@ -68,49 +92,11 @@ Poly1305Do(unsigned char *out, const unsigned char *ad, unsigned int adLen,
block[i] = j;
}
- Hacl_Poly1305_64_update(state, block, 1);
- Hacl_Poly1305_64_finish(state, out, (uint8_t *)(key + 16));
+ Hacl_Poly1305_update(state, block, 1);
+ Hacl_Poly1305_finish(state, out, (uint8_t *)(key + 16));
+#undef NSS_POLY1305_64
+#undef NSS_POLY1305_32
}
-#else
-/* All other platforms get the 32-bit poly1305 reference implementation. */
-#include "poly1305.h"
-
-static void
-Poly1305Do(unsigned char *out, const unsigned char *ad, unsigned int adLen,
- const unsigned char *ciphertext, unsigned int ciphertextLen,
- const unsigned char key[32])
-{
- poly1305_state state;
- unsigned int j;
- unsigned char lengthBytes[8];
- static const unsigned char zeros[15];
- unsigned int i;
-
- Poly1305Init(&state, key);
- Poly1305Update(&state, ad, adLen);
- if (adLen % 16 > 0) {
- Poly1305Update(&state, zeros, 16 - adLen % 16);
- }
- Poly1305Update(&state, ciphertext, ciphertextLen);
- if (ciphertextLen % 16 > 0) {
- Poly1305Update(&state, zeros, 16 - ciphertextLen % 16);
- }
- j = adLen;
- for (i = 0; i < sizeof(lengthBytes); i++) {
- lengthBytes[i] = j;
- j >>= 8;
- }
- Poly1305Update(&state, lengthBytes, sizeof(lengthBytes));
- j = ciphertextLen;
- for (i = 0; i < sizeof(lengthBytes); i++) {
- lengthBytes[i] = j;
- j >>= 8;
- }
- Poly1305Update(&state, lengthBytes, sizeof(lengthBytes));
- Poly1305Finish(&state, out);
-}
-
-#endif /* HAVE_INT128_SUPPORT */
#endif /* NSS_DISABLE_CHACHAPOLY */
SECStatus
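
Both the removed reference code and the new HACL* path feed Poly1305 the same RFC 7539 tag input: the additional data zero-padded to a 16-byte boundary, the ciphertext padded the same way, and finally the two lengths as 64-bit little-endian integers (the block[i] = j; j >>= 8; loop in the hunk above). A minimal sketch of that final length block, assuming unsigned int lengths as in the NSS code:

    #include <string.h>

    /* Fill a 16-byte Poly1305 block with adLen and ctLen as 64-bit
     * little-endian values (RFC 7539, section 2.8); the upper bytes of
     * each length stay zero because the inputs here are 32-bit. */
    static void
    encode_lengths(unsigned char block[16], unsigned int adLen, unsigned int ctLen)
    {
        unsigned int i, j;

        memset(block, 0, 16);
        j = adLen;
        for (i = 0; i < 8; i++) {  /* bytes 0..7: adLen */
            block[i] = (unsigned char)j;
            j >>= 8;
        }
        j = ctLen;
        for (i = 8; i < 16; i++) { /* bytes 8..15: ctLen */
            block[i] = (unsigned char)j;
            j >>= 8;
        }
    }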
diff --git a/security/nss/lib/freebl/dsa.c b/security/nss/lib/freebl/dsa.c
index 9324d306b..aef353967 100644
--- a/security/nss/lib/freebl/dsa.c
+++ b/security/nss/lib/freebl/dsa.c
@@ -16,14 +16,11 @@
#include "blapi.h"
#include "nssilock.h"
#include "secitem.h"
-#include "blapi.h"
+#include "blapit.h"
#include "mpi.h"
#include "secmpi.h"
#include "pqg.h"
-/* XXX to be replaced by define in blapit.h */
-#define NSS_FREEBL_DSA_DEFAULT_CHUNKSIZE 2048
-
/*
* FIPS 186-2 requires result from random output to be reduced mod q when
* generating random numbers for DSA.
@@ -168,7 +165,7 @@ dsa_NewKeyExtended(const PQGParams *params, const SECItem *seed,
return SECFailure;
}
/* Initialize an arena for the DSA key. */
- arena = PORT_NewArena(NSS_FREEBL_DSA_DEFAULT_CHUNKSIZE);
+ arena = PORT_NewArena(NSS_FREEBL_DEFAULT_CHUNKSIZE);
if (!arena) {
PORT_SetError(SEC_ERROR_NO_MEMORY);
return SECFailure;
@@ -213,8 +210,9 @@ cleanup:
mp_clear(&g);
mp_clear(&x);
mp_clear(&y);
- if (key)
+ if (key) {
PORT_FreeArena(key->params.arena, PR_TRUE);
+ }
if (err) {
translate_mpi_error(err);
return SECFailure;
@@ -321,6 +319,7 @@ dsa_SignDigest(DSAPrivateKey *key, SECItem *signature, const SECItem *digest,
mp_int x, k; /* private key & pseudo-random integer */
mp_int r, s; /* tuple (r, s) is signature) */
mp_int t; /* holding tmp values */
+ mp_int ar; /* holding blinding values */
mp_err err = MP_OKAY;
SECStatus rv = SECSuccess;
unsigned int dsa_subprime_len, dsa_signature_len, offset;
@@ -364,6 +363,7 @@ dsa_SignDigest(DSAPrivateKey *key, SECItem *signature, const SECItem *digest,
MP_DIGITS(&r) = 0;
MP_DIGITS(&s) = 0;
MP_DIGITS(&t) = 0;
+ MP_DIGITS(&ar) = 0;
CHECK_MPI_OK(mp_init(&p));
CHECK_MPI_OK(mp_init(&q));
CHECK_MPI_OK(mp_init(&g));
@@ -372,6 +372,7 @@ dsa_SignDigest(DSAPrivateKey *key, SECItem *signature, const SECItem *digest,
CHECK_MPI_OK(mp_init(&r));
CHECK_MPI_OK(mp_init(&s));
CHECK_MPI_OK(mp_init(&t));
+ CHECK_MPI_OK(mp_init(&ar));
/*
** Convert stored PQG and private key into MPI integers.
*/
@@ -397,14 +398,28 @@ dsa_SignDigest(DSAPrivateKey *key, SECItem *signature, const SECItem *digest,
rv = SECFailure;
goto cleanup;
}
- SECITEM_TO_MPINT(t2, &t); /* t <-$ Zq */
+ SECITEM_TO_MPINT(t2, &t); /* t <-$ Zq */
+ SECITEM_FreeItem(&t2, PR_FALSE);
+ if (DSA_NewRandom(NULL, &key->params.subPrime, &t2) != SECSuccess) {
+ PORT_SetError(SEC_ERROR_NEED_RANDOM);
+ rv = SECFailure;
+ goto cleanup;
+ }
+ SECITEM_TO_MPINT(t2, &ar); /* ar <-$ Zq */
+ SECITEM_FreeItem(&t2, PR_FALSE);
+
+ /* Using mp_invmod on k directly would leak bits from k. */
+ CHECK_MPI_OK(mp_mul(&k, &ar, &k)); /* k = k * ar */
CHECK_MPI_OK(mp_mulmod(&k, &t, &q, &k)); /* k = k * t mod q */
CHECK_MPI_OK(mp_invmod(&k, &q, &k)); /* k = k**-1 mod q */
CHECK_MPI_OK(mp_mulmod(&k, &t, &q, &k)); /* k = k * t mod q */
SECITEM_TO_MPINT(localDigest, &s); /* s = HASH(M) */
- CHECK_MPI_OK(mp_mulmod(&x, &r, &q, &x)); /* x = x * r mod q */
- CHECK_MPI_OK(mp_addmod(&s, &x, &q, &s)); /* s = s + x mod q */
- CHECK_MPI_OK(mp_mulmod(&s, &k, &q, &s)); /* s = s * k mod q */
+ /* To avoid leaking secret bits here the addition is blinded. */
+ CHECK_MPI_OK(mp_mul(&x, &ar, &x)); /* x = x * ar */
+ CHECK_MPI_OK(mp_mulmod(&x, &r, &q, &x)); /* x = x * r mod q */
+ CHECK_MPI_OK(mp_mulmod(&s, &ar, &q, &t)); /* t = s * ar mod q */
+ CHECK_MPI_OK(mp_add(&t, &x, &s)); /* s = t + x */
+ CHECK_MPI_OK(mp_mulmod(&s, &k, &q, &s)); /* s = s * k mod q */
/*
** verify r != 0 and s != 0
** mentioned as optional in FIPS 186-1.
@@ -438,7 +453,7 @@ cleanup:
mp_clear(&r);
mp_clear(&s);
mp_clear(&t);
- SECITEM_FreeItem(&t2, PR_FALSE);
+ mp_clear(&ar);
if (err) {
translate_mpi_error(err);
rv = SECFailure;
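
For reference, the blinded sequence in dsa_SignDigest still yields the textbook DSA value s = k^-1 * (H(m) + x*r) mod q; the fresh random ar only rides along so that mp_invmod and the addition never touch the bare secrets, and it cancels at the end. A sketch of the algebra behind the hunk:

    k'  = ((k*ar*t)^-1 * t) mod q  =  (k*ar)^-1 mod q
    s   = (H(m)*ar + x*ar*r) * k' mod q
        = ar * (H(m) + x*r) * (k*ar)^-1 mod q
        = k^-1 * (H(m) + x*r) mod q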
diff --git a/security/nss/lib/freebl/ec.c b/security/nss/lib/freebl/ec.c
index b28815ade..6468a10d6 100644
--- a/security/nss/lib/freebl/ec.c
+++ b/security/nss/lib/freebl/ec.c
@@ -653,6 +653,7 @@ ECDSA_SignDigestWithSeed(ECPrivateKey *key, SECItem *signature,
mp_int r, s; /* tuple (r, s) is the signature */
mp_int t; /* holding tmp values */
mp_int n;
+ mp_int ar; /* blinding value */
mp_err err = MP_OKAY;
ECParams *ecParams = NULL;
SECItem kGpoint = { siBuffer, NULL, 0 };
@@ -674,6 +675,7 @@ ECDSA_SignDigestWithSeed(ECPrivateKey *key, SECItem *signature,
MP_DIGITS(&s) = 0;
MP_DIGITS(&n) = 0;
MP_DIGITS(&t) = 0;
+ MP_DIGITS(&ar) = 0;
/* Check args */
if (!key || !signature || !digest || !kb || (kblen < 0)) {
@@ -700,6 +702,7 @@ ECDSA_SignDigestWithSeed(ECPrivateKey *key, SECItem *signature,
CHECK_MPI_OK(mp_init(&s));
CHECK_MPI_OK(mp_init(&n));
CHECK_MPI_OK(mp_init(&t));
+ CHECK_MPI_OK(mp_init(&ar));
SECITEM_TO_MPINT(ecParams->order, &n);
SECITEM_TO_MPINT(key->privateValue, &d);
@@ -815,12 +818,25 @@ ECDSA_SignDigestWithSeed(ECPrivateKey *key, SECItem *signature,
goto cleanup;
}
CHECK_MPI_OK(mp_read_unsigned_octets(&t, t2, 2 * ecParams->order.len)); /* t <-$ Zn */
- CHECK_MPI_OK(mp_mulmod(&k, &t, &n, &k)); /* k = k * t mod n */
- CHECK_MPI_OK(mp_invmod(&k, &n, &k)); /* k = k**-1 mod n */
- CHECK_MPI_OK(mp_mulmod(&k, &t, &n, &k)); /* k = k * t mod n */
- CHECK_MPI_OK(mp_mulmod(&d, &r, &n, &d)); /* d = d * r mod n */
- CHECK_MPI_OK(mp_addmod(&s, &d, &n, &s)); /* s = s + d mod n */
- CHECK_MPI_OK(mp_mulmod(&s, &k, &n, &s)); /* s = s * k mod n */
+ PORT_Memset(t2, 0, 2 * ecParams->order.len);
+ if (RNG_GenerateGlobalRandomBytes(t2, 2 * ecParams->order.len) != SECSuccess) {
+ PORT_SetError(SEC_ERROR_NEED_RANDOM);
+ rv = SECFailure;
+ goto cleanup;
+ }
+ CHECK_MPI_OK(mp_read_unsigned_octets(&ar, t2, 2 * ecParams->order.len)); /* ar <-$ Zn */
+
+ /* Using mp_invmod on k directly would leak bits from k. */
+ CHECK_MPI_OK(mp_mul(&k, &ar, &k)); /* k = k * ar */
+ CHECK_MPI_OK(mp_mulmod(&k, &t, &n, &k)); /* k = k * t mod n */
+ CHECK_MPI_OK(mp_invmod(&k, &n, &k)); /* k = k**-1 mod n */
+ CHECK_MPI_OK(mp_mulmod(&k, &t, &n, &k)); /* k = k * t mod n */
+ /* To avoid leaking secret bits here the addition is blinded. */
+ CHECK_MPI_OK(mp_mul(&d, &ar, &t)); /* t = d * ar */
+ CHECK_MPI_OK(mp_mulmod(&t, &r, &n, &d)); /* d = t * r mod n */
+ CHECK_MPI_OK(mp_mulmod(&s, &ar, &n, &t)); /* t = s * ar mod n */
+ CHECK_MPI_OK(mp_add(&t, &d, &s)); /* s = t + d */
+ CHECK_MPI_OK(mp_mulmod(&s, &k, &n, &s)); /* s = s * k mod n */
#if EC_DEBUG
mp_todecimal(&s, mpstr);
@@ -858,6 +874,7 @@ cleanup:
mp_clear(&s);
mp_clear(&n);
mp_clear(&t);
+ mp_clear(&ar);
if (t2) {
PORT_Free(t2);
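
The ECDSA_SignDigestWithSeed hunk applies the same construction over the curve order n with the private value d (s holds the digest, as in standard ECDSA):

    s = ar * (H(m) + d*r) * (k*ar)^-1 mod n  =  k^-1 * (H(m) + d*r) mod n

so the blinding value again drops out of the final signature.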
diff --git a/security/nss/lib/freebl/freebl.gyp b/security/nss/lib/freebl/freebl.gyp
index fae56f709..004807483 100644
--- a/security/nss/lib/freebl/freebl.gyp
+++ b/security/nss/lib/freebl/freebl.gyp
@@ -272,28 +272,15 @@
},
}],
[ 'cc_use_gnu_ld==1 and OS=="win" and target_arch=="x64"', {
+ # mingw x64
'defines': [
'MP_IS_LITTLE_ENDIAN',
- 'NSS_BEVAND_ARCFOUR',
- 'MPI_AMD64',
- 'MP_ASSEMBLY_MULTIPLY',
- 'NSS_USE_COMBA',
- 'USE_HW_AES',
- 'INTEL_GCM',
],
}],
- [ 'OS!="win"', {
- 'conditions': [
- [ 'target_arch=="x64" or target_arch=="arm64" or target_arch=="aarch64"', {
- 'defines': [
- # The Makefile does version-tests on GCC, but we're not doing that here.
- 'HAVE_INT128_SUPPORT',
- ],
- }, {
- 'defines': [
- 'KRML_NOUINT128',
- ],
- }],
+ [ 'have_int128_support==1', {
+ 'defines': [
+ # The Makefile does version-tests on GCC, but we're not doing that here.
+ 'HAVE_INT128_SUPPORT',
],
}, {
'defines': [
@@ -355,5 +342,18 @@
},
'variables': {
'module': 'nss',
+ 'conditions': [
+ [ 'OS!="win"', {
+ 'conditions': [
+ [ 'target_arch=="x64" or target_arch=="arm64" or target_arch=="aarch64"', {
+ 'have_int128_support%': 1,
+ }, {
+ 'have_int128_support%': 0,
+ }],
+ ],
+ }, {
+ 'have_int128_support%': 0,
+ }],
+ ],
}
}
diff --git a/security/nss/lib/freebl/freebl_base.gypi b/security/nss/lib/freebl/freebl_base.gypi
index ebd1018d8..1372994f4 100644
--- a/security/nss/lib/freebl/freebl_base.gypi
+++ b/security/nss/lib/freebl/freebl_base.gypi
@@ -59,7 +59,7 @@
'sha_fast.c',
'shvfy.c',
'sysrand.c',
- 'tlsprfalg.c'
+ 'tlsprfalg.c',
],
'conditions': [
[ 'OS=="linux" or OS=="android"', {
@@ -122,6 +122,11 @@
'intel-gcm-x86-masm.asm',
],
}],
+ [ 'cc_use_gnu_ld==1', {
+ # mingw
+ 'sources': [
+ ],
+ }],
[ 'cc_is_clang!=1', {
# MSVC
'sources': [
@@ -135,7 +140,6 @@
# All intel and 64-bit ARM architectures get the 64 bit version.
'ecl/curve25519_64.c',
'verified/Hacl_Curve25519.c',
- 'verified/FStar.c',
],
}, {
'sources': [
@@ -167,7 +171,7 @@
}, {
# !Windows & !x64 & !arm64 & !aarch64
'sources': [
- 'poly1305.c',
+ 'verified/Hacl_Poly1305_32.c',
],
}],
],
@@ -176,7 +180,7 @@
}, {
# Windows
'sources': [
- 'poly1305.c',
+ 'verified/Hacl_Poly1305_32.c',
],
}],
],
@@ -215,6 +219,9 @@
}],
],
}],
+ [ 'have_int128_support==0', {
+ 'sources': [ 'verified/FStar.c' ],
+ }],
],
'ldflags': [
'-Wl,-Bsymbolic'
diff --git a/security/nss/lib/freebl/loader.c b/security/nss/lib/freebl/loader.c
index fe5e0a668..6d200e6dd 100644
--- a/security/nss/lib/freebl/loader.c
+++ b/security/nss/lib/freebl/loader.c
@@ -2164,12 +2164,12 @@ BLAKE2B_NewContext(void)
}
void
-BLAKE2B_DestroyContext(BLAKE2BContext *BLAKE2BContext, PRBool freeit)
+BLAKE2B_DestroyContext(BLAKE2BContext *ctx, PRBool freeit)
{
if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) {
return;
}
- (vector->p_BLAKE2B_DestroyContext)(BLAKE2BContext, freeit);
+ (vector->p_BLAKE2B_DestroyContext)(ctx, freeit);
}
SECStatus
diff --git a/security/nss/lib/freebl/mpi/mpi.c b/security/nss/lib/freebl/mpi/mpi.c
index ae404019d..8c893fb5f 100644
--- a/security/nss/lib/freebl/mpi/mpi.c
+++ b/security/nss/lib/freebl/mpi/mpi.c
@@ -2657,10 +2657,10 @@ mp_toradix(mp_int *mp, char *str, int radix)
/* Reverse the digits and sign indicator */
ix = 0;
while (ix < pos) {
- char tmp = str[ix];
+ char tmpc = str[ix];
str[ix] = str[pos];
- str[pos] = tmp;
+ str[pos] = tmpc;
++ix;
--pos;
}
@@ -3313,13 +3313,14 @@ s_mp_div_d(mp_int *mp, mp_digit d, mp_digit *r)
/* could check for power of 2 here, but mp_div_d does that. */
if (MP_USED(mp) == 1) {
mp_digit n = MP_DIGIT(mp, 0);
- mp_digit rem;
+ mp_digit remdig;
q = n / d;
- rem = n % d;
+ remdig = n % d;
MP_DIGIT(mp, 0) = q;
- if (r)
- *r = rem;
+ if (r) {
+ *r = remdig;
+ }
return MP_OKAY;
}
diff --git a/security/nss/lib/freebl/poly1305-donna-x64-sse2-incremental-source.c b/security/nss/lib/freebl/poly1305-donna-x64-sse2-incremental-source.c
deleted file mode 100644
index 3c803c167..000000000
--- a/security/nss/lib/freebl/poly1305-donna-x64-sse2-incremental-source.c
+++ /dev/null
@@ -1,881 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-/* This implementation of poly1305 is by Andrew Moon
- * (https://github.com/floodyberry/poly1305-donna) and released as public
- * domain. It implements SIMD vectorization based on the algorithm described in
- * http://cr.yp.to/papers.html#neoncrypto. Unrolled to 2 powers, i.e. 64 byte
- * block size. */
-
-#include <emmintrin.h>
-#include <stdint.h>
-
-#include "poly1305.h"
-#include "blapii.h"
-
-#define ALIGN(x) __attribute__((aligned(x)))
-#define INLINE inline
-#define U8TO64_LE(m) (*(uint64_t *)(m))
-#define U8TO32_LE(m) (*(uint32_t *)(m))
-#define U64TO8_LE(m, v) (*(uint64_t *)(m)) = v
-
-typedef __m128i xmmi;
-typedef unsigned __int128 uint128_t;
-
-static const uint32_t ALIGN(16) poly1305_x64_sse2_message_mask[4] = { (1 << 26) - 1, 0, (1 << 26) - 1, 0 };
-static const uint32_t ALIGN(16) poly1305_x64_sse2_5[4] = { 5, 0, 5, 0 };
-static const uint32_t ALIGN(16) poly1305_x64_sse2_1shl128[4] = { (1 << 24), 0, (1 << 24), 0 };
-
-static uint128_t INLINE
-add128(uint128_t a, uint128_t b)
-{
- return a + b;
-}
-
-static uint128_t INLINE
-add128_64(uint128_t a, uint64_t b)
-{
- return a + b;
-}
-
-static uint128_t INLINE
-mul64x64_128(uint64_t a, uint64_t b)
-{
- return (uint128_t)a * b;
-}
-
-static uint64_t INLINE
-lo128(uint128_t a)
-{
- return (uint64_t)a;
-}
-
-static uint64_t INLINE
-shr128(uint128_t v, const int shift)
-{
- return (uint64_t)(v >> shift);
-}
-
-static uint64_t INLINE
-shr128_pair(uint64_t hi, uint64_t lo, const int shift)
-{
- return (uint64_t)((((uint128_t)hi << 64) | lo) >> shift);
-}
-
-typedef struct poly1305_power_t {
- union {
- xmmi v;
- uint64_t u[2];
- uint32_t d[4];
- } R20, R21, R22, R23, R24, S21, S22, S23, S24;
-} poly1305_power;
-
-typedef struct poly1305_state_internal_t {
- poly1305_power P[2]; /* 288 bytes, top 32 bit halves unused = 144 bytes of free storage */
- union {
- xmmi H[5]; /* 80 bytes */
- uint64_t HH[10];
- };
- /* uint64_t r0,r1,r2; [24 bytes] */
- /* uint64_t pad0,pad1; [16 bytes] */
- uint64_t started; /* 8 bytes */
- uint64_t leftover; /* 8 bytes */
- uint8_t buffer[64]; /* 64 bytes */
-} poly1305_state_internal; /* 448 bytes total + 63 bytes for alignment = 511 bytes raw */
-
-static poly1305_state_internal INLINE
- *
- poly1305_aligned_state(poly1305_state *state)
-{
- return (poly1305_state_internal *)(((uint64_t)state + 63) & ~63);
-}
-
-/* copy 0-63 bytes */
-static void INLINE NO_SANITIZE_ALIGNMENT
-poly1305_block_copy(uint8_t *dst, const uint8_t *src, size_t bytes)
-{
- size_t offset = src - dst;
- if (bytes & 32) {
- _mm_storeu_si128((xmmi *)(dst + 0), _mm_loadu_si128((xmmi *)(dst + offset + 0)));
- _mm_storeu_si128((xmmi *)(dst + 16), _mm_loadu_si128((xmmi *)(dst + offset + 16)));
- dst += 32;
- }
- if (bytes & 16) {
- _mm_storeu_si128((xmmi *)dst, _mm_loadu_si128((xmmi *)(dst + offset)));
- dst += 16;
- }
- if (bytes & 8) {
- *(uint64_t *)dst = *(uint64_t *)(dst + offset);
- dst += 8;
- }
- if (bytes & 4) {
- *(uint32_t *)dst = *(uint32_t *)(dst + offset);
- dst += 4;
- }
- if (bytes & 2) {
- *(uint16_t *)dst = *(uint16_t *)(dst + offset);
- dst += 2;
- }
- if (bytes & 1) {
- *(uint8_t *)dst = *(uint8_t *)(dst + offset);
- }
-}
-
-/* zero 0-15 bytes */
-static void INLINE
-poly1305_block_zero(uint8_t *dst, size_t bytes)
-{
- if (bytes & 8) {
- *(uint64_t *)dst = 0;
- dst += 8;
- }
- if (bytes & 4) {
- *(uint32_t *)dst = 0;
- dst += 4;
- }
- if (bytes & 2) {
- *(uint16_t *)dst = 0;
- dst += 2;
- }
- if (bytes & 1) {
- *(uint8_t *)dst = 0;
- }
-}
-
-static size_t INLINE
-poly1305_min(size_t a, size_t b)
-{
- return (a < b) ? a : b;
-}
-
-void
-Poly1305Init(poly1305_state *state, const unsigned char key[32])
-{
- poly1305_state_internal *st = poly1305_aligned_state(state);
- poly1305_power *p;
- uint64_t r0, r1, r2;
- uint64_t t0, t1;
-
- /* clamp key */
- t0 = U8TO64_LE(key + 0);
- t1 = U8TO64_LE(key + 8);
- r0 = t0 & 0xffc0fffffff;
- t0 >>= 44;
- t0 |= t1 << 20;
- r1 = t0 & 0xfffffc0ffff;
- t1 >>= 24;
- r2 = t1 & 0x00ffffffc0f;
-
- /* store r in un-used space of st->P[1] */
- p = &st->P[1];
- p->R20.d[1] = (uint32_t)(r0);
- p->R20.d[3] = (uint32_t)(r0 >> 32);
- p->R21.d[1] = (uint32_t)(r1);
- p->R21.d[3] = (uint32_t)(r1 >> 32);
- p->R22.d[1] = (uint32_t)(r2);
- p->R22.d[3] = (uint32_t)(r2 >> 32);
-
- /* store pad */
- p->R23.d[1] = U8TO32_LE(key + 16);
- p->R23.d[3] = U8TO32_LE(key + 20);
- p->R24.d[1] = U8TO32_LE(key + 24);
- p->R24.d[3] = U8TO32_LE(key + 28);
-
- /* H = 0 */
- st->H[0] = _mm_setzero_si128();
- st->H[1] = _mm_setzero_si128();
- st->H[2] = _mm_setzero_si128();
- st->H[3] = _mm_setzero_si128();
- st->H[4] = _mm_setzero_si128();
-
- st->started = 0;
- st->leftover = 0;
-}
-
-static void
-poly1305_first_block(poly1305_state_internal *st, const uint8_t *m)
-{
- const xmmi MMASK = _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask);
- const xmmi FIVE = _mm_load_si128((xmmi *)poly1305_x64_sse2_5);
- const xmmi HIBIT = _mm_load_si128((xmmi *)poly1305_x64_sse2_1shl128);
- xmmi T5, T6;
- poly1305_power *p;
- uint128_t d[3];
- uint64_t r0, r1, r2;
- uint64_t r20, r21, r22, s22;
- uint64_t pad0, pad1;
- uint64_t c;
- uint64_t i;
-
- /* pull out stored info */
- p = &st->P[1];
-
- r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1];
- r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1];
- r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1];
- pad0 = ((uint64_t)p->R23.d[3] << 32) | (uint64_t)p->R23.d[1];
- pad1 = ((uint64_t)p->R24.d[3] << 32) | (uint64_t)p->R24.d[1];
-
- /* compute powers r^2,r^4 */
- r20 = r0;
- r21 = r1;
- r22 = r2;
- for (i = 0; i < 2; i++) {
- s22 = r22 * (5 << 2);
-
- d[0] = add128(mul64x64_128(r20, r20), mul64x64_128(r21 * 2, s22));
- d[1] = add128(mul64x64_128(r22, s22), mul64x64_128(r20 * 2, r21));
- d[2] = add128(mul64x64_128(r21, r21), mul64x64_128(r22 * 2, r20));
-
- r20 = lo128(d[0]) & 0xfffffffffff;
- c = shr128(d[0], 44);
- d[1] = add128_64(d[1], c);
- r21 = lo128(d[1]) & 0xfffffffffff;
- c = shr128(d[1], 44);
- d[2] = add128_64(d[2], c);
- r22 = lo128(d[2]) & 0x3ffffffffff;
- c = shr128(d[2], 42);
- r20 += c * 5;
- c = (r20 >> 44);
- r20 = r20 & 0xfffffffffff;
- r21 += c;
-
- p->R20.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)(r20)&0x3ffffff), _MM_SHUFFLE(1, 0, 1, 0));
- p->R21.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r20 >> 26) | (r21 << 18)) & 0x3ffffff), _MM_SHUFFLE(1, 0, 1, 0));
- p->R22.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r21 >> 8)) & 0x3ffffff), _MM_SHUFFLE(1, 0, 1, 0));
- p->R23.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r21 >> 34) | (r22 << 10)) & 0x3ffffff), _MM_SHUFFLE(1, 0, 1, 0));
- p->R24.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r22 >> 16))), _MM_SHUFFLE(1, 0, 1, 0));
- p->S21.v = _mm_mul_epu32(p->R21.v, FIVE);
- p->S22.v = _mm_mul_epu32(p->R22.v, FIVE);
- p->S23.v = _mm_mul_epu32(p->R23.v, FIVE);
- p->S24.v = _mm_mul_epu32(p->R24.v, FIVE);
- p--;
- }
-
- /* put saved info back */
- p = &st->P[1];
- p->R20.d[1] = (uint32_t)(r0);
- p->R20.d[3] = (uint32_t)(r0 >> 32);
- p->R21.d[1] = (uint32_t)(r1);
- p->R21.d[3] = (uint32_t)(r1 >> 32);
- p->R22.d[1] = (uint32_t)(r2);
- p->R22.d[3] = (uint32_t)(r2 >> 32);
- p->R23.d[1] = (uint32_t)(pad0);
- p->R23.d[3] = (uint32_t)(pad0 >> 32);
- p->R24.d[1] = (uint32_t)(pad1);
- p->R24.d[3] = (uint32_t)(pad1 >> 32);
-
- /* H = [Mx,My] */
- T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)), _mm_loadl_epi64((xmmi *)(m + 16)));
- T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)), _mm_loadl_epi64((xmmi *)(m + 24)));
- st->H[0] = _mm_and_si128(MMASK, T5);
- st->H[1] = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
- T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12));
- st->H[2] = _mm_and_si128(MMASK, T5);
- st->H[3] = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
- st->H[4] = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT);
-}
-
-static void
-poly1305_blocks(poly1305_state_internal *st, const uint8_t *m, size_t bytes)
-{
- const xmmi MMASK = _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask);
- const xmmi FIVE = _mm_load_si128((xmmi *)poly1305_x64_sse2_5);
- const xmmi HIBIT = _mm_load_si128((xmmi *)poly1305_x64_sse2_1shl128);
-
- poly1305_power *p;
- xmmi H0, H1, H2, H3, H4;
- xmmi T0, T1, T2, T3, T4, T5, T6;
- xmmi M0, M1, M2, M3, M4;
- xmmi C1, C2;
-
- H0 = st->H[0];
- H1 = st->H[1];
- H2 = st->H[2];
- H3 = st->H[3];
- H4 = st->H[4];
-
- while (bytes >= 64) {
- /* H *= [r^4,r^4] */
- p = &st->P[0];
- T0 = _mm_mul_epu32(H0, p->R20.v);
- T1 = _mm_mul_epu32(H0, p->R21.v);
- T2 = _mm_mul_epu32(H0, p->R22.v);
- T3 = _mm_mul_epu32(H0, p->R23.v);
- T4 = _mm_mul_epu32(H0, p->R24.v);
- T5 = _mm_mul_epu32(H1, p->S24.v);
- T6 = _mm_mul_epu32(H1, p->R20.v);
- T0 = _mm_add_epi64(T0, T5);
- T1 = _mm_add_epi64(T1, T6);
- T5 = _mm_mul_epu32(H2, p->S23.v);
- T6 = _mm_mul_epu32(H2, p->S24.v);
- T0 = _mm_add_epi64(T0, T5);
- T1 = _mm_add_epi64(T1, T6);
- T5 = _mm_mul_epu32(H3, p->S22.v);
- T6 = _mm_mul_epu32(H3, p->S23.v);
- T0 = _mm_add_epi64(T0, T5);
- T1 = _mm_add_epi64(T1, T6);
- T5 = _mm_mul_epu32(H4, p->S21.v);
- T6 = _mm_mul_epu32(H4, p->S22.v);
- T0 = _mm_add_epi64(T0, T5);
- T1 = _mm_add_epi64(T1, T6);
- T5 = _mm_mul_epu32(H1, p->R21.v);
- T6 = _mm_mul_epu32(H1, p->R22.v);
- T2 = _mm_add_epi64(T2, T5);
- T3 = _mm_add_epi64(T3, T6);
- T5 = _mm_mul_epu32(H2, p->R20.v);
- T6 = _mm_mul_epu32(H2, p->R21.v);
- T2 = _mm_add_epi64(T2, T5);
- T3 = _mm_add_epi64(T3, T6);
- T5 = _mm_mul_epu32(H3, p->S24.v);
- T6 = _mm_mul_epu32(H3, p->R20.v);
- T2 = _mm_add_epi64(T2, T5);
- T3 = _mm_add_epi64(T3, T6);
- T5 = _mm_mul_epu32(H4, p->S23.v);
- T6 = _mm_mul_epu32(H4, p->S24.v);
- T2 = _mm_add_epi64(T2, T5);
- T3 = _mm_add_epi64(T3, T6);
- T5 = _mm_mul_epu32(H1, p->R23.v);
- T4 = _mm_add_epi64(T4, T5);
- T5 = _mm_mul_epu32(H2, p->R22.v);
- T4 = _mm_add_epi64(T4, T5);
- T5 = _mm_mul_epu32(H3, p->R21.v);
- T4 = _mm_add_epi64(T4, T5);
- T5 = _mm_mul_epu32(H4, p->R20.v);
- T4 = _mm_add_epi64(T4, T5);
-
- /* H += [Mx,My]*[r^2,r^2] */
- T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)), _mm_loadl_epi64((xmmi *)(m + 16)));
- T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)), _mm_loadl_epi64((xmmi *)(m + 24)));
- M0 = _mm_and_si128(MMASK, T5);
- M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
- T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12));
- M2 = _mm_and_si128(MMASK, T5);
- M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
- M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT);
-
- p = &st->P[1];
- T5 = _mm_mul_epu32(M0, p->R20.v);
- T6 = _mm_mul_epu32(M0, p->R21.v);
- T0 = _mm_add_epi64(T0, T5);
- T1 = _mm_add_epi64(T1, T6);
- T5 = _mm_mul_epu32(M1, p->S24.v);
- T6 = _mm_mul_epu32(M1, p->R20.v);
- T0 = _mm_add_epi64(T0, T5);
- T1 = _mm_add_epi64(T1, T6);
- T5 = _mm_mul_epu32(M2, p->S23.v);
- T6 = _mm_mul_epu32(M2, p->S24.v);
- T0 = _mm_add_epi64(T0, T5);
- T1 = _mm_add_epi64(T1, T6);
- T5 = _mm_mul_epu32(M3, p->S22.v);
- T6 = _mm_mul_epu32(M3, p->S23.v);
- T0 = _mm_add_epi64(T0, T5);
- T1 = _mm_add_epi64(T1, T6);
- T5 = _mm_mul_epu32(M4, p->S21.v);
- T6 = _mm_mul_epu32(M4, p->S22.v);
- T0 = _mm_add_epi64(T0, T5);
- T1 = _mm_add_epi64(T1, T6);
- T5 = _mm_mul_epu32(M0, p->R22.v);
- T6 = _mm_mul_epu32(M0, p->R23.v);
- T2 = _mm_add_epi64(T2, T5);
- T3 = _mm_add_epi64(T3, T6);
- T5 = _mm_mul_epu32(M1, p->R21.v);
- T6 = _mm_mul_epu32(M1, p->R22.v);
- T2 = _mm_add_epi64(T2, T5);
- T3 = _mm_add_epi64(T3, T6);
- T5 = _mm_mul_epu32(M2, p->R20.v);
- T6 = _mm_mul_epu32(M2, p->R21.v);
- T2 = _mm_add_epi64(T2, T5);
- T3 = _mm_add_epi64(T3, T6);
- T5 = _mm_mul_epu32(M3, p->S24.v);
- T6 = _mm_mul_epu32(M3, p->R20.v);
- T2 = _mm_add_epi64(T2, T5);
- T3 = _mm_add_epi64(T3, T6);
- T5 = _mm_mul_epu32(M4, p->S23.v);
- T6 = _mm_mul_epu32(M4, p->S24.v);
- T2 = _mm_add_epi64(T2, T5);
- T3 = _mm_add_epi64(T3, T6);
- T5 = _mm_mul_epu32(M0, p->R24.v);
- T4 = _mm_add_epi64(T4, T5);
- T5 = _mm_mul_epu32(M1, p->R23.v);
- T4 = _mm_add_epi64(T4, T5);
- T5 = _mm_mul_epu32(M2, p->R22.v);
- T4 = _mm_add_epi64(T4, T5);
- T5 = _mm_mul_epu32(M3, p->R21.v);
- T4 = _mm_add_epi64(T4, T5);
- T5 = _mm_mul_epu32(M4, p->R20.v);
- T4 = _mm_add_epi64(T4, T5);
-
- /* H += [Mx,My] */
- T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 32)), _mm_loadl_epi64((xmmi *)(m + 48)));
- T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 40)), _mm_loadl_epi64((xmmi *)(m + 56)));
- M0 = _mm_and_si128(MMASK, T5);
- M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
- T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12));
- M2 = _mm_and_si128(MMASK, T5);
- M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
- M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT);
-
- T0 = _mm_add_epi64(T0, M0);
- T1 = _mm_add_epi64(T1, M1);
- T2 = _mm_add_epi64(T2, M2);
- T3 = _mm_add_epi64(T3, M3);
- T4 = _mm_add_epi64(T4, M4);
-
- /* reduce */
- C1 = _mm_srli_epi64(T0, 26);
- C2 = _mm_srli_epi64(T3, 26);
- T0 = _mm_and_si128(T0, MMASK);
- T3 = _mm_and_si128(T3, MMASK);
- T1 = _mm_add_epi64(T1, C1);
- T4 = _mm_add_epi64(T4, C2);
- C1 = _mm_srli_epi64(T1, 26);
- C2 = _mm_srli_epi64(T4, 26);
- T1 = _mm_and_si128(T1, MMASK);
- T4 = _mm_and_si128(T4, MMASK);
- T2 = _mm_add_epi64(T2, C1);
- T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE));
- C1 = _mm_srli_epi64(T2, 26);
- C2 = _mm_srli_epi64(T0, 26);
- T2 = _mm_and_si128(T2, MMASK);
- T0 = _mm_and_si128(T0, MMASK);
- T3 = _mm_add_epi64(T3, C1);
- T1 = _mm_add_epi64(T1, C2);
- C1 = _mm_srli_epi64(T3, 26);
- T3 = _mm_and_si128(T3, MMASK);
- T4 = _mm_add_epi64(T4, C1);
-
- /* H = (H*[r^4,r^4] + [Mx,My]*[r^2,r^2] + [Mx,My]) */
- H0 = T0;
- H1 = T1;
- H2 = T2;
- H3 = T3;
- H4 = T4;
-
- m += 64;
- bytes -= 64;
- }
-
- st->H[0] = H0;
- st->H[1] = H1;
- st->H[2] = H2;
- st->H[3] = H3;
- st->H[4] = H4;
-}
-
-static size_t
-poly1305_combine(poly1305_state_internal *st, const uint8_t *m, size_t bytes)
-{
- const xmmi MMASK = _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask);
- const xmmi HIBIT = _mm_load_si128((xmmi *)poly1305_x64_sse2_1shl128);
- const xmmi FIVE = _mm_load_si128((xmmi *)poly1305_x64_sse2_5);
-
- poly1305_power *p;
- xmmi H0, H1, H2, H3, H4;
- xmmi M0, M1, M2, M3, M4;
- xmmi T0, T1, T2, T3, T4, T5, T6;
- xmmi C1, C2;
-
- uint64_t r0, r1, r2;
- uint64_t t0, t1, t2, t3, t4;
- uint64_t c;
- size_t consumed = 0;
-
- H0 = st->H[0];
- H1 = st->H[1];
- H2 = st->H[2];
- H3 = st->H[3];
- H4 = st->H[4];
-
- /* p = [r^2,r^2] */
- p = &st->P[1];
-
- if (bytes >= 32) {
- /* H *= [r^2,r^2] */
- T0 = _mm_mul_epu32(H0, p->R20.v);
- T1 = _mm_mul_epu32(H0, p->R21.v);
- T2 = _mm_mul_epu32(H0, p->R22.v);
- T3 = _mm_mul_epu32(H0, p->R23.v);
- T4 = _mm_mul_epu32(H0, p->R24.v);
- T5 = _mm_mul_epu32(H1, p->S24.v);
- T6 = _mm_mul_epu32(H1, p->R20.v);
- T0 = _mm_add_epi64(T0, T5);
- T1 = _mm_add_epi64(T1, T6);
- T5 = _mm_mul_epu32(H2, p->S23.v);
- T6 = _mm_mul_epu32(H2, p->S24.v);
- T0 = _mm_add_epi64(T0, T5);
- T1 = _mm_add_epi64(T1, T6);
- T5 = _mm_mul_epu32(H3, p->S22.v);
- T6 = _mm_mul_epu32(H3, p->S23.v);
- T0 = _mm_add_epi64(T0, T5);
- T1 = _mm_add_epi64(T1, T6);
- T5 = _mm_mul_epu32(H4, p->S21.v);
- T6 = _mm_mul_epu32(H4, p->S22.v);
- T0 = _mm_add_epi64(T0, T5);
- T1 = _mm_add_epi64(T1, T6);
- T5 = _mm_mul_epu32(H1, p->R21.v);
- T6 = _mm_mul_epu32(H1, p->R22.v);
- T2 = _mm_add_epi64(T2, T5);
- T3 = _mm_add_epi64(T3, T6);
- T5 = _mm_mul_epu32(H2, p->R20.v);
- T6 = _mm_mul_epu32(H2, p->R21.v);
- T2 = _mm_add_epi64(T2, T5);
- T3 = _mm_add_epi64(T3, T6);
- T5 = _mm_mul_epu32(H3, p->S24.v);
- T6 = _mm_mul_epu32(H3, p->R20.v);
- T2 = _mm_add_epi64(T2, T5);
- T3 = _mm_add_epi64(T3, T6);
- T5 = _mm_mul_epu32(H4, p->S23.v);
- T6 = _mm_mul_epu32(H4, p->S24.v);
- T2 = _mm_add_epi64(T2, T5);
- T3 = _mm_add_epi64(T3, T6);
- T5 = _mm_mul_epu32(H1, p->R23.v);
- T4 = _mm_add_epi64(T4, T5);
- T5 = _mm_mul_epu32(H2, p->R22.v);
- T4 = _mm_add_epi64(T4, T5);
- T5 = _mm_mul_epu32(H3, p->R21.v);
- T4 = _mm_add_epi64(T4, T5);
- T5 = _mm_mul_epu32(H4, p->R20.v);
- T4 = _mm_add_epi64(T4, T5);
-
- /* H += [Mx,My] */
- T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)), _mm_loadl_epi64((xmmi *)(m + 16)));
- T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)), _mm_loadl_epi64((xmmi *)(m + 24)));
- M0 = _mm_and_si128(MMASK, T5);
- M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
- T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12));
- M2 = _mm_and_si128(MMASK, T5);
- M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
- M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT);
-
- T0 = _mm_add_epi64(T0, M0);
- T1 = _mm_add_epi64(T1, M1);
- T2 = _mm_add_epi64(T2, M2);
- T3 = _mm_add_epi64(T3, M3);
- T4 = _mm_add_epi64(T4, M4);
-
- /* reduce */
- C1 = _mm_srli_epi64(T0, 26);
- C2 = _mm_srli_epi64(T3, 26);
- T0 = _mm_and_si128(T0, MMASK);
- T3 = _mm_and_si128(T3, MMASK);
- T1 = _mm_add_epi64(T1, C1);
- T4 = _mm_add_epi64(T4, C2);
- C1 = _mm_srli_epi64(T1, 26);
- C2 = _mm_srli_epi64(T4, 26);
- T1 = _mm_and_si128(T1, MMASK);
- T4 = _mm_and_si128(T4, MMASK);
- T2 = _mm_add_epi64(T2, C1);
- T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE));
- C1 = _mm_srli_epi64(T2, 26);
- C2 = _mm_srli_epi64(T0, 26);
- T2 = _mm_and_si128(T2, MMASK);
- T0 = _mm_and_si128(T0, MMASK);
- T3 = _mm_add_epi64(T3, C1);
- T1 = _mm_add_epi64(T1, C2);
- C1 = _mm_srli_epi64(T3, 26);
- T3 = _mm_and_si128(T3, MMASK);
- T4 = _mm_add_epi64(T4, C1);
-
- /* H = (H*[r^2,r^2] + [Mx,My]) */
- H0 = T0;
- H1 = T1;
- H2 = T2;
- H3 = T3;
- H4 = T4;
-
- consumed = 32;
- }
-
- /* finalize, H *= [r^2,r] */
- r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1];
- r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1];
- r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1];
-
- p->R20.d[2] = (uint32_t)(r0)&0x3ffffff;
- p->R21.d[2] = (uint32_t)((r0 >> 26) | (r1 << 18)) & 0x3ffffff;
- p->R22.d[2] = (uint32_t)((r1 >> 8)) & 0x3ffffff;
- p->R23.d[2] = (uint32_t)((r1 >> 34) | (r2 << 10)) & 0x3ffffff;
- p->R24.d[2] = (uint32_t)((r2 >> 16));
- p->S21.d[2] = p->R21.d[2] * 5;
- p->S22.d[2] = p->R22.d[2] * 5;
- p->S23.d[2] = p->R23.d[2] * 5;
- p->S24.d[2] = p->R24.d[2] * 5;
-
- /* H *= [r^2,r] */
- T0 = _mm_mul_epu32(H0, p->R20.v);
- T1 = _mm_mul_epu32(H0, p->R21.v);
- T2 = _mm_mul_epu32(H0, p->R22.v);
- T3 = _mm_mul_epu32(H0, p->R23.v);
- T4 = _mm_mul_epu32(H0, p->R24.v);
- T5 = _mm_mul_epu32(H1, p->S24.v);
- T6 = _mm_mul_epu32(H1, p->R20.v);
- T0 = _mm_add_epi64(T0, T5);
- T1 = _mm_add_epi64(T1, T6);
- T5 = _mm_mul_epu32(H2, p->S23.v);
- T6 = _mm_mul_epu32(H2, p->S24.v);
- T0 = _mm_add_epi64(T0, T5);
- T1 = _mm_add_epi64(T1, T6);
- T5 = _mm_mul_epu32(H3, p->S22.v);
- T6 = _mm_mul_epu32(H3, p->S23.v);
- T0 = _mm_add_epi64(T0, T5);
- T1 = _mm_add_epi64(T1, T6);
- T5 = _mm_mul_epu32(H4, p->S21.v);
- T6 = _mm_mul_epu32(H4, p->S22.v);
- T0 = _mm_add_epi64(T0, T5);
- T1 = _mm_add_epi64(T1, T6);
- T5 = _mm_mul_epu32(H1, p->R21.v);
- T6 = _mm_mul_epu32(H1, p->R22.v);
- T2 = _mm_add_epi64(T2, T5);
- T3 = _mm_add_epi64(T3, T6);
- T5 = _mm_mul_epu32(H2, p->R20.v);
- T6 = _mm_mul_epu32(H2, p->R21.v);
- T2 = _mm_add_epi64(T2, T5);
- T3 = _mm_add_epi64(T3, T6);
- T5 = _mm_mul_epu32(H3, p->S24.v);
- T6 = _mm_mul_epu32(H3, p->R20.v);
- T2 = _mm_add_epi64(T2, T5);
- T3 = _mm_add_epi64(T3, T6);
- T5 = _mm_mul_epu32(H4, p->S23.v);
- T6 = _mm_mul_epu32(H4, p->S24.v);
- T2 = _mm_add_epi64(T2, T5);
- T3 = _mm_add_epi64(T3, T6);
- T5 = _mm_mul_epu32(H1, p->R23.v);
- T4 = _mm_add_epi64(T4, T5);
- T5 = _mm_mul_epu32(H2, p->R22.v);
- T4 = _mm_add_epi64(T4, T5);
- T5 = _mm_mul_epu32(H3, p->R21.v);
- T4 = _mm_add_epi64(T4, T5);
- T5 = _mm_mul_epu32(H4, p->R20.v);
- T4 = _mm_add_epi64(T4, T5);
-
- C1 = _mm_srli_epi64(T0, 26);
- C2 = _mm_srli_epi64(T3, 26);
- T0 = _mm_and_si128(T0, MMASK);
- T3 = _mm_and_si128(T3, MMASK);
- T1 = _mm_add_epi64(T1, C1);
- T4 = _mm_add_epi64(T4, C2);
- C1 = _mm_srli_epi64(T1, 26);
- C2 = _mm_srli_epi64(T4, 26);
- T1 = _mm_and_si128(T1, MMASK);
- T4 = _mm_and_si128(T4, MMASK);
- T2 = _mm_add_epi64(T2, C1);
- T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE));
- C1 = _mm_srli_epi64(T2, 26);
- C2 = _mm_srli_epi64(T0, 26);
- T2 = _mm_and_si128(T2, MMASK);
- T0 = _mm_and_si128(T0, MMASK);
- T3 = _mm_add_epi64(T3, C1);
- T1 = _mm_add_epi64(T1, C2);
- C1 = _mm_srli_epi64(T3, 26);
- T3 = _mm_and_si128(T3, MMASK);
- T4 = _mm_add_epi64(T4, C1);
-
- /* H = H[0]+H[1] */
- H0 = _mm_add_epi64(T0, _mm_srli_si128(T0, 8));
- H1 = _mm_add_epi64(T1, _mm_srli_si128(T1, 8));
- H2 = _mm_add_epi64(T2, _mm_srli_si128(T2, 8));
- H3 = _mm_add_epi64(T3, _mm_srli_si128(T3, 8));
- H4 = _mm_add_epi64(T4, _mm_srli_si128(T4, 8));
-
- t0 = _mm_cvtsi128_si32(H0);
- c = (t0 >> 26);
- t0 &= 0x3ffffff;
- t1 = _mm_cvtsi128_si32(H1) + c;
- c = (t1 >> 26);
- t1 &= 0x3ffffff;
- t2 = _mm_cvtsi128_si32(H2) + c;
- c = (t2 >> 26);
- t2 &= 0x3ffffff;
- t3 = _mm_cvtsi128_si32(H3) + c;
- c = (t3 >> 26);
- t3 &= 0x3ffffff;
- t4 = _mm_cvtsi128_si32(H4) + c;
- c = (t4 >> 26);
- t4 &= 0x3ffffff;
- t0 = t0 + (c * 5);
- c = (t0 >> 26);
- t0 &= 0x3ffffff;
- t1 = t1 + c;
-
- st->HH[0] = ((t0) | (t1 << 26)) & 0xfffffffffffull;
- st->HH[1] = ((t1 >> 18) | (t2 << 8) | (t3 << 34)) & 0xfffffffffffull;
- st->HH[2] = ((t3 >> 10) | (t4 << 16)) & 0x3ffffffffffull;
-
- return consumed;
-}
-
-void
-Poly1305Update(poly1305_state *state, const unsigned char *m, size_t bytes)
-{
- poly1305_state_internal *st = poly1305_aligned_state(state);
- size_t want;
-
- /* need at least 32 initial bytes to start the accelerated branch */
- if (!st->started) {
- if ((st->leftover == 0) && (bytes > 32)) {
- poly1305_first_block(st, m);
- m += 32;
- bytes -= 32;
- } else {
- want = poly1305_min(32 - st->leftover, bytes);
- poly1305_block_copy(st->buffer + st->leftover, m, want);
- bytes -= want;
- m += want;
- st->leftover += want;
- if ((st->leftover < 32) || (bytes == 0))
- return;
- poly1305_first_block(st, st->buffer);
- st->leftover = 0;
- }
- st->started = 1;
- }
-
- /* handle leftover */
- if (st->leftover) {
- want = poly1305_min(64 - st->leftover, bytes);
- poly1305_block_copy(st->buffer + st->leftover, m, want);
- bytes -= want;
- m += want;
- st->leftover += want;
- if (st->leftover < 64)
- return;
- poly1305_blocks(st, st->buffer, 64);
- st->leftover = 0;
- }
-
- /* process 64 byte blocks */
- if (bytes >= 64) {
- want = (bytes & ~63);
- poly1305_blocks(st, m, want);
- m += want;
- bytes -= want;
- }
-
- if (bytes) {
- poly1305_block_copy(st->buffer + st->leftover, m, bytes);
- st->leftover += bytes;
- }
-}
-
-void
-Poly1305Finish(poly1305_state *state, unsigned char mac[16])
-{
- poly1305_state_internal *st = poly1305_aligned_state(state);
- size_t leftover = st->leftover;
- uint8_t *m = st->buffer;
- uint128_t d[3];
- uint64_t h0, h1, h2;
- uint64_t t0, t1;
- uint64_t g0, g1, g2, c, nc;
- uint64_t r0, r1, r2, s1, s2;
- poly1305_power *p;
-
- if (st->started) {
- size_t consumed = poly1305_combine(st, m, leftover);
- leftover -= consumed;
- m += consumed;
- }
-
- /* st->HH will either be 0 or have the combined result */
- h0 = st->HH[0];
- h1 = st->HH[1];
- h2 = st->HH[2];
-
- p = &st->P[1];
- r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1];
- r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1];
- r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1];
- s1 = r1 * (5 << 2);
- s2 = r2 * (5 << 2);
-
- if (leftover < 16)
- goto poly1305_donna_atmost15bytes;
-
-poly1305_donna_atleast16bytes:
- t0 = U8TO64_LE(m + 0);
- t1 = U8TO64_LE(m + 8);
- h0 += t0 & 0xfffffffffff;
- t0 = shr128_pair(t1, t0, 44);
- h1 += t0 & 0xfffffffffff;
- h2 += (t1 >> 24) | ((uint64_t)1 << 40);
-
-poly1305_donna_mul:
- d[0] = add128(add128(mul64x64_128(h0, r0), mul64x64_128(h1, s2)), mul64x64_128(h2, s1));
- d[1] = add128(add128(mul64x64_128(h0, r1), mul64x64_128(h1, r0)), mul64x64_128(h2, s2));
- d[2] = add128(add128(mul64x64_128(h0, r2), mul64x64_128(h1, r1)), mul64x64_128(h2, r0));
- h0 = lo128(d[0]) & 0xfffffffffff;
- c = shr128(d[0], 44);
- d[1] = add128_64(d[1], c);
- h1 = lo128(d[1]) & 0xfffffffffff;
- c = shr128(d[1], 44);
- d[2] = add128_64(d[2], c);
- h2 = lo128(d[2]) & 0x3ffffffffff;
- c = shr128(d[2], 42);
- h0 += c * 5;
-
- m += 16;
- leftover -= 16;
- if (leftover >= 16)
- goto poly1305_donna_atleast16bytes;
-
-/* final bytes */
-poly1305_donna_atmost15bytes:
- if (!leftover)
- goto poly1305_donna_finish;
-
- m[leftover++] = 1;
- poly1305_block_zero(m + leftover, 16 - leftover);
- leftover = 16;
-
- t0 = U8TO64_LE(m + 0);
- t1 = U8TO64_LE(m + 8);
- h0 += t0 & 0xfffffffffff;
- t0 = shr128_pair(t1, t0, 44);
- h1 += t0 & 0xfffffffffff;
- h2 += (t1 >> 24);
-
- goto poly1305_donna_mul;
-
-poly1305_donna_finish:
- c = (h0 >> 44);
- h0 &= 0xfffffffffff;
- h1 += c;
- c = (h1 >> 44);
- h1 &= 0xfffffffffff;
- h2 += c;
- c = (h2 >> 42);
- h2 &= 0x3ffffffffff;
- h0 += c * 5;
-
- g0 = h0 + 5;
- c = (g0 >> 44);
- g0 &= 0xfffffffffff;
- g1 = h1 + c;
- c = (g1 >> 44);
- g1 &= 0xfffffffffff;
- g2 = h2 + c - ((uint64_t)1 << 42);
-
- c = (g2 >> 63) - 1;
- nc = ~c;
- h0 = (h0 & nc) | (g0 & c);
- h1 = (h1 & nc) | (g1 & c);
- h2 = (h2 & nc) | (g2 & c);
-
- /* pad */
- t0 = ((uint64_t)p->R23.d[3] << 32) | (uint64_t)p->R23.d[1];
- t1 = ((uint64_t)p->R24.d[3] << 32) | (uint64_t)p->R24.d[1];
- h0 += (t0 & 0xfffffffffff);
- c = (h0 >> 44);
- h0 &= 0xfffffffffff;
- t0 = shr128_pair(t1, t0, 44);
- h1 += (t0 & 0xfffffffffff) + c;
- c = (h1 >> 44);
- h1 &= 0xfffffffffff;
- t1 = (t1 >> 24);
- h2 += (t1) + c;
-
- U64TO8_LE(mac + 0, ((h0) | (h1 << 44)));
- U64TO8_LE(mac + 8, ((h1 >> 20) | (h2 << 24)));
-}
diff --git a/security/nss/lib/freebl/poly1305.c b/security/nss/lib/freebl/poly1305.c
deleted file mode 100644
index eb3e3cd55..000000000
--- a/security/nss/lib/freebl/poly1305.c
+++ /dev/null
@@ -1,314 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-/* This implementation of poly1305 is by Andrew Moon
- * (https://github.com/floodyberry/poly1305-donna) and released as public
- * domain. */
-
-#include <string.h>
-
-#include "poly1305.h"
-
-#if defined(_MSC_VER) && _MSC_VER < 1600
-#include "prtypes.h"
-typedef PRUint32 uint32_t;
-typedef PRUint64 uint64_t;
-#else
-#include <stdint.h>
-#endif
-
-#if defined(NSS_X86) || defined(NSS_X64)
-/* We can assume little-endian. */
-static uint32_t
-U8TO32_LE(const unsigned char *m)
-{
- uint32_t r;
- memcpy(&r, m, sizeof(r));
- return r;
-}
-
-static void
-U32TO8_LE(unsigned char *m, uint32_t v)
-{
- memcpy(m, &v, sizeof(v));
-}
-#else
-static uint32_t
-U8TO32_LE(const unsigned char *m)
-{
- return (uint32_t)m[0] |
- (uint32_t)m[1] << 8 |
- (uint32_t)m[2] << 16 |
- (uint32_t)m[3] << 24;
-}
-
-static void
-U32TO8_LE(unsigned char *m, uint32_t v)
-{
- m[0] = v;
- m[1] = v >> 8;
- m[2] = v >> 16;
- m[3] = v >> 24;
-}
-#endif
-
-static uint64_t
-mul32x32_64(uint32_t a, uint32_t b)
-{
- return (uint64_t)a * b;
-}
-
-struct poly1305_state_st {
- uint32_t r0, r1, r2, r3, r4;
- uint32_t s1, s2, s3, s4;
- uint32_t h0, h1, h2, h3, h4;
- unsigned char buf[16];
- unsigned int buf_used;
- unsigned char key[16];
-};
-
-/* update updates |state| given some amount of input data. This function may
- * only be called with a |len| that is not a multiple of 16 at the end of the
- * data. Otherwise the input must be buffered into 16 byte blocks. */
-static void
-update(struct poly1305_state_st *state, const unsigned char *in,
- size_t len)
-{
- uint32_t t0, t1, t2, t3;
- uint64_t t[5];
- uint32_t b;
- uint64_t c;
- size_t j;
- unsigned char mp[16];
-
- if (len < 16)
- goto poly1305_donna_atmost15bytes;
-
-poly1305_donna_16bytes:
- t0 = U8TO32_LE(in);
- t1 = U8TO32_LE(in + 4);
- t2 = U8TO32_LE(in + 8);
- t3 = U8TO32_LE(in + 12);
-
- in += 16;
- len -= 16;
-
- state->h0 += t0 & 0x3ffffff;
- state->h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff;
- state->h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff;
- state->h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff;
- state->h4 += (t3 >> 8) | (1 << 24);
-
-poly1305_donna_mul:
- t[0] = mul32x32_64(state->h0, state->r0) +
- mul32x32_64(state->h1, state->s4) +
- mul32x32_64(state->h2, state->s3) +
- mul32x32_64(state->h3, state->s2) +
- mul32x32_64(state->h4, state->s1);
- t[1] = mul32x32_64(state->h0, state->r1) +
- mul32x32_64(state->h1, state->r0) +
- mul32x32_64(state->h2, state->s4) +
- mul32x32_64(state->h3, state->s3) +
- mul32x32_64(state->h4, state->s2);
- t[2] = mul32x32_64(state->h0, state->r2) +
- mul32x32_64(state->h1, state->r1) +
- mul32x32_64(state->h2, state->r0) +
- mul32x32_64(state->h3, state->s4) +
- mul32x32_64(state->h4, state->s3);
- t[3] = mul32x32_64(state->h0, state->r3) +
- mul32x32_64(state->h1, state->r2) +
- mul32x32_64(state->h2, state->r1) +
- mul32x32_64(state->h3, state->r0) +
- mul32x32_64(state->h4, state->s4);
- t[4] = mul32x32_64(state->h0, state->r4) +
- mul32x32_64(state->h1, state->r3) +
- mul32x32_64(state->h2, state->r2) +
- mul32x32_64(state->h3, state->r1) +
- mul32x32_64(state->h4, state->r0);
-
- state->h0 = (uint32_t)t[0] & 0x3ffffff;
- c = (t[0] >> 26);
- t[1] += c;
- state->h1 = (uint32_t)t[1] & 0x3ffffff;
- b = (uint32_t)(t[1] >> 26);
- t[2] += b;
- state->h2 = (uint32_t)t[2] & 0x3ffffff;
- b = (uint32_t)(t[2] >> 26);
- t[3] += b;
- state->h3 = (uint32_t)t[3] & 0x3ffffff;
- b = (uint32_t)(t[3] >> 26);
- t[4] += b;
- state->h4 = (uint32_t)t[4] & 0x3ffffff;
- b = (uint32_t)(t[4] >> 26);
- state->h0 += b * 5;
-
- if (len >= 16)
- goto poly1305_donna_16bytes;
-
-/* final bytes */
-poly1305_donna_atmost15bytes:
- if (!len)
- return;
-
- for (j = 0; j < len; j++)
- mp[j] = in[j];
- mp[j++] = 1;
- for (; j < 16; j++)
- mp[j] = 0;
- len = 0;
-
- t0 = U8TO32_LE(mp + 0);
- t1 = U8TO32_LE(mp + 4);
- t2 = U8TO32_LE(mp + 8);
- t3 = U8TO32_LE(mp + 12);
-
- state->h0 += t0 & 0x3ffffff;
- state->h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff;
- state->h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff;
- state->h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff;
- state->h4 += (t3 >> 8);
-
- goto poly1305_donna_mul;
-}
-
-void
-Poly1305Init(poly1305_state *statep, const unsigned char key[32])
-{
- struct poly1305_state_st *state = (struct poly1305_state_st *)statep;
- uint32_t t0, t1, t2, t3;
-
- t0 = U8TO32_LE(key + 0);
- t1 = U8TO32_LE(key + 4);
- t2 = U8TO32_LE(key + 8);
- t3 = U8TO32_LE(key + 12);
-
- /* precompute multipliers */
- state->r0 = t0 & 0x3ffffff;
- t0 >>= 26;
- t0 |= t1 << 6;
- state->r1 = t0 & 0x3ffff03;
- t1 >>= 20;
- t1 |= t2 << 12;
- state->r2 = t1 & 0x3ffc0ff;
- t2 >>= 14;
- t2 |= t3 << 18;
- state->r3 = t2 & 0x3f03fff;
- t3 >>= 8;
- state->r4 = t3 & 0x00fffff;
-
- state->s1 = state->r1 * 5;
- state->s2 = state->r2 * 5;
- state->s3 = state->r3 * 5;
- state->s4 = state->r4 * 5;
-
- /* init state */
- state->h0 = 0;
- state->h1 = 0;
- state->h2 = 0;
- state->h3 = 0;
- state->h4 = 0;
-
- state->buf_used = 0;
- memcpy(state->key, key + 16, sizeof(state->key));
-}
-
-void
-Poly1305Update(poly1305_state *statep, const unsigned char *in,
- size_t in_len)
-{
- unsigned int i;
- struct poly1305_state_st *state = (struct poly1305_state_st *)statep;
-
- if (state->buf_used) {
- unsigned int todo = 16 - state->buf_used;
- if (todo > in_len)
- todo = in_len;
- for (i = 0; i < todo; i++)
- state->buf[state->buf_used + i] = in[i];
- state->buf_used += todo;
- in_len -= todo;
- in += todo;
-
- if (state->buf_used == 16) {
- update(state, state->buf, 16);
- state->buf_used = 0;
- }
- }
-
- if (in_len >= 16) {
- size_t todo = in_len & ~0xf;
- update(state, in, todo);
- in += todo;
- in_len &= 0xf;
- }
-
- if (in_len) {
- for (i = 0; i < in_len; i++)
- state->buf[i] = in[i];
- state->buf_used = in_len;
- }
-}
-
-void
-Poly1305Finish(poly1305_state *statep, unsigned char mac[16])
-{
- struct poly1305_state_st *state = (struct poly1305_state_st *)statep;
- uint64_t f0, f1, f2, f3;
- uint32_t g0, g1, g2, g3, g4;
- uint32_t b, nb;
-
- if (state->buf_used)
- update(state, state->buf, state->buf_used);
-
- b = state->h0 >> 26;
- state->h0 = state->h0 & 0x3ffffff;
- state->h1 += b;
- b = state->h1 >> 26;
- state->h1 = state->h1 & 0x3ffffff;
- state->h2 += b;
- b = state->h2 >> 26;
- state->h2 = state->h2 & 0x3ffffff;
- state->h3 += b;
- b = state->h3 >> 26;
- state->h3 = state->h3 & 0x3ffffff;
- state->h4 += b;
- b = state->h4 >> 26;
- state->h4 = state->h4 & 0x3ffffff;
- state->h0 += b * 5;
-
- g0 = state->h0 + 5;
- b = g0 >> 26;
- g0 &= 0x3ffffff;
- g1 = state->h1 + b;
- b = g1 >> 26;
- g1 &= 0x3ffffff;
- g2 = state->h2 + b;
- b = g2 >> 26;
- g2 &= 0x3ffffff;
- g3 = state->h3 + b;
- b = g3 >> 26;
- g3 &= 0x3ffffff;
- g4 = state->h4 + b - (1 << 26);
-
- b = (g4 >> 31) - 1;
- nb = ~b;
- state->h0 = (state->h0 & nb) | (g0 & b);
- state->h1 = (state->h1 & nb) | (g1 & b);
- state->h2 = (state->h2 & nb) | (g2 & b);
- state->h3 = (state->h3 & nb) | (g3 & b);
- state->h4 = (state->h4 & nb) | (g4 & b);
-
- f0 = ((state->h0) | (state->h1 << 26)) + (uint64_t)U8TO32_LE(&state->key[0]);
- f1 = ((state->h1 >> 6) | (state->h2 << 20)) + (uint64_t)U8TO32_LE(&state->key[4]);
- f2 = ((state->h2 >> 12) | (state->h3 << 14)) + (uint64_t)U8TO32_LE(&state->key[8]);
- f3 = ((state->h3 >> 18) | (state->h4 << 8)) + (uint64_t)U8TO32_LE(&state->key[12]);
-
- U32TO8_LE(&mac[0], (uint32_t)f0);
- f1 += (f0 >> 32);
- U32TO8_LE(&mac[4], (uint32_t)f1);
- f2 += (f1 >> 32);
- U32TO8_LE(&mac[8], (uint32_t)f2);
- f3 += (f2 >> 32);
- U32TO8_LE(&mac[12], (uint32_t)f3);
-}
diff --git a/security/nss/lib/freebl/poly1305.h b/security/nss/lib/freebl/poly1305.h
deleted file mode 100644
index 125f49b3b..000000000
--- a/security/nss/lib/freebl/poly1305.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * poly1305.h - header file for Poly1305 implementation.
- *
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-#ifndef FREEBL_POLY1305_H_
-#define FREEBL_POLY1305_H_
-
-#include "stddef.h"
-
-typedef unsigned char poly1305_state[512];
-
-/* Poly1305Init sets up |state| so that it can be used to calculate an
- * authentication tag with the one-time key |key|. Note that |key| is a
- * one-time key and therefore there is no `reset' method because that would
- * enable several messages to be authenticated with the same key. */
-extern void Poly1305Init(poly1305_state* state, const unsigned char key[32]);
-
-/* Poly1305Update processes |in_len| bytes from |in|. It can be called zero or
- * more times after poly1305_init. */
-extern void Poly1305Update(poly1305_state* state, const unsigned char* in,
- size_t inLen);
-
-/* Poly1305Finish completes the poly1305 calculation and writes a 16 byte
- * authentication tag to |mac|. */
-extern void Poly1305Finish(poly1305_state* state, unsigned char mac[16]);
-
-#endif /* FREEBL_POLY1305_H_ */
diff --git a/security/nss/lib/freebl/unix_urandom.c b/security/nss/lib/freebl/unix_urandom.c
index 25e6ad91c..869a5ed8c 100644
--- a/security/nss/lib/freebl/unix_urandom.c
+++ b/security/nss/lib/freebl/unix_urandom.c
@@ -4,10 +4,14 @@
#include <fcntl.h>
#include <unistd.h>
+#include <errno.h>
#include "secerr.h"
#include "secrng.h"
#include "prprf.h"
+/* syscall getentropy() is limited to retrieving 256 bytes */
+#define GETENTROPY_MAX_BYTES 256
+
void
RNG_SystemInfoForRNG(void)
{
@@ -28,6 +32,35 @@ RNG_SystemRNG(void *dest, size_t maxLen)
size_t fileBytes = 0;
unsigned char *buffer = dest;
+#if defined(__OpenBSD__) || (defined(LINUX) && defined(__GLIBC__) && ((__GLIBC__ > 2) || ((__GLIBC__ == 2) && (__GLIBC_MINOR__ >= 25))))
+ int result;
+
+ while (fileBytes < maxLen) {
+ size_t getBytes = maxLen - fileBytes;
+ if (getBytes > GETENTROPY_MAX_BYTES) {
+ getBytes = GETENTROPY_MAX_BYTES;
+ }
+ result = getentropy(buffer, getBytes);
+ if (result == 0) { /* success */
+ fileBytes += getBytes;
+ buffer += getBytes;
+ } else {
+ break;
+ }
+ }
+ if (fileBytes == maxLen) { /* success */
+ return maxLen;
+ }
+ /* If we failed with an error other than ENOSYS, it means the destination
+ * buffer is not writeable. We don't need to try writing to it again. */
+ if (errno != ENOSYS) {
+ PORT_SetError(SEC_ERROR_NEED_RANDOM);
+ return 0;
+ }
+ /* ENOSYS means the kernel doesn't support getentropy()/getrandom().
+ * Reset the number of bytes to get and fall back to /dev/urandom. */
+ fileBytes = 0;
+#endif
fd = open("/dev/urandom", O_RDONLY);
if (fd < 0) {
PORT_SetError(SEC_ERROR_NEED_RANDOM);
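
The new getentropy() path requests at most GETENTROPY_MAX_BYTES (256) per call because the syscall rejects larger buffers, and it falls back to the /dev/urandom read below only when getentropy() reports ENOSYS. Callers are unaffected; a minimal usage sketch, assuming the usual freebl prototype in which RNG_SystemRNG returns the number of bytes produced (0 on failure):

    unsigned char seed[64];

    if (RNG_SystemRNG(seed, sizeof(seed)) != sizeof(seed)) {
        /* SEC_ERROR_NEED_RANDOM has been set; treat this as fatal for
         * anything that needs unpredictable key material. */
    }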
diff --git a/security/nss/lib/freebl/verified/Hacl_Poly1305_32.c b/security/nss/lib/freebl/verified/Hacl_Poly1305_32.c
new file mode 100644
index 000000000..246a41af3
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Poly1305_32.c
@@ -0,0 +1,578 @@
+/* Copyright 2016-2017 INRIA and Microsoft Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "Hacl_Poly1305_32.h"
+
+inline static void
+Hacl_Bignum_Modulo_reduce(uint32_t *b)
+{
+ uint32_t b0 = b[0U];
+ b[0U] = (b0 << (uint32_t)2U) + b0;
+}
+
+inline static void
+Hacl_Bignum_Modulo_carry_top(uint32_t *b)
+{
+ uint32_t b4 = b[4U];
+ uint32_t b0 = b[0U];
+ uint32_t b4_26 = b4 >> (uint32_t)26U;
+ b[4U] = b4 & (uint32_t)0x3ffffffU;
+ b[0U] = (b4_26 << (uint32_t)2U) + b4_26 + b0;
+}
+
+inline static void
+Hacl_Bignum_Modulo_carry_top_wide(uint64_t *b)
+{
+ uint64_t b4 = b[4U];
+ uint64_t b0 = b[0U];
+ uint64_t b4_ = b4 & (uint64_t)(uint32_t)0x3ffffffU;
+ uint32_t b4_26 = (uint32_t)(b4 >> (uint32_t)26U);
+ uint64_t b0_ = b0 + (uint64_t)((b4_26 << (uint32_t)2U) + b4_26);
+ b[4U] = b4_;
+ b[0U] = b0_;
+}
+
+inline static void
+Hacl_Bignum_Fproduct_copy_from_wide_(uint32_t *output, uint64_t *input)
+{
+ for (uint32_t i = (uint32_t)0U; i < (uint32_t)5U; i = i + (uint32_t)1U) {
+ uint64_t xi = input[i];
+ output[i] = (uint32_t)xi;
+ }
+}
+
+inline static void
+Hacl_Bignum_Fproduct_sum_scalar_multiplication_(uint64_t *output, uint32_t *input, uint32_t s)
+{
+ for (uint32_t i = (uint32_t)0U; i < (uint32_t)5U; i = i + (uint32_t)1U) {
+ uint64_t xi = output[i];
+ uint32_t yi = input[i];
+ uint64_t x_wide = (uint64_t)yi;
+ uint64_t y_wide = (uint64_t)s;
+ output[i] = xi + x_wide * y_wide;
+ }
+}
+
+inline static void
+Hacl_Bignum_Fproduct_carry_wide_(uint64_t *tmp)
+{
+ for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i = i + (uint32_t)1U) {
+ uint32_t ctr = i;
+ uint64_t tctr = tmp[ctr];
+ uint64_t tctrp1 = tmp[ctr + (uint32_t)1U];
+ uint32_t r0 = (uint32_t)tctr & (uint32_t)0x3ffffffU;
+ uint64_t c = tctr >> (uint32_t)26U;
+ tmp[ctr] = (uint64_t)r0;
+ tmp[ctr + (uint32_t)1U] = tctrp1 + c;
+ }
+}
+
+inline static void
+Hacl_Bignum_Fproduct_carry_limb_(uint32_t *tmp)
+{
+ for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i = i + (uint32_t)1U) {
+ uint32_t ctr = i;
+ uint32_t tctr = tmp[ctr];
+ uint32_t tctrp1 = tmp[ctr + (uint32_t)1U];
+ uint32_t r0 = tctr & (uint32_t)0x3ffffffU;
+ uint32_t c = tctr >> (uint32_t)26U;
+ tmp[ctr] = r0;
+ tmp[ctr + (uint32_t)1U] = tctrp1 + c;
+ }
+}
+
+inline static void
+Hacl_Bignum_Fmul_shift_reduce(uint32_t *output)
+{
+ uint32_t tmp = output[4U];
+ for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i = i + (uint32_t)1U) {
+ uint32_t ctr = (uint32_t)5U - i - (uint32_t)1U;
+ uint32_t z = output[ctr - (uint32_t)1U];
+ output[ctr] = z;
+ }
+ output[0U] = tmp;
+ Hacl_Bignum_Modulo_reduce(output);
+}
+
+static void
+Hacl_Bignum_Fmul_mul_shift_reduce_(uint64_t *output, uint32_t *input, uint32_t *input2)
+{
+ for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i = i + (uint32_t)1U) {
+ uint32_t input2i = input2[i];
+ Hacl_Bignum_Fproduct_sum_scalar_multiplication_(output, input, input2i);
+ Hacl_Bignum_Fmul_shift_reduce(input);
+ }
+ uint32_t i = (uint32_t)4U;
+ uint32_t input2i = input2[i];
+ Hacl_Bignum_Fproduct_sum_scalar_multiplication_(output, input, input2i);
+}
+
+inline static void
+Hacl_Bignum_Fmul_fmul(uint32_t *output, uint32_t *input, uint32_t *input2)
+{
+ uint32_t tmp[5U] = { 0U };
+ memcpy(tmp, input, (uint32_t)5U * sizeof input[0U]);
+ uint64_t t[5U] = { 0U };
+ Hacl_Bignum_Fmul_mul_shift_reduce_(t, tmp, input2);
+ Hacl_Bignum_Fproduct_carry_wide_(t);
+ Hacl_Bignum_Modulo_carry_top_wide(t);
+ Hacl_Bignum_Fproduct_copy_from_wide_(output, t);
+ uint32_t i0 = output[0U];
+ uint32_t i1 = output[1U];
+ uint32_t i0_ = i0 & (uint32_t)0x3ffffffU;
+ uint32_t i1_ = i1 + (i0 >> (uint32_t)26U);
+ output[0U] = i0_;
+ output[1U] = i1_;
+}
+
+inline static void
+Hacl_Bignum_AddAndMultiply_add_and_multiply(uint32_t *acc, uint32_t *block, uint32_t *r)
+{
+ for (uint32_t i = (uint32_t)0U; i < (uint32_t)5U; i = i + (uint32_t)1U) {
+ uint32_t xi = acc[i];
+ uint32_t yi = block[i];
+ acc[i] = xi + yi;
+ }
+ Hacl_Bignum_Fmul_fmul(acc, acc, r);
+}
+
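+/* Absorb one full 16-byte block: load it into five 26-bit limbs using
+ * overlapping little-endian 32-bit loads at byte offsets 0, 3, 6, 9 and 12,
+ * set the 2^128 padding bit, then compute acc = (acc + block) * r. */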
+inline static void
+Hacl_Impl_Poly1305_32_poly1305_update(
+ Hacl_Impl_Poly1305_32_State_poly1305_state st,
+ uint8_t *m)
+{
+ Hacl_Impl_Poly1305_32_State_poly1305_state scrut0 = st;
+ uint32_t *h = scrut0.h;
+ uint32_t *acc = h;
+ Hacl_Impl_Poly1305_32_State_poly1305_state scrut = st;
+ uint32_t *r = scrut.r;
+ uint32_t *r5 = r;
+ uint32_t tmp[5U] = { 0U };
+ uint8_t *s0 = m;
+ uint8_t *s1 = m + (uint32_t)3U;
+ uint8_t *s2 = m + (uint32_t)6U;
+ uint8_t *s3 = m + (uint32_t)9U;
+ uint8_t *s4 = m + (uint32_t)12U;
+ uint32_t i0 = load32_le(s0);
+ uint32_t i1 = load32_le(s1);
+ uint32_t i2 = load32_le(s2);
+ uint32_t i3 = load32_le(s3);
+ uint32_t i4 = load32_le(s4);
+ uint32_t r0 = i0 & (uint32_t)0x3ffffffU;
+ uint32_t r1 = i1 >> (uint32_t)2U & (uint32_t)0x3ffffffU;
+ uint32_t r2 = i2 >> (uint32_t)4U & (uint32_t)0x3ffffffU;
+ uint32_t r3 = i3 >> (uint32_t)6U & (uint32_t)0x3ffffffU;
+ uint32_t r4 = i4 >> (uint32_t)8U;
+ tmp[0U] = r0;
+ tmp[1U] = r1;
+ tmp[2U] = r2;
+ tmp[3U] = r3;
+ tmp[4U] = r4;
+ uint32_t b4 = tmp[4U];
+ uint32_t b4_ = (uint32_t)0x1000000U | b4;
+ tmp[4U] = b4_;
+ Hacl_Bignum_AddAndMultiply_add_and_multiply(acc, tmp, r5);
+}
+
+inline static void
+Hacl_Impl_Poly1305_32_poly1305_process_last_block_(
+ uint8_t *block,
+ Hacl_Impl_Poly1305_32_State_poly1305_state st,
+ uint8_t *m,
+ uint64_t rem_)
+{
+ uint32_t tmp[5U] = { 0U };
+ uint8_t *s0 = block;
+ uint8_t *s1 = block + (uint32_t)3U;
+ uint8_t *s2 = block + (uint32_t)6U;
+ uint8_t *s3 = block + (uint32_t)9U;
+ uint8_t *s4 = block + (uint32_t)12U;
+ uint32_t i0 = load32_le(s0);
+ uint32_t i1 = load32_le(s1);
+ uint32_t i2 = load32_le(s2);
+ uint32_t i3 = load32_le(s3);
+ uint32_t i4 = load32_le(s4);
+ uint32_t r0 = i0 & (uint32_t)0x3ffffffU;
+ uint32_t r1 = i1 >> (uint32_t)2U & (uint32_t)0x3ffffffU;
+ uint32_t r2 = i2 >> (uint32_t)4U & (uint32_t)0x3ffffffU;
+ uint32_t r3 = i3 >> (uint32_t)6U & (uint32_t)0x3ffffffU;
+ uint32_t r4 = i4 >> (uint32_t)8U;
+ tmp[0U] = r0;
+ tmp[1U] = r1;
+ tmp[2U] = r2;
+ tmp[3U] = r3;
+ tmp[4U] = r4;
+ Hacl_Impl_Poly1305_32_State_poly1305_state scrut0 = st;
+ uint32_t *h = scrut0.h;
+ Hacl_Impl_Poly1305_32_State_poly1305_state scrut = st;
+ uint32_t *r = scrut.r;
+ Hacl_Bignum_AddAndMultiply_add_and_multiply(h, tmp, r);
+}
+
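+/* Absorb the trailing partial block: copy the remaining rem_ (< 16) bytes
+ * into a zeroed 16-byte buffer with a 0x01 byte appended (the padding bit
+ * for a short block), then run one more add-and-multiply. */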
+inline static void
+Hacl_Impl_Poly1305_32_poly1305_process_last_block(
+ Hacl_Impl_Poly1305_32_State_poly1305_state st,
+ uint8_t *m,
+ uint64_t rem_)
+{
+ uint8_t zero1 = (uint8_t)0U;
+ KRML_CHECK_SIZE(zero1, (uint32_t)16U);
+ uint8_t block[16U];
+ for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i)
+ block[_i] = zero1;
+ uint32_t i0 = (uint32_t)rem_;
+ uint32_t i = (uint32_t)rem_;
+ memcpy(block, m, i * sizeof m[0U]);
+ block[i0] = (uint8_t)1U;
+ Hacl_Impl_Poly1305_32_poly1305_process_last_block_(block, st, m, rem_);
+}
+
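+/* Final reduction of the accumulator: propagate carries, then subtract
+ * p = 2^130 - 5 exactly when acc >= p, using the branch-free gte/eq masks
+ * so the comparison stays constant-time. */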
+static void
+Hacl_Impl_Poly1305_32_poly1305_last_pass(uint32_t *acc)
+{
+ Hacl_Bignum_Fproduct_carry_limb_(acc);
+ Hacl_Bignum_Modulo_carry_top(acc);
+ uint32_t t0 = acc[0U];
+ uint32_t t10 = acc[1U];
+ uint32_t t20 = acc[2U];
+ uint32_t t30 = acc[3U];
+ uint32_t t40 = acc[4U];
+ uint32_t t1_ = t10 + (t0 >> (uint32_t)26U);
+ uint32_t mask_261 = (uint32_t)0x3ffffffU;
+ uint32_t t0_ = t0 & mask_261;
+ uint32_t t2_ = t20 + (t1_ >> (uint32_t)26U);
+ uint32_t t1__ = t1_ & mask_261;
+ uint32_t t3_ = t30 + (t2_ >> (uint32_t)26U);
+ uint32_t t2__ = t2_ & mask_261;
+ uint32_t t4_ = t40 + (t3_ >> (uint32_t)26U);
+ uint32_t t3__ = t3_ & mask_261;
+ acc[0U] = t0_;
+ acc[1U] = t1__;
+ acc[2U] = t2__;
+ acc[3U] = t3__;
+ acc[4U] = t4_;
+ Hacl_Bignum_Modulo_carry_top(acc);
+ uint32_t t00 = acc[0U];
+ uint32_t t1 = acc[1U];
+ uint32_t t2 = acc[2U];
+ uint32_t t3 = acc[3U];
+ uint32_t t4 = acc[4U];
+ uint32_t t1_0 = t1 + (t00 >> (uint32_t)26U);
+ uint32_t t0_0 = t00 & (uint32_t)0x3ffffffU;
+ uint32_t t2_0 = t2 + (t1_0 >> (uint32_t)26U);
+ uint32_t t1__0 = t1_0 & (uint32_t)0x3ffffffU;
+ uint32_t t3_0 = t3 + (t2_0 >> (uint32_t)26U);
+ uint32_t t2__0 = t2_0 & (uint32_t)0x3ffffffU;
+ uint32_t t4_0 = t4 + (t3_0 >> (uint32_t)26U);
+ uint32_t t3__0 = t3_0 & (uint32_t)0x3ffffffU;
+ acc[0U] = t0_0;
+ acc[1U] = t1__0;
+ acc[2U] = t2__0;
+ acc[3U] = t3__0;
+ acc[4U] = t4_0;
+ Hacl_Bignum_Modulo_carry_top(acc);
+ uint32_t i0 = acc[0U];
+ uint32_t i1 = acc[1U];
+ uint32_t i0_ = i0 & (uint32_t)0x3ffffffU;
+ uint32_t i1_ = i1 + (i0 >> (uint32_t)26U);
+ acc[0U] = i0_;
+ acc[1U] = i1_;
+ uint32_t a0 = acc[0U];
+ uint32_t a1 = acc[1U];
+ uint32_t a2 = acc[2U];
+ uint32_t a3 = acc[3U];
+ uint32_t a4 = acc[4U];
+ uint32_t mask0 = FStar_UInt32_gte_mask(a0, (uint32_t)0x3fffffbU);
+ uint32_t mask1 = FStar_UInt32_eq_mask(a1, (uint32_t)0x3ffffffU);
+ uint32_t mask2 = FStar_UInt32_eq_mask(a2, (uint32_t)0x3ffffffU);
+ uint32_t mask3 = FStar_UInt32_eq_mask(a3, (uint32_t)0x3ffffffU);
+ uint32_t mask4 = FStar_UInt32_eq_mask(a4, (uint32_t)0x3ffffffU);
+ uint32_t mask = (((mask0 & mask1) & mask2) & mask3) & mask4;
+ uint32_t a0_ = a0 - ((uint32_t)0x3fffffbU & mask);
+ uint32_t a1_ = a1 - ((uint32_t)0x3ffffffU & mask);
+ uint32_t a2_ = a2 - ((uint32_t)0x3ffffffU & mask);
+ uint32_t a3_ = a3 - ((uint32_t)0x3ffffffU & mask);
+ uint32_t a4_ = a4 - ((uint32_t)0x3ffffffU & mask);
+ acc[0U] = a0_;
+ acc[1U] = a1_;
+ acc[2U] = a2_;
+ acc[3U] = a3_;
+ acc[4U] = a4_;
+}
+
+static Hacl_Impl_Poly1305_32_State_poly1305_state
+Hacl_Impl_Poly1305_32_mk_state(uint32_t *r, uint32_t *h)
+{
+ return ((Hacl_Impl_Poly1305_32_State_poly1305_state){.r = r, .h = h });
+}
+
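+/* Absorb len1 complete 16-byte blocks of m; len1 counts blocks, not bytes. */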
+static void
+Hacl_Standalone_Poly1305_32_poly1305_blocks(
+ Hacl_Impl_Poly1305_32_State_poly1305_state st,
+ uint8_t *m,
+ uint64_t len1)
+{
+ if (!(len1 == (uint64_t)0U)) {
+ uint8_t *block = m;
+ uint8_t *tail1 = m + (uint32_t)16U;
+ Hacl_Impl_Poly1305_32_poly1305_update(st, block);
+ uint64_t len2 = len1 - (uint64_t)1U;
+ Hacl_Standalone_Poly1305_32_poly1305_blocks(st, tail1, len2);
+ }
+}
+
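+/* Key setup plus block absorption: clamp the first 16 key bytes with the
+ * standard Poly1305 mask 0x0ffffffc0ffffffc0ffffffc0fffffff, split r into
+ * 26-bit limbs, zero the accumulator, then absorb len1 blocks of input. */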
+static void
+Hacl_Standalone_Poly1305_32_poly1305_partial(
+ Hacl_Impl_Poly1305_32_State_poly1305_state st,
+ uint8_t *input,
+ uint64_t len1,
+ uint8_t *kr)
+{
+ Hacl_Impl_Poly1305_32_State_poly1305_state scrut = st;
+ uint32_t *r = scrut.r;
+ uint32_t *x0 = r;
+ FStar_UInt128_t k1 = load128_le(kr);
+ FStar_UInt128_t
+ k_clamped =
+ FStar_UInt128_logand(k1,
+ FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128((uint64_t)0x0ffffffc0ffffffcU),
+ (uint32_t)64U),
+ FStar_UInt128_uint64_to_uint128((uint64_t)0x0ffffffc0fffffffU)));
+ uint32_t r0 = (uint32_t)FStar_UInt128_uint128_to_uint64(k_clamped) & (uint32_t)0x3ffffffU;
+ uint32_t
+ r1 =
+ (uint32_t)FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(k_clamped, (uint32_t)26U)) & (uint32_t)0x3ffffffU;
+ uint32_t
+ r2 =
+ (uint32_t)FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(k_clamped, (uint32_t)52U)) & (uint32_t)0x3ffffffU;
+ uint32_t
+ r3 =
+ (uint32_t)FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(k_clamped, (uint32_t)78U)) & (uint32_t)0x3ffffffU;
+ uint32_t
+ r4 =
+ (uint32_t)FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(k_clamped, (uint32_t)104U)) & (uint32_t)0x3ffffffU;
+ x0[0U] = r0;
+ x0[1U] = r1;
+ x0[2U] = r2;
+ x0[3U] = r3;
+ x0[4U] = r4;
+ Hacl_Impl_Poly1305_32_State_poly1305_state scrut0 = st;
+ uint32_t *h = scrut0.h;
+ uint32_t *x00 = h;
+ x00[0U] = (uint32_t)0U;
+ x00[1U] = (uint32_t)0U;
+ x00[2U] = (uint32_t)0U;
+ x00[3U] = (uint32_t)0U;
+ x00[4U] = (uint32_t)0U;
+ Hacl_Standalone_Poly1305_32_poly1305_blocks(st, input, len1);
+}
+
+static void
+Hacl_Standalone_Poly1305_32_poly1305_complete(
+ Hacl_Impl_Poly1305_32_State_poly1305_state st,
+ uint8_t *m,
+ uint64_t len1,
+ uint8_t *k1)
+{
+ uint8_t *kr = k1;
+ uint64_t len16 = len1 >> (uint32_t)4U;
+ uint64_t rem16 = len1 & (uint64_t)0xfU;
+ uint8_t *part_input = m;
+ uint8_t *last_block = m + (uint32_t)((uint64_t)16U * len16);
+ Hacl_Standalone_Poly1305_32_poly1305_partial(st, part_input, len16, kr);
+ if (!(rem16 == (uint64_t)0U))
+ Hacl_Impl_Poly1305_32_poly1305_process_last_block(st, last_block, rem16);
+ Hacl_Impl_Poly1305_32_State_poly1305_state scrut = st;
+ uint32_t *h = scrut.h;
+ uint32_t *acc = h;
+ Hacl_Impl_Poly1305_32_poly1305_last_pass(acc);
+}
+
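+/* One-shot MAC over len1 bytes of input: evaluate the message polynomial at
+ * the clamped r, then add s (the second half of the 32-byte key k1) modulo
+ * 2^128 to form the 16-byte tag. */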
+static void
+Hacl_Standalone_Poly1305_32_crypto_onetimeauth_(
+ uint8_t *output,
+ uint8_t *input,
+ uint64_t len1,
+ uint8_t *k1)
+{
+ uint32_t buf[10U] = { 0U };
+ uint32_t *r = buf;
+ uint32_t *h = buf + (uint32_t)5U;
+ Hacl_Impl_Poly1305_32_State_poly1305_state st = Hacl_Impl_Poly1305_32_mk_state(r, h);
+ uint8_t *key_s = k1 + (uint32_t)16U;
+ Hacl_Standalone_Poly1305_32_poly1305_complete(st, input, len1, k1);
+ Hacl_Impl_Poly1305_32_State_poly1305_state scrut = st;
+ uint32_t *h5 = scrut.h;
+ uint32_t *acc = h5;
+ FStar_UInt128_t k_ = load128_le(key_s);
+ uint32_t h0 = acc[0U];
+ uint32_t h1 = acc[1U];
+ uint32_t h2 = acc[2U];
+ uint32_t h3 = acc[3U];
+ uint32_t h4 = acc[4U];
+ FStar_UInt128_t
+ acc_ =
+ FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128((uint64_t)h4),
+ (uint32_t)104U),
+ FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128((uint64_t)h3),
+ (uint32_t)78U),
+ FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128((uint64_t)h2),
+ (uint32_t)52U),
+ FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128((uint64_t)h1),
+ (uint32_t)26U),
+ FStar_UInt128_uint64_to_uint128((uint64_t)h0)))));
+ FStar_UInt128_t mac_ = FStar_UInt128_add_mod(acc_, k_);
+ store128_le(output, mac_);
+}
+
+static void
+Hacl_Standalone_Poly1305_32_crypto_onetimeauth(
+ uint8_t *output,
+ uint8_t *input,
+ uint64_t len1,
+ uint8_t *k1)
+{
+ Hacl_Standalone_Poly1305_32_crypto_onetimeauth_(output, input, len1, k1);
+}
+
+void *
+Hacl_Poly1305_32_op_String_Access(FStar_Monotonic_HyperStack_mem h, uint8_t *b)
+{
+ return (void *)(uint8_t)0U;
+}
+
+Hacl_Impl_Poly1305_32_State_poly1305_state
+Hacl_Poly1305_32_mk_state(uint32_t *r, uint32_t *acc)
+{
+ return Hacl_Impl_Poly1305_32_mk_state(r, acc);
+}
+
+void
+Hacl_Poly1305_32_init(Hacl_Impl_Poly1305_32_State_poly1305_state st, uint8_t *k1)
+{
+ Hacl_Impl_Poly1305_32_State_poly1305_state scrut = st;
+ uint32_t *r = scrut.r;
+ uint32_t *x0 = r;
+ FStar_UInt128_t k10 = load128_le(k1);
+ FStar_UInt128_t
+ k_clamped =
+ FStar_UInt128_logand(k10,
+ FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128((uint64_t)0x0ffffffc0ffffffcU),
+ (uint32_t)64U),
+ FStar_UInt128_uint64_to_uint128((uint64_t)0x0ffffffc0fffffffU)));
+ uint32_t r0 = (uint32_t)FStar_UInt128_uint128_to_uint64(k_clamped) & (uint32_t)0x3ffffffU;
+ uint32_t
+ r1 =
+ (uint32_t)FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(k_clamped, (uint32_t)26U)) & (uint32_t)0x3ffffffU;
+ uint32_t
+ r2 =
+ (uint32_t)FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(k_clamped, (uint32_t)52U)) & (uint32_t)0x3ffffffU;
+ uint32_t
+ r3 =
+ (uint32_t)FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(k_clamped, (uint32_t)78U)) & (uint32_t)0x3ffffffU;
+ uint32_t
+ r4 =
+ (uint32_t)FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(k_clamped, (uint32_t)104U)) & (uint32_t)0x3ffffffU;
+ x0[0U] = r0;
+ x0[1U] = r1;
+ x0[2U] = r2;
+ x0[3U] = r3;
+ x0[4U] = r4;
+ Hacl_Impl_Poly1305_32_State_poly1305_state scrut0 = st;
+ uint32_t *h = scrut0.h;
+ uint32_t *x00 = h;
+ x00[0U] = (uint32_t)0U;
+ x00[1U] = (uint32_t)0U;
+ x00[2U] = (uint32_t)0U;
+ x00[3U] = (uint32_t)0U;
+ x00[4U] = (uint32_t)0U;
+}
+
+void *Hacl_Poly1305_32_empty_log = (void *)(uint8_t)0U;
+
+void
+Hacl_Poly1305_32_update_block(Hacl_Impl_Poly1305_32_State_poly1305_state st, uint8_t *m)
+{
+ Hacl_Impl_Poly1305_32_poly1305_update(st, m);
+}
+
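+/* Streaming update: len1 is the number of complete 16-byte blocks in m,
+ * not a byte count. */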
+void
+Hacl_Poly1305_32_update(
+ Hacl_Impl_Poly1305_32_State_poly1305_state st,
+ uint8_t *m,
+ uint32_t len1)
+{
+ if (!(len1 == (uint32_t)0U)) {
+ uint8_t *block = m;
+ uint8_t *m_ = m + (uint32_t)16U;
+ uint32_t len2 = len1 - (uint32_t)1U;
+ Hacl_Poly1305_32_update_block(st, block);
+ Hacl_Poly1305_32_update(st, m_, len2);
+ }
+}
+
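+/* Absorb the final len1 (< 16) bytes, if any, and fully reduce the
+ * accumulator; Hacl_Poly1305_32_finish relies on this final reduction. */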
+void
+Hacl_Poly1305_32_update_last(
+ Hacl_Impl_Poly1305_32_State_poly1305_state st,
+ uint8_t *m,
+ uint32_t len1)
+{
+ if (!((uint64_t)len1 == (uint64_t)0U))
+ Hacl_Impl_Poly1305_32_poly1305_process_last_block(st, m, (uint64_t)len1);
+ Hacl_Impl_Poly1305_32_State_poly1305_state scrut = st;
+ uint32_t *h = scrut.h;
+ uint32_t *acc = h;
+ Hacl_Impl_Poly1305_32_poly1305_last_pass(acc);
+}
+
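+/* Produce the tag: mac = (acc + s) mod 2^128, stored little-endian, where
+ * s is the 16 bytes at k1 (the second half of the one-time key). */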
+void
+Hacl_Poly1305_32_finish(
+ Hacl_Impl_Poly1305_32_State_poly1305_state st,
+ uint8_t *mac,
+ uint8_t *k1)
+{
+ Hacl_Impl_Poly1305_32_State_poly1305_state scrut = st;
+ uint32_t *h = scrut.h;
+ uint32_t *acc = h;
+ FStar_UInt128_t k_ = load128_le(k1);
+ uint32_t h0 = acc[0U];
+ uint32_t h1 = acc[1U];
+ uint32_t h2 = acc[2U];
+ uint32_t h3 = acc[3U];
+ uint32_t h4 = acc[4U];
+ FStar_UInt128_t
+ acc_ =
+ FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128((uint64_t)h4),
+ (uint32_t)104U),
+ FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128((uint64_t)h3),
+ (uint32_t)78U),
+ FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128((uint64_t)h2),
+ (uint32_t)52U),
+ FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128((uint64_t)h1),
+ (uint32_t)26U),
+ FStar_UInt128_uint64_to_uint128((uint64_t)h0)))));
+ FStar_UInt128_t mac_ = FStar_UInt128_add_mod(acc_, k_);
+ store128_le(mac, mac_);
+}
+
+void
+Hacl_Poly1305_32_crypto_onetimeauth(
+ uint8_t *output,
+ uint8_t *input,
+ uint64_t len1,
+ uint8_t *k1)
+{
+ Hacl_Standalone_Poly1305_32_crypto_onetimeauth(output, input, len1, k1);
+}
diff --git a/security/nss/lib/freebl/verified/Hacl_Poly1305_32.h b/security/nss/lib/freebl/verified/Hacl_Poly1305_32.h
new file mode 100644
index 000000000..4dd070026
--- /dev/null
+++ b/security/nss/lib/freebl/verified/Hacl_Poly1305_32.h
@@ -0,0 +1,103 @@
+/* Copyright 2016-2017 INRIA and Microsoft Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "kremlib.h"
+#ifndef __Hacl_Poly1305_32_H
+#define __Hacl_Poly1305_32_H
+
+typedef uint32_t Hacl_Bignum_Constants_limb;
+
+typedef uint64_t Hacl_Bignum_Constants_wide;
+
+typedef uint64_t Hacl_Bignum_Wide_t;
+
+typedef uint32_t Hacl_Bignum_Limb_t;
+
+typedef void *Hacl_Impl_Poly1305_32_State_log_t;
+
+typedef uint8_t *Hacl_Impl_Poly1305_32_State_uint8_p;
+
+typedef uint32_t *Hacl_Impl_Poly1305_32_State_bigint;
+
+typedef void *Hacl_Impl_Poly1305_32_State_seqelem;
+
+typedef uint32_t *Hacl_Impl_Poly1305_32_State_elemB;
+
+typedef uint8_t *Hacl_Impl_Poly1305_32_State_wordB;
+
+typedef uint8_t *Hacl_Impl_Poly1305_32_State_wordB_16;
+
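+/* The state is a pair of pointers to caller-provided uint32_t[5] limb
+ * arrays: r holds the clamped key half, h the running accumulator. */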
+typedef struct
+{
+ uint32_t *r;
+ uint32_t *h;
+} Hacl_Impl_Poly1305_32_State_poly1305_state;
+
+typedef void *Hacl_Impl_Poly1305_32_log_t;
+
+typedef uint32_t *Hacl_Impl_Poly1305_32_bigint;
+
+typedef uint8_t *Hacl_Impl_Poly1305_32_uint8_p;
+
+typedef uint32_t *Hacl_Impl_Poly1305_32_elemB;
+
+typedef uint8_t *Hacl_Impl_Poly1305_32_wordB;
+
+typedef uint8_t *Hacl_Impl_Poly1305_32_wordB_16;
+
+typedef uint8_t *Hacl_Poly1305_32_uint8_p;
+
+typedef uint64_t Hacl_Poly1305_32_uint64_t;
+
+void *Hacl_Poly1305_32_op_String_Access(FStar_Monotonic_HyperStack_mem h, uint8_t *b);
+
+typedef uint8_t *Hacl_Poly1305_32_key;
+
+typedef Hacl_Impl_Poly1305_32_State_poly1305_state Hacl_Poly1305_32_state;
+
+Hacl_Impl_Poly1305_32_State_poly1305_state
+Hacl_Poly1305_32_mk_state(uint32_t *r, uint32_t *acc);
+
+void Hacl_Poly1305_32_init(Hacl_Impl_Poly1305_32_State_poly1305_state st, uint8_t *k1);
+
+extern void *Hacl_Poly1305_32_empty_log;
+
+void Hacl_Poly1305_32_update_block(Hacl_Impl_Poly1305_32_State_poly1305_state st, uint8_t *m);
+
+void
+Hacl_Poly1305_32_update(
+ Hacl_Impl_Poly1305_32_State_poly1305_state st,
+ uint8_t *m,
+ uint32_t len1);
+
+void
+Hacl_Poly1305_32_update_last(
+ Hacl_Impl_Poly1305_32_State_poly1305_state st,
+ uint8_t *m,
+ uint32_t len1);
+
+void
+Hacl_Poly1305_32_finish(
+ Hacl_Impl_Poly1305_32_State_poly1305_state st,
+ uint8_t *mac,
+ uint8_t *k1);
+
+void
+Hacl_Poly1305_32_crypto_onetimeauth(
+ uint8_t *output,
+ uint8_t *input,
+ uint64_t len1,
+ uint8_t *k1);
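+
+/* Illustrative usage sketch (not part of the verified code); msg, msg_len,
+ * mac and key are caller-supplied, msg_len is a byte count that the
+ * streaming calls split into 16-byte blocks plus a remainder, and key is
+ * the 32-byte one-time key (r || s):
+ *
+ *   uint32_t limbs[10] = { 0 };
+ *   Hacl_Poly1305_32_state st = Hacl_Poly1305_32_mk_state(limbs, limbs + 5);
+ *   Hacl_Poly1305_32_init(st, key);                  // clamps key[0..15] into r
+ *   Hacl_Poly1305_32_update(st, msg, (uint32_t)(msg_len / 16));
+ *   Hacl_Poly1305_32_update_last(st, msg + (msg_len / 16) * 16,
+ *                                (uint32_t)(msg_len % 16));
+ *   Hacl_Poly1305_32_finish(st, mac, key + 16);      // adds s = key[16..31]
+ *
+ * or, in one call: Hacl_Poly1305_32_crypto_onetimeauth(mac, msg, msg_len, key);
+ */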
+#endif