Diffstat (limited to 'security/nss/lib/freebl')
-rw-r--r-- | security/nss/lib/freebl/Makefile | 10
-rw-r--r-- | security/nss/lib/freebl/blake2b.c | 2
-rw-r--r-- | security/nss/lib/freebl/chacha20poly1305.c | 88
-rw-r--r-- | security/nss/lib/freebl/dsa.c | 37
-rw-r--r-- | security/nss/lib/freebl/ec.c | 29
-rw-r--r-- | security/nss/lib/freebl/freebl.gyp | 36
-rw-r--r-- | security/nss/lib/freebl/freebl_base.gypi | 15
-rw-r--r-- | security/nss/lib/freebl/loader.c | 4
-rw-r--r-- | security/nss/lib/freebl/mpi/mpi.c | 13
-rw-r--r-- | security/nss/lib/freebl/poly1305-donna-x64-sse2-incremental-source.c | 881
-rw-r--r-- | security/nss/lib/freebl/poly1305.c | 314
-rw-r--r-- | security/nss/lib/freebl/poly1305.h | 30
-rw-r--r-- | security/nss/lib/freebl/unix_urandom.c | 33
-rw-r--r-- | security/nss/lib/freebl/verified/Hacl_Poly1305_32.c | 578
-rw-r--r-- | security/nss/lib/freebl/verified/Hacl_Poly1305_32.h | 103
15 files changed, 846 insertions, 1327 deletions
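
Note on the dsa.c and ec.c hunks below: both signing paths now draw an extra random blinding value (ar) and rework the computation of s so that mp_invmod and the private-key addition never operate directly on the secret nonce or key. The following is a minimal sketch of that pattern, assuming the usual freebl mpi/secmpi conventions (CHECK_MPI_OK sets err and jumps to cleanup); the helper name and simplified argument handling are mine, and the real logic is the in-line code in dsa_SignDigest() and ECDSA_SignDigestWithSeed():

#include "mpi.h"
#include "secmpi.h" /* CHECK_MPI_OK(expr): records err and jumps to cleanup on failure */

/* Computes s = k^-1 * (h + x*r) mod q without running mp_invmod or the
 * addition on the bare secrets.  On entry s holds the digest h; t and ar
 * are fresh random values in Z_q; k and x are clobbered. */
static mp_err
blinded_sig_s(mp_int *k, mp_int *x, mp_int *s, mp_int *r, mp_int *t,
              mp_int *ar, mp_int *q)
{
    mp_err err = MP_OKAY;
    mp_int tmp;
    MP_DIGITS(&tmp) = 0;
    CHECK_MPI_OK(mp_init(&tmp));
    /* Invert a blinded copy of k: ((k*ar*t)^-1 * t) mod q = (k*ar)^-1 mod q. */
    CHECK_MPI_OK(mp_mul(k, ar, k));          /* k = k * ar                */
    CHECK_MPI_OK(mp_mulmod(k, t, q, k));     /* k = k * t mod q           */
    CHECK_MPI_OK(mp_invmod(k, q, k));        /* k = k^-1 mod q            */
    CHECK_MPI_OK(mp_mulmod(k, t, q, k));     /* k = (k_orig * ar)^-1 mod q */
    /* Blind the addition: s = ar*h + ar*x*r (reduced by the final step). */
    CHECK_MPI_OK(mp_mul(x, ar, x));          /* x = x * ar                */
    CHECK_MPI_OK(mp_mulmod(x, r, q, x));     /* x = x * r mod q           */
    CHECK_MPI_OK(mp_mulmod(s, ar, q, &tmp)); /* tmp = h * ar mod q        */
    CHECK_MPI_OK(mp_add(&tmp, x, s));        /* s = tmp + x               */
    /* The ar factors cancel: s = k_orig^-1 * (h + x_orig*r) mod q. */
    CHECK_MPI_OK(mp_mulmod(s, k, q, s));
cleanup:
    mp_clear(&tmp);
    return err;
}

In the patch itself the blinding value comes from DSA_NewRandom() in dsa.c and RNG_GenerateGlobalRandomBytes() in ec.c, and the intermediate products reuse the existing locals rather than a separate temporary.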
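The chacha20poly1305.c hunk below also collapses the two previous Poly1305Do() bodies into one by aliasing the HACL* 32-bit and 64-bit entry points; the only per-variant differences left are the size of the stack-allocated state and the offset that splits it into r and the accumulator. As a usage illustration only (the helper and variable names are mine, not part of the patch), this is roughly how the verified 32-bit API added in verified/Hacl_Poly1305_32.c is driven; the zero-padded tail mirrors the RFC 7539 AEAD padding that Poly1305PadUpdate() applies, not plain Poly1305 over an unpadded message:

#include <stdint.h>
#include <string.h>
#include "verified/Hacl_Poly1305_32.h"

static void
poly1305_32_padded_tag(uint8_t tag[16], const uint8_t *msg, unsigned int len,
                       const uint8_t key[32])
{
    /* 5 limbs for r and 5 for the accumulator h, carved from one stack
     * buffer, mirroring stateStack/offset in the new Poly1305Do(). */
    uint32_t stateStack[10U] = { 0U };
    Hacl_Impl_Poly1305_32_State_poly1305_state st =
        Hacl_Poly1305_32_mk_state(stateStack, stateStack + 5);
    uint8_t block[16] = { 0 };
    unsigned int rem = len % 16;

    Hacl_Poly1305_32_init(st, (uint8_t *)key);
    /* update() consumes whole 16-byte blocks; the third argument is a block count. */
    Hacl_Poly1305_32_update(st, (uint8_t *)msg, len / 16);
    if (rem > 0) {
        memcpy(block, msg + (len - rem), rem);
        Hacl_Poly1305_32_update(st, block, 1); /* zero-padded final block */
    }
    /* finish() takes the second half of the one-time key as the pad. */
    Hacl_Poly1305_32_finish(st, tag, (uint8_t *)(key + 16));
}

The 64-bit variant is driven identically, except that the state is a uint64_t stateStack[6] split at offset 3.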
diff --git a/security/nss/lib/freebl/Makefile b/security/nss/lib/freebl/Makefile index a4b1a86ae..bff11c7c8 100644 --- a/security/nss/lib/freebl/Makefile +++ b/security/nss/lib/freebl/Makefile @@ -517,13 +517,13 @@ ifndef NSS_DISABLE_CHACHAPOLY ifdef HAVE_INT128_SUPPORT EXTRA_SRCS += Hacl_Poly1305_64.c else - EXTRA_SRCS += poly1305.c + EXTRA_SRCS += Hacl_Poly1305_32.c endif else ifeq ($(CPU_ARCH),aarch64) EXTRA_SRCS += Hacl_Poly1305_64.c else - EXTRA_SRCS += poly1305.c + EXTRA_SRCS += Hacl_Poly1305_32.c endif endif # x86_64 @@ -535,12 +535,16 @@ ifeq (,$(filter-out i386 x386 x86 x86_64 aarch64,$(CPU_ARCH))) # All intel architectures get the 64 bit version # With custom uint128 if necessary (faster than generic 32 bit version). ECL_SRCS += curve25519_64.c - VERIFIED_SRCS += Hacl_Curve25519.c FStar.c + VERIFIED_SRCS += Hacl_Curve25519.c else # All non intel architectures get the generic 32 bit implementation (slow!) ECL_SRCS += curve25519_32.c endif +ifndef HAVE_INT128_SUPPORT + VERIFIED_SRCS += FStar.c +endif + ####################################################################### # (5) Execute "global" rules. (OPTIONAL) # ####################################################################### diff --git a/security/nss/lib/freebl/blake2b.c b/security/nss/lib/freebl/blake2b.c index 4099c67e0..b4a0442c9 100644 --- a/security/nss/lib/freebl/blake2b.c +++ b/security/nss/lib/freebl/blake2b.c @@ -180,7 +180,7 @@ blake2b_Begin(BLAKE2BContext* ctx, uint8_t outlen, const uint8_t* key, return SECSuccess; failure: - PORT_Memset(&ctx, 0, sizeof(ctx)); + PORT_Memset(ctx, 0, sizeof(*ctx)); PORT_SetError(SEC_ERROR_INVALID_ARGS); return SECFailure; } diff --git a/security/nss/lib/freebl/chacha20poly1305.c b/security/nss/lib/freebl/chacha20poly1305.c index 859d05316..302f0db9e 100644 --- a/security/nss/lib/freebl/chacha20poly1305.c +++ b/security/nss/lib/freebl/chacha20poly1305.c @@ -24,36 +24,60 @@ extern void Hacl_Chacha20_Vec128_chacha20(uint8_t *output, uint8_t *plain, extern void Hacl_Chacha20_chacha20(uint8_t *output, uint8_t *plain, uint32_t len, uint8_t *k, uint8_t *n1, uint32_t ctr); -/* Poly1305Do writes the Poly1305 authenticator of the given additional data - * and ciphertext to |out|. */ #if defined(HAVE_INT128_SUPPORT) && (defined(NSS_X86_OR_X64) || defined(__aarch64__)) /* Use HACL* Poly1305 on 64-bit Intel and ARM */ #include "verified/Hacl_Poly1305_64.h" +#define NSS_POLY1305_64 1 +#define Hacl_Poly1305_update Hacl_Poly1305_64_update +#define Hacl_Poly1305_mk_state Hacl_Poly1305_64_mk_state +#define Hacl_Poly1305_init Hacl_Poly1305_64_init +#define Hacl_Poly1305_finish Hacl_Poly1305_64_finish +typedef Hacl_Impl_Poly1305_64_State_poly1305_state Hacl_Impl_Poly1305_State_poly1305_state; +#else +/* All other platforms get the 32-bit poly1305 HACL* implementation. 
*/ +#include "verified/Hacl_Poly1305_32.h" +#define NSS_POLY1305_32 1 +#define Hacl_Poly1305_update Hacl_Poly1305_32_update +#define Hacl_Poly1305_mk_state Hacl_Poly1305_32_mk_state +#define Hacl_Poly1305_init Hacl_Poly1305_32_init +#define Hacl_Poly1305_finish Hacl_Poly1305_32_finish +typedef Hacl_Impl_Poly1305_32_State_poly1305_state Hacl_Impl_Poly1305_State_poly1305_state; +#endif /* HAVE_INT128_SUPPORT */ static void -Poly1305PadUpdate(Hacl_Impl_Poly1305_64_State_poly1305_state state, +Poly1305PadUpdate(Hacl_Impl_Poly1305_State_poly1305_state state, unsigned char *block, const unsigned char *p, const unsigned int pLen) { unsigned int pRemLen = pLen % 16; - Hacl_Poly1305_64_update(state, (uint8_t *)p, (pLen / 16)); + Hacl_Poly1305_update(state, (uint8_t *)p, (pLen / 16)); if (pRemLen > 0) { memcpy(block, p + (pLen - pRemLen), pRemLen); - Hacl_Poly1305_64_update(state, block, 1); + Hacl_Poly1305_update(state, block, 1); } } +/* Poly1305Do writes the Poly1305 authenticator of the given additional data + * and ciphertext to |out|. */ static void Poly1305Do(unsigned char *out, const unsigned char *ad, unsigned int adLen, const unsigned char *ciphertext, unsigned int ciphertextLen, const unsigned char key[32]) { - uint64_t tmp1[6U] = { 0U }; - Hacl_Impl_Poly1305_64_State_poly1305_state state = - Hacl_Poly1305_64_mk_state(tmp1, tmp1 + 3); +#ifdef NSS_POLY1305_64 + uint64_t stateStack[6U] = { 0U }; + size_t offset = 3; +#elif defined NSS_POLY1305_32 + uint32_t stateStack[10U] = { 0U }; + size_t offset = 5; +#else +#error "This can't happen." +#endif + Hacl_Impl_Poly1305_State_poly1305_state state = + Hacl_Poly1305_mk_state(stateStack, stateStack + offset); unsigned char block[16] = { 0 }; - Hacl_Poly1305_64_init(state, (uint8_t *)key); + Hacl_Poly1305_init(state, (uint8_t *)key); Poly1305PadUpdate(state, block, ad, adLen); memset(block, 0, 16); @@ -68,49 +92,11 @@ Poly1305Do(unsigned char *out, const unsigned char *ad, unsigned int adLen, block[i] = j; } - Hacl_Poly1305_64_update(state, block, 1); - Hacl_Poly1305_64_finish(state, out, (uint8_t *)(key + 16)); + Hacl_Poly1305_update(state, block, 1); + Hacl_Poly1305_finish(state, out, (uint8_t *)(key + 16)); +#undef NSS_POLY1305_64 +#undef NSS_POLY1305_32 } -#else -/* All other platforms get the 32-bit poly1305 reference implementation. 
*/ -#include "poly1305.h" - -static void -Poly1305Do(unsigned char *out, const unsigned char *ad, unsigned int adLen, - const unsigned char *ciphertext, unsigned int ciphertextLen, - const unsigned char key[32]) -{ - poly1305_state state; - unsigned int j; - unsigned char lengthBytes[8]; - static const unsigned char zeros[15]; - unsigned int i; - - Poly1305Init(&state, key); - Poly1305Update(&state, ad, adLen); - if (adLen % 16 > 0) { - Poly1305Update(&state, zeros, 16 - adLen % 16); - } - Poly1305Update(&state, ciphertext, ciphertextLen); - if (ciphertextLen % 16 > 0) { - Poly1305Update(&state, zeros, 16 - ciphertextLen % 16); - } - j = adLen; - for (i = 0; i < sizeof(lengthBytes); i++) { - lengthBytes[i] = j; - j >>= 8; - } - Poly1305Update(&state, lengthBytes, sizeof(lengthBytes)); - j = ciphertextLen; - for (i = 0; i < sizeof(lengthBytes); i++) { - lengthBytes[i] = j; - j >>= 8; - } - Poly1305Update(&state, lengthBytes, sizeof(lengthBytes)); - Poly1305Finish(&state, out); -} - -#endif /* HAVE_INT128_SUPPORT */ #endif /* NSS_DISABLE_CHACHAPOLY */ SECStatus diff --git a/security/nss/lib/freebl/dsa.c b/security/nss/lib/freebl/dsa.c index 9324d306b..aef353967 100644 --- a/security/nss/lib/freebl/dsa.c +++ b/security/nss/lib/freebl/dsa.c @@ -16,14 +16,11 @@ #include "blapi.h" #include "nssilock.h" #include "secitem.h" -#include "blapi.h" +#include "blapit.h" #include "mpi.h" #include "secmpi.h" #include "pqg.h" -/* XXX to be replaced by define in blapit.h */ -#define NSS_FREEBL_DSA_DEFAULT_CHUNKSIZE 2048 - /* * FIPS 186-2 requires result from random output to be reduced mod q when * generating random numbers for DSA. @@ -168,7 +165,7 @@ dsa_NewKeyExtended(const PQGParams *params, const SECItem *seed, return SECFailure; } /* Initialize an arena for the DSA key. */ - arena = PORT_NewArena(NSS_FREEBL_DSA_DEFAULT_CHUNKSIZE); + arena = PORT_NewArena(NSS_FREEBL_DEFAULT_CHUNKSIZE); if (!arena) { PORT_SetError(SEC_ERROR_NO_MEMORY); return SECFailure; @@ -213,8 +210,9 @@ cleanup: mp_clear(&g); mp_clear(&x); mp_clear(&y); - if (key) + if (key) { PORT_FreeArena(key->params.arena, PR_TRUE); + } if (err) { translate_mpi_error(err); return SECFailure; @@ -321,6 +319,7 @@ dsa_SignDigest(DSAPrivateKey *key, SECItem *signature, const SECItem *digest, mp_int x, k; /* private key & pseudo-random integer */ mp_int r, s; /* tuple (r, s) is signature) */ mp_int t; /* holding tmp values */ + mp_int ar; /* holding blinding values */ mp_err err = MP_OKAY; SECStatus rv = SECSuccess; unsigned int dsa_subprime_len, dsa_signature_len, offset; @@ -364,6 +363,7 @@ dsa_SignDigest(DSAPrivateKey *key, SECItem *signature, const SECItem *digest, MP_DIGITS(&r) = 0; MP_DIGITS(&s) = 0; MP_DIGITS(&t) = 0; + MP_DIGITS(&ar) = 0; CHECK_MPI_OK(mp_init(&p)); CHECK_MPI_OK(mp_init(&q)); CHECK_MPI_OK(mp_init(&g)); @@ -372,6 +372,7 @@ dsa_SignDigest(DSAPrivateKey *key, SECItem *signature, const SECItem *digest, CHECK_MPI_OK(mp_init(&r)); CHECK_MPI_OK(mp_init(&s)); CHECK_MPI_OK(mp_init(&t)); + CHECK_MPI_OK(mp_init(&ar)); /* ** Convert stored PQG and private key into MPI integers. 
*/ @@ -397,14 +398,28 @@ dsa_SignDigest(DSAPrivateKey *key, SECItem *signature, const SECItem *digest, rv = SECFailure; goto cleanup; } - SECITEM_TO_MPINT(t2, &t); /* t <-$ Zq */ + SECITEM_TO_MPINT(t2, &t); /* t <-$ Zq */ + SECITEM_FreeItem(&t2, PR_FALSE); + if (DSA_NewRandom(NULL, &key->params.subPrime, &t2) != SECSuccess) { + PORT_SetError(SEC_ERROR_NEED_RANDOM); + rv = SECFailure; + goto cleanup; + } + SECITEM_TO_MPINT(t2, &ar); /* ar <-$ Zq */ + SECITEM_FreeItem(&t2, PR_FALSE); + + /* Using mp_invmod on k directly would leak bits from k. */ + CHECK_MPI_OK(mp_mul(&k, &ar, &k)); /* k = k * ar */ CHECK_MPI_OK(mp_mulmod(&k, &t, &q, &k)); /* k = k * t mod q */ CHECK_MPI_OK(mp_invmod(&k, &q, &k)); /* k = k**-1 mod q */ CHECK_MPI_OK(mp_mulmod(&k, &t, &q, &k)); /* k = k * t mod q */ SECITEM_TO_MPINT(localDigest, &s); /* s = HASH(M) */ - CHECK_MPI_OK(mp_mulmod(&x, &r, &q, &x)); /* x = x * r mod q */ - CHECK_MPI_OK(mp_addmod(&s, &x, &q, &s)); /* s = s + x mod q */ - CHECK_MPI_OK(mp_mulmod(&s, &k, &q, &s)); /* s = s * k mod q */ + /* To avoid leaking secret bits here the addition is blinded. */ + CHECK_MPI_OK(mp_mul(&x, &ar, &x)); /* x = x * ar */ + CHECK_MPI_OK(mp_mulmod(&x, &r, &q, &x)); /* x = x * r mod q */ + CHECK_MPI_OK(mp_mulmod(&s, &ar, &q, &t)); /* t = s * ar mod q */ + CHECK_MPI_OK(mp_add(&t, &x, &s)); /* s = t + x */ + CHECK_MPI_OK(mp_mulmod(&s, &k, &q, &s)); /* s = s * k mod q */ /* ** verify r != 0 and s != 0 ** mentioned as optional in FIPS 186-1. @@ -438,7 +453,7 @@ cleanup: mp_clear(&r); mp_clear(&s); mp_clear(&t); - SECITEM_FreeItem(&t2, PR_FALSE); + mp_clear(&ar); if (err) { translate_mpi_error(err); rv = SECFailure; diff --git a/security/nss/lib/freebl/ec.c b/security/nss/lib/freebl/ec.c index b28815ade..6468a10d6 100644 --- a/security/nss/lib/freebl/ec.c +++ b/security/nss/lib/freebl/ec.c @@ -653,6 +653,7 @@ ECDSA_SignDigestWithSeed(ECPrivateKey *key, SECItem *signature, mp_int r, s; /* tuple (r, s) is the signature */ mp_int t; /* holding tmp values */ mp_int n; + mp_int ar; /* blinding value */ mp_err err = MP_OKAY; ECParams *ecParams = NULL; SECItem kGpoint = { siBuffer, NULL, 0 }; @@ -674,6 +675,7 @@ ECDSA_SignDigestWithSeed(ECPrivateKey *key, SECItem *signature, MP_DIGITS(&s) = 0; MP_DIGITS(&n) = 0; MP_DIGITS(&t) = 0; + MP_DIGITS(&ar) = 0; /* Check args */ if (!key || !signature || !digest || !kb || (kblen < 0)) { @@ -700,6 +702,7 @@ ECDSA_SignDigestWithSeed(ECPrivateKey *key, SECItem *signature, CHECK_MPI_OK(mp_init(&s)); CHECK_MPI_OK(mp_init(&n)); CHECK_MPI_OK(mp_init(&t)); + CHECK_MPI_OK(mp_init(&ar)); SECITEM_TO_MPINT(ecParams->order, &n); SECITEM_TO_MPINT(key->privateValue, &d); @@ -815,12 +818,25 @@ ECDSA_SignDigestWithSeed(ECPrivateKey *key, SECItem *signature, goto cleanup; } CHECK_MPI_OK(mp_read_unsigned_octets(&t, t2, 2 * ecParams->order.len)); /* t <-$ Zn */ - CHECK_MPI_OK(mp_mulmod(&k, &t, &n, &k)); /* k = k * t mod n */ - CHECK_MPI_OK(mp_invmod(&k, &n, &k)); /* k = k**-1 mod n */ - CHECK_MPI_OK(mp_mulmod(&k, &t, &n, &k)); /* k = k * t mod n */ - CHECK_MPI_OK(mp_mulmod(&d, &r, &n, &d)); /* d = d * r mod n */ - CHECK_MPI_OK(mp_addmod(&s, &d, &n, &s)); /* s = s + d mod n */ - CHECK_MPI_OK(mp_mulmod(&s, &k, &n, &s)); /* s = s * k mod n */ + PORT_Memset(t2, 0, 2 * ecParams->order.len); + if (RNG_GenerateGlobalRandomBytes(t2, 2 * ecParams->order.len) != SECSuccess) { + PORT_SetError(SEC_ERROR_NEED_RANDOM); + rv = SECFailure; + goto cleanup; + } + CHECK_MPI_OK(mp_read_unsigned_octets(&ar, t2, 2 * ecParams->order.len)); /* ar <-$ Zn */ + + /* Using mp_invmod on k 
directly would leak bits from k. */ + CHECK_MPI_OK(mp_mul(&k, &ar, &k)); /* k = k * ar */ + CHECK_MPI_OK(mp_mulmod(&k, &t, &n, &k)); /* k = k * t mod n */ + CHECK_MPI_OK(mp_invmod(&k, &n, &k)); /* k = k**-1 mod n */ + CHECK_MPI_OK(mp_mulmod(&k, &t, &n, &k)); /* k = k * t mod n */ + /* To avoid leaking secret bits here the addition is blinded. */ + CHECK_MPI_OK(mp_mul(&d, &ar, &t)); /* t = d * ar */ + CHECK_MPI_OK(mp_mulmod(&t, &r, &n, &d)); /* d = t * r mod n */ + CHECK_MPI_OK(mp_mulmod(&s, &ar, &n, &t)); /* t = s * ar mod n */ + CHECK_MPI_OK(mp_add(&t, &d, &s)); /* s = t + d */ + CHECK_MPI_OK(mp_mulmod(&s, &k, &n, &s)); /* s = s * k mod n */ #if EC_DEBUG mp_todecimal(&s, mpstr); @@ -858,6 +874,7 @@ cleanup: mp_clear(&s); mp_clear(&n); mp_clear(&t); + mp_clear(&ar); if (t2) { PORT_Free(t2); diff --git a/security/nss/lib/freebl/freebl.gyp b/security/nss/lib/freebl/freebl.gyp index fae56f709..004807483 100644 --- a/security/nss/lib/freebl/freebl.gyp +++ b/security/nss/lib/freebl/freebl.gyp @@ -272,28 +272,15 @@ }, }], [ 'cc_use_gnu_ld==1 and OS=="win" and target_arch=="x64"', { + # mingw x64 'defines': [ 'MP_IS_LITTLE_ENDIAN', - 'NSS_BEVAND_ARCFOUR', - 'MPI_AMD64', - 'MP_ASSEMBLY_MULTIPLY', - 'NSS_USE_COMBA', - 'USE_HW_AES', - 'INTEL_GCM', ], }], - [ 'OS!="win"', { - 'conditions': [ - [ 'target_arch=="x64" or target_arch=="arm64" or target_arch=="aarch64"', { - 'defines': [ - # The Makefile does version-tests on GCC, but we're not doing that here. - 'HAVE_INT128_SUPPORT', - ], - }, { - 'defines': [ - 'KRML_NOUINT128', - ], - }], + [ 'have_int128_support==1', { + 'defines': [ + # The Makefile does version-tests on GCC, but we're not doing that here. + 'HAVE_INT128_SUPPORT', ], }, { 'defines': [ @@ -355,5 +342,18 @@ }, 'variables': { 'module': 'nss', + 'conditions': [ + [ 'OS!="win"', { + 'conditions': [ + [ 'target_arch=="x64" or target_arch=="arm64" or target_arch=="aarch64"', { + 'have_int128_support%': 1, + }, { + 'have_int128_support%': 0, + }], + ], + }, { + 'have_int128_support%': 0, + }], + ], } } diff --git a/security/nss/lib/freebl/freebl_base.gypi b/security/nss/lib/freebl/freebl_base.gypi index ebd1018d8..1372994f4 100644 --- a/security/nss/lib/freebl/freebl_base.gypi +++ b/security/nss/lib/freebl/freebl_base.gypi @@ -59,7 +59,7 @@ 'sha_fast.c', 'shvfy.c', 'sysrand.c', - 'tlsprfalg.c' + 'tlsprfalg.c', ], 'conditions': [ [ 'OS=="linux" or OS=="android"', { @@ -122,6 +122,11 @@ 'intel-gcm-x86-masm.asm', ], }], + [ 'cc_use_gnu_ld==1', { + # mingw + 'sources': [ + ], + }], [ 'cc_is_clang!=1', { # MSVC 'sources': [ @@ -135,7 +140,6 @@ # All intel and 64-bit ARM architectures get the 64 bit version. 
'ecl/curve25519_64.c', 'verified/Hacl_Curve25519.c', - 'verified/FStar.c', ], }, { 'sources': [ @@ -167,7 +171,7 @@ }, { # !Windows & !x64 & !arm64 & !aarch64 'sources': [ - 'poly1305.c', + 'verified/Hacl_Poly1305_32.c', ], }], ], @@ -176,7 +180,7 @@ }, { # Windows 'sources': [ - 'poly1305.c', + 'verified/Hacl_Poly1305_32.c', ], }], ], @@ -215,6 +219,9 @@ }], ], }], + [ 'have_int128_support==0', { + 'sources': [ 'verified/FStar.c' ], + }], ], 'ldflags': [ '-Wl,-Bsymbolic' diff --git a/security/nss/lib/freebl/loader.c b/security/nss/lib/freebl/loader.c index fe5e0a668..6d200e6dd 100644 --- a/security/nss/lib/freebl/loader.c +++ b/security/nss/lib/freebl/loader.c @@ -2164,12 +2164,12 @@ BLAKE2B_NewContext(void) } void -BLAKE2B_DestroyContext(BLAKE2BContext *BLAKE2BContext, PRBool freeit) +BLAKE2B_DestroyContext(BLAKE2BContext *ctx, PRBool freeit) { if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) { return; } - (vector->p_BLAKE2B_DestroyContext)(BLAKE2BContext, freeit); + (vector->p_BLAKE2B_DestroyContext)(ctx, freeit); } SECStatus diff --git a/security/nss/lib/freebl/mpi/mpi.c b/security/nss/lib/freebl/mpi/mpi.c index ae404019d..8c893fb5f 100644 --- a/security/nss/lib/freebl/mpi/mpi.c +++ b/security/nss/lib/freebl/mpi/mpi.c @@ -2657,10 +2657,10 @@ mp_toradix(mp_int *mp, char *str, int radix) /* Reverse the digits and sign indicator */ ix = 0; while (ix < pos) { - char tmp = str[ix]; + char tmpc = str[ix]; str[ix] = str[pos]; - str[pos] = tmp; + str[pos] = tmpc; ++ix; --pos; } @@ -3313,13 +3313,14 @@ s_mp_div_d(mp_int *mp, mp_digit d, mp_digit *r) /* could check for power of 2 here, but mp_div_d does that. */ if (MP_USED(mp) == 1) { mp_digit n = MP_DIGIT(mp, 0); - mp_digit rem; + mp_digit remdig; q = n / d; - rem = n % d; + remdig = n % d; MP_DIGIT(mp, 0) = q; - if (r) - *r = rem; + if (r) { + *r = remdig; + } return MP_OKAY; } diff --git a/security/nss/lib/freebl/poly1305-donna-x64-sse2-incremental-source.c b/security/nss/lib/freebl/poly1305-donna-x64-sse2-incremental-source.c deleted file mode 100644 index 3c803c167..000000000 --- a/security/nss/lib/freebl/poly1305-donna-x64-sse2-incremental-source.c +++ /dev/null @@ -1,881 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -/* This implementation of poly1305 is by Andrew Moon - * (https://github.com/floodyberry/poly1305-donna) and released as public - * domain. It implements SIMD vectorization based on the algorithm described in - * http://cr.yp.to/papers.html#neoncrypto. Unrolled to 2 powers, i.e. 64 byte - * block size. 
*/ - -#include <emmintrin.h> -#include <stdint.h> - -#include "poly1305.h" -#include "blapii.h" - -#define ALIGN(x) __attribute__((aligned(x))) -#define INLINE inline -#define U8TO64_LE(m) (*(uint64_t *)(m)) -#define U8TO32_LE(m) (*(uint32_t *)(m)) -#define U64TO8_LE(m, v) (*(uint64_t *)(m)) = v - -typedef __m128i xmmi; -typedef unsigned __int128 uint128_t; - -static const uint32_t ALIGN(16) poly1305_x64_sse2_message_mask[4] = { (1 << 26) - 1, 0, (1 << 26) - 1, 0 }; -static const uint32_t ALIGN(16) poly1305_x64_sse2_5[4] = { 5, 0, 5, 0 }; -static const uint32_t ALIGN(16) poly1305_x64_sse2_1shl128[4] = { (1 << 24), 0, (1 << 24), 0 }; - -static uint128_t INLINE -add128(uint128_t a, uint128_t b) -{ - return a + b; -} - -static uint128_t INLINE -add128_64(uint128_t a, uint64_t b) -{ - return a + b; -} - -static uint128_t INLINE -mul64x64_128(uint64_t a, uint64_t b) -{ - return (uint128_t)a * b; -} - -static uint64_t INLINE -lo128(uint128_t a) -{ - return (uint64_t)a; -} - -static uint64_t INLINE -shr128(uint128_t v, const int shift) -{ - return (uint64_t)(v >> shift); -} - -static uint64_t INLINE -shr128_pair(uint64_t hi, uint64_t lo, const int shift) -{ - return (uint64_t)((((uint128_t)hi << 64) | lo) >> shift); -} - -typedef struct poly1305_power_t { - union { - xmmi v; - uint64_t u[2]; - uint32_t d[4]; - } R20, R21, R22, R23, R24, S21, S22, S23, S24; -} poly1305_power; - -typedef struct poly1305_state_internal_t { - poly1305_power P[2]; /* 288 bytes, top 32 bit halves unused = 144 bytes of free storage */ - union { - xmmi H[5]; /* 80 bytes */ - uint64_t HH[10]; - }; - /* uint64_t r0,r1,r2; [24 bytes] */ - /* uint64_t pad0,pad1; [16 bytes] */ - uint64_t started; /* 8 bytes */ - uint64_t leftover; /* 8 bytes */ - uint8_t buffer[64]; /* 64 bytes */ -} poly1305_state_internal; /* 448 bytes total + 63 bytes for alignment = 511 bytes raw */ - -static poly1305_state_internal INLINE - * - poly1305_aligned_state(poly1305_state *state) -{ - return (poly1305_state_internal *)(((uint64_t)state + 63) & ~63); -} - -/* copy 0-63 bytes */ -static void INLINE NO_SANITIZE_ALIGNMENT -poly1305_block_copy(uint8_t *dst, const uint8_t *src, size_t bytes) -{ - size_t offset = src - dst; - if (bytes & 32) { - _mm_storeu_si128((xmmi *)(dst + 0), _mm_loadu_si128((xmmi *)(dst + offset + 0))); - _mm_storeu_si128((xmmi *)(dst + 16), _mm_loadu_si128((xmmi *)(dst + offset + 16))); - dst += 32; - } - if (bytes & 16) { - _mm_storeu_si128((xmmi *)dst, _mm_loadu_si128((xmmi *)(dst + offset))); - dst += 16; - } - if (bytes & 8) { - *(uint64_t *)dst = *(uint64_t *)(dst + offset); - dst += 8; - } - if (bytes & 4) { - *(uint32_t *)dst = *(uint32_t *)(dst + offset); - dst += 4; - } - if (bytes & 2) { - *(uint16_t *)dst = *(uint16_t *)(dst + offset); - dst += 2; - } - if (bytes & 1) { - *(uint8_t *)dst = *(uint8_t *)(dst + offset); - } -} - -/* zero 0-15 bytes */ -static void INLINE -poly1305_block_zero(uint8_t *dst, size_t bytes) -{ - if (bytes & 8) { - *(uint64_t *)dst = 0; - dst += 8; - } - if (bytes & 4) { - *(uint32_t *)dst = 0; - dst += 4; - } - if (bytes & 2) { - *(uint16_t *)dst = 0; - dst += 2; - } - if (bytes & 1) { - *(uint8_t *)dst = 0; - } -} - -static size_t INLINE -poly1305_min(size_t a, size_t b) -{ - return (a < b) ? 
a : b; -} - -void -Poly1305Init(poly1305_state *state, const unsigned char key[32]) -{ - poly1305_state_internal *st = poly1305_aligned_state(state); - poly1305_power *p; - uint64_t r0, r1, r2; - uint64_t t0, t1; - - /* clamp key */ - t0 = U8TO64_LE(key + 0); - t1 = U8TO64_LE(key + 8); - r0 = t0 & 0xffc0fffffff; - t0 >>= 44; - t0 |= t1 << 20; - r1 = t0 & 0xfffffc0ffff; - t1 >>= 24; - r2 = t1 & 0x00ffffffc0f; - - /* store r in un-used space of st->P[1] */ - p = &st->P[1]; - p->R20.d[1] = (uint32_t)(r0); - p->R20.d[3] = (uint32_t)(r0 >> 32); - p->R21.d[1] = (uint32_t)(r1); - p->R21.d[3] = (uint32_t)(r1 >> 32); - p->R22.d[1] = (uint32_t)(r2); - p->R22.d[3] = (uint32_t)(r2 >> 32); - - /* store pad */ - p->R23.d[1] = U8TO32_LE(key + 16); - p->R23.d[3] = U8TO32_LE(key + 20); - p->R24.d[1] = U8TO32_LE(key + 24); - p->R24.d[3] = U8TO32_LE(key + 28); - - /* H = 0 */ - st->H[0] = _mm_setzero_si128(); - st->H[1] = _mm_setzero_si128(); - st->H[2] = _mm_setzero_si128(); - st->H[3] = _mm_setzero_si128(); - st->H[4] = _mm_setzero_si128(); - - st->started = 0; - st->leftover = 0; -} - -static void -poly1305_first_block(poly1305_state_internal *st, const uint8_t *m) -{ - const xmmi MMASK = _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask); - const xmmi FIVE = _mm_load_si128((xmmi *)poly1305_x64_sse2_5); - const xmmi HIBIT = _mm_load_si128((xmmi *)poly1305_x64_sse2_1shl128); - xmmi T5, T6; - poly1305_power *p; - uint128_t d[3]; - uint64_t r0, r1, r2; - uint64_t r20, r21, r22, s22; - uint64_t pad0, pad1; - uint64_t c; - uint64_t i; - - /* pull out stored info */ - p = &st->P[1]; - - r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1]; - r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1]; - r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1]; - pad0 = ((uint64_t)p->R23.d[3] << 32) | (uint64_t)p->R23.d[1]; - pad1 = ((uint64_t)p->R24.d[3] << 32) | (uint64_t)p->R24.d[1]; - - /* compute powers r^2,r^4 */ - r20 = r0; - r21 = r1; - r22 = r2; - for (i = 0; i < 2; i++) { - s22 = r22 * (5 << 2); - - d[0] = add128(mul64x64_128(r20, r20), mul64x64_128(r21 * 2, s22)); - d[1] = add128(mul64x64_128(r22, s22), mul64x64_128(r20 * 2, r21)); - d[2] = add128(mul64x64_128(r21, r21), mul64x64_128(r22 * 2, r20)); - - r20 = lo128(d[0]) & 0xfffffffffff; - c = shr128(d[0], 44); - d[1] = add128_64(d[1], c); - r21 = lo128(d[1]) & 0xfffffffffff; - c = shr128(d[1], 44); - d[2] = add128_64(d[2], c); - r22 = lo128(d[2]) & 0x3ffffffffff; - c = shr128(d[2], 42); - r20 += c * 5; - c = (r20 >> 44); - r20 = r20 & 0xfffffffffff; - r21 += c; - - p->R20.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)(r20)&0x3ffffff), _MM_SHUFFLE(1, 0, 1, 0)); - p->R21.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r20 >> 26) | (r21 << 18)) & 0x3ffffff), _MM_SHUFFLE(1, 0, 1, 0)); - p->R22.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r21 >> 8)) & 0x3ffffff), _MM_SHUFFLE(1, 0, 1, 0)); - p->R23.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r21 >> 34) | (r22 << 10)) & 0x3ffffff), _MM_SHUFFLE(1, 0, 1, 0)); - p->R24.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r22 >> 16))), _MM_SHUFFLE(1, 0, 1, 0)); - p->S21.v = _mm_mul_epu32(p->R21.v, FIVE); - p->S22.v = _mm_mul_epu32(p->R22.v, FIVE); - p->S23.v = _mm_mul_epu32(p->R23.v, FIVE); - p->S24.v = _mm_mul_epu32(p->R24.v, FIVE); - p--; - } - - /* put saved info back */ - p = &st->P[1]; - p->R20.d[1] = (uint32_t)(r0); - p->R20.d[3] = (uint32_t)(r0 >> 32); - p->R21.d[1] = (uint32_t)(r1); - p->R21.d[3] = (uint32_t)(r1 >> 32); - p->R22.d[1] = (uint32_t)(r2); - p->R22.d[3] = 
(uint32_t)(r2 >> 32); - p->R23.d[1] = (uint32_t)(pad0); - p->R23.d[3] = (uint32_t)(pad0 >> 32); - p->R24.d[1] = (uint32_t)(pad1); - p->R24.d[3] = (uint32_t)(pad1 >> 32); - - /* H = [Mx,My] */ - T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)), _mm_loadl_epi64((xmmi *)(m + 16))); - T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)), _mm_loadl_epi64((xmmi *)(m + 24))); - st->H[0] = _mm_and_si128(MMASK, T5); - st->H[1] = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); - T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12)); - st->H[2] = _mm_and_si128(MMASK, T5); - st->H[3] = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); - st->H[4] = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT); -} - -static void -poly1305_blocks(poly1305_state_internal *st, const uint8_t *m, size_t bytes) -{ - const xmmi MMASK = _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask); - const xmmi FIVE = _mm_load_si128((xmmi *)poly1305_x64_sse2_5); - const xmmi HIBIT = _mm_load_si128((xmmi *)poly1305_x64_sse2_1shl128); - - poly1305_power *p; - xmmi H0, H1, H2, H3, H4; - xmmi T0, T1, T2, T3, T4, T5, T6; - xmmi M0, M1, M2, M3, M4; - xmmi C1, C2; - - H0 = st->H[0]; - H1 = st->H[1]; - H2 = st->H[2]; - H3 = st->H[3]; - H4 = st->H[4]; - - while (bytes >= 64) { - /* H *= [r^4,r^4] */ - p = &st->P[0]; - T0 = _mm_mul_epu32(H0, p->R20.v); - T1 = _mm_mul_epu32(H0, p->R21.v); - T2 = _mm_mul_epu32(H0, p->R22.v); - T3 = _mm_mul_epu32(H0, p->R23.v); - T4 = _mm_mul_epu32(H0, p->R24.v); - T5 = _mm_mul_epu32(H1, p->S24.v); - T6 = _mm_mul_epu32(H1, p->R20.v); - T0 = _mm_add_epi64(T0, T5); - T1 = _mm_add_epi64(T1, T6); - T5 = _mm_mul_epu32(H2, p->S23.v); - T6 = _mm_mul_epu32(H2, p->S24.v); - T0 = _mm_add_epi64(T0, T5); - T1 = _mm_add_epi64(T1, T6); - T5 = _mm_mul_epu32(H3, p->S22.v); - T6 = _mm_mul_epu32(H3, p->S23.v); - T0 = _mm_add_epi64(T0, T5); - T1 = _mm_add_epi64(T1, T6); - T5 = _mm_mul_epu32(H4, p->S21.v); - T6 = _mm_mul_epu32(H4, p->S22.v); - T0 = _mm_add_epi64(T0, T5); - T1 = _mm_add_epi64(T1, T6); - T5 = _mm_mul_epu32(H1, p->R21.v); - T6 = _mm_mul_epu32(H1, p->R22.v); - T2 = _mm_add_epi64(T2, T5); - T3 = _mm_add_epi64(T3, T6); - T5 = _mm_mul_epu32(H2, p->R20.v); - T6 = _mm_mul_epu32(H2, p->R21.v); - T2 = _mm_add_epi64(T2, T5); - T3 = _mm_add_epi64(T3, T6); - T5 = _mm_mul_epu32(H3, p->S24.v); - T6 = _mm_mul_epu32(H3, p->R20.v); - T2 = _mm_add_epi64(T2, T5); - T3 = _mm_add_epi64(T3, T6); - T5 = _mm_mul_epu32(H4, p->S23.v); - T6 = _mm_mul_epu32(H4, p->S24.v); - T2 = _mm_add_epi64(T2, T5); - T3 = _mm_add_epi64(T3, T6); - T5 = _mm_mul_epu32(H1, p->R23.v); - T4 = _mm_add_epi64(T4, T5); - T5 = _mm_mul_epu32(H2, p->R22.v); - T4 = _mm_add_epi64(T4, T5); - T5 = _mm_mul_epu32(H3, p->R21.v); - T4 = _mm_add_epi64(T4, T5); - T5 = _mm_mul_epu32(H4, p->R20.v); - T4 = _mm_add_epi64(T4, T5); - - /* H += [Mx,My]*[r^2,r^2] */ - T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)), _mm_loadl_epi64((xmmi *)(m + 16))); - T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)), _mm_loadl_epi64((xmmi *)(m + 24))); - M0 = _mm_and_si128(MMASK, T5); - M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); - T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12)); - M2 = _mm_and_si128(MMASK, T5); - M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); - M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT); - - p = &st->P[1]; - T5 = _mm_mul_epu32(M0, p->R20.v); - T6 = _mm_mul_epu32(M0, p->R21.v); - T0 = _mm_add_epi64(T0, T5); - T1 = _mm_add_epi64(T1, T6); - T5 = _mm_mul_epu32(M1, p->S24.v); - T6 = _mm_mul_epu32(M1, p->R20.v); - T0 = 
_mm_add_epi64(T0, T5); - T1 = _mm_add_epi64(T1, T6); - T5 = _mm_mul_epu32(M2, p->S23.v); - T6 = _mm_mul_epu32(M2, p->S24.v); - T0 = _mm_add_epi64(T0, T5); - T1 = _mm_add_epi64(T1, T6); - T5 = _mm_mul_epu32(M3, p->S22.v); - T6 = _mm_mul_epu32(M3, p->S23.v); - T0 = _mm_add_epi64(T0, T5); - T1 = _mm_add_epi64(T1, T6); - T5 = _mm_mul_epu32(M4, p->S21.v); - T6 = _mm_mul_epu32(M4, p->S22.v); - T0 = _mm_add_epi64(T0, T5); - T1 = _mm_add_epi64(T1, T6); - T5 = _mm_mul_epu32(M0, p->R22.v); - T6 = _mm_mul_epu32(M0, p->R23.v); - T2 = _mm_add_epi64(T2, T5); - T3 = _mm_add_epi64(T3, T6); - T5 = _mm_mul_epu32(M1, p->R21.v); - T6 = _mm_mul_epu32(M1, p->R22.v); - T2 = _mm_add_epi64(T2, T5); - T3 = _mm_add_epi64(T3, T6); - T5 = _mm_mul_epu32(M2, p->R20.v); - T6 = _mm_mul_epu32(M2, p->R21.v); - T2 = _mm_add_epi64(T2, T5); - T3 = _mm_add_epi64(T3, T6); - T5 = _mm_mul_epu32(M3, p->S24.v); - T6 = _mm_mul_epu32(M3, p->R20.v); - T2 = _mm_add_epi64(T2, T5); - T3 = _mm_add_epi64(T3, T6); - T5 = _mm_mul_epu32(M4, p->S23.v); - T6 = _mm_mul_epu32(M4, p->S24.v); - T2 = _mm_add_epi64(T2, T5); - T3 = _mm_add_epi64(T3, T6); - T5 = _mm_mul_epu32(M0, p->R24.v); - T4 = _mm_add_epi64(T4, T5); - T5 = _mm_mul_epu32(M1, p->R23.v); - T4 = _mm_add_epi64(T4, T5); - T5 = _mm_mul_epu32(M2, p->R22.v); - T4 = _mm_add_epi64(T4, T5); - T5 = _mm_mul_epu32(M3, p->R21.v); - T4 = _mm_add_epi64(T4, T5); - T5 = _mm_mul_epu32(M4, p->R20.v); - T4 = _mm_add_epi64(T4, T5); - - /* H += [Mx,My] */ - T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 32)), _mm_loadl_epi64((xmmi *)(m + 48))); - T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 40)), _mm_loadl_epi64((xmmi *)(m + 56))); - M0 = _mm_and_si128(MMASK, T5); - M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); - T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12)); - M2 = _mm_and_si128(MMASK, T5); - M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); - M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT); - - T0 = _mm_add_epi64(T0, M0); - T1 = _mm_add_epi64(T1, M1); - T2 = _mm_add_epi64(T2, M2); - T3 = _mm_add_epi64(T3, M3); - T4 = _mm_add_epi64(T4, M4); - - /* reduce */ - C1 = _mm_srli_epi64(T0, 26); - C2 = _mm_srli_epi64(T3, 26); - T0 = _mm_and_si128(T0, MMASK); - T3 = _mm_and_si128(T3, MMASK); - T1 = _mm_add_epi64(T1, C1); - T4 = _mm_add_epi64(T4, C2); - C1 = _mm_srli_epi64(T1, 26); - C2 = _mm_srli_epi64(T4, 26); - T1 = _mm_and_si128(T1, MMASK); - T4 = _mm_and_si128(T4, MMASK); - T2 = _mm_add_epi64(T2, C1); - T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE)); - C1 = _mm_srli_epi64(T2, 26); - C2 = _mm_srli_epi64(T0, 26); - T2 = _mm_and_si128(T2, MMASK); - T0 = _mm_and_si128(T0, MMASK); - T3 = _mm_add_epi64(T3, C1); - T1 = _mm_add_epi64(T1, C2); - C1 = _mm_srli_epi64(T3, 26); - T3 = _mm_and_si128(T3, MMASK); - T4 = _mm_add_epi64(T4, C1); - - /* H = (H*[r^4,r^4] + [Mx,My]*[r^2,r^2] + [Mx,My]) */ - H0 = T0; - H1 = T1; - H2 = T2; - H3 = T3; - H4 = T4; - - m += 64; - bytes -= 64; - } - - st->H[0] = H0; - st->H[1] = H1; - st->H[2] = H2; - st->H[3] = H3; - st->H[4] = H4; -} - -static size_t -poly1305_combine(poly1305_state_internal *st, const uint8_t *m, size_t bytes) -{ - const xmmi MMASK = _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask); - const xmmi HIBIT = _mm_load_si128((xmmi *)poly1305_x64_sse2_1shl128); - const xmmi FIVE = _mm_load_si128((xmmi *)poly1305_x64_sse2_5); - - poly1305_power *p; - xmmi H0, H1, H2, H3, H4; - xmmi M0, M1, M2, M3, M4; - xmmi T0, T1, T2, T3, T4, T5, T6; - xmmi C1, C2; - - uint64_t r0, r1, r2; - uint64_t t0, t1, t2, t3, t4; - uint64_t c; - size_t 
consumed = 0; - - H0 = st->H[0]; - H1 = st->H[1]; - H2 = st->H[2]; - H3 = st->H[3]; - H4 = st->H[4]; - - /* p = [r^2,r^2] */ - p = &st->P[1]; - - if (bytes >= 32) { - /* H *= [r^2,r^2] */ - T0 = _mm_mul_epu32(H0, p->R20.v); - T1 = _mm_mul_epu32(H0, p->R21.v); - T2 = _mm_mul_epu32(H0, p->R22.v); - T3 = _mm_mul_epu32(H0, p->R23.v); - T4 = _mm_mul_epu32(H0, p->R24.v); - T5 = _mm_mul_epu32(H1, p->S24.v); - T6 = _mm_mul_epu32(H1, p->R20.v); - T0 = _mm_add_epi64(T0, T5); - T1 = _mm_add_epi64(T1, T6); - T5 = _mm_mul_epu32(H2, p->S23.v); - T6 = _mm_mul_epu32(H2, p->S24.v); - T0 = _mm_add_epi64(T0, T5); - T1 = _mm_add_epi64(T1, T6); - T5 = _mm_mul_epu32(H3, p->S22.v); - T6 = _mm_mul_epu32(H3, p->S23.v); - T0 = _mm_add_epi64(T0, T5); - T1 = _mm_add_epi64(T1, T6); - T5 = _mm_mul_epu32(H4, p->S21.v); - T6 = _mm_mul_epu32(H4, p->S22.v); - T0 = _mm_add_epi64(T0, T5); - T1 = _mm_add_epi64(T1, T6); - T5 = _mm_mul_epu32(H1, p->R21.v); - T6 = _mm_mul_epu32(H1, p->R22.v); - T2 = _mm_add_epi64(T2, T5); - T3 = _mm_add_epi64(T3, T6); - T5 = _mm_mul_epu32(H2, p->R20.v); - T6 = _mm_mul_epu32(H2, p->R21.v); - T2 = _mm_add_epi64(T2, T5); - T3 = _mm_add_epi64(T3, T6); - T5 = _mm_mul_epu32(H3, p->S24.v); - T6 = _mm_mul_epu32(H3, p->R20.v); - T2 = _mm_add_epi64(T2, T5); - T3 = _mm_add_epi64(T3, T6); - T5 = _mm_mul_epu32(H4, p->S23.v); - T6 = _mm_mul_epu32(H4, p->S24.v); - T2 = _mm_add_epi64(T2, T5); - T3 = _mm_add_epi64(T3, T6); - T5 = _mm_mul_epu32(H1, p->R23.v); - T4 = _mm_add_epi64(T4, T5); - T5 = _mm_mul_epu32(H2, p->R22.v); - T4 = _mm_add_epi64(T4, T5); - T5 = _mm_mul_epu32(H3, p->R21.v); - T4 = _mm_add_epi64(T4, T5); - T5 = _mm_mul_epu32(H4, p->R20.v); - T4 = _mm_add_epi64(T4, T5); - - /* H += [Mx,My] */ - T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)), _mm_loadl_epi64((xmmi *)(m + 16))); - T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)), _mm_loadl_epi64((xmmi *)(m + 24))); - M0 = _mm_and_si128(MMASK, T5); - M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); - T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12)); - M2 = _mm_and_si128(MMASK, T5); - M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); - M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT); - - T0 = _mm_add_epi64(T0, M0); - T1 = _mm_add_epi64(T1, M1); - T2 = _mm_add_epi64(T2, M2); - T3 = _mm_add_epi64(T3, M3); - T4 = _mm_add_epi64(T4, M4); - - /* reduce */ - C1 = _mm_srli_epi64(T0, 26); - C2 = _mm_srli_epi64(T3, 26); - T0 = _mm_and_si128(T0, MMASK); - T3 = _mm_and_si128(T3, MMASK); - T1 = _mm_add_epi64(T1, C1); - T4 = _mm_add_epi64(T4, C2); - C1 = _mm_srli_epi64(T1, 26); - C2 = _mm_srli_epi64(T4, 26); - T1 = _mm_and_si128(T1, MMASK); - T4 = _mm_and_si128(T4, MMASK); - T2 = _mm_add_epi64(T2, C1); - T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE)); - C1 = _mm_srli_epi64(T2, 26); - C2 = _mm_srli_epi64(T0, 26); - T2 = _mm_and_si128(T2, MMASK); - T0 = _mm_and_si128(T0, MMASK); - T3 = _mm_add_epi64(T3, C1); - T1 = _mm_add_epi64(T1, C2); - C1 = _mm_srli_epi64(T3, 26); - T3 = _mm_and_si128(T3, MMASK); - T4 = _mm_add_epi64(T4, C1); - - /* H = (H*[r^2,r^2] + [Mx,My]) */ - H0 = T0; - H1 = T1; - H2 = T2; - H3 = T3; - H4 = T4; - - consumed = 32; - } - - /* finalize, H *= [r^2,r] */ - r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1]; - r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1]; - r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1]; - - p->R20.d[2] = (uint32_t)(r0)&0x3ffffff; - p->R21.d[2] = (uint32_t)((r0 >> 26) | (r1 << 18)) & 0x3ffffff; - p->R22.d[2] = (uint32_t)((r1 >> 8)) & 0x3ffffff; - p->R23.d[2] = 
(uint32_t)((r1 >> 34) | (r2 << 10)) & 0x3ffffff; - p->R24.d[2] = (uint32_t)((r2 >> 16)); - p->S21.d[2] = p->R21.d[2] * 5; - p->S22.d[2] = p->R22.d[2] * 5; - p->S23.d[2] = p->R23.d[2] * 5; - p->S24.d[2] = p->R24.d[2] * 5; - - /* H *= [r^2,r] */ - T0 = _mm_mul_epu32(H0, p->R20.v); - T1 = _mm_mul_epu32(H0, p->R21.v); - T2 = _mm_mul_epu32(H0, p->R22.v); - T3 = _mm_mul_epu32(H0, p->R23.v); - T4 = _mm_mul_epu32(H0, p->R24.v); - T5 = _mm_mul_epu32(H1, p->S24.v); - T6 = _mm_mul_epu32(H1, p->R20.v); - T0 = _mm_add_epi64(T0, T5); - T1 = _mm_add_epi64(T1, T6); - T5 = _mm_mul_epu32(H2, p->S23.v); - T6 = _mm_mul_epu32(H2, p->S24.v); - T0 = _mm_add_epi64(T0, T5); - T1 = _mm_add_epi64(T1, T6); - T5 = _mm_mul_epu32(H3, p->S22.v); - T6 = _mm_mul_epu32(H3, p->S23.v); - T0 = _mm_add_epi64(T0, T5); - T1 = _mm_add_epi64(T1, T6); - T5 = _mm_mul_epu32(H4, p->S21.v); - T6 = _mm_mul_epu32(H4, p->S22.v); - T0 = _mm_add_epi64(T0, T5); - T1 = _mm_add_epi64(T1, T6); - T5 = _mm_mul_epu32(H1, p->R21.v); - T6 = _mm_mul_epu32(H1, p->R22.v); - T2 = _mm_add_epi64(T2, T5); - T3 = _mm_add_epi64(T3, T6); - T5 = _mm_mul_epu32(H2, p->R20.v); - T6 = _mm_mul_epu32(H2, p->R21.v); - T2 = _mm_add_epi64(T2, T5); - T3 = _mm_add_epi64(T3, T6); - T5 = _mm_mul_epu32(H3, p->S24.v); - T6 = _mm_mul_epu32(H3, p->R20.v); - T2 = _mm_add_epi64(T2, T5); - T3 = _mm_add_epi64(T3, T6); - T5 = _mm_mul_epu32(H4, p->S23.v); - T6 = _mm_mul_epu32(H4, p->S24.v); - T2 = _mm_add_epi64(T2, T5); - T3 = _mm_add_epi64(T3, T6); - T5 = _mm_mul_epu32(H1, p->R23.v); - T4 = _mm_add_epi64(T4, T5); - T5 = _mm_mul_epu32(H2, p->R22.v); - T4 = _mm_add_epi64(T4, T5); - T5 = _mm_mul_epu32(H3, p->R21.v); - T4 = _mm_add_epi64(T4, T5); - T5 = _mm_mul_epu32(H4, p->R20.v); - T4 = _mm_add_epi64(T4, T5); - - C1 = _mm_srli_epi64(T0, 26); - C2 = _mm_srli_epi64(T3, 26); - T0 = _mm_and_si128(T0, MMASK); - T3 = _mm_and_si128(T3, MMASK); - T1 = _mm_add_epi64(T1, C1); - T4 = _mm_add_epi64(T4, C2); - C1 = _mm_srli_epi64(T1, 26); - C2 = _mm_srli_epi64(T4, 26); - T1 = _mm_and_si128(T1, MMASK); - T4 = _mm_and_si128(T4, MMASK); - T2 = _mm_add_epi64(T2, C1); - T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE)); - C1 = _mm_srli_epi64(T2, 26); - C2 = _mm_srli_epi64(T0, 26); - T2 = _mm_and_si128(T2, MMASK); - T0 = _mm_and_si128(T0, MMASK); - T3 = _mm_add_epi64(T3, C1); - T1 = _mm_add_epi64(T1, C2); - C1 = _mm_srli_epi64(T3, 26); - T3 = _mm_and_si128(T3, MMASK); - T4 = _mm_add_epi64(T4, C1); - - /* H = H[0]+H[1] */ - H0 = _mm_add_epi64(T0, _mm_srli_si128(T0, 8)); - H1 = _mm_add_epi64(T1, _mm_srli_si128(T1, 8)); - H2 = _mm_add_epi64(T2, _mm_srli_si128(T2, 8)); - H3 = _mm_add_epi64(T3, _mm_srli_si128(T3, 8)); - H4 = _mm_add_epi64(T4, _mm_srli_si128(T4, 8)); - - t0 = _mm_cvtsi128_si32(H0); - c = (t0 >> 26); - t0 &= 0x3ffffff; - t1 = _mm_cvtsi128_si32(H1) + c; - c = (t1 >> 26); - t1 &= 0x3ffffff; - t2 = _mm_cvtsi128_si32(H2) + c; - c = (t2 >> 26); - t2 &= 0x3ffffff; - t3 = _mm_cvtsi128_si32(H3) + c; - c = (t3 >> 26); - t3 &= 0x3ffffff; - t4 = _mm_cvtsi128_si32(H4) + c; - c = (t4 >> 26); - t4 &= 0x3ffffff; - t0 = t0 + (c * 5); - c = (t0 >> 26); - t0 &= 0x3ffffff; - t1 = t1 + c; - - st->HH[0] = ((t0) | (t1 << 26)) & 0xfffffffffffull; - st->HH[1] = ((t1 >> 18) | (t2 << 8) | (t3 << 34)) & 0xfffffffffffull; - st->HH[2] = ((t3 >> 10) | (t4 << 16)) & 0x3ffffffffffull; - - return consumed; -} - -void -Poly1305Update(poly1305_state *state, const unsigned char *m, size_t bytes) -{ - poly1305_state_internal *st = poly1305_aligned_state(state); - size_t want; - - /* need at least 32 initial bytes to start the 
accelerated branch */ - if (!st->started) { - if ((st->leftover == 0) && (bytes > 32)) { - poly1305_first_block(st, m); - m += 32; - bytes -= 32; - } else { - want = poly1305_min(32 - st->leftover, bytes); - poly1305_block_copy(st->buffer + st->leftover, m, want); - bytes -= want; - m += want; - st->leftover += want; - if ((st->leftover < 32) || (bytes == 0)) - return; - poly1305_first_block(st, st->buffer); - st->leftover = 0; - } - st->started = 1; - } - - /* handle leftover */ - if (st->leftover) { - want = poly1305_min(64 - st->leftover, bytes); - poly1305_block_copy(st->buffer + st->leftover, m, want); - bytes -= want; - m += want; - st->leftover += want; - if (st->leftover < 64) - return; - poly1305_blocks(st, st->buffer, 64); - st->leftover = 0; - } - - /* process 64 byte blocks */ - if (bytes >= 64) { - want = (bytes & ~63); - poly1305_blocks(st, m, want); - m += want; - bytes -= want; - } - - if (bytes) { - poly1305_block_copy(st->buffer + st->leftover, m, bytes); - st->leftover += bytes; - } -} - -void -Poly1305Finish(poly1305_state *state, unsigned char mac[16]) -{ - poly1305_state_internal *st = poly1305_aligned_state(state); - size_t leftover = st->leftover; - uint8_t *m = st->buffer; - uint128_t d[3]; - uint64_t h0, h1, h2; - uint64_t t0, t1; - uint64_t g0, g1, g2, c, nc; - uint64_t r0, r1, r2, s1, s2; - poly1305_power *p; - - if (st->started) { - size_t consumed = poly1305_combine(st, m, leftover); - leftover -= consumed; - m += consumed; - } - - /* st->HH will either be 0 or have the combined result */ - h0 = st->HH[0]; - h1 = st->HH[1]; - h2 = st->HH[2]; - - p = &st->P[1]; - r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1]; - r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1]; - r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1]; - s1 = r1 * (5 << 2); - s2 = r2 * (5 << 2); - - if (leftover < 16) - goto poly1305_donna_atmost15bytes; - -poly1305_donna_atleast16bytes: - t0 = U8TO64_LE(m + 0); - t1 = U8TO64_LE(m + 8); - h0 += t0 & 0xfffffffffff; - t0 = shr128_pair(t1, t0, 44); - h1 += t0 & 0xfffffffffff; - h2 += (t1 >> 24) | ((uint64_t)1 << 40); - -poly1305_donna_mul: - d[0] = add128(add128(mul64x64_128(h0, r0), mul64x64_128(h1, s2)), mul64x64_128(h2, s1)); - d[1] = add128(add128(mul64x64_128(h0, r1), mul64x64_128(h1, r0)), mul64x64_128(h2, s2)); - d[2] = add128(add128(mul64x64_128(h0, r2), mul64x64_128(h1, r1)), mul64x64_128(h2, r0)); - h0 = lo128(d[0]) & 0xfffffffffff; - c = shr128(d[0], 44); - d[1] = add128_64(d[1], c); - h1 = lo128(d[1]) & 0xfffffffffff; - c = shr128(d[1], 44); - d[2] = add128_64(d[2], c); - h2 = lo128(d[2]) & 0x3ffffffffff; - c = shr128(d[2], 42); - h0 += c * 5; - - m += 16; - leftover -= 16; - if (leftover >= 16) - goto poly1305_donna_atleast16bytes; - -/* final bytes */ -poly1305_donna_atmost15bytes: - if (!leftover) - goto poly1305_donna_finish; - - m[leftover++] = 1; - poly1305_block_zero(m + leftover, 16 - leftover); - leftover = 16; - - t0 = U8TO64_LE(m + 0); - t1 = U8TO64_LE(m + 8); - h0 += t0 & 0xfffffffffff; - t0 = shr128_pair(t1, t0, 44); - h1 += t0 & 0xfffffffffff; - h2 += (t1 >> 24); - - goto poly1305_donna_mul; - -poly1305_donna_finish: - c = (h0 >> 44); - h0 &= 0xfffffffffff; - h1 += c; - c = (h1 >> 44); - h1 &= 0xfffffffffff; - h2 += c; - c = (h2 >> 42); - h2 &= 0x3ffffffffff; - h0 += c * 5; - - g0 = h0 + 5; - c = (g0 >> 44); - g0 &= 0xfffffffffff; - g1 = h1 + c; - c = (g1 >> 44); - g1 &= 0xfffffffffff; - g2 = h2 + c - ((uint64_t)1 << 42); - - c = (g2 >> 63) - 1; - nc = ~c; - h0 = (h0 & nc) | (g0 & c); - h1 = (h1 & 
nc) | (g1 & c); - h2 = (h2 & nc) | (g2 & c); - - /* pad */ - t0 = ((uint64_t)p->R23.d[3] << 32) | (uint64_t)p->R23.d[1]; - t1 = ((uint64_t)p->R24.d[3] << 32) | (uint64_t)p->R24.d[1]; - h0 += (t0 & 0xfffffffffff); - c = (h0 >> 44); - h0 &= 0xfffffffffff; - t0 = shr128_pair(t1, t0, 44); - h1 += (t0 & 0xfffffffffff) + c; - c = (h1 >> 44); - h1 &= 0xfffffffffff; - t1 = (t1 >> 24); - h2 += (t1) + c; - - U64TO8_LE(mac + 0, ((h0) | (h1 << 44))); - U64TO8_LE(mac + 8, ((h1 >> 20) | (h2 << 24))); -} diff --git a/security/nss/lib/freebl/poly1305.c b/security/nss/lib/freebl/poly1305.c deleted file mode 100644 index eb3e3cd55..000000000 --- a/security/nss/lib/freebl/poly1305.c +++ /dev/null @@ -1,314 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -/* This implementation of poly1305 is by Andrew Moon - * (https://github.com/floodyberry/poly1305-donna) and released as public - * domain. */ - -#include <string.h> - -#include "poly1305.h" - -#if defined(_MSC_VER) && _MSC_VER < 1600 -#include "prtypes.h" -typedef PRUint32 uint32_t; -typedef PRUint64 uint64_t; -#else -#include <stdint.h> -#endif - -#if defined(NSS_X86) || defined(NSS_X64) -/* We can assume little-endian. */ -static uint32_t -U8TO32_LE(const unsigned char *m) -{ - uint32_t r; - memcpy(&r, m, sizeof(r)); - return r; -} - -static void -U32TO8_LE(unsigned char *m, uint32_t v) -{ - memcpy(m, &v, sizeof(v)); -} -#else -static uint32_t -U8TO32_LE(const unsigned char *m) -{ - return (uint32_t)m[0] | - (uint32_t)m[1] << 8 | - (uint32_t)m[2] << 16 | - (uint32_t)m[3] << 24; -} - -static void -U32TO8_LE(unsigned char *m, uint32_t v) -{ - m[0] = v; - m[1] = v >> 8; - m[2] = v >> 16; - m[3] = v >> 24; -} -#endif - -static uint64_t -mul32x32_64(uint32_t a, uint32_t b) -{ - return (uint64_t)a * b; -} - -struct poly1305_state_st { - uint32_t r0, r1, r2, r3, r4; - uint32_t s1, s2, s3, s4; - uint32_t h0, h1, h2, h3, h4; - unsigned char buf[16]; - unsigned int buf_used; - unsigned char key[16]; -}; - -/* update updates |state| given some amount of input data. This function may - * only be called with a |len| that is not a multiple of 16 at the end of the - * data. Otherwise the input must be buffered into 16 byte blocks. 
*/ -static void -update(struct poly1305_state_st *state, const unsigned char *in, - size_t len) -{ - uint32_t t0, t1, t2, t3; - uint64_t t[5]; - uint32_t b; - uint64_t c; - size_t j; - unsigned char mp[16]; - - if (len < 16) - goto poly1305_donna_atmost15bytes; - -poly1305_donna_16bytes: - t0 = U8TO32_LE(in); - t1 = U8TO32_LE(in + 4); - t2 = U8TO32_LE(in + 8); - t3 = U8TO32_LE(in + 12); - - in += 16; - len -= 16; - - state->h0 += t0 & 0x3ffffff; - state->h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff; - state->h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff; - state->h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff; - state->h4 += (t3 >> 8) | (1 << 24); - -poly1305_donna_mul: - t[0] = mul32x32_64(state->h0, state->r0) + - mul32x32_64(state->h1, state->s4) + - mul32x32_64(state->h2, state->s3) + - mul32x32_64(state->h3, state->s2) + - mul32x32_64(state->h4, state->s1); - t[1] = mul32x32_64(state->h0, state->r1) + - mul32x32_64(state->h1, state->r0) + - mul32x32_64(state->h2, state->s4) + - mul32x32_64(state->h3, state->s3) + - mul32x32_64(state->h4, state->s2); - t[2] = mul32x32_64(state->h0, state->r2) + - mul32x32_64(state->h1, state->r1) + - mul32x32_64(state->h2, state->r0) + - mul32x32_64(state->h3, state->s4) + - mul32x32_64(state->h4, state->s3); - t[3] = mul32x32_64(state->h0, state->r3) + - mul32x32_64(state->h1, state->r2) + - mul32x32_64(state->h2, state->r1) + - mul32x32_64(state->h3, state->r0) + - mul32x32_64(state->h4, state->s4); - t[4] = mul32x32_64(state->h0, state->r4) + - mul32x32_64(state->h1, state->r3) + - mul32x32_64(state->h2, state->r2) + - mul32x32_64(state->h3, state->r1) + - mul32x32_64(state->h4, state->r0); - - state->h0 = (uint32_t)t[0] & 0x3ffffff; - c = (t[0] >> 26); - t[1] += c; - state->h1 = (uint32_t)t[1] & 0x3ffffff; - b = (uint32_t)(t[1] >> 26); - t[2] += b; - state->h2 = (uint32_t)t[2] & 0x3ffffff; - b = (uint32_t)(t[2] >> 26); - t[3] += b; - state->h3 = (uint32_t)t[3] & 0x3ffffff; - b = (uint32_t)(t[3] >> 26); - t[4] += b; - state->h4 = (uint32_t)t[4] & 0x3ffffff; - b = (uint32_t)(t[4] >> 26); - state->h0 += b * 5; - - if (len >= 16) - goto poly1305_donna_16bytes; - -/* final bytes */ -poly1305_donna_atmost15bytes: - if (!len) - return; - - for (j = 0; j < len; j++) - mp[j] = in[j]; - mp[j++] = 1; - for (; j < 16; j++) - mp[j] = 0; - len = 0; - - t0 = U8TO32_LE(mp + 0); - t1 = U8TO32_LE(mp + 4); - t2 = U8TO32_LE(mp + 8); - t3 = U8TO32_LE(mp + 12); - - state->h0 += t0 & 0x3ffffff; - state->h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff; - state->h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff; - state->h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff; - state->h4 += (t3 >> 8); - - goto poly1305_donna_mul; -} - -void -Poly1305Init(poly1305_state *statep, const unsigned char key[32]) -{ - struct poly1305_state_st *state = (struct poly1305_state_st *)statep; - uint32_t t0, t1, t2, t3; - - t0 = U8TO32_LE(key + 0); - t1 = U8TO32_LE(key + 4); - t2 = U8TO32_LE(key + 8); - t3 = U8TO32_LE(key + 12); - - /* precompute multipliers */ - state->r0 = t0 & 0x3ffffff; - t0 >>= 26; - t0 |= t1 << 6; - state->r1 = t0 & 0x3ffff03; - t1 >>= 20; - t1 |= t2 << 12; - state->r2 = t1 & 0x3ffc0ff; - t2 >>= 14; - t2 |= t3 << 18; - state->r3 = t2 & 0x3f03fff; - t3 >>= 8; - state->r4 = t3 & 0x00fffff; - - state->s1 = state->r1 * 5; - state->s2 = state->r2 * 5; - state->s3 = state->r3 * 5; - state->s4 = state->r4 * 5; - - /* init state */ - state->h0 = 0; - state->h1 = 0; - state->h2 = 0; - state->h3 = 0; - state->h4 = 0; - - state->buf_used = 0; 
- memcpy(state->key, key + 16, sizeof(state->key)); -} - -void -Poly1305Update(poly1305_state *statep, const unsigned char *in, - size_t in_len) -{ - unsigned int i; - struct poly1305_state_st *state = (struct poly1305_state_st *)statep; - - if (state->buf_used) { - unsigned int todo = 16 - state->buf_used; - if (todo > in_len) - todo = in_len; - for (i = 0; i < todo; i++) - state->buf[state->buf_used + i] = in[i]; - state->buf_used += todo; - in_len -= todo; - in += todo; - - if (state->buf_used == 16) { - update(state, state->buf, 16); - state->buf_used = 0; - } - } - - if (in_len >= 16) { - size_t todo = in_len & ~0xf; - update(state, in, todo); - in += todo; - in_len &= 0xf; - } - - if (in_len) { - for (i = 0; i < in_len; i++) - state->buf[i] = in[i]; - state->buf_used = in_len; - } -} - -void -Poly1305Finish(poly1305_state *statep, unsigned char mac[16]) -{ - struct poly1305_state_st *state = (struct poly1305_state_st *)statep; - uint64_t f0, f1, f2, f3; - uint32_t g0, g1, g2, g3, g4; - uint32_t b, nb; - - if (state->buf_used) - update(state, state->buf, state->buf_used); - - b = state->h0 >> 26; - state->h0 = state->h0 & 0x3ffffff; - state->h1 += b; - b = state->h1 >> 26; - state->h1 = state->h1 & 0x3ffffff; - state->h2 += b; - b = state->h2 >> 26; - state->h2 = state->h2 & 0x3ffffff; - state->h3 += b; - b = state->h3 >> 26; - state->h3 = state->h3 & 0x3ffffff; - state->h4 += b; - b = state->h4 >> 26; - state->h4 = state->h4 & 0x3ffffff; - state->h0 += b * 5; - - g0 = state->h0 + 5; - b = g0 >> 26; - g0 &= 0x3ffffff; - g1 = state->h1 + b; - b = g1 >> 26; - g1 &= 0x3ffffff; - g2 = state->h2 + b; - b = g2 >> 26; - g2 &= 0x3ffffff; - g3 = state->h3 + b; - b = g3 >> 26; - g3 &= 0x3ffffff; - g4 = state->h4 + b - (1 << 26); - - b = (g4 >> 31) - 1; - nb = ~b; - state->h0 = (state->h0 & nb) | (g0 & b); - state->h1 = (state->h1 & nb) | (g1 & b); - state->h2 = (state->h2 & nb) | (g2 & b); - state->h3 = (state->h3 & nb) | (g3 & b); - state->h4 = (state->h4 & nb) | (g4 & b); - - f0 = ((state->h0) | (state->h1 << 26)) + (uint64_t)U8TO32_LE(&state->key[0]); - f1 = ((state->h1 >> 6) | (state->h2 << 20)) + (uint64_t)U8TO32_LE(&state->key[4]); - f2 = ((state->h2 >> 12) | (state->h3 << 14)) + (uint64_t)U8TO32_LE(&state->key[8]); - f3 = ((state->h3 >> 18) | (state->h4 << 8)) + (uint64_t)U8TO32_LE(&state->key[12]); - - U32TO8_LE(&mac[0], (uint32_t)f0); - f1 += (f0 >> 32); - U32TO8_LE(&mac[4], (uint32_t)f1); - f2 += (f1 >> 32); - U32TO8_LE(&mac[8], (uint32_t)f2); - f3 += (f2 >> 32); - U32TO8_LE(&mac[12], (uint32_t)f3); -} diff --git a/security/nss/lib/freebl/poly1305.h b/security/nss/lib/freebl/poly1305.h deleted file mode 100644 index 125f49b3b..000000000 --- a/security/nss/lib/freebl/poly1305.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * poly1305.h - header file for Poly1305 implementation. - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -#ifndef FREEBL_POLY1305_H_ -#define FREEBL_POLY1305_H_ - -#include "stddef.h" - -typedef unsigned char poly1305_state[512]; - -/* Poly1305Init sets up |state| so that it can be used to calculate an - * authentication tag with the one-time key |key|. Note that |key| is a - * one-time key and therefore there is no `reset' method because that would - * enable several messages to be authenticated with the same key. 
*/ -extern void Poly1305Init(poly1305_state* state, const unsigned char key[32]); - -/* Poly1305Update processes |in_len| bytes from |in|. It can be called zero or - * more times after poly1305_init. */ -extern void Poly1305Update(poly1305_state* state, const unsigned char* in, - size_t inLen); - -/* Poly1305Finish completes the poly1305 calculation and writes a 16 byte - * authentication tag to |mac|. */ -extern void Poly1305Finish(poly1305_state* state, unsigned char mac[16]); - -#endif /* FREEBL_POLY1305_H_ */ diff --git a/security/nss/lib/freebl/unix_urandom.c b/security/nss/lib/freebl/unix_urandom.c index 25e6ad91c..869a5ed8c 100644 --- a/security/nss/lib/freebl/unix_urandom.c +++ b/security/nss/lib/freebl/unix_urandom.c @@ -4,10 +4,14 @@ #include <fcntl.h> #include <unistd.h> +#include <errno.h> #include "secerr.h" #include "secrng.h" #include "prprf.h" +/* syscall getentropy() is limited to retrieving 256 bytes */ +#define GETENTROPY_MAX_BYTES 256 + void RNG_SystemInfoForRNG(void) { @@ -28,6 +32,35 @@ RNG_SystemRNG(void *dest, size_t maxLen) size_t fileBytes = 0; unsigned char *buffer = dest; +#if defined(__OpenBSD__) || (defined(LINUX) && defined(__GLIBC__) && ((__GLIBC__ > 2) || ((__GLIBC__ == 2) && (__GLIBC_MINOR__ >= 25)))) + int result; + + while (fileBytes < maxLen) { + size_t getBytes = maxLen - fileBytes; + if (getBytes > GETENTROPY_MAX_BYTES) { + getBytes = GETENTROPY_MAX_BYTES; + } + result = getentropy(buffer, getBytes); + if (result == 0) { /* success */ + fileBytes += getBytes; + buffer += getBytes; + } else { + break; + } + } + if (fileBytes == maxLen) { /* success */ + return maxLen; + } + /* If we failed with an error other than ENOSYS, it means the destination + * buffer is not writeable. We don't need to try writing to it again. */ + if (errno != ENOSYS) { + PORT_SetError(SEC_ERROR_NEED_RANDOM); + return 0; + } + /* ENOSYS means the kernel doesn't support getentropy()/getrandom(). + * Reset the number of bytes to get and fall back to /dev/urandom. */ + fileBytes = 0; +#endif fd = open("/dev/urandom", O_RDONLY); if (fd < 0) { PORT_SetError(SEC_ERROR_NEED_RANDOM); diff --git a/security/nss/lib/freebl/verified/Hacl_Poly1305_32.c b/security/nss/lib/freebl/verified/Hacl_Poly1305_32.c new file mode 100644 index 000000000..246a41af3 --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Poly1305_32.c @@ -0,0 +1,578 @@ +/* Copyright 2016-2017 INRIA and Microsoft Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "Hacl_Poly1305_32.h" + +inline static void +Hacl_Bignum_Modulo_reduce(uint32_t *b) +{ + uint32_t b0 = b[0U]; + b[0U] = (b0 << (uint32_t)2U) + b0; +} + +inline static void +Hacl_Bignum_Modulo_carry_top(uint32_t *b) +{ + uint32_t b4 = b[4U]; + uint32_t b0 = b[0U]; + uint32_t b4_26 = b4 >> (uint32_t)26U; + b[4U] = b4 & (uint32_t)0x3ffffffU; + b[0U] = (b4_26 << (uint32_t)2U) + b4_26 + b0; +} + +inline static void +Hacl_Bignum_Modulo_carry_top_wide(uint64_t *b) +{ + uint64_t b4 = b[4U]; + uint64_t b0 = b[0U]; + uint64_t b4_ = b4 & (uint64_t)(uint32_t)0x3ffffffU; + uint32_t b4_26 = (uint32_t)(b4 >> (uint32_t)26U); + uint64_t b0_ = b0 + (uint64_t)((b4_26 << (uint32_t)2U) + b4_26); + b[4U] = b4_; + b[0U] = b0_; +} + +inline static void +Hacl_Bignum_Fproduct_copy_from_wide_(uint32_t *output, uint64_t *input) +{ + for (uint32_t i = (uint32_t)0U; i < (uint32_t)5U; i = i + (uint32_t)1U) { + uint64_t xi = input[i]; + output[i] = (uint32_t)xi; + } +} + +inline static void +Hacl_Bignum_Fproduct_sum_scalar_multiplication_(uint64_t *output, uint32_t *input, uint32_t s) +{ + for (uint32_t i = (uint32_t)0U; i < (uint32_t)5U; i = i + (uint32_t)1U) { + uint64_t xi = output[i]; + uint32_t yi = input[i]; + uint64_t x_wide = (uint64_t)yi; + uint64_t y_wide = (uint64_t)s; + output[i] = xi + x_wide * y_wide; + } +} + +inline static void +Hacl_Bignum_Fproduct_carry_wide_(uint64_t *tmp) +{ + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i = i + (uint32_t)1U) { + uint32_t ctr = i; + uint64_t tctr = tmp[ctr]; + uint64_t tctrp1 = tmp[ctr + (uint32_t)1U]; + uint32_t r0 = (uint32_t)tctr & (uint32_t)0x3ffffffU; + uint64_t c = tctr >> (uint32_t)26U; + tmp[ctr] = (uint64_t)r0; + tmp[ctr + (uint32_t)1U] = tctrp1 + c; + } +} + +inline static void +Hacl_Bignum_Fproduct_carry_limb_(uint32_t *tmp) +{ + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i = i + (uint32_t)1U) { + uint32_t ctr = i; + uint32_t tctr = tmp[ctr]; + uint32_t tctrp1 = tmp[ctr + (uint32_t)1U]; + uint32_t r0 = tctr & (uint32_t)0x3ffffffU; + uint32_t c = tctr >> (uint32_t)26U; + tmp[ctr] = r0; + tmp[ctr + (uint32_t)1U] = tctrp1 + c; + } +} + +inline static void +Hacl_Bignum_Fmul_shift_reduce(uint32_t *output) +{ + uint32_t tmp = output[4U]; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i = i + (uint32_t)1U) { + uint32_t ctr = (uint32_t)5U - i - (uint32_t)1U; + uint32_t z = output[ctr - (uint32_t)1U]; + output[ctr] = z; + } + output[0U] = tmp; + Hacl_Bignum_Modulo_reduce(output); +} + +static void +Hacl_Bignum_Fmul_mul_shift_reduce_(uint64_t *output, uint32_t *input, uint32_t *input2) +{ + for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i = i + (uint32_t)1U) { + uint32_t input2i = input2[i]; + Hacl_Bignum_Fproduct_sum_scalar_multiplication_(output, input, input2i); + Hacl_Bignum_Fmul_shift_reduce(input); + } + uint32_t i = (uint32_t)4U; + uint32_t input2i = input2[i]; + Hacl_Bignum_Fproduct_sum_scalar_multiplication_(output, input, input2i); +} + +inline static void +Hacl_Bignum_Fmul_fmul(uint32_t *output, uint32_t *input, uint32_t *input2) +{ + uint32_t tmp[5U] = { 0U }; + memcpy(tmp, input, (uint32_t)5U * sizeof input[0U]); + uint64_t t[5U] = { 0U }; + Hacl_Bignum_Fmul_mul_shift_reduce_(t, tmp, input2); + Hacl_Bignum_Fproduct_carry_wide_(t); + Hacl_Bignum_Modulo_carry_top_wide(t); + Hacl_Bignum_Fproduct_copy_from_wide_(output, t); + uint32_t i0 = output[0U]; + uint32_t i1 = output[1U]; + uint32_t i0_ = i0 & (uint32_t)0x3ffffffU; + uint32_t i1_ = i1 + (i0 >> (uint32_t)26U); + output[0U] = i0_; + output[1U] = i1_; +} + +inline 
static void +Hacl_Bignum_AddAndMultiply_add_and_multiply(uint32_t *acc, uint32_t *block, uint32_t *r) +{ + for (uint32_t i = (uint32_t)0U; i < (uint32_t)5U; i = i + (uint32_t)1U) { + uint32_t xi = acc[i]; + uint32_t yi = block[i]; + acc[i] = xi + yi; + } + Hacl_Bignum_Fmul_fmul(acc, acc, r); +} + +inline static void +Hacl_Impl_Poly1305_32_poly1305_update( + Hacl_Impl_Poly1305_32_State_poly1305_state st, + uint8_t *m) +{ + Hacl_Impl_Poly1305_32_State_poly1305_state scrut0 = st; + uint32_t *h = scrut0.h; + uint32_t *acc = h; + Hacl_Impl_Poly1305_32_State_poly1305_state scrut = st; + uint32_t *r = scrut.r; + uint32_t *r5 = r; + uint32_t tmp[5U] = { 0U }; + uint8_t *s0 = m; + uint8_t *s1 = m + (uint32_t)3U; + uint8_t *s2 = m + (uint32_t)6U; + uint8_t *s3 = m + (uint32_t)9U; + uint8_t *s4 = m + (uint32_t)12U; + uint32_t i0 = load32_le(s0); + uint32_t i1 = load32_le(s1); + uint32_t i2 = load32_le(s2); + uint32_t i3 = load32_le(s3); + uint32_t i4 = load32_le(s4); + uint32_t r0 = i0 & (uint32_t)0x3ffffffU; + uint32_t r1 = i1 >> (uint32_t)2U & (uint32_t)0x3ffffffU; + uint32_t r2 = i2 >> (uint32_t)4U & (uint32_t)0x3ffffffU; + uint32_t r3 = i3 >> (uint32_t)6U & (uint32_t)0x3ffffffU; + uint32_t r4 = i4 >> (uint32_t)8U; + tmp[0U] = r0; + tmp[1U] = r1; + tmp[2U] = r2; + tmp[3U] = r3; + tmp[4U] = r4; + uint32_t b4 = tmp[4U]; + uint32_t b4_ = (uint32_t)0x1000000U | b4; + tmp[4U] = b4_; + Hacl_Bignum_AddAndMultiply_add_and_multiply(acc, tmp, r5); +} + +inline static void +Hacl_Impl_Poly1305_32_poly1305_process_last_block_( + uint8_t *block, + Hacl_Impl_Poly1305_32_State_poly1305_state st, + uint8_t *m, + uint64_t rem_) +{ + uint32_t tmp[5U] = { 0U }; + uint8_t *s0 = block; + uint8_t *s1 = block + (uint32_t)3U; + uint8_t *s2 = block + (uint32_t)6U; + uint8_t *s3 = block + (uint32_t)9U; + uint8_t *s4 = block + (uint32_t)12U; + uint32_t i0 = load32_le(s0); + uint32_t i1 = load32_le(s1); + uint32_t i2 = load32_le(s2); + uint32_t i3 = load32_le(s3); + uint32_t i4 = load32_le(s4); + uint32_t r0 = i0 & (uint32_t)0x3ffffffU; + uint32_t r1 = i1 >> (uint32_t)2U & (uint32_t)0x3ffffffU; + uint32_t r2 = i2 >> (uint32_t)4U & (uint32_t)0x3ffffffU; + uint32_t r3 = i3 >> (uint32_t)6U & (uint32_t)0x3ffffffU; + uint32_t r4 = i4 >> (uint32_t)8U; + tmp[0U] = r0; + tmp[1U] = r1; + tmp[2U] = r2; + tmp[3U] = r3; + tmp[4U] = r4; + Hacl_Impl_Poly1305_32_State_poly1305_state scrut0 = st; + uint32_t *h = scrut0.h; + Hacl_Impl_Poly1305_32_State_poly1305_state scrut = st; + uint32_t *r = scrut.r; + Hacl_Bignum_AddAndMultiply_add_and_multiply(h, tmp, r); +} + +inline static void +Hacl_Impl_Poly1305_32_poly1305_process_last_block( + Hacl_Impl_Poly1305_32_State_poly1305_state st, + uint8_t *m, + uint64_t rem_) +{ + uint8_t zero1 = (uint8_t)0U; + KRML_CHECK_SIZE(zero1, (uint32_t)16U); + uint8_t block[16U]; + for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i) + block[_i] = zero1; + uint32_t i0 = (uint32_t)rem_; + uint32_t i = (uint32_t)rem_; + memcpy(block, m, i * sizeof m[0U]); + block[i0] = (uint8_t)1U; + Hacl_Impl_Poly1305_32_poly1305_process_last_block_(block, st, m, rem_); +} + +static void +Hacl_Impl_Poly1305_32_poly1305_last_pass(uint32_t *acc) +{ + Hacl_Bignum_Fproduct_carry_limb_(acc); + Hacl_Bignum_Modulo_carry_top(acc); + uint32_t t0 = acc[0U]; + uint32_t t10 = acc[1U]; + uint32_t t20 = acc[2U]; + uint32_t t30 = acc[3U]; + uint32_t t40 = acc[4U]; + uint32_t t1_ = t10 + (t0 >> (uint32_t)26U); + uint32_t mask_261 = (uint32_t)0x3ffffffU; + uint32_t t0_ = t0 & mask_261; + uint32_t t2_ = t20 + (t1_ >> (uint32_t)26U); + uint32_t t1__ = 
t1_ & mask_261; + uint32_t t3_ = t30 + (t2_ >> (uint32_t)26U); + uint32_t t2__ = t2_ & mask_261; + uint32_t t4_ = t40 + (t3_ >> (uint32_t)26U); + uint32_t t3__ = t3_ & mask_261; + acc[0U] = t0_; + acc[1U] = t1__; + acc[2U] = t2__; + acc[3U] = t3__; + acc[4U] = t4_; + Hacl_Bignum_Modulo_carry_top(acc); + uint32_t t00 = acc[0U]; + uint32_t t1 = acc[1U]; + uint32_t t2 = acc[2U]; + uint32_t t3 = acc[3U]; + uint32_t t4 = acc[4U]; + uint32_t t1_0 = t1 + (t00 >> (uint32_t)26U); + uint32_t t0_0 = t00 & (uint32_t)0x3ffffffU; + uint32_t t2_0 = t2 + (t1_0 >> (uint32_t)26U); + uint32_t t1__0 = t1_0 & (uint32_t)0x3ffffffU; + uint32_t t3_0 = t3 + (t2_0 >> (uint32_t)26U); + uint32_t t2__0 = t2_0 & (uint32_t)0x3ffffffU; + uint32_t t4_0 = t4 + (t3_0 >> (uint32_t)26U); + uint32_t t3__0 = t3_0 & (uint32_t)0x3ffffffU; + acc[0U] = t0_0; + acc[1U] = t1__0; + acc[2U] = t2__0; + acc[3U] = t3__0; + acc[4U] = t4_0; + Hacl_Bignum_Modulo_carry_top(acc); + uint32_t i0 = acc[0U]; + uint32_t i1 = acc[1U]; + uint32_t i0_ = i0 & (uint32_t)0x3ffffffU; + uint32_t i1_ = i1 + (i0 >> (uint32_t)26U); + acc[0U] = i0_; + acc[1U] = i1_; + uint32_t a0 = acc[0U]; + uint32_t a1 = acc[1U]; + uint32_t a2 = acc[2U]; + uint32_t a3 = acc[3U]; + uint32_t a4 = acc[4U]; + uint32_t mask0 = FStar_UInt32_gte_mask(a0, (uint32_t)0x3fffffbU); + uint32_t mask1 = FStar_UInt32_eq_mask(a1, (uint32_t)0x3ffffffU); + uint32_t mask2 = FStar_UInt32_eq_mask(a2, (uint32_t)0x3ffffffU); + uint32_t mask3 = FStar_UInt32_eq_mask(a3, (uint32_t)0x3ffffffU); + uint32_t mask4 = FStar_UInt32_eq_mask(a4, (uint32_t)0x3ffffffU); + uint32_t mask = (((mask0 & mask1) & mask2) & mask3) & mask4; + uint32_t a0_ = a0 - ((uint32_t)0x3fffffbU & mask); + uint32_t a1_ = a1 - ((uint32_t)0x3ffffffU & mask); + uint32_t a2_ = a2 - ((uint32_t)0x3ffffffU & mask); + uint32_t a3_ = a3 - ((uint32_t)0x3ffffffU & mask); + uint32_t a4_ = a4 - ((uint32_t)0x3ffffffU & mask); + acc[0U] = a0_; + acc[1U] = a1_; + acc[2U] = a2_; + acc[3U] = a3_; + acc[4U] = a4_; +} + +static Hacl_Impl_Poly1305_32_State_poly1305_state +Hacl_Impl_Poly1305_32_mk_state(uint32_t *r, uint32_t *h) +{ + return ((Hacl_Impl_Poly1305_32_State_poly1305_state){.r = r, .h = h }); +} + +static void +Hacl_Standalone_Poly1305_32_poly1305_blocks( + Hacl_Impl_Poly1305_32_State_poly1305_state st, + uint8_t *m, + uint64_t len1) +{ + if (!(len1 == (uint64_t)0U)) { + uint8_t *block = m; + uint8_t *tail1 = m + (uint32_t)16U; + Hacl_Impl_Poly1305_32_poly1305_update(st, block); + uint64_t len2 = len1 - (uint64_t)1U; + Hacl_Standalone_Poly1305_32_poly1305_blocks(st, tail1, len2); + } +} + +static void +Hacl_Standalone_Poly1305_32_poly1305_partial( + Hacl_Impl_Poly1305_32_State_poly1305_state st, + uint8_t *input, + uint64_t len1, + uint8_t *kr) +{ + Hacl_Impl_Poly1305_32_State_poly1305_state scrut = st; + uint32_t *r = scrut.r; + uint32_t *x0 = r; + FStar_UInt128_t k1 = load128_le(kr); + FStar_UInt128_t + k_clamped = + FStar_UInt128_logand(k1, + FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128((uint64_t)0x0ffffffc0ffffffcU), + (uint32_t)64U), + FStar_UInt128_uint64_to_uint128((uint64_t)0x0ffffffc0fffffffU))); + uint32_t r0 = (uint32_t)FStar_UInt128_uint128_to_uint64(k_clamped) & (uint32_t)0x3ffffffU; + uint32_t + r1 = + (uint32_t)FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(k_clamped, (uint32_t)26U)) & (uint32_t)0x3ffffffU; + uint32_t + r2 = + (uint32_t)FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(k_clamped, (uint32_t)52U)) & (uint32_t)0x3ffffffU; + uint32_t + r3 = + 
(uint32_t)FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(k_clamped, (uint32_t)78U)) & (uint32_t)0x3ffffffU; + uint32_t + r4 = + (uint32_t)FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(k_clamped, (uint32_t)104U)) & (uint32_t)0x3ffffffU; + x0[0U] = r0; + x0[1U] = r1; + x0[2U] = r2; + x0[3U] = r3; + x0[4U] = r4; + Hacl_Impl_Poly1305_32_State_poly1305_state scrut0 = st; + uint32_t *h = scrut0.h; + uint32_t *x00 = h; + x00[0U] = (uint32_t)0U; + x00[1U] = (uint32_t)0U; + x00[2U] = (uint32_t)0U; + x00[3U] = (uint32_t)0U; + x00[4U] = (uint32_t)0U; + Hacl_Standalone_Poly1305_32_poly1305_blocks(st, input, len1); +} + +static void +Hacl_Standalone_Poly1305_32_poly1305_complete( + Hacl_Impl_Poly1305_32_State_poly1305_state st, + uint8_t *m, + uint64_t len1, + uint8_t *k1) +{ + uint8_t *kr = k1; + uint64_t len16 = len1 >> (uint32_t)4U; + uint64_t rem16 = len1 & (uint64_t)0xfU; + uint8_t *part_input = m; + uint8_t *last_block = m + (uint32_t)((uint64_t)16U * len16); + Hacl_Standalone_Poly1305_32_poly1305_partial(st, part_input, len16, kr); + if (!(rem16 == (uint64_t)0U)) + Hacl_Impl_Poly1305_32_poly1305_process_last_block(st, last_block, rem16); + Hacl_Impl_Poly1305_32_State_poly1305_state scrut = st; + uint32_t *h = scrut.h; + uint32_t *acc = h; + Hacl_Impl_Poly1305_32_poly1305_last_pass(acc); +} + +static void +Hacl_Standalone_Poly1305_32_crypto_onetimeauth_( + uint8_t *output, + uint8_t *input, + uint64_t len1, + uint8_t *k1) +{ + uint32_t buf[10U] = { 0U }; + uint32_t *r = buf; + uint32_t *h = buf + (uint32_t)5U; + Hacl_Impl_Poly1305_32_State_poly1305_state st = Hacl_Impl_Poly1305_32_mk_state(r, h); + uint8_t *key_s = k1 + (uint32_t)16U; + Hacl_Standalone_Poly1305_32_poly1305_complete(st, input, len1, k1); + Hacl_Impl_Poly1305_32_State_poly1305_state scrut = st; + uint32_t *h5 = scrut.h; + uint32_t *acc = h5; + FStar_UInt128_t k_ = load128_le(key_s); + uint32_t h0 = acc[0U]; + uint32_t h1 = acc[1U]; + uint32_t h2 = acc[2U]; + uint32_t h3 = acc[3U]; + uint32_t h4 = acc[4U]; + FStar_UInt128_t + acc_ = + FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128((uint64_t)h4), + (uint32_t)104U), + FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128((uint64_t)h3), + (uint32_t)78U), + FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128((uint64_t)h2), + (uint32_t)52U), + FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128((uint64_t)h1), + (uint32_t)26U), + FStar_UInt128_uint64_to_uint128((uint64_t)h0))))); + FStar_UInt128_t mac_ = FStar_UInt128_add_mod(acc_, k_); + store128_le(output, mac_); +} + +static void +Hacl_Standalone_Poly1305_32_crypto_onetimeauth( + uint8_t *output, + uint8_t *input, + uint64_t len1, + uint8_t *k1) +{ + Hacl_Standalone_Poly1305_32_crypto_onetimeauth_(output, input, len1, k1); +} + +void * +Hacl_Poly1305_32_op_String_Access(FStar_Monotonic_HyperStack_mem h, uint8_t *b) +{ + return (void *)(uint8_t)0U; +} + +Hacl_Impl_Poly1305_32_State_poly1305_state +Hacl_Poly1305_32_mk_state(uint32_t *r, uint32_t *acc) +{ + return Hacl_Impl_Poly1305_32_mk_state(r, acc); +} + +void +Hacl_Poly1305_32_init(Hacl_Impl_Poly1305_32_State_poly1305_state st, uint8_t *k1) +{ + Hacl_Impl_Poly1305_32_State_poly1305_state scrut = st; + uint32_t *r = scrut.r; + uint32_t *x0 = r; + FStar_UInt128_t k10 = load128_le(k1); + FStar_UInt128_t + k_clamped = + FStar_UInt128_logand(k10, + FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128((uint64_t)0x0ffffffc0ffffffcU), + 
(uint32_t)64U), + FStar_UInt128_uint64_to_uint128((uint64_t)0x0ffffffc0fffffffU))); + uint32_t r0 = (uint32_t)FStar_UInt128_uint128_to_uint64(k_clamped) & (uint32_t)0x3ffffffU; + uint32_t + r1 = + (uint32_t)FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(k_clamped, (uint32_t)26U)) & (uint32_t)0x3ffffffU; + uint32_t + r2 = + (uint32_t)FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(k_clamped, (uint32_t)52U)) & (uint32_t)0x3ffffffU; + uint32_t + r3 = + (uint32_t)FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(k_clamped, (uint32_t)78U)) & (uint32_t)0x3ffffffU; + uint32_t + r4 = + (uint32_t)FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(k_clamped, (uint32_t)104U)) & (uint32_t)0x3ffffffU; + x0[0U] = r0; + x0[1U] = r1; + x0[2U] = r2; + x0[3U] = r3; + x0[4U] = r4; + Hacl_Impl_Poly1305_32_State_poly1305_state scrut0 = st; + uint32_t *h = scrut0.h; + uint32_t *x00 = h; + x00[0U] = (uint32_t)0U; + x00[1U] = (uint32_t)0U; + x00[2U] = (uint32_t)0U; + x00[3U] = (uint32_t)0U; + x00[4U] = (uint32_t)0U; +} + +void *Hacl_Poly1305_32_empty_log = (void *)(uint8_t)0U; + +void +Hacl_Poly1305_32_update_block(Hacl_Impl_Poly1305_32_State_poly1305_state st, uint8_t *m) +{ + Hacl_Impl_Poly1305_32_poly1305_update(st, m); +} + +void +Hacl_Poly1305_32_update( + Hacl_Impl_Poly1305_32_State_poly1305_state st, + uint8_t *m, + uint32_t len1) +{ + if (!(len1 == (uint32_t)0U)) { + uint8_t *block = m; + uint8_t *m_ = m + (uint32_t)16U; + uint32_t len2 = len1 - (uint32_t)1U; + Hacl_Poly1305_32_update_block(st, block); + Hacl_Poly1305_32_update(st, m_, len2); + } +} + +void +Hacl_Poly1305_32_update_last( + Hacl_Impl_Poly1305_32_State_poly1305_state st, + uint8_t *m, + uint32_t len1) +{ + if (!((uint64_t)len1 == (uint64_t)0U)) + Hacl_Impl_Poly1305_32_poly1305_process_last_block(st, m, (uint64_t)len1); + Hacl_Impl_Poly1305_32_State_poly1305_state scrut = st; + uint32_t *h = scrut.h; + uint32_t *acc = h; + Hacl_Impl_Poly1305_32_poly1305_last_pass(acc); +} + +void +Hacl_Poly1305_32_finish( + Hacl_Impl_Poly1305_32_State_poly1305_state st, + uint8_t *mac, + uint8_t *k1) +{ + Hacl_Impl_Poly1305_32_State_poly1305_state scrut = st; + uint32_t *h = scrut.h; + uint32_t *acc = h; + FStar_UInt128_t k_ = load128_le(k1); + uint32_t h0 = acc[0U]; + uint32_t h1 = acc[1U]; + uint32_t h2 = acc[2U]; + uint32_t h3 = acc[3U]; + uint32_t h4 = acc[4U]; + FStar_UInt128_t + acc_ = + FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128((uint64_t)h4), + (uint32_t)104U), + FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128((uint64_t)h3), + (uint32_t)78U), + FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128((uint64_t)h2), + (uint32_t)52U), + FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128((uint64_t)h1), + (uint32_t)26U), + FStar_UInt128_uint64_to_uint128((uint64_t)h0))))); + FStar_UInt128_t mac_ = FStar_UInt128_add_mod(acc_, k_); + store128_le(mac, mac_); +} + +void +Hacl_Poly1305_32_crypto_onetimeauth( + uint8_t *output, + uint8_t *input, + uint64_t len1, + uint8_t *k1) +{ + Hacl_Standalone_Poly1305_32_crypto_onetimeauth(output, input, len1, k1); +} diff --git a/security/nss/lib/freebl/verified/Hacl_Poly1305_32.h b/security/nss/lib/freebl/verified/Hacl_Poly1305_32.h new file mode 100644 index 000000000..4dd070026 --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Poly1305_32.h @@ -0,0 +1,103 @@ +/* Copyright 2016-2017 INRIA and Microsoft Corporation + * + * Licensed under the Apache License, Version 2.0 
(the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kremlib.h" +#ifndef __Hacl_Poly1305_32_H +#define __Hacl_Poly1305_32_H + +typedef uint32_t Hacl_Bignum_Constants_limb; + +typedef uint64_t Hacl_Bignum_Constants_wide; + +typedef uint64_t Hacl_Bignum_Wide_t; + +typedef uint32_t Hacl_Bignum_Limb_t; + +typedef void *Hacl_Impl_Poly1305_32_State_log_t; + +typedef uint8_t *Hacl_Impl_Poly1305_32_State_uint8_p; + +typedef uint32_t *Hacl_Impl_Poly1305_32_State_bigint; + +typedef void *Hacl_Impl_Poly1305_32_State_seqelem; + +typedef uint32_t *Hacl_Impl_Poly1305_32_State_elemB; + +typedef uint8_t *Hacl_Impl_Poly1305_32_State_wordB; + +typedef uint8_t *Hacl_Impl_Poly1305_32_State_wordB_16; + +typedef struct +{ + uint32_t *r; + uint32_t *h; +} Hacl_Impl_Poly1305_32_State_poly1305_state; + +typedef void *Hacl_Impl_Poly1305_32_log_t; + +typedef uint32_t *Hacl_Impl_Poly1305_32_bigint; + +typedef uint8_t *Hacl_Impl_Poly1305_32_uint8_p; + +typedef uint32_t *Hacl_Impl_Poly1305_32_elemB; + +typedef uint8_t *Hacl_Impl_Poly1305_32_wordB; + +typedef uint8_t *Hacl_Impl_Poly1305_32_wordB_16; + +typedef uint8_t *Hacl_Poly1305_32_uint8_p; + +typedef uint64_t Hacl_Poly1305_32_uint64_t; + +void *Hacl_Poly1305_32_op_String_Access(FStar_Monotonic_HyperStack_mem h, uint8_t *b); + +typedef uint8_t *Hacl_Poly1305_32_key; + +typedef Hacl_Impl_Poly1305_32_State_poly1305_state Hacl_Poly1305_32_state; + +Hacl_Impl_Poly1305_32_State_poly1305_state +Hacl_Poly1305_32_mk_state(uint32_t *r, uint32_t *acc); + +void Hacl_Poly1305_32_init(Hacl_Impl_Poly1305_32_State_poly1305_state st, uint8_t *k1); + +extern void *Hacl_Poly1305_32_empty_log; + +void Hacl_Poly1305_32_update_block(Hacl_Impl_Poly1305_32_State_poly1305_state st, uint8_t *m); + +void +Hacl_Poly1305_32_update( + Hacl_Impl_Poly1305_32_State_poly1305_state st, + uint8_t *m, + uint32_t len1); + +void +Hacl_Poly1305_32_update_last( + Hacl_Impl_Poly1305_32_State_poly1305_state st, + uint8_t *m, + uint32_t len1); + +void +Hacl_Poly1305_32_finish( + Hacl_Impl_Poly1305_32_State_poly1305_state st, + uint8_t *mac, + uint8_t *k1); + +void +Hacl_Poly1305_32_crypto_onetimeauth( + uint8_t *output, + uint8_t *input, + uint64_t len1, + uint8_t *k1); +#endif |
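To make the new 32-bit API concrete, here is a minimal usage sketch (not part of the patch) showing how a caller could compute a Poly1305 tag with the functions declared in Hacl_Poly1305_32.h above. The wrapper name poly1305_32_mac and the local buffer layout are illustrative assumptions, not NSS code.

#include <stdint.h>
#include "Hacl_Poly1305_32.h"

static void
poly1305_32_mac(uint8_t *mac /* 16 bytes */, uint8_t *msg, uint32_t msgLen,
                uint8_t *key /* 32 bytes */)
{
    /* The state is two caller-owned 5-limb bignums: r (the clamped key part)
     * and the accumulator h. mk_state only wraps the pointers. */
    uint32_t buf[10U] = { 0U };
    Hacl_Impl_Poly1305_32_State_poly1305_state st =
        Hacl_Poly1305_32_mk_state(buf, buf + (uint32_t)5U);

    /* init reads the first 16 key bytes, clamps them, and stores them as
     * five 26-bit limbs in r; it also zeroes the accumulator. */
    Hacl_Poly1305_32_init(st, key);

    /* update takes the number of complete 16-byte blocks, not a byte count. */
    uint32_t blocks = msgLen >> 4U;
    uint32_t rem = msgLen & (uint32_t)0xfU;
    Hacl_Poly1305_32_update(st, msg, blocks);

    /* update_last takes the leftover byte count (0..15); it pads the final
     * partial block if there is one and performs the final reduction. */
    Hacl_Poly1305_32_update_last(st, msg + (uint32_t)16U * blocks, rem);

    /* finish adds the second 16 key bytes (s) to the encoded accumulator
     * and writes the 16-byte tag. */
    Hacl_Poly1305_32_finish(st, mac, key + (uint32_t)16U);
}

This mirrors the one-shot Hacl_Standalone_Poly1305_32_crypto_onetimeauth_ path shown above, but drives the incremental update/update_last entry points instead.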