diff options
author | wolfbeast <mcwerewolf@gmail.com> | 2018-07-18 08:24:24 +0200 |
---|---|---|
committer | wolfbeast <mcwerewolf@gmail.com> | 2018-07-18 08:24:24 +0200 |
commit | fc61780b35af913801d72086456f493f63197da6 (patch) | |
tree | f85891288a7bd988da9f0f15ae64e5c63f00d493 /security/nss/lib/freebl | |
parent | 69f7f9e5f1475891ce11cc4f431692f965b0cd30 (diff) | |
parent | 50d3e596bbe89c95615f96eb71f6bc5be737a1db (diff) | |
download | UXP-fc61780b35af913801d72086456f493f63197da6.tar UXP-fc61780b35af913801d72086456f493f63197da6.tar.gz UXP-fc61780b35af913801d72086456f493f63197da6.tar.lz UXP-fc61780b35af913801d72086456f493f63197da6.tar.xz UXP-fc61780b35af913801d72086456f493f63197da6.zip |
Merge commit '50d3e596bbe89c95615f96eb71f6bc5be737a1db' into Basilisk-releasev2018.07.18
# Conflicts:
# browser/app/profile/firefox.js
# browser/components/preferences/jar.mn
Diffstat (limited to 'security/nss/lib/freebl')
64 files changed, 6200 insertions, 1789 deletions
diff --git a/security/nss/lib/freebl/Makefile b/security/nss/lib/freebl/Makefile index 914a0119c..a4b1a86ae 100644 --- a/security/nss/lib/freebl/Makefile +++ b/security/nss/lib/freebl/Makefile @@ -110,7 +110,9 @@ endif # NSS_X86_OR_X64 means the target is either x86 or x64 ifeq (,$(filter-out i386 x386 x86 x86_64,$(CPU_ARCH))) DEFINES += -DNSS_X86_OR_X64 - CFLAGS += -mpclmul -maes + EXTRA_SRCS += gcm-x86.c aes-x86.c +$(OBJDIR)/gcm-x86.o: CFLAGS += -mpclmul -maes +$(OBJDIR)/aes-x86.o: CFLAGS += -mpclmul -maes ifneq (,$(USE_64)$(USE_X32)) DEFINES += -DNSS_X64 else @@ -490,8 +492,6 @@ else endif # Solaris for non-sparc family CPUs endif # target == SunO -# poly1305-donna-x64-sse2-incremental-source.c requires __int128 support -# in GCC 4.6.0. ifdef USE_64 ifdef CC_IS_CLANG HAVE_INT128_SUPPORT = 1 @@ -508,38 +508,39 @@ ifdef USE_64 endif endif +ifndef HAVE_INT128_SUPPORT + DEFINES += -DKRML_NOUINT128 +endif + ifndef NSS_DISABLE_CHACHAPOLY ifeq ($(CPU_ARCH),x86_64) ifdef HAVE_INT128_SUPPORT - EXTRA_SRCS += poly1305-donna-x64-sse2-incremental-source.c + EXTRA_SRCS += Hacl_Poly1305_64.c else EXTRA_SRCS += poly1305.c endif - - ifneq (1,$(CC_IS_GCC)) - EXTRA_SRCS += chacha20.c + else + ifeq ($(CPU_ARCH),aarch64) + EXTRA_SRCS += Hacl_Poly1305_64.c else - EXTRA_SRCS += chacha20_vec.c + EXTRA_SRCS += poly1305.c endif - else - EXTRA_SRCS += poly1305.c - EXTRA_SRCS += chacha20.c endif # x86_64 + + VERIFIED_SRCS += Hacl_Chacha20.c + VERIFIED_SRCS += Hacl_Chacha20_Vec128.c endif # NSS_DISABLE_CHACHAPOLY -ifeq (,$(filter-out i386 x386 x86 x86_64,$(CPU_ARCH))) +ifeq (,$(filter-out i386 x386 x86 x86_64 aarch64,$(CPU_ARCH))) # All intel architectures get the 64 bit version # With custom uint128 if necessary (faster than generic 32 bit version). ECL_SRCS += curve25519_64.c + VERIFIED_SRCS += Hacl_Curve25519.c FStar.c else # All non intel architectures get the generic 32 bit implementation (slow!) ECL_SRCS += curve25519_32.c endif -ifndef HAVE_INT128_SUPPORT - ECL_SRCS += uint128.c -endif - ####################################################################### # (5) Execute "global" rules. (OPTIONAL) # ####################################################################### @@ -563,12 +564,12 @@ rijndael_tables: $(DEFINES) $(INCLUDES) $(OBJDIR)/libfreebl.a $(OBJDIR)/make_rijndael_tab -vpath %.h mpi ecl -vpath %.c mpi ecl +vpath %.h mpi ecl verified +vpath %.c mpi ecl verified vpath %.S mpi ecl vpath %.s mpi ecl vpath %.asm mpi ecl -INCLUDES += -Impi -Iecl +INCLUDES += -Impi -Iecl -Iverified DEFINES += -DMP_API_COMPATIBLE @@ -587,8 +588,6 @@ ECL_OBJS += $(addprefix $(OBJDIR)/$(PROG_PREFIX), $(ECL_USERS:.c=$(OBJ_SUFFIX))) $(ECL_OBJS): $(ECL_HDRS) - - $(OBJDIR)/sysrand$(OBJ_SUFFIX): sysrand.c unix_rand.c win_rand.c $(OBJDIR)/$(PROG_PREFIX)mpprime$(OBJ_SUFFIX): primes.c diff --git a/security/nss/lib/freebl/aes-x86.c b/security/nss/lib/freebl/aes-x86.c new file mode 100644 index 000000000..830b4782f --- /dev/null +++ b/security/nss/lib/freebl/aes-x86.c @@ -0,0 +1,157 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif +#include "rijndael.h" +#include "secerr.h" + +#include <wmmintrin.h> /* aes-ni */ + +#define EXPAND_KEY128(k, rcon, res) \ + tmp_key = _mm_aeskeygenassist_si128(k, rcon); \ + tmp_key = _mm_shuffle_epi32(tmp_key, 0xFF); \ + tmp = _mm_xor_si128(k, _mm_slli_si128(k, 4)); \ + tmp = _mm_xor_si128(tmp, _mm_slli_si128(tmp, 4)); \ + tmp = _mm_xor_si128(tmp, _mm_slli_si128(tmp, 4)); \ + res = _mm_xor_si128(tmp, tmp_key) + +static void +native_key_expansion128(AESContext *cx, const unsigned char *key) +{ + __m128i *keySchedule = cx->keySchedule; + pre_align __m128i tmp_key post_align; + pre_align __m128i tmp post_align; + keySchedule[0] = _mm_loadu_si128((__m128i *)key); + EXPAND_KEY128(keySchedule[0], 0x01, keySchedule[1]); + EXPAND_KEY128(keySchedule[1], 0x02, keySchedule[2]); + EXPAND_KEY128(keySchedule[2], 0x04, keySchedule[3]); + EXPAND_KEY128(keySchedule[3], 0x08, keySchedule[4]); + EXPAND_KEY128(keySchedule[4], 0x10, keySchedule[5]); + EXPAND_KEY128(keySchedule[5], 0x20, keySchedule[6]); + EXPAND_KEY128(keySchedule[6], 0x40, keySchedule[7]); + EXPAND_KEY128(keySchedule[7], 0x80, keySchedule[8]); + EXPAND_KEY128(keySchedule[8], 0x1B, keySchedule[9]); + EXPAND_KEY128(keySchedule[9], 0x36, keySchedule[10]); +} + +#define EXPAND_KEY192_PART1(res, k0, kt, rcon) \ + tmp2 = _mm_slli_si128(k0, 4); \ + tmp1 = _mm_xor_si128(k0, tmp2); \ + tmp2 = _mm_slli_si128(tmp2, 4); \ + tmp1 = _mm_xor_si128(_mm_xor_si128(tmp1, tmp2), _mm_slli_si128(tmp2, 4)); \ + tmp2 = _mm_aeskeygenassist_si128(kt, rcon); \ + res = _mm_xor_si128(tmp1, _mm_shuffle_epi32(tmp2, 0x55)) + +#define EXPAND_KEY192_PART2(res, k1, k2) \ + tmp2 = _mm_xor_si128(k1, _mm_slli_si128(k1, 4)); \ + res = _mm_xor_si128(tmp2, _mm_shuffle_epi32(k2, 0xFF)) + +#define EXPAND_KEY192(k0, res1, res2, res3, carry, rcon1, rcon2) \ + EXPAND_KEY192_PART1(tmp3, k0, res1, rcon1); \ + EXPAND_KEY192_PART2(carry, res1, tmp3); \ + res1 = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(res1), \ + _mm_castsi128_pd(tmp3), 0)); \ + res2 = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(tmp3), \ + _mm_castsi128_pd(carry), 1)); \ + EXPAND_KEY192_PART1(res3, tmp3, carry, rcon2) + +static void +native_key_expansion192(AESContext *cx, const unsigned char *key) +{ + __m128i *keySchedule = cx->keySchedule; + pre_align __m128i tmp1 post_align; + pre_align __m128i tmp2 post_align; + pre_align __m128i tmp3 post_align; + pre_align __m128i carry post_align; + keySchedule[0] = _mm_loadu_si128((__m128i *)key); + keySchedule[1] = _mm_loadu_si128((__m128i *)(key + 16)); + EXPAND_KEY192(keySchedule[0], keySchedule[1], keySchedule[2], + keySchedule[3], carry, 0x1, 0x2); + EXPAND_KEY192_PART2(keySchedule[4], carry, keySchedule[3]); + EXPAND_KEY192(keySchedule[3], keySchedule[4], keySchedule[5], + keySchedule[6], carry, 0x4, 0x8); + EXPAND_KEY192_PART2(keySchedule[7], carry, keySchedule[6]); + EXPAND_KEY192(keySchedule[6], keySchedule[7], keySchedule[8], + keySchedule[9], carry, 0x10, 0x20); + EXPAND_KEY192_PART2(keySchedule[10], carry, keySchedule[9]); + EXPAND_KEY192(keySchedule[9], keySchedule[10], keySchedule[11], + keySchedule[12], carry, 0x40, 0x80); +} + +#define EXPAND_KEY256_PART(res, rconx, k1x, k2x, X) \ + tmp_key = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(k2x, rconx), X); \ + tmp2 = _mm_slli_si128(k1x, 4); \ + tmp1 = _mm_xor_si128(k1x, tmp2); \ + tmp2 = _mm_slli_si128(tmp2, 4); \ + tmp1 = _mm_xor_si128(_mm_xor_si128(tmp1, tmp2), _mm_slli_si128(tmp2, 4)); \ + res = _mm_xor_si128(tmp1, tmp_key); + +#define EXPAND_KEY256(res1, res2, k1, k2, rcon) \ + EXPAND_KEY256_PART(res1, rcon, k1, k2, 0xFF); \ + EXPAND_KEY256_PART(res2, 0x00, k2, res1, 0xAA) + +static void +native_key_expansion256(AESContext *cx, const unsigned char *key) +{ + __m128i *keySchedule = cx->keySchedule; + pre_align __m128i tmp_key post_align; + pre_align __m128i tmp1 post_align; + pre_align __m128i tmp2 post_align; + keySchedule[0] = _mm_loadu_si128((__m128i *)key); + keySchedule[1] = _mm_loadu_si128((__m128i *)(key + 16)); + EXPAND_KEY256(keySchedule[2], keySchedule[3], keySchedule[0], + keySchedule[1], 0x01); + EXPAND_KEY256(keySchedule[4], keySchedule[5], keySchedule[2], + keySchedule[3], 0x02); + EXPAND_KEY256(keySchedule[6], keySchedule[7], keySchedule[4], + keySchedule[5], 0x04); + EXPAND_KEY256(keySchedule[8], keySchedule[9], keySchedule[6], + keySchedule[7], 0x08); + EXPAND_KEY256(keySchedule[10], keySchedule[11], keySchedule[8], + keySchedule[9], 0x10); + EXPAND_KEY256(keySchedule[12], keySchedule[13], keySchedule[10], + keySchedule[11], 0x20); + EXPAND_KEY256_PART(keySchedule[14], 0x40, keySchedule[12], + keySchedule[13], 0xFF); +} + +/* + * AES key expansion using aes-ni instructions. + */ +void +rijndael_native_key_expansion(AESContext *cx, const unsigned char *key, + unsigned int Nk) +{ + switch (Nk) { + case 4: + native_key_expansion128(cx, key); + return; + case 6: + native_key_expansion192(cx, key); + return; + case 8: + native_key_expansion256(cx, key); + return; + default: + /* This shouldn't happen (checked by the caller). */ + return; + } +} + +void +rijndael_native_encryptBlock(AESContext *cx, + unsigned char *output, + const unsigned char *input) +{ + int i; + pre_align __m128i m post_align = _mm_loadu_si128((__m128i *)input); + m = _mm_xor_si128(m, cx->keySchedule[0]); + for (i = 1; i < cx->Nr; ++i) { + m = _mm_aesenc_si128(m, cx->keySchedule[i]); + } + m = _mm_aesenclast_si128(m, cx->keySchedule[cx->Nr]); + _mm_storeu_si128((__m128i *)output, m); +} diff --git a/security/nss/lib/freebl/blake2b.c b/security/nss/lib/freebl/blake2b.c new file mode 100644 index 000000000..4099c67e0 --- /dev/null +++ b/security/nss/lib/freebl/blake2b.c @@ -0,0 +1,430 @@ +/* + * blake2b.c - definitions for the blake2b hash function + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "secerr.h" +#include "blapi.h" +#include "blake2b.h" +#include "crypto_primitives.h" + +/** + * This contains the BLAKE2b initialization vectors. + */ +static const uint64_t iv[8] = { + 0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, 0x3c6ef372fe94f82bULL, + 0xa54ff53a5f1d36f1ULL, 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL, + 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL +}; + +/** + * This contains the table of permutations for blake2b compression function. + */ +static const uint8_t sigma[12][16] = { + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }, + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 }, + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 }, + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 }, + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }, + { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 }, + { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 }, + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 }, + { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0 }, + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } +}; + +/** + * This function increments the blake2b ctx counter. + */ +void +blake2b_IncrementCounter(BLAKE2BContext* ctx, const uint64_t inc) +{ + ctx->t[0] += inc; + ctx->t[1] += ctx->t[0] < inc; +} + +/** + * This macro implements the blake2b mixing function which mixes two 8-byte + * words from the message into the hash. + */ +#define G(a, b, c, d, x, y) \ + a += b + x; \ + d = ROTR64(d ^ a, 32); \ + c += d; \ + b = ROTR64(b ^ c, 24); \ + a += b + y; \ + d = ROTR64(d ^ a, 16); \ + c += d; \ + b = ROTR64(b ^ c, 63) + +#define ROUND(i) \ + G(v[0], v[4], v[8], v[12], m[sigma[i][0]], m[sigma[i][1]]); \ + G(v[1], v[5], v[9], v[13], m[sigma[i][2]], m[sigma[i][3]]); \ + G(v[2], v[6], v[10], v[14], m[sigma[i][4]], m[sigma[i][5]]); \ + G(v[3], v[7], v[11], v[15], m[sigma[i][6]], m[sigma[i][7]]); \ + G(v[0], v[5], v[10], v[15], m[sigma[i][8]], m[sigma[i][9]]); \ + G(v[1], v[6], v[11], v[12], m[sigma[i][10]], m[sigma[i][11]]); \ + G(v[2], v[7], v[8], v[13], m[sigma[i][12]], m[sigma[i][13]]); \ + G(v[3], v[4], v[9], v[14], m[sigma[i][14]], m[sigma[i][15]]) + +/** + * The blake2b compression function which takes a full 128-byte chunk of the + * input message and mixes it into the ongoing ctx array, i.e., permute the + * ctx while xoring in the block of data. + */ +void +blake2b_Compress(BLAKE2BContext* ctx, const uint8_t* block) +{ + size_t i; + uint64_t v[16], m[16]; + + PORT_Memcpy(m, block, BLAKE2B_BLOCK_LENGTH); +#if !defined(IS_LITTLE_ENDIAN) + for (i = 0; i < 16; ++i) { + m[i] = FREEBL_HTONLL(m[i]); + } +#endif + + PORT_Memcpy(v, ctx->h, 8 * 8); + PORT_Memcpy(v + 8, iv, 8 * 8); + + v[12] ^= ctx->t[0]; + v[13] ^= ctx->t[1]; + v[14] ^= ctx->f; + + ROUND(0); + ROUND(1); + ROUND(2); + ROUND(3); + ROUND(4); + ROUND(5); + ROUND(6); + ROUND(7); + ROUND(8); + ROUND(9); + ROUND(10); + ROUND(11); + + for (i = 0; i < 8; i++) { + ctx->h[i] ^= v[i] ^ v[i + 8]; + } +} + +/** + * This function can be used for both keyed and unkeyed version. + */ +BLAKE2BContext* +BLAKE2B_NewContext() +{ + return PORT_ZNew(BLAKE2BContext); +} + +/** + * Zero and free the context and can be used for both keyed and unkeyed version. + */ +void +BLAKE2B_DestroyContext(BLAKE2BContext* ctx, PRBool freeit) +{ + PORT_Memset(ctx, 0, sizeof(*ctx)); + if (freeit) { + PORT_Free(ctx); + } +} + +/** + * This function initializes blake2b ctx and can be used for both keyed and + * unkeyed version. It also checks ctx and sets error states. + */ +static SECStatus +blake2b_Begin(BLAKE2BContext* ctx, uint8_t outlen, const uint8_t* key, + size_t keylen) +{ + PORT_Assert(ctx != NULL); + if (!ctx) { + goto failure; + } + if (outlen == 0 || outlen > BLAKE2B512_LENGTH) { + goto failure; + } + if (key && keylen > BLAKE2B_KEY_SIZE) { + goto failure; + } + /* Note: key can be null if it's unkeyed. */ + if ((key == NULL && keylen > 0) || keylen > BLAKE2B_KEY_SIZE || + (key != NULL && keylen == 0)) { + goto failure; + } + + /* Mix key size(keylen) and desired hash length(outlen) into h0 */ + uint64_t param = outlen ^ (keylen << 8) ^ (1 << 16) ^ (1 << 24); + PORT_Memcpy(ctx->h, iv, 8 * 8); + ctx->h[0] ^= param; + ctx->outlen = outlen; + + /* This updates the context for only the keyed version */ + if (keylen > 0 && keylen <= BLAKE2B_KEY_SIZE && key) { + uint8_t block[BLAKE2B_BLOCK_LENGTH] = { 0 }; + PORT_Memcpy(block, key, keylen); + BLAKE2B_Update(ctx, block, BLAKE2B_BLOCK_LENGTH); + PORT_Memset(block, 0, BLAKE2B_BLOCK_LENGTH); + } + + return SECSuccess; + +failure: + PORT_Memset(&ctx, 0, sizeof(ctx)); + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; +} + +SECStatus +BLAKE2B_Begin(BLAKE2BContext* ctx) +{ + return blake2b_Begin(ctx, BLAKE2B512_LENGTH, NULL, 0); +} + +SECStatus +BLAKE2B_MAC_Begin(BLAKE2BContext* ctx, const PRUint8* key, const size_t keylen) +{ + PORT_Assert(key != NULL); + if (!key) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + return blake2b_Begin(ctx, BLAKE2B512_LENGTH, (const uint8_t*)key, keylen); +} + +static void +blake2b_IncrementCompress(BLAKE2BContext* ctx, size_t blockLength, + const unsigned char* input) +{ + blake2b_IncrementCounter(ctx, blockLength); + blake2b_Compress(ctx, input); +} + +/** + * This function updates blake2b ctx and can be used for both keyed and unkeyed + * version. + */ +SECStatus +BLAKE2B_Update(BLAKE2BContext* ctx, const unsigned char* in, + unsigned int inlen) +{ + size_t left = ctx->buflen; + size_t fill = BLAKE2B_BLOCK_LENGTH - left; + + /* Nothing to do if there's nothing. */ + if (inlen == 0) { + return SECSuccess; + } + + PORT_Assert(ctx != NULL); + PORT_Assert(in != NULL); + PORT_Assert(left <= BLAKE2B_BLOCK_LENGTH); + if (!ctx || !in) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + /* Is this a reused context? */ + if (ctx->f) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + if (inlen > fill) { + if (ctx->buflen) { + /* There's some remaining data in ctx->buf that we have to prepend + * to in. */ + PORT_Memcpy(ctx->buf + left, in, fill); + ctx->buflen = 0; + blake2b_IncrementCompress(ctx, BLAKE2B_BLOCK_LENGTH, ctx->buf); + in += fill; + inlen -= fill; + } + while (inlen > BLAKE2B_BLOCK_LENGTH) { + blake2b_IncrementCompress(ctx, BLAKE2B_BLOCK_LENGTH, in); + in += BLAKE2B_BLOCK_LENGTH; + inlen -= BLAKE2B_BLOCK_LENGTH; + } + } + + /* Store the remaining data from in in ctx->buf to process later. + * Note that ctx->buflen can be BLAKE2B_BLOCK_LENGTH. We can't process that + * here because we have to update ctx->f before compressing the last block. + */ + PORT_Assert(inlen <= BLAKE2B_BLOCK_LENGTH); + PORT_Memcpy(ctx->buf + ctx->buflen, in, inlen); + ctx->buflen += inlen; + + return SECSuccess; +} + +/** + * This function finalizes ctx, pads final block and stores hash. + * It can be used for both keyed and unkeyed version. + */ +SECStatus +BLAKE2B_End(BLAKE2BContext* ctx, unsigned char* out, + unsigned int* digestLen, size_t maxDigestLen) +{ + size_t i; + unsigned int outlen = PR_MIN(BLAKE2B512_LENGTH, maxDigestLen); + + /* Argument checks */ + if (!ctx || !out) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + /* Sanity check against outlen in context. */ + if (ctx->outlen < outlen) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + /* Is this a reused context? */ + if (ctx->f != 0) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + /* Process the remaining data from ctx->buf (padded with 0). */ + blake2b_IncrementCounter(ctx, ctx->buflen); + /* BLAKE2B_BLOCK_LENGTH - ctx->buflen can be 0. */ + PORT_Memset(ctx->buf + ctx->buflen, 0, BLAKE2B_BLOCK_LENGTH - ctx->buflen); + ctx->f = UINT64_MAX; + blake2b_Compress(ctx, ctx->buf); + + /* Write out the blake2b context(ctx). */ + for (i = 0; i < outlen; ++i) { + out[i] = ctx->h[i / 8] >> ((i % 8) * 8); + } + + if (digestLen) { + *digestLen = outlen; + } + + return SECSuccess; +} + +SECStatus +blake2b_HashBuf(uint8_t* output, const uint8_t* input, uint8_t outlen, + size_t inlen, const uint8_t* key, size_t keylen) +{ + SECStatus rv = SECFailure; + BLAKE2BContext ctx = { { 0 } }; + + if (inlen != 0) { + PORT_Assert(input != NULL); + if (input == NULL) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + goto done; + } + } + + PORT_Assert(output != NULL); + if (output == NULL) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + goto done; + } + + if (blake2b_Begin(&ctx, outlen, key, keylen) != SECSuccess) { + goto done; + } + + if (BLAKE2B_Update(&ctx, input, inlen) != SECSuccess) { + goto done; + } + + if (BLAKE2B_End(&ctx, output, NULL, outlen) != SECSuccess) { + goto done; + } + rv = SECSuccess; + +done: + PORT_Memset(&ctx, 0, sizeof ctx); + return rv; +} + +SECStatus +BLAKE2B_Hash(unsigned char* dest, const char* src) +{ + return blake2b_HashBuf(dest, (const unsigned char*)src, BLAKE2B512_LENGTH, + PORT_Strlen(src), NULL, 0); +} + +SECStatus +BLAKE2B_HashBuf(unsigned char* output, const unsigned char* input, PRUint32 inlen) +{ + return blake2b_HashBuf(output, input, BLAKE2B512_LENGTH, inlen, NULL, 0); +} + +SECStatus +BLAKE2B_MAC_HashBuf(unsigned char* output, const unsigned char* input, + unsigned int inlen, const unsigned char* key, + unsigned int keylen) +{ + PORT_Assert(key != NULL); + if (!key && keylen <= BLAKE2B_KEY_SIZE) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + return blake2b_HashBuf(output, input, BLAKE2B512_LENGTH, inlen, key, keylen); +} + +unsigned int +BLAKE2B_FlattenSize(BLAKE2BContext* ctx) +{ + return sizeof(BLAKE2BContext); +} + +SECStatus +BLAKE2B_Flatten(BLAKE2BContext* ctx, unsigned char* space) +{ + PORT_Assert(space != NULL); + if (!space) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + PORT_Memcpy(space, ctx, sizeof(BLAKE2BContext)); + return SECSuccess; +} + +BLAKE2BContext* +BLAKE2B_Resurrect(unsigned char* space, void* arg) +{ + PORT_Assert(space != NULL); + if (!space) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return NULL; + } + BLAKE2BContext* ctx = BLAKE2B_NewContext(); + if (ctx == NULL) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return NULL; + } + + PORT_Memcpy(ctx, space, sizeof(BLAKE2BContext)); + return ctx; +} + +void +BLAKE2B_Clone(BLAKE2BContext* dest, BLAKE2BContext* src) +{ + PORT_Assert(dest != NULL); + PORT_Assert(src != NULL); + if (!dest || !src) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return; + } + PORT_Memcpy(dest, src, sizeof(BLAKE2BContext)); +} diff --git a/security/nss/lib/freebl/blake2b.h b/security/nss/lib/freebl/blake2b.h new file mode 100644 index 000000000..d19a49f0e --- /dev/null +++ b/security/nss/lib/freebl/blake2b.h @@ -0,0 +1,23 @@ +/* + * blake2b.h - header file for blake2b hash function + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef BLAKE_H +#define BLAKE_H + +#include <stddef.h> +#include <stdint.h> + +struct Blake2bContextStr { + uint64_t h[8]; /* chained state */ + uint64_t t[2]; /* total number of bytes */ + uint64_t f; /* last block flag */ + uint8_t buf[BLAKE2B_BLOCK_LENGTH]; /* input buffer */ + size_t buflen; /* size of remaining bytes in buf */ + size_t outlen; /* digest size */ +}; + +#endif /* BLAKE_H */ diff --git a/security/nss/lib/freebl/blapi.h b/security/nss/lib/freebl/blapi.h index 31e471ac4..ca2149972 100644 --- a/security/nss/lib/freebl/blapi.h +++ b/security/nss/lib/freebl/blapi.h @@ -1402,6 +1402,84 @@ TLS_P_hash(HASH_HashType hashAlg, const SECItem *secret, const char *label, /******************************************/ /* +** Implements the Blake2b hash function. +*/ + +/* +** Hash a null terminated string "src" into "dest" using Blake2b +*/ +extern SECStatus BLAKE2B_Hash(unsigned char *dest, const char *src); + +/* +** Hash a non-null terminated string "src" into "dest" using Blake2b +*/ +extern SECStatus BLAKE2B_HashBuf(unsigned char *output, + const unsigned char *input, PRUint32 inlen); + +extern SECStatus BLAKE2B_MAC_HashBuf(unsigned char *output, + const unsigned char *input, + unsigned int inlen, + const unsigned char *key, + unsigned int keylen); + +/* +** Create a new Blake2b context +*/ +extern BLAKE2BContext *BLAKE2B_NewContext(); + +/* +** Destroy a Blake2b secure hash context. +** "ctx" the context +** "freeit" if PR_TRUE then free the object as well as its sub-objects +*/ +extern void BLAKE2B_DestroyContext(BLAKE2BContext *ctx, PRBool freeit); + +/* +** Reset a Blake2b context, preparing it for a fresh round of hashing +*/ +extern SECStatus BLAKE2B_Begin(BLAKE2BContext *ctx); + +extern SECStatus BLAKE2B_MAC_Begin(BLAKE2BContext *ctx, const PRUint8 *key, + const size_t keylen); + +/* +** Update the Blake hash function with more data. +*/ +extern SECStatus BLAKE2B_Update(BLAKE2BContext *ctx, const unsigned char *in, + unsigned int inlen); + +/* +** Finish the Blake hash function. Produce the digested results in "digest" +*/ +extern SECStatus BLAKE2B_End(BLAKE2BContext *ctx, unsigned char *out, + unsigned int *digestLen, size_t maxDigestLen); + +/* + * Return the size of a buffer needed to flatten the Blake2b Context into + * "ctx" the context + * returns size; + */ +extern unsigned int BLAKE2B_FlattenSize(BLAKE2BContext *ctx); + +/* + * Flatten the Blake2b Context into a buffer: + * "ctx" the context + * "space" the buffer to flatten to + * returns status; + */ +extern SECStatus BLAKE2B_Flatten(BLAKE2BContext *ctx, unsigned char *space); + +/* + * Resurrect a flattened context into a Blake2b Context + * "space" the buffer of the flattend buffer + * "arg" ptr to void used by cryptographic resurrect + * returns resurected context + */ +extern BLAKE2BContext *BLAKE2B_Resurrect(unsigned char *space, void *arg); +extern void BLAKE2B_Clone(BLAKE2BContext *dest, BLAKE2BContext *src); + +/******************************************/ +/* ** Pseudo Random Number Generation. FIPS compliance desirable. */ diff --git a/security/nss/lib/freebl/blapii.h b/security/nss/lib/freebl/blapii.h index b1be7bedf..743a1168b 100644 --- a/security/nss/lib/freebl/blapii.h +++ b/security/nss/lib/freebl/blapii.h @@ -22,8 +22,10 @@ typedef void (*freeblDestroyFunc)(void *cx, PRBool freeit); SEC_BEGIN_PROTOS +#ifndef NSS_FIPS_DISABLED SECStatus BL_FIPSEntryOK(PRBool freeblOnly); PRBool BL_POSTRan(PRBool freeblOnly); +#endif #if defined(XP_UNIX) && !defined(NO_FORK_CHECK) @@ -78,5 +80,11 @@ SECStatus generate_prime(mp_int *prime, int primeLen); PRBool aesni_support(); PRBool clmul_support(); PRBool avx_support(); +PRBool ssse3_support(); +PRBool arm_neon_support(); +PRBool arm_aes_support(); +PRBool arm_pmull_support(); +PRBool arm_sha1_support(); +PRBool arm_sha2_support(); #endif /* _BLAPII_H_ */ diff --git a/security/nss/lib/freebl/blapit.h b/security/nss/lib/freebl/blapit.h index 2a17b5f46..c718c6f27 100644 --- a/security/nss/lib/freebl/blapit.h +++ b/security/nss/lib/freebl/blapit.h @@ -91,25 +91,27 @@ typedef int __BLAPI_DEPRECATED __attribute__((deprecated)); /* * Number of bytes each hash algorithm produces */ -#define MD2_LENGTH 16 /* Bytes */ -#define MD5_LENGTH 16 /* Bytes */ -#define SHA1_LENGTH 20 /* Bytes */ -#define SHA256_LENGTH 32 /* bytes */ -#define SHA384_LENGTH 48 /* bytes */ -#define SHA512_LENGTH 64 /* bytes */ +#define MD2_LENGTH 16 /* Bytes */ +#define MD5_LENGTH 16 /* Bytes */ +#define SHA1_LENGTH 20 /* Bytes */ +#define SHA256_LENGTH 32 /* bytes */ +#define SHA384_LENGTH 48 /* bytes */ +#define SHA512_LENGTH 64 /* bytes */ +#define BLAKE2B512_LENGTH 64 /* Bytes */ #define HASH_LENGTH_MAX SHA512_LENGTH /* * Input block size for each hash algorithm. */ -#define MD2_BLOCK_LENGTH 64 /* bytes */ -#define MD5_BLOCK_LENGTH 64 /* bytes */ -#define SHA1_BLOCK_LENGTH 64 /* bytes */ -#define SHA224_BLOCK_LENGTH 64 /* bytes */ -#define SHA256_BLOCK_LENGTH 64 /* bytes */ -#define SHA384_BLOCK_LENGTH 128 /* bytes */ -#define SHA512_BLOCK_LENGTH 128 /* bytes */ +#define MD2_BLOCK_LENGTH 64 /* bytes */ +#define MD5_BLOCK_LENGTH 64 /* bytes */ +#define SHA1_BLOCK_LENGTH 64 /* bytes */ +#define SHA224_BLOCK_LENGTH 64 /* bytes */ +#define SHA256_BLOCK_LENGTH 64 /* bytes */ +#define SHA384_BLOCK_LENGTH 128 /* bytes */ +#define SHA512_BLOCK_LENGTH 128 /* bytes */ +#define BLAKE2B_BLOCK_LENGTH 128 /* Bytes */ #define HASH_BLOCK_LENGTH_MAX SHA512_BLOCK_LENGTH #define AES_KEY_WRAP_IV_BYTES 8 @@ -127,6 +129,8 @@ typedef int __BLAPI_DEPRECATED __attribute__((deprecated)); #define NSS_FREEBL_DEFAULT_CHUNKSIZE 2048 +#define BLAKE2B_KEY_SIZE 64 + /* * These values come from the initial key size limits from the PKCS #11 * module. They may be arbitrarily adjusted to any value freebl supports. @@ -213,6 +217,7 @@ struct SHA512ContextStr; struct AESKeyWrapContextStr; struct SEEDContextStr; struct ChaCha20Poly1305ContextStr; +struct Blake2bContextStr; typedef struct DESContextStr DESContext; typedef struct RC2ContextStr RC2Context; @@ -232,6 +237,7 @@ typedef struct SHA512ContextStr SHA384Context; typedef struct AESKeyWrapContextStr AESKeyWrapContext; typedef struct SEEDContextStr SEEDContext; typedef struct ChaCha20Poly1305ContextStr ChaCha20Poly1305Context; +typedef struct Blake2bContextStr BLAKE2BContext; /*************************************************************************** ** RSA Public and Private Key structures diff --git a/security/nss/lib/freebl/blinit.c b/security/nss/lib/freebl/blinit.c index d7f2ec53a..f369e62e7 100644 --- a/security/nss/lib/freebl/blinit.c +++ b/security/nss/lib/freebl/blinit.c @@ -23,6 +23,12 @@ static PRCallOnceType coFreeblInit; static PRBool aesni_support_ = PR_FALSE; static PRBool clmul_support_ = PR_FALSE; static PRBool avx_support_ = PR_FALSE; +static PRBool ssse3_support_ = PR_FALSE; +static PRBool arm_neon_support_ = PR_FALSE; +static PRBool arm_aes_support_ = PR_FALSE; +static PRBool arm_sha1_support_ = PR_FALSE; +static PRBool arm_sha2_support_ = PR_FALSE; +static PRBool arm_pmull_support_ = PR_FALSE; #ifdef NSS_X86_OR_X64 /* @@ -62,6 +68,7 @@ check_xcr0_ymm() #define ECX_XSAVE (1 << 26) #define ECX_OSXSAVE (1 << 27) #define ECX_AVX (1 << 28) +#define ECX_SSSE3 (1 << 9) #define AVX_BITS (ECX_XSAVE | ECX_OSXSAVE | ECX_AVX) void @@ -71,6 +78,7 @@ CheckX86CPUSupport() char *disable_hw_aes = PR_GetEnvSecure("NSS_DISABLE_HW_AES"); char *disable_pclmul = PR_GetEnvSecure("NSS_DISABLE_PCLMUL"); char *disable_avx = PR_GetEnvSecure("NSS_DISABLE_AVX"); + char *disable_ssse3 = PR_GetEnvSecure("NSS_DISABLE_SSSE3"); freebl_cpuid(1, &eax, &ebx, &ecx, &edx); aesni_support_ = (PRBool)((ecx & ECX_AESNI) != 0 && disable_hw_aes == NULL); clmul_support_ = (PRBool)((ecx & ECX_CLMUL) != 0 && disable_pclmul == NULL); @@ -78,9 +86,131 @@ CheckX86CPUSupport() * as well as XMM and YMM state. */ avx_support_ = (PRBool)((ecx & AVX_BITS) == AVX_BITS) && check_xcr0_ymm() && disable_avx == NULL; + ssse3_support_ = (PRBool)((ecx & ECX_SSSE3) != 0 && + disable_ssse3 == NULL); } #endif /* NSS_X86_OR_X64 */ +/* clang-format off */ +#if (defined(__aarch64__) || defined(__arm__)) && !defined(__ANDROID__) +#ifndef __has_include +#define __has_include(x) 0 +#endif +#if (__has_include(<sys/auxv.h>) || defined(__linux__)) && \ + defined(__GNUC__) && __GNUC__ >= 2 && defined(__ELF__) +#include <sys/auxv.h> +extern unsigned long getauxval(unsigned long type) __attribute__((weak)); +#else +static unsigned long (*getauxval)(unsigned long) = NULL; +#define AT_HWCAP2 0 +#define AT_HWCAP 0 +#endif /* defined(__GNUC__) && __GNUC__ >= 2 && defined(__ELF__)*/ +#endif /* (defined(__aarch64__) || defined(__arm__)) && !defined(__ANDROID__) */ +/* clang-format on */ + +#if defined(__aarch64__) && !defined(__ANDROID__) +// Defines from hwcap.h in Linux kernel - ARM64 +#ifndef HWCAP_AES +#define HWCAP_AES (1 << 3) +#endif +#ifndef HWCAP_PMULL +#define HWCAP_PMULL (1 << 4) +#endif +#ifndef HWCAP_SHA1 +#define HWCAP_SHA1 (1 << 5) +#endif +#ifndef HWCAP_SHA2 +#define HWCAP_SHA2 (1 << 6) +#endif + +void +CheckARMSupport() +{ + char *disable_arm_neon = PR_GetEnvSecure("NSS_DISABLE_ARM_NEON"); + char *disable_hw_aes = PR_GetEnvSecure("NSS_DISABLE_HW_AES"); + if (getauxval) { + long hwcaps = getauxval(AT_HWCAP); + arm_aes_support_ = hwcaps & HWCAP_AES && disable_hw_aes == NULL; + arm_pmull_support_ = hwcaps & HWCAP_PMULL; + arm_sha1_support_ = hwcaps & HWCAP_SHA1; + arm_sha2_support_ = hwcaps & HWCAP_SHA2; + } + /* aarch64 must support NEON. */ + arm_neon_support_ = disable_arm_neon == NULL; +} +#endif /* defined(__aarch64__) && !defined(__ANDROID__) */ + +#if defined(__arm__) && !defined(__ANDROID__) +// Defines from hwcap.h in Linux kernel - ARM +/* + * HWCAP flags - for elf_hwcap (in kernel) and AT_HWCAP + */ +#ifndef HWCAP_NEON +#define HWCAP_NEON (1 << 12) +#endif + +/* + * HWCAP2 flags - for elf_hwcap2 (in kernel) and AT_HWCAP2 + */ +#ifndef HWCAP2_AES +#define HWCAP2_AES (1 << 0) +#endif +#ifndef HWCAP2_PMULL +#define HWCAP2_PMULL (1 << 1) +#endif +#ifndef HWCAP2_SHA1 +#define HWCAP2_SHA1 (1 << 2) +#endif +#ifndef HWCAP2_SHA2 +#define HWCAP2_SHA2 (1 << 3) +#endif + +void +CheckARMSupport() +{ + char *disable_arm_neon = PR_GetEnvSecure("NSS_DISABLE_ARM_NEON"); + char *disable_hw_aes = PR_GetEnvSecure("NSS_DISABLE_HW_AES"); + if (getauxval) { + long hwcaps = getauxval(AT_HWCAP2); + arm_aes_support_ = hwcaps & HWCAP2_AES && disable_hw_aes == NULL; + arm_pmull_support_ = hwcaps & HWCAP2_PMULL; + arm_sha1_support_ = hwcaps & HWCAP2_SHA1; + arm_sha2_support_ = hwcaps & HWCAP2_SHA2; + arm_neon_support_ = hwcaps & HWCAP_NEON && disable_arm_neon == NULL; + } +} +#endif /* defined(__arm__) && !defined(__ANDROID__) */ + +// Enable when Firefox can use it. +// #if defined(__ANDROID__) && (defined(__arm__) || defined(__aarch64__)) +// #include <cpu-features.h> +// void +// CheckARMSupport() +// { +// char *disable_arm_neon = PR_GetEnvSecure("NSS_DISABLE_ARM_NEON"); +// char *disable_hw_aes = PR_GetEnvSecure("NSS_DISABLE_HW_AES"); +// AndroidCpuFamily family = android_getCpuFamily(); +// uint64_t features = android_getCpuFeatures(); +// if (family == ANDROID_CPU_FAMILY_ARM64) { +// arm_aes_support_ = features & ANDROID_CPU_ARM64_FEATURE_AES && +// disable_hw_aes == NULL; +// arm_pmull_support_ = features & ANDROID_CPU_ARM64_FEATURE_PMULL; +// arm_sha1_support_ = features & ANDROID_CPU_ARM64_FEATURE_SHA1; +// arm_sha2_support_ = features & ANDROID_CPU_ARM64_FEATURE_SHA2; +// arm_neon_support_ = disable_arm_neon == NULL; +// } +// if (family == ANDROID_CPU_FAMILY_ARM) { +// arm_aes_support_ = features & ANDROID_CPU_ARM_FEATURE_AES && +// disable_hw_aes == NULL; +// arm_pmull_support_ = features & ANDROID_CPU_ARM_FEATURE_PMULL; +// arm_sha1_support_ = features & ANDROID_CPU_ARM_FEATURE_SHA1; +// arm_sha2_support_ = features & ANDROID_CPU_ARM_FEATURE_SHA2; +// arm_neon_support_ = hwcaps & ANDROID_CPU_ARM_FEATURE_NEON && +// disable_arm_neon == NULL; +// } +// } +// #endif /* defined(__ANDROID__) && (defined(__arm__) || defined(__aarch64__)) */ + PRBool aesni_support() { @@ -96,12 +226,44 @@ avx_support() { return avx_support_; } +PRBool +ssse3_support() +{ + return ssse3_support_; +} +PRBool +arm_neon_support() +{ + return arm_neon_support_; +} +PRBool +arm_aes_support() +{ + return arm_aes_support_; +} +PRBool +arm_pmull_support() +{ + return arm_pmull_support_; +} +PRBool +arm_sha1_support() +{ + return arm_sha1_support_; +} +PRBool +arm_sha2_support() +{ + return arm_sha2_support_; +} static PRStatus FreeblInit(void) { #ifdef NSS_X86_OR_X64 CheckX86CPUSupport(); +#elif (defined(__aarch64__) || defined(__arm__)) && !defined(__ANDROID__) + CheckARMSupport(); #endif return PR_SUCCESS; } diff --git a/security/nss/lib/freebl/chacha20.c b/security/nss/lib/freebl/chacha20.c deleted file mode 100644 index f55d1e670..000000000 --- a/security/nss/lib/freebl/chacha20.c +++ /dev/null @@ -1,119 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -/* Adopted from the public domain code in NaCl by djb. */ - -#include <string.h> -#include <stdio.h> - -#include "prtypes.h" -#include "secport.h" -#include "chacha20.h" - -#if defined(_MSC_VER) -#pragma intrinsic(_lrotl) -#define ROTL32(x, n) _lrotl(x, n) -#else -#define ROTL32(x, n) ((x << n) | (x >> ((8 * sizeof x) - n))) -#endif - -#define ROTATE(v, c) ROTL32((v), (c)) - -#define U32TO8_LITTLE(p, v) \ - { \ - (p)[0] = ((v)) & 0xff; \ - (p)[1] = ((v) >> 8) & 0xff; \ - (p)[2] = ((v) >> 16) & 0xff; \ - (p)[3] = ((v) >> 24) & 0xff; \ - } -#define U8TO32_LITTLE(p) \ - (((PRUint32)((p)[0])) | ((PRUint32)((p)[1]) << 8) | \ - ((PRUint32)((p)[2]) << 16) | ((PRUint32)((p)[3]) << 24)) - -#define QUARTERROUND(x, a, b, c, d) \ - x[a] = x[a] + x[b]; \ - x[d] = ROTATE(x[d] ^ x[a], 16); \ - x[c] = x[c] + x[d]; \ - x[b] = ROTATE(x[b] ^ x[c], 12); \ - x[a] = x[a] + x[b]; \ - x[d] = ROTATE(x[d] ^ x[a], 8); \ - x[c] = x[c] + x[d]; \ - x[b] = ROTATE(x[b] ^ x[c], 7); - -static void -ChaChaCore(unsigned char output[64], const PRUint32 input[16], int num_rounds) -{ - PRUint32 x[16]; - int i; - - PORT_Memcpy(x, input, sizeof(PRUint32) * 16); - for (i = num_rounds; i > 0; i -= 2) { - QUARTERROUND(x, 0, 4, 8, 12) - QUARTERROUND(x, 1, 5, 9, 13) - QUARTERROUND(x, 2, 6, 10, 14) - QUARTERROUND(x, 3, 7, 11, 15) - QUARTERROUND(x, 0, 5, 10, 15) - QUARTERROUND(x, 1, 6, 11, 12) - QUARTERROUND(x, 2, 7, 8, 13) - QUARTERROUND(x, 3, 4, 9, 14) - } - - for (i = 0; i < 16; ++i) { - x[i] = x[i] + input[i]; - } - for (i = 0; i < 16; ++i) { - U32TO8_LITTLE(output + 4 * i, x[i]); - } -} - -static const unsigned char sigma[16] = "expand 32-byte k"; - -void -ChaCha20XOR(unsigned char *out, const unsigned char *in, unsigned int inLen, - const unsigned char key[32], const unsigned char nonce[12], - uint32_t counter) -{ - unsigned char block[64]; - PRUint32 input[16]; - unsigned int i; - - input[4] = U8TO32_LITTLE(key + 0); - input[5] = U8TO32_LITTLE(key + 4); - input[6] = U8TO32_LITTLE(key + 8); - input[7] = U8TO32_LITTLE(key + 12); - - input[8] = U8TO32_LITTLE(key + 16); - input[9] = U8TO32_LITTLE(key + 20); - input[10] = U8TO32_LITTLE(key + 24); - input[11] = U8TO32_LITTLE(key + 28); - - input[0] = U8TO32_LITTLE(sigma + 0); - input[1] = U8TO32_LITTLE(sigma + 4); - input[2] = U8TO32_LITTLE(sigma + 8); - input[3] = U8TO32_LITTLE(sigma + 12); - - input[12] = counter; - input[13] = U8TO32_LITTLE(nonce + 0); - input[14] = U8TO32_LITTLE(nonce + 4); - input[15] = U8TO32_LITTLE(nonce + 8); - - while (inLen >= 64) { - ChaChaCore(block, input, 20); - for (i = 0; i < 64; i++) { - out[i] = in[i] ^ block[i]; - } - - input[12]++; - inLen -= 64; - in += 64; - out += 64; - } - - if (inLen > 0) { - ChaChaCore(block, input, 20); - for (i = 0; i < inLen; i++) { - out[i] = in[i] ^ block[i]; - } - } -} diff --git a/security/nss/lib/freebl/chacha20.h b/security/nss/lib/freebl/chacha20.h deleted file mode 100644 index 7e396fa8c..000000000 --- a/security/nss/lib/freebl/chacha20.h +++ /dev/null @@ -1,26 +0,0 @@ -/* - * chacha20.h - header file for ChaCha20 implementation. - * - * This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -#ifndef FREEBL_CHACHA20_H_ -#define FREEBL_CHACHA20_H_ - -#if defined(_MSC_VER) && _MSC_VER < 1600 -#include "prtypes.h" -typedef PRUint32 uint32_t; -typedef PRUint64 uint64_t; -#else -#include <stdint.h> -#endif - -/* ChaCha20XOR encrypts |inLen| bytes from |in| with the given key and - * nonce and writes the result to |out|, which may be equal to |in|. The - * initial block counter is specified by |counter|. */ -extern void ChaCha20XOR(unsigned char *out, const unsigned char *in, - unsigned int inLen, const unsigned char key[32], - const unsigned char nonce[12], uint32_t counter); - -#endif /* FREEBL_CHACHA20_H_ */ diff --git a/security/nss/lib/freebl/chacha20_vec.c b/security/nss/lib/freebl/chacha20_vec.c deleted file mode 100644 index 12f94d897..000000000 --- a/security/nss/lib/freebl/chacha20_vec.c +++ /dev/null @@ -1,327 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -/* This implementation is by Ted Krovetz and was submitted to SUPERCOP and - * marked as public domain. It was been altered to allow for non-aligned inputs - * and to allow the block counter to be passed in specifically. */ - -#include <string.h> - -#include "chacha20.h" -#include "blapii.h" - -#ifndef CHACHA_RNDS -#define CHACHA_RNDS 20 /* 8 (high speed), 20 (conservative), 12 (middle) */ -#endif - -/* Architecture-neutral way to specify 16-byte vector of ints */ -typedef unsigned vec __attribute__((vector_size(16))); - -/* This implementation is designed for Neon, SSE and AltiVec machines. The - * following specify how to do certain vector operations efficiently on - * each architecture, using intrinsics. - * This implementation supports parallel processing of multiple blocks, - * including potentially using general-purpose registers. - */ -#if __ARM_NEON__ -#include <arm_neon.h> -#define GPR_TOO 1 -#define VBPI 2 -#define ONE (vec) vsetq_lane_u32(1, vdupq_n_u32(0), 0) -#define LOAD(m) (vec)(*((vec *)(m))) -#define STORE(m, r) (*((vec *)(m))) = (r) -#define ROTV1(x) (vec) vextq_u32((uint32x4_t)x, (uint32x4_t)x, 1) -#define ROTV2(x) (vec) vextq_u32((uint32x4_t)x, (uint32x4_t)x, 2) -#define ROTV3(x) (vec) vextq_u32((uint32x4_t)x, (uint32x4_t)x, 3) -#define ROTW16(x) (vec) vrev32q_u16((uint16x8_t)x) -#if __clang__ -#define ROTW7(x) (x << ((vec){ 7, 7, 7, 7 })) ^ (x >> ((vec){ 25, 25, 25, 25 })) -#define ROTW8(x) (x << ((vec){ 8, 8, 8, 8 })) ^ (x >> ((vec){ 24, 24, 24, 24 })) -#define ROTW12(x) (x << ((vec){ 12, 12, 12, 12 })) ^ (x >> ((vec){ 20, 20, 20, 20 })) -#else -#define ROTW7(x) (vec) vsriq_n_u32(vshlq_n_u32((uint32x4_t)x, 7), (uint32x4_t)x, 25) -#define ROTW8(x) (vec) vsriq_n_u32(vshlq_n_u32((uint32x4_t)x, 8), (uint32x4_t)x, 24) -#define ROTW12(x) (vec) vsriq_n_u32(vshlq_n_u32((uint32x4_t)x, 12), (uint32x4_t)x, 20) -#endif -#elif __SSE2__ -#include <emmintrin.h> -#define GPR_TOO 0 -#if __clang__ -#define VBPI 4 -#else -#define VBPI 3 -#endif -#define ONE (vec) _mm_set_epi32(0, 0, 0, 1) -#define LOAD(m) (vec) _mm_loadu_si128((__m128i *)(m)) -#define STORE(m, r) _mm_storeu_si128((__m128i *)(m), (__m128i)(r)) -#define ROTV1(x) (vec) _mm_shuffle_epi32((__m128i)x, _MM_SHUFFLE(0, 3, 2, 1)) -#define ROTV2(x) (vec) _mm_shuffle_epi32((__m128i)x, _MM_SHUFFLE(1, 0, 3, 2)) -#define ROTV3(x) (vec) _mm_shuffle_epi32((__m128i)x, _MM_SHUFFLE(2, 1, 0, 3)) -#define ROTW7(x) (vec)(_mm_slli_epi32((__m128i)x, 7) ^ _mm_srli_epi32((__m128i)x, 25)) -#define ROTW12(x) (vec)(_mm_slli_epi32((__m128i)x, 12) ^ _mm_srli_epi32((__m128i)x, 20)) -#if __SSSE3__ -#include <tmmintrin.h> -#define ROTW8(x) (vec) _mm_shuffle_epi8((__m128i)x, _mm_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3)) -#define ROTW16(x) (vec) _mm_shuffle_epi8((__m128i)x, _mm_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2)) -#else -#define ROTW8(x) (vec)(_mm_slli_epi32((__m128i)x, 8) ^ _mm_srli_epi32((__m128i)x, 24)) -#define ROTW16(x) (vec)(_mm_slli_epi32((__m128i)x, 16) ^ _mm_srli_epi32((__m128i)x, 16)) -#endif -#else -#error-- Implementation supports only machines with neon or SSE2 -#endif - -#ifndef REVV_BE -#define REVV_BE(x) (x) -#endif - -#ifndef REVW_BE -#define REVW_BE(x) (x) -#endif - -#define BPI (VBPI + GPR_TOO) /* Blocks computed per loop iteration */ - -#define DQROUND_VECTORS(a, b, c, d) \ - a += b; \ - d ^= a; \ - d = ROTW16(d); \ - c += d; \ - b ^= c; \ - b = ROTW12(b); \ - a += b; \ - d ^= a; \ - d = ROTW8(d); \ - c += d; \ - b ^= c; \ - b = ROTW7(b); \ - b = ROTV1(b); \ - c = ROTV2(c); \ - d = ROTV3(d); \ - a += b; \ - d ^= a; \ - d = ROTW16(d); \ - c += d; \ - b ^= c; \ - b = ROTW12(b); \ - a += b; \ - d ^= a; \ - d = ROTW8(d); \ - c += d; \ - b ^= c; \ - b = ROTW7(b); \ - b = ROTV3(b); \ - c = ROTV2(c); \ - d = ROTV1(d); - -#define QROUND_WORDS(a, b, c, d) \ - a = a + b; \ - d ^= a; \ - d = d << 16 | d >> 16; \ - c = c + d; \ - b ^= c; \ - b = b << 12 | b >> 20; \ - a = a + b; \ - d ^= a; \ - d = d << 8 | d >> 24; \ - c = c + d; \ - b ^= c; \ - b = b << 7 | b >> 25; - -#define WRITE_XOR(in, op, d, v0, v1, v2, v3) \ - STORE(op + d + 0, LOAD(in + d + 0) ^ REVV_BE(v0)); \ - STORE(op + d + 4, LOAD(in + d + 4) ^ REVV_BE(v1)); \ - STORE(op + d + 8, LOAD(in + d + 8) ^ REVV_BE(v2)); \ - STORE(op + d + 12, LOAD(in + d + 12) ^ REVV_BE(v3)); - -void NO_SANITIZE_ALIGNMENT -ChaCha20XOR(unsigned char *out, const unsigned char *in, unsigned int inlen, - const unsigned char key[32], const unsigned char nonce[12], - uint32_t counter) -{ - unsigned iters, i, *op = (unsigned *)out, *ip = (unsigned *)in, *kp; -#if defined(__ARM_NEON__) - unsigned *np; -#endif - vec s0, s1, s2, s3; -#if !defined(__ARM_NEON__) && !defined(__SSE2__) - __attribute__((aligned(16))) unsigned key[8], nonce[4]; -#endif - __attribute__((aligned(16))) unsigned chacha_const[] = - { 0x61707865, 0x3320646E, 0x79622D32, 0x6B206574 }; -#if defined(__ARM_NEON__) || defined(__SSE2__) - kp = (unsigned *)key; -#else - ((vec *)key)[0] = REVV_BE(((vec *)key)[0]); - ((vec *)key)[1] = REVV_BE(((vec *)key)[1]); - ((unsigned *)nonce)[0] = REVW_BE(((unsigned *)nonce)[0]); - ((unsigned *)nonce)[1] = REVW_BE(((unsigned *)nonce)[1]); - ((unsigned *)nonce)[2] = REVW_BE(((unsigned *)nonce)[2]); - ((unsigned *)nonce)[3] = REVW_BE(((unsigned *)nonce)[3]); - kp = (unsigned *)key; - np = (unsigned *)nonce; -#endif -#if defined(__ARM_NEON__) - np = (unsigned *)nonce; -#endif - s0 = LOAD(chacha_const); - s1 = LOAD(&((vec *)kp)[0]); - s2 = LOAD(&((vec *)kp)[1]); - s3 = (vec){ - counter, - ((uint32_t *)nonce)[0], - ((uint32_t *)nonce)[1], - ((uint32_t *)nonce)[2] - }; - - for (iters = 0; iters < inlen / (BPI * 64); iters++) { -#if GPR_TOO - register unsigned x0, x1, x2, x3, x4, x5, x6, x7, x8, - x9, x10, x11, x12, x13, x14, x15; -#endif -#if VBPI > 2 - vec v8, v9, v10, v11; -#endif -#if VBPI > 3 - vec v12, v13, v14, v15; -#endif - - vec v0, v1, v2, v3, v4, v5, v6, v7; - v4 = v0 = s0; - v5 = v1 = s1; - v6 = v2 = s2; - v3 = s3; - v7 = v3 + ONE; -#if VBPI > 2 - v8 = v4; - v9 = v5; - v10 = v6; - v11 = v7 + ONE; -#endif -#if VBPI > 3 - v12 = v8; - v13 = v9; - v14 = v10; - v15 = v11 + ONE; -#endif -#if GPR_TOO - x0 = chacha_const[0]; - x1 = chacha_const[1]; - x2 = chacha_const[2]; - x3 = chacha_const[3]; - x4 = kp[0]; - x5 = kp[1]; - x6 = kp[2]; - x7 = kp[3]; - x8 = kp[4]; - x9 = kp[5]; - x10 = kp[6]; - x11 = kp[7]; - x12 = counter + BPI * iters + (BPI - 1); - x13 = np[0]; - x14 = np[1]; - x15 = np[2]; -#endif - for (i = CHACHA_RNDS / 2; i; i--) { - DQROUND_VECTORS(v0, v1, v2, v3) - DQROUND_VECTORS(v4, v5, v6, v7) -#if VBPI > 2 - DQROUND_VECTORS(v8, v9, v10, v11) -#endif -#if VBPI > 3 - DQROUND_VECTORS(v12, v13, v14, v15) -#endif -#if GPR_TOO - QROUND_WORDS(x0, x4, x8, x12) - QROUND_WORDS(x1, x5, x9, x13) - QROUND_WORDS(x2, x6, x10, x14) - QROUND_WORDS(x3, x7, x11, x15) - QROUND_WORDS(x0, x5, x10, x15) - QROUND_WORDS(x1, x6, x11, x12) - QROUND_WORDS(x2, x7, x8, x13) - QROUND_WORDS(x3, x4, x9, x14) -#endif - } - - WRITE_XOR(ip, op, 0, v0 + s0, v1 + s1, v2 + s2, v3 + s3) - s3 += ONE; - WRITE_XOR(ip, op, 16, v4 + s0, v5 + s1, v6 + s2, v7 + s3) - s3 += ONE; -#if VBPI > 2 - WRITE_XOR(ip, op, 32, v8 + s0, v9 + s1, v10 + s2, v11 + s3) - s3 += ONE; -#endif -#if VBPI > 3 - WRITE_XOR(ip, op, 48, v12 + s0, v13 + s1, v14 + s2, v15 + s3) - s3 += ONE; -#endif - ip += VBPI * 16; - op += VBPI * 16; -#if GPR_TOO - op[0] = REVW_BE(REVW_BE(ip[0]) ^ (x0 + chacha_const[0])); - op[1] = REVW_BE(REVW_BE(ip[1]) ^ (x1 + chacha_const[1])); - op[2] = REVW_BE(REVW_BE(ip[2]) ^ (x2 + chacha_const[2])); - op[3] = REVW_BE(REVW_BE(ip[3]) ^ (x3 + chacha_const[3])); - op[4] = REVW_BE(REVW_BE(ip[4]) ^ (x4 + kp[0])); - op[5] = REVW_BE(REVW_BE(ip[5]) ^ (x5 + kp[1])); - op[6] = REVW_BE(REVW_BE(ip[6]) ^ (x6 + kp[2])); - op[7] = REVW_BE(REVW_BE(ip[7]) ^ (x7 + kp[3])); - op[8] = REVW_BE(REVW_BE(ip[8]) ^ (x8 + kp[4])); - op[9] = REVW_BE(REVW_BE(ip[9]) ^ (x9 + kp[5])); - op[10] = REVW_BE(REVW_BE(ip[10]) ^ (x10 + kp[6])); - op[11] = REVW_BE(REVW_BE(ip[11]) ^ (x11 + kp[7])); - op[12] = REVW_BE(REVW_BE(ip[12]) ^ (x12 + counter + BPI * iters + (BPI - 1))); - op[13] = REVW_BE(REVW_BE(ip[13]) ^ (x13 + np[0])); - op[14] = REVW_BE(REVW_BE(ip[14]) ^ (x14 + np[1])); - op[15] = REVW_BE(REVW_BE(ip[15]) ^ (x15 + np[2])); - s3 += ONE; - ip += 16; - op += 16; -#endif - } - - for (iters = inlen % (BPI * 64) / 64; iters != 0; iters--) { - vec v0 = s0, v1 = s1, v2 = s2, v3 = s3; - for (i = CHACHA_RNDS / 2; i; i--) { - DQROUND_VECTORS(v0, v1, v2, v3); - } - WRITE_XOR(ip, op, 0, v0 + s0, v1 + s1, v2 + s2, v3 + s3) - s3 += ONE; - ip += 16; - op += 16; - } - - inlen = inlen % 64; - if (inlen) { - __attribute__((aligned(16))) vec buf[4]; - vec v0, v1, v2, v3; - v0 = s0; - v1 = s1; - v2 = s2; - v3 = s3; - for (i = CHACHA_RNDS / 2; i; i--) { - DQROUND_VECTORS(v0, v1, v2, v3); - } - - if (inlen >= 16) { - STORE(op + 0, LOAD(ip + 0) ^ REVV_BE(v0 + s0)); - if (inlen >= 32) { - STORE(op + 4, LOAD(ip + 4) ^ REVV_BE(v1 + s1)); - if (inlen >= 48) { - STORE(op + 8, LOAD(ip + 8) ^ REVV_BE(v2 + s2)); - buf[3] = REVV_BE(v3 + s3); - } else { - buf[2] = REVV_BE(v2 + s2); - } - } else { - buf[1] = REVV_BE(v1 + s1); - } - } else { - buf[0] = REVV_BE(v0 + s0); - } - - for (i = inlen & ~15; i < inlen; i++) { - ((char *)op)[i] = ((char *)ip)[i] ^ ((char *)buf)[i]; - } - } -} diff --git a/security/nss/lib/freebl/chacha20poly1305.c b/security/nss/lib/freebl/chacha20poly1305.c index cd265e1ff..859d05316 100644 --- a/security/nss/lib/freebl/chacha20poly1305.c +++ b/security/nss/lib/freebl/chacha20poly1305.c @@ -12,16 +12,69 @@ #include "seccomon.h" #include "secerr.h" #include "blapit.h" +#include "blapii.h" #ifndef NSS_DISABLE_CHACHAPOLY -#include "poly1305.h" -#include "chacha20.h" #include "chacha20poly1305.h" -#endif +// Forward declaration from "Hacl_Chacha20_Vec128.h". +extern void Hacl_Chacha20_Vec128_chacha20(uint8_t *output, uint8_t *plain, + uint32_t len, uint8_t *k, uint8_t *n1, + uint32_t ctr); +// Forward declaration from "Hacl_Chacha20.h". +extern void Hacl_Chacha20_chacha20(uint8_t *output, uint8_t *plain, uint32_t len, + uint8_t *k, uint8_t *n1, uint32_t ctr); /* Poly1305Do writes the Poly1305 authenticator of the given additional data * and ciphertext to |out|. */ -#ifndef NSS_DISABLE_CHACHAPOLY +#if defined(HAVE_INT128_SUPPORT) && (defined(NSS_X86_OR_X64) || defined(__aarch64__)) +/* Use HACL* Poly1305 on 64-bit Intel and ARM */ +#include "verified/Hacl_Poly1305_64.h" + +static void +Poly1305PadUpdate(Hacl_Impl_Poly1305_64_State_poly1305_state state, + unsigned char *block, const unsigned char *p, + const unsigned int pLen) +{ + unsigned int pRemLen = pLen % 16; + Hacl_Poly1305_64_update(state, (uint8_t *)p, (pLen / 16)); + if (pRemLen > 0) { + memcpy(block, p + (pLen - pRemLen), pRemLen); + Hacl_Poly1305_64_update(state, block, 1); + } +} + +static void +Poly1305Do(unsigned char *out, const unsigned char *ad, unsigned int adLen, + const unsigned char *ciphertext, unsigned int ciphertextLen, + const unsigned char key[32]) +{ + uint64_t tmp1[6U] = { 0U }; + Hacl_Impl_Poly1305_64_State_poly1305_state state = + Hacl_Poly1305_64_mk_state(tmp1, tmp1 + 3); + + unsigned char block[16] = { 0 }; + Hacl_Poly1305_64_init(state, (uint8_t *)key); + + Poly1305PadUpdate(state, block, ad, adLen); + memset(block, 0, 16); + Poly1305PadUpdate(state, block, ciphertext, ciphertextLen); + + unsigned int i; + unsigned int j; + for (i = 0, j = adLen; i < 8; i++, j >>= 8) { + block[i] = j; + } + for (i = 8, j = ciphertextLen; i < 16; i++, j >>= 8) { + block[i] = j; + } + + Hacl_Poly1305_64_update(state, block, 1); + Hacl_Poly1305_64_finish(state, out, (uint8_t *)(key + 16)); +} +#else +/* All other platforms get the 32-bit poly1305 reference implementation. */ +#include "poly1305.h" + static void Poly1305Do(unsigned char *out, const unsigned char *ad, unsigned int adLen, const unsigned char *ciphertext, unsigned int ciphertextLen, @@ -56,7 +109,9 @@ Poly1305Do(unsigned char *out, const unsigned char *ad, unsigned int adLen, Poly1305Update(&state, lengthBytes, sizeof(lengthBytes)); Poly1305Finish(&state, out); } -#endif + +#endif /* HAVE_INT128_SUPPORT */ +#endif /* NSS_DISABLE_CHACHAPOLY */ SECStatus ChaCha20Poly1305_InitContext(ChaCha20Poly1305Context *ctx, @@ -116,6 +171,17 @@ ChaCha20Poly1305_DestroyContext(ChaCha20Poly1305Context *ctx, PRBool freeit) #endif } +void +ChaCha20Xor(uint8_t *output, uint8_t *block, uint32_t len, uint8_t *k, + uint8_t *nonce, uint32_t ctr) +{ + if (ssse3_support() || arm_neon_support()) { + Hacl_Chacha20_Vec128_chacha20(output, block, len, k, nonce, ctr); + } else { + Hacl_Chacha20_chacha20(output, block, len, k, nonce, ctr); + } +} + SECStatus ChaCha20Poly1305_Seal(const ChaCha20Poly1305Context *ctx, unsigned char *output, unsigned int *outputLen, unsigned int maxOutputLen, @@ -142,8 +208,10 @@ ChaCha20Poly1305_Seal(const ChaCha20Poly1305Context *ctx, unsigned char *output, PORT_Memset(block, 0, sizeof(block)); // Generate a block of keystream. The first 32 bytes will be the poly1305 // key. The remainder of the block is discarded. - ChaCha20XOR(block, block, sizeof(block), ctx->key, nonce, 0); - ChaCha20XOR(output, input, inputLen, ctx->key, nonce, 1); + ChaCha20Xor(block, (uint8_t *)block, sizeof(block), (uint8_t *)ctx->key, + (uint8_t *)nonce, 0); + ChaCha20Xor(output, (uint8_t *)input, inputLen, (uint8_t *)ctx->key, + (uint8_t *)nonce, 1); Poly1305Do(tag, ad, adLen, output, inputLen, block); PORT_Memcpy(output + inputLen, tag, ctx->tagLen); @@ -184,14 +252,16 @@ ChaCha20Poly1305_Open(const ChaCha20Poly1305Context *ctx, unsigned char *output, PORT_Memset(block, 0, sizeof(block)); // Generate a block of keystream. The first 32 bytes will be the poly1305 // key. The remainder of the block is discarded. - ChaCha20XOR(block, block, sizeof(block), ctx->key, nonce, 0); + ChaCha20Xor(block, (uint8_t *)block, sizeof(block), (uint8_t *)ctx->key, + (uint8_t *)nonce, 0); Poly1305Do(tag, ad, adLen, input, ciphertextLen, block); if (NSS_SecureMemcmp(tag, &input[ciphertextLen], ctx->tagLen) != 0) { PORT_SetError(SEC_ERROR_BAD_DATA); return SECFailure; } - ChaCha20XOR(output, input, ciphertextLen, ctx->key, nonce, 1); + ChaCha20Xor(output, (uint8_t *)input, ciphertextLen, (uint8_t *)ctx->key, + (uint8_t *)nonce, 1); return SECSuccess; #endif diff --git a/security/nss/lib/freebl/config.mk b/security/nss/lib/freebl/config.mk index 918a66363..7ac50db65 100644 --- a/security/nss/lib/freebl/config.mk +++ b/security/nss/lib/freebl/config.mk @@ -90,7 +90,12 @@ EXTRA_SHARED_LIBS += \ endif endif +ifeq (,$(filter-out DragonFly FreeBSD Linux NetBSD OpenBSD, $(OS_TARGET))) +CFLAGS += -std=gnu99 +endif + ifeq ($(OS_ARCH), Darwin) +CFLAGS += -std=gnu99 EXTRA_SHARED_LIBS += -dylib_file @executable_path/libplc4.dylib:$(DIST)/lib/libplc4.dylib -dylib_file @executable_path/libplds4.dylib:$(DIST)/lib/libplds4.dylib endif diff --git a/security/nss/lib/freebl/crypto_primitives.c b/security/nss/lib/freebl/crypto_primitives.c new file mode 100644 index 000000000..49c8ca5ca --- /dev/null +++ b/security/nss/lib/freebl/crypto_primitives.c @@ -0,0 +1,36 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +/* This file holds useful functions and macros for crypto code. */ +#include "crypto_primitives.h" + +/* + * FREEBL_HTONLL(x): swap bytes in a 64-bit integer. + */ +#if defined(__GNUC__) && (defined(__x86_64__) || defined(__x86_64)) + +__inline__ PRUint64 +swap8b(PRUint64 value) +{ + __asm__("bswapq %0" + : "+r"(value)); + return (value); +} + +#elif !defined(_MSC_VER) + +PRUint64 +swap8b(PRUint64 x) +{ + PRUint64 t1 = x; + t1 = ((t1 & SHA_MASK8) << 8) | ((t1 >> 8) & SHA_MASK8); + t1 = ((t1 & SHA_MASK16) << 16) | ((t1 >> 16) & SHA_MASK16); + return (t1 >> 32) | (t1 << 32); +} + +#endif diff --git a/security/nss/lib/freebl/crypto_primitives.h b/security/nss/lib/freebl/crypto_primitives.h new file mode 100644 index 000000000..f19601f4b --- /dev/null +++ b/security/nss/lib/freebl/crypto_primitives.h @@ -0,0 +1,51 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* This file holds useful functions and macros for crypto code. */ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include <stdlib.h> +#include "prtypes.h" + +/* Unfortunately this isn't always set when it should be. */ +#if defined(HAVE_LONG_LONG) + +/* + * ROTR64/ROTL64(x, n): rotate a 64-bit integer x by n bites to the right/left. + */ +#if defined(_MSC_VER) +#pragma intrinsic(_rotr64, _rotl64) +#define ROTR64(x, n) _rotr64((x), (n)) +#define ROTL64(x, n) _rotl64((x), (n)) +#else +#define ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) +#define ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n)))) +#endif + +/* + * FREEBL_HTONLL(x): swap bytes in a 64-bit integer. + */ +#if defined(_MSC_VER) + +#pragma intrinsic(_byteswap_uint64) +#define FREEBL_HTONLL(x) _byteswap_uint64(x) + +#elif defined(__GNUC__) && (defined(__x86_64__) || defined(__x86_64)) + +PRUint64 swap8b(PRUint64 value); +#define FREEBL_HTONLL(x) swap8b(x) + +#else + +#define SHA_MASK16 0x0000FFFF0000FFFFULL +#define SHA_MASK8 0x00FF00FF00FF00FFULL +PRUint64 swap8b(PRUint64 x); +#define FREEBL_HTONLL(x) swap8b(x) + +#endif /* _MSC_VER */ + +#endif /* HAVE_LONG_LONG */
\ No newline at end of file diff --git a/security/nss/lib/freebl/det_rng.c b/security/nss/lib/freebl/det_rng.c index 04fce30e8..56be2d356 100644 --- a/security/nss/lib/freebl/det_rng.c +++ b/security/nss/lib/freebl/det_rng.c @@ -4,23 +4,26 @@ #include "blapi.h" #include "blapit.h" -#include "chacha20.h" +#include "Hacl_Chacha20.h" #include "nssilock.h" #include "seccomon.h" #include "secerr.h" +#include "prinit.h" #define GLOBAL_BYTES_SIZE 100 static PRUint8 globalBytes[GLOBAL_BYTES_SIZE]; static unsigned long globalNumCalls = 0; static PZLock *rng_lock = NULL; +static PRCallOnceType coRNGInit; +static const PRCallOnceType pristineCallOnce; -SECStatus -RNG_RNGInit(void) +static PRStatus +rng_init(void) { rng_lock = PZ_NewLock(nssILockOther); if (!rng_lock) { PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); - return SECFailure; + return PR_FAILURE; } /* --- LOCKED --- */ PZ_Lock(rng_lock); @@ -28,6 +31,17 @@ RNG_RNGInit(void) PZ_Unlock(rng_lock); /* --- UNLOCKED --- */ + return PR_SUCCESS; +} + +SECStatus +RNG_RNGInit(void) +{ + /* Allow only one call to initialize the context */ + if (PR_CallOnce(&coRNGInit, rng_init) != PR_SUCCESS) { + return SECFailure; + } + return SECSuccess; } @@ -85,7 +99,7 @@ RNG_GenerateGlobalRandomBytes(void *dest, size_t len) memset(dest, 0, len); memcpy(dest, globalBytes, PR_MIN(len, GLOBAL_BYTES_SIZE)); - ChaCha20XOR(dest, dest, len, key, nonce, 0); + Hacl_Chacha20_chacha20(dest, (uint8_t *)dest, len, (uint8_t *)key, nonce, 0); ChaCha20Poly1305_DestroyContext(cx, PR_TRUE); PZ_Unlock(rng_lock); @@ -97,8 +111,11 @@ RNG_GenerateGlobalRandomBytes(void *dest, size_t len) void RNG_RNGShutdown(void) { - PZ_DestroyLock(rng_lock); - rng_lock = NULL; + if (rng_lock) { + PZ_DestroyLock(rng_lock); + rng_lock = NULL; + } + coRNGInit = pristineCallOnce; } /* Test functions are not implemented! */ diff --git a/security/nss/lib/freebl/drbg.c b/security/nss/lib/freebl/drbg.c index 224bbe87d..70ae2618e 100644 --- a/security/nss/lib/freebl/drbg.c +++ b/security/nss/lib/freebl/drbg.c @@ -74,8 +74,7 @@ struct RNGContextStr { #define V_type V_Data[0] #define V(rng) (((rng)->V_Data) + 1) #define VSize(rng) ((sizeof(rng)->V_Data) - 1) - PRUint8 C[PRNG_SEEDLEN]; /* internal state variables */ - PRUint8 lastOutput[SHA256_LENGTH]; /* for continuous rng checking */ + PRUint8 C[PRNG_SEEDLEN]; /* internal state variables */ /* If we get calls for the PRNG to return less than the length of our * hash, we extend the request for a full hash (since we'll be doing * the full hash anyway). Future requests for random numbers are fulfilled @@ -286,7 +285,6 @@ prng_Hashgen(RNGContext *rng, PRUint8 *returned_bytes, { PRUint8 data[VSize(rng)]; PRUint8 thisHash[SHA256_LENGTH]; - PRUint8 *lastHash = rng->lastOutput; PORT_Memcpy(data, V(rng), VSize(rng)); while (no_of_returned_bytes) { @@ -297,15 +295,10 @@ prng_Hashgen(RNGContext *rng, PRUint8 *returned_bytes, SHA256_Begin(&ctx); SHA256_Update(&ctx, data, sizeof data); SHA256_End(&ctx, thisHash, &len, SHA256_LENGTH); - if (PORT_Memcmp(lastHash, thisHash, len) == 0) { - rng->isValid = PR_FALSE; - break; - } if (no_of_returned_bytes < SHA256_LENGTH) { len = no_of_returned_bytes; } PORT_Memcpy(returned_bytes, thisHash, len); - lastHash = returned_bytes; returned_bytes += len; no_of_returned_bytes -= len; /* The carry parameter is a bool (increment or not). @@ -313,7 +306,6 @@ prng_Hashgen(RNGContext *rng, PRUint8 *returned_bytes, carry = no_of_returned_bytes; PRNG_ADD_CARRY_ONLY(data, (sizeof data) - 1, carry); } - PORT_Memcpy(rng->lastOutput, thisHash, SHA256_LENGTH); PORT_Memset(data, 0, sizeof data); PORT_Memset(thisHash, 0, sizeof thisHash); } @@ -361,11 +353,6 @@ prng_generateNewBytes(RNGContext *rng, if (no_of_returned_bytes == SHA256_LENGTH) { /* short_cut to hashbuf and a couple of copies and clears */ SHA256_HashBuf(returned_bytes, V(rng), VSize(rng)); - /* continuous rng check */ - if (memcmp(rng->lastOutput, returned_bytes, SHA256_LENGTH) == 0) { - rng->isValid = PR_FALSE; - } - PORT_Memcpy(rng->lastOutput, returned_bytes, sizeof rng->lastOutput); } else { prng_Hashgen(rng, returned_bytes, no_of_returned_bytes); } diff --git a/security/nss/lib/freebl/ec.c b/security/nss/lib/freebl/ec.c index 669c9b147..b28815ade 100644 --- a/security/nss/lib/freebl/ec.c +++ b/security/nss/lib/freebl/ec.c @@ -15,8 +15,6 @@ #include "ec.h" #include "ecl.h" -#ifndef NSS_DISABLE_ECC - static const ECMethod kMethods[] = { { ECCurve25519, ec_Curve25519_pt_mul, @@ -183,7 +181,6 @@ cleanup: return rv; } -#endif /* NSS_DISABLE_ECC */ /* Generates a new EC key pair. The private key is a supplied * value and the public key is the result of performing a scalar @@ -194,7 +191,6 @@ ec_NewKey(ECParams *ecParams, ECPrivateKey **privKey, const unsigned char *privKeyBytes, int privKeyLen) { SECStatus rv = SECFailure; -#ifndef NSS_DISABLE_ECC PLArenaPool *arena; ECPrivateKey *key; mp_int k; @@ -309,9 +305,6 @@ cleanup: printf("ec_NewKey returning %s\n", (rv == SECSuccess) ? "success" : "failure"); #endif -#else - PORT_SetError(SEC_ERROR_UNSUPPORTED_KEYALG); -#endif /* NSS_DISABLE_ECC */ return rv; } @@ -326,15 +319,10 @@ EC_NewKeyFromSeed(ECParams *ecParams, ECPrivateKey **privKey, const unsigned char *seed, int seedlen) { SECStatus rv = SECFailure; -#ifndef NSS_DISABLE_ECC rv = ec_NewKey(ecParams, privKey, seed, seedlen); -#else - PORT_SetError(SEC_ERROR_UNSUPPORTED_KEYALG); -#endif /* NSS_DISABLE_ECC */ return rv; } -#ifndef NSS_DISABLE_ECC /* Generate a random private key using the algorithm A.4.1 of ANSI X9.62, * modified a la FIPS 186-2 Change Notice 1 to eliminate the bias in the * random number generator. @@ -391,7 +379,6 @@ cleanup: } return privKeyBytes; } -#endif /* NSS_DISABLE_ECC */ /* Generates a new EC key pair. The private key is a random value and * the public key is the result of performing a scalar point multiplication @@ -401,7 +388,6 @@ SECStatus EC_NewKey(ECParams *ecParams, ECPrivateKey **privKey) { SECStatus rv = SECFailure; -#ifndef NSS_DISABLE_ECC int len; unsigned char *privKeyBytes = NULL; @@ -425,9 +411,6 @@ cleanup: printf("EC_NewKey returning %s\n", (rv == SECSuccess) ? "success" : "failure"); #endif -#else - PORT_SetError(SEC_ERROR_UNSUPPORTED_KEYALG); -#endif /* NSS_DISABLE_ECC */ return rv; } @@ -441,7 +424,6 @@ cleanup: SECStatus EC_ValidatePublicKey(ECParams *ecParams, SECItem *publicValue) { -#ifndef NSS_DISABLE_ECC mp_int Px, Py; ECGroup *group = NULL; SECStatus rv = SECFailure; @@ -525,10 +507,6 @@ cleanup: rv = SECFailure; } return rv; -#else - PORT_SetError(SEC_ERROR_UNSUPPORTED_KEYALG); - return SECFailure; -#endif /* NSS_DISABLE_ECC */ } /* @@ -549,7 +527,6 @@ ECDH_Derive(SECItem *publicValue, SECItem *derivedSecret) { SECStatus rv = SECFailure; -#ifndef NSS_DISABLE_ECC unsigned int len = 0; SECItem pointQ = { siBuffer, NULL, 0 }; mp_int k; /* to hold the private value */ @@ -589,7 +566,11 @@ ECDH_Derive(SECItem *publicValue, PORT_SetError(SEC_ERROR_UNSUPPORTED_ELLIPTIC_CURVE); return SECFailure; } - return method->mul(derivedSecret, privateValue, publicValue); + rv = method->mul(derivedSecret, privateValue, publicValue); + if (rv != SECSuccess) { + SECITEM_ZfreeItem(derivedSecret, PR_FALSE); + } + return rv; } /* @@ -654,9 +635,6 @@ cleanup: if (pointQ.data) { PORT_ZFree(pointQ.data, pointQ.len); } -#else - PORT_SetError(SEC_ERROR_UNSUPPORTED_KEYALG); -#endif /* NSS_DISABLE_ECC */ return rv; } @@ -670,7 +648,6 @@ ECDSA_SignDigestWithSeed(ECPrivateKey *key, SECItem *signature, const SECItem *digest, const unsigned char *kb, const int kblen) { SECStatus rv = SECFailure; -#ifndef NSS_DISABLE_ECC mp_int x1; mp_int d, k; /* private key, random integer */ mp_int r, s; /* tuple (r, s) is the signature */ @@ -899,9 +876,6 @@ cleanup: printf("ECDSA signing with seed %s\n", (rv == SECSuccess) ? "succeeded" : "failed"); #endif -#else - PORT_SetError(SEC_ERROR_UNSUPPORTED_KEYALG); -#endif /* NSS_DISABLE_ECC */ return rv; } @@ -914,7 +888,6 @@ SECStatus ECDSA_SignDigest(ECPrivateKey *key, SECItem *signature, const SECItem *digest) { SECStatus rv = SECFailure; -#ifndef NSS_DISABLE_ECC int len; unsigned char *kBytes = NULL; @@ -941,9 +914,6 @@ cleanup: printf("ECDSA signing %s\n", (rv == SECSuccess) ? "succeeded" : "failed"); #endif -#else - PORT_SetError(SEC_ERROR_UNSUPPORTED_KEYALG); -#endif /* NSS_DISABLE_ECC */ return rv; } @@ -961,7 +931,6 @@ ECDSA_VerifyDigest(ECPublicKey *key, const SECItem *signature, const SECItem *digest) { SECStatus rv = SECFailure; -#ifndef NSS_DISABLE_ECC mp_int r_, s_; /* tuple (r', s') is received signature) */ mp_int c, u1, u2, v; /* intermediate values used in verification */ mp_int x1; @@ -1161,9 +1130,6 @@ cleanup: printf("ECDSA verification %s\n", (rv == SECSuccess) ? "succeeded" : "failed"); #endif -#else - PORT_SetError(SEC_ERROR_UNSUPPORTED_KEYALG); -#endif /* NSS_DISABLE_ECC */ return rv; } diff --git a/security/nss/lib/freebl/ecdecode.c b/security/nss/lib/freebl/ecdecode.c index 54b3e111b..652ad42d5 100644 --- a/security/nss/lib/freebl/ecdecode.c +++ b/security/nss/lib/freebl/ecdecode.c @@ -2,8 +2,6 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -#ifndef NSS_DISABLE_ECC - #ifdef FREEBL_NO_DEPEND #include "stubs.h" #endif @@ -252,5 +250,3 @@ EC_GetPointSize(const ECParams *params) } return curveParams->pointSize - 1; } - -#endif /* NSS_DISABLE_ECC */ diff --git a/security/nss/lib/freebl/ecl/curve25519_64.c b/security/nss/lib/freebl/ecl/curve25519_64.c index 65f6bd41b..a2e4296bb 100644 --- a/security/nss/lib/freebl/ecl/curve25519_64.c +++ b/security/nss/lib/freebl/ecl/curve25519_64.c @@ -2,513 +2,13 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -/* - * Derived from public domain C code by Adan Langley and Daniel J. Bernstein - */ - -#include "uint128.h" - #include "ecl-priv.h" -#include "mpi.h" - -#include <stdint.h> -#include <stdio.h> -#include <string.h> - -typedef uint8_t u8; -typedef uint64_t felem; - -/* Sum two numbers: output += in */ -static void -fsum(felem *output, const felem *in) -{ - unsigned i; - for (i = 0; i < 5; ++i) { - output[i] += in[i]; - } -} - -/* Find the difference of two numbers: output = in - output - * (note the order of the arguments!) - */ -static void -fdifference_backwards(felem *ioutput, const felem *iin) -{ - static const int64_t twotothe51 = ((int64_t)1l << 51); - const int64_t *in = (const int64_t *)iin; - int64_t *out = (int64_t *)ioutput; - - out[0] = in[0] - out[0]; - out[1] = in[1] - out[1]; - out[2] = in[2] - out[2]; - out[3] = in[3] - out[3]; - out[4] = in[4] - out[4]; - - // An arithmetic shift right of 63 places turns a positive number to 0 and a - // negative number to all 1's. This gives us a bitmask that lets us avoid - // side-channel prone branches. - int64_t t; - -#define NEGCHAIN(a, b) \ - t = out[a] >> 63; \ - out[a] += twotothe51 & t; \ - out[b] -= 1 & t; - -#define NEGCHAIN19(a, b) \ - t = out[a] >> 63; \ - out[a] += twotothe51 & t; \ - out[b] -= 19 & t; - - NEGCHAIN(0, 1); - NEGCHAIN(1, 2); - NEGCHAIN(2, 3); - NEGCHAIN(3, 4); - NEGCHAIN19(4, 0); - NEGCHAIN(0, 1); - NEGCHAIN(1, 2); - NEGCHAIN(2, 3); - NEGCHAIN(3, 4); -} - -/* Multiply a number by a scalar: output = in * scalar */ -static void -fscalar_product(felem *output, const felem *in, - const felem scalar) -{ - uint128_t tmp, tmp2; - - tmp = mul6464(in[0], scalar); - output[0] = mask51(tmp); - - tmp2 = mul6464(in[1], scalar); - tmp = add128(tmp2, rshift128(tmp, 51)); - output[1] = mask51(tmp); - - tmp2 = mul6464(in[2], scalar); - tmp = add128(tmp2, rshift128(tmp, 51)); - output[2] = mask51(tmp); - - tmp2 = mul6464(in[3], scalar); - tmp = add128(tmp2, rshift128(tmp, 51)); - output[3] = mask51(tmp); - - tmp2 = mul6464(in[4], scalar); - tmp = add128(tmp2, rshift128(tmp, 51)); - output[4] = mask51(tmp); - - output[0] += mask_lower(rshift128(tmp, 51)) * 19; -} - -/* Multiply two numbers: output = in2 * in - * - * output must be distinct to both inputs. The inputs are reduced coefficient - * form, the output is not. - */ -static void -fmul(felem *output, const felem *in2, const felem *in) -{ - uint128_t t0, t1, t2, t3, t4, t5, t6, t7, t8; - - t0 = mul6464(in[0], in2[0]); - t1 = add128(mul6464(in[1], in2[0]), mul6464(in[0], in2[1])); - t2 = add128(add128(mul6464(in[0], in2[2]), - mul6464(in[2], in2[0])), - mul6464(in[1], in2[1])); - t3 = add128(add128(add128(mul6464(in[0], in2[3]), - mul6464(in[3], in2[0])), - mul6464(in[1], in2[2])), - mul6464(in[2], in2[1])); - t4 = add128(add128(add128(add128(mul6464(in[0], in2[4]), - mul6464(in[4], in2[0])), - mul6464(in[3], in2[1])), - mul6464(in[1], in2[3])), - mul6464(in[2], in2[2])); - t5 = add128(add128(add128(mul6464(in[4], in2[1]), - mul6464(in[1], in2[4])), - mul6464(in[2], in2[3])), - mul6464(in[3], in2[2])); - t6 = add128(add128(mul6464(in[4], in2[2]), - mul6464(in[2], in2[4])), - mul6464(in[3], in2[3])); - t7 = add128(mul6464(in[3], in2[4]), mul6464(in[4], in2[3])); - t8 = mul6464(in[4], in2[4]); - - t0 = add128(t0, mul12819(t5)); - t1 = add128(t1, mul12819(t6)); - t2 = add128(t2, mul12819(t7)); - t3 = add128(t3, mul12819(t8)); - - t1 = add128(t1, rshift128(t0, 51)); - t0 = mask51full(t0); - t2 = add128(t2, rshift128(t1, 51)); - t1 = mask51full(t1); - t3 = add128(t3, rshift128(t2, 51)); - t4 = add128(t4, rshift128(t3, 51)); - t0 = add128(t0, mul12819(rshift128(t4, 51))); - t1 = add128(t1, rshift128(t0, 51)); - t2 = mask51full(t2); - t2 = add128(t2, rshift128(t1, 51)); - - output[0] = mask51(t0); - output[1] = mask51(t1); - output[2] = mask_lower(t2); - output[3] = mask51(t3); - output[4] = mask51(t4); -} - -static void -fsquare(felem *output, const felem *in) -{ - uint128_t t0, t1, t2, t3, t4, t5, t6, t7, t8; - - t0 = mul6464(in[0], in[0]); - t1 = lshift128(mul6464(in[0], in[1]), 1); - t2 = add128(lshift128(mul6464(in[0], in[2]), 1), - mul6464(in[1], in[1])); - t3 = add128(lshift128(mul6464(in[0], in[3]), 1), - lshift128(mul6464(in[1], in[2]), 1)); - t4 = add128(add128(lshift128(mul6464(in[0], in[4]), 1), - lshift128(mul6464(in[3], in[1]), 1)), - mul6464(in[2], in[2])); - t5 = add128(lshift128(mul6464(in[4], in[1]), 1), - lshift128(mul6464(in[2], in[3]), 1)); - t6 = add128(lshift128(mul6464(in[4], in[2]), 1), - mul6464(in[3], in[3])); - t7 = lshift128(mul6464(in[3], in[4]), 1); - t8 = mul6464(in[4], in[4]); - - t0 = add128(t0, mul12819(t5)); - t1 = add128(t1, mul12819(t6)); - t2 = add128(t2, mul12819(t7)); - t3 = add128(t3, mul12819(t8)); - - t1 = add128(t1, rshift128(t0, 51)); - t0 = mask51full(t0); - t2 = add128(t2, rshift128(t1, 51)); - t1 = mask51full(t1); - t3 = add128(t3, rshift128(t2, 51)); - t4 = add128(t4, rshift128(t3, 51)); - t0 = add128(t0, mul12819(rshift128(t4, 51))); - t1 = add128(t1, rshift128(t0, 51)); - - output[0] = mask51(t0); - output[1] = mask_lower(t1); - output[2] = mask51(t2); - output[3] = mask51(t3); - output[4] = mask51(t4); -} - -/* Take a 32-byte number and expand it into polynomial form */ -static void NO_SANITIZE_ALIGNMENT -fexpand(felem *output, const u8 *in) -{ - output[0] = *((const uint64_t *)(in)) & MASK51; - output[1] = (*((const uint64_t *)(in + 6)) >> 3) & MASK51; - output[2] = (*((const uint64_t *)(in + 12)) >> 6) & MASK51; - output[3] = (*((const uint64_t *)(in + 19)) >> 1) & MASK51; - output[4] = (*((const uint64_t *)(in + 24)) >> 12) & MASK51; -} - -/* Take a fully reduced polynomial form number and contract it into a - * 32-byte array - */ -static void -fcontract(u8 *output, const felem *input) -{ - uint128_t t0 = init128x(input[0]); - uint128_t t1 = init128x(input[1]); - uint128_t t2 = init128x(input[2]); - uint128_t t3 = init128x(input[3]); - uint128_t t4 = init128x(input[4]); - uint128_t tmp = init128x(19); - - t1 = add128(t1, rshift128(t0, 51)); - t0 = mask51full(t0); - t2 = add128(t2, rshift128(t1, 51)); - t1 = mask51full(t1); - t3 = add128(t3, rshift128(t2, 51)); - t2 = mask51full(t2); - t4 = add128(t4, rshift128(t3, 51)); - t3 = mask51full(t3); - t0 = add128(t0, mul12819(rshift128(t4, 51))); - t4 = mask51full(t4); - - t1 = add128(t1, rshift128(t0, 51)); - t0 = mask51full(t0); - t2 = add128(t2, rshift128(t1, 51)); - t1 = mask51full(t1); - t3 = add128(t3, rshift128(t2, 51)); - t2 = mask51full(t2); - t4 = add128(t4, rshift128(t3, 51)); - t3 = mask51full(t3); - t0 = add128(t0, mul12819(rshift128(t4, 51))); - t4 = mask51full(t4); - - /* now t is between 0 and 2^255-1, properly carried. */ - /* case 1: between 0 and 2^255-20. case 2: between 2^255-19 and 2^255-1. */ - - t0 = add128(t0, tmp); - - t1 = add128(t1, rshift128(t0, 51)); - t0 = mask51full(t0); - t2 = add128(t2, rshift128(t1, 51)); - t1 = mask51full(t1); - t3 = add128(t3, rshift128(t2, 51)); - t2 = mask51full(t2); - t4 = add128(t4, rshift128(t3, 51)); - t3 = mask51full(t3); - t0 = add128(t0, mul12819(rshift128(t4, 51))); - t4 = mask51full(t4); - - /* now between 19 and 2^255-1 in both cases, and offset by 19. */ - - t0 = add128(t0, init128x(0x8000000000000 - 19)); - tmp = init128x(0x8000000000000 - 1); - t1 = add128(t1, tmp); - t2 = add128(t2, tmp); - t3 = add128(t3, tmp); - t4 = add128(t4, tmp); - - /* now between 2^255 and 2^256-20, and offset by 2^255. */ - - t1 = add128(t1, rshift128(t0, 51)); - t0 = mask51full(t0); - t2 = add128(t2, rshift128(t1, 51)); - t1 = mask51full(t1); - t3 = add128(t3, rshift128(t2, 51)); - t2 = mask51full(t2); - t4 = add128(t4, rshift128(t3, 51)); - t3 = mask51full(t3); - t4 = mask51full(t4); - - *((uint64_t *)(output)) = mask_lower(t0) | mask_lower(t1) << 51; - *((uint64_t *)(output + 8)) = (mask_lower(t1) >> 13) | (mask_lower(t2) << 38); - *((uint64_t *)(output + 16)) = (mask_lower(t2) >> 26) | (mask_lower(t3) << 25); - *((uint64_t *)(output + 24)) = (mask_lower(t3) >> 39) | (mask_lower(t4) << 12); -} - -/* Input: Q, Q', Q-Q' - * Output: 2Q, Q+Q' - * - * x2 z3: long form - * x3 z3: long form - * x z: short form, destroyed - * xprime zprime: short form, destroyed - * qmqp: short form, preserved - */ -static void -fmonty(felem *x2, felem *z2, /* output 2Q */ - felem *x3, felem *z3, /* output Q + Q' */ - felem *x, felem *z, /* input Q */ - felem *xprime, felem *zprime, /* input Q' */ - const felem *qmqp /* input Q - Q' */) -{ - felem origx[5], origxprime[5], zzz[5], xx[5], zz[5], xxprime[5], zzprime[5], - zzzprime[5]; - - memcpy(origx, x, 5 * sizeof(felem)); - fsum(x, z); - fdifference_backwards(z, origx); // does x - z - - memcpy(origxprime, xprime, sizeof(felem) * 5); - fsum(xprime, zprime); - fdifference_backwards(zprime, origxprime); - fmul(xxprime, xprime, z); - fmul(zzprime, x, zprime); - memcpy(origxprime, xxprime, sizeof(felem) * 5); - fsum(xxprime, zzprime); - fdifference_backwards(zzprime, origxprime); - fsquare(x3, xxprime); - fsquare(zzzprime, zzprime); - fmul(z3, zzzprime, qmqp); - - fsquare(xx, x); - fsquare(zz, z); - fmul(x2, xx, zz); - fdifference_backwards(zz, xx); // does zz = xx - zz - fscalar_product(zzz, zz, 121665); - fsum(zzz, xx); - fmul(z2, zz, zzz); -} - -// ----------------------------------------------------------------------------- -// Maybe swap the contents of two felem arrays (@a and @b), each @len elements -// long. Perform the swap iff @swap is non-zero. -// -// This function performs the swap without leaking any side-channel -// information. -// ----------------------------------------------------------------------------- -static void -swap_conditional(felem *a, felem *b, unsigned len, felem iswap) -{ - unsigned i; - const felem swap = 1 + ~iswap; - - for (i = 0; i < len; ++i) { - const felem x = swap & (a[i] ^ b[i]); - a[i] ^= x; - b[i] ^= x; - } -} - -/* Calculates nQ where Q is the x-coordinate of a point on the curve - * - * resultx/resultz: the x coordinate of the resulting curve point (short form) - * n: a 32-byte number - * q: a point of the curve (short form) - */ -static void -cmult(felem *resultx, felem *resultz, const u8 *n, const felem *q) -{ - felem a[5] = { 0 }, b[5] = { 1 }, c[5] = { 1 }, d[5] = { 0 }; - felem *nqpqx = a, *nqpqz = b, *nqx = c, *nqz = d, *t; - felem e[5] = { 0 }, f[5] = { 1 }, g[5] = { 0 }, h[5] = { 1 }; - felem *nqpqx2 = e, *nqpqz2 = f, *nqx2 = g, *nqz2 = h; - - unsigned i, j; - - memcpy(nqpqx, q, sizeof(felem) * 5); - - for (i = 0; i < 32; ++i) { - u8 byte = n[31 - i]; - for (j = 0; j < 8; ++j) { - const felem bit = byte >> 7; - - swap_conditional(nqx, nqpqx, 5, bit); - swap_conditional(nqz, nqpqz, 5, bit); - fmonty(nqx2, nqz2, nqpqx2, nqpqz2, nqx, nqz, nqpqx, nqpqz, q); - swap_conditional(nqx2, nqpqx2, 5, bit); - swap_conditional(nqz2, nqpqz2, 5, bit); - - t = nqx; - nqx = nqx2; - nqx2 = t; - t = nqz; - nqz = nqz2; - nqz2 = t; - t = nqpqx; - nqpqx = nqpqx2; - nqpqx2 = t; - t = nqpqz; - nqpqz = nqpqz2; - nqpqz2 = t; - - byte <<= 1; - } - } - - memcpy(resultx, nqx, sizeof(felem) * 5); - memcpy(resultz, nqz, sizeof(felem) * 5); -} - -// ----------------------------------------------------------------------------- -// Shamelessly copied from djb's code -// ----------------------------------------------------------------------------- -static void -crecip(felem *out, const felem *z) -{ - felem z2[5]; - felem z9[5]; - felem z11[5]; - felem z2_5_0[5]; - felem z2_10_0[5]; - felem z2_20_0[5]; - felem z2_50_0[5]; - felem z2_100_0[5]; - felem t0[5]; - felem t1[5]; - int i; - - /* 2 */ fsquare(z2, z); - /* 4 */ fsquare(t1, z2); - /* 8 */ fsquare(t0, t1); - /* 9 */ fmul(z9, t0, z); - /* 11 */ fmul(z11, z9, z2); - /* 22 */ fsquare(t0, z11); - /* 2^5 - 2^0 = 31 */ fmul(z2_5_0, t0, z9); - - /* 2^6 - 2^1 */ fsquare(t0, z2_5_0); - /* 2^7 - 2^2 */ fsquare(t1, t0); - /* 2^8 - 2^3 */ fsquare(t0, t1); - /* 2^9 - 2^4 */ fsquare(t1, t0); - /* 2^10 - 2^5 */ fsquare(t0, t1); - /* 2^10 - 2^0 */ fmul(z2_10_0, t0, z2_5_0); - - /* 2^11 - 2^1 */ fsquare(t0, z2_10_0); - /* 2^12 - 2^2 */ fsquare(t1, t0); - /* 2^20 - 2^10 */ for (i = 2; i < 10; i += 2) { - fsquare(t0, t1); - fsquare(t1, t0); - } - /* 2^20 - 2^0 */ fmul(z2_20_0, t1, z2_10_0); - - /* 2^21 - 2^1 */ fsquare(t0, z2_20_0); - /* 2^22 - 2^2 */ fsquare(t1, t0); - /* 2^40 - 2^20 */ for (i = 2; i < 20; i += 2) { - fsquare(t0, t1); - fsquare(t1, t0); - } - /* 2^40 - 2^0 */ fmul(t0, t1, z2_20_0); - - /* 2^41 - 2^1 */ fsquare(t1, t0); - /* 2^42 - 2^2 */ fsquare(t0, t1); - /* 2^50 - 2^10 */ for (i = 2; i < 10; i += 2) { - fsquare(t1, t0); - fsquare(t0, t1); - } - /* 2^50 - 2^0 */ fmul(z2_50_0, t0, z2_10_0); - - /* 2^51 - 2^1 */ fsquare(t0, z2_50_0); - /* 2^52 - 2^2 */ fsquare(t1, t0); - /* 2^100 - 2^50 */ for (i = 2; i < 50; i += 2) { - fsquare(t0, t1); - fsquare(t1, t0); - } - /* 2^100 - 2^0 */ fmul(z2_100_0, t1, z2_50_0); - - /* 2^101 - 2^1 */ fsquare(t1, z2_100_0); - /* 2^102 - 2^2 */ fsquare(t0, t1); - /* 2^200 - 2^100 */ for (i = 2; i < 100; i += 2) { - fsquare(t1, t0); - fsquare(t0, t1); - } - /* 2^200 - 2^0 */ fmul(t1, t0, z2_100_0); - - /* 2^201 - 2^1 */ fsquare(t0, t1); - /* 2^202 - 2^2 */ fsquare(t1, t0); - /* 2^250 - 2^50 */ for (i = 2; i < 50; i += 2) { - fsquare(t0, t1); - fsquare(t1, t0); - } - /* 2^250 - 2^0 */ fmul(t0, t1, z2_50_0); - - /* 2^251 - 2^1 */ fsquare(t1, t0); - /* 2^252 - 2^2 */ fsquare(t0, t1); - /* 2^253 - 2^3 */ fsquare(t1, t0); - /* 2^254 - 2^4 */ fsquare(t0, t1); - /* 2^255 - 2^5 */ fsquare(t1, t0); - /* 2^255 - 21 */ fmul(out, t1, z11); -} +#include "../verified/Hacl_Curve25519.h" SECStatus -ec_Curve25519_mul(uint8_t *mypublic, const uint8_t *secret, - const uint8_t *basepoint) +ec_Curve25519_mul(uint8_t *mypublic, const uint8_t *secret, const uint8_t *basepoint) { - felem bp[5], x[5], z[5], zmone[5]; - uint8_t e[32]; - int i; - - for (i = 0; i < 32; ++i) { - e[i] = secret[i]; - } - e[0] &= 248; - e[31] &= 127; - e[31] |= 64; - fexpand(bp, basepoint); - cmult(x, z, e, bp); - crecip(zmone, z); - fmul(z, x, zmone); - fcontract(mypublic, z); - + // Note: this cast is safe because HaCl* state has a post-condition that only "mypublic" changed. + Hacl_Curve25519_crypto_scalarmult(mypublic, (uint8_t *)secret, (uint8_t *)basepoint); return 0; } diff --git a/security/nss/lib/freebl/ecl/ecp_25519.c b/security/nss/lib/freebl/ecl/ecp_25519.c index 1e7875fff..38bd34c50 100644 --- a/security/nss/lib/freebl/ecl/ecp_25519.c +++ b/security/nss/lib/freebl/ecl/ecp_25519.c @@ -115,5 +115,9 @@ ec_Curve25519_pt_mul(SECItem *X, SECItem *k, SECItem *P) px = P->data; } - return ec_Curve25519_mul(X->data, k->data, px); + SECStatus rv = ec_Curve25519_mul(X->data, k->data, px); + if (NSS_SecureMemcmpZero(X->data, X->len) == 0) { + return SECFailure; + } + return rv; } diff --git a/security/nss/lib/freebl/ecl/uint128.c b/security/nss/lib/freebl/ecl/uint128.c deleted file mode 100644 index 5465875ad..000000000 --- a/security/nss/lib/freebl/ecl/uint128.c +++ /dev/null @@ -1,90 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -#include "uint128.h" - -/* helper functions */ -uint64_t -mask51(uint128_t x) -{ - return x.lo & MASK51; -} - -uint64_t -mask_lower(uint128_t x) -{ - return x.lo; -} - -uint128_t -mask51full(uint128_t x) -{ - uint128_t ret = { x.lo & MASK51, 0 }; - return ret; -} - -uint128_t -init128x(uint64_t x) -{ - uint128_t ret = { x, 0 }; - return ret; -} - -#define CONSTANT_TIME_CARRY(a, b) \ - ((a ^ ((a ^ b) | ((a - b) ^ b))) >> (sizeof(a) * 8 - 1)) - -/* arithmetic */ - -uint128_t -add128(uint128_t a, uint128_t b) -{ - uint128_t ret; - ret.lo = a.lo + b.lo; - ret.hi = a.hi + b.hi + CONSTANT_TIME_CARRY(ret.lo, b.lo); - return ret; -} - -/* out = 19 * a */ -uint128_t -mul12819(uint128_t a) -{ - uint128_t ret = lshift128(a, 4); - ret = add128(ret, a); - ret = add128(ret, a); - ret = add128(ret, a); - return ret; -} - -uint128_t -mul6464(uint64_t a, uint64_t b) -{ - uint128_t ret; - uint64_t t0 = ((uint64_t)(uint32_t)a) * ((uint64_t)(uint32_t)b); - uint64_t t1 = (a >> 32) * ((uint64_t)(uint32_t)b) + (t0 >> 32); - uint64_t t2 = (b >> 32) * ((uint64_t)(uint32_t)a) + ((uint32_t)t1); - ret.lo = (((uint64_t)((uint32_t)t2)) << 32) + ((uint32_t)t0); - ret.hi = (a >> 32) * (b >> 32); - ret.hi += (t2 >> 32) + (t1 >> 32); - return ret; -} - -/* only defined for n < 64 */ -uint128_t -rshift128(uint128_t x, uint8_t n) -{ - uint128_t ret; - ret.lo = (x.lo >> n) + (x.hi << (64 - n)); - ret.hi = x.hi >> n; - return ret; -} - -/* only defined for n < 64 */ -uint128_t -lshift128(uint128_t x, uint8_t n) -{ - uint128_t ret; - ret.hi = (x.hi << n) + (x.lo >> (64 - n)); - ret.lo = x.lo << n; - return ret; -} diff --git a/security/nss/lib/freebl/ecl/uint128.h b/security/nss/lib/freebl/ecl/uint128.h deleted file mode 100644 index a3a71e6e7..000000000 --- a/security/nss/lib/freebl/ecl/uint128.h +++ /dev/null @@ -1,35 +0,0 @@ -/* This Source Code Form is subject to the terms of the Mozilla Public - * License, v. 2.0. If a copy of the MPL was not distributed with this - * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ - -#include <stdint.h> - -#define MASK51 0x7ffffffffffffULL - -#ifdef HAVE_INT128_SUPPORT -typedef unsigned __int128 uint128_t; -#define add128(a, b) (a) + (b) -#define mul6464(a, b) (uint128_t)(a) * (uint128_t)(b) -#define mul12819(a) (uint128_t)(a) * 19 -#define rshift128(x, n) (x) >> (n) -#define lshift128(x, n) (x) << (n) -#define mask51(x) (x) & 0x7ffffffffffff -#define mask_lower(x) (uint64_t)(x) -#define mask51full(x) (x) & 0x7ffffffffffff -#define init128x(x) (x) -#else /* uint128_t for Windows and 32 bit intel systems */ -struct uint128_t_str { - uint64_t lo; - uint64_t hi; -}; -typedef struct uint128_t_str uint128_t; -uint128_t add128(uint128_t a, uint128_t b); -uint128_t mul6464(uint64_t a, uint64_t b); -uint128_t mul12819(uint128_t a); -uint128_t rshift128(uint128_t x, uint8_t n); -uint128_t lshift128(uint128_t x, uint8_t n); -uint64_t mask51(uint128_t x); -uint64_t mask_lower(uint128_t x); -uint128_t mask51full(uint128_t x); -uint128_t init128x(uint64_t x); -#endif diff --git a/security/nss/lib/freebl/exports.gyp b/security/nss/lib/freebl/exports.gyp index aded6bfb6..ca0b6dafd 100644 --- a/security/nss/lib/freebl/exports.gyp +++ b/security/nss/lib/freebl/exports.gyp @@ -29,6 +29,7 @@ 'files': [ 'alghmac.h', 'blapi.h', + 'blake2b.h', 'chacha20poly1305.h', 'ec.h', 'ecl/ecl-curve.h', diff --git a/security/nss/lib/freebl/fipsfreebl.c b/security/nss/lib/freebl/fipsfreebl.c index 094513560..2328a677f 100644 --- a/security/nss/lib/freebl/fipsfreebl.c +++ b/security/nss/lib/freebl/fipsfreebl.c @@ -6,6 +6,7 @@ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ /* $Id: fipstest.c,v 1.31 2012/06/28 17:55:06 rrelyea%redhat.com Exp $ */ +#ifndef NSS_FIPS_DISABLED #ifdef FREEBL_NO_DEPEND #include "stubs.h" #endif @@ -15,9 +16,7 @@ #include "secerr.h" #include "prtypes.h" -#ifdef NSS_ENABLE_ECC #include "ec.h" /* Required for ECDSA */ -#endif /* * different platforms have different ways of calling and initial entry point @@ -1077,8 +1076,6 @@ rsa_loser: return (SECFailure); } -#ifdef NSS_ENABLE_ECC - static SECStatus freebl_fips_ECDSA_Test(ECParams *ecparams, const PRUint8 *knownSignature, @@ -1275,8 +1272,6 @@ freebl_fips_ECDSA_PowerUpSelfTest() return (SECSuccess); } -#endif /* NSS_ENABLE_ECC */ - static SECStatus freebl_fips_DSA_PowerUpSelfTest(void) { @@ -1559,13 +1554,11 @@ freebl_fipsPowerUpSelfTest(unsigned int tests) if (rv != SECSuccess) return rv; -#ifdef NSS_ENABLE_ECC /* ECDSA Power-Up SelfTest(s). */ rv = freebl_fips_ECDSA_PowerUpSelfTest(); if (rv != SECSuccess) return rv; -#endif } /* Passed Power-Up SelfTest(s). */ return (SECSuccess); @@ -1589,9 +1582,6 @@ static PRBool self_tests_freebl_ran = PR_FALSE; static PRBool self_tests_ran = PR_FALSE; static PRBool self_tests_freebl_success = PR_FALSE; static PRBool self_tests_success = PR_FALSE; -#if defined(DEBUG) -static PRBool fips_mode_available = PR_FALSE; -#endif /* * accessors for freebl @@ -1644,7 +1634,6 @@ bl_startup_tests(void) PORT_Assert(self_tests_freebl_ran == PR_FALSE); PORT_Assert(self_tests_success == PR_FALSE); - PORT_Assert(fips_mode_available == PR_FALSE); self_tests_freebl_ran = PR_TRUE; /* we are running the tests */ self_tests_success = PR_FALSE; /* force it just in case */ self_tests_freebl_success = PR_FALSE; /* force it just in case */ @@ -1713,3 +1702,4 @@ BL_FIPSEntryOK(PRBool freebl_only) PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); return SECFailure; } +#endif diff --git a/security/nss/lib/freebl/freebl.gyp b/security/nss/lib/freebl/freebl.gyp index 8c0d0dcd5..fae56f709 100644 --- a/security/nss/lib/freebl/freebl.gyp +++ b/security/nss/lib/freebl/freebl.gyp @@ -10,7 +10,7 @@ 'target_name': 'intel-gcm-wrap_c_lib', 'type': 'static_library', 'sources': [ - 'intel-gcm-wrap.c' + 'intel-gcm-wrap.c', ], 'dependencies': [ '<(DEPTH)/exports.gyp:nss_exports' @@ -23,6 +23,69 @@ ] }, { + # TODO: make this so that all hardware accelerated code is in here. + 'target_name': 'hw-acc-crypto', + 'type': 'static_library', + 'sources': [ + 'verified/Hacl_Chacha20_Vec128.c', + ], + 'dependencies': [ + '<(DEPTH)/exports.gyp:nss_exports' + ], + 'conditions': [ + [ 'target_arch=="ia32" or target_arch=="x64"', { + 'cflags': [ + '-mssse3' + ], + 'cflags_mozilla': [ + '-mssse3' + ], + # GCC doesn't define this. + 'defines': [ + '__SSSE3__', + ], + }], + [ 'OS=="android"', { + # On Android we can't use any of the hardware acceleration :( + 'defines!': [ + '__ARM_NEON__', + '__ARM_NEON', + ], + }], + ], + }, + { + 'target_name': 'gcm-aes-x86_c_lib', + 'type': 'static_library', + 'sources': [ + 'gcm-x86.c', 'aes-x86.c' + ], + 'dependencies': [ + '<(DEPTH)/exports.gyp:nss_exports' + ], + # Enable isa option for pclmul and aes-ni; supported since gcc 4.4. + # This is only supported by x84/x64. It's not needed for Windows, + # unless clang-cl is used. + 'cflags_mozilla': [ + '-mpclmul', '-maes' + ], + 'conditions': [ + [ 'OS=="linux" or OS=="android" or OS=="dragonfly" or OS=="freebsd" or OS=="netbsd" or OS=="openbsd"', { + 'cflags': [ + '-mpclmul', '-maes' + ], + }], + # macOS build doesn't use cflags. + [ 'OS=="mac"', { + 'xcode_settings': { + 'OTHER_CFLAGS': [ + '-mpclmul', '-maes' + ], + }, + }] + ] + }, + { 'target_name': 'freebl', 'type': 'static_library', 'sources': [ @@ -43,8 +106,14 @@ ], 'dependencies': [ '<(DEPTH)/exports.gyp:nss_exports', + 'hw-acc-crypto', ], 'conditions': [ + [ 'target_arch=="ia32" or target_arch=="x64"', { + 'dependencies': [ + 'gcm-aes-x86_c_lib', + ], + }], [ 'OS=="linux"', { 'defines!': [ 'FREEBL_NO_DEPEND', @@ -74,8 +143,14 @@ ], 'dependencies': [ '<(DEPTH)/exports.gyp:nss_exports', + 'hw-acc-crypto', ], 'conditions': [ + [ 'target_arch=="ia32" or target_arch=="x64"', { + 'dependencies': [ + 'gcm-aes-x86_c_lib', + ] + }], [ 'OS!="linux" and OS!="android"', { 'conditions': [ [ 'moz_fold_libs==0', { @@ -142,7 +217,8 @@ 'target_defaults': { 'include_dirs': [ 'mpi', - 'ecl' + 'ecl', + 'verified', ], 'defines': [ 'SHLIB_SUFFIX=\"<(dll_suffix)\"', @@ -153,19 +229,12 @@ 'MP_API_COMPATIBLE' ], 'conditions': [ - [ 'target_arch=="ia32" or target_arch=="x64"', { - 'cflags_mozilla': [ - '-mpclmul', - '-maes', - ], - }], [ 'OS=="mac"', { 'xcode_settings': { # I'm not sure since when this is supported. # But I hope that doesn't matter. We also assume this is x86/x64. 'OTHER_CFLAGS': [ - '-mpclmul', - '-maes', + '-std=gnu99', ], }, }], @@ -221,17 +290,29 @@ 'HAVE_INT128_SUPPORT', ], }, { - 'sources': [ - 'ecl/uint128.c', + 'defines': [ + 'KRML_NOUINT128', ], }], ], + }, { + 'defines': [ + 'KRML_NOUINT128', + ], }], [ 'OS=="linux"', { 'defines': [ 'FREEBL_LOWHASH', 'FREEBL_NO_DEPEND', ], + 'cflags': [ + '-std=gnu99', + ], + }], + [ 'OS=="dragonfly" or OS=="freebsd" or OS=="netbsd" or OS=="openbsd"', { + 'cflags': [ + '-std=gnu99', + ], }], [ 'OS=="linux" or OS=="android"', { 'conditions': [ @@ -259,14 +340,6 @@ 'MP_USE_UINT_DIGIT', ], }], - [ 'target_arch=="ia32" or target_arch=="x64"', { - 'cflags': [ - # enable isa option for pclmul am aes-ni; supported since gcc 4.4 - # This is only support by x84/x64. It's not needed for Windows. - '-mpclmul', - '-maes', - ], - }], [ 'target_arch=="arm"', { 'defines': [ 'MP_ASSEMBLY_MULTIPLY', diff --git a/security/nss/lib/freebl/freebl_base.gypi b/security/nss/lib/freebl/freebl_base.gypi index 027aa2702..ebd1018d8 100644 --- a/security/nss/lib/freebl/freebl_base.gypi +++ b/security/nss/lib/freebl/freebl_base.gypi @@ -8,8 +8,10 @@ 'alghmac.c', 'arcfive.c', 'arcfour.c', + 'blake2b.c', 'camellia.c', 'chacha20poly1305.c', + 'crypto_primitives.c', 'ctr.c', 'cts.c', 'des.c', @@ -98,10 +100,6 @@ ], }], [ 'OS=="win"', { - 'sources': [ - #TODO: building with mingw should not need this. - 'ecl/uint128.c', - ], 'libraries': [ 'advapi32.lib', ], @@ -132,29 +130,52 @@ }], ], }], - ['target_arch=="ia32" or target_arch=="x64"', { + ['target_arch=="ia32" or target_arch=="x64" or target_arch=="arm64" or target_arch=="aarch64"', { 'sources': [ - # All intel architectures get the 64 bit version + # All intel and 64-bit ARM architectures get the 64 bit version. 'ecl/curve25519_64.c', + 'verified/Hacl_Curve25519.c', + 'verified/FStar.c', ], }, { 'sources': [ - # All non intel architectures get the generic 32 bit implementation (slow!) + # All other architectures get the generic 32 bit implementation (slow!) 'ecl/curve25519_32.c', ], }], - #TODO uint128.c [ 'disable_chachapoly==0', { + # The ChaCha20 code is linked in through the static ssse3-crypto lib on + # all platforms that support SSSE3. There are runtime checks in place to + # choose the correct ChaCha implementation at runtime. + 'sources': [ + 'verified/Hacl_Chacha20.c', + ], 'conditions': [ - [ 'OS!="win" and target_arch=="x64"', { - 'sources': [ - 'chacha20_vec.c', - 'poly1305-donna-x64-sse2-incremental-source.c', + [ 'OS!="win"', { + 'conditions': [ + [ 'target_arch=="x64"', { + 'sources': [ + 'verified/Hacl_Poly1305_64.c', + ], + }, { + # !Windows & !x64 + 'conditions': [ + [ 'target_arch=="arm64" or target_arch=="aarch64"', { + 'sources': [ + 'verified/Hacl_Poly1305_64.c', + ], + }, { + # !Windows & !x64 & !arm64 & !aarch64 + 'sources': [ + 'poly1305.c', + ], + }], + ], + }], ], }, { - # not x64 + # Windows 'sources': [ - 'chacha20.c', 'poly1305.c', ], }], diff --git a/security/nss/lib/freebl/gcm-x86.c b/security/nss/lib/freebl/gcm-x86.c new file mode 100644 index 000000000..e34d63394 --- /dev/null +++ b/security/nss/lib/freebl/gcm-x86.c @@ -0,0 +1,127 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif +#include "gcm.h" +#include "secerr.h" + +#include <wmmintrin.h> /* clmul */ + +#define WRITE64(x, bytes) \ + (bytes)[0] = (x) >> 56; \ + (bytes)[1] = (x) >> 48; \ + (bytes)[2] = (x) >> 40; \ + (bytes)[3] = (x) >> 32; \ + (bytes)[4] = (x) >> 24; \ + (bytes)[5] = (x) >> 16; \ + (bytes)[6] = (x) >> 8; \ + (bytes)[7] = (x); + +SECStatus +gcm_HashWrite_hw(gcmHashContext *ghash, unsigned char *outbuf) +{ + uint64_t tmp_out[2]; + _mm_storeu_si128((__m128i *)tmp_out, ghash->x); + /* maxout must be larger than 16 byte (checked by the caller). */ + WRITE64(tmp_out[0], outbuf + 8); + WRITE64(tmp_out[1], outbuf); + return SECSuccess; +} + +SECStatus +gcm_HashMult_hw(gcmHashContext *ghash, const unsigned char *buf, + unsigned int count) +{ + size_t i; + pre_align __m128i z_high post_align; + pre_align __m128i z_low post_align; + pre_align __m128i C post_align; + pre_align __m128i D post_align; + pre_align __m128i E post_align; + pre_align __m128i F post_align; + pre_align __m128i bin post_align; + pre_align __m128i Ci post_align; + pre_align __m128i tmp post_align; + + for (i = 0; i < count; i++, buf += 16) { + bin = _mm_set_epi16(((uint16_t)buf[0] << 8) | buf[1], + ((uint16_t)buf[2] << 8) | buf[3], + ((uint16_t)buf[4] << 8) | buf[5], + ((uint16_t)buf[6] << 8) | buf[7], + ((uint16_t)buf[8] << 8) | buf[9], + ((uint16_t)buf[10] << 8) | buf[11], + ((uint16_t)buf[12] << 8) | buf[13], + ((uint16_t)buf[14] << 8) | buf[15]); + Ci = _mm_xor_si128(bin, ghash->x); + + /* Do binary mult ghash->X = Ci * ghash->H. */ + C = _mm_clmulepi64_si128(Ci, ghash->h, 0x00); + D = _mm_clmulepi64_si128(Ci, ghash->h, 0x11); + E = _mm_clmulepi64_si128(Ci, ghash->h, 0x01); + F = _mm_clmulepi64_si128(Ci, ghash->h, 0x10); + tmp = _mm_xor_si128(E, F); + z_high = _mm_xor_si128(tmp, _mm_slli_si128(D, 8)); + z_high = _mm_unpackhi_epi64(z_high, D); + z_low = _mm_xor_si128(_mm_slli_si128(tmp, 8), C); + z_low = _mm_unpackhi_epi64(_mm_slli_si128(C, 8), z_low); + + /* Shift one to the left (multiply by x) as gcm spec is stupid. */ + C = _mm_slli_si128(z_low, 8); + E = _mm_srli_epi64(C, 63); + D = _mm_slli_si128(z_high, 8); + F = _mm_srli_epi64(D, 63); + /* Carry over */ + C = _mm_srli_si128(z_low, 8); + D = _mm_srli_epi64(C, 63); + z_low = _mm_or_si128(_mm_slli_epi64(z_low, 1), E); + z_high = _mm_or_si128(_mm_or_si128(_mm_slli_epi64(z_high, 1), F), D); + + /* Reduce */ + C = _mm_slli_si128(z_low, 8); + /* D = z_low << 127 */ + D = _mm_slli_epi64(C, 63); + /* E = z_low << 126 */ + E = _mm_slli_epi64(C, 62); + /* F = z_low << 121 */ + F = _mm_slli_epi64(C, 57); + /* z_low ^= (z_low << 127) ^ (z_low << 126) ^ (z_low << 121); */ + z_low = _mm_xor_si128(_mm_xor_si128(_mm_xor_si128(z_low, D), E), F); + C = _mm_srli_si128(z_low, 8); + /* D = z_low >> 1 */ + D = _mm_slli_epi64(C, 63); + D = _mm_or_si128(_mm_srli_epi64(z_low, 1), D); + /* E = z_low >> 2 */ + E = _mm_slli_epi64(C, 62); + E = _mm_or_si128(_mm_srli_epi64(z_low, 2), E); + /* F = z_low >> 7 */ + F = _mm_slli_epi64(C, 57); + F = _mm_or_si128(_mm_srli_epi64(z_low, 7), F); + /* ghash->x ^= z_low ^ (z_low >> 1) ^ (z_low >> 2) ^ (z_low >> 7); */ + ghash->x = _mm_xor_si128(_mm_xor_si128( + _mm_xor_si128(_mm_xor_si128(z_high, z_low), D), E), + F); + } + return SECSuccess; +} + +SECStatus +gcm_HashInit_hw(gcmHashContext *ghash) +{ + ghash->ghash_mul = gcm_HashMult_hw; + ghash->x = _mm_setzero_si128(); + /* MSVC requires __m64 to load epi64. */ + ghash->h = _mm_set_epi32(ghash->h_high >> 32, (uint32_t)ghash->h_high, + ghash->h_low >> 32, (uint32_t)ghash->h_low); + ghash->hw = PR_TRUE; + return SECSuccess; +} + +SECStatus +gcm_HashZeroX_hw(gcmHashContext *ghash) +{ + ghash->x = _mm_setzero_si128(); + return SECSuccess; +} diff --git a/security/nss/lib/freebl/gcm.c b/security/nss/lib/freebl/gcm.c index 0fdb0fd48..f1e16da78 100644 --- a/security/nss/lib/freebl/gcm.c +++ b/security/nss/lib/freebl/gcm.c @@ -17,18 +17,50 @@ #include <limits.h> -#ifdef NSS_X86_OR_X64 -#include <wmmintrin.h> /* clmul */ -#endif - /* Forward declarations */ +SECStatus gcm_HashInit_hw(gcmHashContext *ghash); +SECStatus gcm_HashWrite_hw(gcmHashContext *ghash, unsigned char *outbuf); SECStatus gcm_HashMult_hw(gcmHashContext *ghash, const unsigned char *buf, unsigned int count); +SECStatus gcm_HashZeroX_hw(gcmHashContext *ghash); SECStatus gcm_HashMult_sftw(gcmHashContext *ghash, const unsigned char *buf, unsigned int count); SECStatus gcm_HashMult_sftw32(gcmHashContext *ghash, const unsigned char *buf, unsigned int count); +/* Stub definitions for the above *_hw functions, which shouldn't be + * used unless NSS_X86_OR_X64 is defined */ +#ifndef NSS_X86_OR_X64 +SECStatus +gcm_HashWrite_hw(gcmHashContext *ghash, unsigned char *outbuf) +{ + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; +} + +SECStatus +gcm_HashMult_hw(gcmHashContext *ghash, const unsigned char *buf, + unsigned int count) +{ + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; +} + +SECStatus +gcm_HashInit_hw(gcmHashContext *ghash) +{ + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; +} + +SECStatus +gcm_HashZeroX_hw(gcmHashContext *ghash) +{ + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; +} +#endif /* NSS_X86_OR_X64 */ + uint64_t get64(const unsigned char *bytes) { @@ -46,6 +78,8 @@ get64(const unsigned char *bytes) SECStatus gcmHash_InitContext(gcmHashContext *ghash, const unsigned char *H, PRBool sw) { + SECStatus rv = SECSuccess; + ghash->cLen = 0; ghash->bufLen = 0; PORT_Memset(ghash->counterBuf, 0, sizeof(ghash->counterBuf)); @@ -53,17 +87,7 @@ gcmHash_InitContext(gcmHashContext *ghash, const unsigned char *H, PRBool sw) ghash->h_low = get64(H + 8); ghash->h_high = get64(H); if (clmul_support() && !sw) { -#ifdef NSS_X86_OR_X64 - ghash->ghash_mul = gcm_HashMult_hw; - ghash->x = _mm_setzero_si128(); - /* MSVC requires __m64 to load epi64. */ - ghash->h = _mm_set_epi32(ghash->h_high >> 32, (uint32_t)ghash->h_high, - ghash->h_low >> 32, (uint32_t)ghash->h_low); - ghash->hw = PR_TRUE; -#else - PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); - return SECFailure; -#endif /* NSS_X86_OR_X64 */ + rv = gcm_HashInit_hw(ghash); } else { /* We fall back to the software implementation if we can't use / don't * want to use pclmul. */ @@ -75,7 +99,7 @@ gcmHash_InitContext(gcmHashContext *ghash, const unsigned char *H, PRBool sw) ghash->x_high = ghash->x_low = 0; ghash->hw = PR_FALSE; } - return SECSuccess; + return rv; } #ifdef HAVE_INT128_SUPPORT @@ -283,102 +307,17 @@ gcm_HashMult_sftw32(gcmHashContext *ghash, const unsigned char *buf, } #endif /* HAVE_INT128_SUPPORT */ -SECStatus -gcm_HashMult_hw(gcmHashContext *ghash, const unsigned char *buf, - unsigned int count) -{ -#ifdef NSS_X86_OR_X64 - size_t i; - pre_align __m128i z_high post_align; - pre_align __m128i z_low post_align; - pre_align __m128i C post_align; - pre_align __m128i D post_align; - pre_align __m128i E post_align; - pre_align __m128i F post_align; - pre_align __m128i bin post_align; - pre_align __m128i Ci post_align; - pre_align __m128i tmp post_align; - - for (i = 0; i < count; i++, buf += 16) { - bin = _mm_set_epi16(((uint16_t)buf[0] << 8) | buf[1], - ((uint16_t)buf[2] << 8) | buf[3], - ((uint16_t)buf[4] << 8) | buf[5], - ((uint16_t)buf[6] << 8) | buf[7], - ((uint16_t)buf[8] << 8) | buf[9], - ((uint16_t)buf[10] << 8) | buf[11], - ((uint16_t)buf[12] << 8) | buf[13], - ((uint16_t)buf[14] << 8) | buf[15]); - Ci = _mm_xor_si128(bin, ghash->x); - - /* Do binary mult ghash->X = Ci * ghash->H. */ - C = _mm_clmulepi64_si128(Ci, ghash->h, 0x00); - D = _mm_clmulepi64_si128(Ci, ghash->h, 0x11); - E = _mm_clmulepi64_si128(Ci, ghash->h, 0x01); - F = _mm_clmulepi64_si128(Ci, ghash->h, 0x10); - tmp = _mm_xor_si128(E, F); - z_high = _mm_xor_si128(tmp, _mm_slli_si128(D, 8)); - z_high = _mm_unpackhi_epi64(z_high, D); - z_low = _mm_xor_si128(_mm_slli_si128(tmp, 8), C); - z_low = _mm_unpackhi_epi64(_mm_slli_si128(C, 8), z_low); - - /* Shift one to the left (multiply by x) as gcm spec is stupid. */ - C = _mm_slli_si128(z_low, 8); - E = _mm_srli_epi64(C, 63); - D = _mm_slli_si128(z_high, 8); - F = _mm_srli_epi64(D, 63); - /* Carry over */ - C = _mm_srli_si128(z_low, 8); - D = _mm_srli_epi64(C, 63); - z_low = _mm_or_si128(_mm_slli_epi64(z_low, 1), E); - z_high = _mm_or_si128(_mm_or_si128(_mm_slli_epi64(z_high, 1), F), D); - - /* Reduce */ - C = _mm_slli_si128(z_low, 8); - /* D = z_low << 127 */ - D = _mm_slli_epi64(C, 63); - /* E = z_low << 126 */ - E = _mm_slli_epi64(C, 62); - /* F = z_low << 121 */ - F = _mm_slli_epi64(C, 57); - /* z_low ^= (z_low << 127) ^ (z_low << 126) ^ (z_low << 121); */ - z_low = _mm_xor_si128(_mm_xor_si128(_mm_xor_si128(z_low, D), E), F); - C = _mm_srli_si128(z_low, 8); - /* D = z_low >> 1 */ - D = _mm_slli_epi64(C, 63); - D = _mm_or_si128(_mm_srli_epi64(z_low, 1), D); - /* E = z_low >> 2 */ - E = _mm_slli_epi64(C, 62); - E = _mm_or_si128(_mm_srli_epi64(z_low, 2), E); - /* F = z_low >> 7 */ - F = _mm_slli_epi64(C, 57); - F = _mm_or_si128(_mm_srli_epi64(z_low, 7), F); - /* ghash->x ^= z_low ^ (z_low >> 1) ^ (z_low >> 2) ^ (z_low >> 7); */ - ghash->x = _mm_xor_si128(_mm_xor_si128( - _mm_xor_si128(_mm_xor_si128(z_high, z_low), D), E), - F); - } - return SECSuccess; -#else - PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); - return SECFailure; -#endif /* NSS_X86_OR_X64 */ -} - static SECStatus gcm_zeroX(gcmHashContext *ghash) { + SECStatus rv = SECSuccess; + if (ghash->hw) { -#ifdef NSS_X86_OR_X64 - ghash->x = _mm_setzero_si128(); - return SECSuccess; -#else - PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); - return SECFailure; -#endif /* NSS_X86_OR_X64 */ + rv = gcm_HashZeroX_hw(ghash); } ghash->x_high = ghash->x_low = 0; - return SECSuccess; + return rv; } /* @@ -503,15 +442,10 @@ gcmHash_Final(gcmHashContext *ghash, unsigned char *outbuf, } if (ghash->hw) { -#ifdef NSS_X86_OR_X64 - uint64_t tmp_out[2]; - _mm_storeu_si128((__m128i *)tmp_out, ghash->x); - WRITE64(tmp_out[0], T + 8); - WRITE64(tmp_out[1], T); -#else - PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); - return SECFailure; -#endif /* NSS_X86_OR_X64 */ + rv = gcm_HashWrite_hw(ghash, T); + if (rv != SECSuccess) { + goto cleanup; + } } else { WRITE64(ghash->x_low, T + 8); WRITE64(ghash->x_high, T); @@ -595,14 +529,7 @@ GCM_CreateContext(void *context, freeblCipherFunc cipher, if (gcm == NULL) { return NULL; } - /* aligned_alloc is C11 so we have to do it the old way. */ - ghash = PORT_ZAlloc(sizeof(gcmHashContext) + 15); - if (ghash == NULL) { - PORT_SetError(SEC_ERROR_NO_MEMORY); - goto loser; - } - ghash->mem = ghash; - ghash = (gcmHashContext *)(((uintptr_t)ghash + 15) & ~(uintptr_t)0x0F); + ghash = PORT_ZNewAligned(gcmHashContext, 16, mem); /* first plug in the ghash context */ gcm->ghash_context = ghash; diff --git a/security/nss/lib/freebl/gcm.h b/security/nss/lib/freebl/gcm.h index 0c707a081..42ef0f717 100644 --- a/security/nss/lib/freebl/gcm.h +++ b/security/nss/lib/freebl/gcm.h @@ -9,7 +9,21 @@ #include <stdint.h> #ifdef NSS_X86_OR_X64 +/* GCC <= 4.8 doesn't support including emmintrin.h without enabling SSE2 */ +#if !defined(__clang__) && defined(__GNUC__) && defined(__GNUC_MINOR__) && \ + (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ <= 8)) +#pragma GCC push_options +#pragma GCC target("sse2") +#undef NSS_DISABLE_SSE2 +#define NSS_DISABLE_SSE2 1 +#endif /* GCC <= 4.8 */ + #include <emmintrin.h> /* __m128i */ + +#ifdef NSS_DISABLE_SSE2 +#undef NSS_DISABLE_SSE2 +#pragma GCC pop_options +#endif /* NSS_DISABLE_SSE2 */ #endif SEC_BEGIN_PROTOS diff --git a/security/nss/lib/freebl/ldvector.c b/security/nss/lib/freebl/ldvector.c index 2447a0c9f..d39965256 100644 --- a/security/nss/lib/freebl/ldvector.c +++ b/security/nss/lib/freebl/ldvector.c @@ -298,9 +298,25 @@ static const struct FREEBLVectorStr vector = /* End of Version 3.018 */ - EC_GetPointSize + EC_GetPointSize, /* End of Version 3.019 */ + + BLAKE2B_Hash, + BLAKE2B_HashBuf, + BLAKE2B_MAC_HashBuf, + BLAKE2B_NewContext, + BLAKE2B_DestroyContext, + BLAKE2B_Begin, + BLAKE2B_MAC_Begin, + BLAKE2B_Update, + BLAKE2B_End, + BLAKE2B_FlattenSize, + BLAKE2B_Flatten, + BLAKE2B_Resurrect + + /* End of Version 3.020 */ + }; const FREEBLVector* @@ -320,8 +336,12 @@ FREEBL_GetVector(void) return NULL; } #endif - /* make sure the Full self tests have been run before continuing */ + +#ifndef NSS_FIPS_DISABLED + /* In FIPS mode make sure the Full self tests have been run before + * continuing. */ BL_POSTRan(PR_FALSE); +#endif return &vector; } diff --git a/security/nss/lib/freebl/loader.c b/security/nss/lib/freebl/loader.c index 792171b08..fe5e0a668 100644 --- a/security/nss/lib/freebl/loader.c +++ b/security/nss/lib/freebl/loader.c @@ -2124,3 +2124,114 @@ EC_GetPointSize(const ECParams *params) return SECFailure; return (vector->p_EC_GetPointSize)(params); } + +SECStatus +BLAKE2B_Hash(unsigned char *dest, const char *src) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) { + return SECFailure; + } + return (vector->p_BLAKE2B_Hash)(dest, src); +} + +SECStatus +BLAKE2B_HashBuf(unsigned char *output, const unsigned char *input, PRUint32 inlen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) { + return SECFailure; + } + return (vector->p_BLAKE2B_HashBuf)(output, input, inlen); +} + +SECStatus +BLAKE2B_MAC_HashBuf(unsigned char *output, const unsigned char *input, + unsigned int inlen, const unsigned char *key, + unsigned int keylen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) { + return SECFailure; + } + return (vector->p_BLAKE2B_MAC_HashBuf)(output, input, inlen, key, keylen); +} + +BLAKE2BContext * +BLAKE2B_NewContext(void) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) { + return NULL; + } + return (vector->p_BLAKE2B_NewContext)(); +} + +void +BLAKE2B_DestroyContext(BLAKE2BContext *BLAKE2BContext, PRBool freeit) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) { + return; + } + (vector->p_BLAKE2B_DestroyContext)(BLAKE2BContext, freeit); +} + +SECStatus +BLAKE2B_Begin(BLAKE2BContext *ctx) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) { + return SECFailure; + } + return (vector->p_BLAKE2B_Begin)(ctx); +} + +SECStatus +BLAKE2B_MAC_Begin(BLAKE2BContext *ctx, const PRUint8 *key, const size_t keylen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) { + return SECFailure; + } + return (vector->p_BLAKE2B_MAC_Begin)(ctx, key, keylen); +} + +SECStatus +BLAKE2B_Update(BLAKE2BContext *ctx, const unsigned char *in, unsigned int inlen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) { + return SECFailure; + } + return (vector->p_BLAKE2B_Update)(ctx, in, inlen); +} + +SECStatus +BLAKE2B_End(BLAKE2BContext *ctx, unsigned char *out, + unsigned int *digestLen, size_t maxDigestLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) { + return SECFailure; + } + return (vector->p_BLAKE2B_End)(ctx, out, digestLen, maxDigestLen); +} + +unsigned int +BLAKE2B_FlattenSize(BLAKE2BContext *ctx) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) { + return 0; + } + return (vector->p_BLAKE2B_FlattenSize)(ctx); +} + +SECStatus +BLAKE2B_Flatten(BLAKE2BContext *ctx, unsigned char *space) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) { + return SECFailure; + } + return (vector->p_BLAKE2B_Flatten)(ctx, space); +} + +BLAKE2BContext * +BLAKE2B_Resurrect(unsigned char *space, void *arg) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) { + return NULL; + } + return (vector->p_BLAKE2B_Resurrect)(space, arg); +} diff --git a/security/nss/lib/freebl/loader.h b/security/nss/lib/freebl/loader.h index ed392cc47..ff10cf9ba 100644 --- a/security/nss/lib/freebl/loader.h +++ b/security/nss/lib/freebl/loader.h @@ -10,7 +10,7 @@ #include "blapi.h" -#define FREEBL_VERSION 0x0313 +#define FREEBL_VERSION 0x0314 struct FREEBLVectorStr { @@ -736,6 +736,29 @@ struct FREEBLVectorStr { /* Version 3.019 came to here */ + SECStatus (*p_BLAKE2B_Hash)(unsigned char *dest, const char *src); + SECStatus (*p_BLAKE2B_HashBuf)(unsigned char *output, + const unsigned char *input, PRUint32 inlen); + SECStatus (*p_BLAKE2B_MAC_HashBuf)(unsigned char *output, + const unsigned char *input, + unsigned int inlen, + const unsigned char *key, + unsigned int keylen); + BLAKE2BContext *(*p_BLAKE2B_NewContext)(); + void (*p_BLAKE2B_DestroyContext)(BLAKE2BContext *ctx, PRBool freeit); + SECStatus (*p_BLAKE2B_Begin)(BLAKE2BContext *ctx); + SECStatus (*p_BLAKE2B_MAC_Begin)(BLAKE2BContext *ctx, const PRUint8 *key, + const size_t keylen); + SECStatus (*p_BLAKE2B_Update)(BLAKE2BContext *ctx, const unsigned char *in, + unsigned int inlen); + SECStatus (*p_BLAKE2B_End)(BLAKE2BContext *ctx, unsigned char *out, + unsigned int *digestLen, size_t maxDigestLen); + unsigned int (*p_BLAKE2B_FlattenSize)(BLAKE2BContext *ctx); + SECStatus (*p_BLAKE2B_Flatten)(BLAKE2BContext *ctx, unsigned char *space); + BLAKE2BContext *(*p_BLAKE2B_Resurrect)(unsigned char *space, void *arg); + + /* Version 3.020 came to here */ + /* Add new function pointers at the end of this struct and bump * FREEBL_VERSION at the beginning of this file. */ }; diff --git a/security/nss/lib/freebl/lowhash_vector.c b/security/nss/lib/freebl/lowhash_vector.c index 7690c98da..be53bbdc6 100644 --- a/security/nss/lib/freebl/lowhash_vector.c +++ b/security/nss/lib/freebl/lowhash_vector.c @@ -102,13 +102,13 @@ freebl_LoadDSO(void) static PRCallOnceType loadFreeBLOnce; -static PRStatus +static void freebl_RunLoaderOnce(void) { /* Don't have NSPR, so can use the real PR_CallOnce, implement a stripped * down version. */ if (loadFreeBLOnce.initialized) { - return loadFreeBLOnce.status; + return; } if (__sync_lock_test_and_set(&loadFreeBLOnce.inProgress, 1) == 0) { loadFreeBLOnce.status = freebl_LoadDSO(); @@ -122,17 +122,21 @@ freebl_RunLoaderOnce(void) sleep(1); /* don't have condition variables, just give up the CPU */ } } +} - return loadFreeBLOnce.status; +static const NSSLOWVector * +freebl_InitVector(void) +{ + if (!vector) { + freebl_RunLoaderOnce(); + } + return vector; } const FREEBLVector * FREEBL_GetVector(void) { - if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) { - return NULL; - } - if (vector) { + if (freebl_InitVector()) { return (vector->p_FREEBL_GetVector)(); } return NULL; @@ -141,25 +145,26 @@ FREEBL_GetVector(void) NSSLOWInitContext * NSSLOW_Init(void) { - if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) - return NULL; - return (vector->p_NSSLOW_Init)(); + if (freebl_InitVector()) { + return (vector->p_NSSLOW_Init)(); + } + return NULL; } void NSSLOW_Shutdown(NSSLOWInitContext *context) { - if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) - return; - (vector->p_NSSLOW_Shutdown)(context); + if (freebl_InitVector()) { + (vector->p_NSSLOW_Shutdown)(context); + } } void NSSLOW_Reset(NSSLOWInitContext *context) { - if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) - return; - (vector->p_NSSLOW_Reset)(context); + if (freebl_InitVector()) { + (vector->p_NSSLOW_Reset)(context); + } } NSSLOWHASHContext * @@ -167,17 +172,18 @@ NSSLOWHASH_NewContext( NSSLOWInitContext *initContext, HASH_HashType hashType) { - if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) - return NULL; - return (vector->p_NSSLOWHASH_NewContext)(initContext, hashType); + if (freebl_InitVector()) { + return (vector->p_NSSLOWHASH_NewContext)(initContext, hashType); + } + return NULL; } void NSSLOWHASH_Begin(NSSLOWHASHContext *context) { - if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) - return; - (vector->p_NSSLOWHASH_Begin)(context); + if (freebl_InitVector()) { + (vector->p_NSSLOWHASH_Begin)(context); + } } void @@ -185,9 +191,9 @@ NSSLOWHASH_Update(NSSLOWHASHContext *context, const unsigned char *buf, unsigned int len) { - if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) - return; - (vector->p_NSSLOWHASH_Update)(context, buf, len); + if (freebl_InitVector()) { + (vector->p_NSSLOWHASH_Update)(context, buf, len); + } } void @@ -195,23 +201,24 @@ NSSLOWHASH_End(NSSLOWHASHContext *context, unsigned char *buf, unsigned int *ret, unsigned int len) { - if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) - return; - (vector->p_NSSLOWHASH_End)(context, buf, ret, len); + if (freebl_InitVector()) { + (vector->p_NSSLOWHASH_End)(context, buf, ret, len); + } } void NSSLOWHASH_Destroy(NSSLOWHASHContext *context) { - if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) - return; - (vector->p_NSSLOWHASH_Destroy)(context); + if (freebl_InitVector()) { + (vector->p_NSSLOWHASH_Destroy)(context); + } } unsigned int NSSLOWHASH_Length(NSSLOWHASHContext *context) { - if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) - return -1; - return (vector->p_NSSLOWHASH_Length)(context); + if (freebl_InitVector()) { + return (vector->p_NSSLOWHASH_Length)(context); + } + return -1; } diff --git a/security/nss/lib/freebl/manifest.mn b/security/nss/lib/freebl/manifest.mn index bf8144218..e4c9ab0b7 100644 --- a/security/nss/lib/freebl/manifest.mn +++ b/security/nss/lib/freebl/manifest.mn @@ -1,10 +1,10 @@ -# +# # This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. # NOTE: any ifdefs in this file must be defined on the gmake command line -# (if anywhere). They cannot come from Makefile or config.mk +# (if anywhere). They cannot come from Makefile or config.mk CORE_DEPTH = ../.. @@ -75,7 +75,7 @@ DEFINES += -DSHLIB_SUFFIX=\"$(DLL_SUFFIX)\" -DSHLIB_PREFIX=\"$(DLL_PREFIX)\" \ -DSHLIB_VERSION=\"$(LIBRARY_VERSION)\" \ -DSOFTOKEN_SHLIB_VERSION=\"$(SOFTOKEN_LIBRARY_VERSION)\" -REQUIRES = +REQUIRES = EXPORTS = \ blapit.h \ @@ -86,6 +86,7 @@ EXPORTS = \ PRIVATE_EXPORTS = \ alghmac.h \ + blake2b.h \ blapi.h \ chacha20poly1305.h \ hmacct.h \ @@ -102,16 +103,13 @@ MPI_SRCS = mpprime.c mpmontg.c mplogic.c mpi.c mp_gf2m.c ECL_HDRS = ecl-exp.h ecl.h ecp.h ecl-priv.h -ifndef NSS_DISABLE_ECC ECL_SRCS = ecl.c ecl_mult.c ecl_gf.c \ ecp_aff.c ecp_jac.c ecp_mont.c \ ec_naf.c ecp_jm.c ecp_256.c ecp_384.c ecp_521.c \ ecp_256_32.c ecp_25519.c -else -ECL_SRCS = $(NULL) -endif SHA_SRCS = sha_fast.c MPCPU_SRCS = mpcpucache.c +VERIFIED_SRCS = $(NULL) CSRCS = \ freeblver.c \ @@ -126,6 +124,8 @@ CSRCS = \ alg2268.c \ arcfour.c \ arcfive.c \ + crypto_primitives.c \ + blake2b.c \ desblapi.c \ des.c \ drbg.c \ @@ -153,6 +153,7 @@ CSRCS = \ $(MPI_SRCS) \ $(MPCPU_SRCS) \ $(ECL_SRCS) \ + $(VERIFIED_SRCS) \ $(STUBS_SRCS) \ $(LOWHASH_SRCS) \ $(EXTRA_SRCS) \ @@ -162,6 +163,7 @@ ALL_CSRCS := $(CSRCS) ALL_HDRS = \ alghmac.h \ + blake2b.h \ blapi.h \ blapit.h \ des.h \ @@ -178,12 +180,6 @@ ALL_HDRS = \ $(NULL) -ifdef AES_GEN_TBL -DEFINES += -DRIJNDAEL_GENERATE_TABLES -else -ifdef AES_GEN_TBL_M -DEFINES += -DRIJNDAEL_GENERATE_TABLES_MACRO -else ifdef AES_GEN_VAL DEFINES += -DRIJNDAEL_GENERATE_VALUES else @@ -193,5 +189,3 @@ else DEFINES += -DRIJNDAEL_INCLUDE_TABLES endif endif -endif -endif diff --git a/security/nss/lib/freebl/mpi/README b/security/nss/lib/freebl/mpi/README index 776ba713a..cf4302758 100644 --- a/security/nss/lib/freebl/mpi/README +++ b/security/nss/lib/freebl/mpi/README @@ -53,7 +53,7 @@ to change are: single digit. This is just a printf() format string, so you can adjust it appropriately. -(3) The macros DIGIT_MAX and MP_WORD_MAX, which specify the +(3) The macros DIGIT_MAX and MP_WORD_MAX, which specify the largest value expressible in an mp_digit and an mp_word, respectively. @@ -345,7 +345,7 @@ returns values of x and y satisfying Bezout's identity. This is used by mp_invmod() to find modular inverses. However, if you do not need these values, you will find that mp_gcd() is MUCH more efficient, since it doesn't need all the intermediate values that mp_xgcd() -requires in order to compute x and y. +requires in order to compute x and y. The mp_gcd() (and mp_xgcd()) functions use the binary (extended) GCD algorithm due to Josef Stein. @@ -361,7 +361,7 @@ mp_read_radix(mp, str, r) - convert a string in radix r to an mp_int mp_read_raw(mp, s, len) - convert a string of bytes to an mp_int mp_radix_size(mp, r) - return length of buffer needed by mp_toradix() mp_raw_size(mp) - return length of buffer needed by mp_toraw() -mp_toradix(mp, str, r) - convert an mp_int to a string of radix r +mp_toradix(mp, str, r) - convert an mp_int to a string of radix r digits mp_toraw(mp, str) - convert an mp_int to a string of bytes mp_tovalue(ch, r) - convert ch to its value when taken as @@ -387,7 +387,7 @@ The mp_read_radix() and mp_toradix() functions support bases from 2 to than this, you will need to write them yourself (that's why mp_div_d() is provided, after all). -Note: mp_read_radix() will accept as digits either capital or +Note: mp_read_radix() will accept as digits either capital or ---- lower-case letters. However, the current implementation of mp_toradix() only outputs upper-case letters, when writing bases betwee 10 and 36. The underlying code supports using @@ -448,14 +448,14 @@ Note: The mpp_random() and mpp_random_size() functions use the C to change. mpp_divis_vector(a, v, s, w) - is a divisible by any of the s digits - in v? If so, let w be the index of + in v? If so, let w be the index of that digit mpp_divis_primes(a, np) - is a divisible by any of the first np - primes? If so, set np to the prime + primes? If so, set np to the prime which divided a. -mpp_fermat(a, d) - test if w^a = w (mod a). If so, +mpp_fermat(a, d) - test if w^a = w (mod a). If so, returns MP_YES, otherwise MP_NO. mpp_pprime(a, nt) - perform nt iterations of the Rabin- @@ -486,7 +486,7 @@ The file 'mpi-config.h' defines several configurable parameters for the library, which you can adjust to suit your application. At the time of this writing, the available options are: -MP_IOFUNC - Define true to include the mp_print() function, +MP_IOFUNC - Define true to include the mp_print() function, which is moderately useful for debugging. This implicitly includes <stdio.h>. @@ -502,21 +502,14 @@ MP_LOGTAB - If true, the file "logtab.h" is included, which the library includes <math.h> and uses log(). This typically forces you to link against math libraries. -MP_MEMSET - If true, use memset() to zero buffers. If you run - into weird alignment related bugs, set this to zero - and an explicit loop will be used. - -MP_MEMCPY - If true, use memcpy() to copy buffers. If you run - into weird alignment bugs, set this to zero and an - explicit loop will be used. MP_ARGCHK - Set to 0, 1, or 2. This defines how the argument - checking macro, ARGCHK(), gets expanded. If this - is set to zero, ARGCHK() expands to nothing; no + checking macro, ARGCHK(), gets expanded. If this + is set to zero, ARGCHK() expands to nothing; no argument checks are performed. If this is 1, the ARGCHK() macro expands to code that returns MP_BADARG - or similar at runtime. If it is 2, ARGCHK() expands - to an assert() call that aborts the program on a + or similar at runtime. If it is 2, ARGCHK() expands + to an assert() call that aborts the program on a bad input. MP_DEBUG - Turns on debugging output. This is probably not at @@ -528,14 +521,14 @@ MP_DEFPREC - The default precision of a newly-created mp_int, in the mp_set_prec() function, but this is its initial value. -MP_SQUARE - If this is set to a nonzero value, the mp_sqr() +MP_SQUARE - If this is set to a nonzero value, the mp_sqr() function will use an alternate algorithm that takes advantage of the redundant inner product computation when both multiplicands are identical. Unfortunately, with some compilers this is actually SLOWER than just calling mp_mul() with the same argument twice. So if you set MP_SQUARE to zero, mp_sqr() will be expan- - ded into a call to mp_mul(). This applies to all + ded into a call to mp_mul(). This applies to all the uses of mp_sqr(), including mp_sqrmod() and the internal calls to s_mp_sqr() inside mpi.c @@ -568,7 +561,7 @@ CFLAGS=-ansi -pedantic -Wall -O2 If all goes well, the library should compile without warnings using this combination. You should, of course, make whatever adjustments -you find necessary. +you find necessary. The MPI library distribution comes with several additional programs which are intended to demonstrate the use of the library, and provide @@ -580,7 +573,7 @@ directory) for manipulating large numbers. These include: basecvt.c A radix-conversion program, supporting bases from 2 to 64 inclusive. -bbsrand.c A BBS (quadratic residue) pseudo-random number +bbsrand.c A BBS (quadratic residue) pseudo-random number generator. The file 'bbsrand.c' is just the driver for the program; the real code lives in the files 'bbs_rand.h' and 'bbs_rand.c' @@ -626,7 +619,7 @@ Acknowledgements: ---------------- The algorithms used in this library were drawn primarily from Volume -2 of Donald Knuth's magnum opus, _The Art of Computer Programming_, +2 of Donald Knuth's magnum opus, _The Art of Computer Programming_, "Semi-Numerical Methods". Barrett's algorithm for modular reduction came from Menezes, Oorschot, and Vanstone's _Handbook of Applied Cryptography_, Chapter 14. diff --git a/security/nss/lib/freebl/mpi/mpi-config.h b/security/nss/lib/freebl/mpi/mpi-config.h index c6f72b206..0cc868a14 100644 --- a/security/nss/lib/freebl/mpi/mpi-config.h +++ b/security/nss/lib/freebl/mpi/mpi-config.h @@ -28,14 +28,6 @@ #define MP_LOGTAB 1 /* use table of logs instead of log()? */ #endif -#ifndef MP_MEMSET -#define MP_MEMSET 1 /* use memset() to zero buffers? */ -#endif - -#ifndef MP_MEMCPY -#define MP_MEMCPY 1 /* use memcpy() to copy buffers? */ -#endif - #ifndef MP_ARGCHK /* 0 = no parameter checks diff --git a/security/nss/lib/freebl/mpi/mpi.c b/security/nss/lib/freebl/mpi/mpi.c index f7784c8d9..ae404019d 100644 --- a/security/nss/lib/freebl/mpi/mpi.c +++ b/security/nss/lib/freebl/mpi/mpi.c @@ -2782,15 +2782,7 @@ s_mp_pad(mp_int *mp, mp_size min) void s_mp_setz(mp_digit *dp, mp_size count) { -#if MP_MEMSET == 0 - int ix; - - for (ix = 0; ix < count; ix++) - dp[ix] = 0; -#else memset(dp, 0, count * sizeof(mp_digit)); -#endif - } /* end s_mp_setz() */ /* }}} */ @@ -2801,14 +2793,7 @@ s_mp_setz(mp_digit *dp, mp_size count) void s_mp_copy(const mp_digit *sp, mp_digit *dp, mp_size count) { -#if MP_MEMCPY == 0 - int ix; - - for (ix = 0; ix < count; ix++) - dp[ix] = sp[ix]; -#else memcpy(dp, sp, count * sizeof(mp_digit)); -#endif } /* end s_mp_copy() */ /* }}} */ diff --git a/security/nss/lib/freebl/nsslowhash.c b/security/nss/lib/freebl/nsslowhash.c index 5ed039689..22f97810f 100644 --- a/security/nss/lib/freebl/nsslowhash.c +++ b/security/nss/lib/freebl/nsslowhash.c @@ -22,6 +22,7 @@ struct NSSLOWHASHContextStr { void *hashCtxt; }; +#ifndef NSS_FIPS_DISABLED static int nsslow_GetFIPSEnabled(void) { @@ -40,9 +41,10 @@ nsslow_GetFIPSEnabled(void) return 0; if (d != '1') return 0; -#endif +#endif /* LINUX */ return 1; } +#endif /* NSS_FIPS_DISABLED */ static NSSLOWInitContext dummyContext = { 0 }; static PRBool post_failed = PR_TRUE; @@ -54,6 +56,7 @@ NSSLOW_Init(void) (void)FREEBL_InitStubs(); #endif +#ifndef NSS_FIPS_DISABLED /* make sure the FIPS product is installed if we are trying to * go into FIPS mode */ if (nsslow_GetFIPSEnabled()) { @@ -63,6 +66,7 @@ NSSLOW_Init(void) return NULL; } } +#endif post_failed = PR_FALSE; return &dummyContext; diff --git a/security/nss/lib/freebl/poly1305.h b/security/nss/lib/freebl/poly1305.h index 0a463483f..125f49b3b 100644 --- a/security/nss/lib/freebl/poly1305.h +++ b/security/nss/lib/freebl/poly1305.h @@ -8,6 +8,8 @@ #ifndef FREEBL_POLY1305_H_ #define FREEBL_POLY1305_H_ +#include "stddef.h" + typedef unsigned char poly1305_state[512]; /* Poly1305Init sets up |state| so that it can be used to calculate an diff --git a/security/nss/lib/freebl/rijndael.c b/security/nss/lib/freebl/rijndael.c index e4ad60388..5de27de9c 100644 --- a/security/nss/lib/freebl/rijndael.c +++ b/security/nss/lib/freebl/rijndael.c @@ -27,16 +27,39 @@ #include "intel-gcm.h" #endif /* INTEL_GCM */ +/* Forward declarations */ +void rijndael_native_key_expansion(AESContext *cx, const unsigned char *key, + unsigned int Nk); +void rijndael_native_encryptBlock(AESContext *cx, + unsigned char *output, + const unsigned char *input); + +/* Stub definitions for the above rijndael_native_* functions, which + * shouldn't be used unless NSS_X86_OR_X64 is defined */ +#ifndef NSS_X86_OR_X64 +void +rijndael_native_key_expansion(AESContext *cx, const unsigned char *key, + unsigned int Nk) +{ + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + PORT_Assert(0); +} + +void +rijndael_native_encryptBlock(AESContext *cx, + unsigned char *output, + const unsigned char *input) +{ + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + PORT_Assert(0); +} +#endif /* NSS_X86_OR_X64 */ + /* - * There are currently five ways to build this code, varying in performance + * There are currently three ways to build this code, varying in performance * and code size. * * RIJNDAEL_INCLUDE_TABLES Include all tables from rijndael32.tab - * RIJNDAEL_GENERATE_TABLES Generate tables on first - * encryption/decryption, then store them; - * use the function gfm - * RIJNDAEL_GENERATE_TABLES_MACRO Same as above, but use macros to do - * the generation * RIJNDAEL_GENERATE_VALUES Do not store tables, generate the table * values "on-the-fly", using gfm * RIJNDAEL_GENERATE_VALUES_MACRO Same as above, but use macros @@ -108,8 +131,7 @@ ((a & 0x80) ? ((a << 1) ^ 0x1b) : (a << 1)) /* Choose GFM method (macros or function) */ -#if defined(RIJNDAEL_GENERATE_TABLES_MACRO) || \ - defined(RIJNDAEL_GENERATE_VALUES_MACRO) +#if defined(RIJNDAEL_GENERATE_VALUES_MACRO) /* * Galois field GF(2**8) multipliers, in macro form @@ -133,7 +155,7 @@ #define GFM0E(a) \ (GFM02(a) ^ GFM04(a) ^ GFM08(a)) /* a * 0E = a * (02 + 04 + 08) */ -#else /* RIJNDAEL_GENERATE_TABLES or RIJNDAEL_GENERATE_VALUES */ +#else /* RIJNDAEL_GENERATE_VALUES */ /* GF_MULTIPLY * @@ -244,7 +266,7 @@ gen_TInvXi(PRUint8 tx, PRUint8 i) #define IMXC1(b) G_IMXC1(b) #define IMXC2(b) G_IMXC2(b) #define IMXC3(b) G_IMXC3(b) -#elif defined(RIJNDAEL_GENERATE_VALUES_MACRO) +#else /* RIJNDAEL_GENERATE_VALUES_MACRO */ /* generate values for the tables with macros */ #define T0(i) G_T0(i) #define T1(i) G_T1(i) @@ -258,84 +280,10 @@ gen_TInvXi(PRUint8 tx, PRUint8 i) #define IMXC1(b) G_IMXC1(b) #define IMXC2(b) G_IMXC2(b) #define IMXC3(b) G_IMXC3(b) -#else /* RIJNDAEL_GENERATE_TABLES or RIJNDAEL_GENERATE_TABLES_MACRO */ -/* Generate T and T**-1 table values and store, then index */ -/* The inverse mix column tables are still generated */ -#define T0(i) rijndaelTables->T0[i] -#define T1(i) rijndaelTables->T1[i] -#define T2(i) rijndaelTables->T2[i] -#define T3(i) rijndaelTables->T3[i] -#define TInv0(i) rijndaelTables->TInv0[i] -#define TInv1(i) rijndaelTables->TInv1[i] -#define TInv2(i) rijndaelTables->TInv2[i] -#define TInv3(i) rijndaelTables->TInv3[i] -#define IMXC0(b) G_IMXC0(b) -#define IMXC1(b) G_IMXC1(b) -#define IMXC2(b) G_IMXC2(b) -#define IMXC3(b) G_IMXC3(b) #endif /* choose T-table indexing method */ #endif /* not RIJNDAEL_INCLUDE_TABLES */ -#if defined(RIJNDAEL_GENERATE_TABLES) || \ - defined(RIJNDAEL_GENERATE_TABLES_MACRO) - -/* Code to generate and store the tables */ - -struct rijndael_tables_str { - PRUint32 T0[256]; - PRUint32 T1[256]; - PRUint32 T2[256]; - PRUint32 T3[256]; - PRUint32 TInv0[256]; - PRUint32 TInv1[256]; - PRUint32 TInv2[256]; - PRUint32 TInv3[256]; -}; - -static struct rijndael_tables_str *rijndaelTables = NULL; -static PRCallOnceType coRTInit = { 0, 0, 0 }; -static PRStatus -init_rijndael_tables(void) -{ - PRUint32 i; - PRUint8 si01, si02, si03, si04, si08, si09, si0B, si0D, si0E; - struct rijndael_tables_str *rts; - rts = (struct rijndael_tables_str *) - PORT_Alloc(sizeof(struct rijndael_tables_str)); - if (!rts) - return PR_FAILURE; - for (i = 0; i < 256; i++) { - /* The forward values */ - si01 = SBOX(i); - si02 = XTIME(si01); - si03 = si02 ^ si01; - rts->T0[i] = WORD4(si02, si01, si01, si03); - rts->T1[i] = WORD4(si03, si02, si01, si01); - rts->T2[i] = WORD4(si01, si03, si02, si01); - rts->T3[i] = WORD4(si01, si01, si03, si02); - /* The inverse values */ - si01 = SINV(i); - si02 = XTIME(si01); - si04 = XTIME(si02); - si08 = XTIME(si04); - si03 = si02 ^ si01; - si09 = si08 ^ si01; - si0B = si08 ^ si03; - si0D = si09 ^ si04; - si0E = si08 ^ si04 ^ si02; - rts->TInv0[i] = WORD4(si0E, si09, si0D, si0B); - rts->TInv1[i] = WORD4(si0B, si0E, si09, si0D); - rts->TInv2[i] = WORD4(si0D, si0B, si0E, si09); - rts->TInv3[i] = WORD4(si09, si0D, si0B, si0E); - } - /* wait until all the values are in to set */ - rijndaelTables = rts; - return PR_SUCCESS; -} - -#endif /* code to generate tables */ - /************************************************************************** * * Stuff related to the Rijndael key schedule @@ -389,162 +337,6 @@ rijndael_key_expansion7(AESContext *cx, const unsigned char *key, unsigned int N } } -#if defined(NSS_X86_OR_X64) -#define EXPAND_KEY128(k, rcon, res) \ - tmp_key = _mm_aeskeygenassist_si128(k, rcon); \ - tmp_key = _mm_shuffle_epi32(tmp_key, 0xFF); \ - tmp = _mm_xor_si128(k, _mm_slli_si128(k, 4)); \ - tmp = _mm_xor_si128(tmp, _mm_slli_si128(tmp, 4)); \ - tmp = _mm_xor_si128(tmp, _mm_slli_si128(tmp, 4)); \ - res = _mm_xor_si128(tmp, tmp_key) - -static void -native_key_expansion128(AESContext *cx, const unsigned char *key) -{ - __m128i *keySchedule = cx->keySchedule; - pre_align __m128i tmp_key post_align; - pre_align __m128i tmp post_align; - keySchedule[0] = _mm_loadu_si128((__m128i *)key); - EXPAND_KEY128(keySchedule[0], 0x01, keySchedule[1]); - EXPAND_KEY128(keySchedule[1], 0x02, keySchedule[2]); - EXPAND_KEY128(keySchedule[2], 0x04, keySchedule[3]); - EXPAND_KEY128(keySchedule[3], 0x08, keySchedule[4]); - EXPAND_KEY128(keySchedule[4], 0x10, keySchedule[5]); - EXPAND_KEY128(keySchedule[5], 0x20, keySchedule[6]); - EXPAND_KEY128(keySchedule[6], 0x40, keySchedule[7]); - EXPAND_KEY128(keySchedule[7], 0x80, keySchedule[8]); - EXPAND_KEY128(keySchedule[8], 0x1B, keySchedule[9]); - EXPAND_KEY128(keySchedule[9], 0x36, keySchedule[10]); -} - -#define EXPAND_KEY192_PART1(res, k0, kt, rcon) \ - tmp2 = _mm_slli_si128(k0, 4); \ - tmp1 = _mm_xor_si128(k0, tmp2); \ - tmp2 = _mm_slli_si128(tmp2, 4); \ - tmp1 = _mm_xor_si128(_mm_xor_si128(tmp1, tmp2), _mm_slli_si128(tmp2, 4)); \ - tmp2 = _mm_aeskeygenassist_si128(kt, rcon); \ - res = _mm_xor_si128(tmp1, _mm_shuffle_epi32(tmp2, 0x55)) - -#define EXPAND_KEY192_PART2(res, k1, k2) \ - tmp2 = _mm_xor_si128(k1, _mm_slli_si128(k1, 4)); \ - res = _mm_xor_si128(tmp2, _mm_shuffle_epi32(k2, 0xFF)) - -#define EXPAND_KEY192(k0, res1, res2, res3, carry, rcon1, rcon2) \ - EXPAND_KEY192_PART1(tmp3, k0, res1, rcon1); \ - EXPAND_KEY192_PART2(carry, res1, tmp3); \ - res1 = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(res1), \ - _mm_castsi128_pd(tmp3), 0)); \ - res2 = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(tmp3), \ - _mm_castsi128_pd(carry), 1)); \ - EXPAND_KEY192_PART1(res3, tmp3, carry, rcon2) - -static void -native_key_expansion192(AESContext *cx, const unsigned char *key) -{ - __m128i *keySchedule = cx->keySchedule; - pre_align __m128i tmp1 post_align; - pre_align __m128i tmp2 post_align; - pre_align __m128i tmp3 post_align; - pre_align __m128i carry post_align; - keySchedule[0] = _mm_loadu_si128((__m128i *)key); - keySchedule[1] = _mm_loadu_si128((__m128i *)(key + 16)); - EXPAND_KEY192(keySchedule[0], keySchedule[1], keySchedule[2], - keySchedule[3], carry, 0x1, 0x2); - EXPAND_KEY192_PART2(keySchedule[4], carry, keySchedule[3]); - EXPAND_KEY192(keySchedule[3], keySchedule[4], keySchedule[5], - keySchedule[6], carry, 0x4, 0x8); - EXPAND_KEY192_PART2(keySchedule[7], carry, keySchedule[6]); - EXPAND_KEY192(keySchedule[6], keySchedule[7], keySchedule[8], - keySchedule[9], carry, 0x10, 0x20); - EXPAND_KEY192_PART2(keySchedule[10], carry, keySchedule[9]); - EXPAND_KEY192(keySchedule[9], keySchedule[10], keySchedule[11], - keySchedule[12], carry, 0x40, 0x80); -} - -#define EXPAND_KEY256_PART(res, rconx, k1x, k2x, X) \ - tmp_key = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(k2x, rconx), X); \ - tmp2 = _mm_slli_si128(k1x, 4); \ - tmp1 = _mm_xor_si128(k1x, tmp2); \ - tmp2 = _mm_slli_si128(tmp2, 4); \ - tmp1 = _mm_xor_si128(_mm_xor_si128(tmp1, tmp2), _mm_slli_si128(tmp2, 4)); \ - res = _mm_xor_si128(tmp1, tmp_key); - -#define EXPAND_KEY256(res1, res2, k1, k2, rcon) \ - EXPAND_KEY256_PART(res1, rcon, k1, k2, 0xFF); \ - EXPAND_KEY256_PART(res2, 0x00, k2, res1, 0xAA) - -static void -native_key_expansion256(AESContext *cx, const unsigned char *key) -{ - __m128i *keySchedule = cx->keySchedule; - pre_align __m128i tmp_key post_align; - pre_align __m128i tmp1 post_align; - pre_align __m128i tmp2 post_align; - keySchedule[0] = _mm_loadu_si128((__m128i *)key); - keySchedule[1] = _mm_loadu_si128((__m128i *)(key + 16)); - EXPAND_KEY256(keySchedule[2], keySchedule[3], keySchedule[0], - keySchedule[1], 0x01); - EXPAND_KEY256(keySchedule[4], keySchedule[5], keySchedule[2], - keySchedule[3], 0x02); - EXPAND_KEY256(keySchedule[6], keySchedule[7], keySchedule[4], - keySchedule[5], 0x04); - EXPAND_KEY256(keySchedule[8], keySchedule[9], keySchedule[6], - keySchedule[7], 0x08); - EXPAND_KEY256(keySchedule[10], keySchedule[11], keySchedule[8], - keySchedule[9], 0x10); - EXPAND_KEY256(keySchedule[12], keySchedule[13], keySchedule[10], - keySchedule[11], 0x20); - EXPAND_KEY256_PART(keySchedule[14], 0x40, keySchedule[12], - keySchedule[13], 0xFF); -} - -#endif /* NSS_X86_OR_X64 */ - -/* - * AES key expansion using aes-ni instructions. - */ -static void -native_key_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk) -{ -#ifdef NSS_X86_OR_X64 - switch (Nk) { - case 4: - native_key_expansion128(cx, key); - return; - case 6: - native_key_expansion192(cx, key); - return; - case 8: - native_key_expansion256(cx, key); - return; - default: - /* This shouldn't happen. */ - PORT_Assert(0); - } -#else - PORT_Assert(0); -#endif /* NSS_X86_OR_X64 */ -} - -static void -native_encryptBlock(AESContext *cx, - unsigned char *output, - const unsigned char *input) -{ -#ifdef NSS_X86_OR_X64 - int i; - pre_align __m128i m post_align = _mm_loadu_si128((__m128i *)input); - m = _mm_xor_si128(m, cx->keySchedule[0]); - for (i = 1; i < cx->Nr; ++i) { - m = _mm_aesenc_si128(m, cx->keySchedule[i]); - } - m = _mm_aesenclast_si128(m, cx->keySchedule[cx->Nr]); - _mm_storeu_si128((__m128i *)output, m); -#else - PORT_Assert(0); -#endif /* NSS_X86_OR_X64 */ -} - /* rijndael_key_expansion * * Generate the expanded key from the key input by the user. @@ -910,7 +702,7 @@ rijndael_encryptECB(AESContext *cx, unsigned char *output, if (aesni_support()) { /* Use hardware acceleration for normal AES parameters. */ - encryptor = &native_encryptBlock; + encryptor = &rijndael_native_encryptBlock; } else { encryptor = &rijndael_encryptBlock128; } @@ -1017,14 +809,7 @@ rijndael_decryptCBC(AESContext *cx, unsigned char *output, AESContext * AES_AllocateContext(void) { - /* aligned_alloc is C11 so we have to do it the old way. */ - AESContext *ctx = PORT_ZAlloc(sizeof(AESContext) + 15); - if (ctx == NULL) { - PORT_SetError(SEC_ERROR_NO_MEMORY); - return NULL; - } - ctx->mem = ctx; - return (AESContext *)(((uintptr_t)ctx + 15) & ~(uintptr_t)0x0F); + return PORT_ZNewAligned(AESContext, 16, mem); } /* @@ -1107,22 +892,13 @@ aes_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize, } else #endif { - -#if defined(RIJNDAEL_GENERATE_TABLES) || \ - defined(RIJNDAEL_GENERATE_TABLES_MACRO) - if (rijndaelTables == NULL) { - if (PR_CallOnce(&coRTInit, init_rijndael_tables) != PR_SUCCESS) { - return SECFailure; - } - } -#endif /* Generate expanded key */ if (encrypt) { if (use_hw_aes && (cx->mode == NSS_AES_GCM || cx->mode == NSS_AES || cx->mode == NSS_AES_CTR)) { PORT_Assert(keysize == 16 || keysize == 24 || keysize == 32); /* Prepare hardware key for normal AES parameters. */ - native_key_expansion(cx, key, Nk); + rijndael_native_key_expansion(cx, key, Nk); } else { rijndael_key_expansion(cx, key, Nk); } diff --git a/security/nss/lib/freebl/rijndael.h b/security/nss/lib/freebl/rijndael.h index 1f4a8a9f7..1b63a323d 100644 --- a/security/nss/lib/freebl/rijndael.h +++ b/security/nss/lib/freebl/rijndael.h @@ -8,8 +8,22 @@ #include "blapii.h" #include <stdint.h> -#ifdef NSS_X86_OR_X64 -#include <wmmintrin.h> /* aes-ni */ +#if defined(NSS_X86_OR_X64) +/* GCC <= 4.8 doesn't support including emmintrin.h without enabling SSE2 */ +#if !defined(__clang__) && defined(__GNUC__) && defined(__GNUC_MINOR__) && \ + (__GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ <= 8)) +#pragma GCC push_options +#pragma GCC target("sse2") +#undef NSS_DISABLE_SSE2 +#define NSS_DISABLE_SSE2 1 +#endif /* GCC <= 4.8 */ + +#include <emmintrin.h> /* __m128i */ + +#ifdef NSS_DISABLE_SSE2 +#undef NSS_DISABLE_SSE2 +#pragma GCC pop_options +#endif /* NSS_DISABLE_SSE2 */ #endif typedef void AESBlockFunc(AESContext *cx, diff --git a/security/nss/lib/freebl/rsa.c b/security/nss/lib/freebl/rsa.c index 7354d9317..a08636de6 100644 --- a/security/nss/lib/freebl/rsa.c +++ b/security/nss/lib/freebl/rsa.c @@ -276,7 +276,10 @@ RSAPrivateKey * RSA_NewKey(int keySizeInBits, SECItem *publicExponent) { unsigned int primeLen; - mp_int p, q, e, d; + mp_int p = { 0, 0, 0, NULL }; + mp_int q = { 0, 0, 0, NULL }; + mp_int e = { 0, 0, 0, NULL }; + mp_int d = { 0, 0, 0, NULL }; int kiter; int max_attempts; mp_err err = MP_OKAY; @@ -290,34 +293,46 @@ RSA_NewKey(int keySizeInBits, SECItem *publicExponent) PORT_SetError(SEC_ERROR_INVALID_ARGS); return NULL; } - /* 1. Allocate arena & key */ + /* 1. Set the public exponent and check if it's uneven and greater than 2.*/ + MP_DIGITS(&e) = 0; + CHECK_MPI_OK(mp_init(&e)); + SECITEM_TO_MPINT(*publicExponent, &e); + if (mp_iseven(&e) || !(mp_cmp_d(&e, 2) > 0)) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + goto cleanup; + } +#ifndef NSS_FIPS_DISABLED + /* Check that the exponent is not smaller than 65537 */ + if (mp_cmp_d(&e, 0x10001) < 0) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + goto cleanup; + } +#endif + + /* 2. Allocate arena & key */ arena = PORT_NewArena(NSS_FREEBL_DEFAULT_CHUNKSIZE); if (!arena) { PORT_SetError(SEC_ERROR_NO_MEMORY); - return NULL; + goto cleanup; } key = PORT_ArenaZNew(arena, RSAPrivateKey); if (!key) { PORT_SetError(SEC_ERROR_NO_MEMORY); - PORT_FreeArena(arena, PR_TRUE); - return NULL; + goto cleanup; } key->arena = arena; /* length of primes p and q (in bytes) */ primeLen = keySizeInBits / (2 * PR_BITS_PER_BYTE); MP_DIGITS(&p) = 0; MP_DIGITS(&q) = 0; - MP_DIGITS(&e) = 0; MP_DIGITS(&d) = 0; CHECK_MPI_OK(mp_init(&p)); CHECK_MPI_OK(mp_init(&q)); - CHECK_MPI_OK(mp_init(&e)); CHECK_MPI_OK(mp_init(&d)); - /* 2. Set the version number (PKCS1 v1.5 says it should be zero) */ + /* 3. Set the version number (PKCS1 v1.5 says it should be zero) */ SECITEM_AllocItem(arena, &key->version, 1); key->version.data[0] = 0; - /* 3. Set the public exponent */ - SECITEM_TO_MPINT(*publicExponent, &e); + kiter = 0; max_attempts = 5 * (keySizeInBits / 2); /* FIPS 186-4 B.3.3 steps 4.7 and 5.8 */ do { diff --git a/security/nss/lib/freebl/sha512.c b/security/nss/lib/freebl/sha512.c index 528f884b2..c1cfb7376 100644 --- a/security/nss/lib/freebl/sha512.c +++ b/security/nss/lib/freebl/sha512.c @@ -19,6 +19,7 @@ #include "secport.h" /* for PORT_XXX */ #include "blapi.h" #include "sha256.h" /* for struct SHA256ContextStr */ +#include "crypto_primitives.h" /* ============= Common constants and defines ======================= */ @@ -648,15 +649,6 @@ SHA224_Clone(SHA224Context *dest, SHA224Context *src) /* common #defines for SHA512 and SHA384 */ #if defined(HAVE_LONG_LONG) -#if defined(_MSC_VER) -#pragma intrinsic(_rotr64, _rotl64) -#define ROTR64(x, n) _rotr64(x, n) -#define ROTL64(x, n) _rotl64(x, n) -#else -#define ROTR64(x, n) ((x >> n) | (x << (64 - n))) -#define ROTL64(x, n) ((x << n) | (x >> (64 - n))) -#endif - #define S0(x) (ROTR64(x, 28) ^ ROTR64(x, 34) ^ ROTR64(x, 39)) #define S1(x) (ROTR64(x, 14) ^ ROTR64(x, 18) ^ ROTR64(x, 41)) #define s0(x) (ROTR64(x, 1) ^ ROTR64(x, 8) ^ SHR(x, 7)) @@ -670,36 +662,7 @@ SHA224_Clone(SHA224Context *dest, SHA224Context *src) #define ULLC(hi, lo) 0x##hi##lo##ULL #endif -#if defined(IS_LITTLE_ENDIAN) -#if defined(_MSC_VER) -#pragma intrinsic(_byteswap_uint64) -#define SHA_HTONLL(x) _byteswap_uint64(x) - -#elif defined(__GNUC__) && (defined(__x86_64__) || defined(__x86_64)) -static __inline__ PRUint64 -swap8b(PRUint64 value) -{ - __asm__("bswapq %0" - : "+r"(value)); - return (value); -} -#define SHA_HTONLL(x) swap8b(x) - -#else -#define SHA_MASK16 ULLC(0000FFFF, 0000FFFF) -#define SHA_MASK8 ULLC(00FF00FF, 00FF00FF) -static PRUint64 -swap8b(PRUint64 x) -{ - PRUint64 t1 = x; - t1 = ((t1 & SHA_MASK8) << 8) | ((t1 >> 8) & SHA_MASK8); - t1 = ((t1 & SHA_MASK16) << 16) | ((t1 >> 16) & SHA_MASK16); - return (t1 >> 32) | (t1 << 32); -} -#define SHA_HTONLL(x) swap8b(x) -#endif -#define BYTESWAP8(x) x = SHA_HTONLL(x) -#endif /* defined(IS_LITTLE_ENDIAN) */ +#define BYTESWAP8(x) x = FREEBL_HTONLL(x) #else /* no long long */ @@ -708,8 +671,8 @@ swap8b(PRUint64 x) { \ 0x##lo##U, 0x##hi##U \ } -#define SHA_HTONLL(x) (BYTESWAP4(x.lo), BYTESWAP4(x.hi), \ - x.hi ^= x.lo ^= x.hi ^= x.lo, x) +#define FREEBL_HTONLL(x) (BYTESWAP4(x.lo), BYTESWAP4(x.hi), \ + x.hi ^= x.lo ^= x.hi ^= x.lo, x) #define BYTESWAP8(x) \ do { \ PRUint32 tmp; \ diff --git a/security/nss/lib/freebl/shvfy.c b/security/nss/lib/freebl/shvfy.c index bd9cd1c94..98db4614b 100644 --- a/security/nss/lib/freebl/shvfy.c +++ b/security/nss/lib/freebl/shvfy.c @@ -19,6 +19,8 @@ #include "pqg.h" #include "blapii.h" +#ifndef NSS_FIPS_DISABLED + /* * Most modern version of Linux support a speed optimization scheme where an * application called prelink modifies programs and shared libraries to quickly @@ -537,3 +539,23 @@ BLAPI_VerifySelf(const char *name) } return blapi_SHVerify(name, (PRFuncPtr)decodeInt, PR_TRUE); } + +#else /* NSS_FIPS_DISABLED */ + +PRBool +BLAPI_SHVerifyFile(const char *shName) +{ + return PR_FALSE; +} +PRBool +BLAPI_SHVerify(const char *name, PRFuncPtr addr) +{ + return PR_FALSE; +} +PRBool +BLAPI_VerifySelf(const char *name) +{ + return PR_FALSE; +} + +#endif /* NSS_FIPS_DISABLED */ diff --git a/security/nss/lib/freebl/stubs.c b/security/nss/lib/freebl/stubs.c index 8e0784935..c42f694d7 100644 --- a/security/nss/lib/freebl/stubs.c +++ b/security/nss/lib/freebl/stubs.c @@ -36,8 +36,14 @@ #include <secport.h> #include <secitem.h> #include <blapi.h> +#include <assert.h> #include <private/pprio.h> +/* Android API < 21 doesn't define RTLD_NOLOAD */ +#ifndef RTLD_NOLOAD +#define RTLD_NOLOAD 0 +#endif + #define FREEBL_NO_WEAK 1 #define WEAK __attribute__((weak)) @@ -136,6 +142,11 @@ STUB_DECLARE(int, PORT_GetError_Util, (void)); STUB_DECLARE(PLArenaPool *, PORT_NewArena_Util, (unsigned long chunksize)); STUB_DECLARE(void, PORT_SetError_Util, (int value)); STUB_DECLARE(void *, PORT_ZAlloc_Util, (size_t len)); +STUB_DECLARE(void *, PORT_ZAllocAligned_Util, (size_t bytes, size_t alignment, + void **mem)); +STUB_DECLARE(void *, PORT_ZAllocAlignedOffset_Util, (size_t bytes, + size_t alignment, + size_t offset)); STUB_DECLARE(void, PORT_ZFree_Util, (void *ptr, size_t len)); STUB_DECLARE(void, PR_Assert, (const char *s, const char *file, PRIntn ln)); @@ -174,11 +185,14 @@ STUB_DECLARE(void, SECITEM_FreeItem_Util, (SECItem * zap, PRBool freeit)); STUB_DECLARE(void, SECITEM_ZfreeItem_Util, (SECItem * zap, PRBool freeit)); STUB_DECLARE(SECOidTag, SECOID_FindOIDTag_Util, (const SECItem *oid)); STUB_DECLARE(int, NSS_SecureMemcmp, (const void *a, const void *b, size_t n)); +STUB_DECLARE(unsigned int, NSS_SecureMemcmpZero, (const void *mem, size_t n)); #define PORT_ZNew_stub(type) (type *)PORT_ZAlloc_stub(sizeof(type)) #define PORT_New_stub(type) (type *)PORT_Alloc_stub(sizeof(type)) #define PORT_ZNewArray_stub(type, num) \ (type *)PORT_ZAlloc_stub(sizeof(type) * (num)) +#define PORT_ZNewAligned_stub(type, alignment, mem) \ + (type *)PORT_ZAllocAlignedOffset_stub(sizeof(type), alignment, offsetof(type, mem)) /* * NOTE: in order to support hashing only the memory allocation stubs, @@ -214,6 +228,57 @@ PORT_ZAlloc_stub(size_t len) return ptr; } +/* aligned_alloc is C11. This is an alternative to get aligned memory. */ +extern void * +PORT_ZAllocAligned_stub(size_t bytes, size_t alignment, void **mem) +{ + STUB_SAFE_CALL3(PORT_ZAllocAligned_Util, bytes, alignment, mem); + + /* This only works if alignement is a power of 2. */ + if ((alignment == 0) || (alignment & (alignment - 1))) { + return NULL; + } + + size_t x = alignment - 1; + size_t len = (bytes ? bytes : 1) + x; + + if (!mem) { + return NULL; + } + + /* Always allocate a non-zero amount of bytes */ + *mem = malloc(len); + if (!*mem) { + return NULL; + } + + memset(*mem, 0, len); + + /* We're pretty sure this is non-zero, but let's assure scan-build too. */ + void *ret = (void *)(((uintptr_t)*mem + x) & ~(uintptr_t)x); + assert(ret); + + return ret; +} + +extern void * +PORT_ZAllocAlignedOffset_stub(size_t size, size_t alignment, size_t offset) +{ + STUB_SAFE_CALL3(PORT_ZAllocAlignedOffset_Util, size, alignment, offset); + if (offset > size) { + return NULL; + } + + void *mem = NULL; + void *v = PORT_ZAllocAligned_stub(size, alignment, &mem); + if (!v) { + return NULL; + } + + *((void **)((uintptr_t)v + offset)) = mem; + return v; +} + extern void PORT_ZFree_stub(void *ptr, size_t len) { @@ -590,6 +655,13 @@ NSS_SecureMemcmp_stub(const void *a, const void *b, size_t n) abort(); } +extern unsigned int +NSS_SecureMemcmpZero_stub(const void *mem, size_t n) +{ + STUB_SAFE_CALL2(NSS_SecureMemcmpZero, mem, n); + abort(); +} + #ifdef FREEBL_NO_WEAK static const char *nsprLibName = SHLIB_PREFIX "nspr4." SHLIB_SUFFIX; @@ -642,6 +714,7 @@ freebl_InitNSSUtil(void *lib) STUB_FETCH_FUNCTION(SECITEM_ZfreeItem_Util); STUB_FETCH_FUNCTION(SECOID_FindOIDTag_Util); STUB_FETCH_FUNCTION(NSS_SecureMemcmp); + STUB_FETCH_FUNCTION(NSS_SecureMemcmpZero); return SECSuccess; } diff --git a/security/nss/lib/freebl/stubs.h b/security/nss/lib/freebl/stubs.h index 25ec394ec..e63cf7a5d 100644 --- a/security/nss/lib/freebl/stubs.h +++ b/security/nss/lib/freebl/stubs.h @@ -30,6 +30,8 @@ #define PORT_SetError PORT_SetError_stub #define PORT_ZAlloc PORT_ZAlloc_stub #define PORT_ZFree PORT_ZFree_stub +#define PORT_ZAllocAligned PORT_ZAllocAligned_stub +#define PORT_ZAllocAlignedOffset PORT_ZAllocAlignedOffset_stub #define SECITEM_AllocItem SECITEM_AllocItem_stub #define SECITEM_CompareItem SECITEM_CompareItem_stub @@ -38,6 +40,7 @@ #define SECITEM_ZfreeItem SECITEM_ZfreeItem_stub #define SECOID_FindOIDTag SECOID_FindOIDTag_stub #define NSS_SecureMemcmp NSS_SecureMemcmp_stub +#define NSS_SecureMemcmpZero NSS_SecureMemcmpZero_stub #define PR_Assert PR_Assert_stub #define PR_Access PR_Access_stub diff --git a/security/nss/lib/freebl/verified/FStar.c b/security/nss/lib/freebl/verified/FStar.c new file mode 100644 index 000000000..4e5f6d50d --- /dev/null +++ b/security/nss/lib/freebl/verified/FStar.c @@ -0,0 +1,255 @@ +/* Copyright 2016-2017 INRIA and Microsoft Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* This file was auto-generated by KreMLin! */ + +#include "FStar.h" + +static uint64_t +FStar_UInt128_constant_time_carry(uint64_t a, uint64_t b) +{ + return (a ^ ((a ^ b) | ((a - b) ^ b))) >> (uint32_t)63U; +} + +static uint64_t +FStar_UInt128_carry(uint64_t a, uint64_t b) +{ + return FStar_UInt128_constant_time_carry(a, b); +} + +FStar_UInt128_uint128 +FStar_UInt128_add(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return ( + (FStar_UInt128_uint128){ + .low = a.low + b.low, + .high = a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low) }); +} + +FStar_UInt128_uint128 +FStar_UInt128_add_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return ( + (FStar_UInt128_uint128){ + .low = a.low + b.low, + .high = a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low) }); +} + +FStar_UInt128_uint128 +FStar_UInt128_sub(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return ( + (FStar_UInt128_uint128){ + .low = a.low - b.low, + .high = a.high - b.high - FStar_UInt128_carry(a.low, a.low - b.low) }); +} + +static FStar_UInt128_uint128 +FStar_UInt128_sub_mod_impl(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return ( + (FStar_UInt128_uint128){ + .low = a.low - b.low, + .high = a.high - b.high - FStar_UInt128_carry(a.low, a.low - b.low) }); +} + +FStar_UInt128_uint128 +FStar_UInt128_sub_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return FStar_UInt128_sub_mod_impl(a, b); +} + +FStar_UInt128_uint128 +FStar_UInt128_logand(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return ((FStar_UInt128_uint128){.low = a.low & b.low, .high = a.high & b.high }); +} + +FStar_UInt128_uint128 +FStar_UInt128_logxor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return ((FStar_UInt128_uint128){.low = a.low ^ b.low, .high = a.high ^ b.high }); +} + +FStar_UInt128_uint128 +FStar_UInt128_logor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return ((FStar_UInt128_uint128){.low = a.low | b.low, .high = a.high | b.high }); +} + +FStar_UInt128_uint128 +FStar_UInt128_lognot(FStar_UInt128_uint128 a) +{ + return ((FStar_UInt128_uint128){.low = ~a.low, .high = ~a.high }); +} + +static uint32_t FStar_UInt128_u32_64 = (uint32_t)64U; + +static uint64_t +FStar_UInt128_add_u64_shift_left(uint64_t hi, uint64_t lo, uint32_t s) +{ + return (hi << s) + (lo >> (FStar_UInt128_u32_64 - s)); +} + +static uint64_t +FStar_UInt128_add_u64_shift_left_respec(uint64_t hi, uint64_t lo, uint32_t s) +{ + return FStar_UInt128_add_u64_shift_left(hi, lo, s); +} + +static FStar_UInt128_uint128 +FStar_UInt128_shift_left_small(FStar_UInt128_uint128 a, uint32_t s) +{ + if (s == (uint32_t)0U) + return a; + else + return ( + (FStar_UInt128_uint128){ + .low = a.low << s, + .high = FStar_UInt128_add_u64_shift_left_respec(a.high, a.low, s) }); +} + +static FStar_UInt128_uint128 +FStar_UInt128_shift_left_large(FStar_UInt128_uint128 a, uint32_t s) +{ + return ((FStar_UInt128_uint128){.low = (uint64_t)0U, .high = a.low << (s - FStar_UInt128_u32_64) }); +} + +FStar_UInt128_uint128 +FStar_UInt128_shift_left(FStar_UInt128_uint128 a, uint32_t s) +{ + if (s < FStar_UInt128_u32_64) + return FStar_UInt128_shift_left_small(a, s); + else + return FStar_UInt128_shift_left_large(a, s); +} + +static uint64_t +FStar_UInt128_add_u64_shift_right(uint64_t hi, uint64_t lo, uint32_t s) +{ + return (lo >> s) + (hi << (FStar_UInt128_u32_64 - s)); +} + +static uint64_t +FStar_UInt128_add_u64_shift_right_respec(uint64_t hi, uint64_t lo, uint32_t s) +{ + return FStar_UInt128_add_u64_shift_right(hi, lo, s); +} + +static FStar_UInt128_uint128 +FStar_UInt128_shift_right_small(FStar_UInt128_uint128 a, uint32_t s) +{ + if (s == (uint32_t)0U) + return a; + else + return ( + (FStar_UInt128_uint128){ + .low = FStar_UInt128_add_u64_shift_right_respec(a.high, a.low, s), + .high = a.high >> s }); +} + +static FStar_UInt128_uint128 +FStar_UInt128_shift_right_large(FStar_UInt128_uint128 a, uint32_t s) +{ + return ((FStar_UInt128_uint128){.low = a.high >> (s - FStar_UInt128_u32_64), .high = (uint64_t)0U }); +} + +FStar_UInt128_uint128 +FStar_UInt128_shift_right(FStar_UInt128_uint128 a, uint32_t s) +{ + if (s < FStar_UInt128_u32_64) + return FStar_UInt128_shift_right_small(a, s); + else + return FStar_UInt128_shift_right_large(a, s); +} + +FStar_UInt128_uint128 +FStar_UInt128_eq_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return ( + (FStar_UInt128_uint128){ + .low = FStar_UInt64_eq_mask(a.low, b.low) & FStar_UInt64_eq_mask(a.high, b.high), + .high = FStar_UInt64_eq_mask(a.low, b.low) & FStar_UInt64_eq_mask(a.high, b.high) }); +} + +FStar_UInt128_uint128 +FStar_UInt128_gte_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) +{ + return ( + (FStar_UInt128_uint128){ + .low = (FStar_UInt64_gte_mask(a.high, b.high) & ~FStar_UInt64_eq_mask(a.high, b.high)) | (FStar_UInt64_eq_mask(a.high, b.high) & FStar_UInt64_gte_mask(a.low, b.low)), + .high = (FStar_UInt64_gte_mask(a.high, b.high) & ~FStar_UInt64_eq_mask(a.high, b.high)) | (FStar_UInt64_eq_mask(a.high, b.high) & FStar_UInt64_gte_mask(a.low, b.low)) }); +} + +FStar_UInt128_uint128 +FStar_UInt128_uint64_to_uint128(uint64_t a) +{ + return ((FStar_UInt128_uint128){.low = a, .high = (uint64_t)0U }); +} + +uint64_t +FStar_UInt128_uint128_to_uint64(FStar_UInt128_uint128 a) +{ + return a.low; +} + +static uint64_t FStar_UInt128_u64_l32_mask = (uint64_t)0xffffffffU; + +static uint64_t +FStar_UInt128_u64_mod_32(uint64_t a) +{ + return a & FStar_UInt128_u64_l32_mask; +} + +static uint32_t FStar_UInt128_u32_32 = (uint32_t)32U; + +static K___uint64_t_uint64_t_uint64_t_uint64_t +FStar_UInt128_mul_wide_impl_t_(uint64_t x, uint64_t y) +{ + return ( + (K___uint64_t_uint64_t_uint64_t_uint64_t){ + .fst = FStar_UInt128_u64_mod_32(x), + .snd = FStar_UInt128_u64_mod_32(FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y)), + .thd = x >> FStar_UInt128_u32_32, + .f3 = (x >> FStar_UInt128_u32_32) * FStar_UInt128_u64_mod_32(y) + (FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y) >> FStar_UInt128_u32_32) }); +} + +static uint64_t +FStar_UInt128_u32_combine_(uint64_t hi, uint64_t lo) +{ + return lo + (hi << FStar_UInt128_u32_32); +} + +static FStar_UInt128_uint128 +FStar_UInt128_mul_wide_impl(uint64_t x, uint64_t y) +{ + K___uint64_t_uint64_t_uint64_t_uint64_t scrut = FStar_UInt128_mul_wide_impl_t_(x, y); + uint64_t u1 = scrut.fst; + uint64_t w3 = scrut.snd; + uint64_t x_ = scrut.thd; + uint64_t t_ = scrut.f3; + return ( + (FStar_UInt128_uint128){ + .low = FStar_UInt128_u32_combine_(u1 * (y >> FStar_UInt128_u32_32) + FStar_UInt128_u64_mod_32(t_), + w3), + .high = x_ * (y >> FStar_UInt128_u32_32) + (t_ >> FStar_UInt128_u32_32) + + ((u1 * (y >> FStar_UInt128_u32_32) + FStar_UInt128_u64_mod_32(t_)) >> FStar_UInt128_u32_32) }); +} + +FStar_UInt128_uint128 +FStar_UInt128_mul_wide(uint64_t x, uint64_t y) +{ + return FStar_UInt128_mul_wide_impl(x, y); +} diff --git a/security/nss/lib/freebl/verified/FStar.h b/security/nss/lib/freebl/verified/FStar.h new file mode 100644 index 000000000..7b105b8f2 --- /dev/null +++ b/security/nss/lib/freebl/verified/FStar.h @@ -0,0 +1,69 @@ +/* Copyright 2016-2017 INRIA and Microsoft Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* This file was auto-generated by KreMLin! */ +#ifndef __FStar_H +#define __FStar_H + +#include "kremlib_base.h" + +typedef struct +{ + uint64_t low; + uint64_t high; +} FStar_UInt128_uint128; + +typedef FStar_UInt128_uint128 FStar_UInt128_t; + +extern void FStar_UInt128_constant_time_carry_ok(uint64_t x0, uint64_t x1); + +FStar_UInt128_uint128 FStar_UInt128_add(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +FStar_UInt128_uint128 FStar_UInt128_add_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +FStar_UInt128_uint128 FStar_UInt128_sub(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +FStar_UInt128_uint128 FStar_UInt128_sub_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +FStar_UInt128_uint128 FStar_UInt128_logand(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +FStar_UInt128_uint128 FStar_UInt128_logxor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +FStar_UInt128_uint128 FStar_UInt128_logor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +FStar_UInt128_uint128 FStar_UInt128_lognot(FStar_UInt128_uint128 a); + +FStar_UInt128_uint128 FStar_UInt128_shift_left(FStar_UInt128_uint128 a, uint32_t s); + +FStar_UInt128_uint128 FStar_UInt128_shift_right(FStar_UInt128_uint128 a, uint32_t s); + +FStar_UInt128_uint128 FStar_UInt128_eq_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +FStar_UInt128_uint128 FStar_UInt128_gte_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +FStar_UInt128_uint128 FStar_UInt128_uint64_to_uint128(uint64_t a); + +uint64_t FStar_UInt128_uint128_to_uint64(FStar_UInt128_uint128 a); + +typedef struct +{ + uint64_t fst; + uint64_t snd; + uint64_t thd; + uint64_t f3; +} K___uint64_t_uint64_t_uint64_t_uint64_t; + +FStar_UInt128_uint128 FStar_UInt128_mul_wide(uint64_t x, uint64_t y); +#endif diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20.c b/security/nss/lib/freebl/verified/Hacl_Chacha20.c new file mode 100644 index 000000000..45a743035 --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Chacha20.c @@ -0,0 +1,270 @@ +/* Copyright 2016-2017 INRIA and Microsoft Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Hacl_Chacha20.h" + +static void +Hacl_Lib_LoadStore32_uint32s_from_le_bytes(uint32_t *output, uint8_t *input, uint32_t len) +{ + for (uint32_t i = (uint32_t)0U; i < len; i = i + (uint32_t)1U) { + uint8_t *x0 = input + (uint32_t)4U * i; + uint32_t inputi = load32_le(x0); + output[i] = inputi; + } +} + +static void +Hacl_Lib_LoadStore32_uint32s_to_le_bytes(uint8_t *output, uint32_t *input, uint32_t len) +{ + for (uint32_t i = (uint32_t)0U; i < len; i = i + (uint32_t)1U) { + uint32_t hd1 = input[i]; + uint8_t *x0 = output + (uint32_t)4U * i; + store32_le(x0, hd1); + } +} + +inline static uint32_t +Hacl_Impl_Chacha20_rotate_left(uint32_t a, uint32_t s) +{ + return a << s | a >> ((uint32_t)32U - s); +} + +inline static void +Hacl_Impl_Chacha20_quarter_round(uint32_t *st, uint32_t a, uint32_t b, uint32_t c, uint32_t d) +{ + uint32_t sa = st[a]; + uint32_t sb0 = st[b]; + st[a] = sa + sb0; + uint32_t sd = st[d]; + uint32_t sa10 = st[a]; + uint32_t sda = sd ^ sa10; + st[d] = Hacl_Impl_Chacha20_rotate_left(sda, (uint32_t)16U); + uint32_t sa0 = st[c]; + uint32_t sb1 = st[d]; + st[c] = sa0 + sb1; + uint32_t sd0 = st[b]; + uint32_t sa11 = st[c]; + uint32_t sda0 = sd0 ^ sa11; + st[b] = Hacl_Impl_Chacha20_rotate_left(sda0, (uint32_t)12U); + uint32_t sa2 = st[a]; + uint32_t sb2 = st[b]; + st[a] = sa2 + sb2; + uint32_t sd1 = st[d]; + uint32_t sa12 = st[a]; + uint32_t sda1 = sd1 ^ sa12; + st[d] = Hacl_Impl_Chacha20_rotate_left(sda1, (uint32_t)8U); + uint32_t sa3 = st[c]; + uint32_t sb = st[d]; + st[c] = sa3 + sb; + uint32_t sd2 = st[b]; + uint32_t sa1 = st[c]; + uint32_t sda2 = sd2 ^ sa1; + st[b] = Hacl_Impl_Chacha20_rotate_left(sda2, (uint32_t)7U); +} + +inline static void +Hacl_Impl_Chacha20_double_round(uint32_t *st) +{ + Hacl_Impl_Chacha20_quarter_round(st, (uint32_t)0U, (uint32_t)4U, (uint32_t)8U, (uint32_t)12U); + Hacl_Impl_Chacha20_quarter_round(st, (uint32_t)1U, (uint32_t)5U, (uint32_t)9U, (uint32_t)13U); + Hacl_Impl_Chacha20_quarter_round(st, (uint32_t)2U, (uint32_t)6U, (uint32_t)10U, (uint32_t)14U); + Hacl_Impl_Chacha20_quarter_round(st, (uint32_t)3U, (uint32_t)7U, (uint32_t)11U, (uint32_t)15U); + Hacl_Impl_Chacha20_quarter_round(st, (uint32_t)0U, (uint32_t)5U, (uint32_t)10U, (uint32_t)15U); + Hacl_Impl_Chacha20_quarter_round(st, (uint32_t)1U, (uint32_t)6U, (uint32_t)11U, (uint32_t)12U); + Hacl_Impl_Chacha20_quarter_round(st, (uint32_t)2U, (uint32_t)7U, (uint32_t)8U, (uint32_t)13U); + Hacl_Impl_Chacha20_quarter_round(st, (uint32_t)3U, (uint32_t)4U, (uint32_t)9U, (uint32_t)14U); +} + +inline static void +Hacl_Impl_Chacha20_rounds(uint32_t *st) +{ + for (uint32_t i = (uint32_t)0U; i < (uint32_t)10U; i = i + (uint32_t)1U) + Hacl_Impl_Chacha20_double_round(st); +} + +inline static void +Hacl_Impl_Chacha20_sum_states(uint32_t *st, uint32_t *st_) +{ + for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i = i + (uint32_t)1U) { + uint32_t xi = st[i]; + uint32_t yi = st_[i]; + st[i] = xi + yi; + } +} + +inline static void +Hacl_Impl_Chacha20_copy_state(uint32_t *st, uint32_t *st_) +{ + memcpy(st, st_, (uint32_t)16U * sizeof st_[0U]); +} + +inline static void +Hacl_Impl_Chacha20_chacha20_core(uint32_t *k, uint32_t *st, uint32_t ctr) +{ + st[12U] = ctr; + Hacl_Impl_Chacha20_copy_state(k, st); + Hacl_Impl_Chacha20_rounds(k); + Hacl_Impl_Chacha20_sum_states(k, st); +} + +inline static void +Hacl_Impl_Chacha20_chacha20_block(uint8_t *stream_block, uint32_t *st, uint32_t ctr) +{ + uint32_t st_[16U] = { 0U }; + Hacl_Impl_Chacha20_chacha20_core(st_, st, ctr); + Hacl_Lib_LoadStore32_uint32s_to_le_bytes(stream_block, st_, (uint32_t)16U); +} + +inline static void +Hacl_Impl_Chacha20_init(uint32_t *st, uint8_t *k, uint8_t *n1) +{ + uint32_t *stcst = st; + uint32_t *stk = st + (uint32_t)4U; + uint32_t *stc = st + (uint32_t)12U; + uint32_t *stn = st + (uint32_t)13U; + stcst[0U] = (uint32_t)0x61707865U; + stcst[1U] = (uint32_t)0x3320646eU; + stcst[2U] = (uint32_t)0x79622d32U; + stcst[3U] = (uint32_t)0x6b206574U; + Hacl_Lib_LoadStore32_uint32s_from_le_bytes(stk, k, (uint32_t)8U); + stc[0U] = (uint32_t)0U; + Hacl_Lib_LoadStore32_uint32s_from_le_bytes(stn, n1, (uint32_t)3U); +} + +static void +Hacl_Impl_Chacha20_update(uint8_t *output, uint8_t *plain, uint32_t *st, uint32_t ctr) +{ + uint32_t b[48U] = { 0U }; + uint32_t *k = b; + uint32_t *ib = b + (uint32_t)16U; + uint32_t *ob = b + (uint32_t)32U; + Hacl_Impl_Chacha20_chacha20_core(k, st, ctr); + Hacl_Lib_LoadStore32_uint32s_from_le_bytes(ib, plain, (uint32_t)16U); + for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i = i + (uint32_t)1U) { + uint32_t xi = ib[i]; + uint32_t yi = k[i]; + ob[i] = xi ^ yi; + } + Hacl_Lib_LoadStore32_uint32s_to_le_bytes(output, ob, (uint32_t)16U); +} + +static void +Hacl_Impl_Chacha20_update_last( + uint8_t *output, + uint8_t *plain, + uint32_t len, + uint32_t *st, + uint32_t ctr) +{ + uint8_t block[64U] = { 0U }; + Hacl_Impl_Chacha20_chacha20_block(block, st, ctr); + uint8_t *mask = block; + for (uint32_t i = (uint32_t)0U; i < len; i = i + (uint32_t)1U) { + uint8_t xi = plain[i]; + uint8_t yi = mask[i]; + output[i] = xi ^ yi; + } +} + +static void +Hacl_Impl_Chacha20_chacha20_counter_mode_blocks( + uint8_t *output, + uint8_t *plain, + uint32_t num_blocks, + uint32_t *st, + uint32_t ctr) +{ + for (uint32_t i = (uint32_t)0U; i < num_blocks; i = i + (uint32_t)1U) { + uint8_t *b = plain + (uint32_t)64U * i; + uint8_t *o = output + (uint32_t)64U * i; + Hacl_Impl_Chacha20_update(o, b, st, ctr + i); + } +} + +static void +Hacl_Impl_Chacha20_chacha20_counter_mode( + uint8_t *output, + uint8_t *plain, + uint32_t len, + uint32_t *st, + uint32_t ctr) +{ + uint32_t blocks_len = len >> (uint32_t)6U; + uint32_t part_len = len & (uint32_t)0x3fU; + uint8_t *output_ = output; + uint8_t *plain_ = plain; + uint8_t *output__ = output + (uint32_t)64U * blocks_len; + uint8_t *plain__ = plain + (uint32_t)64U * blocks_len; + Hacl_Impl_Chacha20_chacha20_counter_mode_blocks(output_, plain_, blocks_len, st, ctr); + if (part_len > (uint32_t)0U) + Hacl_Impl_Chacha20_update_last(output__, plain__, part_len, st, ctr + blocks_len); +} + +static void +Hacl_Impl_Chacha20_chacha20( + uint8_t *output, + uint8_t *plain, + uint32_t len, + uint8_t *k, + uint8_t *n1, + uint32_t ctr) +{ + uint32_t buf[16U] = { 0U }; + uint32_t *st = buf; + Hacl_Impl_Chacha20_init(st, k, n1); + Hacl_Impl_Chacha20_chacha20_counter_mode(output, plain, len, st, ctr); +} + +void +Hacl_Chacha20_chacha20_key_block(uint8_t *block, uint8_t *k, uint8_t *n1, uint32_t ctr) +{ + uint32_t buf[16U] = { 0U }; + uint32_t *st = buf; + Hacl_Impl_Chacha20_init(st, k, n1); + Hacl_Impl_Chacha20_chacha20_block(block, st, ctr); +} + +/* + This function implements Chacha20 + + val chacha20 : + output:uint8_p -> + plain:uint8_p{ disjoint output plain } -> + len:uint32_t{ v len = length output /\ v len = length plain } -> + key:uint8_p{ length key = 32 } -> + nonce:uint8_p{ length nonce = 12 } -> + ctr:uint32_t{ v ctr + length plain / 64 < pow2 32 } -> + Stack unit + (requires + fun h -> live h output /\ live h plain /\ live h nonce /\ live h key) + (ensures + fun h0 _ h1 -> + live h1 output /\ live h0 plain /\ modifies_1 output h0 h1 /\ + live h0 nonce /\ + live h0 key /\ + h1.[ output ] == + chacha20_encrypt_bytes h0.[ key ] h0.[ nonce ] (v ctr) h0.[ plain ]) +*/ +void +Hacl_Chacha20_chacha20( + uint8_t *output, + uint8_t *plain, + uint32_t len, + uint8_t *k, + uint8_t *n1, + uint32_t ctr) +{ + Hacl_Impl_Chacha20_chacha20(output, plain, len, k, n1, ctr); +} diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20.h b/security/nss/lib/freebl/verified/Hacl_Chacha20.h new file mode 100644 index 000000000..f97e44b74 --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Chacha20.h @@ -0,0 +1,81 @@ +/* Copyright 2016-2017 INRIA and Microsoft Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kremlib.h" +#ifndef __Hacl_Chacha20_H +#define __Hacl_Chacha20_H + +typedef uint32_t Hacl_Impl_Xor_Lemmas_u32; + +typedef uint8_t Hacl_Impl_Xor_Lemmas_u8; + +typedef uint8_t *Hacl_Lib_LoadStore32_uint8_p; + +typedef uint32_t Hacl_Impl_Chacha20_u32; + +typedef uint32_t Hacl_Impl_Chacha20_h32; + +typedef uint8_t *Hacl_Impl_Chacha20_uint8_p; + +typedef uint32_t *Hacl_Impl_Chacha20_state; + +typedef uint32_t Hacl_Impl_Chacha20_idx; + +typedef struct +{ + void *k; + void *n; +} Hacl_Impl_Chacha20_log_t_; + +typedef void *Hacl_Impl_Chacha20_log_t; + +typedef uint32_t Hacl_Lib_Create_h32; + +typedef uint8_t *Hacl_Chacha20_uint8_p; + +typedef uint32_t Hacl_Chacha20_uint32_t; + +void Hacl_Chacha20_chacha20_key_block(uint8_t *block, uint8_t *k, uint8_t *n1, uint32_t ctr); + +/* + This function implements Chacha20 + + val chacha20 : + output:uint8_p -> + plain:uint8_p{ disjoint output plain } -> + len:uint32_t{ v len = length output /\ v len = length plain } -> + key:uint8_p{ length key = 32 } -> + nonce:uint8_p{ length nonce = 12 } -> + ctr:uint32_t{ v ctr + length plain / 64 < pow2 32 } -> + Stack unit + (requires + fun h -> live h output /\ live h plain /\ live h nonce /\ live h key) + (ensures + fun h0 _ h1 -> + live h1 output /\ live h0 plain /\ modifies_1 output h0 h1 /\ + live h0 nonce /\ + live h0 key /\ + h1.[ output ] == + chacha20_encrypt_bytes h0.[ key ] h0.[ nonce ] (v ctr) h0.[ plain ]) +*/ +void +Hacl_Chacha20_chacha20( + uint8_t *output, + uint8_t *plain, + uint32_t len, + uint8_t *k, + uint8_t *n1, + uint32_t ctr); +#endif diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec128.c b/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec128.c new file mode 100644 index 000000000..4eba49f47 --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec128.c @@ -0,0 +1,390 @@ +/* Copyright 2016-2017 INRIA and Microsoft Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Hacl_Chacha20_Vec128.h" + +inline static void +Hacl_Impl_Chacha20_Vec128_State_state_incr(vec *k) +{ + vec k3 = k[3U]; + k[3U] = vec_increment(k3); +} + +inline static void +Hacl_Impl_Chacha20_Vec128_State_state_to_key_block(uint8_t *stream_block, vec *k) +{ + vec k0 = k[0U]; + vec k1 = k[1U]; + vec k2 = k[2U]; + vec k3 = k[3U]; + uint8_t *a = stream_block; + uint8_t *b = stream_block + (uint32_t)16U; + uint8_t *c = stream_block + (uint32_t)32U; + uint8_t *d = stream_block + (uint32_t)48U; + vec_store_le(a, k0); + vec_store_le(b, k1); + vec_store_le(c, k2); + vec_store_le(d, k3); +} + +inline static void +Hacl_Impl_Chacha20_Vec128_State_state_setup(vec *st, uint8_t *k, uint8_t *n1, uint32_t c) +{ + st[0U] = + vec_load_32x4((uint32_t)0x61707865U, + (uint32_t)0x3320646eU, + (uint32_t)0x79622d32U, + (uint32_t)0x6b206574U); + vec k0 = vec_load128_le(k); + vec k1 = vec_load128_le(k + (uint32_t)16U); + st[1U] = k0; + st[2U] = k1; + uint32_t n0 = load32_le(n1); + uint8_t *x00 = n1 + (uint32_t)4U; + uint32_t n10 = load32_le(x00); + uint8_t *x0 = n1 + (uint32_t)8U; + uint32_t n2 = load32_le(x0); + vec v1 = vec_load_32x4(c, n0, n10, n2); + st[3U] = v1; +} + +inline static void +Hacl_Impl_Chacha20_Vec128_round(vec *st) +{ + vec sa = st[0U]; + vec sb0 = st[1U]; + vec sd0 = st[3U]; + vec sa10 = vec_add(sa, sb0); + vec sd10 = vec_rotate_left(vec_xor(sd0, sa10), (uint32_t)16U); + st[0U] = sa10; + st[3U] = sd10; + vec sa0 = st[2U]; + vec sb1 = st[3U]; + vec sd2 = st[1U]; + vec sa11 = vec_add(sa0, sb1); + vec sd11 = vec_rotate_left(vec_xor(sd2, sa11), (uint32_t)12U); + st[2U] = sa11; + st[1U] = sd11; + vec sa2 = st[0U]; + vec sb2 = st[1U]; + vec sd3 = st[3U]; + vec sa12 = vec_add(sa2, sb2); + vec sd12 = vec_rotate_left(vec_xor(sd3, sa12), (uint32_t)8U); + st[0U] = sa12; + st[3U] = sd12; + vec sa3 = st[2U]; + vec sb = st[3U]; + vec sd = st[1U]; + vec sa1 = vec_add(sa3, sb); + vec sd1 = vec_rotate_left(vec_xor(sd, sa1), (uint32_t)7U); + st[2U] = sa1; + st[1U] = sd1; +} + +inline static void +Hacl_Impl_Chacha20_Vec128_double_round(vec *st) +{ + Hacl_Impl_Chacha20_Vec128_round(st); + vec r1 = st[1U]; + vec r20 = st[2U]; + vec r30 = st[3U]; + st[1U] = vec_shuffle_right(r1, (uint32_t)1U); + st[2U] = vec_shuffle_right(r20, (uint32_t)2U); + st[3U] = vec_shuffle_right(r30, (uint32_t)3U); + Hacl_Impl_Chacha20_Vec128_round(st); + vec r10 = st[1U]; + vec r2 = st[2U]; + vec r3 = st[3U]; + st[1U] = vec_shuffle_right(r10, (uint32_t)3U); + st[2U] = vec_shuffle_right(r2, (uint32_t)2U); + st[3U] = vec_shuffle_right(r3, (uint32_t)1U); +} + +inline static void +Hacl_Impl_Chacha20_Vec128_double_round3(vec *st, vec *st_, vec *st__) +{ + Hacl_Impl_Chacha20_Vec128_double_round(st); + Hacl_Impl_Chacha20_Vec128_double_round(st_); + Hacl_Impl_Chacha20_Vec128_double_round(st__); +} + +inline static void +Hacl_Impl_Chacha20_Vec128_sum_states(vec *st_, vec *st) +{ + vec s0 = st[0U]; + vec s1 = st[1U]; + vec s2 = st[2U]; + vec s3 = st[3U]; + vec s0_ = st_[0U]; + vec s1_ = st_[1U]; + vec s2_ = st_[2U]; + vec s3_ = st_[3U]; + st_[0U] = vec_add(s0_, s0); + st_[1U] = vec_add(s1_, s1); + st_[2U] = vec_add(s2_, s2); + st_[3U] = vec_add(s3_, s3); +} + +inline static void +Hacl_Impl_Chacha20_Vec128_copy_state(vec *st_, vec *st) +{ + vec st0 = st[0U]; + vec st1 = st[1U]; + vec st2 = st[2U]; + vec st3 = st[3U]; + st_[0U] = st0; + st_[1U] = st1; + st_[2U] = st2; + st_[3U] = st3; +} + +inline static void +Hacl_Impl_Chacha20_Vec128_chacha20_core(vec *k, vec *st) +{ + Hacl_Impl_Chacha20_Vec128_copy_state(k, st); + for (uint32_t i = (uint32_t)0U; i < (uint32_t)10U; i = i + (uint32_t)1U) + Hacl_Impl_Chacha20_Vec128_double_round(k); + Hacl_Impl_Chacha20_Vec128_sum_states(k, st); +} + +static void +Hacl_Impl_Chacha20_Vec128_state_incr(vec *st) +{ + Hacl_Impl_Chacha20_Vec128_State_state_incr(st); +} + +inline static void +Hacl_Impl_Chacha20_Vec128_chacha20_incr3(vec *k0, vec *k1, vec *k2, vec *st) +{ + Hacl_Impl_Chacha20_Vec128_copy_state(k0, st); + Hacl_Impl_Chacha20_Vec128_copy_state(k1, st); + Hacl_Impl_Chacha20_Vec128_state_incr(k1); + Hacl_Impl_Chacha20_Vec128_copy_state(k2, k1); + Hacl_Impl_Chacha20_Vec128_state_incr(k2); +} + +inline static void +Hacl_Impl_Chacha20_Vec128_chacha20_sum3(vec *k0, vec *k1, vec *k2, vec *st) +{ + Hacl_Impl_Chacha20_Vec128_sum_states(k0, st); + Hacl_Impl_Chacha20_Vec128_state_incr(st); + Hacl_Impl_Chacha20_Vec128_sum_states(k1, st); + Hacl_Impl_Chacha20_Vec128_state_incr(st); + Hacl_Impl_Chacha20_Vec128_sum_states(k2, st); +} + +inline static void +Hacl_Impl_Chacha20_Vec128_chacha20_core3(vec *k0, vec *k1, vec *k2, vec *st) +{ + Hacl_Impl_Chacha20_Vec128_chacha20_incr3(k0, k1, k2, st); + for (uint32_t i = (uint32_t)0U; i < (uint32_t)10U; i = i + (uint32_t)1U) + Hacl_Impl_Chacha20_Vec128_double_round3(k0, k1, k2); + Hacl_Impl_Chacha20_Vec128_chacha20_sum3(k0, k1, k2, st); +} + +inline static void +Hacl_Impl_Chacha20_Vec128_chacha20_block(uint8_t *stream_block, vec *st) +{ + KRML_CHECK_SIZE(vec_zero(), (uint32_t)4U); + vec k[4U]; + for (uint32_t _i = 0U; _i < (uint32_t)4U; ++_i) + k[_i] = vec_zero(); + Hacl_Impl_Chacha20_Vec128_chacha20_core(k, st); + Hacl_Impl_Chacha20_Vec128_State_state_to_key_block(stream_block, k); +} + +inline static void +Hacl_Impl_Chacha20_Vec128_init(vec *st, uint8_t *k, uint8_t *n1, uint32_t ctr) +{ + Hacl_Impl_Chacha20_Vec128_State_state_setup(st, k, n1, ctr); +} + +static void +Hacl_Impl_Chacha20_Vec128_update_last(uint8_t *output, uint8_t *plain, uint32_t len, vec *st) +{ + uint8_t block[64U] = { 0U }; + Hacl_Impl_Chacha20_Vec128_chacha20_block(block, st); + uint8_t *mask = block; + for (uint32_t i = (uint32_t)0U; i < len; i = i + (uint32_t)1U) { + uint8_t xi = plain[i]; + uint8_t yi = mask[i]; + output[i] = xi ^ yi; + } +} + +static void +Hacl_Impl_Chacha20_Vec128_xor_block(uint8_t *output, uint8_t *plain, vec *st) +{ + vec p0 = vec_load_le(plain); + vec p1 = vec_load_le(plain + (uint32_t)16U); + vec p2 = vec_load_le(plain + (uint32_t)32U); + vec p3 = vec_load_le(plain + (uint32_t)48U); + vec k0 = st[0U]; + vec k1 = st[1U]; + vec k2 = st[2U]; + vec k3 = st[3U]; + vec o00 = vec_xor(p0, k0); + vec o10 = vec_xor(p1, k1); + vec o20 = vec_xor(p2, k2); + vec o30 = vec_xor(p3, k3); + uint8_t *o0 = output; + uint8_t *o1 = output + (uint32_t)16U; + uint8_t *o2 = output + (uint32_t)32U; + uint8_t *o3 = output + (uint32_t)48U; + vec_store_le(o0, o00); + vec_store_le(o1, o10); + vec_store_le(o2, o20); + vec_store_le(o3, o30); +} + +static void +Hacl_Impl_Chacha20_Vec128_update(uint8_t *output, uint8_t *plain, vec *st) +{ + KRML_CHECK_SIZE(vec_zero(), (uint32_t)4U); + vec k[4U]; + for (uint32_t _i = 0U; _i < (uint32_t)4U; ++_i) + k[_i] = vec_zero(); + Hacl_Impl_Chacha20_Vec128_chacha20_core(k, st); + Hacl_Impl_Chacha20_Vec128_xor_block(output, plain, k); +} + +static void +Hacl_Impl_Chacha20_Vec128_update3(uint8_t *output, uint8_t *plain, vec *st) +{ + KRML_CHECK_SIZE(vec_zero(), (uint32_t)4U); + vec k0[4U]; + for (uint32_t _i = 0U; _i < (uint32_t)4U; ++_i) + k0[_i] = vec_zero(); + KRML_CHECK_SIZE(vec_zero(), (uint32_t)4U); + vec k1[4U]; + for (uint32_t _i = 0U; _i < (uint32_t)4U; ++_i) + k1[_i] = vec_zero(); + KRML_CHECK_SIZE(vec_zero(), (uint32_t)4U); + vec k2[4U]; + for (uint32_t _i = 0U; _i < (uint32_t)4U; ++_i) + k2[_i] = vec_zero(); + Hacl_Impl_Chacha20_Vec128_chacha20_core3(k0, k1, k2, st); + uint8_t *p0 = plain; + uint8_t *p1 = plain + (uint32_t)64U; + uint8_t *p2 = plain + (uint32_t)128U; + uint8_t *o0 = output; + uint8_t *o1 = output + (uint32_t)64U; + uint8_t *o2 = output + (uint32_t)128U; + Hacl_Impl_Chacha20_Vec128_xor_block(o0, p0, k0); + Hacl_Impl_Chacha20_Vec128_xor_block(o1, p1, k1); + Hacl_Impl_Chacha20_Vec128_xor_block(o2, p2, k2); +} + +static void +Hacl_Impl_Chacha20_Vec128_update3_( + uint8_t *output, + uint8_t *plain, + uint32_t len, + vec *st, + uint32_t i) +{ + uint8_t *out_block = output + (uint32_t)192U * i; + uint8_t *plain_block = plain + (uint32_t)192U * i; + Hacl_Impl_Chacha20_Vec128_update3(out_block, plain_block, st); + Hacl_Impl_Chacha20_Vec128_state_incr(st); +} + +static void +Hacl_Impl_Chacha20_Vec128_chacha20_counter_mode_blocks3( + uint8_t *output, + uint8_t *plain, + uint32_t len, + vec *st) +{ + for (uint32_t i = (uint32_t)0U; i < len; i = i + (uint32_t)1U) + Hacl_Impl_Chacha20_Vec128_update3_(output, plain, len, st, i); +} + +static void +Hacl_Impl_Chacha20_Vec128_chacha20_counter_mode_blocks( + uint8_t *output, + uint8_t *plain, + uint32_t len, + vec *st) +{ + uint32_t len3 = len / (uint32_t)3U; + uint32_t rest3 = len % (uint32_t)3U; + uint8_t *plain_ = plain; + uint8_t *blocks1 = plain + (uint32_t)192U * len3; + uint8_t *output_ = output; + uint8_t *outs = output + (uint32_t)192U * len3; + Hacl_Impl_Chacha20_Vec128_chacha20_counter_mode_blocks3(output_, plain_, len3, st); + if (rest3 == (uint32_t)2U) { + uint8_t *block0 = blocks1; + uint8_t *block1 = blocks1 + (uint32_t)64U; + uint8_t *out0 = outs; + uint8_t *out1 = outs + (uint32_t)64U; + Hacl_Impl_Chacha20_Vec128_update(out0, block0, st); + Hacl_Impl_Chacha20_Vec128_state_incr(st); + Hacl_Impl_Chacha20_Vec128_update(out1, block1, st); + Hacl_Impl_Chacha20_Vec128_state_incr(st); + } else if (rest3 == (uint32_t)1U) { + Hacl_Impl_Chacha20_Vec128_update(outs, blocks1, st); + Hacl_Impl_Chacha20_Vec128_state_incr(st); + } +} + +static void +Hacl_Impl_Chacha20_Vec128_chacha20_counter_mode( + uint8_t *output, + uint8_t *plain, + uint32_t len, + vec *st) +{ + uint32_t blocks_len = len >> (uint32_t)6U; + uint32_t part_len = len & (uint32_t)0x3fU; + uint8_t *output_ = output; + uint8_t *plain_ = plain; + uint8_t *output__ = output + (uint32_t)64U * blocks_len; + uint8_t *plain__ = plain + (uint32_t)64U * blocks_len; + Hacl_Impl_Chacha20_Vec128_chacha20_counter_mode_blocks(output_, plain_, blocks_len, st); + if (part_len > (uint32_t)0U) + Hacl_Impl_Chacha20_Vec128_update_last(output__, plain__, part_len, st); +} + +static void +Hacl_Impl_Chacha20_Vec128_chacha20( + uint8_t *output, + uint8_t *plain, + uint32_t len, + uint8_t *k, + uint8_t *n1, + uint32_t ctr) +{ + KRML_CHECK_SIZE(vec_zero(), (uint32_t)4U); + vec buf[4U]; + for (uint32_t _i = 0U; _i < (uint32_t)4U; ++_i) + buf[_i] = vec_zero(); + vec *st = buf; + Hacl_Impl_Chacha20_Vec128_init(st, k, n1, ctr); + Hacl_Impl_Chacha20_Vec128_chacha20_counter_mode(output, plain, len, st); +} + +void +Hacl_Chacha20_Vec128_chacha20( + uint8_t *output, + uint8_t *plain, + uint32_t len, + uint8_t *k, + uint8_t *n1, + uint32_t ctr) +{ + Hacl_Impl_Chacha20_Vec128_chacha20(output, plain, len, k, n1, ctr); +} diff --git a/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec128.h b/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec128.h new file mode 100644 index 000000000..57942093d --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Chacha20_Vec128.h @@ -0,0 +1,61 @@ +/* Copyright 2016-2017 INRIA and Microsoft Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kremlib.h" +#ifndef __Hacl_Chacha20_Vec128_H +#define __Hacl_Chacha20_Vec128_H + +#include "vec128.h" + +typedef uint32_t Hacl_Impl_Xor_Lemmas_u32; + +typedef uint8_t Hacl_Impl_Xor_Lemmas_u8; + +typedef uint32_t Hacl_Impl_Chacha20_Vec128_State_u32; + +typedef uint32_t Hacl_Impl_Chacha20_Vec128_State_h32; + +typedef uint8_t *Hacl_Impl_Chacha20_Vec128_State_uint8_p; + +typedef vec *Hacl_Impl_Chacha20_Vec128_State_state; + +typedef uint32_t Hacl_Impl_Chacha20_Vec128_u32; + +typedef uint32_t Hacl_Impl_Chacha20_Vec128_h32; + +typedef uint8_t *Hacl_Impl_Chacha20_Vec128_uint8_p; + +typedef uint32_t Hacl_Impl_Chacha20_Vec128_idx; + +typedef struct +{ + void *k; + void *n; + uint32_t ctr; +} Hacl_Impl_Chacha20_Vec128_log_t_; + +typedef void *Hacl_Impl_Chacha20_Vec128_log_t; + +typedef uint8_t *Hacl_Chacha20_Vec128_uint8_p; + +void +Hacl_Chacha20_Vec128_chacha20( + uint8_t *output, + uint8_t *plain, + uint32_t len, + uint8_t *k, + uint8_t *n1, + uint32_t ctr); +#endif diff --git a/security/nss/lib/freebl/verified/Hacl_Curve25519.c b/security/nss/lib/freebl/verified/Hacl_Curve25519.c new file mode 100644 index 000000000..f2dcddc57 --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Curve25519.c @@ -0,0 +1,845 @@ +/* Copyright 2016-2017 INRIA and Microsoft Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Hacl_Curve25519.h" + +static void +Hacl_Bignum_Modulo_carry_top(uint64_t *b) +{ + uint64_t b4 = b[4U]; + uint64_t b0 = b[0U]; + uint64_t b4_ = b4 & (uint64_t)0x7ffffffffffffU; + uint64_t b0_ = b0 + (uint64_t)19U * (b4 >> (uint32_t)51U); + b[4U] = b4_; + b[0U] = b0_; +} + +inline static void +Hacl_Bignum_Fproduct_copy_from_wide_(uint64_t *output, FStar_UInt128_t *input) +{ + { + FStar_UInt128_t xi = input[0U]; + output[0U] = FStar_UInt128_uint128_to_uint64(xi); + } + { + FStar_UInt128_t xi = input[1U]; + output[1U] = FStar_UInt128_uint128_to_uint64(xi); + } + { + FStar_UInt128_t xi = input[2U]; + output[2U] = FStar_UInt128_uint128_to_uint64(xi); + } + { + FStar_UInt128_t xi = input[3U]; + output[3U] = FStar_UInt128_uint128_to_uint64(xi); + } + { + FStar_UInt128_t xi = input[4U]; + output[4U] = FStar_UInt128_uint128_to_uint64(xi); + } +} + +inline static void +Hacl_Bignum_Fproduct_sum_scalar_multiplication_( + FStar_UInt128_t *output, + uint64_t *input, + uint64_t s) +{ + { + FStar_UInt128_t xi = output[0U]; + uint64_t yi = input[0U]; + output[0U] = FStar_UInt128_add_mod(xi, FStar_UInt128_mul_wide(yi, s)); + } + { + FStar_UInt128_t xi = output[1U]; + uint64_t yi = input[1U]; + output[1U] = FStar_UInt128_add_mod(xi, FStar_UInt128_mul_wide(yi, s)); + } + { + FStar_UInt128_t xi = output[2U]; + uint64_t yi = input[2U]; + output[2U] = FStar_UInt128_add_mod(xi, FStar_UInt128_mul_wide(yi, s)); + } + { + FStar_UInt128_t xi = output[3U]; + uint64_t yi = input[3U]; + output[3U] = FStar_UInt128_add_mod(xi, FStar_UInt128_mul_wide(yi, s)); + } + { + FStar_UInt128_t xi = output[4U]; + uint64_t yi = input[4U]; + output[4U] = FStar_UInt128_add_mod(xi, FStar_UInt128_mul_wide(yi, s)); + } +} + +inline static void +Hacl_Bignum_Fproduct_carry_wide_(FStar_UInt128_t *tmp) +{ + { + uint32_t ctr = (uint32_t)0U; + FStar_UInt128_t tctr = tmp[ctr]; + FStar_UInt128_t tctrp1 = tmp[ctr + (uint32_t)1U]; + uint64_t r0 = FStar_UInt128_uint128_to_uint64(tctr) & (uint64_t)0x7ffffffffffffU; + FStar_UInt128_t c = FStar_UInt128_shift_right(tctr, (uint32_t)51U); + tmp[ctr] = FStar_UInt128_uint64_to_uint128(r0); + tmp[ctr + (uint32_t)1U] = FStar_UInt128_add(tctrp1, c); + } + { + uint32_t ctr = (uint32_t)1U; + FStar_UInt128_t tctr = tmp[ctr]; + FStar_UInt128_t tctrp1 = tmp[ctr + (uint32_t)1U]; + uint64_t r0 = FStar_UInt128_uint128_to_uint64(tctr) & (uint64_t)0x7ffffffffffffU; + FStar_UInt128_t c = FStar_UInt128_shift_right(tctr, (uint32_t)51U); + tmp[ctr] = FStar_UInt128_uint64_to_uint128(r0); + tmp[ctr + (uint32_t)1U] = FStar_UInt128_add(tctrp1, c); + } + { + uint32_t ctr = (uint32_t)2U; + FStar_UInt128_t tctr = tmp[ctr]; + FStar_UInt128_t tctrp1 = tmp[ctr + (uint32_t)1U]; + uint64_t r0 = FStar_UInt128_uint128_to_uint64(tctr) & (uint64_t)0x7ffffffffffffU; + FStar_UInt128_t c = FStar_UInt128_shift_right(tctr, (uint32_t)51U); + tmp[ctr] = FStar_UInt128_uint64_to_uint128(r0); + tmp[ctr + (uint32_t)1U] = FStar_UInt128_add(tctrp1, c); + } + { + uint32_t ctr = (uint32_t)3U; + FStar_UInt128_t tctr = tmp[ctr]; + FStar_UInt128_t tctrp1 = tmp[ctr + (uint32_t)1U]; + uint64_t r0 = FStar_UInt128_uint128_to_uint64(tctr) & (uint64_t)0x7ffffffffffffU; + FStar_UInt128_t c = FStar_UInt128_shift_right(tctr, (uint32_t)51U); + tmp[ctr] = FStar_UInt128_uint64_to_uint128(r0); + tmp[ctr + (uint32_t)1U] = FStar_UInt128_add(tctrp1, c); + } +} + +inline static void +Hacl_Bignum_Fmul_shift_reduce(uint64_t *output) +{ + uint64_t tmp = output[4U]; + { + uint32_t ctr = (uint32_t)5U - (uint32_t)0U - (uint32_t)1U; + uint64_t z = output[ctr - (uint32_t)1U]; + output[ctr] = z; + } + { + uint32_t ctr = (uint32_t)5U - (uint32_t)1U - (uint32_t)1U; + uint64_t z = output[ctr - (uint32_t)1U]; + output[ctr] = z; + } + { + uint32_t ctr = (uint32_t)5U - (uint32_t)2U - (uint32_t)1U; + uint64_t z = output[ctr - (uint32_t)1U]; + output[ctr] = z; + } + { + uint32_t ctr = (uint32_t)5U - (uint32_t)3U - (uint32_t)1U; + uint64_t z = output[ctr - (uint32_t)1U]; + output[ctr] = z; + } + output[0U] = tmp; + uint64_t b0 = output[0U]; + output[0U] = (uint64_t)19U * b0; +} + +static void +Hacl_Bignum_Fmul_mul_shift_reduce_(FStar_UInt128_t *output, uint64_t *input, uint64_t *input21) +{ + { + uint64_t input2i = input21[0U]; + Hacl_Bignum_Fproduct_sum_scalar_multiplication_(output, input, input2i); + Hacl_Bignum_Fmul_shift_reduce(input); + } + { + uint64_t input2i = input21[1U]; + Hacl_Bignum_Fproduct_sum_scalar_multiplication_(output, input, input2i); + Hacl_Bignum_Fmul_shift_reduce(input); + } + { + uint64_t input2i = input21[2U]; + Hacl_Bignum_Fproduct_sum_scalar_multiplication_(output, input, input2i); + Hacl_Bignum_Fmul_shift_reduce(input); + } + { + uint64_t input2i = input21[3U]; + Hacl_Bignum_Fproduct_sum_scalar_multiplication_(output, input, input2i); + Hacl_Bignum_Fmul_shift_reduce(input); + } + uint32_t i = (uint32_t)4U; + uint64_t input2i = input21[i]; + Hacl_Bignum_Fproduct_sum_scalar_multiplication_(output, input, input2i); +} + +inline static void +Hacl_Bignum_Fmul_fmul(uint64_t *output, uint64_t *input, uint64_t *input21) +{ + uint64_t tmp[5U] = { 0U }; + memcpy(tmp, input, (uint32_t)5U * sizeof input[0U]); + KRML_CHECK_SIZE(FStar_UInt128_uint64_to_uint128((uint64_t)0U), (uint32_t)5U); + FStar_UInt128_t t[5U]; + for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) + t[_i] = FStar_UInt128_uint64_to_uint128((uint64_t)0U); + Hacl_Bignum_Fmul_mul_shift_reduce_(t, tmp, input21); + Hacl_Bignum_Fproduct_carry_wide_(t); + FStar_UInt128_t b4 = t[4U]; + FStar_UInt128_t b0 = t[0U]; + FStar_UInt128_t + b4_ = FStar_UInt128_logand(b4, FStar_UInt128_uint64_to_uint128((uint64_t)0x7ffffffffffffU)); + FStar_UInt128_t + b0_ = + FStar_UInt128_add(b0, + FStar_UInt128_mul_wide((uint64_t)19U, + FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(b4, (uint32_t)51U)))); + t[4U] = b4_; + t[0U] = b0_; + Hacl_Bignum_Fproduct_copy_from_wide_(output, t); + uint64_t i0 = output[0U]; + uint64_t i1 = output[1U]; + uint64_t i0_ = i0 & (uint64_t)0x7ffffffffffffU; + uint64_t i1_ = i1 + (i0 >> (uint32_t)51U); + output[0U] = i0_; + output[1U] = i1_; +} + +inline static void +Hacl_Bignum_Fsquare_fsquare__(FStar_UInt128_t *tmp, uint64_t *output) +{ + uint64_t r0 = output[0U]; + uint64_t r1 = output[1U]; + uint64_t r2 = output[2U]; + uint64_t r3 = output[3U]; + uint64_t r4 = output[4U]; + uint64_t d0 = r0 * (uint64_t)2U; + uint64_t d1 = r1 * (uint64_t)2U; + uint64_t d2 = r2 * (uint64_t)2U * (uint64_t)19U; + uint64_t d419 = r4 * (uint64_t)19U; + uint64_t d4 = d419 * (uint64_t)2U; + FStar_UInt128_t + s0 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(r0, r0), + FStar_UInt128_mul_wide(d4, r1)), + FStar_UInt128_mul_wide(d2, r3)); + FStar_UInt128_t + s1 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, r1), + FStar_UInt128_mul_wide(d4, r2)), + FStar_UInt128_mul_wide(r3 * (uint64_t)19U, r3)); + FStar_UInt128_t + s2 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, r2), + FStar_UInt128_mul_wide(r1, r1)), + FStar_UInt128_mul_wide(d4, r3)); + FStar_UInt128_t + s3 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, r3), + FStar_UInt128_mul_wide(d1, r2)), + FStar_UInt128_mul_wide(r4, d419)); + FStar_UInt128_t + s4 = + FStar_UInt128_add(FStar_UInt128_add(FStar_UInt128_mul_wide(d0, r4), + FStar_UInt128_mul_wide(d1, r3)), + FStar_UInt128_mul_wide(r2, r2)); + tmp[0U] = s0; + tmp[1U] = s1; + tmp[2U] = s2; + tmp[3U] = s3; + tmp[4U] = s4; +} + +inline static void +Hacl_Bignum_Fsquare_fsquare_(FStar_UInt128_t *tmp, uint64_t *output) +{ + Hacl_Bignum_Fsquare_fsquare__(tmp, output); + Hacl_Bignum_Fproduct_carry_wide_(tmp); + FStar_UInt128_t b4 = tmp[4U]; + FStar_UInt128_t b0 = tmp[0U]; + FStar_UInt128_t + b4_ = FStar_UInt128_logand(b4, FStar_UInt128_uint64_to_uint128((uint64_t)0x7ffffffffffffU)); + FStar_UInt128_t + b0_ = + FStar_UInt128_add(b0, + FStar_UInt128_mul_wide((uint64_t)19U, + FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(b4, (uint32_t)51U)))); + tmp[4U] = b4_; + tmp[0U] = b0_; + Hacl_Bignum_Fproduct_copy_from_wide_(output, tmp); + uint64_t i0 = output[0U]; + uint64_t i1 = output[1U]; + uint64_t i0_ = i0 & (uint64_t)0x7ffffffffffffU; + uint64_t i1_ = i1 + (i0 >> (uint32_t)51U); + output[0U] = i0_; + output[1U] = i1_; +} + +static void +Hacl_Bignum_Fsquare_fsquare_times_(uint64_t *input, FStar_UInt128_t *tmp, uint32_t count1) +{ + Hacl_Bignum_Fsquare_fsquare_(tmp, input); + for (uint32_t i = (uint32_t)1U; i < count1; i = i + (uint32_t)1U) + Hacl_Bignum_Fsquare_fsquare_(tmp, input); +} + +inline static void +Hacl_Bignum_Fsquare_fsquare_times(uint64_t *output, uint64_t *input, uint32_t count1) +{ + KRML_CHECK_SIZE(FStar_UInt128_uint64_to_uint128((uint64_t)0U), (uint32_t)5U); + FStar_UInt128_t t[5U]; + for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) + t[_i] = FStar_UInt128_uint64_to_uint128((uint64_t)0U); + memcpy(output, input, (uint32_t)5U * sizeof input[0U]); + Hacl_Bignum_Fsquare_fsquare_times_(output, t, count1); +} + +inline static void +Hacl_Bignum_Fsquare_fsquare_times_inplace(uint64_t *output, uint32_t count1) +{ + KRML_CHECK_SIZE(FStar_UInt128_uint64_to_uint128((uint64_t)0U), (uint32_t)5U); + FStar_UInt128_t t[5U]; + for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) + t[_i] = FStar_UInt128_uint64_to_uint128((uint64_t)0U); + Hacl_Bignum_Fsquare_fsquare_times_(output, t, count1); +} + +inline static void +Hacl_Bignum_Crecip_crecip(uint64_t *out, uint64_t *z) +{ + uint64_t buf[20U] = { 0U }; + uint64_t *a = buf; + uint64_t *t00 = buf + (uint32_t)5U; + uint64_t *b0 = buf + (uint32_t)10U; + Hacl_Bignum_Fsquare_fsquare_times(a, z, (uint32_t)1U); + Hacl_Bignum_Fsquare_fsquare_times(t00, a, (uint32_t)2U); + Hacl_Bignum_Fmul_fmul(b0, t00, z); + Hacl_Bignum_Fmul_fmul(a, b0, a); + Hacl_Bignum_Fsquare_fsquare_times(t00, a, (uint32_t)1U); + Hacl_Bignum_Fmul_fmul(b0, t00, b0); + Hacl_Bignum_Fsquare_fsquare_times(t00, b0, (uint32_t)5U); + uint64_t *t01 = buf + (uint32_t)5U; + uint64_t *b1 = buf + (uint32_t)10U; + uint64_t *c0 = buf + (uint32_t)15U; + Hacl_Bignum_Fmul_fmul(b1, t01, b1); + Hacl_Bignum_Fsquare_fsquare_times(t01, b1, (uint32_t)10U); + Hacl_Bignum_Fmul_fmul(c0, t01, b1); + Hacl_Bignum_Fsquare_fsquare_times(t01, c0, (uint32_t)20U); + Hacl_Bignum_Fmul_fmul(t01, t01, c0); + Hacl_Bignum_Fsquare_fsquare_times_inplace(t01, (uint32_t)10U); + Hacl_Bignum_Fmul_fmul(b1, t01, b1); + Hacl_Bignum_Fsquare_fsquare_times(t01, b1, (uint32_t)50U); + uint64_t *a0 = buf; + uint64_t *t0 = buf + (uint32_t)5U; + uint64_t *b = buf + (uint32_t)10U; + uint64_t *c = buf + (uint32_t)15U; + Hacl_Bignum_Fmul_fmul(c, t0, b); + Hacl_Bignum_Fsquare_fsquare_times(t0, c, (uint32_t)100U); + Hacl_Bignum_Fmul_fmul(t0, t0, c); + Hacl_Bignum_Fsquare_fsquare_times_inplace(t0, (uint32_t)50U); + Hacl_Bignum_Fmul_fmul(t0, t0, b); + Hacl_Bignum_Fsquare_fsquare_times_inplace(t0, (uint32_t)5U); + Hacl_Bignum_Fmul_fmul(out, t0, a0); +} + +inline static void +Hacl_Bignum_fsum(uint64_t *a, uint64_t *b) +{ + { + uint64_t xi = a[0U]; + uint64_t yi = b[0U]; + a[0U] = xi + yi; + } + { + uint64_t xi = a[1U]; + uint64_t yi = b[1U]; + a[1U] = xi + yi; + } + { + uint64_t xi = a[2U]; + uint64_t yi = b[2U]; + a[2U] = xi + yi; + } + { + uint64_t xi = a[3U]; + uint64_t yi = b[3U]; + a[3U] = xi + yi; + } + { + uint64_t xi = a[4U]; + uint64_t yi = b[4U]; + a[4U] = xi + yi; + } +} + +inline static void +Hacl_Bignum_fdifference(uint64_t *a, uint64_t *b) +{ + uint64_t tmp[5U] = { 0U }; + memcpy(tmp, b, (uint32_t)5U * sizeof b[0U]); + uint64_t b0 = tmp[0U]; + uint64_t b1 = tmp[1U]; + uint64_t b2 = tmp[2U]; + uint64_t b3 = tmp[3U]; + uint64_t b4 = tmp[4U]; + tmp[0U] = b0 + (uint64_t)0x3fffffffffff68U; + tmp[1U] = b1 + (uint64_t)0x3ffffffffffff8U; + tmp[2U] = b2 + (uint64_t)0x3ffffffffffff8U; + tmp[3U] = b3 + (uint64_t)0x3ffffffffffff8U; + tmp[4U] = b4 + (uint64_t)0x3ffffffffffff8U; + { + uint64_t xi = a[0U]; + uint64_t yi = tmp[0U]; + a[0U] = yi - xi; + } + { + uint64_t xi = a[1U]; + uint64_t yi = tmp[1U]; + a[1U] = yi - xi; + } + { + uint64_t xi = a[2U]; + uint64_t yi = tmp[2U]; + a[2U] = yi - xi; + } + { + uint64_t xi = a[3U]; + uint64_t yi = tmp[3U]; + a[3U] = yi - xi; + } + { + uint64_t xi = a[4U]; + uint64_t yi = tmp[4U]; + a[4U] = yi - xi; + } +} + +inline static void +Hacl_Bignum_fscalar(uint64_t *output, uint64_t *b, uint64_t s) +{ + KRML_CHECK_SIZE(FStar_UInt128_uint64_to_uint128((uint64_t)0U), (uint32_t)5U); + FStar_UInt128_t tmp[5U]; + for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) + tmp[_i] = FStar_UInt128_uint64_to_uint128((uint64_t)0U); + { + uint64_t xi = b[0U]; + tmp[0U] = FStar_UInt128_mul_wide(xi, s); + } + { + uint64_t xi = b[1U]; + tmp[1U] = FStar_UInt128_mul_wide(xi, s); + } + { + uint64_t xi = b[2U]; + tmp[2U] = FStar_UInt128_mul_wide(xi, s); + } + { + uint64_t xi = b[3U]; + tmp[3U] = FStar_UInt128_mul_wide(xi, s); + } + { + uint64_t xi = b[4U]; + tmp[4U] = FStar_UInt128_mul_wide(xi, s); + } + Hacl_Bignum_Fproduct_carry_wide_(tmp); + FStar_UInt128_t b4 = tmp[4U]; + FStar_UInt128_t b0 = tmp[0U]; + FStar_UInt128_t + b4_ = FStar_UInt128_logand(b4, FStar_UInt128_uint64_to_uint128((uint64_t)0x7ffffffffffffU)); + FStar_UInt128_t + b0_ = + FStar_UInt128_add(b0, + FStar_UInt128_mul_wide((uint64_t)19U, + FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(b4, (uint32_t)51U)))); + tmp[4U] = b4_; + tmp[0U] = b0_; + Hacl_Bignum_Fproduct_copy_from_wide_(output, tmp); +} + +inline static void +Hacl_Bignum_fmul(uint64_t *output, uint64_t *a, uint64_t *b) +{ + Hacl_Bignum_Fmul_fmul(output, a, b); +} + +inline static void +Hacl_Bignum_crecip(uint64_t *output, uint64_t *input) +{ + Hacl_Bignum_Crecip_crecip(output, input); +} + +static void +Hacl_EC_Point_swap_conditional_step(uint64_t *a, uint64_t *b, uint64_t swap1, uint32_t ctr) +{ + uint32_t i = ctr - (uint32_t)1U; + uint64_t ai = a[i]; + uint64_t bi = b[i]; + uint64_t x = swap1 & (ai ^ bi); + uint64_t ai1 = ai ^ x; + uint64_t bi1 = bi ^ x; + a[i] = ai1; + b[i] = bi1; +} + +static void +Hacl_EC_Point_swap_conditional_(uint64_t *a, uint64_t *b, uint64_t swap1, uint32_t ctr) +{ + if (!(ctr == (uint32_t)0U)) { + Hacl_EC_Point_swap_conditional_step(a, b, swap1, ctr); + uint32_t i = ctr - (uint32_t)1U; + Hacl_EC_Point_swap_conditional_(a, b, swap1, i); + } +} + +static void +Hacl_EC_Point_swap_conditional(uint64_t *a, uint64_t *b, uint64_t iswap) +{ + uint64_t swap1 = (uint64_t)0U - iswap; + Hacl_EC_Point_swap_conditional_(a, b, swap1, (uint32_t)5U); + Hacl_EC_Point_swap_conditional_(a + (uint32_t)5U, b + (uint32_t)5U, swap1, (uint32_t)5U); +} + +static void +Hacl_EC_Point_copy(uint64_t *output, uint64_t *input) +{ + memcpy(output, input, (uint32_t)5U * sizeof input[0U]); + memcpy(output + (uint32_t)5U, + input + (uint32_t)5U, + (uint32_t)5U * sizeof(input + (uint32_t)5U)[0U]); +} + +static void +Hacl_EC_AddAndDouble_fmonty( + uint64_t *pp, + uint64_t *ppq, + uint64_t *p, + uint64_t *pq, + uint64_t *qmqp) +{ + uint64_t *qx = qmqp; + uint64_t *x2 = pp; + uint64_t *z2 = pp + (uint32_t)5U; + uint64_t *x3 = ppq; + uint64_t *z3 = ppq + (uint32_t)5U; + uint64_t *x = p; + uint64_t *z = p + (uint32_t)5U; + uint64_t *xprime = pq; + uint64_t *zprime = pq + (uint32_t)5U; + uint64_t buf[40U] = { 0U }; + uint64_t *origx = buf; + uint64_t *origxprime = buf + (uint32_t)5U; + uint64_t *xxprime0 = buf + (uint32_t)25U; + uint64_t *zzprime0 = buf + (uint32_t)30U; + memcpy(origx, x, (uint32_t)5U * sizeof x[0U]); + Hacl_Bignum_fsum(x, z); + Hacl_Bignum_fdifference(z, origx); + memcpy(origxprime, xprime, (uint32_t)5U * sizeof xprime[0U]); + Hacl_Bignum_fsum(xprime, zprime); + Hacl_Bignum_fdifference(zprime, origxprime); + Hacl_Bignum_fmul(xxprime0, xprime, z); + Hacl_Bignum_fmul(zzprime0, x, zprime); + uint64_t *origxprime0 = buf + (uint32_t)5U; + uint64_t *xx0 = buf + (uint32_t)15U; + uint64_t *zz0 = buf + (uint32_t)20U; + uint64_t *xxprime = buf + (uint32_t)25U; + uint64_t *zzprime = buf + (uint32_t)30U; + uint64_t *zzzprime = buf + (uint32_t)35U; + memcpy(origxprime0, xxprime, (uint32_t)5U * sizeof xxprime[0U]); + Hacl_Bignum_fsum(xxprime, zzprime); + Hacl_Bignum_fdifference(zzprime, origxprime0); + Hacl_Bignum_Fsquare_fsquare_times(x3, xxprime, (uint32_t)1U); + Hacl_Bignum_Fsquare_fsquare_times(zzzprime, zzprime, (uint32_t)1U); + Hacl_Bignum_fmul(z3, zzzprime, qx); + Hacl_Bignum_Fsquare_fsquare_times(xx0, x, (uint32_t)1U); + Hacl_Bignum_Fsquare_fsquare_times(zz0, z, (uint32_t)1U); + uint64_t *zzz = buf + (uint32_t)10U; + uint64_t *xx = buf + (uint32_t)15U; + uint64_t *zz = buf + (uint32_t)20U; + Hacl_Bignum_fmul(x2, xx, zz); + Hacl_Bignum_fdifference(zz, xx); + uint64_t scalar = (uint64_t)121665U; + Hacl_Bignum_fscalar(zzz, zz, scalar); + Hacl_Bignum_fsum(zzz, xx); + Hacl_Bignum_fmul(z2, zzz, zz); +} + +static void +Hacl_EC_Ladder_SmallLoop_cmult_small_loop_step( + uint64_t *nq, + uint64_t *nqpq, + uint64_t *nq2, + uint64_t *nqpq2, + uint64_t *q, + uint8_t byt) +{ + uint64_t bit = (uint64_t)(byt >> (uint32_t)7U); + Hacl_EC_Point_swap_conditional(nq, nqpq, bit); + Hacl_EC_AddAndDouble_fmonty(nq2, nqpq2, nq, nqpq, q); + uint64_t bit0 = (uint64_t)(byt >> (uint32_t)7U); + Hacl_EC_Point_swap_conditional(nq2, nqpq2, bit0); +} + +static void +Hacl_EC_Ladder_SmallLoop_cmult_small_loop_double_step( + uint64_t *nq, + uint64_t *nqpq, + uint64_t *nq2, + uint64_t *nqpq2, + uint64_t *q, + uint8_t byt) +{ + Hacl_EC_Ladder_SmallLoop_cmult_small_loop_step(nq, nqpq, nq2, nqpq2, q, byt); + uint8_t byt1 = byt << (uint32_t)1U; + Hacl_EC_Ladder_SmallLoop_cmult_small_loop_step(nq2, nqpq2, nq, nqpq, q, byt1); +} + +static void +Hacl_EC_Ladder_SmallLoop_cmult_small_loop( + uint64_t *nq, + uint64_t *nqpq, + uint64_t *nq2, + uint64_t *nqpq2, + uint64_t *q, + uint8_t byt, + uint32_t i) +{ + if (!(i == (uint32_t)0U)) { + uint32_t i_ = i - (uint32_t)1U; + Hacl_EC_Ladder_SmallLoop_cmult_small_loop_double_step(nq, nqpq, nq2, nqpq2, q, byt); + uint8_t byt_ = byt << (uint32_t)2U; + Hacl_EC_Ladder_SmallLoop_cmult_small_loop(nq, nqpq, nq2, nqpq2, q, byt_, i_); + } +} + +static void +Hacl_EC_Ladder_BigLoop_cmult_big_loop( + uint8_t *n1, + uint64_t *nq, + uint64_t *nqpq, + uint64_t *nq2, + uint64_t *nqpq2, + uint64_t *q, + uint32_t i) +{ + if (!(i == (uint32_t)0U)) { + uint32_t i1 = i - (uint32_t)1U; + uint8_t byte = n1[i1]; + Hacl_EC_Ladder_SmallLoop_cmult_small_loop(nq, nqpq, nq2, nqpq2, q, byte, (uint32_t)4U); + Hacl_EC_Ladder_BigLoop_cmult_big_loop(n1, nq, nqpq, nq2, nqpq2, q, i1); + } +} + +static void +Hacl_EC_Ladder_cmult(uint64_t *result, uint8_t *n1, uint64_t *q) +{ + uint64_t point_buf[40U] = { 0U }; + uint64_t *nq = point_buf; + uint64_t *nqpq = point_buf + (uint32_t)10U; + uint64_t *nq2 = point_buf + (uint32_t)20U; + uint64_t *nqpq2 = point_buf + (uint32_t)30U; + Hacl_EC_Point_copy(nqpq, q); + nq[0U] = (uint64_t)1U; + Hacl_EC_Ladder_BigLoop_cmult_big_loop(n1, nq, nqpq, nq2, nqpq2, q, (uint32_t)32U); + Hacl_EC_Point_copy(result, nq); +} + +static void +Hacl_EC_Format_fexpand(uint64_t *output, uint8_t *input) +{ + uint64_t i0 = load64_le(input); + uint8_t *x00 = input + (uint32_t)6U; + uint64_t i1 = load64_le(x00); + uint8_t *x01 = input + (uint32_t)12U; + uint64_t i2 = load64_le(x01); + uint8_t *x02 = input + (uint32_t)19U; + uint64_t i3 = load64_le(x02); + uint8_t *x0 = input + (uint32_t)24U; + uint64_t i4 = load64_le(x0); + uint64_t output0 = i0 & (uint64_t)0x7ffffffffffffU; + uint64_t output1 = i1 >> (uint32_t)3U & (uint64_t)0x7ffffffffffffU; + uint64_t output2 = i2 >> (uint32_t)6U & (uint64_t)0x7ffffffffffffU; + uint64_t output3 = i3 >> (uint32_t)1U & (uint64_t)0x7ffffffffffffU; + uint64_t output4 = i4 >> (uint32_t)12U & (uint64_t)0x7ffffffffffffU; + output[0U] = output0; + output[1U] = output1; + output[2U] = output2; + output[3U] = output3; + output[4U] = output4; +} + +static void +Hacl_EC_Format_fcontract_first_carry_pass(uint64_t *input) +{ + uint64_t t0 = input[0U]; + uint64_t t1 = input[1U]; + uint64_t t2 = input[2U]; + uint64_t t3 = input[3U]; + uint64_t t4 = input[4U]; + uint64_t t1_ = t1 + (t0 >> (uint32_t)51U); + uint64_t t0_ = t0 & (uint64_t)0x7ffffffffffffU; + uint64_t t2_ = t2 + (t1_ >> (uint32_t)51U); + uint64_t t1__ = t1_ & (uint64_t)0x7ffffffffffffU; + uint64_t t3_ = t3 + (t2_ >> (uint32_t)51U); + uint64_t t2__ = t2_ & (uint64_t)0x7ffffffffffffU; + uint64_t t4_ = t4 + (t3_ >> (uint32_t)51U); + uint64_t t3__ = t3_ & (uint64_t)0x7ffffffffffffU; + input[0U] = t0_; + input[1U] = t1__; + input[2U] = t2__; + input[3U] = t3__; + input[4U] = t4_; +} + +static void +Hacl_EC_Format_fcontract_first_carry_full(uint64_t *input) +{ + Hacl_EC_Format_fcontract_first_carry_pass(input); + Hacl_Bignum_Modulo_carry_top(input); +} + +static void +Hacl_EC_Format_fcontract_second_carry_pass(uint64_t *input) +{ + uint64_t t0 = input[0U]; + uint64_t t1 = input[1U]; + uint64_t t2 = input[2U]; + uint64_t t3 = input[3U]; + uint64_t t4 = input[4U]; + uint64_t t1_ = t1 + (t0 >> (uint32_t)51U); + uint64_t t0_ = t0 & (uint64_t)0x7ffffffffffffU; + uint64_t t2_ = t2 + (t1_ >> (uint32_t)51U); + uint64_t t1__ = t1_ & (uint64_t)0x7ffffffffffffU; + uint64_t t3_ = t3 + (t2_ >> (uint32_t)51U); + uint64_t t2__ = t2_ & (uint64_t)0x7ffffffffffffU; + uint64_t t4_ = t4 + (t3_ >> (uint32_t)51U); + uint64_t t3__ = t3_ & (uint64_t)0x7ffffffffffffU; + input[0U] = t0_; + input[1U] = t1__; + input[2U] = t2__; + input[3U] = t3__; + input[4U] = t4_; +} + +static void +Hacl_EC_Format_fcontract_second_carry_full(uint64_t *input) +{ + Hacl_EC_Format_fcontract_second_carry_pass(input); + Hacl_Bignum_Modulo_carry_top(input); + uint64_t i0 = input[0U]; + uint64_t i1 = input[1U]; + uint64_t i0_ = i0 & (uint64_t)0x7ffffffffffffU; + uint64_t i1_ = i1 + (i0 >> (uint32_t)51U); + input[0U] = i0_; + input[1U] = i1_; +} + +static void +Hacl_EC_Format_fcontract_trim(uint64_t *input) +{ + uint64_t a0 = input[0U]; + uint64_t a1 = input[1U]; + uint64_t a2 = input[2U]; + uint64_t a3 = input[3U]; + uint64_t a4 = input[4U]; + uint64_t mask0 = FStar_UInt64_gte_mask(a0, (uint64_t)0x7ffffffffffedU); + uint64_t mask1 = FStar_UInt64_eq_mask(a1, (uint64_t)0x7ffffffffffffU); + uint64_t mask2 = FStar_UInt64_eq_mask(a2, (uint64_t)0x7ffffffffffffU); + uint64_t mask3 = FStar_UInt64_eq_mask(a3, (uint64_t)0x7ffffffffffffU); + uint64_t mask4 = FStar_UInt64_eq_mask(a4, (uint64_t)0x7ffffffffffffU); + uint64_t mask = (((mask0 & mask1) & mask2) & mask3) & mask4; + uint64_t a0_ = a0 - ((uint64_t)0x7ffffffffffedU & mask); + uint64_t a1_ = a1 - ((uint64_t)0x7ffffffffffffU & mask); + uint64_t a2_ = a2 - ((uint64_t)0x7ffffffffffffU & mask); + uint64_t a3_ = a3 - ((uint64_t)0x7ffffffffffffU & mask); + uint64_t a4_ = a4 - ((uint64_t)0x7ffffffffffffU & mask); + input[0U] = a0_; + input[1U] = a1_; + input[2U] = a2_; + input[3U] = a3_; + input[4U] = a4_; +} + +static void +Hacl_EC_Format_fcontract_store(uint8_t *output, uint64_t *input) +{ + uint64_t t0 = input[0U]; + uint64_t t1 = input[1U]; + uint64_t t2 = input[2U]; + uint64_t t3 = input[3U]; + uint64_t t4 = input[4U]; + uint64_t o0 = t1 << (uint32_t)51U | t0; + uint64_t o1 = t2 << (uint32_t)38U | t1 >> (uint32_t)13U; + uint64_t o2 = t3 << (uint32_t)25U | t2 >> (uint32_t)26U; + uint64_t o3 = t4 << (uint32_t)12U | t3 >> (uint32_t)39U; + uint8_t *b0 = output; + uint8_t *b1 = output + (uint32_t)8U; + uint8_t *b2 = output + (uint32_t)16U; + uint8_t *b3 = output + (uint32_t)24U; + store64_le(b0, o0); + store64_le(b1, o1); + store64_le(b2, o2); + store64_le(b3, o3); +} + +static void +Hacl_EC_Format_fcontract(uint8_t *output, uint64_t *input) +{ + Hacl_EC_Format_fcontract_first_carry_full(input); + Hacl_EC_Format_fcontract_second_carry_full(input); + Hacl_EC_Format_fcontract_trim(input); + Hacl_EC_Format_fcontract_store(output, input); +} + +static void +Hacl_EC_Format_scalar_of_point(uint8_t *scalar, uint64_t *point) +{ + uint64_t *x = point; + uint64_t *z = point + (uint32_t)5U; + uint64_t buf[10U] = { 0U }; + uint64_t *zmone = buf; + uint64_t *sc = buf + (uint32_t)5U; + Hacl_Bignum_crecip(zmone, z); + Hacl_Bignum_fmul(sc, x, zmone); + Hacl_EC_Format_fcontract(scalar, sc); +} + +void +Hacl_EC_crypto_scalarmult(uint8_t *mypublic, uint8_t *secret, uint8_t *basepoint) +{ + uint64_t buf0[10U] = { 0U }; + uint64_t *x0 = buf0; + uint64_t *z = buf0 + (uint32_t)5U; + Hacl_EC_Format_fexpand(x0, basepoint); + z[0U] = (uint64_t)1U; + uint64_t *q = buf0; + uint8_t e[32U] = { 0U }; + memcpy(e, secret, (uint32_t)32U * sizeof secret[0U]); + uint8_t e0 = e[0U]; + uint8_t e31 = e[31U]; + uint8_t e01 = e0 & (uint8_t)248U; + uint8_t e311 = e31 & (uint8_t)127U; + uint8_t e312 = e311 | (uint8_t)64U; + e[0U] = e01; + e[31U] = e312; + uint8_t *scalar = e; + uint64_t buf[15U] = { 0U }; + uint64_t *nq = buf; + uint64_t *x = nq; + x[0U] = (uint64_t)1U; + Hacl_EC_Ladder_cmult(nq, scalar, q); + Hacl_EC_Format_scalar_of_point(mypublic, nq); +} + +void +Hacl_Curve25519_crypto_scalarmult(uint8_t *mypublic, uint8_t *secret, uint8_t *basepoint) +{ + Hacl_EC_crypto_scalarmult(mypublic, secret, basepoint); +} diff --git a/security/nss/lib/freebl/verified/Hacl_Curve25519.h b/security/nss/lib/freebl/verified/Hacl_Curve25519.h new file mode 100644 index 000000000..0e443f177 --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Curve25519.h @@ -0,0 +1,57 @@ +/* Copyright 2016-2017 INRIA and Microsoft Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kremlib.h" +#ifndef __Hacl_Curve25519_H +#define __Hacl_Curve25519_H + +typedef uint64_t Hacl_Bignum_Constants_limb; + +typedef FStar_UInt128_t Hacl_Bignum_Constants_wide; + +typedef uint64_t Hacl_Bignum_Parameters_limb; + +typedef FStar_UInt128_t Hacl_Bignum_Parameters_wide; + +typedef uint32_t Hacl_Bignum_Parameters_ctr; + +typedef uint64_t *Hacl_Bignum_Parameters_felem; + +typedef FStar_UInt128_t *Hacl_Bignum_Parameters_felem_wide; + +typedef void *Hacl_Bignum_Parameters_seqelem; + +typedef void *Hacl_Bignum_Parameters_seqelem_wide; + +typedef FStar_UInt128_t Hacl_Bignum_Wide_t; + +typedef uint64_t Hacl_Bignum_Limb_t; + +extern void Hacl_Bignum_lemma_diff(Prims_int x0, Prims_int x1, Prims_pos x2); + +typedef uint64_t *Hacl_EC_Point_point; + +typedef uint8_t *Hacl_EC_Ladder_SmallLoop_uint8_p; + +typedef uint8_t *Hacl_EC_Ladder_uint8_p; + +typedef uint8_t *Hacl_EC_Format_uint8_p; + +void Hacl_EC_crypto_scalarmult(uint8_t *mypublic, uint8_t *secret, uint8_t *basepoint); + +typedef uint8_t *Hacl_Curve25519_uint8_p; + +void Hacl_Curve25519_crypto_scalarmult(uint8_t *mypublic, uint8_t *secret, uint8_t *basepoint); +#endif diff --git a/security/nss/lib/freebl/verified/Hacl_Poly1305_64.c b/security/nss/lib/freebl/verified/Hacl_Poly1305_64.c new file mode 100644 index 000000000..984031ae2 --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Poly1305_64.c @@ -0,0 +1,485 @@ +/* Copyright 2016-2017 INRIA and Microsoft Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "Hacl_Poly1305_64.h" + +inline static void +Hacl_Bignum_Modulo_reduce(uint64_t *b) +{ + uint64_t b0 = b[0U]; + b[0U] = (b0 << (uint32_t)4U) + (b0 << (uint32_t)2U); +} + +inline static void +Hacl_Bignum_Modulo_carry_top(uint64_t *b) +{ + uint64_t b2 = b[2U]; + uint64_t b0 = b[0U]; + uint64_t b2_42 = b2 >> (uint32_t)42U; + b[2U] = b2 & (uint64_t)0x3ffffffffffU; + b[0U] = (b2_42 << (uint32_t)2U) + b2_42 + b0; +} + +inline static void +Hacl_Bignum_Modulo_carry_top_wide(FStar_UInt128_t *b) +{ + FStar_UInt128_t b2 = b[2U]; + FStar_UInt128_t b0 = b[0U]; + FStar_UInt128_t + b2_ = FStar_UInt128_logand(b2, FStar_UInt128_uint64_to_uint128((uint64_t)0x3ffffffffffU)); + uint64_t b2_42 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(b2, (uint32_t)42U)); + FStar_UInt128_t + b0_ = FStar_UInt128_add(b0, FStar_UInt128_uint64_to_uint128((b2_42 << (uint32_t)2U) + b2_42)); + b[2U] = b2_; + b[0U] = b0_; +} + +inline static void +Hacl_Bignum_Fproduct_copy_from_wide_(uint64_t *output, FStar_UInt128_t *input) +{ + for (uint32_t i = (uint32_t)0U; i < (uint32_t)3U; i = i + (uint32_t)1U) { + FStar_UInt128_t xi = input[i]; + output[i] = FStar_UInt128_uint128_to_uint64(xi); + } +} + +inline static void +Hacl_Bignum_Fproduct_sum_scalar_multiplication_( + FStar_UInt128_t *output, + uint64_t *input, + uint64_t s) +{ + for (uint32_t i = (uint32_t)0U; i < (uint32_t)3U; i = i + (uint32_t)1U) { + FStar_UInt128_t xi = output[i]; + uint64_t yi = input[i]; + output[i] = FStar_UInt128_add_mod(xi, FStar_UInt128_mul_wide(yi, s)); + } +} + +inline static void +Hacl_Bignum_Fproduct_carry_wide_(FStar_UInt128_t *tmp) +{ + for (uint32_t i = (uint32_t)0U; i < (uint32_t)2U; i = i + (uint32_t)1U) { + uint32_t ctr = i; + FStar_UInt128_t tctr = tmp[ctr]; + FStar_UInt128_t tctrp1 = tmp[ctr + (uint32_t)1U]; + uint64_t r0 = FStar_UInt128_uint128_to_uint64(tctr) & (uint64_t)0xfffffffffffU; + FStar_UInt128_t c = FStar_UInt128_shift_right(tctr, (uint32_t)44U); + tmp[ctr] = FStar_UInt128_uint64_to_uint128(r0); + tmp[ctr + (uint32_t)1U] = FStar_UInt128_add(tctrp1, c); + } +} + +inline static void +Hacl_Bignum_Fproduct_carry_limb_(uint64_t *tmp) +{ + for (uint32_t i = (uint32_t)0U; i < (uint32_t)2U; i = i + (uint32_t)1U) { + uint32_t ctr = i; + uint64_t tctr = tmp[ctr]; + uint64_t tctrp1 = tmp[ctr + (uint32_t)1U]; + uint64_t r0 = tctr & (uint64_t)0xfffffffffffU; + uint64_t c = tctr >> (uint32_t)44U; + tmp[ctr] = r0; + tmp[ctr + (uint32_t)1U] = tctrp1 + c; + } +} + +inline static void +Hacl_Bignum_Fmul_shift_reduce(uint64_t *output) +{ + uint64_t tmp = output[2U]; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)2U; i = i + (uint32_t)1U) { + uint32_t ctr = (uint32_t)3U - i - (uint32_t)1U; + uint64_t z = output[ctr - (uint32_t)1U]; + output[ctr] = z; + } + output[0U] = tmp; + Hacl_Bignum_Modulo_reduce(output); +} + +static void +Hacl_Bignum_Fmul_mul_shift_reduce_(FStar_UInt128_t *output, uint64_t *input, uint64_t *input2) +{ + for (uint32_t i = (uint32_t)0U; i < (uint32_t)2U; i = i + (uint32_t)1U) { + uint64_t input2i = input2[i]; + Hacl_Bignum_Fproduct_sum_scalar_multiplication_(output, input, input2i); + Hacl_Bignum_Fmul_shift_reduce(input); + } + uint32_t i = (uint32_t)2U; + uint64_t input2i = input2[i]; + Hacl_Bignum_Fproduct_sum_scalar_multiplication_(output, input, input2i); +} + +inline static void +Hacl_Bignum_Fmul_fmul(uint64_t *output, uint64_t *input, uint64_t *input2) +{ + uint64_t tmp[3U] = { 0U }; + memcpy(tmp, input, (uint32_t)3U * sizeof input[0U]); + KRML_CHECK_SIZE(FStar_UInt128_uint64_to_uint128((uint64_t)0U), (uint32_t)3U); + FStar_UInt128_t t[3U]; + for (uint32_t _i = 0U; _i < (uint32_t)3U; ++_i) + t[_i] = FStar_UInt128_uint64_to_uint128((uint64_t)0U); + Hacl_Bignum_Fmul_mul_shift_reduce_(t, tmp, input2); + Hacl_Bignum_Fproduct_carry_wide_(t); + Hacl_Bignum_Modulo_carry_top_wide(t); + Hacl_Bignum_Fproduct_copy_from_wide_(output, t); + uint64_t i0 = output[0U]; + uint64_t i1 = output[1U]; + uint64_t i0_ = i0 & (uint64_t)0xfffffffffffU; + uint64_t i1_ = i1 + (i0 >> (uint32_t)44U); + output[0U] = i0_; + output[1U] = i1_; +} + +inline static void +Hacl_Bignum_AddAndMultiply_add_and_multiply(uint64_t *acc, uint64_t *block, uint64_t *r) +{ + for (uint32_t i = (uint32_t)0U; i < (uint32_t)3U; i = i + (uint32_t)1U) { + uint64_t xi = acc[i]; + uint64_t yi = block[i]; + acc[i] = xi + yi; + } + Hacl_Bignum_Fmul_fmul(acc, acc, r); +} + +inline static void +Hacl_Impl_Poly1305_64_poly1305_update( + Hacl_Impl_Poly1305_64_State_poly1305_state st, + uint8_t *m) +{ + Hacl_Impl_Poly1305_64_State_poly1305_state scrut0 = st; + uint64_t *h = scrut0.h; + uint64_t *acc = h; + Hacl_Impl_Poly1305_64_State_poly1305_state scrut = st; + uint64_t *r = scrut.r; + uint64_t *r3 = r; + uint64_t tmp[3U] = { 0U }; + FStar_UInt128_t m0 = load128_le(m); + uint64_t r0 = FStar_UInt128_uint128_to_uint64(m0) & (uint64_t)0xfffffffffffU; + uint64_t + r1 = + FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(m0, (uint32_t)44U)) & (uint64_t)0xfffffffffffU; + uint64_t r2 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(m0, (uint32_t)88U)); + tmp[0U] = r0; + tmp[1U] = r1; + tmp[2U] = r2; + uint64_t b2 = tmp[2U]; + uint64_t b2_ = (uint64_t)0x10000000000U | b2; + tmp[2U] = b2_; + Hacl_Bignum_AddAndMultiply_add_and_multiply(acc, tmp, r3); +} + +inline static void +Hacl_Impl_Poly1305_64_poly1305_process_last_block_( + uint8_t *block, + Hacl_Impl_Poly1305_64_State_poly1305_state st, + uint8_t *m, + uint64_t rem_) +{ + uint64_t tmp[3U] = { 0U }; + FStar_UInt128_t m0 = load128_le(block); + uint64_t r0 = FStar_UInt128_uint128_to_uint64(m0) & (uint64_t)0xfffffffffffU; + uint64_t + r1 = + FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(m0, (uint32_t)44U)) & (uint64_t)0xfffffffffffU; + uint64_t r2 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(m0, (uint32_t)88U)); + tmp[0U] = r0; + tmp[1U] = r1; + tmp[2U] = r2; + Hacl_Impl_Poly1305_64_State_poly1305_state scrut0 = st; + uint64_t *h = scrut0.h; + Hacl_Impl_Poly1305_64_State_poly1305_state scrut = st; + uint64_t *r = scrut.r; + Hacl_Bignum_AddAndMultiply_add_and_multiply(h, tmp, r); +} + +inline static void +Hacl_Impl_Poly1305_64_poly1305_process_last_block( + Hacl_Impl_Poly1305_64_State_poly1305_state st, + uint8_t *m, + uint64_t rem_) +{ + uint8_t zero1 = (uint8_t)0U; + KRML_CHECK_SIZE(zero1, (uint32_t)16U); + uint8_t block[16U]; + for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i) + block[_i] = zero1; + uint32_t i0 = (uint32_t)rem_; + uint32_t i = (uint32_t)rem_; + memcpy(block, m, i * sizeof m[0U]); + block[i0] = (uint8_t)1U; + Hacl_Impl_Poly1305_64_poly1305_process_last_block_(block, st, m, rem_); +} + +static void +Hacl_Impl_Poly1305_64_poly1305_last_pass(uint64_t *acc) +{ + Hacl_Bignum_Fproduct_carry_limb_(acc); + Hacl_Bignum_Modulo_carry_top(acc); + uint64_t a0 = acc[0U]; + uint64_t a10 = acc[1U]; + uint64_t a20 = acc[2U]; + uint64_t a0_ = a0 & (uint64_t)0xfffffffffffU; + uint64_t r0 = a0 >> (uint32_t)44U; + uint64_t a1_ = (a10 + r0) & (uint64_t)0xfffffffffffU; + uint64_t r1 = (a10 + r0) >> (uint32_t)44U; + uint64_t a2_ = a20 + r1; + acc[0U] = a0_; + acc[1U] = a1_; + acc[2U] = a2_; + Hacl_Bignum_Modulo_carry_top(acc); + uint64_t i0 = acc[0U]; + uint64_t i1 = acc[1U]; + uint64_t i0_ = i0 & (uint64_t)0xfffffffffffU; + uint64_t i1_ = i1 + (i0 >> (uint32_t)44U); + acc[0U] = i0_; + acc[1U] = i1_; + uint64_t a00 = acc[0U]; + uint64_t a1 = acc[1U]; + uint64_t a2 = acc[2U]; + uint64_t mask0 = FStar_UInt64_gte_mask(a00, (uint64_t)0xffffffffffbU); + uint64_t mask1 = FStar_UInt64_eq_mask(a1, (uint64_t)0xfffffffffffU); + uint64_t mask2 = FStar_UInt64_eq_mask(a2, (uint64_t)0x3ffffffffffU); + uint64_t mask = (mask0 & mask1) & mask2; + uint64_t a0_0 = a00 - ((uint64_t)0xffffffffffbU & mask); + uint64_t a1_0 = a1 - ((uint64_t)0xfffffffffffU & mask); + uint64_t a2_0 = a2 - ((uint64_t)0x3ffffffffffU & mask); + acc[0U] = a0_0; + acc[1U] = a1_0; + acc[2U] = a2_0; +} + +static Hacl_Impl_Poly1305_64_State_poly1305_state +Hacl_Impl_Poly1305_64_mk_state(uint64_t *r, uint64_t *h) +{ + return ((Hacl_Impl_Poly1305_64_State_poly1305_state){.r = r, .h = h }); +} + +static void +Hacl_Standalone_Poly1305_64_poly1305_blocks( + Hacl_Impl_Poly1305_64_State_poly1305_state st, + uint8_t *m, + uint64_t len1) +{ + if (!(len1 == (uint64_t)0U)) { + uint8_t *block = m; + uint8_t *tail1 = m + (uint32_t)16U; + Hacl_Impl_Poly1305_64_poly1305_update(st, block); + uint64_t len2 = len1 - (uint64_t)1U; + Hacl_Standalone_Poly1305_64_poly1305_blocks(st, tail1, len2); + } +} + +static void +Hacl_Standalone_Poly1305_64_poly1305_partial( + Hacl_Impl_Poly1305_64_State_poly1305_state st, + uint8_t *input, + uint64_t len1, + uint8_t *kr) +{ + Hacl_Impl_Poly1305_64_State_poly1305_state scrut = st; + uint64_t *r = scrut.r; + uint64_t *x0 = r; + FStar_UInt128_t k1 = load128_le(kr); + FStar_UInt128_t + k_clamped = + FStar_UInt128_logand(k1, + FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128((uint64_t)0x0ffffffc0ffffffcU), + (uint32_t)64U), + FStar_UInt128_uint64_to_uint128((uint64_t)0x0ffffffc0fffffffU))); + uint64_t r0 = FStar_UInt128_uint128_to_uint64(k_clamped) & (uint64_t)0xfffffffffffU; + uint64_t + r1 = + FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(k_clamped, (uint32_t)44U)) & (uint64_t)0xfffffffffffU; + uint64_t + r2 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(k_clamped, (uint32_t)88U)); + x0[0U] = r0; + x0[1U] = r1; + x0[2U] = r2; + Hacl_Impl_Poly1305_64_State_poly1305_state scrut0 = st; + uint64_t *h = scrut0.h; + uint64_t *x00 = h; + x00[0U] = (uint64_t)0U; + x00[1U] = (uint64_t)0U; + x00[2U] = (uint64_t)0U; + Hacl_Standalone_Poly1305_64_poly1305_blocks(st, input, len1); +} + +static void +Hacl_Standalone_Poly1305_64_poly1305_complete( + Hacl_Impl_Poly1305_64_State_poly1305_state st, + uint8_t *m, + uint64_t len1, + uint8_t *k1) +{ + uint8_t *kr = k1; + uint64_t len16 = len1 >> (uint32_t)4U; + uint64_t rem16 = len1 & (uint64_t)0xfU; + uint8_t *part_input = m; + uint8_t *last_block = m + (uint32_t)((uint64_t)16U * len16); + Hacl_Standalone_Poly1305_64_poly1305_partial(st, part_input, len16, kr); + if (!(rem16 == (uint64_t)0U)) + Hacl_Impl_Poly1305_64_poly1305_process_last_block(st, last_block, rem16); + Hacl_Impl_Poly1305_64_State_poly1305_state scrut = st; + uint64_t *h = scrut.h; + uint64_t *acc = h; + Hacl_Impl_Poly1305_64_poly1305_last_pass(acc); +} + +static void +Hacl_Standalone_Poly1305_64_crypto_onetimeauth_( + uint8_t *output, + uint8_t *input, + uint64_t len1, + uint8_t *k1) +{ + uint64_t buf[6U] = { 0U }; + uint64_t *r = buf; + uint64_t *h = buf + (uint32_t)3U; + Hacl_Impl_Poly1305_64_State_poly1305_state st = Hacl_Impl_Poly1305_64_mk_state(r, h); + uint8_t *key_s = k1 + (uint32_t)16U; + Hacl_Standalone_Poly1305_64_poly1305_complete(st, input, len1, k1); + Hacl_Impl_Poly1305_64_State_poly1305_state scrut = st; + uint64_t *h3 = scrut.h; + uint64_t *acc = h3; + FStar_UInt128_t k_ = load128_le(key_s); + uint64_t h0 = acc[0U]; + uint64_t h1 = acc[1U]; + uint64_t h2 = acc[2U]; + FStar_UInt128_t + acc_ = + FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128(h2 + << (uint32_t)24U | + h1 >> (uint32_t)20U), + (uint32_t)64U), + FStar_UInt128_uint64_to_uint128(h1 << (uint32_t)44U | h0)); + FStar_UInt128_t mac_ = FStar_UInt128_add_mod(acc_, k_); + store128_le(output, mac_); +} + +static void +Hacl_Standalone_Poly1305_64_crypto_onetimeauth( + uint8_t *output, + uint8_t *input, + uint64_t len1, + uint8_t *k1) +{ + Hacl_Standalone_Poly1305_64_crypto_onetimeauth_(output, input, len1, k1); +} + +Hacl_Impl_Poly1305_64_State_poly1305_state +Hacl_Poly1305_64_mk_state(uint64_t *r, uint64_t *acc) +{ + return Hacl_Impl_Poly1305_64_mk_state(r, acc); +} + +void +Hacl_Poly1305_64_init(Hacl_Impl_Poly1305_64_State_poly1305_state st, uint8_t *k1) +{ + Hacl_Impl_Poly1305_64_State_poly1305_state scrut = st; + uint64_t *r = scrut.r; + uint64_t *x0 = r; + FStar_UInt128_t k10 = load128_le(k1); + FStar_UInt128_t + k_clamped = + FStar_UInt128_logand(k10, + FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128((uint64_t)0x0ffffffc0ffffffcU), + (uint32_t)64U), + FStar_UInt128_uint64_to_uint128((uint64_t)0x0ffffffc0fffffffU))); + uint64_t r0 = FStar_UInt128_uint128_to_uint64(k_clamped) & (uint64_t)0xfffffffffffU; + uint64_t + r1 = + FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(k_clamped, (uint32_t)44U)) & (uint64_t)0xfffffffffffU; + uint64_t + r2 = FStar_UInt128_uint128_to_uint64(FStar_UInt128_shift_right(k_clamped, (uint32_t)88U)); + x0[0U] = r0; + x0[1U] = r1; + x0[2U] = r2; + Hacl_Impl_Poly1305_64_State_poly1305_state scrut0 = st; + uint64_t *h = scrut0.h; + uint64_t *x00 = h; + x00[0U] = (uint64_t)0U; + x00[1U] = (uint64_t)0U; + x00[2U] = (uint64_t)0U; +} + +void +Hacl_Poly1305_64_update_block(Hacl_Impl_Poly1305_64_State_poly1305_state st, uint8_t *m) +{ + Hacl_Impl_Poly1305_64_poly1305_update(st, m); +} + +void +Hacl_Poly1305_64_update( + Hacl_Impl_Poly1305_64_State_poly1305_state st, + uint8_t *m, + uint32_t num_blocks) +{ + if (!(num_blocks == (uint32_t)0U)) { + uint8_t *block = m; + uint8_t *m_ = m + (uint32_t)16U; + uint32_t n1 = num_blocks - (uint32_t)1U; + Hacl_Poly1305_64_update_block(st, block); + Hacl_Poly1305_64_update(st, m_, n1); + } +} + +void +Hacl_Poly1305_64_update_last( + Hacl_Impl_Poly1305_64_State_poly1305_state st, + uint8_t *m, + uint32_t len1) +{ + if (!((uint64_t)len1 == (uint64_t)0U)) + Hacl_Impl_Poly1305_64_poly1305_process_last_block(st, m, (uint64_t)len1); + Hacl_Impl_Poly1305_64_State_poly1305_state scrut = st; + uint64_t *h = scrut.h; + uint64_t *acc = h; + Hacl_Impl_Poly1305_64_poly1305_last_pass(acc); +} + +void +Hacl_Poly1305_64_finish( + Hacl_Impl_Poly1305_64_State_poly1305_state st, + uint8_t *mac, + uint8_t *k1) +{ + Hacl_Impl_Poly1305_64_State_poly1305_state scrut = st; + uint64_t *h = scrut.h; + uint64_t *acc = h; + FStar_UInt128_t k_ = load128_le(k1); + uint64_t h0 = acc[0U]; + uint64_t h1 = acc[1U]; + uint64_t h2 = acc[2U]; + FStar_UInt128_t + acc_ = + FStar_UInt128_logor(FStar_UInt128_shift_left(FStar_UInt128_uint64_to_uint128(h2 + << (uint32_t)24U | + h1 >> (uint32_t)20U), + (uint32_t)64U), + FStar_UInt128_uint64_to_uint128(h1 << (uint32_t)44U | h0)); + FStar_UInt128_t mac_ = FStar_UInt128_add_mod(acc_, k_); + store128_le(mac, mac_); +} + +void +Hacl_Poly1305_64_crypto_onetimeauth( + uint8_t *output, + uint8_t *input, + uint64_t len1, + uint8_t *k1) +{ + Hacl_Standalone_Poly1305_64_crypto_onetimeauth(output, input, len1, k1); +} diff --git a/security/nss/lib/freebl/verified/Hacl_Poly1305_64.h b/security/nss/lib/freebl/verified/Hacl_Poly1305_64.h new file mode 100644 index 000000000..0aa9a0de3 --- /dev/null +++ b/security/nss/lib/freebl/verified/Hacl_Poly1305_64.h @@ -0,0 +1,99 @@ +/* Copyright 2016-2017 INRIA and Microsoft Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "kremlib.h" +#ifndef __Hacl_Poly1305_64_H +#define __Hacl_Poly1305_64_H + +typedef uint64_t Hacl_Bignum_Constants_limb; + +typedef FStar_UInt128_t Hacl_Bignum_Constants_wide; + +typedef FStar_UInt128_t Hacl_Bignum_Wide_t; + +typedef uint64_t Hacl_Bignum_Limb_t; + +typedef void *Hacl_Impl_Poly1305_64_State_log_t; + +typedef uint8_t *Hacl_Impl_Poly1305_64_State_uint8_p; + +typedef uint64_t *Hacl_Impl_Poly1305_64_State_bigint; + +typedef void *Hacl_Impl_Poly1305_64_State_seqelem; + +typedef uint64_t *Hacl_Impl_Poly1305_64_State_elemB; + +typedef uint8_t *Hacl_Impl_Poly1305_64_State_wordB; + +typedef uint8_t *Hacl_Impl_Poly1305_64_State_wordB_16; + +typedef struct +{ + uint64_t *r; + uint64_t *h; +} Hacl_Impl_Poly1305_64_State_poly1305_state; + +typedef void *Hacl_Impl_Poly1305_64_log_t; + +typedef uint64_t *Hacl_Impl_Poly1305_64_bigint; + +typedef uint8_t *Hacl_Impl_Poly1305_64_uint8_p; + +typedef uint64_t *Hacl_Impl_Poly1305_64_elemB; + +typedef uint8_t *Hacl_Impl_Poly1305_64_wordB; + +typedef uint8_t *Hacl_Impl_Poly1305_64_wordB_16; + +typedef uint8_t *Hacl_Poly1305_64_uint8_p; + +typedef uint64_t Hacl_Poly1305_64_uint64_t; + +typedef uint8_t *Hacl_Poly1305_64_key; + +typedef Hacl_Impl_Poly1305_64_State_poly1305_state Hacl_Poly1305_64_state; + +Hacl_Impl_Poly1305_64_State_poly1305_state +Hacl_Poly1305_64_mk_state(uint64_t *r, uint64_t *acc); + +void Hacl_Poly1305_64_init(Hacl_Impl_Poly1305_64_State_poly1305_state st, uint8_t *k1); + +void Hacl_Poly1305_64_update_block(Hacl_Impl_Poly1305_64_State_poly1305_state st, uint8_t *m); + +void +Hacl_Poly1305_64_update( + Hacl_Impl_Poly1305_64_State_poly1305_state st, + uint8_t *m, + uint32_t num_blocks); + +void +Hacl_Poly1305_64_update_last( + Hacl_Impl_Poly1305_64_State_poly1305_state st, + uint8_t *m, + uint32_t len1); + +void +Hacl_Poly1305_64_finish( + Hacl_Impl_Poly1305_64_State_poly1305_state st, + uint8_t *mac, + uint8_t *k1); + +void +Hacl_Poly1305_64_crypto_onetimeauth( + uint8_t *output, + uint8_t *input, + uint64_t len1, + uint8_t *k1); +#endif diff --git a/security/nss/lib/freebl/verified/kremlib.h b/security/nss/lib/freebl/verified/kremlib.h new file mode 100644 index 000000000..c12164e74 --- /dev/null +++ b/security/nss/lib/freebl/verified/kremlib.h @@ -0,0 +1,672 @@ +/* Copyright 2016-2017 INRIA and Microsoft Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __KREMLIB_H +#define __KREMLIB_H + +#include "kremlib_base.h" + +/* For tests only: we might need this function to be forward-declared, because + * the dependency on WasmSupport appears very late, after SimplifyWasm, and + * sadly, after the topological order has been done. */ +void WasmSupport_check_buffer_size(uint32_t s); + +/******************************************************************************/ +/* Stubs to ease compilation of non-Low* code */ +/******************************************************************************/ + +/* Some types that KreMLin has no special knowledge of; many of them appear in + * signatures of ghost functions, meaning that it suffices to give them (any) + * definition. */ +typedef void *FStar_Seq_Base_seq, *Prims_prop, *FStar_HyperStack_mem, + *FStar_Set_set, *Prims_st_pre_h, *FStar_Heap_heap, *Prims_all_pre_h, + *FStar_TSet_set, *Prims_list, *FStar_Map_t, *FStar_UInt63_t_, + *FStar_Int63_t_, *FStar_UInt63_t, *FStar_Int63_t, *FStar_UInt_uint_t, + *FStar_Int_int_t, *FStar_HyperStack_stackref, *FStar_Bytes_bytes, + *FStar_HyperHeap_rid, *FStar_Heap_aref, *FStar_Monotonic_Heap_heap, + *FStar_Monotonic_Heap_aref, *FStar_Monotonic_HyperHeap_rid, + *FStar_Monotonic_HyperStack_mem, *FStar_Char_char_; + +typedef const char *Prims_string; + +/* For "bare" targets that do not have a C stdlib, the user might want to use + * [-add-include '"mydefinitions.h"'] and override these. */ +#ifndef KRML_HOST_PRINTF +#define KRML_HOST_PRINTF printf +#endif + +#ifndef KRML_HOST_EXIT +#define KRML_HOST_EXIT exit +#endif + +#ifndef KRML_HOST_MALLOC +#define KRML_HOST_MALLOC malloc +#endif + +/* In statement position, exiting is easy. */ +#define KRML_EXIT \ + do { \ + KRML_HOST_PRINTF("Unimplemented function at %s:%d\n", __FILE__, __LINE__); \ + KRML_HOST_EXIT(254); \ + } while (0) + +/* In expression position, use the comma-operator and a malloc to return an + * expression of the right size. KreMLin passes t as the parameter to the macro. + */ +#define KRML_EABORT(t, msg) \ + (KRML_HOST_PRINTF("KreMLin abort at %s:%d\n%s\n", __FILE__, __LINE__, msg), \ + KRML_HOST_EXIT(255), *((t *)KRML_HOST_MALLOC(sizeof(t)))) + +/* In FStar.Buffer.fst, the size of arrays is uint32_t, but it's a number of + * *elements*. Do an ugly, run-time check (some of which KreMLin can eliminate). + */ +#define KRML_CHECK_SIZE(elt, size) \ + if (((size_t)size) > SIZE_MAX / sizeof(elt)) { \ + KRML_HOST_PRINTF( \ + "Maximum allocatable size exceeded, aborting before overflow at " \ + "%s:%d\n", \ + __FILE__, __LINE__); \ + KRML_HOST_EXIT(253); \ + } + +/* A series of GCC atrocities to trace function calls (kremlin's [-d c-calls] + * option). Useful when trying to debug, say, Wasm, to compare traces. */ +/* clang-format off */ +#ifdef __GNUC__ +#define KRML_FORMAT(X) _Generic((X), \ + uint8_t : "0x%08" PRIx8, \ + uint16_t: "0x%08" PRIx16, \ + uint32_t: "0x%08" PRIx32, \ + uint64_t: "0x%08" PRIx64, \ + int8_t : "0x%08" PRIx8, \ + int16_t : "0x%08" PRIx16, \ + int32_t : "0x%08" PRIx32, \ + int64_t : "0x%08" PRIx64, \ + default : "%s") + +#define KRML_FORMAT_ARG(X) _Generic((X), \ + uint8_t : X, \ + uint16_t: X, \ + uint32_t: X, \ + uint64_t: X, \ + int8_t : X, \ + int16_t : X, \ + int32_t : X, \ + int64_t : X, \ + default : "unknown") +/* clang-format on */ + +#define KRML_DEBUG_RETURN(X) \ + ({ \ + __auto_type _ret = (X); \ + KRML_HOST_PRINTF("returning: "); \ + KRML_HOST_PRINTF(KRML_FORMAT(_ret), KRML_FORMAT_ARG(_ret)); \ + KRML_HOST_PRINTF(" \n"); \ + _ret; \ + }) +#endif + +#define FStar_Buffer_eqb(b1, b2, n) \ + (memcmp((b1), (b2), (n) * sizeof((b1)[0])) == 0) + +/* Stubs to make ST happy. Important note: you must generate a use of the macro + * argument, otherwise, you may have FStar_ST_recall(f) as the only use of f; + * KreMLin will think that this is a valid use, but then the C compiler, after + * macro expansion, will error out. */ +#define FStar_HyperHeap_root 0 +#define FStar_Pervasives_Native_fst(x) (x).fst +#define FStar_Pervasives_Native_snd(x) (x).snd +#define FStar_Seq_Base_createEmpty(x) 0 +#define FStar_Seq_Base_create(len, init) 0 +#define FStar_Seq_Base_upd(s, i, e) 0 +#define FStar_Seq_Base_eq(l1, l2) 0 +#define FStar_Seq_Base_length(l1) 0 +#define FStar_Seq_Base_append(x, y) 0 +#define FStar_Seq_Base_slice(x, y, z) 0 +#define FStar_Seq_Properties_snoc(x, y) 0 +#define FStar_Seq_Properties_cons(x, y) 0 +#define FStar_Seq_Base_index(x, y) 0 +#define FStar_HyperStack_is_eternal_color(x) 0 +#define FStar_Monotonic_HyperHeap_root 0 +#define FStar_Buffer_to_seq_full(x) 0 +#define FStar_Buffer_recall(x) +#define FStar_HyperStack_ST_op_Colon_Equals(x, v) KRML_EXIT +#define FStar_HyperStack_ST_op_Bang(x) 0 +#define FStar_HyperStack_ST_salloc(x) 0 +#define FStar_HyperStack_ST_ralloc(x, y) 0 +#define FStar_HyperStack_ST_new_region(x) (0) +#define FStar_Monotonic_RRef_m_alloc(x) \ + { \ + 0 \ + } + +#define FStar_HyperStack_ST_recall(x) \ + do { \ + (void)(x); \ + } while (0) + +#define FStar_HyperStack_ST_recall_region(x) \ + do { \ + (void)(x); \ + } while (0) + +#define FStar_Monotonic_RRef_m_recall(x1, x2) \ + do { \ + (void)(x1); \ + (void)(x2); \ + } while (0) + +#define FStar_Monotonic_RRef_m_write(x1, x2, x3, x4, x5) \ + do { \ + (void)(x1); \ + (void)(x2); \ + (void)(x3); \ + (void)(x4); \ + (void)(x5); \ + } while (0) + +/******************************************************************************/ +/* Endian-ness macros that can only be implemented in C */ +/******************************************************************************/ + +/* ... for Linux */ +#if defined(__linux__) || defined(__CYGWIN__) +#include <endian.h> + +/* ... for OSX */ +#elif defined(__APPLE__) +#include <libkern/OSByteOrder.h> +#define htole64(x) OSSwapHostToLittleInt64(x) +#define le64toh(x) OSSwapLittleToHostInt64(x) +#define htobe64(x) OSSwapHostToBigInt64(x) +#define be64toh(x) OSSwapBigToHostInt64(x) + +#define htole16(x) OSSwapHostToLittleInt16(x) +#define le16toh(x) OSSwapLittleToHostInt16(x) +#define htobe16(x) OSSwapHostToBigInt16(x) +#define be16toh(x) OSSwapBigToHostInt16(x) + +#define htole32(x) OSSwapHostToLittleInt32(x) +#define le32toh(x) OSSwapLittleToHostInt32(x) +#define htobe32(x) OSSwapHostToBigInt32(x) +#define be32toh(x) OSSwapBigToHostInt32(x) + +/* ... for Solaris */ +#elif defined(__sun__) +#include <sys/byteorder.h> +#define htole64(x) LE_64(x) +#define le64toh(x) LE_64(x) +#define htobe64(x) BE_64(x) +#define be64toh(x) BE_64(x) + +#define htole16(x) LE_16(x) +#define le16toh(x) LE_16(x) +#define htobe16(x) BE_16(x) +#define be16toh(x) BE_16(x) + +#define htole32(x) LE_32(x) +#define le32toh(x) LE_32(x) +#define htobe32(x) BE_32(x) +#define be32toh(x) BE_32(x) + +/* ... for the BSDs */ +#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__DragonFly__) +#include <sys/endian.h> +#elif defined(__OpenBSD__) +#include <endian.h> + +/* ... for Windows (MSVC)... not targeting XBOX 360! */ +#elif defined(_MSC_VER) + +#include <stdlib.h> +#define htobe16(x) _byteswap_ushort(x) +#define htole16(x) (x) +#define be16toh(x) _byteswap_ushort(x) +#define le16toh(x) (x) + +#define htobe32(x) _byteswap_ulong(x) +#define htole32(x) (x) +#define be32toh(x) _byteswap_ulong(x) +#define le32toh(x) (x) + +#define htobe64(x) _byteswap_uint64(x) +#define htole64(x) (x) +#define be64toh(x) _byteswap_uint64(x) +#define le64toh(x) (x) + +/* ... for Windows (GCC-like, e.g. mingw or clang) */ +#elif (defined(_WIN32) || defined(_WIN64)) && \ + (defined(__GNUC__) || defined(__clang__)) + +#define htobe16(x) __builtin_bswap16(x) +#define htole16(x) (x) +#define be16toh(x) __builtin_bswap16(x) +#define le16toh(x) (x) + +#define htobe32(x) __builtin_bswap32(x) +#define htole32(x) (x) +#define be32toh(x) __builtin_bswap32(x) +#define le32toh(x) (x) + +#define htobe64(x) __builtin_bswap64(x) +#define htole64(x) (x) +#define be64toh(x) __builtin_bswap64(x) +#define le64toh(x) (x) + +/* ... generic big-endian fallback code */ +#elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + +/* byte swapping code inspired by: + * https://github.com/rweather/arduinolibs/blob/master/libraries/Crypto/utility/EndianUtil.h + * */ + +#define htobe32(x) (x) +#define be32toh(x) (x) +#define htole32(x) \ + (__extension__({ \ + uint32_t _temp = (x); \ + ((_temp >> 24) & 0x000000FF) | ((_temp >> 8) & 0x0000FF00) | \ + ((_temp << 8) & 0x00FF0000) | ((_temp << 24) & 0xFF000000); \ + })) +#define le32toh(x) (htole32((x))) + +#define htobe64(x) (x) +#define be64toh(x) (x) +#define htole64(x) \ + (__extension__({ \ + uint64_t __temp = (x); \ + uint32_t __low = htobe32((uint32_t)__temp); \ + uint32_t __high = htobe32((uint32_t)(__temp >> 32)); \ + (((uint64_t)__low) << 32) | __high; \ + })) +#define le64toh(x) (htole64((x))) + +/* ... generic little-endian fallback code */ +#elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + +#define htole32(x) (x) +#define le32toh(x) (x) +#define htobe32(x) \ + (__extension__({ \ + uint32_t _temp = (x); \ + ((_temp >> 24) & 0x000000FF) | ((_temp >> 8) & 0x0000FF00) | \ + ((_temp << 8) & 0x00FF0000) | ((_temp << 24) & 0xFF000000); \ + })) +#define be32toh(x) (htobe32((x))) + +#define htole64(x) (x) +#define le64toh(x) (x) +#define htobe64(x) \ + (__extension__({ \ + uint64_t __temp = (x); \ + uint32_t __low = htobe32((uint32_t)__temp); \ + uint32_t __high = htobe32((uint32_t)(__temp >> 32)); \ + (((uint64_t)__low) << 32) | __high; \ + })) +#define be64toh(x) (htobe64((x))) + +/* ... couldn't determine endian-ness of the target platform */ +#else +#error "Please define __BYTE_ORDER__!" + +#endif /* defined(__linux__) || ... */ + +/* Loads and stores. These avoid undefined behavior due to unaligned memory + * accesses, via memcpy. */ + +inline static uint16_t +load16(uint8_t *b) +{ + uint16_t x; + memcpy(&x, b, 2); + return x; +} + +inline static uint32_t +load32(uint8_t *b) +{ + uint32_t x; + memcpy(&x, b, 4); + return x; +} + +inline static uint64_t +load64(uint8_t *b) +{ + uint64_t x; + memcpy(&x, b, 8); + return x; +} + +inline static void +store16(uint8_t *b, uint16_t i) +{ + memcpy(b, &i, 2); +} + +inline static void +store32(uint8_t *b, uint32_t i) +{ + memcpy(b, &i, 4); +} + +inline static void +store64(uint8_t *b, uint64_t i) +{ + memcpy(b, &i, 8); +} + +#define load16_le(b) (le16toh(load16(b))) +#define store16_le(b, i) (store16(b, htole16(i))) +#define load16_be(b) (be16toh(load16(b))) +#define store16_be(b, i) (store16(b, htobe16(i))) + +#define load32_le(b) (le32toh(load32(b))) +#define store32_le(b, i) (store32(b, htole32(i))) +#define load32_be(b) (be32toh(load32(b))) +#define store32_be(b, i) (store32(b, htobe32(i))) + +#define load64_le(b) (le64toh(load64(b))) +#define store64_le(b, i) (store64(b, htole64(i))) +#define load64_be(b) (be64toh(load64(b))) +#define store64_be(b, i) (store64(b, htobe64(i))) + +/******************************************************************************/ +/* Checked integers to ease the compilation of non-Low* code */ +/******************************************************************************/ + +typedef int32_t Prims_pos, Prims_nat, Prims_nonzero, Prims_int, + krml_checked_int_t; + +inline static bool +Prims_op_GreaterThanOrEqual(int32_t x, int32_t y) +{ + return x >= y; +} + +inline static bool +Prims_op_LessThanOrEqual(int32_t x, int32_t y) +{ + return x <= y; +} + +inline static bool +Prims_op_GreaterThan(int32_t x, int32_t y) +{ + return x > y; +} + +inline static bool +Prims_op_LessThan(int32_t x, int32_t y) +{ + return x < y; +} + +#define RETURN_OR(x) \ + do { \ + int64_t __ret = x; \ + if (__ret < INT32_MIN || INT32_MAX < __ret) { \ + KRML_HOST_PRINTF("Prims.{int,nat,pos} integer overflow at %s:%d\n", \ + __FILE__, __LINE__); \ + KRML_HOST_EXIT(252); \ + } \ + return (int32_t)__ret; \ + } while (0) + +inline static int32_t +Prims_pow2(int32_t x) +{ + RETURN_OR((int64_t)1 << (int64_t)x); +} + +inline static int32_t +Prims_op_Multiply(int32_t x, int32_t y) +{ + RETURN_OR((int64_t)x * (int64_t)y); +} + +inline static int32_t +Prims_op_Addition(int32_t x, int32_t y) +{ + RETURN_OR((int64_t)x + (int64_t)y); +} + +inline static int32_t +Prims_op_Subtraction(int32_t x, int32_t y) +{ + RETURN_OR((int64_t)x - (int64_t)y); +} + +inline static int32_t +Prims_op_Division(int32_t x, int32_t y) +{ + RETURN_OR((int64_t)x / (int64_t)y); +} + +inline static int32_t +Prims_op_Modulus(int32_t x, int32_t y) +{ + RETURN_OR((int64_t)x % (int64_t)y); +} + +inline static int8_t +FStar_UInt8_uint_to_t(int8_t x) +{ + return x; +} +inline static int16_t +FStar_UInt16_uint_to_t(int16_t x) +{ + return x; +} +inline static int32_t +FStar_UInt32_uint_to_t(int32_t x) +{ + return x; +} +inline static int64_t +FStar_UInt64_uint_to_t(int64_t x) +{ + return x; +} + +inline static int8_t +FStar_UInt8_v(int8_t x) +{ + return x; +} +inline static int16_t +FStar_UInt16_v(int16_t x) +{ + return x; +} +inline static int32_t +FStar_UInt32_v(int32_t x) +{ + return x; +} +inline static int64_t +FStar_UInt64_v(int64_t x) +{ + return x; +} + +/* Platform-specific 128-bit arithmetic. These are static functions in a header, + * so that each translation unit gets its own copy and the C compiler can + * optimize. */ +#ifndef KRML_NOUINT128 +typedef unsigned __int128 FStar_UInt128_t, FStar_UInt128_t_, uint128_t; + +static inline void +print128(const char *where, uint128_t n) +{ + KRML_HOST_PRINTF("%s: [%" PRIu64 ",%" PRIu64 "]\n", where, + (uint64_t)(n >> 64), (uint64_t)n); +} + +static inline uint128_t +load128_le(uint8_t *b) +{ + uint128_t l = (uint128_t)load64_le(b); + uint128_t h = (uint128_t)load64_le(b + 8); + return (h << 64 | l); +} + +static inline void +store128_le(uint8_t *b, uint128_t n) +{ + store64_le(b, (uint64_t)n); + store64_le(b + 8, (uint64_t)(n >> 64)); +} + +static inline uint128_t +load128_be(uint8_t *b) +{ + uint128_t h = (uint128_t)load64_be(b); + uint128_t l = (uint128_t)load64_be(b + 8); + return (h << 64 | l); +} + +static inline void +store128_be(uint8_t *b, uint128_t n) +{ + store64_be(b, (uint64_t)(n >> 64)); + store64_be(b + 8, (uint64_t)n); +} + +#define FStar_UInt128_add(x, y) ((x) + (y)) +#define FStar_UInt128_mul(x, y) ((x) * (y)) +#define FStar_UInt128_add_mod(x, y) ((x) + (y)) +#define FStar_UInt128_sub(x, y) ((x) - (y)) +#define FStar_UInt128_sub_mod(x, y) ((x) - (y)) +#define FStar_UInt128_logand(x, y) ((x) & (y)) +#define FStar_UInt128_logor(x, y) ((x) | (y)) +#define FStar_UInt128_logxor(x, y) ((x) ^ (y)) +#define FStar_UInt128_lognot(x) (~(x)) +#define FStar_UInt128_shift_left(x, y) ((x) << (y)) +#define FStar_UInt128_shift_right(x, y) ((x) >> (y)) +#define FStar_UInt128_uint64_to_uint128(x) ((uint128_t)(x)) +#define FStar_UInt128_uint128_to_uint64(x) ((uint64_t)(x)) +#define FStar_UInt128_mul_wide(x, y) ((uint128_t)(x) * (y)) +#define FStar_UInt128_op_Hat_Hat(x, y) ((x) ^ (y)) + +static inline uint128_t +FStar_UInt128_eq_mask(uint128_t x, uint128_t y) +{ + uint64_t mask = + FStar_UInt64_eq_mask((uint64_t)(x >> 64), (uint64_t)(y >> 64)) & + FStar_UInt64_eq_mask(x, y); + return ((uint128_t)mask) << 64 | mask; +} + +static inline uint128_t +FStar_UInt128_gte_mask(uint128_t x, uint128_t y) +{ + uint64_t mask = + (FStar_UInt64_gte_mask(x >> 64, y >> 64) & + ~(FStar_UInt64_eq_mask(x >> 64, y >> 64))) | + (FStar_UInt64_eq_mask(x >> 64, y >> 64) & FStar_UInt64_gte_mask(x, y)); + return ((uint128_t)mask) << 64 | mask; +} + +#else /* !defined(KRML_NOUINT128) */ + +/* This is a bad circular dependency... should fix it properly. */ +#include "FStar.h" + +typedef FStar_UInt128_uint128 FStar_UInt128_t_, uint128_t; + +/* A series of definitions written using pointers. */ +static inline void +print128_(const char *where, uint128_t *n) +{ + KRML_HOST_PRINTF("%s: [0x%08" PRIx64 ",0x%08" PRIx64 "]\n", where, n->high, n->low); +} + +static inline void +load128_le_(uint8_t *b, uint128_t *r) +{ + r->low = load64_le(b); + r->high = load64_le(b + 8); +} + +static inline void +store128_le_(uint8_t *b, uint128_t *n) +{ + store64_le(b, n->low); + store64_le(b + 8, n->high); +} + +static inline void +load128_be_(uint8_t *b, uint128_t *r) +{ + r->high = load64_be(b); + r->low = load64_be(b + 8); +} + +static inline void +store128_be_(uint8_t *b, uint128_t *n) +{ + store64_be(b, n->high); + store64_be(b + 8, n->low); +} + +#ifndef KRML_NOSTRUCT_PASSING + +static inline void +print128(const char *where, uint128_t n) +{ + print128_(where, &n); +} + +static inline uint128_t +load128_le(uint8_t *b) +{ + uint128_t r; + load128_le_(b, &r); + return r; +} + +static inline void +store128_le(uint8_t *b, uint128_t n) +{ + store128_le_(b, &n); +} + +static inline uint128_t +load128_be(uint8_t *b) +{ + uint128_t r; + load128_be_(b, &r); + return r; +} + +static inline void +store128_be(uint8_t *b, uint128_t n) +{ + store128_be_(b, &n); +} + +#else /* !defined(KRML_STRUCT_PASSING) */ + +#define print128 print128_ +#define load128_le load128_le_ +#define store128_le store128_le_ +#define load128_be load128_be_ +#define store128_be store128_be_ + +#endif /* KRML_STRUCT_PASSING */ +#endif /* KRML_UINT128 */ +#endif /* __KREMLIB_H */ diff --git a/security/nss/lib/freebl/verified/kremlib_base.h b/security/nss/lib/freebl/verified/kremlib_base.h new file mode 100644 index 000000000..14170625d --- /dev/null +++ b/security/nss/lib/freebl/verified/kremlib_base.h @@ -0,0 +1,192 @@ +/* Copyright 2016-2017 INRIA and Microsoft Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __KREMLIB_BASE_H +#define __KREMLIB_BASE_H + +#include <inttypes.h> +#include <limits.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +/******************************************************************************/ +/* Some macros to ease compatibility */ +/******************************************************************************/ + +/* Define __cdecl and friends when using GCC, so that we can safely compile code + * that contains __cdecl on all platforms. Note that this is in a separate + * header so that Dafny-generated code can include just this file. */ +#ifndef _MSC_VER +/* Use the gcc predefined macros if on a platform/architectures that set them. + * Otherwise define them to be empty. */ +#ifndef __cdecl +#define __cdecl +#endif +#ifndef __stdcall +#define __stdcall +#endif +#ifndef __fastcall +#define __fastcall +#endif +#endif + +#ifdef __GNUC__ +#define inline __inline__ +#endif + +/* GCC-specific attribute syntax; everyone else gets the standard C inline + * attribute. */ +#ifdef __GNU_C__ +#ifndef __clang__ +#define force_inline inline __attribute__((always_inline)) +#else +#define force_inline inline +#endif +#else +#define force_inline inline +#endif + +/******************************************************************************/ +/* Implementing C.fst */ +/******************************************************************************/ + +/* Uppercase issue; we have to define lowercase versions of the C macros (as we + * have no way to refer to an uppercase *variable* in F*). */ +extern int exit_success; +extern int exit_failure; + +/* This one allows the user to write C.EXIT_SUCCESS. */ +typedef int exit_code; + +void print_string(const char *s); +void print_bytes(uint8_t *b, uint32_t len); + +/* The universal null pointer defined in C.Nullity.fst */ +#define C_Nullity_null(X) 0 + +/* If some globals need to be initialized before the main, then kremlin will + * generate and try to link last a function with this type: */ +void kremlinit_globals(void); + +/******************************************************************************/ +/* Implementation of machine integers (possibly of 128-bit integers) */ +/******************************************************************************/ + +/* Integer types */ +typedef uint64_t FStar_UInt64_t, FStar_UInt64_t_; +typedef int64_t FStar_Int64_t, FStar_Int64_t_; +typedef uint32_t FStar_UInt32_t, FStar_UInt32_t_; +typedef int32_t FStar_Int32_t, FStar_Int32_t_; +typedef uint16_t FStar_UInt16_t, FStar_UInt16_t_; +typedef int16_t FStar_Int16_t, FStar_Int16_t_; +typedef uint8_t FStar_UInt8_t, FStar_UInt8_t_; +typedef int8_t FStar_Int8_t, FStar_Int8_t_; + +static inline uint32_t +rotate32_left(uint32_t x, uint32_t n) +{ + /* assert (n<32); */ + return (x << n) | (x >> (32 - n)); +} +static inline uint32_t +rotate32_right(uint32_t x, uint32_t n) +{ + /* assert (n<32); */ + return (x >> n) | (x << (32 - n)); +} + +/* Constant time comparisons */ +static inline uint8_t +FStar_UInt8_eq_mask(uint8_t x, uint8_t y) +{ + x = ~(x ^ y); + x &= x << 4; + x &= x << 2; + x &= x << 1; + return (int8_t)x >> 7; +} + +static inline uint8_t +FStar_UInt8_gte_mask(uint8_t x, uint8_t y) +{ + return ~(uint8_t)(((int32_t)x - y) >> 31); +} + +static inline uint16_t +FStar_UInt16_eq_mask(uint16_t x, uint16_t y) +{ + x = ~(x ^ y); + x &= x << 8; + x &= x << 4; + x &= x << 2; + x &= x << 1; + return (int16_t)x >> 15; +} + +static inline uint16_t +FStar_UInt16_gte_mask(uint16_t x, uint16_t y) +{ + return ~(uint16_t)(((int32_t)x - y) >> 31); +} + +static inline uint32_t +FStar_UInt32_eq_mask(uint32_t x, uint32_t y) +{ + x = ~(x ^ y); + x &= x << 16; + x &= x << 8; + x &= x << 4; + x &= x << 2; + x &= x << 1; + return ((int32_t)x) >> 31; +} + +static inline uint32_t +FStar_UInt32_gte_mask(uint32_t x, uint32_t y) +{ + return ~((uint32_t)(((int64_t)x - y) >> 63)); +} + +static inline uint64_t +FStar_UInt64_eq_mask(uint64_t x, uint64_t y) +{ + x = ~(x ^ y); + x &= x << 32; + x &= x << 16; + x &= x << 8; + x &= x << 4; + x &= x << 2; + x &= x << 1; + return ((int64_t)x) >> 63; +} + +static inline uint64_t +FStar_UInt64_gte_mask(uint64_t x, uint64_t y) +{ + uint64_t low63 = + ~((uint64_t)((int64_t)((int64_t)(x & UINT64_C(0x7fffffffffffffff)) - + (int64_t)(y & UINT64_C(0x7fffffffffffffff))) >> + 63)); + uint64_t high_bit = + ~((uint64_t)((int64_t)((int64_t)(x & UINT64_C(0x8000000000000000)) - + (int64_t)(y & UINT64_C(0x8000000000000000))) >> + 63)); + return low63 & high_bit; +} + +#endif diff --git a/security/nss/lib/freebl/verified/specs/Spec.CTR.fst b/security/nss/lib/freebl/verified/specs/Spec.CTR.fst new file mode 100644 index 000000000..e411cd353 --- /dev/null +++ b/security/nss/lib/freebl/verified/specs/Spec.CTR.fst @@ -0,0 +1,98 @@ +/* Copyright 2016-2017 INRIA and Microsoft Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +module Spec.CTR + +module ST = FStar.HyperStack.ST + +open FStar.Mul +open FStar.Seq +open Spec.Lib + +#reset-options "--initial_fuel 0 --max_fuel 0 --initial_ifuel 0 --max_ifuel 0" + +type block_cipher_ctx = { + keylen: nat ; + blocklen: (x:nat{x>0}); + noncelen: nat; + counterbits: nat; + incr: pos} + +type key (c:block_cipher_ctx) = lbytes c.keylen +type nonce (c:block_cipher_ctx) = lbytes c.noncelen +type block (c:block_cipher_ctx) = lbytes (c.blocklen*c.incr) +type counter (c:block_cipher_ctx) = UInt.uint_t c.counterbits +type block_cipher (c:block_cipher_ctx) = key c -> nonce c -> counter c -> block c + +val xor: #len:nat -> x:lbytes len -> y:lbytes len -> Tot (lbytes len) +let xor #len x y = map2 FStar.UInt8.(fun x y -> x ^^ y) x y + + +val counter_mode_blocks: + ctx: block_cipher_ctx -> + bc: block_cipher ctx -> + k:key ctx -> n:nonce ctx -> c:counter ctx -> + plain:seq UInt8.t{c + ctx.incr * (length plain / ctx.blocklen) < pow2 ctx.counterbits /\ + length plain % (ctx.blocklen * ctx.incr) = 0} -> + Tot (lbytes (length plain)) + (decreases (length plain)) +#reset-options "--z3rlimit 200 --max_fuel 0" +let rec counter_mode_blocks ctx block_enc key nonce counter plain = + let len = length plain in + let len' = len / (ctx.blocklen * ctx.incr) in + Math.Lemmas.lemma_div_mod len (ctx.blocklen * ctx.incr) ; + if len = 0 then Seq.createEmpty #UInt8.t + else ( + let prefix, block = split plain (len - ctx.blocklen * ctx.incr) in + (* TODO: move to a single lemma for clarify *) + Math.Lemmas.lemma_mod_plus (length prefix) 1 (ctx.blocklen * ctx.incr); + Math.Lemmas.lemma_div_le (length prefix) len ctx.blocklen; + Spec.CTR.Lemmas.lemma_div len (ctx.blocklen * ctx.incr); + (* End TODO *) + let cipher = counter_mode_blocks ctx block_enc key nonce counter prefix in + let mask = block_enc key nonce (counter + (len / ctx.blocklen - 1) * ctx.incr) in + let eb = xor block mask in + cipher @| eb + ) + + +val counter_mode: + ctx: block_cipher_ctx -> + bc: block_cipher ctx -> + k:key ctx -> n:nonce ctx -> c:counter ctx -> + plain:seq UInt8.t{c + ctx.incr * (length plain / ctx.blocklen) < pow2 ctx.counterbits} -> + Tot (lbytes (length plain)) + (decreases (length plain)) +#reset-options "--z3rlimit 200 --max_fuel 0" +let counter_mode ctx block_enc key nonce counter plain = + let len = length plain in + let blocks_len = (ctx.incr * ctx.blocklen) * (len / (ctx.blocklen * ctx.incr)) in + let part_len = len % (ctx.blocklen * ctx.incr) in + (* TODO: move to a single lemma for clarify *) + Math.Lemmas.lemma_div_mod len (ctx.blocklen * ctx.incr); + Math.Lemmas.multiple_modulo_lemma (len / (ctx.blocklen * ctx.incr)) (ctx.blocklen * ctx.incr); + Math.Lemmas.lemma_div_le (blocks_len) len ctx.blocklen; + (* End TODO *) + let blocks, last_block = split plain blocks_len in + let cipher_blocks = counter_mode_blocks ctx block_enc key nonce counter blocks in + let cipher_last_block = + if part_len > 0 + then (* encrypt final partial block(s) *) + let mask = block_enc key nonce (counter+ctx.incr*(length plain / ctx.blocklen)) in + let mask = slice mask 0 part_len in + assert(length last_block = part_len); + xor #part_len last_block mask + else createEmpty in + cipher_blocks @| cipher_last_block diff --git a/security/nss/lib/freebl/verified/specs/Spec.Chacha20.fst b/security/nss/lib/freebl/verified/specs/Spec.Chacha20.fst new file mode 100644 index 000000000..0bdc69725 --- /dev/null +++ b/security/nss/lib/freebl/verified/specs/Spec.Chacha20.fst @@ -0,0 +1,169 @@ +/* Copyright 2016-2017 INRIA and Microsoft Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +module Spec.Chacha20 + +module ST = FStar.HyperStack.ST + +open FStar.Mul +open FStar.Seq +open FStar.UInt32 +open FStar.Endianness +open Spec.Lib +open Spec.Chacha20.Lemmas +open Seq.Create + +#set-options "--max_fuel 0 --z3rlimit 100" + +(* Constants *) +let keylen = 32 (* in bytes *) +let blocklen = 64 (* in bytes *) +let noncelen = 12 (* in bytes *) + +type key = lbytes keylen +type block = lbytes blocklen +type nonce = lbytes noncelen +type counter = UInt.uint_t 32 + +// using @ as a functional substitute for ; +// internally, blocks are represented as 16 x 4-byte integers +type state = m:seq UInt32.t {length m = 16} +type idx = n:nat{n < 16} +type shuffle = state -> Tot state + +let line (a:idx) (b:idx) (d:idx) (s:t{0 < v s /\ v s < 32}) (m:state) : Tot state = + let m = m.[a] <- (m.[a] +%^ m.[b]) in + let m = m.[d] <- ((m.[d] ^^ m.[a]) <<< s) in m + +let quarter_round a b c d : shuffle = + line a b d 16ul @ + line c d b 12ul @ + line a b d 8ul @ + line c d b 7ul + +let column_round : shuffle = + quarter_round 0 4 8 12 @ + quarter_round 1 5 9 13 @ + quarter_round 2 6 10 14 @ + quarter_round 3 7 11 15 + +let diagonal_round : shuffle = + quarter_round 0 5 10 15 @ + quarter_round 1 6 11 12 @ + quarter_round 2 7 8 13 @ + quarter_round 3 4 9 14 + +let double_round: shuffle = + column_round @ diagonal_round (* 2 rounds *) + +let rounds : shuffle = + iter 10 double_round (* 20 rounds *) + +let chacha20_core (s:state) : Tot state = + let s' = rounds s in + Spec.Loops.seq_map2 (fun x y -> x +%^ y) s' s + +(* state initialization *) +let c0 = 0x61707865ul +let c1 = 0x3320646eul +let c2 = 0x79622d32ul +let c3 = 0x6b206574ul + +let setup (k:key) (n:nonce) (c:counter): Tot state = + create_4 c0 c1 c2 c3 @| + uint32s_from_le 8 k @| + create_1 (UInt32.uint_to_t c) @| + uint32s_from_le 3 n + +let chacha20_block (k:key) (n:nonce) (c:counter): Tot block = + let st = setup k n c in + let st' = chacha20_core st in + uint32s_to_le 16 st' + +let chacha20_ctx: Spec.CTR.block_cipher_ctx = + let open Spec.CTR in + { + keylen = keylen; + blocklen = blocklen; + noncelen = noncelen; + counterbits = 32; + incr = 1 + } + +let chacha20_cipher: Spec.CTR.block_cipher chacha20_ctx = chacha20_block + +let chacha20_encrypt_bytes key nonce counter m = + Spec.CTR.counter_mode chacha20_ctx chacha20_cipher key nonce counter m + + +unfold let test_plaintext = [ + 0x4cuy; 0x61uy; 0x64uy; 0x69uy; 0x65uy; 0x73uy; 0x20uy; 0x61uy; + 0x6euy; 0x64uy; 0x20uy; 0x47uy; 0x65uy; 0x6euy; 0x74uy; 0x6cuy; + 0x65uy; 0x6duy; 0x65uy; 0x6euy; 0x20uy; 0x6fuy; 0x66uy; 0x20uy; + 0x74uy; 0x68uy; 0x65uy; 0x20uy; 0x63uy; 0x6cuy; 0x61uy; 0x73uy; + 0x73uy; 0x20uy; 0x6fuy; 0x66uy; 0x20uy; 0x27uy; 0x39uy; 0x39uy; + 0x3auy; 0x20uy; 0x49uy; 0x66uy; 0x20uy; 0x49uy; 0x20uy; 0x63uy; + 0x6fuy; 0x75uy; 0x6cuy; 0x64uy; 0x20uy; 0x6fuy; 0x66uy; 0x66uy; + 0x65uy; 0x72uy; 0x20uy; 0x79uy; 0x6fuy; 0x75uy; 0x20uy; 0x6fuy; + 0x6euy; 0x6cuy; 0x79uy; 0x20uy; 0x6fuy; 0x6euy; 0x65uy; 0x20uy; + 0x74uy; 0x69uy; 0x70uy; 0x20uy; 0x66uy; 0x6fuy; 0x72uy; 0x20uy; + 0x74uy; 0x68uy; 0x65uy; 0x20uy; 0x66uy; 0x75uy; 0x74uy; 0x75uy; + 0x72uy; 0x65uy; 0x2cuy; 0x20uy; 0x73uy; 0x75uy; 0x6euy; 0x73uy; + 0x63uy; 0x72uy; 0x65uy; 0x65uy; 0x6euy; 0x20uy; 0x77uy; 0x6fuy; + 0x75uy; 0x6cuy; 0x64uy; 0x20uy; 0x62uy; 0x65uy; 0x20uy; 0x69uy; + 0x74uy; 0x2euy +] + +unfold let test_ciphertext = [ + 0x6euy; 0x2euy; 0x35uy; 0x9auy; 0x25uy; 0x68uy; 0xf9uy; 0x80uy; + 0x41uy; 0xbauy; 0x07uy; 0x28uy; 0xdduy; 0x0duy; 0x69uy; 0x81uy; + 0xe9uy; 0x7euy; 0x7auy; 0xecuy; 0x1duy; 0x43uy; 0x60uy; 0xc2uy; + 0x0auy; 0x27uy; 0xafuy; 0xccuy; 0xfduy; 0x9fuy; 0xaeuy; 0x0buy; + 0xf9uy; 0x1buy; 0x65uy; 0xc5uy; 0x52uy; 0x47uy; 0x33uy; 0xabuy; + 0x8fuy; 0x59uy; 0x3duy; 0xabuy; 0xcduy; 0x62uy; 0xb3uy; 0x57uy; + 0x16uy; 0x39uy; 0xd6uy; 0x24uy; 0xe6uy; 0x51uy; 0x52uy; 0xabuy; + 0x8fuy; 0x53uy; 0x0cuy; 0x35uy; 0x9fuy; 0x08uy; 0x61uy; 0xd8uy; + 0x07uy; 0xcauy; 0x0duy; 0xbfuy; 0x50uy; 0x0duy; 0x6auy; 0x61uy; + 0x56uy; 0xa3uy; 0x8euy; 0x08uy; 0x8auy; 0x22uy; 0xb6uy; 0x5euy; + 0x52uy; 0xbcuy; 0x51uy; 0x4duy; 0x16uy; 0xccuy; 0xf8uy; 0x06uy; + 0x81uy; 0x8cuy; 0xe9uy; 0x1auy; 0xb7uy; 0x79uy; 0x37uy; 0x36uy; + 0x5auy; 0xf9uy; 0x0buy; 0xbfuy; 0x74uy; 0xa3uy; 0x5buy; 0xe6uy; + 0xb4uy; 0x0buy; 0x8euy; 0xeduy; 0xf2uy; 0x78uy; 0x5euy; 0x42uy; + 0x87uy; 0x4duy +] + +unfold let test_key = [ + 0uy; 1uy; 2uy; 3uy; 4uy; 5uy; 6uy; 7uy; + 8uy; 9uy; 10uy; 11uy; 12uy; 13uy; 14uy; 15uy; + 16uy; 17uy; 18uy; 19uy; 20uy; 21uy; 22uy; 23uy; + 24uy; 25uy; 26uy; 27uy; 28uy; 29uy; 30uy; 31uy + ] +unfold let test_nonce = [ + 0uy; 0uy; 0uy; 0uy; 0uy; 0uy; 0uy; 0x4auy; 0uy; 0uy; 0uy; 0uy + ] + +unfold let test_counter = 1 + +let test() = + assert_norm(List.Tot.length test_plaintext = 114); + assert_norm(List.Tot.length test_ciphertext = 114); + assert_norm(List.Tot.length test_key = 32); + assert_norm(List.Tot.length test_nonce = 12); + let test_plaintext = createL test_plaintext in + let test_ciphertext = createL test_ciphertext in + let test_key = createL test_key in + let test_nonce = createL test_nonce in + chacha20_encrypt_bytes test_key test_nonce test_counter test_plaintext + = test_ciphertext diff --git a/security/nss/lib/freebl/verified/specs/Spec.Curve25519.fst b/security/nss/lib/freebl/verified/specs/Spec.Curve25519.fst new file mode 100644 index 000000000..af4035b09 --- /dev/null +++ b/security/nss/lib/freebl/verified/specs/Spec.Curve25519.fst @@ -0,0 +1,168 @@ +/* Copyright 2016-2017 INRIA and Microsoft Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +module Spec.Curve25519 + +module ST = FStar.HyperStack.ST + +open FStar.Mul +open FStar.Seq +open FStar.UInt8 +open FStar.Endianness +open Spec.Lib +open Spec.Curve25519.Lemmas + +#reset-options "--initial_fuel 0 --max_fuel 0 --z3rlimit 20" + +(* Field types and parameters *) +let prime = pow2 255 - 19 +type elem : Type0 = e:int{e >= 0 /\ e < prime} +let fadd e1 e2 = (e1 + e2) % prime +let fsub e1 e2 = (e1 - e2) % prime +let fmul e1 e2 = (e1 * e2) % prime +let zero : elem = 0 +let one : elem = 1 +let ( +@ ) = fadd +let ( *@ ) = fmul + +(** Exponentiation *) +let rec ( ** ) (e:elem) (n:pos) : Tot elem (decreases n) = + if n = 1 then e + else + if n % 2 = 0 then op_Star_Star (e `fmul` e) (n / 2) + else e `fmul` (op_Star_Star (e `fmul` e) ((n-1)/2)) + +(* Type aliases *) +type scalar = lbytes 32 +type serialized_point = lbytes 32 +type proj_point = | Proj: x:elem -> z:elem -> proj_point + +let decodeScalar25519 (k:scalar) = + let k = k.[0] <- (k.[0] &^ 248uy) in + let k = k.[31] <- ((k.[31] &^ 127uy) |^ 64uy) in k + +let decodePoint (u:serialized_point) = + (little_endian u % pow2 255) % prime + +let add_and_double qx nq nqp1 = + let x_1 = qx in + let x_2, z_2 = nq.x, nq.z in + let x_3, z_3 = nqp1.x, nqp1.z in + let a = x_2 `fadd` z_2 in + let aa = a**2 in + let b = x_2 `fsub` z_2 in + let bb = b**2 in + let e = aa `fsub` bb in + let c = x_3 `fadd` z_3 in + let d = x_3 `fsub` z_3 in + let da = d `fmul` a in + let cb = c `fmul` b in + let x_3 = (da `fadd` cb)**2 in + let z_3 = x_1 `fmul` ((da `fsub` cb)**2) in + let x_2 = aa `fmul` bb in + let z_2 = e `fmul` (aa `fadd` (121665 `fmul` e)) in + Proj x_2 z_2, Proj x_3 z_3 + +let ith_bit (k:scalar) (i:nat{i < 256}) = + let q = i / 8 in let r = i % 8 in + (v (k.[q]) / pow2 r) % 2 + +let rec montgomery_ladder_ (init:elem) x xp1 (k:scalar) (ctr:nat{ctr<=256}) + : Tot proj_point (decreases ctr) = + if ctr = 0 then x + else ( + let ctr' = ctr - 1 in + let (x', xp1') = + if ith_bit k ctr' = 1 then ( + let nqp2, nqp1 = add_and_double init xp1 x in + nqp1, nqp2 + ) else add_and_double init x xp1 in + montgomery_ladder_ init x' xp1' k ctr' + ) + +let montgomery_ladder (init:elem) (k:scalar) : Tot proj_point = + montgomery_ladder_ init (Proj one zero) (Proj init one) k 256 + +let encodePoint (p:proj_point) : Tot serialized_point = + let p = p.x `fmul` (p.z ** (prime - 2)) in + little_bytes 32ul p + +let scalarmult (k:scalar) (u:serialized_point) : Tot serialized_point = + let k = decodeScalar25519 k in + let u = decodePoint u in + let res = montgomery_ladder u k in + encodePoint res + + +(* ********************* *) +(* RFC 7748 Test Vectors *) +(* ********************* *) + +let scalar1 = [ + 0xa5uy; 0x46uy; 0xe3uy; 0x6buy; 0xf0uy; 0x52uy; 0x7cuy; 0x9duy; + 0x3buy; 0x16uy; 0x15uy; 0x4buy; 0x82uy; 0x46uy; 0x5euy; 0xdduy; + 0x62uy; 0x14uy; 0x4cuy; 0x0auy; 0xc1uy; 0xfcuy; 0x5auy; 0x18uy; + 0x50uy; 0x6auy; 0x22uy; 0x44uy; 0xbauy; 0x44uy; 0x9auy; 0xc4uy +] + +let scalar2 = [ + 0x4buy; 0x66uy; 0xe9uy; 0xd4uy; 0xd1uy; 0xb4uy; 0x67uy; 0x3cuy; + 0x5auy; 0xd2uy; 0x26uy; 0x91uy; 0x95uy; 0x7duy; 0x6auy; 0xf5uy; + 0xc1uy; 0x1buy; 0x64uy; 0x21uy; 0xe0uy; 0xeauy; 0x01uy; 0xd4uy; + 0x2cuy; 0xa4uy; 0x16uy; 0x9euy; 0x79uy; 0x18uy; 0xbauy; 0x0duy +] + +let input1 = [ + 0xe6uy; 0xdbuy; 0x68uy; 0x67uy; 0x58uy; 0x30uy; 0x30uy; 0xdbuy; + 0x35uy; 0x94uy; 0xc1uy; 0xa4uy; 0x24uy; 0xb1uy; 0x5fuy; 0x7cuy; + 0x72uy; 0x66uy; 0x24uy; 0xecuy; 0x26uy; 0xb3uy; 0x35uy; 0x3buy; + 0x10uy; 0xa9uy; 0x03uy; 0xa6uy; 0xd0uy; 0xabuy; 0x1cuy; 0x4cuy +] + +let input2 = [ + 0xe5uy; 0x21uy; 0x0fuy; 0x12uy; 0x78uy; 0x68uy; 0x11uy; 0xd3uy; + 0xf4uy; 0xb7uy; 0x95uy; 0x9duy; 0x05uy; 0x38uy; 0xaeuy; 0x2cuy; + 0x31uy; 0xdbuy; 0xe7uy; 0x10uy; 0x6fuy; 0xc0uy; 0x3cuy; 0x3euy; + 0xfcuy; 0x4cuy; 0xd5uy; 0x49uy; 0xc7uy; 0x15uy; 0xa4uy; 0x93uy +] + +let expected1 = [ + 0xc3uy; 0xdauy; 0x55uy; 0x37uy; 0x9duy; 0xe9uy; 0xc6uy; 0x90uy; + 0x8euy; 0x94uy; 0xeauy; 0x4duy; 0xf2uy; 0x8duy; 0x08uy; 0x4fuy; + 0x32uy; 0xecuy; 0xcfuy; 0x03uy; 0x49uy; 0x1cuy; 0x71uy; 0xf7uy; + 0x54uy; 0xb4uy; 0x07uy; 0x55uy; 0x77uy; 0xa2uy; 0x85uy; 0x52uy +] +let expected2 = [ + 0x95uy; 0xcbuy; 0xdeuy; 0x94uy; 0x76uy; 0xe8uy; 0x90uy; 0x7duy; + 0x7auy; 0xaduy; 0xe4uy; 0x5cuy; 0xb4uy; 0xb8uy; 0x73uy; 0xf8uy; + 0x8buy; 0x59uy; 0x5auy; 0x68uy; 0x79uy; 0x9fuy; 0xa1uy; 0x52uy; + 0xe6uy; 0xf8uy; 0xf7uy; 0x64uy; 0x7auy; 0xacuy; 0x79uy; 0x57uy +] + +let test () = + assert_norm(List.Tot.length scalar1 = 32); + assert_norm(List.Tot.length scalar2 = 32); + assert_norm(List.Tot.length input1 = 32); + assert_norm(List.Tot.length input2 = 32); + assert_norm(List.Tot.length expected1 = 32); + assert_norm(List.Tot.length expected2 = 32); + let scalar1 = createL scalar1 in + let scalar2 = createL scalar2 in + let input1 = createL input1 in + let input2 = createL input2 in + let expected1 = createL expected1 in + let expected2 = createL expected2 in + scalarmult scalar1 input1 = expected1 + && scalarmult scalar2 input2 = expected2 diff --git a/security/nss/lib/freebl/verified/specs/Spec.Poly1305.fst b/security/nss/lib/freebl/verified/specs/Spec.Poly1305.fst new file mode 100644 index 000000000..f9d8a4cb2 --- /dev/null +++ b/security/nss/lib/freebl/verified/specs/Spec.Poly1305.fst @@ -0,0 +1,107 @@ +/* Copyright 2016-2017 INRIA and Microsoft Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +module Spec.Poly1305 + +module ST = FStar.HyperStack.ST + +open FStar.Math.Lib +open FStar.Mul +open FStar.Seq +open FStar.UInt8 +open FStar.Endianness +open Spec.Poly1305.Lemmas + +#set-options "--initial_fuel 0 --max_fuel 0 --initial_ifuel 0 --max_ifuel 0" + +(* Field types and parameters *) +let prime = pow2 130 - 5 +type elem = e:int{e >= 0 /\ e < prime} +let fadd (e1:elem) (e2:elem) = (e1 + e2) % prime +let fmul (e1:elem) (e2:elem) = (e1 * e2) % prime +let zero : elem = 0 +let one : elem = 1 +let op_Plus_At = fadd +let op_Star_At = fmul +(* Type aliases *) +let op_Amp_Bar = UInt.logand #128 +type word = w:bytes{length w <= 16} +type word_16 = w:bytes{length w = 16} +type tag = word_16 +type key = lbytes 32 +type text = seq word + +(* Specification code *) +let encode (w:word) = + (pow2 (8 * length w)) `fadd` (little_endian w) + +let rec poly (txt:text) (r:e:elem) : Tot elem (decreases (length txt)) = + if length txt = 0 then zero + else + let a = poly (Seq.tail txt) r in + let n = encode (Seq.head txt) in + (n `fadd` a) `fmul` r + +let encode_r (rb:word_16) = + (little_endian rb) &| 0x0ffffffc0ffffffc0ffffffc0fffffff + +let finish (a:elem) (s:word_16) : Tot tag = + let n = (a + little_endian s) % pow2 128 in + little_bytes 16ul n + +let rec encode_bytes (txt:bytes) : Tot text (decreases (length txt)) = + if length txt = 0 then createEmpty + else + let w, txt = split txt (min (length txt) 16) in + append_last (encode_bytes txt) w + +let poly1305 (msg:bytes) (k:key) : Tot tag = + let text = encode_bytes msg in + let r = encode_r (slice k 0 16) in + let s = slice k 16 32 in + finish (poly text r) s + + +(* ********************* *) +(* RFC 7539 Test Vectors *) +(* ********************* *) + +#reset-options "--initial_fuel 0 --max_fuel 0 --z3rlimit 20" + +unfold let msg = [ + 0x43uy; 0x72uy; 0x79uy; 0x70uy; 0x74uy; 0x6fuy; 0x67uy; 0x72uy; + 0x61uy; 0x70uy; 0x68uy; 0x69uy; 0x63uy; 0x20uy; 0x46uy; 0x6fuy; + 0x72uy; 0x75uy; 0x6duy; 0x20uy; 0x52uy; 0x65uy; 0x73uy; 0x65uy; + 0x61uy; 0x72uy; 0x63uy; 0x68uy; 0x20uy; 0x47uy; 0x72uy; 0x6fuy; + 0x75uy; 0x70uy ] + +unfold let k = [ + 0x85uy; 0xd6uy; 0xbeuy; 0x78uy; 0x57uy; 0x55uy; 0x6duy; 0x33uy; + 0x7fuy; 0x44uy; 0x52uy; 0xfeuy; 0x42uy; 0xd5uy; 0x06uy; 0xa8uy; + 0x01uy; 0x03uy; 0x80uy; 0x8auy; 0xfbuy; 0x0duy; 0xb2uy; 0xfduy; + 0x4auy; 0xbfuy; 0xf6uy; 0xafuy; 0x41uy; 0x49uy; 0xf5uy; 0x1buy ] + +unfold let expected = [ + 0xa8uy; 0x06uy; 0x1duy; 0xc1uy; 0x30uy; 0x51uy; 0x36uy; 0xc6uy; + 0xc2uy; 0x2buy; 0x8buy; 0xafuy; 0x0cuy; 0x01uy; 0x27uy; 0xa9uy ] + +let test () : Tot bool = + assert_norm(List.Tot.length msg = 34); + assert_norm(List.Tot.length k = 32); + assert_norm(List.Tot.length expected = 16); + let msg = createL msg in + let k = createL k in + let expected = createL expected in + poly1305 msg k = expected diff --git a/security/nss/lib/freebl/verified/vec128.h b/security/nss/lib/freebl/verified/vec128.h new file mode 100644 index 000000000..986e9db82 --- /dev/null +++ b/security/nss/lib/freebl/verified/vec128.h @@ -0,0 +1,345 @@ +/* Copyright 2016-2017 INRIA and Microsoft Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef __Vec_H +#define __Vec_H + +#ifdef __MSVC__ +#define forceinline __forceinline inline +#elif (defined(__GNUC__) || defined(__clang__)) +#define forceinline __attribute__((always_inline)) inline +#else +#define forceinline inline +#endif + +#if defined(__SSSE3__) || defined(__AVX2__) || defined(__AVX__) + +#include <emmintrin.h> +#include <tmmintrin.h> + +#define VEC128 +#define vec_size 4 + +typedef __m128i vec; + +static forceinline vec +vec_rotate_left_8(vec v) +{ + __m128i x = _mm_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3); + return _mm_shuffle_epi8(v, x); +} + +static forceinline vec +vec_rotate_left_16(vec v) +{ + __m128i x = _mm_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2); + return _mm_shuffle_epi8(v, x); +} + +static forceinline vec +vec_rotate_left(vec v, unsigned int n) +{ + if (n == 8) + return vec_rotate_left_8(v); + if (n == 16) + return vec_rotate_left_16(v); + return _mm_xor_si128(_mm_slli_epi32(v, n), + _mm_srli_epi32(v, 32 - n)); +} + +static forceinline vec +vec_rotate_right(vec v, unsigned int n) +{ + return (vec_rotate_left(v, 32 - n)); +} + +#define vec_shuffle_right(x, n) \ + _mm_shuffle_epi32(x, _MM_SHUFFLE((3 + (n)) % 4, (2 + (n)) % 4, (1 + (n)) % 4, (n) % 4)) + +#define vec_shuffle_left(x, n) vec_shuffle_right((x), 4 - (n)) + +static forceinline vec +vec_load_32x4(uint32_t x1, uint32_t x2, uint32_t x3, uint32_t x4) +{ + return _mm_set_epi32(x4, x3, x2, x1); +} + +static forceinline vec +vec_load_32x8(uint32_t x1, uint32_t x2, uint32_t x3, uint32_t x4, uint32_t x5, uint32_t x6, uint32_t x7, uint32_t x8) +{ + return _mm_set_epi32(x4, x3, x2, x1); +} + +static forceinline vec +vec_load_le(const unsigned char* in) +{ + return _mm_loadu_si128((__m128i*)(in)); +} + +static forceinline vec +vec_load128_le(const unsigned char* in) +{ + return vec_load_le(in); +} + +static forceinline void +vec_store_le(unsigned char* out, vec v) +{ + _mm_storeu_si128((__m128i*)(out), v); +} + +static forceinline vec +vec_add(vec v1, vec v2) +{ + return _mm_add_epi32(v1, v2); +} + +static forceinline vec +vec_add_u32(vec v1, uint32_t x) +{ + vec v2 = vec_load_32x4(x, 0, 0, 0); + return _mm_add_epi32(v1, v2); +} + +static forceinline vec +vec_increment(vec v1) +{ + vec one = vec_load_32x4(1, 0, 0, 0); + return _mm_add_epi32(v1, one); +} + +static forceinline vec +vec_xor(vec v1, vec v2) +{ + return _mm_xor_si128(v1, v2); +} + +#define vec_zero() _mm_set_epi32(0, 0, 0, 0) + +#elif defined(__ARM_NEON__) || defined(__ARM_NEON) +#include <arm_neon.h> + +typedef uint32x4_t vec; + +static forceinline vec +vec_xor(vec v1, vec v2) +{ + return veorq_u32(v1, v2); +} + +#define vec_rotate_left(x, n) \ + vsriq_n_u32(vshlq_n_u32((x), (n)), (x), 32 - (n)) + +#define vec_rotate_right(a, b) \ + vec_rotate_left((b), 32 - (b)) + +#define vec_shuffle_right(x, n) \ + vextq_u32((x), (x), (n)) + +#define vec_shuffle_left(a, b) \ + vec_shuffle_right((a), 4 - (b)) + +static forceinline vec +vec_load_32x4(uint32_t x1, uint32_t x2, uint32_t x3, uint32_t x4) +{ + uint32_t a[4] = { x1, x2, x3, x4 }; + return vld1q_u32(a); +} + +static forceinline vec +vec_load_32(uint32_t x1) +{ + uint32_t a[4] = { x1, x1, x1, x1 }; + return vld1q_u32(a); +} + +static forceinline vec +vec_load_32x8(uint32_t x1, uint32_t x2, uint32_t x3, uint32_t x4, uint32_t x5, uint32_t x6, uint32_t x7, uint32_t x8) +{ + return vec_load_32x4(x1, x2, x3, x4); +} + +static forceinline vec +vec_load_le(const unsigned char* in) +{ + return vld1q_u32((uint32_t*)in); +} + +static forceinline vec +vec_load128_le(const unsigned char* in) +{ + return vec_load_le(in); +} + +static forceinline void +vec_store_le(unsigned char* out, vec v) +{ + vst1q_u32((uint32_t*)out, v); +} + +static forceinline vec +vec_add(vec v1, vec v2) +{ + return vaddq_u32(v1, v2); +} + +static forceinline vec +vec_add_u32(vec v1, uint32_t x) +{ + vec v2 = vec_load_32x4(x, 0, 0, 0); + return vec_add(v1, v2); +} + +static forceinline vec +vec_increment(vec v1) +{ + vec one = vec_load_32x4(1, 0, 0, 0); + return vec_add(v1, one); +} + +#define vec_zero() vec_load_32x4(0, 0, 0, 0) + +#else + +#define VEC128 +#define vec_size 4 + +typedef struct { + uint32_t v[4]; +} vec; + +static forceinline vec +vec_xor(vec v1, vec v2) +{ + vec r; + r.v[0] = v1.v[0] ^ v2.v[0]; + r.v[1] = v1.v[1] ^ v2.v[1]; + r.v[2] = v1.v[2] ^ v2.v[2]; + r.v[3] = v1.v[3] ^ v2.v[3]; + return r; +} + +static forceinline vec +vec_rotate_left(vec v, unsigned int n) +{ + vec r; + r.v[0] = (v.v[0] << n) ^ (v.v[0] >> (32 - n)); + r.v[1] = (v.v[1] << n) ^ (v.v[1] >> (32 - n)); + r.v[2] = (v.v[2] << n) ^ (v.v[2] >> (32 - n)); + r.v[3] = (v.v[3] << n) ^ (v.v[3] >> (32 - n)); + return r; +} + +static forceinline vec +vec_rotate_right(vec v, unsigned int n) +{ + return (vec_rotate_left(v, 32 - n)); +} + +static forceinline vec +vec_shuffle_right(vec v, unsigned int n) +{ + vec r; + r.v[0] = v.v[n % 4]; + r.v[1] = v.v[(n + 1) % 4]; + r.v[2] = v.v[(n + 2) % 4]; + r.v[3] = v.v[(n + 3) % 4]; + return r; +} + +static forceinline vec +vec_shuffle_left(vec x, unsigned int n) +{ + return vec_shuffle_right(x, 4 - n); +} + +static forceinline vec +vec_load_32x4(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3) +{ + vec v; + v.v[0] = x0; + v.v[1] = x1; + v.v[2] = x2; + v.v[3] = x3; + return v; +} + +static forceinline vec +vec_load_32(uint32_t x0) +{ + vec v; + v.v[0] = x0; + v.v[1] = x0; + v.v[2] = x0; + v.v[3] = x0; + return v; +} + +static forceinline vec +vec_load_le(const uint8_t* in) +{ + vec r; + r.v[0] = load32_le((uint8_t*)in); + r.v[1] = load32_le((uint8_t*)in + 4); + r.v[2] = load32_le((uint8_t*)in + 8); + r.v[3] = load32_le((uint8_t*)in + 12); + return r; +} + +static forceinline void +vec_store_le(unsigned char* out, vec r) +{ + store32_le(out, r.v[0]); + store32_le(out + 4, r.v[1]); + store32_le(out + 8, r.v[2]); + store32_le(out + 12, r.v[3]); +} + +static forceinline vec +vec_load128_le(const unsigned char* in) +{ + return vec_load_le(in); +} + +static forceinline vec +vec_add(vec v1, vec v2) +{ + vec r; + r.v[0] = v1.v[0] + v2.v[0]; + r.v[1] = v1.v[1] + v2.v[1]; + r.v[2] = v1.v[2] + v2.v[2]; + r.v[3] = v1.v[3] + v2.v[3]; + return r; +} + +static forceinline vec +vec_add_u32(vec v1, uint32_t x) +{ + vec v2 = vec_load_32x4(x, 0, 0, 0); + return vec_add(v1, v2); +} + +static forceinline vec +vec_increment(vec v1) +{ + vec one = vec_load_32x4(1, 0, 0, 0); + return vec_add(v1, one); +} + +#define vec_zero() vec_load_32x4(0, 0, 0, 0) + +#endif + +#endif |