From f017b749ea9f1586d2308504553d40bf4cc5439d Mon Sep 17 00:00:00 2001 From: wolfbeast Date: Tue, 6 Feb 2018 11:46:26 +0100 Subject: Update NSS to 3.32.1-RTM --- security/nss/lib/freebl/gcm.c | 885 +++++++++++++++++++++--------------------- 1 file changed, 432 insertions(+), 453 deletions(-) (limited to 'security/nss/lib/freebl/gcm.c') diff --git a/security/nss/lib/freebl/gcm.c b/security/nss/lib/freebl/gcm.c index 22121001b..0fdb0fd48 100644 --- a/security/nss/lib/freebl/gcm.c +++ b/security/nss/lib/freebl/gcm.c @@ -1,6 +1,8 @@ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +/* Thanks to Thomas Pornin for the ideas how to implement the constat time + * binary multiplication. */ #ifdef FREEBL_NO_DEPEND #include "stubs.h" @@ -15,440 +17,378 @@ #include -/************************************************************************** - * First implement the Galois hash function of GCM (gcmHash) * - **************************************************************************/ -#define GCM_HASH_LEN_LEN 8 /* gcm hash defines lengths to be 64 bits */ - -typedef struct gcmHashContextStr gcmHashContext; - -static SECStatus gcmHash_InitContext(gcmHashContext *hash, - const unsigned char *H, - unsigned int blocksize); -static void gcmHash_DestroyContext(gcmHashContext *ghash, PRBool freeit); -static SECStatus gcmHash_Update(gcmHashContext *ghash, - const unsigned char *buf, unsigned int len, - unsigned int blocksize); -static SECStatus gcmHash_Sync(gcmHashContext *ghash, unsigned int blocksize); -static SECStatus gcmHash_Final(gcmHashContext *gcm, unsigned char *outbuf, - unsigned int *outlen, unsigned int maxout, - unsigned int blocksize); -static SECStatus gcmHash_Reset(gcmHashContext *ghash, - const unsigned char *inbuf, - unsigned int inbufLen, unsigned int blocksize); - -/* compile time defines to select how the GF2 multiply is calculated. - * There are currently 2 algorithms implemented here: MPI and ALGORITHM_1. - * - * MPI uses the GF2m implemented in mpi to support GF2 ECC. - * ALGORITHM_1 is the Algorithm 1 in both NIST SP 800-38D and - * "The Galois/Counter Mode of Operation (GCM)", McGrew & Viega. - */ -#if !defined(GCM_USE_ALGORITHM_1) && !defined(GCM_USE_MPI) -#define GCM_USE_MPI 1 /* MPI is about 5x faster with the \ - * same or less complexity. It's possible to use \ - * tables to speed things up even more */ -#endif - -/* GCM defines the bit string to be LSB first, which is exactly - * opposite everyone else, including hardware. build array - * to reverse everything. */ -static const unsigned char gcm_byte_rev[256] = { - 0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, - 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0, - 0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8, - 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8, - 0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4, - 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4, - 0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec, - 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc, - 0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2, - 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2, - 0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea, - 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa, - 0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6, - 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6, - 0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee, - 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe, - 0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1, - 0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1, - 0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9, - 0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9, - 0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5, - 0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5, - 0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed, - 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd, - 0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3, - 0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3, - 0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb, - 0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb, - 0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7, - 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7, - 0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef, - 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff -}; - -#ifdef GCM_TRACE -#include - -#define GCM_TRACE_X(ghash, label) \ - { \ - unsigned char _X[MAX_BLOCK_SIZE]; \ - int i; \ - gcm_getX(ghash, _X, blocksize); \ - printf(label, (ghash)->m); \ - for (i = 0; i < blocksize; i++) \ - printf("%02x", _X[i]); \ - printf("\n"); \ - } -#define GCM_TRACE_BLOCK(label, buf, blocksize) \ - { \ - printf(label); \ - for (i = 0; i < blocksize; i++) \ - printf("%02x", buf[i]); \ - printf("\n"); \ - } -#else -#define GCM_TRACE_X(ghash, label) -#define GCM_TRACE_BLOCK(label, buf, blocksize) +#ifdef NSS_X86_OR_X64 +#include /* clmul */ #endif -#ifdef GCM_USE_MPI +/* Forward declarations */ +SECStatus gcm_HashMult_hw(gcmHashContext *ghash, const unsigned char *buf, + unsigned int count); +SECStatus gcm_HashMult_sftw(gcmHashContext *ghash, const unsigned char *buf, + unsigned int count); +SECStatus gcm_HashMult_sftw32(gcmHashContext *ghash, const unsigned char *buf, + unsigned int count); -#ifdef GCM_USE_ALGORITHM_1 -#error "Only define one of GCM_USE_MPI, GCM_USE_ALGORITHM_1" -#endif -/* use the MPI functions to calculate Xn = (Xn-1^C_i)*H mod poly */ -#include "mpi.h" -#include "secmpi.h" -#include "mplogic.h" -#include "mp_gf2m.h" - -/* state needed to handle GCM Hash function */ -struct gcmHashContextStr { - mp_int H; - mp_int X; - mp_int C_i; - const unsigned int *poly; - unsigned char buffer[MAX_BLOCK_SIZE]; - unsigned int bufLen; - int m; /* XXX what is m? */ - unsigned char counterBuf[2 * GCM_HASH_LEN_LEN]; - PRUint64 cLen; -}; - -/* f = x^128 + x^7 + x^2 + x + 1 */ -static const unsigned int poly_128[] = { 128, 7, 2, 1, 0 }; - -/* sigh, GCM defines the bit strings exactly backwards from everything else */ -static void -gcm_reverse(unsigned char *target, const unsigned char *src, - unsigned int blocksize) +uint64_t +get64(const unsigned char *bytes) { - unsigned int i; - for (i = 0; i < blocksize; i++) { - target[blocksize - i - 1] = gcm_byte_rev[src[i]]; - } + return ((uint64_t)bytes[0]) << 56 | + ((uint64_t)bytes[1]) << 48 | + ((uint64_t)bytes[2]) << 40 | + ((uint64_t)bytes[3]) << 32 | + ((uint64_t)bytes[4]) << 24 | + ((uint64_t)bytes[5]) << 16 | + ((uint64_t)bytes[6]) << 8 | + ((uint64_t)bytes[7]); } /* Initialize a gcmHashContext */ -static SECStatus -gcmHash_InitContext(gcmHashContext *ghash, const unsigned char *H, - unsigned int blocksize) +SECStatus +gcmHash_InitContext(gcmHashContext *ghash, const unsigned char *H, PRBool sw) { - mp_err err = MP_OKAY; - unsigned char H_rev[MAX_BLOCK_SIZE]; - - MP_DIGITS(&ghash->H) = 0; - MP_DIGITS(&ghash->X) = 0; - MP_DIGITS(&ghash->C_i) = 0; - CHECK_MPI_OK(mp_init(&ghash->H)); - CHECK_MPI_OK(mp_init(&ghash->X)); - CHECK_MPI_OK(mp_init(&ghash->C_i)); - - mp_zero(&ghash->X); - gcm_reverse(H_rev, H, blocksize); - CHECK_MPI_OK(mp_read_unsigned_octets(&ghash->H, H_rev, blocksize)); - - /* set the irreducible polynomial. Each blocksize has its own polynomial. - * for now only blocksize 16 (=128 bits) is defined */ - switch (blocksize) { - case 16: /* 128 bits */ - ghash->poly = poly_128; - break; - default: - PORT_SetError(SEC_ERROR_INVALID_ARGS); - goto cleanup; - } ghash->cLen = 0; ghash->bufLen = 0; - ghash->m = 0; PORT_Memset(ghash->counterBuf, 0, sizeof(ghash->counterBuf)); - return SECSuccess; -cleanup: - gcmHash_DestroyContext(ghash, PR_FALSE); - return SECFailure; -} -/* Destroy a HashContext (Note we zero the digits so this function - * is idempotent if called with freeit == PR_FALSE */ -static void -gcmHash_DestroyContext(gcmHashContext *ghash, PRBool freeit) -{ - mp_clear(&ghash->H); - mp_clear(&ghash->X); - mp_clear(&ghash->C_i); - PORT_Memset(ghash, 0, sizeof(gcmHashContext)); - if (freeit) { - PORT_Free(ghash); - } -} - -static SECStatus -gcm_getX(gcmHashContext *ghash, unsigned char *T, unsigned int blocksize) -{ - int len; - mp_err err; - unsigned char tmp_buf[MAX_BLOCK_SIZE]; - unsigned char *X; - - len = mp_unsigned_octet_size(&ghash->X); - if (len <= 0) { - PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); - return SECFailure; - } - X = tmp_buf; - PORT_Assert((unsigned int)len <= blocksize); - if ((unsigned int)len > blocksize) { - PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); - return SECFailure; - } - /* zero pad the result */ - if (len != blocksize) { - PORT_Memset(X, 0, blocksize - len); - X += blocksize - len; - } - - err = mp_to_unsigned_octets(&ghash->X, X, len); - if (err < 0) { + ghash->h_low = get64(H + 8); + ghash->h_high = get64(H); + if (clmul_support() && !sw) { +#ifdef NSS_X86_OR_X64 + ghash->ghash_mul = gcm_HashMult_hw; + ghash->x = _mm_setzero_si128(); + /* MSVC requires __m64 to load epi64. */ + ghash->h = _mm_set_epi32(ghash->h_high >> 32, (uint32_t)ghash->h_high, + ghash->h_low >> 32, (uint32_t)ghash->h_low); + ghash->hw = PR_TRUE; +#else PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); return SECFailure; +#endif /* NSS_X86_OR_X64 */ + } else { +/* We fall back to the software implementation if we can't use / don't + * want to use pclmul. */ +#ifdef HAVE_INT128_SUPPORT + ghash->ghash_mul = gcm_HashMult_sftw; +#else + ghash->ghash_mul = gcm_HashMult_sftw32; +#endif + ghash->x_high = ghash->x_low = 0; + ghash->hw = PR_FALSE; } - gcm_reverse(T, tmp_buf, blocksize); return SECSuccess; } -static SECStatus -gcm_HashMult(gcmHashContext *ghash, const unsigned char *buf, - unsigned int count, unsigned int blocksize) -{ - SECStatus rv = SECFailure; - mp_err err = MP_OKAY; - unsigned char tmp_buf[MAX_BLOCK_SIZE]; - unsigned int i; - - for (i = 0; i < count; i++, buf += blocksize) { - ghash->m++; - gcm_reverse(tmp_buf, buf, blocksize); - CHECK_MPI_OK(mp_read_unsigned_octets(&ghash->C_i, tmp_buf, blocksize)); - CHECK_MPI_OK(mp_badd(&ghash->X, &ghash->C_i, &ghash->C_i)); - /* - * Looking to speed up GCM, this the the place to do it. - * There are two areas that can be exploited to speed up this code. - * - * 1) H is a constant in this multiply. We can precompute H * (0 - 255) - * at init time and this becomes an blockize xors of our table lookup. - * - * 2) poly is a constant for each blocksize. We can calculate the - * modulo reduction by a series of adds and shifts. - * - * For now we are after functionality, so we will go ahead and use - * the builtin bmulmod from mpi - */ - CHECK_MPI_OK(mp_bmulmod(&ghash->C_i, &ghash->H, - ghash->poly, &ghash->X)); - GCM_TRACE_X(ghash, "X%d = ") - } - rv = SECSuccess; -cleanup: - PORT_Memset(tmp_buf, 0, sizeof(tmp_buf)); - if (rv != SECSuccess) { - MP_TO_SEC_ERROR(err); - } - return rv; -} - -static void -gcm_zeroX(gcmHashContext *ghash) +#ifdef HAVE_INT128_SUPPORT +/* Binary multiplication x * y = r_high << 64 | r_low. */ +void +bmul(uint64_t x, uint64_t y, uint64_t *r_high, uint64_t *r_low) { - mp_zero(&ghash->X); - ghash->m = 0; + uint128_t x1, x2, x3, x4, x5; + uint128_t y1, y2, y3, y4, y5; + uint128_t r, z; + + uint128_t m1 = (uint128_t)0x2108421084210842 << 64 | 0x1084210842108421; + uint128_t m2 = (uint128_t)0x4210842108421084 << 64 | 0x2108421084210842; + uint128_t m3 = (uint128_t)0x8421084210842108 << 64 | 0x4210842108421084; + uint128_t m4 = (uint128_t)0x0842108421084210 << 64 | 0x8421084210842108; + uint128_t m5 = (uint128_t)0x1084210842108421 << 64 | 0x0842108421084210; + + x1 = x & m1; + y1 = y & m1; + x2 = x & m2; + y2 = y & m2; + x3 = x & m3; + y3 = y & m3; + x4 = x & m4; + y4 = y & m4; + x5 = x & m5; + y5 = y & m5; + + z = (x1 * y1) ^ (x2 * y5) ^ (x3 * y4) ^ (x4 * y3) ^ (x5 * y2); + r = z & m1; + z = (x1 * y2) ^ (x2 * y1) ^ (x3 * y5) ^ (x4 * y4) ^ (x5 * y3); + r |= z & m2; + z = (x1 * y3) ^ (x2 * y2) ^ (x3 * y1) ^ (x4 * y5) ^ (x5 * y4); + r |= z & m3; + z = (x1 * y4) ^ (x2 * y3) ^ (x3 * y2) ^ (x4 * y1) ^ (x5 * y5); + r |= z & m4; + z = (x1 * y5) ^ (x2 * y4) ^ (x3 * y3) ^ (x4 * y2) ^ (x5 * y1); + r |= z & m5; + + *r_high = (uint64_t)(r >> 64); + *r_low = (uint64_t)r; } -#endif - -#ifdef GCM_USE_ALGORITHM_1 -/* use algorithm 1 of McGrew & Viega "The Galois/Counter Mode of Operation" */ - -#define GCM_ARRAY_SIZE (MAX_BLOCK_SIZE / sizeof(unsigned long)) - -struct gcmHashContextStr { - unsigned long H[GCM_ARRAY_SIZE]; - unsigned long X[GCM_ARRAY_SIZE]; - unsigned long R; - unsigned char buffer[MAX_BLOCK_SIZE]; - unsigned int bufLen; - int m; - unsigned char counterBuf[2 * GCM_HASH_LEN_LEN]; - PRUint64 cLen; -}; - -static void -gcm_bytes_to_longs(unsigned long *l, const unsigned char *c, unsigned int len) +SECStatus +gcm_HashMult_sftw(gcmHashContext *ghash, const unsigned char *buf, + unsigned int count) { - int i, j; - int array_size = len / sizeof(unsigned long); - - PORT_Assert(len % sizeof(unsigned long) == 0); - for (i = 0; i < array_size; i++) { - unsigned long tmp = 0; - int byte_offset = i * sizeof(unsigned long); - for (j = sizeof(unsigned long) - 1; j >= 0; j--) { - tmp = (tmp << PR_BITS_PER_BYTE) | gcm_byte_rev[c[byte_offset + j]]; - } - l[i] = tmp; - } + uint64_t ci_low, ci_high; + size_t i; + uint64_t z2_low, z2_high, z0_low, z0_high, z1a_low, z1a_high; + uint128_t z_high = 0, z_low = 0; + + ci_low = ghash->x_low; + ci_high = ghash->x_high; + for (i = 0; i < count; i++, buf += 16) { + ci_low ^= get64(buf + 8); + ci_high ^= get64(buf); + + /* Do binary mult ghash->X = C * ghash->H (Karatsuba). */ + bmul(ci_high, ghash->h_high, &z2_high, &z2_low); + bmul(ci_low, ghash->h_low, &z0_high, &z0_low); + bmul(ci_high ^ ci_low, ghash->h_high ^ ghash->h_low, &z1a_high, &z1a_low); + z1a_high ^= z2_high ^ z0_high; + z1a_low ^= z2_low ^ z0_low; + z_high = ((uint128_t)z2_high << 64) | (z2_low ^ z1a_high); + z_low = (((uint128_t)z0_high << 64) | z0_low) ^ (((uint128_t)z1a_low) << 64); + + /* Shift one (multiply by x) as gcm spec is stupid. */ + z_high = (z_high << 1) | (z_low >> 127); + z_low <<= 1; + + /* Reduce */ + z_low ^= (z_low << 127) ^ (z_low << 126) ^ (z_low << 121); + z_high ^= z_low ^ (z_low >> 1) ^ (z_low >> 2) ^ (z_low >> 7); + ci_low = (uint64_t)z_high; + ci_high = (uint64_t)(z_high >> 64); + } + ghash->x_low = ci_low; + ghash->x_high = ci_high; + return SECSuccess; } - -static void -gcm_longs_to_bytes(const unsigned long *l, unsigned char *c, unsigned int len) +#else +/* Binary multiplication x * y = r_high << 32 | r_low. */ +void +bmul32(uint32_t x, uint32_t y, uint32_t *r_high, uint32_t *r_low) { - int i, j; - int array_size = len / sizeof(unsigned long); - - PORT_Assert(len % sizeof(unsigned long) == 0); - for (i = 0; i < array_size; i++) { - unsigned long tmp = l[i]; - int byte_offset = i * sizeof(unsigned long); - for (j = 0; j < sizeof(unsigned long); j++) { - c[byte_offset + j] = gcm_byte_rev[tmp & 0xff]; - tmp = (tmp >> PR_BITS_PER_BYTE); - } - } + uint32_t x0, x1, x2, x3; + uint32_t y0, y1, y2, y3; + uint32_t m1 = (uint32_t)0x11111111; + uint32_t m2 = (uint32_t)0x22222222; + uint32_t m4 = (uint32_t)0x44444444; + uint32_t m8 = (uint32_t)0x88888888; + uint64_t z0, z1, z2, z3; + uint64_t z; + + x0 = x & m1; + x1 = x & m2; + x2 = x & m4; + x3 = x & m8; + y0 = y & m1; + y1 = y & m2; + y2 = y & m4; + y3 = y & m8; + z0 = ((uint64_t)x0 * y0) ^ ((uint64_t)x1 * y3) ^ + ((uint64_t)x2 * y2) ^ ((uint64_t)x3 * y1); + z1 = ((uint64_t)x0 * y1) ^ ((uint64_t)x1 * y0) ^ + ((uint64_t)x2 * y3) ^ ((uint64_t)x3 * y2); + z2 = ((uint64_t)x0 * y2) ^ ((uint64_t)x1 * y1) ^ + ((uint64_t)x2 * y0) ^ ((uint64_t)x3 * y3); + z3 = ((uint64_t)x0 * y3) ^ ((uint64_t)x1 * y2) ^ + ((uint64_t)x2 * y1) ^ ((uint64_t)x3 * y0); + z0 &= ((uint64_t)m1 << 32) | m1; + z1 &= ((uint64_t)m2 << 32) | m2; + z2 &= ((uint64_t)m4 << 32) | m4; + z3 &= ((uint64_t)m8 << 32) | m8; + z = z0 | z1 | z2 | z3; + *r_high = (uint32_t)(z >> 32); + *r_low = (uint32_t)z; } -/* Initialize a gcmHashContext */ -static SECStatus -gcmHash_InitContext(gcmHashContext *ghash, const unsigned char *H, - unsigned int blocksize) +SECStatus +gcm_HashMult_sftw32(gcmHashContext *ghash, const unsigned char *buf, + unsigned int count) { - PORT_Memset(ghash->X, 0, sizeof(ghash->X)); - PORT_Memset(ghash->H, 0, sizeof(ghash->H)); - gcm_bytes_to_longs(ghash->H, H, blocksize); - - /* set the irreducible polynomial. Each blocksize has its own polynommial - * for now only blocksize 16 (=128 bits) is defined */ - switch (blocksize) { - case 16: /* 128 bits */ - ghash->R = (unsigned long)0x87; /* x^7 + x^2 + x +1 */ - break; - default: - PORT_SetError(SEC_ERROR_INVALID_ARGS); - goto cleanup; + size_t i; + uint64_t ci_low, ci_high; + uint64_t z_high_h, z_high_l, z_low_h, z_low_l; + uint32_t ci_high_h, ci_high_l, ci_low_h, ci_low_l; + uint32_t b_a_h, b_a_l, a_a_h, a_a_l, b_b_h, b_b_l; + uint32_t a_b_h, a_b_l, b_c_h, b_c_l, a_c_h, a_c_l, c_c_h, c_c_l; + uint32_t ci_highXlow_h, ci_highXlow_l, c_a_h, c_a_l, c_b_h, c_b_l; + + uint32_t h_high_h = (uint32_t)(ghash->h_high >> 32); + uint32_t h_high_l = (uint32_t)ghash->h_high; + uint32_t h_low_h = (uint32_t)(ghash->h_low >> 32); + uint32_t h_low_l = (uint32_t)ghash->h_low; + uint32_t h_highXlow_h = h_high_h ^ h_low_h; + uint32_t h_highXlow_l = h_high_l ^ h_low_l; + uint32_t h_highX_xored = h_highXlow_h ^ h_highXlow_l; + + for (i = 0; i < count; i++, buf += 16) { + ci_low = ghash->x_low ^ get64(buf + 8); + ci_high = ghash->x_high ^ get64(buf); + ci_low_h = (uint32_t)(ci_low >> 32); + ci_low_l = (uint32_t)ci_low; + ci_high_h = (uint32_t)(ci_high >> 32); + ci_high_l = (uint32_t)ci_high; + ci_highXlow_h = ci_high_h ^ ci_low_h; + ci_highXlow_l = ci_high_l ^ ci_low_l; + + /* Do binary mult ghash->X = C * ghash->H (recursive Karatsuba). */ + bmul32(ci_high_h, h_high_h, &a_a_h, &a_a_l); + bmul32(ci_high_l, h_high_l, &a_b_h, &a_b_l); + bmul32(ci_high_h ^ ci_high_l, h_high_h ^ h_high_l, &a_c_h, &a_c_l); + a_c_h ^= a_a_h ^ a_b_h; + a_c_l ^= a_a_l ^ a_b_l; + a_a_l ^= a_c_h; + a_b_h ^= a_c_l; + /* ci_high * h_high = a_a_h:a_a_l:a_b_h:a_b_l */ + + bmul32(ci_low_h, h_low_h, &b_a_h, &b_a_l); + bmul32(ci_low_l, h_low_l, &b_b_h, &b_b_l); + bmul32(ci_low_h ^ ci_low_l, h_low_h ^ h_low_l, &b_c_h, &b_c_l); + b_c_h ^= b_a_h ^ b_b_h; + b_c_l ^= b_a_l ^ b_b_l; + b_a_l ^= b_c_h; + b_b_h ^= b_c_l; + /* ci_low * h_low = b_a_h:b_a_l:b_b_h:b_b_l */ + + bmul32(ci_highXlow_h, h_highXlow_h, &c_a_h, &c_a_l); + bmul32(ci_highXlow_l, h_highXlow_l, &c_b_h, &c_b_l); + bmul32(ci_highXlow_h ^ ci_highXlow_l, h_highX_xored, &c_c_h, &c_c_l); + c_c_h ^= c_a_h ^ c_b_h; + c_c_l ^= c_a_l ^ c_b_l; + c_a_l ^= c_c_h; + c_b_h ^= c_c_l; + /* (ci_high ^ ci_low) * (h_high ^ h_low) = c_a_h:c_a_l:c_b_h:c_b_l */ + + c_a_h ^= b_a_h ^ a_a_h; + c_a_l ^= b_a_l ^ a_a_l; + c_b_h ^= b_b_h ^ a_b_h; + c_b_l ^= b_b_l ^ a_b_l; + z_high_h = ((uint64_t)a_a_h << 32) | a_a_l; + z_high_l = (((uint64_t)a_b_h << 32) | a_b_l) ^ + (((uint64_t)c_a_h << 32) | c_a_l); + z_low_h = (((uint64_t)b_a_h << 32) | b_a_l) ^ + (((uint64_t)c_b_h << 32) | c_b_l); + z_low_l = ((uint64_t)b_b_h << 32) | b_b_l; + + /* Shift one (multiply by x) as gcm spec is stupid. */ + z_high_h = z_high_h << 1 | z_high_l >> 63; + z_high_l = z_high_l << 1 | z_low_h >> 63; + z_low_h = z_low_h << 1 | z_low_l >> 63; + z_low_l <<= 1; + + /* Reduce */ + z_low_h ^= (z_low_l << 63) ^ (z_low_l << 62) ^ (z_low_l << 57); + z_high_h ^= z_low_h ^ (z_low_h >> 1) ^ (z_low_h >> 2) ^ (z_low_h >> 7); + z_high_l ^= z_low_l ^ (z_low_l >> 1) ^ (z_low_l >> 2) ^ (z_low_l >> 7) ^ + (z_low_h << 63) ^ (z_low_h << 62) ^ (z_low_h << 57); + ghash->x_high = z_high_h; + ghash->x_low = z_high_l; } - ghash->cLen = 0; - ghash->bufLen = 0; - ghash->m = 0; - PORT_Memset(ghash->counterBuf, 0, sizeof(ghash->counterBuf)); return SECSuccess; -cleanup: - return SECFailure; -} - -/* Destroy a HashContext (Note we zero the digits so this function - * is idempotent if called with freeit == PR_FALSE */ -static void -gcmHash_DestroyContext(gcmHashContext *ghash, PRBool freeit) -{ - PORT_Memset(ghash, 0, sizeof(gcmHashContext)); - if (freeit) { - PORT_Free(ghash); - } } +#endif /* HAVE_INT128_SUPPORT */ -static unsigned long -gcm_shift_one(unsigned long *t, unsigned int count) +SECStatus +gcm_HashMult_hw(gcmHashContext *ghash, const unsigned char *buf, + unsigned int count) { - unsigned long carry = 0; - unsigned long nextcarry = 0; - unsigned int i; - for (i = 0; i < count; i++) { - nextcarry = t[i] >> ((sizeof(unsigned long) * PR_BITS_PER_BYTE) - 1); - t[i] = (t[i] << 1) | carry; - carry = nextcarry; +#ifdef NSS_X86_OR_X64 + size_t i; + pre_align __m128i z_high post_align; + pre_align __m128i z_low post_align; + pre_align __m128i C post_align; + pre_align __m128i D post_align; + pre_align __m128i E post_align; + pre_align __m128i F post_align; + pre_align __m128i bin post_align; + pre_align __m128i Ci post_align; + pre_align __m128i tmp post_align; + + for (i = 0; i < count; i++, buf += 16) { + bin = _mm_set_epi16(((uint16_t)buf[0] << 8) | buf[1], + ((uint16_t)buf[2] << 8) | buf[3], + ((uint16_t)buf[4] << 8) | buf[5], + ((uint16_t)buf[6] << 8) | buf[7], + ((uint16_t)buf[8] << 8) | buf[9], + ((uint16_t)buf[10] << 8) | buf[11], + ((uint16_t)buf[12] << 8) | buf[13], + ((uint16_t)buf[14] << 8) | buf[15]); + Ci = _mm_xor_si128(bin, ghash->x); + + /* Do binary mult ghash->X = Ci * ghash->H. */ + C = _mm_clmulepi64_si128(Ci, ghash->h, 0x00); + D = _mm_clmulepi64_si128(Ci, ghash->h, 0x11); + E = _mm_clmulepi64_si128(Ci, ghash->h, 0x01); + F = _mm_clmulepi64_si128(Ci, ghash->h, 0x10); + tmp = _mm_xor_si128(E, F); + z_high = _mm_xor_si128(tmp, _mm_slli_si128(D, 8)); + z_high = _mm_unpackhi_epi64(z_high, D); + z_low = _mm_xor_si128(_mm_slli_si128(tmp, 8), C); + z_low = _mm_unpackhi_epi64(_mm_slli_si128(C, 8), z_low); + + /* Shift one to the left (multiply by x) as gcm spec is stupid. */ + C = _mm_slli_si128(z_low, 8); + E = _mm_srli_epi64(C, 63); + D = _mm_slli_si128(z_high, 8); + F = _mm_srli_epi64(D, 63); + /* Carry over */ + C = _mm_srli_si128(z_low, 8); + D = _mm_srli_epi64(C, 63); + z_low = _mm_or_si128(_mm_slli_epi64(z_low, 1), E); + z_high = _mm_or_si128(_mm_or_si128(_mm_slli_epi64(z_high, 1), F), D); + + /* Reduce */ + C = _mm_slli_si128(z_low, 8); + /* D = z_low << 127 */ + D = _mm_slli_epi64(C, 63); + /* E = z_low << 126 */ + E = _mm_slli_epi64(C, 62); + /* F = z_low << 121 */ + F = _mm_slli_epi64(C, 57); + /* z_low ^= (z_low << 127) ^ (z_low << 126) ^ (z_low << 121); */ + z_low = _mm_xor_si128(_mm_xor_si128(_mm_xor_si128(z_low, D), E), F); + C = _mm_srli_si128(z_low, 8); + /* D = z_low >> 1 */ + D = _mm_slli_epi64(C, 63); + D = _mm_or_si128(_mm_srli_epi64(z_low, 1), D); + /* E = z_low >> 2 */ + E = _mm_slli_epi64(C, 62); + E = _mm_or_si128(_mm_srli_epi64(z_low, 2), E); + /* F = z_low >> 7 */ + F = _mm_slli_epi64(C, 57); + F = _mm_or_si128(_mm_srli_epi64(z_low, 7), F); + /* ghash->x ^= z_low ^ (z_low >> 1) ^ (z_low >> 2) ^ (z_low >> 7); */ + ghash->x = _mm_xor_si128(_mm_xor_si128( + _mm_xor_si128(_mm_xor_si128(z_high, z_low), D), E), + F); } - return carry; -} - -static SECStatus -gcm_getX(gcmHashContext *ghash, unsigned char *T, unsigned int blocksize) -{ - gcm_longs_to_bytes(ghash->X, T, blocksize); return SECSuccess; +#else + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; +#endif /* NSS_X86_OR_X64 */ } -#define GCM_XOR(t, s, len) \ - for (l = 0; l < len; l++) \ - t[l] ^= s[l] - static SECStatus -gcm_HashMult(gcmHashContext *ghash, const unsigned char *buf, - unsigned int count, unsigned int blocksize) +gcm_zeroX(gcmHashContext *ghash) { - unsigned long C_i[GCM_ARRAY_SIZE]; - unsigned int arraysize = blocksize / sizeof(unsigned long); - unsigned int i, j, k, l; - - for (i = 0; i < count; i++, buf += blocksize) { - ghash->m++; - gcm_bytes_to_longs(C_i, buf, blocksize); - GCM_XOR(C_i, ghash->X, arraysize); - /* multiply X = C_i * H */ - PORT_Memset(ghash->X, 0, sizeof(ghash->X)); - for (j = 0; j < arraysize; j++) { - unsigned long H = ghash->H[j]; - for (k = 0; k < sizeof(unsigned long) * PR_BITS_PER_BYTE; k++) { - if (H & 1) { - GCM_XOR(ghash->X, C_i, arraysize); - } - if (gcm_shift_one(C_i, arraysize)) { - C_i[0] = C_i[0] ^ ghash->R; - } - H = H >> 1; - } - } - GCM_TRACE_X(ghash, "X%d = ") + if (ghash->hw) { +#ifdef NSS_X86_OR_X64 + ghash->x = _mm_setzero_si128(); + return SECSuccess; +#else + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; +#endif /* NSS_X86_OR_X64 */ } - PORT_Memset(C_i, 0, sizeof(C_i)); - return SECSuccess; -} -static void -gcm_zeroX(gcmHashContext *ghash) -{ - PORT_Memset(ghash->X, 0, sizeof(ghash->X)); - ghash->m = 0; + ghash->x_high = ghash->x_low = 0; + return SECSuccess; } -#endif /* * implement GCM GHASH using the freebl GHASH function. The gcm_HashMult - * function always takes blocksize lengths of data. gcmHash_Update will + * function always takes AES_BLOCK_SIZE lengths of data. gcmHash_Update will * format the data properly. */ -static SECStatus +SECStatus gcmHash_Update(gcmHashContext *ghash, const unsigned char *buf, - unsigned int len, unsigned int blocksize) + unsigned int len) { unsigned int blocks; SECStatus rv; @@ -458,7 +398,7 @@ gcmHash_Update(gcmHashContext *ghash, const unsigned char *buf, /* first deal with the current buffer of data. Try to fill it out so * we can hash it */ if (ghash->bufLen) { - unsigned int needed = PR_MIN(len, blocksize - ghash->bufLen); + unsigned int needed = PR_MIN(len, AES_BLOCK_SIZE - ghash->bufLen); if (needed != 0) { PORT_Memcpy(ghash->buffer + ghash->bufLen, buf, needed); } @@ -469,24 +409,24 @@ gcmHash_Update(gcmHashContext *ghash, const unsigned char *buf, /* didn't add enough to hash the data, nothing more do do */ return SECSuccess; } - PORT_Assert(ghash->bufLen == blocksize); + PORT_Assert(ghash->bufLen == AES_BLOCK_SIZE); /* hash the buffer and clear it */ - rv = gcm_HashMult(ghash, ghash->buffer, 1, blocksize); - PORT_Memset(ghash->buffer, 0, blocksize); + rv = ghash->ghash_mul(ghash, ghash->buffer, 1); + PORT_Memset(ghash->buffer, 0, AES_BLOCK_SIZE); ghash->bufLen = 0; if (rv != SECSuccess) { return SECFailure; } } /* now hash any full blocks remaining in the data stream */ - blocks = len / blocksize; + blocks = len / AES_BLOCK_SIZE; if (blocks) { - rv = gcm_HashMult(ghash, buf, blocks, blocksize); + rv = ghash->ghash_mul(ghash, buf, blocks); if (rv != SECSuccess) { return SECFailure; } - buf += blocks * blocksize; - len -= blocks * blocksize; + buf += blocks * AES_BLOCK_SIZE; + len -= blocks * AES_BLOCK_SIZE; } /* save any remainder in the buffer to be hashed with the next call */ @@ -502,7 +442,7 @@ gcmHash_Update(gcmHashContext *ghash, const unsigned char *buf, * save the lengths for the final completion of the hash */ static SECStatus -gcmHash_Sync(gcmHashContext *ghash, unsigned int blocksize) +gcmHash_Sync(gcmHashContext *ghash) { int i; SECStatus rv; @@ -519,9 +459,9 @@ gcmHash_Sync(gcmHashContext *ghash, unsigned int blocksize) /* now zero fill the buffer and hash the last block */ if (ghash->bufLen) { - PORT_Memset(ghash->buffer + ghash->bufLen, 0, blocksize - ghash->bufLen); - rv = gcm_HashMult(ghash, ghash->buffer, 1, blocksize); - PORT_Memset(ghash->buffer, 0, blocksize); + PORT_Memset(ghash->buffer + ghash->bufLen, 0, AES_BLOCK_SIZE - ghash->bufLen); + rv = ghash->ghash_mul(ghash, ghash->buffer, 1); + PORT_Memset(ghash->buffer, 0, AES_BLOCK_SIZE); ghash->bufLen = 0; if (rv != SECSuccess) { return SECFailure; @@ -530,38 +470,56 @@ gcmHash_Sync(gcmHashContext *ghash, unsigned int blocksize) return SECSuccess; } +#define WRITE64(x, bytes) \ + (bytes)[0] = (x) >> 56; \ + (bytes)[1] = (x) >> 48; \ + (bytes)[2] = (x) >> 40; \ + (bytes)[3] = (x) >> 32; \ + (bytes)[4] = (x) >> 24; \ + (bytes)[5] = (x) >> 16; \ + (bytes)[6] = (x) >> 8; \ + (bytes)[7] = (x); + /* * This does the final sync, hashes the lengths, then returns * "T", the hashed output. */ -static SECStatus +SECStatus gcmHash_Final(gcmHashContext *ghash, unsigned char *outbuf, - unsigned int *outlen, unsigned int maxout, - unsigned int blocksize) + unsigned int *outlen, unsigned int maxout) { unsigned char T[MAX_BLOCK_SIZE]; SECStatus rv; - rv = gcmHash_Sync(ghash, blocksize); + rv = gcmHash_Sync(ghash); if (rv != SECSuccess) { goto cleanup; } - rv = gcm_HashMult(ghash, ghash->counterBuf, (GCM_HASH_LEN_LEN * 2) / blocksize, - blocksize); + rv = ghash->ghash_mul(ghash, ghash->counterBuf, + (GCM_HASH_LEN_LEN * 2) / AES_BLOCK_SIZE); if (rv != SECSuccess) { goto cleanup; } - GCM_TRACE_X(ghash, "GHASH(H,A,C) = ") - - rv = gcm_getX(ghash, T, blocksize); - if (rv != SECSuccess) { - goto cleanup; + if (ghash->hw) { +#ifdef NSS_X86_OR_X64 + uint64_t tmp_out[2]; + _mm_storeu_si128((__m128i *)tmp_out, ghash->x); + WRITE64(tmp_out[0], T + 8); + WRITE64(tmp_out[1], T); +#else + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; +#endif /* NSS_X86_OR_X64 */ + } else { + WRITE64(ghash->x_low, T + 8); + WRITE64(ghash->x_high, T); } - if (maxout > blocksize) - maxout = blocksize; + if (maxout > AES_BLOCK_SIZE) { + maxout = AES_BLOCK_SIZE; + } PORT_Memcpy(outbuf, T, maxout); *outlen = maxout; rv = SECSuccess; @@ -573,22 +531,25 @@ cleanup: SECStatus gcmHash_Reset(gcmHashContext *ghash, const unsigned char *AAD, - unsigned int AADLen, unsigned int blocksize) + unsigned int AADLen) { SECStatus rv; ghash->cLen = 0; PORT_Memset(ghash->counterBuf, 0, GCM_HASH_LEN_LEN * 2); ghash->bufLen = 0; - gcm_zeroX(ghash); + rv = gcm_zeroX(ghash); + if (rv != SECSuccess) { + return rv; + } /* now kick things off by hashing the Additional Authenticated Data */ if (AADLen != 0) { - rv = gcmHash_Update(ghash, AAD, AADLen, blocksize); + rv = gcmHash_Update(ghash, AAD, AADLen); if (rv != SECSuccess) { return SECFailure; } - rv = gcmHash_Sync(ghash, blocksize); + rv = gcmHash_Sync(ghash); if (rv != SECSuccess) { return SECFailure; } @@ -602,7 +563,7 @@ gcmHash_Reset(gcmHashContext *ghash, const unsigned char *AAD, /* state to handle the full GCM operation (hash and counter) */ struct GCMContextStr { - gcmHashContext ghash_context; + gcmHashContext *ghash_context; CTRContext ctr_context; unsigned long tagBits; unsigned char tagKey[MAX_BLOCK_SIZE]; @@ -610,58 +571,69 @@ struct GCMContextStr { GCMContext * GCM_CreateContext(void *context, freeblCipherFunc cipher, - const unsigned char *params, unsigned int blocksize) + const unsigned char *params) { GCMContext *gcm = NULL; - gcmHashContext *ghash; + gcmHashContext *ghash = NULL; unsigned char H[MAX_BLOCK_SIZE]; unsigned int tmp; PRBool freeCtr = PR_FALSE; - PRBool freeHash = PR_FALSE; const CK_GCM_PARAMS *gcmParams = (const CK_GCM_PARAMS *)params; CK_AES_CTR_PARAMS ctrParams; SECStatus rv; +#ifdef DISABLE_HW_GCM + const PRBool sw = PR_TRUE; +#else + const PRBool sw = PR_FALSE; +#endif - if (blocksize > MAX_BLOCK_SIZE || blocksize > sizeof(ctrParams.cb)) { - PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + if (gcmParams->ulIvLen == 0) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); return NULL; } gcm = PORT_ZNew(GCMContext); if (gcm == NULL) { return NULL; } - /* first fill in the ghash context */ - ghash = &gcm->ghash_context; - PORT_Memset(H, 0, blocksize); - rv = (*cipher)(context, H, &tmp, blocksize, H, blocksize, blocksize); + /* aligned_alloc is C11 so we have to do it the old way. */ + ghash = PORT_ZAlloc(sizeof(gcmHashContext) + 15); + if (ghash == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + goto loser; + } + ghash->mem = ghash; + ghash = (gcmHashContext *)(((uintptr_t)ghash + 15) & ~(uintptr_t)0x0F); + + /* first plug in the ghash context */ + gcm->ghash_context = ghash; + PORT_Memset(H, 0, AES_BLOCK_SIZE); + rv = (*cipher)(context, H, &tmp, AES_BLOCK_SIZE, H, AES_BLOCK_SIZE, AES_BLOCK_SIZE); if (rv != SECSuccess) { goto loser; } - rv = gcmHash_InitContext(ghash, H, blocksize); + rv = gcmHash_InitContext(ghash, H, sw); if (rv != SECSuccess) { goto loser; } - freeHash = PR_TRUE; /* fill in the Counter context */ ctrParams.ulCounterBits = 32; PORT_Memset(ctrParams.cb, 0, sizeof(ctrParams.cb)); - if ((blocksize == 16) && (gcmParams->ulIvLen == 12)) { + if (gcmParams->ulIvLen == 12) { PORT_Memcpy(ctrParams.cb, gcmParams->pIv, gcmParams->ulIvLen); - ctrParams.cb[blocksize - 1] = 1; + ctrParams.cb[AES_BLOCK_SIZE - 1] = 1; } else { - rv = gcmHash_Update(ghash, gcmParams->pIv, gcmParams->ulIvLen, - blocksize); + rv = gcmHash_Update(ghash, gcmParams->pIv, gcmParams->ulIvLen); if (rv != SECSuccess) { goto loser; } - rv = gcmHash_Final(ghash, ctrParams.cb, &tmp, blocksize, blocksize); + rv = gcmHash_Final(ghash, ctrParams.cb, &tmp, AES_BLOCK_SIZE); if (rv != SECSuccess) { goto loser; } } rv = CTR_InitContext(&gcm->ctr_context, context, cipher, - (unsigned char *)&ctrParams, blocksize); + (unsigned char *)&ctrParams); if (rv != SECSuccess) { goto loser; } @@ -671,14 +643,14 @@ GCM_CreateContext(void *context, freeblCipherFunc cipher, gcm->tagBits = gcmParams->ulTagBits; /* save for final step */ /* calculate the final tag key. NOTE: gcm->tagKey is zero to start with. * if this assumption changes, we would need to explicitly clear it here */ - rv = CTR_Update(&gcm->ctr_context, gcm->tagKey, &tmp, blocksize, - gcm->tagKey, blocksize, blocksize); + rv = CTR_Update(&gcm->ctr_context, gcm->tagKey, &tmp, AES_BLOCK_SIZE, + gcm->tagKey, AES_BLOCK_SIZE, AES_BLOCK_SIZE); if (rv != SECSuccess) { goto loser; } /* finally mix in the AAD data */ - rv = gcmHash_Reset(ghash, gcmParams->pAAD, gcmParams->ulAADLen, blocksize); + rv = gcmHash_Reset(ghash, gcmParams->pAAD, gcmParams->ulAADLen); if (rv != SECSuccess) { goto loser; } @@ -689,8 +661,8 @@ loser: if (freeCtr) { CTR_DestroyContext(&gcm->ctr_context, PR_FALSE); } - if (freeHash) { - gcmHash_DestroyContext(&gcm->ghash_context, PR_FALSE); + if (ghash && ghash->mem) { + PORT_Free(ghash->mem); } if (gcm) { PORT_Free(gcm); @@ -705,7 +677,7 @@ GCM_DestroyContext(GCMContext *gcm, PRBool freeit) * gcm. call their destroy functions to free up any locally * allocated data (like mp_int's) */ CTR_DestroyContext(&gcm->ctr_context, PR_FALSE); - gcmHash_DestroyContext(&gcm->ghash_context, PR_FALSE); + PORT_Free(gcm->ghash_context->mem); PORT_Memset(&gcm->tagBits, 0, sizeof(gcm->tagBits)); PORT_Memset(gcm->tagKey, 0, sizeof(gcm->tagKey)); if (freeit) { @@ -715,8 +687,7 @@ GCM_DestroyContext(GCMContext *gcm, PRBool freeit) static SECStatus gcm_GetTag(GCMContext *gcm, unsigned char *outbuf, - unsigned int *outlen, unsigned int maxout, - unsigned int blocksize) + unsigned int *outlen, unsigned int maxout) { unsigned int tagBytes; unsigned int extra; @@ -738,18 +709,14 @@ gcm_GetTag(GCMContext *gcm, unsigned char *outbuf, return SECFailure; } maxout = tagBytes; - rv = gcmHash_Final(&gcm->ghash_context, outbuf, outlen, maxout, blocksize); + rv = gcmHash_Final(gcm->ghash_context, outbuf, outlen, maxout); if (rv != SECSuccess) { return SECFailure; } - GCM_TRACE_BLOCK("GHASH=", outbuf, blocksize); - GCM_TRACE_BLOCK("Y0=", gcm->tagKey, blocksize); for (i = 0; i < *outlen; i++) { outbuf[i] ^= gcm->tagKey[i]; } - GCM_TRACE_BLOCK("Y0=", gcm->tagKey, blocksize); - GCM_TRACE_BLOCK("T=", outbuf, blocksize); /* mask off any extra bits we got */ if (extra) { outbuf[tagBytes - 1] &= ~((1 << extra) - 1); @@ -772,6 +739,12 @@ GCM_EncryptUpdate(GCMContext *gcm, unsigned char *outbuf, unsigned int tagBytes; unsigned int len; + PORT_Assert(blocksize == AES_BLOCK_SIZE); + if (blocksize != AES_BLOCK_SIZE) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + tagBytes = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE; if (UINT_MAX - inlen < tagBytes) { PORT_SetError(SEC_ERROR_INPUT_LEN); @@ -784,17 +757,17 @@ GCM_EncryptUpdate(GCMContext *gcm, unsigned char *outbuf, } rv = CTR_Update(&gcm->ctr_context, outbuf, outlen, maxout, - inbuf, inlen, blocksize); + inbuf, inlen, AES_BLOCK_SIZE); if (rv != SECSuccess) { return SECFailure; } - rv = gcmHash_Update(&gcm->ghash_context, outbuf, *outlen, blocksize); + rv = gcmHash_Update(gcm->ghash_context, outbuf, *outlen); if (rv != SECSuccess) { PORT_Memset(outbuf, 0, *outlen); /* clear the output buffer */ *outlen = 0; return SECFailure; } - rv = gcm_GetTag(gcm, outbuf + *outlen, &len, maxout - *outlen, blocksize); + rv = gcm_GetTag(gcm, outbuf + *outlen, &len, maxout - *outlen); if (rv != SECSuccess) { PORT_Memset(outbuf, 0, *outlen); /* clear the output buffer */ *outlen = 0; @@ -824,6 +797,12 @@ GCM_DecryptUpdate(GCMContext *gcm, unsigned char *outbuf, const unsigned char *intag; unsigned int len; + PORT_Assert(blocksize == AES_BLOCK_SIZE); + if (blocksize != AES_BLOCK_SIZE) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + tagBytes = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE; /* get the authentication block */ @@ -836,11 +815,11 @@ GCM_DecryptUpdate(GCMContext *gcm, unsigned char *outbuf, intag = inbuf + inlen; /* verify the block */ - rv = gcmHash_Update(&gcm->ghash_context, inbuf, inlen, blocksize); + rv = gcmHash_Update(gcm->ghash_context, inbuf, inlen); if (rv != SECSuccess) { return SECFailure; } - rv = gcm_GetTag(gcm, tag, &len, blocksize, blocksize); + rv = gcm_GetTag(gcm, tag, &len, AES_BLOCK_SIZE); if (rv != SECSuccess) { return SECFailure; } @@ -856,5 +835,5 @@ GCM_DecryptUpdate(GCMContext *gcm, unsigned char *outbuf, PORT_Memset(tag, 0, sizeof(tag)); /* finish the decryption */ return CTR_Update(&gcm->ctr_context, outbuf, outlen, maxout, - inbuf, inlen, blocksize); + inbuf, inlen, AES_BLOCK_SIZE); } -- cgit v1.2.3