summaryrefslogtreecommitdiffstats
path: root/security/nss/lib/freebl/gcm.c
diff options
context:
space:
mode:
Diffstat (limited to 'security/nss/lib/freebl/gcm.c')
-rw-r--r--security/nss/lib/freebl/gcm.c885
1 files changed, 432 insertions, 453 deletions
diff --git a/security/nss/lib/freebl/gcm.c b/security/nss/lib/freebl/gcm.c
index 22121001b..0fdb0fd48 100644
--- a/security/nss/lib/freebl/gcm.c
+++ b/security/nss/lib/freebl/gcm.c
@@ -1,6 +1,8 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+/* Thanks to Thomas Pornin for the ideas how to implement the constat time
+ * binary multiplication. */
#ifdef FREEBL_NO_DEPEND
#include "stubs.h"
@@ -15,440 +17,378 @@
#include <limits.h>
-/**************************************************************************
- * First implement the Galois hash function of GCM (gcmHash) *
- **************************************************************************/
-#define GCM_HASH_LEN_LEN 8 /* gcm hash defines lengths to be 64 bits */
-
-typedef struct gcmHashContextStr gcmHashContext;
-
-static SECStatus gcmHash_InitContext(gcmHashContext *hash,
- const unsigned char *H,
- unsigned int blocksize);
-static void gcmHash_DestroyContext(gcmHashContext *ghash, PRBool freeit);
-static SECStatus gcmHash_Update(gcmHashContext *ghash,
- const unsigned char *buf, unsigned int len,
- unsigned int blocksize);
-static SECStatus gcmHash_Sync(gcmHashContext *ghash, unsigned int blocksize);
-static SECStatus gcmHash_Final(gcmHashContext *gcm, unsigned char *outbuf,
- unsigned int *outlen, unsigned int maxout,
- unsigned int blocksize);
-static SECStatus gcmHash_Reset(gcmHashContext *ghash,
- const unsigned char *inbuf,
- unsigned int inbufLen, unsigned int blocksize);
-
-/* compile time defines to select how the GF2 multiply is calculated.
- * There are currently 2 algorithms implemented here: MPI and ALGORITHM_1.
- *
- * MPI uses the GF2m implemented in mpi to support GF2 ECC.
- * ALGORITHM_1 is the Algorithm 1 in both NIST SP 800-38D and
- * "The Galois/Counter Mode of Operation (GCM)", McGrew & Viega.
- */
-#if !defined(GCM_USE_ALGORITHM_1) && !defined(GCM_USE_MPI)
-#define GCM_USE_MPI 1 /* MPI is about 5x faster with the \
- * same or less complexity. It's possible to use \
- * tables to speed things up even more */
-#endif
-
-/* GCM defines the bit string to be LSB first, which is exactly
- * opposite everyone else, including hardware. build array
- * to reverse everything. */
-static const unsigned char gcm_byte_rev[256] = {
- 0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0,
- 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
- 0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8,
- 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
- 0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4,
- 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
- 0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec,
- 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
- 0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2,
- 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
- 0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea,
- 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
- 0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6,
- 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
- 0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee,
- 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe,
- 0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1,
- 0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1,
- 0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9,
- 0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9,
- 0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5,
- 0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5,
- 0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed,
- 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd,
- 0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3,
- 0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3,
- 0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb,
- 0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb,
- 0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7,
- 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7,
- 0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef,
- 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff
-};
-
-#ifdef GCM_TRACE
-#include <stdio.h>
-
-#define GCM_TRACE_X(ghash, label) \
- { \
- unsigned char _X[MAX_BLOCK_SIZE]; \
- int i; \
- gcm_getX(ghash, _X, blocksize); \
- printf(label, (ghash)->m); \
- for (i = 0; i < blocksize; i++) \
- printf("%02x", _X[i]); \
- printf("\n"); \
- }
-#define GCM_TRACE_BLOCK(label, buf, blocksize) \
- { \
- printf(label); \
- for (i = 0; i < blocksize; i++) \
- printf("%02x", buf[i]); \
- printf("\n"); \
- }
-#else
-#define GCM_TRACE_X(ghash, label)
-#define GCM_TRACE_BLOCK(label, buf, blocksize)
+#ifdef NSS_X86_OR_X64
+#include <wmmintrin.h> /* clmul */
#endif
-#ifdef GCM_USE_MPI
+/* Forward declarations */
+SECStatus gcm_HashMult_hw(gcmHashContext *ghash, const unsigned char *buf,
+ unsigned int count);
+SECStatus gcm_HashMult_sftw(gcmHashContext *ghash, const unsigned char *buf,
+ unsigned int count);
+SECStatus gcm_HashMult_sftw32(gcmHashContext *ghash, const unsigned char *buf,
+ unsigned int count);
-#ifdef GCM_USE_ALGORITHM_1
-#error "Only define one of GCM_USE_MPI, GCM_USE_ALGORITHM_1"
-#endif
-/* use the MPI functions to calculate Xn = (Xn-1^C_i)*H mod poly */
-#include "mpi.h"
-#include "secmpi.h"
-#include "mplogic.h"
-#include "mp_gf2m.h"
-
-/* state needed to handle GCM Hash function */
-struct gcmHashContextStr {
- mp_int H;
- mp_int X;
- mp_int C_i;
- const unsigned int *poly;
- unsigned char buffer[MAX_BLOCK_SIZE];
- unsigned int bufLen;
- int m; /* XXX what is m? */
- unsigned char counterBuf[2 * GCM_HASH_LEN_LEN];
- PRUint64 cLen;
-};
-
-/* f = x^128 + x^7 + x^2 + x + 1 */
-static const unsigned int poly_128[] = { 128, 7, 2, 1, 0 };
-
-/* sigh, GCM defines the bit strings exactly backwards from everything else */
-static void
-gcm_reverse(unsigned char *target, const unsigned char *src,
- unsigned int blocksize)
+uint64_t
+get64(const unsigned char *bytes)
{
- unsigned int i;
- for (i = 0; i < blocksize; i++) {
- target[blocksize - i - 1] = gcm_byte_rev[src[i]];
- }
+ return ((uint64_t)bytes[0]) << 56 |
+ ((uint64_t)bytes[1]) << 48 |
+ ((uint64_t)bytes[2]) << 40 |
+ ((uint64_t)bytes[3]) << 32 |
+ ((uint64_t)bytes[4]) << 24 |
+ ((uint64_t)bytes[5]) << 16 |
+ ((uint64_t)bytes[6]) << 8 |
+ ((uint64_t)bytes[7]);
}
/* Initialize a gcmHashContext */
-static SECStatus
-gcmHash_InitContext(gcmHashContext *ghash, const unsigned char *H,
- unsigned int blocksize)
+SECStatus
+gcmHash_InitContext(gcmHashContext *ghash, const unsigned char *H, PRBool sw)
{
- mp_err err = MP_OKAY;
- unsigned char H_rev[MAX_BLOCK_SIZE];
-
- MP_DIGITS(&ghash->H) = 0;
- MP_DIGITS(&ghash->X) = 0;
- MP_DIGITS(&ghash->C_i) = 0;
- CHECK_MPI_OK(mp_init(&ghash->H));
- CHECK_MPI_OK(mp_init(&ghash->X));
- CHECK_MPI_OK(mp_init(&ghash->C_i));
-
- mp_zero(&ghash->X);
- gcm_reverse(H_rev, H, blocksize);
- CHECK_MPI_OK(mp_read_unsigned_octets(&ghash->H, H_rev, blocksize));
-
- /* set the irreducible polynomial. Each blocksize has its own polynomial.
- * for now only blocksize 16 (=128 bits) is defined */
- switch (blocksize) {
- case 16: /* 128 bits */
- ghash->poly = poly_128;
- break;
- default:
- PORT_SetError(SEC_ERROR_INVALID_ARGS);
- goto cleanup;
- }
ghash->cLen = 0;
ghash->bufLen = 0;
- ghash->m = 0;
PORT_Memset(ghash->counterBuf, 0, sizeof(ghash->counterBuf));
- return SECSuccess;
-cleanup:
- gcmHash_DestroyContext(ghash, PR_FALSE);
- return SECFailure;
-}
-/* Destroy a HashContext (Note we zero the digits so this function
- * is idempotent if called with freeit == PR_FALSE */
-static void
-gcmHash_DestroyContext(gcmHashContext *ghash, PRBool freeit)
-{
- mp_clear(&ghash->H);
- mp_clear(&ghash->X);
- mp_clear(&ghash->C_i);
- PORT_Memset(ghash, 0, sizeof(gcmHashContext));
- if (freeit) {
- PORT_Free(ghash);
- }
-}
-
-static SECStatus
-gcm_getX(gcmHashContext *ghash, unsigned char *T, unsigned int blocksize)
-{
- int len;
- mp_err err;
- unsigned char tmp_buf[MAX_BLOCK_SIZE];
- unsigned char *X;
-
- len = mp_unsigned_octet_size(&ghash->X);
- if (len <= 0) {
- PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
- return SECFailure;
- }
- X = tmp_buf;
- PORT_Assert((unsigned int)len <= blocksize);
- if ((unsigned int)len > blocksize) {
- PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
- return SECFailure;
- }
- /* zero pad the result */
- if (len != blocksize) {
- PORT_Memset(X, 0, blocksize - len);
- X += blocksize - len;
- }
-
- err = mp_to_unsigned_octets(&ghash->X, X, len);
- if (err < 0) {
+ ghash->h_low = get64(H + 8);
+ ghash->h_high = get64(H);
+ if (clmul_support() && !sw) {
+#ifdef NSS_X86_OR_X64
+ ghash->ghash_mul = gcm_HashMult_hw;
+ ghash->x = _mm_setzero_si128();
+ /* MSVC requires __m64 to load epi64. */
+ ghash->h = _mm_set_epi32(ghash->h_high >> 32, (uint32_t)ghash->h_high,
+ ghash->h_low >> 32, (uint32_t)ghash->h_low);
+ ghash->hw = PR_TRUE;
+#else
PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
return SECFailure;
+#endif /* NSS_X86_OR_X64 */
+ } else {
+/* We fall back to the software implementation if we can't use / don't
+ * want to use pclmul. */
+#ifdef HAVE_INT128_SUPPORT
+ ghash->ghash_mul = gcm_HashMult_sftw;
+#else
+ ghash->ghash_mul = gcm_HashMult_sftw32;
+#endif
+ ghash->x_high = ghash->x_low = 0;
+ ghash->hw = PR_FALSE;
}
- gcm_reverse(T, tmp_buf, blocksize);
return SECSuccess;
}
-static SECStatus
-gcm_HashMult(gcmHashContext *ghash, const unsigned char *buf,
- unsigned int count, unsigned int blocksize)
-{
- SECStatus rv = SECFailure;
- mp_err err = MP_OKAY;
- unsigned char tmp_buf[MAX_BLOCK_SIZE];
- unsigned int i;
-
- for (i = 0; i < count; i++, buf += blocksize) {
- ghash->m++;
- gcm_reverse(tmp_buf, buf, blocksize);
- CHECK_MPI_OK(mp_read_unsigned_octets(&ghash->C_i, tmp_buf, blocksize));
- CHECK_MPI_OK(mp_badd(&ghash->X, &ghash->C_i, &ghash->C_i));
- /*
- * Looking to speed up GCM, this the the place to do it.
- * There are two areas that can be exploited to speed up this code.
- *
- * 1) H is a constant in this multiply. We can precompute H * (0 - 255)
- * at init time and this becomes an blockize xors of our table lookup.
- *
- * 2) poly is a constant for each blocksize. We can calculate the
- * modulo reduction by a series of adds and shifts.
- *
- * For now we are after functionality, so we will go ahead and use
- * the builtin bmulmod from mpi
- */
- CHECK_MPI_OK(mp_bmulmod(&ghash->C_i, &ghash->H,
- ghash->poly, &ghash->X));
- GCM_TRACE_X(ghash, "X%d = ")
- }
- rv = SECSuccess;
-cleanup:
- PORT_Memset(tmp_buf, 0, sizeof(tmp_buf));
- if (rv != SECSuccess) {
- MP_TO_SEC_ERROR(err);
- }
- return rv;
-}
-
-static void
-gcm_zeroX(gcmHashContext *ghash)
+#ifdef HAVE_INT128_SUPPORT
+/* Binary multiplication x * y = r_high << 64 | r_low. */
+void
+bmul(uint64_t x, uint64_t y, uint64_t *r_high, uint64_t *r_low)
{
- mp_zero(&ghash->X);
- ghash->m = 0;
+ uint128_t x1, x2, x3, x4, x5;
+ uint128_t y1, y2, y3, y4, y5;
+ uint128_t r, z;
+
+ uint128_t m1 = (uint128_t)0x2108421084210842 << 64 | 0x1084210842108421;
+ uint128_t m2 = (uint128_t)0x4210842108421084 << 64 | 0x2108421084210842;
+ uint128_t m3 = (uint128_t)0x8421084210842108 << 64 | 0x4210842108421084;
+ uint128_t m4 = (uint128_t)0x0842108421084210 << 64 | 0x8421084210842108;
+ uint128_t m5 = (uint128_t)0x1084210842108421 << 64 | 0x0842108421084210;
+
+ x1 = x & m1;
+ y1 = y & m1;
+ x2 = x & m2;
+ y2 = y & m2;
+ x3 = x & m3;
+ y3 = y & m3;
+ x4 = x & m4;
+ y4 = y & m4;
+ x5 = x & m5;
+ y5 = y & m5;
+
+ z = (x1 * y1) ^ (x2 * y5) ^ (x3 * y4) ^ (x4 * y3) ^ (x5 * y2);
+ r = z & m1;
+ z = (x1 * y2) ^ (x2 * y1) ^ (x3 * y5) ^ (x4 * y4) ^ (x5 * y3);
+ r |= z & m2;
+ z = (x1 * y3) ^ (x2 * y2) ^ (x3 * y1) ^ (x4 * y5) ^ (x5 * y4);
+ r |= z & m3;
+ z = (x1 * y4) ^ (x2 * y3) ^ (x3 * y2) ^ (x4 * y1) ^ (x5 * y5);
+ r |= z & m4;
+ z = (x1 * y5) ^ (x2 * y4) ^ (x3 * y3) ^ (x4 * y2) ^ (x5 * y1);
+ r |= z & m5;
+
+ *r_high = (uint64_t)(r >> 64);
+ *r_low = (uint64_t)r;
}
-#endif
-
-#ifdef GCM_USE_ALGORITHM_1
-/* use algorithm 1 of McGrew & Viega "The Galois/Counter Mode of Operation" */
-
-#define GCM_ARRAY_SIZE (MAX_BLOCK_SIZE / sizeof(unsigned long))
-
-struct gcmHashContextStr {
- unsigned long H[GCM_ARRAY_SIZE];
- unsigned long X[GCM_ARRAY_SIZE];
- unsigned long R;
- unsigned char buffer[MAX_BLOCK_SIZE];
- unsigned int bufLen;
- int m;
- unsigned char counterBuf[2 * GCM_HASH_LEN_LEN];
- PRUint64 cLen;
-};
-
-static void
-gcm_bytes_to_longs(unsigned long *l, const unsigned char *c, unsigned int len)
+SECStatus
+gcm_HashMult_sftw(gcmHashContext *ghash, const unsigned char *buf,
+ unsigned int count)
{
- int i, j;
- int array_size = len / sizeof(unsigned long);
-
- PORT_Assert(len % sizeof(unsigned long) == 0);
- for (i = 0; i < array_size; i++) {
- unsigned long tmp = 0;
- int byte_offset = i * sizeof(unsigned long);
- for (j = sizeof(unsigned long) - 1; j >= 0; j--) {
- tmp = (tmp << PR_BITS_PER_BYTE) | gcm_byte_rev[c[byte_offset + j]];
- }
- l[i] = tmp;
- }
+ uint64_t ci_low, ci_high;
+ size_t i;
+ uint64_t z2_low, z2_high, z0_low, z0_high, z1a_low, z1a_high;
+ uint128_t z_high = 0, z_low = 0;
+
+ ci_low = ghash->x_low;
+ ci_high = ghash->x_high;
+ for (i = 0; i < count; i++, buf += 16) {
+ ci_low ^= get64(buf + 8);
+ ci_high ^= get64(buf);
+
+ /* Do binary mult ghash->X = C * ghash->H (Karatsuba). */
+ bmul(ci_high, ghash->h_high, &z2_high, &z2_low);
+ bmul(ci_low, ghash->h_low, &z0_high, &z0_low);
+ bmul(ci_high ^ ci_low, ghash->h_high ^ ghash->h_low, &z1a_high, &z1a_low);
+ z1a_high ^= z2_high ^ z0_high;
+ z1a_low ^= z2_low ^ z0_low;
+ z_high = ((uint128_t)z2_high << 64) | (z2_low ^ z1a_high);
+ z_low = (((uint128_t)z0_high << 64) | z0_low) ^ (((uint128_t)z1a_low) << 64);
+
+ /* Shift one (multiply by x) as gcm spec is stupid. */
+ z_high = (z_high << 1) | (z_low >> 127);
+ z_low <<= 1;
+
+ /* Reduce */
+ z_low ^= (z_low << 127) ^ (z_low << 126) ^ (z_low << 121);
+ z_high ^= z_low ^ (z_low >> 1) ^ (z_low >> 2) ^ (z_low >> 7);
+ ci_low = (uint64_t)z_high;
+ ci_high = (uint64_t)(z_high >> 64);
+ }
+ ghash->x_low = ci_low;
+ ghash->x_high = ci_high;
+ return SECSuccess;
}
-
-static void
-gcm_longs_to_bytes(const unsigned long *l, unsigned char *c, unsigned int len)
+#else
+/* Binary multiplication x * y = r_high << 32 | r_low. */
+void
+bmul32(uint32_t x, uint32_t y, uint32_t *r_high, uint32_t *r_low)
{
- int i, j;
- int array_size = len / sizeof(unsigned long);
-
- PORT_Assert(len % sizeof(unsigned long) == 0);
- for (i = 0; i < array_size; i++) {
- unsigned long tmp = l[i];
- int byte_offset = i * sizeof(unsigned long);
- for (j = 0; j < sizeof(unsigned long); j++) {
- c[byte_offset + j] = gcm_byte_rev[tmp & 0xff];
- tmp = (tmp >> PR_BITS_PER_BYTE);
- }
- }
+ uint32_t x0, x1, x2, x3;
+ uint32_t y0, y1, y2, y3;
+ uint32_t m1 = (uint32_t)0x11111111;
+ uint32_t m2 = (uint32_t)0x22222222;
+ uint32_t m4 = (uint32_t)0x44444444;
+ uint32_t m8 = (uint32_t)0x88888888;
+ uint64_t z0, z1, z2, z3;
+ uint64_t z;
+
+ x0 = x & m1;
+ x1 = x & m2;
+ x2 = x & m4;
+ x3 = x & m8;
+ y0 = y & m1;
+ y1 = y & m2;
+ y2 = y & m4;
+ y3 = y & m8;
+ z0 = ((uint64_t)x0 * y0) ^ ((uint64_t)x1 * y3) ^
+ ((uint64_t)x2 * y2) ^ ((uint64_t)x3 * y1);
+ z1 = ((uint64_t)x0 * y1) ^ ((uint64_t)x1 * y0) ^
+ ((uint64_t)x2 * y3) ^ ((uint64_t)x3 * y2);
+ z2 = ((uint64_t)x0 * y2) ^ ((uint64_t)x1 * y1) ^
+ ((uint64_t)x2 * y0) ^ ((uint64_t)x3 * y3);
+ z3 = ((uint64_t)x0 * y3) ^ ((uint64_t)x1 * y2) ^
+ ((uint64_t)x2 * y1) ^ ((uint64_t)x3 * y0);
+ z0 &= ((uint64_t)m1 << 32) | m1;
+ z1 &= ((uint64_t)m2 << 32) | m2;
+ z2 &= ((uint64_t)m4 << 32) | m4;
+ z3 &= ((uint64_t)m8 << 32) | m8;
+ z = z0 | z1 | z2 | z3;
+ *r_high = (uint32_t)(z >> 32);
+ *r_low = (uint32_t)z;
}
-/* Initialize a gcmHashContext */
-static SECStatus
-gcmHash_InitContext(gcmHashContext *ghash, const unsigned char *H,
- unsigned int blocksize)
+SECStatus
+gcm_HashMult_sftw32(gcmHashContext *ghash, const unsigned char *buf,
+ unsigned int count)
{
- PORT_Memset(ghash->X, 0, sizeof(ghash->X));
- PORT_Memset(ghash->H, 0, sizeof(ghash->H));
- gcm_bytes_to_longs(ghash->H, H, blocksize);
-
- /* set the irreducible polynomial. Each blocksize has its own polynommial
- * for now only blocksize 16 (=128 bits) is defined */
- switch (blocksize) {
- case 16: /* 128 bits */
- ghash->R = (unsigned long)0x87; /* x^7 + x^2 + x +1 */
- break;
- default:
- PORT_SetError(SEC_ERROR_INVALID_ARGS);
- goto cleanup;
+ size_t i;
+ uint64_t ci_low, ci_high;
+ uint64_t z_high_h, z_high_l, z_low_h, z_low_l;
+ uint32_t ci_high_h, ci_high_l, ci_low_h, ci_low_l;
+ uint32_t b_a_h, b_a_l, a_a_h, a_a_l, b_b_h, b_b_l;
+ uint32_t a_b_h, a_b_l, b_c_h, b_c_l, a_c_h, a_c_l, c_c_h, c_c_l;
+ uint32_t ci_highXlow_h, ci_highXlow_l, c_a_h, c_a_l, c_b_h, c_b_l;
+
+ uint32_t h_high_h = (uint32_t)(ghash->h_high >> 32);
+ uint32_t h_high_l = (uint32_t)ghash->h_high;
+ uint32_t h_low_h = (uint32_t)(ghash->h_low >> 32);
+ uint32_t h_low_l = (uint32_t)ghash->h_low;
+ uint32_t h_highXlow_h = h_high_h ^ h_low_h;
+ uint32_t h_highXlow_l = h_high_l ^ h_low_l;
+ uint32_t h_highX_xored = h_highXlow_h ^ h_highXlow_l;
+
+ for (i = 0; i < count; i++, buf += 16) {
+ ci_low = ghash->x_low ^ get64(buf + 8);
+ ci_high = ghash->x_high ^ get64(buf);
+ ci_low_h = (uint32_t)(ci_low >> 32);
+ ci_low_l = (uint32_t)ci_low;
+ ci_high_h = (uint32_t)(ci_high >> 32);
+ ci_high_l = (uint32_t)ci_high;
+ ci_highXlow_h = ci_high_h ^ ci_low_h;
+ ci_highXlow_l = ci_high_l ^ ci_low_l;
+
+ /* Do binary mult ghash->X = C * ghash->H (recursive Karatsuba). */
+ bmul32(ci_high_h, h_high_h, &a_a_h, &a_a_l);
+ bmul32(ci_high_l, h_high_l, &a_b_h, &a_b_l);
+ bmul32(ci_high_h ^ ci_high_l, h_high_h ^ h_high_l, &a_c_h, &a_c_l);
+ a_c_h ^= a_a_h ^ a_b_h;
+ a_c_l ^= a_a_l ^ a_b_l;
+ a_a_l ^= a_c_h;
+ a_b_h ^= a_c_l;
+ /* ci_high * h_high = a_a_h:a_a_l:a_b_h:a_b_l */
+
+ bmul32(ci_low_h, h_low_h, &b_a_h, &b_a_l);
+ bmul32(ci_low_l, h_low_l, &b_b_h, &b_b_l);
+ bmul32(ci_low_h ^ ci_low_l, h_low_h ^ h_low_l, &b_c_h, &b_c_l);
+ b_c_h ^= b_a_h ^ b_b_h;
+ b_c_l ^= b_a_l ^ b_b_l;
+ b_a_l ^= b_c_h;
+ b_b_h ^= b_c_l;
+ /* ci_low * h_low = b_a_h:b_a_l:b_b_h:b_b_l */
+
+ bmul32(ci_highXlow_h, h_highXlow_h, &c_a_h, &c_a_l);
+ bmul32(ci_highXlow_l, h_highXlow_l, &c_b_h, &c_b_l);
+ bmul32(ci_highXlow_h ^ ci_highXlow_l, h_highX_xored, &c_c_h, &c_c_l);
+ c_c_h ^= c_a_h ^ c_b_h;
+ c_c_l ^= c_a_l ^ c_b_l;
+ c_a_l ^= c_c_h;
+ c_b_h ^= c_c_l;
+ /* (ci_high ^ ci_low) * (h_high ^ h_low) = c_a_h:c_a_l:c_b_h:c_b_l */
+
+ c_a_h ^= b_a_h ^ a_a_h;
+ c_a_l ^= b_a_l ^ a_a_l;
+ c_b_h ^= b_b_h ^ a_b_h;
+ c_b_l ^= b_b_l ^ a_b_l;
+ z_high_h = ((uint64_t)a_a_h << 32) | a_a_l;
+ z_high_l = (((uint64_t)a_b_h << 32) | a_b_l) ^
+ (((uint64_t)c_a_h << 32) | c_a_l);
+ z_low_h = (((uint64_t)b_a_h << 32) | b_a_l) ^
+ (((uint64_t)c_b_h << 32) | c_b_l);
+ z_low_l = ((uint64_t)b_b_h << 32) | b_b_l;
+
+ /* Shift one (multiply by x) as gcm spec is stupid. */
+ z_high_h = z_high_h << 1 | z_high_l >> 63;
+ z_high_l = z_high_l << 1 | z_low_h >> 63;
+ z_low_h = z_low_h << 1 | z_low_l >> 63;
+ z_low_l <<= 1;
+
+ /* Reduce */
+ z_low_h ^= (z_low_l << 63) ^ (z_low_l << 62) ^ (z_low_l << 57);
+ z_high_h ^= z_low_h ^ (z_low_h >> 1) ^ (z_low_h >> 2) ^ (z_low_h >> 7);
+ z_high_l ^= z_low_l ^ (z_low_l >> 1) ^ (z_low_l >> 2) ^ (z_low_l >> 7) ^
+ (z_low_h << 63) ^ (z_low_h << 62) ^ (z_low_h << 57);
+ ghash->x_high = z_high_h;
+ ghash->x_low = z_high_l;
}
- ghash->cLen = 0;
- ghash->bufLen = 0;
- ghash->m = 0;
- PORT_Memset(ghash->counterBuf, 0, sizeof(ghash->counterBuf));
return SECSuccess;
-cleanup:
- return SECFailure;
-}
-
-/* Destroy a HashContext (Note we zero the digits so this function
- * is idempotent if called with freeit == PR_FALSE */
-static void
-gcmHash_DestroyContext(gcmHashContext *ghash, PRBool freeit)
-{
- PORT_Memset(ghash, 0, sizeof(gcmHashContext));
- if (freeit) {
- PORT_Free(ghash);
- }
}
+#endif /* HAVE_INT128_SUPPORT */
-static unsigned long
-gcm_shift_one(unsigned long *t, unsigned int count)
+SECStatus
+gcm_HashMult_hw(gcmHashContext *ghash, const unsigned char *buf,
+ unsigned int count)
{
- unsigned long carry = 0;
- unsigned long nextcarry = 0;
- unsigned int i;
- for (i = 0; i < count; i++) {
- nextcarry = t[i] >> ((sizeof(unsigned long) * PR_BITS_PER_BYTE) - 1);
- t[i] = (t[i] << 1) | carry;
- carry = nextcarry;
+#ifdef NSS_X86_OR_X64
+ size_t i;
+ pre_align __m128i z_high post_align;
+ pre_align __m128i z_low post_align;
+ pre_align __m128i C post_align;
+ pre_align __m128i D post_align;
+ pre_align __m128i E post_align;
+ pre_align __m128i F post_align;
+ pre_align __m128i bin post_align;
+ pre_align __m128i Ci post_align;
+ pre_align __m128i tmp post_align;
+
+ for (i = 0; i < count; i++, buf += 16) {
+ bin = _mm_set_epi16(((uint16_t)buf[0] << 8) | buf[1],
+ ((uint16_t)buf[2] << 8) | buf[3],
+ ((uint16_t)buf[4] << 8) | buf[5],
+ ((uint16_t)buf[6] << 8) | buf[7],
+ ((uint16_t)buf[8] << 8) | buf[9],
+ ((uint16_t)buf[10] << 8) | buf[11],
+ ((uint16_t)buf[12] << 8) | buf[13],
+ ((uint16_t)buf[14] << 8) | buf[15]);
+ Ci = _mm_xor_si128(bin, ghash->x);
+
+ /* Do binary mult ghash->X = Ci * ghash->H. */
+ C = _mm_clmulepi64_si128(Ci, ghash->h, 0x00);
+ D = _mm_clmulepi64_si128(Ci, ghash->h, 0x11);
+ E = _mm_clmulepi64_si128(Ci, ghash->h, 0x01);
+ F = _mm_clmulepi64_si128(Ci, ghash->h, 0x10);
+ tmp = _mm_xor_si128(E, F);
+ z_high = _mm_xor_si128(tmp, _mm_slli_si128(D, 8));
+ z_high = _mm_unpackhi_epi64(z_high, D);
+ z_low = _mm_xor_si128(_mm_slli_si128(tmp, 8), C);
+ z_low = _mm_unpackhi_epi64(_mm_slli_si128(C, 8), z_low);
+
+ /* Shift one to the left (multiply by x) as gcm spec is stupid. */
+ C = _mm_slli_si128(z_low, 8);
+ E = _mm_srli_epi64(C, 63);
+ D = _mm_slli_si128(z_high, 8);
+ F = _mm_srli_epi64(D, 63);
+ /* Carry over */
+ C = _mm_srli_si128(z_low, 8);
+ D = _mm_srli_epi64(C, 63);
+ z_low = _mm_or_si128(_mm_slli_epi64(z_low, 1), E);
+ z_high = _mm_or_si128(_mm_or_si128(_mm_slli_epi64(z_high, 1), F), D);
+
+ /* Reduce */
+ C = _mm_slli_si128(z_low, 8);
+ /* D = z_low << 127 */
+ D = _mm_slli_epi64(C, 63);
+ /* E = z_low << 126 */
+ E = _mm_slli_epi64(C, 62);
+ /* F = z_low << 121 */
+ F = _mm_slli_epi64(C, 57);
+ /* z_low ^= (z_low << 127) ^ (z_low << 126) ^ (z_low << 121); */
+ z_low = _mm_xor_si128(_mm_xor_si128(_mm_xor_si128(z_low, D), E), F);
+ C = _mm_srli_si128(z_low, 8);
+ /* D = z_low >> 1 */
+ D = _mm_slli_epi64(C, 63);
+ D = _mm_or_si128(_mm_srli_epi64(z_low, 1), D);
+ /* E = z_low >> 2 */
+ E = _mm_slli_epi64(C, 62);
+ E = _mm_or_si128(_mm_srli_epi64(z_low, 2), E);
+ /* F = z_low >> 7 */
+ F = _mm_slli_epi64(C, 57);
+ F = _mm_or_si128(_mm_srli_epi64(z_low, 7), F);
+ /* ghash->x ^= z_low ^ (z_low >> 1) ^ (z_low >> 2) ^ (z_low >> 7); */
+ ghash->x = _mm_xor_si128(_mm_xor_si128(
+ _mm_xor_si128(_mm_xor_si128(z_high, z_low), D), E),
+ F);
}
- return carry;
-}
-
-static SECStatus
-gcm_getX(gcmHashContext *ghash, unsigned char *T, unsigned int blocksize)
-{
- gcm_longs_to_bytes(ghash->X, T, blocksize);
return SECSuccess;
+#else
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return SECFailure;
+#endif /* NSS_X86_OR_X64 */
}
-#define GCM_XOR(t, s, len) \
- for (l = 0; l < len; l++) \
- t[l] ^= s[l]
-
static SECStatus
-gcm_HashMult(gcmHashContext *ghash, const unsigned char *buf,
- unsigned int count, unsigned int blocksize)
+gcm_zeroX(gcmHashContext *ghash)
{
- unsigned long C_i[GCM_ARRAY_SIZE];
- unsigned int arraysize = blocksize / sizeof(unsigned long);
- unsigned int i, j, k, l;
-
- for (i = 0; i < count; i++, buf += blocksize) {
- ghash->m++;
- gcm_bytes_to_longs(C_i, buf, blocksize);
- GCM_XOR(C_i, ghash->X, arraysize);
- /* multiply X = C_i * H */
- PORT_Memset(ghash->X, 0, sizeof(ghash->X));
- for (j = 0; j < arraysize; j++) {
- unsigned long H = ghash->H[j];
- for (k = 0; k < sizeof(unsigned long) * PR_BITS_PER_BYTE; k++) {
- if (H & 1) {
- GCM_XOR(ghash->X, C_i, arraysize);
- }
- if (gcm_shift_one(C_i, arraysize)) {
- C_i[0] = C_i[0] ^ ghash->R;
- }
- H = H >> 1;
- }
- }
- GCM_TRACE_X(ghash, "X%d = ")
+ if (ghash->hw) {
+#ifdef NSS_X86_OR_X64
+ ghash->x = _mm_setzero_si128();
+ return SECSuccess;
+#else
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return SECFailure;
+#endif /* NSS_X86_OR_X64 */
}
- PORT_Memset(C_i, 0, sizeof(C_i));
- return SECSuccess;
-}
-static void
-gcm_zeroX(gcmHashContext *ghash)
-{
- PORT_Memset(ghash->X, 0, sizeof(ghash->X));
- ghash->m = 0;
+ ghash->x_high = ghash->x_low = 0;
+ return SECSuccess;
}
-#endif
/*
* implement GCM GHASH using the freebl GHASH function. The gcm_HashMult
- * function always takes blocksize lengths of data. gcmHash_Update will
+ * function always takes AES_BLOCK_SIZE lengths of data. gcmHash_Update will
* format the data properly.
*/
-static SECStatus
+SECStatus
gcmHash_Update(gcmHashContext *ghash, const unsigned char *buf,
- unsigned int len, unsigned int blocksize)
+ unsigned int len)
{
unsigned int blocks;
SECStatus rv;
@@ -458,7 +398,7 @@ gcmHash_Update(gcmHashContext *ghash, const unsigned char *buf,
/* first deal with the current buffer of data. Try to fill it out so
* we can hash it */
if (ghash->bufLen) {
- unsigned int needed = PR_MIN(len, blocksize - ghash->bufLen);
+ unsigned int needed = PR_MIN(len, AES_BLOCK_SIZE - ghash->bufLen);
if (needed != 0) {
PORT_Memcpy(ghash->buffer + ghash->bufLen, buf, needed);
}
@@ -469,24 +409,24 @@ gcmHash_Update(gcmHashContext *ghash, const unsigned char *buf,
/* didn't add enough to hash the data, nothing more do do */
return SECSuccess;
}
- PORT_Assert(ghash->bufLen == blocksize);
+ PORT_Assert(ghash->bufLen == AES_BLOCK_SIZE);
/* hash the buffer and clear it */
- rv = gcm_HashMult(ghash, ghash->buffer, 1, blocksize);
- PORT_Memset(ghash->buffer, 0, blocksize);
+ rv = ghash->ghash_mul(ghash, ghash->buffer, 1);
+ PORT_Memset(ghash->buffer, 0, AES_BLOCK_SIZE);
ghash->bufLen = 0;
if (rv != SECSuccess) {
return SECFailure;
}
}
/* now hash any full blocks remaining in the data stream */
- blocks = len / blocksize;
+ blocks = len / AES_BLOCK_SIZE;
if (blocks) {
- rv = gcm_HashMult(ghash, buf, blocks, blocksize);
+ rv = ghash->ghash_mul(ghash, buf, blocks);
if (rv != SECSuccess) {
return SECFailure;
}
- buf += blocks * blocksize;
- len -= blocks * blocksize;
+ buf += blocks * AES_BLOCK_SIZE;
+ len -= blocks * AES_BLOCK_SIZE;
}
/* save any remainder in the buffer to be hashed with the next call */
@@ -502,7 +442,7 @@ gcmHash_Update(gcmHashContext *ghash, const unsigned char *buf,
* save the lengths for the final completion of the hash
*/
static SECStatus
-gcmHash_Sync(gcmHashContext *ghash, unsigned int blocksize)
+gcmHash_Sync(gcmHashContext *ghash)
{
int i;
SECStatus rv;
@@ -519,9 +459,9 @@ gcmHash_Sync(gcmHashContext *ghash, unsigned int blocksize)
/* now zero fill the buffer and hash the last block */
if (ghash->bufLen) {
- PORT_Memset(ghash->buffer + ghash->bufLen, 0, blocksize - ghash->bufLen);
- rv = gcm_HashMult(ghash, ghash->buffer, 1, blocksize);
- PORT_Memset(ghash->buffer, 0, blocksize);
+ PORT_Memset(ghash->buffer + ghash->bufLen, 0, AES_BLOCK_SIZE - ghash->bufLen);
+ rv = ghash->ghash_mul(ghash, ghash->buffer, 1);
+ PORT_Memset(ghash->buffer, 0, AES_BLOCK_SIZE);
ghash->bufLen = 0;
if (rv != SECSuccess) {
return SECFailure;
@@ -530,38 +470,56 @@ gcmHash_Sync(gcmHashContext *ghash, unsigned int blocksize)
return SECSuccess;
}
+#define WRITE64(x, bytes) \
+ (bytes)[0] = (x) >> 56; \
+ (bytes)[1] = (x) >> 48; \
+ (bytes)[2] = (x) >> 40; \
+ (bytes)[3] = (x) >> 32; \
+ (bytes)[4] = (x) >> 24; \
+ (bytes)[5] = (x) >> 16; \
+ (bytes)[6] = (x) >> 8; \
+ (bytes)[7] = (x);
+
/*
* This does the final sync, hashes the lengths, then returns
* "T", the hashed output.
*/
-static SECStatus
+SECStatus
gcmHash_Final(gcmHashContext *ghash, unsigned char *outbuf,
- unsigned int *outlen, unsigned int maxout,
- unsigned int blocksize)
+ unsigned int *outlen, unsigned int maxout)
{
unsigned char T[MAX_BLOCK_SIZE];
SECStatus rv;
- rv = gcmHash_Sync(ghash, blocksize);
+ rv = gcmHash_Sync(ghash);
if (rv != SECSuccess) {
goto cleanup;
}
- rv = gcm_HashMult(ghash, ghash->counterBuf, (GCM_HASH_LEN_LEN * 2) / blocksize,
- blocksize);
+ rv = ghash->ghash_mul(ghash, ghash->counterBuf,
+ (GCM_HASH_LEN_LEN * 2) / AES_BLOCK_SIZE);
if (rv != SECSuccess) {
goto cleanup;
}
- GCM_TRACE_X(ghash, "GHASH(H,A,C) = ")
-
- rv = gcm_getX(ghash, T, blocksize);
- if (rv != SECSuccess) {
- goto cleanup;
+ if (ghash->hw) {
+#ifdef NSS_X86_OR_X64
+ uint64_t tmp_out[2];
+ _mm_storeu_si128((__m128i *)tmp_out, ghash->x);
+ WRITE64(tmp_out[0], T + 8);
+ WRITE64(tmp_out[1], T);
+#else
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return SECFailure;
+#endif /* NSS_X86_OR_X64 */
+ } else {
+ WRITE64(ghash->x_low, T + 8);
+ WRITE64(ghash->x_high, T);
}
- if (maxout > blocksize)
- maxout = blocksize;
+ if (maxout > AES_BLOCK_SIZE) {
+ maxout = AES_BLOCK_SIZE;
+ }
PORT_Memcpy(outbuf, T, maxout);
*outlen = maxout;
rv = SECSuccess;
@@ -573,22 +531,25 @@ cleanup:
SECStatus
gcmHash_Reset(gcmHashContext *ghash, const unsigned char *AAD,
- unsigned int AADLen, unsigned int blocksize)
+ unsigned int AADLen)
{
SECStatus rv;
ghash->cLen = 0;
PORT_Memset(ghash->counterBuf, 0, GCM_HASH_LEN_LEN * 2);
ghash->bufLen = 0;
- gcm_zeroX(ghash);
+ rv = gcm_zeroX(ghash);
+ if (rv != SECSuccess) {
+ return rv;
+ }
/* now kick things off by hashing the Additional Authenticated Data */
if (AADLen != 0) {
- rv = gcmHash_Update(ghash, AAD, AADLen, blocksize);
+ rv = gcmHash_Update(ghash, AAD, AADLen);
if (rv != SECSuccess) {
return SECFailure;
}
- rv = gcmHash_Sync(ghash, blocksize);
+ rv = gcmHash_Sync(ghash);
if (rv != SECSuccess) {
return SECFailure;
}
@@ -602,7 +563,7 @@ gcmHash_Reset(gcmHashContext *ghash, const unsigned char *AAD,
/* state to handle the full GCM operation (hash and counter) */
struct GCMContextStr {
- gcmHashContext ghash_context;
+ gcmHashContext *ghash_context;
CTRContext ctr_context;
unsigned long tagBits;
unsigned char tagKey[MAX_BLOCK_SIZE];
@@ -610,58 +571,69 @@ struct GCMContextStr {
GCMContext *
GCM_CreateContext(void *context, freeblCipherFunc cipher,
- const unsigned char *params, unsigned int blocksize)
+ const unsigned char *params)
{
GCMContext *gcm = NULL;
- gcmHashContext *ghash;
+ gcmHashContext *ghash = NULL;
unsigned char H[MAX_BLOCK_SIZE];
unsigned int tmp;
PRBool freeCtr = PR_FALSE;
- PRBool freeHash = PR_FALSE;
const CK_GCM_PARAMS *gcmParams = (const CK_GCM_PARAMS *)params;
CK_AES_CTR_PARAMS ctrParams;
SECStatus rv;
+#ifdef DISABLE_HW_GCM
+ const PRBool sw = PR_TRUE;
+#else
+ const PRBool sw = PR_FALSE;
+#endif
- if (blocksize > MAX_BLOCK_SIZE || blocksize > sizeof(ctrParams.cb)) {
- PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ if (gcmParams->ulIvLen == 0) {
+ PORT_SetError(SEC_ERROR_INVALID_ARGS);
return NULL;
}
gcm = PORT_ZNew(GCMContext);
if (gcm == NULL) {
return NULL;
}
- /* first fill in the ghash context */
- ghash = &gcm->ghash_context;
- PORT_Memset(H, 0, blocksize);
- rv = (*cipher)(context, H, &tmp, blocksize, H, blocksize, blocksize);
+ /* aligned_alloc is C11 so we have to do it the old way. */
+ ghash = PORT_ZAlloc(sizeof(gcmHashContext) + 15);
+ if (ghash == NULL) {
+ PORT_SetError(SEC_ERROR_NO_MEMORY);
+ goto loser;
+ }
+ ghash->mem = ghash;
+ ghash = (gcmHashContext *)(((uintptr_t)ghash + 15) & ~(uintptr_t)0x0F);
+
+ /* first plug in the ghash context */
+ gcm->ghash_context = ghash;
+ PORT_Memset(H, 0, AES_BLOCK_SIZE);
+ rv = (*cipher)(context, H, &tmp, AES_BLOCK_SIZE, H, AES_BLOCK_SIZE, AES_BLOCK_SIZE);
if (rv != SECSuccess) {
goto loser;
}
- rv = gcmHash_InitContext(ghash, H, blocksize);
+ rv = gcmHash_InitContext(ghash, H, sw);
if (rv != SECSuccess) {
goto loser;
}
- freeHash = PR_TRUE;
/* fill in the Counter context */
ctrParams.ulCounterBits = 32;
PORT_Memset(ctrParams.cb, 0, sizeof(ctrParams.cb));
- if ((blocksize == 16) && (gcmParams->ulIvLen == 12)) {
+ if (gcmParams->ulIvLen == 12) {
PORT_Memcpy(ctrParams.cb, gcmParams->pIv, gcmParams->ulIvLen);
- ctrParams.cb[blocksize - 1] = 1;
+ ctrParams.cb[AES_BLOCK_SIZE - 1] = 1;
} else {
- rv = gcmHash_Update(ghash, gcmParams->pIv, gcmParams->ulIvLen,
- blocksize);
+ rv = gcmHash_Update(ghash, gcmParams->pIv, gcmParams->ulIvLen);
if (rv != SECSuccess) {
goto loser;
}
- rv = gcmHash_Final(ghash, ctrParams.cb, &tmp, blocksize, blocksize);
+ rv = gcmHash_Final(ghash, ctrParams.cb, &tmp, AES_BLOCK_SIZE);
if (rv != SECSuccess) {
goto loser;
}
}
rv = CTR_InitContext(&gcm->ctr_context, context, cipher,
- (unsigned char *)&ctrParams, blocksize);
+ (unsigned char *)&ctrParams);
if (rv != SECSuccess) {
goto loser;
}
@@ -671,14 +643,14 @@ GCM_CreateContext(void *context, freeblCipherFunc cipher,
gcm->tagBits = gcmParams->ulTagBits; /* save for final step */
/* calculate the final tag key. NOTE: gcm->tagKey is zero to start with.
* if this assumption changes, we would need to explicitly clear it here */
- rv = CTR_Update(&gcm->ctr_context, gcm->tagKey, &tmp, blocksize,
- gcm->tagKey, blocksize, blocksize);
+ rv = CTR_Update(&gcm->ctr_context, gcm->tagKey, &tmp, AES_BLOCK_SIZE,
+ gcm->tagKey, AES_BLOCK_SIZE, AES_BLOCK_SIZE);
if (rv != SECSuccess) {
goto loser;
}
/* finally mix in the AAD data */
- rv = gcmHash_Reset(ghash, gcmParams->pAAD, gcmParams->ulAADLen, blocksize);
+ rv = gcmHash_Reset(ghash, gcmParams->pAAD, gcmParams->ulAADLen);
if (rv != SECSuccess) {
goto loser;
}
@@ -689,8 +661,8 @@ loser:
if (freeCtr) {
CTR_DestroyContext(&gcm->ctr_context, PR_FALSE);
}
- if (freeHash) {
- gcmHash_DestroyContext(&gcm->ghash_context, PR_FALSE);
+ if (ghash && ghash->mem) {
+ PORT_Free(ghash->mem);
}
if (gcm) {
PORT_Free(gcm);
@@ -705,7 +677,7 @@ GCM_DestroyContext(GCMContext *gcm, PRBool freeit)
* gcm. call their destroy functions to free up any locally
* allocated data (like mp_int's) */
CTR_DestroyContext(&gcm->ctr_context, PR_FALSE);
- gcmHash_DestroyContext(&gcm->ghash_context, PR_FALSE);
+ PORT_Free(gcm->ghash_context->mem);
PORT_Memset(&gcm->tagBits, 0, sizeof(gcm->tagBits));
PORT_Memset(gcm->tagKey, 0, sizeof(gcm->tagKey));
if (freeit) {
@@ -715,8 +687,7 @@ GCM_DestroyContext(GCMContext *gcm, PRBool freeit)
static SECStatus
gcm_GetTag(GCMContext *gcm, unsigned char *outbuf,
- unsigned int *outlen, unsigned int maxout,
- unsigned int blocksize)
+ unsigned int *outlen, unsigned int maxout)
{
unsigned int tagBytes;
unsigned int extra;
@@ -738,18 +709,14 @@ gcm_GetTag(GCMContext *gcm, unsigned char *outbuf,
return SECFailure;
}
maxout = tagBytes;
- rv = gcmHash_Final(&gcm->ghash_context, outbuf, outlen, maxout, blocksize);
+ rv = gcmHash_Final(gcm->ghash_context, outbuf, outlen, maxout);
if (rv != SECSuccess) {
return SECFailure;
}
- GCM_TRACE_BLOCK("GHASH=", outbuf, blocksize);
- GCM_TRACE_BLOCK("Y0=", gcm->tagKey, blocksize);
for (i = 0; i < *outlen; i++) {
outbuf[i] ^= gcm->tagKey[i];
}
- GCM_TRACE_BLOCK("Y0=", gcm->tagKey, blocksize);
- GCM_TRACE_BLOCK("T=", outbuf, blocksize);
/* mask off any extra bits we got */
if (extra) {
outbuf[tagBytes - 1] &= ~((1 << extra) - 1);
@@ -772,6 +739,12 @@ GCM_EncryptUpdate(GCMContext *gcm, unsigned char *outbuf,
unsigned int tagBytes;
unsigned int len;
+ PORT_Assert(blocksize == AES_BLOCK_SIZE);
+ if (blocksize != AES_BLOCK_SIZE) {
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return SECFailure;
+ }
+
tagBytes = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE;
if (UINT_MAX - inlen < tagBytes) {
PORT_SetError(SEC_ERROR_INPUT_LEN);
@@ -784,17 +757,17 @@ GCM_EncryptUpdate(GCMContext *gcm, unsigned char *outbuf,
}
rv = CTR_Update(&gcm->ctr_context, outbuf, outlen, maxout,
- inbuf, inlen, blocksize);
+ inbuf, inlen, AES_BLOCK_SIZE);
if (rv != SECSuccess) {
return SECFailure;
}
- rv = gcmHash_Update(&gcm->ghash_context, outbuf, *outlen, blocksize);
+ rv = gcmHash_Update(gcm->ghash_context, outbuf, *outlen);
if (rv != SECSuccess) {
PORT_Memset(outbuf, 0, *outlen); /* clear the output buffer */
*outlen = 0;
return SECFailure;
}
- rv = gcm_GetTag(gcm, outbuf + *outlen, &len, maxout - *outlen, blocksize);
+ rv = gcm_GetTag(gcm, outbuf + *outlen, &len, maxout - *outlen);
if (rv != SECSuccess) {
PORT_Memset(outbuf, 0, *outlen); /* clear the output buffer */
*outlen = 0;
@@ -824,6 +797,12 @@ GCM_DecryptUpdate(GCMContext *gcm, unsigned char *outbuf,
const unsigned char *intag;
unsigned int len;
+ PORT_Assert(blocksize == AES_BLOCK_SIZE);
+ if (blocksize != AES_BLOCK_SIZE) {
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return SECFailure;
+ }
+
tagBytes = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE;
/* get the authentication block */
@@ -836,11 +815,11 @@ GCM_DecryptUpdate(GCMContext *gcm, unsigned char *outbuf,
intag = inbuf + inlen;
/* verify the block */
- rv = gcmHash_Update(&gcm->ghash_context, inbuf, inlen, blocksize);
+ rv = gcmHash_Update(gcm->ghash_context, inbuf, inlen);
if (rv != SECSuccess) {
return SECFailure;
}
- rv = gcm_GetTag(gcm, tag, &len, blocksize, blocksize);
+ rv = gcm_GetTag(gcm, tag, &len, AES_BLOCK_SIZE);
if (rv != SECSuccess) {
return SECFailure;
}
@@ -856,5 +835,5 @@ GCM_DecryptUpdate(GCMContext *gcm, unsigned char *outbuf,
PORT_Memset(tag, 0, sizeof(tag));
/* finish the decryption */
return CTR_Update(&gcm->ctr_context, outbuf, outlen, maxout,
- inbuf, inlen, blocksize);
+ inbuf, inlen, AES_BLOCK_SIZE);
}