diff options
Diffstat (limited to 'security/nss/lib/freebl/rijndael.c')
-rw-r--r-- | security/nss/lib/freebl/rijndael.c | 573 |
1 files changed, 272 insertions, 301 deletions
diff --git a/security/nss/lib/freebl/rijndael.c b/security/nss/lib/freebl/rijndael.c index 4bb182693..e4ad60388 100644 --- a/security/nss/lib/freebl/rijndael.c +++ b/security/nss/lib/freebl/rijndael.c @@ -18,27 +18,14 @@ #include "cts.h" #include "ctr.h" #include "gcm.h" +#include "mpi.h" #ifdef USE_HW_AES #include "intel-aes.h" #endif - -#include "mpi.h" - -#ifdef USE_HW_AES -static int has_intel_aes = 0; -static PRBool use_hw_aes = PR_FALSE; - #ifdef INTEL_GCM #include "intel-gcm.h" -static int has_intel_avx = 0; -static int has_intel_clmul = 0; -static PRBool use_hw_gcm = PR_FALSE; -#if defined(_MSC_VER) && !defined(_M_IX86) -#include <intrin.h> /* for _xgetbv() */ -#endif -#endif -#endif /* USE_HW_AES */ +#endif /* INTEL_GCM */ /* * There are currently five ways to build this code, varying in performance @@ -379,7 +366,7 @@ init_rijndael_tables(void) * Nk == 8 where it happens twice in every key word, in the same positions). * For now, I'm implementing this case "dumbly", w/o any unrolling. */ -static SECStatus +static void rijndael_key_expansion7(AESContext *cx, const unsigned char *key, unsigned int Nk) { unsigned int i; @@ -400,14 +387,169 @@ rijndael_key_expansion7(AESContext *cx, const unsigned char *key, unsigned int N tmp = SUBBYTE(tmp); *pW = W[i - Nk] ^ tmp; } - return SECSuccess; +} + +#if defined(NSS_X86_OR_X64) +#define EXPAND_KEY128(k, rcon, res) \ + tmp_key = _mm_aeskeygenassist_si128(k, rcon); \ + tmp_key = _mm_shuffle_epi32(tmp_key, 0xFF); \ + tmp = _mm_xor_si128(k, _mm_slli_si128(k, 4)); \ + tmp = _mm_xor_si128(tmp, _mm_slli_si128(tmp, 4)); \ + tmp = _mm_xor_si128(tmp, _mm_slli_si128(tmp, 4)); \ + res = _mm_xor_si128(tmp, tmp_key) + +static void +native_key_expansion128(AESContext *cx, const unsigned char *key) +{ + __m128i *keySchedule = cx->keySchedule; + pre_align __m128i tmp_key post_align; + pre_align __m128i tmp post_align; + keySchedule[0] = _mm_loadu_si128((__m128i *)key); + EXPAND_KEY128(keySchedule[0], 0x01, keySchedule[1]); + EXPAND_KEY128(keySchedule[1], 0x02, keySchedule[2]); + EXPAND_KEY128(keySchedule[2], 0x04, keySchedule[3]); + EXPAND_KEY128(keySchedule[3], 0x08, keySchedule[4]); + EXPAND_KEY128(keySchedule[4], 0x10, keySchedule[5]); + EXPAND_KEY128(keySchedule[5], 0x20, keySchedule[6]); + EXPAND_KEY128(keySchedule[6], 0x40, keySchedule[7]); + EXPAND_KEY128(keySchedule[7], 0x80, keySchedule[8]); + EXPAND_KEY128(keySchedule[8], 0x1B, keySchedule[9]); + EXPAND_KEY128(keySchedule[9], 0x36, keySchedule[10]); +} + +#define EXPAND_KEY192_PART1(res, k0, kt, rcon) \ + tmp2 = _mm_slli_si128(k0, 4); \ + tmp1 = _mm_xor_si128(k0, tmp2); \ + tmp2 = _mm_slli_si128(tmp2, 4); \ + tmp1 = _mm_xor_si128(_mm_xor_si128(tmp1, tmp2), _mm_slli_si128(tmp2, 4)); \ + tmp2 = _mm_aeskeygenassist_si128(kt, rcon); \ + res = _mm_xor_si128(tmp1, _mm_shuffle_epi32(tmp2, 0x55)) + +#define EXPAND_KEY192_PART2(res, k1, k2) \ + tmp2 = _mm_xor_si128(k1, _mm_slli_si128(k1, 4)); \ + res = _mm_xor_si128(tmp2, _mm_shuffle_epi32(k2, 0xFF)) + +#define EXPAND_KEY192(k0, res1, res2, res3, carry, rcon1, rcon2) \ + EXPAND_KEY192_PART1(tmp3, k0, res1, rcon1); \ + EXPAND_KEY192_PART2(carry, res1, tmp3); \ + res1 = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(res1), \ + _mm_castsi128_pd(tmp3), 0)); \ + res2 = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(tmp3), \ + _mm_castsi128_pd(carry), 1)); \ + EXPAND_KEY192_PART1(res3, tmp3, carry, rcon2) + +static void +native_key_expansion192(AESContext *cx, const unsigned char *key) +{ + __m128i *keySchedule = cx->keySchedule; + pre_align __m128i tmp1 post_align; + pre_align __m128i tmp2 post_align; + pre_align __m128i tmp3 post_align; + pre_align __m128i carry post_align; + keySchedule[0] = _mm_loadu_si128((__m128i *)key); + keySchedule[1] = _mm_loadu_si128((__m128i *)(key + 16)); + EXPAND_KEY192(keySchedule[0], keySchedule[1], keySchedule[2], + keySchedule[3], carry, 0x1, 0x2); + EXPAND_KEY192_PART2(keySchedule[4], carry, keySchedule[3]); + EXPAND_KEY192(keySchedule[3], keySchedule[4], keySchedule[5], + keySchedule[6], carry, 0x4, 0x8); + EXPAND_KEY192_PART2(keySchedule[7], carry, keySchedule[6]); + EXPAND_KEY192(keySchedule[6], keySchedule[7], keySchedule[8], + keySchedule[9], carry, 0x10, 0x20); + EXPAND_KEY192_PART2(keySchedule[10], carry, keySchedule[9]); + EXPAND_KEY192(keySchedule[9], keySchedule[10], keySchedule[11], + keySchedule[12], carry, 0x40, 0x80); +} + +#define EXPAND_KEY256_PART(res, rconx, k1x, k2x, X) \ + tmp_key = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(k2x, rconx), X); \ + tmp2 = _mm_slli_si128(k1x, 4); \ + tmp1 = _mm_xor_si128(k1x, tmp2); \ + tmp2 = _mm_slli_si128(tmp2, 4); \ + tmp1 = _mm_xor_si128(_mm_xor_si128(tmp1, tmp2), _mm_slli_si128(tmp2, 4)); \ + res = _mm_xor_si128(tmp1, tmp_key); + +#define EXPAND_KEY256(res1, res2, k1, k2, rcon) \ + EXPAND_KEY256_PART(res1, rcon, k1, k2, 0xFF); \ + EXPAND_KEY256_PART(res2, 0x00, k2, res1, 0xAA) + +static void +native_key_expansion256(AESContext *cx, const unsigned char *key) +{ + __m128i *keySchedule = cx->keySchedule; + pre_align __m128i tmp_key post_align; + pre_align __m128i tmp1 post_align; + pre_align __m128i tmp2 post_align; + keySchedule[0] = _mm_loadu_si128((__m128i *)key); + keySchedule[1] = _mm_loadu_si128((__m128i *)(key + 16)); + EXPAND_KEY256(keySchedule[2], keySchedule[3], keySchedule[0], + keySchedule[1], 0x01); + EXPAND_KEY256(keySchedule[4], keySchedule[5], keySchedule[2], + keySchedule[3], 0x02); + EXPAND_KEY256(keySchedule[6], keySchedule[7], keySchedule[4], + keySchedule[5], 0x04); + EXPAND_KEY256(keySchedule[8], keySchedule[9], keySchedule[6], + keySchedule[7], 0x08); + EXPAND_KEY256(keySchedule[10], keySchedule[11], keySchedule[8], + keySchedule[9], 0x10); + EXPAND_KEY256(keySchedule[12], keySchedule[13], keySchedule[10], + keySchedule[11], 0x20); + EXPAND_KEY256_PART(keySchedule[14], 0x40, keySchedule[12], + keySchedule[13], 0xFF); +} + +#endif /* NSS_X86_OR_X64 */ + +/* + * AES key expansion using aes-ni instructions. + */ +static void +native_key_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk) +{ +#ifdef NSS_X86_OR_X64 + switch (Nk) { + case 4: + native_key_expansion128(cx, key); + return; + case 6: + native_key_expansion192(cx, key); + return; + case 8: + native_key_expansion256(cx, key); + return; + default: + /* This shouldn't happen. */ + PORT_Assert(0); + } +#else + PORT_Assert(0); +#endif /* NSS_X86_OR_X64 */ +} + +static void +native_encryptBlock(AESContext *cx, + unsigned char *output, + const unsigned char *input) +{ +#ifdef NSS_X86_OR_X64 + int i; + pre_align __m128i m post_align = _mm_loadu_si128((__m128i *)input); + m = _mm_xor_si128(m, cx->keySchedule[0]); + for (i = 1; i < cx->Nr; ++i) { + m = _mm_aesenc_si128(m, cx->keySchedule[i]); + } + m = _mm_aesenclast_si128(m, cx->keySchedule[cx->Nr]); + _mm_storeu_si128((__m128i *)output, m); +#else + PORT_Assert(0); +#endif /* NSS_X86_OR_X64 */ } /* rijndael_key_expansion * * Generate the expanded key from the key input by the user. */ -static SECStatus +static void rijndael_key_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk) { unsigned int i; @@ -415,8 +557,10 @@ rijndael_key_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk PRUint32 *pW; PRUint32 tmp; unsigned int round_key_words = cx->Nb * (cx->Nr + 1); - if (Nk == 7) - return rijndael_key_expansion7(cx, key, Nk); + if (Nk == 7) { + rijndael_key_expansion7(cx, key, Nk); + return; + } W = cx->expandedKey; /* The first Nk words contain the input cipher key */ memcpy(W, key, Nk * 4); @@ -475,7 +619,6 @@ rijndael_key_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk *pW = W[i - Nk] ^ tmp; } } - return SECSuccess; } /* rijndael_invkey_expansion @@ -483,7 +626,7 @@ rijndael_key_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk * Generate the expanded key for the inverse cipher from the key input by * the user. */ -static SECStatus +static void rijndael_invkey_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk) { unsigned int r; @@ -491,8 +634,7 @@ rijndael_invkey_expansion(AESContext *cx, const unsigned char *key, unsigned int PRUint8 *b; int Nb = cx->Nb; /* begins like usual key expansion ... */ - if (rijndael_key_expansion(cx, key, Nk) != SECSuccess) - return SECFailure; + rijndael_key_expansion(cx, key, Nk); /* ... but has the additional step of InvMixColumn, * excepting the first and last round keys. */ @@ -534,12 +676,11 @@ rijndael_invkey_expansion(AESContext *cx, const unsigned char *key, unsigned int IMXC2(b[2]) ^ IMXC3(b[3]); } } - return SECSuccess; } + /************************************************************************** * - * Stuff related to Rijndael encryption/decryption, optimized for - * a 128-bit blocksize. + * Stuff related to Rijndael encryption/decryption. * *************************************************************************/ @@ -567,7 +708,7 @@ typedef union { #define STATE_BYTE(i) state.b[i] -static SECStatus NO_SANITIZE_ALIGNMENT +static void NO_SANITIZE_ALIGNMENT rijndael_encryptBlock128(AESContext *cx, unsigned char *output, const unsigned char *input) @@ -660,7 +801,6 @@ rijndael_encryptBlock128(AESContext *cx, memcpy(output, outBuf, sizeof outBuf); } #endif - return SECSuccess; } static SECStatus NO_SANITIZE_ALIGNMENT @@ -757,104 +897,6 @@ rijndael_decryptBlock128(AESContext *cx, /************************************************************************** * - * Stuff related to general Rijndael encryption/decryption, for blocksizes - * greater than 128 bits. - * - * XXX This code is currently untested! So far, AES specs have only been - * released for 128 bit blocksizes. This will be tested, but for now - * only the code above has been tested using known values. - * - *************************************************************************/ - -#define COLUMN(array, j) *((PRUint32 *)(array + j)) - -SECStatus -rijndael_encryptBlock(AESContext *cx, - unsigned char *output, - const unsigned char *input) -{ - return SECFailure; -#ifdef rijndael_large_blocks_fixed - unsigned int j, r, Nb; - unsigned int c2 = 0, c3 = 0; - PRUint32 *roundkeyw; - PRUint8 clone[RIJNDAEL_MAX_STATE_SIZE]; - Nb = cx->Nb; - roundkeyw = cx->expandedKey; - /* Step 1: Add Round Key 0 to initial state */ - for (j = 0; j < 4 * Nb; j += 4) { - COLUMN(clone, j) = COLUMN(input, j) ^ *roundkeyw++; - } - /* Step 2: Loop over rounds [1..NR-1] */ - for (r = 1; r < cx->Nr; ++r) { - for (j = 0; j < Nb; ++j) { - COLUMN(output, j) = T0(STATE_BYTE(4 * j)) ^ - T1(STATE_BYTE(4 * ((j + 1) % Nb) + 1)) ^ - T2(STATE_BYTE(4 * ((j + c2) % Nb) + 2)) ^ - T3(STATE_BYTE(4 * ((j + c3) % Nb) + 3)); - } - for (j = 0; j < 4 * Nb; j += 4) { - COLUMN(clone, j) = COLUMN(output, j) ^ *roundkeyw++; - } - } - /* Step 3: Do the last round */ - /* Final round does not employ MixColumn */ - for (j = 0; j < Nb; ++j) { - COLUMN(output, j) = ((BYTE0WORD(T2(STATE_BYTE(4 * j)))) | - (BYTE1WORD(T3(STATE_BYTE(4 * (j + 1) % Nb) + 1))) | - (BYTE2WORD(T0(STATE_BYTE(4 * (j + c2) % Nb) + 2))) | - (BYTE3WORD(T1(STATE_BYTE(4 * (j + c3) % Nb) + 3)))) ^ - *roundkeyw++; - } - return SECSuccess; -#endif -} - -SECStatus -rijndael_decryptBlock(AESContext *cx, - unsigned char *output, - const unsigned char *input) -{ - return SECFailure; -#ifdef rijndael_large_blocks_fixed - int j, r, Nb; - int c2 = 0, c3 = 0; - PRUint32 *roundkeyw; - PRUint8 clone[RIJNDAEL_MAX_STATE_SIZE]; - Nb = cx->Nb; - roundkeyw = cx->expandedKey + cx->Nb * cx->Nr + 3; - /* reverse key addition */ - for (j = 4 * Nb; j >= 0; j -= 4) { - COLUMN(clone, j) = COLUMN(input, j) ^ *roundkeyw--; - } - /* Loop over rounds in reverse [NR..1] */ - for (r = cx->Nr; r > 1; --r) { - /* Invert the (InvByteSub*InvMixColumn)(InvShiftRow(state)) */ - for (j = 0; j < Nb; ++j) { - COLUMN(output, 4 * j) = TInv0(STATE_BYTE(4 * j)) ^ - TInv1(STATE_BYTE(4 * (j + Nb - 1) % Nb) + 1) ^ - TInv2(STATE_BYTE(4 * (j + Nb - c2) % Nb) + 2) ^ - TInv3(STATE_BYTE(4 * (j + Nb - c3) % Nb) + 3); - } - /* Invert the key addition step */ - for (j = 4 * Nb; j >= 0; j -= 4) { - COLUMN(clone, j) = COLUMN(output, j) ^ *roundkeyw--; - } - } - /* inverse sub */ - for (j = 0; j < 4 * Nb; ++j) { - output[j] = SINV(clone[j]); - } - /* final key addition */ - for (j = 4 * Nb; j >= 0; j -= 4) { - COLUMN(output, j) ^= *roundkeyw--; - } - return SECSuccess; -#endif -} - -/************************************************************************** - * * Rijndael modes of operation (ECB and CBC) * *************************************************************************/ @@ -862,22 +904,21 @@ rijndael_decryptBlock(AESContext *cx, static SECStatus rijndael_encryptECB(AESContext *cx, unsigned char *output, unsigned int *outputLen, unsigned int maxOutputLen, - const unsigned char *input, unsigned int inputLen, - unsigned int blocksize) + const unsigned char *input, unsigned int inputLen) { - SECStatus rv; AESBlockFunc *encryptor; - encryptor = (blocksize == RIJNDAEL_MIN_BLOCKSIZE) - ? &rijndael_encryptBlock128 - : &rijndael_encryptBlock; + if (aesni_support()) { + /* Use hardware acceleration for normal AES parameters. */ + encryptor = &native_encryptBlock; + } else { + encryptor = &rijndael_encryptBlock128; + } while (inputLen > 0) { - rv = (*encryptor)(cx, output, input); - if (rv != SECSuccess) - return rv; - output += blocksize; - input += blocksize; - inputLen -= blocksize; + (*encryptor)(cx, output, input); + output += AES_BLOCK_SIZE; + input += AES_BLOCK_SIZE; + inputLen -= AES_BLOCK_SIZE; } return SECSuccess; } @@ -885,58 +926,44 @@ rijndael_encryptECB(AESContext *cx, unsigned char *output, static SECStatus rijndael_encryptCBC(AESContext *cx, unsigned char *output, unsigned int *outputLen, unsigned int maxOutputLen, - const unsigned char *input, unsigned int inputLen, - unsigned int blocksize) + const unsigned char *input, unsigned int inputLen) { unsigned int j; - SECStatus rv; - AESBlockFunc *encryptor; unsigned char *lastblock; - unsigned char inblock[RIJNDAEL_MAX_STATE_SIZE * 8]; + unsigned char inblock[AES_BLOCK_SIZE * 8]; if (!inputLen) return SECSuccess; lastblock = cx->iv; - encryptor = (blocksize == RIJNDAEL_MIN_BLOCKSIZE) - ? &rijndael_encryptBlock128 - : &rijndael_encryptBlock; while (inputLen > 0) { /* XOR with the last block (IV if first block) */ - for (j = 0; j < blocksize; ++j) + for (j = 0; j < AES_BLOCK_SIZE; ++j) { inblock[j] = input[j] ^ lastblock[j]; + } /* encrypt */ - rv = (*encryptor)(cx, output, inblock); - if (rv != SECSuccess) - return rv; + rijndael_encryptBlock128(cx, output, inblock); /* move to the next block */ lastblock = output; - output += blocksize; - input += blocksize; - inputLen -= blocksize; + output += AES_BLOCK_SIZE; + input += AES_BLOCK_SIZE; + inputLen -= AES_BLOCK_SIZE; } - memcpy(cx->iv, lastblock, blocksize); + memcpy(cx->iv, lastblock, AES_BLOCK_SIZE); return SECSuccess; } static SECStatus rijndael_decryptECB(AESContext *cx, unsigned char *output, unsigned int *outputLen, unsigned int maxOutputLen, - const unsigned char *input, unsigned int inputLen, - unsigned int blocksize) + const unsigned char *input, unsigned int inputLen) { - SECStatus rv; - AESBlockFunc *decryptor; - - decryptor = (blocksize == RIJNDAEL_MIN_BLOCKSIZE) - ? &rijndael_decryptBlock128 - : &rijndael_decryptBlock; while (inputLen > 0) { - rv = (*decryptor)(cx, output, input); - if (rv != SECSuccess) - return rv; - output += blocksize; - input += blocksize; - inputLen -= blocksize; + if (rijndael_decryptBlock128(cx, output, input) != SECSuccess) { + return SECFailure; + } + output += AES_BLOCK_SIZE; + input += AES_BLOCK_SIZE; + inputLen -= AES_BLOCK_SIZE; } return SECSuccess; } @@ -944,43 +971,37 @@ rijndael_decryptECB(AESContext *cx, unsigned char *output, static SECStatus rijndael_decryptCBC(AESContext *cx, unsigned char *output, unsigned int *outputLen, unsigned int maxOutputLen, - const unsigned char *input, unsigned int inputLen, - unsigned int blocksize) + const unsigned char *input, unsigned int inputLen) { - SECStatus rv; - AESBlockFunc *decryptor; const unsigned char *in; unsigned char *out; unsigned int j; - unsigned char newIV[RIJNDAEL_MAX_BLOCKSIZE]; + unsigned char newIV[AES_BLOCK_SIZE]; if (!inputLen) return SECSuccess; PORT_Assert(output - input >= 0 || input - output >= (int)inputLen); - decryptor = (blocksize == RIJNDAEL_MIN_BLOCKSIZE) - ? &rijndael_decryptBlock128 - : &rijndael_decryptBlock; - in = input + (inputLen - blocksize); - memcpy(newIV, in, blocksize); - out = output + (inputLen - blocksize); - while (inputLen > blocksize) { - rv = (*decryptor)(cx, out, in); - if (rv != SECSuccess) - return rv; - for (j = 0; j < blocksize; ++j) - out[j] ^= in[(int)(j - blocksize)]; - out -= blocksize; - in -= blocksize; - inputLen -= blocksize; + in = input + (inputLen - AES_BLOCK_SIZE); + memcpy(newIV, in, AES_BLOCK_SIZE); + out = output + (inputLen - AES_BLOCK_SIZE); + while (inputLen > AES_BLOCK_SIZE) { + if (rijndael_decryptBlock128(cx, out, in) != SECSuccess) { + return SECFailure; + } + for (j = 0; j < AES_BLOCK_SIZE; ++j) + out[j] ^= in[(int)(j - AES_BLOCK_SIZE)]; + out -= AES_BLOCK_SIZE; + in -= AES_BLOCK_SIZE; + inputLen -= AES_BLOCK_SIZE; } if (in == input) { - rv = (*decryptor)(cx, out, in); - if (rv != SECSuccess) - return rv; - for (j = 0; j < blocksize; ++j) + if (rijndael_decryptBlock128(cx, out, in) != SECSuccess) { + return SECFailure; + } + for (j = 0; j < AES_BLOCK_SIZE; ++j) out[j] ^= cx->iv[j]; } - memcpy(cx->iv, newIV, blocksize); + memcpy(cx->iv, newIV, AES_BLOCK_SIZE); return SECSuccess; } @@ -996,41 +1017,15 @@ rijndael_decryptCBC(AESContext *cx, unsigned char *output, AESContext * AES_AllocateContext(void) { - return PORT_ZNew(AESContext); -} - -#ifdef INTEL_GCM -/* - * Adapted from the example code in "How to detect New Instruction support in - * the 4th generation Intel Core processor family" by Max Locktyukhin. - * - * XGETBV: - * Reads an extended control register (XCR) specified by ECX into EDX:EAX. - */ -static PRBool -check_xcr0_ymm() -{ - PRUint32 xcr0; -#if defined(_MSC_VER) -#if defined(_M_IX86) - __asm { - mov ecx, 0 - xgetbv - mov xcr0, eax + /* aligned_alloc is C11 so we have to do it the old way. */ + AESContext *ctx = PORT_ZAlloc(sizeof(AESContext) + 15); + if (ctx == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return NULL; } -#else - xcr0 = (PRUint32)_xgetbv(0); /* Requires VS2010 SP1 or later. */ -#endif -#else - __asm__("xgetbv" - : "=a"(xcr0) - : "c"(0) - : "%edx"); -#endif - /* Check if xmm and ymm state are enabled in XCR0. */ - return (xcr0 & 6) == 6; + ctx->mem = ctx; + return (AESContext *)(((uintptr_t)ctx + 15) & ~(uintptr_t)0x0F); } -#endif /* ** Initialize a new AES context suitable for AES encryption/decryption in @@ -1039,21 +1034,19 @@ check_xcr0_ymm() */ static SECStatus aes_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize, - const unsigned char *iv, int mode, unsigned int encrypt, - unsigned int blocksize) + const unsigned char *iv, int mode, unsigned int encrypt) { unsigned int Nk; - /* According to Rijndael AES Proposal, section 12.1, block and key - * lengths between 128 and 256 bits are supported, as long as the + PRBool use_hw_aes; + /* According to AES, block lengths are 128 and key lengths are 128, 192, or + * 256 bits. We support other key sizes as well [128, 256] as long as the * length in bytes is divisible by 4. */ + if (key == NULL || - keysize < RIJNDAEL_MIN_BLOCKSIZE || - keysize > RIJNDAEL_MAX_BLOCKSIZE || - keysize % 4 != 0 || - blocksize < RIJNDAEL_MIN_BLOCKSIZE || - blocksize > RIJNDAEL_MAX_BLOCKSIZE || - blocksize % 4 != 0) { + keysize < AES_BLOCK_SIZE || + keysize > 32 || + keysize % 4 != 0) { PORT_SetError(SEC_ERROR_INVALID_ARGS); return SECFailure; } @@ -1069,45 +1062,16 @@ aes_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize, PORT_SetError(SEC_ERROR_INVALID_ARGS); return SECFailure; } -#ifdef USE_HW_AES - if (has_intel_aes == 0) { - unsigned long eax, ebx, ecx, edx; - char *disable_hw_aes = PR_GetEnvSecure("NSS_DISABLE_HW_AES"); - - if (disable_hw_aes == NULL) { - freebl_cpuid(1, &eax, &ebx, &ecx, &edx); - has_intel_aes = (ecx & (1 << 25)) != 0 ? 1 : -1; -#ifdef INTEL_GCM - has_intel_clmul = (ecx & (1 << 1)) != 0 ? 1 : -1; - if ((ecx & (1 << 27)) != 0 && (ecx & (1 << 28)) != 0 && - check_xcr0_ymm()) { - has_intel_avx = 1; - } else { - has_intel_avx = -1; - } -#endif - } else { - has_intel_aes = -1; -#ifdef INTEL_GCM - has_intel_avx = -1; - has_intel_clmul = -1; -#endif - } - } - use_hw_aes = (PRBool)(has_intel_aes > 0 && (keysize % 8) == 0 && blocksize == 16); -#ifdef INTEL_GCM - use_hw_gcm = (PRBool)(use_hw_aes && has_intel_avx > 0 && has_intel_clmul > 0); -#endif -#endif /* USE_HW_AES */ + use_hw_aes = aesni_support() && (keysize % 8) == 0; /* Nb = (block size in bits) / 32 */ - cx->Nb = blocksize / 4; + cx->Nb = AES_BLOCK_SIZE / 4; /* Nk = (key size in bits) / 32 */ Nk = keysize / 4; /* Obtain number of rounds from "table" */ cx->Nr = RIJNDAEL_NUM_ROUNDS(Nk, cx->Nb); /* copy in the iv, if neccessary */ if (mode == NSS_AES_CBC) { - memcpy(cx->iv, iv, blocksize); + memcpy(cx->iv, iv, AES_BLOCK_SIZE); #ifdef USE_HW_AES if (use_hw_aes) { cx->worker = (freeblCipherFunc) @@ -1135,7 +1099,7 @@ aes_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize, PORT_Assert((cx->Nb * (cx->Nr + 1)) <= RIJNDAEL_MAX_EXP_KEY_SIZE); if ((cx->Nb * (cx->Nr + 1)) > RIJNDAEL_MAX_EXP_KEY_SIZE) { PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); - goto cleanup; + return SECFailure; } #ifdef USE_HW_AES if (use_hw_aes) { @@ -1148,25 +1112,28 @@ aes_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize, defined(RIJNDAEL_GENERATE_TABLES_MACRO) if (rijndaelTables == NULL) { if (PR_CallOnce(&coRTInit, init_rijndael_tables) != PR_SUCCESS) { - return SecFailure; + return SECFailure; } } #endif /* Generate expanded key */ if (encrypt) { - if (rijndael_key_expansion(cx, key, Nk) != SECSuccess) - goto cleanup; + if (use_hw_aes && (cx->mode == NSS_AES_GCM || cx->mode == NSS_AES || + cx->mode == NSS_AES_CTR)) { + PORT_Assert(keysize == 16 || keysize == 24 || keysize == 32); + /* Prepare hardware key for normal AES parameters. */ + native_key_expansion(cx, key, Nk); + } else { + rijndael_key_expansion(cx, key, Nk); + } } else { - if (rijndael_invkey_expansion(cx, key, Nk) != SECSuccess) - goto cleanup; + rijndael_invkey_expansion(cx, key, Nk); } } cx->worker_cx = cx; cx->destroy = NULL; cx->isBlock = PR_TRUE; return SECSuccess; -cleanup: - return SECFailure; } SECStatus @@ -1178,6 +1145,11 @@ AES_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize, PRBool baseencrypt = encrypt; SECStatus rv; + if (blocksize != AES_BLOCK_SIZE) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + switch (mode) { case NSS_AES_CTS: basemode = NSS_AES_CBC; @@ -1188,45 +1160,47 @@ AES_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize, baseencrypt = PR_TRUE; break; } - /* make sure enough is initializes so we can safely call Destroy */ + /* Make sure enough is initialized so we can safely call Destroy. */ cx->worker_cx = NULL; cx->destroy = NULL; - rv = aes_InitContext(cx, key, keysize, iv, basemode, - baseencrypt, blocksize); + cx->mode = mode; + rv = aes_InitContext(cx, key, keysize, iv, basemode, baseencrypt); if (rv != SECSuccess) { AES_DestroyContext(cx, PR_FALSE); return rv; } - cx->mode = mode; /* finally, set up any mode specific contexts */ switch (mode) { case NSS_AES_CTS: - cx->worker_cx = CTS_CreateContext(cx, cx->worker, iv, blocksize); + cx->worker_cx = CTS_CreateContext(cx, cx->worker, iv); cx->worker = (freeblCipherFunc)(encrypt ? CTS_EncryptUpdate : CTS_DecryptUpdate); cx->destroy = (freeblDestroyFunc)CTS_DestroyContext; cx->isBlock = PR_FALSE; break; case NSS_AES_GCM: -#ifdef INTEL_GCM - if (use_hw_gcm) { - cx->worker_cx = intel_AES_GCM_CreateContext(cx, cx->worker, iv, blocksize); - cx->worker = (freeblCipherFunc)(encrypt ? intel_AES_GCM_EncryptUpdate : intel_AES_GCM_DecryptUpdate); +#if defined(INTEL_GCM) && defined(USE_HW_AES) + if (aesni_support() && (keysize % 8) == 0 && avx_support() && + clmul_support()) { + cx->worker_cx = intel_AES_GCM_CreateContext(cx, cx->worker, iv); + cx->worker = (freeblCipherFunc)(encrypt ? intel_AES_GCM_EncryptUpdate + : intel_AES_GCM_DecryptUpdate); cx->destroy = (freeblDestroyFunc)intel_AES_GCM_DestroyContext; cx->isBlock = PR_FALSE; } else #endif { - cx->worker_cx = GCM_CreateContext(cx, cx->worker, iv, blocksize); - cx->worker = (freeblCipherFunc)(encrypt ? GCM_EncryptUpdate : GCM_DecryptUpdate); + cx->worker_cx = GCM_CreateContext(cx, cx->worker, iv); + cx->worker = (freeblCipherFunc)(encrypt ? GCM_EncryptUpdate + : GCM_DecryptUpdate); cx->destroy = (freeblDestroyFunc)GCM_DestroyContext; cx->isBlock = PR_FALSE; } break; case NSS_AES_CTR: - cx->worker_cx = CTR_CreateContext(cx, cx->worker, iv, blocksize); + cx->worker_cx = CTR_CreateContext(cx, cx->worker, iv); #if defined(USE_HW_AES) && defined(_MSC_VER) - if (use_hw_aes) { + if (aesni_support() && (keysize % 8) == 0) { cx->worker = (freeblCipherFunc)CTR_Update_HW_AES; } else #endif @@ -1238,7 +1212,7 @@ AES_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize, break; default: /* everything has already been set up by aes_InitContext, just - * return */ + * return */ return SECSuccess; } /* check to see if we succeeded in getting the worker context */ @@ -1287,8 +1261,9 @@ AES_DestroyContext(AESContext *cx, PRBool freeit) cx->worker_cx = NULL; cx->destroy = NULL; } - if (freeit) - PORT_Free(cx); + if (freeit) { + PORT_Free(cx->mem); + } } /* @@ -1302,14 +1277,12 @@ AES_Encrypt(AESContext *cx, unsigned char *output, unsigned int *outputLen, unsigned int maxOutputLen, const unsigned char *input, unsigned int inputLen) { - int blocksize; /* Check args */ if (cx == NULL || output == NULL || (input == NULL && inputLen != 0)) { PORT_SetError(SEC_ERROR_INVALID_ARGS); return SECFailure; } - blocksize = 4 * cx->Nb; - if (cx->isBlock && (inputLen % blocksize != 0)) { + if (cx->isBlock && (inputLen % AES_BLOCK_SIZE != 0)) { PORT_SetError(SEC_ERROR_INPUT_LEN); return SECFailure; } @@ -1340,7 +1313,7 @@ AES_Encrypt(AESContext *cx, unsigned char *output, #endif return (*cx->worker)(cx->worker_cx, output, outputLen, maxOutputLen, - input, inputLen, blocksize); + input, inputLen, AES_BLOCK_SIZE); } /* @@ -1354,14 +1327,12 @@ AES_Decrypt(AESContext *cx, unsigned char *output, unsigned int *outputLen, unsigned int maxOutputLen, const unsigned char *input, unsigned int inputLen) { - int blocksize; /* Check args */ if (cx == NULL || output == NULL || (input == NULL && inputLen != 0)) { PORT_SetError(SEC_ERROR_INVALID_ARGS); return SECFailure; } - blocksize = 4 * cx->Nb; - if (cx->isBlock && (inputLen % blocksize != 0)) { + if (cx->isBlock && (inputLen % AES_BLOCK_SIZE != 0)) { PORT_SetError(SEC_ERROR_INPUT_LEN); return SECFailure; } @@ -1371,5 +1342,5 @@ AES_Decrypt(AESContext *cx, unsigned char *output, } *outputLen = inputLen; return (*cx->worker)(cx->worker_cx, output, outputLen, maxOutputLen, - input, inputLen, blocksize); + input, inputLen, AES_BLOCK_SIZE); } |