diff options
author | wolfbeast <mcwerewolf@gmail.com> | 2018-02-23 11:04:39 +0100 |
---|---|---|
committer | wolfbeast <mcwerewolf@gmail.com> | 2018-02-23 11:04:39 +0100 |
commit | f1a0f0a56fdd0fc39f255174ce08c06b91c66c94 (patch) | |
tree | 99ccc8e212257e1da1902036ae261e8e55d55c1c /security/nss/lib/freebl/gcm-x86.c | |
parent | 8781f745556be5d7402d0f3adc67ecfe32fe04a0 (diff) | |
download | UXP-f1a0f0a56fdd0fc39f255174ce08c06b91c66c94.tar UXP-f1a0f0a56fdd0fc39f255174ce08c06b91c66c94.tar.gz UXP-f1a0f0a56fdd0fc39f255174ce08c06b91c66c94.tar.lz UXP-f1a0f0a56fdd0fc39f255174ce08c06b91c66c94.tar.xz UXP-f1a0f0a56fdd0fc39f255174ce08c06b91c66c94.zip |
Update NSS to 3.35-RTM
Diffstat (limited to 'security/nss/lib/freebl/gcm-x86.c')
-rw-r--r-- | security/nss/lib/freebl/gcm-x86.c | 127 |
1 files changed, 127 insertions, 0 deletions
diff --git a/security/nss/lib/freebl/gcm-x86.c b/security/nss/lib/freebl/gcm-x86.c new file mode 100644 index 000000000..e34d63394 --- /dev/null +++ b/security/nss/lib/freebl/gcm-x86.c @@ -0,0 +1,127 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif +#include "gcm.h" +#include "secerr.h" + +#include <wmmintrin.h> /* clmul */ + +#define WRITE64(x, bytes) \ + (bytes)[0] = (x) >> 56; \ + (bytes)[1] = (x) >> 48; \ + (bytes)[2] = (x) >> 40; \ + (bytes)[3] = (x) >> 32; \ + (bytes)[4] = (x) >> 24; \ + (bytes)[5] = (x) >> 16; \ + (bytes)[6] = (x) >> 8; \ + (bytes)[7] = (x); + +SECStatus +gcm_HashWrite_hw(gcmHashContext *ghash, unsigned char *outbuf) +{ + uint64_t tmp_out[2]; + _mm_storeu_si128((__m128i *)tmp_out, ghash->x); + /* maxout must be larger than 16 byte (checked by the caller). */ + WRITE64(tmp_out[0], outbuf + 8); + WRITE64(tmp_out[1], outbuf); + return SECSuccess; +} + +SECStatus +gcm_HashMult_hw(gcmHashContext *ghash, const unsigned char *buf, + unsigned int count) +{ + size_t i; + pre_align __m128i z_high post_align; + pre_align __m128i z_low post_align; + pre_align __m128i C post_align; + pre_align __m128i D post_align; + pre_align __m128i E post_align; + pre_align __m128i F post_align; + pre_align __m128i bin post_align; + pre_align __m128i Ci post_align; + pre_align __m128i tmp post_align; + + for (i = 0; i < count; i++, buf += 16) { + bin = _mm_set_epi16(((uint16_t)buf[0] << 8) | buf[1], + ((uint16_t)buf[2] << 8) | buf[3], + ((uint16_t)buf[4] << 8) | buf[5], + ((uint16_t)buf[6] << 8) | buf[7], + ((uint16_t)buf[8] << 8) | buf[9], + ((uint16_t)buf[10] << 8) | buf[11], + ((uint16_t)buf[12] << 8) | buf[13], + ((uint16_t)buf[14] << 8) | buf[15]); + Ci = _mm_xor_si128(bin, ghash->x); + + /* Do binary mult ghash->X = Ci * ghash->H. */ + C = _mm_clmulepi64_si128(Ci, ghash->h, 0x00); + D = _mm_clmulepi64_si128(Ci, ghash->h, 0x11); + E = _mm_clmulepi64_si128(Ci, ghash->h, 0x01); + F = _mm_clmulepi64_si128(Ci, ghash->h, 0x10); + tmp = _mm_xor_si128(E, F); + z_high = _mm_xor_si128(tmp, _mm_slli_si128(D, 8)); + z_high = _mm_unpackhi_epi64(z_high, D); + z_low = _mm_xor_si128(_mm_slli_si128(tmp, 8), C); + z_low = _mm_unpackhi_epi64(_mm_slli_si128(C, 8), z_low); + + /* Shift one to the left (multiply by x) as gcm spec is stupid. */ + C = _mm_slli_si128(z_low, 8); + E = _mm_srli_epi64(C, 63); + D = _mm_slli_si128(z_high, 8); + F = _mm_srli_epi64(D, 63); + /* Carry over */ + C = _mm_srli_si128(z_low, 8); + D = _mm_srli_epi64(C, 63); + z_low = _mm_or_si128(_mm_slli_epi64(z_low, 1), E); + z_high = _mm_or_si128(_mm_or_si128(_mm_slli_epi64(z_high, 1), F), D); + + /* Reduce */ + C = _mm_slli_si128(z_low, 8); + /* D = z_low << 127 */ + D = _mm_slli_epi64(C, 63); + /* E = z_low << 126 */ + E = _mm_slli_epi64(C, 62); + /* F = z_low << 121 */ + F = _mm_slli_epi64(C, 57); + /* z_low ^= (z_low << 127) ^ (z_low << 126) ^ (z_low << 121); */ + z_low = _mm_xor_si128(_mm_xor_si128(_mm_xor_si128(z_low, D), E), F); + C = _mm_srli_si128(z_low, 8); + /* D = z_low >> 1 */ + D = _mm_slli_epi64(C, 63); + D = _mm_or_si128(_mm_srli_epi64(z_low, 1), D); + /* E = z_low >> 2 */ + E = _mm_slli_epi64(C, 62); + E = _mm_or_si128(_mm_srli_epi64(z_low, 2), E); + /* F = z_low >> 7 */ + F = _mm_slli_epi64(C, 57); + F = _mm_or_si128(_mm_srli_epi64(z_low, 7), F); + /* ghash->x ^= z_low ^ (z_low >> 1) ^ (z_low >> 2) ^ (z_low >> 7); */ + ghash->x = _mm_xor_si128(_mm_xor_si128( + _mm_xor_si128(_mm_xor_si128(z_high, z_low), D), E), + F); + } + return SECSuccess; +} + +SECStatus +gcm_HashInit_hw(gcmHashContext *ghash) +{ + ghash->ghash_mul = gcm_HashMult_hw; + ghash->x = _mm_setzero_si128(); + /* MSVC requires __m64 to load epi64. */ + ghash->h = _mm_set_epi32(ghash->h_high >> 32, (uint32_t)ghash->h_high, + ghash->h_low >> 32, (uint32_t)ghash->h_low); + ghash->hw = PR_TRUE; + return SECSuccess; +} + +SECStatus +gcm_HashZeroX_hw(gcmHashContext *ghash) +{ + ghash->x = _mm_setzero_si128(); + return SECSuccess; +} |