summaryrefslogtreecommitdiffstats
path: root/security/nss/lib/freebl/mpi/mpi_x86_asm.c
diff options
context:
space:
mode:
Diffstat (limited to 'security/nss/lib/freebl/mpi/mpi_x86_asm.c')
-rw-r--r--security/nss/lib/freebl/mpi/mpi_x86_asm.c531
1 files changed, 531 insertions, 0 deletions
diff --git a/security/nss/lib/freebl/mpi/mpi_x86_asm.c b/security/nss/lib/freebl/mpi/mpi_x86_asm.c
new file mode 100644
index 000000000..4faeef30c
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_x86_asm.c
@@ -0,0 +1,531 @@
+/*
+ * mpi_x86_asm.c - MSVC inline assembly implementation of s_mpv_ functions.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi-priv.h"
+
+static int is_sse = -1;
+extern unsigned long s_mpi_is_sse2();
+
+/*
+ * ebp - 36: caller's esi
+ * ebp - 32: caller's edi
+ * ebp - 28:
+ * ebp - 24:
+ * ebp - 20:
+ * ebp - 16:
+ * ebp - 12:
+ * ebp - 8:
+ * ebp - 4:
+ * ebp + 0: caller's ebp
+ * ebp + 4: return address
+ * ebp + 8: a argument
+ * ebp + 12: a_len argument
+ * ebp + 16: b argument
+ * ebp + 20: c argument
+ * registers:
+ * eax:
+ * ebx: carry
+ * ecx: a_len
+ * edx:
+ * esi: a ptr
+ * edi: c ptr
+ */
+__declspec(naked) void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+ __asm {
+ mov eax, is_sse
+ cmp eax, 0
+ je s_mpv_mul_d_x86
+ jg s_mpv_mul_d_sse2
+ call s_mpi_is_sse2
+ mov is_sse, eax
+ cmp eax, 0
+ jg s_mpv_mul_d_sse2
+s_mpv_mul_d_x86:
+ push ebp
+ mov ebp,esp
+ sub esp,28
+ push edi
+ push esi
+ push ebx
+ mov ebx,0 ; carry = 0
+ mov ecx,[ebp+12] ; ecx = a_len
+ mov edi,[ebp+20]
+ cmp ecx,0
+ je L_2 ; jmp if a_len == 0
+ mov esi,[ebp+8] ; esi = a
+ cld
+L_1:
+ lodsd ; eax = [ds:esi]; esi += 4
+ mov edx,[ebp+16] ; edx = b
+ mul edx ; edx:eax = Phi:Plo = a_i * b
+
+ add eax,ebx ; add carry (ebx) to edx:eax
+ adc edx,0
+ mov ebx,edx ; high half of product becomes next carry
+
+ stosd ; [es:edi] = ax; edi += 4;
+ dec ecx ; --a_len
+ jnz L_1 ; jmp if a_len != 0
+L_2:
+ mov [edi],ebx ; *c = carry
+ pop ebx
+ pop esi
+ pop edi
+ leave
+ ret
+ nop
+s_mpv_mul_d_sse2:
+ push ebp
+ mov ebp, esp
+ push edi
+ push esi
+ psubq mm2, mm2 ; carry = 0
+ mov ecx, [ebp+12] ; ecx = a_len
+ movd mm1, [ebp+16] ; mm1 = b
+ mov edi, [ebp+20]
+ cmp ecx, 0
+ je L_6 ; jmp if a_len == 0
+ mov esi, [ebp+8] ; esi = a
+ cld
+L_5:
+ movd mm0, [esi] ; mm0 = *a++
+ add esi, 4
+ pmuludq mm0, mm1 ; mm0 = b * *a++
+ paddq mm2, mm0 ; add the carry
+ movd [edi], mm2 ; store the 32bit result
+ add edi, 4
+ psrlq mm2, 32 ; save the carry
+ dec ecx ; --a_len
+ jnz L_5 ; jmp if a_len != 0
+L_6:
+ movd [edi], mm2 ; *c = carry
+ emms
+ pop esi
+ pop edi
+ leave
+ ret
+ nop
+ }
+}
+
+/*
+ * ebp - 36: caller's esi
+ * ebp - 32: caller's edi
+ * ebp - 28:
+ * ebp - 24:
+ * ebp - 20:
+ * ebp - 16:
+ * ebp - 12:
+ * ebp - 8:
+ * ebp - 4:
+ * ebp + 0: caller's ebp
+ * ebp + 4: return address
+ * ebp + 8: a argument
+ * ebp + 12: a_len argument
+ * ebp + 16: b argument
+ * ebp + 20: c argument
+ * registers:
+ * eax:
+ * ebx: carry
+ * ecx: a_len
+ * edx:
+ * esi: a ptr
+ * edi: c ptr
+ */
+__declspec(naked) void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+ __asm {
+ mov eax, is_sse
+ cmp eax, 0
+ je s_mpv_mul_d_add_x86
+ jg s_mpv_mul_d_add_sse2
+ call s_mpi_is_sse2
+ mov is_sse, eax
+ cmp eax, 0
+ jg s_mpv_mul_d_add_sse2
+s_mpv_mul_d_add_x86:
+ push ebp
+ mov ebp,esp
+ sub esp,28
+ push edi
+ push esi
+ push ebx
+ mov ebx,0 ; carry = 0
+ mov ecx,[ebp+12] ; ecx = a_len
+ mov edi,[ebp+20]
+ cmp ecx,0
+ je L_11 ; jmp if a_len == 0
+ mov esi,[ebp+8] ; esi = a
+ cld
+L_10:
+ lodsd ; eax = [ds:esi]; esi += 4
+ mov edx,[ebp+16] ; edx = b
+ mul edx ; edx:eax = Phi:Plo = a_i * b
+
+ add eax,ebx ; add carry (ebx) to edx:eax
+ adc edx,0
+ mov ebx,[edi] ; add in current word from *c
+ add eax,ebx
+ adc edx,0
+ mov ebx,edx ; high half of product becomes next carry
+
+ stosd ; [es:edi] = ax; edi += 4;
+ dec ecx ; --a_len
+ jnz L_10 ; jmp if a_len != 0
+L_11:
+ mov [edi],ebx ; *c = carry
+ pop ebx
+ pop esi
+ pop edi
+ leave
+ ret
+ nop
+s_mpv_mul_d_add_sse2:
+ push ebp
+ mov ebp, esp
+ push edi
+ push esi
+ psubq mm2, mm2 ; carry = 0
+ mov ecx, [ebp+12] ; ecx = a_len
+ movd mm1, [ebp+16] ; mm1 = b
+ mov edi, [ebp+20]
+ cmp ecx, 0
+ je L_16 ; jmp if a_len == 0
+ mov esi, [ebp+8] ; esi = a
+ cld
+L_15:
+ movd mm0, [esi] ; mm0 = *a++
+ add esi, 4
+ pmuludq mm0, mm1 ; mm0 = b * *a++
+ paddq mm2, mm0 ; add the carry
+ movd mm0, [edi]
+ paddq mm2, mm0 ; add the carry
+ movd [edi], mm2 ; store the 32bit result
+ add edi, 4
+ psrlq mm2, 32 ; save the carry
+ dec ecx ; --a_len
+ jnz L_15 ; jmp if a_len != 0
+L_16:
+ movd [edi], mm2 ; *c = carry
+ emms
+ pop esi
+ pop edi
+ leave
+ ret
+ nop
+ }
+}
+
+/*
+ * ebp - 36: caller's esi
+ * ebp - 32: caller's edi
+ * ebp - 28:
+ * ebp - 24:
+ * ebp - 20:
+ * ebp - 16:
+ * ebp - 12:
+ * ebp - 8:
+ * ebp - 4:
+ * ebp + 0: caller's ebp
+ * ebp + 4: return address
+ * ebp + 8: a argument
+ * ebp + 12: a_len argument
+ * ebp + 16: b argument
+ * ebp + 20: c argument
+ * registers:
+ * eax:
+ * ebx: carry
+ * ecx: a_len
+ * edx:
+ * esi: a ptr
+ * edi: c ptr
+ */
+__declspec(naked) void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+ __asm {
+ mov eax, is_sse
+ cmp eax, 0
+ je s_mpv_mul_d_add_prop_x86
+ jg s_mpv_mul_d_add_prop_sse2
+ call s_mpi_is_sse2
+ mov is_sse, eax
+ cmp eax, 0
+ jg s_mpv_mul_d_add_prop_sse2
+s_mpv_mul_d_add_prop_x86:
+ push ebp
+ mov ebp,esp
+ sub esp,28
+ push edi
+ push esi
+ push ebx
+ mov ebx,0 ; carry = 0
+ mov ecx,[ebp+12] ; ecx = a_len
+ mov edi,[ebp+20]
+ cmp ecx,0
+ je L_21 ; jmp if a_len == 0
+ cld
+ mov esi,[ebp+8] ; esi = a
+L_20:
+ lodsd ; eax = [ds:esi]; esi += 4
+ mov edx,[ebp+16] ; edx = b
+ mul edx ; edx:eax = Phi:Plo = a_i * b
+
+ add eax,ebx ; add carry (ebx) to edx:eax
+ adc edx,0
+ mov ebx,[edi] ; add in current word from *c
+ add eax,ebx
+ adc edx,0
+ mov ebx,edx ; high half of product becomes next carry
+
+ stosd ; [es:edi] = ax; edi += 4;
+ dec ecx ; --a_len
+ jnz L_20 ; jmp if a_len != 0
+L_21:
+ cmp ebx,0 ; is carry zero?
+ jz L_23
+ mov eax,[edi] ; add in current word from *c
+ add eax,ebx
+ stosd ; [es:edi] = ax; edi += 4;
+ jnc L_23
+L_22:
+ mov eax,[edi] ; add in current word from *c
+ adc eax,0
+ stosd ; [es:edi] = ax; edi += 4;
+ jc L_22
+L_23:
+ pop ebx
+ pop esi
+ pop edi
+ leave
+ ret
+ nop
+s_mpv_mul_d_add_prop_sse2:
+ push ebp
+ mov ebp, esp
+ push edi
+ push esi
+ push ebx
+ psubq mm2, mm2 ; carry = 0
+ mov ecx, [ebp+12] ; ecx = a_len
+ movd mm1, [ebp+16] ; mm1 = b
+ mov edi, [ebp+20]
+ cmp ecx, 0
+ je L_26 ; jmp if a_len == 0
+ mov esi, [ebp+8] ; esi = a
+ cld
+L_25:
+ movd mm0, [esi] ; mm0 = *a++
+ movd mm3, [edi] ; fetch the sum
+ add esi, 4
+ pmuludq mm0, mm1 ; mm0 = b * *a++
+ paddq mm2, mm0 ; add the carry
+ paddq mm2, mm3 ; add *c++
+ movd [edi], mm2 ; store the 32bit result
+ add edi, 4
+ psrlq mm2, 32 ; save the carry
+ dec ecx ; --a_len
+ jnz L_25 ; jmp if a_len != 0
+L_26:
+ movd ebx, mm2
+ cmp ebx, 0 ; is carry zero?
+ jz L_28
+ mov eax, [edi]
+ add eax, ebx
+ stosd
+ jnc L_28
+L_27:
+ mov eax, [edi] ; add in current word from *c
+ adc eax, 0
+ stosd ; [es:edi] = ax; edi += 4;
+ jc L_27
+L_28:
+ emms
+ pop ebx
+ pop esi
+ pop edi
+ leave
+ ret
+ nop
+ }
+}
+
+/*
+ * ebp - 20: caller's esi
+ * ebp - 16: caller's edi
+ * ebp - 12:
+ * ebp - 8: carry
+ * ebp - 4: a_len local
+ * ebp + 0: caller's ebp
+ * ebp + 4: return address
+ * ebp + 8: pa argument
+ * ebp + 12: a_len argument
+ * ebp + 16: ps argument
+ * ebp + 20:
+ * registers:
+ * eax:
+ * ebx: carry
+ * ecx: a_len
+ * edx:
+ * esi: a ptr
+ * edi: c ptr
+ */
+__declspec(naked) void s_mpv_sqr_add_prop(const mp_digit *a, mp_size a_len, mp_digit *sqrs)
+{
+ __asm {
+ mov eax, is_sse
+ cmp eax, 0
+ je s_mpv_sqr_add_prop_x86
+ jg s_mpv_sqr_add_prop_sse2
+ call s_mpi_is_sse2
+ mov is_sse, eax
+ cmp eax, 0
+ jg s_mpv_sqr_add_prop_sse2
+s_mpv_sqr_add_prop_x86:
+ push ebp
+ mov ebp,esp
+ sub esp,12
+ push edi
+ push esi
+ push ebx
+ mov ebx,0 ; carry = 0
+ mov ecx,[ebp+12] ; a_len
+ mov edi,[ebp+16] ; edi = ps
+ cmp ecx,0
+ je L_31 ; jump if a_len == 0
+ cld
+ mov esi,[ebp+8] ; esi = pa
+L_30:
+ lodsd ; eax = [ds:si]; si += 4;
+ mul eax
+
+ add eax,ebx ; add "carry"
+ adc edx,0
+ mov ebx,[edi]
+ add eax,ebx ; add low word from result
+ mov ebx,[edi+4]
+ stosd ; [es:di] = eax; di += 4;
+ adc edx,ebx ; add high word from result
+ mov ebx,0
+ mov eax,edx
+ adc ebx,0
+ stosd ; [es:di] = eax; di += 4;
+ dec ecx ; --a_len
+ jnz L_30 ; jmp if a_len != 0
+L_31:
+ cmp ebx,0 ; is carry zero?
+ jz L_34
+ mov eax,[edi] ; add in current word from *c
+ add eax,ebx
+ stosd ; [es:edi] = ax; edi += 4;
+ jnc L_34
+L_32:
+ mov eax,[edi] ; add in current word from *c
+ adc eax,0
+ stosd ; [es:edi] = ax; edi += 4;
+ jc L_32
+L_34:
+ pop ebx
+ pop esi
+ pop edi
+ leave
+ ret
+ nop
+s_mpv_sqr_add_prop_sse2:
+ push ebp
+ mov ebp, esp
+ push edi
+ push esi
+ push ebx
+ psubq mm2, mm2 ; carry = 0
+ mov ecx, [ebp+12] ; ecx = a_len
+ mov edi, [ebp+16]
+ cmp ecx, 0
+ je L_36 ; jmp if a_len == 0
+ mov esi, [ebp+8] ; esi = a
+ cld
+L_35:
+ movd mm0, [esi] ; mm0 = *a
+ movd mm3, [edi] ; fetch the sum
+ add esi, 4
+ pmuludq mm0, mm0 ; mm0 = sqr(a)
+ paddq mm2, mm0 ; add the carry
+ paddq mm2, mm3 ; add the low word
+ movd mm3, [edi+4]
+ movd [edi], mm2 ; store the 32bit result
+ psrlq mm2, 32
+ paddq mm2, mm3 ; add the high word
+ movd [edi+4], mm2 ; store the 32bit result
+ psrlq mm2, 32 ; save the carry.
+ add edi, 8
+ dec ecx ; --a_len
+ jnz L_35 ; jmp if a_len != 0
+L_36:
+ movd ebx, mm2
+ cmp ebx, 0 ; is carry zero?
+ jz L_38
+ mov eax, [edi]
+ add eax, ebx
+ stosd
+ jnc L_38
+L_37:
+ mov eax, [edi] ; add in current word from *c
+ adc eax, 0
+ stosd ; [es:edi] = ax; edi += 4;
+ jc L_37
+L_38:
+ emms
+ pop ebx
+ pop esi
+ pop edi
+ leave
+ ret
+ nop
+ }
+}
+
+/*
+ * Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
+ * so its high bit is 1. This code is from NSPR.
+ *
+ * Dump of assembler code for function s_mpv_div_2dx1d:
+ *
+ * esp + 0: Caller's ebx
+ * esp + 4: return address
+ * esp + 8: Nhi argument
+ * esp + 12: Nlo argument
+ * esp + 16: divisor argument
+ * esp + 20: qp argument
+ * esp + 24: rp argument
+ * registers:
+ * eax:
+ * ebx: carry
+ * ecx: a_len
+ * edx:
+ * esi: a ptr
+ * edi: c ptr
+ */
+__declspec(naked) mp_err
+ s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
+ mp_digit *qp, mp_digit *rp)
+{
+ __asm {
+ push ebx
+ mov edx,[esp+8]
+ mov eax,[esp+12]
+ mov ebx,[esp+16]
+ div ebx
+ mov ebx,[esp+20]
+ mov [ebx],eax
+ mov ebx,[esp+24]
+ mov [ebx],edx
+ xor eax,eax ; return zero
+ pop ebx
+ ret
+ nop
+ }
+}