diff options
Diffstat (limited to 'security/nss/lib/freebl/mpi/mpi_sse2.s')
-rw-r--r-- | security/nss/lib/freebl/mpi/mpi_sse2.s | 294 |
1 files changed, 294 insertions, 0 deletions
diff --git a/security/nss/lib/freebl/mpi/mpi_sse2.s b/security/nss/lib/freebl/mpi/mpi_sse2.s new file mode 100644 index 000000000..16a47019c --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpi_sse2.s @@ -0,0 +1,294 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifdef DARWIN +#define s_mpv_mul_d _s_mpv_mul_d +#define s_mpv_mul_d_add _s_mpv_mul_d_add +#define s_mpv_mul_d_add_prop _s_mpv_mul_d_add_prop +#define s_mpv_sqr_add_prop _s_mpv_sqr_add_prop +#define s_mpv_div_2dx1d _s_mpv_div_2dx1d +#define TYPE_FUNCTION(x) +#else +#define TYPE_FUNCTION(x) .type x, @function +#endif + +.text + + # ebp - 8: caller's esi + # ebp - 4: caller's edi + # ebp + 0: caller's ebp + # ebp + 4: return address + # ebp + 8: a argument + # ebp + 12: a_len argument + # ebp + 16: b argument + # ebp + 20: c argument + # registers: + # ebx: + # ecx: a_len + # esi: a ptr + # edi: c ptr +.globl s_mpv_mul_d +.private_extern s_mpv_mul_d +TYPE_FUNCTION(s_mpv_mul_d) +s_mpv_mul_d: + push %ebp + mov %esp, %ebp + push %edi + push %esi + psubq %mm2, %mm2 # carry = 0 + mov 12(%ebp), %ecx # ecx = a_len + movd 16(%ebp), %mm1 # mm1 = b + mov 20(%ebp), %edi + cmp $0, %ecx + je 2f # jmp if a_len == 0 + mov 8(%ebp), %esi # esi = a + cld +1: + movd 0(%esi), %mm0 # mm0 = *a++ + add $4, %esi + pmuludq %mm1, %mm0 # mm0 = b * *a++ + paddq %mm0, %mm2 # add the carry + movd %mm2, 0(%edi) # store the 32bit result + add $4, %edi + psrlq $32, %mm2 # save the carry + dec %ecx # --a_len + jnz 1b # jmp if a_len != 0 +2: + movd %mm2, 0(%edi) # *c = carry + emms + pop %esi + pop %edi + leave + ret + nop + + # ebp - 8: caller's esi + # ebp - 4: caller's edi + # ebp + 0: caller's ebp + # ebp + 4: return address + # ebp + 8: a argument + # ebp + 12: a_len argument + # ebp + 16: b argument + # ebp + 20: c argument + # registers: + # ebx: + # ecx: a_len + # esi: a ptr + # edi: c ptr +.globl s_mpv_mul_d_add +.private_extern s_mpv_mul_d_add +TYPE_FUNCTION(s_mpv_mul_d_add) +s_mpv_mul_d_add: + push %ebp + mov %esp, %ebp + push %edi + push %esi + psubq %mm2, %mm2 # carry = 0 + mov 12(%ebp), %ecx # ecx = a_len + movd 16(%ebp), %mm1 # mm1 = b + mov 20(%ebp), %edi + cmp $0, %ecx + je 2f # jmp if a_len == 0 + mov 8(%ebp), %esi # esi = a + cld +1: + movd 0(%esi), %mm0 # mm0 = *a++ + add $4, %esi + pmuludq %mm1, %mm0 # mm0 = b * *a++ + paddq %mm0, %mm2 # add the carry + movd 0(%edi), %mm0 + paddq %mm0, %mm2 # add the carry + movd %mm2, 0(%edi) # store the 32bit result + add $4, %edi + psrlq $32, %mm2 # save the carry + dec %ecx # --a_len + jnz 1b # jmp if a_len != 0 +2: + movd %mm2, 0(%edi) # *c = carry + emms + pop %esi + pop %edi + leave + ret + nop + + # ebp - 12: caller's ebx + # ebp - 8: caller's esi + # ebp - 4: caller's edi + # ebp + 0: caller's ebp + # ebp + 4: return address + # ebp + 8: a argument + # ebp + 12: a_len argument + # ebp + 16: b argument + # ebp + 20: c argument + # registers: + # eax: + # ebx: carry + # ecx: a_len + # esi: a ptr + # edi: c ptr +.globl s_mpv_mul_d_add_prop +.private_extern s_mpv_mul_d_add_prop +TYPE_FUNCTION(s_mpv_mul_d_add_prop) +s_mpv_mul_d_add_prop: + push %ebp + mov %esp, %ebp + push %edi + push %esi + push %ebx + psubq %mm2, %mm2 # carry = 0 + mov 12(%ebp), %ecx # ecx = a_len + movd 16(%ebp), %mm1 # mm1 = b + mov 20(%ebp), %edi + cmp $0, %ecx + je 2f # jmp if a_len == 0 + mov 8(%ebp), %esi # esi = a + cld +1: + movd 0(%esi), %mm0 # mm0 = *a++ + movd 0(%edi), %mm3 # fetch the sum + add $4, %esi + pmuludq %mm1, %mm0 # mm0 = b * *a++ + paddq %mm0, %mm2 # add the carry + paddq %mm3, %mm2 # add *c++ + movd %mm2, 0(%edi) # store the 32bit result + add $4, %edi + psrlq $32, %mm2 # save the carry + dec %ecx # --a_len + jnz 1b # jmp if a_len != 0 +2: + movd %mm2, %ebx + cmp $0, %ebx # is carry zero? + jz 4f + mov 0(%edi), %eax + add %ebx, %eax + stosl + jnc 4f +3: + mov 0(%edi), %eax # add in current word from *c + adc $0, %eax + stosl # [es:edi] = ax; edi += 4; + jc 3b +4: + emms + pop %ebx + pop %esi + pop %edi + leave + ret + nop + + # ebp - 12: caller's ebx + # ebp - 8: caller's esi + # ebp - 4: caller's edi + # ebp + 0: caller's ebp + # ebp + 4: return address + # ebp + 8: pa argument + # ebp + 12: a_len argument + # ebp + 16: ps argument + # registers: + # eax: + # ebx: carry + # ecx: a_len + # esi: a ptr + # edi: c ptr +.globl s_mpv_sqr_add_prop +.private_extern s_mpv_sqr_add_prop +TYPE_FUNCTION(s_mpv_sqr_add_prop) +s_mpv_sqr_add_prop: + push %ebp + mov %esp, %ebp + push %edi + push %esi + push %ebx + psubq %mm2, %mm2 # carry = 0 + mov 12(%ebp), %ecx # ecx = a_len + mov 16(%ebp), %edi + cmp $0, %ecx + je 2f # jmp if a_len == 0 + mov 8(%ebp), %esi # esi = a + cld +1: + movd 0(%esi), %mm0 # mm0 = *a + movd 0(%edi), %mm3 # fetch the sum + add $4, %esi + pmuludq %mm0, %mm0 # mm0 = sqr(a) + paddq %mm0, %mm2 # add the carry + paddq %mm3, %mm2 # add the low word + movd 4(%edi), %mm3 + movd %mm2, 0(%edi) # store the 32bit result + psrlq $32, %mm2 + paddq %mm3, %mm2 # add the high word + movd %mm2, 4(%edi) # store the 32bit result + psrlq $32, %mm2 # save the carry. + add $8, %edi + dec %ecx # --a_len + jnz 1b # jmp if a_len != 0 +2: + movd %mm2, %ebx + cmp $0, %ebx # is carry zero? + jz 4f + mov 0(%edi), %eax + add %ebx, %eax + stosl + jnc 4f +3: + mov 0(%edi), %eax # add in current word from *c + adc $0, %eax + stosl # [es:edi] = ax; edi += 4; + jc 3b +4: + emms + pop %ebx + pop %esi + pop %edi + leave + ret + nop + + # + # Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized + # so its high bit is 1. This code is from NSPR. + # + # mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor, + # mp_digit *qp, mp_digit *rp) + + # esp + 0: Caller's ebx + # esp + 4: return address + # esp + 8: Nhi argument + # esp + 12: Nlo argument + # esp + 16: divisor argument + # esp + 20: qp argument + # esp + 24: rp argument + # registers: + # eax: + # ebx: carry + # ecx: a_len + # edx: + # esi: a ptr + # edi: c ptr + # +.globl s_mpv_div_2dx1d +.private_extern s_mpv_div_2dx1d +TYPE_FUNCTION(s_mpv_div_2dx1d) +s_mpv_div_2dx1d: + push %ebx + mov 8(%esp), %edx + mov 12(%esp), %eax + mov 16(%esp), %ebx + div %ebx + mov 20(%esp), %ebx + mov %eax, 0(%ebx) + mov 24(%esp), %ebx + mov %edx, 0(%ebx) + xor %eax, %eax # return zero + pop %ebx + ret + nop + +#ifndef DARWIN + # Magic indicating no need for an executable stack +.section .note.GNU-stack, "", @progbits +.previous +#endif |