Diffstat (limited to 'security/nss/lib/freebl/mpi/mpi_x86_os2.s')
-rw-r--r--  security/nss/lib/freebl/mpi/mpi_x86_os2.s  538
1 file changed, 538 insertions(+), 0 deletions(-)
diff --git a/security/nss/lib/freebl/mpi/mpi_x86_os2.s b/security/nss/lib/freebl/mpi/mpi_x86_os2.s
new file mode 100644
index 000000000..b903e2564
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_x86_os2.s
@@ -0,0 +1,538 @@
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+.data
+.align 4
+ #
+	# -1 means to call _s_mpi_is_sse2 to determine if we support sse2
+	#    instructions.
+ # 0 means to use x86 instructions
+ # 1 means to use sse2 instructions
+.type is_sse,@object
+.size is_sse,4
+is_sse: .long -1
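+
+	#
+	# Each exported routine below begins with the same dispatch on this
+	# flag.  Roughly, as an illustrative C sketch (not actual NSS source;
+	# _s_mpi_is_sse2 is the CPU-feature probe called below, implemented
+	# outside this file):
+	#
+	#   if (is_sse < 0)
+	#       is_sse = s_mpi_is_sse2();   /* probe once, cache the answer */
+	#   if (is_sse > 0)
+	#       /* use the *_sse2 body */ ;
+	#   else
+	#       /* use the *_x86 body  */ ;
+	#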
+
+#
+# Handle the difference between -fPIC and non-PIC builds.  In this OS/2
+# copy of the file the PIC variants of GET/PUT below are commented out,
+# so the plain non-PIC macros are the ones in effect.  (Linux uses
+# mpi_x86.s, Solaris uses mpi_i86pc.s, and Windows uses mpi_x86_asm.c.)
+#
+#.ifndef NO_PIC
+#.macro GET var,reg
+# movl \var@GOTOFF(%ebx),\reg
+#.endm
+#.macro PUT reg,var
+# movl \reg,\var@GOTOFF(%ebx)
+#.endm
+#.else
+.macro GET var,reg
+ movl \var,\reg
+.endm
+.macro PUT reg,var
+ movl \reg,\var
+.endm
+#.endif
+
+.text
+
+
+ # ebp - 36: caller's esi
+ # ebp - 32: caller's edi
+ # ebp - 28:
+ # ebp - 24:
+ # ebp - 20:
+ # ebp - 16:
+ # ebp - 12:
+ # ebp - 8:
+ # ebp - 4:
+ # ebp + 0: caller's ebp
+ # ebp + 4: return address
+ # ebp + 8: a argument
+ # ebp + 12: a_len argument
+ # ebp + 16: b argument
+ # ebp + 20: c argument
+ # registers:
+ # eax:
+ # ebx: carry
+ # ecx: a_len
+ # edx:
+ # esi: a ptr
+ # edi: c ptr
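+	#
+	# Roughly equivalent C for the routine below (an illustrative sketch,
+	# not the NSS reference source; digits are 32 bits on this target):
+	#
+	#   void s_mpv_mul_d(const mp_digit *a, mp_size a_len,
+	#                    mp_digit b, mp_digit *c)
+	#   {
+	#       unsigned long long w, carry = 0;
+	#       while (a_len--) {
+	#           w = (unsigned long long)b * *a++ + carry;
+	#           *c++  = (mp_digit)w;   /* low 32 bits of the product */
+	#           carry = w >> 32;       /* high 32 bits become carry  */
+	#       }
+	#       *c = (mp_digit)carry;      /* final carry word           */
+	#   }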
+.globl _s_mpv_mul_d
+.type _s_mpv_mul_d,@function
+_s_mpv_mul_d:
+ GET is_sse,%eax
+ cmp $0,%eax
+ je _s_mpv_mul_d_x86
+ jg _s_mpv_mul_d_sse2
+ call _s_mpi_is_sse2
+ PUT %eax,is_sse
+ cmp $0,%eax
+ jg _s_mpv_mul_d_sse2
+_s_mpv_mul_d_x86:
+ push %ebp
+ mov %esp,%ebp
+ sub $28,%esp
+ push %edi
+ push %esi
+ push %ebx
+ movl $0,%ebx # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ mov 20(%ebp),%edi
+ cmp $0,%ecx
+ je 2f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld
+1:
+ lodsl # eax = [ds:esi]; esi += 4
+ mov 16(%ebp),%edx # edx = b
+ mull %edx # edx:eax = Phi:Plo = a_i * b
+
+ add %ebx,%eax # add carry (%ebx) to edx:eax
+ adc $0,%edx
+ mov %edx,%ebx # high half of product becomes next carry
+
+ stosl # [es:edi] = ax; edi += 4;
+ dec %ecx # --a_len
+ jnz 1b # jmp if a_len != 0
+2:
+ mov %ebx,0(%edi) # *c = carry
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+_s_mpv_mul_d_sse2:
+ push %ebp
+ mov %esp,%ebp
+ push %edi
+ push %esi
+ psubq %mm2,%mm2 # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ movd 16(%ebp),%mm1 # mm1 = b
+ mov 20(%ebp),%edi
+ cmp $0,%ecx
+ je 6f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld
+5:
+ movd 0(%esi),%mm0 # mm0 = *a++
+ add $4,%esi
+ pmuludq %mm1,%mm0 # mm0 = b * *a++
+ paddq %mm0,%mm2 # add the carry
+ movd %mm2,0(%edi) # store the 32bit result
+ add $4,%edi
+ psrlq $32, %mm2 # save the carry
+ dec %ecx # --a_len
+ jnz 5b # jmp if a_len != 0
+6:
+ movd %mm2,0(%edi) # *c = carry
+ emms
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+ # ebp - 36: caller's esi
+ # ebp - 32: caller's edi
+ # ebp - 28:
+ # ebp - 24:
+ # ebp - 20:
+ # ebp - 16:
+ # ebp - 12:
+ # ebp - 8:
+ # ebp - 4:
+ # ebp + 0: caller's ebp
+ # ebp + 4: return address
+ # ebp + 8: a argument
+ # ebp + 12: a_len argument
+ # ebp + 16: b argument
+ # ebp + 20: c argument
+ # registers:
+ # eax:
+ # ebx: carry
+ # ecx: a_len
+ # edx:
+ # esi: a ptr
+ # edi: c ptr
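+	#
+	# Same as _s_mpv_mul_d above, except the product is accumulated into
+	# the existing contents of c.  Illustrative C sketch (32-bit digits):
+	#
+	#   void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len,
+	#                        mp_digit b, mp_digit *c)
+	#   {
+	#       unsigned long long w, carry = 0;
+	#       while (a_len--) {
+	#           w = (unsigned long long)b * *a++ + carry + *c;
+	#           *c++  = (mp_digit)w;
+	#           carry = w >> 32;
+	#       }
+	#       *c = (mp_digit)carry;      /* carry overwrites c[a_len]  */
+	#   }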
+.globl _s_mpv_mul_d_add
+.type _s_mpv_mul_d_add,@function
+_s_mpv_mul_d_add:
+ GET is_sse,%eax
+ cmp $0,%eax
+ je _s_mpv_mul_d_add_x86
+ jg _s_mpv_mul_d_add_sse2
+ call _s_mpi_is_sse2
+ PUT %eax,is_sse
+ cmp $0,%eax
+ jg _s_mpv_mul_d_add_sse2
+_s_mpv_mul_d_add_x86:
+ push %ebp
+ mov %esp,%ebp
+ sub $28,%esp
+ push %edi
+ push %esi
+ push %ebx
+ movl $0,%ebx # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ mov 20(%ebp),%edi
+ cmp $0,%ecx
+ je 11f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld
+10:
+ lodsl # eax = [ds:esi]; esi += 4
+ mov 16(%ebp),%edx # edx = b
+ mull %edx # edx:eax = Phi:Plo = a_i * b
+
+ add %ebx,%eax # add carry (%ebx) to edx:eax
+ adc $0,%edx
+ mov 0(%edi),%ebx # add in current word from *c
+ add %ebx,%eax
+ adc $0,%edx
+ mov %edx,%ebx # high half of product becomes next carry
+
+ stosl # [es:edi] = ax; edi += 4;
+ dec %ecx # --a_len
+ jnz 10b # jmp if a_len != 0
+11:
+ mov %ebx,0(%edi) # *c = carry
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+_s_mpv_mul_d_add_sse2:
+ push %ebp
+ mov %esp,%ebp
+ push %edi
+ push %esi
+ psubq %mm2,%mm2 # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ movd 16(%ebp),%mm1 # mm1 = b
+ mov 20(%ebp),%edi
+ cmp $0,%ecx
+ je 16f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld
+15:
+ movd 0(%esi),%mm0 # mm0 = *a++
+ add $4,%esi
+ pmuludq %mm1,%mm0 # mm0 = b * *a++
+ paddq %mm0,%mm2 # add the carry
+ movd 0(%edi),%mm0
+ paddq %mm0,%mm2 # add the carry
+ movd %mm2,0(%edi) # store the 32bit result
+ add $4,%edi
+ psrlq $32, %mm2 # save the carry
+ dec %ecx # --a_len
+ jnz 15b # jmp if a_len != 0
+16:
+ movd %mm2,0(%edi) # *c = carry
+ emms
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+ # ebp - 8: caller's esi
+ # ebp - 4: caller's edi
+ # ebp + 0: caller's ebp
+ # ebp + 4: return address
+ # ebp + 8: a argument
+ # ebp + 12: a_len argument
+ # ebp + 16: b argument
+ # ebp + 20: c argument
+ # registers:
+ # eax:
+ # ebx: carry
+ # ecx: a_len
+ # edx:
+ # esi: a ptr
+ # edi: c ptr
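+	#
+	# Like _s_mpv_mul_d_add above, except the final carry is propagated
+	# through the higher words of c rather than stored over c[a_len].
+	# Illustrative C sketch (32-bit digits):
+	#
+	#   void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len,
+	#                             mp_digit b, mp_digit *c)
+	#   {
+	#       unsigned long long w, carry = 0;
+	#       while (a_len--) {
+	#           w = (unsigned long long)b * *a++ + carry + *c;
+	#           *c++  = (mp_digit)w;
+	#           carry = w >> 32;
+	#       }
+	#       while (carry) {            /* ripple the carry up through c */
+	#           w = (unsigned long long)*c + carry;
+	#           *c++  = (mp_digit)w;
+	#           carry = w >> 32;
+	#       }
+	#   }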
+.globl _s_mpv_mul_d_add_prop
+.type _s_mpv_mul_d_add_prop,@function
+_s_mpv_mul_d_add_prop:
+ GET is_sse,%eax
+ cmp $0,%eax
+ je _s_mpv_mul_d_add_prop_x86
+ jg _s_mpv_mul_d_add_prop_sse2
+ call _s_mpi_is_sse2
+ PUT %eax,is_sse
+ cmp $0,%eax
+ jg _s_mpv_mul_d_add_prop_sse2
+_s_mpv_mul_d_add_prop_x86:
+ push %ebp
+ mov %esp,%ebp
+ sub $28,%esp
+ push %edi
+ push %esi
+ push %ebx
+ movl $0,%ebx # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ mov 20(%ebp),%edi
+ cmp $0,%ecx
+ je 21f # jmp if a_len == 0
+ cld
+ mov 8(%ebp),%esi # esi = a
+20:
+ lodsl # eax = [ds:esi]; esi += 4
+ mov 16(%ebp),%edx # edx = b
+ mull %edx # edx:eax = Phi:Plo = a_i * b
+
+ add %ebx,%eax # add carry (%ebx) to edx:eax
+ adc $0,%edx
+ mov 0(%edi),%ebx # add in current word from *c
+ add %ebx,%eax
+ adc $0,%edx
+ mov %edx,%ebx # high half of product becomes next carry
+
+ stosl # [es:edi] = ax; edi += 4;
+ dec %ecx # --a_len
+ jnz 20b # jmp if a_len != 0
+21:
+ cmp $0,%ebx # is carry zero?
+ jz 23f
+ mov 0(%edi),%eax # add in current word from *c
+ add %ebx,%eax
+ stosl # [es:edi] = ax; edi += 4;
+ jnc 23f
+22:
+ mov 0(%edi),%eax # add in current word from *c
+ adc $0,%eax
+ stosl # [es:edi] = ax; edi += 4;
+ jc 22b
+23:
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+_s_mpv_mul_d_add_prop_sse2:
+ push %ebp
+ mov %esp,%ebp
+ push %edi
+ push %esi
+ push %ebx
+ psubq %mm2,%mm2 # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ movd 16(%ebp),%mm1 # mm1 = b
+ mov 20(%ebp),%edi
+ cmp $0,%ecx
+ je 26f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld
+25:
+ movd 0(%esi),%mm0 # mm0 = *a++
+ movd 0(%edi),%mm3 # fetch the sum
+ add $4,%esi
+ pmuludq %mm1,%mm0 # mm0 = b * *a++
+ paddq %mm0,%mm2 # add the carry
+ paddq %mm3,%mm2 # add *c++
+ movd %mm2,0(%edi) # store the 32bit result
+ add $4,%edi
+ psrlq $32, %mm2 # save the carry
+ dec %ecx # --a_len
+ jnz 25b # jmp if a_len != 0
+26:
+ movd %mm2,%ebx
+ cmp $0,%ebx # is carry zero?
+ jz 28f
+ mov 0(%edi),%eax
+ add %ebx, %eax
+ stosl
+ jnc 28f
+27:
+ mov 0(%edi),%eax # add in current word from *c
+ adc $0,%eax
+ stosl # [es:edi] = ax; edi += 4;
+ jc 27b
+28:
+ emms
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+
+ # ebp - 20: caller's esi
+ # ebp - 16: caller's edi
+ # ebp - 12:
+ # ebp - 8: carry
+ # ebp - 4: a_len local
+ # ebp + 0: caller's ebp
+ # ebp + 4: return address
+ # ebp + 8: pa argument
+ # ebp + 12: a_len argument
+ # ebp + 16: ps argument
+ # ebp + 20:
+ # registers:
+ # eax:
+ # ebx: carry
+ # ecx: a_len
+ # edx:
+	#  esi: pa ptr
+	#  edi: ps ptr
+
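+	#
+	# The routine below adds the square of each digit of pa into the
+	# corresponding pair of digits of ps and propagates the final carry.
+	# Illustrative C sketch (32-bit digits):
+	#
+	#   void s_mpv_sqr_add_prop(const mp_digit *pa, mp_size a_len,
+	#                           mp_digit *ps)
+	#   {
+	#       unsigned long long w, carry = 0;
+	#       while (a_len--) {
+	#           w  = (unsigned long long)*pa * *pa + carry;  pa++;
+	#           w += ps[0];
+	#           ps[0] = (mp_digit)w;           /* low word of the square  */
+	#           w  = (w >> 32) + ps[1];
+	#           ps[1] = (mp_digit)w;           /* high word of the square */
+	#           carry = w >> 32;
+	#           ps += 2;
+	#       }
+	#       while (carry) {                    /* ripple carry through ps */
+	#           w = (unsigned long long)*ps + carry;
+	#           *ps++ = (mp_digit)w;
+	#           carry = w >> 32;
+	#       }
+	#   }
+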
+.globl _s_mpv_sqr_add_prop
+.type _s_mpv_sqr_add_prop,@function
+_s_mpv_sqr_add_prop:
+ GET is_sse,%eax
+ cmp $0,%eax
+ je _s_mpv_sqr_add_prop_x86
+ jg _s_mpv_sqr_add_prop_sse2
+ call _s_mpi_is_sse2
+ PUT %eax,is_sse
+ cmp $0,%eax
+ jg _s_mpv_sqr_add_prop_sse2
+_s_mpv_sqr_add_prop_x86:
+ push %ebp
+ mov %esp,%ebp
+ sub $12,%esp
+ push %edi
+ push %esi
+ push %ebx
+ movl $0,%ebx # carry = 0
+ mov 12(%ebp),%ecx # a_len
+ mov 16(%ebp),%edi # edi = ps
+ cmp $0,%ecx
+ je 31f # jump if a_len == 0
+ cld
+ mov 8(%ebp),%esi # esi = pa
+30:
+ lodsl # %eax = [ds:si]; si += 4;
+ mull %eax
+
+ add %ebx,%eax # add "carry"
+ adc $0,%edx
+ mov 0(%edi),%ebx
+ add %ebx,%eax # add low word from result
+ mov 4(%edi),%ebx
+ stosl # [es:di] = %eax; di += 4;
+ adc %ebx,%edx # add high word from result
+ movl $0,%ebx
+ mov %edx,%eax
+ adc $0,%ebx
+ stosl # [es:di] = %eax; di += 4;
+ dec %ecx # --a_len
+ jnz 30b # jmp if a_len != 0
+31:
+ cmp $0,%ebx # is carry zero?
+ jz 34f
+ mov 0(%edi),%eax # add in current word from *c
+ add %ebx,%eax
+ stosl # [es:edi] = ax; edi += 4;
+ jnc 34f
+32:
+ mov 0(%edi),%eax # add in current word from *c
+ adc $0,%eax
+ stosl # [es:edi] = ax; edi += 4;
+ jc 32b
+34:
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+_s_mpv_sqr_add_prop_sse2:
+ push %ebp
+ mov %esp,%ebp
+ push %edi
+ push %esi
+ push %ebx
+ psubq %mm2,%mm2 # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ mov 16(%ebp),%edi
+ cmp $0,%ecx
+ je 36f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld
+35:
+ movd 0(%esi),%mm0 # mm0 = *a
+ movd 0(%edi),%mm3 # fetch the sum
+ add $4,%esi
+ pmuludq %mm0,%mm0 # mm0 = sqr(a)
+ paddq %mm0,%mm2 # add the carry
+ paddq %mm3,%mm2 # add the low word
+ movd 4(%edi),%mm3
+ movd %mm2,0(%edi) # store the 32bit result
+ psrlq $32, %mm2
+ paddq %mm3,%mm2 # add the high word
+ movd %mm2,4(%edi) # store the 32bit result
+ psrlq $32, %mm2 # save the carry.
+ add $8,%edi
+ dec %ecx # --a_len
+ jnz 35b # jmp if a_len != 0
+36:
+ movd %mm2,%ebx
+ cmp $0,%ebx # is carry zero?
+ jz 38f
+ mov 0(%edi),%eax
+ add %ebx, %eax
+ stosl
+ jnc 38f
+37:
+ mov 0(%edi),%eax # add in current word from *c
+ adc $0,%eax
+ stosl # [es:edi] = ax; edi += 4;
+ jc 37b
+38:
+ emms
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+ #
+ # Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
+ # so its high bit is 1. This code is from NSPR.
+ #
+ # mp_err _s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
+ # mp_digit *qp, mp_digit *rp)
+
+ # esp + 0: Caller's ebx
+ # esp + 4: return address
+ # esp + 8: Nhi argument
+ # esp + 12: Nlo argument
+ # esp + 16: divisor argument
+ # esp + 20: qp argument
+ # esp + 24: rp argument
+	# registers:
+	#  eax: Nlo, then the quotient after the div
+	#  edx: Nhi, then the remainder after the div
+	#  ebx: scratch (divisor, then qp, then rp)
+ #
+
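+	#
+	# Illustrative C equivalent of the routine below (a sketch, not the
+	# NSPR source; the caller must ensure Nhi < divisor so the 64-by-32
+	# quotient fits in 32 bits and the "div" cannot fault):
+	#
+	#   mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
+	#                          mp_digit *qp, mp_digit *rp)
+	#   {
+	#       unsigned long long N = ((unsigned long long)Nhi << 32) | Nlo;
+	#       *qp = (mp_digit)(N / divisor);   /* quotient:  eax after div */
+	#       *rp = (mp_digit)(N % divisor);   /* remainder: edx after div */
+	#       return 0;                        /* MP_OKAY                  */
+	#   }
+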
+.globl _s_mpv_div_2dx1d
+.type _s_mpv_div_2dx1d,@function
+_s_mpv_div_2dx1d:
+ push %ebx
+ mov 8(%esp),%edx
+ mov 12(%esp),%eax
+ mov 16(%esp),%ebx
+ div %ebx
+ mov 20(%esp),%ebx
+ mov %eax,0(%ebx)
+ mov 24(%esp),%ebx
+ mov %edx,0(%ebx)
+ xor %eax,%eax # return zero
+ pop %ebx
+ ret
+ nop
+