1 files changed, 538 insertions, 0 deletions
diff --git a/security/nss/lib/freebl/mpi/mpi_x86_os2.s b/security/nss/lib/freebl/mpi/mpi_x86_os2.s
new file mode 100644
index 000000000..b903e2564
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_x86_os2.s
@@ -0,0 +1,538 @@
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+.data
+.align 4
+ # is_sse: cached SSE2 probe result, read by the dispatching entry points.
+ # -1 means not probed yet: the entry point calls _s_mpi_is_sse2 and
+ #    stores its result here (this is the initial value).
+ #  0 means to use the plain x86 code paths
+ #  1 (any value > 0) means to use the sse2 code paths
+.type	is_sse,@object
+.size	is_sse,4
+is_sse: .long	-1 
+
+#
+# Handle the difference between -fPIC and not PIC.  The original note here
+# said to default to PIC (file then Linux-only; solaris uses mpi_i86pc.s,
+# windows uses mpi_x86_asm.c), but in this file the PIC (@GOTOFF) macros
+# are commented out and only the direct-addressing GET/PUT are assembled.
+# GET var,reg : reg = var (32-bit load)   PUT reg,var : var = reg (store)
+#.ifndef NO_PIC
+#.macro GET   var,reg
+#    movl   \var@GOTOFF(%ebx),\reg
+#.endm
+#.macro PUT   reg,var
+#    movl   \reg,\var@GOTOFF(%ebx)
+#.endm
+#.else
+.macro GET   var,reg
+    movl   \var,\reg
+.endm
+.macro PUT   reg,var
+    movl   \reg,\var
+.endm
+#.endif
+
+.text
+
+
+ #  _s_mpv_mul_d(a, a_len, b, c): c[0..a_len] = a[0..a_len-1] * b
+ #  Writes a_len 32-bit product words to c, then the final carry to c[a_len].
+ #  Picks the x86 or sse2 body from is_sse (probing while it is negative).
+ #
+ #  Stack frame of the x86 body (after its prologue):
+ #  ebp - 40:	caller's ebx
+ #  ebp - 36:	caller's esi
+ #  ebp - 32:	caller's edi
+ #  ebp - 28 .. ebp - 4:	reserved by "sub $28,%esp", never referenced
+ #  (the sse2 body reserves nothing: edi at ebp - 4, esi at ebp - 8)
+ #  ebp + 0:	caller's ebp
+ #  ebp + 4:	return address
+ #  ebp + 8:	a	argument
+ #  ebp + 12:	a_len	argument
+ #  ebp + 16:	b	argument
+ #  ebp + 20:	c	argument
+ #  registers:
+ # 	eax:	current word of a, then low half of the product
+ #	ebx:	carry (x86 body); the sse2 body keeps it in mm2
+ #	ecx:	a_len (loop counter); edx: high half of the product
+ #	esi:	a ptr
+ #	edi:	c ptr
+.globl	_s_mpv_mul_d
+.type	_s_mpv_mul_d,@function
+_s_mpv_mul_d:
+    GET    is_sse,%eax
+    cmp    $0,%eax		# eax = cached probe result
+    je     _s_mpv_mul_d_x86	# 0: use the x86 body
+    jg     _s_mpv_mul_d_sse2	# > 0: use the sse2 body
+    call   _s_mpi_is_sse2	# < 0: not probed yet, ask now
+    PUT    %eax,is_sse
+    cmp    $0,%eax		# result was just cached in is_sse
+    jg     _s_mpv_mul_d_sse2	# otherwise fall through to the x86 body
+_s_mpv_mul_d_x86:
+    push   %ebp
+    mov    %esp,%ebp
+    sub    $28,%esp		# reserved area; not referenced below
+    push   %edi
+    push   %esi
+    push   %ebx
+    movl   $0,%ebx		# carry = 0
+    mov    12(%ebp),%ecx	# ecx = a_len
+    mov    20(%ebp),%edi	# edi = c
+    cmp    $0,%ecx
+    je     2f			# jmp if a_len == 0
+    mov    8(%ebp),%esi		# esi = a
+    cld				# make lodsl/stosl step forward
+1:
+    lodsl			# eax = [ds:esi]; esi += 4
+    mov    16(%ebp),%edx	# edx = b
+    mull   %edx			# edx:eax = Phi:Plo = a_i * b
+
+    add    %ebx,%eax		# add carry (%ebx) to edx:eax
+    adc    $0,%edx
+    mov    %edx,%ebx		# high half of product becomes next carry
+
+    stosl			# [es:edi] = eax; edi += 4;
+    dec    %ecx			# --a_len
+    jnz    1b			# jmp if a_len != 0
+2:
+    mov    %ebx,0(%edi)		# c[a_len] = carry (edi is past the products)
+    pop    %ebx
+    pop    %esi
+    pop    %edi
+    leave  
+    ret    
+    nop
+_s_mpv_mul_d_sse2:
+    push   %ebp
+    mov    %esp,%ebp
+    push   %edi
+    push   %esi
+    psubq  %mm2,%mm2		# carry = 0
+    mov    12(%ebp),%ecx	# ecx = a_len
+    movd   16(%ebp),%mm1	# mm1 = b
+    mov    20(%ebp),%edi	# edi = c
+    cmp    $0,%ecx
+    je     6f			# jmp if a_len == 0
+    mov    8(%ebp),%esi		# esi = a
+    cld				# (no string instruction is used in this body)
+5:
+    movd   0(%esi),%mm0         # mm0 = *a
+    add    $4,%esi		# a++
+    pmuludq %mm1,%mm0           # mm0 = 64-bit product b * a_i
+    paddq  %mm0,%mm2            # mm2 = product + carry
+    movd   %mm2,0(%edi)         # store the low 32 bits of the sum
+    add    $4,%edi		# c++
+    psrlq  $32, %mm2		# high 32 bits become the next carry
+    dec    %ecx			# --a_len
+    jnz    5b			# jmp if a_len != 0
+6:
+    movd   %mm2,0(%edi)		# c[a_len] = carry
+    emms			# clear the MMX state before returning
+    pop    %esi
+    pop    %edi
+    leave  
+    ret    
+    nop
+
+ #  _s_mpv_mul_d_add(a, a_len, b, c): c[0..a_len-1] += a[0..a_len-1] * b
+ #  The final carry word is stored (not added) into c[a_len].
+ #  Picks the x86 or sse2 body from is_sse (probing while it is negative).
+ #
+ #  Stack frame of the x86 body (after its prologue):
+ #  ebp - 40:	caller's ebx
+ #  ebp - 36:	caller's esi
+ #  ebp - 32:	caller's edi
+ #  ebp - 28 .. ebp - 4:	reserved by "sub $28,%esp", never referenced
+ #  (the sse2 body reserves nothing: edi at ebp - 4, esi at ebp - 8)
+ #  ebp + 0:	caller's ebp
+ #  ebp + 4:	return address
+ #  ebp + 8:	a	argument
+ #  ebp + 12:	a_len	argument
+ #  ebp + 16:	b	argument
+ #  ebp + 20:	c	argument
+ #  registers:
+ # 	eax:	current word of a, then low half of the running sum
+ #	ebx:	carry, also scratch for *c (x86 body); sse2 body uses mm2
+ #	ecx:	a_len (loop counter); edx: high half of the running sum
+ #	esi:	a ptr
+ #	edi:	c ptr
+.globl	_s_mpv_mul_d_add
+.type	_s_mpv_mul_d_add,@function
+_s_mpv_mul_d_add:
+    GET    is_sse,%eax
+    cmp    $0,%eax		# eax = cached probe result
+    je     _s_mpv_mul_d_add_x86		# 0: use the x86 body
+    jg     _s_mpv_mul_d_add_sse2	# > 0: use the sse2 body
+    call   _s_mpi_is_sse2	# < 0: not probed yet, ask now
+    PUT    %eax,is_sse
+    cmp    $0,%eax		# result was just cached in is_sse
+    jg     _s_mpv_mul_d_add_sse2	# otherwise fall through to x86
+_s_mpv_mul_d_add_x86:
+    push   %ebp
+    mov    %esp,%ebp
+    sub    $28,%esp		# reserved area; not referenced below
+    push   %edi
+    push   %esi
+    push   %ebx
+    movl   $0,%ebx		# carry = 0
+    mov    12(%ebp),%ecx	# ecx = a_len
+    mov    20(%ebp),%edi	# edi = c
+    cmp    $0,%ecx
+    je     11f			# jmp if a_len == 0
+    mov    8(%ebp),%esi		# esi = a
+    cld				# make lodsl/stosl step forward
+10:
+    lodsl			# eax = [ds:esi]; esi += 4
+    mov    16(%ebp),%edx	# edx = b
+    mull   %edx			# edx:eax = Phi:Plo = a_i * b
+
+    add    %ebx,%eax		# add carry (%ebx) to edx:eax
+    adc    $0,%edx
+    mov    0(%edi),%ebx		# add in current word from *c
+    add    %ebx,%eax		# low half += *c
+    adc    $0,%edx
+    mov    %edx,%ebx		# high half of product becomes next carry
+
+    stosl			# [es:edi] = eax; edi += 4;
+    dec    %ecx			# --a_len
+    jnz    10b			# jmp if a_len != 0
+11:
+    mov    %ebx,0(%edi)		# c[a_len] = carry (overwrites that word)
+    pop    %ebx
+    pop    %esi
+    pop    %edi
+    leave  
+    ret    
+    nop
+_s_mpv_mul_d_add_sse2:
+    push   %ebp
+    mov    %esp,%ebp
+    push   %edi
+    push   %esi
+    psubq  %mm2,%mm2		# carry = 0
+    mov    12(%ebp),%ecx	# ecx = a_len
+    movd   16(%ebp),%mm1	# mm1 = b
+    mov    20(%ebp),%edi	# edi = c
+    cmp    $0,%ecx
+    je     16f			# jmp if a_len == 0
+    mov    8(%ebp),%esi		# esi = a
+    cld				# (no string instruction is used in this body)
+15:
+    movd   0(%esi),%mm0         # mm0 = *a
+    add    $4,%esi		# a++
+    pmuludq %mm1,%mm0           # mm0 = 64-bit product b * a_i
+    paddq  %mm0,%mm2            # mm2 = product + carry
+    movd   0(%edi),%mm0		# mm0 = current word from *c
+    paddq  %mm0,%mm2            # add the current word from *c
+    movd   %mm2,0(%edi)         # store the low 32 bits of the sum
+    add    $4,%edi		# c++
+    psrlq  $32, %mm2		# high 32 bits become the next carry
+    dec    %ecx			# --a_len
+    jnz    15b			# jmp if a_len != 0
+16:
+    movd   %mm2,0(%edi)		# c[a_len] = carry (overwrites that word)
+    emms			# clear the MMX state before returning
+    pop    %esi
+    pop    %edi
+    leave  
+    ret    
+    nop
+
+ #  _s_mpv_mul_d_add_prop(a, a_len, b, c): c[0..a_len-1] += a * b, then the
+ #  carry is added into c[a_len], c[a_len+1], ... until no carry remains.
+ #  sse2 body saves edi/esi/ebx at ebp - 4/- 8/- 12; the x86 body first
+ #  reserves 28 unused bytes, so its saves sit at ebp - 32/- 36/- 40.
+ #  ebp + 0:	caller's ebp
+ #  ebp + 4:	return address
+ #  ebp + 8:	a	argument
+ #  ebp + 12:	a_len	argument
+ #  ebp + 16:	b	argument
+ #  ebp + 20:	c	argument
+ #  registers:
+ # 	eax:	word being multiplied / stored; edx: high half of the sum
+ #	ebx:	carry (the sse2 loop keeps it in mm2, then moves it to ebx)
+ #	ecx:	a_len (loop counter)
+ #	esi:	a ptr;  edi: c ptr
+.globl	_s_mpv_mul_d_add_prop
+.type	_s_mpv_mul_d_add_prop,@function
+_s_mpv_mul_d_add_prop:
+    GET    is_sse,%eax
+    cmp    $0,%eax		# eax = cached probe result
+    je     _s_mpv_mul_d_add_prop_x86	# 0: use the x86 body
+    jg     _s_mpv_mul_d_add_prop_sse2	# > 0: use the sse2 body
+    call   _s_mpi_is_sse2	# < 0: not probed yet, ask now
+    PUT    %eax,is_sse
+    cmp    $0,%eax		# result was just cached in is_sse
+    jg     _s_mpv_mul_d_add_prop_sse2	# otherwise fall through to x86
+_s_mpv_mul_d_add_prop_x86:
+    push   %ebp
+    mov    %esp,%ebp
+    sub    $28,%esp		# reserved area; not referenced below
+    push   %edi
+    push   %esi
+    push   %ebx
+    movl   $0,%ebx		# carry = 0
+    mov    12(%ebp),%ecx	# ecx = a_len
+    mov    20(%ebp),%edi	# edi = c
+    cmp    $0,%ecx
+    je     21f			# jmp if a_len == 0
+    cld				# make lodsl/stosl step forward
+    mov    8(%ebp),%esi		# esi = a
+20:
+    lodsl			# eax = [ds:esi]; esi += 4
+    mov    16(%ebp),%edx	# edx = b
+    mull   %edx			# edx:eax = Phi:Plo = a_i * b
+
+    add    %ebx,%eax		# add carry (%ebx) to edx:eax
+    adc    $0,%edx
+    mov    0(%edi),%ebx		# add in current word from *c
+    add    %ebx,%eax		# low half += *c
+    adc    $0,%edx
+    mov    %edx,%ebx		# high half of product becomes next carry
+
+    stosl			# [es:edi] = eax; edi += 4;
+    dec    %ecx			# --a_len
+    jnz    20b			# jmp if a_len != 0
+21:
+    cmp    $0,%ebx		# is carry zero?
+    jz     23f			# yes: nothing to propagate
+    mov    0(%edi),%eax		# add the carry word into c[a_len]
+    add	   %ebx,%eax
+    stosl			# [es:edi] = eax; edi += 4; (flags unchanged)
+    jnc    23f			# done unless that add carried out
+22:
+    mov    0(%edi),%eax		# ripple: next word of c (CF is set here)
+    adc	   $0,%eax		# add the carry bit
+    stosl			# [es:edi] = eax; edi += 4; (flags unchanged)
+    jc     22b			# keep going while the word wraps to zero
+23:
+    pop    %ebx
+    pop    %esi
+    pop    %edi
+    leave  
+    ret    
+    nop
+_s_mpv_mul_d_add_prop_sse2:
+    push   %ebp
+    mov    %esp,%ebp
+    push   %edi
+    push   %esi
+    push   %ebx
+    psubq  %mm2,%mm2		# carry = 0
+    mov    12(%ebp),%ecx	# ecx = a_len
+    movd   16(%ebp),%mm1	# mm1 = b
+    mov    20(%ebp),%edi	# edi = c
+    cmp    $0,%ecx
+    je     26f			# jmp if a_len == 0
+    mov    8(%ebp),%esi		# esi = a
+    cld				# the stosl instructions below step forward
+25:
+    movd   0(%esi),%mm0         # mm0 = *a
+    movd   0(%edi),%mm3		# fetch the sum
+    add    $4,%esi		# a++
+    pmuludq %mm1,%mm0           # mm0 = 64-bit product b * a_i
+    paddq  %mm0,%mm2            # add the carry
+    paddq  %mm3,%mm2            # add *c
+    movd   %mm2,0(%edi)         # store the low 32 bits of the sum
+    add    $4,%edi		# c++
+    psrlq  $32, %mm2		# high 32 bits become the next carry
+    dec    %ecx			# --a_len
+    jnz    25b			# jmp if a_len != 0
+26:
+    movd   %mm2,%ebx		# ebx = final carry word
+    cmp    $0,%ebx		# is carry zero?
+    jz     28f			# yes: nothing to propagate
+    mov    0(%edi),%eax		# add the carry word into c[a_len]
+    add    %ebx, %eax
+    stosl			# [es:edi] = eax; edi += 4; (flags unchanged)
+    jnc    28f			# done unless that add carried out
+27:
+    mov    0(%edi),%eax		# ripple: next word of c (CF is set here)
+    adc	   $0,%eax		# add the carry bit
+    stosl			# [es:edi] = eax; edi += 4; (flags unchanged)
+    jc     27b			# keep going while the word wraps to zero
+28:
+    emms			# clear the MMX state before returning
+    pop    %ebx
+    pop    %esi
+    pop    %edi
+    leave  
+    ret    
+    nop
+
+
+ #  _s_mpv_sqr_add_prop(pa, a_len, ps): for each i, add the 64-bit square
+ #  pa[i]^2 into ps[2i], ps[2i+1]; the carry is rippled into higher words.
+ #  x86 body: sub $12 reserves ebp - 12 .. ebp - 4 (never referenced), then
+ #  saves edi/esi/ebx at ebp - 16/- 20/- 24.
+ #  sse2 body: saves edi/esi/ebx at ebp - 4/- 8/- 12.
+ #  ebp + 0:	caller's ebp
+ #  ebp + 4:	return address
+ #  ebp + 8:	pa	argument
+ #  ebp + 12:	a_len	argument
+ #  ebp + 16:	ps	argument
+ #  ebp + 20:	(not used)
+ #  registers:
+ # 	eax:	pa[i], then the word being stored
+ #	ebx:	carry, also scratch for words of ps (sse2 loop: mm2 / mm3)
+ #	ecx:	a_len (loop counter)
+ #	edx:	high half of the square
+ #	esi:	pa ptr
+ #	edi:	ps ptr
+
+.globl	_s_mpv_sqr_add_prop
+.type	_s_mpv_sqr_add_prop,@function
+_s_mpv_sqr_add_prop:
+     GET   is_sse,%eax
+     cmp    $0,%eax		# eax = cached probe result
+     je     _s_mpv_sqr_add_prop_x86	# 0: use the x86 body
+     jg     _s_mpv_sqr_add_prop_sse2	# > 0: use the sse2 body
+     call   _s_mpi_is_sse2	# < 0: not probed yet, ask now
+     PUT    %eax,is_sse
+     cmp    $0,%eax		# result was just cached in is_sse
+     jg     _s_mpv_sqr_add_prop_sse2	# otherwise fall through to x86
+_s_mpv_sqr_add_prop_x86:
+     push   %ebp
+     mov    %esp,%ebp
+     sub    $12,%esp		# reserved area; not referenced below
+     push   %edi
+     push   %esi
+     push   %ebx
+     movl   $0,%ebx		# carry = 0
+     mov    12(%ebp),%ecx	# a_len
+     mov    16(%ebp),%edi	# edi = ps
+     cmp    $0,%ecx
+     je     31f			# jump if a_len == 0
+     cld			# make lodsl/stosl step forward
+     mov    8(%ebp),%esi	# esi = pa
+30:
+     lodsl			# %eax = [ds:esi]; esi += 4;
+     mull   %eax		# edx:eax = pa[i] * pa[i]
+
+     add    %ebx,%eax		# add "carry"
+     adc    $0,%edx
+     mov    0(%edi),%ebx	# ebx = low word of the result so far
+     add    %ebx,%eax		# add low word from result
+     mov    4(%edi),%ebx	# ebx = high word (mov leaves CF intact)
+     stosl			# [es:edi] = %eax; edi += 4; (CF intact)
+     adc    %ebx,%edx		# add high word from result, plus CF
+     movl   $0,%ebx		# mov rather than xor: CF must survive
+     mov    %edx,%eax
+     adc    $0,%ebx		# ebx = carry out of the high word
+     stosl			# [es:edi] = %eax; edi += 4;
+     dec    %ecx		# --a_len
+     jnz    30b			# jmp if a_len != 0
+31:
+    cmp    $0,%ebx		# is carry zero?
+    jz     34f			# yes: nothing to propagate
+    mov    0(%edi),%eax		# add the carry into the next word of ps
+    add	   %ebx,%eax
+    stosl			# [es:edi] = eax; edi += 4; (flags unchanged)
+    jnc    34f			# done unless that add carried out
+32:
+    mov    0(%edi),%eax		# ripple: next word of ps (CF is set here)
+    adc	   $0,%eax		# add the carry bit
+    stosl			# [es:edi] = eax; edi += 4; (flags unchanged)
+    jc     32b			# keep going while the word wraps to zero
+34:
+    pop    %ebx
+    pop    %esi
+    pop    %edi
+    leave  
+    ret    
+    nop
+_s_mpv_sqr_add_prop_sse2:
+    push   %ebp
+    mov    %esp,%ebp
+    push   %edi
+    push   %esi
+    push   %ebx
+    psubq  %mm2,%mm2		# carry = 0
+    mov    12(%ebp),%ecx	# ecx = a_len
+    mov    16(%ebp),%edi	# edi = ps
+    cmp    $0,%ecx
+    je     36f			# jmp if a_len == 0
+    mov    8(%ebp),%esi		# esi = pa
+    cld				# the stosl instructions below step forward
+35:
+    movd   0(%esi),%mm0        # mm0 = *a
+    movd   0(%edi),%mm3	       # fetch the sum
+    add	   $4,%esi		# a++
+    pmuludq %mm0,%mm0          # mm0 = sqr(a)
+    paddq  %mm0,%mm2           # add the carry
+    paddq  %mm3,%mm2           # add the low word
+    movd   4(%edi),%mm3		# mm3 = high word of the result so far
+    movd   %mm2,0(%edi)        # store the low 32 bits
+    psrlq  $32, %mm2		# mm2 = upper half of the 64-bit sum
+    paddq  %mm3,%mm2           # add the high word
+    movd   %mm2,4(%edi)        # store the low 32 bits as the high word
+    psrlq  $32, %mm2	       # save the carry.
+    add    $8,%edi		# ps += 2 words
+    dec    %ecx			# --a_len
+    jnz    35b			# jmp if a_len != 0
+36:
+    movd   %mm2,%ebx		# ebx = final carry
+    cmp    $0,%ebx		# is carry zero?
+    jz     38f			# yes: nothing to propagate
+    mov    0(%edi),%eax		# add the carry into the next word of ps
+    add    %ebx, %eax
+    stosl			# [es:edi] = eax; edi += 4; (flags unchanged)
+    jnc    38f			# done unless that add carried out
+37:
+    mov    0(%edi),%eax		# ripple: next word of ps (CF is set here)
+    adc	   $0,%eax		# add the carry bit
+    stosl			# [es:edi] = eax; edi += 4; (flags unchanged)
+    jc     37b			# keep going while the word wraps to zero
+38:
+    emms			# clear the MMX state before returning
+    pop    %ebx
+    pop    %esi
+    pop    %edi
+    leave  
+    ret    
+    nop
+
+ #
+ # _s_mpv_div_2dx1d: divide the 64-bit value Nhi:Nlo by a 32-bit divisor,
+ # which must be normalized so its high bit is 1.  This code is from NSPR.
+ #
+ # mp_err _s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
+ # 		          mp_digit *qp, mp_digit *rp)
+ # Stores the quotient in *qp, then the remainder in *rp; returns 0.
+ #  Stack layout once ebx has been pushed:
+ #  esp +  0:   Caller's ebx
+ #  esp +  4:	return address
+ #  esp +  8:	Nhi	argument
+ #  esp + 12:	Nlo	argument
+ #  esp + 16:	divisor	argument
+ #  esp + 20:	qp	argument
+ #  esp + 24:   rp	argument
+ #  registers:
+ # 	eax:	Nlo going in, quotient coming out; zeroed for the return
+ #	edx:	Nhi going in, remainder coming out
+ #	ebx:	pointer scratch (qp, then rp); saved and restored
+ #	ecx, esi, edi: not used
+ #  NOTE(review): the divide traps if the quotient needs more than 32
+ #  bits (Nhi >= divisor); presumably callers rule that out - confirm.
+
+.globl	_s_mpv_div_2dx1d
+.type	_s_mpv_div_2dx1d,@function
+_s_mpv_div_2dx1d:
+       pushl  %ebx			# callee-saved; used as a pointer below
+       movl   12(%esp),%eax		# eax = Nlo
+       movl   8(%esp),%edx		# edx = Nhi
+       # unsigned divide of edx:eax by the divisor argument on the stack
+       divl   16(%esp)			# eax = quotient, edx = remainder
+       movl   20(%esp),%ebx		# ebx = qp
+       movl   %eax,(%ebx)		# *qp = quotient
+       movl   24(%esp),%ebx		# ebx = rp
+       movl   %edx,(%ebx)		# *rp = remainder
+       xorl   %eax,%eax		# return zero
+       popl   %ebx
+       ret    
+       nop
+