diff options
Diffstat (limited to 'security/nss/lib/freebl/mpi/mpi_mips.s')
-rw-r--r-- | security/nss/lib/freebl/mpi/mpi_mips.s | 472 |
1 files changed, 472 insertions, 0 deletions
diff --git a/security/nss/lib/freebl/mpi/mpi_mips.s b/security/nss/lib/freebl/mpi/mpi_mips.s new file mode 100644 index 000000000..455792bbb --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpi_mips.s @@ -0,0 +1,472 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#include <regdef.h> + .set noreorder + .set noat + + .section .text, 1, 0x00000006, 4, 4 +.text: + .section .text + + .ent s_mpv_mul_d_add + .globl s_mpv_mul_d_add + +s_mpv_mul_d_add: + #/* c += a * b */ + #void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, + # mp_digit *c) + #{ + # mp_digit a0, a1; regs a4, a5 + # mp_digit c0, c1; regs a6, a7 + # mp_digit cy = 0; reg t2 + # mp_word w0, w1; regs t0, t1 + # + # if (a_len) { + beq a1,zero,.L.1 + move t2,zero # cy = 0 + dsll32 a2,a2,0 # "b" is sometimes negative (?!?!) + dsrl32 a2,a2,0 # This clears the upper 32 bits. + # a0 = a[0]; + lwu a4,0(a0) + # w0 = ((mp_word)b * a0); + dmultu a2,a4 + # if (--a_len) { + addiu a1,a1,-1 + beq a1,zero,.L.2 + # while (a_len >= 2) { + sltiu t3,a1,2 + bne t3,zero,.L.3 + # a1 = a[1]; + lwu a5,4(a0) +.L.4: + # a_len -= 2; + addiu a1,a1,-2 + # c0 = c[0]; + lwu a6,0(a3) + # w0 += cy; + mflo t0 + daddu t0,t0,t2 + # w0 += c0; + daddu t0,t0,a6 + # w1 = (mp_word)b * a1; + dmultu a2,a5 # + # cy = CARRYOUT(w0); + dsrl32 t2,t0,0 + # c[0] = ACCUM(w0); + sw t0,0(a3) + # a0 = a[2]; + lwu a4,8(a0) + # a += 2; + addiu a0,a0,8 + # c1 = c[1]; + lwu a7,4(a3) + # w1 += cy; + mflo t1 + daddu t1,t1,t2 + # w1 += c1; + daddu t1,t1,a7 + # w0 = (mp_word)b * a0; + dmultu a2,a4 # + # cy = CARRYOUT(w1); + dsrl32 t2,t1,0 + # c[1] = ACCUM(w1); + sw t1,4(a3) + # c += 2; + addiu a3,a3,8 + sltiu t3,a1,2 + beq t3,zero,.L.4 + # a1 = a[1]; + lwu a5,4(a0) + # } +.L.3: + # c0 = c[0]; + lwu a6,0(a3) + # w0 += cy; + # if (a_len) { + mflo t0 + beq a1,zero,.L.5 + daddu t0,t0,t2 + # w1 = (mp_word)b * a1; + dmultu a2,a5 + # w0 += c0; + daddu t0,t0,a6 # + # cy = CARRYOUT(w0); + dsrl32 t2,t0,0 + # c[0] = ACCUM(w0); + sw t0,0(a3) + # c1 = c[1]; + lwu a7,4(a3) + # w1 += cy; + mflo t1 + daddu t1,t1,t2 + # w1 += c1; + daddu t1,t1,a7 + # c[1] = ACCUM(w1); + sw t1,4(a3) + # cy = CARRYOUT(w1); + dsrl32 t2,t1,0 + # c += 1; + b .L.6 + addiu a3,a3,4 + # } else { +.L.5: + # w0 += c0; + daddu t0,t0,a6 + # c[0] = ACCUM(w0); + sw t0,0(a3) + # cy = CARRYOUT(w0); + b .L.6 + dsrl32 t2,t0,0 + # } + # } else { +.L.2: + # c0 = c[0]; + lwu a6,0(a3) + # w0 += c0; + mflo t0 + daddu t0,t0,a6 + # c[0] = ACCUM(w0); + sw t0,0(a3) + # cy = CARRYOUT(w0); + dsrl32 t2,t0,0 + # } +.L.6: + # c[1] = cy; + jr ra + sw t2,4(a3) + # } +.L.1: + jr ra + nop + #} + # + .end s_mpv_mul_d_add + + .ent s_mpv_mul_d_add_prop + .globl s_mpv_mul_d_add_prop + +s_mpv_mul_d_add_prop: + #/* c += a * b */ + #void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, + # mp_digit *c) + #{ + # mp_digit a0, a1; regs a4, a5 + # mp_digit c0, c1; regs a6, a7 + # mp_digit cy = 0; reg t2 + # mp_word w0, w1; regs t0, t1 + # + # if (a_len) { + beq a1,zero,.M.1 + move t2,zero # cy = 0 + dsll32 a2,a2,0 # "b" is sometimes negative (?!?!) + dsrl32 a2,a2,0 # This clears the upper 32 bits. + # a0 = a[0]; + lwu a4,0(a0) + # w0 = ((mp_word)b * a0); + dmultu a2,a4 + # if (--a_len) { + addiu a1,a1,-1 + beq a1,zero,.M.2 + # while (a_len >= 2) { + sltiu t3,a1,2 + bne t3,zero,.M.3 + # a1 = a[1]; + lwu a5,4(a0) +.M.4: + # a_len -= 2; + addiu a1,a1,-2 + # c0 = c[0]; + lwu a6,0(a3) + # w0 += cy; + mflo t0 + daddu t0,t0,t2 + # w0 += c0; + daddu t0,t0,a6 + # w1 = (mp_word)b * a1; + dmultu a2,a5 # + # cy = CARRYOUT(w0); + dsrl32 t2,t0,0 + # c[0] = ACCUM(w0); + sw t0,0(a3) + # a0 = a[2]; + lwu a4,8(a0) + # a += 2; + addiu a0,a0,8 + # c1 = c[1]; + lwu a7,4(a3) + # w1 += cy; + mflo t1 + daddu t1,t1,t2 + # w1 += c1; + daddu t1,t1,a7 + # w0 = (mp_word)b * a0; + dmultu a2,a4 # + # cy = CARRYOUT(w1); + dsrl32 t2,t1,0 + # c[1] = ACCUM(w1); + sw t1,4(a3) + # c += 2; + addiu a3,a3,8 + sltiu t3,a1,2 + beq t3,zero,.M.4 + # a1 = a[1]; + lwu a5,4(a0) + # } +.M.3: + # c0 = c[0]; + lwu a6,0(a3) + # w0 += cy; + # if (a_len) { + mflo t0 + beq a1,zero,.M.5 + daddu t0,t0,t2 + # w1 = (mp_word)b * a1; + dmultu a2,a5 + # w0 += c0; + daddu t0,t0,a6 # + # cy = CARRYOUT(w0); + dsrl32 t2,t0,0 + # c[0] = ACCUM(w0); + sw t0,0(a3) + # c1 = c[1]; + lwu a7,4(a3) + # w1 += cy; + mflo t1 + daddu t1,t1,t2 + # w1 += c1; + daddu t1,t1,a7 + # c[1] = ACCUM(w1); + sw t1,4(a3) + # cy = CARRYOUT(w1); + dsrl32 t2,t1,0 + # c += 1; + b .M.6 + addiu a3,a3,8 + # } else { +.M.5: + # w0 += c0; + daddu t0,t0,a6 + # c[0] = ACCUM(w0); + sw t0,0(a3) + # cy = CARRYOUT(w0); + dsrl32 t2,t0,0 + b .M.6 + addiu a3,a3,4 + # } + # } else { +.M.2: + # c0 = c[0]; + lwu a6,0(a3) + # w0 += c0; + mflo t0 + daddu t0,t0,a6 + # c[0] = ACCUM(w0); + sw t0,0(a3) + # cy = CARRYOUT(w0); + dsrl32 t2,t0,0 + addiu a3,a3,4 + # } +.M.6: + + # while (cy) { + beq t2,zero,.M.1 + nop +.M.7: + # mp_word w = (mp_word)*c + cy; + lwu a6,0(a3) + daddu t2,t2,a6 + # *c++ = ACCUM(w); + sw t2,0(a3) + # cy = CARRYOUT(w); + dsrl32 t2,t2,0 + bne t2,zero,.M.7 + addiu a3,a3,4 + + # } +.M.1: + jr ra + nop + #} + # + .end s_mpv_mul_d_add_prop + + .ent s_mpv_mul_d + .globl s_mpv_mul_d + +s_mpv_mul_d: + #/* c = a * b */ + #void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, + # mp_digit *c) + #{ + # mp_digit a0, a1; regs a4, a5 + # mp_digit cy = 0; reg t2 + # mp_word w0, w1; regs t0, t1 + # + # if (a_len) { + beq a1,zero,.N.1 + move t2,zero # cy = 0 + dsll32 a2,a2,0 # "b" is sometimes negative (?!?!) + dsrl32 a2,a2,0 # This clears the upper 32 bits. + # a0 = a[0]; + lwu a4,0(a0) + # w0 = ((mp_word)b * a0); + dmultu a2,a4 + # if (--a_len) { + addiu a1,a1,-1 + beq a1,zero,.N.2 + # while (a_len >= 2) { + sltiu t3,a1,2 + bne t3,zero,.N.3 + # a1 = a[1]; + lwu a5,4(a0) +.N.4: + # a_len -= 2; + addiu a1,a1,-2 + # w0 += cy; + mflo t0 + daddu t0,t0,t2 + # cy = CARRYOUT(w0); + dsrl32 t2,t0,0 + # w1 = (mp_word)b * a1; + dmultu a2,a5 + # c[0] = ACCUM(w0); + sw t0,0(a3) + # a0 = a[2]; + lwu a4,8(a0) + # a += 2; + addiu a0,a0,8 + # w1 += cy; + mflo t1 + daddu t1,t1,t2 + # cy = CARRYOUT(w1); + dsrl32 t2,t1,0 + # w0 = (mp_word)b * a0; + dmultu a2,a4 + # c[1] = ACCUM(w1); + sw t1,4(a3) + # c += 2; + addiu a3,a3,8 + sltiu t3,a1,2 + beq t3,zero,.N.4 + # a1 = a[1]; + lwu a5,4(a0) + # } +.N.3: + # w0 += cy; + # if (a_len) { + mflo t0 + beq a1,zero,.N.5 + daddu t0,t0,t2 + # w1 = (mp_word)b * a1; + dmultu a2,a5 # + # cy = CARRYOUT(w0); + dsrl32 t2,t0,0 + # c[0] = ACCUM(w0); + sw t0,0(a3) + # w1 += cy; + mflo t1 + daddu t1,t1,t2 + # c[1] = ACCUM(w1); + sw t1,4(a3) + # cy = CARRYOUT(w1); + dsrl32 t2,t1,0 + # c += 1; + b .N.6 + addiu a3,a3,4 + # } else { +.N.5: + # c[0] = ACCUM(w0); + sw t0,0(a3) + # cy = CARRYOUT(w0); + b .N.6 + dsrl32 t2,t0,0 + # } + # } else { +.N.2: + mflo t0 + # c[0] = ACCUM(w0); + sw t0,0(a3) + # cy = CARRYOUT(w0); + dsrl32 t2,t0,0 + # } +.N.6: + # c[1] = cy; + jr ra + sw t2,4(a3) + # } +.N.1: + jr ra + nop + #} + # + .end s_mpv_mul_d + + + .ent s_mpv_sqr_add_prop + .globl s_mpv_sqr_add_prop + #void s_mpv_sqr_add_prop(const mp_digit *a, mp_size a_len, mp_digit *sqrs); + # registers + # a0 *a + # a1 a_len + # a2 *sqr + # a3 digit from *a, a_i + # a4 square of digit from a + # a5,a6 next 2 digits in sqr + # a7,t0 carry +s_mpv_sqr_add_prop: + move a7,zero + move t0,zero + lwu a3,0(a0) + addiu a1,a1,-1 # --a_len + dmultu a3,a3 + beq a1,zero,.P.3 # jump if we've already done the only sqr + addiu a0,a0,4 # ++a +.P.2: + lwu a5,0(a2) + lwu a6,4(a2) + addiu a2,a2,8 # sqrs += 2; + dsll32 a6,a6,0 + daddu a5,a5,a6 + lwu a3,0(a0) + addiu a0,a0,4 # ++a + mflo a4 + daddu a6,a5,a4 + sltu a7,a6,a5 # a7 = a6 < a5 detect overflow + dmultu a3,a3 + daddu a4,a6,t0 + sltu t0,a4,a6 + add t0,t0,a7 + sw a4,-8(a2) + addiu a1,a1,-1 # --a_len + dsrl32 a4,a4,0 + bne a1,zero,.P.2 # loop if a_len > 0 + sw a4,-4(a2) +.P.3: + lwu a5,0(a2) + lwu a6,4(a2) + addiu a2,a2,8 # sqrs += 2; + dsll32 a6,a6,0 + daddu a5,a5,a6 + mflo a4 + daddu a6,a5,a4 + sltu a7,a6,a5 # a7 = a6 < a5 detect overflow + daddu a4,a6,t0 + sltu t0,a4,a6 + add t0,t0,a7 + sw a4,-8(a2) + beq t0,zero,.P.9 # jump if no carry + dsrl32 a4,a4,0 +.P.8: + sw a4,-4(a2) + /* propagate final carry */ + lwu a5,0(a2) + daddu a6,a5,t0 + sltu t0,a6,a5 + bne t0,zero,.P.8 # loop if carry persists + addiu a2,a2,4 # sqrs++ +.P.9: + jr ra + sw a4,-4(a2) + + .end s_mpv_sqr_add_prop |