summaryrefslogtreecommitdiffstats
path: root/security/nss/lib/freebl
diff options
context:
space:
mode:
authorMatt A. Tobin <mattatobin@localhost.localdomain>2018-02-02 04:16:08 -0500
committerMatt A. Tobin <mattatobin@localhost.localdomain>2018-02-02 04:16:08 -0500
commit5f8de423f190bbb79a62f804151bc24824fa32d8 (patch)
tree10027f336435511475e392454359edea8e25895d /security/nss/lib/freebl
parent49ee0794b5d912db1f95dce6eb52d781dc210db5 (diff)
downloadUXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.gz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.lz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.tar.xz
UXP-5f8de423f190bbb79a62f804151bc24824fa32d8.zip
Add m-esr52 at 52.6.0
Diffstat (limited to 'security/nss/lib/freebl')
-rw-r--r--security/nss/lib/freebl/Makefile764
-rw-r--r--security/nss/lib/freebl/aeskeywrap.c389
-rw-r--r--security/nss/lib/freebl/alg2268.c509
-rw-r--r--security/nss/lib/freebl/alghmac.c165
-rw-r--r--security/nss/lib/freebl/alghmac.h64
-rw-r--r--security/nss/lib/freebl/arcfive.c87
-rw-r--r--security/nss/lib/freebl/arcfour-amd64-gas.s88
-rw-r--r--security/nss/lib/freebl/arcfour-amd64-masm.asm107
-rw-r--r--security/nss/lib/freebl/arcfour-amd64-sun.s84
-rw-r--r--security/nss/lib/freebl/arcfour.c594
-rw-r--r--security/nss/lib/freebl/blapi.h1625
-rw-r--r--security/nss/lib/freebl/blapii.h61
-rw-r--r--security/nss/lib/freebl/blapit.h414
-rw-r--r--security/nss/lib/freebl/blname.c100
-rw-r--r--security/nss/lib/freebl/camellia.c1896
-rw-r--r--security/nss/lib/freebl/camellia.h42
-rw-r--r--security/nss/lib/freebl/chacha20.c119
-rw-r--r--security/nss/lib/freebl/chacha20.h26
-rw-r--r--security/nss/lib/freebl/chacha20_vec.c327
-rw-r--r--security/nss/lib/freebl/chacha20poly1305.c198
-rw-r--r--security/nss/lib/freebl/chacha20poly1305.h15
-rw-r--r--security/nss/lib/freebl/config.mk97
-rw-r--r--security/nss/lib/freebl/ctr.c246
-rw-r--r--security/nss/lib/freebl/ctr.h53
-rw-r--r--security/nss/lib/freebl/cts.c307
-rw-r--r--security/nss/lib/freebl/cts.h33
-rw-r--r--security/nss/lib/freebl/des.c676
-rw-r--r--security/nss/lib/freebl/des.h43
-rw-r--r--security/nss/lib/freebl/desblapi.c256
-rw-r--r--security/nss/lib/freebl/det_rng.c67
-rw-r--r--security/nss/lib/freebl/det_rng.h12
-rw-r--r--security/nss/lib/freebl/dh.c452
-rw-r--r--security/nss/lib/freebl/drbg.c968
-rw-r--r--security/nss/lib/freebl/dsa.c647
-rw-r--r--security/nss/lib/freebl/ec.c1159
-rw-r--r--security/nss/lib/freebl/ec.h21
-rw-r--r--security/nss/lib/freebl/ecdecode.c311
-rw-r--r--security/nss/lib/freebl/ecl/README267
-rw-r--r--security/nss/lib/freebl/ecl/curve25519_32.c390
-rw-r--r--security/nss/lib/freebl/ecl/curve25519_64.c514
-rw-r--r--security/nss/lib/freebl/ecl/ec_naf.c68
-rw-r--r--security/nss/lib/freebl/ecl/ecl-curve.h123
-rw-r--r--security/nss/lib/freebl/ecl/ecl-exp.h167
-rw-r--r--security/nss/lib/freebl/ecl/ecl-priv.h257
-rw-r--r--security/nss/lib/freebl/ecl/ecl.c301
-rw-r--r--security/nss/lib/freebl/ecl/ecl.h60
-rw-r--r--security/nss/lib/freebl/ecl/ecl_curve.c93
-rw-r--r--security/nss/lib/freebl/ecl/ecl_gf.c958
-rw-r--r--security/nss/lib/freebl/ecl/ecl_mult.c305
-rw-r--r--security/nss/lib/freebl/ecl/ecp.h106
-rw-r--r--security/nss/lib/freebl/ecl/ecp_25519.c120
-rw-r--r--security/nss/lib/freebl/ecl/ecp_256.c401
-rw-r--r--security/nss/lib/freebl/ecl/ecp_256_32.c1535
-rw-r--r--security/nss/lib/freebl/ecl/ecp_384.c258
-rw-r--r--security/nss/lib/freebl/ecl/ecp_521.c137
-rw-r--r--security/nss/lib/freebl/ecl/ecp_aff.c308
-rw-r--r--security/nss/lib/freebl/ecl/ecp_jac.c513
-rw-r--r--security/nss/lib/freebl/ecl/ecp_jm.c283
-rw-r--r--security/nss/lib/freebl/ecl/ecp_mont.c154
-rw-r--r--security/nss/lib/freebl/ecl/tests/ec_naft.c121
-rw-r--r--security/nss/lib/freebl/ecl/tests/ecp_test.c409
-rw-r--r--security/nss/lib/freebl/ecl/uint128.c87
-rw-r--r--security/nss/lib/freebl/ecl/uint128.h35
-rw-r--r--security/nss/lib/freebl/exports.gyp48
-rw-r--r--security/nss/lib/freebl/fipsfreebl.c1715
-rw-r--r--security/nss/lib/freebl/freebl.def26
-rw-r--r--security/nss/lib/freebl/freebl.gyp408
-rw-r--r--security/nss/lib/freebl/freebl.rc68
-rw-r--r--security/nss/lib/freebl/freebl_hash.def39
-rw-r--r--security/nss/lib/freebl/freebl_hash_vector.def34
-rw-r--r--security/nss/lib/freebl/freeblver.c18
-rw-r--r--security/nss/lib/freebl/gcm.c860
-rw-r--r--security/nss/lib/freebl/gcm.h31
-rw-r--r--security/nss/lib/freebl/genload.c167
-rw-r--r--security/nss/lib/freebl/hmacct.c335
-rw-r--r--security/nss/lib/freebl/hmacct.h38
-rw-r--r--security/nss/lib/freebl/intel-aes-x64-masm.asm971
-rw-r--r--security/nss/lib/freebl/intel-aes-x86-masm.asm949
-rw-r--r--security/nss/lib/freebl/intel-aes.h143
-rw-r--r--security/nss/lib/freebl/intel-aes.s2514
-rw-r--r--security/nss/lib/freebl/intel-gcm-wrap.c254
-rw-r--r--security/nss/lib/freebl/intel-gcm-x64-masm.asm1295
-rw-r--r--security/nss/lib/freebl/intel-gcm-x86-masm.asm1209
-rw-r--r--security/nss/lib/freebl/intel-gcm.h83
-rw-r--r--security/nss/lib/freebl/intel-gcm.s1340
-rw-r--r--security/nss/lib/freebl/jpake.c495
-rw-r--r--security/nss/lib/freebl/ldvector.c353
-rw-r--r--security/nss/lib/freebl/loader.c2126
-rw-r--r--security/nss/lib/freebl/loader.h788
-rw-r--r--security/nss/lib/freebl/lowhash_vector.c217
-rw-r--r--security/nss/lib/freebl/manifest.mn195
-rw-r--r--security/nss/lib/freebl/md2.c269
-rw-r--r--security/nss/lib/freebl/md5.c598
-rw-r--r--security/nss/lib/freebl/mknewpc2.c208
-rw-r--r--security/nss/lib/freebl/mksp.c119
-rw-r--r--security/nss/lib/freebl/mpi/Makefile244
-rw-r--r--security/nss/lib/freebl/mpi/Makefile.os2243
-rw-r--r--security/nss/lib/freebl/mpi/Makefile.win254
-rw-r--r--security/nss/lib/freebl/mpi/README749
-rwxr-xr-xsecurity/nss/lib/freebl/mpi/all-tests83
-rw-r--r--security/nss/lib/freebl/mpi/doc/LICENSE11
-rw-r--r--security/nss/lib/freebl/mpi/doc/LICENSE-MPL3
-rw-r--r--security/nss/lib/freebl/mpi/doc/basecvt.pod65
-rwxr-xr-xsecurity/nss/lib/freebl/mpi/doc/build30
-rw-r--r--security/nss/lib/freebl/mpi/doc/div.txt64
-rw-r--r--security/nss/lib/freebl/mpi/doc/expt.txt94
-rw-r--r--security/nss/lib/freebl/mpi/doc/gcd.pod28
-rw-r--r--security/nss/lib/freebl/mpi/doc/invmod.pod34
-rw-r--r--security/nss/lib/freebl/mpi/doc/isprime.pod63
-rw-r--r--security/nss/lib/freebl/mpi/doc/lap.pod36
-rw-r--r--security/nss/lib/freebl/mpi/doc/mpi-test.pod51
-rw-r--r--security/nss/lib/freebl/mpi/doc/mul.txt77
-rw-r--r--security/nss/lib/freebl/mpi/doc/pi.txt53
-rw-r--r--security/nss/lib/freebl/mpi/doc/prime.txt6542
-rw-r--r--security/nss/lib/freebl/mpi/doc/prng.pod38
-rw-r--r--security/nss/lib/freebl/mpi/doc/redux.txt86
-rw-r--r--security/nss/lib/freebl/mpi/doc/sqrt.txt50
-rw-r--r--security/nss/lib/freebl/mpi/doc/square.txt72
-rw-r--r--security/nss/lib/freebl/mpi/doc/timing.txt213
-rw-r--r--security/nss/lib/freebl/mpi/hpma512.s615
-rw-r--r--security/nss/lib/freebl/mpi/hppa20.s904
-rw-r--r--security/nss/lib/freebl/mpi/hppatch.adb21
-rw-r--r--security/nss/lib/freebl/mpi/logtab.h28
-rwxr-xr-xsecurity/nss/lib/freebl/mpi/make-logtab29
-rwxr-xr-xsecurity/nss/lib/freebl/mpi/make-test-arrays98
-rw-r--r--security/nss/lib/freebl/mpi/mdxptest.c306
-rw-r--r--security/nss/lib/freebl/mpi/montmulf.c286
-rw-r--r--security/nss/lib/freebl/mpi/montmulf.h65
-rw-r--r--security/nss/lib/freebl/mpi/montmulf.il108
-rw-r--r--security/nss/lib/freebl/mpi/montmulf.s1938
-rw-r--r--security/nss/lib/freebl/mpi/montmulfv8.il108
-rw-r--r--security/nss/lib/freebl/mpi/montmulfv8.s1818
-rw-r--r--security/nss/lib/freebl/mpi/montmulfv9.il93
-rw-r--r--security/nss/lib/freebl/mpi/montmulfv9.s2346
-rw-r--r--security/nss/lib/freebl/mpi/mp_comba.c3235
-rw-r--r--security/nss/lib/freebl/mpi/mp_comba_amd64_masm.asm13066
-rw-r--r--security/nss/lib/freebl/mpi/mp_comba_amd64_sun.s16097
-rw-r--r--security/nss/lib/freebl/mpi/mp_gf2m-priv.h73
-rw-r--r--security/nss/lib/freebl/mpi/mp_gf2m.c678
-rw-r--r--security/nss/lib/freebl/mpi/mp_gf2m.h28
-rw-r--r--security/nss/lib/freebl/mpi/mpcpucache.c808
-rw-r--r--security/nss/lib/freebl/mpi/mpcpucache_amd64.s861
-rw-r--r--security/nss/lib/freebl/mpi/mpcpucache_x86.s902
-rw-r--r--security/nss/lib/freebl/mpi/mpi-config.h68
-rw-r--r--security/nss/lib/freebl/mpi/mpi-priv.h243
-rw-r--r--security/nss/lib/freebl/mpi/mpi.c4839
-rw-r--r--security/nss/lib/freebl/mpi/mpi.h313
-rw-r--r--security/nss/lib/freebl/mpi/mpi_amd64.c32
-rw-r--r--security/nss/lib/freebl/mpi/mpi_amd64_gas.s389
-rw-r--r--security/nss/lib/freebl/mpi/mpi_amd64_masm.asm388
-rw-r--r--security/nss/lib/freebl/mpi/mpi_amd64_sun.s385
-rw-r--r--security/nss/lib/freebl/mpi/mpi_arm.c175
-rw-r--r--security/nss/lib/freebl/mpi/mpi_hp.c81
-rw-r--r--security/nss/lib/freebl/mpi/mpi_i86pc.s313
-rw-r--r--security/nss/lib/freebl/mpi/mpi_mips.s472
-rw-r--r--security/nss/lib/freebl/mpi/mpi_sparc.c226
-rw-r--r--security/nss/lib/freebl/mpi/mpi_sse2.s294
-rw-r--r--security/nss/lib/freebl/mpi/mpi_x86.s541
-rw-r--r--security/nss/lib/freebl/mpi/mpi_x86_asm.c531
-rw-r--r--security/nss/lib/freebl/mpi/mpi_x86_os2.s538
-rw-r--r--security/nss/lib/freebl/mpi/mplogic.c443
-rw-r--r--security/nss/lib/freebl/mpi/mplogic.h52
-rw-r--r--security/nss/lib/freebl/mpi/mpmontg.c1141
-rw-r--r--security/nss/lib/freebl/mpi/mpprime.c599
-rw-r--r--security/nss/lib/freebl/mpi/mpprime.h38
-rw-r--r--security/nss/lib/freebl/mpi/mpv_sparc.c221
-rw-r--r--security/nss/lib/freebl/mpi/mpv_sparcv8.s1607
-rw-r--r--security/nss/lib/freebl/mpi/mpv_sparcv9.s1645
-rw-r--r--security/nss/lib/freebl/mpi/mpvalpha.c183
-rw-r--r--security/nss/lib/freebl/mpi/mulsqr.c84
-rwxr-xr-xsecurity/nss/lib/freebl/mpi/multest76
-rw-r--r--security/nss/lib/freebl/mpi/primes.c841
-rwxr-xr-xsecurity/nss/lib/freebl/mpi/stats39
-rw-r--r--security/nss/lib/freebl/mpi/target.mk233
-rw-r--r--security/nss/lib/freebl/mpi/test-arrays.txt55
-rw-r--r--security/nss/lib/freebl/mpi/tests/LICENSE6
-rw-r--r--security/nss/lib/freebl/mpi/tests/LICENSE-MPL3
-rw-r--r--security/nss/lib/freebl/mpi/tests/mptest-1.c43
-rw-r--r--security/nss/lib/freebl/mpi/tests/mptest-2.c62
-rw-r--r--security/nss/lib/freebl/mpi/tests/mptest-3.c105
-rw-r--r--security/nss/lib/freebl/mpi/tests/mptest-3a.c123
-rw-r--r--security/nss/lib/freebl/mpi/tests/mptest-4.c111
-rw-r--r--security/nss/lib/freebl/mpi/tests/mptest-4a.c109
-rw-r--r--security/nss/lib/freebl/mpi/tests/mptest-4b.c107
-rw-r--r--security/nss/lib/freebl/mpi/tests/mptest-5.c85
-rw-r--r--security/nss/lib/freebl/mpi/tests/mptest-5a.c147
-rw-r--r--security/nss/lib/freebl/mpi/tests/mptest-6.c78
-rw-r--r--security/nss/lib/freebl/mpi/tests/mptest-7.c85
-rw-r--r--security/nss/lib/freebl/mpi/tests/mptest-8.c68
-rw-r--r--security/nss/lib/freebl/mpi/tests/mptest-9.c109
-rw-r--r--security/nss/lib/freebl/mpi/tests/mptest-b.c230
-rw-r--r--security/nss/lib/freebl/mpi/tests/pi1k.txt1
-rw-r--r--security/nss/lib/freebl/mpi/tests/pi2k.txt1
-rw-r--r--security/nss/lib/freebl/mpi/tests/pi5k.txt1
-rwxr-xr-xsecurity/nss/lib/freebl/mpi/timetest99
-rwxr-xr-xsecurity/nss/lib/freebl/mpi/types.pl127
-rw-r--r--security/nss/lib/freebl/mpi/utils/LICENSE4
-rw-r--r--security/nss/lib/freebl/mpi/utils/LICENSE-MPL3
-rw-r--r--security/nss/lib/freebl/mpi/utils/PRIMES41
-rw-r--r--security/nss/lib/freebl/mpi/utils/README206
-rw-r--r--security/nss/lib/freebl/mpi/utils/basecvt.c68
-rw-r--r--security/nss/lib/freebl/mpi/utils/bbs_rand.c65
-rw-r--r--security/nss/lib/freebl/mpi/utils/bbs_rand.h24
-rw-r--r--security/nss/lib/freebl/mpi/utils/bbsrand.c35
-rw-r--r--security/nss/lib/freebl/mpi/utils/dec2hex.c40
-rw-r--r--security/nss/lib/freebl/mpi/utils/exptmod.c55
-rw-r--r--security/nss/lib/freebl/mpi/utils/fact.c84
-rw-r--r--security/nss/lib/freebl/mpi/utils/gcd.c95
-rw-r--r--security/nss/lib/freebl/mpi/utils/hex2dec.c40
-rw-r--r--security/nss/lib/freebl/mpi/utils/identest.c84
-rw-r--r--security/nss/lib/freebl/mpi/utils/invmod.c61
-rw-r--r--security/nss/lib/freebl/mpi/utils/isprime.c89
-rw-r--r--security/nss/lib/freebl/mpi/utils/lap.c90
-rw-r--r--security/nss/lib/freebl/mpi/utils/makeprime.c116
-rw-r--r--security/nss/lib/freebl/mpi/utils/metime.c102
-rw-r--r--security/nss/lib/freebl/mpi/utils/pi.c171
-rw-r--r--security/nss/lib/freebl/mpi/utils/primegen.c159
-rw-r--r--security/nss/lib/freebl/mpi/utils/prng.c57
-rwxr-xr-xsecurity/nss/lib/freebl/mpi/utils/ptab.pl26
-rw-r--r--security/nss/lib/freebl/mpi/utils/sieve.c243
-rw-r--r--security/nss/lib/freebl/mpi/vis_32.il1291
-rw-r--r--security/nss/lib/freebl/mpi/vis_64.il997
-rw-r--r--security/nss/lib/freebl/mpi/vis_proto.h234
-rw-r--r--security/nss/lib/freebl/nsslowhash.c150
-rw-r--r--security/nss/lib/freebl/nsslowhash.h33
-rw-r--r--security/nss/lib/freebl/os2_rand.c334
-rw-r--r--security/nss/lib/freebl/poly1305-donna-x64-sse2-incremental-source.c881
-rw-r--r--security/nss/lib/freebl/poly1305.c314
-rw-r--r--security/nss/lib/freebl/poly1305.h28
-rw-r--r--security/nss/lib/freebl/pqg.c1878
-rw-r--r--security/nss/lib/freebl/pqg.h25
-rw-r--r--security/nss/lib/freebl/rawhash.c154
-rw-r--r--security/nss/lib/freebl/ret_cr16.s27
-rw-r--r--security/nss/lib/freebl/rijndael.c1375
-rw-r--r--security/nss/lib/freebl/rijndael.h67
-rw-r--r--security/nss/lib/freebl/rijndael32.tab1219
-rw-r--r--security/nss/lib/freebl/rijndael_tables.c215
-rw-r--r--security/nss/lib/freebl/rsa.c1625
-rw-r--r--security/nss/lib/freebl/rsapkcs.c1385
-rw-r--r--security/nss/lib/freebl/secmpi.h54
-rw-r--r--security/nss/lib/freebl/secrng.h65
-rw-r--r--security/nss/lib/freebl/seed.c641
-rw-r--r--security/nss/lib/freebl/seed.h125
-rw-r--r--security/nss/lib/freebl/sha-fast-amd64-sun.s2151
-rw-r--r--security/nss/lib/freebl/sha256.h19
-rw-r--r--security/nss/lib/freebl/sha512.c1655
-rw-r--r--security/nss/lib/freebl/sha_fast.c545
-rw-r--r--security/nss/lib/freebl/sha_fast.h176
-rw-r--r--security/nss/lib/freebl/shsign.h14
-rw-r--r--security/nss/lib/freebl/shvfy.c534
-rw-r--r--security/nss/lib/freebl/stubs.c711
-rw-r--r--security/nss/lib/freebl/stubs.h66
-rw-r--r--security/nss/lib/freebl/sysrand.c49
-rw-r--r--security/nss/lib/freebl/tlsprfalg.c134
-rw-r--r--security/nss/lib/freebl/unix_rand.c1176
-rw-r--r--security/nss/lib/freebl/win_rand.c161
256 files changed, 133850 insertions, 0 deletions
diff --git a/security/nss/lib/freebl/Makefile b/security/nss/lib/freebl/Makefile
new file mode 100644
index 000000000..0ce1425f1
--- /dev/null
+++ b/security/nss/lib/freebl/Makefile
@@ -0,0 +1,764 @@
+#! gmake
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#######################################################################
+# (1) Include initial platform-independent assignments (MANDATORY). #
+#######################################################################
+
+include manifest.mn
+
+#######################################################################
+# (2) Include "global" configuration information. (OPTIONAL) #
+#######################################################################
+
+include $(CORE_DEPTH)/coreconf/config.mk
+
+#######################################################################
+# (3) Include "component" configuration information. (OPTIONAL) #
+#######################################################################
+
+
+
+#######################################################################
+# (4) Include "local" platform-dependent assignments (OPTIONAL). #
+#######################################################################
+
+include config.mk
+
+# default for all platforms
+# unset this on those that have multiple freebl libraries
+FREEBL_BUILD_SINGLE_SHLIB = 1
+
+ifdef USE_64
+ DEFINES += -DNSS_USE_64
+endif
+
+ifdef USE_ABI32_FPU
+ DEFINES += -DNSS_USE_ABI32_FPU
+endif
+
+ifeq ($(FREEBL_NO_DEPEND),1)
+ DEFINES += -DFREEBL_NO_DEPEND
+ STUBS_SRCS = stubs.c
+endif
+
+ifeq ($(FREEBL_LOWHASH),1)
+ DEFINES += -DFREEBL_LOWHASH
+ LOWHASH_SRCS = nsslowhash.c
+ LOWHASH_EXPORTS = nsslowhash.h
+ MAPFILE_SOURCE = freebl_hash_vector.def
+ NEED_STUB_BUILD = 1
+else
+ MAPFILE_SOURCE = freebl.def
+endif
+
+ifdef USE_STUB_BUILD
+ CSRCS = lowhash_vector.c
+ SIMPLE_OBJS = $(CSRCS:.c=$(OBJ_SUFFIX))
+ OBJS = $(addprefix $(OBJDIR)/$(PROG_PREFIX), $(SIMPLE_OBJS))
+ ALL_TRASH := $(TARGETS) $(OBJS) $(OBJDIR) LOGS TAGS $(GARBAGE) \
+ $(NOSUCHFILE) so_locations
+ MAPFILE_SOURCE = freebl_hash.def
+endif
+
+# FREEBL_USE_PRELINK
+#
+# Most modern versions of Linux support a speed optimization scheme where an
+# application called prelink modifies programs and shared libraries to quickly
+# load if they fit into an already designed address space. In short, prelink
+# scans the list of programs and libraries on your system, assigns them a
+# predefined space in the address space, then provides the fixups to the
+# library.
+#
+# The modification of the shared library is correctly detected by the freebl
+# FIPS checksum scheme where we check a signed hash of the library against the
+# library itself.
+#
+# The prelink command itself can reverse the process of modification and output
+# the pristine shared library as it was before prelink made its changes.
+# This option tells Freebl that it can use prelink to output the original copy of
+# the shared library before prelink modified it.
+#
+# FREEBL_PRELINK_COMMAND
+#
+# This is an optional environment variable which can override the default
+# prelink command. It could be used on systems that did something similar to
+# prelink but used a different command and syntax. The only requirement is the
+# program must take the library as the last argument, the program must output
+# the original library to standard out, and the program does not need to take
+# any quoted or imbedded spaces in its arguments (except the path to the
+# library itself, which can have imbedded spaces or special characters).
+#
+ifdef FREEBL_USE_PRELINK
+ DEFINES += -DFREEBL_USE_PRELINK
+ifdef LINUX
+ DEFINES += -D__GNU_SOURCE=1
+endif
+endif
+ifdef NSS_NO_INIT_SUPPORT
+ DEFINES += -DNSS_NO_INIT_SUPPORT
+endif
+
+ifdef FREEBL_PRELINK_COMMAND
+ DEFINES +=-DFREEBL_PRELINK_COMMAND=\"$(FREEBL_PRELINK_COMMAND)\"
+endif
+# NSS_X86 means the target is a 32-bit x86 CPU architecture
+# NSS_X64 means the target is a 64-bit x64 CPU architecture
+# NSS_X86_OR_X64 means the target is either x86 or x64
+ifeq (,$(filter-out i386 x386 x86 x86_64,$(CPU_ARCH)))
+ DEFINES += -DNSS_X86_OR_X64
+ifneq (,$(USE_64)$(USE_X32))
+ DEFINES += -DNSS_X64
+else
+ DEFINES += -DNSS_X86
+endif
+endif
+
+ifeq ($(OS_TARGET),OSF1)
+ DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_NO_MP_WORD
+ MPI_SRCS += mpvalpha.c
+endif
+
+ifeq (OS2,$(OS_TARGET))
+ ASFILES = mpi_x86_os2.s
+ DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
+ DEFINES += -DMP_ASSEMBLY_DIV_2DX1D
+ DEFINES += -DMP_USE_UINT_DIGIT -DMP_NO_MP_WORD
+ DEFINES += -DMP_IS_LITTLE_ENDIAN
+endif
+
+ifeq (,$(filter-out WINNT WIN95,$(OS_TARGET)))
+ifndef USE_64
+# 32-bit Windows
+ifdef NS_USE_GCC
+# Ideally, we want to use assembler
+# ASFILES = mpi_x86.s
+# DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE \
+# -DMP_ASSEMBLY_DIV_2DX1D
+# but we haven't figured out how to make it work, so we are not
+# using assembler right now.
+ ASFILES =
+ DEFINES += -DMP_NO_MP_WORD -DMP_USE_UINT_DIGIT
+else
+# MSVC
+ MPI_SRCS += mpi_x86_asm.c
+ DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
+ DEFINES += -DMP_ASSEMBLY_DIV_2DX1D -DMP_USE_UINT_DIGIT -DMP_NO_MP_WORD
+ ifdef BUILD_OPT
+ OPTIMIZER += -Ox # maximum optimization for freebl
+ endif
+ # The Intel AES assembly code requires Visual C++ 2010.
+ # if $(_MSC_VER) >= 1600 (Visual C++ 2010)
+ ifeq ($(firstword $(sort $(_MSC_VER) 1600)),1600)
+ DEFINES += -DUSE_HW_AES -DINTEL_GCM
+ ASFILES += intel-aes-x86-masm.asm intel-gcm-x86-masm.asm
+ EXTRA_SRCS += intel-gcm-wrap.c
+ ifeq ($(CLANG_CL),1)
+ INTEL_GCM_CLANG_CL = 1
+ endif
+ endif
+endif
+else
+ # -DMP_NO_MP_WORD
+ DEFINES += -DMP_IS_LITTLE_ENDIAN
+ifdef NS_USE_GCC
+# Ideally, we should use amd64 assembly code, but it's not yet mingw-w64
+# compatible.
+else
+# MSVC
+ ifdef BUILD_OPT
+ OPTIMIZER += -Ox # maximum optimization for freebl
+ endif
+ ASFILES = arcfour-amd64-masm.asm mpi_amd64_masm.asm mp_comba_amd64_masm.asm
+ DEFINES += -DNSS_BEVAND_ARCFOUR -DMPI_AMD64 -DMP_ASSEMBLY_MULTIPLY
+ DEFINES += -DNSS_USE_COMBA
+ # The Intel AES assembly code requires Visual C++ 2010 (10.0). The _xgetbv
+ # compiler intrinsic function requires Visual C++ 2010 (10.0) SP1.
+ ifeq ($(_MSC_VER_GE_10SP1),1)
+ DEFINES += -DUSE_HW_AES -DINTEL_GCM
+ ASFILES += intel-aes-x64-masm.asm intel-gcm-x64-masm.asm
+ EXTRA_SRCS += intel-gcm-wrap.c
+ ifeq ($(CLANG_CL),1)
+ INTEL_GCM_CLANG_CL = 1
+ endif
+ endif
+ MPI_SRCS += mpi_amd64.c
+endif
+endif
+endif
+
+ifeq ($(OS_TARGET),IRIX)
+ifeq ($(USE_N32),1)
+ ASFILES = mpi_mips.s
+ ifeq ($(NS_USE_GCC),1)
+ ASFLAGS = -Wp,-P -Wp,-traditional -O -mips3
+ else
+ ASFLAGS = -O -OPT:Olimit=4000 -dollar -fullwarn -xansi -n32 -mips3
+ endif
+ DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
+ DEFINES += -DMP_USE_UINT_DIGIT
+endif
+endif
+
+ifeq ($(OS_TARGET),Darwin)
+ifeq ($(CPU_ARCH),x86)
+ ASFILES = mpi_sse2.s
+ DEFINES += -DMP_USE_UINT_DIGIT
+ DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
+ DEFINES += -DMP_ASSEMBLY_DIV_2DX1D
+endif
+endif # Darwin
+
+ifeq ($(OS_TARGET),Linux)
+ifeq ($(CPU_ARCH),x86_64)
+ ASFILES = arcfour-amd64-gas.s mpi_amd64_gas.s
+ ASFLAGS += -fPIC -Wa,--noexecstack
+ DEFINES += -DNSS_BEVAND_ARCFOUR -DMPI_AMD64 -DMP_ASSEMBLY_MULTIPLY
+ DEFINES += -DNSS_USE_COMBA
+ DEFINES += -DMP_IS_LITTLE_ENDIAN
+# DEFINES += -DMPI_AMD64_ADD
+ # comment the next four lines to turn off Intel HW acceleration.
+ DEFINES += -DUSE_HW_AES -DINTEL_GCM
+ ASFILES += intel-aes.s intel-gcm.s
+ EXTRA_SRCS += intel-gcm-wrap.c
+ INTEL_GCM = 1
+ MPI_SRCS += mpi_amd64.c mp_comba.c
+endif
+ifeq ($(CPU_ARCH),x86)
+ ASFILES = mpi_x86.s
+ DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
+ DEFINES += -DMP_ASSEMBLY_DIV_2DX1D -DMP_USE_UINT_DIGIT
+ DEFINES += -DMP_IS_LITTLE_ENDIAN
+ # The floating point ECC code doesn't work on Linux x86 (bug 311432).
+ #ECL_USE_FP = 1
+endif
+ifeq ($(CPU_ARCH),arm)
+ DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
+ DEFINES += -DMP_USE_UINT_DIGIT
+ DEFINES += -DSHA_NO_LONG_LONG # avoid 64-bit arithmetic in SHA512
+ MPI_SRCS += mpi_arm.c
+endif
+ifeq ($(CPU_ARCH),ppc)
+ifdef USE_64
+ DEFINES += -DNSS_NO_INIT_SUPPORT
+endif # USE_64
+endif # ppc
+endif # Linux
+
+ifeq ($(OS_TARGET),AIX)
+ DEFINES += -DMP_USE_UINT_DIGIT
+ ifndef USE_64
+ DEFINES += -DMP_NO_DIV_WORD -DMP_NO_ADD_WORD -DMP_NO_SUB_WORD
+ endif
+endif # AIX
+
+ifeq ($(OS_TARGET), HP-UX)
+ifneq ($(OS_TEST), ia64)
+# PA-RISC
+ASFILES += ret_cr16.s
+ifndef USE_64
+ FREEBL_BUILD_SINGLE_SHLIB =
+ HAVE_ABI32_INT32 = 1
+ HAVE_ABI32_FPU = 1
+endif
+ifdef FREEBL_CHILD_BUILD
+ifdef USE_ABI32_INT32
+# build for DA1.1 (HP PA 1.1) 32-bit ABI build with 32-bit arithmetic
+ DEFINES += -DMP_USE_UINT_DIGIT -DMP_NO_MP_WORD
+ DEFINES += -DSHA_NO_LONG_LONG # avoid 64-bit arithmetic in SHA512
+else
+ifdef USE_64
+# this builds for DA2.0W (HP PA 2.0 Wide), the LP64 ABI, using 64-bit digits
+ MPI_SRCS += mpi_hp.c
+ ASFILES += hpma512.s hppa20.s
+ DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
+else
+# this builds for DA2.0 (HP PA 2.0 Narrow) ABI32_FPU model
+# (the 32-bit ABI with 64-bit registers) using 64-bit digits
+ MPI_SRCS += mpi_hp.c
+ ASFILES += hpma512.s hppa20.s
+ DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
+ifndef NS_USE_GCC
+ ARCHFLAG = -Aa +e +DA2.0 +DS2.0
+endif
+endif
+endif
+endif
+endif
+endif
+
+# The blapi functions are defined not only in the freebl shared
+# libraries but also in the shared libraries linked with loader.c
+# (libsoftokn3.so and libssl3.so). We need to use GNU ld's
+# -Bsymbolic option or the equivalent option for other linkers
+# to bind the blapi function references in FREEBLVector vector
+# (ldvector.c) to the blapi functions defined in the freebl
+# shared libraries.
+ifeq (,$(filter-out BSD_OS FreeBSD Linux NetBSD OpenBSD, $(OS_TARGET)))
+ MKSHLIB += -Wl,-Bsymbolic
+endif
+
+ifeq ($(OS_TARGET),SunOS)
+
+ifdef NS_USE_GCC
+ ifdef GCC_USE_GNU_LD
+ MKSHLIB += -Wl,-Bsymbolic,-z,now,-z,text
+ else
+ MKSHLIB += -Wl,-B,symbolic,-z,now,-z,text
+ endif # GCC_USE_GNU_LD
+else
+ MKSHLIB += -B symbolic -z now -z text
+endif # NS_USE_GCC
+
+# Sun's WorkShop defines v8, v8plus and v9 architectures.
+# gcc on Solaris defines v8 and v9 "cpus".
+# gcc's v9 is equivalent to Workshop's v8plus.
+# gcc's -m64 is equivalent to Workshop's v9
+# We always use Sun's assembler, which uses Sun's naming convention.
+ifeq ($(CPU_ARCH),sparc)
+ FREEBL_BUILD_SINGLE_SHLIB=
+ ifdef USE_64
+ HAVE_ABI64_INT = 1
+ HAVE_ABI64_FPU = 1
+ else
+ HAVE_ABI32_FPU = 1
+ HAVE_ABI32_INT64 = 1
+ endif
+ SYSV_SPARC = 1
+ SOLARIS_AS = /usr/ccs/bin/as
+ #### set arch, asm, c flags
+ ifdef NS_USE_GCC
+ ifdef USE_ABI32_INT64
+ ARCHFLAG=-mcpu=v9 -Wa,-xarch=v8plus
+ SOLARIS_AS_FLAGS = -xarch=v8plus -K PIC
+ endif
+ ifdef USE_ABI32_FPU
+ ARCHFLAG=-mcpu=v9 -Wa,-xarch=v8plusa
+ SOLARIS_AS_FLAGS = -xarch=v8plusa -K PIC
+ endif # USE_ABI32_FPU
+ ifdef USE_ABI64_INT
+ # this builds for Sparc v9a pure 64-bit architecture
+ ARCHFLAG += -mcpu=v9 -Wa,-xarch=v9
+ SOLARIS_AS_FLAGS = -xarch=v9 -K PIC
+ endif
+ ifdef USE_ABI64_FPU
+ # this builds for Sparc v9a pure 64-bit architecture
+ # It uses floating point, and 32-bit word size
+ ARCHFLAG += -mcpu=v9 -Wa,-xarch=v9a
+ SOLARIS_AS_FLAGS = -xarch=v9a -K PIC
+ endif
+ else # NS_USE_GCC
+ # FPU_TARGET_OPTIMIZER specifies the target processor and cache
+ # properties of the ABI32_FPU and ABI64_FPU architectures for use
+ # by the optimizer.
+ ifeq (,$(findstring Sun WorkShop 6,$(shell $(CC) -V 2>&1)))
+ # if the compiler is not Forte 6
+ FPU_TARGET_OPTIMIZER = -xcache=64/32/4:1024/64/4 -xchip=ultra3
+ else
+ # Forte 6 C compiler generates incorrect code for rijndael.c
+ # if -xchip=ultra3 is used (Bugzilla bug 333925). So we revert
+ # to what we used in NSS 3.10.
+ FPU_TARGET_OPTIMIZER = -xchip=ultra2
+ endif
+ ifdef USE_ABI32_INT64
+ # this builds for Sparc v8+a ABI32_FPU architecture, 64-bit registers,
+ # 32-bit ABI, it uses 64-bit words, integer arithmetic,
+ # no FPU (non-VIS cpus).
+ # These flags were suggested by the compiler group for building
+ # with SunStudio 10.
+ ifdef BUILD_OPT
+ SOL_CFLAGS += -xO4
+ endif
+ SOL_CFLAGS += -xtarget=generic
+ ARCHFLAG = -xarch=v8plus
+ SOLARIS_AS_FLAGS = -xarch=v8plus -K PIC
+ endif
+ ifdef USE_ABI32_FPU
+ # this builds for Sparc v8+a ABI32_FPU architecture, 64-bit registers,
+ # 32-bit ABI, it uses FPU code, and 32-bit word size.
+ # these flags were determined by running cc -### -fast and copying
+ # the generated flag settings
+ SOL_CFLAGS += -fsingle -xmemalign=8s
+ ifdef BUILD_OPT
+ SOL_CFLAGS += -D__MATHERR_ERRNO_DONTCARE -fsimple=1
+ SOL_CFLAGS += -xalias_level=basic -xbuiltin=%all
+ SOL_CFLAGS += $(FPU_TARGET_OPTIMIZER) -xdepend
+ SOL_CFLAGS += -xlibmil -xO5
+ endif
+ ARCHFLAG = -xarch=v8plusa
+ SOLARIS_AS_FLAGS = -xarch=v8plusa -K PIC
+ endif
+ ifdef USE_ABI64_INT
+ # this builds for Sparc v9a pure 64-bit architecture,
+ # no FPU (non-VIS cpus). For building with SunStudio 10.
+ ifdef BUILD_OPT
+ SOL_CFLAGS += -xO4
+ endif
+ SOL_CFLAGS += -xtarget=generic
+ ARCHFLAG = -xarch=v9
+ SOLARIS_AS_FLAGS = -xarch=v9 -K PIC
+ endif
+ ifdef USE_ABI64_FPU
+ # this builds for Sparc v9a pure 64-bit architecture
+ # It uses floating point, and 32-bit word size.
+ # See comment for USE_ABI32_FPU.
+ SOL_CFLAGS += -fsingle -xmemalign=8s
+ ifdef BUILD_OPT
+ SOL_CFLAGS += -D__MATHERR_ERRNO_DONTCARE -fsimple=1
+ SOL_CFLAGS += -xalias_level=basic -xbuiltin=%all
+ SOL_CFLAGS += $(FPU_TARGET_OPTIMIZER) -xdepend
+ SOL_CFLAGS += -xlibmil -xO5
+ endif
+ ARCHFLAG = -xarch=v9a
+ SOLARIS_AS_FLAGS = -xarch=v9a -K PIC
+ endif
+ endif # NS_USE_GCC
+
+ ### set flags for both GCC and Sun cc
+ ifdef USE_ABI32_INT64
+ # this builds for Sparc v8+a ABI32_FPU architecture, 64-bit registers,
+ # 32-bit ABI, it uses 64-bit words, integer arithmetic, no FPU
+ # best times are with no MP_ flags specified
+ endif
+ ifdef USE_ABI32_FPU
+ # this builds for Sparc v8+a ABI32_FPU architecture, 64-bit registers,
+ # 32-bit ABI, it uses FPU code, and 32-bit word size
+ MPI_SRCS += mpi_sparc.c
+ ASFILES = mpv_sparcv8.s montmulfv8.s
+ DEFINES += -DMP_NO_MP_WORD -DMP_USE_UINT_DIGIT -DMP_ASSEMBLY_MULTIPLY
+ DEFINES += -DMP_USING_MONT_MULF -DMP_MONT_USE_MP_MUL
+ ECL_USE_FP = 1
+ endif
+ ifdef USE_ABI64_INT
+ # this builds for Sparc v9a pure 64-bit architecture
+ # best times are with no MP_ flags specified
+ endif
+ ifdef USE_ABI64_FPU
+ # this builds for Sparc v9a pure 64-bit architecture
+ # It uses floating point, and 32-bit word size
+ MPI_SRCS += mpi_sparc.c
+ ASFILES = mpv_sparcv9.s montmulfv9.s
+ DEFINES += -DMP_NO_MP_WORD -DMP_USE_UINT_DIGIT -DMP_ASSEMBLY_MULTIPLY
+ DEFINES += -DMP_USING_MONT_MULF -DMP_MONT_USE_MP_MUL
+ ECL_USE_FP = 1
+ endif
+
+else
+ # Solaris for non-sparc family CPUs
+ ifdef NS_USE_GCC
+ LD = gcc
+ AS = gcc
+ ASFLAGS = -x assembler-with-cpp
+ endif
+ ifeq ($(USE_64),1)
+ # Solaris for AMD64
+ ifdef NS_USE_GCC
+ ASFILES = arcfour-amd64-gas.s mpi_amd64_gas.s
+ ASFLAGS += -march=opteron -m64 -fPIC
+ MPI_SRCS += mp_comba.c
+ # comment the next four lines to turn off Intel HW acceleration
+ ASFILES += intel-gcm.s
+ EXTRA_SRCS += intel-gcm-wrap.c
+ INTEL_GCM = 1
+ DEFINES += -DINTEL_GCM
+ else
+ ASFILES = arcfour-amd64-sun.s mpi_amd64_sun.s sha-fast-amd64-sun.s
+ ASFILES += mp_comba_amd64_sun.s mpcpucache_amd64.s
+ ASFLAGS += -xarch=generic64 -K PIC
+ SOL_CFLAGS += -xprefetch=no
+ SHA_SRCS =
+ MPCPU_SRCS =
+ # Intel acceleration for GCM does not build currently with Studio
+ endif
+ DEFINES += -DNSS_BEVAND_ARCFOUR -DMPI_AMD64 -DMP_ASSEMBLY_MULTIPLY
+ DEFINES += -DNSS_USE_COMBA -DMP_IS_LITTLE_ENDIAN
+ # comment the next two lines to turn off Intel HW acceleration
+ DEFINES += -DUSE_HW_AES
+ ASFILES += intel-aes.s
+ MPI_SRCS += mpi_amd64.c
+ else
+ # Solaris x86
+ DEFINES += -DMP_USE_UINT_DIGIT
+ DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
+ DEFINES += -DMP_ASSEMBLY_DIV_2DX1D
+ ASFILES = mpi_i86pc.s
+ ifndef NS_USE_GCC
+ MPCPU_SRCS =
+ ASFILES += mpcpucache_x86.s
+ endif
+ endif
+endif # Solaris for non-sparc family CPUs
+endif # target == SunOS
+
+ifndef NSS_DISABLE_ECC
+ ifdef ECL_USE_FP
+ #enable floating point ECC code
+ DEFINES += -DECL_USE_FP
+ ECL_SRCS += ecp_fp160.c ecp_fp192.c ecp_fp224.c ecp_fp.c
+ ECL_HDRS += ecp_fp.h
+ endif
+endif
+
+# poly1305-donna-x64-sse2-incremental-source.c requires __int128 support
+# in GCC 4.6.0.
+ifdef USE_64
+ ifdef CC_IS_CLANG
+ HAVE_INT128_SUPPORT = 1
+ DEFINES += -DHAVE_INT128_SUPPORT
+ else ifeq (1,$(CC_IS_GCC))
+ ifneq (,$(filter 4.6 4.7 4.8 4.9,$(word 1,$(GCC_VERSION)).$(word 2,$(GCC_VERSION))))
+ HAVE_INT128_SUPPORT = 1
+ DEFINES += -DHAVE_INT128_SUPPORT
+ endif
+ ifeq (,$(filter 0 1 2 3 4,$(word 1,$(GCC_VERSION))))
+ HAVE_INT128_SUPPORT = 1
+ DEFINES += -DHAVE_INT128_SUPPORT
+ endif
+ endif
+endif
+
+ifndef NSS_DISABLE_CHACHAPOLY
+ ifeq ($(CPU_ARCH),x86_64)
+ ifdef HAVE_INT128_SUPPORT
+ EXTRA_SRCS += poly1305-donna-x64-sse2-incremental-source.c
+ else
+ EXTRA_SRCS += poly1305.c
+ endif
+
+ ifneq (1,$(CC_IS_GCC))
+ EXTRA_SRCS += chacha20.c
+ else
+ EXTRA_SRCS += chacha20_vec.c
+ endif
+ else
+ EXTRA_SRCS += poly1305.c
+ EXTRA_SRCS += chacha20.c
+ endif # x86_64
+endif # NSS_DISABLE_CHACHAPOLY
+
+ifeq (,$(filter-out i386 x386 x86 x86_64,$(CPU_ARCH)))
+ # All intel architectures get the 64 bit version
+ # With custom uint128 if necessary (faster than generic 32 bit version).
+ ECL_SRCS += curve25519_64.c
+else
+ # All non intel architectures get the generic 32 bit implementation (slow!)
+ ECL_SRCS += curve25519_32.c
+endif
+
+ifndef HAVE_INT128_SUPPORT
+ ECL_SRCS += uint128.c
+endif
+
+#######################################################################
+# (5) Execute "global" rules. (OPTIONAL) #
+#######################################################################
+
+include $(CORE_DEPTH)/coreconf/rules.mk
+
+#######################################################################
+# (6) Execute "component" rules. (OPTIONAL) #
+#######################################################################
+
+
+
+#######################################################################
+# (7) Execute "local" rules. (OPTIONAL). #
+#######################################################################
+
+export:: private_export
+
+rijndael_tables:
+ $(CC) -o $(OBJDIR)/make_rijndael_tab rijndael_tables.c \
+ $(DEFINES) $(INCLUDES) $(OBJDIR)/libfreebl.a
+ $(OBJDIR)/make_rijndael_tab
+
+vpath %.h mpi ecl
+vpath %.c mpi ecl
+vpath %.S mpi ecl
+vpath %.s mpi ecl
+vpath %.asm mpi ecl
+INCLUDES += -Impi -Iecl
+
+
+DEFINES += -DMP_API_COMPATIBLE
+
+MPI_USERS = dh.c pqg.c dsa.c rsa.c ec.c
+
+MPI_OBJS = $(addprefix $(OBJDIR)/$(PROG_PREFIX), $(MPI_SRCS:.c=$(OBJ_SUFFIX)))
+MPI_OBJS += $(addprefix $(OBJDIR)/$(PROG_PREFIX), $(MPI_USERS:.c=$(OBJ_SUFFIX)))
+
+$(MPI_OBJS): $(MPI_HDRS)
+
+ECL_USERS = ec.c
+
+ECL_OBJS = $(addprefix $(OBJDIR)/$(PROG_PREFIX), $(ECL_SRCS:.c=$(OBJ_SUFFIX)) $(ECL_ASM_SRCS:$(ASM_SUFFIX)=$(OBJ_SUFFIX)))
+ECL_OBJS += $(addprefix $(OBJDIR)/$(PROG_PREFIX), $(ECL_USERS:.c=$(OBJ_SUFFIX)))
+
+$(ECL_OBJS): $(ECL_HDRS)
+
+
+
+$(OBJDIR)/sysrand$(OBJ_SUFFIX): sysrand.c unix_rand.c win_rand.c os2_rand.c
+
+$(OBJDIR)/$(PROG_PREFIX)mpprime$(OBJ_SUFFIX): primes.c
+
+$(OBJDIR)/ldvector$(OBJ_SUFFIX) $(OBJDIR)/loader$(OBJ_SUFFIX) : loader.h
+
+ifeq ($(SYSV_SPARC),1)
+
+$(OBJDIR)/mpv_sparcv8.o $(OBJDIR)/mpv_sparcv8x.o $(OBJDIR)/montmulfv8.o : $(OBJDIR)/%.o : %.s
+ @$(MAKE_OBJDIR)
+ $(SOLARIS_AS) -o $@ $(SOLARIS_AS_FLAGS) $<
+
+$(OBJDIR)/mpv_sparcv9.o $(OBJDIR)/montmulfv9.o : $(OBJDIR)/%.o : %.s
+ @$(MAKE_OBJDIR)
+ $(SOLARIS_AS) -o $@ $(SOLARIS_AS_FLAGS) $<
+
+$(OBJDIR)/mpmontg.o: mpmontg.c montmulf.h
+
+endif
+
+ifndef FREEBL_CHILD_BUILD
+
+# Parent build. This is where we decide which shared libraries to build
+
+ifdef FREEBL_BUILD_SINGLE_SHLIB
+
+################### Single shared lib stuff #########################
+SINGLE_SHLIB_DIR = $(OBJDIR)/$(OS_TARGET)_SINGLE_SHLIB
+ALL_TRASH += $(SINGLE_SHLIB_DIR)
+
+$(SINGLE_SHLIB_DIR):
+ -mkdir -p $(SINGLE_SHLIB_DIR)
+
+release_md libs:: $(SINGLE_SHLIB_DIR)
+ $(MAKE) FREEBL_CHILD_BUILD=1 \
+ OBJDIR=$(SINGLE_SHLIB_DIR) $@
+######################## common stuff #########################
+
+endif
+
+ifdef NEED_STUB_BUILD
+SINGLE_SHLIB_DIR = $(OBJDIR)/$(OS_TARGET)_SINGLE_SHLIB
+ALL_TRASH += $(SINGLE_SHLIB_DIR)
+$(SINGLE_SHLIB_DIR):
+ -mkdir $(SINGLE_SHLIB_DIR)
+
+release_md libs:: $(SINGLE_SHLIB_DIR)
+ $(MAKE) FREEBL_CHILD_BUILD=1 USE_STUB_BUILD=1 \
+ OBJDIR=$(SINGLE_SHLIB_DIR) $@
+endif
+
+# multiple shared libraries
+
+######################## ABI32_FPU stuff #########################
+ifdef HAVE_ABI32_FPU
+ABI32_FPU_DIR = $(OBJDIR)/$(OS_TARGET)_ABI32_FPU
+ALL_TRASH += $(ABI32_FPU_DIR)
+
+$(ABI32_FPU_DIR):
+ -mkdir $(ABI32_FPU_DIR)
+
+release_md libs:: $(ABI32_FPU_DIR)
+ $(MAKE) FREEBL_CHILD_BUILD=1 USE_ABI32_FPU=1 \
+ OBJDIR=$(ABI32_FPU_DIR) $@
+endif
+
+######################## ABI32_INT32 stuff #########################
+ifdef HAVE_ABI32_INT32
+ABI32_INT32_DIR = $(OBJDIR)/$(OS_TARGET)_ABI32_INT32
+ALL_TRASH += $(ABI32_INT32_DIR)
+
+$(ABI32_INT32_DIR):
+ -mkdir $(ABI32_INT32_DIR)
+
+release_md libs:: $(ABI32_INT32_DIR)
+ $(MAKE) FREEBL_CHILD_BUILD=1 USE_ABI32_INT32=1 \
+ OBJDIR=$(ABI32_INT32_DIR) $@
+endif
+
+######################## ABI32_INT64 stuff #########################
+ifdef HAVE_ABI32_INT64
+ABI32_INT64_DIR = $(OBJDIR)/$(OS_TARGET)_ABI32_INT64
+ALL_TRASH += $(ABI32_INT64_DIR)
+
+$(ABI32_INT64_DIR):
+ -mkdir $(ABI32_INT64_DIR)
+
+release_md libs:: $(ABI32_INT64_DIR)
+ $(MAKE) FREEBL_CHILD_BUILD=1 USE_ABI32_INT64=1\
+ OBJDIR=$(ABI32_INT64_DIR) $@
+endif
+
+######################## END of 32-bit stuff #########################
+
+# above is 32-bit builds, below is 64-bit builds
+
+######################## ABI64_FPU stuff #########################
+ifdef HAVE_ABI64_FPU
+ABI64_FPU_DIR = $(OBJDIR)/$(OS_TARGET)_ABI64_FPU
+ALL_TRASH += $(ABI64_FPU_DIR)
+
+$(ABI64_FPU_DIR):
+ -mkdir $(ABI64_FPU_DIR)
+
+release_md libs:: $(ABI64_FPU_DIR)
+ $(MAKE) FREEBL_CHILD_BUILD=1 USE_ABI64_FPU=1 \
+ OBJDIR=$(ABI64_FPU_DIR) $@
+endif
+
+######################## ABI64_INT stuff #########################
+ifdef HAVE_ABI64_INT
+ABI64_INT_DIR = $(OBJDIR)/$(OS_TARGET)_ABI64_INT
+ALL_TRASH += $(ABI64_INT_DIR)
+
+$(ABI64_INT_DIR):
+ -mkdir $(ABI64_INT_DIR)
+
+release_md libs:: $(ABI64_INT_DIR)
+ $(MAKE) FREEBL_CHILD_BUILD=1 USE_ABI64_INT=1 \
+ OBJDIR=$(ABI64_INT_DIR) $@
+endif
+
+endif # FREEBL_CHILD_BUILD
+
+
+# Bugzilla Bug 333917: the non-x86 code in desblapi.c seems to violate
+# ANSI C's strict aliasing rules.
+ifeq ($(OS_TARGET),Linux)
+ifneq ($(CPU_ARCH),x86)
+$(OBJDIR)/$(PROG_PREFIX)desblapi$(OBJ_SUFFIX): desblapi.c
+ @$(MAKE_OBJDIR)
+ifdef NEED_ABSOLUTE_PATH
+ $(CC) -o $@ -c $(CFLAGS) -fno-strict-aliasing $(call core_abspath,$<)
+else
+ $(CC) -o $@ -c $(CFLAGS) -fno-strict-aliasing $<
+endif
+endif
+endif
+
+ifdef INTEL_GCM
+#
+# GCM binary needs -mssse3
+#
+$(OBJDIR)/$(PROG_PREFIX)intel-gcm-wrap$(OBJ_SUFFIX): CFLAGS += -mssse3
+
+# The integrated assembler in Clang 3.2 does not support % in the
+# expression of a .set directive. intel-gcm.s uses .set to give
+# symbolic names to registers, for example,
+# .set Htbl, %rdi
+# So we can't use Clang's integrated assembler with intel-gcm.s.
+ifdef CC_IS_CLANG
+$(OBJDIR)/$(PROG_PREFIX)intel-gcm$(OBJ_SUFFIX): CFLAGS += -no-integrated-as
+endif
+endif
+
+ifdef INTEL_GCM_CLANG_CL
+#
+# clang-cl needs -mssse3
+#
+$(OBJDIR)/$(PROG_PREFIX)intel-gcm-wrap$(OBJ_SUFFIX): CFLAGS += -mssse3
+endif
diff --git a/security/nss/lib/freebl/aeskeywrap.c b/security/nss/lib/freebl/aeskeywrap.c
new file mode 100644
index 000000000..79ff8a852
--- /dev/null
+++ b/security/nss/lib/freebl/aeskeywrap.c
@@ -0,0 +1,389 @@
+/*
+ * aeskeywrap.c - implement AES Key Wrap algorithm from RFC 3394
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "prcpucfg.h"
+#if defined(IS_LITTLE_ENDIAN) || defined(SHA_NO_LONG_LONG)
+#define BIG_ENDIAN_WITH_64_BIT_REGISTERS 0
+#else
+#define BIG_ENDIAN_WITH_64_BIT_REGISTERS 1
+#endif
+#include "prtypes.h" /* for PRUintXX */
+#include "secport.h" /* for PORT_XXX */
+#include "secerr.h"
+#include "blapi.h" /* for AES_ functions */
+#include "rijndael.h"
+
+/*
+** State for one AES key wrap/unwrap context.
+**  iv     the initial value copied into A when wrapping and compared
+**         against the recovered A when unwrapping; set by
+**         AESKeyWrap_InitContext (0xA6 bytes when the caller passes no IV)
+**  aescx  the embedded AES cipher context, initialized with the wrapping key
+*/
+struct AESKeyWrapContextStr {
+ unsigned char iv[AES_KEY_WRAP_IV_BYTES];
+ AESContext aescx;
+};
+
+/******************************************/
+/*
+** AES key wrap algorithm, RFC 3394
+*/
+
+/* Allocate storage for a key-wrap context. The memory is not initialized
+ * here; callers follow up with AESKeyWrap_InitContext. Returns NULL when the
+ * allocation fails. */
+AESKeyWrapContext *
+AESKeyWrap_AllocateContext(void)
+{
+    return PORT_New(AESKeyWrapContext);
+}
+
+/*
+** Initialize a caller-allocated key-wrap context.
+**  "cx"       the context to fill in
+**  "key"      raw AES key data, passed straight to AES_InitContext
+**  "keylen"   the number of bytes of key data
+**  "iv"       sizeof(cx->iv) bytes of initial value, or NULL to use the
+**             default of all 0xA6 bytes
+**  "encrypt"  forwarded unchanged to AES_InitContext
+**  "x1","x2"  unused
+** Returns SECFailure (SEC_ERROR_INVALID_ARGS) when cx is NULL, otherwise
+** the result of AES_InitContext.
+*/
+SECStatus
+AESKeyWrap_InitContext(AESKeyWrapContext *cx,
+                       const unsigned char *key,
+                       unsigned int keylen,
+                       const unsigned char *iv,
+                       int x1,
+                       unsigned int encrypt,
+                       unsigned int x2)
+{
+    if (cx == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    if (iv == NULL) {
+        memset(cx->iv, 0xA6, sizeof cx->iv);
+    } else {
+        memcpy(cx->iv, iv, sizeof cx->iv);
+    }
+
+    /* The underlying cipher gets no IV of its own (NULL) and mode NSS_AES. */
+    return AES_InitContext(&cx->aescx, key, keylen, NULL, NSS_AES, encrypt,
+                           AES_BLOCK_SIZE);
+}
+
+/*
+** Allocate and initialize an AES key-wrap context in one step.
+**  "key"      raw key data
+**  "iv"       initial value to use, or NULL for the default
+**  "encrypt"  forwarded to AESKeyWrap_InitContext
+**  "keylen"   the number of bytes of key data (16, 24, or 32)
+** Returns NULL when allocation or initialization fails.
+*/
+extern AESKeyWrapContext *
+AESKeyWrap_CreateContext(const unsigned char *key, const unsigned char *iv,
+                         int encrypt, unsigned int keylen)
+{
+    AESKeyWrapContext *ctx = AESKeyWrap_AllocateContext();
+
+    if (ctx == NULL) {
+        return NULL; /* error is already set */
+    }
+    if (AESKeyWrap_InitContext(ctx, key, keylen, iv, 0, encrypt, 0) ==
+        SECSuccess) {
+        return ctx;
+    }
+
+    /* Initialization failed: release the half-built context. */
+    PORT_Free(ctx); /* error should already be set */
+    return NULL;
+}
+
+/*
+** Tear down an AES key-wrap context.
+**  "cx"      the context; NULL is accepted and ignored
+**  "freeit"  if PR_TRUE, the context object itself is released as well
+*/
+extern void
+AESKeyWrap_DestroyContext(AESKeyWrapContext *cx, PRBool freeit)
+{
+    if (cx == NULL) {
+        return;
+    }
+
+    /* The AES context is embedded in *cx, so it is destroyed in place
+     * (freeit == PR_FALSE) rather than freed separately. */
+    AES_DestroyContext(&cx->aescx, PR_FALSE);
+    /* NOTE(review): cx->iv is not cleared here. */
+    if (freeit) {
+        PORT_Free(cx);
+    }
+}
+
+#if !BIG_ENDIAN_WITH_64_BIT_REGISTERS
+
+/* The AES Key Wrap algorithm has 64-bit values that are ALWAYS big-endian
+** (Most significant byte first) in memory. The only ALU operations done
+** on them are increment, decrement, and XOR. So, on little-endian CPUs,
+** and on CPUs that lack 64-bit registers, these big-endian 64-bit operations
+** are simulated in the following code. This is thought to be faster and
+** simpler than trying to convert the data to little-endian and back.
+*/
+
+/* A and T each point to a 64-bit value stored most significant byte first
+** (big endian). T is incremented by one, and the incremented T is then
+** XORed into A.
+*/
+static void
+increment_and_xor(unsigned char *A, unsigned char *T)
+{
+    int idx;
+
+    /* Big-endian increment: bump the last byte and keep carrying toward
+     * T[0] only while the byte just incremented wrapped around to zero. */
+    for (idx = 7; idx >= 0; --idx) {
+        if (++T[idx] != 0) {
+            break;
+        }
+    }
+
+    for (idx = 0; idx < 8; ++idx) {
+        A[idx] ^= T[idx];
+    }
+}
+
+/* A and T each point to a 64-bit value whose bytes are stored most
+** significant byte first (big endian) in memory. T is XORed into A, giving
+** the new A, and the 64-bit value T is then decremented by one.
+**
+** XOR works bit by bit, so it can be done on the whole PRUint64 whatever the
+** host byte order. (The earlier byte-masked form shifted one of its masks by
+** 23 instead of 24, which mis-combined bits 23 and 31.) The decrement has to
+** respect the big-endian layout and is therefore done byte by byte.
+*/
+static void
+xor_and_decrement(PRUint64 *A, PRUint64 *T)
+{
+    unsigned char *TP = (unsigned char *)T;
+    int idx;
+
+    *A ^= *T;
+
+    /* Big-endian decrement: borrow from the next more significant byte only
+     * when the byte being decremented was zero before the decrement. */
+    for (idx = 7; idx >= 0; --idx) {
+        if (TP[idx]-- != 0) {
+            break;
+        }
+    }
+}
+
+/* Store the host-order value t as a 64-bit big-endian quantity (most
+** significant byte first) in the eight bytes at pt.
+*/
+static void
+set_t(unsigned char *pt, unsigned long t)
+{
+    int pos = 8;
+
+    /* Fill from the last byte backwards, peeling off the low 8 bits of t
+     * each time; bytes beyond the width of t come out as zero. */
+    while (pos-- > 0) {
+        pt[pos] = (unsigned char)t;
+        t >>= 8;
+    }
+}
+
+#endif
+
+/*
+** Perform AES key wrap.
+**  "cx" the context
+**  "output" the output buffer to store the encrypted data.
+**  "pOutputLen" how much data is stored in "output". Set by the routine
+**      after some data is stored in output; set to 0 when the cipher
+**      fails. May be NULL.
+**  "maxOutputLen" the maximum amount of data that can ever be
+**      stored in "output"; must be at least inputLen plus
+**      AES_KEY_WRAP_BLOCK_SIZE
+**  "input" the input data
+**  "inputLen" the amount of input data; a non-zero multiple of
+**      AES_KEY_WRAP_BLOCK_SIZE
+** Returns SECSuccess, or SECFailure with the error code set.
+*/
+extern SECStatus
+AESKeyWrap_Encrypt(AESKeyWrapContext *cx, unsigned char *output,
+                   unsigned int *pOutputLen, unsigned int maxOutputLen,
+                   const unsigned char *input, unsigned int inputLen)
+{
+    PRUint64 *R = NULL;
+    unsigned int nBlocks;
+    unsigned int i, j;
+    unsigned int aesLen = AES_BLOCK_SIZE;
+    unsigned int outLen = inputLen + AES_KEY_WRAP_BLOCK_SIZE;
+    SECStatus s = SECFailure;
+    /* These PRUint64s are ALWAYS big endian, regardless of CPU orientation. */
+    PRUint64 t;
+    PRUint64 B[2];
+
+#define A B[0]
+
+    /* Check args */
+    if (!inputLen || 0 != inputLen % AES_KEY_WRAP_BLOCK_SIZE) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return s;
+    }
+    /* inputLen + AES_KEY_WRAP_BLOCK_SIZE wrapped around: the result cannot
+     * be described by an unsigned int length. */
+    if (outLen < inputLen) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return s;
+    }
+#ifdef maybe
+    if (!output && pOutputLen) { /* caller is asking for output size */
+        *pOutputLen = outLen;
+        return SECSuccess;
+    }
+#endif
+    if (maxOutputLen < outLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return s;
+    }
+    if (cx == NULL || output == NULL || input == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return s;
+    }
+    nBlocks = inputLen / AES_KEY_WRAP_BLOCK_SIZE;
+    R = PORT_NewArray(PRUint64, nBlocks + 1);
+    if (!R)
+        return s; /* error is already set. */
+    /*
+    ** 1) Initialize variables.
+    */
+    memcpy(&A, cx->iv, AES_KEY_WRAP_IV_BYTES);
+    memcpy(&R[1], input, inputLen);
+#if BIG_ENDIAN_WITH_64_BIT_REGISTERS
+    t = 0;
+#else
+    memset(&t, 0, sizeof t);
+#endif
+    /*
+    ** 2) Calculate intermediate values.
+    **    Both loops stop at the first cipher failure, so that a later
+    **    successful AES call cannot hide an earlier failed one.
+    */
+    s = SECSuccess;
+    for (j = 0; s == SECSuccess && j < 6; ++j) {
+        for (i = 1; i <= nBlocks; ++i) {
+            B[1] = R[i];
+            s = AES_Encrypt(&cx->aescx, (unsigned char *)B, &aesLen,
+                            sizeof B, (unsigned char *)B, sizeof B);
+            if (s != SECSuccess)
+                break;
+            R[i] = B[1];
+/* here, increment t and XOR A with t (in big endian order); */
+#if BIG_ENDIAN_WITH_64_BIT_REGISTERS
+            A ^= ++t;
+#else
+            increment_and_xor((unsigned char *)&A, (unsigned char *)&t);
+#endif
+        }
+    }
+    /*
+    ** 3) Output the results.
+    */
+    if (s == SECSuccess) {
+        R[0] = A;
+        memcpy(output, &R[0], outLen);
+        if (pOutputLen)
+            *pOutputLen = outLen;
+    } else if (pOutputLen) {
+        *pOutputLen = 0;
+    }
+    /* R holds key material: zero it before handing the memory back. */
+    PORT_ZFree(R, outLen);
+    return s;
+}
+#undef A
+
+/*
+** Perform AES key unwrap.
+**  "cx" the context
+**  "output" the output buffer to store the decrypted data.
+**  "pOutputLen" how much data is stored in "output". Set by the routine
+**      after some data is stored in output; set to 0 when the cipher
+**      fails or the integrity check does not match. May be NULL.
+**  "maxOutputLen" the maximum amount of data that can ever be
+**      stored in "output"; must be at least inputLen minus
+**      AES_KEY_WRAP_BLOCK_SIZE
+**  "input" the input data
+**  "inputLen" the amount of input data; a multiple of
+**      AES_KEY_WRAP_BLOCK_SIZE and at least three such blocks
+** Returns SECSuccess, or SECFailure with the error code set
+** (SEC_ERROR_BAD_DATA when the recovered value does not match cx->iv).
+*/
+extern SECStatus
+AESKeyWrap_Decrypt(AESKeyWrapContext *cx, unsigned char *output,
+                   unsigned int *pOutputLen, unsigned int maxOutputLen,
+                   const unsigned char *input, unsigned int inputLen)
+{
+    PRUint64 *R = NULL;
+    unsigned int nBlocks;
+    unsigned int i, j;
+    unsigned int aesLen = AES_BLOCK_SIZE;
+    unsigned int outLen;
+    SECStatus s = SECFailure;
+    /* These PRUint64s are ALWAYS big endian, regardless of CPU orientation. */
+    PRUint64 t;
+    PRUint64 B[2];
+
+    /* Check args */
+    if (inputLen < 3 * AES_KEY_WRAP_BLOCK_SIZE ||
+        0 != inputLen % AES_KEY_WRAP_BLOCK_SIZE) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return s;
+    }
+    outLen = inputLen - AES_KEY_WRAP_BLOCK_SIZE;
+#ifdef maybe
+    if (!output && pOutputLen) { /* caller is asking for output size */
+        *pOutputLen = outLen;
+        return SECSuccess;
+    }
+#endif
+    if (maxOutputLen < outLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return s;
+    }
+    if (cx == NULL || output == NULL || input == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return s;
+    }
+    nBlocks = inputLen / AES_KEY_WRAP_BLOCK_SIZE;
+    R = PORT_NewArray(PRUint64, nBlocks);
+    if (!R)
+        return s; /* error is already set. */
+    /* From here on nBlocks counts only the data blocks R[1..nBlocks];
+     * R[0] carries the wrapped integrity value. */
+    nBlocks--;
+    /*
+    ** 1) Initialize variables.
+    */
+    memcpy(&R[0], input, inputLen);
+    B[0] = R[0];
+#if BIG_ENDIAN_WITH_64_BIT_REGISTERS
+    t = 6UL * nBlocks;
+#else
+    set_t((unsigned char *)&t, 6UL * nBlocks);
+#endif
+    /*
+    ** 2) Calculate intermediate values.
+    **    Both loops stop at the first cipher failure, so that a later
+    **    successful AES call cannot hide an earlier failed one.
+    */
+    s = SECSuccess;
+    for (j = 0; s == SECSuccess && j < 6; ++j) {
+        for (i = nBlocks; i; --i) {
+/* here, XOR A with t (in big endian order) and decrement t; */
+#if BIG_ENDIAN_WITH_64_BIT_REGISTERS
+            B[0] ^= t--;
+#else
+            xor_and_decrement(&B[0], &t);
+#endif
+            B[1] = R[i];
+            s = AES_Decrypt(&cx->aescx, (unsigned char *)B, &aesLen,
+                            sizeof B, (unsigned char *)B, sizeof B);
+            if (s != SECSuccess)
+                break;
+            R[i] = B[1];
+        }
+    }
+    /*
+    ** 3) Output the results.
+    */
+    if (s == SECSuccess) {
+        /* The unwrap is only valid if the recovered value equals the IV. */
+        int bad = memcmp(&B[0], cx->iv, AES_KEY_WRAP_IV_BYTES);
+        if (!bad) {
+            memcpy(output, &R[1], outLen);
+            if (pOutputLen)
+                *pOutputLen = outLen;
+        } else {
+            s = SECFailure;
+            PORT_SetError(SEC_ERROR_BAD_DATA);
+            if (pOutputLen)
+                *pOutputLen = 0;
+        }
+    } else if (pOutputLen) {
+        *pOutputLen = 0;
+    }
+    /* R holds recovered key material: zero it before freeing. */
+    PORT_ZFree(R, inputLen);
+    return s;
+}
diff --git a/security/nss/lib/freebl/alg2268.c b/security/nss/lib/freebl/alg2268.c
new file mode 100644
index 000000000..54c6f4dff
--- /dev/null
+++ b/security/nss/lib/freebl/alg2268.c
@@ -0,0 +1,509 @@
+/*
+ * alg2268.c - implementation of the algorithm in RFC 2268
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "blapi.h"
+#include "blapii.h"
+#include "secerr.h"
+#ifdef XP_UNIX_XXX
+#include <stddef.h> /* for ptrdiff_t */
+#endif
+
+/*
+** RC2 symmetric block cypher
+*/
+
+typedef SECStatus(rc2Func)(RC2Context *cx, unsigned char *output,
+ const unsigned char *input, unsigned int inputLen);
+
+/* forward declarations */
+static rc2Func rc2_EncryptECB;
+static rc2Func rc2_DecryptECB;
+static rc2Func rc2_EncryptCBC;
+static rc2Func rc2_DecryptCBC;
+
+/* One 8-byte RC2 block, viewable as two 32-bit words (used for the CBC
+ * XOR), four 16-bit words (the cipher's working registers) or raw bytes. */
+typedef union {
+ PRUint32 l[2];
+ PRUint16 s[4];
+ PRUint8 b[8];
+} RC2Block;
+
+/*
+** RC2 cipher context.
+**  u    the expanded key, filled in bytewise (Kb) by RC2_InitContext and
+**       read as 64 16-bit words (Kw) by the round macros
+**  iv   the CBC chaining block; loaded from the caller's IV in CBC mode
+**       and updated after every block
+**  enc  block-mode encrypt routine selected by RC2_InitContext
+**  dec  block-mode decrypt routine selected by RC2_InitContext
+*/
+struct RC2ContextStr {
+ union {
+ PRUint8 Kb[128];
+ PRUint16 Kw[64];
+ } u;
+ RC2Block iv;
+ rc2Func *enc;
+ rc2Func *dec;
+};
+
+/* Shorthand for the two views of the expanded key inside RC2ContextStr:
+ * cx->B is the byte view, cx->K the 16-bit word view. */
+#define B u.Kb
+#define K u.Kw
+/* Swap the two bytes of a 16-bit value; SWAPK does so in place for key
+ * word i and needs a PRUint16 "tmpS" and an RC2Context "cx" in scope. */
+#define BYTESWAP(x) ((x) << 8 | (x) >> 8)
+#define SWAPK(i) cx->K[i] = (tmpS = cx->K[i], BYTESWAP(tmpS))
+#define RC2_BLOCK_SIZE 8
+
+/* The four macros below move one 8-byte block between the byte pointers
+ * "input"/"output" (which must be in scope under exactly those names) and
+ * an array R of four 16-bit words.
+ * The _HARD forms go byte by byte, low byte first, so they work for any
+ * pointer alignment and host byte order. The _EASY forms access the buffer
+ * directly through a PRUint16 pointer. */
+#define LOAD_HARD(R) \
+ R[0] = (PRUint16)input[1] << 8 | input[0]; \
+ R[1] = (PRUint16)input[3] << 8 | input[2]; \
+ R[2] = (PRUint16)input[5] << 8 | input[4]; \
+ R[3] = (PRUint16)input[7] << 8 | input[6];
+#define LOAD_EASY(R) \
+ R[0] = ((PRUint16 *)input)[0]; \
+ R[1] = ((PRUint16 *)input)[1]; \
+ R[2] = ((PRUint16 *)input)[2]; \
+ R[3] = ((PRUint16 *)input)[3];
+#define STORE_HARD(R) \
+ output[0] = (PRUint8)(R[0]); \
+ output[1] = (PRUint8)(R[0] >> 8); \
+ output[2] = (PRUint8)(R[1]); \
+ output[3] = (PRUint8)(R[1] >> 8); \
+ output[4] = (PRUint8)(R[2]); \
+ output[5] = (PRUint8)(R[2] >> 8); \
+ output[6] = (PRUint8)(R[3]); \
+ output[7] = (PRUint8)(R[3] >> 8);
+#define STORE_EASY(R) \
+ ((PRUint16 *)output)[0] = R[0]; \
+ ((PRUint16 *)output)[1] = R[1]; \
+ ((PRUint16 *)output)[2] = R[2]; \
+ ((PRUint16 *)output)[3] = R[3];
+
+/*
+** Choose how LOAD/STORE move a block between the byte buffers and the
+** 16-bit working words:
+**  - x86/x64: always the direct 16-bit accesses;
+**  - hosts that are not little endian: always the bytewise forms;
+**  - other little-endian hosts: direct access only when the buffer being
+**    touched is 2-byte aligned, bytewise otherwise. LOAD reads "input", so
+**    it tests input; STORE writes "output", so it must test output.
+** Each macro expands to complete statements and is used without a trailing
+** semicolon.
+*/
+#if defined(NSS_X86_OR_X64)
+#define LOAD(R) LOAD_EASY(R)
+#define STORE(R) STORE_EASY(R)
+#elif !defined(IS_LITTLE_ENDIAN)
+#define LOAD(R) LOAD_HARD(R)
+#define STORE(R) STORE_HARD(R)
+#else
+#define LOAD(R)                 \
+    if ((ptrdiff_t)input & 1) { \
+        LOAD_HARD(R)            \
+    } else {                    \
+        LOAD_EASY(R)            \
+    }
+#define STORE(R)                 \
+    if ((ptrdiff_t)output & 1) { \
+        STORE_HARD(R)            \
+    } else {                     \
+        STORE_EASY(R)            \
+    }
+#endif
+
+/* Byte substitution table used by the key expansion in RC2_InitContext,
+ * written in octal. Presumably the PITABLE of RFC 2268 (the leading entries
+ * match 0xd9 0x78 0xf9 0xc4 ...) -- verify against the RFC before editing. */
+static const PRUint8 S[256] = {
+ 0331, 0170, 0371, 0304, 0031, 0335, 0265, 0355, 0050, 0351, 0375, 0171, 0112, 0240, 0330, 0235,
+ 0306, 0176, 0067, 0203, 0053, 0166, 0123, 0216, 0142, 0114, 0144, 0210, 0104, 0213, 0373, 0242,
+ 0027, 0232, 0131, 0365, 0207, 0263, 0117, 0023, 0141, 0105, 0155, 0215, 0011, 0201, 0175, 0062,
+ 0275, 0217, 0100, 0353, 0206, 0267, 0173, 0013, 0360, 0225, 0041, 0042, 0134, 0153, 0116, 0202,
+ 0124, 0326, 0145, 0223, 0316, 0140, 0262, 0034, 0163, 0126, 0300, 0024, 0247, 0214, 0361, 0334,
+ 0022, 0165, 0312, 0037, 0073, 0276, 0344, 0321, 0102, 0075, 0324, 0060, 0243, 0074, 0266, 0046,
+ 0157, 0277, 0016, 0332, 0106, 0151, 0007, 0127, 0047, 0362, 0035, 0233, 0274, 0224, 0103, 0003,
+ 0370, 0021, 0307, 0366, 0220, 0357, 0076, 0347, 0006, 0303, 0325, 0057, 0310, 0146, 0036, 0327,
+ 0010, 0350, 0352, 0336, 0200, 0122, 0356, 0367, 0204, 0252, 0162, 0254, 0065, 0115, 0152, 0052,
+ 0226, 0032, 0322, 0161, 0132, 0025, 0111, 0164, 0113, 0237, 0320, 0136, 0004, 0030, 0244, 0354,
+ 0302, 0340, 0101, 0156, 0017, 0121, 0313, 0314, 0044, 0221, 0257, 0120, 0241, 0364, 0160, 0071,
+ 0231, 0174, 0072, 0205, 0043, 0270, 0264, 0172, 0374, 0002, 0066, 0133, 0045, 0125, 0227, 0061,
+ 0055, 0135, 0372, 0230, 0343, 0212, 0222, 0256, 0005, 0337, 0051, 0020, 0147, 0154, 0272, 0311,
+ 0323, 0000, 0346, 0317, 0341, 0236, 0250, 0054, 0143, 0026, 0001, 0077, 0130, 0342, 0211, 0251,
+ 0015, 0070, 0064, 0033, 0253, 0063, 0377, 0260, 0273, 0110, 0014, 0137, 0271, 0261, 0315, 0056,
+ 0305, 0363, 0333, 0107, 0345, 0245, 0234, 0167, 0012, 0246, 0040, 0150, 0376, 0177, 0301, 0255
+};
+
+/* Allocate a zero-filled RC2 context; returns NULL if allocation fails. */
+RC2Context *
+RC2_AllocateContext(void)
+{
+    RC2Context *cx = PORT_ZNew(RC2Context);
+
+    return cx;
+}
+/*
+** Initialize a caller-allocated RC2 context and expand the key.
+**  "cx"      the context to fill in
+**  "key"     raw key data
+**  "len"     the number of bytes of key data, 1..128
+**  "input"   the CBC initialization vector (8 bytes); required when mode
+**            is NSS_RC2_CBC, ignored for NSS_RC2
+**  "mode"    one of NSS_RC2 or NSS_RC2_CBC
+**  "efLen8"  effective key length in bytes, 1..128
+**  "unused"  ignored
+** Returns SECFailure with SEC_ERROR_INVALID_ARGS for any argument outside
+** those limits. An efLen8 of zero is rejected because step 2 below would
+** otherwise index one byte past the end of the 128-byte key table.
+*/
+SECStatus
+RC2_InitContext(RC2Context *cx, const unsigned char *key, unsigned int len,
+                const unsigned char *input, int mode, unsigned int efLen8,
+                unsigned int unused)
+{
+    PRUint8 *L, *L2;
+    int i;
+#if !defined(IS_LITTLE_ENDIAN)
+    PRUint16 tmpS;
+#endif
+    PRUint8 tmpB;
+
+    if (!key || !cx || !len || len > (sizeof cx->B) ||
+        !efLen8 || efLen8 > (sizeof cx->B)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    if (mode == NSS_RC2) {
+        /* groovy */
+    } else if (mode == NSS_RC2_CBC) {
+        if (!input) {
+            PORT_SetError(SEC_ERROR_INVALID_ARGS);
+            return SECFailure;
+        }
+    } else {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    if (mode == NSS_RC2_CBC) {
+        cx->enc = &rc2_EncryptCBC;
+        cx->dec = &rc2_DecryptCBC;
+        /* LOAD reads the 8 IV bytes through the parameter named "input". */
+        LOAD(cx->iv.s);
+    } else {
+        cx->enc = &rc2_EncryptECB;
+        cx->dec = &rc2_DecryptECB;
+    }
+
+    /* Step 0. Copy key into table. */
+    memcpy(cx->B, key, len);
+
+    /* Step 1. Compute all values to the right of the key. */
+    L2 = cx->B;
+    L = L2 + len;
+    tmpB = L[-1];
+    for (i = (sizeof cx->B) - len; i > 0; --i) {
+        *L++ = tmpB = S[(PRUint8)(tmpB + *L2++)];
+    }
+
+    /* step 2. Adjust left most byte of effective key. */
+    i = (sizeof cx->B) - efLen8; /* 0..127 because 1 <= efLen8 <= 128 */
+    L = cx->B + i;
+    *L = tmpB = S[*L]; /* mask is always 0xff */
+
+    /* step 3. Recompute all values to the left of effective key. */
+    L2 = --L + efLen8;
+    while (L >= cx->B) {
+        *L-- = tmpB = S[tmpB ^ *L2--];
+    }
+
+#if !defined(IS_LITTLE_ENDIAN)
+    /* The table was built bytewise; swap each 16-bit key word so that the
+     * word view used by the rounds holds the intended values. */
+    for (i = 63; i >= 0; --i) {
+        SWAPK(i); /* candidate for unrolling */
+    }
+#endif
+    return SECSuccess;
+}
+
+/*
+** Allocate an RC2 context and initialize it for encryption/decryption.
+**  "key"     raw key data
+**  "len"     the number of bytes of key data
+**  "iv"      the CBC initialization vector (if mode is NSS_RC2_CBC)
+**  "mode"    one of NSS_RC2 or NSS_RC2_CBC
+**  "efLen8"  effective key length in bytes, not bits
+**
+** With mode NSS_RC2_CBC the cipher runs in "cipher block chaining" mode.
+** Returns NULL when allocation or initialization fails.
+*/
+RC2Context *
+RC2_CreateContext(const unsigned char *key, unsigned int len,
+                  const unsigned char *iv, int mode, unsigned efLen8)
+{
+    RC2Context *ctx = PORT_ZNew(RC2Context);
+
+    if (ctx == NULL) {
+        return NULL;
+    }
+    if (RC2_InitContext(ctx, key, len, iv, mode, efLen8, 0) == SECSuccess) {
+        return ctx;
+    }
+
+    /* Initialization failed: wipe and release the context. */
+    RC2_DestroyContext(ctx, PR_TRUE);
+    return NULL;
+}
+
+/*
+** Wipe an RC2 encryption/decryption context and optionally release it.
+**  "cx"      the context; NULL is accepted and ignored
+**  "freeit"  if PR_TRUE, free the object after it has been zeroed
+*/
+void
+RC2_DestroyContext(RC2Context *cx, PRBool freeit)
+{
+    if (cx == NULL) {
+        return;
+    }
+
+    /* Clear the expanded key and IV regardless of who owns the memory. */
+    memset(cx, 0, sizeof *cx);
+    if (freeit) {
+        PORT_Free(cx);
+    }
+}
+
+/* Rotate the 16-bit value x left by k bits. The result is assigned back to
+ * a PRUint16, which discards any bits above bit 15. */
+#define ROL(x, k) (x << k | x >> (16 - k))
+/* One mixing round: updates R0..R3 in turn using key words 4j..4j+3 and
+ * rotations of 1, 2, 3 and 5 bits. Expects R0..R3 and cx in scope. */
+#define MIX(j) \
+ R0 = R0 + cx->K[4 * j + 0] + (R3 & R2) + (~R3 & R1); \
+ R0 = ROL(R0, 1); \
+ R1 = R1 + cx->K[4 * j + 1] + (R0 & R3) + (~R0 & R2); \
+ R1 = ROL(R1, 2); \
+ R2 = R2 + cx->K[4 * j + 2] + (R1 & R0) + (~R1 & R3); \
+ R2 = ROL(R2, 3); \
+ R3 = R3 + cx->K[4 * j + 3] + (R2 & R1) + (~R2 & R0); \
+ R3 = ROL(R3, 5)
+/* One mashing round: adds to each word the key word selected by the low
+ * six bits of the preceding word. */
+#define MASH \
+ R0 = R0 + cx->K[R3 & 63]; \
+ R1 = R1 + cx->K[R0 & 63]; \
+ R2 = R2 + cx->K[R1 & 63]; \
+ R3 = R3 + cx->K[R2 & 63]
+
+/* Encrypt one block: reads the four 16-bit words of *input, applies 16
+ * mixing rounds with a mashing round after the 5th and the 11th, and writes
+ * the result to *output. input and output may be the same block, since all
+ * input words are read before any output word is written. */
+static void
+rc2_Encrypt1Block(RC2Context *cx, RC2Block *output, RC2Block *input)
+{
+ register PRUint16 R0, R1, R2, R3;
+
+ /* step 1. Initialize input. */
+ R0 = input->s[0];
+ R1 = input->s[1];
+ R2 = input->s[2];
+ R3 = input->s[3];
+
+ /* step 2. Expand Key (already done, in context) */
+ /* step 3. j = 0 */
+ /* step 4. Perform 5 mixing rounds. */
+
+ MIX(0);
+ MIX(1);
+ MIX(2);
+ MIX(3);
+ MIX(4);
+
+ /* step 5. Perform 1 mashing round. */
+ MASH;
+
+ /* step 6. Perform 6 mixing rounds. */
+
+ MIX(5);
+ MIX(6);
+ MIX(7);
+ MIX(8);
+ MIX(9);
+ MIX(10);
+
+ /* step 7. Perform 1 mashing round. */
+ MASH;
+
+ /* step 8. Perform 5 mixing rounds. */
+
+ MIX(11);
+ MIX(12);
+ MIX(13);
+ MIX(14);
+ MIX(15);
+
+ /* output results */
+ output->s[0] = R0;
+ output->s[1] = R1;
+ output->s[2] = R2;
+ output->s[3] = R3;
+}
+
+/* Rotate the 16-bit value x right by k bits. The result is assigned back to
+ * a PRUint16, which discards any bits above bit 15. */
+#define ROR(x, k) (x >> k | x << (16 - k))
+/* Reverse of one mixing round: undoes the rotations and additions for key
+ * words 4j+3 down to 4j, processing R3 first and R0 last. */
+#define R_MIX(j) \
+ R3 = ROR(R3, 5); \
+ R3 = R3 - cx->K[4 * j + 3] - (R2 & R1) - (~R2 & R0); \
+ R2 = ROR(R2, 3); \
+ R2 = R2 - cx->K[4 * j + 2] - (R1 & R0) - (~R1 & R3); \
+ R1 = ROR(R1, 2); \
+ R1 = R1 - cx->K[4 * j + 1] - (R0 & R3) - (~R0 & R2); \
+ R0 = ROR(R0, 1); \
+ R0 = R0 - cx->K[4 * j + 0] - (R3 & R2) - (~R3 & R1)
+/* Reverse of one mashing round, again from R3 down to R0. */
+#define R_MASH \
+ R3 = R3 - cx->K[R2 & 63]; \
+ R2 = R2 - cx->K[R1 & 63]; \
+ R1 = R1 - cx->K[R0 & 63]; \
+ R0 = R0 - cx->K[R3 & 63]
+
+/* Decrypt one block: runs the encryption rounds backwards (key words 15
+ * down to 0) on the four 16-bit words of *input and writes the result to
+ * *output. input and output may be the same block. */
+static void
+rc2_Decrypt1Block(RC2Context *cx, RC2Block *output, RC2Block *input)
+{
+ register PRUint16 R0, R1, R2, R3;
+
+ /* step 1. Initialize input. */
+ R0 = input->s[0];
+ R1 = input->s[1];
+ R2 = input->s[2];
+ R3 = input->s[3];
+
+ /* step 2. Expand Key (already done, in context) */
+ /* step 3. j = 63 */
+ /* step 4. Perform 5 r_mixing rounds. */
+ R_MIX(15);
+ R_MIX(14);
+ R_MIX(13);
+ R_MIX(12);
+ R_MIX(11);
+
+ /* step 5. Perform 1 r_mashing round. */
+ R_MASH;
+
+ /* step 6. Perform 6 r_mixing rounds. */
+ R_MIX(10);
+ R_MIX(9);
+ R_MIX(8);
+ R_MIX(7);
+ R_MIX(6);
+ R_MIX(5);
+
+ /* step 7. Perform 1 r_mashing round. */
+ R_MASH;
+
+ /* step 8. Perform 5 r_mixing rounds. */
+ R_MIX(4);
+ R_MIX(3);
+ R_MIX(2);
+ R_MIX(1);
+ R_MIX(0);
+
+ /* output results */
+ output->s[0] = R0;
+ output->s[1] = R1;
+ output->s[2] = R2;
+ output->s[3] = R3;
+}
+
+/* ECB encryption: every 8-byte block of input is encrypted independently
+ * and written to output. inputLen is expected to be a multiple of
+ * RC2_BLOCK_SIZE (RC2_Encrypt checks this before dispatching here).
+ * LOAD and STORE refer to the parameters "input" and "output" by name. */
+static SECStatus NO_SANITIZE_ALIGNMENT
+rc2_EncryptECB(RC2Context *cx, unsigned char *output,
+               const unsigned char *input, unsigned int inputLen)
+{
+    RC2Block blk;
+
+    for (; inputLen > 0; inputLen -= RC2_BLOCK_SIZE) {
+        LOAD(blk.s)
+        rc2_Encrypt1Block(cx, &blk, &blk);
+        STORE(blk.s)
+        input += RC2_BLOCK_SIZE;
+        output += RC2_BLOCK_SIZE;
+    }
+    return SECSuccess;
+}
+
+/* ECB decryption: every 8-byte block of input is decrypted independently
+ * and written to output. inputLen is expected to be a multiple of
+ * RC2_BLOCK_SIZE (RC2_Decrypt checks this before dispatching here).
+ * LOAD and STORE refer to the parameters "input" and "output" by name. */
+static SECStatus NO_SANITIZE_ALIGNMENT
+rc2_DecryptECB(RC2Context *cx, unsigned char *output,
+               const unsigned char *input, unsigned int inputLen)
+{
+    RC2Block blk;
+
+    for (; inputLen > 0; inputLen -= RC2_BLOCK_SIZE) {
+        LOAD(blk.s)
+        rc2_Decrypt1Block(cx, &blk, &blk);
+        STORE(blk.s)
+        input += RC2_BLOCK_SIZE;
+        output += RC2_BLOCK_SIZE;
+    }
+    return SECSuccess;
+}
+
+/* CBC encryption: each plaintext block is XORed with the chaining block in
+ * cx->iv before being encrypted, and the resulting ciphertext becomes the
+ * new chaining block. inputLen is expected to be a multiple of
+ * RC2_BLOCK_SIZE. LOAD and STORE refer to "input" and "output" by name. */
+static SECStatus NO_SANITIZE_ALIGNMENT
+rc2_EncryptCBC(RC2Context *cx, unsigned char *output,
+               const unsigned char *input, unsigned int inputLen)
+{
+    RC2Block blk;
+
+    for (; inputLen > 0; inputLen -= RC2_BLOCK_SIZE) {
+        LOAD(blk.s)
+        blk.l[0] ^= cx->iv.l[0];
+        blk.l[1] ^= cx->iv.l[1];
+        rc2_Encrypt1Block(cx, &blk, &blk);
+        cx->iv = blk; /* ciphertext chains into the next block */
+        STORE(blk.s)
+        input += RC2_BLOCK_SIZE;
+        output += RC2_BLOCK_SIZE;
+    }
+    return SECSuccess;
+}
+
+/* CBC decryption: each ciphertext block is decrypted, XORed with the
+ * chaining block in cx->iv to give the plaintext, and then itself becomes
+ * the new chaining block. The ciphertext is kept in a separate block so
+ * that it is still available for chaining after the plaintext is produced.
+ * inputLen is expected to be a multiple of RC2_BLOCK_SIZE. LOAD and STORE
+ * refer to "input" and "output" by name. */
+static SECStatus NO_SANITIZE_ALIGNMENT
+rc2_DecryptCBC(RC2Context *cx, unsigned char *output,
+               const unsigned char *input, unsigned int inputLen)
+{
+    RC2Block cipher;
+    RC2Block plain;
+
+    for (; inputLen > 0; inputLen -= RC2_BLOCK_SIZE) {
+        LOAD(cipher.s)
+        rc2_Decrypt1Block(cx, &plain, &cipher);
+        plain.l[0] ^= cx->iv.l[0];
+        plain.l[1] ^= cx->iv.l[1];
+        cx->iv = cipher;
+        STORE(plain.s)
+        input += RC2_BLOCK_SIZE;
+        output += RC2_BLOCK_SIZE;
+    }
+    return SECSuccess;
+}
+
+/*
+** Perform RC2 encryption.
+**  "cx" the context
+**  "output" the output buffer to store the encrypted data.
+**  "outputLen" how much data is stored in "output". Set by the routine
+**      on success; equal to inputLen.
+**  "maxOutputLen" the maximum amount of data that can ever be
+**      stored in "output"
+**  "input" the input data
+**  "inputLen" the amount of input data; must be a multiple of
+**      RC2_BLOCK_SIZE. Zero is accepted and produces no output.
+*/
+SECStatus
+RC2_Encrypt(RC2Context *cx, unsigned char *output,
+            unsigned int *outputLen, unsigned int maxOutputLen,
+            const unsigned char *input, unsigned int inputLen)
+{
+    SECStatus rv;
+
+    if (inputLen == 0) {
+        /* Nothing to do; the cipher is not invoked at all. */
+        *outputLen = 0;
+        return SECSuccess;
+    }
+    if (inputLen % RC2_BLOCK_SIZE) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+    if (maxOutputLen < inputLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    /* Dispatch to the ECB or CBC routine chosen by RC2_InitContext. */
+    rv = (*cx->enc)(cx, output, input, inputLen);
+    if (rv == SECSuccess) {
+        *outputLen = inputLen;
+    }
+    return rv;
+}
+
+/*
+** Perform RC2 decryption.
+**  "cx" the context
+**  "output" the output buffer to store the decrypted data.
+**  "outputLen" how much data is stored in "output". Set by the routine
+**      on success; equal to inputLen.
+**  "maxOutputLen" the maximum amount of data that can ever be
+**      stored in "output"
+**  "input" the input data
+**  "inputLen" the amount of input data; must be a multiple of
+**      RC2_BLOCK_SIZE. Zero is accepted and produces no output.
+*/
+SECStatus
+RC2_Decrypt(RC2Context *cx, unsigned char *output,
+            unsigned int *outputLen, unsigned int maxOutputLen,
+            const unsigned char *input, unsigned int inputLen)
+{
+    SECStatus rv;
+
+    if (inputLen == 0) {
+        /* Nothing to do; the cipher is not invoked at all. */
+        *outputLen = 0;
+        return SECSuccess;
+    }
+    if (inputLen % RC2_BLOCK_SIZE) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+    if (maxOutputLen < inputLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    /* Dispatch to the ECB or CBC routine chosen by RC2_InitContext. */
+    rv = (*cx->dec)(cx, output, input, inputLen);
+    if (rv == SECSuccess) {
+        *outputLen = inputLen;
+    }
+    return rv;
+}
diff --git a/security/nss/lib/freebl/alghmac.c b/security/nss/lib/freebl/alghmac.c
new file mode 100644
index 000000000..dd8b73c5f
--- /dev/null
+++ b/security/nss/lib/freebl/alghmac.c
@@ -0,0 +1,165 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "secport.h"
+#include "hasht.h"
+#include "blapit.h"
+#include "alghmac.h"
+#include "secerr.h"
+
+/* The pads are sized for the largest supported hash block length; only the
+ * first hashobj->blocklength bytes of each are used. */
+#define HMAC_PAD_SIZE HASH_BLOCK_LENGTH_MAX
+
+/*
+** HMAC state.
+**  hash          hash context created through hashobj->create()
+**  hashobj       the hash implementation (function table and lengths)
+**  wasAllocated  PR_TRUE when the context came from HMAC_Create or
+**                HMAC_Clone, PR_FALSE when initialized by HMAC_Init alone
+**  ipad, opad    the key XORed with 0x36 / 0x5c bytes, fed to the inner and
+**                outer hash respectively
+*/
+struct HMACContextStr {
+ void *hash;
+ const SECHashObject *hashobj;
+ PRBool wasAllocated;
+ unsigned char ipad[HMAC_PAD_SIZE];
+ unsigned char opad[HMAC_PAD_SIZE];
+};
+
+/*
+** Destroy an HMAC context.
+**  "cx"      the context; NULL is accepted and ignored
+**  "freeit"  PR_TRUE to also free the context object. Debug builds assert
+**            that this matches how the context was obtained.
+** When a hash context is attached it is destroyed and the whole HMAC
+** context, including the key pads, is zeroed.
+*/
+void
+HMAC_Destroy(HMACContext *cx, PRBool freeit)
+{
+    if (!cx) {
+        return;
+    }
+
+    PORT_Assert(!freeit == !cx->wasAllocated);
+    if (cx->hash) {
+        const SECHashObject *ops = cx->hashobj;
+
+        ops->destroy(cx->hash, PR_TRUE);
+        PORT_Memset(cx, 0, sizeof *cx);
+    }
+    if (freeit) {
+        PORT_Free(cx);
+    }
+}
+
+/*
+** Initialize a caller-allocated HMAC context.
+**  "cx"          the context to fill in
+**  "hash_obj"    the hash implementation to use
+**  "secret"      the HMAC key
+**  "secret_len"  the number of bytes in secret
+**  "isFIPS"      if true, keys shorter than half the hash output length
+**                are rejected
+** A key longer than the hash block length is first replaced by its hash.
+** Returns SECFailure with SEC_ERROR_INVALID_ARGS for a NULL cx or hash_obj
+** or a too-short FIPS key, and SECFailure when the hash context cannot be
+** created or misbehaves. On failure no hash context remains attached to
+** cx, so a later HMAC_Destroy on the same context is harmless.
+*/
+SECStatus
+HMAC_Init(HMACContext *cx, const SECHashObject *hash_obj,
+          const unsigned char *secret, unsigned int secret_len, PRBool isFIPS)
+{
+    unsigned int i;
+    unsigned char hashed_secret[HASH_LENGTH_MAX];
+
+    if (cx == NULL || hash_obj == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    /* required by FIPS 198 Section 3 */
+    if (isFIPS && secret_len < hash_obj->length / 2) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    cx->wasAllocated = PR_FALSE;
+    cx->hashobj = hash_obj;
+    cx->hash = cx->hashobj->create();
+    if (cx->hash == NULL)
+        goto loser;
+
+    if (secret_len > cx->hashobj->blocklength) {
+        /* Over-long key: use its digest as the key instead. */
+        cx->hashobj->begin(cx->hash);
+        cx->hashobj->update(cx->hash, secret, secret_len);
+        PORT_Assert(cx->hashobj->length <= sizeof hashed_secret);
+        cx->hashobj->end(cx->hash, hashed_secret, &secret_len,
+                         sizeof hashed_secret);
+        if (secret_len != cx->hashobj->length) {
+            PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+            goto loser;
+        }
+        secret = (const unsigned char *)&hashed_secret[0];
+    }
+
+    PORT_Memset(cx->ipad, 0x36, cx->hashobj->blocklength);
+    PORT_Memset(cx->opad, 0x5c, cx->hashobj->blocklength);
+
+    /* fold secret into padding */
+    for (i = 0; i < secret_len; i++) {
+        cx->ipad[i] ^= secret[i];
+        cx->opad[i] ^= secret[i];
+    }
+    PORT_Memset(hashed_secret, 0, sizeof hashed_secret);
+    return SECSuccess;
+
+loser:
+    PORT_Memset(hashed_secret, 0, sizeof hashed_secret);
+    if (cx->hash != NULL) {
+        cx->hashobj->destroy(cx->hash, PR_TRUE);
+        /* Do not leave a dangling pointer behind for HMAC_Destroy. */
+        cx->hash = NULL;
+    }
+    return SECFailure;
+}
+
+/*
+** Allocate an HMAC context and initialize it with HMAC_Init; the arguments
+** are passed through unchanged. Returns NULL when allocation or
+** initialization fails.
+*/
+HMACContext *
+HMAC_Create(const SECHashObject *hash_obj, const unsigned char *secret,
+            unsigned int secret_len, PRBool isFIPS)
+{
+    HMACContext *ctx = PORT_ZNew(HMACContext);
+
+    if (ctx == NULL) {
+        return NULL;
+    }
+    if (HMAC_Init(ctx, hash_obj, secret, secret_len, isFIPS) != SECSuccess) {
+        PORT_Free(ctx); /* contains no secret info */
+        return NULL;
+    }
+
+    /* HMAC_Init marks the context as caller-owned; record that this one
+     * was heap-allocated here so HMAC_Destroy knows to free it. */
+    ctx->wasAllocated = PR_TRUE;
+    return ctx;
+}
+
+/* Start (or restart) an HMAC computation: reset the hash and feed it the
+ * inner pad, which begins the inner hash. */
+void
+HMAC_Begin(HMACContext *cx)
+{
+    const SECHashObject *ops = cx->hashobj;
+
+    ops->begin(cx->hash);
+    ops->update(cx->hash, cx->ipad, ops->blocklength);
+}
+
+/* Feed data_len bytes of message data into the running inner hash. */
+void
+HMAC_Update(HMACContext *cx, const unsigned char *data, unsigned int data_len)
+{
+    const SECHashObject *ops = cx->hashobj;
+
+    ops->update(cx->hash, data, data_len);
+}
+
+/*
+** Complete the HMAC computation and place the MAC in result.
+**  "cx"              the HMAC context
+**  "result"          buffer receiving the MAC; it also temporarily holds
+**                    the inner hash value
+**  "result_len"      receives the number of bytes written
+**  "max_result_len"  size of result; must be at least the hash length
+** Returns SECFailure (SEC_ERROR_INVALID_ARGS) when the buffer is too small,
+** and SECFailure when the inner hash yields an unexpected length.
+*/
+SECStatus
+HMAC_Finish(HMACContext *cx, unsigned char *result, unsigned int *result_len,
+            unsigned int max_result_len)
+{
+    const SECHashObject *ops = cx->hashobj;
+
+    if (ops->length > max_result_len) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* Finish the inner hash: H(ipad || message). */
+    ops->end(cx->hash, result, result_len, max_result_len);
+    if (*result_len != ops->length) {
+        return SECFailure;
+    }
+
+    /* Outer hash: H(opad || inner hash). */
+    ops->begin(cx->hash);
+    ops->update(cx->hash, cx->opad, ops->blocklength);
+    ops->update(cx->hash, result, *result_len);
+    ops->end(cx->hash, result, result_len, max_result_len);
+    return SECSuccess;
+}
+
+/*
+** Duplicate an HMAC context, including the state of the running hash, so
+** that the copy and the original can continue independently. Returns NULL
+** when the allocation or the clone of the hash context fails.
+*/
+HMACContext *
+HMAC_Clone(HMACContext *cx)
+{
+    const SECHashObject *ops = cx->hashobj;
+    HMACContext *copy = (HMACContext *)PORT_ZAlloc(sizeof(HMACContext));
+
+    if (copy == NULL) {
+        return NULL;
+    }
+
+    copy->wasAllocated = PR_TRUE;
+    copy->hashobj = ops;
+    copy->hash = ops->clone(cx->hash);
+    if (copy->hash == NULL) {
+        /* No hash attached yet, so this only releases the new context. */
+        HMAC_Destroy(copy, PR_TRUE);
+        return NULL;
+    }
+
+    PORT_Memcpy(copy->ipad, cx->ipad, ops->blocklength);
+    PORT_Memcpy(copy->opad, cx->opad, ops->blocklength);
+    return copy;
+}
diff --git a/security/nss/lib/freebl/alghmac.h b/security/nss/lib/freebl/alghmac.h
new file mode 100644
index 000000000..462526ac4
--- /dev/null
+++ b/security/nss/lib/freebl/alghmac.h
@@ -0,0 +1,64 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _ALGHMAC_H_
+#define _ALGHMAC_H_
+
+typedef struct HMACContextStr HMACContext;
+
+SEC_BEGIN_PROTOS
+
+/* destroy HMAC context */
+extern void
+HMAC_Destroy(HMACContext *cx, PRBool freeit);
+
+/* create HMAC context
+ * hash_obj hash object from SECRawHashObjects[]
+ * secret the secret with which the HMAC is performed.
+ * secret_len the length of the secret.
+ * isFIPS true if conforming to FIPS 198.
+ *
+ * NULL is returned if an error occurs.
+ */
+extern HMACContext *
+HMAC_Create(const SECHashObject *hash_obj, const unsigned char *secret,
+ unsigned int secret_len, PRBool isFIPS);
+
+/* like HMAC_Create, except caller allocates HMACContext. */
+SECStatus
+HMAC_Init(HMACContext *cx, const SECHashObject *hash_obj,
+ const unsigned char *secret, unsigned int secret_len, PRBool isFIPS);
+
+/* reset HMAC for a fresh round */
+extern void
+HMAC_Begin(HMACContext *cx);
+
+/* update HMAC
+ * cx HMAC Context
+ * data the data to perform HMAC on
+ * data_len the length of the data to process
+ */
+extern void
+HMAC_Update(HMACContext *cx, const unsigned char *data, unsigned int data_len);
+
+/* Finish HMAC -- place the results within result
+ * cx HMAC context
+ * result buffer for resulting hmac'd data
+ * result_len where the resultant hmac length is stored
+ * max_result_len maximum possible length that can be stored in result
+ */
+extern SECStatus
+HMAC_Finish(HMACContext *cx, unsigned char *result, unsigned int *result_len,
+ unsigned int max_result_len);
+
+/* Clone a copy of the HMAC state. This is useful when you need to
+ * keep a running HMAC going but also need to extract intermediate
+ * results partway through the process.
+ */
+extern HMACContext *
+HMAC_Clone(HMACContext *cx);
+
+SEC_END_PROTOS
+
+#endif
diff --git a/security/nss/lib/freebl/arcfive.c b/security/nss/lib/freebl/arcfive.c
new file mode 100644
index 000000000..dda77710f
--- /dev/null
+++ b/security/nss/lib/freebl/arcfive.c
@@ -0,0 +1,87 @@
+/*
+ * arcfive.c - stubs for RC5 - NOT a working implementation!
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "blapi.h"
+#include "prerror.h"
+
+/******************************************/
+/*
+** RC5 symmetric block cypher -- 64-bit block size
+*/
+
+/*
+** Create a new RC5 context suitable for RC5 encryption/decryption.
+** "key" the raw key data, passed as a SECItem
+** "rounds" the round-count argument (ignored by this stub)
+** "wordSize" the word-size argument (ignored by this stub)
+** "iv" is the CBC initialization vector (if mode is NSS_RC5_CBC)
+** "mode" one of NSS_RC5 or NSS_RC5_CBC
+**
+** When mode is set to NSS_RC5_CBC the RC5 cipher is run in "cipher block
+** chaining" mode.
+**
+** STUB: this file contains no RC5 implementation. The call never looks at
+** its arguments; it sets PR_NOT_IMPLEMENTED_ERROR and returns NULL.
+*/
+RC5Context *
+RC5_CreateContext(const SECItem *key, unsigned int rounds,
+ unsigned int wordSize, const unsigned char *iv, int mode)
+{
+ PORT_SetError(PR_NOT_IMPLEMENTED_ERROR);
+ return NULL;
+}
+
+/*
+** Destroy an RC5 encryption/decryption context.
+** "cx" the context
+** "freeit" if PR_TRUE then free the object as well as its sub-objects
+**
+** STUB: nothing is freed or modified here, whatever "freeit" says; the
+** only effect is that PR_NOT_IMPLEMENTED_ERROR is set.
+*/
+void
+RC5_DestroyContext(RC5Context *cx, PRBool freeit)
+{
+ PORT_SetError(PR_NOT_IMPLEMENTED_ERROR);
+}
+
+/*
+** Perform RC5 encryption.
+** "cx" the context
+** "output" the output buffer to store the encrypted data.
+** "outputLen" how much data is stored in "output". Set by the routine
+** after some data is stored in output.
+** "maxOutputLen" the maximum amount of data that can ever be
+** stored in "output"
+** "input" the input data
+** "inputLen" the amount of input data
+**
+** STUB: no encryption takes place. "output" and "*outputLen" are not
+** written; the call sets PR_NOT_IMPLEMENTED_ERROR and returns SECFailure.
+*/
+SECStatus
+RC5_Encrypt(RC5Context *cx, unsigned char *output, unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen)
+{
+ PORT_SetError(PR_NOT_IMPLEMENTED_ERROR);
+ return SECFailure;
+}
+
+/*
+** Perform RC5 decryption.
+** "cx" the context
+** "output" the output buffer to store the decrypted data.
+** "outputLen" how much data is stored in "output". Set by the routine
+** after some data is stored in output.
+** "maxOutputLen" the maximum amount of data that can ever be
+** stored in "output"
+** "input" the input data
+** "inputLen" the amount of input data
+**
+** STUB: no decryption takes place. "output" and "*outputLen" are not
+** written; the call sets PR_NOT_IMPLEMENTED_ERROR and returns SECFailure.
+*/
+SECStatus
+RC5_Decrypt(RC5Context *cx, unsigned char *output, unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen)
+{
+ PORT_SetError(PR_NOT_IMPLEMENTED_ERROR);
+ return SECFailure;
+}
diff --git a/security/nss/lib/freebl/arcfour-amd64-gas.s b/security/nss/lib/freebl/arcfour-amd64-gas.s
new file mode 100644
index 000000000..7c4f5358f
--- /dev/null
+++ b/security/nss/lib/freebl/arcfour-amd64-gas.s
@@ -0,0 +1,88 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# ** ARCFOUR implementation optimized for AMD64.
+# **
+# ** The throughput achieved by this code is about 320 MBytes/sec, on
+# ** a 1.8 GHz AMD Opteron (rev C0) processor.
+
+# ARCFOUR(key, len, in, out)
+#
+# XORs "len" bytes at "in" with the RC4 keystream, stores them at "out"
+# and writes the updated x/y indices back into "key".
+#
+# Arguments arrive in %rdi (key), %rsi (len), %rdx (in), %rcx (out) and
+# are shuffled into %rbp, %rbx, %rsi, %rdi below. The state is accessed
+# as: x at key+0, y at key+8, permutation table at key+16 with one 8-byte
+# slot per entry (every table access uses a scale of 8).
+# %rbp and %rbx are pushed on entry and popped before ret.
+#
+# While at least 8 input bytes remain, .Lstart builds 8 keystream bytes in
+# %r8 and XORs a whole quadword; the remaining bytes are processed one at
+# a time after .Lend.
+.text
+.align 16
+.globl ARCFOUR
+.type ARCFOUR,@function
+ARCFOUR:
+ pushq %rbp
+ pushq %rbx
+ movq %rdi, %rbp # key = ARG(key)
+ movq %rsi, %rbx # rbx = ARG(len)
+ movq %rdx, %rsi # in = ARG(in)
+ movq %rcx, %rdi # out = ARG(out)
+ movq (%rbp), %rcx # x = key->x
+ movq 8(%rbp), %rdx # y = key->y
+ addq $16, %rbp # d = key->data
+ incq %rcx # x++
+ andq $255, %rcx # x &= 0xff
+ leaq -8(%rbx,%rsi), %rbx # rbx = in+len-8
+ movq %rbx, %r9 # tmp = in+len-8
+ movq 0(%rbp,%rcx,8), %rax # tx = d[x]
+ cmpq %rsi, %rbx # cmp in with in+len-8
+ jl .Lend # jump if (in+len-8 < in)
+
+.Lstart:
+ addq $8, %rsi # increment in
+ addq $8, %rdi # increment out
+
+ # generate the next 8 bytes of the rc4 stream into %r8
+ movq $8, %r11 # byte counter
+1: addb %al, %dl # y += tx
+ movl 0(%rbp,%rdx,8), %ebx # ty = d[y]
+ movl %ebx, 0(%rbp,%rcx,8) # d[x] = ty
+ addb %al, %bl # val = ty + tx
+ movl %eax, 0(%rbp,%rdx,8) # d[y] = tx
+ incb %cl # x++ (NEXT ROUND)
+ movl 0(%rbp,%rcx,8), %eax # tx = d[x] (NEXT ROUND)
+ movb 0(%rbp,%rbx,8), %r8b # val = d[val]
+ decb %r11b
+ rorq $8, %r8 # (ror does not change ZF)
+ jnz 1b
+
+ # xor 8 bytes
+ xorq -8(%rsi), %r8
+ cmpq %r9, %rsi # cmp in+len-8 with in
+ movq %r8, -8(%rdi)
+ jle .Lstart # jump if (in <= in+len-8)
+
+.Lend:
+ addq $8, %r9 # tmp = in+len
+
+ # handle the last bytes, one by one
+1: cmpq %rsi, %r9 # cmp in with in+len
+ jle .Lfinished # jump if (in+len <= in)
+ addb %al, %dl # y += tx
+ movl 0(%rbp,%rdx,8), %ebx # ty = d[y]
+ movl %ebx, 0(%rbp,%rcx,8) # d[x] = ty
+ addb %al, %bl # val = ty + tx
+ movl %eax, 0(%rbp,%rdx,8) # d[y] = tx
+ incb %cl # x++ (NEXT ROUND)
+ movl 0(%rbp,%rcx,8), %eax # tx = d[x] (NEXT ROUND)
+ movb 0(%rbp,%rbx,8), %r8b # val = d[val]
+ xorb (%rsi), %r8b # xor 1 byte
+ movb %r8b, (%rdi)
+ incq %rsi # in++
+ incq %rdi # out++
+ jmp 1b
+
+.Lfinished:
+ # x was advanced one step ahead of the bytes actually produced
+ # (see the "NEXT ROUND" loads), so step it back before storing.
+ decq %rcx # x--
+ movb %dl, -8(%rbp) # key->y = y
+ movb %cl, -16(%rbp) # key->x = x
+ popq %rbx
+ popq %rbp
+ ret
+.L_ARCFOUR_end:
+.size ARCFOUR,.L_ARCFOUR_end-ARCFOUR
+
+# Magic indicating no need for an executable stack
+.section .note.GNU-stack,"",@progbits
+.previous
diff --git a/security/nss/lib/freebl/arcfour-amd64-masm.asm b/security/nss/lib/freebl/arcfour-amd64-masm.asm
new file mode 100644
index 000000000..1601c4f89
--- /dev/null
+++ b/security/nss/lib/freebl/arcfour-amd64-masm.asm
@@ -0,0 +1,107 @@
+; This Source Code Form is subject to the terms of the Mozilla Public
+; License, v. 2.0. If a copy of the MPL was not distributed with this
+; file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+; ** ARCFOUR implementation optimized for AMD64.
+; **
+; ** The throughput achieved by this code is about 320 MBytes/sec, on
+; ** a 1.8 GHz AMD Opteron (rev C0) processor.
+
+.CODE
+
+; extern void ARCFOUR(RC4Context *cx, unsigned long long inputLen,
+; const unsigned char *input, unsigned char *output);
+
+
+; Register use, as set up below: on entry rcx = cx (key state),
+; rdx = inputLen, r8 = input, r9 = output. rbp, rbx, rsi and rdi are
+; pushed on entry and popped again before ret.
+; The state is accessed as: x at [key], y at [key+8], permutation table at
+; [key+16] with one 8-byte slot per entry (every table access scales by 8).
+; While at least 8 input bytes remain, Lstart builds 8 keystream bytes in
+; r8 and XORs a whole quadword; the remaining bytes are processed one at a
+; time after Lend.
+ARCFOUR PROC
+
+ push rbp
+ push rbx
+ push rsi
+ push rdi
+
+ mov rbp, rcx ; key = ARG(key)
+ mov rbx, rdx ; rbx = ARG(len)
+ mov rsi, r8 ; in = ARG(in)
+ mov rdi, r9 ; out = ARG(out)
+ mov rcx, [rbp] ; x = key->x
+ mov rdx, [rbp+8] ; y = key->y
+ add rbp, 16 ; d = key->data
+ inc rcx ; x++
+ and rcx, 0ffh ; x &= 0xff
+ lea rbx, [rbx+rsi-8] ; rbx = in+len-8
+ mov r9, rbx ; tmp = in+len-8
+ mov rax, [rbp+rcx*8] ; tx = d[x]
+ cmp rbx, rsi ; cmp in with in+len-8
+ jl Lend ; jump if (in+len-8 < in)
+
+Lstart:
+ add rsi, 8 ; increment in
+ add rdi, 8 ; increment out
+
+ ;
+ ; generate the next 8 bytes of the rc4 stream into r8
+ ;
+
+ mov r11, 8 ; byte counter
+
+@@:
+ add dl, al ; y += tx
+ mov ebx, [rbp+rdx*8] ; ty = d[y]
+ mov [rbp+rcx*8], ebx ; d[x] = ty
+ add bl, al ; val = ty + tx
+ mov [rbp+rdx*8], eax ; d[y] = tx
+ inc cl ; x++ (NEXT ROUND)
+ mov eax, [rbp+rcx*8] ; tx = d[x] (NEXT ROUND)
+ mov r8b, [rbp+rbx*8] ; val = d[val]
+ dec r11b
+ ror r8, 8 ; (ror does not change ZF)
+ jnz @b
+
+ ;
+ ; xor 8 bytes
+ ;
+
+ xor r8, [rsi-8]
+ cmp rsi, r9 ; cmp in+len-8 with in
+ mov [rdi-8], r8
+ jle Lstart ; jump if (in <= in+len-8)
+
+Lend:
+ add r9, 8 ; tmp = in+len
+
+ ;
+ ; handle the last bytes, one by one
+ ;
+
+@@:
+ cmp r9, rsi ; cmp in with in+len
+ jle Lfinished ; jump if (in+len <= in)
+ add dl, al ; y += tx
+ mov ebx, [rbp+rdx*8] ; ty = d[y]
+ mov [rbp+rcx*8], ebx ; d[x] = ty
+ add bl, al ; val = ty + tx
+ mov [rbp+rdx*8], eax ; d[y] = tx
+ inc cl ; x++ (NEXT ROUND)
+ mov eax, [rbp+rcx*8] ; tx = d[x] (NEXT ROUND)
+ mov r8b, [rbp+rbx*8] ; val = d[val]
+ xor r8b, [rsi] ; xor 1 byte
+ mov [rdi], r8b
+ inc rsi ; in++
+ inc rdi ; out++
+ jmp @b
+
+Lfinished:
+ ; x was advanced one step ahead of the bytes actually produced
+ ; (see the "NEXT ROUND" loads), so step it back before storing.
+ dec rcx ; x--
+ mov [rbp-8], dl ; key->y = y
+ mov [rbp-16], cl ; key->x = x
+
+ pop rdi
+ pop rsi
+ pop rbx
+ pop rbp
+ ret
+
+ARCFOUR ENDP
+
+END
diff --git a/security/nss/lib/freebl/arcfour-amd64-sun.s b/security/nss/lib/freebl/arcfour-amd64-sun.s
new file mode 100644
index 000000000..8b649f901
--- /dev/null
+++ b/security/nss/lib/freebl/arcfour-amd64-sun.s
@@ -0,0 +1,84 @@
+/ This Source Code Form is subject to the terms of the Mozilla Public
+/ License, v. 2.0. If a copy of the MPL was not distributed with this
+/ file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+/ ** ARCFOUR implementation optimized for AMD64.
+/ **
+/ ** The throughput achieved by this code is about 320 MBytes/sec, on
+/ ** a 1.8 GHz AMD Opteron (rev C0) processor.
+
+/ ARCFOUR(key, len, in, out)
+/
+/ XORs "len" bytes at "in" with the RC4 keystream, stores them at "out"
+/ and writes the updated x/y indices back into "key".
+/
+/ Arguments arrive in %rdi (key), %rsi (len), %rdx (in), %rcx (out) and
+/ are shuffled into %rbp, %rbx, %rsi, %rdi below. The state is accessed
+/ as: x at key+0, y at key+8, permutation table at key+16 with one 8-byte
+/ slot per entry (every table access uses a scale of 8).
+/ %rbp and %rbx are pushed on entry and popped before ret.
+/
+/ While at least 8 input bytes remain, .Lstart builds 8 keystream bytes in
+/ %r8 and XORs a whole quadword; the remaining bytes are processed one at
+/ a time after .Lend.
+.text
+.align 16
+.globl ARCFOUR
+.type ARCFOUR,@function
+ARCFOUR:
+ pushq %rbp
+ pushq %rbx
+ movq %rdi, %rbp / key = ARG(key)
+ movq %rsi, %rbx / rbx = ARG(len)
+ movq %rdx, %rsi / in = ARG(in)
+ movq %rcx, %rdi / out = ARG(out)
+ movq (%rbp), %rcx / x = key->x
+ movq 8(%rbp), %rdx / y = key->y
+ addq $16, %rbp / d = key->data
+ incq %rcx / x++
+ andq $255, %rcx / x &= 0xff
+ leaq -8(%rbx,%rsi), %rbx / rbx = in+len-8
+ movq %rbx, %r9 / tmp = in+len-8
+ movq 0(%rbp,%rcx,8), %rax / tx = d[x]
+ cmpq %rsi, %rbx / cmp in with in+len-8
+ jl .Lend / jump if (in+len-8 < in)
+
+.Lstart:
+ addq $8, %rsi / increment in
+ addq $8, %rdi / increment out
+
+ / generate the next 8 bytes of the rc4 stream into %r8
+ movq $8, %r11 / byte counter
+1: addb %al, %dl / y += tx
+ movl 0(%rbp,%rdx,8), %ebx / ty = d[y]
+ movl %ebx, 0(%rbp,%rcx,8) / d[x] = ty
+ addb %al, %bl / val = ty + tx
+ movl %eax, 0(%rbp,%rdx,8) / d[y] = tx
+ incb %cl / x++ (NEXT ROUND)
+ movl 0(%rbp,%rcx,8), %eax / tx = d[x] (NEXT ROUND)
+ movb 0(%rbp,%rbx,8), %r8b / val = d[val]
+ decb %r11b
+ rorq $8, %r8 / (ror does not change ZF)
+ jnz 1b
+
+ / xor 8 bytes
+ xorq -8(%rsi), %r8
+ cmpq %r9, %rsi / cmp in+len-8 with in
+ movq %r8, -8(%rdi)
+ jle .Lstart / jump if (in <= in+len-8)
+
+.Lend:
+ addq $8, %r9 / tmp = in+len
+
+ / handle the last bytes, one by one
+1: cmpq %rsi, %r9 / cmp in with in+len
+ jle .Lfinished / jump if (in+len <= in)
+ addb %al, %dl / y += tx
+ movl 0(%rbp,%rdx,8), %ebx / ty = d[y]
+ movl %ebx, 0(%rbp,%rcx,8) / d[x] = ty
+ addb %al, %bl / val = ty + tx
+ movl %eax, 0(%rbp,%rdx,8) / d[y] = tx
+ incb %cl / x++ (NEXT ROUND)
+ movl 0(%rbp,%rcx,8), %eax / tx = d[x] (NEXT ROUND)
+ movb 0(%rbp,%rbx,8), %r8b / val = d[val]
+ xorb (%rsi), %r8b / xor 1 byte
+ movb %r8b, (%rdi)
+ incq %rsi / in++
+ incq %rdi / out++
+ jmp 1b
+
+.Lfinished:
+ / x was advanced one step ahead of the bytes actually produced
+ / (see the "NEXT ROUND" loads), so step it back before storing.
+ decq %rcx / x--
+ movb %dl, -8(%rbp) / key->y = y
+ movb %cl, -16(%rbp) / key->x = x
+ popq %rbx
+ popq %rbp
+ ret
+.L_ARCFOUR_end:
+.size ARCFOUR,.L_ARCFOUR_end-ARCFOUR
diff --git a/security/nss/lib/freebl/arcfour.c b/security/nss/lib/freebl/arcfour.c
new file mode 100644
index 000000000..e37b45843
--- /dev/null
+++ b/security/nss/lib/freebl/arcfour.c
@@ -0,0 +1,594 @@
+/* arcfour.c - the arc four algorithm.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "prerr.h"
+#include "secerr.h"
+
+#include "prtypes.h"
+#include "blapi.h"
+
+/* Architecture-dependent defines */
+
+#if defined(SOLARIS) || defined(HPUX) || defined(NSS_X86) || \
+ defined(_WIN64)
+/* Convert the byte-stream to a word-stream */
+#define CONVERT_TO_WORDS
+#endif
+
+#if defined(AIX) || defined(OSF1) || defined(NSS_BEVAND_ARCFOUR)
+/* Treat array variables as words, not bytes, on CPUs that take
+ * much longer to write bytes than to write words, or when using
+ * assembler code that required it.
+ */
+#define USE_WORD
+#endif
+
+#if defined(IS_64) || defined(NSS_BEVAND_ARCFOUR)
+typedef PRUint64 WORD;
+#else
+typedef PRUint32 WORD;
+#endif
+#define WORDSIZE sizeof(WORD)
+
+#if defined(USE_WORD)
+typedef WORD Stype;
+#else
+typedef PRUint8 Stype;
+#endif
+
+#define ARCFOUR_STATE_SIZE 256
+
+#define MASK1BYTE (WORD)(0xff)
+
+#define SWAP(a, b) \
+ tmp = a; \
+ a = b; \
+ b = tmp;
+
+/*
+ * State information for stream cipher.
+ *
+ * S is the 256-entry permutation table; i and j are the two running
+ * indices into it. All three use Stype, which is a full WORD when
+ * USE_WORD is defined and a single byte otherwise.
+ *
+ * The indices are placed in front of the table when NSS_ARCFOUR_IJ_B4_S
+ * or NSS_BEVAND_ARCFOUR is defined, and after it otherwise.
+ * NOTE(review): presumably the "indices first" layout exists to match what
+ * the assembler ARCFOUR routine reads (x, y, then the table) -- confirm.
+ */
+struct RC4ContextStr {
+#if defined(NSS_ARCFOUR_IJ_B4_S) || defined(NSS_BEVAND_ARCFOUR)
+ Stype i;
+ Stype j;
+ Stype S[ARCFOUR_STATE_SIZE];
+#else
+ Stype S[ARCFOUR_STATE_SIZE];
+ Stype i;
+ Stype j;
+#endif
+};
+
+/*
+ * Identity permutation 0x00..0xff, used to initialize the cx->S array.
+ * RC4_InitContext copies the whole table into cx->S with one memcpy
+ * (faster than a loop) before stirring in the key. The element type is
+ * Stype, the same as cx->S, so the copy is size-compatible whether or not
+ * USE_WORD is defined.
+ */
+static const Stype Kinit[256] = {
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
+ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
+ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
+ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
+ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
+ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
+ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
+ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
+ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
+ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
+ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
+ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
+ 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
+ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
+ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
+ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
+ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
+ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
+ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
+ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
+ 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
+ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
+ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
+};
+
+/*
+ * Allocate an RC4Context with PORT_ZNew and return it as-is (so the
+ * result may be NULL if that allocation fails). The context carries no
+ * key yet; RC4_InitContext has to be called before it can be used.
+ */
+RC4Context *
+RC4_AllocateContext(void)
+{
+ return PORT_ZNew(RC4Context);
+}
+
+/*
+ * Key an RC4 context.
+ *   "cx"   context to initialize (must not be NULL)
+ *   "key"  raw key bytes (must not be NULL)
+ *   "len"  number of key bytes, 1 .. ARCFOUR_STATE_SIZE - 1
+ *   "unused1" .. "unused4" are ignored.
+ *
+ * Returns SECFailure with SEC_ERROR_BAD_KEY for an out-of-range key
+ * length, SECFailure with SEC_ERROR_INVALID_ARGS for a NULL cx or key,
+ * and SECSuccess otherwise. On success the permutation table has been
+ * stirred with the key and both stream indices are zero.
+ */
+SECStatus
+RC4_InitContext(RC4Context *cx, const unsigned char *key, unsigned int len,
+                const unsigned char *unused1, int unused2,
+                unsigned int unused3, unsigned int unused4)
+{
+    unsigned int i;
+    PRUint8 j, tmp;
+    PRUint8 K[256];
+    PRUint8 *L;
+
+    /* verify the key length. */
+    PORT_Assert(len > 0 && len < ARCFOUR_STATE_SIZE);
+    if (len == 0 || len >= ARCFOUR_STATE_SIZE) {
+        PORT_SetError(SEC_ERROR_BAD_KEY);
+        return SECFailure;
+    }
+    /* A NULL key would be dereferenced by the memcpy calls below. */
+    if (cx == NULL || key == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    /* Initialize the state using array indices. */
+    memcpy(cx->S, Kinit, sizeof cx->S);
+    /* Fill in K repeatedly with values from key; the last copy is the
+     * partial (or exact) remainder of i bytes.
+     */
+    L = K;
+    for (i = sizeof K; i > len; i -= len) {
+        memcpy(L, key, len);
+        L += len;
+    }
+    memcpy(L, key, i);
+    /* Stir the state of the generator. K now acts as a key that is
+     * exactly as long as the state buffer. j is a byte, so the sum
+     * wraps modulo 256.
+     */
+    j = 0;
+    for (i = 0; i < ARCFOUR_STATE_SIZE; i++) {
+        j = j + cx->S[i] + K[i];
+        SWAP(cx->S[i], cx->S[j]);
+    }
+    cx->i = 0;
+    cx->j = 0;
+    /* Best effort: do not leave the expanded key copy on the stack.
+     * (A plain memset may be elided by an optimizing compiler.)
+     */
+    memset(K, 0, sizeof K);
+    return SECSuccess;
+}
+
+/*
+ * Allocate an RC4 context and key it in one step.
+ * Returns NULL when the allocation fails or when RC4_InitContext rejects
+ * the key; in the latter case the context is wiped and freed first.
+ */
+RC4Context *
+RC4_CreateContext(const unsigned char *key, int len)
+{
+    RC4Context *ctx = RC4_AllocateContext();
+
+    if (ctx == NULL) {
+        return NULL;
+    }
+    if (RC4_InitContext(ctx, key, len, NULL, 0, 0, 0) == SECSuccess) {
+        return ctx;
+    }
+    /* Keying failed: release the half-built context. */
+    PORT_ZFree(ctx, sizeof(*ctx));
+    return NULL;
+}
+
+/*
+ * Release an RC4 context.
+ * "cx" the context
+ * "freeit" if PR_TRUE, cx is handed to PORT_ZFree together with its
+ * size; if PR_FALSE nothing is done and the cipher state in *cx is
+ * left as it is.
+ */
+void
+RC4_DestroyContext(RC4Context *cx, PRBool freeit)
+{
+ if (freeit)
+ PORT_ZFree(cx, sizeof(*cx));
+}
+
+#if defined(NSS_BEVAND_ARCFOUR)
+extern void ARCFOUR(RC4Context *cx, WORD inputLen,
+ const unsigned char *input, unsigned char *output);
+#else
+/*
+ * Generate the next byte in the stream.
+ *
+ * Expands to plain statements (not an expression) that work on locals
+ * the caller must provide: cx, the index copies tmpi and tmpj, the
+ * scratch values tmpSi and tmpSj, and t. It pre-increments tmpi, adds
+ * S[tmpi] to tmpj, exchanges S[tmpi] and S[tmpj], and leaves the sum of
+ * the two entries in t; callers then read cx->S[t] as the keystream byte.
+ * The callers in this file declare tmpi, tmpj and t as PRUint8, which is
+ * what makes the index arithmetic wrap at 256.
+ */
+#define ARCFOUR_NEXT_BYTE() \
+ tmpSi = cx->S[++tmpi]; \
+ tmpj += tmpSi; \
+ tmpSj = cx->S[tmpj]; \
+ cx->S[tmpi] = tmpSj; \
+ cx->S[tmpj] = tmpSi; \
+ t = tmpSi + tmpSj;
+
+#ifdef CONVERT_TO_WORDS
+/*
+ * Plain byte-at-a-time ARCFOUR: one keystream byte per input byte, no
+ * unrolling and no word access.
+ * Fails with SEC_ERROR_OUTPUT_LEN when the output buffer is smaller than
+ * the input; otherwise writes inputLen bytes and reports that length.
+ */
+static SECStatus
+rc4_no_opt(RC4Context *cx, unsigned char *output,
+           unsigned int *outputLen, unsigned int maxOutputLen,
+           const unsigned char *input, unsigned int inputLen)
+{
+    PRUint8 t;
+    Stype tmpSi, tmpSj;
+    register PRUint8 tmpi = cx->i;
+    register PRUint8 tmpj = cx->j;
+    const unsigned char *src = input;
+    unsigned char *dst = output;
+    unsigned int left;
+
+    PORT_Assert(maxOutputLen >= inputLen);
+    if (inputLen > maxOutputLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+    for (left = inputLen; left != 0; left--) {
+        /* next keystream byte, XORed onto the next input byte */
+        ARCFOUR_NEXT_BYTE();
+        *dst++ = cx->S[t] ^ *src++;
+    }
+    *outputLen = inputLen;
+    /* write the working copies of the indices back */
+    cx->i = tmpi;
+    cx->j = tmpj;
+    return SECSuccess;
+}
+
+#else
+/* !CONVERT_TO_WORDS */
+
+/*
+ * Byte-at-a-time ARCFOUR with the main loop unrolled eight times; the
+ * 0..7 bytes left over are handled by a short trailing loop.
+ * Fails with SEC_ERROR_OUTPUT_LEN when the output buffer is smaller than
+ * the input; otherwise writes inputLen bytes and reports that length.
+ */
+static SECStatus
+rc4_unrolled(RC4Context *cx, unsigned char *output,
+             unsigned int *outputLen, unsigned int maxOutputLen,
+             const unsigned char *input, unsigned int inputLen)
+{
+    PRUint8 t;
+    Stype tmpSi, tmpSj;
+    register PRUint8 tmpi = cx->i;
+    register PRUint8 tmpj = cx->j;
+    unsigned int blocks;
+    unsigned int tail;
+
+    PORT_Assert(maxOutputLen >= inputLen);
+    if (inputLen > maxOutputLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    /* whole groups of eight bytes */
+    for (blocks = inputLen / 8; blocks != 0; blocks--) {
+        ARCFOUR_NEXT_BYTE();
+        output[0] = cx->S[t] ^ input[0];
+        ARCFOUR_NEXT_BYTE();
+        output[1] = cx->S[t] ^ input[1];
+        ARCFOUR_NEXT_BYTE();
+        output[2] = cx->S[t] ^ input[2];
+        ARCFOUR_NEXT_BYTE();
+        output[3] = cx->S[t] ^ input[3];
+        ARCFOUR_NEXT_BYTE();
+        output[4] = cx->S[t] ^ input[4];
+        ARCFOUR_NEXT_BYTE();
+        output[5] = cx->S[t] ^ input[5];
+        ARCFOUR_NEXT_BYTE();
+        output[6] = cx->S[t] ^ input[6];
+        ARCFOUR_NEXT_BYTE();
+        output[7] = cx->S[t] ^ input[7];
+        input += 8;
+        output += 8;
+    }
+
+    /* the remaining inputLen % 8 bytes, in order */
+    for (tail = inputLen % 8; tail != 0; tail--) {
+        ARCFOUR_NEXT_BYTE();
+        *output++ = cx->S[t] ^ *input++;
+    }
+
+    cx->i = tmpi;
+    cx->j = tmpj;
+    *outputLen = inputLen;
+    return SECSuccess;
+}
+#endif
+
+/*
+ * ARCFOUR_NEXT4BYTES_L / ARCFOUR_NEXT4BYTES_B
+ * OR four fresh keystream bytes into streamWord, starting at bit offset
+ * "n". The _L form puts the first generated byte at the lowest bit
+ * position, the _B form at the highest of the four. Only the form for
+ * the build's byte order is defined. Both rely on the locals required by
+ * ARCFOUR_NEXT_BYTE plus a WORD named streamWord.
+ */
+#ifdef IS_LITTLE_ENDIAN
+#define ARCFOUR_NEXT4BYTES_L(n) \
+ ARCFOUR_NEXT_BYTE(); \
+ streamWord |= (WORD)cx->S[t] << (n); \
+ ARCFOUR_NEXT_BYTE(); \
+ streamWord |= (WORD)cx->S[t] << (n + 8); \
+ ARCFOUR_NEXT_BYTE(); \
+ streamWord |= (WORD)cx->S[t] << (n + 16); \
+ ARCFOUR_NEXT_BYTE(); \
+ streamWord |= (WORD)cx->S[t] << (n + 24);
+#else
+#define ARCFOUR_NEXT4BYTES_B(n) \
+ ARCFOUR_NEXT_BYTE(); \
+ streamWord |= (WORD)cx->S[t] << (n + 24); \
+ ARCFOUR_NEXT_BYTE(); \
+ streamWord |= (WORD)cx->S[t] << (n + 16); \
+ ARCFOUR_NEXT_BYTE(); \
+ streamWord |= (WORD)cx->S[t] << (n + 8); \
+ ARCFOUR_NEXT_BYTE(); \
+ streamWord |= (WORD)cx->S[t] << (n);
+#endif
+
+/*
+ * ARCFOUR_NEXT_WORD
+ * Clear streamWord and fill it with keystream bytes: eight in the
+ * "64-bit wordsize" branch, four in the "32-bit wordsize" branch.
+ *
+ * NOTE(review): the WORD typedef earlier in this file is chosen by
+ * IS_64 || NSS_BEVAND_ARCFOUR, while the branch below is chosen by
+ * (IS_64 && !__sparc) || NSS_USE_64. Confirm that no supported build
+ * pairs a 64-bit WORD with the 4-byte variant, or a 32-bit WORD with
+ * the 8-byte variant (which would shift by 32 or more).
+ */
+#if (defined(IS_64) && !defined(__sparc)) || defined(NSS_USE_64)
+/* 64-bit wordsize */
+#ifdef IS_LITTLE_ENDIAN
+#define ARCFOUR_NEXT_WORD() \
+ { \
+ streamWord = 0; \
+ ARCFOUR_NEXT4BYTES_L(0); \
+ ARCFOUR_NEXT4BYTES_L(32); \
+ }
+#else
+#define ARCFOUR_NEXT_WORD() \
+ { \
+ streamWord = 0; \
+ ARCFOUR_NEXT4BYTES_B(32); \
+ ARCFOUR_NEXT4BYTES_B(0); \
+ }
+#endif
+#else
+/* 32-bit wordsize */
+#ifdef IS_LITTLE_ENDIAN
+#define ARCFOUR_NEXT_WORD() \
+ { \
+ streamWord = 0; \
+ ARCFOUR_NEXT4BYTES_L(0); \
+ }
+#else
+#define ARCFOUR_NEXT_WORD() \
+ { \
+ streamWord = 0; \
+ ARCFOUR_NEXT4BYTES_B(0); \
+ }
+#endif
+#endif
+
+/*
+ * RSH / LSH are shift operators used by rc4_wordconv to line input words
+ * up with output words. On little-endian builds RSH expands to "<<" and
+ * LSH to ">>"; on big-endian builds it is the other way round.
+ * NOTE(review): the names presumably describe movement in memory byte
+ * order rather than numeric shift direction -- confirm.
+ */
+#ifdef IS_LITTLE_ENDIAN
+#define RSH <<
+#define LSH >>
+#else
+#define RSH >>
+#define LSH <<
+#endif
+
+/*
+ * LEFTMOST_BYTE_SHIFT is the bit shift that places a byte in the first
+ * byte (in memory order) of a WORD; NEXT_BYTE_SHIFT(shift) steps to the
+ * following byte. Both expansions are fully parenthesized so they can be
+ * used inside larger expressions.
+ */
+#ifdef IS_LITTLE_ENDIAN
+#define LEFTMOST_BYTE_SHIFT 0
+#define NEXT_BYTE_SHIFT(shift) ((shift) + 8)
+#else
+#define LEFTMOST_BYTE_SHIFT (8 * (WORDSIZE - 1))
+#define NEXT_BYTE_SHIFT(shift) ((shift) - 8)
+#endif
+
+#ifdef CONVERT_TO_WORDS
+/*
+ * Word-at-a-time ARCFOUR, used when CONVERT_TO_WORDS is defined.
+ *
+ * Inputs shorter than two words are handed to rc4_no_opt. Otherwise the
+ * work is done in three steps: (1) byte-wise output up to the first word
+ * boundary of "output", pre-loading any left-over input bytes into
+ * inWord; (2) a main loop that reads whole aligned input words, shifts
+ * them to line up with the output, XORs them with a keystream word from
+ * ARCFOUR_NEXT_WORD() and stores whole output words; (3) a byte-wise
+ * tail for whatever is left.
+ *
+ * Returns SECFailure with SEC_ERROR_OUTPUT_LEN if maxOutputLen is smaller
+ * than inputLen, SECSuccess otherwise; *outputLen is set to inputLen.
+ *
+ * NOTE(review): input and output are accessed through WORD pointers;
+ * this presumably relies on the compiler tolerating that aliasing --
+ * confirm.
+ */
+static SECStatus
+rc4_wordconv(RC4Context *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen)
+{
+ PR_STATIC_ASSERT(sizeof(PRUword) == sizeof(ptrdiff_t));
+ /* byte offsets of the two buffers within their aligned words */
+ unsigned int inOffset = (PRUword)input % WORDSIZE;
+ unsigned int outOffset = (PRUword)output % WORDSIZE;
+ register WORD streamWord;
+ register const WORD *pInWord;
+ register WORD *pOutWord;
+ register WORD inWord, nextInWord;
+ PRUint8 t;
+ register Stype tmpSi, tmpSj;
+ register PRUint8 tmpi = cx->i;
+ register PRUint8 tmpj = cx->j;
+ unsigned int bufShift, invBufShift;
+ unsigned int i;
+ const unsigned char *finalIn;
+ unsigned char *finalOut;
+
+ PORT_Assert(maxOutputLen >= inputLen);
+ if (maxOutputLen < inputLen) {
+ PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+ return SECFailure;
+ }
+ if (inputLen < 2 * WORDSIZE) {
+ /* Ignore word conversion, do byte-at-a-time */
+ return rc4_no_opt(cx, output, outputLen, maxOutputLen, input, inputLen);
+ }
+ *outputLen = inputLen;
+ /* round both pointers down to the start of their aligned words */
+ pInWord = (const WORD *)(input - inOffset);
+ pOutWord = (WORD *)(output - outOffset);
+ /* bufShift and invBufShift are complementary bit counts (they add up
+ * to one full word) describing the relative misalignment of the two
+ * buffers; bufShift is 0 when both have the same offset.
+ */
+ if (inOffset <= outOffset) {
+ bufShift = 8 * (outOffset - inOffset);
+ invBufShift = 8 * WORDSIZE - bufShift;
+ } else {
+ invBufShift = 8 * (inOffset - outOffset);
+ bufShift = 8 * WORDSIZE - invBufShift;
+ }
+ /*****************************************************************/
+ /* Step 1: */
+ /* If the first output word is partial, consume the bytes in the */
+ /* first partial output word by loading one or two words of */
+ /* input and shifting them accordingly. Otherwise, just load */
+ /* in the first word of input. At the end of this block, at */
+ /* least one partial word of input should ALWAYS be loaded. */
+ /*****************************************************************/
+ if (outOffset) {
+ unsigned int byteCount = WORDSIZE - outOffset;
+ for (i = 0; i < byteCount; i++) {
+ ARCFOUR_NEXT_BYTE();
+ output[i] = cx->S[t] ^ input[i];
+ }
+ /* Consumed byteCount bytes of input */
+ inputLen -= byteCount;
+ pInWord++;
+
+ /* move to next word of output */
+ pOutWord++;
+
+ /* If buffers are relatively misaligned, shift the bytes in inWord
+ * to be aligned to the output buffer.
+ */
+ if (inOffset < outOffset) {
+ /* The first input word (which may be partial) has more bytes
+ * than needed. Copy the remainder to inWord.
+ */
+ unsigned int shift = LEFTMOST_BYTE_SHIFT;
+ inWord = 0;
+ for (i = 0; i < outOffset - inOffset; i++) {
+ inWord |= (WORD)input[byteCount + i] << shift;
+ shift = NEXT_BYTE_SHIFT(shift);
+ }
+ } else if (inOffset > outOffset) {
+ /* Consumed some bytes in the second input word. Copy the
+ * remainder to inWord.
+ */
+ inWord = *pInWord++;
+ inWord = inWord LSH invBufShift;
+ } else {
+ inWord = 0;
+ }
+ } else {
+ /* output is word-aligned */
+ if (inOffset) {
+ /* Input is not word-aligned. The first word load of input
+ * will not produce a full word of input bytes, so one word
+ * must be pre-loaded. The main loop below will load in the
+ * next input word and shift some of its bytes into inWord
+ * in order to create a full input word. Note that the main
+ * loop must execute at least once because the input must
+ * be at least two words.
+ */
+ unsigned int shift = LEFTMOST_BYTE_SHIFT;
+ inWord = 0;
+ for (i = 0; i < WORDSIZE - inOffset; i++) {
+ inWord |= (WORD)input[i] << shift;
+ shift = NEXT_BYTE_SHIFT(shift);
+ }
+ pInWord++;
+ } else {
+ /* Input is word-aligned. The first word load of input
+ * will produce a full word of input bytes, so nothing
+ * needs to be loaded here.
+ */
+ inWord = 0;
+ }
+ }
+ /*****************************************************************/
+ /* Step 2: main loop */
+ /* At this point the output buffer is word-aligned. Any unused */
+ /* bytes from above will be in inWord (shifted correctly). If */
+ /* the input buffer is unaligned relative to the output buffer, */
+ /* shifting has to be done. */
+ /*****************************************************************/
+ if (bufShift) {
+ /* preloadedByteCount is the number of input bytes pre-loaded
+ * in inWord.
+ */
+ unsigned int preloadedByteCount = bufShift / 8;
+ /* Each pass needs one more full input word beyond the bytes
+ * already sitting in inWord.
+ */
+ for (; inputLen >= preloadedByteCount + WORDSIZE;
+ inputLen -= WORDSIZE) {
+ nextInWord = *pInWord++;
+ inWord |= nextInWord RSH bufShift;
+ nextInWord = nextInWord LSH invBufShift;
+ ARCFOUR_NEXT_WORD();
+ *pOutWord++ = inWord ^ streamWord;
+ inWord = nextInWord;
+ }
+ if (inputLen == 0) {
+ /* Nothing left to do. */
+ cx->i = tmpi;
+ cx->j = tmpj;
+ return SECSuccess;
+ }
+ /* The pre-loaded bytes have not been output yet, so the tail
+ * starts that many bytes before the next unread input word.
+ */
+ finalIn = (const unsigned char *)pInWord - preloadedByteCount;
+ } else {
+ for (; inputLen >= WORDSIZE; inputLen -= WORDSIZE) {
+ inWord = *pInWord++;
+ ARCFOUR_NEXT_WORD();
+ *pOutWord++ = inWord ^ streamWord;
+ }
+ if (inputLen == 0) {
+ /* Nothing left to do. */
+ cx->i = tmpi;
+ cx->j = tmpj;
+ return SECSuccess;
+ }
+ finalIn = (const unsigned char *)pInWord;
+ }
+ /*****************************************************************/
+ /* Step 3: */
+ /* Do the remaining partial word of input one byte at a time. */
+ /*****************************************************************/
+ finalOut = (unsigned char *)pOutWord;
+ for (i = 0; i < inputLen; i++) {
+ ARCFOUR_NEXT_BYTE();
+ finalOut[i] = cx->S[t] ^ finalIn[i];
+ }
+ cx->i = tmpi;
+ cx->j = tmpj;
+ return SECSuccess;
+}
+#endif
+#endif /* NSS_BEVAND_ARCFOUR */
+
+/*
+ * Encrypt inputLen bytes from input into output with the RC4 stream in
+ * cx. Fails with SEC_ERROR_OUTPUT_LEN when maxOutputLen is smaller than
+ * inputLen. The implementation used (assembler ARCFOUR, word conversion,
+ * or unrolled byte loop) is selected at compile time.
+ */
+SECStatus
+RC4_Encrypt(RC4Context *cx, unsigned char *output,
+            unsigned int *outputLen, unsigned int maxOutputLen,
+            const unsigned char *input, unsigned int inputLen)
+{
+    SECStatus status;
+
+    PORT_Assert(maxOutputLen >= inputLen);
+    if (inputLen > maxOutputLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+#if defined(NSS_BEVAND_ARCFOUR)
+    /* assembler routine; it reports no status of its own */
+    ARCFOUR(cx, inputLen, input, output);
+    *outputLen = inputLen;
+    status = SECSuccess;
+#elif defined(CONVERT_TO_WORDS)
+    /* Convert the byte-stream to a word-stream */
+    status = rc4_wordconv(cx, output, outputLen, maxOutputLen, input, inputLen);
+#else
+    /* Operate on bytes, but unroll the main loop */
+    status = rc4_unrolled(cx, output, outputLen, maxOutputLen, input, inputLen);
+#endif
+    return status;
+}
+
+/*
+ * Decrypt inputLen bytes from input into output with the RC4 stream in
+ * cx. RC4 decryption is the same keystream XOR as encryption, so this
+ * forwards to RC4_Encrypt instead of repeating its body; argument
+ * checking, error codes and the return value are those of RC4_Encrypt
+ * (SEC_ERROR_OUTPUT_LEN when maxOutputLen is smaller than inputLen).
+ */
+SECStatus
+RC4_Decrypt(RC4Context *cx, unsigned char *output,
+            unsigned int *outputLen, unsigned int maxOutputLen,
+            const unsigned char *input, unsigned int inputLen)
+{
+    /* decrypt and encrypt are same operation. */
+    return RC4_Encrypt(cx, output, outputLen, maxOutputLen, input, inputLen);
+}
+
+#undef CONVERT_TO_WORDS
+#undef USE_WORD
diff --git a/security/nss/lib/freebl/blapi.h b/security/nss/lib/freebl/blapi.h
new file mode 100644
index 000000000..e5a6cf30e
--- /dev/null
+++ b/security/nss/lib/freebl/blapi.h
@@ -0,0 +1,1625 @@
+/*
+ * blapi.h - public prototypes for the freebl library
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _BLAPI_H_
+#define _BLAPI_H_
+
+#include "blapit.h"
+#include "hasht.h"
+#include "alghmac.h"
+
+SEC_BEGIN_PROTOS
+
+/*
+** RSA encryption/decryption. When encrypting/decrypting the output
+** buffer must be at least the size of the public key modulus.
+*/
+
+extern SECStatus BL_Init(void);
+
+/*
+** Generate and return a new RSA public and private key.
+** Both keys are encoded in a single RSAPrivateKey structure.
+** "cx" is the random number generator context
+** "keySizeInBits" is the size of the key to be generated, in bits.
+** 512, 1024, etc.
+** "publicExponent" when not NULL is a pointer to some data that
+** represents the public exponent to use. The data is a byte
+** encoded integer, in "big endian" order.
+*/
+extern RSAPrivateKey *RSA_NewKey(int keySizeInBits,
+ SECItem *publicExponent);
+
+/*
+** Perform a raw public-key operation
+** Length of input and output buffers are equal to key's modulus len.
+*/
+extern SECStatus RSA_PublicKeyOp(RSAPublicKey *key,
+ unsigned char *output,
+ const unsigned char *input);
+
+/*
+** Perform a raw private-key operation
+** Length of input and output buffers are equal to key's modulus len.
+*/
+extern SECStatus RSA_PrivateKeyOp(RSAPrivateKey *key,
+ unsigned char *output,
+ const unsigned char *input);
+
+/*
+** Perform a raw private-key operation, and check the parameters used in
+** the operation for validity by performing a test operation first.
+** Length of input and output buffers are equal to key's modulus len.
+*/
+extern SECStatus RSA_PrivateKeyOpDoubleChecked(RSAPrivateKey *key,
+ unsigned char *output,
+ const unsigned char *input);
+
+/*
+** Perform a check of private key parameters for consistency.
+*/
+extern SECStatus RSA_PrivateKeyCheck(const RSAPrivateKey *key);
+
+/*
+** Given only minimal private key parameters, fill in the rest of the
+** parameters.
+**
+**
+** All the entries, including those supplied by the caller, will be
+** overwritten with data allocated out of the arena.
+**
+** If no arena is supplied, one will be created.
+**
+** The following fields must be supplied in order for this function
+** to succeed:
+** one of either publicExponent or privateExponent
+** two more of the following 5 parameters (not counting the above).
+** modulus (n)
+** prime1 (p)
+** prime2 (q)
+** publicExponent (e)
+** privateExponent (d)
+**
+** NOTE: if only the publicExponent, privateExponent, and one prime is given,
+** then there may be more than one RSA key that matches that combination. If
+** we find 2 possible valid keys that meet this criteria, we return an error.
+** If we return the wrong key, and the original modulus is compared to the
+** new modulus, both can be factored by calculating gcd(n_old,n_new) to get
+** the common prime.
+**
+** NOTE: in some cases the publicExponent must be less than 2^23 for this
+** function to work correctly. (The case where we have only one of: modulus,
+** prime1 and prime2).
+**
+** All parameters will be replaced in the key structure with new parameters
+** allocated out of the arena. There is no attempt to free the old structures.
+** prime1 will always be greater than prime2 (even if the caller supplies the
+** smaller prime as prime1 or the larger prime as prime2). The parameters are
+** not overwritten on failure.
+**
+** While the remaining Chinese remainder theorem parameters (dp, dq, and qinv)
+** can also be used in reconstructing the private key, they are currently
+** ignored in this implementation.
+*/
+extern SECStatus RSA_PopulatePrivateKey(RSAPrivateKey *key);
+
+/********************************************************************
+** RSA algorithm
+*/
+
+/********************************************************************
+** Raw signing/encryption/decryption operations.
+**
+** No padding or formatting will be applied.
+** inputLen MUST be equivalent to the modulus size (in bytes).
+*/
+extern SECStatus
+RSA_SignRaw(RSAPrivateKey *key,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen);
+
+extern SECStatus
+RSA_CheckSignRaw(RSAPublicKey *key,
+ const unsigned char *sig,
+ unsigned int sigLen,
+ const unsigned char *hash,
+ unsigned int hashLen);
+
+extern SECStatus
+RSA_CheckSignRecoverRaw(RSAPublicKey *key,
+ unsigned char *data,
+ unsigned int *dataLen,
+ unsigned int maxDataLen,
+ const unsigned char *sig,
+ unsigned int sigLen);
+
+extern SECStatus
+RSA_EncryptRaw(RSAPublicKey *key,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen);
+
+extern SECStatus
+RSA_DecryptRaw(RSAPrivateKey *key,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen);
+
+/********************************************************************
+** RSAES-OAEP encryption/decryption, as defined in RFC 3447, Section 7.1.
+**
+** Note: Only MGF1 is supported as the mask generation function. It will be
+** used with maskHashAlg as the inner hash function.
+**
+** Unless performing Known Answer Tests, "seed" should be NULL, indicating that
+** freebl should generate a random value. Otherwise, it should be an octet
+** string of seedLen bytes, which should be the same size as the output of
+** hashAlg.
+*/
+extern SECStatus
+RSA_EncryptOAEP(RSAPublicKey *key,
+ HASH_HashType hashAlg,
+ HASH_HashType maskHashAlg,
+ const unsigned char *label,
+ unsigned int labelLen,
+ const unsigned char *seed,
+ unsigned int seedLen,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen);
+
+extern SECStatus
+RSA_DecryptOAEP(RSAPrivateKey *key,
+ HASH_HashType hashAlg,
+ HASH_HashType maskHashAlg,
+ const unsigned char *label,
+ unsigned int labelLen,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen);
+
+/********************************************************************
+** RSAES-PKCS1-v1_5 encryption/decryption, as defined in RFC 3447, Section 7.2.
+*/
+extern SECStatus
+RSA_EncryptBlock(RSAPublicKey *key,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen);
+
+extern SECStatus
+RSA_DecryptBlock(RSAPrivateKey *key,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen);
+
+/********************************************************************
+** RSASSA-PSS signing/verifying, as defined in RFC 3447, Section 8.1.
+**
+** Note: Only MGF1 is supported as the mask generation function. It will be
+** used with maskHashAlg as the inner hash function.
+**
+** Unless performing Known Answer Tests, "salt" should be NULL, indicating that
+** freebl should generate a random value.
+*/
+extern SECStatus
+RSA_SignPSS(RSAPrivateKey *key,
+ HASH_HashType hashAlg,
+ HASH_HashType maskHashAlg,
+ const unsigned char *salt,
+ unsigned int saltLen,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen);
+
+extern SECStatus
+RSA_CheckSignPSS(RSAPublicKey *key,
+ HASH_HashType hashAlg,
+ HASH_HashType maskHashAlg,
+ unsigned int saltLen,
+ const unsigned char *sig,
+ unsigned int sigLen,
+ const unsigned char *hash,
+ unsigned int hashLen);
+
+/********************************************************************
+** RSASSA-PKCS1-v1_5 signing/verifying, as defined in RFC 3447, Section 8.2.
+**
+** These functions expect the input to be the raw value to be signed. For most
+** cases using PKCS1-v1_5, this should be the value of T, the DER-encoded
+** DigestInfo structure defined in Section 9.2, Step 2.
+** Note: This can also be used for signatures that use PKCS1-v1_5 padding, such
+** as the signatures used in SSL/TLS, which sign a raw hash.
+*/
+extern SECStatus
+RSA_Sign(RSAPrivateKey *key,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *data,
+ unsigned int dataLen);
+
+extern SECStatus
+RSA_CheckSign(RSAPublicKey *key,
+ const unsigned char *sig,
+ unsigned int sigLen,
+ const unsigned char *data,
+ unsigned int dataLen);
+
+extern SECStatus
+RSA_CheckSignRecover(RSAPublicKey *key,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *sig,
+ unsigned int sigLen);
+
+/********************************************************************
+** DSA signing algorithm
+*/
+
+/* Generate a new random value within the interval [2, q-1].
+*/
+extern SECStatus DSA_NewRandom(PLArenaPool *arena, const SECItem *q,
+ SECItem *random);
+
+/*
+** Generate and return a new DSA public and private key pair,
+** both of which are encoded into a single DSAPrivateKey struct.
+** "params" is a pointer to the PQG parameters for the domain
+** Uses a random seed.
+*/
+extern SECStatus DSA_NewKey(const PQGParams *params,
+ DSAPrivateKey **privKey);
+
+/* signature is caller-supplied buffer of at least 20 bytes.
+** On input, signature->len == size of buffer to hold signature.
+** digest->len == size of digest.
+** On output, signature->len == size of signature in buffer.
+** Uses a random seed.
+*/
+extern SECStatus DSA_SignDigest(DSAPrivateKey *key,
+ SECItem *signature,
+ const SECItem *digest);
+
+/* signature is caller-supplied buffer of at least 20 bytes.
+** On input, signature->len == size of buffer to hold signature.
+** digest->len == size of digest.
+*/
+extern SECStatus DSA_VerifyDigest(DSAPublicKey *key,
+ const SECItem *signature,
+ const SECItem *digest);
+
+/* For FIPS compliance testing. Seed must be exactly 20 bytes long */
+extern SECStatus DSA_NewKeyFromSeed(const PQGParams *params,
+ const unsigned char *seed,
+ DSAPrivateKey **privKey);
+
+/* For FIPS compliance testing. Seed must be exactly 20 bytes. */
+extern SECStatus DSA_SignDigestWithSeed(DSAPrivateKey *key,
+ SECItem *signature,
+ const SECItem *digest,
+ const unsigned char *seed);
+
+/******************************************************
+** Diffie-Hellman key exchange algorithm
+*/
+
+/* Generates parameters for Diffie-Hellman key generation.
+** primeLen is the length in bytes of prime P to be generated.
+*/
+extern SECStatus DH_GenParam(int primeLen, DHParams **params);
+
+/* Generates a public and private key, both of which are encoded in a single
+** DHPrivateKey struct. Params is input, privKey is output.
+** This is Phase 1 of Diffie Hellman.
+*/
+extern SECStatus DH_NewKey(DHParams *params,
+ DHPrivateKey **privKey);
+
+/*
+** DH_Derive does the Diffie-Hellman phase 2 calculation, using the
+** other party's publicValue, and the prime and our privateValue.
+** outBytes is the requested length of the generated secret in bytes.
+** A zero value means produce a value of any length up to the size of
+** the prime. If successful, derivedSecret->data is set
+** to the address of the newly allocated buffer containing the derived
+** secret, and derivedSecret->len is the size of the secret produced.
+** The size of the secret produced will depend on the value of outBytes.
+** If outBytes is 0, the key length will be all the significant bytes of
+** the derived secret (leading zeros are dropped). This length could be less
+** than the length of the prime. If outBytes is nonzero, the length of the
+** produced key will be outBytes long. If the key is truncated, the most
+** significant bytes are truncated. If it is expanded, zero bytes are added
+** at the beginning.
+** It is the caller's responsibility to free the allocated buffer
+** containing the derived secret.
+*/
+extern SECStatus DH_Derive(SECItem *publicValue,
+ SECItem *prime,
+ SECItem *privateValue,
+ SECItem *derivedSecret,
+ unsigned int outBytes);
+
+/*
+** KEA_Derive returns an octet string with the private key for a dual
+** Diffie-Hellman key generation as specified for government key exchange.
+*/
+extern SECStatus KEA_Derive(SECItem *prime,
+ SECItem *public1,
+ SECItem *public2,
+ SECItem *private1,
+ SECItem *private2,
+ SECItem *derivedSecret);
+
+/*
+ * verify that a KEA or DSA public key is a valid key for this prime and
+ * subprime domain.
+ */
+extern PRBool KEA_Verify(SECItem *Y, SECItem *prime, SECItem *subPrime);
+
+/****************************************
+ * J-PAKE key transport
+ */
+
+/* Given gx == g^x, create a Schnorr zero-knowledge proof for the value x
+ * using the specified hash algorithm and signer ID. The signature is
+ * returned in the values gv and r. testRandom must be NULL for a PRNG
+ * generated random commitment to be used in the signature. When testRandom
+ * is non-NULL, that value must contain a value in the subgroup q; that
+ * value will be used instead of a PRNG-generated commitment in order to
+ * facilitate known-answer tests.
+ *
+ * If gxIn is non-NULL then it must contain a pre-computed value of g^x that
+ * will be used by the function; in this case, the gxOut parameter must be NULL.
+ * If the gxIn parameter is NULL then gxOut must be non-NULL; in this case
+ * gxOut will contain the value g^x on output.
+ *
+ * gx (if not supplied by the caller), gv, and r will be allocated in the arena.
+ * The arena is *not* optional so do not pass NULL for the arena parameter.
+ * The arena should be zeroed when it is freed.
+ */
+SECStatus
+JPAKE_Sign(PLArenaPool *arena, const PQGParams *pqg, HASH_HashType hashType,
+ const SECItem *signerID, const SECItem *x,
+ const SECItem *testRandom, const SECItem *gxIn, SECItem *gxOut,
+ SECItem *gv, SECItem *r);
+
+/* Given gx == g^x, verify the Schnorr zero-knowledge proof (gv, r) for the
+ * value x using the specified hash algorithm and signer ID.
+ *
+ * The arena is *not* optional so do not pass NULL for the arena parameter.
+ */
+SECStatus
+JPAKE_Verify(PLArenaPool *arena, const PQGParams *pqg,
+ HASH_HashType hashType, const SECItem *signerID,
+ const SECItem *peerID, const SECItem *gx,
+ const SECItem *gv, const SECItem *r);
+
+/* Call before round 2 with x2, s, and x2s all non-NULL. This will calculate
+ * base = g^(x1+x3+x4) (mod p) and x2s = x2*s (mod q). The values to send in
+ * round 2 (A and the proof of knowledge of x2s) can then be calculated with
+ * JPAKE_Sign using pqg->base = base and x = x2s.
+ *
+ * Call after round 2 with x2, s, and x2s all NULL, and passing (gx1, gx2, gx3)
+ * instead of (gx1, gx3, gx4). This will calculate base = g^(x1+x2+x3). Then call
+ * JPAKE_Verify with pqg->base = base and then JPAKE_Final.
+ *
+ * base and x2s will be allocated in the arena. The arena is *not* optional so
+ * do not pass NULL for the arena parameter. The arena should be zeroed when it
+ * is freed.
+*/
+SECStatus
+JPAKE_Round2(PLArenaPool *arena, const SECItem *p, const SECItem *q,
+ const SECItem *gx1, const SECItem *gx3, const SECItem *gx4,
+ SECItem *base, const SECItem *x2, const SECItem *s, SECItem *x2s);
+
+/* K = (B/g^(x2*x4*s))^x2 (mod p)
+ *
+ * K will be allocated in the arena. The arena is *not* optional so do not pass
+ * NULL for the arena parameter. The arena should be zeroed when it is freed.
+ */
+SECStatus
+JPAKE_Final(PLArenaPool *arena, const SECItem *p, const SECItem *q,
+ const SECItem *x2, const SECItem *gx4, const SECItem *x2s,
+ const SECItem *B, SECItem *K);
+
+/******************************************************
+** Elliptic Curve algorithms
+*/
+
+/* Generates a public and private key, both of which are encoded
+** in a single ECPrivateKey struct. Params is input, privKey is
+** output.
+*/
+extern SECStatus EC_NewKey(ECParams *params,
+ ECPrivateKey **privKey);
+
+extern SECStatus EC_NewKeyFromSeed(ECParams *params,
+ ECPrivateKey **privKey,
+ const unsigned char *seed,
+ int seedlen);
+
+/* Validates an EC public key as described in Section 5.2.2 of
+ * X9.62. Such validation protects against small subgroup attacks
+ * when the ECDH primitive is used with the cofactor.
+ */
+extern SECStatus EC_ValidatePublicKey(ECParams *params,
+ SECItem *publicValue);
+
+/*
+** ECDH_Derive performs a scalar point multiplication of a point
+** representing a (peer's) public key and a large integer representing
+** a private key (its own). Both keys must use the same elliptic curve
+** parameters. If the withCofactor parameter is true, the
+** multiplication also uses the cofactor associated with the curve
+** parameters. The output of this scheme is the x-coordinate of the
+** resulting point. If successful, derivedSecret->data is set to the
+** address of the newly allocated buffer containing the derived
+** secret, and derivedSecret->len is the size of the secret
+** produced. It is the caller's responsibility to free the allocated
+** buffer containing the derived secret.
+*/
+extern SECStatus ECDH_Derive(SECItem *publicValue,
+ ECParams *params,
+ SECItem *privateValue,
+ PRBool withCofactor,
+ SECItem *derivedSecret);
+
+/* On input, signature->len == size of buffer to hold signature.
+** digest->len == size of digest.
+** On output, signature->len == size of signature in buffer.
+** Uses a random seed.
+*/
+extern SECStatus ECDSA_SignDigest(ECPrivateKey *key,
+ SECItem *signature,
+ const SECItem *digest);
+
+/* On input, signature->len == size of buffer to hold signature.
+** digest->len == size of digest.
+*/
+extern SECStatus ECDSA_VerifyDigest(ECPublicKey *key,
+ const SECItem *signature,
+ const SECItem *digest);
+
+/* Uses the provided seed. */
+extern SECStatus ECDSA_SignDigestWithSeed(ECPrivateKey *key,
+ SECItem *signature,
+ const SECItem *digest,
+ const unsigned char *seed,
+ const int seedlen);
+
+/******************************************/
+/*
+** RC4 symmetric stream cypher
+*/
+
+/*
+** Create a new RC4 context suitable for RC4 encryption/decryption.
+** "key" raw key data
+** "len" the number of bytes of key data
+*/
+extern RC4Context *RC4_CreateContext(const unsigned char *key, int len);
+
+extern RC4Context *RC4_AllocateContext(void);
+extern SECStatus RC4_InitContext(RC4Context *cx,
+ const unsigned char *key,
+ unsigned int keylen,
+ const unsigned char *,
+ int,
+ unsigned int,
+ unsigned int);
+
+/*
+** Destroy an RC4 encryption/decryption context.
+** "cx" the context
+** "freeit" if PR_TRUE then free the object as well as its sub-objects
+*/
+extern void RC4_DestroyContext(RC4Context *cx, PRBool freeit);
+
+/*
+** Perform RC4 encryption.
+** "cx" the context
+** "output" the output buffer to store the encrypted data.
+** "outputLen" how much data is stored in "output". Set by the routine
+** after some data is stored in output.
+** "maxOutputLen" the maximum amount of data that can ever be
+** stored in "output"
+** "input" the input data
+** "inputLen" the amount of input data
+*/
+extern SECStatus RC4_Encrypt(RC4Context *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+/*
+** Perform RC4 decryption.
+** "cx" the context
+** "output" the output buffer to store the decrypted data.
+** "outputLen" how much data is stored in "output". Set by the routine
+** after some data is stored in output.
+** "maxOutputLen" the maximum amount of data that can ever be
+** stored in "output"
+** "input" the input data
+** "inputLen" the amount of input data
+*/
+extern SECStatus RC4_Decrypt(RC4Context *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+/******************************************/
+/*
+** RC2 symmetric block cypher
+*/
+
+/*
+** Create a new RC2 context suitable for RC2 encryption/decryption.
+** "key" raw key data
+** "len" the number of bytes of key data
+** "iv" is the CBC initialization vector (if mode is NSS_RC2_CBC)
+** "mode" one of NSS_RC2 or NSS_RC2_CBC
+** "effectiveKeyLen" is the effective key length (as specified in
+** RFC 2268) in bytes (not bits).
+**
+** When mode is set to NSS_RC2_CBC the RC2 cipher is run in "cipher block
+** chaining" mode.
+*/
+extern RC2Context *RC2_CreateContext(const unsigned char *key, unsigned int len,
+ const unsigned char *iv, int mode,
+ unsigned effectiveKeyLen);
+extern RC2Context *RC2_AllocateContext(void);
+extern SECStatus RC2_InitContext(RC2Context *cx,
+ const unsigned char *key,
+ unsigned int keylen,
+ const unsigned char *iv,
+ int mode,
+ unsigned int effectiveKeyLen,
+ unsigned int);
+
+/*
+** Destroy an RC2 encryption/decryption context.
+** "cx" the context
+** "freeit" if PR_TRUE then free the object as well as its sub-objects
+*/
+extern void RC2_DestroyContext(RC2Context *cx, PRBool freeit);
+
+/*
+** Perform RC2 encryption.
+** "cx" the context
+** "output" the output buffer to store the encrypted data.
+** "outputLen" how much data is stored in "output". Set by the routine
+** after some data is stored in output.
+** "maxOutputLen" the maximum amount of data that can ever be
+** stored in "output"
+** "input" the input data
+** "inputLen" the amount of input data
+*/
+extern SECStatus RC2_Encrypt(RC2Context *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+/*
+** Perform RC2 decryption.
+** "cx" the context
+** "output" the output buffer to store the decrypted data.
+** "outputLen" how much data is stored in "output". Set by the routine
+** after some data is stored in output.
+** "maxOutputLen" the maximum amount of data that can ever be
+** stored in "output"
+** "input" the input data
+** "inputLen" the amount of input data
+*/
+extern SECStatus RC2_Decrypt(RC2Context *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+/******************************************/
+/*
+** RC5 symmetric block cypher -- 64-bit block size
+*/
+
+/*
+** Create a new RC5 context suitable for RC5 encryption/decryption.
+** "key" raw key data
+** "len" the number of bytes of key data
+** "iv" is the CBC initialization vector (if mode is NSS_RC5_CBC)
+** "mode" one of NSS_RC5 or NSS_RC5_CBC
+**
+** When mode is set to NSS_RC5_CBC the RC5 cipher is run in "cipher block
+** chaining" mode.
+*/
+extern RC5Context *RC5_CreateContext(const SECItem *key, unsigned int rounds,
+ unsigned int wordSize, const unsigned char *iv, int mode);
+extern RC5Context *RC5_AllocateContext(void);
+extern SECStatus RC5_InitContext(RC5Context *cx,
+ const unsigned char *key,
+ unsigned int keylen,
+ const unsigned char *iv,
+ int mode,
+ unsigned int rounds,
+ unsigned int wordSize);
+
+/*
+** Destroy an RC5 encryption/decryption context.
+** "cx" the context
+** "freeit" if PR_TRUE then free the object as well as its sub-objects
+*/
+extern void RC5_DestroyContext(RC5Context *cx, PRBool freeit);
+
+/*
+** Perform RC5 encryption.
+** "cx" the context
+** "output" the output buffer to store the encrypted data.
+** "outputLen" how much data is stored in "output". Set by the routine
+** after some data is stored in output.
+** "maxOutputLen" the maximum amount of data that can ever be
+** stored in "output"
+** "input" the input data
+** "inputLen" the amount of input data
+*/
+extern SECStatus RC5_Encrypt(RC5Context *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+/*
+** Perform RC5 decryption.
+** "cx" the context
+** "output" the output buffer to store the decrypted data.
+** "outputLen" how much data is stored in "output". Set by the routine
+** after some data is stored in output.
+** "maxOutputLen" the maximum amount of data that can ever be
+** stored in "output"
+** "input" the input data
+** "inputLen" the amount of input data
+*/
+
+extern SECStatus RC5_Decrypt(RC5Context *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+/******************************************/
+/*
+** DES symmetric block cypher
+*/
+
+/*
+** Create a new DES context suitable for DES encryption/decryption.
+** "key" raw key data
+** "len" the number of bytes of key data
+** "iv" is the CBC initialization vector (if mode is NSS_DES_CBC or
+** mode is DES_EDE3_CBC)
+** "mode" one of NSS_DES, NSS_DES_CBC, NSS_DES_EDE3 or NSS_DES_EDE3_CBC
+** "encrypt" is PR_TRUE if the context will be used for encryption
+**
+** When mode is set to NSS_DES_CBC or NSS_DES_EDE3_CBC then the DES
+** cipher is run in "cipher block chaining" mode.
+*/
+extern DESContext *DES_CreateContext(const unsigned char *key,
+ const unsigned char *iv,
+ int mode, PRBool encrypt);
+extern DESContext *DES_AllocateContext(void);
+extern SECStatus DES_InitContext(DESContext *cx,
+ const unsigned char *key,
+ unsigned int keylen,
+ const unsigned char *iv,
+ int mode,
+ unsigned int encrypt,
+ unsigned int);
+
+/*
+** Destroy a DES encryption/decryption context.
+** "cx" the context
+** "freeit" if PR_TRUE then free the object as well as its sub-objects
+*/
+extern void DES_DestroyContext(DESContext *cx, PRBool freeit);
+
+/*
+** Perform DES encryption.
+** "cx" the context
+** "output" the output buffer to store the encrypted data.
+** "outputLen" how much data is stored in "output". Set by the routine
+** after some data is stored in output.
+** "maxOutputLen" the maximum amount of data that can ever be
+** stored in "output"
+** "input" the input data
+** "inputLen" the amount of input data
+**
+** NOTE: the inputLen must be a multiple of DES_KEY_LENGTH
+*/
+extern SECStatus DES_Encrypt(DESContext *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+/*
+** Perform DES decryption.
+** "cx" the context
+** "output" the output buffer to store the decrypted data.
+** "outputLen" how much data is stored in "output". Set by the routine
+** after some data is stored in output.
+** "maxOutputLen" the maximum amount of data that can ever be
+** stored in "output"
+** "input" the input data
+** "inputLen" the amount of input data
+**
+** NOTE: the inputLen must be a multiple of DES_KEY_LENGTH
+*/
+extern SECStatus DES_Decrypt(DESContext *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+/******************************************/
+/*
+** SEED symmetric block cypher
+*/
+extern SEEDContext *
+SEED_CreateContext(const unsigned char *key, const unsigned char *iv,
+ int mode, PRBool encrypt);
+extern SEEDContext *SEED_AllocateContext(void);
+extern SECStatus SEED_InitContext(SEEDContext *cx,
+ const unsigned char *key,
+ unsigned int keylen,
+ const unsigned char *iv,
+ int mode, unsigned int encrypt,
+ unsigned int);
+extern void SEED_DestroyContext(SEEDContext *cx, PRBool freeit);
+extern SECStatus
+SEED_Encrypt(SEEDContext *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+extern SECStatus
+SEED_Decrypt(SEEDContext *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+/******************************************/
+/*
+** AES symmetric block cypher (Rijndael)
+*/
+
+/*
+** Create a new AES context suitable for AES encryption/decryption.
+** "key" raw key data
+** "keylen" the number of bytes of key data (16, 24, or 32)
+** "blocklen" is the blocksize to use (16, 24, or 32)
+** XXX currently only blocksize==16 has been tested!
+*/
+extern AESContext *
+AES_CreateContext(const unsigned char *key, const unsigned char *iv,
+ int mode, int encrypt,
+ unsigned int keylen, unsigned int blocklen);
+extern AESContext *AES_AllocateContext(void);
+extern SECStatus AES_InitContext(AESContext *cx,
+ const unsigned char *key,
+ unsigned int keylen,
+ const unsigned char *iv,
+ int mode,
+ unsigned int encrypt,
+ unsigned int blocklen);
+
+/*
+** Destroy an AES encryption/decryption context.
+** "cx" the context
+** "freeit" if PR_TRUE then free the object as well as its sub-objects
+*/
+extern void
+AES_DestroyContext(AESContext *cx, PRBool freeit);
+
+/*
+** Perform AES encryption.
+** "cx" the context
+** "output" the output buffer to store the encrypted data.
+** "outputLen" how much data is stored in "output". Set by the routine
+** after some data is stored in output.
+** "maxOutputLen" the maximum amount of data that can ever be
+** stored in "output"
+** "input" the input data
+** "inputLen" the amount of input data
+*/
+extern SECStatus
+AES_Encrypt(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+/*
+** Perform AES decryption.
+** "cx" the context
+** "output" the output buffer to store the decrypted data.
+** "outputLen" how much data is stored in "output". Set by the routine
+** after some data is stored in output.
+** "maxOutputLen" the maximum amount of data that can ever be
+** stored in "output"
+** "input" the input data
+** "inputLen" the amount of input data
+*/
+extern SECStatus
+AES_Decrypt(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+/******************************************/
+/*
+** AES key wrap algorithm, RFC 3394
+*/
+
+/*
+** Create a new AES key wrap context suitable for AES key wrapping/unwrapping.
+** "key" raw key data
+** "iv" The 8 byte "initial value"
+** "encrypt", a boolean, true for key wrapping, false for unwrapping.
+** "keylen" the number of bytes of key data (16, 24, or 32)
+*/
+extern AESKeyWrapContext *
+AESKeyWrap_CreateContext(const unsigned char *key, const unsigned char *iv,
+ int encrypt, unsigned int keylen);
+extern AESKeyWrapContext *AESKeyWrap_AllocateContext(void);
+extern SECStatus
+AESKeyWrap_InitContext(AESKeyWrapContext *cx,
+ const unsigned char *key,
+ unsigned int keylen,
+ const unsigned char *iv,
+ int,
+ unsigned int encrypt,
+ unsigned int);
+
+/*
+** Destroy an AES KeyWrap context.
+** "cx" the context
+** "freeit" if PR_TRUE then free the object as well as its sub-objects
+*/
+extern void
+AESKeyWrap_DestroyContext(AESKeyWrapContext *cx, PRBool freeit);
+
+/*
+** Perform AES key wrap.
+** "cx" the context
+** "output" the output buffer to store the encrypted data.
+** "outputLen" how much data is stored in "output". Set by the routine
+** after some data is stored in output.
+** "maxOutputLen" the maximum amount of data that can ever be
+** stored in "output"
+** "input" the input data
+** "inputLen" the amount of input data
+*/
+extern SECStatus
+AESKeyWrap_Encrypt(AESKeyWrapContext *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+/*
+** Perform AES key unwrap.
+** "cx" the context
+** "output" the output buffer to store the decrypted data.
+** "outputLen" how much data is stored in "output". Set by the routine
+** after some data is stored in output.
+** "maxOutputLen" the maximum amount of data that can ever be
+** stored in "output"
+** "input" the input data
+** "inputLen" the amount of input data
+*/
+extern SECStatus
+AESKeyWrap_Decrypt(AESKeyWrapContext *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+/******************************************/
+/*
+** Camellia symmetric block cypher
+*/
+
+/*
+** Create a new Camellia context suitable for Camellia encryption/decryption.
+** "key" raw key data
+** "keylen" the number of bytes of key data (16, 24, or 32)
+*/
+extern CamelliaContext *
+Camellia_CreateContext(const unsigned char *key, const unsigned char *iv,
+ int mode, int encrypt, unsigned int keylen);
+
+extern CamelliaContext *Camellia_AllocateContext(void);
+extern SECStatus Camellia_InitContext(CamelliaContext *cx,
+ const unsigned char *key,
+ unsigned int keylen,
+ const unsigned char *iv,
+ int mode,
+ unsigned int encrypt,
+ unsigned int unused);
+/*
+** Destroy a Camellia encryption/decryption context.
+** "cx" the context
+** "freeit" if PR_TRUE then free the object as well as its sub-objects
+*/
+extern void
+Camellia_DestroyContext(CamelliaContext *cx, PRBool freeit);
+
+/*
+** Perform Camellia encryption.
+** "cx" the context
+** "output" the output buffer to store the encrypted data.
+** "outputLen" how much data is stored in "output". Set by the routine
+** after some data is stored in output.
+** "maxOutputLen" the maximum amount of data that can ever be
+** stored in "output"
+** "input" the input data
+** "inputLen" the amount of input data
+*/
+extern SECStatus
+Camellia_Encrypt(CamelliaContext *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+/*
+** Perform Camellia decryption.
+** "cx" the context
+** "output" the output buffer to store the decrypted data.
+** "outputLen" how much data is stored in "output". Set by the routine
+** after some data is stored in output.
+** "maxOutputLen" the maximum amount of data that can ever be
+** stored in "output"
+** "input" the input data
+** "inputLen" the amount of input data
+*/
+extern SECStatus
+Camellia_Decrypt(CamelliaContext *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+/******************************************/
+/*
+** ChaCha20+Poly1305 AEAD
+*/
+
+extern SECStatus ChaCha20Poly1305_InitContext(ChaCha20Poly1305Context *ctx,
+ const unsigned char *key,
+ unsigned int keyLen,
+ unsigned int tagLen);
+
+extern ChaCha20Poly1305Context *ChaCha20Poly1305_CreateContext(
+ const unsigned char *key, unsigned int keyLen, unsigned int tagLen);
+
+extern void ChaCha20Poly1305_DestroyContext(ChaCha20Poly1305Context *ctx,
+ PRBool freeit);
+
+extern SECStatus ChaCha20Poly1305_Seal(
+ const ChaCha20Poly1305Context *ctx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen,
+ const unsigned char *nonce, unsigned int nonceLen,
+ const unsigned char *ad, unsigned int adLen);
+
+extern SECStatus ChaCha20Poly1305_Open(
+ const ChaCha20Poly1305Context *ctx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen,
+ const unsigned char *nonce, unsigned int nonceLen,
+ const unsigned char *ad, unsigned int adLen);
+
+/******************************************/
+/*
+** MD5 secure hash function
+*/
+
+/*
+** Hash a null terminated string "src" into "dest" using MD5
+*/
+extern SECStatus MD5_Hash(unsigned char *dest, const char *src);
+
+/*
+** Hash a non-null terminated string "src" into "dest" using MD5
+*/
+extern SECStatus MD5_HashBuf(unsigned char *dest, const unsigned char *src,
+ PRUint32 src_length);
+
+/*
+** Create a new MD5 context
+*/
+extern MD5Context *MD5_NewContext(void);
+
+/*
+** Destroy an MD5 secure hash context.
+** "cx" the context
+** "freeit" if PR_TRUE then free the object as well as its sub-objects
+*/
+extern void MD5_DestroyContext(MD5Context *cx, PRBool freeit);
+
+/*
+** Reset an MD5 context, preparing it for a fresh round of hashing
+*/
+extern void MD5_Begin(MD5Context *cx);
+
+/*
+** Update the MD5 hash function with more data.
+** "cx" the context
+** "input" the data to hash
+** "inputLen" the amount of data to hash
+*/
+extern void MD5_Update(MD5Context *cx,
+ const unsigned char *input, unsigned int inputLen);
+
+/*
+** Finish the MD5 hash function. Produce the digested results in "digest"
+** "cx" the context
+** "digest" where the 16 bytes of digest data are stored
+** "digestLen" where the digest length (16) is stored
+** "maxDigestLen" the maximum amount of data that can ever be
+** stored in "digest"
+*/
+extern void MD5_End(MD5Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+
+/*
+** Export the current state of the MD5 hash without appending the standard
+** padding and length bytes. Produce the digested results in "digest"
+** "cx" the context
+** "digest" where the 16 bytes of digest data are stored
+** "digestLen" where the digest length (16) is stored (optional)
+** "maxDigestLen" the maximum amount of data that can ever be
+** stored in "digest"
+*/
+extern void MD5_EndRaw(MD5Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+
+/*
+ * Return the size of a buffer needed to flatten the MD5 Context into
+ * "cx" the context
+ * returns size;
+ */
+extern unsigned int MD5_FlattenSize(MD5Context *cx);
+
+/*
+ * Flatten the MD5 Context into a buffer:
+ * "cx" the context
+ * "space" the buffer to flatten to
+ * returns status;
+ */
+extern SECStatus MD5_Flatten(MD5Context *cx, unsigned char *space);
+
+/*
+ * Resurrect a flattened context into an MD5 Context
+ * "space" the buffer holding the flattened context
+ * "arg" ptr to void used by cryptographic resurrect
+ * returns resurrected context;
+ */
+extern MD5Context *MD5_Resurrect(unsigned char *space, void *arg);
+extern void MD5_Clone(MD5Context *dest, MD5Context *src);
+
+/*
+** trace the intermediate state info of the MD5 hash.
+*/
+extern void MD5_TraceState(MD5Context *cx);
+
+/******************************************/
+/*
+** MD2 secure hash function
+*/
+
+/*
+** Hash a null terminated string "src" into "dest" using MD2
+*/
+extern SECStatus MD2_Hash(unsigned char *dest, const char *src);
+
+/*
+** Create a new MD2 context
+*/
+extern MD2Context *MD2_NewContext(void);
+
+/*
+** Destroy an MD2 secure hash context.
+** "cx" the context
+** "freeit" if PR_TRUE then free the object as well as its sub-objects
+*/
+extern void MD2_DestroyContext(MD2Context *cx, PRBool freeit);
+
+/*
+** Reset an MD2 context, preparing it for a fresh round of hashing
+*/
+extern void MD2_Begin(MD2Context *cx);
+
+/*
+** Update the MD2 hash function with more data.
+** "cx" the context
+** "input" the data to hash
+** "inputLen" the amount of data to hash
+*/
+extern void MD2_Update(MD2Context *cx,
+ const unsigned char *input, unsigned int inputLen);
+
+/*
+** Finish the MD2 hash function. Produce the digested results in "digest"
+** "cx" the context
+** "digest" where the 16 bytes of digest data are stored
+** "digestLen" where the digest length (16) is stored
+** "maxDigestLen" the maximum amount of data that can ever be
+** stored in "digest"
+*/
+extern void MD2_End(MD2Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+
+/*
+ * Return the size of a buffer needed to flatten the MD2 Context into
+ * "cx" the context
+ * returns size;
+ */
+extern unsigned int MD2_FlattenSize(MD2Context *cx);
+
+/*
+ * Flatten the MD2 Context into a buffer:
+ * "cx" the context
+ * "space" the buffer to flatten to
+ * returns status;
+ */
+extern SECStatus MD2_Flatten(MD2Context *cx, unsigned char *space);
+
+/*
+ * Resurrect a flattened context into an MD2 Context
+ * "space" the buffer holding the flattened context
+ * "arg" ptr to void used by cryptographic resurrect
+ * returns resurrected context;
+ */
+extern MD2Context *MD2_Resurrect(unsigned char *space, void *arg);
+extern void MD2_Clone(MD2Context *dest, MD2Context *src);
+
+/******************************************/
+/*
+** SHA-1 secure hash function
+*/
+
+/*
+** Hash a null terminated string "src" into "dest" using SHA-1
+*/
+extern SECStatus SHA1_Hash(unsigned char *dest, const char *src);
+
+/*
+** Hash a non-null terminated string "src" into "dest" using SHA-1
+*/
+extern SECStatus SHA1_HashBuf(unsigned char *dest, const unsigned char *src,
+ PRUint32 src_length);
+
+/*
+** Create a new SHA-1 context
+*/
+extern SHA1Context *SHA1_NewContext(void);
+
+/*
+** Destroy a SHA-1 secure hash context.
+** "cx" the context
+** "freeit" if PR_TRUE then free the object as well as its sub-objects
+*/
+extern void SHA1_DestroyContext(SHA1Context *cx, PRBool freeit);
+
+/*
+** Reset a SHA-1 context, preparing it for a fresh round of hashing
+*/
+extern void SHA1_Begin(SHA1Context *cx);
+
+/*
+** Update the SHA-1 hash function with more data.
+** "cx" the context
+** "input" the data to hash
+** "inputLen" the amount of data to hash
+*/
+extern void SHA1_Update(SHA1Context *cx, const unsigned char *input,
+ unsigned int inputLen);
+
+/*
+** Finish the SHA-1 hash function. Produce the digested results in "digest"
+** "cx" the context
+** "digest" where the 20 bytes of digest data are stored
+** "digestLen" where the digest length (20) is stored
+** "maxDigestLen" the maximum amount of data that can ever be
+** stored in "digest"
+*/
+extern void SHA1_End(SHA1Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+
+/*
+** Export the current state of the SHA-1 hash without appending the standard
+** padding and length bytes. Produce the digested results in "digest"
+** "cx" the context
+** "digest" where the 20 bytes of digest data are stored
+** "digestLen" where the digest length (20) is stored (optional)
+** "maxDigestLen" the maximum amount of data that can ever be
+** stored in "digest"
+*/
+extern void SHA1_EndRaw(SHA1Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+
+/*
+** trace the intermediate state info of the SHA1 hash.
+*/
+extern void SHA1_TraceState(SHA1Context *cx);
+
+/*
+ * Return the size of a buffer needed to flatten the SHA-1 Context into
+ * "cx" the context
+ * returns size;
+ */
+extern unsigned int SHA1_FlattenSize(SHA1Context *cx);
+
+/*
+ * Flatten the SHA-1 Context into a buffer:
+ * "cx" the context
+ * "space" the buffer to flatten to
+ * returns status;
+ */
+extern SECStatus SHA1_Flatten(SHA1Context *cx, unsigned char *space);
+
+/*
+ * Resurrect a flattened context into a SHA-1 Context
+ * "space" the buffer holding the flattened context
+ * "arg" ptr to void used by cryptographic resurrect
+ * returns resurrected context;
+ */
+extern SHA1Context *SHA1_Resurrect(unsigned char *space, void *arg);
+extern void SHA1_Clone(SHA1Context *dest, SHA1Context *src);
+
+/******************************************/
+
+extern SHA224Context *SHA224_NewContext(void);
+extern void SHA224_DestroyContext(SHA224Context *cx, PRBool freeit);
+extern void SHA224_Begin(SHA224Context *cx);
+extern void SHA224_Update(SHA224Context *cx, const unsigned char *input,
+ unsigned int inputLen);
+extern void SHA224_End(SHA224Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+/*
+** Export the current state of the SHA-224 hash without appending the standard
+** padding and length bytes. Produce the digested results in "digest"
+** "cx" the context
+** "digest" where the 28 bytes of digest data are stored
+** "digestLen" where the digest length (28) is stored (optional)
+** "maxDigestLen" the maximum amount of data that can ever be
+** stored in "digest"
+*/
+extern void SHA224_EndRaw(SHA224Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+extern SECStatus SHA224_HashBuf(unsigned char *dest, const unsigned char *src,
+ PRUint32 src_length);
+extern SECStatus SHA224_Hash(unsigned char *dest, const char *src);
+extern void SHA224_TraceState(SHA224Context *cx);
+extern unsigned int SHA224_FlattenSize(SHA224Context *cx);
+extern SECStatus SHA224_Flatten(SHA224Context *cx, unsigned char *space);
+extern SHA224Context *SHA224_Resurrect(unsigned char *space, void *arg);
+extern void SHA224_Clone(SHA224Context *dest, SHA224Context *src);
+
+/******************************************/
+
+extern SHA256Context *SHA256_NewContext(void);
+extern void SHA256_DestroyContext(SHA256Context *cx, PRBool freeit);
+extern void SHA256_Begin(SHA256Context *cx);
+extern void SHA256_Update(SHA256Context *cx, const unsigned char *input,
+ unsigned int inputLen);
+extern void SHA256_End(SHA256Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+/*
+** Export the current state of the SHA-256 hash without appending the standard
+** padding and length bytes. Produce the digested results in "digest"
+** "cx" the context
+** "digest" where the 32 bytes of digest data are stored
+** "digestLen" where the digest length (32) is stored (optional)
+** "maxDigestLen" the maximum amount of data that can ever be
+** stored in "digest"
+*/
+extern void SHA256_EndRaw(SHA256Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+extern SECStatus SHA256_HashBuf(unsigned char *dest, const unsigned char *src,
+ PRUint32 src_length);
+extern SECStatus SHA256_Hash(unsigned char *dest, const char *src);
+extern void SHA256_TraceState(SHA256Context *cx);
+extern unsigned int SHA256_FlattenSize(SHA256Context *cx);
+extern SECStatus SHA256_Flatten(SHA256Context *cx, unsigned char *space);
+extern SHA256Context *SHA256_Resurrect(unsigned char *space, void *arg);
+extern void SHA256_Clone(SHA256Context *dest, SHA256Context *src);
+
+/******************************************/
+
+extern SHA512Context *SHA512_NewContext(void);
+extern void SHA512_DestroyContext(SHA512Context *cx, PRBool freeit);
+extern void SHA512_Begin(SHA512Context *cx);
+extern void SHA512_Update(SHA512Context *cx, const unsigned char *input,
+ unsigned int inputLen);
+/*
+** Export the current state of the SHA-512 hash without appending the standard
+** padding and length bytes. Produce the digested results in "digest"
+** "cx" the context
+** "digest" where the 64 bytes of digest data are stored
+** "digestLen" where the digest length (64) is stored (optional)
+** "maxDigestLen" the maximum amount of data that can ever be
+** stored in "digest"
+*/
+extern void SHA512_EndRaw(SHA512Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+extern void SHA512_End(SHA512Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+extern SECStatus SHA512_HashBuf(unsigned char *dest, const unsigned char *src,
+ PRUint32 src_length);
+extern SECStatus SHA512_Hash(unsigned char *dest, const char *src);
+extern void SHA512_TraceState(SHA512Context *cx);
+extern unsigned int SHA512_FlattenSize(SHA512Context *cx);
+extern SECStatus SHA512_Flatten(SHA512Context *cx, unsigned char *space);
+extern SHA512Context *SHA512_Resurrect(unsigned char *space, void *arg);
+extern void SHA512_Clone(SHA512Context *dest, SHA512Context *src);
+
+/******************************************/
+
+extern SHA384Context *SHA384_NewContext(void);
+extern void SHA384_DestroyContext(SHA384Context *cx, PRBool freeit);
+extern void SHA384_Begin(SHA384Context *cx);
+extern void SHA384_Update(SHA384Context *cx, const unsigned char *input,
+ unsigned int inputLen);
+extern void SHA384_End(SHA384Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+/*
+** Export the current state of the SHA-384 hash without appending the standard
+** padding and length bytes. Produce the digested results in "digest"
+** "cx" the context
+** "digest" where the 48 bytes of digest data are stored
+** "digestLen" where the digest length (48) is stored (optional)
+** "maxDigestLen" the maximum amount of data that can ever be
+** stored in "digest"
+*/
+extern void SHA384_EndRaw(SHA384Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+extern SECStatus SHA384_HashBuf(unsigned char *dest, const unsigned char *src,
+ PRUint32 src_length);
+extern SECStatus SHA384_Hash(unsigned char *dest, const char *src);
+extern void SHA384_TraceState(SHA384Context *cx);
+extern unsigned int SHA384_FlattenSize(SHA384Context *cx);
+extern SECStatus SHA384_Flatten(SHA384Context *cx, unsigned char *space);
+extern SHA384Context *SHA384_Resurrect(unsigned char *space, void *arg);
+extern void SHA384_Clone(SHA384Context *dest, SHA384Context *src);
+
+/****************************************
+ * implement TLS 1.0 Pseudo Random Function (PRF) and TLS P_hash function
+ */
+
+extern SECStatus
+TLS_PRF(const SECItem *secret, const char *label, SECItem *seed,
+ SECItem *result, PRBool isFIPS);
+
+extern SECStatus
+TLS_P_hash(HASH_HashType hashAlg, const SECItem *secret, const char *label,
+ SECItem *seed, SECItem *result, PRBool isFIPS);
+
+/******************************************/
+/*
+** Pseudo Random Number Generation. FIPS compliance desirable.
+*/
+
+/*
+** Initialize the global RNG context and give it some seed input taken
+** from the system. This function is thread-safe and will only allow
+** the global context to be initialized once. The seed input is likely
+** small, so it is imperative that RNG_RandomUpdate() be called with
+** additional seed data before the generator is used. A good way to
+** provide the generator with additional entropy is to call
+** RNG_SystemInfoForRNG(). Note that NSS_Init() does exactly that.
+*/
+extern SECStatus RNG_RNGInit(void);
+
+/*
+** Update the global random number generator with more seeding
+** material
+*/
+extern SECStatus RNG_RandomUpdate(const void *data, size_t bytes);
+
+/*
+** Generate some random bytes, using the global random number generator
+** object.
+*/
+extern SECStatus RNG_GenerateGlobalRandomBytes(void *dest, size_t len);
+
+extern SECStatus RNG_ResetForFuzzing(void);
+
+/* Destroy the global RNG context. After a call to RNG_RNGShutdown()
+** a call to RNG_RNGInit() is required in order to use the generator again,
+** along with seed data (see the comment above RNG_RNGInit()).
+*/
+extern void RNG_RNGShutdown(void);
+
+extern void RNG_SystemInfoForRNG(void);
+
+/*
+ * FIPS 186-2 Change Notice 1 RNG Algorithm 1, used both to
+ * generate the DSA X parameter and as a generic purpose RNG.
+ *
+ * The following two FIPS186Change functions are needed for
+ * NIST RNG Validation System.
+ */
+
+/*
+ * FIPS186Change_GenerateX is now deprecated. It will return SECFailure with
+ * the error set to PR_NOT_IMPLEMENTED_ERROR.
+ */
+extern SECStatus
+FIPS186Change_GenerateX(unsigned char *XKEY,
+ const unsigned char *XSEEDj,
+ unsigned char *x_j);
+
+/*
+ * When generating the DSA X parameter, we generate 2*GSIZE bytes
+ * of random output and reduce it mod q.
+ *
+ * Input: w, 2*GSIZE bytes
+ * q, DSA_SUBPRIME_LEN bytes
+ * Output: xj, DSA_SUBPRIME_LEN bytes
+ */
+extern SECStatus
+FIPS186Change_ReduceModQForDSA(const unsigned char *w,
+ const unsigned char *q,
+ unsigned char *xj);
+
+/* To allow NIST KAT tests */
+extern SECStatus
+PRNGTEST_Instantiate_Kat(const PRUint8 *entropy, unsigned int entropy_len,
+ const PRUint8 *nonce, unsigned int nonce_len,
+ const PRUint8 *personal_string, unsigned int ps_len);
+
+/*
+ * The following functions are for FIPS poweron self test and FIPS algorithm
+ * testing.
+ */
+extern SECStatus
+PRNGTEST_Instantiate(const PRUint8 *entropy, unsigned int entropy_len,
+ const PRUint8 *nonce, unsigned int nonce_len,
+ const PRUint8 *personal_string, unsigned int ps_len);
+
+extern SECStatus
+PRNGTEST_Reseed(const PRUint8 *entropy, unsigned int entropy_len,
+ const PRUint8 *additional, unsigned int additional_len);
+
+extern SECStatus
+PRNGTEST_Generate(PRUint8 *bytes, unsigned int bytes_len,
+ const PRUint8 *additional, unsigned int additional_len);
+
+extern SECStatus
+PRNGTEST_Uninstantiate(void);
+
+extern SECStatus
+PRNGTEST_RunHealthTests(void);
+
+/* Generate PQGParams and PQGVerify structs.
+ * Length of seed and length of h both equal length of P.
+ * All lengths are specified by "j", according to the table above.
+ *
+ * The verify parameters will conform to FIPS186-1.
+ */
+extern SECStatus
+PQG_ParamGen(unsigned int j, /* input : determines length of P. */
+ PQGParams **pParams, /* output: P Q and G returned here */
+ PQGVerify **pVfy); /* output: counter and seed. */
+
+/* Generate PQGParams and PQGVerify structs.
+ * Length of P specified by j. Length of h will match length of P.
+ * Length of SEED in bytes specified in seedBytes.
+ * seedBytes must be in the range [20..255] or an error will result.
+ *
+ * The verify parameters will conform to FIPS186-1.
+ */
+extern SECStatus
+PQG_ParamGenSeedLen(
+ unsigned int j, /* input : determines length of P. */
+ unsigned int seedBytes, /* input : length of seed in bytes.*/
+ PQGParams **pParams, /* output: P Q and G returned here */
+ PQGVerify **pVfy); /* output: counter and seed. */
+
+/* Generate PQGParams and PQGVerify structs.
+ * Length of P specified by L in bits.
+ * Length of Q specified by N in bits.
+ * Length of SEED in bytes specified in seedBytes.
+ * seedBytes must be in the range [N..L*2] or an error will result.
+ *
+ * Note that j uses the above table, while L is the exact length. L and N must
+ * match the table below or an error will result:
+ *
+ * L N
+ * 1024 160
+ * 2048 224
+ * 2048 256
+ * 3072 256
+ *
+ * If N or seedBytes are set to zero, then PQG_ParamGenV2 will
+ * pick a default value (typically the smallest secure value for these
+ * variables).
+ *
+ * The verify parameters will conform to FIPS186-3 using the smallest
+ * permissible hash for the key strength.
+ */
+extern SECStatus
+PQG_ParamGenV2(
+ unsigned int L, /* input : determines length of P. */
+ unsigned int N, /* input : determines length of Q. */
+ unsigned int seedBytes, /* input : length of seed in bytes.*/
+ PQGParams **pParams, /* output: P Q and G returned here */
+ PQGVerify **pVfy); /* output: counter and seed. */
+
+/* Test PQGParams for validity as DSS PQG values.
+ * If vfy is non-NULL, test PQGParams to make sure they were generated
+ * using the specified seed, counter, and h values.
+ *
+ * Return value indicates whether Verification operation ran successfully
+ * to completion, but does not indicate if PQGParams are valid or not.
+ * If return value is SECSuccess, then *pResult has these meanings:
+ * SECSuccess: PQGParams are valid.
+ * SECFailure: PQGParams are invalid.
+ *
+ * Verify the PQG against the counter, SEED and h.
+ * These tests are specified in FIPS 186-3 Appendix A.1.1.1, A.1.1.3, and A.2.2
+ * PQG_VerifyParams will automatically choose the appropriate test.
+ */
+
+extern SECStatus PQG_VerifyParams(const PQGParams *params,
+ const PQGVerify *vfy, SECStatus *result);
+
+extern void PQG_DestroyParams(PQGParams *params);
+
+extern void PQG_DestroyVerify(PQGVerify *vfy);
+
+/*
+ * clean-up any global tables freebl may have allocated after it starts up.
+ * This function is not thread safe and should be called only after the
+ * library has been quiesced.
+ */
+extern void BL_Cleanup(void);
+
+/* unload freebl shared library from memory */
+extern void BL_Unload(void);
+
+/**************************************************************************
+ * Verify a given Shared library signature *
+ **************************************************************************/
+PRBool BLAPI_SHVerify(const char *name, PRFuncPtr addr);
+
+/**************************************************************************
+ * Verify a given filename's signature *
+ **************************************************************************/
+PRBool BLAPI_SHVerifyFile(const char *shName);
+
+/**************************************************************************
+ * Verify Our Own Shared library signature *
+ **************************************************************************/
+PRBool BLAPI_VerifySelf(const char *name);
+
+/*********************************************************************/
+extern const SECHashObject *HASH_GetRawHashObject(HASH_HashType hashType);
+
+extern void BL_SetForkState(PRBool forked);
+
+/*
+** prepare an ECParams structure from DEREncoded params
+ */
+extern SECStatus EC_FillParams(PLArenaPool *arena,
+ const SECItem *encodedParams, ECParams *params);
+extern SECStatus EC_DecodeParams(const SECItem *encodedParams,
+ ECParams **ecparams);
+extern SECStatus EC_CopyParams(PLArenaPool *arena, ECParams *dstParams,
+ const ECParams *srcParams);
+
+/*
+ * use the internal table to get the size in bytes of a single EC point
+ */
+extern int EC_GetPointSize(const ECParams *params);
+
+SEC_END_PROTOS
+
+#endif /* _BLAPI_H_ */
diff --git a/security/nss/lib/freebl/blapii.h b/security/nss/lib/freebl/blapii.h
new file mode 100644
index 000000000..6ad2e2892
--- /dev/null
+++ b/security/nss/lib/freebl/blapii.h
@@ -0,0 +1,61 @@
+/*
+ * blapii.h - private data structures and prototypes for the freebl library
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _BLAPII_H_
+#define _BLAPII_H_
+
+#include "blapit.h"
+
+/* max block size of supported block ciphers */
+#define MAX_BLOCK_SIZE 16
+
+typedef SECStatus (*freeblCipherFunc)(void *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen,
+ unsigned int blocksize);
+typedef void (*freeblDestroyFunc)(void *cx, PRBool freeit);
+
+SEC_BEGIN_PROTOS
+
+SECStatus BL_FIPSEntryOK(PRBool freeblOnly);
+PRBool BL_POSTRan(PRBool freeblOnly);
+
+#if defined(XP_UNIX) && !defined(NO_FORK_CHECK)
+
+extern PRBool bl_parentForkedAfterC_Initialize;
+
+#define SKIP_AFTER_FORK(x) \
+ if (!bl_parentForkedAfterC_Initialize) \
+ x
+
+#else
+
+#define SKIP_AFTER_FORK(x) x
+
+#endif
+
+SEC_END_PROTOS
+
+#if defined(NSS_X86_OR_X64)
+#define HAVE_UNALIGNED_ACCESS 1
+#endif
+
+#if defined(__clang__)
+#define HAVE_NO_SANITIZE_ATTR __has_attribute(no_sanitize)
+#else
+#define HAVE_NO_SANITIZE_ATTR 0
+#endif
+
+#if defined(HAVE_UNALIGNED_ACCESS) && HAVE_NO_SANITIZE_ATTR
+#define NO_SANITIZE_ALIGNMENT __attribute__((no_sanitize("alignment")))
+#else
+#define NO_SANITIZE_ALIGNMENT
+#endif
+
+#undef HAVE_NO_SANITIZE_ATTR
+
+#endif /* _BLAPII_H_ */
diff --git a/security/nss/lib/freebl/blapit.h b/security/nss/lib/freebl/blapit.h
new file mode 100644
index 000000000..2a17b5f46
--- /dev/null
+++ b/security/nss/lib/freebl/blapit.h
@@ -0,0 +1,414 @@
+/*
+ * blapit.h - public data structures for the freebl library
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _BLAPIT_H_
+#define _BLAPIT_H_
+
+#include "seccomon.h"
+#include "prlink.h"
+#include "plarena.h"
+#include "ecl-exp.h"
+
+/* RC2 operation modes */
+#define NSS_RC2 0
+#define NSS_RC2_CBC 1
+
+/* RC5 operation modes */
+#define NSS_RC5 0
+#define NSS_RC5_CBC 1
+
+/* DES operation modes */
+#define NSS_DES 0
+#define NSS_DES_CBC 1
+#define NSS_DES_EDE3 2
+#define NSS_DES_EDE3_CBC 3
+
+#define DES_KEY_LENGTH 8 /* Bytes */
+
+/* AES operation modes */
+#define NSS_AES 0
+#define NSS_AES_CBC 1
+#define NSS_AES_CTS 2
+#define NSS_AES_CTR 3
+#define NSS_AES_GCM 4
+
+/* Camellia operation modes */
+#define NSS_CAMELLIA 0
+#define NSS_CAMELLIA_CBC 1
+
+/* SEED operation modes */
+#define NSS_SEED 0
+#define NSS_SEED_CBC 1
+
+#define DSA1_SUBPRIME_LEN 20 /* Bytes */
+#define DSA1_SIGNATURE_LEN (DSA1_SUBPRIME_LEN * 2) /* Bytes */
+#define DSA_MAX_SUBPRIME_LEN 32 /* Bytes */
+#define DSA_MAX_SIGNATURE_LEN (DSA_MAX_SUBPRIME_LEN * 2) /* Bytes */
+
+/*
+ * Mark the old defines as deprecated. This will warn code that expected
+ * DSA1 only that it needs to change if it is to support DSA2.
+ */
+#if defined(__GNUC__) && (__GNUC__ > 3)
+/* make GCC warn when we use these #defines */
+typedef int __BLAPI_DEPRECATED __attribute__((deprecated));
+#define DSA_SUBPRIME_LEN ((__BLAPI_DEPRECATED)DSA1_SUBPRIME_LEN)
+#define DSA_SIGNATURE_LEN ((__BLAPI_DEPRECATED)DSA1_SIGNATURE_LEN)
+#define DSA_Q_BITS ((__BLAPI_DEPRECATED)(DSA1_SUBPRIME_LEN * 8))
+#else
+#ifdef _WIN32
+/* This magic gets the windows compiler to give us a deprecation
+ * warning. The names listed must match the macros defined below. */
+#pragma deprecated(DSA_SUBPRIME_LEN, DSA_SIGNATURE_LEN, DSA_Q_BITS)
+#endif
+#define DSA_SUBPRIME_LEN DSA1_SUBPRIME_LEN
+#define DSA_SIGNATURE_LEN DSA1_SIGNATURE_LEN
+#define DSA_Q_BITS (DSA1_SUBPRIME_LEN * 8)
+#endif
+
+/* XXX We shouldn't have to hard code this limit. For
+ * now, this is the quickest way to support ECDSA signature
+ * processing (ECDSA signature lengths depend on curve
+ * size). This limit is sufficient for curves up to
+ * 576 bits.
+ */
+#define MAX_ECKEY_LEN 72 /* Bytes */
+
+#define EC_MAX_KEY_BITS 521 /* in bits */
+#define EC_MIN_KEY_BITS 256 /* in bits */
+
+/* EC point compression format */
+#define EC_POINT_FORM_COMPRESSED_Y0 0x02
+#define EC_POINT_FORM_COMPRESSED_Y1 0x03
+#define EC_POINT_FORM_UNCOMPRESSED 0x04
+#define EC_POINT_FORM_HYBRID_Y0 0x06
+#define EC_POINT_FORM_HYBRID_Y1 0x07
+
+/*
+ * Number of bytes each hash algorithm produces
+ */
+#define MD2_LENGTH 16 /* Bytes */
+#define MD5_LENGTH 16 /* Bytes */
+#define SHA1_LENGTH 20 /* Bytes */
+#define SHA256_LENGTH 32 /* bytes */
+#define SHA384_LENGTH 48 /* bytes */
+#define SHA512_LENGTH 64 /* bytes */
+#define HASH_LENGTH_MAX SHA512_LENGTH
+
+/*
+ * Input block size for each hash algorithm.
+ */
+
+#define MD2_BLOCK_LENGTH 64 /* bytes */
+#define MD5_BLOCK_LENGTH 64 /* bytes */
+#define SHA1_BLOCK_LENGTH 64 /* bytes */
+#define SHA224_BLOCK_LENGTH 64 /* bytes */
+#define SHA256_BLOCK_LENGTH 64 /* bytes */
+#define SHA384_BLOCK_LENGTH 128 /* bytes */
+#define SHA512_BLOCK_LENGTH 128 /* bytes */
+#define HASH_BLOCK_LENGTH_MAX SHA512_BLOCK_LENGTH
+
+#define AES_KEY_WRAP_IV_BYTES 8
+#define AES_KEY_WRAP_BLOCK_SIZE 8 /* bytes */
+#define AES_BLOCK_SIZE 16 /* bytes */
+
+#define AES_128_KEY_LENGTH 16 /* bytes */
+#define AES_192_KEY_LENGTH 24 /* bytes */
+#define AES_256_KEY_LENGTH 32 /* bytes */
+
+#define CAMELLIA_BLOCK_SIZE 16 /* bytes */
+
+#define SEED_BLOCK_SIZE 16 /* bytes */
+#define SEED_KEY_LENGTH 16 /* bytes */
+
+#define NSS_FREEBL_DEFAULT_CHUNKSIZE 2048
+
+/*
+ * These values come from the initial key size limits from the PKCS #11
+ * module. They may be arbitrarily adjusted to any value freebl supports.
+ */
+#define RSA_MIN_MODULUS_BITS 128
+#define RSA_MAX_MODULUS_BITS 16384
+#define RSA_MAX_EXPONENT_BITS 64
+#define DH_MIN_P_BITS 128
+#define DH_MAX_P_BITS 16384
+
+/*
+ * The FIPS 186-1 algorithm for generating primes P and Q allows only 9
+ * distinct values for the length of P, and only one value for the
+ * length of Q.
+ * The algorithm uses a variable j to indicate which of the 9 lengths
+ * of P is to be used.
+ * The following table relates j to the lengths of P and Q in bits.
+ *
+ * j bits in P bits in Q
+ * _ _________ _________
+ * 0 512 160
+ * 1 576 160
+ * 2 640 160
+ * 3 704 160
+ * 4 768 160
+ * 5 832 160
+ * 6 896 160
+ * 7 960 160
+ * 8 1024 160
+ *
+ * The FIPS-186-1 compliant PQG generator takes j as an input parameter.
+ *
+ * FIPS 186-3 algorithm specifies 4 distinct P and Q sizes:
+ *
+ * bits in P bits in Q
+ * _________ _________
+ * 1024 160
+ * 2048 224
+ * 2048 256
+ * 3072 256
+ *
+ * The FIPS-186-3 compliant PQG generator (PQG V2) takes arbitrary p and q
+ * lengths as input and returns an error if they aren't in this list.
+ */
+
+#define DSA1_Q_BITS 160
+#define DSA_MAX_P_BITS 3072
+#define DSA_MIN_P_BITS 512
+#define DSA_MAX_Q_BITS 256
+#define DSA_MIN_Q_BITS 160
+
+#if DSA_MAX_Q_BITS != DSA_MAX_SUBPRIME_LEN * 8
+#error "Inconsistent declaration of DSA SUBPRIME/Q parameters in blapit.h"
+#endif
+
+/*
+ * macro takes desired number of bits in P,
+ * returns index (0..8) or -1 if number of bits is invalid
+ * (below 512, above 1024, or not a multiple of 64).
+ * The argument is evaluated more than once.
+ */
+#define PQG_PBITS_TO_INDEX(bits) \
+ (((bits) < 512 || (bits) > 1024 || (bits) % 64) ? -1 : (int)((bits)-512) / 64)
+
+/*
+ * macro takes index (0-8),
+ * returns number of bits in P for that index, or -1 if index is invalid.
+ * The argument is evaluated more than once.
+ */
+#define PQG_INDEX_TO_PBITS(j) (((unsigned)(j) > 8) ? -1 : (512 + 64 * (j)))
+
+/***************************************************************************
+** Opaque objects
+*/
+
+struct DESContextStr;
+struct RC2ContextStr;
+struct RC4ContextStr;
+struct RC5ContextStr;
+struct AESContextStr;
+struct CamelliaContextStr;
+struct MD2ContextStr;
+struct MD5ContextStr;
+struct SHA1ContextStr;
+struct SHA256ContextStr;
+struct SHA512ContextStr;
+struct AESKeyWrapContextStr;
+struct SEEDContextStr;
+struct ChaCha20Poly1305ContextStr;
+
+typedef struct DESContextStr DESContext;
+typedef struct RC2ContextStr RC2Context;
+typedef struct RC4ContextStr RC4Context;
+typedef struct RC5ContextStr RC5Context;
+typedef struct AESContextStr AESContext;
+typedef struct CamelliaContextStr CamelliaContext;
+typedef struct MD2ContextStr MD2Context;
+typedef struct MD5ContextStr MD5Context;
+typedef struct SHA1ContextStr SHA1Context;
+typedef struct SHA256ContextStr SHA256Context;
+/* SHA224Context is really a SHA256ContextStr. This is not a mistake. */
+typedef struct SHA256ContextStr SHA224Context;
+typedef struct SHA512ContextStr SHA512Context;
+/* SHA384Context is really a SHA512ContextStr. This is not a mistake. */
+typedef struct SHA512ContextStr SHA384Context;
+typedef struct AESKeyWrapContextStr AESKeyWrapContext;
+typedef struct SEEDContextStr SEEDContext;
+typedef struct ChaCha20Poly1305ContextStr ChaCha20Poly1305Context;
+
+/***************************************************************************
+** RSA Public and Private Key structures
+*/
+
+/* member names from PKCS#1, section 7.1 */
+struct RSAPublicKeyStr {
+ PLArenaPool *arena;
+ SECItem modulus;
+ SECItem publicExponent;
+};
+typedef struct RSAPublicKeyStr RSAPublicKey;
+
+/* member names from PKCS#1, section 7.2 */
+struct RSAPrivateKeyStr {
+ PLArenaPool *arena;
+ SECItem version;
+ SECItem modulus;
+ SECItem publicExponent;
+ SECItem privateExponent;
+ SECItem prime1;
+ SECItem prime2;
+ SECItem exponent1;
+ SECItem exponent2;
+ SECItem coefficient;
+};
+typedef struct RSAPrivateKeyStr RSAPrivateKey;
+
+/***************************************************************************
+** DSA Public and Private Key and related structures
+*/
+
+struct PQGParamsStr {
+ PLArenaPool *arena;
+ SECItem prime; /* p */
+ SECItem subPrime; /* q */
+ SECItem base; /* g */
+ /* XXX chrisk: this needs to be expanded to hold j and validationParms (RFC2459 7.3.2) */
+};
+typedef struct PQGParamsStr PQGParams;
+
+struct PQGVerifyStr {
+ PLArenaPool *arena; /* includes this struct, seed, & h. */
+ unsigned int counter;
+ SECItem seed;
+ SECItem h;
+};
+typedef struct PQGVerifyStr PQGVerify;
+
+struct DSAPublicKeyStr {
+ PQGParams params;
+ SECItem publicValue;
+};
+typedef struct DSAPublicKeyStr DSAPublicKey;
+
+struct DSAPrivateKeyStr {
+ PQGParams params;
+ SECItem publicValue;
+ SECItem privateValue;
+};
+typedef struct DSAPrivateKeyStr DSAPrivateKey;
+
+/***************************************************************************
+** Diffie-Hellman Public and Private Key and related structures
+** Structure member names suggested by PKCS#3.
+*/
+
+struct DHParamsStr {
+ PLArenaPool *arena;
+ SECItem prime; /* p */
+ SECItem base; /* g */
+};
+typedef struct DHParamsStr DHParams;
+
+struct DHPublicKeyStr {
+ PLArenaPool *arena;
+ SECItem prime;
+ SECItem base;
+ SECItem publicValue;
+};
+typedef struct DHPublicKeyStr DHPublicKey;
+
+struct DHPrivateKeyStr {
+ PLArenaPool *arena;
+ SECItem prime;
+ SECItem base;
+ SECItem publicValue;
+ SECItem privateValue;
+};
+typedef struct DHPrivateKeyStr DHPrivateKey;
+
+/***************************************************************************
+** Data structures used for elliptic curve parameters and
+** public and private keys.
+*/
+
+/*
+** The ECParams data structures can encode elliptic curve
+** parameters for both GFp and GF2m curves.
+*/
+
+typedef enum { ec_params_explicit,
+ ec_params_named
+} ECParamsType;
+
+typedef enum { ec_field_GFp = 1,
+ ec_field_GF2m,
+ ec_field_plain
+} ECFieldType;
+
+struct ECFieldIDStr {
+ int size; /* field size in bits */
+ ECFieldType type;
+ union {
+ SECItem prime; /* prime p for (GFp) */
+ SECItem poly; /* irreducible binary polynomial for (GF2m) */
+ } u;
+ int k1; /* first coefficient of pentanomial or
+ * the only coefficient of trinomial
+ */
+ int k2; /* two remaining coefficients of pentanomial */
+ int k3;
+};
+typedef struct ECFieldIDStr ECFieldID;
+
+struct ECCurveStr {
+ SECItem a; /* contains octet stream encoding of
+ * field element (X9.62 section 4.3.3)
+ */
+ SECItem b;
+ SECItem seed;
+};
+typedef struct ECCurveStr ECCurve;
+
+struct ECParamsStr {
+ PLArenaPool *arena;
+ ECParamsType type;
+ ECFieldID fieldID;
+ ECCurve curve;
+ SECItem base;
+ SECItem order;
+ int cofactor;
+ SECItem DEREncoding;
+ ECCurveName name;
+ SECItem curveOID;
+};
+typedef struct ECParamsStr ECParams;
+
+struct ECPublicKeyStr {
+ ECParams ecParams;
+ SECItem publicValue; /* elliptic curve point encoded as
+ * octet stream.
+ */
+};
+typedef struct ECPublicKeyStr ECPublicKey;
+
+struct ECPrivateKeyStr {
+ ECParams ecParams;
+ SECItem publicValue; /* encoded ec point */
+ SECItem privateValue; /* private big integer */
+ SECItem version; /* As per SEC 1, Appendix C, Section C.4 */
+};
+typedef struct ECPrivateKeyStr ECPrivateKey;
+
+typedef void *(*BLapiAllocateFunc)(void);
+typedef void (*BLapiDestroyContextFunc)(void *cx, PRBool freeit);
+typedef SECStatus (*BLapiInitContextFunc)(void *cx,
+ const unsigned char *key,
+ unsigned int keylen,
+ const unsigned char *,
+ int,
+ unsigned int,
+ unsigned int);
+typedef SECStatus (*BLapiEncrypt)(void *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen);
+
+#endif /* _BLAPIT_H_ */
diff --git a/security/nss/lib/freebl/blname.c b/security/nss/lib/freebl/blname.c
new file mode 100644
index 000000000..4bad74ada
--- /dev/null
+++ b/security/nss/lib/freebl/blname.c
@@ -0,0 +1,100 @@
+/*
+ * blname.c - determine the freebl library name.
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#if defined(FREEBL_LOWHASH)
+static const char* default_name =
+ SHLIB_PREFIX "freeblpriv" SHLIB_VERSION "." SHLIB_SUFFIX;
+#else
+static const char* default_name =
+ SHLIB_PREFIX "freebl" SHLIB_VERSION "." SHLIB_SUFFIX;
+#endif
+
+/* getLibName() returns the name of the library to load. */
+
+#if defined(SOLARIS) && defined(__sparc)
+#include <stddef.h>
+#include <strings.h>
+#include <sys/systeminfo.h>
+
+#if defined(NSS_USE_64)
+
+const static char fpu_hybrid_shared_lib[] = "libfreebl_64fpu_3.so";
+const static char int_hybrid_shared_lib[] = "libfreebl_64int_3.so";
+const static char non_hybrid_shared_lib[] = "libfreebl_64fpu_3.so";
+
+const static char int_hybrid_isa[] = "sparcv9";
+const static char fpu_hybrid_isa[] = "sparcv9+vis";
+
+#else
+
+const static char fpu_hybrid_shared_lib[] = "libfreebl_32fpu_3.so";
+const static char int_hybrid_shared_lib[] = "libfreebl_32int64_3.so";
+/* This was for SPARC V8, now obsolete. */
+const static char* const non_hybrid_shared_lib = NULL;
+
+const static char int_hybrid_isa[] = "sparcv8plus";
+const static char fpu_hybrid_isa[] = "sparcv8plus+vis";
+
+#endif
+
+static const char*
+getLibName(void)
+{
+    char isa_list[256];
+    const char* int_match;
+    const char* fpu_match;
+    long isa_len = sysinfo(SI_ISALIST, isa_list, sizeof isa_list);
+
+    if (isa_len <= 0) {
+        return NULL;
+    }
+    /* Do not rely on sysinfo having NUL terminated the list for us. */
+    if (isa_len >= sizeof isa_list) {
+        isa_list[(sizeof isa_list) - 1] = '\0';
+    } else {
+        isa_list[isa_len] = '\0';
+    }
+
+    /* The ISA list names ISAs and ISA extensions, separated by spaces and
+     * ordered from fastest to slowest.  NSS has accelerated code for two
+     * of them: whichever one shows up first in the list wins, and when
+     * neither is listed the unaccelerated library is used.
+     */
+    int_match = strstr(isa_list, int_hybrid_isa);
+    fpu_match = strstr(isa_list, fpu_hybrid_isa);
+
+    if (fpu_match == NULL) {
+        return (int_match != NULL) ? int_hybrid_shared_lib
+                                   : non_hybrid_shared_lib;
+    }
+    /* The fpu ISA is listed: it loses only to an int ISA entry that
+     * starts strictly earlier in the list. */
+    if (int_match != NULL && int_match < fpu_match) {
+        return int_hybrid_shared_lib;
+    }
+    return fpu_hybrid_shared_lib;
+}
+
+#elif defined(HPUX) && !defined(NSS_USE_64) && !defined(__ia64)
+#include <unistd.h>
+
+/* Pick the 32-bit HP-UX freebl library for this CPU: the "fpu" build when
+** sysconf(_SC_CPU_VERSION) reports CPU_PA_RISC2_0, the "int" build otherwise.
+*/
+static const char*
+getLibName(void)
+{
+ long cpu = sysconf(_SC_CPU_VERSION);
+ return (cpu == CPU_PA_RISC2_0)
+ ? "libfreebl_32fpu_3.sl"
+ : "libfreebl_32int_3.sl";
+}
+#else
+/* default case, for platforms/ABIs that have only one freebl shared lib:
+ * the library name is the file-level default_name string. */
+static const char*
+getLibName(void)
+{
+ return default_name;
+}
+#endif
diff --git a/security/nss/lib/freebl/camellia.c b/security/nss/lib/freebl/camellia.c
new file mode 100644
index 000000000..8a7bcb0fe
--- /dev/null
+++ b/security/nss/lib/freebl/camellia.c
@@ -0,0 +1,1896 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "prinit.h"
+#include "prerr.h"
+#include "secerr.h"
+
+#include "prtypes.h"
+#include "blapi.h"
+#include "camellia.h"
+#include "sha_fast.h" /* for SHA_HTONL and related configuration macros */
+
+/* key constants */
+
+#define CAMELLIA_SIGMA1L (0xA09E667FL)
+#define CAMELLIA_SIGMA1R (0x3BCC908BL)
+#define CAMELLIA_SIGMA2L (0xB67AE858L)
+#define CAMELLIA_SIGMA2R (0x4CAA73B2L)
+#define CAMELLIA_SIGMA3L (0xC6EF372FL)
+#define CAMELLIA_SIGMA3R (0xE94F82BEL)
+#define CAMELLIA_SIGMA4L (0x54FF53A5L)
+#define CAMELLIA_SIGMA4R (0xF1D36F1CL)
+#define CAMELLIA_SIGMA5L (0x10E527FAL)
+#define CAMELLIA_SIGMA5R (0xDE682D1DL)
+#define CAMELLIA_SIGMA6L (0xB05688C2L)
+#define CAMELLIA_SIGMA6R (0xB3E6C1FDL)
+
+/*
+ * macros
+ */
+
+#if defined(HAVE_UNALIGNED_ACCESS)
+
+/* require a CPU that allows unaligned access: these variants read and
+ * write the four bytes through a PRUint32 pointer and pass the value
+ * through SHA_HTONL (presumably yielding the same most-significant-byte-
+ * first order as the portable variants below -- confirm in sha_fast.h) */
+
+#if defined(SHA_NEED_TMP_VARIABLE)
+#define CAMELLIA_NEED_TMP_VARIABLE 1
+#endif
+
+#define GETU32(p) SHA_HTONL(*((PRUint32 *)(p)))
+#define PUTU32(ct, st) \
+ { \
+ *((PRUint32 *)(ct)) = SHA_HTONL(st); \
+ }
+
+#else /* no unaligned access */
+/* GETU32 assembles a 32-bit value from four bytes, most significant first. */
+#define GETU32(pt) \
+ (((PRUint32)(pt)[0] << 24) ^ ((PRUint32)(pt)[1] << 16) ^ ((PRUint32)(pt)[2] << 8) ^ ((PRUint32)(pt)[3]))
+/* PUTU32 stores st as four bytes, most significant first.  It expands to
+ * a braced block, so it is a statement and not an expression. */
+#define PUTU32(ct, st) \
+ { \
+ (ct)[0] = (PRUint8)((st) >> 24); \
+ (ct)[1] = (PRUint8)((st) >> 16); \
+ (ct)[2] = (PRUint8)((st) >> 8); \
+ (ct)[3] = (PRUint8)(st); \
+ }
+
+#endif
+
+#define CamelliaSubkeyL(INDEX) (subkey[(INDEX)*2])
+#define CamelliaSubkeyR(INDEX) (subkey[(INDEX)*2 + 1])
+
+/* rotate right by one byte (8 bits); assumes x is a 32-bit unsigned value */
+#define CAMELLIA_RR8(x) (((x) >> 8) + ((x) << 24))
+/* rotate left by one bit; assumes x is a 32-bit unsigned value */
+#define CAMELLIA_RL1(x) (((x) << 1) + ((x) >> 31))
+/* rotate left by one byte (8 bits); assumes x is a 32-bit unsigned value */
+#define CAMELLIA_RL8(x) (((x) << 8) + ((x) >> 24))
+/*
+ * Rotate the four words ll, lr, rl, rr (taken as one value, ll most
+ * significant) left by `bits`, in place.  Each word is shifted by `bits`
+ * and by (32 - bits), so with 32-bit words `bits` must be in 1..31.
+ * w0 is used as scratch; w1 is accepted but not used.
+ */
+#define CAMELLIA_ROLDQ(ll, lr, rl, rr, w0, w1, bits) \
+ do { \
+ w0 = ll; \
+ ll = (ll << bits) + (lr >> (32 - bits)); \
+ lr = (lr << bits) + (rl >> (32 - bits)); \
+ rl = (rl << bits) + (rr >> (32 - bits)); \
+ rr = (rr << bits) + (w0 >> (32 - bits)); \
+ } while (0)
+/*
+ * Same rotation for larger counts: the words are shifted by (bits - 32)
+ * and (64 - bits), so with 32-bit words `bits` must be in 33..63.
+ * Both w0 and w1 are used as scratch.
+ */
+#define CAMELLIA_ROLDQo32(ll, lr, rl, rr, w0, w1, bits) \
+ do { \
+ w0 = ll; \
+ w1 = lr; \
+ ll = (lr << (bits - 32)) + (rl >> (64 - bits)); \
+ lr = (rl << (bits - 32)) + (rr >> (64 - bits)); \
+ rl = (rr << (bits - 32)) + (w0 >> (64 - bits)); \
+ rr = (w0 << (bits - 32)) + (w1 >> (64 - bits)); \
+ } while (0)
+
+#define CAMELLIA_SP1110(INDEX) (camellia_sp1110[(INDEX)])
+#define CAMELLIA_SP0222(INDEX) (camellia_sp0222[(INDEX)])
+#define CAMELLIA_SP3033(INDEX) (camellia_sp3033[(INDEX)])
+#define CAMELLIA_SP4404(INDEX) (camellia_sp4404[(INDEX)])
+/*
+ * CAMELLIA_F: XOR the input pair (xl, xr) with the key pair (kl, kr),
+ * look each of the eight resulting bytes up in the SP tables, and write
+ * the mixed result to (yl, yr).  xl, xr, kl and kr are only read; yl and
+ * yr are overwritten; il, ir, t0 and t1 are scratch lvalues.
+ */
+#define CAMELLIA_F(xl, xr, kl, kr, yl, yr, il, ir, t0, t1) \
+ do { \
+ il = xl ^ kl; \
+ ir = xr ^ kr; \
+ t0 = il >> 16; \
+ t1 = ir >> 16; \
+ yl = CAMELLIA_SP1110(ir & 0xff) ^ \
+ CAMELLIA_SP0222((t1 >> 8) & 0xff) ^ \
+ CAMELLIA_SP3033(t1 & 0xff) ^ \
+ CAMELLIA_SP4404((ir >> 8) & 0xff); \
+ yr = CAMELLIA_SP1110((t0 >> 8) & 0xff) ^ \
+ CAMELLIA_SP0222(t0 & 0xff) ^ \
+ CAMELLIA_SP3033((il >> 8) & 0xff) ^ \
+ CAMELLIA_SP4404(il & 0xff); \
+ yl ^= yr; \
+ yr = CAMELLIA_RR8(yr); \
+ yr ^= yl; \
+ } while (0)
+
+/*
+ * for speed up
+ *
+ * CAMELLIA_FLS updates both halves in place: on the left pair,
+ * lr ^= RL1(ll & kll) followed by ll ^= (lr | klr); on the right pair,
+ * rl ^= (rr | krr) followed by rr ^= RL1(rl & krl).  t0..t3 are scratch.
+ * NOTE(review): presumably the Camellia FL / FL^-1 layer -- confirm
+ * against the specification.
+ */
+#define CAMELLIA_FLS(ll, lr, rl, rr, kll, klr, krl, krr, t0, t1, t2, t3) \
+ do { \
+ t0 = kll; \
+ t0 &= ll; \
+ lr ^= CAMELLIA_RL1(t0); \
+ t1 = klr; \
+ t1 |= lr; \
+ ll ^= t1; \
+ \
+ t2 = krr; \
+ t2 |= rr; \
+ rl ^= t2; \
+ t3 = krl; \
+ t3 &= rl; \
+ rr ^= CAMELLIA_RL1(t3); \
+ } while (0)
+/*
+ * CAMELLIA_ROUNDSM: look the bytes of (xl, xr) up in the SP tables, XOR
+ * the key pair (kl, kr) into the looked-up words, mix the two words, and
+ * XOR the result into (yl, yr) -- yl and yr are accumulated into, not
+ * overwritten.  il and ir are scratch lvalues; t0 and t1 are accepted but
+ * not used.
+ */
+#define CAMELLIA_ROUNDSM(xl, xr, kl, kr, yl, yr, il, ir, t0, t1) \
+ do { \
+ ir = CAMELLIA_SP1110(xr & 0xff) ^ \
+ CAMELLIA_SP0222((xr >> 24) & 0xff) ^ \
+ CAMELLIA_SP3033((xr >> 16) & 0xff) ^ \
+ CAMELLIA_SP4404((xr >> 8) & 0xff); \
+ il = CAMELLIA_SP1110((xl >> 24) & 0xff) ^ \
+ CAMELLIA_SP0222((xl >> 16) & 0xff) ^ \
+ CAMELLIA_SP3033((xl >> 8) & 0xff) ^ \
+ CAMELLIA_SP4404(xl & 0xff); \
+ il ^= kl; \
+ ir ^= kr; \
+ ir ^= il; \
+ il = CAMELLIA_RR8(il); \
+ il ^= ir; \
+ yl ^= ir; \
+ yr ^= il; \
+ } while (0)
+
+static const PRUint32 camellia_sp1110[256] = {
+ 0x70707000, 0x82828200, 0x2c2c2c00, 0xececec00,
+ 0xb3b3b300, 0x27272700, 0xc0c0c000, 0xe5e5e500,
+ 0xe4e4e400, 0x85858500, 0x57575700, 0x35353500,
+ 0xeaeaea00, 0x0c0c0c00, 0xaeaeae00, 0x41414100,
+ 0x23232300, 0xefefef00, 0x6b6b6b00, 0x93939300,
+ 0x45454500, 0x19191900, 0xa5a5a500, 0x21212100,
+ 0xededed00, 0x0e0e0e00, 0x4f4f4f00, 0x4e4e4e00,
+ 0x1d1d1d00, 0x65656500, 0x92929200, 0xbdbdbd00,
+ 0x86868600, 0xb8b8b800, 0xafafaf00, 0x8f8f8f00,
+ 0x7c7c7c00, 0xebebeb00, 0x1f1f1f00, 0xcecece00,
+ 0x3e3e3e00, 0x30303000, 0xdcdcdc00, 0x5f5f5f00,
+ 0x5e5e5e00, 0xc5c5c500, 0x0b0b0b00, 0x1a1a1a00,
+ 0xa6a6a600, 0xe1e1e100, 0x39393900, 0xcacaca00,
+ 0xd5d5d500, 0x47474700, 0x5d5d5d00, 0x3d3d3d00,
+ 0xd9d9d900, 0x01010100, 0x5a5a5a00, 0xd6d6d600,
+ 0x51515100, 0x56565600, 0x6c6c6c00, 0x4d4d4d00,
+ 0x8b8b8b00, 0x0d0d0d00, 0x9a9a9a00, 0x66666600,
+ 0xfbfbfb00, 0xcccccc00, 0xb0b0b000, 0x2d2d2d00,
+ 0x74747400, 0x12121200, 0x2b2b2b00, 0x20202000,
+ 0xf0f0f000, 0xb1b1b100, 0x84848400, 0x99999900,
+ 0xdfdfdf00, 0x4c4c4c00, 0xcbcbcb00, 0xc2c2c200,
+ 0x34343400, 0x7e7e7e00, 0x76767600, 0x05050500,
+ 0x6d6d6d00, 0xb7b7b700, 0xa9a9a900, 0x31313100,
+ 0xd1d1d100, 0x17171700, 0x04040400, 0xd7d7d700,
+ 0x14141400, 0x58585800, 0x3a3a3a00, 0x61616100,
+ 0xdedede00, 0x1b1b1b00, 0x11111100, 0x1c1c1c00,
+ 0x32323200, 0x0f0f0f00, 0x9c9c9c00, 0x16161600,
+ 0x53535300, 0x18181800, 0xf2f2f200, 0x22222200,
+ 0xfefefe00, 0x44444400, 0xcfcfcf00, 0xb2b2b200,
+ 0xc3c3c300, 0xb5b5b500, 0x7a7a7a00, 0x91919100,
+ 0x24242400, 0x08080800, 0xe8e8e800, 0xa8a8a800,
+ 0x60606000, 0xfcfcfc00, 0x69696900, 0x50505000,
+ 0xaaaaaa00, 0xd0d0d000, 0xa0a0a000, 0x7d7d7d00,
+ 0xa1a1a100, 0x89898900, 0x62626200, 0x97979700,
+ 0x54545400, 0x5b5b5b00, 0x1e1e1e00, 0x95959500,
+ 0xe0e0e000, 0xffffff00, 0x64646400, 0xd2d2d200,
+ 0x10101000, 0xc4c4c400, 0x00000000, 0x48484800,
+ 0xa3a3a300, 0xf7f7f700, 0x75757500, 0xdbdbdb00,
+ 0x8a8a8a00, 0x03030300, 0xe6e6e600, 0xdadada00,
+ 0x09090900, 0x3f3f3f00, 0xdddddd00, 0x94949400,
+ 0x87878700, 0x5c5c5c00, 0x83838300, 0x02020200,
+ 0xcdcdcd00, 0x4a4a4a00, 0x90909000, 0x33333300,
+ 0x73737300, 0x67676700, 0xf6f6f600, 0xf3f3f300,
+ 0x9d9d9d00, 0x7f7f7f00, 0xbfbfbf00, 0xe2e2e200,
+ 0x52525200, 0x9b9b9b00, 0xd8d8d800, 0x26262600,
+ 0xc8c8c800, 0x37373700, 0xc6c6c600, 0x3b3b3b00,
+ 0x81818100, 0x96969600, 0x6f6f6f00, 0x4b4b4b00,
+ 0x13131300, 0xbebebe00, 0x63636300, 0x2e2e2e00,
+ 0xe9e9e900, 0x79797900, 0xa7a7a700, 0x8c8c8c00,
+ 0x9f9f9f00, 0x6e6e6e00, 0xbcbcbc00, 0x8e8e8e00,
+ 0x29292900, 0xf5f5f500, 0xf9f9f900, 0xb6b6b600,
+ 0x2f2f2f00, 0xfdfdfd00, 0xb4b4b400, 0x59595900,
+ 0x78787800, 0x98989800, 0x06060600, 0x6a6a6a00,
+ 0xe7e7e700, 0x46464600, 0x71717100, 0xbababa00,
+ 0xd4d4d400, 0x25252500, 0xababab00, 0x42424200,
+ 0x88888800, 0xa2a2a200, 0x8d8d8d00, 0xfafafa00,
+ 0x72727200, 0x07070700, 0xb9b9b900, 0x55555500,
+ 0xf8f8f800, 0xeeeeee00, 0xacacac00, 0x0a0a0a00,
+ 0x36363600, 0x49494900, 0x2a2a2a00, 0x68686800,
+ 0x3c3c3c00, 0x38383800, 0xf1f1f100, 0xa4a4a400,
+ 0x40404000, 0x28282800, 0xd3d3d300, 0x7b7b7b00,
+ 0xbbbbbb00, 0xc9c9c900, 0x43434300, 0xc1c1c100,
+ 0x15151500, 0xe3e3e300, 0xadadad00, 0xf4f4f400,
+ 0x77777700, 0xc7c7c700, 0x80808000, 0x9e9e9e00,
+};
+
+static const PRUint32 camellia_sp0222[256] = {
+ 0x00e0e0e0, 0x00050505, 0x00585858, 0x00d9d9d9,
+ 0x00676767, 0x004e4e4e, 0x00818181, 0x00cbcbcb,
+ 0x00c9c9c9, 0x000b0b0b, 0x00aeaeae, 0x006a6a6a,
+ 0x00d5d5d5, 0x00181818, 0x005d5d5d, 0x00828282,
+ 0x00464646, 0x00dfdfdf, 0x00d6d6d6, 0x00272727,
+ 0x008a8a8a, 0x00323232, 0x004b4b4b, 0x00424242,
+ 0x00dbdbdb, 0x001c1c1c, 0x009e9e9e, 0x009c9c9c,
+ 0x003a3a3a, 0x00cacaca, 0x00252525, 0x007b7b7b,
+ 0x000d0d0d, 0x00717171, 0x005f5f5f, 0x001f1f1f,
+ 0x00f8f8f8, 0x00d7d7d7, 0x003e3e3e, 0x009d9d9d,
+ 0x007c7c7c, 0x00606060, 0x00b9b9b9, 0x00bebebe,
+ 0x00bcbcbc, 0x008b8b8b, 0x00161616, 0x00343434,
+ 0x004d4d4d, 0x00c3c3c3, 0x00727272, 0x00959595,
+ 0x00ababab, 0x008e8e8e, 0x00bababa, 0x007a7a7a,
+ 0x00b3b3b3, 0x00020202, 0x00b4b4b4, 0x00adadad,
+ 0x00a2a2a2, 0x00acacac, 0x00d8d8d8, 0x009a9a9a,
+ 0x00171717, 0x001a1a1a, 0x00353535, 0x00cccccc,
+ 0x00f7f7f7, 0x00999999, 0x00616161, 0x005a5a5a,
+ 0x00e8e8e8, 0x00242424, 0x00565656, 0x00404040,
+ 0x00e1e1e1, 0x00636363, 0x00090909, 0x00333333,
+ 0x00bfbfbf, 0x00989898, 0x00979797, 0x00858585,
+ 0x00686868, 0x00fcfcfc, 0x00ececec, 0x000a0a0a,
+ 0x00dadada, 0x006f6f6f, 0x00535353, 0x00626262,
+ 0x00a3a3a3, 0x002e2e2e, 0x00080808, 0x00afafaf,
+ 0x00282828, 0x00b0b0b0, 0x00747474, 0x00c2c2c2,
+ 0x00bdbdbd, 0x00363636, 0x00222222, 0x00383838,
+ 0x00646464, 0x001e1e1e, 0x00393939, 0x002c2c2c,
+ 0x00a6a6a6, 0x00303030, 0x00e5e5e5, 0x00444444,
+ 0x00fdfdfd, 0x00888888, 0x009f9f9f, 0x00656565,
+ 0x00878787, 0x006b6b6b, 0x00f4f4f4, 0x00232323,
+ 0x00484848, 0x00101010, 0x00d1d1d1, 0x00515151,
+ 0x00c0c0c0, 0x00f9f9f9, 0x00d2d2d2, 0x00a0a0a0,
+ 0x00555555, 0x00a1a1a1, 0x00414141, 0x00fafafa,
+ 0x00434343, 0x00131313, 0x00c4c4c4, 0x002f2f2f,
+ 0x00a8a8a8, 0x00b6b6b6, 0x003c3c3c, 0x002b2b2b,
+ 0x00c1c1c1, 0x00ffffff, 0x00c8c8c8, 0x00a5a5a5,
+ 0x00202020, 0x00898989, 0x00000000, 0x00909090,
+ 0x00474747, 0x00efefef, 0x00eaeaea, 0x00b7b7b7,
+ 0x00151515, 0x00060606, 0x00cdcdcd, 0x00b5b5b5,
+ 0x00121212, 0x007e7e7e, 0x00bbbbbb, 0x00292929,
+ 0x000f0f0f, 0x00b8b8b8, 0x00070707, 0x00040404,
+ 0x009b9b9b, 0x00949494, 0x00212121, 0x00666666,
+ 0x00e6e6e6, 0x00cecece, 0x00ededed, 0x00e7e7e7,
+ 0x003b3b3b, 0x00fefefe, 0x007f7f7f, 0x00c5c5c5,
+ 0x00a4a4a4, 0x00373737, 0x00b1b1b1, 0x004c4c4c,
+ 0x00919191, 0x006e6e6e, 0x008d8d8d, 0x00767676,
+ 0x00030303, 0x002d2d2d, 0x00dedede, 0x00969696,
+ 0x00262626, 0x007d7d7d, 0x00c6c6c6, 0x005c5c5c,
+ 0x00d3d3d3, 0x00f2f2f2, 0x004f4f4f, 0x00191919,
+ 0x003f3f3f, 0x00dcdcdc, 0x00797979, 0x001d1d1d,
+ 0x00525252, 0x00ebebeb, 0x00f3f3f3, 0x006d6d6d,
+ 0x005e5e5e, 0x00fbfbfb, 0x00696969, 0x00b2b2b2,
+ 0x00f0f0f0, 0x00313131, 0x000c0c0c, 0x00d4d4d4,
+ 0x00cfcfcf, 0x008c8c8c, 0x00e2e2e2, 0x00757575,
+ 0x00a9a9a9, 0x004a4a4a, 0x00575757, 0x00848484,
+ 0x00111111, 0x00454545, 0x001b1b1b, 0x00f5f5f5,
+ 0x00e4e4e4, 0x000e0e0e, 0x00737373, 0x00aaaaaa,
+ 0x00f1f1f1, 0x00dddddd, 0x00595959, 0x00141414,
+ 0x006c6c6c, 0x00929292, 0x00545454, 0x00d0d0d0,
+ 0x00787878, 0x00707070, 0x00e3e3e3, 0x00494949,
+ 0x00808080, 0x00505050, 0x00a7a7a7, 0x00f6f6f6,
+ 0x00777777, 0x00939393, 0x00868686, 0x00838383,
+ 0x002a2a2a, 0x00c7c7c7, 0x005b5b5b, 0x00e9e9e9,
+ 0x00eeeeee, 0x008f8f8f, 0x00010101, 0x003d3d3d,
+};
+
+static const PRUint32 camellia_sp3033[256] = {
+ 0x38003838, 0x41004141, 0x16001616, 0x76007676,
+ 0xd900d9d9, 0x93009393, 0x60006060, 0xf200f2f2,
+ 0x72007272, 0xc200c2c2, 0xab00abab, 0x9a009a9a,
+ 0x75007575, 0x06000606, 0x57005757, 0xa000a0a0,
+ 0x91009191, 0xf700f7f7, 0xb500b5b5, 0xc900c9c9,
+ 0xa200a2a2, 0x8c008c8c, 0xd200d2d2, 0x90009090,
+ 0xf600f6f6, 0x07000707, 0xa700a7a7, 0x27002727,
+ 0x8e008e8e, 0xb200b2b2, 0x49004949, 0xde00dede,
+ 0x43004343, 0x5c005c5c, 0xd700d7d7, 0xc700c7c7,
+ 0x3e003e3e, 0xf500f5f5, 0x8f008f8f, 0x67006767,
+ 0x1f001f1f, 0x18001818, 0x6e006e6e, 0xaf00afaf,
+ 0x2f002f2f, 0xe200e2e2, 0x85008585, 0x0d000d0d,
+ 0x53005353, 0xf000f0f0, 0x9c009c9c, 0x65006565,
+ 0xea00eaea, 0xa300a3a3, 0xae00aeae, 0x9e009e9e,
+ 0xec00ecec, 0x80008080, 0x2d002d2d, 0x6b006b6b,
+ 0xa800a8a8, 0x2b002b2b, 0x36003636, 0xa600a6a6,
+ 0xc500c5c5, 0x86008686, 0x4d004d4d, 0x33003333,
+ 0xfd00fdfd, 0x66006666, 0x58005858, 0x96009696,
+ 0x3a003a3a, 0x09000909, 0x95009595, 0x10001010,
+ 0x78007878, 0xd800d8d8, 0x42004242, 0xcc00cccc,
+ 0xef00efef, 0x26002626, 0xe500e5e5, 0x61006161,
+ 0x1a001a1a, 0x3f003f3f, 0x3b003b3b, 0x82008282,
+ 0xb600b6b6, 0xdb00dbdb, 0xd400d4d4, 0x98009898,
+ 0xe800e8e8, 0x8b008b8b, 0x02000202, 0xeb00ebeb,
+ 0x0a000a0a, 0x2c002c2c, 0x1d001d1d, 0xb000b0b0,
+ 0x6f006f6f, 0x8d008d8d, 0x88008888, 0x0e000e0e,
+ 0x19001919, 0x87008787, 0x4e004e4e, 0x0b000b0b,
+ 0xa900a9a9, 0x0c000c0c, 0x79007979, 0x11001111,
+ 0x7f007f7f, 0x22002222, 0xe700e7e7, 0x59005959,
+ 0xe100e1e1, 0xda00dada, 0x3d003d3d, 0xc800c8c8,
+ 0x12001212, 0x04000404, 0x74007474, 0x54005454,
+ 0x30003030, 0x7e007e7e, 0xb400b4b4, 0x28002828,
+ 0x55005555, 0x68006868, 0x50005050, 0xbe00bebe,
+ 0xd000d0d0, 0xc400c4c4, 0x31003131, 0xcb00cbcb,
+ 0x2a002a2a, 0xad00adad, 0x0f000f0f, 0xca00caca,
+ 0x70007070, 0xff00ffff, 0x32003232, 0x69006969,
+ 0x08000808, 0x62006262, 0x00000000, 0x24002424,
+ 0xd100d1d1, 0xfb00fbfb, 0xba00baba, 0xed00eded,
+ 0x45004545, 0x81008181, 0x73007373, 0x6d006d6d,
+ 0x84008484, 0x9f009f9f, 0xee00eeee, 0x4a004a4a,
+ 0xc300c3c3, 0x2e002e2e, 0xc100c1c1, 0x01000101,
+ 0xe600e6e6, 0x25002525, 0x48004848, 0x99009999,
+ 0xb900b9b9, 0xb300b3b3, 0x7b007b7b, 0xf900f9f9,
+ 0xce00cece, 0xbf00bfbf, 0xdf00dfdf, 0x71007171,
+ 0x29002929, 0xcd00cdcd, 0x6c006c6c, 0x13001313,
+ 0x64006464, 0x9b009b9b, 0x63006363, 0x9d009d9d,
+ 0xc000c0c0, 0x4b004b4b, 0xb700b7b7, 0xa500a5a5,
+ 0x89008989, 0x5f005f5f, 0xb100b1b1, 0x17001717,
+ 0xf400f4f4, 0xbc00bcbc, 0xd300d3d3, 0x46004646,
+ 0xcf00cfcf, 0x37003737, 0x5e005e5e, 0x47004747,
+ 0x94009494, 0xfa00fafa, 0xfc00fcfc, 0x5b005b5b,
+ 0x97009797, 0xfe00fefe, 0x5a005a5a, 0xac00acac,
+ 0x3c003c3c, 0x4c004c4c, 0x03000303, 0x35003535,
+ 0xf300f3f3, 0x23002323, 0xb800b8b8, 0x5d005d5d,
+ 0x6a006a6a, 0x92009292, 0xd500d5d5, 0x21002121,
+ 0x44004444, 0x51005151, 0xc600c6c6, 0x7d007d7d,
+ 0x39003939, 0x83008383, 0xdc00dcdc, 0xaa00aaaa,
+ 0x7c007c7c, 0x77007777, 0x56005656, 0x05000505,
+ 0x1b001b1b, 0xa400a4a4, 0x15001515, 0x34003434,
+ 0x1e001e1e, 0x1c001c1c, 0xf800f8f8, 0x52005252,
+ 0x20002020, 0x14001414, 0xe900e9e9, 0xbd00bdbd,
+ 0xdd00dddd, 0xe400e4e4, 0xa100a1a1, 0xe000e0e0,
+ 0x8a008a8a, 0xf100f1f1, 0xd600d6d6, 0x7a007a7a,
+ 0xbb00bbbb, 0xe300e3e3, 0x40004040, 0x4f004f4f,
+};
+
+static const PRUint32 camellia_sp4404[256] = {
+ 0x70700070, 0x2c2c002c, 0xb3b300b3, 0xc0c000c0,
+ 0xe4e400e4, 0x57570057, 0xeaea00ea, 0xaeae00ae,
+ 0x23230023, 0x6b6b006b, 0x45450045, 0xa5a500a5,
+ 0xeded00ed, 0x4f4f004f, 0x1d1d001d, 0x92920092,
+ 0x86860086, 0xafaf00af, 0x7c7c007c, 0x1f1f001f,
+ 0x3e3e003e, 0xdcdc00dc, 0x5e5e005e, 0x0b0b000b,
+ 0xa6a600a6, 0x39390039, 0xd5d500d5, 0x5d5d005d,
+ 0xd9d900d9, 0x5a5a005a, 0x51510051, 0x6c6c006c,
+ 0x8b8b008b, 0x9a9a009a, 0xfbfb00fb, 0xb0b000b0,
+ 0x74740074, 0x2b2b002b, 0xf0f000f0, 0x84840084,
+ 0xdfdf00df, 0xcbcb00cb, 0x34340034, 0x76760076,
+ 0x6d6d006d, 0xa9a900a9, 0xd1d100d1, 0x04040004,
+ 0x14140014, 0x3a3a003a, 0xdede00de, 0x11110011,
+ 0x32320032, 0x9c9c009c, 0x53530053, 0xf2f200f2,
+ 0xfefe00fe, 0xcfcf00cf, 0xc3c300c3, 0x7a7a007a,
+ 0x24240024, 0xe8e800e8, 0x60600060, 0x69690069,
+ 0xaaaa00aa, 0xa0a000a0, 0xa1a100a1, 0x62620062,
+ 0x54540054, 0x1e1e001e, 0xe0e000e0, 0x64640064,
+ 0x10100010, 0x00000000, 0xa3a300a3, 0x75750075,
+ 0x8a8a008a, 0xe6e600e6, 0x09090009, 0xdddd00dd,
+ 0x87870087, 0x83830083, 0xcdcd00cd, 0x90900090,
+ 0x73730073, 0xf6f600f6, 0x9d9d009d, 0xbfbf00bf,
+ 0x52520052, 0xd8d800d8, 0xc8c800c8, 0xc6c600c6,
+ 0x81810081, 0x6f6f006f, 0x13130013, 0x63630063,
+ 0xe9e900e9, 0xa7a700a7, 0x9f9f009f, 0xbcbc00bc,
+ 0x29290029, 0xf9f900f9, 0x2f2f002f, 0xb4b400b4,
+ 0x78780078, 0x06060006, 0xe7e700e7, 0x71710071,
+ 0xd4d400d4, 0xabab00ab, 0x88880088, 0x8d8d008d,
+ 0x72720072, 0xb9b900b9, 0xf8f800f8, 0xacac00ac,
+ 0x36360036, 0x2a2a002a, 0x3c3c003c, 0xf1f100f1,
+ 0x40400040, 0xd3d300d3, 0xbbbb00bb, 0x43430043,
+ 0x15150015, 0xadad00ad, 0x77770077, 0x80800080,
+ 0x82820082, 0xecec00ec, 0x27270027, 0xe5e500e5,
+ 0x85850085, 0x35350035, 0x0c0c000c, 0x41410041,
+ 0xefef00ef, 0x93930093, 0x19190019, 0x21210021,
+ 0x0e0e000e, 0x4e4e004e, 0x65650065, 0xbdbd00bd,
+ 0xb8b800b8, 0x8f8f008f, 0xebeb00eb, 0xcece00ce,
+ 0x30300030, 0x5f5f005f, 0xc5c500c5, 0x1a1a001a,
+ 0xe1e100e1, 0xcaca00ca, 0x47470047, 0x3d3d003d,
+ 0x01010001, 0xd6d600d6, 0x56560056, 0x4d4d004d,
+ 0x0d0d000d, 0x66660066, 0xcccc00cc, 0x2d2d002d,
+ 0x12120012, 0x20200020, 0xb1b100b1, 0x99990099,
+ 0x4c4c004c, 0xc2c200c2, 0x7e7e007e, 0x05050005,
+ 0xb7b700b7, 0x31310031, 0x17170017, 0xd7d700d7,
+ 0x58580058, 0x61610061, 0x1b1b001b, 0x1c1c001c,
+ 0x0f0f000f, 0x16160016, 0x18180018, 0x22220022,
+ 0x44440044, 0xb2b200b2, 0xb5b500b5, 0x91910091,
+ 0x08080008, 0xa8a800a8, 0xfcfc00fc, 0x50500050,
+ 0xd0d000d0, 0x7d7d007d, 0x89890089, 0x97970097,
+ 0x5b5b005b, 0x95950095, 0xffff00ff, 0xd2d200d2,
+ 0xc4c400c4, 0x48480048, 0xf7f700f7, 0xdbdb00db,
+ 0x03030003, 0xdada00da, 0x3f3f003f, 0x94940094,
+ 0x5c5c005c, 0x02020002, 0x4a4a004a, 0x33330033,
+ 0x67670067, 0xf3f300f3, 0x7f7f007f, 0xe2e200e2,
+ 0x9b9b009b, 0x26260026, 0x37370037, 0x3b3b003b,
+ 0x96960096, 0x4b4b004b, 0xbebe00be, 0x2e2e002e,
+ 0x79790079, 0x8c8c008c, 0x6e6e006e, 0x8e8e008e,
+ 0xf5f500f5, 0xb6b600b6, 0xfdfd00fd, 0x59590059,
+ 0x98980098, 0x6a6a006a, 0x46460046, 0xbaba00ba,
+ 0x25250025, 0x42420042, 0xa2a200a2, 0xfafa00fa,
+ 0x07070007, 0x55550055, 0xeeee00ee, 0x0a0a000a,
+ 0x49490049, 0x68680068, 0x38380038, 0xa4a400a4,
+ 0x28280028, 0x7b7b007b, 0xc9c900c9, 0xc1c100c1,
+ 0xe3e300e3, 0xf4f400f4, 0xc7c700c7, 0x9e9e009e,
+};
+
+/**
+ * Stuff related to the Camellia key schedule
+ *
+ * subl(x) and subr(x) index the scratch arrays subL[] and subR[]. Those
+ * arrays are not globals: camellia_setup128() and camellia_setup256() each
+ * declare their own local pair, so the macros only expand correctly inside
+ * a function that has both arrays in scope. Entry x of the two arrays
+ * together holds one pair of 32-bit words of an intermediate subkey.
+ */
+#define subl(x) subL[(x)]
+#define subr(x) subR[(x)]
+
+void
+camellia_setup128(const unsigned char *key, PRUint32 *subkey)
+{ /*
+ * Key schedule for a 128-bit key.
+ *
+ * key: read through GETU32 at byte offsets 0, 4, 8 and 12.
+ * subkey: output table, written through CamelliaSubkeyL() and
+ * CamelliaSubkeyR() at entry 0 and entries 2..24. Entry 1 is never
+ * assigned by this function.
+ *
+ * All intermediate values live in the local subL[]/subR[] arrays (reached
+ * through subl()/subr()); the output table is only written in the last two
+ * stages ("key XOR is end of F-function" and the P-function fix-up).
+ *
+ * NOTE(review): GETU32, CAMELLIA_ROLDQ, CAMELLIA_ROLDQo32, CAMELLIA_F,
+ * CAMELLIA_RL1, CAMELLIA_RL8, CamelliaSubkeyL and CamelliaSubkeyR are macros
+ * defined outside this function. The comments below assume the rotate
+ * macros update kll/klr/krl/krr in place - confirm against the definitions.
+ */
+ PRUint32 kll, klr, krl, krr;
+ PRUint32 il, ir, t0, t1, w0, w1;
+ PRUint32 kw4l, kw4r, dw, tl, tr;
+ PRUint32 subL[26];
+ PRUint32 subR[26];
+#if defined(CAMELLIA_NEED_TMP_VARIABLE)
+ PRUint32 tmp; /* never named directly below; presumably used inside the macros */
+#endif
+
+ /**
+ * k == kll || klr || krl || krr (|| is concatenation)
+ */
+ kll = GETU32(key);
+ klr = GETU32(key + 4);
+ krl = GETU32(key + 8);
+ krr = GETU32(key + 12);
+ /**
+ * generate KL dependent subkeys
+ */
+ subl(0) = kll;
+ subr(0) = klr;
+ subl(1) = krl;
+ subr(1) = krr;
+ CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15);
+ subl(4) = kll;
+ subr(4) = klr;
+ subl(5) = krl;
+ subr(5) = krr;
+ CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 30);
+ subl(10) = kll;
+ subr(10) = klr;
+ subl(11) = krl;
+ subr(11) = krr;
+ CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15);
+ subl(13) = krl; /* only the krl/krr pair is kept here; entry 12 is filled from KA below */
+ subr(13) = krr;
+ CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17);
+ subl(16) = kll;
+ subr(16) = klr;
+ subl(17) = krl;
+ subr(17) = krr;
+ CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17);
+ subl(18) = kll;
+ subr(18) = klr;
+ subl(19) = krl;
+ subr(19) = krr;
+ CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17);
+ subl(22) = kll;
+ subr(22) = klr;
+ subl(23) = krl;
+ subr(23) = krr;
+
+ /* generate KA
+ * Starts again from the unrotated key words saved in entries 0 and 1 and
+ * runs four CAMELLIA_F steps with the constants CAMELLIA_SIGMA1..4. */
+ kll = subl(0);
+ klr = subr(0);
+ krl = subl(1);
+ krr = subr(1);
+ CAMELLIA_F(kll, klr,
+ CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R,
+ w0, w1, il, ir, t0, t1);
+ krl ^= w0;
+ krr ^= w1;
+ CAMELLIA_F(krl, krr,
+ CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R,
+ kll, klr, il, ir, t0, t1);
+ CAMELLIA_F(kll, klr,
+ CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R,
+ krl, krr, il, ir, t0, t1);
+ krl ^= w0;
+ krr ^= w1;
+ CAMELLIA_F(krl, krr,
+ CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R,
+ w0, w1, il, ir, t0, t1);
+ kll ^= w0;
+ klr ^= w1;
+
+ /* generate KA dependent subkeys */
+ subl(2) = kll;
+ subr(2) = klr;
+ subl(3) = krl;
+ subr(3) = krr;
+ CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15);
+ subl(6) = kll;
+ subr(6) = klr;
+ subl(7) = krl;
+ subr(7) = krr;
+ CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15);
+ subl(8) = kll;
+ subr(8) = klr;
+ subl(9) = krl;
+ subr(9) = krr;
+ CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15);
+ subl(12) = kll; /* only the kll/klr pair is kept here; entry 13 was filled from KL above */
+ subr(12) = klr;
+ CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15);
+ subl(14) = kll;
+ subr(14) = klr;
+ subl(15) = krl;
+ subr(15) = krr;
+ CAMELLIA_ROLDQo32(kll, klr, krl, krr, w0, w1, 34); /* NOTE(review): presumably the macro variant for counts above 32 - confirm */
+ subl(20) = kll;
+ subr(20) = klr;
+ subl(21) = krl;
+ subr(21) = krr;
+ CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17);
+ subl(24) = kll;
+ subr(24) = klr;
+ subl(25) = krl;
+ subr(25) = krr;
+
+ /* absorb kw2 to other subkeys
+ * Entry 1 is folded into the odd-numbered entries 3..23 and into entry 24.
+ * It is itself updated in place when passing entries 9 and 17. */
+ subl(3) ^= subl(1);
+ subr(3) ^= subr(1);
+ subl(5) ^= subl(1);
+ subr(5) ^= subr(1);
+ subl(7) ^= subl(1);
+ subr(7) ^= subr(1);
+ subl(1) ^= subr(1) & ~subr(9);
+ dw = subl(1) & subl(9), subr(1) ^= CAMELLIA_RL1(dw);
+ subl(11) ^= subl(1);
+ subr(11) ^= subr(1);
+ subl(13) ^= subl(1);
+ subr(13) ^= subr(1);
+ subl(15) ^= subl(1);
+ subr(15) ^= subr(1);
+ subl(1) ^= subr(1) & ~subr(17);
+ dw = subl(1) & subl(17), subr(1) ^= CAMELLIA_RL1(dw);
+ subl(19) ^= subl(1);
+ subr(19) ^= subr(1);
+ subl(21) ^= subl(1);
+ subr(21) ^= subr(1);
+ subl(23) ^= subl(1);
+ subr(23) ^= subr(1);
+ subl(24) ^= subl(1);
+ subr(24) ^= subr(1);
+
+ /* absorb kw4 to other subkeys
+ * Entry 25 is copied to kw4l/kw4r and folded into the even-numbered
+ * entries 22 down to 0. The copy is updated when passing entries 16 and 8. */
+ kw4l = subl(25);
+ kw4r = subr(25);
+ subl(22) ^= kw4l;
+ subr(22) ^= kw4r;
+ subl(20) ^= kw4l;
+ subr(20) ^= kw4r;
+ subl(18) ^= kw4l;
+ subr(18) ^= kw4r;
+ kw4l ^= kw4r & ~subr(16);
+ dw = kw4l & subl(16), kw4r ^= CAMELLIA_RL1(dw);
+ subl(14) ^= kw4l;
+ subr(14) ^= kw4r;
+ subl(12) ^= kw4l;
+ subr(12) ^= kw4r;
+ subl(10) ^= kw4l;
+ subr(10) ^= kw4r;
+ kw4l ^= kw4r & ~subr(8);
+ dw = kw4l & subl(8), kw4r ^= CAMELLIA_RL1(dw);
+ subl(6) ^= kw4l;
+ subr(6) ^= kw4r;
+ subl(4) ^= kw4l;
+ subr(4) ^= kw4r;
+ subl(2) ^= kw4l;
+ subr(2) ^= kw4r;
+ subl(0) ^= kw4l;
+ subr(0) ^= kw4r;
+
+ /* key XOR is end of F-function
+ * First stage that writes the caller's table: entries 0 and 2..24. */
+ CamelliaSubkeyL(0) = subl(0) ^ subl(2);
+ CamelliaSubkeyR(0) = subr(0) ^ subr(2);
+ CamelliaSubkeyL(2) = subl(3);
+ CamelliaSubkeyR(2) = subr(3);
+ CamelliaSubkeyL(3) = subl(2) ^ subl(4);
+ CamelliaSubkeyR(3) = subr(2) ^ subr(4);
+ CamelliaSubkeyL(4) = subl(3) ^ subl(5);
+ CamelliaSubkeyR(4) = subr(3) ^ subr(5);
+ CamelliaSubkeyL(5) = subl(4) ^ subl(6);
+ CamelliaSubkeyR(5) = subr(4) ^ subr(6);
+ CamelliaSubkeyL(6) = subl(5) ^ subl(7);
+ CamelliaSubkeyR(6) = subr(5) ^ subr(7);
+ tl = subl(10) ^ (subr(10) & ~subr(8));
+ dw = tl & subl(8), tr = subr(10) ^ CAMELLIA_RL1(dw);
+ CamelliaSubkeyL(7) = subl(6) ^ tl;
+ CamelliaSubkeyR(7) = subr(6) ^ tr;
+ CamelliaSubkeyL(8) = subl(8);
+ CamelliaSubkeyR(8) = subr(8);
+ CamelliaSubkeyL(9) = subl(9);
+ CamelliaSubkeyR(9) = subr(9);
+ tl = subl(7) ^ (subr(7) & ~subr(9));
+ dw = tl & subl(9), tr = subr(7) ^ CAMELLIA_RL1(dw);
+ CamelliaSubkeyL(10) = tl ^ subl(11);
+ CamelliaSubkeyR(10) = tr ^ subr(11);
+ CamelliaSubkeyL(11) = subl(10) ^ subl(12);
+ CamelliaSubkeyR(11) = subr(10) ^ subr(12);
+ CamelliaSubkeyL(12) = subl(11) ^ subl(13);
+ CamelliaSubkeyR(12) = subr(11) ^ subr(13);
+ CamelliaSubkeyL(13) = subl(12) ^ subl(14);
+ CamelliaSubkeyR(13) = subr(12) ^ subr(14);
+ CamelliaSubkeyL(14) = subl(13) ^ subl(15);
+ CamelliaSubkeyR(14) = subr(13) ^ subr(15);
+ tl = subl(18) ^ (subr(18) & ~subr(16));
+ dw = tl & subl(16), tr = subr(18) ^ CAMELLIA_RL1(dw);
+ CamelliaSubkeyL(15) = subl(14) ^ tl;
+ CamelliaSubkeyR(15) = subr(14) ^ tr;
+ CamelliaSubkeyL(16) = subl(16);
+ CamelliaSubkeyR(16) = subr(16);
+ CamelliaSubkeyL(17) = subl(17);
+ CamelliaSubkeyR(17) = subr(17);
+ tl = subl(15) ^ (subr(15) & ~subr(17));
+ dw = tl & subl(17), tr = subr(15) ^ CAMELLIA_RL1(dw);
+ CamelliaSubkeyL(18) = tl ^ subl(19);
+ CamelliaSubkeyR(18) = tr ^ subr(19);
+ CamelliaSubkeyL(19) = subl(18) ^ subl(20);
+ CamelliaSubkeyR(19) = subr(18) ^ subr(20);
+ CamelliaSubkeyL(20) = subl(19) ^ subl(21);
+ CamelliaSubkeyR(20) = subr(19) ^ subr(21);
+ CamelliaSubkeyL(21) = subl(20) ^ subl(22);
+ CamelliaSubkeyR(21) = subr(20) ^ subr(22);
+ CamelliaSubkeyL(22) = subl(21) ^ subl(23);
+ CamelliaSubkeyR(22) = subr(21) ^ subr(23);
+ CamelliaSubkeyL(23) = subl(22);
+ CamelliaSubkeyR(23) = subr(22);
+ CamelliaSubkeyL(24) = subl(24) ^ subl(23);
+ CamelliaSubkeyR(24) = subr(24) ^ subr(23);
+
+ /* apply the inverse of the last half of P-function
+ * Rewrites entries 2..7, 10..15 and 18..23 in place. Entries 0, 8, 9, 16,
+ * 17 and 24 keep the values stored by the stage above. */
+ dw = CamelliaSubkeyL(2) ^ CamelliaSubkeyR(2), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(2) = CamelliaSubkeyL(2) ^ dw, CamelliaSubkeyL(2) = dw;
+ dw = CamelliaSubkeyL(3) ^ CamelliaSubkeyR(3), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(3) = CamelliaSubkeyL(3) ^ dw, CamelliaSubkeyL(3) = dw;
+ dw = CamelliaSubkeyL(4) ^ CamelliaSubkeyR(4), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(4) = CamelliaSubkeyL(4) ^ dw, CamelliaSubkeyL(4) = dw;
+ dw = CamelliaSubkeyL(5) ^ CamelliaSubkeyR(5), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(5) = CamelliaSubkeyL(5) ^ dw, CamelliaSubkeyL(5) = dw;
+ dw = CamelliaSubkeyL(6) ^ CamelliaSubkeyR(6), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(6) = CamelliaSubkeyL(6) ^ dw, CamelliaSubkeyL(6) = dw;
+ dw = CamelliaSubkeyL(7) ^ CamelliaSubkeyR(7), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(7) = CamelliaSubkeyL(7) ^ dw, CamelliaSubkeyL(7) = dw;
+ dw = CamelliaSubkeyL(10) ^ CamelliaSubkeyR(10), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(10) = CamelliaSubkeyL(10) ^ dw, CamelliaSubkeyL(10) = dw;
+ dw = CamelliaSubkeyL(11) ^ CamelliaSubkeyR(11), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(11) = CamelliaSubkeyL(11) ^ dw, CamelliaSubkeyL(11) = dw;
+ dw = CamelliaSubkeyL(12) ^ CamelliaSubkeyR(12), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(12) = CamelliaSubkeyL(12) ^ dw, CamelliaSubkeyL(12) = dw;
+ dw = CamelliaSubkeyL(13) ^ CamelliaSubkeyR(13), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(13) = CamelliaSubkeyL(13) ^ dw, CamelliaSubkeyL(13) = dw;
+ dw = CamelliaSubkeyL(14) ^ CamelliaSubkeyR(14), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(14) = CamelliaSubkeyL(14) ^ dw, CamelliaSubkeyL(14) = dw;
+ dw = CamelliaSubkeyL(15) ^ CamelliaSubkeyR(15), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(15) = CamelliaSubkeyL(15) ^ dw, CamelliaSubkeyL(15) = dw;
+ dw = CamelliaSubkeyL(18) ^ CamelliaSubkeyR(18), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(18) = CamelliaSubkeyL(18) ^ dw, CamelliaSubkeyL(18) = dw;
+ dw = CamelliaSubkeyL(19) ^ CamelliaSubkeyR(19), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(19) = CamelliaSubkeyL(19) ^ dw, CamelliaSubkeyL(19) = dw;
+ dw = CamelliaSubkeyL(20) ^ CamelliaSubkeyR(20), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(20) = CamelliaSubkeyL(20) ^ dw, CamelliaSubkeyL(20) = dw;
+ dw = CamelliaSubkeyL(21) ^ CamelliaSubkeyR(21), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(21) = CamelliaSubkeyL(21) ^ dw, CamelliaSubkeyL(21) = dw;
+ dw = CamelliaSubkeyL(22) ^ CamelliaSubkeyR(22), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(22) = CamelliaSubkeyL(22) ^ dw, CamelliaSubkeyL(22) = dw;
+ dw = CamelliaSubkeyL(23) ^ CamelliaSubkeyR(23), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(23) = CamelliaSubkeyL(23) ^ dw, CamelliaSubkeyL(23) = dw;
+
+ return;
+}
+
+void
+camellia_setup256(const unsigned char *key, PRUint32 *subkey)
+{ /*
+ * Key schedule for a 256-bit key.
+ *
+ * key: read through GETU32 at byte offsets 0, 4, ..., 28.
+ * subkey: output table, written through CamelliaSubkeyL() and
+ * CamelliaSubkeyR() at entry 0 and entries 2..32. Entry 1 is never
+ * assigned by this function.
+ *
+ * All intermediate values live in the local subL[]/subR[] arrays (reached
+ * through subl()/subr()); the output table is only written in the last two
+ * stages ("key XOR is end of F-function" and the P-function fix-up).
+ *
+ * NOTE(review): GETU32, CAMELLIA_ROLDQ, CAMELLIA_ROLDQo32, CAMELLIA_F,
+ * CAMELLIA_RL1, CAMELLIA_RL8, CamelliaSubkeyL and CamelliaSubkeyR are macros
+ * defined outside this function. The comments below assume the rotate
+ * macros update their first four arguments in place - confirm against the
+ * definitions.
+ */
+ PRUint32 kll, klr, krl, krr; /* left half of key */
+ PRUint32 krll, krlr, krrl, krrr; /* right half of key */
+ PRUint32 il, ir, t0, t1, w0, w1; /* temporary variables */
+ PRUint32 kw4l, kw4r, dw, tl, tr;
+ PRUint32 subL[34];
+ PRUint32 subR[34];
+#if defined(CAMELLIA_NEED_TMP_VARIABLE)
+ PRUint32 tmp; /* never named directly below; presumably used inside the macros */
+#endif
+
+ /**
+ * key = (kll || klr || krl || krr || krll || krlr || krrl || krrr)
+ * (|| is concatenation)
+ */
+
+ kll = GETU32(key);
+ klr = GETU32(key + 4);
+ krl = GETU32(key + 8);
+ krr = GETU32(key + 12);
+ krll = GETU32(key + 16);
+ krlr = GETU32(key + 20);
+ krrl = GETU32(key + 24);
+ krrr = GETU32(key + 28);
+
+ /* generate KL dependent subkeys */
+ subl(0) = kll;
+ subr(0) = klr;
+ subl(1) = krl;
+ subr(1) = krr;
+ CAMELLIA_ROLDQo32(kll, klr, krl, krr, w0, w1, 45);
+ subl(12) = kll;
+ subr(12) = klr;
+ subl(13) = krl;
+ subr(13) = krr;
+ CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15);
+ subl(16) = kll;
+ subr(16) = klr;
+ subl(17) = krl;
+ subr(17) = krr;
+ CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17);
+ subl(22) = kll;
+ subr(22) = klr;
+ subl(23) = krl;
+ subr(23) = krr;
+ CAMELLIA_ROLDQo32(kll, klr, krl, krr, w0, w1, 34);
+ subl(30) = kll;
+ subr(30) = klr;
+ subl(31) = krl;
+ subr(31) = krr;
+
+ /* generate KR dependent subkeys */
+ CAMELLIA_ROLDQ(krll, krlr, krrl, krrr, w0, w1, 15);
+ subl(4) = krll;
+ subr(4) = krlr;
+ subl(5) = krrl;
+ subr(5) = krrr;
+ CAMELLIA_ROLDQ(krll, krlr, krrl, krrr, w0, w1, 15);
+ subl(8) = krll;
+ subr(8) = krlr;
+ subl(9) = krrl;
+ subr(9) = krrr;
+ CAMELLIA_ROLDQ(krll, krlr, krrl, krrr, w0, w1, 30);
+ subl(18) = krll;
+ subr(18) = krlr;
+ subl(19) = krrl;
+ subr(19) = krrr;
+ CAMELLIA_ROLDQo32(krll, krlr, krrl, krrr, w0, w1, 34);
+ subl(26) = krll;
+ subr(26) = krlr;
+ subl(27) = krrl;
+ subr(27) = krrr;
+ CAMELLIA_ROLDQo32(krll, krlr, krrl, krrr, w0, w1, 34); /* nothing is stored after this one; the KR counts 15+15+30+34+34 add up to 128, so presumably this restores the unrotated KR for the KA step - confirm */
+
+ /* generate KA
+ * Starts from the unrotated KL words saved in entries 0 and 1, XORed with
+ * the KR words, and runs four CAMELLIA_F steps with CAMELLIA_SIGMA1..4. */
+ kll = subl(0) ^ krll;
+ klr = subr(0) ^ krlr;
+ krl = subl(1) ^ krrl;
+ krr = subr(1) ^ krrr;
+ CAMELLIA_F(kll, klr,
+ CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R,
+ w0, w1, il, ir, t0, t1);
+ krl ^= w0;
+ krr ^= w1;
+ CAMELLIA_F(krl, krr,
+ CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R,
+ kll, klr, il, ir, t0, t1);
+ kll ^= krll;
+ klr ^= krlr;
+ CAMELLIA_F(kll, klr,
+ CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R,
+ krl, krr, il, ir, t0, t1);
+ krl ^= w0 ^ krrl;
+ krr ^= w1 ^ krrr;
+ CAMELLIA_F(krl, krr,
+ CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R,
+ w0, w1, il, ir, t0, t1);
+ kll ^= w0;
+ klr ^= w1;
+
+ /* generate KB
+ * KR is XORed with KA, then two CAMELLIA_F steps with CAMELLIA_SIGMA5 and
+ * CAMELLIA_SIGMA6 follow; the result stays in krll/krlr/krrl/krrr. */
+ krll ^= kll;
+ krlr ^= klr;
+ krrl ^= krl;
+ krrr ^= krr;
+ CAMELLIA_F(krll, krlr,
+ CAMELLIA_SIGMA5L, CAMELLIA_SIGMA5R,
+ w0, w1, il, ir, t0, t1);
+ krrl ^= w0;
+ krrr ^= w1;
+ CAMELLIA_F(krrl, krrr,
+ CAMELLIA_SIGMA6L, CAMELLIA_SIGMA6R,
+ w0, w1, il, ir, t0, t1);
+ krll ^= w0;
+ krlr ^= w1;
+
+ /* generate KA dependent subkeys */
+ CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15);
+ subl(6) = kll;
+ subr(6) = klr;
+ subl(7) = krl;
+ subr(7) = krr;
+ CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 30);
+ subl(14) = kll;
+ subr(14) = klr;
+ subl(15) = krl;
+ subr(15) = krr;
+ subl(24) = klr; /* entries 24/25 take the same four words shifted by one position (klr, krl, krr, kll), i.e. a further 32-bit rotation done by assignment */
+ subr(24) = krl;
+ subl(25) = krr;
+ subr(25) = kll;
+ CAMELLIA_ROLDQo32(kll, klr, krl, krr, w0, w1, 49);
+ subl(28) = kll;
+ subr(28) = klr;
+ subl(29) = krl;
+ subr(29) = krr;
+
+ /* generate KB dependent subkeys */
+ subl(2) = krll;
+ subr(2) = krlr;
+ subl(3) = krrl;
+ subr(3) = krrr;
+ CAMELLIA_ROLDQ(krll, krlr, krrl, krrr, w0, w1, 30);
+ subl(10) = krll;
+ subr(10) = krlr;
+ subl(11) = krrl;
+ subr(11) = krrr;
+ CAMELLIA_ROLDQ(krll, krlr, krrl, krrr, w0, w1, 30);
+ subl(20) = krll;
+ subr(20) = krlr;
+ subl(21) = krrl;
+ subr(21) = krrr;
+ CAMELLIA_ROLDQo32(krll, krlr, krrl, krrr, w0, w1, 51);
+ subl(32) = krll;
+ subr(32) = krlr;
+ subl(33) = krrl;
+ subr(33) = krrr;
+
+ /* absorb kw2 to other subkeys
+ * Entry 1 is folded into the odd-numbered entries 3..31 and into entry 32.
+ * It is itself updated in place when passing entries 9, 17 and 25. */
+ subl(3) ^= subl(1);
+ subr(3) ^= subr(1);
+ subl(5) ^= subl(1);
+ subr(5) ^= subr(1);
+ subl(7) ^= subl(1);
+ subr(7) ^= subr(1);
+ subl(1) ^= subr(1) & ~subr(9);
+ dw = subl(1) & subl(9), subr(1) ^= CAMELLIA_RL1(dw);
+ subl(11) ^= subl(1);
+ subr(11) ^= subr(1);
+ subl(13) ^= subl(1);
+ subr(13) ^= subr(1);
+ subl(15) ^= subl(1);
+ subr(15) ^= subr(1);
+ subl(1) ^= subr(1) & ~subr(17);
+ dw = subl(1) & subl(17), subr(1) ^= CAMELLIA_RL1(dw);
+ subl(19) ^= subl(1);
+ subr(19) ^= subr(1);
+ subl(21) ^= subl(1);
+ subr(21) ^= subr(1);
+ subl(23) ^= subl(1);
+ subr(23) ^= subr(1);
+ subl(1) ^= subr(1) & ~subr(25);
+ dw = subl(1) & subl(25), subr(1) ^= CAMELLIA_RL1(dw);
+ subl(27) ^= subl(1);
+ subr(27) ^= subr(1);
+ subl(29) ^= subl(1);
+ subr(29) ^= subr(1);
+ subl(31) ^= subl(1);
+ subr(31) ^= subr(1);
+ subl(32) ^= subl(1);
+ subr(32) ^= subr(1);
+
+ /* absorb kw4 to other subkeys
+ * Entry 33 is copied to kw4l/kw4r and folded into the even-numbered
+ * entries 30 down to 0. The copy is updated when passing entries 24, 16
+ * and 8. */
+ kw4l = subl(33);
+ kw4r = subr(33);
+ subl(30) ^= kw4l;
+ subr(30) ^= kw4r;
+ subl(28) ^= kw4l;
+ subr(28) ^= kw4r;
+ subl(26) ^= kw4l;
+ subr(26) ^= kw4r;
+ kw4l ^= kw4r & ~subr(24);
+ dw = kw4l & subl(24), kw4r ^= CAMELLIA_RL1(dw);
+ subl(22) ^= kw4l;
+ subr(22) ^= kw4r;
+ subl(20) ^= kw4l;
+ subr(20) ^= kw4r;
+ subl(18) ^= kw4l;
+ subr(18) ^= kw4r;
+ kw4l ^= kw4r & ~subr(16);
+ dw = kw4l & subl(16), kw4r ^= CAMELLIA_RL1(dw);
+ subl(14) ^= kw4l;
+ subr(14) ^= kw4r;
+ subl(12) ^= kw4l;
+ subr(12) ^= kw4r;
+ subl(10) ^= kw4l;
+ subr(10) ^= kw4r;
+ kw4l ^= kw4r & ~subr(8);
+ dw = kw4l & subl(8), kw4r ^= CAMELLIA_RL1(dw);
+ subl(6) ^= kw4l;
+ subr(6) ^= kw4r;
+ subl(4) ^= kw4l;
+ subr(4) ^= kw4r;
+ subl(2) ^= kw4l;
+ subr(2) ^= kw4r;
+ subl(0) ^= kw4l;
+ subr(0) ^= kw4r;
+
+ /* key XOR is end of F-function
+ * First stage that writes the caller's table: entries 0 and 2..32. */
+ CamelliaSubkeyL(0) = subl(0) ^ subl(2);
+ CamelliaSubkeyR(0) = subr(0) ^ subr(2);
+ CamelliaSubkeyL(2) = subl(3);
+ CamelliaSubkeyR(2) = subr(3);
+ CamelliaSubkeyL(3) = subl(2) ^ subl(4);
+ CamelliaSubkeyR(3) = subr(2) ^ subr(4);
+ CamelliaSubkeyL(4) = subl(3) ^ subl(5);
+ CamelliaSubkeyR(4) = subr(3) ^ subr(5);
+ CamelliaSubkeyL(5) = subl(4) ^ subl(6);
+ CamelliaSubkeyR(5) = subr(4) ^ subr(6);
+ CamelliaSubkeyL(6) = subl(5) ^ subl(7);
+ CamelliaSubkeyR(6) = subr(5) ^ subr(7);
+ tl = subl(10) ^ (subr(10) & ~subr(8));
+ dw = tl & subl(8), tr = subr(10) ^ CAMELLIA_RL1(dw);
+ CamelliaSubkeyL(7) = subl(6) ^ tl;
+ CamelliaSubkeyR(7) = subr(6) ^ tr;
+ CamelliaSubkeyL(8) = subl(8);
+ CamelliaSubkeyR(8) = subr(8);
+ CamelliaSubkeyL(9) = subl(9);
+ CamelliaSubkeyR(9) = subr(9);
+ tl = subl(7) ^ (subr(7) & ~subr(9));
+ dw = tl & subl(9), tr = subr(7) ^ CAMELLIA_RL1(dw);
+ CamelliaSubkeyL(10) = tl ^ subl(11);
+ CamelliaSubkeyR(10) = tr ^ subr(11);
+ CamelliaSubkeyL(11) = subl(10) ^ subl(12);
+ CamelliaSubkeyR(11) = subr(10) ^ subr(12);
+ CamelliaSubkeyL(12) = subl(11) ^ subl(13);
+ CamelliaSubkeyR(12) = subr(11) ^ subr(13);
+ CamelliaSubkeyL(13) = subl(12) ^ subl(14);
+ CamelliaSubkeyR(13) = subr(12) ^ subr(14);
+ CamelliaSubkeyL(14) = subl(13) ^ subl(15);
+ CamelliaSubkeyR(14) = subr(13) ^ subr(15);
+ tl = subl(18) ^ (subr(18) & ~subr(16));
+ dw = tl & subl(16), tr = subr(18) ^ CAMELLIA_RL1(dw);
+ CamelliaSubkeyL(15) = subl(14) ^ tl;
+ CamelliaSubkeyR(15) = subr(14) ^ tr;
+ CamelliaSubkeyL(16) = subl(16);
+ CamelliaSubkeyR(16) = subr(16);
+ CamelliaSubkeyL(17) = subl(17);
+ CamelliaSubkeyR(17) = subr(17);
+ tl = subl(15) ^ (subr(15) & ~subr(17));
+ dw = tl & subl(17), tr = subr(15) ^ CAMELLIA_RL1(dw);
+ CamelliaSubkeyL(18) = tl ^ subl(19);
+ CamelliaSubkeyR(18) = tr ^ subr(19);
+ CamelliaSubkeyL(19) = subl(18) ^ subl(20);
+ CamelliaSubkeyR(19) = subr(18) ^ subr(20);
+ CamelliaSubkeyL(20) = subl(19) ^ subl(21);
+ CamelliaSubkeyR(20) = subr(19) ^ subr(21);
+ CamelliaSubkeyL(21) = subl(20) ^ subl(22);
+ CamelliaSubkeyR(21) = subr(20) ^ subr(22);
+ CamelliaSubkeyL(22) = subl(21) ^ subl(23);
+ CamelliaSubkeyR(22) = subr(21) ^ subr(23);
+ tl = subl(26) ^ (subr(26) & ~subr(24));
+ dw = tl & subl(24), tr = subr(26) ^ CAMELLIA_RL1(dw);
+ CamelliaSubkeyL(23) = subl(22) ^ tl;
+ CamelliaSubkeyR(23) = subr(22) ^ tr;
+ CamelliaSubkeyL(24) = subl(24);
+ CamelliaSubkeyR(24) = subr(24);
+ CamelliaSubkeyL(25) = subl(25);
+ CamelliaSubkeyR(25) = subr(25);
+ tl = subl(23) ^ (subr(23) & ~subr(25));
+ dw = tl & subl(25), tr = subr(23) ^ CAMELLIA_RL1(dw);
+ CamelliaSubkeyL(26) = tl ^ subl(27);
+ CamelliaSubkeyR(26) = tr ^ subr(27);
+ CamelliaSubkeyL(27) = subl(26) ^ subl(28);
+ CamelliaSubkeyR(27) = subr(26) ^ subr(28);
+ CamelliaSubkeyL(28) = subl(27) ^ subl(29);
+ CamelliaSubkeyR(28) = subr(27) ^ subr(29);
+ CamelliaSubkeyL(29) = subl(28) ^ subl(30);
+ CamelliaSubkeyR(29) = subr(28) ^ subr(30);
+ CamelliaSubkeyL(30) = subl(29) ^ subl(31);
+ CamelliaSubkeyR(30) = subr(29) ^ subr(31);
+ CamelliaSubkeyL(31) = subl(30);
+ CamelliaSubkeyR(31) = subr(30);
+ CamelliaSubkeyL(32) = subl(32) ^ subl(31);
+ CamelliaSubkeyR(32) = subr(32) ^ subr(31);
+
+ /* apply the inverse of the last half of P-function
+ * Rewrites entries 2..7, 10..15, 18..23 and 26..31 in place. Entries 0, 8,
+ * 9, 16, 17, 24, 25 and 32 keep the values stored by the stage above. */
+ dw = CamelliaSubkeyL(2) ^ CamelliaSubkeyR(2), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(2) = CamelliaSubkeyL(2) ^ dw, CamelliaSubkeyL(2) = dw;
+ dw = CamelliaSubkeyL(3) ^ CamelliaSubkeyR(3), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(3) = CamelliaSubkeyL(3) ^ dw, CamelliaSubkeyL(3) = dw;
+ dw = CamelliaSubkeyL(4) ^ CamelliaSubkeyR(4), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(4) = CamelliaSubkeyL(4) ^ dw, CamelliaSubkeyL(4) = dw;
+ dw = CamelliaSubkeyL(5) ^ CamelliaSubkeyR(5), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(5) = CamelliaSubkeyL(5) ^ dw, CamelliaSubkeyL(5) = dw;
+ dw = CamelliaSubkeyL(6) ^ CamelliaSubkeyR(6), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(6) = CamelliaSubkeyL(6) ^ dw, CamelliaSubkeyL(6) = dw;
+ dw = CamelliaSubkeyL(7) ^ CamelliaSubkeyR(7), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(7) = CamelliaSubkeyL(7) ^ dw, CamelliaSubkeyL(7) = dw;
+ dw = CamelliaSubkeyL(10) ^ CamelliaSubkeyR(10), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(10) = CamelliaSubkeyL(10) ^ dw, CamelliaSubkeyL(10) = dw;
+ dw = CamelliaSubkeyL(11) ^ CamelliaSubkeyR(11), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(11) = CamelliaSubkeyL(11) ^ dw, CamelliaSubkeyL(11) = dw;
+ dw = CamelliaSubkeyL(12) ^ CamelliaSubkeyR(12), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(12) = CamelliaSubkeyL(12) ^ dw, CamelliaSubkeyL(12) = dw;
+ dw = CamelliaSubkeyL(13) ^ CamelliaSubkeyR(13), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(13) = CamelliaSubkeyL(13) ^ dw, CamelliaSubkeyL(13) = dw;
+ dw = CamelliaSubkeyL(14) ^ CamelliaSubkeyR(14), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(14) = CamelliaSubkeyL(14) ^ dw, CamelliaSubkeyL(14) = dw;
+ dw = CamelliaSubkeyL(15) ^ CamelliaSubkeyR(15), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(15) = CamelliaSubkeyL(15) ^ dw, CamelliaSubkeyL(15) = dw;
+ dw = CamelliaSubkeyL(18) ^ CamelliaSubkeyR(18), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(18) = CamelliaSubkeyL(18) ^ dw, CamelliaSubkeyL(18) = dw;
+ dw = CamelliaSubkeyL(19) ^ CamelliaSubkeyR(19), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(19) = CamelliaSubkeyL(19) ^ dw, CamelliaSubkeyL(19) = dw;
+ dw = CamelliaSubkeyL(20) ^ CamelliaSubkeyR(20), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(20) = CamelliaSubkeyL(20) ^ dw, CamelliaSubkeyL(20) = dw;
+ dw = CamelliaSubkeyL(21) ^ CamelliaSubkeyR(21), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(21) = CamelliaSubkeyL(21) ^ dw, CamelliaSubkeyL(21) = dw;
+ dw = CamelliaSubkeyL(22) ^ CamelliaSubkeyR(22), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(22) = CamelliaSubkeyL(22) ^ dw, CamelliaSubkeyL(22) = dw;
+ dw = CamelliaSubkeyL(23) ^ CamelliaSubkeyR(23), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(23) = CamelliaSubkeyL(23) ^ dw, CamelliaSubkeyL(23) = dw;
+ dw = CamelliaSubkeyL(26) ^ CamelliaSubkeyR(26), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(26) = CamelliaSubkeyL(26) ^ dw, CamelliaSubkeyL(26) = dw;
+ dw = CamelliaSubkeyL(27) ^ CamelliaSubkeyR(27), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(27) = CamelliaSubkeyL(27) ^ dw, CamelliaSubkeyL(27) = dw;
+ dw = CamelliaSubkeyL(28) ^ CamelliaSubkeyR(28), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(28) = CamelliaSubkeyL(28) ^ dw, CamelliaSubkeyL(28) = dw;
+ dw = CamelliaSubkeyL(29) ^ CamelliaSubkeyR(29), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(29) = CamelliaSubkeyL(29) ^ dw, CamelliaSubkeyL(29) = dw;
+ dw = CamelliaSubkeyL(30) ^ CamelliaSubkeyR(30), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(30) = CamelliaSubkeyL(30) ^ dw, CamelliaSubkeyL(30) = dw;
+ dw = CamelliaSubkeyL(31) ^ CamelliaSubkeyR(31), dw = CAMELLIA_RL8(dw);
+ CamelliaSubkeyR(31) = CamelliaSubkeyL(31) ^ dw, CamelliaSubkeyL(31) = dw;
+
+ return;
+}
+
+void
+camellia_setup192(const unsigned char *key, PRUint32 *subkey)
+{
+    /*
+     * Widen the 24-byte key to the 32-byte form camellia_setup256() consumes:
+     * bytes 0..23 are copied unchanged and bytes 24..31 are the bitwise
+     * complement of bytes 16..23. The subkey table is then filled in by
+     * camellia_setup256().
+     */
+    unsigned char widened[32];
+    int pos;
+
+    memcpy(widened, key, 24);
+    for (pos = 0; pos < 8; pos++) {
+        widened[24 + pos] = (unsigned char)~key[16 + pos];
+    }
+
+    camellia_setup256(widened, subkey);
+}
+
+/**
+ * Stuff related to camellia encryption/decryption
+ *
+ */
+SECStatus NO_SANITIZE_ALIGNMENT
+camellia_encrypt128(const PRUint32 *subkey,
+ unsigned char *output,
+ const unsigned char *input)
+{ /*
+ * Encrypt one block using the table layout camellia_setup128() writes.
+ *
+ * subkey: read through CamelliaSubkeyL() and CamelliaSubkeyR() at entry 0
+ * and entries 2..24, in ascending order.
+ * input: read through GETU32 at byte offsets 0, 4, 8 and 12.
+ * output: written through PUTU32 at byte offsets 0, 4, 8 and 12.
+ * Returns SECSuccess; there is no other return path.
+ *
+ * NOTE(review): GETU32, PUTU32, CAMELLIA_ROUNDSM, CAMELLIA_FLS and the
+ * CamelliaSubkey macros are defined outside this function.
+ */
+ PRUint32 il, ir, t0, t1;
+ PRUint32 io[4];
+#if defined(CAMELLIA_NEED_TMP_VARIABLE)
+ PRUint32 tmp; /* never named directly below; presumably used inside the macros */
+#endif
+
+ io[0] = GETU32(input);
+ io[1] = GETU32(input + 4);
+ io[2] = GETU32(input + 8);
+ io[3] = GETU32(input + 12);
+
+ /* pre whitening but absorb kw2*/
+ io[0] ^= CamelliaSubkeyL(0);
+ io[1] ^= CamelliaSubkeyR(0);
+ /* main iteration
+ * Three groups of six CAMELLIA_ROUNDSM steps (entries 2..7, 10..15 and
+ * 18..23), separated by CAMELLIA_FLS with entries 8/9 and 16/17. */
+
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(2), CamelliaSubkeyR(2),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(3), CamelliaSubkeyR(3),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(4), CamelliaSubkeyR(4),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(5), CamelliaSubkeyR(5),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(6), CamelliaSubkeyR(6),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(7), CamelliaSubkeyR(7),
+ io[0], io[1], il, ir, t0, t1);
+
+ CAMELLIA_FLS(io[0], io[1], io[2], io[3],
+ CamelliaSubkeyL(8), CamelliaSubkeyR(8),
+ CamelliaSubkeyL(9), CamelliaSubkeyR(9),
+ t0, t1, il, ir);
+
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(10), CamelliaSubkeyR(10),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(11), CamelliaSubkeyR(11),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(12), CamelliaSubkeyR(12),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(13), CamelliaSubkeyR(13),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(14), CamelliaSubkeyR(14),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(15), CamelliaSubkeyR(15),
+ io[0], io[1], il, ir, t0, t1);
+
+ CAMELLIA_FLS(io[0], io[1], io[2], io[3],
+ CamelliaSubkeyL(16), CamelliaSubkeyR(16),
+ CamelliaSubkeyL(17), CamelliaSubkeyR(17),
+ t0, t1, il, ir);
+
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(18), CamelliaSubkeyR(18),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(19), CamelliaSubkeyR(19),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(20), CamelliaSubkeyR(20),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(21), CamelliaSubkeyR(21),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(22), CamelliaSubkeyR(22),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(23), CamelliaSubkeyR(23),
+ io[0], io[1], il, ir, t0, t1);
+
+ /* post whitening but kw4 */
+ io[2] ^= CamelliaSubkeyL(24);
+ io[3] ^= CamelliaSubkeyR(24);
+
+ t0 = io[0]; /* exchange the word pairs io[0..1] and io[2..3] before storing */
+ t1 = io[1];
+ io[0] = io[2];
+ io[1] = io[3];
+ io[2] = t0;
+ io[3] = t1;
+
+ PUTU32(output, io[0]);
+ PUTU32(output + 4, io[1]);
+ PUTU32(output + 8, io[2]);
+ PUTU32(output + 12, io[3]);
+
+ return SECSuccess;
+}
+
+SECStatus NO_SANITIZE_ALIGNMENT
+camellia_decrypt128(const PRUint32 *subkey,
+ unsigned char *output,
+ const unsigned char *input)
+{ /*
+ * Decrypt one block using the table layout camellia_setup128() writes.
+ *
+ * Same structure as camellia_encrypt128() with the subkey entries taken in
+ * the opposite order: 24 first, then 23..2 descending, then 0.
+ *
+ * subkey: read through CamelliaSubkeyL() and CamelliaSubkeyR().
+ * input: read through GETU32 at byte offsets 0, 4, 8 and 12.
+ * output: written through PUTU32 at byte offsets 0, 4, 8 and 12.
+ * Returns SECSuccess; there is no other return path.
+ *
+ * NOTE(review): GETU32, PUTU32, CAMELLIA_ROUNDSM, CAMELLIA_FLS and the
+ * CamelliaSubkey macros are defined outside this function.
+ */
+ PRUint32 il, ir, t0, t1; /* temporary variables */
+ PRUint32 io[4];
+#if defined(CAMELLIA_NEED_TMP_VARIABLE)
+ PRUint32 tmp; /* never named directly below; presumably used inside the macros */
+#endif
+
+ io[0] = GETU32(input);
+ io[1] = GETU32(input + 4);
+ io[2] = GETU32(input + 8);
+ io[3] = GETU32(input + 12);
+
+ /* pre whitening: uses entry 24, which camellia_encrypt128() applies last */
+ io[0] ^= CamelliaSubkeyL(24);
+ io[1] ^= CamelliaSubkeyR(24);
+
+ /* main iteration
+ * Three groups of six CAMELLIA_ROUNDSM steps (entries 23..18, 15..10 and
+ * 7..2), separated by CAMELLIA_FLS with entries 17/16 and 9/8. */
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(23), CamelliaSubkeyR(23),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(22), CamelliaSubkeyR(22),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(21), CamelliaSubkeyR(21),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(20), CamelliaSubkeyR(20),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(19), CamelliaSubkeyR(19),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(18), CamelliaSubkeyR(18),
+ io[0], io[1], il, ir, t0, t1);
+
+ CAMELLIA_FLS(io[0], io[1], io[2], io[3],
+ CamelliaSubkeyL(17), CamelliaSubkeyR(17),
+ CamelliaSubkeyL(16), CamelliaSubkeyR(16),
+ t0, t1, il, ir);
+
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(15), CamelliaSubkeyR(15),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(14), CamelliaSubkeyR(14),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(13), CamelliaSubkeyR(13),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(12), CamelliaSubkeyR(12),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(11), CamelliaSubkeyR(11),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(10), CamelliaSubkeyR(10),
+ io[0], io[1], il, ir, t0, t1);
+
+ CAMELLIA_FLS(io[0], io[1], io[2], io[3],
+ CamelliaSubkeyL(9), CamelliaSubkeyR(9),
+ CamelliaSubkeyL(8), CamelliaSubkeyR(8),
+ t0, t1, il, ir);
+
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(7), CamelliaSubkeyR(7),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(6), CamelliaSubkeyR(6),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(5), CamelliaSubkeyR(5),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(4), CamelliaSubkeyR(4),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(3), CamelliaSubkeyR(3),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(2), CamelliaSubkeyR(2),
+ io[0], io[1], il, ir, t0, t1);
+
+ /* post whitening: uses entry 0, which camellia_encrypt128() applies first */
+ io[2] ^= CamelliaSubkeyL(0);
+ io[3] ^= CamelliaSubkeyR(0);
+
+ t0 = io[0]; /* exchange the word pairs io[0..1] and io[2..3] before storing */
+ t1 = io[1];
+ io[0] = io[2];
+ io[1] = io[3];
+ io[2] = t0;
+ io[3] = t1;
+
+ PUTU32(output, io[0]);
+ PUTU32(output + 4, io[1]);
+ PUTU32(output + 8, io[2]);
+ PUTU32(output + 12, io[3]);
+
+ return SECSuccess;
+}
+
+/**
+ * stuff for 192 and 256bit encryption/decryption
+ */
+SECStatus NO_SANITIZE_ALIGNMENT
+camellia_encrypt256(const PRUint32 *subkey,
+ unsigned char *output,
+ const unsigned char *input)
+{ /*
+ * Encrypt one block using the table layout camellia_setup256() writes
+ * (camellia_setup192() produces the same layout by calling it).
+ *
+ * subkey: read through CamelliaSubkeyL() and CamelliaSubkeyR() at entry 0
+ * and entries 2..32, in ascending order.
+ * input: read through GETU32 at byte offsets 0, 4, 8 and 12.
+ * output: written through PUTU32 at byte offsets 0, 4, 8 and 12.
+ * Returns SECSuccess; there is no other return path.
+ *
+ * NOTE(review): GETU32, PUTU32, CAMELLIA_ROUNDSM, CAMELLIA_FLS and the
+ * CamelliaSubkey macros are defined outside this function.
+ */
+ PRUint32 il, ir, t0, t1; /* temporary variables */
+ PRUint32 io[4];
+#if defined(CAMELLIA_NEED_TMP_VARIABLE)
+ PRUint32 tmp; /* never named directly below; presumably used inside the macros */
+#endif
+
+ io[0] = GETU32(input);
+ io[1] = GETU32(input + 4);
+ io[2] = GETU32(input + 8);
+ io[3] = GETU32(input + 12);
+
+ /* pre whitening but absorb kw2*/
+ io[0] ^= CamelliaSubkeyL(0);
+ io[1] ^= CamelliaSubkeyR(0);
+
+ /* main iteration
+ * Four groups of six CAMELLIA_ROUNDSM steps (entries 2..7, 10..15, 18..23
+ * and 26..31), separated by CAMELLIA_FLS with entries 8/9, 16/17, 24/25. */
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(2), CamelliaSubkeyR(2),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(3), CamelliaSubkeyR(3),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(4), CamelliaSubkeyR(4),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(5), CamelliaSubkeyR(5),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(6), CamelliaSubkeyR(6),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(7), CamelliaSubkeyR(7),
+ io[0], io[1], il, ir, t0, t1);
+
+ CAMELLIA_FLS(io[0], io[1], io[2], io[3],
+ CamelliaSubkeyL(8), CamelliaSubkeyR(8),
+ CamelliaSubkeyL(9), CamelliaSubkeyR(9),
+ t0, t1, il, ir);
+
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(10), CamelliaSubkeyR(10),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(11), CamelliaSubkeyR(11),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(12), CamelliaSubkeyR(12),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(13), CamelliaSubkeyR(13),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(14), CamelliaSubkeyR(14),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(15), CamelliaSubkeyR(15),
+ io[0], io[1], il, ir, t0, t1);
+
+ CAMELLIA_FLS(io[0], io[1], io[2], io[3],
+ CamelliaSubkeyL(16), CamelliaSubkeyR(16),
+ CamelliaSubkeyL(17), CamelliaSubkeyR(17),
+ t0, t1, il, ir);
+
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(18), CamelliaSubkeyR(18),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(19), CamelliaSubkeyR(19),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(20), CamelliaSubkeyR(20),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(21), CamelliaSubkeyR(21),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(22), CamelliaSubkeyR(22),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(23), CamelliaSubkeyR(23),
+ io[0], io[1], il, ir, t0, t1);
+
+ CAMELLIA_FLS(io[0], io[1], io[2], io[3],
+ CamelliaSubkeyL(24), CamelliaSubkeyR(24),
+ CamelliaSubkeyL(25), CamelliaSubkeyR(25),
+ t0, t1, il, ir);
+
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(26), CamelliaSubkeyR(26),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(27), CamelliaSubkeyR(27),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(28), CamelliaSubkeyR(28),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(29), CamelliaSubkeyR(29),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(30), CamelliaSubkeyR(30),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(31), CamelliaSubkeyR(31),
+ io[0], io[1], il, ir, t0, t1);
+
+ /* post whitening but kw4 */
+ io[2] ^= CamelliaSubkeyL(32);
+ io[3] ^= CamelliaSubkeyR(32);
+
+ t0 = io[0]; /* exchange the word pairs io[0..1] and io[2..3] before storing */
+ t1 = io[1];
+ io[0] = io[2];
+ io[1] = io[3];
+ io[2] = t0;
+ io[3] = t1;
+
+ PUTU32(output, io[0]);
+ PUTU32(output + 4, io[1]);
+ PUTU32(output + 8, io[2]);
+ PUTU32(output + 12, io[3]);
+
+ return SECSuccess;
+}
+
+/*
+ * camellia_decrypt256
+ *
+ * Decrypt a single block: four 32-bit words are read from |input| with
+ * GETU32, transformed with the expanded key in |subkey|, and written to
+ * |output| with PUTU32.
+ *
+ * Subkey 32 is XORed in before the rounds, the round macros then consume
+ * subkeys 31 down to 2 (CAMELLIA_FLS uses the pairs 25/24, 17/16 and 9/8),
+ * and subkey 0 is XORed in afterwards. This is the reverse of the order in
+ * which the encrypt routine above finishes (..., 31, then 32).
+ *
+ * The ECB/CBC workers select this routine for every key size other than
+ * 16 bytes.
+ * NOTE(review): that implies 24-byte keys also rely on this subkey layout;
+ * confirm against camellia_setup192.
+ *
+ * Always returns SECSuccess.
+ */
+SECStatus NO_SANITIZE_ALIGNMENT
+camellia_decrypt256(const PRUint32 *subkey,
+ unsigned char *output,
+ const unsigned char *input)
+{
+ PRUint32 il, ir, t0, t1; /* temporary variables */
+ PRUint32 io[4];
+#if defined(CAMELLIA_NEED_TMP_VARIABLE)
+ PRUint32 tmp;
+#endif
+
+ /* load the block as four 32-bit words */
+ io[0] = GETU32(input);
+ io[1] = GETU32(input + 4);
+ io[2] = GETU32(input + 8);
+ io[3] = GETU32(input + 12);
+
+ /* pre whitening but absorb kw2 */
+ io[0] ^= CamelliaSubkeyL(32);
+ io[1] ^= CamelliaSubkeyR(32);
+
+ /* main iteration */
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(31), CamelliaSubkeyR(31),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(30), CamelliaSubkeyR(30),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(29), CamelliaSubkeyR(29),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(28), CamelliaSubkeyR(28),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(27), CamelliaSubkeyR(27),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(26), CamelliaSubkeyR(26),
+ io[0], io[1], il, ir, t0, t1);
+
+ CAMELLIA_FLS(io[0], io[1], io[2], io[3],
+ CamelliaSubkeyL(25), CamelliaSubkeyR(25),
+ CamelliaSubkeyL(24), CamelliaSubkeyR(24),
+ t0, t1, il, ir);
+
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(23), CamelliaSubkeyR(23),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(22), CamelliaSubkeyR(22),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(21), CamelliaSubkeyR(21),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(20), CamelliaSubkeyR(20),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(19), CamelliaSubkeyR(19),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(18), CamelliaSubkeyR(18),
+ io[0], io[1], il, ir, t0, t1);
+
+ CAMELLIA_FLS(io[0], io[1], io[2], io[3],
+ CamelliaSubkeyL(17), CamelliaSubkeyR(17),
+ CamelliaSubkeyL(16), CamelliaSubkeyR(16),
+ t0, t1, il, ir);
+
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(15), CamelliaSubkeyR(15),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(14), CamelliaSubkeyR(14),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(13), CamelliaSubkeyR(13),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(12), CamelliaSubkeyR(12),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(11), CamelliaSubkeyR(11),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(10), CamelliaSubkeyR(10),
+ io[0], io[1], il, ir, t0, t1);
+
+ CAMELLIA_FLS(io[0], io[1], io[2], io[3],
+ CamelliaSubkeyL(9), CamelliaSubkeyR(9),
+ CamelliaSubkeyL(8), CamelliaSubkeyR(8),
+ t0, t1, il, ir);
+
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(7), CamelliaSubkeyR(7),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(6), CamelliaSubkeyR(6),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(5), CamelliaSubkeyR(5),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(4), CamelliaSubkeyR(4),
+ io[0], io[1], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[0], io[1],
+ CamelliaSubkeyL(3), CamelliaSubkeyR(3),
+ io[2], io[3], il, ir, t0, t1);
+ CAMELLIA_ROUNDSM(io[2], io[3],
+ CamelliaSubkeyL(2), CamelliaSubkeyR(2),
+ io[0], io[1], il, ir, t0, t1);
+
+ /* post whitening but kw4 */
+ io[2] ^= CamelliaSubkeyL(0);
+ io[3] ^= CamelliaSubkeyR(0);
+
+ /* swap the two 64-bit halves before storing the result */
+ t0 = io[0];
+ t1 = io[1];
+ io[0] = io[2];
+ io[1] = io[3];
+ io[2] = t0;
+ io[3] = t1;
+
+ PUTU32(output, io[0]);
+ PUTU32(output + 4, io[1]);
+ PUTU32(output + 8, io[2]);
+ PUTU32(output + 12, io[3]);
+
+ return SECSuccess;
+}
+
+/**************************************************************************
+ *
+ * Stuff related to the Camellia key schedule
+ *
+ *************************************************************************/
+
+/*
+ * camellia_key_expansion
+ *
+ * Build the key schedule in cx->expandedKey from |key| using the setup
+ * routine that matches |keysize| (given in bytes), and record |keysize|
+ * in the context.
+ *
+ * Returns SECSuccess for 16-, 24- and 32-byte keys. Any other length sets
+ * SEC_ERROR_INVALID_ARGS and returns SECFailure without modifying the
+ * context, rather than reporting success with no key schedule built.
+ */
+SECStatus
+camellia_key_expansion(CamelliaContext *cx,
+                       const unsigned char *key,
+                       const unsigned int keysize)
+{
+    switch (keysize) {
+        case 16:
+            camellia_setup128(key, cx->expandedKey);
+            break;
+        case 24:
+            camellia_setup192(key, cx->expandedKey);
+            break;
+        case 32:
+            camellia_setup256(key, cx->expandedKey);
+            break;
+        default:
+            /* unsupported length: fail loudly instead of silently succeeding */
+            PORT_SetError(SEC_ERROR_INVALID_ARGS);
+            return SECFailure;
+    }
+
+    /* only record the size once a schedule for it really exists */
+    cx->keysize = keysize;
+    return SECSuccess;
+}
+
+/**************************************************************************
+ *
+ * Camellia modes of operation (ECB and CBC)
+ *
+ *************************************************************************/
+
+/*
+ * camellia_encryptECB
+ *
+ * ECB worker: encrypts |inputLen| bytes from |input| into |output|, one
+ * CAMELLIA_BLOCK_SIZE block at a time, with the key schedule held in |cx|.
+ * |outputLen| and |maxOutputLen| are not used here. |inputLen| must be a
+ * multiple of the block size (Camellia_Encrypt checks this before calling
+ * the worker). Always returns SECSuccess.
+ */
+SECStatus
+camellia_encryptECB(CamelliaContext *cx, unsigned char *output,
+                    unsigned int *outputLen, unsigned int maxOutputLen,
+                    const unsigned char *input, unsigned int inputLen)
+{
+    /* 16-byte keys use the 128-bit routine; every other size the 256-bit one */
+    CamelliaBlockFunc *blockFn = &camellia_encrypt256;
+
+    if (cx->keysize == 16) {
+        blockFn = &camellia_encrypt128;
+    }
+
+    for (; inputLen > 0; inputLen -= CAMELLIA_BLOCK_SIZE) {
+        blockFn(cx->expandedKey, output, input);
+
+        input += CAMELLIA_BLOCK_SIZE;
+        output += CAMELLIA_BLOCK_SIZE;
+    }
+    return SECSuccess;
+}
+
+/*
+ * camellia_encryptCBC
+ *
+ * CBC worker: each plaintext block is XORed with the previous ciphertext
+ * block (cx->iv for the first one) and then encrypted into |output|.
+ * On return cx->iv holds the last ciphertext block produced, so a later
+ * call continues the chain. A zero-length input leaves cx->iv untouched.
+ * |outputLen| and |maxOutputLen| are not used here. |inputLen| must be a
+ * multiple of the block size. Always returns SECSuccess.
+ */
+SECStatus
+camellia_encryptCBC(CamelliaContext *cx, unsigned char *output,
+                    unsigned int *outputLen, unsigned int maxOutputLen,
+                    const unsigned char *input, unsigned int inputLen)
+{
+    unsigned char xored[CAMELLIA_BLOCK_SIZE];
+    unsigned char *prev;
+    CamelliaBlockFunc *blockFn;
+    unsigned int k;
+
+    if (inputLen == 0) {
+        return SECSuccess;
+    }
+
+    prev = cx->iv;
+    blockFn = (cx->keysize != 16) ? &camellia_encrypt256
+                                  : &camellia_encrypt128;
+
+    for (; inputLen > 0; inputLen -= CAMELLIA_BLOCK_SIZE) {
+        /* chain in the previous ciphertext block (the IV on the first pass) */
+        for (k = 0; k != CAMELLIA_BLOCK_SIZE; k++) {
+            xored[k] = input[k] ^ prev[k];
+        }
+        blockFn(cx->expandedKey, output, xored);
+
+        /* the block just written feeds the next iteration */
+        prev = output;
+        input += CAMELLIA_BLOCK_SIZE;
+        output += CAMELLIA_BLOCK_SIZE;
+    }
+
+    /* keep the final ciphertext block as the IV for a subsequent call */
+    memcpy(cx->iv, prev, CAMELLIA_BLOCK_SIZE);
+    return SECSuccess;
+}
+
+/*
+ * camellia_decryptECB
+ *
+ * ECB worker: decrypts |inputLen| bytes from |input| into |output|, one
+ * CAMELLIA_BLOCK_SIZE block at a time, with the key schedule held in |cx|.
+ * |outputLen| and |maxOutputLen| are not used here. |inputLen| must be a
+ * multiple of the block size (Camellia_Decrypt checks this before calling
+ * the worker). Always returns SECSuccess.
+ */
+SECStatus
+camellia_decryptECB(CamelliaContext *cx, unsigned char *output,
+                    unsigned int *outputLen, unsigned int maxOutputLen,
+                    const unsigned char *input, unsigned int inputLen)
+{
+    /* 16-byte keys use the 128-bit routine; every other size the 256-bit one */
+    CamelliaBlockFunc *blockFn = &camellia_decrypt256;
+
+    if (cx->keysize == 16) {
+        blockFn = &camellia_decrypt128;
+    }
+
+    for (; inputLen > 0; inputLen -= CAMELLIA_BLOCK_SIZE) {
+        blockFn(cx->expandedKey, output, input);
+
+        input += CAMELLIA_BLOCK_SIZE;
+        output += CAMELLIA_BLOCK_SIZE;
+    }
+    return SECSuccess;
+}
+
+/*
+ * camellia_decryptCBC
+ *
+ * CBC worker: decrypts |inputLen| bytes from |input| into |output| and
+ * leaves the last ciphertext block of this call in cx->iv for the next one.
+ * Blocks are processed from the last one back to the first.
+ * |outputLen| and |maxOutputLen| are not used here. |inputLen| is expected
+ * to be a multiple of CAMELLIA_BLOCK_SIZE (Camellia_Decrypt checks this).
+ * Returns SECSuccess.
+ */
+SECStatus
+camellia_decryptCBC(CamelliaContext *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen)
+{
+ const unsigned char *in;
+ unsigned char *out;
+ unsigned int j;
+ unsigned char newIV[CAMELLIA_BLOCK_SIZE];
+ CamelliaBlockFunc *decryptor;
+
+ if (!inputLen)
+ return SECSuccess;
+
+ /* debug-build check on how the two buffers may overlap: either output
+ * does not start before input, or it ends before input begins */
+ PORT_Assert(output - input >= 0 || input - output >= (int)inputLen);
+
+ /* point at the last block and save its ciphertext now; it becomes the
+ * next IV and may be overwritten below if output overlaps input */
+ in = input + (inputLen - CAMELLIA_BLOCK_SIZE);
+ memcpy(newIV, in, CAMELLIA_BLOCK_SIZE);
+ out = output + (inputLen - CAMELLIA_BLOCK_SIZE);
+
+ /* 16-byte keys use the 128-bit routine; all other sizes the 256-bit one */
+ decryptor = (cx->keysize == 16)
+ ? &camellia_decrypt128
+ : &camellia_decrypt256;
+
+ /* every block except the first: decrypt, then XOR with the preceding
+ * ciphertext block, which still sits CAMELLIA_BLOCK_SIZE bytes before |in| */
+ while (inputLen > CAMELLIA_BLOCK_SIZE) {
+ (*decryptor)(cx->expandedKey, out, in);
+
+ /* (int)(j - CAMELLIA_BLOCK_SIZE) is a negative index: -16 .. -1 */
+ for (j = 0; j < CAMELLIA_BLOCK_SIZE; ++j)
+ out[j] ^= in[(int)(j - CAMELLIA_BLOCK_SIZE)];
+
+ out -= CAMELLIA_BLOCK_SIZE;
+ in -= CAMELLIA_BLOCK_SIZE;
+ inputLen -= CAMELLIA_BLOCK_SIZE;
+ }
+ /* first block: its chaining value is the IV stored in the context */
+ if (in == input) {
+ (*decryptor)(cx->expandedKey, out, in);
+
+ for (j = 0; j < CAMELLIA_BLOCK_SIZE; ++j)
+ out[j] ^= cx->iv[j];
+ }
+ memcpy(cx->iv, newIV, CAMELLIA_BLOCK_SIZE);
+ return SECSuccess;
+}
+
+/**************************************************************************
+ *
+ * BLAPI Interface functions
+ *
+ *************************************************************************/
+
+/*
+ * Camellia_AllocateContext
+ *
+ * Allocate a CamelliaContext with PORT_ZNew and return it as-is; the
+ * result of the allocation is not checked here.
+ */
+CamelliaContext *
+Camellia_AllocateContext(void)
+{
+    CamelliaContext *fresh = PORT_ZNew(CamelliaContext);
+
+    return fresh;
+}
+
+/*
+ * Camellia_InitContext
+ *
+ * Prepare an already allocated context for ECB (NSS_CAMELLIA) or CBC
+ * (NSS_CAMELLIA_CBC) operation.
+ *
+ *   cx       context to fill in; must not be NULL
+ *   key      key bytes; must not be NULL
+ *   keysize  key length in bytes: 16, 24 or 32
+ *   iv       CAMELLIA_BLOCK_SIZE bytes, required for CBC mode only
+ *   mode     NSS_CAMELLIA or NSS_CAMELLIA_CBC
+ *   encrypt  non-zero selects the encrypt worker, zero the decrypt worker
+ *   unused   ignored
+ *
+ * Returns SECSuccess, or SECFailure with SEC_ERROR_INVALID_ARGS set when an
+ * argument is rejected. SECFailure is also returned if key expansion fails.
+ */
+SECStatus
+Camellia_InitContext(CamelliaContext *cx, const unsigned char *key,
+                     unsigned int keysize,
+                     const unsigned char *iv, int mode, unsigned int encrypt,
+                     unsigned int unused)
+{
+    /* every rejected argument reports the same error, so test them together */
+    if (key == NULL ||
+        (keysize != 16 && keysize != 24 && keysize != 32) ||
+        (mode != NSS_CAMELLIA && mode != NSS_CAMELLIA_CBC) ||
+        (mode == NSS_CAMELLIA_CBC && iv == NULL) ||
+        cx == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    if (mode != NSS_CAMELLIA_CBC) {
+        if (encrypt) {
+            cx->worker = &camellia_encryptECB;
+        } else {
+            cx->worker = &camellia_decryptECB;
+        }
+    } else {
+        memcpy(cx->iv, iv, CAMELLIA_BLOCK_SIZE);
+        if (encrypt) {
+            cx->worker = &camellia_encryptCBC;
+        } else {
+            cx->worker = &camellia_decryptCBC;
+        }
+    }
+
+    /* Generate expanded key */
+    if (camellia_key_expansion(cx, key, keysize) == SECSuccess) {
+        return SECSuccess;
+    }
+    return SECFailure;
+}
+
+/*
+ * Camellia_CreateContext
+ *
+ * Allocate a new context and set it up for ECB (NSS_CAMELLIA) or CBC
+ * (NSS_CAMELLIA_CBC) operation with the given key.
+ *
+ *   key      key bytes; must not be NULL
+ *   iv       CAMELLIA_BLOCK_SIZE bytes, required for CBC mode only
+ *   mode     NSS_CAMELLIA or NSS_CAMELLIA_CBC
+ *   encrypt  non-zero selects the encrypt worker, zero the decrypt worker
+ *   keysize  key length in bytes: 16, 24 or 32
+ *
+ * Returns the new context, or NULL with SEC_ERROR_INVALID_ARGS (rejected
+ * argument) or SEC_ERROR_NO_MEMORY (allocation failure) set. NULL is also
+ * returned, after releasing the context, if key expansion fails.
+ */
+CamelliaContext *
+Camellia_CreateContext(const unsigned char *key, const unsigned char *iv,
+                       int mode, int encrypt,
+                       unsigned int keysize)
+{
+    CamelliaContext *created;
+
+    /* validate everything before allocating anything */
+    if (key == NULL ||
+        (keysize != 16 && keysize != 24 && keysize != 32) ||
+        (mode != NSS_CAMELLIA && mode != NSS_CAMELLIA_CBC) ||
+        (mode == NSS_CAMELLIA_CBC && iv == NULL)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return NULL;
+    }
+
+    created = PORT_ZNew(CamelliaContext);
+    if (created == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return NULL;
+    }
+
+    if (mode != NSS_CAMELLIA_CBC) {
+        created->worker = encrypt ? &camellia_encryptECB : &camellia_decryptECB;
+    } else {
+        /* copy in the iv, which is only necessary for CBC */
+        memcpy(created->iv, iv, CAMELLIA_BLOCK_SIZE);
+        created->worker = encrypt ? &camellia_encryptCBC : &camellia_decryptCBC;
+    }
+    created->keysize = keysize;
+
+    /* Generate expanded key */
+    if (camellia_key_expansion(created, key, keysize) == SECSuccess) {
+        return created;
+    }
+
+    PORT_ZFree(created, sizeof *created);
+    return NULL;
+}
+
+/*
+ * Camellia_DestroyContext
+ *
+ * Zero a Camellia cipher context (when one is supplied). If |freeit| is
+ * true, the pointer is also handed to PORT_Free.
+ */
+void
+Camellia_DestroyContext(CamelliaContext *cx, PRBool freeit)
+{
+    if (cx != NULL) {
+        memset(cx, 0, sizeof(*cx));
+    }
+    if (freeit) {
+        PORT_Free(cx);
+    }
+}
+
+/*
+ * Camellia_Encrypt
+ *
+ * Encrypt |inputLen| bytes with the worker installed in |cx|. The caller
+ * supplies an output buffer of at least |inputLen| bytes.
+ *
+ * Fails with SEC_ERROR_INVALID_ARGS if any pointer argument is NULL,
+ * SEC_ERROR_INPUT_LEN if |inputLen| is not a multiple of
+ * CAMELLIA_BLOCK_SIZE, and SEC_ERROR_OUTPUT_LEN if |maxOutputLen| is
+ * smaller than |inputLen|. Otherwise *outputLen is set to |inputLen| and
+ * the worker's status is returned.
+ */
+SECStatus
+Camellia_Encrypt(CamelliaContext *cx, unsigned char *output,
+                 unsigned int *outputLen, unsigned int maxOutputLen,
+                 const unsigned char *input, unsigned int inputLen)
+{
+    if (!cx || !output || !input || !outputLen) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    if ((inputLen % CAMELLIA_BLOCK_SIZE) != 0) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+    if (inputLen > maxOutputLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    *outputLen = inputLen;
+    return cx->worker(cx, output, outputLen, maxOutputLen, input, inputLen);
+}
+
+/*
+ * Camellia_Decrypt
+ *
+ * Decrypt |inputLen| bytes with the worker installed in |cx|. The caller
+ * supplies an output buffer of at least |inputLen| bytes.
+ *
+ * Fails with SEC_ERROR_INVALID_ARGS if any pointer argument is NULL,
+ * SEC_ERROR_INPUT_LEN if |inputLen| is not a multiple of
+ * CAMELLIA_BLOCK_SIZE, and SEC_ERROR_OUTPUT_LEN if |maxOutputLen| is
+ * smaller than |inputLen|. Otherwise *outputLen is set to |inputLen| and
+ * the worker's status is returned.
+ */
+SECStatus
+Camellia_Decrypt(CamelliaContext *cx, unsigned char *output,
+                 unsigned int *outputLen, unsigned int maxOutputLen,
+                 const unsigned char *input, unsigned int inputLen)
+{
+    if (!cx || !output || !input || !outputLen) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    if ((inputLen % CAMELLIA_BLOCK_SIZE) != 0) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+    if (inputLen > maxOutputLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    *outputLen = inputLen;
+    return cx->worker(cx, output, outputLen, maxOutputLen, input, inputLen);
+}
diff --git a/security/nss/lib/freebl/camellia.h b/security/nss/lib/freebl/camellia.h
new file mode 100644
index 000000000..15114db9a
--- /dev/null
+++ b/security/nss/lib/freebl/camellia.h
@@ -0,0 +1,42 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _CAMELLIA_H_
+#define _CAMELLIA_H_ 1
+
+/* Sizes used throughout the Camellia code. */
+#define CAMELLIA_BLOCK_SIZE 16 /* bytes */
+#define CAMELLIA_MIN_KEYSIZE 16 /* bytes */
+#define CAMELLIA_MAX_KEYSIZE 32 /* bytes */
+
+/* Number of 32-bit words reserved for an expanded key. */
+#define CAMELLIA_MAX_EXPANDEDKEY (34 * 2) /* 32bit unit */
+
+/* An expanded key: CAMELLIA_MAX_EXPANDEDKEY 32-bit words. */
+typedef PRUint32 KEY_TABLE_TYPE[CAMELLIA_MAX_EXPANDEDKEY];
+
+/* Signature of the per-mode worker stored in a context (the ECB and CBC
+ * encrypt/decrypt routines in camellia.c have this shape). */
+typedef SECStatus CamelliaFunc(CamelliaContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen);
+
+/* Signature of a single-block primitive: transforms one block from |input|
+ * into |output| using the expanded key |subkey|. */
+typedef SECStatus CamelliaBlockFunc(const PRUint32 *subkey,
+ unsigned char *output,
+ const unsigned char *input);
+
+/* CamelliaContextStr
+ *
+ * Values which maintain the state for Camellia encryption/decryption.
+ *
+ * keysize - the key length in bytes (16, 24 or 32)
+ * worker - the encryption/decryption function to use with this context
+ * iv - initialization vector for CBC mode; the CBC workers update it
+ * after each call
+ * expandedKey - the round keys in 4-byte words
+ */
+struct CamelliaContextStr {
+ PRUint32 keysize; /* bytes */
+ CamelliaFunc *worker;
+ PRUint32 expandedKey[CAMELLIA_MAX_EXPANDEDKEY];
+ PRUint8 iv[CAMELLIA_BLOCK_SIZE];
+};
+
+#endif /* _CAMELLIA_H_ */
diff --git a/security/nss/lib/freebl/chacha20.c b/security/nss/lib/freebl/chacha20.c
new file mode 100644
index 000000000..f55d1e670
--- /dev/null
+++ b/security/nss/lib/freebl/chacha20.c
@@ -0,0 +1,119 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* Adapted from the public domain code in NaCl by djb. */
+
+#include <string.h>
+#include <stdio.h>
+
+#include "prtypes.h"
+#include "secport.h"
+#include "chacha20.h"
+
+/* ROTL32(x, n): rotate x left by n bits. MSVC builds use the _lrotl
+ * intrinsic. The generic expansion does not parenthesize x or n, so it must
+ * only be given simple or already parenthesized arguments (ROTATE below
+ * takes care of that). */
+#if defined(_MSC_VER)
+#pragma intrinsic(_lrotl)
+#define ROTL32(x, n) _lrotl(x, n)
+#else
+#define ROTL32(x, n) ((x << n) | (x >> ((8 * sizeof x) - n)))
+#endif
+
+/* ROTATE(v, c): ROTL32 with both arguments parenthesized. */
+#define ROTATE(v, c) ROTL32((v), (c))
+
+/* U32TO8_LITTLE(p, v): store the 32-bit value v into p[0..3], least
+ * significant byte first. Expands to a braced block. */
+#define U32TO8_LITTLE(p, v) \
+ { \
+ (p)[0] = ((v)) & 0xff; \
+ (p)[1] = ((v) >> 8) & 0xff; \
+ (p)[2] = ((v) >> 16) & 0xff; \
+ (p)[3] = ((v) >> 24) & 0xff; \
+ }
+/* U8TO32_LITTLE(p): assemble a 32-bit value from p[0..3], p[0] being the
+ * least significant byte. */
+#define U8TO32_LITTLE(p) \
+ (((PRUint32)((p)[0])) | ((PRUint32)((p)[1]) << 8) | \
+ ((PRUint32)((p)[2]) << 16) | ((PRUint32)((p)[3]) << 24))
+
+/* QUARTERROUND(x, a, b, c, d): mix the four words x[a], x[b], x[c], x[d]
+ * with add / xor / rotate steps (rotations of 16, 12, 8 and 7 bits).
+ * Expands to eight separate statements that already end in ';' and are not
+ * wrapped in do { } while (0), so it is only safe as a statement of its own
+ * inside a braced block. */
+#define QUARTERROUND(x, a, b, c, d) \
+ x[a] = x[a] + x[b]; \
+ x[d] = ROTATE(x[d] ^ x[a], 16); \
+ x[c] = x[c] + x[d]; \
+ x[b] = ROTATE(x[b] ^ x[c], 12); \
+ x[a] = x[a] + x[b]; \
+ x[d] = ROTATE(x[d] ^ x[a], 8); \
+ x[c] = x[c] + x[d]; \
+ x[b] = ROTATE(x[b] ^ x[c], 7);
+
+/*
+ * ChaChaCore
+ *
+ * Produce one 64-byte output block from the 16-word |input| state: a
+ * working copy is run through |num_rounds| rounds (two per loop pass), the
+ * original state is added back word by word, and the 16 words are written
+ * to |output| least significant byte first.
+ */
+static void
+ChaChaCore(unsigned char output[64], const PRUint32 input[16], int num_rounds)
+{
+    PRUint32 state[16];
+    int remaining = num_rounds;
+    int w;
+
+    PORT_Memcpy(state, input, sizeof(state));
+
+    /* each pass: four column quarter rounds, then four diagonal ones */
+    while (remaining > 0) {
+        QUARTERROUND(state, 0, 4, 8, 12)
+        QUARTERROUND(state, 1, 5, 9, 13)
+        QUARTERROUND(state, 2, 6, 10, 14)
+        QUARTERROUND(state, 3, 7, 11, 15)
+        QUARTERROUND(state, 0, 5, 10, 15)
+        QUARTERROUND(state, 1, 6, 11, 12)
+        QUARTERROUND(state, 2, 7, 8, 13)
+        QUARTERROUND(state, 3, 4, 9, 14)
+        remaining -= 2;
+    }
+
+    /* feed-forward: add the untouched input state back in */
+    for (w = 0; w < 16; w++) {
+        state[w] += input[w];
+    }
+    /* serialize the words */
+    for (w = 0; w < 16; w++) {
+        U32TO8_LITTLE(output + 4 * w, state[w]);
+    }
+}
+
+/* Constant occupying the first four state words. */
+static const unsigned char sigma[16] = "expand 32-byte k";
+
+/*
+ * ChaCha20XOR
+ *
+ * XOR |inLen| bytes of |in| with the 20-round keystream derived from |key|
+ * (32 bytes), |nonce| (12 bytes) and the starting block |counter|, writing
+ * the result to |out|. State layout: words 0-3 sigma, 4-11 key, 12 block
+ * counter, 13-15 nonce. The counter word is incremented after each 64-byte
+ * block.
+ */
+void
+ChaCha20XOR(unsigned char *out, const unsigned char *in, unsigned int inLen,
+            const unsigned char key[32], const unsigned char nonce[12],
+            uint32_t counter)
+{
+    unsigned char keystream[64];
+    PRUint32 state[16];
+    unsigned int i;
+
+    for (i = 0; i < 4; i++) {
+        state[i] = U8TO32_LITTLE(sigma + 4 * i);
+    }
+    for (i = 0; i < 8; i++) {
+        state[4 + i] = U8TO32_LITTLE(key + 4 * i);
+    }
+    state[12] = counter;
+    for (i = 0; i < 3; i++) {
+        state[13 + i] = U8TO32_LITTLE(nonce + 4 * i);
+    }
+
+    /* full 64-byte blocks first; a shorter final block uses only the
+     * leading bytes of its keystream block */
+    while (inLen > 0) {
+        const unsigned int take = (inLen < 64) ? inLen : 64;
+
+        ChaChaCore(keystream, state, 20);
+        for (i = 0; i < take; i++) {
+            out[i] = in[i] ^ keystream[i];
+        }
+
+        state[12]++;
+        inLen -= take;
+        in += take;
+        out += take;
+    }
+}
diff --git a/security/nss/lib/freebl/chacha20.h b/security/nss/lib/freebl/chacha20.h
new file mode 100644
index 000000000..7e396fa8c
--- /dev/null
+++ b/security/nss/lib/freebl/chacha20.h
@@ -0,0 +1,26 @@
+/*
+ * chacha20.h - header file for ChaCha20 implementation.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef FREEBL_CHACHA20_H_
+#define FREEBL_CHACHA20_H_
+
+/* MSVC with _MSC_VER < 1600 gets the fixed-width type names from NSPR types
+ * instead of <stdint.h>. */
+#if defined(_MSC_VER) && _MSC_VER < 1600
+#include "prtypes.h"
+typedef PRUint32 uint32_t;
+typedef PRUint64 uint64_t;
+#else
+#include <stdint.h>
+#endif
+
+/* ChaCha20XOR encrypts |inLen| bytes from |in| with the given 32-byte key
+ * and 12-byte nonce and writes the result to |out|, which may be equal to
+ * |in|. The data is XORed with the keystream, so applying the function
+ * again with the same key, nonce and counter restores the input. The
+ * initial block counter is specified by |counter|. */
+extern void ChaCha20XOR(unsigned char *out, const unsigned char *in,
+ unsigned int inLen, const unsigned char key[32],
+ const unsigned char nonce[12], uint32_t counter);
+
+#endif /* FREEBL_CHACHA20_H_ */
diff --git a/security/nss/lib/freebl/chacha20_vec.c b/security/nss/lib/freebl/chacha20_vec.c
new file mode 100644
index 000000000..12f94d897
--- /dev/null
+++ b/security/nss/lib/freebl/chacha20_vec.c
@@ -0,0 +1,327 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* This implementation is by Ted Krovetz and was submitted to SUPERCOP and
+ * marked as public domain. It has been altered to allow for non-aligned inputs
+ * and to allow the block counter to be passed in specifically. */
+
+#include <string.h>
+
+#include "chacha20.h"
+#include "blapii.h"
+
+/* Number of ChaCha rounds; may be overridden at build time. */
+#ifndef CHACHA_RNDS
+#define CHACHA_RNDS 20 /* 8 (high speed), 20 (conservative), 12 (middle) */
+#endif
+
+/* Architecture-neutral way to specify 16-byte vector of ints */
+typedef unsigned vec __attribute__((vector_size(16)));
+
+/* This implementation is designed for Neon, SSE and AltiVec machines. The
+ * following specify how to do certain vector operations efficiently on
+ * each architecture, using intrinsics.
+ * This implementation supports parallel processing of multiple blocks,
+ * including potentially using general-purpose registers.
+ *
+ * Per-architecture definitions:
+ * GPR_TOO - 1 if one extra block per iteration is computed in scalar
+ * registers alongside the vector blocks, else 0
+ * VBPI - number of blocks computed with vectors per iteration
+ * ONE - vector with 1 in its first lane and 0 elsewhere (used to step the
+ * block counter held in the first lane of the fourth state row)
+ * LOAD / STORE - read / write one 16-byte vector
+ * ROTV1/2/3 - rotate the four 32-bit lanes by 1, 2 or 3 positions
+ * ROTW7/8/12/16 - rotate every 32-bit lane left by that many bits
+ */
+#if __ARM_NEON__
+#include <arm_neon.h>
+#define GPR_TOO 1
+#define VBPI 2
+#define ONE (vec) vsetq_lane_u32(1, vdupq_n_u32(0), 0)
+#define LOAD(m) (vec)(*((vec *)(m)))
+#define STORE(m, r) (*((vec *)(m))) = (r)
+#define ROTV1(x) (vec) vextq_u32((uint32x4_t)x, (uint32x4_t)x, 1)
+#define ROTV2(x) (vec) vextq_u32((uint32x4_t)x, (uint32x4_t)x, 2)
+#define ROTV3(x) (vec) vextq_u32((uint32x4_t)x, (uint32x4_t)x, 3)
+#define ROTW16(x) (vec) vrev32q_u16((uint16x8_t)x)
+#if __clang__
+#define ROTW7(x) (x << ((vec){ 7, 7, 7, 7 })) ^ (x >> ((vec){ 25, 25, 25, 25 }))
+#define ROTW8(x) (x << ((vec){ 8, 8, 8, 8 })) ^ (x >> ((vec){ 24, 24, 24, 24 }))
+#define ROTW12(x) (x << ((vec){ 12, 12, 12, 12 })) ^ (x >> ((vec){ 20, 20, 20, 20 }))
+#else
+#define ROTW7(x) (vec) vsriq_n_u32(vshlq_n_u32((uint32x4_t)x, 7), (uint32x4_t)x, 25)
+#define ROTW8(x) (vec) vsriq_n_u32(vshlq_n_u32((uint32x4_t)x, 8), (uint32x4_t)x, 24)
+#define ROTW12(x) (vec) vsriq_n_u32(vshlq_n_u32((uint32x4_t)x, 12), (uint32x4_t)x, 20)
+#endif
+#elif __SSE2__
+#include <emmintrin.h>
+#define GPR_TOO 0
+#if __clang__
+#define VBPI 4
+#else
+#define VBPI 3
+#endif
+#define ONE (vec) _mm_set_epi32(0, 0, 0, 1)
+#define LOAD(m) (vec) _mm_loadu_si128((__m128i *)(m))
+#define STORE(m, r) _mm_storeu_si128((__m128i *)(m), (__m128i)(r))
+#define ROTV1(x) (vec) _mm_shuffle_epi32((__m128i)x, _MM_SHUFFLE(0, 3, 2, 1))
+#define ROTV2(x) (vec) _mm_shuffle_epi32((__m128i)x, _MM_SHUFFLE(1, 0, 3, 2))
+#define ROTV3(x) (vec) _mm_shuffle_epi32((__m128i)x, _MM_SHUFFLE(2, 1, 0, 3))
+#define ROTW7(x) (vec)(_mm_slli_epi32((__m128i)x, 7) ^ _mm_srli_epi32((__m128i)x, 25))
+#define ROTW12(x) (vec)(_mm_slli_epi32((__m128i)x, 12) ^ _mm_srli_epi32((__m128i)x, 20))
+#if __SSSE3__
+#include <tmmintrin.h>
+#define ROTW8(x) (vec) _mm_shuffle_epi8((__m128i)x, _mm_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3))
+#define ROTW16(x) (vec) _mm_shuffle_epi8((__m128i)x, _mm_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2))
+#else
+#define ROTW8(x) (vec)(_mm_slli_epi32((__m128i)x, 8) ^ _mm_srli_epi32((__m128i)x, 24))
+#define ROTW16(x) (vec)(_mm_slli_epi32((__m128i)x, 16) ^ _mm_srli_epi32((__m128i)x, 16))
+#endif
+#else
+#error-- Implementation supports only machines with neon or SSE2
+#endif
+
+/* Byte-order hooks for vectors (REVV_BE) and words (REVW_BE). Neither
+ * architecture section above defines them, so they default to the
+ * identity here. */
+#ifndef REVV_BE
+#define REVV_BE(x) (x)
+#endif
+
+#ifndef REVW_BE
+#define REVW_BE(x) (x)
+#endif
+
+#define BPI (VBPI + GPR_TOO) /* Blocks computed per loop iteration */
+
+/* DQROUND_VECTORS(a, b, c, d): one double round on a block held as four
+ * row vectors. The first half mixes the rows lane by lane; rows b, c and d
+ * are then lane-rotated by 1, 2 and 3 positions, the same mixing is
+ * repeated, and the rows are rotated back (3, 2, 1). Expands to bare
+ * statements, so use it only as a statement of its own inside braces. */
+#define DQROUND_VECTORS(a, b, c, d) \
+ a += b; \
+ d ^= a; \
+ d = ROTW16(d); \
+ c += d; \
+ b ^= c; \
+ b = ROTW12(b); \
+ a += b; \
+ d ^= a; \
+ d = ROTW8(d); \
+ c += d; \
+ b ^= c; \
+ b = ROTW7(b); \
+ b = ROTV1(b); \
+ c = ROTV2(c); \
+ d = ROTV3(d); \
+ a += b; \
+ d ^= a; \
+ d = ROTW16(d); \
+ c += d; \
+ b ^= c; \
+ b = ROTW12(b); \
+ a += b; \
+ d ^= a; \
+ d = ROTW8(d); \
+ c += d; \
+ b ^= c; \
+ b = ROTW7(b); \
+ b = ROTV3(b); \
+ c = ROTV2(c); \
+ d = ROTV1(d);
+
+/* QROUND_WORDS(a, b, c, d): scalar quarter round on four 32-bit words
+ * (rotations of 16, 12, 8 and 7 bits). Also expands to bare statements. */
+#define QROUND_WORDS(a, b, c, d) \
+ a = a + b; \
+ d ^= a; \
+ d = d << 16 | d >> 16; \
+ c = c + d; \
+ b ^= c; \
+ b = b << 12 | b >> 20; \
+ a = a + b; \
+ d ^= a; \
+ d = d << 8 | d >> 24; \
+ c = c + d; \
+ b ^= c; \
+ b = b << 7 | b >> 25;
+
+/* WRITE_XOR(in, op, d, v0, v1, v2, v3): XOR the four keystream vectors
+ * v0..v3 with the input at offsets d+0, d+4, d+8 and d+12 of |in| and store
+ * the results at the same offsets of |op|. The caller passes unsigned
+ * pointers, so the offsets count 32-bit words (one vector = 4 words). */
+#define WRITE_XOR(in, op, d, v0, v1, v2, v3) \
+ STORE(op + d + 0, LOAD(in + d + 0) ^ REVV_BE(v0)); \
+ STORE(op + d + 4, LOAD(in + d + 4) ^ REVV_BE(v1)); \
+ STORE(op + d + 8, LOAD(in + d + 8) ^ REVV_BE(v2)); \
+ STORE(op + d + 12, LOAD(in + d + 12) ^ REVV_BE(v3));
+
+/*
+ * ChaCha20XOR (vector implementation)
+ *
+ * XOR |inlen| bytes of |in| with the ChaCha keystream derived from |key|,
+ * |nonce| and the starting block |counter|, writing the result to |out|.
+ *
+ * The work is done in three stages:
+ * 1. groups of BPI 64-byte blocks per loop iteration (VBPI blocks in
+ * vectors, plus one in scalar registers when GPR_TOO is set);
+ * 2. the remaining whole 64-byte blocks, one at a time;
+ * 3. a final partial block of fewer than 64 bytes, if any.
+ *
+ * s0..s3 hold the four rows of the initial state; the first lane of s3 is
+ * the block counter and is advanced by ONE after every block written.
+ */
+void NO_SANITIZE_ALIGNMENT
+ChaCha20XOR(unsigned char *out, const unsigned char *in, unsigned int inlen,
+ const unsigned char key[32], const unsigned char nonce[12],
+ uint32_t counter)
+{
+ /* op/ip view the byte buffers as 32-bit words, so the pointer steps
+ * below are in words: 16 words = one 64-byte block */
+ unsigned iters, i, *op = (unsigned *)out, *ip = (unsigned *)in, *kp;
+#if defined(__ARM_NEON__)
+ unsigned *np;
+#endif
+ vec s0, s1, s2, s3;
+ /* NOTE(review): the #error earlier in this file already rejects builds
+ * without NEON or SSE2, so this branch and the matching #else below
+ * appear to be dead code (they would also redeclare the key and nonce
+ * parameters) -- confirm before relying on them. */
+#if !defined(__ARM_NEON__) && !defined(__SSE2__)
+ __attribute__((aligned(16))) unsigned key[8], nonce[4];
+#endif
+ __attribute__((aligned(16))) unsigned chacha_const[] =
+ { 0x61707865, 0x3320646E, 0x79622D32, 0x6B206574 };
+#if defined(__ARM_NEON__) || defined(__SSE2__)
+ kp = (unsigned *)key;
+#else
+ ((vec *)key)[0] = REVV_BE(((vec *)key)[0]);
+ ((vec *)key)[1] = REVV_BE(((vec *)key)[1]);
+ ((unsigned *)nonce)[0] = REVW_BE(((unsigned *)nonce)[0]);
+ ((unsigned *)nonce)[1] = REVW_BE(((unsigned *)nonce)[1]);
+ ((unsigned *)nonce)[2] = REVW_BE(((unsigned *)nonce)[2]);
+ ((unsigned *)nonce)[3] = REVW_BE(((unsigned *)nonce)[3]);
+ kp = (unsigned *)key;
+ np = (unsigned *)nonce;
+#endif
+#if defined(__ARM_NEON__)
+ np = (unsigned *)nonce;
+#endif
+ /* initial state rows: constants, key words 0-3, key words 4-7, and
+ * { counter, nonce words 0-2 } */
+ s0 = LOAD(chacha_const);
+ s1 = LOAD(&((vec *)kp)[0]);
+ s2 = LOAD(&((vec *)kp)[1]);
+ s3 = (vec){
+ counter,
+ ((uint32_t *)nonce)[0],
+ ((uint32_t *)nonce)[1],
+ ((uint32_t *)nonce)[2]
+ };
+
+ /* stage 1: BPI blocks per iteration */
+ for (iters = 0; iters < inlen / (BPI * 64); iters++) {
+#if GPR_TOO
+ register unsigned x0, x1, x2, x3, x4, x5, x6, x7, x8,
+ x9, x10, x11, x12, x13, x14, x15;
+#endif
+#if VBPI > 2
+ vec v8, v9, v10, v11;
+#endif
+#if VBPI > 3
+ vec v12, v13, v14, v15;
+#endif
+
+ /* each vector block starts from the same rows 0-2; only the last
+ * row differs, by one counter step per block */
+ vec v0, v1, v2, v3, v4, v5, v6, v7;
+ v4 = v0 = s0;
+ v5 = v1 = s1;
+ v6 = v2 = s2;
+ v3 = s3;
+ v7 = v3 + ONE;
+#if VBPI > 2
+ v8 = v4;
+ v9 = v5;
+ v10 = v6;
+ v11 = v7 + ONE;
+#endif
+#if VBPI > 3
+ v12 = v8;
+ v13 = v9;
+ v14 = v10;
+ v15 = v11 + ONE;
+#endif
+#if GPR_TOO
+ /* the scalar block is the last of this iteration's BPI blocks, hence
+ * the counter value counter + BPI * iters + (BPI - 1) */
+ x0 = chacha_const[0];
+ x1 = chacha_const[1];
+ x2 = chacha_const[2];
+ x3 = chacha_const[3];
+ x4 = kp[0];
+ x5 = kp[1];
+ x6 = kp[2];
+ x7 = kp[3];
+ x8 = kp[4];
+ x9 = kp[5];
+ x10 = kp[6];
+ x11 = kp[7];
+ x12 = counter + BPI * iters + (BPI - 1);
+ x13 = np[0];
+ x14 = np[1];
+ x15 = np[2];
+#endif
+ /* CHACHA_RNDS / 2 double rounds on every block in flight */
+ for (i = CHACHA_RNDS / 2; i; i--) {
+ DQROUND_VECTORS(v0, v1, v2, v3)
+ DQROUND_VECTORS(v4, v5, v6, v7)
+#if VBPI > 2
+ DQROUND_VECTORS(v8, v9, v10, v11)
+#endif
+#if VBPI > 3
+ DQROUND_VECTORS(v12, v13, v14, v15)
+#endif
+#if GPR_TOO
+ QROUND_WORDS(x0, x4, x8, x12)
+ QROUND_WORDS(x1, x5, x9, x13)
+ QROUND_WORDS(x2, x6, x10, x14)
+ QROUND_WORDS(x3, x7, x11, x15)
+ QROUND_WORDS(x0, x5, x10, x15)
+ QROUND_WORDS(x1, x6, x11, x12)
+ QROUND_WORDS(x2, x7, x8, x13)
+ QROUND_WORDS(x3, x4, x9, x14)
+#endif
+ }
+
+ /* add the initial state back in and XOR with the input; s3 is
+ * stepped after each block so it always matches the block being
+ * written */
+ WRITE_XOR(ip, op, 0, v0 + s0, v1 + s1, v2 + s2, v3 + s3)
+ s3 += ONE;
+ WRITE_XOR(ip, op, 16, v4 + s0, v5 + s1, v6 + s2, v7 + s3)
+ s3 += ONE;
+#if VBPI > 2
+ WRITE_XOR(ip, op, 32, v8 + s0, v9 + s1, v10 + s2, v11 + s3)
+ s3 += ONE;
+#endif
+#if VBPI > 3
+ WRITE_XOR(ip, op, 48, v12 + s0, v13 + s1, v14 + s2, v15 + s3)
+ s3 += ONE;
+#endif
+ ip += VBPI * 16;
+ op += VBPI * 16;
+#if GPR_TOO
+ /* scalar block: same feed-forward and XOR, one word at a time */
+ op[0] = REVW_BE(REVW_BE(ip[0]) ^ (x0 + chacha_const[0]));
+ op[1] = REVW_BE(REVW_BE(ip[1]) ^ (x1 + chacha_const[1]));
+ op[2] = REVW_BE(REVW_BE(ip[2]) ^ (x2 + chacha_const[2]));
+ op[3] = REVW_BE(REVW_BE(ip[3]) ^ (x3 + chacha_const[3]));
+ op[4] = REVW_BE(REVW_BE(ip[4]) ^ (x4 + kp[0]));
+ op[5] = REVW_BE(REVW_BE(ip[5]) ^ (x5 + kp[1]));
+ op[6] = REVW_BE(REVW_BE(ip[6]) ^ (x6 + kp[2]));
+ op[7] = REVW_BE(REVW_BE(ip[7]) ^ (x7 + kp[3]));
+ op[8] = REVW_BE(REVW_BE(ip[8]) ^ (x8 + kp[4]));
+ op[9] = REVW_BE(REVW_BE(ip[9]) ^ (x9 + kp[5]));
+ op[10] = REVW_BE(REVW_BE(ip[10]) ^ (x10 + kp[6]));
+ op[11] = REVW_BE(REVW_BE(ip[11]) ^ (x11 + kp[7]));
+ op[12] = REVW_BE(REVW_BE(ip[12]) ^ (x12 + counter + BPI * iters + (BPI - 1)));
+ op[13] = REVW_BE(REVW_BE(ip[13]) ^ (x13 + np[0]));
+ op[14] = REVW_BE(REVW_BE(ip[14]) ^ (x14 + np[1]));
+ op[15] = REVW_BE(REVW_BE(ip[15]) ^ (x15 + np[2]));
+ s3 += ONE;
+ ip += 16;
+ op += 16;
+#endif
+ }
+
+ /* stage 2: whole blocks left over after the BPI-sized groups */
+ for (iters = inlen % (BPI * 64) / 64; iters != 0; iters--) {
+ vec v0 = s0, v1 = s1, v2 = s2, v3 = s3;
+ for (i = CHACHA_RNDS / 2; i; i--) {
+ DQROUND_VECTORS(v0, v1, v2, v3);
+ }
+ WRITE_XOR(ip, op, 0, v0 + s0, v1 + s1, v2 + s2, v3 + s3)
+ s3 += ONE;
+ ip += 16;
+ op += 16;
+ }
+
+ /* stage 3: final partial block (1..63 bytes) */
+ inlen = inlen % 64;
+ if (inlen) {
+ __attribute__((aligned(16))) vec buf[4];
+ vec v0, v1, v2, v3;
+ v0 = s0;
+ v1 = s1;
+ v2 = s2;
+ v3 = s3;
+ for (i = CHACHA_RNDS / 2; i; i--) {
+ DQROUND_VECTORS(v0, v1, v2, v3);
+ }
+
+ /* complete 16-byte rows are XORed straight into the output; the
+ * keystream row covering the trailing bytes is parked in buf[] at
+ * its own row index, so the byte loop below can use the same
+ * offset i for ip, op and buf */
+ if (inlen >= 16) {
+ STORE(op + 0, LOAD(ip + 0) ^ REVV_BE(v0 + s0));
+ if (inlen >= 32) {
+ STORE(op + 4, LOAD(ip + 4) ^ REVV_BE(v1 + s1));
+ if (inlen >= 48) {
+ STORE(op + 8, LOAD(ip + 8) ^ REVV_BE(v2 + s2));
+ buf[3] = REVV_BE(v3 + s3);
+ } else {
+ buf[2] = REVV_BE(v2 + s2);
+ }
+ } else {
+ buf[1] = REVV_BE(v1 + s1);
+ }
+ } else {
+ buf[0] = REVV_BE(v0 + s0);
+ }
+
+ /* bytes past the last complete 16-byte row */
+ for (i = inlen & ~15; i < inlen; i++) {
+ ((char *)op)[i] = ((char *)ip)[i] ^ ((char *)buf)[i];
+ }
+ }
+}
diff --git a/security/nss/lib/freebl/chacha20poly1305.c b/security/nss/lib/freebl/chacha20poly1305.c
new file mode 100644
index 000000000..cd265e1ff
--- /dev/null
+++ b/security/nss/lib/freebl/chacha20poly1305.c
@@ -0,0 +1,198 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include <string.h>
+#include <stdio.h>
+
+#include "seccomon.h"
+#include "secerr.h"
+#include "blapit.h"
+
+#ifndef NSS_DISABLE_CHACHAPOLY
+#include "poly1305.h"
+#include "chacha20.h"
+#include "chacha20poly1305.h"
+#endif
+
+/* Poly1305Do computes the Poly1305 authenticator over the additional data
+ * and the ciphertext and writes it to |out|. The MAC input is: |ad|, zero
+ * padding up to a multiple of 16 bytes, |ciphertext|, zero padding up to a
+ * multiple of 16 bytes, then |adLen| and |ciphertextLen|, each encoded as
+ * 8 bytes, least significant byte first. */
+#ifndef NSS_DISABLE_CHACHAPOLY
+static void
+Poly1305Do(unsigned char *out, const unsigned char *ad, unsigned int adLen,
+           const unsigned char *ciphertext, unsigned int ciphertextLen,
+           const unsigned char key[32])
+{
+    static const unsigned char padding[15];
+    poly1305_state mac;
+    unsigned char encoded[8];
+    unsigned int lengths[2];
+    unsigned int remaining;
+    unsigned int which;
+    unsigned int byteIdx;
+
+    Poly1305Init(&mac, key);
+
+    Poly1305Update(&mac, ad, adLen);
+    if ((adLen % 16) != 0) {
+        Poly1305Update(&mac, padding, 16 - (adLen % 16));
+    }
+
+    Poly1305Update(&mac, ciphertext, ciphertextLen);
+    if ((ciphertextLen % 16) != 0) {
+        Poly1305Update(&mac, padding, 16 - (ciphertextLen % 16));
+    }
+
+    /* trailer: both lengths, AD first */
+    lengths[0] = adLen;
+    lengths[1] = ciphertextLen;
+    for (which = 0; which < 2; which++) {
+        remaining = lengths[which];
+        for (byteIdx = 0; byteIdx < sizeof(encoded); byteIdx++) {
+            encoded[byteIdx] = remaining;
+            remaining >>= 8;
+        }
+        Poly1305Update(&mac, encoded, sizeof(encoded));
+    }
+
+    Poly1305Finish(&mac, out);
+}
+#endif
+
+/*
+ * ChaCha20Poly1305_InitContext
+ *
+ * Store |key| and |tagLen| in an already allocated context.
+ *
+ *   ctx     context to fill in; must not be NULL
+ *   key     key bytes; must not be NULL
+ *   keyLen  must be 32
+ *   tagLen  authentication tag length in bytes, 1..16
+ *
+ * Returns SECSuccess, or SECFailure with SEC_ERROR_BAD_KEY (wrong key
+ * length), SEC_ERROR_INPUT_LEN (tag length out of range) or
+ * SEC_ERROR_INVALID_ARGS (NULL ctx or key). When NSS_DISABLE_CHACHAPOLY
+ * is defined the function always returns SECFailure.
+ */
+SECStatus
+ChaCha20Poly1305_InitContext(ChaCha20Poly1305Context *ctx,
+                             const unsigned char *key, unsigned int keyLen,
+                             unsigned int tagLen)
+{
+#ifdef NSS_DISABLE_CHACHAPOLY
+    return SECFailure;
+#else
+    if (keyLen != 32) {
+        PORT_SetError(SEC_ERROR_BAD_KEY);
+        return SECFailure;
+    }
+    if (tagLen == 0 || tagLen > 16) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+    /* checked after the length tests so existing error codes are kept;
+     * without this a NULL ctx or key is dereferenced below */
+    if (ctx == NULL || key == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    PORT_Memcpy(ctx->key, key, sizeof(ctx->key));
+    ctx->tagLen = tagLen;
+
+    return SECSuccess;
+#endif
+}
+
+/*
+ * ChaCha20Poly1305_CreateContext
+ *
+ * Allocate a context and initialise it through
+ * ChaCha20Poly1305_InitContext. Returns the new context, or NULL if the
+ * allocation or the initialisation fails (the context is freed in the
+ * latter case). When NSS_DISABLE_CHACHAPOLY is defined the function always
+ * returns NULL.
+ */
+ChaCha20Poly1305Context *
+ChaCha20Poly1305_CreateContext(const unsigned char *key, unsigned int keyLen,
+                               unsigned int tagLen)
+{
+#ifdef NSS_DISABLE_CHACHAPOLY
+    return NULL;
+#else
+    ChaCha20Poly1305Context *created = PORT_New(ChaCha20Poly1305Context);
+
+    if (created == NULL) {
+        return NULL;
+    }
+    if (ChaCha20Poly1305_InitContext(created, key, keyLen, tagLen) == SECSuccess) {
+        return created;
+    }
+
+    PORT_Free(created);
+    return NULL;
+#endif
+}
+
+/*
+ * ChaCha20Poly1305_DestroyContext
+ *
+ * Zero the context so the key does not linger in it and, if |freeit| is
+ * true, release it with PORT_Free. A NULL |ctx| is accepted and ignored.
+ * Does nothing when NSS_DISABLE_CHACHAPOLY is defined.
+ */
+void
+ChaCha20Poly1305_DestroyContext(ChaCha20Poly1305Context *ctx, PRBool freeit)
+{
+#ifndef NSS_DISABLE_CHACHAPOLY
+    /* nothing to wipe or free; avoids writing through a NULL pointer */
+    if (ctx == NULL) {
+        return;
+    }
+    PORT_Memset(ctx, 0, sizeof(*ctx));
+    if (freeit) {
+        PORT_Free(ctx);
+    }
+#endif
+}
+
+/*
+ * ChaCha20Poly1305_Seal
+ *
+ * Encrypt |input| and append the authentication tag.
+ *
+ *   output / maxOutputLen  destination buffer and its capacity; it must
+ *                          hold inputLen + ctx->tagLen bytes
+ *   outputLen              receives inputLen + ctx->tagLen
+ *   nonce / nonceLen       nonce; nonceLen must be 12
+ *   ad / adLen             additional data covered by the tag only
+ *
+ * Returns SECSuccess, or SECFailure with SEC_ERROR_INPUT_LEN (bad nonce
+ * length, or an input so long that adding the tag would overflow an
+ * unsigned int) or SEC_ERROR_OUTPUT_LEN (output buffer too small). When
+ * NSS_DISABLE_CHACHAPOLY is defined the function always returns SECFailure.
+ */
+SECStatus
+ChaCha20Poly1305_Seal(const ChaCha20Poly1305Context *ctx, unsigned char *output,
+                      unsigned int *outputLen, unsigned int maxOutputLen,
+                      const unsigned char *input, unsigned int inputLen,
+                      const unsigned char *nonce, unsigned int nonceLen,
+                      const unsigned char *ad, unsigned int adLen)
+{
+#ifdef NSS_DISABLE_CHACHAPOLY
+    return SECFailure;
+#else
+    unsigned char block[64];
+    unsigned char tag[16];
+
+    if (nonceLen != 12) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+    /* inputLen + tagLen is unsigned arithmetic and wraps silently; a
+     * wrapped sum would pass the capacity check below and let the writes
+     * run past the end of |output| */
+    if (inputLen + ctx->tagLen < inputLen) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+    *outputLen = inputLen + ctx->tagLen;
+    if (maxOutputLen < *outputLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    PORT_Memset(block, 0, sizeof(block));
+    // Generate a block of keystream. The first 32 bytes will be the poly1305
+    // key. The remainder of the block is discarded.
+    ChaCha20XOR(block, block, sizeof(block), ctx->key, nonce, 0);
+    // The payload is encrypted starting at block counter 1.
+    ChaCha20XOR(output, input, inputLen, ctx->key, nonce, 1);
+
+    // The tag covers the additional data and the ciphertext just written.
+    Poly1305Do(tag, ad, adLen, output, inputLen, block);
+    PORT_Memcpy(output + inputLen, tag, ctx->tagLen);
+
+    return SECSuccess;
+#endif
+}
+
+/*
+ * ChaCha20Poly1305_Open
+ *
+ * Verify the tag at the end of |input| and, only if it matches, decrypt
+ * the preceding ciphertext into |output|.
+ *
+ *   input / inputLen       ciphertext followed by ctx->tagLen tag bytes
+ *   output / maxOutputLen  destination buffer and its capacity
+ *   outputLen              receives inputLen - ctx->tagLen
+ *   nonce / nonceLen       nonce; nonceLen must be 12
+ *   ad / adLen             additional data covered by the tag
+ *
+ * Returns SECSuccess, or SECFailure with SEC_ERROR_INPUT_LEN (bad nonce
+ * length or input shorter than the tag), SEC_ERROR_OUTPUT_LEN (output
+ * buffer too small) or SEC_ERROR_BAD_DATA (tag mismatch). When
+ * NSS_DISABLE_CHACHAPOLY is defined the function always returns SECFailure.
+ */
+SECStatus
+ChaCha20Poly1305_Open(const ChaCha20Poly1305Context *ctx, unsigned char *output,
+                      unsigned int *outputLen, unsigned int maxOutputLen,
+                      const unsigned char *input, unsigned int inputLen,
+                      const unsigned char *nonce, unsigned int nonceLen,
+                      const unsigned char *ad, unsigned int adLen)
+{
+#ifdef NSS_DISABLE_CHACHAPOLY
+    return SECFailure;
+#else
+    unsigned char polyKey[64];
+    unsigned char computedTag[16];
+    unsigned int bodyLen;
+
+    /* both length problems report the same error */
+    if (nonceLen != 12 || inputLen < ctx->tagLen) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+
+    bodyLen = inputLen - ctx->tagLen;
+    *outputLen = bodyLen;
+    if (bodyLen > maxOutputLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    /* Keystream block 0: its first 32 bytes are the Poly1305 key, the
+     * remainder is discarded. */
+    PORT_Memset(polyKey, 0, sizeof(polyKey));
+    ChaCha20XOR(polyKey, polyKey, sizeof(polyKey), ctx->key, nonce, 0);
+
+    /* authenticate before decrypting anything */
+    Poly1305Do(computedTag, ad, adLen, input, bodyLen, polyKey);
+    if (NSS_SecureMemcmp(computedTag, input + bodyLen, ctx->tagLen) != 0) {
+        PORT_SetError(SEC_ERROR_BAD_DATA);
+        return SECFailure;
+    }
+
+    ChaCha20XOR(output, input, bodyLen, ctx->key, nonce, 1);
+    return SECSuccess;
+#endif
+}
diff --git a/security/nss/lib/freebl/chacha20poly1305.h b/security/nss/lib/freebl/chacha20poly1305.h
new file mode 100644
index 000000000..c77632aa1
--- /dev/null
+++ b/security/nss/lib/freebl/chacha20poly1305.h
@@ -0,0 +1,15 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _CHACHA20_POLY1305_H_
+#define _CHACHA20_POLY1305_H_ 1
+
+/* ChaCha20Poly1305ContextStr saves the key and tag length for a
+ * ChaCha20+Poly1305 AEAD operation. */
+struct ChaCha20Poly1305ContextStr {
+ unsigned char key[32]; /* key handed to ChaCha20XOR by Seal/Open */
+ unsigned char tagLen; /* number of tag bytes that follow the ciphertext */
+};
+
+#endif /* _CHACHA20_POLY1305_H_ */
diff --git a/security/nss/lib/freebl/config.mk b/security/nss/lib/freebl/config.mk
new file mode 100644
index 000000000..918a66363
--- /dev/null
+++ b/security/nss/lib/freebl/config.mk
@@ -0,0 +1,97 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# only do this in the outermost freebl build.
+ifndef FREEBL_CHILD_BUILD
+
+# We're going to change this build so that it builds libfreebl.a with
+# just loader.c. Then we have to build this directory twice again to
+# build the two DSOs.
+# To build libfreebl.a with just loader.c, we must now override many
+# of the make variables setup by the prior inclusion of CORECONF's config.mk
+
+CSRCS = loader.c
+SIMPLE_OBJS = $(CSRCS:.c=$(OBJ_SUFFIX))
+OBJS = $(addprefix $(OBJDIR)/$(PROG_PREFIX), $(SIMPLE_OBJS))
+ALL_TRASH := $(TARGETS) $(OBJS) $(OBJDIR) LOGS TAGS $(GARBAGE) \
+ $(NOSUCHFILE) so_locations
+
+# this is not a recursive child make. We make a static lib. (archive)
+
+# Override the values defined in coreconf's ruleset.mk.
+#
+# - (1) LIBRARY: a static (archival) library
+# - (2) SHARED_LIBRARY: a shared (dynamic link) library
+# - (3) IMPORT_LIBRARY: an import library, used only on Windows
+# - (4) PROGRAM: an executable binary
+#
+# override these variables to prevent building a DSO/DLL.
+ TARGETS = $(LIBRARY)
+ SHARED_LIBRARY =
+ IMPORT_LIBRARY =
+ PROGRAM =
+
+else
+
+# This is a recursive child make. We build the shared lib.
+
+TARGETS = $(SHARED_LIBRARY)
+LIBRARY =
+IMPORT_LIBRARY =
+PROGRAM =
+
+ifeq ($(OS_TARGET), SunOS)
+OS_LIBS += -lkstat
+endif
+
+ifeq (,$(filter-out WIN%,$(OS_TARGET)))
+
+# don't want the 32 in the shared library name
+SHARED_LIBRARY = $(OBJDIR)/$(DLL_PREFIX)$(LIBRARY_NAME)$(LIBRARY_VERSION).$(DLL_SUFFIX)
+
+RES = $(OBJDIR)/$(LIBRARY_NAME).res
+RESNAME = freebl.rc
+
+ifdef NS_USE_GCC
+OS_LIBS += -ladvapi32
+else
+OS_LIBS += advapi32.lib
+endif
+
+ifdef NS_USE_GCC
+EXTRA_SHARED_LIBS += \
+ -L$(DIST)/lib \
+ -L$(NSSUTIL_LIB_DIR) \
+ -lnssutil3 \
+ -L$(NSPR_LIB_DIR) \
+ -lnspr4 \
+ $(NULL)
+else # ! NS_USE_GCC
+EXTRA_SHARED_LIBS += \
+ $(DIST)/lib/nssutil3.lib \
+ $(NSPR_LIB_DIR)/$(NSPR31_LIB_PREFIX)nspr4.lib \
+ $(NULL)
+endif # NS_USE_GCC
+
+else
+
+ifeq ($(FREEBL_NO_DEPEND),1)
+#drop pthreads as well
+OS_PTHREAD=
+else
+EXTRA_SHARED_LIBS += \
+ -L$(DIST)/lib \
+ -L$(NSSUTIL_LIB_DIR) \
+ -lnssutil3 \
+ -L$(NSPR_LIB_DIR) \
+ -lnspr4 \
+ $(NULL)
+endif
+endif
+
+ifeq ($(OS_ARCH), Darwin)
+EXTRA_SHARED_LIBS += -dylib_file @executable_path/libplc4.dylib:$(DIST)/lib/libplc4.dylib -dylib_file @executable_path/libplds4.dylib:$(DIST)/lib/libplds4.dylib
+endif
+
+endif
diff --git a/security/nss/lib/freebl/ctr.c b/security/nss/lib/freebl/ctr.c
new file mode 100644
index 000000000..d5715a505
--- /dev/null
+++ b/security/nss/lib/freebl/ctr.c
@@ -0,0 +1,246 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+#include "prtypes.h"
+#include "blapit.h"
+#include "blapii.h"
+#include "ctr.h"
+#include "pkcs11t.h"
+#include "secerr.h"
+
+#ifdef USE_HW_AES
+#include "intel-aes.h"
+#include "rijndael.h"
+#endif
+
+/*
+ * Set up an already allocated CTRContext.
+ *
+ * param points to a CK_AES_CTR_PARAMS holding the width of the counter in
+ * bits (ulCounterBits) and the initial counter block (cb). cipher/context
+ * are stored and later used by CTR_Update to encrypt counter blocks.
+ *
+ * Fails with SEC_ERROR_INVALID_ARGS when the counter width is zero or wider
+ * than a block, and with SEC_ERROR_LIBRARY_FAILURE when blocksize does not
+ * fit the internal counter buffer or the parameter's cb field.
+ */
+SECStatus
+CTR_InitContext(CTRContext *ctr, void *context, freeblCipherFunc cipher,
+                const unsigned char *param, unsigned int blocksize)
+{
+    const CK_AES_CTR_PARAMS *params = (const CK_AES_CTR_PARAMS *)param;
+
+    /* The counter needs at least one bit and must fit inside one block. */
+    if (params->ulCounterBits == 0) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    if (params->ulCounterBits > blocksize * PR_BITS_PER_BYTE) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* Invariant: 0 < ctr->bufPtr <= blocksize */
+    ctr->cipher = cipher;
+    ctr->context = context;
+    ctr->counterBits = params->ulCounterBits;
+    ctr->bufPtr = blocksize; /* no unused data in the buffer */
+    ctr->checkWrap = PR_FALSE;
+
+    if (blocksize > sizeof(ctr->counter) ||
+        blocksize > sizeof(params->cb)) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+    PORT_Memcpy(ctr->counter, params->cb, blocksize);
+
+    if (ctr->counterBits >= 64) {
+        return SECSuccess;
+    }
+    /* Counters narrower than 64 bits get wrap detection: remember the
+     * starting block so CTR_Update can notice when it comes around. */
+    PORT_Memcpy(ctr->counterFirst, ctr->counter, blocksize);
+    ctr->checkWrap = PR_TRUE;
+    return SECSuccess;
+}
+
+/*
+ * Allocate a zeroed CTRContext and initialize it with CTR_InitContext.
+ * Returns NULL when the allocation or the initialization fails; a context
+ * that failed to initialize is destroyed before returning.
+ */
+CTRContext *
+CTR_CreateContext(void *context, freeblCipherFunc cipher,
+                  const unsigned char *param, unsigned int blocksize)
+{
+    CTRContext *newCtr = PORT_ZNew(CTRContext);
+
+    if (newCtr == NULL) {
+        return NULL;
+    }
+    /* fill in the Counter context */
+    if (CTR_InitContext(newCtr, context, cipher, param, blocksize) ==
+        SECSuccess) {
+        return newCtr;
+    }
+    CTR_DestroyContext(newCtr, PR_TRUE);
+    return NULL;
+}
+
+/*
+ * Wipe a CTRContext and, when freeit is true, release its memory.
+ * A NULL ctr is ignored so callers can destroy unconditionally, matching
+ * CTS_DestroyContext which never dereferences its argument.
+ */
+void
+CTR_DestroyContext(CTRContext *ctr, PRBool freeit)
+{
+    if (ctr == NULL) {
+        return;
+    }
+    /* The context holds the counter and unused keystream; clear it. */
+    PORT_Memset(ctr, 0, sizeof(CTRContext));
+    if (freeit) {
+        PORT_Free(ctr);
+    }
+}
+
+/*
+ * Used by counter mode. Increment the counter block. Not all bits in the
+ * counter block are part of the counter, counterBits tells how many bits
+ * are part of the counter. The counter block is blocksize long. It's a
+ * big endian value.
+ *
+ * Only the low counterBits bits of the block are modified: the counter
+ * wraps to zero within its own field and never carries into the bits above
+ * it (those belong to the nonce/IV part of the block).
+ *
+ * XXX Does not handle counter rollover.
+ */
+static void
+ctr_GetNextCtr(unsigned char *counter, unsigned int counterBits,
+               unsigned int blocksize)
+{
+    unsigned char *counterPtr = counter + blocksize - 1;
+    unsigned char mask, count;
+
+    PORT_Assert(counterBits <= blocksize * PR_BITS_PER_BYTE);
+    /* Whole bytes first, least significant (last) byte first. Stop as soon
+     * as a byte does not wrap to zero, i.e. there is no carry. */
+    while (counterBits >= PR_BITS_PER_BYTE) {
+        if (++(*(counterPtr--))) {
+            return;
+        }
+        counterBits -= PR_BITS_PER_BYTE;
+    }
+    if (counterBits == 0) {
+        return;
+    }
+    /* Increment the final partial byte. The sum is computed without
+     * touching the byte and masked before it is merged back, so a carry
+     * out of the counter field is dropped instead of being added to the
+     * non-counter bits sharing this byte. */
+    mask = (unsigned char)((1 << counterBits) - 1);
+    count = (unsigned char)((*counterPtr + 1) & mask);
+    *counterPtr = (unsigned char)(((*counterPtr) & ~mask) | count);
+    return;
+}
+
+/*
+ * target[k] = x[k] ^ y[k] for the first count bytes, in ascending order.
+ */
+static void
+ctr_xor(unsigned char *target, const unsigned char *x,
+        const unsigned char *y, unsigned int count)
+{
+    unsigned int pos = 0;
+
+    while (pos < count) {
+        target[pos] = x[pos] ^ y[pos];
+        pos++;
+    }
+}
+
+/*
+ * Encrypt the current counter block into ctr->buffer and advance the
+ * counter. When wrap checking is enabled and the advanced counter equals
+ * the initial one, SEC_ERROR_INVALID_ARGS is set and SECFailure returned.
+ * A failure of the underlying cipher is reported only after the counter
+ * has been advanced and the wrap check has run.
+ */
+static SECStatus
+ctr_NextKeystreamBlock(CTRContext *ctr, unsigned int blocksize)
+{
+    unsigned int ignoredLen;
+    SECStatus cipherStatus;
+
+    cipherStatus = (*ctr->cipher)(ctr->context, ctr->buffer, &ignoredLen,
+                                  blocksize, ctr->counter, blocksize,
+                                  blocksize);
+    ctr_GetNextCtr(ctr->counter, ctr->counterBits, blocksize);
+    if (ctr->checkWrap &&
+        PORT_Memcmp(ctr->counter, ctr->counterFirst, blocksize) == 0) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    return (cipherStatus == SECSuccess) ? SECSuccess : SECFailure;
+}
+
+/*
+ * XOR inlen bytes of inbuf with the CTR keystream and write them to outbuf.
+ *
+ * Keystream left over from a previous call (ctr->bufPtr < blocksize) is
+ * consumed first; afterwards keystream is generated one block at a time.
+ * If the input ends inside a block, the unused rest of that block stays in
+ * ctr->buffer for the next call.
+ *
+ * Returns SECFailure with SEC_ERROR_OUTPUT_LEN (and *outlen = inlen) when
+ * maxout is too small. On other failures *outlen holds the number of bytes
+ * produced before the failure.
+ */
+SECStatus
+CTR_Update(CTRContext *ctr, unsigned char *outbuf,
+           unsigned int *outlen, unsigned int maxout,
+           const unsigned char *inbuf, unsigned int inlen,
+           unsigned int blocksize)
+{
+    if (maxout < inlen) {
+        *outlen = inlen;
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+    *outlen = 0;
+
+    /* Use up keystream buffered by an earlier call. */
+    if (ctr->bufPtr != blocksize) {
+        unsigned int avail = blocksize - ctr->bufPtr;
+        unsigned int needed = PR_MIN(avail, inlen);
+
+        ctr_xor(outbuf, inbuf, ctr->buffer + ctr->bufPtr, needed);
+        ctr->bufPtr += needed;
+        outbuf += needed;
+        inbuf += needed;
+        *outlen += needed;
+        inlen -= needed;
+        if (inlen == 0) {
+            return SECSuccess;
+        }
+        PORT_Assert(ctr->bufPtr == blocksize);
+    }
+
+    /* Whole blocks. */
+    for (; inlen >= blocksize; inlen -= blocksize) {
+        if (ctr_NextKeystreamBlock(ctr, blocksize) != SECSuccess) {
+            return SECFailure;
+        }
+        ctr_xor(outbuf, inbuf, ctr->buffer, blocksize);
+        outbuf += blocksize;
+        inbuf += blocksize;
+        *outlen += blocksize;
+    }
+    if (inlen == 0) {
+        return SECSuccess;
+    }
+
+    /* Trailing partial block: generate a full keystream block, use its
+     * first inlen bytes and keep the rest buffered. */
+    if (ctr_NextKeystreamBlock(ctr, blocksize) != SECSuccess) {
+        return SECFailure;
+    }
+    ctr_xor(outbuf, inbuf, ctr->buffer, inlen);
+    ctr->bufPtr = inlen;
+    *outlen += inlen;
+    return SECSuccess;
+}
+
+#if defined(USE_HW_AES) && defined(_MSC_VER)
+/*
+ * Variant of CTR_Update that hands the whole blocks of the input to the
+ * routine selected by intel_aes_ctr_worker().
+ *
+ * Buffered keystream from a previous call is consumed first, then the
+ * worker is invoked for the remaining input, and finally a trailing partial
+ * block is handled with the generic cipher callback exactly as in
+ * CTR_Update.
+ *
+ * Returns SECFailure with SEC_ERROR_OUTPUT_LEN (and *outlen = inlen) when
+ * maxout is too small.
+ *
+ * NOTE(review): the worker's return value is ignored here and, unlike
+ * CTR_Update, no counter-wrap check (ctr->checkWrap) is performed in this
+ * function - confirm whether the worker covers either.
+ */
+SECStatus
+CTR_Update_HW_AES(CTRContext *ctr, unsigned char *outbuf,
+                  unsigned int *outlen, unsigned int maxout,
+                  const unsigned char *inbuf, unsigned int inlen,
+                  unsigned int blocksize)
+{
+    unsigned int fullblocks;
+    unsigned int prefixLen;
+    unsigned int tmp;
+    SECStatus rv;
+
+    if (maxout < inlen) {
+        *outlen = inlen;
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+    *outlen = 0;
+    if (ctr->bufPtr != blocksize) {
+        unsigned int needed = PR_MIN(blocksize - ctr->bufPtr, inlen);
+        ctr_xor(outbuf, inbuf, ctr->buffer + ctr->bufPtr, needed);
+        ctr->bufPtr += needed;
+        outbuf += needed;
+        inbuf += needed;
+        *outlen += needed;
+        inlen -= needed;
+        if (inlen == 0) {
+            return SECSuccess;
+        }
+        PORT_Assert(ctr->bufPtr == blocksize);
+    }
+
+    /* Bytes already produced from buffered keystream. *outlen is not
+     * necessarily 0 at this point, so remember it and set the total
+     * explicitly after the worker has run instead of asserting on it. */
+    prefixLen = *outlen;
+    intel_aes_ctr_worker(((AESContext *)(ctr->context))->Nr)(
+        ctr, outbuf, outlen, maxout, inbuf, inlen, blocksize);
+    /* XXX intel_aes_ctr_worker should set *outlen. */
+    fullblocks = (inlen / blocksize) * blocksize;
+    *outlen = prefixLen + fullblocks;
+    outbuf += fullblocks;
+    inbuf += fullblocks;
+    inlen -= fullblocks;
+
+    if (inlen == 0) {
+        return SECSuccess;
+    }
+    /* Trailing partial block: generate one keystream block, use its first
+     * inlen bytes and keep the rest buffered for the next call. */
+    rv = (*ctr->cipher)(ctr->context, ctr->buffer, &tmp, blocksize,
+                        ctr->counter, blocksize, blocksize);
+    ctr_GetNextCtr(ctr->counter, ctr->counterBits, blocksize);
+    if (rv != SECSuccess) {
+        return SECFailure;
+    }
+    ctr_xor(outbuf, inbuf, ctr->buffer, inlen);
+    ctr->bufPtr = inlen;
+    *outlen += inlen;
+    return SECSuccess;
+}
+#endif
diff --git a/security/nss/lib/freebl/ctr.h b/security/nss/lib/freebl/ctr.h
new file mode 100644
index 000000000..a97da144e
--- /dev/null
+++ b/security/nss/lib/freebl/ctr.h
@@ -0,0 +1,53 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef CTR_H
+#define CTR_H 1
+
+#include "blapii.h"
+
+/* This structure is defined in this header because both ctr.c and gcm.c
+ * need it. */
+struct CTRContextStr {
+ freeblCipherFunc cipher; /* encrypts a counter block into buffer */
+ void *context; /* passed as first argument to cipher; not owned */
+ unsigned char counter[MAX_BLOCK_SIZE]; /* next counter block to encrypt */
+ unsigned char buffer[MAX_BLOCK_SIZE]; /* most recent keystream block */
+ unsigned char counterFirst[MAX_BLOCK_SIZE]; /* counter overflow value */
+ PRBool checkWrap; /*check for counter overflow*/
+ unsigned long counterBits; /* width of the counter field, in bits */
+ unsigned int bufPtr; /* offset of the first unused byte in buffer;
+ * equals blocksize when nothing is buffered */
+};
+
+typedef struct CTRContextStr CTRContext;
+
+SECStatus CTR_InitContext(CTRContext *ctr, void *context,
+ freeblCipherFunc cipher, const unsigned char *param,
+ unsigned int blocksize);
+
+/*
+ * The context argument is the inner cipher context to use with cipher. The
+ * CTRContext does not own context. context needs to remain valid for as long
+ * as the CTRContext is valid.
+ *
+ * The cipher argument is a block cipher in the ECB encrypt mode.
+ */
+CTRContext *CTR_CreateContext(void *context, freeblCipherFunc cipher,
+ const unsigned char *param, unsigned int blocksize);
+
+void CTR_DestroyContext(CTRContext *ctr, PRBool freeit);
+
+SECStatus CTR_Update(CTRContext *ctr, unsigned char *outbuf,
+ unsigned int *outlen, unsigned int maxout,
+ const unsigned char *inbuf, unsigned int inlen,
+ unsigned int blocksize);
+
+#ifdef USE_HW_AES
+SECStatus CTR_Update_HW_AES(CTRContext *ctr, unsigned char *outbuf,
+ unsigned int *outlen, unsigned int maxout,
+ const unsigned char *inbuf, unsigned int inlen,
+ unsigned int blocksize);
+#endif
+
+#endif
diff --git a/security/nss/lib/freebl/cts.c b/security/nss/lib/freebl/cts.c
new file mode 100644
index 000000000..99ccebb60
--- /dev/null
+++ b/security/nss/lib/freebl/cts.c
@@ -0,0 +1,307 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+#include "blapit.h"
+#include "blapii.h"
+#include "cts.h"
+#include "secerr.h"
+
+/* State for ciphertext stealing on top of an underlying cipher. */
+struct CTSContextStr {
+ freeblCipherFunc cipher; /* underlying cipher callback */
+ void *context; /* passed as first argument to cipher; not freed here */
+ /* iv stores the last ciphertext block of the previous message.
+ * Only used by decrypt. */
+ unsigned char iv[MAX_BLOCK_SIZE];
+};
+
+/*
+ * Allocate a CTSContext that wraps cipher/context and starts with the
+ * given iv (blocksize bytes). Returns NULL when blocksize exceeds
+ * MAX_BLOCK_SIZE (SEC_ERROR_LIBRARY_FAILURE is set) or when the allocation
+ * fails.
+ */
+CTSContext *
+CTS_CreateContext(void *context, freeblCipherFunc cipher,
+                  const unsigned char *iv, unsigned int blocksize)
+{
+    CTSContext *newCts = NULL;
+
+    if (blocksize <= MAX_BLOCK_SIZE) {
+        newCts = PORT_ZNew(CTSContext);
+        if (newCts != NULL) {
+            PORT_Memcpy(newCts->iv, iv, blocksize);
+            newCts->cipher = cipher;
+            newCts->context = context;
+        }
+    } else {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+    }
+    return newCts;
+}
+
+/*
+ * Release a CTSContext when freeit is true; otherwise do nothing. The
+ * context contents are not modified.
+ */
+void
+CTS_DestroyContext(CTSContext *cts, PRBool freeit)
+{
+    if (!freeit) {
+        return;
+    }
+    PORT_Free(cts);
+}
+
+/*
+ * See addendum to NIST SP 800-38A
+ * Generically handle cipher text stealing. Basically this is doing CBC
+ * operations except someone can pass us a partial block.
+ *
+ * Output Order:
+ * CS-1: C1||C2||C3..Cn-1(could be partial)||Cn (NIST)
+ * CS-2: pad == 0 C1||C2||C3...Cn-1(is full)||Cn (Schneier)
+ * CS-2: pad != 0 C1||C2||C3...Cn||Cn-1(is partial)(Schneier)
+ * CS-3: C1||C2||C3...Cn||Cn-1(could be partial) (Kerberos)
+ *
+ * The characteristics of these three options:
+ * - NIST & Schneier (CS-1 & CS-2) are identical to CBC if there are no
+ * partial blocks on input.
+ * - Schneier and Kerberos (CS-2 and CS-3) have no embedded partial blocks,
+ * which make decoding easier.
+ * - NIST & Kerberos (CS-1 and CS-3) have consistent block order independent
+ * of padding.
+ *
+ * PKCS #11 did not specify which version to implement, but points to the NIST
+ * spec, so this code implements CTS-CS-1 from NIST.
+ *
+ * To convert the returned buffer to:
+ * CS-2 (Schneier): do
+ * unsigned char tmp[MAX_BLOCK_SIZE];
+ * pad = *outlen % blocksize;
+ * if (pad) {
+ * memcpy(tmp, outbuf+*outlen-blocksize, blocksize);
+ * memcpy(outbuf+*outlen-pad,outbuf+*outlen-blocksize-pad, pad);
+ * memcpy(outbuf+*outlen-blocksize-pad, tmp, blocksize);
+ * }
+ * CS-3 (Kerberos): do
+ * unsigned char tmp[MAX_BLOCK_SIZE];
+ * pad = *outlen % blocksize;
+ * if (pad == 0) {
+ * pad = blocksize;
+ * }
+ * memcpy(tmp, outbuf+*outlen-blocksize, blocksize);
+ * memcpy(outbuf+*outlen-pad,outbuf+*outlen-blocksize-pad, pad);
+ * memcpy(outbuf+*outlen-blocksize-pad, tmp, blocksize);
+ */
+/*
+ * Encrypt inlen bytes (at least one block) producing CS-1 output of the
+ * same length.
+ *
+ * Fails with SEC_ERROR_INPUT_LEN when inlen < blocksize and with
+ * SEC_ERROR_OUTPUT_LEN (and *outlen = inlen) when maxout < inlen. If
+ * encrypting the final padded block fails, the output written so far is
+ * zeroed.
+ */
+SECStatus
+CTS_EncryptUpdate(CTSContext *cts, unsigned char *outbuf,
+                  unsigned int *outlen, unsigned int maxout,
+                  const unsigned char *inbuf, unsigned int inlen,
+                  unsigned int blocksize)
+{
+    unsigned char lastBlock[MAX_BLOCK_SIZE];
+    unsigned int tmp;
+    /* Lengths are unsigned like every other length in this interface; a
+     * signed int here would turn large offsets negative when they are
+     * added to outbuf. */
+    unsigned int fullblocks;
+    unsigned int written;
+    unsigned char *saveout = outbuf;
+    SECStatus rv;
+
+    if (inlen < blocksize) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+
+    if (maxout < inlen) {
+        *outlen = inlen;
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+    /* Encrypt all complete blocks with the underlying cipher. */
+    fullblocks = (inlen / blocksize) * blocksize;
+    rv = (*cts->cipher)(cts->context, outbuf, outlen, maxout, inbuf,
+                        fullblocks, blocksize);
+    if (rv != SECSuccess) {
+        return SECFailure;
+    }
+    *outlen = fullblocks; /* AES low level doesn't set outlen */
+    inbuf += fullblocks;
+    inlen -= fullblocks;
+    if (inlen == 0) {
+        return SECSuccess;
+    }
+    /* The final block is written so that it ends exactly at the end of the
+     * output, overlapping the tail of the previous ciphertext block. */
+    written = *outlen - (blocksize - inlen);
+    outbuf += written;
+    maxout -= written;
+
+    /*
+     * here's the CTS magic, we pad our final block with zeros,
+     * then do a CBC encrypt. CBC will xor our plain text with
+     * the previous block (Cn-1), capturing part of that block (Cn-1**) as it
+     * xors with the zero pad. We then write this full block, overwriting
+     * (Cn-1**) in our buffer. This allows us to have input data == output
+     * data since Cn contains enough information to recover Cn-1** when
+     * we decrypt (at the cost of some complexity as you can see in decrypt
+     * below */
+    PORT_Memcpy(lastBlock, inbuf, inlen);
+    PORT_Memset(lastBlock + inlen, 0, blocksize - inlen);
+    rv = (*cts->cipher)(cts->context, outbuf, &tmp, maxout, lastBlock,
+                        blocksize, blocksize);
+    /* lastBlock held plaintext; clear it whatever the outcome. */
+    PORT_Memset(lastBlock, 0, blocksize);
+    if (rv == SECSuccess) {
+        *outlen = written + blocksize;
+    } else {
+        PORT_Memset(saveout, 0, written + blocksize);
+    }
+    return rv;
+}
+
+/*
+ * x[k] ^= y[k] for the first count bytes. Uses the caller's loop variable
+ * i, which must be declared in the enclosing scope. Arguments are
+ * parenthesized and the body is a single statement, so the macro is safe
+ * with expression arguments and inside unbraced if/else.
+ */
+#define XOR_BLOCK(x, y, count)              \
+    do {                                    \
+        for (i = 0; i < (count); i++)       \
+            (x)[i] = (x)[i] ^ (y)[i];       \
+    } while (0)
+
+/*
+ * See addendum to NIST SP 800-38A
+ * Decrypt, Expect CS-1: input. See the comment on the encrypt side
+ * to understand what CS-2 and CS-3 mean.
+ *
+ * To convert the input buffer to CS-1 from ...
+ * CS-2 (Schneier): do
+ * unsigned char tmp[MAX_BLOCK_SIZE];
+ * pad = inlen % blocksize;
+ * if (pad) {
+ * memcpy(tmp, inbuf+inlen-blocksize-pad, blocksize);
+ * memcpy(inbuf+inlen-blocksize-pad,inbuf+inlen-pad, pad);
+ * memcpy(inbuf+inlen-blocksize, tmp, blocksize);
+ * }
+ * CS-3 (Kerberos): do
+ * unsigned char tmp[MAX_BLOCK_SIZE];
+ * pad = inlen % blocksize;
+ * if (pad == 0) {
+ * pad = blocksize;
+ * }
+ * memcpy(tmp, inbuf+inlen-blocksize-pad, blocksize);
+ * memcpy(inbuf+inlen-blocksize-pad,inbuf+inlen-pad, pad);
+ * memcpy(inbuf+inlen-blocksize, tmp, blocksize);
+ */
+/*
+ * Decrypt inlen bytes of CS-1 input (at least one block) into outbuf.
+ *
+ * Fails with SEC_ERROR_INPUT_LEN when inlen < blocksize and with
+ * SEC_ERROR_OUTPUT_LEN (and *outlen = inlen) when maxout < inlen. When the
+ * input has a partial block it is first copied into outbuf (unless the two
+ * buffers are the same) and rearranged there, so outbuf is modified even
+ * on later failure paths.
+ */
+SECStatus
+CTS_DecryptUpdate(CTSContext *cts, unsigned char *outbuf,
+ unsigned int *outlen, unsigned int maxout,
+ const unsigned char *inbuf, unsigned int inlen,
+ unsigned int blocksize)
+{
+ unsigned char *Pn;
+ unsigned char Cn_2[MAX_BLOCK_SIZE]; /* block Cn-2 */
+ unsigned char Cn_1[MAX_BLOCK_SIZE]; /* block Cn-1 */
+ unsigned char Cn[MAX_BLOCK_SIZE]; /* block Cn */
+ unsigned char lastBlock[MAX_BLOCK_SIZE];
+ const unsigned char *tmp;
+ unsigned char *saveout = outbuf;
+ unsigned int tmpLen;
+ unsigned int fullblocks, pad;
+ unsigned int i;
+ SECStatus rv;
+
+ if (inlen < blocksize) {
+ PORT_SetError(SEC_ERROR_INPUT_LEN);
+ return SECFailure;
+ }
+
+ if (maxout < inlen) {
+ *outlen = inlen;
+ PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+ return SECFailure;
+ }
+
+ fullblocks = (inlen / blocksize) * blocksize;
+
+ /* even though we expect the input to be CS-1, CS-2 is easier to parse,
+ * so convert to CS-2 immediately. NOTE: this is the same code as in
+ * the comment for encrypt. NOTE2: since we can't modify inbuf unless
+ * inbuf and outbuf overlap, just copy inbuf to outbuf and modify it there
+ */
+ pad = inlen - fullblocks;
+ if (pad != 0) {
+ if (inbuf != outbuf) {
+ /* NOTE(review): memcpy requires non-overlapping buffers; this
+ * presumably assumes inbuf and outbuf are either identical or
+ * disjoint - confirm against callers. */
+ memcpy(outbuf, inbuf, inlen);
+ /* keep the names so we logically know how we are using the
+ * buffers */
+ inbuf = outbuf;
+ }
+ memcpy(lastBlock, inbuf + inlen - blocksize, blocksize);
+ /* we know inbuf == outbuf now, inbuf is declared const and can't
+ * be the target, so use outbuf for the target here */
+ memcpy(outbuf + inlen - pad, inbuf + inlen - blocksize - pad, pad);
+ memcpy(outbuf + inlen - blocksize - pad, lastBlock, blocksize);
+ }
+ /* save the previous to last block so we can undo the misordered
+ * chaining */
+ /* with fewer than two full blocks the block before Cn is the saved iv */
+ tmp = (fullblocks < blocksize * 2) ? cts->iv : inbuf + fullblocks - blocksize * 2;
+ PORT_Memcpy(Cn_2, tmp, blocksize);
+ PORT_Memcpy(Cn, inbuf + fullblocks - blocksize, blocksize);
+ /* decrypt every full block in one call to the underlying cipher */
+ rv = (*cts->cipher)(cts->context, outbuf, outlen, maxout, inbuf,
+ fullblocks, blocksize);
+ if (rv != SECSuccess) {
+ return SECFailure;
+ }
+ *outlen = fullblocks; /* AES low level doesn't set outlen */
+ inbuf += fullblocks;
+ inlen -= fullblocks;
+ if (inlen == 0) {
+ return SECSuccess;
+ }
+ outbuf += fullblocks;
+
+ /* recover the stolen text */
+ PORT_Memset(lastBlock, 0, blocksize);
+ PORT_Memcpy(lastBlock, inbuf, inlen);
+ /* only the first inlen bytes of Cn_1 are set and later read */
+ PORT_Memcpy(Cn_1, inbuf, inlen);
+ Pn = outbuf - blocksize;
+ /* inbuf points to Cn-1* in the input buffer */
+ /* NOTE: below there are 2 sections marked "make up for the out of order
+ * cbc decryption". You may ask, what is going on here.
+ * Short answer: CBC automatically xors the plain text with the previous
+ * encrypted block. We are decrypting the last 2 blocks out of order, so
+ * we have to 'back out' the decrypt xor and 'add back' the encrypt xor.
+ * Long answer: When we encrypted, we encrypted as follows:
+ * Pn-2, Pn-1, (Pn || 0), but on decryption we can't
+ * decrypt Cn-1 until we decrypt Cn because part of Cn-1 is stored in
+ * Cn (see below). So above we decrypted all the full blocks:
+ * Cn-2, Cn,
+ * to get:
+ * Pn-2, Pn, Except that Pn is not yet correct. On encrypt, we
+ * xor'd Pn || 0 with Cn-1, but on decrypt we xor'd it with Cn-2
+ * To recover Pn, we xor the block with Cn-1* || 0 (in last block) and
+ * Cn-2 to get Pn || Cn-1**. Pn can then be written to the output buffer
+ * and we can now reunite Cn-1. With the full Cn-1 we can decrypt it,
+ * but now decrypt is going to xor the decrypted data with Cn instead of
+ * Cn-2. xoring Cn and Cn-2 restores the original Pn-1 and we can now
+ * write that out to the buffer */
+
+ /* make up for the out of order CBC decryption */
+ XOR_BLOCK(lastBlock, Cn_2, blocksize);
+ XOR_BLOCK(lastBlock, Pn, blocksize);
+ /* last buf now has Pn || Cn-1**, copy out Pn */
+ PORT_Memcpy(outbuf, lastBlock, inlen);
+ *outlen += inlen;
+ /* copy Cn-1* into last buf to recover Cn-1 */
+ PORT_Memcpy(lastBlock, Cn_1, inlen);
+ /* note: because Cn and Cn-1 were out of order, our pointer to Pn also
+ * points to where Pn-1 needs to reside. From here on out read Pn in
+ * the code as really Pn-1. */
+ rv = (*cts->cipher)(cts->context, Pn, &tmpLen, blocksize, lastBlock,
+ blocksize, blocksize);
+ if (rv != SECSuccess) {
+ /* wipe the scratch block and everything already written */
+ PORT_Memset(lastBlock, 0, blocksize);
+ PORT_Memset(saveout, 0, *outlen);
+ return SECFailure;
+ }
+ /* make up for the out of order CBC decryption */
+ XOR_BLOCK(Pn, Cn_2, blocksize);
+ XOR_BLOCK(Pn, Cn, blocksize);
+ /* reset iv to Cn */
+ PORT_Memcpy(cts->iv, Cn, blocksize);
+ /* This makes Cn the last block for the next decrypt operation, which
+ * matches the encrypt. We don't care about the contents of last block,
+ * only the side effect of setting the internal IV */
+ /* the return value is deliberately discarded (note the void cast) */
+ (void)(*cts->cipher)(cts->context, lastBlock, &tmpLen, blocksize, Cn,
+ blocksize, blocksize);
+ /* clear last block. At this point last block contains Pn xor Cn_1 xor
+ * Cn_2, both of which an attacker would know, so we need to clear this
+ * buffer out */
+ PORT_Memset(lastBlock, 0, blocksize);
+ /* Cn, Cn_1, and Cn_2 have encrypted data, so no need to clear them */
+ return SECSuccess;
+}
diff --git a/security/nss/lib/freebl/cts.h b/security/nss/lib/freebl/cts.h
new file mode 100644
index 000000000..a3ec180af
--- /dev/null
+++ b/security/nss/lib/freebl/cts.h
@@ -0,0 +1,33 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef CTS_H
+#define CTS_H 1
+
+#include "blapii.h"
+
+typedef struct CTSContextStr CTSContext;
+
+/*
+ * The context argument is the inner cipher context to use with cipher. The
+ * CTSContext does not own context. context needs to remain valid for as long
+ * as the CTSContext is valid.
+ *
+ * The cipher argument is a block cipher in the CBC mode.
+ */
+CTSContext *CTS_CreateContext(void *context, freeblCipherFunc cipher,
+ const unsigned char *iv, unsigned int blocksize);
+
+void CTS_DestroyContext(CTSContext *cts, PRBool freeit);
+
+SECStatus CTS_EncryptUpdate(CTSContext *cts, unsigned char *outbuf,
+ unsigned int *outlen, unsigned int maxout,
+ const unsigned char *inbuf, unsigned int inlen,
+ unsigned int blocksize);
+SECStatus CTS_DecryptUpdate(CTSContext *cts, unsigned char *outbuf,
+ unsigned int *outlen, unsigned int maxout,
+ const unsigned char *inbuf, unsigned int inlen,
+ unsigned int blocksize);
+
+#endif
diff --git a/security/nss/lib/freebl/des.c b/security/nss/lib/freebl/des.c
new file mode 100644
index 000000000..fd433bbb2
--- /dev/null
+++ b/security/nss/lib/freebl/des.c
@@ -0,0 +1,676 @@
+/*
+ * des.c
+ *
+ * core source file for DES-150 library
+ * Make key schedule from DES key.
+ * Encrypt/Decrypt one 8-byte block.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "des.h"
+#include "blapii.h"
+#include <stddef.h> /* for ptrdiff_t */
+/* #define USE_INDEXING 1 */
+
+/*
+ * The tables below are the 8 sbox functions, with the 6-bit input permutation
+ * and the 32-bit output permutation pre-computed.
+ * They are shifted circularly to the left 3 bits, which removes 2 shifts
+ * and an or from each round by reducing the number of sboxes whose
+ * indices cross word boundaries from 2 to 1.
+ */
+
+static const HALF SP[8][64] = {
+ /* Box S1 */
+ { 0x04041000, 0x00000000, 0x00040000, 0x04041010,
+ 0x04040010, 0x00041010, 0x00000010, 0x00040000,
+ 0x00001000, 0x04041000, 0x04041010, 0x00001000,
+ 0x04001010, 0x04040010, 0x04000000, 0x00000010,
+ 0x00001010, 0x04001000, 0x04001000, 0x00041000,
+ 0x00041000, 0x04040000, 0x04040000, 0x04001010,
+ 0x00040010, 0x04000010, 0x04000010, 0x00040010,
+ 0x00000000, 0x00001010, 0x00041010, 0x04000000,
+ 0x00040000, 0x04041010, 0x00000010, 0x04040000,
+ 0x04041000, 0x04000000, 0x04000000, 0x00001000,
+ 0x04040010, 0x00040000, 0x00041000, 0x04000010,
+ 0x00001000, 0x00000010, 0x04001010, 0x00041010,
+ 0x04041010, 0x00040010, 0x04040000, 0x04001010,
+ 0x04000010, 0x00001010, 0x00041010, 0x04041000,
+ 0x00001010, 0x04001000, 0x04001000, 0x00000000,
+ 0x00040010, 0x00041000, 0x00000000, 0x04040010 },
+ /* Box S2 */
+ { 0x00420082, 0x00020002, 0x00020000, 0x00420080,
+ 0x00400000, 0x00000080, 0x00400082, 0x00020082,
+ 0x00000082, 0x00420082, 0x00420002, 0x00000002,
+ 0x00020002, 0x00400000, 0x00000080, 0x00400082,
+ 0x00420000, 0x00400080, 0x00020082, 0x00000000,
+ 0x00000002, 0x00020000, 0x00420080, 0x00400002,
+ 0x00400080, 0x00000082, 0x00000000, 0x00420000,
+ 0x00020080, 0x00420002, 0x00400002, 0x00020080,
+ 0x00000000, 0x00420080, 0x00400082, 0x00400000,
+ 0x00020082, 0x00400002, 0x00420002, 0x00020000,
+ 0x00400002, 0x00020002, 0x00000080, 0x00420082,
+ 0x00420080, 0x00000080, 0x00020000, 0x00000002,
+ 0x00020080, 0x00420002, 0x00400000, 0x00000082,
+ 0x00400080, 0x00020082, 0x00000082, 0x00400080,
+ 0x00420000, 0x00000000, 0x00020002, 0x00020080,
+ 0x00000002, 0x00400082, 0x00420082, 0x00420000 },
+ /* Box S3 */
+ { 0x00000820, 0x20080800, 0x00000000, 0x20080020,
+ 0x20000800, 0x00000000, 0x00080820, 0x20000800,
+ 0x00080020, 0x20000020, 0x20000020, 0x00080000,
+ 0x20080820, 0x00080020, 0x20080000, 0x00000820,
+ 0x20000000, 0x00000020, 0x20080800, 0x00000800,
+ 0x00080800, 0x20080000, 0x20080020, 0x00080820,
+ 0x20000820, 0x00080800, 0x00080000, 0x20000820,
+ 0x00000020, 0x20080820, 0x00000800, 0x20000000,
+ 0x20080800, 0x20000000, 0x00080020, 0x00000820,
+ 0x00080000, 0x20080800, 0x20000800, 0x00000000,
+ 0x00000800, 0x00080020, 0x20080820, 0x20000800,
+ 0x20000020, 0x00000800, 0x00000000, 0x20080020,
+ 0x20000820, 0x00080000, 0x20000000, 0x20080820,
+ 0x00000020, 0x00080820, 0x00080800, 0x20000020,
+ 0x20080000, 0x20000820, 0x00000820, 0x20080000,
+ 0x00080820, 0x00000020, 0x20080020, 0x00080800 },
+ /* Box S4 */
+ { 0x02008004, 0x00008204, 0x00008204, 0x00000200,
+ 0x02008200, 0x02000204, 0x02000004, 0x00008004,
+ 0x00000000, 0x02008000, 0x02008000, 0x02008204,
+ 0x00000204, 0x00000000, 0x02000200, 0x02000004,
+ 0x00000004, 0x00008000, 0x02000000, 0x02008004,
+ 0x00000200, 0x02000000, 0x00008004, 0x00008200,
+ 0x02000204, 0x00000004, 0x00008200, 0x02000200,
+ 0x00008000, 0x02008200, 0x02008204, 0x00000204,
+ 0x02000200, 0x02000004, 0x02008000, 0x02008204,
+ 0x00000204, 0x00000000, 0x00000000, 0x02008000,
+ 0x00008200, 0x02000200, 0x02000204, 0x00000004,
+ 0x02008004, 0x00008204, 0x00008204, 0x00000200,
+ 0x02008204, 0x00000204, 0x00000004, 0x00008000,
+ 0x02000004, 0x00008004, 0x02008200, 0x02000204,
+ 0x00008004, 0x00008200, 0x02000000, 0x02008004,
+ 0x00000200, 0x02000000, 0x00008000, 0x02008200 },
+ /* Box S5 */
+ { 0x00000400, 0x08200400, 0x08200000, 0x08000401,
+ 0x00200000, 0x00000400, 0x00000001, 0x08200000,
+ 0x00200401, 0x00200000, 0x08000400, 0x00200401,
+ 0x08000401, 0x08200001, 0x00200400, 0x00000001,
+ 0x08000000, 0x00200001, 0x00200001, 0x00000000,
+ 0x00000401, 0x08200401, 0x08200401, 0x08000400,
+ 0x08200001, 0x00000401, 0x00000000, 0x08000001,
+ 0x08200400, 0x08000000, 0x08000001, 0x00200400,
+ 0x00200000, 0x08000401, 0x00000400, 0x08000000,
+ 0x00000001, 0x08200000, 0x08000401, 0x00200401,
+ 0x08000400, 0x00000001, 0x08200001, 0x08200400,
+ 0x00200401, 0x00000400, 0x08000000, 0x08200001,
+ 0x08200401, 0x00200400, 0x08000001, 0x08200401,
+ 0x08200000, 0x00000000, 0x00200001, 0x08000001,
+ 0x00200400, 0x08000400, 0x00000401, 0x00200000,
+ 0x00000000, 0x00200001, 0x08200400, 0x00000401 },
+ /* Box S6 */
+ { 0x80000040, 0x81000000, 0x00010000, 0x81010040,
+ 0x81000000, 0x00000040, 0x81010040, 0x01000000,
+ 0x80010000, 0x01010040, 0x01000000, 0x80000040,
+ 0x01000040, 0x80010000, 0x80000000, 0x00010040,
+ 0x00000000, 0x01000040, 0x80010040, 0x00010000,
+ 0x01010000, 0x80010040, 0x00000040, 0x81000040,
+ 0x81000040, 0x00000000, 0x01010040, 0x81010000,
+ 0x00010040, 0x01010000, 0x81010000, 0x80000000,
+ 0x80010000, 0x00000040, 0x81000040, 0x01010000,
+ 0x81010040, 0x01000000, 0x00010040, 0x80000040,
+ 0x01000000, 0x80010000, 0x80000000, 0x00010040,
+ 0x80000040, 0x81010040, 0x01010000, 0x81000000,
+ 0x01010040, 0x81010000, 0x00000000, 0x81000040,
+ 0x00000040, 0x00010000, 0x81000000, 0x01010040,
+ 0x00010000, 0x01000040, 0x80010040, 0x00000000,
+ 0x81010000, 0x80000000, 0x01000040, 0x80010040 },
+ /* Box S7 */
+ { 0x00800000, 0x10800008, 0x10002008, 0x00000000,
+ 0x00002000, 0x10002008, 0x00802008, 0x10802000,
+ 0x10802008, 0x00800000, 0x00000000, 0x10000008,
+ 0x00000008, 0x10000000, 0x10800008, 0x00002008,
+ 0x10002000, 0x00802008, 0x00800008, 0x10002000,
+ 0x10000008, 0x10800000, 0x10802000, 0x00800008,
+ 0x10800000, 0x00002000, 0x00002008, 0x10802008,
+ 0x00802000, 0x00000008, 0x10000000, 0x00802000,
+ 0x10000000, 0x00802000, 0x00800000, 0x10002008,
+ 0x10002008, 0x10800008, 0x10800008, 0x00000008,
+ 0x00800008, 0x10000000, 0x10002000, 0x00800000,
+ 0x10802000, 0x00002008, 0x00802008, 0x10802000,
+ 0x00002008, 0x10000008, 0x10802008, 0x10800000,
+ 0x00802000, 0x00000000, 0x00000008, 0x10802008,
+ 0x00000000, 0x00802008, 0x10800000, 0x00002000,
+ 0x10000008, 0x10002000, 0x00002000, 0x00800008 },
+ /* Box S8 */
+ { 0x40004100, 0x00004000, 0x00100000, 0x40104100,
+ 0x40000000, 0x40004100, 0x00000100, 0x40000000,
+ 0x00100100, 0x40100000, 0x40104100, 0x00104000,
+ 0x40104000, 0x00104100, 0x00004000, 0x00000100,
+ 0x40100000, 0x40000100, 0x40004000, 0x00004100,
+ 0x00104000, 0x00100100, 0x40100100, 0x40104000,
+ 0x00004100, 0x00000000, 0x00000000, 0x40100100,
+ 0x40000100, 0x40004000, 0x00104100, 0x00100000,
+ 0x00104100, 0x00100000, 0x40104000, 0x00004000,
+ 0x00000100, 0x40100100, 0x00004000, 0x00104100,
+ 0x40004000, 0x00000100, 0x40000100, 0x40100000,
+ 0x40100100, 0x40000000, 0x00100000, 0x40004100,
+ 0x00000000, 0x40104100, 0x00100100, 0x40000100,
+ 0x40100000, 0x40004000, 0x40004100, 0x00000000,
+ 0x40104100, 0x00104000, 0x00104000, 0x00004100,
+ 0x00004100, 0x00100100, 0x40000000, 0x40104000 }
+};
+
+static const HALF PC2[8][64] = {
+ /* table 0 */
+ { 0x00000000, 0x00001000, 0x04000000, 0x04001000,
+ 0x00100000, 0x00101000, 0x04100000, 0x04101000,
+ 0x00008000, 0x00009000, 0x04008000, 0x04009000,
+ 0x00108000, 0x00109000, 0x04108000, 0x04109000,
+ 0x00000004, 0x00001004, 0x04000004, 0x04001004,
+ 0x00100004, 0x00101004, 0x04100004, 0x04101004,
+ 0x00008004, 0x00009004, 0x04008004, 0x04009004,
+ 0x00108004, 0x00109004, 0x04108004, 0x04109004,
+ 0x08000000, 0x08001000, 0x0c000000, 0x0c001000,
+ 0x08100000, 0x08101000, 0x0c100000, 0x0c101000,
+ 0x08008000, 0x08009000, 0x0c008000, 0x0c009000,
+ 0x08108000, 0x08109000, 0x0c108000, 0x0c109000,
+ 0x08000004, 0x08001004, 0x0c000004, 0x0c001004,
+ 0x08100004, 0x08101004, 0x0c100004, 0x0c101004,
+ 0x08008004, 0x08009004, 0x0c008004, 0x0c009004,
+ 0x08108004, 0x08109004, 0x0c108004, 0x0c109004 },
+ /* table 1 */
+ { 0x00000000, 0x00002000, 0x80000000, 0x80002000,
+ 0x00000008, 0x00002008, 0x80000008, 0x80002008,
+ 0x00200000, 0x00202000, 0x80200000, 0x80202000,
+ 0x00200008, 0x00202008, 0x80200008, 0x80202008,
+ 0x20000000, 0x20002000, 0xa0000000, 0xa0002000,
+ 0x20000008, 0x20002008, 0xa0000008, 0xa0002008,
+ 0x20200000, 0x20202000, 0xa0200000, 0xa0202000,
+ 0x20200008, 0x20202008, 0xa0200008, 0xa0202008,
+ 0x00000400, 0x00002400, 0x80000400, 0x80002400,
+ 0x00000408, 0x00002408, 0x80000408, 0x80002408,
+ 0x00200400, 0x00202400, 0x80200400, 0x80202400,
+ 0x00200408, 0x00202408, 0x80200408, 0x80202408,
+ 0x20000400, 0x20002400, 0xa0000400, 0xa0002400,
+ 0x20000408, 0x20002408, 0xa0000408, 0xa0002408,
+ 0x20200400, 0x20202400, 0xa0200400, 0xa0202400,
+ 0x20200408, 0x20202408, 0xa0200408, 0xa0202408 },
+ /* table 2 */
+ { 0x00000000, 0x00004000, 0x00000020, 0x00004020,
+ 0x00080000, 0x00084000, 0x00080020, 0x00084020,
+ 0x00000800, 0x00004800, 0x00000820, 0x00004820,
+ 0x00080800, 0x00084800, 0x00080820, 0x00084820,
+ 0x00000010, 0x00004010, 0x00000030, 0x00004030,
+ 0x00080010, 0x00084010, 0x00080030, 0x00084030,
+ 0x00000810, 0x00004810, 0x00000830, 0x00004830,
+ 0x00080810, 0x00084810, 0x00080830, 0x00084830,
+ 0x00400000, 0x00404000, 0x00400020, 0x00404020,
+ 0x00480000, 0x00484000, 0x00480020, 0x00484020,
+ 0x00400800, 0x00404800, 0x00400820, 0x00404820,
+ 0x00480800, 0x00484800, 0x00480820, 0x00484820,
+ 0x00400010, 0x00404010, 0x00400030, 0x00404030,
+ 0x00480010, 0x00484010, 0x00480030, 0x00484030,
+ 0x00400810, 0x00404810, 0x00400830, 0x00404830,
+ 0x00480810, 0x00484810, 0x00480830, 0x00484830 },
+ /* table 3 */
+ { 0x00000000, 0x40000000, 0x00000080, 0x40000080,
+ 0x00040000, 0x40040000, 0x00040080, 0x40040080,
+ 0x00000040, 0x40000040, 0x000000c0, 0x400000c0,
+ 0x00040040, 0x40040040, 0x000400c0, 0x400400c0,
+ 0x10000000, 0x50000000, 0x10000080, 0x50000080,
+ 0x10040000, 0x50040000, 0x10040080, 0x50040080,
+ 0x10000040, 0x50000040, 0x100000c0, 0x500000c0,
+ 0x10040040, 0x50040040, 0x100400c0, 0x500400c0,
+ 0x00800000, 0x40800000, 0x00800080, 0x40800080,
+ 0x00840000, 0x40840000, 0x00840080, 0x40840080,
+ 0x00800040, 0x40800040, 0x008000c0, 0x408000c0,
+ 0x00840040, 0x40840040, 0x008400c0, 0x408400c0,
+ 0x10800000, 0x50800000, 0x10800080, 0x50800080,
+ 0x10840000, 0x50840000, 0x10840080, 0x50840080,
+ 0x10800040, 0x50800040, 0x108000c0, 0x508000c0,
+ 0x10840040, 0x50840040, 0x108400c0, 0x508400c0 },
+ /* table 4 */
+ { 0x00000000, 0x00000008, 0x08000000, 0x08000008,
+ 0x00040000, 0x00040008, 0x08040000, 0x08040008,
+ 0x00002000, 0x00002008, 0x08002000, 0x08002008,
+ 0x00042000, 0x00042008, 0x08042000, 0x08042008,
+ 0x80000000, 0x80000008, 0x88000000, 0x88000008,
+ 0x80040000, 0x80040008, 0x88040000, 0x88040008,
+ 0x80002000, 0x80002008, 0x88002000, 0x88002008,
+ 0x80042000, 0x80042008, 0x88042000, 0x88042008,
+ 0x00080000, 0x00080008, 0x08080000, 0x08080008,
+ 0x000c0000, 0x000c0008, 0x080c0000, 0x080c0008,
+ 0x00082000, 0x00082008, 0x08082000, 0x08082008,
+ 0x000c2000, 0x000c2008, 0x080c2000, 0x080c2008,
+ 0x80080000, 0x80080008, 0x88080000, 0x88080008,
+ 0x800c0000, 0x800c0008, 0x880c0000, 0x880c0008,
+ 0x80082000, 0x80082008, 0x88082000, 0x88082008,
+ 0x800c2000, 0x800c2008, 0x880c2000, 0x880c2008 },
+ /* table 5 */
+ { 0x00000000, 0x00400000, 0x00008000, 0x00408000,
+ 0x40000000, 0x40400000, 0x40008000, 0x40408000,
+ 0x00000020, 0x00400020, 0x00008020, 0x00408020,
+ 0x40000020, 0x40400020, 0x40008020, 0x40408020,
+ 0x00001000, 0x00401000, 0x00009000, 0x00409000,
+ 0x40001000, 0x40401000, 0x40009000, 0x40409000,
+ 0x00001020, 0x00401020, 0x00009020, 0x00409020,
+ 0x40001020, 0x40401020, 0x40009020, 0x40409020,
+ 0x00100000, 0x00500000, 0x00108000, 0x00508000,
+ 0x40100000, 0x40500000, 0x40108000, 0x40508000,
+ 0x00100020, 0x00500020, 0x00108020, 0x00508020,
+ 0x40100020, 0x40500020, 0x40108020, 0x40508020,
+ 0x00101000, 0x00501000, 0x00109000, 0x00509000,
+ 0x40101000, 0x40501000, 0x40109000, 0x40509000,
+ 0x00101020, 0x00501020, 0x00109020, 0x00509020,
+ 0x40101020, 0x40501020, 0x40109020, 0x40509020 },
+ /* table 6 */
+ { 0x00000000, 0x00000040, 0x04000000, 0x04000040,
+ 0x00000800, 0x00000840, 0x04000800, 0x04000840,
+ 0x00800000, 0x00800040, 0x04800000, 0x04800040,
+ 0x00800800, 0x00800840, 0x04800800, 0x04800840,
+ 0x10000000, 0x10000040, 0x14000000, 0x14000040,
+ 0x10000800, 0x10000840, 0x14000800, 0x14000840,
+ 0x10800000, 0x10800040, 0x14800000, 0x14800040,
+ 0x10800800, 0x10800840, 0x14800800, 0x14800840,
+ 0x00000080, 0x000000c0, 0x04000080, 0x040000c0,
+ 0x00000880, 0x000008c0, 0x04000880, 0x040008c0,
+ 0x00800080, 0x008000c0, 0x04800080, 0x048000c0,
+ 0x00800880, 0x008008c0, 0x04800880, 0x048008c0,
+ 0x10000080, 0x100000c0, 0x14000080, 0x140000c0,
+ 0x10000880, 0x100008c0, 0x14000880, 0x140008c0,
+ 0x10800080, 0x108000c0, 0x14800080, 0x148000c0,
+ 0x10800880, 0x108008c0, 0x14800880, 0x148008c0 },
+ /* table 7 */
+ { 0x00000000, 0x00000010, 0x00000400, 0x00000410,
+ 0x00000004, 0x00000014, 0x00000404, 0x00000414,
+ 0x00004000, 0x00004010, 0x00004400, 0x00004410,
+ 0x00004004, 0x00004014, 0x00004404, 0x00004414,
+ 0x20000000, 0x20000010, 0x20000400, 0x20000410,
+ 0x20000004, 0x20000014, 0x20000404, 0x20000414,
+ 0x20004000, 0x20004010, 0x20004400, 0x20004410,
+ 0x20004004, 0x20004014, 0x20004404, 0x20004414,
+ 0x00200000, 0x00200010, 0x00200400, 0x00200410,
+ 0x00200004, 0x00200014, 0x00200404, 0x00200414,
+ 0x00204000, 0x00204010, 0x00204400, 0x00204410,
+ 0x00204004, 0x00204014, 0x00204404, 0x00204414,
+ 0x20200000, 0x20200010, 0x20200400, 0x20200410,
+ 0x20200004, 0x20200014, 0x20200404, 0x20200414,
+ 0x20204000, 0x20204010, 0x20204400, 0x20204410,
+ 0x20204004, 0x20204014, 0x20204404, 0x20204414 }
+};
+
+/*
+ * The PC-1 Permutation
+ * If we number the bits of the 8 bytes of key input like this (in octal):
+ * 00 01 02 03 04 05 06 07
+ * 10 11 12 13 14 15 16 17
+ * 20 21 22 23 24 25 26 27
+ * 30 31 32 33 34 35 36 37
+ * 40 41 42 43 44 45 46 47
+ * 50 51 52 53 54 55 56 57
+ * 60 61 62 63 64 65 66 67
+ * 70 71 72 73 74 75 76 77
+ * then after the PC-1 permutation,
+ * C0 is
+ * 70 60 50 40 30 20 10 00
+ * 71 61 51 41 31 21 11 01
+ * 72 62 52 42 32 22 12 02
+ * 73 63 53 43
+ * D0 is
+ * 76 66 56 46 36 26 16 06
+ * 75 65 55 45 35 25 15 05
+ * 74 64 54 44 34 24 14 04
+ * 33 23 13 03
+ * and these parity bits have been discarded:
+ * 77 67 57 47 37 27 17 07
+ *
+ * We achieve this by flipping the input matrix about the diagonal from 70-07,
+ * getting left =
+ * 77 67 57 47 37 27 17 07 (these are the parity bits)
+ * 76 66 56 46 36 26 16 06
+ * 75 65 55 45 35 25 15 05
+ * 74 64 54 44 34 24 14 04
+ * right =
+ * 73 63 53 43 33 23 13 03
+ * 72 62 52 42 32 22 12 02
+ * 71 61 51 41 31 21 11 01
+ * 70 60 50 40 30 20 10 00
+ * then byte swap right, ala htonl() on a little endian machine.
+ * right =
+ * 70 60 50 40 30 20 10 00
+ * 71 61 51 41 31 21 11 01
+ * 72 62 52 42 32 22 12 02
+ * 73 63 53 43 33 23 13 03
+ * then
+ * c0 = right >> 4;
+ * d0 = ((left & 0x00ffffff) << 4) | (right & 0xf);
+*/
+
+#define FLIP_RIGHT_DIAGONAL(word, temp) /* two masked bit-swap steps on word (distance 18, then 9); clobbers temp */ \
+ temp = (word ^ (word >> 18)) & 0x00003333; \
+ word ^= temp | (temp << 18); \
+ temp = (word ^ (word >> 9)) & 0x00550055; \
+ word ^= temp | (temp << 9);
+
+#if defined(__GNUC__) && defined(NSS_X86_OR_X64) /* GCC-style inline asm: x86 bswap instruction; temp is unused */
+#define BYTESWAP(word, temp) \
+ __asm("bswap %0" \
+ : "+r"(word));
+#elif (_MSC_VER >= 1300) && defined(NSS_X86_OR_X64) /* MSVC: _byteswap_ulong intrinsic; temp is unused */
+#include <stdlib.h>
+#pragma intrinsic(_byteswap_ulong)
+#define BYTESWAP(word, temp) \
+ word = _byteswap_ulong(word);
+#elif defined(__GNUC__) && (defined(__thumb2__) || \
+ (!defined(__thumb__) && \
+ (defined(__ARM_ARCH_6__) || \
+ defined(__ARM_ARCH_6J__) || \
+ defined(__ARM_ARCH_6K__) || \
+ defined(__ARM_ARCH_6Z__) || \
+ defined(__ARM_ARCH_6ZK__) || \
+ defined(__ARM_ARCH_6T2__) || \
+ defined(__ARM_ARCH_7__) || \
+ defined(__ARM_ARCH_7A__) || \
+ defined(__ARM_ARCH_7R__)))) /* GCC-style inline asm: ARM rev instruction; temp is unused */
+#define BYTESWAP(word, temp) \
+ __asm("rev %0, %0" \
+ : "+r"(word));
+#else /* portable fallback: swap the 16-bit halves, then the bytes inside each half; the only variant that writes temp */
+#define BYTESWAP(word, temp) \
+ word = (word >> 16) | (word << 16); \
+ temp = 0x00ff00ff; \
+ word = ((word & temp) << 8) | ((word >> 8) & temp);
+#endif
+
+#define PC1(left, right, c0, d0, temp) /* modifies left, right and temp in place; results are assigned to c0 and d0 */ \
+ right ^= temp = ((left >> 4) ^ right) & 0x0f0f0f0f; \
+ left ^= temp << 4; \
+ FLIP_RIGHT_DIAGONAL(left, temp); \
+ FLIP_RIGHT_DIAGONAL(right, temp); \
+ BYTESWAP(right, temp); \
+ c0 = right >> 4; \
+ d0 = ((left & 0x00ffffff) << 4) | (right & 0xf);
+
+#define LEFT_SHIFT_1(reg) (((reg << 1) | (reg >> 27)) & 0x0FFFFFFF) /* 28-bit rotate left by 1; reg must already fit in 28 bits */
+#define LEFT_SHIFT_2(reg) (((reg << 2) | (reg >> 26)) & 0x0FFFFFFF) /* 28-bit rotate left by 2; reg must already fit in 28 bits */
+
+/*
+ * setup key schedules from key
+ *
+ * ks        - output; 32 HALFs are written, one pair per loop iteration
+ *             for 16 iterations.
+ * key       - 8 key bytes, loaded as two big-endian 32-bit words.
+ * direction - DES_ENCRYPT fills ks from the front; any other value fills
+ *             it from the last pair backwards. DES_Do1Block always reads
+ *             ks front to back, so the fill order alone selects
+ *             encryption or decryption.
+ */
+
+void
+DES_MakeSchedule(HALF *ks, const BYTE *key, DESDirection direction)
+{
+ register HALF left, right;
+ register HALF c0, d0;
+ register HALF temp;
+ int delta; /* signed byte step applied to ks after each pair is stored */
+ unsigned int ls; /* shift-amount mask, consumed one bit per iteration */
+
+#if defined(HAVE_UNALIGNED_ACCESS)
+ left = HALFPTR(key)[0];
+ right = HALFPTR(key)[1];
+#if defined(IS_LITTLE_ENDIAN)
+ BYTESWAP(left, temp);
+ BYTESWAP(right, temp);
+#endif
+#else
+ if (((ptrdiff_t)key & 0x03) == 0) { /* key is 4-byte aligned: load whole words */
+ left = HALFPTR(key)[0];
+ right = HALFPTR(key)[1];
+#if defined(IS_LITTLE_ENDIAN)
+ BYTESWAP(left, temp);
+ BYTESWAP(right, temp);
+#endif
+ } else { /* unaligned key: assemble the big-endian words byte by byte */
+ left = ((HALF)key[0] << 24) | ((HALF)key[1] << 16) |
+ ((HALF)key[2] << 8) | key[3];
+ right = ((HALF)key[4] << 24) | ((HALF)key[5] << 16) |
+ ((HALF)key[6] << 8) | key[7];
+ }
+#endif
+
+ PC1(left, right, c0, d0, temp);
+
+ if (direction == DES_ENCRYPT) {
+ delta = 2 * (int)sizeof(HALF);
+ } else {
+ ks += 30; /* start at the last pair and step backwards */
+ delta = (-2) * (int)sizeof(HALF);
+ }
+
+ for (ls = 0x8103; ls; ls >>= 1) { /* 16 iterations; set bits (0, 1, 8, 15) mark the iterations that rotate by 1 instead of 2 */
+ if (ls & 1) {
+ c0 = LEFT_SHIFT_1(c0);
+ d0 = LEFT_SHIFT_1(d0);
+ } else {
+ c0 = LEFT_SHIFT_2(c0);
+ d0 = LEFT_SHIFT_2(d0);
+ }
+
+#ifdef USE_INDEXING
+#define PC2LOOKUP(b, c) PC2[b][c]
+
+ left = PC2LOOKUP(0, ((c0 >> 22) & 0x3F));
+ left |= PC2LOOKUP(1, ((c0 >> 13) & 0x3F));
+ left |= PC2LOOKUP(2, ((c0 >> 4) & 0x38) | (c0 & 0x7));
+ left |= PC2LOOKUP(3, ((c0 >> 18) & 0xC) | ((c0 >> 11) & 0x3) | (c0 & 0x30));
+
+ right = PC2LOOKUP(4, ((d0 >> 22) & 0x3F));
+ right |= PC2LOOKUP(5, ((d0 >> 15) & 0x30) | ((d0 >> 14) & 0xf));
+ right |= PC2LOOKUP(6, ((d0 >> 7) & 0x3F));
+ right |= PC2LOOKUP(7, ((d0 >> 1) & 0x3C) | (d0 & 0x3));
+#else
+#define PC2LOOKUP(b, c) *(HALF *)((BYTE *)&PC2[b][0] + (c)) /* c is a byte offset (index * 4), so the masks below are the indexed ones shifted left by 2 */
+
+ left = PC2LOOKUP(0, ((c0 >> 20) & 0xFC));
+ left |= PC2LOOKUP(1, ((c0 >> 11) & 0xFC));
+ left |= PC2LOOKUP(2, ((c0 >> 2) & 0xE0) | ((c0 << 2) & 0x1C));
+ left |= PC2LOOKUP(3, ((c0 >> 16) & 0x30) | ((c0 >> 9) & 0xC) | ((c0 << 2) & 0xC0));
+
+ right = PC2LOOKUP(4, ((d0 >> 20) & 0xFC));
+ right |= PC2LOOKUP(5, ((d0 >> 13) & 0xC0) | ((d0 >> 12) & 0x3C));
+ right |= PC2LOOKUP(6, ((d0 >> 5) & 0xFC));
+ right |= PC2LOOKUP(7, ((d0 << 1) & 0xF0) | ((d0 << 2) & 0x0C));
+#endif
+ /* left contains key bits for S1 S3 S2 S4 */
+ /* right contains key bits for S6 S8 S5 S7 */
+ temp = (left << 16) /* S2 S4 XX XX */
+ | (right >> 16); /* XX XX S6 S8 */
+ ks[0] = temp;
+
+ temp = (left & 0xffff0000) /* S1 S3 XX XX */
+ | (right & 0x0000ffff); /* XX XX S5 S7 */
+ ks[1] = temp;
+
+ ks = (HALF *)((BYTE *)ks + delta); /* delta is in bytes, hence the BYTE * arithmetic */
+ }
+}
+
+/*
+ * The DES Initial Permutation
+ * if we number the bits of the 8 bytes of input like this (in octal):
+ * 00 01 02 03 04 05 06 07
+ * 10 11 12 13 14 15 16 17
+ * 20 21 22 23 24 25 26 27
+ * 30 31 32 33 34 35 36 37
+ * 40 41 42 43 44 45 46 47
+ * 50 51 52 53 54 55 56 57
+ * 60 61 62 63 64 65 66 67
+ * 70 71 72 73 74 75 76 77
+ * then after the initial permutation, they will be in this order.
+ * 71 61 51 41 31 21 11 01
+ * 73 63 53 43 33 23 13 03
+ * 75 65 55 45 35 25 15 05
+ * 77 67 57 47 37 27 17 07
+ * 70 60 50 40 30 20 10 00
+ * 72 62 52 42 32 22 12 02
+ * 74 64 54 44 34 24 14 04
+ * 76 66 56 46 36 26 16 06
+ *
+ * One way to do this is in two steps:
+ * 1. Flip this matrix about the diagonal from 70-07 as done for PC1.
+ * 2. Rearrange the bytes (rows in the matrix above) with the following code.
+ *
+ * #define swapHiLo(word, temp) \
+ * temp = (word ^ (word >> 24)) & 0x000000ff; \
+ * word ^= temp | (temp << 24);
+ *
+ * right ^= temp = ((left << 8) ^ right) & 0xff00ff00;
+ * left ^= temp >> 8;
+ * swapHiLo(left, temp);
+ * swapHiLo(right,temp);
+ *
+ * However, the two steps can be combined, so that the rows are rearranged
+ * while the matrix is being flipped, reducing the number of bit exchange
+ * operations from 8 to 5.
+ *
+ * Initial Permutation */
+#define IP(left, right, temp) /* five masked exchange steps between left and right; clobbers temp */ \
+ right ^= temp = ((left >> 4) ^ right) & 0x0f0f0f0f; \
+ left ^= temp << 4; \
+ right ^= temp = ((left >> 16) ^ right) & 0x0000ffff; \
+ left ^= temp << 16; \
+ right ^= temp = ((left << 2) ^ right) & 0xcccccccc; \
+ left ^= temp >> 2; \
+ right ^= temp = ((left << 8) ^ right) & 0xff00ff00; \
+ left ^= temp >> 8; \
+ right ^= temp = ((left >> 1) ^ right) & 0x55555555; \
+ left ^= temp << 1;
+
+/* The Final (Inverse Initial) permutation is done by reversing the
+** steps of the Initial Permutation
+*/
+
+#define FP(left, right, temp) /* the same five exchange steps as IP, applied in the opposite order; clobbers temp */ \
+ right ^= temp = ((left >> 1) ^ right) & 0x55555555; \
+ left ^= temp << 1; \
+ right ^= temp = ((left << 8) ^ right) & 0xff00ff00; \
+ left ^= temp >> 8; \
+ right ^= temp = ((left << 2) ^ right) & 0xcccccccc; \
+ left ^= temp >> 2; \
+ right ^= temp = ((left >> 16) ^ right) & 0x0000ffff; \
+ left ^= temp << 16; \
+ right ^= temp = ((left >> 4) ^ right) & 0x0f0f0f0f; \
+ left ^= temp << 4;
+
+void NO_SANITIZE_ALIGNMENT
+DES_Do1Block(HALF *ks, const BYTE *inbuf, BYTE *outbuf)
+{ /* Transform one 8-byte block from inbuf into outbuf.
+   * ks holds 32 HALFs; pair (ks[2r], ks[2r+1]) is used in round r and the
+   * pairs are always read front to back, so whether this encrypts or
+   * decrypts depends only on how the schedule was filled.
+   * The block is loaded as two big-endian 32-bit words and stored back the
+   * same way. inbuf is fully read before outbuf is written. */
+ register HALF left, right;
+ register HALF temp;
+
+#if defined(HAVE_UNALIGNED_ACCESS)
+ left = HALFPTR(inbuf)[0];
+ right = HALFPTR(inbuf)[1];
+#if defined(IS_LITTLE_ENDIAN)
+ BYTESWAP(left, temp);
+ BYTESWAP(right, temp);
+#endif
+#else
+ if (((ptrdiff_t)inbuf & 0x03) == 0) { /* inbuf is 4-byte aligned: load whole words */
+ left = HALFPTR(inbuf)[0];
+ right = HALFPTR(inbuf)[1];
+#if defined(IS_LITTLE_ENDIAN)
+ BYTESWAP(left, temp);
+ BYTESWAP(right, temp);
+#endif
+ } else { /* unaligned inbuf: assemble the big-endian words byte by byte */
+ left = ((HALF)inbuf[0] << 24) | ((HALF)inbuf[1] << 16) |
+ ((HALF)inbuf[2] << 8) | inbuf[3];
+ right = ((HALF)inbuf[4] << 24) | ((HALF)inbuf[5] << 16) |
+ ((HALF)inbuf[6] << 8) | inbuf[7];
+ }
+#endif
+
+ IP(left, right, temp);
+
+ /* shift the values left circularly 3 bits. */
+ left = (left << 3) | (left >> 29);
+ right = (right << 3) | (right >> 29);
+
+#ifdef USE_INDEXING
+#define KSLOOKUP(s, b) SP[s][((temp >> (b + 2)) & 0x3f)]
+#else
+#define KSLOOKUP(s, b) *(HALF *)((BYTE *)&SP[s][0] + ((temp >> b) & 0xFC)) /* byte-offset form: the 6-bit index is left pre-multiplied by 4 */
+#endif
+#define ROUND(out, in, r) \
+ temp = in ^ ks[2 * r]; \
+ out ^= KSLOOKUP(1, 24); \
+ out ^= KSLOOKUP(3, 16); \
+ out ^= KSLOOKUP(5, 8); \
+ out ^= KSLOOKUP(7, 0); \
+ temp = ((in >> 4) | (in << 28)) ^ ks[2 * r + 1]; \
+ out ^= KSLOOKUP(0, 24); \
+ out ^= KSLOOKUP(2, 16); \
+ out ^= KSLOOKUP(4, 8); \
+ out ^= KSLOOKUP(6, 0);
+
+ /* Do the 16 Feistel rounds */
+ ROUND(left, right, 0)
+ ROUND(right, left, 1)
+ ROUND(left, right, 2)
+ ROUND(right, left, 3)
+ ROUND(left, right, 4)
+ ROUND(right, left, 5)
+ ROUND(left, right, 6)
+ ROUND(right, left, 7)
+ ROUND(left, right, 8)
+ ROUND(right, left, 9)
+ ROUND(left, right, 10)
+ ROUND(right, left, 11)
+ ROUND(left, right, 12)
+ ROUND(right, left, 13)
+ ROUND(left, right, 14)
+ ROUND(right, left, 15)
+
+ /* now shift circularly right 3 bits to undo the shifting done
+ ** above. switch left and right here.
+ */
+ temp = (left >> 3) | (left << 29);
+ left = (right >> 3) | (right << 29);
+ right = temp;
+
+ FP(left, right, temp);
+
+#if defined(HAVE_UNALIGNED_ACCESS)
+#if defined(IS_LITTLE_ENDIAN)
+ BYTESWAP(left, temp);
+ BYTESWAP(right, temp);
+#endif
+ HALFPTR(outbuf)
+ [0] = left;
+ HALFPTR(outbuf)
+ [1] = right;
+#else
+ if (((ptrdiff_t)outbuf & 0x03) == 0) { /* outbuf is 4-byte aligned: store whole words */
+#if defined(IS_LITTLE_ENDIAN)
+ BYTESWAP(left, temp);
+ BYTESWAP(right, temp);
+#endif
+ HALFPTR(outbuf)
+ [0] = left;
+ HALFPTR(outbuf)
+ [1] = right;
+ } else { /* unaligned outbuf: store the big-endian words byte by byte */
+ outbuf[0] = (BYTE)(left >> 24);
+ outbuf[1] = (BYTE)(left >> 16);
+ outbuf[2] = (BYTE)(left >> 8);
+ outbuf[3] = (BYTE)(left);
+
+ outbuf[4] = (BYTE)(right >> 24);
+ outbuf[5] = (BYTE)(right >> 16);
+ outbuf[6] = (BYTE)(right >> 8);
+ outbuf[7] = (BYTE)(right);
+ }
+#endif
+}
+
+/* Acknowledgements:
+** Two ideas used in this implementation were shown to me by Dennis Ferguson
+** in 1990. He credits them to Richard Outerbridge and Dan Hoey. They were:
+** 1. The method of computing the Initial and Final permutations.
+** 2. Circularly rotating the SP tables and the initial values of left and
+** right to reduce the number of shifts required during the 16 rounds.
+*/
diff --git a/security/nss/lib/freebl/des.h b/security/nss/lib/freebl/des.h
new file mode 100644
index 000000000..70a17e510
--- /dev/null
+++ b/security/nss/lib/freebl/des.h
@@ -0,0 +1,43 @@
+/*
+ * des.h
+ *
+ * header file for DES-150 library
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _DES_H_
+#define _DES_H_ 1
+
+#include "blapi.h"
+
+typedef unsigned char BYTE;
+typedef unsigned int HALF;
+
+#define HALFPTR(x) ((HALF *)(x))
+#define SHORTPTR(x) ((unsigned short *)(x))
+#define BYTEPTR(x) ((BYTE *)(x))
+
+typedef enum {
+ DES_ENCRYPT = 0x5555,
+ DES_DECRYPT = 0xAAAA
+} DESDirection;
+
+typedef void DESFunc(struct DESContextStr *cx, BYTE *out, const BYTE *in,
+ unsigned int len);
+
+struct DESContextStr {
+ /* key schedule, 16 internal keys, each with 8 6-bit parts */
+ HALF ks0[32]; /* first schedule applied to every block; the only one the single-DES workers use */
+ HALF ks1[32]; /* second schedule applied by the EDE3 workers */
+ HALF ks2[32]; /* third schedule applied by the EDE3 workers */
+ HALF iv[2]; /* CBC chaining block (8 bytes); the CBC workers overwrite it after every block */
+ DESDirection direction; /* set by DES_InitContext; DES_Encrypt / DES_Decrypt reject a mismatching context */
+ DESFunc *worker; /* mode-specific block loop chosen by DES_InitContext */
+};
+
+void DES_MakeSchedule(HALF *ks, const BYTE *key, DESDirection direction);
+void DES_Do1Block(HALF *ks, const BYTE *inbuf, BYTE *outbuf);
+
+#endif
diff --git a/security/nss/lib/freebl/desblapi.c b/security/nss/lib/freebl/desblapi.c
new file mode 100644
index 000000000..c03ab27cc
--- /dev/null
+++ b/security/nss/lib/freebl/desblapi.c
@@ -0,0 +1,256 @@
+/*
+ * desblapi.c
+ *
+ * core source file for DES-150 library
+ * Implement DES Modes of Operation and Triple-DES.
+ * Adapt DES-150 to blapi API.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "des.h"
+#include "blapii.h"
+#include <stddef.h>
+#include "secerr.h"
+
+#if defined(NSS_X86_OR_X64)
+/* Intel X86 CPUs do unaligned loads and stores without complaint. */
+#define COPY8B(to, from, ptr) \
+ HALFPTR(to) \
+ [0] = HALFPTR(from)[0]; \
+ HALFPTR(to) \
+ [1] = HALFPTR(from)[1];
+#else
+#define COPY8B(to, from, ptr) memcpy(to, from, 8)
+#endif
+#define COPY8BTOHALF(to, from) COPY8B(to, from, from)
+#define COPY8BFROMHALF(to, from) COPY8B(to, from, to)
+
+/* Single-DES ECB worker: pushes each 8-byte block of `in` through the ks0
+ * schedule into `out`. `len` is consumed 8 bytes at a time, so it must be a
+ * multiple of 8 (DES_Encrypt / DES_Decrypt check this before calling). */
+static void
+DES_ECB(DESContext *cx, BYTE *out, const BYTE *in, unsigned int len)
+{
+    for (; len != 0; len -= 8, in += 8, out += 8) {
+        DES_Do1Block(cx->ks0, in, out);
+    }
+}
+
+/* Triple-DES ECB worker: each 8-byte block goes through ks0, then ks1, then
+ * ks2. The second and third passes run in place on `out`. `len` must be a
+ * multiple of 8. */
+static void
+DES_EDE3_ECB(DESContext *cx, BYTE *out, const BYTE *in, unsigned int len)
+{
+    unsigned int remaining;
+
+    for (remaining = len; remaining != 0; remaining -= 8) {
+        DES_Do1Block(cx->ks0, in, out);
+        DES_Do1Block(cx->ks1, out, out);
+        DES_Do1Block(cx->ks2, out, out);
+        in += 8;
+        out += 8;
+    }
+}
+
+/* Single-DES CBC encryption worker. Each input block is XORed with cx->iv,
+ * run through ks0 directly into cx->iv, and that new chaining value is then
+ * copied to `out`. `len` must be a multiple of 8. */
+static void NO_SANITIZE_ALIGNMENT
+DES_CBCEn(DESContext *cx, BYTE *out, const BYTE *in, unsigned int len)
+{
+    const BYTE *const stop = in + len;
+    HALF block[2];
+
+    for (; in != stop; in += 8, out += 8) {
+        COPY8BTOHALF(block, in);
+        block[0] ^= cx->iv[0];
+        block[1] ^= cx->iv[1];
+        DES_Do1Block(cx->ks0, (BYTE *)block, (BYTE *)cx->iv);
+        COPY8BFROMHALF(out, cx->iv);
+    }
+}
+
+/* Single-DES CBC decryption worker. The previous chaining value is saved,
+ * the incoming ciphertext block becomes the new cx->iv, and the block is
+ * run through ks0 and XORed with the saved value before being written to
+ * `out`. `len` must be a multiple of 8. */
+static void NO_SANITIZE_ALIGNMENT
+DES_CBCDe(DESContext *cx, BYTE *out, const BYTE *in, unsigned int len)
+{
+    const BYTE *const stop = in + len;
+    HALF prev[2];
+    HALF clear[2];
+
+    while (in != stop) {
+        prev[0] = cx->iv[0];
+        prev[1] = cx->iv[1];
+        COPY8BTOHALF(cx->iv, in);
+        DES_Do1Block(cx->ks0, (BYTE *)cx->iv, (BYTE *)clear);
+        clear[0] ^= prev[0];
+        clear[1] ^= prev[1];
+        COPY8BFROMHALF(out, clear);
+        in += 8;
+        out += 8;
+    }
+}
+
+/* Triple-DES CBC encryption worker. Each input block is XORed with cx->iv,
+ * run through ks0, ks1 and ks2 (the last two in place on cx->iv), and the
+ * resulting chaining value is copied to `out`. `len` must be a multiple
+ * of 8. */
+static void NO_SANITIZE_ALIGNMENT
+DES_EDE3CBCEn(DESContext *cx, BYTE *out, const BYTE *in, unsigned int len)
+{
+    const BYTE *const stop = in + len;
+    HALF block[2];
+
+    for (; in != stop; in += 8, out += 8) {
+        COPY8BTOHALF(block, in);
+        block[0] ^= cx->iv[0];
+        block[1] ^= cx->iv[1];
+        DES_Do1Block(cx->ks0, (BYTE *)block, (BYTE *)cx->iv);
+        DES_Do1Block(cx->ks1, (BYTE *)cx->iv, (BYTE *)cx->iv);
+        DES_Do1Block(cx->ks2, (BYTE *)cx->iv, (BYTE *)cx->iv);
+        COPY8BFROMHALF(out, cx->iv);
+    }
+}
+
+/* Triple-DES CBC decryption worker. The previous chaining value is saved,
+ * the incoming ciphertext block becomes the new cx->iv, and the block is
+ * run through ks0, ks1 and ks2 and XORed with the saved value before being
+ * written to `out`. `len` must be a multiple of 8. */
+static void NO_SANITIZE_ALIGNMENT
+DES_EDE3CBCDe(DESContext *cx, BYTE *out, const BYTE *in, unsigned int len)
+{
+    const BYTE *const stop = in + len;
+    HALF prev[2];
+    HALF clear[2];
+
+    while (in != stop) {
+        prev[0] = cx->iv[0];
+        prev[1] = cx->iv[1];
+        COPY8BTOHALF(cx->iv, in);
+        DES_Do1Block(cx->ks0, (BYTE *)cx->iv, (BYTE *)clear);
+        DES_Do1Block(cx->ks1, (BYTE *)clear, (BYTE *)clear);
+        DES_Do1Block(cx->ks2, (BYTE *)clear, (BYTE *)clear);
+        clear[0] ^= prev[0];
+        clear[1] ^= prev[1];
+        COPY8BFROMHALF(out, clear);
+        in += 8;
+        out += 8;
+    }
+}
+
+/* Allocate a DESContext with PORT_ZNew and hand it back without
+ * initialising any field; the result of the allocation is returned as is. */
+DESContext *
+DES_AllocateContext(void)
+{
+    DESContext *fresh = PORT_ZNew(DESContext);
+
+    return fresh;
+}
+
+/* Build the three triple-DES schedules from the 24 key bytes at `key`.
+ * The first and third 8-byte subkeys use the caller's direction and the
+ * middle one uses the opposite direction. When decrypting, the first subkey
+ * goes to ks2 and the third to ks0, because the workers always apply ks0,
+ * ks1, ks2 in that order. */
+static void
+des_MakeEDE3Schedules(DESContext *cx, const unsigned char *key,
+                      unsigned int encrypt)
+{
+    const DESDirection outer = encrypt ? DES_ENCRYPT : DES_DECRYPT;
+    const DESDirection inner = encrypt ? DES_DECRYPT : DES_ENCRYPT;
+    HALF *const first = encrypt ? cx->ks0 : cx->ks2;
+    HALF *const last = encrypt ? cx->ks2 : cx->ks0;
+
+    DES_MakeSchedule(first, key, outer);
+    DES_MakeSchedule(cx->ks1, key + 8, inner);
+    DES_MakeSchedule(last, key + 16, outer);
+}
+
+/*
+ * Initialise an existing DESContext for one mode and direction.
+ *
+ * cx      - context to fill in; must not be NULL.
+ * key     - key bytes; must not be NULL. 8 bytes are read for NSS_DES and
+ *           NSS_DES_CBC, 24 bytes for the two EDE3 modes.
+ * keylen  - not examined (DES_CreateContext passes 0), so the caller is
+ *           responsible for supplying enough key bytes for the mode.
+ * iv      - 8-byte initial chaining value; required (non-NULL) for
+ *           NSS_DES_CBC and NSS_DES_EDE3_CBC, ignored otherwise.
+ * mode    - NSS_DES, NSS_DES_EDE3, NSS_DES_CBC or NSS_DES_EDE3_CBC.
+ * encrypt - non-zero for an encryption context, zero for decryption.
+ * unused  - ignored.
+ *
+ * Returns SECSuccess, or SECFailure with SEC_ERROR_INVALID_ARGS when a
+ * required pointer is NULL or the mode is not recognised.
+ */
+SECStatus
+DES_InitContext(DESContext *cx, const unsigned char *key, unsigned int keylen,
+                const unsigned char *iv, int mode, unsigned int encrypt,
+                unsigned int unused)
+{
+    const int needsIV = (mode == NSS_DES_CBC || mode == NSS_DES_EDE3_CBC);
+
+    /* Reject missing pointers up front instead of dereferencing them in
+     * DES_MakeSchedule or the IV copy below. */
+    if (!cx || !key || (needsIV && !iv)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    cx->direction = encrypt ? DES_ENCRYPT : DES_DECRYPT;
+    switch (mode) {
+        case NSS_DES: /* DES ECB */
+            DES_MakeSchedule(cx->ks0, key, cx->direction);
+            cx->worker = &DES_ECB;
+            break;
+
+        case NSS_DES_EDE3: /* DES EDE ECB */
+            cx->worker = &DES_EDE3_ECB;
+            des_MakeEDE3Schedules(cx, key, encrypt);
+            break;
+
+        case NSS_DES_CBC: /* DES CBC */
+            COPY8BTOHALF(cx->iv, iv);
+            cx->worker = encrypt ? &DES_CBCEn : &DES_CBCDe;
+            DES_MakeSchedule(cx->ks0, key, cx->direction);
+            break;
+
+        case NSS_DES_EDE3_CBC: /* DES EDE CBC */
+            COPY8BTOHALF(cx->iv, iv);
+            cx->worker = encrypt ? &DES_EDE3CBCEn : &DES_EDE3CBCDe;
+            des_MakeEDE3Schedules(cx, key, encrypt);
+            break;
+
+        default:
+            PORT_SetError(SEC_ERROR_INVALID_ARGS);
+            return SECFailure;
+    }
+    return SECSuccess;
+}
+
+/*
+ * Allocate a zeroed DESContext and initialise it with DES_InitContext.
+ *
+ * key, iv, mode and encrypt are forwarded to DES_InitContext; the key
+ * length and the trailing argument are passed as 0.
+ *
+ * Returns the new context, or NULL when allocation or initialisation fails.
+ * On initialisation failure the context is wiped and released before
+ * returning.
+ */
+DESContext *
+DES_CreateContext(const BYTE *key, const BYTE *iv, int mode, PRBool encrypt)
+{
+    DESContext *cx = PORT_ZNew(DESContext);
+
+    if (!cx) {
+        /* Return straight away so that DES_InitContext does not replace
+         * whatever error the allocator reported with
+         * SEC_ERROR_INVALID_ARGS. */
+        return NULL;
+    }
+    if (DES_InitContext(cx, key, 0, iv, mode, encrypt, 0) != SECSuccess) {
+        PORT_ZFree(cx, sizeof *cx);
+        return NULL;
+    }
+    return cx;
+}
+
+/* Wipe a context and optionally release it. A NULL cx is ignored. The whole
+ * structure is zeroed in every case; the memory is handed to PORT_Free only
+ * when freeit is true. */
+void
+DES_DestroyContext(DESContext *cx, PRBool freeit)
+{
+    if (!cx) {
+        return;
+    }
+
+    memset(cx, 0, sizeof *cx);
+    if (freeit) {
+        PORT_Free(cx);
+    }
+}
+
+/*
+ * Encrypt inLen bytes from `in` into `out` with the context's worker.
+ *
+ * Fails with SEC_ERROR_INVALID_ARGS when cx is NULL, cx was not set up for
+ * encryption, inLen is not a multiple of 8, or maxOutLen is smaller than
+ * inLen. On success *outLen (if outLen is non-NULL) is set to inLen.
+ */
+SECStatus
+DES_Encrypt(DESContext *cx, BYTE *out, unsigned int *outLen,
+            unsigned int maxOutLen, const BYTE *in, unsigned int inLen)
+{
+    const int ctxOk = (cx != NULL) && (cx->direction == DES_ENCRYPT);
+    const int sizesOk = ((inLen % 8) == 0) && (inLen <= maxOutLen);
+
+    if (!ctxOk || !sizesOk) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    cx->worker(cx, out, in, inLen);
+    if (outLen != NULL) {
+        *outLen = inLen;
+    }
+    return SECSuccess;
+}
+
+/*
+ * Decrypt inLen bytes from `in` into `out` with the context's worker.
+ *
+ * Fails with SEC_ERROR_INVALID_ARGS when cx is NULL, cx was not set up for
+ * decryption, inLen is not a multiple of 8, or maxOutLen is smaller than
+ * inLen. On success *outLen (if outLen is non-NULL) is set to inLen.
+ */
+SECStatus
+DES_Decrypt(DESContext *cx, BYTE *out, unsigned int *outLen,
+            unsigned int maxOutLen, const BYTE *in, unsigned int inLen)
+{
+    const int ctxOk = (cx != NULL) && (cx->direction == DES_DECRYPT);
+    const int sizesOk = ((inLen % 8) == 0) && (inLen <= maxOutLen);
+
+    if (!ctxOk || !sizesOk) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    cx->worker(cx, out, in, inLen);
+    if (outLen != NULL) {
+        *outLen = inLen;
+    }
+    return SECSuccess;
+}
diff --git a/security/nss/lib/freebl/det_rng.c b/security/nss/lib/freebl/det_rng.c
new file mode 100644
index 000000000..fcbf9b34a
--- /dev/null
+++ b/security/nss/lib/freebl/det_rng.c
@@ -0,0 +1,67 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "blapi.h"
+#include "blapit.h"
+#include "chacha20.h"
+#include "nssilock.h"
+#include "seccomon.h"
+#include "secerr.h"
+
+static unsigned long globalNumCalls = 0;
+
+/*
+ * Reset the deterministic generator's call counter to zero while holding
+ * rng_lock. Fails with SEC_ERROR_INVALID_ARGS if no lock is supplied.
+ */
+SECStatus
+prng_ResetForFuzzing(PZLock *rng_lock)
+{
+    /* A lock is mandatory: the counter is shared state. */
+    PORT_Assert(rng_lock != NULL);
+    if (!rng_lock) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    PZ_Lock(rng_lock);
+    globalNumCalls = 0;
+    PZ_Unlock(rng_lock);
+
+    return SECSuccess;
+}
+
+/*
+ * Fill dest with len deterministic bytes.
+ *
+ * Under rng_lock, the current value of the call counter is copied into the
+ * start of an otherwise zero 12-byte nonce and the counter is incremented.
+ * dest is then zeroed and passed to ChaCha20XOR() as both output and input,
+ * together with the fixed (zero-initialized static) key and that nonce.
+ *
+ * Returns SECFailure with SEC_ERROR_INVALID_ARGS when rng_lock is NULL, or
+ * with SEC_ERROR_NO_MEMORY when the ChaCha20Poly1305 context cannot be
+ * created (the counter has already been advanced in that case).
+ */
+SECStatus
+prng_GenerateDeterministicRandomBytes(PZLock *rng_lock, void *dest, size_t len)
+{
+    static const uint8_t key[32];
+    uint8_t nonce[12] = { 0 };
+    ChaCha20Poly1305Context *cx;
+    SECStatus status = SECFailure;
+
+    /* A lock is mandatory: the counter is shared state. */
+    PORT_Assert(rng_lock != NULL);
+    if (!rng_lock) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* --- LOCKED --- */
+    PZ_Lock(rng_lock);
+
+    memcpy(nonce, &globalNumCalls, sizeof(globalNumCalls));
+    globalNumCalls++;
+
+    cx = ChaCha20Poly1305_CreateContext(key, sizeof(key), 16);
+    if (cx != NULL) {
+        memset(dest, 0, len);
+        ChaCha20XOR(dest, dest, len, key, nonce, 0);
+        ChaCha20Poly1305_DestroyContext(cx, PR_TRUE);
+        status = SECSuccess;
+    } else {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+    }
+
+    PZ_Unlock(rng_lock);
+    /* --- UNLOCKED --- */
+    return status;
+}
diff --git a/security/nss/lib/freebl/det_rng.h b/security/nss/lib/freebl/det_rng.h
new file mode 100644
index 000000000..599d726ca
--- /dev/null
+++ b/security/nss/lib/freebl/det_rng.h
@@ -0,0 +1,12 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef __det_rng_h_
+#define __det_rng_h_
+
+SECStatus prng_ResetForFuzzing(PZLock *rng_lock);
+SECStatus prng_GenerateDeterministicRandomBytes(PZLock *rng_lock, void *dest,
+ size_t len);
+
+#endif /* __det_rng_h_ */
diff --git a/security/nss/lib/freebl/dh.c b/security/nss/lib/freebl/dh.c
new file mode 100644
index 000000000..97025c7e2
--- /dev/null
+++ b/security/nss/lib/freebl/dh.c
@@ -0,0 +1,452 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * Diffie-Hellman parameter generation, key generation, and secret derivation.
+ * KEA secret generation and verification.
+ */
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "prerr.h"
+#include "secerr.h"
+
+#include "blapi.h"
+#include "secitem.h"
+#include "mpi.h"
+#include "mpprime.h"
+#include "secmpi.h"
+
+#define KEA_DERIVED_SECRET_LEN 128
+
+/*
+ * Map a DH prime length to the private (secret) key length to use with it.
+ * Both lengths are in bytes. Based on Table 2 in NIST SP 800-57.
+ */
+static unsigned int
+dh_GetSecretKeyLen(unsigned int primeLen)
+{
+    /* Thresholds are listed from largest to smallest; first match wins. */
+    static const struct {
+        unsigned int minPrimeLen;
+        unsigned int secretLen;
+    } strengths[] = {
+        { 1920, 64 }, /* 15360-bit prime -> 512-bit secret */
+        { 960, 48 },  /*  7680-bit prime -> 384-bit secret */
+        { 384, 32 },  /*  3072-bit prime -> 256-bit secret */
+        { 256, 28 }   /*  2048-bit prime -> 224-bit secret */
+    };
+    unsigned int i;
+
+    for (i = 0; i < sizeof strengths / sizeof strengths[0]; i++) {
+        if (primeLen >= strengths[i].minPrimeLen) {
+            return strengths[i].secretLen;
+        }
+    }
+    return 20; /* anything smaller -> 160-bit secret */
+}
+
+/*
+ * Generate Diffie-Hellman parameters: a prime of primeLen bytes and a base.
+ *
+ * primeLen  length of the prime in bytes; must be greater than zero.
+ * params    on success receives a DHParams allocated from a new arena,
+ *           which is stored in (*params)->arena.
+ *
+ * Returns SECSuccess, or SECFailure with the error code set. On failure the
+ * arena is released and *params is not written.
+ */
+SECStatus
+DH_GenParam(int primeLen, DHParams **params)
+{
+    PLArenaPool *arena;
+    DHParams *dhparams;
+    unsigned char *pb = NULL;
+    unsigned char *ab = NULL;
+    unsigned long counter = 0;
+    mp_int p, q, a, psub1, test;
+    mp_err err = MP_OKAY;
+    SECStatus rv = SECSuccess;
+
+    /* primeLen == 0 must be rejected too: the code below writes to
+     * pb[primeLen - 1]. */
+    if (!params || primeLen <= 0) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    arena = PORT_NewArena(NSS_FREEBL_DEFAULT_CHUNKSIZE);
+    if (!arena) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+    dhparams = (DHParams *)PORT_ArenaZAlloc(arena, sizeof(DHParams));
+    if (!dhparams) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        PORT_FreeArena(arena, PR_TRUE);
+        return SECFailure;
+    }
+    dhparams->arena = arena;
+    /* Mark every mp_int as unallocated so cleanup can clear them all
+     * regardless of how far initialization got. */
+    MP_DIGITS(&p) = 0;
+    MP_DIGITS(&q) = 0;
+    MP_DIGITS(&a) = 0;
+    MP_DIGITS(&psub1) = 0;
+    MP_DIGITS(&test) = 0;
+    CHECK_MPI_OK(mp_init(&p));
+    CHECK_MPI_OK(mp_init(&q));
+    CHECK_MPI_OK(mp_init(&a));
+    CHECK_MPI_OK(mp_init(&psub1));
+    CHECK_MPI_OK(mp_init(&test));
+    /* generate prime with MPI, uses Miller-Rabin to generate strong prime. */
+    pb = PORT_Alloc(primeLen);
+    if (!pb) {
+        err = MP_MEM;
+        goto cleanup;
+    }
+    CHECK_SEC_OK(RNG_GenerateGlobalRandomBytes(pb, primeLen));
+    pb[0] |= 0x80;            /* set high-order bit */
+    pb[primeLen - 1] |= 0x01; /* set low-order bit  */
+    CHECK_MPI_OK(mp_read_unsigned_octets(&p, pb, primeLen));
+    CHECK_MPI_OK(mpp_make_prime(&p, primeLen * 8, PR_TRUE, &counter));
+    /* construct Sophie-Germain prime q = (p-1)/2. */
+    CHECK_MPI_OK(mp_sub_d(&p, 1, &psub1));
+    CHECK_MPI_OK(mp_div_2(&psub1, &q));
+    /* construct a generator from the prime. */
+    ab = PORT_Alloc(primeLen);
+    if (!ab) {
+        err = MP_MEM;
+        goto cleanup;
+    }
+    /* generate a candidate number a in p's field */
+    CHECK_SEC_OK(RNG_GenerateGlobalRandomBytes(ab, primeLen));
+    CHECK_MPI_OK(mp_read_unsigned_octets(&a, ab, primeLen));
+    /* force a < p (note that quot(a/p) <= 1) */
+    if (mp_cmp(&a, &p) > 0) {
+        CHECK_MPI_OK(mp_sub(&a, &p, &a));
+    }
+    do {
+        /* check that a is in the range [2..p-1] */
+        if (mp_cmp_d(&a, 2) < 0 || mp_cmp(&a, &psub1) >= 0) {
+            /* a is outside of the allowed range.  Set a=3 and keep going. */
+            mp_set(&a, 3);
+        }
+        /* if a**q mod p != 1 then a is a generator */
+        CHECK_MPI_OK(mp_exptmod(&a, &q, &p, &test));
+        if (mp_cmp_d(&test, 1) != 0) {
+            break;
+        }
+        /* increment the candidate and try again. */
+        CHECK_MPI_OK(mp_add_d(&a, 1, &a));
+    } while (PR_TRUE);
+    MPINT_TO_SECITEM(&p, &dhparams->prime, arena);
+    MPINT_TO_SECITEM(&a, &dhparams->base, arena);
+    *params = dhparams;
+cleanup:
+    mp_clear(&p);
+    mp_clear(&q);
+    mp_clear(&a);
+    mp_clear(&psub1);
+    mp_clear(&test);
+    /* The random buffers seeded the prime and the base: wipe them. */
+    if (pb)
+        PORT_ZFree(pb, primeLen);
+    if (ab)
+        PORT_ZFree(ab, primeLen);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    if (rv)
+        PORT_FreeArena(arena, PR_TRUE);
+    return rv;
+}
+
+/*
+ * Generate a Diffie-Hellman key pair for the given parameters.
+ *
+ * params   supplies the prime and base, which are copied into the new key.
+ * privKey  on success receives a DHPrivateKey allocated from a new arena
+ *          (stored in key->arena) holding prime, base, the random private
+ *          value and the public value base ** private mod prime.
+ *
+ * Returns SECSuccess, or SECFailure with the error code set. On failure the
+ * arena is released and *privKey is set to NULL.
+ */
+SECStatus
+DH_NewKey(DHParams *params, DHPrivateKey **privKey)
+{
+    PLArenaPool *arena;
+    DHPrivateKey *key;
+    mp_int g, xa, p, Ya;
+    mp_err err = MP_OKAY;
+    SECStatus rv = SECSuccess;
+    if (!params || !privKey) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    arena = PORT_NewArena(NSS_FREEBL_DEFAULT_CHUNKSIZE);
+    if (!arena) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+    key = (DHPrivateKey *)PORT_ArenaZAlloc(arena, sizeof(DHPrivateKey));
+    if (!key) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        PORT_FreeArena(arena, PR_TRUE);
+        return SECFailure;
+    }
+    key->arena = arena;
+    /* Mark every mp_int as unallocated so cleanup can clear them all
+     * regardless of how far initialization got. */
+    MP_DIGITS(&g) = 0;
+    MP_DIGITS(&xa) = 0;
+    MP_DIGITS(&p) = 0;
+    MP_DIGITS(&Ya) = 0;
+    CHECK_MPI_OK(mp_init(&g));
+    CHECK_MPI_OK(mp_init(&xa));
+    CHECK_MPI_OK(mp_init(&p));
+    CHECK_MPI_OK(mp_init(&Ya));
+    /* Set private key's p */
+    CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->prime, &params->prime));
+    SECITEM_TO_MPINT(key->prime, &p);
+    /* Set private key's g */
+    CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->base, &params->base));
+    SECITEM_TO_MPINT(key->base, &g);
+    /* Generate private key xa. The allocation must be checked: the buffer
+     * is filled with random bytes and read back as xa right below. */
+    if (SECITEM_AllocItem(arena, &key->privateValue,
+                          dh_GetSecretKeyLen(params->prime.len)) == NULL) {
+        err = MP_MEM;
+        goto cleanup;
+    }
+    CHECK_SEC_OK(RNG_GenerateGlobalRandomBytes(key->privateValue.data,
+                                               key->privateValue.len));
+    SECITEM_TO_MPINT(key->privateValue, &xa);
+    /* xa < p */
+    CHECK_MPI_OK(mp_mod(&xa, &p, &xa));
+    /* Compute public key Ya = g ** xa mod p */
+    CHECK_MPI_OK(mp_exptmod(&g, &xa, &p, &Ya));
+    MPINT_TO_SECITEM(&Ya, &key->publicValue, key->arena);
+    *privKey = key;
+cleanup:
+    mp_clear(&g);
+    mp_clear(&xa);
+    mp_clear(&p);
+    mp_clear(&Ya);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    if (rv) {
+        *privKey = NULL;
+        PORT_FreeArena(arena, PR_TRUE);
+    }
+    return rv;
+}
+
+/*
+ * Compute the Diffie-Hellman shared secret ZZ = publicValue ** privateValue
+ * mod prime.
+ *
+ * publicValue    the peer's public value; rejected unless 1 < value < p-1.
+ * prime          the modulus p.
+ * privateValue   the local private value.
+ * derivedSecret  is zeroed on entry; on success receives a newly allocated
+ *                buffer holding the secret.
+ * outBytes       0 to return the secret at its natural length; otherwise
+ *                exactly outBytes bytes are returned, zero-padded at the
+ *                front or truncated at the front as needed.
+ *
+ * Returns SECSuccess, or SECFailure with the error code set. On failure
+ * any output buffer is wiped and freed and derivedSecret is left with
+ * data == NULL and len == 0.
+ */
+SECStatus
+DH_Derive(SECItem *publicValue,
+          SECItem *prime,
+          SECItem *privateValue,
+          SECItem *derivedSecret,
+          unsigned int outBytes)
+{
+    mp_int p, Xa, Yb, ZZ, psub1;
+    mp_err err = MP_OKAY;
+    unsigned int len = 0;
+    unsigned int nb;
+    unsigned char *secret = NULL;
+    if (!publicValue || !prime || !privateValue || !derivedSecret) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    memset(derivedSecret, 0, sizeof *derivedSecret);
+    MP_DIGITS(&p) = 0;
+    MP_DIGITS(&Xa) = 0;
+    MP_DIGITS(&Yb) = 0;
+    MP_DIGITS(&ZZ) = 0;
+    MP_DIGITS(&psub1) = 0;
+    CHECK_MPI_OK(mp_init(&p));
+    CHECK_MPI_OK(mp_init(&Xa));
+    CHECK_MPI_OK(mp_init(&Yb));
+    CHECK_MPI_OK(mp_init(&ZZ));
+    CHECK_MPI_OK(mp_init(&psub1));
+    SECITEM_TO_MPINT(*publicValue, &Yb);
+    SECITEM_TO_MPINT(*privateValue, &Xa);
+    SECITEM_TO_MPINT(*prime, &p);
+    CHECK_MPI_OK(mp_sub_d(&p, 1, &psub1));
+
+    /* We assume that the modulus, p, is a safe prime. That is, p = 2q+1 where
+     * q is also a prime. Thus the orders of the subgroups are factors of 2q:
+     * namely 1, 2, q and 2q.
+     *
+     * We check that the peer's public value isn't zero (which isn't in the
+     * group), one (subgroup of order one) or p-1 (subgroup of order 2). We
+     * also check that the public value is less than p, to avoid being fooled
+     * by values like p+1 or 2*p-1.
+     *
+     * Thus we must be operating in the subgroup of size q or 2q. */
+    if (mp_cmp_d(&Yb, 1) <= 0 ||
+        mp_cmp(&Yb, &psub1) >= 0) {
+        err = MP_BADARG;
+        goto cleanup;
+    }
+
+    /* ZZ = (Yb)**Xa mod p */
+    CHECK_MPI_OK(mp_exptmod(&Yb, &Xa, &p, &ZZ));
+    /* number of bytes in the derived secret */
+    len = mp_unsigned_octet_size(&ZZ);
+    if (len <= 0) {
+        err = MP_BADARG;
+        goto cleanup;
+    }
+
+    /*
+     * We check to make sure that ZZ is not equal to 1 or -1 mod p.
+     * This helps guard against small subgroup attacks, since an attacker
+     * using a subgroup of size N will produce 1 or -1 with probability 1/N.
+     * When the protocol is executed within a properly large subgroup, the
+     * probability of this result will be negligibly small.  For example,
+     * with a strong prime of the form 2p+1, the probability will be 1/p.
+     *
+     * We return MP_BADARG because this is probably the result of a bad
+     * public value or a bad prime having been provided.
+     */
+    if (mp_cmp_d(&ZZ, 1) == 0 ||
+        mp_cmp(&ZZ, &psub1) == 0) {
+        err = MP_BADARG;
+        goto cleanup;
+    }
+
+    /* allocate a buffer which can hold the entire derived secret. */
+    secret = PORT_Alloc(len);
+    if (secret == NULL) {
+        err = MP_MEM;
+        goto cleanup;
+    }
+    /* grab the derived secret; a non-negative return is a byte count,
+     * a negative one is an error and nothing usable was written. */
+    err = mp_to_unsigned_octets(&ZZ, secret, len);
+    if (err < 0) {
+        goto cleanup;
+    }
+    err = MP_OKAY;
+    /*
+    ** if outBytes is 0 take all of the bytes from the derived secret.
+    ** if outBytes is not 0 take exactly outBytes from the derived secret, zero
+    ** pad at the beginning if necessary, and truncate beginning bytes
+    ** if necessary.
+    */
+    if (outBytes > 0)
+        nb = outBytes;
+    else
+        nb = len;
+    if (SECITEM_AllocItem(NULL, derivedSecret, nb) == NULL) {
+        err = MP_MEM;
+        goto cleanup;
+    }
+    if (len < nb) {
+        unsigned int offset = nb - len;
+        memset(derivedSecret->data, 0, offset);
+        memcpy(derivedSecret->data + offset, secret, len);
+    } else {
+        memcpy(derivedSecret->data, secret + len - nb, nb);
+    }
+cleanup:
+    mp_clear(&p);
+    mp_clear(&Xa);
+    mp_clear(&Yb);
+    mp_clear(&ZZ);
+    mp_clear(&psub1);
+    if (secret) {
+        /* free the buffer allocated for the full secret. */
+        PORT_ZFree(secret, len);
+    }
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        if (derivedSecret->data) {
+            PORT_ZFree(derivedSecret->data, derivedSecret->len);
+            /* do not leave a dangling pointer in the caller's item */
+            derivedSecret->data = NULL;
+            derivedSecret->len = 0;
+        }
+        return SECFailure;
+    }
+    return SECSuccess;
+}
+
+/*
+ * Compute the KEA shared value
+ *     w = (public1 ** private1 + public2 ** private2) mod prime
+ * and return its low-order KEA_DERIVED_SECRET_LEN bytes.
+ *
+ * derivedSecret is zeroed on entry; on success it receives a newly
+ * allocated buffer of KEA_DERIVED_SECRET_LEN bytes. If w is shorter than
+ * that, it is right-aligned and the leading bytes are zero.
+ *
+ * Returns SECSuccess, or SECFailure with the error code set. On failure
+ * any output buffer is wiped and freed and derivedSecret is left with
+ * data == NULL and len == 0.
+ */
+SECStatus
+KEA_Derive(SECItem *prime,
+           SECItem *public1,
+           SECItem *public2,
+           SECItem *private1,
+           SECItem *private2,
+           SECItem *derivedSecret)
+{
+    mp_int p, Y, R, r, x, t, u, w;
+    mp_err err = MP_OKAY;
+    unsigned char *secret = NULL;
+    unsigned int len = 0, offset;
+    if (!prime || !public1 || !public2 || !private1 || !private2 ||
+        !derivedSecret) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    memset(derivedSecret, 0, sizeof *derivedSecret);
+    MP_DIGITS(&p) = 0;
+    MP_DIGITS(&Y) = 0;
+    MP_DIGITS(&R) = 0;
+    MP_DIGITS(&r) = 0;
+    MP_DIGITS(&x) = 0;
+    MP_DIGITS(&t) = 0;
+    MP_DIGITS(&u) = 0;
+    MP_DIGITS(&w) = 0;
+    CHECK_MPI_OK(mp_init(&p));
+    CHECK_MPI_OK(mp_init(&Y));
+    CHECK_MPI_OK(mp_init(&R));
+    CHECK_MPI_OK(mp_init(&r));
+    CHECK_MPI_OK(mp_init(&x));
+    CHECK_MPI_OK(mp_init(&t));
+    CHECK_MPI_OK(mp_init(&u));
+    CHECK_MPI_OK(mp_init(&w));
+    SECITEM_TO_MPINT(*prime, &p);
+    SECITEM_TO_MPINT(*public1, &Y);
+    SECITEM_TO_MPINT(*public2, &R);
+    SECITEM_TO_MPINT(*private1, &r);
+    SECITEM_TO_MPINT(*private2, &x);
+    /* t = DH(Y, r, p) = Y ** r mod p */
+    CHECK_MPI_OK(mp_exptmod(&Y, &r, &p, &t));
+    /* u = DH(R, x, p) = R ** x mod p */
+    CHECK_MPI_OK(mp_exptmod(&R, &x, &p, &u));
+    /* w = (t + u) mod p */
+    CHECK_MPI_OK(mp_addmod(&t, &u, &p, &w));
+    /* allocate a buffer for the full derived secret */
+    len = mp_unsigned_octet_size(&w);
+    secret = PORT_Alloc(len);
+    if (secret == NULL) {
+        err = MP_MEM;
+        goto cleanup;
+    }
+    /* grab the secret; a non-negative return is a byte count, a negative
+     * one is an error and nothing usable was written. */
+    err = mp_to_unsigned_octets(&w, secret, len);
+    if (err < 0) {
+        goto cleanup;
+    }
+    err = MP_OKAY;
+    /* allocate output buffer */
+    if (SECITEM_AllocItem(NULL, derivedSecret, KEA_DERIVED_SECRET_LEN) == NULL) {
+        err = MP_MEM;
+        goto cleanup;
+    }
+    memset(derivedSecret->data, 0, derivedSecret->len);
+    /* copy in the 128 lsb of the secret */
+    if (len >= KEA_DERIVED_SECRET_LEN) {
+        memcpy(derivedSecret->data, secret + (len - KEA_DERIVED_SECRET_LEN),
+               KEA_DERIVED_SECRET_LEN);
+    } else {
+        offset = KEA_DERIVED_SECRET_LEN - len;
+        memcpy(derivedSecret->data + offset, secret, len);
+    }
+cleanup:
+    mp_clear(&p);
+    mp_clear(&Y);
+    mp_clear(&R);
+    mp_clear(&r);
+    mp_clear(&x);
+    mp_clear(&t);
+    mp_clear(&u);
+    mp_clear(&w);
+    if (secret)
+        PORT_ZFree(secret, len);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        if (derivedSecret->data) {
+            PORT_ZFree(derivedSecret->data, derivedSecret->len);
+            /* do not leave a dangling pointer in the caller's item */
+            derivedSecret->data = NULL;
+            derivedSecret->len = 0;
+        }
+        return SECFailure;
+    }
+    return SECSuccess;
+}
+
+/*
+ * Check that Y ** subPrime mod prime == 1.
+ *
+ * Returns PR_TRUE only when the computation succeeds and the result is 1.
+ * Returns PR_FALSE otherwise, including when an argument is NULL
+ * (SEC_ERROR_INVALID_ARGS is set) or when an MPI operation fails (the
+ * error is mapped with MP_TO_SEC_ERROR).
+ */
+PRBool
+KEA_Verify(SECItem *Y, SECItem *prime, SECItem *subPrime)
+{
+    mp_int p, q, y, r;
+    mp_err err = MP_OKAY;
+    int cmp = 1; /* default is false */
+    if (!Y || !prime || !subPrime) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        /* This function returns a PRBool: SECFailure is nonzero and would
+         * be read by callers as "verified". */
+        return PR_FALSE;
+    }
+    MP_DIGITS(&p) = 0;
+    MP_DIGITS(&q) = 0;
+    MP_DIGITS(&y) = 0;
+    MP_DIGITS(&r) = 0;
+    CHECK_MPI_OK(mp_init(&p));
+    CHECK_MPI_OK(mp_init(&q));
+    CHECK_MPI_OK(mp_init(&y));
+    CHECK_MPI_OK(mp_init(&r));
+    SECITEM_TO_MPINT(*prime, &p);
+    SECITEM_TO_MPINT(*subPrime, &q);
+    SECITEM_TO_MPINT(*Y, &y);
+    /* compute r = y**q mod p */
+    CHECK_MPI_OK(mp_exptmod(&y, &q, &p, &r));
+    /* compare to 1 */
+    cmp = mp_cmp_d(&r, 1);
+cleanup:
+    mp_clear(&p);
+    mp_clear(&q);
+    mp_clear(&y);
+    mp_clear(&r);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        return PR_FALSE;
+    }
+    return (cmp == 0) ? PR_TRUE : PR_FALSE;
+}
diff --git a/security/nss/lib/freebl/drbg.c b/security/nss/lib/freebl/drbg.c
new file mode 100644
index 000000000..ac0bba6e0
--- /dev/null
+++ b/security/nss/lib/freebl/drbg.c
@@ -0,0 +1,968 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "prerror.h"
+#include "secerr.h"
+
+#include "prtypes.h"
+#include "prinit.h"
+#include "blapi.h"
+#include "blapii.h"
+#include "nssilock.h"
+#include "secitem.h"
+#include "sha_fast.h"
+#include "sha256.h"
+#include "secrng.h" /* for RNG_SystemRNG() */
+#include "secmpi.h"
+
+#ifdef UNSAFE_FUZZER_MODE
+#include "det_rng.h"
+#endif
+
+/* PRNG_SEEDLEN defined in NIST SP 800-90 section 10.1
+ * for SHA-1, SHA-224, and SHA-256 it's 440 bits.
+ * for SHA-384 and SHA-512 it's 888 bits.
+ * The value below is expressed in bytes (440 bits / bits-per-byte). */
+#define PRNG_SEEDLEN (440 / PR_BITS_PER_BYTE)
+#define PRNG_MAX_ADDITIONAL_BYTES PR_INT64(0x100000000)
+/* 2^35 bits or 2^32 bytes */
+#define PRNG_MAX_REQUEST_SIZE 0x10000 /* 2^19 bits or 2^16 bytes */
+#define PRNG_ADDITONAL_DATA_CACHE_SIZE (8 * 1024) /* must be less than \
+ * PRNG_MAX_ADDITIONAL_BYTES \
+ */
+
+/* RESEED_COUNT is the number of calls to the prng that are allowed before
+ * a reseed is needed. Under normal NIST rules, the generator must return an
+ * error at that point. In the NSS case, we self-reseed with RNG_SystemRNG()
+ * instead. Count can be a large number. For code simplicity, we specify
+ * count with 2 components: RESEED_BYTE (which is the same as
+ * LOG256(RESEED_COUNT)) and RESEED_VALUE (which is the same as
+ * RESEED_COUNT / (256 ^ RESEED_BYTE)). Another way to look at this is
+ * RESEED_COUNT = RESEED_VALUE * (256 ^ RESEED_BYTE). For Hash based DRBG
+ * we use the maximum count value, 2^48, or RESEED_BYTE=6 and RESEED_VALUE=1.
+ *
+ * PRNG_RESET_RESEED_COUNT (below) zeroes the whole reseed_counter array and
+ * then stores 1 in element RESEED_BYTE. NOTE: it expands to two complete
+ * statements, each already terminated by ';', so it is invoked without a
+ * trailing semicolon and must not be used as the sole body of an unbraced
+ * if/else/loop.
+ */
+#define RESEED_BYTE 6
+#define RESEED_VALUE 1
+
+#define PRNG_RESET_RESEED_COUNT(rng) \
+ PORT_Memset((rng)->reseed_counter, 0, sizeof(rng)->reseed_counter); \
+ (rng)->reseed_counter[RESEED_BYTE] = 1;
+
+/*
+ * The actual values of this enum are specified in SP 800-90, 10.1.1.*
+ * The spec does not name the types, it only uses bare values.
+ *
+ * In this file the value is stored in the V_type byte (V_Data[0]) right
+ * before V is hashed, so that a single hash over V_Data covers
+ * V_type || V.
+ */
+typedef enum {
+ prngCGenerateType = 0, /* used when creating a new 'C' */
+ prngReseedType = 1, /* used in reseeding */
+ prngAdditionalDataType = 2, /* used in mixing additional data */
+ prngGenerateByteType = 3 /* used when mixing internal state while
+ * generating bytes */
+} prngVTypes;
+
+/*
+ * Global RNG context
+ */
+struct RNGContextStr {
+ PZLock *lock; /* Lock to serialize access to global rng */
+ /*
+ * NOTE, a number of steps in the drbg algorithm need to hash
+ * V_type || V. The code, therefore, depends on the V array following
+ * immediately after V_type to avoid extra copies. To accomplish this
+ * in a way that compilers can't perturb, we declare V_type and V
+ * as a single V_Data array and reference them by macros:
+ * V_type is V_Data[0], V(rng) points at V_Data[1], and VSize(rng) is
+ * the size of V_Data minus the V_type byte. */
+ PRUint8 V_Data[PRNG_SEEDLEN + 1]; /* internal state variables */
+#define V_type V_Data[0]
+#define V(rng) (((rng)->V_Data) + 1)
+#define VSize(rng) ((sizeof(rng)->V_Data) - 1)
+ PRUint8 C[PRNG_SEEDLEN]; /* internal state variables */
+ PRUint8 lastOutput[SHA256_LENGTH]; /* for continuous rng checking */
+ /* (This paragraph describes the 'data' / 'dataAvail' output cache
+ * declared further down, not reseed_counter.)
+ * If we get calls for the PRNG to return less than the length of our
+ * hash, we extend the request for a full hash (since we'll be doing
+ * the full hash anyway). Future requests for random numbers are fulfilled
+ * from the remainder of the bytes we generated. Requests for bytes longer
+ * than the hash size are fulfilled directly from the HashGen function
+ * of the random number generator. */
+ PRUint8 reseed_counter[RESEED_BYTE + 1]; /* number of requests since the
+ * last reseed. Need only be
+ * big enough to hold the whole
+ * reseed count */
+ PRUint8 data[SHA256_LENGTH]; /* when we request less than a block
+ * save the rest of the rng output for
+ * another partial block */
+ PRUint8 dataAvail; /* # bytes of output available in our cache,
+ * [0...SHA256_LENGTH] */
+ /* store additional data that has been shovelled off to us by
+ * RNG_RandomUpdate. */
+ PRUint8 additionalDataCache[PRNG_ADDITONAL_DATA_CACHE_SIZE];
+ PRUint32 additionalAvail;
+ PRBool isValid; /* false if RNG reaches an invalid state */
+ PRBool isKatTest; /* true if running NIST PRNG KAT tests */
+};
+
+typedef struct RNGContextStr RNGContext;
+static RNGContext *globalrng = NULL;
+static RNGContext theGlobalRng;
+
+/*
+ * The next several functions are derived from the NIST SP 800-90
+ * spec. In these functions, an attempt was made to use names consistent
+ * with the names in the spec, even if they differ from normal NSS usage.
+ */
+
+/*
+ * Hash derivation function defined in NIST SP 800-90 Section 10.4.1.
+ * This function is used in the Instantiate and Reseed functions.
+ *
+ * Each output block is SHA-256 over
+ *     counter || bit-length-of-request || input_string_1 [|| input_string_2]
+ * with a one-byte counter that starts at 1 and is incremented per block.
+ *
+ * NOTE: requested_bytes cannot overlap with input_string_1 or input_string_2.
+ * input_string_1 and input_string_2 are logically concatenated.
+ * input_string_1 must be supplied.
+ * If input_string_2 is not supplied, NULL should be passed for this parameter.
+ *
+ * Always returns SECSuccess.
+ */
+static SECStatus
+prng_Hash_df(PRUint8 *requested_bytes, unsigned int no_of_bytes_to_return,
+             const PRUint8 *input_string_1, unsigned int input_string_1_len,
+             const PRUint8 *input_string_2, unsigned int input_string_2_len)
+{
+    SHA256Context hashCtx;
+    PRUint8 blockCounter = 1;
+    /* Total request size in bits, converted once with SHA_HTONL before the
+     * loop starts shrinking no_of_bytes_to_return. */
+    PRUint32 requestBits = SHA_HTONL(no_of_bytes_to_return * 8);
+
+    while (no_of_bytes_to_return > 0) {
+        unsigned int produced;
+
+        SHA256_Begin(&hashCtx);
+        SHA256_Update(&hashCtx, &blockCounter, 1);
+        SHA256_Update(&hashCtx, (unsigned char *)&requestBits,
+                      sizeof requestBits);
+        SHA256_Update(&hashCtx, input_string_1, input_string_1_len);
+        if (input_string_2 != NULL) {
+            SHA256_Update(&hashCtx, input_string_2, input_string_2_len);
+        }
+        /* The remaining byte count is passed as the output limit for the
+         * final (possibly partial) block. */
+        SHA256_End(&hashCtx, requested_bytes, &produced,
+                   no_of_bytes_to_return);
+
+        requested_bytes += produced;
+        no_of_bytes_to_return -= produced;
+        blockCounter++;
+    }
+    return SECSuccess;
+}
+
+/*
+ * Hash_DRBG Instantiate NIST SP 800-90 10.1.1.2
+ *
+ * NOTE: bytes & len are entropy || nonce || personalization_string. In
+ * normal operation, NSS calculates them all together in a single call.
+ */
+static SECStatus
+prng_instantiate(RNGContext *rng, const PRUint8 *bytes, unsigned int len)
+{
+    /* A seed shorter than PRNG_SEEDLEN most likely means we failed to
+     * collect enough random data. Rejecting it is stricter than NIST
+     * SP800-90A requires, so the check is skipped for KAT tests. */
+    if (len < PRNG_SEEDLEN && !rng->isKatTest) {
+        PORT_SetError(SEC_ERROR_NEED_RANDOM);
+        return SECFailure;
+    }
+
+    /* V = Hash_df(seed material) */
+    prng_Hash_df(V(rng), VSize(rng), bytes, len, NULL, 0);
+
+    /* C = Hash_df(V_type || V) with V_type set to the "generate C" tag */
+    rng->V_type = prngCGenerateType;
+    prng_Hash_df(rng->C, sizeof rng->C, rng->V_Data, sizeof rng->V_Data,
+                 NULL, 0);
+
+    PRNG_RESET_RESEED_COUNT(rng)
+    return SECSuccess;
+}
+
+/*
+ * Update the global random number generator with more seeding
+ * material. Use the Hash_DRBG reseed algorithm from NIST SP-800-90
+ * section 10.1.1.3
+ *
+ * If entropy is NULL, it is fetched from the noise generator.
+ *
+ * The working buffer 'noise' is laid out as
+ *     [ sizeof V_Data bytes reserved for V_type || V ][ entropy ]
+ * so that the old state and the new entropy can be fed to prng_Hash_df as
+ * one contiguous input string; additional_input is passed as the second
+ * input string.
+ *
+ * Returns SECFailure if a heap buffer for oversized entropy cannot be
+ * allocated, or (with SEC_ERROR_NEED_RANDOM) if fewer than 256 bits of
+ * entropy are available. On success V and C are replaced and the reseed
+ * counter is reset.
+ */
+static SECStatus
+prng_reseed(RNGContext *rng, const PRUint8 *entropy, unsigned int entropy_len,
+ const PRUint8 *additional_input, unsigned int additional_input_len)
+{
+ PRUint8 noiseData[(sizeof rng->V_Data) + PRNG_SEEDLEN];
+ PRUint8 *noise = &noiseData[0];
+
+ /* if entropy wasn't supplied, fetch it. (normal operation case)
+ * The return value of RNG_SystemRNG is used as the entropy length. */
+ if (entropy == NULL) {
+ entropy_len = (unsigned int)RNG_SystemRNG(
+ &noiseData[sizeof rng->V_Data], PRNG_SEEDLEN);
+ } else {
+ /* NOTE: this code is only available for testing, not to applications */
+ /* if entropy was too big for the stack variable, get it from malloc */
+ if (entropy_len > PRNG_SEEDLEN) {
+ noise = PORT_Alloc(entropy_len + (sizeof rng->V_Data));
+ if (noise == NULL) {
+ return SECFailure;
+ }
+ }
+ PORT_Memcpy(&noise[sizeof rng->V_Data], entropy, entropy_len);
+ }
+
+ if (entropy_len < 256 / PR_BITS_PER_BYTE) {
+ /* noise == &noiseData[0] at this point, so nothing to free
+ * (the heap buffer is only used when entropy_len > PRNG_SEEDLEN,
+ * which is larger than this 32-byte minimum) */
+ PORT_SetError(SEC_ERROR_NEED_RANDOM);
+ return SECFailure;
+ }
+
+ rng->V_type = prngReseedType;
+ PORT_Memcpy(noise, rng->V_Data, sizeof rng->V_Data);
+ prng_Hash_df(V(rng), VSize(rng), noise, (sizeof rng->V_Data) + entropy_len,
+ additional_input, additional_input_len);
+ /* clear potential CSP */
+ PORT_Memset(noise, 0, (sizeof rng->V_Data) + entropy_len);
+ rng->V_type = prngCGenerateType;
+ prng_Hash_df(rng->C, sizeof rng->C, rng->V_Data, sizeof rng->V_Data, NULL, 0);
+ PRNG_RESET_RESEED_COUNT(rng)
+
+ if (noise != &noiseData[0]) {
+ PORT_Free(noise);
+ }
+ return SECSuccess;
+}
+
+/*
+ * SP 800-90 requires we rerun our health tests on reseed
+ */
+static SECStatus
+prng_reseed_test(RNGContext *rng, const PRUint8 *entropy,
+                 unsigned int entropy_len, const PRUint8 *additional_input,
+                 unsigned int additional_input_len)
+{
+    /* Run the health checks first; if they fail, the generator is marked
+     * invalid and no reseed is attempted. The error code has already been
+     * set by PRNGTEST_RunHealthTests(). */
+    if (PRNGTEST_RunHealthTests() != SECSuccess) {
+        rng->isValid = PR_FALSE;
+        return SECFailure;
+    }
+
+    return prng_reseed(rng, entropy, entropy_len,
+                       additional_input, additional_input_len);
+}
+
+/*
+ * build some fast inline helpers for adding.
+ *
+ * These are statement macros, not functions: they expand to bare
+ * statements/blocks, evaluate their arguments more than once, and assign
+ * to the 'carry' argument, which must therefore be a modifiable integer
+ * variable. Byte arrays are treated as big-endian numbers (index 0 is the
+ * most significant byte).
+ *
+ * PRNG_ADD_CARRY_ONLY: starting at index 'start' and moving toward index
+ * 0, increment bytes for as long as 'carry' is nonzero; after each
+ * increment 'carry' is nonzero only if that byte wrapped around to 0.
+ */
+#define PRNG_ADD_CARRY_ONLY(dest, start, carry) \
+ { \
+ int k1; \
+ for (k1 = start; carry && k1 >= 0; k1--) { \
+ carry = !(++dest[k1]); \
+ } \
+ }
+
+/*
+ * NOTE: dest must be an array for the following to work.
+ *
+ * PRNG_ADD_BITS adds the 'len' bytes of 'add' into the last 'len' bytes of
+ * 'dest' and leaves the carry out of that addition in 'carry'.
+ * PRNG_ADD_BITS_AND_CARRY does the same and then propagates that carry
+ * into the remaining leading bytes of 'dest' with PRNG_ADD_CARRY_ONLY.
+ */
+#define PRNG_ADD_BITS(dest, dest_len, add, len, carry) \
+ carry = 0; \
+ PORT_Assert((dest_len) >= (len)); \
+ { \
+ int k1, k2; \
+ for (k1 = dest_len - 1, k2 = len - 1; k2 >= 0; --k1, --k2) { \
+ carry += dest[k1] + add[k2]; \
+ dest[k1] = (PRUint8)carry; \
+ carry >>= 8; \
+ } \
+ }
+
+#define PRNG_ADD_BITS_AND_CARRY(dest, dest_len, add, len, carry) \
+ PRNG_ADD_BITS(dest, dest_len, add, len, carry) \
+ PRNG_ADD_CARRY_ONLY(dest, dest_len - len - 1, carry)
+
+/*
+ * This function expands the internal state of the prng to fulfill any number
+ * of bytes we need for this request. We only use this call if we need more
+ * than can be supplied by a single call to SHA256_HashBuf.
+ *
+ * This function is specified in NIST SP 800-90 section 10.1.1.4, Hashgen
+ *
+ * A private copy of V is hashed repeatedly, incrementing the copy by one
+ * between blocks; rng's own V is not modified here. Each new hash is
+ * compared with the previous block (initially rng->lastOutput); on a match
+ * rng->isValid is cleared and generation stops, leaving the rest of
+ * returned_bytes unwritten. The last hash computed is saved in
+ * rng->lastOutput, and the temporaries are zeroed before returning.
+ *
+ * NOTE(review): if no_of_returned_bytes is 0 the loop never runs and the
+ * final copy stores the uninitialized thisHash into rng->lastOutput --
+ * confirm that callers never pass 0.
+ */
+static void
+prng_Hashgen(RNGContext *rng, PRUint8 *returned_bytes,
+ unsigned int no_of_returned_bytes)
+{
+ PRUint8 data[VSize(rng)];
+ PRUint8 thisHash[SHA256_LENGTH];
+ PRUint8 *lastHash = rng->lastOutput;
+
+ PORT_Memcpy(data, V(rng), VSize(rng));
+ while (no_of_returned_bytes) {
+ SHA256Context ctx;
+ unsigned int len;
+ unsigned int carry;
+
+ SHA256_Begin(&ctx);
+ SHA256_Update(&ctx, data, sizeof data);
+ SHA256_End(&ctx, thisHash, &len, SHA256_LENGTH);
+ if (PORT_Memcmp(lastHash, thisHash, len) == 0) {
+ rng->isValid = PR_FALSE;
+ break;
+ }
+ if (no_of_returned_bytes < SHA256_LENGTH) {
+ len = no_of_returned_bytes;
+ }
+ PORT_Memcpy(returned_bytes, thisHash, len);
+ lastHash = returned_bytes;
+ returned_bytes += len;
+ no_of_returned_bytes -= len;
+ /* The carry parameter is a bool (increment or not).
+ * This increments data if no_of_returned_bytes is not zero,
+ * i.e. only when another block still has to be produced. */
+ carry = no_of_returned_bytes;
+ PRNG_ADD_CARRY_ONLY(data, (sizeof data) - 1, carry);
+ }
+ PORT_Memcpy(rng->lastOutput, thisHash, SHA256_LENGTH);
+ PORT_Memset(data, 0, sizeof data);
+ PORT_Memset(thisHash, 0, sizeof thisHash);
+}
+
+/*
+ * Generates new random bytes and advances the internal prng state.
+ * additional bytes are only used in algorithm testing.
+ *
+ * This function is specified in NIST SP 800-90 section 10.1.1.4
+ *
+ * Returns SECFailure with SEC_ERROR_LIBRARY_FAILURE if the generator is
+ * already invalid on entry, or if the continuous check marks it invalid
+ * while generating; in the latter case the internal state has still been
+ * advanced and returned_bytes is zeroed. Returns SECSuccess otherwise.
+ */
+static SECStatus
+prng_generateNewBytes(RNGContext *rng,
+ PRUint8 *returned_bytes, unsigned int no_of_returned_bytes,
+ const PRUint8 *additional_input,
+ unsigned int additional_input_len)
+{
+ PRUint8 H[SHA256_LENGTH]; /* both H and w since they
+ * aren't used concurrently */
+ unsigned int carry;
+
+ if (!rng->isValid) {
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return SECFailure;
+ }
+ /* This code only triggers during tests, normal
+ * prng operation does not use additional_input.
+ * It computes w = SHA-256(V_type || V || additional_input) with the
+ * "additional data" tag in V_type, then adds w into V. */
+ if (additional_input) {
+ SHA256Context ctx;
+/* NIST SP 800-90 defines two temporaries in their calculations,
+ * w and H. These temporaries are the same lengths, and used
+ * at different times, so we use the following macro to collapse
+ * them to the same variable, but keeping their unique names for
+ * easy comparison to the spec */
+#define w H
+ rng->V_type = prngAdditionalDataType;
+ SHA256_Begin(&ctx);
+ SHA256_Update(&ctx, rng->V_Data, sizeof rng->V_Data);
+ SHA256_Update(&ctx, additional_input, additional_input_len);
+ SHA256_End(&ctx, w, NULL, sizeof w);
+ PRNG_ADD_BITS_AND_CARRY(V(rng), VSize(rng), w, sizeof w, carry)
+ PORT_Memset(w, 0, sizeof w);
+#undef w
+ }
+
+ if (no_of_returned_bytes == SHA256_LENGTH) {
+ /* short_cut to hashbuf and a couple of copies and clears */
+ SHA256_HashBuf(returned_bytes, V(rng), VSize(rng));
+ /* continuous rng check: identical consecutive outputs invalidate
+ * the generator */
+ if (memcmp(rng->lastOutput, returned_bytes, SHA256_LENGTH) == 0) {
+ rng->isValid = PR_FALSE;
+ }
+ PORT_Memcpy(rng->lastOutput, returned_bytes, sizeof rng->lastOutput);
+ } else {
+ prng_Hashgen(rng, returned_bytes, no_of_returned_bytes);
+ }
+ /* advance our internal state...
+ * H = SHA-256(V_type || V) with the "generate byte" tag in V_type, then
+ * V = V + H + C + reseed_counter, and finally reseed_counter is
+ * incremented by one. C is the same size as V, so adding it needs no
+ * separate carry propagation step. */
+ rng->V_type = prngGenerateByteType;
+ SHA256_HashBuf(H, rng->V_Data, sizeof rng->V_Data);
+ PRNG_ADD_BITS_AND_CARRY(V(rng), VSize(rng), H, sizeof H, carry)
+ PRNG_ADD_BITS(V(rng), VSize(rng), rng->C, sizeof rng->C, carry);
+ PRNG_ADD_BITS_AND_CARRY(V(rng), VSize(rng), rng->reseed_counter,
+ sizeof rng->reseed_counter, carry)
+ carry = 1;
+ PRNG_ADD_CARRY_ONLY(rng->reseed_counter, (sizeof rng->reseed_counter) - 1, carry);
+
+ /* if the prng failed, don't return any output, signal softoken */
+ if (!rng->isValid) {
+ PORT_Memset(returned_bytes, 0, no_of_returned_bytes);
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return SECFailure;
+ }
+ return SECSuccess;
+}
+
+/* Use NSPR to prevent RNG_RNGInit from being called from separate
+ * threads, creating a race condition.
+ *
+ * rng_init is the call-once body: it points globalrng at the static
+ * context, creates its lock, seeds it from RNG_SystemRNG(), marks it valid,
+ * draws one throw-away block of output and then asks for more system
+ * entropy through RNG_SystemInfoForRNG(). It returns PR_FAILURE when the
+ * lock cannot be created, when no seed data is obtained, or when
+ * instantiate/reseed fails.
+ *
+ * NOTE(review): on the instantiate/reseed failure path the function returns
+ * without destroying the lock or resetting globalrng (unlike the "no seed
+ * data" path), so RNG_RNGInit() -- which only tests globalrng != NULL --
+ * still reports success while isValid has not been set to true. Confirm
+ * whether that is intended.
+ */
+static const PRCallOnceType pristineCallOnce;
+static PRCallOnceType coRNGInit;
+static PRStatus
+rng_init(void)
+{
+ PRUint8 bytes[PRNG_SEEDLEN * 2]; /* entropy + nonce */
+ unsigned int numBytes;
+ SECStatus rv = SECSuccess;
+
+ if (globalrng == NULL) {
+ /* bytes needs to have enough space to hold
+ * a SHA256 hash value. Blow up at compile time if this isn't true */
+ PR_STATIC_ASSERT(sizeof(bytes) >= SHA256_LENGTH);
+ /* create a new global RNG context */
+ globalrng = &theGlobalRng;
+ PORT_Assert(NULL == globalrng->lock);
+ /* create a lock for it */
+ globalrng->lock = PZ_NewLock(nssILockOther);
+ if (globalrng->lock == NULL) {
+ globalrng = NULL;
+ PORT_SetError(PR_OUT_OF_MEMORY_ERROR);
+ return PR_FAILURE;
+ }
+
+ /* Try to get some seed data for the RNG */
+ numBytes = (unsigned int)RNG_SystemRNG(bytes, sizeof bytes);
+ PORT_Assert(numBytes == 0 || numBytes == sizeof bytes);
+ if (numBytes != 0) {
+ /* if this is our first call, instantiate, otherwise reseed
+ * prng_instantiate gets a new clean state, we want to mix
+ * any previous entropy we may have collected.
+ * "First call" is detected by the first byte of V being zero. */
+ if (V(globalrng)[0] == 0) {
+ rv = prng_instantiate(globalrng, bytes, numBytes);
+ } else {
+ rv = prng_reseed_test(globalrng, bytes, numBytes, NULL, 0);
+ }
+ memset(bytes, 0, numBytes);
+ } else {
+ PZ_DestroyLock(globalrng->lock);
+ globalrng->lock = NULL;
+ globalrng = NULL;
+ return PR_FAILURE;
+ }
+
+ if (rv != SECSuccess) {
+ return PR_FAILURE;
+ }
+ /* the RNG is in a valid state */
+ globalrng->isValid = PR_TRUE;
+ globalrng->isKatTest = PR_FALSE;
+
+ /* fetch one random value so that we can populate rng->lastOutput for
+ * our continuous random number test. */
+ prng_generateNewBytes(globalrng, bytes, SHA256_LENGTH, NULL, 0);
+
+ /* Fetch more entropy into the PRNG */
+ RNG_SystemInfoForRNG();
+ }
+ return PR_SUCCESS;
+}
+
+/*
+ * Clean up the global RNG context
+ */
+static void
+prng_freeRNGContext(RNGContext *rng)
+{
+    /* Scratch space for the values carried over: hashed C first, then
+     * hashed V. */
+    PRUint8 carried[(sizeof rng->C) + VSize(rng)];
+    PRUint8 *const nextC = carried;
+    PRUint8 *const nextV = carried + (sizeof rng->C);
+
+    /* destroy context lock (always the global context's lock) */
+    SKIP_AFTER_FORK(PZ_DestroyLock(globalrng->lock));
+
+    /* Derive replacement C and V from the current ones so that the
+     * collected entropy survives while everything else is wiped. */
+    prng_Hash_df(nextC, sizeof rng->C, rng->C, sizeof rng->C, NULL, 0);
+    prng_Hash_df(nextV, VSize(rng), V(rng), VSize(rng), NULL, 0);
+
+    /* zero the whole context, then put the derived C and V back */
+    memset(rng, 0, sizeof *rng);
+    memcpy(rng->C, nextC, sizeof rng->C);
+    memcpy(V(rng), nextV, VSize(rng));
+
+    memset(carried, 0, sizeof carried);
+}
+
+/*
+ * Public functions
+ */
+
+/*
+ * Initialize the global RNG context and give it some seed input taken
+ * from the system. This function is thread-safe and will only allow
+ * the global context to be initialized once. The seed input is likely
+ * small, so it is imperative that RNG_RandomUpdate() be called with
+ * additional seed data before the generator is used. A good way to
+ * provide the generator with additional entropy is to call
+ * RNG_SystemInfoForRNG(). Note that C_Initialize() does exactly that.
+ */
+SECStatus
+RNG_RNGInit(void)
+{
+    /* rng_init runs at most once, however many times we get here. */
+    PR_CallOnce(&coRNGInit, rng_init);
+
+    /* Success is judged solely by whether a global context exists. */
+    if (globalrng == NULL) {
+        return SECFailure;
+    }
+    return SECSuccess;
+}
+
+/*
+** Update the global random number generator with more seeding
+** material.
+**
+** data  - seed material supplied by the caller.
+** bytes - length of data; where size_t is wider than 32 bits it is
+**         clamped to PRNG_MAX_ADDITIONAL_BYTES.
+**
+** Small inputs are accumulated in globalrng->additionalDataCache. A reseed
+** is performed when that cache fills up, or directly from data when the
+** input is larger than the whole cache.
+**
+** Returns SECFailure with SEC_ERROR_INVALID_ARGS when the global RNG has
+** not been initialized; otherwise SECSuccess or the status of the reseed.
+*/
+SECStatus
+RNG_RandomUpdate(const void *data, size_t bytes)
+{
+    SECStatus rv;
+
+    /* Make sure our assumption that size_t is unsigned is true */
+    PR_STATIC_ASSERT(((size_t)-1) > (size_t)1);
+
+#if defined(NS_PTR_GT_32) || (defined(NSS_USE_64) && !defined(NS_PTR_LE_32))
+    /*
+     * NIST 800-90 requires us to verify our inputs. This value can
+     * come from the application, so we need to make sure it's within the
+     * spec. The spec says it must be less than 2^32 bytes (2^35 bits).
+     * This can only happen if size_t is greater than 32 bits (i.e. on
+     * most 64 bit platforms). The 90% case (perhaps 100% case), size_t
+     * is less than or equal to 32 bits if the platform is not 64 bits, and
+     * greater than 32 bits if it is a 64 bit platform. The corner
+     * cases are handled with explicit defines NS_PTR_GT_32 and NS_PTR_LE_32.
+     *
+     * In general, neither NS_PTR_GT_32 nor NS_PTR_LE_32 will need to be
+     * defined. If you trip over the next two size ASSERTS at compile time,
+     * you will need to define them for your platform.
+     *
+     * if 'sizeof(size_t) > 4' is triggered it means that we were expecting
+     * sizeof(size_t) to be greater than 4, but it wasn't. Setting
+     * NS_PTR_LE_32 will correct that mistake.
+     *
+     * if 'sizeof(size_t) <= 4' is triggered, it means that we were expecting
+     * sizeof(size_t) to be less than or equal to 4, but it wasn't. Setting
+     * NS_PTR_GT_32 will correct that mistake.
+     */
+
+    PR_STATIC_ASSERT(sizeof(size_t) > 4);
+
+    if (bytes > (size_t)PRNG_MAX_ADDITIONAL_BYTES) {
+        bytes = PRNG_MAX_ADDITIONAL_BYTES;
+    }
+#else
+    PR_STATIC_ASSERT(sizeof(size_t) <= 4);
+#endif
+
+    /* check for a valid global RNG context, as the generate and shutdown
+     * paths do, instead of dereferencing a NULL pointer below */
+    PORT_Assert(globalrng != NULL);
+    if (globalrng == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    PZ_Lock(globalrng->lock);
+    /* if we're passed more than our additionalDataCache, simply
+     * call reseed with that data */
+    if (bytes > sizeof(globalrng->additionalDataCache)) {
+        rv = prng_reseed_test(globalrng, NULL, 0, data, (unsigned int)bytes);
+        /* if we aren't going to fill or overflow the buffer, just cache it */
+    } else if (bytes < ((sizeof globalrng->additionalDataCache) - globalrng->additionalAvail)) {
+        PORT_Memcpy(globalrng->additionalDataCache + globalrng->additionalAvail,
+                    data, bytes);
+        globalrng->additionalAvail += (PRUint32)bytes;
+        rv = SECSuccess;
+    } else {
+        /* we are going to fill or overflow the buffer. In this case we will
+         * fill the entropy buffer, reseed with it, start a new buffer with the
+         * remainder. We know the remainder will fit in the buffer because
+         * we already handled the case where bytes > the size of the buffer.
+         */
+        size_t bufRemain = (sizeof globalrng->additionalDataCache) - globalrng->additionalAvail;
+        /* fill the rest of the buffer */
+        if (bufRemain) {
+            PORT_Memcpy(globalrng->additionalDataCache + globalrng->additionalAvail,
+                        data, bufRemain);
+            data = ((unsigned char *)data) + bufRemain;
+            bytes -= bufRemain;
+        }
+        /* reseed from buffer */
+        rv = prng_reseed_test(globalrng, NULL, 0,
+                              globalrng->additionalDataCache,
+                              sizeof globalrng->additionalDataCache);
+
+        /* copy the rest into the cache */
+        PORT_Memcpy(globalrng->additionalDataCache, data, bytes);
+        globalrng->additionalAvail = (PRUint32)bytes;
+    }
+
+    PZ_Unlock(globalrng->lock);
+    return rv;
+}
+
+/*
+** Generate some random bytes, using the global random number generator
+** object.
+**
+** rng  - context to draw from; NULL is rejected with SEC_ERROR_INVALID_ARGS.
+** dest - output buffer that receives len bytes.
+** len  - number of bytes requested; anything above PRNG_MAX_REQUEST_SIZE
+**        is rejected with SEC_ERROR_INVALID_ARGS.
+**
+** A request that fits in the cached output is served from rng->data. A
+** request smaller than the cache refills the cache first and keeps the
+** surplus. A larger request is generated straight into dest. Whenever new
+** bytes are generated, the pending additionalDataCache bytes (if any) are
+** passed to the generator and additionalAvail is reset to 0.
+*/
+static SECStatus
+prng_GenerateGlobalRandomBytes(RNGContext *rng,
+ void *dest, size_t len)
+{
+ SECStatus rv = SECSuccess;
+ PRUint8 *output = dest;
+ /* check for a valid global RNG context */
+ PORT_Assert(rng != NULL);
+ if (rng == NULL) {
+ PORT_SetError(SEC_ERROR_INVALID_ARGS);
+ return SECFailure;
+ }
+ /* FIPS limits the amount of entropy available in a single request */
+ if (len > PRNG_MAX_REQUEST_SIZE) {
+ PORT_SetError(SEC_ERROR_INVALID_ARGS);
+ return SECFailure;
+ }
+ /* --- LOCKED --- */
+ PZ_Lock(rng->lock);
+ /* Check the amount of seed data in the generator. If not enough,
+ * don't produce any data.
+ */
+ if (rng->reseed_counter[0] >= RESEED_VALUE) {
+ rv = prng_reseed_test(rng, NULL, 0, NULL, 0);
+ PZ_Unlock(rng->lock); /* lock is released before RNG_SystemInfoForRNG() runs */
+ if (rv != SECSuccess) {
+ return rv; /* lock already released above */
+ }
+ RNG_SystemInfoForRNG(); /* NOTE(review): presumably feeds RNG_RandomUpdate, which takes this lock - confirm */
+ PZ_Lock(rng->lock);
+ }
+ /*
+ * see if we have enough bytes to fulfill the request.
+ */
+ if (len <= rng->dataAvail) {
+ memcpy(output, rng->data + ((sizeof rng->data) - rng->dataAvail), len);
+ memset(rng->data + ((sizeof rng->data) - rng->dataAvail), 0, len); /* wipe bytes that were handed out */
+ rng->dataAvail -= len;
+ rv = SECSuccess;
+ /* if we are asking for a small number of bytes, cache the rest of
+ * the bytes */
+ } else if (len < sizeof rng->data) {
+ rv = prng_generateNewBytes(rng, rng->data, sizeof rng->data,
+ rng->additionalAvail ? rng->additionalDataCache : NULL,
+ rng->additionalAvail);
+ rng->additionalAvail = 0; /* cached additional input has been consumed */
+ if (rv == SECSuccess) {
+ memcpy(output, rng->data, len);
+ memset(rng->data, 0, len); /* wipe bytes that were handed out */
+ rng->dataAvail = (sizeof rng->data) - len;
+ }
+ /* we are asking for lots of bytes, just ask the generator to pass them */
+ } else {
+ rv = prng_generateNewBytes(rng, output, len,
+ rng->additionalAvail ? rng->additionalDataCache : NULL,
+ rng->additionalAvail);
+ rng->additionalAvail = 0; /* cached additional input has been consumed */
+ }
+ PZ_Unlock(rng->lock);
+ /* --- UNLOCKED --- */
+ return rv;
+}
+
+/*
+** Public entry point: fill dest with len random bytes from the global
+** random number generator object. When UNSAFE_FUZZER_MODE is defined the
+** deterministic generator is used instead.
+*/
+SECStatus
+RNG_GenerateGlobalRandomBytes(void *dest, size_t len)
+{
+    SECStatus status;
+
+#ifdef UNSAFE_FUZZER_MODE
+    status = prng_GenerateDeterministicRandomBytes(globalrng->lock, dest, len);
+#else
+    status = prng_GenerateGlobalRandomBytes(globalrng, dest, len);
+#endif
+    return status;
+}
+
+/*
+ * Reset the deterministic generator used for fuzzing. Only meaningful when
+ * UNSAFE_FUZZER_MODE is defined; in every other build it returns SECFailure.
+ */
+SECStatus
+RNG_ResetForFuzzing(void)
+{
+    SECStatus status = SECFailure;
+
+#ifdef UNSAFE_FUZZER_MODE
+    status = prng_ResetForFuzzing(globalrng->lock);
+#endif
+    return status;
+}
+
+/*
+ * Release the global RNG context and re-arm the call-once guard so that
+ * RNG_RNGInit() may be called again.
+ */
+void
+RNG_RNGShutdown(void)
+{
+    /* check for a valid global RNG context */
+    PORT_Assert(globalrng != NULL);
+    if (globalrng != NULL) {
+        /* clear */
+        prng_freeRNGContext(globalrng);
+        globalrng = NULL;
+        /* reset the callonce struct to allow a new call to RNG_RNGInit() */
+        coRNGInit = pristineCallOnce;
+        return;
+    }
+    /* Should set a "not initialized" error code. */
+    PORT_SetError(SEC_ERROR_NO_MEMORY);
+}
+
+/*
+ * Test case interface. used by fips testing and power on self test
+ */
+/* Make sure the test context is separate from the global context. This
+ * allows us to test the internal random number generator without losing
+ * entropy we may have previously collected. */
+RNGContext testContext;
+
+/*
+ * Known-answer-test variant of PRNGTEST_Instantiate: flags the test context
+ * as running a KAT, then instantiates it with the same arguments.
+ */
+SECStatus
+PRNGTEST_Instantiate_Kat(const PRUint8 *entropy, unsigned int entropy_len,
+                         const PRUint8 *nonce, unsigned int nonce_len,
+                         const PRUint8 *personal_string, unsigned int ps_len)
+{
+    SECStatus status;
+
+    testContext.isKatTest = PR_TRUE;
+    status = PRNGTEST_Instantiate(entropy, entropy_len, nonce, nonce_len,
+                                  personal_string, ps_len);
+    return status;
+}
+
+/*
+ * Test vector API. Use NIST SP 800-90 general interface so one of the
+ * other NIST SP 800-90 algorithms may be used in the future.
+ *
+ * Instantiates the test context from entropy || nonce || personal_string.
+ *
+ * entropy/entropy_len     - entropy input; fewer than 256 bits fails with
+ *                           SEC_ERROR_NEED_RANDOM.
+ * nonce/nonce_len         - optional nonce (NULL with length 0 to omit).
+ * personal_string/ps_len  - optional personalization string (NULL with
+ *                           length 0 to omit).
+ *
+ * Returns SECSuccess and marks the test context valid, or SECFailure with
+ * the error code set (SEC_ERROR_INVALID_ARGS when the combined length does
+ * not fit in an unsigned int, SEC_ERROR_NO_MEMORY on allocation failure, or
+ * whatever prng_instantiate reported).
+ */
+SECStatus
+PRNGTEST_Instantiate(const PRUint8 *entropy, unsigned int entropy_len,
+                     const PRUint8 *nonce, unsigned int nonce_len,
+                     const PRUint8 *personal_string, unsigned int ps_len)
+{
+    unsigned int bytes_len;
+    PRUint8 *bytes = NULL;
+    SECStatus rv;
+
+    if (entropy_len < 256 / PR_BITS_PER_BYTE) {
+        PORT_SetError(SEC_ERROR_NEED_RANDOM);
+        return SECFailure;
+    }
+
+    /* Add the three lengths with explicit wrap-around checks; a wrapped
+     * total would make the buffer smaller than the data copied into it. */
+    bytes_len = entropy_len + nonce_len;
+    if (bytes_len < entropy_len) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    bytes_len += ps_len;
+    if (bytes_len < ps_len) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    bytes = PORT_Alloc(bytes_len);
+    if (bytes == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+    /* concatenate the various inputs, internally NSS only instantiates with
+     * a single long string */
+    PORT_Memcpy(bytes, entropy, entropy_len);
+    if (nonce) {
+        PORT_Memcpy(&bytes[entropy_len], nonce, nonce_len);
+    } else {
+        PORT_Assert(nonce_len == 0);
+    }
+    if (personal_string) {
+        PORT_Memcpy(&bytes[entropy_len + nonce_len], personal_string, ps_len);
+    } else {
+        PORT_Assert(ps_len == 0);
+    }
+    rv = prng_instantiate(&testContext, bytes, bytes_len);
+    /* the seed material is secret: wipe it while freeing */
+    PORT_ZFree(bytes, bytes_len);
+    if (rv == SECFailure) {
+        return SECFailure;
+    }
+    testContext.isValid = PR_TRUE;
+    return SECSuccess;
+}
+
+/*
+ * Reseed the test context with the given entropy and additional input.
+ * Fails with SEC_ERROR_LIBRARY_FAILURE when the test context has not been
+ * instantiated. Calling it with all-NULL / all-zero arguments is a special
+ * request: it only pushes the reseed counter to RESEED_VALUE.
+ */
+SECStatus
+PRNGTEST_Reseed(const PRUint8 *entropy, unsigned int entropy_len,
+                const PRUint8 *additional, unsigned int additional_len)
+{
+    if (!testContext.isValid) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+
+    /* any real input at all means an ordinary reseed */
+    if ((entropy != NULL) || (entropy_len != 0) ||
+        (additional != NULL) || (additional_len != 0)) {
+        return prng_reseed(&testContext, entropy, entropy_len, additional,
+                           additional_len);
+    }
+
+    /* This magic input tells us to set the reseed count to its max count,
+     * so we can simulate PRNGTEST_Generate reaching max reseed count */
+    testContext.reseed_counter[0] = RESEED_VALUE;
+    return SECSuccess;
+}
+
+/*
+ * Produce bytes_len bytes from the test context, optionally mixing in
+ * additional input. Fails with SEC_ERROR_LIBRARY_FAILURE when the test
+ * context has not been instantiated.
+ */
+SECStatus
+PRNGTEST_Generate(PRUint8 *bytes, unsigned int bytes_len,
+                  const PRUint8 *additional, unsigned int additional_len)
+{
+    SECStatus status = SECSuccess;
+
+    if (!testContext.isValid) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+    /* replicate reseed test from prng_GenerateGlobalRandomBytes */
+    if (testContext.reseed_counter[0] >= RESEED_VALUE) {
+        status = prng_reseed(&testContext, NULL, 0, NULL, 0);
+    }
+    if (status != SECSuccess) {
+        return status;
+    }
+    return prng_generateNewBytes(&testContext, bytes, bytes_len,
+                                 additional, additional_len);
+}
+
+/*
+ * Wipe the test context. Fails with SEC_ERROR_LIBRARY_FAILURE when the
+ * context is not currently instantiated; otherwise the whole structure is
+ * zeroed (which also clears isValid) and SECSuccess is returned.
+ */
+SECStatus
+PRNGTEST_Uninstantiate(void)
+{
+    if (!testContext.isValid) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+    PORT_Memset(&testContext, 0, sizeof testContext);
+    return SECSuccess;
+}
+
+SECStatus
+PRNGTEST_RunHealthTests()
+{ /* Self test of the generator using the separate test context: checks the
+ * instantiate error path, two known-answer generates (before and after an
+ * explicit reseed), the automatic reseed, the reseed error path and
+ * uninstantiate. Returns SECSuccess only if every step behaves as
+ * expected; otherwise SECFailure with the error code set. */
+ static const PRUint8 entropy[] = {
+ 0x8e, 0x9c, 0x0d, 0x25, 0x75, 0x22, 0x04, 0xf9,
+ 0xc5, 0x79, 0x10, 0x8b, 0x23, 0x79, 0x37, 0x14,
+ 0x9f, 0x2c, 0xc7, 0x0b, 0x39, 0xf8, 0xee, 0xef,
+ 0x95, 0x0c, 0x97, 0x59, 0xfc, 0x0a, 0x85, 0x41,
+ 0x76, 0x9d, 0x6d, 0x67, 0x00, 0x4e, 0x19, 0x12,
+ 0x02, 0x16, 0x53, 0xea, 0xf2, 0x73, 0xd7, 0xd6,
+ 0x7f, 0x7e, 0xc8, 0xae, 0x9c, 0x09, 0x99, 0x7d,
+ 0xbb, 0x9e, 0x48, 0x7f, 0xbb, 0x96, 0x46, 0xb3,
+ 0x03, 0x75, 0xf8, 0xc8, 0x69, 0x45, 0x3f, 0x97,
+ 0x5e, 0x2e, 0x48, 0xe1, 0x5d, 0x58, 0x97, 0x4c
+ };
+ static const PRUint8 rng_known_result[] = {
+ 0x16, 0xe1, 0x8c, 0x57, 0x21, 0xd8, 0xf1, 0x7e,
+ 0x5a, 0xa0, 0x16, 0x0b, 0x7e, 0xa6, 0x25, 0xb4,
+ 0x24, 0x19, 0xdb, 0x54, 0xfa, 0x35, 0x13, 0x66,
+ 0xbb, 0xaa, 0x2a, 0x1b, 0x22, 0x33, 0x2e, 0x4a,
+ 0x14, 0x07, 0x9d, 0x52, 0xfc, 0x73, 0x61, 0x48,
+ 0xac, 0xc1, 0x22, 0xfc, 0xa4, 0xfc, 0xac, 0xa4,
+ 0xdb, 0xda, 0x5b, 0x27, 0x33, 0xc4, 0xb3
+ };
+ static const PRUint8 reseed_entropy[] = {
+ 0xc6, 0x0b, 0x0a, 0x30, 0x67, 0x07, 0xf4, 0xe2,
+ 0x24, 0xa7, 0x51, 0x6f, 0x5f, 0x85, 0x3e, 0x5d,
+ 0x67, 0x97, 0xb8, 0x3b, 0x30, 0x9c, 0x7a, 0xb1,
+ 0x52, 0xc6, 0x1b, 0xc9, 0x46, 0xa8, 0x62, 0x79
+ };
+ static const PRUint8 additional_input[] = {
+ 0x86, 0x82, 0x28, 0x98, 0xe7, 0xcb, 0x01, 0x14,
+ 0xae, 0x87, 0x4b, 0x1d, 0x99, 0x1b, 0xc7, 0x41,
+ 0x33, 0xff, 0x33, 0x66, 0x40, 0x95, 0x54, 0xc6,
+ 0x67, 0x4d, 0x40, 0x2a, 0x1f, 0xf9, 0xeb, 0x65
+ };
+ static const PRUint8 rng_reseed_result[] = {
+ 0x02, 0x0c, 0xc6, 0x17, 0x86, 0x49, 0xba, 0xc4,
+ 0x7b, 0x71, 0x35, 0x05, 0xf0, 0xdb, 0x4a, 0xc2,
+ 0x2c, 0x38, 0xc1, 0xa4, 0x42, 0xe5, 0x46, 0x4a,
+ 0x7d, 0xf0, 0xbe, 0x47, 0x88, 0xb8, 0x0e, 0xc6,
+ 0x25, 0x2b, 0x1d, 0x13, 0xef, 0xa6, 0x87, 0x96,
+ 0xa3, 0x7d, 0x5b, 0x80, 0xc2, 0x38, 0x76, 0x61,
+ 0xc7, 0x80, 0x5d, 0x0f, 0x05, 0x76, 0x85
+ };
+ static const PRUint8 rng_no_reseed_result[] = {
+ 0xc4, 0x40, 0x41, 0x8c, 0xbf, 0x2f, 0x70, 0x23,
+ 0x88, 0xf2, 0x7b, 0x30, 0xc3, 0xca, 0x1e, 0xf3,
+ 0xef, 0x53, 0x81, 0x5d, 0x30, 0xed, 0x4c, 0xf1,
+ 0xff, 0x89, 0xa5, 0xee, 0x92, 0xf8, 0xc0, 0x0f,
+ 0x88, 0x53, 0xdf, 0xb6, 0x76, 0xf0, 0xaa, 0xd3,
+ 0x2e, 0x1d, 0x64, 0x37, 0x3e, 0xe8, 0x4a, 0x02,
+ 0xff, 0x0a, 0x7f, 0xe5, 0xe9, 0x2b, 0x6d
+ };
+
+ SECStatus rng_status = SECSuccess;
+ PR_STATIC_ASSERT(sizeof(rng_known_result) >= sizeof(rng_reseed_result)); /* result[] below is reused for both outputs */
+ PRUint8 result[sizeof(rng_known_result)];
+
+ /********************************************/
+ /* First test instantiate error path. */
+ /* In this case we supply enough entropy, */
+ /* but not enough seed. This will trigger */
+ /* the code that checks for an entropy */
+ /* source failure. */
+ /********************************************/
+ rng_status = PRNGTEST_Instantiate(entropy, 256 / PR_BITS_PER_BYTE,
+ NULL, 0, NULL, 0);
+ if (rng_status == SECSuccess) {
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return SECFailure;
+ }
+ if (PORT_GetError() != SEC_ERROR_NEED_RANDOM) {
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return SECFailure;
+ }
+ /* we failed with the proper error code, we can continue */
+
+ /********************************************/
+ /* Generate random bytes with a known seed. */
+ /********************************************/
+ rng_status = PRNGTEST_Instantiate(entropy, sizeof entropy,
+ NULL, 0, NULL, 0);
+ if (rng_status != SECSuccess) {
+ /* Error set by PRNGTEST_Instantiate */
+ return SECFailure;
+ }
+ rng_status = PRNGTEST_Generate(result, sizeof rng_known_result, NULL, 0);
+ if ((rng_status != SECSuccess) ||
+ (PORT_Memcmp(result, rng_known_result,
+ sizeof rng_known_result) != 0)) {
+ PRNGTEST_Uninstantiate();
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return SECFailure;
+ }
+ rng_status = PRNGTEST_Reseed(reseed_entropy, sizeof reseed_entropy,
+ additional_input, sizeof additional_input);
+ if (rng_status != SECSuccess) {
+ /* Error set by PRNGTEST_Reseed */
+ PRNGTEST_Uninstantiate();
+ return SECFailure;
+ }
+ rng_status = PRNGTEST_Generate(result, sizeof rng_reseed_result, NULL, 0);
+ if ((rng_status != SECSuccess) ||
+ (PORT_Memcmp(result, rng_reseed_result,
+ sizeof rng_reseed_result) != 0)) {
+ PRNGTEST_Uninstantiate();
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return SECFailure;
+ }
+ /* This magic forces the reseed count to its max count, so we can see if
+ * PRNGTEST_Generate will actually reseed when it reaches that count */
+ rng_status = PRNGTEST_Reseed(NULL, 0, NULL, 0);
+ if (rng_status != SECSuccess) {
+ PRNGTEST_Uninstantiate();
+ /* Error set by PRNGTEST_Reseed */
+ return SECFailure;
+ }
+ /* This generate should now reseed */
+ rng_status = PRNGTEST_Generate(result, sizeof rng_reseed_result, NULL, 0);
+ if ((rng_status != SECSuccess) ||
+ /* NOTE we fail if the result is equal to the no_reseed_result.
+ * no_reseed_result is the value we would have gotten if we didn't
+ * do an automatic reseed in PRNGTEST_Generate */
+ (PORT_Memcmp(result, rng_no_reseed_result,
+ sizeof rng_no_reseed_result) == 0)) {
+ PRNGTEST_Uninstantiate();
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return SECFailure;
+ }
+ /* make sure reseed fails when we don't supply enough entropy */
+ rng_status = PRNGTEST_Reseed(reseed_entropy, 4, NULL, 0);
+ if (rng_status == SECSuccess) {
+ PRNGTEST_Uninstantiate();
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return SECFailure;
+ }
+ if (PORT_GetError() != SEC_ERROR_NEED_RANDOM) {
+ PRNGTEST_Uninstantiate();
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return SECFailure;
+ }
+ rng_status = PRNGTEST_Uninstantiate();
+ if (rng_status != SECSuccess) {
+ /* Error set by PRNGTEST_Uninstantiate */
+ return rng_status;
+ }
+ /* make sure uninstantiate fails if the context is not instantiated (also
+ * tests if the context was cleared in the previous Uninstantiate) */
+ rng_status = PRNGTEST_Uninstantiate();
+ if (rng_status == SECSuccess) {
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return SECFailure;
+ }
+ if (PORT_GetError() != SEC_ERROR_LIBRARY_FAILURE) {
+ return rng_status; /* rng_status is a failure status here */
+ }
+
+ return SECSuccess;
+}
diff --git a/security/nss/lib/freebl/dsa.c b/security/nss/lib/freebl/dsa.c
new file mode 100644
index 000000000..9324d306b
--- /dev/null
+++ b/security/nss/lib/freebl/dsa.c
@@ -0,0 +1,647 @@
+/*
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "prerror.h"
+#include "secerr.h"
+
+#include "prtypes.h"
+#include "prinit.h"
+#include "blapi.h"
+#include "nssilock.h"
+#include "secitem.h"
+#include "blapi.h"
+#include "mpi.h"
+#include "secmpi.h"
+#include "pqg.h"
+
+/* XXX to be replaced by define in blapit.h */
+#define NSS_FREEBL_DSA_DEFAULT_CHUNKSIZE 2048
+
+/*
+ * FIPS 186-2 requires the random output to be reduced mod q when
+ * generating random numbers for DSA
+ * (Algorithm 1 of FIPS 186-2 Change Notice 1, Step 3.3: xj = w mod q).
+ *
+ * Input:  w,  2*qLen bytes
+ *         q,  qLen bytes
+ * Output: xj, qLen bytes
+ *
+ * Returns SECSuccess, or SECFailure after translating the MPI error.
+ */
+static SECStatus
+fips186Change_ReduceModQForDSA(const PRUint8 *w, const PRUint8 *q,
+                               unsigned int qLen, PRUint8 *xj)
+{
+    mp_int wide, modulus, reduced;
+    mp_err err;
+
+    /* Mark all three integers as unallocated so cleanup is always safe. */
+    MP_DIGITS(&wide) = 0;
+    MP_DIGITS(&modulus) = 0;
+    MP_DIGITS(&reduced) = 0;
+    CHECK_MPI_OK(mp_init(&wide));
+    CHECK_MPI_OK(mp_init(&modulus));
+    CHECK_MPI_OK(mp_init(&reduced));
+
+    /* Load the octet strings. */
+    CHECK_MPI_OK(mp_read_unsigned_octets(&wide, w, 2 * qLen));
+    CHECK_MPI_OK(mp_read_unsigned_octets(&modulus, q, qLen));
+
+    /* xj = (w0 || w1) mod q, written out as exactly qLen bytes */
+    CHECK_MPI_OK(mp_mod(&wide, &modulus, &reduced));
+    CHECK_MPI_OK(mp_to_fixlen_octets(&reduced, xj, qLen));
+
+cleanup:
+    mp_clear(&wide);
+    mp_clear(&modulus);
+    mp_clear(&reduced);
+    if (!err) {
+        return SECSuccess;
+    }
+    MP_TO_SEC_ERROR(err);
+    return SECFailure;
+}
+
+/*
+ * Exported form of the mod-q reduction that FIPS 186-2 requires for DSA
+ * random numbers. The subprime length is fixed at DSA1_SUBPRIME_LEN.
+ */
+SECStatus
+FIPS186Change_ReduceModQForDSA(const unsigned char *w,
+                               const unsigned char *q,
+                               unsigned char *xj)
+{
+    const unsigned int subprimeLen = DSA1_SUBPRIME_LEN;
+
+    return fips186Change_ReduceModQForDSA(w, q, subprimeLen, xj);
+}
+
+/*
+ * The core of Algorithm 1 of FIPS 186-2 Change Notice 1.
+ *
+ * The FIPS 186-2 RNG is no longer supported. This function used to be
+ * exported for power-up self tests and FIPS tests; the stub is kept so that
+ * callers do not crash, and it always fails so that test code can tell the
+ * FIPS 186-2 RNG is gone.
+ *
+ * Always sets PR_NOT_IMPLEMENTED_ERROR and returns SECFailure.
+ */
+SECStatus
+FIPS186Change_GenerateX(PRUint8 *XKEY, const PRUint8 *XSEEDj,
+                        PRUint8 *x_j)
+{
+    /* the arguments are deliberately ignored */
+    (void)XKEY;
+    (void)XSEEDj;
+    (void)x_j;
+
+    PORT_SetError(PR_NOT_IMPLEMENTED_ERROR);
+    return SECFailure;
+}
+
+/*
+ * Specialized RNG for DSA
+ *
+ * As per Algorithm 1 of FIPS 186-2 Change Notice 1, in step 3.3 the value
+ * Xj should be reduced mod q, a 160-bit prime number. Since this parameter
+ * is only meaningful in the context of DSA, the above RNG functions
+ * were implemented without it. They are re-implemented below for use
+ * with DSA.
+ */
+
+/*
+** Generate some random bytes, using the global random number generator
+** object. In DSA mode, so there is a q.
+**
+** qItem      - the subprime q; one leading zero byte, if present, is
+**              skipped.
+** dest       - receives the random value reduced mod q.
+** destLen    - set to the length of q (after skipping the leading zero).
+** maxDestLen - capacity of dest; must be at least the length of q.
+**
+** Twice as many random bytes as q is long are drawn and then reduced mod q.
+** Returns SECFailure with SEC_ERROR_INVALID_ARGS for a missing or empty q,
+** or when dest is too small.
+*/
+static SECStatus
+dsa_GenerateGlobalRandomBytes(const SECItem *qItem, PRUint8 *dest,
+                              unsigned int *destLen, unsigned int maxDestLen)
+{
+    SECStatus rv;
+    SECItem w;
+    const PRUint8 *q;
+    unsigned int qLen;
+
+    /* never read *q from an absent or empty item */
+    if (qItem == NULL || qItem->data == NULL || qItem->len == 0) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    q = qItem->data;
+    qLen = qItem->len;
+
+    if (*q == 0) {
+        ++q;
+        --qLen;
+    }
+    if (qLen == 0) {
+        /* q consisted of a single zero byte: there is no modulus */
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    if (maxDestLen < qLen) {
+        /* This condition can occur when DSA_SignDigest is passed a group
+           with a subprime that is larger than DSA_MAX_SUBPRIME_LEN. */
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    w.data = NULL; /* otherwise SECITEM_AllocItem asserts */
+    if (!SECITEM_AllocItem(NULL, &w, 2 * qLen)) {
+        return SECFailure;
+    }
+    *destLen = qLen;
+
+    rv = RNG_GenerateGlobalRandomBytes(w.data, w.len);
+    if (rv == SECSuccess) {
+        rv = fips186Change_ReduceModQForDSA(w.data, q, qLen, dest);
+    }
+
+    /* w determines the secret value written to dest: wipe it when freeing */
+    SECITEM_ZfreeItem(&w, PR_FALSE);
+    return rv;
+}
+
+static void
+translate_mpi_error(mp_err err)
+{ /* Function wrapper around the MP_TO_SEC_ERROR macro for the given MPI
+ * error. NOTE(review): the macro is defined outside this file; presumably
+ * it sets the matching SEC error code - confirm in secmpi.h. */
+ MP_TO_SEC_ERROR(err);
+}
+
+/*
+ * Build a DSA key pair from PQG parameters and a caller-supplied private
+ * value.
+ *
+ * params  - domain parameters; prime, subPrime and base are copied into
+ *           the new key.
+ * seed    - octet string holding the private value x.
+ * privKey - on success receives the new key, which lives in its own arena
+ *           (key->params.arena).
+ *
+ * The public value is computed as y = g**x mod p. Returns SECSuccess, or
+ * SECFailure with the error code set; on failure the arena is released and
+ * *privKey is left untouched.
+ */
+static SECStatus
+dsa_NewKeyExtended(const PQGParams *params, const SECItem *seed,
+                   DSAPrivateKey **privKey)
+{
+    mp_int p, g;
+    mp_int x, y;
+    mp_err err;
+    PLArenaPool *arena;
+    DSAPrivateKey *key;
+    /* Check args. */
+    if (!params || !privKey || !seed || !seed->data) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    /* Initialize an arena for the DSA key. */
+    arena = PORT_NewArena(NSS_FREEBL_DSA_DEFAULT_CHUNKSIZE);
+    if (!arena) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+    key = (DSAPrivateKey *)PORT_ArenaZAlloc(arena, sizeof(DSAPrivateKey));
+    if (!key) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        PORT_FreeArena(arena, PR_TRUE);
+        return SECFailure;
+    }
+    key->params.arena = arena;
+    /* Initialize MPI integers. */
+    MP_DIGITS(&p) = 0;
+    MP_DIGITS(&g) = 0;
+    MP_DIGITS(&x) = 0;
+    MP_DIGITS(&y) = 0;
+    CHECK_MPI_OK(mp_init(&p));
+    CHECK_MPI_OK(mp_init(&g));
+    CHECK_MPI_OK(mp_init(&x));
+    CHECK_MPI_OK(mp_init(&y));
+    /* Copy over the PQG params */
+    CHECK_MPI_OK(SECITEM_CopyItem(arena, &key->params.prime,
+                                  &params->prime));
+    CHECK_MPI_OK(SECITEM_CopyItem(arena, &key->params.subPrime,
+                                  &params->subPrime));
+    CHECK_MPI_OK(SECITEM_CopyItem(arena, &key->params.base, &params->base));
+    /* Convert stored p, g, and received x into MPI integers. */
+    SECITEM_TO_MPINT(params->prime, &p);
+    SECITEM_TO_MPINT(params->base, &g);
+    OCTETS_TO_MPINT(seed->data, &x, seed->len);
+    /* Store x in private key; bail out instead of copying through a NULL
+     * pointer when the arena cannot supply the buffer */
+    if (!SECITEM_AllocItem(arena, &key->privateValue, seed->len)) {
+        err = MP_MEM;
+        goto cleanup;
+    }
+    PORT_Memcpy(key->privateValue.data, seed->data, seed->len);
+    /* Compute public key y = g**x mod p */
+    CHECK_MPI_OK(mp_exptmod(&g, &x, &p, &y));
+    /* Store y in public key */
+    MPINT_TO_SECITEM(&y, &key->publicValue, arena);
+    *privKey = key;
+    key = NULL; /* ownership passed to the caller; skip the arena free */
+cleanup:
+    mp_clear(&p);
+    mp_clear(&g);
+    mp_clear(&x);
+    mp_clear(&y);
+    if (key)
+        PORT_FreeArena(key->params.arena, PR_TRUE);
+    if (err) {
+        translate_mpi_error(err);
+        return SECFailure;
+    }
+    return SECSuccess;
+}
+
+/*
+ * Generate a random value x with 1 < x < q, for use as a DSA private value
+ * or blinding factor.
+ *
+ * arena - arena to allocate seed->data from, or NULL to use the heap.
+ * q     - the subprime; must be non-empty and not a single zero byte.
+ * seed  - receives the value; seed->len is set to the length of q without
+ *         its leading zero byte.
+ *
+ * Up to 10 candidates are drawn; values of 0 and 1 are rejected. Returns
+ * SECFailure with SEC_ERROR_NEED_RANDOM when no acceptable value was found,
+ * or with the generator's error when random generation failed. On failure
+ * seed is left empty (data == NULL, len == 0).
+ */
+SECStatus
+DSA_NewRandom(PLArenaPool *arena, const SECItem *q, SECItem *seed)
+{
+    int retries = 10;
+    unsigned int i;
+    PRBool good;
+
+    if (q == NULL || q->data == NULL || q->len == 0 ||
+        (q->data[0] == 0 && q->len == 1)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    if (!SECITEM_AllocItem(arena, seed, q->len)) {
+        return SECFailure;
+    }
+
+    do {
+        /* Generate seed bytes for x according to FIPS 186-1 appendix 3 */
+        if (dsa_GenerateGlobalRandomBytes(q, seed->data, &seed->len,
+                                          seed->len) != SECSuccess) {
+            goto loser;
+        }
+        /* Disallow values of 0 and 1 for x. */
+        good = PR_FALSE;
+        for (i = 0; i < seed->len - 1; i++) {
+            if (seed->data[i] != 0) {
+                good = PR_TRUE;
+                break;
+            }
+        }
+        /* all leading bytes are zero: the last byte decides */
+        if (!good && seed->data[i] > 1) {
+            good = PR_TRUE;
+        }
+    } while (!good && --retries > 0);
+
+    if (!good) {
+        PORT_SetError(SEC_ERROR_NEED_RANDOM);
+    loser:
+        if (arena == NULL) {
+            /* heap buffer: wipe and release it (also resets data/len) */
+            SECITEM_ZfreeItem(seed, PR_FALSE);
+        } else {
+            /* arena memory must not go through the heap free; wipe it and
+             * leave it to be released together with the arena */
+            if (seed->data != NULL) {
+                PORT_Memset(seed->data, 0, seed->len);
+            }
+            seed->data = NULL;
+            seed->len = 0;
+        }
+        return SECFailure;
+    }
+
+    return SECSuccess;
+}
+
+/*
+** Generate and return a new DSA public and private key pair,
+** both of which are encoded into a single DSAPrivateKey struct.
+** "params" is a pointer to the PQG parameters for the domain
+** Uses a random seed.
+**
+** The parameters are first validated with PQG_Check. The random private
+** value is wiped before its temporary buffer is released.
+*/
+SECStatus
+DSA_NewKey(const PQGParams *params, DSAPrivateKey **privKey)
+{
+    SECItem seed;
+    SECStatus rv;
+
+    rv = PQG_Check(params);
+    if (rv != SECSuccess) {
+        return rv;
+    }
+    seed.data = NULL;
+
+    rv = DSA_NewRandom(NULL, &params->subPrime, &seed);
+    if (rv == SECSuccess) {
+        if (seed.len != PQG_GetLength(&params->subPrime)) {
+            PORT_SetError(SEC_ERROR_INVALID_ARGS);
+            rv = SECFailure;
+        } else {
+            rv = dsa_NewKeyExtended(params, &seed, privKey);
+        }
+    }
+    /* seed holds the private value x: zeroize it, do not just free it */
+    SECITEM_ZfreeItem(&seed, PR_FALSE);
+    return rv;
+}
+
+/*
+ * For FIPS compliance testing: build a key pair from a caller-chosen
+ * private value. The seed buffer must be exactly as long as subPrime,
+ * because that length is what gets read from it.
+ */
+SECStatus
+DSA_NewKeyFromSeed(const PQGParams *params,
+                   const unsigned char *seed,
+                   DSAPrivateKey **privKey)
+{
+    SECItem xItem;
+
+    xItem.len = PQG_GetLength(&params->subPrime);
+    xItem.data = (unsigned char *)seed;
+    return dsa_NewKeyExtended(params, &xItem, privKey);
+}
+
+/*
+ * Core DSA signing routine.
+ *
+ * key       - private key with its PQG parameters.
+ * signature - caller-supplied buffer; signature->len must be at least twice
+ *             the subprime length on input and is set to exactly that on
+ *             success. The output is r followed by s, each of subprime
+ *             length.
+ * digest    - hash to sign, SHA1_LENGTH..HASH_LENGTH_MAX bytes.
+ * kb        - the per-signature secret k, subprime-length bytes.
+ *
+ * Returns SECFailure with SEC_ERROR_INVALID_ARGS for bad arguments
+ * (including a subprime longer than DSA_MAX_SUBPRIME_LEN), with
+ * SEC_ERROR_NEED_RANDOM when r or s came out as zero or no blinding value
+ * could be generated, or with a translated MPI error.
+ */
+static SECStatus
+dsa_SignDigest(DSAPrivateKey *key, SECItem *signature, const SECItem *digest,
+               const unsigned char *kb)
+{
+    mp_int p, q, g; /* PQG parameters */
+    mp_int x, k;    /* private key & pseudo-random integer */
+    mp_int r, s;    /* tuple (r, s) is signature) */
+    mp_int t;       /* holding tmp values */
+    mp_err err = MP_OKAY;
+    SECStatus rv = SECSuccess;
+    unsigned int dsa_subprime_len, dsa_signature_len, offset;
+    SECItem localDigest;
+    unsigned char localDigestData[DSA_MAX_SUBPRIME_LEN];
+    SECItem t2 = { siBuffer, NULL, 0 };
+
+    /* FIPS-compliance dictates that digest is a SHA hash. */
+    /* Check args. */
+    if (!key || !signature || !digest || !kb) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    dsa_subprime_len = PQG_GetLength(&key->params.subPrime);
+    dsa_signature_len = dsa_subprime_len * 2;
+    /* localDigestData holds dsa_subprime_len bytes, so a longer subprime
+     * must be rejected before the buffer is touched */
+    if ((dsa_subprime_len > DSA_MAX_SUBPRIME_LEN) ||
+        (signature->len < dsa_signature_len) ||
+        (digest->len > HASH_LENGTH_MAX) ||
+        (digest->len < SHA1_LENGTH)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* DSA accepts digests not equal to dsa_subprime_len, if the
+     * digests are greater, then they are truncated to the size of
+     * dsa_subprime_len, using the left most bits. If they are less
+     * then they are padded on the left.*/
+    PORT_Memset(localDigestData, 0, dsa_subprime_len);
+    offset = (digest->len < dsa_subprime_len) ? (dsa_subprime_len - digest->len) : 0;
+    PORT_Memcpy(localDigestData + offset, digest->data,
+                dsa_subprime_len - offset);
+    localDigest.data = localDigestData;
+    localDigest.len = dsa_subprime_len;
+
+    /* Initialize MPI integers. */
+    MP_DIGITS(&p) = 0;
+    MP_DIGITS(&q) = 0;
+    MP_DIGITS(&g) = 0;
+    MP_DIGITS(&x) = 0;
+    MP_DIGITS(&k) = 0;
+    MP_DIGITS(&r) = 0;
+    MP_DIGITS(&s) = 0;
+    MP_DIGITS(&t) = 0;
+    CHECK_MPI_OK(mp_init(&p));
+    CHECK_MPI_OK(mp_init(&q));
+    CHECK_MPI_OK(mp_init(&g));
+    CHECK_MPI_OK(mp_init(&x));
+    CHECK_MPI_OK(mp_init(&k));
+    CHECK_MPI_OK(mp_init(&r));
+    CHECK_MPI_OK(mp_init(&s));
+    CHECK_MPI_OK(mp_init(&t));
+    /*
+    ** Convert stored PQG and private key into MPI integers.
+    */
+    SECITEM_TO_MPINT(key->params.prime, &p);
+    SECITEM_TO_MPINT(key->params.subPrime, &q);
+    SECITEM_TO_MPINT(key->params.base, &g);
+    SECITEM_TO_MPINT(key->privateValue, &x);
+    OCTETS_TO_MPINT(kb, &k, dsa_subprime_len);
+    /*
+    ** FIPS 186-1, Section 5, Step 1
+    **
+    ** r = (g**k mod p) mod q
+    */
+    CHECK_MPI_OK(mp_exptmod(&g, &k, &p, &r)); /* r = g**k mod p */
+    CHECK_MPI_OK(mp_mod(&r, &q, &r));         /* r = r mod q    */
+    /*
+    ** FIPS 186-1, Section 5, Step 2
+    **
+    ** s = (k**-1 * (HASH(M) + x*r)) mod q
+    **
+    ** k is multiplied by a random t before the inversion and by t again
+    ** afterwards, so the inversion never operates on k itself.
+    */
+    if (DSA_NewRandom(NULL, &key->params.subPrime, &t2) != SECSuccess) {
+        PORT_SetError(SEC_ERROR_NEED_RANDOM);
+        rv = SECFailure;
+        goto cleanup;
+    }
+    SECITEM_TO_MPINT(t2, &t);                /* t <-$ Zq */
+    CHECK_MPI_OK(mp_mulmod(&k, &t, &q, &k)); /* k = k * t mod q */
+    CHECK_MPI_OK(mp_invmod(&k, &q, &k));     /* k = k**-1 mod q */
+    CHECK_MPI_OK(mp_mulmod(&k, &t, &q, &k)); /* k = k * t mod q */
+    SECITEM_TO_MPINT(localDigest, &s);       /* s = HASH(M)     */
+    CHECK_MPI_OK(mp_mulmod(&x, &r, &q, &x)); /* x = x * r mod q */
+    CHECK_MPI_OK(mp_addmod(&s, &x, &q, &s)); /* s = s + x mod q */
+    CHECK_MPI_OK(mp_mulmod(&s, &k, &q, &s)); /* s = s * k mod q */
+    /*
+    ** verify r != 0 and s != 0
+    ** mentioned as optional in FIPS 186-1.
+    */
+    if (mp_cmp_z(&r) == 0 || mp_cmp_z(&s) == 0) {
+        PORT_SetError(SEC_ERROR_NEED_RANDOM);
+        rv = SECFailure;
+        goto cleanup;
+    }
+    /*
+    ** Step 4
+    **
+    ** Signature is tuple (r, s)
+    */
+    err = mp_to_fixlen_octets(&r, signature->data, dsa_subprime_len);
+    if (err < 0)
+        goto cleanup;
+    err = mp_to_fixlen_octets(&s, signature->data + dsa_subprime_len,
+                              dsa_subprime_len);
+    if (err < 0)
+        goto cleanup;
+    err = MP_OKAY;
+    signature->len = dsa_signature_len;
+cleanup:
+    PORT_Memset(localDigestData, 0, DSA_MAX_SUBPRIME_LEN);
+    mp_clear(&p);
+    mp_clear(&q);
+    mp_clear(&g);
+    mp_clear(&x);
+    mp_clear(&k);
+    mp_clear(&r);
+    mp_clear(&s);
+    mp_clear(&t);
+    /* the blinding value is secret material: wipe it when freeing */
+    SECITEM_ZfreeItem(&t2, PR_FALSE);
+    if (err) {
+        translate_mpi_error(err);
+        rv = SECFailure;
+    }
+    return rv;
+}
+
+/* signature is a caller-supplied buffer of at least twice the subprime
+** length (40 bytes for a 160-bit q).
+** On input,  signature->len == size of buffer to hold signature.
+**            digest->len    == size of digest.
+** On output, signature->len == size of signature in buffer.
+** Uses a random seed.
+**
+** A fresh non-zero k is drawn for every attempt; the attempt is repeated
+** (up to 10 times) as long as it fails with SEC_ERROR_NEED_RANDOM. A NULL
+** key is rejected with SEC_ERROR_INVALID_ARGS.
+*/
+SECStatus
+DSA_SignDigest(DSAPrivateKey *key, SECItem *signature, const SECItem *digest)
+{
+    SECStatus rv;
+    int retries = 10;
+    unsigned char kSeed[DSA_MAX_SUBPRIME_LEN];
+    unsigned int kSeedLen = 0;
+    unsigned int i;
+    unsigned int dsa_subprime_len;
+    PRBool good;
+
+    /* key is dereferenced below, before dsa_SignDigest gets to check it */
+    if (!key) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    dsa_subprime_len = PQG_GetLength(&key->params.subPrime);
+
+    PORT_SetError(0);
+    do {
+        rv = dsa_GenerateGlobalRandomBytes(&key->params.subPrime,
+                                           kSeed, &kSeedLen, sizeof kSeed);
+        if (rv != SECSuccess)
+            break;
+        if (kSeedLen != dsa_subprime_len) {
+            PORT_SetError(SEC_ERROR_INVALID_ARGS);
+            rv = SECFailure;
+            break;
+        }
+        /* Disallow a value of 0 for k. */
+        good = PR_FALSE;
+        for (i = 0; i < kSeedLen; i++) {
+            if (kSeed[i] != 0) {
+                good = PR_TRUE;
+                break;
+            }
+        }
+        if (!good) {
+            PORT_SetError(SEC_ERROR_NEED_RANDOM);
+            rv = SECFailure;
+            continue; /* re-evaluates the loop condition, i.e. retries */
+        }
+        rv = dsa_SignDigest(key, signature, digest, kSeed);
+    } while (rv != SECSuccess && PORT_GetError() == SEC_ERROR_NEED_RANDOM &&
+             --retries > 0);
+
+    /* k must stay secret; do not leave it behind on the stack */
+    PORT_Memset(kSeed, 0, sizeof kSeed);
+    return rv;
+}
+
+/*
+ * For FIPS compliance testing: sign with a caller-chosen k instead of a
+ * random one. Seed must be exactly 20 bytes.
+ */
+SECStatus
+DSA_SignDigestWithSeed(DSAPrivateKey *key,
+                       SECItem *signature,
+                       const SECItem *digest,
+                       const unsigned char *seed)
+{
+    /* no retry loop here: the seed is used exactly as given */
+    return dsa_SignDigest(key, signature, digest, seed);
+}
+
+/* Verify a DSA signature over a digest.
+**
+** key       - public key with its PQG parameters.
+** signature - r followed by s; signature->len must be exactly twice the
+**             subprime length.
+** digest    - hash that was signed, SHA1_LENGTH..HASH_LENGTH_MAX bytes.
+**
+** Returns SECSuccess when the signature verifies. Otherwise returns
+** SECFailure with SEC_ERROR_INVALID_ARGS (bad arguments, including a
+** subprime longer than DSA_MAX_SUBPRIME_LEN), SEC_ERROR_BAD_SIGNATURE, or
+** a translated MPI error.
+*/
+SECStatus
+DSA_VerifyDigest(DSAPublicKey *key, const SECItem *signature,
+                 const SECItem *digest)
+{
+    /* FIPS-compliance dictates that digest is a SHA hash. */
+    mp_int p, q, g;      /* PQG parameters */
+    mp_int r_, s_;       /* tuple (r', s') is received signature) */
+    mp_int u1, u2, v, w; /* intermediate values used in verification */
+    mp_int y;            /* public key */
+    mp_err err;
+    unsigned int dsa_subprime_len, dsa_signature_len, offset;
+    SECItem localDigest;
+    unsigned char localDigestData[DSA_MAX_SUBPRIME_LEN];
+    SECStatus verified = SECFailure;
+
+    /* Check args. */
+    if (!key || !signature || !digest) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    dsa_subprime_len = PQG_GetLength(&key->params.subPrime);
+    dsa_signature_len = dsa_subprime_len * 2;
+    /* localDigestData holds dsa_subprime_len bytes, so a longer subprime
+     * must be rejected before the buffer is touched */
+    if ((dsa_subprime_len > DSA_MAX_SUBPRIME_LEN) ||
+        (signature->len != dsa_signature_len) ||
+        (digest->len > HASH_LENGTH_MAX) ||
+        (digest->len < SHA1_LENGTH)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* DSA accepts digests not equal to dsa_subprime_len, if the
+     * digests are greater, then they are truncated to the size of
+     * dsa_subprime_len, using the left most bits. If they are less
+     * then they are padded on the left.*/
+    PORT_Memset(localDigestData, 0, dsa_subprime_len);
+    offset = (digest->len < dsa_subprime_len) ? (dsa_subprime_len - digest->len) : 0;
+    PORT_Memcpy(localDigestData + offset, digest->data,
+                dsa_subprime_len - offset);
+    localDigest.data = localDigestData;
+    localDigest.len = dsa_subprime_len;
+
+    /* Initialize MPI integers. */
+    MP_DIGITS(&p) = 0;
+    MP_DIGITS(&q) = 0;
+    MP_DIGITS(&g) = 0;
+    MP_DIGITS(&y) = 0;
+    MP_DIGITS(&r_) = 0;
+    MP_DIGITS(&s_) = 0;
+    MP_DIGITS(&u1) = 0;
+    MP_DIGITS(&u2) = 0;
+    MP_DIGITS(&v) = 0;
+    MP_DIGITS(&w) = 0;
+    CHECK_MPI_OK(mp_init(&p));
+    CHECK_MPI_OK(mp_init(&q));
+    CHECK_MPI_OK(mp_init(&g));
+    CHECK_MPI_OK(mp_init(&y));
+    CHECK_MPI_OK(mp_init(&r_));
+    CHECK_MPI_OK(mp_init(&s_));
+    CHECK_MPI_OK(mp_init(&u1));
+    CHECK_MPI_OK(mp_init(&u2));
+    CHECK_MPI_OK(mp_init(&v));
+    CHECK_MPI_OK(mp_init(&w));
+    /*
+    ** Convert stored PQG and public key into MPI integers.
+    */
+    SECITEM_TO_MPINT(key->params.prime, &p);
+    SECITEM_TO_MPINT(key->params.subPrime, &q);
+    SECITEM_TO_MPINT(key->params.base, &g);
+    SECITEM_TO_MPINT(key->publicValue, &y);
+    /*
+    ** Convert received signature (r', s') into MPI integers.
+    */
+    OCTETS_TO_MPINT(signature->data, &r_, dsa_subprime_len);
+    OCTETS_TO_MPINT(signature->data + dsa_subprime_len, &s_, dsa_subprime_len);
+    /*
+    ** Verify that 0 < r' < q and 0 < s' < q
+    */
+    if (mp_cmp_z(&r_) <= 0 || mp_cmp_z(&s_) <= 0 ||
+        mp_cmp(&r_, &q) >= 0 || mp_cmp(&s_, &q) >= 0) {
+        /* err is zero here. */
+        PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+        goto cleanup; /* will return verified == SECFailure */
+    }
+    /*
+    ** FIPS 186-1, Section 6, Step 1
+    **
+    ** w = (s')**-1 mod q
+    */
+    CHECK_MPI_OK(mp_invmod(&s_, &q, &w)); /* w = (s')**-1 mod q */
+    /*
+    ** FIPS 186-1, Section 6, Step 2
+    **
+    ** u1 = ((Hash(M')) * w) mod q
+    */
+    SECITEM_TO_MPINT(localDigest, &u1);        /* u1 = HASH(M')     */
+    CHECK_MPI_OK(mp_mulmod(&u1, &w, &q, &u1)); /* u1 = u1 * w mod q */
+    /*
+    ** FIPS 186-1, Section 6, Step 3
+    **
+    ** u2 = ((r') * w) mod q
+    */
+    CHECK_MPI_OK(mp_mulmod(&r_, &w, &q, &u2));
+    /*
+    ** FIPS 186-1, Section 6, Step 4
+    **
+    ** v = ((g**u1 * y**u2) mod p) mod q
+    */
+    CHECK_MPI_OK(mp_exptmod(&g, &u1, &p, &g)); /* g = g**u1 mod p */
+    CHECK_MPI_OK(mp_exptmod(&y, &u2, &p, &y)); /* y = y**u2 mod p */
+    CHECK_MPI_OK(mp_mulmod(&g, &y, &p, &v));   /* v = g * y mod p */
+    CHECK_MPI_OK(mp_mod(&v, &q, &v));          /* v = v mod q     */
+    /*
+    ** Verification: v == r'
+    */
+    if (mp_cmp(&v, &r_)) {
+        PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+        verified = SECFailure; /* Signature failed to verify. */
+    } else {
+        verified = SECSuccess; /* Signature verified. */
+    }
+cleanup:
+    mp_clear(&p);
+    mp_clear(&q);
+    mp_clear(&g);
+    mp_clear(&y);
+    mp_clear(&r_);
+    mp_clear(&s_);
+    mp_clear(&u1);
+    mp_clear(&u2);
+    mp_clear(&v);
+    mp_clear(&w);
+    if (err) {
+        translate_mpi_error(err);
+    }
+    return verified;
+}
diff --git a/security/nss/lib/freebl/ec.c b/security/nss/lib/freebl/ec.c
new file mode 100644
index 000000000..12bfeed41
--- /dev/null
+++ b/security/nss/lib/freebl/ec.c
@@ -0,0 +1,1159 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "blapi.h"
+#include "prerr.h"
+#include "secerr.h"
+#include "secmpi.h"
+#include "secitem.h"
+#include "mplogic.h"
+#include "ec.h"
+#include "ecl.h"
+
+#ifndef NSS_DISABLE_ECC
+
+/* Table of curves that are served by dedicated per-curve callbacks.
+ * Each entry gives the curve name followed by its point-multiplication
+ * and point-validation functions; ec_get_method_from_name() searches it. */
+static const ECMethod kMethods[] = {
+ { ECCurve25519,
+ ec_Curve25519_pt_mul,
+ ec_Curve25519_pt_validate }
+};
+
+/* Finds the kMethods entry registered for |name|.
+ * Returns a pointer into the table, or NULL when no entry matches. */
+static const ECMethod *
+ec_get_method_from_name(ECCurveName name)
+{
+    const ECMethod *entry = kMethods;
+    const ECMethod *const end =
+        kMethods + sizeof(kMethods) / sizeof(kMethods[0]);
+
+    while (entry != end) {
+        if (entry->name == name) {
+            return entry;
+        }
+        ++entry;
+    }
+    return NULL;
+}
+
+/*
+ * Reports whether pointP encodes the point at infinity: every byte after
+ * the first one must be zero. The byte at index 0 is not inspected.
+ * Returns PR_TRUE in that case (also for items of length 0 or 1),
+ * PR_FALSE otherwise.
+ */
+PRBool
+ec_point_at_infinity(SECItem *pointP)
+{
+    unsigned int idx = 1;
+
+    while (idx < pointP->len) {
+        if (pointP->data[idx] != 0x00) {
+            return PR_FALSE;
+        }
+        ++idx;
+    }
+
+    return PR_TRUE;
+}
+
+/*
+ * Computes scalar point multiplication pointQ = k1 * G + k2 * pointP for
+ * the curve whose parameters are encoded in params with base point G.
+ *
+ * The k2 * pointP term is only included when both k2 and pointP are
+ * non-NULL; otherwise only k1 * G is computed. When pointP is given it
+ * must be an uncompressed point of exactly 2 * len + 1 bytes, where len is
+ * the field size in bytes. The result is written in uncompressed form to
+ * pointQ->data (1 + 2 * len bytes); the caller provides that buffer and
+ * its size is not checked here.
+ *
+ * Returns SECSuccess, or SECFailure when the point form is unsupported,
+ * the named curve cannot be constructed, or an MPI operation fails.
+ */
+SECStatus
+ec_points_mul(const ECParams *params, const mp_int *k1, const mp_int *k2,
+              const SECItem *pointP, SECItem *pointQ)
+{
+    mp_int Px, Py, Qx, Qy;
+    ECGroup *group = NULL;
+    SECStatus rv = SECFailure;
+    mp_err err = MP_OKAY;
+    int len;
+
+#if EC_DEBUG
+    int i;
+    char mpstr[256];
+
+    printf("ec_points_mul: params [len=%d]:", params->DEREncoding.len);
+    for (i = 0; i < params->DEREncoding.len; i++)
+        printf("%02x:", params->DEREncoding.data[i]);
+    printf("\n");
+
+    if (k1 != NULL) {
+        mp_tohex((mp_int *)k1, mpstr);
+        printf("ec_points_mul: scalar k1: %s\n", mpstr);
+        mp_todecimal((mp_int *)k1, mpstr);
+        printf("ec_points_mul: scalar k1: %s (dec)\n", mpstr);
+    }
+
+    if (k2 != NULL) {
+        mp_tohex((mp_int *)k2, mpstr);
+        printf("ec_points_mul: scalar k2: %s\n", mpstr);
+        mp_todecimal((mp_int *)k2, mpstr);
+        printf("ec_points_mul: scalar k2: %s (dec)\n", mpstr);
+    }
+
+    if (pointP != NULL) {
+        printf("ec_points_mul: pointP [len=%d]:", pointP->len);
+        for (i = 0; i < pointP->len; i++)
+            printf("%02x:", pointP->data[i]);
+        printf("\n");
+    }
+#endif
+
+    /* NOTE: We only support uncompressed points for now */
+    len = (params->fieldID.size + 7) >> 3;
+    if (pointP != NULL) {
+        /* Test the buffer and its length before looking at data[0] so an
+         * empty item is rejected without being dereferenced. */
+        if ((pointP->data == NULL) ||
+            (pointP->len != (2 * len + 1)) ||
+            (pointP->data[0] != EC_POINT_FORM_UNCOMPRESSED)) {
+            PORT_SetError(SEC_ERROR_UNSUPPORTED_EC_POINT_FORM);
+            return SECFailure;
+        }
+    }
+
+    /* Zero the digit pointers first so cleanup may call mp_clear() on
+     * every integer even if an mp_init() below fails. */
+    MP_DIGITS(&Px) = 0;
+    MP_DIGITS(&Py) = 0;
+    MP_DIGITS(&Qx) = 0;
+    MP_DIGITS(&Qy) = 0;
+    CHECK_MPI_OK(mp_init(&Px));
+    CHECK_MPI_OK(mp_init(&Py));
+    CHECK_MPI_OK(mp_init(&Qx));
+    CHECK_MPI_OK(mp_init(&Qy));
+
+    if ((k2 != NULL) && (pointP != NULL)) {
+        /* Initialize Px and Py */
+        CHECK_MPI_OK(mp_read_unsigned_octets(&Px, pointP->data + 1, (mp_size)len));
+        CHECK_MPI_OK(mp_read_unsigned_octets(&Py, pointP->data + 1 + len, (mp_size)len));
+    }
+
+    /* construct from named params, if possible */
+    if (params->name != ECCurve_noName) {
+        group = ECGroup_fromName(params->name);
+    }
+
+    /* rv is still SECFailure here; no error code is set on this path. */
+    if (group == NULL)
+        goto cleanup;
+
+    if ((k2 != NULL) && (pointP != NULL)) {
+        CHECK_MPI_OK(ECPoints_mul(group, k1, k2, &Px, &Py, &Qx, &Qy));
+    } else {
+        CHECK_MPI_OK(ECPoints_mul(group, k1, NULL, NULL, NULL, &Qx, &Qy));
+    }
+
+    /* Construct the SECItem representation of point Q */
+    pointQ->data[0] = EC_POINT_FORM_UNCOMPRESSED;
+    CHECK_MPI_OK(mp_to_fixlen_octets(&Qx, pointQ->data + 1,
+                                     (mp_size)len));
+    CHECK_MPI_OK(mp_to_fixlen_octets(&Qy, pointQ->data + 1 + len,
+                                     (mp_size)len));
+
+    rv = SECSuccess;
+
+#if EC_DEBUG
+    printf("ec_points_mul: pointQ [len=%d]:", pointQ->len);
+    for (i = 0; i < pointQ->len; i++)
+        printf("%02x:", pointQ->data[i]);
+    printf("\n");
+#endif
+
+cleanup:
+    ECGroup_free(group);
+    mp_clear(&Px);
+    mp_clear(&Py);
+    mp_clear(&Qx);
+    mp_clear(&Qy);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+
+    return rv;
+}
+#endif /* NSS_DISABLE_ECC */
+
+/* Generates a new EC key pair. The private key is a supplied
+ * value and the public key is the result of performing a scalar
+ * point multiplication of that value with the curve's base point.
+ *
+ * ecParams      curve parameters; every field is copied into the new key.
+ * privKey       out: receives the new key, written on success only.
+ * privKeyBytes  private scalar bytes, privKeyLen of them. Input longer
+ *               than the order length is truncated to its first bytes;
+ *               shorter input is left-padded with zeros.
+ *
+ * The key and everything it points to live in a freshly created arena
+ * (key->ecParams.arena). On any failure that arena is released, *privKey
+ * is left untouched and SECFailure is returned.
+ */
+SECStatus
+ec_NewKey(ECParams *ecParams, ECPrivateKey **privKey,
+          const unsigned char *privKeyBytes, int privKeyLen)
+{
+    SECStatus rv = SECFailure;
+#ifndef NSS_DISABLE_ECC
+    PLArenaPool *arena;
+    ECPrivateKey *key;
+    mp_int k;
+    mp_err err = MP_OKAY;
+    int len;
+
+#if EC_DEBUG
+    printf("ec_NewKey called\n");
+#endif
+    MP_DIGITS(&k) = 0;
+
+    if (!ecParams || !privKey || !privKeyBytes || (privKeyLen < 0) ||
+        !ecParams->name) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* Initialize an arena for the EC key. */
+    if (!(arena = PORT_NewArena(NSS_FREEBL_DEFAULT_CHUNKSIZE)))
+        return SECFailure;
+
+    key = (ECPrivateKey *)PORT_ArenaZAlloc(arena, sizeof(ECPrivateKey));
+    if (!key) {
+        PORT_FreeArena(arena, PR_TRUE);
+        return SECFailure;
+    }
+
+    /* Set the version number (SEC 1 section C.4 says it should be 1) */
+    if (SECITEM_AllocItem(arena, &key->version, 1) == NULL) {
+        rv = SECFailure;
+        goto cleanup;
+    }
+    key->version.data[0] = 1;
+
+    /* Copy all of the fields from the ECParams argument to the
+     * ECParams structure within the private key.
+     */
+    key->ecParams.arena = arena;
+    key->ecParams.type = ecParams->type;
+    key->ecParams.fieldID.size = ecParams->fieldID.size;
+    key->ecParams.fieldID.type = ecParams->fieldID.type;
+    if (ecParams->fieldID.type == ec_field_GFp ||
+        ecParams->fieldID.type == ec_field_plain) {
+        CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->ecParams.fieldID.u.prime,
+                                      &ecParams->fieldID.u.prime));
+    } else {
+        CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->ecParams.fieldID.u.poly,
+                                      &ecParams->fieldID.u.poly));
+    }
+    key->ecParams.fieldID.k1 = ecParams->fieldID.k1;
+    key->ecParams.fieldID.k2 = ecParams->fieldID.k2;
+    key->ecParams.fieldID.k3 = ecParams->fieldID.k3;
+    CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->ecParams.curve.a,
+                                  &ecParams->curve.a));
+    CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->ecParams.curve.b,
+                                  &ecParams->curve.b));
+    CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->ecParams.curve.seed,
+                                  &ecParams->curve.seed));
+    CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->ecParams.base,
+                                  &ecParams->base));
+    CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->ecParams.order,
+                                  &ecParams->order));
+    key->ecParams.cofactor = ecParams->cofactor;
+    CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->ecParams.DEREncoding,
+                                  &ecParams->DEREncoding));
+    key->ecParams.name = ecParams->name;
+    CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->ecParams.curveOID,
+                                  &ecParams->curveOID));
+
+    /* rv is SECSuccess after the copies above, so each failure path from
+     * here on must set it explicitly before jumping to cleanup. */
+    if (SECITEM_AllocItem(arena, &key->publicValue,
+                          EC_GetPointSize(ecParams)) == NULL) {
+        rv = SECFailure;
+        goto cleanup;
+    }
+    len = ecParams->order.len;
+    if (SECITEM_AllocItem(arena, &key->privateValue, len) == NULL) {
+        rv = SECFailure;
+        goto cleanup;
+    }
+
+    /* Copy private key */
+    if (privKeyLen >= len) {
+        memcpy(key->privateValue.data, privKeyBytes, len);
+    } else {
+        memset(key->privateValue.data, 0, (len - privKeyLen));
+        memcpy(key->privateValue.data + (len - privKeyLen), privKeyBytes, privKeyLen);
+    }
+
+    /* Compute corresponding public key */
+
+    /* Use curve specific code for point multiplication */
+    if (ecParams->fieldID.type == ec_field_plain) {
+        const ECMethod *method = ec_get_method_from_name(ecParams->name);
+        if (method == NULL || method->mul == NULL) {
+            /* unknown curve */
+            rv = SECFailure;
+            goto cleanup;
+        }
+        rv = method->mul(&key->publicValue, &key->privateValue, NULL);
+        if (rv != SECSuccess) {
+            /* Do not hand out a key whose arena is about to be freed. */
+            goto cleanup;
+        }
+        goto done;
+    }
+
+    CHECK_MPI_OK(mp_init(&k));
+    CHECK_MPI_OK(mp_read_unsigned_octets(&k, key->privateValue.data,
+                                         (mp_size)len));
+
+    rv = ec_points_mul(ecParams, &k, NULL, NULL, &(key->publicValue));
+    if (rv != SECSuccess) {
+        goto cleanup;
+    }
+
+done:
+    *privKey = key;
+
+cleanup:
+    mp_clear(&k);
+    if (err) {
+        /* An MPI call failed while rv still held SECSuccess from the
+         * parameter copies; report the failure instead of success. */
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    if (rv) {
+        PORT_FreeArena(arena, PR_TRUE);
+    }
+
+#if EC_DEBUG
+    printf("ec_NewKey returning %s\n",
+           (rv == SECSuccess) ? "success" : "failure");
+#endif
+#else
+    PORT_SetError(SEC_ERROR_UNSUPPORTED_KEYALG);
+#endif /* NSS_DISABLE_ECC */
+
+    return rv;
+}
+
+/* Builds an EC key pair from caller-supplied randomness: seed (seedlen
+ * bytes) becomes the private value and the public value is obtained by
+ * multiplying the curve's base point by it. All of the work is delegated
+ * to ec_NewKey(). When ECC support is compiled out the call fails with
+ * SEC_ERROR_UNSUPPORTED_KEYALG.
+ */
+SECStatus
+EC_NewKeyFromSeed(ECParams *ecParams, ECPrivateKey **privKey,
+                  const unsigned char *seed, int seedlen)
+{
+#ifndef NSS_DISABLE_ECC
+    return ec_NewKey(ecParams, privKey, seed, seedlen);
+#else
+    PORT_SetError(SEC_ERROR_UNSUPPORTED_KEYALG);
+    return SECFailure;
+#endif /* NSS_DISABLE_ECC */
+}
+
+#ifndef NSS_DISABLE_ECC
+/* Generate a random private key using the algorithm A.4.1 of ANSI X9.62,
+ * modified a la FIPS 186-2 Change Notice 1 to eliminate the bias in the
+ * random number generator.
+ *
+ * Parameters
+ * - order: a buffer that holds the curve's group order
+ * - len: the length in octets of the order buffer
+ *
+ * Return Value
+ * Returns a buffer whose first len octets hold the private key, a value
+ * in [1, order - 1]. The allocation is 2 * len octets and the second half
+ * is zeroed before returning. Returns NULL on failure. The caller is
+ * responsible for freeing the buffer with PORT_ZFree.
+ */
+static unsigned char *
+ec_GenerateRandomPrivateKey(const unsigned char *order, int len)
+{
+    SECStatus rv = SECSuccess;
+    mp_err err = MP_OKAY;
+    unsigned char *keyBuf = NULL;
+    mp_int candidate, orderMinusOne, unit;
+
+    MP_DIGITS(&candidate) = 0;
+    MP_DIGITS(&orderMinusOne) = 0;
+    MP_DIGITS(&unit) = 0;
+    CHECK_MPI_OK(mp_init(&candidate));
+    CHECK_MPI_OK(mp_init(&orderMinusOne));
+    CHECK_MPI_OK(mp_init(&unit));
+
+    /* Draw twice as many random bytes as the order is long from the global
+     * random bit generator (which implements Algorithm 1 of FIPS 186-2
+     * Change Notice 1), then reduce modulo the group order.
+     */
+    keyBuf = PORT_Alloc(2 * len);
+    if (keyBuf == NULL) {
+        goto cleanup;
+    }
+    CHECK_SEC_OK(RNG_GenerateGlobalRandomBytes(keyBuf, 2 * len));
+    CHECK_MPI_OK(mp_read_unsigned_octets(&candidate, keyBuf, 2 * len));
+    CHECK_MPI_OK(mp_read_unsigned_octets(&orderMinusOne, order, len));
+    CHECK_MPI_OK(mp_set_int(&unit, 1));
+    /* key = (random mod (order - 1)) + 1 */
+    CHECK_MPI_OK(mp_sub(&orderMinusOne, &unit, &orderMinusOne));
+    CHECK_MPI_OK(mp_mod(&candidate, &orderMinusOne, &candidate));
+    CHECK_MPI_OK(mp_add(&candidate, &unit, &candidate));
+    CHECK_MPI_OK(mp_to_fixlen_octets(&candidate, keyBuf, len));
+    /* Wipe the leftover random bytes in the upper half of the buffer. */
+    memset(keyBuf + len, 0, len);
+
+cleanup:
+    mp_clear(&candidate);
+    mp_clear(&orderMinusOne);
+    mp_clear(&unit);
+    if (err < MP_OKAY) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    if (rv != SECSuccess && keyBuf) {
+        PORT_ZFree(keyBuf, 2 * len);
+        keyBuf = NULL;
+    }
+    return keyBuf;
+}
+#endif /* NSS_DISABLE_ECC */
+
+/* Creates a fresh EC key pair for the curve described by ecParams. A
+ * random private value is drawn with ec_GenerateRandomPrivateKey() and
+ * handed to ec_NewKey(), which derives the matching public value by
+ * multiplying the curve's base point. The temporary copy of the private
+ * value is zeroed and freed before returning.
+ */
+SECStatus
+EC_NewKey(ECParams *ecParams, ECPrivateKey **privKey)
+{
+    SECStatus rv = SECFailure;
+#ifndef NSS_DISABLE_ECC
+    int orderLen;
+    unsigned char *scalar;
+
+    if (!ecParams) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    orderLen = ecParams->order.len;
+    scalar = ec_GenerateRandomPrivateKey(ecParams->order.data, orderLen);
+    if (scalar != NULL) {
+        /* generate public key */
+        rv = ec_NewKey(ecParams, privKey, scalar, orderLen);
+        PORT_ZFree(scalar, orderLen);
+    }
+#if EC_DEBUG
+    printf("EC_NewKey returning %s\n",
+           (rv == SECSuccess) ? "success" : "failure");
+#endif
+#else
+    PORT_SetError(SEC_ERROR_UNSUPPORTED_KEYALG);
+#endif /* NSS_DISABLE_ECC */
+
+    return rv;
+}
+
+/* Validates an EC public key as described in Section 5.2.2 of
+ * X9.62. The ECDH primitive when used without the cofactor does
+ * not address small subgroup attacks, which may occur when the
+ * public key is not valid. These attacks can be prevented by
+ * validating the public key before using ECDH.
+ *
+ * ecParams     curve parameters; ecParams->name must be set.
+ * publicValue  encoded public point; must be non-empty.
+ *
+ * Returns SECSuccess when the point is valid. Otherwise returns
+ * SECFailure; the paths handled directly in this function set
+ * SEC_ERROR_INVALID_ARGS, SEC_ERROR_UNSUPPORTED_EC_POINT_FORM or
+ * SEC_ERROR_BAD_KEY, and MPI failures are mapped through
+ * MP_TO_SEC_ERROR.
+ */
+SECStatus
+EC_ValidatePublicKey(ECParams *ecParams, SECItem *publicValue)
+{
+#ifndef NSS_DISABLE_ECC
+    mp_int Px, Py;
+    ECGroup *group = NULL;
+    SECStatus rv = SECFailure;
+    mp_err err = MP_OKAY;
+    int len;
+
+    /* An empty item is rejected up front: the point-form check below
+     * reads publicValue->data[0]. */
+    if (!ecParams || !publicValue || !ecParams->name ||
+        !publicValue->data || publicValue->len == 0) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* Uses curve specific code for point validation. */
+    if (ecParams->fieldID.type == ec_field_plain) {
+        const ECMethod *method = ec_get_method_from_name(ecParams->name);
+        if (method == NULL || method->validate == NULL) {
+            /* unknown curve */
+            PORT_SetError(SEC_ERROR_INVALID_ARGS);
+            return SECFailure;
+        }
+        return method->validate(publicValue);
+    }
+
+    /* NOTE: We only support uncompressed points for now */
+    len = (ecParams->fieldID.size + 7) >> 3;
+    if (publicValue->data[0] != EC_POINT_FORM_UNCOMPRESSED) {
+        PORT_SetError(SEC_ERROR_UNSUPPORTED_EC_POINT_FORM);
+        return SECFailure;
+    } else if (publicValue->len != (2 * len + 1)) {
+        PORT_SetError(SEC_ERROR_BAD_KEY);
+        return SECFailure;
+    }
+
+    MP_DIGITS(&Px) = 0;
+    MP_DIGITS(&Py) = 0;
+    CHECK_MPI_OK(mp_init(&Px));
+    CHECK_MPI_OK(mp_init(&Py));
+
+    /* Initialize Px and Py */
+    CHECK_MPI_OK(mp_read_unsigned_octets(&Px, publicValue->data + 1, (mp_size)len));
+    CHECK_MPI_OK(mp_read_unsigned_octets(&Py, publicValue->data + 1 + len, (mp_size)len));
+
+    /* construct from named params */
+    group = ECGroup_fromName(ecParams->name);
+    if (group == NULL) {
+        /*
+         * ECGroup_fromName fails if ecParams->name is not a valid
+         * ECCurveName value, or if we run out of memory, or perhaps
+         * for other reasons. Unfortunately if ecParams->name is a
+         * valid ECCurveName value, we don't know what the right error
+         * code should be because ECGroup_fromName doesn't return an
+         * error code to the caller. Set err to MP_UNDEF because
+         * that's what ECGroup_fromName uses internally.
+         */
+        if ((ecParams->name <= ECCurve_noName) ||
+            (ecParams->name >= ECCurve_pastLastCurve)) {
+            err = MP_BADARG;
+        } else {
+            err = MP_UNDEF;
+        }
+        goto cleanup;
+    }
+
+    /* validate public point */
+    if ((err = ECPoint_validate(group, &Px, &Py)) < MP_YES) {
+        if (err == MP_NO) {
+            PORT_SetError(SEC_ERROR_BAD_KEY);
+            rv = SECFailure;
+            err = MP_OKAY; /* don't change the error code */
+        }
+        goto cleanup;
+    }
+
+    rv = SECSuccess;
+
+cleanup:
+    ECGroup_free(group);
+    mp_clear(&Px);
+    mp_clear(&Py);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    return rv;
+#else
+    PORT_SetError(SEC_ERROR_UNSUPPORTED_KEYALG);
+    return SECFailure;
+#endif /* NSS_DISABLE_ECC */
+}
+
+/*
+** Performs an ECDH key derivation by computing the scalar point
+** multiplication of privateValue and publicValue (with or without the
+** cofactor) and returns the x-coordinate of the resulting elliptic
+** curve point in derived secret. If successful, derivedSecret->data
+** is set to the address of the newly allocated buffer containing the
+** derived secret, and derivedSecret->len is the size of the secret
+** produced. It is the caller's responsibility to free the allocated
+** buffer containing the derived secret.
+**
+** derivedSecret is zeroed on entry once the arguments have been accepted.
+** On failure no buffer is left allocated in it.
+*/
+SECStatus
+ECDH_Derive(SECItem *publicValue,
+            ECParams *ecParams,
+            SECItem *privateValue,
+            PRBool withCofactor,
+            SECItem *derivedSecret)
+{
+    SECStatus rv = SECFailure;
+#ifndef NSS_DISABLE_ECC
+    unsigned int len = 0;
+    SECItem pointQ = { siBuffer, NULL, 0 };
+    mp_int k; /* to hold the private value */
+    mp_int cofactor;
+    mp_err err = MP_OKAY;
+#if EC_DEBUG
+    int i;
+#endif
+
+    if (!publicValue || !ecParams || !privateValue || !derivedSecret ||
+        !ecParams->name) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* Perform curve specific multiplication using ECMethod */
+    if (ecParams->fieldID.type == ec_field_plain) {
+        const ECMethod *method;
+        memset(derivedSecret, 0, sizeof(*derivedSecret));
+        /* Resolve the method and validate the peer's point before
+         * allocating, so the early returns have nothing to release. */
+        method = ec_get_method_from_name(ecParams->name);
+        if (method == NULL || method->validate == NULL ||
+            method->mul == NULL) {
+            PORT_SetError(SEC_ERROR_UNSUPPORTED_ELLIPTIC_CURVE);
+            return SECFailure;
+        }
+        if (method->validate(publicValue) != SECSuccess) {
+            PORT_SetError(SEC_ERROR_BAD_KEY);
+            return SECFailure;
+        }
+        if (SECITEM_AllocItem(NULL, derivedSecret,
+                              EC_GetPointSize(ecParams)) == NULL) {
+            PORT_SetError(SEC_ERROR_NO_MEMORY);
+            return SECFailure;
+        }
+        rv = method->mul(derivedSecret, privateValue, publicValue);
+        if (rv != SECSuccess) {
+            /* Do not leak (or expose) a partially written secret. */
+            SECITEM_ZfreeItem(derivedSecret, PR_FALSE);
+        }
+        return rv;
+    }
+
+    /*
+     * We fail if the public value is the point at infinity, since
+     * this produces predictable results.
+     */
+    if (ec_point_at_infinity(publicValue)) {
+        PORT_SetError(SEC_ERROR_BAD_KEY);
+        return SECFailure;
+    }
+
+    /* Zero both digit pointers so cleanup can mp_clear() them
+     * unconditionally, whether or not they were initialised. */
+    MP_DIGITS(&k) = 0;
+    MP_DIGITS(&cofactor) = 0;
+    memset(derivedSecret, 0, sizeof *derivedSecret);
+    len = (ecParams->fieldID.size + 7) >> 3;
+    pointQ.len = EC_GetPointSize(ecParams);
+    if ((pointQ.data = PORT_Alloc(pointQ.len)) == NULL)
+        goto cleanup;
+
+    CHECK_MPI_OK(mp_init(&k));
+    CHECK_MPI_OK(mp_read_unsigned_octets(&k, privateValue->data,
+                                         (mp_size)privateValue->len));
+
+    if (withCofactor && (ecParams->cofactor != 1)) {
+        /* multiply k with the cofactor */
+        CHECK_MPI_OK(mp_init(&cofactor));
+        mp_set(&cofactor, ecParams->cofactor);
+        CHECK_MPI_OK(mp_mul(&k, &cofactor, &k));
+    }
+
+    /* Multiply our private key and peer's public point */
+    if (ec_points_mul(ecParams, NULL, &k, publicValue, &pointQ) != SECSuccess) {
+        goto cleanup;
+    }
+    if (ec_point_at_infinity(&pointQ)) {
+        PORT_SetError(SEC_ERROR_BAD_KEY); /* XXX better error code? */
+        goto cleanup;
+    }
+
+    /* Allocate memory for the derived secret and copy
+     * the x co-ordinate of pointQ into it.
+     */
+    if (SECITEM_AllocItem(NULL, derivedSecret, len) == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        goto cleanup;
+    }
+    memcpy(derivedSecret->data, pointQ.data + 1, len);
+
+    rv = SECSuccess;
+
+#if EC_DEBUG
+    printf("derived_secret:\n");
+    for (i = 0; i < derivedSecret->len; i++)
+        printf("%02x:", derivedSecret->data[i]);
+    printf("\n");
+#endif
+
+cleanup:
+    mp_clear(&k);
+    mp_clear(&cofactor);
+
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+    }
+
+    if (pointQ.data) {
+        PORT_ZFree(pointQ.data, pointQ.len);
+    }
+#else
+    PORT_SetError(SEC_ERROR_UNSUPPORTED_KEYALG);
+#endif /* NSS_DISABLE_ECC */
+
+    return rv;
+}
+
+/* Computes the ECDSA signature (a concatenation of two values r and s)
+ * on the digest using the given key and the random value kb (used in
+ * computing s).
+ *
+ * key        private key; supplies the curve parameters and private value.
+ * signature  out: r followed by s, each as long as the base point order.
+ *            When signature->data is NULL only signature->len is filled
+ *            in (length query); otherwise signature->len must be at least
+ *            twice the order length on entry.
+ * digest     message digest; reduced to the bit length of the order when
+ *            it is longer.
+ * kb, kblen  per-signature secret k; must decode to a value in [1, n-1],
+ *            otherwise SEC_ERROR_NEED_RANDOM is reported.
+ *
+ * Returns SECSuccess or SECFailure.
+ */
+SECStatus
+ECDSA_SignDigestWithSeed(ECPrivateKey *key, SECItem *signature,
+                         const SECItem *digest, const unsigned char *kb, const int kblen)
+{
+    SECStatus rv = SECFailure;
+#ifndef NSS_DISABLE_ECC
+    mp_int x1;
+    mp_int d, k; /* private key, random integer */
+    mp_int r, s; /* tuple (r, s) is the signature */
+    mp_int t;    /* holding tmp values */
+    mp_int n;
+    mp_err err = MP_OKAY;
+    ECParams *ecParams = NULL;
+    SECItem kGpoint = { siBuffer, NULL, 0 };
+    int flen = 0;   /* length in bytes of the field size */
+    unsigned olen;  /* length in bytes of the base point order */
+    unsigned obits; /* length in bits of the base point order */
+    unsigned char *t2 = NULL;
+
+#if EC_DEBUG
+    char mpstr[256];
+#endif
+
+    /* Initialize MPI integers. */
+    /* must happen before the first potential call to cleanup */
+    MP_DIGITS(&x1) = 0;
+    MP_DIGITS(&d) = 0;
+    MP_DIGITS(&k) = 0;
+    MP_DIGITS(&r) = 0;
+    MP_DIGITS(&s) = 0;
+    MP_DIGITS(&n) = 0;
+    MP_DIGITS(&t) = 0;
+
+    /* Check args */
+    if (!key || !signature || !digest || !kb || (kblen < 0)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        goto cleanup;
+    }
+
+    ecParams = &(key->ecParams);
+    flen = (ecParams->fieldID.size + 7) >> 3;
+    olen = ecParams->order.len;
+    if (signature->data == NULL) {
+        /* a call to get the signature length only */
+        goto finish;
+    }
+    if (signature->len < 2 * olen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        goto cleanup;
+    }
+
+    CHECK_MPI_OK(mp_init(&x1));
+    CHECK_MPI_OK(mp_init(&d));
+    CHECK_MPI_OK(mp_init(&k));
+    CHECK_MPI_OK(mp_init(&r));
+    CHECK_MPI_OK(mp_init(&s));
+    CHECK_MPI_OK(mp_init(&n));
+    CHECK_MPI_OK(mp_init(&t));
+
+    SECITEM_TO_MPINT(ecParams->order, &n);
+    SECITEM_TO_MPINT(key->privateValue, &d);
+
+    CHECK_MPI_OK(mp_read_unsigned_octets(&k, kb, kblen));
+    /* Make sure k is in the interval [1, n-1] */
+    if ((mp_cmp_z(&k) <= 0) || (mp_cmp(&k, &n) >= 0)) {
+#if EC_DEBUG
+        printf("k is outside [1, n-1]\n");
+        mp_tohex(&k, mpstr);
+        printf("k : %s \n", mpstr);
+        mp_tohex(&n, mpstr);
+        printf("n : %s \n", mpstr);
+#endif
+        PORT_SetError(SEC_ERROR_NEED_RANDOM);
+        goto cleanup;
+    }
+
+    /*
+    ** We do not want timing information to leak the length of k,
+    ** so we compute k*G using an equivalent scalar of fixed
+    ** bit-length.
+    ** Fix based on patch for ECDSA timing attack in the paper
+    ** by Billy Bob Brumley and Nicola Tuveri at
+    **   http://eprint.iacr.org/2011/232
+    **
+    ** How do we convert k to a value of a fixed bit-length?
+    ** k starts off as an integer satisfying 0 <= k < n.  Hence,
+    ** n <= k+n < 2n, which means k+n has either the same number
+    ** of bits as n or one more bit than n.  If k+n has the same
+    ** number of bits as n, the second addition ensures that the
+    ** final value has exactly one more bit than n.  Thus, we
+    ** always end up with a value that has exactly one more bit than n.
+    */
+    CHECK_MPI_OK(mp_add(&k, &n, &k));
+    if (mpl_significant_bits(&k) <= mpl_significant_bits(&n)) {
+        CHECK_MPI_OK(mp_add(&k, &n, &k));
+    }
+
+    /*
+    ** ANSI X9.62, Section 5.3.2, Step 2
+    **
+    ** Compute kG
+    */
+    kGpoint.len = EC_GetPointSize(ecParams);
+    kGpoint.data = PORT_Alloc(kGpoint.len);
+    if ((kGpoint.data == NULL) ||
+        (ec_points_mul(ecParams, &k, NULL, NULL, &kGpoint) != SECSuccess))
+        goto cleanup;
+
+    /*
+    ** ANSI X9.62, Section 5.3.3, Step 1
+    **
+    ** Extract the x co-ordinate of kG into x1
+    */
+    CHECK_MPI_OK(mp_read_unsigned_octets(&x1, kGpoint.data + 1,
+                                         (mp_size)flen));
+
+    /*
+    ** ANSI X9.62, Section 5.3.3, Step 2
+    **
+    ** r = x1 mod n  NOTE: n is the order of the curve
+    */
+    CHECK_MPI_OK(mp_mod(&x1, &n, &r));
+
+    /*
+    ** ANSI X9.62, Section 5.3.3, Step 3
+    **
+    ** verify r != 0
+    */
+    if (mp_cmp_z(&r) == 0) {
+        PORT_SetError(SEC_ERROR_NEED_RANDOM);
+        goto cleanup;
+    }
+
+    /*
+    ** ANSI X9.62, Section 5.3.3, Step 4
+    **
+    ** s = (k**-1 * (HASH(M) + d*r)) mod n
+    */
+    SECITEM_TO_MPINT(*digest, &s); /* s = HASH(M)     */
+
+    /* In the definition of EC signing, digests are truncated
+     * to the length of n in bits.
+     * (see SEC 1 "Elliptic Curve Digital Signature Algorithm" section 4.1.) */
+    CHECK_MPI_OK((obits = mpl_significant_bits(&n)));
+    /* The macro above stored the (positive) bit count in err. Reset it so
+     * the non-MPI failure exits below are not mistaken for an MPI error
+     * in cleanup, which would overwrite the error code they set. */
+    err = MP_OKAY;
+    if (digest->len * 8 > obits) {
+        CHECK_MPI_OK(mpl_rsh(&s, &s, digest->len * 8 - obits));
+    }
+
+#if EC_DEBUG
+    mp_todecimal(&n, mpstr);
+    printf("n : %s (dec)\n", mpstr);
+    mp_todecimal(&d, mpstr);
+    printf("d : %s (dec)\n", mpstr);
+    mp_tohex(&x1, mpstr);
+    printf("x1: %s\n", mpstr);
+    mp_todecimal(&s, mpstr);
+    printf("digest: %s (decimal)\n", mpstr);
+    mp_todecimal(&r, mpstr);
+    printf("r : %s (dec)\n", mpstr);
+    mp_tohex(&r, mpstr);
+    printf("r : %s\n", mpstr);
+#endif
+
+    /* Draw a random blinding factor t; k is multiplied by t before the
+     * modular inversion and the result is multiplied by t again. */
+    if ((t2 = PORT_Alloc(2 * ecParams->order.len)) == NULL) {
+        rv = SECFailure;
+        goto cleanup;
+    }
+    if (RNG_GenerateGlobalRandomBytes(t2, 2 * ecParams->order.len) != SECSuccess) {
+        PORT_SetError(SEC_ERROR_NEED_RANDOM);
+        rv = SECFailure;
+        goto cleanup;
+    }
+    CHECK_MPI_OK(mp_read_unsigned_octets(&t, t2, 2 * ecParams->order.len)); /* t <-$ Zn */
+    CHECK_MPI_OK(mp_mulmod(&k, &t, &n, &k));                                /* k = k * t mod n */
+    CHECK_MPI_OK(mp_invmod(&k, &n, &k));                                    /* k = k**-1 mod n */
+    CHECK_MPI_OK(mp_mulmod(&k, &t, &n, &k));                                /* k = k * t mod n */
+    CHECK_MPI_OK(mp_mulmod(&d, &r, &n, &d));                                /* d = d * r mod n */
+    CHECK_MPI_OK(mp_addmod(&s, &d, &n, &s));                                /* s = s + d mod n */
+    CHECK_MPI_OK(mp_mulmod(&s, &k, &n, &s));                                /* s = s * k mod n */
+
+#if EC_DEBUG
+    mp_todecimal(&s, mpstr);
+    printf("s : %s (dec)\n", mpstr);
+    mp_tohex(&s, mpstr);
+    printf("s : %s\n", mpstr);
+#endif
+
+    /*
+    ** ANSI X9.62, Section 5.3.3, Step 5
+    **
+    ** verify s != 0
+    */
+    if (mp_cmp_z(&s) == 0) {
+        PORT_SetError(SEC_ERROR_NEED_RANDOM);
+        goto cleanup;
+    }
+
+    /*
+    **
+    ** Signature is tuple (r, s)
+    */
+    CHECK_MPI_OK(mp_to_fixlen_octets(&r, signature->data, olen));
+    CHECK_MPI_OK(mp_to_fixlen_octets(&s, signature->data + olen, olen));
+finish:
+    signature->len = 2 * olen;
+
+    rv = SECSuccess;
+    err = MP_OKAY;
+cleanup:
+    mp_clear(&x1);
+    mp_clear(&d);
+    mp_clear(&k);
+    mp_clear(&r);
+    mp_clear(&s);
+    mp_clear(&n);
+    mp_clear(&t);
+
+    if (t2) {
+        /* t2 is only non-NULL after ecParams has been set. Wipe the
+         * blinding bytes before releasing them. */
+        PORT_ZFree(t2, 2 * ecParams->order.len);
+    }
+
+    if (kGpoint.data) {
+        PORT_ZFree(kGpoint.data, kGpoint.len);
+    }
+
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+
+#if EC_DEBUG
+    printf("ECDSA signing with seed %s\n",
+           (rv == SECSuccess) ? "succeeded" : "failed");
+#endif
+#else
+    PORT_SetError(SEC_ERROR_UNSUPPORTED_KEYALG);
+#endif /* NSS_DISABLE_ECC */
+
+    return rv;
+}
+
+/*
+** Signs digest with key using a freshly generated random per-signature
+** value: the value is drawn with ec_GenerateRandomPrivateKey() for the
+** key's curve order and passed to ECDSA_SignDigestWithSeed(). The
+** temporary buffer is zeroed and freed before returning.
+*/
+SECStatus
+ECDSA_SignDigest(ECPrivateKey *key, SECItem *signature, const SECItem *digest)
+{
+    SECStatus rv = SECFailure;
+#ifndef NSS_DISABLE_ECC
+    int orderLen;
+    unsigned char *nonce;
+
+    if (!key) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* Generate random value k */
+    orderLen = key->ecParams.order.len;
+    nonce = ec_GenerateRandomPrivateKey(key->ecParams.order.data, orderLen);
+    if (nonce != NULL) {
+        /* Generate ECDSA signature with the specified k value */
+        rv = ECDSA_SignDigestWithSeed(key, signature, digest, nonce, orderLen);
+        PORT_ZFree(nonce, orderLen);
+    }
+
+#if EC_DEBUG
+    printf("ECDSA signing %s\n",
+           (rv == SECSuccess) ? "succeeded" : "failed");
+#endif
+#else
+    PORT_SetError(SEC_ERROR_UNSUPPORTED_KEYALG);
+#endif /* NSS_DISABLE_ECC */
+
+    return rv;
+}
+
+/*
+** Checks the signature on the given digest using the key provided.
+**
+** The key argument must represent a valid EC public key (a point on
+** the relevant curve). If it is not a valid point, then the behavior
+** of this function is undefined. In cases where a public key might
+** not be valid, use EC_ValidatePublicKey to check.
+**
+** signature holds r' followed by s', each signature->len / 2 bytes; its
+** length must be non-zero, even, and at most twice the order length,
+** otherwise SEC_ERROR_INPUT_LEN is set. digest is reduced to the bit
+** length of the order when it is longer.
+**
+** Returns SECSuccess only when the recomputed value v equals r';
+** every other outcome returns SECFailure.
+*/
+SECStatus
+ECDSA_VerifyDigest(ECPublicKey *key, const SECItem *signature,
+ const SECItem *digest)
+{
+ SECStatus rv = SECFailure;
+#ifndef NSS_DISABLE_ECC
+ mp_int r_, s_; /* tuple (r', s') is the received signature */
+ mp_int c, u1, u2, v; /* intermediate values used in verification */
+ mp_int x1;
+ mp_int n;
+ mp_err err = MP_OKAY;
+ ECParams *ecParams = NULL;
+ SECItem pointC = { siBuffer, NULL, 0 };
+ int slen; /* length in bytes of a half signature (r or s) */
+ int flen; /* length in bytes of the field size */
+ unsigned olen; /* length in bytes of the base point order */
+ unsigned obits; /* length in bits of the base point order */
+
+#if EC_DEBUG
+ char mpstr[256];
+ printf("ECDSA verification called\n");
+#endif
+
+ /* Initialize MPI integers. */
+ /* must happen before the first potential call to cleanup */
+ MP_DIGITS(&r_) = 0;
+ MP_DIGITS(&s_) = 0;
+ MP_DIGITS(&c) = 0;
+ MP_DIGITS(&u1) = 0;
+ MP_DIGITS(&u2) = 0;
+ MP_DIGITS(&x1) = 0;
+ MP_DIGITS(&v) = 0;
+ MP_DIGITS(&n) = 0;
+
+ /* Check args */
+ if (!key || !signature || !digest) {
+ PORT_SetError(SEC_ERROR_INVALID_ARGS);
+ goto cleanup;
+ }
+
+ ecParams = &(key->ecParams);
+ flen = (ecParams->fieldID.size + 7) >> 3;
+ olen = ecParams->order.len;
+ if (signature->len == 0 || signature->len % 2 != 0 ||
+ signature->len > 2 * olen) {
+ PORT_SetError(SEC_ERROR_INPUT_LEN);
+ goto cleanup;
+ }
+ slen = signature->len / 2;
+
+ /* Scratch buffer that receives the encoded point C = u1*G + u2*Q. */
+ SECITEM_AllocItem(NULL, &pointC, EC_GetPointSize(ecParams));
+ if (pointC.data == NULL)
+ goto cleanup;
+
+ CHECK_MPI_OK(mp_init(&r_));
+ CHECK_MPI_OK(mp_init(&s_));
+ CHECK_MPI_OK(mp_init(&c));
+ CHECK_MPI_OK(mp_init(&u1));
+ CHECK_MPI_OK(mp_init(&u2));
+ CHECK_MPI_OK(mp_init(&x1));
+ CHECK_MPI_OK(mp_init(&v));
+ CHECK_MPI_OK(mp_init(&n));
+
+ /*
+ ** Convert received signature (r', s') into MPI integers.
+ */
+ CHECK_MPI_OK(mp_read_unsigned_octets(&r_, signature->data, slen));
+ CHECK_MPI_OK(mp_read_unsigned_octets(&s_, signature->data + slen, slen));
+
+ /*
+ ** ANSI X9.62, Section 5.4.2, Steps 1 and 2
+ **
+ ** Verify that 0 < r' < n and 0 < s' < n
+ */
+ SECITEM_TO_MPINT(ecParams->order, &n);
+ if (mp_cmp_z(&r_) <= 0 || mp_cmp_z(&s_) <= 0 ||
+ mp_cmp(&r_, &n) >= 0 || mp_cmp(&s_, &n) >= 0) {
+ PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+ goto cleanup; /* will return rv == SECFailure */
+ }
+
+ /*
+ ** ANSI X9.62, Section 5.4.2, Step 3
+ **
+ ** c = (s')**-1 mod n
+ */
+ CHECK_MPI_OK(mp_invmod(&s_, &n, &c)); /* c = (s')**-1 mod n */
+
+ /*
+ ** ANSI X9.62, Section 5.4.2, Step 4
+ **
+ ** u1 = ((HASH(M')) * c) mod n
+ */
+ SECITEM_TO_MPINT(*digest, &u1); /* u1 = HASH(M) */
+
+ /* In the definition of EC signing, digests are truncated
+ * to the length of n in bits.
+ * (see SEC 1 "Elliptic Curve Digital Signature Algorithm" section 4.1.) */
+ CHECK_MPI_OK((obits = mpl_significant_bits(&n)));
+ if (digest->len * 8 > obits) { /* u1 = HASH(M') */
+ mpl_rsh(&u1, &u1, digest->len * 8 - obits);
+ }
+
+#if EC_DEBUG
+ mp_todecimal(&r_, mpstr);
+ printf("r_: %s (dec)\n", mpstr);
+ mp_todecimal(&s_, mpstr);
+ printf("s_: %s (dec)\n", mpstr);
+ mp_todecimal(&c, mpstr);
+ printf("c : %s (dec)\n", mpstr);
+ mp_todecimal(&u1, mpstr);
+ printf("digest: %s (dec)\n", mpstr);
+#endif
+
+ CHECK_MPI_OK(mp_mulmod(&u1, &c, &n, &u1)); /* u1 = u1 * c mod n */
+
+ /*
+ ** ANSI X9.62, Section 5.4.2, Step 4
+ **
+ ** u2 = ((r') * c) mod n
+ */
+ CHECK_MPI_OK(mp_mulmod(&r_, &c, &n, &u2));
+
+ /*
+ ** ANSI X9.62, Section 5.4.3, Step 1
+ **
+ ** Compute u1*G + u2*Q
+ ** Here, A = u1.G B = u2.Q and C = A + B
+ ** If the result, C, is the point at infinity, reject the signature
+ */
+ if (ec_points_mul(ecParams, &u1, &u2, &key->publicValue, &pointC) != SECSuccess) {
+ rv = SECFailure;
+ goto cleanup;
+ }
+ if (ec_point_at_infinity(&pointC)) {
+ PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+ rv = SECFailure;
+ goto cleanup;
+ }
+
+ /* x1 = x co-ordinate of C (the flen bytes after the format byte) */
+ CHECK_MPI_OK(mp_read_unsigned_octets(&x1, pointC.data + 1, flen));
+
+ /*
+ ** ANSI X9.62, Section 5.4.4, Step 2
+ **
+ ** v = x1 mod n
+ */
+ CHECK_MPI_OK(mp_mod(&x1, &n, &v));
+
+#if EC_DEBUG
+ mp_todecimal(&r_, mpstr);
+ printf("r_: %s (dec)\n", mpstr);
+ mp_todecimal(&v, mpstr);
+ printf("v : %s (dec)\n", mpstr);
+#endif
+
+ /*
+ ** ANSI X9.62, Section 5.4.4, Step 3
+ **
+ ** Verification: v == r'
+ */
+ if (mp_cmp(&v, &r_)) {
+ PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+ rv = SECFailure; /* Signature failed to verify. */
+ } else {
+ rv = SECSuccess; /* Signature verified. */
+ }
+
+#if EC_DEBUG
+ mp_todecimal(&u1, mpstr);
+ printf("u1: %s (dec)\n", mpstr);
+ mp_todecimal(&u2, mpstr);
+ printf("u2: %s (dec)\n", mpstr);
+ mp_tohex(&x1, mpstr);
+ printf("x1: %s\n", mpstr);
+ mp_todecimal(&v, mpstr);
+ printf("v : %s (dec)\n", mpstr);
+#endif
+
+cleanup:
+ mp_clear(&r_);
+ mp_clear(&s_);
+ mp_clear(&c);
+ mp_clear(&u1);
+ mp_clear(&u2);
+ mp_clear(&x1);
+ mp_clear(&v);
+ mp_clear(&n);
+
+ if (pointC.data)
+ SECITEM_ZfreeItem(&pointC, PR_FALSE);
+ /* Any MPI error recorded in err overrides rv and forces failure. */
+ if (err) {
+ MP_TO_SEC_ERROR(err);
+ rv = SECFailure;
+ }
+
+#if EC_DEBUG
+ printf("ECDSA verification %s\n",
+ (rv == SECSuccess) ? "succeeded" : "failed");
+#endif
+#else
+ PORT_SetError(SEC_ERROR_UNSUPPORTED_KEYALG);
+#endif /* NSS_DISABLE_ECC */
+
+ return rv;
+}
diff --git a/security/nss/lib/freebl/ec.h b/security/nss/lib/freebl/ec.h
new file mode 100644
index 000000000..bb65e82cd
--- /dev/null
+++ b/security/nss/lib/freebl/ec.h
@@ -0,0 +1,21 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef __ec_h_
+#define __ec_h_
+
+#define EC_DEBUG 0
+
+#define ANSI_X962_CURVE_OID_TOTAL_LEN 10
+#define SECG_CURVE_OID_TOTAL_LEN 7
+#define PKIX_NEWCURVES_OID_TOTAL_LEN 11
+
+/*
+ * Table entry that ties a curve name to two curve-specific callbacks.
+ * NOTE(review): judging by the member names, `mul` is presumably a scalar
+ * multiplication and `validate` a public-point check; confirm against the
+ * code that fills in these entries.
+ */
+struct ECMethodStr {
+ /* Curve this entry applies to. */
+ ECCurveName name;
+ /* Callback taking an output item, a scalar item and a point item. */
+ SECStatus (*mul)(SECItem *result, SECItem *scalar, SECItem *point);
+ /* Callback taking a read-only point item. */
+ SECStatus (*validate)(const SECItem *point);
+};
+typedef struct ECMethodStr ECMethod;
+
+#endif /* __ec_h_ */
diff --git a/security/nss/lib/freebl/ecdecode.c b/security/nss/lib/freebl/ecdecode.c
new file mode 100644
index 000000000..e1f1eb8a5
--- /dev/null
+++ b/security/nss/lib/freebl/ecdecode.c
@@ -0,0 +1,311 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef NSS_DISABLE_ECC
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "blapi.h"
+#include "secoid.h"
+#include "secitem.h"
+#include "secerr.h"
+#include "ec.h"
+#include "ecl-curve.h"
+
+/*
+ * Error-propagation helpers. Both expect the enclosing function to provide
+ * a `cleanup` label; CHECK_SEC_OK additionally assigns to a local `rv`.
+ * Each one is wrapped in do { } while (0) and parenthesizes its argument so
+ * that it expands to exactly one statement and cannot capture a following
+ * `else` when used in an unbraced if/else.
+ */
+
+/* Jump to `cleanup` when the expression evaluates to NULL. */
+#define CHECK_OK(func)      \
+    do {                    \
+        if ((func) == NULL) \
+            goto cleanup;   \
+    } while (0)
+
+/* Store the expression's status in `rv`; jump to `cleanup` unless it is
+ * SECSuccess. */
+#define CHECK_SEC_OK(func)               \
+    do {                                 \
+        if (SECSuccess != (rv = (func))) \
+            goto cleanup;                \
+    } while (0)
+
+/*
+ * Decode the NUL-terminated hexadecimal string `str` into `item`, taking
+ * the byte buffer from `arena`.
+ *
+ * Returns `item` on success. Returns NULL when the string has an odd number
+ * of characters, when the arena allocation fails, or when a character that
+ * is not a hex digit is met (in that last case item->data and item->len
+ * have already been assigned).
+ *
+ * Warning: leading "00" pairs are dropped (a lone "00" is kept), so any
+ * leading 00's in the hexadecimal string must be optional.
+ */
+static SECItem *
+hexString2SECItem(PLArenaPool *arena, SECItem *item, const char *str)
+{
+    int hexLen = PORT_Strlen(str);
+    int pos;
+    int acc = 0;
+
+    PORT_Assert(arena);
+    PORT_Assert(item);
+
+    if ((hexLen % 2) != 0) {
+        return NULL;
+    }
+
+    /* Drop leading "00" pairs, but always keep the final pair. */
+    for (; (hexLen > 2) && (str[0] == '0') && (str[1] == '0'); hexLen -= 2) {
+        str += 2;
+    }
+
+    item->data = (unsigned char *)PORT_ArenaAlloc(arena, hexLen / 2);
+    if (item->data == NULL) {
+        return NULL;
+    }
+    item->len = hexLen / 2;
+
+    for (pos = 0; str[pos] != '\0'; pos++) {
+        const char ch = str[pos];
+        int nibble;
+
+        if ((ch >= '0') && (ch <= '9')) {
+            nibble = ch - '0';
+        } else if ((ch >= 'a') && (ch <= 'f')) {
+            nibble = ch - 'a' + 10;
+        } else if ((ch >= 'A') && (ch <= 'F')) {
+            nibble = ch - 'A' + 10;
+        } else {
+            return NULL;
+        }
+
+        /* acc holds at most one nibble here, so shift-or equals *16 + n. */
+        acc = (acc << 4) | nibble;
+        if ((pos % 2) != 0) {
+            /* Second digit of the pair: emit the byte and start over. */
+            item->data[pos / 2] = acc;
+            acc = 0;
+        }
+    }
+
+    return item;
+}
+
+/*
+ * Duplicate every field of srcParams into dstParams. Scalar members are
+ * assigned directly; SECItem members are copied with SECITEM_CopyItem using
+ * `arena`, which is also recorded as dstParams->arena.
+ *
+ * Returns SECSuccess when every item copy succeeds. On the first failing
+ * copy it returns SECFailure immediately, leaving dstParams partially
+ * filled.
+ */
+SECStatus
+EC_CopyParams(PLArenaPool *arena, ECParams *dstParams,
+              const ECParams *srcParams)
+{
+    SECItem *dstField;
+    const SECItem *srcField;
+
+    dstParams->arena = arena;
+    dstParams->type = srcParams->type;
+    dstParams->fieldID.size = srcParams->fieldID.size;
+    dstParams->fieldID.type = srcParams->fieldID.type;
+
+    /* Pick the union member that matches the field type. */
+    if (srcParams->fieldID.type == ec_field_GFp ||
+        srcParams->fieldID.type == ec_field_plain) {
+        dstField = &dstParams->fieldID.u.prime;
+        srcField = &srcParams->fieldID.u.prime;
+    } else {
+        dstField = &dstParams->fieldID.u.poly;
+        srcField = &srcParams->fieldID.u.poly;
+    }
+    if (SECITEM_CopyItem(arena, dstField, srcField) != SECSuccess) {
+        return SECFailure;
+    }
+
+    dstParams->fieldID.k1 = srcParams->fieldID.k1;
+    dstParams->fieldID.k2 = srcParams->fieldID.k2;
+    dstParams->fieldID.k3 = srcParams->fieldID.k3;
+
+    if (SECITEM_CopyItem(arena, &dstParams->curve.a,
+                         &srcParams->curve.a) != SECSuccess) {
+        return SECFailure;
+    }
+    if (SECITEM_CopyItem(arena, &dstParams->curve.b,
+                         &srcParams->curve.b) != SECSuccess) {
+        return SECFailure;
+    }
+    if (SECITEM_CopyItem(arena, &dstParams->curve.seed,
+                         &srcParams->curve.seed) != SECSuccess) {
+        return SECFailure;
+    }
+    if (SECITEM_CopyItem(arena, &dstParams->base,
+                         &srcParams->base) != SECSuccess) {
+        return SECFailure;
+    }
+    if (SECITEM_CopyItem(arena, &dstParams->order,
+                         &srcParams->order) != SECSuccess) {
+        return SECFailure;
+    }
+    if (SECITEM_CopyItem(arena, &dstParams->DEREncoding,
+                         &srcParams->DEREncoding) != SECSuccess) {
+        return SECFailure;
+    }
+
+    dstParams->name = srcParams->name;
+    if (SECITEM_CopyItem(arena, &dstParams->curveOID,
+                         &srcParams->curveOID) != SECSuccess) {
+        return SECFailure;
+    }
+    dstParams->cofactor = srcParams->cofactor;
+
+    return SECSuccess;
+}
+
+/*
+ * Fill in the field, curve coefficients, base point, order and cofactor of
+ * `params` from the ecCurve_map entry for `name`. All decoded items are
+ * allocated from params->arena, which the caller must have set.
+ *
+ * name       - curve to look up; must lie in
+ *              [ECCurve_noName, ECCurve_pastLastCurve] and have a non-NULL
+ *              table entry.
+ * field_type - stored in params->fieldID.type; also selects whether the
+ *              irreducible is written to fieldID.u.prime or fieldID.u.poly.
+ *
+ * Returns SECSuccess, or SECFailure when the name is out of range, has no
+ * table entry, a hex string fails to decode, or the generator coordinates
+ * would not fit the local encoding buffer.
+ */
+static SECStatus
+gf_populate_params(ECCurveName name, ECFieldType field_type, ECParams *params)
+{
+    SECStatus rv = SECFailure;
+    const ECCurveParams *curveParams;
+    /* 2 ['0'+'4'] + MAX_ECKEY_LEN * 2 [x,y] * 2 [hex string] + 1 ['\0'] */
+    char genenc[3 + 2 * 2 * MAX_ECKEY_LEN];
+
+    if ((name < ECCurve_noName) || (name > ECCurve_pastLastCurve))
+        goto cleanup;
+    params->name = name;
+    curveParams = ecCurve_map[params->name];
+    CHECK_OK(curveParams);
+    params->fieldID.size = curveParams->size;
+    params->fieldID.type = field_type;
+    if (field_type == ec_field_GFp ||
+        field_type == ec_field_plain) {
+        CHECK_OK(hexString2SECItem(params->arena, &params->fieldID.u.prime,
+                                   curveParams->irr));
+    } else {
+        CHECK_OK(hexString2SECItem(params->arena, &params->fieldID.u.poly,
+                                   curveParams->irr));
+    }
+    CHECK_OK(hexString2SECItem(params->arena, &params->curve.a,
+                               curveParams->curvea));
+    CHECK_OK(hexString2SECItem(params->arena, &params->curve.b,
+                               curveParams->curveb));
+
+    /* The base point is encoded as "04" || genx || geny. Refuse table
+     * entries whose coordinates would overrun genenc instead of letting
+     * strcat write past the end of the stack buffer (3 = "04" + NUL). */
+    if (PORT_Strlen(curveParams->genx) + PORT_Strlen(curveParams->geny) >
+        sizeof(genenc) - 3) {
+        goto cleanup;
+    }
+    genenc[0] = '0';
+    genenc[1] = '4';
+    genenc[2] = '\0';
+    strcat(genenc, curveParams->genx);
+    strcat(genenc, curveParams->geny);
+    CHECK_OK(hexString2SECItem(params->arena, &params->base, genenc));
+    CHECK_OK(hexString2SECItem(params->arena, &params->order,
+                               curveParams->order));
+    params->cofactor = curveParams->cofactor;
+
+    rv = SECSuccess;
+
+cleanup:
+    return rv;
+}
+
+/*
+ * Fill `params` for the named curve described by `encodedParams`.
+ *
+ * encodedParams must be one of the three accepted total lengths and start
+ * with the SEC_ASN1_OBJECT_ID tag byte; the bytes from offset 2 onward are
+ * looked up as an OID. The OID bytes are copied into params->curveOID and,
+ * for the four supported curves, the remaining fields are loaded through
+ * gf_populate_params.
+ *
+ * Returns SECSuccess on success. Otherwise returns SECFailure;
+ * SEC_ERROR_UNSUPPORTED_ELLIPTIC_CURVE is set whenever params->cofactor is
+ * still zero on exit, and on the early rejections.
+ */
+SECStatus
+EC_FillParams(PLArenaPool *arena, const SECItem *encodedParams,
+              ECParams *params)
+{
+    SECStatus rv = SECFailure;
+    SECOidTag tag;
+    SECItem oid = { siBuffer, NULL, 0 };
+    ECCurveName curve = ECCurve_noName;
+    ECFieldType fieldType = ec_field_GFp;
+
+#if EC_DEBUG
+    int i;
+
+    printf("Encoded params in EC_DecodeParams: ");
+    for (i = 0; i < encodedParams->len; i++) {
+        printf("%02x:", encodedParams->data[i]);
+    }
+    printf("\n");
+#endif
+
+    /* Only the three known encoded-OID lengths are accepted. */
+    if (!((encodedParams->len == ANSI_X962_CURVE_OID_TOTAL_LEN) ||
+          (encodedParams->len == SECG_CURVE_OID_TOTAL_LEN) ||
+          (encodedParams->len == PKIX_NEWCURVES_OID_TOTAL_LEN))) {
+        PORT_SetError(SEC_ERROR_UNSUPPORTED_ELLIPTIC_CURVE);
+        return SECFailure;
+    }
+
+    /* Skip the first two bytes; the OID lookup only runs when the first
+     * byte is the OBJECT IDENTIFIER tag. */
+    oid.len = encodedParams->len - 2;
+    oid.data = encodedParams->data + 2;
+    if (encodedParams->data[0] == SEC_ASN1_OBJECT_ID) {
+        tag = SECOID_FindOIDTag(&oid);
+    } else {
+        tag = SEC_OID_UNKNOWN;
+    }
+    if (tag == SEC_OID_UNKNOWN) {
+        PORT_SetError(SEC_ERROR_UNSUPPORTED_ELLIPTIC_CURVE);
+        return SECFailure;
+    }
+
+    params->arena = arena;
+    params->cofactor = 0;
+    params->type = ec_params_named;
+    params->name = ECCurve_noName;
+
+    /* Fill out curveOID */
+    params->curveOID.len = oid.len;
+    params->curveOID.data = (unsigned char *)PORT_ArenaAlloc(arena, oid.len);
+    if (params->curveOID.data == NULL)
+        goto cleanup;
+    memcpy(params->curveOID.data, oid.data, oid.len);
+
+#if EC_DEBUG
+    printf("Curve: %s\n", SECOID_FindOIDTagDescription(tag));
+#endif
+
+    /* Map the OID tag to a curve table entry and field type. */
+    switch (tag) {
+        case SEC_OID_ANSIX962_EC_PRIME256V1:
+            /* prime256v1 aka secp256r1 (the NIST P-256 curve) */
+            curve = ECCurve_X9_62_PRIME_256V1;
+            break;
+
+        case SEC_OID_SECG_EC_SECP384R1:
+            /* secp384r1 (the NIST P-384 curve) */
+            curve = ECCurve_SECG_PRIME_384R1;
+            break;
+
+        case SEC_OID_SECG_EC_SECP521R1:
+            /* secp521r1 (the NIST P-521 curve) */
+            curve = ECCurve_SECG_PRIME_521R1;
+            break;
+
+        case SEC_OID_CURVE25519:
+            /* Curve25519 */
+            curve = ECCurve25519;
+            fieldType = ec_field_plain;
+            break;
+
+        default:
+            /* Known OID but not a supported curve: rv stays SECFailure. */
+            break;
+    }
+
+    if (curve != ECCurve_noName) {
+        rv = gf_populate_params(curve, fieldType, params);
+    }
+
+cleanup:
+    if (!params->cofactor) {
+        PORT_SetError(SEC_ERROR_UNSUPPORTED_ELLIPTIC_CURVE);
+#if EC_DEBUG
+        printf("Unrecognized curve, returning NULL params\n");
+#endif
+    }
+
+    return rv;
+}
+
+/*
+ * Allocate a new ECParams in its own arena and fill it from the encoded
+ * curve parameters.
+ *
+ * encodedParams - encoded parameters; copied into params->DEREncoding and
+ *                 then handed to EC_FillParams.
+ * ecparams      - receives the new structure on success; left untouched
+ *                 on failure.
+ *
+ * Returns SECSuccess, or SECFailure if an allocation fails or
+ * EC_FillParams rejects the input. On failure the arena is freed, so the
+ * caller owns nothing.
+ */
+SECStatus
+EC_DecodeParams(const SECItem *encodedParams, ECParams **ecparams)
+{
+    PLArenaPool *arena;
+    ECParams *params;
+    SECStatus rv = SECFailure;
+
+    /* Initialize an arena for the ECParams structure */
+    if (!(arena = PORT_NewArena(NSS_FREEBL_DEFAULT_CHUNKSIZE)))
+        return SECFailure;
+
+    params = (ECParams *)PORT_ArenaZAlloc(arena, sizeof(ECParams));
+    if (!params) {
+        PORT_FreeArena(arena, PR_TRUE);
+        return SECFailure;
+    }
+
+    /* Copy the encoded params. The allocation result must be checked:
+     * otherwise the memcpy below could write through a NULL data pointer
+     * when the arena is out of memory. */
+    if (SECITEM_AllocItem(arena, &(params->DEREncoding),
+                          encodedParams->len) == NULL) {
+        PORT_FreeArena(arena, PR_TRUE);
+        return SECFailure;
+    }
+    memcpy(params->DEREncoding.data, encodedParams->data, encodedParams->len);
+
+    /* Fill out the rest of the ECParams structure based on
+     * the encoded params
+     */
+    rv = EC_FillParams(arena, encodedParams, params);
+    if (rv == SECFailure) {
+        PORT_FreeArena(arena, PR_TRUE);
+        return SECFailure;
+    }
+
+    *ecparams = params;
+    return SECSuccess;
+}
+
+/*
+ * Return the encoded point size for the curve in `params`.
+ *
+ * When params->name is within the table range and has an ecCurve_map
+ * entry, that entry's pointSize is returned. Otherwise the size is derived
+ * from params->fieldID.size (in bits): two coordinates of
+ * ceil(size / 8) bytes each, plus one leading indicator byte.
+ */
+int
+EC_GetPointSize(const ECParams *params)
+{
+    ECCurveName name = params->name;
+    const ECCurveParams *curveParams = NULL;
+    int sizeInBytes;
+
+    if ((name >= ECCurve_noName) && (name <= ECCurve_pastLastCurve)) {
+        curveParams = ecCurve_map[name];
+    }
+    if (curveParams != NULL) {
+        return curveParams->pointSize;
+    }
+
+    /* unknown curve, calculate point size from params. assume standard
+     * curves with 2 coordinates and a point compression indicator byte */
+    sizeInBytes = (params->fieldID.size + 7) / 8;
+    return sizeInBytes * 2 + 1;
+}
+
+#endif /* NSS_DISABLE_ECC */
diff --git a/security/nss/lib/freebl/ecl/README b/security/nss/lib/freebl/ecl/README
new file mode 100644
index 000000000..04a8b3b01
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/README
@@ -0,0 +1,267 @@
+This Source Code Form is subject to the terms of the Mozilla Public
+License, v. 2.0. If a copy of the MPL was not distributed with this
+file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+The ECL exposes routines for constructing and converting curve
+parameters for internal use.
+
+
+HEADER FILES
+============
+
+ecl-exp.h - Exports data structures and curve names. For use by code
+that does not have access to mp_ints.
+
+ecl-curve.h - Provides hex encodings (in the form of ECCurveParams
+structs) of standardized elliptic curve domain parameters and mappings
+from ECCurveName to ECCurveParams. For use by code that does not have
+access to mp_ints.
+
+ecl.h - Interface to constructors for curve parameters and group object,
+and point multiplication operations. Used by higher level algorithms
+(like ECDH and ECDSA) to actually perform elliptic curve cryptography.
+
+ecl-priv.h - Data structures and functions for internal use within the
+library.
+
+ecp.h - Internal header file that contains all functions for point
+arithmetic over prime fields.
+
+DATA STRUCTURES AND TYPES
+=========================
+
+ECCurveName (from ecl-exp.h) - Opaque name for standardized elliptic
+curve domain parameters.
+
+ECCurveParams (from ecl-exp.h) - Provides hexadecimal encoding
+of elliptic curve domain parameters. Can be generated by a user
+and passed to ECGroup_fromHex or can be generated from a name by
+EC_GetNamedCurveParams. ecl-curve.h contains ECCurveParams structs for
+the standardized curves defined by ECCurveName.
+
+ECGroup (from ecl.h and ecl-priv.h) - Opaque data structure that
+represents a group of elliptic curve points for a particular set of
+elliptic curve domain parameters. Contains all domain parameters (curve
+a and b, field, base point) as well as pointers to the functions that
+should be used for point arithmetic and the underlying field GFMethod.
+Generated by either ECGroup_fromHex or ECGroup_fromName.
+
+GFMethod (from ecl-priv.h) - Represents a field underlying a set of
+elliptic curve domain parameters. Contains the irreducible that defines
+the field (either the prime or the binary polynomial) as well as
+pointers to the functions that should be used for field arithmetic.
+
+ARITHMETIC FUNCTIONS
+====================
+
+Higher-level algorithms (like ECDH and ECDSA) should call ECPoint_mul
+or ECPoints_mul (from ecl.h) to do point arithmetic. These functions
+will choose which underlying algorithms to use, based on the ECGroup
+structure.
+
+Point Multiplication
+--------------------
+
+ecl_mult.c provides the ECPoints_mul and ECPoint_mul wrappers.
+It also provides two implementations for the pts_mul operation -
+ec_pts_mul_basic (which computes kP, lQ, and then adds kP + lQ) and
+ec_pts_mul_simul_w2 (which does a simultaneous point multiplication
+using a table with window size 2*2).
+
+ec_naf.c provides an implementation of an algorithm to calculate a
+non-adjacent form of a scalar, minimizing the number of point
+additions that need to be done in a point multiplication.
+
+Point Arithmetic over Prime Fields
+----------------------------------
+
+ecp_aff.c provides point arithmetic using affine coordinates.
+
+ecp_jac.c provides point arithmetic using Jacobian projective
+coordinates and mixed Jacobian-affine coordinates. (Jacobian projective
+coordinates represent a point (x, y) as (X, Y, Z), where x=X/Z^2,
+y=Y/Z^3).
+
+ecp_jm.c provides point arithmetic using Modified Jacobian
+coordinates and mixed Modified_Jacobian-affine coordinates.
+(Modified Jacobian coordinates represent a point (x, y)
+as (X, Y, Z, a*Z^4), where x=X/Z^2, y=Y/Z^3, and a is
+the linear coefficient in the curve defining equation).
+
+ecp_192.c and ecp_224.c provide optimized field arithmetic.
+
+Point Arithmetic over Binary Polynomial Fields
+----------------------------------------------
+
+ec2_aff.c provides point arithmetic using affine coordinates.
+
+ec2_proj.c provides point arithmetic using projective coordinates.
+(Projective coordinates represent a point (x, y) as (X, Y, Z), where
+x=X/Z, y=Y/Z^2).
+
+ec2_mont.c provides point multiplication using Montgomery projective
+coordinates.
+
+ec2_163.c, ec2_193.c, and ec2_233.c provide optimized field arithmetic.
+
+Field Arithmetic
+----------------
+
+ecl_gf.c provides constructors for field objects (GFMethod) with the
+functions GFMethod_cons*. It also provides wrappers around the basic
+field operations.
+
+Prime Field Arithmetic
+----------------------
+
+The mpi library provides the basic prime field arithmetic.
+
+ecp_mont.c provides wrappers around the Montgomery multiplication
+functions from the mpi library and adds encoding and decoding functions.
+It also provides the function to construct a GFMethod object using
+Montgomery multiplication.
+
+ecp_192.c and ecp_224.c provide optimized modular reduction for the
+fields defined by nistp192 and nistp224 primes.
+
+ecl_gf.c provides wrappers around the basic field operations.
+
+Binary Polynomial Field Arithmetic
+----------------------------------
+
+../mpi/mp_gf2m.c provides basic binary polynomial field arithmetic,
+including addition, multiplication, squaring, mod, and division, as well
+as conversion of polynomial representations between bitstring and int[].
+
+ec2_163.c, ec2_193.c, and ec2_233.c provide optimized field mod, mul,
+and sqr operations.
+
+ecl_gf.c provides wrappers around the basic field operations.
+
+Field Encoding
+--------------
+
+By default, field elements are encoded in their basic form. It is
+possible to use an alternative encoding, however. For example, it is
+possible to use the Montgomery representation of prime field elements
+and take advantage of the fast modular multiplication that Montgomery
+representation provides. The process of converting from basic form to
+Montgomery representation is called field encoding, and the opposite
+process would be field decoding. All internal point operations assume
+that the operands are field encoded as appropriate. By rewiring the
+underlying field arithmetic to perform operations on these encoded
+values, the same overlying point arithmetic operations can be used
+regardless of field representation.
+
+ALGORITHM WIRING
+================
+
+The EC library allows point and field arithmetic algorithms to be
+substituted ("wired-in") on a fine-grained basis. This allows for
+generic algorithms and algorithms that are optimized for a particular
+curve, field, or architecture, to coexist and to be automatically
+selected at runtime.
+
+Wiring Mechanism
+----------------
+
+The ECGroup and GFMethod structure contain pointers to the point and
+field arithmetic functions, respectively, that are to be used in
+operations.
+
+The selection of algorithms to use is handled in the function
+ecgroup_fromNameAndHex in ecl.c.
+
+Default Wiring
+--------------
+
+Curves over prime fields by default use Montgomery field arithmetic,
+point multiplication using 5-bit window non-adjacent-form with
+Modified Jacobian coordinates, and 2*2-bit simultaneous point
+multiplication using Jacobian coordinates.
+(Wiring in function ECGroup_consGFp_mont in ecl.c.)
+
+Curves over prime fields that have optimized modular reduction (i.e.,
+secp160r1, nistp192, and nistp224) do not use Montgomery field
+arithmetic. Instead, they use basic field arithmetic with their
+optimized reduction (as in ecp_192.c and ecp_224.c). They
+use the same point multiplication and simultaneous point multiplication
+algorithms as other curves over prime fields.
+
+Curves over binary polynomial fields by default use generic field
+arithmetic with Montgomery point multiplication and basic kP + lQ
+computation (multiply, multiply, and add). (Wiring in function
+ECGroup_cons_GF2m in ecl.c.)
+
+Curves over binary polynomial fields that have optimized field
+arithmetic (i.e., any 163-, 193-, or 233-bit field) use their optimized
+field arithmetic. They use the same point multiplication and
+simultaneous point multiplication algorithms as other curves over binary
+fields.
+
+Example
+-------
+
+We provide an example for plugging in an optimized implementation for
+the Koblitz curve nistk163.
+
+Suppose the file ec2_k163.c contains the optimized implementation. In
+particular it contains a point multiplication function:
+
+ mp_err ec_GF2m_nistk163_pt_mul(const mp_int *n, const mp_int *px,
+ const mp_int *py, mp_int *rx, mp_int *ry, const ECGroup *group);
+
+Since only a pt_mul function is provided, the generic pt_add function
+will be used.
+
+There are two options for handling the optimized field arithmetic used
+by the ..._pt_mul function. Say the optimized field arithmetic includes
+the following functions:
+
+ mp_err ec_GF2m_nistk163_add(const mp_int *a, const mp_int *b,
+ mp_int *r, const GFMethod *meth);
+ mp_err ec_GF2m_nistk163_mul(const mp_int *a, const mp_int *b,
+ mp_int *r, const GFMethod *meth);
+ mp_err ec_GF2m_nistk163_sqr(const mp_int *a,
+ mp_int *r, const GFMethod *meth);
+ mp_err ec_GF2m_nistk163_div(const mp_int *a, const mp_int *b,
+ mp_int *r, const GFMethod *meth);
+
+First, the optimized field arithmetic could simply be called directly
+by the ..._pt_mul function. This would be accomplished by changing
+the ecgroup_fromNameAndHex function in ecl.c to include the following
+statements:
+
+ if (name == ECCurve_NIST_K163) {
+ group = ECGroup_consGF2m(&irr, NULL, &curvea, &curveb, &genx,
+ &geny, &order, params->cofactor);
+ if (group == NULL) { res = MP_UNDEF; goto CLEANUP; }
+ MP_CHECKOK( ec_group_set_nistk163(group) );
+ }
+
+and including in ec2_k163.c the following function:
+
+ mp_err ec_group_set_nistk163(ECGroup *group) {
+ group->point_mul = &ec_GF2m_nistk163_pt_mul;
+ return MP_OKAY;
+ }
+
+As a result, ec_GF2m_pt_add and similar functions would use the
+basic binary polynomial field arithmetic ec_GF2m_add, ec_GF2m_mul,
+ec_GF2m_sqr, and ec_GF2m_div.
+
+Alternatively, the optimized field arithmetic could be wired into the
+group's GFMethod. This would be accomplished by putting the following
+function in ec2_k163.c:
+
+ mp_err ec_group_set_nistk163(ECGroup *group) {
+ group->meth->field_add = &ec_GF2m_nistk163_add;
+ group->meth->field_mul = &ec_GF2m_nistk163_mul;
+ group->meth->field_sqr = &ec_GF2m_nistk163_sqr;
+ group->meth->field_div = &ec_GF2m_nistk163_div;
+ group->point_mul = &ec_GF2m_nistk163_pt_mul;
+ return MP_OKAY;
+ }
+
+For an example of functions that use special field encodings, take a
+look at ecp_mont.c.
diff --git a/security/nss/lib/freebl/ecl/curve25519_32.c b/security/nss/lib/freebl/ecl/curve25519_32.c
new file mode 100644
index 000000000..0122961e6
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/curve25519_32.c
@@ -0,0 +1,390 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * Derived from public domain code by Matthew Dempsky and D. J. Bernstein.
+ */
+
+#include "ecl-priv.h"
+#include "mpi.h"
+
+#include <stdint.h>
+#include <stdio.h>
+
+typedef uint32_t elem[32];
+
+/*
+ * Limb-wise addition of two field elements: out = a + b.
+ *
+ * Limbs 0..30 are written as 8-bit values with the carry passed upward;
+ * limb 31 receives the remaining sum unmasked. `out` may be the same
+ * array as `a` or `b`, because each index is read before it is written.
+ */
+static void
+add(elem out, const elem a, const elem b)
+{
+    uint32_t carry = 0;
+    uint32_t idx = 0;
+
+    while (idx < 31) {
+        const uint32_t sum = carry + (a[idx] + b[idx]);
+        out[idx] = sum & 0xFF;
+        carry = sum >> 8;
+        idx++;
+    }
+    out[31] = carry + (a[31] + b[31]);
+}
+
+/*
+ * Limb-wise subtraction of two field elements: out = a - b.
+ *
+ * The running value starts at 218 and 0xFF00 is added at every one of the
+ * low 31 limbs before b's limb is subtracted.
+ * NOTE(review): this presumably biases the result by a multiple of
+ * 2^255 - 19 so that intermediate limbs do not go negative; confirm
+ * against the reference implementation.
+ */
+static void
+sub(elem out, const elem a, const elem b)
+{
+    uint32_t acc = 218;
+    uint32_t idx;
+
+    for (idx = 0; idx != 31; idx++) {
+        acc = acc + (a[idx] + 0xFF00 - b[idx]);
+        out[idx] = acc & 0xFF;
+        acc = acc >> 8;
+    }
+    out[31] = acc + (a[31] - b[31]);
+}
+
+/*
+ * "Squeeze" an element after multiplication (and square).
+ *
+ * Pass 1 propagates carries through limbs 0..30 and keeps only the low
+ * 7 bits of limb 31. Pass 2 takes everything above those 7 bits,
+ * multiplies it by 19, and folds it back in from limb 0 upward.
+ */
+static void
+squeeze(elem a)
+{
+    uint32_t carry = 0;
+    uint32_t top;
+    uint32_t k;
+
+    for (k = 0; k != 31; k++) {
+        carry += a[k];
+        a[k] = carry & 0xFF;
+        carry >>= 8;
+    }
+    top = carry + a[31];
+    a[31] = top & 0x7F;
+
+    /* Bits at and above position 255 wrap around with a factor of 19. */
+    carry = 19 * (top >> 7);
+    for (k = 0; k != 31; k++) {
+        carry += a[k];
+        a[k] = carry & 0xFF;
+        carry >>= 8;
+    }
+    a[31] += carry;
+}
+
+/*
+ * Constant added by reduce(). Read as 32 little-endian byte limbs it is
+ * 19 + 128 * 2^248 = 2^255 + 19, i.e. 2^256 - (2^255 - 19).
+ */
+static const elem minusp = { 19, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 128 };
+
+/*
+ * Final reduction of element a modulo 2^255-19.
+ *
+ * Adds minusp to a, then, without branching, keeps either the sum or the
+ * original value: if bit 7 of limb 31 of the sum is set, the original
+ * limbs are restored; otherwise the sum is kept.
+ */
+static void
+reduce(elem a)
+{
+    elem saved;
+    uint32_t k;
+    uint32_t restoreMask;
+
+    for (k = 0; k != 32; k++) {
+        saved[k] = a[k];
+    }
+
+    add(a, a, minusp);
+
+    /* All-ones when bit 7 of the top limb is set, zero otherwise. */
+    restoreMask = (uint32_t)0 - ((a[31] >> 7) & 1);
+
+    for (k = 0; k != 32; k++) {
+        a[k] ^= restoreMask & (saved[k] ^ a[k]);
+    }
+}
+
+/*
+ * Multiplication and squeeze
+ * out = a * b
+ *
+ * For each output limb the products whose indices sum to that limb are
+ * added directly; products whose indices sum to limb + 32 are added with a
+ * factor of 38. The result is passed through squeeze(). `out` is written
+ * while `a` and `b` are still being read, so it must not alias either.
+ */
+static void
+mult(elem out, const elem a, const elem b)
+{
+    uint32_t row;
+    uint32_t col;
+
+    for (row = 0; row < 32; ++row) {
+        uint32_t direct = 0;  /* index pairs summing to row */
+        uint32_t wrapped = 0; /* index pairs summing to row + 32 */
+
+        for (col = 0; col < 32; ++col) {
+            if (col <= row) {
+                direct += a[col] * b[row - col];
+            } else {
+                wrapped += a[col] * b[row + 32 - col];
+            }
+        }
+        /* uint32_t arithmetic wraps, so scaling the sum by 38 equals
+         * summing the individually scaled products. */
+        out[row] = direct + 38 * wrapped;
+    }
+    squeeze(out);
+}
+
+/*
+ * Multiplication by a small constant
+ * out = 121665 * a
+ *
+ * Carries are propagated limb by limb; the part above the low 7 bits of
+ * limb 31 is multiplied by 19 and folded back in from limb 0 upward.
+ */
+static void
+mult121665(elem out, const elem a)
+{
+    uint32_t acc = 0;
+    uint32_t k;
+
+    for (k = 0; k != 31; k++) {
+        acc += 121665 * a[k];
+        out[k] = acc & 0xFF;
+        acc >>= 8;
+    }
+    acc += 121665 * a[31];
+    out[31] = acc & 0x7F;
+
+    acc = 19 * (acc >> 7);
+    for (k = 0; k != 31; k++) {
+        acc += out[k];
+        out[k] = acc & 0xFF;
+        acc >>= 8;
+    }
+    out[31] += acc;
+}
+
+/*
+ * Square a and squeeze the result.
+ * out = a * a
+ *
+ * Each off-diagonal product is computed once and the partial sum is then
+ * doubled; the diagonal terms are added separately for even output
+ * indices. Products whose indices sum to i + 32 carry a factor of 38.
+ */
+static void
+square(elem out, const elem a)
+{
+ uint32_t i;
+ uint32_t j;
+ uint32_t u;
+
+ for (i = 0; i < 32; ++i) {
+ u = 0;
+ /* index pairs (j, i - j) with j strictly below its partner */
+ for (j = 0; j < i - j; ++j) {
+ u += a[j] * a[i - j];
+ }
+ /* index pairs (j, i + 32 - j) with j strictly below its partner */
+ for (j = i + 1; j < i + 32 - j; ++j) {
+ u += 38 * a[j] * a[i + 32 - j];
+ }
+ /* every pair above stands for two equal products */
+ u *= 2;
+ if ((i & 1) == 0) {
+ /* diagonal terms, present only when i is even */
+ u += a[i / 2] * a[i / 2];
+ u += 38 * a[i / 2 + 16] * a[i / 2 + 16];
+ }
+ out[i] = u;
+ }
+ squeeze(out);
+}
+
+/*
+ * Constant time swap between p and q depending on b
+ *
+ * b is turned into a mask by two's-complement negation (all ones for
+ * b == 1, zero for b == 0); each of the 64 words is then exchanged with
+ * XOR under that mask, so no branch depends on b.
+ */
+static void
+cswap(uint32_t p[64], uint32_t q[64], uint32_t b)
+{
+    const uint32_t mask = (uint32_t)0 - b;
+    uint32_t *lhs = p;
+    uint32_t *rhs = q;
+    uint32_t remaining = 64;
+
+    while (remaining != 0) {
+        const uint32_t diff = mask & (*lhs ^ *rhs);
+        *lhs ^= diff;
+        *rhs ^= diff;
+        lhs++;
+        rhs++;
+        remaining--;
+    }
+}
+
+/*
+ * Montgomery ladder
+ *
+ * Walks bits 254 down to 0 of `scalar` (32 byte-valued limbs), starting
+ * from `point`. x_2 and x_3 each hold two 32-limb halves: limbs 0..31 are
+ * returned as x_2_out and limbs 32..63 as z_2_out, so the caller gets a
+ * numerator/denominator pair that it still has to divide.
+ */
+static void
+monty(elem x_2_out, elem z_2_out,
+ const elem point, const elem scalar)
+{
+ uint32_t x_3[64] = { 0 };
+ uint32_t x_2[64] = { 0 };
+ uint32_t a0[64];
+ uint32_t a1[64];
+ uint32_t b0[64];
+ uint32_t b1[64];
+ uint32_t c1[64];
+ uint32_t r[32];
+ uint32_t s[32];
+ uint32_t t[32];
+ uint32_t u[32];
+ uint32_t swap = 0;
+ uint32_t k_t = 0;
+ int j;
+
+ /* x_3 = (point, 1), x_2 = (1, 0) */
+ for (j = 0; j < 32; ++j) {
+ x_3[j] = point[j];
+ }
+ x_3[32] = 1;
+ x_2[0] = 1;
+
+ for (j = 254; j >= 0; --j) {
+ /* bit j of the scalar */
+ k_t = (scalar[j >> 3] >> (j & 7)) & 1;
+ /* swap only when this bit differs from the previous one */
+ swap ^= k_t;
+ cswap(x_2, x_3, swap);
+ swap = k_t;
+ /* sums and differences of the two halves of x_2 and of x_3 */
+ add(a0, x_2, x_2 + 32);
+ sub(a0 + 32, x_2, x_2 + 32);
+ add(a1, x_3, x_3 + 32);
+ sub(a1 + 32, x_3, x_3 + 32);
+ square(b0, a0);
+ square(b0 + 32, a0 + 32);
+ mult(b1, a1, a0 + 32);
+ mult(b1 + 32, a1 + 32, a0);
+ add(c1, b1, b1 + 32);
+ sub(c1 + 32, b1, b1 + 32);
+ square(r, c1 + 32);
+ sub(s, b0, b0 + 32);
+ mult121665(t, s);
+ add(u, t, b0);
+ /* new x_2 and x_3, both halves */
+ mult(x_2, b0, b0 + 32);
+ mult(x_2 + 32, s, u);
+ square(x_3, c1);
+ mult(x_3 + 32, r, point);
+ }
+
+ /* undo the swap left pending by the last processed bit */
+ cswap(x_2, x_3, swap);
+ for (j = 0; j < 32; ++j) {
+ x_2_out[j] = x_2[j];
+ }
+ for (j = 0; j < 32; ++j) {
+ z_2_out[j] = x_2[j + 32];
+ }
+}
+
+/*
+ * out = z^(2^255 - 21), computed with a fixed chain of square() and mult()
+ * calls. The comment in front of each step gives the exponent of z reached
+ * at that point. With the modulus 2^255 - 19 used by this file, the
+ * exponent 2^255 - 21 is the modulus minus 2. `out` is written only by the
+ * last step, so it may be the same array as `z`.
+ */
+static void
+recip(elem out, const elem z)
+{
+ elem z2;
+ elem z9;
+ elem z11;
+ elem z2_5_0;
+ elem z2_10_0;
+ elem z2_20_0;
+ elem z2_50_0;
+ elem z2_100_0;
+ elem t0;
+ elem t1;
+ int i;
+
+ /* 2 */ square(z2, z);
+ /* 4 */ square(t1, z2);
+ /* 8 */ square(t0, t1);
+ /* 9 */ mult(z9, t0, z);
+ /* 11 */ mult(z11, z9, z2);
+ /* 22 */ square(t0, z11);
+ /* 2^5 - 2^0 = 31 */ mult(z2_5_0, t0, z9);
+
+ /* 2^6 - 2^1 */ square(t0, z2_5_0);
+ /* 2^7 - 2^2 */ square(t1, t0);
+ /* 2^8 - 2^3 */ square(t0, t1);
+ /* 2^9 - 2^4 */ square(t1, t0);
+ /* 2^10 - 2^5 */ square(t0, t1);
+ /* 2^10 - 2^0 */ mult(z2_10_0, t0, z2_5_0);
+
+ /* 2^11 - 2^1 */ square(t0, z2_10_0);
+ /* 2^12 - 2^2 */ square(t1, t0);
+ /* 2^20 - 2^10 */
+ for (i = 2; i < 10; i += 2) {
+ square(t0, t1);
+ square(t1, t0);
+ }
+ /* 2^20 - 2^0 */ mult(z2_20_0, t1, z2_10_0);
+
+ /* 2^21 - 2^1 */ square(t0, z2_20_0);
+ /* 2^22 - 2^2 */ square(t1, t0);
+ /* 2^40 - 2^20 */
+ for (i = 2; i < 20; i += 2) {
+ square(t0, t1);
+ square(t1, t0);
+ }
+ /* 2^40 - 2^0 */ mult(t0, t1, z2_20_0);
+
+ /* 2^41 - 2^1 */ square(t1, t0);
+ /* 2^42 - 2^2 */ square(t0, t1);
+ /* 2^50 - 2^10 */
+ for (i = 2; i < 10; i += 2) {
+ square(t1, t0);
+ square(t0, t1);
+ }
+ /* 2^50 - 2^0 */ mult(z2_50_0, t0, z2_10_0);
+
+ /* 2^51 - 2^1 */ square(t0, z2_50_0);
+ /* 2^52 - 2^2 */ square(t1, t0);
+ /* 2^100 - 2^50 */
+ for (i = 2; i < 50; i += 2) {
+ square(t0, t1);
+ square(t1, t0);
+ }
+ /* 2^100 - 2^0 */ mult(z2_100_0, t1, z2_50_0);
+
+ /* 2^101 - 2^1 */ square(t1, z2_100_0);
+ /* 2^102 - 2^2 */ square(t0, t1);
+ /* 2^200 - 2^100 */
+ for (i = 2; i < 100; i += 2) {
+ square(t1, t0);
+ square(t0, t1);
+ }
+ /* 2^200 - 2^0 */ mult(t1, t0, z2_100_0);
+
+ /* 2^201 - 2^1 */ square(t0, t1);
+ /* 2^202 - 2^2 */ square(t1, t0);
+ /* 2^250 - 2^50 */
+ for (i = 2; i < 50; i += 2) {
+ square(t0, t1);
+ square(t1, t0);
+ }
+ /* 2^250 - 2^0 */ mult(t0, t1, z2_50_0);
+
+ /* 2^251 - 2^1 */ square(t1, t0);
+ /* 2^252 - 2^2 */ square(t0, t1);
+ /* 2^253 - 2^3 */ square(t1, t0);
+ /* 2^254 - 2^4 */ square(t0, t1);
+ /* 2^255 - 2^5 */ square(t1, t0);
+ /* 2^255 - 21 */ mult(out, t1, z11);
+}
+
+/*
+ * Computes q = Curve25519(p, s)
+ *
+ * q - output, 32 bytes are written.
+ * s - scalar, 32 bytes are read. The working copy is masked before use:
+ *     the low three bits of byte 0 and the top bit of byte 31 are
+ *     cleared, and bit 6 of byte 31 is set.
+ * p - input point, 32 bytes are read; the top bit of byte 31 is ignored.
+ *
+ * The ladder result is divided (via recip and mult) and fully reduced
+ * before being written out. Always returns SECSuccess.
+ */
+SECStatus
+ec_Curve25519_mul(PRUint8 *q, const PRUint8 *s, const PRUint8 *p)
+{
+    elem point = { 0 };
+    elem x_2 = { 0 };
+    elem z_2 = { 0 };
+    elem X = { 0 };
+    elem scalar = { 0 };
+    uint32_t i;
+
+    /* read and mask scalar */
+    for (i = 0; i < 32; ++i) {
+        scalar[i] = s[i];
+    }
+    scalar[0] &= 0xF8;
+    scalar[31] &= 0x7F;
+    scalar[31] |= 64;
+
+    /* read and mask point */
+    for (i = 0; i < 32; ++i) {
+        point[i] = p[i];
+    }
+    point[31] &= 0x7F;
+
+    monty(x_2, z_2, point, scalar);
+    /* X = x_2 / z_2 */
+    recip(z_2, z_2);
+    mult(X, x_2, z_2);
+    reduce(X);
+    for (i = 0; i < 32; ++i) {
+        q[i] = X[i];
+    }
+    /* Use the status constant rather than a bare 0. */
+    return SECSuccess;
+}
diff --git a/security/nss/lib/freebl/ecl/curve25519_64.c b/security/nss/lib/freebl/ecl/curve25519_64.c
new file mode 100644
index 000000000..89327ad1c
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/curve25519_64.c
@@ -0,0 +1,514 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * Derived from public domain C code by Adam Langley and Daniel J. Bernstein
+ */
+
+#include "uint128.h"
+
+#include "ecl-priv.h"
+#include "mpi.h"
+
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+
+typedef uint8_t u8; /* one byte of a 32-byte serialized value */
+typedef uint64_t felem; /* one limb; the functions below work on arrays of five */
+
+/* Sum two numbers: output += in
+ *
+ * Both arguments are five-limb arrays; the limbs are added pairwise in place
+ * and no carry is moved between limbs.
+ */
+static void
+fsum(felem *output, const felem *in)
+{
+    output[0] += in[0];
+    output[1] += in[1];
+    output[2] += in[2];
+    output[3] += in[3];
+    output[4] += in[4];
+}
+
+/* Find the difference of two numbers: output = in - output
+ * (note the order of the arguments!)
+ *
+ * Both arguments are five-limb arrays. The limbs are reinterpreted as
+ * int64_t and subtracted pairwise; every limb that came out negative is then
+ * repaired by adding 2^51 to it and taking 1 from the next limb (NEGCHAIN).
+ * A negative top limb takes 19 from limb 0 instead (NEGCHAIN19), since
+ * 2^255 is congruent to 19 modulo 2^255 - 19. The chain over limbs 0..3 is
+ * run a second time to absorb a borrow created by that wrap-around.
+ *
+ * NOTE(review): this depends on >> of a negative int64_t being an arithmetic
+ * shift, which C leaves implementation-defined. NEGCHAIN and NEGCHAIN19 are
+ * not #undef'd in this function.
+ */
+static void
+fdifference_backwards(felem *ioutput, const felem *iin)
+{
+ static const int64_t twotothe51 = ((int64_t)1l << 51);
+ const int64_t *in = (const int64_t *)iin;
+ int64_t *out = (int64_t *)ioutput;
+
+ out[0] = in[0] - out[0];
+ out[1] = in[1] - out[1];
+ out[2] = in[2] - out[2];
+ out[3] = in[3] - out[3];
+ out[4] = in[4] - out[4];
+
+ // An arithmetic shift right of 63 places turns a positive number to 0 and a
+ // negative number to all 1's. This gives us a bitmask that lets us avoid
+ // side-channel prone branches.
+ int64_t t;
+
+#define NEGCHAIN(a, b) \
+ t = out[a] >> 63; \
+ out[a] += twotothe51 & t; \
+ out[b] -= 1 & t;
+
+#define NEGCHAIN19(a, b) \
+ t = out[a] >> 63; \
+ out[a] += twotothe51 & t; \
+ out[b] -= 19 & t;
+
+ NEGCHAIN(0, 1);
+ NEGCHAIN(1, 2);
+ NEGCHAIN(2, 3);
+ NEGCHAIN(3, 4);
+ NEGCHAIN19(4, 0);
+ NEGCHAIN(0, 1);
+ NEGCHAIN(1, 2);
+ NEGCHAIN(2, 3);
+ NEGCHAIN(3, 4);
+}
+
+/* Multiply a number by a scalar: output = in * scalar
+ *
+ * in and output are five-limb arrays. Each limb is multiplied by scalar into
+ * a 128-bit accumulator together with whatever the previous limb left above
+ * bit 51; output receives mask51() of each accumulator. What is left above
+ * bit 51 after the top limb is multiplied by 19 and added to output[0].
+ */
+static void
+fscalar_product(felem *output, const felem *in,
+                const felem scalar)
+{
+    uint128_t acc;
+    unsigned limb;
+
+    acc = mul6464(in[0], scalar);
+    output[0] = mask51(acc);
+
+    for (limb = 1; limb < 5; ++limb) {
+        acc = add128(mul6464(in[limb], scalar), rshift128(acc, 51));
+        output[limb] = mask51(acc);
+    }
+
+    output[0] += mask_lower(rshift128(acc, 51)) * 19;
+}
+
+/* Multiply two numbers: output = in2 * in
+ *
+ * All three arguments are five-limb arrays. output must be distinct to both
+ * inputs. The inputs are reduced coefficient form, the output is not (limb 2
+ * is stored through mask_lower() after receiving a final carry).
+ *
+ * The 25 limb products are gathered into nine 128-bit columns c0..c8; the
+ * four upper columns are passed through mul12819() and added to c0..c3, and
+ * the columns are then carried at 51 bits.
+ */
+static void
+fmul(felem *output, const felem *in2, const felem *in)
+{
+    const felem a0 = in[0], a1 = in[1], a2 = in[2], a3 = in[3], a4 = in[4];
+    const felem b0 = in2[0], b1 = in2[1], b2 = in2[2], b3 = in2[3], b4 = in2[4];
+    uint128_t c0, c1, c2, c3, c4, c5, c6, c7, c8;
+
+    c0 = mul6464(a0, b0);
+
+    c1 = mul6464(a1, b0);
+    c1 = add128(c1, mul6464(a0, b1));
+
+    c2 = mul6464(a0, b2);
+    c2 = add128(c2, mul6464(a2, b0));
+    c2 = add128(c2, mul6464(a1, b1));
+
+    c3 = mul6464(a0, b3);
+    c3 = add128(c3, mul6464(a3, b0));
+    c3 = add128(c3, mul6464(a1, b2));
+    c3 = add128(c3, mul6464(a2, b1));
+
+    c4 = mul6464(a0, b4);
+    c4 = add128(c4, mul6464(a4, b0));
+    c4 = add128(c4, mul6464(a3, b1));
+    c4 = add128(c4, mul6464(a1, b3));
+    c4 = add128(c4, mul6464(a2, b2));
+
+    c5 = mul6464(a4, b1);
+    c5 = add128(c5, mul6464(a1, b4));
+    c5 = add128(c5, mul6464(a2, b3));
+    c5 = add128(c5, mul6464(a3, b2));
+
+    c6 = mul6464(a4, b2);
+    c6 = add128(c6, mul6464(a2, b4));
+    c6 = add128(c6, mul6464(a3, b3));
+
+    c7 = mul6464(a3, b4);
+    c7 = add128(c7, mul6464(a4, b3));
+
+    c8 = mul6464(a4, b4);
+
+    /* fold columns 5..8 onto columns 0..3 */
+    c0 = add128(c0, mul12819(c5));
+    c1 = add128(c1, mul12819(c6));
+    c2 = add128(c2, mul12819(c7));
+    c3 = add128(c3, mul12819(c8));
+
+    /* carry upwards, wrap the top carry onto column 0, carry once more */
+    c1 = add128(c1, rshift128(c0, 51));
+    c0 = mask51full(c0);
+    c2 = add128(c2, rshift128(c1, 51));
+    c1 = mask51full(c1);
+    c3 = add128(c3, rshift128(c2, 51));
+    c4 = add128(c4, rshift128(c3, 51));
+    c0 = add128(c0, mul12819(rshift128(c4, 51)));
+    c1 = add128(c1, rshift128(c0, 51));
+    c2 = mask51full(c2);
+    c2 = add128(c2, rshift128(c1, 51));
+
+    output[0] = mask51(c0);
+    output[1] = mask51(c1);
+    output[2] = mask_lower(c2);
+    output[3] = mask51(c3);
+    output[4] = mask51(c4);
+}
+
+/* Square a number: output = in * in
+ *
+ * Same column layout and folding as fmul(), with each cross product computed
+ * once and doubled by a left shift of one. All limbs of in are read before
+ * output is written. Limb 1 of the result is stored through mask_lower()
+ * after receiving the final carry.
+ */
+static void
+fsquare(felem *output, const felem *in)
+{
+    const felem a0 = in[0], a1 = in[1], a2 = in[2], a3 = in[3], a4 = in[4];
+    uint128_t c0, c1, c2, c3, c4, c5, c6, c7, c8;
+
+    c0 = mul6464(a0, a0);
+
+    c1 = lshift128(mul6464(a0, a1), 1);
+
+    c2 = lshift128(mul6464(a0, a2), 1);
+    c2 = add128(c2, mul6464(a1, a1));
+
+    c3 = lshift128(mul6464(a0, a3), 1);
+    c3 = add128(c3, lshift128(mul6464(a1, a2), 1));
+
+    c4 = lshift128(mul6464(a0, a4), 1);
+    c4 = add128(c4, lshift128(mul6464(a3, a1), 1));
+    c4 = add128(c4, mul6464(a2, a2));
+
+    c5 = lshift128(mul6464(a4, a1), 1);
+    c5 = add128(c5, lshift128(mul6464(a2, a3), 1));
+
+    c6 = lshift128(mul6464(a4, a2), 1);
+    c6 = add128(c6, mul6464(a3, a3));
+
+    c7 = lshift128(mul6464(a3, a4), 1);
+
+    c8 = mul6464(a4, a4);
+
+    /* fold columns 5..8 onto columns 0..3 */
+    c0 = add128(c0, mul12819(c5));
+    c1 = add128(c1, mul12819(c6));
+    c2 = add128(c2, mul12819(c7));
+    c3 = add128(c3, mul12819(c8));
+
+    /* carry upwards, wrap the top carry onto column 0, carry once more */
+    c1 = add128(c1, rshift128(c0, 51));
+    c0 = mask51full(c0);
+    c2 = add128(c2, rshift128(c1, 51));
+    c1 = mask51full(c1);
+    c3 = add128(c3, rshift128(c2, 51));
+    c4 = add128(c4, rshift128(c3, 51));
+    c0 = add128(c0, mul12819(rshift128(c4, 51)));
+    c1 = add128(c1, rshift128(c0, 51));
+
+    output[0] = mask51(c0);
+    output[1] = mask_lower(c1);
+    output[2] = mask51(c2);
+    output[3] = mask51(c3);
+    output[4] = mask51(c4);
+}
+
+/* Take a 32-byte number and expand it into polynomial form
+ *
+ * output receives five limbs, each masked with MASK51. Limb k is taken from
+ * the 64-bit word that starts at the byte containing bit 51 * k of the
+ * input, shifted right so that bit lands in position 0. Only in[0..31] is
+ * read; bit 255 of the input is discarded by the mask on the last limb.
+ *
+ * NOTE(review): the loads go through casts of in to uint64_t, so they use
+ * the host byte order and arbitrary alignment (hence NO_SANITIZE_ALIGNMENT).
+ * The byte/shift pairs below assume a little-endian host and that MASK51 is
+ * 2^51 - 1 - confirm against uint128.h.
+ */
+static void NO_SANITIZE_ALIGNMENT
+fexpand(felem *output, const u8 *in)
+{
+    output[0] = *((const uint64_t *)(in)) & MASK51;
+    output[1] = (*((const uint64_t *)(in + 6)) >> 3) & MASK51;
+    output[2] = (*((const uint64_t *)(in + 12)) >> 6) & MASK51;
+    output[3] = (*((const uint64_t *)(in + 19)) >> 1) & MASK51;
+    /* Bit 204 is bit 12 of the word at in + 24. Loading that word keeps the
+     * access inside the 32-byte input; an 8-byte load from in + 25 would
+     * also touch in[32]. */
+    output[4] = (*((const uint64_t *)(in + 24)) >> 12) & MASK51;
+}
+
+/* Take a fully reduced polynomial form number and contract it into a
+ * 32-byte array
+ *
+ * The five limbs are widened to uint128_t and carried at 51 bits twice; in
+ * each pass whatever is above bit 51 of limb 4 is passed through mul12819()
+ * (declared outside this chunk; presumably a multiply by 19) and added to
+ * limb 0. 19 is then added and carried the same way once more. Finally
+ * 2^51 - 19 is added to limb 0 and 2^51 - 1 to limbs 1..4, and a last carry
+ * pass is run in which the overflow of limb 4 is simply masked away. The
+ * in-line comments track the value range after each stage.
+ *
+ * NOTE(review): the result is written with four uint64_t stores through
+ * casts of output, so the bytes produced follow the host byte order and
+ * output is accessed as uint64_t whatever its alignment - confirm this is
+ * acceptable on every target this file is built for.
+ */
+static void
+fcontract(u8 *output, const felem *input)
+{
+ uint128_t t0 = init128x(input[0]);
+ uint128_t t1 = init128x(input[1]);
+ uint128_t t2 = init128x(input[2]);
+ uint128_t t3 = init128x(input[3]);
+ uint128_t t4 = init128x(input[4]);
+ uint128_t tmp = init128x(19);
+
+ t1 = add128(t1, rshift128(t0, 51));
+ t0 = mask51full(t0);
+ t2 = add128(t2, rshift128(t1, 51));
+ t1 = mask51full(t1);
+ t3 = add128(t3, rshift128(t2, 51));
+ t2 = mask51full(t2);
+ t4 = add128(t4, rshift128(t3, 51));
+ t3 = mask51full(t3);
+ t0 = add128(t0, mul12819(rshift128(t4, 51)));
+ t4 = mask51full(t4);
+
+ t1 = add128(t1, rshift128(t0, 51));
+ t0 = mask51full(t0);
+ t2 = add128(t2, rshift128(t1, 51));
+ t1 = mask51full(t1);
+ t3 = add128(t3, rshift128(t2, 51));
+ t2 = mask51full(t2);
+ t4 = add128(t4, rshift128(t3, 51));
+ t3 = mask51full(t3);
+ t0 = add128(t0, mul12819(rshift128(t4, 51)));
+ t4 = mask51full(t4);
+
+ /* now t is between 0 and 2^255-1, properly carried. */
+ /* case 1: between 0 and 2^255-20. case 2: between 2^255-19 and 2^255-1. */
+
+ t0 = add128(t0, tmp);
+
+ t1 = add128(t1, rshift128(t0, 51));
+ t0 = mask51full(t0);
+ t2 = add128(t2, rshift128(t1, 51));
+ t1 = mask51full(t1);
+ t3 = add128(t3, rshift128(t2, 51));
+ t2 = mask51full(t2);
+ t4 = add128(t4, rshift128(t3, 51));
+ t3 = mask51full(t3);
+ t0 = add128(t0, mul12819(rshift128(t4, 51)));
+ t4 = mask51full(t4);
+
+ /* now between 19 and 2^255-1 in both cases, and offset by 19. */
+
+ t0 = add128(t0, init128x(0x8000000000000 - 19));
+ tmp = init128x(0x8000000000000 - 1);
+ t1 = add128(t1, tmp);
+ t2 = add128(t2, tmp);
+ t3 = add128(t3, tmp);
+ t4 = add128(t4, tmp);
+
+ /* now between 2^255 and 2^256-20, and offset by 2^255. */
+
+ t1 = add128(t1, rshift128(t0, 51));
+ t0 = mask51full(t0);
+ t2 = add128(t2, rshift128(t1, 51));
+ t1 = mask51full(t1);
+ t3 = add128(t3, rshift128(t2, 51));
+ t2 = mask51full(t2);
+ t4 = add128(t4, rshift128(t3, 51));
+ t3 = mask51full(t3);
+ t4 = mask51full(t4);
+
+ *((uint64_t *)(output)) = mask_lower(t0) | mask_lower(t1) << 51;
+ *((uint64_t *)(output + 8)) = (mask_lower(t1) >> 13) | (mask_lower(t2) << 38);
+ *((uint64_t *)(output + 16)) = (mask_lower(t2) >> 26) | (mask_lower(t3) << 25);
+ *((uint64_t *)(output + 24)) = (mask_lower(t3) >> 39) | (mask_lower(t4) << 12);
+}
+
+/* Input: Q, Q', Q-Q'
+ * Output: 2Q, Q+Q'
+ *
+ * x2 z2: long form
+ * x3 z3: long form
+ * x z: short form, destroyed
+ * xprime zprime: short form, destroyed
+ * qmqp: short form, preserved
+ *
+ * Every argument is a five-limb array. x, z, xprime and zprime are
+ * overwritten because fsum() and fdifference_backwards() work in place on
+ * them. Only the first five limbs of qmqp (the x value handed in by the
+ * caller) are used, as an operand of fmul().
+ */
+static void
+fmonty(felem *x2, felem *z2, /* output 2Q */
+ felem *x3, felem *z3, /* output Q + Q' */
+ felem *x, felem *z, /* input Q */
+ felem *xprime, felem *zprime, /* input Q' */
+ const felem *qmqp /* input Q - Q' */)
+{
+ felem origx[5], origxprime[5], zzz[5], xx[5], zz[5], xxprime[5], zzprime[5],
+ zzzprime[5];
+
+ memcpy(origx, x, 5 * sizeof(felem));
+ fsum(x, z);
+ fdifference_backwards(z, origx); // does x - z
+
+ memcpy(origxprime, xprime, sizeof(felem) * 5);
+ fsum(xprime, zprime);
+ fdifference_backwards(zprime, origxprime);
+ fmul(xxprime, xprime, z);
+ fmul(zzprime, x, zprime);
+ memcpy(origxprime, xxprime, sizeof(felem) * 5);
+ fsum(xxprime, zzprime);
+ fdifference_backwards(zzprime, origxprime);
+ fsquare(x3, xxprime);
+ fsquare(zzzprime, zzprime);
+ fmul(z3, zzzprime, qmqp);
+
+ fsquare(xx, x);
+ fsquare(zz, z);
+ fmul(x2, xx, zz);
+ fdifference_backwards(zz, xx); // does zz = xx - zz
+ fscalar_product(zzz, zz, 121665);
+ fsum(zzz, xx);
+ fmul(z2, zz, zzz);
+}
+
+// -----------------------------------------------------------------------------
+// Conditionally exchange two felem arrays (@a and @b) of @len elements each.
+// @iswap is expected to be 0 (leave both arrays alone) or 1 (exchange them):
+// it is negated into an all-zeros or all-ones mask that gates an XOR swap.
+//
+// The same instructions run for either value of @iswap; there is no branch
+// on it.
+// -----------------------------------------------------------------------------
+static void
+swap_conditional(felem *a, felem *b, unsigned len, felem iswap)
+{
+    /* 0 - iswap in unsigned arithmetic: 0 stays 0, 1 becomes all ones. */
+    const felem mask = (felem)0 - iswap;
+    felem *const end = a + len;
+
+    for (; a != end; ++a, ++b) {
+        const felem delta = mask & (*a ^ *b);
+        *a ^= delta;
+        *b ^= delta;
+    }
+}
+
+/* Calculates nQ where Q is the x-coordinate of a point on the curve
+ *
+ * resultx/resultz: the x coordinate of the resulting curve point (short form)
+ * n: a 32-byte number
+ * q: a point of the curve (short form)
+ *
+ * The working pair nqx/nqz starts as (1, 0) and nqpqx/nqpqz as (q, 1). The
+ * 256 bits of n are consumed from the top bit of n[31] down to the low bit
+ * of n[0]. For every bit the two pairs are conditionally swapped on that
+ * bit, one fmonty() step is run, and its two outputs are conditionally
+ * swapped back; the buffer pointers are then exchanged so that the outputs
+ * of this step are the inputs of the next. The final nqx/nqz are copied to
+ * resultx/resultz (five limbs each).
+ */
+static void
+cmult(felem *resultx, felem *resultz, const u8 *n, const felem *q)
+{
+ felem a[5] = { 0 }, b[5] = { 1 }, c[5] = { 1 }, d[5] = { 0 };
+ felem *nqpqx = a, *nqpqz = b, *nqx = c, *nqz = d, *t;
+ felem e[5] = { 0 }, f[5] = { 1 }, g[5] = { 0 }, h[5] = { 1 };
+ felem *nqpqx2 = e, *nqpqz2 = f, *nqx2 = g, *nqz2 = h;
+
+ unsigned i, j;
+
+ memcpy(nqpqx, q, sizeof(felem) * 5);
+
+ for (i = 0; i < 32; ++i) {
+ u8 byte = n[31 - i];
+ for (j = 0; j < 8; ++j) {
+ const felem bit = byte >> 7;
+
+ swap_conditional(nqx, nqpqx, 5, bit);
+ swap_conditional(nqz, nqpqz, 5, bit);
+ fmonty(nqx2, nqz2, nqpqx2, nqpqz2, nqx, nqz, nqpqx, nqpqz, q);
+ swap_conditional(nqx2, nqpqx2, 5, bit);
+ swap_conditional(nqz2, nqpqz2, 5, bit);
+
+ t = nqx;
+ nqx = nqx2;
+ nqx2 = t;
+ t = nqz;
+ nqz = nqz2;
+ nqz2 = t;
+ t = nqpqx;
+ nqpqx = nqpqx2;
+ nqpqx2 = t;
+ t = nqpqz;
+ nqpqz = nqpqz2;
+ nqpqz2 = t;
+
+ byte <<= 1;
+ }
+ }
+
+ memcpy(resultx, nqx, sizeof(felem) * 5);
+ memcpy(resultz, nqz, sizeof(felem) * 5);
+}
+
+// -----------------------------------------------------------------------------
+// Shamelessly copied from djb's code
+//
+// Raises z to the power 2^255 - 21 with a fixed sequence of fsquare()/fmul()
+// calls and stores the result in out. The comment in front of each step is
+// the exponent reached so far; t0 and t1 alternate as source and destination
+// of consecutive squarings, and each loop performs the squarings that are
+// not written out (two per iteration).
+// NOTE(review): 2^255 - 21 is p - 2 for p = 2^255 - 19, so this is presumably
+// the modular inverse of z; the caller multiplies x by the result.
+// -----------------------------------------------------------------------------
+static void
+crecip(felem *out, const felem *z)
+{
+ felem z2[5];
+ felem z9[5];
+ felem z11[5];
+ felem z2_5_0[5];
+ felem z2_10_0[5];
+ felem z2_20_0[5];
+ felem z2_50_0[5];
+ felem z2_100_0[5];
+ felem t0[5];
+ felem t1[5];
+ int i;
+
+ /* 2 */ fsquare(z2, z);
+ /* 4 */ fsquare(t1, z2);
+ /* 8 */ fsquare(t0, t1);
+ /* 9 */ fmul(z9, t0, z);
+ /* 11 */ fmul(z11, z9, z2);
+ /* 22 */ fsquare(t0, z11);
+ /* 2^5 - 2^0 = 31 */ fmul(z2_5_0, t0, z9);
+
+ /* 2^6 - 2^1 */ fsquare(t0, z2_5_0);
+ /* 2^7 - 2^2 */ fsquare(t1, t0);
+ /* 2^8 - 2^3 */ fsquare(t0, t1);
+ /* 2^9 - 2^4 */ fsquare(t1, t0);
+ /* 2^10 - 2^5 */ fsquare(t0, t1);
+ /* 2^10 - 2^0 */ fmul(z2_10_0, t0, z2_5_0);
+
+ /* 2^11 - 2^1 */ fsquare(t0, z2_10_0);
+ /* 2^12 - 2^2 */ fsquare(t1, t0);
+ /* 2^20 - 2^10 */ for (i = 2; i < 10; i += 2) {
+ fsquare(t0, t1);
+ fsquare(t1, t0);
+ }
+ /* 2^20 - 2^0 */ fmul(z2_20_0, t1, z2_10_0);
+
+ /* 2^21 - 2^1 */ fsquare(t0, z2_20_0);
+ /* 2^22 - 2^2 */ fsquare(t1, t0);
+ /* 2^40 - 2^20 */ for (i = 2; i < 20; i += 2) {
+ fsquare(t0, t1);
+ fsquare(t1, t0);
+ }
+ /* 2^40 - 2^0 */ fmul(t0, t1, z2_20_0);
+
+ /* 2^41 - 2^1 */ fsquare(t1, t0);
+ /* 2^42 - 2^2 */ fsquare(t0, t1);
+ /* 2^50 - 2^10 */ for (i = 2; i < 10; i += 2) {
+ fsquare(t1, t0);
+ fsquare(t0, t1);
+ }
+ /* 2^50 - 2^0 */ fmul(z2_50_0, t0, z2_10_0);
+
+ /* 2^51 - 2^1 */ fsquare(t0, z2_50_0);
+ /* 2^52 - 2^2 */ fsquare(t1, t0);
+ /* 2^100 - 2^50 */ for (i = 2; i < 50; i += 2) {
+ fsquare(t0, t1);
+ fsquare(t1, t0);
+ }
+ /* 2^100 - 2^0 */ fmul(z2_100_0, t1, z2_50_0);
+
+ /* 2^101 - 2^1 */ fsquare(t1, z2_100_0);
+ /* 2^102 - 2^2 */ fsquare(t0, t1);
+ /* 2^200 - 2^100 */ for (i = 2; i < 100; i += 2) {
+ fsquare(t1, t0);
+ fsquare(t0, t1);
+ }
+ /* 2^200 - 2^0 */ fmul(t1, t0, z2_100_0);
+
+ /* 2^201 - 2^1 */ fsquare(t0, t1);
+ /* 2^202 - 2^2 */ fsquare(t1, t0);
+ /* 2^250 - 2^50 */ for (i = 2; i < 50; i += 2) {
+ fsquare(t0, t1);
+ fsquare(t1, t0);
+ }
+ /* 2^250 - 2^0 */ fmul(t0, t1, z2_50_0);
+
+ /* 2^251 - 2^1 */ fsquare(t1, t0);
+ /* 2^252 - 2^2 */ fsquare(t0, t1);
+ /* 2^253 - 2^3 */ fsquare(t1, t0);
+ /* 2^254 - 2^4 */ fsquare(t0, t1);
+ /* 2^255 - 2^5 */ fsquare(t1, t0);
+ /* 2^255 - 21 */ fmul(out, t1, z11);
+}
+
+/* Computes mypublic = Curve25519(basepoint, secret).
+ *
+ * secret and basepoint are each read as 32 bytes; mypublic receives 32
+ * bytes. The scalar is copied and masked (low three bits of byte 0 cleared,
+ * top bit of byte 31 cleared, bit 6 of byte 31 set) before use. The point is
+ * expanded with fexpand(), multiplied with cmult(), and the resulting x/z
+ * pair is turned into a single value as x * crecip(z) and serialized with
+ * fcontract(). Always returns 0.
+ */
+SECStatus
+ec_Curve25519_mul(uint8_t *mypublic, const uint8_t *secret,
+                  const uint8_t *basepoint)
+{
+    felem base[5], numer[5], denom[5], denom_inv[5];
+    uint8_t scalar[32];
+
+    memcpy(scalar, secret, sizeof scalar);
+    scalar[0] &= 248;
+    scalar[31] = (scalar[31] & 127) | 64;
+
+    fexpand(base, basepoint);
+    cmult(numer, denom, scalar, base);
+    crecip(denom_inv, denom);
+    /* denom is no longer needed and is reused for the product */
+    fmul(denom, numer, denom_inv);
+    fcontract(mypublic, denom);
+
+    return 0;
+}
diff --git a/security/nss/lib/freebl/ecl/ec_naf.c b/security/nss/lib/freebl/ecl/ec_naf.c
new file mode 100644
index 000000000..cad08cb27
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ec_naf.c
@@ -0,0 +1,68 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ecl-priv.h"
+
+/* Returns 2^e as an integer. This is meant to be used for small powers of
+ * two: the value is built by doubling 1 e times in an int, so e <= 0 gives
+ * 1 and e must stay small enough for the result to fit. */
+int
+ec_twoTo(int e)
+{
+    int power = 1;
+
+    while (e > 0) {
+        power *= 2;
+        --e;
+    }
+    return power;
+}
+
+/* Computes the windowed non-adjacent-form (NAF) of a scalar.
+ *
+ * out:     array of signed char digits to fill; it must have room for
+ *          bitsize + 1 entries (indices 0..bitsize are written).
+ * bitsize: number of bits of the scalar the caller has sized out for.
+ * in:      the original scalar; it is copied, not modified. If it is not
+ *          positive, out is simply zero-filled.
+ * w:       the window size.
+ *
+ * Returns MP_OKAY on success, the error of the failing mp_* call otherwise,
+ * or MP_RANGE if the scalar needs more than bitsize + 1 digits (out is then
+ * only partially filled).
+ *
+ * NAF is discussed in the paper: D. Hankerson, J. Hernandez and A.
+ * Menezes, "Software implementation of elliptic curve cryptography over
+ * binary fields", Proc. CHES 2000. */
+mp_err
+ec_compute_wNAF(signed char *out, int bitsize, const mp_int *in, int w)
+{
+    mp_int k;
+    mp_err res = MP_OKAY;
+    int i, twowm1, mask;
+
+    twowm1 = ec_twoTo(w - 1);
+    mask = 2 * twowm1 - 1;
+
+    MP_DIGITS(&k) = 0;
+    MP_CHECKOK(mp_init_copy(&k, in));
+
+    i = 0;
+    /* Compute wNAF form */
+    while (mp_cmp_z(&k) > 0) {
+        /* out[bitsize] is the last slot; stop instead of writing past it. */
+        if (i > bitsize) {
+            res = MP_RANGE;
+            goto CLEANUP;
+        }
+        if (mp_isodd(&k)) {
+            out[i] = MP_DIGIT(&k, 0) & mask;
+            if (out[i] >= twowm1)
+                out[i] -= 2 * twowm1;
+
+            /* Subtract off out[i].  Note mp_sub_d only works with
+             * unsigned digits */
+            if (out[i] >= 0) {
+                MP_CHECKOK(mp_sub_d(&k, out[i], &k));
+            } else {
+                MP_CHECKOK(mp_add_d(&k, -(out[i]), &k));
+            }
+        } else {
+            out[i] = 0;
+        }
+        MP_CHECKOK(mp_div_2(&k, &k));
+        i++;
+    }
+    /* Zero out the remaining elements of the out array. */
+    for (; i < bitsize + 1; i++) {
+        out[i] = 0;
+    }
+CLEANUP:
+    mp_clear(&k);
+    return res;
+}
diff --git a/security/nss/lib/freebl/ecl/ecl-curve.h b/security/nss/lib/freebl/ecl/ecl-curve.h
new file mode 100644
index 000000000..df061396c
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecl-curve.h
@@ -0,0 +1,123 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ecl-exp.h"
+#include <stdlib.h>
+
+#ifndef __ecl_curve_h_
+#define __ecl_curve_h_
+
+/* copied from certt.h */
+#define KU_DIGITAL_SIGNATURE (0x80) /* bit 0 */
+#define KU_KEY_AGREEMENT (0x08) /* bit 4 */
+
+static const ECCurveParams ecCurve_NIST_P256 = {
+ "NIST-P256", ECField_GFp, 256,
+ "FFFFFFFF00000001000000000000000000000000FFFFFFFFFFFFFFFFFFFFFFFF",
+ "FFFFFFFF00000001000000000000000000000000FFFFFFFFFFFFFFFFFFFFFFFC",
+ "5AC635D8AA3A93E7B3EBBD55769886BC651D06B0CC53B0F63BCE3C3E27D2604B",
+ "6B17D1F2E12C4247F8BCE6E563A440F277037D812DEB33A0F4A13945D898C296",
+ "4FE342E2FE1A7F9B8EE7EB4A7C0F9E162BCE33576B315ECECBB6406837BF51F5",
+ "FFFFFFFF00000000FFFFFFFFFFFFFFFFBCE6FAADA7179E84F3B9CAC2FC632551",
+ 1, 128, 65, KU_DIGITAL_SIGNATURE | KU_KEY_AGREEMENT
+};
+
+static const ECCurveParams ecCurve_NIST_P384 = {
+ "NIST-P384", ECField_GFp, 384,
+ "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFF0000000000000000FFFFFFFF",
+ "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFF0000000000000000FFFFFFFC",
+ "B3312FA7E23EE7E4988E056BE3F82D19181D9C6EFE8141120314088F5013875AC656398D8A2ED19D2A85C8EDD3EC2AEF",
+ "AA87CA22BE8B05378EB1C71EF320AD746E1D3B628BA79B9859F741E082542A385502F25DBF55296C3A545E3872760AB7",
+ "3617DE4A96262C6F5D9E98BF9292DC29F8F41DBD289A147CE9DA3113B5F0B8C00A60B1CE1D7E819D7A431D7C90EA0E5F",
+ "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFC7634D81F4372DDF581A0DB248B0A77AECEC196ACCC52973",
+ 1, 192, 97, KU_DIGITAL_SIGNATURE | KU_KEY_AGREEMENT
+};
+
+static const ECCurveParams ecCurve_NIST_P521 = {
+ "NIST-P521", ECField_GFp, 521,
+ "01FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF",
+ "01FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFC",
+ "0051953EB9618E1C9A1F929A21A0B68540EEA2DA725B99B315F3B8B489918EF109E156193951EC7E937B1652C0BD3BB1BF073573DF883D2C34F1EF451FD46B503F00",
+ "00C6858E06B70404E9CD9E3ECB662395B4429C648139053FB521F828AF606B4D3DBAA14B5E77EFE75928FE1DC127A2FFA8DE3348B3C1856A429BF97E7E31C2E5BD66",
+ "011839296A789A3BC0045C8A5FB42C7D1BD998F54449579B446817AFBD17273E662C97EE72995EF42640C550B9013FAD0761353C7086A272C24088BE94769FD16650",
+ "01FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFA51868783BF2F966B7FCC0148F709A5D03BB5C9B8899C47AEBB6FB71E91386409",
+ 1, 256, 133, KU_DIGITAL_SIGNATURE | KU_KEY_AGREEMENT
+};
+
+static const ECCurveParams ecCurve25519 = {
+ "Curve25519", ECField_GFp, 255,
+ "7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffed",
+ "076D06",
+ "00",
+ "0900000000000000000000000000000000000000000000000000000000000000",
+ "20AE19A1B8A086B4E01EDD2C7748D14C923D4D7E6D7C61B229E9C5A27ECED3D9",
+ "1000000000000000000000000000000014def9dea2f79cd65812631a5cf5d3ed",
+ 8, 128, 32, KU_KEY_AGREEMENT
+};
+
+/* mapping between ECCurveName enum and pointers to ECCurveParams
+ *
+ * There is one entry per ECCurveName enumerator, in enum order; the comment
+ * on each line names the enumerator it belongs to. NULL marks a curve for
+ * which this header provides no parameters.
+ */
+static const ECCurveParams *ecCurve_map[] = {
+ NULL, /* ECCurve_noName */
+ NULL, /* ECCurve_NIST_P192 */
+ NULL, /* ECCurve_NIST_P224 */
+ &ecCurve_NIST_P256, /* ECCurve_NIST_P256 */
+ &ecCurve_NIST_P384, /* ECCurve_NIST_P384 */
+ &ecCurve_NIST_P521, /* ECCurve_NIST_P521 */
+ NULL, /* ECCurve_NIST_K163 */
+ NULL, /* ECCurve_NIST_B163 */
+ NULL, /* ECCurve_NIST_K233 */
+ NULL, /* ECCurve_NIST_B233 */
+ NULL, /* ECCurve_NIST_K283 */
+ NULL, /* ECCurve_NIST_B283 */
+ NULL, /* ECCurve_NIST_K409 */
+ NULL, /* ECCurve_NIST_B409 */
+ NULL, /* ECCurve_NIST_K571 */
+ NULL, /* ECCurve_NIST_B571 */
+ NULL, /* ECCurve_X9_62_PRIME_192V2 */
+ NULL, /* ECCurve_X9_62_PRIME_192V3 */
+ NULL, /* ECCurve_X9_62_PRIME_239V1 */
+ NULL, /* ECCurve_X9_62_PRIME_239V2 */
+ NULL, /* ECCurve_X9_62_PRIME_239V3 */
+ NULL, /* ECCurve_X9_62_CHAR2_PNB163V1 */
+ NULL, /* ECCurve_X9_62_CHAR2_PNB163V2 */
+ NULL, /* ECCurve_X9_62_CHAR2_PNB163V3 */
+ NULL, /* ECCurve_X9_62_CHAR2_PNB176V1 */
+ NULL, /* ECCurve_X9_62_CHAR2_TNB191V1 */
+ NULL, /* ECCurve_X9_62_CHAR2_TNB191V2 */
+ NULL, /* ECCurve_X9_62_CHAR2_TNB191V3 */
+ NULL, /* ECCurve_X9_62_CHAR2_PNB208W1 */
+ NULL, /* ECCurve_X9_62_CHAR2_TNB239V1 */
+ NULL, /* ECCurve_X9_62_CHAR2_TNB239V2 */
+ NULL, /* ECCurve_X9_62_CHAR2_TNB239V3 */
+ NULL, /* ECCurve_X9_62_CHAR2_PNB272W1 */
+ NULL, /* ECCurve_X9_62_CHAR2_PNB304W1 */
+ NULL, /* ECCurve_X9_62_CHAR2_TNB359V1 */
+ NULL, /* ECCurve_X9_62_CHAR2_PNB368W1 */
+ NULL, /* ECCurve_X9_62_CHAR2_TNB431R1 */
+ NULL, /* ECCurve_SECG_PRIME_112R1 */
+ NULL, /* ECCurve_SECG_PRIME_112R2 */
+ NULL, /* ECCurve_SECG_PRIME_128R1 */
+ NULL, /* ECCurve_SECG_PRIME_128R2 */
+ NULL, /* ECCurve_SECG_PRIME_160K1 */
+ NULL, /* ECCurve_SECG_PRIME_160R1 */
+ NULL, /* ECCurve_SECG_PRIME_160R2 */
+ NULL, /* ECCurve_SECG_PRIME_192K1 */
+ NULL, /* ECCurve_SECG_PRIME_224K1 */
+ NULL, /* ECCurve_SECG_PRIME_256K1 */
+ NULL, /* ECCurve_SECG_CHAR2_113R1 */
+ NULL, /* ECCurve_SECG_CHAR2_113R2 */
+ NULL, /* ECCurve_SECG_CHAR2_131R1 */
+ NULL, /* ECCurve_SECG_CHAR2_131R2 */
+ NULL, /* ECCurve_SECG_CHAR2_163R1 */
+ NULL, /* ECCurve_SECG_CHAR2_193R1 */
+ NULL, /* ECCurve_SECG_CHAR2_193R2 */
+ NULL, /* ECCurve_SECG_CHAR2_239K1 */
+ NULL, /* ECCurve_WTLS_1 */
+ NULL, /* ECCurve_WTLS_8 */
+ NULL, /* ECCurve_WTLS_9 */
+ &ecCurve25519, /* ECCurve25519 */
+ NULL /* ECCurve_pastLastCurve */
+};
+
+#endif
diff --git a/security/nss/lib/freebl/ecl/ecl-exp.h b/security/nss/lib/freebl/ecl/ecl-exp.h
new file mode 100644
index 000000000..44adb8a1c
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecl-exp.h
@@ -0,0 +1,167 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef __ecl_exp_h_
+#define __ecl_exp_h_
+
+/* Curve field type */
+typedef enum {
+ ECField_GFp, /* prime field, GF(p) */
+ ECField_GF2m /* binary field, GF(2^m) */
+} ECField;
+
+/* Hexadecimal encoding of curve parameters
+ *
+ * The initialisers in ecl-curve.h fill the members positionally, so the
+ * member order below is part of the format.
+ */
+struct ECCurveParamsStr {
+ char *text; /* curve name, e.g. "NIST-P256" */
+ ECField field; /* field type */
+ unsigned int size; /* 256/384/521/255 in ecl-curve.h; presumably field size in bits */
+ char *irr; /* hex; the field prime for the GFp curves in ecl-curve.h */
+ char *curvea; /* hex; curve coefficient a */
+ char *curveb; /* hex; curve coefficient b */
+ char *genx; /* hex; x coordinate of the generator */
+ char *geny; /* hex; y coordinate of the generator */
+ char *order; /* hex; order of the generator */
+ int cofactor; /* 1 for the NIST curves, 8 for Curve25519 in ecl-curve.h */
+ int security; /* 128/192/256 in ecl-curve.h; presumably strength in bits - TODO confirm */
+ int pointSize; /* 65/97/133/32 in ecl-curve.h; presumably encoded point size in bytes - TODO confirm */
+ unsigned int usage; /* OR of KU_* flags (see ecl-curve.h) */
+};
+typedef struct ECCurveParamsStr ECCurveParams;
+
+/* Named curve parameters
+ *
+ * ecCurve_map[] in ecl-curve.h has one entry per enumerator, listed in this
+ * same order, so the two must be kept in sync when a name is added or
+ * removed. The names that appear only inside comments below are aliases of
+ * an earlier enumerator and are provided as macros after the enum.
+ */
+typedef enum {
+
+ ECCurve_noName = 0,
+
+ /* NIST prime curves */
+ ECCurve_NIST_P192, /* not supported */
+ ECCurve_NIST_P224, /* not supported */
+ ECCurve_NIST_P256,
+ ECCurve_NIST_P384,
+ ECCurve_NIST_P521,
+
+ /* NIST binary curves */
+ ECCurve_NIST_K163, /* not supported */
+ ECCurve_NIST_B163, /* not supported */
+ ECCurve_NIST_K233, /* not supported */
+ ECCurve_NIST_B233, /* not supported */
+ ECCurve_NIST_K283, /* not supported */
+ ECCurve_NIST_B283, /* not supported */
+ ECCurve_NIST_K409, /* not supported */
+ ECCurve_NIST_B409, /* not supported */
+ ECCurve_NIST_K571, /* not supported */
+ ECCurve_NIST_B571, /* not supported */
+
+ /* ANSI X9.62 prime curves */
+ /* ECCurve_X9_62_PRIME_192V1 == ECCurve_NIST_P192 */
+ ECCurve_X9_62_PRIME_192V2, /* not supported */
+ ECCurve_X9_62_PRIME_192V3, /* not supported */
+ ECCurve_X9_62_PRIME_239V1, /* not supported */
+ ECCurve_X9_62_PRIME_239V2, /* not supported */
+ ECCurve_X9_62_PRIME_239V3, /* not supported */
+ /* ECCurve_X9_62_PRIME_256V1 == ECCurve_NIST_P256 */
+
+ /* ANSI X9.62 binary curves */
+ ECCurve_X9_62_CHAR2_PNB163V1, /* not supported */
+ ECCurve_X9_62_CHAR2_PNB163V2, /* not supported */
+ ECCurve_X9_62_CHAR2_PNB163V3, /* not supported */
+ ECCurve_X9_62_CHAR2_PNB176V1, /* not supported */
+ ECCurve_X9_62_CHAR2_TNB191V1, /* not supported */
+ ECCurve_X9_62_CHAR2_TNB191V2, /* not supported */
+ ECCurve_X9_62_CHAR2_TNB191V3, /* not supported */
+ ECCurve_X9_62_CHAR2_PNB208W1, /* not supported */
+ ECCurve_X9_62_CHAR2_TNB239V1, /* not supported */
+ ECCurve_X9_62_CHAR2_TNB239V2, /* not supported */
+ ECCurve_X9_62_CHAR2_TNB239V3, /* not supported */
+ ECCurve_X9_62_CHAR2_PNB272W1, /* not supported */
+ ECCurve_X9_62_CHAR2_PNB304W1, /* not supported */
+ ECCurve_X9_62_CHAR2_TNB359V1, /* not supported */
+ ECCurve_X9_62_CHAR2_PNB368W1, /* not supported */
+ ECCurve_X9_62_CHAR2_TNB431R1, /* not supported */
+
+ /* SEC2 prime curves */
+ ECCurve_SECG_PRIME_112R1, /* not supported */
+ ECCurve_SECG_PRIME_112R2, /* not supported */
+ ECCurve_SECG_PRIME_128R1, /* not supported */
+ ECCurve_SECG_PRIME_128R2, /* not supported */
+ ECCurve_SECG_PRIME_160K1, /* not supported */
+ ECCurve_SECG_PRIME_160R1, /* not supported */
+ ECCurve_SECG_PRIME_160R2, /* not supported */
+ ECCurve_SECG_PRIME_192K1, /* not supported */
+ /* ECCurve_SECG_PRIME_192R1 == ECCurve_NIST_P192 */
+ ECCurve_SECG_PRIME_224K1, /* not supported */
+ /* ECCurve_SECG_PRIME_224R1 == ECCurve_NIST_P224 */
+ ECCurve_SECG_PRIME_256K1, /* not supported */
+ /* ECCurve_SECG_PRIME_256R1 == ECCurve_NIST_P256 */
+ /* ECCurve_SECG_PRIME_384R1 == ECCurve_NIST_P384 */
+ /* ECCurve_SECG_PRIME_521R1 == ECCurve_NIST_P521 */
+
+ /* SEC2 binary curves */
+ ECCurve_SECG_CHAR2_113R1, /* not supported */
+ ECCurve_SECG_CHAR2_113R2, /* not supported */
+ ECCurve_SECG_CHAR2_131R1, /* not supported */
+ ECCurve_SECG_CHAR2_131R2, /* not supported */
+ /* ECCurve_SECG_CHAR2_163K1 == ECCurve_NIST_K163 */
+ ECCurve_SECG_CHAR2_163R1, /* not supported */
+ /* ECCurve_SECG_CHAR2_163R2 == ECCurve_NIST_B163 */
+ ECCurve_SECG_CHAR2_193R1, /* not supported */
+ ECCurve_SECG_CHAR2_193R2, /* not supported */
+ /* ECCurve_SECG_CHAR2_233K1 == ECCurve_NIST_K233 */
+ /* ECCurve_SECG_CHAR2_233R1 == ECCurve_NIST_B233 */
+ ECCurve_SECG_CHAR2_239K1, /* not supported */
+ /* ECCurve_SECG_CHAR2_283K1 == ECCurve_NIST_K283 */
+ /* ECCurve_SECG_CHAR2_283R1 == ECCurve_NIST_B283 */
+ /* ECCurve_SECG_CHAR2_409K1 == ECCurve_NIST_K409 */
+ /* ECCurve_SECG_CHAR2_409R1 == ECCurve_NIST_B409 */
+ /* ECCurve_SECG_CHAR2_571K1 == ECCurve_NIST_K571 */
+ /* ECCurve_SECG_CHAR2_571R1 == ECCurve_NIST_B571 */
+
+ /* WTLS curves */
+ ECCurve_WTLS_1, /* not supported */
+ /* there is no WTLS 2 curve */
+ /* ECCurve_WTLS_3 == ECCurve_NIST_K163 */
+ /* ECCurve_WTLS_4 == ECCurve_SECG_CHAR2_113R1 */
+ /* ECCurve_WTLS_5 == ECCurve_X9_62_CHAR2_PNB163V1 */
+ /* ECCurve_WTLS_6 == ECCurve_SECG_PRIME_112R1 */
+ /* ECCurve_WTLS_7 == ECCurve_SECG_PRIME_160R1 */
+ ECCurve_WTLS_8, /* not supported */
+ ECCurve_WTLS_9, /* not supported */
+ /* ECCurve_WTLS_10 == ECCurve_NIST_K233 */
+ /* ECCurve_WTLS_11 == ECCurve_NIST_B233 */
+ /* ECCurve_WTLS_12 == ECCurve_NIST_P224 */
+
+ ECCurve25519,
+
+ ECCurve_pastLastCurve
+} ECCurveName;
+
+/* Aliased named curves
+ *
+ * Each macro gives an alternative name for an existing ECCurveName
+ * enumerator, so an alias and its target compare equal. "not supported"
+ * repeats the marking on the target enumerator.
+ */
+
+#define ECCurve_X9_62_PRIME_192V1 ECCurve_NIST_P192 /* not supported */
+#define ECCurve_X9_62_PRIME_256V1 ECCurve_NIST_P256
+#define ECCurve_SECG_PRIME_192R1 ECCurve_NIST_P192 /* not supported */
+#define ECCurve_SECG_PRIME_224R1 ECCurve_NIST_P224 /* not supported */
+#define ECCurve_SECG_PRIME_256R1 ECCurve_NIST_P256
+#define ECCurve_SECG_PRIME_384R1 ECCurve_NIST_P384
+#define ECCurve_SECG_PRIME_521R1 ECCurve_NIST_P521
+#define ECCurve_SECG_CHAR2_163K1 ECCurve_NIST_K163 /* not supported */
+#define ECCurve_SECG_CHAR2_163R2 ECCurve_NIST_B163 /* not supported */
+#define ECCurve_SECG_CHAR2_233K1 ECCurve_NIST_K233 /* not supported */
+#define ECCurve_SECG_CHAR2_233R1 ECCurve_NIST_B233 /* not supported */
+#define ECCurve_SECG_CHAR2_283K1 ECCurve_NIST_K283 /* not supported */
+#define ECCurve_SECG_CHAR2_283R1 ECCurve_NIST_B283 /* not supported */
+#define ECCurve_SECG_CHAR2_409K1 ECCurve_NIST_K409 /* not supported */
+#define ECCurve_SECG_CHAR2_409R1 ECCurve_NIST_B409 /* not supported */
+#define ECCurve_SECG_CHAR2_571K1 ECCurve_NIST_K571 /* not supported */
+#define ECCurve_SECG_CHAR2_571R1 ECCurve_NIST_B571 /* not supported */
+#define ECCurve_WTLS_3 ECCurve_NIST_K163 /* not supported */
+#define ECCurve_WTLS_4 ECCurve_SECG_CHAR2_113R1 /* not supported */
+#define ECCurve_WTLS_5 ECCurve_X9_62_CHAR2_PNB163V1 /* not supported */
+#define ECCurve_WTLS_6 ECCurve_SECG_PRIME_112R1 /* not supported */
+#define ECCurve_WTLS_7 ECCurve_SECG_PRIME_160R1 /* not supported */
+#define ECCurve_WTLS_10 ECCurve_NIST_K233 /* not supported */
+#define ECCurve_WTLS_11 ECCurve_NIST_B233 /* not supported */
+#define ECCurve_WTLS_12 ECCurve_NIST_P224 /* not supported */
+
+#endif /* __ecl_exp_h_ */
diff --git a/security/nss/lib/freebl/ecl/ecl-priv.h b/security/nss/lib/freebl/ecl/ecl-priv.h
new file mode 100644
index 000000000..f43f19327
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecl-priv.h
@@ -0,0 +1,257 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef __ecl_priv_h_
+#define __ecl_priv_h_
+
+#include "ecl.h"
+#include "mpi.h"
+#include "mplogic.h"
+#include "../blapii.h"
+
+/* Digit-width selection and field-size helpers.
+ * ECL_MAX_FIELD_SIZE_DIGITS (defined at the end of this group) is the
+ * maximum size of field element supported, counted in mp_digits. */
+/* the following (the ECL_SIXTY_FOUR_BIT / ECL_THIRTY_TWO_BIT switch) needs
+ * to go away... */
+#if defined(MP_USE_LONG_LONG_DIGIT) || defined(MP_USE_LONG_DIGIT)
+#define ECL_SIXTY_FOUR_BIT
+#else
+#define ECL_THIRTY_TWO_BIT
+#endif
+
+/* Number of mp_digits needed to hold curve_size_in_bits bits, rounded up. */
+#define ECL_CURVE_DIGITS(curve_size_in_bits) \
+ (((curve_size_in_bits) + (sizeof(mp_digit) * 8 - 1)) / (sizeof(mp_digit) * 8))
+/* Number of bits in one mp_digit (8 bits per byte is assumed). */
+#define ECL_BITS (sizeof(mp_digit) * 8)
+/* 80 bytes' worth of mp_digits. */
+#define ECL_MAX_FIELD_SIZE_DIGITS (80 / sizeof(mp_digit))
+
+/* Gets the i'th bit in the binary representation of a. If i >= length(a),
+ * then return 0. (The above behaviour differs from mpl_get_bit, which
+ * causes an error if i >= length(a).)
+ * The whole expansion is parenthesized so that the conditional expression
+ * cannot be split by a neighbouring operator such as !MP_GET_BIT(a, i) or
+ * x + MP_GET_BIT(a, i). Both a and i may be evaluated more than once, so
+ * do not pass arguments with side effects. */
+#define MP_GET_BIT(a, i) \
+ (((i) >= mpl_significant_bits((a))) ? 0 : mpl_get_bit((a), (i)))
+
+/* Single-digit add-with-carry and subtract-with-borrow steps.
+ * MP_ADD_CARRY stores a1 + a2 + carry in s and leaves the outgoing carry in
+ * carry; MP_SUB_BORROW stores a1 - a2 - borrow in s and leaves the outgoing
+ * borrow in borrow. Two implementations follow: the first widens to
+ * mp_word, the second uses only mp_digit arithmetic.
+ * Each expansion is a brace block that declares its own locals (w, or
+ * tmp/sum), so arguments must not use those names.
+ * NOTE(review): ACCUM, CARRYOUT and MP_DIGIT_BIT are defined outside this
+ * header; presumably ACCUM yields the low digit of w and CARRYOUT the
+ * high digit - confirm against the MPI headers. */
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+#define MP_ADD_CARRY(a1, a2, s, carry) \
+ { \
+ mp_word w; \
+ w = ((mp_word)carry) + (a1) + (a2); \
+ s = ACCUM(w); \
+ carry = CARRYOUT(w); \
+ }
+
+#define MP_SUB_BORROW(a1, a2, s, borrow) \
+ { \
+ mp_word w; \
+ w = ((mp_word)(a1)) - (a2)-borrow; \
+ s = ACCUM(w); \
+ borrow = (w >> MP_DIGIT_BIT) & 1; \
+ }
+
+#else
+/* NOTE,
+ * carry and borrow are both read and written.
+ * a1 or a2 and s could be the same variable.
+ * don't trash those outputs until their respective inputs have
+ * been read. */
+#define MP_ADD_CARRY(a1, a2, s, carry) \
+ { \
+ mp_digit tmp, sum; \
+ tmp = (a1); \
+ sum = tmp + (a2); \
+ tmp = (sum < tmp); /* detect overflow */ \
+ s = sum += carry; \
+ carry = tmp + (sum < carry); \
+ }
+
+#define MP_SUB_BORROW(a1, a2, s, borrow) \
+ { \
+ mp_digit tmp; \
+ tmp = (a1); \
+ s = tmp - (a2); \
+ tmp = (s > tmp); /* detect borrow */ \
+ if (borrow && !s--) \
+ tmp++; \
+ borrow = tmp; \
+ }
+#endif
+
+/* GFMethod describes one finite field: the irreducible that defines it and
+ * the table of functions used to do arithmetic in it. */
+struct GFMethodStr;
+typedef struct GFMethodStr GFMethod;
+struct GFMethodStr {
+ /* Indicates whether the structure was constructed from dynamic memory
+ * or statically created. GFMethod_free releases nothing when this is
+ * MP_NO. */
+ int constructed;
+ /* Irreducible that defines the field. For prime fields, this is the
+ * prime p. For binary polynomial fields, this is the bitstring
+ * representation of the irreducible polynomial. */
+ mp_int irr;
+ /* For prime fields, the value irr_arr[0] is the number of bits in the
+ * field. For binary polynomial fields, the irreducible polynomial
+ * f(t) is represented as an array of unsigned int[], where f(t) is
+ * of the form: f(t) = t^p[0] + t^p[1] + ... + t^p[4] where m = p[0]
+ * > p[1] > ... > p[4] = 0. */
+ unsigned int irr_arr[5];
+ /* Field arithmetic methods. All methods (except field_enc and
+ * field_dec) are assumed to take field-encoded parameters and return
+ * field-encoded values. All methods (except field_enc and field_dec)
+ * are required to be implemented. Each takes the GFMethod itself as
+ * its last argument and reports failure through its mp_err result. */
+ mp_err (*field_add)(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth);
+ mp_err (*field_neg)(const mp_int *a, mp_int *r, const GFMethod *meth);
+ mp_err (*field_sub)(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth);
+ mp_err (*field_mod)(const mp_int *a, mp_int *r, const GFMethod *meth);
+ mp_err (*field_mul)(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth);
+ mp_err (*field_sqr)(const mp_int *a, mp_int *r, const GFMethod *meth);
+ mp_err (*field_div)(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth);
+ /* Optional conversion into / out of the field encoding; these may be
+ * NULL when the method uses no special encoding. */
+ mp_err (*field_enc)(const mp_int *a, mp_int *r, const GFMethod *meth);
+ mp_err (*field_dec)(const mp_int *a, mp_int *r, const GFMethod *meth);
+ /* Extra storage for implementation-specific data. Any memory
+ * allocated to these extra fields will be cleared by extra_free. */
+ void *extra1;
+ void *extra2;
+ /* Optional destructor hook; GFMethod_free calls it when non-NULL. */
+ void (*extra_free)(GFMethod *meth);
+};
+
+/* Construct generic GFMethods. */
+GFMethod *GFMethod_consGFp(const mp_int *irr);
+GFMethod *GFMethod_consGFp_mont(const mp_int *irr);
+
+/* Free the memory allocated (if any) to a GFMethod object. */
+void GFMethod_free(GFMethod *meth);
+
+/* ECGroup bundles everything needed to work on one elliptic curve: the
+ * underlying field method, the curve parameters, the base point and the
+ * table of point-arithmetic functions. */
+struct ECGroupStr {
+ /* Indicates whether the structure was constructed from dynamic memory
+ * or statically created. */
+ int constructed;
+ /* Field definition and arithmetic. */
+ GFMethod *meth;
+ /* Textual representation of curve name, if any. */
+ char *text;
+ /* Curve parameters, field-encoded. */
+ mp_int curvea, curveb;
+ /* x and y coordinates of the base point, field-encoded. */
+ mp_int genx, geny;
+ /* Order and cofactor of the base point. */
+ mp_int order;
+ int cofactor;
+ /* Point arithmetic methods. All methods are assumed to take
+ * field-encoded parameters and return field-encoded values. All
+ * methods (except base_point_mul and points_mul) are required to be
+ * implemented. */
+ mp_err (*point_add)(const mp_int *px, const mp_int *py,
+ const mp_int *qx, const mp_int *qy, mp_int *rx,
+ mp_int *ry, const ECGroup *group);
+ mp_err (*point_sub)(const mp_int *px, const mp_int *py,
+ const mp_int *qx, const mp_int *qy, mp_int *rx,
+ mp_int *ry, const ECGroup *group);
+ mp_err (*point_dbl)(const mp_int *px, const mp_int *py, mp_int *rx,
+ mp_int *ry, const ECGroup *group);
+ mp_err (*point_mul)(const mp_int *n, const mp_int *px,
+ const mp_int *py, mp_int *rx, mp_int *ry,
+ const ECGroup *group);
+ /* Optional: multiplication of the base point; may be NULL. */
+ mp_err (*base_point_mul)(const mp_int *n, mp_int *rx, mp_int *ry,
+ const ECGroup *group);
+ /* Optional: two-scalar multiplication (k1, k2 and one point). */
+ mp_err (*points_mul)(const mp_int *k1, const mp_int *k2,
+ const mp_int *px, const mp_int *py, mp_int *rx,
+ mp_int *ry, const ECGroup *group);
+ /* Public-key validation hook; ECPoint_validate dispatches to it. */
+ mp_err (*validate_point)(const mp_int *px, const mp_int *py, const ECGroup *group);
+ /* Extra storage for implementation-specific data. Any memory
+ * allocated to these extra fields will be cleared by extra_free. */
+ void *extra1;
+ void *extra2;
+ /* Optional destructor hook; ECGroup_free calls it when non-NULL. */
+ void (*extra_free)(ECGroup *group);
+};
+
+/* Wrapper functions for generic prime field arithmetic. */
+mp_err ec_GFp_add(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth);
+mp_err ec_GFp_neg(const mp_int *a, mp_int *r, const GFMethod *meth);
+mp_err ec_GFp_sub(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth);
+
+/* fixed length in-line adds. Count is in words, i.e. the _N suffix is the
+ * number of mp_digits in the field modulus. GFMethod_consGFp installs the
+ * matching pair as field_add / field_sub when the modulus uses 3 to 6
+ * digits and falls back to ec_GFp_add / ec_GFp_sub otherwise. */
+mp_err ec_GFp_add_3(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth);
+mp_err ec_GFp_add_4(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth);
+mp_err ec_GFp_add_5(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth);
+mp_err ec_GFp_add_6(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth);
+mp_err ec_GFp_sub_3(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth);
+mp_err ec_GFp_sub_4(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth);
+mp_err ec_GFp_sub_5(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth);
+mp_err ec_GFp_sub_6(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth);
+
+mp_err ec_GFp_mod(const mp_int *a, mp_int *r, const GFMethod *meth);
+mp_err ec_GFp_mul(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth);
+mp_err ec_GFp_sqr(const mp_int *a, mp_int *r, const GFMethod *meth);
+mp_err ec_GFp_div(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth);
+/* Wrapper functions for generic binary polynomial field arithmetic. */
+mp_err ec_GF2m_add(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth);
+mp_err ec_GF2m_neg(const mp_int *a, mp_int *r, const GFMethod *meth);
+mp_err ec_GF2m_mod(const mp_int *a, mp_int *r, const GFMethod *meth);
+mp_err ec_GF2m_mul(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth);
+mp_err ec_GF2m_sqr(const mp_int *a, mp_int *r, const GFMethod *meth);
+mp_err ec_GF2m_div(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth);
+
+/* Montgomery prime field arithmetic. */
+mp_err ec_GFp_mul_mont(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth);
+mp_err ec_GFp_sqr_mont(const mp_int *a, mp_int *r, const GFMethod *meth);
+mp_err ec_GFp_div_mont(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth);
+mp_err ec_GFp_enc_mont(const mp_int *a, mp_int *r, const GFMethod *meth);
+mp_err ec_GFp_dec_mont(const mp_int *a, mp_int *r, const GFMethod *meth);
+void ec_GFp_extra_free_mont(GFMethod *meth);
+
+/* point multiplication */
+mp_err ec_pts_mul_basic(const mp_int *k1, const mp_int *k2,
+ const mp_int *px, const mp_int *py, mp_int *rx,
+ mp_int *ry, const ECGroup *group);
+mp_err ec_pts_mul_simul_w2(const mp_int *k1, const mp_int *k2,
+ const mp_int *px, const mp_int *py, mp_int *rx,
+ mp_int *ry, const ECGroup *group);
+
+/* Computes the windowed non-adjacent-form (NAF) of a scalar. Out should
+ * be an array of signed char's to output to, bitsize should be the number
+ * of bits of out, in is the original scalar, and w is the window size.
+ * NAF is discussed in the paper: D. Hankerson, J. Hernandez and A.
+ * Menezes, "Software implementation of elliptic curve cryptography over
+ * binary fields", Proc. CHES 2000. */
+mp_err ec_compute_wNAF(signed char *out, int bitsize, const mp_int *in,
+ int w);
+
+/* Optimized field arithmetic. Each function takes the group to modify and
+ * the curve name it is being set up for, and returns an mp_err status. */
+mp_err ec_group_set_gfp192(ECGroup *group, ECCurveName name);
+mp_err ec_group_set_gfp224(ECGroup *group, ECCurveName name);
+mp_err ec_group_set_gfp256(ECGroup *group, ECCurveName name);
+mp_err ec_group_set_gfp384(ECGroup *group, ECCurveName name);
+mp_err ec_group_set_gfp521(ECGroup *group, ECCurveName name);
+mp_err ec_group_set_gf2m163(ECGroup *group, ECCurveName name);
+mp_err ec_group_set_gf2m193(ECGroup *group, ECCurveName name);
+mp_err ec_group_set_gf2m233(ECGroup *group, ECCurveName name);
+
+/* Optimized point multiplication */
+mp_err ec_group_set_gfp256_32(ECGroup *group, ECCurveName name);
+
+/* Optimized floating-point arithmetic */
+#ifdef ECL_USE_FP
+mp_err ec_group_set_secp160r1_fp(ECGroup *group);
+mp_err ec_group_set_nistp192_fp(ECGroup *group);
+mp_err ec_group_set_nistp224_fp(ECGroup *group);
+#endif
+
+/* Curve25519 multiplication primitive on raw byte buffers.
+ * NOTE(review): judging by the parameter names this presumably computes
+ * q = s * p; the required buffer lengths are not stated here - confirm
+ * against the implementation. */
+SECStatus ec_Curve25519_mul(PRUint8 *q, const PRUint8 *s, const PRUint8 *p);
+#endif /* __ecl_priv_h_ */
diff --git a/security/nss/lib/freebl/ecl/ecl.c b/security/nss/lib/freebl/ecl/ecl.c
new file mode 100644
index 000000000..3540af781
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecl.c
@@ -0,0 +1,301 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi.h"
+#include "mplogic.h"
+#include "ecl.h"
+#include "ecl-priv.h"
+#include "ecp.h"
+#include <stdlib.h>
+#include <string.h>
+
+/* Allocate memory for a new ECGroup object.
+ * Returns a heap-allocated group (constructed == MP_YES) whose mp_int
+ * members have been initialized and whose cofactor, method pointers and
+ * extra fields are all cleared, or NULL if the allocation or any mp_init
+ * fails. The result is released with ECGroup_free. */
+ECGroup *
+ECGroup_new()
+{
+ mp_err res = MP_OKAY;
+ ECGroup *group;
+ group = (ECGroup *)malloc(sizeof(ECGroup));
+ if (group == NULL)
+ return NULL;
+ group->constructed = MP_YES;
+ group->meth = NULL;
+ group->text = NULL;
+ /* Reset the digit pointers before any mp_init so that the failure path
+ * below never hands ECGroup_free an mp_int holding malloc garbage. */
+ MP_DIGITS(&group->curvea) = 0;
+ MP_DIGITS(&group->curveb) = 0;
+ MP_DIGITS(&group->genx) = 0;
+ MP_DIGITS(&group->geny) = 0;
+ MP_DIGITS(&group->order) = 0;
+ group->cofactor = 0;
+ /* malloc leaves the struct indeterminate: clear every method pointer,
+ * not only the optional ones, so a group that has not yet been filled
+ * in by a constructor never carries a garbage function pointer. */
+ group->point_add = NULL;
+ group->point_sub = NULL;
+ group->point_dbl = NULL;
+ group->point_mul = NULL;
+ group->base_point_mul = NULL;
+ group->points_mul = NULL;
+ group->validate_point = NULL;
+ group->extra1 = NULL;
+ group->extra2 = NULL;
+ group->extra_free = NULL;
+ MP_CHECKOK(mp_init(&group->curvea));
+ MP_CHECKOK(mp_init(&group->curveb));
+ MP_CHECKOK(mp_init(&group->genx));
+ MP_CHECKOK(mp_init(&group->geny));
+ MP_CHECKOK(mp_init(&group->order));
+
+CLEANUP:
+ if (res != MP_OKAY) {
+ ECGroup_free(group);
+ return NULL;
+ }
+ return group;
+}
+
+/* Construct a generic ECGroup for elliptic curves over prime fields.
+ * irr is the field prime; curvea, curveb, genx, geny and order are copied
+ * into the new group as given (this constructor applies no field
+ * encoding), and the generic ec_GFp_* point routines are installed.
+ * Returns the new group, or NULL if the group or its GFMethod cannot be
+ * allocated or a copy fails. The result is released with ECGroup_free. */
+ECGroup *
+ECGroup_consGFp(const mp_int *irr, const mp_int *curvea,
+ const mp_int *curveb, const mp_int *genx,
+ const mp_int *geny, const mp_int *order, int cofactor)
+{
+ mp_err res = MP_OKAY;
+ ECGroup *group = NULL;
+
+ group = ECGroup_new();
+ if (group == NULL)
+ return NULL;
+
+ group->meth = GFMethod_consGFp(irr);
+ if (group->meth == NULL) {
+ res = MP_MEM;
+ goto CLEANUP;
+ }
+ MP_CHECKOK(mp_copy(curvea, &group->curvea));
+ MP_CHECKOK(mp_copy(curveb, &group->curveb));
+ MP_CHECKOK(mp_copy(genx, &group->genx));
+ MP_CHECKOK(mp_copy(geny, &group->geny));
+ MP_CHECKOK(mp_copy(order, &group->order));
+ group->cofactor = cofactor;
+ group->point_add = &ec_GFp_pt_add_aff;
+ group->point_sub = &ec_GFp_pt_sub_aff;
+ group->point_dbl = &ec_GFp_pt_dbl_aff;
+ group->point_mul = &ec_GFp_pt_mul_jm_wNAF;
+ /* No dedicated base-point multiplication for the generic group. */
+ group->base_point_mul = NULL;
+ group->points_mul = &ec_GFp_pts_mul_jac;
+ group->validate_point = &ec_GFp_validate_point;
+
+CLEANUP:
+ /* On any failure the partially built group (and its meth) is freed. */
+ if (res != MP_OKAY) {
+ ECGroup_free(group);
+ return NULL;
+ }
+ return group;
+}
+
+/* Construct a generic ECGroup for elliptic curves over prime fields with
+ * field arithmetic implemented in Montgomery coordinates.
+ * Unlike ECGroup_consGFp, the curve coefficients and base point are passed
+ * through meth->field_enc before being stored; the order is copied
+ * unchanged. Returns the new group, or NULL on any failure. The result is
+ * released with ECGroup_free. */
+ECGroup *
+ECGroup_consGFp_mont(const mp_int *irr, const mp_int *curvea,
+ const mp_int *curveb, const mp_int *genx,
+ const mp_int *geny, const mp_int *order, int cofactor)
+{
+ mp_err res = MP_OKAY;
+ ECGroup *group = NULL;
+
+ group = ECGroup_new();
+ if (group == NULL)
+ return NULL;
+
+ group->meth = GFMethod_consGFp_mont(irr);
+ if (group->meth == NULL) {
+ res = MP_MEM;
+ goto CLEANUP;
+ }
+ /* NOTE(review): field_enc is called without a NULL check, so this
+ * relies on GFMethod_consGFp_mont always installing it - confirm. */
+ MP_CHECKOK(group->meth->field_enc(curvea, &group->curvea, group->meth));
+ MP_CHECKOK(group->meth->field_enc(curveb, &group->curveb, group->meth));
+ MP_CHECKOK(group->meth->field_enc(genx, &group->genx, group->meth));
+ MP_CHECKOK(group->meth->field_enc(geny, &group->geny, group->meth));
+ MP_CHECKOK(mp_copy(order, &group->order));
+ group->cofactor = cofactor;
+ group->point_add = &ec_GFp_pt_add_aff;
+ group->point_sub = &ec_GFp_pt_sub_aff;
+ group->point_dbl = &ec_GFp_pt_dbl_aff;
+ group->point_mul = &ec_GFp_pt_mul_jm_wNAF;
+ /* No dedicated base-point multiplication for the generic group. */
+ group->base_point_mul = NULL;
+ group->points_mul = &ec_GFp_pts_mul_jac;
+ group->validate_point = &ec_GFp_validate_point;
+
+CLEANUP:
+ /* On any failure the partially built group (and its meth) is freed. */
+ if (res != MP_OKAY) {
+ ECGroup_free(group);
+ return NULL;
+ }
+ return group;
+}
+
+/* Construct ECGroup from hex parameters and name, if any. Called by
+ * ECGroup_fromHex and ECGroup_fromName.
+ * The six hexadecimal strings in params (irr, curvea, curveb, genx, geny,
+ * order) are parsed into temporaries, a group is built from them, and for
+ * selected curve names specialized arithmetic is installed on top. Only
+ * params->field == ECField_GFp is handled; any other field yields NULL.
+ * Returns the new group, or NULL on any failure. The temporaries are
+ * always cleared; params itself is neither modified nor retained. */
+ECGroup *
+ecgroup_fromNameAndHex(const ECCurveName name,
+ const ECCurveParams *params)
+{
+ mp_int irr, curvea, curveb, genx, geny, order;
+ int bits;
+ ECGroup *group = NULL;
+ mp_err res = MP_OKAY;
+
+ /* initialize values */
+ /* The digit pointers are reset first so that the mp_clear calls in
+ * CLEANUP are reached with well-defined values even when an early
+ * mp_init fails. */
+ MP_DIGITS(&irr) = 0;
+ MP_DIGITS(&curvea) = 0;
+ MP_DIGITS(&curveb) = 0;
+ MP_DIGITS(&genx) = 0;
+ MP_DIGITS(&geny) = 0;
+ MP_DIGITS(&order) = 0;
+ MP_CHECKOK(mp_init(&irr));
+ MP_CHECKOK(mp_init(&curvea));
+ MP_CHECKOK(mp_init(&curveb));
+ MP_CHECKOK(mp_init(&genx));
+ MP_CHECKOK(mp_init(&geny));
+ MP_CHECKOK(mp_init(&order));
+ MP_CHECKOK(mp_read_radix(&irr, params->irr, 16));
+ MP_CHECKOK(mp_read_radix(&curvea, params->curvea, 16));
+ MP_CHECKOK(mp_read_radix(&curveb, params->curveb, 16));
+ MP_CHECKOK(mp_read_radix(&genx, params->genx, 16));
+ MP_CHECKOK(mp_read_radix(&geny, params->geny, 16));
+ MP_CHECKOK(mp_read_radix(&order, params->order, 16));
+
+ /* determine number of bits */
+ /* bits is used only for this sanity check; nothing below reads it.
+ * NOTE(review): because 1 is subtracted before the test, a negative
+ * status from mpl_significant_bits would be reported one lower than
+ * the value actually returned - confirm whether that matters. */
+ bits = mpl_significant_bits(&irr) - 1;
+ if (bits < MP_OKAY) {
+ res = bits;
+ goto CLEANUP;
+ }
+
+ /* determine which optimizations (if any) to use */
+ if (params->field == ECField_GFp) {
+ switch (name) {
+ case ECCurve_SECG_PRIME_256R1:
+ /* Generic (non-Montgomery) group, then the 256-bit
+ * specializations are layered on top of it. */
+ group =
+ ECGroup_consGFp(&irr, &curvea, &curveb, &genx, &geny,
+ &order, params->cofactor);
+ if (group == NULL) {
+ res = MP_UNDEF;
+ goto CLEANUP;
+ }
+ MP_CHECKOK(ec_group_set_gfp256(group, name));
+ MP_CHECKOK(ec_group_set_gfp256_32(group, name));
+ break;
+ case ECCurve_SECG_PRIME_521R1:
+ group =
+ ECGroup_consGFp(&irr, &curvea, &curveb, &genx, &geny,
+ &order, params->cofactor);
+ if (group == NULL) {
+ res = MP_UNDEF;
+ goto CLEANUP;
+ }
+ MP_CHECKOK(ec_group_set_gfp521(group, name));
+ break;
+ default:
+ /* use generic arithmetic */
+ group =
+ ECGroup_consGFp_mont(&irr, &curvea, &curveb, &genx, &geny,
+ &order, params->cofactor);
+ if (group == NULL) {
+ res = MP_UNDEF;
+ goto CLEANUP;
+ }
+ }
+ } else {
+ /* Fields other than ECField_GFp are not handled here. */
+ res = MP_UNDEF;
+ goto CLEANUP;
+ }
+
+ /* set name, if any */
+ /* The group owns its own copy of the text; ECGroup_free releases it.
+ * A failed strdup sets res, so the group is discarded in CLEANUP. */
+ if ((group != NULL) && (params->text != NULL)) {
+ group->text = strdup(params->text);
+ if (group->text == NULL) {
+ res = MP_MEM;
+ }
+ }
+
+CLEANUP:
+ mp_clear(&irr);
+ mp_clear(&curvea);
+ mp_clear(&curveb);
+ mp_clear(&genx);
+ mp_clear(&geny);
+ mp_clear(&order);
+ if (res != MP_OKAY) {
+ ECGroup_free(group);
+ return NULL;
+ }
+ return group;
+}
+
+/* Construct ECGroup from hexadecimal representations of parameters.
+ * The group is built with ECCurve_noName, so none of the per-curve
+ * specializations selected by name are applied. Returns NULL on failure. */
+ECGroup *
+ECGroup_fromHex(const ECCurveParams *params)
+{
+ return ecgroup_fromNameAndHex(ECCurve_noName, params);
+}
+
+/* Construct ECGroup from named parameters.
+ * Looks up a private copy of the parameters for name, builds the group
+ * from it and frees the copy again. Returns NULL if the name has no
+ * parameters or if group construction fails. */
+ECGroup *
+ECGroup_fromName(const ECCurveName name)
+{
+ ECGroup *group = NULL;
+ ECCurveParams *params = NULL;
+ mp_err res = MP_OKAY;
+
+ params = EC_GetNamedCurveParams(name);
+ if (params == NULL) {
+ res = MP_UNDEF;
+ goto CLEANUP;
+ }
+
+ /* construct actual group */
+ group = ecgroup_fromNameAndHex(name, params);
+ if (group == NULL) {
+ res = MP_UNDEF;
+ goto CLEANUP;
+ }
+
+CLEANUP:
+ /* params is a duplicate owned by this function; EC_FreeCurveParams
+ * accepts NULL, so this is safe on the lookup-failure path too. */
+ EC_FreeCurveParams(params);
+ if (res != MP_OKAY) {
+ ECGroup_free(group);
+ return NULL;
+ }
+ return group;
+}
+
+/* Validates an EC public key as described in Section 5.2.2 of X9.62.
+ * This function is only a dispatcher: the steps listed below are the
+ * responsibility of the group's validate_point method, whose result is
+ * returned unchanged. group and group->validate_point must be non-NULL
+ * (neither is checked here). */
+mp_err
+ECPoint_validate(const ECGroup *group, const mp_int *px, const mp_int *py)
+{
+ /* 1: Verify that publicValue is not the point at infinity */
+ /* 2: Verify that the coordinates of publicValue are elements
+ * of the field.
+ */
+ /* 3: Verify that publicValue is on the curve. */
+ /* 4: Verify that the order of the curve times the publicValue
+ * is the point at infinity.
+ */
+ return group->validate_point(px, py, group);
+}
+
+/* Free the memory allocated (if any) to an ECGroup object.
+ * A NULL group is ignored. The field method is always handed to
+ * GFMethod_free; everything else (the mp_int members, the text copy, the
+ * extra data via extra_free, and the struct itself) is released only when
+ * the group was dynamically constructed. */
+void
+ECGroup_free(ECGroup *group)
+{
+ if (group == NULL)
+ return;
+ GFMethod_free(group->meth);
+ /* Statically created groups own no heap memory beyond meth. */
+ if (group->constructed == MP_NO)
+ return;
+ mp_clear(&group->curvea);
+ mp_clear(&group->curveb);
+ mp_clear(&group->genx);
+ mp_clear(&group->geny);
+ mp_clear(&group->order);
+ if (group->text != NULL)
+ free(group->text);
+ /* Run the implementation-specific hook while the struct still exists. */
+ if (group->extra_free != NULL)
+ group->extra_free(group);
+ free(group);
+}
diff --git a/security/nss/lib/freebl/ecl/ecl.h b/security/nss/lib/freebl/ecl/ecl.h
new file mode 100644
index 000000000..ddcbb1f3a
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecl.h
@@ -0,0 +1,60 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* Although this is not an exported header file, code which uses elliptic
+ * curve point operations will need to include it. */
+
+#ifndef __ecl_h_
+#define __ecl_h_
+
+#include "blapi.h"
+#include "ecl-exp.h"
+#include "mpi.h"
+
+struct ECGroupStr;
+typedef struct ECGroupStr ECGroup;
+
+/* Construct ECGroup from hexadecimal representations of parameters. */
+ECGroup *ECGroup_fromHex(const ECCurveParams *params);
+
+/* Construct ECGroup from named parameters. */
+ECGroup *ECGroup_fromName(const ECCurveName name);
+
+/* Free an allocated ECGroup. */
+void ECGroup_free(ECGroup *group);
+
+/* Construct ECCurveParams from an ECCurveName */
+ECCurveParams *EC_GetNamedCurveParams(const ECCurveName name);
+
+/* Duplicates an ECCurveParams */
+ECCurveParams *ECCurveParams_dup(const ECCurveParams *params);
+
+/* Free an allocated ECCurveParams */
+void EC_FreeCurveParams(ECCurveParams *params);
+
+/* Elliptic curve scalar-point multiplication. Computes Q(x, y) = k * P(x,
+ * y). If x, y = NULL, then P is assumed to be the generator (base point)
+ * of the group of points on the elliptic curve. Input and output values
+ * are assumed to be NOT field-encoded. */
+mp_err ECPoint_mul(const ECGroup *group, const mp_int *k, const mp_int *px,
+ const mp_int *py, mp_int *qx, mp_int *qy);
+
+/* Elliptic curve scalar-point multiplication. Computes Q(x, y) = k1 * G +
+ * k2 * P(x, y), where G is the generator (base point) of the group of
+ * points on the elliptic curve. Input and output values are assumed to
+ * be NOT field-encoded. */
+mp_err ECPoints_mul(const ECGroup *group, const mp_int *k1,
+ const mp_int *k2, const mp_int *px, const mp_int *py,
+ mp_int *qx, mp_int *qy);
+
+/* Validates an EC public key as described in Section 5.2.2 of X9.62.
+ * Returns MP_YES if the public key is valid, MP_NO if the public key
+ * is invalid, or an error code if the validation could not be
+ * performed. */
+mp_err ECPoint_validate(const ECGroup *group, const mp_int *px, const mp_int *py);
+
+/* Curve25519 entry points operating on SECItems rather than mp_ints.
+ * NOTE(review): judging by the names, pt_mul presumably computes X = k * P
+ * and pt_validate checks a public value px; the expected item lengths
+ * are not stated here - confirm against the implementation. */
+SECStatus ec_Curve25519_pt_mul(SECItem *X, SECItem *k, SECItem *P);
+SECStatus ec_Curve25519_pt_validate(const SECItem *px);
+
+#endif /* __ecl_h_ */
diff --git a/security/nss/lib/freebl/ecl/ecl_curve.c b/security/nss/lib/freebl/ecl/ecl_curve.c
new file mode 100644
index 000000000..cf090cfc3
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecl_curve.c
@@ -0,0 +1,93 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ecl.h"
+#include "ecl-curve.h"
+#include "ecl-priv.h"
+#include <stdlib.h>
+#include <string.h>
+
+/* CHECK evaluates func (normally an assignment of an allocation result)
+ * and, when the result is NULL, records failure in the local variable res
+ * and jumps to the local CLEANUP label. It is private to
+ * ECCurveParams_dup and is undefined again right after it. */
+#define CHECK(func) \
+ if ((func) == NULL) { \
+ res = 0; \
+ goto CLEANUP; \
+ }
+
+/* Duplicates an ECCurveParams */
+/* Makes a deep copy: every non-NULL string member is duplicated with
+ * strdup and the scalar members (field, size, cofactor) are copied.
+ * Returns the copy, or NULL if any allocation fails, in which case the
+ * partial copy is freed. The caller releases the result with
+ * EC_FreeCurveParams. params is dereferenced without a NULL check, so it
+ * must be non-NULL. */
+ECCurveParams *
+ECCurveParams_dup(const ECCurveParams *params)
+{
+ int res = 1;
+ ECCurveParams *ret = NULL;
+
+ /* calloc leaves every string pointer NULL, which lets the CLEANUP path
+ * free a partially filled copy safely. */
+ CHECK(ret = (ECCurveParams *)calloc(1, sizeof(ECCurveParams)));
+ if (params->text != NULL) {
+ CHECK(ret->text = strdup(params->text));
+ }
+ ret->field = params->field;
+ ret->size = params->size;
+ if (params->irr != NULL) {
+ CHECK(ret->irr = strdup(params->irr));
+ }
+ if (params->curvea != NULL) {
+ CHECK(ret->curvea = strdup(params->curvea));
+ }
+ if (params->curveb != NULL) {
+ CHECK(ret->curveb = strdup(params->curveb));
+ }
+ if (params->genx != NULL) {
+ CHECK(ret->genx = strdup(params->genx));
+ }
+ if (params->geny != NULL) {
+ CHECK(ret->geny = strdup(params->geny));
+ }
+ if (params->order != NULL) {
+ CHECK(ret->order = strdup(params->order));
+ }
+ ret->cofactor = params->cofactor;
+
+CLEANUP:
+ if (res != 1) {
+ EC_FreeCurveParams(ret);
+ return NULL;
+ }
+ return ret;
+}
+
+#undef CHECK
+
+/* Construct ECCurveParams from an ECCurveName.
+ * Returns a freshly allocated duplicate of the table entry for name, or
+ * NULL when name lies outside the valid range, when the table has no
+ * entry for it, or when duplication fails. The caller releases the
+ * result with EC_FreeCurveParams. */
+ECCurveParams *
+EC_GetNamedCurveParams(const ECCurveName name)
+{
+ /* Range-check first so the table is never indexed out of bounds. */
+ if ((name <= ECCurve_noName) || (ECCurve_pastLastCurve <= name)) {
+ return NULL;
+ }
+ /* A NULL slot means no parameters are available for this curve. */
+ if (ecCurve_map[name] == NULL) {
+ return NULL;
+ }
+ return ECCurveParams_dup(ecCurve_map[name]);
+}
+
+/* Free the memory allocated (if any) to an ECCurveParams object.
+ * Releases every string member and then the struct itself. A NULL
+ * argument is ignored. */
+void
+EC_FreeCurveParams(ECCurveParams *params)
+{
+ if (params != NULL) {
+ /* free() ignores NULL, so unset members need no separate test. */
+ free(params->text);
+ free(params->irr);
+ free(params->curvea);
+ free(params->curveb);
+ free(params->genx);
+ free(params->geny);
+ free(params->order);
+ free(params);
+ }
+}
diff --git a/security/nss/lib/freebl/ecl/ecl_gf.c b/security/nss/lib/freebl/ecl/ecl_gf.c
new file mode 100644
index 000000000..81b007705
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecl_gf.c
@@ -0,0 +1,958 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi.h"
+#include "mp_gf2m.h"
+#include "ecl-priv.h"
+#include "mpi-priv.h"
+#include <stdlib.h>
+
+/* Allocate memory for a new GFMethod object.
+ * Returns a heap-allocated method (constructed == MP_YES) with irr
+ * initialized and every method pointer and extra field cleared, or NULL if
+ * the allocation or mp_init fails. The result is released with
+ * GFMethod_free. */
+GFMethod *
+GFMethod_new()
+{
+ mp_err res = MP_OKAY;
+ GFMethod *meth;
+ meth = (GFMethod *)malloc(sizeof(GFMethod));
+ if (meth == NULL)
+ return NULL;
+ meth->constructed = MP_YES;
+ /* Reset the digit pointer before mp_init so that the failure path
+ * below never hands GFMethod_free an mp_int holding malloc garbage. */
+ MP_DIGITS(&meth->irr) = 0;
+ /* malloc leaves the struct indeterminate: clear every method pointer
+ * and extra field so a method that has not yet been filled in by a
+ * constructor never carries a garbage pointer. */
+ meth->field_add = NULL;
+ meth->field_neg = NULL;
+ meth->field_sub = NULL;
+ meth->field_mod = NULL;
+ meth->field_mul = NULL;
+ meth->field_sqr = NULL;
+ meth->field_div = NULL;
+ meth->field_enc = NULL;
+ meth->field_dec = NULL;
+ meth->extra1 = NULL;
+ meth->extra2 = NULL;
+ meth->extra_free = NULL;
+ MP_CHECKOK(mp_init(&meth->irr));
+
+CLEANUP:
+ if (res != MP_OKAY) {
+ GFMethod_free(meth);
+ return NULL;
+ }
+ return meth;
+}
+
+/* Construct a generic GFMethod for arithmetic over prime fields with
+ * irreducible irr.
+ * irr is copied into the method and its bit length is recorded in
+ * irr_arr[0]. Addition and subtraction use the fixed-length routines when
+ * the modulus occupies 3 to 6 digits and the generic ones otherwise; no
+ * field encoding is installed. Returns the new method, or NULL if
+ * allocation or the copy fails. The result is released with
+ * GFMethod_free. */
+GFMethod *
+GFMethod_consGFp(const mp_int *irr)
+{
+ mp_err res = MP_OKAY;
+ GFMethod *meth = NULL;
+
+ meth = GFMethod_new();
+ if (meth == NULL)
+ return NULL;
+
+ MP_CHECKOK(mp_copy(irr, &meth->irr));
+ meth->irr_arr[0] = mpl_significant_bits(irr);
+ meth->irr_arr[1] = meth->irr_arr[2] = meth->irr_arr[3] =
+ meth->irr_arr[4] = 0;
+ /* Pick add/sub by the number of digits the modulus actually uses. */
+ switch (MP_USED(&meth->irr)) {
+ /* maybe we need 1 and 2 words here as well?*/
+ case 3:
+ meth->field_add = &ec_GFp_add_3;
+ meth->field_sub = &ec_GFp_sub_3;
+ break;
+ case 4:
+ meth->field_add = &ec_GFp_add_4;
+ meth->field_sub = &ec_GFp_sub_4;
+ break;
+ case 5:
+ meth->field_add = &ec_GFp_add_5;
+ meth->field_sub = &ec_GFp_sub_5;
+ break;
+ case 6:
+ meth->field_add = &ec_GFp_add_6;
+ meth->field_sub = &ec_GFp_sub_6;
+ break;
+ default:
+ meth->field_add = &ec_GFp_add;
+ meth->field_sub = &ec_GFp_sub;
+ }
+ meth->field_neg = &ec_GFp_neg;
+ meth->field_mod = &ec_GFp_mod;
+ meth->field_mul = &ec_GFp_mul;
+ meth->field_sqr = &ec_GFp_sqr;
+ meth->field_div = &ec_GFp_div;
+ /* The generic prime-field method uses no special encoding. */
+ meth->field_enc = NULL;
+ meth->field_dec = NULL;
+ meth->extra1 = NULL;
+ meth->extra2 = NULL;
+ meth->extra_free = NULL;
+
+CLEANUP:
+ if (res != MP_OKAY) {
+ GFMethod_free(meth);
+ return NULL;
+ }
+ return meth;
+}
+
+/* Free the memory allocated (if any) to a GFMethod object.
+ * A NULL method and a statically created one (constructed == MP_NO) are
+ * both left untouched. Otherwise irr is cleared, the optional extra_free
+ * hook is run, and the struct itself is freed. */
+void
+GFMethod_free(GFMethod *meth)
+{
+ if (meth == NULL)
+ return;
+ if (meth->constructed == MP_NO)
+ return;
+ mp_clear(&meth->irr);
+ /* Note: the hook runs after irr has already been cleared. */
+ if (meth->extra_free != NULL)
+ meth->extra_free(meth);
+ free(meth);
+}
+
+/* Wrapper functions for generic prime field arithmetic. */
+
+/* Add two field elements. Assumes that 0 <= a, b < meth->irr.
+ * Computes r = a + b and, if the sum is not below the modulus, subtracts
+ * the modulus once. Returns the status of the last MPI call made. */
+mp_err
+ec_GFp_add(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth)
+{
+ /* PRE: 0 <= a, b < p = meth->irr POST: 0 <= r < p, r = a + b (mod p) */
+ mp_err status = mp_add(a, b, r);
+
+ if (status != MP_OKAY) {
+ return status;
+ }
+ /* Already reduced: nothing more to do. */
+ if (mp_cmp(r, &meth->irr) < 0) {
+ return status;
+ }
+ /* With both inputs below p, one subtraction brings the sum below p. */
+ return mp_sub(r, &meth->irr, r);
+}
+
+/* Negates a field element. Assumes that 0 <= a < meth->irr.
+ * Zero maps to zero; any other a maps to meth->irr - a. */
+mp_err
+ec_GFp_neg(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+ /* PRE: 0 <= a < p = meth->irr POST: 0 <= r < p, r = -a (mod p) */
+
+ if (mp_cmp_z(a) != 0) {
+ return mp_sub(&meth->irr, a, r);
+ }
+ /* p - 0 would give p, which is outside the range, so zero is special. */
+ mp_zero(r);
+ return MP_OKAY;
+}
+
+/* Subtracts two field elements. Assumes that 0 <= a, b < meth->irr.
+ * Computes r = a - b and adds the modulus back when the difference is
+ * negative. Returns MP_OKAY on success, or the first MPI error met; an
+ * error from the initial subtraction is reported as-is rather than being
+ * overwritten by a later call. */
+mp_err
+ec_GFp_sub(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth)
+{
+ mp_err res = MP_OKAY;
+
+ /* PRE: 0 <= a, b < p = meth->irr POST: 0 <= r < p, r = a - b (mod p) */
+ res = mp_sub(a, b, r);
+ if (res == MP_RANGE) {
+ /* Fall back to computing b - a and negating it in the field. */
+ MP_CHECKOK(mp_sub(b, a, r));
+ if (mp_cmp_z(r) < 0) {
+ MP_CHECKOK(mp_add(r, &meth->irr, r));
+ }
+ MP_CHECKOK(ec_GFp_neg(r, r, meth));
+ } else if (res != MP_OKAY) {
+ /* Any other failure leaves r unreliable: stop here so the error is
+ * not masked by a later successful mp_add. */
+ goto CLEANUP;
+ }
+ if (mp_cmp_z(r) < 0) {
+ MP_CHECKOK(mp_add(r, &meth->irr, r));
+ }
+CLEANUP:
+ return res;
+}
+/*
+ * Inline adds for small curve lengths.
+ */
+/* 3 words */
+/* Adds a and b modulo meth->irr for a modulus that occupies 3 digits.
+ * Up to three digits of each operand are loaded into locals (digits an
+ * operand does not have stay 0), added with carry propagation, stored into
+ * r, and the modulus is subtracted once if the sum is not below it.
+ * Returns MP_OKAY, or the error from s_mp_pad. An operand with more than
+ * 3 digits in use matches no case of the switches below, so none of its
+ * digits would be loaded. */
+mp_err
+ec_GFp_add_3(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth)
+{
+ mp_err res = MP_OKAY;
+ mp_digit a0 = 0, a1 = 0, a2 = 0;
+ mp_digit r0 = 0, r1 = 0, r2 = 0;
+ mp_digit carry;
+
+ /* The cases fall through on purpose: starting at the operand's digit
+ * count, every lower digit is loaded as well. */
+ switch (MP_USED(a)) {
+ case 3:
+ a2 = MP_DIGIT(a, 2);
+ case 2:
+ a1 = MP_DIGIT(a, 1);
+ case 1:
+ a0 = MP_DIGIT(a, 0);
+ }
+ /* r0..r2 start out holding b's digits and are overwritten by the sum. */
+ switch (MP_USED(b)) {
+ case 3:
+ r2 = MP_DIGIT(b, 2);
+ case 2:
+ r1 = MP_DIGIT(b, 1);
+ case 1:
+ r0 = MP_DIGIT(b, 0);
+ }
+
+#ifndef MPI_AMD64_ADD
+ carry = 0;
+ MP_ADD_CARRY(a0, r0, r0, carry);
+ MP_ADD_CARRY(a1, r1, r1, carry);
+ MP_ADD_CARRY(a2, r2, r2, carry);
+#else
+ /* Inline-assembly add/adc chain; carry receives the final carry. */
+ __asm__(
+ "xorq %3,%3 \n\t"
+ "addq %4,%0 \n\t"
+ "adcq %5,%1 \n\t"
+ "adcq %6,%2 \n\t"
+ "adcq $0,%3 \n\t"
+ : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(carry)
+ : "r"(a0), "r"(a1), "r"(a2),
+ "0"(r0), "1"(r1), "2"(r2)
+ : "%cc");
+#endif
+
+ MP_CHECKOK(s_mp_pad(r, 3));
+ MP_DIGIT(r, 2) = r2;
+ MP_DIGIT(r, 1) = r1;
+ MP_DIGIT(r, 0) = r0;
+ MP_SIGN(r) = MP_ZPOS;
+ MP_USED(r) = 3;
+
+ /* Do quick 'subtract' if we've gone over
+ * (add the 2's complement of the curve field) */
+ /* The top digits are compared first; the full mp_cmp is only needed
+ * when they are equal. */
+ a2 = MP_DIGIT(&meth->irr, 2);
+ if (carry || r2 > a2 ||
+ ((r2 == a2) && mp_cmp(r, &meth->irr) != MP_LT)) {
+ a1 = MP_DIGIT(&meth->irr, 1);
+ a0 = MP_DIGIT(&meth->irr, 0);
+#ifndef MPI_AMD64_ADD
+ /* carry is reused as the borrow; the final borrow is discarded. */
+ carry = 0;
+ MP_SUB_BORROW(r0, a0, r0, carry);
+ MP_SUB_BORROW(r1, a1, r1, carry);
+ MP_SUB_BORROW(r2, a2, r2, carry);
+#else
+ __asm__(
+ "subq %3,%0 \n\t"
+ "sbbq %4,%1 \n\t"
+ "sbbq %5,%2 \n\t"
+ : "=r"(r0), "=r"(r1), "=r"(r2)
+ : "r"(a0), "r"(a1), "r"(a2),
+ "0"(r0), "1"(r1), "2"(r2)
+ : "%cc");
+#endif
+ MP_DIGIT(r, 2) = r2;
+ MP_DIGIT(r, 1) = r1;
+ MP_DIGIT(r, 0) = r0;
+ }
+
+ /* NOTE(review): presumably trims leading zero digits from r after
+ * USED was forced to 3 - confirm against s_mp_clamp. */
+ s_mp_clamp(r);
+
+CLEANUP:
+ return res;
+}
+
+/* 4 words */
+/* Adds a and b modulo meth->irr for a modulus that occupies 4 digits.
+ * Up to four digits of each operand are loaded into locals (digits an
+ * operand does not have stay 0), added with carry propagation, stored into
+ * r, and the modulus is subtracted once if the sum is not below it.
+ * Returns MP_OKAY, or the error from s_mp_pad. An operand with more than
+ * 4 digits in use matches no case of the switches below, so none of its
+ * digits would be loaded. */
+mp_err
+ec_GFp_add_4(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth)
+{
+ mp_err res = MP_OKAY;
+ mp_digit a0 = 0, a1 = 0, a2 = 0, a3 = 0;
+ mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0;
+ mp_digit carry;
+
+ /* The cases fall through on purpose: starting at the operand's digit
+ * count, every lower digit is loaded as well. */
+ switch (MP_USED(a)) {
+ case 4:
+ a3 = MP_DIGIT(a, 3);
+ case 3:
+ a2 = MP_DIGIT(a, 2);
+ case 2:
+ a1 = MP_DIGIT(a, 1);
+ case 1:
+ a0 = MP_DIGIT(a, 0);
+ }
+ /* r0..r3 start out holding b's digits and are overwritten by the sum. */
+ switch (MP_USED(b)) {
+ case 4:
+ r3 = MP_DIGIT(b, 3);
+ case 3:
+ r2 = MP_DIGIT(b, 2);
+ case 2:
+ r1 = MP_DIGIT(b, 1);
+ case 1:
+ r0 = MP_DIGIT(b, 0);
+ }
+
+#ifndef MPI_AMD64_ADD
+ carry = 0;
+ MP_ADD_CARRY(a0, r0, r0, carry);
+ MP_ADD_CARRY(a1, r1, r1, carry);
+ MP_ADD_CARRY(a2, r2, r2, carry);
+ MP_ADD_CARRY(a3, r3, r3, carry);
+#else
+ /* Inline-assembly add/adc chain; carry receives the final carry. */
+ __asm__(
+ "xorq %4,%4 \n\t"
+ "addq %5,%0 \n\t"
+ "adcq %6,%1 \n\t"
+ "adcq %7,%2 \n\t"
+ "adcq %8,%3 \n\t"
+ "adcq $0,%4 \n\t"
+ : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(carry)
+ : "r"(a0), "r"(a1), "r"(a2), "r"(a3),
+ "0"(r0), "1"(r1), "2"(r2), "3"(r3)
+ : "%cc");
+#endif
+
+ MP_CHECKOK(s_mp_pad(r, 4));
+ MP_DIGIT(r, 3) = r3;
+ MP_DIGIT(r, 2) = r2;
+ MP_DIGIT(r, 1) = r1;
+ MP_DIGIT(r, 0) = r0;
+ MP_SIGN(r) = MP_ZPOS;
+ MP_USED(r) = 4;
+
+ /* Do quick 'subtract' if we've gone over
+ * (add the 2's complement of the curve field) */
+ /* The top digits are compared first; the full mp_cmp is only needed
+ * when they are equal. */
+ a3 = MP_DIGIT(&meth->irr, 3);
+ if (carry || r3 > a3 ||
+ ((r3 == a3) && mp_cmp(r, &meth->irr) != MP_LT)) {
+ a2 = MP_DIGIT(&meth->irr, 2);
+ a1 = MP_DIGIT(&meth->irr, 1);
+ a0 = MP_DIGIT(&meth->irr, 0);
+#ifndef MPI_AMD64_ADD
+ /* carry is reused as the borrow; the final borrow is discarded. */
+ carry = 0;
+ MP_SUB_BORROW(r0, a0, r0, carry);
+ MP_SUB_BORROW(r1, a1, r1, carry);
+ MP_SUB_BORROW(r2, a2, r2, carry);
+ MP_SUB_BORROW(r3, a3, r3, carry);
+#else
+ __asm__(
+ "subq %4,%0 \n\t"
+ "sbbq %5,%1 \n\t"
+ "sbbq %6,%2 \n\t"
+ "sbbq %7,%3 \n\t"
+ : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3)
+ : "r"(a0), "r"(a1), "r"(a2), "r"(a3),
+ "0"(r0), "1"(r1), "2"(r2), "3"(r3)
+ : "%cc");
+#endif
+ MP_DIGIT(r, 3) = r3;
+ MP_DIGIT(r, 2) = r2;
+ MP_DIGIT(r, 1) = r1;
+ MP_DIGIT(r, 0) = r0;
+ }
+
+ /* NOTE(review): presumably trims leading zero digits from r after
+ * USED was forced to 4 - confirm against s_mp_clamp. */
+ s_mp_clamp(r);
+
+CLEANUP:
+ return res;
+}
+
+/* 5 words */
+/* Adds a and b modulo meth->irr for a modulus that occupies 5 digits.
+ * Up to five digits of each operand are loaded into locals (digits an
+ * operand does not have stay 0), added with carry propagation, stored into
+ * r, and the modulus is subtracted once if the sum is not below it.
+ * Unlike the 3- and 4-word versions there is no inline-assembly path.
+ * Returns MP_OKAY, or the error from s_mp_pad. An operand with more than
+ * 5 digits in use matches no case of the switches below, so none of its
+ * digits would be loaded. */
+mp_err
+ec_GFp_add_5(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth)
+{
+ mp_err res = MP_OKAY;
+ mp_digit a0 = 0, a1 = 0, a2 = 0, a3 = 0, a4 = 0;
+ mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0;
+ mp_digit carry;
+
+ /* The cases fall through on purpose: starting at the operand's digit
+ * count, every lower digit is loaded as well. */
+ switch (MP_USED(a)) {
+ case 5:
+ a4 = MP_DIGIT(a, 4);
+ case 4:
+ a3 = MP_DIGIT(a, 3);
+ case 3:
+ a2 = MP_DIGIT(a, 2);
+ case 2:
+ a1 = MP_DIGIT(a, 1);
+ case 1:
+ a0 = MP_DIGIT(a, 0);
+ }
+ /* r0..r4 start out holding b's digits and are overwritten by the sum. */
+ switch (MP_USED(b)) {
+ case 5:
+ r4 = MP_DIGIT(b, 4);
+ case 4:
+ r3 = MP_DIGIT(b, 3);
+ case 3:
+ r2 = MP_DIGIT(b, 2);
+ case 2:
+ r1 = MP_DIGIT(b, 1);
+ case 1:
+ r0 = MP_DIGIT(b, 0);
+ }
+
+ carry = 0;
+ MP_ADD_CARRY(a0, r0, r0, carry);
+ MP_ADD_CARRY(a1, r1, r1, carry);
+ MP_ADD_CARRY(a2, r2, r2, carry);
+ MP_ADD_CARRY(a3, r3, r3, carry);
+ MP_ADD_CARRY(a4, r4, r4, carry);
+
+ MP_CHECKOK(s_mp_pad(r, 5));
+ MP_DIGIT(r, 4) = r4;
+ MP_DIGIT(r, 3) = r3;
+ MP_DIGIT(r, 2) = r2;
+ MP_DIGIT(r, 1) = r1;
+ MP_DIGIT(r, 0) = r0;
+ MP_SIGN(r) = MP_ZPOS;
+ MP_USED(r) = 5;
+
+ /* Do quick 'subtract' if we've gone over
+ * (add the 2's complement of the curve field) */
+ /* The top digits are compared first; the full mp_cmp is only needed
+ * when they are equal. */
+ a4 = MP_DIGIT(&meth->irr, 4);
+ if (carry || r4 > a4 ||
+ ((r4 == a4) && mp_cmp(r, &meth->irr) != MP_LT)) {
+ a3 = MP_DIGIT(&meth->irr, 3);
+ a2 = MP_DIGIT(&meth->irr, 2);
+ a1 = MP_DIGIT(&meth->irr, 1);
+ a0 = MP_DIGIT(&meth->irr, 0);
+ /* carry is reused as the borrow; the final borrow is discarded. */
+ carry = 0;
+ MP_SUB_BORROW(r0, a0, r0, carry);
+ MP_SUB_BORROW(r1, a1, r1, carry);
+ MP_SUB_BORROW(r2, a2, r2, carry);
+ MP_SUB_BORROW(r3, a3, r3, carry);
+ MP_SUB_BORROW(r4, a4, r4, carry);
+ MP_DIGIT(r, 4) = r4;
+ MP_DIGIT(r, 3) = r3;
+ MP_DIGIT(r, 2) = r2;
+ MP_DIGIT(r, 1) = r1;
+ MP_DIGIT(r, 0) = r0;
+ }
+
+ /* NOTE(review): presumably trims leading zero digits from r after
+ * USED was forced to 5 - confirm against s_mp_clamp. */
+ s_mp_clamp(r);
+
+CLEANUP:
+ return res;
+}
+
+/* 6 words
+ *
+ * Computes r = a + b with six unrolled digit additions, then subtracts
+ * meth->irr once if the sum carried out of the top digit or is not less
+ * than meth->irr. Returns MP_OKAY, or the error reported by s_mp_pad.
+ *
+ * NOTE(review): a single conditional subtraction only yields a reduced
+ * result if a and b are already reduced and meth->irr occupies exactly
+ * six digits -- confirm against the code that installs this method. */
+mp_err
+ec_GFp_add_6(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth)
+{
+ mp_err res = MP_OKAY;
+ mp_digit a0 = 0, a1 = 0, a2 = 0, a3 = 0, a4 = 0, a5 = 0;
+ mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0, r5 = 0;
+ mp_digit carry;
+
+ /* Load the digits of a. The cases fall through on purpose so that every
+ * digit below MP_USED(a) is read. There is no default case, so a used
+ * count outside 1..6 leaves a0..a5 at zero. */
+ switch (MP_USED(a)) {
+ case 6:
+ a5 = MP_DIGIT(a, 5);
+ case 5:
+ a4 = MP_DIGIT(a, 4);
+ case 4:
+ a3 = MP_DIGIT(a, 3);
+ case 3:
+ a2 = MP_DIGIT(a, 2);
+ case 2:
+ a1 = MP_DIGIT(a, 1);
+ case 1:
+ a0 = MP_DIGIT(a, 0);
+ }
+ /* Load the digits of b straight into the result accumulators r0..r5
+ * (same fall-through scheme as above). */
+ switch (MP_USED(b)) {
+ case 6:
+ r5 = MP_DIGIT(b, 5);
+ case 5:
+ r4 = MP_DIGIT(b, 4);
+ case 4:
+ r3 = MP_DIGIT(b, 3);
+ case 3:
+ r2 = MP_DIGIT(b, 2);
+ case 2:
+ r1 = MP_DIGIT(b, 1);
+ case 1:
+ r0 = MP_DIGIT(b, 0);
+ }
+
+ /* r[i] = a[i] + r[i] + carry, least significant digit first */
+ carry = 0;
+ MP_ADD_CARRY(a0, r0, r0, carry);
+ MP_ADD_CARRY(a1, r1, r1, carry);
+ MP_ADD_CARRY(a2, r2, r2, carry);
+ MP_ADD_CARRY(a3, r3, r3, carry);
+ MP_ADD_CARRY(a4, r4, r4, carry);
+ MP_ADD_CARRY(a5, r5, r5, carry);
+
+ /* Store the raw sum in r; it is needed there for the mp_cmp below. */
+ MP_CHECKOK(s_mp_pad(r, 6));
+ MP_DIGIT(r, 5) = r5;
+ MP_DIGIT(r, 4) = r4;
+ MP_DIGIT(r, 3) = r3;
+ MP_DIGIT(r, 2) = r2;
+ MP_DIGIT(r, 1) = r1;
+ MP_DIGIT(r, 0) = r0;
+ MP_SIGN(r) = MP_ZPOS;
+ MP_USED(r) = 6;
+
+ /* Do quick 'subtract' if we've gone over
+ * (add the 2's complement of the curve field) */
+ /* a5 is reused for the top digit of the modulus; comparing top digits
+ * first avoids the full mp_cmp unless they are equal. */
+ a5 = MP_DIGIT(&meth->irr, 5);
+ if (carry || r5 > a5 ||
+ ((r5 == a5) && mp_cmp(r, &meth->irr) != MP_LT)) {
+ a4 = MP_DIGIT(&meth->irr, 4);
+ a3 = MP_DIGIT(&meth->irr, 3);
+ a2 = MP_DIGIT(&meth->irr, 2);
+ a1 = MP_DIGIT(&meth->irr, 1);
+ a0 = MP_DIGIT(&meth->irr, 0);
+ /* r -= irr; the borrow left in carry afterwards is not used */
+ carry = 0;
+ MP_SUB_BORROW(r0, a0, r0, carry);
+ MP_SUB_BORROW(r1, a1, r1, carry);
+ MP_SUB_BORROW(r2, a2, r2, carry);
+ MP_SUB_BORROW(r3, a3, r3, carry);
+ MP_SUB_BORROW(r4, a4, r4, carry);
+ MP_SUB_BORROW(r5, a5, r5, carry);
+ MP_DIGIT(r, 5) = r5;
+ MP_DIGIT(r, 4) = r4;
+ MP_DIGIT(r, 3) = r3;
+ MP_DIGIT(r, 2) = r2;
+ MP_DIGIT(r, 1) = r1;
+ MP_DIGIT(r, 0) = r0;
+ }
+
+ /* MP_USED(r) was forced to 6 above; let s_mp_clamp adjust it */
+ s_mp_clamp(r);
+
+CLEANUP:
+ return res;
+}
+
+/*
+ * The following subtraction functions do in-line subtractions based
+ * on our curve size.
+ *
+ * ... 3 words
+ *
+ * Computes r = a - b with three unrolled digit subtractions and adds
+ * meth->irr back once if the subtraction borrowed out of the top digit.
+ * Returns MP_OKAY, or the error reported by s_mp_pad.
+ *
+ * Operands whose MP_USED is outside 1..3 are read as zero because the
+ * digit-loading switches have no default case.
+ */
+mp_err
+ec_GFp_sub_3(const mp_int *a, const mp_int *b, mp_int *r,
+             const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    mp_digit b0 = 0, b1 = 0, b2 = 0;
+    mp_digit r0 = 0, r1 = 0, r2 = 0;
+    mp_digit borrow;
+
+    /* minuend digits go straight into the result accumulators */
+    switch (MP_USED(a)) {
+        case 3:
+            r2 = MP_DIGIT(a, 2);
+        /* fall through */
+        case 2:
+            r1 = MP_DIGIT(a, 1);
+        /* fall through */
+        case 1:
+            r0 = MP_DIGIT(a, 0);
+    }
+    switch (MP_USED(b)) {
+        case 3:
+            b2 = MP_DIGIT(b, 2);
+        /* fall through */
+        case 2:
+            b1 = MP_DIGIT(b, 1);
+        /* fall through */
+        case 1:
+            b0 = MP_DIGIT(b, 0);
+    }
+
+#ifndef MPI_AMD64_ADD
+    borrow = 0;
+    MP_SUB_BORROW(r0, b0, r0, borrow);
+    MP_SUB_BORROW(r1, b1, r1, borrow);
+    MP_SUB_BORROW(r2, b2, r2, borrow);
+#else
+    /* Every output is written before the last input has been read
+     * (borrow is zeroed by the very first instruction), so each output
+     * must be earlyclobber ("&"); otherwise the compiler is allowed to
+     * place an output in the same register as one of b0..b2. */
+    __asm__(
+        "xorq   %3,%3           \n\t"
+        "subq   %4,%0           \n\t"
+        "sbbq   %5,%1           \n\t"
+        "sbbq   %6,%2           \n\t"
+        "adcq   $0,%3           \n\t"
+        : "=&r"(r0), "=&r"(r1), "=&r"(r2), "=&r"(borrow)
+        : "r"(b0), "r"(b1), "r"(b2),
+          "0"(r0), "1"(r1), "2"(r2)
+        : "%cc");
+#endif
+
+    /* Do quick 'add' if we've gone under 0
+     * (subtract the 2's complement of the curve field) */
+    if (borrow) {
+        b2 = MP_DIGIT(&meth->irr, 2);
+        b1 = MP_DIGIT(&meth->irr, 1);
+        b0 = MP_DIGIT(&meth->irr, 0);
+#ifndef MPI_AMD64_ADD
+        /* r += irr; the carry left in borrow afterwards is not used */
+        borrow = 0;
+        MP_ADD_CARRY(b0, r0, r0, borrow);
+        MP_ADD_CARRY(b1, r1, r1, borrow);
+        MP_ADD_CARRY(b2, r2, r2, borrow);
+#else
+        /* r0 and r1 are written before b1/b2 are read: earlyclobber */
+        __asm__(
+            "addq   %3,%0           \n\t"
+            "adcq   %4,%1           \n\t"
+            "adcq   %5,%2           \n\t"
+            : "=&r"(r0), "=&r"(r1), "=&r"(r2)
+            : "r"(b0), "r"(b1), "r"(b2),
+              "0"(r0), "1"(r1), "2"(r2)
+            : "%cc");
+#endif
+    }
+
+#ifdef MPI_AMD64_ADD
+    /* compiler fakeout? */
+    /* NOTE(review): intent undocumented; presumably keeps b0 live across
+     * the asm above. The extra pad is harmless because MP_USED(r) is
+     * set explicitly below. */
+    if ((r2 == b0) && (r1 == b0) && (r0 == b0)) {
+        MP_CHECKOK(s_mp_pad(r, 4));
+    }
+#endif
+    MP_CHECKOK(s_mp_pad(r, 3));
+    MP_DIGIT(r, 2) = r2;
+    MP_DIGIT(r, 1) = r1;
+    MP_DIGIT(r, 0) = r0;
+    MP_SIGN(r) = MP_ZPOS;
+    MP_USED(r) = 3;
+    s_mp_clamp(r);
+
+CLEANUP:
+    return res;
+}
+
+/* 4 words
+ *
+ * Computes r = a - b with four unrolled digit subtractions and adds
+ * meth->irr back once if the subtraction borrowed out of the top digit.
+ * Returns MP_OKAY, or the error reported by s_mp_pad.
+ *
+ * Operands whose MP_USED is outside 1..4 are read as zero because the
+ * digit-loading switches have no default case. */
+mp_err
+ec_GFp_sub_4(const mp_int *a, const mp_int *b, mp_int *r,
+             const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    mp_digit b0 = 0, b1 = 0, b2 = 0, b3 = 0;
+    mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0;
+    mp_digit borrow;
+
+    /* minuend digits go straight into the result accumulators */
+    switch (MP_USED(a)) {
+        case 4:
+            r3 = MP_DIGIT(a, 3);
+        /* fall through */
+        case 3:
+            r2 = MP_DIGIT(a, 2);
+        /* fall through */
+        case 2:
+            r1 = MP_DIGIT(a, 1);
+        /* fall through */
+        case 1:
+            r0 = MP_DIGIT(a, 0);
+    }
+    switch (MP_USED(b)) {
+        case 4:
+            b3 = MP_DIGIT(b, 3);
+        /* fall through */
+        case 3:
+            b2 = MP_DIGIT(b, 2);
+        /* fall through */
+        case 2:
+            b1 = MP_DIGIT(b, 1);
+        /* fall through */
+        case 1:
+            b0 = MP_DIGIT(b, 0);
+    }
+
+#ifndef MPI_AMD64_ADD
+    borrow = 0;
+    MP_SUB_BORROW(r0, b0, r0, borrow);
+    MP_SUB_BORROW(r1, b1, r1, borrow);
+    MP_SUB_BORROW(r2, b2, r2, borrow);
+    MP_SUB_BORROW(r3, b3, r3, borrow);
+#else
+    /* Every output is written before the last input has been read
+     * (borrow is zeroed by the very first instruction), so each output
+     * must be earlyclobber ("&"); otherwise the compiler is allowed to
+     * place an output in the same register as one of b0..b3. */
+    __asm__(
+        "xorq   %4,%4           \n\t"
+        "subq   %5,%0           \n\t"
+        "sbbq   %6,%1           \n\t"
+        "sbbq   %7,%2           \n\t"
+        "sbbq   %8,%3           \n\t"
+        "adcq   $0,%4           \n\t"
+        : "=&r"(r0), "=&r"(r1), "=&r"(r2), "=&r"(r3), "=&r"(borrow)
+        : "r"(b0), "r"(b1), "r"(b2), "r"(b3),
+          "0"(r0), "1"(r1), "2"(r2), "3"(r3)
+        : "%cc");
+#endif
+
+    /* Do quick 'add' if we've gone under 0
+     * (subtract the 2's complement of the curve field) */
+    if (borrow) {
+        b3 = MP_DIGIT(&meth->irr, 3);
+        b2 = MP_DIGIT(&meth->irr, 2);
+        b1 = MP_DIGIT(&meth->irr, 1);
+        b0 = MP_DIGIT(&meth->irr, 0);
+#ifndef MPI_AMD64_ADD
+        /* r += irr; the carry left in borrow afterwards is not used */
+        borrow = 0;
+        MP_ADD_CARRY(b0, r0, r0, borrow);
+        MP_ADD_CARRY(b1, r1, r1, borrow);
+        MP_ADD_CARRY(b2, r2, r2, borrow);
+        MP_ADD_CARRY(b3, r3, r3, borrow);
+#else
+        /* r0..r2 are written before b1..b3 are read: earlyclobber */
+        __asm__(
+            "addq   %4,%0           \n\t"
+            "adcq   %5,%1           \n\t"
+            "adcq   %6,%2           \n\t"
+            "adcq   %7,%3           \n\t"
+            : "=&r"(r0), "=&r"(r1), "=&r"(r2), "=&r"(r3)
+            : "r"(b0), "r"(b1), "r"(b2), "r"(b3),
+              "0"(r0), "1"(r1), "2"(r2), "3"(r3)
+            : "%cc");
+#endif
+    }
+#ifdef MPI_AMD64_ADD
+    /* compiler fakeout? */
+    /* NOTE(review): intent undocumented; presumably keeps b0 live across
+     * the asm above. The extra pad is harmless because the same pad is
+     * requested unconditionally right below. */
+    if ((r3 == b0) && (r1 == b0) && (r0 == b0)) {
+        MP_CHECKOK(s_mp_pad(r, 4));
+    }
+#endif
+    MP_CHECKOK(s_mp_pad(r, 4));
+    MP_DIGIT(r, 3) = r3;
+    MP_DIGIT(r, 2) = r2;
+    MP_DIGIT(r, 1) = r1;
+    MP_DIGIT(r, 0) = r0;
+    MP_SIGN(r) = MP_ZPOS;
+    MP_USED(r) = 4;
+    s_mp_clamp(r);
+
+CLEANUP:
+    return res;
+}
+
+/* 5 words
+ *
+ * Computes r = a - b with five unrolled digit subtractions and adds
+ * meth->irr back once if the subtraction borrowed out of the top digit.
+ * Returns MP_OKAY, or the error reported by s_mp_pad. */
+mp_err
+ec_GFp_sub_5(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth)
+{
+ mp_err res = MP_OKAY;
+ mp_digit b0 = 0, b1 = 0, b2 = 0, b3 = 0, b4 = 0;
+ mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0;
+ mp_digit borrow;
+
+ /* Load the minuend a straight into the result accumulators. The cases
+ * fall through on purpose; there is no default, so a used count
+ * outside 1..5 leaves r0..r4 at zero. */
+ switch (MP_USED(a)) {
+ case 5:
+ r4 = MP_DIGIT(a, 4);
+ case 4:
+ r3 = MP_DIGIT(a, 3);
+ case 3:
+ r2 = MP_DIGIT(a, 2);
+ case 2:
+ r1 = MP_DIGIT(a, 1);
+ case 1:
+ r0 = MP_DIGIT(a, 0);
+ }
+ /* Load the subtrahend b (same fall-through scheme). */
+ switch (MP_USED(b)) {
+ case 5:
+ b4 = MP_DIGIT(b, 4);
+ case 4:
+ b3 = MP_DIGIT(b, 3);
+ case 3:
+ b2 = MP_DIGIT(b, 2);
+ case 2:
+ b1 = MP_DIGIT(b, 1);
+ case 1:
+ b0 = MP_DIGIT(b, 0);
+ }
+
+ /* r[i] = r[i] - b[i] - borrow, least significant digit first */
+ borrow = 0;
+ MP_SUB_BORROW(r0, b0, r0, borrow);
+ MP_SUB_BORROW(r1, b1, r1, borrow);
+ MP_SUB_BORROW(r2, b2, r2, borrow);
+ MP_SUB_BORROW(r3, b3, r3, borrow);
+ MP_SUB_BORROW(r4, b4, r4, borrow);
+
+ /* Do quick 'add' if we've gone under 0
+ * (subtract the 2's complement of the curve field) */
+ if (borrow) {
+ /* b0..b4 are reused to hold the digits of the modulus */
+ b4 = MP_DIGIT(&meth->irr, 4);
+ b3 = MP_DIGIT(&meth->irr, 3);
+ b2 = MP_DIGIT(&meth->irr, 2);
+ b1 = MP_DIGIT(&meth->irr, 1);
+ b0 = MP_DIGIT(&meth->irr, 0);
+ /* r += irr; the carry left in borrow afterwards is not used */
+ borrow = 0;
+ MP_ADD_CARRY(b0, r0, r0, borrow);
+ MP_ADD_CARRY(b1, r1, r1, borrow);
+ MP_ADD_CARRY(b2, r2, r2, borrow);
+ MP_ADD_CARRY(b3, r3, r3, borrow);
+ MP_ADD_CARRY(b4, r4, r4, borrow);
+ }
+ MP_CHECKOK(s_mp_pad(r, 5));
+ MP_DIGIT(r, 4) = r4;
+ MP_DIGIT(r, 3) = r3;
+ MP_DIGIT(r, 2) = r2;
+ MP_DIGIT(r, 1) = r1;
+ MP_DIGIT(r, 0) = r0;
+ MP_SIGN(r) = MP_ZPOS;
+ /* force the digit count to 5, then let s_mp_clamp adjust it */
+ MP_USED(r) = 5;
+ s_mp_clamp(r);
+
+CLEANUP:
+ return res;
+}
+
+/* 6 words
+ *
+ * Computes r = a - b with six unrolled digit subtractions and adds
+ * meth->irr back once if the subtraction borrowed out of the top digit.
+ * Returns MP_OKAY, or the error reported by s_mp_pad. */
+mp_err
+ec_GFp_sub_6(const mp_int *a, const mp_int *b, mp_int *r,
+ const GFMethod *meth)
+{
+ mp_err res = MP_OKAY;
+ mp_digit b0 = 0, b1 = 0, b2 = 0, b3 = 0, b4 = 0, b5 = 0;
+ mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0, r5 = 0;
+ mp_digit borrow;
+
+ /* Load the minuend a straight into the result accumulators. The cases
+ * fall through on purpose; there is no default, so a used count
+ * outside 1..6 leaves r0..r5 at zero. */
+ switch (MP_USED(a)) {
+ case 6:
+ r5 = MP_DIGIT(a, 5);
+ case 5:
+ r4 = MP_DIGIT(a, 4);
+ case 4:
+ r3 = MP_DIGIT(a, 3);
+ case 3:
+ r2 = MP_DIGIT(a, 2);
+ case 2:
+ r1 = MP_DIGIT(a, 1);
+ case 1:
+ r0 = MP_DIGIT(a, 0);
+ }
+ /* Load the subtrahend b (same fall-through scheme). */
+ switch (MP_USED(b)) {
+ case 6:
+ b5 = MP_DIGIT(b, 5);
+ case 5:
+ b4 = MP_DIGIT(b, 4);
+ case 4:
+ b3 = MP_DIGIT(b, 3);
+ case 3:
+ b2 = MP_DIGIT(b, 2);
+ case 2:
+ b1 = MP_DIGIT(b, 1);
+ case 1:
+ b0 = MP_DIGIT(b, 0);
+ }
+
+ /* r[i] = r[i] - b[i] - borrow, least significant digit first */
+ borrow = 0;
+ MP_SUB_BORROW(r0, b0, r0, borrow);
+ MP_SUB_BORROW(r1, b1, r1, borrow);
+ MP_SUB_BORROW(r2, b2, r2, borrow);
+ MP_SUB_BORROW(r3, b3, r3, borrow);
+ MP_SUB_BORROW(r4, b4, r4, borrow);
+ MP_SUB_BORROW(r5, b5, r5, borrow);
+
+ /* Do quick 'add' if we've gone under 0
+ * (subtract the 2's complement of the curve field) */
+ if (borrow) {
+ /* b0..b5 are reused to hold the digits of the modulus */
+ b5 = MP_DIGIT(&meth->irr, 5);
+ b4 = MP_DIGIT(&meth->irr, 4);
+ b3 = MP_DIGIT(&meth->irr, 3);
+ b2 = MP_DIGIT(&meth->irr, 2);
+ b1 = MP_DIGIT(&meth->irr, 1);
+ b0 = MP_DIGIT(&meth->irr, 0);
+ /* r += irr; the carry left in borrow afterwards is not used */
+ borrow = 0;
+ MP_ADD_CARRY(b0, r0, r0, borrow);
+ MP_ADD_CARRY(b1, r1, r1, borrow);
+ MP_ADD_CARRY(b2, r2, r2, borrow);
+ MP_ADD_CARRY(b3, r3, r3, borrow);
+ MP_ADD_CARRY(b4, r4, r4, borrow);
+ MP_ADD_CARRY(b5, r5, r5, borrow);
+ }
+
+ MP_CHECKOK(s_mp_pad(r, 6));
+ MP_DIGIT(r, 5) = r5;
+ MP_DIGIT(r, 4) = r4;
+ MP_DIGIT(r, 3) = r3;
+ MP_DIGIT(r, 2) = r2;
+ MP_DIGIT(r, 1) = r1;
+ MP_DIGIT(r, 0) = r0;
+ MP_SIGN(r) = MP_ZPOS;
+ /* force the digit count to 6, then let s_mp_clamp adjust it */
+ MP_USED(r) = 6;
+ s_mp_clamp(r);
+
+CLEANUP:
+ return res;
+}
+
+/* Generic prime-field reduction: stores a mod meth->irr in r and returns
+ * the status reported by mp_mod. */
+mp_err
+ec_GFp_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+    const mp_int *modulus = &meth->irr;
+
+    return mp_mod(a, modulus, r);
+}
+
+/* Generic prime-field multiplication: stores (a * b) mod meth->irr in r
+ * and returns the status reported by mp_mulmod. */
+mp_err
+ec_GFp_mul(const mp_int *a, const mp_int *b, mp_int *r,
+           const GFMethod *meth)
+{
+    const mp_int *modulus = &meth->irr;
+
+    return mp_mulmod(a, b, modulus, r);
+}
+
+/* Generic prime-field squaring: stores (a * a) mod meth->irr in r and
+ * returns the status reported by mp_sqrmod. */
+mp_err
+ec_GFp_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+    const mp_int *modulus = &meth->irr;
+
+    return mp_sqrmod(a, modulus, r);
+}
+
+/* Divides two field elements: r = a / b (mod meth->irr). If a is NULL,
+ * then returns the inverse of b instead.
+ *
+ * Returns MP_OKAY on success, otherwise the first error reported by
+ * mp_init, mp_invmod or mp_mulmod. */
+mp_err
+ec_GFp_div(const mp_int *a, const mp_int *b, mp_int *r,
+           const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    mp_int t;
+
+    /* If a is NULL, then return the inverse of b, otherwise return a/b. */
+    if (a == NULL) {
+        return mp_invmod(b, &meth->irr, r);
+    }
+
+    /* Mark t as empty before mp_init, as the other functions in this
+     * library do, so that the mp_clear in CLEANUP never looks at an
+     * indeterminate digits pointer if mp_init bails out early. */
+    MP_DIGITS(&t) = 0;
+
+    /* MPI doesn't support divmod, so we implement it using invmod and
+     * mulmod. */
+    MP_CHECKOK(mp_init(&t));
+    MP_CHECKOK(mp_invmod(b, &meth->irr, &t));
+    MP_CHECKOK(mp_mulmod(a, &t, &meth->irr, r));
+
+CLEANUP:
+    mp_clear(&t);
+    return res;
+}
+
+/* Wrapper functions for generic binary polynomial field arithmetic. */
+
+/* Generic binary-polynomial-field addition: forwards to mp_badd(a, b, r)
+ * and returns its status. meth is not consulted. */
+mp_err
+ec_GF2m_add(const mp_int *a, const mp_int *b, mp_int *r,
+            const GFMethod *meth)
+{
+    mp_err status = mp_badd(a, b, r);
+
+    return status;
+}
+
+/* Negation in a binary polynomial field. Every element is its own
+ * negative there, so this only has to make r hold the value of a: a copy
+ * when the two are distinct objects, nothing at all when they alias. */
+mp_err
+ec_GF2m_neg(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+    if (a != r) {
+        return mp_copy(a, r);
+    }
+    return MP_OKAY;
+}
+
+/* Generic binary-polynomial reduction: forwards to mp_bmod with the
+ * field's meth->irr_arr and returns its status; the result lands in r. */
+mp_err
+ec_GF2m_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+    mp_err status = mp_bmod(a, meth->irr_arr, r);
+
+    return status;
+}
+
+/* Generic binary-polynomial-field multiplication: forwards to mp_bmulmod
+ * with the field's meth->irr_arr and returns its status; the product
+ * lands in r. */
+mp_err
+ec_GF2m_mul(const mp_int *a, const mp_int *b, mp_int *r,
+            const GFMethod *meth)
+{
+    mp_err status = mp_bmulmod(a, b, meth->irr_arr, r);
+
+    return status;
+}
+
+/* Generic binary-polynomial-field squaring: forwards to mp_bsqrmod with
+ * the field's meth->irr_arr and returns its status; the square lands in
+ * r. */
+mp_err
+ec_GF2m_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+    mp_err status = mp_bsqrmod(a, meth->irr_arr, r);
+
+    return status;
+}
+
+/* Divides two field elements: r = a / b in the binary polynomial field
+ * described by meth. If a is NULL, then returns the inverse of b instead.
+ *
+ * Returns MP_OKAY on success, otherwise the first error reported by
+ * mp_init, mp_set_int or mp_bdivmod. */
+mp_err
+ec_GF2m_div(const mp_int *a, const mp_int *b, mp_int *r,
+            const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    mp_int t;
+
+    /* If a is NULL, then return the inverse of b, otherwise return a/b. */
+    if (a != NULL) {
+        return mp_bdivmod(a, b, &meth->irr, meth->irr_arr, r);
+    }
+
+    /* Mark t as empty before mp_init, as the other functions in this
+     * library do, so that the mp_clear in CLEANUP never looks at an
+     * indeterminate digits pointer if mp_init bails out early. */
+    MP_DIGITS(&t) = 0;
+
+    /* The GF(2^m) portion of MPI doesn't support invmod, so we
+     * compute 1/b. */
+    MP_CHECKOK(mp_init(&t));
+    MP_CHECKOK(mp_set_int(&t, 1));
+    MP_CHECKOK(mp_bdivmod(&t, b, &meth->irr, meth->irr_arr, r));
+
+CLEANUP:
+    mp_clear(&t);
+    return res;
+}
diff --git a/security/nss/lib/freebl/ecl/ecl_mult.c b/security/nss/lib/freebl/ecl/ecl_mult.c
new file mode 100644
index 000000000..ffbcbf1d9
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecl_mult.c
@@ -0,0 +1,305 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi.h"
+#include "mplogic.h"
+#include "ecl.h"
+#include "ecl-priv.h"
+#include <stdlib.h>
+
+/* Scalar-point multiplication on an elliptic curve: R(x, y) = k * P(x, y).
+ * When px or py is NULL the generator (base point) of the group is used
+ * as P. Inputs and outputs are NOT field-encoded; this wrapper encodes
+ * and decodes through the group's field method where one is provided. */
+mp_err
+ECPoint_mul(const ECGroup *group, const mp_int *k, const mp_int *px,
+            const mp_int *py, mp_int *rx, mp_int *ry)
+{
+    mp_err res = MP_OKAY;
+    mp_int kt;
+
+    ARGCHK((group != NULL) && (k != NULL), MP_BADARG);
+    MP_DIGITS(&kt) = 0;
+
+    /* want scalar to be less than or equal to group order */
+    if (mp_cmp(k, &group->order) <= 0) {
+        /* already in range: kt merely borrows k's storage, no copy */
+        MP_DIGITS(&kt) = MP_DIGITS(k);
+        MP_ALLOC(&kt) = MP_ALLOC(k);
+        MP_USED(&kt) = MP_USED(k);
+        MP_SIGN(&kt) = MP_ZPOS;
+    } else {
+        /* too large: kt owns a fresh integer holding k mod order */
+        MP_CHECKOK(mp_init(&kt));
+        MP_CHECKOK(mp_mod(k, &group->order, &kt));
+    }
+
+    if ((px != NULL) && (py != NULL)) {
+        /* caller-supplied point */
+        if (group->meth->field_enc == NULL) {
+            MP_CHECKOK(group->point_mul(&kt, px, py, rx, ry, group));
+        } else {
+            /* encode into the output integers and multiply in place */
+            MP_CHECKOK(group->meth->field_enc(px, rx, group->meth));
+            MP_CHECKOK(group->meth->field_enc(py, ry, group->meth));
+            MP_CHECKOK(group->point_mul(&kt, rx, ry, rx, ry, group));
+        }
+    } else if (group->base_point_mul) {
+        /* generator, using the group's dedicated base-point routine */
+        MP_CHECKOK(group->base_point_mul(&kt, rx, ry, group));
+    } else {
+        /* generator, using the general routine */
+        MP_CHECKOK(group->point_mul(&kt, &group->genx, &group->geny, rx, ry,
+                                    group));
+    }
+
+    if (group->meth->field_dec) {
+        MP_CHECKOK(group->meth->field_dec(rx, rx, group->meth));
+        MP_CHECKOK(group->meth->field_dec(ry, ry, group->meth));
+    }
+
+CLEANUP:
+    /* only release kt when it does not alias the caller's k */
+    if (MP_DIGITS(&kt) != MP_DIGITS(k)) {
+        mp_clear(&kt);
+    }
+    return res;
+}
+
+/* Computes R(x, y) = k1 * G + k2 * P(x, y), with G the generator (base
+ * point) of the group. Either k1, or the set { k2, P }, may be NULL, in
+ * which case only the remaining term is computed. Input and output values
+ * are assumed to be NOT field-encoded. The two products are computed
+ * independently with ECPoint_mul and then added. */
+mp_err
+ec_pts_mul_basic(const mp_int *k1, const mp_int *k2, const mp_int *px,
+                 const mp_int *py, mp_int *rx, mp_int *ry,
+                 const ECGroup *group)
+{
+    mp_err res = MP_OKAY;
+    mp_int sx, sy;
+
+    ARGCHK(group != NULL, MP_BADARG);
+    /* at least one complete term must be supplied */
+    ARGCHK((k1 != NULL) || ((k2 != NULL) && (px != NULL) && (py != NULL)),
+           MP_BADARG);
+
+    /* a single term needs nothing more than ECPoint_mul */
+    if (k1 == NULL) {
+        return ECPoint_mul(group, k2, px, py, rx, ry);
+    }
+    if ((k2 == NULL) || (px == NULL) || (py == NULL)) {
+        return ECPoint_mul(group, k1, NULL, NULL, rx, ry);
+    }
+
+    MP_DIGITS(&sx) = 0;
+    MP_DIGITS(&sy) = 0;
+    MP_CHECKOK(mp_init(&sx));
+    MP_CHECKOK(mp_init(&sy));
+
+    /* S = k1 * G, R = k2 * P (both come back decoded) */
+    MP_CHECKOK(ECPoint_mul(group, k1, NULL, NULL, &sx, &sy));
+    MP_CHECKOK(ECPoint_mul(group, k2, px, py, rx, ry));
+
+    /* re-encode both points for the raw point_add */
+    if (group->meth->field_enc) {
+        MP_CHECKOK(group->meth->field_enc(&sx, &sx, group->meth));
+        MP_CHECKOK(group->meth->field_enc(&sy, &sy, group->meth));
+        MP_CHECKOK(group->meth->field_enc(rx, rx, group->meth));
+        MP_CHECKOK(group->meth->field_enc(ry, ry, group->meth));
+    }
+
+    /* R = S + R */
+    MP_CHECKOK(group->point_add(&sx, &sy, rx, ry, rx, ry, group));
+
+    if (group->meth->field_dec) {
+        MP_CHECKOK(group->meth->field_dec(rx, rx, group->meth));
+        MP_CHECKOK(group->meth->field_dec(ry, ry, group->meth));
+    }
+
+CLEANUP:
+    mp_clear(&sx);
+    mp_clear(&sy);
+    return res;
+}
+
+/* Elliptic curve scalar-point multiplication. Computes R(x, y) = k1 * G +
+ * k2 * P(x, y), where G is the generator (base point) of the group of
+ * points on the elliptic curve. Allows k1 = NULL or { k2, P } = NULL.
+ * Input and output values are assumed to be NOT field-encoded. Uses
+ * algorithm 15 (simultaneous multiple point multiplication) from Brown,
+ * Hankerson, Lopez, Menezes. Software Implementation of the NIST
+ * Elliptic Curves over Prime Fields.
+ *
+ * Both scalars are scanned two bits at a time against a 4x4 table where
+ * precomp[i][j] holds the point i * A + j * B ([..][0] is the x value,
+ * [..][1] the y value); A is the point paired with the longer scalar a
+ * and B the point paired with the other scalar b. */
+mp_err
+ec_pts_mul_simul_w2(const mp_int *k1, const mp_int *k2, const mp_int *px,
+ const mp_int *py, mp_int *rx, mp_int *ry,
+ const ECGroup *group)
+{
+ mp_err res = MP_OKAY;
+ mp_int precomp[4][4][2];
+ const mp_int *a, *b;
+ unsigned int i, j;
+ int ai, bi, d;
+
+ ARGCHK(group != NULL, MP_BADARG);
+ ARGCHK(!((k1 == NULL) && ((k2 == NULL) || (px == NULL) || (py == NULL))), MP_BADARG);
+
+ /* if some arguments are not defined use ECPoint_mul */
+ if (k1 == NULL) {
+ return ECPoint_mul(group, k2, px, py, rx, ry);
+ } else if ((k2 == NULL) || (px == NULL) || (py == NULL)) {
+ return ECPoint_mul(group, k1, NULL, NULL, rx, ry);
+ }
+
+ /* initialize precomputation table */
+ /* first pass: null every digits pointer so that CLEANUP can safely
+ * mp_clear all entries even if an allocation below fails midway */
+ for (i = 0; i < 4; i++) {
+ for (j = 0; j < 4; j++) {
+ MP_DIGITS(&precomp[i][j][0]) = 0;
+ MP_DIGITS(&precomp[i][j][1]) = 0;
+ }
+ }
+ for (i = 0; i < 4; i++) {
+ for (j = 0; j < 4; j++) {
+ MP_CHECKOK(mp_init_size(&precomp[i][j][0],
+ ECL_MAX_FIELD_SIZE_DIGITS));
+ MP_CHECKOK(mp_init_size(&precomp[i][j][1],
+ ECL_MAX_FIELD_SIZE_DIGITS));
+ }
+ }
+
+ /* fill precomputation table */
+ /* assign {k1, k2} = {a, b} such that len(a) >= len(b) */
+ /* precomp[1][0] receives the point that goes with a, precomp[0][1] the
+ * point that goes with b. P is field-encoded when the method has an
+ * encoder; the generator is copied as stored in the group --
+ * NOTE(review): presumably genx/geny are kept field-encoded; confirm. */
+ if (mpl_significant_bits(k1) < mpl_significant_bits(k2)) {
+ a = k2;
+ b = k1;
+ if (group->meth->field_enc) {
+ MP_CHECKOK(group->meth->field_enc(px, &precomp[1][0][0], group->meth));
+ MP_CHECKOK(group->meth->field_enc(py, &precomp[1][0][1], group->meth));
+ } else {
+ MP_CHECKOK(mp_copy(px, &precomp[1][0][0]));
+ MP_CHECKOK(mp_copy(py, &precomp[1][0][1]));
+ }
+ MP_CHECKOK(mp_copy(&group->genx, &precomp[0][1][0]));
+ MP_CHECKOK(mp_copy(&group->geny, &precomp[0][1][1]));
+ } else {
+ a = k1;
+ b = k2;
+ MP_CHECKOK(mp_copy(&group->genx, &precomp[1][0][0]));
+ MP_CHECKOK(mp_copy(&group->geny, &precomp[1][0][1]));
+ if (group->meth->field_enc) {
+ MP_CHECKOK(group->meth->field_enc(px, &precomp[0][1][0], group->meth));
+ MP_CHECKOK(group->meth->field_enc(py, &precomp[0][1][1], group->meth));
+ } else {
+ MP_CHECKOK(mp_copy(px, &precomp[0][1][0]));
+ MP_CHECKOK(mp_copy(py, &precomp[0][1][1]));
+ }
+ }
+ /* precompute [*][0][*] */
+ /* [0][0] is set to (0, 0); [2][0] = 2A; [3][0] = A + 2A */
+ mp_zero(&precomp[0][0][0]);
+ mp_zero(&precomp[0][0][1]);
+ MP_CHECKOK(group->point_dbl(&precomp[1][0][0], &precomp[1][0][1],
+ &precomp[2][0][0], &precomp[2][0][1], group));
+ MP_CHECKOK(group->point_add(&precomp[1][0][0], &precomp[1][0][1],
+ &precomp[2][0][0], &precomp[2][0][1],
+ &precomp[3][0][0], &precomp[3][0][1], group));
+ /* precompute [*][1][*] */
+ /* [i][1] = B + iA */
+ for (i = 1; i < 4; i++) {
+ MP_CHECKOK(group->point_add(&precomp[0][1][0], &precomp[0][1][1],
+ &precomp[i][0][0], &precomp[i][0][1],
+ &precomp[i][1][0], &precomp[i][1][1], group));
+ }
+ /* precompute [*][2][*] */
+ /* [0][2] = 2B, then [i][2] = 2B + iA */
+ MP_CHECKOK(group->point_dbl(&precomp[0][1][0], &precomp[0][1][1],
+ &precomp[0][2][0], &precomp[0][2][1], group));
+ for (i = 1; i < 4; i++) {
+ MP_CHECKOK(group->point_add(&precomp[0][2][0], &precomp[0][2][1],
+ &precomp[i][0][0], &precomp[i][0][1],
+ &precomp[i][2][0], &precomp[i][2][1], group));
+ }
+ /* precompute [*][3][*] */
+ /* [0][3] = B + 2B, then [i][3] = 3B + iA */
+ MP_CHECKOK(group->point_add(&precomp[0][1][0], &precomp[0][1][1],
+ &precomp[0][2][0], &precomp[0][2][1],
+ &precomp[0][3][0], &precomp[0][3][1], group));
+ for (i = 1; i < 4; i++) {
+ MP_CHECKOK(group->point_add(&precomp[0][3][0], &precomp[0][3][1],
+ &precomp[i][0][0], &precomp[i][0][1],
+ &precomp[i][3][0], &precomp[i][3][1], group));
+ }
+
+ /* number of 2-bit windows needed to cover the longer scalar a */
+ d = (mpl_significant_bits(a) + 1) / 2;
+
+ /* R = inf */
+ mp_zero(rx);
+ mp_zero(ry);
+
+ /* walk the windows from most significant to least significant; i is
+ * unsigned, so the post-decrement test stops after processing i == 0 */
+ for (i = d; i-- > 0;) {
+ /* ai, bi: the two bits of a and b at window i, as values 0..3 */
+ ai = MP_GET_BIT(a, 2 * i + 1);
+ ai <<= 1;
+ ai |= MP_GET_BIT(a, 2 * i);
+ bi = MP_GET_BIT(b, 2 * i + 1);
+ bi <<= 1;
+ bi |= MP_GET_BIT(b, 2 * i);
+ /* R = 2^2 * R */
+ MP_CHECKOK(group->point_dbl(rx, ry, rx, ry, group));
+ MP_CHECKOK(group->point_dbl(rx, ry, rx, ry, group));
+ /* R = R + (ai * A + bi * B) */
+ MP_CHECKOK(group->point_add(rx, ry, &precomp[ai][bi][0],
+ &precomp[ai][bi][1], rx, ry, group));
+ }
+
+ if (group->meth->field_dec) {
+ MP_CHECKOK(group->meth->field_dec(rx, rx, group->meth));
+ MP_CHECKOK(group->meth->field_dec(ry, ry, group->meth));
+ }
+
+CLEANUP:
+ /* release all 32 table integers on both the success and error paths */
+ for (i = 0; i < 4; i++) {
+ for (j = 0; j < 4; j++) {
+ mp_clear(&precomp[i][j][0]);
+ mp_clear(&precomp[i][j][1]);
+ }
+ }
+ return res;
+}
+
+/* Computes R(x, y) = k1 * G + k2 * P(x, y), with G the generator (base
+ * point) of the group. Either k1, or the set { k2, P }, may be NULL.
+ * Input and output values are assumed to be NOT field-encoded. Scalars
+ * that are not below the group order are first reduced modulo the order;
+ * the work is then handed to the group's points_mul routine, or to
+ * ec_pts_mul_simul_w2 when the group does not define one. */
+mp_err
+ECPoints_mul(const ECGroup *group, const mp_int *k1, const mp_int *k2,
+             const mp_int *px, const mp_int *py, mp_int *rx, mp_int *ry)
+{
+    mp_err res = MP_OKAY;
+    mp_int k1t, k2t;
+    const mp_int *k1p, *k2p;
+
+    /* mark the temporaries empty so CLEANUP may always clear them */
+    MP_DIGITS(&k1t) = 0;
+    MP_DIGITS(&k2t) = 0;
+
+    ARGCHK(group != NULL, MP_BADARG);
+
+    /* by default pass each scalar (or NULL) through untouched */
+    k1p = k1;
+    k2p = k2;
+
+    /* reduce k1 when it is not below the group order */
+    if ((k1 != NULL) && (mp_cmp(k1, &group->order) >= 0)) {
+        MP_CHECKOK(mp_init(&k1t));
+        MP_CHECKOK(mp_mod(k1, &group->order, &k1t));
+        k1p = &k1t;
+    }
+    /* likewise for k2 */
+    if ((k2 != NULL) && (mp_cmp(k2, &group->order) >= 0)) {
+        MP_CHECKOK(mp_init(&k2t));
+        MP_CHECKOK(mp_mod(k2, &group->order, &k2t));
+        k2p = &k2t;
+    }
+
+    /* prefer the group's own routine; otherwise use the generic one */
+    if (!group->points_mul) {
+        res = ec_pts_mul_simul_w2(k1p, k2p, px, py, rx, ry, group);
+    } else {
+        res = group->points_mul(k1p, k2p, px, py, rx, ry, group);
+    }
+
+CLEANUP:
+    mp_clear(&k1t);
+    mp_clear(&k2t);
+    return res;
+}
diff --git a/security/nss/lib/freebl/ecl/ecp.h b/security/nss/lib/freebl/ecl/ecp.h
new file mode 100644
index 000000000..7e54e4e07
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecp.h
@@ -0,0 +1,106 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef __ecp_h_
+#define __ecp_h_
+
+#include "ecl-priv.h"
+
+/* Checks if point P(px, py) is at infinity. Uses affine coordinates. */
+mp_err ec_GFp_pt_is_inf_aff(const mp_int *px, const mp_int *py);
+
+/* Sets P(px, py) to be the point at infinity. Uses affine coordinates. */
+mp_err ec_GFp_pt_set_inf_aff(mp_int *px, mp_int *py);
+
+/* Computes R = P + Q where R is (rx, ry), P is (px, py) and Q is (qx,
+ * qy). Uses affine coordinates. */
+mp_err ec_GFp_pt_add_aff(const mp_int *px, const mp_int *py,
+ const mp_int *qx, const mp_int *qy, mp_int *rx,
+ mp_int *ry, const ECGroup *group);
+
+/* Computes R = P - Q. Uses affine coordinates. */
+mp_err ec_GFp_pt_sub_aff(const mp_int *px, const mp_int *py,
+ const mp_int *qx, const mp_int *qy, mp_int *rx,
+ mp_int *ry, const ECGroup *group);
+
+/* Computes R = 2P. Uses affine coordinates. */
+mp_err ec_GFp_pt_dbl_aff(const mp_int *px, const mp_int *py, mp_int *rx,
+ mp_int *ry, const ECGroup *group);
+
+/* Validates a point on a GFp curve. */
+mp_err ec_GFp_validate_point(const mp_int *px, const mp_int *py, const ECGroup *group);
+
+#ifdef ECL_ENABLE_GFP_PT_MUL_AFF
+/* Computes R = nP where R is (rx, ry) and P is (px, py). The parameters
+ * a, b and p are the elliptic curve coefficients and the prime that
+ * determines the field GFp. Uses affine coordinates. */
+mp_err ec_GFp_pt_mul_aff(const mp_int *n, const mp_int *px,
+ const mp_int *py, mp_int *rx, mp_int *ry,
+ const ECGroup *group);
+#endif
+
+/* Converts a point P(px, py) from affine coordinates to Jacobian
+ * projective coordinates R(rx, ry, rz). */
+mp_err ec_GFp_pt_aff2jac(const mp_int *px, const mp_int *py, mp_int *rx,
+ mp_int *ry, mp_int *rz, const ECGroup *group);
+
+/* Converts a point P(px, py, pz) from Jacobian projective coordinates to
+ * affine coordinates R(rx, ry). */
+mp_err ec_GFp_pt_jac2aff(const mp_int *px, const mp_int *py,
+ const mp_int *pz, mp_int *rx, mp_int *ry,
+ const ECGroup *group);
+
+/* Checks if point P(px, py, pz) is at infinity. Uses Jacobian
+ * coordinates. */
+mp_err ec_GFp_pt_is_inf_jac(const mp_int *px, const mp_int *py,
+ const mp_int *pz);
+
+/* Sets P(px, py, pz) to be the point at infinity. Uses Jacobian
+ * coordinates. */
+mp_err ec_GFp_pt_set_inf_jac(mp_int *px, mp_int *py, mp_int *pz);
+
+/* Computes R = P + Q where R is (rx, ry, rz), P is (px, py, pz) and Q is
+ * (qx, qy). Uses mixed coordinates: P and R are Jacobian, Q is affine. */
+mp_err ec_GFp_pt_add_jac_aff(const mp_int *px, const mp_int *py,
+ const mp_int *pz, const mp_int *qx,
+ const mp_int *qy, mp_int *rx, mp_int *ry,
+ mp_int *rz, const ECGroup *group);
+
+/* Computes R = 2P. Uses Jacobian coordinates. */
+mp_err ec_GFp_pt_dbl_jac(const mp_int *px, const mp_int *py,
+ const mp_int *pz, mp_int *rx, mp_int *ry,
+ mp_int *rz, const ECGroup *group);
+
+#ifdef ECL_ENABLE_GFP_PT_MUL_JAC
+/* Computes R = nP where R is (rx, ry) and P is (px, py). The parameters
+ * a, b and p are the elliptic curve coefficients and the prime that
+ * determines the field GFp. Uses Jacobian coordinates. */
+mp_err ec_GFp_pt_mul_jac(const mp_int *n, const mp_int *px,
+ const mp_int *py, mp_int *rx, mp_int *ry,
+ const ECGroup *group);
+#endif
+
+/* Computes R(x, y) = k1 * G + k2 * P(x, y), where G is the generator
+ * (base point) of the group of points on the elliptic curve. Allows k1 =
+ * NULL or { k2, P } = NULL. Implemented using mixed Jacobian-affine
+ * coordinates. Input and output values are assumed to be NOT
+ * field-encoded and are in affine form. */
+mp_err
+ec_GFp_pts_mul_jac(const mp_int *k1, const mp_int *k2, const mp_int *px,
+ const mp_int *py, mp_int *rx, mp_int *ry,
+ const ECGroup *group);
+
+/* Computes R = nP where R is (rx, ry) and P is the base point. Elliptic
+ * curve points P and R can be identical. Uses mixed Modified-Jacobian
+ * co-ordinates for doubling and Chudnovsky Jacobian coordinates for
+ * additions. Assumes input is already field-encoded using field_enc, and
+ * returns output that is still field-encoded. Uses 5-bit window NAF
+ * method (algorithm 11) for scalar-point multiplication from Brown,
+ * Hankerson, Lopez, Menezes. Software Implementation of the NIST Elliptic
+ * Curves Over Prime Fields. */
+mp_err
+ec_GFp_pt_mul_jm_wNAF(const mp_int *n, const mp_int *px, const mp_int *py,
+ mp_int *rx, mp_int *ry, const ECGroup *group);
+
+#endif /* __ecp_h_ */
diff --git a/security/nss/lib/freebl/ecl/ecp_25519.c b/security/nss/lib/freebl/ecl/ecp_25519.c
new file mode 100644
index 000000000..a8d41520e
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecp_25519.c
@@ -0,0 +1,120 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* curve 25519 https://www.rfc-editor.org/rfc/rfc7748.txt */
+
+#ifdef FREEBL_NO_DEPEND
+#include "../stubs.h"
+#endif
+
+#include "ecl-priv.h"
+#include "ecp.h"
+#include "mpi.h"
+#include "mplogic.h"
+#include "mpi-priv.h"
+#include "secmpi.h"
+#include "secitem.h"
+#include "secport.h"
+#include <stdlib.h>
+#include <stdio.h>
+
+/*
+ * point validation is not necessary in general. But this checks a point (px)
+ * against some known bad values.
+ *
+ * Returns SECFailure if px is longer than 32 bytes or if its px->len bytes
+ * equal the leading px->len bytes of any entry in the table below;
+ * returns SECSuccess otherwise. Because only px->len bytes are compared,
+ * an input shorter than 32 bytes is rejected as soon as it matches a
+ * prefix of an entry.
+ */
+SECStatus
+ec_Curve25519_pt_validate(const SECItem *px)
+{
+    /* Read-only table: static const so it is not rebuilt on the stack on
+     * every call. */
+    static const PRUint8 forbiddenValues[12][32] = {
+        { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+          0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+          0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+          0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+        { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+          0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+          0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+          0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+        { 0xe0, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae,
+          0x16, 0x56, 0xe3, 0xfa, 0xf1, 0x9f, 0xc4, 0x6a,
+          0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32, 0xb1, 0xfd,
+          0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8, 0x00 },
+        { 0x5f, 0x9c, 0x95, 0xbc, 0xa3, 0x50, 0x8c, 0x24,
+          0xb1, 0xd0, 0xb1, 0x55, 0x9c, 0x83, 0xef, 0x5b,
+          0x04, 0x44, 0x5c, 0xc4, 0x58, 0x1c, 0x8e, 0x86,
+          0xd8, 0x22, 0x4e, 0xdd, 0xd0, 0x9f, 0x11, 0x57 },
+        { 0xec, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
+        { 0xed, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
+        { 0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
+        { 0xcd, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae,
+          0x16, 0x56, 0xe3, 0xfa, 0xf1, 0x9f, 0xc4, 0x6a,
+          0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32, 0xb1, 0xfd,
+          0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8, 0x80 },
+        { 0x4c, 0x9c, 0x95, 0xbc, 0xa3, 0x50, 0x8c, 0x24,
+          0xb1, 0xd0, 0xb1, 0x55, 0x9c, 0x83, 0xef, 0x5b,
+          0x04, 0x44, 0x5c, 0xc4, 0x58, 0x1c, 0x8e, 0x86,
+          0xd8, 0x22, 0x4e, 0xdd, 0xd0, 0x9f, 0x11, 0xd7 },
+        { 0xd9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+        { 0xda, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+        { 0xdb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+          0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff },
+    };
+    const PRUint8 *p;
+    unsigned int i; /* unsigned to match PR_ARRAY_SIZE's result */
+
+    /* The point must not be longer than 32 (it can be smaller). */
+    if (px->len > 32) {
+        return SECFailure;
+    }
+    p = px->data;
+
+    for (i = 0; i < PR_ARRAY_SIZE(forbiddenValues); ++i) {
+        if (NSS_SecureMemcmp(p, forbiddenValues[i], px->len) == 0) {
+            return SECFailure;
+        }
+    }
+
+    return SECSuccess;
+}
+
+/*
+ * Curve25519 scalar multiplication: X = k * P.
+ * A NULL P selects the base point (first byte 9, remaining 31 bytes 0).
+ * A non-NULL P must be exactly 32 bytes long, otherwise SECFailure is
+ * returned; in all other cases the result of ec_Curve25519_mul is
+ * returned.
+ */
+SECStatus
+ec_Curve25519_pt_mul(SECItem *X, SECItem *k, SECItem *P)
+{
+    PRUint8 basePoint[32] = { 9 };
+    PRUint8 *px = basePoint;
+
+    if (P) {
+        PORT_Assert(P->len == 32);
+        if (P->len != 32) {
+            return SECFailure;
+        }
+        px = P->data;
+    }
+
+    return ec_Curve25519_mul(X->data, k->data, px);
+}
diff --git a/security/nss/lib/freebl/ecl/ecp_256.c b/security/nss/lib/freebl/ecl/ecp_256.c
new file mode 100644
index 000000000..ad4e630c1
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecp_256.c
@@ -0,0 +1,401 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ecp.h"
+#include "mpi.h"
+#include "mplogic.h"
+#include "mpi-priv.h"
+
+/* Fast modular reduction for p256 = 2^256 - 2^224 + 2^192+ 2^96 - 1. a can be r.
+ * Uses algorithm 2.29 from Hankerson, Menezes, Vanstone. Guide to
+ * Elliptic Curve Cryptography.
+ *
+ * a    - value to reduce. Its sign is never inspected here, so callers
+ *        presumably pass non-negative values.
+ * r    - receives the reduced value; may be the same object as a.
+ * meth - field method; only meth->irr (used as the modulus p256) is read.
+ *
+ * Three cases are distinguished by the bit length of a:
+ *   - fewer than 256 bits: a is copied to r unchanged (no-op if a == r);
+ *   - more than 512 bits: generic mp_mod against meth->irr;
+ *   - otherwise: the digit-wise fast reduction below, which always
+ *     produces a non-negative result.
+ *
+ * Returns MP_OKAY on success, or the error reported by mp_copy or by one of
+ * the calls wrapped in MP_CHECKOK (mp_mod, s_mp_pad, mp_sub).
+ */
+static mp_err
+ec_GFp_nistp256_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+ mp_err res = MP_OKAY;
+ mp_size a_used = MP_USED(a);
+ int a_bits = mpl_significant_bits(a);
+ mp_digit carry;
+
+#ifdef ECL_THIRTY_TWO_BIT
+ mp_digit a8 = 0, a9 = 0, a10 = 0, a11 = 0, a12 = 0, a13 = 0, a14 = 0, a15 = 0;
+ mp_digit r0, r1, r2, r3, r4, r5, r6, r7;
+ int r8; /* must be a signed value ! */
+#else
+ mp_digit a4 = 0, a5 = 0, a6 = 0, a7 = 0;
+ mp_digit a4h, a4l, a5h, a5l, a6h, a6l, a7h, a7l;
+ mp_digit r0, r1, r2, r3;
+ int r4; /* must be a signed value ! */
+#endif
+ /* Values shorter than 256 bits are already below p256 and are returned
+ * as they are. For polynomials larger than twice the field size
+ * (more than 512 bits) use regular reduction. */
+ if (a_bits < 256) {
+ if (a == r)
+ return MP_OKAY;
+ return mp_copy(a, r);
+ }
+ if (a_bits > 512) {
+ MP_CHECKOK(mp_mod(a, &meth->irr, r));
+ } else {
+
+#ifdef ECL_THIRTY_TWO_BIT
+ switch (a_used) { /* load only the high digits that exist; the rest stay 0 */
+ case 16:
+ a15 = MP_DIGIT(a, 15); /* fall through */
+ case 15:
+ a14 = MP_DIGIT(a, 14); /* fall through */
+ case 14:
+ a13 = MP_DIGIT(a, 13); /* fall through */
+ case 13:
+ a12 = MP_DIGIT(a, 12); /* fall through */
+ case 12:
+ a11 = MP_DIGIT(a, 11); /* fall through */
+ case 11:
+ a10 = MP_DIGIT(a, 10); /* fall through */
+ case 10:
+ a9 = MP_DIGIT(a, 9); /* fall through */
+ case 9:
+ a8 = MP_DIGIT(a, 8);
+ }
+
+ r0 = MP_DIGIT(a, 0);
+ r1 = MP_DIGIT(a, 1);
+ r2 = MP_DIGIT(a, 2);
+ r3 = MP_DIGIT(a, 3);
+ r4 = MP_DIGIT(a, 4);
+ r5 = MP_DIGIT(a, 5);
+ r6 = MP_DIGIT(a, 6);
+ r7 = MP_DIGIT(a, 7);
+
+ /* sum 1: (a15..a11) is added into r7..r3 twice; r8 collects the carries
+ * out of the top digit for this and every later step */
+ carry = 0;
+ MP_ADD_CARRY(r3, a11, r3, carry);
+ MP_ADD_CARRY(r4, a12, r4, carry);
+ MP_ADD_CARRY(r5, a13, r5, carry);
+ MP_ADD_CARRY(r6, a14, r6, carry);
+ MP_ADD_CARRY(r7, a15, r7, carry);
+ r8 = carry;
+ carry = 0;
+ MP_ADD_CARRY(r3, a11, r3, carry);
+ MP_ADD_CARRY(r4, a12, r4, carry);
+ MP_ADD_CARRY(r5, a13, r5, carry);
+ MP_ADD_CARRY(r6, a14, r6, carry);
+ MP_ADD_CARRY(r7, a15, r7, carry);
+ r8 += carry;
+ carry = 0;
+ /* sum 2 */
+ MP_ADD_CARRY(r3, a12, r3, carry);
+ MP_ADD_CARRY(r4, a13, r4, carry);
+ MP_ADD_CARRY(r5, a14, r5, carry);
+ MP_ADD_CARRY(r6, a15, r6, carry);
+ MP_ADD_CARRY(r7, 0, r7, carry);
+ r8 += carry;
+ carry = 0;
+ /* combine last bottom of sum 3 with second sum 2 */
+ MP_ADD_CARRY(r0, a8, r0, carry);
+ MP_ADD_CARRY(r1, a9, r1, carry);
+ MP_ADD_CARRY(r2, a10, r2, carry);
+ MP_ADD_CARRY(r3, a12, r3, carry);
+ MP_ADD_CARRY(r4, a13, r4, carry);
+ MP_ADD_CARRY(r5, a14, r5, carry);
+ MP_ADD_CARRY(r6, a15, r6, carry);
+ MP_ADD_CARRY(r7, a15, r7, carry); /* from sum 3 */
+ r8 += carry;
+ carry = 0;
+ /* sum 3 (rest of it)*/
+ MP_ADD_CARRY(r6, a14, r6, carry);
+ MP_ADD_CARRY(r7, 0, r7, carry);
+ r8 += carry;
+ carry = 0;
+ /* sum 4 (rest of it)*/
+ MP_ADD_CARRY(r0, a9, r0, carry);
+ MP_ADD_CARRY(r1, a10, r1, carry);
+ MP_ADD_CARRY(r2, a11, r2, carry);
+ MP_ADD_CARRY(r3, a13, r3, carry);
+ MP_ADD_CARRY(r4, a14, r4, carry);
+ MP_ADD_CARRY(r5, a15, r5, carry);
+ MP_ADD_CARRY(r6, a13, r6, carry);
+ MP_ADD_CARRY(r7, a8, r7, carry);
+ r8 += carry;
+ carry = 0;
+ /* diff 5: from here on carry holds a borrow, which is taken out of r8 */
+ MP_SUB_BORROW(r0, a11, r0, carry);
+ MP_SUB_BORROW(r1, a12, r1, carry);
+ MP_SUB_BORROW(r2, a13, r2, carry);
+ MP_SUB_BORROW(r3, 0, r3, carry);
+ MP_SUB_BORROW(r4, 0, r4, carry);
+ MP_SUB_BORROW(r5, 0, r5, carry);
+ MP_SUB_BORROW(r6, a8, r6, carry);
+ MP_SUB_BORROW(r7, a10, r7, carry);
+ r8 -= carry;
+ carry = 0;
+ /* diff 6 */
+ MP_SUB_BORROW(r0, a12, r0, carry);
+ MP_SUB_BORROW(r1, a13, r1, carry);
+ MP_SUB_BORROW(r2, a14, r2, carry);
+ MP_SUB_BORROW(r3, a15, r3, carry);
+ MP_SUB_BORROW(r4, 0, r4, carry);
+ MP_SUB_BORROW(r5, 0, r5, carry);
+ MP_SUB_BORROW(r6, a9, r6, carry);
+ MP_SUB_BORROW(r7, a11, r7, carry);
+ r8 -= carry;
+ carry = 0;
+ /* diff 7 */
+ MP_SUB_BORROW(r0, a13, r0, carry);
+ MP_SUB_BORROW(r1, a14, r1, carry);
+ MP_SUB_BORROW(r2, a15, r2, carry);
+ MP_SUB_BORROW(r3, a8, r3, carry);
+ MP_SUB_BORROW(r4, a9, r4, carry);
+ MP_SUB_BORROW(r5, a10, r5, carry);
+ MP_SUB_BORROW(r6, 0, r6, carry);
+ MP_SUB_BORROW(r7, a12, r7, carry);
+ r8 -= carry;
+ carry = 0;
+ /* diff 8 */
+ MP_SUB_BORROW(r0, a14, r0, carry);
+ MP_SUB_BORROW(r1, a15, r1, carry);
+ MP_SUB_BORROW(r2, 0, r2, carry);
+ MP_SUB_BORROW(r3, a9, r3, carry);
+ MP_SUB_BORROW(r4, a10, r4, carry);
+ MP_SUB_BORROW(r5, a11, r5, carry);
+ MP_SUB_BORROW(r6, 0, r6, carry);
+ MP_SUB_BORROW(r7, a13, r7, carry);
+ r8 -= carry;
+
+ /* reduce the overflows: while r8 is positive, fold it back into r0..r7;
+ * a carry out of r7 becomes the next r8 */
+ while (r8 > 0) {
+ mp_digit r8_d = r8;
+ carry = 0;
+ MP_ADD_CARRY(r0, r8_d, r0, carry);
+ MP_ADD_CARRY(r1, 0, r1, carry);
+ MP_ADD_CARRY(r2, 0, r2, carry);
+ MP_ADD_CARRY(r3, 0 - r8_d, r3, carry);
+ MP_ADD_CARRY(r4, MP_DIGIT_MAX, r4, carry);
+ MP_ADD_CARRY(r5, MP_DIGIT_MAX, r5, carry);
+ MP_ADD_CARRY(r6, 0 - (r8_d + 1), r6, carry);
+ MP_ADD_CARRY(r7, (r8_d - 1), r7, carry);
+ r8 = carry;
+ }
+
+ /* reduce the underflows: the mirror image of the loop above, run while
+ * r8 is negative; a borrow out of r7 makes r8 negative again */
+ while (r8 < 0) {
+ mp_digit r8_d = -r8;
+ carry = 0;
+ MP_SUB_BORROW(r0, r8_d, r0, carry);
+ MP_SUB_BORROW(r1, 0, r1, carry);
+ MP_SUB_BORROW(r2, 0, r2, carry);
+ MP_SUB_BORROW(r3, 0 - r8_d, r3, carry);
+ MP_SUB_BORROW(r4, MP_DIGIT_MAX, r4, carry);
+ MP_SUB_BORROW(r5, MP_DIGIT_MAX, r5, carry);
+ MP_SUB_BORROW(r6, 0 - (r8_d + 1), r6, carry);
+ MP_SUB_BORROW(r7, (r8_d - 1), r7, carry);
+ r8 = 0 - carry;
+ }
+ if (a != r) { /* when a == r the object already has at least 8 digits */
+ MP_CHECKOK(s_mp_pad(r, 8));
+ }
+ MP_SIGN(r) = MP_ZPOS;
+ MP_USED(r) = 8;
+
+ MP_DIGIT(r, 7) = r7;
+ MP_DIGIT(r, 6) = r6;
+ MP_DIGIT(r, 5) = r5;
+ MP_DIGIT(r, 4) = r4;
+ MP_DIGIT(r, 3) = r3;
+ MP_DIGIT(r, 2) = r2;
+ MP_DIGIT(r, 1) = r1;
+ MP_DIGIT(r, 0) = r0;
+
+ /* final reduction if necessary: the test below looks for r >= p256, where
+ * p256 written as 32-bit digits (most significant first) is
+ * FFFFFFFF 00000001 00000000 00000000 00000000 FFFFFFFF FFFFFFFF FFFFFFFF */
+ if ((r7 == MP_DIGIT_MAX) &&
+ ((r6 > 1) || ((r6 == 1) &&
+ (r5 || r4 || r3 ||
+ ((r2 == MP_DIGIT_MAX) && (r1 == MP_DIGIT_MAX) && (r0 == MP_DIGIT_MAX)))))) {
+ MP_CHECKOK(mp_sub(r, &meth->irr, r));
+ }
+
+ s_mp_clamp(r);
+#else
+ switch (a_used) { /* load only the high digits that exist; the rest stay 0 */
+ case 8:
+ a7 = MP_DIGIT(a, 7); /* fall through */
+ case 7:
+ a6 = MP_DIGIT(a, 6); /* fall through */
+ case 6:
+ a5 = MP_DIGIT(a, 5); /* fall through */
+ case 5:
+ a4 = MP_DIGIT(a, 4);
+ }
+ a7l = a7 << 32; /* aNl: low half of aN moved into the upper 32 bits */
+ a7h = a7 >> 32; /* aNh: high half of aN moved into the lower 32 bits */
+ a6l = a6 << 32;
+ a6h = a6 >> 32;
+ a5l = a5 << 32;
+ a5h = a5 >> 32;
+ a4l = a4 << 32;
+ a4h = a4 >> 32;
+ r3 = MP_DIGIT(a, 3);
+ r2 = MP_DIGIT(a, 2);
+ r1 = MP_DIGIT(a, 1);
+ r0 = MP_DIGIT(a, 0);
+
+ /* sum 1: added twice; r4 collects the carries out of the top digit for
+ * this and every later step */
+ carry = 0;
+ MP_ADD_CARRY(r1, a5h << 32, r1, carry);
+ MP_ADD_CARRY(r2, a6, r2, carry);
+ MP_ADD_CARRY(r3, a7, r3, carry);
+ r4 = carry;
+ carry = 0;
+ MP_ADD_CARRY(r1, a5h << 32, r1, carry);
+ MP_ADD_CARRY(r2, a6, r2, carry);
+ MP_ADD_CARRY(r3, a7, r3, carry);
+ r4 += carry;
+ /* sum 2: also added twice */
+ carry = 0;
+ MP_ADD_CARRY(r1, a6l, r1, carry);
+ MP_ADD_CARRY(r2, a6h | a7l, r2, carry);
+ MP_ADD_CARRY(r3, a7h, r3, carry);
+ r4 += carry;
+ carry = 0;
+ MP_ADD_CARRY(r1, a6l, r1, carry);
+ MP_ADD_CARRY(r2, a6h | a7l, r2, carry);
+ MP_ADD_CARRY(r3, a7h, r3, carry);
+ r4 += carry;
+
+ /* sum 3 */
+ carry = 0;
+ MP_ADD_CARRY(r0, a4, r0, carry);
+ MP_ADD_CARRY(r1, a5l >> 32, r1, carry);
+ MP_ADD_CARRY(r2, 0, r2, carry);
+ MP_ADD_CARRY(r3, a7, r3, carry);
+ r4 += carry;
+ /* sum 4 */
+ carry = 0;
+ MP_ADD_CARRY(r0, a4h | a5l, r0, carry);
+ MP_ADD_CARRY(r1, a5h | (a6h << 32), r1, carry);
+ MP_ADD_CARRY(r2, a7, r2, carry);
+ MP_ADD_CARRY(r3, a6h | a4l, r3, carry);
+ r4 += carry;
+ /* diff 5: from here on carry holds a borrow, which is taken out of r4 */
+ carry = 0;
+ MP_SUB_BORROW(r0, a5h | a6l, r0, carry);
+ MP_SUB_BORROW(r1, a6h, r1, carry);
+ MP_SUB_BORROW(r2, 0, r2, carry);
+ MP_SUB_BORROW(r3, (a4l >> 32) | a5l, r3, carry);
+ r4 -= carry;
+ /* diff 6 */
+ carry = 0;
+ MP_SUB_BORROW(r0, a6, r0, carry);
+ MP_SUB_BORROW(r1, a7, r1, carry);
+ MP_SUB_BORROW(r2, 0, r2, carry);
+ MP_SUB_BORROW(r3, a4h | (a5h << 32), r3, carry);
+ r4 -= carry;
+ /* diff 7 */
+ carry = 0;
+ MP_SUB_BORROW(r0, a6h | a7l, r0, carry);
+ MP_SUB_BORROW(r1, a7h | a4l, r1, carry);
+ MP_SUB_BORROW(r2, a4h | a5l, r2, carry);
+ MP_SUB_BORROW(r3, a6l, r3, carry);
+ r4 -= carry;
+ /* diff 8 */
+ carry = 0;
+ MP_SUB_BORROW(r0, a7, r0, carry);
+ MP_SUB_BORROW(r1, a4h << 32, r1, carry);
+ MP_SUB_BORROW(r2, a5, r2, carry);
+ MP_SUB_BORROW(r3, a6h << 32, r3, carry);
+ r4 -= carry;
+
+ /* reduce the overflows: while r4 is positive, fold it back into r0..r3;
+ * a carry out of r3 becomes the next r4 */
+ while (r4 > 0) {
+ mp_digit r4_long = r4;
+ mp_digit r4l = (r4_long << 32);
+ carry = 0;
+ MP_ADD_CARRY(r0, r4_long, r0, carry);
+ MP_ADD_CARRY(r1, 0 - r4l, r1, carry);
+ MP_ADD_CARRY(r2, MP_DIGIT_MAX, r2, carry);
+ MP_ADD_CARRY(r3, r4l - r4_long - 1, r3, carry);
+ r4 = carry;
+ }
+
+ /* reduce the underflows: the mirror image of the loop above, run while
+ * r4 is negative; a borrow out of r3 makes r4 negative again */
+ while (r4 < 0) {
+ mp_digit r4_long = -r4;
+ mp_digit r4l = (r4_long << 32);
+ carry = 0;
+ MP_SUB_BORROW(r0, r4_long, r0, carry);
+ MP_SUB_BORROW(r1, 0 - r4l, r1, carry);
+ MP_SUB_BORROW(r2, MP_DIGIT_MAX, r2, carry);
+ MP_SUB_BORROW(r3, r4l - r4_long - 1, r3, carry);
+ r4 = 0 - carry;
+ }
+
+ if (a != r) { /* when a == r the object already has at least 4 digits */
+ MP_CHECKOK(s_mp_pad(r, 4));
+ }
+ MP_SIGN(r) = MP_ZPOS;
+ MP_USED(r) = 4;
+
+ MP_DIGIT(r, 3) = r3;
+ MP_DIGIT(r, 2) = r2;
+ MP_DIGIT(r, 1) = r1;
+ MP_DIGIT(r, 0) = r0;
+
+ /* final reduction if necessary: the test below looks for r >= p256, where
+ * p256 written as 64-bit digits (most significant first) is
+ * FFFFFFFF00000001 0000000000000000 00000000FFFFFFFF FFFFFFFFFFFFFFFF */
+ if ((r3 > 0xFFFFFFFF00000001ULL) ||
+ ((r3 == 0xFFFFFFFF00000001ULL) &&
+ (r2 || (r1 >> 32) ||
+ (r1 == 0xFFFFFFFFULL && r0 == MP_DIGIT_MAX)))) {
+ /* very rare, just use mp_sub */
+ MP_CHECKOK(mp_sub(r, &meth->irr, r));
+ }
+
+ s_mp_clamp(r);
+#endif
+ }
+
+CLEANUP:
+ return res;
+}
+
+/* Square a modulo p256 and store the result in r; r may be the same
+ * object as a. The full square is formed with mp_sqr and then reduced in
+ * place by the optimized p256 reduction. Returns the status of the first
+ * step that fails, or of the reduction when the squaring succeeds.
+ */
+static mp_err
+ec_GFp_nistp256_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+    mp_err status = mp_sqr(a, r);
+
+    if (status < MP_OKAY) {
+        return status;
+    }
+    return ec_GFp_nistp256_mod(r, r, meth);
+}
+
+/* Multiply a by b modulo p256 and store the result in r. r may be the
+ * same object as a or b, and a may be the same object as b. The full
+ * product is formed with mp_mul and then reduced in place by the optimized
+ * p256 reduction. Returns the status of the first step that fails, or of
+ * the reduction when the multiplication succeeds. */
+static mp_err
+ec_GFp_nistp256_mul(const mp_int *a, const mp_int *b, mp_int *r,
+                    const GFMethod *meth)
+{
+    mp_err status = mp_mul(a, b, r);
+
+    if (status < MP_OKAY) {
+        return status;
+    }
+    return ec_GFp_nistp256_mod(r, r, meth);
+}
+
+/* Install the p256-specific field routines (reduction, multiplication and
+ * squaring) on the group's field method when the named curve is NIST P-256.
+ * Any other curve name leaves the group untouched. Always returns MP_OKAY. */
+mp_err
+ec_group_set_gfp256(ECGroup *group, ECCurveName name)
+{
+    if (name != ECCurve_NIST_P256) {
+        return MP_OKAY;
+    }
+
+    group->meth->field_mod = &ec_GFp_nistp256_mod;
+    group->meth->field_mul = &ec_GFp_nistp256_mul;
+    group->meth->field_sqr = &ec_GFp_nistp256_sqr;
+    return MP_OKAY;
+}
diff --git a/security/nss/lib/freebl/ecl/ecp_256_32.c b/security/nss/lib/freebl/ecl/ecp_256_32.c
new file mode 100644
index 000000000..515f6f731
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecp_256_32.c
@@ -0,0 +1,1535 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* A 32-bit implementation of the NIST P-256 elliptic curve. */
+
+#include <string.h>
+
+#include "prtypes.h"
+#include "mpi.h"
+#include "mpi-priv.h"
+#include "ecp.h"
+
+typedef PRUint8 u8; /* short aliases for the 8-, 32- and 64-bit unsigned types */
+typedef PRUint32 u32;
+typedef PRUint64 u64;
+
+/* Our field elements are represented as nine unsigned 32-bit words. Freebl's
+ * MPI library calls them digits, but here they are called limbs, which is
+ * GMP's terminology.
+ *
+ * The value of an felem (field element) is:
+ * x[0] + (x[1] * 2**29) + (x[2] * 2**57) + ... + (x[8] * 2**228)
+ *
+ * That is, each limb is alternately 29 or 28-bits wide in little-endian
+ * order: even-indexed limbs hold 29 bits, odd-indexed limbs hold 28 bits
+ * (5 * 29 + 4 * 28 = 257 bits in total).
+ *
+ * This means that an felem hits 2**257, rather than 2**256 as we would like. A
+ * 28, 29, ... pattern would cause us to hit 2**256, but that causes problems
+ * when multiplying as terms end up one bit short of a limb which would require
+ * much bit-shifting to correct.
+ *
+ * Finally, the values stored in an felem are in Montgomery form. So the value
+ * |y| is stored as (y*R) mod p, where p is the P-256 prime and R is 2**257.
+ */
+typedef u32 limb;
+#define NLIMBS 9
+typedef limb felem[NLIMBS];
+
+static const limb kBottom28Bits = 0xfffffff; /* mask selecting the low 28 bits (odd limbs) */
+static const limb kBottom29Bits = 0x1fffffff; /* mask selecting the low 29 bits (even limbs) */
+
+/* kOne is the number 1 as an felem. It's 2**257 mod p split up into 29 and
+ * 28-bit words, i.e. the value 1 in Montgomery form with R = 2**257.
+ */
+static const felem kOne = {
+ 2, 0, 0, 0xffff800,
+ 0x1fffffff, 0xfffffff, 0x1fbfffff, 0x1ffffff,
+ 0
+};
+static const felem kZero = { 0 }; /* every limb is zero */
+static const felem kP = { /* limbs of p = 2**256 - 2**224 + 2**192 + 2**96 - 1 */
+ 0x1fffffff, 0xfffffff, 0x1fffffff, 0x3ff,
+ 0, 0, 0x200000, 0xf000000,
+ 0xfffffff
+};
+static const felem k2P = { /* limbs of 2*p */
+ 0x1ffffffe, 0xfffffff, 0x1fffffff, 0x7ff,
+ 0, 0, 0x400000, 0xe000000,
+ 0x1fffffff
+};
+
+/* kPrecomputed contains precomputed values to aid the calculation of scalar
+ * multiples of the base point, G. It's actually two, equal length, tables
+ * concatenated.
+ *
+ * The first table contains (x,y) felem pairs for 16 multiples of the base
+ * point, G.
+ *
+ * Index | Index (binary) | Value
+ * 0 | 0000 | 0G (all zeros, omitted)
+ * 1 | 0001 | G
+ * 2 | 0010 | 2**64G
+ * 3 | 0011 | 2**64G + G
+ * 4 | 0100 | 2**128G
+ * 5 | 0101 | 2**128G + G
+ * 6 | 0110 | 2**128G + 2**64G
+ * 7 | 0111 | 2**128G + 2**64G + G
+ * 8 | 1000 | 2**192G
+ * 9 | 1001 | 2**192G + G
+ * 10 | 1010 | 2**192G + 2**64G
+ * 11 | 1011 | 2**192G + 2**64G + G
+ * 12 | 1100 | 2**192G + 2**128G
+ * 13 | 1101 | 2**192G + 2**128G + G
+ * 14 | 1110 | 2**192G + 2**128G + 2**64G
+ * 15 | 1111 | 2**192G + 2**128G + 2**64G + G
+ *
+ * The second table follows the same style, but the terms are 2**32G,
+ * 2**96G, 2**160G, 2**224G.
+ *
+ * This is ~2KB of data.
+ */
+static const limb kPrecomputed[NLIMBS * 2 * 15 * 2] = {
+ 0x11522878, 0xe730d41, 0xdb60179, 0x4afe2ff, 0x12883add, 0xcaddd88, 0x119e7edc, 0xd4a6eab, 0x3120bee,
+ 0x1d2aac15, 0xf25357c, 0x19e45cdd, 0x5c721d0, 0x1992c5a5, 0xa237487, 0x154ba21, 0x14b10bb, 0xae3fe3,
+ 0xd41a576, 0x922fc51, 0x234994f, 0x60b60d3, 0x164586ae, 0xce95f18, 0x1fe49073, 0x3fa36cc, 0x5ebcd2c,
+ 0xb402f2f, 0x15c70bf, 0x1561925c, 0x5a26704, 0xda91e90, 0xcdc1c7f, 0x1ea12446, 0xe1ade1e, 0xec91f22,
+ 0x26f7778, 0x566847e, 0xa0bec9e, 0x234f453, 0x1a31f21a, 0xd85e75c, 0x56c7109, 0xa267a00, 0xb57c050,
+ 0x98fb57, 0xaa837cc, 0x60c0792, 0xcfa5e19, 0x61bab9e, 0x589e39b, 0xa324c5, 0x7d6dee7, 0x2976e4b,
+ 0x1fc4124a, 0xa8c244b, 0x1ce86762, 0xcd61c7e, 0x1831c8e0, 0x75774e1, 0x1d96a5a9, 0x843a649, 0xc3ab0fa,
+ 0x6e2e7d5, 0x7673a2a, 0x178b65e8, 0x4003e9b, 0x1a1f11c2, 0x7816ea, 0xf643e11, 0x58c43df, 0xf423fc2,
+ 0x19633ffa, 0x891f2b2, 0x123c231c, 0x46add8c, 0x54700dd, 0x59e2b17, 0x172db40f, 0x83e277d, 0xb0dd609,
+ 0xfd1da12, 0x35c6e52, 0x19ede20c, 0xd19e0c0, 0x97d0f40, 0xb015b19, 0x449e3f5, 0xe10c9e, 0x33ab581,
+ 0x56a67ab, 0x577734d, 0x1dddc062, 0xc57b10d, 0x149b39d, 0x26a9e7b, 0xc35df9f, 0x48764cd, 0x76dbcca,
+ 0xca4b366, 0xe9303ab, 0x1a7480e7, 0x57e9e81, 0x1e13eb50, 0xf466cf3, 0x6f16b20, 0x4ba3173, 0xc168c33,
+ 0x15cb5439, 0x6a38e11, 0x73658bd, 0xb29564f, 0x3f6dc5b, 0x53b97e, 0x1322c4c0, 0x65dd7ff, 0x3a1e4f6,
+ 0x14e614aa, 0x9246317, 0x1bc83aca, 0xad97eed, 0xd38ce4a, 0xf82b006, 0x341f077, 0xa6add89, 0x4894acd,
+ 0x9f162d5, 0xf8410ef, 0x1b266a56, 0xd7f223, 0x3e0cb92, 0xe39b672, 0x6a2901a, 0x69a8556, 0x7e7c0,
+ 0x9b7d8d3, 0x309a80, 0x1ad05f7f, 0xc2fb5dd, 0xcbfd41d, 0x9ceb638, 0x1051825c, 0xda0cf5b, 0x812e881,
+ 0x6f35669, 0x6a56f2c, 0x1df8d184, 0x345820, 0x1477d477, 0x1645db1, 0xbe80c51, 0xc22be3e, 0xe35e65a,
+ 0x1aeb7aa0, 0xc375315, 0xf67bc99, 0x7fdd7b9, 0x191fc1be, 0x61235d, 0x2c184e9, 0x1c5a839, 0x47a1e26,
+ 0xb7cb456, 0x93e225d, 0x14f3c6ed, 0xccc1ac9, 0x17fe37f3, 0x4988989, 0x1a90c502, 0x2f32042, 0xa17769b,
+ 0xafd8c7c, 0x8191c6e, 0x1dcdb237, 0x16200c0, 0x107b32a1, 0x66c08db, 0x10d06a02, 0x3fc93, 0x5620023,
+ 0x16722b27, 0x68b5c59, 0x270fcfc, 0xfad0ecc, 0xe5de1c2, 0xeab466b, 0x2fc513c, 0x407f75c, 0xbaab133,
+ 0x9705fe9, 0xb88b8e7, 0x734c993, 0x1e1ff8f, 0x19156970, 0xabd0f00, 0x10469ea7, 0x3293ac0, 0xcdc98aa,
+ 0x1d843fd, 0xe14bfe8, 0x15be825f, 0x8b5212, 0xeb3fb67, 0x81cbd29, 0xbc62f16, 0x2b6fcc7, 0xf5a4e29,
+ 0x13560b66, 0xc0b6ac2, 0x51ae690, 0xd41e271, 0xf3e9bd4, 0x1d70aab, 0x1029f72, 0x73e1c35, 0xee70fbc,
+ 0xad81baf, 0x9ecc49a, 0x86c741e, 0xfe6be30, 0x176752e7, 0x23d416, 0x1f83de85, 0x27de188, 0x66f70b8,
+ 0x181cd51f, 0x96b6e4c, 0x188f2335, 0xa5df759, 0x17a77eb6, 0xfeb0e73, 0x154ae914, 0x2f3ec51, 0x3826b59,
+ 0xb91f17d, 0x1c72949, 0x1362bf0a, 0xe23fddf, 0xa5614b0, 0xf7d8f, 0x79061, 0x823d9d2, 0x8213f39,
+ 0x1128ae0b, 0xd095d05, 0xb85c0c2, 0x1ecb2ef, 0x24ddc84, 0xe35e901, 0x18411a4a, 0xf5ddc3d, 0x3786689,
+ 0x52260e8, 0x5ae3564, 0x542b10d, 0x8d93a45, 0x19952aa4, 0x996cc41, 0x1051a729, 0x4be3499, 0x52b23aa,
+ 0x109f307e, 0x6f5b6bb, 0x1f84e1e7, 0x77a0cfa, 0x10c4df3f, 0x25a02ea, 0xb048035, 0xe31de66, 0xc6ecaa3,
+ 0x28ea335, 0x2886024, 0x1372f020, 0xf55d35, 0x15e4684c, 0xf2a9e17, 0x1a4a7529, 0xcb7beb1, 0xb2a78a1,
+ 0x1ab21f1f, 0x6361ccf, 0x6c9179d, 0xb135627, 0x1267b974, 0x4408bad, 0x1cbff658, 0xe3d6511, 0xc7d76f,
+ 0x1cc7a69, 0xe7ee31b, 0x54fab4f, 0x2b914f, 0x1ad27a30, 0xcd3579e, 0xc50124c, 0x50daa90, 0xb13f72,
+ 0xb06aa75, 0x70f5cc6, 0x1649e5aa, 0x84a5312, 0x329043c, 0x41c4011, 0x13d32411, 0xb04a838, 0xd760d2d,
+ 0x1713b532, 0xbaa0c03, 0x84022ab, 0x6bcf5c1, 0x2f45379, 0x18ae070, 0x18c9e11e, 0x20bca9a, 0x66f496b,
+ 0x3eef294, 0x67500d2, 0xd7f613c, 0x2dbbeb, 0xb741038, 0xe04133f, 0x1582968d, 0xbe985f7, 0x1acbc1a,
+ 0x1a6a939f, 0x33e50f6, 0xd665ed4, 0xb4b7bd6, 0x1e5a3799, 0x6b33847, 0x17fa56ff, 0x65ef930, 0x21dc4a,
+ 0x2b37659, 0x450fe17, 0xb357b65, 0xdf5efac, 0x15397bef, 0x9d35a7f, 0x112ac15f, 0x624e62e, 0xa90ae2f,
+ 0x107eecd2, 0x1f69bbe, 0x77d6bce, 0x5741394, 0x13c684fc, 0x950c910, 0x725522b, 0xdc78583, 0x40eeabb,
+ 0x1fde328a, 0xbd61d96, 0xd28c387, 0x9e77d89, 0x12550c40, 0x759cb7d, 0x367ef34, 0xae2a960, 0x91b8bdc,
+ 0x93462a9, 0xf469ef, 0xb2e9aef, 0xd2ca771, 0x54e1f42, 0x7aaa49, 0x6316abb, 0x2413c8e, 0x5425bf9,
+ 0x1bed3e3a, 0xf272274, 0x1f5e7326, 0x6416517, 0xea27072, 0x9cedea7, 0x6e7633, 0x7c91952, 0xd806dce,
+ 0x8e2a7e1, 0xe421e1a, 0x418c9e1, 0x1dbc890, 0x1b395c36, 0xa1dc175, 0x1dc4ef73, 0x8956f34, 0xe4b5cf2,
+ 0x1b0d3a18, 0x3194a36, 0x6c2641f, 0xe44124c, 0xa2f4eaa, 0xa8c25ba, 0xf927ed7, 0x627b614, 0x7371cca,
+ 0xba16694, 0x417bc03, 0x7c0a7e3, 0x9c35c19, 0x1168a205, 0x8b6b00d, 0x10e3edc9, 0x9c19bf2, 0x5882229,
+ 0x1b2b4162, 0xa5cef1a, 0x1543622b, 0x9bd433e, 0x364e04d, 0x7480792, 0x5c9b5b3, 0xe85ff25, 0x408ef57,
+ 0x1814cfa4, 0x121b41b, 0xd248a0f, 0x3b05222, 0x39bb16a, 0xc75966d, 0xa038113, 0xa4a1769, 0x11fbc6c,
+ 0x917e50e, 0xeec3da8, 0x169d6eac, 0x10c1699, 0xa416153, 0xf724912, 0x15cd60b7, 0x4acbad9, 0x5efc5fa,
+ 0xf150ed7, 0x122b51, 0x1104b40a, 0xcb7f442, 0xfbb28ff, 0x6ac53ca, 0x196142cc, 0x7bf0fa9, 0x957651,
+ 0x4e0f215, 0xed439f8, 0x3f46bd5, 0x5ace82f, 0x110916b6, 0x6db078, 0xffd7d57, 0xf2ecaac, 0xca86dec,
+ 0x15d6b2da, 0x965ecc9, 0x1c92b4c2, 0x1f3811, 0x1cb080f5, 0x2d8b804, 0x19d1c12d, 0xf20bd46, 0x1951fa7,
+ 0xa3656c3, 0x523a425, 0xfcd0692, 0xd44ddc8, 0x131f0f5b, 0xaf80e4a, 0xcd9fc74, 0x99bb618, 0x2db944c,
+ 0xa673090, 0x1c210e1, 0x178c8d23, 0x1474383, 0x10b8743d, 0x985a55b, 0x2e74779, 0x576138, 0x9587927,
+ 0x133130fa, 0xbe05516, 0x9f4d619, 0xbb62570, 0x99ec591, 0xd9468fe, 0x1d07782d, 0xfc72e0b, 0x701b298,
+ 0x1863863b, 0x85954b8, 0x121a0c36, 0x9e7fedf, 0xf64b429, 0x9b9d71e, 0x14e2f5d8, 0xf858d3a, 0x942eea8,
+ 0xda5b765, 0x6edafff, 0xa9d18cc, 0xc65e4ba, 0x1c747e86, 0xe4ea915, 0x1981d7a1, 0x8395659, 0x52ed4e2,
+ 0x87d43b7, 0x37ab11b, 0x19d292ce, 0xf8d4692, 0x18c3053f, 0x8863e13, 0x4c146c0, 0x6bdf55a, 0x4e4457d,
+ 0x16152289, 0xac78ec2, 0x1a59c5a2, 0x2028b97, 0x71c2d01, 0x295851f, 0x404747b, 0x878558d, 0x7d29aa4,
+ 0x13d8341f, 0x8daefd7, 0x139c972d, 0x6b7ea75, 0xd4a9dde, 0xff163d8, 0x81d55d7, 0xa5bef68, 0xb7b30d8,
+ 0xbe73d6f, 0xaa88141, 0xd976c81, 0x7e7a9cc, 0x18beb771, 0xd773cbd, 0x13f51951, 0x9d0c177, 0x1c49a78,
+};
+
+/* Field element operations:
+ */
+
+/* NON_ZERO_TO_ALL_ONES returns:
+ * 0xffffffff for 0 < x <= 2**31
+ * 0 for x == 0 or x > 2**31.
+ *
+ * How it works: (x - 1) has its top bit set exactly when x == 0 (the
+ * subtraction wraps to 0xffffffff) or x > 2**31. Shifting right by 31 leaves
+ * that bit as 0 or 1, and subtracting 1 maps 0 to all ones and 1 to zero.
+ * No branch is involved and x is evaluated exactly once.
+ *
+ * x must be a u32 or an equivalent type such as limb.
+ */
+#define NON_ZERO_TO_ALL_ONES(x) ((((u32)(x)-1) >> 31) - 1)
+
+/* felem_reduce_carry cancels |carry|, a term sitting at 2**257, by adding
+ * the matching multiple of p to |inout|.
+ *
+ * On entry: carry < 2**3, inout[0,2,...] < 2**29, inout[1,3,...] < 2**28.
+ * On exit: inout[0,2,..] < 2**30, inout[1,3,...] < 2**29.
+ */
+static void
+felem_reduce_carry(felem inout, limb carry)
+{
+    /* All ones when carry is non-zero, zero otherwise. The constant parts
+     * of each adjustment are ANDed with it so that a zero carry changes
+     * nothing, without branching on the value of carry. */
+    const u32 applies = NON_ZERO_TO_ALL_ONES(carry);
+
+    /* carry < 2**3, so (carry << 11) < 2**14 is smaller than the 2**28 it
+     * is paired with, and (carry << 22) < 2**25 is smaller than 2**29 - 1:
+     * both net adjustments are non-negative. */
+    const limb adjust3 = (0x10000000 & applies) - (carry << 11);
+    const limb adjust6 = ((0x20000000 - 1) & applies) - (carry << 22);
+    /* carry << 25 is at least 2**25 whenever the 1 is subtracted. */
+    const limb adjust7 = (carry << 25) - (1 & applies);
+
+    inout[0] += carry << 1;
+    inout[3] += adjust3;
+    inout[4] += (0x20000000 - 1) & applies;
+    inout[5] += (0x10000000 - 1) & applies;
+    inout[6] += adjust6;
+    inout[7] += adjust7;
+}
+
+/* felem_sum sets out = in+in2.
+ *
+ * The limbs are added pairwise with a running carry: even limbs keep 29
+ * bits, odd limbs keep 28, and whatever is carried out of the last limb is
+ * handed to felem_reduce_carry.
+ *
+ * On entry, in[i]+in2[i] must not overflow a 32-bit word.
+ * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29
+ */
+static void
+felem_sum(felem out, const felem in, const felem in2)
+{
+    limb carry = 0;
+    unsigned int idx;
+
+    for (idx = 0; idx < NLIMBS; idx++) {
+        const int odd = idx & 1;
+        const limb acc = in[idx] + in2[idx] + carry;
+
+        carry = acc >> (odd ? 28 : 29);
+        out[idx] = acc & (odd ? kBottom28Bits : kBottom29Bits);
+    }
+
+    felem_reduce_carry(out, carry);
+}
+
+/* Limb values used to build zero31. Every replacement list is wrapped in
+ * parentheses so that each macro expands to a single operand no matter what
+ * expression it is placed in.
+ */
+#define two31m3 ((((limb)1) << 31) - (((limb)1) << 3))
+#define two30m2 ((((limb)1) << 30) - (((limb)1) << 2))
+#define two30p13m2 ((((limb)1) << 30) + (((limb)1) << 13) - (((limb)1) << 2))
+#define two31m2 ((((limb)1) << 31) - (((limb)1) << 2))
+#define two31p24m2 ((((limb)1) << 31) + (((limb)1) << 24) - (((limb)1) << 2))
+#define two30m27m2 ((((limb)1) << 30) - (((limb)1) << 27) - (((limb)1) << 2))
+
+/* zero31 is 0 mod p.
+ *
+ * felem_diff adds it limb by limb to in - in2; since it is a multiple of p
+ * this leaves the value unchanged mod p while keeping each limb of the
+ * difference from wrapping below zero.
+ */
+static const felem zero31 = {
+    two31m3, two30m2, two31m2, two30p13m2,
+    two31m2, two30m2, two31p24m2, two30m27m2,
+    two31m2
+};
+
+/* felem_diff sets out = in-in2.
+ *
+ * Each limb difference has the matching limb of zero31 added to it before
+ * the carry is propagated: even limbs keep 29 bits, odd limbs keep 28, and
+ * whatever is carried out of the last limb is handed to felem_reduce_carry.
+ *
+ * On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29 and
+ * in2[0,2,...] < 2**30, in2[1,3,...] < 2**29.
+ * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
+ */
+static void
+felem_diff(felem out, const felem in, const felem in2)
+{
+    limb carry = 0;
+    unsigned int idx;
+
+    for (idx = 0; idx < NLIMBS; idx++) {
+        const int odd = idx & 1;
+        const limb acc = in[idx] - in2[idx] + zero31[idx] + carry;
+
+        carry = acc >> (odd ? 28 : 29);
+        out[idx] = acc & (odd ? kBottom28Bits : kBottom29Bits);
+    }
+
+    felem_reduce_carry(out, carry);
+}
+
+/* felem_reduce_degree sets out = tmp/R mod p where tmp contains 64-bit words
+ * with the same 29,28,... bit positions as an felem.
+ *
+ * The values in felems are in Montgomery form: x*R mod p where R = 2**257.
+ * Since we just multiplied two Montgomery values together, the result is
+ * x*y*R*R mod p. We wish to divide by R in order for the result also to be
+ * in Montgomery form.
+ *
+ * out - receives the nine reduced limbs.
+ * tmp - the 17 product words; they are only read, never written.
+ *
+ * The work is done in three stages: the 64-bit words are first split into
+ * 18 non-overlapping 29/28-bit limbs (tmp2), then the nine lowest limbs are
+ * cleared by adding multiples of p, and finally the upper limbs are shifted
+ * down into out with a carry chain and passed to felem_reduce_carry.
+ *
+ * On entry: tmp[i] < 2**64
+ * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29
+ */
+static void
+felem_reduce_degree(felem out, u64 tmp[17])
+{
+ /* The following table may be helpful when reading this code:
+ *
+ * Limb number: 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10...
+ * Width (bits): 29| 28| 29| 28| 29| 28| 29| 28| 29| 28| 29
+ * Start bit: 0 | 29| 57| 86|114|143|171|200|228|257|285
+ * (odd phase): 0 | 28| 57| 85|114|142|171|199|228|256|285
+ */
+ limb tmp2[18], carry, x, xMask;
+ unsigned int i;
+
+ /* tmp contains 64-bit words with the same 29,28,29-bit positions as an
+ * felem. So the top of an element of tmp might overlap with another
+ * element two positions down. The following loop eliminates this
+ * overlap.
+ */
+ tmp2[0] = tmp[0] & kBottom29Bits;
+
+ /* In the following we use "(limb) tmp[x]" and "(limb) (tmp[x]>>32)" to try
+ * and hint to the compiler that it can do a single-word shift by selecting
+ * the right register rather than doing a double-word shift and truncating
+ * afterwards.
+ */
+ tmp2[1] = ((limb)tmp[0]) >> 29;
+ tmp2[1] |= (((limb)(tmp[0] >> 32)) << 3) & kBottom28Bits;
+ tmp2[1] += ((limb)tmp[1]) & kBottom28Bits;
+ carry = tmp2[1] >> 28;
+ tmp2[1] &= kBottom28Bits;
+
+ for (i = 2; i < 17; i++) { /* each pass handles an even limb, then an odd one */
+ tmp2[i] = ((limb)(tmp[i - 2] >> 32)) >> 25;
+ tmp2[i] += ((limb)(tmp[i - 1])) >> 28;
+ tmp2[i] += (((limb)(tmp[i - 1] >> 32)) << 4) & kBottom29Bits;
+ tmp2[i] += ((limb)tmp[i]) & kBottom29Bits;
+ tmp2[i] += carry;
+ carry = tmp2[i] >> 29;
+ tmp2[i] &= kBottom29Bits;
+
+ i++;
+ if (i == 17)
+ break;
+ tmp2[i] = ((limb)(tmp[i - 2] >> 32)) >> 25;
+ tmp2[i] += ((limb)(tmp[i - 1])) >> 29;
+ tmp2[i] += (((limb)(tmp[i - 1] >> 32)) << 3) & kBottom28Bits;
+ tmp2[i] += ((limb)tmp[i]) & kBottom28Bits;
+ tmp2[i] += carry;
+ carry = tmp2[i] >> 28;
+ tmp2[i] &= kBottom28Bits;
+ }
+
+ tmp2[17] = ((limb)(tmp[15] >> 32)) >> 25; /* top limb: left unmasked, takes the last carry */
+ tmp2[17] += ((limb)(tmp[16])) >> 29;
+ tmp2[17] += (((limb)(tmp[16] >> 32)) << 3);
+ tmp2[17] += carry;
+
+ /* Montgomery elimination of terms:
+ *
+ * Since R is 2**257, we can divide by R with a bitwise shift if we can
+ * ensure that the right-most 257 bits are all zero. We can make that true
+ * by adding multiples of p without affecting the value.
+ *
+ * So we eliminate limbs from right to left. Since the bottom 29 bits of p
+ * are all ones, then by adding tmp2[0]*p to tmp2 we'll make tmp2[0] == 0.
+ * We can do that for 8 further limbs and then right shift to eliminate the
+ * extra factor of R.
+ */
+ for (i = 0;; i += 2) {
+ tmp2[i + 1] += tmp2[i] >> 29;
+ x = tmp2[i] & kBottom29Bits;
+ xMask = NON_ZERO_TO_ALL_ONES(x); /* all ones if x != 0, else zero */
+ tmp2[i] = 0;
+
+ /* The bounds calculations for this loop are tricky. Each iteration of
+ * the loop eliminates two words by adding values to words to their
+ * right.
+ *
+ * The following table contains the amounts added to each word (as an
+ * offset from the value of i at the top of the loop). The amounts are
+ * accounted for from the first and second half of the loop separately
+ * and are written as, for example, 28 to mean a value <2**28.
+ *
+ * Word: 3 4 5 6 7 8 9 10
+ * Added in top half: 28 11 29 21 29 28
+ * 28 29
+ * 29
+ * Added in bottom half: 29 10 28 21 28 28
+ * 29
+ *
+ * The value that is currently offset 7 will be offset 5 for the next
+ * iteration and then offset 3 for the iteration after that. Therefore
+ * the total value added will be the values added at 7, 5 and 3.
+ *
+ * The following table accumulates these values. The sums at the bottom
+ * are written as, for example, 29+28, to mean a value < 2**29+2**28.
+ *
+ * Word: 3 4 5 6 7 8 9 10 11 12 13
+ * 28 11 10 29 21 29 28 28 28 28 28
+ * 29 28 11 28 29 28 29 28 29 28
+ * 29 28 21 21 29 21 29 21
+ * 10 29 28 21 28 21 28
+ * 28 29 28 29 28 29 28
+ * 11 10 29 10 29 10
+ * 29 28 11 28 11
+ * 29 29
+ * --------------------------------------------
+ * 30+ 31+ 30+ 31+ 30+
+ * 28+ 29+ 28+ 29+ 21+
+ * 21+ 28+ 21+ 28+ 10
+ * 10 21+ 10 21+
+ * 11 11
+ *
+ * So the greatest amount is added to tmp2[10] and tmp2[12]. If
+ * tmp2[10/12] has an initial value of <2**29, then the maximum value
+ * will be < 2**31 + 2**30 + 2**28 + 2**21 + 2**11, which is < 2**32,
+ * as required.
+ */
+ tmp2[i + 3] += (x << 10) & kBottom28Bits;
+ tmp2[i + 4] += (x >> 18);
+
+ tmp2[i + 6] += (x << 21) & kBottom29Bits;
+ tmp2[i + 7] += x >> 8;
+
+ /* At position 200, which is the starting bit position for word 7, we
+ * have a factor of 0xf000000 = 2**28 - 2**24.
+ */
+ tmp2[i + 7] += 0x10000000 & xMask;
+ /* Word 7 is 28 bits wide, so the 2**28 term exactly hits word 8. */
+ tmp2[i + 8] += (x - 1) & xMask;
+ tmp2[i + 7] -= (x << 24) & kBottom28Bits;
+ tmp2[i + 8] -= x >> 4;
+
+ tmp2[i + 8] += 0x20000000 & xMask;
+ tmp2[i + 8] -= x;
+ tmp2[i + 8] += (x << 28) & kBottom29Bits;
+ tmp2[i + 9] += ((x >> 1) - 1) & xMask;
+
+ if (i + 1 == NLIMBS) /* limb 8 was the last one to clear */
+ break;
+ tmp2[i + 2] += tmp2[i + 1] >> 28;
+ x = tmp2[i + 1] & kBottom28Bits;
+ xMask = NON_ZERO_TO_ALL_ONES(x);
+ tmp2[i + 1] = 0;
+
+ tmp2[i + 4] += (x << 11) & kBottom29Bits;
+ tmp2[i + 5] += (x >> 18);
+
+ tmp2[i + 7] += (x << 21) & kBottom28Bits;
+ tmp2[i + 8] += x >> 7;
+
+ /* At position 199, which is the starting bit of the 8th word when
+ * dealing with a context starting on an odd word, we have a factor of
+ * 0x1e000000 = 2**29 - 2**25. Since we have not updated i, the 8th
+ * word from i+1 is i+8.
+ */
+ tmp2[i + 8] += 0x20000000 & xMask;
+ tmp2[i + 9] += (x - 1) & xMask;
+ tmp2[i + 8] -= (x << 25) & kBottom29Bits;
+ tmp2[i + 9] -= x >> 4;
+
+ tmp2[i + 9] += 0x10000000 & xMask;
+ tmp2[i + 9] -= x;
+ tmp2[i + 10] += (x - 1) & xMask;
+ }
+
+ /* We merge the right shift with a carry chain. The words above 2**257 have
+ * widths of 28,29,... which we need to correct when copying them down.
+ */
+ carry = 0;
+ for (i = 0; i < 8; i++) { /* two output limbs per pass: out[0..7] */
+ /* The maximum value of tmp2[i + 9] occurs on the first iteration and
+ * is < 2**30+2**29+2**28. Adding 2**29 (from tmp2[i + 10]) is
+ * therefore safe.
+ */
+ out[i] = tmp2[i + 9];
+ out[i] += carry;
+ out[i] += (tmp2[i + 10] << 28) & kBottom29Bits;
+ carry = out[i] >> 29;
+ out[i] &= kBottom29Bits;
+
+ i++;
+ out[i] = tmp2[i + 9] >> 1;
+ out[i] += carry;
+ carry = out[i] >> 28;
+ out[i] &= kBottom28Bits;
+ }
+
+ out[8] = tmp2[17];
+ out[8] += carry;
+ carry = out[8] >> 29;
+ out[8] &= kBottom29Bits;
+
+ felem_reduce_carry(out, carry);
+}
+
+/* felem_square sets out=in*in.
+ *
+ * The 17 words of the unreduced square are accumulated in tmp, one word per
+ * limb position, and then handed to felem_reduce_degree, which writes out.
+ *
+ * Each cross term in[i]*in[j] with i != j is written once and doubled.
+ * A product of two odd-indexed limbs is doubled once more: odd limbs start
+ * at bit 29, 86, 143, 200, so the sum of two such start bits lies one bit
+ * above the start bit of the word the product is stored in. This gives the
+ * shifts below: << 1 for ordinary cross terms and for the square of an odd
+ * limb, << 2 for a cross term of two odd limbs, none for the square of an
+ * even limb. The shifts are applied to the 32-bit limb before the 64-bit
+ * multiply, which the entry bounds keep from overflowing.
+ *
+ * On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29.
+ * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
+ */
+static void
+felem_square(felem out, const felem in)
+{
+ u64 tmp[17];
+
+ tmp[0] = ((u64)in[0]) * in[0];
+ tmp[1] = ((u64)in[0]) * (in[1] << 1);
+ tmp[2] = ((u64)in[0]) * (in[2] << 1) +
+ ((u64)in[1]) * (in[1] << 1);
+ tmp[3] = ((u64)in[0]) * (in[3] << 1) +
+ ((u64)in[1]) * (in[2] << 1);
+ tmp[4] = ((u64)in[0]) * (in[4] << 1) +
+ ((u64)in[1]) * (in[3] << 2) +
+ ((u64)in[2]) * in[2];
+ tmp[5] = ((u64)in[0]) * (in[5] << 1) +
+ ((u64)in[1]) * (in[4] << 1) +
+ ((u64)in[2]) * (in[3] << 1);
+ tmp[6] = ((u64)in[0]) * (in[6] << 1) +
+ ((u64)in[1]) * (in[5] << 2) +
+ ((u64)in[2]) * (in[4] << 1) +
+ ((u64)in[3]) * (in[3] << 1);
+ tmp[7] = ((u64)in[0]) * (in[7] << 1) +
+ ((u64)in[1]) * (in[6] << 1) +
+ ((u64)in[2]) * (in[5] << 1) +
+ ((u64)in[3]) * (in[4] << 1);
+ /* tmp[8] has the greatest value of 2**61 + 2**60 + 2**61 + 2**60 + 2**60,
+ * which is < 2**64 as required.
+ */
+ tmp[8] = ((u64)in[0]) * (in[8] << 1) +
+ ((u64)in[1]) * (in[7] << 2) +
+ ((u64)in[2]) * (in[6] << 1) +
+ ((u64)in[3]) * (in[5] << 2) +
+ ((u64)in[4]) * in[4];
+ tmp[9] = ((u64)in[1]) * (in[8] << 1) +
+ ((u64)in[2]) * (in[7] << 1) +
+ ((u64)in[3]) * (in[6] << 1) +
+ ((u64)in[4]) * (in[5] << 1);
+ tmp[10] = ((u64)in[2]) * (in[8] << 1) +
+ ((u64)in[3]) * (in[7] << 2) +
+ ((u64)in[4]) * (in[6] << 1) +
+ ((u64)in[5]) * (in[5] << 1);
+ tmp[11] = ((u64)in[3]) * (in[8] << 1) +
+ ((u64)in[4]) * (in[7] << 1) +
+ ((u64)in[5]) * (in[6] << 1);
+ tmp[12] = ((u64)in[4]) * (in[8] << 1) +
+ ((u64)in[5]) * (in[7] << 2) +
+ ((u64)in[6]) * in[6];
+ tmp[13] = ((u64)in[5]) * (in[8] << 1) +
+ ((u64)in[6]) * (in[7] << 1);
+ tmp[14] = ((u64)in[6]) * (in[8] << 1) +
+ ((u64)in[7]) * (in[7] << 1);
+ tmp[15] = ((u64)in[7]) * (in[8] << 1);
+ tmp[16] = ((u64)in[8]) * in[8];
+
+ felem_reduce_degree(out, tmp); /* divide by R and bring back to 9 limbs */
+}
+
+/* felem_mul sets out=in*in2.
+ *
+ * On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29 and
+ * in2[0,2,...] < 2**30, in2[1,3,...] < 2**29.
+ * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
+ *
+ * The product is accumulated into 17 64-bit columns: tmp[k] collects every
+ * in[i]*in2[j] with i+j == k. The columns are then passed to
+ * felem_reduce_degree, which writes |out|. Because |in| and |in2| are only
+ * read while tmp is being filled, |out| may alias either input.
+ */
+static void
+felem_mul(felem out, const felem in, const felem in2)
+{
+ u64 tmp[17];
+
+ /* A term is doubled (<< 1) exactly when both limb indices are odd; every
+ * other term is written with << 0, which is a no-op kept so the columns
+ * line up visually.
+ * NOTE(review): presumably the doubling compensates for odd limbs being
+ * one bit narrower than even limbs (28 vs 29 bits) - confirm against the
+ * felem representation described earlier in the file.
+ */
+ tmp[0] = ((u64)in[0]) * in2[0];
+ tmp[1] = ((u64)in[0]) * (in2[1] << 0) +
+ ((u64)in[1]) * (in2[0] << 0);
+ tmp[2] = ((u64)in[0]) * (in2[2] << 0) +
+ ((u64)in[1]) * (in2[1] << 1) +
+ ((u64)in[2]) * (in2[0] << 0);
+ tmp[3] = ((u64)in[0]) * (in2[3] << 0) +
+ ((u64)in[1]) * (in2[2] << 0) +
+ ((u64)in[2]) * (in2[1] << 0) +
+ ((u64)in[3]) * (in2[0] << 0);
+ tmp[4] = ((u64)in[0]) * (in2[4] << 0) +
+ ((u64)in[1]) * (in2[3] << 1) +
+ ((u64)in[2]) * (in2[2] << 0) +
+ ((u64)in[3]) * (in2[1] << 1) +
+ ((u64)in[4]) * (in2[0] << 0);
+ tmp[5] = ((u64)in[0]) * (in2[5] << 0) +
+ ((u64)in[1]) * (in2[4] << 0) +
+ ((u64)in[2]) * (in2[3] << 0) +
+ ((u64)in[3]) * (in2[2] << 0) +
+ ((u64)in[4]) * (in2[1] << 0) +
+ ((u64)in[5]) * (in2[0] << 0);
+ tmp[6] = ((u64)in[0]) * (in2[6] << 0) +
+ ((u64)in[1]) * (in2[5] << 1) +
+ ((u64)in[2]) * (in2[4] << 0) +
+ ((u64)in[3]) * (in2[3] << 1) +
+ ((u64)in[4]) * (in2[2] << 0) +
+ ((u64)in[5]) * (in2[1] << 1) +
+ ((u64)in[6]) * (in2[0] << 0);
+ tmp[7] = ((u64)in[0]) * (in2[7] << 0) +
+ ((u64)in[1]) * (in2[6] << 0) +
+ ((u64)in[2]) * (in2[5] << 0) +
+ ((u64)in[3]) * (in2[4] << 0) +
+ ((u64)in[4]) * (in2[3] << 0) +
+ ((u64)in[5]) * (in2[2] << 0) +
+ ((u64)in[6]) * (in2[1] << 0) +
+ ((u64)in[7]) * (in2[0] << 0);
+ /* tmp[8] has the greatest value but doesn't overflow. See logic in
+ * felem_square.
+ */
+ tmp[8] = ((u64)in[0]) * (in2[8] << 0) +
+ ((u64)in[1]) * (in2[7] << 1) +
+ ((u64)in[2]) * (in2[6] << 0) +
+ ((u64)in[3]) * (in2[5] << 1) +
+ ((u64)in[4]) * (in2[4] << 0) +
+ ((u64)in[5]) * (in2[3] << 1) +
+ ((u64)in[6]) * (in2[2] << 0) +
+ ((u64)in[7]) * (in2[1] << 1) +
+ ((u64)in[8]) * (in2[0] << 0);
+ tmp[9] = ((u64)in[1]) * (in2[8] << 0) +
+ ((u64)in[2]) * (in2[7] << 0) +
+ ((u64)in[3]) * (in2[6] << 0) +
+ ((u64)in[4]) * (in2[5] << 0) +
+ ((u64)in[5]) * (in2[4] << 0) +
+ ((u64)in[6]) * (in2[3] << 0) +
+ ((u64)in[7]) * (in2[2] << 0) +
+ ((u64)in[8]) * (in2[1] << 0);
+ tmp[10] = ((u64)in[2]) * (in2[8] << 0) +
+ ((u64)in[3]) * (in2[7] << 1) +
+ ((u64)in[4]) * (in2[6] << 0) +
+ ((u64)in[5]) * (in2[5] << 1) +
+ ((u64)in[6]) * (in2[4] << 0) +
+ ((u64)in[7]) * (in2[3] << 1) +
+ ((u64)in[8]) * (in2[2] << 0);
+ tmp[11] = ((u64)in[3]) * (in2[8] << 0) +
+ ((u64)in[4]) * (in2[7] << 0) +
+ ((u64)in[5]) * (in2[6] << 0) +
+ ((u64)in[6]) * (in2[5] << 0) +
+ ((u64)in[7]) * (in2[4] << 0) +
+ ((u64)in[8]) * (in2[3] << 0);
+ tmp[12] = ((u64)in[4]) * (in2[8] << 0) +
+ ((u64)in[5]) * (in2[7] << 1) +
+ ((u64)in[6]) * (in2[6] << 0) +
+ ((u64)in[7]) * (in2[5] << 1) +
+ ((u64)in[8]) * (in2[4] << 0);
+ tmp[13] = ((u64)in[5]) * (in2[8] << 0) +
+ ((u64)in[6]) * (in2[7] << 0) +
+ ((u64)in[7]) * (in2[6] << 0) +
+ ((u64)in[8]) * (in2[5] << 0);
+ tmp[14] = ((u64)in[6]) * (in2[8] << 0) +
+ ((u64)in[7]) * (in2[7] << 1) +
+ ((u64)in[8]) * (in2[6] << 0);
+ tmp[15] = ((u64)in[7]) * (in2[8] << 0) +
+ ((u64)in[8]) * (in2[7] << 0);
+ tmp[16] = ((u64)in[8]) * (in2[8] << 0);
+
+ /* Fold the 17 wide columns back into a 9-limb field element. */
+ felem_reduce_degree(out, tmp);
+}
+
+/* felem_assign copies every limb of |in| into |out|. */
+static void
+felem_assign(felem out, const felem in)
+{
+    unsigned int idx;
+
+    for (idx = 0; idx < sizeof(felem) / sizeof(in[0]); idx++) {
+        out[idx] = in[idx];
+    }
+}
+
+/* felem_inv calculates |out| = |in|^{-1} by raising |in| to the power p-2,
+ * where p-2 = 2^256 - 2^224 + 2^192 + 2^96 - 3 (Fermat's Little Theorem:
+ * a^{p-1} = 1 (mod p), hence a^{p-2} = a^{-1} (mod p)).
+ *
+ * The trailing comments give the exponent held by the variable that was
+ * just written.
+ */
+static void
+felem_inv(felem out, const felem in)
+{
+    felem hi, lo;
+    /* e2..e32 hold |in|^{2^I - 1}; e64 holds |in|^{2^64 - 2^32}. */
+    felem e2, e4, e8, e16, e32, e64;
+    unsigned int k;
+
+    /* Build the high part of the exponent in |hi|. */
+    felem_square(hi, in);   /* 2^1 */
+    felem_mul(hi, in, hi);  /* 2^2 - 2^0 */
+    felem_assign(e2, hi);
+
+    for (k = 0; k < 2; k++) {
+        felem_square(hi, hi);
+    }                       /* 2^4 - 2^2 */
+    felem_mul(hi, hi, e2);  /* 2^4 - 2^0 */
+    felem_assign(e4, hi);
+
+    for (k = 0; k < 4; k++) {
+        felem_square(hi, hi);
+    }                       /* 2^8 - 2^4 */
+    felem_mul(hi, hi, e4);  /* 2^8 - 2^0 */
+    felem_assign(e8, hi);
+
+    for (k = 0; k < 8; k++) {
+        felem_square(hi, hi);
+    }                       /* 2^16 - 2^8 */
+    felem_mul(hi, hi, e8);  /* 2^16 - 2^0 */
+    felem_assign(e16, hi);
+
+    for (k = 0; k < 16; k++) {
+        felem_square(hi, hi);
+    }                       /* 2^32 - 2^16 */
+    felem_mul(hi, hi, e16); /* 2^32 - 2^0 */
+    felem_assign(e32, hi);
+
+    for (k = 0; k < 32; k++) {
+        felem_square(hi, hi);
+    }                       /* 2^64 - 2^32 */
+    felem_assign(e64, hi);
+    felem_mul(hi, hi, in);  /* 2^64 - 2^32 + 2^0 */
+
+    for (k = 0; k < 192; k++) {
+        felem_square(hi, hi);
+    }                       /* 2^256 - 2^224 + 2^192 */
+
+    /* Build the low part of the exponent in |lo|. */
+    felem_mul(lo, e64, e32); /* 2^64 - 2^0 */
+    for (k = 0; k < 16; k++) {
+        felem_square(lo, lo);
+    }                        /* 2^80 - 2^16 */
+    felem_mul(lo, lo, e16);  /* 2^80 - 2^0 */
+
+    for (k = 0; k < 8; k++) {
+        felem_square(lo, lo);
+    }                        /* 2^88 - 2^8 */
+    felem_mul(lo, lo, e8);   /* 2^88 - 2^0 */
+
+    for (k = 0; k < 4; k++) {
+        felem_square(lo, lo);
+    }                        /* 2^92 - 2^4 */
+    felem_mul(lo, lo, e4);   /* 2^92 - 2^0 */
+
+    for (k = 0; k < 2; k++) {
+        felem_square(lo, lo);
+    }                        /* 2^94 - 2^2 */
+    felem_mul(lo, lo, e2);   /* 2^94 - 2^0 */
+
+    for (k = 0; k < 2; k++) {
+        felem_square(lo, lo);
+    }                        /* 2^96 - 2^2 */
+    felem_mul(lo, lo, in);   /* 2^96 - 3 */
+
+    felem_mul(out, lo, hi);  /* 2^256 - 2^224 + 2^192 + 2^96 - 3 */
+}
+
+/* felem_scalar_3 sets out=3*out.
+ *
+ * On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
+ * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
+ *
+ * Each limb is tripled, the incoming carry is added, and whatever exceeds
+ * the limb's width (29 bits for even indices, 28 for odd ones) is carried
+ * into the next limb. The carry out of the last limb is folded back in by
+ * felem_reduce_carry.
+ */
+static void
+felem_scalar_3(felem out)
+{
+    limb carry = 0;
+    unsigned int i;
+
+    for (i = 0; i < NLIMBS; i++) {
+        const int odd = (i & 1) != 0;
+
+        out[i] = out[i] * 3 + carry;
+        carry = out[i] >> (odd ? 28 : 29);
+        out[i] &= odd ? kBottom28Bits : kBottom29Bits;
+    }
+
+    felem_reduce_carry(out, carry);
+}
+
+/* felem_scalar_4 sets out=4*out.
+ *
+ * On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
+ * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
+ *
+ * Each limb is shifted left by two. The bits shifted past the limb's width
+ * (29 bits for even indices, 28 for odd ones) are captured beforehand in
+ * next_carry and, together with any overflow from adding the incoming
+ * carry, are passed on to the following limb.
+ */
+static void
+felem_scalar_4(felem out)
+{
+    limb carry = 0, next_carry;
+    unsigned int i;
+
+    for (i = 0; i < NLIMBS; i++) {
+        const unsigned int width = (i & 1) ? 28 : 29;
+        const limb keep = (i & 1) ? kBottom28Bits : kBottom29Bits;
+
+        next_carry = out[i] >> (width - 2);
+        out[i] <<= 2;
+        out[i] &= keep;
+        out[i] += carry;
+        carry = next_carry + (out[i] >> width);
+        out[i] &= keep;
+    }
+
+    felem_reduce_carry(out, carry);
+}
+
+/* felem_scalar_8 sets out=8*out.
+ *
+ * On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
+ * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
+ *
+ * Same scheme as a shift-and-carry by two bits, but shifting by three: the
+ * bits that leave the limb's width (29 bits for even indices, 28 for odd
+ * ones) are saved in next_carry before the shift and propagated onwards.
+ */
+static void
+felem_scalar_8(felem out)
+{
+    limb carry = 0, next_carry;
+    unsigned int i;
+
+    for (i = 0; i < NLIMBS; i++) {
+        const unsigned int width = (i & 1) ? 28 : 29;
+        const limb keep = (i & 1) ? kBottom28Bits : kBottom29Bits;
+
+        next_carry = out[i] >> (width - 3);
+        out[i] <<= 3;
+        out[i] &= keep;
+        out[i] += carry;
+        carry = next_carry + (out[i] >> width);
+        out[i] &= keep;
+    }
+
+    felem_reduce_carry(out, carry);
+}
+
+/* felem_is_zero_vartime returns 1 iff |in| == 0 and 0 otherwise. Its
+ * running time depends on the value of |in|, so it must only be used on
+ * values that are not secret.
+ */
+static char
+felem_is_zero_vartime(const felem in)
+{
+    limb carry;
+    int i;
+    limb tmp[NLIMBS];
+
+    felem_assign(tmp, in);
+
+    /* Propagate carries until none is left, so that every limb of tmp is
+     * within its nominal width (29 bits for even limbs, 28 for odd ones).
+     */
+    do {
+        carry = 0;
+        for (i = 0; i < NLIMBS; i++) {
+            tmp[i] += carry;
+            if ((i & 1) == 0) {
+                carry = tmp[i] >> 29;
+                tmp[i] &= kBottom29Bits;
+            } else {
+                carry = tmp[i] >> 28;
+                tmp[i] &= kBottom28Bits;
+            }
+        }
+
+        felem_reduce_carry(tmp, carry);
+    } while (carry);
+
+    /* tmp < 2**257, so the only representations of zero are 0, p and 2p. */
+    if (memcmp(tmp, kZero, sizeof(tmp)) == 0) {
+        return 1;
+    }
+    if (memcmp(tmp, kP, sizeof(tmp)) == 0) {
+        return 1;
+    }
+    if (memcmp(tmp, k2P, sizeof(tmp)) == 0) {
+        return 1;
+    }
+    return 0;
+}
+
+/* Group operations:
+ *
+ * Elements of the elliptic curve group are represented in Jacobian
+ * coordinates: (x, y, z). An affine point (x', y') is x'=x/z**2, y'=y/z**3 in
+ * Jacobian form.
+ */
+
+/* point_double sets {x_out,y_out,z_out} = 2*{x,y,z}.
+ *
+ * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#doubling-dbl-2009-l
+ *
+ * NOTE(review): the intermediate names and steps below (delta, gamma, beta,
+ * alpha = 3*(x-delta)*(x+delta)) look like the a=-3 doubling "dbl-2001-b"
+ * rather than the formula named in the URL above - confirm the reference.
+ *
+ * The outputs may alias the inputs: callers in this file pass the same
+ * arrays for both, and every input is consumed before the corresponding
+ * output is written.
+ */
+static void
+point_double(felem x_out, felem y_out, felem z_out,
+ const felem x, const felem y, const felem z)
+{
+ felem delta, gamma, alpha, beta, tmp, tmp2;
+
+ /* delta = z^2, gamma = y^2, beta = x*gamma */
+ felem_square(delta, z);
+ felem_square(gamma, y);
+ felem_mul(beta, x, gamma);
+
+ /* alpha = 3*(x + delta)*(x - delta) */
+ felem_sum(tmp, x, delta);
+ felem_diff(tmp2, x, delta);
+ felem_mul(alpha, tmp, tmp2);
+ felem_scalar_3(alpha);
+
+ /* z_out = (y + z)^2 - gamma - delta. This must happen before x_out and
+ * y_out are written in case z_out aliases z only, but it reads y and z,
+ * so it is computed while they are still intact.
+ */
+ felem_sum(tmp, y, z);
+ felem_square(tmp, tmp);
+ felem_diff(tmp, tmp, gamma);
+ felem_diff(z_out, tmp, delta);
+
+ /* beta is scaled to 4*beta; x_out = alpha^2 - 2*(4*beta) */
+ felem_scalar_4(beta);
+ felem_square(x_out, alpha);
+ felem_diff(x_out, x_out, beta);
+ felem_diff(x_out, x_out, beta);
+
+ /* y_out = alpha*(4*beta - x_out) - 8*gamma^2 */
+ felem_diff(tmp, beta, x_out);
+ felem_mul(tmp, alpha, tmp);
+ felem_square(tmp2, gamma);
+ felem_scalar_8(tmp2);
+ felem_diff(y_out, tmp, tmp2);
+}
+
+/* point_add_mixed sets {x_out,y_out,z_out} = {x1,y1,z1} + {x2,y2,1}.
+ * (i.e. the second point is affine.)
+ *
+ * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-add-2007-bl
+ *
+ * Note that this function does not handle P+P, infinity+P nor P+infinity
+ * correctly. Callers must detect those cases themselves and discard or
+ * replace the result.
+ */
+static void
+point_add_mixed(felem x_out, felem y_out, felem z_out,
+ const felem x1, const felem y1, const felem z1,
+ const felem x2, const felem y2)
+{
+ felem z1z1, z1z1z1, s2, u2, h, i, j, r, rr, v, tmp;
+
+ /* z1z1 = z1^2, tmp = 2*z1 (used for z_out below) */
+ felem_square(z1z1, z1);
+ felem_sum(tmp, z1, z1);
+
+ /* u2 = x2*z1^2, s2 = y2*z1^3: the affine point scaled to z1 */
+ felem_mul(u2, x2, z1z1);
+ felem_mul(z1z1z1, z1, z1z1);
+ felem_mul(s2, y2, z1z1z1);
+ /* h = u2 - x1, i = (2h)^2, j = h*i, r = 2*(s2 - y1), v = x1*i */
+ felem_diff(h, u2, x1);
+ felem_sum(i, h, h);
+ felem_square(i, i);
+ felem_mul(j, h, i);
+ felem_diff(r, s2, y1);
+ felem_sum(r, r, r);
+ felem_mul(v, x1, i);
+
+ /* z_out = 2*z1*h; x_out = r^2 - j - 2*v */
+ felem_mul(z_out, tmp, h);
+ felem_square(rr, r);
+ felem_diff(x_out, rr, j);
+ felem_diff(x_out, x_out, v);
+ felem_diff(x_out, x_out, v);
+
+ /* y_out = r*(v - x_out) - 2*y1*j */
+ felem_diff(tmp, v, x_out);
+ felem_mul(y_out, tmp, r);
+ felem_mul(tmp, y1, j);
+ felem_diff(y_out, y_out, tmp);
+ felem_diff(y_out, y_out, tmp);
+}
+
+/* point_add sets {x_out,y_out,z_out} = {x1,y1,z1} + {x2,y2,z2}.
+ *
+ * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-add-2007-bl
+ *
+ * Note that this function does not handle P+P, infinity+P nor P+infinity
+ * correctly. Callers must detect those cases themselves and discard or
+ * replace the result.
+ */
+static void
+point_add(felem x_out, felem y_out, felem z_out,
+ const felem x1, const felem y1, const felem z1,
+ const felem x2, const felem y2, const felem z2)
+{
+ felem z1z1, z1z1z1, z2z2, z2z2z2, s1, s2, u1, u2, h, i, j, r, rr, v, tmp;
+
+ /* z1z1 = z1^2, z2z2 = z2^2, u1 = x1*z2^2 */
+ felem_square(z1z1, z1);
+ felem_square(z2z2, z2);
+ felem_mul(u1, x1, z2z2);
+
+ /* tmp = (z1 + z2)^2 - z1^2 - z2^2, i.e. 2*z1*z2 (used for z_out) */
+ felem_sum(tmp, z1, z2);
+ felem_square(tmp, tmp);
+ felem_diff(tmp, tmp, z1z1);
+ felem_diff(tmp, tmp, z2z2);
+
+ /* s1 = y1*z2^3 */
+ felem_mul(z2z2z2, z2, z2z2);
+ felem_mul(s1, y1, z2z2z2);
+
+ /* u2 = x2*z1^2, s2 = y2*z1^3 */
+ felem_mul(u2, x2, z1z1);
+ felem_mul(z1z1z1, z1, z1z1);
+ felem_mul(s2, y2, z1z1z1);
+ /* h = u2 - u1, i = (2h)^2, j = h*i, r = 2*(s2 - s1), v = u1*i */
+ felem_diff(h, u2, u1);
+ felem_sum(i, h, h);
+ felem_square(i, i);
+ felem_mul(j, h, i);
+ felem_diff(r, s2, s1);
+ felem_sum(r, r, r);
+ felem_mul(v, u1, i);
+
+ /* z_out = tmp*h; x_out = r^2 - j - 2*v */
+ felem_mul(z_out, tmp, h);
+ felem_square(rr, r);
+ felem_diff(x_out, rr, j);
+ felem_diff(x_out, x_out, v);
+ felem_diff(x_out, x_out, v);
+
+ /* y_out = r*(v - x_out) - 2*s1*j */
+ felem_diff(tmp, v, x_out);
+ felem_mul(y_out, tmp, r);
+ felem_mul(tmp, s1, j);
+ felem_diff(y_out, y_out, tmp);
+ felem_diff(y_out, y_out, tmp);
+}
+
+/* point_add_or_double_vartime sets {x_out,y_out,z_out} = {x1,y1,z1} +
+ * {x2,y2,z2}.
+ *
+ * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-add-2007-bl
+ *
+ * This function handles the case where {x1,y1,z1}={x2,y2,z2} by falling
+ * back to point_double. The equality test uses felem_is_zero_vartime, so
+ * the running time depends on the inputs; use it only on public values.
+ */
+static void
+point_add_or_double_vartime(
+ felem x_out, felem y_out, felem z_out,
+ const felem x1, const felem y1, const felem z1,
+ const felem x2, const felem y2, const felem z2)
+{
+ felem z1z1, z1z1z1, z2z2, z2z2z2, s1, s2, u1, u2, h, i, j, r, rr, v, tmp;
+ char x_equal, y_equal;
+
+ /* z1z1 = z1^2, z2z2 = z2^2, u1 = x1*z2^2 */
+ felem_square(z1z1, z1);
+ felem_square(z2z2, z2);
+ felem_mul(u1, x1, z2z2);
+
+ /* tmp = (z1 + z2)^2 - z1^2 - z2^2, i.e. 2*z1*z2 (used for z_out) */
+ felem_sum(tmp, z1, z2);
+ felem_square(tmp, tmp);
+ felem_diff(tmp, tmp, z1z1);
+ felem_diff(tmp, tmp, z2z2);
+
+ /* s1 = y1*z2^3 */
+ felem_mul(z2z2z2, z2, z2z2);
+ felem_mul(s1, y1, z2z2z2);
+
+ /* u2 = x2*z1^2, s2 = y2*z1^3 */
+ felem_mul(u2, x2, z1z1);
+ felem_mul(z1z1z1, z1, z1z1);
+ felem_mul(s2, y2, z1z1z1);
+ /* h == 0 means both points have the same affine x coordinate. */
+ felem_diff(h, u2, u1);
+ x_equal = felem_is_zero_vartime(h);
+ felem_sum(i, h, h);
+ felem_square(i, i);
+ felem_mul(j, h, i);
+ /* r == 0 (tested before r is doubled) means the same affine y. */
+ felem_diff(r, s2, s1);
+ y_equal = felem_is_zero_vartime(r);
+ if (x_equal && y_equal) {
+ /* Same point: the addition formula degenerates, so double instead.
+ * Nothing has been written to the outputs yet, so aliasing of the
+ * outputs with {x1,y1,z1} is still safe here.
+ */
+ point_double(x_out, y_out, z_out, x1, y1, z1);
+ return;
+ }
+ felem_sum(r, r, r);
+ felem_mul(v, u1, i);
+
+ /* z_out = tmp*h; x_out = r^2 - j - 2*v */
+ felem_mul(z_out, tmp, h);
+ felem_square(rr, r);
+ felem_diff(x_out, rr, j);
+ felem_diff(x_out, x_out, v);
+ felem_diff(x_out, x_out, v);
+
+ /* y_out = r*(v - x_out) - 2*s1*j */
+ felem_diff(tmp, v, x_out);
+ felem_mul(y_out, tmp, r);
+ felem_mul(tmp, s1, j);
+ felem_diff(y_out, y_out, tmp);
+ felem_diff(y_out, y_out, tmp);
+}
+
+/* copy_conditional overwrites |out| with |in| when mask is 0xffffffff and
+ * leaves |out| untouched when mask is 0. The same instructions run in both
+ * cases, so the choice is not revealed through timing.
+ *
+ * On entry: mask is either 0 or 0xffffffff.
+ */
+static void
+copy_conditional(felem out, const felem in, limb mask)
+{
+    int idx = 0;
+
+    while (idx < NLIMBS) {
+        /* XOR-ing in the masked difference turns out[idx] into in[idx]
+         * when the mask is all ones and is a no-op when it is zero. */
+        out[idx] ^= (in[idx] ^ out[idx]) & mask;
+        idx++;
+    }
+}
+
+/* select_affine_point sets {out_x,out_y} to the entry of |table| selected
+ * by |index|, touching every entry so that the memory access pattern does
+ * not depend on |index|.
+ *
+ * |table| holds the entries for indices 1..15 back to back, each being
+ * NLIMBS limbs of x followed by NLIMBS limbs of y. Index 0 has no stored
+ * entry and yields an all-zero output.
+ *
+ * On entry: index < 16.
+ */
+static void
+select_affine_point(felem out_x, felem out_y,
+                    const limb *table, limb index)
+{
+    limb entry, k;
+    const limb *src = table;
+
+    memset(out_x, 0, sizeof(felem));
+    memset(out_y, 0, sizeof(felem));
+
+    for (entry = 1; entry < 16; entry++) {
+        /* Collapse the four bits of (entry ^ index) into bit 0, then turn
+         * "equal" into an all-ones mask and "different" into zero. */
+        limb diff = entry ^ index;
+        limb mask;
+
+        diff |= diff >> 2;
+        diff |= diff >> 1;
+        mask = (diff & 1) - 1;
+
+        for (k = 0; k < NLIMBS; k++) {
+            out_x[k] |= src[k] & mask;
+        }
+        for (k = 0; k < NLIMBS; k++) {
+            out_y[k] |= src[NLIMBS + k] & mask;
+        }
+        src += 2 * NLIMBS;
+    }
+}
+
+/* select_jacobian_point sets {out_x,out_y,out_z} to the index'th entry of
+ * |table|, touching every entry so that the memory access pattern does not
+ * depend on |index|.
+ *
+ * |table| holds 16 entries of 3*NLIMBS limbs each (x, then y, then z).
+ * Entry 0 is never read: it is taken to be all zero, which is what the
+ * outputs are initialised to.
+ *
+ * On entry: index < 16, table[0] must be zero.
+ */
+static void
+select_jacobian_point(felem out_x, felem out_y, felem out_z,
+                      const limb *table, limb index)
+{
+    limb entry, k;
+    /* Start at entry 1; entry 0 is covered by the zero-initialisation. */
+    const limb *src = table + 3 * NLIMBS;
+
+    memset(out_x, 0, sizeof(felem));
+    memset(out_y, 0, sizeof(felem));
+    memset(out_z, 0, sizeof(felem));
+
+    for (entry = 1; entry < 16; entry++) {
+        /* Collapse the four bits of (entry ^ index) into bit 0, then turn
+         * "equal" into an all-ones mask and "different" into zero. */
+        limb diff = entry ^ index;
+        limb mask;
+
+        diff |= diff >> 2;
+        diff |= diff >> 1;
+        mask = (diff & 1) - 1;
+
+        for (k = 0; k < NLIMBS; k++) {
+            out_x[k] |= src[k] & mask;
+        }
+        for (k = 0; k < NLIMBS; k++) {
+            out_y[k] |= src[NLIMBS + k] & mask;
+        }
+        for (k = 0; k < NLIMBS; k++) {
+            out_z[k] |= src[2 * NLIMBS + k] & mask;
+        }
+        src += 3 * NLIMBS;
+    }
+}
+
+/* get_bit returns bit number |bit| of the little-endian byte string
+ * |scalar| (bit 0 is the least-significant bit of scalar[0]). The result
+ * is 0 or 1.
+ */
+static char
+get_bit(const u8 scalar[32], int bit)
+{
+    const u8 byte = scalar[bit >> 3];
+    const int shift = bit & 7;
+
+    return (byte >> shift) & 1;
+}
+
+/* scalar_base_mult sets {nx,ny,nz} = scalar*G where scalar is a little-endian
+ * number. Note that the value of scalar must be less than the order of the
+ * group.
+ *
+ * The result is a Jacobian point. The point at infinity is represented by
+ * all three coordinates being zero and is tracked separately through
+ * n_is_infinity_mask, because point_add_mixed cannot handle it. All
+ * selections are made with masks rather than branches on scalar bits.
+ */
+static void
+scalar_base_mult(felem nx, felem ny, felem nz, const u8 scalar[32])
+{
+ int i, j;
+ limb n_is_infinity_mask = -1, p_is_noninfinite_mask, mask;
+ u32 table_offset;
+
+ felem px, py;
+ felem tx, ty, tz;
+
+ memset(nx, 0, sizeof(felem));
+ memset(ny, 0, sizeof(felem));
+ memset(nz, 0, sizeof(felem));
+
+ /* The loop adds bits at positions 0, 64, 128 and 192, followed by
+ * positions 32,96,160 and 224 and does this 32 times.
+ *
+ * Concretely, iteration i reads bits 31-i, 95-i, 159-i and 223-i for the
+ * first table (j == 0) and bits 63-i, 127-i, 191-i and 255-i for the
+ * second table (j == 32), with one doubling between iterations.
+ */
+ for (i = 0; i < 32; i++) {
+ if (i) {
+ point_double(nx, ny, nz, nx, ny, nz);
+ }
+ table_offset = 0;
+ for (j = 0; j <= 32; j += 32) {
+ char bit0 = get_bit(scalar, 31 - i + j);
+ char bit1 = get_bit(scalar, 95 - i + j);
+ char bit2 = get_bit(scalar, 159 - i + j);
+ char bit3 = get_bit(scalar, 223 - i + j);
+ limb index = bit0 | (bit1 << 1) | (bit2 << 2) | (bit3 << 3);
+
+ /* Each sub-table holds 15 affine points of 2*NLIMBS limbs, hence the
+ * stride of 30*NLIMBS between the two sub-tables. */
+ select_affine_point(px, py, kPrecomputed + table_offset, index);
+ table_offset += 30 * NLIMBS;
+
+ /* Since scalar is less than the order of the group, we know that
+ * {nx,ny,nz} != {px,py,1}, unless both are zero, which we handle
+ * below.
+ */
+ point_add_mixed(tx, ty, tz, nx, ny, nz, px, py);
+ /* The result of point_add_mixed is incorrect if {nx,ny,nz} is zero
+ * (a.k.a. the point at infinity). We handle that situation by
+ * copying the point from the table.
+ */
+ copy_conditional(nx, px, n_is_infinity_mask);
+ copy_conditional(ny, py, n_is_infinity_mask);
+ copy_conditional(nz, kOne, n_is_infinity_mask);
+
+ /* Equally, the result is also wrong if the point from the table is
+ * zero, which happens when the index is zero. We handle that by
+ * only copying from {tx,ty,tz} to {nx,ny,nz} if index != 0.
+ */
+ p_is_noninfinite_mask = NON_ZERO_TO_ALL_ONES(index);
+ mask = p_is_noninfinite_mask & ~n_is_infinity_mask;
+ copy_conditional(nx, tx, mask);
+ copy_conditional(ny, ty, mask);
+ copy_conditional(nz, tz, mask);
+ /* If p was not zero, then n is now non-zero. */
+ n_is_infinity_mask &= ~p_is_noninfinite_mask;
+ }
+ }
+}
+
+/* point_to_affine converts the Jacobian point {nx,ny,nz} to affine form:
+ * x_out = nx / nz^2 and y_out = ny / nz^3. If the input is the point at
+ * infinity then it returns (0, 0) in constant time.
+ */
+static void
+point_to_affine(felem x_out, felem y_out,
+                const felem nx, const felem ny, const felem nz)
+{
+    felem z_inv, z_inv_sq, z_inv_cu;
+
+    felem_inv(z_inv, nz);                /* 1/z   */
+    felem_square(z_inv_sq, z_inv);       /* 1/z^2 */
+    felem_mul(x_out, nx, z_inv_sq);
+
+    felem_mul(z_inv_cu, z_inv, z_inv_sq); /* 1/z^3 */
+    felem_mul(y_out, ny, z_inv_cu);
+}
+
+/* scalar_mult sets {nx,ny,nz} = scalar*{x,y}.
+ *
+ * |scalar| is a 32-byte little-endian number and {x,y} is an affine point.
+ * The result is a Jacobian point; the point at infinity is represented by
+ * all-zero coordinates. The scalar is consumed four bits at a time, most
+ * significant nibble first, using a 16-entry table of multiples of {x,y}.
+ */
+static void
+scalar_mult(felem nx, felem ny, felem nz,
+ const felem x, const felem y, const u8 scalar[32])
+{
+ int i;
+ felem px, py, pz, tx, ty, tz;
+ felem precomp[16][3];
+ limb n_is_infinity_mask, index, p_is_noninfinite_mask, mask;
+
+ /* We precompute 0,1,2,... times {x,y}. */
+ /* Only entry 0 (three felems) is zeroed; entry 1 is {x,y,1}. */
+ memset(precomp, 0, sizeof(felem) * 3);
+ memcpy(&precomp[1][0], x, sizeof(felem));
+ memcpy(&precomp[1][1], y, sizeof(felem));
+ memcpy(&precomp[1][2], kOne, sizeof(felem));
+
+ /* Even entries are doublings of entry i/2; odd entries add {x,y} to the
+ * preceding even entry. */
+ for (i = 2; i < 16; i += 2) {
+ point_double(precomp[i][0], precomp[i][1], precomp[i][2],
+ precomp[i / 2][0], precomp[i / 2][1], precomp[i / 2][2]);
+
+ point_add_mixed(precomp[i + 1][0], precomp[i + 1][1], precomp[i + 1][2],
+ precomp[i][0], precomp[i][1], precomp[i][2], x, y);
+ }
+
+ memset(nx, 0, sizeof(felem));
+ memset(ny, 0, sizeof(felem));
+ memset(nz, 0, sizeof(felem));
+ n_is_infinity_mask = -1;
+
+ /* We add in a window of four bits each iteration and do this 64 times. */
+ for (i = 0; i < 64; i++) {
+ if (i) {
+ point_double(nx, ny, nz, nx, ny, nz);
+ point_double(nx, ny, nz, nx, ny, nz);
+ point_double(nx, ny, nz, nx, ny, nz);
+ point_double(nx, ny, nz, nx, ny, nz);
+ }
+
+ /* Walk the bytes from scalar[31] down to scalar[0]; within a byte the
+ * high nibble is used on even i and the low nibble on odd i. */
+ index = scalar[31 - i / 2];
+ if ((i & 1) == 1) {
+ index &= 15;
+ } else {
+ index >>= 4;
+ }
+
+ /* See the comments in scalar_base_mult about handling infinities. */
+ select_jacobian_point(px, py, pz, precomp[0][0], index);
+ point_add(tx, ty, tz, nx, ny, nz, px, py, pz);
+ /* While n is still infinity, n simply becomes the selected point. */
+ copy_conditional(nx, px, n_is_infinity_mask);
+ copy_conditional(ny, py, n_is_infinity_mask);
+ copy_conditional(nz, pz, n_is_infinity_mask);
+
+ /* Otherwise take the sum, but only if the selected point was not the
+ * zero entry (index != 0). */
+ p_is_noninfinite_mask = NON_ZERO_TO_ALL_ONES(index);
+ mask = p_is_noninfinite_mask & ~n_is_infinity_mask;
+ copy_conditional(nx, tx, mask);
+ copy_conditional(ny, ty, mask);
+ copy_conditional(nz, tz, mask);
+ n_is_infinity_mask &= ~p_is_noninfinite_mask;
+ }
+}
+
+/* Interface with Freebl: */
+
+/* BYTESWAP_MP_DIGIT_TO_LE swaps the bytes of a mp_digit to
+ * little-endian order.
+ *
+ * It is only defined on big-endian builds (IS_BIG_ENDIAN) and expands to
+ * the 32-bit or 64-bit swap depending on MP_USE_UINT_DIGIT.
+ */
+#ifdef IS_BIG_ENDIAN
+#ifdef __APPLE__
+#include <libkern/OSByteOrder.h>
+#define BYTESWAP32(x) OSSwapInt32(x)
+#define BYTESWAP64(x) OSSwapInt64(x)
+#else
+/* The generic fallbacks evaluate |x| several times, so the argument must be
+ * free of side effects, and they rely on |x| being an unsigned value of
+ * exactly the stated width so that the outermost shifts discard the other
+ * bytes. */
+#define BYTESWAP32(x) \
+ (((x) >> 24) | (((x) >> 8) & 0xff00) | (((x)&0xff00) << 8) | ((x) << 24))
+#define BYTESWAP64(x) \
+ (((x) >> 56) | (((x) >> 40) & 0xff00) | \
+ (((x) >> 24) & 0xff0000) | (((x) >> 8) & 0xff000000) | \
+ (((x)&0xff000000) << 8) | (((x)&0xff0000) << 24) | \
+ (((x)&0xff00) << 40) | ((x) << 56))
+#endif
+
+#ifdef MP_USE_UINT_DIGIT
+#define BYTESWAP_MP_DIGIT_TO_LE(x) BYTESWAP32(x)
+#else
+#define BYTESWAP_MP_DIGIT_TO_LE(x) BYTESWAP64(x)
+#endif
+#endif /* IS_BIG_ENDIAN */
+
+/* kRInvDigits holds the digits of kRInv, least-significant digit first, in
+ * either eight 32-bit or four 64-bit mp_digits. kRInv is the constant that
+ * from_montgomery multiplies by in order to divide by R.
+ * NOTE(review): presumably this is R^{-1} mod p for R = 2^257 - confirm
+ * against the definition of R used elsewhere in this file.
+ */
+#ifdef MP_USE_UINT_DIGIT
+static const mp_digit kRInvDigits[8] = {
+ 0x80000000, 1, 0xffffffff, 0,
+ 0x80000001, 0xfffffffe, 1, 0x7fffffff
+};
+#else
+static const mp_digit kRInvDigits[4] = {
+ PR_UINT64(0x180000000), 0xffffffff,
+ PR_UINT64(0xfffffffe80000001), PR_UINT64(0x7fffffff00000001)
+};
+#endif
+/* Number of mp_digits needed to hold 256 bits (32 bytes). */
+#define MP_DIGITS_IN_256_BITS (32 / sizeof(mp_digit))
+/* A read-only mp_int view over kRInvDigits; the const is cast away only to
+ * fit the mp_int layout and the digits must never be written through it. */
+static const mp_int kRInv = {
+ MP_ZPOS,
+ MP_DIGITS_IN_256_BITS,
+ MP_DIGITS_IN_256_BITS,
+ (mp_digit *)kRInvDigits
+};
+
+/* 2^28 and 2^29: the radices of odd and even limbs respectively, used when
+ * converting between mp_int and felem. */
+static const limb kTwo28 = 0x10000000;
+static const limb kTwo29 = 0x20000000;
+
+/* to_montgomery sets out = R*in, reduced with the group's field_mod, and
+ * splits the result into alternating 29-bit and 28-bit limbs.
+ *
+ * Returns MP_OKAY on success or the error of the first MPI call that
+ * failed, in which case |out| is only partially written.
+ */
+static mp_err
+to_montgomery(felem out, const mp_int *in, const ECGroup *group)
+{
+    mp_int shifted;
+    mp_err res;
+    int limb_idx;
+
+    /* There are no MPI functions for bitshift operations and we wish to
+     * shift in 257 bits left, so the digits are first placed 256 bits up
+     * and the value is then multiplied by two.
+     */
+    MP_CHECKOK(mp_init(&shifted));
+    MP_CHECKOK(s_mp_pad(&shifted, MP_USED(in) + MP_DIGITS_IN_256_BITS));
+    memcpy(&MP_DIGIT(&shifted, MP_DIGITS_IN_256_BITS),
+           MP_DIGITS(in),
+           MP_USED(in) * sizeof(mp_digit));
+    MP_CHECKOK(mp_mul_2(&shifted, &shifted));
+    MP_CHECKOK(group->meth->field_mod(&shifted, &shifted, group->meth));
+
+    /* Peel limbs off the bottom: 29 bits for even limbs, 28 for odd. */
+    for (limb_idx = 0; limb_idx < NLIMBS; limb_idx++) {
+        if ((limb_idx & 1) == 0) {
+            out[limb_idx] = MP_DIGIT(&shifted, 0) & kBottom29Bits;
+            MP_CHECKOK(mp_div_d(&shifted, kTwo29, &shifted, NULL));
+        } else {
+            out[limb_idx] = MP_DIGIT(&shifted, 0) & kBottom28Bits;
+            MP_CHECKOK(mp_div_d(&shifted, kTwo28, &shifted, NULL));
+        }
+    }
+
+CLEANUP:
+    mp_clear(&shifted);
+    return res;
+}
+
+/* from_montgomery sets out=in/R.
+ *
+ * The limbs of |in| are recombined into an integer (most-significant limb
+ * first, multiplying by 2^29 before an even limb and by 2^28 before an odd
+ * one), multiplied by kRInv and reduced with the group's field_mod.
+ *
+ * Returns MP_OKAY on success or the error of the first MPI call that
+ * failed.
+ */
+static mp_err
+from_montgomery(mp_int *out, const felem in,
+                const ECGroup *group)
+{
+    mp_int result, tmp;
+    mp_err res;
+    int i;
+
+    /* Mark both temporaries as empty before the first fallible call: if
+     * mp_init(&result) fails we jump to CLEANUP, and mp_clear(&tmp) must
+     * not see an uninitialised digit pointer.
+     */
+    MP_DIGITS(&result) = 0;
+    MP_DIGITS(&tmp) = 0;
+
+    MP_CHECKOK(mp_init(&result));
+    MP_CHECKOK(mp_init(&tmp));
+
+    /* tmp is zero here, so this just loads the top limb into result. */
+    MP_CHECKOK(mp_add_d(&tmp, in[NLIMBS - 1], &result));
+    for (i = NLIMBS - 2; i >= 0; i--) {
+        if ((i & 1) == 0) {
+            MP_CHECKOK(mp_mul_d(&result, kTwo29, &tmp));
+        } else {
+            MP_CHECKOK(mp_mul_d(&result, kTwo28, &tmp));
+        }
+        MP_CHECKOK(mp_add_d(&tmp, in[i], &result));
+    }
+
+    MP_CHECKOK(mp_mul(&result, &kRInv, out));
+    MP_CHECKOK(group->meth->field_mod(out, out, group->meth));
+
+CLEANUP:
+    mp_clear(&result);
+    mp_clear(&tmp);
+    return res;
+}
+
+/* scalar_from_mp_int sets out_scalar=n, where n < the group order.
+ *
+ * |out_scalar| receives the value as 32 little-endian bytes. Callers are
+ * still required to pass n < the group order; as a safety net, digits of
+ * |n| beyond 256 bits are ignored instead of being written past the end of
+ * the 32-byte buffers.
+ */
+static void
+scalar_from_mp_int(u8 out_scalar[32], const mp_int *n)
+{
+    /* We require that |n| is less than the order of the group and therefore
+     * it will fit into |out_scalar|. However, there is a timing side-channel
+     * here that we cannot avoid: if |n| is sufficiently small it may be one
+     * or more words too short and we'll copy less data.
+     */
+    mp_size used = MP_USED(n);
+
+    /* Never copy more digits than fit into 32 bytes. */
+    if (used > MP_DIGITS_IN_256_BITS) {
+        used = MP_DIGITS_IN_256_BITS;
+    }
+
+    memset(out_scalar, 0, 32);
+#ifdef IS_LITTLE_ENDIAN
+    memcpy(out_scalar, MP_DIGITS(n), used * sizeof(mp_digit));
+#else
+    {
+        mp_size i;
+        mp_digit swapped[MP_DIGITS_IN_256_BITS];
+        for (i = 0; i < used; i++) {
+            swapped[i] = BYTESWAP_MP_DIGIT_TO_LE(MP_DIGIT(n, i));
+        }
+        memcpy(out_scalar, swapped, used * sizeof(mp_digit));
+    }
+#endif
+}
+
+/* ec_GFp_nistp256_base_point_mul sets {out_x,out_y} = nG, where n is < the
+ * order of the group.
+ *
+ * Returns MP_OKAY on success or the error reported by from_montgomery.
+ */
+static mp_err
+ec_GFp_nistp256_base_point_mul(const mp_int *n,
+                               mp_int *out_x, mp_int *out_y,
+                               const ECGroup *group)
+{
+    mp_err res;
+    u8 k[32];
+    felem jx, jy, jz; /* Jacobian result */
+    felem ax, ay;     /* affine result */
+
+    /* FIXME(agl): test that n < order. */
+
+    scalar_from_mp_int(k, n);
+    scalar_base_mult(jx, jy, jz, k);
+    point_to_affine(ax, ay, jx, jy, jz);
+
+    MP_CHECKOK(from_montgomery(out_x, ax, group));
+    MP_CHECKOK(from_montgomery(out_y, ay, group));
+
+CLEANUP:
+    return res;
+}
+
+/* ec_GFp_nistp256_point_mul sets {out_x,out_y} = n*{in_x,in_y}, where n is <
+ * the order of the group.
+ *
+ * Returns MP_OKAY on success or the first error reported by to_montgomery
+ * or from_montgomery.
+ */
+static mp_err
+ec_GFp_nistp256_point_mul(const mp_int *n,
+                          const mp_int *in_x, const mp_int *in_y,
+                          mp_int *out_x, mp_int *out_y,
+                          const ECGroup *group)
+{
+    mp_err res;
+    u8 k[32];
+    felem base_x, base_y; /* input point in Montgomery form */
+    felem jx, jy, jz;     /* Jacobian result */
+    felem ax, ay;         /* affine result */
+
+    scalar_from_mp_int(k, n);
+
+    MP_CHECKOK(to_montgomery(base_x, in_x, group));
+    MP_CHECKOK(to_montgomery(base_y, in_y, group));
+
+    scalar_mult(jx, jy, jz, base_x, base_y, k);
+    point_to_affine(ax, ay, jx, jy, jz);
+
+    MP_CHECKOK(from_montgomery(out_x, ax, group));
+    MP_CHECKOK(from_montgomery(out_y, ay, group));
+
+CLEANUP:
+    return res;
+}
+
+/* ec_GFp_nistp256_points_mul_vartime sets {out_x,out_y} = n1*G +
+ * n2*{in_x,in_y}, where n1 and n2 are < the order of the group.
+ *
+ * Either scalar may be NULL, in which case only the other product is
+ * computed. As indicated by the name, this function operates in variable
+ * time. This is safe because it's used for signature validation which
+ * doesn't deal with secrets.
+ */
+static mp_err
+ec_GFp_nistp256_points_mul_vartime(
+    const mp_int *n1, const mp_int *n2,
+    const mp_int *in_x, const mp_int *in_y,
+    mp_int *out_x, mp_int *out_y,
+    const ECGroup *group)
+{
+    u8 scalar1[32], scalar2[32];
+    felem x1, y1, z1, x2, y2, z2, x_affine, y_affine, px, py;
+    mp_err res = MP_OKAY;
+
+    /* If n2 == NULL, this is just a base-point multiplication. */
+    if (n2 == NULL) {
+        return ec_GFp_nistp256_base_point_mul(n1, out_x, out_y, group);
+    }
+
+    /* If n1 == NULL, this is just an arbitrary-point multiplication. */
+    if (n1 == NULL) {
+        return ec_GFp_nistp256_point_mul(n2, in_x, in_y, out_x, out_y, group);
+    }
+
+    /* If both scalars are zero, then the result is the point at infinity. */
+    if (!(mp_cmp_z(n1) != 0 || mp_cmp_z(n2) != 0)) {
+        mp_zero(out_x);
+        mp_zero(out_y);
+        return res;
+    }
+
+    scalar_from_mp_int(scalar1, n1);
+    scalar_from_mp_int(scalar2, n2);
+
+    MP_CHECKOK(to_montgomery(px, in_x, group));
+    MP_CHECKOK(to_montgomery(py, in_y, group));
+    scalar_base_mult(x1, y1, z1, scalar1);
+    scalar_mult(x2, y2, z2, px, py, scalar2);
+
+    /* If n2 == 0, then {x2,y2,z2} is zero and {x1,y1,z1} already is the
+     * result, so nothing needs to be combined. */
+    if (mp_cmp_z(n2) != 0) {
+        if (mp_cmp_z(n1) == 0) {
+            /* {x1,y1,z1} is zero and the result is just {x2,y2,z2}. */
+            memcpy(x1, x2, sizeof(felem));
+            memcpy(y1, y2, sizeof(felem));
+            memcpy(z1, z2, sizeof(felem));
+        } else {
+            /* Handles the case where {x1,y1,z1} == {x2,y2,z2}. */
+            point_add_or_double_vartime(x1, y1, z1, x1, y1, z1, x2, y2, z2);
+        }
+    }
+
+    point_to_affine(x_affine, y_affine, x1, y1, z1);
+    MP_CHECKOK(from_montgomery(out_x, x_affine, group));
+    MP_CHECKOK(from_montgomery(out_y, y_affine, group));
+
+CLEANUP:
+    return res;
+}
+
+/* Wire in fast point multiplication for named curves: for NIST P-256 the
+ * group's base_point_mul, point_mul and points_mul hooks are replaced with
+ * the implementations in this file. Other curves are left untouched.
+ * Always returns MP_OKAY.
+ */
+mp_err
+ec_group_set_gfp256_32(ECGroup *group, ECCurveName name)
+{
+    if (name != ECCurve_NIST_P256) {
+        return MP_OKAY;
+    }
+
+    group->base_point_mul = &ec_GFp_nistp256_base_point_mul;
+    group->point_mul = &ec_GFp_nistp256_point_mul;
+    group->points_mul = &ec_GFp_nistp256_points_mul_vartime;
+    return MP_OKAY;
+}
diff --git a/security/nss/lib/freebl/ecl/ecp_384.c b/security/nss/lib/freebl/ecl/ecp_384.c
new file mode 100644
index 000000000..702fd976e
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecp_384.c
@@ -0,0 +1,258 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ecp.h"
+#include "mpi.h"
+#include "mplogic.h"
+#include "mpi-priv.h"
+
+/* Fast modular reduction for p384 = 2^384 - 2^128 - 2^96 + 2^32 - 1. a can be r.
+ * Uses algorithm 2.30 from Hankerson, Menezes, Vanstone. Guide to
+ * Elliptic Curve Cryptography.
+ *
+ * The input is split into ten 384-bit terms s[0..9] built from the 32-bit
+ * words of |a|; the result is s0 + 2*s1 + s2 + s3 + s4 + s5 + s6 - s7 - s8
+ * - s9 reduced modulo meth->irr. All terms are copied out of |a| before |r|
+ * is first written, which is what makes a == r safe.
+ *
+ * Returns MP_OKAY on success or the error of the first MPI call that
+ * failed. */
+static mp_err
+ec_GFp_nistp384_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+ mp_err res = MP_OKAY;
+ int a_bits = mpl_significant_bits(a);
+ int i;
+
+ /* m[0..9] are stack-backed mp_ints of exactly the size we need; m[i]
+ * is a view over the digit row s[i] and is never heap-allocated. */
+ mp_int m[10];
+
+#ifdef ECL_THIRTY_TWO_BIT
+ mp_digit s[10][12];
+ for (i = 0; i < 10; i++) {
+ MP_SIGN(&m[i]) = MP_ZPOS;
+ MP_ALLOC(&m[i]) = 12;
+ MP_USED(&m[i]) = 12;
+ MP_DIGITS(&m[i]) = s[i];
+ }
+#else
+ mp_digit s[10][6];
+ for (i = 0; i < 10; i++) {
+ MP_SIGN(&m[i]) = MP_ZPOS;
+ MP_ALLOC(&m[i]) = 6;
+ MP_USED(&m[i]) = 6;
+ MP_DIGITS(&m[i]) = s[i];
+ }
+#endif
+
+#ifdef ECL_THIRTY_TWO_BIT
+ /* for polynomials larger than twice the field size or polynomials
+ * not using all words, use regular reduction */
+ /* a_bits > 736 guarantees that all 24 32-bit digits read below exist. */
+ if ((a_bits > 768) || (a_bits <= 736)) {
+ MP_CHECKOK(mp_mod(a, &meth->irr, r));
+ } else {
+ for (i = 0; i < 12; i++) {
+ s[0][i] = MP_DIGIT(a, i);
+ }
+ s[1][0] = 0;
+ s[1][1] = 0;
+ s[1][2] = 0;
+ s[1][3] = 0;
+ s[1][4] = MP_DIGIT(a, 21);
+ s[1][5] = MP_DIGIT(a, 22);
+ s[1][6] = MP_DIGIT(a, 23);
+ s[1][7] = 0;
+ s[1][8] = 0;
+ s[1][9] = 0;
+ s[1][10] = 0;
+ s[1][11] = 0;
+ for (i = 0; i < 12; i++) {
+ s[2][i] = MP_DIGIT(a, i + 12);
+ }
+ s[3][0] = MP_DIGIT(a, 21);
+ s[3][1] = MP_DIGIT(a, 22);
+ s[3][2] = MP_DIGIT(a, 23);
+ for (i = 3; i < 12; i++) {
+ s[3][i] = MP_DIGIT(a, i + 9);
+ }
+ s[4][0] = 0;
+ s[4][1] = MP_DIGIT(a, 23);
+ s[4][2] = 0;
+ s[4][3] = MP_DIGIT(a, 20);
+ for (i = 4; i < 12; i++) {
+ s[4][i] = MP_DIGIT(a, i + 8);
+ }
+ s[5][0] = 0;
+ s[5][1] = 0;
+ s[5][2] = 0;
+ s[5][3] = 0;
+ s[5][4] = MP_DIGIT(a, 20);
+ s[5][5] = MP_DIGIT(a, 21);
+ s[5][6] = MP_DIGIT(a, 22);
+ s[5][7] = MP_DIGIT(a, 23);
+ s[5][8] = 0;
+ s[5][9] = 0;
+ s[5][10] = 0;
+ s[5][11] = 0;
+ s[6][0] = MP_DIGIT(a, 20);
+ s[6][1] = 0;
+ s[6][2] = 0;
+ s[6][3] = MP_DIGIT(a, 21);
+ s[6][4] = MP_DIGIT(a, 22);
+ s[6][5] = MP_DIGIT(a, 23);
+ s[6][6] = 0;
+ s[6][7] = 0;
+ s[6][8] = 0;
+ s[6][9] = 0;
+ s[6][10] = 0;
+ s[6][11] = 0;
+ s[7][0] = MP_DIGIT(a, 23);
+ for (i = 1; i < 12; i++) {
+ s[7][i] = MP_DIGIT(a, i + 11);
+ }
+ s[8][0] = 0;
+ s[8][1] = MP_DIGIT(a, 20);
+ s[8][2] = MP_DIGIT(a, 21);
+ s[8][3] = MP_DIGIT(a, 22);
+ s[8][4] = MP_DIGIT(a, 23);
+ s[8][5] = 0;
+ s[8][6] = 0;
+ s[8][7] = 0;
+ s[8][8] = 0;
+ s[8][9] = 0;
+ s[8][10] = 0;
+ s[8][11] = 0;
+ s[9][0] = 0;
+ s[9][1] = 0;
+ s[9][2] = 0;
+ s[9][3] = MP_DIGIT(a, 23);
+ s[9][4] = MP_DIGIT(a, 23);
+ s[9][5] = 0;
+ s[9][6] = 0;
+ s[9][7] = 0;
+ s[9][8] = 0;
+ s[9][9] = 0;
+ s[9][10] = 0;
+ s[9][11] = 0;
+
+ /* r = s0 + 2*s1 + s2 + s3 + s4 + s5 + s6 - s7 - s8 - s9 (mod p).
+ * The intermediate value may go negative; the final mp_submod brings
+ * it back into range. */
+ MP_CHECKOK(mp_add(&m[0], &m[1], r));
+ MP_CHECKOK(mp_add(r, &m[1], r));
+ MP_CHECKOK(mp_add(r, &m[2], r));
+ MP_CHECKOK(mp_add(r, &m[3], r));
+ MP_CHECKOK(mp_add(r, &m[4], r));
+ MP_CHECKOK(mp_add(r, &m[5], r));
+ MP_CHECKOK(mp_add(r, &m[6], r));
+ MP_CHECKOK(mp_sub(r, &m[7], r));
+ MP_CHECKOK(mp_sub(r, &m[8], r));
+ MP_CHECKOK(mp_submod(r, &m[9], &meth->irr, r));
+ s_mp_clamp(r);
+ }
+#else
+ /* for polynomials larger than twice the field size or polynomials
+ * not using all words, use regular reduction */
+ /* NOTE(review): the highest digit read below is MP_DIGIT(a, 11), which
+ * exists once a_bits > 704; the 736 threshold mirrors the 32-bit branch
+ * and is therefore conservative here - confirm that this is intended. */
+ if ((a_bits > 768) || (a_bits <= 736)) {
+ MP_CHECKOK(mp_mod(a, &meth->irr, r));
+ } else {
+ /* Same ten terms as in the 32-bit branch, with two 32-bit words packed
+ * into each 64-bit digit (low word in the low half). */
+ for (i = 0; i < 6; i++) {
+ s[0][i] = MP_DIGIT(a, i);
+ }
+ s[1][0] = 0;
+ s[1][1] = 0;
+ s[1][2] = (MP_DIGIT(a, 10) >> 32) | (MP_DIGIT(a, 11) << 32);
+ s[1][3] = MP_DIGIT(a, 11) >> 32;
+ s[1][4] = 0;
+ s[1][5] = 0;
+ for (i = 0; i < 6; i++) {
+ s[2][i] = MP_DIGIT(a, i + 6);
+ }
+ s[3][0] = (MP_DIGIT(a, 10) >> 32) | (MP_DIGIT(a, 11) << 32);
+ s[3][1] = (MP_DIGIT(a, 11) >> 32) | (MP_DIGIT(a, 6) << 32);
+ for (i = 2; i < 6; i++) {
+ s[3][i] = (MP_DIGIT(a, i + 4) >> 32) | (MP_DIGIT(a, i + 5) << 32);
+ }
+ s[4][0] = (MP_DIGIT(a, 11) >> 32) << 32;
+ s[4][1] = MP_DIGIT(a, 10) << 32;
+ for (i = 2; i < 6; i++) {
+ s[4][i] = MP_DIGIT(a, i + 4);
+ }
+ s[5][0] = 0;
+ s[5][1] = 0;
+ s[5][2] = MP_DIGIT(a, 10);
+ s[5][3] = MP_DIGIT(a, 11);
+ s[5][4] = 0;
+ s[5][5] = 0;
+ s[6][0] = (MP_DIGIT(a, 10) << 32) >> 32;
+ s[6][1] = (MP_DIGIT(a, 10) >> 32) << 32;
+ s[6][2] = MP_DIGIT(a, 11);
+ s[6][3] = 0;
+ s[6][4] = 0;
+ s[6][5] = 0;
+ s[7][0] = (MP_DIGIT(a, 11) >> 32) | (MP_DIGIT(a, 6) << 32);
+ for (i = 1; i < 6; i++) {
+ s[7][i] = (MP_DIGIT(a, i + 5) >> 32) | (MP_DIGIT(a, i + 6) << 32);
+ }
+ s[8][0] = MP_DIGIT(a, 10) << 32;
+ s[8][1] = (MP_DIGIT(a, 10) >> 32) | (MP_DIGIT(a, 11) << 32);
+ s[8][2] = MP_DIGIT(a, 11) >> 32;
+ s[8][3] = 0;
+ s[8][4] = 0;
+ s[8][5] = 0;
+ s[9][0] = 0;
+ s[9][1] = (MP_DIGIT(a, 11) >> 32) << 32;
+ s[9][2] = MP_DIGIT(a, 11) >> 32;
+ s[9][3] = 0;
+ s[9][4] = 0;
+ s[9][5] = 0;
+
+ /* r = s0 + 2*s1 + s2 + s3 + s4 + s5 + s6 - s7 - s8 - s9 (mod p). */
+ MP_CHECKOK(mp_add(&m[0], &m[1], r));
+ MP_CHECKOK(mp_add(r, &m[1], r));
+ MP_CHECKOK(mp_add(r, &m[2], r));
+ MP_CHECKOK(mp_add(r, &m[3], r));
+ MP_CHECKOK(mp_add(r, &m[4], r));
+ MP_CHECKOK(mp_add(r, &m[5], r));
+ MP_CHECKOK(mp_add(r, &m[6], r));
+ MP_CHECKOK(mp_sub(r, &m[7], r));
+ MP_CHECKOK(mp_sub(r, &m[8], r));
+ MP_CHECKOK(mp_submod(r, &m[9], &meth->irr, r));
+ s_mp_clamp(r);
+ }
+#endif
+
+CLEANUP:
+ return res;
+}
+
+/* Compute the square of polynomial a, reduce modulo p384. Store the
+ * result in r. r could be a. Uses optimized modular reduction for p384.
+ *
+ * Returns MP_OKAY on success, otherwise the error from mp_sqr or from the
+ * reduction; the reduction is skipped when the squaring fails.
+ */
+static mp_err
+ec_GFp_nistp384_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+    mp_err res = mp_sqr(a, r);
+
+    if (res < MP_OKAY) {
+        return res;
+    }
+    res = ec_GFp_nistp384_mod(r, r, meth);
+    return res;
+}
+
+/* Compute the product of two polynomials a and b, reduce modulo p384.
+ * Store the result in r. r could be a or b; a could be b. Uses
+ * optimized modular reduction for p384.
+ *
+ * Returns MP_OKAY on success, otherwise the error from mp_mul or from the
+ * reduction; the reduction is skipped when the multiplication fails. */
+static mp_err
+ec_GFp_nistp384_mul(const mp_int *a, const mp_int *b, mp_int *r,
+                    const GFMethod *meth)
+{
+    mp_err res = mp_mul(a, b, r);
+
+    if (res < MP_OKAY) {
+        return res;
+    }
+    res = ec_GFp_nistp384_mod(r, r, meth);
+    return res;
+}
+
+/* Wire in fast field arithmetic for named curves: for NIST P-384 the
+ * field_mod, field_mul and field_sqr hooks of the group's method are
+ * replaced with the p384-specific routines. Other curves are left
+ * untouched. Always returns MP_OKAY. */
+mp_err
+ec_group_set_gfp384(ECGroup *group, ECCurveName name)
+{
+    if (name != ECCurve_NIST_P384) {
+        return MP_OKAY;
+    }
+
+    group->meth->field_mod = &ec_GFp_nistp384_mod;
+    group->meth->field_mul = &ec_GFp_nistp384_mul;
+    group->meth->field_sqr = &ec_GFp_nistp384_sqr;
+    return MP_OKAY;
+}
diff --git a/security/nss/lib/freebl/ecl/ecp_521.c b/security/nss/lib/freebl/ecl/ecp_521.c
new file mode 100644
index 000000000..6ca0dbb11
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecp_521.c
@@ -0,0 +1,137 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ecp.h"
+#include "mpi.h"
+#include "mplogic.h"
+#include "mpi-priv.h"
+
+#define ECP521_DIGITS ECL_CURVE_DIGITS(521)
+
+/* Fast modular reduction for p521 = 2^521 - 1. a can be r. Uses
+ * algorithm 2.31 from Hankerson, Menezes, Vanstone. Guide to
+ * Elliptic Curve Cryptography.
+ *
+ * a is the value to reduce, r receives the result, and meth->irr is
+ * used as the modulus. Inputs with fewer than 521 significant bits are
+ * copied through unchanged; inputs wider than 2 * 521 bits fall back to
+ * the generic mp_mod. Returns MP_OKAY or the error of the failing MPI
+ * call. */
+static mp_err
+ec_GFp_nistp521_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+ mp_err res = MP_OKAY;
+ int a_bits = mpl_significant_bits(a);
+ unsigned int i;
+
+ /* m1 is a stack-backed mp_int of exactly the size we need; its digits
+ * live in s1, so it must never be passed to mp_clear */
+ mp_int m1;
+
+ mp_digit s1[ECP521_DIGITS] = { 0 };
+
+ MP_SIGN(&m1) = MP_ZPOS;
+ MP_ALLOC(&m1) = ECP521_DIGITS;
+ MP_USED(&m1) = ECP521_DIGITS;
+ MP_DIGITS(&m1) = s1;
+
+ /* Fewer than 521 significant bits: no reduction is performed here. */
+ if (a_bits < 521) {
+ if (a == r)
+ return MP_OKAY;
+ return mp_copy(a, r);
+ }
+ /* for polynomials larger than twice the field size or polynomials
+ * not using all words, use regular reduction */
+ if (a_bits > (521 * 2)) {
+ MP_CHECKOK(mp_mod(a, &meth->irr, r));
+ } else {
+#define FIRST_DIGIT (ECP521_DIGITS - 1)
+ /* s1 = high part of a: each output digit joins the bits above bit 9
+ * of digit i with the low 9 bits of digit i + 1.
+ * NOTE(review): this is a >> 521 provided FIRST_DIGIT * MP_DIGIT_BIT
+ * is 512 -- confirm against ECL_CURVE_DIGITS. */
+ for (i = FIRST_DIGIT; i < MP_USED(a) - 1; i++) {
+ s1[i - FIRST_DIGIT] = (MP_DIGIT(a, i) >> 9) | (MP_DIGIT(a, 1 + i) << (MP_DIGIT_BIT - 9));
+ }
+ /* Last digit of a has no successor to borrow bits from. */
+ s1[i - FIRST_DIGIT] = MP_DIGIT(a, i) >> 9;
+
+ /* r = low ECP521_DIGITS digits of a (already in place when a == r). */
+ if (a != r) {
+ MP_CHECKOK(s_mp_pad(r, ECP521_DIGITS));
+ for (i = 0; i < ECP521_DIGITS; i++) {
+ MP_DIGIT(r, i) = MP_DIGIT(a, i);
+ }
+ }
+ MP_USED(r) = ECP521_DIGITS;
+ /* Keep only the low 9 bits of the top digit. */
+ MP_DIGIT(r, FIRST_DIGIT) &= 0x1FF;
+
+ /* r = low part + high part */
+ MP_CHECKOK(s_mp_add(r, &m1));
+ if (MP_DIGIT(r, FIRST_DIGIT) & 0x200) {
+ /* The sum spilled into bit 9 of the top digit: add 1 and drop
+ * that bit again. */
+ MP_CHECKOK(s_mp_add_d(r, 1));
+ MP_DIGIT(r, FIRST_DIGIT) &= 0x1FF;
+ } else if (s_mp_cmp(r, &meth->irr) == 0) {
+ /* The sum equals the modulus itself, so the result is 0. */
+ mp_zero(r);
+ }
+ s_mp_clamp(r);
+ }
+
+CLEANUP:
+ return res;
+}
+
+/* Field squaring for NIST P-521: r = a^2, then reduced modulo p521 with
+ * the curve-specific fast reduction.  r may alias a.  Returns the status
+ * of the squaring if it fails, otherwise the status of the reduction. */
+static mp_err
+ec_GFp_nistp521_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+    /* Full-width square first; the reduction is skipped on failure. */
+    mp_err res = mp_sqr(a, r);
+
+    if (res >= MP_OKAY) {
+        res = ec_GFp_nistp521_mod(r, r, meth);
+    }
+    return res;
+}
+
+/* Field multiplication for NIST P-521: r = a * b, then reduced modulo
+ * p521 with the curve-specific fast reduction.  r may alias a or b, and
+ * a may alias b.  Returns the status of the multiplication if it fails,
+ * otherwise the status of the reduction. */
+static mp_err
+ec_GFp_nistp521_mul(const mp_int *a, const mp_int *b, mp_int *r,
+                    const GFMethod *meth)
+{
+    /* Full-width product first; the reduction is skipped on failure. */
+    mp_err res = mp_mul(a, b, r);
+
+    if (res >= MP_OKAY) {
+        res = ec_GFp_nistp521_mod(r, r, meth);
+    }
+    return res;
+}
+
+/* Divides two field elements modulo p521 and stores the quotient in r.
+ * If a is NULL, r receives the inverse of b instead of a/b.  Returns
+ * MP_OKAY on success, otherwise the error reported by the failing MPI
+ * call. */
+static mp_err
+ec_GFp_nistp521_div(const mp_int *a, const mp_int *b, mp_int *r,
+                    const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    mp_int t;
+
+    /* If a is NULL, then return the inverse of b, otherwise return a/b. */
+    if (a == NULL) {
+        return mp_invmod(b, &meth->irr, r);
+    }
+
+    /* Mark t as unallocated before mp_init, the same guard the other
+     * routines in this library apply before a CLEANUP path that calls
+     * mp_clear. */
+    MP_DIGITS(&t) = 0;
+
+    /* MPI doesn't support divmod, so we implement it using invmod and
+     * mulmod. */
+    MP_CHECKOK(mp_init(&t));
+    MP_CHECKOK(mp_invmod(b, &meth->irr, &t));
+    MP_CHECKOK(mp_mul(a, &t, r));
+    MP_CHECKOK(ec_GFp_nistp521_mod(r, r, meth));
+
+CLEANUP:
+    mp_clear(&t);
+    return res;
+}
+
+/* Installs the P-521 specific field routines (mod, mul, sqr, div) into
+ * the group's field method when the named curve is NIST P-521.  Any
+ * other curve name leaves the group untouched.  Always returns
+ * MP_OKAY. */
+mp_err
+ec_group_set_gfp521(ECGroup *group, ECCurveName name)
+{
+    if (name != ECCurve_NIST_P521) {
+        return MP_OKAY;
+    }
+
+    group->meth->field_mod = &ec_GFp_nistp521_mod;
+    group->meth->field_mul = &ec_GFp_nistp521_mul;
+    group->meth->field_sqr = &ec_GFp_nistp521_sqr;
+    group->meth->field_div = &ec_GFp_nistp521_div;
+    return MP_OKAY;
+}
diff --git a/security/nss/lib/freebl/ecl/ecp_aff.c b/security/nss/lib/freebl/ecl/ecp_aff.c
new file mode 100644
index 000000000..47fb27326
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecp_aff.c
@@ -0,0 +1,308 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ecp.h"
+#include "mplogic.h"
+#include <stdlib.h>
+
+/* Reports whether the affine point P(px, py) is the point at infinity,
+ * which this representation encodes as (0, 0).  Returns MP_YES when
+ * both coordinates are zero and MP_NO otherwise. */
+mp_err
+ec_GFp_pt_is_inf_aff(const mp_int *px, const mp_int *py)
+{
+    /* Any non-zero coordinate means a finite point. */
+    if ((mp_cmp_z(px) != 0) || (mp_cmp_z(py) != 0)) {
+        return MP_NO;
+    }
+    return MP_YES;
+}
+
+/* Turns P(px, py) into the affine point at infinity by zeroing both
+ * coordinates.  Always returns MP_OKAY. */
+mp_err
+ec_GFp_pt_set_inf_aff(mp_int *px, mp_int *py)
+{
+    mp_zero(py);
+    mp_zero(px);
+    return MP_OKAY;
+}
+
+/* Computes R = P + Q based on IEEE P1363 A.10.1. Elliptic curve points P,
+ * Q, and R can all be identical. Uses affine coordinates. Assumes input
+ * is already field-encoded using field_enc, and returns output that is
+ * still field-encoded.
+ *
+ * Returns MP_OKAY on success, otherwise the error of the first MPI or
+ * field operation that fails.  The result is built in temporaries and
+ * only copied to (rx, ry) at the end, which is what allows R to alias P
+ * or Q. */
+mp_err
+ec_GFp_pt_add_aff(const mp_int *px, const mp_int *py, const mp_int *qx,
+                  const mp_int *qy, mp_int *rx, mp_int *ry,
+                  const ECGroup *group)
+{
+    mp_err res = MP_OKAY;
+    mp_int lambda, temp, tempx, tempy;
+
+    MP_DIGITS(&lambda) = 0;
+    MP_DIGITS(&temp) = 0;
+    MP_DIGITS(&tempx) = 0;
+    MP_DIGITS(&tempy) = 0;
+    MP_CHECKOK(mp_init(&lambda));
+    MP_CHECKOK(mp_init(&temp));
+    MP_CHECKOK(mp_init(&tempx));
+    MP_CHECKOK(mp_init(&tempy));
+    /* if P = inf, then R = Q */
+    if (ec_GFp_pt_is_inf_aff(px, py) == MP_YES) {
+        MP_CHECKOK(mp_copy(qx, rx));
+        MP_CHECKOK(mp_copy(qy, ry));
+        res = MP_OKAY;
+        goto CLEANUP;
+    }
+    /* if Q = inf, then R = P */
+    if (ec_GFp_pt_is_inf_aff(qx, qy) == MP_YES) {
+        MP_CHECKOK(mp_copy(px, rx));
+        MP_CHECKOK(mp_copy(py, ry));
+        res = MP_OKAY;
+        goto CLEANUP;
+    }
+    /* if px != qx, then lambda = (py-qy) / (px-qx) */
+    if (mp_cmp(px, qx) != 0) {
+        MP_CHECKOK(group->meth->field_sub(py, qy, &tempy, group->meth));
+        MP_CHECKOK(group->meth->field_sub(px, qx, &tempx, group->meth));
+        MP_CHECKOK(group->meth->field_div(&tempy, &tempx, &lambda, group->meth));
+    } else {
+        /* if py != qy or qy = 0, then R = inf */
+        if (((mp_cmp(py, qy) != 0)) || (mp_cmp_z(qy) == 0)) {
+            mp_zero(rx);
+            mp_zero(ry);
+            res = MP_OKAY;
+            goto CLEANUP;
+        }
+        /* lambda = (3qx^2+a) / (2qy) */
+        MP_CHECKOK(group->meth->field_sqr(qx, &tempx, group->meth));
+        /* The constants 3 and 2 must be field-encoded like the inputs. */
+        MP_CHECKOK(mp_set_int(&temp, 3));
+        if (group->meth->field_enc) {
+            MP_CHECKOK(group->meth->field_enc(&temp, &temp, group->meth));
+        }
+        MP_CHECKOK(group->meth->field_mul(&tempx, &temp, &tempx, group->meth));
+        MP_CHECKOK(group->meth->field_add(&tempx, &group->curvea, &tempx, group->meth));
+        MP_CHECKOK(mp_set_int(&temp, 2));
+        if (group->meth->field_enc) {
+            MP_CHECKOK(group->meth->field_enc(&temp, &temp, group->meth));
+        }
+        MP_CHECKOK(group->meth->field_mul(qy, &temp, &tempy, group->meth));
+        MP_CHECKOK(group->meth->field_div(&tempx, &tempy, &lambda, group->meth));
+    }
+    /* rx = lambda^2 - px - qx */
+    MP_CHECKOK(group->meth->field_sqr(&lambda, &tempx, group->meth));
+    MP_CHECKOK(group->meth->field_sub(&tempx, px, &tempx, group->meth));
+    MP_CHECKOK(group->meth->field_sub(&tempx, qx, &tempx, group->meth));
+    /* ry = (qx - rx) * lambda - qy */
+    MP_CHECKOK(group->meth->field_sub(qx, &tempx, &tempy, group->meth));
+    MP_CHECKOK(group->meth->field_mul(&tempy, &lambda, &tempy, group->meth));
+    MP_CHECKOK(group->meth->field_sub(&tempy, qy, &tempy, group->meth));
+    MP_CHECKOK(mp_copy(&tempx, rx));
+    MP_CHECKOK(mp_copy(&tempy, ry));
+
+CLEANUP:
+    mp_clear(&lambda);
+    mp_clear(&temp);
+    mp_clear(&tempx);
+    mp_clear(&tempy);
+    return res;
+}
+
+/* Computes R = P - Q as P + (-Q), where -Q = (qx, -qy).  P, Q and R may
+ * all be the same point.  Works in affine coordinates; inputs are
+ * expected to be field-encoded already and the output stays
+ * field-encoded.  Returns MP_OKAY or the first error encountered. */
+mp_err
+ec_GFp_pt_sub_aff(const mp_int *px, const mp_int *py, const mp_int *qx,
+                  const mp_int *qy, mp_int *rx, mp_int *ry,
+                  const ECGroup *group)
+{
+    mp_err res = MP_OKAY;
+    mp_int neg_qy;
+
+    MP_DIGITS(&neg_qy) = 0;
+    MP_CHECKOK(mp_init(&neg_qy));
+
+    /* Negate Q's y coordinate, then hand off to the group's adder. */
+    MP_CHECKOK(group->meth->field_neg(qy, &neg_qy, group->meth));
+    MP_CHECKOK(group->point_add(px, py, qx, &neg_qy, rx, ry, group));
+
+CLEANUP:
+    mp_clear(&neg_qy);
+    return res;
+}
+
+/* Computes R = 2P in affine coordinates by adding P to itself.  P and R
+ * may be the same point.  Inputs are expected to be field-encoded
+ * already and the output stays field-encoded. */
+mp_err
+ec_GFp_pt_dbl_aff(const mp_int *px, const mp_int *py, mp_int *rx,
+                  mp_int *ry, const ECGroup *group)
+{
+    /* The affine adder takes its tangent-slope branch when both operands
+     * have the same x coordinate. */
+    const mp_err res = ec_GFp_pt_add_aff(px, py, px, py, rx, ry, group);
+
+    return res;
+}
+
+/* by default, this routine is unused and thus doesn't need to be compiled */
+#ifdef ECL_ENABLE_GFP_PT_MUL_AFF
+/* Computes R = nP based on IEEE P1363 A.10.3. Elliptic curve points P and
+ * R can be identical. Uses affine coordinates. Assumes input is already
+ * field-encoded using field_enc, and returns output that is still
+ * field-encoded.
+ *
+ * A negative n is handled by negating both the scalar and the point.
+ * Returns MP_OKAY on success, otherwise the error of the first MPI,
+ * field or point operation that fails. */
+mp_err
+ec_GFp_pt_mul_aff(const mp_int *n, const mp_int *px, const mp_int *py,
+ mp_int *rx, mp_int *ry, const ECGroup *group)
+{
+ mp_err res = MP_OKAY;
+ mp_int k, k3, qx, qy, sx, sy;
+ /* b1 and b3 are only used by the non-ECL_DEBUG branch below */
+ int b1, b3, i, l;
+
+ /* zero the digit pointers first so CLEANUP can mp_clear all six
+ * temporaries even if an early mp_init fails */
+ MP_DIGITS(&k) = 0;
+ MP_DIGITS(&k3) = 0;
+ MP_DIGITS(&qx) = 0;
+ MP_DIGITS(&qy) = 0;
+ MP_DIGITS(&sx) = 0;
+ MP_DIGITS(&sy) = 0;
+ MP_CHECKOK(mp_init(&k));
+ MP_CHECKOK(mp_init(&k3));
+ MP_CHECKOK(mp_init(&qx));
+ MP_CHECKOK(mp_init(&qy));
+ MP_CHECKOK(mp_init(&sx));
+ MP_CHECKOK(mp_init(&sy));
+
+ /* if n = 0 then r = inf */
+ if (mp_cmp_z(n) == 0) {
+ mp_zero(rx);
+ mp_zero(ry);
+ res = MP_OKAY;
+ goto CLEANUP;
+ }
+ /* Q = P, k = n */
+ MP_CHECKOK(mp_copy(px, &qx));
+ MP_CHECKOK(mp_copy(py, &qy));
+ MP_CHECKOK(mp_copy(n, &k));
+ /* if n < 0 then Q = -Q, k = -k */
+ if (mp_cmp_z(n) < 0) {
+ MP_CHECKOK(group->meth->field_neg(&qy, &qy, group->meth));
+ MP_CHECKOK(mp_neg(&k, &k));
+ }
+#ifdef ECL_DEBUG /* basic double and add method */
+ /* l = index of the high order bit of k; S starts as Q for that bit */
+ l = mpl_significant_bits(&k) - 1;
+ MP_CHECKOK(mp_copy(&qx, &sx));
+ MP_CHECKOK(mp_copy(&qy, &sy));
+ for (i = l - 1; i >= 0; i--) {
+ /* S = 2S */
+ MP_CHECKOK(group->point_dbl(&sx, &sy, &sx, &sy, group));
+ /* if k_i = 1, then S = S + Q */
+ if (mpl_get_bit(&k, i) != 0) {
+ MP_CHECKOK(group->point_add(&sx, &sy, &qx, &qy, &sx, &sy, group));
+ }
+ }
+#else /* double and add/subtract method from \
+ * standard */
+ /* k3 = 3 * k */
+ MP_CHECKOK(mp_set_int(&k3, 3));
+ MP_CHECKOK(mp_mul(&k, &k3, &k3));
+ /* S = Q */
+ MP_CHECKOK(mp_copy(&qx, &sx));
+ MP_CHECKOK(mp_copy(&qy, &sy));
+ /* l = index of high order bit in binary representation of 3*k */
+ l = mpl_significant_bits(&k3) - 1;
+ /* for i = l-1 downto 1 */
+ for (i = l - 1; i >= 1; i--) {
+ /* S = 2S */
+ MP_CHECKOK(group->point_dbl(&sx, &sy, &sx, &sy, group));
+ b3 = MP_GET_BIT(&k3, i);
+ b1 = MP_GET_BIT(&k, i);
+ /* if k3_i = 1 and k_i = 0, then S = S + Q */
+ if ((b3 == 1) && (b1 == 0)) {
+ MP_CHECKOK(group->point_add(&sx, &sy, &qx, &qy, &sx, &sy, group));
+ /* if k3_i = 0 and k_i = 1, then S = S - Q */
+ } else if ((b3 == 0) && (b1 == 1)) {
+ MP_CHECKOK(group->point_sub(&sx, &sy, &qx, &qy, &sx, &sy, group));
+ }
+ }
+#endif
+ /* output S */
+ MP_CHECKOK(mp_copy(&sx, rx));
+ MP_CHECKOK(mp_copy(&sy, ry));
+
+CLEANUP:
+ mp_clear(&k);
+ mp_clear(&k3);
+ mp_clear(&qx);
+ mp_clear(&qy);
+ mp_clear(&sx);
+ mp_clear(&sy);
+ return res;
+}
+#endif
+
+/* Validates a point on a GFp curve.
+ *
+ * (px, py) is the candidate point in plain (not field-encoded) form and
+ * group supplies the field method, curve coefficients and order.
+ * Returns MP_YES if the point passes all four checks below, MP_NO if any
+ * check fails, or the error code of the first MPI, field or point
+ * operation that fails. */
+mp_err
+ec_GFp_validate_point(const mp_int *px, const mp_int *py, const ECGroup *group)
+{
+    mp_err res = MP_NO;
+    mp_int accl, accr, tmp, pxt, pyt;
+
+    MP_DIGITS(&accl) = 0;
+    MP_DIGITS(&accr) = 0;
+    MP_DIGITS(&tmp) = 0;
+    MP_DIGITS(&pxt) = 0;
+    MP_DIGITS(&pyt) = 0;
+    MP_CHECKOK(mp_init(&accl));
+    MP_CHECKOK(mp_init(&accr));
+    MP_CHECKOK(mp_init(&tmp));
+    MP_CHECKOK(mp_init(&pxt));
+    MP_CHECKOK(mp_init(&pyt));
+
+    /* 1: Verify that publicValue is not the point at infinity */
+    if (ec_GFp_pt_is_inf_aff(px, py) == MP_YES) {
+        res = MP_NO;
+        goto CLEANUP;
+    }
+    /* 2: Verify that the coordinates of publicValue are elements
+     *    of the field.
+     */
+    if ((MP_SIGN(px) == MP_NEG) || (mp_cmp(px, &group->meth->irr) >= 0) ||
+        (MP_SIGN(py) == MP_NEG) || (mp_cmp(py, &group->meth->irr) >= 0)) {
+        res = MP_NO;
+        goto CLEANUP;
+    }
+    /* 3: Verify that publicValue is on the curve. */
+    if (group->meth->field_enc) {
+        /* An encoding failure must abort validation rather than let the
+         * curve equation be evaluated on unset temporaries. */
+        MP_CHECKOK(group->meth->field_enc(px, &pxt, group->meth));
+        MP_CHECKOK(group->meth->field_enc(py, &pyt, group->meth));
+    } else {
+        MP_CHECKOK(mp_copy(px, &pxt));
+        MP_CHECKOK(mp_copy(py, &pyt));
+    }
+    /* left-hand side: y^2 */
+    MP_CHECKOK(group->meth->field_sqr(&pyt, &accl, group->meth));
+    /* right-hand side: x^3 + a*x + b = (x^2 + a)*x + b by Horner's rule */
+    MP_CHECKOK(group->meth->field_sqr(&pxt, &tmp, group->meth));
+    MP_CHECKOK(group->meth->field_add(&tmp, &group->curvea, &tmp, group->meth));
+    MP_CHECKOK(group->meth->field_mul(&tmp, &pxt, &accr, group->meth));
+    MP_CHECKOK(group->meth->field_add(&accr, &group->curveb, &accr, group->meth));
+    /* check LHS - RHS == 0 */
+    MP_CHECKOK(group->meth->field_sub(&accl, &accr, &accr, group->meth));
+    if (mp_cmp_z(&accr) != 0) {
+        res = MP_NO;
+        goto CLEANUP;
+    }
+    /* 4: Verify that the order of the curve times the publicValue
+     *    is the point at infinity.
+     */
+    MP_CHECKOK(ECPoint_mul(group, &group->order, px, py, &pxt, &pyt));
+    if (ec_GFp_pt_is_inf_aff(&pxt, &pyt) != MP_YES) {
+        res = MP_NO;
+        goto CLEANUP;
+    }
+
+    res = MP_YES;
+
+CLEANUP:
+    mp_clear(&accl);
+    mp_clear(&accr);
+    mp_clear(&tmp);
+    mp_clear(&pxt);
+    mp_clear(&pyt);
+    return res;
+}
diff --git a/security/nss/lib/freebl/ecl/ecp_jac.c b/security/nss/lib/freebl/ecl/ecp_jac.c
new file mode 100644
index 000000000..535e75903
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecp_jac.c
@@ -0,0 +1,513 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ecp.h"
+#include "mplogic.h"
+#include <stdlib.h>
+#ifdef ECL_DEBUG
+#include <assert.h>
+#endif
+
+/* Lifts the affine point P(px, py) to Jacobian projective coordinates
+ * R(rx, ry, rz).  The affine point at infinity maps to the Jacobian
+ * point at infinity; any other point maps to (px, py, 1).  Inputs are
+ * expected to be field-encoded already and the output stays
+ * field-encoded.  Returns MP_OKAY or the first error encountered. */
+mp_err
+ec_GFp_pt_aff2jac(const mp_int *px, const mp_int *py, mp_int *rx,
+                  mp_int *ry, mp_int *rz, const ECGroup *group)
+{
+    mp_err res = MP_OKAY;
+
+    if (ec_GFp_pt_is_inf_aff(px, py) != MP_YES) {
+        /* Finite point: keep x and y, and set z to 1, field-encoding the
+         * 1 when the field method provides an encoder. */
+        MP_CHECKOK(mp_copy(px, rx));
+        MP_CHECKOK(mp_copy(py, ry));
+        MP_CHECKOK(mp_set_int(rz, 1));
+        if (group->meth->field_enc != NULL) {
+            MP_CHECKOK(group->meth->field_enc(rz, rz, group->meth));
+        }
+    } else {
+        MP_CHECKOK(ec_GFp_pt_set_inf_jac(rx, ry, rz));
+    }
+
+CLEANUP:
+    return res;
+}
+
+/* Converts a point P(px, py, pz) from Jacobian projective coordinates to
+ * affine coordinates R(rx, ry). P and R can share x and y coordinates.
+ * Assumes input is already field-encoded using field_enc, and returns
+ * output that is still field-encoded.
+ *
+ * Returns MP_OKAY on success, otherwise the error of the first MPI or
+ * field operation that fails. */
+mp_err
+ec_GFp_pt_jac2aff(const mp_int *px, const mp_int *py, const mp_int *pz,
+ mp_int *rx, mp_int *ry, const ECGroup *group)
+{
+ mp_err res = MP_OKAY;
+ mp_int z1, z2, z3;
+
+ /* zero the digit pointers first so CLEANUP can mp_clear all three
+ * temporaries even if an early mp_init fails */
+ MP_DIGITS(&z1) = 0;
+ MP_DIGITS(&z2) = 0;
+ MP_DIGITS(&z3) = 0;
+ MP_CHECKOK(mp_init(&z1));
+ MP_CHECKOK(mp_init(&z2));
+ MP_CHECKOK(mp_init(&z3));
+
+ /* if point at infinity, then set point at infinity and exit */
+ if (ec_GFp_pt_is_inf_jac(px, py, pz) == MP_YES) {
+ MP_CHECKOK(ec_GFp_pt_set_inf_aff(rx, ry));
+ goto CLEANUP;
+ }
+
+ /* transform (px, py, pz) into (px / pz^2, py / pz^3) */
+ if (mp_cmp_d(pz, 1) == 0) {
+ /* pz compares equal to 1: x and y are copied through unchanged */
+ MP_CHECKOK(mp_copy(px, rx));
+ MP_CHECKOK(mp_copy(py, ry));
+ } else {
+ /* z1 = 1/pz (field_div is called with a NULL numerator, which is
+ * expected to yield the inverse), z2 = z1^2, z3 = z1^3 */
+ MP_CHECKOK(group->meth->field_div(NULL, pz, &z1, group->meth));
+ MP_CHECKOK(group->meth->field_sqr(&z1, &z2, group->meth));
+ MP_CHECKOK(group->meth->field_mul(&z1, &z2, &z3, group->meth));
+ /* rx = px / pz^2, ry = py / pz^3 */
+ MP_CHECKOK(group->meth->field_mul(px, &z2, rx, group->meth));
+ MP_CHECKOK(group->meth->field_mul(py, &z3, ry, group->meth));
+ }
+
+CLEANUP:
+ mp_clear(&z1);
+ mp_clear(&z2);
+ mp_clear(&z3);
+ return res;
+}
+
+/* Tests whether the Jacobian point P(px, py, pz) is the point at
+ * infinity, i.e. whether pz is zero.  The result of mp_cmp_z(pz) is
+ * returned as-is; callers in this file compare it against MP_YES. */
+mp_err
+ec_GFp_pt_is_inf_jac(const mp_int *px, const mp_int *py, const mp_int *pz)
+{
+    /* Only the z coordinate decides; x and y are not inspected. */
+    (void)px;
+    (void)py;
+    return mp_cmp_z(pz);
+}
+
+/* Turns the Jacobian point P(px, py, pz) into the point at infinity by
+ * zeroing pz.  px and py are left untouched.  Always returns MP_OKAY. */
+mp_err
+ec_GFp_pt_set_inf_jac(mp_int *px, mp_int *py, mp_int *pz)
+{
+    /* Only the z coordinate is written; x and y are not modified. */
+    (void)px;
+    (void)py;
+    mp_zero(pz);
+    return MP_OKAY;
+}
+
+/* Computes R = P + Q where R is (rx, ry, rz), P is (px, py, pz) and Q is
+ * (qx, qy, 1). Elliptic curve points P, Q, and R can all be identical.
+ * Uses mixed Jacobian-affine coordinates. Assumes input is already
+ * field-encoded using field_enc, and returns output that is still
+ * field-encoded. Uses equation (2) from Brown, Hankerson, Lopez, and
+ * Menezes. Software Implementation of the NIST Elliptic Curves Over Prime
+ * Fields.
+ *
+ * Returns MP_OKAY on success, otherwise the error of the first MPI or
+ * field operation that fails. */
+mp_err
+ec_GFp_pt_add_jac_aff(const mp_int *px, const mp_int *py, const mp_int *pz,
+ const mp_int *qx, const mp_int *qy, mp_int *rx,
+ mp_int *ry, mp_int *rz, const ECGroup *group)
+{
+ mp_err res = MP_OKAY;
+ mp_int A, B, C, D, C2, C3;
+
+ /* zero the digit pointers first so CLEANUP can mp_clear all six
+ * temporaries even if an early mp_init fails */
+ MP_DIGITS(&A) = 0;
+ MP_DIGITS(&B) = 0;
+ MP_DIGITS(&C) = 0;
+ MP_DIGITS(&D) = 0;
+ MP_DIGITS(&C2) = 0;
+ MP_DIGITS(&C3) = 0;
+ MP_CHECKOK(mp_init(&A));
+ MP_CHECKOK(mp_init(&B));
+ MP_CHECKOK(mp_init(&C));
+ MP_CHECKOK(mp_init(&D));
+ MP_CHECKOK(mp_init(&C2));
+ MP_CHECKOK(mp_init(&C3));
+
+ /* If either P or Q is the point at infinity, then return the other
+ * point */
+ if (ec_GFp_pt_is_inf_jac(px, py, pz) == MP_YES) {
+ MP_CHECKOK(ec_GFp_pt_aff2jac(qx, qy, rx, ry, rz, group));
+ goto CLEANUP;
+ }
+ if (ec_GFp_pt_is_inf_aff(qx, qy) == MP_YES) {
+ MP_CHECKOK(mp_copy(px, rx));
+ MP_CHECKOK(mp_copy(py, ry));
+ MP_CHECKOK(mp_copy(pz, rz));
+ goto CLEANUP;
+ }
+
+ /* A = qx * pz^2, B = qy * pz^3 */
+ MP_CHECKOK(group->meth->field_sqr(pz, &A, group->meth));
+ MP_CHECKOK(group->meth->field_mul(&A, pz, &B, group->meth));
+ MP_CHECKOK(group->meth->field_mul(&A, qx, &A, group->meth));
+ MP_CHECKOK(group->meth->field_mul(&B, qy, &B, group->meth));
+
+ /* C = A - px, D = B - py */
+ MP_CHECKOK(group->meth->field_sub(&A, px, &C, group->meth));
+ MP_CHECKOK(group->meth->field_sub(&B, py, &D, group->meth));
+
+ if (mp_cmp_z(&C) == 0) {
+ /* P == Q or P == -Q */
+ if (mp_cmp_z(&D) == 0) {
+ /* P == Q */
+ /* It is cheaper to double (qx, qy, 1) than (px, py, pz). */
+ /* D is zero at this point, so writing its low digit reuses it
+ * as the z coordinate 1 without another allocation. */
+ MP_DIGIT(&D, 0) = 1; /* Set D to 1. */
+ MP_CHECKOK(ec_GFp_pt_dbl_jac(qx, qy, &D, rx, ry, rz, group));
+ } else {
+ /* P == -Q */
+ MP_CHECKOK(ec_GFp_pt_set_inf_jac(rx, ry, rz));
+ }
+ goto CLEANUP;
+ }
+
+ /* C2 = C^2, C3 = C^3 */
+ MP_CHECKOK(group->meth->field_sqr(&C, &C2, group->meth));
+ MP_CHECKOK(group->meth->field_mul(&C, &C2, &C3, group->meth));
+
+ /* rz = pz * C */
+ /* NOTE: rx, ry and rz may alias px, py and pz, so each output below is
+ * written only after the last read of the input it may overwrite. */
+ MP_CHECKOK(group->meth->field_mul(pz, &C, rz, group->meth));
+
+ /* C = px * C^2 */
+ MP_CHECKOK(group->meth->field_mul(px, &C2, &C, group->meth));
+ /* A = D^2 */
+ MP_CHECKOK(group->meth->field_sqr(&D, &A, group->meth));
+
+ /* rx = D^2 - (C^3 + 2 * (px * C^2)) */
+ MP_CHECKOK(group->meth->field_add(&C, &C, rx, group->meth));
+ MP_CHECKOK(group->meth->field_add(&C3, rx, rx, group->meth));
+ MP_CHECKOK(group->meth->field_sub(&A, rx, rx, group->meth));
+
+ /* C3 = py * C^3 */
+ MP_CHECKOK(group->meth->field_mul(py, &C3, &C3, group->meth));
+
+ /* ry = D * (px * C^2 - rx) - py * C^3 */
+ MP_CHECKOK(group->meth->field_sub(&C, rx, ry, group->meth));
+ MP_CHECKOK(group->meth->field_mul(&D, ry, ry, group->meth));
+ MP_CHECKOK(group->meth->field_sub(ry, &C3, ry, group->meth));
+
+CLEANUP:
+ mp_clear(&A);
+ mp_clear(&B);
+ mp_clear(&C);
+ mp_clear(&D);
+ mp_clear(&C2);
+ mp_clear(&C3);
+ return res;
+}
+
+/* Computes R = 2P. Elliptic curve points P and R can be identical. Uses
+ * Jacobian coordinates.
+ *
+ * Assumes input is already field-encoded using field_enc, and returns
+ * output that is still field-encoded.
+ *
+ * This routine implements Point Doubling in the Jacobian Projective
+ * space as described in the paper "Efficient elliptic curve exponentiation
+ * using mixed coordinates", by H. Cohen, A Miyaji, T. Ono.
+ *
+ * Returns MP_OKAY on success, otherwise the error of the first MPI or
+ * field operation that fails.
+ */
+mp_err
+ec_GFp_pt_dbl_jac(const mp_int *px, const mp_int *py, const mp_int *pz,
+ mp_int *rx, mp_int *ry, mp_int *rz, const ECGroup *group)
+{
+ mp_err res = MP_OKAY;
+ mp_int t0, t1, M, S;
+
+ /* zero the digit pointers first so CLEANUP can mp_clear all four
+ * temporaries even if an early mp_init fails */
+ MP_DIGITS(&t0) = 0;
+ MP_DIGITS(&t1) = 0;
+ MP_DIGITS(&M) = 0;
+ MP_DIGITS(&S) = 0;
+ MP_CHECKOK(mp_init(&t0));
+ MP_CHECKOK(mp_init(&t1));
+ MP_CHECKOK(mp_init(&M));
+ MP_CHECKOK(mp_init(&S));
+
+ /* P == inf or P == -P */
+ if (ec_GFp_pt_is_inf_jac(px, py, pz) == MP_YES || mp_cmp_z(py) == 0) {
+ MP_CHECKOK(ec_GFp_pt_set_inf_jac(rx, ry, rz));
+ goto CLEANUP;
+ }
+
+ /* Three ways to compute M, cheapest applicable one first. */
+ if (mp_cmp_d(pz, 1) == 0) {
+ /* M = 3 * px^2 + a */
+ MP_CHECKOK(group->meth->field_sqr(px, &t0, group->meth));
+ MP_CHECKOK(group->meth->field_add(&t0, &t0, &M, group->meth));
+ MP_CHECKOK(group->meth->field_add(&t0, &M, &t0, group->meth));
+ MP_CHECKOK(group->meth->field_add(&t0, &group->curvea, &M, group->meth));
+ } else if (MP_SIGN(&group->curvea) == MP_NEG &&
+ MP_USED(&group->curvea) == 1 &&
+ MP_DIGIT(&group->curvea, 0) == 3) {
+ /* curvea is stored as the single-digit value -3, which allows the
+ * factored form below */
+ /* M = 3 * (px + pz^2) * (px - pz^2) */
+ MP_CHECKOK(group->meth->field_sqr(pz, &M, group->meth));
+ MP_CHECKOK(group->meth->field_add(px, &M, &t0, group->meth));
+ MP_CHECKOK(group->meth->field_sub(px, &M, &t1, group->meth));
+ MP_CHECKOK(group->meth->field_mul(&t0, &t1, &M, group->meth));
+ MP_CHECKOK(group->meth->field_add(&M, &M, &t0, group->meth));
+ MP_CHECKOK(group->meth->field_add(&t0, &M, &M, group->meth));
+ } else {
+ /* M = 3 * (px^2) + a * (pz^4) */
+ MP_CHECKOK(group->meth->field_sqr(px, &t0, group->meth));
+ MP_CHECKOK(group->meth->field_add(&t0, &t0, &M, group->meth));
+ MP_CHECKOK(group->meth->field_add(&t0, &M, &t0, group->meth));
+ MP_CHECKOK(group->meth->field_sqr(pz, &M, group->meth));
+ MP_CHECKOK(group->meth->field_sqr(&M, &M, group->meth));
+ MP_CHECKOK(group->meth->field_mul(&M, &group->curvea, &M, group->meth));
+ MP_CHECKOK(group->meth->field_add(&M, &t0, &M, group->meth));
+ }
+
+ /* rz = 2 * py * pz */
+ /* t0 = 4 * py^2 */
+ if (mp_cmp_d(pz, 1) == 0) {
+ /* pz compares equal to 1: rz = 2 * py, and t0 is its square */
+ MP_CHECKOK(group->meth->field_add(py, py, rz, group->meth));
+ MP_CHECKOK(group->meth->field_sqr(rz, &t0, group->meth));
+ } else {
+ MP_CHECKOK(group->meth->field_add(py, py, &t0, group->meth));
+ MP_CHECKOK(group->meth->field_mul(&t0, pz, rz, group->meth));
+ MP_CHECKOK(group->meth->field_sqr(&t0, &t0, group->meth));
+ }
+
+ /* S = 4 * px * py^2 = px * (2 * py)^2 */
+ MP_CHECKOK(group->meth->field_mul(px, &t0, &S, group->meth));
+
+ /* rx = M^2 - 2 * S */
+ MP_CHECKOK(group->meth->field_add(&S, &S, &t1, group->meth));
+ MP_CHECKOK(group->meth->field_sqr(&M, rx, group->meth));
+ MP_CHECKOK(group->meth->field_sub(rx, &t1, rx, group->meth));
+
+ /* ry = M * (S - rx) - 8 * py^4 */
+ /* t1 = t0^2 = 16 * py^4; it is halved below to get 8 * py^4. An odd
+ * t1 first has the modulus added so the division by 2 is exact. */
+ MP_CHECKOK(group->meth->field_sqr(&t0, &t1, group->meth));
+ if (mp_isodd(&t1)) {
+ MP_CHECKOK(mp_add(&t1, &group->meth->irr, &t1));
+ }
+ MP_CHECKOK(mp_div_2(&t1, &t1));
+ MP_CHECKOK(group->meth->field_sub(&S, rx, &S, group->meth));
+ MP_CHECKOK(group->meth->field_mul(&M, &S, &M, group->meth));
+ MP_CHECKOK(group->meth->field_sub(&M, &t1, ry, group->meth));
+
+CLEANUP:
+ mp_clear(&t0);
+ mp_clear(&t1);
+ mp_clear(&M);
+ mp_clear(&S);
+ return res;
+}
+
+/* by default, this routine is unused and thus doesn't need to be compiled */
+#ifdef ECL_ENABLE_GFP_PT_MUL_JAC
+/* Computes R = nP where R is (rx, ry) and P is (px, py). The parameters
+ * a, b and p are the elliptic curve coefficients and the prime that
+ * determines the field GFp. Elliptic curve points P and R can be
+ * identical. Uses mixed Jacobian-affine coordinates. Assumes input is
+ * already field-encoded using field_enc, and returns output that is still
+ * field-encoded. Uses 4-bit window method.
+ *
+ * Returns MP_BADARG (via ARGCHK) when group, n, px or py is NULL,
+ * MP_OKAY on success, otherwise the error of the first failing call. */
+mp_err
+ec_GFp_pt_mul_jac(const mp_int *n, const mp_int *px, const mp_int *py,
+ mp_int *rx, mp_int *ry, const ECGroup *group)
+{
+ mp_err res = MP_OKAY;
+ mp_int precomp[16][2], rz;
+ int i, ni, d;
+
+ /* zero every digit pointer first so CLEANUP can mp_clear the whole
+ * table and rz even if an early mp_init fails */
+ MP_DIGITS(&rz) = 0;
+ for (i = 0; i < 16; i++) {
+ MP_DIGITS(&precomp[i][0]) = 0;
+ MP_DIGITS(&precomp[i][1]) = 0;
+ }
+
+ ARGCHK(group != NULL, MP_BADARG);
+ ARGCHK((n != NULL) && (px != NULL) && (py != NULL), MP_BADARG);
+
+ /* initialize precomputation table */
+ for (i = 0; i < 16; i++) {
+ MP_CHECKOK(mp_init(&precomp[i][0]));
+ MP_CHECKOK(mp_init(&precomp[i][1]));
+ }
+
+ /* fill precomputation table */
+ /* precomp[i] = i * P in affine coordinates: entry 0 is (0, 0), entry 1
+ * is P, and each later entry is P plus the previous one */
+ mp_zero(&precomp[0][0]);
+ mp_zero(&precomp[0][1]);
+ MP_CHECKOK(mp_copy(px, &precomp[1][0]));
+ MP_CHECKOK(mp_copy(py, &precomp[1][1]));
+ for (i = 2; i < 16; i++) {
+ MP_CHECKOK(group->point_add(&precomp[1][0], &precomp[1][1],
+ &precomp[i - 1][0], &precomp[i - 1][1],
+ &precomp[i][0], &precomp[i][1], group));
+ }
+
+ /* d = number of 4-bit windows needed to cover n */
+ d = (mpl_significant_bits(n) + 3) / 4;
+
+ /* R = inf */
+ MP_CHECKOK(mp_init(&rz));
+ MP_CHECKOK(ec_GFp_pt_set_inf_jac(rx, ry, &rz));
+
+ /* process the windows from most significant to least significant */
+ for (i = d - 1; i >= 0; i--) {
+ /* compute window ni */
+ ni = MP_GET_BIT(n, 4 * i + 3);
+ ni <<= 1;
+ ni |= MP_GET_BIT(n, 4 * i + 2);
+ ni <<= 1;
+ ni |= MP_GET_BIT(n, 4 * i + 1);
+ ni <<= 1;
+ ni |= MP_GET_BIT(n, 4 * i);
+ /* R = 2^4 * R */
+ MP_CHECKOK(ec_GFp_pt_dbl_jac(rx, ry, &rz, rx, ry, &rz, group));
+ MP_CHECKOK(ec_GFp_pt_dbl_jac(rx, ry, &rz, rx, ry, &rz, group));
+ MP_CHECKOK(ec_GFp_pt_dbl_jac(rx, ry, &rz, rx, ry, &rz, group));
+ MP_CHECKOK(ec_GFp_pt_dbl_jac(rx, ry, &rz, rx, ry, &rz, group));
+ /* R = R + (ni * P) */
+ MP_CHECKOK(ec_GFp_pt_add_jac_aff(rx, ry, &rz, &precomp[ni][0], &precomp[ni][1], rx, ry,
+ &rz, group));
+ }
+
+ /* convert result S to affine coordinates */
+ MP_CHECKOK(ec_GFp_pt_jac2aff(rx, ry, &rz, rx, ry, group));
+
+CLEANUP:
+ mp_clear(&rz);
+ for (i = 0; i < 16; i++) {
+ mp_clear(&precomp[i][0]);
+ mp_clear(&precomp[i][1]);
+ }
+ return res;
+}
+#endif
+
+/* Elliptic curve scalar-point multiplication. Computes R(x, y) = k1 * G +
+ * k2 * P(x, y), where G is the generator (base point) of the group of
+ * points on the elliptic curve. Allows k1 = NULL or { k2, P } = NULL.
+ * Uses mixed Jacobian-affine coordinates. Input and output values are
+ * assumed to be NOT field-encoded. Uses algorithm 15 (simultaneous
+ * multiple point multiplication) from Brown, Hankerson, Lopez, Menezes.
+ * Software Implementation of the NIST Elliptic Curves over Prime Fields.
+ *
+ * Returns MP_BADARG (via ARGCHK) when group is NULL or when both k1 and
+ * the { k2, P } pair are missing, MP_OKAY on success, otherwise the error
+ * of the first failing call. */
+mp_err
+ec_GFp_pts_mul_jac(const mp_int *k1, const mp_int *k2, const mp_int *px,
+ const mp_int *py, mp_int *rx, mp_int *ry,
+ const ECGroup *group)
+{
+ mp_err res = MP_OKAY;
+ /* precomp[i][j] holds the affine point i * A + j * B as an (x, y)
+ * pair, where A and B are the points paired with scalars a and b */
+ mp_int precomp[4][4][2];
+ mp_int rz;
+ const mp_int *a, *b;
+ unsigned int i, j;
+ int ai, bi, d;
+
+ /* zero every digit pointer first so CLEANUP can mp_clear the whole
+ * table and rz even if an early mp_init fails */
+ for (i = 0; i < 4; i++) {
+ for (j = 0; j < 4; j++) {
+ MP_DIGITS(&precomp[i][j][0]) = 0;
+ MP_DIGITS(&precomp[i][j][1]) = 0;
+ }
+ }
+ MP_DIGITS(&rz) = 0;
+
+ ARGCHK(group != NULL, MP_BADARG);
+ ARGCHK(!((k1 == NULL) && ((k2 == NULL) || (px == NULL) || (py == NULL))), MP_BADARG);
+
+ /* if some arguments are not defined use ECPoint_mul */
+ if (k1 == NULL) {
+ return ECPoint_mul(group, k2, px, py, rx, ry);
+ } else if ((k2 == NULL) || (px == NULL) || (py == NULL)) {
+ return ECPoint_mul(group, k1, NULL, NULL, rx, ry);
+ }
+
+ /* initialize precomputation table */
+ for (i = 0; i < 4; i++) {
+ for (j = 0; j < 4; j++) {
+ MP_CHECKOK(mp_init(&precomp[i][j][0]));
+ MP_CHECKOK(mp_init(&precomp[i][j][1]));
+ }
+ }
+
+ /* fill precomputation table */
+ /* assign {k1, k2} = {a, b} such that len(a) >= len(b) */
+ /* P is field-encoded here when the method has an encoder; genx/geny
+ * are copied as stored.
+ * NOTE(review): presumably the group keeps the generator already
+ * field-encoded -- confirm against the group construction code. */
+ if (mpl_significant_bits(k1) < mpl_significant_bits(k2)) {
+ a = k2;
+ b = k1;
+ if (group->meth->field_enc) {
+ MP_CHECKOK(group->meth->field_enc(px, &precomp[1][0][0], group->meth));
+ MP_CHECKOK(group->meth->field_enc(py, &precomp[1][0][1], group->meth));
+ } else {
+ MP_CHECKOK(mp_copy(px, &precomp[1][0][0]));
+ MP_CHECKOK(mp_copy(py, &precomp[1][0][1]));
+ }
+ MP_CHECKOK(mp_copy(&group->genx, &precomp[0][1][0]));
+ MP_CHECKOK(mp_copy(&group->geny, &precomp[0][1][1]));
+ } else {
+ a = k1;
+ b = k2;
+ MP_CHECKOK(mp_copy(&group->genx, &precomp[1][0][0]));
+ MP_CHECKOK(mp_copy(&group->geny, &precomp[1][0][1]));
+ if (group->meth->field_enc) {
+ MP_CHECKOK(group->meth->field_enc(px, &precomp[0][1][0], group->meth));
+ MP_CHECKOK(group->meth->field_enc(py, &precomp[0][1][1], group->meth));
+ } else {
+ MP_CHECKOK(mp_copy(px, &precomp[0][1][0]));
+ MP_CHECKOK(mp_copy(py, &precomp[0][1][1]));
+ }
+ }
+ /* precompute [*][0][*] */
+ /* [0][0] = (0, 0), [2][0] = 2 * [1][0], [3][0] = [1][0] + [2][0] */
+ mp_zero(&precomp[0][0][0]);
+ mp_zero(&precomp[0][0][1]);
+ MP_CHECKOK(group->point_dbl(&precomp[1][0][0], &precomp[1][0][1],
+ &precomp[2][0][0], &precomp[2][0][1], group));
+ MP_CHECKOK(group->point_add(&precomp[1][0][0], &precomp[1][0][1],
+ &precomp[2][0][0], &precomp[2][0][1],
+ &precomp[3][0][0], &precomp[3][0][1], group));
+ /* precompute [*][1][*] */
+ for (i = 1; i < 4; i++) {
+ MP_CHECKOK(group->point_add(&precomp[0][1][0], &precomp[0][1][1],
+ &precomp[i][0][0], &precomp[i][0][1],
+ &precomp[i][1][0], &precomp[i][1][1], group));
+ }
+ /* precompute [*][2][*] */
+ MP_CHECKOK(group->point_dbl(&precomp[0][1][0], &precomp[0][1][1],
+ &precomp[0][2][0], &precomp[0][2][1], group));
+ for (i = 1; i < 4; i++) {
+ MP_CHECKOK(group->point_add(&precomp[0][2][0], &precomp[0][2][1],
+ &precomp[i][0][0], &precomp[i][0][1],
+ &precomp[i][2][0], &precomp[i][2][1], group));
+ }
+ /* precompute [*][3][*] */
+ MP_CHECKOK(group->point_add(&precomp[0][1][0], &precomp[0][1][1],
+ &precomp[0][2][0], &precomp[0][2][1],
+ &precomp[0][3][0], &precomp[0][3][1], group));
+ for (i = 1; i < 4; i++) {
+ MP_CHECKOK(group->point_add(&precomp[0][3][0], &precomp[0][3][1],
+ &precomp[i][0][0], &precomp[i][0][1],
+ &precomp[i][3][0], &precomp[i][3][1], group));
+ }
+
+ /* d = number of 2-bit windows needed to cover the longer scalar a */
+ d = (mpl_significant_bits(a) + 1) / 2;
+
+ /* R = inf */
+ MP_CHECKOK(mp_init(&rz));
+ MP_CHECKOK(ec_GFp_pt_set_inf_jac(rx, ry, &rz));
+
+ /* i is unsigned, so count down with a post-decrement test: the body
+ * sees i = d - 1 down to 0 */
+ for (i = d; i-- > 0;) {
+ /* ai, bi = the i-th 2-bit windows of a and b */
+ ai = MP_GET_BIT(a, 2 * i + 1);
+ ai <<= 1;
+ ai |= MP_GET_BIT(a, 2 * i);
+ bi = MP_GET_BIT(b, 2 * i + 1);
+ bi <<= 1;
+ bi |= MP_GET_BIT(b, 2 * i);
+ /* R = 2^2 * R */
+ MP_CHECKOK(ec_GFp_pt_dbl_jac(rx, ry, &rz, rx, ry, &rz, group));
+ MP_CHECKOK(ec_GFp_pt_dbl_jac(rx, ry, &rz, rx, ry, &rz, group));
+ /* R = R + (ai * A + bi * B) */
+ MP_CHECKOK(ec_GFp_pt_add_jac_aff(rx, ry, &rz, &precomp[ai][bi][0], &precomp[ai][bi][1],
+ rx, ry, &rz, group));
+ }
+
+ /* convert the Jacobian result back to affine coordinates */
+ MP_CHECKOK(ec_GFp_pt_jac2aff(rx, ry, &rz, rx, ry, group));
+
+ /* undo the field encoding so the caller receives plain coordinates */
+ if (group->meth->field_dec) {
+ MP_CHECKOK(group->meth->field_dec(rx, rx, group->meth));
+ MP_CHECKOK(group->meth->field_dec(ry, ry, group->meth));
+ }
+
+CLEANUP:
+ mp_clear(&rz);
+ for (i = 0; i < 4; i++) {
+ for (j = 0; j < 4; j++) {
+ mp_clear(&precomp[i][j][0]);
+ mp_clear(&precomp[i][j][1]);
+ }
+ }
+ return res;
+}
diff --git a/security/nss/lib/freebl/ecl/ecp_jm.c b/security/nss/lib/freebl/ecl/ecp_jm.c
new file mode 100644
index 000000000..a1106cea8
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecp_jm.c
@@ -0,0 +1,283 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ecp.h"
+#include "ecl-priv.h"
+#include "mplogic.h"
+#include <stdlib.h>
+
+#define MAX_SCRATCH 6
+
+/* Point doubling, R = 2P, in Modified Jacobian coordinates.
+ *
+ * P is (px, py, pz) accompanied by paz4 = a * pz^4; the result is stored
+ * in (rx, ry, rz) together with raz4 = a * rz^4. The input and output
+ * points may share storage. The first four entries of scratch[] are used
+ * as temporaries.
+ *
+ * Inputs are expected to be field-encoded (field_enc) and the outputs are
+ * left field-encoded. When P is the point at infinity the result is set
+ * through ec_GFp_pt_set_inf_jac and raz4 is not written on that path.
+ */
+static mp_err
+ec_GFp_pt_dbl_jm(const mp_int *px, const mp_int *py, const mp_int *pz,
+                 const mp_int *paz4, mp_int *rx, mp_int *ry, mp_int *rz,
+                 mp_int *raz4, mp_int scratch[], const ECGroup *group)
+{
+#if MAX_SCRATCH < 4
+#error "Scratch array defined too small "
+#endif
+    const GFMethod *fm = group->meth;
+    mp_int *u = &scratch[0];
+    mp_int *v = &scratch[1];
+    mp_int *slope = &scratch[2];
+    mp_int *s = &scratch[3];
+    mp_err res = MP_OKAY;
+
+    /* Doubling the point at infinity gives the point at infinity. */
+    if (ec_GFp_pt_is_inf_jac(px, py, pz) == MP_YES) {
+        return ec_GFp_pt_set_inf_jac(rx, ry, rz);
+    }
+
+    /* slope = 3 * px^2 + a * pz^4 */
+    MP_CHECKOK(fm->field_sqr(px, u, fm));
+    MP_CHECKOK(fm->field_add(u, u, slope, fm));
+    MP_CHECKOK(fm->field_add(u, slope, u, fm));
+    MP_CHECKOK(fm->field_add(u, paz4, slope, fm));
+
+    /* rz = 2 * py * pz */
+    MP_CHECKOK(fm->field_mul(py, pz, s, fm));
+    MP_CHECKOK(fm->field_add(s, s, rz, fm));
+
+    /* u = 2 * py^2, v = 2 * u^2 = 8 * py^4 */
+    MP_CHECKOK(fm->field_sqr(py, u, fm));
+    MP_CHECKOK(fm->field_add(u, u, u, fm));
+    MP_CHECKOK(fm->field_sqr(u, v, fm));
+    MP_CHECKOK(fm->field_add(v, v, v, fm));
+
+    /* s = 2 * px * u = 4 * px * py^2 */
+    MP_CHECKOK(fm->field_mul(px, u, s, fm));
+    MP_CHECKOK(fm->field_add(s, s, s, fm));
+
+    /* rx = slope^2 - 2 * s */
+    MP_CHECKOK(fm->field_sqr(slope, rx, fm));
+    MP_CHECKOK(fm->field_sub(rx, s, rx, fm));
+    MP_CHECKOK(fm->field_sub(rx, s, rx, fm));
+
+    /* ry = slope * (s - rx) - v */
+    MP_CHECKOK(fm->field_sub(s, rx, s, fm));
+    MP_CHECKOK(fm->field_mul(s, slope, ry, fm));
+    MP_CHECKOK(fm->field_sub(ry, v, ry, fm));
+
+    /* raz4 = 2 * v * paz4 */
+    MP_CHECKOK(fm->field_mul(paz4, v, raz4, fm));
+    MP_CHECKOK(fm->field_add(raz4, raz4, raz4, fm));
+
+CLEANUP:
+    return res;
+}
+
+/* Mixed addition R = P + Q, with P = (px, py, pz) in Modified Jacobian
+ * form (paz4 = a * pz^4) and Q = (qx, qy) affine. The sum is written to
+ * (rx, ry, rz) and raz4 = a * rz^4. Output storage may alias the inputs.
+ * All six entries of scratch[] are used as temporaries.
+ *
+ * Inputs are expected to be field-encoded (field_enc); the outputs stay
+ * field-encoded. If either operand is the point at infinity the other
+ * operand is returned. The general formula below contains no dedicated
+ * branch for P == Q. */
+static mp_err
+ec_GFp_pt_add_jm_aff(const mp_int *px, const mp_int *py, const mp_int *pz,
+                     const mp_int *paz4, const mp_int *qx,
+                     const mp_int *qy, mp_int *rx, mp_int *ry, mp_int *rz,
+                     mp_int *raz4, mp_int scratch[], const ECGroup *group)
+{
+#if MAX_SCRATCH < 6
+#error "Scratch array defined too small "
+#endif
+    const GFMethod *fm = group->meth;
+    mp_int *u = &scratch[0];
+    mp_int *v = &scratch[1];
+    mp_int *dx = &scratch[2];
+    mp_int *dy = &scratch[3];
+    mp_int *dx2 = &scratch[4];
+    mp_int *dx3 = &scratch[5];
+    mp_err res = MP_OKAY;
+
+    /* P == infinity: R = Q lifted to Jacobian form, raz4 = a * rz^4 */
+    if (ec_GFp_pt_is_inf_jac(px, py, pz) == MP_YES) {
+        MP_CHECKOK(ec_GFp_pt_aff2jac(qx, qy, rx, ry, rz, group));
+        MP_CHECKOK(fm->field_sqr(rz, raz4, fm));
+        MP_CHECKOK(fm->field_sqr(raz4, raz4, fm));
+        MP_CHECKOK(fm->field_mul(raz4, &group->curvea, raz4, fm));
+        goto CLEANUP;
+    }
+    /* Q == infinity: R = P, copied component by component */
+    if (ec_GFp_pt_is_inf_aff(qx, qy) == MP_YES) {
+        MP_CHECKOK(mp_copy(px, rx));
+        MP_CHECKOK(mp_copy(py, ry));
+        MP_CHECKOK(mp_copy(pz, rz));
+        MP_CHECKOK(mp_copy(paz4, raz4));
+        goto CLEANUP;
+    }
+
+    /* u = qx * pz^2, v = qy * pz^3 */
+    MP_CHECKOK(fm->field_sqr(pz, u, fm));
+    MP_CHECKOK(fm->field_mul(u, pz, v, fm));
+    MP_CHECKOK(fm->field_mul(u, qx, u, fm));
+    MP_CHECKOK(fm->field_mul(v, qy, v, fm));
+
+    /* dx = u - px, dy = v - py */
+    MP_CHECKOK(fm->field_sub(u, px, dx, fm));
+    MP_CHECKOK(fm->field_sub(v, py, dy, fm));
+
+    /* dx2 = dx^2, dx3 = dx^3 */
+    MP_CHECKOK(fm->field_sqr(dx, dx2, fm));
+    MP_CHECKOK(fm->field_mul(dx, dx2, dx3, fm));
+
+    /* rz = pz * dx */
+    MP_CHECKOK(fm->field_mul(pz, dx, rz, fm));
+
+    /* dx is reused from here on: dx = px * dx^2 */
+    MP_CHECKOK(fm->field_mul(px, dx2, dx, fm));
+    /* u = dy^2 */
+    MP_CHECKOK(fm->field_sqr(dy, u, fm));
+
+    /* rx = dy^2 - (dx^3 + 2 * (px * dx^2)) */
+    MP_CHECKOK(fm->field_add(dx, dx, rx, fm));
+    MP_CHECKOK(fm->field_add(dx3, rx, rx, fm));
+    MP_CHECKOK(fm->field_sub(u, rx, rx, fm));
+
+    /* dx3 = py * dx^3 */
+    MP_CHECKOK(fm->field_mul(py, dx3, dx3, fm));
+
+    /* ry = dy * (px * dx^2 - rx) - py * dx^3 */
+    MP_CHECKOK(fm->field_sub(dx, rx, ry, fm));
+    MP_CHECKOK(fm->field_mul(dy, ry, ry, fm));
+    MP_CHECKOK(fm->field_sub(ry, dx3, ry, fm));
+
+    /* raz4 = a * rz^4 */
+    MP_CHECKOK(fm->field_sqr(rz, raz4, fm));
+    MP_CHECKOK(fm->field_sqr(raz4, raz4, fm));
+    MP_CHECKOK(fm->field_mul(raz4, &group->curvea, raz4, fm));
+CLEANUP:
+    return res;
+}
+
+/* Computes R = nP where R is (rx, ry) and P is (px, py). Elliptic curve
+ * points P and R can be identical. The running sum is kept in Modified
+ * Jacobian coordinates: doublings use ec_GFp_pt_dbl_jm and additions use
+ * the mixed Modified Jacobian-affine routine ec_GFp_pt_add_jm_aff against
+ * a table of affine odd multiples of P. Assumes input is already
+ * field-encoded using field_enc, and returns output that is still
+ * field-encoded. Uses 5-bit window NAF method (algorithm 11) for
+ * scalar-point multiplication from Brown, Hankerson, Lopez, Menezes.
+ * Software Implementation of the NIST Elliptic Curves Over Prime Fields.
+ *
+ * Returns MP_OKAY on success, MP_BADARG for NULL group/n/px/py, MP_MEM if
+ * the NAF buffer cannot be allocated, or the error of the first failing
+ * mp/field/point operation. */
+mp_err
+ec_GFp_pt_mul_jm_wNAF(const mp_int *n, const mp_int *px, const mp_int *py,
+                      mp_int *rx, mp_int *ry, const ECGroup *group)
+{
+    mp_err res = MP_OKAY;
+    mp_int precomp[16][2], rz, tpx, tpy;
+    mp_int raz4;
+    mp_int scratch[MAX_SCRATCH];
+    signed char *naf = NULL;
+    int i, idx, orderBitSize;
+
+    /* Mark every mp_int as unallocated so that the unconditional
+     * mp_clear calls at CLEANUP are safe on any early exit. */
+    MP_DIGITS(&rz) = 0;
+    MP_DIGITS(&raz4) = 0;
+    MP_DIGITS(&tpx) = 0;
+    MP_DIGITS(&tpy) = 0;
+    for (i = 0; i < 16; i++) {
+        MP_DIGITS(&precomp[i][0]) = 0;
+        MP_DIGITS(&precomp[i][1]) = 0;
+    }
+    for (i = 0; i < MAX_SCRATCH; i++) {
+        MP_DIGITS(&scratch[i]) = 0;
+    }
+
+    ARGCHK(group != NULL, MP_BADARG);
+    ARGCHK((n != NULL) && (px != NULL) && (py != NULL), MP_BADARG);
+
+    /* initialize precomputation table */
+    MP_CHECKOK(mp_init(&tpx));
+    MP_CHECKOK(mp_init(&tpy));
+    MP_CHECKOK(mp_init(&rz));
+    MP_CHECKOK(mp_init(&raz4));
+
+    for (i = 0; i < 16; i++) {
+        MP_CHECKOK(mp_init(&precomp[i][0]));
+        MP_CHECKOK(mp_init(&precomp[i][1]));
+    }
+    for (i = 0; i < MAX_SCRATCH; i++) {
+        MP_CHECKOK(mp_init(&scratch[i]));
+    }
+
+    /* Set precomp[8] = P */
+    MP_CHECKOK(mp_copy(px, &precomp[8][0]));
+    MP_CHECKOK(mp_copy(py, &precomp[8][1]));
+
+    /* Set (tpx, tpy) = 2P */
+    MP_CHECKOK(group->point_dbl(&precomp[8][0], &precomp[8][1], &tpx, &tpy,
+                                group));
+
+    /* Set 3P, 5P, ..., 15P in precomp[9..15] */
+    for (i = 8; i < 15; i++) {
+        MP_CHECKOK(group->point_add(&precomp[i][0], &precomp[i][1], &tpx, &tpy,
+                                    &precomp[i + 1][0], &precomp[i + 1][1],
+                                    group));
+    }
+
+    /* Set -15P, -13P, ..., -P in precomp[0..7] */
+    for (i = 0; i < 8; i++) {
+        MP_CHECKOK(mp_copy(&precomp[15 - i][0], &precomp[i][0]));
+        MP_CHECKOK(group->meth->field_neg(&precomp[15 - i][1], &precomp[i][1],
+                                          group->meth));
+    }
+
+    /* R = inf */
+    MP_CHECKOK(ec_GFp_pt_set_inf_jac(rx, ry, &rz));
+
+    orderBitSize = mpl_significant_bits(&group->order);
+
+    /* Allocate memory for NAF */
+    naf = (signed char *)malloc(sizeof(signed char) * (orderBitSize + 1));
+    if (naf == NULL) {
+        res = MP_MEM;
+        goto CLEANUP;
+    }
+
+    /* Compute 5NAF */
+    ec_compute_wNAF(naf, orderBitSize, n, 5);
+
+    /* wNAF method; every step is checked so that a failed field operation
+     * aborts the multiplication instead of producing a bogus point. */
+    for (i = orderBitSize; i >= 0; i--) {
+        /* R = 2R */
+        MP_CHECKOK(ec_GFp_pt_dbl_jm(rx, ry, &rz, &raz4, rx, ry, &rz,
+                                    &raz4, scratch, group));
+        if (naf[i] != 0) {
+            /* table slot of the odd multiple naf[i] * P */
+            idx = (naf[i] + 15) / 2;
+            MP_CHECKOK(ec_GFp_pt_add_jm_aff(rx, ry, &rz, &raz4,
+                                            &precomp[idx][0],
+                                            &precomp[idx][1], rx, ry,
+                                            &rz, &raz4, scratch, group));
+        }
+    }
+
+    /* convert result to affine coordinates */
+    MP_CHECKOK(ec_GFp_pt_jac2aff(rx, ry, &rz, rx, ry, group));
+
+CLEANUP:
+    for (i = 0; i < MAX_SCRATCH; i++) {
+        mp_clear(&scratch[i]);
+    }
+    for (i = 0; i < 16; i++) {
+        mp_clear(&precomp[i][0]);
+        mp_clear(&precomp[i][1]);
+    }
+    mp_clear(&tpx);
+    mp_clear(&tpy);
+    mp_clear(&rz);
+    mp_clear(&raz4);
+    free(naf);
+    return res;
+}
diff --git a/security/nss/lib/freebl/ecl/ecp_mont.c b/security/nss/lib/freebl/ecl/ecp_mont.c
new file mode 100644
index 000000000..779685b4d
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecp_mont.c
@@ -0,0 +1,154 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* Uses Montgomery reduction for field arithmetic. See mpi/mpmontg.c for
+ * code implementation. */
+
+#include "mpi.h"
+#include "mplogic.h"
+#include "mpi-priv.h"
+#include "ecl-priv.h"
+#include "ecp.h"
+#include <stdlib.h>
+#include <stdio.h>
+
+/* Build a GFMethod for the prime field defined by irr whose multiply,
+ * square, divide, encode and decode hooks are the Montgomery variants
+ * defined in this file. The Montgomery parameters are stored in extra1
+ * and released through extra_free. Returns NULL if either the base
+ * method or the Montgomery parameter block cannot be created. */
+GFMethod *
+GFMethod_consGFp_mont(const mp_int *irr)
+{
+    mp_mont_modulus *mont;
+    GFMethod *meth = GFMethod_consGFp(irr);
+
+    if (meth == NULL) {
+        return NULL;
+    }
+
+    mont = (mp_mont_modulus *)malloc(sizeof(mp_mont_modulus));
+    if (mont == NULL) {
+        /* nothing Montgomery-specific is attached to meth yet */
+        GFMethod_free(meth);
+        return NULL;
+    }
+
+    /* N is a structure copy of the method's modulus; n0prime is the
+     * negated radix inverse of its least significant digit. */
+    mont->N = meth->irr;
+    mont->n0prime = 0 - s_mp_invmod_radix(MP_DIGIT(&meth->irr, 0));
+
+    meth->extra1 = mont;
+    meth->extra2 = NULL;
+    meth->extra_free = &ec_GFp_extra_free_mont;
+    meth->field_enc = &ec_GFp_enc_mont;
+    meth->field_dec = &ec_GFp_dec_mont;
+    meth->field_mul = &ec_GFp_mul_mont;
+    meth->field_sqr = &ec_GFp_sqr_mont;
+    meth->field_div = &ec_GFp_div_mont;
+
+    return meth;
+}
+
+/* Wrapper functions for generic prime field arithmetic. */
+
+/* Field multiplication using Montgomery reduction: r = a # b, where the
+ * Montgomery parameters are taken from meth->extra1. r may alias a or b.
+ * Returns MP_OKAY or the error of the first failing mp operation. */
+mp_err
+ec_GFp_mul_mont(const mp_int *a, const mp_int *b, mp_int *r,
+                const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+
+#ifdef MP_MONT_USE_MP_MUL
+    /* if MP_MONT_USE_MP_MUL is defined, then the function s_mp_mul_mont
+     * is not implemented and we have to use mp_mul and s_mp_redc directly
+     */
+    MP_CHECKOK(mp_mul(a, b, r));
+    MP_CHECKOK(s_mp_redc(r, (mp_mont_modulus *)meth->extra1));
+#else
+    mp_int s;
+
+    /* mark s as unallocated so the mp_clear at CLEANUP is always safe */
+    MP_DIGITS(&s) = 0;
+    if ((a != r) && (b != r)) {
+        return s_mp_mul_mont(a, b, r, (mp_mont_modulus *)meth->extra1);
+    }
+    /* s_mp_mul_mont doesn't allow source and destination to be the same,
+     * so multiply into a temporary and copy the product back */
+    MP_CHECKOK(mp_init(&s));
+    MP_CHECKOK(s_mp_mul_mont(a, b, &s, (mp_mont_modulus *)meth->extra1));
+    MP_CHECKOK(mp_copy(&s, r));
+#endif
+CLEANUP:
+#ifndef MP_MONT_USE_MP_MUL
+    /* released here so that a failed multiply or copy does not leak s */
+    mp_clear(&s);
+#endif
+    return res;
+}
+
+/* Montgomery field squaring, r = a # a; delegates to ec_GFp_mul_mont with
+ * both factors set to a. */
+mp_err
+ec_GFp_sqr_mont(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+    const mp_int *factor = a;
+
+    return ec_GFp_mul_mont(factor, factor, r, meth);
+}
+
+/* Montgomery field division, r = a \ b.
+ *
+ * Write A = aZ for the Montgomery encoding of a. Plain division of two
+ * encoded values gives A/B = a/b, which has lost its factor Z, so the
+ * quotient from ec_GFp_div is encoded once to obtain (a/b)Z. When a is
+ * NULL the quotient is encoded a second time: per the original note,
+ * Binv = (1/b)Z = (1/B)(Z^2), i.e. the inverse of an encoded value needs
+ * two factors of Z to be a valid encoding again. */
+mp_err
+ec_GFp_div_mont(const mp_int *a, const mp_int *b, mp_int *r,
+                const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+
+    MP_CHECKOK(ec_GFp_div(a, b, r, meth));
+    MP_CHECKOK(ec_GFp_enc_mont(r, r, meth));
+    if (a != NULL) {
+        /* ordinary quotient: a single encoding is enough */
+        goto CLEANUP;
+    }
+    /* inversion request: restore the second factor of Z */
+    MP_CHECKOK(ec_GFp_enc_mont(r, r, meth));
+CLEANUP:
+    return res;
+}
+
+/* Convert a into Montgomery form and store it in r: a is copied, shifted
+ * left by as many digits as the modulus uses, and reduced modulo the
+ * modulus held in meth->extra1. See s_mp_to_mont in mpi/mpmontg.c */
+mp_err
+ec_GFp_enc_mont(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    const mp_int *modulus = &((mp_mont_modulus *)meth->extra1)->N;
+
+    MP_CHECKOK(mp_copy(a, r));
+    MP_CHECKOK(s_mp_lshd(r, MP_USED(modulus)));
+    MP_CHECKOK(mp_mod(r, modulus, r));
+CLEANUP:
+    return res;
+}
+
+/* Convert a out of Montgomery form into r by running one Montgomery
+ * reduction (s_mp_redc) on a copy of a. r may be the same object as a,
+ * in which case the copy is skipped. */
+mp_err
+ec_GFp_dec_mont(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+    mp_mont_modulus *mont = (mp_mont_modulus *)meth->extra1;
+    mp_err res = MP_OKAY;
+
+    if (r != a) {
+        MP_CHECKOK(mp_copy(a, r));
+    }
+    MP_CHECKOK(s_mp_redc(r, mont));
+CLEANUP:
+    return res;
+}
+
+/* Release the Montgomery parameter block stored in meth->extra1 and
+ * reset the field to NULL. Calling it when extra1 is already NULL has no
+ * effect, since free(NULL) is a no-op. */
+void
+ec_GFp_extra_free_mont(GFMethod *meth)
+{
+    free(meth->extra1);
+    meth->extra1 = NULL;
+}
diff --git a/security/nss/lib/freebl/ecl/tests/ec_naft.c b/security/nss/lib/freebl/ecl/tests/ec_naft.c
new file mode 100644
index 000000000..61ef15c36
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/tests/ec_naft.c
@@ -0,0 +1,121 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi.h"
+#include "mplogic.h"
+#include "ecl.h"
+#include "ecp.h"
+#include "ecl-priv.h"
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <time.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+
+/* Returns 2^e as an integer. This is meant to be used for small powers of
+ * two. */
+int ec_twoTo(int e);
+
+/* Number of bits of scalar to test */
+#define BITSIZE 160
+
+/* Time k repetitions of operation op. */
+#define M_TimeOperation(op, k) \
+ { \
+ double dStart, dNow, dUserTime; \
+ struct rusage ru; \
+ int i; \
+ getrusage(RUSAGE_SELF, &ru); \
+ dStart = (double)ru.ru_utime.tv_sec + (double)ru.ru_utime.tv_usec * 0.000001; \
+ for (i = 0; i < k; i++) { \
+ { \
+ op; \
+ } \
+ }; \
+ getrusage(RUSAGE_SELF, &ru); \
+ dNow = (double)ru.ru_utime.tv_sec + (double)ru.ru_utime.tv_usec * 0.000001; \
+ dUserTime = dNow - dStart; \
+ if (dUserTime) \
+ printf(" %-45s\n k: %6i, t: %6.2f sec\n", #op, k, dUserTime); \
+ }
+
+/* Tests wNAF computation. Non-adjacent-form is discussed in the paper: D.
+ * Hankerson, J. Hernandez and A. Menezes, "Software implementation of
+ * elliptic curve cryptography over binary fields", Proc. CHES 2000.
+ *
+ * The x coordinate of the SECP-160R1 generator serves as the scalar. Its
+ * width-w NAF is recomputed into an integer and compared with the scalar,
+ * then every digit is checked for parity, magnitude and spacing, and
+ * finally the conversion is timed. Returns MP_OKAY when every check
+ * passes and a non-zero mp_err otherwise. */
+
+mp_err
+main(void)
+{
+    signed char naf[BITSIZE + 1];
+    ECGroup *group = NULL;
+    mp_int k;
+    mp_int *scalar;
+    int i, count;
+    mp_err res = MP_OKAY;
+    int w = 5;
+    char s[1000];
+
+    /* mark k as unallocated so that CLEANUP may always clear it */
+    MP_DIGITS(&k) = 0;
+
+    /* Get a 160 bit scalar to compute wNAF from */
+    group = ECGroup_fromName(ECCurve_SECG_PRIME_160R1);
+    if (group == NULL) {
+        printf("Error: could not construct group.\n");
+        res = MP_NO;
+        goto CLEANUP;
+    }
+    scalar = &group->genx;
+
+    /* Compute wNAF representation of scalar */
+    ec_compute_wNAF(naf, BITSIZE, scalar, w);
+
+    /* Verify correctness of representation */
+    MP_CHECKOK(mp_init(&k)); /* init k to 0 */
+
+    for (i = BITSIZE; i >= 0; i--) {
+        MP_CHECKOK(mp_add(&k, &k, &k));
+        /* digits in mp_???_d are unsigned */
+        if (naf[i] >= 0) {
+            MP_CHECKOK(mp_add_d(&k, naf[i], &k));
+        } else {
+            MP_CHECKOK(mp_sub_d(&k, -naf[i], &k));
+        }
+    }
+
+    if (mp_cmp(&k, scalar) != 0) {
+        printf("Error: incorrect NAF value.\n");
+        MP_CHECKOK(mp_toradix(&k, s, 16));
+        printf("NAF value %s\n", s);
+        MP_CHECKOK(mp_toradix(scalar, s, 16));
+        printf("original value %s\n", s);
+        res = MP_NO;
+        goto CLEANUP;
+    }
+
+    /* Verify digits of representation are valid */
+    for (i = 0; i <= BITSIZE; i++) {
+        if (naf[i] % 2 == 0 && naf[i] != 0) {
+            printf("Error: Even non-zero digit found.\n");
+            res = MP_NO;
+            goto CLEANUP;
+        }
+        if (naf[i] < -(ec_twoTo(w - 1)) || naf[i] >= ec_twoTo(w - 1)) {
+            printf("Error: Magnitude of naf digit too large.\n");
+            res = MP_NO;
+            goto CLEANUP;
+        }
+    }
+
+    /* Verify sparsity of representation: at least w - 1 zero digits
+     * between any two non-zero digits */
+    count = w - 1;
+    for (i = 0; i <= BITSIZE; i++) {
+        if (naf[i] != 0) {
+            if (count < w - 1) {
+                printf("Error: Sparsity failed.\n");
+                res = MP_NO;
+                goto CLEANUP;
+            }
+            count = 0;
+        } else {
+            count++;
+        }
+    }
+
+    /* Check timing */
+    M_TimeOperation(ec_compute_wNAF(naf, BITSIZE, scalar, w), 10000);
+
+    printf("Test passed.\n");
+CLEANUP:
+    mp_clear(&k);
+    ECGroup_free(group);
+    return res;
+}
diff --git a/security/nss/lib/freebl/ecl/tests/ecp_test.c b/security/nss/lib/freebl/ecl/tests/ecp_test.c
new file mode 100644
index 000000000..dcec4d747
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/tests/ecp_test.c
@@ -0,0 +1,409 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi.h"
+#include "mplogic.h"
+#include "mpprime.h"
+#include "ecl.h"
+#include "ecl-curve.h"
+#include "ecp.h"
+#include <stdio.h>
+#include <strings.h>
+#include <assert.h>
+
+#include <time.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+
+/* Time k repetitions of operation op. */
+#define M_TimeOperation(op, k) \
+ { \
+ double dStart, dNow, dUserTime; \
+ struct rusage ru; \
+ int i; \
+ getrusage(RUSAGE_SELF, &ru); \
+ dStart = (double)ru.ru_utime.tv_sec + (double)ru.ru_utime.tv_usec * 0.000001; \
+ for (i = 0; i < k; i++) { \
+ { \
+ op; \
+ } \
+ }; \
+ getrusage(RUSAGE_SELF, &ru); \
+ dNow = (double)ru.ru_utime.tv_sec + (double)ru.ru_utime.tv_usec * 0.000001; \
+ dUserTime = dNow - dStart; \
+ if (dUserTime) \
+ printf(" %-45s k: %6i, t: %6.2f sec\n", #op, k, dUserTime); \
+ }
+
+/* Test curve using generic field arithmetic.
+ *
+ * name_c is the label printed for the curve and name is the identifier
+ * handed to EC_GetNamedCurveParams. The macro expands to several
+ * statements and relies on the enclosing function providing the
+ * variables params, group, res, ectestPrint and ectestTime as well as a
+ * CLEANUP label. Any previously held group is freed before a new one is
+ * built from the curve parameters with ECGroup_fromHex. If the
+ * parameters or the group cannot be constructed, res is set to MP_NO and
+ * control jumps to CLEANUP; a failing ectest_curve_GFp leaves through
+ * MP_CHECKOK. */
+#define ECTEST_GENERIC_GFP(name_c, name) \
+ printf("Testing %s using generic implementation...\n", name_c); \
+ params = EC_GetNamedCurveParams(name); \
+ if (params == NULL) { \
+ printf(" Error: could not construct params.\n"); \
+ res = MP_NO; \
+ goto CLEANUP; \
+ } \
+ ECGroup_free(group); \
+ group = ECGroup_fromHex(params); \
+ if (group == NULL) { \
+ printf(" Error: could not construct group.\n"); \
+ res = MP_NO; \
+ goto CLEANUP; \
+ } \
+ MP_CHECKOK(ectest_curve_GFp(group, ectestPrint, ectestTime, 1)); \
+ printf("... okay.\n");
+
+/* Test curve using specific field arithmetic.
+ *
+ * name_c is the label printed for the curve and name is the identifier
+ * handed to ECGroup_fromName. The macro relies on the enclosing function
+ * providing the variables group, res, ectestPrint and ectestTime as well
+ * as a CLEANUP label (used by MP_CHECKOK). Any previously held group is
+ * freed first. Unlike ECTEST_GENERIC_GFP, failure to construct the group
+ * only prints a warning and lets the remaining tests run; res is not
+ * modified in that case. */
+#define ECTEST_NAMED_GFP(name_c, name) \
+ printf("Testing %s using specific implementation...\n", name_c); \
+ ECGroup_free(group); \
+ group = ECGroup_fromName(name); \
+ if (group == NULL) { \
+ printf(" Warning: could not construct group.\n"); \
+ printf("... failed; continuing with remaining tests.\n"); \
+ } else { \
+ MP_CHECKOK(ectest_curve_GFp(group, ectestPrint, ectestTime, 0)); \
+ printf("... okay.\n"); \
+ }
+
+/* Performs basic tests of elliptic curve cryptography over prime fields.
+ * If tests fail, then it prints an error message, aborts, and returns an
+ * error code. Otherwise, returns 0.
+ *
+ * group       curve under test
+ * ectestPrint non-zero to print the intermediate points
+ * ectestTime  non-zero to run the timed scalar multiplications
+ * generic     selects which set of timed operations is run */
+int
+ectest_curve_GFp(ECGroup *group, int ectestPrint, int ectestTime,
+                 int generic)
+{
+
+    mp_int one, order_1, gx, gy, rx, ry, n;
+    int size;
+    mp_err res = MP_OKAY;
+    char s[1000];
+
+    /* mark every mp_int as unallocated so that the mp_clear calls at
+     * CLEANUP are safe even if an mp_init below fails */
+    MP_DIGITS(&one) = 0;
+    MP_DIGITS(&order_1) = 0;
+    MP_DIGITS(&gx) = 0;
+    MP_DIGITS(&gy) = 0;
+    MP_DIGITS(&rx) = 0;
+    MP_DIGITS(&ry) = 0;
+    MP_DIGITS(&n) = 0;
+
+    /* initialize values */
+    MP_CHECKOK(mp_init(&one));
+    MP_CHECKOK(mp_init(&order_1));
+    MP_CHECKOK(mp_init(&gx));
+    MP_CHECKOK(mp_init(&gy));
+    MP_CHECKOK(mp_init(&rx));
+    MP_CHECKOK(mp_init(&ry));
+    MP_CHECKOK(mp_init(&n));
+
+    MP_CHECKOK(mp_set_int(&one, 1));
+    MP_CHECKOK(mp_sub(&group->order, &one, &order_1));
+
+    /* decode base point: (gx, gy) holds the plain coordinates, while
+     * group->genx / group->geny stay in the field's internal encoding */
+    if (group->meth->field_dec) {
+        MP_CHECKOK(group->meth->field_dec(&group->genx, &gx, group->meth));
+        MP_CHECKOK(group->meth->field_dec(&group->geny, &gy, group->meth));
+    } else {
+        MP_CHECKOK(mp_copy(&group->genx, &gx));
+        MP_CHECKOK(mp_copy(&group->geny, &gy));
+    }
+    if (ectestPrint) {
+        /* output base point */
+        printf(" base point P:\n");
+        MP_CHECKOK(mp_toradix(&gx, s, 16));
+        printf(" %s\n", s);
+        MP_CHECKOK(mp_toradix(&gy, s, 16));
+        printf(" %s\n", s);
+        if (group->meth->field_enc) {
+            printf(" base point P (encoded):\n");
+            MP_CHECKOK(mp_toradix(&group->genx, s, 16));
+            printf(" %s\n", s);
+            MP_CHECKOK(mp_toradix(&group->geny, s, 16));
+            printf(" %s\n", s);
+        }
+    }
+
+#ifdef ECL_ENABLE_GFP_PT_MUL_AFF
+    /* multiply base point by order - 1 and check for negative of base
+     * point */
+    MP_CHECKOK(ec_GFp_pt_mul_aff(&order_1, &group->genx, &group->geny, &rx, &ry, group));
+    if (ectestPrint) {
+        printf(" (order-1)*P (affine):\n");
+        MP_CHECKOK(mp_toradix(&rx, s, 16));
+        printf(" %s\n", s);
+        MP_CHECKOK(mp_toradix(&ry, s, 16));
+        printf(" %s\n", s);
+    }
+    MP_CHECKOK(group->meth->field_neg(&ry, &ry, group->meth));
+    if ((mp_cmp(&rx, &group->genx) != 0) || (mp_cmp(&ry, &group->geny) != 0)) {
+        printf(" Error: invalid result (expected (- base point)).\n");
+        res = MP_NO;
+        goto CLEANUP;
+    }
+#endif
+
+    /* guarded by the Jacobian switch, matching the (order)*P Jacobian
+     * test further down */
+#ifdef ECL_ENABLE_GFP_PT_MUL_JAC
+    /* multiply base point by order - 1 and check for negative of base
+     * point */
+    MP_CHECKOK(ec_GFp_pt_mul_jac(&order_1, &group->genx, &group->geny, &rx, &ry, group));
+    if (ectestPrint) {
+        printf(" (order-1)*P (jacobian):\n");
+        MP_CHECKOK(mp_toradix(&rx, s, 16));
+        printf(" %s\n", s);
+        MP_CHECKOK(mp_toradix(&ry, s, 16));
+        printf(" %s\n", s);
+    }
+    MP_CHECKOK(group->meth->field_neg(&ry, &ry, group->meth));
+    if ((mp_cmp(&rx, &group->genx) != 0) || (mp_cmp(&ry, &group->geny) != 0)) {
+        printf(" Error: invalid result (expected (- base point)).\n");
+        res = MP_NO;
+        goto CLEANUP;
+    }
+#endif
+
+    /* multiply base point by order - 1 and check for negative of base
+     * point (NULL point selects the group's own base point) */
+    MP_CHECKOK(ECPoint_mul(group, &order_1, NULL, NULL, &rx, &ry));
+    if (ectestPrint) {
+        printf(" (order-1)*P (ECPoint_mul):\n");
+        MP_CHECKOK(mp_toradix(&rx, s, 16));
+        printf(" %s\n", s);
+        MP_CHECKOK(mp_toradix(&ry, s, 16));
+        printf(" %s\n", s);
+    }
+    MP_CHECKOK(mp_submod(&group->meth->irr, &ry, &group->meth->irr, &ry));
+    if ((mp_cmp(&rx, &gx) != 0) || (mp_cmp(&ry, &gy) != 0)) {
+        printf(" Error: invalid result (expected (- base point)).\n");
+        res = MP_NO;
+        goto CLEANUP;
+    }
+
+    /* multiply base point by order - 1 and check for negative of base
+     * point (base point passed explicitly) */
+    MP_CHECKOK(ECPoint_mul(group, &order_1, &gx, &gy, &rx, &ry));
+    if (ectestPrint) {
+        printf(" (order-1)*P (ECPoint_mul):\n");
+        MP_CHECKOK(mp_toradix(&rx, s, 16));
+        printf(" %s\n", s);
+        MP_CHECKOK(mp_toradix(&ry, s, 16));
+        printf(" %s\n", s);
+    }
+    MP_CHECKOK(mp_submod(&group->meth->irr, &ry, &group->meth->irr, &ry));
+    if ((mp_cmp(&rx, &gx) != 0) || (mp_cmp(&ry, &gy) != 0)) {
+        printf(" Error: invalid result (expected (- base point)).\n");
+        res = MP_NO;
+        goto CLEANUP;
+    }
+
+#ifdef ECL_ENABLE_GFP_PT_MUL_AFF
+    /* multiply base point by order and check for point at infinity */
+    MP_CHECKOK(ec_GFp_pt_mul_aff(&group->order, &group->genx, &group->geny, &rx, &ry,
+                                 group));
+    if (ectestPrint) {
+        printf(" (order)*P (affine):\n");
+        MP_CHECKOK(mp_toradix(&rx, s, 16));
+        printf(" %s\n", s);
+        MP_CHECKOK(mp_toradix(&ry, s, 16));
+        printf(" %s\n", s);
+    }
+    if (ec_GFp_pt_is_inf_aff(&rx, &ry) != MP_YES) {
+        printf(" Error: invalid result (expected point at infinity).\n");
+        res = MP_NO;
+        goto CLEANUP;
+    }
+#endif
+
+#ifdef ECL_ENABLE_GFP_PT_MUL_JAC
+    /* multiply base point by order and check for point at infinity */
+    MP_CHECKOK(ec_GFp_pt_mul_jac(&group->order, &group->genx, &group->geny, &rx, &ry,
+                                 group));
+    if (ectestPrint) {
+        printf(" (order)*P (jacobian):\n");
+        MP_CHECKOK(mp_toradix(&rx, s, 16));
+        printf(" %s\n", s);
+        MP_CHECKOK(mp_toradix(&ry, s, 16));
+        printf(" %s\n", s);
+    }
+    if (ec_GFp_pt_is_inf_aff(&rx, &ry) != MP_YES) {
+        printf(" Error: invalid result (expected point at infinity).\n");
+        res = MP_NO;
+        goto CLEANUP;
+    }
+#endif
+
+    /* multiply base point by order and check for point at infinity */
+    MP_CHECKOK(ECPoint_mul(group, &group->order, NULL, NULL, &rx, &ry));
+    if (ectestPrint) {
+        printf(" (order)*P (ECPoint_mul):\n");
+        MP_CHECKOK(mp_toradix(&rx, s, 16));
+        printf(" %s\n", s);
+        MP_CHECKOK(mp_toradix(&ry, s, 16));
+        printf(" %s\n", s);
+    }
+    if (ec_GFp_pt_is_inf_aff(&rx, &ry) != MP_YES) {
+        printf(" Error: invalid result (expected point at infinity).\n");
+        res = MP_NO;
+        goto CLEANUP;
+    }
+
+    /* multiply base point by order and check for point at infinity */
+    MP_CHECKOK(ECPoint_mul(group, &group->order, &gx, &gy, &rx, &ry));
+    if (ectestPrint) {
+        printf(" (order)*P (ECPoint_mul):\n");
+        MP_CHECKOK(mp_toradix(&rx, s, 16));
+        printf(" %s\n", s);
+        MP_CHECKOK(mp_toradix(&ry, s, 16));
+        printf(" %s\n", s);
+    }
+    if (ec_GFp_pt_is_inf_aff(&rx, &ry) != MP_YES) {
+        printf(" Error: invalid result (expected point at infinity).\n");
+        res = MP_NO;
+        goto CLEANUP;
+    }
+
+    /* check that (order-1)P + (order-1)P + P == (order-1)P */
+    MP_CHECKOK(ECPoints_mul(group, &order_1, &order_1, &gx, &gy, &rx, &ry));
+    MP_CHECKOK(ECPoints_mul(group, &one, &one, &rx, &ry, &rx, &ry));
+    if (ectestPrint) {
+        printf(" (order-1)*P + (order-1)*P + P == (order-1)*P (ECPoints_mul):\n");
+        MP_CHECKOK(mp_toradix(&rx, s, 16));
+        printf(" %s\n", s);
+        MP_CHECKOK(mp_toradix(&ry, s, 16));
+        printf(" %s\n", s);
+    }
+    MP_CHECKOK(mp_submod(&group->meth->irr, &ry, &group->meth->irr, &ry));
+    if ((mp_cmp(&rx, &gx) != 0) || (mp_cmp(&ry, &gy) != 0)) {
+        printf(" Error: invalid result (expected (- base point)).\n");
+        res = MP_NO;
+        goto CLEANUP;
+    }
+
+    /* test validate_point function */
+    if (ECPoint_validate(group, &gx, &gy) != MP_YES) {
+        printf(" Error: validate point on base point failed.\n");
+        res = MP_NO;
+        goto CLEANUP;
+    }
+    MP_CHECKOK(mp_add_d(&gy, 1, &ry));
+    if (ECPoint_validate(group, &gx, &ry) != MP_NO) {
+        printf(" Error: validate point on invalid point passed.\n");
+        res = MP_NO;
+        goto CLEANUP;
+    }
+
+    if (ectestTime) {
+        /* compute random scalar */
+        size = mpl_significant_bits(&group->meth->irr);
+        if (size < MP_OKAY) {
+            goto CLEANUP;
+        }
+        MP_CHECKOK(mpp_random_size(&n, (size + ECL_BITS - 1) / ECL_BITS));
+        MP_CHECKOK(group->meth->field_mod(&n, &n, group->meth));
+        /* timed test */
+        if (generic) {
+#ifdef ECL_ENABLE_GFP_PT_MUL_AFF
+            M_TimeOperation(MP_CHECKOK(ec_GFp_pt_mul_aff(&n, &group->genx, &group->geny, &rx, &ry,
+                                                         group)),
+                            100);
+#endif
+            M_TimeOperation(MP_CHECKOK(ECPoint_mul(group, &n, NULL, NULL, &rx, &ry)),
+                            100);
+            M_TimeOperation(MP_CHECKOK(ECPoints_mul(group, &n, &n, &gx, &gy, &rx, &ry)), 100);
+        } else {
+            M_TimeOperation(MP_CHECKOK(ECPoint_mul(group, &n, NULL, NULL, &rx, &ry)),
+                            100);
+            M_TimeOperation(MP_CHECKOK(ECPoint_mul(group, &n, &gx, &gy, &rx, &ry)),
+                            100);
+            M_TimeOperation(MP_CHECKOK(ECPoints_mul(group, &n, &n, &gx, &gy, &rx, &ry)), 100);
+        }
+    }
+
+CLEANUP:
+    mp_clear(&one);
+    mp_clear(&order_1);
+    mp_clear(&gx);
+    mp_clear(&gy);
+    mp_clear(&rx);
+    mp_clear(&ry);
+    mp_clear(&n);
+    if (res != MP_OKAY) {
+        printf(" Error: exiting with error value %i\n", res);
+    }
+    return res;
+}
+
+/* Writes the command-line synopsis and the meaning of each option to
+ * standard output. */
+void
+printUsage()
+{
+    printf("Usage: ecp_test [--print] [--time]\n"
+           " --print Print out results of each point arithmetic test.\n"
+           " --time Benchmark point operations and print results.\n");
+}
+
+/* Performs tests of elliptic curve cryptography over prime fields. If
+ * tests fail, then it prints an error message, aborts, and returns an
+ * error code. Otherwise, returns 0.
+ *
+ * Recognised arguments (case-insensitive, with zero, one or two leading
+ * dashes): "time" enables benchmarking and "print" enables printing of
+ * intermediate results. Any other argument prints the usage text and
+ * returns 0 without running the tests. */
+int
+main(int argc, char **argv)
+{
+
+    int ectestTime = 0;
+    int ectestPrint = 0;
+    int i;
+    ECGroup *group = NULL;
+    ECCurveParams *params = NULL;
+    mp_err res = MP_OKAY;
+
+    /* read command-line arguments */
+    for (i = 1; i < argc; i++) {
+        if ((strcasecmp(argv[i], "time") == 0) ||
+            (strcasecmp(argv[i], "-time") == 0) ||
+            (strcasecmp(argv[i], "--time") == 0)) {
+            ectestTime = 1;
+        } else if ((strcasecmp(argv[i], "print") == 0) ||
+                   (strcasecmp(argv[i], "-print") == 0) ||
+                   (strcasecmp(argv[i], "--print") == 0)) {
+            ectestPrint = 1;
+        } else {
+            printUsage();
+            return 0;
+        }
+    }
+
+    /* generic arithmetic tests */
+    ECTEST_GENERIC_GFP("SECP-160R1", ECCurve_SECG_PRIME_160R1);
+
+    /* specific arithmetic tests */
+    ECTEST_NAMED_GFP("NIST-P192", ECCurve_NIST_P192);
+    ECTEST_NAMED_GFP("NIST-P224", ECCurve_NIST_P224);
+    ECTEST_NAMED_GFP("NIST-P256", ECCurve_NIST_P256);
+    ECTEST_NAMED_GFP("NIST-P384", ECCurve_NIST_P384);
+    ECTEST_NAMED_GFP("NIST-P521", ECCurve_NIST_P521);
+    ECTEST_NAMED_GFP("ANSI X9.62 PRIME192v1", ECCurve_X9_62_PRIME_192V1);
+    ECTEST_NAMED_GFP("ANSI X9.62 PRIME192v2", ECCurve_X9_62_PRIME_192V2);
+    ECTEST_NAMED_GFP("ANSI X9.62 PRIME192v3", ECCurve_X9_62_PRIME_192V3);
+    ECTEST_NAMED_GFP("ANSI X9.62 PRIME239v1", ECCurve_X9_62_PRIME_239V1);
+    ECTEST_NAMED_GFP("ANSI X9.62 PRIME239v2", ECCurve_X9_62_PRIME_239V2);
+    ECTEST_NAMED_GFP("ANSI X9.62 PRIME239v3", ECCurve_X9_62_PRIME_239V3);
+    ECTEST_NAMED_GFP("ANSI X9.62 PRIME256v1", ECCurve_X9_62_PRIME_256V1);
+    ECTEST_NAMED_GFP("SECP-112R1", ECCurve_SECG_PRIME_112R1);
+    ECTEST_NAMED_GFP("SECP-112R2", ECCurve_SECG_PRIME_112R2);
+    ECTEST_NAMED_GFP("SECP-128R1", ECCurve_SECG_PRIME_128R1);
+    ECTEST_NAMED_GFP("SECP-128R2", ECCurve_SECG_PRIME_128R2);
+    ECTEST_NAMED_GFP("SECP-160K1", ECCurve_SECG_PRIME_160K1);
+    ECTEST_NAMED_GFP("SECP-160R1", ECCurve_SECG_PRIME_160R1);
+    ECTEST_NAMED_GFP("SECP-160R2", ECCurve_SECG_PRIME_160R2);
+    ECTEST_NAMED_GFP("SECP-192K1", ECCurve_SECG_PRIME_192K1);
+    ECTEST_NAMED_GFP("SECP-192R1", ECCurve_SECG_PRIME_192R1);
+    ECTEST_NAMED_GFP("SECP-224K1", ECCurve_SECG_PRIME_224K1);
+    ECTEST_NAMED_GFP("SECP-224R1", ECCurve_SECG_PRIME_224R1);
+    ECTEST_NAMED_GFP("SECP-256K1", ECCurve_SECG_PRIME_256K1);
+    ECTEST_NAMED_GFP("SECP-256R1", ECCurve_SECG_PRIME_256R1);
+    ECTEST_NAMED_GFP("SECP-384R1", ECCurve_SECG_PRIME_384R1);
+    ECTEST_NAMED_GFP("SECP-521R1", ECCurve_SECG_PRIME_521R1);
+    ECTEST_NAMED_GFP("WTLS-6 (112)", ECCurve_WTLS_6);
+    ECTEST_NAMED_GFP("WTLS-7 (160)", ECCurve_WTLS_7);
+    ECTEST_NAMED_GFP("WTLS-8 (112)", ECCurve_WTLS_8);
+    ECTEST_NAMED_GFP("WTLS-9 (160)", ECCurve_WTLS_9);
+    ECTEST_NAMED_GFP("WTLS-12 (224)", ECCurve_WTLS_12);
+    ECTEST_NAMED_GFP("Curve25519", ECCurve25519);
+
+CLEANUP:
+    EC_FreeCurveParams(params);
+    ECGroup_free(group);
+    if (res != MP_OKAY) {
+        printf("Error: exiting with error value %i\n", res);
+    }
+    return res;
+}
diff --git a/security/nss/lib/freebl/ecl/uint128.c b/security/nss/lib/freebl/ecl/uint128.c
new file mode 100644
index 000000000..22cbd023c
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/uint128.c
@@ -0,0 +1,87 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "uint128.h"
+
+/* helper functions */
+
+/* Returns the low word of x masked with MASK51. */
+uint64_t
+mask51(uint128_t x)
+{
+    const uint64_t low_word = x.lo;
+
+    return low_word & MASK51;
+}
+
+/* Returns the low 64-bit word of x; the high word is discarded. */
+uint64_t
+mask_lower(uint128_t x)
+{
+    const uint64_t low_word = x.lo;
+
+    return low_word;
+}
+
+/* Same masking as mask51, but the result is returned as a uint128_t whose
+ * high word is zero. */
+uint128_t
+mask51full(uint128_t x)
+{
+    uint128_t masked;
+
+    masked.lo = x.lo & MASK51;
+    masked.hi = 0;
+    return masked;
+}
+
+/* Widens a 64-bit value to uint128_t: low word x, high word zero. */
+uint128_t
+init128x(uint64_t x)
+{
+    uint128_t wide;
+
+    wide.lo = x;
+    wide.hi = 0;
+    return wide;
+}
+
+/* arithmetic */
+
+/* Returns a + b. The low words are added first; a wrapped low sum (sum
+ * smaller than an addend) contributes a carry of one to the high word.
+ * Overflow out of the high word is discarded. */
+uint128_t
+add128(uint128_t a, uint128_t b)
+{
+    uint128_t sum;
+    uint64_t carry;
+
+    sum.lo = a.lo + b.lo;
+    carry = (sum.lo < b.lo) ? 1 : 0;
+    sum.hi = a.hi + b.hi + carry;
+    return sum;
+}
+
+/* out = 19 * a, computed as 16 * a (a shifted left by four bits) plus
+ * three further additions of a */
+uint128_t
+mul12819(uint128_t a)
+{
+    uint128_t acc = lshift128(a, 4);
+    int remaining;
+
+    for (remaining = 3; remaining > 0; remaining--) {
+        acc = add128(acc, a);
+    }
+    return acc;
+}
+
+/* Full 64 x 64 -> 128 bit product of a and b, assembled from the four
+ * 32 x 32 partial products of their halves. */
+uint128_t
+mul6464(uint64_t a, uint64_t b)
+{
+    const uint64_t a_lo = (uint64_t)(uint32_t)a;
+    const uint64_t a_hi = a >> 32;
+    const uint64_t b_lo = (uint64_t)(uint32_t)b;
+    const uint64_t b_hi = b >> 32;
+    uint128_t prod;
+    uint64_t low, mid1, mid2;
+
+    low = a_lo * b_lo;
+    /* first cross term plus the carry out of the low product */
+    mid1 = a_hi * b_lo + (low >> 32);
+    /* second cross term plus the low half of the first one */
+    mid2 = b_hi * a_lo + ((uint32_t)mid1);
+
+    prod.lo = (((uint64_t)((uint32_t)mid2)) << 32) + ((uint32_t)low);
+    prod.hi = a_hi * b_hi;
+    prod.hi += (mid2 >> 32) + (mid1 >> 32);
+    return prod;
+}
+
+/* Logical right shift of x by n bits.
+ *
+ * n == 0 returns x unchanged; it is handled separately because the
+ * general formula would shift the high word left by 64 bits, which is
+ * undefined for a 64-bit operand. Shifts of 64..127 bits move the high
+ * word into the low word, and shifts of 128 bits or more yield zero. */
+uint128_t
+rshift128(uint128_t x, uint8_t n)
+{
+    uint128_t ret;
+
+    if (n == 0) {
+        return x;
+    }
+    if (n >= 64) {
+        ret.lo = (n >= 128) ? 0 : (x.hi >> (n - 64));
+        ret.hi = 0;
+        return ret;
+    }
+    /* 0 < n < 64: both shift counts below are in the range 1..63 */
+    ret.lo = (x.lo >> n) + (x.hi << (64 - n));
+    ret.hi = x.hi >> n;
+    return ret;
+}
+
+/* Left shift of x by n bits; bits shifted past bit 127 are discarded.
+ *
+ * n == 0 returns x unchanged; it is handled separately because the
+ * general formula would shift the low word right by 64 bits, which is
+ * undefined for a 64-bit operand. Shifts of 64..127 bits move the low
+ * word into the high word, and shifts of 128 bits or more yield zero. */
+uint128_t
+lshift128(uint128_t x, uint8_t n)
+{
+    uint128_t ret;
+
+    if (n == 0) {
+        return x;
+    }
+    if (n >= 64) {
+        ret.hi = (n >= 128) ? 0 : (x.lo << (n - 64));
+        ret.lo = 0;
+        return ret;
+    }
+    /* 0 < n < 64: both shift counts below are in the range 1..63 */
+    ret.hi = (x.hi << n) + (x.lo >> (64 - n));
+    ret.lo = x.lo << n;
+    return ret;
+}
diff --git a/security/nss/lib/freebl/ecl/uint128.h b/security/nss/lib/freebl/ecl/uint128.h
new file mode 100644
index 000000000..a3a71e6e7
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/uint128.h
@@ -0,0 +1,35 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef FREEBL_ECL_UINT128_H_
+#define FREEBL_ECL_UINT128_H_
+
+#include <stdint.h>
+
+/* Mask selecting the low 51 bits of a 64-bit word. */
+#define MASK51 0x7ffffffffffffULL
+
+/*
+ * 128-bit unsigned arithmetic helpers.
+ *
+ * With HAVE_INT128_SUPPORT the operations are macros over the compiler's
+ * unsigned __int128. Every expansion is fully parenthesised so that it
+ * behaves like a single operand wherever it is used (for example
+ * "mask51(x) == y" or "a * add128(b, c)").
+ *
+ * Otherwise uint128_t is a struct of two 64-bit words and the same names
+ * are ordinary functions declared here.
+ */
+#ifdef HAVE_INT128_SUPPORT
+typedef unsigned __int128 uint128_t;
+#define add128(a, b) ((a) + (b))
+#define mul6464(a, b) ((uint128_t)(a) * (uint128_t)(b))
+#define mul12819(a) ((uint128_t)(a) * 19)
+#define rshift128(x, n) ((x) >> (n))
+#define lshift128(x, n) ((x) << (n))
+#define mask51(x) ((x) & 0x7ffffffffffff)
+#define mask_lower(x) ((uint64_t)(x))
+#define mask51full(x) ((x) & 0x7ffffffffffff)
+#define init128x(x) (x)
+#else /* uint128_t for Windows and 32 bit intel systems */
+struct uint128_t_str {
+    uint64_t lo; /* low 64 bits */
+    uint64_t hi; /* high 64 bits */
+};
+typedef struct uint128_t_str uint128_t;
+uint128_t add128(uint128_t a, uint128_t b);
+uint128_t mul6464(uint64_t a, uint64_t b);
+uint128_t mul12819(uint128_t a);
+uint128_t rshift128(uint128_t x, uint8_t n);
+uint128_t lshift128(uint128_t x, uint8_t n);
+uint64_t mask51(uint128_t x);
+uint64_t mask_lower(uint128_t x);
+uint128_t mask51full(uint128_t x);
+uint128_t init128x(uint64_t x);
+#endif
+
+#endif /* FREEBL_ECL_UINT128_H_ */
diff --git a/security/nss/lib/freebl/exports.gyp b/security/nss/lib/freebl/exports.gyp
new file mode 100644
index 000000000..ef81685b0
--- /dev/null
+++ b/security/nss/lib/freebl/exports.gyp
@@ -0,0 +1,48 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+{
+ 'includes': [
+ '../../coreconf/config.gypi'
+ ],
+ 'targets': [
+ {
+ # Target of type 'none': it only performs the header copies listed below.
+ 'target_name': 'lib_freebl_exports',
+ 'type': 'none',
+ 'copies': [
+ {
+ # Headers copied to <(nss_public_dist_dir)/<(module).
+ 'files': [
+ 'blapit.h',
+ 'ecl/ecl-exp.h',
+ 'shsign.h'
+ ],
+ 'conditions': [
+ # nsslowhash.h is added to this group only when OS is "linux".
+ [ 'OS=="linux"', {
+ 'files': [
+ 'nsslowhash.h',
+ ],
+ }],
+ ],
+ 'destination': '<(nss_public_dist_dir)/<(module)'
+ },
+ {
+ # Headers copied to <(nss_private_dist_dir)/<(module).
+ 'files': [
+ 'alghmac.h',
+ 'blapi.h',
+ 'chacha20poly1305.h',
+ 'ec.h',
+ 'ecl/ecl-curve.h',
+ 'ecl/ecl.h',
+ 'hmacct.h',
+ 'secmpi.h',
+ 'secrng.h'
+ ],
+ 'destination': '<(nss_private_dist_dir)/<(module)'
+ }
+ ]
+ }
+ ],
+ 'variables': {
+ # Value substituted for <(module) in the destinations above.
+ 'module': 'nss'
+ }
+}
diff --git a/security/nss/lib/freebl/fipsfreebl.c b/security/nss/lib/freebl/fipsfreebl.c
new file mode 100644
index 000000000..b3ae6865b
--- /dev/null
+++ b/security/nss/lib/freebl/fipsfreebl.c
@@ -0,0 +1,1715 @@
+/*
+ * PKCS #11 FIPS Power-Up Self Test.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+/* $Id: fipstest.c,v 1.31 2012/06/28 17:55:06 rrelyea%redhat.com Exp $ */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "blapi.h"
+#include "seccomon.h" /* Required for RSA and DSA. */
+#include "secerr.h"
+#include "prtypes.h"
+
+#ifdef NSS_ENABLE_ECC
+#include "ec.h" /* Required for ECDSA */
+#endif
+
+/*
+ * Different platforms have different ways of calling an initial entry point
+ * when the dll/.so is loaded. Most platforms support either a posix pragma
+ * or the GCC attribute. Some platforms support a pre-defined name, and some
+ * platforms have a link line way of invoking this function.
+ */
+
+/* The pragma */
+/* When USE_INIT_PRAGMA is defined, name bl_startup_tests in an init pragma. */
+#if defined(USE_INIT_PRAGMA)
+#pragma init(bl_startup_tests)
+#endif
+
+/* GCC Attribute */
+/*
+ * INIT_FUNCTION expands to the GCC constructor attribute when building with
+ * a GNU-compatible compiler and NSS_NO_INIT_SUPPORT is not defined;
+ * otherwise it expands to nothing.
+ */
+#if defined(__GNUC__) && !defined(NSS_NO_INIT_SUPPORT)
+#define INIT_FUNCTION __attribute__((constructor))
+#else
+#define INIT_FUNCTION
+#endif
+
+/* Forward declaration of the start-up self-test entry point (returns void). */
+static void INIT_FUNCTION bl_startup_tests(void);
+
+/* Windows pre-defined entry */
+#if defined(XP_WIN) && !defined(NSS_NO_INIT_SUPPORT)
+#include <windows.h>
+
+/*
+ * Windows DLL entry point.
+ *
+ * Runs bl_startup_tests() once, when the DLL is attached to a process.
+ * Thread attach/detach and process detach need no work here.
+ *
+ * Always returns TRUE: bl_startup_tests() returns void, so its outcome
+ * cannot be used to fail the DLL load from this function.
+ */
+BOOL WINAPI
+DllMain(HINSTANCE hinstDLL, /* handle to DLL module */
+        DWORD fdwReason,    /* reason for calling function */
+        LPVOID lpReserved)  /* reserved */
+{
+    if (fdwReason == DLL_PROCESS_ATTACH) {
+        bl_startup_tests();
+    }
+
+    return TRUE;
+}
+#endif
+
+/* insert other platform dependent init entry points here, or modify
+ * the linker line */
+
+/*
+ * Size constants used by the power-up self tests. Each numeric value is a
+ * byte count; the trailing comment on the line gives the same size in bits.
+ */
+
+/* FIPS preprocessor directives for RC2-ECB and RC2-CBC. */
+#define FIPS_RC2_KEY_LENGTH 5 /* 40-bits */
+#define FIPS_RC2_ENCRYPT_LENGTH 8 /* 64-bits */
+#define FIPS_RC2_DECRYPT_LENGTH 8 /* 64-bits */
+
+/* FIPS preprocessor directives for RC4. */
+#define FIPS_RC4_KEY_LENGTH 5 /* 40-bits */
+#define FIPS_RC4_ENCRYPT_LENGTH 8 /* 64-bits */
+#define FIPS_RC4_DECRYPT_LENGTH 8 /* 64-bits */
+
+/* FIPS preprocessor directives for DES-ECB and DES-CBC. */
+#define FIPS_DES_ENCRYPT_LENGTH 8 /* 64-bits */
+#define FIPS_DES_DECRYPT_LENGTH 8 /* 64-bits */
+
+/* FIPS preprocessor directives for DES3-CBC and DES3-ECB. */
+#define FIPS_DES3_ENCRYPT_LENGTH 8 /* 64-bits */
+#define FIPS_DES3_DECRYPT_LENGTH 8 /* 64-bits */
+
+/* FIPS preprocessor directives for AES-ECB and AES-CBC. */
+#define FIPS_AES_BLOCK_SIZE 16 /* 128-bits */
+#define FIPS_AES_ENCRYPT_LENGTH 16 /* 128-bits */
+#define FIPS_AES_DECRYPT_LENGTH 16 /* 128-bits */
+#define FIPS_AES_128_KEY_SIZE 16 /* 128-bits */
+#define FIPS_AES_192_KEY_SIZE 24 /* 192-bits */
+#define FIPS_AES_256_KEY_SIZE 32 /* 256-bits */
+
+/* FIPS preprocessor directives for message digests */
+#define FIPS_KNOWN_HASH_MESSAGE_LENGTH 64 /* 512-bits */
+
+/* FIPS preprocessor directives for RSA. */
+/* FIPS_RSA_TYPE is not a size: it expands to the identifier siBuffer. */
+#define FIPS_RSA_TYPE siBuffer
+#define FIPS_RSA_PUBLIC_EXPONENT_LENGTH 3 /* 24-bits */
+#define FIPS_RSA_PRIVATE_VERSION_LENGTH 1 /* 8-bits */
+#define FIPS_RSA_MESSAGE_LENGTH 256 /* 2048-bits */
+#define FIPS_RSA_COEFFICIENT_LENGTH 128 /* 1024-bits */
+#define FIPS_RSA_PRIME0_LENGTH 128 /* 1024-bits */
+#define FIPS_RSA_PRIME1_LENGTH 128 /* 1024-bits */
+#define FIPS_RSA_EXPONENT0_LENGTH 128 /* 1024-bits */
+#define FIPS_RSA_EXPONENT1_LENGTH 128 /* 1024-bits */
+#define FIPS_RSA_PRIVATE_EXPONENT_LENGTH 256 /* 2048-bits */
+#define FIPS_RSA_ENCRYPT_LENGTH 256 /* 2048-bits */
+#define FIPS_RSA_DECRYPT_LENGTH 256 /* 2048-bits */
+#define FIPS_RSA_SIGNATURE_LENGTH 256 /* 2048-bits */
+#define FIPS_RSA_MODULUS_LENGTH 256 /* 2048-bits */
+
+/* FIPS preprocessor directives for DSA. */
+/* FIPS_DSA_TYPE is not a size: it expands to the identifier siBuffer. */
+#define FIPS_DSA_TYPE siBuffer
+#define FIPS_DSA_DIGEST_LENGTH 20 /* 160-bits */
+#define FIPS_DSA_SUBPRIME_LENGTH 20 /* 160-bits */
+#define FIPS_DSA_SIGNATURE_LENGTH 40 /* 320-bits */
+#define FIPS_DSA_PRIME_LENGTH 128 /* 1024-bits */
+#define FIPS_DSA_BASE_LENGTH 128 /* 1024-bits */
+
+/* FIPS preprocessor directives for RNG. */
+#define FIPS_RNG_XKEY_LENGTH 32 /* 256-bits */
+
+/*
+ * Run one single-block Triple-DES known-answer operation.
+ *
+ * key/iv/mode are passed straight to DES_CreateContext (iv may be NULL).
+ * encrypt selects DES_Encrypt (PR_TRUE) or DES_Decrypt (PR_FALSE).
+ * input is the 8-byte block to process and expected is the 8-byte block
+ * the operation must produce.
+ *
+ * Returns SECSuccess on a match. On failure returns SECFailure after
+ * setting SEC_ERROR_NO_MEMORY (no context) or SEC_ERROR_LIBRARY_FAILURE
+ * (bad status, wrong output length or wrong output bytes).
+ */
+static SECStatus
+freebl_fips_DES3_KnownAnswer(const PRUint8 *key, const PRUint8 *iv, int mode,
+                             PRBool encrypt, const PRUint8 *input,
+                             const PRUint8 *expected)
+{
+    const unsigned int out_length =
+        encrypt ? FIPS_DES3_ENCRYPT_LENGTH : FIPS_DES3_DECRYPT_LENGTH;
+    const unsigned int in_length =
+        encrypt ? FIPS_DES3_DECRYPT_LENGTH : FIPS_DES3_ENCRYPT_LENGTH;
+    PRUint8 computed[FIPS_DES3_ENCRYPT_LENGTH];
+    unsigned int bytes_produced;
+    SECStatus status;
+    DESContext *context;
+
+    context = DES_CreateContext(key, iv, mode, encrypt);
+    if (context == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+
+    if (encrypt) {
+        status = DES_Encrypt(context, computed, &bytes_produced, out_length,
+                             input, in_length);
+    } else {
+        status = DES_Decrypt(context, computed, &bytes_produced, out_length,
+                             input, in_length);
+    }
+
+    DES_DestroyContext(context, PR_TRUE);
+
+    /* bytes_produced is only inspected once the call reported success. */
+    if ((status == SECSuccess) &&
+        (bytes_produced == out_length) &&
+        (PORT_Memcmp(computed, expected, out_length) == 0)) {
+        return SECSuccess;
+    }
+
+    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+    return SECFailure;
+}
+
+/*
+ * Triple-DES power-up self test: ECB encrypt, ECB decrypt, CBC encrypt and
+ * CBC decrypt of one block, each checked against a fixed answer. The tests
+ * run in that order and the first failure ends the function.
+ */
+static SECStatus
+freebl_fips_DES3_PowerUpSelfTest(void)
+{
+    /* Known key text: 24 characters (the literal also carries a NUL). */
+    static const PRUint8 des3_known_key[] = { "ANSI Triple-DES Key Data" };
+
+    /* DES3-CBC Known Initialization Vector (64-bits). */
+    static const PRUint8 des3_cbc_known_initialization_vector[] = { "Security" };
+
+    /* DES3 Known Plaintext (64-bits). */
+    static const PRUint8 des3_ecb_known_plaintext[] = { "Netscape" };
+    static const PRUint8 des3_cbc_known_plaintext[] = { "Netscape" };
+
+    /* DES3 Known Ciphertext (64-bits). */
+    static const PRUint8 des3_ecb_known_ciphertext[] = {
+        0x55, 0x8e, 0xad, 0x3c, 0xee, 0x49, 0x69, 0xbe
+    };
+    static const PRUint8 des3_cbc_known_ciphertext[] = {
+        0x43, 0xdc, 0x6a, 0xc1, 0xaf, 0xa6, 0x32, 0xf5
+    };
+
+    /* DES3-ECB Single-Round Known Answer Encryption Test. */
+    if (freebl_fips_DES3_KnownAnswer(des3_known_key, NULL, NSS_DES_EDE3,
+                                     PR_TRUE, des3_ecb_known_plaintext,
+                                     des3_ecb_known_ciphertext) != SECSuccess) {
+        return (SECFailure);
+    }
+
+    /* DES3-ECB Single-Round Known Answer Decryption Test. */
+    if (freebl_fips_DES3_KnownAnswer(des3_known_key, NULL, NSS_DES_EDE3,
+                                     PR_FALSE, des3_ecb_known_ciphertext,
+                                     des3_ecb_known_plaintext) != SECSuccess) {
+        return (SECFailure);
+    }
+
+    /* DES3-CBC Single-Round Known Answer Encryption Test. */
+    if (freebl_fips_DES3_KnownAnswer(des3_known_key,
+                                     des3_cbc_known_initialization_vector,
+                                     NSS_DES_EDE3_CBC, PR_TRUE,
+                                     des3_cbc_known_plaintext,
+                                     des3_cbc_known_ciphertext) != SECSuccess) {
+        return (SECFailure);
+    }
+
+    /* DES3-CBC Single-Round Known Answer Decryption Test. */
+    if (freebl_fips_DES3_KnownAnswer(des3_known_key,
+                                     des3_cbc_known_initialization_vector,
+                                     NSS_DES_EDE3_CBC, PR_FALSE,
+                                     des3_cbc_known_ciphertext,
+                                     des3_cbc_known_plaintext) != SECSuccess) {
+        return (SECFailure);
+    }
+
+    return (SECSuccess);
+}
+
+/*
+ * Run one single-block AES known-answer operation.
+ *
+ * key/iv/mode/key_size are passed straight to AES_CreateContext together
+ * with FIPS_AES_BLOCK_SIZE (iv may be NULL). encrypt selects AES_Encrypt
+ * (PR_TRUE) or AES_Decrypt (PR_FALSE). input is the 16-byte block to
+ * process and expected is the 16-byte block the operation must produce.
+ *
+ * Returns SECSuccess on a match. On failure returns SECFailure after
+ * setting SEC_ERROR_NO_MEMORY (no context) or SEC_ERROR_LIBRARY_FAILURE
+ * (bad status, wrong output length or wrong output bytes).
+ */
+static SECStatus
+freebl_fips_AES_KnownAnswer(const PRUint8 *key, const PRUint8 *iv, int mode,
+                            PRBool encrypt, int key_size,
+                            const PRUint8 *input, const PRUint8 *expected)
+{
+    const unsigned int out_length =
+        encrypt ? FIPS_AES_ENCRYPT_LENGTH : FIPS_AES_DECRYPT_LENGTH;
+    const unsigned int in_length =
+        encrypt ? FIPS_AES_DECRYPT_LENGTH : FIPS_AES_ENCRYPT_LENGTH;
+    PRUint8 computed[FIPS_AES_ENCRYPT_LENGTH];
+    unsigned int bytes_produced;
+    SECStatus status;
+    AESContext *context;
+
+    context = AES_CreateContext(key, iv, mode, encrypt, key_size,
+                                FIPS_AES_BLOCK_SIZE);
+    if (context == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+
+    if (encrypt) {
+        status = AES_Encrypt(context, computed, &bytes_produced, out_length,
+                             input, in_length);
+    } else {
+        status = AES_Decrypt(context, computed, &bytes_produced, out_length,
+                             input, in_length);
+    }
+
+    AES_DestroyContext(context, PR_TRUE);
+
+    /* bytes_produced is only inspected once the call reported success. */
+    if ((status == SECSuccess) &&
+        (bytes_produced == out_length) &&
+        (PORT_Memcmp(computed, expected, out_length) == 0)) {
+        return SECSuccess;
+    }
+
+    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+    return SECFailure;
+}
+
+/*
+ * AES power-up self test for one key size.
+ *
+ * aes_key_size is the key length in bytes and must be one of
+ * FIPS_AES_128_KEY_SIZE, FIPS_AES_192_KEY_SIZE or FIPS_AES_256_KEY_SIZE;
+ * any other value fails with SEC_ERROR_LIBRARY_FAILURE. The leading
+ * aes_key_size bytes of one shared key string are used for every size.
+ * ECB encrypt, ECB decrypt, CBC encrypt and CBC decrypt are run in that
+ * order and the first failure ends the function.
+ */
+static SECStatus
+freebl_fips_AES_PowerUpSelfTest(int aes_key_size)
+{
+    /* AES Known Key (up to 256-bits). */
+    static const PRUint8 aes_known_key[] = { "AES-128 RIJNDAELLEADNJIR 821-SEA" };
+
+    /* AES-CBC Known Initialization Vector (128-bits). */
+    static const PRUint8 aes_cbc_known_initialization_vector[] = { "SecurityytiruceS" };
+
+    /* AES Known Plaintext (128-bits). (blocksize is 128-bits) */
+    static const PRUint8 aes_known_plaintext[] = { "NetscapeepacsteN" };
+
+    /* AES Known Ciphertext (128-bit key). */
+    static const PRUint8 aes_ecb128_known_ciphertext[] = {
+        0x3c, 0xa5, 0x96, 0xf3, 0x34, 0x6a, 0x96, 0xc1,
+        0x03, 0x88, 0x16, 0x7b, 0x20, 0xbf, 0x35, 0x47
+    };
+    static const PRUint8 aes_cbc128_known_ciphertext[] = {
+        0xcf, 0x15, 0x1d, 0x4f, 0x96, 0xe4, 0x4f, 0x63,
+        0x15, 0x54, 0x14, 0x1d, 0x4e, 0xd8, 0xd5, 0xea
+    };
+
+    /* AES Known Ciphertext (192-bit key). */
+    static const PRUint8 aes_ecb192_known_ciphertext[] = {
+        0xa0, 0x18, 0x62, 0xed, 0x88, 0x19, 0xcb, 0x62,
+        0x88, 0x1d, 0x4d, 0xfe, 0x84, 0x02, 0x89, 0x0e
+    };
+    static const PRUint8 aes_cbc192_known_ciphertext[] = {
+        0x83, 0xf7, 0xa4, 0x76, 0xd1, 0x6f, 0x07, 0xbe,
+        0x07, 0xbc, 0x43, 0x2f, 0x6d, 0xad, 0x29, 0xe1
+    };
+
+    /* AES Known Ciphertext (256-bit key). */
+    static const PRUint8 aes_ecb256_known_ciphertext[] = {
+        0xdb, 0xa6, 0x52, 0x01, 0x8a, 0x70, 0xae, 0x66,
+        0x3a, 0x99, 0xd8, 0x95, 0x7f, 0xfb, 0x01, 0x67
+    };
+    static const PRUint8 aes_cbc256_known_ciphertext[] = {
+        0x37, 0xea, 0x07, 0x06, 0x31, 0x1c, 0x59, 0x27,
+        0xc5, 0xc5, 0x68, 0x71, 0x6e, 0x34, 0x40, 0x16
+    };
+
+    const PRUint8 *ecb_ciphertext;
+    const PRUint8 *cbc_ciphertext;
+
+    /* Pick the answer vectors for this key size; reject anything else. */
+    switch (aes_key_size) {
+        case FIPS_AES_128_KEY_SIZE:
+            ecb_ciphertext = aes_ecb128_known_ciphertext;
+            cbc_ciphertext = aes_cbc128_known_ciphertext;
+            break;
+        case FIPS_AES_192_KEY_SIZE:
+            ecb_ciphertext = aes_ecb192_known_ciphertext;
+            cbc_ciphertext = aes_cbc192_known_ciphertext;
+            break;
+        case FIPS_AES_256_KEY_SIZE:
+            ecb_ciphertext = aes_ecb256_known_ciphertext;
+            cbc_ciphertext = aes_cbc256_known_ciphertext;
+            break;
+        default:
+            PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+            return (SECFailure);
+    }
+
+    /* AES-ECB Single-Round Known Answer Encryption Test. */
+    if (freebl_fips_AES_KnownAnswer(aes_known_key, NULL, NSS_AES, PR_TRUE,
+                                    aes_key_size, aes_known_plaintext,
+                                    ecb_ciphertext) != SECSuccess) {
+        return (SECFailure);
+    }
+
+    /* AES-ECB Single-Round Known Answer Decryption Test. */
+    if (freebl_fips_AES_KnownAnswer(aes_known_key, NULL, NSS_AES, PR_FALSE,
+                                    aes_key_size, ecb_ciphertext,
+                                    aes_known_plaintext) != SECSuccess) {
+        return (SECFailure);
+    }
+
+    /* AES-CBC Single-Round Known Answer Encryption Test. */
+    if (freebl_fips_AES_KnownAnswer(aes_known_key,
+                                    aes_cbc_known_initialization_vector,
+                                    NSS_AES_CBC, PR_TRUE, aes_key_size,
+                                    aes_known_plaintext,
+                                    cbc_ciphertext) != SECSuccess) {
+        return (SECFailure);
+    }
+
+    /* AES-CBC Single-Round Known Answer Decryption Test. */
+    if (freebl_fips_AES_KnownAnswer(aes_known_key,
+                                    aes_cbc_known_initialization_vector,
+                                    NSS_AES_CBC, PR_FALSE, aes_key_size,
+                                    cbc_ciphertext,
+                                    aes_known_plaintext) != SECSuccess) {
+        return (SECFailure);
+    }
+
+    return (SECSuccess);
+}
+
+/* Known Hash Message (512-bits). Used for all hashes (incl. SHA-N [N>1]). */
+/*
+ * The text is 64 characters long. The self tests in this file pass
+ * FIPS_KNOWN_HASH_MESSAGE_LENGTH (64) as the message length, so the
+ * terminating NUL added by the string literal is not part of the hashed data.
+ */
+static const PRUint8 known_hash_message[] = {
+ "The test message for the MD2, MD5, and SHA-1 hashing algorithms."
+};
+
+/*
+ * Single-round HMAC over one message.
+ *
+ * hmac_computed     receives the MAC; HMAC_Finish is allowed to write up
+ *                   to the selected hash object's 'length' bytes into it.
+ * secret_key        HMAC key bytes, secret_key_length bytes long.
+ * message           data to authenticate, message_length bytes long.
+ * hashAlg           selects the underlying hash via HASH_GetRawHashObject.
+ *
+ * Returns SECFailure when no hash object or HMAC context can be obtained;
+ * otherwise returns whatever HMAC_Finish reports. The number of bytes
+ * written is collected but not returned to the caller.
+ */
+static SECStatus
+freebl_fips_HMAC(unsigned char *hmac_computed,
+                 const PRUint8 *secret_key,
+                 unsigned int secret_key_length,
+                 const PRUint8 *message,
+                 unsigned int message_length,
+                 HASH_HashType hashAlg)
+{
+    unsigned int mac_length = 0;
+    SECStatus status;
+    HMACContext *context;
+    SECHashObject *hash = (SECHashObject *)HASH_GetRawHashObject(hashAlg);
+
+    if (hash == NULL) {
+        return (SECFailure);
+    }
+
+    /* PR_TRUE for in FIPS mode */
+    context = HMAC_Create(hash, secret_key, secret_key_length, PR_TRUE);
+    if (context == NULL) {
+        return (SECFailure);
+    }
+
+    HMAC_Begin(context);
+    HMAC_Update(context, message, message_length);
+    status = HMAC_Finish(context, hmac_computed, &mac_length, hash->length);
+
+    HMAC_Destroy(context, PR_TRUE);
+
+    return (status);
+}
+
+/*
+ * HMAC power-up self test.
+ *
+ * Computes the HMAC of known_hash_message under one fixed key with SHA-1,
+ * SHA-224, SHA-256, SHA-384 and SHA-512, in that order, and compares each
+ * result with its known answer. The first failing computation or mismatch
+ * sets SEC_ERROR_LIBRARY_FAILURE and returns SECFailure.
+ */
+static SECStatus
+freebl_fips_HMAC_PowerUpSelfTest(void)
+{
+    static const PRUint8 HMAC_known_secret_key[] = {
+        "Firefox and ThunderBird are awesome!"
+    };
+
+    /* sizeof counts the string literal's terminating NUL as a key byte. */
+    static const PRUint8 HMAC_known_secret_key_length = sizeof HMAC_known_secret_key;
+
+    /* known SHA1 hmac (20 bytes) */
+    static const PRUint8 known_SHA1_hmac[] = {
+        0xd5, 0x85, 0xf6, 0x5b, 0x39, 0xfa, 0xb9, 0x05,
+        0x3b, 0x57, 0x1d, 0x61, 0xe7, 0xb8, 0x84, 0x1e,
+        0x5d, 0x0e, 0x1e, 0x11
+    };
+
+    /* known SHA224 hmac (28 bytes) */
+    static const PRUint8 known_SHA224_hmac[] = {
+        0x1c, 0xc3, 0x06, 0x8e, 0xce, 0x37, 0x68, 0xfb,
+        0x1a, 0x82, 0x4a, 0xbe, 0x2b, 0x00, 0x51, 0xf8,
+        0x9d, 0xb6, 0xe0, 0x90, 0x0d, 0x00, 0xc9, 0x64,
+        0x9a, 0xb8, 0x98, 0x4e
+    };
+
+    /* known SHA256 hmac (32 bytes) */
+    static const PRUint8 known_SHA256_hmac[] = {
+        0x05, 0x75, 0x9a, 0x9e, 0x70, 0x5e, 0xe7, 0x44,
+        0xe2, 0x46, 0x4b, 0x92, 0x22, 0x14, 0x22, 0xe0,
+        0x1b, 0x92, 0x8a, 0x0c, 0xfe, 0xf5, 0x49, 0xe9,
+        0xa7, 0x1b, 0x56, 0x7d, 0x1d, 0x29, 0x40, 0x48
+    };
+
+    /* known SHA384 hmac (48 bytes) */
+    static const PRUint8 known_SHA384_hmac[] = {
+        0xcd, 0x56, 0x14, 0xec, 0x05, 0x53, 0x06, 0x2b,
+        0x7e, 0x9c, 0x8a, 0x18, 0x5e, 0xea, 0xf3, 0x91,
+        0x33, 0xfb, 0x64, 0xf6, 0xe3, 0x9f, 0x89, 0x0b,
+        0xaf, 0xbe, 0x83, 0x4d, 0x3f, 0x3c, 0x43, 0x4d,
+        0x4a, 0x0c, 0x56, 0x98, 0xf8, 0xca, 0xb4, 0xaa,
+        0x9a, 0xf4, 0x0a, 0xaf, 0x4f, 0x69, 0xca, 0x87
+    };
+
+    /* known SHA512 hmac (64 bytes) */
+    static const PRUint8 known_SHA512_hmac[] = {
+        0xf6, 0x0e, 0x97, 0x12, 0x00, 0x67, 0x6e, 0xb9,
+        0x0c, 0xb2, 0x63, 0xf0, 0x60, 0xac, 0x75, 0x62,
+        0x70, 0x95, 0x2a, 0x52, 0x22, 0xee, 0xdd, 0xd2,
+        0x71, 0xb1, 0xe8, 0x26, 0x33, 0xd3, 0x13, 0x27,
+        0xcb, 0xff, 0x44, 0xef, 0x87, 0x97, 0x16, 0xfb,
+        0xd3, 0x0b, 0x48, 0xbe, 0x12, 0x4e, 0xda, 0xb1,
+        0x89, 0x90, 0xfb, 0x06, 0x0c, 0xbe, 0xe5, 0xc4,
+        0xff, 0x24, 0x37, 0x3d, 0xc7, 0xe4, 0xe4, 0x37
+    };
+
+    /* One row per Single-Round Known Answer HMAC Test, run top to bottom. */
+    static const struct {
+        HASH_HashType hash_alg;
+        const PRUint8 *known_hmac;
+        unsigned int hmac_length;
+    } hmac_tests[] = {
+        { HASH_AlgSHA1, known_SHA1_hmac, SHA1_LENGTH },
+        { HASH_AlgSHA224, known_SHA224_hmac, SHA224_LENGTH },
+        { HASH_AlgSHA256, known_SHA256_hmac, SHA256_LENGTH },
+        { HASH_AlgSHA384, known_SHA384_hmac, SHA384_LENGTH },
+        { HASH_AlgSHA512, known_SHA512_hmac, SHA512_LENGTH }
+    };
+
+    PRUint8 hmac_computed[HASH_LENGTH_MAX];
+    unsigned int test_index;
+
+    for (test_index = 0;
+         test_index < sizeof hmac_tests / sizeof hmac_tests[0];
+         test_index++) {
+        SECStatus hmac_status = freebl_fips_HMAC(hmac_computed,
+                                                 HMAC_known_secret_key,
+                                                 HMAC_known_secret_key_length,
+                                                 known_hash_message,
+                                                 FIPS_KNOWN_HASH_MESSAGE_LENGTH,
+                                                 hmac_tests[test_index].hash_alg);
+
+        if ((hmac_status != SECSuccess) ||
+            (PORT_Memcmp(hmac_computed, hmac_tests[test_index].known_hmac,
+                         hmac_tests[test_index].hmac_length) != 0)) {
+            PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+            return (SECFailure);
+        }
+    }
+
+    return (SECSuccess);
+}
+
+/*
+ * Decide the outcome of one known-answer hashing test.
+ *
+ * Returns SECSuccess when hash_status is SECSuccess and the first 'length'
+ * bytes of 'computed' equal 'expected' (the comparison is skipped when the
+ * hash call itself failed). Otherwise sets SEC_ERROR_LIBRARY_FAILURE and
+ * returns SECFailure.
+ */
+static SECStatus
+freebl_fips_SHA_CheckDigest(SECStatus hash_status, const PRUint8 *computed,
+                            const PRUint8 *expected, unsigned int length)
+{
+    if ((hash_status == SECSuccess) &&
+        (PORT_Memcmp(computed, expected, length) == 0)) {
+        return SECSuccess;
+    }
+
+    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+    return SECFailure;
+}
+
+/*
+ * SHA power-up self test.
+ *
+ * Hashes known_hash_message with SHA-1, SHA-224, SHA-256, SHA-384 and
+ * SHA-512, in that order, comparing each digest with its known answer.
+ * The first failure ends the function with SECFailure.
+ */
+static SECStatus
+freebl_fips_SHA_PowerUpSelfTest(void)
+{
+    /* SHA-1 Known Digest Message (160-bits). */
+    static const PRUint8 sha1_known_digest[] = {
+        0x0a, 0x6d, 0x07, 0xba, 0x1e, 0xbd, 0x8a, 0x1b,
+        0x72, 0xf6, 0xc7, 0x22, 0xf1, 0x27, 0x9f, 0xf0,
+        0xe0, 0x68, 0x47, 0x7a
+    };
+
+    /* SHA-224 Known Digest Message (224-bits). */
+    static const PRUint8 sha224_known_digest[] = {
+        0x89, 0x5e, 0x7f, 0xfd, 0x0e, 0xd8, 0x35, 0x6f,
+        0x64, 0x6d, 0xf2, 0xde, 0x5e, 0xed, 0xa6, 0x7f,
+        0x29, 0xd1, 0x12, 0x73, 0x42, 0x84, 0x95, 0x4f,
+        0x8e, 0x08, 0xe5, 0xcb
+    };
+
+    /* SHA-256 Known Digest Message (256-bits). */
+    static const PRUint8 sha256_known_digest[] = {
+        0x38, 0xa9, 0xc1, 0xf0, 0x35, 0xf6, 0x5d, 0x61,
+        0x11, 0xd4, 0x0b, 0xdc, 0xce, 0x35, 0x14, 0x8d,
+        0xf2, 0xdd, 0xaf, 0xaf, 0xcf, 0xb7, 0x87, 0xe9,
+        0x96, 0xa5, 0xd2, 0x83, 0x62, 0x46, 0x56, 0x79
+    };
+
+    /* SHA-384 Known Digest Message (384-bits). */
+    static const PRUint8 sha384_known_digest[] = {
+        0x11, 0xfe, 0x1c, 0x00, 0x89, 0x48, 0xde, 0xb3,
+        0x99, 0xee, 0x1c, 0x18, 0xb4, 0x10, 0xfb, 0xfe,
+        0xe3, 0xa8, 0x2c, 0xf3, 0x04, 0xb0, 0x2f, 0xc8,
+        0xa3, 0xc4, 0x5e, 0xea, 0x7e, 0x60, 0x48, 0x7b,
+        0xce, 0x2c, 0x62, 0xf7, 0xbc, 0xa7, 0xe8, 0xa3,
+        0xcf, 0x24, 0xce, 0x9c, 0xe2, 0x8b, 0x09, 0x72
+    };
+
+    /* SHA-512 Known Digest Message (512-bits). */
+    static const PRUint8 sha512_known_digest[] = {
+        0xc8, 0xb3, 0x27, 0xf9, 0x0b, 0x24, 0xc8, 0xbf,
+        0x4c, 0xba, 0x33, 0x54, 0xf2, 0x31, 0xbf, 0xdb,
+        0xab, 0xfd, 0xb3, 0x15, 0xd7, 0xfa, 0x48, 0x99,
+        0x07, 0x60, 0x0f, 0x57, 0x41, 0x1a, 0xdd, 0x28,
+        0x12, 0x55, 0x25, 0xac, 0xba, 0x3a, 0x99, 0x12,
+        0x2c, 0x7a, 0x8f, 0x75, 0x3a, 0xe1, 0x06, 0x6f,
+        0x30, 0x31, 0xc9, 0x33, 0xc6, 0x1b, 0x90, 0x1a,
+        0x6c, 0x98, 0x9a, 0x87, 0xd0, 0xb2, 0xf8, 0x07
+    };
+
+    /* One scratch buffer, reused by every digest size. */
+    PRUint8 digest[HASH_LENGTH_MAX];
+    SECStatus hash_status;
+
+    /* SHA-1 Single-Round Known Answer Hashing Test. */
+    hash_status = SHA1_HashBuf(digest, known_hash_message,
+                               FIPS_KNOWN_HASH_MESSAGE_LENGTH);
+    if (freebl_fips_SHA_CheckDigest(hash_status, digest, sha1_known_digest,
+                                    SHA1_LENGTH) != SECSuccess) {
+        return (SECFailure);
+    }
+
+    /* SHA-224 Single-Round Known Answer Hashing Test. */
+    hash_status = SHA224_HashBuf(digest, known_hash_message,
+                                 FIPS_KNOWN_HASH_MESSAGE_LENGTH);
+    if (freebl_fips_SHA_CheckDigest(hash_status, digest, sha224_known_digest,
+                                    SHA224_LENGTH) != SECSuccess) {
+        return (SECFailure);
+    }
+
+    /* SHA-256 Single-Round Known Answer Hashing Test. */
+    hash_status = SHA256_HashBuf(digest, known_hash_message,
+                                 FIPS_KNOWN_HASH_MESSAGE_LENGTH);
+    if (freebl_fips_SHA_CheckDigest(hash_status, digest, sha256_known_digest,
+                                    SHA256_LENGTH) != SECSuccess) {
+        return (SECFailure);
+    }
+
+    /* SHA-384 Single-Round Known Answer Hashing Test. */
+    hash_status = SHA384_HashBuf(digest, known_hash_message,
+                                 FIPS_KNOWN_HASH_MESSAGE_LENGTH);
+    if (freebl_fips_SHA_CheckDigest(hash_status, digest, sha384_known_digest,
+                                    SHA384_LENGTH) != SECSuccess) {
+        return (SECFailure);
+    }
+
+    /* SHA-512 Single-Round Known Answer Hashing Test. */
+    hash_status = SHA512_HashBuf(digest, known_hash_message,
+                                 FIPS_KNOWN_HASH_MESSAGE_LENGTH);
+    if (freebl_fips_SHA_CheckDigest(hash_status, digest, sha512_known_digest,
+                                    SHA512_LENGTH) != SECSuccess) {
+        return (SECFailure);
+    }
+
+    return (SECSuccess);
+}
+
+static SECStatus
+freebl_fips_RSA_PowerUpSelfTest(void)
+{
+ /* RSA Known Modulus used in both Public/Private Key Values (2048-bits). */
+ static const PRUint8 rsa_modulus[FIPS_RSA_MODULUS_LENGTH] = {
+ 0xb8, 0x15, 0x00, 0x33, 0xda, 0x0c, 0x9d, 0xa5,
+ 0x14, 0x8c, 0xde, 0x1f, 0x23, 0x07, 0x54, 0xe2,
+ 0xc6, 0xb9, 0x51, 0x04, 0xc9, 0x65, 0x24, 0x6e,
+ 0x0a, 0x46, 0x34, 0x5c, 0x37, 0x86, 0x6b, 0x88,
+ 0x24, 0x27, 0xac, 0xa5, 0x02, 0x79, 0xfb, 0xed,
+ 0x75, 0xc5, 0x3f, 0x6e, 0xdf, 0x05, 0x5f, 0x0f,
+ 0x20, 0x70, 0xa0, 0x5b, 0x85, 0xdb, 0xac, 0xb9,
+ 0x5f, 0x02, 0xc2, 0x64, 0x1e, 0x84, 0x5b, 0x3e,
+ 0xad, 0xbf, 0xf6, 0x2e, 0x51, 0xd6, 0xad, 0xf7,
+ 0xa7, 0x86, 0x75, 0x86, 0xec, 0xa7, 0xe1, 0xf7,
+ 0x08, 0xbf, 0xdc, 0x56, 0xb1, 0x3b, 0xca, 0xd8,
+ 0xfc, 0x51, 0xdf, 0x9a, 0x2a, 0x37, 0x06, 0xf2,
+ 0xd1, 0x6b, 0x9a, 0x5e, 0x2a, 0xe5, 0x20, 0x57,
+ 0x35, 0x9f, 0x1f, 0x98, 0xcf, 0x40, 0xc7, 0xd6,
+ 0x98, 0xdb, 0xde, 0xf5, 0x64, 0x53, 0xf7, 0x9d,
+ 0x45, 0xf3, 0xd6, 0x78, 0xb9, 0xe3, 0xa3, 0x20,
+ 0xcd, 0x79, 0x43, 0x35, 0xef, 0xd7, 0xfb, 0xb9,
+ 0x80, 0x88, 0x27, 0x2f, 0x63, 0xa8, 0x67, 0x3d,
+ 0x4a, 0xfa, 0x06, 0xc6, 0xd2, 0x86, 0x0b, 0xa7,
+ 0x28, 0xfd, 0xe0, 0x1e, 0x93, 0x4b, 0x17, 0x2e,
+ 0xb0, 0x11, 0x6f, 0xc6, 0x2b, 0x98, 0x0f, 0x15,
+ 0xe3, 0x87, 0x16, 0x7a, 0x7c, 0x67, 0x3e, 0x12,
+ 0x2b, 0xf8, 0xbe, 0x48, 0xc1, 0x97, 0x47, 0xf4,
+ 0x1f, 0x81, 0x80, 0x12, 0x28, 0xe4, 0x7b, 0x1e,
+ 0xb7, 0x00, 0xa4, 0xde, 0xaa, 0xfb, 0x0f, 0x77,
+ 0x84, 0xa3, 0xd6, 0xb2, 0x03, 0x48, 0xdd, 0x53,
+ 0x8b, 0x46, 0x41, 0x28, 0x52, 0xc4, 0x53, 0xf0,
+ 0x1c, 0x95, 0xd9, 0x36, 0xe0, 0x0f, 0x26, 0x46,
+ 0x9c, 0x61, 0x0e, 0x80, 0xca, 0x86, 0xaf, 0x39,
+ 0x95, 0xe5, 0x60, 0x43, 0x61, 0x3e, 0x2b, 0xb4,
+ 0xe8, 0xbd, 0x8d, 0x77, 0x62, 0xf5, 0x32, 0x43,
+ 0x2f, 0x4b, 0x65, 0x82, 0x14, 0xdd, 0x29, 0x5b
+ };
+
+ /* RSA Known Public Key Values (24-bits). */
+ static const PRUint8 rsa_public_exponent[FIPS_RSA_PUBLIC_EXPONENT_LENGTH] = { 0x01, 0x00, 0x01 };
+ /* RSA Known Private Key Values (version is 8-bits), */
+ /* (private exponent is 2048-bits), */
+ /* (private prime0 is 1024-bits), */
+ /* (private prime1 is 1024-bits), */
+ /* (private prime exponent0 is 1024-bits), */
+ /* (private prime exponent1 is 1024-bits), */
+ /* and (private coefficient is 1024-bits). */
+ static const PRUint8 rsa_version[] = { 0x00 };
+
+ static const PRUint8 rsa_private_exponent[FIPS_RSA_PRIVATE_EXPONENT_LENGTH] = {
+ 0x29, 0x08, 0x05, 0x53, 0x89, 0x76, 0xe6, 0x6c,
+ 0xb5, 0x77, 0xf0, 0xca, 0xdf, 0xf3, 0xf2, 0x67,
+ 0xda, 0x03, 0xd4, 0x9b, 0x4c, 0x88, 0xce, 0xe5,
+ 0xf8, 0x44, 0x4d, 0xc7, 0x80, 0x58, 0xe5, 0xff,
+ 0x22, 0x8f, 0xf5, 0x5b, 0x92, 0x81, 0xbe, 0x35,
+ 0xdf, 0xda, 0x67, 0x99, 0x3e, 0xfc, 0xe3, 0x83,
+ 0x6b, 0xa7, 0xaf, 0x16, 0xb7, 0x6f, 0x8f, 0xc0,
+ 0x81, 0xfd, 0x0b, 0x77, 0x65, 0x95, 0xfb, 0x00,
+ 0xad, 0x99, 0xec, 0x35, 0xc6, 0xe8, 0x23, 0x3e,
+ 0xe0, 0x88, 0x88, 0x09, 0xdb, 0x16, 0x50, 0xb7,
+ 0xcf, 0xab, 0x74, 0x61, 0x9e, 0x7f, 0xc5, 0x67,
+ 0x38, 0x56, 0xc7, 0x90, 0x85, 0x78, 0x5e, 0x84,
+ 0x21, 0x49, 0xea, 0xce, 0xb2, 0xa0, 0xff, 0xe4,
+ 0x70, 0x7f, 0x57, 0x7b, 0xa8, 0x36, 0xb8, 0x54,
+ 0x8d, 0x1d, 0xf5, 0x44, 0x9d, 0x68, 0x59, 0xf9,
+ 0x24, 0x6e, 0x85, 0x8f, 0xc3, 0x5f, 0x8a, 0x2c,
+ 0x94, 0xb7, 0xbc, 0x0e, 0xa5, 0xef, 0x93, 0x06,
+ 0x38, 0xcd, 0x07, 0x0c, 0xae, 0xb8, 0x44, 0x1a,
+ 0xd8, 0xe7, 0xf5, 0x9a, 0x1e, 0x9c, 0x18, 0xc7,
+ 0x6a, 0xc2, 0x7f, 0x28, 0x01, 0x4f, 0xb4, 0xb8,
+ 0x90, 0x97, 0x5a, 0x43, 0x38, 0xad, 0xe8, 0x95,
+ 0x68, 0x83, 0x1a, 0x1b, 0x10, 0x07, 0xe6, 0x02,
+ 0x52, 0x1f, 0xbf, 0x76, 0x6b, 0x46, 0xd6, 0xfb,
+ 0xc3, 0xbe, 0xb5, 0xac, 0x52, 0x53, 0x01, 0x1c,
+ 0xf3, 0xc5, 0xeb, 0x64, 0xf2, 0x1e, 0xc4, 0x38,
+ 0xe9, 0xaa, 0xd9, 0xc3, 0x72, 0x51, 0xa5, 0x44,
+ 0x58, 0x69, 0x0b, 0x1b, 0x98, 0x7f, 0xf2, 0x23,
+ 0xff, 0xeb, 0xf0, 0x75, 0x24, 0xcf, 0xc5, 0x1e,
+ 0xb8, 0x6a, 0xc5, 0x2f, 0x4f, 0x23, 0x50, 0x7d,
+ 0x15, 0x9d, 0x19, 0x7a, 0x0b, 0x82, 0xe0, 0x21,
+ 0x5b, 0x5f, 0x9d, 0x50, 0x2b, 0x83, 0xe4, 0x48,
+ 0xcc, 0x39, 0xe5, 0xfb, 0x13, 0x7b, 0x6f, 0x81
+ };
+
+ static const PRUint8 rsa_prime0[FIPS_RSA_PRIME0_LENGTH] = {
+ 0xe4, 0xbf, 0x21, 0x62, 0x9b, 0xa9, 0x77, 0x40,
+ 0x8d, 0x2a, 0xce, 0xa1, 0x67, 0x5a, 0x4c, 0x96,
+ 0x45, 0x98, 0x67, 0xbd, 0x75, 0x22, 0x33, 0x6f,
+ 0xe6, 0xcb, 0x77, 0xde, 0x9e, 0x97, 0x7d, 0x96,
+ 0x8c, 0x5e, 0x5d, 0x34, 0xfb, 0x27, 0xfc, 0x6d,
+ 0x74, 0xdb, 0x9d, 0x2e, 0x6d, 0xf6, 0xea, 0xfc,
+ 0xce, 0x9e, 0xda, 0xa7, 0x25, 0xa2, 0xf4, 0x58,
+ 0x6d, 0x0a, 0x3f, 0x01, 0xc2, 0xb4, 0xab, 0x38,
+ 0xc1, 0x14, 0x85, 0xb6, 0xfa, 0x94, 0xc3, 0x85,
+ 0xf9, 0x3c, 0x2e, 0x96, 0x56, 0x01, 0xe7, 0xd6,
+ 0x14, 0x71, 0x4f, 0xfb, 0x4c, 0x85, 0x52, 0xc4,
+ 0x61, 0x1e, 0xa5, 0x1e, 0x96, 0x13, 0x0d, 0x8f,
+ 0x66, 0xae, 0xa0, 0xcd, 0x7d, 0x25, 0x66, 0x19,
+ 0x15, 0xc2, 0xcf, 0xc3, 0x12, 0x3c, 0xe8, 0xa4,
+ 0x52, 0x4c, 0xcb, 0x28, 0x3c, 0xc4, 0xbf, 0x95,
+ 0x33, 0xe3, 0x81, 0xea, 0x0c, 0x6c, 0xa2, 0x05
+ };
+ static const PRUint8 rsa_prime1[FIPS_RSA_PRIME1_LENGTH] = {
+ 0xce, 0x03, 0x94, 0xf4, 0xa9, 0x2c, 0x1e, 0x06,
+ 0xe7, 0x40, 0x30, 0x01, 0xf7, 0xbb, 0x68, 0x8c,
+ 0x27, 0xd2, 0x15, 0xe3, 0x28, 0x49, 0x5b, 0xa8,
+ 0xc1, 0x9a, 0x42, 0x7e, 0x31, 0xf9, 0x08, 0x34,
+ 0x81, 0xa2, 0x0f, 0x04, 0x61, 0x34, 0xe3, 0x36,
+ 0x92, 0xb1, 0x09, 0x2b, 0xe9, 0xef, 0x84, 0x88,
+ 0xbe, 0x9c, 0x98, 0x60, 0xa6, 0x60, 0x84, 0xe9,
+ 0x75, 0x6f, 0xcc, 0x81, 0xd1, 0x96, 0xef, 0xdd,
+ 0x2e, 0xca, 0xc4, 0xf5, 0x42, 0xfb, 0x13, 0x2b,
+ 0x57, 0xbf, 0x14, 0x5e, 0xc2, 0x7f, 0x77, 0x35,
+ 0x29, 0xc4, 0xe5, 0xe0, 0xf9, 0x6d, 0x15, 0x4a,
+ 0x42, 0x56, 0x1c, 0x3e, 0x0c, 0xc5, 0xce, 0x70,
+ 0x08, 0x63, 0x1e, 0x73, 0xdb, 0x7e, 0x74, 0x05,
+ 0x32, 0x01, 0xc6, 0x36, 0x32, 0x75, 0x6b, 0xed,
+ 0x9d, 0xfe, 0x7c, 0x7e, 0xa9, 0x57, 0xb4, 0xe9,
+ 0x22, 0xe4, 0xe7, 0xfe, 0x36, 0x07, 0x9b, 0xdf
+ };
+ static const PRUint8 rsa_exponent0[FIPS_RSA_EXPONENT0_LENGTH] = {
+ 0x04, 0x5a, 0x3a, 0xa9, 0x64, 0xaa, 0xd9, 0xd1,
+ 0x09, 0x9e, 0x99, 0xe5, 0xea, 0x50, 0x86, 0x8a,
+ 0x89, 0x72, 0x77, 0xee, 0xdb, 0xee, 0xb5, 0xa9,
+ 0xd8, 0x6b, 0x60, 0xb1, 0x84, 0xb4, 0xff, 0x37,
+ 0xc1, 0x1d, 0xfe, 0x8a, 0x06, 0x89, 0x61, 0x3d,
+ 0x37, 0xef, 0x01, 0xd3, 0xa3, 0x56, 0x02, 0x6c,
+ 0xa3, 0x05, 0xd4, 0xc5, 0x3f, 0x6b, 0x15, 0x59,
+ 0x25, 0x61, 0xff, 0x86, 0xea, 0x0c, 0x84, 0x01,
+ 0x85, 0x72, 0xfd, 0x84, 0x58, 0xca, 0x41, 0xda,
+ 0x27, 0xbe, 0xe4, 0x68, 0x09, 0xe4, 0xe9, 0x63,
+ 0x62, 0x6a, 0x31, 0x8a, 0x67, 0x8f, 0x55, 0xde,
+ 0xd4, 0xb6, 0x3f, 0x90, 0x10, 0x6c, 0xf6, 0x62,
+ 0x17, 0x23, 0x15, 0x7e, 0x33, 0x76, 0x65, 0xb5,
+ 0xee, 0x7b, 0x11, 0x76, 0xf5, 0xbe, 0xe0, 0xf2,
+ 0x57, 0x7a, 0x8c, 0x97, 0x0c, 0x68, 0xf5, 0xf8,
+ 0x41, 0xcf, 0x7f, 0x66, 0x53, 0xac, 0x31, 0x7d
+ };
+ static const PRUint8 rsa_exponent1[FIPS_RSA_EXPONENT1_LENGTH] = {
+ 0x93, 0x54, 0x14, 0x6e, 0x73, 0x9d, 0x4d, 0x4b,
+ 0xfa, 0x8c, 0xf8, 0xc8, 0x2f, 0x76, 0x22, 0xea,
+ 0x38, 0x80, 0x11, 0x8f, 0x05, 0xfc, 0x90, 0x44,
+ 0x3b, 0x50, 0x2a, 0x45, 0x3d, 0x4f, 0xaf, 0x02,
+ 0x7d, 0xc2, 0x7b, 0xa2, 0xd2, 0x31, 0x94, 0x5c,
+ 0x2e, 0xc3, 0xd4, 0x9f, 0x47, 0x09, 0x37, 0x6a,
+ 0xe3, 0x85, 0xf1, 0xa3, 0x0c, 0xd8, 0xf1, 0xb4,
+ 0x53, 0x7b, 0xc4, 0x71, 0x02, 0x86, 0x42, 0xbb,
+ 0x96, 0xff, 0x03, 0xa3, 0xb2, 0x67, 0x03, 0xea,
+ 0x77, 0x31, 0xfb, 0x4b, 0x59, 0x24, 0xf7, 0x07,
+ 0x59, 0xfb, 0xa9, 0xba, 0x1e, 0x26, 0x58, 0x97,
+ 0x66, 0xa1, 0x56, 0x49, 0x39, 0xb1, 0x2c, 0x55,
+ 0x0a, 0x6a, 0x78, 0x18, 0xba, 0xdb, 0xcf, 0xf4,
+ 0xf7, 0x32, 0x35, 0xa2, 0x04, 0xab, 0xdc, 0xa7,
+ 0x6d, 0xd9, 0xd5, 0x06, 0x6f, 0xec, 0x7d, 0x40,
+ 0x4c, 0xe8, 0x0e, 0xd0, 0xc9, 0xaa, 0xdf, 0x59
+ };
+ static const PRUint8 rsa_coefficient[FIPS_RSA_COEFFICIENT_LENGTH] = {
+ 0x17, 0xd7, 0xf5, 0x0a, 0xf0, 0x68, 0x97, 0x96,
+ 0xc4, 0x29, 0x18, 0x77, 0x9a, 0x1f, 0xe3, 0xf3,
+ 0x12, 0x13, 0x0f, 0x7e, 0x7b, 0xb9, 0xc1, 0x91,
+ 0xf9, 0xc7, 0x08, 0x56, 0x5c, 0xa4, 0xbc, 0x83,
+ 0x71, 0xf9, 0x78, 0xd9, 0x2b, 0xec, 0xfe, 0x6b,
+ 0xdc, 0x2f, 0x63, 0xc9, 0xcd, 0x50, 0x14, 0x5b,
+ 0xd3, 0x6e, 0x85, 0x4d, 0x0c, 0xa2, 0x0b, 0xa0,
+ 0x09, 0xb6, 0xca, 0x34, 0x9c, 0xc2, 0xc1, 0x4a,
+ 0xb0, 0xbc, 0x45, 0x93, 0xa5, 0x7e, 0x99, 0xb5,
+ 0xbd, 0xe4, 0x69, 0x29, 0x08, 0x28, 0xd2, 0xcd,
+ 0xab, 0x24, 0x78, 0x48, 0x41, 0x26, 0x0b, 0x37,
+ 0xa3, 0x43, 0xd1, 0x95, 0x1a, 0xd6, 0xee, 0x22,
+ 0x1c, 0x00, 0x0b, 0xc2, 0xb7, 0xa4, 0xa3, 0x21,
+ 0xa9, 0xcd, 0xe4, 0x69, 0xd3, 0x45, 0x02, 0xb1,
+ 0xb7, 0x3a, 0xbf, 0x51, 0x35, 0x1b, 0x78, 0xc2,
+ 0xcf, 0x0c, 0x0d, 0x60, 0x09, 0xa9, 0x44, 0x02
+ };
+
+ /* RSA Known Plaintext Message (1024-bits). */
+ static const PRUint8 rsa_known_plaintext_msg[FIPS_RSA_MESSAGE_LENGTH] = {
+ "Known plaintext message utilized"
+ "for RSA Encryption & Decryption"
+ "blocks SHA256, SHA384 and "
+ "SHA512 RSA Signature KAT tests. "
+ "Known plaintext message utilized"
+ "for RSA Encryption & Decryption"
+ "blocks SHA256, SHA384 and "
+ "SHA512 RSA Signature KAT tests."
+ };
+
+ /* RSA Known Ciphertext (2048-bits). */
+ static const PRUint8 rsa_known_ciphertext[] = {
+ 0x04, 0x12, 0x46, 0xe3, 0x6a, 0xee, 0xde, 0xdd,
+ 0x49, 0xa1, 0xd9, 0x83, 0xf7, 0x35, 0xf9, 0x70,
+ 0x88, 0x03, 0x2d, 0x01, 0x8b, 0xd1, 0xbf, 0xdb,
+ 0xe5, 0x1c, 0x85, 0xbe, 0xb5, 0x0b, 0x48, 0x45,
+ 0x7a, 0xf0, 0xa0, 0xe3, 0xa2, 0xbb, 0x4b, 0xf6,
+ 0x27, 0xd0, 0x1b, 0x12, 0xe3, 0x77, 0x52, 0x34,
+ 0x9e, 0x8e, 0x03, 0xd2, 0xf8, 0x79, 0x6e, 0x39,
+ 0x79, 0x53, 0x3c, 0x44, 0x14, 0x94, 0xbb, 0x8d,
+ 0xaa, 0x14, 0x44, 0xa0, 0x7b, 0xa5, 0x8c, 0x93,
+ 0x5f, 0x99, 0xa4, 0xa3, 0x6e, 0x7a, 0x38, 0x40,
+ 0x78, 0xfa, 0x36, 0x91, 0x5e, 0x9a, 0x9c, 0xba,
+ 0x1e, 0xd4, 0xf9, 0xda, 0x4b, 0x0f, 0xa8, 0xa3,
+ 0x1c, 0xf3, 0x3a, 0xd1, 0xa5, 0xb4, 0x51, 0x16,
+ 0xed, 0x4b, 0xcf, 0xec, 0x93, 0x7b, 0x90, 0x21,
+ 0xbc, 0x3a, 0xf4, 0x0b, 0xd1, 0x3a, 0x2b, 0xba,
+ 0xa6, 0x7d, 0x5b, 0x53, 0xd8, 0x64, 0xf9, 0x29,
+ 0x7b, 0x7f, 0x77, 0x3e, 0x51, 0x4c, 0x9a, 0x94,
+ 0xd2, 0x4b, 0x4a, 0x8d, 0x61, 0x74, 0x97, 0xae,
+ 0x53, 0x6a, 0xf4, 0x90, 0xc2, 0x2c, 0x49, 0xe2,
+ 0xfa, 0xeb, 0x91, 0xc5, 0xe5, 0x83, 0x13, 0xc9,
+ 0x44, 0x4b, 0x95, 0x2c, 0x57, 0x70, 0x15, 0x5c,
+ 0x64, 0x8d, 0x1a, 0xfd, 0x2a, 0xc7, 0xb2, 0x9c,
+ 0x5c, 0x99, 0xd3, 0x4a, 0xfd, 0xdd, 0xf6, 0x82,
+ 0x87, 0x8c, 0x5a, 0xc4, 0xa8, 0x0d, 0x2a, 0xef,
+ 0xc3, 0xa2, 0x7e, 0x8e, 0x67, 0x9f, 0x6f, 0x63,
+ 0xdb, 0xbb, 0x1d, 0x31, 0xc4, 0xbb, 0xbc, 0x13,
+ 0x3f, 0x54, 0xc6, 0xf6, 0xc5, 0x28, 0x32, 0xab,
+ 0x96, 0x42, 0x10, 0x36, 0x40, 0x92, 0xbb, 0x57,
+ 0x55, 0x38, 0xf5, 0x43, 0x7e, 0x43, 0xc4, 0x65,
+ 0x47, 0x64, 0xaa, 0x0f, 0x4c, 0xe9, 0x49, 0x16,
+ 0xec, 0x6a, 0x50, 0xfd, 0x14, 0x49, 0xca, 0xdb,
+ 0x44, 0x54, 0xca, 0xbe, 0xa3, 0x0e, 0x5f, 0xef
+ };
+
+ static const RSAPublicKey bl_public_key = {
+ NULL,
+ { FIPS_RSA_TYPE, (unsigned char *)rsa_modulus,
+ FIPS_RSA_MODULUS_LENGTH },
+ { FIPS_RSA_TYPE, (unsigned char *)rsa_public_exponent,
+ FIPS_RSA_PUBLIC_EXPONENT_LENGTH }
+ };
+ static const RSAPrivateKey bl_private_key = {
+ NULL,
+ { FIPS_RSA_TYPE, (unsigned char *)rsa_version,
+ FIPS_RSA_PRIVATE_VERSION_LENGTH },
+ { FIPS_RSA_TYPE, (unsigned char *)rsa_modulus,
+ FIPS_RSA_MODULUS_LENGTH },
+ { FIPS_RSA_TYPE, (unsigned char *)rsa_public_exponent,
+ FIPS_RSA_PUBLIC_EXPONENT_LENGTH },
+ { FIPS_RSA_TYPE, (unsigned char *)rsa_private_exponent,
+ FIPS_RSA_PRIVATE_EXPONENT_LENGTH },
+ { FIPS_RSA_TYPE, (unsigned char *)rsa_prime0,
+ FIPS_RSA_PRIME0_LENGTH },
+ { FIPS_RSA_TYPE, (unsigned char *)rsa_prime1,
+ FIPS_RSA_PRIME1_LENGTH },
+ { FIPS_RSA_TYPE, (unsigned char *)rsa_exponent0,
+ FIPS_RSA_EXPONENT0_LENGTH },
+ { FIPS_RSA_TYPE, (unsigned char *)rsa_exponent1,
+ FIPS_RSA_EXPONENT1_LENGTH },
+ { FIPS_RSA_TYPE, (unsigned char *)rsa_coefficient,
+ FIPS_RSA_COEFFICIENT_LENGTH }
+ };
+
+ /* RSA variables. */
+ SECStatus rsa_status;
+ RSAPublicKey rsa_public_key;
+ RSAPrivateKey rsa_private_key;
+
+ PRUint8 rsa_computed_ciphertext[FIPS_RSA_ENCRYPT_LENGTH];
+ PRUint8 rsa_computed_plaintext[FIPS_RSA_DECRYPT_LENGTH];
+
+ rsa_public_key = bl_public_key;
+ rsa_private_key = bl_private_key;
+
+ /**************************************************/
+ /* RSA Single-Round Known Answer Encryption Test. */
+ /**************************************************/
+
+ /* Perform RSA Public Key Encryption. */
+ rsa_status = RSA_PublicKeyOp(&rsa_public_key,
+ rsa_computed_ciphertext,
+ rsa_known_plaintext_msg);
+
+ if ((rsa_status != SECSuccess) ||
+ (PORT_Memcmp(rsa_computed_ciphertext, rsa_known_ciphertext,
+ FIPS_RSA_ENCRYPT_LENGTH) != 0))
+ goto rsa_loser;
+
+ /**************************************************/
+ /* RSA Single-Round Known Answer Decryption Test. */
+ /**************************************************/
+
+ /* Perform RSA Private Key Decryption. */
+ rsa_status = RSA_PrivateKeyOp(&rsa_private_key,
+ rsa_computed_plaintext,
+ rsa_known_ciphertext);
+
+ if ((rsa_status != SECSuccess) ||
+ (PORT_Memcmp(rsa_computed_plaintext, rsa_known_plaintext_msg,
+ FIPS_RSA_DECRYPT_LENGTH) != 0))
+ goto rsa_loser;
+
+ return (SECSuccess);
+
+rsa_loser:
+
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return (SECFailure);
+}
+
+#ifdef NSS_ENABLE_ECC
+
+/*
+ * Known-answer self-test for ECDSA on a single curve.
+ *
+ * Creates a key pair from a fixed seed with EC_NewKeyFromSeed, validates
+ * the public value, signs the SHA-1 digest of a fixed message with
+ * ECDSA_SignDigestWithSeed, compares the signature byte-for-byte with the
+ * expected one and finally verifies it with ECDSA_VerifyDigest.
+ *
+ *   ecparams          - parameters of the curve to test
+ *   knownSignature    - expected signature bytes
+ *   knownSignatureLen - length of knownSignature in bytes
+ *
+ * Returns SECSuccess if every step succeeds. On any failure the error code
+ * is set to SEC_ERROR_LIBRARY_FAILURE and SECFailure is returned.
+ */
+static SECStatus
+freebl_fips_ECDSA_Test(ECParams *ecparams,
+                       const PRUint8 *knownSignature,
+                       unsigned int knownSignatureLen)
+{
+
+    /* ECDSA Known Seed info for curves nistp256 and nistk283 */
+    static const PRUint8 ecdsa_Known_Seed[] = {
+        0x6a, 0x9b, 0xf6, 0xf7, 0xce, 0xed, 0x79, 0x11,
+        0xf0, 0xc7, 0xc8, 0x9a, 0xa5, 0xd1, 0x57, 0xb1,
+        0x7b, 0x5a, 0x3b, 0x76, 0x4e, 0x7b, 0x7c, 0xbc,
+        0xf2, 0x76, 0x1c, 0x1c, 0x7f, 0xc5, 0x53, 0x2f
+    };
+
+    /* The array is sized by the string literal, so "sizeof msg" below
+     * includes the terminating NUL byte in the hashed data. */
+    static const PRUint8 msg[] = {
+        "Firefox and ThunderBird are awesome!"
+    };
+
+    unsigned char sha1[SHA1_LENGTH]; /* SHA-1 hash (160 bits) */
+    unsigned char sig[2 * MAX_ECKEY_LEN];
+    SECItem signature, digest;
+    ECPrivateKey *ecdsa_private_key = NULL;
+    ECPublicKey ecdsa_public_key;
+    SECStatus ecdsaStatus = SECSuccess;
+
+    /* Generates a new EC key pair. The private key is a supplied
+     * random value (in seed) and the public key is the result of
+     * performing a scalar point multiplication of that value with
+     * the curve's base point.
+     */
+    ecdsaStatus = EC_NewKeyFromSeed(ecparams, &ecdsa_private_key,
+                                    ecdsa_Known_Seed,
+                                    sizeof(ecdsa_Known_Seed));
+    if (ecdsaStatus != SECSuccess) {
+        /* No key was created, so there is nothing to free yet. */
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return (SECFailure);
+    }
+
+    /* construct public key from private key (shallow copy: the public
+     * key refers to the same data as the private key). */
+    ecdsa_public_key.ecParams = ecdsa_private_key->ecParams;
+    ecdsa_public_key.publicValue = ecdsa_private_key->publicValue;
+
+    /* validate public key value. One check is sufficient: the private
+     * key's ecParams/publicValue are the very values copied above. */
+    ecdsaStatus = EC_ValidatePublicKey(&ecdsa_public_key.ecParams,
+                                       &ecdsa_public_key.publicValue);
+    if (ecdsaStatus != SECSuccess) {
+        goto loser;
+    }
+
+    /***************************************************/
+    /* ECDSA Single-Round Known Answer Signature Test. */
+    /***************************************************/
+
+    ecdsaStatus = SHA1_HashBuf(sha1, msg, sizeof msg);
+    if (ecdsaStatus != SECSuccess) {
+        goto loser;
+    }
+    digest.type = siBuffer;
+    digest.data = sha1;
+    digest.len = SHA1_LENGTH;
+
+    memset(sig, 0, sizeof sig);
+    signature.type = siBuffer;
+    signature.data = sig;
+    signature.len = sizeof sig;
+
+    ecdsaStatus = ECDSA_SignDigestWithSeed(ecdsa_private_key, &signature,
+                                           &digest, ecdsa_Known_Seed,
+                                           sizeof ecdsa_Known_Seed);
+    if (ecdsaStatus != SECSuccess) {
+        goto loser;
+    }
+
+    if ((signature.len != knownSignatureLen) ||
+        (PORT_Memcmp(signature.data, knownSignature,
+                     knownSignatureLen) != 0)) {
+        ecdsaStatus = SECFailure;
+        goto loser;
+    }
+
+    /******************************************************/
+    /* ECDSA Single-Round Known Answer Verification Test. */
+    /******************************************************/
+
+    /* Perform ECDSA verification process. */
+    ecdsaStatus = ECDSA_VerifyDigest(&ecdsa_public_key, &signature, &digest);
+
+loser:
+    /* Free the private key arena, zeroizing it because it holds the
+     * private value (same policy as the DSA self-test in this file). */
+    PORT_FreeArena(ecdsa_private_key->ecParams.arena, PR_TRUE);
+
+    if (ecdsaStatus != SECSuccess) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return (SECFailure);
+    }
+    return (SECSuccess);
+}
+
+/*
+ * ECDSA power-up self-test.
+ *
+ * Runs freebl_fips_ECDSA_Test on the hard-coded nistp256 parameters and
+ * the matching known signature below.
+ *
+ * Returns SECSuccess if the test passes, SECFailure otherwise.
+ */
+static SECStatus
+freebl_fips_ECDSA_PowerUpSelfTest(void)
+{
+
+    /* ECDSA Known curve nistp256 == ECCCurve_X9_62_PRIME_256V1 params */
+    static const unsigned char p256_prime[] = {
+        0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
+    };
+    static const unsigned char p256_a[] = {
+        0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFC
+    };
+    static const unsigned char p256_b[] = {
+        0x5A, 0xC6, 0x35, 0xD8, 0xAA, 0x3A, 0x93, 0xE7, 0xB3, 0xEB, 0xBD, 0x55, 0x76,
+        0x98, 0x86, 0xBC, 0x65, 0x1D, 0x06, 0xB0, 0xCC, 0x53, 0xB0, 0xF6, 0x3B, 0xCE,
+        0x3C, 0x3E, 0x27, 0xD2, 0x60, 0x4B
+    };
+    static const unsigned char p256_base[] = {
+        0x04,
+        0x6B, 0x17, 0xD1, 0xF2, 0xE1, 0x2C, 0x42, 0x47, 0xF8, 0xBC, 0xE6, 0xE5, 0x63,
+        0xA4, 0x40, 0xF2, 0x77, 0x03, 0x7D, 0x81, 0x2D, 0xEB, 0x33, 0xA0, 0xF4, 0xA1,
+        0x39, 0x45, 0xD8, 0x98, 0xC2, 0x96,
+        0x4F, 0xE3, 0x42, 0xE2, 0xFE, 0x1A, 0x7F, 0x9B, 0x8E, 0xE7, 0xEB, 0x4A, 0x7C,
+        0x0F, 0x9E, 0x16, 0x2B, 0xCE, 0x33, 0x57, 0x6B, 0x31, 0x5E, 0xCE, 0xCB, 0xB6,
+        0x40, 0x68, 0x37, 0xBF, 0x51, 0xF5
+    };
+    static const unsigned char p256_order[] = {
+        0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+        0xFF, 0xFF, 0xFF, 0xBC, 0xE6, 0xFA, 0xAD, 0xA7, 0x17, 0x9E, 0x84, 0xF3, 0xB9,
+        0xCA, 0xC2, 0xFC, 0x63, 0x25, 0x51
+    };
+    static const unsigned char p256_encoding[] = {
+        0x06, 0x08, 0x2a, 0x86, 0x48, 0xce, 0x3d, 0x03, 0x01, 0x07
+    };
+    static const ECParams ecdsa_known_P256_Params = {
+        NULL, ec_params_named, /* arena, type */
+        /* fieldID */
+        { 256, ec_field_GFp, /* size and type */
+          { { siBuffer, (unsigned char *)p256_prime, sizeof(p256_prime) } }, /* u.prime */
+          0,
+          0,
+          0 },
+        /* curve */
+        { /* a = curvea b = curveb */
+          /* curve.a */
+          { siBuffer, (unsigned char *)p256_a, sizeof(p256_a) },
+          /* curve.b */
+          { siBuffer, (unsigned char *)p256_b, sizeof(p256_b) },
+          /* curve.seed */
+          { siBuffer, NULL, 0 } },
+        /* base = 04xy*/
+        { siBuffer, (unsigned char *)p256_base, sizeof(p256_base) },
+        /* order */
+        { siBuffer, (unsigned char *)p256_order, sizeof(p256_order) },
+        1, /* cofactor */
+        /* DEREncoding */
+        { siBuffer, (unsigned char *)p256_encoding, sizeof(p256_encoding) },
+        ECCurve_X9_62_PRIME_256V1,
+        /* curveOID: the DER encoding without its two leading bytes */
+        { siBuffer, (unsigned char *)(p256_encoding) + 2, sizeof(p256_encoding) - 2 },
+    };
+
+    static const PRUint8 ecdsa_known_P256_signature[] = {
+        0x07, 0xb1, 0xcb, 0x57, 0x20, 0xa7, 0x10, 0xd6,
+        0x9d, 0x37, 0x4b, 0x1c, 0xdc, 0x35, 0x90, 0xff,
+        0x1a, 0x2d, 0x98, 0x95, 0x1b, 0x2f, 0xeb, 0x7f,
+        0xbb, 0x81, 0xca, 0xc0, 0x69, 0x75, 0xea, 0xc5,
+        0x59, 0x6a, 0x62, 0x49, 0x3d, 0x50, 0xc9, 0xe1,
+        0x27, 0x3b, 0xff, 0x9b, 0x13, 0x66, 0x67, 0xdd,
+        0x7d, 0xd1, 0x0d, 0x2d, 0x7c, 0x44, 0x04, 0x1b,
+        0x16, 0x21, 0x12, 0xc5, 0xcb, 0xbd, 0x9e, 0x75
+    };
+
+    /* Mutable copy: freebl_fips_ECDSA_Test takes a non-const ECParams *. */
+    ECParams ecparams;
+
+    SECStatus rv;
+
+    /* ECDSA GF(p) prime field curve test */
+    ecparams = ecdsa_known_P256_Params;
+    rv = freebl_fips_ECDSA_Test(&ecparams,
+                                ecdsa_known_P256_signature,
+                                sizeof ecdsa_known_P256_signature);
+    if (rv != SECSuccess) {
+        return (SECFailure);
+    }
+
+    return (SECSuccess);
+}
+
+#endif /* NSS_ENABLE_ECC */
+
+static SECStatus
+freebl_fips_DSA_PowerUpSelfTest(void)
+{
+ /* DSA power-up known-answer self-test.
+ *
+ * Creates a key pair from the fixed PQG parameters and known key block
+ * below, signs a fixed digest using the known signature block, compares
+ * the result with the stored known signature and, only if it matches,
+ * verifies that signature with the public half of the key.
+ *
+ * Returns SECSuccess when both the signature comparison and the
+ * verification succeed. Returns SECFailure with SEC_ERROR_NO_MEMORY when
+ * the key pair cannot be created, or with SEC_ERROR_LIBRARY_FAILURE when
+ * the signature or verification step fails.
+ *
+ * DSA Known P (1024-bits), Q (160-bits), and G (1024-bits) Values. */
+ static const PRUint8 dsa_P[] = {
+ 0x80, 0xb0, 0xd1, 0x9d, 0x6e, 0xa4, 0xf3, 0x28,
+ 0x9f, 0x24, 0xa9, 0x8a, 0x49, 0xd0, 0x0c, 0x63,
+ 0xe8, 0x59, 0x04, 0xf9, 0x89, 0x4a, 0x5e, 0xc0,
+ 0x6d, 0xd2, 0x67, 0x6b, 0x37, 0x81, 0x83, 0x0c,
+ 0xfe, 0x3a, 0x8a, 0xfd, 0xa0, 0x3b, 0x08, 0x91,
+ 0x1c, 0xcb, 0xb5, 0x63, 0xb0, 0x1c, 0x70, 0xd0,
+ 0xae, 0xe1, 0x60, 0x2e, 0x12, 0xeb, 0x54, 0xc7,
+ 0xcf, 0xc6, 0xcc, 0xae, 0x97, 0x52, 0x32, 0x63,
+ 0xd3, 0xeb, 0x55, 0xea, 0x2f, 0x4c, 0xd5, 0xd7,
+ 0x3f, 0xda, 0xec, 0x49, 0x27, 0x0b, 0x14, 0x56,
+ 0xc5, 0x09, 0xbe, 0x4d, 0x09, 0x15, 0x75, 0x2b,
+ 0xa3, 0x42, 0x0d, 0x03, 0x71, 0xdf, 0x0f, 0xf4,
+ 0x0e, 0xe9, 0x0c, 0x46, 0x93, 0x3d, 0x3f, 0xa6,
+ 0x6c, 0xdb, 0xca, 0xe5, 0xac, 0x96, 0xc8, 0x64,
+ 0x5c, 0xec, 0x4b, 0x35, 0x65, 0xfc, 0xfb, 0x5a,
+ 0x1b, 0x04, 0x1b, 0xa1, 0x0e, 0xfd, 0x88, 0x15
+ };
+
+ static const PRUint8 dsa_Q[] = {
+ 0xad, 0x22, 0x59, 0xdf, 0xe5, 0xec, 0x4c, 0x6e,
+ 0xf9, 0x43, 0xf0, 0x4b, 0x2d, 0x50, 0x51, 0xc6,
+ 0x91, 0x99, 0x8b, 0xcf
+ };
+
+ static const PRUint8 dsa_G[] = {
+ 0x78, 0x6e, 0xa9, 0xd8, 0xcd, 0x4a, 0x85, 0xa4,
+ 0x45, 0xb6, 0x6e, 0x5d, 0x21, 0x50, 0x61, 0xf6,
+ 0x5f, 0xdf, 0x5c, 0x7a, 0xde, 0x0d, 0x19, 0xd3,
+ 0xc1, 0x3b, 0x14, 0xcc, 0x8e, 0xed, 0xdb, 0x17,
+ 0xb6, 0xca, 0xba, 0x86, 0xa9, 0xea, 0x51, 0x2d,
+ 0xc1, 0xa9, 0x16, 0xda, 0xf8, 0x7b, 0x59, 0x8a,
+ 0xdf, 0xcb, 0xa4, 0x67, 0x00, 0x44, 0xea, 0x24,
+ 0x73, 0xe5, 0xcb, 0x4b, 0xaf, 0x2a, 0x31, 0x25,
+ 0x22, 0x28, 0x3f, 0x16, 0x10, 0x82, 0xf7, 0xeb,
+ 0x94, 0x0d, 0xdd, 0x09, 0x22, 0x14, 0x08, 0x79,
+ 0xba, 0x11, 0x0b, 0xf1, 0xff, 0x2d, 0x67, 0xac,
+ 0xeb, 0xb6, 0x55, 0x51, 0x69, 0x97, 0xa7, 0x25,
+ 0x6b, 0x9c, 0xa0, 0x9b, 0xd5, 0x08, 0x9b, 0x27,
+ 0x42, 0x1c, 0x7a, 0x69, 0x57, 0xe6, 0x2e, 0xed,
+ 0xa9, 0x5b, 0x25, 0xe8, 0x1f, 0xd2, 0xed, 0x1f,
+ 0xdf, 0xe7, 0x80, 0x17, 0xba, 0x0d, 0x4d, 0x38
+ };
+
+ /* DSA Known Random Values (known random key block is 160-bits) */
+ /* and (known random signature block is 160-bits).
+ * The arrays are sized by their string literals, so each also holds a
+ * terminating NUL byte; presumably only the first 160 bits are consumed
+ * by the callee (TODO confirm against DSA_NewKeyFromSeed and
+ * DSA_SignDigestWithSeed). */
+ static const PRUint8 dsa_known_random_key_block[] = {
+ "Mozilla Rules World!"
+ };
+ static const PRUint8 dsa_known_random_signature_block[] = {
+ "Random DSA Signature"
+ };
+
+ /* DSA Known Digest (160-bits); only SHA1_LENGTH bytes of it are passed
+ * on as the digest below. */
+ static const PRUint8 dsa_known_digest[] = { "DSA Signature Digest" };
+
+ /* DSA Known Signature (320-bits). */
+ static const PRUint8 dsa_known_signature[] = {
+ 0x25, 0x7c, 0x3a, 0x79, 0x32, 0x45, 0xb7, 0x32,
+ 0x70, 0xca, 0x62, 0x63, 0x2b, 0xf6, 0x29, 0x2c,
+ 0x22, 0x2a, 0x03, 0xce, 0x48, 0x15, 0x11, 0x72,
+ 0x7b, 0x7e, 0xf5, 0x7a, 0xf3, 0x10, 0x3b, 0xde,
+ 0x34, 0xc1, 0x9e, 0xd7, 0x27, 0x9e, 0x77, 0x38
+ };
+
+ /* DSA variables. */
+ DSAPrivateKey *dsa_private_key;
+ SECStatus dsa_status;
+ SECItem dsa_signature_item;
+ SECItem dsa_digest_item;
+ DSAPublicKey dsa_public_key;
+ PRUint8 dsa_computed_signature[FIPS_DSA_SIGNATURE_LENGTH];
+ static const PQGParams dsa_pqg = {
+ NULL,
+ { FIPS_DSA_TYPE, (unsigned char *)dsa_P, FIPS_DSA_PRIME_LENGTH },
+ { FIPS_DSA_TYPE, (unsigned char *)dsa_Q, FIPS_DSA_SUBPRIME_LENGTH },
+ { FIPS_DSA_TYPE, (unsigned char *)dsa_G, FIPS_DSA_BASE_LENGTH }
+ };
+
+ /*******************************************/
+ /* Generate a DSA public/private key pair. */
+ /*******************************************/
+
+ /* Create the key pair from the fixed PQG parameters and known key block. */
+ dsa_status = DSA_NewKeyFromSeed(&dsa_pqg, dsa_known_random_key_block,
+ &dsa_private_key);
+
+ if (dsa_status != SECSuccess) {
+ PORT_SetError(SEC_ERROR_NO_MEMORY); /* NOTE(review): every key-creation failure is reported as NO_MEMORY */
+ return (SECFailure);
+ }
+
+ /* construct public key from private key (member copies only). */
+ dsa_public_key.params = dsa_private_key->params;
+ dsa_public_key.publicValue = dsa_private_key->publicValue;
+
+ /*************************************************/
+ /* DSA Single-Round Known Answer Signature Test. */
+ /*************************************************/
+
+ dsa_signature_item.data = dsa_computed_signature;
+ dsa_signature_item.len = sizeof dsa_computed_signature;
+
+ dsa_digest_item.data = (unsigned char *)dsa_known_digest;
+ dsa_digest_item.len = SHA1_LENGTH;
+
+ /* Perform DSA signature process. */
+ dsa_status = DSA_SignDigestWithSeed(dsa_private_key,
+ &dsa_signature_item,
+ &dsa_digest_item,
+ dsa_known_random_signature_block);
+
+ if ((dsa_status != SECSuccess) ||
+ (dsa_signature_item.len != FIPS_DSA_SIGNATURE_LENGTH) ||
+ (PORT_Memcmp(dsa_computed_signature, dsa_known_signature,
+ FIPS_DSA_SIGNATURE_LENGTH) != 0)) {
+ dsa_status = SECFailure;
+ } else {
+
+ /****************************************************/
+ /* DSA Single-Round Known Answer Verification Test. */
+ /****************************************************/
+
+ /* Perform DSA verification process. */
+ dsa_status = DSA_VerifyDigest(&dsa_public_key,
+ &dsa_signature_item,
+ &dsa_digest_item);
+ }
+
+ PORT_FreeArena(dsa_private_key->params.arena, PR_TRUE);
+ /* Don't free public key, it uses same arena as private key.
+ * The arena is released on both the success and the failure path;
+ * PR_TRUE presumably requests zeroization (confirm against
+ * PORT_FreeArena). */
+
+ /* Report a failure of either the signature comparison or the verification. */
+ if (dsa_status != SECSuccess) {
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return SECFailure;
+ }
+
+ return (SECSuccess);
+}
+
+/*
+ * RNG power-up self-test.
+ *
+ * First runs the SP 800-90 health tests, then reduces a fixed input
+ * modulo a fixed Q with FIPS186Change_ReduceModQForDSA and compares the
+ * result with the stored known answer.
+ *
+ * Returns SECSuccess if both steps pass; otherwise sets
+ * SEC_ERROR_LIBRARY_FAILURE and returns SECFailure.
+ */
+static SECStatus
+freebl_fips_RNG_PowerUpSelfTest(void)
+{
+    /* Subprime Q used for the reduction. */
+    static const PRUint8 subprime_q[] = {
+        0x85, 0x89, 0x9c, 0x77, 0xa3, 0x79, 0xff, 0x1a,
+        0x86, 0x6f, 0x2f, 0x3e, 0x2e, 0xf9, 0x8c, 0x9c,
+        0x9d, 0xef, 0xeb, 0xed
+    };
+    /* Input value that gets reduced modulo Q. */
+    static const PRUint8 input_x[] = {
+        0x65, 0x48, 0xe3, 0xca, 0xac, 0x64, 0x2d, 0xf7,
+        0x7b, 0xd3, 0x4e, 0x79, 0xc9, 0x7d, 0xa6, 0xa8,
+        0xa2, 0xc2, 0x1f, 0x8f, 0xe9, 0xb9, 0xd3, 0xa1,
+        0x3f, 0xf7, 0x0c, 0xcd, 0xa6, 0xca, 0xbf, 0xce,
+        0x84, 0x0e, 0xb6, 0xf1, 0x0d, 0xbe, 0xa9, 0xa3
+    };
+    /* Expected result of the reduction. */
+    static const PRUint8 expected_x[] = {
+        0x7a, 0x86, 0xf1, 0x7f, 0xbd, 0x4e, 0x6e, 0xd9,
+        0x0a, 0x26, 0x21, 0xd0, 0x19, 0xcb, 0x86, 0x73,
+        0x10, 0x1f, 0x60, 0xd7
+    };
+
+    PRUint8 reduced_x[FIPS_DSA_SUBPRIME_LENGTH];
+
+    /* Step 1: SP 800-90 health tests. */
+    if (PRNGTEST_RunHealthTests() != SECSuccess) {
+        goto failed;
+    }
+
+    /* Step 2: generate DSAX for the given Q ... */
+    if (FIPS186Change_ReduceModQForDSA(input_x, subprime_q, reduced_x) != SECSuccess) {
+        goto failed;
+    }
+
+    /* ... and check it against the known answer (RNG integrity check). */
+    if (PORT_Memcmp(reduced_x, expected_x, (FIPS_DSA_SUBPRIME_LENGTH)) != 0) {
+        goto failed;
+    }
+
+    return (SECSuccess);
+
+failed:
+    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+    return SECFailure;
+}
+
+/*
+ * Software integrity test: checks the library's own check-file signature
+ * through BLAPI_VerifySelf.
+ *
+ *   libname - library name handed to BLAPI_VerifySelf
+ *
+ * Returns SECSuccess when BLAPI_VerifySelf reports success, SECFailure
+ * otherwise. No error code is set here.
+ */
+static SECStatus
+freebl_fipsSoftwareIntegrityTest(const char *libname)
+{
+    return BLAPI_VerifySelf(libname) ? SECSuccess : SECFailure;
+}
+
+#define DO_FREEBL 1 /* tests available to standalone freebl: hash and RNG */
+#define DO_REST 2 /* every other algorithm self-test */
+
+/*
+ * Run the power-up self-tests selected by the bit mask 'tests'
+ * (DO_FREEBL, DO_REST or both).
+ *
+ * The tests run in a fixed order and stop at the first failure, whose
+ * status is returned unchanged. SECSuccess is returned when every selected
+ * test passes (or when no test is selected).
+ */
+static SECStatus
+freebl_fipsPowerUpSelfTest(unsigned int tests)
+{
+    SECStatus status = SECSuccess;
+
+    /* Stand-alone freebl: hash and RNG only. */
+    if (tests & DO_FREEBL) {
+        /* SHA-X */
+        status = freebl_fips_SHA_PowerUpSelfTest();
+        if (status != SECSuccess) {
+            goto done;
+        }
+
+        /* RNG */
+        status = freebl_fips_RNG_PowerUpSelfTest();
+        if (status != SECSuccess) {
+            goto done;
+        }
+    }
+
+    /* Algorithms that are not reachable through stand-alone freebl. */
+    if (tests & DO_REST) {
+        /* DES3 */
+        status = freebl_fips_DES3_PowerUpSelfTest();
+        if (status != SECSuccess) {
+            goto done;
+        }
+
+        /* AES, 128-bit key */
+        status = freebl_fips_AES_PowerUpSelfTest(FIPS_AES_128_KEY_SIZE);
+        if (status != SECSuccess) {
+            goto done;
+        }
+
+        /* AES, 192-bit key */
+        status = freebl_fips_AES_PowerUpSelfTest(FIPS_AES_192_KEY_SIZE);
+        if (status != SECSuccess) {
+            goto done;
+        }
+
+        /* AES, 256-bit key */
+        status = freebl_fips_AES_PowerUpSelfTest(FIPS_AES_256_KEY_SIZE);
+        if (status != SECSuccess) {
+            goto done;
+        }
+
+        /* HMAC SHA-X */
+        status = freebl_fips_HMAC_PowerUpSelfTest();
+        if (status != SECSuccess) {
+            goto done;
+        }
+
+        /* RSA. NOTE: RSA can only be tested in full freebl, because it
+         * requires access to the locking primitives. */
+        status = freebl_fips_RSA_PowerUpSelfTest();
+        if (status != SECSuccess) {
+            goto done;
+        }
+
+        /* DSA */
+        status = freebl_fips_DSA_PowerUpSelfTest();
+        if (status != SECSuccess) {
+            goto done;
+        }
+
+#ifdef NSS_ENABLE_ECC
+        /* ECDSA (last test: its status is the overall result) */
+        status = freebl_fips_ECDSA_PowerUpSelfTest();
+#endif
+    }
+
+done:
+    /* SECSuccess here means every selected self-test passed. */
+    return status;
+}
+
+/*
+ * State variables. NOTE: freebl has two uses: a standalone use, which
+ * provides limited access to the hash functions through the NSSLOWHASH_
+ * interface, and a joint use from softoken, using the function pointer
+ * table. The standalone use can operate without nspr or nss-util, while
+ * the joint use requires both to be loaded. Certain functions (like RSA)
+ * need locking from NSPR, for instance.
+ *
+ * At load time, we need to handle the two uses separately. If nspr and
+ * nss-util are loaded, then we can run all the selftests, but if nspr and
+ * nss-util are not loaded, then we can't run all the selftests, and we need
+ * to prevent the softoken function pointer table from operating until the
+ * libraries are loaded and we try to use them.
+ */
+static PRBool self_tests_freebl_ran = PR_FALSE; /* set once bl_startup_tests has started */
+static PRBool self_tests_ran = PR_FALSE; /* set once the full suite was started (bl_startup_tests or BL_POSTRan) */
+static PRBool self_tests_freebl_success = PR_FALSE; /* set by bl_startup_tests after its tests and the integrity check pass */
+static PRBool self_tests_success = PR_FALSE; /* set when the full suite passed (bl_startup_tests or BL_POSTRan) */
+#if defined(DEBUG)
+static PRBool fips_mode_available = PR_FALSE; /* debug builds only; in the code visible here it is only read by an assertion in bl_startup_tests */
+#endif
+
+/*
+ * Accessor for freebl: reports whether the power-on self-tests (POST)
+ * have been run.
+ *
+ *   freebl_only - PR_TRUE when the caller only needs the standalone
+ *                 freebl tests
+ *
+ * Returns PR_FALSE if the load-time freebl tests never ran, PR_TRUE
+ * otherwise. If the full suite has not run yet and the caller needs it,
+ * the remaining tests are run here first; their outcome is recorded in
+ * self_tests_success and does not affect the return value.
+ */
+PRBool
+BL_POSTRan(PRBool freebl_only)
+{
+    /* If the freebl self tests didn't run, something is wrong with our
+     * on-load tests. */
+    if (!self_tests_freebl_ran) {
+        return PR_FALSE;
+    }
+
+    /* Nothing left to do when the full suite already ran, or when the
+     * caller only cares about the freebl tests. */
+    if (!self_tests_ran && !freebl_only) {
+        /* We get here if freebl was loaded without the rest of the support
+         * libraries, but now more than a standalone freebl is wanted. That
+         * requires the other libraries to be loaded. If they are loaded
+         * now, try to run the rest of the selftests; otherwise fail
+         * (disabling access to these algorithms). */
+        self_tests_ran = PR_TRUE;
+        BL_Init();     /* required by RSA */
+        RNG_RNGInit(); /* required by RSA */
+        if (freebl_fipsPowerUpSelfTest(DO_REST) == SECSuccess) {
+            self_tests_success = PR_TRUE;
+        }
+    }
+
+    return PR_TRUE;
+}
+
+#include "blname.c"
+
+/*
+ * This function is called at dll load time; the code that makes this
+ * happen is platform specific and defined above.
+ *
+ * It runs the power-up self-tests followed by the software integrity test
+ * and records the outcome in the self_tests_* state variables. When
+ * FREEBL_NO_DEPEND is defined and FREEBL_InitStubs fails, only the
+ * standalone (DO_FREEBL) tests are run.
+ */
+static void
+bl_startup_tests(void)
+{
+    PRBool standalone = PR_FALSE;       /* PR_TRUE: run only the freebl tests */
+    unsigned int selected = DO_FREEBL;  /* the post tests are always run */
+
+    PORT_Assert(self_tests_freebl_ran == PR_FALSE);
+    PORT_Assert(self_tests_success == PR_FALSE);
+    PORT_Assert(fips_mode_available == PR_FALSE);
+    self_tests_freebl_ran = PR_TRUE;      /* we are running the tests */
+    self_tests_success = PR_FALSE;        /* force it just in case */
+    self_tests_freebl_success = PR_FALSE; /* force it just in case */
+
+#ifdef FREEBL_NO_DEPEND
+    if (FREEBL_InitStubs() != SECSuccess) {
+        standalone = PR_TRUE;
+    }
+#endif
+
+    if (!standalone) {
+        self_tests_ran = PR_TRUE; /* we're running all the tests */
+        BL_Init();                /* needs to be called before RSA can be used */
+        RNG_RNGInit();
+        selected |= DO_REST;
+    }
+
+    if (freebl_fipsPowerUpSelfTest(selected) != SECSuccess) {
+        return;
+    }
+
+    if (freebl_fipsSoftwareIntegrityTest(getLibName()) != SECSuccess) {
+        return;
+    }
+
+    /* posts are happy, allow the fips module to function now */
+    self_tests_freebl_success = PR_TRUE; /* we always test the freebl stuff */
+    if (!standalone) {
+        self_tests_success = PR_TRUE;
+    }
+}
+
+/*
+ * This is called from the freebl init entry points that control access to
+ * all other freebl functions. It prevents freebl from operating if our
+ * power-on self-test failed.
+ *
+ *   freebl_only - non-zero when the caller only needs standalone freebl
+ *
+ * Returns SECSuccess if the full self-test suite succeeded, or if
+ * freebl_only is set and the standalone freebl self-tests succeeded.
+ * Otherwise sets SEC_ERROR_LIBRARY_FAILURE and returns SECFailure.
+ */
+SECStatus
+BL_FIPSEntryOK(PRBool freebl_only)
+{
+#ifdef NSS_NO_INIT_SUPPORT
+    /* this should only be set on platforms that can't handle one of the INIT
+     * schemes. This code allows those platforms to continue to function,
+     * though they don't meet the strict NIST requirements. If NSS_NO_INIT_SUPPORT
+     * is not set, and init support has not been properly enabled, freebl
+     * will always fail because of the test below
+     */
+    if (!self_tests_freebl_ran) {
+        bl_startup_tests();
+    }
+#endif
+    /* if the general self tests succeeded, we're done */
+    if (self_tests_success) {
+        return SECSuccess;
+    }
+    /* standalone freebl can initialize. Logical AND is required here: a
+     * bitwise '&' would wrongly reject a non-zero freebl_only value whose
+     * low bit happens to be clear. */
+    if (freebl_only && self_tests_freebl_success) {
+        return SECSuccess;
+    }
+    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+    return SECFailure;
+}
diff --git a/security/nss/lib/freebl/freebl.def b/security/nss/lib/freebl/freebl.def
new file mode 100644
index 000000000..164c843fd
--- /dev/null
+++ b/security/nss/lib/freebl/freebl.def
@@ -0,0 +1,26 @@
+;+#
+;+# This Source Code Form is subject to the terms of the Mozilla Public
+;+# License, v. 2.0. If a copy of the MPL was not distributed with this
+;+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+;+#
+;+# OK, this file is meant to support SUN, LINUX, AIX and WINDOWS
+;+# 1. For all unix platforms, the string ";-" means "remove this line"
+;+# 2. For all unix platforms, the string " DATA " will be removed from any
+;+# line on which it occurs.
+;+# 3. Lines containing ";+" will have ";+" removed on SUN and LINUX.
+;+# On AIX, lines containing ";+" will be removed.
+;+# 4. For all unix platforms, the string ";;" will have the ";;" removed.
+;+# 5. For all unix platforms, after the above processing has taken place,
+;+# all characters after the first ";" on the line will be removed.
+;+# And for AIX, the first ";" will also be removed.
+;+# This file is passed directly to windows. Since ';' is a comment, all UNIX
+;+# directives are hidden behind ";", ";+", and ";-"
+;+
+;+NSSprivate_3.11 { # NSS 3.11 release
+;+ global:
+LIBRARY freebl3 ;-
+EXPORTS ;-
+FREEBL_GetVector;
+;+ local:
+;+ *;
+;+};
diff --git a/security/nss/lib/freebl/freebl.gyp b/security/nss/lib/freebl/freebl.gyp
new file mode 100644
index 000000000..f5ae232ec
--- /dev/null
+++ b/security/nss/lib/freebl/freebl.gyp
@@ -0,0 +1,408 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+{
+ 'includes': [
+ '../../coreconf/config.gypi'
+ ],
+ 'targets': [
+ {
+ 'target_name': 'intel-gcm-wrap_c_lib',
+ 'type': 'static_library',
+ 'sources': [
+ 'intel-gcm-wrap.c'
+ ],
+ 'dependencies': [
+ '<(DEPTH)/exports.gyp:nss_exports'
+ ],
+ 'cflags': [
+ '-mssse3'
+ ],
+ 'cflags_mozilla': [
+ '-mssse3'
+ ]
+ },
+ {
+ 'target_name': 'freebl',
+ 'type': 'static_library',
+ 'sources': [
+ 'loader.c'
+ ],
+ 'dependencies': [
+ '<(DEPTH)/exports.gyp:nss_exports'
+ ]
+ },
+ {
+ 'target_name': '<(freebl_name)',
+ 'type': 'shared_library',
+ 'sources': [
+ 'aeskeywrap.c',
+ 'alg2268.c',
+ 'alghmac.c',
+ 'arcfive.c',
+ 'arcfour.c',
+ 'camellia.c',
+ 'chacha20poly1305.c',
+ 'ctr.c',
+ 'cts.c',
+ 'des.c',
+ 'desblapi.c',
+ 'dh.c',
+ 'drbg.c',
+ 'dsa.c',
+ 'ec.c',
+ 'ecdecode.c',
+ 'ecl/ec_naf.c',
+ 'ecl/ecl.c',
+ 'ecl/ecl_curve.c',
+ 'ecl/ecl_gf.c',
+ 'ecl/ecl_mult.c',
+ 'ecl/ecp_25519.c',
+ 'ecl/ecp_256.c',
+ 'ecl/ecp_256_32.c',
+ 'ecl/ecp_384.c',
+ 'ecl/ecp_521.c',
+ 'ecl/ecp_aff.c',
+ 'ecl/ecp_jac.c',
+ 'ecl/ecp_jm.c',
+ 'ecl/ecp_mont.c',
+ 'fipsfreebl.c',
+ 'freeblver.c',
+ 'gcm.c',
+ 'hmacct.c',
+ 'jpake.c',
+ 'ldvector.c',
+ 'md2.c',
+ 'md5.c',
+ 'mpi/mp_gf2m.c',
+ 'mpi/mpcpucache.c',
+ 'mpi/mpi.c',
+ 'mpi/mplogic.c',
+ 'mpi/mpmontg.c',
+ 'mpi/mpprime.c',
+ 'pqg.c',
+ 'rawhash.c',
+ 'rijndael.c',
+ 'rsa.c',
+ 'rsapkcs.c',
+ 'seed.c',
+ 'sha512.c',
+ 'sha_fast.c',
+ 'shvfy.c',
+ 'sysrand.c',
+ 'tlsprfalg.c'
+ ],
+ 'conditions': [
+ [ 'OS=="linux"', {
+ 'sources': [
+ 'nsslowhash.c',
+ 'stubs.c',
+ ],
+ 'conditions': [
+ [ 'test_build==1', {
+ 'dependencies': [
+ '<(DEPTH)/lib/util/util.gyp:nssutil3',
+ ],
+ }],
+ [ 'target_arch=="x64"', {
+ 'sources': [
+ 'arcfour-amd64-gas.s',
+ 'intel-aes.s',
+ 'intel-gcm.s',
+ 'mpi/mpi_amd64.c',
+ 'mpi/mpi_amd64_gas.s',
+ 'mpi/mp_comba.c',
+ ],
+ 'dependencies': [
+ 'intel-gcm-wrap_c_lib',
+ ],
+ 'conditions': [
+ [ 'cc_is_clang==1', {
+ 'cflags': [
+ '-no-integrated-as',
+ ],
+ 'cflags_mozilla': [
+ '-no-integrated-as',
+ ],
+ 'asflags_mozilla': [
+ '-no-integrated-as',
+ ],
+ }],
+ ],
+ }],
+ [ 'target_arch=="ia32"', {
+ 'sources': [
+ 'mpi/mpi_x86.s',
+ ],
+ }],
+ [ 'target_arch=="arm"', {
+ 'sources': [
+ 'mpi/mpi_arm.c',
+ ],
+ }],
+ ],
+ }, {
+ # not Linux
+ 'conditions': [
+ [ 'moz_fold_libs==0', {
+ 'dependencies': [
+ '../util/util.gyp:nssutil3',
+ ],
+ }, {
+ 'libraries': [
+ '<(moz_folded_library_name)',
+ ],
+ }],
+ ],
+ }],
+ [ 'OS=="win"', {
+ 'sources': [
+ #TODO: building with mingw should not need this.
+ 'ecl/uint128.c',
+ #TODO: clang-cl needs -msse3 here
+ 'intel-gcm-wrap.c',
+ ],
+ 'libraries': [
+ 'advapi32.lib',
+ ],
+ 'conditions': [
+ [ 'target_arch=="x64"', {
+ 'sources': [
+ 'arcfour-amd64-masm.asm',
+ 'mpi/mpi_amd64.c',
+ 'mpi/mpi_amd64_masm.asm',
+ 'mpi/mp_comba_amd64_masm.asm',
+ 'intel-aes-x64-masm.asm',
+ 'intel-gcm-x64-masm.asm',
+ ],
+ }, {
+ # not x64
+ 'sources': [
+ 'mpi/mpi_x86_asm.c',
+ 'intel-aes-x86-masm.asm',
+ 'intel-gcm-x86-masm.asm',
+ ],
+ }],
+ ],
+ }],
+ ['target_arch=="ia32" or target_arch=="x64"', {
+ 'sources': [
+ # All intel architectures get the 64 bit version
+ 'ecl/curve25519_64.c',
+ ],
+ }, {
+ 'sources': [
+ # All non intel architectures get the generic 32 bit implementation (slow!)
+ 'ecl/curve25519_32.c',
+ ],
+ }],
+ #TODO uint128.c
+ [ 'disable_chachapoly==0', {
+ 'conditions': [
+ [ 'OS!="win" and target_arch=="x64"', {
+ 'sources': [
+ 'chacha20_vec.c',
+ 'poly1305-donna-x64-sse2-incremental-source.c',
+ ],
+ }, {
+ # Windows, or any non-x64 target
+ 'sources': [
+ 'chacha20.c',
+ 'poly1305.c',
+ ],
+ }],
+ ],
+ }],
+ [ 'fuzz==1', {
+ 'sources': [
+ 'det_rng.c',
+ ],
+ 'defines': [
+ 'UNSAFE_FUZZER_MODE',
+ ],
+ }],
+ [ 'test_build==1', {
+ 'defines': [
+ 'CT_VERIF',
+ ],
+ }],
+ [ 'OS=="mac"', {
+ 'conditions': [
+ [ 'target_arch=="ia32"', {
+ 'sources': [
+ 'mpi/mpi_sse2.s',
+ ],
+ 'defines': [
+ 'MP_USE_UINT_DIGIT',
+ 'MP_ASSEMBLY_MULTIPLY',
+ 'MP_ASSEMBLY_SQUARE',
+ 'MP_ASSEMBLY_DIV_2DX1D',
+ ],
+ }],
+ ],
+ }],
+ ],
+ 'dependencies': [
+ '<(DEPTH)/exports.gyp:nss_exports',
+ ],
+ 'variables': {
+ 'conditions': [
+ [ 'OS=="linux"', {
+ 'mapfile': 'freebl_hash_vector.def',
+ }, {
+ 'mapfile': 'freebl.def',
+ }],
+ ]
+ },
+ 'ldflags': [
+ '-Wl,-Bsymbolic'
+ ]
+ },
+ ],
+ 'conditions': [
+ [ 'OS=="linux"', {
+ # stub build
+ 'targets': [
+ {
+ 'target_name': 'freebl3',
+ 'type': 'shared_library',
+ 'defines': [
+ 'FREEBL_NO_DEPEND',
+ ],
+ 'sources': [
+ 'lowhash_vector.c'
+ ],
+ 'dependencies': [
+ '<(DEPTH)/exports.gyp:nss_exports'
+ ],
+ 'variables': {
+ 'mapfile': 'freebl_hash.def'
+ }
+ },
+ ],
+ }],
+ ],
+ 'target_defaults': {
+ 'include_dirs': [
+ 'mpi',
+ 'ecl'
+ ],
+ 'defines': [
+ 'SHLIB_SUFFIX=\"<(dll_suffix)\"',
+ 'SHLIB_PREFIX=\"<(dll_prefix)\"',
+ 'SHLIB_VERSION=\"3\"',
+ 'SOFTOKEN_SHLIB_VERSION=\"3\"',
+ 'RIJNDAEL_INCLUDE_TABLES',
+ 'MP_API_COMPATIBLE'
+ ],
+ 'conditions': [
+ [ 'OS=="win" and target_arch=="ia32"', {
+ 'msvs_settings': {
+ 'VCCLCompilerTool': {
+ #TODO: -Ox optimize flags
+ 'PreprocessorDefinitions': [
+ 'NSS_X86_OR_X64',
+ 'NSS_X86',
+ 'MP_ASSEMBLY_MULTIPLY',
+ 'MP_ASSEMBLY_SQUARE',
+ 'MP_ASSEMBLY_DIV_2DX1D',
+ 'MP_USE_UINT_DIGIT',
+ 'MP_NO_MP_WORD',
+ 'USE_HW_AES',
+ 'INTEL_GCM',
+ ],
+ },
+ },
+ }],
+ [ 'OS=="win" and target_arch=="x64"', {
+ 'msvs_settings': {
+ 'VCCLCompilerTool': {
+ #TODO: -Ox optimize flags
+ 'PreprocessorDefinitions': [
+ 'NSS_USE_64',
+ 'NSS_X86_OR_X64',
+ 'NSS_X64',
+ 'MP_IS_LITTLE_ENDIAN',
+ 'NSS_BEVAND_ARCFOUR',
+ 'MPI_AMD64',
+ 'MP_ASSEMBLY_MULTIPLY',
+ 'NSS_USE_COMBA',
+ 'USE_HW_AES',
+ 'INTEL_GCM',
+ ],
+ },
+ },
+ }],
+ [ 'OS!="win"', {
+ 'conditions': [
+ [ 'target_arch=="x64"', {
+ 'defines': [
+ 'NSS_USE_64',
+ 'NSS_X86_OR_X64',
+ 'NSS_X64',
+ # The Makefile does version-tests on GCC, but we're not doing that here.
+ 'HAVE_INT128_SUPPORT',
+ ],
+ }, {
+ 'sources': [
+ 'ecl/uint128.c',
+ ],
+ }],
+ [ 'target_arch=="ia32"', {
+ 'defines': [
+ 'NSS_X86_OR_X64',
+ 'NSS_X86',
+ ],
+ }],
+ ],
+ }],
+ [ 'OS=="linux"', {
+ 'defines': [
+ 'FREEBL_LOWHASH',
+ ],
+ 'conditions': [
+ [ 'test_build==0', {
+ 'defines': [
+ 'FREEBL_NO_DEPEND',
+ ],
+ }],
+ [ 'target_arch=="x64"', {
+ 'defines': [
+ 'MP_IS_LITTLE_ENDIAN',
+ 'NSS_BEVAND_ARCFOUR',
+ 'MPI_AMD64',
+ 'MP_ASSEMBLY_MULTIPLY',
+ 'NSS_USE_COMBA',
+ ],
+ }],
+ [ 'target_arch=="x64" and use_msan==0', {
+ 'defines': [
+ 'USE_HW_AES',
+ 'INTEL_GCM',
+ ],
+ }],
+ [ 'target_arch=="ia32"', {
+ 'defines': [
+ 'MP_IS_LITTLE_ENDIAN',
+ 'MP_ASSEMBLY_MULTIPLY',
+ 'MP_ASSEMBLY_SQUARE',
+ 'MP_ASSEMBLY_DIV_2DX1D',
+ 'MP_USE_UINT_DIGIT',
+ ],
+ }],
+ [ 'target_arch=="arm"', {
+ 'defines': [
+ 'MP_ASSEMBLY_MULTIPLY',
+ 'MP_ASSEMBLY_SQUARE',
+ 'MP_USE_UINT_DIGIT',
+ 'SHA_NO_LONG_LONG',
+ ],
+ }],
+ ],
+ }],
+ ],
+ },
+ 'variables': {
+ 'module': 'nss',
+ }
+}
diff --git a/security/nss/lib/freebl/freebl.rc b/security/nss/lib/freebl/freebl.rc
new file mode 100644
index 000000000..444ae5d03
--- /dev/null
+++ b/security/nss/lib/freebl/freebl.rc
@@ -0,0 +1,68 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "softkver.h"
+#include <winver.h>
+
+#define MY_LIBNAME "freebl"
+#define MY_FILEDESCRIPTION "NSS freebl Library"
+
+#define STRINGIZE(x) #x
+#define STRINGIZE2(x) STRINGIZE(x)
+#define SOFTOKEN_VMAJOR_STR STRINGIZE2(SOFTOKEN_VMAJOR)
+
+#ifdef _DEBUG
+#define MY_DEBUG_STR " (debug)"
+#define MY_FILEFLAGS_1 VS_FF_DEBUG
+#else
+#define MY_DEBUG_STR ""
+#define MY_FILEFLAGS_1 0x0L
+#endif
+#if SOFTOKEN_BETA
+#define MY_FILEFLAGS_2 MY_FILEFLAGS_1|VS_FF_PRERELEASE
+#else
+#define MY_FILEFLAGS_2 MY_FILEFLAGS_1
+#endif
+
+#ifdef WINNT
+#define MY_FILEOS VOS_NT_WINDOWS32
+#else
+#define MY_FILEOS VOS__WINDOWS32
+#endif
+
+#define MY_INTERNAL_NAME MY_LIBNAME SOFTOKEN_VMAJOR_STR
+
+/////////////////////////////////////////////////////////////////////////////
+//
+// Version-information resource
+//
+
+VS_VERSION_INFO VERSIONINFO
+ FILEVERSION SOFTOKEN_VMAJOR,SOFTOKEN_VMINOR,SOFTOKEN_VPATCH,SOFTOKEN_VBUILD
+ PRODUCTVERSION SOFTOKEN_VMAJOR,SOFTOKEN_VMINOR,SOFTOKEN_VPATCH,SOFTOKEN_VBUILD
+ FILEFLAGSMASK VS_FFI_FILEFLAGSMASK
+ FILEFLAGS MY_FILEFLAGS_2
+ FILEOS MY_FILEOS
+ FILETYPE VFT_DLL
+ FILESUBTYPE 0x0L // not used
+
+BEGIN
+ BLOCK "StringFileInfo"
+ BEGIN
+ BLOCK "040904B0" // Lang=US English, CharSet=Unicode
+ BEGIN
+ VALUE "CompanyName", "Mozilla Foundation\0"
+ VALUE "FileDescription", MY_FILEDESCRIPTION MY_DEBUG_STR "\0"
+ VALUE "FileVersion", SOFTOKEN_VERSION "\0"
+ VALUE "InternalName", MY_INTERNAL_NAME "\0"
+ VALUE "OriginalFilename", MY_INTERNAL_NAME ".dll\0"
+ VALUE "ProductName", "Network Security Services\0"
+ VALUE "ProductVersion", SOFTOKEN_VERSION "\0"
+ END
+ END
+ BLOCK "VarFileInfo"
+ BEGIN
+ VALUE "Translation", 0x409, 1200
+ END
+END
diff --git a/security/nss/lib/freebl/freebl_hash.def b/security/nss/lib/freebl/freebl_hash.def
new file mode 100644
index 000000000..9fd27367e
--- /dev/null
+++ b/security/nss/lib/freebl/freebl_hash.def
@@ -0,0 +1,39 @@
+;+#
+;+# This Source Code Form is subject to the terms of the Mozilla Public
+;+# License, v. 2.0. If a copy of the MPL was not distributed with this
+;+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+;+#
+;+# OK, this file is meant to support SUN, LINUX, AIX and WINDOWS
+;+# 1. For all unix platforms, the string ";-" means "remove this line"
+;+# 2. For all unix platforms, the string " DATA " will be removed from any
+;+# line on which it occurs.
+;+# 3. Lines containing ";+" will have ";+" removed on SUN and LINUX.
+;+# On AIX, lines containing ";+" will be removed.
+;+# 4. For all unix platforms, the string ";;" will have the ";;" removed.
+;+# 5. For all unix platforms, after the above processing has taken place,
+;+# all characters after the first ";" on the line will be removed.
+;+# And for AIX, the first ";" will also be removed.
+;+# This file is passed directly to windows. Since ';' is a comment, all UNIX
+;+# directives are hidden behind ";", ";+", and ";-"
+;+
+;+NSSprivate_3.11 { # NSS 3.11 release
+;+ global:
+LIBRARY freebl3 ;-
+EXPORTS ;-
+FREEBL_GetVector;
+;+ local:
+;+ *;
+;+};
+;+NSSRAWHASH_3.12.3 { # NSS 3.12.3 release
+;+ global:
+NSSLOW_Init;
+NSSLOW_Shutdown;
+NSSLOWHASH_Length;
+NSSLOWHASH_Begin;
+NSSLOWHASH_Destroy;
+NSSLOWHASH_End;
+NSSLOWHASH_NewContext;
+NSSLOWHASH_Update;
+;+ local:
+;+ *;
+;+};
diff --git a/security/nss/lib/freebl/freebl_hash_vector.def b/security/nss/lib/freebl/freebl_hash_vector.def
new file mode 100644
index 000000000..9d7d07d54
--- /dev/null
+++ b/security/nss/lib/freebl/freebl_hash_vector.def
@@ -0,0 +1,34 @@
+;+#
+;+# This Source Code Form is subject to the terms of the Mozilla Public
+;+# License, v. 2.0. If a copy of the MPL was not distributed with this
+;+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+;+#
+;+# OK, this file is meant to support SUN, LINUX, AIX and WINDOWS
+;+# 1. For all unix platforms, the string ";-" means "remove this line"
+;+# 2. For all unix platforms, the string " DATA " will be removed from any
+;+# line on which it occurs.
+;+# 3. Lines containing ";+" will have ";+" removed on SUN and LINUX.
+;+# On AIX, lines containing ";+" will be removed.
+;+# 4. For all unix platforms, the string ";;" will have the ";;" removed.
+;+# 5. For all unix platforms, after the above processing has taken place,
+;+# all characters after the first ";" on the line will be removed.
+;+# And for AIX, the first ";" will also be removed.
+;+# This file is passed directly to windows. Since ';' is a comment, all UNIX
+;+# directives are hidden behind ";", ";+", and ";-"
+;+
+;+NSSprivate_3.11 { # NSS 3.11 release
+;+ global:
+LIBRARY freebl3 ;-
+EXPORTS ;-
+FREEBL_GetVector;
+;+ local:
+;+ *;
+;+};
+;+NSSprivate_3.16 { # NSS 3.16 release
+;+ global:
+LIBRARY freebl3 ;-
+EXPORTS ;-
+NSSLOW_GetVector;
+;+ local:
+;+ *;
+;+};
diff --git a/security/nss/lib/freebl/freeblver.c b/security/nss/lib/freebl/freeblver.c
new file mode 100644
index 000000000..9136f0b0b
--- /dev/null
+++ b/security/nss/lib/freebl/freeblver.c
@@ -0,0 +1,18 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* Library identity and versioning */
+
+#include "softkver.h"
+
+/* Suffix appended to the version string so debug builds identify themselves. */
+#ifdef DEBUG
+#define _DEBUG_STRING " (debug)"
+#else
+#define _DEBUG_STRING ""
+#endif
+
+/*
+ * Library version identification string: the fixed "Version: NSS " prefix,
+ * followed by SOFTOKEN_VERSION and the debug-build suffix (if any).
+ */
+const char __nss_freebl_version[] = "Version: NSS " SOFTOKEN_VERSION _DEBUG_STRING;
diff --git a/security/nss/lib/freebl/gcm.c b/security/nss/lib/freebl/gcm.c
new file mode 100644
index 000000000..22121001b
--- /dev/null
+++ b/security/nss/lib/freebl/gcm.c
@@ -0,0 +1,860 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+#include "blapii.h"
+#include "blapit.h"
+#include "gcm.h"
+#include "ctr.h"
+#include "secerr.h"
+#include "prtypes.h"
+#include "pkcs11t.h"
+
+#include <limits.h>
+
+/**************************************************************************
+ * First implement the Galois hash function of GCM (gcmHash) *
+ **************************************************************************/
+#define GCM_HASH_LEN_LEN 8 /* gcm hash defines lengths to be 64 bits */
+
+typedef struct gcmHashContextStr gcmHashContext;
+
+/* Forward declarations of the file-local GHASH entry points. Their
+ * definitions appear further down in this file, after the selected
+ * GF(2^128) multiply back end. All of them take the cipher block size in
+ * bytes as 'blocksize'. */
+static SECStatus gcmHash_InitContext(gcmHashContext *hash,
+ const unsigned char *H,
+ unsigned int blocksize);
+static void gcmHash_DestroyContext(gcmHashContext *ghash, PRBool freeit);
+static SECStatus gcmHash_Update(gcmHashContext *ghash,
+ const unsigned char *buf, unsigned int len,
+ unsigned int blocksize);
+static SECStatus gcmHash_Sync(gcmHashContext *ghash, unsigned int blocksize);
+static SECStatus gcmHash_Final(gcmHashContext *gcm, unsigned char *outbuf,
+ unsigned int *outlen, unsigned int maxout,
+ unsigned int blocksize);
+static SECStatus gcmHash_Reset(gcmHashContext *ghash,
+ const unsigned char *inbuf,
+ unsigned int inbufLen, unsigned int blocksize);
+
+/* compile time defines to select how the GF2 multiply is calculated.
+ * There are currently 2 algorithms implemented here: MPI and ALGORITHM_1.
+ *
+ * MPI uses the GF2m implemented in mpi to support GF2 ECC.
+ * ALGORITHM_1 is the Algorithm 1 in both NIST SP 800-38D and
+ * "The Galois/Counter Mode of Operation (GCM)", McGrew & Viega.
+ */
+#if !defined(GCM_USE_ALGORITHM_1) && !defined(GCM_USE_MPI)
+#define GCM_USE_MPI 1 /* MPI is about 5x faster with the \
+ * same or less complexity. It's possible to use \
+ * tables to speed things up even more */
+#endif
+
+/* GCM defines the bit string to be LSB first, which is exactly
+ * opposite everyone else, including hardware. This lookup table reverses
+ * the bit order within a single byte: gcm_byte_rev[b] is b with its eight
+ * bits mirrored (for example 0x01 -> 0x80 and 0x02 -> 0x40). */
+static const unsigned char gcm_byte_rev[256] = {
+ 0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0,
+ 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
+ 0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8,
+ 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
+ 0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4,
+ 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
+ 0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec,
+ 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
+ 0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2,
+ 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
+ 0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea,
+ 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
+ 0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6,
+ 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
+ 0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee,
+ 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe,
+ 0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1,
+ 0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1,
+ 0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9,
+ 0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9,
+ 0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5,
+ 0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5,
+ 0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed,
+ 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd,
+ 0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3,
+ 0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3,
+ 0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb,
+ 0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb,
+ 0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7,
+ 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7,
+ 0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef,
+ 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff
+};
+
+/* Debug tracing helpers. They produce output only when GCM_TRACE is
+ * defined; otherwise both macros expand to nothing. Each expansion is a
+ * bare brace block, so call sites are written without a trailing
+ * semicolon. */
+#ifdef GCM_TRACE
+#include <stdio.h>
+
+/* Print the current hash state X of 'ghash' as hex. 'label' is used as
+ * the printf format and is passed (ghash)->m as its only argument.
+ * The expansion refers to a variable named 'blocksize', which must be in
+ * scope at the point of use. */
+#define GCM_TRACE_X(ghash, label) \
+ { \
+ unsigned char _X[MAX_BLOCK_SIZE]; \
+ int i; \
+ gcm_getX(ghash, _X, blocksize); \
+ printf(label, (ghash)->m); \
+ for (i = 0; i < blocksize; i++) \
+ printf("%02x", _X[i]); \
+ printf("\n"); \
+ }
+/* Print 'blocksize' bytes of 'buf' as hex after 'label'.
+ * NOTE: unlike GCM_TRACE_X this macro does not declare its loop index;
+ * an integer variable named 'i' must already exist in the caller. */
+#define GCM_TRACE_BLOCK(label, buf, blocksize) \
+ { \
+ printf(label); \
+ for (i = 0; i < blocksize; i++) \
+ printf("%02x", buf[i]); \
+ printf("\n"); \
+ }
+#else
+#define GCM_TRACE_X(ghash, label)
+#define GCM_TRACE_BLOCK(label, buf, blocksize)
+#endif
+
+#ifdef GCM_USE_MPI
+
+#ifdef GCM_USE_ALGORITHM_1
+#error "Only define one of GCM_USE_MPI, GCM_USE_ALGORITHM_1"
+#endif
+/* use the MPI functions to calculate Xn = (Xn-1^C_i)*H mod poly */
+#include "mpi.h"
+#include "secmpi.h"
+#include "mplogic.h"
+#include "mp_gf2m.h"
+
+/* State needed to handle the GCM hash function (MPI back end). */
+struct gcmHashContextStr {
+ mp_int H; /* hash subkey, stored bit-reversed (see gcmHash_InitContext) */
+ mp_int X; /* running hash value X_i */
+ mp_int C_i; /* scratch: current input block, then X ^ C_i */
+ const unsigned int *poly; /* reduction polynomial for this block size */
+ unsigned char buffer[MAX_BLOCK_SIZE]; /* input bytes not yet hashed */
+ unsigned int bufLen; /* number of valid bytes in buffer */
+ int m; /* count of blocks passed through gcm_HashMult since the last
+ * gcm_zeroX; in this file it is only read by GCM_TRACE_X */
+ unsigned char counterBuf[2 * GCM_HASH_LEN_LEN]; /* two big-endian 64-bit
+ * bit lengths: previous segment, then current segment
+ * (filled in by gcmHash_Sync) */
+ PRUint64 cLen; /* bits fed to gcmHash_Update since the last sync */
+};
+
+/* f = x^128 + x^7 + x^2 + x + 1
+ * Listed as the exponents of the non-zero terms, highest first. This is
+ * the value handed to mp_bmulmod() as the reduction polynomial. */
+static const unsigned int poly_128[] = { 128, 7, 2, 1, 0 };
+
+/* GCM numbers the bits of a block in the opposite order from everything
+ * else. Copy 'blocksize' bytes from 'src' to 'target', reversing both the
+ * order of the bytes and the order of the bits inside each byte. */
+static void
+gcm_reverse(unsigned char *target, const unsigned char *src,
+            unsigned int blocksize)
+{
+    const unsigned char *stop = src + blocksize;
+    unsigned char *out = target + blocksize;
+
+    /* walk the input forwards while filling the output backwards */
+    while (src != stop) {
+        *--out = gcm_byte_rev[*src++];
+    }
+}
+
+/*
+ * Initialize a gcmHashContext (MPI back end).
+ *
+ *   ghash     - context to initialize; any previous contents are ignored
+ *   H         - hash subkey, 'blocksize' bytes
+ *   blocksize - cipher block size in bytes; only 16 is supported
+ *
+ * Returns SECSuccess, or SECFailure with the error code set. On failure
+ * the context has been passed through gcmHash_DestroyContext and holds no
+ * allocations.
+ */
+static SECStatus
+gcmHash_InitContext(gcmHashContext *ghash, const unsigned char *H,
+                    unsigned int blocksize)
+{
+    mp_err err = MP_OKAY;
+    unsigned char H_rev[MAX_BLOCK_SIZE];
+
+    /* Mark the mp_ints as unallocated first so that the cleanup path can
+     * hand them to mp_clear() no matter where we bail out. */
+    MP_DIGITS(&ghash->H) = 0;
+    MP_DIGITS(&ghash->X) = 0;
+    MP_DIGITS(&ghash->C_i) = 0;
+
+    /* set the irreducible polynomial. Each blocksize has its own polynomial.
+     * for now only blocksize 16 (=128 bits) is defined.
+     * This is checked before anything is copied, so an unsupported
+     * blocksize can never be used as a length for H_rev. */
+    switch (blocksize) {
+        case 16: /* 128 bits */
+            ghash->poly = poly_128;
+            break;
+        default:
+            PORT_SetError(SEC_ERROR_INVALID_ARGS);
+            goto cleanup;
+    }
+
+    CHECK_MPI_OK(mp_init(&ghash->H));
+    CHECK_MPI_OK(mp_init(&ghash->X));
+    CHECK_MPI_OK(mp_init(&ghash->C_i));
+
+    mp_zero(&ghash->X);
+    gcm_reverse(H_rev, H, blocksize);
+    CHECK_MPI_OK(mp_read_unsigned_octets(&ghash->H, H_rev, blocksize));
+
+    ghash->cLen = 0;
+    ghash->bufLen = 0;
+    ghash->m = 0;
+    PORT_Memset(ghash->counterBuf, 0, sizeof(ghash->counterBuf));
+    /* H_rev is a copy of the hash subkey; don't leave it on the stack */
+    PORT_Memset(H_rev, 0, sizeof(H_rev));
+    return SECSuccess;
+cleanup:
+    PORT_Memset(H_rev, 0, sizeof(H_rev));
+    if (err != MP_OKAY) {
+        /* an MPI call failed: translate it so the caller sees an error */
+        MP_TO_SEC_ERROR(err);
+    }
+    gcmHash_DestroyContext(ghash, PR_FALSE);
+    return SECFailure;
+}
+
+/* Release the mp_int storage held by a gcmHashContext and wipe the whole
+ * structure. Because the structure is zeroed afterwards, calling this
+ * again with freeit == PR_FALSE is harmless. With freeit == PR_TRUE the
+ * context itself is freed as well. */
+static void
+gcmHash_DestroyContext(gcmHashContext *ghash, PRBool freeit)
+{
+    mp_clear(&ghash->H);
+    mp_clear(&ghash->X);
+    mp_clear(&ghash->C_i);
+    PORT_Memset(ghash, 0, sizeof(*ghash));
+
+    if (!freeit) {
+        return;
+    }
+    PORT_Free(ghash);
+}
+
+/* Export the current hash value X into T as 'blocksize' bytes in GCM bit
+ * order. The big-endian octets of X are left-padded with zeros to a full
+ * block and then run through gcm_reverse().
+ * Returns SECFailure (SEC_ERROR_LIBRARY_FAILURE) if X does not fit in a
+ * block or cannot be serialized. */
+static SECStatus
+gcm_getX(gcmHashContext *ghash, unsigned char *T, unsigned int blocksize)
+{
+    unsigned char padded[MAX_BLOCK_SIZE];
+    unsigned int pad;
+    int octets = mp_unsigned_octet_size(&ghash->X);
+
+    if (octets <= 0) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+    PORT_Assert((unsigned int)octets <= blocksize);
+    if ((unsigned int)octets > blocksize) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+
+    /* leading zero bytes needed to fill the block */
+    pad = blocksize - (unsigned int)octets;
+    if (pad != 0) {
+        PORT_Memset(padded, 0, pad);
+    }
+
+    if (mp_to_unsigned_octets(&ghash->X, padded + pad, octets) < 0) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+    gcm_reverse(T, padded, blocksize);
+    return SECSuccess;
+}
+
+/* Run 'count' consecutive blocks of 'blocksize' bytes from 'buf' through
+ * the hash: for each block C_i, X = (X ^ C_i) * H mod poly.
+ * Returns SECSuccess, or SECFailure with the MPI error translated via
+ * MP_TO_SEC_ERROR. */
+static SECStatus
+gcm_HashMult(gcmHashContext *ghash, const unsigned char *buf,
+ unsigned int count, unsigned int blocksize)
+{
+ SECStatus rv = SECFailure;
+ mp_err err = MP_OKAY;
+ unsigned char tmp_buf[MAX_BLOCK_SIZE];
+ unsigned int i;
+
+ for (i = 0; i < count; i++, buf += blocksize) {
+ ghash->m++; /* block counter, used by the trace output below */
+ /* load the block, bit-reversed, into C_i and fold in the current X */
+ gcm_reverse(tmp_buf, buf, blocksize);
+ CHECK_MPI_OK(mp_read_unsigned_octets(&ghash->C_i, tmp_buf, blocksize));
+ CHECK_MPI_OK(mp_badd(&ghash->X, &ghash->C_i, &ghash->C_i));
+ /*
+ * If you are looking to speed up GCM, this is the place to do it.
+ * There are two areas that can be exploited to speed up this code.
+ *
+ * 1) H is a constant in this multiply. We can precompute H * (0 - 255)
+ * at init time, and this becomes blocksize xors of our table lookups.
+ *
+ * 2) poly is a constant for each blocksize. We can calculate the
+ * modulo reduction by a series of adds and shifts.
+ *
+ * For now we are after functionality, so we will go ahead and use
+ * the builtin bmulmod from mpi
+ */
+ CHECK_MPI_OK(mp_bmulmod(&ghash->C_i, &ghash->H,
+ ghash->poly, &ghash->X));
+ GCM_TRACE_X(ghash, "X%d = ")
+ }
+ rv = SECSuccess;
+cleanup:
+ /* the scratch copy of the input is wiped on every exit path */
+ PORT_Memset(tmp_buf, 0, sizeof(tmp_buf));
+ if (rv != SECSuccess) {
+ MP_TO_SEC_ERROR(err);
+ }
+ return rv;
+}
+
+/* Restart the hash: set the running value X to zero and reset the block
+ * counter m. The subkey H is left untouched. */
+static void
+gcm_zeroX(gcmHashContext *ghash)
+{
+ mp_zero(&ghash->X);
+ ghash->m = 0;
+}
+
+#endif
+
+#ifdef GCM_USE_ALGORITHM_1
+/* use algorithm 1 of McGrew & Viega "The Galois/Counter Mode of Operation" */
+
+/* number of unsigned longs needed to hold one maximum-size block */
+#define GCM_ARRAY_SIZE (MAX_BLOCK_SIZE / sizeof(unsigned long))
+
+/* State needed to handle the GCM hash function (Algorithm 1 back end). */
+struct gcmHashContextStr {
+ unsigned long H[GCM_ARRAY_SIZE]; /* hash subkey, via gcm_bytes_to_longs */
+ unsigned long X[GCM_ARRAY_SIZE]; /* running hash value */
+ unsigned long R; /* low-order terms of the reduction polynomial */
+ unsigned char buffer[MAX_BLOCK_SIZE]; /* input bytes not yet hashed */
+ unsigned int bufLen; /* number of valid bytes in buffer */
+ int m; /* count of blocks hashed since the last gcm_zeroX (trace only) */
+ unsigned char counterBuf[2 * GCM_HASH_LEN_LEN]; /* two big-endian 64-bit
+ * bit lengths: previous segment, then current segment */
+ PRUint64 cLen; /* bits fed to gcmHash_Update since the last sync */
+};
+
+/* Pack 'len' bytes from 'c' into the unsigned long array 'l'. Every byte
+ * is bit-reversed through gcm_byte_rev, and within each word the byte at
+ * the lowest address ends up in the least significant position.
+ * 'len' must be a multiple of sizeof(unsigned long). */
+static void
+gcm_bytes_to_longs(unsigned long *l, const unsigned char *c, unsigned int len)
+{
+    const unsigned int word_bytes = sizeof(unsigned long);
+    const unsigned int words = len / word_bytes;
+    unsigned int w;
+
+    PORT_Assert(len % sizeof(unsigned long) == 0);
+    for (w = 0; w < words; w++) {
+        /* start one past the word's last byte and walk backwards */
+        const unsigned char *p = c + (w + 1) * word_bytes;
+        unsigned long acc = 0;
+        unsigned int b;
+
+        for (b = 0; b < word_bytes; b++) {
+            acc = (acc << PR_BITS_PER_BYTE) | gcm_byte_rev[*--p];
+        }
+        l[w] = acc;
+    }
+}
+
+/* Inverse of gcm_bytes_to_longs: unpack the unsigned long array 'l' into
+ * 'len' bytes at 'c', least significant byte of each word first, with
+ * every byte bit-reversed through gcm_byte_rev.
+ * 'len' must be a multiple of sizeof(unsigned long). */
+static void
+gcm_longs_to_bytes(const unsigned long *l, unsigned char *c, unsigned int len)
+{
+    const unsigned int word_bytes = sizeof(unsigned long);
+    const unsigned int words = len / word_bytes;
+    unsigned char *out = c;
+    unsigned int w;
+
+    PORT_Assert(len % sizeof(unsigned long) == 0);
+    for (w = 0; w < words; w++) {
+        unsigned long v = l[w];
+        unsigned int b;
+
+        for (b = 0; b < word_bytes; b++) {
+            *out++ = gcm_byte_rev[v & 0xff];
+            v >>= PR_BITS_PER_BYTE;
+        }
+    }
+}
+
+/*
+ * Initialize a gcmHashContext (Algorithm 1 back end).
+ *
+ *   ghash     - context to initialize
+ *   H         - hash subkey, 'blocksize' bytes
+ *   blocksize - cipher block size in bytes; only 16 is supported
+ *
+ * Returns SECSuccess, or SECFailure with SEC_ERROR_INVALID_ARGS set when
+ * the block size is unsupported. The context is not modified on failure.
+ */
+static SECStatus
+gcmHash_InitContext(gcmHashContext *ghash, const unsigned char *H,
+                    unsigned int blocksize)
+{
+    unsigned long R;
+
+    /* set the irreducible polynomial. Each blocksize has its own polynomial.
+     * for now only blocksize 16 (=128 bits) is defined.
+     * Validate before copying: blocksize is used below as the number of
+     * bytes written into ghash->H. */
+    switch (blocksize) {
+        case 16:                    /* 128 bits */
+            R = (unsigned long)0x87; /* x^7 + x^2 + x +1 */
+            break;
+        default:
+            PORT_SetError(SEC_ERROR_INVALID_ARGS);
+            return SECFailure;
+    }
+
+    PORT_Memset(ghash->X, 0, sizeof(ghash->X));
+    PORT_Memset(ghash->H, 0, sizeof(ghash->H));
+    gcm_bytes_to_longs(ghash->H, H, blocksize);
+
+    ghash->R = R;
+    ghash->cLen = 0;
+    ghash->bufLen = 0;
+    ghash->m = 0;
+    PORT_Memset(ghash->counterBuf, 0, sizeof(ghash->counterBuf));
+    return SECSuccess;
+}
+
+/* Wipe a gcmHashContext. This back end owns no separate allocations, so
+ * zeroing the structure is all that is needed, and repeating the call with
+ * freeit == PR_FALSE is harmless. With freeit == PR_TRUE the context
+ * itself is freed as well. */
+static void
+gcmHash_DestroyContext(gcmHashContext *ghash, PRBool freeit)
+{
+    PORT_Memset(ghash, 0, sizeof(*ghash));
+
+    if (!freeit) {
+        return;
+    }
+    PORT_Free(ghash);
+}
+
+/* Shift the 'count'-word value in t left by one bit, treating t[0] as the
+ * least significant word. Returns the bit shifted out of the top word
+ * (0 or 1). */
+static unsigned long
+gcm_shift_one(unsigned long *t, unsigned int count)
+{
+    const unsigned int top_bit =
+        (sizeof(unsigned long) * PR_BITS_PER_BYTE) - 1;
+    unsigned long *const end = t + count;
+    unsigned long carry_in = 0;
+    unsigned long *word;
+
+    for (word = t; word != end; word++) {
+        unsigned long carry_out = *word >> top_bit;
+
+        *word = (*word << 1) | carry_in;
+        carry_in = carry_out;
+    }
+    return carry_in;
+}
+
+/* Export the current hash value X into T as 'blocksize' bytes by
+ * converting the word array back with gcm_longs_to_bytes. Always returns
+ * SECSuccess in this back end. */
+static SECStatus
+gcm_getX(gcmHashContext *ghash, unsigned char *T, unsigned int blocksize)
+{
+ gcm_longs_to_bytes(ghash->X, T, blocksize);
+ return SECSuccess;
+}
+
+/* XOR the first 'len' elements of s into t.
+ * NOTE: the loop index 'l' is not declared here; the caller must have a
+ * variable of that name in scope. The expansion is a bare for statement,
+ * so the caller supplies the terminating semicolon. */
+#define GCM_XOR(t, s, len) \
+ for (l = 0; l < len; l++) \
+ t[l] ^= s[l]
+
+/* Run 'count' consecutive blocks of 'blocksize' bytes from 'buf' through
+ * the hash using bit-serial shift-and-xor multiplication:
+ * for each block C_i, X = (X ^ C_i) * H, reduced using ghash->R.
+ * Always returns SECSuccess. */
+static SECStatus
+gcm_HashMult(gcmHashContext *ghash, const unsigned char *buf,
+ unsigned int count, unsigned int blocksize)
+{
+ unsigned long C_i[GCM_ARRAY_SIZE];
+ unsigned int arraysize = blocksize / sizeof(unsigned long);
+ unsigned int i, j, k, l; /* 'l' is the index used inside GCM_XOR */
+
+ for (i = 0; i < count; i++, buf += blocksize) {
+ ghash->m++; /* block counter, used by the trace output below */
+ gcm_bytes_to_longs(C_i, buf, blocksize);
+ GCM_XOR(C_i, ghash->X, arraysize);
+ /* multiply X = C_i * H */
+ PORT_Memset(ghash->X, 0, sizeof(ghash->X));
+ /* walk the bits of H from word 0, bit 0 upwards */
+ for (j = 0; j < arraysize; j++) {
+ unsigned long H = ghash->H[j];
+ for (k = 0; k < sizeof(unsigned long) * PR_BITS_PER_BYTE; k++) {
+ if (H & 1) {
+ GCM_XOR(ghash->X, C_i, arraysize);
+ }
+ /* multiply C_i by x; a carry out of the top means reduce by R */
+ if (gcm_shift_one(C_i, arraysize)) {
+ C_i[0] = C_i[0] ^ ghash->R;
+ }
+ H = H >> 1;
+ }
+ }
+ GCM_TRACE_X(ghash, "X%d = ")
+ }
+ /* wipe the scratch copy of the data before returning */
+ PORT_Memset(C_i, 0, sizeof(C_i));
+ return SECSuccess;
+}
+
+/* Restart the hash: set the running value X to zero and reset the block
+ * counter m. The subkey H is left untouched. */
+static void
+gcm_zeroX(gcmHashContext *ghash)
+{
+ PORT_Memset(ghash->X, 0, sizeof(ghash->X));
+ ghash->m = 0;
+}
+#endif
+
+/*
+ * implement GCM GHASH using the freebl GHASH function. The gcm_HashMult
+ * function always takes blocksize lengths of data. gcmHash_Update will
+ * format the data properly.
+ *
+ *   ghash     - hash context
+ *   buf, len  - data to absorb; any length, including zero
+ *   blocksize - cipher block size in bytes
+ *
+ * Bytes that do not fill a whole block are kept in ghash->buffer until a
+ * later call (or gcmHash_Sync) completes the block. The number of bits
+ * absorbed is accumulated in ghash->cLen.
+ * Returns SECSuccess or SECFailure.
+ */
+static SECStatus
+gcmHash_Update(gcmHashContext *ghash, const unsigned char *buf,
+               unsigned int len, unsigned int blocksize)
+{
+    unsigned int blocks;
+    SECStatus rv;
+
+    /* Widen before multiplying. With a 32-bit unsigned int, evaluating
+     * len * 8 in unsigned int wraps for inputs of 512 MiB or more, which
+     * would put a wrong bit count into the final length block. */
+    ghash->cLen += (PRUint64)len * PR_BITS_PER_BYTE;
+
+    /* first deal with the current buffer of data. Try to fill it out so
+     * we can hash it */
+    if (ghash->bufLen) {
+        unsigned int needed = PR_MIN(len, blocksize - ghash->bufLen);
+        if (needed != 0) {
+            PORT_Memcpy(ghash->buffer + ghash->bufLen, buf, needed);
+        }
+        buf += needed;
+        len -= needed;
+        ghash->bufLen += needed;
+        if (len == 0) {
+            /* didn't add enough to hash the data, nothing more to do.
+             * (A buffer that is exactly full stays pending; it is hashed
+             * by the next call that brings data, or by gcmHash_Sync.) */
+            return SECSuccess;
+        }
+        PORT_Assert(ghash->bufLen == blocksize);
+        /* hash the buffer and clear it */
+        rv = gcm_HashMult(ghash, ghash->buffer, 1, blocksize);
+        PORT_Memset(ghash->buffer, 0, blocksize);
+        ghash->bufLen = 0;
+        if (rv != SECSuccess) {
+            return SECFailure;
+        }
+    }
+    /* now hash any full blocks remaining in the data stream */
+    blocks = len / blocksize;
+    if (blocks) {
+        rv = gcm_HashMult(ghash, buf, blocks, blocksize);
+        if (rv != SECSuccess) {
+            return SECFailure;
+        }
+        buf += blocks * blocksize;
+        len -= blocks * blocksize;
+    }
+
+    /* save any remainder in the buffer to be hashed with the next call */
+    if (len != 0) {
+        PORT_Memcpy(ghash->buffer, buf, len);
+        ghash->bufLen = len;
+    }
+    return SECSuccess;
+}
+
+/*
+ * Close off the current data segment: record its bit length in
+ * counterBuf, then push any buffered partial block through the GHASH
+ * engine, zero padded to a full block.
+ */
+static SECStatus
+gcmHash_Sync(gcmHashContext *ghash, unsigned int blocksize)
+{
+    unsigned char *prevLen = ghash->counterBuf;
+    unsigned char *curLen = ghash->counterBuf + GCM_HASH_LEN_LEN;
+    PRUint64 bits = ghash->cLen;
+    unsigned int pos;
+    SECStatus rv;
+
+    /* the length recorded by the previous sync moves to the first half */
+    PORT_Memcpy(prevLen, curLen, GCM_HASH_LEN_LEN);
+    /* store the current bit count big-endian in the second half, filling
+     * from the least significant byte backwards */
+    for (pos = GCM_HASH_LEN_LEN; pos > 0; pos--) {
+        curLen[pos - 1] = (unsigned char)(bits & 0xff);
+        bits >>= PR_BITS_PER_BYTE;
+    }
+    ghash->cLen = 0;
+
+    /* nothing buffered: the segment ended on a block boundary */
+    if (ghash->bufLen == 0) {
+        return SECSuccess;
+    }
+
+    /* zero fill the rest of the buffer and hash the last block */
+    PORT_Memset(ghash->buffer + ghash->bufLen, 0, blocksize - ghash->bufLen);
+    rv = gcm_HashMult(ghash, ghash->buffer, 1, blocksize);
+    PORT_Memset(ghash->buffer, 0, blocksize);
+    ghash->bufLen = 0;
+    return (rv == SECSuccess) ? SECSuccess : SECFailure;
+}
+
+/*
+ * Finish the hash: perform the final sync, hash the block of recorded
+ * lengths, and copy the result "T" to outbuf. At most blocksize bytes are
+ * written; *outlen receives the number of bytes copied.
+ */
+static SECStatus
+gcmHash_Final(gcmHashContext *ghash, unsigned char *outbuf,
+              unsigned int *outlen, unsigned int maxout,
+              unsigned int blocksize)
+{
+    unsigned char T[MAX_BLOCK_SIZE];
+    SECStatus rv;
+
+    rv = gcmHash_Sync(ghash, blocksize);
+    if (rv == SECSuccess) {
+        /* the two 64-bit lengths go through the hash as the final block(s) */
+        rv = gcm_HashMult(ghash, ghash->counterBuf,
+                          (GCM_HASH_LEN_LEN * 2) / blocksize, blocksize);
+    }
+    if (rv == SECSuccess) {
+        GCM_TRACE_X(ghash, "GHASH(H,A,C) = ")
+        rv = gcm_getX(ghash, T, blocksize);
+    }
+    if (rv == SECSuccess) {
+        unsigned int copyLen = (maxout > blocksize) ? blocksize : maxout;
+
+        PORT_Memcpy(outbuf, T, copyLen);
+        *outlen = copyLen;
+    }
+
+    /* the local copy of the hash value is wiped on every path */
+    PORT_Memset(T, 0, sizeof(T));
+    return rv;
+}
+
+/*
+ * Restart the hash for a new message while keeping the subkey H: clear
+ * the running value, the pending buffer and the recorded lengths, then
+ * absorb the Additional Authenticated Data (if any) as the first segment.
+ *
+ * Declared 'static' to match the forward declaration at the top of the
+ * file; the function is only used within this translation unit.
+ */
+static SECStatus
+gcmHash_Reset(gcmHashContext *ghash, const unsigned char *AAD,
+              unsigned int AADLen, unsigned int blocksize)
+{
+    SECStatus rv;
+
+    ghash->cLen = 0;
+    PORT_Memset(ghash->counterBuf, 0, GCM_HASH_LEN_LEN * 2);
+    ghash->bufLen = 0;
+    gcm_zeroX(ghash);
+
+    /* now kick things off by hashing the Additional Authenticated Data */
+    if (AADLen != 0) {
+        rv = gcmHash_Update(ghash, AAD, AADLen, blocksize);
+        if (rv != SECSuccess) {
+            return SECFailure;
+        }
+        /* pad the AAD to a block boundary and record its length */
+        rv = gcmHash_Sync(ghash, blocksize);
+        if (rv != SECSuccess) {
+            return SECFailure;
+        }
+    }
+    return SECSuccess;
+}
+
+/**************************************************************************
+ * Now implement the GCM using gcmHash and CTR *
+ **************************************************************************/
+
+/* State to handle the full GCM operation (hash and counter). Both
+ * sub-contexts are embedded, not separately allocated. */
+struct GCMContextStr {
+ gcmHashContext ghash_context; /* GHASH state */
+ CTRContext ctr_context; /* counter-mode cipher state */
+ unsigned long tagBits; /* requested tag length in bits (ulTagBits) */
+ unsigned char tagKey[MAX_BLOCK_SIZE]; /* first counter-mode output block,
+ * computed in GCM_CreateContext and kept for the final tag step */
+};
+
+/*
+ * Create and initialize a GCM context on top of a block cipher.
+ *
+ *   context   - opaque cipher context passed back to 'cipher' on each call
+ *   cipher    - block cipher function used to derive H and run the counter
+ *   params    - points to a CK_GCM_PARAMS (IV, AAD and tag length in bits)
+ *   blocksize - cipher block size in bytes
+ *
+ * Returns the new context, or NULL on failure. The hash subkey H computed
+ * here is wiped from the stack on every path, and a partially built
+ * context is wiped before it is freed.
+ */
+GCMContext *
+GCM_CreateContext(void *context, freeblCipherFunc cipher,
+                  const unsigned char *params, unsigned int blocksize)
+{
+    GCMContext *gcm = NULL;
+    gcmHashContext *ghash;
+    unsigned char H[MAX_BLOCK_SIZE];
+    unsigned int tmp;
+    PRBool freeCtr = PR_FALSE;
+    PRBool freeHash = PR_FALSE;
+    const CK_GCM_PARAMS *gcmParams = (const CK_GCM_PARAMS *)params;
+    CK_AES_CTR_PARAMS ctrParams;
+    SECStatus rv;
+
+    if (blocksize > MAX_BLOCK_SIZE || blocksize > sizeof(ctrParams.cb)) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return NULL;
+    }
+    gcm = PORT_ZNew(GCMContext);
+    if (gcm == NULL) {
+        return NULL;
+    }
+    /* first fill in the ghash context: H is the encryption of a zero block */
+    ghash = &gcm->ghash_context;
+    PORT_Memset(H, 0, blocksize);
+    rv = (*cipher)(context, H, &tmp, blocksize, H, blocksize, blocksize);
+    if (rv != SECSuccess) {
+        goto loser;
+    }
+    rv = gcmHash_InitContext(ghash, H, blocksize);
+    /* the hash context now holds its own copy of H; drop ours */
+    PORT_Memset(H, 0, sizeof(H));
+    if (rv != SECSuccess) {
+        goto loser;
+    }
+    freeHash = PR_TRUE;
+
+    /* fill in the Counter context */
+    ctrParams.ulCounterBits = 32;
+    PORT_Memset(ctrParams.cb, 0, sizeof(ctrParams.cb));
+    if ((blocksize == 16) && (gcmParams->ulIvLen == 12)) {
+        /* 96-bit IV: initial counter block is IV || 0^31 || 1 */
+        PORT_Memcpy(ctrParams.cb, gcmParams->pIv, gcmParams->ulIvLen);
+        ctrParams.cb[blocksize - 1] = 1;
+    } else {
+        /* any other IV length: initial counter block is the hash of the IV */
+        rv = gcmHash_Update(ghash, gcmParams->pIv, gcmParams->ulIvLen,
+                            blocksize);
+        if (rv != SECSuccess) {
+            goto loser;
+        }
+        rv = gcmHash_Final(ghash, ctrParams.cb, &tmp, blocksize, blocksize);
+        if (rv != SECSuccess) {
+            goto loser;
+        }
+    }
+    rv = CTR_InitContext(&gcm->ctr_context, context, cipher,
+                         (unsigned char *)&ctrParams, blocksize);
+    if (rv != SECSuccess) {
+        goto loser;
+    }
+    freeCtr = PR_TRUE;
+
+    /* fill in the gcm structure */
+    gcm->tagBits = gcmParams->ulTagBits; /* save for final step */
+    /* calculate the final tag key. NOTE: gcm->tagKey is zero to start with.
+     * if this assumption changes, we would need to explicitly clear it here */
+    rv = CTR_Update(&gcm->ctr_context, gcm->tagKey, &tmp, blocksize,
+                    gcm->tagKey, blocksize, blocksize);
+    if (rv != SECSuccess) {
+        goto loser;
+    }
+
+    /* finally mix in the AAD data */
+    rv = gcmHash_Reset(ghash, gcmParams->pAAD, gcmParams->ulAADLen, blocksize);
+    if (rv != SECSuccess) {
+        goto loser;
+    }
+
+    return gcm;
+
+loser:
+    /* H may hold cipher output if we failed before it was cleared above */
+    PORT_Memset(H, 0, sizeof(H));
+    if (freeCtr) {
+        CTR_DestroyContext(&gcm->ctr_context, PR_FALSE);
+    }
+    if (freeHash) {
+        gcmHash_DestroyContext(&gcm->ghash_context, PR_FALSE);
+    }
+    if (gcm) {
+        /* tagKey and the sub-contexts may hold key-derived data */
+        PORT_Memset(gcm, 0, sizeof(*gcm));
+        PORT_Free(gcm);
+    }
+    return NULL;
+}
+
+/*
+ * Tear down a GCM context, scrubbing its tag material. The structure itself
+ * is released only when 'freeit' is set.
+ */
+void
+GCM_DestroyContext(GCMContext *gcm, PRBool freeit)
+{
+    /* The CTR and GHASH contexts are embedded in *gcm, so they go away with
+     * it; their destroy functions are called with PR_FALSE only to release
+     * whatever they allocated internally (like mp_int's). */
+    CTR_DestroyContext(&gcm->ctr_context, PR_FALSE);
+    gcmHash_DestroyContext(&gcm->ghash_context, PR_FALSE);
+
+    /* wipe the fields this layer owns */
+    PORT_Memset(gcm->tagKey, 0, sizeof(gcm->tagKey));
+    PORT_Memset(&gcm->tagBits, 0, sizeof(gcm->tagBits));
+
+    if (!freeit) {
+        return;
+    }
+    PORT_Free(gcm);
+}
+
+/*
+ * Finish the GHASH computation and turn it into the authentication tag.
+ *
+ * On success the tag is written to outbuf and *outlen is set by
+ * gcmHash_Final. If outbuf is NULL or maxout is smaller than the tag,
+ * *outlen reports the number of bytes required and the call fails with
+ * SEC_ERROR_OUTPUT_LEN.
+ */
+static SECStatus
+gcm_GetTag(GCMContext *gcm, unsigned char *outbuf,
+           unsigned int *outlen, unsigned int maxout,
+           unsigned int blocksize)
+{
+    /* tag length rounded up to whole bytes, and the number of surplus
+     * low-order bits in the last of those bytes */
+    const unsigned int tagBytes =
+        (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE;
+    const unsigned int extra = tagBytes * PR_BITS_PER_BYTE - gcm->tagBits;
+    unsigned int pos;
+
+    if (outbuf == NULL || maxout < tagBytes) {
+        *outlen = tagBytes;
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    /* never ask the hash for more than the tag itself */
+    if (gcmHash_Final(&gcm->ghash_context, outbuf, outlen, tagBytes,
+                      blocksize) != SECSuccess) {
+        return SECFailure;
+    }
+
+    GCM_TRACE_BLOCK("GHASH=", outbuf, blocksize);
+    GCM_TRACE_BLOCK("Y0=", gcm->tagKey, blocksize);
+    pos = 0;
+    while (pos < *outlen) {
+        outbuf[pos] ^= gcm->tagKey[pos];
+        pos++;
+    }
+    GCM_TRACE_BLOCK("Y0=", gcm->tagKey, blocksize);
+    GCM_TRACE_BLOCK("T=", outbuf, blocksize);
+
+    /* mask off any extra bits we got */
+    if (extra != 0) {
+        outbuf[tagBytes - 1] &= ~((1 << extra) - 1);
+    }
+    return SECSuccess;
+}
+
+/*
+ * See The Galois/Counter Mode of Operation, McGrew and Viega.
+ * GCM is basically counter mode with a specific initialization and
+ * built in macing operation.
+ *
+ * Encrypts inlen bytes of inbuf into outbuf and appends the tag, so maxout
+ * must be at least inlen plus the tag length in bytes. If it is not,
+ * *outlen is set to the size needed and SEC_ERROR_OUTPUT_LEN is raised.
+ */
+SECStatus
+GCM_EncryptUpdate(GCMContext *gcm, unsigned char *outbuf,
+                  unsigned int *outlen, unsigned int maxout,
+                  const unsigned char *inbuf, unsigned int inlen,
+                  unsigned int blocksize)
+{
+    const unsigned int tagBytes =
+        (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE;
+    unsigned int tagLen;
+
+    /* inlen + tagBytes must not wrap around */
+    if (UINT_MAX - inlen < tagBytes) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+    if (maxout < inlen + tagBytes) {
+        *outlen = inlen + tagBytes;
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    if (CTR_Update(&gcm->ctr_context, outbuf, outlen, maxout,
+                   inbuf, inlen, blocksize) != SECSuccess) {
+        return SECFailure;
+    }
+
+    /* the mac covers the cipher text just produced; if hashing it or
+     * producing the tag fails, do not hand back unauthenticated output */
+    if (gcmHash_Update(&gcm->ghash_context, outbuf, *outlen,
+                       blocksize) != SECSuccess ||
+        gcm_GetTag(gcm, outbuf + *outlen, &tagLen, maxout - *outlen,
+                   blocksize) != SECSuccess) {
+        PORT_Memset(outbuf, 0, *outlen); /* clear the output buffer */
+        *outlen = 0;
+        return SECFailure;
+    }
+
+    *outlen += tagLen;
+    return SECSuccess;
+}
+
+/*
+ * See The Galois/Counter Mode of Operation, McGrew and Viega.
+ * GCM is basically counter mode with a specific initialization and
+ * built in macing operation. NOTE: the only difference between Encrypt
+ * and Decrypt is when we calculate the mac. That is because the mac must
+ * always be calculated on the cipher text, not the plain text, so for
+ * encrypt, we do the CTR update first and for decrypt we do the mac first.
+ *
+ * inbuf holds the cipher text followed by the tag. Nothing is decrypted
+ * unless the tag verifies.
+ */
+SECStatus
+GCM_DecryptUpdate(GCMContext *gcm, unsigned char *outbuf,
+                  unsigned int *outlen, unsigned int maxout,
+                  const unsigned char *inbuf, unsigned int inlen,
+                  unsigned int blocksize)
+{
+    unsigned char tag[MAX_BLOCK_SIZE];
+    const unsigned char *receivedTag;
+    unsigned int cipherLen;
+    unsigned int tagLen;
+    PRBool authentic;
+    const unsigned int tagBytes =
+        (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE;
+
+    /* the input must at least contain the authentication block */
+    if (inlen < tagBytes) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+    cipherLen = inlen - tagBytes;
+    receivedTag = inbuf + cipherLen;
+
+    /* recompute the tag over the cipher text */
+    if (gcmHash_Update(&gcm->ghash_context, inbuf, cipherLen,
+                       blocksize) != SECSuccess) {
+        return SECFailure;
+    }
+    if (gcm_GetTag(gcm, tag, &tagLen, blocksize, blocksize) != SECSuccess) {
+        return SECFailure;
+    }
+
+    /* Don't decrypt if we can't authenticate the encrypted data!
+     * This assumes that if tagBits is not a multiple of 8, the received tag
+     * will preserve the masked off missing bits. */
+    authentic = (NSS_SecureMemcmp(tag, receivedTag, tagBytes) == 0)
+                    ? PR_TRUE
+                    : PR_FALSE;
+    PORT_Memset(tag, 0, sizeof(tag));
+    if (!authentic) {
+        /* force a CKR_ENCRYPTED_DATA_INVALID error at in softoken */
+        PORT_SetError(SEC_ERROR_BAD_DATA);
+        return SECFailure;
+    }
+
+    /* finish the decryption */
+    return CTR_Update(&gcm->ctr_context, outbuf, outlen, maxout,
+                      inbuf, cipherLen, blocksize);
+}
diff --git a/security/nss/lib/freebl/gcm.h b/security/nss/lib/freebl/gcm.h
new file mode 100644
index 000000000..1cdba534d
--- /dev/null
+++ b/security/nss/lib/freebl/gcm.h
@@ -0,0 +1,31 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef GCM_H
+#define GCM_H 1
+
+#include "blapii.h"
+
+typedef struct GCMContextStr GCMContext;
+
+/*
+ * The context argument is the inner cipher context to use with cipher. The
+ * GCMContext does not own context. context needs to remain valid for as long
+ * as the GCMContext is valid.
+ *
+ * The cipher argument is a block cipher in the ECB encrypt mode.
+ *
+ * The params argument is read as a CK_GCM_PARAMS (pIv/ulIvLen,
+ * pAAD/ulAADLen, ulTagBits). GCM_CreateContext returns NULL on failure.
+ */
+GCMContext *GCM_CreateContext(void *context, freeblCipherFunc cipher,
+ const unsigned char *params, unsigned int blocksize);
+void GCM_DestroyContext(GCMContext *gcm, PRBool freeit); /* scrubs gcm; releases it only when freeit is set */
+SECStatus GCM_EncryptUpdate(GCMContext *gcm, unsigned char *outbuf,
+ unsigned int *outlen, unsigned int maxout,
+ const unsigned char *inbuf, unsigned int inlen,
+ unsigned int blocksize); /* writes the cipher text followed by the tag;
+ * maxout must cover inlen plus the tag bytes */
+SECStatus GCM_DecryptUpdate(GCMContext *gcm, unsigned char *outbuf,
+ unsigned int *outlen, unsigned int maxout,
+ const unsigned char *inbuf, unsigned int inlen,
+ unsigned int blocksize); /* inbuf is the cipher text followed by the tag;
+ * fails with SEC_ERROR_BAD_DATA, without
+ * decrypting, when the tag does not match */
+
+#endif
diff --git a/security/nss/lib/freebl/genload.c b/security/nss/lib/freebl/genload.c
new file mode 100644
index 000000000..832deb58c
--- /dev/null
+++ b/security/nss/lib/freebl/genload.c
@@ -0,0 +1,167 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * This file is meant to be included by other .c files.
+ * This file takes a "parameter", the scope which includes this
+ * code shall declare this variable:
+ * const char *NameOfThisSharedLib;
+ *
+ * NameOfThisSharedLib:
+ * The file name of the shared library that shall be used as the
+ * "reference library". The loader will attempt to load the requested
+ * library from the same directory as the reference library.
+ */
+
+#ifdef XP_UNIX
+#include <unistd.h>
+#define BL_MAXSYMLINKS 20
+
+/*
+ * Resolve 'link' to the pathname it ultimately refers to.
+ *
+ * With glibc this is realpath(): the result is the canonical pathname, and
+ * NULL is returned only if realpath() fails or memory runs out. Without
+ * glibc, symbolic links are followed by hand (at most BL_MAXSYMLINKS
+ * times) and NULL is returned if 'link' is not a symbolic link.
+ *
+ * The caller should call PR_Free to free the string returned by this
+ * function.
+ */
+static char*
+loader_GetOriginalPathname(const char* link)
+{
+#ifdef __GLIBC__
+    char* tmp = realpath(link, NULL);
+    char* resolved;
+    if (!tmp)
+        return NULL;
+    /* Copy into PR_Malloc'ed storage because PR_Free might not be using
+     * free(). The allocation can fail, so only copy into a valid buffer. */
+    resolved = PR_Malloc(strlen(tmp) + 1);
+    if (resolved) {
+        strcpy(resolved, tmp);
+    }
+    free(tmp);
+    return resolved;
+#else
+    char* resolved = NULL;
+    char* input = NULL;
+    PRUint32 iterations = 0;
+    PRInt32 len = 0, retlen = 0;
+    if (!link) {
+        PR_SetError(PR_INVALID_ARGUMENT_ERROR, 0);
+        return NULL;
+    }
+    len = PR_MAX(1024, strlen(link) + 1);
+    resolved = PR_Malloc(len);
+    input = PR_Malloc(len);
+    if (!resolved || !input) {
+        if (resolved) {
+            PR_Free(resolved);
+        }
+        if (input) {
+            PR_Free(input);
+        }
+        return NULL;
+    }
+    strcpy(input, link);
+    /* Each successful readlink() result becomes the next name to look up;
+     * the two buffers simply swap roles. */
+    while ((iterations++ < BL_MAXSYMLINKS) &&
+           ((retlen = readlink(input, resolved, len - 1)) > 0)) {
+        char* tmp = input;
+        resolved[retlen] = '\0'; /* NULL termination */
+        input = resolved;
+        resolved = tmp;
+    }
+    PR_Free(resolved);
+    /* the very first readlink() failed: 'link' is not a symbolic link */
+    if (iterations == 1 && retlen < 0) {
+        PR_Free(input);
+        input = NULL;
+    }
+    return input;
+#endif
+}
+#endif /* XP_UNIX */
+
+/*
+ * Load the library with the file name 'name' residing in the same
+ * directory as the reference library, whose pathname is 'referencePath'.
+ */
+static PRLibrary*
+loader_LoadLibInReferenceDir(const char* referencePath, const char* name)
+{
+    PRLibSpec libSpec;
+    PRLibrary* handle;
+    char* candidate;
+    size_t dirLen;
+    char* lastSep = strrchr(referencePath, PR_GetDirectorySeparator());
+
+    /* no directory component in referencePath: nothing to try */
+    if (!lastSep) {
+        return NULL;
+    }
+
+    /* keep everything up to and including the last separator, then append
+     * the requested file name */
+    dirLen = 1 + lastSep - referencePath;
+    candidate = (char*)PORT_Alloc(strlen(name) + dirLen + 1);
+    if (!candidate) {
+        return NULL;
+    }
+    memcpy(candidate, referencePath, dirLen);
+    strcpy(candidate + dirLen, name);
+#ifdef DEBUG_LOADER
+    PR_fprintf(PR_STDOUT, "\nAttempting to load fully-qualified %s\n",
+               candidate);
+#endif
+    libSpec.type = PR_LibSpec_Pathname;
+    libSpec.value.pathname = candidate;
+    handle = PR_LoadLibraryWithFlags(libSpec, PR_LD_NOW | PR_LD_LOCAL);
+    PORT_Free(candidate);
+    return handle;
+}
+
+/*
+ * We use PR_GetLibraryFilePathname to get the pathname of the loaded
+ * shared lib that contains this function, and then do a PR_LoadLibrary
+ * with an absolute pathname for the softoken shared library.
+ */
+
+static PRLibrary*
+loader_LoadLibrary(const char* nameToLoad)
+{
+    PRLibrary* lib = NULL;
+    PRLibSpec libSpec;
+    /* Get the pathname of the library containing this code, e.g.
+     * /usr/lib/libnss3.so. PR_GetLibraryFilePathname works with either the
+     * base library name or a function pointer, depending on the platform.
+     * An exported symbol such as NSC_GetFunctionList can't be queried,
+     * because on some platforms symbols in loaded implicit dependencies
+     * can't be found, so the address of this very function is used. */
+    char* fullPath = PR_GetLibraryFilePathname(NameOfThisSharedLib,
+                                               (PRFuncPtr)&loader_LoadLibrary);
+
+    if (fullPath) {
+        lib = loader_LoadLibInReferenceDir(fullPath, nameToLoad);
+#ifdef XP_UNIX
+        if (!lib) {
+            /* fullPath may be a symbolic link: resolve it and retry from
+             * the directory of the real file */
+            char* resolvedPath = loader_GetOriginalPathname(fullPath);
+            if (resolvedPath) {
+                PR_Free(fullPath);
+                fullPath = resolvedPath;
+                lib = loader_LoadLibInReferenceDir(fullPath, nameToLoad);
+            }
+        }
+#endif
+        PR_Free(fullPath);
+    }
+    if (lib) {
+        return lib;
+    }
+
+    /* last resort: hand the bare name to the runtime loader */
+#ifdef DEBUG_LOADER
+    PR_fprintf(PR_STDOUT, "\nAttempting to load %s\n", nameToLoad);
+#endif
+    libSpec.type = PR_LibSpec_Pathname;
+    libSpec.value.pathname = nameToLoad;
+    lib = PR_LoadLibraryWithFlags(libSpec, PR_LD_NOW | PR_LD_LOCAL);
+#ifdef DEBUG_LOADER
+    if (NULL == lib) {
+        PR_fprintf(PR_STDOUT, "\nLoading failed : %s.\n", nameToLoad);
+    }
+#endif
+    return lib;
+}
diff --git a/security/nss/lib/freebl/hmacct.c b/security/nss/lib/freebl/hmacct.c
new file mode 100644
index 000000000..c7815ac05
--- /dev/null
+++ b/security/nss/lib/freebl/hmacct.c
@@ -0,0 +1,335 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "secport.h"
+#include "hasht.h"
+#include "blapit.h"
+#include "hmacct.h"
+#include "secerr.h"
+
+/* MAX_HASH_BIT_COUNT_BYTES is the maximum number of bytes in the hash's length
+ * field. (SHA-384/512 have 128-bit length.) */
+#define MAX_HASH_BIT_COUNT_BYTES 16
+
+/* Some utility functions are needed:
+ *
+ * These macros return the given value with the MSB copied to all the other
+ * bits. They use the fact that an arithmetic shift shifts-in the sign bit.
+ * However, this is not ensured by the C standard so you may need to replace
+ * them with something else on odd CPUs.
+ *
+ * Note: the argument to these macros must be an unsigned int.
+ * */
+#define DUPLICATE_MSB_TO_ALL(x) ((unsigned int)((int)(x) >> (sizeof(int) * 8 - 1)))
+#define DUPLICATE_MSB_TO_ALL_8(x) ((unsigned char)(DUPLICATE_MSB_TO_ALL(x)))
+
+/* constantTimeGE returns 0xff if a>=b and 0x00 otherwise, where a, b <
+ * MAX_UINT/2. The sign bit of a-b is clear exactly when a>=b, so its
+ * complement is smeared across the result. */
+static unsigned char
+constantTimeGE(unsigned int a, unsigned int b)
+{
+    const unsigned int difference = a - b;
+
+    return DUPLICATE_MSB_TO_ALL(~difference);
+}
+
+/* constantTimeEQ8 returns 0xff if a==b and 0x00 otherwise. a^b is zero only
+ * for equal inputs, and zero is the only value whose decrement wraps around
+ * and sets the top bit. */
+static unsigned char
+constantTimeEQ8(unsigned char a, unsigned char b)
+{
+    unsigned int delta = a ^ b;
+
+    delta -= 1;
+    return DUPLICATE_MSB_TO_ALL_8(delta);
+}
+
+/* MAC performs a constant time SSLv3/TLS MAC of |dataLen| bytes of |data|,
+ * where |dataLen| includes both the authenticated bytes and the MAC tag from
+ * the sender. |dataLen| must be >= the length of the MAC tag.
+ *
+ * |dataTotalLen| is >= |dataLen| and also accounts for any padding bytes
+ * that may follow the sender's MAC. (Only a single block of padding may
+ * follow in SSLv3, or up to 255 bytes in TLS.)
+ *
+ * Since the results of decryption are secret information (otherwise a
+ * padding-oracle is created), this function is constant-time with respect to
+ * |dataLen|.
+ *
+ * |header| contains either the 13-byte TLS header (containing the sequence
+ * number, record type etc), or it contains the SSLv3 header with the SSLv3
+ * padding bytes etc.
+ *
+ * The MAC is written to |mdOut| (at most |mdOutMax| bytes) and its length
+ * to |*mdOutLen|. Returns SECFailure if the hash context cannot be created
+ * or, for TLS, if |macSecretLen| exceeds the internal pad buffer. */
+static SECStatus
+MAC(unsigned char *mdOut,
+    unsigned int *mdOutLen,
+    unsigned int mdOutMax,
+    const SECHashObject *hashObj,
+    const unsigned char *macSecret,
+    unsigned int macSecretLen,
+    const unsigned char *header,
+    unsigned int headerLen,
+    const unsigned char *data,
+    unsigned int dataLen,
+    unsigned int dataTotalLen,
+    unsigned char isSSLv3)
+{
+    void *mdState;
+    const unsigned int mdSize = hashObj->length;
+    const unsigned int mdBlockSize = hashObj->blocklength;
+    /* mdLengthSize is the number of bytes in the length field that terminates
+     * the hash.
+     *
+     * This assumes that hash functions with a 64 byte block size use a 64-bit
+     * length, and otherwise they use a 128-bit length. This is true of {MD5,
+     * SHA*} (which are all of the hash functions specified for use with TLS
+     * today). */
+    const unsigned int mdLengthSize = mdBlockSize == 64 ? 8 : 16;
+
+    const unsigned int sslv3PadLen = hashObj->type == HASH_AlgMD5 ? 48 : 40;
+
+    /* varianceBlocks is the number of blocks of the hash that we have to
+     * calculate in constant time because they could be altered by the
+     * padding value.
+     *
+     * In SSLv3, the padding must be minimal so the end of the plaintext
+     * varies by, at most, 15+20 = 35 bytes. (We conservatively assume that
+     * the MAC size varies from 0..20 bytes.) In case the 9 bytes of hash
+     * termination (0x80 + 64-bit length) don't fit in the final block, we
+     * say that the final two blocks can vary based on the padding.
+     *
+     * TLSv1 has MACs up to 48 bytes long (SHA-384) and the padding is not
+     * required to be minimal. Therefore we say that the final six blocks
+     * can vary based on the padding.
+     *
+     * Later in the function, if the message is short and there obviously
+     * cannot be this many blocks then varianceBlocks can be reduced. */
+    unsigned int varianceBlocks = isSSLv3 ? 2 : 6;
+    /* From now on we're dealing with the MAC, which conceptually has 13
+     * bytes of `header' before the start of the data (TLS) or 71/75 bytes
+     * (SSLv3) */
+    const unsigned int len = dataTotalLen + headerLen;
+    /* maxMACBytes contains the maximum number of bytes in the MAC, including
+     * |header|, assuming that there's no padding. */
+    const unsigned int maxMACBytes = len - mdSize - 1;
+    /* numBlocks is the maximum number of hash blocks. */
+    const unsigned int numBlocks =
+        (maxMACBytes + 1 + mdLengthSize + mdBlockSize - 1) / mdBlockSize;
+    /* macEndOffset is the index just past the end of the data to be
+     * MACed. */
+    const unsigned int macEndOffset = dataLen + headerLen - mdSize;
+    /* c is the index of the 0x80 byte in the final hash block that
+     * contains application data. */
+    const unsigned int c = macEndOffset % mdBlockSize;
+    /* indexA is the hash block number that contains the 0x80 terminating
+     * value. */
+    const unsigned int indexA = macEndOffset / mdBlockSize;
+    /* indexB is the hash block number that contains the 64-bit hash
+     * length, in bits. */
+    const unsigned int indexB = (macEndOffset + mdLengthSize) / mdBlockSize;
+    /* bits is the hash-length in bits. It includes the additional hash
+     * block for the masked HMAC key, or whole of |header| in the case of
+     * SSLv3. */
+    unsigned int bits;
+    /* In order to calculate the MAC in constant time we have to handle
+     * the final blocks specially because the padding value could cause the
+     * end to appear somewhere in the final |varianceBlocks| blocks and we
+     * can't leak where. However, |numStartingBlocks| worth of data can
+     * be hashed right away because no padding value can affect whether
+     * they are plaintext. */
+    unsigned int numStartingBlocks = 0;
+    /* k is the starting byte offset into the conceptual header||data where
+     * we start processing. */
+    unsigned int k = 0;
+    unsigned char lengthBytes[MAX_HASH_BIT_COUNT_BYTES];
+    /* hmacPad is the masked HMAC key. */
+    unsigned char hmacPad[HASH_BLOCK_LENGTH_MAX];
+    unsigned char firstBlock[HASH_BLOCK_LENGTH_MAX];
+    unsigned char macOut[HASH_LENGTH_MAX];
+    unsigned i, j;
+
+    /* In the TLS case the secret is copied into hmacPad below. Enforce the
+     * bound at run time as well, so that a release build cannot overrun
+     * the stack buffer. (The key length is not secret, so this early exit
+     * does not affect the constant-time property.) */
+    if (!isSSLv3 && macSecretLen > sizeof(hmacPad)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    mdState = hashObj->create();
+    if (mdState == NULL) {
+        /* presumably create() has already recorded the error code */
+        return SECFailure;
+    }
+
+    /* For SSLv3, if we're going to have any starting blocks then we need
+     * at least two because the header is larger than a single block. */
+    if (numBlocks > varianceBlocks + (isSSLv3 ? 1 : 0)) {
+        numStartingBlocks = numBlocks - varianceBlocks;
+        k = mdBlockSize * numStartingBlocks;
+    }
+
+    bits = 8 * macEndOffset;
+    hashObj->begin(mdState);
+    if (!isSSLv3) {
+        /* Compute the initial HMAC block. For SSLv3, the padding and
+         * secret bytes are included in |header| because they take more
+         * than a single block. */
+        bits += 8 * mdBlockSize;
+        memset(hmacPad, 0, mdBlockSize);
+        memcpy(hmacPad, macSecret, macSecretLen);
+        for (i = 0; i < mdBlockSize; i++)
+            hmacPad[i] ^= 0x36;
+        hashObj->update(mdState, hmacPad, mdBlockSize);
+    }
+
+    j = 0;
+    memset(lengthBytes, 0, sizeof(lengthBytes));
+    if (mdLengthSize == 16) {
+        j = 8;
+    }
+    if (hashObj->type == HASH_AlgMD5) {
+        /* MD5 appends a little-endian length. */
+        for (i = 0; i < 4; i++) {
+            lengthBytes[i + j] = bits >> (8 * i);
+        }
+    } else {
+        /* All other TLS hash functions use a big-endian length. */
+        for (i = 0; i < 4; i++) {
+            lengthBytes[4 + i + j] = bits >> (8 * (3 - i));
+        }
+    }
+
+    if (k > 0) {
+        if (isSSLv3) {
+            /* The SSLv3 header is larger than a single block.
+             * overhang is the number of bytes beyond a single
+             * block that the header consumes: either 7 bytes
+             * (SHA1) or 11 bytes (MD5). */
+            const unsigned int overhang = headerLen - mdBlockSize;
+            hashObj->update(mdState, header, mdBlockSize);
+            memcpy(firstBlock, header + mdBlockSize, overhang);
+            memcpy(firstBlock + overhang, data, mdBlockSize - overhang);
+            hashObj->update(mdState, firstBlock, mdBlockSize);
+            for (i = 1; i < k / mdBlockSize - 1; i++) {
+                hashObj->update(mdState, data + mdBlockSize * i - overhang,
+                                mdBlockSize);
+            }
+        } else {
+            /* k is a multiple of mdBlockSize. */
+            memcpy(firstBlock, header, 13);
+            memcpy(firstBlock + 13, data, mdBlockSize - 13);
+            hashObj->update(mdState, firstBlock, mdBlockSize);
+            for (i = 1; i < k / mdBlockSize; i++) {
+                hashObj->update(mdState, data + mdBlockSize * i - 13,
+                                mdBlockSize);
+            }
+        }
+    }
+
+    memset(macOut, 0, sizeof(macOut));
+
+    /* We now process the final hash blocks. For each block, we construct
+     * it in constant time. If i == indexA then we'll include the 0x80
+     * bytes and zero pad etc. For each block we selectively copy it, in
+     * constant time, to |macOut|. */
+    for (i = numStartingBlocks; i <= numStartingBlocks + varianceBlocks; i++) {
+        unsigned char block[HASH_BLOCK_LENGTH_MAX];
+        unsigned char isBlockA = constantTimeEQ8(i, indexA);
+        unsigned char isBlockB = constantTimeEQ8(i, indexB);
+        for (j = 0; j < mdBlockSize; j++) {
+            unsigned char isPastC = isBlockA & constantTimeGE(j, c);
+            unsigned char isPastCPlus1 = isBlockA & constantTimeGE(j, c + 1);
+            unsigned char b = 0;
+            if (k < headerLen) {
+                b = header[k];
+            } else if (k < dataTotalLen + headerLen) {
+                b = data[k - headerLen];
+            }
+            k++;
+
+            /* If this is the block containing the end of the
+             * application data, and we are at the offset for the
+             * 0x80 value, then overwrite b with 0x80. */
+            b = (b & ~isPastC) | (0x80 & isPastC);
+            /* If this is the block containing the end of the
+             * application data and we're past the 0x80 value then
+             * just write zero. */
+            b = b & ~isPastCPlus1;
+            /* If this is indexB (the final block), but not
+             * indexA (the end of the data), then the 64-bit
+             * length didn't fit into indexA and we're having to
+             * add an extra block of zeros. */
+            b &= ~isBlockB | isBlockA;
+
+            /* The final bytes of one of the blocks contains the length. */
+            if (j >= mdBlockSize - mdLengthSize) {
+                /* If this is indexB, write a length byte. */
+                b = (b & ~isBlockB) |
+                    (isBlockB & lengthBytes[j - (mdBlockSize - mdLengthSize)]);
+            }
+            block[j] = b;
+        }
+
+        hashObj->update(mdState, block, mdBlockSize);
+        hashObj->end_raw(mdState, block, NULL, mdSize);
+        /* If this is indexB, copy the hash value to |macOut|. */
+        for (j = 0; j < mdSize; j++) {
+            macOut[j] |= block[j] & isBlockB;
+        }
+    }
+
+    hashObj->begin(mdState);
+
+    if (isSSLv3) {
+        /* We repurpose |hmacPad| to contain the SSLv3 pad2 block. */
+        for (i = 0; i < sslv3PadLen; i++)
+            hmacPad[i] = 0x5c;
+
+        hashObj->update(mdState, macSecret, macSecretLen);
+        hashObj->update(mdState, hmacPad, sslv3PadLen);
+        hashObj->update(mdState, macOut, mdSize);
+    } else {
+        /* Complete the HMAC in the standard manner. hmacPad still holds
+         * key ^ 0x36; 0x36 ^ 0x6a == 0x5c turns it into the outer pad. */
+        for (i = 0; i < mdBlockSize; i++)
+            hmacPad[i] ^= 0x6a;
+
+        hashObj->update(mdState, hmacPad, mdBlockSize);
+        hashObj->update(mdState, macOut, mdSize);
+    }
+
+    hashObj->end(mdState, mdOut, mdOutLen, mdOutMax);
+    hashObj->destroy(mdState, PR_TRUE);
+
+    /* Best-effort scrub of the key-derived and record-derived material
+     * left on the stack. */
+    memset(hmacPad, 0, sizeof(hmacPad));
+    memset(firstBlock, 0, sizeof(firstBlock));
+    memset(macOut, 0, sizeof(macOut));
+
+    return SECSuccess;
+}
+
+/* TLS entry point for the constant-time MAC: forwards to MAC() with the
+ * SSLv3 flag cleared. Fails if the hash object has no end_raw hook. */
+SECStatus
+HMAC_ConstantTime(
+    unsigned char *result,
+    unsigned int *resultLen,
+    unsigned int maxResultLen,
+    const SECHashObject *hashObj,
+    const unsigned char *secret,
+    unsigned int secretLen,
+    const unsigned char *header,
+    unsigned int headerLen,
+    const unsigned char *body,
+    unsigned int bodyLen,
+    unsigned int bodyTotalLen)
+{
+    const unsigned char useSSLv3 = 0; /* not SSLv3 */
+
+    if (hashObj->end_raw != NULL) {
+        return MAC(result, resultLen, maxResultLen, hashObj, secret,
+                   secretLen, header, headerLen, body, bodyLen, bodyTotalLen,
+                   useSSLv3);
+    }
+    return SECFailure;
+}
+
+/* SSLv3 entry point for the constant-time MAC: forwards to MAC() with the
+ * SSLv3 flag set. Fails if the hash object has no end_raw hook. */
+SECStatus
+SSLv3_MAC_ConstantTime(
+    unsigned char *result,
+    unsigned int *resultLen,
+    unsigned int maxResultLen,
+    const SECHashObject *hashObj,
+    const unsigned char *secret,
+    unsigned int secretLen,
+    const unsigned char *header,
+    unsigned int headerLen,
+    const unsigned char *body,
+    unsigned int bodyLen,
+    unsigned int bodyTotalLen)
+{
+    const unsigned char useSSLv3 = 1; /* SSLv3 */
+
+    if (hashObj->end_raw != NULL) {
+        return MAC(result, resultLen, maxResultLen, hashObj, secret,
+                   secretLen, header, headerLen, body, bodyLen, bodyTotalLen,
+                   useSSLv3);
+    }
+    return SECFailure;
+}
diff --git a/security/nss/lib/freebl/hmacct.h b/security/nss/lib/freebl/hmacct.h
new file mode 100644
index 000000000..a773ea89c
--- /dev/null
+++ b/security/nss/lib/freebl/hmacct.h
@@ -0,0 +1,38 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _HMACCT_H_
+#define _HMACCT_H_
+
+SEC_BEGIN_PROTOS
+
+extern SECStatus HMAC_ConstantTime(
+ unsigned char *result,
+ unsigned int *resultLen,
+ unsigned int maxResultLen,
+ const SECHashObject *hashObj,
+ const unsigned char *secret,
+ unsigned int secretLen,
+ const unsigned char *header,
+ unsigned int headerLen,
+ const unsigned char *body,
+ unsigned int bodyLen,
+ unsigned int bodyTotalLen); /* TLS variant of the constant-time MAC.
+ * |bodyLen| covers the authenticated bytes plus the sender's MAC tag;
+ * |bodyTotalLen| (>= |bodyLen|) also covers any padding after the tag.
+ * Returns SECFailure if |hashObj| has no end_raw function. */
+
+extern SECStatus SSLv3_MAC_ConstantTime(
+ unsigned char *result,
+ unsigned int *resultLen,
+ unsigned int maxResultLen,
+ const SECHashObject *hashObj,
+ const unsigned char *secret,
+ unsigned int secretLen,
+ const unsigned char *header,
+ unsigned int headerLen,
+ const unsigned char *body,
+ unsigned int bodyLen,
+ unsigned int bodyTotalLen); /* SSLv3 variant: same length conventions as
+ * HMAC_ConstantTime, but |header| carries the SSLv3 header with the SSLv3
+ * padding bytes. Returns SECFailure if |hashObj| has no end_raw function. */
+
+SEC_END_PROTOS
+
+#endif
diff --git a/security/nss/lib/freebl/intel-aes-x64-masm.asm b/security/nss/lib/freebl/intel-aes-x64-masm.asm
new file mode 100644
index 000000000..ef5c76ba2
--- /dev/null
+++ b/security/nss/lib/freebl/intel-aes-x64-masm.asm
@@ -0,0 +1,971 @@
+; LICENSE:
+; This submission to NSS is to be made available under the terms of the
+; Mozilla Public License, v. 2.0. You can obtain one at http:
+; //mozilla.org/MPL/2.0/.
+;###############################################################################
+; Copyright(c) 2014, Intel Corp.
+; Developers and authors:
+; Shay Gueron and Vlad Krasnov
+; Intel Corporation, Israel Development Centre, Haifa, Israel
+; Please send feedback directly to crypto.feedback.alias@intel.com
+
+
+.DATA
+ALIGN 16
+Lmask dd 0c0f0e0dh,0c0f0e0dh,0c0f0e0dh,0c0f0e0dh ; pshufb mask used by the 128-bit key schedule
+Lmask192 dd 004070605h, 004070605h, 004070605h, 004070605h ; pshufb mask used by the 192-bit key schedule
+Lmask256 dd 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh ; pshufb mask used by the 256-bit key schedule (same value as Lmask)
+Lcon1 dd 1,1,1,1 ; initial round constant; the key schedules double it with pslld
+Lcon2 dd 1bh,1bh,1bh,1bh ; round constant loaded for the last two 128-bit rounds (1bh, then 36h after pslld)
+
+.CODE
+
+ctx textequ <rcx>
+output textequ <rdx>
+input textequ <r8>
+inputLen textequ <r9d>
+
+
+aes_rnd MACRO i ; one aesenc round on xmm0..xmm7 with the 16-byte key at [i*16 + ctx]
+ movdqu xmm8, [i*16 + ctx] ; xmm8 = round key i (clobbers xmm8)
+ aesenc xmm0, xmm8
+ aesenc xmm1, xmm8
+ aesenc xmm2, xmm8
+ aesenc xmm3, xmm8
+ aesenc xmm4, xmm8
+ aesenc xmm5, xmm8
+ aesenc xmm6, xmm8
+ aesenc xmm7, xmm8
+ ENDM
+
+aes_last_rnd MACRO i ; final encryption round (aesenclast) on xmm0..xmm7 with the key at [i*16 + ctx]
+ movdqu xmm8, [i*16 + ctx] ; xmm8 = round key i (clobbers xmm8)
+ aesenclast xmm0, xmm8
+ aesenclast xmm1, xmm8
+ aesenclast xmm2, xmm8
+ aesenclast xmm3, xmm8
+ aesenclast xmm4, xmm8
+ aesenclast xmm5, xmm8
+ aesenclast xmm6, xmm8
+ aesenclast xmm7, xmm8
+ ENDM
+
+aes_dec_rnd MACRO i ; one aesdec round on xmm0..xmm7 with the 16-byte key at [i*16 + ctx]
+ movdqu xmm8, [i*16 + ctx] ; xmm8 = round key i (clobbers xmm8)
+ aesdec xmm0, xmm8
+ aesdec xmm1, xmm8
+ aesdec xmm2, xmm8
+ aesdec xmm3, xmm8
+ aesdec xmm4, xmm8
+ aesdec xmm5, xmm8
+ aesdec xmm6, xmm8
+ aesdec xmm7, xmm8
+ ENDM
+
+aes_dec_last_rnd MACRO i ; final decryption round (aesdeclast) on xmm0..xmm7 with the key at [i*16 + ctx]
+ movdqu xmm8, [i*16 + ctx] ; xmm8 = round key i (clobbers xmm8)
+ aesdeclast xmm0, xmm8
+ aesdeclast xmm1, xmm8
+ aesdeclast xmm2, xmm8
+ aesdeclast xmm3, xmm8
+ aesdeclast xmm4, xmm8
+ aesdeclast xmm5, xmm8
+ aesdeclast xmm6, xmm8
+ aesdeclast xmm7, xmm8
+ ENDM
+
+
+gen_aes_ecb_func MACRO enc, rnds ; body of an ECB routine: enc=1 encrypts, otherwise decrypts; rnds = number of AES rounds
+
+LOCAL loop8
+LOCAL loop1
+LOCAL bail
+
+ xor inputLen, inputLen
+ mov input, [rsp + 1*8 + 8*4] ; stack slot at entry rsp+40 (presumably the 5th argument; confirm against the C prototype)
+ mov inputLen, [rsp + 1*8 + 8*5] ; stack slot at entry rsp+48 (presumably the 6th argument)
+
+ sub rsp, 3*16 ; scratch area for the three XMM registers saved below
+
+ movdqu [rsp + 0*16], xmm6 ; xmm6..xmm8 are restored at bail
+ movdqu [rsp + 1*16], xmm7
+ movdqu [rsp + 2*16], xmm8
+
+ lea ctx, [48+ctx] ; round keys are read from ctx+48 onwards (layout of the context is not visible here)
+
+loop8: ; main loop: eight 16-byte blocks per iteration
+ cmp inputLen, 8*16
+ jb loop1 ; fewer than eight blocks left
+
+ movdqu xmm0, [0*16 + input]
+ movdqu xmm1, [1*16 + input]
+ movdqu xmm2, [2*16 + input]
+ movdqu xmm3, [3*16 + input]
+ movdqu xmm4, [4*16 + input]
+ movdqu xmm5, [5*16 + input]
+ movdqu xmm6, [6*16 + input]
+ movdqu xmm7, [7*16 + input]
+
+ movdqu xmm8, [0*16 + ctx] ; round key 0: initial whitening XOR
+ pxor xmm0, xmm8
+ pxor xmm1, xmm8
+ pxor xmm2, xmm8
+ pxor xmm3, xmm8
+ pxor xmm4, xmm8
+ pxor xmm5, xmm8
+ pxor xmm6, xmm8
+ pxor xmm7, xmm8
+
+IF enc eq 1
+ rnd textequ <aes_rnd>
+ lastrnd textequ <aes_last_rnd>
+ aesinst textequ <aesenc>
+ aeslastinst textequ <aesenclast>
+ELSE
+ rnd textequ <aes_dec_rnd>
+ lastrnd textequ <aes_dec_last_rnd>
+ aesinst textequ <aesdec>
+ aeslastinst textequ <aesdeclast>
+ENDIF
+
+ i = 1
+ WHILE i LT rnds
+ rnd i
+ i = i+1
+ ENDM
+ lastrnd rnds
+
+ movdqu [0*16 + output], xmm0
+ movdqu [1*16 + output], xmm1
+ movdqu [2*16 + output], xmm2
+ movdqu [3*16 + output], xmm3
+ movdqu [4*16 + output], xmm4
+ movdqu [5*16 + output], xmm5
+ movdqu [6*16 + output], xmm6
+ movdqu [7*16 + output], xmm7
+
+ lea input, [8*16 + input]
+ lea output, [8*16 + output]
+ sub inputLen, 8*16
+ jmp loop8
+
+loop1: ; tail loop: one 16-byte block per iteration
+ cmp inputLen, 1*16
+ jb bail ; any remainder shorter than one block is left unprocessed
+
+ movdqu xmm0, [input]
+ movdqu xmm7, [0*16 + ctx] ; xmm7 is the scratch register for round keys here
+ pxor xmm0, xmm7
+
+ i = 1
+ WHILE i LT rnds
+ movdqu xmm7, [i*16 + ctx]
+ aesinst xmm0, xmm7
+ i = i+1
+ ENDM
+ movdqu xmm7, [rnds*16 + ctx]
+ aeslastinst xmm0, xmm7
+
+ movdqu [output], xmm0
+
+ lea input, [1*16 + input]
+ lea output, [1*16 + output]
+ sub inputLen, 1*16
+ jmp loop1
+
+bail:
+ xor rax, rax ; return value 0
+
+ movdqu xmm6, [rsp + 0*16]
+ movdqu xmm7, [rsp + 1*16]
+ movdqu xmm8, [rsp + 2*16]
+ add rsp, 3*16
+ ret
+ENDM
+
+intel_aes_encrypt_ecb_128 PROC ; ECB encrypt, 10 rounds (body generated by gen_aes_ecb_func)
+gen_aes_ecb_func 1, 10
+intel_aes_encrypt_ecb_128 ENDP
+
+intel_aes_encrypt_ecb_192 PROC ; ECB encrypt, 12 rounds
+gen_aes_ecb_func 1, 12
+intel_aes_encrypt_ecb_192 ENDP
+
+intel_aes_encrypt_ecb_256 PROC ; ECB encrypt, 14 rounds
+gen_aes_ecb_func 1, 14
+intel_aes_encrypt_ecb_256 ENDP
+
+intel_aes_decrypt_ecb_128 PROC ; ECB decrypt, 10 rounds
+gen_aes_ecb_func 0, 10
+intel_aes_decrypt_ecb_128 ENDP
+
+intel_aes_decrypt_ecb_192 PROC ; ECB decrypt, 12 rounds
+gen_aes_ecb_func 0, 12
+intel_aes_decrypt_ecb_192 ENDP
+
+intel_aes_decrypt_ecb_256 PROC ; ECB decrypt, 14 rounds
+gen_aes_ecb_func 0, 14
+intel_aes_decrypt_ecb_256 ENDP
+
+
+KEY textequ <rcx>
+KS textequ <rdx>
+ITR textequ <r8>
+
+intel_aes_encrypt_init_128 PROC ; expands the 16-byte key at [KEY] into 11 round keys stored at [KS]; clobbers KS, ITR, xmm0-xmm4
+
+ movdqu xmm1, [KEY]
+ movdqu [KS], xmm1 ; round key 0 is the cipher key itself
+ movdqa xmm2, xmm1
+
+ lea ITR, Lcon1
+ movdqa xmm0, [ITR] ; xmm0 = round constant, starting at 1
+ lea ITR, Lmask
+ movdqa xmm4, [ITR] ; xmm4 = pshufb mask applied to the previous round key
+
+ mov ITR, 8 ; the loop produces round keys 1..8
+
+Lenc_128_ks_loop:
+ lea KS, [16 + KS]
+ dec ITR ; sets ZF for the jne at the bottom; none of the instructions in between write the flags
+
+ pshufb xmm2, xmm4
+ aesenclast xmm2, xmm0
+ pslld xmm0, 1 ; double the round constant for the next iteration
+ movdqa xmm3, xmm1
+ pslldq xmm3, 4
+ pxor xmm1, xmm3
+ pslldq xmm3, 4
+ pxor xmm1, xmm3
+ pslldq xmm3, 4
+ pxor xmm1, xmm3
+ pxor xmm1, xmm2
+ movdqu [KS], xmm1
+ movdqa xmm2, xmm1
+
+ jne Lenc_128_ks_loop
+
+ lea ITR, Lcon2
+ movdqa xmm0, [ITR] ; round constant 1bh for round key 9
+
+ pshufb xmm2, xmm4
+ aesenclast xmm2, xmm0
+ pslld xmm0, 1 ; 1bh becomes 36h for round key 10
+ movdqa xmm3, xmm1
+ pslldq xmm3, 4
+ pxor xmm1, xmm3
+ pslldq xmm3, 4
+ pxor xmm1, xmm3
+ pslldq xmm3, 4
+ pxor xmm1, xmm3
+ pxor xmm1, xmm2
+ movdqu [16 + KS], xmm1 ; round key 9
+ movdqa xmm2, xmm1
+
+ pshufb xmm2, xmm4
+ aesenclast xmm2, xmm0
+ movdqa xmm3, xmm1
+ pslldq xmm3, 4
+ pxor xmm1, xmm3
+ pslldq xmm3, 4
+ pxor xmm1, xmm3
+ pslldq xmm3, 4
+ pxor xmm1, xmm3
+ pxor xmm1, xmm2
+ movdqu [32 + KS], xmm1 ; round key 10
+ movdqa xmm2, xmm1
+
+ ret
+intel_aes_encrypt_init_128 ENDP
+
+
+intel_aes_decrypt_init_128 PROC ; builds the decryption schedule at [KS]: encryption round keys in reverse order, with aesimc applied to keys 1..9
+
+ push KS ; the encrypt init advances KS, so both pointers are saved across the call
+ push KEY
+
+ call intel_aes_encrypt_init_128
+
+ pop KEY
+ pop KS
+
+ movdqu xmm0, [0*16 + KS] ; swap first and last round keys; no aesimc on these two
+ movdqu xmm1, [10*16 + KS]
+ movdqu [10*16 + KS], xmm0
+ movdqu [0*16 + KS], xmm1
+
+ i = 1
+ WHILE i LT 5
+ movdqu xmm0, [i*16 + KS]
+ movdqu xmm1, [(10-i)*16 + KS]
+
+ aesimc xmm0, xmm0
+ aesimc xmm1, xmm1
+
+ movdqu [(10-i)*16 + KS], xmm0 ; keys i and 10-i trade places after aesimc
+ movdqu [i*16 + KS], xmm1
+
+ i = i+1
+ ENDM
+
+ movdqu xmm0, [5*16 + KS] ; middle key stays in place and only gets aesimc
+ aesimc xmm0, xmm0
+ movdqu [5*16 + KS], xmm0
+ ret
+intel_aes_decrypt_init_128 ENDP
+
+
+intel_aes_encrypt_init_192 PROC ; expands the 24-byte key at [KEY] into round keys stored at [KS]; clobbers KS, ITR, xmm0-xmm5
+
+ sub rsp, 16*2
+ movdqu [16*0 + rsp], xmm6 ; xmm6/xmm7 are used as scratch below and restored before ret
+ movdqu [16*1 + rsp], xmm7
+
+ movdqu xmm1, [KEY] ; first 16 key bytes
+ mov ITR, [16 + KEY] ; remaining 8 key bytes
+ movd xmm3, ITR
+
+ movdqu [KS], xmm1 ; round key 0
+ movdqa xmm5, xmm3
+
+ lea ITR, Lcon1
+ movdqu xmm0, [ITR] ; xmm0 = round constant, starting at 1
+ lea ITR, Lmask192
+ movdqu xmm4, [ITR] ; xmm4 = pshufb mask
+
+ mov ITR, 4 ; four iterations, each storing 48 bytes of key schedule
+
+Lenc_192_ks_loop:
+ movdqa xmm2, xmm3
+ pshufb xmm2, xmm4
+ aesenclast xmm2, xmm0
+ pslld xmm0, 1 ; double the round constant
+
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm3
+ pslldq xmm6, 4
+ pslldq xmm7, 4
+ pxor xmm1, xmm6
+ pxor xmm3, xmm7
+ pslldq xmm6, 4
+ pxor xmm1, xmm6
+ pslldq xmm6, 4
+ pxor xmm1, xmm6
+ pxor xmm1, xmm2
+ pshufd xmm2, xmm1, 0ffh
+ pxor xmm3, xmm2
+
+ movdqa xmm6, xmm1
+ shufpd xmm5, xmm1, 00h
+ shufpd xmm6, xmm3, 01h
+
+ movdqu [16 + KS], xmm5
+ movdqu [32 + KS], xmm6
+
+ movdqa xmm2, xmm3
+ pshufb xmm2, xmm4
+ aesenclast xmm2, xmm0
+ pslld xmm0, 1 ; double the round constant
+
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm3
+ pslldq xmm6, 4
+ pslldq xmm7, 4
+ pxor xmm1, xmm6
+ pxor xmm3, xmm7
+ pslldq xmm6, 4
+ pxor xmm1, xmm6
+ pslldq xmm6, 4
+ pxor xmm1, xmm6
+ pxor xmm1, xmm2
+ pshufd xmm2, xmm1, 0ffh
+ pxor xmm3, xmm2
+
+ movdqu [48 + KS], xmm1
+ movdqa xmm5, xmm3
+
+ lea KS, [48 + KS] ; advance past the three round keys just written
+
+ dec ITR
+ jnz Lenc_192_ks_loop
+
+ movdqu [16 + KS], xmm5 ; NOTE(review): writes 16 bytes at the slot after the 12 loop-generated keys; confirm the schedule buffer has room for it
+
+ movdqu xmm7, [16*1 + rsp]
+ movdqu xmm6, [16*0 + rsp]
+ add rsp, 16*2
+ ret
+intel_aes_encrypt_init_192 ENDP
+
+intel_aes_decrypt_init_192 PROC
+ push KS
+ push KEY
+; Build the encryption schedule first; KS is saved because the callee advances it.
+ call intel_aes_encrypt_init_192
+
+ pop KEY
+ pop KS
+; Reverse round keys 0..12: the outer pair swaps unchanged.
+ movdqu xmm0, [0*16 + KS]
+ movdqu xmm1, [12*16 + KS]
+ movdqu [12*16 + KS], xmm0
+ movdqu [0*16 + KS], xmm1
+; Keys i and 12-i (i = 1..5) trade places, each passed through aesimc.
+ i = 1
+ WHILE i LT 6
+ movdqu xmm0, [i*16 + KS]
+ movdqu xmm1, [(12-i)*16 + KS]
+
+ aesimc xmm0, xmm0
+ aesimc xmm1, xmm1
+
+ movdqu [(12-i)*16 + KS], xmm0
+ movdqu [i*16 + KS], xmm1
+
+ i = i+1
+ ENDM
+; The middle key (6) stays in place and only gets aesimc.
+ movdqu xmm0, [6*16 + KS]
+ aesimc xmm0, xmm0
+ movdqu [6*16 + KS], xmm0
+ ret
+intel_aes_decrypt_init_192 ENDP
+
+
+intel_aes_encrypt_init_256 PROC
+ sub rsp, 16*2
+ movdqu [16*0 + rsp], xmm6
+ movdqu [16*1 + rsp], xmm7
+; Expands the 32-byte key at [KEY] into 15 round keys (240 bytes) at [KS]; xmm6/xmm7 are restored before ret.
+ movdqu xmm1, [16*0 + KEY]
+ movdqu xmm3, [16*1 + KEY]
+; The two key halves are round keys 0 and 1.
+ movdqu [16*0 + KS], xmm1
+ movdqu [16*1 + KS], xmm3
+
+ lea ITR, Lcon1
+ movdqu xmm0, [ITR]
+ lea ITR, Lmask256
+ movdqu xmm5, [ITR]
+; xmm6 = 0 serves as the round-key operand of the second aesenclast in each iteration.
+ pxor xmm6, xmm6
+; Six iterations, two round keys each; KS advances by 32 per iteration.
+ mov ITR, 6
+
+Lenc_256_ks_loop:
+; Even round key: derived from xmm3 with the round constant in xmm0.
+ movdqa xmm2, xmm3
+ pshufb xmm2, xmm5
+ aesenclast xmm2, xmm0
+ pslld xmm0, 1
+ movdqa xmm4, xmm1
+ pslldq xmm4, 4
+ pxor xmm1, xmm4
+ pslldq xmm4, 4
+ pxor xmm1, xmm4
+ pslldq xmm4, 4
+ pxor xmm1, xmm4
+ pxor xmm1, xmm2
+ movdqu [16*2 + KS], xmm1
+; Odd round key: top dword of xmm1 broadcast, aesenclast with the zero key.
+ pshufd xmm2, xmm1, 0ffh
+ aesenclast xmm2, xmm6
+ movdqa xmm4, xmm3
+ pslldq xmm4, 4
+ pxor xmm3, xmm4
+ pslldq xmm4, 4
+ pxor xmm3, xmm4
+ pslldq xmm4, 4
+ pxor xmm3, xmm4
+ pxor xmm3, xmm2
+ movdqu [16*3 + KS], xmm3
+
+ lea KS, [32 + KS]
+ dec ITR
+ jnz Lenc_256_ks_loop
+; Round key 14: one more even step, without shifting the constant.
+ movdqa xmm2, xmm3
+ pshufb xmm2, xmm5
+ aesenclast xmm2, xmm0
+ movdqa xmm4, xmm1
+ pslldq xmm4, 4
+ pxor xmm1, xmm4
+ pslldq xmm4, 4
+ pxor xmm1, xmm4
+ pslldq xmm4, 4
+ pxor xmm1, xmm4
+ pxor xmm1, xmm2
+ movdqu [16*2 + KS], xmm1
+
+ movdqu xmm7, [16*1 + rsp]
+ movdqu xmm6, [16*0 + rsp]
+ add rsp, 16*2
+ ret
+
+intel_aes_encrypt_init_256 ENDP
+
+
+intel_aes_decrypt_init_256 PROC
+ push KS
+ push KEY
+; Build the encryption schedule first; KS is saved because the callee advances it.
+ call intel_aes_encrypt_init_256
+
+ pop KEY
+ pop KS
+; Reverse round keys 0..14: the outer pair swaps unchanged.
+ movdqu xmm0, [0*16 + KS]
+ movdqu xmm1, [14*16 + KS]
+ movdqu [14*16 + KS], xmm0
+ movdqu [0*16 + KS], xmm1
+; Keys i and 14-i (i = 1..6) trade places, each passed through aesimc.
+ i = 1
+ WHILE i LT 7
+ movdqu xmm0, [i*16 + KS]
+ movdqu xmm1, [(14-i)*16 + KS]
+
+ aesimc xmm0, xmm0
+ aesimc xmm1, xmm1
+
+ movdqu [(14-i)*16 + KS], xmm0
+ movdqu [i*16 + KS], xmm1
+
+ i = i+1
+ ENDM
+; The middle key (7) stays in place and only gets aesimc.
+ movdqu xmm0, [7*16 + KS]
+ aesimc xmm0, xmm0
+ movdqu [7*16 + KS], xmm0
+ ret
+intel_aes_decrypt_init_256 ENDP
+
+
+; gen_aes_cbc_enc_func rnds: body of a CBC encryption routine using rnds AES rounds; returns 0 in rax.
+gen_aes_cbc_enc_func MACRO rnds
+
+LOCAL loop1
+LOCAL bail
+; The 5th and 6th arguments (input pointer and length) are read from the caller's stack.
+ mov input, [rsp + 1*8 + 8*4]
+ mov inputLen, [rsp + 1*8 + 8*5]
+
+ sub rsp, 3*16
+; xmm6..xmm8 are spilled here and reloaded at bail.
+ movdqu [rsp + 0*16], xmm6
+ movdqu [rsp + 1*16], xmm7
+ movdqu [rsp + 2*16], xmm8
+
+ lea ctx, [48+ctx]
+; ctx now addresses the round keys; the chaining value lives 32 bytes before them.
+ movdqu xmm0, [-32+ctx]
+; Round keys 0..5 stay in xmm2..xmm7 for the whole loop.
+ movdqu xmm2, [0*16 + ctx]
+ movdqu xmm3, [1*16 + ctx]
+ movdqu xmm4, [2*16 + ctx]
+ movdqu xmm5, [3*16 + ctx]
+ movdqu xmm6, [4*16 + ctx]
+ movdqu xmm7, [5*16 + ctx]
+
+loop1:
+ cmp inputLen, 1*16
+ jb bail
+; xmm0 = previous ciphertext (or initial chaining value) xor input block xor round key 0.
+ movdqu xmm1, [input]
+ pxor xmm1, xmm2
+ pxor xmm0, xmm1
+
+ aesenc xmm0, xmm3
+ aesenc xmm0, xmm4
+ aesenc xmm0, xmm5
+ aesenc xmm0, xmm6
+ aesenc xmm0, xmm7
+; Rounds 6..rnds load their keys from memory through xmm8.
+ i = 6
+ WHILE i LT rnds
+ movdqu xmm8, [i*16 + ctx]
+ aesenc xmm0, xmm8
+ i = i+1
+ ENDM
+ movdqu xmm8, [rnds*16 + ctx]
+ aesenclast xmm0, xmm8
+
+ movdqu [output], xmm0
+
+ lea input, [1*16 + input]
+ lea output, [1*16 + output]
+ sub inputLen, 1*16
+ jmp loop1
+; Fewer than 16 bytes remain (any remainder is not processed): save the chaining value.
+bail:
+ movdqu [-32+ctx], xmm0
+
+ xor rax, rax
+
+ movdqu xmm6, [rsp + 0*16]
+ movdqu xmm7, [rsp + 1*16]
+ movdqu xmm8, [rsp + 2*16]
+ add rsp, 3*16
+ ret
+
+ENDM
+; gen_aes_cbc_dec_func rnds: body of a CBC decryption routine using rnds AES rounds; returns 0 in rax.
+gen_aes_cbc_dec_func MACRO rnds
+
+LOCAL loop8
+LOCAL loop1
+LOCAL dec1
+LOCAL bail
+; The 5th and 6th arguments (input pointer and length) are read from the caller's stack.
+ mov input, [rsp + 1*8 + 8*4]
+ mov inputLen, [rsp + 1*8 + 8*5]
+
+ sub rsp, 3*16
+; xmm6..xmm8 are spilled here and reloaded at bail.
+ movdqu [rsp + 0*16], xmm6
+ movdqu [rsp + 1*16], xmm7
+ movdqu [rsp + 2*16], xmm8
+
+ lea ctx, [48+ctx]
+; Main loop: eight blocks per iteration while at least 128 bytes remain.
+loop8:
+ cmp inputLen, 8*16
+ jb dec1
+
+ movdqu xmm0, [0*16 + input]
+ movdqu xmm1, [1*16 + input]
+ movdqu xmm2, [2*16 + input]
+ movdqu xmm3, [3*16 + input]
+ movdqu xmm4, [4*16 + input]
+ movdqu xmm5, [5*16 + input]
+ movdqu xmm6, [6*16 + input]
+ movdqu xmm7, [7*16 + input]
+; xor every block with round key 0.
+ movdqu xmm8, [0*16 + ctx]
+ pxor xmm0, xmm8
+ pxor xmm1, xmm8
+ pxor xmm2, xmm8
+ pxor xmm3, xmm8
+ pxor xmm4, xmm8
+ pxor xmm5, xmm8
+ pxor xmm6, xmm8
+ pxor xmm7, xmm8
+; aes_dec_rnd / aes_dec_last_rnd are macros from earlier in this file (presumably one round over xmm0..xmm7).
+ i = 1
+ WHILE i LT rnds
+ aes_dec_rnd i
+ i = i+1
+ ENDM
+ aes_dec_last_rnd rnds
+; CBC unchaining: block 0 xor stored chaining value, block k xor ciphertext block k-1.
+ movdqu xmm8, [-32 + ctx]
+ pxor xmm0, xmm8
+ movdqu xmm8, [0*16 + input]
+ pxor xmm1, xmm8
+ movdqu xmm8, [1*16 + input]
+ pxor xmm2, xmm8
+ movdqu xmm8, [2*16 + input]
+ pxor xmm3, xmm8
+ movdqu xmm8, [3*16 + input]
+ pxor xmm4, xmm8
+ movdqu xmm8, [4*16 + input]
+ pxor xmm5, xmm8
+ movdqu xmm8, [5*16 + input]
+ pxor xmm6, xmm8
+ movdqu xmm8, [6*16 + input]
+ pxor xmm7, xmm8
+ movdqu xmm8, [7*16 + input]
+; xmm8 now holds ciphertext block 7; it is stored below as the next chaining value.
+ movdqu [0*16 + output], xmm0
+ movdqu [1*16 + output], xmm1
+ movdqu [2*16 + output], xmm2
+ movdqu [3*16 + output], xmm3
+ movdqu [4*16 + output], xmm4
+ movdqu [5*16 + output], xmm5
+ movdqu [6*16 + output], xmm6
+ movdqu [7*16 + output], xmm7
+ movdqu [-32 + ctx], xmm8
+
+ lea input, [8*16 + input]
+ lea output, [8*16 + output]
+ sub inputLen, 8*16
+ jmp loop8
+dec1:
+; Tail: one block at a time; xmm3 carries the chaining value.
+ movdqu xmm3, [-32 + ctx]
+
+loop1:
+ cmp inputLen, 1*16
+ jb bail
+; xmm4 keeps a copy of the ciphertext block for chaining.
+ movdqu xmm0, [input]
+ movdqa xmm4, xmm0
+ movdqu xmm7, [0*16 + ctx]
+ pxor xmm0, xmm7
+
+ i = 1
+ WHILE i LT rnds
+ movdqu xmm7, [i*16 + ctx]
+ aesdec xmm0, xmm7
+ i = i+1
+ ENDM
+ movdqu xmm7, [rnds*16 + ctx]
+ aesdeclast xmm0, xmm7
+ pxor xmm3, xmm0
+; Write the plaintext, then promote the saved ciphertext to chaining value.
+ movdqu [output], xmm3
+ movdqa xmm3, xmm4
+
+ lea input, [1*16 + input]
+ lea output, [1*16 + output]
+ sub inputLen, 1*16
+ jmp loop1
+; Fewer than 16 bytes remain: save the chaining value and restore xmm6..xmm8.
+bail:
+ movdqu [-32 + ctx], xmm3
+ xor rax, rax
+
+ movdqu xmm6, [rsp + 0*16]
+ movdqu xmm7, [rsp + 1*16]
+ movdqu xmm8, [rsp + 2*16]
+ add rsp, 3*16
+ ret
+ENDM
+; CBC entry points: each PROC is one macro expansion; the argument is the round count (10, 12, 14).
+intel_aes_encrypt_cbc_128 PROC
+gen_aes_cbc_enc_func 10
+intel_aes_encrypt_cbc_128 ENDP
+
+intel_aes_encrypt_cbc_192 PROC
+gen_aes_cbc_enc_func 12
+intel_aes_encrypt_cbc_192 ENDP
+
+intel_aes_encrypt_cbc_256 PROC
+gen_aes_cbc_enc_func 14
+intel_aes_encrypt_cbc_256 ENDP
+
+intel_aes_decrypt_cbc_128 PROC
+gen_aes_cbc_dec_func 10
+intel_aes_decrypt_cbc_128 ENDP
+
+intel_aes_decrypt_cbc_192 PROC
+gen_aes_cbc_dec_func 12
+intel_aes_decrypt_cbc_192 ENDP
+
+intel_aes_decrypt_cbc_256 PROC
+gen_aes_cbc_dec_func 14
+intel_aes_decrypt_cbc_256 ENDP
+
+
+
+ctrCtx textequ <r10> ; copy of the incoming context pointer (ctx is repointed at the round keys)
+CTR textequ <r11d> ; scratch: byte-swapped counter dword xored with round key 0
+CTRSave textequ <eax> ; running counter value, kept byte-swapped relative to memory
+; gen_aes_ctr_func rnds: body of a CTR-mode routine using rnds AES rounds; returns 0 in rax.
+gen_aes_ctr_func MACRO rnds
+
+LOCAL loop8
+LOCAL loop1
+LOCAL enc1
+LOCAL bail
+; Only the last dword of the counter block is incremented here. (enc1 is declared but never defined.)
+ mov input, [rsp + 8*1 + 4*8]
+ mov inputLen, [rsp + 8*1 + 5*8]
+; Keep the incoming pointer in ctrCtx; ctx is reloaded from [8+ctrCtx] and moved to the round keys.
+ mov ctrCtx, ctx
+ mov ctx, [8+ctrCtx]
+ lea ctx, [48+ctx]
+
+ sub rsp, 3*16
+ movdqu [rsp + 0*16], xmm6
+ movdqu [rsp + 1*16], xmm7
+ movdqu [rsp + 2*16], xmm8
+
+; rbp frames a 16-byte-aligned scratch area holding eight pre-whitened counter blocks.
+ push rbp
+ mov rbp, rsp
+ sub rsp, 8*16
+ and rsp, -16
+
+; xmm0 = counter block at [16+ctrCtx]; CTRSave = its last dword after bswap.
+ movdqu xmm0, [16+ctrCtx]
+ mov CTRSave, DWORD PTR [ctrCtx + 16 + 3*4]
+ bswap CTRSave
+ movdqu xmm1, [ctx + 0*16]
+; Scratch blocks are stored already xored with round key 0.
+ pxor xmm0, xmm1
+
+ movdqa [rsp + 0*16], xmm0
+ movdqa [rsp + 1*16], xmm0
+ movdqa [rsp + 2*16], xmm0
+ movdqa [rsp + 3*16], xmm0
+ movdqa [rsp + 4*16], xmm0
+ movdqa [rsp + 5*16], xmm0
+ movdqa [rsp + 6*16], xmm0
+ movdqa [rsp + 7*16], xmm0
+; Slots 1..7 receive counter+1..counter+7: patch only the last dword, xored with key 0's last dword.
+ inc CTRSave
+ mov CTR, CTRSave
+ bswap CTR
+ xor CTR, DWORD PTR [ctx + 3*4]
+ mov DWORD PTR [rsp + 1*16 + 3*4], CTR
+
+ inc CTRSave
+ mov CTR, CTRSave
+ bswap CTR
+ xor CTR, DWORD PTR [ctx + 3*4]
+ mov DWORD PTR [rsp + 2*16 + 3*4], CTR
+
+ inc CTRSave
+ mov CTR, CTRSave
+ bswap CTR
+ xor CTR, DWORD PTR [ctx + 3*4]
+ mov DWORD PTR [rsp + 3*16 + 3*4], CTR
+
+ inc CTRSave
+ mov CTR, CTRSave
+ bswap CTR
+ xor CTR, DWORD PTR [ctx + 3*4]
+ mov DWORD PTR [rsp + 4*16 + 3*4], CTR
+
+ inc CTRSave
+ mov CTR, CTRSave
+ bswap CTR
+ xor CTR, DWORD PTR [ctx + 3*4]
+ mov DWORD PTR [rsp + 5*16 + 3*4], CTR
+
+ inc CTRSave
+ mov CTR, CTRSave
+ bswap CTR
+ xor CTR, DWORD PTR [ctx + 3*4]
+ mov DWORD PTR [rsp + 6*16 + 3*4], CTR
+
+ inc CTRSave
+ mov CTR, CTRSave
+ bswap CTR
+ xor CTR, DWORD PTR [ctx + 3*4]
+ mov DWORD PTR [rsp + 7*16 + 3*4], CTR
+
+; Main loop: eight blocks per iteration while at least 128 bytes remain.
+loop8:
+ cmp inputLen, 8*16
+ jb loop1
+
+ movdqu xmm0, [0*16 + rsp]
+ movdqu xmm1, [1*16 + rsp]
+ movdqu xmm2, [2*16 + rsp]
+ movdqu xmm3, [3*16 + rsp]
+ movdqu xmm4, [4*16 + rsp]
+ movdqu xmm5, [5*16 + rsp]
+ movdqu xmm6, [6*16 + rsp]
+ movdqu xmm7, [7*16 + rsp]
+; Rounds 1..8 are interleaved with refilling the eight scratch slots with the next counters.
+ i = 1
+ WHILE i LE 8
+ aes_rnd i
+
+ inc CTRSave
+ mov CTR, CTRSave
+ bswap CTR
+ xor CTR, DWORD PTR [ctx + 3*4]
+ mov DWORD PTR [rsp + (i-1)*16 + 3*4], CTR
+
+ i = i+1
+ ENDM
+ WHILE i LT rnds
+ aes_rnd i
+ i = i+1
+ ENDM
+ aes_last_rnd rnds
+; xor the eight keystream blocks with the input.
+ movdqu xmm8, [0*16 + input]
+ pxor xmm0, xmm8
+ movdqu xmm8, [1*16 + input]
+ pxor xmm1, xmm8
+ movdqu xmm8, [2*16 + input]
+ pxor xmm2, xmm8
+ movdqu xmm8, [3*16 + input]
+ pxor xmm3, xmm8
+ movdqu xmm8, [4*16 + input]
+ pxor xmm4, xmm8
+ movdqu xmm8, [5*16 + input]
+ pxor xmm5, xmm8
+ movdqu xmm8, [6*16 + input]
+ pxor xmm6, xmm8
+ movdqu xmm8, [7*16 + input]
+ pxor xmm7, xmm8
+
+ movdqu [0*16 + output], xmm0
+ movdqu [1*16 + output], xmm1
+ movdqu [2*16 + output], xmm2
+ movdqu [3*16 + output], xmm3
+ movdqu [4*16 + output], xmm4
+ movdqu [5*16 + output], xmm5
+ movdqu [6*16 + output], xmm6
+ movdqu [7*16 + output], xmm7
+
+ lea input, [8*16 + input]
+ lea output, [8*16 + output]
+ sub inputLen, 8*16
+ jmp loop8
+
+; Tail: one block at a time, consuming scratch slots by advancing rsp.
+loop1:
+ cmp inputLen, 1*16
+ jb bail
+
+ movdqu xmm0, [rsp]
+ add rsp, 16
+
+ i = 1
+ WHILE i LT rnds
+ movdqu xmm7, [i*16 + ctx]
+ aesenc xmm0, xmm7
+ i = i+1
+ ENDM
+ movdqu xmm7, [rnds*16 + ctx]
+ aesenclast xmm0, xmm7
+
+ movdqu xmm7, [input]
+ pxor xmm0, xmm7
+ movdqu [output], xmm0
+
+ lea input, [1*16 + input]
+ lea output, [1*16 + output]
+ sub inputLen, 1*16
+ jmp loop1
+
+bail:
+; [rsp] is the next unused counter block; undo the round-key-0 xor and store it back.
+ movdqu xmm0, [rsp]
+ movdqu xmm1, [ctx + 0*16]
+ pxor xmm0, xmm1
+ movdqu [16+ctrCtx], xmm0
+
+
+ xor rax, rax
+ mov rsp, rbp
+ pop rbp
+
+ movdqu xmm6, [rsp + 0*16]
+ movdqu xmm7, [rsp + 1*16]
+ movdqu xmm8, [rsp + 2*16]
+ add rsp, 3*16
+
+ ret
+ENDM
+
+; CTR entry points: each PROC is one macro expansion; the argument is the round count (10, 12, 14).
+intel_aes_encrypt_ctr_128 PROC
+gen_aes_ctr_func 10
+intel_aes_encrypt_ctr_128 ENDP
+
+intel_aes_encrypt_ctr_192 PROC
+gen_aes_ctr_func 12
+intel_aes_encrypt_ctr_192 ENDP
+
+intel_aes_encrypt_ctr_256 PROC
+gen_aes_ctr_func 14
+intel_aes_encrypt_ctr_256 ENDP
+
+
+END
diff --git a/security/nss/lib/freebl/intel-aes-x86-masm.asm b/security/nss/lib/freebl/intel-aes-x86-masm.asm
new file mode 100644
index 000000000..7d805e766
--- /dev/null
+++ b/security/nss/lib/freebl/intel-aes-x86-masm.asm
@@ -0,0 +1,949 @@
+; LICENSE:
+; This submission to NSS is to be made available under the terms of the
+; Mozilla Public License, v. 2.0. You can obtain one at
+; http://mozilla.org/MPL/2.0/.
+;###############################################################################
+; Copyright(c) 2014, Intel Corp.
+; Developers and authors:
+; Shay Gueron and Vlad Krasnov
+; Intel Corporation, Israel Development Centre, Haifa, Israel
+; Please send feedback directly to crypto.feedback.alias@intel.com
+
+
+.MODEL FLAT, C
+.XMM
+; Constants for the key-expansion routines; ALIGN 16 matters because init_128 loads them with movdqa.
+.DATA
+ALIGN 16
+Lmask dd 0c0f0e0dh,0c0f0e0dh,0c0f0e0dh,0c0f0e0dh ; pshufb mask for the 128-bit expansion
+Lmask192 dd 004070605h, 004070605h, 004070605h, 004070605h ; pshufb mask for the 192-bit expansion
+Lmask256 dd 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh ; pshufb mask for the 256-bit expansion (same value as Lmask)
+Lcon1 dd 1,1,1,1 ; initial round constant; the expansion loops shift it left by one each step
+Lcon2 dd 1bh,1bh,1bh,1bh ; round constant reloaded for the last two 128-bit round keys
+
+.CODE
+
+ctx textequ <ecx> ; context pointer, later advanced to the round keys
+output textequ <edx> ; output pointer
+input textequ <eax> ; input pointer (eax is zeroed as the return value on exit)
+inputLen textequ <edi> ; remaining byte count; edi is pushed on entry and popped on exit
+
+; aes_rnd i: applies aesenc with round key i (at [i*16 + ctx]) to xmm0..xmm6; clobbers xmm7.
+aes_rnd MACRO i
+ movdqu xmm7, [i*16 + ctx]
+ aesenc xmm0, xmm7
+ aesenc xmm1, xmm7
+ aesenc xmm2, xmm7
+ aesenc xmm3, xmm7
+ aesenc xmm4, xmm7
+ aesenc xmm5, xmm7
+ aesenc xmm6, xmm7
+ ENDM
+; aes_last_rnd i: applies aesenclast with round key i to xmm0..xmm6; clobbers xmm7.
+aes_last_rnd MACRO i
+ movdqu xmm7, [i*16 + ctx]
+ aesenclast xmm0, xmm7
+ aesenclast xmm1, xmm7
+ aesenclast xmm2, xmm7
+ aesenclast xmm3, xmm7
+ aesenclast xmm4, xmm7
+ aesenclast xmm5, xmm7
+ aesenclast xmm6, xmm7
+ ENDM
+; aes_dec_rnd i: applies aesdec with round key i (at [i*16 + ctx]) to xmm0..xmm6; clobbers xmm7.
+aes_dec_rnd MACRO i
+ movdqu xmm7, [i*16 + ctx]
+ aesdec xmm0, xmm7
+ aesdec xmm1, xmm7
+ aesdec xmm2, xmm7
+ aesdec xmm3, xmm7
+ aesdec xmm4, xmm7
+ aesdec xmm5, xmm7
+ aesdec xmm6, xmm7
+ ENDM
+; aes_dec_last_rnd i: applies aesdeclast with round key i to xmm0..xmm6; clobbers xmm7.
+aes_dec_last_rnd MACRO i
+ movdqu xmm7, [i*16 + ctx]
+ aesdeclast xmm0, xmm7
+ aesdeclast xmm1, xmm7
+ aesdeclast xmm2, xmm7
+ aesdeclast xmm3, xmm7
+ aesdeclast xmm4, xmm7
+ aesdeclast xmm5, xmm7
+ aesdeclast xmm6, xmm7
+ ENDM
+
+; gen_aes_ecb_func enc, rnds: body of an ECB routine (enc = 1 encrypts, otherwise decrypts) with rnds rounds.
+gen_aes_ecb_func MACRO enc, rnds
+
+LOCAL loop7
+LOCAL loop1
+LOCAL bail
+; edi (inputLen) is saved first, so stack arguments start at esp + 2*4.
+ push inputLen
+
+ mov ctx, [esp + 2*4 + 0*4]
+ mov output, [esp + 2*4 + 1*4]
+ mov input, [esp + 2*4 + 4*4]
+ mov inputLen, [esp + 2*4 + 5*4]
+; ctx is advanced by 44 bytes to address the round keys.
+ lea ctx, [44+ctx]
+
+loop7:
+ cmp inputLen, 7*16
+ jb loop1
+; Main loop: seven blocks per iteration in xmm0..xmm6; xmm7 holds the current round key.
+ movdqu xmm0, [0*16 + input]
+ movdqu xmm1, [1*16 + input]
+ movdqu xmm2, [2*16 + input]
+ movdqu xmm3, [3*16 + input]
+ movdqu xmm4, [4*16 + input]
+ movdqu xmm5, [5*16 + input]
+ movdqu xmm6, [6*16 + input]
+
+ movdqu xmm7, [0*16 + ctx]
+ pxor xmm0, xmm7
+ pxor xmm1, xmm7
+ pxor xmm2, xmm7
+ pxor xmm3, xmm7
+ pxor xmm4, xmm7
+ pxor xmm5, xmm7
+ pxor xmm6, xmm7
+; Select the encrypt or decrypt flavour of the round macros and instructions.
+IF enc eq 1
+ rnd textequ <aes_rnd>
+ lastrnd textequ <aes_last_rnd>
+ aesinst textequ <aesenc>
+ aeslastinst textequ <aesenclast>
+ELSE
+ rnd textequ <aes_dec_rnd>
+ lastrnd textequ <aes_dec_last_rnd>
+ aesinst textequ <aesdec>
+ aeslastinst textequ <aesdeclast>
+ENDIF
+
+ i = 1
+ WHILE i LT rnds
+ rnd i
+ i = i+1
+ ENDM
+ lastrnd rnds
+
+ movdqu [0*16 + output], xmm0
+ movdqu [1*16 + output], xmm1
+ movdqu [2*16 + output], xmm2
+ movdqu [3*16 + output], xmm3
+ movdqu [4*16 + output], xmm4
+ movdqu [5*16 + output], xmm5
+ movdqu [6*16 + output], xmm6
+
+ lea input, [7*16 + input]
+ lea output, [7*16 + output]
+ sub inputLen, 7*16
+ jmp loop7
+; Tail: one block at a time until fewer than 16 bytes remain.
+loop1:
+ cmp inputLen, 1*16
+ jb bail
+
+ movdqu xmm0, [input]
+ movdqu xmm7, [0*16 + ctx]
+ pxor xmm0, xmm7
+
+ i = 1
+ WHILE i LT rnds
+ movdqu xmm7, [i*16 + ctx]
+ aesinst xmm0, xmm7
+ i = i+1
+ ENDM
+ movdqu xmm7, [rnds*16 + ctx]
+ aeslastinst xmm0, xmm7
+
+ movdqu [output], xmm0
+
+ lea input, [1*16 + input]
+ lea output, [1*16 + output]
+ sub inputLen, 1*16
+ jmp loop1
+; Return 0 in eax and restore edi.
+bail:
+ xor eax, eax
+ pop inputLen
+ ret
+
+ENDM
+; ECB entry points: macro arguments are (1 = encrypt / 0 = decrypt, round count).
+ALIGN 16
+intel_aes_encrypt_ecb_128 PROC
+gen_aes_ecb_func 1, 10
+intel_aes_encrypt_ecb_128 ENDP
+
+ALIGN 16
+intel_aes_encrypt_ecb_192 PROC
+gen_aes_ecb_func 1, 12
+intel_aes_encrypt_ecb_192 ENDP
+
+ALIGN 16
+intel_aes_encrypt_ecb_256 PROC
+gen_aes_ecb_func 1, 14
+intel_aes_encrypt_ecb_256 ENDP
+
+ALIGN 16
+intel_aes_decrypt_ecb_128 PROC
+gen_aes_ecb_func 0, 10
+intel_aes_decrypt_ecb_128 ENDP
+
+ALIGN 16
+intel_aes_decrypt_ecb_192 PROC
+gen_aes_ecb_func 0, 12
+intel_aes_decrypt_ecb_192 ENDP
+
+ALIGN 16
+intel_aes_decrypt_ecb_256 PROC
+gen_aes_ecb_func 0, 14
+intel_aes_decrypt_ecb_256 ENDP
+
+
+KEY textequ <ecx> ; pointer to the raw cipher key (loaded from the first stack argument)
+KS textequ <edx> ; pointer to the key-schedule buffer (loaded from the second stack argument)
+ITR textequ <eax> ; scratch: constant-table address, then loop counter
+
+ALIGN 16
+intel_aes_encrypt_init_128 PROC
+; Expands the 16-byte key into 11 round keys (176 bytes) at the buffer given as second argument.
+ mov KEY, [esp + 1*4 + 0*4]
+ mov KS, [esp + 1*4 + 1*4]
+
+; Round key 0 is the key itself.
+ movdqu xmm1, [KEY]
+ movdqu [KS], xmm1
+ movdqa xmm2, xmm1
+; xmm0 = round constant (starts from Lcon1), xmm4 = byte-shuffle mask (Lmask).
+ lea ITR, Lcon1
+ movdqa xmm0, [ITR]
+ lea ITR, Lmask
+ movdqa xmm4, [ITR]
+; Round keys 1..8 come from the loop; keys 9 and 10 are unrolled after it.
+ mov ITR, 8
+
+Lenc_128_ks_loop:
+ lea KS, [16 + KS]
+ dec ITR
+; dec sets ZF for the jne at the bottom; nothing in between writes the flags.
+ pshufb xmm2, xmm4
+ aesenclast xmm2, xmm0
+ pslld xmm0, 1
+ movdqa xmm3, xmm1
+ pslldq xmm3, 4
+ pxor xmm1, xmm3
+ pslldq xmm3, 4
+ pxor xmm1, xmm3
+ pslldq xmm3, 4
+ pxor xmm1, xmm3
+ pxor xmm1, xmm2
+ movdqu [KS], xmm1
+ movdqa xmm2, xmm1
+
+ jne Lenc_128_ks_loop
+; The round constant restarts from Lcon2 for the last two round keys.
+ lea ITR, Lcon2
+ movdqa xmm0, [ITR]
+
+ pshufb xmm2, xmm4
+ aesenclast xmm2, xmm0
+ pslld xmm0, 1
+ movdqa xmm3, xmm1
+ pslldq xmm3, 4
+ pxor xmm1, xmm3
+ pslldq xmm3, 4
+ pxor xmm1, xmm3
+ pslldq xmm3, 4
+ pxor xmm1, xmm3
+ pxor xmm1, xmm2
+ movdqu [16 + KS], xmm1
+ movdqa xmm2, xmm1
+; Final round key: same step, but the constant is not shifted afterwards.
+ pshufb xmm2, xmm4
+ aesenclast xmm2, xmm0
+ movdqa xmm3, xmm1
+ pslldq xmm3, 4
+ pxor xmm1, xmm3
+ pslldq xmm3, 4
+ pxor xmm1, xmm3
+ pslldq xmm3, 4
+ pxor xmm1, xmm3
+ pxor xmm1, xmm2
+ movdqu [32 + KS], xmm1
+ movdqa xmm2, xmm1
+
+ ret
+intel_aes_encrypt_init_128 ENDP
+
+
+ALIGN 16
+intel_aes_decrypt_init_128 PROC
+; Builds the decryption schedule: encryption schedule, reversed, with aesimc on keys 1..9.
+ mov KEY, [esp + 1*4 + 0*4]
+ mov KS, [esp + 1*4 + 1*4]
+; The pushes are the callee's stack arguments; the pops afterwards also restore KEY and KS.
+ push KS
+ push KEY
+
+ call intel_aes_encrypt_init_128
+
+ pop KEY
+ pop KS
+; Round keys 0 and 10 trade places unchanged.
+ movdqu xmm0, [0*16 + KS]
+ movdqu xmm1, [10*16 + KS]
+ movdqu [10*16 + KS], xmm0
+ movdqu [0*16 + KS], xmm1
+; Keys i and 10-i (i = 1..4) trade places, each passed through aesimc.
+ i = 1
+ WHILE i LT 5
+ movdqu xmm0, [i*16 + KS]
+ movdqu xmm1, [(10-i)*16 + KS]
+
+ aesimc xmm0, xmm0
+ aesimc xmm1, xmm1
+
+ movdqu [(10-i)*16 + KS], xmm0
+ movdqu [i*16 + KS], xmm1
+
+ i = i+1
+ ENDM
+; The middle key (5) stays in place and only gets aesimc.
+ movdqu xmm0, [5*16 + KS]
+ aesimc xmm0, xmm0
+ movdqu [5*16 + KS], xmm0
+ ret
+intel_aes_decrypt_init_128 ENDP
+
+
+ALIGN 16
+intel_aes_encrypt_init_192 PROC
+; Expands the 24-byte key into the schedule buffer; 14 x 16 bytes are written in total.
+ mov KEY, [esp + 1*4 + 0*4]
+ mov KS, [esp + 1*4 + 1*4]
+; xmm1 = key bytes 0..15; xmm3 = key bytes 16..23 in its low two dwords, upper half zero.
+ pxor xmm3, xmm3
+ movdqu xmm1, [KEY]
+ pinsrd xmm3, DWORD PTR [16 + KEY], 0
+ pinsrd xmm3, DWORD PTR [20 + KEY], 1
+
+ movdqu [KS], xmm1
+ movdqa xmm5, xmm3
+
+ lea ITR, Lcon1
+ movdqu xmm0, [ITR]
+ lea ITR, Lmask192
+ movdqu xmm4, [ITR]
+; Four iterations; each one stores 48 bytes of schedule and advances KS by 48.
+ mov ITR, 4
+
+Lenc_192_ks_loop:
+ movdqa xmm2, xmm3
+ pshufb xmm2, xmm4
+ aesenclast xmm2, xmm0
+ pslld xmm0, 1
+
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm3
+ pslldq xmm6, 4
+ pslldq xmm7, 4
+ pxor xmm1, xmm6
+ pxor xmm3, xmm7
+ pslldq xmm6, 4
+ pxor xmm1, xmm6
+ pslldq xmm6, 4
+ pxor xmm1, xmm6
+ pxor xmm1, xmm2
+ pshufd xmm2, xmm1, 0ffh
+ pxor xmm3, xmm2
+; Repack the 24-byte state (xmm5 tail, xmm1, xmm3) into two 16-byte stores.
+ movdqa xmm6, xmm1
+ shufpd xmm5, xmm1, 00h
+ shufpd xmm6, xmm3, 01h
+
+ movdqu [16 + KS], xmm5
+ movdqu [32 + KS], xmm6
+; Second expansion step of the iteration.
+ movdqa xmm2, xmm3
+ pshufb xmm2, xmm4
+ aesenclast xmm2, xmm0
+ pslld xmm0, 1
+
+ movdqa xmm6, xmm1
+ movdqa xmm7, xmm3
+ pslldq xmm6, 4
+ pslldq xmm7, 4
+ pxor xmm1, xmm6
+ pxor xmm3, xmm7
+ pslldq xmm6, 4
+ pxor xmm1, xmm6
+ pslldq xmm6, 4
+ pxor xmm1, xmm6
+ pxor xmm1, xmm2
+ pshufd xmm2, xmm1, 0ffh
+ pxor xmm3, xmm2
+
+ movdqu [48 + KS], xmm1
+ movdqa xmm5, xmm3
+
+ lea KS, [48 + KS]
+
+ dec ITR
+ jnz Lenc_192_ks_loop
+; Trailing store of the leftover state in xmm5, 16 bytes past the last full store.
+ movdqu [16 + KS], xmm5
+ret
+intel_aes_encrypt_init_192 ENDP
+
+ALIGN 16
+intel_aes_decrypt_init_192 PROC
+ mov KEY, [esp + 1*4 + 0*4]
+ mov KS, [esp + 1*4 + 1*4]
+; The pushes are the callee's stack arguments; the pops afterwards also restore KEY and KS.
+ push KS
+ push KEY
+
+ call intel_aes_encrypt_init_192
+
+ pop KEY
+ pop KS
+; Reverse round keys 0..12: the outer pair swaps unchanged.
+ movdqu xmm0, [0*16 + KS]
+ movdqu xmm1, [12*16 + KS]
+ movdqu [12*16 + KS], xmm0
+ movdqu [0*16 + KS], xmm1
+; Keys i and 12-i (i = 1..5) trade places, each passed through aesimc.
+ i = 1
+ WHILE i LT 6
+ movdqu xmm0, [i*16 + KS]
+ movdqu xmm1, [(12-i)*16 + KS]
+
+ aesimc xmm0, xmm0
+ aesimc xmm1, xmm1
+
+ movdqu [(12-i)*16 + KS], xmm0
+ movdqu [i*16 + KS], xmm1
+
+ i = i+1
+ ENDM
+; The middle key (6) stays in place and only gets aesimc.
+ movdqu xmm0, [6*16 + KS]
+ aesimc xmm0, xmm0
+ movdqu [6*16 + KS], xmm0
+ ret
+intel_aes_decrypt_init_192 ENDP
+
+ALIGN 16
+intel_aes_encrypt_init_256 PROC
+; Expands the 32-byte key into 15 round keys (240 bytes) at the buffer given as second argument.
+ mov KEY, [esp + 1*4 + 0*4]
+ mov KS, [esp + 1*4 + 1*4]
+ movdqu xmm1, [16*0 + KEY]
+ movdqu xmm3, [16*1 + KEY]
+; The two key halves are round keys 0 and 1.
+ movdqu [16*0 + KS], xmm1
+ movdqu [16*1 + KS], xmm3
+
+ lea ITR, Lcon1
+ movdqu xmm0, [ITR]
+ lea ITR, Lmask256
+ movdqu xmm5, [ITR]
+; xmm6 = 0 serves as the round-key operand of the second aesenclast in each iteration.
+ pxor xmm6, xmm6
+; Six iterations, two round keys each; KS advances by 32 per iteration.
+ mov ITR, 6
+
+Lenc_256_ks_loop:
+; Even round key: derived from xmm3 with the round constant in xmm0.
+ movdqa xmm2, xmm3
+ pshufb xmm2, xmm5
+ aesenclast xmm2, xmm0
+ pslld xmm0, 1
+ movdqa xmm4, xmm1
+ pslldq xmm4, 4
+ pxor xmm1, xmm4
+ pslldq xmm4, 4
+ pxor xmm1, xmm4
+ pslldq xmm4, 4
+ pxor xmm1, xmm4
+ pxor xmm1, xmm2
+ movdqu [16*2 + KS], xmm1
+; Odd round key: top dword of xmm1 broadcast, aesenclast with the zero key.
+ pshufd xmm2, xmm1, 0ffh
+ aesenclast xmm2, xmm6
+ movdqa xmm4, xmm3
+ pslldq xmm4, 4
+ pxor xmm3, xmm4
+ pslldq xmm4, 4
+ pxor xmm3, xmm4
+ pslldq xmm4, 4
+ pxor xmm3, xmm4
+ pxor xmm3, xmm2
+ movdqu [16*3 + KS], xmm3
+
+ lea KS, [32 + KS]
+ dec ITR
+ jnz Lenc_256_ks_loop
+; Round key 14: one more even step, without shifting the constant.
+ movdqa xmm2, xmm3
+ pshufb xmm2, xmm5
+ aesenclast xmm2, xmm0
+ movdqa xmm4, xmm1
+ pslldq xmm4, 4
+ pxor xmm1, xmm4
+ pslldq xmm4, 4
+ pxor xmm1, xmm4
+ pslldq xmm4, 4
+ pxor xmm1, xmm4
+ pxor xmm1, xmm2
+ movdqu [16*2 + KS], xmm1
+
+ ret
+intel_aes_encrypt_init_256 ENDP
+
+ALIGN 16
+intel_aes_decrypt_init_256 PROC
+ mov KEY, [esp + 1*4 + 0*4]
+ mov KS, [esp + 1*4 + 1*4]
+; The pushes are the callee's stack arguments; the pops afterwards also restore KEY and KS.
+ push KS
+ push KEY
+
+ call intel_aes_encrypt_init_256
+
+ pop KEY
+ pop KS
+; Reverse round keys 0..14: the outer pair swaps unchanged.
+ movdqu xmm0, [0*16 + KS]
+ movdqu xmm1, [14*16 + KS]
+ movdqu [14*16 + KS], xmm0
+ movdqu [0*16 + KS], xmm1
+; Keys i and 14-i (i = 1..6) trade places, each passed through aesimc.
+ i = 1
+ WHILE i LT 7
+ movdqu xmm0, [i*16 + KS]
+ movdqu xmm1, [(14-i)*16 + KS]
+
+ aesimc xmm0, xmm0
+ aesimc xmm1, xmm1
+
+ movdqu [(14-i)*16 + KS], xmm0
+ movdqu [i*16 + KS], xmm1
+
+ i = i+1
+ ENDM
+; The middle key (7) stays in place and only gets aesimc.
+ movdqu xmm0, [7*16 + KS]
+ aesimc xmm0, xmm0
+ movdqu [7*16 + KS], xmm0
+ ret
+intel_aes_decrypt_init_256 ENDP
+
+
+; gen_aes_cbc_enc_func rnds: body of a CBC encryption routine using rnds AES rounds; returns 0 in eax.
+gen_aes_cbc_enc_func MACRO rnds
+
+LOCAL loop1
+LOCAL bail
+; edi (inputLen) is saved first, so stack arguments start at esp + 2*4.
+ push inputLen
+
+ mov ctx, [esp + 2*4 + 0*4]
+ mov output, [esp + 2*4 + 1*4]
+ mov input, [esp + 2*4 + 4*4]
+ mov inputLen, [esp + 2*4 + 5*4]
+; ctx is advanced to the round keys; the chaining value lives 32 bytes before them.
+ lea ctx, [44+ctx]
+
+ movdqu xmm0, [-32+ctx]
+; Round keys 0..4 stay in xmm2..xmm6 for the whole loop.
+ movdqu xmm2, [0*16 + ctx]
+ movdqu xmm3, [1*16 + ctx]
+ movdqu xmm4, [2*16 + ctx]
+ movdqu xmm5, [3*16 + ctx]
+ movdqu xmm6, [4*16 + ctx]
+
+loop1:
+ cmp inputLen, 1*16
+ jb bail
+; xmm0 = previous ciphertext (or initial chaining value) xor input block xor round key 0.
+ movdqu xmm1, [input]
+ pxor xmm1, xmm2
+ pxor xmm0, xmm1
+
+ aesenc xmm0, xmm3
+ aesenc xmm0, xmm4
+ aesenc xmm0, xmm5
+ aesenc xmm0, xmm6
+; Rounds 5..rnds load their keys from memory through xmm7.
+ i = 5
+ WHILE i LT rnds
+ movdqu xmm7, [i*16 + ctx]
+ aesenc xmm0, xmm7
+ i = i+1
+ ENDM
+ movdqu xmm7, [rnds*16 + ctx]
+ aesenclast xmm0, xmm7
+
+ movdqu [output], xmm0
+
+ lea input, [1*16 + input]
+ lea output, [1*16 + output]
+ sub inputLen, 1*16
+ jmp loop1
+; Fewer than 16 bytes remain (any remainder is not processed): save the chaining value.
+bail:
+ movdqu [-32+ctx], xmm0
+
+ xor eax, eax
+ pop inputLen
+ ret
+
+ENDM
+; gen_aes_cbc_dec_func rnds: body of a CBC decryption routine using rnds AES rounds; returns 0 in eax.
+gen_aes_cbc_dec_func MACRO rnds
+
+LOCAL loop7
+LOCAL loop1
+LOCAL dec1
+LOCAL bail
+; edi (inputLen) is saved first, so stack arguments start at esp + 2*4.
+ push inputLen
+
+ mov ctx, [esp + 2*4 + 0*4]
+ mov output, [esp + 2*4 + 1*4]
+ mov input, [esp + 2*4 + 4*4]
+ mov inputLen, [esp + 2*4 + 5*4]
+; ctx is advanced to the round keys; the chaining value lives 32 bytes before them.
+ lea ctx, [44+ctx]
+
+loop7:
+ cmp inputLen, 7*16
+ jb dec1
+; Main loop: seven blocks per iteration in xmm0..xmm6.
+ movdqu xmm0, [0*16 + input]
+ movdqu xmm1, [1*16 + input]
+ movdqu xmm2, [2*16 + input]
+ movdqu xmm3, [3*16 + input]
+ movdqu xmm4, [4*16 + input]
+ movdqu xmm5, [5*16 + input]
+ movdqu xmm6, [6*16 + input]
+
+ movdqu xmm7, [0*16 + ctx]
+ pxor xmm0, xmm7
+ pxor xmm1, xmm7
+ pxor xmm2, xmm7
+ pxor xmm3, xmm7
+ pxor xmm4, xmm7
+ pxor xmm5, xmm7
+ pxor xmm6, xmm7
+
+ i = 1
+ WHILE i LT rnds
+ aes_dec_rnd i
+ i = i+1
+ ENDM
+ aes_dec_last_rnd rnds
+; CBC unchaining: block 0 xor stored chaining value, block k xor ciphertext block k-1.
+ movdqu xmm7, [-32 + ctx]
+ pxor xmm0, xmm7
+ movdqu xmm7, [0*16 + input]
+ pxor xmm1, xmm7
+ movdqu xmm7, [1*16 + input]
+ pxor xmm2, xmm7
+ movdqu xmm7, [2*16 + input]
+ pxor xmm3, xmm7
+ movdqu xmm7, [3*16 + input]
+ pxor xmm4, xmm7
+ movdqu xmm7, [4*16 + input]
+ pxor xmm5, xmm7
+ movdqu xmm7, [5*16 + input]
+ pxor xmm6, xmm7
+ movdqu xmm7, [6*16 + input]
+; xmm7 now holds ciphertext block 6; it is stored below as the next chaining value.
+ movdqu [0*16 + output], xmm0
+ movdqu [1*16 + output], xmm1
+ movdqu [2*16 + output], xmm2
+ movdqu [3*16 + output], xmm3
+ movdqu [4*16 + output], xmm4
+ movdqu [5*16 + output], xmm5
+ movdqu [6*16 + output], xmm6
+ movdqu [-32 + ctx], xmm7
+
+ lea input, [7*16 + input]
+ lea output, [7*16 + output]
+ sub inputLen, 7*16
+ jmp loop7
+dec1:
+; Tail: one block at a time; xmm3 carries the chaining value.
+ movdqu xmm3, [-32 + ctx]
+
+loop1:
+ cmp inputLen, 1*16
+ jb bail
+; xmm4 keeps a copy of the ciphertext block for chaining.
+ movdqu xmm0, [input]
+ movdqa xmm4, xmm0
+ movdqu xmm7, [0*16 + ctx]
+ pxor xmm0, xmm7
+
+ i = 1
+ WHILE i LT rnds
+ movdqu xmm7, [i*16 + ctx]
+ aesdec xmm0, xmm7
+ i = i+1
+ ENDM
+ movdqu xmm7, [rnds*16 + ctx]
+ aesdeclast xmm0, xmm7
+ pxor xmm3, xmm0
+; Write the plaintext, then promote the saved ciphertext to chaining value.
+ movdqu [output], xmm3
+ movdqa xmm3, xmm4
+
+ lea input, [1*16 + input]
+ lea output, [1*16 + output]
+ sub inputLen, 1*16
+ jmp loop1
+; Fewer than 16 bytes remain: save the chaining value, return 0 and restore edi.
+bail:
+ movdqu [-32 + ctx], xmm3
+ xor eax, eax
+ pop inputLen
+ ret
+ENDM
+; CBC entry points: each PROC is one macro expansion; the argument is the round count (10, 12, 14).
+ALIGN 16
+intel_aes_encrypt_cbc_128 PROC
+gen_aes_cbc_enc_func 10
+intel_aes_encrypt_cbc_128 ENDP
+
+ALIGN 16
+intel_aes_encrypt_cbc_192 PROC
+gen_aes_cbc_enc_func 12
+intel_aes_encrypt_cbc_192 ENDP
+
+ALIGN 16
+intel_aes_encrypt_cbc_256 PROC
+gen_aes_cbc_enc_func 14
+intel_aes_encrypt_cbc_256 ENDP
+
+ALIGN 16
+intel_aes_decrypt_cbc_128 PROC
+gen_aes_cbc_dec_func 10
+intel_aes_decrypt_cbc_128 ENDP
+
+ALIGN 16
+intel_aes_decrypt_cbc_192 PROC
+gen_aes_cbc_dec_func 12
+intel_aes_decrypt_cbc_192 ENDP
+
+ALIGN 16
+intel_aes_decrypt_cbc_256 PROC
+gen_aes_cbc_dec_func 14
+intel_aes_decrypt_cbc_256 ENDP
+
+
+
+ctrCtx textequ <esi> ; context pointer on entry, then reused as the running (byte-swapped) counter
+CTR textequ <ebx> ; scratch: byte-swapped counter dword xored with round key 0
+; gen_aes_ctr_func rnds: body of a CTR-mode routine using rnds AES rounds; returns 0 in eax.
+gen_aes_ctr_func MACRO rnds
+
+LOCAL loop7
+LOCAL loop1
+LOCAL enc1
+LOCAL bail
+; Only the last dword of the counter block is incremented here. (enc1 is declared but never defined.)
+ push inputLen
+ push ctrCtx
+ push CTR
+ push ebp
+; Four registers were pushed, so stack arguments start at esp + 4*5.
+ mov ctrCtx, [esp + 4*5 + 0*4]
+ mov output, [esp + 4*5 + 1*4]
+ mov input, [esp + 4*5 + 4*4]
+ mov inputLen, [esp + 4*5 + 5*4]
+; ctx is loaded from [4+ctrCtx] and advanced to the round keys.
+ mov ctx, [4+ctrCtx]
+ lea ctx, [44+ctx]
+; ebp frames a 16-byte-aligned scratch area holding seven pre-whitened counter blocks.
+ mov ebp, esp
+ sub esp, 7*16
+ and esp, -16
+; xmm0 = counter block at [8+ctrCtx]; from here on ctrCtx holds its last dword after bswap.
+ movdqu xmm0, [8+ctrCtx]
+ mov ctrCtx, [ctrCtx + 8 + 3*4]
+ bswap ctrCtx
+ movdqu xmm1, [ctx + 0*16]
+
+ pxor xmm0, xmm1
+; Scratch blocks are stored already xored with round key 0.
+ movdqa [esp + 0*16], xmm0
+ movdqa [esp + 1*16], xmm0
+ movdqa [esp + 2*16], xmm0
+ movdqa [esp + 3*16], xmm0
+ movdqa [esp + 4*16], xmm0
+ movdqa [esp + 5*16], xmm0
+ movdqa [esp + 6*16], xmm0
+; Slots 1..6 receive counter+1..counter+6: patch only the last dword, xored with key 0's last dword.
+ inc ctrCtx
+ mov CTR, ctrCtx
+ bswap CTR
+ xor CTR, [ctx + 3*4]
+ mov [esp + 1*16 + 3*4], CTR
+
+ inc ctrCtx
+ mov CTR, ctrCtx
+ bswap CTR
+ xor CTR, [ctx + 3*4]
+ mov [esp + 2*16 + 3*4], CTR
+
+ inc ctrCtx
+ mov CTR, ctrCtx
+ bswap CTR
+ xor CTR, [ctx + 3*4]
+ mov [esp + 3*16 + 3*4], CTR
+
+ inc ctrCtx
+ mov CTR, ctrCtx
+ bswap CTR
+ xor CTR, [ctx + 3*4]
+ mov [esp + 4*16 + 3*4], CTR
+
+ inc ctrCtx
+ mov CTR, ctrCtx
+ bswap CTR
+ xor CTR, [ctx + 3*4]
+ mov [esp + 5*16 + 3*4], CTR
+
+ inc ctrCtx
+ mov CTR, ctrCtx
+ bswap CTR
+ xor CTR, [ctx + 3*4]
+ mov [esp + 6*16 + 3*4], CTR
+
+; Main loop: seven blocks per iteration while at least 112 bytes remain.
+loop7:
+ cmp inputLen, 7*16
+ jb loop1
+
+ movdqu xmm0, [0*16 + esp]
+ movdqu xmm1, [1*16 + esp]
+ movdqu xmm2, [2*16 + esp]
+ movdqu xmm3, [3*16 + esp]
+ movdqu xmm4, [4*16 + esp]
+ movdqu xmm5, [5*16 + esp]
+ movdqu xmm6, [6*16 + esp]
+; Rounds 1..7 are interleaved with refilling the seven scratch slots with the next counters.
+ i = 1
+ WHILE i LE 7
+ aes_rnd i
+
+ inc ctrCtx
+ mov CTR, ctrCtx
+ bswap CTR
+ xor CTR, [ctx + 3*4]
+ mov [esp + (i-1)*16 + 3*4], CTR
+
+ i = i+1
+ ENDM
+ WHILE i LT rnds
+ aes_rnd i
+ i = i+1
+ ENDM
+ aes_last_rnd rnds
+; xor the seven keystream blocks with the input.
+ movdqu xmm7, [0*16 + input]
+ pxor xmm0, xmm7
+ movdqu xmm7, [1*16 + input]
+ pxor xmm1, xmm7
+ movdqu xmm7, [2*16 + input]
+ pxor xmm2, xmm7
+ movdqu xmm7, [3*16 + input]
+ pxor xmm3, xmm7
+ movdqu xmm7, [4*16 + input]
+ pxor xmm4, xmm7
+ movdqu xmm7, [5*16 + input]
+ pxor xmm5, xmm7
+ movdqu xmm7, [6*16 + input]
+ pxor xmm6, xmm7
+
+ movdqu [0*16 + output], xmm0
+ movdqu [1*16 + output], xmm1
+ movdqu [2*16 + output], xmm2
+ movdqu [3*16 + output], xmm3
+ movdqu [4*16 + output], xmm4
+ movdqu [5*16 + output], xmm5
+ movdqu [6*16 + output], xmm6
+
+ lea input, [7*16 + input]
+ lea output, [7*16 + output]
+ sub inputLen, 7*16
+ jmp loop7
+
+; Tail: one block at a time, consuming scratch slots by advancing esp.
+loop1:
+ cmp inputLen, 1*16
+ jb bail
+
+ movdqu xmm0, [esp]
+ add esp, 16
+
+ i = 1
+ WHILE i LT rnds
+ movdqu xmm7, [i*16 + ctx]
+ aesenc xmm0, xmm7
+ i = i+1
+ ENDM
+ movdqu xmm7, [rnds*16 + ctx]
+ aesenclast xmm0, xmm7
+
+ movdqu xmm7, [input]
+ pxor xmm0, xmm7
+ movdqu [output], xmm0
+
+ lea input, [1*16 + input]
+ lea output, [1*16 + output]
+ sub inputLen, 1*16
+ jmp loop1
+
+bail:
+; Reload the context pointer (esi was reused as the counter), then store the next unused counter block.
+ mov ctrCtx, [ebp + 4*5 + 0*4]
+ movdqu xmm0, [esp]
+ movdqu xmm1, [ctx + 0*16]
+ pxor xmm0, xmm1
+ movdqu [8+ctrCtx], xmm0
+
+
+ xor eax, eax
+ mov esp, ebp
+ pop ebp
+ pop CTR
+ pop ctrCtx
+ pop inputLen
+ ret
+ENDM
+
+; CTR entry points: each PROC is one macro expansion; the argument is the round count (10, 12, 14).
+ALIGN 16
+intel_aes_encrypt_ctr_128 PROC
+gen_aes_ctr_func 10
+intel_aes_encrypt_ctr_128 ENDP
+
+ALIGN 16
+intel_aes_encrypt_ctr_192 PROC
+gen_aes_ctr_func 12
+intel_aes_encrypt_ctr_192 ENDP
+
+ALIGN 16
+intel_aes_encrypt_ctr_256 PROC
+gen_aes_ctr_func 14
+intel_aes_encrypt_ctr_256 ENDP
+
+
+END
diff --git a/security/nss/lib/freebl/intel-aes.h b/security/nss/lib/freebl/intel-aes.h
new file mode 100644
index 000000000..d5bd2d8ca
--- /dev/null
+++ b/security/nss/lib/freebl/intel-aes.h
@@ -0,0 +1,143 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* Prototypes of the functions defined in the assembler file. */
+void intel_aes_encrypt_init_128(const unsigned char *key, PRUint32 *expanded);
+void intel_aes_encrypt_init_192(const unsigned char *key, PRUint32 *expanded);
+void intel_aes_encrypt_init_256(const unsigned char *key, PRUint32 *expanded);
+void intel_aes_decrypt_init_128(const unsigned char *key, PRUint32 *expanded);
+void intel_aes_decrypt_init_192(const unsigned char *key, PRUint32 *expanded);
+void intel_aes_decrypt_init_256(const unsigned char *key, PRUint32 *expanded);
+SECStatus intel_aes_encrypt_ecb_128(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen,
+ unsigned int blocksize);
+SECStatus intel_aes_decrypt_ecb_128(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen,
+ unsigned int blocksize);
+SECStatus intel_aes_encrypt_cbc_128(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen,
+ unsigned int blocksize);
+SECStatus intel_aes_decrypt_cbc_128(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen,
+ unsigned int blocksize);
+SECStatus intel_aes_encrypt_ctr_128(CTRContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen,
+ unsigned int blocksize);
+SECStatus intel_aes_encrypt_ecb_192(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen,
+ unsigned int blocksize);
+SECStatus intel_aes_decrypt_ecb_192(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen,
+ unsigned int blocksize);
+SECStatus intel_aes_encrypt_cbc_192(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen,
+ unsigned int blocksize);
+SECStatus intel_aes_decrypt_cbc_192(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen,
+ unsigned int blocksize);
+SECStatus intel_aes_encrypt_ctr_192(CTRContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen,
+ unsigned int blocksize);
+SECStatus intel_aes_encrypt_ecb_256(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen,
+ unsigned int blocksize);
+SECStatus intel_aes_decrypt_ecb_256(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen,
+ unsigned int blocksize);
+SECStatus intel_aes_encrypt_cbc_256(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen,
+ unsigned int blocksize);
+SECStatus intel_aes_decrypt_cbc_256(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen,
+ unsigned int blocksize);
+SECStatus intel_aes_encrypt_ctr_256(CTRContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen,
+ unsigned int blocksize);
+/* Yields the ECB assembly routine for the direction (encrypt != 0) and key size in bytes: 16, 24, otherwise the 256-bit one. */
+#define intel_aes_ecb_worker(encrypt, keysize) \
+ ((encrypt) \
+ ? ((keysize) == 16 ? intel_aes_encrypt_ecb_128 \
+ : (keysize) == 24 ? intel_aes_encrypt_ecb_192 \
+ : intel_aes_encrypt_ecb_256) \
+ : ((keysize) == 16 ? intel_aes_decrypt_ecb_128 \
+ : (keysize) == 24 ? intel_aes_decrypt_ecb_192 \
+ : intel_aes_decrypt_ecb_256))
+/* Yields the CBC assembly routine for the direction (encrypt != 0) and key size in bytes: 16, 24, otherwise the 256-bit one. */
+#define intel_aes_cbc_worker(encrypt, keysize) \
+ ((encrypt) \
+ ? ((keysize) == 16 ? intel_aes_encrypt_cbc_128 \
+ : (keysize) == 24 ? intel_aes_encrypt_cbc_192 \
+ : intel_aes_encrypt_cbc_256) \
+ : ((keysize) == 16 ? intel_aes_decrypt_cbc_128 \
+ : (keysize) == 24 ? intel_aes_decrypt_cbc_192 \
+ : intel_aes_decrypt_cbc_256))
+/* Yields the CTR assembly routine for a round count nr: 10, 12, otherwise the 256-bit one. Unlike the ECB/CBC selectors it takes rounds, not key bytes. */
+#define intel_aes_ctr_worker(nr) \
+ ((nr) == 10 ? intel_aes_encrypt_ctr_128 \
+ : (nr) == 12 ? intel_aes_encrypt_ctr_192 \
+ : intel_aes_encrypt_ctr_256)
+/* Runs the key-expansion routine for the direction (encrypt != 0) and key size in bytes (16, 24, otherwise 256-bit). Uses the identifiers key and cx from the calling scope; keysize may be evaluated twice. */
+#define intel_aes_init(encrypt, keysize) \
+ do { \
+ if (encrypt) { \
+ if ((keysize) == 16) \
+ intel_aes_encrypt_init_128(key, cx->expandedKey); \
+ else if ((keysize) == 24) \
+ intel_aes_encrypt_init_192(key, cx->expandedKey); \
+ else \
+ intel_aes_encrypt_init_256(key, cx->expandedKey); \
+ } else { \
+ if ((keysize) == 16) \
+ intel_aes_decrypt_init_128(key, cx->expandedKey); \
+ else if ((keysize) == 24) \
+ intel_aes_decrypt_init_192(key, cx->expandedKey); \
+ else \
+ intel_aes_decrypt_init_256(key, cx->expandedKey); \
+ } \
+ } while (0)
diff --git a/security/nss/lib/freebl/intel-aes.s b/security/nss/lib/freebl/intel-aes.s
new file mode 100644
index 000000000..2dfcfa15b
--- /dev/null
+++ b/security/nss/lib/freebl/intel-aes.s
@@ -0,0 +1,2514 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+ .text
+
+#define IV_OFFSET 16
+#define EXPANDED_KEY_OFFSET 48
+
+/*
+ * Warning: the length values used in this module are "unsigned int"
+ * in C, which is 32-bit. When they're passed in registers, use only
+ * the low 32 bits, because the top half is unspecified.
+ *
+ * This is called from C code, so the contents of those bits can
+ * depend on the C compiler's optimization decisions. This means that
+ * mistakes might not be obvious in testing if those bits happen to be
+ * zero in your build.
+ *
+ * Exception: 32-bit lea instructions use a 64-bit address because the
+ * address size doesn't affect the result, and that form is more
+ * compactly encoded and preferred by compilers over a 32-bit address.
+ */
+
+/* in %rdi : the key
+ in %rsi : buffer for expanded key
+ Expands a 16-byte AES-128 key into 11 round keys (176 bytes).
+ The raw key is stored first; each aeskeygenassist/key_expansion128
+ pair below then appends one more 16-byte round key. The
+ aeskeygenassist immediates (0x01 .. 0x36) are the round constants.
+ Leaves %eax = 0 and modifies %rsi, %xmm1, %xmm2 and %xmm3.
+*/
+ .type intel_aes_encrypt_init_128,@function
+ .globl intel_aes_encrypt_init_128
+ .align 16
+intel_aes_encrypt_init_128:
+ movups (%rdi), %xmm1 /* xmm1 = raw key, i.e. round key 0 */
+ movups %xmm1, (%rsi)
+ leaq 16(%rsi), %rsi /* %rsi -> slot for round key 1 */
+ xorl %eax, %eax /* key_expansion128 builds a zero vector from %eax */
+
+ .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x01 /* aeskeygenassist $0x01, %xmm1, %xmm2 */
+ call key_expansion128
+ .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x02 /* aeskeygenassist $0x02, %xmm1, %xmm2 */
+ call key_expansion128
+ .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x04 /* aeskeygenassist $0x04, %xmm1, %xmm2 */
+ call key_expansion128
+ .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x08 /* aeskeygenassist $0x08, %xmm1, %xmm2 */
+ call key_expansion128
+ .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x10 /* aeskeygenassist $0x10, %xmm1, %xmm2 */
+ call key_expansion128
+ .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x20 /* aeskeygenassist $0x20, %xmm1, %xmm2 */
+ call key_expansion128
+ .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x40 /* aeskeygenassist $0x40, %xmm1, %xmm2 */
+ call key_expansion128
+ .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x80 /* aeskeygenassist $0x80, %xmm1, %xmm2 */
+ call key_expansion128
+ .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x1b /* aeskeygenassist $0x1b, %xmm1, %xmm2 */
+ call key_expansion128
+ .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x36 /* aeskeygenassist $0x36, %xmm1, %xmm2 */
+ call key_expansion128
+
+ ret
+ .size intel_aes_encrypt_init_128, .-intel_aes_encrypt_init_128
+
+
+/* in %rdi : the key
+ in %rsi : buffer for expanded key
+ Builds the AES-128 key schedule in the form the aesdec-based
+ decrypt routines consume. The expansion is the same as in
+ intel_aes_encrypt_init_128, but after each of the first nine
+ steps the round key just produced (still in %xmm1) is run through
+ aesimc and the result overwrites the copy that key_expansion128
+ stored at -16(%rsi). Round key 0 (the raw key) and the last round
+ key are stored untransformed.
+ Leaves %eax = 0 and modifies %rsi, %xmm1, %xmm2 and %xmm3.
+*/
+ .type intel_aes_decrypt_init_128,@function
+ .globl intel_aes_decrypt_init_128
+ .align 16
+intel_aes_decrypt_init_128:
+ movups (%rdi), %xmm1 /* xmm1 = raw key, i.e. round key 0 */
+ movups %xmm1, (%rsi)
+ leaq 16(%rsi), %rsi /* %rsi -> slot for round key 1 */
+ xorl %eax, %eax /* key_expansion128 builds a zero vector from %eax */
+
+ .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x01 /* aeskeygenassist $0x01, %xmm1, %xmm2 */
+ call key_expansion128
+ .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
+ movups %xmm2, -16(%rsi) /* replace the stored key with its aesimc form */
+ .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x02 /* aeskeygenassist $0x02, %xmm1, %xmm2 */
+ call key_expansion128
+ .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
+ movups %xmm2, -16(%rsi)
+ .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x04 /* aeskeygenassist $0x04, %xmm1, %xmm2 */
+ call key_expansion128
+ .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
+ movups %xmm2, -16(%rsi)
+ .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x08 /* aeskeygenassist $0x08, %xmm1, %xmm2 */
+ call key_expansion128
+ .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
+ movups %xmm2, -16(%rsi)
+ .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x10 /* aeskeygenassist $0x10, %xmm1, %xmm2 */
+ call key_expansion128
+ .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
+ movups %xmm2, -16(%rsi)
+ .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x20 /* aeskeygenassist $0x20, %xmm1, %xmm2 */
+ call key_expansion128
+ .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
+ movups %xmm2, -16(%rsi)
+ .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x40 /* aeskeygenassist $0x40, %xmm1, %xmm2 */
+ call key_expansion128
+ .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
+ movups %xmm2, -16(%rsi)
+ .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x80 /* aeskeygenassist $0x80, %xmm1, %xmm2 */
+ call key_expansion128
+ .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
+ movups %xmm2, -16(%rsi)
+ .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x1b /* aeskeygenassist $0x1b, %xmm1, %xmm2 */
+ call key_expansion128
+ .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
+ movups %xmm2, -16(%rsi)
+ .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x36 /* aeskeygenassist $0x36, %xmm1, %xmm2 */
+ call key_expansion128
+
+ ret
+ .size intel_aes_decrypt_init_128, .-intel_aes_decrypt_init_128
+
+
+ .type key_expansion128,@function
+ .align 16
+key_expansion128: /* File-local helper (no .globl): one AES-128 key-schedule step.
+ in %xmm1 : previous round key
+ in %xmm2 : aeskeygenassist result for that key
+ in %eax : must be 0 (both init_128 routines zero it before calling)
+ in %rsi : where to store the new round key
+ out %xmm1 : new round key, also written to (%rsi); %rsi advanced by 16
+ modifies %xmm2 and %xmm3 */
+ movd %eax, %xmm3 /* xmm3 = 0, because %eax is 0 */
+ pshufd $0xff, %xmm2, %xmm2 /* broadcast dword 3 of the keygenassist result */
+ shufps $0x10, %xmm1, %xmm3 /* xmm3 = { 0, 0, k1, k0 } (dwords 0..3 of old key k) */
+ pxor %xmm3, %xmm1 /* xmm1 = { k0, k1, k2^k1, k3^k0 } */
+ shufps $0x8c, %xmm1, %xmm3 /* xmm3 = { 0, k0, k0, k1^k2 } */
+ pxor %xmm2, %xmm1 /* fold the broadcast keygenassist word into all four dwords */
+ pxor %xmm3, %xmm1 /* each dword now also holds the XOR of all lower old dwords */
+ movdqu %xmm1, (%rsi)
+ addq $16, %rsi
+ ret
+ .size key_expansion128, .-key_expansion128
+
+
+/* in %rdi : cx - context
+ in %rsi : output - pointer to output buffer
+ in %rdx : outputLen - pointer to variable for length of output
+ (already filled in by caller)
+ in %ecx : maxOutputLen - length of output buffer
+ (already checked by caller)
+ in %r8 : input - pointer to input buffer
+ in %r9d : inputLen - length of input buffer
+ on stack: blocksize - AES blocksize (always 16, unused)
+ AES-128 ECB encryption with the 11 round keys stored at cx+48.
+ While at least 128 bytes remain, loop 2 encrypts eight blocks per
+ iteration; whatever is left is handled one block at a time by
+ loop 4. Loop 4 stops only when the byte offset equals inputLen,
+ so inputLen has to be a multiple of 16 (presumably guaranteed by
+ the caller - confirm). %rdx and %ecx are not read here.
+ Always returns 0 in %eax.
+*/
+ .type intel_aes_encrypt_ecb_128,@function
+ .globl intel_aes_encrypt_ecb_128
+ .align 16
+intel_aes_encrypt_ecb_128:
+// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
+ leaq 48(%rdi), %rdi /* %rdi -> round key 0 (48 == EXPANDED_KEY_OFFSET) */
+
+ movdqu (%rdi), %xmm2 /* round key 0, xored into the plaintext first */
+ movdqu 160(%rdi), %xmm12 /* round key 10, used by aesenclast */
+ xor %eax, %eax /* %eax = byte offset into input and output */
+// cmpl $8*16, %r9d
+ cmpl $128, %r9d
+ jb 1f /* fewer than eight blocks: skip the wide loop */
+// leal -8*16(%r9), %r11d
+ leal -128(%r9), %r11d /* last offset at which eight blocks still fit */
+2: movdqu (%r8, %rax), %xmm3
+ movdqu 16(%r8, %rax), %xmm4
+ movdqu 32(%r8, %rax), %xmm5
+ movdqu 48(%r8, %rax), %xmm6
+ movdqu 64(%r8, %rax), %xmm7
+ movdqu 80(%r8, %rax), %xmm8
+ movdqu 96(%r8, %rax), %xmm9
+ movdqu 112(%r8, %rax), %xmm10
+ pxor %xmm2, %xmm3
+ pxor %xmm2, %xmm4
+ pxor %xmm2, %xmm5
+ pxor %xmm2, %xmm6
+ pxor %xmm2, %xmm7
+ pxor %xmm2, %xmm8
+ pxor %xmm2, %xmm9
+ pxor %xmm2, %xmm10
+
+// rounds 1-9 fully unrolled; round keys are reloaded two at a time into %xmm1 and %xmm11
+ movdqu 16(%rdi), %xmm1
+ movdqu 32(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
+
+ movdqu 48(%rdi), %xmm1
+ movdqu 64(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
+
+ movdqu 80(%rdi), %xmm1
+ movdqu 96(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
+
+ movdqu 112(%rdi), %xmm1
+ movdqu 128(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
+
+ movdqu 144(%rdi), %xmm1
+ .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xdd,0xdc /* aesenclast %xmm12, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xdd,0xe4 /* aesenclast %xmm12, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xdd,0xec /* aesenclast %xmm12, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xdd,0xf4 /* aesenclast %xmm12, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xdd,0xfc /* aesenclast %xmm12, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xdd,0xc4 /* aesenclast %xmm12, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xdd,0xcc /* aesenclast %xmm12, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xdd,0xd4 /* aesenclast %xmm12, %xmm10 */
+
+ movdqu %xmm3, (%rsi, %rax)
+ movdqu %xmm4, 16(%rsi, %rax)
+ movdqu %xmm5, 32(%rsi, %rax)
+ movdqu %xmm6, 48(%rsi, %rax)
+ movdqu %xmm7, 64(%rsi, %rax)
+ movdqu %xmm8, 80(%rsi, %rax)
+ movdqu %xmm9, 96(%rsi, %rax)
+ movdqu %xmm10, 112(%rsi, %rax)
+// addl $8*16, %eax
+ addl $128, %eax
+ cmpl %r11d, %eax
+ jbe 2b /* another eight blocks still fit */
+1: cmpl %eax, %r9d
+ je 5f /* nothing left for the single-block loop */
+
+ movdqu 16(%rdi), %xmm3 /* round keys 1..9 stay in %xmm3..%xmm11 for loop 4 */
+ movdqu 32(%rdi), %xmm4
+ movdqu 48(%rdi), %xmm5
+ movdqu 64(%rdi), %xmm6
+ movdqu 80(%rdi), %xmm7
+ movdqu 96(%rdi), %xmm8
+ movdqu 112(%rdi), %xmm9
+ movdqu 128(%rdi), %xmm10
+ movdqu 144(%rdi), %xmm11
+
+4: movdqu (%r8, %rax), %xmm1
+ pxor %xmm2, %xmm1
+ .byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdd,0xcc /* aesenclast %xmm12, %xmm1 */
+ movdqu %xmm1, (%rsi, %rax)
+ addl $16, %eax
+ cmpl %eax, %r9d
+ jne 4b
+
+5: xor %eax, %eax /* return value 0 */
+ ret
+ .size intel_aes_encrypt_ecb_128, .-intel_aes_encrypt_ecb_128
+
+
+/* in %rdi : cx - context
+ in %rsi : output - pointer to output buffer
+ in %rdx : outputLen - pointer to variable for length of output
+ (already filled in by caller)
+ in %ecx : maxOutputLen - length of output buffer
+ (already checked by caller)
+ in %r8 : input - pointer to input buffer
+ in %r9d : inputLen - length of input buffer
+ on stack: blocksize - AES blocksize (always 16, unused)
+ AES-128 ECB decryption with the 11 round keys stored at cx+48.
+ The keys are applied in reverse: the key at offset 160 is xored
+ in first, the keys at 144 down to 16 feed aesdec and the key at
+ offset 0 feeds aesdeclast. While at least 128 bytes remain, loop 2
+ decrypts eight blocks per iteration; the rest is handled one
+ block at a time by loop 4. Loop 4 stops only when the byte offset
+ equals inputLen, so inputLen has to be a multiple of 16
+ (presumably guaranteed by the caller - confirm). %rdx and %ecx
+ are not read here. Always returns 0 in %eax.
+*/
+ .type intel_aes_decrypt_ecb_128,@function
+ .globl intel_aes_decrypt_ecb_128
+ .align 16
+intel_aes_decrypt_ecb_128:
+// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
+ leaq 48(%rdi), %rdi /* %rdi -> round key 0 (48 == EXPANDED_KEY_OFFSET) */
+
+ movdqu (%rdi), %xmm2 /* key at offset 0, used by aesdeclast */
+ movdqu 160(%rdi), %xmm12 /* key at offset 160, xored into the ciphertext first */
+ xorl %eax, %eax /* %eax = byte offset into input and output */
+// cmpl $8*16, %r9d
+ cmpl $128, %r9d
+ jb 1f /* fewer than eight blocks: skip the wide loop */
+// leal -8*16(%r9), %r11d
+ leal -128(%r9), %r11d /* last offset at which eight blocks still fit */
+2: movdqu (%r8, %rax), %xmm3
+ movdqu 16(%r8, %rax), %xmm4
+ movdqu 32(%r8, %rax), %xmm5
+ movdqu 48(%r8, %rax), %xmm6
+ movdqu 64(%r8, %rax), %xmm7
+ movdqu 80(%r8, %rax), %xmm8
+ movdqu 96(%r8, %rax), %xmm9
+ movdqu 112(%r8, %rax), %xmm10
+ pxor %xmm12, %xmm3
+ pxor %xmm12, %xmm4
+ pxor %xmm12, %xmm5
+ pxor %xmm12, %xmm6
+ pxor %xmm12, %xmm7
+ pxor %xmm12, %xmm8
+ pxor %xmm12, %xmm9
+ pxor %xmm12, %xmm10
+
+// nine aesdec rounds fully unrolled; keys are reloaded two at a time, from offset 144 down to 16
+ movdqu 144(%rdi), %xmm1
+ movdqu 128(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 112(%rdi), %xmm1
+ movdqu 96(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 80(%rdi), %xmm1
+ movdqu 64(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 48(%rdi), %xmm1
+ movdqu 32(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 16(%rdi), %xmm1
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */
+
+ movdqu %xmm3, (%rsi, %rax)
+ movdqu %xmm4, 16(%rsi, %rax)
+ movdqu %xmm5, 32(%rsi, %rax)
+ movdqu %xmm6, 48(%rsi, %rax)
+ movdqu %xmm7, 64(%rsi, %rax)
+ movdqu %xmm8, 80(%rsi, %rax)
+ movdqu %xmm9, 96(%rsi, %rax)
+ movdqu %xmm10, 112(%rsi, %rax)
+// addl $8*16, %eax
+ addl $128, %eax
+ cmpl %r11d, %eax
+ jbe 2b /* another eight blocks still fit */
+1: cmpl %eax, %r9d
+ je 5f /* nothing left for the single-block loop */
+
+ movdqu 16(%rdi), %xmm3 /* keys at offsets 16..144 stay in %xmm3..%xmm11 for loop 4 */
+ movdqu 32(%rdi), %xmm4
+ movdqu 48(%rdi), %xmm5
+ movdqu 64(%rdi), %xmm6
+ movdqu 80(%rdi), %xmm7
+ movdqu 96(%rdi), %xmm8
+ movdqu 112(%rdi), %xmm9
+ movdqu 128(%rdi), %xmm10
+ movdqu 144(%rdi), %xmm11
+
+4: movdqu (%r8, %rax), %xmm1
+ pxor %xmm12, %xmm1
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm1 */
+ movdqu %xmm1, (%rsi, %rax)
+ addl $16, %eax
+ cmpl %eax, %r9d
+ jne 4b
+
+5: xor %eax, %eax /* return value 0 */
+ ret
+ .size intel_aes_decrypt_ecb_128, .-intel_aes_decrypt_ecb_128
+
+
+/* in %rdi : cx - context
+ in %rsi : output - pointer to output buffer
+ in %rdx : outputLen - pointer to variable for length of output
+ (already filled in by caller)
+ in %ecx : maxOutputLen - length of output buffer
+ (already checked by caller)
+ in %r8 : input - pointer to input buffer
+ in %r9d : inputLen - length of input buffer
+ on stack: blocksize - AES blocksize (always 16, unused)
+ AES-128 CBC encryption, one block per iteration because each
+ block is chained to the previous ciphertext block. The IV is read
+ from cx+16 and the last ciphertext block is written back there
+ on exit. When inputLen is 0 nothing is read or written. The loop
+ stops only when the byte offset equals inputLen, so a non-zero
+ inputLen has to be a multiple of 16 (presumably guaranteed by the
+ caller - confirm). The incoming %rdx value is discarded: the
+ register is reused as the IV pointer. Always returns 0 in %eax.
+*/
+ .type intel_aes_encrypt_cbc_128,@function
+ .globl intel_aes_encrypt_cbc_128
+ .align 16
+intel_aes_encrypt_cbc_128:
+ testl %r9d, %r9d
+ je 2f /* empty input: leave the IV untouched */
+
+// leaq IV_OFFSET(%rdi), %rdx
+// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
+ leaq 16(%rdi), %rdx /* %rdx -> IV (16 == IV_OFFSET) */
+ leaq 48(%rdi), %rdi /* %rdi -> round key 0 (48 == EXPANDED_KEY_OFFSET) */
+
+ movdqu (%rdx), %xmm0 /* xmm0 = chaining value, initially the IV */
+ movdqu (%rdi), %xmm2 /* round keys 0..10 stay in %xmm2..%xmm12 */
+ movdqu 16(%rdi), %xmm3
+ movdqu 32(%rdi), %xmm4
+ movdqu 48(%rdi), %xmm5
+ movdqu 64(%rdi), %xmm6
+ movdqu 80(%rdi), %xmm7
+ movdqu 96(%rdi), %xmm8
+ movdqu 112(%rdi), %xmm9
+ movdqu 128(%rdi), %xmm10
+ movdqu 144(%rdi), %xmm11
+ movdqu 160(%rdi), %xmm12
+
+ xorl %eax, %eax /* %eax = byte offset into input and output */
+1: movdqu (%r8, %rax), %xmm1
+ pxor %xmm0, %xmm1 /* chain in the previous ciphertext block (or the IV) */
+ pxor %xmm2, %xmm1 /* add round key 0 */
+ .byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdd,0xcc /* aesenclast %xmm12, %xmm1 */
+ movdqu %xmm1, (%rsi, %rax)
+ movdqa %xmm1, %xmm0 /* this ciphertext block chains into the next one */
+ addl $16, %eax
+ cmpl %eax, %r9d
+ jne 1b
+
+ movdqu %xmm0, (%rdx) /* store the last ciphertext block as the new IV */
+
+2: xor %eax, %eax /* return value 0 */
+ ret
+ .size intel_aes_encrypt_cbc_128, .-intel_aes_encrypt_cbc_128
+
+
+/* in %rdi : cx - context
+ in %rsi : output - pointer to output buffer
+ in %rdx : outputLen - pointer to variable for length of output
+ (already filled in by caller)
+ in %ecx : maxOutputLen - length of output buffer
+ (already checked by caller)
+ in %r8 : input - pointer to input buffer
+ in %r9d : inputLen - length of input buffer
+ on stack: blocksize - AES blocksize (always 16, unused)
+ AES-128 CBC decryption. The IV is read from cx+16 and the last
+ ciphertext block processed is written back there on exit (with
+ inputLen 0 the IV is rewritten unchanged). While at least 128
+ bytes remain, loop 2 decrypts eight blocks per iteration and then
+ xors each result with the preceding ciphertext block, which is
+ reloaded from the input before any output of that iteration is
+ stored. The rest is handled one block at a time by loop 4. Loop 4
+ stops only when the byte offset equals inputLen, so inputLen has
+ to be a multiple of 16 (presumably guaranteed by the caller -
+ confirm). The incoming %rdx value is discarded: the register is
+ reused as the IV pointer. Uses %xmm0-%xmm13. Always returns 0 in
+ %eax.
+*/
+ .type intel_aes_decrypt_cbc_128,@function
+ .globl intel_aes_decrypt_cbc_128
+ .align 16
+intel_aes_decrypt_cbc_128:
+// leaq IV_OFFSET(%rdi), %rdx
+// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
+ leaq 16(%rdi), %rdx /* %rdx -> IV (16 == IV_OFFSET) */
+ leaq 48(%rdi), %rdi /* %rdi -> round key 0 (48 == EXPANDED_KEY_OFFSET) */
+
+ movdqu (%rdx), %xmm0 /* iv */
+ movdqu (%rdi), %xmm2 /* first key block */
+ movdqu 160(%rdi), %xmm12 /* last key block */
+ xorl %eax, %eax /* %eax = byte offset into input and output */
+ cmpl $128, %r9d
+ jb 1f /* fewer than eight blocks: skip the wide loop */
+ leal -128(%r9), %r11d /* last offset at which eight blocks still fit */
+2: movdqu (%r8, %rax), %xmm3 /* 1st data block */
+ movdqu 16(%r8, %rax), %xmm4 /* 2nd data block */
+ movdqu 32(%r8, %rax), %xmm5
+ movdqu 48(%r8, %rax), %xmm6
+ movdqu 64(%r8, %rax), %xmm7
+ movdqu 80(%r8, %rax), %xmm8
+ movdqu 96(%r8, %rax), %xmm9
+ movdqu 112(%r8, %rax), %xmm10
+ pxor %xmm12, %xmm3
+ pxor %xmm12, %xmm4
+ pxor %xmm12, %xmm5
+ pxor %xmm12, %xmm6
+ pxor %xmm12, %xmm7
+ pxor %xmm12, %xmm8
+ pxor %xmm12, %xmm9
+ pxor %xmm12, %xmm10
+
+// nine aesdec rounds fully unrolled; keys are reloaded two at a time, from offset 144 down to 16
+ movdqu 144(%rdi), %xmm1
+ movdqu 128(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 112(%rdi), %xmm1
+ movdqu 96(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 80(%rdi), %xmm1
+ movdqu 64(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 48(%rdi), %xmm1
+ movdqu 32(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 16(%rdi), %xmm1
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */
+
+ pxor %xmm0, %xmm3 /* block 1 is chained to the IV / previous chaining value */
+ movdqu (%r8, %rax), %xmm0 /* blocks 2..8 are chained to the preceding ciphertext block */
+ pxor %xmm0, %xmm4
+ movdqu 16(%r8, %rax), %xmm0
+ pxor %xmm0, %xmm5
+ movdqu 32(%r8, %rax), %xmm0
+ pxor %xmm0, %xmm6
+ movdqu 48(%r8, %rax), %xmm0
+ pxor %xmm0, %xmm7
+ movdqu 64(%r8, %rax), %xmm0
+ pxor %xmm0, %xmm8
+ movdqu 80(%r8, %rax), %xmm0
+ pxor %xmm0, %xmm9
+ movdqu 96(%r8, %rax), %xmm0
+ pxor %xmm0, %xmm10
+ movdqu 112(%r8, %rax), %xmm0 /* 8th ciphertext block becomes the next chaining value */
+ movdqu %xmm3, (%rsi, %rax)
+ movdqu %xmm4, 16(%rsi, %rax)
+ movdqu %xmm5, 32(%rsi, %rax)
+ movdqu %xmm6, 48(%rsi, %rax)
+ movdqu %xmm7, 64(%rsi, %rax)
+ movdqu %xmm8, 80(%rsi, %rax)
+ movdqu %xmm9, 96(%rsi, %rax)
+ movdqu %xmm10, 112(%rsi, %rax)
+ addl $128, %eax
+ cmpl %r11d, %eax
+ jbe 2b /* another eight blocks still fit */
+1: cmpl %eax, %r9d
+ je 5f /* nothing left for the single-block loop */
+
+ movdqu 16(%rdi), %xmm3 /* keys at offsets 16..144 stay in %xmm3..%xmm11 for loop 4 */
+ movdqu 32(%rdi), %xmm4
+ movdqu 48(%rdi), %xmm5
+ movdqu 64(%rdi), %xmm6
+ movdqu 80(%rdi), %xmm7
+ movdqu 96(%rdi), %xmm8
+ movdqu 112(%rdi), %xmm9
+ movdqu 128(%rdi), %xmm10
+ movdqu 144(%rdi), %xmm11
+
+4: movdqu (%r8, %rax), %xmm1
+ movdqa %xmm1, %xmm13 /* keep the ciphertext: it is the next chaining value */
+ pxor %xmm12, %xmm1
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm1 */
+ pxor %xmm0, %xmm1 /* undo the chaining */
+ movdqu %xmm1, (%rsi, %rax)
+ movdqa %xmm13, %xmm0
+ addl $16, %eax
+ cmpl %eax, %r9d
+ jne 4b
+
+5: movdqu %xmm0, (%rdx) /* store the chaining value as the new IV */
+
+ xor %eax, %eax /* return value 0 */
+ ret
+ .size intel_aes_decrypt_cbc_128, .-intel_aes_decrypt_cbc_128
+
+/* in %rdi : the key
+ in %rsi : buffer for expanded key
+ Starts the AES-192 key schedule: the 24-byte key is copied to the
+ start of the buffer (low 16 bytes via %xmm1, high 8 bytes via
+ %xmm3), %rsi is advanced by 24, and key_expansion192 is then
+ called eight times with the aeskeygenassist result for %xmm3 in
+ %xmm2. key_expansion192 is defined elsewhere in this file;
+ presumably each call appends the next 24 bytes of schedule and
+ updates %xmm1/%xmm3 - confirm against its definition.
+ The aeskeygenassist immediates (0x01 .. 0x80) are the round
+ constants.
+*/
+ .type intel_aes_encrypt_init_192,@function
+ .globl intel_aes_encrypt_init_192
+ .align 16
+intel_aes_encrypt_init_192:
+ movdqu (%rdi), %xmm1 /* key bytes 0..15 */
+ movq 16(%rdi), %xmm3 /* key bytes 16..23 in the low half of %xmm3 */
+ movdqu %xmm1, (%rsi)
+ movq %xmm3, 16(%rsi)
+ leaq 24(%rsi), %rsi /* %rsi -> first byte after the copied key */
+
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01 /* aeskeygenassist $0x01, %xmm3, %xmm2 */
+ call key_expansion192
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02 /* aeskeygenassist $0x02, %xmm3, %xmm2 */
+ call key_expansion192
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04 /* aeskeygenassist $0x04, %xmm3, %xmm2 */
+ call key_expansion192
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08 /* aeskeygenassist $0x08, %xmm3, %xmm2 */
+ call key_expansion192
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10 /* aeskeygenassist $0x10, %xmm3, %xmm2 */
+ call key_expansion192
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20 /* aeskeygenassist $0x20, %xmm3, %xmm2 */
+ call key_expansion192
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40 /* aeskeygenassist $0x40, %xmm3, %xmm2 */
+ call key_expansion192
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x80 /* aeskeygenassist $0x80, %xmm3, %xmm2 */
+ call key_expansion192
+
+ ret
+ .size intel_aes_encrypt_init_192, .-intel_aes_encrypt_init_192
+
+
+/* intel_aes_decrypt_init_192: build the AES-192 key schedule in the form
+ consumed by aesdec. The expansion steps are the same as in
+ intel_aes_encrypt_init_192, but the round keys at byte offsets 16
+ through 176 of the buffer are replaced by their aesimc transform; the
+ round keys at offsets 0 and 192 are stored unchanged.
+ in %rdi : the key (24 bytes are read)
+ in %rsi : buffer for expanded key (216 bytes are written)
+ Each key_expansion192 call emits 24 bytes, i.e. one and a half round
+ keys, so the fix-up alternates: after an odd-numbered call two complete
+ round keys are transformed in memory, after an even-numbered call the
+ round key still held in %xmm1 is transformed and stored over the copy
+ just written. %xmm1 and %xmm3 keep the untransformed words, which the
+ next expansion step needs.
+ Clobbers %eax, %rsi and %xmm1-%xmm5.
+*/
+ .type intel_aes_decrypt_init_192,@function
+ .globl intel_aes_decrypt_init_192
+ .align 16
+intel_aes_decrypt_init_192:
+ movdqu (%rdi), %xmm1 /* key bytes 0-15 */
+ movq 16(%rdi), %xmm3 /* key bytes 16-23 in the low half, high half zeroed */
+ movdqu %xmm1, (%rsi) /* round key 0 is stored without aesimc */
+ movq %xmm3, 16(%rsi)
+ leaq 24(%rsi), %rsi
+
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01 /* aeskeygenassist $0x01, %xmm3, %xmm2 */
+ call key_expansion192
+ movups -32(%rsi), %xmm2 /* the two round keys completed by this step */
+ movups -16(%rsi), %xmm4
+ .byte 0x66,0x0f,0x38,0xdb,0xd2 /* aesimc %xmm2, %xmm2 */
+ .byte 0x66,0x0f,0x38,0xdb,0xe4 /* aesimc %xmm4, %xmm4 */
+ movups %xmm2, -32(%rsi)
+ movups %xmm4, -16(%rsi)
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02 /* aeskeygenassist $0x02, %xmm3, %xmm2 */
+ call key_expansion192
+ .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
+ movups %xmm2, -24(%rsi) /* replaces the 16 bytes the helper stored from %xmm1 */
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04 /* aeskeygenassist $0x04, %xmm3, %xmm2 */
+ call key_expansion192
+ movups -32(%rsi), %xmm2
+ movups -16(%rsi), %xmm4
+ .byte 0x66,0x0f,0x38,0xdb,0xd2 /* aesimc %xmm2, %xmm2 */
+ .byte 0x66,0x0f,0x38,0xdb,0xe4 /* aesimc %xmm4, %xmm4 */
+ movups %xmm2, -32(%rsi)
+ movups %xmm4, -16(%rsi)
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08 /* aeskeygenassist $0x08, %xmm3, %xmm2 */
+ call key_expansion192
+ .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
+ movups %xmm2, -24(%rsi)
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10 /* aeskeygenassist $0x10, %xmm3, %xmm2 */
+ call key_expansion192
+ movups -32(%rsi), %xmm2
+ movups -16(%rsi), %xmm4
+ .byte 0x66,0x0f,0x38,0xdb,0xd2 /* aesimc %xmm2, %xmm2 */
+ .byte 0x66,0x0f,0x38,0xdb,0xe4 /* aesimc %xmm4, %xmm4 */
+ movups %xmm2, -32(%rsi)
+ movups %xmm4, -16(%rsi)
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20 /* aeskeygenassist $0x20, %xmm3, %xmm2 */
+ call key_expansion192
+ .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */
+ movups %xmm2, -24(%rsi)
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40 /* aeskeygenassist $0x40, %xmm3, %xmm2 */
+ call key_expansion192
+ movups -32(%rsi), %xmm2
+ movups -16(%rsi), %xmm4
+ .byte 0x66,0x0f,0x38,0xdb,0xd2 /* aesimc %xmm2, %xmm2 */
+ .byte 0x66,0x0f,0x38,0xdb,0xe4 /* aesimc %xmm4, %xmm4 */
+ movups %xmm2, -32(%rsi)
+ movups %xmm4, -16(%rsi)
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x80 /* aeskeygenassist $0x80, %xmm3, %xmm2 */
+ call key_expansion192 /* stores the round key at offset 192; no aesimc follows */
+
+ ret
+ .size intel_aes_decrypt_init_192, .-intel_aes_decrypt_init_192
+
+
+ .type key_expansion192,@function
+ .align 16
+key_expansion192: /* Internal helper: one step of the AES-192 key expansion.
+ in %xmm1 : previous four schedule words (w0..w3 in lanes 0..3)
+ in %xmm3 : previous two schedule words (v0, v1 in lanes 0..1)
+ in %xmm2 : aeskeygenassist result computed from %xmm3
+ in %rsi : write position in the expanded-key buffer
+ out : %xmm1 and the low half of %xmm3 hold the next six words,
+ which are also stored at (%rsi); %rsi is advanced by 24
+ Clobbers %eax (left as 0), %xmm2, %xmm4 and %xmm5. */
+ pshufd $0x55, %xmm2, %xmm2 /* broadcast lane 1 of the keygenassist result (t) */
+ xor %eax, %eax
+ movd %eax, %xmm4 /* %xmm4 = 0 */
+ shufps $0x10, %xmm1, %xmm4 /* lanes 0..3 = 0, 0, w1, w0 */
+ pxor %xmm4, %xmm1 /* lanes 0..3 = w0, w1, w2^w1, w3^w0 */
+ shufps $0x8c, %xmm1, %xmm4 /* lanes 0..3 = 0, w0, w0, w2^w1 */
+ pxor %xmm2, %xmm1
+ pxor %xmm4, %xmm1 /* lane k = t ^ w0 ^ ... ^ wk */
+ movdqu %xmm1, (%rsi)
+ addq $16, %rsi
+
+ pshufd $0xff, %xmm1, %xmm4 /* broadcast lane 3 of the new %xmm1 (b) */
+ movd %eax, %xmm5 /* %xmm5 = 0 */
+ shufps $0x00, %xmm3, %xmm5 /* lanes 0..3 = 0, 0, v0, v0 */
+ shufps $0x08, %xmm3, %xmm5 /* lanes 0..3 = 0, v0, v0, v0 */
+ pxor %xmm4, %xmm3
+ pxor %xmm5, %xmm3 /* lane 0 = b^v0, lane 1 = b^v0^v1 */
+ movq %xmm3, (%rsi) /* only the low 64 bits are written to the schedule */
+ addq $8, %rsi
+ ret
+ .size key_expansion192, .-key_expansion192
+
+
+/* intel_aes_encrypt_ecb_192: AES-192 ECB encryption of inputLen bytes.
+ in %rdi : cx - context
+ in %rsi : output - pointer to output buffer
+ in %rdx : outputLen - pointer to variable for length of output
+ (already filled in by caller)
+ in %ecx : maxOutputLen - length of output buffer
+ (already checked by caller)
+ in %r8 : input - pointer to input buffer
+ in %r9d : inputLen - length of input buffer
+ on stack: blocksize - AES blocksize (always 16, unused)
+ returns : 0 in %eax
+ The expanded key is read from offset 48 of the context. Input is
+ processed eight blocks (128 bytes) per iteration while that much
+ remains, then one block at a time. The single-block loop exits only
+ when the byte offset equals inputLen exactly, so inputLen is expected
+ to be a multiple of 16; this routine does not check it.
+ %rdx and %ecx are not referenced. All loads and stores use movdqu.
+*/
+ .type intel_aes_encrypt_ecb_192,@function
+ .globl intel_aes_encrypt_ecb_192
+ .align 16
+intel_aes_encrypt_ecb_192:
+// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
+ leaq 48(%rdi), %rdi /* %rdi = address of the expanded key in the context */
+
+ movdqu (%rdi), %xmm2 /* round key 0 */
+ movdqu 192(%rdi), %xmm14 /* round key 12, consumed by aesenclast */
+ xorl %eax, %eax /* %eax = byte offset into input and output */
+// cmpl $8*16, %r9d
+ cmpl $128, %r9d
+ jb 1f /* fewer than 8 blocks: single-block loop only */
+// leal -8*16(%r9), %r11d
+ leal -128(%r9), %r11d /* largest offset that still leaves 8 whole blocks */
+2: movdqu (%r8, %rax), %xmm3
+ movdqu 16(%r8, %rax), %xmm4
+ movdqu 32(%r8, %rax), %xmm5
+ movdqu 48(%r8, %rax), %xmm6
+ movdqu 64(%r8, %rax), %xmm7
+ movdqu 80(%r8, %rax), %xmm8
+ movdqu 96(%r8, %rax), %xmm9
+ movdqu 112(%r8, %rax), %xmm10
+ pxor %xmm2, %xmm3 /* XOR round key 0 into each of the eight blocks */
+ pxor %xmm2, %xmm4
+ pxor %xmm2, %xmm5
+ pxor %xmm2, %xmm6
+ pxor %xmm2, %xmm7
+ pxor %xmm2, %xmm8
+ pxor %xmm2, %xmm9
+ pxor %xmm2, %xmm10
+
+// complete loop unrolling: round keys 1-11 are loaded two at a time into
+// %xmm1/%xmm11 and each is applied to all eight blocks
+ movdqu 16(%rdi), %xmm1
+ movdqu 32(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
+
+ movdqu 48(%rdi), %xmm1
+ movdqu 64(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
+
+ movdqu 80(%rdi), %xmm1
+ movdqu 96(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
+
+ movdqu 112(%rdi), %xmm1
+ movdqu 128(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
+
+ movdqu 144(%rdi), %xmm1
+ movdqu 160(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
+
+ movdqu 176(%rdi), %xmm1
+ .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xdd,0xde /* aesenclast %xmm14, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xdd,0xe6 /* aesenclast %xmm14, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xdd,0xee /* aesenclast %xmm14, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xdd,0xf6 /* aesenclast %xmm14, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xdd,0xfe /* aesenclast %xmm14, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xdd,0xc6 /* aesenclast %xmm14, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xdd,0xce /* aesenclast %xmm14, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xdd,0xd6 /* aesenclast %xmm14, %xmm10 */
+
+ movdqu %xmm3, (%rsi, %rax)
+ movdqu %xmm4, 16(%rsi, %rax)
+ movdqu %xmm5, 32(%rsi, %rax)
+ movdqu %xmm6, 48(%rsi, %rax)
+ movdqu %xmm7, 64(%rsi, %rax)
+ movdqu %xmm8, 80(%rsi, %rax)
+ movdqu %xmm9, 96(%rsi, %rax)
+ movdqu %xmm10, 112(%rsi, %rax)
+// addl $8*16, %eax
+ addl $128, %eax
+ cmpl %r11d, %eax
+ jbe 2b /* at least 8 more blocks remain */
+1: cmpl %eax, %r9d
+ je 5f /* nothing left for the single-block loop */
+
+ movdqu 16(%rdi), %xmm3 /* keep round keys 1-11 in %xmm3-%xmm13 for the tail */
+ movdqu 32(%rdi), %xmm4
+ movdqu 48(%rdi), %xmm5
+ movdqu 64(%rdi), %xmm6
+ movdqu 80(%rdi), %xmm7
+ movdqu 96(%rdi), %xmm8
+ movdqu 112(%rdi), %xmm9
+ movdqu 128(%rdi), %xmm10
+ movdqu 144(%rdi), %xmm11
+ movdqu 160(%rdi), %xmm12
+ movdqu 176(%rdi), %xmm13
+
+4: movdqu (%r8, %rax), %xmm1
+ pxor %xmm2, %xmm1 /* XOR round key 0 */
+ .byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xcc /* aesenc %xmm12, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xcd /* aesenc %xmm13, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdd,0xce /* aesenclast %xmm14, %xmm1 */
+ movdqu %xmm1, (%rsi, %rax)
+ addl $16, %eax
+ cmpl %eax, %r9d
+ jne 4b /* exits only on exact equality with inputLen */
+
+5: xor %eax, %eax /* return 0 */
+ ret
+ .size intel_aes_encrypt_ecb_192, .-intel_aes_encrypt_ecb_192
+
+
+/* intel_aes_decrypt_ecb_192: AES-192 ECB decryption of inputLen bytes.
+ in %rdi : cx - context
+ in %rsi : output - pointer to output buffer
+ in %rdx : outputLen - pointer to variable for length of output
+ (already filled in by caller)
+ in %ecx : maxOutputLen - length of output buffer
+ (already checked by caller)
+ in %r8 : input - pointer to input buffer
+ in %r9d : inputLen - length of input buffer
+ on stack: blocksize - AES blocksize (always 16, unused)
+ returns : 0 in %eax
+ The expanded key is read from offset 48 of the context and applied in
+ descending order: the key at offset 192 is XORed in first, offsets 176
+ down to 16 feed aesdec, and the key at offset 0 feeds aesdeclast.
+ NOTE(review): presumably the context holds the schedule produced by
+ intel_aes_decrypt_init_192 - confirm against the caller.
+ Input is processed eight blocks (128 bytes) per iteration while that
+ much remains, then one block at a time. The single-block loop exits
+ only when the byte offset equals inputLen exactly, so inputLen is
+ expected to be a multiple of 16; this routine does not check it.
+ %rdx and %ecx are not referenced.
+*/
+ .type intel_aes_decrypt_ecb_192,@function
+ .globl intel_aes_decrypt_ecb_192
+ .align 16
+intel_aes_decrypt_ecb_192:
+// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
+ leaq 48(%rdi), %rdi /* %rdi = address of the expanded key in the context */
+
+ movdqu (%rdi), %xmm2 /* round key at offset 0, consumed by aesdeclast */
+ movdqu 192(%rdi), %xmm14 /* round key at offset 192, XORed in first */
+ xorl %eax, %eax /* %eax = byte offset into input and output */
+// cmpl $8*16, %r9d
+ cmpl $128, %r9d
+ jb 1f /* fewer than 8 blocks: single-block loop only */
+// leal -8*16(%r9), %r11d
+ leal -128(%r9), %r11d /* largest offset that still leaves 8 whole blocks */
+2: movdqu (%r8, %rax), %xmm3
+ movdqu 16(%r8, %rax), %xmm4
+ movdqu 32(%r8, %rax), %xmm5
+ movdqu 48(%r8, %rax), %xmm6
+ movdqu 64(%r8, %rax), %xmm7
+ movdqu 80(%r8, %rax), %xmm8
+ movdqu 96(%r8, %rax), %xmm9
+ movdqu 112(%r8, %rax), %xmm10
+ pxor %xmm14, %xmm3 /* XOR the offset-192 round key into each block */
+ pxor %xmm14, %xmm4
+ pxor %xmm14, %xmm5
+ pxor %xmm14, %xmm6
+ pxor %xmm14, %xmm7
+ pxor %xmm14, %xmm8
+ pxor %xmm14, %xmm9
+ pxor %xmm14, %xmm10
+
+// complete loop unrolling: round keys at offsets 176 down to 16 are loaded
+// two at a time into %xmm1/%xmm11 and each is applied to all eight blocks
+ movdqu 176(%rdi), %xmm1
+ movdqu 160(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 144(%rdi), %xmm1
+ movdqu 128(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 112(%rdi), %xmm1
+ movdqu 96(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 80(%rdi), %xmm1
+ movdqu 64(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 48(%rdi), %xmm1
+ movdqu 32(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 16(%rdi), %xmm1
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */
+
+ movdqu %xmm3, (%rsi, %rax)
+ movdqu %xmm4, 16(%rsi, %rax)
+ movdqu %xmm5, 32(%rsi, %rax)
+ movdqu %xmm6, 48(%rsi, %rax)
+ movdqu %xmm7, 64(%rsi, %rax)
+ movdqu %xmm8, 80(%rsi, %rax)
+ movdqu %xmm9, 96(%rsi, %rax)
+ movdqu %xmm10, 112(%rsi, %rax)
+// addl $8*16, %eax
+ addl $128, %eax
+ cmpl %r11d, %eax
+ jbe 2b /* at least 8 more blocks remain */
+1: cmpl %eax, %r9d
+ je 5f /* nothing left for the single-block loop */
+
+ movdqu 16(%rdi), %xmm3 /* keep the keys at offsets 16-176 in %xmm3-%xmm13 */
+ movdqu 32(%rdi), %xmm4
+ movdqu 48(%rdi), %xmm5
+ movdqu 64(%rdi), %xmm6
+ movdqu 80(%rdi), %xmm7
+ movdqu 96(%rdi), %xmm8
+ movdqu 112(%rdi), %xmm9
+ movdqu 128(%rdi), %xmm10
+ movdqu 144(%rdi), %xmm11
+ movdqu 160(%rdi), %xmm12
+ movdqu 176(%rdi), %xmm13
+
+4: movdqu (%r8, %rax), %xmm1
+ pxor %xmm14, %xmm1 /* XOR the offset-192 round key */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xcd /* aesdec %xmm13, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xcc /* aesdec %xmm12, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm1 */
+ movdqu %xmm1, (%rsi, %rax)
+ addl $16, %eax
+ cmpl %eax, %r9d
+ jne 4b /* exits only on exact equality with inputLen */
+
+5: xor %eax, %eax /* return 0 */
+ ret
+ .size intel_aes_decrypt_ecb_192, .-intel_aes_decrypt_ecb_192
+
+
+/* intel_aes_encrypt_cbc_192: AES-192 CBC encryption of inputLen bytes.
+ in %rdi : cx - context
+ in %rsi : output - pointer to output buffer
+ in %rdx : outputLen - pointer to variable for length of output
+ (already filled in by caller)
+ in %ecx : maxOutputLen - length of output buffer
+ (already checked by caller)
+ in %r8 : input - pointer to input buffer
+ in %r9d : inputLen - length of input buffer
+ on stack: blocksize - AES blocksize (always 16, unused)
+ returns : 0 in %eax
+ The chaining value (IV) is read from offset 16 of the context and the
+ expanded key from offset 48. Blocks are encrypted strictly one after
+ another because each depends on the previous ciphertext block. The
+ last ciphertext block is written back to offset 16 of the context.
+ When inputLen is 0 the routine returns at once and leaves the context
+ untouched. The loop exits only when the byte offset equals inputLen
+ exactly, so inputLen is expected to be a multiple of 16; this routine
+ does not check it.
+ The incoming %rdx value is discarded (the register is reused as the IV
+ pointer); %ecx is not referenced.
+*/
+ .type intel_aes_encrypt_cbc_192,@function
+ .globl intel_aes_encrypt_cbc_192
+ .align 16
+intel_aes_encrypt_cbc_192:
+ testl %r9d, %r9d
+ je 2f /* empty input: nothing to encrypt, IV stays as is */
+
+// leaq IV_OFFSET(%rdi), %rdx
+// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
+ leaq 16(%rdi), %rdx /* %rdx = address of the IV in the context */
+ leaq 48(%rdi), %rdi /* %rdi = address of the expanded key in the context */
+
+ movdqu (%rdx), %xmm0 /* %xmm0 = current chaining value */
+ movdqu (%rdi), %xmm2 /* round keys 0-12 stay in %xmm2-%xmm14 */
+ movdqu 16(%rdi), %xmm3
+ movdqu 32(%rdi), %xmm4
+ movdqu 48(%rdi), %xmm5
+ movdqu 64(%rdi), %xmm6
+ movdqu 80(%rdi), %xmm7
+ movdqu 96(%rdi), %xmm8
+ movdqu 112(%rdi), %xmm9
+ movdqu 128(%rdi), %xmm10
+ movdqu 144(%rdi), %xmm11
+ movdqu 160(%rdi), %xmm12
+ movdqu 176(%rdi), %xmm13
+ movdqu 192(%rdi), %xmm14
+
+ xorl %eax, %eax /* %eax = byte offset into input and output */
+1: movdqu (%r8, %rax), %xmm1
+ pxor %xmm0, %xmm1 /* CBC: XOR plaintext with the chaining value */
+ pxor %xmm2, %xmm1 /* XOR round key 0 */
+ .byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xcc /* aesenc %xmm12, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xcd /* aesenc %xmm13, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdd,0xce /* aesenclast %xmm14, %xmm1 */
+ movdqu %xmm1, (%rsi, %rax)
+ movdqa %xmm1, %xmm0 /* this ciphertext block chains into the next one */
+ addl $16, %eax
+ cmpl %eax, %r9d
+ jne 1b /* exits only on exact equality with inputLen */
+
+ movdqu %xmm0, (%rdx) /* save the last ciphertext block as the new IV */
+
+2: xor %eax, %eax /* return 0 */
+ ret
+ .size intel_aes_encrypt_cbc_192, .-intel_aes_encrypt_cbc_192
+
+
+/* intel_aes_decrypt_cbc_192: AES-192 CBC decryption of inputLen bytes.
+ in %rdi : cx - context
+ in %rsi : output - pointer to output buffer
+ in %rdx : outputLen - pointer to variable for length of output
+ (already filled in by caller)
+ in %ecx : maxOutputLen - length of output buffer
+ (already checked by caller)
+ in %r8 : input - pointer to input buffer
+ in %r9d : inputLen - length of input buffer
+ on stack: blocksize - AES blocksize (always 16, unused)
+ returns : 0 in %eax
+ The chaining value (IV) is read from offset 16 of the context and the
+ expanded key from offset 48; the key at offset 192 is XORed in first,
+ offsets 176 down to 16 feed aesdec and offset 0 feeds aesdeclast.
+ NOTE(review): presumably the context holds the schedule produced by
+ intel_aes_decrypt_init_192 - confirm against the caller.
+ Input is processed eight blocks (128 bytes) per iteration while that
+ much remains, then one block at a time. In the eight-block path the
+ ciphertext blocks needed for chaining are re-read from the input
+ before any output is stored. The last ciphertext block is written back
+ to offset 16 of the context; with inputLen 0 the IV is rewritten with
+ its own value. The single-block loop exits only when the byte offset
+ equals inputLen exactly, so inputLen is expected to be a multiple of
+ 16; this routine does not check it.
+ The incoming %rdx value is discarded (the register is reused as the IV
+ pointer); %ecx is not referenced.
+*/
+ .type intel_aes_decrypt_cbc_192,@function
+ .globl intel_aes_decrypt_cbc_192
+ .align 16
+intel_aes_decrypt_cbc_192:
+ leaq 16(%rdi), %rdx /* %rdx = address of the IV in the context */
+ leaq 48(%rdi), %rdi /* %rdi = address of the expanded key in the context */
+
+ movdqu (%rdx), %xmm0 /* %xmm0 = current chaining value */
+ movdqu (%rdi), %xmm2 /* round key at offset 0, consumed by aesdeclast */
+ movdqu 192(%rdi), %xmm14 /* round key at offset 192, XORed in first */
+ xorl %eax, %eax /* %eax = byte offset into input and output */
+ cmpl $128, %r9d
+ jb 1f /* fewer than 8 blocks: single-block loop only */
+ leal -128(%r9), %r11d /* largest offset that still leaves 8 whole blocks */
+2: movdqu (%r8, %rax), %xmm3
+ movdqu 16(%r8, %rax), %xmm4
+ movdqu 32(%r8, %rax), %xmm5
+ movdqu 48(%r8, %rax), %xmm6
+ movdqu 64(%r8, %rax), %xmm7
+ movdqu 80(%r8, %rax), %xmm8
+ movdqu 96(%r8, %rax), %xmm9
+ movdqu 112(%r8, %rax), %xmm10
+ pxor %xmm14, %xmm3 /* XOR the offset-192 round key into each block */
+ pxor %xmm14, %xmm4
+ pxor %xmm14, %xmm5
+ pxor %xmm14, %xmm6
+ pxor %xmm14, %xmm7
+ pxor %xmm14, %xmm8
+ pxor %xmm14, %xmm9
+ pxor %xmm14, %xmm10
+
+// complete loop unrolling: round keys at offsets 176 down to 16 are loaded
+// two at a time into %xmm1/%xmm11 and each is applied to all eight blocks
+ movdqu 176(%rdi), %xmm1
+ movdqu 160(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 144(%rdi), %xmm1
+ movdqu 128(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 112(%rdi), %xmm1
+ movdqu 96(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 80(%rdi), %xmm1
+ movdqu 64(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 48(%rdi), %xmm1
+ movdqu 32(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 16(%rdi), %xmm1
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */
+
+ pxor %xmm0, %xmm3 /* first block chains with the carried-in value */
+ movdqu (%r8, %rax), %xmm0 /* re-read each ciphertext block to chain the next */
+ pxor %xmm0, %xmm4
+ movdqu 16(%r8, %rax), %xmm0
+ pxor %xmm0, %xmm5
+ movdqu 32(%r8, %rax), %xmm0
+ pxor %xmm0, %xmm6
+ movdqu 48(%r8, %rax), %xmm0
+ pxor %xmm0, %xmm7
+ movdqu 64(%r8, %rax), %xmm0
+ pxor %xmm0, %xmm8
+ movdqu 80(%r8, %rax), %xmm0
+ pxor %xmm0, %xmm9
+ movdqu 96(%r8, %rax), %xmm0
+ pxor %xmm0, %xmm10
+ movdqu 112(%r8, %rax), %xmm0 /* last ciphertext block: next chaining value */
+ movdqu %xmm3, (%rsi, %rax) /* output is stored only after all input re-reads */
+ movdqu %xmm4, 16(%rsi, %rax)
+ movdqu %xmm5, 32(%rsi, %rax)
+ movdqu %xmm6, 48(%rsi, %rax)
+ movdqu %xmm7, 64(%rsi, %rax)
+ movdqu %xmm8, 80(%rsi, %rax)
+ movdqu %xmm9, 96(%rsi, %rax)
+ movdqu %xmm10, 112(%rsi, %rax)
+ addl $128, %eax
+ cmpl %r11d, %eax
+ jbe 2b /* at least 8 more blocks remain */
+1: cmpl %eax, %r9d
+ je 5f /* nothing left for the single-block loop */
+
+ movdqu 16(%rdi), %xmm3 /* keep the keys at offsets 16-176 in %xmm3-%xmm13 */
+ movdqu 32(%rdi), %xmm4
+ movdqu 48(%rdi), %xmm5
+ movdqu 64(%rdi), %xmm6
+ movdqu 80(%rdi), %xmm7
+ movdqu 96(%rdi), %xmm8
+ movdqu 112(%rdi), %xmm9
+ movdqu 128(%rdi), %xmm10
+ movdqu 144(%rdi), %xmm11
+ movdqu 160(%rdi), %xmm12
+ movdqu 176(%rdi), %xmm13
+
+4: movdqu (%r8, %rax), %xmm1
+ movdqa %xmm1, %xmm15 /* keep the ciphertext: it is the next chaining value */
+ pxor %xmm14, %xmm1 /* XOR the offset-192 round key */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xcd /* aesdec %xmm13, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xcc /* aesdec %xmm12, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm1 */
+ pxor %xmm0, %xmm1 /* CBC: XOR with the previous ciphertext block (or IV) */
+ movdqu %xmm1, (%rsi, %rax)
+ movdqa %xmm15, %xmm0
+ addl $16, %eax
+ cmpl %eax, %r9d
+ jne 4b /* exits only on exact equality with inputLen */
+
+5: movdqu %xmm0, (%rdx) /* write the final chaining value back to the context */
+
+ xor %eax, %eax /* return 0 */
+ ret
+ .size intel_aes_decrypt_cbc_192, .-intel_aes_decrypt_cbc_192
+
+/* intel_aes_encrypt_init_256: expand a 256-bit AES key into the 15
+ round keys used for encryption.
+
+ in %rdi : the key (32 bytes are read)
+ in %rsi : buffer for expanded key (15 * 16 = 240 bytes are written)
+
+ Round keys 0 and 1 are the two halves of the key itself. Each of the
+ six key_expansion256 calls appends two round keys, and the inline
+ sequence at the end appends round key 14.
+
+ Modifies %rsi, %xmm1-%xmm4 and %xmm6; %eax is 0 on return.
+*/
+ .type intel_aes_encrypt_init_256,@function
+ .globl intel_aes_encrypt_init_256
+ .align 16
+intel_aes_encrypt_init_256:
+ movdqu (%rdi), %xmm1 /* %xmm1 = key bytes 0-15 */
+ movdqu 16(%rdi), %xmm3 /* %xmm3 = key bytes 16-31 */
+ movdqu %xmm1, (%rsi) /* round key 0 */
+ movdqu %xmm3, 16(%rsi) /* round key 1 */
+ leaq 32(%rsi), %rsi /* %rsi now points at the slot for round key 2 */
+ xor %eax, %eax /* key_expansion256 clears %xmm6 by copying %eax into it */
+/* The aeskeygenassist immediates 0x01 .. 0x40 are the successive round constants. */
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01 /* aeskeygenassist $0x01, %xmm3, %xmm2 */
+ call key_expansion256
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02 /* aeskeygenassist $0x02, %xmm3, %xmm2 */
+ call key_expansion256
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04 /* aeskeygenassist $0x04, %xmm3, %xmm2 */
+ call key_expansion256
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08 /* aeskeygenassist $0x08, %xmm3, %xmm2 */
+ call key_expansion256
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10 /* aeskeygenassist $0x10, %xmm3, %xmm2 */
+ call key_expansion256
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20 /* aeskeygenassist $0x20, %xmm3, %xmm2 */
+ call key_expansion256
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40 /* aeskeygenassist $0x40, %xmm3, %xmm2 */
+ pxor %xmm6, %xmm6 /* clear the scratch register */
+ pshufd $0xff, %xmm2, %xmm2 /* broadcast dword 3 of the aeskeygenassist result */
+ shufps $0x10, %xmm1, %xmm6 /* same steps as the first half of key_expansion256 */
+ pxor %xmm6, %xmm1
+ shufps $0x8c, %xmm1, %xmm6
+ pxor %xmm2, %xmm1
+ pxor %xmm6, %xmm1 /* %xmm1 = round key 14 */
+ movdqu %xmm1, (%rsi) /* last round key; no second key is derived here */
+
+ ret
+ .size intel_aes_encrypt_init_256, .-intel_aes_encrypt_init_256
+
+
+/* intel_aes_decrypt_init_256: expand a 256-bit AES key into the 15
+ round keys used by the aesdec-based decryption routines.
+
+ in %rdi : the key (32 bytes are read)
+ in %rsi : buffer for expanded key (15 * 16 = 240 bytes are written)
+
+ The schedule is computed exactly as in intel_aes_encrypt_init_256, but
+ round keys 1 through 13 are passed through aesimc (InvMixColumns)
+ before being stored. Round keys 0 and 14 are stored untransformed.
+ key_expansion256 first stores the untransformed pair; the code after
+ each call then overwrites that pair at -32(%rsi) and -16(%rsi).
+
+ Modifies %rsi and %xmm1-%xmm6; %eax is 0 on return.
+*/
+ .type intel_aes_decrypt_init_256,@function
+ .globl intel_aes_decrypt_init_256
+ .align 16
+intel_aes_decrypt_init_256:
+ movdqu (%rdi), %xmm1 /* %xmm1 = key bytes 0-15 */
+ movdqu 16(%rdi), %xmm3 /* %xmm3 = key bytes 16-31 */
+ movdqu %xmm1, (%rsi) /* round key 0, stored as is */
+ .byte 0x66,0x0f,0x38,0xdb,0xe3 /* aesimc %xmm3, %xmm4 */
+ movdqu %xmm4, 16(%rsi) /* round key 1, transformed */
+ leaq 32(%rsi), %rsi /* %rsi now points at the slot for round key 2 */
+ xor %eax, %eax /* key_expansion256 clears %xmm6 by copying %eax into it */
+/* %xmm1 and %xmm3 keep the untransformed keys; only the stored copies are transformed. */
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01 /* aeskeygenassist $0x01, %xmm3, %xmm2 */
+ call key_expansion256
+ .byte 0x66,0x0f,0x38,0xdb,0xe1 /* aesimc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdb,0xeb /* aesimc %xmm3, %xmm5 */
+ movdqu %xmm4, -32(%rsi) /* round key 2, transformed */
+ movdqu %xmm5, -16(%rsi) /* round key 3, transformed */
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02 /* aeskeygenassist $0x02, %xmm3, %xmm2 */
+ call key_expansion256
+ .byte 0x66,0x0f,0x38,0xdb,0xe1 /* aesimc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdb,0xeb /* aesimc %xmm3, %xmm5 */
+ movdqu %xmm4, -32(%rsi) /* round key 4, transformed */
+ movdqu %xmm5, -16(%rsi) /* round key 5, transformed */
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04 /* aeskeygenassist $0x04, %xmm3, %xmm2 */
+ call key_expansion256
+ .byte 0x66,0x0f,0x38,0xdb,0xe1 /* aesimc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdb,0xeb /* aesimc %xmm3, %xmm5 */
+ movdqu %xmm4, -32(%rsi) /* round key 6, transformed */
+ movdqu %xmm5, -16(%rsi) /* round key 7, transformed */
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08 /* aeskeygenassist $0x08, %xmm3, %xmm2 */
+ call key_expansion256
+ .byte 0x66,0x0f,0x38,0xdb,0xe1 /* aesimc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdb,0xeb /* aesimc %xmm3, %xmm5 */
+ movdqu %xmm4, -32(%rsi) /* round key 8, transformed */
+ movdqu %xmm5, -16(%rsi) /* round key 9, transformed */
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10 /* aeskeygenassist $0x10, %xmm3, %xmm2 */
+ call key_expansion256
+ .byte 0x66,0x0f,0x38,0xdb,0xe1 /* aesimc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdb,0xeb /* aesimc %xmm3, %xmm5 */
+ movdqu %xmm4, -32(%rsi) /* round key 10, transformed */
+ movdqu %xmm5, -16(%rsi) /* round key 11, transformed */
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20 /* aeskeygenassist $0x20, %xmm3, %xmm2 */
+ call key_expansion256
+ .byte 0x66,0x0f,0x38,0xdb,0xe1 /* aesimc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdb,0xeb /* aesimc %xmm3, %xmm5 */
+ movdqu %xmm4, -32(%rsi) /* round key 12, transformed */
+ movdqu %xmm5, -16(%rsi) /* round key 13, transformed */
+ .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40 /* aeskeygenassist $0x40, %xmm3, %xmm2 */
+ pxor %xmm6, %xmm6 /* clear the scratch register */
+ pshufd $0xff, %xmm2, %xmm2 /* broadcast dword 3 of the aeskeygenassist result */
+ shufps $0x10, %xmm1, %xmm6 /* same steps as the first half of key_expansion256 */
+ pxor %xmm6, %xmm1
+ shufps $0x8c, %xmm1, %xmm6
+ pxor %xmm2, %xmm1
+ pxor %xmm6, %xmm1 /* %xmm1 = round key 14 */
+ movdqu %xmm1, (%rsi) /* stored without aesimc */
+
+ ret
+ .size intel_aes_decrypt_init_256, .-intel_aes_decrypt_init_256
+
+/* key_expansion256: helper for the 256-bit key schedules above; derives
+ and stores the next two round keys.
+
+ in %xmm1 : round key k (replaced by round key k+2)
+ in %xmm3 : round key k+1 (replaced by round key k+3)
+ in %xmm2 : aeskeygenassist result for %xmm3, produced by the caller
+ in %eax : must be 0; it is copied into %xmm6 to clear it
+ in %rsi : where the two new round keys are stored (advanced by 32)
+
+ Modifies %xmm2, %xmm4 and %xmm6. The symbol has no .globl directive.
+*/
+ .type key_expansion256,@function
+ .align 16
+key_expansion256:
+ movd %eax, %xmm6 /* %xmm6 = 0, given %eax == 0 */
+ pshufd $0xff, %xmm2, %xmm2 /* broadcast dword 3 of the aeskeygenassist result */
+ shufps $0x10, %xmm1, %xmm6 /* the shufps/pxor pairs leave dword i of %xmm1 ... */
+ pxor %xmm6, %xmm1 /* ... holding the XOR of its original dwords 0..i */
+ shufps $0x8c, %xmm1, %xmm6 /* dword 0 of %xmm6 is still 0 after this */
+ pxor %xmm2, %xmm1
+ pxor %xmm6, %xmm1 /* %xmm1 = round key k+2 */
+ movdqu %xmm1, (%rsi)
+
+ addq $16, %rsi
+ .byte 0x66,0x0f,0x3a,0xdf,0xe1,0x00 /* aeskeygenassist $0, %xmm1, %xmm4 */
+ pshufd $0xaa, %xmm4, %xmm4 /* broadcast dword 2 of the aeskeygenassist result */
+ shufps $0x10, %xmm3, %xmm6 /* reuses %xmm6 as is: only its dword 0 must be 0 */
+ pxor %xmm6, %xmm3
+ shufps $0x8c, %xmm3, %xmm6
+ pxor %xmm4, %xmm3
+ pxor %xmm6, %xmm3 /* %xmm3 = round key k+3 */
+ movdqu %xmm3, (%rsi)
+ addq $16, %rsi
+ ret
+ .size key_expansion256, .-key_expansion256
+
+
+/* intel_aes_encrypt_ecb_256: AES-256 ECB encryption of inputLen bytes
+ from input to output, using the round keys stored at offset 48 of the
+ context.
+
+ in %rdi : cx - context
+ in %rsi : output - pointer to output buffer
+ in %rdx : outputLen - pointer to variable for length of output
+ (already filled in by caller; not used here)
+ in %ecx : maxOutputLen - length of output buffer
+ (already checked by caller; not used here)
+ in %r8 : input - pointer to input buffer
+ in %r9d : inputLen - length of input buffer
+ on stack: blocksize - AES blocksize (always 16, unused)
+
+ Returns 0 in %eax. While at least 128 bytes remain, eight blocks are
+ encrypted per iteration; the rest is encrypted one block at a time.
+ The single-block loop stops only when the running offset equals
+ inputLen exactly, so inputLen is assumed to be a multiple of 16
+ (NOTE(review): presumably guaranteed by the caller - confirm).
+ Modifies %rdi, %r11 and %xmm1-%xmm15.
+*/
+ .type intel_aes_encrypt_ecb_256,@function
+ .globl intel_aes_encrypt_ecb_256
+ .align 16
+intel_aes_encrypt_ecb_256:
+// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
+ leaq 48(%rdi), %rdi /* %rdi = address of round key 0 */
+
+ movdqu (%rdi), %xmm2 /* round key 0 */
+ movdqu 224(%rdi), %xmm15 /* round key 14 (last) */
+ xorl %eax, %eax /* %eax = byte offset into input and output */
+// cmpl $8*16, %r9d
+ cmpl $128, %r9d
+ jb 1f /* fewer than 8 blocks: go straight to the single-block loop */
+// leal -8*16(%r9), %r11d
+ leal -128(%r9), %r11d /* %r11d = last offset at which 8 blocks still fit */
+2: movdqu (%r8, %rax), %xmm3
+ movdqu 16(%r8, %rax), %xmm4
+ movdqu 32(%r8, %rax), %xmm5
+ movdqu 48(%r8, %rax), %xmm6
+ movdqu 64(%r8, %rax), %xmm7
+ movdqu 80(%r8, %rax), %xmm8
+ movdqu 96(%r8, %rax), %xmm9
+ movdqu 112(%r8, %rax), %xmm10
+ pxor %xmm2, %xmm3 /* initial AddRoundKey with round key 0 */
+ pxor %xmm2, %xmm4
+ pxor %xmm2, %xmm5
+ pxor %xmm2, %xmm6
+ pxor %xmm2, %xmm7
+ pxor %xmm2, %xmm8
+ pxor %xmm2, %xmm9
+ pxor %xmm2, %xmm10
+
+// complete loop unrolling
+ movdqu 16(%rdi), %xmm1 /* round key 1 */
+ movdqu 32(%rdi), %xmm11 /* round key 2 */
+ .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
+
+ movdqu 48(%rdi), %xmm1 /* round key 3 */
+ movdqu 64(%rdi), %xmm11 /* round key 4 */
+ .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
+
+ movdqu 80(%rdi), %xmm1 /* round key 5 */
+ movdqu 96(%rdi), %xmm11 /* round key 6 */
+ .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
+
+ movdqu 112(%rdi), %xmm1 /* round key 7 */
+ movdqu 128(%rdi), %xmm11 /* round key 8 */
+ .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
+
+ movdqu 144(%rdi), %xmm1 /* round key 9 */
+ movdqu 160(%rdi), %xmm11 /* round key 10 */
+ .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
+
+ movdqu 176(%rdi), %xmm1 /* round key 11 */
+ movdqu 192(%rdi), %xmm11 /* round key 12 */
+ .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */
+
+ movdqu 208(%rdi), %xmm1 /* round key 13 */
+ .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xdd,0xdf /* aesenclast %xmm15, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xdd,0xe7 /* aesenclast %xmm15, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xdd,0xef /* aesenclast %xmm15, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xdd,0xf7 /* aesenclast %xmm15, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xdd,0xff /* aesenclast %xmm15, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xdd,0xc7 /* aesenclast %xmm15, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xdd,0xcf /* aesenclast %xmm15, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xdd,0xd7 /* aesenclast %xmm15, %xmm10 */
+
+ movdqu %xmm3, (%rsi, %rax)
+ movdqu %xmm4, 16(%rsi, %rax)
+ movdqu %xmm5, 32(%rsi, %rax)
+ movdqu %xmm6, 48(%rsi, %rax)
+ movdqu %xmm7, 64(%rsi, %rax)
+ movdqu %xmm8, 80(%rsi, %rax)
+ movdqu %xmm9, 96(%rsi, %rax)
+ movdqu %xmm10, 112(%rsi, %rax)
+// addl $8*16, %eax
+ addl $128, %eax
+ cmpl %r11d, %eax
+ jbe 2b /* unsigned compare: repeat while 8 more blocks fit */
+1: cmpl %eax, %r9d
+ je 5f /* nothing left over */
+/* Single-block loop: keep the round keys in registers across iterations. */
+ movdqu (%rdi), %xmm8 /* round key 0; %xmm8 is shared with round key 7 below */
+ movdqu 16(%rdi), %xmm2 /* round keys 1-6 in %xmm2-%xmm7 */
+ movdqu 32(%rdi), %xmm3
+ movdqu 48(%rdi), %xmm4
+ movdqu 64(%rdi), %xmm5
+ movdqu 80(%rdi), %xmm6
+ movdqu 96(%rdi), %xmm7
+ movdqu 128(%rdi), %xmm9 /* round keys 8-13 in %xmm9-%xmm14 */
+ movdqu 144(%rdi), %xmm10
+ movdqu 160(%rdi), %xmm11
+ movdqu 176(%rdi), %xmm12
+ movdqu 192(%rdi), %xmm13
+ movdqu 208(%rdi), %xmm14
+
+4: movdqu (%r8, %rax), %xmm1
+ pxor %xmm8, %xmm1 /* initial AddRoundKey with round key 0 */
+ movdqu 112(%rdi), %xmm8 /* %xmm8 = round key 7 for this block */
+ .byte 0x66,0x0f,0x38,0xdc,0xca /* aesenc %xmm2, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */
+ movdqu (%rdi), %xmm8 /* restore round key 0 for the next block */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xcc /* aesenc %xmm12, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xcd /* aesenc %xmm13, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xce /* aesenc %xmm14, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdd,0xcf /* aesenclast %xmm15, %xmm1 */
+ movdqu %xmm1, (%rsi, %rax)
+ addl $16, %eax
+ cmpl %eax, %r9d
+ jne 4b /* exits only when the offset equals inputLen exactly */
+
+5: xor %eax, %eax /* return 0 */
+ ret
+ .size intel_aes_encrypt_ecb_256, .-intel_aes_encrypt_ecb_256
+
+
+/* intel_aes_decrypt_ecb_256: AES-256 ECB decryption of inputLen bytes
+ from input to output, using the round keys stored at offset 48 of the
+ context. The keys are applied in reverse order (14 down to 0) with
+ aesdec/aesdeclast; presumably the schedule is the one written by
+ intel_aes_decrypt_init_256 (NOTE(review): confirm against the caller).
+
+ in %rdi : cx - context
+ in %rsi : output - pointer to output buffer
+ in %rdx : outputLen - pointer to variable for length of output
+ (already filled in by caller; not used here)
+ in %ecx : maxOutputLen - length of output buffer
+ (already checked by caller; not used here)
+ in %r8 : input - pointer to input buffer
+ in %r9d : inputLen - length of input buffer
+ on stack: blocksize - AES blocksize (always 16, unused)
+
+ Returns 0 in %eax. While at least 128 bytes remain, eight blocks are
+ decrypted per iteration; the rest is decrypted one block at a time.
+ The single-block loop stops only when the running offset equals
+ inputLen exactly, so inputLen is assumed to be a multiple of 16
+ (NOTE(review): presumably guaranteed by the caller - confirm).
+ Modifies %rdi, %r11 and %xmm1-%xmm15.
+*/
+ .type intel_aes_decrypt_ecb_256,@function
+ .globl intel_aes_decrypt_ecb_256
+ .align 16
+intel_aes_decrypt_ecb_256:
+// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
+ leaq 48(%rdi), %rdi /* %rdi = address of round key 0 */
+
+ movdqu (%rdi), %xmm2 /* round key 0, used by aesdeclast */
+ movdqu 224(%rdi), %xmm15 /* round key 14, applied first */
+ xorl %eax, %eax /* %eax = byte offset into input and output */
+// cmpl $8*16, %r9d
+ cmpl $128, %r9d
+ jb 1f /* fewer than 8 blocks: go straight to the single-block loop */
+// leal -8*16(%r9), %r11d
+ leal -128(%r9), %r11d /* %r11d = last offset at which 8 blocks still fit */
+2: movdqu (%r8, %rax), %xmm3
+ movdqu 16(%r8, %rax), %xmm4
+ movdqu 32(%r8, %rax), %xmm5
+ movdqu 48(%r8, %rax), %xmm6
+ movdqu 64(%r8, %rax), %xmm7
+ movdqu 80(%r8, %rax), %xmm8
+ movdqu 96(%r8, %rax), %xmm9
+ movdqu 112(%r8, %rax), %xmm10
+ pxor %xmm15, %xmm3 /* initial AddRoundKey with round key 14 */
+ pxor %xmm15, %xmm4
+ pxor %xmm15, %xmm5
+ pxor %xmm15, %xmm6
+ pxor %xmm15, %xmm7
+ pxor %xmm15, %xmm8
+ pxor %xmm15, %xmm9
+ pxor %xmm15, %xmm10
+
+// complete loop unrolling
+ movdqu 208(%rdi), %xmm1 /* round key 13 */
+ movdqu 192(%rdi), %xmm11 /* round key 12 */
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 176(%rdi), %xmm1 /* round key 11 */
+ movdqu 160(%rdi), %xmm11 /* round key 10 */
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 144(%rdi), %xmm1 /* round key 9 */
+ movdqu 128(%rdi), %xmm11 /* round key 8 */
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 112(%rdi), %xmm1 /* round key 7 */
+ movdqu 96(%rdi), %xmm11 /* round key 6 */
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 80(%rdi), %xmm1 /* round key 5 */
+ movdqu 64(%rdi), %xmm11 /* round key 4 */
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 48(%rdi), %xmm1 /* round key 3 */
+ movdqu 32(%rdi), %xmm11 /* round key 2 */
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 16(%rdi), %xmm1 /* round key 1 */
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */
+
+ movdqu %xmm3, (%rsi, %rax)
+ movdqu %xmm4, 16(%rsi, %rax)
+ movdqu %xmm5, 32(%rsi, %rax)
+ movdqu %xmm6, 48(%rsi, %rax)
+ movdqu %xmm7, 64(%rsi, %rax)
+ movdqu %xmm8, 80(%rsi, %rax)
+ movdqu %xmm9, 96(%rsi, %rax)
+ movdqu %xmm10, 112(%rsi, %rax)
+// addl $8*16, %eax
+ addl $128, %eax
+ cmpl %r11d, %eax
+ jbe 2b /* unsigned compare: repeat while 8 more blocks fit */
+1: cmpl %eax, %r9d
+ je 5f /* nothing left over */
+/* Single-block loop: round keys 1-13 in %xmm2-%xmm14, round key 14 still in %xmm15. */
+ movdqu 16(%rdi), %xmm2 /* overwrites the copy of round key 0 used above */
+ movdqu 32(%rdi), %xmm3
+ movdqu 48(%rdi), %xmm4
+ movdqu 64(%rdi), %xmm5
+ movdqu 80(%rdi), %xmm6
+ movdqu 96(%rdi), %xmm7
+ movdqu 112(%rdi), %xmm8 /* round key 7; %xmm8 is shared with round key 0 below */
+ movdqu 128(%rdi), %xmm9
+ movdqu 144(%rdi), %xmm10
+ movdqu 160(%rdi), %xmm11
+ movdqu 176(%rdi), %xmm12
+ movdqu 192(%rdi), %xmm13
+ movdqu 208(%rdi), %xmm14
+
+4: movdqu (%r8, %rax), %xmm1
+ pxor %xmm15, %xmm1 /* initial AddRoundKey with round key 14 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xce /* aesdec %xmm14, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xcd /* aesdec %xmm13, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xcc /* aesdec %xmm12, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */
+ movdqu (%rdi), %xmm8 /* %xmm8 = round key 0 for the final round */
+ .byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xca /* aesdec %xmm2, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdf,0xc8 /* aesdeclast %xmm8, %xmm1 */
+ movdqu 112(%rdi), %xmm8 /* restore round key 7 for the next block */
+ movdqu %xmm1, (%rsi, %rax)
+ addl $16, %eax
+ cmpl %eax, %r9d
+ jne 4b /* exits only when the offset equals inputLen exactly */
+
+5: xor %eax, %eax /* return 0 */
+ ret
+ .size intel_aes_decrypt_ecb_256, .-intel_aes_decrypt_ecb_256
+
+
+/* intel_aes_encrypt_cbc_256: AES-256 CBC encryption of inputLen bytes
+ from input to output. The chaining value is read from offset 16 of
+ the context and the round keys from offset 48; the last ciphertext
+ block is written back to offset 16 as the new chaining value.
+
+ in %rdi : cx - context
+ in %rsi : output - pointer to output buffer
+ in %rdx : outputLen - pointer to variable for length of output
+ (already filled in by caller; the register is reused here
+ as a pointer to the chaining value)
+ in %ecx : maxOutputLen - length of output buffer
+ (already checked by caller; not used here)
+ in %r8 : input - pointer to input buffer
+ in %r9d : inputLen - length of input buffer
+ on stack: blocksize - AES blocksize (always 16, unused)
+
+ Returns 0 in %eax. When inputLen is 0 the routine returns at once and
+ the context is left untouched. Blocks are processed one at a time,
+ since each depends on the previous ciphertext block. The loop stops
+ only when the running offset equals inputLen exactly, so inputLen is
+ assumed to be a multiple of 16 (NOTE(review): presumably guaranteed
+ by the caller - confirm).
+ Modifies %rdx, %rdi and %xmm0-%xmm15.
+*/
+ .type intel_aes_encrypt_cbc_256,@function
+ .globl intel_aes_encrypt_cbc_256
+ .align 16
+intel_aes_encrypt_cbc_256:
+ testl %r9d, %r9d
+ je 2f /* empty input: nothing to do */
+
+// leaq IV_OFFSET(%rdi), %rdx
+// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
+ leaq 16(%rdi), %rdx /* %rdx = address of the chaining value in the context */
+ leaq 48(%rdi), %rdi /* %rdi = address of round key 0 */
+
+ movdqu (%rdx), %xmm0 /* %xmm0 = current chaining value */
+ movdqu (%rdi), %xmm8 /* round key 0; %xmm8 is shared with round key 7 below */
+ movdqu 16(%rdi), %xmm2 /* round keys 1-6 in %xmm2-%xmm7 */
+ movdqu 32(%rdi), %xmm3
+ movdqu 48(%rdi), %xmm4
+ movdqu 64(%rdi), %xmm5
+ movdqu 80(%rdi), %xmm6
+ movdqu 96(%rdi), %xmm7
+ movdqu 128(%rdi), %xmm9 /* round keys 8-14 in %xmm9-%xmm15 */
+ movdqu 144(%rdi), %xmm10
+ movdqu 160(%rdi), %xmm11
+ movdqu 176(%rdi), %xmm12
+ movdqu 192(%rdi), %xmm13
+ movdqu 208(%rdi), %xmm14
+ movdqu 224(%rdi), %xmm15
+
+ xorl %eax, %eax /* %eax = byte offset into input and output */
+1: movdqu (%r8, %rax), %xmm1
+ pxor %xmm0, %xmm1 /* XOR the input block with the chaining value */
+ pxor %xmm8, %xmm1 /* initial AddRoundKey with round key 0 */
+ movdqu 112(%rdi), %xmm8 /* %xmm8 = round key 7 for this block */
+ .byte 0x66,0x0f,0x38,0xdc,0xca /* aesenc %xmm2, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */
+ movdqu (%rdi), %xmm8 /* restore round key 0 for the next block */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xcc /* aesenc %xmm12, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xcd /* aesenc %xmm13, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdc,0xce /* aesenc %xmm14, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdd,0xcf /* aesenclast %xmm15, %xmm1 */
+ movdqu %xmm1, (%rsi, %rax)
+ movdqa %xmm1, %xmm0 /* this ciphertext block chains into the next one */
+ addl $16, %eax
+ cmpl %eax, %r9d
+ jne 1b /* exits only when the offset equals inputLen exactly */
+
+ movdqu %xmm0, (%rdx) /* save the last ciphertext block as the new chaining value */
+
+2: xor %eax, %eax /* return 0 */
+ ret
+ .size intel_aes_encrypt_cbc_256, .-intel_aes_encrypt_cbc_256
+
+
+/* in %rdi : cx - context
+ in %rsi : output - pointer to output buffer
+ in %rdx : outputLen - pointer to variable for length of output
+ (already filled in by caller)
+ in %ecx : maxOutputLen - length of output buffer
+ (already checked by caller)
+ in %r8 : input - pointer to input buffer
+ in %r9d : inputLen - length of input buffer
+ on stack: blocksize - AES blocksize (always 16, unused)
+*/
+ .type intel_aes_decrypt_cbc_256,@function
+ .globl intel_aes_decrypt_cbc_256
+ .align 16
+intel_aes_decrypt_cbc_256:
+// leaq IV_OFFSET(%rdi), %rdx
+// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi
+ leaq 16(%rdi), %rdx
+ leaq 48(%rdi), %rdi
+
+ movdqu (%rdx), %xmm0
+ movdqu (%rdi), %xmm2
+ movdqu 224(%rdi), %xmm15
+ xorl %eax, %eax
+// cmpl $8*16, %r9d
+ cmpl $128, %r9d
+ jb 1f
+// leal -8*16(%r9), %r11d
+ leal -128(%r9), %r11d
+2: movdqu (%r8, %rax), %xmm3
+ movdqu 16(%r8, %rax), %xmm4
+ movdqu 32(%r8, %rax), %xmm5
+ movdqu 48(%r8, %rax), %xmm6
+ movdqu 64(%r8, %rax), %xmm7
+ movdqu 80(%r8, %rax), %xmm8
+ movdqu 96(%r8, %rax), %xmm9
+ movdqu 112(%r8, %rax), %xmm10
+ pxor %xmm15, %xmm3
+ pxor %xmm15, %xmm4
+ pxor %xmm15, %xmm5
+ pxor %xmm15, %xmm6
+ pxor %xmm15, %xmm7
+ pxor %xmm15, %xmm8
+ pxor %xmm15, %xmm9
+ pxor %xmm15, %xmm10
+
+// complete loop unrolling
+ movdqu 208(%rdi), %xmm1
+ movdqu 192(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 176(%rdi), %xmm1
+ movdqu 160(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 144(%rdi), %xmm1
+ movdqu 128(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 112(%rdi), %xmm1
+ movdqu 96(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 80(%rdi), %xmm1
+ movdqu 64(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 48(%rdi), %xmm1
+ movdqu 32(%rdi), %xmm11
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */
+ .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */
+
+ movdqu 16(%rdi), %xmm1
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */
+ .byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */
+ .byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */
+ .byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */
+ .byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */
+ .byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */
+ .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */
+ .byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */
+ .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */
+
+ pxor %xmm0, %xmm3
+ movdqu (%r8, %rax), %xmm0
+ pxor %xmm0, %xmm4
+ movdqu 16(%r8, %rax), %xmm0
+ pxor %xmm0, %xmm5
+ movdqu 32(%r8, %rax), %xmm0
+ pxor %xmm0, %xmm6
+ movdqu 48(%r8, %rax), %xmm0
+ pxor %xmm0, %xmm7
+ movdqu 64(%r8, %rax), %xmm0
+ pxor %xmm0, %xmm8
+ movdqu 80(%r8, %rax), %xmm0
+ pxor %xmm0, %xmm9
+ movdqu 96(%r8, %rax), %xmm0
+ pxor %xmm0, %xmm10
+ movdqu 112(%r8, %rax), %xmm0
+ movdqu %xmm3, (%rsi, %rax)
+ movdqu %xmm4, 16(%rsi, %rax)
+ movdqu %xmm5, 32(%rsi, %rax)
+ movdqu %xmm6, 48(%rsi, %rax)
+ movdqu %xmm7, 64(%rsi, %rax)
+ movdqu %xmm8, 80(%rsi, %rax)
+ movdqu %xmm9, 96(%rsi, %rax)
+ movdqu %xmm10, 112(%rsi, %rax)
+// addl $8*16, %eax
+ addl $128, %eax
+ cmpl %r11d, %eax
+ jbe 2b
+1: cmpl %eax, %r9d
+ je 5f
+
+ movdqu 16(%rdi), %xmm2
+ movdqu 32(%rdi), %xmm3
+ movdqu 48(%rdi), %xmm4
+ movdqu 64(%rdi), %xmm5
+ movdqu 80(%rdi), %xmm6
+ movdqu 96(%rdi), %xmm7
+ movdqu 112(%rdi), %xmm8
+ movdqu 128(%rdi), %xmm9
+ movdqu 144(%rdi), %xmm10
+ movdqu 160(%rdi), %xmm11
+ movdqu 176(%rdi), %xmm12
+ movdqu 192(%rdi), %xmm13
+ movdqu 208(%rdi), %xmm14
+
+4: movdqu (%r8, %rax), %xmm1
+ pxor %xmm15, %xmm1
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xce /* aesdec %xmm14, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xcd /* aesdec %xmm13, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xcc /* aesdec %xmm12, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */
+ movdqu (%rdi), %xmm8
+ .byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */
+ .byte 0x66,0x0f,0x38,0xde,0xca /* aesdec %xmm2, %xmm1 */
+ .byte 0x66,0x41,0x0f,0x38,0xdf,0xc8 /* aesdeclast %xmm8, %xmm1 */
+ movdqu 112(%rdi), %xmm8
+ pxor %xmm0, %xmm1
+ movdqu (%r8, %rax), %xmm0 /* fetch the IV before we store the block */
+ movdqu %xmm1, (%rsi, %rax) /* in case input buf = output buf */
+ addl $16, %eax
+ cmpl %eax, %r9d
+ jne 4b
+
+5: movdqu %xmm0, (%rdx)
+
+ xor %eax, %eax
+ ret
+ .size intel_aes_decrypt_cbc_256, .-intel_aes_decrypt_cbc_256
diff --git a/security/nss/lib/freebl/intel-gcm-wrap.c b/security/nss/lib/freebl/intel-gcm-wrap.c
new file mode 100644
index 000000000..8c5eaf021
--- /dev/null
+++ b/security/nss/lib/freebl/intel-gcm-wrap.c
@@ -0,0 +1,254 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+/* Copyright(c) 2013, Intel Corp. */
+
+/* Wrapper functions for Intel optimized implementation of AES-GCM */
+
+#ifdef USE_HW_AES
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "blapii.h"
+#include "blapit.h"
+#include "gcm.h"
+#include "ctr.h"
+#include "secerr.h"
+#include "prtypes.h"
+#include "pkcs11t.h"
+
+#include <limits.h>
+
+#include "intel-gcm.h"
+#include "rijndael.h"
+
+#include <emmintrin.h>
+#include <tmmintrin.h>
+
+/*
+ * State shared between the C wrappers in this file and the assembly
+ * routines (intel_aes_gcmINIT/AAD/TAG/ENC/DEC).
+ *
+ * The field order is part of the contract with the assembly: the MASM
+ * implementation of intel_aes_gcmENC reads T, CTR and aes_context at fixed
+ * byte offsets (16*16 + 1*16, 16*16 + 2*16 and 16*16 + 3*16 from the start
+ * of the context), so members must not be reordered or resized.
+ */
+struct intel_AES_GCMContextStr {
+ /* Table filled in by intel_aes_gcmINIT from the expanded AES key. */
+ unsigned char Htbl[16 * AES_BLOCK_SIZE];
+ /* Output of encrypting the initial counter block; XORed into the tag. */
+ unsigned char X0[AES_BLOCK_SIZE];
+ /* Running GHASH accumulator. */
+ unsigned char T[AES_BLOCK_SIZE];
+ /* Current counter block. */
+ unsigned char CTR[AES_BLOCK_SIZE];
+ /* Underlying AES context; stored as passed in, never freed by this file. */
+ AESContext *aes_context;
+ /* Requested tag length in bits (CK_GCM_PARAMS.ulTagBits). */
+ unsigned long tagBits;
+ /* Number of AAD bytes hashed so far. */
+ unsigned long Alen;
+ /* Number of message bytes processed so far. */
+ unsigned long Mlen;
+};
+
+/*
+ * Create an AES-GCM context on top of an already keyed AES context.
+ *
+ *   context   - the AESContext providing the expanded key; it is stored in
+ *               the new context but not owned by it.
+ *   cipher    - block encryption routine, called once to encrypt the
+ *               initial counter block into X0.
+ *   params    - a CK_GCM_PARAMS (IV, AAD and tag length in bits).
+ *   blocksize - must equal AES_BLOCK_SIZE.
+ *
+ * The IV and the AAD are consumed here; the Update functions only process
+ * message data. Returns the new context, or NULL on failure.
+ */
+intel_AES_GCMContext *
+intel_AES_GCM_CreateContext(void *context,
+                            freeblCipherFunc cipher,
+                            const unsigned char *params,
+                            unsigned int blocksize)
+{
+    intel_AES_GCMContext *gcm = NULL;
+    AESContext *aes = (AESContext *)context;
+    const CK_GCM_PARAMS *gcmParams = (const CK_GCM_PARAMS *)params;
+    unsigned char buff[AES_BLOCK_SIZE]; /* aux buffer */
+
+    unsigned long IV_whole_len;
+    unsigned int IV_remainder_len;
+    unsigned long AAD_whole_len;
+    unsigned int AAD_remainder_len;
+
+    __m128i BSWAP_MASK = _mm_setr_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+    __m128i ONE = _mm_set_epi32(0, 0, 0, 1);
+    __m128i ctr;
+    unsigned int j;
+    SECStatus rv;
+
+    if (blocksize != AES_BLOCK_SIZE) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return NULL;
+    }
+
+    /* Validate everything before the parameters are dereferenced. */
+    if (aes == NULL || cipher == NULL || gcmParams == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return NULL;
+    }
+    /* GCM requires a non-empty IV. */
+    if (gcmParams->ulIvLen == 0 || gcmParams->pIv == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return NULL;
+    }
+    if (gcmParams->ulAADLen != 0 && gcmParams->pAAD == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return NULL;
+    }
+    /* The tag is computed into a single AES block; a longer tag would make
+     * the Update functions read past the end of that block. */
+    if (gcmParams->ulTagBits > AES_BLOCK_SIZE * PR_BITS_PER_BYTE) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return NULL;
+    }
+
+    IV_whole_len = gcmParams->ulIvLen & (~0xful);
+    IV_remainder_len = gcmParams->ulIvLen & 0xful;
+    AAD_whole_len = gcmParams->ulAADLen & (~0xful);
+    AAD_remainder_len = gcmParams->ulAADLen & 0xful;
+
+    gcm = PORT_ZNew(intel_AES_GCMContext);
+    if (gcm == NULL) {
+        return NULL;
+    }
+
+    /* initialize context fields */
+    gcm->aes_context = aes;
+    gcm->tagBits = gcmParams->ulTagBits;
+    gcm->Alen = 0;
+    gcm->Mlen = 0;
+
+    /* first prepare H and its derivatives for ghash */
+    intel_aes_gcmINIT(gcm->Htbl, (unsigned char *)aes->expandedKey, aes->Nr);
+
+    /* Initial TAG value is zero */
+    _mm_storeu_si128((__m128i *)gcm->T, _mm_setzero_si128());
+    _mm_storeu_si128((__m128i *)gcm->X0, _mm_setzero_si128());
+
+    /* Init the counter */
+    if (gcmParams->ulIvLen == 12) {
+        /* 96-bit IV: counter block is IV || 0x00000001. The IV is copied
+         * bytewise so that no alignment is assumed for pIv. */
+        PORT_Memcpy(gcm->CTR, gcmParams->pIv, 12);
+        gcm->CTR[12] = 0;
+        gcm->CTR[13] = 0;
+        gcm->CTR[14] = 0;
+        gcm->CTR[15] = 1;
+    } else {
+        /* If IV size is not 96 bits, then the initial counter value is GHASH
+         * of the IV */
+        intel_aes_gcmAAD(gcm->Htbl, gcmParams->pIv, IV_whole_len, gcm->T);
+
+        /* Partial block, zero padded */
+        if (IV_remainder_len) {
+            PORT_Memset(buff, 0, AES_BLOCK_SIZE);
+            PORT_Memcpy(buff, gcmParams->pIv + IV_whole_len, IV_remainder_len);
+            intel_aes_gcmAAD(gcm->Htbl, buff, AES_BLOCK_SIZE, gcm->T);
+        }
+
+        /* X0 is still zero here, so this writes the plain GHASH result
+         * (including the length block) into CTR. */
+        intel_aes_gcmTAG(
+            gcm->Htbl,
+            gcm->T,
+            gcmParams->ulIvLen,
+            0,
+            gcm->X0,
+            gcm->CTR);
+
+        /* TAG should be zero again */
+        _mm_storeu_si128((__m128i *)gcm->T, _mm_setzero_si128());
+    }
+
+    /* Encrypt the initial counter, will be used to encrypt the GHASH value,
+     * in the end */
+    rv = (*cipher)(context, gcm->X0, &j, AES_BLOCK_SIZE, gcm->CTR,
+                   AES_BLOCK_SIZE, AES_BLOCK_SIZE);
+    if (rv != SECSuccess) {
+        goto loser;
+    }
+
+    /* Promote the counter by 1: byte-swap, add one to the low 32-bit lane,
+     * swap back. */
+    ctr = _mm_loadu_si128((__m128i *)gcm->CTR);
+    ctr = _mm_shuffle_epi8(ctr, BSWAP_MASK);
+    ctr = _mm_add_epi32(ONE, ctr);
+    ctr = _mm_shuffle_epi8(ctr, BSWAP_MASK);
+    _mm_storeu_si128((__m128i *)gcm->CTR, ctr);
+
+    /* Now hash AAD - it would actually make sense to separate the context
+     * creation from the AAD, because that would allow to reuse the H, which
+     * only changes when the AES key changes, and not every package, like the
+     * IV and AAD */
+    intel_aes_gcmAAD(gcm->Htbl, gcmParams->pAAD, AAD_whole_len, gcm->T);
+    if (AAD_remainder_len) {
+        PORT_Memset(buff, 0, AES_BLOCK_SIZE);
+        PORT_Memcpy(buff, gcmParams->pAAD + AAD_whole_len, AAD_remainder_len);
+        intel_aes_gcmAAD(gcm->Htbl, buff, AES_BLOCK_SIZE, gcm->T);
+    }
+    gcm->Alen += gcmParams->ulAADLen;
+    return gcm;
+
+loser:
+    /* The context already holds key-derived material (Htbl); clear it
+     * before handing the memory back to the allocator. */
+    PORT_Memset(gcm, 0, sizeof(*gcm));
+    PORT_Free(gcm);
+    return NULL;
+}
+
+/*
+ * Tear down a context created by intel_AES_GCM_CreateContext.
+ *
+ * The context contents (hash key table, encrypted initial counter, running
+ * tag) are cleared in all cases; the memory itself is released only when
+ * freeit is true. The underlying AES context is not touched. A NULL gcm is
+ * ignored.
+ */
+void
+intel_AES_GCM_DestroyContext(intel_AES_GCMContext *gcm, PRBool freeit)
+{
+    if (gcm == NULL) {
+        return;
+    }
+    PORT_Memset(gcm, 0, sizeof(*gcm));
+    if (freeit) {
+        PORT_Free(gcm);
+    }
+}
+
+/*
+ * Encrypt inlen bytes from inbuf into outbuf and append the authentication
+ * tag computed over everything processed so far.
+ *
+ * On success *outlen is inlen plus the tag length in bytes. If maxout is too
+ * small, *outlen is set to the required size and SEC_ERROR_OUTPUT_LEN is
+ * raised. blocksize is not used.
+ */
+SECStatus
+intel_AES_GCM_EncryptUpdate(intel_AES_GCMContext *gcm,
+                            unsigned char *outbuf,
+                            unsigned int *outlen, unsigned int maxout,
+                            const unsigned char *inbuf, unsigned int inlen,
+                            unsigned int blocksize)
+{
+    unsigned int tagBytes;
+    unsigned char T[AES_BLOCK_SIZE];
+
+    /* The tag is produced in the 16-byte buffer T; refuse tag lengths that
+     * would copy more than that out of it. */
+    if (gcm->tagBits > AES_BLOCK_SIZE * PR_BITS_PER_BYTE) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    tagBytes = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE;
+
+    /* inlen + tagBytes must not wrap around */
+    if (UINT_MAX - inlen < tagBytes) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+    if (maxout < inlen + tagBytes) {
+        *outlen = inlen + tagBytes;
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    intel_aes_gcmENC(
+        inbuf,
+        outbuf,
+        gcm,
+        inlen);
+
+    gcm->Mlen += inlen;
+
+    intel_aes_gcmTAG(
+        gcm->Htbl,
+        gcm->T,
+        gcm->Mlen,
+        gcm->Alen,
+        gcm->X0,
+        T);
+
+    *outlen = inlen + tagBytes;
+
+    /* The (possibly truncated) tag follows the ciphertext directly. */
+    PORT_Memcpy(outbuf + inlen, T, tagBytes);
+    return SECSuccess;
+}
+
+/*
+ * Decrypt and verify. inbuf holds the ciphertext followed by the
+ * authentication tag; inlen covers both.
+ *
+ * On success the plaintext is in outbuf and *outlen is its length. If the
+ * tag does not match, the plaintext already written to outbuf is wiped,
+ * *outlen is set to 0 and SEC_ERROR_BAD_DATA is raised. If maxout is too
+ * small, *outlen is set to the required size and SEC_ERROR_OUTPUT_LEN is
+ * raised. blocksize is not used.
+ */
+SECStatus
+intel_AES_GCM_DecryptUpdate(intel_AES_GCMContext *gcm,
+                            unsigned char *outbuf,
+                            unsigned int *outlen, unsigned int maxout,
+                            const unsigned char *inbuf, unsigned int inlen,
+                            unsigned int blocksize)
+{
+    unsigned int tagBytes;
+    unsigned char T[AES_BLOCK_SIZE];
+    const unsigned char *intag;
+
+    /* The expected tag is produced in the 16-byte buffer T; refuse tag
+     * lengths that would compare more than that. */
+    if (gcm->tagBits > AES_BLOCK_SIZE * PR_BITS_PER_BYTE) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    tagBytes = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE;
+
+    /* get the authentication block */
+    if (inlen < tagBytes) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+
+    inlen -= tagBytes;
+    intag = inbuf + inlen;
+
+    if (maxout < inlen) {
+        *outlen = inlen;
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    intel_aes_gcmDEC(
+        inbuf,
+        outbuf,
+        gcm,
+        inlen);
+
+    gcm->Mlen += inlen;
+    intel_aes_gcmTAG(
+        gcm->Htbl,
+        gcm->T,
+        gcm->Mlen,
+        gcm->Alen,
+        gcm->X0,
+        T);
+
+    if (NSS_SecureMemcmp(T, intag, tagBytes) != 0) {
+        /* Do not hand unauthenticated plaintext back to the caller. */
+        PORT_Memset(outbuf, 0, inlen);
+        *outlen = 0;
+        /* force a CKR_ENCRYPTED_DATA_INVALID error in softoken */
+        PORT_SetError(SEC_ERROR_BAD_DATA);
+        return SECFailure;
+    }
+    *outlen = inlen;
+
+    return SECSuccess;
+}
+
+#endif
diff --git a/security/nss/lib/freebl/intel-gcm-x64-masm.asm b/security/nss/lib/freebl/intel-gcm-x64-masm.asm
new file mode 100644
index 000000000..8b68b76e5
--- /dev/null
+++ b/security/nss/lib/freebl/intel-gcm-x64-masm.asm
@@ -0,0 +1,1295 @@
+; LICENSE:
+; This submission to NSS is to be made available under the terms of the
+; Mozilla Public License, v. 2.0. You can obtain one at
+; http://mozilla.org/MPL/2.0/.
+;###############################################################################
+; Copyright(c) 2014, Intel Corp.
+; Developers and authors:
+; Shay Gueron and Vlad Krasnov
+; Intel Corporation, Israel Development Centre, Haifa, Israel
+; Please send feedback directly to crypto.feedback.alias@intel.com
+
+
+.DATA
+ALIGN 16
+; 128-bit constants holding 1 and 2 in the low quadword; intel_aes_gcmENC
+; adds them with vpaddd to derive neighbouring counter blocks.
+Lone dq 1,0
+Ltwo dq 2,0
+; vpshufb control mask that reverses the order of the 16 bytes in a register.
+Lbswap_mask db 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+; NOTE(review): no use of Lshuff_mask is visible in the routines reviewed
+; here - confirm whether it is still needed.
+Lshuff_mask dq 0f0f0f0f0f0f0f0fh, 0f0f0f0f0f0f0f0fh
+; Reduction constant: its high quadword is the vpclmulqdq operand in every
+; reduction step, and intel_aes_gcmINIT masks the whole value in when
+; doubling H.
+Lpoly dq 01h, 0c200000000000000h
+
+.CODE
+
+
+; GFMUL DST, SRC1, SRC2, TMP1, TMP2, TMP3, TMP4
+;
+; Carry-less multiplication of SRC1 by SRC2 followed by reduction with the
+; Lpoly constant; the result is written to DST.
+; DST is only written by the final instruction, so it may be the same
+; register as SRC1 (the callers in this file use it that way).
+; TMP1..TMP4 are clobbered.
+GFMUL MACRO DST, SRC1, SRC2, TMP1, TMP2, TMP3, TMP4
+ ; TMP1 = low qword * low qword, TMP4 = high qword * high qword
+ vpclmulqdq TMP1, SRC2, SRC1, 0h
+ vpclmulqdq TMP4, SRC2, SRC1, 011h
+
+ ; vpshufd with 78 swaps the two 64-bit halves; XORing with the source
+ ; leaves (high ^ low) in each half of TMP2 / TMP3
+ vpshufd TMP2, SRC2, 78
+ vpshufd TMP3, SRC1, 78
+ vpxor TMP2, TMP2, SRC2
+ vpxor TMP3, TMP3, SRC1
+
+ ; Karatsuba middle term: (hi^lo)*(hi^lo) ^ lo*lo ^ hi*hi
+ vpclmulqdq TMP2, TMP2, TMP3, 0h
+ vpxor TMP2, TMP2, TMP1
+ vpxor TMP2, TMP2, TMP4
+
+ ; split the middle term across the low (TMP1) and high (TMP4) halves of
+ ; the 256-bit product
+ vpslldq TMP3, TMP2, 8
+ vpsrldq TMP2, TMP2, 8
+
+ vpxor TMP1, TMP1, TMP3
+ vpxor TMP4, TMP4, TMP2
+
+ ; first reduction step: fold the low half using Lpoly
+ vpclmulqdq TMP2, TMP1, [Lpoly], 010h
+ vpshufd TMP3, TMP1, 78
+ vpxor TMP1, TMP2, TMP3
+
+ ; second reduction step
+ vpclmulqdq TMP2, TMP1, [Lpoly], 010h
+ vpshufd TMP3, TMP1, 78
+ vpxor TMP1, TMP2, TMP3
+
+ ; combine the folded low half with the high half
+ vpxor DST, TMP1, TMP4
+
+ ENDM
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; Generates the final GCM tag
+; void intel_aes_gcmTAG(unsigned char Htbl[16*16],
+; unsigned char *Tp,
+; unsigned int Mlen,
+; unsigned int Alen,
+; unsigned char *X0,
+; unsigned char *TAG);
+;
+; Loads the running hash from Tp, XORs in a block built from the bit lengths
+; (Mlen*8 in the low quadword, Alen*8 in the high quadword), multiplies by
+; the first Htbl entry, byte-reverses the result, XORs it with the 16 bytes
+; at X0 and stores the 16-byte result at TAG. The value at Tp is not
+; modified. Mlen and Alen are byte counts.
+;
+; Arguments 1-4 arrive in rcx, rdx, r8, r9; arguments 5 and 6 are read from
+; the stack. Uses xmm0-xmm5 and r10/r11 as scratch.
+;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ALIGN 16
+intel_aes_gcmTAG PROC
+
+Htbl textequ <rcx>
+Tp textequ <rdx>
+Mlen textequ <r8>
+Alen textequ <r9>
+X0 textequ <r10>
+TAG textequ <r11>
+
+T textequ <xmm0>
+TMP0 textequ <xmm1>
+
+ ; 5th and 6th arguments: skip the return address (1*8) and the four
+ ; register-argument home slots
+ mov X0, [rsp + 1*8 + 4*8]
+ mov TAG, [rsp + 1*8 + 5*8]
+
+ vzeroupper
+ vmovdqu T, XMMWORD PTR[Tp]
+ vpxor TMP0, TMP0, TMP0
+
+ ; convert byte counts to bit counts
+ ; NOTE(review): the shifts operate on the full 64-bit r8/r9, and the upper
+ ; halves are inserted below, although the prototype above documents
+ ; 32-bit lengths - confirm callers pass zero-extended values.
+ shl Mlen, 3
+ shl Alen, 3
+
+ ;vpinsrq TMP0, TMP0, Mlen, 0
+ ;vpinsrq TMP0, TMP0, Alen, 1
+ ; workaround the ml64.exe vpinsrq issue
+ ; (each 64-bit length is inserted as two 32-bit halves instead)
+ vpinsrd TMP0, TMP0, r8d, 0
+ vpinsrd TMP0, TMP0, r9d, 2
+ shr Mlen, 32
+ shr Alen, 32
+ vpinsrd TMP0, TMP0, r8d, 1
+ vpinsrd TMP0, TMP0, r9d, 3
+
+ ; fold the length block into the hash and multiply by Htbl[0]
+ vpxor T, T, TMP0
+ vmovdqu TMP0, XMMWORD PTR[Htbl]
+ GFMUL T, T, TMP0, xmm2, xmm3, xmm4, xmm5
+
+ ; byte-reverse, mask with the block at X0, and store the tag
+ vpshufb T, T, [Lbswap_mask]
+ vpxor T, T, [X0]
+ vmovdqu XMMWORD PTR[TAG], T
+ vzeroupper
+
+ ret
+
+intel_aes_gcmTAG ENDP
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; Generates the H table
+; void intel_aes_gcmINIT(unsigned char Htbl[16*16], unsigned char *KS, int NR);
+;
+; KS points to the expanded AES encryption key (16 bytes per round key) and
+; NR is the number of rounds. The routine encrypts the all-zero block to
+; obtain H, byte-reverses it and doubles it, then fills Htbl:
+;   entries 0..7  : successive GFMUL products of the doubled H with itself
+;   entries 8..15 : for each of the above, (high qword ^ low qword) in both
+;                   halves, as used by the Karatsuba multiplications
+; Uses xmm0-xmm5 as scratch; rdx and r8d are modified.
+;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ALIGN 16
+intel_aes_gcmINIT PROC
+
+Htbl textequ <rcx>
+KS textequ <rdx>
+NR textequ <r8d>
+
+T textequ <xmm0>
+TMP0 textequ <xmm1>
+
+ vzeroupper
+ ; AES-ENC(0)
+ ; (zero XOR round key 0 is just round key 0, so it is loaded directly)
+ vmovdqu T, XMMWORD PTR[KS]
+ lea KS, [16 + KS]
+ dec NR
+ ; NR-1 middle rounds; the loop body runs at least once
+Lenc_loop:
+ vaesenc T, T, [KS]
+ lea KS, [16 + KS]
+ dec NR
+ jnz Lenc_loop
+
+ vaesenclast T, T, [KS]
+ vpshufb T, T, [Lbswap_mask]
+
+ ;Calculate H` = GFMUL(H, 2)
+ ; xmm5 = Lpoly if the top bit of H is set, otherwise zero
+ vpsrad xmm3, T, 31
+ vpshufd xmm3, xmm3, 0ffh
+ vpand xmm5, xmm3, [Lpoly]
+ ; 128-bit left shift by one, built from per-dword shifts: xmm3 carries
+ ; each dword's top bit into the next dword up
+ vpsrld xmm3, T, 31
+ vpslld xmm4, T, 1
+ vpslldq xmm3, xmm3, 4
+ vpxor T, xmm4, xmm3
+ vpxor T, T, xmm5
+
+ ; TMP0 keeps H` as the constant multiplier for the loop below
+ vmovdqu TMP0, T
+ vmovdqu XMMWORD PTR[Htbl + 0*16], T
+
+ ; Karatsuba helper for entry 0: (hi ^ lo) in both halves
+ vpshufd xmm2, T, 78
+ vpxor xmm2, xmm2, T
+ vmovdqu XMMWORD PTR[Htbl + 8*16 + 0*16], xmm2
+
+ ; assemble-time loop: emits the body seven times for i = 1..7
+ i = 1
+ WHILE i LT 8
+ GFMUL T, T, TMP0, xmm2, xmm3, xmm4, xmm5
+ vmovdqu XMMWORD PTR[Htbl + i*16], T
+ vpshufd xmm2, T, 78
+ vpxor xmm2, xmm2, T
+ vmovdqu XMMWORD PTR[Htbl + 8*16 + i*16], xmm2
+ i = i+1
+ ENDM
+ vzeroupper
+ ret
+intel_aes_gcmINIT ENDP
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; Authenticate only
+; void intel_aes_gcmAAD(unsigned char Htbl[16*16], unsigned char *AAD, unsigned int Alen, unsigned char *Tp);
+;
+; Hashes Alen bytes at AAD into the 16-byte running hash at Tp, using the
+; table produced by intel_aes_gcmINIT. Returns immediately, leaving Tp
+; untouched, when Alen is zero.
+; The data is consumed in 16-byte blocks: first Alen mod 128 bytes as
+; "prefix" blocks, then groups of eight blocks.
+; NOTE(review): the prefix loop counts down in steps of 16 until it reaches
+; exactly zero, so Alen is presumably required to be a multiple of 16 -
+; confirm against the callers.
+; xmm6 and xmm7 are saved on the stack and restored before returning;
+; xmm0-xmm5, r8 and r10 are used as scratch and rdx is advanced.
+;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ALIGN 16
+intel_aes_gcmAAD PROC
+
+Htbl textequ <rcx>
+inp textequ <rdx>
+len textequ <r8>
+Tp textequ <r9>
+hlp0 textequ <r10>
+
+DATA textequ <xmm0>
+T textequ <xmm1>
+TMP0 textequ <xmm2>
+TMP1 textequ <xmm3>
+TMP2 textequ <xmm4>
+TMP3 textequ <xmm5>
+TMP4 textequ <xmm6>
+Xhi textequ <xmm7>
+
+; Multiply DATA by table entry i and accumulate the three partial products:
+; TMP0 ^= low*low, TMP1 ^= high*high, TMP2 ^= (hi^lo)*(hi^lo).
+; Clobbers TMP3.
+KARATSUBA_AAD MACRO i
+ vpclmulqdq TMP3, DATA, [Htbl + i*16], 0h
+ vpxor TMP0, TMP0, TMP3
+ vpclmulqdq TMP3, DATA, [Htbl + i*16], 011h
+ vpxor TMP1, TMP1, TMP3
+ vpshufd TMP3, DATA, 78
+ vpxor TMP3, TMP3, DATA
+ vpclmulqdq TMP3, TMP3, [Htbl + 8*16 + i*16], 0h
+ vpxor TMP2, TMP2, TMP3
+ENDM
+
+ ; nothing to hash: return before touching the stack or any xmm register
+ test len, len
+ jnz LbeginAAD
+ ret
+
+LbeginAAD:
+ vzeroupper
+
+ ; preserve xmm6/xmm7 (aliased as TMP4 and Xhi below)
+ sub rsp, 2*16
+ vmovdqu XMMWORD PTR[rsp + 0*16], xmm6
+ vmovdqu XMMWORD PTR[rsp + 1*16], xmm7
+
+ vpxor Xhi, Xhi, Xhi
+
+ vmovdqu T, XMMWORD PTR[Tp]
+ ;we hash 8 block each iteration, if the total amount of blocks is not a multiple of 8, we hash the first n%8 blocks first
+ ; hlp0 = len mod 128 = number of prefix bytes
+ ; NOTE(review): when there are no prefix blocks, T as loaded from Tp goes
+ ; straight into the in-loop reduction stages with Xhi = 0; the callers
+ ; visible in intel-gcm-wrap.c only reach this with T == 0 - confirm
+ ; before relying on it with a non-zero T.
+ mov hlp0, len
+ and hlp0, 128-1
+ jz Lmod_loop
+
+ ; len = bytes left for the 8-block loop; hlp0 becomes the table offset
+ ; for the first prefix block (highest entry first)
+ and len, -128
+ sub hlp0, 16
+
+ ; Prefix block
+ ; the running hash is folded into the first data block only
+ vmovdqu DATA, XMMWORD PTR[inp]
+ vpshufb DATA, DATA, [Lbswap_mask]
+ vpxor DATA, DATA, T
+
+ vpclmulqdq TMP0, DATA, [Htbl + hlp0], 0h
+ vpclmulqdq TMP1, DATA, [Htbl + hlp0], 011h
+ vpshufd TMP3, DATA, 78
+ vpxor TMP3, TMP3, DATA
+ vpclmulqdq TMP2, TMP3, [Htbl + 8*16 + hlp0], 0h
+
+ lea inp, [inp+16]
+ test hlp0, hlp0
+ jnz Lpre_loop
+ jmp Lred1
+
+ ;hash remaining prefix blocks (up to 7 total prefix blocks)
+Lpre_loop:
+
+ sub hlp0, 16
+
+ vmovdqu DATA, XMMWORD PTR[inp]
+ vpshufb DATA, DATA, [Lbswap_mask]
+
+ vpclmulqdq TMP3, DATA, [Htbl + hlp0], 0h
+ vpxor TMP0, TMP0, TMP3
+ vpclmulqdq TMP3, DATA, [Htbl + hlp0], 011h
+ vpxor TMP1, TMP1, TMP3
+ vpshufd TMP3, DATA, 78
+ vpxor TMP3, TMP3, DATA
+ vpclmulqdq TMP3, TMP3, [Htbl + 8*16 + hlp0], 0h
+ vpxor TMP2, TMP2, TMP3
+
+ ; lea does not modify flags, so jnz still tests hlp0
+ test hlp0, hlp0
+ lea inp, [inp+16]
+ jnz Lpre_loop
+
+Lred1:
+
+ ; Karatsuba fixup: merge the middle term into the 256-bit product,
+ ; leaving the high half in Xhi and the (still unreduced) low half in T
+ vpxor TMP2, TMP2, TMP0
+ vpxor TMP2, TMP2, TMP1
+ vpsrldq TMP3, TMP2, 8
+ vpslldq TMP2, TMP2, 8
+
+ vpxor Xhi, TMP1, TMP3
+ vpxor T, TMP0, TMP2
+
+
+ ; Main loop: eight blocks per iteration. The reduction of the previous
+ ; product (T, Xhi) is interleaved with the multiplications of the new
+ ; blocks; the reduced value is folded into the first block of the group,
+ ; which is processed last.
+Lmod_loop:
+
+ sub len, 16*8
+ jb Ldone
+ ; Block #0
+ ; (last block of the group, multiplied by table entry 0)
+ vmovdqu DATA, XMMWORD PTR[inp + 16*7]
+ vpshufb DATA, DATA, [Lbswap_mask]
+
+ vpclmulqdq TMP0, DATA, [Htbl + 0*16], 0h
+ vpclmulqdq TMP1, DATA, [Htbl + 0*16], 011h
+ vpshufd TMP3, DATA, 78
+ vpxor TMP3, TMP3, DATA
+ vpclmulqdq TMP2, TMP3, [Htbl + 8*16 + 0*16], 0h
+
+ ; Block #1
+ vmovdqu DATA, XMMWORD PTR[inp + 16*6]
+ vpshufb DATA, DATA, [Lbswap_mask]
+ KARATSUBA_AAD 1
+
+ ; Block #2
+ vmovdqu DATA, XMMWORD PTR[inp + 16*5]
+ vpshufb DATA, DATA, [Lbswap_mask]
+
+ vpclmulqdq TMP4, T, [Lpoly], 010h ;reduction stage 1a
+ vpalignr T, T, T, 8
+
+ KARATSUBA_AAD 2
+
+ vpxor T, T, TMP4 ;reduction stage 1b
+
+ ; Block #3
+ vmovdqu DATA, XMMWORD PTR[inp + 16*4]
+ vpshufb DATA, DATA, [Lbswap_mask]
+ KARATSUBA_AAD 3
+ ; Block #4
+ vmovdqu DATA, XMMWORD PTR[inp + 16*3]
+ vpshufb DATA, DATA, [Lbswap_mask]
+
+ vpclmulqdq TMP4, T, [Lpoly], 010h ;reduction stage 2a
+ vpalignr T, T, T, 8
+
+ KARATSUBA_AAD 4
+
+ vpxor T, T, TMP4 ;reduction stage 2b
+ ; Block #5
+ vmovdqu DATA, XMMWORD PTR[inp + 16*2]
+ vpshufb DATA, DATA, [Lbswap_mask]
+ KARATSUBA_AAD 5
+
+ vpxor T, T, Xhi ;reduction finalize
+ ; Block #6
+ vmovdqu DATA, XMMWORD PTR[inp + 16*1]
+ vpshufb DATA, DATA, [Lbswap_mask]
+ KARATSUBA_AAD 6
+ ; Block #7
+ ; (first block of the group: gets the reduced running hash and is
+ ; multiplied by table entry 7)
+ vmovdqu DATA, XMMWORD PTR[inp + 16*0]
+ vpshufb DATA, DATA, [Lbswap_mask]
+ vpxor DATA, DATA, T
+ KARATSUBA_AAD 7
+ ; Aggregated 8 blocks, now karatsuba fixup
+ vpxor TMP2, TMP2, TMP0
+ vpxor TMP2, TMP2, TMP1
+ vpsrldq TMP3, TMP2, 8
+ vpslldq TMP2, TMP2, 8
+
+ vpxor Xhi, TMP1, TMP3
+ vpxor T, TMP0, TMP2
+
+ lea inp, [inp + 16*8]
+ jmp Lmod_loop
+
+ ; Final reduction of the pending product (T, Xhi), then write back
+Ldone:
+ vpclmulqdq TMP4, T, [Lpoly], 010h
+ vpalignr T, T, T, 8
+ vpxor T, T, TMP4
+
+ vpclmulqdq TMP4, T, [Lpoly], 010h
+ vpalignr T, T, T, 8
+ vpxor T, T, TMP4
+
+ vpxor T, T, Xhi
+ vmovdqu XMMWORD PTR[Tp], T
+ vzeroupper
+
+ ; restore the saved xmm6/xmm7 and release the stack space
+ vmovdqu xmm6, XMMWORD PTR[rsp + 0*16]
+ vmovdqu xmm7, XMMWORD PTR[rsp + 1*16]
+ add rsp, 16*2
+
+ ret
+
+intel_aes_gcmAAD ENDP
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; Encrypt and Authenticate
+; void intel_aes_gcmENC(unsigned char* PT, unsigned char* CT, void *Gctx, unsigned int len);
+;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ALIGN 16
+intel_aes_gcmENC PROC
+
+PT textequ <rcx>
+CT textequ <rdx>
+Htbl textequ <r8>
+Gctx textequ <r8>
+len textequ <r9>
+KS textequ <r10>
+NR textequ <eax>
+
+aluCTR textequ <r11d>
+aluKSl textequ <r12d>
+aluTMP textequ <r13d>
+
+T textequ <xmm0>
+TMP0 textequ <xmm1>
+TMP1 textequ <xmm2>
+TMP2 textequ <xmm3>
+TMP3 textequ <xmm4>
+TMP4 textequ <xmm5>
+TMP5 textequ <xmm6>
+CTR0 textequ <xmm7>
+CTR1 textequ <xmm8>
+CTR2 textequ <xmm9>
+CTR3 textequ <xmm10>
+CTR4 textequ <xmm11>
+CTR5 textequ <xmm12>
+CTR6 textequ <xmm13>
+CTR7 textequ <xmm14>
+BSWAPMASK textequ <xmm15>
+
+ROUND MACRO i
+ vmovdqu TMP3, XMMWORD PTR[i*16 + KS]
+ vaesenc CTR0, CTR0, TMP3
+ vaesenc CTR1, CTR1, TMP3
+ vaesenc CTR2, CTR2, TMP3
+ vaesenc CTR3, CTR3, TMP3
+ vaesenc CTR4, CTR4, TMP3
+ vaesenc CTR5, CTR5, TMP3
+ vaesenc CTR6, CTR6, TMP3
+ vaesenc CTR7, CTR7, TMP3
+ENDM
+ROUNDMUL MACRO i
+ vmovdqu TMP3, XMMWORD PTR[i*16 + KS]
+
+ vaesenc CTR0, CTR0, TMP3
+ vaesenc CTR1, CTR1, TMP3
+ vaesenc CTR2, CTR2, TMP3
+ vaesenc CTR3, CTR3, TMP3
+
+ vpshufd TMP4, TMP5, 78
+ vpxor TMP4, TMP4, TMP5
+
+ vaesenc CTR4, CTR4, TMP3
+ vaesenc CTR5, CTR5, TMP3
+ vaesenc CTR6, CTR6, TMP3
+ vaesenc CTR7, CTR7, TMP3
+
+ vpclmulqdq TMP3, TMP4, XMMWORD PTR[i*16 + 8*16 + Htbl], 000h
+ vpxor TMP0, TMP0, TMP3
+ vmovdqu TMP4, XMMWORD PTR[i*16 + Htbl]
+ vpclmulqdq TMP3, TMP5, TMP4, 011h
+ vpxor TMP1, TMP1, TMP3
+ vpclmulqdq TMP3, TMP5, TMP4, 000h
+ vpxor TMP2, TMP2, TMP3
+ENDM
+KARATSUBA MACRO i
+ vpshufd TMP4, TMP5, 78
+ vpxor TMP4, TMP4, TMP5
+ vpclmulqdq TMP3, TMP4, XMMWORD PTR[i*16 + 8*16 + Htbl], 000h
+ vpxor TMP0, TMP0, TMP3
+ vmovdqu TMP4, XMMWORD PTR[i*16 + Htbl]
+ vpclmulqdq TMP3, TMP5, TMP4, 011h
+ vpxor TMP1, TMP1, TMP3
+ vpclmulqdq TMP3, TMP5, TMP4, 000h
+ vpxor TMP2, TMP2, TMP3
+ENDM
+NEXTCTR MACRO i
+ add aluCTR, 1
+ mov aluTMP, aluCTR
+ xor aluTMP, aluKSl
+ bswap aluTMP
+ mov [3*4 + 8*16 + i*16 + rsp], aluTMP
+ENDM
+
+
+ test len, len
+ jnz LbeginENC
+ ret
+
+LbeginENC:
+
+ vzeroupper
+ push r11
+ push r12
+ push r13
+ push rbp
+ sub rsp, 10*16
+ vmovdqu XMMWORD PTR[rsp + 0*16], xmm6
+ vmovdqu XMMWORD PTR[rsp + 1*16], xmm7
+ vmovdqu XMMWORD PTR[rsp + 2*16], xmm8
+ vmovdqu XMMWORD PTR[rsp + 3*16], xmm9
+ vmovdqu XMMWORD PTR[rsp + 4*16], xmm10
+ vmovdqu XMMWORD PTR[rsp + 5*16], xmm11
+ vmovdqu XMMWORD PTR[rsp + 6*16], xmm12
+ vmovdqu XMMWORD PTR[rsp + 7*16], xmm13
+ vmovdqu XMMWORD PTR[rsp + 8*16], xmm14
+ vmovdqu XMMWORD PTR[rsp + 9*16], xmm15
+
+ mov rbp, rsp
+ sub rsp, 16*16
+ and rsp, -16
+
+ vmovdqu T, XMMWORD PTR[16*16 + 1*16 + Gctx]
+ vmovdqu CTR0, XMMWORD PTR[16*16 + 2*16 + Gctx]
+ vmovdqu BSWAPMASK, XMMWORD PTR[Lbswap_mask]
+ mov KS, [16*16 + 3*16 + Gctx]
+ mov NR, [4 + KS]
+ lea KS, [48 + KS]
+
+ vpshufb CTR0, CTR0, BSWAPMASK
+
+ mov aluCTR, [16*16 + 2*16 + 3*4 + Gctx]
+ mov aluKSl, [3*4 + KS]
+ bswap aluCTR
+ bswap aluKSl
+
+ vmovdqu TMP0, XMMWORD PTR[0*16 + KS]
+ vpxor TMP0, TMP0, XMMWORD PTR[16*16 + 2*16 + Gctx]
+ vmovdqu XMMWORD PTR[8*16 + 0*16 + rsp], TMP0
+
+ cmp len, 128
+ jb LEncDataSingles
+; Prepare the "top" counters
+ vmovdqu XMMWORD PTR[8*16 + 1*16 + rsp], TMP0
+ vmovdqu XMMWORD PTR[8*16 + 2*16 + rsp], TMP0
+ vmovdqu XMMWORD PTR[8*16 + 3*16 + rsp], TMP0
+ vmovdqu XMMWORD PTR[8*16 + 4*16 + rsp], TMP0
+ vmovdqu XMMWORD PTR[8*16 + 5*16 + rsp], TMP0
+ vmovdqu XMMWORD PTR[8*16 + 6*16 + rsp], TMP0
+ vmovdqu XMMWORD PTR[8*16 + 7*16 + rsp], TMP0
+
+; Encrypt the initial 8 blocks
+ sub len, 128
+ vpaddd CTR1, CTR0, XMMWORD PTR[Lone]
+ vpaddd CTR2, CTR0, XMMWORD PTR[Ltwo]
+ vpaddd CTR3, CTR2, XMMWORD PTR[Lone]
+ vpaddd CTR4, CTR2, XMMWORD PTR[Ltwo]
+ vpaddd CTR5, CTR4, XMMWORD PTR[Lone]
+ vpaddd CTR6, CTR4, XMMWORD PTR[Ltwo]
+ vpaddd CTR7, CTR6, XMMWORD PTR[Lone]
+
+ vpshufb CTR0, CTR0, BSWAPMASK
+ vpshufb CTR1, CTR1, BSWAPMASK
+ vpshufb CTR2, CTR2, BSWAPMASK
+ vpshufb CTR3, CTR3, BSWAPMASK
+ vpshufb CTR4, CTR4, BSWAPMASK
+ vpshufb CTR5, CTR5, BSWAPMASK
+ vpshufb CTR6, CTR6, BSWAPMASK
+ vpshufb CTR7, CTR7, BSWAPMASK
+
+ vmovdqu TMP3, XMMWORD PTR[0*16 + KS]
+ vpxor CTR0, CTR0, TMP3
+ vpxor CTR1, CTR1, TMP3
+ vpxor CTR2, CTR2, TMP3
+ vpxor CTR3, CTR3, TMP3
+ vpxor CTR4, CTR4, TMP3
+ vpxor CTR5, CTR5, TMP3
+ vpxor CTR6, CTR6, TMP3
+ vpxor CTR7, CTR7, TMP3
+
+ ROUND 1
+
+ add aluCTR, 8
+ mov aluTMP, aluCTR
+ xor aluTMP, aluKSl
+ bswap aluTMP
+ mov [8*16 + 0*16 + 3*4 + rsp], aluTMP
+
+ ROUND 2
+ NEXTCTR 1
+ ROUND 3
+ NEXTCTR 2
+ ROUND 4
+ NEXTCTR 3
+ ROUND 5
+ NEXTCTR 4
+ ROUND 6
+ NEXTCTR 5
+ ROUND 7
+ NEXTCTR 6
+ ROUND 8
+ NEXTCTR 7
+ ROUND 9
+ vmovdqu TMP5, XMMWORD PTR[10*16 + KS]
+ cmp NR, 10
+ je @f
+
+ ROUND 10
+ ROUND 11
+ vmovdqu TMP5, XMMWORD PTR[12*16 + KS]
+ cmp NR, 12
+ je @f
+
+ ROUND 12
+ ROUND 13
+ vmovdqu TMP5, XMMWORD PTR[14*16 + KS]
+@@:
+ vpxor TMP3, TMP5, XMMWORD PTR[0*16 + PT]
+ vaesenclast CTR0, CTR0, TMP3
+ vpxor TMP3, TMP5, XMMWORD PTR[1*16 + PT]
+ vaesenclast CTR1, CTR1, TMP3
+ vpxor TMP3, TMP5, XMMWORD PTR[2*16 + PT]
+ vaesenclast CTR2, CTR2, TMP3
+ vpxor TMP3, TMP5, XMMWORD PTR[3*16 + PT]
+ vaesenclast CTR3, CTR3, TMP3
+ vpxor TMP3, TMP5, XMMWORD PTR[4*16 + PT]
+ vaesenclast CTR4, CTR4, TMP3
+ vpxor TMP3, TMP5, XMMWORD PTR[5*16 + PT]
+ vaesenclast CTR5, CTR5, TMP3
+ vpxor TMP3, TMP5, XMMWORD PTR[6*16 + PT]
+ vaesenclast CTR6, CTR6, TMP3
+ vpxor TMP3, TMP5, XMMWORD PTR[7*16 + PT]
+ vaesenclast CTR7, CTR7, TMP3
+
+ vmovdqu XMMWORD PTR[0*16 + CT], CTR0
+ vpshufb CTR0, CTR0, BSWAPMASK
+ vmovdqu XMMWORD PTR[1*16 + CT], CTR1
+ vpshufb CTR1, CTR1, BSWAPMASK
+ vmovdqu XMMWORD PTR[2*16 + CT], CTR2
+ vpshufb CTR2, CTR2, BSWAPMASK
+ vmovdqu XMMWORD PTR[3*16 + CT], CTR3
+ vpshufb CTR3, CTR3, BSWAPMASK
+ vmovdqu XMMWORD PTR[4*16 + CT], CTR4
+ vpshufb CTR4, CTR4, BSWAPMASK
+ vmovdqu XMMWORD PTR[5*16 + CT], CTR5
+ vpshufb CTR5, CTR5, BSWAPMASK
+ vmovdqu XMMWORD PTR[6*16 + CT], CTR6
+ vpshufb CTR6, CTR6, BSWAPMASK
+ vmovdqu XMMWORD PTR[7*16 + CT], CTR7
+ vpshufb TMP5, CTR7, BSWAPMASK
+
+ vmovdqa XMMWORD PTR[1*16 + rsp], CTR6
+ vmovdqa XMMWORD PTR[2*16 + rsp], CTR5
+ vmovdqa XMMWORD PTR[3*16 + rsp], CTR4
+ vmovdqa XMMWORD PTR[4*16 + rsp], CTR3
+ vmovdqa XMMWORD PTR[5*16 + rsp], CTR2
+ vmovdqa XMMWORD PTR[6*16 + rsp], CTR1
+ vmovdqa XMMWORD PTR[7*16 + rsp], CTR0
+
+ lea CT, [8*16 + CT]
+ lea PT, [8*16 + PT]
+ jmp LEncDataOctets
+
+LEncDataOctets:
+ cmp len, 128
+ jb LEndEncOctets
+ sub len, 128
+
+ vmovdqa CTR0, XMMWORD PTR[8*16 + 0*16 + rsp]
+ vmovdqa CTR1, XMMWORD PTR[8*16 + 1*16 + rsp]
+ vmovdqa CTR2, XMMWORD PTR[8*16 + 2*16 + rsp]
+ vmovdqa CTR3, XMMWORD PTR[8*16 + 3*16 + rsp]
+ vmovdqa CTR4, XMMWORD PTR[8*16 + 4*16 + rsp]
+ vmovdqa CTR5, XMMWORD PTR[8*16 + 5*16 + rsp]
+ vmovdqa CTR6, XMMWORD PTR[8*16 + 6*16 + rsp]
+ vmovdqa CTR7, XMMWORD PTR[8*16 + 7*16 + rsp]
+
+ vpshufd TMP4, TMP5, 78
+ vpxor TMP4, TMP4, TMP5
+ vpclmulqdq TMP0, TMP4, XMMWORD PTR[0*16 + 8*16 + Htbl], 000h
+ vmovdqu TMP4, XMMWORD PTR[0*16 + Htbl]
+ vpclmulqdq TMP1, TMP5, TMP4, 011h
+ vpclmulqdq TMP2, TMP5, TMP4, 000h
+
+ vmovdqu TMP5, XMMWORD PTR[1*16 + rsp]
+ ROUNDMUL 1
+ NEXTCTR 0
+ vmovdqu TMP5, XMMWORD PTR[2*16 + rsp]
+ ROUNDMUL 2
+ NEXTCTR 1
+ vmovdqu TMP5, XMMWORD PTR[3*16 + rsp]
+ ROUNDMUL 3
+ NEXTCTR 2
+ vmovdqu TMP5, XMMWORD PTR[4*16 + rsp]
+ ROUNDMUL 4
+ NEXTCTR 3
+ vmovdqu TMP5, XMMWORD PTR[5*16 + rsp]
+ ROUNDMUL 5
+ NEXTCTR 4
+ vmovdqu TMP5, XMMWORD PTR[6*16 + rsp]
+ ROUNDMUL 6
+ NEXTCTR 5
+ vpxor TMP5, T, XMMWORD PTR[7*16 + rsp]
+ ROUNDMUL 7
+ NEXTCTR 6
+
+ ROUND 8
+ NEXTCTR 7
+
+ vpxor TMP0, TMP0, TMP1
+ vpxor TMP0, TMP0, TMP2
+ vpsrldq TMP3, TMP0, 8
+ vpxor TMP4, TMP1, TMP3
+ vpslldq TMP3, TMP0, 8
+ vpxor T, TMP2, TMP3
+
+ vpclmulqdq TMP1, T, XMMWORD PTR[Lpoly], 010h
+ vpalignr T,T,T,8
+ vpxor T, T, TMP1
+
+ ROUND 9
+
+ vpclmulqdq TMP1, T, XMMWORD PTR[Lpoly], 010h
+ vpalignr T,T,T,8
+ vpxor T, T, TMP1
+
+ vmovdqu TMP5, XMMWORD PTR[10*16 + KS]
+ cmp NR, 10
+ je @f
+
+ ROUND 10
+ ROUND 11
+ vmovdqu TMP5, XMMWORD PTR[12*16 + KS]
+ cmp NR, 12
+ je @f
+
+ ROUND 12
+ ROUND 13
+ vmovdqu TMP5, XMMWORD PTR[14*16 + KS]
+@@:
+ vpxor TMP3, TMP5, XMMWORD PTR[0*16 + PT]
+ vaesenclast CTR0, CTR0, TMP3
+ vpxor TMP3, TMP5, XMMWORD PTR[1*16 + PT]
+ vaesenclast CTR1, CTR1, TMP3
+ vpxor TMP3, TMP5, XMMWORD PTR[2*16 + PT]
+ vaesenclast CTR2, CTR2, TMP3
+ vpxor TMP3, TMP5, XMMWORD PTR[3*16 + PT]
+ vaesenclast CTR3, CTR3, TMP3
+ vpxor TMP3, TMP5, XMMWORD PTR[4*16 + PT]
+ vaesenclast CTR4, CTR4, TMP3
+ vpxor TMP3, TMP5, XMMWORD PTR[5*16 + PT]
+ vaesenclast CTR5, CTR5, TMP3
+ vpxor TMP3, TMP5, XMMWORD PTR[6*16 + PT]
+ vaesenclast CTR6, CTR6, TMP3
+ vpxor TMP3, TMP5, XMMWORD PTR[7*16 + PT]
+ vaesenclast CTR7, CTR7, TMP3
+
+ vmovdqu XMMWORD PTR[0*16 + CT], CTR0
+ vpshufb CTR0, CTR0, BSWAPMASK
+ vmovdqu XMMWORD PTR[1*16 + CT], CTR1
+ vpshufb CTR1, CTR1, BSWAPMASK
+ vmovdqu XMMWORD PTR[2*16 + CT], CTR2
+ vpshufb CTR2, CTR2, BSWAPMASK
+ vmovdqu XMMWORD PTR[3*16 + CT], CTR3
+ vpshufb CTR3, CTR3, BSWAPMASK
+ vmovdqu XMMWORD PTR[4*16 + CT], CTR4
+ vpshufb CTR4, CTR4, BSWAPMASK
+ vmovdqu XMMWORD PTR[5*16 + CT], CTR5
+ vpshufb CTR5, CTR5, BSWAPMASK
+ vmovdqu XMMWORD PTR[6*16 + CT], CTR6
+ vpshufb CTR6, CTR6, BSWAPMASK
+ vmovdqu XMMWORD PTR[7*16 + CT], CTR7
+ vpshufb TMP5, CTR7, BSWAPMASK
+
+ vmovdqa XMMWORD PTR[1*16 + rsp], CTR6
+ vmovdqa XMMWORD PTR[2*16 + rsp], CTR5
+ vmovdqa XMMWORD PTR[3*16 + rsp], CTR4
+ vmovdqa XMMWORD PTR[4*16 + rsp], CTR3
+ vmovdqa XMMWORD PTR[5*16 + rsp], CTR2
+ vmovdqa XMMWORD PTR[6*16 + rsp], CTR1
+ vmovdqa XMMWORD PTR[7*16 + rsp], CTR0
+
+ vpxor T, T, TMP4
+
+ lea CT, [8*16 + CT]
+ lea PT, [8*16 + PT]
+ jmp LEncDataOctets
+
+LEndEncOctets:
+
+ vpshufd TMP4, TMP5, 78
+ vpxor TMP4, TMP4, TMP5
+ vpclmulqdq TMP0, TMP4, XMMWORD PTR[0*16 + 8*16 + Htbl], 000h
+ vmovdqu TMP4, XMMWORD PTR[0*16 + Htbl]
+ vpclmulqdq TMP1, TMP5, TMP4, 011h
+ vpclmulqdq TMP2, TMP5, TMP4, 000h
+
+ vmovdqu TMP5, XMMWORD PTR[1*16 + rsp]
+ KARATSUBA 1
+ vmovdqu TMP5, XMMWORD PTR[2*16 + rsp]
+ KARATSUBA 2
+ vmovdqu TMP5, XMMWORD PTR[3*16 + rsp]
+ KARATSUBA 3
+ vmovdqu TMP5, XMMWORD PTR[4*16 + rsp]
+ KARATSUBA 4
+ vmovdqu TMP5, XMMWORD PTR[5*16 + rsp]
+ KARATSUBA 5
+ vmovdqu TMP5, XMMWORD PTR[6*16 + rsp]
+ KARATSUBA 6
+ vpxor TMP5, T, XMMWORD PTR[7*16 + rsp]
+ KARATSUBA 7
+
+ vpxor TMP0, TMP0, TMP1
+ vpxor TMP0, TMP0, TMP2
+ vpsrldq TMP3, TMP0, 8
+ vpxor TMP4, TMP1, TMP3
+ vpslldq TMP3, TMP0, 8
+ vpxor T, TMP2, TMP3
+
+ vpclmulqdq TMP1, T, XMMWORD PTR[Lpoly], 010h
+ vpalignr T,T,T,8
+ vpxor T, T, TMP1
+
+ vpclmulqdq TMP1, T, XMMWORD PTR[Lpoly], 010h
+ vpalignr T,T,T,8
+ vpxor T, T, TMP1
+
+ vpxor T, T, TMP4
+
+ sub aluCTR, 7
+
+LEncDataSingles:
+
+ cmp len, 16
+ jb LEncDataTail
+ sub len, 16
+
+ vmovdqa TMP1, XMMWORD PTR[8*16 + 0*16 + rsp]
+ NEXTCTR 0
+
+ vaesenc TMP1, TMP1, XMMWORD PTR[1*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[2*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[3*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[4*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[5*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[6*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[7*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[8*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[9*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[10*16 + KS]
+ cmp NR, 10
+ je @f
+ vaesenc TMP1, TMP1, XMMWORD PTR[10*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[11*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[12*16 + KS]
+ cmp NR, 12
+ je @f
+ vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[14*16 + KS]
+@@:
+ vaesenclast TMP1, TMP1, TMP2
+ vpxor TMP1, TMP1, XMMWORD PTR[PT]
+ vmovdqu XMMWORD PTR[CT], TMP1
+
+ lea PT, [16+PT]
+ lea CT, [16+CT]
+
+ vpshufb TMP1, TMP1, BSWAPMASK
+ vpxor T, T, TMP1
+ vmovdqu TMP0, XMMWORD PTR[Htbl]
+ GFMUL T, T, TMP0, TMP1, TMP2, TMP3, TMP4
+
+ jmp LEncDataSingles
+
+LEncDataTail:
+
+ test len, len
+ jz LEncDataEnd
+
+ vmovdqa TMP1, XMMWORD PTR[8*16 + 0*16 + rsp]
+
+ vaesenc TMP1, TMP1, XMMWORD PTR[1*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[2*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[3*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[4*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[5*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[6*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[7*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[8*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[9*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[10*16 + KS]
+ cmp NR, 10
+ je @f
+ vaesenc TMP1, TMP1, XMMWORD PTR[10*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[11*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[12*16 + KS]
+ cmp NR, 12
+ je @f
+ vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[14*16 + KS]
+@@:
+ vaesenclast TMP1, TMP1, TMP2
+; zero a temp location
+ vpxor TMP2, TMP2, TMP2
+ vmovdqa XMMWORD PTR[rsp], TMP2
+; copy as many bytes as needed
+ xor KS, KS
+
+@@:
+ cmp len, KS
+ je @f
+ mov al, [PT + KS]
+ mov [rsp + KS], al
+ inc KS
+ jmp @b
+@@:
+ vpxor TMP1, TMP1, XMMWORD PTR[rsp]
+ vmovdqa XMMWORD PTR[rsp], TMP1
+ xor KS, KS
+@@:
+ cmp len, KS
+ je @f
+ mov al, [rsp + KS]
+ mov [CT + KS], al
+ inc KS
+ jmp @b
+@@:
+ cmp KS, 16
+ je @f
+ mov BYTE PTR[rsp + KS], 0
+ inc KS
+ jmp @b
+@@:
+BAIL:
+ vmovdqa TMP1, XMMWORD PTR[rsp]
+ vpshufb TMP1, TMP1, BSWAPMASK
+ vpxor T, T, TMP1
+ vmovdqu TMP0, XMMWORD PTR[Htbl]
+ GFMUL T, T, TMP0, TMP1, TMP2, TMP3, TMP4
+
+LEncDataEnd:
+
+ vmovdqu XMMWORD PTR[16*16 + 1*16 + Gctx], T
+ bswap aluCTR
+ mov [16*16 + 2*16 + 3*4 + Gctx], aluCTR
+
+ mov rsp, rbp
+
+ vmovdqu xmm6, XMMWORD PTR[rsp + 0*16]
+ vmovdqu xmm7, XMMWORD PTR[rsp + 1*16]
+ vmovdqu xmm8, XMMWORD PTR[rsp + 2*16]
+ vmovdqu xmm9, XMMWORD PTR[rsp + 3*16]
+ vmovdqu xmm10, XMMWORD PTR[rsp + 4*16]
+ vmovdqu xmm11, XMMWORD PTR[rsp + 5*16]
+ vmovdqu xmm12, XMMWORD PTR[rsp + 6*16]
+ vmovdqu xmm13, XMMWORD PTR[rsp + 7*16]
+ vmovdqu xmm14, XMMWORD PTR[rsp + 8*16]
+ vmovdqu xmm15, XMMWORD PTR[rsp + 9*16]
+
+ add rsp, 10*16
+ pop rbp
+ pop r13
+ pop r12
+ pop r11
+
+ vzeroupper
+
+ ret
+intel_aes_gcmENC ENDP
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; Decrypt and Authenticate
+; void intel_aes_gcmDEC(uint8_t* CT, uint8_t* PT, void *Gctx, unsigned int len);
+;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ALIGN 16
+intel_aes_gcmDEC PROC ; decrypt-and-hash: loads ciphertext via CT, stores plaintext via PT, updates hash and counter in Gctx
+; NEXTCTR i: bump the counter and patch the last dword of stack slot i.
+NEXTCTR MACRO i
+ add aluCTR, 1
+ mov aluTMP, aluCTR
+ xor aluTMP, aluKSl ; aluKSl is the byte-swapped last dword of round key 0 (loaded below)
+ bswap aluTMP ; result equals bswap(counter) xor last dword of round key 0
+ mov [3*4 + i*16 + rsp], aluTMP ; only bytes 12..15 of slot i are rewritten
+ENDM
+
+PT textequ <rdx> ; output pointer: plaintext is stored through it
+CT textequ <rcx> ; input pointer: ciphertext is loaded through it
+
+ test len, len
+ jnz LbeginDEC
+ ret ; len == 0: return before anything is pushed or modified
+
+LbeginDEC:
+
+ vzeroupper
+ push r11
+ push r12
+ push r13
+ push rbp
+ sub rsp, 10*16 ; save area for xmm6-xmm15
+ vmovdqu XMMWORD PTR[rsp + 0*16], xmm6
+ vmovdqu XMMWORD PTR[rsp + 1*16], xmm7
+ vmovdqu XMMWORD PTR[rsp + 2*16], xmm8
+ vmovdqu XMMWORD PTR[rsp + 3*16], xmm9
+ vmovdqu XMMWORD PTR[rsp + 4*16], xmm10
+ vmovdqu XMMWORD PTR[rsp + 5*16], xmm11
+ vmovdqu XMMWORD PTR[rsp + 6*16], xmm12
+ vmovdqu XMMWORD PTR[rsp + 7*16], xmm13
+ vmovdqu XMMWORD PTR[rsp + 8*16], xmm14
+ vmovdqu XMMWORD PTR[rsp + 9*16], xmm15
+
+ mov rbp, rsp ; rbp keeps the address of the xmm save area for the epilogue
+ sub rsp, 8*16 ; eight 16-byte counter slots
+ and rsp, -16 ; 16-byte alignment so the slots can be accessed with vmovdqa
+
+ vmovdqu T, XMMWORD PTR[16*16 + 1*16 + Gctx] ; running hash value kept in the context
+ vmovdqu CTR0, XMMWORD PTR[16*16 + 2*16 + Gctx] ; current counter block kept in the context
+ vmovdqu BSWAPMASK, XMMWORD PTR[Lbswap_mask]
+ mov KS, [16*16 + 3*16 + Gctx] ; pointer stored in the context
+ mov NR, [4 + KS] ; compared with 10 and 12 below to pick the final round key
+ lea KS, [48 + KS] ; from here on round key i is read at i*16 + KS
+
+ vpshufb CTR0, CTR0, BSWAPMASK ; NOTE(review): CTR0 is reloaded from slot 0 before its next use in this procedure
+
+ mov aluCTR, [16*16 + 2*16 + 3*4 + Gctx] ; last dword of the counter block
+ mov aluKSl, [3*4 + KS] ; last dword of round key 0
+ bswap aluCTR
+ bswap aluKSl
+
+ vmovdqu TMP0, XMMWORD PTR[0*16 + KS]
+ vpxor TMP0, TMP0, XMMWORD PTR[16*16 + 2*16 + Gctx]
+ vmovdqu XMMWORD PTR[0*16 + rsp], TMP0 ; slot 0 = counter block xor round key 0
+
+ cmp len, 128
+ jb LDecDataSingles ; fewer than eight blocks: skip the 8-way loop
+; Prepare the "top" counters
+ vmovdqu XMMWORD PTR[1*16 + rsp], TMP0
+ vmovdqu XMMWORD PTR[2*16 + rsp], TMP0
+ vmovdqu XMMWORD PTR[3*16 + rsp], TMP0
+ vmovdqu XMMWORD PTR[4*16 + rsp], TMP0
+ vmovdqu XMMWORD PTR[5*16 + rsp], TMP0
+ vmovdqu XMMWORD PTR[6*16 + rsp], TMP0
+ vmovdqu XMMWORD PTR[7*16 + rsp], TMP0
+; Slots 1..7 get counter+1 .. counter+7; aluCTR ends up 7 ahead of slot 0.
+ NEXTCTR 1
+ NEXTCTR 2
+ NEXTCTR 3
+ NEXTCTR 4
+ NEXTCTR 5
+ NEXTCTR 6
+ NEXTCTR 7
+
+LDecDataOctets:
+ cmp len, 128
+ jb LEndDecOctets
+ sub len, 128
+; Load the eight prepared counter slots (each already xored with round key 0).
+ vmovdqa CTR0, XMMWORD PTR[0*16 + rsp]
+ vmovdqa CTR1, XMMWORD PTR[1*16 + rsp]
+ vmovdqa CTR2, XMMWORD PTR[2*16 + rsp]
+ vmovdqa CTR3, XMMWORD PTR[3*16 + rsp]
+ vmovdqa CTR4, XMMWORD PTR[4*16 + rsp]
+ vmovdqa CTR5, XMMWORD PTR[5*16 + rsp]
+ vmovdqa CTR6, XMMWORD PTR[6*16 + rsp]
+ vmovdqa CTR7, XMMWORD PTR[7*16 + rsp]
+; Start hashing this group: last ciphertext block (7*16 + CT) times Htbl entry 0.
+ vmovdqu TMP5, XMMWORD PTR[7*16 + CT]
+ vpshufb TMP5, TMP5, BSWAPMASK
+ vpshufd TMP4, TMP5, 78 ; swap the two qwords
+ vpxor TMP4, TMP4, TMP5 ; xor of halves for the Karatsuba middle product
+ vpclmulqdq TMP0, TMP4, XMMWORD PTR[0*16 + 8*16 + Htbl], 000h ; TMP0 accumulates middle products
+ vmovdqu TMP4, XMMWORD PTR[0*16 + Htbl]
+ vpclmulqdq TMP1, TMP5, TMP4, 011h ; TMP1 accumulates high products
+ vpclmulqdq TMP2, TMP5, TMP4, 000h ; TMP2 accumulates low products
+
+ vmovdqu TMP5, XMMWORD PTR[6*16 + CT]
+ vpshufb TMP5, TMP5, BSWAPMASK
+ ROUNDMUL 1 ; macro defined earlier in the file; presumably AES round 1 on CTR0-7 plus hash accumulation of TMP5 - verify there
+ NEXTCTR 0 ; refresh slot 0 for the next iteration
+ vmovdqu TMP5, XMMWORD PTR[5*16 + CT]
+ vpshufb TMP5, TMP5, BSWAPMASK
+ ROUNDMUL 2
+ NEXTCTR 1
+ vmovdqu TMP5, XMMWORD PTR[4*16 + CT]
+ vpshufb TMP5, TMP5, BSWAPMASK
+ ROUNDMUL 3
+ NEXTCTR 2
+ vmovdqu TMP5, XMMWORD PTR[3*16 + CT]
+ vpshufb TMP5, TMP5, BSWAPMASK
+ ROUNDMUL 4
+ NEXTCTR 3
+ vmovdqu TMP5, XMMWORD PTR[2*16 + CT]
+ vpshufb TMP5, TMP5, BSWAPMASK
+ ROUNDMUL 5
+ NEXTCTR 4
+ vmovdqu TMP5, XMMWORD PTR[1*16 + CT]
+ vpshufb TMP5, TMP5, BSWAPMASK
+ ROUNDMUL 6
+ NEXTCTR 5
+ vmovdqu TMP5, XMMWORD PTR[0*16 + CT]
+ vpshufb TMP5, TMP5, BSWAPMASK
+ vpxor TMP5, TMP5, T ; the running hash is folded into the first block of the group
+ ROUNDMUL 7
+ NEXTCTR 6
+
+ ROUND 8 ; macro defined earlier in the file; presumably a plain AES round on CTR0-7 - verify there
+ NEXTCTR 7
+; Fold the Karatsuba middle term: TMP4 keeps the high half, T the low half.
+ vpxor TMP0, TMP0, TMP1
+ vpxor TMP0, TMP0, TMP2
+ vpsrldq TMP3, TMP0, 8
+ vpxor TMP4, TMP1, TMP3
+ vpslldq TMP3, TMP0, 8
+ vpxor T, TMP2, TMP3
+; First reduction step with Lpoly.
+ vpclmulqdq TMP1, T, XMMWORD PTR[Lpoly], 010h
+ vpalignr T,T,T,8
+ vpxor T, T, TMP1
+
+ ROUND 9
+; Second reduction step with Lpoly.
+ vpclmulqdq TMP1, T, XMMWORD PTR[Lpoly], 010h
+ vpalignr T,T,T,8
+ vpxor T, T, TMP1
+; Choose the final round key: index 10 when NR is 10, 12 when NR is 12, otherwise 14.
+ vmovdqu TMP5, XMMWORD PTR[10*16 + KS]
+ cmp NR, 10
+ je @f
+
+ ROUND 10
+ ROUND 11
+ vmovdqu TMP5, XMMWORD PTR[12*16 + KS]
+ cmp NR, 12
+ je @f
+
+ ROUND 12
+ ROUND 13
+ vmovdqu TMP5, XMMWORD PTR[14*16 + KS]
+@@:
+ vpxor TMP3, TMP5, XMMWORD PTR[0*16 + CT] ; final key xor ciphertext, so vaesenclast produces plaintext directly
+ vaesenclast CTR0, CTR0, TMP3
+ vpxor TMP3, TMP5, XMMWORD PTR[1*16 + CT]
+ vaesenclast CTR1, CTR1, TMP3
+ vpxor TMP3, TMP5, XMMWORD PTR[2*16 + CT]
+ vaesenclast CTR2, CTR2, TMP3
+ vpxor TMP3, TMP5, XMMWORD PTR[3*16 + CT]
+ vaesenclast CTR3, CTR3, TMP3
+ vpxor TMP3, TMP5, XMMWORD PTR[4*16 + CT]
+ vaesenclast CTR4, CTR4, TMP3
+ vpxor TMP3, TMP5, XMMWORD PTR[5*16 + CT]
+ vaesenclast CTR5, CTR5, TMP3
+ vpxor TMP3, TMP5, XMMWORD PTR[6*16 + CT]
+ vaesenclast CTR6, CTR6, TMP3
+ vpxor TMP3, TMP5, XMMWORD PTR[7*16 + CT]
+ vaesenclast CTR7, CTR7, TMP3
+; Store the eight plaintext blocks.
+ vmovdqu XMMWORD PTR[0*16 + PT], CTR0
+ vmovdqu XMMWORD PTR[1*16 + PT], CTR1
+ vmovdqu XMMWORD PTR[2*16 + PT], CTR2
+ vmovdqu XMMWORD PTR[3*16 + PT], CTR3
+ vmovdqu XMMWORD PTR[4*16 + PT], CTR4
+ vmovdqu XMMWORD PTR[5*16 + PT], CTR5
+ vmovdqu XMMWORD PTR[6*16 + PT], CTR6
+ vmovdqu XMMWORD PTR[7*16 + PT], CTR7
+
+ vpxor T, T, TMP4 ; add the high half: reduction of this group is complete
+
+ lea CT, [8*16 + CT]
+ lea PT, [8*16 + PT]
+ jmp LDecDataOctets
+
+LEndDecOctets:
+
+ sub aluCTR, 7 ; slots 1..7 ran ahead; bring aluCTR back to the counter held in slot 0
+
+LDecDataSingles:
+; One 16-byte block per iteration while at least 16 bytes remain.
+ cmp len, 16
+ jb LDecDataTail
+ sub len, 16
+
+ vmovdqa TMP1, XMMWORD PTR[0*16 + rsp] ; slot 0: counter block already xored with round key 0
+ NEXTCTR 0 ; advance slot 0 for the following block
+
+ vaesenc TMP1, TMP1, XMMWORD PTR[1*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[2*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[3*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[4*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[5*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[6*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[7*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[8*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[9*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[10*16 + KS] ; final round key when NR is 10
+ cmp NR, 10
+ je @f
+ vaesenc TMP1, TMP1, XMMWORD PTR[10*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[11*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[12*16 + KS] ; final round key when NR is 12
+ cmp NR, 12
+ je @f
+ vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[14*16 + KS] ; final round key otherwise
+@@:
+ vaesenclast TMP1, TMP1, TMP2 ; TMP1 = keystream block
+
+ vmovdqu TMP2, XMMWORD PTR[CT] ; keep the ciphertext: it is what gets hashed
+ vpxor TMP1, TMP1, TMP2
+ vmovdqu XMMWORD PTR[PT], TMP1
+
+ lea PT, [16+PT]
+ lea CT, [16+CT]
+; Hash update: T = (T xor byte-swapped ciphertext) times Htbl entry 0.
+ vpshufb TMP2, TMP2, BSWAPMASK
+ vpxor T, T, TMP2
+ vmovdqu TMP0, XMMWORD PTR[Htbl]
+ GFMUL T, T, TMP0, TMP1, TMP2, TMP3, TMP4
+
+ jmp LDecDataSingles
+
+LDecDataTail:
+; Final partial block (fewer than 16 bytes); skipped entirely when len is 0.
+ test len, len
+ jz LDecDataEnd
+
+ vmovdqa TMP1, XMMWORD PTR[0*16 + rsp]
+ inc aluCTR ; counter value is advanced; slot 0 itself is not refreshed (it is reused as scratch below)
+ vaesenc TMP1, TMP1, XMMWORD PTR[1*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[2*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[3*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[4*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[5*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[6*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[7*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[8*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[9*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[10*16 + KS]
+ cmp NR, 10
+ je @f
+ vaesenc TMP1, TMP1, XMMWORD PTR[10*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[11*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[12*16 + KS]
+ cmp NR, 12
+ je @f
+ vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[14*16 + KS]
+@@:
+ vaesenclast TMP1, TMP1, TMP2 ; TMP1 = keystream block
+; copy as many bytes as needed
+ xor KS, KS ; the key schedule is no longer needed: KS becomes the byte index
+@@:
+ cmp len, KS
+ je @f
+ mov al, [CT + KS]
+ mov [rsp + KS], al ; ciphertext bytes go to the scratch block at rsp
+ inc KS
+ jmp @b
+@@:
+ cmp KS, 16
+ je @f
+ mov BYTE PTR[rsp + KS], 0 ; zero-pad the scratch block up to 16 bytes
+ inc KS
+ jmp @b
+@@:
+ vmovdqa TMP2, XMMWORD PTR[rsp]
+ vpshufb TMP2, TMP2, BSWAPMASK
+ vpxor T, T, TMP2 ; hash the zero-padded ciphertext
+ vmovdqu TMP0, XMMWORD PTR[Htbl]
+ GFMUL T, T, TMP0, TMP5, TMP2, TMP3, TMP4
+; TMP5 served as GFMUL scratch above so that the keystream in TMP1 survives.
+
+ vpxor TMP1, TMP1, XMMWORD PTR[rsp] ; keystream xor padded ciphertext
+ vmovdqa XMMWORD PTR[rsp], TMP1
+ xor KS, KS
+@@:
+ cmp len, KS
+ je @f
+ mov al, [rsp + KS]
+ mov [PT + KS], al ; only len plaintext bytes are written out
+ inc KS
+ jmp @b
+@@:
+
+LDecDataEnd:
+; Write the hash and counter back to the context, then undo the prologue.
+ vmovdqu XMMWORD PTR[16*16 + 1*16 + Gctx], T
+ bswap aluCTR ; back to the byte order used in the stored counter block
+ mov [16*16 + 2*16 + 3*4 + Gctx], aluCTR
+
+ mov rsp, rbp ; drop the aligned counter slots; rsp points at the xmm save area again
+
+ vmovdqu xmm6, XMMWORD PTR[rsp + 0*16]
+ vmovdqu xmm7, XMMWORD PTR[rsp + 1*16]
+ vmovdqu xmm8, XMMWORD PTR[rsp + 2*16]
+ vmovdqu xmm9, XMMWORD PTR[rsp + 3*16]
+ vmovdqu xmm10, XMMWORD PTR[rsp + 4*16]
+ vmovdqu xmm11, XMMWORD PTR[rsp + 5*16]
+ vmovdqu xmm12, XMMWORD PTR[rsp + 6*16]
+ vmovdqu xmm13, XMMWORD PTR[rsp + 7*16]
+ vmovdqu xmm14, XMMWORD PTR[rsp + 8*16]
+ vmovdqu xmm15, XMMWORD PTR[rsp + 9*16]
+
+ add rsp, 10*16
+ pop rbp
+ pop r13
+ pop r12
+ pop r11
+
+ vzeroupper
+
+ ret
+ret ; NOTE(review): unreachable, duplicates the ret on the previous line
+intel_aes_gcmDEC ENDP
+
+
+END
diff --git a/security/nss/lib/freebl/intel-gcm-x86-masm.asm b/security/nss/lib/freebl/intel-gcm-x86-masm.asm
new file mode 100644
index 000000000..6362ad859
--- /dev/null
+++ b/security/nss/lib/freebl/intel-gcm-x86-masm.asm
@@ -0,0 +1,1209 @@
+; LICENSE:
+; This submission to NSS is to be made available under the terms of the
+; Mozilla Public License, v. 2.0. You can obtain one at
+; http://mozilla.org/MPL/2.0/.
+;###############################################################################
+; Copyright(c) 2014, Intel Corp.
+; Developers and authors:
+; Shay Gueron and Vlad Krasnov
+; Intel Corporation, Israel Development Centre, Haifa, Israel
+; Please send feedback directly to crypto.feedback.alias@intel.com
+
+
+.MODEL FLAT, C ; 32-bit flat model, C language type for the procedures below
+.XMM
+
+.DATA
+ALIGN 16
+Lone dq 1,0 ; added with vpaddd to step a byte-swapped counter block by 1
+Ltwo dq 2,0 ; same, step by 2
+Lbswap_mask db 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 ; vpshufb control that reverses the 16 bytes
+Lshuff_mask dq 0f0f0f0f0f0f0f0fh, 0f0f0f0f0f0f0f0fh
+Lpoly dq 01h, 0c200000000000000h ; constant used by the vpclmulqdq reduction steps
+
+.CODE
+
+
+GFMUL MACRO DST, SRC1, SRC2, TMP1, TMP2, TMP3, TMP4
+ vpclmulqdq TMP1, SRC2, SRC1, 0h ; low qword product
+ vpclmulqdq TMP4, SRC2, SRC1, 011h ; high qword product
+; Karatsuba middle term from the xor of each operand's halves.
+ vpshufd TMP2, SRC2, 78
+ vpshufd TMP3, SRC1, 78
+ vpxor TMP2, TMP2, SRC2
+ vpxor TMP3, TMP3, SRC1
+
+ vpclmulqdq TMP2, TMP2, TMP3, 0h
+ vpxor TMP2, TMP2, TMP1
+ vpxor TMP2, TMP2, TMP4
+; Split the middle term across the low (TMP1) and high (TMP4) halves.
+ vpslldq TMP3, TMP2, 8
+ vpsrldq TMP2, TMP2, 8
+
+ vpxor TMP1, TMP1, TMP3
+ vpxor TMP4, TMP4, TMP2
+; First reduction step with Lpoly.
+ vpclmulqdq TMP2, TMP1, [Lpoly], 010h
+ vpshufd TMP3, TMP1, 78
+ vpxor TMP1, TMP2, TMP3
+; Second reduction step with Lpoly.
+ vpclmulqdq TMP2, TMP1, [Lpoly], 010h
+ vpshufd TMP3, TMP1, 78
+ vpxor TMP1, TMP2, TMP3
+
+ vpxor DST, TMP1, TMP4 ; DST is written last, so it may be the same register as SRC1 or SRC2
+; The four TMP registers are overwritten while SRC1 and SRC2 are still read, so they must be distinct from both.
+ ENDM
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; Generates the final GCM tag
+; void intel_aes_gcmTAG(unsigned char Htbl[16*16],
+; unsigned char *Tp,
+; unsigned int Mlen,
+; unsigned int Alen,
+; unsigned char* X0,
+; unsigned char* TAG);
+;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ALIGN 16
+intel_aes_gcmTAG PROC ; folds the length block into the hash at Tp, multiplies by Htbl entry 0, xors X0 and stores 16 bytes to TAG
+
+Htbl textequ <eax> ; argument 0
+Tp textequ <ecx> ; argument 1
+X0 textequ <edx> ; argument 4
+TAG textequ <ebx> ; argument 5
+
+T textequ <xmm0>
+TMP0 textequ <xmm1>
+
+ push ebx ; ebx is used for TAG and restored before returning
+; After the push, argument k is at esp + 2*4 + k*4 (return address plus saved ebx).
+ mov Htbl, [esp + 2*4 + 0*4]
+ mov Tp, [esp + 2*4 + 1*4]
+ mov X0, [esp + 2*4 + 4*4]
+ mov TAG, [esp + 2*4 + 5*4]
+
+ vzeroupper
+ vmovdqu T, XMMWORD PTR[Tp]
+; Build the length block: Mlen in dword 0, Alen in dword 2, both multiplied by 8.
+ vpxor TMP0, TMP0, TMP0
+ vpinsrd TMP0, TMP0, DWORD PTR[esp + 2*4 + 2*4], 0 ; Mlen (argument 2)
+ vpinsrd TMP0, TMP0, DWORD PTR[esp + 2*4 + 3*4], 2 ; Alen (argument 3)
+ vpsllq TMP0, TMP0, 3 ; shift each qword left by 3 bits
+
+ vpxor T, T, TMP0
+ vmovdqu TMP0, XMMWORD PTR[Htbl]
+ GFMUL T, T, TMP0, xmm2, xmm3, xmm4, xmm5
+
+ vpshufb T, T, [Lbswap_mask] ; reverse the byte order before combining with X0
+ vpxor T, T, [X0]
+ vmovdqu XMMWORD PTR[TAG], T
+ vzeroupper
+
+ pop ebx
+
+ ret
+
+intel_aes_gcmTAG ENDP
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; Generates the H table
+; void intel_aes_gcmINIT(unsigned char Htbl[16*16], unsigned char *KS, int NR);
+;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ALIGN 16
+intel_aes_gcmINIT PROC ; fills Htbl: entries 0..7 hold successive GFMUL powers of the hash key, entries 8..15 the xor of each entry's halves
+
+Htbl textequ <eax> ; argument 0
+KS textequ <ecx> ; argument 1: round keys, 16 bytes each
+NR textequ <edx> ; argument 2: number of rounds
+
+T textequ <xmm0>
+TMP0 textequ <xmm1>
+; No registers are pushed here, so argument k is at esp + 4*1 + k*4.
+ mov Htbl, [esp + 4*1 + 0*4]
+ mov KS, [esp + 4*1 + 1*4]
+ mov NR, [esp + 4*1 + 2*4]
+
+ vzeroupper
+ ; AES-ENC(0)
+ vmovdqu T, XMMWORD PTR[KS] ; zero block xor round key 0 is just round key 0
+ lea KS, [16 + KS]
+ dec NR ; NR-1 middle rounds run in the loop, the last round uses vaesenclast
+Lenc_loop:
+ vaesenc T, T, [KS]
+ lea KS, [16 + KS]
+ dec NR
+ jnz Lenc_loop
+
+ vaesenclast T, T, [KS]
+ vpshufb T, T, [Lbswap_mask]
+
+ ;Calculate H` = GFMUL(H, 2)
+ vpsrad xmm3, T, 31
+ vpshufd xmm3, xmm3, 0ffh ; broadcast the sign of the top dword: all ones when the top bit of T is set
+ vpand xmm5, xmm3, [Lpoly] ; Lpoly is applied only when the top bit is shifted out
+ vpsrld xmm3, T, 31
+ vpslld xmm4, T, 1
+ vpslldq xmm3, xmm3, 4 ; carry each dword's top bit into the next dword
+ vpxor T, xmm4, xmm3
+ vpxor T, T, xmm5
+
+ vmovdqu TMP0, T ; TMP0 keeps the first table entry as the fixed multiplier
+ vmovdqu XMMWORD PTR[Htbl + 0*16], T
+
+ vpshufd xmm2, T, 78
+ vpxor xmm2, xmm2, T
+ vmovdqu XMMWORD PTR[Htbl + 8*16 + 0*16], xmm2 ; xor of halves, read back by the Karatsuba middle products
+
+ i = 1
+ WHILE i LT 8
+ GFMUL T, T, TMP0, xmm2, xmm3, xmm4, xmm5
+ vmovdqu XMMWORD PTR[Htbl + i*16], T
+ vpshufd xmm2, T, 78
+ vpxor xmm2, xmm2, T
+ vmovdqu XMMWORD PTR[Htbl + 8*16 + i*16], xmm2
+ i = i+1
+ ENDM
+ vzeroupper
+ ret
+intel_aes_gcmINIT ENDP
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; Authenticate only
+; void intel_aes_gcmAAD(unsigned char Htbl[16*16], unsigned char *AAD, unsigned int Alen, unsigned char *Tp);
+;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ALIGN 16
+intel_aes_gcmAAD PROC ; hashes Alen bytes at AAD into the 16-byte value at Tp using the table at Htbl
+
+Htbl textequ <eax> ; argument 0
+inp textequ <ecx> ; argument 1: input pointer
+len textequ <edx> ; argument 2: byte count; the prefix loop only terminates for multiples of 16
+Tp textequ <ebx> ; argument 3: hash value, read at entry and written at exit
+hlp0 textequ <esi>
+
+DATA textequ <xmm0>
+T textequ <xmm1>
+TMP0 textequ <xmm2>
+TMP1 textequ <xmm3>
+TMP2 textequ <xmm4>
+TMP3 textequ <xmm5>
+TMP4 textequ <xmm6>
+Xhi textequ <xmm7>
+
+KARATSUBA_AAD MACRO i
+ vpclmulqdq TMP3, DATA, [Htbl + i*16], 0h
+ vpxor TMP0, TMP0, TMP3
+ vpclmulqdq TMP3, DATA, [Htbl + i*16], 011h
+ vpxor TMP1, TMP1, TMP3
+ vpshufd TMP3, DATA, 78
+ vpxor TMP3, TMP3, DATA
+ vpclmulqdq TMP3, TMP3, [Htbl + 8*16 + i*16], 0h
+ vpxor TMP2, TMP2, TMP3
+ENDM
+; Nothing is pushed yet, so Alen (argument 2) is at esp + 4*1 + 2*4.
+ cmp DWORD PTR[esp + 4*1 + 2*4], 0
+ jnz LbeginAAD
+ ret ; Alen == 0: the value at Tp is left untouched
+
+LbeginAAD:
+ push ebx
+ push esi
+; After the two pushes, argument k is at esp + 4*3 + k*4.
+ mov Htbl, [esp + 4*3 + 0*4]
+ mov inp, [esp + 4*3 + 1*4]
+ mov len, [esp + 4*3 + 2*4]
+ mov Tp, [esp + 4*3 + 3*4]
+
+ vzeroupper
+
+ vpxor Xhi, Xhi, Xhi
+
+ vmovdqu T, XMMWORD PTR[Tp]
+ ;we hash 8 block each iteration, if the total amount of blocks is not a multiple of 8, we hash the first n%8 blocks first
+ mov hlp0, len
+ and hlp0, 128-1 ; bytes in the prefix group (len mod 128)
+ jz Lmod_loop
+
+ and len, -128 ; len now counts only the full 8-block groups
+ sub hlp0, 16 ; table offset for the first prefix block
+
+ ; Prefix block
+ vmovdqu DATA, XMMWORD PTR[inp]
+ vpshufb DATA, DATA, [Lbswap_mask]
+ vpxor DATA, DATA, T ; the incoming hash is folded into the first block
+
+ vpclmulqdq TMP0, DATA, XMMWORD PTR[Htbl + hlp0], 0h
+ vpclmulqdq TMP1, DATA, XMMWORD PTR[Htbl + hlp0], 011h
+ vpshufd TMP3, DATA, 78
+ vpxor TMP3, TMP3, DATA
+ vpclmulqdq TMP2, TMP3, XMMWORD PTR[Htbl + 8*16 + hlp0], 0h
+
+ lea inp, [inp+16]
+ test hlp0, hlp0
+ jnz Lpre_loop
+ jmp Lred1
+
+ ;hash remaining prefix blocks (up to 7 total prefix blocks)
+Lpre_loop:
+
+ sub hlp0, 16 ; step down to the next table entry
+
+ vmovdqu DATA, XMMWORD PTR[inp]
+ vpshufb DATA, DATA, [Lbswap_mask]
+
+ vpclmulqdq TMP3, DATA, XMMWORD PTR[Htbl + hlp0], 0h
+ vpxor TMP0, TMP0, TMP3
+ vpclmulqdq TMP3, DATA, XMMWORD PTR[Htbl + hlp0], 011h
+ vpxor TMP1, TMP1, TMP3
+ vpshufd TMP3, DATA, 78
+ vpxor TMP3, TMP3, DATA
+ vpclmulqdq TMP3, TMP3, XMMWORD PTR[Htbl + 8*16 + hlp0], 0h
+ vpxor TMP2, TMP2, TMP3
+
+ test hlp0, hlp0
+ lea inp, [inp+16] ; lea leaves the flags from test intact for the jnz
+ jnz Lpre_loop
+
+Lred1:
+; Karatsuba fixup of the prefix group: Xhi gets the high half, T the low half (not yet reduced).
+ vpxor TMP2, TMP2, TMP0
+ vpxor TMP2, TMP2, TMP1
+ vpsrldq TMP3, TMP2, 8
+ vpslldq TMP2, TMP2, 8
+
+ vpxor Xhi, TMP1, TMP3
+ vpxor T, TMP0, TMP2
+
+Lmod_loop:
+; Each pass hashes 8 blocks while reducing the previous T and Xhi in between.
+ sub len, 16*8
+ jb Ldone
+ ; Block #0
+ vmovdqu DATA, XMMWORD PTR[inp + 16*7]
+ vpshufb DATA, DATA, XMMWORD PTR[Lbswap_mask]
+
+ vpclmulqdq TMP0, DATA, XMMWORD PTR[Htbl + 0*16], 0h
+ vpclmulqdq TMP1, DATA, XMMWORD PTR[Htbl + 0*16], 011h
+ vpshufd TMP3, DATA, 78
+ vpxor TMP3, TMP3, DATA
+ vpclmulqdq TMP2, TMP3, XMMWORD PTR[Htbl + 8*16 + 0*16], 0h
+
+ ; Block #1
+ vmovdqu DATA, XMMWORD PTR[inp + 16*6]
+ vpshufb DATA, DATA, [Lbswap_mask]
+ KARATSUBA_AAD 1
+
+ ; Block #2
+ vmovdqu DATA, XMMWORD PTR[inp + 16*5]
+ vpshufb DATA, DATA, [Lbswap_mask]
+
+ vpclmulqdq TMP4, T, [Lpoly], 010h ;reduction stage 1a
+ vpalignr T, T, T, 8
+
+ KARATSUBA_AAD 2
+
+ vpxor T, T, TMP4 ;reduction stage 1b
+
+ ; Block #3
+ vmovdqu DATA, XMMWORD PTR[inp + 16*4]
+ vpshufb DATA, DATA, [Lbswap_mask]
+ KARATSUBA_AAD 3
+ ; Block #4
+ vmovdqu DATA, XMMWORD PTR[inp + 16*3]
+ vpshufb DATA, DATA, [Lbswap_mask]
+
+ vpclmulqdq TMP4, T, [Lpoly], 010h ;reduction stage 2a
+ vpalignr T, T, T, 8
+
+ KARATSUBA_AAD 4
+
+ vpxor T, T, TMP4 ;reduction stage 2b
+ ; Block #5
+ vmovdqu DATA, XMMWORD PTR[inp + 16*2]
+ vpshufb DATA, DATA, [Lbswap_mask]
+ KARATSUBA_AAD 5
+
+ vpxor T, T, Xhi ;reduction finalize
+ ; Block #6
+ vmovdqu DATA, XMMWORD PTR[inp + 16*1]
+ vpshufb DATA, DATA, [Lbswap_mask]
+ KARATSUBA_AAD 6
+ ; Block #7
+ vmovdqu DATA, XMMWORD PTR[inp + 16*0]
+ vpshufb DATA, DATA, [Lbswap_mask]
+ vpxor DATA, DATA, T ; the reduced hash so far is folded into the first block of the group
+ KARATSUBA_AAD 7
+ ; Aggregated 8 blocks, now karatsuba fixup
+ vpxor TMP2, TMP2, TMP0
+ vpxor TMP2, TMP2, TMP1
+ vpsrldq TMP3, TMP2, 8
+ vpslldq TMP2, TMP2, 8
+
+ vpxor Xhi, TMP1, TMP3
+ vpxor T, TMP0, TMP2
+
+ lea inp, [inp + 16*8]
+ jmp Lmod_loop
+
+Ldone:
+ vpclmulqdq TMP4, T, [Lpoly], 010h ; final reduction, step 1
+ vpalignr T, T, T, 8
+ vpxor T, T, TMP4
+
+ vpclmulqdq TMP4, T, [Lpoly], 010h ; final reduction, step 2
+ vpalignr T, T, T, 8
+ vpxor T, T, TMP4
+
+ vpxor T, T, Xhi
+ vmovdqu XMMWORD PTR[Tp], T
+ vzeroupper
+
+ pop esi
+ pop ebx
+ ret
+
+intel_aes_gcmAAD ENDP
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; Encrypt and Authenticate
+; void intel_aes_gcmENC(unsigned char* PT, unsigned char* CT, void *Gctx, unsigned int len);
+;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+ALIGN 16
+intel_aes_gcmENC PROC ; encrypt-and-hash: loads plaintext via PT, stores ciphertext via CT, updates hash and counter in Gctx
+
+PT textequ <eax>
+CT textequ <ecx>
+Htbl textequ <edx> ; same register as Gctx: the table is addressed from the start of the context
+Gctx textequ <edx>
+len textequ <DWORD PTR[ebp + 5*4 + 3*4]> ; argument 3, used in place on the stack (it is decremented there)
+KS textequ <esi>
+NR textequ <DWORD PTR[-40 + KS]> ; memory operand relative to KS after KS has been advanced by 44
+
+aluCTR textequ <ebx>
+aluTMP textequ <edi>
+
+T textequ <XMMWORD PTR[16*16 + 1*16 + Gctx]> ; the hash lives in the context, not in a register
+TMP0 textequ <xmm1>
+TMP1 textequ <xmm2>
+TMP2 textequ <xmm3>
+TMP3 textequ <xmm4>
+TMP4 textequ <xmm5>
+TMP5 textequ <xmm6>
+; TMP0..TMP5 share xmm1..xmm6 with CTR1..CTR6, so hashing and encryption cannot overlap.
+CTR0 textequ <xmm0>
+CTR1 textequ <xmm1>
+CTR2 textequ <xmm2>
+CTR3 textequ <xmm3>
+CTR4 textequ <xmm4>
+CTR5 textequ <xmm5>
+CTR6 textequ <xmm6>
+; ROUND i: one AES round with round key i on all seven counters (xmm7 holds the key).
+ROUND MACRO i
+ vmovdqu xmm7, XMMWORD PTR[i*16 + KS]
+ vaesenc CTR0, CTR0, xmm7
+ vaesenc CTR1, CTR1, xmm7
+ vaesenc CTR2, CTR2, xmm7
+ vaesenc CTR3, CTR3, xmm7
+ vaesenc CTR4, CTR4, xmm7
+ vaesenc CTR5, CTR5, xmm7
+ vaesenc CTR6, CTR6, xmm7
+ENDM
+; KARATSUBA i: accumulate TMP5 times Htbl entry i into TMP0 (middle), TMP1 (high), TMP2 (low).
+KARATSUBA MACRO i
+ vpshufd TMP4, TMP5, 78
+ vpxor TMP4, TMP4, TMP5
+ vpclmulqdq TMP3, TMP4, XMMWORD PTR[i*16 + 8*16 + Htbl], 000h
+ vpxor TMP0, TMP0, TMP3
+ vmovdqu TMP4, XMMWORD PTR[i*16 + Htbl]
+ vpclmulqdq TMP3, TMP5, TMP4, 011h
+ vpxor TMP1, TMP1, TMP3
+ vpclmulqdq TMP3, TMP5, TMP4, 000h
+ vpxor TMP2, TMP2, TMP3
+ENDM
+; NEXTCTR i: bump the counter and patch the last dword of counter slot i (at 8*16 + i*16 + esp).
+NEXTCTR MACRO i
+ add aluCTR, 1
+ mov aluTMP, aluCTR
+ bswap aluTMP
+ xor aluTMP, [3*4 + KS] ; last dword of round key 0
+ mov [3*4 + 8*16 + i*16 + esp], aluTMP
+ENDM
+
+ cmp DWORD PTR[1*4 + 3*4 + esp], 0 ; argument 3 (len) before anything is pushed
+ jne LbeginENC
+ ret
+
+LbeginENC:
+
+ vzeroupper
+ push ebp
+ push ebx
+ push esi
+ push edi
+
+ mov ebp, esp ; arguments are now at ebp + 5*4 + k*4
+ sub esp, 16*16 ; scratch: slots 0..7 for hash input, slots 8..14 for counters
+ and esp, -16 ; aligned so vmovdqa can be used on the slots
+
+ mov PT, [ebp + 5*4 + 0*4]
+ mov CT, [ebp + 5*4 + 1*4]
+ mov Gctx, [ebp + 5*4 + 2*4]
+
+ mov KS, [16*16 + 3*16 + Gctx] ; pointer stored in the context
+ lea KS, [44 + KS] ; round key i is read at i*16 + KS from here on
+
+ mov aluCTR, [16*16 + 2*16 + 3*4 + Gctx] ; last dword of the counter block
+ bswap aluCTR
+
+
+ vmovdqu TMP0, XMMWORD PTR[0*16 + KS]
+ vpxor TMP0, TMP0, XMMWORD PTR[16*16 + 2*16 + Gctx]
+ vmovdqu XMMWORD PTR[8*16 + 0*16 + esp], TMP0 ; counter slot 0 = counter block xor round key 0
+
+ cmp len, 16*7
+ jb LEncDataSingles ; fewer than 7 blocks: skip the 7-way path
+; Prepare the "top" counters
+ vmovdqu XMMWORD PTR[8*16 + 1*16 + esp], TMP0
+ vmovdqu XMMWORD PTR[8*16 + 2*16 + esp], TMP0
+ vmovdqu XMMWORD PTR[8*16 + 3*16 + esp], TMP0
+ vmovdqu XMMWORD PTR[8*16 + 4*16 + esp], TMP0
+ vmovdqu XMMWORD PTR[8*16 + 5*16 + esp], TMP0
+ vmovdqu XMMWORD PTR[8*16 + 6*16 + esp], TMP0
+
+ vmovdqu CTR0, XMMWORD PTR[16*16 + 2*16 + Gctx]
+ vpshufb CTR0, CTR0, XMMWORD PTR[Lbswap_mask] ; byte-swap so vpaddd can step the counter in dword 0
+; Encrypt the initial 7 blocks
+ sub len, 16*7
+ vpaddd CTR1, CTR0, XMMWORD PTR[Lone]
+ vpaddd CTR2, CTR0, XMMWORD PTR[Ltwo]
+ vpaddd CTR3, CTR2, XMMWORD PTR[Lone]
+ vpaddd CTR4, CTR2, XMMWORD PTR[Ltwo]
+ vpaddd CTR5, CTR4, XMMWORD PTR[Lone]
+ vpaddd CTR6, CTR4, XMMWORD PTR[Ltwo]
+; Swap the seven counters back to memory byte order.
+ vpshufb CTR0, CTR0, XMMWORD PTR[Lbswap_mask]
+ vpshufb CTR1, CTR1, XMMWORD PTR[Lbswap_mask]
+ vpshufb CTR2, CTR2, XMMWORD PTR[Lbswap_mask]
+ vpshufb CTR3, CTR3, XMMWORD PTR[Lbswap_mask]
+ vpshufb CTR4, CTR4, XMMWORD PTR[Lbswap_mask]
+ vpshufb CTR5, CTR5, XMMWORD PTR[Lbswap_mask]
+ vpshufb CTR6, CTR6, XMMWORD PTR[Lbswap_mask]
+; Round 0: xor with round key 0.
+ vmovdqu xmm7, XMMWORD PTR[0*16 + KS]
+ vpxor CTR0, CTR0, xmm7
+ vpxor CTR1, CTR1, xmm7
+ vpxor CTR2, CTR2, xmm7
+ vpxor CTR3, CTR3, xmm7
+ vpxor CTR4, CTR4, xmm7
+ vpxor CTR5, CTR5, xmm7
+ vpxor CTR6, CTR6, xmm7
+
+ ROUND 1
+; Counter slot 0 for the next group is counter+7; same steps as NEXTCTR but with a step of 7.
+ add aluCTR, 7
+ mov aluTMP, aluCTR
+ bswap aluTMP
+ xor aluTMP, [KS + 3*4]
+ mov [8*16 + 0*16 + 3*4 + esp], aluTMP
+
+ ROUND 2
+ NEXTCTR 1
+ ROUND 3
+ NEXTCTR 2
+ ROUND 4
+ NEXTCTR 3
+ ROUND 5
+ NEXTCTR 4
+ ROUND 6
+ NEXTCTR 5
+ ROUND 7
+ NEXTCTR 6
+ ROUND 8
+ ROUND 9
+ vmovdqu xmm7, XMMWORD PTR[10*16 + KS] ; final round key when NR is 10
+ cmp NR, 10
+ je @f
+
+ ROUND 10
+ ROUND 11
+ vmovdqu xmm7, XMMWORD PTR[12*16 + KS] ; final round key when NR is 12
+ cmp NR, 12
+ je @f
+
+ ROUND 12
+ ROUND 13
+ vmovdqu xmm7, XMMWORD PTR[14*16 + KS] ; final round key otherwise
+@@:
+ vaesenclast CTR0, CTR0, xmm7
+ vaesenclast CTR1, CTR1, xmm7
+ vaesenclast CTR2, CTR2, xmm7
+ vaesenclast CTR3, CTR3, xmm7
+ vaesenclast CTR4, CTR4, xmm7
+ vaesenclast CTR5, CTR5, xmm7
+ vaesenclast CTR6, CTR6, xmm7
+; Keystream xor plaintext gives the ciphertext.
+ vpxor CTR0, CTR0, XMMWORD PTR[0*16 + PT]
+ vpxor CTR1, CTR1, XMMWORD PTR[1*16 + PT]
+ vpxor CTR2, CTR2, XMMWORD PTR[2*16 + PT]
+ vpxor CTR3, CTR3, XMMWORD PTR[3*16 + PT]
+ vpxor CTR4, CTR4, XMMWORD PTR[4*16 + PT]
+ vpxor CTR5, CTR5, XMMWORD PTR[5*16 + PT]
+ vpxor CTR6, CTR6, XMMWORD PTR[6*16 + PT]
+
+ vmovdqu XMMWORD PTR[0*16 + CT], CTR0
+ vmovdqu XMMWORD PTR[1*16 + CT], CTR1
+ vmovdqu XMMWORD PTR[2*16 + CT], CTR2
+ vmovdqu XMMWORD PTR[3*16 + CT], CTR3
+ vmovdqu XMMWORD PTR[4*16 + CT], CTR4
+ vmovdqu XMMWORD PTR[5*16 + CT], CTR5
+ vmovdqu XMMWORD PTR[6*16 + CT], CTR6
+; Byte-swap the ciphertext for hashing; the newest block stays in TMP5.
+ vpshufb CTR0, CTR0, XMMWORD PTR[Lbswap_mask]
+ vpshufb CTR1, CTR1, XMMWORD PTR[Lbswap_mask]
+ vpshufb CTR2, CTR2, XMMWORD PTR[Lbswap_mask]
+ vpshufb CTR3, CTR3, XMMWORD PTR[Lbswap_mask]
+ vpshufb CTR4, CTR4, XMMWORD PTR[Lbswap_mask]
+ vpshufb CTR5, CTR5, XMMWORD PTR[Lbswap_mask]
+ vpshufb TMP5, CTR6, XMMWORD PTR[Lbswap_mask]
+; Park the other six blocks in slots 1..6, newest first (slot 6 holds the oldest).
+ vmovdqa XMMWORD PTR[1*16 + esp], CTR5
+ vmovdqa XMMWORD PTR[2*16 + esp], CTR4
+ vmovdqa XMMWORD PTR[3*16 + esp], CTR3
+ vmovdqa XMMWORD PTR[4*16 + esp], CTR2
+ vmovdqa XMMWORD PTR[5*16 + esp], CTR1
+ vmovdqa XMMWORD PTR[6*16 + esp], CTR0
+
+ lea CT, [7*16 + CT]
+ lea PT, [7*16 + PT]
+ jmp LEncData7 ; jumps to the label that immediately follows
+
+LEncData7:
+ cmp len, 16*7
+ jb LEndEnc7
+ sub len, 16*7
+; Hash the previous 7 ciphertext blocks: the newest (TMP5) times Htbl entry 0 first.
+ vpshufd TMP4, TMP5, 78
+ vpxor TMP4, TMP4, TMP5
+ vpclmulqdq TMP0, TMP4, XMMWORD PTR[0*16 + 8*16 + Htbl], 000h
+ vmovdqu TMP4, XMMWORD PTR[0*16 + Htbl]
+ vpclmulqdq TMP1, TMP5, TMP4, 011h
+ vpclmulqdq TMP2, TMP5, TMP4, 000h
+
+ vmovdqu TMP5, XMMWORD PTR[1*16 + esp]
+ KARATSUBA 1
+ vmovdqu TMP5, XMMWORD PTR[2*16 + esp]
+ KARATSUBA 2
+ vmovdqu TMP5, XMMWORD PTR[3*16 + esp]
+ KARATSUBA 3
+ vmovdqu TMP5, XMMWORD PTR[4*16 + esp]
+ KARATSUBA 4
+ vmovdqu TMP5, XMMWORD PTR[5*16 + esp]
+ KARATSUBA 5
+ vmovdqu TMP5, XMMWORD PTR[6*16 + esp]
+ vpxor TMP5, TMP5, T ; the running hash is folded into the oldest block of the group
+ KARATSUBA 6
+; Karatsuba fixup: TMP4 keeps the high half, TMP5 the low half.
+ vpxor TMP0, TMP0, TMP1
+ vpxor TMP0, TMP0, TMP2
+ vpsrldq TMP3, TMP0, 8
+ vpxor TMP4, TMP1, TMP3
+ vpslldq TMP3, TMP0, 8
+ vpxor TMP5, TMP2, TMP3
+; Two reduction steps with Lpoly.
+ vpclmulqdq TMP1, TMP5, XMMWORD PTR[Lpoly], 010h
+ vpalignr TMP5,TMP5,TMP5,8
+ vpxor TMP5, TMP5, TMP1
+
+ vpclmulqdq TMP1, TMP5, XMMWORD PTR[Lpoly], 010h
+ vpalignr TMP5,TMP5,TMP5,8
+ vpxor TMP5, TMP5, TMP1
+
+ vpxor TMP5, TMP5, TMP4
+ vmovdqu T, TMP5 ; store the updated hash back into the context
+; Encrypt the next 7 blocks from the prepared counter slots.
+ vmovdqa CTR0, XMMWORD PTR[8*16 + 0*16 + esp]
+ vmovdqa CTR1, XMMWORD PTR[8*16 + 1*16 + esp]
+ vmovdqa CTR2, XMMWORD PTR[8*16 + 2*16 + esp]
+ vmovdqa CTR3, XMMWORD PTR[8*16 + 3*16 + esp]
+ vmovdqa CTR4, XMMWORD PTR[8*16 + 4*16 + esp]
+ vmovdqa CTR5, XMMWORD PTR[8*16 + 5*16 + esp]
+ vmovdqa CTR6, XMMWORD PTR[8*16 + 6*16 + esp]
+
+ ROUND 1
+ NEXTCTR 0
+ ROUND 2
+ NEXTCTR 1
+ ROUND 3
+ NEXTCTR 2
+ ROUND 4
+ NEXTCTR 3
+ ROUND 5
+ NEXTCTR 4
+ ROUND 6
+ NEXTCTR 5
+ ROUND 7
+ NEXTCTR 6
+
+ ROUND 8
+ ROUND 9
+
+ vmovdqu xmm7, XMMWORD PTR[10*16 + KS] ; final round key when NR is 10
+ cmp NR, 10
+ je @f
+
+ ROUND 10
+ ROUND 11
+ vmovdqu xmm7, XMMWORD PTR[12*16 + KS] ; final round key when NR is 12
+ cmp NR, 12
+ je @f
+
+ ROUND 12
+ ROUND 13
+ vmovdqu xmm7, XMMWORD PTR[14*16 + KS] ; final round key otherwise
+@@:
+ vaesenclast CTR0, CTR0, xmm7
+ vaesenclast CTR1, CTR1, xmm7
+ vaesenclast CTR2, CTR2, xmm7
+ vaesenclast CTR3, CTR3, xmm7
+ vaesenclast CTR4, CTR4, xmm7
+ vaesenclast CTR5, CTR5, xmm7
+ vaesenclast CTR6, CTR6, xmm7
+
+ vpxor CTR0, CTR0, XMMWORD PTR[0*16 + PT]
+ vpxor CTR1, CTR1, XMMWORD PTR[1*16 + PT]
+ vpxor CTR2, CTR2, XMMWORD PTR[2*16 + PT]
+ vpxor CTR3, CTR3, XMMWORD PTR[3*16 + PT]
+ vpxor CTR4, CTR4, XMMWORD PTR[4*16 + PT]
+ vpxor CTR5, CTR5, XMMWORD PTR[5*16 + PT]
+ vpxor CTR6, CTR6, XMMWORD PTR[6*16 + PT]
+
+ vmovdqu XMMWORD PTR[0*16 + CT], CTR0
+ vmovdqu XMMWORD PTR[1*16 + CT], CTR1
+ vmovdqu XMMWORD PTR[2*16 + CT], CTR2
+ vmovdqu XMMWORD PTR[3*16 + CT], CTR3
+ vmovdqu XMMWORD PTR[4*16 + CT], CTR4
+ vmovdqu XMMWORD PTR[5*16 + CT], CTR5
+ vmovdqu XMMWORD PTR[6*16 + CT], CTR6
+; Byte-swap the new ciphertext and park it for the next hashing pass.
+ vpshufb CTR0, CTR0, XMMWORD PTR[Lbswap_mask]
+ vpshufb CTR1, CTR1, XMMWORD PTR[Lbswap_mask]
+ vpshufb CTR2, CTR2, XMMWORD PTR[Lbswap_mask]
+ vpshufb CTR3, CTR3, XMMWORD PTR[Lbswap_mask]
+ vpshufb CTR4, CTR4, XMMWORD PTR[Lbswap_mask]
+ vpshufb CTR5, CTR5, XMMWORD PTR[Lbswap_mask]
+ vpshufb TMP5, CTR6, XMMWORD PTR[Lbswap_mask]
+
+ vmovdqa XMMWORD PTR[1*16 + esp], CTR5
+ vmovdqa XMMWORD PTR[2*16 + esp], CTR4
+ vmovdqa XMMWORD PTR[3*16 + esp], CTR3
+ vmovdqa XMMWORD PTR[4*16 + esp], CTR2
+ vmovdqa XMMWORD PTR[5*16 + esp], CTR1
+ vmovdqa XMMWORD PTR[6*16 + esp], CTR0
+
+ lea CT, [7*16 + CT]
+ lea PT, [7*16 + PT]
+ jmp LEncData7
+
+LEndEnc7:
+; Hash the last group of 7 ciphertext blocks (TMP5 plus slots 1..6); no further 7-way encryption follows.
+ vpshufd TMP4, TMP5, 78
+ vpxor TMP4, TMP4, TMP5
+ vpclmulqdq TMP0, TMP4, XMMWORD PTR[0*16 + 8*16 + Htbl], 000h
+ vmovdqu TMP4, XMMWORD PTR[0*16 + Htbl]
+ vpclmulqdq TMP1, TMP5, TMP4, 011h
+ vpclmulqdq TMP2, TMP5, TMP4, 000h
+
+ vmovdqu TMP5, XMMWORD PTR[1*16 + esp]
+ KARATSUBA 1
+ vmovdqu TMP5, XMMWORD PTR[2*16 + esp]
+ KARATSUBA 2
+ vmovdqu TMP5, XMMWORD PTR[3*16 + esp]
+ KARATSUBA 3
+ vmovdqu TMP5, XMMWORD PTR[4*16 + esp]
+ KARATSUBA 4
+ vmovdqu TMP5, XMMWORD PTR[5*16 + esp]
+ KARATSUBA 5
+ vmovdqu TMP5, XMMWORD PTR[6*16 + esp]
+ vpxor TMP5, TMP5, T ; the running hash is folded into the oldest block of the group
+ KARATSUBA 6
+; Karatsuba fixup: TMP4 keeps the high half, TMP5 the low half.
+ vpxor TMP0, TMP0, TMP1
+ vpxor TMP0, TMP0, TMP2
+ vpsrldq TMP3, TMP0, 8
+ vpxor TMP4, TMP1, TMP3
+ vpslldq TMP3, TMP0, 8
+ vpxor TMP5, TMP2, TMP3
+; Two reduction steps with Lpoly.
+ vpclmulqdq TMP1, TMP5, XMMWORD PTR[Lpoly], 010h
+ vpalignr TMP5,TMP5,TMP5,8
+ vpxor TMP5, TMP5, TMP1
+
+ vpclmulqdq TMP1, TMP5, XMMWORD PTR[Lpoly], 010h
+ vpalignr TMP5,TMP5,TMP5,8
+ vpxor TMP5, TMP5, TMP1
+
+ vpxor TMP5, TMP5, TMP4
+ vmovdqu T, TMP5 ; store the updated hash back into the context
+
+ sub aluCTR, 6 ; slots 1..6 ran ahead; bring aluCTR back to the counter held in slot 0
+
+LEncDataSingles:
+; One 16-byte block per iteration while at least 16 bytes remain.
+ cmp len, 16
+ jb LEncDataTail
+ sub len, 16
+
+ vmovdqa TMP1, XMMWORD PTR[8*16 + 0*16 + esp] ; counter slot 0, already xored with round key 0
+ NEXTCTR 0 ; advance slot 0 for the following block
+
+ vaesenc TMP1, TMP1, XMMWORD PTR[1*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[2*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[3*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[4*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[5*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[6*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[7*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[8*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[9*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[10*16 + KS] ; final round key when NR is 10
+ cmp NR, 10
+ je @f
+ vaesenc TMP1, TMP1, XMMWORD PTR[10*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[11*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[12*16 + KS] ; final round key when NR is 12
+ cmp NR, 12
+ je @f
+ vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[14*16 + KS] ; final round key otherwise
+@@:
+ vaesenclast TMP1, TMP1, TMP2 ; TMP1 = keystream block
+ vpxor TMP1, TMP1, XMMWORD PTR[PT]
+ vmovdqu XMMWORD PTR[CT], TMP1
+
+ lea PT, [16+PT]
+ lea CT, [16+CT]
+; Hash update: T = (T xor byte-swapped ciphertext) times Htbl entry 0.
+ vpshufb TMP1, TMP1, XMMWORD PTR[Lbswap_mask]
+ vpxor TMP1, TMP1, T
+
+ vmovdqu TMP0, XMMWORD PTR[Htbl]
+ GFMUL TMP1, TMP1, TMP0, TMP5, TMP2, TMP3, TMP4
+ vmovdqu T, TMP1 ; T is a memory operand, so the product is stored explicitly
+
+ jmp LEncDataSingles
+
+LEncDataTail:
+; Final partial block (fewer than 16 bytes); skipped entirely when len is 0.
+ cmp len, 0
+ je LEncDataEnd
+
+ vmovdqa TMP1, XMMWORD PTR[8*16 + 0*16 + esp]
+
+ vaesenc TMP1, TMP1, XMMWORD PTR[1*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[2*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[3*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[4*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[5*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[6*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[7*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[8*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[9*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[10*16 + KS]
+ cmp NR, 10
+ je @f
+ vaesenc TMP1, TMP1, XMMWORD PTR[10*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[11*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[12*16 + KS]
+ cmp NR, 12
+ je @f
+ vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[14*16 + KS]
+@@:
+ vaesenclast TMP1, TMP1, TMP2 ; TMP1 = keystream block
+; zero a temp location
+ vpxor TMP2, TMP2, TMP2
+ vmovdqa XMMWORD PTR[esp], TMP2
+; copy as many bytes as needed
+ xor KS, KS ; the key schedule is no longer needed: KS becomes the byte index
+ mov aluTMP, edx ; dl is the byte temporary below, so the Gctx pointer in edx is parked in aluTMP
+@@:
+ cmp len, KS
+ je @f
+ mov dl, BYTE PTR[PT + KS]
+ mov BYTE PTR[esp + KS], dl
+ inc KS
+ jmp @b
+@@:
+ vpxor TMP1, TMP1, XMMWORD PTR[esp] ; keystream xor zero-padded plaintext
+ vmovdqa XMMWORD PTR[esp], TMP1
+ xor KS, KS
+@@:
+ cmp len, KS
+ je @f
+ mov dl, BYTE PTR[esp + KS]
+ mov BYTE PTR[CT + KS], dl ; only len ciphertext bytes are written out
+ inc KS
+ jmp @b
+@@:
+ cmp KS, 16
+ je @f
+ mov BYTE PTR[esp + KS], 0 ; clear the bytes past len so the hash sees zero padding
+ inc KS
+ jmp @b
+@@:
+ mov edx, aluTMP ; restore Gctx before T and Htbl are addressed again
+ vmovdqa TMP1, XMMWORD PTR[esp]
+ vpshufb TMP1, TMP1, XMMWORD PTR[Lbswap_mask]
+ vpxor TMP1, TMP1, T
+
+ vmovdqu TMP0, XMMWORD PTR[Htbl]
+ GFMUL TMP1, TMP1, TMP0, TMP5, TMP2, TMP3, TMP4
+ vmovdqu T, TMP1
+
+LEncDataEnd:
+ inc aluCTR
+ bswap aluCTR
+ mov [16*16 + 2*16 + 3*4 + Gctx], aluCTR
+
+ mov esp, ebp
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+
+
+ vzeroupper
+
+ ret
+intel_aes_gcmENC ENDP
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;
+; Decrypt and Authenticate
+; void intel_aes_gcmDEC(uint8_t* CT, uint8_t* PT, void *Gctx, unsigned int len);
+;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+
+; NEXTCTR i - advance the 32-bit block counter and patch it into stack slot i.
+; Increments aluCTR, byte-swaps a copy of it in aluTMP, XORs that copy with
+; the dword at offset 12 of the round key at [KS] and stores the result in
+; the last dword of the 16-byte slot at [i*16 + esp].  Only that dword is
+; written; the other 12 bytes of the slot are left untouched.
+; Clobbers aluTMP and the flags.
+NEXTCTR MACRO i
+ add aluCTR, 1
+ mov aluTMP, aluCTR
+ bswap aluTMP
+ xor aluTMP, [3*4 + KS]
+ mov [3*4 + i*16 + esp], aluTMP
+ENDM
+
+; intel_aes_gcmDEC - hash the ciphertext into T and decrypt it.
+; Stack arguments, in order: CT (input), PT (output), Gctx, len.
+; Work is done in three stages: groups of 7 blocks, single 16-byte blocks,
+; and a final partial block.  On exit the updated counter dword is written
+; back to Gctx.
+; CT, PT, Gctx, len, KS, NR, T, Htbl, aluCTR, aluTMP, TMPn, CTRn and the
+; KARATSUBA, ROUND and GFMUL macros are defined outside this procedure.
+intel_aes_gcmDEC PROC
+
+; [esp + 16] is the fourth stack argument (len): return at once when it is 0
+ cmp DWORD PTR[1*4 + 3*4 + esp], 0
+ jne LbeginDEC
+ ret
+
+LbeginDEC:
+
+ vzeroupper
+ push ebp
+ push ebx
+ push esi
+ push edi
+
+; ebp remembers the stack pointer; reserve 8*16 bytes of 16-byte aligned scratch
+ mov ebp, esp
+ sub esp, 8*16
+ and esp, -16
+
+; four registers were pushed, so the first argument sits at [ebp + 5*4]
+ mov CT, [ebp + 5*4 + 0*4]
+ mov PT, [ebp + 5*4 + 1*4]
+ mov Gctx, [ebp + 5*4 + 2*4]
+
+; load a pointer from Gctx and step 44 bytes into the object it points to;
+; the round keys are read from [KS] onwards below
+ mov KS, [16*16 + 3*16 + Gctx]
+ lea KS, [44 + KS]
+
+; last dword of the counter block, converted so it can be incremented
+ mov aluCTR, [16*16 + 2*16 + 3*4 + Gctx]
+ bswap aluCTR
+
+
+; slot 0 = counter block XOR round key 0 (the initial AddRoundKey is pre-applied)
+ vmovdqu TMP0, XMMWORD PTR[0*16 + KS]
+ vpxor TMP0, TMP0, XMMWORD PTR[16*16 + 2*16 + Gctx]
+ vmovdqu XMMWORD PTR[0*16 + esp], TMP0
+
+ cmp len, 16*7
+ jb LDecDataSingles
+; replicate the block into slots 1..6; NEXTCTR patches only the counter dword
+ vmovdqu XMMWORD PTR[1*16 + esp], TMP0
+ vmovdqu XMMWORD PTR[2*16 + esp], TMP0
+ vmovdqu XMMWORD PTR[3*16 + esp], TMP0
+ vmovdqu XMMWORD PTR[4*16 + esp], TMP0
+ vmovdqu XMMWORD PTR[5*16 + esp], TMP0
+ vmovdqu XMMWORD PTR[6*16 + esp], TMP0
+; the first NEXTCTR 0 in the loop increments again, restoring the current value
+ dec aluCTR
+
+; ---- 7 blocks per iteration: hash the ciphertext, then decrypt it ----
+LDecData7:
+ cmp len, 16*7
+ jb LDecData7End
+ sub len, 16*7
+
+; first block is XORed with the running hash T and multiplied by the
+; Htbl entry at 6*16; its three partial products start TMP0/TMP1/TMP2
+ vmovdqu TMP5, XMMWORD PTR[0*16 + CT]
+ vpshufb TMP5, TMP5, XMMWORD PTR[Lbswap_mask]
+ vpxor TMP5, TMP5, T
+ vpshufd TMP4, TMP5, 78
+ vpxor TMP4, TMP4, TMP5
+ vpclmulqdq TMP0, TMP4, XMMWORD PTR[6*16 + 8*16 + Htbl], 000h
+ vmovdqu TMP4, XMMWORD PTR[6*16 + Htbl]
+ vpclmulqdq TMP1, TMP5, TMP4, 011h
+ vpclmulqdq TMP2, TMP5, TMP4, 000h
+
+; remaining blocks use table entries 5..0, interleaved with the preparation
+; of the 7 counter blocks.  KARATSUBA presumably accumulates TMP5 times the
+; given table entry into TMP0/TMP1/TMP2 - its body is outside this view.
+ NEXTCTR 0
+ vmovdqu TMP5, XMMWORD PTR[1*16 + CT]
+ vpshufb TMP5, TMP5, XMMWORD PTR[Lbswap_mask]
+ KARATSUBA 5
+ NEXTCTR 1
+ vmovdqu TMP5, XMMWORD PTR[2*16 + CT]
+ vpshufb TMP5, TMP5, XMMWORD PTR[Lbswap_mask]
+ KARATSUBA 4
+ NEXTCTR 2
+ vmovdqu TMP5, XMMWORD PTR[3*16 + CT]
+ vpshufb TMP5, TMP5, XMMWORD PTR[Lbswap_mask]
+ KARATSUBA 3
+ NEXTCTR 3
+ vmovdqu TMP5, XMMWORD PTR[4*16 + CT]
+ vpshufb TMP5, TMP5, XMMWORD PTR[Lbswap_mask]
+ KARATSUBA 2
+ NEXTCTR 4
+ vmovdqu TMP5, XMMWORD PTR[5*16 + CT]
+ vpshufb TMP5, TMP5, XMMWORD PTR[Lbswap_mask]
+ KARATSUBA 1
+ NEXTCTR 5
+ vmovdqu TMP5, XMMWORD PTR[6*16 + CT]
+ vpshufb TMP5, TMP5, XMMWORD PTR[Lbswap_mask]
+ KARATSUBA 0
+ NEXTCTR 6
+
+; fold the middle term into the high half (TMP4) and the low half (TMP5)
+ vpxor TMP0, TMP0, TMP1
+ vpxor TMP0, TMP0, TMP2
+ vpsrldq TMP3, TMP0, 8
+ vpxor TMP4, TMP1, TMP3
+ vpslldq TMP3, TMP0, 8
+ vpxor TMP5, TMP2, TMP3
+
+; two reduction steps on the low half using Lpoly
+ vpclmulqdq TMP1, TMP5, XMMWORD PTR[Lpoly], 010h
+ vpalignr TMP5,TMP5,TMP5,8
+ vpxor TMP5, TMP5, TMP1
+
+ vpclmulqdq TMP1, TMP5, XMMWORD PTR[Lpoly], 010h
+ vpalignr TMP5,TMP5,TMP5,8
+ vpxor TMP5, TMP5, TMP1
+
+; add the high half: this is the new running hash
+ vpxor TMP5, TMP5, TMP4
+ vmovdqu T, TMP5
+
+; load the 7 prepared counter blocks and run the AES rounds on them
+ vmovdqa CTR0, XMMWORD PTR[0*16 + esp]
+ vmovdqa CTR1, XMMWORD PTR[1*16 + esp]
+ vmovdqa CTR2, XMMWORD PTR[2*16 + esp]
+ vmovdqa CTR3, XMMWORD PTR[3*16 + esp]
+ vmovdqa CTR4, XMMWORD PTR[4*16 + esp]
+ vmovdqa CTR5, XMMWORD PTR[5*16 + esp]
+ vmovdqa CTR6, XMMWORD PTR[6*16 + esp]
+
+ ROUND 1
+ ROUND 2
+ ROUND 3
+ ROUND 4
+ ROUND 5
+ ROUND 6
+ ROUND 7
+ ROUND 8
+ ROUND 9
+; xmm7 receives the last round key: entry 10, 12 or 14 depending on NR
+ vmovdqu xmm7, XMMWORD PTR[10*16 + KS]
+ cmp NR, 10
+ je @f
+
+ ROUND 10
+ ROUND 11
+ vmovdqu xmm7, XMMWORD PTR[12*16 + KS]
+ cmp NR, 12
+ je @f
+
+ ROUND 12
+ ROUND 13
+ vmovdqu xmm7, XMMWORD PTR[14*16 + KS]
+@@:
+ vaesenclast CTR0, CTR0, xmm7
+ vaesenclast CTR1, CTR1, xmm7
+ vaesenclast CTR2, CTR2, xmm7
+ vaesenclast CTR3, CTR3, xmm7
+ vaesenclast CTR4, CTR4, xmm7
+ vaesenclast CTR5, CTR5, xmm7
+ vaesenclast CTR6, CTR6, xmm7
+
+; keystream XOR ciphertext = plaintext
+ vpxor CTR0, CTR0, XMMWORD PTR[0*16 + CT]
+ vpxor CTR1, CTR1, XMMWORD PTR[1*16 + CT]
+ vpxor CTR2, CTR2, XMMWORD PTR[2*16 + CT]
+ vpxor CTR3, CTR3, XMMWORD PTR[3*16 + CT]
+ vpxor CTR4, CTR4, XMMWORD PTR[4*16 + CT]
+ vpxor CTR5, CTR5, XMMWORD PTR[5*16 + CT]
+ vpxor CTR6, CTR6, XMMWORD PTR[6*16 + CT]
+
+ vmovdqu XMMWORD PTR[0*16 + PT], CTR0
+ vmovdqu XMMWORD PTR[1*16 + PT], CTR1
+ vmovdqu XMMWORD PTR[2*16 + PT], CTR2
+ vmovdqu XMMWORD PTR[3*16 + PT], CTR3
+ vmovdqu XMMWORD PTR[4*16 + PT], CTR4
+ vmovdqu XMMWORD PTR[5*16 + PT], CTR5
+ vmovdqu XMMWORD PTR[6*16 + PT], CTR6
+
+ lea CT, [7*16 + CT]
+ lea PT, [7*16 + PT]
+ jmp LDecData7
+
+LDecData7End:
+
+; make slot 0 hold the next unused counter block for the code below
+ NEXTCTR 0
+
+; ---- one 16-byte block per iteration ----
+; on entry slot 0 holds the counter block that matches aluCTR
+LDecDataSingles:
+
+ cmp len, 16
+ jb LDecDataTail
+ sub len, 16
+
+; hash the ciphertext block: T = (T xor block) multiplied by the first Htbl entry
+ vmovdqu TMP1, XMMWORD PTR[CT]
+ vpshufb TMP1, TMP1, XMMWORD PTR[Lbswap_mask]
+ vpxor TMP1, TMP1, T
+
+ vmovdqu TMP0, XMMWORD PTR[Htbl]
+ GFMUL TMP1, TMP1, TMP0, TMP5, TMP2, TMP3, TMP4
+ vmovdqu T, TMP1
+
+; take the current counter block, then prepare the next one in slot 0
+ vmovdqa TMP1, XMMWORD PTR[0*16 + esp]
+ NEXTCTR 0
+
+ vaesenc TMP1, TMP1, XMMWORD PTR[1*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[2*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[3*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[4*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[5*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[6*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[7*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[8*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[9*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[10*16 + KS]
+ cmp NR, 10
+ je @f
+ vaesenc TMP1, TMP1, XMMWORD PTR[10*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[11*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[12*16 + KS]
+ cmp NR, 12
+ je @f
+ vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[14*16 + KS]
+@@:
+ vaesenclast TMP1, TMP1, TMP2
+ vpxor TMP1, TMP1, XMMWORD PTR[CT]
+ vmovdqu XMMWORD PTR[PT], TMP1
+
+ lea PT, [16+PT]
+ lea CT, [16+CT]
+ jmp LDecDataSingles
+
+; ---- final partial block (fewer than 16 bytes left) ----
+LDecDataTail:
+
+ cmp len, 0
+ je LDecDataEnd
+
+; one more counter value is consumed by the partial block
+ vmovdqa TMP1, XMMWORD PTR[0*16 + esp]
+ inc aluCTR
+ vaesenc TMP1, TMP1, XMMWORD PTR[1*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[2*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[3*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[4*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[5*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[6*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[7*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[8*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[9*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[10*16 + KS]
+ cmp NR, 10
+ je @f
+ vaesenc TMP1, TMP1, XMMWORD PTR[10*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[11*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[12*16 + KS]
+ cmp NR, 12
+ je @f
+ vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS]
+ vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS]
+ vmovdqu TMP2, XMMWORD PTR[14*16 + KS]
+@@:
+; keep the keystream block in xmm7 until the ciphertext has been hashed
+ vaesenclast xmm7, TMP1, TMP2
+
+; copy as many bytes as needed
+; KS is no longer needed as a pointer and becomes the byte index;
+; edx is parked in aluTMP because dl is the byte scratch register
+ xor KS, KS
+ mov aluTMP, edx
+@@:
+ cmp len, KS
+ je @f
+ mov dl, BYTE PTR[CT + KS]
+ mov BYTE PTR[esp + KS], dl
+ inc KS
+ jmp @b
+@@:
+; zero-pad the scratch block up to 16 bytes for the hash
+ cmp KS, 16
+ je @f
+ mov BYTE PTR[esp + KS], 0
+ inc KS
+ jmp @b
+@@:
+ mov edx, aluTMP
+ vmovdqa TMP1, XMMWORD PTR[esp]
+ vpshufb TMP1, TMP1, XMMWORD PTR[Lbswap_mask]
+ vpxor TMP1, TMP1, T
+
+ vmovdqu TMP0, XMMWORD PTR[Htbl]
+ GFMUL TMP1, TMP1, TMP0, TMP5, TMP2, TMP3, TMP4
+ vmovdqu T, TMP1
+
+; decrypt the padded block in the scratch slot, then copy out only len bytes
+ vpxor xmm7, xmm7, XMMWORD PTR[esp]
+ vmovdqa XMMWORD PTR[esp], xmm7
+ xor KS, KS
+ mov aluTMP, edx
+@@:
+ cmp len, KS
+ je @f
+ mov dl, BYTE PTR[esp + KS]
+ mov BYTE PTR[PT + KS], dl
+ inc KS
+ jmp @b
+@@:
+ mov edx, aluTMP
+; NOTE(review): the scratch slot still holds the decrypted block (and raw
+; keystream in the padding bytes) when the frame is released below; confirm
+; whether it should be wiped first.
+
+LDecDataEnd:
+
+; store the counter dword back in its original byte order
+ bswap aluCTR
+ mov [16*16 + 2*16 + 3*4 + Gctx], aluCTR
+
+ mov esp, ebp
+ pop edi
+ pop esi
+ pop ebx
+ pop ebp
+
+ vzeroupper
+
+ ret
+intel_aes_gcmDEC ENDP
+
+
+END
diff --git a/security/nss/lib/freebl/intel-gcm.h b/security/nss/lib/freebl/intel-gcm.h
new file mode 100644
index 000000000..566e544d8
--- /dev/null
+++ b/security/nss/lib/freebl/intel-gcm.h
@@ -0,0 +1,83 @@
+/******************************************************************************/
+/* LICENSE: */
+/* This submission to NSS is to be made available under the terms of the */
+/* Mozilla Public License, v. 2.0. You can obtain one at http: */
+/* //mozilla.org/MPL/2.0/. */
+/******************************************************************************/
+/* Copyright(c) 2013, Intel Corp. */
+/******************************************************************************/
+/* Reference: */
+/* [1] Shay Gueron, Michael E. Kounavis: Intel(R) Carry-Less Multiplication */
+/* Instruction and its Usage for Computing the GCM Mode (Rev. 2.01) */
+/* http://software.intel.com/sites/default/files/article/165685/clmul-wp-r*/
+/*ev-2.01-2012-09-21.pdf */
+/* [2] S. Gueron, M. E. Kounavis: Efficient Implementation of the Galois */
+/* Counter Mode Using a Carry-less Multiplier and a Fast Reduction */
+/* Algorithm. Information Processing Letters 110: 549-553 (2010). */
+/* [3] S. Gueron: AES Performance on the 2nd Generation Intel(R) Core(TM) */
+/* Processor Family (to be posted) (2012). */
+/* [4] S. Gueron: Fast GHASH computations for speeding up AES-GCM (to be */
+/* published) (2012). */
+
+#ifndef INTEL_GCM_H
+#define INTEL_GCM_H 1
+
+#include "blapii.h"
+
+/* Opaque context type; the structure itself is defined outside this header. */
+typedef struct intel_AES_GCMContextStr intel_AES_GCMContext;
+
+/* C-level entry points (declared here, implemented outside this header). */
+intel_AES_GCMContext *intel_AES_GCM_CreateContext(void *context, freeblCipherFunc cipher,
+ const unsigned char *params, unsigned int blocksize);
+
+void intel_AES_GCM_DestroyContext(intel_AES_GCMContext *gcm, PRBool freeit);
+
+SECStatus intel_AES_GCM_EncryptUpdate(intel_AES_GCMContext *gcm, unsigned char *outbuf,
+ unsigned int *outlen, unsigned int maxout,
+ const unsigned char *inbuf, unsigned int inlen,
+ unsigned int blocksize);
+
+SECStatus intel_AES_GCM_DecryptUpdate(intel_AES_GCMContext *gcm, unsigned char *outbuf,
+ unsigned int *outlen, unsigned int maxout,
+ const unsigned char *inbuf, unsigned int inlen,
+ unsigned int blocksize);
+
+/* Prototypes of functions in the assembler file for fast AES-GCM, using
+ Intel AES-NI and CLMUL-NI, as described in [1]
+ [1] Shay Gueron, Michael E. Kounavis: Intel(R) Carry-Less Multiplication
+ Instruction and its Usage for Computing the GCM Mode */
+
+/* Prepares the constants used in the aggregated reduction method.
+   Htbl: 256-byte output table. In intel-gcm.s the first 128 bytes receive
+         eight 16-byte hash-key values and the last 128 bytes receive, for
+         each of them, the XOR of its two 64-bit halves.
+   KS:   expanded AES round keys, 16 bytes per round key.
+   NR:   number of AES rounds; the assembly compares it against 10 and 12
+         and otherwise runs 14 rounds. */
+void intel_aes_gcmINIT(unsigned char Htbl[16 * 16],
+ unsigned char *KS,
+ int NR);
+
+/* Produces the final GHASH value.
+   Tp:   16-byte running hash (read only).
+   Mlen, Alen: lengths in bytes; the assembly shifts them left by 3, so
+         they are hashed as bit counts.
+   X0:   16 bytes XORed into the result.
+   TAG:  receives the 16-byte result. */
+void intel_aes_gcmTAG(unsigned char Htbl[16 * 16],
+ unsigned char *Tp,
+ unsigned long Mlen,
+ unsigned long Alen,
+ unsigned char *X0,
+ unsigned char *TAG);
+
+/* Hashes the Additional Authenticated Data, should be used before enc/dec.
+ Operates on whole blocks only. Partial blocks should be padded externally.
+ Tp is read and updated in place; the call returns immediately when Alen
+ is 0. */
+void intel_aes_gcmAAD(unsigned char Htbl[16 * 16],
+ unsigned char *AAD,
+ unsigned long Alen,
+ unsigned char *Tp);
+
+/* Encrypts and hashes the Plaintext.
+ Operates on any length of data, however partial block should only be encrypted
+ at the last call, otherwise the result will be incorrect.
+ In intel-gcm.s, Gctx is read at byte offset 0 (the Htbl table), 272 (running
+ hash), 288 (counter block) and 304 (pointer from which the round keys and
+ round count are reached); the hash and counter are written back on exit.
+ The call returns immediately when len is 0. */
+void intel_aes_gcmENC(const unsigned char *PT,
+ unsigned char *CT,
+ void *Gctx,
+ unsigned long len);
+
+/* Similar to ENC, but decrypts the Ciphertext.
+ Note that the ciphertext (input) is the first parameter here. */
+void intel_aes_gcmDEC(const unsigned char *CT,
+ unsigned char *PT,
+ void *Gctx,
+ unsigned long len);
+
+#endif
diff --git a/security/nss/lib/freebl/intel-gcm.s b/security/nss/lib/freebl/intel-gcm.s
new file mode 100644
index 000000000..1a3106091
--- /dev/null
+++ b/security/nss/lib/freebl/intel-gcm.s
@@ -0,0 +1,1340 @@
+# LICENSE:
+# This submission to NSS is to be made available under the terms of the
+# Mozilla Public License, v. 2.0. You can obtain one at http:
+# //mozilla.org/MPL/2.0/.
+################################################################################
+# Copyright(c) 2012, Intel Corp.
+
+# Constants used by the routines in this file, starting on a 16-byte boundary.
+.align 16
+# The value 1 in the lowest dword: added with vpaddd to step the counter by one
+.Lone:
+.quad 1,0
+# The value 2 in the lowest dword: added with vpaddd to step the counter by two
+.Ltwo:
+.quad 2,0
+# vpshufb control that reverses the order of the 16 bytes of a register
+.Lbswap_mask:
+.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+# vpshufb control that selects byte 15 for every lane (used by intel_aes_gcmINIT)
+.Lshuff_mask:
+.quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f
+# Constant multiplied in by vpclmulqdq during the reduction steps
+.Lpoly:
+.quad 0x1, 0xc200000000000000
+
+
+################################################################################
+# Generates the final GCM tag
+# void intel_aes_gcmTAG(uint8_t Htbl[16*16], uint8_t *Tp, uint64_t Mlen, uint64_t Alen, uint8_t* X0, uint8_t* TAG);
+# Mlen and Alen arrive as byte counts and are hashed as bit counts.
+# The 16 bytes at Tp are only read; the result is written to TAG.
+# GFMUL is defined elsewhere in this file; it is called with its operands
+# in T and TMP0 and the result is taken from T.
+.type intel_aes_gcmTAG,@function
+.globl intel_aes_gcmTAG
+.align 16
+intel_aes_gcmTAG:
+
+.set Htbl, %rdi
+.set Tp, %rsi
+.set Mlen, %rdx
+.set Alen, %rcx
+.set X0, %r8
+.set TAG, %r9
+
+.set T,%xmm0
+.set TMP0,%xmm1
+
+ vmovdqu (Tp), T
+ vpshufb .Lbswap_mask(%rip), T, T
+ vpxor TMP0, TMP0, TMP0
+# bytes to bits
+ shl $3, Mlen
+ shl $3, Alen
+# length block: Mlen in the low qword, Alen in the high qword
+ vpinsrq $0, Mlen, TMP0, TMP0
+ vpinsrq $1, Alen, TMP0, TMP0
+ vpxor TMP0, T, T
+# multiply by the first Htbl entry
+ vmovdqu (Htbl), TMP0
+ call GFMUL
+ vpshufb .Lbswap_mask(%rip), T, T
+# final XOR with the 16 bytes at X0
+ vpxor (X0), T, T
+ vmovdqu T, (TAG)
+
+ret
+.size intel_aes_gcmTAG, .-intel_aes_gcmTAG
+################################################################################
+# Generates the H table
+# void intel_aes_gcmINIT(uint8_t Htbl[16*16], uint8_t *KS, int NR);
+# Output layout: bytes 0..127 of Htbl hold eight 16-byte values (H*2 and the
+# results of seven successive GFMUL calls); bytes 128..255 hold, for each of
+# them, the XOR of its two 64-bit halves.
+# GFMUL is defined elsewhere in this file; it is called with its operands in
+# T and TMP0 and the result is taken from T.
+# Uses %rax and %xmm0-%xmm5, %xmm8-%xmm15 without saving them.
+.type intel_aes_gcmINIT,@function
+.globl intel_aes_gcmINIT
+.align 16
+intel_aes_gcmINIT:
+
+.set Htbl, %rdi
+.set KS, %rsi
+.set NR, %edx
+
+.set T,%xmm0
+.set TMP0,%xmm1
+
+# Starting from round key 0 is the same as XORing it into an all-zero block,
+# so the rounds below encrypt the zero block.
+CALCULATE_POWERS_OF_H:
+ vmovdqu 16*0(KS), T
+ vaesenc 16*1(KS), T, T
+ vaesenc 16*2(KS), T, T
+ vaesenc 16*3(KS), T, T
+ vaesenc 16*4(KS), T, T
+ vaesenc 16*5(KS), T, T
+ vaesenc 16*6(KS), T, T
+ vaesenc 16*7(KS), T, T
+ vaesenc 16*8(KS), T, T
+ vaesenc 16*9(KS), T, T
+# TMP0 ends up holding the last round key: entry 10, 12 or 14 depending on NR
+ vmovdqu 16*10(KS), TMP0
+ cmp $10, NR
+ je .LH0done
+ vaesenc 16*10(KS), T, T
+ vaesenc 16*11(KS), T, T
+ vmovdqu 16*12(KS), TMP0
+ cmp $12, NR
+ je .LH0done
+ vaesenc 16*12(KS), T, T
+ vaesenc 16*13(KS), T, T
+ vmovdqu 16*14(KS), TMP0
+
+.LH0done:
+ vaesenclast TMP0, T, T
+
+ vpshufb .Lbswap_mask(%rip), T, T
+
+ vmovdqu T, TMP0
+ # Calculate H` = GFMUL(H, 2)
+ # Build a mask in %xmm4 that is all ones when the top bit of T is set and
+ # all zeros otherwise, then keep .Lpoly in %xmm5 only in the first case.
+ vpsrld $7 , T , %xmm3
+ vmovdqu .Lshuff_mask(%rip), %xmm4
+ vpshufb %xmm4, %xmm3 , %xmm3
+ movq $0xff00 , %rax
+ vmovq %rax, %xmm4
+ vpshufb %xmm3, %xmm4 , %xmm4
+ vmovdqu .Lpoly(%rip), %xmm5
+ vpand %xmm4, %xmm5, %xmm5
+ # shift T left by one bit, carrying bit 31 of each dword into the next dword
+ vpsrld $31, T, %xmm3
+ vpslld $1, T, %xmm4
+ vpslldq $4, %xmm3, %xmm3
+ vpxor %xmm3, %xmm4, T #T holds now p(x)<<1
+
+ #adding p(x)<<1 to xmm5
+ vpxor %xmm5, T , T
+ vmovdqu T, TMP0
+ vmovdqu T, (Htbl) # H * 2
+ call GFMUL
+ vmovdqu T, 16(Htbl) # H^2 * 2
+ call GFMUL
+ vmovdqu T, 32(Htbl) # H^3 * 2
+ call GFMUL
+ vmovdqu T, 48(Htbl) # H^4 * 2
+ call GFMUL
+ vmovdqu T, 64(Htbl) # H^5 * 2
+ call GFMUL
+ vmovdqu T, 80(Htbl) # H^6 * 2
+ call GFMUL
+ vmovdqu T, 96(Htbl) # H^7 * 2
+ call GFMUL
+ vmovdqu T, 112(Htbl) # H^8 * 2
+
+ # Precalculations for the reduce 4 step
+ # For every table entry: swap its 64-bit halves, XOR with the entry itself
+ # and store the result 128 bytes further on.
+ vpshufd $78, (Htbl), %xmm8
+ vpshufd $78, 16(Htbl), %xmm9
+ vpshufd $78, 32(Htbl), %xmm10
+ vpshufd $78, 48(Htbl), %xmm11
+ vpshufd $78, 64(Htbl), %xmm12
+ vpshufd $78, 80(Htbl), %xmm13
+ vpshufd $78, 96(Htbl), %xmm14
+ vpshufd $78, 112(Htbl), %xmm15
+
+ vpxor (Htbl), %xmm8, %xmm8
+ vpxor 16(Htbl), %xmm9, %xmm9
+ vpxor 32(Htbl), %xmm10, %xmm10
+ vpxor 48(Htbl), %xmm11, %xmm11
+ vpxor 64(Htbl), %xmm12, %xmm12
+ vpxor 80(Htbl), %xmm13, %xmm13
+ vpxor 96(Htbl), %xmm14, %xmm14
+ vpxor 112(Htbl), %xmm15, %xmm15
+
+ vmovdqu %xmm8, 128(Htbl)
+ vmovdqu %xmm9, 144(Htbl)
+ vmovdqu %xmm10, 160(Htbl)
+ vmovdqu %xmm11, 176(Htbl)
+ vmovdqu %xmm12, 192(Htbl)
+ vmovdqu %xmm13, 208(Htbl)
+ vmovdqu %xmm14, 224(Htbl)
+ vmovdqu %xmm15, 240(Htbl)
+
+ ret
+.size intel_aes_gcmINIT, .-intel_aes_gcmINIT
+################################################################################
+# Authenticate only
+# void intel_aes_gcmAAD(uint8_t Htbl[16*16], uint8_t *AAD, uint64_t Alen, uint8_t *Tp);
+# Folds Alen bytes at AAD into the 16-byte hash at Tp (read, then rewritten).
+# Returns immediately when Alen is 0.  The code consumes 16 bytes at a time,
+# so Alen is expected to be a multiple of 16.
+
+.globl intel_aes_gcmAAD
+.type intel_aes_gcmAAD,@function
+.align 16
+intel_aes_gcmAAD:
+
+.set DATA, %xmm0
+.set T, %xmm1
+.set BSWAP_MASK, %xmm2
+.set TMP0, %xmm3
+.set TMP1, %xmm4
+.set TMP2, %xmm5
+.set TMP3, %xmm6
+.set TMP4, %xmm7
+.set Xhi, %xmm9
+
+.set Htbl, %rdi
+.set inp, %rsi
+.set len, %rdx
+.set Tp, %rcx
+
+.set hlp0, %r11
+
+# Accumulate DATA times table entry i: low product into TMP0, high product
+# into TMP1, product of the XORed halves into TMP2.  Clobbers TMP3.
+.macro KARATSUBA_AAD i
+ vpclmulqdq $0x00, 16*\i(Htbl), DATA, TMP3
+ vpxor TMP3, TMP0, TMP0
+ vpclmulqdq $0x11, 16*\i(Htbl), DATA, TMP3
+ vpxor TMP3, TMP1, TMP1
+ vpshufd $78, DATA, TMP3
+ vpxor DATA, TMP3, TMP3
+ vpclmulqdq $0x00, 16*(\i+8)(Htbl), TMP3, TMP3
+ vpxor TMP3, TMP2, TMP2
+.endm
+
+ test len, len
+ jnz .LbeginAAD
+ ret
+
+.LbeginAAD:
+
+ push hlp0
+ vzeroupper
+
+ vmovdqa .Lbswap_mask(%rip), BSWAP_MASK
+
+ vpxor Xhi, Xhi, Xhi
+
+ vmovdqu (Tp),T
+ vpshufb BSWAP_MASK,T,T
+
+ # we hash 8 blocks each iteration, if the total amount of blocks is not a multiple of 8, we hash the first n%8 blocks first
+ # hlp0 = len modulo 128 = number of prefix bytes
+ mov len, hlp0
+ and $~-128, hlp0
+
+ jz .Lmod_loop
+
+ # hlp0 becomes the table offset for the first prefix block; it drops by 16
+ # per block so that the last prefix block uses the entry at offset 0
+ sub hlp0, len
+ sub $16, hlp0
+
+ #hash first prefix block
+ vmovdqu (inp), DATA
+ vpshufb BSWAP_MASK, DATA, DATA
+ vpxor T, DATA, DATA
+
+ vpclmulqdq $0x00, (Htbl, hlp0), DATA, TMP0
+ vpclmulqdq $0x11, (Htbl, hlp0), DATA, TMP1
+ vpshufd $78, DATA, TMP2
+ vpxor DATA, TMP2, TMP2
+ vpclmulqdq $0x00, 16*8(Htbl, hlp0), TMP2, TMP2
+
+ lea 16(inp), inp
+ test hlp0, hlp0
+ jnz .Lpre_loop
+ jmp .Lred1
+
+ #hash remaining prefix blocks (up to 7 total prefix blocks)
+.align 64
+.Lpre_loop:
+
+ sub $16, hlp0
+
+ vmovdqu (inp),DATA # next data block
+ vpshufb BSWAP_MASK,DATA,DATA
+
+ vpclmulqdq $0x00, (Htbl,hlp0), DATA, TMP3
+ vpxor TMP3, TMP0, TMP0
+ vpclmulqdq $0x11, (Htbl,hlp0), DATA, TMP3
+ vpxor TMP3, TMP1, TMP1
+ vpshufd $78, DATA, TMP3
+ vpxor DATA, TMP3, TMP3
+ vpclmulqdq $0x00, 16*8(Htbl,hlp0), TMP3, TMP3
+ vpxor TMP3, TMP2, TMP2
+
+ # lea does not touch the flags, so jnz still sees the result of test
+ test hlp0, hlp0
+
+ lea 16(inp), inp
+
+ jnz .Lpre_loop
+
+ # combine the partial products: high half goes to Xhi, low half to T;
+ # the reduction itself is left to .Lmod_loop or .Ldone
+.Lred1:
+ vpxor TMP0, TMP2, TMP2
+ vpxor TMP1, TMP2, TMP2
+ vpsrldq $8, TMP2, TMP3
+ vpslldq $8, TMP2, TMP2
+
+ vpxor TMP3, TMP1, Xhi
+ vpxor TMP2, TMP0, T
+
+ # main loop: 128 bytes per iteration; the reduction of the previous result
+ # (T, Xhi) is spread across the multiplications of the current blocks
+.align 64
+.Lmod_loop:
+ sub $0x80, len
+ jb .Ldone
+
+ vmovdqu 16*7(inp),DATA # Ii
+ vpshufb BSWAP_MASK,DATA,DATA
+
+ vpclmulqdq $0x00, (Htbl), DATA, TMP0
+ vpclmulqdq $0x11, (Htbl), DATA, TMP1
+ vpshufd $78, DATA, TMP2
+ vpxor DATA, TMP2, TMP2
+ vpclmulqdq $0x00, 16*8(Htbl), TMP2, TMP2
+ #########################################################
+ vmovdqu 16*6(inp),DATA
+ vpshufb BSWAP_MASK,DATA,DATA
+ KARATSUBA_AAD 1
+ #########################################################
+ vmovdqu 16*5(inp),DATA
+ vpshufb BSWAP_MASK,DATA,DATA
+
+ vpclmulqdq $0x10, .Lpoly(%rip), T, TMP4 #reduction stage 1a
+ vpalignr $8, T, T, T
+
+ KARATSUBA_AAD 2
+
+ vpxor TMP4, T, T #reduction stage 1b
+ #########################################################
+ vmovdqu 16*4(inp),DATA
+ vpshufb BSWAP_MASK,DATA,DATA
+
+ KARATSUBA_AAD 3
+ #########################################################
+ vmovdqu 16*3(inp),DATA
+ vpshufb BSWAP_MASK,DATA,DATA
+
+ vpclmulqdq $0x10, .Lpoly(%rip), T, TMP4 #reduction stage 2a
+ vpalignr $8, T, T, T
+
+ KARATSUBA_AAD 4
+
+ vpxor TMP4, T, T #reduction stage 2b
+ #########################################################
+ vmovdqu 16*2(inp),DATA
+ vpshufb BSWAP_MASK,DATA,DATA
+
+ KARATSUBA_AAD 5
+
+ vpxor Xhi, T, T #reduction finalize
+ #########################################################
+ vmovdqu 16*1(inp),DATA
+ vpshufb BSWAP_MASK,DATA,DATA
+
+ KARATSUBA_AAD 6
+ #########################################################
+ # the first block of the group absorbs the reduced previous hash
+ vmovdqu 16*0(inp),DATA
+ vpshufb BSWAP_MASK,DATA,DATA
+ vpxor T,DATA,DATA
+
+ KARATSUBA_AAD 7
+ #########################################################
+ vpxor TMP0, TMP2, TMP2 # karatsuba fixup
+ vpxor TMP1, TMP2, TMP2
+ vpsrldq $8, TMP2, TMP3
+ vpslldq $8, TMP2, TMP2
+
+ vpxor TMP3, TMP1, Xhi
+ vpxor TMP2, TMP0, T
+
+ lea 16*8(inp), inp
+ jmp .Lmod_loop
+ #########################################################
+
+ # final reduction of (T, Xhi): two steps on T, then add Xhi
+.Ldone:
+ vpclmulqdq $0x10, .Lpoly(%rip), T, TMP3
+ vpalignr $8, T, T, T
+ vpxor TMP3, T, T
+
+ vpclmulqdq $0x10, .Lpoly(%rip), T, TMP3
+ vpalignr $8, T, T, T
+ vpxor TMP3, T, T
+
+ vpxor Xhi, T, T
+
+.Lsave:
+ vpshufb BSWAP_MASK,T, T
+ vmovdqu T,(Tp)
+ vzeroupper
+
+ pop hlp0
+ ret
+.size intel_aes_gcmAAD,.-intel_aes_gcmAAD
+
+################################################################################
+# Encrypt and Authenticate
+# void intel_aes_gcmENC(uint8_t* PT, uint8_t* CT, void *Gctx,uint64_t len);
+# Returns immediately when len is 0.  Otherwise the data is processed in
+# three stages: groups of 8 blocks (encryption of one group overlapped with
+# hashing of the previous group's ciphertext), single 16-byte blocks, and a
+# final partial block.  The running hash and the counter block are read
+# from Gctx and written back to it on exit.
+# GFMUL is defined elsewhere in this file; it is called with its operands in
+# T and TMP0 and the result is taken from T.
+.type intel_aes_gcmENC,@function
+.globl intel_aes_gcmENC
+.align 16
+intel_aes_gcmENC:
+
+.set PT,%rdi
+.set CT,%rsi
+.set Htbl, %rdx
+.set len, %rcx
+.set KS,%r9
+.set NR,%r10d
+
+# Htbl and Gctx name the same register: the table is read at offset 0 of Gctx
+.set Gctx, %rdx
+
+.set T,%xmm0
+.set TMP0,%xmm1
+.set TMP1,%xmm2
+.set TMP2,%xmm3
+.set TMP3,%xmm4
+.set TMP4,%xmm5
+.set TMP5,%xmm6
+.set CTR0,%xmm7
+.set CTR1,%xmm8
+.set CTR2,%xmm9
+.set CTR3,%xmm10
+.set CTR4,%xmm11
+.set CTR5,%xmm12
+.set CTR6,%xmm13
+.set CTR7,%xmm14
+.set CTR,%xmm15
+
+# One AES round with round key i on all eight counter blocks. Clobbers TMP3.
+.macro ROUND i
+ vmovdqu \i*16(KS), TMP3
+ vaesenc TMP3, CTR0, CTR0
+ vaesenc TMP3, CTR1, CTR1
+ vaesenc TMP3, CTR2, CTR2
+ vaesenc TMP3, CTR3, CTR3
+ vaesenc TMP3, CTR4, CTR4
+ vaesenc TMP3, CTR5, CTR5
+ vaesenc TMP3, CTR6, CTR6
+ vaesenc TMP3, CTR7, CTR7
+.endm
+
+# AES round i on all eight counter blocks, interleaved with the multiplication
+# of stack slot i by table entry i.  Accumulates the product of the XORed
+# halves into TMP0, the high product into TMP1 and the low product into TMP2.
+# Clobbers TMP3, TMP4 and TMP5.
+.macro ROUNDMUL i
+
+ vmovdqu \i*16(%rsp), TMP5
+ vmovdqu \i*16(KS), TMP3
+
+ vaesenc TMP3, CTR0, CTR0
+ vaesenc TMP3, CTR1, CTR1
+ vaesenc TMP3, CTR2, CTR2
+ vaesenc TMP3, CTR3, CTR3
+
+ vpshufd $78, TMP5, TMP4
+ vpxor TMP5, TMP4, TMP4
+
+ vaesenc TMP3, CTR4, CTR4
+ vaesenc TMP3, CTR5, CTR5
+ vaesenc TMP3, CTR6, CTR6
+ vaesenc TMP3, CTR7, CTR7
+
+ vpclmulqdq $0x00, 128+\i*16(Htbl), TMP4, TMP3
+ vpxor TMP3, TMP0, TMP0
+ vmovdqa \i*16(Htbl), TMP4
+ vpclmulqdq $0x11, TMP4, TMP5, TMP3
+ vpxor TMP3, TMP1, TMP1
+ vpclmulqdq $0x00, TMP4, TMP5, TMP3
+ vpxor TMP3, TMP2, TMP2
+
+.endm
+
+# Multiplication of stack slot i by table entry i only (no AES round); same
+# accumulators as ROUNDMUL.  Clobbers TMP3 and TMP5.
+.macro KARATSUBA i
+ vmovdqu \i*16(%rsp), TMP5
+
+ vpclmulqdq $0x11, 16*\i(Htbl), TMP5, TMP3
+ vpxor TMP3, TMP1, TMP1
+ vpclmulqdq $0x00, 16*\i(Htbl), TMP5, TMP3
+ vpxor TMP3, TMP2, TMP2
+ vpshufd $78, TMP5, TMP3
+ vpxor TMP5, TMP3, TMP5
+ vpclmulqdq $0x00, 128+\i*16(Htbl), TMP5, TMP3
+ vpxor TMP3, TMP0, TMP0
+.endm
+
+ test len, len
+ jnz .Lbegin
+ ret
+
+.Lbegin:
+
+ vzeroupper
+ push %rbp
+ push %rbx
+
+# rbp remembers the stack pointer; reserve 128 bytes of 16-byte aligned scratch
+ movq %rsp, %rbp
+ sub $128, %rsp
+ andq $-16, %rsp
+
+# context fields: counter block at 288, running hash at 272, and at 304 a
+# pointer to an object holding the round count at +4 and round keys from +48
+ vmovdqu 288(Gctx), CTR
+ vmovdqu 272(Gctx), T
+ mov 304(Gctx), KS
+ mov 4(KS), NR
+ lea 48(KS), KS
+
+# both are kept byte-reversed while in registers
+ vpshufb .Lbswap_mask(%rip), CTR, CTR
+ vpshufb .Lbswap_mask(%rip), T, T
+
+ cmp $128, len
+ jb .LDataSingles
+
+# Encrypt the first eight blocks
+ sub $128, len
+# CTR0..CTR7 = eight consecutive counters, CTR = the counter after them
+ vmovdqa CTR, CTR0
+ vpaddd .Lone(%rip), CTR0, CTR1
+ vpaddd .Ltwo(%rip), CTR0, CTR2
+ vpaddd .Lone(%rip), CTR2, CTR3
+ vpaddd .Ltwo(%rip), CTR2, CTR4
+ vpaddd .Lone(%rip), CTR4, CTR5
+ vpaddd .Ltwo(%rip), CTR4, CTR6
+ vpaddd .Lone(%rip), CTR6, CTR7
+ vpaddd .Ltwo(%rip), CTR6, CTR
+
+ vpshufb .Lbswap_mask(%rip), CTR0, CTR0
+ vpshufb .Lbswap_mask(%rip), CTR1, CTR1
+ vpshufb .Lbswap_mask(%rip), CTR2, CTR2
+ vpshufb .Lbswap_mask(%rip), CTR3, CTR3
+ vpshufb .Lbswap_mask(%rip), CTR4, CTR4
+ vpshufb .Lbswap_mask(%rip), CTR5, CTR5
+ vpshufb .Lbswap_mask(%rip), CTR6, CTR6
+ vpshufb .Lbswap_mask(%rip), CTR7, CTR7
+
+# initial AddRoundKey with round key 0
+ vpxor (KS), CTR0, CTR0
+ vpxor (KS), CTR1, CTR1
+ vpxor (KS), CTR2, CTR2
+ vpxor (KS), CTR3, CTR3
+ vpxor (KS), CTR4, CTR4
+ vpxor (KS), CTR5, CTR5
+ vpxor (KS), CTR6, CTR6
+ vpxor (KS), CTR7, CTR7
+
+ ROUND 1
+ ROUND 2
+ ROUND 3
+ ROUND 4
+ ROUND 5
+ ROUND 6
+ ROUND 7
+ ROUND 8
+ ROUND 9
+
+# TMP5 ends up holding the last round key: entry 10, 12 or 14 depending on NR
+ vmovdqu 160(KS), TMP5
+ cmp $12, NR
+ jb .LLast1
+
+ ROUND 10
+ ROUND 11
+
+ vmovdqu 192(KS), TMP5
+ cmp $14, NR
+ jb .LLast1
+
+ ROUND 12
+ ROUND 13
+
+ vmovdqu 224(KS), TMP5
+
+.LLast1:
+
+# XORing the plaintext into the last round key first makes vaesenclast
+# produce the ciphertext directly
+ vpxor (PT), TMP5, TMP3
+ vaesenclast TMP3, CTR0, CTR0
+ vpxor 16(PT), TMP5, TMP3
+ vaesenclast TMP3, CTR1, CTR1
+ vpxor 32(PT), TMP5, TMP3
+ vaesenclast TMP3, CTR2, CTR2
+ vpxor 48(PT), TMP5, TMP3
+ vaesenclast TMP3, CTR3, CTR3
+ vpxor 64(PT), TMP5, TMP3
+ vaesenclast TMP3, CTR4, CTR4
+ vpxor 80(PT), TMP5, TMP3
+ vaesenclast TMP3, CTR5, CTR5
+ vpxor 96(PT), TMP5, TMP3
+ vaesenclast TMP3, CTR6, CTR6
+ vpxor 112(PT), TMP5, TMP3
+ vaesenclast TMP3, CTR7, CTR7
+
+# TMP3 keeps the byte-swap mask; the start of .LDataOctets relies on that
+ vmovdqu .Lbswap_mask(%rip), TMP3
+
+# store the ciphertext and keep a byte-reversed copy in CTR0..CTR7 for hashing
+ vmovdqu CTR0, (CT)
+ vpshufb TMP3, CTR0, CTR0
+ vmovdqu CTR1, 16(CT)
+ vpshufb TMP3, CTR1, CTR1
+ vmovdqu CTR2, 32(CT)
+ vpshufb TMP3, CTR2, CTR2
+ vmovdqu CTR3, 48(CT)
+ vpshufb TMP3, CTR3, CTR3
+ vmovdqu CTR4, 64(CT)
+ vpshufb TMP3, CTR4, CTR4
+ vmovdqu CTR5, 80(CT)
+ vpshufb TMP3, CTR5, CTR5
+ vmovdqu CTR6, 96(CT)
+ vpshufb TMP3, CTR6, CTR6
+ vmovdqu CTR7, 112(CT)
+ vpshufb TMP3, CTR7, CTR7
+
+ lea 128(CT), CT
+ lea 128(PT), PT
+ jmp .LDataOctets
+
+# Encrypt 8 blocks each time while hashing previous 8 blocks
+.align 64
+.LDataOctets:
+ cmp $128, len
+ jb .LEndOctets
+ sub $128, len
+
+# park the previous ciphertext blocks: the newest stays in TMP5, the oldest
+# (CTR0) goes to slot 7 so that it meets table entry 7
+ vmovdqa CTR7, TMP5
+ vmovdqa CTR6, 1*16(%rsp)
+ vmovdqa CTR5, 2*16(%rsp)
+ vmovdqa CTR4, 3*16(%rsp)
+ vmovdqa CTR3, 4*16(%rsp)
+ vmovdqa CTR2, 5*16(%rsp)
+ vmovdqa CTR1, 6*16(%rsp)
+ vmovdqa CTR0, 7*16(%rsp)
+
+ vmovdqa CTR, CTR0
+ vpaddd .Lone(%rip), CTR0, CTR1
+ vpaddd .Ltwo(%rip), CTR0, CTR2
+ vpaddd .Lone(%rip), CTR2, CTR3
+ vpaddd .Ltwo(%rip), CTR2, CTR4
+ vpaddd .Lone(%rip), CTR4, CTR5
+ vpaddd .Ltwo(%rip), CTR4, CTR6
+ vpaddd .Lone(%rip), CTR6, CTR7
+ vpaddd .Ltwo(%rip), CTR6, CTR
+
+# TMP3 still holds the byte-swap mask loaded before jumping here
+ vmovdqu (KS), TMP4
+ vpshufb TMP3, CTR0, CTR0
+ vpxor TMP4, CTR0, CTR0
+ vpshufb TMP3, CTR1, CTR1
+ vpxor TMP4, CTR1, CTR1
+ vpshufb TMP3, CTR2, CTR2
+ vpxor TMP4, CTR2, CTR2
+ vpshufb TMP3, CTR3, CTR3
+ vpxor TMP4, CTR3, CTR3
+ vpshufb TMP3, CTR4, CTR4
+ vpxor TMP4, CTR4, CTR4
+ vpshufb TMP3, CTR5, CTR5
+ vpxor TMP4, CTR5, CTR5
+ vpshufb TMP3, CTR6, CTR6
+ vpxor TMP4, CTR6, CTR6
+ vpshufb TMP3, CTR7, CTR7
+ vpxor TMP4, CTR7, CTR7
+
+# newest previous block times table entry 0 starts the three accumulators
+ vmovdqu 16*0(Htbl), TMP3
+ vpclmulqdq $0x11, TMP3, TMP5, TMP1
+ vpclmulqdq $0x00, TMP3, TMP5, TMP2
+ vpshufd $78, TMP5, TMP3
+ vpxor TMP5, TMP3, TMP5
+ vmovdqu 128+0*16(Htbl), TMP3
+ vpclmulqdq $0x00, TMP3, TMP5, TMP0
+
+ ROUNDMUL 1
+
+ ROUNDMUL 2
+
+ ROUNDMUL 3
+
+ ROUNDMUL 4
+
+ ROUNDMUL 5
+
+ ROUNDMUL 6
+
+# round 7, with the oldest block: it absorbs the running hash T first
+ vpxor 7*16(%rsp), T, TMP5
+ vmovdqu 7*16(KS), TMP3
+
+ vaesenc TMP3, CTR0, CTR0
+ vaesenc TMP3, CTR1, CTR1
+ vaesenc TMP3, CTR2, CTR2
+ vaesenc TMP3, CTR3, CTR3
+
+ vpshufd $78, TMP5, TMP4
+ vpxor TMP5, TMP4, TMP4
+
+ vaesenc TMP3, CTR4, CTR4
+ vaesenc TMP3, CTR5, CTR5
+ vaesenc TMP3, CTR6, CTR6
+ vaesenc TMP3, CTR7, CTR7
+
+ vpclmulqdq $0x11, 7*16(Htbl), TMP5, TMP3
+ vpxor TMP3, TMP1, TMP1
+ vpclmulqdq $0x00, 7*16(Htbl), TMP5, TMP3
+ vpxor TMP3, TMP2, TMP2
+ vpclmulqdq $0x00, 128+7*16(Htbl), TMP4, TMP3
+ vpxor TMP3, TMP0, TMP0
+
+ ROUND 8
+ vmovdqa .Lpoly(%rip), TMP5
+
+# fold the middle term: high half to TMP4, low half to T
+ vpxor TMP1, TMP0, TMP0
+ vpxor TMP2, TMP0, TMP0
+ vpsrldq $8, TMP0, TMP3
+ vpxor TMP3, TMP1, TMP4
+ vpslldq $8, TMP0, TMP3
+ vpxor TMP3, TMP2, T
+
+# first reduction step
+ vpclmulqdq $0x10, TMP5, T, TMP1
+ vpalignr $8, T, T, T
+ vpxor T, TMP1, T
+
+ ROUND 9
+
+# second reduction step; the high half TMP4 is added after the stores below
+ vpclmulqdq $0x10, TMP5, T, TMP1
+ vpalignr $8, T, T, T
+ vpxor T, TMP1, T
+
+ vmovdqu 160(KS), TMP5
+ cmp $10, NR
+ jbe .LLast2
+
+ ROUND 10
+ ROUND 11
+
+ vmovdqu 192(KS), TMP5
+ cmp $12, NR
+ jbe .LLast2
+
+ ROUND 12
+ ROUND 13
+
+ vmovdqu 224(KS), TMP5
+
+.LLast2:
+
+ vpxor (PT), TMP5, TMP3
+ vaesenclast TMP3, CTR0, CTR0
+ vpxor 16(PT), TMP5, TMP3
+ vaesenclast TMP3, CTR1, CTR1
+ vpxor 32(PT), TMP5, TMP3
+ vaesenclast TMP3, CTR2, CTR2
+ vpxor 48(PT), TMP5, TMP3
+ vaesenclast TMP3, CTR3, CTR3
+ vpxor 64(PT), TMP5, TMP3
+ vaesenclast TMP3, CTR4, CTR4
+ vpxor 80(PT), TMP5, TMP3
+ vaesenclast TMP3, CTR5, CTR5
+ vpxor 96(PT), TMP5, TMP3
+ vaesenclast TMP3, CTR6, CTR6
+ vpxor 112(PT), TMP5, TMP3
+ vaesenclast TMP3, CTR7, CTR7
+
+# reload the byte-swap mask into TMP3 for the next iteration
+ vmovdqu .Lbswap_mask(%rip), TMP3
+
+ vmovdqu CTR0, (CT)
+ vpshufb TMP3, CTR0, CTR0
+ vmovdqu CTR1, 16(CT)
+ vpshufb TMP3, CTR1, CTR1
+ vmovdqu CTR2, 32(CT)
+ vpshufb TMP3, CTR2, CTR2
+ vmovdqu CTR3, 48(CT)
+ vpshufb TMP3, CTR3, CTR3
+ vmovdqu CTR4, 64(CT)
+ vpshufb TMP3, CTR4, CTR4
+ vmovdqu CTR5, 80(CT)
+ vpshufb TMP3, CTR5, CTR5
+ vmovdqu CTR6, 96(CT)
+ vpshufb TMP3, CTR6, CTR6
+ vmovdqu CTR7,112(CT)
+ vpshufb TMP3, CTR7, CTR7
+
+# finish the reduction: add the high half
+ vpxor TMP4, T, T
+
+ lea 128(CT), CT
+ lea 128(PT), PT
+ jmp .LDataOctets
+
+# hash the last group of eight ciphertext blocks (no encryption to overlap)
+.LEndOctets:
+
+ vmovdqa CTR7, TMP5
+ vmovdqa CTR6, 1*16(%rsp)
+ vmovdqa CTR5, 2*16(%rsp)
+ vmovdqa CTR4, 3*16(%rsp)
+ vmovdqa CTR3, 4*16(%rsp)
+ vmovdqa CTR2, 5*16(%rsp)
+ vmovdqa CTR1, 6*16(%rsp)
+ vmovdqa CTR0, 7*16(%rsp)
+
+ vmovdqu 16*0(Htbl), TMP3
+ vpclmulqdq $0x11, TMP3, TMP5, TMP1
+ vpclmulqdq $0x00, TMP3, TMP5, TMP2
+ vpshufd $78, TMP5, TMP3
+ vpxor TMP5, TMP3, TMP5
+ vmovdqu 128+0*16(Htbl), TMP3
+ vpclmulqdq $0x00, TMP3, TMP5, TMP0
+
+ KARATSUBA 1
+ KARATSUBA 2
+ KARATSUBA 3
+ KARATSUBA 4
+ KARATSUBA 5
+ KARATSUBA 6
+
+# oldest block absorbs the running hash and meets table entry 7
+ vmovdqu 7*16(%rsp), TMP5
+ vpxor T, TMP5, TMP5
+ vmovdqu 16*7(Htbl), TMP4
+ vpclmulqdq $0x11, TMP4, TMP5, TMP3
+ vpxor TMP3, TMP1, TMP1
+ vpclmulqdq $0x00, TMP4, TMP5, TMP3
+ vpxor TMP3, TMP2, TMP2
+ vpshufd $78, TMP5, TMP3
+ vpxor TMP5, TMP3, TMP5
+ vmovdqu 128+7*16(Htbl), TMP4
+ vpclmulqdq $0x00, TMP4, TMP5, TMP3
+ vpxor TMP3, TMP0, TMP0
+
+ vpxor TMP1, TMP0, TMP0
+ vpxor TMP2, TMP0, TMP0
+
+ vpsrldq $8, TMP0, TMP3
+ vpxor TMP3, TMP1, TMP4
+ vpslldq $8, TMP0, TMP3
+ vpxor TMP3, TMP2, T
+
+ vmovdqa .Lpoly(%rip), TMP2
+
+# two reduction steps, then add the high half
+ vpalignr $8, T, T, TMP1
+ vpclmulqdq $0x10, TMP2, T, T
+ vpxor T, TMP1, T
+
+ vpalignr $8, T, T, TMP1
+ vpclmulqdq $0x10, TMP2, T, T
+ vpxor T, TMP1, T
+
+ vpxor TMP4, T, T
+
+#Here we encrypt any remaining whole block
+.LDataSingles:
+
+ cmp $16, len
+ jb .LDataTail
+ sub $16, len
+
+# TMP1 = current counter in memory byte order; CTR moves on to the next one
+ vpshufb .Lbswap_mask(%rip), CTR, TMP1
+ vpaddd .Lone(%rip), CTR, CTR
+
+ vpxor (KS), TMP1, TMP1
+ vaesenc 16*1(KS), TMP1, TMP1
+ vaesenc 16*2(KS), TMP1, TMP1
+ vaesenc 16*3(KS), TMP1, TMP1
+ vaesenc 16*4(KS), TMP1, TMP1
+ vaesenc 16*5(KS), TMP1, TMP1
+ vaesenc 16*6(KS), TMP1, TMP1
+ vaesenc 16*7(KS), TMP1, TMP1
+ vaesenc 16*8(KS), TMP1, TMP1
+ vaesenc 16*9(KS), TMP1, TMP1
+ vmovdqu 16*10(KS), TMP2
+ cmp $10, NR
+ je .LLast3
+ vaesenc 16*10(KS), TMP1, TMP1
+ vaesenc 16*11(KS), TMP1, TMP1
+ vmovdqu 16*12(KS), TMP2
+ cmp $12, NR
+ je .LLast3
+ vaesenc 16*12(KS), TMP1, TMP1
+ vaesenc 16*13(KS), TMP1, TMP1
+ vmovdqu 16*14(KS), TMP2
+
+.LLast3:
+ vaesenclast TMP2, TMP1, TMP1
+
+ vpxor (PT), TMP1, TMP1
+ vmovdqu TMP1, (CT)
+ addq $16, CT
+ addq $16, PT
+
+# hash the ciphertext block just produced
+ vpshufb .Lbswap_mask(%rip), TMP1, TMP1
+ vpxor TMP1, T, T
+ vmovdqu (Htbl), TMP0
+ call GFMUL
+
+ jmp .LDataSingles
+
+#Here we encrypt the final partial block, if there is one
+.LDataTail:
+
+ test len, len
+ jz DATA_END
+# First prepare the counter block
+ vpshufb .Lbswap_mask(%rip), CTR, TMP1
+ vpaddd .Lone(%rip), CTR, CTR
+
+ vpxor (KS), TMP1, TMP1
+ vaesenc 16*1(KS), TMP1, TMP1
+ vaesenc 16*2(KS), TMP1, TMP1
+ vaesenc 16*3(KS), TMP1, TMP1
+ vaesenc 16*4(KS), TMP1, TMP1
+ vaesenc 16*5(KS), TMP1, TMP1
+ vaesenc 16*6(KS), TMP1, TMP1
+ vaesenc 16*7(KS), TMP1, TMP1
+ vaesenc 16*8(KS), TMP1, TMP1
+ vaesenc 16*9(KS), TMP1, TMP1
+ vmovdqu 16*10(KS), TMP2
+ cmp $10, NR
+ je .LLast4
+ vaesenc 16*10(KS), TMP1, TMP1
+ vaesenc 16*11(KS), TMP1, TMP1
+ vmovdqu 16*12(KS), TMP2
+ cmp $12, NR
+ je .LLast4
+ vaesenc 16*12(KS), TMP1, TMP1
+ vaesenc 16*13(KS), TMP1, TMP1
+ vmovdqu 16*14(KS), TMP2
+
+.LLast4:
+ vaesenclast TMP2, TMP1, TMP1
+#Zero a temp location
+ vpxor TMP2, TMP2, TMP2
+ vmovdqa TMP2, (%rsp)
+
+# Copy the required bytes only (could probably use rep movsb)
+# KS is no longer needed as a pointer and becomes the byte index
+ xor KS, KS
+.LEncCpy:
+ cmp KS, len
+ je .LEncCpyEnd
+ movb (PT, KS, 1), %r8b
+ movb %r8b, (%rsp, KS, 1)
+ inc KS
+ jmp .LEncCpy
+.LEncCpyEnd:
+# Xor with the counter block
+ vpxor (%rsp), TMP1, TMP0
+# Again, store at temp location
+ vmovdqa TMP0, (%rsp)
+# Copy only the required bytes to CT, and zero the rest for the hash
+ xor KS, KS
+.LEncCpy2:
+ cmp KS, len
+ je .LEncCpy3
+ movb (%rsp, KS, 1), %r8b
+ movb %r8b, (CT, KS, 1)
+ inc KS
+ jmp .LEncCpy2
+.LEncCpy3:
+ cmp $16, KS
+ je .LEndCpy3
+ movb $0, (%rsp, KS, 1)
+ inc KS
+ jmp .LEncCpy3
+.LEndCpy3:
+ vmovdqa (%rsp), TMP0
+
+# hash the zero-padded ciphertext block
+ vpshufb .Lbswap_mask(%rip), TMP0, TMP0
+ vpxor TMP0, T, T
+ vmovdqu (Htbl), TMP0
+ call GFMUL
+
+DATA_END:
+
+# back to memory byte order, then store the hash and counter in the context
+ vpshufb .Lbswap_mask(%rip), T, T
+ vpshufb .Lbswap_mask(%rip), CTR, CTR
+ vmovdqu T, 272(Gctx)
+ vmovdqu CTR, 288(Gctx)
+
+ movq %rbp, %rsp
+
+ popq %rbx
+ popq %rbp
+ ret
+ .size intel_aes_gcmENC, .-intel_aes_gcmENC
+
+#########################
+# Decrypt and Authenticate: CTR-decrypt len bytes from CT into PT and fold the ciphertext into the hash T
+# void intel_aes_gcmDEC(uint8_t* CT, uint8_t* PT, void *Gctx, uint64_t len);
+.type intel_aes_gcmDEC,@function
+.globl intel_aes_gcmDEC
+.align 16
+intel_aes_gcmDEC:
+# parameter 1: CT # input, ciphertext (%rdi, see the .set lines below)
+# parameter 2: PT # output, plaintext (%rsi)
+# parameter 3: %rdx # Gctx; Htbl is an alias for the same register
+# parameter 4: %rcx # len in bytes; when it is zero the function returns at once
+# DEC_KARATSUBA i: hash step for ciphertext block (7-i) of the current 8-block group, using table entry i
+.macro DEC_KARATSUBA i
+ vmovdqu (7-\i)*16(CT), TMP5
+ vpshufb .Lbswap_mask(%rip), TMP5, TMP5
+
+ vpclmulqdq $0x11, 16*\i(Htbl), TMP5, TMP3
+ vpxor TMP3, TMP1, TMP1 # TMP1 collects the high-qword products
+ vpclmulqdq $0x00, 16*\i(Htbl), TMP5, TMP3
+ vpxor TMP3, TMP2, TMP2 # TMP2 collects the low-qword products
+ vpshufd $78, TMP5, TMP3 # swap the two 64-bit halves of the block
+ vpxor TMP5, TMP3, TMP5 # both halves now hold high^low
+ vpclmulqdq $0x00, 128+\i*16(Htbl), TMP5, TMP3
+ vpxor TMP3, TMP0, TMP0 # TMP0 collects the middle (Karatsuba) products
+.endm
+
+.set PT,%rsi
+.set CT,%rdi
+.set Htbl, %rdx # hash table, read at offsets 0..7*16 and 128..128+7*16 by the hashing code
+.set len, %rcx
+.set KS,%r9 # round-key pointer; reused as a byte index in the tail code
+.set NR,%r10d # compared against 10 and 12 to pick the last round key
+
+.set Gctx, %rdx
+
+.set T,%xmm0 # running hash value
+.set TMP0,%xmm1
+.set TMP1,%xmm2
+.set TMP2,%xmm3
+.set TMP3,%xmm4
+.set TMP4,%xmm5
+.set TMP5,%xmm6
+.set CTR0,%xmm7
+.set CTR1,%xmm8
+.set CTR2,%xmm9
+.set CTR3,%xmm10
+.set CTR4,%xmm11
+.set CTR5,%xmm12
+.set CTR6,%xmm13
+.set CTR7,%xmm14
+.set CTR,%xmm15 # counter block for the next unprocessed block
+
+ test len, len
+ jnz .LbeginDec
+ ret
+
+.LbeginDec:
+
+ pushq %rbp
+ pushq %rbx
+ movq %rsp, %rbp # %rbp remembers the stack pointer for the epilogue
+ sub $128, %rsp
+ andq $-16, %rsp # 16-byte aligned scratch area; the tail code uses vmovdqa on (%rsp)
+ vmovdqu 288(Gctx), CTR
+ vmovdqu 272(Gctx), T
+ mov 304(Gctx), KS # pointer kept at offset 304 of Gctx (presumably the AES context - confirm)
+ mov 4(KS), NR
+ lea 48(KS), KS # round keys are read starting 48 bytes into that object
+
+ vpshufb .Lbswap_mask(%rip), CTR, CTR
+ vpshufb .Lbswap_mask(%rip), T, T
+
+ vmovdqu .Lbswap_mask(%rip), TMP3 # the octet loop expects the byte-swap mask in TMP3
+ jmp .LDECOctets
+
+# Decrypt 8 blocks each time while hashing them at the same time
+.align 64
+.LDECOctets:
+
+ cmp $128, len
+ jb .LDECSingles
+ sub $128, len
+
+ vmovdqa CTR, CTR0
+ vpaddd .Lone(%rip), CTR0, CTR1
+ vpaddd .Ltwo(%rip), CTR0, CTR2
+ vpaddd .Lone(%rip), CTR2, CTR3
+ vpaddd .Ltwo(%rip), CTR2, CTR4
+ vpaddd .Lone(%rip), CTR4, CTR5
+ vpaddd .Ltwo(%rip), CTR4, CTR6
+ vpaddd .Lone(%rip), CTR6, CTR7
+ vpaddd .Ltwo(%rip), CTR6, CTR # CTR is advanced for the next iteration
+
+ vpshufb TMP3, CTR0, CTR0
+ vpshufb TMP3, CTR1, CTR1
+ vpshufb TMP3, CTR2, CTR2
+ vpshufb TMP3, CTR3, CTR3
+ vpshufb TMP3, CTR4, CTR4
+ vpshufb TMP3, CTR5, CTR5
+ vpshufb TMP3, CTR6, CTR6
+ vpshufb TMP3, CTR7, CTR7
+
+ vmovdqu (KS), TMP3 # first round key, XORed into all eight counter blocks
+ vpxor TMP3, CTR0, CTR0
+ vpxor TMP3, CTR1, CTR1
+ vpxor TMP3, CTR2, CTR2
+ vpxor TMP3, CTR3, CTR3
+ vpxor TMP3, CTR4, CTR4
+ vpxor TMP3, CTR5, CTR5
+ vpxor TMP3, CTR6, CTR6
+ vpxor TMP3, CTR7, CTR7
+
+ vmovdqu 7*16(CT), TMP5 # hashing starts with the last ciphertext block of the group
+ vpshufb .Lbswap_mask(%rip), TMP5, TMP5
+ vmovdqu 16*0(Htbl), TMP3
+ vpclmulqdq $0x11, TMP3, TMP5, TMP1
+ vpclmulqdq $0x00, TMP3, TMP5, TMP2
+ vpshufd $78, TMP5, TMP3
+ vpxor TMP5, TMP3, TMP5
+ vmovdqu 128+0*16(Htbl), TMP3
+ vpclmulqdq $0x00, TMP3, TMP5, TMP0 # TMP0/TMP1/TMP2 are initialised here, then accumulated by DEC_KARATSUBA
+# ROUND n is a macro defined outside this section; presumably it applies round key n to CTR0..CTR7 - confirm
+ ROUND 1
+ DEC_KARATSUBA 1
+
+ ROUND 2
+ DEC_KARATSUBA 2
+
+ ROUND 3
+ DEC_KARATSUBA 3
+
+ ROUND 4
+ DEC_KARATSUBA 4
+
+ ROUND 5
+ DEC_KARATSUBA 5
+
+ ROUND 6
+ DEC_KARATSUBA 6
+
+ ROUND 7
+
+ vmovdqu 0*16(CT), TMP5
+ vpshufb .Lbswap_mask(%rip), TMP5, TMP5
+ vpxor T, TMP5, TMP5 # the running hash is folded into the first block of the group
+ vmovdqu 16*7(Htbl), TMP4
+
+ vpclmulqdq $0x11, TMP4, TMP5, TMP3
+ vpxor TMP3, TMP1, TMP1
+ vpclmulqdq $0x00, TMP4, TMP5, TMP3
+ vpxor TMP3, TMP2, TMP2
+
+ vpshufd $78, TMP5, TMP3
+ vpxor TMP5, TMP3, TMP5
+ vmovdqu 128+7*16(Htbl), TMP4
+
+ vpclmulqdq $0x00, TMP4, TMP5, TMP3
+ vpxor TMP3, TMP0, TMP0
+
+ ROUND 8
+
+ vpxor TMP1, TMP0, TMP0
+ vpxor TMP2, TMP0, TMP0 # middle term = TMP0 ^ high ^ low
+
+ vpsrldq $8, TMP0, TMP3
+ vpxor TMP3, TMP1, TMP4 # TMP4 = upper 128 bits of the 256-bit product
+ vpslldq $8, TMP0, TMP3
+ vpxor TMP3, TMP2, T # T = lower 128 bits of the 256-bit product
+ vmovdqa .Lpoly(%rip), TMP2
+
+ vpalignr $8, T, T, TMP1 # first reduction step with .Lpoly
+ vpclmulqdq $0x10, TMP2, T, T
+ vpxor T, TMP1, T
+
+ ROUND 9
+
+ vpalignr $8, T, T, TMP1 # second reduction step with .Lpoly
+ vpclmulqdq $0x10, TMP2, T, T
+ vpxor T, TMP1, T
+
+ vmovdqu 160(KS), TMP5 # last round key when NR is 10
+ cmp $10, NR
+
+ jbe .LDECLast1
+
+ ROUND 10
+ ROUND 11
+
+ vmovdqu 192(KS), TMP5 # last round key when NR is 12
+ cmp $12, NR
+
+ jbe .LDECLast1
+
+ ROUND 12
+ ROUND 13
+
+ vmovdqu 224(KS), TMP5 # last round key otherwise
+
+.LDECLast1:
+
+ vpxor (CT), TMP5, TMP3 # the ciphertext is XORed into the last round key, so vaesenclast yields plaintext
+ vaesenclast TMP3, CTR0, CTR0
+ vpxor 16(CT), TMP5, TMP3
+ vaesenclast TMP3, CTR1, CTR1
+ vpxor 32(CT), TMP5, TMP3
+ vaesenclast TMP3, CTR2, CTR2
+ vpxor 48(CT), TMP5, TMP3
+ vaesenclast TMP3, CTR3, CTR3
+ vpxor 64(CT), TMP5, TMP3
+ vaesenclast TMP3, CTR4, CTR4
+ vpxor 80(CT), TMP5, TMP3
+ vaesenclast TMP3, CTR5, CTR5
+ vpxor 96(CT), TMP5, TMP3
+ vaesenclast TMP3, CTR6, CTR6
+ vpxor 112(CT), TMP5, TMP3
+ vaesenclast TMP3, CTR7, CTR7
+
+ vmovdqu .Lbswap_mask(%rip), TMP3 # restore the byte-swap mask for the next iteration
+
+ vmovdqu CTR0, (PT)
+ vmovdqu CTR1, 16(PT)
+ vmovdqu CTR2, 32(PT)
+ vmovdqu CTR3, 48(PT)
+ vmovdqu CTR4, 64(PT)
+ vmovdqu CTR5, 80(PT)
+ vmovdqu CTR6, 96(PT)
+ vmovdqu CTR7,112(PT)
+
+ vpxor TMP4, T, T # add the upper half saved in TMP4 to finish the hash update
+
+ lea 128(CT), CT
+ lea 128(PT), PT
+ jmp .LDECOctets
+
+#Here we decrypt and hash any remaining whole block
+.LDECSingles:
+
+ cmp $16, len
+ jb .LDECTail
+ sub $16, len
+
+ vmovdqu (CT), TMP1
+ vpshufb .Lbswap_mask(%rip), TMP1, TMP1
+ vpxor TMP1, T, T
+ vmovdqu (Htbl), TMP0
+ call GFMUL # T = T * TMP0; the ciphertext block is hashed before it is decrypted
+
+
+ vpshufb .Lbswap_mask(%rip), CTR, TMP1
+ vpaddd .Lone(%rip), CTR, CTR
+
+ vpxor (KS), TMP1, TMP1
+ vaesenc 16*1(KS), TMP1, TMP1
+ vaesenc 16*2(KS), TMP1, TMP1
+ vaesenc 16*3(KS), TMP1, TMP1
+ vaesenc 16*4(KS), TMP1, TMP1
+ vaesenc 16*5(KS), TMP1, TMP1
+ vaesenc 16*6(KS), TMP1, TMP1
+ vaesenc 16*7(KS), TMP1, TMP1
+ vaesenc 16*8(KS), TMP1, TMP1
+ vaesenc 16*9(KS), TMP1, TMP1
+ vmovdqu 16*10(KS), TMP2 # TMP2 always ends up holding the last round key
+ cmp $10, NR
+ je .LDECLast2
+ vaesenc 16*10(KS), TMP1, TMP1
+ vaesenc 16*11(KS), TMP1, TMP1
+ vmovdqu 16*12(KS), TMP2
+ cmp $12, NR
+ je .LDECLast2
+ vaesenc 16*12(KS), TMP1, TMP1
+ vaesenc 16*13(KS), TMP1, TMP1
+ vmovdqu 16*14(KS), TMP2
+.LDECLast2:
+ vaesenclast TMP2, TMP1, TMP1 # TMP1 = keystream block
+
+ vpxor (CT), TMP1, TMP1
+ vmovdqu TMP1, (PT)
+ addq $16, CT
+ addq $16, PT
+ jmp .LDECSingles
+
+#Here we decrypt the final partial block, if there is one
+.LDECTail:
+ test len, len
+ jz .LDEC_END
+
+ vpshufb .Lbswap_mask(%rip), CTR, TMP1
+ vpaddd .Lone(%rip), CTR, CTR
+
+ vpxor (KS), TMP1, TMP1
+ vaesenc 16*1(KS), TMP1, TMP1
+ vaesenc 16*2(KS), TMP1, TMP1
+ vaesenc 16*3(KS), TMP1, TMP1
+ vaesenc 16*4(KS), TMP1, TMP1
+ vaesenc 16*5(KS), TMP1, TMP1
+ vaesenc 16*6(KS), TMP1, TMP1
+ vaesenc 16*7(KS), TMP1, TMP1
+ vaesenc 16*8(KS), TMP1, TMP1
+ vaesenc 16*9(KS), TMP1, TMP1
+ vmovdqu 16*10(KS), TMP2
+ cmp $10, NR
+ je .LDECLast3
+ vaesenc 16*10(KS), TMP1, TMP1
+ vaesenc 16*11(KS), TMP1, TMP1
+ vmovdqu 16*12(KS), TMP2
+ cmp $12, NR
+ je .LDECLast3
+ vaesenc 16*12(KS), TMP1, TMP1
+ vaesenc 16*13(KS), TMP1, TMP1
+ vmovdqu 16*14(KS), TMP2
+
+.LDECLast3:
+ vaesenclast TMP2, TMP1, TMP1 # TMP1 = keystream block
+
+ vpxor TMP2, TMP2, TMP2
+ vmovdqa TMP2, (%rsp) # zero the 16-byte scratch block
+# Copy the required bytes only (could probably use rep movsb)
+ xor KS, KS # KS becomes the byte index; no round key is read after this point
+.LDecCpy:
+ cmp KS, len
+ je .LDecCpy2
+ movb (CT, KS, 1), %r8b
+ movb %r8b, (%rsp, KS, 1)
+ inc KS
+ jmp .LDecCpy
+.LDecCpy2:
+ cmp $16, KS # pad the rest of the scratch block with zeros
+ je .LDecCpyEnd
+ movb $0, (%rsp, KS, 1)
+ inc KS
+ jmp .LDecCpy2
+.LDecCpyEnd:
+# Xor with the counter block
+ vmovdqa (%rsp), TMP0 # TMP0 keeps the zero-padded ciphertext for the hash below
+ vpxor TMP0, TMP1, TMP1
+# Again, store at temp location
+ vmovdqa TMP1, (%rsp)
+# Copy only the required bytes to PT; the hash below uses the zero-padded ciphertext kept in TMP0
+ xor KS, KS
+.LDecCpy3:
+ cmp KS, len
+ je .LDecCpyEnd3
+ movb (%rsp, KS, 1), %r8b
+ movb %r8b, (PT, KS, 1)
+ inc KS
+ jmp .LDecCpy3
+.LDecCpyEnd3:
+ vpshufb .Lbswap_mask(%rip), TMP0, TMP0
+ vpxor TMP0, T, T
+ vmovdqu (Htbl), TMP0
+ call GFMUL
+.LDEC_END:
+
+ vpshufb .Lbswap_mask(%rip), T, T
+ vpshufb .Lbswap_mask(%rip), CTR, CTR
+ vmovdqu T, 272(Gctx) # write the updated hash and counter back to the context
+ vmovdqu CTR, 288(Gctx)
+
+ movq %rbp, %rsp
+
+ popq %rbx
+ popq %rbp
+ ret
+ .size intel_aes_gcmDEC, .-intel_aes_gcmDEC
+#########################
+# a = T (overwritten with the result)
+# b = TMP0 - remains unchanged
+# res = T = a * b, carry-less multiply followed by two reduction steps with .Lpoly
+# uses also TMP1,TMP2,TMP3,TMP4
+# __m128i GFMUL(__m128i A, __m128i B);
+.type GFMUL,@function
+.globl GFMUL
+GFMUL:
+ vpclmulqdq $0x00, TMP0, T, TMP1 # TMP1 = low qword of T times low qword of TMP0
+ vpclmulqdq $0x11, TMP0, T, TMP4 # TMP4 = high qword of T times high qword of TMP0
+
+ vpshufd $78, T, TMP2
+ vpshufd $78, TMP0, TMP3
+ vpxor T, TMP2, TMP2 # TMP2 = high^low of T
+ vpxor TMP0, TMP3, TMP3 # TMP3 = high^low of TMP0
+
+ vpclmulqdq $0x00, TMP3, TMP2, TMP2
+ vpxor TMP1, TMP2, TMP2
+ vpxor TMP4, TMP2, TMP2 # TMP2 = middle term of the Karatsuba product
+
+ vpslldq $8, TMP2, TMP3
+ vpsrldq $8, TMP2, TMP2
+
+ vpxor TMP3, TMP1, TMP1 # TMP1 = lower 128 bits of the product
+ vpxor TMP2, TMP4, TMP4 # TMP4 = upper 128 bits of the product
+
+ vpclmulqdq $0x10, .Lpoly(%rip), TMP1, TMP2 # first reduction step
+ vpshufd $78, TMP1, TMP3
+ vpxor TMP3, TMP2, TMP1
+
+ vpclmulqdq $0x10, .Lpoly(%rip), TMP1, TMP2 # second reduction step
+ vpshufd $78, TMP1, TMP3
+ vpxor TMP3, TMP2, TMP1
+
+ vpxor TMP4, TMP1, T # result = reduced lower half ^ upper half
+ ret
+.size GFMUL, .-GFMUL
+
diff --git a/security/nss/lib/freebl/jpake.c b/security/nss/lib/freebl/jpake.c
new file mode 100644
index 000000000..741c7a876
--- /dev/null
+++ b/security/nss/lib/freebl/jpake.c
@@ -0,0 +1,495 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "blapi.h"
+#include "secerr.h"
+#include "secitem.h"
+#include "secmpi.h"
+
+/* Feed one SECItem into a running hash as a two-byte length, most
+ * significant byte first, followed by the item's bytes. Items whose length
+ * does not fit in 16 bits are rejected with MP_BADARG before anything is
+ * hashed. The framing is designed to match the OpenSSL J-PAKE
+ * implementation.
+ */
+static mp_err
+hashSECItem(HASHContext *hash, const SECItem *it)
+{
+    unsigned char lenBE[2];
+
+    if (it->len > 0xffff) {
+        return MP_BADARG;
+    }
+
+    /* network byte order */
+    lenBE[0] = (unsigned char)(it->len >> 8);
+    lenBE[1] = (unsigned char)(it->len);
+
+    hash->hashobj->update(hash->hash_context, lenBE, sizeof lenBE);
+    hash->hashobj->update(hash->hash_context, it->data, it->len);
+    return MP_OKAY;
+}
+
+/* Compute h = H(g, gv, gx, signerID).
+ *
+ * Each public value is fed to the hash with its length prefix (see
+ * hashSECItem) and the resulting digest is converted to an mp_int through
+ * SECITEM_TO_MPINT. Returns MP_BADARG when the hash type is unknown, its
+ * digest does not fit the local buffer, or an item is too long; MP_MEM when
+ * no hash context can be created.
+ */
+static mp_err
+hashPublicParams(HASH_HashType hashType, const SECItem *g,
+                 const SECItem *gv, const SECItem *gx,
+                 const SECItem *signerID, mp_int *h)
+{
+    mp_err err;
+    HASHContext ctx;
+    unsigned char digest[HASH_LENGTH_MAX];
+    SECItem digestItem;
+
+    ctx.hashobj = HASH_GetRawHashObject(hashType);
+    if (ctx.hashobj == NULL) {
+        return MP_BADARG;
+    }
+    if (ctx.hashobj->length > sizeof digest) {
+        return MP_BADARG;
+    }
+
+    ctx.hash_context = ctx.hashobj->create();
+    if (ctx.hash_context == NULL) {
+        return MP_MEM;
+    }
+
+    digestItem.data = digest;
+    digestItem.len = ctx.hashobj->length;
+
+    ctx.hashobj->begin(ctx.hash_context);
+    CHECK_MPI_OK(hashSECItem(&ctx, g));
+    CHECK_MPI_OK(hashSECItem(&ctx, gv));
+    CHECK_MPI_OK(hashSECItem(&ctx, gx));
+    CHECK_MPI_OK(hashSECItem(&ctx, signerID));
+    ctx.hashobj->end(ctx.hash_context, digestItem.data, &digestItem.len,
+                     sizeof digest);
+    SECITEM_TO_MPINT(digestItem, h);
+
+cleanup:
+    /* The context is released on both the success and the failure path. */
+    if (ctx.hash_context != NULL) {
+        ctx.hashobj->destroy(ctx.hash_context, PR_TRUE);
+    }
+
+    return err;
+}
+
+/* Generate a Schnorr signature for round 1 or round 2.
+ *
+ * Computes gv = g^v (mod p) and r = v - x*h (mod q), where
+ * h = H(g, gv, gx, signerID). When gxIn is NULL, gx = g^x (mod p) is
+ * computed and returned in gxOut; otherwise gxIn is used as gx and gxOut
+ * must be NULL. testRandom, when non-NULL, supplies v instead of
+ * DSA_NewRandom(). gxOut, gv and r must have NULL data on entry; results
+ * are written through MPINT_TO_SECITEM with the arena.
+ * Fails with SEC_ERROR_INVALID_ARGS when the arguments are malformed.
+ */
+SECStatus
+JPAKE_Sign(PLArenaPool *arena, const PQGParams *pqg, HASH_HashType hashType,
+ const SECItem *signerID, const SECItem *x,
+ const SECItem *testRandom, const SECItem *gxIn, SECItem *gxOut,
+ SECItem *gv, SECItem *r)
+{
+ SECStatus rv = SECSuccess;
+ mp_err err;
+ mp_int p;
+ mp_int q;
+ mp_int g;
+ mp_int X;
+ mp_int GX;
+ mp_int V;
+ mp_int GV;
+ mp_int h;
+ mp_int tmp;
+ mp_int R;
+ SECItem v;
+
+ if (!arena ||
+ !pqg || !pqg->prime.data || pqg->prime.len == 0 ||
+ !pqg->subPrime.data || pqg->subPrime.len == 0 ||
+ !pqg->base.data || pqg->base.len == 0 ||
+ !signerID || !signerID->data || signerID->len == 0 ||
+ !x || !x->data || x->len == 0 ||
+ (testRandom && (!testRandom->data || testRandom->len == 0)) ||
+ (gxIn == NULL && (!gxOut || gxOut->data != NULL)) || /* gx is computed: gxOut must be an empty item */
+ (gxIn != NULL && (!gxIn->data || gxIn->len == 0 || gxOut != NULL)) || /* gx is supplied: gxOut must be NULL */
+ !gv || gv->data != NULL ||
+ !r || r->data != NULL) {
+ PORT_SetError(SEC_ERROR_INVALID_ARGS);
+ return SECFailure;
+ }
+
+ MP_DIGITS(&p) = 0; /* presumably so that mp_clear at cleanup is harmless for values never initialised */
+ MP_DIGITS(&q) = 0;
+ MP_DIGITS(&g) = 0;
+ MP_DIGITS(&X) = 0;
+ MP_DIGITS(&GX) = 0;
+ MP_DIGITS(&V) = 0;
+ MP_DIGITS(&GV) = 0;
+ MP_DIGITS(&h) = 0;
+ MP_DIGITS(&tmp) = 0;
+ MP_DIGITS(&R) = 0;
+
+ CHECK_MPI_OK(mp_init(&p));
+ CHECK_MPI_OK(mp_init(&q));
+ CHECK_MPI_OK(mp_init(&g));
+ CHECK_MPI_OK(mp_init(&X));
+ CHECK_MPI_OK(mp_init(&GX));
+ CHECK_MPI_OK(mp_init(&V));
+ CHECK_MPI_OK(mp_init(&GV));
+ CHECK_MPI_OK(mp_init(&h));
+ CHECK_MPI_OK(mp_init(&tmp));
+ CHECK_MPI_OK(mp_init(&R));
+
+ SECITEM_TO_MPINT(pqg->prime, &p);
+ SECITEM_TO_MPINT(pqg->subPrime, &q);
+ SECITEM_TO_MPINT(pqg->base, &g);
+ SECITEM_TO_MPINT(*x, &X);
+
+ /* gx = g^x (mod p), computed here unless the caller supplied it */
+ if (gxIn == NULL) {
+ CHECK_MPI_OK(mp_exptmod(&g, &X, &p, &GX));
+ MPINT_TO_SECITEM(&GX, gxOut, arena);
+ gxIn = gxOut; /* the freshly computed gx is the one hashed below */
+ } else {
+ SECITEM_TO_MPINT(*gxIn, &GX);
+ }
+
+ /* v is a random value in the q subgroup */
+ if (testRandom == NULL) {
+ v.data = NULL;
+ rv = DSA_NewRandom(arena, &pqg->subPrime, &v);
+ if (rv != SECSuccess) {
+ goto cleanup;
+ }
+ } else {
+ v.data = testRandom->data;
+ v.len = testRandom->len;
+ }
+ SECITEM_TO_MPINT(v, &V);
+
+ /* gv = g^v (mod p); v is expected to satisfy 1 <= v < q */
+ CHECK_MPI_OK(mp_exptmod(&g, &V, &p, &GV));
+ MPINT_TO_SECITEM(&GV, gv, arena);
+
+ /* h = H(g, gv, gx, signerID) */
+ CHECK_MPI_OK(hashPublicParams(hashType, &pqg->base, gv, gxIn, signerID,
+ &h));
+
+ /* r = v - x*h (mod q) */
+ CHECK_MPI_OK(mp_mulmod(&X, &h, &q, &tmp));
+ CHECK_MPI_OK(mp_submod(&V, &tmp, &q, &R));
+ MPINT_TO_SECITEM(&R, r, arena);
+
+cleanup:
+ mp_clear(&p);
+ mp_clear(&q);
+ mp_clear(&g);
+ mp_clear(&X);
+ mp_clear(&GX);
+ mp_clear(&V);
+ mp_clear(&GV);
+ mp_clear(&h);
+ mp_clear(&tmp);
+ mp_clear(&R);
+
+ if (rv == SECSuccess && err != MP_OKAY) { /* an MPI step failed while rv was still SECSuccess */
+ MP_TO_SEC_ERROR(err);
+ rv = SECFailure;
+ }
+ return rv;
+}
+
+/* Verify a Schnorr signature generated by the peer in round 1 or round 2.
+ *
+ * The signature is accepted when gx is in [1, p-2], gx^q (mod p) == 1,
+ * r < q, and the encoding of g^r * gx^h (mod p) matches gv byte for byte,
+ * with h = H(g, gv, gx, peerID). signerID is used only to reject the case
+ * where it equals peerID.
+ * Fails with SEC_ERROR_BAD_SIGNATURE when a check does not hold and with
+ * SEC_ERROR_INVALID_ARGS when the arguments are malformed.
+ */
+SECStatus
+JPAKE_Verify(PLArenaPool *arena, const PQGParams *pqg, HASH_HashType hashType,
+ const SECItem *signerID, const SECItem *peerID,
+ const SECItem *gx, const SECItem *gv, const SECItem *r)
+{
+ SECStatus rv = SECSuccess;
+ mp_err err;
+ mp_int p;
+ mp_int q;
+ mp_int g;
+ mp_int p_minus_1;
+ mp_int GX;
+ mp_int h;
+ mp_int one;
+ mp_int R;
+ mp_int gr;
+ mp_int gxh;
+ mp_int gr_gxh;
+ SECItem calculated;
+
+ if (!arena ||
+ !pqg || !pqg->prime.data || pqg->prime.len == 0 ||
+ !pqg->subPrime.data || pqg->subPrime.len == 0 ||
+ !pqg->base.data || pqg->base.len == 0 ||
+ !signerID || !signerID->data || signerID->len == 0 ||
+ !peerID || !peerID->data || peerID->len == 0 ||
+ !gx || !gx->data || gx->len == 0 ||
+ !gv || !gv->data || gv->len == 0 ||
+ !r || !r->data || r->len == 0 ||
+ SECITEM_CompareItem(signerID, peerID) == SECEqual) { /* the two identities must differ */
+ PORT_SetError(SEC_ERROR_INVALID_ARGS);
+ return SECFailure;
+ }
+
+ MP_DIGITS(&p) = 0;
+ MP_DIGITS(&q) = 0;
+ MP_DIGITS(&g) = 0;
+ MP_DIGITS(&p_minus_1) = 0;
+ MP_DIGITS(&GX) = 0;
+ MP_DIGITS(&h) = 0;
+ MP_DIGITS(&one) = 0;
+ MP_DIGITS(&R) = 0;
+ MP_DIGITS(&gr) = 0;
+ MP_DIGITS(&gxh) = 0;
+ MP_DIGITS(&gr_gxh) = 0;
+ calculated.data = NULL;
+
+ CHECK_MPI_OK(mp_init(&p));
+ CHECK_MPI_OK(mp_init(&q));
+ CHECK_MPI_OK(mp_init(&g));
+ CHECK_MPI_OK(mp_init(&p_minus_1));
+ CHECK_MPI_OK(mp_init(&GX));
+ CHECK_MPI_OK(mp_init(&h));
+ CHECK_MPI_OK(mp_init(&one));
+ CHECK_MPI_OK(mp_init(&R));
+ CHECK_MPI_OK(mp_init(&gr));
+ CHECK_MPI_OK(mp_init(&gxh));
+ CHECK_MPI_OK(mp_init(&gr_gxh));
+
+ SECITEM_TO_MPINT(pqg->prime, &p);
+ SECITEM_TO_MPINT(pqg->subPrime, &q);
+ SECITEM_TO_MPINT(pqg->base, &g);
+ SECITEM_TO_MPINT(*gx, &GX);
+ SECITEM_TO_MPINT(*r, &R);
+
+ CHECK_MPI_OK(mp_sub_d(&p, 1, &p_minus_1));
+ CHECK_MPI_OK(mp_exptmod(&GX, &q, &p, &one)); /* "one" holds gx^q mod p, which must equal 1 */
+ /* Check g^x is in [1, p-2], R is in [0, q-1], and (g^x)^q mod p == 1 */
+ if (!(mp_cmp_z(&GX) > 0 &&
+ mp_cmp(&GX, &p_minus_1) < 0 &&
+ mp_cmp(&R, &q) < 0 &&
+ mp_cmp_d(&one, 1) == 0)) {
+ goto badSig;
+ }
+
+ CHECK_MPI_OK(hashPublicParams(hashType, &pqg->base, gv, gx, peerID,
+ &h));
+
+ /* Calculate g^v = g^r * g^x^h */
+ CHECK_MPI_OK(mp_exptmod(&g, &R, &p, &gr));
+ CHECK_MPI_OK(mp_exptmod(&GX, &h, &p, &gxh));
+ CHECK_MPI_OK(mp_mulmod(&gr, &gxh, &p, &gr_gxh));
+
+ /* Compare calculated g^v to given g^v */
+ MPINT_TO_SECITEM(&gr_gxh, &calculated, arena);
+ if (calculated.len == gv->len &&
+ NSS_SecureMemcmp(calculated.data, gv->data, calculated.len) == 0) {
+ rv = SECSuccess;
+ } else {
+ badSig: /* also reached by goto from the range checks above */
+ PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+ rv = SECFailure;
+ }
+
+cleanup:
+ mp_clear(&p);
+ mp_clear(&q);
+ mp_clear(&g);
+ mp_clear(&p_minus_1);
+ mp_clear(&GX);
+ mp_clear(&h);
+ mp_clear(&one);
+ mp_clear(&R);
+ mp_clear(&gr);
+ mp_clear(&gxh);
+ mp_clear(&gr_gxh);
+
+ if (rv == SECSuccess && err != MP_OKAY) { /* an MPI step failed while rv was still SECSuccess */
+ MP_TO_SEC_ERROR(err);
+ rv = SECFailure;
+ }
+ return rv;
+}
+
+/* Calculate base = gx1*gx3*gx4 (mod p), i.e. g^(x1+x3+x4) (mod p).
+ *
+ * Returns MP_BADARG when gx3 and gx4 are equal, because the protocol
+ * requires the two values received from the peer to be distinct. Otherwise
+ * returns the status of the first failing MPI operation, or MP_OKAY.
+ * The temporaries are released on every path once they may have been
+ * initialised.
+ */
+static mp_err
+jpake_Round2Base(const SECItem *gx1, const SECItem *gx3,
+                 const SECItem *gx4, const mp_int *p, mp_int *base)
+{
+    mp_err err;
+    mp_int GX1;
+    mp_int GX3;
+    mp_int GX4;
+    mp_int tmp;
+
+    MP_DIGITS(&GX1) = 0;
+    MP_DIGITS(&GX3) = 0;
+    MP_DIGITS(&GX4) = 0;
+    MP_DIGITS(&tmp) = 0;
+
+    CHECK_MPI_OK(mp_init(&GX1));
+    CHECK_MPI_OK(mp_init(&GX3));
+    CHECK_MPI_OK(mp_init(&GX4));
+    CHECK_MPI_OK(mp_init(&tmp));
+
+    SECITEM_TO_MPINT(*gx1, &GX1);
+    SECITEM_TO_MPINT(*gx3, &GX3);
+    SECITEM_TO_MPINT(*gx4, &GX4);
+
+    /* In round 2, the peer/attacker sends us g^x3 and g^x4 and the protocol
+       requires that these values are distinct. */
+    if (mp_cmp(&GX3, &GX4) == 0) {
+        /* Leave through cleanup: returning directly here would leak the
+           four mp_ints initialised above. */
+        err = MP_BADARG;
+        goto cleanup;
+    }
+
+    CHECK_MPI_OK(mp_mul(&GX1, &GX3, &tmp));
+    CHECK_MPI_OK(mp_mul(&tmp, &GX4, &tmp));
+    CHECK_MPI_OK(mp_mod(&tmp, p, base));
+
+cleanup:
+    mp_clear(&GX1);
+    mp_clear(&GX3);
+    mp_clear(&GX4);
+    mp_clear(&tmp);
+    return err;
+}
+
+/* J-PAKE round 2.
+ *
+ * Always computes base = gx1*gx3*gx4 (mod p). When x2s is non-NULL it also
+ * computes x2s = x2*s (mod q), after checking that s lies in [1, q-1].
+ * base (and x2s, when given) must have NULL data on entry; results are
+ * written through MPINT_TO_SECITEM with the arena.
+ * Fails with SEC_ERROR_INVALID_ARGS when the arguments are malformed, or
+ * with the error chosen by MP_TO_SEC_ERROR when a computation fails.
+ */
+SECStatus
+JPAKE_Round2(PLArenaPool *arena,
+             const SECItem *p, const SECItem *q, const SECItem *gx1,
+             const SECItem *gx3, const SECItem *gx4, SECItem *base,
+             const SECItem *x2, const SECItem *s, SECItem *x2s)
+{
+    mp_err err;
+    mp_int modP;
+    mp_int orderQ;
+    mp_int secretX2;
+    mp_int secretS;
+    mp_int product;
+
+    /* Arguments that are needed on every call. */
+    if (!arena ||
+        !p || !p->data || p->len == 0 ||
+        !q || !q->data || q->len == 0 ||
+        !gx1 || !gx1->data || gx1->len == 0 ||
+        !gx3 || !gx3->data || gx3->len == 0 ||
+        !gx4 || !gx4->data || gx4->len == 0 ||
+        !base || base->data != NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    /* Arguments that are needed only when the caller asks for x2s. */
+    if (x2s != NULL &&
+        (x2s->data != NULL ||
+         !x2 || !x2->data || x2->len == 0 ||
+         !s || !s->data || s->len == 0)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    MP_DIGITS(&modP) = 0;
+    MP_DIGITS(&orderQ) = 0;
+    MP_DIGITS(&secretX2) = 0;
+    MP_DIGITS(&secretS) = 0;
+    MP_DIGITS(&product) = 0;
+
+    CHECK_MPI_OK(mp_init(&modP));
+    CHECK_MPI_OK(mp_init(&orderQ));
+    CHECK_MPI_OK(mp_init(&product));
+
+    if (x2s != NULL) {
+        CHECK_MPI_OK(mp_init(&secretX2));
+        CHECK_MPI_OK(mp_init(&secretS));
+
+        SECITEM_TO_MPINT(*q, &orderQ);
+        SECITEM_TO_MPINT(*x2, &secretX2);
+        SECITEM_TO_MPINT(*s, &secretS);
+
+        /* s must lie in [1, q-1] */
+        if (!(mp_cmp_z(&secretS) > 0 && mp_cmp(&secretS, &orderQ) < 0)) {
+            err = MP_BADARG;
+            goto cleanup;
+        }
+
+        /* x2s = x2*s (mod q) */
+        CHECK_MPI_OK(mp_mulmod(&secretX2, &secretS, &orderQ, &product));
+        MPINT_TO_SECITEM(&product, x2s, arena);
+    }
+
+    /* base = gx1*gx3*gx4 (mod p) */
+    SECITEM_TO_MPINT(*p, &modP);
+    CHECK_MPI_OK(jpake_Round2Base(gx1, gx3, gx4, &modP, &product));
+    MPINT_TO_SECITEM(&product, base, arena);
+
+cleanup:
+    mp_clear(&modP);
+    mp_clear(&orderQ);
+    mp_clear(&secretX2);
+    mp_clear(&secretS);
+    mp_clear(&product);
+
+    if (err == MP_OKAY) {
+        return SECSuccess;
+    }
+    MP_TO_SEC_ERROR(err);
+    return SECFailure;
+}
+
+/* J-PAKE final step: K = (B / gx4^x2s)^x2 (mod p).
+ *
+ * The division is carried out as a multiplication by gx4^(q - x2s) (mod p).
+ * K must have NULL data on entry; the result is written through
+ * MPINT_TO_SECITEM with the arena.
+ * Fails with SEC_ERROR_INVALID_ARGS when the arguments are malformed, or
+ * with the error chosen by MP_TO_SEC_ERROR when a computation fails.
+ */
+SECStatus
+JPAKE_Final(PLArenaPool *arena, const SECItem *p, const SECItem *q,
+            const SECItem *x2, const SECItem *gx4, const SECItem *x2s,
+            const SECItem *B, SECItem *K)
+{
+    mp_err err;
+    mp_int modP;
+    mp_int orderQ;
+    mp_int scratch;
+    mp_int power;
+    mp_int inverse;
+    mp_int unblinded;
+
+    if (!arena ||
+        !p || !p->data || p->len == 0 ||
+        !q || !q->data || q->len == 0 ||
+        !x2 || !x2->data || x2->len == 0 ||
+        !gx4 || !gx4->data || gx4->len == 0 ||
+        !x2s || !x2s->data || x2s->len == 0 ||
+        !B || !B->data || B->len == 0 ||
+        !K || K->data != NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    MP_DIGITS(&modP) = 0;
+    MP_DIGITS(&orderQ) = 0;
+    MP_DIGITS(&scratch) = 0;
+    MP_DIGITS(&power) = 0;
+    MP_DIGITS(&inverse) = 0;
+    MP_DIGITS(&unblinded) = 0;
+
+    CHECK_MPI_OK(mp_init(&modP));
+    CHECK_MPI_OK(mp_init(&orderQ));
+    CHECK_MPI_OK(mp_init(&scratch));
+    CHECK_MPI_OK(mp_init(&power));
+    CHECK_MPI_OK(mp_init(&inverse));
+    CHECK_MPI_OK(mp_init(&unblinded));
+
+    /* power = q - x2s, which is -x2s (mod q) because q == 0 (mod q) */
+    SECITEM_TO_MPINT(*q, &orderQ);
+    SECITEM_TO_MPINT(*x2s, &scratch);
+    CHECK_MPI_OK(mp_sub(&orderQ, &scratch, &power));
+
+    /* inverse = gx4^-x2s = 1/(gx4^x2s) (mod p) */
+    SECITEM_TO_MPINT(*p, &modP);
+    SECITEM_TO_MPINT(*gx4, &scratch);
+    CHECK_MPI_OK(mp_exptmod(&scratch, &power, &modP, &inverse));
+
+    /* unblinded = B*inverse = B/(gx4^x2s) (mod p) */
+    SECITEM_TO_MPINT(*B, &scratch);
+    CHECK_MPI_OK(mp_mulmod(&inverse, &scratch, &modP, &unblinded));
+
+    /* scratch = unblinded^x2 (mod p); this is the value returned in K */
+    SECITEM_TO_MPINT(*x2, &power);
+    CHECK_MPI_OK(mp_exptmod(&unblinded, &power, &modP, &scratch));
+
+    MPINT_TO_SECITEM(&scratch, K, arena);
+
+cleanup:
+    mp_clear(&modP);
+    mp_clear(&orderQ);
+    mp_clear(&scratch);
+    mp_clear(&power);
+    mp_clear(&inverse);
+    mp_clear(&unblinded);
+
+    if (err == MP_OKAY) {
+        return SECSuccess;
+    }
+    MP_TO_SEC_ERROR(err);
+    return SECFailure;
+}
diff --git a/security/nss/lib/freebl/ldvector.c b/security/nss/lib/freebl/ldvector.c
new file mode 100644
index 000000000..2447a0c9f
--- /dev/null
+++ b/security/nss/lib/freebl/ldvector.c
@@ -0,0 +1,353 @@
+/*
+ * ldvector.c - platform dependent DSO containing freebl implementation.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+extern int FREEBL_InitStubs(void);
+#endif
+
+#include "loader.h"
+#include "alghmac.h"
+#include "hmacct.h"
+#include "blapii.h"
+
+static const struct FREEBLVectorStr vector =
+ {
+ /* Entries are positional and apparently grouped by the table version
+  * that introduced them (see the "End of Version" markers). The first
+  * two fields are the table's size and version, which the loader checks
+  * before it adopts a table. */
+ sizeof vector,
+ FREEBL_VERSION,
+
+ RSA_NewKey,
+ RSA_PublicKeyOp,
+ RSA_PrivateKeyOp,
+ DSA_NewKey,
+ DSA_SignDigest,
+ DSA_VerifyDigest,
+ DSA_NewKeyFromSeed,
+ DSA_SignDigestWithSeed,
+ DH_GenParam,
+ DH_NewKey,
+ DH_Derive,
+ KEA_Derive,
+ KEA_Verify,
+ RC4_CreateContext,
+ RC4_DestroyContext,
+ RC4_Encrypt,
+ RC4_Decrypt,
+ RC2_CreateContext,
+ RC2_DestroyContext,
+ RC2_Encrypt,
+ RC2_Decrypt,
+ RC5_CreateContext,
+ RC5_DestroyContext,
+ RC5_Encrypt,
+ RC5_Decrypt,
+ DES_CreateContext,
+ DES_DestroyContext,
+ DES_Encrypt,
+ DES_Decrypt,
+ AES_CreateContext,
+ AES_DestroyContext,
+ AES_Encrypt,
+ AES_Decrypt,
+ MD5_Hash,
+ MD5_HashBuf,
+ MD5_NewContext,
+ MD5_DestroyContext,
+ MD5_Begin,
+ MD5_Update,
+ MD5_End,
+ MD5_FlattenSize,
+ MD5_Flatten,
+ MD5_Resurrect,
+ MD5_TraceState,
+ MD2_Hash,
+ MD2_NewContext,
+ MD2_DestroyContext,
+ MD2_Begin,
+ MD2_Update,
+ MD2_End,
+ MD2_FlattenSize,
+ MD2_Flatten,
+ MD2_Resurrect,
+ SHA1_Hash,
+ SHA1_HashBuf,
+ SHA1_NewContext,
+ SHA1_DestroyContext,
+ SHA1_Begin,
+ SHA1_Update,
+ SHA1_End,
+ SHA1_TraceState,
+ SHA1_FlattenSize,
+ SHA1_Flatten,
+ SHA1_Resurrect,
+ RNG_RNGInit,
+ RNG_RandomUpdate,
+ RNG_GenerateGlobalRandomBytes,
+ RNG_RNGShutdown,
+ PQG_ParamGen,
+ PQG_ParamGenSeedLen,
+ PQG_VerifyParams,
+
+ /* End of Version 3.001. */
+
+ RSA_PrivateKeyOpDoubleChecked,
+ RSA_PrivateKeyCheck,
+ BL_Cleanup,
+
+ /* End of Version 3.002. */
+
+ SHA256_NewContext,
+ SHA256_DestroyContext,
+ SHA256_Begin,
+ SHA256_Update,
+ SHA256_End,
+ SHA256_HashBuf,
+ SHA256_Hash,
+ SHA256_TraceState,
+ SHA256_FlattenSize,
+ SHA256_Flatten,
+ SHA256_Resurrect,
+
+ SHA512_NewContext,
+ SHA512_DestroyContext,
+ SHA512_Begin,
+ SHA512_Update,
+ SHA512_End,
+ SHA512_HashBuf,
+ SHA512_Hash,
+ SHA512_TraceState,
+ SHA512_FlattenSize,
+ SHA512_Flatten,
+ SHA512_Resurrect,
+
+ SHA384_NewContext,
+ SHA384_DestroyContext,
+ SHA384_Begin,
+ SHA384_Update,
+ SHA384_End,
+ SHA384_HashBuf,
+ SHA384_Hash,
+ SHA384_TraceState,
+ SHA384_FlattenSize,
+ SHA384_Flatten,
+ SHA384_Resurrect,
+
+ /* End of Version 3.003. */
+
+ AESKeyWrap_CreateContext,
+ AESKeyWrap_DestroyContext,
+ AESKeyWrap_Encrypt,
+ AESKeyWrap_Decrypt,
+
+ /* End of Version 3.004. */
+
+ BLAPI_SHVerify,
+ BLAPI_VerifySelf,
+
+ /* End of Version 3.005. */
+
+ EC_NewKey,
+ EC_NewKeyFromSeed,
+ EC_ValidatePublicKey,
+ ECDH_Derive,
+ ECDSA_SignDigest,
+ ECDSA_VerifyDigest,
+ ECDSA_SignDigestWithSeed,
+
+ /* End of Version 3.006. */
+ /* End of Version 3.007. */
+
+ AES_InitContext,
+ AESKeyWrap_InitContext,
+ DES_InitContext,
+ RC2_InitContext,
+ RC4_InitContext,
+
+ AES_AllocateContext,
+ AESKeyWrap_AllocateContext,
+ DES_AllocateContext,
+ RC2_AllocateContext,
+ RC4_AllocateContext,
+
+ MD2_Clone,
+ MD5_Clone,
+ SHA1_Clone,
+ SHA256_Clone,
+ SHA384_Clone,
+ SHA512_Clone,
+
+ TLS_PRF,
+ HASH_GetRawHashObject,
+
+ HMAC_Create,
+ HMAC_Init,
+ HMAC_Begin,
+ HMAC_Update,
+ HMAC_Clone,
+ HMAC_Finish,
+ HMAC_Destroy,
+
+ RNG_SystemInfoForRNG,
+
+ /* End of Version 3.008. */
+
+ FIPS186Change_GenerateX,
+ FIPS186Change_ReduceModQForDSA,
+
+ /* End of Version 3.009. */
+ Camellia_InitContext,
+ Camellia_AllocateContext,
+ Camellia_CreateContext,
+ Camellia_DestroyContext,
+ Camellia_Encrypt,
+ Camellia_Decrypt,
+
+ PQG_DestroyParams,
+ PQG_DestroyVerify,
+
+ /* End of Version 3.010. */
+
+ SEED_InitContext,
+ SEED_AllocateContext,
+ SEED_CreateContext,
+ SEED_DestroyContext,
+ SEED_Encrypt,
+ SEED_Decrypt,
+
+ BL_Init,
+ BL_SetForkState,
+
+ PRNGTEST_Instantiate,
+ PRNGTEST_Reseed,
+ PRNGTEST_Generate,
+
+ PRNGTEST_Uninstantiate,
+
+ /* End of Version 3.011. */
+
+ RSA_PopulatePrivateKey,
+
+ DSA_NewRandom,
+
+ JPAKE_Sign,
+ JPAKE_Verify,
+ JPAKE_Round2,
+ JPAKE_Final,
+
+ /* End of Version 3.012 */
+
+ TLS_P_hash,
+ SHA224_NewContext,
+ SHA224_DestroyContext,
+ SHA224_Begin,
+ SHA224_Update,
+ SHA224_End,
+ SHA224_HashBuf,
+ SHA224_Hash,
+ SHA224_TraceState,
+ SHA224_FlattenSize,
+ SHA224_Flatten,
+ SHA224_Resurrect,
+ SHA224_Clone,
+ BLAPI_SHVerifyFile,
+
+ /* End of Version 3.013 */
+
+ PQG_ParamGenV2,
+ PRNGTEST_RunHealthTests,
+
+ /* End of Version 3.014 */
+
+ HMAC_ConstantTime,
+ SSLv3_MAC_ConstantTime,
+
+ /* End of Version 3.015 */
+
+ RSA_SignRaw,
+ RSA_CheckSignRaw,
+ RSA_CheckSignRecoverRaw,
+ RSA_EncryptRaw,
+ RSA_DecryptRaw,
+ RSA_EncryptOAEP,
+ RSA_DecryptOAEP,
+ RSA_EncryptBlock,
+ RSA_DecryptBlock,
+ RSA_SignPSS,
+ RSA_CheckSignPSS,
+ RSA_Sign,
+ RSA_CheckSign,
+ RSA_CheckSignRecover,
+
+ /* End of Version 3.016 */
+
+ EC_FillParams,
+ EC_DecodeParams,
+ EC_CopyParams,
+
+ /* End of Version 3.017 */
+
+ ChaCha20Poly1305_InitContext,
+ ChaCha20Poly1305_CreateContext,
+ ChaCha20Poly1305_DestroyContext,
+ ChaCha20Poly1305_Seal,
+ ChaCha20Poly1305_Open,
+
+ /* End of Version 3.018 */
+
+ EC_GetPointSize
+
+ /* End of Version 3.019 */
+ };
+
+const FREEBLVector*
+FREEBL_GetVector(void)
+{ /* Returns the address of this DSO's function table ("vector"), or NULL
+   * when FREEBL_NO_DEPEND is defined and FREEBL_InitStubs() fails. The
+   * loader looks this entry point up by name. */
+#ifdef FREEBL_NO_DEPEND
+ SECStatus rv;
+#endif
+
+#define NSS_VERSION_VARIABLE __nss_freebl_version
+#include "verref.h" /* not visible here; presumably references the version symbol named above - confirm */
+
+#ifdef FREEBL_NO_DEPEND
+ /* this entry point is only valid if nspr and nss-util has been loaded */
+ rv = FREEBL_InitStubs();
+ if (rv != SECSuccess) {
+ return NULL;
+ }
+#endif
+ /* make sure the Full self tests have been run before continuing;
+  * the value returned by BL_POSTRan is not examined here */
+ BL_POSTRan(PR_FALSE);
+
+ return &vector;
+}
+
+#ifdef FREEBL_LOWHASH
+static const struct NSSLOWVectorStr nssvector =
+ { /* positional table handed out by NSSLOW_GetVector() */
+ sizeof nssvector, /* presumably the table length, mirroring the FREEBL table - confirm against loader.h */
+ NSSLOW_VERSION, /* presumably the table version - confirm against loader.h */
+ FREEBL_GetVector,
+ NSSLOW_Init,
+ NSSLOW_Shutdown,
+ NSSLOW_Reset,
+ NSSLOWHASH_NewContext,
+ NSSLOWHASH_Begin,
+ NSSLOWHASH_Update,
+ NSSLOWHASH_End,
+ NSSLOWHASH_Destroy,
+ NSSLOWHASH_Length
+ };
+
+/* Hand out the address of the NSSLOW entry-point table (nssvector). */
+const NSSLOWVector*
+NSSLOW_GetVector(void)
+{
+    /* No POST check or stub initialization is done here: those happen in
+     * FREEBL_GetVector() and NSSLOW_Init() respectively. */
+    const NSSLOWVector *table = &nssvector;
+
+    return table;
+}
+#endif
diff --git a/security/nss/lib/freebl/loader.c b/security/nss/lib/freebl/loader.c
new file mode 100644
index 000000000..792171b08
--- /dev/null
+++ b/security/nss/lib/freebl/loader.c
@@ -0,0 +1,2126 @@
+/*
+ * loader.c - load platform dependent DSO containing freebl implementation.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "loader.h"
+#include "prmem.h"
+#include "prerror.h"
+#include "prinit.h"
+#include "prenv.h"
+#include "blname.c"
+
+#include "prio.h"
+#include "prprf.h"
+#include <stdio.h>
+#include "prsystem.h"
+
+static const char *NameOfThisSharedLib = /* file name of the softoken library;
+ * not referenced in the visible part of this file -- presumably consumed by
+ * genload.c, which is included below (confirm) */
+ SHLIB_PREFIX "softokn" SOFTOKEN_SHLIB_VERSION "." SHLIB_SUFFIX;
+
+static PRLibrary *blLib = NULL; /* handle of the loaded freebl library; set by
+ * freebl_LoadDSO() on success, cleared by BL_Unload() */
+
+#define LSB(x) ((x)&0xff) /* low-order 8 bits of x */
+#define MSB(x) ((x) >> 8) /* x shifted right by 8 bits */
+
+static const FREEBLVector *vector; /* function table of the loaded library;
+ * NULL until freebl_LoadDSO() succeeds and again after BL_Unload() */
+static const char *libraryName = NULL; /* name the library was loaded under;
+ * set by freebl_LoadDSO(), passed on by BLAPI_VerifySelf() */
+
+#include "genload.c"
+
+/*
+ * Load the freebl shared library named by getLibName() and adopt its
+ * function vector.  This function must be run only once;
+ * freebl_RunLoaderOnce() enforces that through PR_CallOnce.
+ *
+ * The library is accepted only when it exports FREEBL_GetVector, that
+ * function returns a non-NULL vector, the vector's major version (high
+ * byte) equals ours, its minor version (low byte) is at least ours, and
+ * its reported length covers the FREEBLVector layout this file was built
+ * with.
+ *
+ * Returns PR_SUCCESS after recording vector, libraryName and blLib.
+ * Returns PR_FAILURE otherwise: with PR_LOAD_LIBRARY_ERROR set when no
+ * library name is available, and after unloading the library when it was
+ * opened but rejected.
+ */
+static PRStatus
+freebl_LoadDSO(void)
+{
+    PRLibrary *handle;
+    const char *name = getLibName();
+
+    if (!name) {
+        PR_SetError(PR_LOAD_LIBRARY_ERROR, 0);
+        return PR_FAILURE;
+    }
+
+    handle = loader_LoadLibrary(name);
+    if (handle) {
+        PRFuncPtr address = PR_FindFunctionSymbol(handle, "FREEBL_GetVector");
+        if (address) {
+            FREEBLGetVectorFn *getVector = (FREEBLGetVectorFn *)address;
+            const FREEBLVector *dsoVector = getVector();
+            if (dsoVector) {
+                unsigned short dsoVersion = dsoVector->version;
+                unsigned short myVersion = FREEBL_VERSION;
+                if (MSB(dsoVersion) == MSB(myVersion) &&
+                    LSB(dsoVersion) >= LSB(myVersion) &&
+                    dsoVector->length >= sizeof(FREEBLVector)) {
+                    vector = dsoVector;
+                    libraryName = name;
+                    blLib = handle;
+                    return PR_SUCCESS;
+                }
+            }
+        }
+        /* The library was opened but cannot be used.  Release the handle
+         * obtained above: blLib is only assigned on the success path, so
+         * testing blLib here would leave the rejected library loaded. */
+#ifdef DEBUG
+        {
+            PRStatus status = PR_UnloadLibrary(handle);
+            PORT_Assert(PR_SUCCESS == status);
+        }
+#else
+        PR_UnloadLibrary(handle);
+#endif
+    }
+    return PR_FAILURE;
+}
+
+/* Never written, so it keeps its zero-initialised value; BL_Unload()
+ * copies it over loadFreeBLOnce. */
+static const PRCallOnceType pristineCallOnce;
+/* Call-once state guarding freebl_LoadDSO(). */
+static PRCallOnceType loadFreeBLOnce;
+
+/* Run freebl_LoadDSO() through PR_CallOnce and hand back the status that
+ * PR_CallOnce reports. */
+static PRStatus
+freebl_RunLoaderOnce(void)
+{
+    return PR_CallOnce(&loadFreeBLOnce, &freebl_LoadDSO);
+}
+
+/* Dispatch to vector->p_BL_Init.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+BL_Init(void)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_BL_Init();
+}
+
+/* Dispatch to vector->p_RSA_NewKey.
+ * Returns NULL if the vector cannot be loaded. */
+RSAPrivateKey *
+RSA_NewKey(int keySizeInBits, SECItem *publicExponent)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_RSA_NewKey(keySizeInBits, publicExponent);
+}
+
+/* Dispatch to vector->p_RSA_PublicKeyOp.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+RSA_PublicKeyOp(RSAPublicKey *key,
+                unsigned char *output,
+                const unsigned char *input)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_RSA_PublicKeyOp(key, output, input);
+}
+
+/* Dispatch to vector->p_RSA_PrivateKeyOp.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+RSA_PrivateKeyOp(RSAPrivateKey *key,
+                 unsigned char *output,
+                 const unsigned char *input)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_RSA_PrivateKeyOp(key, output, input);
+}
+
+/* Dispatch to vector->p_RSA_PrivateKeyOpDoubleChecked.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+RSA_PrivateKeyOpDoubleChecked(RSAPrivateKey *key,
+                              unsigned char *output,
+                              const unsigned char *input)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_RSA_PrivateKeyOpDoubleChecked(key, output, input);
+}
+
+/* Dispatch to vector->p_RSA_PrivateKeyCheck.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+RSA_PrivateKeyCheck(const RSAPrivateKey *key)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_RSA_PrivateKeyCheck(key);
+}
+
+/* Dispatch to vector->p_DSA_NewKey.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+DSA_NewKey(const PQGParams *params, DSAPrivateKey **privKey)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_DSA_NewKey(params, privKey);
+}
+
+/* Dispatch to vector->p_DSA_SignDigest.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+DSA_SignDigest(DSAPrivateKey *key, SECItem *signature, const SECItem *digest)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_DSA_SignDigest(key, signature, digest);
+}
+
+/* Dispatch to vector->p_DSA_VerifyDigest.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+DSA_VerifyDigest(DSAPublicKey *key, const SECItem *signature,
+                 const SECItem *digest)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_DSA_VerifyDigest(key, signature, digest);
+}
+
+/* Dispatch to vector->p_DSA_NewKeyFromSeed.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+DSA_NewKeyFromSeed(const PQGParams *params, const unsigned char *seed,
+                   DSAPrivateKey **privKey)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_DSA_NewKeyFromSeed(params, seed, privKey);
+}
+
+/* Dispatch to vector->p_DSA_SignDigestWithSeed.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+DSA_SignDigestWithSeed(DSAPrivateKey *key, SECItem *signature,
+                       const SECItem *digest, const unsigned char *seed)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_DSA_SignDigestWithSeed(key, signature, digest, seed);
+}
+
+/* Dispatch to vector->p_DSA_NewRandom.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+DSA_NewRandom(PLArenaPool *arena, const SECItem *q, SECItem *seed)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_DSA_NewRandom(arena, q, seed);
+}
+
+/* Dispatch to vector->p_DH_GenParam.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+DH_GenParam(int primeLen, DHParams **params)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_DH_GenParam(primeLen, params);
+}
+
+/* Dispatch to vector->p_DH_NewKey.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+DH_NewKey(DHParams *params, DHPrivateKey **privKey)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_DH_NewKey(params, privKey);
+}
+
+/* Dispatch to vector->p_DH_Derive.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+DH_Derive(SECItem *publicValue, SECItem *prime, SECItem *privateValue,
+          SECItem *derivedSecret, unsigned int maxOutBytes)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_DH_Derive(publicValue, prime, privateValue,
+                               derivedSecret, maxOutBytes);
+}
+
+/* Dispatch to vector->p_KEA_Derive.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+KEA_Derive(SECItem *prime, SECItem *public1, SECItem *public2,
+           SECItem *private1, SECItem *private2, SECItem *derivedSecret)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_KEA_Derive(prime, public1, public2,
+                                private1, private2, derivedSecret);
+}
+
+/* Dispatch to vector->p_KEA_Verify.
+ * Returns PR_FALSE if the vector cannot be loaded. */
+PRBool
+KEA_Verify(SECItem *Y, SECItem *prime, SECItem *subPrime)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return PR_FALSE;
+    }
+    return vector->p_KEA_Verify(Y, prime, subPrime);
+}
+
+/* Dispatch to vector->p_RC4_CreateContext.
+ * Returns NULL if the vector cannot be loaded. */
+RC4Context *
+RC4_CreateContext(const unsigned char *key, int len)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_RC4_CreateContext(key, len);
+}
+
+/* Dispatch to vector->p_RC4_DestroyContext.
+ * Does nothing if the vector cannot be loaded. */
+void
+RC4_DestroyContext(RC4Context *cx, PRBool freeit)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_RC4_DestroyContext(cx, freeit);
+}
+
+/* Dispatch to vector->p_RC4_Encrypt.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+RC4_Encrypt(RC4Context *cx, unsigned char *output, unsigned int *outputLen,
+            unsigned int maxOutputLen, const unsigned char *input,
+            unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_RC4_Encrypt(cx, output, outputLen, maxOutputLen,
+                                 input, inputLen);
+}
+
+/* Dispatch to vector->p_RC4_Decrypt.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+RC4_Decrypt(RC4Context *cx, unsigned char *output, unsigned int *outputLen,
+            unsigned int maxOutputLen, const unsigned char *input,
+            unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_RC4_Decrypt(cx, output, outputLen, maxOutputLen,
+                                 input, inputLen);
+}
+
+/* Dispatch to vector->p_RC2_CreateContext.
+ * Returns NULL if the vector cannot be loaded. */
+RC2Context *
+RC2_CreateContext(const unsigned char *key, unsigned int len,
+                  const unsigned char *iv, int mode, unsigned effectiveKeyLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_RC2_CreateContext(key, len, iv, mode, effectiveKeyLen);
+}
+
+/* Dispatch to vector->p_RC2_DestroyContext.
+ * Does nothing if the vector cannot be loaded. */
+void
+RC2_DestroyContext(RC2Context *cx, PRBool freeit)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_RC2_DestroyContext(cx, freeit);
+}
+
+/* Dispatch to vector->p_RC2_Encrypt.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+RC2_Encrypt(RC2Context *cx, unsigned char *output, unsigned int *outputLen,
+            unsigned int maxOutputLen, const unsigned char *input,
+            unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_RC2_Encrypt(cx, output, outputLen, maxOutputLen,
+                                 input, inputLen);
+}
+
+/* Dispatch to vector->p_RC2_Decrypt.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+RC2_Decrypt(RC2Context *cx, unsigned char *output, unsigned int *outputLen,
+            unsigned int maxOutputLen, const unsigned char *input,
+            unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_RC2_Decrypt(cx, output, outputLen, maxOutputLen,
+                                 input, inputLen);
+}
+
+/* Dispatch to vector->p_RC5_CreateContext.
+ * Returns NULL if the vector cannot be loaded. */
+RC5Context *
+RC5_CreateContext(const SECItem *key, unsigned int rounds,
+                  unsigned int wordSize, const unsigned char *iv, int mode)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_RC5_CreateContext(key, rounds, wordSize, iv, mode);
+}
+
+/* Dispatch to vector->p_RC5_DestroyContext.
+ * Does nothing if the vector cannot be loaded. */
+void
+RC5_DestroyContext(RC5Context *cx, PRBool freeit)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_RC5_DestroyContext(cx, freeit);
+}
+
+/* Dispatch to vector->p_RC5_Encrypt.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+RC5_Encrypt(RC5Context *cx, unsigned char *output, unsigned int *outputLen,
+            unsigned int maxOutputLen, const unsigned char *input,
+            unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_RC5_Encrypt(cx, output, outputLen, maxOutputLen,
+                                 input, inputLen);
+}
+
+/* Dispatch to vector->p_RC5_Decrypt.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+RC5_Decrypt(RC5Context *cx, unsigned char *output, unsigned int *outputLen,
+            unsigned int maxOutputLen, const unsigned char *input,
+            unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_RC5_Decrypt(cx, output, outputLen, maxOutputLen,
+                                 input, inputLen);
+}
+
+/* Dispatch to vector->p_DES_CreateContext.
+ * Returns NULL if the vector cannot be loaded. */
+DESContext *
+DES_CreateContext(const unsigned char *key, const unsigned char *iv,
+                  int mode, PRBool encrypt)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_DES_CreateContext(key, iv, mode, encrypt);
+}
+
+/* Dispatch to vector->p_DES_DestroyContext.
+ * Does nothing if the vector cannot be loaded. */
+void
+DES_DestroyContext(DESContext *cx, PRBool freeit)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_DES_DestroyContext(cx, freeit);
+}
+
+/* Dispatch to vector->p_DES_Encrypt.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+DES_Encrypt(DESContext *cx, unsigned char *output, unsigned int *outputLen,
+            unsigned int maxOutputLen, const unsigned char *input,
+            unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_DES_Encrypt(cx, output, outputLen, maxOutputLen,
+                                 input, inputLen);
+}
+
+/* Dispatch to vector->p_DES_Decrypt.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+DES_Decrypt(DESContext *cx, unsigned char *output, unsigned int *outputLen,
+            unsigned int maxOutputLen, const unsigned char *input,
+            unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_DES_Decrypt(cx, output, outputLen, maxOutputLen,
+                                 input, inputLen);
+}
+/* Dispatch to vector->p_SEED_CreateContext.
+ * Returns NULL if the vector cannot be loaded. */
+SEEDContext *
+SEED_CreateContext(const unsigned char *key, const unsigned char *iv,
+                   int mode, PRBool encrypt)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_SEED_CreateContext(key, iv, mode, encrypt);
+}
+
+/* Dispatch to vector->p_SEED_DestroyContext.
+ * Does nothing if the vector cannot be loaded. */
+void
+SEED_DestroyContext(SEEDContext *cx, PRBool freeit)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SEED_DestroyContext(cx, freeit);
+}
+
+/* Dispatch to vector->p_SEED_Encrypt.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+SEED_Encrypt(SEEDContext *cx, unsigned char *output, unsigned int *outputLen,
+             unsigned int maxOutputLen, const unsigned char *input,
+             unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_SEED_Encrypt(cx, output, outputLen, maxOutputLen,
+                                  input, inputLen);
+}
+
+/* Dispatch to vector->p_SEED_Decrypt.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+SEED_Decrypt(SEEDContext *cx, unsigned char *output, unsigned int *outputLen,
+             unsigned int maxOutputLen, const unsigned char *input,
+             unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_SEED_Decrypt(cx, output, outputLen, maxOutputLen,
+                                  input, inputLen);
+}
+
+/* Dispatch to vector->p_AES_CreateContext.
+ * Returns NULL if the vector cannot be loaded. */
+AESContext *
+AES_CreateContext(const unsigned char *key, const unsigned char *iv,
+                  int mode, int encrypt,
+                  unsigned int keylen, unsigned int blocklen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_AES_CreateContext(key, iv, mode, encrypt,
+                                       keylen, blocklen);
+}
+
+/* Dispatch to vector->p_AES_DestroyContext.
+ * Does nothing if the vector cannot be loaded. */
+void
+AES_DestroyContext(AESContext *cx, PRBool freeit)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_AES_DestroyContext(cx, freeit);
+}
+
+/* Dispatch to vector->p_AES_Encrypt.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+AES_Encrypt(AESContext *cx, unsigned char *output,
+            unsigned int *outputLen, unsigned int maxOutputLen,
+            const unsigned char *input, unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_AES_Encrypt(cx, output, outputLen, maxOutputLen,
+                                 input, inputLen);
+}
+
+/* Dispatch to vector->p_AES_Decrypt.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+AES_Decrypt(AESContext *cx, unsigned char *output,
+            unsigned int *outputLen, unsigned int maxOutputLen,
+            const unsigned char *input, unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_AES_Decrypt(cx, output, outputLen, maxOutputLen,
+                                 input, inputLen);
+}
+
+/* Dispatch to vector->p_MD5_Hash.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+MD5_Hash(unsigned char *dest, const char *src)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_MD5_Hash(dest, src);
+}
+
+/* Dispatch to vector->p_MD5_HashBuf.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+MD5_HashBuf(unsigned char *dest, const unsigned char *src, PRUint32 src_length)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_MD5_HashBuf(dest, src, src_length);
+}
+
+/* Dispatch to vector->p_MD5_NewContext.
+ * Returns NULL if the vector cannot be loaded. */
+MD5Context *
+MD5_NewContext(void)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_MD5_NewContext();
+}
+
+/* Dispatch to vector->p_MD5_DestroyContext.
+ * Does nothing if the vector cannot be loaded. */
+void
+MD5_DestroyContext(MD5Context *cx, PRBool freeit)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_MD5_DestroyContext(cx, freeit);
+}
+
+/* Dispatch to vector->p_MD5_Begin.
+ * Does nothing if the vector cannot be loaded. */
+void
+MD5_Begin(MD5Context *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_MD5_Begin(cx);
+}
+
+/* Dispatch to vector->p_MD5_Update.
+ * Does nothing if the vector cannot be loaded. */
+void
+MD5_Update(MD5Context *cx, const unsigned char *input, unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_MD5_Update(cx, input, inputLen);
+}
+
+/* Dispatch to vector->p_MD5_End.
+ * Does nothing if the vector cannot be loaded. */
+void
+MD5_End(MD5Context *cx, unsigned char *digest,
+        unsigned int *digestLen, unsigned int maxDigestLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_MD5_End(cx, digest, digestLen, maxDigestLen);
+}
+
+/* Dispatch to vector->p_MD5_FlattenSize.
+ * Returns 0 if the vector cannot be loaded. */
+unsigned int
+MD5_FlattenSize(MD5Context *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return 0;
+    }
+    return vector->p_MD5_FlattenSize(cx);
+}
+
+/* Dispatch to vector->p_MD5_Flatten.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+MD5_Flatten(MD5Context *cx, unsigned char *space)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_MD5_Flatten(cx, space);
+}
+
+/* Dispatch to vector->p_MD5_Resurrect.
+ * Returns NULL if the vector cannot be loaded. */
+MD5Context *
+MD5_Resurrect(unsigned char *space, void *arg)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_MD5_Resurrect(space, arg);
+}
+
+/* Dispatch to vector->p_MD5_TraceState.
+ * Does nothing if the vector cannot be loaded. */
+void
+MD5_TraceState(MD5Context *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_MD5_TraceState(cx);
+}
+
+/* Dispatch to vector->p_MD2_Hash.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+MD2_Hash(unsigned char *dest, const char *src)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_MD2_Hash(dest, src);
+}
+
+/* Dispatch to vector->p_MD2_NewContext.
+ * Returns NULL if the vector cannot be loaded. */
+MD2Context *
+MD2_NewContext(void)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_MD2_NewContext();
+}
+
+/* Dispatch to vector->p_MD2_DestroyContext.
+ * Does nothing if the vector cannot be loaded. */
+void
+MD2_DestroyContext(MD2Context *cx, PRBool freeit)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_MD2_DestroyContext(cx, freeit);
+}
+
+/* Dispatch to vector->p_MD2_Begin.
+ * Does nothing if the vector cannot be loaded. */
+void
+MD2_Begin(MD2Context *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_MD2_Begin(cx);
+}
+
+/* Dispatch to vector->p_MD2_Update.
+ * Does nothing if the vector cannot be loaded. */
+void
+MD2_Update(MD2Context *cx, const unsigned char *input, unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_MD2_Update(cx, input, inputLen);
+}
+
+/* Dispatch to vector->p_MD2_End.
+ * Does nothing if the vector cannot be loaded. */
+void
+MD2_End(MD2Context *cx, unsigned char *digest,
+        unsigned int *digestLen, unsigned int maxDigestLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_MD2_End(cx, digest, digestLen, maxDigestLen);
+}
+
+/* Dispatch to vector->p_MD2_FlattenSize.
+ * Returns 0 if the vector cannot be loaded. */
+unsigned int
+MD2_FlattenSize(MD2Context *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return 0;
+    }
+    return vector->p_MD2_FlattenSize(cx);
+}
+
+/* Dispatch to vector->p_MD2_Flatten.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+MD2_Flatten(MD2Context *cx, unsigned char *space)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_MD2_Flatten(cx, space);
+}
+
+/* Dispatch to vector->p_MD2_Resurrect.
+ * Returns NULL if the vector cannot be loaded. */
+MD2Context *
+MD2_Resurrect(unsigned char *space, void *arg)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_MD2_Resurrect(space, arg);
+}
+
+/* Dispatch to vector->p_SHA1_Hash.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+SHA1_Hash(unsigned char *dest, const char *src)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_SHA1_Hash(dest, src);
+}
+
+/* Dispatch to vector->p_SHA1_HashBuf.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+SHA1_HashBuf(unsigned char *dest, const unsigned char *src, PRUint32 src_length)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_SHA1_HashBuf(dest, src, src_length);
+}
+
+/* Dispatch to vector->p_SHA1_NewContext.
+ * Returns NULL if the vector cannot be loaded. */
+SHA1Context *
+SHA1_NewContext(void)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_SHA1_NewContext();
+}
+
+/* Dispatch to vector->p_SHA1_DestroyContext.
+ * Does nothing if the vector cannot be loaded. */
+void
+SHA1_DestroyContext(SHA1Context *cx, PRBool freeit)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA1_DestroyContext(cx, freeit);
+}
+
+/* Dispatch to vector->p_SHA1_Begin.
+ * Does nothing if the vector cannot be loaded. */
+void
+SHA1_Begin(SHA1Context *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA1_Begin(cx);
+}
+
+/* Dispatch to vector->p_SHA1_Update.
+ * Does nothing if the vector cannot be loaded. */
+void
+SHA1_Update(SHA1Context *cx, const unsigned char *input,
+            unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA1_Update(cx, input, inputLen);
+}
+
+/* Dispatch to vector->p_SHA1_End.
+ * Does nothing if the vector cannot be loaded. */
+void
+SHA1_End(SHA1Context *cx, unsigned char *digest,
+         unsigned int *digestLen, unsigned int maxDigestLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA1_End(cx, digest, digestLen, maxDigestLen);
+}
+
+/* Dispatch to vector->p_SHA1_TraceState.
+ * Does nothing if the vector cannot be loaded. */
+void
+SHA1_TraceState(SHA1Context *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA1_TraceState(cx);
+}
+
+/* Dispatch to vector->p_SHA1_FlattenSize.
+ * Returns 0 if the vector cannot be loaded. */
+unsigned int
+SHA1_FlattenSize(SHA1Context *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return 0;
+    }
+    return vector->p_SHA1_FlattenSize(cx);
+}
+
+/* Dispatch to vector->p_SHA1_Flatten.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+SHA1_Flatten(SHA1Context *cx, unsigned char *space)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_SHA1_Flatten(cx, space);
+}
+
+/* Dispatch to vector->p_SHA1_Resurrect.
+ * Returns NULL if the vector cannot be loaded. */
+SHA1Context *
+SHA1_Resurrect(unsigned char *space, void *arg)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_SHA1_Resurrect(space, arg);
+}
+
+/* Dispatch to vector->p_RNG_RNGInit.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+RNG_RNGInit(void)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_RNG_RNGInit();
+}
+
+/* Dispatch to vector->p_RNG_RandomUpdate.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+RNG_RandomUpdate(const void *data, size_t bytes)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_RNG_RandomUpdate(data, bytes);
+}
+
+/* Dispatch to vector->p_RNG_GenerateGlobalRandomBytes.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+RNG_GenerateGlobalRandomBytes(void *dest, size_t len)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_RNG_GenerateGlobalRandomBytes(dest, len);
+}
+
+/* Dispatch to vector->p_RNG_RNGShutdown.
+ * Does nothing if the vector cannot be loaded. */
+void
+RNG_RNGShutdown(void)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_RNG_RNGShutdown();
+}
+
+/* Dispatch to vector->p_PQG_ParamGen.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+PQG_ParamGen(unsigned int j, PQGParams **pParams, PQGVerify **pVfy)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_PQG_ParamGen(j, pParams, pVfy);
+}
+
+/* Dispatch to vector->p_PQG_ParamGenSeedLen.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+PQG_ParamGenSeedLen(unsigned int j, unsigned int seedBytes,
+                    PQGParams **pParams, PQGVerify **pVfy)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_PQG_ParamGenSeedLen(j, seedBytes, pParams, pVfy);
+}
+
+/* Dispatch to vector->p_PQG_VerifyParams.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+PQG_VerifyParams(const PQGParams *params, const PQGVerify *vfy,
+                 SECStatus *result)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_PQG_VerifyParams(params, vfy, result);
+}
+
+/* Dispatch to vector->p_PQG_DestroyParams.
+ * Does nothing if the vector cannot be loaded. */
+void
+PQG_DestroyParams(PQGParams *params)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_PQG_DestroyParams(params);
+}
+
+/* Dispatch to vector->p_PQG_DestroyVerify.
+ * Does nothing if the vector cannot be loaded. */
+void
+PQG_DestroyVerify(PQGVerify *vfy)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_PQG_DestroyVerify(vfy);
+}
+
+/* Dispatch to vector->p_BL_Cleanup.
+ * Does nothing if the vector cannot be loaded. */
+void
+BL_Cleanup(void)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_BL_Cleanup();
+}
+
+/*
+ * Drop the freebl vector and, unless the NSS_DISABLE_UNLOAD environment
+ * variable is set, unload the shared library.  The call-once state is then
+ * put back to its pristine value.
+ *
+ * This function is not thread-safe, but doesn't need to be, because it is
+ * only called from functions that are also defined as not thread-safe,
+ * namely C_Finalize in softoken, and the SSL bypass shutdown callback
+ * called from NSS_Shutdown.
+ */
+void
+BL_Unload(void)
+{
+    char *keepLoaded;
+
+    vector = NULL;
+    keepLoaded = PR_GetEnvSecure("NSS_DISABLE_UNLOAD");
+    if (blLib != NULL && keepLoaded == NULL) {
+#ifdef DEBUG
+        PRStatus status = PR_UnloadLibrary(blLib);
+        PORT_Assert(PR_SUCCESS == status);
+#else
+        PR_UnloadLibrary(blLib);
+#endif
+    }
+    blLib = NULL;
+    loadFreeBLOnce = pristineCallOnce;
+}
+
+/* ============== New for 3.003 =============================== */
+
+/* Dispatch to vector->p_SHA256_Hash.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+SHA256_Hash(unsigned char *dest, const char *src)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_SHA256_Hash(dest, src);
+}
+
+/* Dispatch to vector->p_SHA256_HashBuf.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+SHA256_HashBuf(unsigned char *dest, const unsigned char *src, PRUint32 src_length)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_SHA256_HashBuf(dest, src, src_length);
+}
+
+/* Dispatch to vector->p_SHA256_NewContext.
+ * Returns NULL if the vector cannot be loaded. */
+SHA256Context *
+SHA256_NewContext(void)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_SHA256_NewContext();
+}
+
+/* Dispatch to vector->p_SHA256_DestroyContext.
+ * Does nothing if the vector cannot be loaded. */
+void
+SHA256_DestroyContext(SHA256Context *cx, PRBool freeit)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA256_DestroyContext(cx, freeit);
+}
+
+/* Dispatch to vector->p_SHA256_Begin.
+ * Does nothing if the vector cannot be loaded. */
+void
+SHA256_Begin(SHA256Context *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA256_Begin(cx);
+}
+
+/* Dispatch to vector->p_SHA256_Update.
+ * Does nothing if the vector cannot be loaded. */
+void
+SHA256_Update(SHA256Context *cx, const unsigned char *input,
+              unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA256_Update(cx, input, inputLen);
+}
+
+/* Dispatch to vector->p_SHA256_End.
+ * Does nothing if the vector cannot be loaded. */
+void
+SHA256_End(SHA256Context *cx, unsigned char *digest,
+           unsigned int *digestLen, unsigned int maxDigestLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA256_End(cx, digest, digestLen, maxDigestLen);
+}
+
+/* Dispatch to vector->p_SHA256_TraceState.
+ * Does nothing if the vector cannot be loaded. */
+void
+SHA256_TraceState(SHA256Context *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA256_TraceState(cx);
+}
+
+/* Dispatch to vector->p_SHA256_FlattenSize.
+ * Returns 0 if the vector cannot be loaded. */
+unsigned int
+SHA256_FlattenSize(SHA256Context *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return 0;
+    }
+    return vector->p_SHA256_FlattenSize(cx);
+}
+
+/* Dispatch to vector->p_SHA256_Flatten.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+SHA256_Flatten(SHA256Context *cx, unsigned char *space)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_SHA256_Flatten(cx, space);
+}
+
+/* Dispatch to vector->p_SHA256_Resurrect.
+ * Returns NULL if the vector cannot be loaded. */
+SHA256Context *
+SHA256_Resurrect(unsigned char *space, void *arg)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_SHA256_Resurrect(space, arg);
+}
+
+/* Dispatch to vector->p_SHA512_Hash.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+SHA512_Hash(unsigned char *dest, const char *src)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_SHA512_Hash(dest, src);
+}
+
+/* Dispatch to vector->p_SHA512_HashBuf.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+SHA512_HashBuf(unsigned char *dest, const unsigned char *src, PRUint32 src_length)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_SHA512_HashBuf(dest, src, src_length);
+}
+
+/* Dispatch to vector->p_SHA512_NewContext.
+ * Returns NULL if the vector cannot be loaded. */
+SHA512Context *
+SHA512_NewContext(void)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_SHA512_NewContext();
+}
+
+/* Dispatch to vector->p_SHA512_DestroyContext.
+ * Does nothing if the vector cannot be loaded. */
+void
+SHA512_DestroyContext(SHA512Context *cx, PRBool freeit)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA512_DestroyContext(cx, freeit);
+}
+
+/* Dispatch to vector->p_SHA512_Begin.
+ * Does nothing if the vector cannot be loaded. */
+void
+SHA512_Begin(SHA512Context *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA512_Begin(cx);
+}
+
+/* Dispatch to vector->p_SHA512_Update.
+ * Does nothing if the vector cannot be loaded. */
+void
+SHA512_Update(SHA512Context *cx, const unsigned char *input,
+              unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA512_Update(cx, input, inputLen);
+}
+
+/* Dispatch to vector->p_SHA512_End.
+ * Does nothing if the vector cannot be loaded. */
+void
+SHA512_End(SHA512Context *cx, unsigned char *digest,
+           unsigned int *digestLen, unsigned int maxDigestLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA512_End(cx, digest, digestLen, maxDigestLen);
+}
+
+/* Dispatch to vector->p_SHA512_TraceState.
+ * Does nothing if the vector cannot be loaded. */
+void
+SHA512_TraceState(SHA512Context *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA512_TraceState(cx);
+}
+
+/* Dispatch to vector->p_SHA512_FlattenSize.
+ * Returns 0 if the vector cannot be loaded. */
+unsigned int
+SHA512_FlattenSize(SHA512Context *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return 0;
+    }
+    return vector->p_SHA512_FlattenSize(cx);
+}
+
+/* Dispatch to vector->p_SHA512_Flatten.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+SHA512_Flatten(SHA512Context *cx, unsigned char *space)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_SHA512_Flatten(cx, space);
+}
+
+/* Dispatch to vector->p_SHA512_Resurrect.
+ * Returns NULL if the vector cannot be loaded. */
+SHA512Context *
+SHA512_Resurrect(unsigned char *space, void *arg)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_SHA512_Resurrect(space, arg);
+}
+
+/* Dispatch to vector->p_SHA384_Hash.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+SHA384_Hash(unsigned char *dest, const char *src)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_SHA384_Hash(dest, src);
+}
+
+/* Dispatch to vector->p_SHA384_HashBuf.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+SHA384_HashBuf(unsigned char *dest, const unsigned char *src, PRUint32 src_length)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_SHA384_HashBuf(dest, src, src_length);
+}
+
+/* Dispatch to vector->p_SHA384_NewContext.
+ * Returns NULL if the vector cannot be loaded. */
+SHA384Context *
+SHA384_NewContext(void)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_SHA384_NewContext();
+}
+
+/* Dispatch to vector->p_SHA384_DestroyContext.
+ * Does nothing if the vector cannot be loaded. */
+void
+SHA384_DestroyContext(SHA384Context *cx, PRBool freeit)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA384_DestroyContext(cx, freeit);
+}
+
+/* Dispatch to vector->p_SHA384_Begin.
+ * Does nothing if the vector cannot be loaded. */
+void
+SHA384_Begin(SHA384Context *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA384_Begin(cx);
+}
+
+/* Dispatch to vector->p_SHA384_Update.
+ * Does nothing if the vector cannot be loaded. */
+void
+SHA384_Update(SHA384Context *cx, const unsigned char *input,
+              unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA384_Update(cx, input, inputLen);
+}
+
+/* Dispatch to vector->p_SHA384_End.
+ * Does nothing if the vector cannot be loaded. */
+void
+SHA384_End(SHA384Context *cx, unsigned char *digest,
+           unsigned int *digestLen, unsigned int maxDigestLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA384_End(cx, digest, digestLen, maxDigestLen);
+}
+
+/* Dispatch to vector->p_SHA384_TraceState.
+ * Does nothing if the vector cannot be loaded. */
+void
+SHA384_TraceState(SHA384Context *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    vector->p_SHA384_TraceState(cx);
+}
+
+/* Dispatch to vector->p_SHA384_FlattenSize.
+ * Returns 0 if the vector cannot be loaded. */
+unsigned int
+SHA384_FlattenSize(SHA384Context *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return 0;
+    }
+    return vector->p_SHA384_FlattenSize(cx);
+}
+
+/* Dispatch to vector->p_SHA384_Flatten.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+SHA384_Flatten(SHA384Context *cx, unsigned char *space)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_SHA384_Flatten(cx, space);
+}
+
+/* Dispatch to vector->p_SHA384_Resurrect.
+ * Returns NULL if the vector cannot be loaded. */
+SHA384Context *
+SHA384_Resurrect(unsigned char *space, void *arg)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_SHA384_Resurrect(space, arg);
+}
+
+/* Dispatch to vector->p_AESKeyWrap_CreateContext.
+ * Returns NULL if the vector cannot be loaded. */
+AESKeyWrapContext *
+AESKeyWrap_CreateContext(const unsigned char *key, const unsigned char *iv,
+                         int encrypt, unsigned int keylen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return (vector->p_AESKeyWrap_CreateContext)(key, iv, encrypt, keylen);
+}
+
+/* Dispatch to vector->p_AESKeyWrap_DestroyContext.
+ * Does nothing if the vector cannot be loaded. */
+void
+AESKeyWrap_DestroyContext(AESKeyWrapContext *cx, PRBool freeit)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return;
+    }
+    (vector->p_AESKeyWrap_DestroyContext)(cx, freeit);
+}
+
+/* Dispatch to vector->p_AESKeyWrap_Encrypt.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+AESKeyWrap_Encrypt(AESKeyWrapContext *cx, unsigned char *output,
+                   unsigned int *outputLen, unsigned int maxOutputLen,
+                   const unsigned char *input, unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_AESKeyWrap_Encrypt)(cx, output, outputLen,
+                                          maxOutputLen, input, inputLen);
+}
+/* Dispatch to vector->p_AESKeyWrap_Decrypt.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+AESKeyWrap_Decrypt(AESKeyWrapContext *cx, unsigned char *output,
+                   unsigned int *outputLen, unsigned int maxOutputLen,
+                   const unsigned char *input, unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_AESKeyWrap_Decrypt)(cx, output, outputLen,
+                                          maxOutputLen, input, inputLen);
+}
+
+/* Dispatch to vector->p_BLAPI_SHVerify.
+ * Returns PR_FALSE if the vector cannot be loaded. */
+PRBool
+BLAPI_SHVerify(const char *name, PRFuncPtr addr)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return PR_FALSE;
+    }
+    return (vector->p_BLAPI_SHVerify)(name, addr);
+}
+
+/*
+ * Dispatch to vector->p_BLAPI_VerifySelf.
+ *
+ * Callers are expected to pass NULL as the name (asserted in debug
+ * builds); the argument is otherwise ignored.  What is handed on to
+ * p_BLAPI_VerifySelf is the real name of the shared library we loaded,
+ * i.e. the static libraryName set in freebl_LoadDSO.
+ *
+ * Returns PR_FALSE if the vector cannot be loaded.
+ */
+PRBool
+BLAPI_VerifySelf(const char *name)
+{
+    PORT_Assert(!name);
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return PR_FALSE;
+    }
+    return (vector->p_BLAPI_VerifySelf)(libraryName);
+}
+
+/* ============== New for 3.006 =============================== */
+
+/* Dispatch to vector->p_EC_NewKey.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+EC_NewKey(ECParams *params, ECPrivateKey **privKey)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_EC_NewKey(params, privKey);
+}
+
+/* Dispatch to vector->p_EC_NewKeyFromSeed.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+EC_NewKeyFromSeed(ECParams *params, ECPrivateKey **privKey,
+                  const unsigned char *seed, int seedlen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_EC_NewKeyFromSeed(params, privKey, seed, seedlen);
+}
+
+/* Dispatch to vector->p_EC_ValidatePublicKey.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+EC_ValidatePublicKey(ECParams *params, SECItem *publicValue)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_EC_ValidatePublicKey(params, publicValue);
+}
+
+/* Dispatch to vector->p_ECDH_Derive.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+ECDH_Derive(SECItem *publicValue, ECParams *params, SECItem *privateValue,
+            PRBool withCofactor, SECItem *derivedSecret)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_ECDH_Derive(publicValue, params, privateValue,
+                                 withCofactor, derivedSecret);
+}
+
+/* Dispatch to vector->p_ECDSA_SignDigest.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+ECDSA_SignDigest(ECPrivateKey *key, SECItem *signature,
+                 const SECItem *digest)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_ECDSA_SignDigest(key, signature, digest);
+}
+
+/* Dispatch to vector->p_ECDSA_VerifyDigest.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+ECDSA_VerifyDigest(ECPublicKey *key, const SECItem *signature,
+                   const SECItem *digest)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_ECDSA_VerifyDigest(key, signature, digest);
+}
+
+/* Dispatch to vector->p_ECDSA_SignDigestWithSeed.
+ * Returns SECFailure if the vector cannot be loaded. */
+SECStatus
+ECDSA_SignDigestWithSeed(ECPrivateKey *key, SECItem *signature,
+                         const SECItem *digest, const unsigned char *seed,
+                         const int seedlen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return vector->p_ECDSA_SignDigestWithSeed(key, signature, digest,
+                                              seed, seedlen);
+}
+
+/* ============== New for 3.008 =============================== */
+
+/* Dispatch to vector->p_AES_AllocateContext.
+ * Returns NULL if the vector cannot be loaded. */
+AESContext *
+AES_AllocateContext(void)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_AES_AllocateContext();
+}
+
+/* Dispatch to vector->p_AESKeyWrap_AllocateContext.
+ * Returns NULL if the vector cannot be loaded. */
+AESKeyWrapContext *
+AESKeyWrap_AllocateContext(void)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_AESKeyWrap_AllocateContext();
+}
+
+/* Dispatch to vector->p_DES_AllocateContext.
+ * Returns NULL if the vector cannot be loaded. */
+DESContext *
+DES_AllocateContext(void)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_DES_AllocateContext();
+}
+
+/* Dispatch to vector->p_RC2_AllocateContext.
+ * Returns NULL if the vector cannot be loaded. */
+RC2Context *
+RC2_AllocateContext(void)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_RC2_AllocateContext();
+}
+
+/* Dispatch to vector->p_RC4_AllocateContext.
+ * Returns NULL if the vector cannot be loaded. */
+RC4Context *
+RC4_AllocateContext(void)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector->p_RC4_AllocateContext();
+}
+
+/*
+ * AES_InitContext - dispatch stub for vector->p_AES_InitContext; all
+ * arguments are forwarded in order. Returns SECFailure if `vector` is
+ * unset and freebl_RunLoaderOnce() does not return PR_SUCCESS.
+ */
+SECStatus
+AES_InitContext(AESContext *cx, const unsigned char *key,
+                unsigned int keylen, const unsigned char *iv, int mode,
+                unsigned int encrypt, unsigned int blocklen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_AES_InitContext)(cx, key, keylen, iv, mode,
+                                       encrypt, blocklen);
+}
+
+/*
+ * AESKeyWrap_InitContext - dispatch stub for
+ * vector->p_AESKeyWrap_InitContext; all arguments are forwarded in order.
+ * Returns SECFailure if `vector` is unset and freebl_RunLoaderOnce() does
+ * not return PR_SUCCESS.
+ */
+SECStatus
+AESKeyWrap_InitContext(AESKeyWrapContext *cx, const unsigned char *key,
+                       unsigned int keylen, const unsigned char *iv, int mode,
+                       unsigned int encrypt, unsigned int blocklen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_AESKeyWrap_InitContext)(cx, key, keylen, iv, mode,
+                                              encrypt, blocklen);
+}
+
+/*
+ * DES_InitContext - dispatch stub for vector->p_DES_InitContext; all
+ * arguments (including xtra) are forwarded in order. Returns SECFailure
+ * if `vector` is unset and freebl_RunLoaderOnce() does not return
+ * PR_SUCCESS.
+ */
+SECStatus
+DES_InitContext(DESContext *cx, const unsigned char *key,
+                unsigned int keylen, const unsigned char *iv, int mode,
+                unsigned int encrypt, unsigned int xtra)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_DES_InitContext)(cx, key, keylen, iv, mode,
+                                       encrypt, xtra);
+}
+
+/*
+ * SEED_InitContext - dispatch stub for vector->p_SEED_InitContext; all
+ * arguments (including xtra) are forwarded in order. Returns SECFailure
+ * if `vector` is unset and freebl_RunLoaderOnce() does not return
+ * PR_SUCCESS.
+ */
+SECStatus
+SEED_InitContext(SEEDContext *cx, const unsigned char *key,
+                 unsigned int keylen, const unsigned char *iv, int mode,
+                 unsigned int encrypt, unsigned int xtra)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_SEED_InitContext)(cx, key, keylen, iv, mode,
+                                        encrypt, xtra);
+}
+
+/*
+ * RC2_InitContext - dispatch stub for vector->p_RC2_InitContext; all
+ * arguments are forwarded in order. Returns SECFailure if `vector` is
+ * unset and freebl_RunLoaderOnce() does not return PR_SUCCESS.
+ */
+SECStatus
+RC2_InitContext(RC2Context *cx, const unsigned char *key,
+                unsigned int keylen, const unsigned char *iv, int mode,
+                unsigned int effectiveKeyLen, unsigned int xtra)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_RC2_InitContext)(cx, key, keylen, iv, mode,
+                                       effectiveKeyLen, xtra);
+}
+
+/*
+ * RC4_InitContext - dispatch stub for vector->p_RC4_InitContext; x1..x4
+ * are forwarded untouched. Returns SECFailure if `vector` is unset and
+ * freebl_RunLoaderOnce() does not return PR_SUCCESS.
+ */
+SECStatus
+RC4_InitContext(RC4Context *cx, const unsigned char *key,
+                unsigned int keylen, const unsigned char *x1, int x2,
+                unsigned int x3, unsigned int x4)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_RC4_InitContext)(cx, key, keylen, x1, x2, x3, x4);
+}
+
+/*
+ * MD2_Clone - dispatch stub for vector->p_MD2_Clone.
+ * Does nothing if `vector` is unset and freebl_RunLoaderOnce() does not
+ * return PR_SUCCESS; the caller gets no failure indication.
+ */
+void
+MD2_Clone(MD2Context *dest, MD2Context *src)
+{
+    if (vector != NULL || freebl_RunLoaderOnce() == PR_SUCCESS) {
+        (vector->p_MD2_Clone)(dest, src);
+    }
+}
+
+/*
+ * MD5_Clone - dispatch stub for vector->p_MD5_Clone.
+ * Does nothing if `vector` is unset and freebl_RunLoaderOnce() does not
+ * return PR_SUCCESS; the caller gets no failure indication.
+ */
+void
+MD5_Clone(MD5Context *dest, MD5Context *src)
+{
+    if (vector != NULL || freebl_RunLoaderOnce() == PR_SUCCESS) {
+        (vector->p_MD5_Clone)(dest, src);
+    }
+}
+
+/*
+ * SHA1_Clone - dispatch stub for vector->p_SHA1_Clone.
+ * Does nothing if `vector` is unset and freebl_RunLoaderOnce() does not
+ * return PR_SUCCESS; the caller gets no failure indication.
+ */
+void
+SHA1_Clone(SHA1Context *dest, SHA1Context *src)
+{
+    if (vector != NULL || freebl_RunLoaderOnce() == PR_SUCCESS) {
+        (vector->p_SHA1_Clone)(dest, src);
+    }
+}
+
+/*
+ * SHA256_Clone - dispatch stub for vector->p_SHA256_Clone.
+ * Does nothing if `vector` is unset and freebl_RunLoaderOnce() does not
+ * return PR_SUCCESS; the caller gets no failure indication.
+ */
+void
+SHA256_Clone(SHA256Context *dest, SHA256Context *src)
+{
+    if (vector != NULL || freebl_RunLoaderOnce() == PR_SUCCESS) {
+        (vector->p_SHA256_Clone)(dest, src);
+    }
+}
+
+/*
+ * SHA384_Clone - dispatch stub for vector->p_SHA384_Clone.
+ * Does nothing if `vector` is unset and freebl_RunLoaderOnce() does not
+ * return PR_SUCCESS; the caller gets no failure indication.
+ */
+void
+SHA384_Clone(SHA384Context *dest, SHA384Context *src)
+{
+    if (vector != NULL || freebl_RunLoaderOnce() == PR_SUCCESS) {
+        (vector->p_SHA384_Clone)(dest, src);
+    }
+}
+
+/*
+ * SHA512_Clone - dispatch stub for vector->p_SHA512_Clone.
+ * Does nothing if `vector` is unset and freebl_RunLoaderOnce() does not
+ * return PR_SUCCESS; the caller gets no failure indication.
+ */
+void
+SHA512_Clone(SHA512Context *dest, SHA512Context *src)
+{
+    if (vector != NULL || freebl_RunLoaderOnce() == PR_SUCCESS) {
+        (vector->p_SHA512_Clone)(dest, src);
+    }
+}
+
+/*
+ * TLS_PRF - dispatch stub for vector->p_TLS_PRF.
+ * Returns SECFailure if `vector` is unset and freebl_RunLoaderOnce()
+ * does not return PR_SUCCESS.
+ */
+SECStatus
+TLS_PRF(const SECItem *secret, const char *label,
+        SECItem *seed, SECItem *result, PRBool isFIPS)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_TLS_PRF)(secret, label, seed, result, isFIPS);
+}
+
+/*
+ * HASH_GetRawHashObject - dispatch stub for
+ * vector->p_HASH_GetRawHashObject. Returns NULL if `vector` is unset and
+ * freebl_RunLoaderOnce() does not return PR_SUCCESS.
+ */
+const SECHashObject *
+HASH_GetRawHashObject(HASH_HashType hashType)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return (vector->p_HASH_GetRawHashObject)(hashType);
+}
+
+/*
+ * HMAC_Destroy - dispatch stub for vector->p_HMAC_Destroy.
+ * Does nothing if `vector` is unset and freebl_RunLoaderOnce() does not
+ * return PR_SUCCESS.
+ */
+void
+HMAC_Destroy(HMACContext *cx, PRBool freeit)
+{
+    if (vector != NULL || freebl_RunLoaderOnce() == PR_SUCCESS) {
+        (vector->p_HMAC_Destroy)(cx, freeit);
+    }
+}
+
+/*
+ * HMAC_Create - dispatch stub for vector->p_HMAC_Create.
+ * Returns NULL if `vector` is unset and freebl_RunLoaderOnce() does not
+ * return PR_SUCCESS.
+ */
+HMACContext *
+HMAC_Create(const SECHashObject *hashObj, const unsigned char *secret,
+            unsigned int secret_len, PRBool isFIPS)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return (vector->p_HMAC_Create)(hashObj, secret, secret_len, isFIPS);
+}
+
+/*
+ * HMAC_Init - dispatch stub for vector->p_HMAC_Init.
+ * Returns SECFailure if `vector` is unset and freebl_RunLoaderOnce()
+ * does not return PR_SUCCESS.
+ */
+SECStatus
+HMAC_Init(HMACContext *cx, const SECHashObject *hashObj,
+          const unsigned char *secret, unsigned int secret_len,
+          PRBool isFIPS)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_HMAC_Init)(cx, hashObj, secret, secret_len, isFIPS);
+}
+
+/*
+ * HMAC_Begin - dispatch stub for vector->p_HMAC_Begin.
+ * Does nothing if `vector` is unset and freebl_RunLoaderOnce() does not
+ * return PR_SUCCESS.
+ */
+void
+HMAC_Begin(HMACContext *cx)
+{
+    if (vector != NULL || freebl_RunLoaderOnce() == PR_SUCCESS) {
+        (vector->p_HMAC_Begin)(cx);
+    }
+}
+
+/*
+ * HMAC_Update - dispatch stub for vector->p_HMAC_Update.
+ * Does nothing if `vector` is unset and freebl_RunLoaderOnce() does not
+ * return PR_SUCCESS; the data is then not processed and no error is
+ * reported.
+ */
+void
+HMAC_Update(HMACContext *cx, const unsigned char *data,
+            unsigned int data_len)
+{
+    if (vector != NULL || freebl_RunLoaderOnce() == PR_SUCCESS) {
+        (vector->p_HMAC_Update)(cx, data, data_len);
+    }
+}
+
+/*
+ * HMAC_Finish - dispatch stub for vector->p_HMAC_Finish.
+ * Returns SECFailure if `vector` is unset and freebl_RunLoaderOnce()
+ * does not return PR_SUCCESS.
+ */
+SECStatus
+HMAC_Finish(HMACContext *cx, unsigned char *result,
+            unsigned int *result_len, unsigned int max_result_len)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_HMAC_Finish)(cx, result, result_len, max_result_len);
+}
+
+/*
+ * HMAC_Clone - dispatch stub for vector->p_HMAC_Clone.
+ * Returns NULL if `vector` is unset and freebl_RunLoaderOnce() does not
+ * return PR_SUCCESS.
+ */
+HMACContext *
+HMAC_Clone(HMACContext *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return (vector->p_HMAC_Clone)(cx);
+}
+
+/*
+ * RNG_SystemInfoForRNG - dispatch stub for vector->p_RNG_SystemInfoForRNG.
+ * Does nothing if `vector` is unset and freebl_RunLoaderOnce() does not
+ * return PR_SUCCESS.
+ */
+void
+RNG_SystemInfoForRNG(void)
+{
+    if (vector != NULL || freebl_RunLoaderOnce() == PR_SUCCESS) {
+        (vector->p_RNG_SystemInfoForRNG)();
+    }
+}
+
+/*
+ * FIPS186Change_GenerateX - dispatch stub for
+ * vector->p_FIPS186Change_GenerateX. Returns SECFailure if `vector` is
+ * unset and freebl_RunLoaderOnce() does not return PR_SUCCESS.
+ */
+SECStatus
+FIPS186Change_GenerateX(unsigned char *XKEY, const unsigned char *XSEEDj,
+                        unsigned char *x_j)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_FIPS186Change_GenerateX)(XKEY, XSEEDj, x_j);
+}
+
+/*
+ * FIPS186Change_ReduceModQForDSA - dispatch stub for
+ * vector->p_FIPS186Change_ReduceModQForDSA. Returns SECFailure if `vector`
+ * is unset and freebl_RunLoaderOnce() does not return PR_SUCCESS.
+ */
+SECStatus
+FIPS186Change_ReduceModQForDSA(const unsigned char *w,
+                               const unsigned char *q,
+                               unsigned char *xj)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_FIPS186Change_ReduceModQForDSA)(w, q, xj);
+}
+
+/* === new for Camellia === */
+/*
+ * Camellia_InitContext - dispatch stub for vector->p_Camellia_InitContext;
+ * the trailing `unused` argument is still forwarded. Returns SECFailure
+ * if `vector` is unset and freebl_RunLoaderOnce() does not return
+ * PR_SUCCESS.
+ */
+SECStatus
+Camellia_InitContext(CamelliaContext *cx, const unsigned char *key,
+                     unsigned int keylen, const unsigned char *iv, int mode,
+                     unsigned int encrypt, unsigned int unused)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_Camellia_InitContext)(cx, key, keylen, iv, mode,
+                                            encrypt, unused);
+}
+
+/*
+ * Camellia_AllocateContext - dispatch stub for
+ * vector->p_Camellia_AllocateContext. Returns NULL if `vector` is unset
+ * and freebl_RunLoaderOnce() does not return PR_SUCCESS.
+ */
+CamelliaContext *
+Camellia_AllocateContext(void)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return (vector->p_Camellia_AllocateContext)();
+}
+
+/*
+ * Camellia_CreateContext - dispatch stub for
+ * vector->p_Camellia_CreateContext. Returns NULL if `vector` is unset and
+ * freebl_RunLoaderOnce() does not return PR_SUCCESS.
+ */
+CamelliaContext *
+Camellia_CreateContext(const unsigned char *key, const unsigned char *iv,
+                       int mode, int encrypt, unsigned int keylen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return (vector->p_Camellia_CreateContext)(key, iv, mode, encrypt,
+                                              keylen);
+}
+
+/*
+ * Camellia_DestroyContext - dispatch stub for
+ * vector->p_Camellia_DestroyContext. Does nothing if `vector` is unset
+ * and freebl_RunLoaderOnce() does not return PR_SUCCESS.
+ */
+void
+Camellia_DestroyContext(CamelliaContext *cx, PRBool freeit)
+{
+    if (vector != NULL || freebl_RunLoaderOnce() == PR_SUCCESS) {
+        (vector->p_Camellia_DestroyContext)(cx, freeit);
+    }
+}
+
+/*
+ * Camellia_Encrypt - dispatch stub for vector->p_Camellia_Encrypt.
+ * Returns SECFailure if `vector` is unset and freebl_RunLoaderOnce()
+ * does not return PR_SUCCESS.
+ */
+SECStatus
+Camellia_Encrypt(CamelliaContext *cx, unsigned char *output,
+                 unsigned int *outputLen, unsigned int maxOutputLen,
+                 const unsigned char *input, unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_Camellia_Encrypt)(cx, output, outputLen,
+                                        maxOutputLen, input, inputLen);
+}
+
+/*
+ * Camellia_Decrypt - dispatch stub for vector->p_Camellia_Decrypt.
+ * Returns SECFailure if `vector` is unset and freebl_RunLoaderOnce()
+ * does not return PR_SUCCESS.
+ */
+SECStatus
+Camellia_Decrypt(CamelliaContext *cx, unsigned char *output,
+                 unsigned int *outputLen, unsigned int maxOutputLen,
+                 const unsigned char *input, unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_Camellia_Decrypt)(cx, output, outputLen,
+                                        maxOutputLen, input, inputLen);
+}
+
+/*
+ * BL_SetForkState - dispatch stub for vector->p_BL_SetForkState.
+ * Does nothing if `vector` is unset and freebl_RunLoaderOnce() does not
+ * return PR_SUCCESS, so `forked` is not recorded in that case.
+ */
+void
+BL_SetForkState(PRBool forked)
+{
+    if (vector != NULL || freebl_RunLoaderOnce() == PR_SUCCESS) {
+        (vector->p_BL_SetForkState)(forked);
+    }
+}
+
+/*
+ * PRNGTEST_Instantiate - dispatch stub for vector->p_PRNGTEST_Instantiate.
+ * Returns SECFailure if `vector` is unset and freebl_RunLoaderOnce()
+ * does not return PR_SUCCESS.
+ */
+SECStatus
+PRNGTEST_Instantiate(const PRUint8 *entropy, unsigned int entropy_len,
+                     const PRUint8 *nonce, unsigned int nonce_len,
+                     const PRUint8 *personal_string, unsigned int ps_len)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_PRNGTEST_Instantiate)(entropy, entropy_len, nonce,
+                                            nonce_len, personal_string,
+                                            ps_len);
+}
+
+/*
+ * PRNGTEST_Reseed - dispatch stub for vector->p_PRNGTEST_Reseed.
+ * Returns SECFailure if `vector` is unset and freebl_RunLoaderOnce()
+ * does not return PR_SUCCESS.
+ */
+SECStatus
+PRNGTEST_Reseed(const PRUint8 *entropy, unsigned int entropy_len,
+                const PRUint8 *additional, unsigned int additional_len)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_PRNGTEST_Reseed)(entropy, entropy_len, additional,
+                                       additional_len);
+}
+
+/*
+ * PRNGTEST_Generate - dispatch stub for vector->p_PRNGTEST_Generate.
+ * Returns SECFailure if `vector` is unset and freebl_RunLoaderOnce()
+ * does not return PR_SUCCESS.
+ */
+SECStatus
+PRNGTEST_Generate(PRUint8 *bytes, unsigned int bytes_len,
+                  const PRUint8 *additional, unsigned int additional_len)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_PRNGTEST_Generate)(bytes, bytes_len, additional,
+                                         additional_len);
+}
+
+/*
+ * PRNGTEST_Uninstantiate - dispatch stub for
+ * vector->p_PRNGTEST_Uninstantiate. Returns SECFailure if `vector` is
+ * unset and freebl_RunLoaderOnce() does not return PR_SUCCESS.
+ *
+ * The parameter list is spelled (void) so the definition is a real
+ * prototype; an empty "()" in C declares a function with unspecified
+ * arguments and disables argument checking at call sites.
+ */
+SECStatus
+PRNGTEST_Uninstantiate(void)
+{
+    if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+        return SECFailure;
+    return (vector->p_PRNGTEST_Uninstantiate)();
+}
+
+/*
+ * RSA_PopulatePrivateKey - dispatch stub for
+ * vector->p_RSA_PopulatePrivateKey. Returns SECFailure if `vector` is
+ * unset and freebl_RunLoaderOnce() does not return PR_SUCCESS.
+ */
+SECStatus
+RSA_PopulatePrivateKey(RSAPrivateKey *key)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_RSA_PopulatePrivateKey)(key);
+}
+
+/*
+ * JPAKE_Sign - dispatch stub for vector->p_JPAKE_Sign; all ten arguments
+ * are forwarded in order. Returns SECFailure if `vector` is unset and
+ * freebl_RunLoaderOnce() does not return PR_SUCCESS.
+ */
+SECStatus
+JPAKE_Sign(PLArenaPool *arena, const PQGParams *pqg, HASH_HashType hashType,
+           const SECItem *signerID, const SECItem *x,
+           const SECItem *testRandom, const SECItem *gxIn, SECItem *gxOut,
+           SECItem *gv, SECItem *r)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_JPAKE_Sign)(arena, pqg, hashType, signerID, x,
+                                  testRandom, gxIn, gxOut, gv, r);
+}
+
+/*
+ * JPAKE_Verify - dispatch stub for vector->p_JPAKE_Verify.
+ * Returns SECFailure if `vector` is unset and freebl_RunLoaderOnce()
+ * does not return PR_SUCCESS.
+ */
+SECStatus
+JPAKE_Verify(PLArenaPool *arena, const PQGParams *pqg,
+             HASH_HashType hashType, const SECItem *signerID,
+             const SECItem *peerID, const SECItem *gx,
+             const SECItem *gv, const SECItem *r)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_JPAKE_Verify)(arena, pqg, hashType, signerID,
+                                    peerID, gx, gv, r);
+}
+
+/*
+ * JPAKE_Round2 - dispatch stub for vector->p_JPAKE_Round2.
+ * Returns SECFailure if `vector` is unset and freebl_RunLoaderOnce()
+ * does not return PR_SUCCESS.
+ */
+SECStatus
+JPAKE_Round2(PLArenaPool *arena, const SECItem *p, const SECItem *q,
+             const SECItem *gx1, const SECItem *gx3, const SECItem *gx4,
+             SECItem *base, const SECItem *x2, const SECItem *s,
+             SECItem *x2s)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_JPAKE_Round2)(arena, p, q, gx1, gx3, gx4, base,
+                                    x2, s, x2s);
+}
+
+/*
+ * JPAKE_Final - dispatch stub for vector->p_JPAKE_Final.
+ * Returns SECFailure if `vector` is unset and freebl_RunLoaderOnce()
+ * does not return PR_SUCCESS.
+ */
+SECStatus
+JPAKE_Final(PLArenaPool *arena, const SECItem *p, const SECItem *q,
+            const SECItem *x2, const SECItem *gx4, const SECItem *x2s,
+            const SECItem *B, SECItem *K)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_JPAKE_Final)(arena, p, q, x2, gx4, x2s, B, K);
+}
+
+/*
+ * TLS_P_hash - dispatch stub for vector->p_TLS_P_hash.
+ * Returns SECFailure if `vector` is unset and freebl_RunLoaderOnce()
+ * does not return PR_SUCCESS.
+ */
+SECStatus
+TLS_P_hash(HASH_HashType hashAlg, const SECItem *secret, const char *label,
+           SECItem *seed, SECItem *result, PRBool isFIPS)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_TLS_P_hash)(hashAlg, secret, label, seed, result,
+                                  isFIPS);
+}
+
+/*
+ * SHA224_Hash - dispatch stub for vector->p_SHA224_Hash.
+ * Returns SECFailure if `vector` is unset and freebl_RunLoaderOnce()
+ * does not return PR_SUCCESS.
+ */
+SECStatus
+SHA224_Hash(unsigned char *dest, const char *src)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_SHA224_Hash)(dest, src);
+}
+
+/*
+ * SHA224_HashBuf - dispatch stub for vector->p_SHA224_HashBuf.
+ * Returns SECFailure if `vector` is unset and freebl_RunLoaderOnce()
+ * does not return PR_SUCCESS.
+ */
+SECStatus
+SHA224_HashBuf(unsigned char *dest, const unsigned char *src,
+               PRUint32 src_length)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_SHA224_HashBuf)(dest, src, src_length);
+}
+
+/*
+ * SHA224_NewContext - dispatch stub for vector->p_SHA224_NewContext.
+ * Returns NULL if `vector` is unset and freebl_RunLoaderOnce() does not
+ * return PR_SUCCESS.
+ */
+SHA224Context *
+SHA224_NewContext(void)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return (vector->p_SHA224_NewContext)();
+}
+
+/*
+ * SHA224_DestroyContext - dispatch stub for
+ * vector->p_SHA224_DestroyContext. Does nothing if `vector` is unset and
+ * freebl_RunLoaderOnce() does not return PR_SUCCESS.
+ */
+void
+SHA224_DestroyContext(SHA224Context *cx, PRBool freeit)
+{
+    if (vector != NULL || freebl_RunLoaderOnce() == PR_SUCCESS) {
+        (vector->p_SHA224_DestroyContext)(cx, freeit);
+    }
+}
+
+/*
+ * SHA224_Begin - dispatch stub for vector->p_SHA224_Begin.
+ * Does nothing if `vector` is unset and freebl_RunLoaderOnce() does not
+ * return PR_SUCCESS.
+ *
+ * The context parameter is typed SHA224Context * to agree with the other
+ * SHA224_* stubs in this file (the original spelled it SHA256Context *).
+ * NOTE(review): this assumes SHA224Context and SHA256Context name the
+ * same underlying type - confirm against their typedefs.
+ */
+void
+SHA224_Begin(SHA224Context *cx)
+{
+    if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+        return;
+    (vector->p_SHA224_Begin)(cx);
+}
+
+/*
+ * SHA224_Update - dispatch stub for vector->p_SHA224_Update.
+ * Does nothing if `vector` is unset and freebl_RunLoaderOnce() does not
+ * return PR_SUCCESS; the input is then not processed and no error is
+ * reported.
+ */
+void
+SHA224_Update(SHA224Context *cx, const unsigned char *input,
+              unsigned int inputLen)
+{
+    if (vector != NULL || freebl_RunLoaderOnce() == PR_SUCCESS) {
+        (vector->p_SHA224_Update)(cx, input, inputLen);
+    }
+}
+
+/*
+ * SHA224_End - dispatch stub for vector->p_SHA224_End.
+ * Does nothing if `vector` is unset and freebl_RunLoaderOnce() does not
+ * return PR_SUCCESS; digest and *digestLen are left untouched in that
+ * case.
+ */
+void
+SHA224_End(SHA224Context *cx, unsigned char *digest,
+           unsigned int *digestLen, unsigned int maxDigestLen)
+{
+    if (vector != NULL || freebl_RunLoaderOnce() == PR_SUCCESS) {
+        (vector->p_SHA224_End)(cx, digest, digestLen, maxDigestLen);
+    }
+}
+
+/*
+ * SHA224_TraceState - dispatch stub for vector->p_SHA224_TraceState.
+ * Does nothing if `vector` is unset and freebl_RunLoaderOnce() does not
+ * return PR_SUCCESS.
+ */
+void
+SHA224_TraceState(SHA224Context *cx)
+{
+    if (vector != NULL || freebl_RunLoaderOnce() == PR_SUCCESS) {
+        (vector->p_SHA224_TraceState)(cx);
+    }
+}
+
+/*
+ * SHA224_FlattenSize - dispatch stub for vector->p_SHA224_FlattenSize.
+ * Returns 0 if `vector` is unset and freebl_RunLoaderOnce() does not
+ * return PR_SUCCESS.
+ */
+unsigned int
+SHA224_FlattenSize(SHA224Context *cx)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return 0;
+    }
+    return (vector->p_SHA224_FlattenSize)(cx);
+}
+
+/*
+ * SHA224_Flatten - dispatch stub for vector->p_SHA224_Flatten.
+ * Returns SECFailure if `vector` is unset and freebl_RunLoaderOnce()
+ * does not return PR_SUCCESS.
+ */
+SECStatus
+SHA224_Flatten(SHA224Context *cx, unsigned char *space)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_SHA224_Flatten)(cx, space);
+}
+
+/*
+ * SHA224_Resurrect - dispatch stub for vector->p_SHA224_Resurrect.
+ * Returns NULL if `vector` is unset and freebl_RunLoaderOnce() does not
+ * return PR_SUCCESS.
+ */
+SHA224Context *
+SHA224_Resurrect(unsigned char *space, void *arg)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return (vector->p_SHA224_Resurrect)(space, arg);
+}
+
+/*
+ * SHA224_Clone - dispatch stub for vector->p_SHA224_Clone.
+ * Does nothing if `vector` is unset and freebl_RunLoaderOnce() does not
+ * return PR_SUCCESS; the caller gets no failure indication.
+ */
+void
+SHA224_Clone(SHA224Context *dest, SHA224Context *src)
+{
+    if (vector != NULL || freebl_RunLoaderOnce() == PR_SUCCESS) {
+        (vector->p_SHA224_Clone)(dest, src);
+    }
+}
+
+/*
+ * BLAPI_SHVerifyFile - dispatch stub for vector->p_BLAPI_SHVerifyFile.
+ * Returns PR_FALSE if `vector` is unset and freebl_RunLoaderOnce() does
+ * not return PR_SUCCESS.
+ */
+PRBool
+BLAPI_SHVerifyFile(const char *name)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return PR_FALSE;
+    }
+    return (vector->p_BLAPI_SHVerifyFile)(name);
+}
+
+/* === new for DSA-2 === */
+/*
+ * PQG_ParamGenV2 - dispatch stub for vector->p_PQG_ParamGenV2.
+ * Returns SECFailure if `vector` is unset and freebl_RunLoaderOnce()
+ * does not return PR_SUCCESS.
+ */
+SECStatus
+PQG_ParamGenV2(unsigned int L, unsigned int N, unsigned int seedBytes,
+               PQGParams **pParams, PQGVerify **pVfy)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_PQG_ParamGenV2)(L, N, seedBytes, pParams, pVfy);
+}
+
+/*
+ * PRNGTEST_RunHealthTests - dispatch stub for
+ * vector->p_PRNGTEST_RunHealthTests. Returns SECFailure if `vector` is
+ * unset and freebl_RunLoaderOnce() does not return PR_SUCCESS.
+ */
+SECStatus
+PRNGTEST_RunHealthTests(void)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_PRNGTEST_RunHealthTests)();
+}
+
+/*
+ * SSLv3_MAC_ConstantTime - dispatch stub for
+ * vector->p_SSLv3_MAC_ConstantTime; all eleven arguments are forwarded in
+ * order. Returns SECFailure if `vector` is unset and
+ * freebl_RunLoaderOnce() does not return PR_SUCCESS.
+ */
+SECStatus
+SSLv3_MAC_ConstantTime(unsigned char *result, unsigned int *resultLen,
+                       unsigned int maxResultLen,
+                       const SECHashObject *hashObj,
+                       const unsigned char *secret, unsigned int secretLen,
+                       const unsigned char *header, unsigned int headerLen,
+                       const unsigned char *body, unsigned int bodyLen,
+                       unsigned int bodyTotalLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_SSLv3_MAC_ConstantTime)(result, resultLen,
+                                              maxResultLen, hashObj,
+                                              secret, secretLen,
+                                              header, headerLen,
+                                              body, bodyLen, bodyTotalLen);
+}
+
+/*
+ * HMAC_ConstantTime - dispatch stub for vector->p_HMAC_ConstantTime; all
+ * eleven arguments are forwarded in order. Returns SECFailure if `vector`
+ * is unset and freebl_RunLoaderOnce() does not return PR_SUCCESS.
+ */
+SECStatus
+HMAC_ConstantTime(unsigned char *result, unsigned int *resultLen,
+                  unsigned int maxResultLen,
+                  const SECHashObject *hashObj,
+                  const unsigned char *secret, unsigned int secretLen,
+                  const unsigned char *header, unsigned int headerLen,
+                  const unsigned char *body, unsigned int bodyLen,
+                  unsigned int bodyTotalLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_HMAC_ConstantTime)(result, resultLen, maxResultLen,
+                                         hashObj,
+                                         secret, secretLen,
+                                         header, headerLen,
+                                         body, bodyLen, bodyTotalLen);
+}
+
+/*
+ * RSA_SignRaw - dispatch stub for vector->p_RSA_SignRaw.
+ * Returns SECFailure if `vector` is unset and freebl_RunLoaderOnce()
+ * does not return PR_SUCCESS.
+ */
+SECStatus
+RSA_SignRaw(RSAPrivateKey *key,
+            unsigned char *output, unsigned int *outputLen,
+            unsigned int maxOutputLen,
+            const unsigned char *input, unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_RSA_SignRaw)(key, output, outputLen, maxOutputLen,
+                                   input, inputLen);
+}
+
+/*
+ * RSA_CheckSignRaw - dispatch stub for vector->p_RSA_CheckSignRaw.
+ * Returns SECFailure if `vector` is unset and freebl_RunLoaderOnce()
+ * does not return PR_SUCCESS.
+ */
+SECStatus
+RSA_CheckSignRaw(RSAPublicKey *key,
+                 const unsigned char *sig, unsigned int sigLen,
+                 const unsigned char *hash, unsigned int hashLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_RSA_CheckSignRaw)(key, sig, sigLen, hash, hashLen);
+}
+
+/*
+ * RSA_CheckSignRecoverRaw - dispatch stub for
+ * vector->p_RSA_CheckSignRecoverRaw. Returns SECFailure if `vector` is
+ * unset and freebl_RunLoaderOnce() does not return PR_SUCCESS.
+ */
+SECStatus
+RSA_CheckSignRecoverRaw(RSAPublicKey *key,
+                        unsigned char *data, unsigned int *dataLen,
+                        unsigned int maxDataLen,
+                        const unsigned char *sig, unsigned int sigLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_RSA_CheckSignRecoverRaw)(key, data, dataLen,
+                                               maxDataLen, sig, sigLen);
+}
+
+/*
+ * RSA_EncryptRaw - dispatch stub for vector->p_RSA_EncryptRaw.
+ * Returns SECFailure if `vector` is unset and freebl_RunLoaderOnce()
+ * does not return PR_SUCCESS.
+ */
+SECStatus
+RSA_EncryptRaw(RSAPublicKey *key,
+               unsigned char *output, unsigned int *outputLen,
+               unsigned int maxOutputLen,
+               const unsigned char *input, unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_RSA_EncryptRaw)(key, output, outputLen,
+                                      maxOutputLen, input, inputLen);
+}
+
+/*
+ * RSA_DecryptRaw - dispatch stub for vector->p_RSA_DecryptRaw.
+ * Returns SECFailure if `vector` is unset and freebl_RunLoaderOnce()
+ * does not return PR_SUCCESS.
+ */
+SECStatus
+RSA_DecryptRaw(RSAPrivateKey *key,
+               unsigned char *output, unsigned int *outputLen,
+               unsigned int maxOutputLen,
+               const unsigned char *input, unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_RSA_DecryptRaw)(key, output, outputLen,
+                                      maxOutputLen, input, inputLen);
+}
+
+/*
+ * RSA_EncryptOAEP - dispatch stub for vector->p_RSA_EncryptOAEP; all
+ * twelve arguments are forwarded in order. Returns SECFailure if `vector`
+ * is unset and freebl_RunLoaderOnce() does not return PR_SUCCESS.
+ */
+SECStatus
+RSA_EncryptOAEP(RSAPublicKey *key,
+                HASH_HashType hashAlg, HASH_HashType maskHashAlg,
+                const unsigned char *label, unsigned int labelLen,
+                const unsigned char *seed, unsigned int seedLen,
+                unsigned char *output, unsigned int *outputLen,
+                unsigned int maxOutputLen,
+                const unsigned char *input, unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_RSA_EncryptOAEP)(key, hashAlg, maskHashAlg,
+                                       label, labelLen, seed, seedLen,
+                                       output, outputLen, maxOutputLen,
+                                       input, inputLen);
+}
+
+/*
+ * RSA_DecryptOAEP - dispatch stub for vector->p_RSA_DecryptOAEP; all ten
+ * arguments are forwarded in order. Returns SECFailure if `vector` is
+ * unset and freebl_RunLoaderOnce() does not return PR_SUCCESS.
+ */
+SECStatus
+RSA_DecryptOAEP(RSAPrivateKey *key,
+                HASH_HashType hashAlg, HASH_HashType maskHashAlg,
+                const unsigned char *label, unsigned int labelLen,
+                unsigned char *output, unsigned int *outputLen,
+                unsigned int maxOutputLen,
+                const unsigned char *input, unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_RSA_DecryptOAEP)(key, hashAlg, maskHashAlg,
+                                       label, labelLen,
+                                       output, outputLen, maxOutputLen,
+                                       input, inputLen);
+}
+
+/*
+ * RSA_EncryptBlock - dispatch stub for vector->p_RSA_EncryptBlock.
+ * Returns SECFailure if `vector` is unset and freebl_RunLoaderOnce()
+ * does not return PR_SUCCESS.
+ */
+SECStatus
+RSA_EncryptBlock(RSAPublicKey *key,
+                 unsigned char *output, unsigned int *outputLen,
+                 unsigned int maxOutputLen,
+                 const unsigned char *input, unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_RSA_EncryptBlock)(key, output, outputLen,
+                                        maxOutputLen, input, inputLen);
+}
+
+/*
+ * RSA_DecryptBlock - dispatch stub for vector->p_RSA_DecryptBlock.
+ * Returns SECFailure if `vector` is unset and freebl_RunLoaderOnce()
+ * does not return PR_SUCCESS.
+ */
+SECStatus
+RSA_DecryptBlock(RSAPrivateKey *key,
+                 unsigned char *output, unsigned int *outputLen,
+                 unsigned int maxOutputLen,
+                 const unsigned char *input, unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_RSA_DecryptBlock)(key, output, outputLen,
+                                        maxOutputLen, input, inputLen);
+}
+
+/*
+ * RSA_SignPSS - dispatch stub for vector->p_RSA_SignPSS; all ten
+ * arguments are forwarded in order. Returns SECFailure if `vector` is
+ * unset and freebl_RunLoaderOnce() does not return PR_SUCCESS.
+ */
+SECStatus
+RSA_SignPSS(RSAPrivateKey *key,
+            HASH_HashType hashAlg, HASH_HashType maskHashAlg,
+            const unsigned char *salt, unsigned int saltLen,
+            unsigned char *output, unsigned int *outputLen,
+            unsigned int maxOutputLen,
+            const unsigned char *input, unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_RSA_SignPSS)(key, hashAlg, maskHashAlg,
+                                   salt, saltLen,
+                                   output, outputLen, maxOutputLen,
+                                   input, inputLen);
+}
+
+/*
+ * RSA_CheckSignPSS - dispatch stub for vector->p_RSA_CheckSignPSS.
+ * Returns SECFailure if `vector` is unset and freebl_RunLoaderOnce()
+ * does not return PR_SUCCESS.
+ */
+SECStatus
+RSA_CheckSignPSS(RSAPublicKey *key,
+                 HASH_HashType hashAlg, HASH_HashType maskHashAlg,
+                 unsigned int saltLen,
+                 const unsigned char *sig, unsigned int sigLen,
+                 const unsigned char *hash, unsigned int hashLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_RSA_CheckSignPSS)(key, hashAlg, maskHashAlg,
+                                        saltLen, sig, sigLen,
+                                        hash, hashLen);
+}
+
+/*
+ * RSA_Sign - dispatch stub for vector->p_RSA_Sign.
+ * Returns SECFailure if `vector` is unset and freebl_RunLoaderOnce()
+ * does not return PR_SUCCESS.
+ */
+SECStatus
+RSA_Sign(RSAPrivateKey *key,
+         unsigned char *output, unsigned int *outputLen,
+         unsigned int maxOutputLen,
+         const unsigned char *input, unsigned int inputLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_RSA_Sign)(key, output, outputLen, maxOutputLen,
+                                input, inputLen);
+}
+
+/*
+ * RSA_CheckSign - dispatch stub for vector->p_RSA_CheckSign.
+ * Returns SECFailure if `vector` is unset and freebl_RunLoaderOnce()
+ * does not return PR_SUCCESS.
+ */
+SECStatus
+RSA_CheckSign(RSAPublicKey *key,
+              const unsigned char *sig, unsigned int sigLen,
+              const unsigned char *data, unsigned int dataLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_RSA_CheckSign)(key, sig, sigLen, data, dataLen);
+}
+
+/*
+ * RSA_CheckSignRecover - dispatch stub for vector->p_RSA_CheckSignRecover.
+ * Returns SECFailure if `vector` is unset and freebl_RunLoaderOnce()
+ * does not return PR_SUCCESS.
+ */
+SECStatus
+RSA_CheckSignRecover(RSAPublicKey *key,
+                     unsigned char *output, unsigned int *outputLen,
+                     unsigned int maxOutputLen,
+                     const unsigned char *sig, unsigned int sigLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_RSA_CheckSignRecover)(key, output, outputLen,
+                                            maxOutputLen, sig, sigLen);
+}
+
+/*
+ * EC_FillParams - dispatch stub for vector->p_EC_FillParams.
+ * Returns SECFailure if `vector` is unset and freebl_RunLoaderOnce()
+ * does not return PR_SUCCESS.
+ */
+SECStatus
+EC_FillParams(PLArenaPool *arena, const SECItem *encodedParams,
+              ECParams *params)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_EC_FillParams)(arena, encodedParams, params);
+}
+
+/*
+ * EC_DecodeParams - dispatch stub for vector->p_EC_DecodeParams.
+ * Returns SECFailure if `vector` is unset and freebl_RunLoaderOnce()
+ * does not return PR_SUCCESS.
+ */
+SECStatus
+EC_DecodeParams(const SECItem *encodedParams, ECParams **ecparams)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_EC_DecodeParams)(encodedParams, ecparams);
+}
+
+/*
+ * EC_CopyParams - dispatch stub for vector->p_EC_CopyParams.
+ * Returns SECFailure if `vector` is unset and freebl_RunLoaderOnce()
+ * does not return PR_SUCCESS.
+ */
+SECStatus
+EC_CopyParams(PLArenaPool *arena, ECParams *dstParams,
+              const ECParams *srcParams)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_EC_CopyParams)(arena, dstParams, srcParams);
+}
+
+/*
+ * ChaCha20Poly1305_InitContext - dispatch stub for
+ * vector->p_ChaCha20Poly1305_InitContext. Returns SECFailure if `vector`
+ * is unset and freebl_RunLoaderOnce() does not return PR_SUCCESS.
+ */
+SECStatus
+ChaCha20Poly1305_InitContext(ChaCha20Poly1305Context *ctx,
+                             const unsigned char *key, unsigned int keyLen,
+                             unsigned int tagLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_ChaCha20Poly1305_InitContext)(ctx, key, keyLen,
+                                                    tagLen);
+}
+
+/*
+ * ChaCha20Poly1305_CreateContext - dispatch stub for
+ * vector->p_ChaCha20Poly1305_CreateContext. Returns NULL if `vector` is
+ * unset and freebl_RunLoaderOnce() does not return PR_SUCCESS.
+ */
+ChaCha20Poly1305Context *
+ChaCha20Poly1305_CreateContext(const unsigned char *key,
+                               unsigned int keyLen, unsigned int tagLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return (vector->p_ChaCha20Poly1305_CreateContext)(key, keyLen, tagLen);
+}
+
+/*
+ * ChaCha20Poly1305_DestroyContext - dispatch stub for
+ * vector->p_ChaCha20Poly1305_DestroyContext. Does nothing if `vector` is
+ * unset and freebl_RunLoaderOnce() does not return PR_SUCCESS.
+ */
+void
+ChaCha20Poly1305_DestroyContext(ChaCha20Poly1305Context *ctx, PRBool freeit)
+{
+    if (vector != NULL || freebl_RunLoaderOnce() == PR_SUCCESS) {
+        (vector->p_ChaCha20Poly1305_DestroyContext)(ctx, freeit);
+    }
+}
+
+/*
+ * ChaCha20Poly1305_Seal - dispatch stub for vector->p_ChaCha20Poly1305_Seal;
+ * all ten arguments are forwarded in order. Returns SECFailure if `vector`
+ * is unset and freebl_RunLoaderOnce() does not return PR_SUCCESS.
+ */
+SECStatus
+ChaCha20Poly1305_Seal(const ChaCha20Poly1305Context *ctx,
+                      unsigned char *output, unsigned int *outputLen,
+                      unsigned int maxOutputLen,
+                      const unsigned char *input, unsigned int inputLen,
+                      const unsigned char *nonce, unsigned int nonceLen,
+                      const unsigned char *ad, unsigned int adLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_ChaCha20Poly1305_Seal)(ctx, output, outputLen,
+                                             maxOutputLen, input, inputLen,
+                                             nonce, nonceLen, ad, adLen);
+}
+
+/*
+ * ChaCha20Poly1305_Open - dispatch stub for vector->p_ChaCha20Poly1305_Open;
+ * all ten arguments are forwarded in order. Returns SECFailure if `vector`
+ * is unset and freebl_RunLoaderOnce() does not return PR_SUCCESS.
+ */
+SECStatus
+ChaCha20Poly1305_Open(const ChaCha20Poly1305Context *ctx,
+                      unsigned char *output, unsigned int *outputLen,
+                      unsigned int maxOutputLen,
+                      const unsigned char *input, unsigned int inputLen,
+                      const unsigned char *nonce, unsigned int nonceLen,
+                      const unsigned char *ad, unsigned int adLen)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_ChaCha20Poly1305_Open)(ctx, output, outputLen,
+                                             maxOutputLen, input, inputLen,
+                                             nonce, nonceLen, ad, adLen);
+}
+
+/*
+ * EC_GetPointSize - dispatch stub for vector->p_EC_GetPointSize.
+ * If `vector` is unset and freebl_RunLoaderOnce() does not return
+ * PR_SUCCESS, the SECFailure status constant is returned through the int
+ * return type instead of a size.
+ * NOTE(review): callers must treat that value as an error sentinel, not a
+ * point size - confirm they check for it.
+ */
+int
+EC_GetPointSize(const ECParams *params)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return SECFailure;
+    }
+    return (vector->p_EC_GetPointSize)(params);
+}
diff --git a/security/nss/lib/freebl/loader.h b/security/nss/lib/freebl/loader.h
new file mode 100644
index 000000000..ed392cc47
--- /dev/null
+++ b/security/nss/lib/freebl/loader.h
@@ -0,0 +1,788 @@
+/*
+ * loader.h - load platform dependent DSO containing freebl implementation.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _LOADER_H_
+#define _LOADER_H_ 1
+
+#include "blapi.h"
+
+/* Version of the FREEBLVectorStr layout declared below. 0x0313 reads as
+ * 3.019 (0x13 == 19), which matches the last "Version 3.019 came to here"
+ * marker in the struct. NOTE(review): presumably the high byte is the major
+ * and the low byte the minor version, as with NSSLOW_VERSION - confirm
+ * against the version check in the loader. */
+#define FREEBL_VERSION 0x0313
+
+/*
+ * FREEBLVectorStr - table of function pointers through which the freebl
+ * implementation is reached.
+ *
+ * Each p_Xxx member has the signature of the corresponding Xxx function; the
+ * loader's wrapper functions forward their arguments through these members
+ * (for example EC_GetPointSize calls vector->p_EC_GetPointSize).
+ *
+ * The "Version 3.0NN came to here" markers appear to record where the table
+ * ended at each version. As the note at the end of the struct says, new
+ * members must be appended at the end and FREEBL_VERSION bumped.
+ */
+struct FREEBLVectorStr {
+
+ unsigned short length; /* of this struct in bytes */
+ unsigned short version; /* of this struct. */
+
+ RSAPrivateKey *(*p_RSA_NewKey)(int keySizeInBits,
+ SECItem *publicExponent);
+
+ SECStatus (*p_RSA_PublicKeyOp)(RSAPublicKey *key,
+ unsigned char *output,
+ const unsigned char *input);
+
+ SECStatus (*p_RSA_PrivateKeyOp)(RSAPrivateKey *key,
+ unsigned char *output,
+ const unsigned char *input);
+
+ SECStatus (*p_DSA_NewKey)(const PQGParams *params,
+ DSAPrivateKey **privKey);
+
+ SECStatus (*p_DSA_SignDigest)(DSAPrivateKey *key,
+ SECItem *signature,
+ const SECItem *digest);
+
+ SECStatus (*p_DSA_VerifyDigest)(DSAPublicKey *key,
+ const SECItem *signature,
+ const SECItem *digest);
+
+ SECStatus (*p_DSA_NewKeyFromSeed)(const PQGParams *params,
+ const unsigned char *seed,
+ DSAPrivateKey **privKey);
+
+ SECStatus (*p_DSA_SignDigestWithSeed)(DSAPrivateKey *key,
+ SECItem *signature,
+ const SECItem *digest,
+ const unsigned char *seed);
+
+ SECStatus (*p_DH_GenParam)(int primeLen, DHParams **params);
+
+ SECStatus (*p_DH_NewKey)(DHParams *params,
+ DHPrivateKey **privKey);
+
+ SECStatus (*p_DH_Derive)(SECItem *publicValue,
+ SECItem *prime,
+ SECItem *privateValue,
+ SECItem *derivedSecret,
+ unsigned int maxOutBytes);
+
+ SECStatus (*p_KEA_Derive)(SECItem *prime,
+ SECItem *public1,
+ SECItem *public2,
+ SECItem *private1,
+ SECItem *private2,
+ SECItem *derivedSecret);
+
+ PRBool (*p_KEA_Verify)(SECItem *Y, SECItem *prime, SECItem *subPrime);
+
+ RC4Context *(*p_RC4_CreateContext)(const unsigned char *key, int len);
+
+ void (*p_RC4_DestroyContext)(RC4Context *cx, PRBool freeit);
+
+ SECStatus (*p_RC4_Encrypt)(RC4Context *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+ SECStatus (*p_RC4_Decrypt)(RC4Context *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+ RC2Context *(*p_RC2_CreateContext)(const unsigned char *key,
+ unsigned int len, const unsigned char *iv,
+ int mode, unsigned effectiveKeyLen);
+
+ void (*p_RC2_DestroyContext)(RC2Context *cx, PRBool freeit);
+
+ SECStatus (*p_RC2_Encrypt)(RC2Context *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+ SECStatus (*p_RC2_Decrypt)(RC2Context *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+ RC5Context *(*p_RC5_CreateContext)(const SECItem *key, unsigned int rounds,
+ unsigned int wordSize, const unsigned char *iv, int mode);
+
+ void (*p_RC5_DestroyContext)(RC5Context *cx, PRBool freeit);
+
+ SECStatus (*p_RC5_Encrypt)(RC5Context *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+ SECStatus (*p_RC5_Decrypt)(RC5Context *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+ DESContext *(*p_DES_CreateContext)(const unsigned char *key,
+ const unsigned char *iv,
+ int mode, PRBool encrypt);
+
+ void (*p_DES_DestroyContext)(DESContext *cx, PRBool freeit);
+
+ SECStatus (*p_DES_Encrypt)(DESContext *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+ SECStatus (*p_DES_Decrypt)(DESContext *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+ AESContext *(*p_AES_CreateContext)(const unsigned char *key,
+ const unsigned char *iv,
+ int mode, int encrypt, unsigned int keylen,
+ unsigned int blocklen);
+
+ void (*p_AES_DestroyContext)(AESContext *cx, PRBool freeit);
+
+ SECStatus (*p_AES_Encrypt)(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+ SECStatus (*p_AES_Decrypt)(AESContext *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+ SECStatus (*p_MD5_Hash)(unsigned char *dest, const char *src);
+
+ SECStatus (*p_MD5_HashBuf)(unsigned char *dest, const unsigned char *src,
+ PRUint32 src_length);
+
+ MD5Context *(*p_MD5_NewContext)(void);
+
+ void (*p_MD5_DestroyContext)(MD5Context *cx, PRBool freeit);
+
+ void (*p_MD5_Begin)(MD5Context *cx);
+
+ void (*p_MD5_Update)(MD5Context *cx,
+ const unsigned char *input, unsigned int inputLen);
+
+ void (*p_MD5_End)(MD5Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+
+ unsigned int (*p_MD5_FlattenSize)(MD5Context *cx);
+
+ SECStatus (*p_MD5_Flatten)(MD5Context *cx, unsigned char *space);
+
+ MD5Context *(*p_MD5_Resurrect)(unsigned char *space, void *arg);
+
+ void (*p_MD5_TraceState)(MD5Context *cx);
+
+ SECStatus (*p_MD2_Hash)(unsigned char *dest, const char *src);
+
+ MD2Context *(*p_MD2_NewContext)(void);
+
+ void (*p_MD2_DestroyContext)(MD2Context *cx, PRBool freeit);
+
+ void (*p_MD2_Begin)(MD2Context *cx);
+
+ void (*p_MD2_Update)(MD2Context *cx,
+ const unsigned char *input, unsigned int inputLen);
+
+ void (*p_MD2_End)(MD2Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+
+ unsigned int (*p_MD2_FlattenSize)(MD2Context *cx);
+
+ SECStatus (*p_MD2_Flatten)(MD2Context *cx, unsigned char *space);
+
+ MD2Context *(*p_MD2_Resurrect)(unsigned char *space, void *arg);
+
+ SECStatus (*p_SHA1_Hash)(unsigned char *dest, const char *src);
+
+ SECStatus (*p_SHA1_HashBuf)(unsigned char *dest, const unsigned char *src,
+ PRUint32 src_length);
+
+ SHA1Context *(*p_SHA1_NewContext)(void);
+
+ void (*p_SHA1_DestroyContext)(SHA1Context *cx, PRBool freeit);
+
+ void (*p_SHA1_Begin)(SHA1Context *cx);
+
+ void (*p_SHA1_Update)(SHA1Context *cx, const unsigned char *input,
+ unsigned int inputLen);
+
+ void (*p_SHA1_End)(SHA1Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+
+ void (*p_SHA1_TraceState)(SHA1Context *cx);
+
+ unsigned int (*p_SHA1_FlattenSize)(SHA1Context *cx);
+
+ SECStatus (*p_SHA1_Flatten)(SHA1Context *cx, unsigned char *space);
+
+ SHA1Context *(*p_SHA1_Resurrect)(unsigned char *space, void *arg);
+
+ SECStatus (*p_RNG_RNGInit)(void);
+
+ SECStatus (*p_RNG_RandomUpdate)(const void *data, size_t bytes);
+
+ SECStatus (*p_RNG_GenerateGlobalRandomBytes)(void *dest, size_t len);
+
+ void (*p_RNG_RNGShutdown)(void);
+
+ SECStatus (*p_PQG_ParamGen)(unsigned int j, PQGParams **pParams,
+ PQGVerify **pVfy);
+
+ SECStatus (*p_PQG_ParamGenSeedLen)(unsigned int j, unsigned int seedBytes,
+ PQGParams **pParams, PQGVerify **pVfy);
+
+ SECStatus (*p_PQG_VerifyParams)(const PQGParams *params,
+ const PQGVerify *vfy, SECStatus *result);
+
+ /* Version 3.001 came to here */
+
+ SECStatus (*p_RSA_PrivateKeyOpDoubleChecked)(RSAPrivateKey *key,
+ unsigned char *output,
+ const unsigned char *input);
+
+ SECStatus (*p_RSA_PrivateKeyCheck)(const RSAPrivateKey *key);
+
+ void (*p_BL_Cleanup)(void);
+
+ /* Version 3.002 came to here */
+
+ SHA256Context *(*p_SHA256_NewContext)(void);
+ void (*p_SHA256_DestroyContext)(SHA256Context *cx, PRBool freeit);
+ void (*p_SHA256_Begin)(SHA256Context *cx);
+ void (*p_SHA256_Update)(SHA256Context *cx, const unsigned char *input,
+ unsigned int inputLen);
+ void (*p_SHA256_End)(SHA256Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+ SECStatus (*p_SHA256_HashBuf)(unsigned char *dest, const unsigned char *src,
+ PRUint32 src_length);
+ SECStatus (*p_SHA256_Hash)(unsigned char *dest, const char *src);
+ void (*p_SHA256_TraceState)(SHA256Context *cx);
+ unsigned int (*p_SHA256_FlattenSize)(SHA256Context *cx);
+ SECStatus (*p_SHA256_Flatten)(SHA256Context *cx, unsigned char *space);
+ SHA256Context *(*p_SHA256_Resurrect)(unsigned char *space, void *arg);
+
+ SHA512Context *(*p_SHA512_NewContext)(void);
+ void (*p_SHA512_DestroyContext)(SHA512Context *cx, PRBool freeit);
+ void (*p_SHA512_Begin)(SHA512Context *cx);
+ void (*p_SHA512_Update)(SHA512Context *cx, const unsigned char *input,
+ unsigned int inputLen);
+ void (*p_SHA512_End)(SHA512Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+ SECStatus (*p_SHA512_HashBuf)(unsigned char *dest, const unsigned char *src,
+ PRUint32 src_length);
+ SECStatus (*p_SHA512_Hash)(unsigned char *dest, const char *src);
+ void (*p_SHA512_TraceState)(SHA512Context *cx);
+ unsigned int (*p_SHA512_FlattenSize)(SHA512Context *cx);
+ SECStatus (*p_SHA512_Flatten)(SHA512Context *cx, unsigned char *space);
+ SHA512Context *(*p_SHA512_Resurrect)(unsigned char *space, void *arg);
+
+ SHA384Context *(*p_SHA384_NewContext)(void);
+ void (*p_SHA384_DestroyContext)(SHA384Context *cx, PRBool freeit);
+ void (*p_SHA384_Begin)(SHA384Context *cx);
+ void (*p_SHA384_Update)(SHA384Context *cx, const unsigned char *input,
+ unsigned int inputLen);
+ void (*p_SHA384_End)(SHA384Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+ SECStatus (*p_SHA384_HashBuf)(unsigned char *dest, const unsigned char *src,
+ PRUint32 src_length);
+ SECStatus (*p_SHA384_Hash)(unsigned char *dest, const char *src);
+ void (*p_SHA384_TraceState)(SHA384Context *cx);
+ unsigned int (*p_SHA384_FlattenSize)(SHA384Context *cx);
+ SECStatus (*p_SHA384_Flatten)(SHA384Context *cx, unsigned char *space);
+ SHA384Context *(*p_SHA384_Resurrect)(unsigned char *space, void *arg);
+
+ /* Version 3.003 came to here */
+
+ AESKeyWrapContext *(*p_AESKeyWrap_CreateContext)(const unsigned char *key,
+ const unsigned char *iv, int encrypt, unsigned int keylen);
+
+ void (*p_AESKeyWrap_DestroyContext)(AESKeyWrapContext *cx, PRBool freeit);
+
+ SECStatus (*p_AESKeyWrap_Encrypt)(AESKeyWrapContext *cx,
+ unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+ SECStatus (*p_AESKeyWrap_Decrypt)(AESKeyWrapContext *cx,
+ unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+ /* Version 3.004 came to here */
+
+ PRBool (*p_BLAPI_SHVerify)(const char *name, PRFuncPtr addr);
+ PRBool (*p_BLAPI_VerifySelf)(const char *name);
+
+ /* Version 3.005 came to here */
+
+ SECStatus (*p_EC_NewKey)(ECParams *params,
+ ECPrivateKey **privKey);
+
+ SECStatus (*p_EC_NewKeyFromSeed)(ECParams *params,
+ ECPrivateKey **privKey,
+ const unsigned char *seed,
+ int seedlen);
+
+ SECStatus (*p_EC_ValidatePublicKey)(ECParams *params,
+ SECItem *publicValue);
+
+ SECStatus (*p_ECDH_Derive)(SECItem *publicValue,
+ ECParams *params,
+ SECItem *privateValue,
+ PRBool withCofactor,
+ SECItem *derivedSecret);
+
+ SECStatus (*p_ECDSA_SignDigest)(ECPrivateKey *key,
+ SECItem *signature,
+ const SECItem *digest);
+
+ SECStatus (*p_ECDSA_VerifyDigest)(ECPublicKey *key,
+ const SECItem *signature,
+ const SECItem *digest);
+
+ SECStatus (*p_ECDSA_SignDigestWithSeed)(ECPrivateKey *key,
+ SECItem *signature,
+ const SECItem *digest,
+ const unsigned char *seed,
+ const int seedlen);
+
+ /* Version 3.006 came to here */
+
+ /* no modification to FREEBLVectorStr itself
+ * but ECParamStr was modified
+ */
+
+ /* Version 3.007 came to here */
+
+ /* Apart from the context type, the AES, AESKeyWrap, DES, RC2 and RC4
+ * InitContext members below take the same parameter types. DES, RC2 and
+ * RC4 leave some parameters unnamed - presumably because those ciphers do
+ * not use them (confirm in the implementations). */
+ SECStatus (*p_AES_InitContext)(AESContext *cx,
+ const unsigned char *key,
+ unsigned int keylen,
+ const unsigned char *iv,
+ int mode,
+ unsigned int encrypt,
+ unsigned int blocklen);
+ SECStatus (*p_AESKeyWrap_InitContext)(AESKeyWrapContext *cx,
+ const unsigned char *key,
+ unsigned int keylen,
+ const unsigned char *iv,
+ int mode,
+ unsigned int encrypt,
+ unsigned int blocklen);
+ SECStatus (*p_DES_InitContext)(DESContext *cx,
+ const unsigned char *key,
+ unsigned int keylen,
+ const unsigned char *iv,
+ int mode,
+ unsigned int encrypt,
+ unsigned int);
+ SECStatus (*p_RC2_InitContext)(RC2Context *cx,
+ const unsigned char *key,
+ unsigned int keylen,
+ const unsigned char *iv,
+ int mode,
+ unsigned int effectiveKeyLen,
+ unsigned int);
+ SECStatus (*p_RC4_InitContext)(RC4Context *cx,
+ const unsigned char *key,
+ unsigned int keylen,
+ const unsigned char *,
+ int,
+ unsigned int,
+ unsigned int);
+
+ AESContext *(*p_AES_AllocateContext)(void);
+ AESKeyWrapContext *(*p_AESKeyWrap_AllocateContext)(void);
+ DESContext *(*p_DES_AllocateContext)(void);
+ RC2Context *(*p_RC2_AllocateContext)(void);
+ RC4Context *(*p_RC4_AllocateContext)(void);
+
+ void (*p_MD2_Clone)(MD2Context *dest, MD2Context *src);
+ void (*p_MD5_Clone)(MD5Context *dest, MD5Context *src);
+ void (*p_SHA1_Clone)(SHA1Context *dest, SHA1Context *src);
+ void (*p_SHA256_Clone)(SHA256Context *dest, SHA256Context *src);
+ void (*p_SHA384_Clone)(SHA384Context *dest, SHA384Context *src);
+ void (*p_SHA512_Clone)(SHA512Context *dest, SHA512Context *src);
+
+ SECStatus (*p_TLS_PRF)(const SECItem *secret, const char *label,
+ SECItem *seed, SECItem *result, PRBool isFIPS);
+
+ const SECHashObject *(*p_HASH_GetRawHashObject)(HASH_HashType hashType);
+
+ HMACContext *(*p_HMAC_Create)(const SECHashObject *hashObj,
+ const unsigned char *secret,
+ unsigned int secret_len, PRBool isFIPS);
+ SECStatus (*p_HMAC_Init)(HMACContext *cx, const SECHashObject *hash_obj,
+ const unsigned char *secret,
+ unsigned int secret_len, PRBool isFIPS);
+ void (*p_HMAC_Begin)(HMACContext *cx);
+ void (*p_HMAC_Update)(HMACContext *cx, const unsigned char *data,
+ unsigned int data_len);
+ HMACContext *(*p_HMAC_Clone)(HMACContext *cx);
+ SECStatus (*p_HMAC_Finish)(HMACContext *cx, unsigned char *result,
+ unsigned int *result_len,
+ unsigned int max_result_len);
+ void (*p_HMAC_Destroy)(HMACContext *cx, PRBool freeit);
+
+ void (*p_RNG_SystemInfoForRNG)(void);
+
+ /* Version 3.008 came to here */
+
+ SECStatus (*p_FIPS186Change_GenerateX)(unsigned char *XKEY,
+ const unsigned char *XSEEDj,
+ unsigned char *x_j);
+ SECStatus (*p_FIPS186Change_ReduceModQForDSA)(const unsigned char *w,
+ const unsigned char *q,
+ unsigned char *xj);
+
+ /* Version 3.009 came to here */
+
+ SECStatus (*p_Camellia_InitContext)(CamelliaContext *cx,
+ const unsigned char *key,
+ unsigned int keylen,
+ const unsigned char *iv,
+ int mode,
+ unsigned int encrypt,
+ unsigned int unused);
+
+ CamelliaContext *(*p_Camellia_AllocateContext)(void);
+ CamelliaContext *(*p_Camellia_CreateContext)(const unsigned char *key,
+ const unsigned char *iv,
+ int mode, int encrypt,
+ unsigned int keylen);
+ void (*p_Camellia_DestroyContext)(CamelliaContext *cx, PRBool freeit);
+
+ SECStatus (*p_Camellia_Encrypt)(CamelliaContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen);
+
+ SECStatus (*p_Camellia_Decrypt)(CamelliaContext *cx, unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen);
+
+ void (*p_PQG_DestroyParams)(PQGParams *params);
+
+ void (*p_PQG_DestroyVerify)(PQGVerify *vfy);
+
+ /* Version 3.010 came to here */
+
+ SECStatus (*p_SEED_InitContext)(SEEDContext *cx,
+ const unsigned char *key,
+ unsigned int keylen,
+ const unsigned char *iv,
+ int mode,
+ unsigned int encrypt,
+ unsigned int);
+
+ SEEDContext *(*p_SEED_AllocateContext)(void);
+
+ SEEDContext *(*p_SEED_CreateContext)(const unsigned char *key,
+ const unsigned char *iv,
+ int mode, PRBool encrypt);
+
+ void (*p_SEED_DestroyContext)(SEEDContext *cx, PRBool freeit);
+
+ SECStatus (*p_SEED_Encrypt)(SEEDContext *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+ SECStatus (*p_SEED_Decrypt)(SEEDContext *cx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen);
+
+ SECStatus (*p_BL_Init)(void);
+ void (*p_BL_SetForkState)(PRBool);
+
+ SECStatus (*p_PRNGTEST_Instantiate)(const PRUint8 *entropy,
+ unsigned int entropy_len,
+ const PRUint8 *nonce,
+ unsigned int nonce_len,
+ const PRUint8 *personal_string,
+ unsigned int ps_len);
+
+ SECStatus (*p_PRNGTEST_Reseed)(const PRUint8 *entropy,
+ unsigned int entropy_len,
+ const PRUint8 *additional,
+ unsigned int additional_len);
+
+ SECStatus (*p_PRNGTEST_Generate)(PRUint8 *bytes,
+ unsigned int bytes_len,
+ const PRUint8 *additional,
+ unsigned int additional_len);
+
+ SECStatus (*p_PRNGTEST_Uninstantiate)(void);
+ /* Version 3.011 came to here */
+
+ SECStatus (*p_RSA_PopulatePrivateKey)(RSAPrivateKey *key);
+
+ SECStatus (*p_DSA_NewRandom)(PLArenaPool *arena, const SECItem *q,
+ SECItem *seed);
+
+ SECStatus (*p_JPAKE_Sign)(PLArenaPool *arena, const PQGParams *pqg,
+ HASH_HashType hashType, const SECItem *signerID,
+ const SECItem *x, const SECItem *testRandom,
+ const SECItem *gxIn, SECItem *gxOut,
+ SECItem *gv, SECItem *r);
+
+ SECStatus (*p_JPAKE_Verify)(PLArenaPool *arena, const PQGParams *pqg,
+ HASH_HashType hashType, const SECItem *signerID,
+ const SECItem *peerID, const SECItem *gx,
+ const SECItem *gv, const SECItem *r);
+
+ SECStatus (*p_JPAKE_Round2)(PLArenaPool *arena, const SECItem *p,
+ const SECItem *q, const SECItem *gx1,
+ const SECItem *gx3, const SECItem *gx4,
+ SECItem *base, const SECItem *x2,
+ const SECItem *s, SECItem *x2s);
+
+ SECStatus (*p_JPAKE_Final)(PLArenaPool *arena, const SECItem *p,
+ const SECItem *q, const SECItem *x2,
+ const SECItem *gx4, const SECItem *x2s,
+ const SECItem *B, SECItem *K);
+
+ /* Version 3.012 came to here */
+
+ SECStatus (*p_TLS_P_hash)(HASH_HashType hashAlg,
+ const SECItem *secret,
+ const char *label,
+ SECItem *seed,
+ SECItem *result,
+ PRBool isFIPS);
+
+ SHA224Context *(*p_SHA224_NewContext)(void);
+ void (*p_SHA224_DestroyContext)(SHA224Context *cx, PRBool freeit);
+ void (*p_SHA224_Begin)(SHA224Context *cx);
+ void (*p_SHA224_Update)(SHA224Context *cx, const unsigned char *input,
+ unsigned int inputLen);
+ void (*p_SHA224_End)(SHA224Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen);
+ SECStatus (*p_SHA224_HashBuf)(unsigned char *dest, const unsigned char *src,
+ PRUint32 src_length);
+ SECStatus (*p_SHA224_Hash)(unsigned char *dest, const char *src);
+ void (*p_SHA224_TraceState)(SHA224Context *cx);
+ unsigned int (*p_SHA224_FlattenSize)(SHA224Context *cx);
+ SECStatus (*p_SHA224_Flatten)(SHA224Context *cx, unsigned char *space);
+ SHA224Context *(*p_SHA224_Resurrect)(unsigned char *space, void *arg);
+ void (*p_SHA224_Clone)(SHA224Context *dest, SHA224Context *src);
+ PRBool (*p_BLAPI_SHVerifyFile)(const char *name);
+
+ /* Version 3.013 came to here */
+
+ SECStatus (*p_PQG_ParamGenV2)(unsigned int L, unsigned int N,
+ unsigned int seedBytes,
+ PQGParams **pParams, PQGVerify **pVfy);
+ SECStatus (*p_PRNGTEST_RunHealthTests)(void);
+
+ /* Version 3.014 came to here */
+
+ SECStatus (*p_HMAC_ConstantTime)(
+ unsigned char *result,
+ unsigned int *resultLen,
+ unsigned int maxResultLen,
+ const SECHashObject *hashObj,
+ const unsigned char *secret,
+ unsigned int secretLen,
+ const unsigned char *header,
+ unsigned int headerLen,
+ const unsigned char *body,
+ unsigned int bodyLen,
+ unsigned int bodyTotalLen);
+
+ SECStatus (*p_SSLv3_MAC_ConstantTime)(
+ unsigned char *result,
+ unsigned int *resultLen,
+ unsigned int maxResultLen,
+ const SECHashObject *hashObj,
+ const unsigned char *secret,
+ unsigned int secretLen,
+ const unsigned char *header,
+ unsigned int headerLen,
+ const unsigned char *body,
+ unsigned int bodyLen,
+ unsigned int bodyTotalLen);
+
+ /* Version 3.015 came to here */
+
+ SECStatus (*p_RSA_SignRaw)(RSAPrivateKey *key,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen);
+ SECStatus (*p_RSA_CheckSignRaw)(RSAPublicKey *key,
+ const unsigned char *sig,
+ unsigned int sigLen,
+ const unsigned char *hash,
+ unsigned int hashLen);
+ SECStatus (*p_RSA_CheckSignRecoverRaw)(RSAPublicKey *key,
+ unsigned char *data,
+ unsigned int *dataLen,
+ unsigned int maxDataLen,
+ const unsigned char *sig,
+ unsigned int sigLen);
+ SECStatus (*p_RSA_EncryptRaw)(RSAPublicKey *key,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen);
+ SECStatus (*p_RSA_DecryptRaw)(RSAPrivateKey *key,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen);
+ SECStatus (*p_RSA_EncryptOAEP)(RSAPublicKey *key,
+ HASH_HashType hashAlg,
+ HASH_HashType maskHashAlg,
+ const unsigned char *label,
+ unsigned int labelLen,
+ const unsigned char *seed,
+ unsigned int seedLen,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen);
+ SECStatus (*p_RSA_DecryptOAEP)(RSAPrivateKey *key,
+ HASH_HashType hashAlg,
+ HASH_HashType maskHashAlg,
+ const unsigned char *label,
+ unsigned int labelLen,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen);
+ SECStatus (*p_RSA_EncryptBlock)(RSAPublicKey *key,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen);
+ SECStatus (*p_RSA_DecryptBlock)(RSAPrivateKey *key,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen);
+ SECStatus (*p_RSA_SignPSS)(RSAPrivateKey *key,
+ HASH_HashType hashAlg,
+ HASH_HashType maskHashAlg,
+ const unsigned char *salt,
+ unsigned int saltLen,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen);
+ SECStatus (*p_RSA_CheckSignPSS)(RSAPublicKey *key,
+ HASH_HashType hashAlg,
+ HASH_HashType maskHashAlg,
+ unsigned int saltLen,
+ const unsigned char *sig,
+ unsigned int sigLen,
+ const unsigned char *hash,
+ unsigned int hashLen);
+ SECStatus (*p_RSA_Sign)(RSAPrivateKey *key,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *input,
+ unsigned int inputLen);
+ SECStatus (*p_RSA_CheckSign)(RSAPublicKey *key,
+ const unsigned char *sig,
+ unsigned int sigLen,
+ const unsigned char *data,
+ unsigned int dataLen);
+ SECStatus (*p_RSA_CheckSignRecover)(RSAPublicKey *key,
+ unsigned char *output,
+ unsigned int *outputLen,
+ unsigned int maxOutputLen,
+ const unsigned char *sig,
+ unsigned int sigLen);
+
+ /* Version 3.016 came to here */
+
+ SECStatus (*p_EC_FillParams)(PLArenaPool *arena,
+ const SECItem *encodedParams, ECParams *params);
+ SECStatus (*p_EC_DecodeParams)(const SECItem *encodedParams,
+ ECParams **ecparams);
+ SECStatus (*p_EC_CopyParams)(PLArenaPool *arena, ECParams *dstParams,
+ const ECParams *srcParams);
+
+ /* Version 3.017 came to here */
+
+ SECStatus (*p_ChaCha20Poly1305_InitContext)(ChaCha20Poly1305Context *ctx,
+ const unsigned char *key,
+ unsigned int keyLen,
+ unsigned int tagLen);
+
+ ChaCha20Poly1305Context *(*p_ChaCha20Poly1305_CreateContext)(
+ const unsigned char *key, unsigned int keyLen, unsigned int tagLen);
+
+ void (*p_ChaCha20Poly1305_DestroyContext)(ChaCha20Poly1305Context *ctx,
+ PRBool freeit);
+
+ SECStatus (*p_ChaCha20Poly1305_Seal)(
+ const ChaCha20Poly1305Context *ctx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen,
+ const unsigned char *nonce, unsigned int nonceLen,
+ const unsigned char *ad, unsigned int adLen);
+
+ SECStatus (*p_ChaCha20Poly1305_Open)(
+ const ChaCha20Poly1305Context *ctx, unsigned char *output,
+ unsigned int *outputLen, unsigned int maxOutputLen,
+ const unsigned char *input, unsigned int inputLen,
+ const unsigned char *nonce, unsigned int nonceLen,
+ const unsigned char *ad, unsigned int adLen);
+
+ /* Version 3.018 came to here */
+
+ int (*p_EC_GetPointSize)(const ECParams *);
+
+ /* Version 3.019 came to here */
+
+ /* Add new function pointers at the end of this struct and bump
+ * FREEBL_VERSION at the beginning of this file. */
+};
+
+/* Short name for the function table type. */
+typedef struct FREEBLVectorStr FREEBLVector;
+
+#ifdef FREEBL_LOWHASH
+#include "nsslowhash.h"
+
+/* Version of the NSSLOWVectorStr layout declared below. */
+#define NSSLOW_VERSION 0x0300
+
+/*
+ * NSSLOWVectorStr - function table for the NSSLOW hash API, only declared
+ * when FREEBL_LOWHASH is defined.
+ *
+ * lowhash_vector.c obtains this table from the loaded library through the
+ * NSSLOW_GetVector symbol and accepts it only if the high byte of `version`
+ * equals that of NSSLOW_VERSION, the low byte is not smaller, and `length`
+ * is at least sizeof(NSSLOWVector). Each p_Xxx member is then called by the
+ * wrapper function of the same name.
+ */
+struct NSSLOWVectorStr {
+ unsigned short length; /* of this struct in bytes */
+ unsigned short version; /* of this struct. */
+ const FREEBLVector *(*p_FREEBL_GetVector)(void);
+ NSSLOWInitContext *(*p_NSSLOW_Init)(void);
+ void (*p_NSSLOW_Shutdown)(NSSLOWInitContext *context);
+ void (*p_NSSLOW_Reset)(NSSLOWInitContext *context);
+ NSSLOWHASHContext *(*p_NSSLOWHASH_NewContext)(
+ NSSLOWInitContext *initContext,
+ HASH_HashType hashType);
+ void (*p_NSSLOWHASH_Begin)(NSSLOWHASHContext *context);
+ void (*p_NSSLOWHASH_Update)(NSSLOWHASHContext *context,
+ const unsigned char *buf,
+ unsigned int len);
+ void (*p_NSSLOWHASH_End)(NSSLOWHASHContext *context,
+ unsigned char *buf,
+ unsigned int *ret, unsigned int len);
+ void (*p_NSSLOWHASH_Destroy)(NSSLOWHASHContext *context);
+ unsigned int (*p_NSSLOWHASH_Length)(NSSLOWHASHContext *context);
+};
+
+/* Short name for the NSSLOW function table type. */
+typedef struct NSSLOWVectorStr NSSLOWVector;
+#endif
+
+SEC_BEGIN_PROTOS
+
+#ifdef FREEBL_LOWHASH
+/* Type of a function that takes no arguments and returns a pointer to a
+ * const NSSLOWVector. */
+typedef const NSSLOWVector *NSSLOWGetVectorFn(void);
+
+/* Accessor for the NSSLOW function table; lowhash_vector.c looks this
+ * symbol up by name with dlsym. */
+extern NSSLOWGetVectorFn NSSLOW_GetVector;
+#endif
+
+/* Type of a function that takes no arguments and returns a pointer to a
+ * const FREEBLVector. */
+typedef const FREEBLVector *FREEBLGetVectorFn(void);
+
+/* Accessor for the freebl function table. */
+extern FREEBLGetVectorFn FREEBL_GetVector;
+
+SEC_END_PROTOS
+
+#endif
diff --git a/security/nss/lib/freebl/lowhash_vector.c b/security/nss/lib/freebl/lowhash_vector.c
new file mode 100644
index 000000000..7690c98da
--- /dev/null
+++ b/security/nss/lib/freebl/lowhash_vector.c
@@ -0,0 +1,217 @@
+/*
+ * lowhash_vector.c - load the DSO containing the freebl implementation and
+ * forward the NSSLOW hash API to it.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#define _GNU_SOURCE 1
+#include "loader.h"
+#include "prmem.h"
+#include "prerror.h"
+#include "prinit.h"
+#include "prenv.h"
+#include "blname.c"
+
+#include "prio.h"
+#include "prprf.h"
+#include <stdio.h>
+#include "prsystem.h"
+#include "nsslowhash.h"
+#include <dlfcn.h>
+#include "pratom.h"
+
+/* Handle of the loaded implementation library; assigned by freebl_LoadDSO
+ * when a library is accepted. */
+static PRLibrary *blLib;
+
+/* LSB yields the low byte of x, MSB the bits above it; freebl_LoadDSO uses
+ * them to compare the two halves of a 16-bit version number separately. */
+#define LSB(x) ((x)&0xff)
+#define MSB(x) ((x) >> 8)
+
+/* Function table of the loaded library; stays NULL until freebl_LoadDSO
+ * accepts a library. */
+static const NSSLOWVector *vector;
+/* Name under which the accepted library was loaded; set together with
+ * `vector`. */
+static const char *libraryName = NULL;
+
+/* pretty much only glibc uses this, make sure we don't have any dependencies
+ * on nspr.. */
+#undef PORT_Alloc
+#undef PORT_Free
+#define PORT_Alloc malloc
+#define PR_Malloc malloc
+#define PORT_Free free
+#define PR_Free free
+#define PR_GetDirectorySeparator() '/'
+/* Replacement for the NSPR call: open the pathname stored in the library
+ * spec with dlopen. The flags argument is ignored; RTLD_NOW | RTLD_LOCAL is
+ * always used. The macro now reads its own libspec argument instead of
+ * relying on the caller having a local variable named libSpec. */
+#define PR_LoadLibraryWithFlags(libspec, flags) \
+ ((PRLibrary *)dlopen((libspec).value.pathname, RTLD_NOW | RTLD_LOCAL))
+/* Replacement for the NSPR call: the name argument is ignored and the path
+ * is derived from addr alone. */
+#define PR_GetLibraryFilePathname(name, addr) \
+ freebl_lowhash_getLibraryFilePath(addr)
+
+/*
+ * Return a strdup'ed copy of the file name that dladdr reports for the
+ * object containing addr, or NULL when dladdr cannot resolve addr.
+ * The copy is heap-allocated by strdup, so the caller must free it.
+ */
+static char *
+freebl_lowhash_getLibraryFilePath(void *addr)
+{
+    Dl_info info;
+
+    if (dladdr(addr, &info) != 0) {
+        return strdup(info.dli_fname);
+    }
+    return NULL;
+}
+
+/*
+ * The replacement macros above make this variable unnecessary, so it is
+ * compiled out (the `nodef` guard is presumably never defined).
+ * NOTE(review): the original comment credited PR_LoadLibraryWithFlags, but
+ * of the macros above it is PR_GetLibraryFilePathname that discards a name
+ * argument - confirm against the callers in genload.c.
+ */
+#ifdef nodef
+static const char *NameOfThisSharedLib =
+ SHLIB_PREFIX "freebl" SHLIB_VERSION "." SHLIB_SUFFIX;
+#endif
+
+#include "genload.c"
+
+/*
+ * Open the implementation library and fetch and validate its NSSLOW vector.
+ * This function must be run only once.
+ *
+ * On success the vector, the library name and the library handle are stored
+ * in the file-level statics and PR_SUCCESS is returned. PR_FAILURE is
+ * returned when no library name is available, the library cannot be loaded,
+ * it has no NSSLOW_GetVector symbol, or the vector it returns is NULL, has a
+ * different version high byte, a smaller version low byte, or a length
+ * below sizeof(NSSLOWVector). A library that was opened but then rejected
+ * is closed again.
+ */
+static PRStatus
+freebl_LoadDSO(void)
+{
+    PRLibrary *handle;
+    const char *name = getLibName();
+    void *address;
+    const NSSLOWVector *dsoVector;
+
+    if (name == NULL) {
+        return PR_FAILURE;
+    }
+
+    handle = loader_LoadLibrary(name);
+    if (handle == NULL) {
+        return PR_FAILURE;
+    }
+
+    address = dlsym(handle, "NSSLOW_GetVector");
+    dsoVector = address ? ((NSSLOWGetVectorFn *)address)() : NULL;
+    if (dsoVector != NULL) {
+        const unsigned short dsoVersion = dsoVector->version;
+        const unsigned short myVersion = NSSLOW_VERSION;
+
+        if (MSB(dsoVersion) == MSB(myVersion) &&
+            LSB(dsoVersion) >= LSB(myVersion) &&
+            dsoVector->length >= sizeof(NSSLOWVector)) {
+            vector = dsoVector;
+            libraryName = name;
+            blLib = handle;
+            return PR_SUCCESS;
+        }
+    }
+
+    /* Opened but unusable: release the handle before reporting failure. */
+    (void)dlclose(handle);
+    return PR_FAILURE;
+}
+
+/* Once-control state for freebl_RunLoaderOnce: `inProgress` is claimed by
+ * the first caller, `status` receives the result of freebl_LoadDSO, and
+ * `initialized` is set after `status` has been stored. */
+static PRCallOnceType loadFreeBLOnce;
+
+/*
+ * Run freebl_LoadDSO at most once and return its status.
+ *
+ * The caller that flips `inProgress` from 0 performs the load; any caller
+ * that finds it already set polls `initialized`, sleeping one second between
+ * checks, and then returns the stored status.
+ *
+ * NOTE(review): only `inProgress` is updated with an atomic builtin;
+ * `status` and `initialized` are written with plain stores - confirm this
+ * is sufficient on the targeted platforms.
+ */
+static PRStatus
+freebl_RunLoaderOnce(void)
+{
+ /* Don't have NSPR, so we can't use the real PR_CallOnce; implement a
+ * stripped down version. */
+ if (loadFreeBLOnce.initialized) {
+ return loadFreeBLOnce.status;
+ }
+ if (__sync_lock_test_and_set(&loadFreeBLOnce.inProgress, 1) == 0) {
+ loadFreeBLOnce.status = freebl_LoadDSO();
+ loadFreeBLOnce.initialized = 1;
+ } else {
+ /* shouldn't have a lot of takers on the else clause, which is good
+ * since we don't have condition variables yet.
+ * 'initialized' only ever gets set (not cleared) so we don't
+ * need the traditional locks. */
+ while (!loadFreeBLOnce.initialized) {
+ sleep(1); /* don't have condition variables, just give up the CPU */
+ }
+ }
+
+ return loadFreeBLOnce.status;
+}
+
+/*
+ * Return the FREEBLVector reported by the loaded library's
+ * p_FREEBL_GetVector entry, running the one-time loader first if the NSSLOW
+ * vector is not set. Returns NULL when the loader fails or no vector is set.
+ */
+const FREEBLVector *
+FREEBL_GetVector(void)
+{
+    if (vector == NULL && freebl_RunLoaderOnce() != PR_SUCCESS) {
+        return NULL;
+    }
+    return vector ? vector->p_FREEBL_GetVector() : NULL;
+}
+
+/*
+ * Forward NSSLOW_Init to the loaded library, running the one-time loader
+ * first if needed. Returns NULL when the loader fails.
+ */
+NSSLOWInitContext *
+NSSLOW_Init(void)
+{
+    if (vector == NULL) {
+        if (freebl_RunLoaderOnce() != PR_SUCCESS) {
+            return NULL;
+        }
+    }
+    return vector->p_NSSLOW_Init();
+}
+
+/*
+ * Forward NSSLOW_Shutdown to the loaded library, running the one-time
+ * loader first if needed. Does nothing when the loader fails.
+ */
+void
+NSSLOW_Shutdown(NSSLOWInitContext *context)
+{
+    if (vector == NULL) {
+        if (freebl_RunLoaderOnce() != PR_SUCCESS) {
+            return;
+        }
+    }
+    vector->p_NSSLOW_Shutdown(context);
+}
+
+/*
+ * Forward NSSLOW_Reset to the loaded library, running the one-time loader
+ * first if needed. Does nothing when the loader fails.
+ */
+void
+NSSLOW_Reset(NSSLOWInitContext *context)
+{
+    if (vector == NULL) {
+        if (freebl_RunLoaderOnce() != PR_SUCCESS) {
+            return;
+        }
+    }
+    vector->p_NSSLOW_Reset(context);
+}
+
+/*
+ * Forward NSSLOWHASH_NewContext to the loaded library, running the one-time
+ * loader first if needed. Returns NULL when the loader fails.
+ */
+NSSLOWHASHContext *
+NSSLOWHASH_NewContext(NSSLOWInitContext *initContext,
+                      HASH_HashType hashType)
+{
+    if (vector == NULL) {
+        if (freebl_RunLoaderOnce() != PR_SUCCESS) {
+            return NULL;
+        }
+    }
+    return vector->p_NSSLOWHASH_NewContext(initContext, hashType);
+}
+
+/*
+ * Forward NSSLOWHASH_Begin to the loaded library, running the one-time
+ * loader first if needed. Does nothing when the loader fails.
+ */
+void
+NSSLOWHASH_Begin(NSSLOWHASHContext *context)
+{
+    if (vector == NULL) {
+        if (freebl_RunLoaderOnce() != PR_SUCCESS) {
+            return;
+        }
+    }
+    vector->p_NSSLOWHASH_Begin(context);
+}
+
+/*
+ * Forward NSSLOWHASH_Update to the loaded library, running the one-time
+ * loader first if needed. Does nothing when the loader fails.
+ */
+void
+NSSLOWHASH_Update(NSSLOWHASHContext *context,
+                  const unsigned char *buf,
+                  unsigned int len)
+{
+    if (vector == NULL) {
+        if (freebl_RunLoaderOnce() != PR_SUCCESS) {
+            return;
+        }
+    }
+    vector->p_NSSLOWHASH_Update(context, buf, len);
+}
+
+/*
+ * Forward NSSLOWHASH_End to the loaded library, running the one-time loader
+ * first if needed. Does nothing (buf and ret are left untouched) when the
+ * loader fails.
+ */
+void
+NSSLOWHASH_End(NSSLOWHASHContext *context,
+               unsigned char *buf,
+               unsigned int *ret, unsigned int len)
+{
+    if (vector == NULL) {
+        if (freebl_RunLoaderOnce() != PR_SUCCESS) {
+            return;
+        }
+    }
+    vector->p_NSSLOWHASH_End(context, buf, ret, len);
+}
+
+/*
+ * Forward NSSLOWHASH_Destroy to the loaded library, running the one-time
+ * loader first if needed. Does nothing when the loader fails.
+ */
+void
+NSSLOWHASH_Destroy(NSSLOWHASHContext *context)
+{
+    if (vector == NULL) {
+        if (freebl_RunLoaderOnce() != PR_SUCCESS) {
+            return;
+        }
+    }
+    vector->p_NSSLOWHASH_Destroy(context);
+}
+
+/*
+ * Forward NSSLOWHASH_Length to the loaded library, running the one-time
+ * loader first if needed. When the loader fails, -1 is returned; converted
+ * to the unsigned return type this is the largest unsigned int value.
+ */
+unsigned int
+NSSLOWHASH_Length(NSSLOWHASHContext *context)
+{
+    if (vector == NULL) {
+        if (freebl_RunLoaderOnce() != PR_SUCCESS) {
+            return -1;
+        }
+    }
+    return vector->p_NSSLOWHASH_Length(context);
+}
diff --git a/security/nss/lib/freebl/manifest.mn b/security/nss/lib/freebl/manifest.mn
new file mode 100644
index 000000000..1ef983907
--- /dev/null
+++ b/security/nss/lib/freebl/manifest.mn
@@ -0,0 +1,195 @@
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# NOTE: any ifdefs in this file must be defined on the gmake command line
+# (if anywhere). They cannot come from Makefile or config.mk
+
+CORE_DEPTH = ../..
+
+MODULE = nss
+
+# Copied from Linux.mk. We have a chicken-and-egg issue here: we need to set
+# the library name before we call the platform code in coreconf, but we need
+# to pick up the automatic setting of FREEBL_LOWHASH before we can set the
+# library name. So for now we mimic the code in Linux.mk to get the
+# automatic setting early.
+#
+# On Linux 2.6 or later, build libfreebl3.so with no NSPR and libnssutil3.so
+# dependencies by default. Set FREEBL_NO_DEPEND to 0 in the environment to
+# override this.
+#
+#
+include $(CORE_DEPTH)/coreconf/arch.mk
+ifeq ($(OS_ARCH),Linux)
+ifneq ($(OS_TARGET),Android)
+ifeq (2.6,$(firstword $(sort 2.6 $(OS_RELEASE))))
+ifndef FREEBL_NO_DEPEND
+FREEBL_NO_DEPEND = 1
+FREEBL_LOWHASH = 1
+endif
+endif
+endif
+endif
+
+
+LIBRARY_NAME = freebl
+LIBRARY_VERSION = 3
+
+# For child builds, derive LIBRARY_NAME from the ABI/feature variables.
+# The checks below are not mutually exclusive: when several of the variables
+# are defined, the last matching assignment wins.
+ifdef FREEBL_CHILD_BUILD
+ ifdef USE_ABI32_INT32
+ LIBRARY_NAME = freebl_32int
+ endif
+ ifdef USE_ABI32_INT64
+ LIBRARY_NAME = freebl_32int64
+ endif
+ ifdef USE_ABI32_FPU
+ LIBRARY_NAME = freebl_32fpu
+ endif
+ ifdef USE_ABI64_INT
+ LIBRARY_NAME = freebl_64int
+ endif
+ ifdef USE_ABI64_FPU
+ LIBRARY_NAME = freebl_64fpu
+ endif
+ ifdef FREEBL_LOWHASH
+ LIBRARY_NAME = freeblpriv
+ endif
+ ifdef USE_STUB_BUILD
+ # for the stub build, reset name to the default (from freeblpriv)
+ LIBRARY_NAME = freebl
+ endif
+endif
+
+# if the library name contains _, we prefix the version with _
+ifneq (,$(findstring _,$(LIBRARY_NAME)))
+ LIBRARY_VERSION := _$(LIBRARY_VERSION)
+endif
+
+MAPFILE = $(OBJDIR)/$(LIBRARY_NAME).def
+
+SOFTOKEN_LIBRARY_VERSION = 3
+
+DEFINES += -DSHLIB_SUFFIX=\"$(DLL_SUFFIX)\" -DSHLIB_PREFIX=\"$(DLL_PREFIX)\" \
+ -DSHLIB_VERSION=\"$(LIBRARY_VERSION)\" \
+ -DSOFTOKEN_SHLIB_VERSION=\"$(SOFTOKEN_LIBRARY_VERSION)\"
+
+REQUIRES =
+
+EXPORTS = \
+ blapit.h \
+ shsign.h \
+ ecl-exp.h \
+ $(LOWHASH_EXPORTS) \
+ $(NULL)
+
+PRIVATE_EXPORTS = \
+ alghmac.h \
+ blapi.h \
+ chacha20poly1305.h \
+ hmacct.h \
+ secmpi.h \
+ secrng.h \
+ ec.h \
+ ecl.h \
+ ecl-curve.h \
+ $(NULL)
+
+MPI_HDRS = mpi-config.h mpi.h mpi-priv.h mplogic.h mpprime.h logtab.h mp_gf2m.h
+MPI_SRCS = mpprime.c mpmontg.c mplogic.c mpi.c mp_gf2m.c
+
+
+ECL_HDRS = ecl-exp.h ecl.h ecp.h ecl-priv.h
+ifndef NSS_DISABLE_ECC
+ECL_SRCS = ecl.c ecl_curve.c ecl_mult.c ecl_gf.c \
+ ecp_aff.c ecp_jac.c ecp_mont.c \
+ ec_naf.c ecp_jm.c ecp_256.c ecp_384.c ecp_521.c \
+ ecp_256_32.c ecp_25519.c
+else
+ECL_SRCS = $(NULL)
+endif
+SHA_SRCS = sha_fast.c
+MPCPU_SRCS = mpcpucache.c
+
+CSRCS = \
+ freeblver.c \
+ ldvector.c \
+ sysrand.c \
+ $(SHA_SRCS) \
+ md2.c \
+ md5.c \
+ sha512.c \
+ alghmac.c \
+ rawhash.c \
+ alg2268.c \
+ arcfour.c \
+ arcfive.c \
+ desblapi.c \
+ des.c \
+ drbg.c \
+ chacha20poly1305.c \
+ cts.c \
+ ctr.c \
+ fipsfreebl.c \
+ gcm.c \
+ hmacct.c \
+ rijndael.c \
+ aeskeywrap.c \
+ camellia.c \
+ dh.c \
+ ec.c \
+ ecdecode.c \
+ pqg.c \
+ dsa.c \
+ rsa.c \
+ rsapkcs.c \
+ shvfy.c \
+ tlsprfalg.c \
+ seed.c \
+ jpake.c \
+ $(MPI_SRCS) \
+ $(MPCPU_SRCS) \
+ $(ECL_SRCS) \
+ $(STUBS_SRCS) \
+ $(LOWHASH_SRCS) \
+ $(EXTRA_SRCS) \
+ $(NULL)
+
+ALL_CSRCS := $(CSRCS)
+
+ALL_HDRS = \
+ alghmac.h \
+ blapi.h \
+ blapit.h \
+ des.h \
+ ec.h \
+ loader.h \
+ rijndael.h \
+ camellia.h \
+ secmpi.h \
+ sha_fast.h \
+ sha256.h \
+ shsign.h \
+ vis_proto.h \
+ seed.h \
+ $(NULL)
+
+
+# Add exactly one RIJNDAEL_* define.  The first of AES_GEN_TBL, AES_GEN_TBL_M,
+# AES_GEN_VAL and AES_GEN_VAL_M that is defined selects it; when none of them
+# is defined, RIJNDAEL_INCLUDE_TABLES is used.
+ifdef AES_GEN_TBL
+DEFINES += -DRIJNDAEL_GENERATE_TABLES
+else
+ifdef AES_GEN_TBL_M
+DEFINES += -DRIJNDAEL_GENERATE_TABLES_MACRO
+else
+ifdef AES_GEN_VAL
+DEFINES += -DRIJNDAEL_GENERATE_VALUES
+else
+ifdef AES_GEN_VAL_M
+DEFINES += -DRIJNDAEL_GENERATE_VALUES_MACRO
+else
+DEFINES += -DRIJNDAEL_INCLUDE_TABLES
+endif
+endif
+endif
+endif
diff --git a/security/nss/lib/freebl/md2.c b/security/nss/lib/freebl/md2.c
new file mode 100644
index 000000000..cb3d3d82b
--- /dev/null
+++ b/security/nss/lib/freebl/md2.c
@@ -0,0 +1,269 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "prerr.h"
+#include "secerr.h"
+
+#include "prtypes.h"
+
+#include "blapi.h"
+
+#define MD2_DIGEST_LEN 16
+#define MD2_BUFSIZE 16
+#define MD2_X_SIZE 48 /* The X array, [CV | INPUT | TMP VARS] */
+#define MD2_CV 0 /* index into X for chaining variables */
+#define MD2_INPUT 16 /* index into X for input */
+#define MD2_TMPVARS 32 /* index into X for temporary variables */
+#define MD2_CHECKSUM_SIZE 16
+
+/* State carried between MD2_Begin, MD2_Update and MD2_End. */
+struct MD2ContextStr {
+ /* running checksum, updated by md2_compress() for every block */
+ unsigned char checksum[MD2_BUFSIZE];
+ /* working array laid out as [CV | INPUT | TMP VARS]; see MD2_CV,
+  * MD2_INPUT and MD2_TMPVARS for the offsets */
+ unsigned char X[MD2_X_SIZE];
+ /* number of bytes still free in the INPUT part of X */
+ PRUint8 unusedBuffer;
+};
+
+static const PRUint8 MD2S[256] = {
+ 0051, 0056, 0103, 0311, 0242, 0330, 0174, 0001,
+ 0075, 0066, 0124, 0241, 0354, 0360, 0006, 0023,
+ 0142, 0247, 0005, 0363, 0300, 0307, 0163, 0214,
+ 0230, 0223, 0053, 0331, 0274, 0114, 0202, 0312,
+ 0036, 0233, 0127, 0074, 0375, 0324, 0340, 0026,
+ 0147, 0102, 0157, 0030, 0212, 0027, 0345, 0022,
+ 0276, 0116, 0304, 0326, 0332, 0236, 0336, 0111,
+ 0240, 0373, 0365, 0216, 0273, 0057, 0356, 0172,
+ 0251, 0150, 0171, 0221, 0025, 0262, 0007, 0077,
+ 0224, 0302, 0020, 0211, 0013, 0042, 0137, 0041,
+ 0200, 0177, 0135, 0232, 0132, 0220, 0062, 0047,
+ 0065, 0076, 0314, 0347, 0277, 0367, 0227, 0003,
+ 0377, 0031, 0060, 0263, 0110, 0245, 0265, 0321,
+ 0327, 0136, 0222, 0052, 0254, 0126, 0252, 0306,
+ 0117, 0270, 0070, 0322, 0226, 0244, 0175, 0266,
+ 0166, 0374, 0153, 0342, 0234, 0164, 0004, 0361,
+ 0105, 0235, 0160, 0131, 0144, 0161, 0207, 0040,
+ 0206, 0133, 0317, 0145, 0346, 0055, 0250, 0002,
+ 0033, 0140, 0045, 0255, 0256, 0260, 0271, 0366,
+ 0034, 0106, 0141, 0151, 0064, 0100, 0176, 0017,
+ 0125, 0107, 0243, 0043, 0335, 0121, 0257, 0072,
+ 0303, 0134, 0371, 0316, 0272, 0305, 0352, 0046,
+ 0054, 0123, 0015, 0156, 0205, 0050, 0204, 0011,
+ 0323, 0337, 0315, 0364, 0101, 0201, 0115, 0122,
+ 0152, 0334, 0067, 0310, 0154, 0301, 0253, 0372,
+ 0044, 0341, 0173, 0010, 0014, 0275, 0261, 0112,
+ 0170, 0210, 0225, 0213, 0343, 0143, 0350, 0155,
+ 0351, 0313, 0325, 0376, 0073, 0000, 0035, 0071,
+ 0362, 0357, 0267, 0016, 0146, 0130, 0320, 0344,
+ 0246, 0167, 0162, 0370, 0353, 0165, 0113, 0012,
+ 0061, 0104, 0120, 0264, 0217, 0355, 0037, 0032,
+ 0333, 0231, 0215, 0063, 0237, 0021, 0203, 0024
+};
+
+/*
+ * One-shot MD2 of the NUL-terminated string src.  The MD2_DIGEST_LEN byte
+ * digest is written to dest.  Returns SECFailure (with
+ * PR_OUT_OF_MEMORY_ERROR set) if no context could be allocated, SECSuccess
+ * otherwise.
+ */
+SECStatus
+MD2_Hash(unsigned char *dest, const char *src)
+{
+    unsigned int digestLen;
+    MD2Context *ctx;
+
+    ctx = MD2_NewContext();
+    if (ctx == NULL) {
+        PORT_SetError(PR_OUT_OF_MEMORY_ERROR);
+        return SECFailure;
+    }
+
+    MD2_Begin(ctx);
+    MD2_Update(ctx, (const unsigned char *)src, PORT_Strlen(src));
+    MD2_End(ctx, dest, &digestLen, MD2_DIGEST_LEN);
+    MD2_DestroyContext(ctx, PR_TRUE);
+
+    return SECSuccess;
+}
+
+/*
+ * Allocate a zero-filled MD2 context.  Returns NULL and sets
+ * PR_OUT_OF_MEMORY_ERROR when the allocation fails.
+ */
+MD2Context *
+MD2_NewContext(void)
+{
+    MD2Context *ctx;
+
+    ctx = (MD2Context *)PORT_ZAlloc(sizeof(MD2Context));
+    if (ctx != NULL) {
+        return ctx;
+    }
+    PORT_SetError(PR_OUT_OF_MEMORY_ERROR);
+    return NULL;
+}
+
+/*
+ * Release an MD2 context.  Only acts when freeit is true, in which case cx
+ * is handed to PORT_ZFree together with its size.
+ */
+void
+MD2_DestroyContext(MD2Context *cx, PRBool freeit)
+{
+    if (!freeit) {
+        return;
+    }
+    PORT_ZFree(cx, sizeof(*cx));
+}
+
+/*
+ * Reset cx for a new message: clear the whole context and mark the input
+ * block as completely free.
+ */
+void
+MD2_Begin(MD2Context *cx)
+{
+    memset(cx, 0, sizeof(MD2Context));
+    cx->unusedBuffer = MD2_BUFSIZE;
+}
+
+/*
+ * Process the 16-byte block held in the INPUT part of cx->X.
+ *
+ * First pass: fold the block into the running checksum and set the TMP VARS
+ * part of X to CV[i] XOR input[i].  Second pass: run 18 rounds of the
+ * S-box substitution over all 48 bytes of X.  Finally mark the input block
+ * as free again.
+ */
+static void
+md2_compress(MD2Context *cx)
+{
+    unsigned int round;
+    unsigned int idx;
+    unsigned char t;
+
+    /* Checksum update, chained through the last checksum byte. */
+    t = cx->checksum[MD2_CHECKSUM_SIZE - 1];
+    for (idx = 0; idx < MD2_BUFSIZE; idx++) {
+        t = cx->checksum[idx] ^ MD2S[cx->X[MD2_INPUT + idx] ^ t];
+        cx->checksum[idx] = t;
+        cx->X[MD2_TMPVARS + idx] = cx->X[idx] ^ cx->X[MD2_INPUT + idx];
+    }
+
+    /* The compression function. */
+    t = 0x00;
+    for (round = 0; round < 18; round++) {
+        for (idx = 0; idx < MD2_X_SIZE; idx++) {
+            t = cx->X[idx] ^ MD2S[t];
+            cx->X[idx] = t;
+        }
+        t = (unsigned char)((t + round) % 256);
+    }
+
+    cx->unusedBuffer = MD2_BUFSIZE;
+}
+
+/*
+ * Absorb inputLen bytes from input into the MD2 state.
+ *
+ * cx->unusedBuffer holds the number of bytes still free in the 16-byte
+ * input block (MD2_BUFSIZE means the block is empty).  Input is first used
+ * to top up a partially filled block, then consumed in whole blocks, and
+ * any remainder is buffered for the next call.
+ */
+void
+MD2_Update(MD2Context *cx, const unsigned char *input, unsigned int inputLen)
+{
+    PRUint32 bytesToConsume;
+
+    /* Fill the remaining input buffer. */
+    if (cx->unusedBuffer != MD2_BUFSIZE) {
+        bytesToConsume = PR_MIN(inputLen, cx->unusedBuffer);
+        memcpy(&cx->X[MD2_INPUT + (MD2_BUFSIZE - cx->unusedBuffer)],
+               input, bytesToConsume);
+        if (bytesToConsume < cx->unusedBuffer) {
+            /* The block is still not full: remember how much is buffered
+             * and wait for more input.  Compressing here, or resetting
+             * unusedBuffer below, would corrupt or drop buffered bytes. */
+            cx->unusedBuffer = (PRUint8)(cx->unusedBuffer - bytesToConsume);
+            return;
+        }
+        /* The block is now exactly full; md2_compress marks it free. */
+        md2_compress(cx);
+        inputLen -= bytesToConsume;
+        input += bytesToConsume;
+    }
+
+    /* Iterate over 16-byte chunks of the input. */
+    while (inputLen >= MD2_BUFSIZE) {
+        memcpy(&cx->X[MD2_INPUT], input, MD2_BUFSIZE);
+        md2_compress(cx);
+        inputLen -= MD2_BUFSIZE;
+        input += MD2_BUFSIZE;
+    }
+
+    /* Copy any input that remains into the (now empty) buffer. */
+    if (inputLen)
+        memcpy(&cx->X[MD2_INPUT], input, inputLen);
+    cx->unusedBuffer = MD2_BUFSIZE - inputLen;
+}
+
+/*
+ * Finish the hash and write the MD2_DIGEST_LEN byte digest to digest.
+ *
+ * maxDigestLen is the capacity of digest; if it is too small,
+ * SEC_ERROR_INVALID_ARGS is set and nothing is written.  digestLen, when
+ * not NULL, receives the number of bytes written.
+ */
+void
+MD2_End(MD2Context *cx, unsigned char *digest,
+        unsigned int *digestLen, unsigned int maxDigestLen)
+{
+    PRUint8 padStart;
+    if (maxDigestLen < MD2_BUFSIZE) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return;
+    }
+    /* Pad the free part of the block; every pad byte holds the pad length. */
+    padStart = MD2_BUFSIZE - cx->unusedBuffer;
+    memset(&cx->X[MD2_INPUT + padStart], cx->unusedBuffer,
+           cx->unusedBuffer);
+    md2_compress(cx);
+    /* The running checksum is processed as one final block. */
+    memcpy(&cx->X[MD2_INPUT], cx->checksum, MD2_BUFSIZE);
+    md2_compress(cx);
+    /* digestLen is optional, as it is for MD5_End. */
+    if (digestLen)
+        *digestLen = MD2_DIGEST_LEN;
+    memcpy(digest, &cx->X[MD2_CV], MD2_DIGEST_LEN);
+}
+
+/* Number of bytes MD2_Flatten writes: the size of the context structure. */
+unsigned int
+MD2_FlattenSize(MD2Context *cx)
+{
+    return sizeof(MD2Context);
+}
+
+/*
+ * Copy the raw bytes of the context into space, which must hold at least
+ * sizeof(MD2Context) bytes.  Always returns SECSuccess.
+ */
+SECStatus
+MD2_Flatten(MD2Context *cx, unsigned char *space)
+{
+    memcpy(space, cx, sizeof(MD2Context));
+    return SECSuccess;
+}
+
+/*
+ * Allocate a new context and fill it with the sizeof(MD2Context) bytes
+ * found at space.  arg is not used.  Returns NULL if the allocation fails.
+ */
+MD2Context *
+MD2_Resurrect(unsigned char *space, void *arg)
+{
+    MD2Context *restored = MD2_NewContext();
+
+    if (restored == NULL) {
+        return NULL;
+    }
+    memcpy(restored, space, sizeof(MD2Context));
+    return restored;
+}
+
+/* Copy the complete state of src into dest. */
+void
+MD2_Clone(MD2Context *dest, MD2Context *src)
+{
+    memcpy(dest, src, sizeof(MD2Context));
+}
diff --git a/security/nss/lib/freebl/md5.c b/security/nss/lib/freebl/md5.c
new file mode 100644
index 000000000..bdd36a61b
--- /dev/null
+++ b/security/nss/lib/freebl/md5.c
@@ -0,0 +1,598 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "prerr.h"
+#include "secerr.h"
+
+#include "prtypes.h"
+#include "prlong.h"
+
+#include "blapi.h"
+#include "blapii.h"
+
+/* digest size in bytes */
+#define MD5_HASH_LEN 16
+/* size in bytes of one input block */
+#define MD5_BUFFER_SIZE 64
+/* offset in the block up to which MD5_End pads; the last 8 bytes of the
+ * block receive the 64-bit message length */
+#define MD5_END_BUFFER (MD5_BUFFER_SIZE - 8)
+
+/* initial chaining values, loaded into cv[0..3] by MD5_Begin */
+#define CV0_1 0x67452301
+#define CV0_2 0xefcdab89
+#define CV0_3 0x98badcfe
+#define CV0_4 0x10325476
+
+#define T1_0 0xd76aa478
+#define T1_1 0xe8c7b756
+#define T1_2 0x242070db
+#define T1_3 0xc1bdceee
+#define T1_4 0xf57c0faf
+#define T1_5 0x4787c62a
+#define T1_6 0xa8304613
+#define T1_7 0xfd469501
+#define T1_8 0x698098d8
+#define T1_9 0x8b44f7af
+#define T1_10 0xffff5bb1
+#define T1_11 0x895cd7be
+#define T1_12 0x6b901122
+#define T1_13 0xfd987193
+#define T1_14 0xa679438e
+#define T1_15 0x49b40821
+
+#define T2_0 0xf61e2562
+#define T2_1 0xc040b340
+#define T2_2 0x265e5a51
+#define T2_3 0xe9b6c7aa
+#define T2_4 0xd62f105d
+#define T2_5 0x02441453
+#define T2_6 0xd8a1e681
+#define T2_7 0xe7d3fbc8
+#define T2_8 0x21e1cde6
+#define T2_9 0xc33707d6
+#define T2_10 0xf4d50d87
+#define T2_11 0x455a14ed
+#define T2_12 0xa9e3e905
+#define T2_13 0xfcefa3f8
+#define T2_14 0x676f02d9
+#define T2_15 0x8d2a4c8a
+
+#define T3_0 0xfffa3942
+#define T3_1 0x8771f681
+#define T3_2 0x6d9d6122
+#define T3_3 0xfde5380c
+#define T3_4 0xa4beea44
+#define T3_5 0x4bdecfa9
+#define T3_6 0xf6bb4b60
+#define T3_7 0xbebfbc70
+#define T3_8 0x289b7ec6
+#define T3_9 0xeaa127fa
+#define T3_10 0xd4ef3085
+#define T3_11 0x04881d05
+#define T3_12 0xd9d4d039
+#define T3_13 0xe6db99e5
+#define T3_14 0x1fa27cf8
+#define T3_15 0xc4ac5665
+
+#define T4_0 0xf4292244
+#define T4_1 0x432aff97
+#define T4_2 0xab9423a7
+#define T4_3 0xfc93a039
+#define T4_4 0x655b59c3
+#define T4_5 0x8f0ccc92
+#define T4_6 0xffeff47d
+#define T4_7 0x85845dd1
+#define T4_8 0x6fa87e4f
+#define T4_9 0xfe2ce6e0
+#define T4_10 0xa3014314
+#define T4_11 0x4e0811a1
+#define T4_12 0xf7537e82
+#define T4_13 0xbd3af235
+#define T4_14 0x2ad7d2bb
+#define T4_15 0xeb86d391
+
+#define R1B0 0
+#define R1B1 1
+#define R1B2 2
+#define R1B3 3
+#define R1B4 4
+#define R1B5 5
+#define R1B6 6
+#define R1B7 7
+#define R1B8 8
+#define R1B9 9
+#define R1B10 10
+#define R1B11 11
+#define R1B12 12
+#define R1B13 13
+#define R1B14 14
+#define R1B15 15
+
+#define R2B0 1
+#define R2B1 6
+#define R2B2 11
+#define R2B3 0
+#define R2B4 5
+#define R2B5 10
+#define R2B6 15
+#define R2B7 4
+#define R2B8 9
+#define R2B9 14
+#define R2B10 3
+#define R2B11 8
+#define R2B12 13
+#define R2B13 2
+#define R2B14 7
+#define R2B15 12
+
+#define R3B0 5
+#define R3B1 8
+#define R3B2 11
+#define R3B3 14
+#define R3B4 1
+#define R3B5 4
+#define R3B6 7
+#define R3B7 10
+#define R3B8 13
+#define R3B9 0
+#define R3B10 3
+#define R3B11 6
+#define R3B12 9
+#define R3B13 12
+#define R3B14 15
+#define R3B15 2
+
+#define R4B0 0
+#define R4B1 7
+#define R4B2 14
+#define R4B3 5
+#define R4B4 12
+#define R4B5 3
+#define R4B6 10
+#define R4B7 1
+#define R4B8 8
+#define R4B9 15
+#define R4B10 6
+#define R4B11 13
+#define R4B12 4
+#define R4B13 11
+#define R4B14 2
+#define R4B15 9
+
+#define S1_0 7
+#define S1_1 12
+#define S1_2 17
+#define S1_3 22
+
+#define S2_0 5
+#define S2_1 9
+#define S2_2 14
+#define S2_3 20
+
+#define S3_0 4
+#define S3_1 11
+#define S3_2 16
+#define S3_3 23
+
+#define S4_0 6
+#define S4_1 10
+#define S4_2 15
+#define S4_3 21
+
+/* State carried between MD5_Begin, MD5_Update and MD5_End. */
+struct MD5ContextStr {
+ /* low 32 bits of the 64-bit count of input bytes */
+ PRUint32 lsbInput;
+ /* high 32 bits of the 64-bit count of input bytes */
+ PRUint32 msbInput;
+ /* chaining values; hold the digest after the final compression */
+ PRUint32 cv[4];
+ /* one 64-byte input block, accessible as bytes (b, also named inBuf)
+  * or as sixteen 32-bit words (w) */
+ union {
+ PRUint8 b[64];
+ PRUint32 w[16];
+ } u;
+};
+
+#define inBuf u.b
+
+/*
+ * One-shot MD5 of the NUL-terminated string src; the digest is written to
+ * dest.  Returns whatever MD5_HashBuf returns.
+ */
+SECStatus
+MD5_Hash(unsigned char *dest, const char *src)
+{
+    const unsigned char *bytes = (const unsigned char *)src;
+
+    return MD5_HashBuf(dest, bytes, PORT_Strlen(src));
+}
+
+/*
+ * One-shot MD5 of the src_length bytes at src, using a context on the
+ * stack.  The MD5_HASH_LEN byte digest is written to dest.  Always returns
+ * SECSuccess.
+ */
+SECStatus
+MD5_HashBuf(unsigned char *dest, const unsigned char *src, PRUint32 src_length)
+{
+    MD5Context ctx;
+    unsigned int outLen;
+
+    MD5_Begin(&ctx);
+    MD5_Update(&ctx, src, src_length);
+    MD5_End(&ctx, dest, &outLen, MD5_HASH_LEN);
+
+    /* clear the on-stack state before returning */
+    memset(&ctx, 0, sizeof(ctx));
+    return SECSuccess;
+}
+
+/*
+ * Allocate an MD5 context.  The memory is not cleared; MD5_Begin sets up
+ * the fields it needs.  Returns NULL and sets PR_OUT_OF_MEMORY_ERROR when
+ * the allocation fails.
+ */
+MD5Context *
+MD5_NewContext(void)
+{
+    MD5Context *ctx;
+
+    ctx = (MD5Context *)PORT_Alloc(sizeof(MD5Context));
+    if (ctx != NULL) {
+        return ctx;
+    }
+    PORT_SetError(PR_OUT_OF_MEMORY_ERROR);
+    return NULL;
+}
+
+/*
+ * Clear the context and, when freeit is true, release its memory with
+ * PORT_Free.  The clearing happens regardless of freeit.
+ */
+void
+MD5_DestroyContext(MD5Context *cx, PRBool freeit)
+{
+    memset(cx, 0, sizeof(MD5Context));
+    if (!freeit) {
+        return;
+    }
+    PORT_Free(cx);
+}
+
+/*
+ * Prepare cx for a new message: zero the 64-bit byte counter and load the
+ * initial chaining values.  The input buffer is left as it is.
+ */
+void
+MD5_Begin(MD5Context *cx)
+{
+    /* byte counter */
+    cx->msbInput = 0;
+    cx->lsbInput = 0;
+
+    /* chaining values */
+    cx->cv[0] = CV0_1;
+    cx->cv[1] = CV0_2;
+    cx->cv[2] = CV0_3;
+    cx->cv[3] = CV0_4;
+}
+
+/* Rotate the 32-bit value i32 left by s bits.  Requires a PRUint32 variable
+ * named tmp in the calling scope. */
+#define cls(i32, s) (tmp = i32, tmp << s | tmp >> (32 - s))
+
+/* Add the 32-bit addend to the 64-bit value held in sumhigh:sumlow; a
+ * wrap-around of sumlow carries into sumhigh.  Both variants expand to more
+ * than one statement, so use them only as a full statement of their own. */
+#if defined(SOLARIS) || defined(HPUX)
+#define addto64(sumhigh, sumlow, addend) \
+ sumlow += addend; \
+ sumhigh += (sumlow < addend);
+#else
+#define addto64(sumhigh, sumlow, addend) \
+ sumlow += addend; \
+ if (sumlow < addend) \
+ ++sumhigh;
+#endif
+
+/* lendian(): identity when IS_LITTLE_ENDIAN is defined; otherwise reverses
+ * the byte order of a 32-bit value (swap the halves, then the bytes within
+ * each half).  The byte-swapping variant needs a PRUint32 tmp in scope and
+ * evaluates its argument twice. */
+#define MASK 0x00ff00ff
+#ifdef IS_LITTLE_ENDIAN
+#define lendian(i32) \
+ (i32)
+#else
+#define lendian(i32) \
+ (tmp = (i32 >> 16) | (i32 << 16), ((tmp & MASK) << 8) | ((tmp >> 8) & MASK))
+#endif
+
+#ifndef IS_LITTLE_ENDIAN
+
+#define lebytes(b4) \
+ ((b4)[3] << 24 | (b4)[2] << 16 | (b4)[1] << 8 | (b4)[0])
+
+/*
+ * Apply lendian() in place to each of the sixteen words of the input block.
+ */
+static void
+md5_prep_state_le(MD5Context *cx)
+{
+    PRUint32 tmp; /* scratch used by lendian() */
+    unsigned int word;
+
+    for (word = 0; word < 16; word++) {
+        cx->u.w[word] = lendian(cx->u.w[word]);
+    }
+}
+
+/*
+ * Load the 64 bytes at beBuf into cx->u.w as sixteen 32-bit words, taking
+ * the first byte of each group of four as the least significant one.
+ * beBuf may have any alignment; it is only read byte by byte.
+ */
+static void
+md5_prep_buffer_le(MD5Context *cx, const PRUint8 *beBuf)
+{
+    unsigned int word;
+
+    for (word = 0; word < MD5_BUFFER_SIZE / 4; word++) {
+        const PRUint8 *b4 = &beBuf[4 * word];
+
+        /* Widen each byte to PRUint32 before shifting: a PRUint8 promotes
+         * to int, and shifting a byte >= 0x80 left by 24 would overflow a
+         * signed int, which is undefined behaviour. */
+        cx->u.w[word] = ((PRUint32)b4[3] << 24) |
+                        ((PRUint32)b4[2] << 16) |
+                        ((PRUint32)b4[1] << 8) |
+                        (PRUint32)b4[0];
+    }
+}
+#endif
+
+#define F(X, Y, Z) \
+ ((X & Y) | ((~X) & Z))
+
+#define G(X, Y, Z) \
+ ((X & Z) | (Y & (~Z)))
+
+#define H(X, Y, Z) \
+ (X ^ Y ^ Z)
+
+#define I(X, Y, Z) \
+ (Y ^ (X | (~Z)))
+
+#define FF(a, b, c, d, bufint, s, ti) \
+ a = b + cls(a + F(b, c, d) + bufint + ti, s)
+
+#define GG(a, b, c, d, bufint, s, ti) \
+ a = b + cls(a + G(b, c, d) + bufint + ti, s)
+
+#define HH(a, b, c, d, bufint, s, ti) \
+ a = b + cls(a + H(b, c, d) + bufint + ti, s)
+
+#define II(a, b, c, d, bufint, s, ti) \
+ a = b + cls(a + I(b, c, d) + bufint + ti, s)
+
+/*
+ * Run the MD5 compression function over one block and add the result into
+ * the chaining values cx->cv.
+ *
+ * wBuf points to the sixteen 32-bit words of the block.  The function runs
+ * four rounds of sixteen steps (FF, GG, HH, II).  For every step the RnBm
+ * constant selects the message word, Sn_m the rotate amount and Tn_m the
+ * additive constant.
+ */
+static void NO_SANITIZE_ALIGNMENT
+md5_compress(MD5Context *cx, const PRUint32 *wBuf)
+{
+ PRUint32 a, b, c, d;
+ /* scratch variable required by the cls() macro used in FF/GG/HH/II */
+ PRUint32 tmp;
+ a = cx->cv[0];
+ b = cx->cv[1];
+ c = cx->cv[2];
+ d = cx->cv[3];
+ /* round 1 */
+ FF(a, b, c, d, wBuf[R1B0], S1_0, T1_0);
+ FF(d, a, b, c, wBuf[R1B1], S1_1, T1_1);
+ FF(c, d, a, b, wBuf[R1B2], S1_2, T1_2);
+ FF(b, c, d, a, wBuf[R1B3], S1_3, T1_3);
+ FF(a, b, c, d, wBuf[R1B4], S1_0, T1_4);
+ FF(d, a, b, c, wBuf[R1B5], S1_1, T1_5);
+ FF(c, d, a, b, wBuf[R1B6], S1_2, T1_6);
+ FF(b, c, d, a, wBuf[R1B7], S1_3, T1_7);
+ FF(a, b, c, d, wBuf[R1B8], S1_0, T1_8);
+ FF(d, a, b, c, wBuf[R1B9], S1_1, T1_9);
+ FF(c, d, a, b, wBuf[R1B10], S1_2, T1_10);
+ FF(b, c, d, a, wBuf[R1B11], S1_3, T1_11);
+ FF(a, b, c, d, wBuf[R1B12], S1_0, T1_12);
+ FF(d, a, b, c, wBuf[R1B13], S1_1, T1_13);
+ FF(c, d, a, b, wBuf[R1B14], S1_2, T1_14);
+ FF(b, c, d, a, wBuf[R1B15], S1_3, T1_15);
+ /* round 2 */
+ GG(a, b, c, d, wBuf[R2B0], S2_0, T2_0);
+ GG(d, a, b, c, wBuf[R2B1], S2_1, T2_1);
+ GG(c, d, a, b, wBuf[R2B2], S2_2, T2_2);
+ GG(b, c, d, a, wBuf[R2B3], S2_3, T2_3);
+ GG(a, b, c, d, wBuf[R2B4], S2_0, T2_4);
+ GG(d, a, b, c, wBuf[R2B5], S2_1, T2_5);
+ GG(c, d, a, b, wBuf[R2B6], S2_2, T2_6);
+ GG(b, c, d, a, wBuf[R2B7], S2_3, T2_7);
+ GG(a, b, c, d, wBuf[R2B8], S2_0, T2_8);
+ GG(d, a, b, c, wBuf[R2B9], S2_1, T2_9);
+ GG(c, d, a, b, wBuf[R2B10], S2_2, T2_10);
+ GG(b, c, d, a, wBuf[R2B11], S2_3, T2_11);
+ GG(a, b, c, d, wBuf[R2B12], S2_0, T2_12);
+ GG(d, a, b, c, wBuf[R2B13], S2_1, T2_13);
+ GG(c, d, a, b, wBuf[R2B14], S2_2, T2_14);
+ GG(b, c, d, a, wBuf[R2B15], S2_3, T2_15);
+ /* round 3 */
+ HH(a, b, c, d, wBuf[R3B0], S3_0, T3_0);
+ HH(d, a, b, c, wBuf[R3B1], S3_1, T3_1);
+ HH(c, d, a, b, wBuf[R3B2], S3_2, T3_2);
+ HH(b, c, d, a, wBuf[R3B3], S3_3, T3_3);
+ HH(a, b, c, d, wBuf[R3B4], S3_0, T3_4);
+ HH(d, a, b, c, wBuf[R3B5], S3_1, T3_5);
+ HH(c, d, a, b, wBuf[R3B6], S3_2, T3_6);
+ HH(b, c, d, a, wBuf[R3B7], S3_3, T3_7);
+ HH(a, b, c, d, wBuf[R3B8], S3_0, T3_8);
+ HH(d, a, b, c, wBuf[R3B9], S3_1, T3_9);
+ HH(c, d, a, b, wBuf[R3B10], S3_2, T3_10);
+ HH(b, c, d, a, wBuf[R3B11], S3_3, T3_11);
+ HH(a, b, c, d, wBuf[R3B12], S3_0, T3_12);
+ HH(d, a, b, c, wBuf[R3B13], S3_1, T3_13);
+ HH(c, d, a, b, wBuf[R3B14], S3_2, T3_14);
+ HH(b, c, d, a, wBuf[R3B15], S3_3, T3_15);
+ /* round 4 */
+ II(a, b, c, d, wBuf[R4B0], S4_0, T4_0);
+ II(d, a, b, c, wBuf[R4B1], S4_1, T4_1);
+ II(c, d, a, b, wBuf[R4B2], S4_2, T4_2);
+ II(b, c, d, a, wBuf[R4B3], S4_3, T4_3);
+ II(a, b, c, d, wBuf[R4B4], S4_0, T4_4);
+ II(d, a, b, c, wBuf[R4B5], S4_1, T4_5);
+ II(c, d, a, b, wBuf[R4B6], S4_2, T4_6);
+ II(b, c, d, a, wBuf[R4B7], S4_3, T4_7);
+ II(a, b, c, d, wBuf[R4B8], S4_0, T4_8);
+ II(d, a, b, c, wBuf[R4B9], S4_1, T4_9);
+ II(c, d, a, b, wBuf[R4B10], S4_2, T4_10);
+ II(b, c, d, a, wBuf[R4B11], S4_3, T4_11);
+ II(a, b, c, d, wBuf[R4B12], S4_0, T4_12);
+ II(d, a, b, c, wBuf[R4B13], S4_1, T4_13);
+ II(c, d, a, b, wBuf[R4B14], S4_2, T4_14);
+ II(b, c, d, a, wBuf[R4B15], S4_3, T4_15);
+ /* feed the result forward into the chaining values */
+ cx->cv[0] += a;
+ cx->cv[1] += b;
+ cx->cv[2] += c;
+ cx->cv[3] += d;
+}
+
+/*
+ * Absorb inputLen bytes from input into the MD5 state.
+ *
+ * The number of bytes already buffered is derived from the low six bits of
+ * the byte counter.  Input first tops up a partially filled block, is then
+ * consumed in whole 64-byte blocks, and any remainder is copied to the
+ * start of the buffer for the next call.
+ */
+void
+MD5_Update(MD5Context *cx, const unsigned char *input, unsigned int inputLen)
+{
+ PRUint32 bytesToConsume;
+ /* bytes already buffered, taken before the counter is advanced */
+ PRUint32 inBufIndex = cx->lsbInput & 63;
+ const PRUint32 *wBuf;
+
+ /* Add the number of input bytes to the 64-bit input counter. */
+ addto64(cx->msbInput, cx->lsbInput, inputLen);
+ if (inBufIndex) {
+ /* There is already data in the buffer. Fill with input. */
+ bytesToConsume = PR_MIN(inputLen, MD5_BUFFER_SIZE - inBufIndex);
+ memcpy(&cx->inBuf[inBufIndex], input, bytesToConsume);
+ if (inBufIndex + bytesToConsume >= MD5_BUFFER_SIZE) {
+/* The buffer is filled. Run the compression function. */
+#ifndef IS_LITTLE_ENDIAN
+ md5_prep_state_le(cx);
+#endif
+ md5_compress(cx, cx->u.w);
+ }
+ /* Remaining input. */
+ inputLen -= bytesToConsume;
+ input += bytesToConsume;
+ }
+
+ /* Iterate over 64-byte chunks of the message. */
+ while (inputLen >= MD5_BUFFER_SIZE) {
+#ifdef IS_LITTLE_ENDIAN
+#ifdef HAVE_UNALIGNED_ACCESS
+ /* x86 can handle arithmetic on non-word-aligned buffers */
+ wBuf = (PRUint32 *)input;
+#else
+ if ((ptrdiff_t)input & 0x3) {
+ /* buffer not aligned, copy it to force alignment */
+ memcpy(cx->inBuf, input, MD5_BUFFER_SIZE);
+ wBuf = cx->u.w;
+ } else {
+ /* buffer is aligned */
+ wBuf = (PRUint32 *)input;
+ }
+#endif
+#else
+ /* not little-endian: assemble the words byte by byte into cx->u.w */
+ md5_prep_buffer_le(cx, input);
+ wBuf = cx->u.w;
+#endif
+ md5_compress(cx, wBuf);
+ inputLen -= MD5_BUFFER_SIZE;
+ input += MD5_BUFFER_SIZE;
+ }
+
+ /* Tail of message (message bytes mod 64). */
+ if (inputLen)
+ memcpy(cx->inBuf, input, inputLen);
+}
+
+static const unsigned char padbytes[] = {
+ 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+};
+
+/*
+ * Finish the hash and write the MD5_HASH_LEN byte digest to digest.
+ *
+ * maxDigestLen is the capacity of digest; if it is smaller than
+ * MD5_HASH_LEN, SEC_ERROR_INVALID_ARGS is set and nothing is written.
+ * digestLen, when not NULL, receives MD5_HASH_LEN.  The context is modified
+ * by the padding and the final compression.
+ */
+void
+MD5_End(MD5Context *cx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen)
+{
+#ifndef IS_LITTLE_ENDIAN
+ /* scratch variable required by the byte-swapping lendian() */
+ PRUint32 tmp;
+#endif
+ PRUint32 lowInput, highInput;
+ PRUint32 inBufIndex = cx->lsbInput & 63;
+
+ if (maxDigestLen < MD5_HASH_LEN) {
+ PORT_SetError(SEC_ERROR_INVALID_ARGS);
+ return;
+ }
+
+ /* Copy out the length of bits input before padding. */
+ lowInput = cx->lsbInput;
+ highInput = (cx->msbInput << 3) | (lowInput >> 29);
+ lowInput <<= 3;
+
+ /* Pad up to MD5_END_BUFFER bytes in the block; when the buffered data
+ * already reaches that offset, pad through into the next block. */
+ if (inBufIndex < MD5_END_BUFFER) {
+ MD5_Update(cx, padbytes, MD5_END_BUFFER - inBufIndex);
+ } else {
+ MD5_Update(cx, padbytes,
+ MD5_END_BUFFER + MD5_BUFFER_SIZE - inBufIndex);
+ }
+
+ /* Store the number of bytes input (before padding) in final 64 bits. */
+ cx->u.w[14] = lendian(lowInput);
+ cx->u.w[15] = lendian(highInput);
+
+/* Final call to compress. */
+#ifndef IS_LITTLE_ENDIAN
+ md5_prep_state_le(cx);
+#endif
+ md5_compress(cx, cx->u.w);
+
+ /* Copy the resulting values out of the chain variables into return buf. */
+ if (digestLen)
+ *digestLen = MD5_HASH_LEN;
+#ifndef IS_LITTLE_ENDIAN
+ cx->cv[0] = lendian(cx->cv[0]);
+ cx->cv[1] = lendian(cx->cv[1]);
+ cx->cv[2] = lendian(cx->cv[2]);
+ cx->cv[3] = lendian(cx->cv[3]);
+#endif
+ memcpy(digest, cx->cv, MD5_HASH_LEN);
+}
+
+/*
+ * Write the current chaining values to digest without padding or running a
+ * final compression; the context itself is not modified.
+ *
+ * maxDigestLen is the capacity of digest; if it is smaller than
+ * MD5_HASH_LEN, SEC_ERROR_INVALID_ARGS is set and nothing is written.
+ * digestLen, when not NULL, receives MD5_HASH_LEN.
+ */
+void
+MD5_EndRaw(MD5Context *cx, unsigned char *digest,
+           unsigned int *digestLen, unsigned int maxDigestLen)
+{
+#ifndef IS_LITTLE_ENDIAN
+    PRUint32 tmp; /* scratch used by lendian() */
+    unsigned int word;
+#endif
+    PRUint32 state[4];
+
+    if (maxDigestLen < MD5_HASH_LEN) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return;
+    }
+
+    /* work on a copy so that cx stays usable */
+    memcpy(state, cx->cv, sizeof(state));
+#ifndef IS_LITTLE_ENDIAN
+    for (word = 0; word < 4; word++) {
+        state[word] = lendian(state[word]);
+    }
+#endif
+    memcpy(digest, state, MD5_HASH_LEN);
+    if (digestLen != NULL) {
+        *digestLen = MD5_HASH_LEN;
+    }
+}
+
+/* Number of bytes MD5_Flatten writes: the size of the context structure. */
+unsigned int
+MD5_FlattenSize(MD5Context *cx)
+{
+    return sizeof(MD5Context);
+}
+
+/*
+ * Copy the raw bytes of the context into space, which must hold at least
+ * sizeof(MD5Context) bytes.  Always returns SECSuccess.
+ */
+SECStatus
+MD5_Flatten(MD5Context *cx, unsigned char *space)
+{
+    memcpy(space, cx, sizeof(MD5Context));
+    return SECSuccess;
+}
+
+/*
+ * Allocate a new context and fill it with the sizeof(MD5Context) bytes
+ * found at space.  arg is not used.  Returns NULL if the allocation fails.
+ */
+MD5Context *
+MD5_Resurrect(unsigned char *space, void *arg)
+{
+    MD5Context *restored = MD5_NewContext();
+
+    if (restored == NULL) {
+        return NULL;
+    }
+    memcpy(restored, space, sizeof(MD5Context));
+    return restored;
+}
+
+/* Copy the complete state of src into dest. */
+void
+MD5_Clone(MD5Context *dest, MD5Context *src)
+{
+    memcpy(dest, src, sizeof(MD5Context));
+}
+
+/* Not implemented: only sets PR_NOT_IMPLEMENTED_ERROR; cx is not used. */
+void
+MD5_TraceState(MD5Context *cx)
+{
+    (void)cx;
+    PORT_SetError(PR_NOT_IMPLEMENTED_ERROR);
+}
diff --git a/security/nss/lib/freebl/mknewpc2.c b/security/nss/lib/freebl/mknewpc2.c
new file mode 100644
index 000000000..6b2968816
--- /dev/null
+++ b/security/nss/lib/freebl/mknewpc2.c
@@ -0,0 +1,208 @@
+/*
+ * mknewpc2.c
+ *
+ * Generate PC-2 tables for DES-150 library
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+
+typedef unsigned char BYTE;
+typedef unsigned int HALF;
+
+#define DES_ENCRYPT 0
+#define DES_DECRYPT 1
+
+/* two 28-bit registers defined in key schedule production process */
+static HALF C0, D0;
+
+static HALF L0, R0;
+
+/* key schedule, 16 internal keys, each with 8 6-bit parts */
+static BYTE KS[8][16];
+
+/*
+ * This table takes the 56 bits in C0 and D0 and shows how they are
+ * permuted into the 8 6-bit parts of the key in the key schedule.
+ * The bits of C0 are numbered left to right, 1-28.
+ * The bits of D0 are numbered left to right, 29-56.
+ * Zeros in this table represent bits that are always zero.
+ * Note that all the bits in the first 4 rows come from C0,
+ * and all the bits in the second 4 rows come from D0.
+ */
+static const BYTE PC2[64] = {
+ 14, 17, 11, 24, 1, 5, 0, 0, /* S1 */
+ 3, 28, 15, 6, 21, 10, 0, 0, /* S2 */
+ 23, 19, 12, 4, 26, 8, 0, 0, /* S3 */
+ 16, 7, 27, 20, 13, 2, 0, 0, /* S4 */
+
+ 41, 52, 31, 37, 47, 55, 0, 0, /* S5 */
+ 30, 40, 51, 45, 33, 48, 0, 0, /* S6 */
+ 44, 49, 39, 56, 34, 53, 0, 0, /* S7 */
+ 46, 42, 50, 36, 29, 32, 0, 0 /* S8 */
+};
+
+/* This table represents the same info as PC2, except that
+ * The bits of C0 and D0 are each numbered right to left, 0-27.
+ * -1 values indicate bits that are always zero.
+ * As before all the bits in the first 4 rows come from C0,
+ * and all the bits in the second 4 rows come from D0.
+ */
+static signed char PC2a[64] = {
+ /* bits of C0 */
+ 14, 11, 17, 4, 27, 23, -1, -1, /* S1 */
+ 25, 0, 13, 22, 7, 18, -1, -1, /* S2 */
+ 5, 9, 16, 24, 2, 20, -1, -1, /* S3 */
+ 12, 21, 1, 8, 15, 26, -1, -1, /* S4 */
+ /* bits of D0 */
+ 15, 4, 25, 19, 9, 1, -1, -1, /* S5 */
+ 26, 16, 5, 11, 23, 8, -1, -1, /* S6 */
+ 12, 7, 17, 0, 22, 3, -1, -1, /* S7 */
+ 10, 14, 6, 20, 27, 24, -1, -1 /* S8 */
+};
+
+/* This table represents the same info as PC2a, except that
+ * The order of the rows has been changed to increase the efficiency
+ * with which the key schedule is created.
+ * Fewer shifts and ANDs are required to make the KS from these.
+ */
+static const signed char PC2b[64] = {
+ /* bits of C0 */
+ 14, 11, 17, 4, 27, 23, -1, -1, /* S1 */
+ 5, 9, 16, 24, 2, 20, -1, -1, /* S3 */
+ 25, 0, 13, 22, 7, 18, -1, -1, /* S2 */
+ 12, 21, 1, 8, 15, 26, -1, -1, /* S4 */
+ /* bits of D0 */
+ 26, 16, 5, 11, 23, 8, -1, -1, /* S6 */
+ 10, 14, 6, 20, 27, 24, -1, -1, /* S8 */
+ 15, 4, 25, 19, 9, 1, -1, -1, /* S5 */
+ 12, 7, 17, 0, 22, 3, -1, -1 /* S7 */
+};
+
+/* Only 24 of the 28 bits in C0 and D0 are used in PC2.
+ * The used bits of C0 and D0 are grouped into 4 groups of 6,
+ * so that the PC2 permutation can be accomplished with 4 lookups
+ * in tables of 64 entries.
+ * The following table shows how the bits of C0 and D0 are grouped
+ * into indexes for the respective table lookups.
+ * Bits are numbered right-to-left, 0-27, as in PC2b.
+ */
+static BYTE NDX[48] = {
+ /* Bits of C0 */
+ 27, 26, 25, 24, 23, 22, /* C0 table 0 */
+ 18, 17, 16, 15, 14, 13, /* C0 table 1 */
+ 9, 8, 7, 2, 1, 0, /* C0 table 2 */
+ 5, 4, 21, 20, 12, 11, /* C0 table 3 */
+ /* bits of D0 */
+ 27, 26, 25, 24, 23, 22, /* D0 table 0 */
+ 20, 19, 17, 16, 15, 14, /* D0 table 1 */
+ 12, 11, 10, 9, 8, 7, /* D0 table 2 */
+ 6, 5, 4, 3, 1, 0 /* D0 table 3 */
+};
+
+/* Here's the code that does that grouping.
+ left = PC2LOOKUP(0, 0, ((c0 >> 22) & 0x3F) );
+ left |= PC2LOOKUP(0, 1, ((c0 >> 13) & 0x3F) );
+ left |= PC2LOOKUP(0, 2, ((c0 >> 4) & 0x38) | (c0 & 0x7) );
+ left |= PC2LOOKUP(0, 3, ((c0>>18)&0xC) | ((c0>>11)&0x3) | (c0&0x30));
+
+ right = PC2LOOKUP(1, 0, ((d0 >> 22) & 0x3F) );
+ right |= PC2LOOKUP(1, 1, ((d0 >> 15) & 0x30) | ((d0 >> 14) & 0xf) );
+ right |= PC2LOOKUP(1, 2, ((d0 >> 7) & 0x3F) );
+ right |= PC2LOOKUP(1, 3, ((d0 >> 1) & 0x3C) | (d0 & 0x3));
+*/
+
+/*
+ * Recompute PC2a from PC2 and print it, eight values per line.
+ *
+ * A PC2 entry of 0 becomes -1; entries below 29 are mapped to 28 - entry and
+ * all others to 56 - entry.
+ */
+void
+make_pc2a(void)
+{
+    int n;
+    int row;
+
+    for (n = 0; n < 64; ++n) {
+        int src = PC2[n];
+
+        PC2a[n] = (src == 0) ? -1 : ((src < 29) ? 28 - src : 56 - src);
+    }
+
+    for (row = 0; row < 64; row += 8) {
+        const signed char *p = &PC2a[row];
+
+        printf("%3d,%3d,%3d,%3d,%3d,%3d,%3d,%3d,\n",
+               p[0], p[1], p[2], p[3],
+               p[4], p[5], p[6], p[7]);
+    }
+}
+
+HALF PC2cd0[64];
+
+HALF PC_2H[8][64];
+
+/*
+ * Build the eight PC_2H lookup tables and print them as C initializers.
+ *
+ * Step 1 fills PC2cd0: for every bit position used in PC2b it stores the
+ * output mask that bit produces.  Entries taken from the first 32 PC2b
+ * slots (the C0 half) are stored at index bit + 32, entries from the last
+ * 32 slots (the D0 half) at index bit.
+ * Step 2 takes, for each table, the six bit positions listed in NDX and
+ * stores for every 6-bit index i the OR of the masks of the bits set in i.
+ *
+ * NDX is only read, so the function can be called more than once with the
+ * same result.
+ */
+void
+mktable(void)
+{
+    int i;
+    int table;
+    HALF mask;
+
+    mask = 0x80000000;
+    for (i = 0; i < 32; ++i, mask >>= 1) {
+        int bit = PC2b[i];
+        if (bit < 0)
+            continue;
+        PC2cd0[bit + 32] = mask;
+    }
+
+    mask = 0x80000000;
+    for (i = 32; i < 64; ++i, mask >>= 1) {
+        int bit = PC2b[i];
+        if (bit < 0)
+            continue;
+        PC2cd0[bit] = mask;
+    }
+
+#if DEBUG
+    for (i = 0; i < 64; ++i) {
+        printf("0x%08x,\n", PC2cd0[i]);
+    }
+#endif
+
+    for (table = 0; table < 8; ++table) {
+        const BYTE *ndx = &NDX[table * 6];
+        /* the first four tables index the c0 entries, which are stored in
+         * the upper half of PC2cd0 */
+        const int base = (table < 4) ? 32 : 0;
+        HALF bitvals[6];
+
+        for (i = 0; i < 6; ++i) {
+            bitvals[5 - i] = PC2cd0[ndx[i] + base];
+        }
+        for (i = 0; i < 64; ++i) {
+            int j;
+            int k = 0;
+            HALF value = 0;
+
+            for (j = i; j; j >>= 1, ++k) {
+                if (j & 1) {
+                    value |= bitvals[k];
+                }
+            }
+            PC_2H[table][i] = value;
+        }
+        printf("/* table %d */ {\n", table);
+        for (i = 0; i < 64; i += 4) {
+            printf(" 0x%08x, 0x%08x, 0x%08x, 0x%08x, \n",
+                   PC_2H[table][i], PC_2H[table][i + 1],
+                   PC_2H[table][i + 2], PC_2H[table][i + 3]);
+        }
+        printf(" },\n");
+    }
+}
+
+/*
+ * Program entry point: generate and print the PC_2H tables, then exit with
+ * status 0.  The make_pc2a() call stays commented out.
+ */
+int
+main(void)
+{
+    /* make_pc2a(); */
+
+    mktable();
+
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mksp.c b/security/nss/lib/freebl/mksp.c
new file mode 100644
index 000000000..ca83ac8e7
--- /dev/null
+++ b/security/nss/lib/freebl/mksp.c
@@ -0,0 +1,119 @@
+/*
+ * mksp.c
+ *
+ * Generate SP tables for DES-150 library
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+
+/*
+ * sboxes - the tables for the s-box functions
+ * from FIPS 46, pages 15-16.
+ *
+ * Eight boxes of 64 outputs (each 0..15); makeSP() reads them as
+ * S[box][item] for item 0..63.
+ * NOTE(review): the entry order appears interleaved relative to the
+ * row-by-row layout printed in the standard, presumably so the raw 6-bit
+ * s-box input can be used directly as the index -- confirm against FIPS 46.
+ */
+unsigned char S[8][64] = {
+ /* Func S1 = */
+ { 14, 0, 4, 15, 13, 7, 1, 4, 2, 14, 15, 2, 11, 13, 8, 1,
+ 3, 10, 10, 6, 6, 12, 12, 11, 5, 9, 9, 5, 0, 3, 7, 8,
+ 4, 15, 1, 12, 14, 8, 8, 2, 13, 4, 6, 9, 2, 1, 11, 7,
+ 15, 5, 12, 11, 9, 3, 7, 14, 3, 10, 10, 0, 5, 6, 0, 13 },
+ /* Func S2 = */
+ { 15, 3, 1, 13, 8, 4, 14, 7, 6, 15, 11, 2, 3, 8, 4, 14,
+ 9, 12, 7, 0, 2, 1, 13, 10, 12, 6, 0, 9, 5, 11, 10, 5,
+ 0, 13, 14, 8, 7, 10, 11, 1, 10, 3, 4, 15, 13, 4, 1, 2,
+ 5, 11, 8, 6, 12, 7, 6, 12, 9, 0, 3, 5, 2, 14, 15, 9 },
+ /* Func S3 = */
+ { 10, 13, 0, 7, 9, 0, 14, 9, 6, 3, 3, 4, 15, 6, 5, 10,
+ 1, 2, 13, 8, 12, 5, 7, 14, 11, 12, 4, 11, 2, 15, 8, 1,
+ 13, 1, 6, 10, 4, 13, 9, 0, 8, 6, 15, 9, 3, 8, 0, 7,
+ 11, 4, 1, 15, 2, 14, 12, 3, 5, 11, 10, 5, 14, 2, 7, 12 },
+ /* Func S4 = */
+ { 7, 13, 13, 8, 14, 11, 3, 5, 0, 6, 6, 15, 9, 0, 10, 3,
+ 1, 4, 2, 7, 8, 2, 5, 12, 11, 1, 12, 10, 4, 14, 15, 9,
+ 10, 3, 6, 15, 9, 0, 0, 6, 12, 10, 11, 1, 7, 13, 13, 8,
+ 15, 9, 1, 4, 3, 5, 14, 11, 5, 12, 2, 7, 8, 2, 4, 14 },
+ /* Func S5 = */
+ { 2, 14, 12, 11, 4, 2, 1, 12, 7, 4, 10, 7, 11, 13, 6, 1,
+ 8, 5, 5, 0, 3, 15, 15, 10, 13, 3, 0, 9, 14, 8, 9, 6,
+ 4, 11, 2, 8, 1, 12, 11, 7, 10, 1, 13, 14, 7, 2, 8, 13,
+ 15, 6, 9, 15, 12, 0, 5, 9, 6, 10, 3, 4, 0, 5, 14, 3 },
+ /* Func S6 = */
+ { 12, 10, 1, 15, 10, 4, 15, 2, 9, 7, 2, 12, 6, 9, 8, 5,
+ 0, 6, 13, 1, 3, 13, 4, 14, 14, 0, 7, 11, 5, 3, 11, 8,
+ 9, 4, 14, 3, 15, 2, 5, 12, 2, 9, 8, 5, 12, 15, 3, 10,
+ 7, 11, 0, 14, 4, 1, 10, 7, 1, 6, 13, 0, 11, 8, 6, 13 },
+ /* Func S7 = */
+ { 4, 13, 11, 0, 2, 11, 14, 7, 15, 4, 0, 9, 8, 1, 13, 10,
+ 3, 14, 12, 3, 9, 5, 7, 12, 5, 2, 10, 15, 6, 8, 1, 6,
+ 1, 6, 4, 11, 11, 13, 13, 8, 12, 1, 3, 4, 7, 10, 14, 7,
+ 10, 9, 15, 5, 6, 0, 8, 15, 0, 14, 5, 2, 9, 3, 2, 12 },
+ /* Func S8 = */
+ { 13, 1, 2, 15, 8, 13, 4, 8, 6, 10, 15, 3, 11, 7, 1, 4,
+ 10, 12, 9, 5, 3, 6, 14, 11, 5, 0, 0, 14, 12, 9, 7, 2,
+ 7, 2, 11, 1, 4, 14, 1, 7, 9, 4, 12, 10, 14, 8, 2, 13,
+ 0, 15, 6, 12, 10, 9, 13, 0, 15, 3, 3, 5, 5, 6, 8, 11 }
+};
+
+/*
+ * Permutation function for results from s-boxes
+ * from FIPS 46 pages 12 and 16.
+ *
+ * Entries are 1-based bit numbers (1..32); makePinv() turns each one into
+ * a 0-based Pinv index with 32 - P[i].
+ *
+ * P =
+ */
+unsigned char P[32] = {
+ 16, 7, 20, 21, 29, 12, 28, 17,
+ 1, 15, 23, 26, 5, 18, 31, 10,
+ 2, 8, 24, 14, 32, 27, 3, 9,
+ 19, 13, 30, 6, 22, 11, 4, 25
+};
+
+/* Filled by makePinv(): Pinv[32 - P[i]] is the single-bit mask 0x80000000 >> i. */
+unsigned int Pinv[32];
+/* Filled and printed by makeSP(): one 64-entry table of words per s-box. */
+unsigned int SP[8][64];
+
+/*
+ * Fill Pinv from P.
+ *
+ * Walks P front to back while a one-bit mask moves from 0x80000000 down
+ * to 0x00000001; the mask for P[i] is stored at index 32 - P[i].
+ */
+void
+makePinv(void)
+{
+    const unsigned char *entry = P;
+    unsigned int outBit;
+
+    /* The mask reaches zero after exactly 32 shifts, one per P entry. */
+    for (outBit = 0x80000000; outBit != 0; outBit >>= 1) {
+        int slot = 32 - *entry++;
+
+        Pinv[slot] = outBit;
+    }
+}
+
+/*
+ * Compute the SP tables and print them on stdout as C initializer text.
+ *
+ * For every s-box output S[box][item], each set bit n of the output
+ * contributes Pinv[(7 - box) * 4 + n]; the combined word is then moved
+ * left by three bits with the top three bits wrapped to the bottom
+ * (a rotate when unsigned int is 32 bits wide) and stored in SP[box][item].
+ */
+void
+makeSP(void)
+{
+    int b;
+
+    for (b = 0; b < 8; ++b) {
+        unsigned int *row = SP[b];
+        int k;
+
+        printf("/* box S%d */ {\n", b + 1);
+
+        for (k = 0; k < 64; ++k) {
+            unsigned int out = S[b][k];
+            unsigned int pos = (7 - b) * 4;
+            unsigned int word = 0;
+
+            /* Consume the s-box output one bit at a time, LSB first. */
+            while (out != 0) {
+                if (out & 1) {
+                    word |= Pinv[pos];
+                }
+                out >>= 1;
+                ++pos;
+            }
+            row[k] = (word << 3) | (word >> 29);
+        }
+
+        /* Four words per output line. */
+        for (k = 0; k < 64; k += 4) {
+            printf("\t0x%08x, 0x%08x, 0x%08x, 0x%08x,\n",
+                   row[k], row[k + 1], row[k + 2], row[k + 3]);
+        }
+        printf(" },\n");
+    }
+}
+
+/*
+ * Entry point: build Pinv first (makeSP reads it), then generate and
+ * print the SP tables on stdout.
+ */
+int
+main(void)
+{
+    makePinv();
+    makeSP();
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/Makefile b/security/nss/lib/freebl/mpi/Makefile
new file mode 100644
index 000000000..0dee5bed1
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/Makefile
@@ -0,0 +1,244 @@
+#
+# Makefile for MPI library
+
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+## Define CC to be the C compiler you wish to use. The GNU cc
+## compiler (gcc) should work, at the very least
+#CC=cc
+#CC=gcc
+
+##
+## Define PERL to point to your local Perl interpreter. It
+## should be Perl 5.x, although it's conceivable that Perl 4
+## might work ... I haven't tested it.
+##
+#PERL=/usr/bin/perl
+#PERL=perl
+
+include target.mk
+
+CFLAGS+= $(XCFLAGS)
+
+##
+## Define LIBS to include any libraries you need to link against.
+## If NO_TABLE is defined, LIBS should include '-lm' or whatever is
+## necessary to bring in the math library. Otherwise, it can be
+## left alone, unless your system has other peculiar requirements.
+##
+LIBS=#-lmalloc#-lefence#-lm
+
+##
+## Define RANLIB to be the library header randomizer; you might not
+## need this on some systems (just set it to 'echo' on these systems,
+## such as IRIX)
+##
+RANLIB=echo
+
+##
+## This is the version string used for the documentation and
+## building the distribution tarball. Don't mess with it unless
+## you are releasing a new version
+VERS=1.7p6
+
+## ----------------------------------------------------------------------
+## You probably don't need to change anything below this line...
+##
+
+##
+## This is the list of source files that need to be packed into
+## the distribution file
+SRCS= mpi.c mpprime.c mplogic.c mp_gf2m.c mpmontg.c mpi-test.c primes.c \
+ mpcpucache.c tests/ \
+ utils/gcd.c utils/invmod.c utils/lap.c \
+ utils/ptab.pl utils/sieve.c utils/isprime.c\
+ utils/dec2hex.c utils/hex2dec.c utils/bbs_rand.c \
+ utils/bbsrand.c utils/prng.c utils/primegen.c \
+ utils/basecvt.c utils/makeprime.c\
+ utils/fact.c utils/exptmod.c utils/pi.c utils/metime.c \
+ utils/mpi.h utils/mpprime.h mulsqr.c \
+ make-test-arrays test-arrays.txt all-tests make-logtab \
+ types.pl stats timetest multest
+
+## These are the header files that go into the distribution file
+HDRS=mpi.h mpi-config.h utils/mpi.h utils/mpi-config.h mpprime.h mplogic.h mp_gf2m.h \
+ mp_gf2m-priv.h utils/bbs_rand.h tests/mpi.h tests/mpprime.h
+
+## These are the documentation files that go into the distribution file
+DOCS=README doc utils/README utils/PRIMES
+
+## This is the list of tools built by 'make tools'
+TOOLS=gcd invmod isprime lap dec2hex hex2dec primegen prng \
+ basecvt fact exptmod pi makeprime identest
+
+LIBOBJS = mpprime.o mpmontg.o mplogic.o mp_gf2m.o mpi.o mpcpucache.o $(AS_OBJS)
+LIBHDRS = mpi-config.h mpi-priv.h mpi.h
+APPHDRS = mpi-config.h mpi.h mplogic.h mp_gf2m.h mpprime.h
+
+help:
+ @ echo ""
+ @ echo "The following targets can be built with this Makefile:"
+ @ echo ""
+ @ echo "libmpi.a - arithmetic and prime testing library"
+ @ echo "mpi-test - test driver (requires MP_IOFUNC)"
+ @ echo "tools - command line tools"
+ @ echo "doc - manual pages for tools"
+ @ echo "clean - clean up objects and such"
+ @ echo "distclean - get ready for distribution"
+ @ echo "dist - distribution tarball"
+ @ echo ""
+
+.SUFFIXES: .c .o .i
+
+.c.i:
+ $(CC) $(CFLAGS) -E $< > $@
+
+#.c.o: $*.h $*.c
+# $(CC) $(CFLAGS) -c $<
+
+#---------------------------------------
+
+$(LIBOBJS): $(LIBHDRS)
+
+logtab.h: make-logtab
+ $(PERL) make-logtab > logtab.h
+
+mpi.o: mpi.c logtab.h $(LIBHDRS)
+
+mplogic.o: mplogic.c mpi-priv.h mplogic.h $(LIBHDRS)
+
+mp_gf2m.o: mp_gf2m.c mpi-priv.h mp_gf2m.h mp_gf2m-priv.h $(LIBHDRS)
+
+mpmontg.o: mpmontg.c mpi-priv.h mplogic.h mpprime.h $(LIBHDRS)
+
+mpprime.o: mpprime.c mpi-priv.h mpprime.h mplogic.h primes.c $(LIBHDRS)
+
+mpcpucache.o: mpcpucache.c $(LIBHDRS)
+
+mpi_mips.o: mpi_mips.s
+ $(CC) -o $@ $(ASFLAGS) -c mpi_mips.s
+
+mpi_sparc.o : montmulf.h
+
+mpv_sparcv9.s: vis_64.il mpv_sparc.c
+ $(CC) -o $@ $(SOLARIS_FPU_FLAGS) -S vis_64.il mpv_sparc.c
+
+# The v8 variant is compiled from vis_32.il, so that template (not
+# vis_64.il) is the prerequisite that must trigger a rebuild.
+mpv_sparcv8.s: vis_32.il mpv_sparc.c
+ $(CC) -o $@ $(SOLARIS_FPU_FLAGS) -S vis_32.il mpv_sparc.c
+
+montmulfv8.o montmulfv9.o mpv_sparcv8.o mpv_sparcv9.o : %.o : %.s
+ $(CC) -o $@ $(SOLARIS_ASM_FLAGS) -c $<
+
+mpi_arm.o: mpi_arm.c $(LIBHDRS)
+
+# This rule is used to build the .s sources, which are then hand optimized.
+#montmulfv8.s montmulfv9.s : montmulf%.s : montmulf%.il montmulf.c montmulf.h
+# $(CC) -o $@ $(SOLARIS_ASM_FLAGS) -S montmulf$*.il montmulf.c
+
+
+libmpi.a: $(LIBOBJS)
+ ar -cvr libmpi.a $(LIBOBJS)
+ $(RANLIB) libmpi.a
+
+lib libs: libmpi.a
+
+mpi.i: mpi.h
+
+#---------------------------------------
+
+MPTESTOBJS = mptest1.o mptest2.o mptest3.o mptest3a.o mptest4.o mptest4a.o \
+ mptest4b.o mptest6.o mptest7.o mptest8.o mptest9.o mptestb.o
+MPTESTS = $(MPTESTOBJS:.o=)
+
+$(MPTESTOBJS): mptest%.o: tests/mptest-%.c $(LIBHDRS)
+ $(CC) $(CFLAGS) -o $@ -c $<
+
+$(MPTESTS): mptest%: mptest%.o libmpi.a
+ $(CC) $(CFLAGS) -o $@ $^ $(LIBS)
+
+tests: mptest1 mptest2 mptest3 mptest3a mptest4 mptest4a mptest4b mptest6 \
+ mptestb bbsrand
+
+utests: mptest7 mptest8 mptest9
+
+#---------------------------------------
+
+EXTRAOBJS = bbsrand.o bbs_rand.o prng.o
+UTILOBJS = primegen.o metime.o identest.o basecvt.o fact.o exptmod.o pi.o \
+ makeprime.o gcd.o invmod.o lap.o isprime.o \
+ dec2hex.o hex2dec.o
+UTILS = $(UTILOBJS:.o=)
+
+$(UTILS): % : %.o libmpi.a
+ $(CC) $(CFLAGS) -o $@ $^ $(LIBS)
+
+$(UTILOBJS) $(EXTRAOBJS): %.o : utils/%.c $(LIBHDRS)
+ $(CC) $(CFLAGS) -o $@ -c $<
+
+prng: prng.o bbs_rand.o libmpi.a
+ $(CC) $(CFLAGS) -o $@ $^ $(LIBS)
+
+bbsrand: bbsrand.o bbs_rand.o libmpi.a
+ $(CC) $(CFLAGS) -o $@ $^ $(LIBS)
+
+utils: $(UTILS) prng bbsrand
+
+#---------------------------------------
+
+test-info.c: test-arrays.txt
+ $(PERL) make-test-arrays test-arrays.txt > test-info.c
+
+mpi-test.o: mpi-test.c test-info.c $(LIBHDRS)
+ $(CC) $(CFLAGS) -o $@ -c $<
+
+mpi-test: mpi-test.o libmpi.a
+ $(CC) $(CFLAGS) -o $@ $^ $(LIBS)
+
+mdxptest.o: mdxptest.c $(LIBHDRS) mpi-priv.h
+
+mdxptest: mdxptest.o libmpi.a
+ $(CC) $(CFLAGS) -o $@ $^ $(LIBS)
+
+mulsqr.o: mulsqr.c logtab.h mpi.h mpi-config.h mpprime.h
+ $(CC) $(CFLAGS) -DMP_SQUARE=1 -o $@ -c mulsqr.c
+
+mulsqr: mulsqr.o libmpi.a
+ $(CC) $(CFLAGS) -o $@ $^ $(LIBS)
+
+#---------------------------------------
+
+alltests: tests utests mpi-test
+
+tools: $(TOOLS)
+
+doc:
+ (cd doc; ./build)
+
+clean:
+ rm -f *.o *.a *.i
+ rm -f core
+ rm -f *~ .*~
+ rm -f utils/*.o
+ rm -f utils/core
+ rm -f utils/*~ utils/.*~
+
+clobber: clean
+ rm -f $(TOOLS) $(UTILS)
+
+distclean: clean
+ rm -f mptest? mpi-test metime mulsqr karatsuba
+ rm -f mptest?a mptest?b
+ rm -f utils/mptest?
+ rm -f test-info.c logtab.h
+ rm -f libmpi.a
+ rm -f $(TOOLS)
+
+dist: Makefile $(HDRS) $(SRCS) $(DOCS)
+ tar -cvf mpi-$(VERS).tar Makefile $(HDRS) $(SRCS) $(DOCS)
+ pgps -ab mpi-$(VERS).tar
+ chmod +r mpi-$(VERS).tar.asc
+ gzip -9 mpi-$(VERS).tar
+
+# END
diff --git a/security/nss/lib/freebl/mpi/Makefile.os2 b/security/nss/lib/freebl/mpi/Makefile.os2
new file mode 100644
index 000000000..fa705ee08
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/Makefile.os2
@@ -0,0 +1,243 @@
+#
+# Makefile.os2 - gmake Makefile for building MPI with VACPP on OS/2
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+## Define CC to be the C compiler you wish to use. The GNU cc
+## compiler (gcc) should work, at the very least
+#CC=cc
+#CC=gcc
+CC=icc.exe
+AS=alp.exe
+
+##
+## Define PERL to point to your local Perl interpreter. It
+## should be Perl 5.x, although it's conceivable that Perl 4
+## might work ... I haven't tested it.
+##
+#PERL=/usr/bin/perl
+#PERL=perl
+
+##
+## Define CFLAGS to contain any local options your compiler
+## setup requires.
+##
+## Conditional compilation options are no longer here; see
+## the file 'mpi-config.h' instead.
+##
+MPICMN = -I. -DMP_API_COMPATIBLE -DMP_IOFUNC -DMP_USE_UINT_DIGIT -DMP_NO_MP_WORD
+
+#OS/2
+AS_SRCS = mpi_x86.asm
+MPICMN += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE -DMP_ASSEMBLY_DIV_2DX1D
+#CFLAGS= -Od -Z7 -MD -W3 -nologo -D_X86_ -DXP_PC \
+ -DDEBUG -D_DEBUG -UNDEBUG -DWIN32 -D_WINDOWS -DWIN95 $(MPICMN)
+#CFLAGS = -O2 -MD -W3 -nologo -D_X86_ -DXP_PC -UDEBUG -U_DEBUG -DNDEBUG \
+ -DWIN32 -D_WINDOWS -DWIN95 $(MPICMN)
+#CFLAGS = -Od -Z7 -MD -W3 -nologo -D_X86_ -DXP_PC -UDEBUG -U_DEBUG -DNDEBUG \
+ -DWIN32 -D_WINDOWS -DWIN95 $(MPICMN)
+CFLAGS = /Ti+ -D_X86_ -DXP_PC -UDEBUG -U_DEBUG -DNDEBUG \
+ $(MPICMN)
+ASFLAGS =
+
+##
+## Define LIBS to include any libraries you need to link against.
+## If NO_TABLE is defined, LIBS should include '-lm' or whatever is
+## necessary to bring in the math library. Otherwise, it can be
+## left alone, unless your system has other peculiar requirements.
+##
+LIBS=#-lmalloc#-lefence#-lm
+
+##
+## Define RANLIB to be the library header randomizer; you might not
+## need this on some systems (just set it to 'echo' on these systems,
+## such as IRIX)
+##
+RANLIB=echo
+
+##
+## This is the version string used for the documentation and
+## building the distribution tarball. Don't mess with it unless
+## you are releasing a new version
+VERS=1.7p6
+
+## ----------------------------------------------------------------------
+## You probably don't need to change anything below this line...
+##
+
+##
+## This is the list of source files that need to be packed into
+## the distribution file
+SRCS= mpi.c mpprime.c mplogic.c mpmontg.c mpi-test.c primes.c tests/ \
+ utils/gcd.c utils/invmod.c utils/lap.c \
+ utils/ptab.pl utils/sieve.c utils/isprime.c\
+ utils/dec2hex.c utils/hex2dec.c utils/bbs_rand.c \
+ utils/bbsrand.c utils/prng.c utils/primegen.c \
+ utils/basecvt.c utils/makeprime.c\
+ utils/fact.c utils/exptmod.c utils/pi.c utils/metime.c \
+ utils/mpi.h utils/mpprime.h mulsqr.c \
+ make-test-arrays test-arrays.txt all-tests make-logtab \
+ types.pl stats timetest multest
+
+## These are the header files that go into the distribution file
+HDRS=mpi.h mpi-config.h utils/mpi.h utils/mpi-config.h mpprime.h mplogic.h \
+ utils/bbs_rand.h tests/mpi.h tests/mpprime.h
+
+## These are the documentation files that go into the distribution file
+DOCS=README doc utils/README utils/PRIMES
+
+## This is the list of tools built by 'make tools'
+TOOLS=gcd.exe invmod.exe isprime.exe lap.exe dec2hex.exe hex2dec.exe \
+ primegen.exe prng.exe basecvt.exe fact.exe exptmod.exe pi.exe makeprime.exe
+
+AS_OBJS = $(AS_SRCS:.asm=.obj)
+LIBOBJS = mpprime.obj mpmontg.obj mplogic.obj mpi.obj $(AS_OBJS)
+LIBHDRS = mpi-config.h mpi-priv.h mpi.h
+APPHDRS = mpi-config.h mpi.h mplogic.h mpprime.h
+
+
+help:
+ @ echo ""
+ @ echo "The following targets can be built with this Makefile:"
+ @ echo ""
+ @ echo "mpi.lib - arithmetic and prime testing library"
+ @ echo "mpi-test.exe - test driver (requires MP_IOFUNC)"
+ @ echo "tools - command line tools"
+ @ echo "doc - manual pages for tools"
+ @ echo "clean - clean up objects and such"
+ @ echo "distclean - get ready for distribution"
+ @ echo "dist - distribution tarball"
+ @ echo ""
+
+.SUFFIXES: .c .obj .i .lib .exe .asm
+
+.c.i:
+ $(CC) $(CFLAGS) -E $< > $@
+
+.c.obj:
+ $(CC) $(CFLAGS) -c $<
+
+.asm.obj:
+ $(AS) $(ASFLAGS) $<
+
+.obj.exe:
+ $(CC) $(CFLAGS) -Fo$@ $<
+
+#---------------------------------------
+
+$(LIBOBJS): $(LIBHDRS)
+
+logtab.h: make-logtab
+ $(PERL) make-logtab > logtab.h
+
+mpi.obj: mpi.c logtab.h $(LIBHDRS)
+
+mplogic.obj: mplogic.c mpi-priv.h mplogic.h $(LIBHDRS)
+
+mpmontg.obj: mpmontg.c mpi-priv.h mplogic.h mpprime.h $(LIBHDRS)
+
+mpprime.obj: mpprime.c mpi-priv.h mpprime.h mplogic.h primes.c $(LIBHDRS)
+
+mpi_mips.obj: mpi_mips.s
+ $(CC) -Fo$@ $(ASFLAGS) -c mpi_mips.s
+
+mpi.lib: $(LIBOBJS)
+ ilib /out:mpi.lib $(LIBOBJS)
+ $(RANLIB) mpi.lib
+
+lib libs: mpi.lib
+
+#---------------------------------------
+
+MPTESTOBJS = mptest1.obj mptest2.obj mptest3.obj mptest3a.obj mptest4.obj \
+ mptest4a.obj mptest4b.obj mptest6.obj mptest7.obj mptest8.obj mptest9.obj
+MPTESTS = $(MPTESTOBJS:.obj=.exe)
+
+$(MPTESTOBJS): mptest%.obj: tests/mptest-%.c $(LIBHDRS)
+ $(CC) $(CFLAGS) -Fo$@ -c $<
+
+$(MPTESTS): mptest%.exe: mptest%.obj mpi.lib $(LIBS)
+ $(CC) $(CFLAGS) -Fo$@ $^
+
+tests: mptest1.exe mptest2.exe mptest3.exe mptest3a.exe mptest4.exe \
+ mptest4a.exe mptest4b.exe mptest6.exe bbsrand.exe
+
+utests: mptest7.exe mptest8.exe mptest9.exe
+
+#---------------------------------------
+
+EXTRAOBJS = bbsrand.obj bbs_rand.obj prng.obj
+UTILOBJS = primegen.obj metime.obj identest.obj basecvt.obj fact.obj \
+ exptmod.obj pi.obj makeprime.obj karatsuba.obj gcd.obj invmod.obj lap.obj \
+ isprime.obj dec2hex.obj hex2dec.obj
+UTILS = $(UTILOBJS:.obj=.exe)
+
+$(UTILS): %.exe : %.obj mpi.lib $(LIBS)
+ $(CC) $(CFLAGS) -Fo$@ $^
+
+$(UTILOBJS) $(EXTRAOBJS): %.obj : utils/%.c $(LIBHDRS)
+ $(CC) $(CFLAGS) -Fo$@ -c $<
+
+prng.exe: prng.obj bbs_rand.obj mpi.lib $(LIBS)
+ $(CC) $(CFLAGS) -Fo$@ $^
+
+bbsrand.exe: bbsrand.obj bbs_rand.obj mpi.lib $(LIBS)
+ $(CC) $(CFLAGS) -Fo$@ $^
+
+utils: $(UTILS) prng.exe bbsrand.exe
+
+#---------------------------------------
+
+test-info.c: test-arrays.txt
+ $(PERL) make-test-arrays test-arrays.txt > test-info.c
+
+mpi-test.obj: mpi-test.c test-info.c $(LIBHDRS)
+ $(CC) $(CFLAGS) -Fo$@ -c $<
+
+mpi-test.exe: mpi-test.obj mpi.lib $(LIBS)
+ $(CC) $(CFLAGS) -Fo$@ $^
+
+mdxptest.obj: mdxptest.c $(LIBHDRS) mpi-priv.h
+
+mdxptest.exe: mdxptest.obj mpi.lib $(LIBS)
+ $(CC) $(CFLAGS) -Fo$@ $^
+
+mulsqr.obj: mulsqr.c logtab.h mpi.h mpi-config.h mpprime.h
+ $(CC) $(CFLAGS) -DMP_SQUARE=1 -Fo$@ -c mulsqr.c
+
+mulsqr.exe: mulsqr.obj mpi.lib $(LIBS)
+ $(CC) $(CFLAGS) -Fo$@ $^
+
+#---------------------------------------
+
+alltests: tests utests mpi-test.exe
+
+tools: $(TOOLS)
+
+doc:
+ (cd doc; ./build)
+
+clean:
+ rm -f *.obj *.lib *.pdb *.ilk
+ cd utils; rm -f *.obj *.lib *.pdb *.ilk
+
+distclean: clean
+ rm -f mptest? mpi-test metime mulsqr karatsuba
+ rm -f mptest?a mptest?b
+ rm -f utils/mptest?
+ rm -f test-info.c logtab.h
+ rm -f mpi.lib
+ rm -f $(TOOLS)
+
+dist: Makefile $(HDRS) $(SRCS) $(DOCS)
+ tar -cvf mpi-$(VERS).tar Makefile $(HDRS) $(SRCS) $(DOCS)
+ pgps -ab mpi-$(VERS).tar
+ chmod +r mpi-$(VERS).tar.asc
+ gzip -9 mpi-$(VERS).tar
+
+
+print:
+ @echo LIBOBJS = $(LIBOBJS)
+# END
diff --git a/security/nss/lib/freebl/mpi/Makefile.win b/security/nss/lib/freebl/mpi/Makefile.win
new file mode 100644
index 000000000..cd41dfab8
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/Makefile.win
@@ -0,0 +1,254 @@
+#
+# Makefile.win - gmake Makefile for building MPI with MSVC on NT
+
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+## Define CC to be the C compiler you wish to use. The GNU cc
+## compiler (gcc) should work, at the very least
+#CC=cc
+#CC=gcc
+CC=cl.exe
+ifeq ($(CPU_ARCH),x86_64)
+AS=ml64.exe
+else
+AS=ml.exe
+endif
+
+##
+## Define PERL to point to your local Perl interpreter. It
+## should be Perl 5.x, although it's conceivable that Perl 4
+## might work ... I haven't tested it.
+##
+#PERL=/usr/bin/perl
+#PERL=perl
+
+##
+## Define CFLAGS to contain any local options your compiler
+## setup requires.
+##
+## Conditional compilation options are no longer here; see
+## the file 'mpi-config.h' instead.
+##
+MPICMN = -I. -DMP_API_COMPATIBLE -DMP_IOFUNC
+
+ifeq ($(CPU_ARCH),x86_64)
+AS_SRCS = mpi_x86_64.asm
+CFLAGS = -O2 -Z7 -MD -W3 -nologo -DXP_PC -UDEBUG -U_DEBUG -DNDEBUG \
+ -DWIN32 -D_WIN64 -D_AMD64_ -D_M_AMD64 -D_WINDOWS -DWIN95 $(MPICMN)
+ASFLAGS = -Cp -Sn -Zi -I.
+else
+#NT
+AS_SRCS = mpi_x86.asm
+MPICMN += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE -DMP_ASSEMBLY_DIV_2DX1D
+#CFLAGS= -Od -Z7 -MD -W3 -nologo -D_X86_ -DXP_PC \
+ -DDEBUG -D_DEBUG -UNDEBUG -DWIN32 -D_WINDOWS -DWIN95 $(MPICMN)
+#CFLAGS = -O2 -MD -W3 -nologo -D_X86_ -DXP_PC -UDEBUG -U_DEBUG -DNDEBUG \
+ -DWIN32 -D_WINDOWS -DWIN95 $(MPICMN)
+#CFLAGS = -Od -Z7 -MD -W3 -nologo -D_X86_ -DXP_PC -UDEBUG -U_DEBUG -DNDEBUG \
+ -DWIN32 -D_WINDOWS -DWIN95 $(MPICMN)
+CFLAGS = -O2 -Z7 -MD -W3 -nologo -D_X86_ -DXP_PC -UDEBUG -U_DEBUG -DNDEBUG \
+ -DWIN32 -D_WINDOWS -DWIN95 $(MPICMN)
+ASFLAGS = -Cp -Sn -Zi -coff -I.
+endif
+
+##
+## Define LIBS to include any libraries you need to link against.
+## If NO_TABLE is defined, LIBS should include '-lm' or whatever is
+## necessary to bring in the math library. Otherwise, it can be
+## left alone, unless your system has other peculiar requirements.
+##
+LIBS=#-lmalloc#-lefence#-lm
+
+##
+## Define RANLIB to be the library header randomizer; you might not
+## need this on some systems (just set it to 'echo' on these systems,
+## such as IRIX)
+##
+RANLIB=echo
+
+##
+## This is the version string used for the documentation and
+## building the distribution tarball. Don't mess with it unless
+## you are releasing a new version
+VERS=1.7p6
+
+## ----------------------------------------------------------------------
+## You probably don't need to change anything below this line...
+##
+
+##
+## This is the list of source files that need to be packed into
+## the distribution file
+SRCS= mpi.c mpprime.c mplogic.c mpmontg.c mpi-test.c primes.c tests/ \
+ utils/gcd.c utils/invmod.c utils/lap.c \
+ utils/ptab.pl utils/sieve.c utils/isprime.c\
+ utils/dec2hex.c utils/hex2dec.c utils/bbs_rand.c \
+ utils/bbsrand.c utils/prng.c utils/primegen.c \
+ utils/basecvt.c utils/makeprime.c\
+ utils/fact.c utils/exptmod.c utils/pi.c utils/metime.c \
+ utils/mpi.h utils/mpprime.h mulsqr.c \
+ make-test-arrays test-arrays.txt all-tests make-logtab \
+ types.pl stats timetest multest
+
+## These are the header files that go into the distribution file
+HDRS=mpi.h mpi-config.h utils/mpi.h utils/mpi-config.h mpprime.h mplogic.h \
+ utils/bbs_rand.h tests/mpi.h tests/mpprime.h
+
+## These are the documentation files that go into the distribution file
+DOCS=README doc utils/README utils/PRIMES
+
+## This is the list of tools built by 'make tools'
+TOOLS=gcd.exe invmod.exe isprime.exe lap.exe dec2hex.exe hex2dec.exe \
+ primegen.exe prng.exe basecvt.exe fact.exe exptmod.exe pi.exe makeprime.exe
+
+AS_OBJS = $(AS_SRCS:.asm=.obj)
+LIBOBJS = mpprime.obj mpmontg.obj mplogic.obj mpi.obj $(AS_OBJS)
+LIBHDRS = mpi-config.h mpi-priv.h mpi.h
+APPHDRS = mpi-config.h mpi.h mplogic.h mpprime.h
+
+
+help:
+ @ echo ""
+ @ echo "The following targets can be built with this Makefile:"
+ @ echo ""
+ @ echo "mpi.lib - arithmetic and prime testing library"
+ @ echo "mpi-test - test driver (requires MP_IOFUNC)"
+ @ echo "tools - command line tools"
+ @ echo "doc - manual pages for tools"
+ @ echo "clean - clean up objects and such"
+ @ echo "distclean - get ready for distribution"
+ @ echo "dist - distribution tarball"
+ @ echo ""
+
+.SUFFIXES: .c .obj .i .lib .exe .asm
+
+.c.i:
+ $(CC) $(CFLAGS) -E $< > $@
+
+.c.obj:
+ $(CC) $(CFLAGS) -c $<
+
+.asm.obj:
+ $(AS) $(ASFLAGS) -c $<
+
+.obj.exe:
+ $(CC) $(CFLAGS) -Fo$@ $<
+
+#---------------------------------------
+
+$(LIBOBJS): $(LIBHDRS)
+
+logtab.h: make-logtab
+ $(PERL) make-logtab > logtab.h
+
+mpi.obj: mpi.c logtab.h $(LIBHDRS)
+
+mplogic.obj: mplogic.c mpi-priv.h mplogic.h $(LIBHDRS)
+
+mpmontg.obj: mpmontg.c mpi-priv.h mplogic.h mpprime.h $(LIBHDRS)
+
+mpprime.obj: mpprime.c mpi-priv.h mpprime.h mplogic.h primes.c $(LIBHDRS)
+
+mpi_mips.obj: mpi_mips.s
+ $(CC) -Fo$@ $(ASFLAGS) -c mpi_mips.s
+
+mpi.lib: $(LIBOBJS)
+ ar -cvr mpi.lib $(LIBOBJS)
+ $(RANLIB) mpi.lib
+
+lib libs: mpi.lib
+
+#---------------------------------------
+
+MPTESTOBJS = mptest1.obj mptest2.obj mptest3.obj mptest3a.obj mptest4.obj \
+ mptest4a.obj mptest4b.obj mptest6.obj mptest7.obj mptest8.obj mptest9.obj
+MPTESTS = $(MPTESTOBJS:.obj=.exe)
+
+$(MPTESTOBJS): mptest%.obj: tests/mptest-%.c $(LIBHDRS)
+ $(CC) $(CFLAGS) -Fo$@ -c $<
+
+$(MPTESTS): mptest%.exe: mptest%.obj mpi.lib $(LIBS)
+ $(CC) $(CFLAGS) -Fo$@ $^
+
+tests: mptest1.exe mptest2.exe mptest3.exe mptest3a.exe mptest4.exe \
+ mptest4a.exe mptest4b.exe mptest6.exe bbsrand.exe
+
+utests: mptest7.exe mptest8.exe mptest9.exe
+
+#---------------------------------------
+
+EXTRAOBJS = bbsrand.obj bbs_rand.obj prng.obj
+UTILOBJS = primegen.obj metime.obj identest.obj basecvt.obj fact.obj \
+ exptmod.obj pi.obj makeprime.obj karatsuba.obj gcd.obj invmod.obj lap.obj \
+ isprime.obj dec2hex.obj hex2dec.obj
+UTILS = $(UTILOBJS:.obj=.exe)
+
+$(UTILS): %.exe : %.obj mpi.lib $(LIBS)
+ $(CC) $(CFLAGS) -Fo$@ $^
+
+$(UTILOBJS) $(EXTRAOBJS): %.obj : utils/%.c $(LIBHDRS)
+ $(CC) $(CFLAGS) -Fo$@ -c $<
+
+prng.exe: prng.obj bbs_rand.obj mpi.lib $(LIBS)
+ $(CC) $(CFLAGS) -Fo$@ $^
+
+bbsrand.exe: bbsrand.obj bbs_rand.obj mpi.lib $(LIBS)
+ $(CC) $(CFLAGS) -Fo$@ $^
+
+utils: $(UTILS) prng.exe bbsrand.exe
+
+#---------------------------------------
+
+test-info.c: test-arrays.txt
+ $(PERL) make-test-arrays test-arrays.txt > test-info.c
+
+mpi-test.obj: mpi-test.c test-info.c $(LIBHDRS)
+ $(CC) $(CFLAGS) -Fo$@ -c $<
+
+mpi-test.exe: mpi-test.obj mpi.lib $(LIBS)
+ $(CC) $(CFLAGS) -Fo$@ $^
+
+mdxptest.obj: mdxptest.c $(LIBHDRS) mpi-priv.h
+
+mdxptest.exe: mdxptest.obj mpi.lib $(LIBS)
+ $(CC) $(CFLAGS) -Fo$@ $^
+
+mulsqr.obj: mulsqr.c logtab.h mpi.h mpi-config.h mpprime.h
+ $(CC) $(CFLAGS) -DMP_SQUARE=1 -Fo$@ -c mulsqr.c
+
+mulsqr.exe: mulsqr.obj mpi.lib $(LIBS)
+ $(CC) $(CFLAGS) -Fo$@ $^
+
+#---------------------------------------
+
+alltests: tests utests mpi-test.exe
+
+tools: $(TOOLS)
+
+doc:
+ (cd doc; ./build)
+
+clean:
+ rm -f *.obj *.lib *.pdb *.ilk
+ cd utils; rm -f *.obj *.lib *.pdb *.ilk
+
+distclean: clean
+ rm -f mptest? mpi-test metime mulsqr karatsuba
+ rm -f mptest?a mptest?b
+ rm -f utils/mptest?
+ rm -f test-info.c logtab.h
+ rm -f mpi.lib
+ rm -f $(TOOLS)
+
+dist: Makefile $(HDRS) $(SRCS) $(DOCS)
+ tar -cvf mpi-$(VERS).tar Makefile $(HDRS) $(SRCS) $(DOCS)
+ pgps -ab mpi-$(VERS).tar
+ chmod +r mpi-$(VERS).tar.asc
+ gzip -9 mpi-$(VERS).tar
+
+
+print:
+ @echo LIBOBJS = $(LIBOBJS)
+# END
diff --git a/security/nss/lib/freebl/mpi/README b/security/nss/lib/freebl/mpi/README
new file mode 100644
index 000000000..475549bad
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/README
@@ -0,0 +1,749 @@
+This Source Code Form is subject to the terms of the Mozilla Public
+License, v. 2.0. If a copy of the MPL was not distributed with this
+file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+About the MPI Library
+---------------------
+
+The files 'mpi.h' and 'mpi.c' define a simple, arbitrary precision
+signed integer arithmetic package. The implementation is not the most
+efficient possible, but the code is small and should be fairly easily
+portable to just about any machine that supports an ANSI C compiler,
+as long as it is capable of at least 16-bit arithmetic (but also see
+below for more on this).
+
+This library was written with an eye to cryptographic applications;
+thus, some care is taken to make sure that temporary values are not
+left lying around in memory when they are no longer in use. This adds
+some overhead for zeroing buffers before they are released back into
+the free pool; however, it gives you the assurance that there is only
+one copy of your important values residing in your process's address
+space at a time. Obviously, it is difficult to guarantee anything, in
+a pre-emptive multitasking environment, but this at least helps you
+keep a lid on the more obvious ways your data can get spread around in
+memory.
+
+
+Using the Library
+-----------------
+
+To use the MPI library in your program, you must include the header:
+
+#include "mpi.h"
+
+This header provides all the type and function declarations you'll
+need to use the library. Almost all the names defined by the library
+begin with the prefix 'mp_', so it should be easy to keep them from
+clashing with your program's namespace (he says, glibly, knowing full
+well there are always pathological cases).
+
+There are a few things you may want to configure about the library.
+By default, the MPI library uses an unsigned short for its digit type,
+and an unsigned int for its word type. The word type must be big
+enough to contain at least two digits, for the primitive arithmetic to
+work out. On my machine, a short is 2 bytes and an int is 4 bytes --
+but if you have 64-bit ints, you might want to use a 4-byte digit and
+an 8-byte word. I have tested the library using 1-byte digits and
+2-byte words, as well. Whatever you choose to do, the things you need
+to change are:
+
+(1) The type definitions for mp_digit and mp_word.
+
+(2) The macro DIGIT_FMT which tells mp_print() how to display a
+ single digit. This is just a printf() format string, so you
+ can adjust it appropriately.
+
+(3) The macros DIGIT_MAX and MP_WORD_MAX, which specify the
+ largest value expressible in an mp_digit and an mp_word,
+ respectively.
+
+Both the mp_digit and mp_word should be UNSIGNED integer types. The
+code relies on having the full positive precision of the type used for
+digits and words.
+
+The remaining type definitions should be left alone, for the most
+part. The code in the library does not make any significant
+assumptions about the sizes of things, but there is little if any
+reason to change the other parameters, so I would recommend you leave
+them as you found them.
+
+The library comes with a Perl script, 'types.pl', which will scan your
+current Makefile settings, and attempt to find good definitions for
+these types. It relies on a Unix sort of build environment, so it
+probably won't work under MacOS or Windows, but it can be convenient
+if you're porting to a new flavour of Unix. Just run 'types.pl' at
+the command line, and it will spit out its results to the standard
+output.
+
+
+Conventions
+-----------
+
+Most functions in the library return a value of type mp_err. This
+permits the library to communicate success or various kinds of failure
+to the calling program. The return values currently defined are:
+
+ MP_OKAY - okay, operation succeeded, all's well
+ MP_YES - okay, the answer is yes (same as MP_OKAY)
+ MP_NO - okay, but answer is no (not MP_OKAY)
+ MP_MEM - operation ran out of memory
+ MP_RANGE - input parameter was out of range
+ MP_BADARG - an invalid input parameter was provided
+ MP_UNDEF - no output value is defined for this input
+
+The only function which currently uses MP_UNDEF is mp_invmod().
+Division by zero is undefined, but the division functions will return
+MP_RANGE for a zero divisor. MP_BADARG usually means you passed a
+bogus mp_int structure to the function. MP_YES and MP_NO are not used
+by the library itself; they're defined so you can use them in your own
+extensions.
+
+If you need a readable interpretation of these error codes in your
+program, you may also use the mp_strerror() function. This function
+takes an mp_err as input, and returns a pointer to a human-readable
+string describing the meaning of the error. These strings are stored
+as constants within the library, so the caller should not attempt to
+modify or free the memory associated with these strings.
+
+The library represents values in signed-magnitude format. Values
+strictly less than zero are negative, all others are considered
+positive (zero is positive by fiat). You can access the 'sign' member
+of the mp_int structure directly, but better is to use the mp_cmp_z()
+function, to find out which side of zero the value lies on.
+
+Most arithmetic functions have a single-digit variant, as well as the
+full arbitrary-precision. An mp_digit is an unsigned value between 0
+and DIGIT_MAX inclusive. The radix is available as RADIX. The number
+of bits in a given digit is given as DIGIT_BIT.
+
+Generally, input parameters are given before output parameters.
+Unless otherwise specified, any input parameter can be re-used as an
+output parameter, without confusing anything.
+
+The basic numeric type defined by the library is an mp_int. Virtually
+all the functions in the library take a pointer to an mp_int as one of
+their parameters. An explanation of how to create and use these
+structures follows. And so, without further ado...
+
+
+Initialization and Cleanup
+--------------------------
+
+The basic numeric type defined by the library is an 'mp_int'.
+However, it is not sufficient to simply declare a variable of type
+mp_int in your program. These variables also need to be initialized
+before they can be used, to allocate the internal storage they require
+for computation.
+
+This is done using one of the following functions:
+
+ mp_init(mp_int *mp);
+ mp_init_copy(mp_int *mp, mp_int *from);
+ mp_init_size(mp_int *mp, mp_size p);
+
+Each of these requires a pointer to a structure of type mp_int. The
+basic mp_init() simply initializes the mp_int to a default size, and
+sets its value to zero. If you would like to initialize a copy of an
+existing mp_int, use mp_init_copy(), where the 'from' parameter is the
+mp_int you'd like to make a copy of. The third function,
+mp_init_size(), permits you to specify how many digits of precision
+should be preallocated for your mp_int. This can help the library
+avoid unnecessary re-allocations later on.
+
+The default precision used by mp_init() can be retrieved using:
+
+ precision = mp_get_prec();
+
+This returns the number of digits that will be allocated. You can
+change this value by using:
+
+ mp_set_prec(unsigned int prec);
+
+Any positive value is acceptable -- if you pass zero, the default
+precision will be re-set to the compiled-in library default (this is
+specified in the header file 'mpi-config.h', and typically defaults to
+8 or 16).
+
+Just as you must allocate an mp_int before you can use it, you must
+clean up the structure when you are done with it. This is performed
+using the mp_clear() function. Remember that any mp_int that you
+create as a local variable in a function must be mp_clear()'d before
+that function exits, or else the memory allocated to that mp_int will
+be orphaned and unrecoverable.
+
+To set an mp_int to a given value, the following functions are given:
+
+ mp_set(mp_int *mp, mp_digit d);
+ mp_set_int(mp_int *mp, long z);
+
+The mp_set() function sets the mp_int to a single digit value, while
+mp_set_int() sets the mp_int to a signed long integer value.
+
+To set an mp_int to zero, use:
+
+ mp_zero(mp_int *mp);
+
+
+Copying and Moving
+------------------
+
+If you have two initialized mp_int's, and you want to copy the value
+of one into the other, use:
+
+ mp_copy(from, to)
+
+This takes care of clearing the old value of 'to', and copies the new
+value into it. If 'to' is not yet initialized, use mp_init_copy()
+instead (see above).
+
+Note: The library tries, whenever possible, to avoid allocating
+---- new memory. Thus, mp_copy() tries first to satisfy the needs
+ of the copy by re-using the memory already allocated to 'to'.
+ Only if this proves insufficient will mp_copy() actually
+ allocate new memory.
+
+ For this reason, if you know a priori that 'to' has enough
+ available space to hold 'from', you don't need to check the
+ return value of mp_copy() for memory failure. The USED()
+ macro tells you how many digits are used by an mp_int, and
+ the ALLOC() macro tells you how many are allocated.
+
+If you have two initialized mp_int's, and you want to exchange their
+values, use:
+
+ mp_exch(a, b)
+
+This is better than using mp_copy() with a temporary, since it will
+not (ever) touch the memory allocator -- it just swaps the exact
+contents of the two structures. The mp_exch() function cannot fail;
+if you pass it an invalid structure, it just ignores it, and does
+nothing.
+
+
+Basic Arithmetic
+----------------
+
+Once you have initialized your integers, you can operate on them. The
+basic arithmetic functions on full mp_int values are:
+
+mp_add(a, b, c) - computes c = a + b
+mp_sub(a, b, c) - computes c = a - b
+mp_mul(a, b, c) - computes c = a * b
+mp_sqr(a, b) - computes b = a * a
+mp_div(a, b, q, r) - computes q, r such that a = bq + r
+mp_div_2d(a, d, q, r) - computes q = a / 2^d, r = a % 2^d
+mp_expt(a, b, c) - computes c = a ** b
+mp_2expt(a, k) - computes a = 2^k
+
+The mp_div_2d() function efficiently computes division by powers of
+two. Either the q or r parameter may be NULL, in which case that
+portion of the computation will be discarded.
+
+The algorithms used for some of the computations here are described in
+the following files which are included with this distribution:
+
+mul.txt Describes the multiplication algorithm
+div.txt Describes the division algorithm
+expt.txt Describes the exponentiation algorithm
+sqrt.txt Describes the square-root algorithm
+square.txt Describes the squaring algorithm
+
+There are single-digit versions of most of these routines, as well.
+In the following prototypes, 'd' is a single mp_digit:
+
+mp_add_d(a, d, c) - computes c = a + d
+mp_sub_d(a, d, c) - computes c = a - d
+mp_mul_d(a, d, c) - computes c = a * d
+mp_mul_2(a, c) - computes c = a * 2
+mp_div_d(a, d, q, r) - computes q, r such that a = dq + r
+mp_div_2(a, c) - computes c = a / 2
+mp_expt_d(a, d, c) - computes c = a ** d
+
+The mp_mul_2() and mp_div_2() functions take advantage of the internal
+representation of an mp_int to do multiplication by two more quickly
+than mp_mul_d() would. Other basic functions of an arithmetic variety
+include:
+
+mp_zero(a) - assign 0 to a
+mp_neg(a, c) - negate a: c = -a
+mp_abs(a, c) - absolute value: c = |a|
+
+
+Comparisons
+-----------
+
+Several comparison functions are provided. Each of these, unless
+otherwise specified, returns zero if the comparands are equal, < 0 if
+the first is less than the second, and > 0 if the first is greater
+than the second:
+
+mp_cmp_z(a) - compare a <=> 0
+mp_cmp_d(a, d) - compare a <=> d, d is a single digit
+mp_cmp(a, b) - compare a <=> b
+mp_cmp_mag(a, b) - compare |a| <=> |b|
+mp_isodd(a) - return nonzero if odd, zero otherwise
+mp_iseven(a) - return nonzero if even, zero otherwise
+
+
+Modular Arithmetic
+------------------
+
+Modular variations of the basic arithmetic functions are also
+supported. These are available if the MP_MODARITH parameter in
+mpi-config.h is turned on (it is by default). The modular arithmetic
+functions are:
+
+mp_mod(a, m, c) - compute c = a (mod m), 0 <= c < m
+mp_mod_d(a, d, c) - compute c = a (mod d), 0 <= c < d (see below)
+mp_addmod(a, b, m, c) - compute c = (a + b) mod m
+mp_submod(a, b, m, c) - compute c = (a - b) mod m
+mp_mulmod(a, b, m, c) - compute c = (a * b) mod m
+mp_sqrmod(a, m, c) - compute c = (a * a) mod m
+mp_exptmod(a, b, m, c) - compute c = (a ** b) mod m
+mp_exptmod_d(a, d, m, c)- compute c = (a ** d) mod m
+
+The mp_sqr() function squares its input argument. A call to mp_sqr(a,
+c) is identical in meaning to mp_mul(a, a, c); however, if the
+MP_SQUARE variable is set true in mpi-config.h (see below), then it
+will be implemented with a different algorithm, that is supposed to
+take advantage of the redundant computation that takes place during
+squaring. Unfortunately, some compilers result in worse performance
+on this code, so you can change the behaviour at will. There is a
+utility program "mulsqr.c" that lets you test which does better on
+your system.
+
+The mp_sqrmod() function is analogous to the mp_sqr() function; it
+uses the mp_sqr() function rather than mp_mul(), and then performs the
+modular reduction. This probably won't help much unless you are doing
+a lot of them.
+
+See the file 'square.txt' for a synopsis of the algorithm used.
+
+Note: The mp_mod_d() function computes a modular reduction around
+---- a single digit d. The result is a single digit c.
+
+Because an inverse is defined for a (mod m) if and only if (a, m) = 1
+(that is, if a and m are relatively prime), mp_invmod() may not be
+able to compute an inverse for the arguments. In this case, it
+returns the value MP_UNDEF, and does not modify c. If an inverse is
+defined, however, it returns MP_OKAY, and sets c to the value of the
+inverse (mod m).
+
+See the file 'redux.txt' for a description of the modular reduction
+algorithm used by mp_exptmod().
+
+
+Greatest Common Divisor
+-----------------------
+
+The greatest common divisor of two values can be found using one of the
+following functions:
+
+mp_gcd(a, b, c) - compute c = (a, b) using binary algorithm
+mp_lcm(a, b, c) - compute c = [a, b] = ab / (a, b)
+mp_xgcd(a, b, g, x, y) - compute g, x, y so that ax + by = g = (a, b)
+
+Also provided is a function to compute modular inverses, if they
+exist:
+
+mp_invmod(a, m, c) - compute c = a^-1 (mod m), if it exists
+
+The function mp_xgcd() computes the greatest common divisor, and also
+returns values of x and y satisfying Bezout's identity. This is used
+by mp_invmod() to find modular inverses. However, if you do not need
+these values, you will find that mp_gcd() is MUCH more efficient,
+since it doesn't need all the intermediate values that mp_xgcd()
+requires in order to compute x and y.
+
+The mp_gcd() (and mp_xgcd()) functions use the binary (extended) GCD
+algorithm due to Josef Stein.
+
+
+Input & Output Functions
+------------------------
+
+The following basic I/O routines are provided. These are present at
+all times:
+
+mp_read_radix(mp, str, r) - convert a string in radix r to an mp_int
+mp_read_raw(mp, s, len) - convert a string of bytes to an mp_int
+mp_radix_size(mp, r) - return length of buffer needed by mp_toradix()
+mp_raw_size(mp) - return length of buffer needed by mp_toraw()
+mp_toradix(mp, str, r) - convert an mp_int to a string of radix r
+ digits
+mp_toraw(mp, str) - convert an mp_int to a string of bytes
+mp_tovalue(ch, r) - convert ch to its value when taken as
+ a radix r digit, or -1 if invalid
+mp_strerror(err) - get a string describing mp_err value 'err'
+
+If you compile the MPI library with MP_IOFUNC defined, you will also
+have access to the following additional I/O function:
+
+mp_print(mp, ofp) - print an mp_int as text to output stream ofp
+
+Note that mp_radix_size() returns a size in bytes guaranteed to be AT
+LEAST big enough for the digits output by mp_toradix(). Because it
+uses an approximation technique to figure out how many digits will be
+needed, it may return a figure which is larger than necessary. Thus,
+the caller should not rely on the value to determine how many bytes
+will actually be written by mp_toradix(). The string mp_toradix()
+creates will be NUL terminated, so the standard C library function
+strlen() should be able to ascertain this for you, if you need it.
+
+The mp_read_radix() and mp_toradix() functions support bases from 2 to
+64 inclusive. If you require more general radix conversion facilities
+than this, you will need to write them yourself (that's why mp_div_d()
+is provided, after all).
+
+Note: mp_read_radix() will accept as digits either capital or
+---- lower-case letters. However, the current implementation of
+ mp_toradix() only outputs upper-case letters, when writing
+ bases between 10 and 36. The underlying code supports using
+ lower-case letters, but the interface stub does not have a
+ selector for it. You can add one yourself if you think it
+ is worthwhile -- I do not. Bases from 36 to 64 use lower-
+ case letters as distinct from upper-case. Bases 63 and
+ 64 use the characters '+' and '/' as digits.
+
+ Note also that compiling with MP_IOFUNC defined will cause
+ inclusion of <stdio.h>, so if you are trying to write code
+ which does not depend on the standard C library, you will
+ probably want to avoid this option. This is needed because
+ the mp_print() function takes a standard library FILE * as
+ one of its parameters, and uses the fprintf() function.
+
+The mp_toraw() function converts the integer to a sequence of bytes,
+in big-endian ordering (most-significant byte first). Assuming your
+bytes are 8 bits wide, this corresponds to base 256. The sign is
+encoded as a single leading byte, whose value is 0 for zero or
+positive values, or 1 for negative values. The mp_read_raw() function
+reverses this process -- it takes a buffer of bytes, interprets the
+first as a sign indicator (0 = zero/positive, nonzero = negative), and
+the rest as a sequence of 1-byte digits in big-endian ordering.
+
+The mp_raw_size() function returns the exact number of bytes required
+to store the given integer in "raw" format (as described in the
+previous paragraph). Zero is returned in case of error; a valid
+integer will require at least three bytes of storage.
+
+In previous versions of the MPI library, an "external representation
+format" was supported. This was removed, however, because I found I
+was never using it, it was not as portable as I would have liked, and
+I decided it was a waste of space.
+
+
+Other Functions
+---------------
+
+The files 'mpprime.h' and 'mpprime.c' define some routines which are
+useful for divisibility testing and probabilistic primality testing.
+The routines defined are:
+
+mpp_divis(a, b) - is a divisible by b?
+mpp_divis_d(a, d) - is a divisible by digit d?
+mpp_random(a) - set a to random value at current precision
+mpp_random_size(a, prec) - set a to random value at given precision
+
+Note: The mpp_random() and mpp_random_size() functions use the C
+---- library's rand() function to generate random values. It is
+ up to the caller to seed this generator before it is called.
+ These functions are not suitable for generating quantities
+ requiring cryptographic-quality randomness; they are intended
+ primarily for use in primality testing.
+
+ Note too that the MPI library does not call srand(), so your
+ application should do this, if you ever want the sequence
+ to change.
+
+mpp_divis_vector(a, v, s, w) - is a divisible by any of the s digits
+ in v? If so, let w be the index of
+ that digit
+
+mpp_divis_primes(a, np) - is a divisible by any of the first np
+ primes? If so, set np to the prime
+ which divided a.
+
+mpp_fermat(a, w) - test if w^a = w (mod a). If so,
+ returns MP_YES, otherwise MP_NO.
+
+mpp_pprime(a, nt) - perform nt iterations of the Rabin-
+ Miller probabilistic primality test
+ on a. Returns MP_YES if all tests
+ passed, or MP_NO if any test fails.
+
+The mpp_fermat() function works based on Fermat's little theorem, a
+consequence of which is that if p is a prime, and (w, p) = 1, then:
+
+ w^p = w (mod p)
+
+Put another way, if w^p != w (mod p), then p is not prime. The test
+is expensive to compute, but it helps to quickly eliminate an enormous
+class of composite numbers prior to Rabin-Miller testing.
+
+Building the Library
+--------------------
+
+The MPI library is designed to be as self-contained as possible. You
+should be able to compile it with your favourite ANSI C compiler, and
+link it into your program directly. If you are on a Unix system using
+the GNU C compiler (gcc), the following should work:
+
+% gcc -ansi -pedantic -Wall -O2 -c mpi.c
+
+The file 'mpi-config.h' defines several configurable parameters for
+the library, which you can adjust to suit your application. At the
+time of this writing, the available options are:
+
+MP_IOFUNC - Define true to include the mp_print() function,
+ which is moderately useful for debugging. This
+ implicitly includes <stdio.h>.
+
+MP_MODARITH - Define true to include the modular arithmetic
+ functions. If you don't need modular arithmetic
+ in your application, you can set this to zero to
+ leave out all the modular routines.
+
+MP_NUMTH - Define true to include number theoretic functions
+ such as mp_gcd(), mp_lcm(), and mp_invmod().
+
+MP_LOGTAB - If true, the file "logtab.h" is included, which
+ is basically a static table of base 2 logarithms.
+ These are used to compute how big the buffers for
+ radix conversion need to be. If you set this false,
+ the library includes <math.h> and uses log(). This
+ typically forces you to link against math libraries.
+
+MP_MEMSET - If true, use memset() to zero buffers. If you run
+ into weird alignment related bugs, set this to zero
+ and an explicit loop will be used.
+
+MP_MEMCPY - If true, use memcpy() to copy buffers. If you run
+ into weird alignment bugs, set this to zero and an
+ explicit loop will be used.
+
+MP_ARGCHK - Set to 0, 1, or 2. This defines how the argument
+ checking macro, ARGCHK(), gets expanded. If this
+ is set to zero, ARGCHK() expands to nothing; no
+ argument checks are performed. If this is 1, the
+ ARGCHK() macro expands to code that returns MP_BADARG
+ or similar at runtime. If it is 2, ARGCHK() expands
+ to an assert() call that aborts the program on a
+ bad input.
+
+MP_DEBUG - Turns on debugging output. This is probably not at
+ all useful unless you are debugging the library. It
+ tends to spit out a LOT of output.
+
+MP_DEFPREC - The default precision of a newly-created mp_int, in
+ digits. The precision can be changed at runtime by
+ the mp_set_prec() function, but this is its initial
+ value.
+
+MP_SQUARE - If this is set to a nonzero value, the mp_sqr()
+ function will use an alternate algorithm that takes
+ advantage of the redundant inner product computation
+ when both multiplicands are identical. Unfortunately,
+ with some compilers this is actually SLOWER than just
+ calling mp_mul() with the same argument twice. So
+ if you set MP_SQUARE to zero, mp_sqr() will be expan-
+ ded into a call to mp_mul(). This applies to all
+ the uses of mp_sqr(), including mp_sqrmod() and the
+ internal calls to s_mp_sqr() inside mpi.c
+
+ The program 'mulsqr' (mulsqr.c) can be used to test
+ which works best for your configuration. Set up the
+ CC and CFLAGS variables in the Makefile, then type:
+
+ make mulsqr
+
+ Invoke it with arguments similar to the following:
+
+ mulsqr 25000 1024
+
+ That is, 25000 products computed on 1024-bit values.
+ The output will compare the two timings, and recommend
+ a setting for MP_SQUARE. It is off by default.
+
+If you would like to use the mp_print() function (see above), be sure
+to define MP_IOFUNC in mpi-config.h. Many of the test drivers in the
+'tests' subdirectory expect this to be defined (although the test
+driver 'mpi-test' doesn't need it).
+
+The Makefile which comes with the library should take care of building
+the library for you, if you have set the CC and CFLAGS variables at
+the top of the file appropriately. By default, they are set up to
+use the GNU C compiler:
+
+CC=gcc
+CFLAGS=-ansi -pedantic -Wall -O2
+
+If all goes well, the library should compile without warnings using
+this combination. You should, of course, make whatever adjustments
+you find necessary.
+
+The MPI library distribution comes with several additional programs
+which are intended to demonstrate the use of the library, and provide
+a framework for testing it. There are a handful of test driver
+programs, in the files named 'mptest-X.c', where X is a digit. Also,
+there are some simple command-line utilities (in the 'utils'
+directory) for manipulating large numbers. These include:
+
+basecvt.c A radix-conversion program, supporting bases from
+ 2 to 64 inclusive.
+
+bbsrand.c A BBS (quadratic residue) pseudo-random number
+ generator. The file 'bbsrand.c' is just the driver
+ for the program; the real code lives in the files
+ 'bbs_rand.h' and 'bbs_rand.c'
+
+dec2hex.c Converts decimal to hexadecimal
+
+gcd.c Computes the greatest common divisor of two values.
+ If invoked as 'xgcd', also computes constants x and
+ y such that (a, b) = ax + by, in accordance with
+ Bezout's identity.
+
+hex2dec.c Converts hexadecimal to decimal
+
+invmod.c Computes modular inverses
+
+isprime.c Performs the Rabin-Miller probabilistic primality
+ test on a number. Values which fail this test are
+ definitely composite, and those which pass are very
+ likely to be prime (although there are no guarantees)
+
+lap.c Computes the order (least annihilating power) of
+ a value v modulo m. Very dumb algorithm.
+
+primegen.c Generates large (probable) primes.
+
+prng.c A pseudo-random number generator based on the
+ BBS generator code in 'bbs_rand.c'
+
+sieve.c Implements the Sieve of Eratosthenes, using a big
+ bitmap, to generate a list of prime numbers.
+
+fact.c Computes the factorial of an arbitrary precision
+ integer (iterative).
+
+exptmod.c Computes arbitrary precision modular exponentiation
+ from the command line (exptmod a b m -> a^b (mod m))
+
+Most of these can be built from the Makefile that comes with the
+library. Try 'make tools', if your environment supports it.
+
+
+Testing the Library
+-------------------
+
+Automatic test vectors are included, in the form of a program called
+'mpi-test'. To build this program and run all the tests, simply
+invoke the shell script 'all-tests'. If all the tests pass, you
+should see a message:
+
+ All unit tests passed
+
+If something went wrong, you'll get:
+
+ One or more tests failed.
+
+If this happens, scan back through the preceding lines, to see which
+test failed. Any failure indicates a bug in the library, which needs
+to be fixed before it will give accurate results. If you get any such
+thing, please let me know, and I'll try to fix it. Please let me know
+what platform and compiler you were using, as well as which test
+failed. If a reason for failure was given, please send me that text
+as well.
+
+If you're on a system where the standard Unix build tools don't work,
+you can build the 'mpi-test' program manually, and run it by hand.
+This is tedious and obnoxious, sorry.
+
+Further manual testing can be performed by building the manual testing
+programs, whose source is found in the 'tests' subdirectory. Each
+test is in a source file called 'mptest-X.c'. The Makefile contains a
+target to build all of them at once:
+
+ make tests
+
+Read the comments at the top of each source file to see what the
+driver is supposed to test. You probably don't need to do this; these
+programs were only written to help me as I was developing the library.
+
+The relevant files are:
+
+mpi-test.c The source for the test driver
+
+make-test-arrays A Perl script to generate some of the internal
+ data structures used by mpi-test.c
+
+test-arrays.txt The source file for make-test-arrays
+
+all-tests A Bourne shell script which runs all the
+ tests in the mpi-test suite
+
+Running 'make mpi-test' should build the mpi-test program. If you
+cannot use make, here is what needs to be done:
+
+(1) Use 'make-test-arrays' to generate the file 'test-info.c' from
+ the 'test-arrays.txt' file. Since Perl can be found everywhere,
+ this should be no trouble. Under Unix, this looks like:
+
+ make-test-arrays test-arrays.txt > test-info.c
+
+(2) Build the MPI library:
+
+ gcc -ansi -pedantic -Wall -c mpi.c
+
+(3) Build the mpi-test program:
+
+ gcc -ansi -pedantic -Wall -o mpi-test mpi.o mpi-test.c
+
+When you've got mpi-test, you can use 'all-tests' to run all the tests
+made available by mpi-test. If any of them fail, there should be a
+diagnostic indicating what went wrong. These are fairly high-level
+diagnostics, and won't really help you debug the problem; they're
+simply intended to help you isolate which function caused the problem.
+If you encounter a problem of this sort, feel free to e-mail me, and I
+will certainly attempt to help you debug it.
+
+Note: Several of the tests hard-wired into 'mpi-test' operate under
+---- the assumption that you are using at least a 16-bit mp_digit
+ type. If that is not true, several tests might fail, because
+ of range problems with the maximum digit value.
+
+ If you are using an 8-bit digit, you will also need to
+ modify the code for mp_read_raw(), which assumes that
+ multiplication by 256 can be done with mp_mul_d(), a
+ fact that fails when DIGIT_MAX is 255. You can replace
+ the call with s_mp_lshd(), which will give you the same
+ effect, and without doing as much work. :)
+
+Acknowledgements:
+----------------
+
+The algorithms used in this library were drawn primarily from Volume
+2 of Donald Knuth's magnum opus, _The Art of Computer Programming_,
+"Seminumerical Algorithms". Barrett's algorithm for modular reduction
+came from Menezes, Oorschot, and Vanstone's _Handbook of Applied
+Cryptography_, Chapter 14.
+
+Thanks are due to Tom St. Denis, for finding an obnoxious sign-related
+bug in mp_read_raw() that made things break on platforms which use
+signed chars.
+
+About the Author
+----------------
+
+This software was written by Michael J. Fromberger. You can contact
+the author as follows:
+
+E-mail: <sting@linguist.dartmouth.edu>
+
+Postal: 8000 Cummings Hall, Thayer School of Engineering
+ Dartmouth College, Hanover, New Hampshire, USA
+
+PGP key: http://linguist.dartmouth.edu/~sting/keys/mjf.html
+ 9736 188B 5AFA 23D6 D6AA BE0D 5856 4525 289D 9907
+
+Last updated: 16-Jan-2000
diff --git a/security/nss/lib/freebl/mpi/all-tests b/security/nss/lib/freebl/mpi/all-tests
new file mode 100755
index 000000000..3429a15c0
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/all-tests
@@ -0,0 +1,83 @@
+#!/bin/sh
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+ECHO=/bin/echo
+MAKE=gmake
+# printf is used where escapes are needed; echo's handling of \n and \c varies.
+printf '\n** Running unit tests for MPI library\n\n'
+
+# Build the mpi-test program, which comprises all the unit tests for
+# the MPI library...
+
+$ECHO "Bringing mpi-test up to date ... "
+if $MAKE mpi-test ; then
+  :
+else
+  $ECHO " "
+  $ECHO "Make failed to build mpi-test."
+  $ECHO " "
+  exit 1
+fi
+
+if [ ! -x mpi-test ] ; then
+  $ECHO " "
+  $ECHO "Cannot find 'mpi-test' program, testing cannot continue."
+  $ECHO " "
+  exit 1
+fi
+
+# Get the list of available test suites...
+tests=`./mpi-test list | awk '{print $1}'`
+errs=0
+
+# Run each test suite and check the result code of mpi-test
+for test in $tests ; do
+  printf '%s ... ' "$test"
+  if ./mpi-test $test ; then
+    $ECHO "passed"
+  else
+    $ECHO "FAILED"
+    errs=1
+  fi
+done
+
+# If any tests failed, we'll stop at this point
+if [ "$errs" = "0" ] ; then
+  $ECHO "All unit tests passed"
+else
+  $ECHO "One or more tests failed"
+  exit 1
+fi
+
+# Now try to build the 'pi' program, and check that the output of
+# 'pi 2000' matches the reference file tests/pi2k.txt
+printf '\n** Running other tests\n\n'
+
+$ECHO "Bringing 'pi' up to date ... "
+if $MAKE pi ; then
+  :
+else
+  printf '\nMake failed to build pi.\n\n'
+  exit 1
+fi
+
+if [ ! -x pi ] ; then
+  printf "\nCannot find 'pi' program; testing cannot continue.\n\n"
+  exit 1
+fi
+
+# Unpredictable temp file where mktemp exists; removed on every exit path.
+tmpfile=`mktemp "${TMPDIR:-/tmp}/pi.tmp.XXXXXX" 2>/dev/null` || tmpfile=/tmp/pi.tmp.$$
+trap 'rm -f "$tmpfile"' 0; trap 'exit 1' 1 2 15
+./pi 2000 > "$tmpfile"
+if cmp tests/pi2k.txt "$tmpfile" ; then
+  $ECHO "Okay! The pi test passes."
+else
+  $ECHO "Oops! The pi test failed. :("
+  exit 1
+fi
+
+exit 0
+# Here there be dragons
diff --git a/security/nss/lib/freebl/mpi/doc/LICENSE b/security/nss/lib/freebl/mpi/doc/LICENSE
new file mode 100644
index 000000000..35cca68ce
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/LICENSE
@@ -0,0 +1,11 @@
+Within this directory, each of the files listed below is licensed under
+the terms given in the file LICENSE-MPL, also in this directory.
+
+basecvt.pod
+gcd.pod
+invmod.pod
+isprime.pod
+lap.pod
+mpi-test.pod
+prime.txt
+prng.pod
diff --git a/security/nss/lib/freebl/mpi/doc/LICENSE-MPL b/security/nss/lib/freebl/mpi/doc/LICENSE-MPL
new file mode 100644
index 000000000..41dc2327f
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/LICENSE-MPL
@@ -0,0 +1,3 @@
+This Source Code Form is subject to the terms of the Mozilla Public
+License, v. 2.0. If a copy of the MPL was not distributed with this
+file, You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/security/nss/lib/freebl/mpi/doc/basecvt.pod b/security/nss/lib/freebl/mpi/doc/basecvt.pod
new file mode 100644
index 000000000..c3d87fbc7
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/basecvt.pod
@@ -0,0 +1,65 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+=head1 NAME
+
+ basecvt - radix conversion for arbitrary precision integers
+
+=head1 SYNOPSIS
+
+ basecvt <ibase> <obase> [values]
+
+=head1 DESCRIPTION
+
+The B<basecvt> program is a command-line tool for converting integers
+of arbitrary precision from one radix to another. The current version
+supports radix values from 2 (binary) to 64, inclusive. The first two
+command line arguments specify the input and output radix, in base 10.
+Any further arguments are taken to be integers notated in the input
+radix, and these are converted to the output radix. The output is
+written, one integer per line, to standard output.
+
+When reading integers, only digits that are valid for the input
+radix are accepted. Processing of an integer terminates when an
+invalid input digit is encountered. So, for example, if you set the
+input radix to 10 and enter '10ACF', B<basecvt> would assume that you
+had entered '10' and ignore the rest of the string.
+
+If no values are provided, no output is written, but the program
+simply terminates with a zero exit status. Error diagnostics are
+written to standard error in the event of out-of-range radix
+specifications. Regardless of the actual values of the input and
+output radix, the radix arguments are taken to be in base 10 (decimal)
+notation.
+
+=head1 DIGITS
+
+For radices from 2-10, standard ASCII decimal digits 0-9 are used for
+both input and output. For radices from 11-36, the ASCII letters A-Z
+are also included, following the convention used in hexadecimal. In
+this range, input is accepted in either upper or lower case, although
+on output only lower-case letters are used.
+
+For radices from 37-62, the output includes both upper- and lower-case
+ASCII letters, and case matters. In this range, case is distinguished
+both for input and for output values.
+
+For radices 63 and 64, the characters '+' (plus) and '/' (forward
+solidus) are also used. These are derived from the MIME base64
+encoding scheme. The overall encoding is not the same as base64,
+because the ASCII digits are used for the bottom of the range, and the
+letters are shifted upward; however, the output will consist of the
+same character set.
+
+This input and output behaviour is inherited from the MPI library used
+by B<basecvt>, and so is not configurable at runtime.
+
+=head1 SEE ALSO
+
+ dec2hex(1), hex2dec(1)
+
+=head1 AUTHOR
+
+ Michael J. Fromberger <sting@linguist.dartmouth.edu>
+ Thayer School of Engineering, Hanover, New Hampshire, USA
diff --git a/security/nss/lib/freebl/mpi/doc/build b/security/nss/lib/freebl/mpi/doc/build
new file mode 100755
index 000000000..4d75b1e5a
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/build
@@ -0,0 +1,30 @@
+#!/bin/sh
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+VERS="1.7p6"
+SECT="1"
+NAME="MPI Tools"
+
+echo "Building manual pages ..."
+case $# in
+ 0)
+ files=`ls *.pod`
+ ;;
+ *)
+ files=$*
+ ;;
+esac
+
+for name in $files
+do
+ echo -n "$name ... "
+# sname=`noext $name`
+ sname=`basename $name .pod`
+ pod2man --section="$SECT" --center="$NAME" --release="$VERS" $name > $sname.$SECT
+ echo "(done)"
+done
+
+echo "Finished building."
+
diff --git a/security/nss/lib/freebl/mpi/doc/div.txt b/security/nss/lib/freebl/mpi/doc/div.txt
new file mode 100644
index 000000000..c13fb6ef1
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/div.txt
@@ -0,0 +1,64 @@
+Division
+
+This describes the division algorithm used by the MPI library.
+
+Input: a, b; a > b
+Compute: Q, R; a = Qb + R
+
+The input numbers are normalized so that the high-order digit of b is
+at least half the radix. This guarantees that we have a reasonable
+way to guess at the digits of the quotient (this method was taken from
+Knuth, vol. 2, with adaptations).
+
+To normalize, test the high-order digit of b. If it is less than half
+the radix, multiply both a and b by d, where:
+
+ d = (radix - 1) / (bmax + 1)
+
+...where bmax is the high-order digit of b. Otherwise, set d = 1.
+
+Given normalized values for a and b, let the notation a[n] denote the
+nth digit of a. Let #a be the number of significant figures of a (not
+including any leading zeroes).
+
+ Let R = 0
+ Let p = #a - 1
+
+ while(p >= 0)
+ do
+ R = (R * radix) + a[p]
+ p = p - 1
+ while(R < b and p >= 0)
+
+ if(R < b)
+ break
+
+ q = (R[#R - 1] * radix) + R[#R - 2]
+ q = q / b[#b - 1]
+
+ T = b * q
+
+ while(T > R)
+ q = q - 1
+ T = T - b
+ endwhile
+
+ R = R - T
+
+ Q = (Q * radix) + q
+
+ endwhile
+
+At this point, Q is the quotient, and R is the normalized remainder.
+To denormalize R, compute:
+
+ R = (R / d)
+
+At this point, you are finished.
+
+------------------------------------------------------------------
+ This Source Code Form is subject to the terms of the Mozilla Public
+ License, v. 2.0. If a copy of the MPL was not distributed with this
+ file, You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/security/nss/lib/freebl/mpi/doc/expt.txt b/security/nss/lib/freebl/mpi/doc/expt.txt
new file mode 100644
index 000000000..bd9d6f196
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/expt.txt
@@ -0,0 +1,94 @@
+Exponentiation
+
+For exponentiation, the MPI library uses a simple and fairly standard
+square-and-multiply method. The algorithm is this:
+
+Input: a, b
+Output: a ** b
+
+ s = 1
+
+ while(b != 0)
+ if(b is odd)
+ s = s * a
+ endif
+
+ b = b / 2
+
+ a = a * a
+ endwhile
+
+ return s
+
+The modular exponentiation is done the same way, except replacing:
+
+ s = s * a
+
+with
+ s = (s * a) mod m
+
+and replacing
+
+ a = a * a
+
+with
+
+ a = (a * a) mod m
+
+Here is a sample exponentiation using the MPI library, as compared to
+the same problem solved by the Unix 'bc' program on my system:
+
+Computation of 2,381,283 ** 235
+
+'bc' says:
+
+4385CA4A804D199FBEAD95FAD0796FAD0D0B51FC9C16743C45568C789666985DB719\
+4D90E393522F74C9601262C0514145A49F3B53D00983F95FDFCEA3D0043ECEF6227E\
+6FB59C924C3EE74447B359B5BF12A555D46CB819809EF423F004B55C587D6F0E8A55\
+4988036A42ACEF9F71459F97CEF6E574BD7373657111648626B1FF8EE15F663B2C0E\
+6BBE5082D4CDE8E14F263635AE8F35DB2C280819517BE388B5573B84C5A19C871685\
+FD408A6471F9D6AFAF5129A7548EAE926B40874B340285F44765BF5468CE20A13267\
+CD88CE6BC786ACED36EC7EA50F67FF27622575319068A332C3C0CB23E26FB55E26F4\
+5F732753A52B8E2FB4D4F42D894242613CA912A25486C3DEC9C66E5DB6182F6C1761\
+CF8CD0D255BE64B93836B27D452AE38F950EB98B517D4CF50D48F0165EF0CCCE1F5C\
+49BF18219FDBA0EEDD1A7E8B187B70C2BAED5EC5C6821EF27FAFB1CFF70111C52235\
+5E948B93A015AA1AE152B110BB5658CB14D3E45A48BFE7F082C1182672A455A695CD\
+A1855E8781E625F25B41B516E77F589FA420C3B058861EA138CF7A2C58DB3C7504FD\
+D29554D78237834CC5AE710D403CC4F6973D5012B7E117A8976B14A0B5AFA889BD47\
+92C461F0F96116F00A97AE9E83DC5203680CAF9A18A062566C145650AB86BE4F907F\
+A9F7AB4A700B29E1E5BACCD6DCBFA513E10832815F710807EED2E279081FEC61D619\
+AB270BEB3D3A1787B35A9DD41A8766CF21F3B5C693B3BAB1C2FA14A4ED202BC35743\
+E5CBE2391624D4F8C9BFBBC78D69764E7C6C5B11BF005677BFAD17D9278FFC1F158F\
+1B3683FF7960FA0608103792C4163DC0AF3E06287BB8624F8FE3A0FFBDF82ACECA2F\
+CFFF2E1AC93F3CA264A1B
+
+MPI says:
+
+4385CA4A804D199FBEAD95FAD0796FAD0D0B51FC9C16743C45568C789666985DB719\
+4D90E393522F74C9601262C0514145A49F3B53D00983F95FDFCEA3D0043ECEF6227E\
+6FB59C924C3EE74447B359B5BF12A555D46CB819809EF423F004B55C587D6F0E8A55\
+4988036A42ACEF9F71459F97CEF6E574BD7373657111648626B1FF8EE15F663B2C0E\
+6BBE5082D4CDE8E14F263635AE8F35DB2C280819517BE388B5573B84C5A19C871685\
+FD408A6471F9D6AFAF5129A7548EAE926B40874B340285F44765BF5468CE20A13267\
+CD88CE6BC786ACED36EC7EA50F67FF27622575319068A332C3C0CB23E26FB55E26F4\
+5F732753A52B8E2FB4D4F42D894242613CA912A25486C3DEC9C66E5DB6182F6C1761\
+CF8CD0D255BE64B93836B27D452AE38F950EB98B517D4CF50D48F0165EF0CCCE1F5C\
+49BF18219FDBA0EEDD1A7E8B187B70C2BAED5EC5C6821EF27FAFB1CFF70111C52235\
+5E948B93A015AA1AE152B110BB5658CB14D3E45A48BFE7F082C1182672A455A695CD\
+A1855E8781E625F25B41B516E77F589FA420C3B058861EA138CF7A2C58DB3C7504FD\
+D29554D78237834CC5AE710D403CC4F6973D5012B7E117A8976B14A0B5AFA889BD47\
+92C461F0F96116F00A97AE9E83DC5203680CAF9A18A062566C145650AB86BE4F907F\
+A9F7AB4A700B29E1E5BACCD6DCBFA513E10832815F710807EED2E279081FEC61D619\
+AB270BEB3D3A1787B35A9DD41A8766CF21F3B5C693B3BAB1C2FA14A4ED202BC35743\
+E5CBE2391624D4F8C9BFBBC78D69764E7C6C5B11BF005677BFAD17D9278FFC1F158F\
+1B3683FF7960FA0608103792C4163DC0AF3E06287BB8624F8FE3A0FFBDF82ACECA2F\
+CFFF2E1AC93F3CA264A1B
+
+Diff says:
+% diff bc.txt mp.txt
+%
+
+------------------------------------------------------------------
+ This Source Code Form is subject to the terms of the Mozilla Public
+ License, v. 2.0. If a copy of the MPL was not distributed with this
+ file, You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/security/nss/lib/freebl/mpi/doc/gcd.pod b/security/nss/lib/freebl/mpi/doc/gcd.pod
new file mode 100644
index 000000000..b5b8fa34f
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/gcd.pod
@@ -0,0 +1,28 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+=head1 NAME
+
+ gcd - compute greatest common divisor of two integers
+
+=head1 SYNOPSIS
+
+ gcd <a> <b>
+
+=head1 DESCRIPTION
+
+The B<gcd> program computes the greatest common divisor of two
+arbitrary-precision integers I<a> and I<b>. The result is written in
+standard decimal notation to the standard output.
+
+If I<b> is zero, B<gcd> will print an error message and exit.
+
+=head1 SEE ALSO
+
+invmod(1), isprime(1), lap(1)
+
+=head1 AUTHOR
+
+ Michael J. Fromberger <sting@linguist.dartmouth.edu>
+ Thayer School of Engineering, Hanover, New Hampshire, USA
diff --git a/security/nss/lib/freebl/mpi/doc/invmod.pod b/security/nss/lib/freebl/mpi/doc/invmod.pod
new file mode 100644
index 000000000..0194f4488
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/invmod.pod
@@ -0,0 +1,34 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+=head1 NAME
+
+ invmod - compute modular inverse of an integer
+
+=head1 SYNOPSIS
+
+ invmod <a> <m>
+
+=head1 DESCRIPTION
+
+The B<invmod> program computes the inverse of I<a>, modulo I<m>, if
+that inverse exists. Both I<a> and I<m> are arbitrary-precision
+integers in decimal notation. The result is written in standard
+decimal notation to the standard output.
+
+If there is no inverse, the message:
+
+ No inverse
+
+...will be printed to the standard output (an inverse exists if and
+only if the greatest common divisor of I<a> and I<m> is 1).
+
+=head1 SEE ALSO
+
+gcd(1), isprime(1), lap(1)
+
+=head1 AUTHOR
+
+ Michael J. Fromberger <sting@linguist.dartmouth.edu>
+ Thayer School of Engineering, Hanover, New Hampshire, USA
diff --git a/security/nss/lib/freebl/mpi/doc/isprime.pod b/security/nss/lib/freebl/mpi/doc/isprime.pod
new file mode 100644
index 000000000..a8ec1f7ee
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/isprime.pod
@@ -0,0 +1,63 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+=head1 NAME
+
+ isprime - probabilistic primality testing
+
+=head1 SYNOPSIS
+
+ isprime <a>
+
+=head1 DESCRIPTION
+
+The B<isprime> program attempts to determine whether the arbitrary
+precision integer I<a> is prime. It first tests I<a> for divisibility
+by the first 170 or so small primes, and assuming I<a> is not
+divisible by any of these, applies 15 iterations of the Rabin-Miller
+probabilistic primality test.
+
+If the program discovers that the number is composite, it will print:
+
+ Not prime (reason)
+
+Where I<reason> is either:
+
+ divisible by small prime x
+
+Or:
+
+ failed nth pseudoprime test
+
+In the first case, I<x> indicates the first small prime factor that
+was found. In the second case, I<n> indicates which of the
+pseudoprime tests failed (numbered from 1).
+
+If this happens, the number is definitely not prime. However, if the
+number succeeds, this message results:
+
+ Probably prime, 1 in 4^15 chance of false positive
+
+If this happens, the number is prime with very high probability, but
+its primality has not been absolutely proven, only demonstrated to a
+very convincing degree.
+
+The value I<a> can be input in standard decimal notation, or, if it is
+prefixed with I<0x>, it will be read as hexadecimal.
+
+=head1 ENVIRONMENT
+
+You can control how many iterations of Rabin-Miller are performed on
+the candidate number by setting the I<RM_TESTS> environment variable
+to an integer value before starting up B<isprime>. This will change
+the output slightly if the number passes all the tests.
+
+=head1 SEE ALSO
+
+gcd(1), invmod(1), lap(1)
+
+=head1 AUTHOR
+
+ Michael J. Fromberger <sting@linguist.dartmouth.edu>
+ Thayer School of Engineering, Hanover, New Hampshire, USA
diff --git a/security/nss/lib/freebl/mpi/doc/lap.pod b/security/nss/lib/freebl/mpi/doc/lap.pod
new file mode 100644
index 000000000..47539fbbf
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/lap.pod
@@ -0,0 +1,36 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+=head1 NAME
+
+ lap - compute least annihilating power of a number
+
+=head1 SYNOPSIS
+
+ lap <a> <m>
+
+=head1 DESCRIPTION
+
+The B<lap> program computes the order of I<a> modulo I<m>, for
+arbitrary precision integers I<a> and I<m>. The B<order> of I<a>
+modulo I<m> is defined as the smallest positive value I<n> for which
+I<a> raised to the I<n>th power, modulo I<m>, is equal to 1. The
+order may not exist, if I<m> is composite.
+
+=head1 RESTRICTIONS
+
+This program is very slow, especially for large moduli. It is
+intended as a way to help find primitive elements in a modular field,
+but it does not do so in a particularly efficient manner. It was
+written simply to help verify that a particular candidate does not
+have an obviously short cycle mod I<m>.
+
+=head1 SEE ALSO
+
+gcd(1), invmod(1), isprime(1)
+
+=head1 AUTHOR
+
+ Michael J. Fromberger <sting@linguist.dartmouth.edu>
+ Thayer School of Engineering, Hanover, New Hampshire, USA
diff --git a/security/nss/lib/freebl/mpi/doc/mpi-test.pod b/security/nss/lib/freebl/mpi/doc/mpi-test.pod
new file mode 100644
index 000000000..b05f866e5
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/mpi-test.pod
@@ -0,0 +1,51 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+=head1 NAME
+
+ mpi-test - automated test program for MPI library
+
+=head1 SYNOPSIS
+
+ mpi-test <suite-name> [quiet]
+ mpi-test list
+ mpi-test help
+
+=head1 DESCRIPTION
+
+The B<mpi-test> program is a general unit test driver for the MPI
+library. It is used to verify that the library works as it is
+supposed to on your architecture. As with most such things, passing
+all the tests in B<mpi-test> does not guarantee the code is correct,
+but if any of them fail, there are certainly problems.
+
+Each major function of the library can be tested individually. For a
+list of the test suites understood by B<mpi-test>, run it with the
+I<list> command line option:
+
+ mpi-test list
+
+This will display a list of the available test suites and a brief
+synopsis of what each one does. For a brief overview of this
+document, run B<mpi-test> I<help>.
+
+B<mpi-test> exits with a zero status if the selected test succeeds, or
+a nonzero status if it fails. If a I<suite-name> which is not
+understood by B<mpi-test> is given, a diagnostic is printed to the
+standard error, and the program exits with a result code of 2. If a
+test fails, the result code will be 1, and a diagnostic is ordinarily
+printed to the standard error. However, if the I<quiet> option is
+provided, these diagnostics will be suppressed.
+
+=head1 RESTRICTIONS
+
+Only a few canned test cases are provided. The solutions have been
+verified using the GNU bc(1) program, so bugs there may cause problems
+here; however, this is very unlikely, so if a test fails, it is almost
+certainly my fault, not bc(1)'s.
+
+=head1 AUTHOR
+
+ Michael J. Fromberger <sting@linguist.dartmouth.edu>
+ Thayer School of Engineering, Hanover, New Hampshire, USA
diff --git a/security/nss/lib/freebl/mpi/doc/mul.txt b/security/nss/lib/freebl/mpi/doc/mul.txt
new file mode 100644
index 000000000..975f56ddb
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/mul.txt
@@ -0,0 +1,77 @@
+Multiplication
+
+This describes the multiplication algorithm used by the MPI library.
+
+This is basically a standard "schoolbook" algorithm. It is slow --
+O(mn) for m = #a, n = #b -- but easy to implement and verify.
+Basically, we run two nested loops, as illustrated here (R is the
+radix):
+
+k = 0
+for j <- 0 to (#b - 1)
+ for i <- 0 to (#a - 1)
+ w = (a[i] * b[j]) + k + c[i+j]
+ c[i+j] = w mod R
+ k = w div R
+ endfor
+ c[i+j] = k;
+ k = 0;
+endfor
+
+It is necessary that 'w' have room for at least two radix R digits.
+The product of any two digits in radix R is at most:
+
+ (R - 1)(R - 1) = R^2 - 2R + 1
+
+Since a two-digit radix-R number can hold values as large as R^2 - 1,
+this ensures that the product will fit into the two-digit register.
+
+To ensure that two digits is enough for w, we must also show that
+there is room for the carry-in from the previous multiplication, and
+the current value of the product digit that is being recomputed.
+Assuming each of these may be as big as R - 1 (and no larger,
+certainly), two digits will be enough if and only if:
+
+ (R^2 - 2R + 1) + 2(R - 1) <= R^2 - 1
+
+Simplifying this inequality shows that, indeed, this is the case:
+
+ R^2 - 2R + 1 + 2R - 2 <= R^2 - 1
+
+ R^2 - 1 <= R^2 - 1
+
+This suggests that a good radix would be one more than the largest
+value that can be held in half a machine word -- so, for example, as
+in this implementation, where we used a radix of 65536 on a machine
+with 4-byte words. Another advantage of a radix of this sort is that
+binary-level operations are easy on numbers in this representation.
+
+Here's an example multiplication worked out longhand in radix-10,
+using the above algorithm:
+
+ a = 999
+ b = x 999
+ -------------
+ p = 98001
+
+w = (a[jx] * b[ix]) + kin + c[ix + jx]
+c[ix+jx] = w % RADIX
+k = w / RADIX
+ product
+ix jx a[jx] b[ix] kin w c[i+j] kout 000000
+0 0 9 9 0 81+0+0 1 8 000001
+0 1 9 9 8 81+8+0 9 8 000091
+0 2 9 9 8 81+8+0 9 8 000991
+ 8 0 008991
+1 0 9 9 0 81+0+9 0 9 008901
+1 1 9 9 9 81+9+9 9 9 008901
+1 2 9 9 9 81+9+8 8 9 008901
+ 9 0 098901
+2 0 9 9 0 81+0+9 0 9 098001
+2 1 9 9 9 81+9+8 8 9 098001
+2 2 9 9 9 81+9+9 9 9 098001
+
+------------------------------------------------------------------
+ This Source Code Form is subject to the terms of the Mozilla Public
+ License, v. 2.0. If a copy of the MPL was not distributed with this
+ file, You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/security/nss/lib/freebl/mpi/doc/pi.txt b/security/nss/lib/freebl/mpi/doc/pi.txt
new file mode 100644
index 000000000..a6ef91137
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/pi.txt
@@ -0,0 +1,53 @@
+This file describes how pi is computed by the program in 'pi.c' (see
+the utils subdirectory).
+
+Basically, we use Machin's formula, which is what everyone in the
+world uses as a simple method for computing approximations to pi.
+This works for up to a few thousand digits without too much effort.
+Beyond that, though, it gets too slow.
+
+Machin's formula states:
+
+ pi := 16 * arctan(1/5) - 4 * arctan(1/239)
+
+We compute this in integer arithmetic by first multiplying everything
+through by 10^d, where 'd' is the number of digits of pi we wanted to
+compute. It turns out, the last few digits will be wrong, but the
+number that are wrong is usually very small (ordinarily only 2-3).
+Having done this, we compute the arctan() function using the formula:
+
+ arctan(1/x) := 1/x - 1/(3 x^3) + 1/(5 x^5) - 1/(7 x^7) + 1/(9 x^9) - ...
+
+This is done iteratively by computing the first term manually, and
+then iteratively dividing x^2 and k, where k = 3, 5, 7, ... out of the
+current figure. This is then added to (or subtracted from) a running
+sum, as appropriate. The iteration continues until we overflow our
+available precision and the current figure goes to zero under integer
+division. At that point, we're finished.
+
+Actually, we get a couple extra bits of precision out of the fact that
+we know we're computing y * arctan(1/x), by setting up the multiplier
+as:
+
+ y * 10^d
+
+... instead of just 10^d. There is also a bit of cleverness in how
+the loop is constructed, to avoid special-casing the first term.
+Check out the code for arctan() in 'pi.c', if you are interested in
+seeing how it is set up.
+
+Thanks to Jason P. for this algorithm, which I assembled from notes
+and programs found on his cool "Pile of Pi Programs" page, at:
+
+ http://www.isr.umd.edu/~jasonp/pipage.html
+
+Thanks also to Henrik Johansson <Henrik.Johansson@Nexus.Comm.SE>, from
+whose pi program I borrowed the clever idea of pre-multiplying by x in
+order to avoid a special case on the loop iteration.
+
+------------------------------------------------------------------
+ This Source Code Form is subject to the terms of the Mozilla Public
+ License, v. 2.0. If a copy of the MPL was not distributed with this
+ file, You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/security/nss/lib/freebl/mpi/doc/prime.txt b/security/nss/lib/freebl/mpi/doc/prime.txt
new file mode 100644
index 000000000..694797d5f
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/prime.txt
@@ -0,0 +1,6542 @@
+2
+3
+5
+7
+11
+13
+17
+19
+23
+29
+31
+37
+41
+43
+47
+53
+59
+61
+67
+71
+73
+79
+83
+89
+97
+101
+103
+107
+109
+113
+127
+131
+137
+139
+149
+151
+157
+163
+167
+173
+179
+181
+191
+193
+197
+199
+211
+223
+227
+229
+233
+239
+241
+251
+257
+263
+269
+271
+277
+281
+283
+293
+307
+311
+313
+317
+331
+337
+347
+349
+353
+359
+367
+373
+379
+383
+389
+397
+401
+409
+419
+421
+431
+433
+439
+443
+449
+457
+461
+463
+467
+479
+487
+491
+499
+503
+509
+521
+523
+541
+547
+557
+563
+569
+571
+577
+587
+593
+599
+601
+607
+613
+617
+619
+631
+641
+643
+647
+653
+659
+661
+673
+677
+683
+691
+701
+709
+719
+727
+733
+739
+743
+751
+757
+761
+769
+773
+787
+797
+809
+811
+821
+823
+827
+829
+839
+853
+857
+859
+863
+877
+881
+883
+887
+907
+911
+919
+929
+937
+941
+947
+953
+967
+971
+977
+983
+991
+997
+1009
+1013
+1019
+1021
+1031
+1033
+1039
+1049
+1051
+1061
+1063
+1069
+1087
+1091
+1093
+1097
+1103
+1109
+1117
+1123
+1129
+1151
+1153
+1163
+1171
+1181
+1187
+1193
+1201
+1213
+1217
+1223
+1229
+1231
+1237
+1249
+1259
+1277
+1279
+1283
+1289
+1291
+1297
+1301
+1303
+1307
+1319
+1321
+1327
+1361
+1367
+1373
+1381
+1399
+1409
+1423
+1427
+1429
+1433
+1439
+1447
+1451
+1453
+1459
+1471
+1481
+1483
+1487
+1489
+1493
+1499
+1511
+1523
+1531
+1543
+1549
+1553
+1559
+1567
+1571
+1579
+1583
+1597
+1601
+1607
+1609
+1613
+1619
+1621
+1627
+1637
+1657
+1663
+1667
+1669
+1693
+1697
+1699
+1709
+1721
+1723
+1733
+1741
+1747
+1753
+1759
+1777
+1783
+1787
+1789
+1801
+1811
+1823
+1831
+1847
+1861
+1867
+1871
+1873
+1877
+1879
+1889
+1901
+1907
+1913
+1931
+1933
+1949
+1951
+1973
+1979
+1987
+1993
+1997
+1999
+2003
+2011
+2017
+2027
+2029
+2039
+2053
+2063
+2069
+2081
+2083
+2087
+2089
+2099
+2111
+2113
+2129
+2131
+2137
+2141
+2143
+2153
+2161
+2179
+2203
+2207
+2213
+2221
+2237
+2239
+2243
+2251
+2267
+2269
+2273
+2281
+2287
+2293
+2297
+2309
+2311
+2333
+2339
+2341
+2347
+2351
+2357
+2371
+2377
+2381
+2383
+2389
+2393
+2399
+2411
+2417
+2423
+2437
+2441
+2447
+2459
+2467
+2473
+2477
+2503
+2521
+2531
+2539
+2543
+2549
+2551
+2557
+2579
+2591
+2593
+2609
+2617
+2621
+2633
+2647
+2657
+2659
+2663
+2671
+2677
+2683
+2687
+2689
+2693
+2699
+2707
+2711
+2713
+2719
+2729
+2731
+2741
+2749
+2753
+2767
+2777
+2789
+2791
+2797
+2801
+2803
+2819
+2833
+2837
+2843
+2851
+2857
+2861
+2879
+2887
+2897
+2903
+2909
+2917
+2927
+2939
+2953
+2957
+2963
+2969
+2971
+2999
+3001
+3011
+3019
+3023
+3037
+3041
+3049
+3061
+3067
+3079
+3083
+3089
+3109
+3119
+3121
+3137
+3163
+3167
+3169
+3181
+3187
+3191
+3203
+3209
+3217
+3221
+3229
+3251
+3253
+3257
+3259
+3271
+3299
+3301
+3307
+3313
+3319
+3323
+3329
+3331
+3343
+3347
+3359
+3361
+3371
+3373
+3389
+3391
+3407
+3413
+3433
+3449
+3457
+3461
+3463
+3467
+3469
+3491
+3499
+3511
+3517
+3527
+3529
+3533
+3539
+3541
+3547
+3557
+3559
+3571
+3581
+3583
+3593
+3607
+3613
+3617
+3623
+3631
+3637
+3643
+3659
+3671
+3673
+3677
+3691
+3697
+3701
+3709
+3719
+3727
+3733
+3739
+3761
+3767
+3769
+3779
+3793
+3797
+3803
+3821
+3823
+3833
+3847
+3851
+3853
+3863
+3877
+3881
+3889
+3907
+3911
+3917
+3919
+3923
+3929
+3931
+3943
+3947
+3967
+3989
+4001
+4003
+4007
+4013
+4019
+4021
+4027
+4049
+4051
+4057
+4073
+4079
+4091
+4093
+4099
+4111
+4127
+4129
+4133
+4139
+4153
+4157
+4159
+4177
+4201
+4211
+4217
+4219
+4229
+4231
+4241
+4243
+4253
+4259
+4261
+4271
+4273
+4283
+4289
+4297
+4327
+4337
+4339
+4349
+4357
+4363
+4373
+4391
+4397
+4409
+4421
+4423
+4441
+4447
+4451
+4457
+4463
+4481
+4483
+4493
+4507
+4513
+4517
+4519
+4523
+4547
+4549
+4561
+4567
+4583
+4591
+4597
+4603
+4621
+4637
+4639
+4643
+4649
+4651
+4657
+4663
+4673
+4679
+4691
+4703
+4721
+4723
+4729
+4733
+4751
+4759
+4783
+4787
+4789
+4793
+4799
+4801
+4813
+4817
+4831
+4861
+4871
+4877
+4889
+4903
+4909
+4919
+4931
+4933
+4937
+4943
+4951
+4957
+4967
+4969
+4973
+4987
+4993
+4999
+5003
+5009
+5011
+5021
+5023
+5039
+5051
+5059
+5077
+5081
+5087
+5099
+5101
+5107
+5113
+5119
+5147
+5153
+5167
+5171
+5179
+5189
+5197
+5209
+5227
+5231
+5233
+5237
+5261
+5273
+5279
+5281
+5297
+5303
+5309
+5323
+5333
+5347
+5351
+5381
+5387
+5393
+5399
+5407
+5413
+5417
+5419
+5431
+5437
+5441
+5443
+5449
+5471
+5477
+5479
+5483
+5501
+5503
+5507
+5519
+5521
+5527
+5531
+5557
+5563
+5569
+5573
+5581
+5591
+5623
+5639
+5641
+5647
+5651
+5653
+5657
+5659
+5669
+5683
+5689
+5693
+5701
+5711
+5717
+5737
+5741
+5743
+5749
+5779
+5783
+5791
+5801
+5807
+5813
+5821
+5827
+5839
+5843
+5849
+5851
+5857
+5861
+5867
+5869
+5879
+5881
+5897
+5903
+5923
+5927
+5939
+5953
+5981
+5987
+6007
+6011
+6029
+6037
+6043
+6047
+6053
+6067
+6073
+6079
+6089
+6091
+6101
+6113
+6121
+6131
+6133
+6143
+6151
+6163
+6173
+6197
+6199
+6203
+6211
+6217
+6221
+6229
+6247
+6257
+6263
+6269
+6271
+6277
+6287
+6299
+6301
+6311
+6317
+6323
+6329
+6337
+6343
+6353
+6359
+6361
+6367
+6373
+6379
+6389
+6397
+6421
+6427
+6449
+6451
+6469
+6473
+6481
+6491
+6521
+6529
+6547
+6551
+6553
+6563
+6569
+6571
+6577
+6581
+6599
+6607
+6619
+6637
+6653
+6659
+6661
+6673
+6679
+6689
+6691
+6701
+6703
+6709
+6719
+6733
+6737
+6761
+6763
+6779
+6781
+6791
+6793
+6803
+6823
+6827
+6829
+6833
+6841
+6857
+6863
+6869
+6871
+6883
+6899
+6907
+6911
+6917
+6947
+6949
+6959
+6961
+6967
+6971
+6977
+6983
+6991
+6997
+7001
+7013
+7019
+7027
+7039
+7043
+7057
+7069
+7079
+7103
+7109
+7121
+7127
+7129
+7151
+7159
+7177
+7187
+7193
+7207
+7211
+7213
+7219
+7229
+7237
+7243
+7247
+7253
+7283
+7297
+7307
+7309
+7321
+7331
+7333
+7349
+7351
+7369
+7393
+7411
+7417
+7433
+7451
+7457
+7459
+7477
+7481
+7487
+7489
+7499
+7507
+7517
+7523
+7529
+7537
+7541
+7547
+7549
+7559
+7561
+7573
+7577
+7583
+7589
+7591
+7603
+7607
+7621
+7639
+7643
+7649
+7669
+7673
+7681
+7687
+7691
+7699
+7703
+7717
+7723
+7727
+7741
+7753
+7757
+7759
+7789
+7793
+7817
+7823
+7829
+7841
+7853
+7867
+7873
+7877
+7879
+7883
+7901
+7907
+7919
+7927
+7933
+7937
+7949
+7951
+7963
+7993
+8009
+8011
+8017
+8039
+8053
+8059
+8069
+8081
+8087
+8089
+8093
+8101
+8111
+8117
+8123
+8147
+8161
+8167
+8171
+8179
+8191
+8209
+8219
+8221
+8231
+8233
+8237
+8243
+8263
+8269
+8273
+8287
+8291
+8293
+8297
+8311
+8317
+8329
+8353
+8363
+8369
+8377
+8387
+8389
+8419
+8423
+8429
+8431
+8443
+8447
+8461
+8467
+8501
+8513
+8521
+8527
+8537
+8539
+8543
+8563
+8573
+8581
+8597
+8599
+8609
+8623
+8627
+8629
+8641
+8647
+8663
+8669
+8677
+8681
+8689
+8693
+8699
+8707
+8713
+8719
+8731
+8737
+8741
+8747
+8753
+8761
+8779
+8783
+8803
+8807
+8819
+8821
+8831
+8837
+8839
+8849
+8861
+8863
+8867
+8887
+8893
+8923
+8929
+8933
+8941
+8951
+8963
+8969
+8971
+8999
+9001
+9007
+9011
+9013
+9029
+9041
+9043
+9049
+9059
+9067
+9091
+9103
+9109
+9127
+9133
+9137
+9151
+9157
+9161
+9173
+9181
+9187
+9199
+9203
+9209
+9221
+9227
+9239
+9241
+9257
+9277
+9281
+9283
+9293
+9311
+9319
+9323
+9337
+9341
+9343
+9349
+9371
+9377
+9391
+9397
+9403
+9413
+9419
+9421
+9431
+9433
+9437
+9439
+9461
+9463
+9467
+9473
+9479
+9491
+9497
+9511
+9521
+9533
+9539
+9547
+9551
+9587
+9601
+9613
+9619
+9623
+9629
+9631
+9643
+9649
+9661
+9677
+9679
+9689
+9697
+9719
+9721
+9733
+9739
+9743
+9749
+9767
+9769
+9781
+9787
+9791
+9803
+9811
+9817
+9829
+9833
+9839
+9851
+9857
+9859
+9871
+9883
+9887
+9901
+9907
+9923
+9929
+9931
+9941
+9949
+9967
+9973
+10007
+10009
+10037
+10039
+10061
+10067
+10069
+10079
+10091
+10093
+10099
+10103
+10111
+10133
+10139
+10141
+10151
+10159
+10163
+10169
+10177
+10181
+10193
+10211
+10223
+10243
+10247
+10253
+10259
+10267
+10271
+10273
+10289
+10301
+10303
+10313
+10321
+10331
+10333
+10337
+10343
+10357
+10369
+10391
+10399
+10427
+10429
+10433
+10453
+10457
+10459
+10463
+10477
+10487
+10499
+10501
+10513
+10529
+10531
+10559
+10567
+10589
+10597
+10601
+10607
+10613
+10627
+10631
+10639
+10651
+10657
+10663
+10667
+10687
+10691
+10709
+10711
+10723
+10729
+10733
+10739
+10753
+10771
+10781
+10789
+10799
+10831
+10837
+10847
+10853
+10859
+10861
+10867
+10883
+10889
+10891
+10903
+10909
+10937
+10939
+10949
+10957
+10973
+10979
+10987
+10993
+11003
+11027
+11047
+11057
+11059
+11069
+11071
+11083
+11087
+11093
+11113
+11117
+11119
+11131
+11149
+11159
+11161
+11171
+11173
+11177
+11197
+11213
+11239
+11243
+11251
+11257
+11261
+11273
+11279
+11287
+11299
+11311
+11317
+11321
+11329
+11351
+11353
+11369
+11383
+11393
+11399
+11411
+11423
+11437
+11443
+11447
+11467
+11471
+11483
+11489
+11491
+11497
+11503
+11519
+11527
+11549
+11551
+11579
+11587
+11593
+11597
+11617
+11621
+11633
+11657
+11677
+11681
+11689
+11699
+11701
+11717
+11719
+11731
+11743
+11777
+11779
+11783
+11789
+11801
+11807
+11813
+11821
+11827
+11831
+11833
+11839
+11863
+11867
+11887
+11897
+11903
+11909
+11923
+11927
+11933
+11939
+11941
+11953
+11959
+11969
+11971
+11981
+11987
+12007
+12011
+12037
+12041
+12043
+12049
+12071
+12073
+12097
+12101
+12107
+12109
+12113
+12119
+12143
+12149
+12157
+12161
+12163
+12197
+12203
+12211
+12227
+12239
+12241
+12251
+12253
+12263
+12269
+12277
+12281
+12289
+12301
+12323
+12329
+12343
+12347
+12373
+12377
+12379
+12391
+12401
+12409
+12413
+12421
+12433
+12437
+12451
+12457
+12473
+12479
+12487
+12491
+12497
+12503
+12511
+12517
+12527
+12539
+12541
+12547
+12553
+12569
+12577
+12583
+12589
+12601
+12611
+12613
+12619
+12637
+12641
+12647
+12653
+12659
+12671
+12689
+12697
+12703
+12713
+12721
+12739
+12743
+12757
+12763
+12781
+12791
+12799
+12809
+12821
+12823
+12829
+12841
+12853
+12889
+12893
+12899
+12907
+12911
+12917
+12919
+12923
+12941
+12953
+12959
+12967
+12973
+12979
+12983
+13001
+13003
+13007
+13009
+13033
+13037
+13043
+13049
+13063
+13093
+13099
+13103
+13109
+13121
+13127
+13147
+13151
+13159
+13163
+13171
+13177
+13183
+13187
+13217
+13219
+13229
+13241
+13249
+13259
+13267
+13291
+13297
+13309
+13313
+13327
+13331
+13337
+13339
+13367
+13381
+13397
+13399
+13411
+13417
+13421
+13441
+13451
+13457
+13463
+13469
+13477
+13487
+13499
+13513
+13523
+13537
+13553
+13567
+13577
+13591
+13597
+13613
+13619
+13627
+13633
+13649
+13669
+13679
+13681
+13687
+13691
+13693
+13697
+13709
+13711
+13721
+13723
+13729
+13751
+13757
+13759
+13763
+13781
+13789
+13799
+13807
+13829
+13831
+13841
+13859
+13873
+13877
+13879
+13883
+13901
+13903
+13907
+13913
+13921
+13931
+13933
+13963
+13967
+13997
+13999
+14009
+14011
+14029
+14033
+14051
+14057
+14071
+14081
+14083
+14087
+14107
+14143
+14149
+14153
+14159
+14173
+14177
+14197
+14207
+14221
+14243
+14249
+14251
+14281
+14293
+14303
+14321
+14323
+14327
+14341
+14347
+14369
+14387
+14389
+14401
+14407
+14411
+14419
+14423
+14431
+14437
+14447
+14449
+14461
+14479
+14489
+14503
+14519
+14533
+14537
+14543
+14549
+14551
+14557
+14561
+14563
+14591
+14593
+14621
+14627
+14629
+14633
+14639
+14653
+14657
+14669
+14683
+14699
+14713
+14717
+14723
+14731
+14737
+14741
+14747
+14753
+14759
+14767
+14771
+14779
+14783
+14797
+14813
+14821
+14827
+14831
+14843
+14851
+14867
+14869
+14879
+14887
+14891
+14897
+14923
+14929
+14939
+14947
+14951
+14957
+14969
+14983
+15013
+15017
+15031
+15053
+15061
+15073
+15077
+15083
+15091
+15101
+15107
+15121
+15131
+15137
+15139
+15149
+15161
+15173
+15187
+15193
+15199
+15217
+15227
+15233
+15241
+15259
+15263
+15269
+15271
+15277
+15287
+15289
+15299
+15307
+15313
+15319
+15329
+15331
+15349
+15359
+15361
+15373
+15377
+15383
+15391
+15401
+15413
+15427
+15439
+15443
+15451
+15461
+15467
+15473
+15493
+15497
+15511
+15527
+15541
+15551
+15559
+15569
+15581
+15583
+15601
+15607
+15619
+15629
+15641
+15643
+15647
+15649
+15661
+15667
+15671
+15679
+15683
+15727
+15731
+15733
+15737
+15739
+15749
+15761
+15767
+15773
+15787
+15791
+15797
+15803
+15809
+15817
+15823
+15859
+15877
+15881
+15887
+15889
+15901
+15907
+15913
+15919
+15923
+15937
+15959
+15971
+15973
+15991
+16001
+16007
+16033
+16057
+16061
+16063
+16067
+16069
+16073
+16087
+16091
+16097
+16103
+16111
+16127
+16139
+16141
+16183
+16187
+16189
+16193
+16217
+16223
+16229
+16231
+16249
+16253
+16267
+16273
+16301
+16319
+16333
+16339
+16349
+16361
+16363
+16369
+16381
+16411
+16417
+16421
+16427
+16433
+16447
+16451
+16453
+16477
+16481
+16487
+16493
+16519
+16529
+16547
+16553
+16561
+16567
+16573
+16603
+16607
+16619
+16631
+16633
+16649
+16651
+16657
+16661
+16673
+16691
+16693
+16699
+16703
+16729
+16741
+16747
+16759
+16763
+16787
+16811
+16823
+16829
+16831
+16843
+16871
+16879
+16883
+16889
+16901
+16903
+16921
+16927
+16931
+16937
+16943
+16963
+16979
+16981
+16987
+16993
+17011
+17021
+17027
+17029
+17033
+17041
+17047
+17053
+17077
+17093
+17099
+17107
+17117
+17123
+17137
+17159
+17167
+17183
+17189
+17191
+17203
+17207
+17209
+17231
+17239
+17257
+17291
+17293
+17299
+17317
+17321
+17327
+17333
+17341
+17351
+17359
+17377
+17383
+17387
+17389
+17393
+17401
+17417
+17419
+17431
+17443
+17449
+17467
+17471
+17477
+17483
+17489
+17491
+17497
+17509
+17519
+17539
+17551
+17569
+17573
+17579
+17581
+17597
+17599
+17609
+17623
+17627
+17657
+17659
+17669
+17681
+17683
+17707
+17713
+17729
+17737
+17747
+17749
+17761
+17783
+17789
+17791
+17807
+17827
+17837
+17839
+17851
+17863
+17881
+17891
+17903
+17909
+17911
+17921
+17923
+17929
+17939
+17957
+17959
+17971
+17977
+17981
+17987
+17989
+18013
+18041
+18043
+18047
+18049
+18059
+18061
+18077
+18089
+18097
+18119
+18121
+18127
+18131
+18133
+18143
+18149
+18169
+18181
+18191
+18199
+18211
+18217
+18223
+18229
+18233
+18251
+18253
+18257
+18269
+18287
+18289
+18301
+18307
+18311
+18313
+18329
+18341
+18353
+18367
+18371
+18379
+18397
+18401
+18413
+18427
+18433
+18439
+18443
+18451
+18457
+18461
+18481
+18493
+18503
+18517
+18521
+18523
+18539
+18541
+18553
+18583
+18587
+18593
+18617
+18637
+18661
+18671
+18679
+18691
+18701
+18713
+18719
+18731
+18743
+18749
+18757
+18773
+18787
+18793
+18797
+18803
+18839
+18859
+18869
+18899
+18911
+18913
+18917
+18919
+18947
+18959
+18973
+18979
+19001
+19009
+19013
+19031
+19037
+19051
+19069
+19073
+19079
+19081
+19087
+19121
+19139
+19141
+19157
+19163
+19181
+19183
+19207
+19211
+19213
+19219
+19231
+19237
+19249
+19259
+19267
+19273
+19289
+19301
+19309
+19319
+19333
+19373
+19379
+19381
+19387
+19391
+19403
+19417
+19421
+19423
+19427
+19429
+19433
+19441
+19447
+19457
+19463
+19469
+19471
+19477
+19483
+19489
+19501
+19507
+19531
+19541
+19543
+19553
+19559
+19571
+19577
+19583
+19597
+19603
+19609
+19661
+19681
+19687
+19697
+19699
+19709
+19717
+19727
+19739
+19751
+19753
+19759
+19763
+19777
+19793
+19801
+19813
+19819
+19841
+19843
+19853
+19861
+19867
+19889
+19891
+19913
+19919
+19927
+19937
+19949
+19961
+19963
+19973
+19979
+19991
+19993
+19997
+20011
+20021
+20023
+20029
+20047
+20051
+20063
+20071
+20089
+20101
+20107
+20113
+20117
+20123
+20129
+20143
+20147
+20149
+20161
+20173
+20177
+20183
+20201
+20219
+20231
+20233
+20249
+20261
+20269
+20287
+20297
+20323
+20327
+20333
+20341
+20347
+20353
+20357
+20359
+20369
+20389
+20393
+20399
+20407
+20411
+20431
+20441
+20443
+20477
+20479
+20483
+20507
+20509
+20521
+20533
+20543
+20549
+20551
+20563
+20593
+20599
+20611
+20627
+20639
+20641
+20663
+20681
+20693
+20707
+20717
+20719
+20731
+20743
+20747
+20749
+20753
+20759
+20771
+20773
+20789
+20807
+20809
+20849
+20857
+20873
+20879
+20887
+20897
+20899
+20903
+20921
+20929
+20939
+20947
+20959
+20963
+20981
+20983
+21001
+21011
+21013
+21017
+21019
+21023
+21031
+21059
+21061
+21067
+21089
+21101
+21107
+21121
+21139
+21143
+21149
+21157
+21163
+21169
+21179
+21187
+21191
+21193
+21211
+21221
+21227
+21247
+21269
+21277
+21283
+21313
+21317
+21319
+21323
+21341
+21347
+21377
+21379
+21383
+21391
+21397
+21401
+21407
+21419
+21433
+21467
+21481
+21487
+21491
+21493
+21499
+21503
+21517
+21521
+21523
+21529
+21557
+21559
+21563
+21569
+21577
+21587
+21589
+21599
+21601
+21611
+21613
+21617
+21647
+21649
+21661
+21673
+21683
+21701
+21713
+21727
+21737
+21739
+21751
+21757
+21767
+21773
+21787
+21799
+21803
+21817
+21821
+21839
+21841
+21851
+21859
+21863
+21871
+21881
+21893
+21911
+21929
+21937
+21943
+21961
+21977
+21991
+21997
+22003
+22013
+22027
+22031
+22037
+22039
+22051
+22063
+22067
+22073
+22079
+22091
+22093
+22109
+22111
+22123
+22129
+22133
+22147
+22153
+22157
+22159
+22171
+22189
+22193
+22229
+22247
+22259
+22271
+22273
+22277
+22279
+22283
+22291
+22303
+22307
+22343
+22349
+22367
+22369
+22381
+22391
+22397
+22409
+22433
+22441
+22447
+22453
+22469
+22481
+22483
+22501
+22511
+22531
+22541
+22543
+22549
+22567
+22571
+22573
+22613
+22619
+22621
+22637
+22639
+22643
+22651
+22669
+22679
+22691
+22697
+22699
+22709
+22717
+22721
+22727
+22739
+22741
+22751
+22769
+22777
+22783
+22787
+22807
+22811
+22817
+22853
+22859
+22861
+22871
+22877
+22901
+22907
+22921
+22937
+22943
+22961
+22963
+22973
+22993
+23003
+23011
+23017
+23021
+23027
+23029
+23039
+23041
+23053
+23057
+23059
+23063
+23071
+23081
+23087
+23099
+23117
+23131
+23143
+23159
+23167
+23173
+23189
+23197
+23201
+23203
+23209
+23227
+23251
+23269
+23279
+23291
+23293
+23297
+23311
+23321
+23327
+23333
+23339
+23357
+23369
+23371
+23399
+23417
+23431
+23447
+23459
+23473
+23497
+23509
+23531
+23537
+23539
+23549
+23557
+23561
+23563
+23567
+23581
+23593
+23599
+23603
+23609
+23623
+23627
+23629
+23633
+23663
+23669
+23671
+23677
+23687
+23689
+23719
+23741
+23743
+23747
+23753
+23761
+23767
+23773
+23789
+23801
+23813
+23819
+23827
+23831
+23833
+23857
+23869
+23873
+23879
+23887
+23893
+23899
+23909
+23911
+23917
+23929
+23957
+23971
+23977
+23981
+23993
+24001
+24007
+24019
+24023
+24029
+24043
+24049
+24061
+24071
+24077
+24083
+24091
+24097
+24103
+24107
+24109
+24113
+24121
+24133
+24137
+24151
+24169
+24179
+24181
+24197
+24203
+24223
+24229
+24239
+24247
+24251
+24281
+24317
+24329
+24337
+24359
+24371
+24373
+24379
+24391
+24407
+24413
+24419
+24421
+24439
+24443
+24469
+24473
+24481
+24499
+24509
+24517
+24527
+24533
+24547
+24551
+24571
+24593
+24611
+24623
+24631
+24659
+24671
+24677
+24683
+24691
+24697
+24709
+24733
+24749
+24763
+24767
+24781
+24793
+24799
+24809
+24821
+24841
+24847
+24851
+24859
+24877
+24889
+24907
+24917
+24919
+24923
+24943
+24953
+24967
+24971
+24977
+24979
+24989
+25013
+25031
+25033
+25037
+25057
+25073
+25087
+25097
+25111
+25117
+25121
+25127
+25147
+25153
+25163
+25169
+25171
+25183
+25189
+25219
+25229
+25237
+25243
+25247
+25253
+25261
+25301
+25303
+25307
+25309
+25321
+25339
+25343
+25349
+25357
+25367
+25373
+25391
+25409
+25411
+25423
+25439
+25447
+25453
+25457
+25463
+25469
+25471
+25523
+25537
+25541
+25561
+25577
+25579
+25583
+25589
+25601
+25603
+25609
+25621
+25633
+25639
+25643
+25657
+25667
+25673
+25679
+25693
+25703
+25717
+25733
+25741
+25747
+25759
+25763
+25771
+25793
+25799
+25801
+25819
+25841
+25847
+25849
+25867
+25873
+25889
+25903
+25913
+25919
+25931
+25933
+25939
+25943
+25951
+25969
+25981
+25997
+25999
+26003
+26017
+26021
+26029
+26041
+26053
+26083
+26099
+26107
+26111
+26113
+26119
+26141
+26153
+26161
+26171
+26177
+26183
+26189
+26203
+26209
+26227
+26237
+26249
+26251
+26261
+26263
+26267
+26293
+26297
+26309
+26317
+26321
+26339
+26347
+26357
+26371
+26387
+26393
+26399
+26407
+26417
+26423
+26431
+26437
+26449
+26459
+26479
+26489
+26497
+26501
+26513
+26539
+26557
+26561
+26573
+26591
+26597
+26627
+26633
+26641
+26647
+26669
+26681
+26683
+26687
+26693
+26699
+26701
+26711
+26713
+26717
+26723
+26729
+26731
+26737
+26759
+26777
+26783
+26801
+26813
+26821
+26833
+26839
+26849
+26861
+26863
+26879
+26881
+26891
+26893
+26903
+26921
+26927
+26947
+26951
+26953
+26959
+26981
+26987
+26993
+27011
+27017
+27031
+27043
+27059
+27061
+27067
+27073
+27077
+27091
+27103
+27107
+27109
+27127
+27143
+27179
+27191
+27197
+27211
+27239
+27241
+27253
+27259
+27271
+27277
+27281
+27283
+27299
+27329
+27337
+27361
+27367
+27397
+27407
+27409
+27427
+27431
+27437
+27449
+27457
+27479
+27481
+27487
+27509
+27527
+27529
+27539
+27541
+27551
+27581
+27583
+27611
+27617
+27631
+27647
+27653
+27673
+27689
+27691
+27697
+27701
+27733
+27737
+27739
+27743
+27749
+27751
+27763
+27767
+27773
+27779
+27791
+27793
+27799
+27803
+27809
+27817
+27823
+27827
+27847
+27851
+27883
+27893
+27901
+27917
+27919
+27941
+27943
+27947
+27953
+27961
+27967
+27983
+27997
+28001
+28019
+28027
+28031
+28051
+28057
+28069
+28081
+28087
+28097
+28099
+28109
+28111
+28123
+28151
+28163
+28181
+28183
+28201
+28211
+28219
+28229
+28277
+28279
+28283
+28289
+28297
+28307
+28309
+28319
+28349
+28351
+28387
+28393
+28403
+28409
+28411
+28429
+28433
+28439
+28447
+28463
+28477
+28493
+28499
+28513
+28517
+28537
+28541
+28547
+28549
+28559
+28571
+28573
+28579
+28591
+28597
+28603
+28607
+28619
+28621
+28627
+28631
+28643
+28649
+28657
+28661
+28663
+28669
+28687
+28697
+28703
+28711
+28723
+28729
+28751
+28753
+28759
+28771
+28789
+28793
+28807
+28813
+28817
+28837
+28843
+28859
+28867
+28871
+28879
+28901
+28909
+28921
+28927
+28933
+28949
+28961
+28979
+29009
+29017
+29021
+29023
+29027
+29033
+29059
+29063
+29077
+29101
+29123
+29129
+29131
+29137
+29147
+29153
+29167
+29173
+29179
+29191
+29201
+29207
+29209
+29221
+29231
+29243
+29251
+29269
+29287
+29297
+29303
+29311
+29327
+29333
+29339
+29347
+29363
+29383
+29387
+29389
+29399
+29401
+29411
+29423
+29429
+29437
+29443
+29453
+29473
+29483
+29501
+29527
+29531
+29537
+29567
+29569
+29573
+29581
+29587
+29599
+29611
+29629
+29633
+29641
+29663
+29669
+29671
+29683
+29717
+29723
+29741
+29753
+29759
+29761
+29789
+29803
+29819
+29833
+29837
+29851
+29863
+29867
+29873
+29879
+29881
+29917
+29921
+29927
+29947
+29959
+29983
+29989
+30011
+30013
+30029
+30047
+30059
+30071
+30089
+30091
+30097
+30103
+30109
+30113
+30119
+30133
+30137
+30139
+30161
+30169
+30181
+30187
+30197
+30203
+30211
+30223
+30241
+30253
+30259
+30269
+30271
+30293
+30307
+30313
+30319
+30323
+30341
+30347
+30367
+30389
+30391
+30403
+30427
+30431
+30449
+30467
+30469
+30491
+30493
+30497
+30509
+30517
+30529
+30539
+30553
+30557
+30559
+30577
+30593
+30631
+30637
+30643
+30649
+30661
+30671
+30677
+30689
+30697
+30703
+30707
+30713
+30727
+30757
+30763
+30773
+30781
+30803
+30809
+30817
+30829
+30839
+30841
+30851
+30853
+30859
+30869
+30871
+30881
+30893
+30911
+30931
+30937
+30941
+30949
+30971
+30977
+30983
+31013
+31019
+31033
+31039
+31051
+31063
+31069
+31079
+31081
+31091
+31121
+31123
+31139
+31147
+31151
+31153
+31159
+31177
+31181
+31183
+31189
+31193
+31219
+31223
+31231
+31237
+31247
+31249
+31253
+31259
+31267
+31271
+31277
+31307
+31319
+31321
+31327
+31333
+31337
+31357
+31379
+31387
+31391
+31393
+31397
+31469
+31477
+31481
+31489
+31511
+31513
+31517
+31531
+31541
+31543
+31547
+31567
+31573
+31583
+31601
+31607
+31627
+31643
+31649
+31657
+31663
+31667
+31687
+31699
+31721
+31723
+31727
+31729
+31741
+31751
+31769
+31771
+31793
+31799
+31817
+31847
+31849
+31859
+31873
+31883
+31891
+31907
+31957
+31963
+31973
+31981
+31991
+32003
+32009
+32027
+32029
+32051
+32057
+32059
+32063
+32069
+32077
+32083
+32089
+32099
+32117
+32119
+32141
+32143
+32159
+32173
+32183
+32189
+32191
+32203
+32213
+32233
+32237
+32251
+32257
+32261
+32297
+32299
+32303
+32309
+32321
+32323
+32327
+32341
+32353
+32359
+32363
+32369
+32371
+32377
+32381
+32401
+32411
+32413
+32423
+32429
+32441
+32443
+32467
+32479
+32491
+32497
+32503
+32507
+32531
+32533
+32537
+32561
+32563
+32569
+32573
+32579
+32587
+32603
+32609
+32611
+32621
+32633
+32647
+32653
+32687
+32693
+32707
+32713
+32717
+32719
+32749
+32771
+32779
+32783
+32789
+32797
+32801
+32803
+32831
+32833
+32839
+32843
+32869
+32887
+32909
+32911
+32917
+32933
+32939
+32941
+32957
+32969
+32971
+32983
+32987
+32993
+32999
+33013
+33023
+33029
+33037
+33049
+33053
+33071
+33073
+33083
+33091
+33107
+33113
+33119
+33149
+33151
+33161
+33179
+33181
+33191
+33199
+33203
+33211
+33223
+33247
+33287
+33289
+33301
+33311
+33317
+33329
+33331
+33343
+33347
+33349
+33353
+33359
+33377
+33391
+33403
+33409
+33413
+33427
+33457
+33461
+33469
+33479
+33487
+33493
+33503
+33521
+33529
+33533
+33547
+33563
+33569
+33577
+33581
+33587
+33589
+33599
+33601
+33613
+33617
+33619
+33623
+33629
+33637
+33641
+33647
+33679
+33703
+33713
+33721
+33739
+33749
+33751
+33757
+33767
+33769
+33773
+33791
+33797
+33809
+33811
+33827
+33829
+33851
+33857
+33863
+33871
+33889
+33893
+33911
+33923
+33931
+33937
+33941
+33961
+33967
+33997
+34019
+34031
+34033
+34039
+34057
+34061
+34123
+34127
+34129
+34141
+34147
+34157
+34159
+34171
+34183
+34211
+34213
+34217
+34231
+34253
+34259
+34261
+34267
+34273
+34283
+34297
+34301
+34303
+34313
+34319
+34327
+34337
+34351
+34361
+34367
+34369
+34381
+34403
+34421
+34429
+34439
+34457
+34469
+34471
+34483
+34487
+34499
+34501
+34511
+34513
+34519
+34537
+34543
+34549
+34583
+34589
+34591
+34603
+34607
+34613
+34631
+34649
+34651
+34667
+34673
+34679
+34687
+34693
+34703
+34721
+34729
+34739
+34747
+34757
+34759
+34763
+34781
+34807
+34819
+34841
+34843
+34847
+34849
+34871
+34877
+34883
+34897
+34913
+34919
+34939
+34949
+34961
+34963
+34981
+35023
+35027
+35051
+35053
+35059
+35069
+35081
+35083
+35089
+35099
+35107
+35111
+35117
+35129
+35141
+35149
+35153
+35159
+35171
+35201
+35221
+35227
+35251
+35257
+35267
+35279
+35281
+35291
+35311
+35317
+35323
+35327
+35339
+35353
+35363
+35381
+35393
+35401
+35407
+35419
+35423
+35437
+35447
+35449
+35461
+35491
+35507
+35509
+35521
+35527
+35531
+35533
+35537
+35543
+35569
+35573
+35591
+35593
+35597
+35603
+35617
+35671
+35677
+35729
+35731
+35747
+35753
+35759
+35771
+35797
+35801
+35803
+35809
+35831
+35837
+35839
+35851
+35863
+35869
+35879
+35897
+35899
+35911
+35923
+35933
+35951
+35963
+35969
+35977
+35983
+35993
+35999
+36007
+36011
+36013
+36017
+36037
+36061
+36067
+36073
+36083
+36097
+36107
+36109
+36131
+36137
+36151
+36161
+36187
+36191
+36209
+36217
+36229
+36241
+36251
+36263
+36269
+36277
+36293
+36299
+36307
+36313
+36319
+36341
+36343
+36353
+36373
+36383
+36389
+36433
+36451
+36457
+36467
+36469
+36473
+36479
+36493
+36497
+36523
+36527
+36529
+36541
+36551
+36559
+36563
+36571
+36583
+36587
+36599
+36607
+36629
+36637
+36643
+36653
+36671
+36677
+36683
+36691
+36697
+36709
+36713
+36721
+36739
+36749
+36761
+36767
+36779
+36781
+36787
+36791
+36793
+36809
+36821
+36833
+36847
+36857
+36871
+36877
+36887
+36899
+36901
+36913
+36919
+36923
+36929
+36931
+36943
+36947
+36973
+36979
+36997
+37003
+37013
+37019
+37021
+37039
+37049
+37057
+37061
+37087
+37097
+37117
+37123
+37139
+37159
+37171
+37181
+37189
+37199
+37201
+37217
+37223
+37243
+37253
+37273
+37277
+37307
+37309
+37313
+37321
+37337
+37339
+37357
+37361
+37363
+37369
+37379
+37397
+37409
+37423
+37441
+37447
+37463
+37483
+37489
+37493
+37501
+37507
+37511
+37517
+37529
+37537
+37547
+37549
+37561
+37567
+37571
+37573
+37579
+37589
+37591
+37607
+37619
+37633
+37643
+37649
+37657
+37663
+37691
+37693
+37699
+37717
+37747
+37781
+37783
+37799
+37811
+37813
+37831
+37847
+37853
+37861
+37871
+37879
+37889
+37897
+37907
+37951
+37957
+37963
+37967
+37987
+37991
+37993
+37997
+38011
+38039
+38047
+38053
+38069
+38083
+38113
+38119
+38149
+38153
+38167
+38177
+38183
+38189
+38197
+38201
+38219
+38231
+38237
+38239
+38261
+38273
+38281
+38287
+38299
+38303
+38317
+38321
+38327
+38329
+38333
+38351
+38371
+38377
+38393
+38431
+38447
+38449
+38453
+38459
+38461
+38501
+38543
+38557
+38561
+38567
+38569
+38593
+38603
+38609
+38611
+38629
+38639
+38651
+38653
+38669
+38671
+38677
+38693
+38699
+38707
+38711
+38713
+38723
+38729
+38737
+38747
+38749
+38767
+38783
+38791
+38803
+38821
+38833
+38839
+38851
+38861
+38867
+38873
+38891
+38903
+38917
+38921
+38923
+38933
+38953
+38959
+38971
+38977
+38993
+39019
+39023
+39041
+39043
+39047
+39079
+39089
+39097
+39103
+39107
+39113
+39119
+39133
+39139
+39157
+39161
+39163
+39181
+39191
+39199
+39209
+39217
+39227
+39229
+39233
+39239
+39241
+39251
+39293
+39301
+39313
+39317
+39323
+39341
+39343
+39359
+39367
+39371
+39373
+39383
+39397
+39409
+39419
+39439
+39443
+39451
+39461
+39499
+39503
+39509
+39511
+39521
+39541
+39551
+39563
+39569
+39581
+39607
+39619
+39623
+39631
+39659
+39667
+39671
+39679
+39703
+39709
+39719
+39727
+39733
+39749
+39761
+39769
+39779
+39791
+39799
+39821
+39827
+39829
+39839
+39841
+39847
+39857
+39863
+39869
+39877
+39883
+39887
+39901
+39929
+39937
+39953
+39971
+39979
+39983
+39989
+40009
+40013
+40031
+40037
+40039
+40063
+40087
+40093
+40099
+40111
+40123
+40127
+40129
+40151
+40153
+40163
+40169
+40177
+40189
+40193
+40213
+40231
+40237
+40241
+40253
+40277
+40283
+40289
+40343
+40351
+40357
+40361
+40387
+40423
+40427
+40429
+40433
+40459
+40471
+40483
+40487
+40493
+40499
+40507
+40519
+40529
+40531
+40543
+40559
+40577
+40583
+40591
+40597
+40609
+40627
+40637
+40639
+40693
+40697
+40699
+40709
+40739
+40751
+40759
+40763
+40771
+40787
+40801
+40813
+40819
+40823
+40829
+40841
+40847
+40849
+40853
+40867
+40879
+40883
+40897
+40903
+40927
+40933
+40939
+40949
+40961
+40973
+40993
+41011
+41017
+41023
+41039
+41047
+41051
+41057
+41077
+41081
+41113
+41117
+41131
+41141
+41143
+41149
+41161
+41177
+41179
+41183
+41189
+41201
+41203
+41213
+41221
+41227
+41231
+41233
+41243
+41257
+41263
+41269
+41281
+41299
+41333
+41341
+41351
+41357
+41381
+41387
+41389
+41399
+41411
+41413
+41443
+41453
+41467
+41479
+41491
+41507
+41513
+41519
+41521
+41539
+41543
+41549
+41579
+41593
+41597
+41603
+41609
+41611
+41617
+41621
+41627
+41641
+41647
+41651
+41659
+41669
+41681
+41687
+41719
+41729
+41737
+41759
+41761
+41771
+41777
+41801
+41809
+41813
+41843
+41849
+41851
+41863
+41879
+41887
+41893
+41897
+41903
+41911
+41927
+41941
+41947
+41953
+41957
+41959
+41969
+41981
+41983
+41999
+42013
+42017
+42019
+42023
+42043
+42061
+42071
+42073
+42083
+42089
+42101
+42131
+42139
+42157
+42169
+42179
+42181
+42187
+42193
+42197
+42209
+42221
+42223
+42227
+42239
+42257
+42281
+42283
+42293
+42299
+42307
+42323
+42331
+42337
+42349
+42359
+42373
+42379
+42391
+42397
+42403
+42407
+42409
+42433
+42437
+42443
+42451
+42457
+42461
+42463
+42467
+42473
+42487
+42491
+42499
+42509
+42533
+42557
+42569
+42571
+42577
+42589
+42611
+42641
+42643
+42649
+42667
+42677
+42683
+42689
+42697
+42701
+42703
+42709
+42719
+42727
+42737
+42743
+42751
+42767
+42773
+42787
+42793
+42797
+42821
+42829
+42839
+42841
+42853
+42859
+42863
+42899
+42901
+42923
+42929
+42937
+42943
+42953
+42961
+42967
+42979
+42989
+43003
+43013
+43019
+43037
+43049
+43051
+43063
+43067
+43093
+43103
+43117
+43133
+43151
+43159
+43177
+43189
+43201
+43207
+43223
+43237
+43261
+43271
+43283
+43291
+43313
+43319
+43321
+43331
+43391
+43397
+43399
+43403
+43411
+43427
+43441
+43451
+43457
+43481
+43487
+43499
+43517
+43541
+43543
+43573
+43577
+43579
+43591
+43597
+43607
+43609
+43613
+43627
+43633
+43649
+43651
+43661
+43669
+43691
+43711
+43717
+43721
+43753
+43759
+43777
+43781
+43783
+43787
+43789
+43793
+43801
+43853
+43867
+43889
+43891
+43913
+43933
+43943
+43951
+43961
+43963
+43969
+43973
+43987
+43991
+43997
+44017
+44021
+44027
+44029
+44041
+44053
+44059
+44071
+44087
+44089
+44101
+44111
+44119
+44123
+44129
+44131
+44159
+44171
+44179
+44189
+44201
+44203
+44207
+44221
+44249
+44257
+44263
+44267
+44269
+44273
+44279
+44281
+44293
+44351
+44357
+44371
+44381
+44383
+44389
+44417
+44449
+44453
+44483
+44491
+44497
+44501
+44507
+44519
+44531
+44533
+44537
+44543
+44549
+44563
+44579
+44587
+44617
+44621
+44623
+44633
+44641
+44647
+44651
+44657
+44683
+44687
+44699
+44701
+44711
+44729
+44741
+44753
+44771
+44773
+44777
+44789
+44797
+44809
+44819
+44839
+44843
+44851
+44867
+44879
+44887
+44893
+44909
+44917
+44927
+44939
+44953
+44959
+44963
+44971
+44983
+44987
+45007
+45013
+45053
+45061
+45077
+45083
+45119
+45121
+45127
+45131
+45137
+45139
+45161
+45179
+45181
+45191
+45197
+45233
+45247
+45259
+45263
+45281
+45289
+45293
+45307
+45317
+45319
+45329
+45337
+45341
+45343
+45361
+45377
+45389
+45403
+45413
+45427
+45433
+45439
+45481
+45491
+45497
+45503
+45523
+45533
+45541
+45553
+45557
+45569
+45587
+45589
+45599
+45613
+45631
+45641
+45659
+45667
+45673
+45677
+45691
+45697
+45707
+45737
+45751
+45757
+45763
+45767
+45779
+45817
+45821
+45823
+45827
+45833
+45841
+45853
+45863
+45869
+45887
+45893
+45943
+45949
+45953
+45959
+45971
+45979
+45989
+46021
+46027
+46049
+46051
+46061
+46073
+46091
+46093
+46099
+46103
+46133
+46141
+46147
+46153
+46171
+46181
+46183
+46187
+46199
+46219
+46229
+46237
+46261
+46271
+46273
+46279
+46301
+46307
+46309
+46327
+46337
+46349
+46351
+46381
+46399
+46411
+46439
+46441
+46447
+46451
+46457
+46471
+46477
+46489
+46499
+46507
+46511
+46523
+46549
+46559
+46567
+46573
+46589
+46591
+46601
+46619
+46633
+46639
+46643
+46649
+46663
+46679
+46681
+46687
+46691
+46703
+46723
+46727
+46747
+46751
+46757
+46769
+46771
+46807
+46811
+46817
+46819
+46829
+46831
+46853
+46861
+46867
+46877
+46889
+46901
+46919
+46933
+46957
+46993
+46997
+47017
+47041
+47051
+47057
+47059
+47087
+47093
+47111
+47119
+47123
+47129
+47137
+47143
+47147
+47149
+47161
+47189
+47207
+47221
+47237
+47251
+47269
+47279
+47287
+47293
+47297
+47303
+47309
+47317
+47339
+47351
+47353
+47363
+47381
+47387
+47389
+47407
+47417
+47419
+47431
+47441
+47459
+47491
+47497
+47501
+47507
+47513
+47521
+47527
+47533
+47543
+47563
+47569
+47581
+47591
+47599
+47609
+47623
+47629
+47639
+47653
+47657
+47659
+47681
+47699
+47701
+47711
+47713
+47717
+47737
+47741
+47743
+47777
+47779
+47791
+47797
+47807
+47809
+47819
+47837
+47843
+47857
+47869
+47881
+47903
+47911
+47917
+47933
+47939
+47947
+47951
+47963
+47969
+47977
+47981
+48017
+48023
+48029
+48049
+48073
+48079
+48091
+48109
+48119
+48121
+48131
+48157
+48163
+48179
+48187
+48193
+48197
+48221
+48239
+48247
+48259
+48271
+48281
+48299
+48311
+48313
+48337
+48341
+48353
+48371
+48383
+48397
+48407
+48409
+48413
+48437
+48449
+48463
+48473
+48479
+48481
+48487
+48491
+48497
+48523
+48527
+48533
+48539
+48541
+48563
+48571
+48589
+48593
+48611
+48619
+48623
+48647
+48649
+48661
+48673
+48677
+48679
+48731
+48733
+48751
+48757
+48761
+48767
+48779
+48781
+48787
+48799
+48809
+48817
+48821
+48823
+48847
+48857
+48859
+48869
+48871
+48883
+48889
+48907
+48947
+48953
+48973
+48989
+48991
+49003
+49009
+49019
+49031
+49033
+49037
+49043
+49057
+49069
+49081
+49103
+49109
+49117
+49121
+49123
+49139
+49157
+49169
+49171
+49177
+49193
+49199
+49201
+49207
+49211
+49223
+49253
+49261
+49277
+49279
+49297
+49307
+49331
+49333
+49339
+49363
+49367
+49369
+49391
+49393
+49409
+49411
+49417
+49429
+49433
+49451
+49459
+49463
+49477
+49481
+49499
+49523
+49529
+49531
+49537
+49547
+49549
+49559
+49597
+49603
+49613
+49627
+49633
+49639
+49663
+49667
+49669
+49681
+49697
+49711
+49727
+49739
+49741
+49747
+49757
+49783
+49787
+49789
+49801
+49807
+49811
+49823
+49831
+49843
+49853
+49871
+49877
+49891
+49919
+49921
+49927
+49937
+49939
+49943
+49957
+49991
+49993
+49999
+50021
+50023
+50033
+50047
+50051
+50053
+50069
+50077
+50087
+50093
+50101
+50111
+50119
+50123
+50129
+50131
+50147
+50153
+50159
+50177
+50207
+50221
+50227
+50231
+50261
+50263
+50273
+50287
+50291
+50311
+50321
+50329
+50333
+50341
+50359
+50363
+50377
+50383
+50387
+50411
+50417
+50423
+50441
+50459
+50461
+50497
+50503
+50513
+50527
+50539
+50543
+50549
+50551
+50581
+50587
+50591
+50593
+50599
+50627
+50647
+50651
+50671
+50683
+50707
+50723
+50741
+50753
+50767
+50773
+50777
+50789
+50821
+50833
+50839
+50849
+50857
+50867
+50873
+50891
+50893
+50909
+50923
+50929
+50951
+50957
+50969
+50971
+50989
+50993
+51001
+51031
+51043
+51047
+51059
+51061
+51071
+51109
+51131
+51133
+51137
+51151
+51157
+51169
+51193
+51197
+51199
+51203
+51217
+51229
+51239
+51241
+51257
+51263
+51283
+51287
+51307
+51329
+51341
+51343
+51347
+51349
+51361
+51383
+51407
+51413
+51419
+51421
+51427
+51431
+51437
+51439
+51449
+51461
+51473
+51479
+51481
+51487
+51503
+51511
+51517
+51521
+51539
+51551
+51563
+51577
+51581
+51593
+51599
+51607
+51613
+51631
+51637
+51647
+51659
+51673
+51679
+51683
+51691
+51713
+51719
+51721
+51749
+51767
+51769
+51787
+51797
+51803
+51817
+51827
+51829
+51839
+51853
+51859
+51869
+51871
+51893
+51899
+51907
+51913
+51929
+51941
+51949
+51971
+51973
+51977
+51991
+52009
+52021
+52027
+52051
+52057
+52067
+52069
+52081
+52103
+52121
+52127
+52147
+52153
+52163
+52177
+52181
+52183
+52189
+52201
+52223
+52237
+52249
+52253
+52259
+52267
+52289
+52291
+52301
+52313
+52321
+52361
+52363
+52369
+52379
+52387
+52391
+52433
+52453
+52457
+52489
+52501
+52511
+52517
+52529
+52541
+52543
+52553
+52561
+52567
+52571
+52579
+52583
+52609
+52627
+52631
+52639
+52667
+52673
+52691
+52697
+52709
+52711
+52721
+52727
+52733
+52747
+52757
+52769
+52783
+52807
+52813
+52817
+52837
+52859
+52861
+52879
+52883
+52889
+52901
+52903
+52919
+52937
+52951
+52957
+52963
+52967
+52973
+52981
+52999
+53003
+53017
+53047
+53051
+53069
+53077
+53087
+53089
+53093
+53101
+53113
+53117
+53129
+53147
+53149
+53161
+53171
+53173
+53189
+53197
+53201
+53231
+53233
+53239
+53267
+53269
+53279
+53281
+53299
+53309
+53323
+53327
+53353
+53359
+53377
+53381
+53401
+53407
+53411
+53419
+53437
+53441
+53453
+53479
+53503
+53507
+53527
+53549
+53551
+53569
+53591
+53593
+53597
+53609
+53611
+53617
+53623
+53629
+53633
+53639
+53653
+53657
+53681
+53693
+53699
+53717
+53719
+53731
+53759
+53773
+53777
+53783
+53791
+53813
+53819
+53831
+53849
+53857
+53861
+53881
+53887
+53891
+53897
+53899
+53917
+53923
+53927
+53939
+53951
+53959
+53987
+53993
+54001
+54011
+54013
+54037
+54049
+54059
+54083
+54091
+54101
+54121
+54133
+54139
+54151
+54163
+54167
+54181
+54193
+54217
+54251
+54269
+54277
+54287
+54293
+54311
+54319
+54323
+54331
+54347
+54361
+54367
+54371
+54377
+54401
+54403
+54409
+54413
+54419
+54421
+54437
+54443
+54449
+54469
+54493
+54497
+54499
+54503
+54517
+54521
+54539
+54541
+54547
+54559
+54563
+54577
+54581
+54583
+54601
+54617
+54623
+54629
+54631
+54647
+54667
+54673
+54679
+54709
+54713
+54721
+54727
+54751
+54767
+54773
+54779
+54787
+54799
+54829
+54833
+54851
+54869
+54877
+54881
+54907
+54917
+54919
+54941
+54949
+54959
+54973
+54979
+54983
+55001
+55009
+55021
+55049
+55051
+55057
+55061
+55073
+55079
+55103
+55109
+55117
+55127
+55147
+55163
+55171
+55201
+55207
+55213
+55217
+55219
+55229
+55243
+55249
+55259
+55291
+55313
+55331
+55333
+55337
+55339
+55343
+55351
+55373
+55381
+55399
+55411
+55439
+55441
+55457
+55469
+55487
+55501
+55511
+55529
+55541
+55547
+55579
+55589
+55603
+55609
+55619
+55621
+55631
+55633
+55639
+55661
+55663
+55667
+55673
+55681
+55691
+55697
+55711
+55717
+55721
+55733
+55763
+55787
+55793
+55799
+55807
+55813
+55817
+55819
+55823
+55829
+55837
+55843
+55849
+55871
+55889
+55897
+55901
+55903
+55921
+55927
+55931
+55933
+55949
+55967
+55987
+55997
+56003
+56009
+56039
+56041
+56053
+56081
+56087
+56093
+56099
+56101
+56113
+56123
+56131
+56149
+56167
+56171
+56179
+56197
+56207
+56209
+56237
+56239
+56249
+56263
+56267
+56269
+56299
+56311
+56333
+56359
+56369
+56377
+56383
+56393
+56401
+56417
+56431
+56437
+56443
+56453
+56467
+56473
+56477
+56479
+56489
+56501
+56503
+56509
+56519
+56527
+56531
+56533
+56543
+56569
+56591
+56597
+56599
+56611
+56629
+56633
+56659
+56663
+56671
+56681
+56687
+56701
+56711
+56713
+56731
+56737
+56747
+56767
+56773
+56779
+56783
+56807
+56809
+56813
+56821
+56827
+56843
+56857
+56873
+56891
+56893
+56897
+56909
+56911
+56921
+56923
+56929
+56941
+56951
+56957
+56963
+56983
+56989
+56993
+56999
+57037
+57041
+57047
+57059
+57073
+57077
+57089
+57097
+57107
+57119
+57131
+57139
+57143
+57149
+57163
+57173
+57179
+57191
+57193
+57203
+57221
+57223
+57241
+57251
+57259
+57269
+57271
+57283
+57287
+57301
+57329
+57331
+57347
+57349
+57367
+57373
+57383
+57389
+57397
+57413
+57427
+57457
+57467
+57487
+57493
+57503
+57527
+57529
+57557
+57559
+57571
+57587
+57593
+57601
+57637
+57641
+57649
+57653
+57667
+57679
+57689
+57697
+57709
+57713
+57719
+57727
+57731
+57737
+57751
+57773
+57781
+57787
+57791
+57793
+57803
+57809
+57829
+57839
+57847
+57853
+57859
+57881
+57899
+57901
+57917
+57923
+57943
+57947
+57973
+57977
+57991
+58013
+58027
+58031
+58043
+58049
+58057
+58061
+58067
+58073
+58099
+58109
+58111
+58129
+58147
+58151
+58153
+58169
+58171
+58189
+58193
+58199
+58207
+58211
+58217
+58229
+58231
+58237
+58243
+58271
+58309
+58313
+58321
+58337
+58363
+58367
+58369
+58379
+58391
+58393
+58403
+58411
+58417
+58427
+58439
+58441
+58451
+58453
+58477
+58481
+58511
+58537
+58543
+58549
+58567
+58573
+58579
+58601
+58603
+58613
+58631
+58657
+58661
+58679
+58687
+58693
+58699
+58711
+58727
+58733
+58741
+58757
+58763
+58771
+58787
+58789
+58831
+58889
+58897
+58901
+58907
+58909
+58913
+58921
+58937
+58943
+58963
+58967
+58979
+58991
+58997
+59009
+59011
+59021
+59023
+59029
+59051
+59053
+59063
+59069
+59077
+59083
+59093
+59107
+59113
+59119
+59123
+59141
+59149
+59159
+59167
+59183
+59197
+59207
+59209
+59219
+59221
+59233
+59239
+59243
+59263
+59273
+59281
+59333
+59341
+59351
+59357
+59359
+59369
+59377
+59387
+59393
+59399
+59407
+59417
+59419
+59441
+59443
+59447
+59453
+59467
+59471
+59473
+59497
+59509
+59513
+59539
+59557
+59561
+59567
+59581
+59611
+59617
+59621
+59627
+59629
+59651
+59659
+59663
+59669
+59671
+59693
+59699
+59707
+59723
+59729
+59743
+59747
+59753
+59771
+59779
+59791
+59797
+59809
+59833
+59863
+59879
+59887
+59921
+59929
+59951
+59957
+59971
+59981
+59999
+60013
+60017
+60029
+60037
+60041
+60077
+60083
+60089
+60091
+60101
+60103
+60107
+60127
+60133
+60139
+60149
+60161
+60167
+60169
+60209
+60217
+60223
+60251
+60257
+60259
+60271
+60289
+60293
+60317
+60331
+60337
+60343
+60353
+60373
+60383
+60397
+60413
+60427
+60443
+60449
+60457
+60493
+60497
+60509
+60521
+60527
+60539
+60589
+60601
+60607
+60611
+60617
+60623
+60631
+60637
+60647
+60649
+60659
+60661
+60679
+60689
+60703
+60719
+60727
+60733
+60737
+60757
+60761
+60763
+60773
+60779
+60793
+60811
+60821
+60859
+60869
+60887
+60889
+60899
+60901
+60913
+60917
+60919
+60923
+60937
+60943
+60953
+60961
+61001
+61007
+61027
+61031
+61043
+61051
+61057
+61091
+61099
+61121
+61129
+61141
+61151
+61153
+61169
+61211
+61223
+61231
+61253
+61261
+61283
+61291
+61297
+61331
+61333
+61339
+61343
+61357
+61363
+61379
+61381
+61403
+61409
+61417
+61441
+61463
+61469
+61471
+61483
+61487
+61493
+61507
+61511
+61519
+61543
+61547
+61553
+61559
+61561
+61583
+61603
+61609
+61613
+61627
+61631
+61637
+61643
+61651
+61657
+61667
+61673
+61681
+61687
+61703
+61717
+61723
+61729
+61751
+61757
+61781
+61813
+61819
+61837
+61843
+61861
+61871
+61879
+61909
+61927
+61933
+61949
+61961
+61967
+61979
+61981
+61987
+61991
+62003
+62011
+62017
+62039
+62047
+62053
+62057
+62071
+62081
+62099
+62119
+62129
+62131
+62137
+62141
+62143
+62171
+62189
+62191
+62201
+62207
+62213
+62219
+62233
+62273
+62297
+62299
+62303
+62311
+62323
+62327
+62347
+62351
+62383
+62401
+62417
+62423
+62459
+62467
+62473
+62477
+62483
+62497
+62501
+62507
+62533
+62539
+62549
+62563
+62581
+62591
+62597
+62603
+62617
+62627
+62633
+62639
+62653
+62659
+62683
+62687
+62701
+62723
+62731
+62743
+62753
+62761
+62773
+62791
+62801
+62819
+62827
+62851
+62861
+62869
+62873
+62897
+62903
+62921
+62927
+62929
+62939
+62969
+62971
+62981
+62983
+62987
+62989
+63029
+63031
+63059
+63067
+63073
+63079
+63097
+63103
+63113
+63127
+63131
+63149
+63179
+63197
+63199
+63211
+63241
+63247
+63277
+63281
+63299
+63311
+63313
+63317
+63331
+63337
+63347
+63353
+63361
+63367
+63377
+63389
+63391
+63397
+63409
+63419
+63421
+63439
+63443
+63463
+63467
+63473
+63487
+63493
+63499
+63521
+63527
+63533
+63541
+63559
+63577
+63587
+63589
+63599
+63601
+63607
+63611
+63617
+63629
+63647
+63649
+63659
+63667
+63671
+63689
+63691
+63697
+63703
+63709
+63719
+63727
+63737
+63743
+63761
+63773
+63781
+63793
+63799
+63803
+63809
+63823
+63839
+63841
+63853
+63857
+63863
+63901
+63907
+63913
+63929
+63949
+63977
+63997
+64007
+64013
+64019
+64033
+64037
+64063
+64067
+64081
+64091
+64109
+64123
+64151
+64153
+64157
+64171
+64187
+64189
+64217
+64223
+64231
+64237
+64271
+64279
+64283
+64301
+64303
+64319
+64327
+64333
+64373
+64381
+64399
+64403
+64433
+64439
+64451
+64453
+64483
+64489
+64499
+64513
+64553
+64567
+64577
+64579
+64591
+64601
+64609
+64613
+64621
+64627
+64633
+64661
+64663
+64667
+64679
+64693
+64709
+64717
+64747
+64763
+64781
+64783
+64793
+64811
+64817
+64849
+64853
+64871
+64877
+64879
+64891
+64901
+64919
+64921
+64927
+64937
+64951
+64969
+64997
+65003
+65011
+65027
+65029
+65033
+65053
+65063
+65071
+65089
+65099
+65101
+65111
+65119
+65123
+65129
+65141
+65147
+65167
+65171
+65173
+65179
+65183
+65203
+65213
+65239
+65257
+65267
+65269
+65287
+65293
+65309
+65323
+65327
+65353
+65357
+65371
+65381
+65393
+65407
+65413
+65419
+65423
+65437
+65447
+65449
+65479
+65497
+65519
+65521
diff --git a/security/nss/lib/freebl/mpi/doc/prng.pod b/security/nss/lib/freebl/mpi/doc/prng.pod
new file mode 100644
index 000000000..6da4d4a9c
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/prng.pod
@@ -0,0 +1,38 @@
+=head1 NAME
+
+ prng - pseudo-random number generator
+
+=head1 SYNOPSIS
+
+ prng [count]
+
+=head1 DESCRIPTION
+
+B<Prng> generates 32-bit pseudo-random integers using the
+Blum-Blum-Shub (BBS) quadratic residue generator. It is seeded using
+the standard C library's rand() function, which is itself seeded from
+the system clock and the process ID number. Thus, the values generated
+are not particularly useful for cryptographic applications, but they
+are in general much better than the typical output of the usual
+multiplicative congruential generator used by most runtime libraries.
+
+You may optionally specify how many random values should be generated
+by giving a I<count> argument on the command line. If you do not
+specify a count, only one random value will be generated. The results
+are output to the standard output in decimal notation, one value per
+line.
+
+=head1 RESTRICTIONS
+
+As stated above, B<prng> uses the C library's rand() function to seed
+the generator, so it is not terribly suitable for cryptographic
+applications. Also note that each time you run the program, a new
+seed is generated, so it is better to run it once with a I<count>
+parameter than it is to run it multiple times to generate several
+values.
+
+=head1 AUTHOR
+
+ Michael J. Fromberger <sting@linguist.dartmouth.edu>
+ Copyright (C) 1998 Michael J. Fromberger, All Rights Reserved
+ Thayer School of Engineering, Dartmouth College, Hanover, NH USA
diff --git a/security/nss/lib/freebl/mpi/doc/redux.txt b/security/nss/lib/freebl/mpi/doc/redux.txt
new file mode 100644
index 000000000..0df0f0390
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/redux.txt
@@ -0,0 +1,86 @@
+Modular Reduction
+
+Usually, modular reduction is accomplished by long division, using the
+mp_div() or mp_mod() functions. However, when performing modular
+exponentiation, you spend a lot of time reducing by the same modulus
+again and again. For this purpose, doing a full division for each
+multiplication is quite inefficient.
+
+For this reason, the mp_exptmod() function does not perform modular
+reductions in the usual way, but instead takes advantage of an
+algorithm due to Barrett, as described by Menezes, van Oorschot and
+Vanstone in their book _Handbook of Applied Cryptography_, published
+by the CRC Press (see Chapter 14 for details). This method reduces
+most of the computation of reduction to efficient shifting and masking
+operations, and avoids the multiple-precision division entirely.
+
+Here is a brief synopsis of Barrett reduction, as it is implemented in
+this library.
+
+Let b denote the radix of the computation (one more than the maximum
+value that can be denoted by an mp_digit). Let m be the modulus, and
+let k be the number of significant digits of m. Let x be the value to
+be reduced modulo m. By the Division Theorem, there exist unique
+integers Q and R such that:
+
+ x = Qm + R, 0 <= R < m
+
+Barrett reduction takes advantage of the fact that you can easily
+approximate Q to within two, given a value M such that:
+
+                 2k
+                b
+     M = floor( ----- )
+                 m
+
+Computation of M requires a full-precision division step, so if you
+are only doing a single reduction by m, you gain no advantage.
+However, when multiple reductions by the same m are required, this
+division need only be done once, beforehand. Using this, we can use
+the following equation to compute Q', an approximation of Q:
+
+                     x
+            floor( ------ ) M
+                      k-1
+                     b
+Q' = floor( ----------------- )
+                    k+1
+                   b
+
+The divisions by b^(k-1) and b^(k+1) and the floor() functions can be
+efficiently implemented with shifts and masks, leaving only a single
+multiplication to be performed to get this approximation. It can be
+shown that Q - 2 <= Q' <= Q, so in the worst case, we can get out with
+two additional subtractions to bring the value into line with the
+actual value of Q.
+
+Once we've got Q', we basically multiply that by m and subtract from
+x, yielding:
+
+ x - Q'm = Qm + R - Q'm
+
+Since we know the constraint on Q', this is one of:
+
+ R
+ m + R
+ 2m + R
+
+Since R < m by the Division Theorem, we can simply subtract off m
+until we get a value in the correct range, which will happen with no
+more than 2 subtractions:
+
+ v = x - Q'm
+
+ while(v >= m)
+ v = v - m
+ endwhile
+
+
+In random performance trials, modular exponentiation using this method
+of reduction gave around a 40% speedup over using the division for
+reduction.
+
+------------------------------------------------------------------
+ This Source Code Form is subject to the terms of the Mozilla Public
+ License, v. 2.0. If a copy of the MPL was not distributed with this
+ file, You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/security/nss/lib/freebl/mpi/doc/sqrt.txt b/security/nss/lib/freebl/mpi/doc/sqrt.txt
new file mode 100644
index 000000000..4529cbfc4
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/sqrt.txt
@@ -0,0 +1,50 @@
+Square Root
+
+A simple iterative algorithm is used to compute the greatest integer
+less than or equal to the square root. Essentially, this is Newton's
+linear approximation, computed by finding successive values of the
+equation:
+
+                 x[k]^2 - V
+x[k+1] = x[k] - ------------
+                  2 x[k]
+
+...where V is the value for which the square root is being sought. In
+essence, what is happening here is that we guess a value for the
+square root, then figure out how far off we were by squaring our guess
+and subtracting the target. Using this value, we compute a linear
+approximation for the error, and adjust the "guess". We keep doing
+this until the precision gets low enough that the above equation
+yields a quotient of zero. At this point, our last guess is one
+greater than the square root we're seeking.
+
+The initial guess is computed by dividing V by 4, which is a heuristic
+I have found to be fairly good on average. This also has the
+advantage of being very easy to compute efficiently, even for large
+values.
+
+So, the resulting algorithm works as follows:
+
+ x = V / 4 /* compute initial guess */
+
+ loop
+ t = (x * x) - V /* Compute absolute error */
+ u = 2 * x /* Adjust by tangent slope */
+ t = t / u
+
+ /* Loop is done if error is zero */
+ if(t == 0)
+ break
+
+ /* Adjust guess by error term */
+ x = x - t
+ end
+
+ x = x - 1
+
+The result of the computation is the value of x.
+
+------------------------------------------------------------------
+ This Source Code Form is subject to the terms of the Mozilla Public
+ License, v. 2.0. If a copy of the MPL was not distributed with this
+ file, You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/security/nss/lib/freebl/mpi/doc/square.txt b/security/nss/lib/freebl/mpi/doc/square.txt
new file mode 100644
index 000000000..edbb97882
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/square.txt
@@ -0,0 +1,72 @@
+Squaring Algorithm
+
+When you are squaring a value, you can take advantage of the fact that
+half the multiplications performed by the more general multiplication
+algorithm (see 'mul.txt' for a description) are redundant when the
+multiplicand equals the multiplier.
+
+In particular, the modified algorithm is:
+
+k = 0
+for j <- 0 to (#a - 1)
+    w = c[2*j] + (a[j] ^ 2);
+    k = w div R
+
+    for i <- j+1 to (#a - 1)
+        w = (2 * a[j] * a[i]) + k + c[i+j]
+        c[i+j] = w mod R
+        k = w div R
+    endfor
+    c[i+j] = k;
+    k = 0;
+endfor
+
+On the surface, this looks identical to the multiplication algorithm;
+however, note the following differences:
+
+ - precomputation of the leading term in the outer loop
+
+ - i runs from j+1 instead of from zero
+
+ - doubling of a[i] * a[j] in the inner product
+
+Unfortunately, the construction of the inner product is such that we
+need more than two digits to represent the inner product, in some
+cases. In a C implementation, this means that some gymnastics must be
+performed in order to handle overflow, for which C has no direct
+abstraction. We do this by observing the following:
+
+If we have multiplied a[i] and a[j], and the product is more than half
+the maximum value expressible in two digits, then doubling this result
+will overflow into a third digit. If this occurs, we take note of the
+overflow, and double it anyway -- unsigned integer arithmetic in C wraps
+around on overflow, so the two digits we get back should still be valid,
+modulo the overflow.
+
+Having doubled this value, we now have to add in the remainders and
+the digits already computed by earlier steps. If we did not overflow
+in the previous step, we might still cause an overflow here. That
+will happen whenever the maximum value expressible in two digits, less
+the amount we have to add, is greater than the result of the previous
+step. Thus, the overflow computation is:
+
+
+ u = 0
+ w = a[i] * a[j]
+
+ if(w > (R - 1)/ 2)
+ u = 1;
+
+ w = w * 2
+ v = c[i + j] + k
+
+ if(u == 0 && (R - 1 - v) < w)
+ u = 1
+
+If there is an overflow, u will be 1, otherwise u will be 0. The rest
+of the parameters are the same as they are in the above description.
+
+------------------------------------------------------------------
+ This Source Code Form is subject to the terms of the Mozilla Public
+ License, v. 2.0. If a copy of the MPL was not distributed with this
+ file, You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/security/nss/lib/freebl/mpi/doc/timing.txt b/security/nss/lib/freebl/mpi/doc/timing.txt
new file mode 100644
index 000000000..58f37c9df
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/doc/timing.txt
@@ -0,0 +1,213 @@
+MPI Library Timing Tests
+
+Hardware/OS
+(A) SGI O2 1 x MIPS R10000 250MHz IRIX 6.5.3
+(B) IBM RS/6000 43P-240 1 x PowerPC 603e 223MHz AIX 4.3
+(C) Dell GX1/L+ 1 x Pentium III 550MHz Linux 2.2.12-20
+(D) PowerBook G3 1 x PowerPC 750 266MHz LinuxPPC 2.2.6-15apmac
+(E) PowerBook G3 1 x PowerPC 750 266MHz MacOS 8.5.1
+(F) PowerBook G3 1 x PowerPC 750 400MHz MacOS 9.0.2
+
+Compiler
+(1) MIPSpro C 7.2.1 -O3 optimizations
+(2) GCC 2.95.1 -O3 optimizations
+(3) IBM AIX xlc -O3 optimizations (version unknown)
+(4) EGCS 2.91.66 -O3 optimizations
+(5) Metrowerks CodeWarrior 5.0 C, all optimizations
+(6) MIPSpro C 7.30 -O3 optimizations
+(7) same as (6), with optimized libmalloc.so
+
+Timings are given in seconds, computed using the C library's clock()
+function. The first column gives the hardware and compiler
+configuration used for the test. The second column indicates the
+number of tests that were aggregated to get the statistics for that
+size. These were compiled using 16 bit digits.
+
+Source data were generated randomly using a fixed seed, so they should
+be internally consistent, but may vary on different systems depending
+on the C library. Also, since the resolution of the timer accessed by
+clock() varies, there may be some variance in the precision of these
+measurements.
+
+Prime Generation (primegen)
+
+128 bits:
+A1 200 min=0.03, avg=0.19, max=0.72, sum=38.46
+A2 200 min=0.02, avg=0.16, max=0.62, sum=32.55
+B3 200 min=0.01, avg=0.07, max=0.22, sum=13.29
+C4 200 min=0.00, avg=0.03, max=0.20, sum=6.14
+D4 200 min=0.00, avg=0.05, max=0.33, sum=9.70
+A6 200 min=0.01, avg=0.09, max=0.36, sum=17.48
+A7 200 min=0.00, avg=0.05, max=0.24, sum=10.07
+
+192 bits:
+A1 200 min=0.05, avg=0.45, max=3.13, sum=89.96
+A2 200 min=0.04, avg=0.39, max=2.61, sum=77.55
+B3 200 min=0.02, avg=0.18, max=1.25, sum=36.97
+C4 200 min=0.01, avg=0.09, max=0.33, sum=18.24
+D4 200 min=0.02, avg=0.15, max=0.54, sum=29.63
+A6 200 min=0.02, avg=0.24, max=1.70, sum=47.84
+A7 200 min=0.01, avg=0.15, max=1.05, sum=30.88
+
+256 bits:
+A1 200 min=0.08, avg=0.92, max=6.13, sum=184.79
+A2 200 min=0.06, avg=0.76, max=5.03, sum=151.11
+B3 200 min=0.04, avg=0.41, max=2.68, sum=82.35
+C4 200 min=0.02, avg=0.19, max=0.69, sum=37.91
+D4 200 min=0.03, avg=0.31, max=1.15, sum=63.00
+A6 200 min=0.04, avg=0.48, max=3.13, sum=95.46
+A7 200 min=0.03, avg=0.37, max=2.36, sum=73.60
+
+320 bits:
+A1 200 min=0.11, avg=1.59, max=6.14, sum=318.81
+A2 200 min=0.09, avg=1.27, max=4.93, sum=254.03
+B3 200 min=0.07, avg=0.82, max=3.13, sum=163.80
+C4 200 min=0.04, avg=0.44, max=1.91, sum=87.59
+D4 200 min=0.06, avg=0.73, max=3.22, sum=146.73
+A6 200 min=0.07, avg=0.93, max=3.50, sum=185.01
+A7 200 min=0.05, avg=0.76, max=2.94, sum=151.78
+
+384 bits:
+A1 200 min=0.16, avg=2.69, max=11.41, sum=537.89
+A2 200 min=0.13, avg=2.15, max=9.03, sum=429.14
+B3 200 min=0.11, avg=1.54, max=6.49, sum=307.78
+C4 200 min=0.06, avg=0.81, max=4.84, sum=161.13
+D4 200 min=0.10, avg=1.38, max=8.31, sum=276.81
+A6 200 min=0.11, avg=1.73, max=7.36, sum=345.55
+A7 200 min=0.09, avg=1.46, max=6.12, sum=292.02
+
+448 bits:
+A1 200 min=0.23, avg=3.36, max=15.92, sum=672.63
+A2 200 min=0.17, avg=2.61, max=12.25, sum=522.86
+B3 200 min=0.16, avg=2.10, max=9.83, sum=420.86
+C4 200 min=0.09, avg=1.44, max=7.64, sum=288.36
+D4 200 min=0.16, avg=2.50, max=13.29, sum=500.17
+A6 200 min=0.15, avg=2.31, max=10.81, sum=461.58
+A7 200 min=0.14, avg=2.03, max=9.53, sum=405.16
+
+512 bits:
+A1 200 min=0.30, avg=6.12, max=22.18, sum=1223.35
+A2 200 min=0.25, avg=4.67, max=16.90, sum=933.18
+B3 200 min=0.23, avg=4.13, max=14.94, sum=825.45
+C4 200 min=0.13, avg=2.08, max=9.75, sum=415.22
+D4 200 min=0.24, avg=4.04, max=20.18, sum=808.11
+A6 200 min=0.22, avg=4.47, max=16.19, sum=893.83
+A7 200 min=0.20, avg=4.03, max=14.65, sum=806.02
+
+Modular Exponentiation (metime)
+
+The following results are aggregated from 200 pseudo-randomly
+generated tests, based on a fixed seed.
+
+ base, exponent, and modulus size (bits)
+P/C 128 192 256 320 384 448 512 640 768 896 1024
+------- -----------------------------------------------------------------
+A1 0.015 0.027 0.047 0.069 0.098 0.133 0.176 0.294 0.458 0.680 1.040
+A2 0.013 0.024 0.037 0.053 0.077 0.102 0.133 0.214 0.326 0.476 0.668
+B3 0.005 0.011 0.021 0.036 0.056 0.084 0.121 0.222 0.370 0.573 0.840
+C4 0.002 0.006 0.011 0.020 0.032 0.048 0.069 0.129 0.223 0.344 0.507
+D4 0.004 0.010 0.019 0.034 0.056 0.085 0.123 0.232 0.390 0.609 0.899
+E5 0.007 0.015 0.031 0.055 0.088 0.133 0.183 0.342 0.574 0.893 1.317
+A6 0.008 0.016 0.038 0.042 0.064 0.093 0.133 0.239 0.393 0.604 0.880
+A7 0.005 0.011 0.020 0.036 0.056 0.083 0.121 0.223 0.374 0.583 0.855
+
+Multiplication and Squaring tests, (mulsqr)
+
+The following results are aggregated from 500000 pseudo-randomly
+generated tests, based on a per-run wall-clock seed. Times are given
+in seconds, except where indicated in microseconds (us).
+
+(A1)
+
+bits multiply square ad percent time/mult time/square
+64 9.33 9.15 > 1.9 18.7us 18.3us
+128 10.88 10.44 > 4.0 21.8us 20.9us
+192 13.30 11.89 > 10.6 26.7us 23.8us
+256 14.88 12.64 > 15.1 29.8us 25.3us
+320 18.64 15.01 > 19.5 37.3us 30.0us
+384 23.11 17.70 > 23.4 46.2us 35.4us
+448 28.28 20.88 > 26.2 56.6us 41.8us
+512 34.09 24.51 > 28.1 68.2us 49.0us
+640 47.86 33.25 > 30.5 95.7us 66.5us
+768 64.91 43.54 > 32.9 129.8us 87.1us
+896 84.49 55.48 > 34.3 169.0us 111.0us
+1024 107.25 69.21 > 35.5 214.5us 138.4us
+1536 227.97 141.91 > 37.8 456.0us 283.8us
+2048 394.05 242.15 > 38.5 788.1us 484.3us
+
+(A2)
+
+bits multiply square ad percent time/mult time/square
+64 7.87 7.95 < 1.0 15.7us 15.9us
+128 9.40 9.19 > 2.2 18.8us 18.4us
+192 11.15 10.59 > 5.0 22.3us 21.2us
+256 12.02 11.16 > 7.2 24.0us 22.3us
+320 14.62 13.43 > 8.1 29.2us 26.9us
+384 17.72 15.80 > 10.8 35.4us 31.6us
+448 21.24 18.51 > 12.9 42.5us 37.0us
+512 25.36 21.78 > 14.1 50.7us 43.6us
+640 34.57 29.00 > 16.1 69.1us 58.0us
+768 46.10 37.60 > 18.4 92.2us 75.2us
+896 58.94 47.72 > 19.0 117.9us 95.4us
+1024 73.76 59.12 > 19.8 147.5us 118.2us
+1536 152.00 118.80 > 21.8 304.0us 237.6us
+2048 259.41 199.57 > 23.1 518.8us 399.1us
+
+(B3)
+
+bits multiply square ad percent time/mult time/square
+64 2.60 2.47 > 5.0 5.20us 4.94us
+128 4.43 4.06 > 8.4 8.86us 8.12us
+192 7.03 6.10 > 13.2 14.1us 12.2us
+256 10.44 8.59 > 17.7 20.9us 17.2us
+320 14.44 11.64 > 19.4 28.9us 23.3us
+384 19.12 15.08 > 21.1 38.2us 30.2us
+448 24.55 19.09 > 22.2 49.1us 38.2us
+512 31.03 23.53 > 24.2 62.1us 47.1us
+640 45.05 33.80 > 25.0 90.1us 67.6us
+768 63.02 46.05 > 26.9 126.0us 92.1us
+896 83.74 60.29 > 28.0 167.5us 120.6us
+1024 106.73 76.65 > 28.2 213.5us 153.3us
+1536 228.94 160.98 > 29.7 457.9us 322.0us
+2048 398.08 275.93 > 30.7 796.2us 551.9us
+
+(C4)
+
+bits multiply square ad percent time/mult time/square
+64 1.34 1.28 > 4.5 2.68us 2.56us
+128 2.76 2.59 > 6.2 5.52us 5.18us
+192 4.52 4.16 > 8.0 9.04us 8.32us
+256 6.64 5.99 > 9.8 13.3us 12.0us
+320 9.20 8.13 > 11.6 18.4us 16.3us
+384 12.01 10.58 > 11.9 24.0us 21.2us
+448 15.24 13.33 > 12.5 30.5us 26.7us
+512 19.02 16.46 > 13.5 38.0us 32.9us
+640 27.56 23.54 > 14.6 55.1us 47.1us
+768 37.89 31.78 > 16.1 75.8us 63.6us
+896 49.24 41.42 > 15.9 98.5us 82.8us
+1024 62.59 52.18 > 16.6 125.2us 104.3us
+1536 131.66 107.72 > 18.2 263.3us 215.4us
+2048 226.45 182.95 > 19.2 453.0us 365.9us
+
+(A7)
+
+bits multiply square ad percent time/mult time/square
+64 1.74 1.71 > 1.7 3.48us 3.42us
+128 3.48 2.96 > 14.9 6.96us 5.92us
+192 5.74 4.60 > 19.9 11.5us 9.20us
+256 8.75 6.61 > 24.5 17.5us 13.2us
+320 12.5 8.99 > 28.1 25.0us 18.0us
+384 16.9 11.9 > 29.6 33.8us 23.8us
+448 22.2 15.2 > 31.7 44.4us 30.4us
+512 28.3 19.0 > 32.7 56.6us 38.0us
+640 42.4 28.0 > 34.0 84.8us 56.0us
+768 59.4 38.5 > 35.2 118.8us 77.0us
+896 79.5 51.2 > 35.6 159.0us 102.4us
+1024 102.6 65.5 > 36.2 205.2us 131.0us
+1536 224.3 140.6 > 37.3 448.6us 281.2us
+2048 393.4 244.3 > 37.9 786.8us 488.6us
+
+------------------------------------------------------------------
+ This Source Code Form is subject to the terms of the Mozilla Public
+ License, v. 2.0. If a copy of the MPL was not distributed with this
+ file, You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/security/nss/lib/freebl/mpi/hpma512.s b/security/nss/lib/freebl/mpi/hpma512.s
new file mode 100644
index 000000000..ae9da630d
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/hpma512.s
@@ -0,0 +1,615 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+/*
+ *
+ * This PA-RISC 2.0 function computes the product of two unsigned integers,
+ * and adds the result to a previously computed integer. The multiplicand
+ * is a 512-bit (64-byte, eight doubleword) unsigned integer, stored in
+ * memory in little-double-wordian order. The multiplier is an unsigned
+ * 64-bit integer. The previously computed integer to which the product is
+ * added is located in the result ("res") area, and is assumed to be a
+ * 576-bit (72-byte, nine doubleword) unsigned integer, stored in memory
+ * in little-double-wordian order. This value normally will be the result
+ * of a previously computed nine doubleword result. It is not necessary
+ * to pad the multiplicand with an additional 64-bit zero doubleword.
+ *
+ * Multiplicand, multiplier, and addend ideally should be aligned at
+ * 16-byte boundaries for best performance. The code will function
+ * correctly for alignment at eight-byte boundaries which are not 16-byte
+ * boundaries, but the execution may be slightly slower due to even/odd
+ * bank conflicts on PA-RISC 8000 processors.
+ *
+ * This function is designed to accept the same calling sequence as Bill
+ * Ackerman's "maxpy_little" function. The carry from the ninth doubleword
+ * of the result is written to the tenth word of the result, as is done by
+ * Bill Ackerman's function. The final carry also is returned as an
+ * integer, which may be ignored. The function prototype may be either
+ * of the following:
+ *
+ * void multacc512( int l, chunk* m, const chunk* a, chunk* res );
+ * or
+ * int multacc512( int l, chunk* m, const chunk* a, chunk* res );
+ *
+ * where: "l" originally denoted vector lengths. This parameter is
+ * ignored. This function always assumes a multiplicand length of
+ * 512 bits (eight doublewords), and addend and result lengths of
+ * 576 bits (nine doublewords).
+ *
+ * "m" is a pointer to the doubleword multiplier, ideally aligned
+ * on a 16-byte boundary.
+ *
+ * "a" is a pointer to the eight-doubleword multiplicand, stored
+ * in little-double-wordian order, and ideally aligned on a 16-byte
+ * boundary.
+ *
+ * "res" is a pointer to the nine doubleword addend, and to the
+ * nine-doubleword product computed by this function. The result
+ * also is stored in little-double-wordian order, and ideally is
+ * aligned on a 16-byte boundary. It is expected that the alignment
+ * of the "res" area may alternate between even/odd doubleword
+ * boundaries for successive calls for 512-bit x 512-bit
+ * multiplications.
+ *
+ * The code for this function has been scheduled to use the parallelism
+ * of the PA-RISC 8000 series microprocessors as well as the author was
+ * able. Comments and/or suggestions for improvement are welcomed.
+ *
+ * The code is "64-bit safe". This means it may be called in either
+ * the ILP32 context or the LP64 context. All 64 bits of the callee-saves
+ * registers it uses are saved and restored.
+ *
+ * This code is self-contained. It requires no other header files in order
+ * to compile and to be linkable on a PA-RISC 2.0 machine. Symbolic
+ * definitions for registers and stack offsets are included within this
+ * one source file.
+ *
+ * This is a leaf routine. As such, minimal use is made of the stack area.
+ * Of the 192 bytes allocated, 64 bytes are used for saving/restoring eight
+ * general registers, and 128 bytes are used to move intermediate products
+ * from the floating-point registers to the general registers. Stack
+ * protocols assure proper alignment of these areas.
+ *
+ */
+
+
+/* ====================================================================*/
+/* symbolic definitions for PA-RISC registers */
+/* in the MIPS style, avoids lots of case shifts */
+/* assignments (except t4) preserve register number parity */
+/* ====================================================================*/
+
+#define zero %r0 /* permanent zero */
+#define t5 %r1 /* temp register, altered by addil */
+
+#define rp %r2 /* return pointer */
+
+#define s1 %r3 /* callee saves register*/
+#define s0 %r4 /* callee saves register*/
+#define s3 %r5 /* callee saves register*/
+#define s2 %r6 /* callee saves register*/
+#define s5 %r7 /* callee saves register*/
+#define s4 %r8 /* callee saves register*/
+#define s7 %r9 /* callee saves register*/
+#define s6 %r10 /* callee saves register*/
+
+#define t1 %r19 /* caller saves register*/
+#define t0 %r20 /* caller saves register*/
+#define t3 %r21 /* caller saves register*/
+#define t2 %r22 /* caller saves register*/
+
+#define a3 %r23 /* fourth argument register, high word */
+#define a2 %r24 /* third argument register, low word*/
+#define a1 %r25 /* second argument register, high word*/
+#define a0 %r26 /* first argument register, low word*/
+
+#define v0 %r28 /* high order return value*/
+#define v1 %r29 /* low order return value*/
+
+#define sp %r30 /* stack pointer*/
+#define t4 %r31 /* temporary register (the one name/number parity exception) */
+
+#define fa0 %fr4 /* first argument register*/
+#define fa1 %fr5 /* second argument register*/
+#define fa2 %fr6 /* third argument register*/
+#define fa3 %fr7 /* fourth argument register*/
+
+#define fa0r %fr4R /* first argument register, right (low order) half */
+#define fa1r %fr5R /* second argument register, right (low order) half */
+#define fa2r %fr6R /* third argument register, right (low order) half */
+#define fa3r %fr7R /* fourth argument register, right (low order) half */
+
+#define ft0 %fr8 /* caller saves register*/
+#define ft1 %fr9 /* caller saves register*/
+#define ft2 %fr10 /* caller saves register*/
+#define ft3 %fr11 /* caller saves register*/
+
+#define ft0r %fr8R /* caller saves register, right (low order) half */
+#define ft1r %fr9R /* caller saves register, right (low order) half */
+#define ft2r %fr10R /* caller saves register, right (low order) half */
+#define ft3r %fr11R /* caller saves register, right (low order) half */
+
+#define ft4 %fr22 /* caller saves register*/
+#define ft5 %fr23 /* caller saves register*/
+#define ft6 %fr24 /* caller saves register*/
+#define ft7 %fr25 /* caller saves register*/
+#define ft8 %fr26 /* caller saves register*/
+#define ft9 %fr27 /* caller saves register*/
+#define ft10 %fr28 /* caller saves register*/
+#define ft11 %fr29 /* caller saves register*/
+#define ft12 %fr30 /* caller saves register*/
+#define ft13 %fr31 /* caller saves register*/
+
+#define ft4r %fr22R /* caller saves register, right (low order) half */
+#define ft5r %fr23R /* caller saves register, right (low order) half */
+#define ft6r %fr24R /* caller saves register, right (low order) half */
+#define ft7r %fr25R /* caller saves register, right (low order) half */
+#define ft8r %fr26R /* caller saves register, right (low order) half */
+#define ft9r %fr27R /* caller saves register, right (low order) half */
+#define ft10r %fr28R /* caller saves register, right (low order) half */
+#define ft11r %fr29R /* caller saves register, right (low order) half */
+#define ft12r %fr30R /* caller saves register, right (low order) half */
+#define ft13r %fr31R /* caller saves register, right (low order) half */
+
+
+
+/* ================================================================== */
+/* functional definitions for PA-RISC registers */
+/* ================================================================== */
+
+/* general registers */
+
+#define T1 a0 /* temp, (length parameter ignored) */
+
+#define pM a1 /* -> 64-bit multiplier */
+#define T2 a1 /* temp, (after fetching multiplier) */
+
+#define pA a2 /* -> multiplicand vector (8 64-bit doublewords) */
+#define T3 a2 /* temp, (after fetching multiplicand) */
+
+#define pR a3 /* -> addend vector (9 64-bit doublewords),
+ result vector (9 64-bit doublewords) */
+
+#define S0 s0 /* callee saves summand registers */
+#define S1 s1
+#define S2 s2
+#define S3 s3
+#define S4 s4
+#define S5 s5
+#define S6 s6
+#define S7 s7
+
+#define S8 v0 /* caller saves summand registers */
+#define S9 v1
+#define S10 t0
+#define S11 t1
+#define S12 t2
+#define S13 t3
+#define S14 t4
+#define S15 t5
+
+
+
+/* floating-point registers */
+
+#define M fa0 /* multiplier double word */
+#define MR fa0r /* low order half of multiplier double word */
+#define ML fa0 /* high order half of multiplier double word */
+
+#define A0 fa2 /* multiplicand double word 0 */
+#define A0R fa2r /* low order half of multiplicand double word 0 */
+#define A0L fa2 /* high order half of multiplicand double word 0 */
+
+#define A1 fa3 /* multiplicand double word 1 */
+#define A1R fa3r /* low order half of multiplicand double word 1 */
+#define A1L fa3 /* high order half of multiplicand double word 1 */
+
+#define A2 ft0 /* multiplicand double word 2 */
+#define A2R ft0r /* low order half of multiplicand double word 2 */
+#define A2L ft0 /* high order half of multiplicand double word 2 */
+
+#define A3 ft1 /* multiplicand double word 3 */
+#define A3R ft1r /* low order half of multiplicand double word 3 */
+#define A3L ft1 /* high order half of multiplicand double word 3 */
+
+#define A4 ft2 /* multiplicand double word 4 */
+#define A4R ft2r /* low order half of multiplicand double word 4 */
+#define A4L ft2 /* high order half of multiplicand double word 4 */
+
+#define A5 ft3 /* multiplicand double word 5 */
+#define A5R ft3r /* low order half of multiplicand double word 5 */
+#define A5L ft3 /* high order half of multiplicand double word 5 */
+
+#define A6 ft4 /* multiplicand double word 6 */
+#define A6R ft4r /* low order half of multiplicand double word 6 */
+#define A6L ft4 /* high order half of multiplicand double word 6 */
+
+#define A7 ft5 /* multiplicand double word 7 */
+#define A7R ft5r /* low order half of multiplicand double word 7 */
+#define A7L ft5 /* high order half of multiplicand double word 7 */
+
+#define P0 ft6 /* xmpyu partial product register 0 */
+#define P1 ft7 /* xmpyu partial product register 1 */
+#define P2 ft8 /* xmpyu partial product register 2 */
+#define P3 ft9 /* xmpyu partial product register 3 */
+#define P4 ft10 /* xmpyu partial product register 4 */
+#define P5 ft11 /* xmpyu partial product register 5 */
+#define P6 ft12 /* xmpyu partial product register 6 */
+#define P7 ft13 /* xmpyu partial product register 7 */
+
+
+
+
+/* ====================================================================== */
+/* symbolic definitions for HP-UX stack offsets */
+/* symbolic definitions for memory NOPs */
+/* ====================================================================== */
+
+#define ST_SZ 192 /* stack area total size, in bytes */
+
+#define SV0 -192(sp) /* general register save area (8 doublewords) */
+#define SV1 -184(sp)
+#define SV2 -176(sp)
+#define SV3 -168(sp)
+#define SV4 -160(sp)
+#define SV5 -152(sp)
+#define SV6 -144(sp)
+#define SV7 -136(sp)
+
+#define XF0 -128(sp) /* data transfer area (16 doublewords) */
+#define XF1 -120(sp) /* for floating-pt to integer regs */
+#define XF2 -112(sp)
+#define XF3 -104(sp)
+#define XF4 -96(sp)
+#define XF5 -88(sp)
+#define XF6 -80(sp)
+#define XF7 -72(sp)
+#define XF8 -64(sp)
+#define XF9 -56(sp)
+#define XF10 -48(sp)
+#define XF11 -40(sp)
+#define XF12 -32(sp)
+#define XF13 -24(sp)
+#define XF14 -16(sp)
+#define XF15 -8(sp)
+
+#define mnop proberi (sp),3,zero /* memory NOP (defined but not referenced in this file) */
+
+
+
+
+/* ====================================================================== */
+/* assembler formalities */
+/* ====================================================================== */
+
+#ifdef __LP64__
+ .level 2.0W
+#else
+ .level 2.0
+#endif
+ .space $TEXT$
+ .subspa $CODE$
+ .align 16
+
+/* ====================================================================== */
+/* here to compute 64-bit x 512-bit product + 576-bit addend */
+/* ====================================================================== */
+
+multacc512
+ .PROC
+ .CALLINFO
+ .ENTRY
+ fldd 0(pM),M ; multiplier double word
+ ldo ST_SZ(sp),sp ; push stack (192 bytes; .CALLINFO above declares no FRAME)
+
+ fldd 0(pA),A0 ; multiplicand double word 0
+ std S1,SV1 ; save s1
+
+ fldd 16(pA),A2 ; multiplicand double word 2
+ std S3,SV3 ; save s3
+
+ fldd 32(pA),A4 ; multiplicand double word 4
+ std S5,SV5 ; save s5
+
+ fldd 48(pA),A6 ; multiplicand double word 6
+ std S7,SV7 ; save s7
+
+
+ std S0,SV0 ; save s0
+ fldd 8(pA),A1 ; multiplicand double word 1
+ xmpyu MR,A0L,P0 ; A0 cross 32-bit word products
+ xmpyu ML,A0R,P2
+
+ std S2,SV2 ; save s2
+ fldd 24(pA),A3 ; multiplicand double word 3
+ xmpyu MR,A2L,P4 ; A2 cross 32-bit word products
+ xmpyu ML,A2R,P6
+
+ std S4,SV4 ; save s4
+ fldd 40(pA),A5 ; multiplicand double word 5
+
+ std S6,SV6 ; save s6
+ fldd 56(pA),A7 ; multiplicand double word 7
+
+
+ fstd P0,XF0 ; MR * A0L
+ xmpyu MR,A0R,P0 ; A0 right 32-bit word product
+ xmpyu MR,A1L,P1 ; A1 cross 32-bit word product
+
+ fstd P2,XF2 ; ML * A0R
+ xmpyu ML,A0L,P2 ; A0 left 32-bit word product
+ xmpyu ML,A1R,P3 ; A1 cross 32-bit word product
+
+ fstd P4,XF4 ; MR * A2L
+ xmpyu MR,A2R,P4 ; A2 right 32-bit word product
+ xmpyu MR,A3L,P5 ; A3 cross 32-bit word product
+
+ fstd P6,XF6 ; ML * A2R
+ xmpyu ML,A2L,P6 ; A2 parallel 32-bit word product
+ xmpyu ML,A3R,P7 ; A3 cross 32-bit word product
+
+
+ ldd XF0,S0 ; MR * A0L
+ fstd P1,XF1 ; MR * A1L
+
+ ldd XF2,S2 ; ML * A0R
+ fstd P3,XF3 ; ML * A1R
+
+ ldd XF4,S4 ; MR * A2L
+ fstd P5,XF5 ; MR * A3L
+ xmpyu MR,A1R,P1 ; A1 parallel 32-bit word products
+ xmpyu ML,A1L,P3
+
+ ldd XF6,S6 ; ML * A2R
+ fstd P7,XF7 ; ML * A3R
+ xmpyu MR,A3R,P5 ; A3 parallel 32-bit word products
+ xmpyu ML,A3L,P7
+
+
+ fstd P0,XF0 ; MR * A0R
+ ldd XF1,S1 ; MR * A1L
+ nop
+ add S0,S2,T1 ; A0 cross product sum
+
+ fstd P2,XF2 ; ML * A0L
+ ldd XF3,S3 ; ML * A1R
+ add,dc zero,zero,S0 ; A0 cross product sum carry
+ depd,z T1,31,32,S2 ; A0 cross product sum << 32
+
+ fstd P4,XF4 ; MR * A2R
+ ldd XF5,S5 ; MR * A3L
+ shrpd S0,T1,32,S0 ; A0 carry | cross product sum >> 32
+ add S4,S6,T3 ; A2 cross product sum
+
+ fstd P6,XF6 ; ML * A2L
+ ldd XF7,S7 ; ML * A3R
+ add,dc zero,zero,S4 ; A2 cross product sum carry
+ depd,z T3,31,32,S6 ; A2 cross product sum << 32
+
+
+ ldd XF0,S8 ; MR * A0R
+ fstd P1,XF1 ; MR * A1R
+ xmpyu MR,A4L,P0 ; A4 cross 32-bit word product
+ xmpyu MR,A5L,P1 ; A5 cross 32-bit word product
+
+ ldd XF2,S10 ; ML * A0L
+ fstd P3,XF3 ; ML * A1L
+ xmpyu ML,A4R,P2 ; A4 cross 32-bit word product
+ xmpyu ML,A5R,P3 ; A5 cross 32-bit word product
+
+ ldd XF4,S12 ; MR * A2R
+ fstd P5,XF5 ; MR * A3R
+ xmpyu MR,A6L,P4 ; A6 cross 32-bit word product
+ xmpyu MR,A7L,P5 ; A7 cross 32-bit word product
+
+ ldd XF6,S14 ; ML * A2L
+ fstd P7,XF7 ; ML * A3L
+ xmpyu ML,A6R,P6 ; A6 cross 32-bit word product
+ xmpyu ML,A7R,P7 ; A7 cross 32-bit word product
+
+
+ fstd P0,XF0 ; MR * A4L
+ ldd XF1,S9 ; MR * A1R
+ shrpd S4,T3,32,S4 ; A2 carry | cross product sum >> 32
+ add S1,S3,T1 ; A1 cross product sum
+
+ fstd P2,XF2 ; ML * A4R
+ ldd XF3,S11 ; ML * A1L
+ add,dc zero,zero,S1 ; A1 cross product sum carry
+ depd,z T1,31,32,S3 ; A1 cross product sum << 32
+
+ fstd P4,XF4 ; MR * A6L
+ ldd XF5,S13 ; MR * A3R
+ shrpd S1,T1,32,S1 ; A1 carry | cross product sum >> 32
+ add S5,S7,T3 ; A3 cross product sum
+
+ fstd P6,XF6 ; ML * A6R
+ ldd XF7,S15 ; ML * A3L
+ add,dc zero,zero,S5 ; A3 cross product sum carry
+ depd,z T3,31,32,S7 ; A3 cross product sum << 32
+
+
+ shrpd S5,T3,32,S5 ; A3 carry | cross product sum >> 32
+ add S2,S8,S8 ; M * A0 right doubleword, P0 doubleword
+
+ add,dc S0,S10,S10 ; M * A0 left doubleword
+ add S3,S9,S9 ; M * A1 right doubleword
+
+ add,dc S1,S11,S11 ; M * A1 left doubleword
+ add S6,S12,S12 ; M * A2 right doubleword
+
+
+ ldd 24(pR),S3 ; Addend word 3
+ fstd P1,XF1 ; MR * A5L
+ add,dc S4,S14,S14 ; M * A2 left doubleword
+ xmpyu MR,A5R,P1 ; A5 right 32-bit word product
+
+ ldd 8(pR),S1 ; Addend word 1
+ fstd P3,XF3 ; ML * A5R
+ add S7,S13,S13 ; M * A3 right doubleword
+ xmpyu ML,A5L,P3 ; A5 left 32-bit word product
+
+ ldd 0(pR),S7 ; Addend word 0
+ fstd P5,XF5 ; MR * A7L
+ add,dc S5,S15,S15 ; M * A3 left doubleword
+ xmpyu MR,A7R,P5 ; A7 right 32-bit word product
+
+ ldd 16(pR),S5 ; Addend word 2
+ fstd P7,XF7 ; ML * A7R
+ add S10,S9,S9 ; P1 doubleword
+ xmpyu ML,A7L,P7 ; A7 left 32-bit word products
+
+
+ ldd XF0,S0 ; MR * A4L
+ fstd P1,XF9 ; MR * A5R
+ add,dc S11,S12,S12 ; P2 doubleword
+ xmpyu MR,A4R,P0 ; A4 right 32-bit word product
+
+ ldd XF2,S2 ; ML * A4R
+ fstd P3,XF11 ; ML * A5L
+ add,dc S14,S13,S13 ; P3 doubleword
+ xmpyu ML,A4L,P2 ; A4 left 32-bit word product
+
+ ldd XF6,S6 ; ML * A6R
+ fstd P5,XF13 ; MR * A7R
+ add,dc zero,S15,T2 ; P4 partial doubleword
+ xmpyu MR,A6R,P4 ; A6 right 32-bit word product
+
+ ldd XF4,S4 ; MR * A6L
+ fstd P7,XF15 ; ML * A7L
+ add S7,S8,S8 ; R0 + P0, new R0 doubleword
+ xmpyu ML,A6L,P6 ; A6 left 32-bit word product
+
+
+ fstd P0,XF0 ; MR * A4R
+ ldd XF7,S7 ; ML * A7R
+ add,dc S1,S9,S9 ; c + R1 + P1, new R1 doubleword
+
+ fstd P2,XF2 ; ML * A4L
+ ldd XF1,S1 ; MR * A5L
+ add,dc S5,S12,S12 ; c + R2 + P2, new R2 doubleword
+
+ fstd P4,XF4 ; MR * A6R
+ ldd XF5,S5 ; MR * A7L
+ add,dc S3,S13,S13 ; c + R3 + P3, new R3 doubleword
+
+ fstd P6,XF6 ; ML * A6L
+ ldd XF3,S3 ; ML * A5R
+ add,dc zero,T2,T2 ; c + partial P4
+ add S0,S2,T1 ; A4 cross product sum
+
+
+ std S8,0(pR) ; save R0
+ add,dc zero,zero,S0 ; A4 cross product sum carry
+ depd,z T1,31,32,S2 ; A4 cross product sum << 32
+
+ std S9,8(pR) ; save R1
+ shrpd S0,T1,32,S0 ; A4 carry | cross product sum >> 32
+ add S4,S6,T3 ; A6 cross product sum
+
+ std S12,16(pR) ; save R2
+ add,dc zero,zero,S4 ; A6 cross product sum carry
+ depd,z T3,31,32,S6 ; A6 cross product sum << 32
+
+
+ std S13,24(pR) ; save R3
+ shrpd S4,T3,32,S4 ; A6 carry | cross product sum >> 32
+ add S1,S3,T1 ; A5 cross product sum
+
+ ldd XF0,S8 ; MR * A4R
+ add,dc zero,zero,S1 ; A5 cross product sum carry
+ depd,z T1,31,32,S3 ; A5 cross product sum << 32
+
+ ldd XF2,S10 ; ML * A4L
+ ldd XF9,S9 ; MR * A5R
+ shrpd S1,T1,32,S1 ; A5 carry | cross product sum >> 32
+ add S5,S7,T3 ; A7 cross product sum
+
+ ldd XF4,S12 ; MR * A6R
+ ldd XF11,S11 ; ML * A5L
+ add,dc zero,zero,S5 ; A7 cross product sum carry
+ depd,z T3,31,32,S7 ; A7 cross product sum << 32
+
+ ldd XF6,S14 ; ML * A6L
+ ldd XF13,S13 ; MR * A7R
+ shrpd S5,T3,32,S5 ; A7 carry | cross product sum >> 32
+ add S2,S8,S8 ; M * A4 right doubleword
+
+
+ ldd XF15,S15 ; ML * A7L
+ add,dc S0,S10,S10 ; M * A4 left doubleword
+ add S3,S9,S9 ; M * A5 right doubleword
+
+ add,dc S1,S11,S11 ; M * A5 left doubleword
+ add S6,S12,S12 ; M * A6 right doubleword
+
+ ldd 32(pR),S0 ; Addend word 4
+ ldd 40(pR),S1 ; Addend word 5
+ add,dc S4,S14,S14 ; M * A6 left doubleword
+ add S7,S13,S13 ; M * A7 right doubleword
+
+ ldd 48(pR),S2 ; Addend word 6
+ ldd 56(pR),S3 ; Addend word 7
+ add,dc S5,S15,S15 ; M * A7 left doubleword
+ add S8,T2,S8 ; P4 doubleword
+
+ ldd 64(pR),S4 ; Addend word 8
+ ldd SV5,s5 ; restore s5
+ add,dc S10,S9,S9 ; P5 doubleword
+ add,dc S11,S12,S12 ; P6 doubleword
+
+
+ ldd SV6,s6 ; restore s6
+ ldd SV7,s7 ; restore s7
+ add,dc S14,S13,S13 ; P7 doubleword
+ add,dc zero,S15,S15 ; P8 doubleword
+
+ add S0,S8,S8 ; new R4 doubleword
+
+ ldd SV0,s0 ; restore s0
+ std S8,32(pR) ; save R4
+ add,dc S1,S9,S9 ; new R5 doubleword
+
+ ldd SV1,s1 ; restore s1
+ std S9,40(pR) ; save R5
+ add,dc S2,S12,S12 ; new R6 doubleword
+
+ ldd SV2,s2 ; restore s2
+ std S12,48(pR) ; save R6
+ add,dc S3,S13,S13 ; new R7 doubleword
+
+ ldd SV3,s3 ; restore s3
+ std S13,56(pR) ; save R7
+ add,dc S4,S15,S15 ; new R8 doubleword
+
+ ldd SV4,s4 ; restore s4
+ std S15,64(pR) ; save result[8]
+ add,dc zero,zero,v0 ; return carry from R8
+
+ CMPIB,*= 0,v0,$L0 ; if no overflow, exit
+ LDO 8(pR),pR ; (delay slot) step result pointer up one doubleword
+
+$FINAL1 ; Final carry propagation
+ LDD 64(pR),v0 ; next result doubleword above the last one updated
+ LDO 8(pR),pR ; step result pointer up one doubleword
+ ADDI 1,v0,v0 ; add in the carry
+ CMPIB,*= 0,v0,$FINAL1 ; Keep looping if there is a carry.
+ STD v0,56(pR) ; (delay slot) store the incremented doubleword back
+$L0
+ bv zero(rp) ; -> caller
+ ldo -ST_SZ(sp),sp ; pop stack
+
+/* ====================================================================== */
+/* end of module */
+/* ====================================================================== */
+
+
+ bve (rp) ; not reached: the routine returns through the bv above
+ .EXIT
+ nop
+ .PROCEND
+ .SPACE $TEXT$
+ .SUBSPA $CODE$
+ .EXPORT multacc512,ENTRY
+
+ .end
diff --git a/security/nss/lib/freebl/mpi/hppa20.s b/security/nss/lib/freebl/mpi/hppa20.s
new file mode 100644
index 000000000..c72de8a12
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/hppa20.s
@@ -0,0 +1,904 @@
+; This Source Code Form is subject to the terms of the Mozilla Public
+; License, v. 2.0. If a copy of the MPL was not distributed with this
+; file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifdef __LP64__
+ .LEVEL 2.0W
+#else
+; .LEVEL 1.1
+; .ALLOW 2.0N
+ .LEVEL 2.0
+#endif
+ .SPACE $TEXT$,SORT=8
+ .SUBSPA $CODE$,QUAD=0,ALIGN=4,ACCESS=0x2c,CODE_ONLY,SORT=24
+
+; ***************************************************************
+;
+; maxpy_[little/big]
+;
+; ***************************************************************
+
+; There is no default -- you must specify one or the other.
+#define LITTLE_WORDIAN 1
+
+#ifdef LITTLE_WORDIAN
+#define EIGHT 8 /* one doubleword toward more significant words (upward) */
+#define SIXTEEN 16 /* two doublewords toward more significant words */
+#define THIRTY_TWO 32 /* four doublewords toward more significant words */
+#define UN_EIGHT -8 /* one doubleword back toward less significant words */
+#define UN_SIXTEEN -16 /* two doublewords back */
+#define UN_TWENTY_FOUR -24 /* three doublewords back */
+#endif
+
+#ifdef BIG_WORDIAN
+#define EIGHT -8 /* one doubleword toward more significant words (downward) */
+#define SIXTEEN -16 /* two doublewords toward more significant words */
+#define THIRTY_TWO -32 /* four doublewords toward more significant words */
+#define UN_EIGHT 8 /* one doubleword back toward less significant words */
+#define UN_SIXTEEN 16 /* two doublewords back */
+#define UN_TWENTY_FOUR 24 /* three doublewords back */
+#endif
+
+; This performs a multiple-precision integer version of "daxpy",
+; Using the selected addressing direction. "Little-wordian" means that
+; the least significant word of a number is stored at the lowest address.
+; "Big-wordian" means that the most significant word is at the lowest
+; address. Either way, the incoming address of the vector is that
+; of the least significant word. That means that, for little-wordian
+; addressing, we move the address upward as we propagate carries
+; from the least significant word to the most significant. For
+; big-wordian we move the address downward.
+
+; We use the following registers:
+;
+; r2 return PC, of course
+; r26 = arg1 = length
+; r25 = arg2 = address of scalar
+; r24 = arg3 = multiplicand vector
+; r23 = arg4 = result vector
+;
+; fr9 = scalar loaded once only from r25
+
+; The cycle counts shown in the bodies below are simply the result of a
+; scheduling by hand. The actual PCX-U hardware does it differently.
+; The intention is that the overall speed is the same.
+
+; The pipeline startup and shutdown code is constructed in the usual way,
+; by taking the loop bodies and removing unnecessary instructions.
+; We have left the comments describing cycle numbers in the code.
+; These are intended for reference when comparing with the main loop,
+; and have no particular relationship to actual cycle numbers.
+
+#ifdef LITTLE_WORDIAN
+maxpy_little
+#else
+maxpy_big
+#endif
+ .PROC
+ .CALLINFO FRAME=120,ENTRY_GR=4
+ .ENTRY
+ STW,MA %r3,128(%sp) ; save r3 (32-bit store), then push a 128-byte frame
+ STW %r4,-124(%sp) ; save r4 (32-bit store). NOTE(review): LDD below loads 64 bits into r3/r4; confirm 32-bit save suffices under 2.0W
+
+ ADDIB,< -1,%r26,$L0 ; If N = 0, exit immediately.
+ FLDD 0(%r25),%fr9 ; fr9 = scalar
+
+; First startup
+
+ FLDD 0(%r24),%fr24 ; Cycle 1
+ XMPYU %fr9R,%fr24R,%fr27 ; Cycle 3
+ XMPYU %fr9R,%fr24L,%fr25 ; Cycle 4
+ XMPYU %fr9L,%fr24L,%fr26 ; Cycle 5
+ CMPIB,> 3,%r26,$N_IS_SMALL ; Pick out cases N = 1, 2, or 3
+ XMPYU %fr9L,%fr24R,%fr24 ; Cycle 6
+ FLDD EIGHT(%r24),%fr28 ; Cycle 8
+ XMPYU %fr9L,%fr28R,%fr31 ; Cycle 10
+ FSTD %fr24,-96(%sp)
+ XMPYU %fr9R,%fr28L,%fr30 ; Cycle 11
+ FSTD %fr25,-80(%sp)
+ LDO SIXTEEN(%r24),%r24 ; Cycle 12
+ FSTD %fr31,-64(%sp)
+ XMPYU %fr9R,%fr28R,%fr29 ; Cycle 13
+ FSTD %fr27,-48(%sp)
+
+; Second startup
+
+ XMPYU %fr9L,%fr28L,%fr28 ; Cycle 1
+ FSTD %fr30,-56(%sp)
+ FLDD 0(%r24),%fr24
+
+ FSTD %fr26,-88(%sp) ; Cycle 2
+
+ XMPYU %fr9R,%fr24R,%fr27 ; Cycle 3
+ FSTD %fr28,-104(%sp)
+
+ XMPYU %fr9R,%fr24L,%fr25 ; Cycle 4
+ LDD -96(%sp),%r3
+ FSTD %fr29,-72(%sp)
+
+ XMPYU %fr9L,%fr24L,%fr26 ; Cycle 5
+ LDD -64(%sp),%r19
+ LDD -80(%sp),%r21
+
+ XMPYU %fr9L,%fr24R,%fr24 ; Cycle 6
+ LDD -56(%sp),%r20
+ ADD %r21,%r3,%r3
+
+ ADD,DC %r20,%r19,%r19 ; Cycle 7
+ LDD -88(%sp),%r4
+ SHRPD %r3,%r0,32,%r21
+ LDD -48(%sp),%r1
+
+ FLDD EIGHT(%r24),%fr28 ; Cycle 8
+ LDD -104(%sp),%r31
+ ADD,DC %r0,%r0,%r20
+ SHRPD %r19,%r3,32,%r3
+
+ LDD -72(%sp),%r29 ; Cycle 9
+ SHRPD %r20,%r19,32,%r20
+ ADD %r21,%r1,%r1
+
+ XMPYU %fr9L,%fr28R,%fr31 ; Cycle 10
+ ADD,DC %r3,%r4,%r4
+ FSTD %fr24,-96(%sp)
+
+ XMPYU %fr9R,%fr28L,%fr30 ; Cycle 11
+ ADD,DC %r0,%r20,%r20
+ LDD 0(%r23),%r3
+ FSTD %fr25,-80(%sp)
+
+ LDO SIXTEEN(%r24),%r24 ; Cycle 12
+ FSTD %fr31,-64(%sp)
+
+ XMPYU %fr9R,%fr28R,%fr29 ; Cycle 13
+ ADD %r0,%r0,%r0 ; clear the carry bit
+ ADDIB,<= -4,%r26,$ENDLOOP ; actually happens in cycle 12
+ FSTD %fr27,-48(%sp)
+; MFCTL %cr16,%r21 ; for timing
+; STD %r21,-112(%sp)
+
+; Here is the loop.
+
+$LOOP XMPYU %fr9L,%fr28L,%fr28 ; Cycle 1
+ ADD,DC %r29,%r4,%r4
+ FSTD %fr30,-56(%sp)
+ FLDD 0(%r24),%fr24
+
+ LDO SIXTEEN(%r23),%r23 ; Cycle 2
+ ADD,DC %r0,%r20,%r20
+ FSTD %fr26,-88(%sp)
+
+ XMPYU %fr9R,%fr24R,%fr27 ; Cycle 3
+ ADD %r3,%r1,%r1
+ FSTD %fr28,-104(%sp)
+ LDD UN_EIGHT(%r23),%r21
+
+ XMPYU %fr9R,%fr24L,%fr25 ; Cycle 4
+ ADD,DC %r21,%r4,%r28
+ FSTD %fr29,-72(%sp)
+ LDD -96(%sp),%r3
+
+ XMPYU %fr9L,%fr24L,%fr26 ; Cycle 5
+ ADD,DC %r20,%r31,%r22
+ LDD -64(%sp),%r19
+ LDD -80(%sp),%r21
+
+ XMPYU %fr9L,%fr24R,%fr24 ; Cycle 6
+ ADD %r21,%r3,%r3
+ LDD -56(%sp),%r20
+ STD %r1,UN_SIXTEEN(%r23)
+
+ ADD,DC %r20,%r19,%r19 ; Cycle 7
+ SHRPD %r3,%r0,32,%r21
+ LDD -88(%sp),%r4
+ LDD -48(%sp),%r1
+
+ ADD,DC %r0,%r0,%r20 ; Cycle 8
+ SHRPD %r19,%r3,32,%r3
+ FLDD EIGHT(%r24),%fr28
+ LDD -104(%sp),%r31
+
+ SHRPD %r20,%r19,32,%r20 ; Cycle 9
+ ADD %r21,%r1,%r1
+ STD %r28,UN_EIGHT(%r23)
+ LDD -72(%sp),%r29
+
+ XMPYU %fr9L,%fr28R,%fr31 ; Cycle 10
+ ADD,DC %r3,%r4,%r4
+ FSTD %fr24,-96(%sp)
+
+ XMPYU %fr9R,%fr28L,%fr30 ; Cycle 11
+ ADD,DC %r0,%r20,%r20
+ FSTD %fr25,-80(%sp)
+ LDD 0(%r23),%r3
+
+ LDO SIXTEEN(%r24),%r24 ; Cycle 12
+ FSTD %fr31,-64(%sp)
+
+ XMPYU %fr9R,%fr28R,%fr29 ; Cycle 13
+ ADD %r22,%r1,%r1
+ ADDIB,> -2,%r26,$LOOP ; actually happens in cycle 12
+ FSTD %fr27,-48(%sp)
+
+$ENDLOOP
+
+; Shutdown code, first stage.
+
+; MFCTL %cr16,%r21 ; for timing
+; STD %r21,UN_SIXTEEN(%r23)
+; LDD -112(%sp),%r21
+; STD %r21,UN_EIGHT(%r23)
+
+ XMPYU %fr9L,%fr28L,%fr28 ; Cycle 1
+ ADD,DC %r29,%r4,%r4
+ CMPIB,= 0,%r26,$ONEMORE
+ FSTD %fr30,-56(%sp)
+
+ LDO SIXTEEN(%r23),%r23 ; Cycle 2
+ ADD,DC %r0,%r20,%r20
+ FSTD %fr26,-88(%sp)
+
+ ADD %r3,%r1,%r1 ; Cycle 3
+ FSTD %fr28,-104(%sp)
+ LDD UN_EIGHT(%r23),%r21
+
+ ADD,DC %r21,%r4,%r28 ; Cycle 4
+ FSTD %fr29,-72(%sp)
+ STD %r28,UN_EIGHT(%r23) ; moved up from cycle 9
+ LDD -96(%sp),%r3
+
+ ADD,DC %r20,%r31,%r22 ; Cycle 5
+ STD %r1,UN_SIXTEEN(%r23)
+$JOIN4
+ LDD -64(%sp),%r19
+ LDD -80(%sp),%r21
+
+ ADD %r21,%r3,%r3 ; Cycle 6
+ LDD -56(%sp),%r20
+
+ ADD,DC %r20,%r19,%r19 ; Cycle 7
+ SHRPD %r3,%r0,32,%r21
+ LDD -88(%sp),%r4
+ LDD -48(%sp),%r1
+
+ ADD,DC %r0,%r0,%r20 ; Cycle 8
+ SHRPD %r19,%r3,32,%r3
+ LDD -104(%sp),%r31
+
+ SHRPD %r20,%r19,32,%r20 ; Cycle 9
+ ADD %r21,%r1,%r1
+ LDD -72(%sp),%r29
+
+ ADD,DC %r3,%r4,%r4 ; Cycle 10
+
+ ADD,DC %r0,%r20,%r20 ; Cycle 11
+ LDD 0(%r23),%r3
+
+ ADD %r22,%r1,%r1 ; Cycle 13
+
+; Shutdown code, second stage.
+
+ ADD,DC %r29,%r4,%r4 ; Cycle 1
+
+ LDO SIXTEEN(%r23),%r23 ; Cycle 2
+ ADD,DC %r0,%r20,%r20
+
+ LDD UN_EIGHT(%r23),%r21 ; Cycle 3
+ ADD %r3,%r1,%r1
+
+ ADD,DC %r21,%r4,%r28 ; Cycle 4
+
+ ADD,DC %r20,%r31,%r22 ; Cycle 5
+
+ STD %r1,UN_SIXTEEN(%r23); Cycle 6
+
+ STD %r28,UN_EIGHT(%r23) ; Cycle 9
+
+ LDD 0(%r23),%r3 ; Cycle 11
+
+; Shutdown code, third stage.
+
+ LDO SIXTEEN(%r23),%r23
+ ADD %r3,%r22,%r1
+$JOIN1 ADD,DC %r0,%r0,%r21 ; r21 = carry out of the last add
+ CMPIB,*= 0,%r21,$L0 ; if no overflow, exit
+ STD %r1,UN_SIXTEEN(%r23) ; (delay slot) store the last computed doubleword
+
+; Final carry propagation
+
+$FINAL1 LDO EIGHT(%r23),%r23
+ LDD UN_SIXTEEN(%r23),%r21
+ ADDI 1,%r21,%r21
+ CMPIB,*= 0,%r21,$FINAL1 ; Keep looping if there is a carry.
+ STD %r21,UN_SIXTEEN(%r23)
+ B $L0
+ NOP
+
+; Here is the code that handles the difficult cases N=1, N=2, and N=3.
+; We do the usual trick -- branch out of the startup code at appropriate
+; points, and branch into the shutdown code.
+
+$N_IS_SMALL
+ CMPIB,= 0,%r26,$N_IS_ONE
+ FSTD %fr24,-96(%sp) ; Cycle 10
+ FLDD EIGHT(%r24),%fr28 ; Cycle 8
+ XMPYU %fr9L,%fr28R,%fr31 ; Cycle 10
+ XMPYU %fr9R,%fr28L,%fr30 ; Cycle 11
+ FSTD %fr25,-80(%sp)
+ FSTD %fr31,-64(%sp) ; Cycle 12
+ XMPYU %fr9R,%fr28R,%fr29 ; Cycle 13
+ FSTD %fr27,-48(%sp)
+ XMPYU %fr9L,%fr28L,%fr28 ; Cycle 1
+ CMPIB,= 2,%r26,$N_IS_THREE
+ FSTD %fr30,-56(%sp)
+
+; N = 2
+ FSTD %fr26,-88(%sp) ; Cycle 2
+ FSTD %fr28,-104(%sp) ; Cycle 3
+ LDD -96(%sp),%r3 ; Cycle 4
+ FSTD %fr29,-72(%sp)
+ B $JOIN4
+ ADD %r0,%r0,%r22
+
+$N_IS_THREE
+ FLDD SIXTEEN(%r24),%fr24
+ FSTD %fr26,-88(%sp) ; Cycle 2
+ XMPYU %fr9R,%fr24R,%fr27 ; Cycle 3
+ FSTD %fr28,-104(%sp)
+ XMPYU %fr9R,%fr24L,%fr25 ; Cycle 4
+ LDD -96(%sp),%r3
+ FSTD %fr29,-72(%sp)
+ XMPYU %fr9L,%fr24L,%fr26 ; Cycle 5
+ LDD -64(%sp),%r19
+ LDD -80(%sp),%r21
+ B $JOIN3
+ ADD %r0,%r0,%r22
+
+$N_IS_ONE
+ FSTD %fr25,-80(%sp)
+ FSTD %fr27,-48(%sp)
+ FSTD %fr26,-88(%sp) ; Cycle 2
+ B $JOIN5
+ ADD %r0,%r0,%r22
+
+; We came out of the unrolled loop with wrong parity. Do one more
+; single cycle. This is quite tricky, because of the way the
+; carry chains and SHRPD chains have been chopped up.
+
+$ONEMORE
+
+ FLDD 0(%r24),%fr24
+
+ LDO SIXTEEN(%r23),%r23 ; Cycle 2
+ ADD,DC %r0,%r20,%r20
+ FSTD %fr26,-88(%sp)
+
+ XMPYU %fr9R,%fr24R,%fr27 ; Cycle 3
+ FSTD %fr28,-104(%sp)
+ LDD UN_EIGHT(%r23),%r21
+ ADD %r3,%r1,%r1
+
+ XMPYU %fr9R,%fr24L,%fr25 ; Cycle 4
+ ADD,DC %r21,%r4,%r28
+ STD %r28,UN_EIGHT(%r23) ; moved from cycle 9
+ LDD -96(%sp),%r3
+ FSTD %fr29,-72(%sp)
+
+ XMPYU %fr9L,%fr24L,%fr26 ; Cycle 5
+ ADD,DC %r20,%r31,%r22
+ LDD -64(%sp),%r19
+ LDD -80(%sp),%r21
+
+ STD %r1,UN_SIXTEEN(%r23); Cycle 6
+$JOIN3
+ XMPYU %fr9L,%fr24R,%fr24
+ LDD -56(%sp),%r20
+ ADD %r21,%r3,%r3
+
+ ADD,DC %r20,%r19,%r19 ; Cycle 7
+ LDD -88(%sp),%r4
+ SHRPD %r3,%r0,32,%r21
+ LDD -48(%sp),%r1
+
+ LDD -104(%sp),%r31 ; Cycle 8
+ ADD,DC %r0,%r0,%r20
+ SHRPD %r19,%r3,32,%r3
+
+ LDD -72(%sp),%r29 ; Cycle 9
+ SHRPD %r20,%r19,32,%r20
+ ADD %r21,%r1,%r1
+
+ ADD,DC %r3,%r4,%r4 ; Cycle 10
+ FSTD %fr24,-96(%sp)
+
+ ADD,DC %r0,%r20,%r20 ; Cycle 11
+ LDD 0(%r23),%r3
+ FSTD %fr25,-80(%sp)
+
+ ADD %r22,%r1,%r1 ; Cycle 13
+ FSTD %fr27,-48(%sp)
+
+; Shutdown code, stage 1-1/2.
+
+ ADD,DC %r29,%r4,%r4 ; Cycle 1
+
+ LDO SIXTEEN(%r23),%r23 ; Cycle 2
+ ADD,DC %r0,%r20,%r20
+ FSTD %fr26,-88(%sp)
+
+ LDD UN_EIGHT(%r23),%r21 ; Cycle 3
+ ADD %r3,%r1,%r1
+
+ ADD,DC %r21,%r4,%r28 ; Cycle 4
+ STD %r28,UN_EIGHT(%r23) ; moved from cycle 9
+
+ ADD,DC %r20,%r31,%r22 ; Cycle 5
+ STD %r1,UN_SIXTEEN(%r23)
+$JOIN5
+ LDD -96(%sp),%r3 ; moved from cycle 4
+ LDD -80(%sp),%r21
+ ADD %r21,%r3,%r3 ; Cycle 6
+ ADD,DC %r0,%r0,%r19 ; Cycle 7
+ LDD -88(%sp),%r4
+ SHRPD %r3,%r0,32,%r21
+ LDD -48(%sp),%r1
+ SHRPD %r19,%r3,32,%r3 ; Cycle 8
+ ADD %r21,%r1,%r1 ; Cycle 9
+ ADD,DC %r3,%r4,%r4 ; Cycle 10
+ LDD 0(%r23),%r3 ; Cycle 11
+ ADD %r22,%r1,%r1 ; Cycle 13
+
+; Shutdown code, stage 2-1/2.
+
+ ADD,DC %r0,%r4,%r4 ; Cycle 1
+ LDO SIXTEEN(%r23),%r23 ; Cycle 2
+ LDD UN_EIGHT(%r23),%r21 ; Cycle 3
+ ADD %r3,%r1,%r1
+ STD %r1,UN_SIXTEEN(%r23)
+ ADD,DC %r21,%r4,%r1
+ B $JOIN1
+ LDO EIGHT(%r23),%r23
+
+; exit
+
+$L0
+ LDW -124(%sp),%r4 ; restore r4 (32-bit load)
+ BVE (%r2)
+ .EXIT
+ LDW,MB -128(%sp),%r3 ; (delay slot) pop the 128-byte frame and restore r3 (32-bit load)
+
+ .PROCEND
+
+; ***************************************************************
+;
+; add_diag_[little/big]
+;
+; ***************************************************************
+
+; The arguments are as follows:
+; r2 return PC, of course
+; r26 = arg1 = length
+; r25 = arg2 = vector to square
+; r24 = arg3 = result vector
+
+#ifdef LITTLE_WORDIAN
+add_diag_little
+#else
+add_diag_big
+#endif
+ .PROC
+ .CALLINFO FRAME=120,ENTRY_GR=4
+ .ENTRY
+ STW,MA %r3,128(%sp)
+ STW %r4,-124(%sp)
+
+ ADDIB,< -1,%r26,$Z0 ; If N=0, exit immediately.
+ NOP
+
+; Startup code
+
+ FLDD 0(%r25),%fr7 ; Cycle 2 (alternate body)
+ XMPYU %fr7R,%fr7R,%fr29 ; Cycle 4
+ XMPYU %fr7L,%fr7R,%fr27 ; Cycle 5
+ XMPYU %fr7L,%fr7L,%fr30
+ LDO SIXTEEN(%r25),%r25 ; Cycle 6
+ FSTD %fr29,-88(%sp)
+ FSTD %fr27,-72(%sp) ; Cycle 7
+ CMPIB,= 0,%r26,$DIAG_N_IS_ONE ; Cycle 1 (main body)
+ FSTD %fr30,-96(%sp)
+ FLDD UN_EIGHT(%r25),%fr7 ; Cycle 2
+ LDD -88(%sp),%r22 ; Cycle 3
+ LDD -72(%sp),%r31 ; Cycle 4
+ XMPYU %fr7R,%fr7R,%fr28
+ XMPYU %fr7L,%fr7R,%fr24 ; Cycle 5
+ XMPYU %fr7L,%fr7L,%fr31
+ LDD -96(%sp),%r20 ; Cycle 6
+ FSTD %fr28,-80(%sp)
+ ADD %r0,%r0,%r0 ; clear the carry bit
+ ADDIB,<= -2,%r26,$ENDDIAGLOOP ; Cycle 7
+ FSTD %fr24,-64(%sp)
+
+; Here is the loop. It is unrolled twice, modelled after the "alternate body" and then the "main body".
+
+$DIAGLOOP
+ SHRPD %r31,%r0,31,%r3 ; Cycle 1 (alternate body)
+ LDO SIXTEEN(%r25),%r25
+ LDD 0(%r24),%r1
+ FSTD %fr31,-104(%sp)
+ SHRPD %r0,%r31,31,%r4 ; Cycle 2
+ ADD,DC %r22,%r3,%r3
+ FLDD UN_SIXTEEN(%r25),%fr7
+ ADD,DC %r0,%r20,%r20 ; Cycle 3
+ ADD %r1,%r3,%r3
+ XMPYU %fr7R,%fr7R,%fr29 ; Cycle 4
+ LDD -80(%sp),%r21
+ STD %r3,0(%r24)
+ XMPYU %fr7L,%fr7R,%fr27 ; Cycle 5
+ XMPYU %fr7L,%fr7L,%fr30
+ LDD -64(%sp),%r29
+ LDD EIGHT(%r24),%r1
+ ADD,DC %r4,%r20,%r20 ; Cycle 6
+ LDD -104(%sp),%r19
+ FSTD %fr29,-88(%sp)
+ ADD %r20,%r1,%r1 ; Cycle 7
+ FSTD %fr27,-72(%sp)
+ SHRPD %r29,%r0,31,%r4 ; Cycle 1 (main body)
+ LDO THIRTY_TWO(%r24),%r24
+ LDD UN_SIXTEEN(%r24),%r28
+ FSTD %fr30,-96(%sp)
+ SHRPD %r0,%r29,31,%r3 ; Cycle 2
+ ADD,DC %r21,%r4,%r4
+ FLDD UN_EIGHT(%r25),%fr7
+ STD %r1,UN_TWENTY_FOUR(%r24)
+ ADD,DC %r0,%r19,%r19 ; Cycle 3
+ ADD %r28,%r4,%r4
+ XMPYU %fr7R,%fr7R,%fr28 ; Cycle 4
+ LDD -88(%sp),%r22
+ STD %r4,UN_SIXTEEN(%r24)
+ XMPYU %fr7L,%fr7R,%fr24 ; Cycle 5
+ XMPYU %fr7L,%fr7L,%fr31
+ LDD -72(%sp),%r31
+ LDD UN_EIGHT(%r24),%r28
+ ADD,DC %r3,%r19,%r19 ; Cycle 6
+ LDD -96(%sp),%r20
+ FSTD %fr28,-80(%sp)
+ ADD %r19,%r28,%r28 ; Cycle 7
+ FSTD %fr24,-64(%sp)
+ ADDIB,> -2,%r26,$DIAGLOOP ; Cycle 8
+ STD %r28,UN_EIGHT(%r24)
+
+$ENDDIAGLOOP
+
+ ADD,DC %r0,%r22,%r22
+ CMPIB,= 0,%r26,$ONEMOREDIAG
+ SHRPD %r31,%r0,31,%r3
+
+; Shutdown code, first stage.
+
+ FSTD %fr31,-104(%sp) ; Cycle 1 (alternate body)
+ LDD 0(%r24),%r28
+ SHRPD %r0,%r31,31,%r4 ; Cycle 2
+ ADD %r3,%r22,%r3
+ ADD,DC %r0,%r20,%r20 ; Cycle 3
+ LDD -80(%sp),%r21
+ ADD %r3,%r28,%r3
+ LDD -64(%sp),%r29 ; Cycle 4
+ STD %r3,0(%r24)
+ LDD EIGHT(%r24),%r1 ; Cycle 5
+ LDO SIXTEEN(%r25),%r25 ; Cycle 6
+ LDD -104(%sp),%r19
+ ADD,DC %r4,%r20,%r20
+ ADD %r20,%r1,%r1 ; Cycle 7
+ ADD,DC %r0,%r21,%r21 ; Cycle 8
+ STD %r1,EIGHT(%r24)
+
+; Shutdown code, second stage.
+
+ SHRPD %r29,%r0,31,%r4 ; Cycle 1 (main body)
+ LDO THIRTY_TWO(%r24),%r24
+ LDD UN_SIXTEEN(%r24),%r1
+ SHRPD %r0,%r29,31,%r3 ; Cycle 2
+ ADD %r4,%r21,%r4
+ ADD,DC %r0,%r19,%r19 ; Cycle 3
+ ADD %r4,%r1,%r4
+ STD %r4,UN_SIXTEEN(%r24); Cycle 4
+ LDD UN_EIGHT(%r24),%r28 ; Cycle 5
+ ADD,DC %r3,%r19,%r19 ; Cycle 6
+ ADD %r19,%r28,%r28 ; Cycle 7
+ ADD,DC %r0,%r0,%r22 ; Cycle 8
+ CMPIB,*= 0,%r22,$Z0 ; if no overflow, exit
+ STD %r28,UN_EIGHT(%r24)
+
+; Final carry propagation
+
+$FDIAG2
+ LDO EIGHT(%r24),%r24
+ LDD UN_EIGHT(%r24),%r26
+ ADDI 1,%r26,%r26
+ CMPIB,*= 0,%r26,$FDIAG2 ; Keep looping if there is a carry.
+ STD %r26,UN_EIGHT(%r24)
+
+ B $Z0
+ NOP
+
+; Here is the code that handles the difficult case N=1.
+; We do the usual trick -- branch out of the startup code at appropriate
+; points, and branch into the shutdown code.
+
+$DIAG_N_IS_ONE
+
+ LDD -88(%sp),%r22
+ LDD -72(%sp),%r31
+ B $JOINDIAG
+ LDD -96(%sp),%r20
+
+; We came out of the unrolled loop with wrong parity. Do one more
+; single cycle. This is the "alternate body". It will, of course,
+; give us opposite registers from the other case, so we need
+; completely different shutdown code.
+
+$ONEMOREDIAG
+ FSTD %fr31,-104(%sp) ; Cycle 1 (alternate body)
+ LDD 0(%r24),%r28
+ FLDD 0(%r25),%fr7 ; Cycle 2
+ SHRPD %r0,%r31,31,%r4
+ ADD %r3,%r22,%r3
+ ADD,DC %r0,%r20,%r20 ; Cycle 3
+ LDD -80(%sp),%r21
+ ADD %r3,%r28,%r3
+ LDD -64(%sp),%r29 ; Cycle 4
+ STD %r3,0(%r24)
+ XMPYU %fr7R,%fr7R,%fr29
+ LDD EIGHT(%r24),%r1 ; Cycle 5
+ XMPYU %fr7L,%fr7R,%fr27
+ XMPYU %fr7L,%fr7L,%fr30
+ LDD -104(%sp),%r19 ; Cycle 6
+ FSTD %fr29,-88(%sp)
+ ADD,DC %r4,%r20,%r20
+ FSTD %fr27,-72(%sp) ; Cycle 7
+ ADD %r20,%r1,%r1
+ ADD,DC %r0,%r21,%r21 ; Cycle 8
+ STD %r1,EIGHT(%r24)
+
+; Shutdown code, first stage.
+
+ SHRPD %r29,%r0,31,%r4 ; Cycle 1 (main body)
+ LDO THIRTY_TWO(%r24),%r24
+ FSTD %fr30,-96(%sp)
+ LDD UN_SIXTEEN(%r24),%r1
+ SHRPD %r0,%r29,31,%r3 ; Cycle 2
+ ADD %r4,%r21,%r4
+ ADD,DC %r0,%r19,%r19 ; Cycle 3
+ LDD -88(%sp),%r22
+ ADD %r4,%r1,%r4
+ LDD -72(%sp),%r31 ; Cycle 4
+ STD %r4,UN_SIXTEEN(%r24)
+ LDD UN_EIGHT(%r24),%r28 ; Cycle 5
+ LDD -96(%sp),%r20 ; Cycle 6
+ ADD,DC %r3,%r19,%r19
+ ADD %r19,%r28,%r28 ; Cycle 7
+ ADD,DC %r0,%r22,%r22 ; Cycle 8
+ STD %r28,UN_EIGHT(%r24)
+
+; Shutdown code, second stage.
+
+$JOINDIAG
+ SHRPD %r31,%r0,31,%r3 ; Cycle 1 (alternate body)
+ LDD 0(%r24),%r28
+ SHRPD %r0,%r31,31,%r4 ; Cycle 2
+ ADD %r3,%r22,%r3
+ ADD,DC %r0,%r20,%r20 ; Cycle 3
+ ADD %r3,%r28,%r3
+ STD %r3,0(%r24) ; Cycle 4
+ LDD EIGHT(%r24),%r1 ; Cycle 5
+ ADD,DC %r4,%r20,%r20
+ ADD %r20,%r1,%r1 ; Cycle 7
+ ADD,DC %r0,%r0,%r21 ; Cycle 8
+ CMPIB,*= 0,%r21,$Z0 ; if no overflow, exit
+ STD %r1,EIGHT(%r24)
+
+; Final carry propagation
+;
+; Reached only when the carry-out captured in %r21 by the preceding
+; shutdown code is non-zero (a zero %r21 branches straight to $Z0).
+; Each pass steps %r24 to the next result doubleword, adds 1 to the word
+; at EIGHT(%r24) and stores it back; the loop repeats while the
+; incremented word is zero, i.e. while the addition wrapped.
+; %r26 (the length argument on entry) is reused here as a scratch register.
+
+$FDIAG1
+ LDO EIGHT(%r24),%r24
+ LDD EIGHT(%r24),%r26
+ ADDI 1,%r26,%r26
+ CMPIB,*= 0,%r26,$FDIAG1 ; Keep looping if there is a carry.
+ STD %r26,EIGHT(%r24)
+
+; Common exit for every path through the routine: reload the %r4 and %r3
+; values saved on entry and return through %r2 (the return PC).
+$Z0
+ LDW -124(%sp),%r4
+ BVE (%r2)
+ .EXIT
+ LDW,MB -128(%sp),%r3
+ .PROCEND
+; .ALLOW
+
+ .SPACE $TEXT$
+ .SUBSPA $CODE$
+#ifdef LITTLE_WORDIAN
+#ifdef __GNUC__
+; GNU-as (as of 2.19) does not support LONG_RETURN
+ .EXPORT maxpy_little,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR
+ .EXPORT add_diag_little,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR
+#else
+ .EXPORT maxpy_little,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,LONG_RETURN
+ .EXPORT add_diag_little,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,LONG_RETURN
+#endif
+#else
+ .EXPORT maxpy_big,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,LONG_RETURN
+ .EXPORT add_diag_big,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,LONG_RETURN
+#endif
+ .END
+
+
+; How to use "maxpy_PA20_little" and "maxpy_PA20_big"
+;
+; The routine "maxpy_PA20_little" or "maxpy_PA20_big"
+; performs a 64-bit x any-size multiply, and adds the
+; result to an area of memory. That is, it performs
+; something like
+;
+; A B C D
+; * Z
+; __________
+; P Q R S T
+;
+; and then adds the "PQRST" vector into an area of memory,
+; handling all carries.
+;
+; Digression on nomenclature and endian-ness:
+;
+; Each of the capital letters in the above represents a 64-bit
+; quantity. That is, you could think of the discussion as
+; being in terms of radix-16-quintillion arithmetic. The data
+; type being manipulated is "unsigned long long int". This
+; requires the 64-bit extension of the HP-UX C compiler,
+; available at release 10. You need these compiler flags to
+; enable these extensions:
+;
+; -Aa +e +DA2.0 +DS2.0
+;
+; (The first specifies ANSI C, the second enables the
+; extensions, which are beyond ANSI C, and the third and
+; fourth tell the compiler to use whatever features of the
+; PA2.0 architecture it wishes, in order to make the code more
+; efficient. Since the presence of the assembly code will
+; make the program unable to run on anything less than PA2.0,
+; you might as well gain the performance enhancements in the C
+; code as well.)
+;
+; Questions of "endian-ness" often come up, usually in the
+; context of byte ordering in a word. These routines have a
+; similar issue, that could be called "wordian-ness".
+; Independent of byte ordering (PA is always big-endian), one
+; can make two choices when representing extremely large
+; numbers as arrays of 64-bit doublewords in memory.
+;
+; "Little-wordian" layout means that the least significant
+; word of a number is stored at the lowest address.
+;
+; MSW LSW
+; | |
+; V V
+;
+; A B C D E
+;
+; ^ ^ ^
+; | | |____ address 0
+; | |
+; | |_______address 8
+; |
+; address 32
+;
+; "Big-wordian" means that the most significant word is at the
+; lowest address.
+;
+; MSW LSW
+; | |
+; V V
+;
+; A B C D E
+;
+; ^ ^ ^
+; | | |____ address 32
+; | |
+; | |_______address 24
+; |
+; address 0
+;
+; When you compile the file, you must specify one or the other, with
+; a switch "-DLITTLE_WORDIAN" or "-DBIG_WORDIAN".
+;
+; Incidentally, you assemble this file as part of your
+; project with the same C compiler as the rest of the program.
+; My "makefile" for a superprecision arithmetic package has
+; the following stuff:
+;
+; # definitions:
+; CC = cc -Aa +e -z +DA2.0 +DS2.0 +w1
+; CFLAGS = +O3
+; LDFLAGS = -L /usr/lib -Wl,-aarchive
+;
+; # general build rule for ".s" files:
+; .s.o:
+; $(CC) $(CFLAGS) -c $< -DBIG_WORDIAN
+;
+; # Now any bind step that calls for pa20.o will assemble pa20.s
+;
+; End of digression, back to arithmetic:
+;
+; The way we multiply two huge numbers is, of course, to multiply
+; the "ABCD" vector by each of the "WXYZ" doublewords, adding
+; the result vectors with increasing offsets, the way we learned
+; in school, back before we all used calculators:
+;
+; A B C D
+; * W X Y Z
+; __________
+; P Q R S T
+; E F G H I
+; M N O P Q
+; + R S T U V
+; _______________
+; F I N A L S U M
+;
+; So we call maxpy_PA20_big (in my case; my package is
+; big-wordian) repeatedly, giving the W, X, Y, and Z arguments
+; in turn as the "scalar", and giving the "ABCD" vector each
+; time. We direct it to add its result into an area of memory
+; that we have cleared at the start. We skew the exact
+; location into that area with each call.
+;
+; The prototype for the function is
+;
+; extern void maxpy_PA20_big(
+; int length, /* Number of doublewords in the multiplicand vector. */
+; const long long int *scalaraddr, /* Address to fetch the scalar. */
+; const long long int *multiplicand, /* The multiplicand vector. */
+; long long int *result); /* Where to accumulate the result. */
+;
+; (You should place a copy of this prototype in an include file
+; or in your C file.)
+;
+; Now, IN ALL CASES, the given address for the multiplicand or
+; the result is that of the LEAST SIGNIFICANT DOUBLEWORD.
+; That word is, of course, the word at which the routine
+; starts processing. "maxpy_PA20_little" then increases the
+; addresses as it computes. "maxpy_PA20_big" decreases them.
+;
+; In our example above, "length" would be 4 in each case.
+; "multiplicand" would be the "ABCD" vector. Specifically,
+; the address of the element "D". "scalaraddr" would be the
+; address of "W", "X", "Y", or "Z" on the four calls that we
+; would make. (The order doesn't matter, of course.)
+; "result" would be the appropriate address in the result
+; area. When multiplying by "Z", that would be the least
+; significant word. When multiplying by "Y", it would be the
+; next higher word (8 bytes higher if little-wordian; 8 bytes
+; lower if big-wordian), and so on. The size of the result
+; area must be the sum of the sizes of the multiplicand
+; and multiplier vectors, and must be initialized to zero
+; before we start.
+;
+; Whenever the routine adds its partial product into the result
+; vector, it follows carry chains as far as they need to go.
+;
+; Here is the super-precision multiply routine that I use for
+; my package. The package is big-wordian. I have taken out
+; handling of exponents (it's a floating point package):
+;
+; static void mul_PA20(
+; int size,
+; const long long int *arg1,
+; const long long int *arg2,
+; long long int *result)
+; {
+; int i;
+;
+; for (i=0 ; i<2*size ; i++) result[i] = 0ULL;
+;
+; for (i=0 ; i<size ; i++) {
+; maxpy_PA20_big(size, &arg2[i], &arg1[size-1], &result[size+i]);
+; }
+; }
diff --git a/security/nss/lib/freebl/mpi/hppatch.adb b/security/nss/lib/freebl/mpi/hppatch.adb
new file mode 100644
index 000000000..6875032ef
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/hppatch.adb
@@ -0,0 +1,21 @@
+#/bin/sh
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# script to change the system id in an object file from PA-RISC 2.0 to 1.1
+
+adb -w $1 << EOF
+?m 0 -1 0
+0x0?X
+0x0?W (@0x0&~0x40000)|(~@0x0&0x40000)
+
+0?"change checksum"
+0x7c?X
+0x7c?W (@0x7c&~0x40000)|(~@0x7c&0x40000)
+$q
+EOF
+
+exit 0
+
diff --git a/security/nss/lib/freebl/mpi/logtab.h b/security/nss/lib/freebl/mpi/logtab.h
new file mode 100644
index 000000000..24cb13c5b
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/logtab.h
@@ -0,0 +1,28 @@
+/*
+ * logtab.h
+ *
+ * Arbitrary precision integer arithmetic library
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * s_logv_2[b] holds log(2)/log(b), i.e. the logarithm of 2 in base b, for
+ * b = 2 .. 64.  Entries 0 and 1 are zero placeholders, presumably so the
+ * table can be indexed directly by the base.  The trailing comment on each
+ * row lists the four indices (bases) that the row covers.
+ */
+const float s_logv_2[] = {
+ 0.000000000f, 0.000000000f, 1.000000000f, 0.630929754f, /* 0 1 2 3 */
+ 0.500000000f, 0.430676558f, 0.386852807f, 0.356207187f, /* 4 5 6 7 */
+ 0.333333333f, 0.315464877f, 0.301029996f, 0.289064826f, /* 8 9 10 11 */
+ 0.278942946f, 0.270238154f, 0.262649535f, 0.255958025f, /* 12 13 14 15 */
+ 0.250000000f, 0.244650542f, 0.239812467f, 0.235408913f, /* 16 17 18 19 */
+ 0.231378213f, 0.227670249f, 0.224243824f, 0.221064729f, /* 20 21 22 23 */
+ 0.218104292f, 0.215338279f, 0.212746054f, 0.210309918f, /* 24 25 26 27 */
+ 0.208014598f, 0.205846832f, 0.203795047f, 0.201849087f, /* 28 29 30 31 */
+ 0.200000000f, 0.198239863f, 0.196561632f, 0.194959022f, /* 32 33 34 35 */
+ 0.193426404f, 0.191958720f, 0.190551412f, 0.189200360f, /* 36 37 38 39 */
+ 0.187901825f, 0.186652411f, 0.185449023f, 0.184288833f, /* 40 41 42 43 */
+ 0.183169251f, 0.182087900f, 0.181042597f, 0.180031327f, /* 44 45 46 47 */
+ 0.179052232f, 0.178103594f, 0.177183820f, 0.176291434f, /* 48 49 50 51 */
+ 0.175425064f, 0.174583430f, 0.173765343f, 0.172969690f, /* 52 53 54 55 */
+ 0.172195434f, 0.171441601f, 0.170707280f, 0.169991616f, /* 56 57 58 59 */
+ 0.169293808f, 0.168613099f, 0.167948779f, 0.167300179f, /* 60 61 62 63 */
+ 0.166666667f
+};
diff --git a/security/nss/lib/freebl/mpi/make-logtab b/security/nss/lib/freebl/mpi/make-logtab
new file mode 100755
index 000000000..fadba1c86
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/make-logtab
@@ -0,0 +1,29 @@
+#!/usr/bin/perl
+
+#
+# make-logtab
+#
+# Generate a table of logarithms of 2 in various bases, for use in
+# estimating the output sizes of various bases.
+
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Name and element type of the generated array; either may be overridden
+# through the environment.
+$ARRAYNAME = $ENV{'ARRAYNAME'} || "s_logv_2";
+$ARRAYTYPE = $ENV{'ARRAYTYPE'} || "float";
+
+# Open the initializer. Entries 0 and 1 are written as zero, since
+# log(2)/log(b) has no usable value for those bases.
+printf("const %s %s[] = {\n %0.9ff, %0.9ff, ",
+ $ARRAYTYPE, $ARRAYNAME, 0, 0);
+# $brk counts the entries on the current row modulo 4; two are already out.
+$brk = 2;
+for($ix = 2; $ix < 64; $ix++) {
+ printf("%0.9ff, ", (log(2)/log($ix)));
+ $brk = ($brk + 1) & 3;
+ if(!$brk) {
+ # Row complete: annotate it with the four bases it covers.
+ printf(" /* %2d %2d %2d %2d */\n ",
+ $ix - 3, $ix - 2, $ix - 1, $ix);
+ }
+}
+# The loop exits with $ix == 64; that last entry is written without a
+# trailing comma and closes the initializer.
+printf("%0.9ff\n};\n\n", (log(2)/log($ix)));
+
+exit 0;
diff --git a/security/nss/lib/freebl/mpi/make-test-arrays b/security/nss/lib/freebl/mpi/make-test-arrays
new file mode 100755
index 000000000..ecdd55202
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/make-test-arrays
@@ -0,0 +1,98 @@
+#!/usr/bin/perl
+
+#
+# make-test-arrays
+#
+# Given a test-arrays file, which specifies the test suite names, the
+# names of the functions which perform those test suites, and
+# descriptive comments, this script generates C structures for the
+# mpi-test program. The input consists of lines of the form:
+#
+# suite-name:function-name:comment
+#
+# The output is written to the standard output. Blank lines are
+# ignored, and comments beginning with '#' are stripped.
+
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Read parameters from the environment, if available
+$NAMEVAR = $ENV{'NAMEVAR'} || "g_names";
+$COUNTVAR = $ENV{'COUNTVAR'} || "g_count";
+$FUNCVAR = $ENV{'FUNCVAR'} || "g_tests";
+$DESCVAR = $ENV{'DESCVAR'} || "g_descs";
+$FUNCLEN = 13;
+$NAMELEN = 18;
+$DESCLEN = 45;
+
+#------------------------------------------------------------------------
+# Suck in input from the files on the command line, or standard input.
+# Each surviving line has the form suite-name:function-name:comment.
+while(<>) {
+    chomp;
+    s/\#.*$//;          # strip comments
+    next if /^\s*$/;    # skip lines that are now blank
+
+    # Limit the split to three fields so that a ':' inside the descriptive
+    # comment is kept instead of silently truncating the description.
+    ($suite, $func, $desc) = split(/:/, $_, 3);
+
+    $tmp = { "suite" => $suite,
+             "func" => $func,
+             "desc" => $desc };
+
+    push(@item, $tmp);
+}
+
+# With no entries at all the code below would still emit one bogus, empty
+# element in every table, so refuse to generate anything.
+die "make-test-arrays: no test suites found in input\n" unless @item;
+
+$count = scalar(@item);
+# The final entry is kept apart because it is printed without a trailing comma.
+$last = pop(@item);
+
+#------------------------------------------------------------------------
+# Output the table of names
+# Every entry except the last is followed by a comma; names and
+# descriptions are padded with spaces to $NAMELEN / $DESCLEN columns.
+print "/* Table mapping test suite names to index numbers */\n";
+printf("const int %s = %d;\n", $COUNTVAR, $count);
+printf("const char *%s[] = {\n", $NAMEVAR);
+
+foreach $elt (@item) {
+ printf(" \"%s\",%s/* %s%s */\n", $elt->{"suite"},
+ " " x ($NAMELEN - length($elt->{"suite"})),
+ $elt->{"desc"},
+ " " x ($DESCLEN - length($elt->{"desc"})));
+}
+# $last was popped off @item earlier and closes the table without a comma.
+printf(" \"%s\" %s/* %s%s */\n", $last->{"suite"},
+ " " x ($NAMELEN - length($last->{"suite"})),
+ $last->{"desc"},
+ " " x ($DESCLEN - length($last->{"desc"})));
+print "};\n\n";
+
+#------------------------------------------------------------------------
+# Output the driver function prototypes
+# One "int name(void);" line per entry, including the popped $last.
+print "/* Test function prototypes */\n";
+foreach $elt (@item, $last) {
+ printf("int %s(void);\n", $elt->{"func"});
+}
+print "\n";
+
+#------------------------------------------------------------------------
+# Output the table of functions
+# $brk counts entries modulo 4 so that a line break follows every fourth.
+print "/* Table mapping index numbers to functions */\n";
+printf("int (*%s[])(void) = {\n ", $FUNCVAR);
+$brk = 0;
+
+foreach $elt (@item) {
+ print($elt->{"func"}, ", ",
+ " " x ($FUNCLEN - length($elt->{"func"})));
+ $brk = ($brk + 1) & 3;
+ print "\n " unless($brk);
+}
+print $last->{"func"}, "\n};\n\n";
+
+#------------------------------------------------------------------------
+# Output the table of descriptions
+# The description text is copied into a C string literal as-is.
+# NOTE(review): no escaping is applied, so a '"' or '\' in a description
+# would presumably produce invalid C -- confirm the inputs never contain them.
+print "/* Table mapping index numbers to descriptions */\n";
+printf("const char *%s[] = {\n", $DESCVAR);
+
+foreach $elt (@item) {
+ printf(" \"%s\",\n", $elt->{"desc"});
+}
+printf(" \"%s\"\n};\n\n", $last->{"desc"});
+
+exit 0;
+
diff --git a/security/nss/lib/freebl/mpi/mdxptest.c b/security/nss/lib/freebl/mpi/mdxptest.c
new file mode 100644
index 000000000..adbcfc3d1
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mdxptest.c
@@ -0,0 +1,306 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include "mpi.h"
+#include "mpi-priv.h"
+
+/* #define OLD_WAY 1 */
+
+/* This key is the 1024-bit test key used for speed testing of RSA private
+** key ops.
+*/
+
+#define CONST const
+
+static CONST unsigned char default_n[128] = {
+ 0xc2, 0xae, 0x96, 0x89, 0xaf, 0xce, 0xd0, 0x7b, 0x3b, 0x35, 0xfd, 0x0f, 0xb1, 0xf4, 0x7a, 0xd1,
+ 0x3c, 0x7d, 0xb5, 0x86, 0xf2, 0x68, 0x36, 0xc9, 0x97, 0xe6, 0x82, 0x94, 0x86, 0xaa, 0x05, 0x39,
+ 0xec, 0x11, 0x51, 0xcc, 0x5c, 0xa1, 0x59, 0xba, 0x29, 0x18, 0xf3, 0x28, 0xf1, 0x9d, 0xe3, 0xae,
+ 0x96, 0x5d, 0x6d, 0x87, 0x73, 0xf6, 0xf6, 0x1f, 0xd0, 0x2d, 0xfb, 0x2f, 0x7a, 0x13, 0x7f, 0xc8,
+ 0x0c, 0x7a, 0xe9, 0x85, 0xfb, 0xce, 0x74, 0x86, 0xf8, 0xef, 0x2f, 0x85, 0x37, 0x73, 0x0f, 0x62,
+ 0x4e, 0x93, 0x17, 0xb7, 0x7e, 0x84, 0x9a, 0x94, 0x11, 0x05, 0xca, 0x0d, 0x31, 0x4b, 0x2a, 0xc8,
+ 0xdf, 0xfe, 0xe9, 0x0c, 0x13, 0xc7, 0xf2, 0xad, 0x19, 0x64, 0x28, 0x3c, 0xb5, 0x6a, 0xc8, 0x4b,
+ 0x79, 0xea, 0x7c, 0xce, 0x75, 0x92, 0x45, 0x3e, 0xa3, 0x9d, 0x64, 0x6f, 0x04, 0x69, 0x19, 0x17
+};
+
+static CONST unsigned char default_d[128] = {
+ 0x13, 0xcb, 0xbc, 0xf2, 0xf3, 0x35, 0x8c, 0x6d, 0x7b, 0x6f, 0xd9, 0xf3, 0xa6, 0x9c, 0xbd, 0x80,
+ 0x59, 0x2e, 0x4f, 0x2f, 0x11, 0xa7, 0x17, 0x2b, 0x18, 0x8f, 0x0f, 0xe8, 0x1a, 0x69, 0x5f, 0x6e,
+ 0xac, 0x5a, 0x76, 0x7e, 0xd9, 0x4c, 0x6e, 0xdb, 0x47, 0x22, 0x8a, 0x57, 0x37, 0x7a, 0x5e, 0x94,
+ 0x7a, 0x25, 0xb5, 0xe5, 0x78, 0x1d, 0x3c, 0x99, 0xaf, 0x89, 0x7d, 0x69, 0x2e, 0x78, 0x9d, 0x1d,
+ 0x84, 0xc8, 0xc1, 0xd7, 0x1a, 0xb2, 0x6d, 0x2d, 0x8a, 0xd9, 0xab, 0x6b, 0xce, 0xae, 0xb0, 0xa0,
+ 0x58, 0x55, 0xad, 0x5c, 0x40, 0x8a, 0xd6, 0x96, 0x08, 0x8a, 0xe8, 0x63, 0xe6, 0x3d, 0x6c, 0x20,
+ 0x49, 0xc7, 0xaf, 0x0f, 0x25, 0x73, 0xd3, 0x69, 0x43, 0x3b, 0xf2, 0x32, 0xf8, 0x3d, 0x5e, 0xee,
+ 0x7a, 0xca, 0xd6, 0x94, 0x55, 0xe5, 0xbd, 0x25, 0x34, 0x8d, 0x63, 0x40, 0xb5, 0x8a, 0xc3, 0x01
+};
+
+#define DEFAULT_ITERS 50
+
+/*
+ * Timing primitives built on clock().
+ *
+ * Every macro argument and every expansion is parenthesized so that the
+ * macros stay correct when handed an expression (for example
+ * sec(end - start)) or when used inside a larger expression.
+ */
+typedef clock_t timetype;
+/* Store the current clock() reading into *x. */
+#define gettime(x) (*(x) = clock())
+/* a -= b, for two timetype values. */
+#define subtime(a, b) ((a) -= (b))
+/* Convert a clock() tick count to milliseconds. */
+#define msec(x) ((clock_t)((double)(x) * 1000.0 / CLOCKS_PER_SEC))
+/* Convert a clock() tick count to whole seconds. */
+#define sec(x) ((x) / CLOCKS_PER_SEC)
+
+/*
+ * State for one timed interval.  start/end are raw clock readings taken
+ * by TimingBegin/TimingEnd; interval is their difference.  The three int
+ * fields hold the same interval broken down for printing and are filled
+ * in by timingUpdate().
+ */
+struct TimingContextStr {
+ timetype start;
+ timetype end;
+ timetype interval;
+
+ int minutes;
+ int seconds;
+ int millisecs;
+};
+
+typedef struct TimingContextStr TimingContext;
+
+/*
+ * Allocate a timing context on the heap.
+ *
+ * The memory is not initialized.  Returns NULL when malloc fails; release
+ * the context with DestroyTimingContext().
+ */
+TimingContext *
+CreateTimingContext(void)
+{
+    TimingContext *ctx = malloc(sizeof *ctx);
+
+    return ctx;
+}
+
+/* Release a context obtained from CreateTimingContext(); NULL is accepted
+ * because the pointer is handed straight to free(). */
+void
+DestroyTimingContext(TimingContext *ctx)
+{
+ free(ctx);
+}
+
+/* Record the current clock() reading as the start of the interval. */
+void
+TimingBegin(TimingContext *ctx)
+{
+    ctx->start = clock();
+}
+
+/*
+ * Break ctx->interval down into minutes, seconds (0-59) and milliseconds
+ * (0-999) for display.
+ */
+static void
+timingUpdate(TimingContext *ctx)
+{
+    const timetype elapsed = ctx->interval;
+    const int whole_seconds = sec(elapsed);
+
+    ctx->millisecs = msec(elapsed) % 1000;
+    ctx->minutes = whole_seconds / 60;
+    ctx->seconds = whole_seconds % 60;
+}
+
+/*
+ * Record the end of the interval, compute its length and refresh the
+ * minutes/seconds/milliseconds breakdown.
+ */
+void
+TimingEnd(TimingContext *ctx)
+{
+    gettime(&ctx->end);
+    ctx->interval = ctx->end - ctx->start;
+    timingUpdate(ctx);
+}
+
+/*
+ * Format the measured interval as "<m> minutes, <s>.<mmm> seconds".
+ *
+ * The text lives in a static buffer, so the returned pointer is only valid
+ * until the next call and the function is not reentrant.  The caller must
+ * not free it.
+ */
+char *
+TimingGenerateString(TimingContext *ctx)
+{
+    static char sBuf[4096];
+
+    /* Bounded formatting: never writes past the end of sBuf. */
+    snprintf(sBuf, sizeof sBuf, "%d minutes, %d.%03d seconds", ctx->minutes,
+             ctx->seconds, ctx->millisecs);
+    return sBuf;
+}
+
+/*
+ * Print the first l bytes of b to stdout as hex, sixteen per row, each row
+ * introduced by a tab, followed by one blank line.  Prints nothing when
+ * l <= 0.
+ */
+static void
+dumpBytes(unsigned char *b, int l)
+{
+    int pos = 0;
+
+    if (l <= 0)
+        return;
+
+    while (pos < l) {
+        const int column = pos % 16;
+
+        if (column == 0)
+            printf("\t");
+        printf(" %02x", b[pos]);
+        if (column == 15)
+            printf("\n");
+        ++pos;
+    }
+    /* Terminate a final, partially filled row. */
+    if (pos % 16 != 0)
+        printf("\n");
+    printf("\n");
+}
+
+/*
+ * Exercise the octet conversion routines on one modulus: read it from
+ * modulusBytes, then write it back out in fixed-length, unsigned and
+ * signed form.
+ *
+ * The sequence stops at the first failing call instead of overwriting its
+ * status with the result of the next one, and nothing is attempted on an
+ * mp_int whose initialization failed.
+ *
+ * Returns the (negative) status of the first failing call, otherwise the
+ * value returned by the final conversion.
+ * NOTE(review): only negative values are treated as errors because the
+ * unsigned/signed octet writers presumably return a byte count rather than
+ * MP_OKAY on success -- confirm against mpi.h.
+ */
+static mp_err
+testNewFuncs(const unsigned char *modulusBytes, int modulus_len)
+{
+    mp_err mperr;
+    mp_int modulus;
+    unsigned char buf[512];
+
+    /* The largest write below is modulus_len + 4 bytes into buf. */
+    if (modulus_len < 0 || modulus_len + 4 > (int)sizeof buf)
+        return MP_BADARG;
+
+    mperr = mp_init(&modulus);
+    if (mperr < 0)
+        goto cleanup;
+    mperr = mp_read_unsigned_octets(&modulus, modulusBytes, modulus_len);
+    if (mperr < 0)
+        goto cleanup;
+    mperr = mp_to_fixlen_octets(&modulus, buf, modulus_len);
+    if (mperr < 0)
+        goto cleanup;
+    mperr = mp_to_fixlen_octets(&modulus, buf, modulus_len + 1);
+    if (mperr < 0)
+        goto cleanup;
+    mperr = mp_to_fixlen_octets(&modulus, buf, modulus_len + 4);
+    if (mperr < 0)
+        goto cleanup;
+    mperr = mp_to_unsigned_octets(&modulus, buf, modulus_len);
+    if (mperr < 0)
+        goto cleanup;
+    mperr = mp_to_signed_octets(&modulus, buf, modulus_len + 1);
+
+cleanup:
+    /* As in testModExp/doModExp, mp_clear is relied on to be safe even
+     * after a failed mp_init. */
+    mp_clear(&modulus);
+    return mperr;
+}
+
+/*
+ * Compute input ^ expo mod modulus and write the result to output as
+ * exactly modulus_len big-endian bytes.
+ *
+ * The modulus is taken from the LAST modulus_len bytes of a buffer that is
+ * sizeof default_n bytes long; the base is the first modulus_len bytes of
+ * input.  Returns 0 (MP_OKAY) on success, a non-zero mp_err value (or sum
+ * of values) otherwise.
+ */
+int
+testModExp(const unsigned char *modulusBytes,
+           const unsigned int expo,
+           const unsigned char *input,
+           unsigned char *output,
+           int modulus_len)
+{
+    mp_int modulus;
+    mp_int base;
+    mp_int exponent;
+    mp_int result;
+    mp_err mperr;
+
+    /* All four are initialized unconditionally, even if an earlier one
+    ** failed: that keeps every DIGITS pointer valid (possibly null) so the
+    ** mp_clear calls at the end are always safe.
+    */
+    mperr = mp_init(&modulus);
+    mperr += mp_init(&base);
+    mperr += mp_init(&exponent);
+    mperr += mp_init(&result);
+    if (mperr != MP_OKAY)
+        goto done;
+
+    mperr = mp_read_unsigned_octets(&modulus,
+                                    modulusBytes + (sizeof default_n - modulus_len),
+                                    modulus_len);
+    mperr += mp_read_unsigned_octets(&base, input, modulus_len);
+    mp_set(&exponent, expo);
+    if (mperr != MP_OKAY)
+        goto done;
+
+#if OLD_WAY
+    mperr = s_mp_exptmod(&base, &exponent, &modulus, &result);
+#else
+    mperr = mp_exptmod(&base, &exponent, &modulus, &result);
+#endif
+    if (mperr == MP_OKAY)
+        mperr = mp_to_fixlen_octets(&result, output, modulus_len);
+
+done:
+    mp_clear(&base);
+    mp_clear(&result);
+
+    mp_clear(&modulus);
+    mp_clear(&exponent);
+
+    return (int)mperr;
+}
+
+/*
+ * Compute input ^ exponent mod modulus, all three given as big-endian byte
+ * strings, and write the result to output as exactly modulus_len bytes.
+ *
+ * The modulus is taken from the LAST modulus_len bytes of a buffer that is
+ * sizeof default_n bytes long; exponent and base are read from the FIRST
+ * modulus_len bytes of their buffers.  Returns 0 (MP_OKAY) on success, a
+ * non-zero mp_err value (or sum of values) otherwise.
+ */
+int
+doModExp(const unsigned char *modulusBytes,
+         const unsigned char *exponentBytes,
+         const unsigned char *input,
+         unsigned char *output,
+         int modulus_len)
+{
+    mp_int modulus;
+    mp_int base;
+    mp_int exponent;
+    mp_int result;
+    mp_err mperr;
+
+    /* All four are initialized unconditionally, even if an earlier one
+    ** failed: that keeps every DIGITS pointer valid (possibly null) so the
+    ** mp_clear calls at the end are always safe.
+    */
+    mperr = mp_init(&modulus);
+    mperr += mp_init(&base);
+    mperr += mp_init(&exponent);
+    mperr += mp_init(&result);
+    if (mperr != MP_OKAY)
+        goto done;
+
+    mperr = mp_read_unsigned_octets(&modulus,
+                                    modulusBytes + (sizeof default_n - modulus_len),
+                                    modulus_len);
+    mperr += mp_read_unsigned_octets(&exponent, exponentBytes, modulus_len);
+    mperr += mp_read_unsigned_octets(&base, input, modulus_len);
+    if (mperr != MP_OKAY)
+        goto done;
+
+#if OLD_WAY
+    mperr = s_mp_exptmod(&base, &exponent, &modulus, &result);
+#else
+    mperr = mp_exptmod(&base, &exponent, &modulus, &result);
+#endif
+    if (mperr == MP_OKAY)
+        mperr = mp_to_fixlen_octets(&result, output, modulus_len);
+
+done:
+    mp_clear(&base);
+    mp_clear(&result);
+
+    mp_clear(&modulus);
+    mp_clear(&exponent);
+
+    return (int)mperr;
+}
+
+/*
+ * Usage: <prog> [iterations [modulus_len]]
+ *
+ * Runs doModExp() once as a sanity check, then times `iterations` further
+ * runs (default DEFAULT_ITERS) and prints the elapsed time.  With fewer
+ * than two iterations the octet-conversion and small-exponent self tests
+ * are run first.
+ *
+ * modulus_len is the number of trailing bytes of the built-in key to use.
+ * It must lie between 4 and sizeof default_n: anything larger would index
+ * in front of the key arrays, and the self tests pass modulus_len - 3 as a
+ * length, so smaller values would hand them a zero or wrapped length.
+ */
+int
+main(int argc, char **argv)
+{
+    TimingContext *timeCtx;
+    char *progName;
+    long iters = DEFAULT_ITERS;
+    long remaining;
+    unsigned int modulus_len = sizeof default_n;
+    int rv;
+    unsigned char buf[1024];
+    unsigned char buf2[1024];
+
+    progName = strrchr(argv[0], '/');
+    if (!progName)
+        progName = strrchr(argv[0], '\\');
+    progName = progName ? progName + 1 : argv[0];
+
+    if (argc >= 2) {
+        iters = atol(argv[1]);
+        if (iters < 0) {
+            fprintf(stderr, "%s: iteration count must not be negative\n",
+                    progName);
+            exit(1);
+        }
+    }
+
+    if (argc >= 3) {
+        long len = atol(argv[2]);
+
+        if (len < 4 || len > (long)sizeof default_n) {
+            fprintf(stderr,
+                    "%s: modulus length must be between 4 and %lu bytes\n",
+                    progName, (unsigned long)sizeof default_n);
+            exit(1);
+        }
+        modulus_len = (unsigned int)len;
+    }
+
+    /* no library init function !? */
+
+    memset(buf, 0x41, sizeof buf);
+
+    if (iters < 2) {
+        testNewFuncs(default_n, modulus_len);
+        testNewFuncs(default_n + 1, modulus_len - 1);
+        testNewFuncs(default_n + 2, modulus_len - 2);
+        testNewFuncs(default_n + 3, modulus_len - 3);
+
+        rv = testModExp(default_n, 0, buf, buf2, modulus_len);
+        dumpBytes((unsigned char *)buf2, modulus_len);
+
+        rv = testModExp(default_n, 1, buf, buf2, modulus_len);
+        dumpBytes((unsigned char *)buf2, modulus_len);
+
+        rv = testModExp(default_n, 2, buf, buf2, modulus_len);
+        dumpBytes((unsigned char *)buf2, modulus_len);
+
+        rv = testModExp(default_n, 3, buf, buf2, modulus_len);
+        dumpBytes((unsigned char *)buf2, modulus_len);
+    }
+    rv = doModExp(default_n, default_d, buf, buf2, modulus_len);
+    if (rv != 0) {
+        fprintf(stderr, "Error in modexp operation:\n");
+        exit(1);
+    }
+    dumpBytes((unsigned char *)buf2, modulus_len);
+
+    timeCtx = CreateTimingContext();
+    if (!timeCtx) {
+        fprintf(stderr, "%s: out of memory\n", progName);
+        exit(1);
+    }
+    TimingBegin(timeCtx);
+    /* A long counter: an int would truncate large iteration counts. */
+    for (remaining = iters; remaining > 0; remaining--) {
+        rv = doModExp(default_n, default_d, buf, buf2, modulus_len);
+        if (rv != 0) {
+            fprintf(stderr, "Error in modexp operation\n");
+            exit(1);
+        }
+    }
+    TimingEnd(timeCtx);
+    printf("%ld iterations in %s\n", iters, TimingGenerateString(timeCtx));
+    DestroyTimingContext(timeCtx);
+
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/montmulf.c b/security/nss/lib/freebl/mpi/montmulf.c
new file mode 100644
index 000000000..ce8fbc31d
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/montmulf.c
@@ -0,0 +1,286 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef SOLARIS
+#define RF_INLINE_MACROS 1
+#endif
+
+/* Powers of two used to split doubles into 16- and 32-bit digits. */
+static const double TwoTo16 = 65536.0;
+static const double TwoToMinus16 = 1.0 / 65536.0;
+static const double Zero = 0.0;
+static const double TwoTo32 = 65536.0 * 65536.0;
+static const double TwoToMinus32 = 1.0 / (65536.0 * 65536.0);
+
+#ifdef RF_INLINE_MACROS
+
+/* With RF_INLINE_MACROS the helpers are only declared here; their
+ * definitions are supplied from outside this file. */
+double upper32(double);
+double lower32(double, double);
+double mod(double, double, double);
+
+void i16_to_d16_and_d32x4(const double * /* 1/(2^16) */,
+                          const double * /* 2^16 */,
+                          const double * /* 0 */,
+                          double * /* result16 */,
+                          double * /* result32 */,
+                          float * /* source - should be unsigned int* converted to float* */);
+
+#else
+#ifdef MP_USE_FLOOR
+#include <math.h>
+#else
+/* Truncating stand-in for floor(); equal to floor() for the non-negative
+ * values that fit in an unsigned long long. */
+#define floor(d) ((double)((unsigned long long)(d)))
+#endif
+
+/* Integer part of x / 2^32. */
+static double
+upper32(double x)
+{
+    const double scaled = x * TwoToMinus32;
+
+    return floor(scaled);
+}
+
+/* x with every whole multiple of 2^32 removed.  The second argument is
+ * unused; it only mirrors the declaration used with RF_INLINE_MACROS. */
+static double
+lower32(double x, double y)
+{
+    return x - TwoTo32 * upper32(x);
+}
+
+/* x reduced modulo m, where oneoverm is the caller-supplied value of 1/m. */
+static double
+mod(double x, double oneoverm, double m)
+{
+    const double quotient = floor(x * oneoverm);
+
+    return x - m * quotient;
+}
+
+#endif
+
+/*
+ * Carry normalization over dt[2*from .. 2*tlen-1].
+ *
+ * Elements are handled in (even, odd) pairs.  Each element keeps only its
+ * low 32 bits and receives the upper part that was split off the element
+ * two positions earlier, so the even and the odd positions form two
+ * independent carry chains.  The carries split off the final pair are not
+ * stored anywhere.
+ */
+static void
+cleanup(double *dt, int from, int tlen)
+{
+    double carry_even = Zero;
+    double carry_odd = Zero;
+    int k;
+
+    for (k = 2 * from; k < 2 * tlen; k += 2) {
+        const double even = dt[k];
+        const double odd = dt[k + 1];
+
+        dt[k] = lower32(even, Zero) + carry_even;
+        dt[k + 1] = lower32(odd, Zero) + carry_odd;
+        carry_even = upper32(even);
+        carry_odd = upper32(odd);
+    }
+}
+
+/*
+ * Fold an array of doubles, holding possibly oversized base-2^16 digits,
+ * into ilen 32-bit words.
+ *
+ * d16[2*k] contributes at bit 0 of word k and d16[2*k + 1] at bit 16;
+ * whatever does not fit into the word is carried into the next one.  The
+ * carry out of the last word is dropped.  d16[0] and d16[1] are always
+ * read and one word is always written, even when ilen < 1.  tmp is unused.
+ */
+void
+conv_d16_to_i32(unsigned int *i32, double *d16, long long *tmp, int ilen)
+{
+    long long acc = 0;
+    long long lo = (long long)d16[0];
+    long long hi = (long long)d16[1];
+    int k;
+
+    for (k = 0; k < ilen - 1; k++) {
+        const long long next_lo = (long long)d16[2 * k + 2];
+        const long long next_hi = (long long)d16[2 * k + 3];
+        long long carry;
+
+        acc += (unsigned int)lo;
+        acc += (hi & 0xffff) << 16;
+        carry = (lo >> 32) + (hi >> 16) + (acc >> 32);
+        i32[k] = (unsigned int)acc;
+
+        acc = carry;
+        lo = next_lo;
+        hi = next_hi;
+    }
+
+    /* Last word: nothing above it to carry into. */
+    acc += (unsigned int)lo;
+    acc += (hi & 0xffff) << 16;
+    i32[k] = (unsigned int)acc;
+}
+
+/* Convert len 32-bit words to doubles, one double per word. */
+void
+conv_i32_to_d32(double *d32, unsigned int *i32, int len)
+{
+    int k;
+
+#pragma pipeloop(0)
+    for (k = 0; k < len; k++) {
+        const unsigned int word = i32[k];
+
+        d32[k] = (double)word;
+    }
+}
+
+/*
+ * Split each of len 32-bit words into two doubles: d16[2*k] receives the
+ * low 16 bits of i32[k] and d16[2*k + 1] the high 16 bits.
+ */
+void
+conv_i32_to_d16(double *d16, unsigned int *i32, int len)
+{
+    int k;
+
+#pragma pipeloop(0)
+    for (k = 0; k < len; k++) {
+        const unsigned int word = i32[k];
+        double *pair = &d16[2 * k];
+
+        pair[0] = (double)(word & 0xffff);
+        pair[1] = (double)(word >> 16);
+    }
+}
+
+/*
+ * Produce both representations of i32 in one pass: d32[k] is the whole
+ * word as a double, d16[2*k] / d16[2*k + 1] are its low / high 16 bits.
+ *
+ * With RF_INLINE_MACROS the words are first handled four at a time by
+ * i16_to_d16_and_d32x4(); the scalar loop then finishes whatever is left
+ * (or does everything when the macro is not defined).
+ */
+void
+conv_i32_to_d32_and_d16(double *d32, double *d16,
+                        unsigned int *i32, int len)
+{
+    int k = 0;
+    unsigned int word;
+
+#pragma pipeloop(0)
+#ifdef RF_INLINE_MACROS
+    while (k < len - 3) {
+        i16_to_d16_and_d32x4(&TwoToMinus16, &TwoTo16, &Zero,
+                             &(d16[2 * k]), &(d32[k]), (float *)(&(i32[k])));
+        k += 4;
+    }
+#endif
+    for (; k < len; k++) {
+        word = i32[k];
+        d32[k] = (double)word;
+        d16[2 * k] = (double)(word & 0xffff);
+        d16[2 * k + 1] = (double)(word >> 16);
+    }
+}
+
+/*
+ * Final conditional subtraction: replace the len-word value in i32 by
+ * i32 - nint when the extra top word i32[len] is non-zero or when the low
+ * len words compare greater than or equal to nint.  Only the low len words
+ * are rewritten; i32[len] itself is left untouched.
+ */
+void
+adjust_montf_result(unsigned int *i32, unsigned int *nint, int len)
+{
+    long long borrow;
+    int k;
+
+    if (i32[len] > 0) {
+        k = -1;
+    } else {
+        /* Find the most significant word in which the two values differ. */
+        k = len - 1;
+        while (k >= 0 && i32[k] == nint[k])
+            k--;
+    }
+
+    /* A differing word was found and i32 is the smaller value: no change. */
+    if (k >= 0 && i32[k] <= nint[k])
+        return;
+
+    borrow = 0;
+    for (k = 0; k < len; k++) {
+        borrow = borrow + (unsigned long long)(i32[k]) - (unsigned long long)(nint[k]);
+        i32[k] = (unsigned int)borrow;
+        borrow = borrow >> 32;
+    }
+}
+
+/*
+** the lengths of the input arrays should be at least the following:
+** result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen]
+** all of them should be different from one another
+**
+** Multiplies the numbers held as arrays of doubles in dm1 and dm2, using
+** dt as scratch space.  The upper part of dt (from dt + 2*nlen on) is
+** converted to nlen+1 32-bit words in result, which is then passed through
+** adjust_montf_result() together with nint.  dn0 is the per-digit
+** multiplier used to derive each reduction digit.
+**
+** There are two code paths: a generic one, and a hand-unrolled one that is
+** taken when nlen == 16.
+**
+** NOTE(review): the locals tmp and dptmp are declared but never used.
+*/
+void
+mont_mulf_noconv(unsigned int *result,
+ double *dm1, double *dm2, double *dt,
+ double *dn, unsigned int *nint,
+ int nlen, double dn0)
+{
+ int i, j, jj;
+ int tmp;
+ double digit, m2j, nextm2j, a, b;
+ double *dptmp, *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0;
+
+ pdm1 = &(dm1[0]);
+ pdm2 = &(dm2[0]);
+ pdn = &(dn[0]);
+ /* Both loops below read pdm2[j + 1] with j up to 2*nlen - 1, so the
+ * element just past the 2*nlen digits must hold a defined value. */
+ pdm2[2 * nlen] = Zero;
+
+ if (nlen != 16) {
+ /* Generic path: clear the whole work area first. */
+ for (i = 0; i < 4 * nlen + 2; i++)
+ dt[i] = Zero;
+
+ a = dt[0] = pdm1[0] * pdm2[0];
+ digit = mod(lower32(a, Zero) * dn0, TwoToMinus16, TwoTo16);
+
+ /* pdtj advances by one element of dt per outer iteration. */
+ pdtj = &(dt[0]);
+ for (j = jj = 0; j < 2 * nlen; j++, jj++, pdtj++) {
+ m2j = pdm2[j];
+ a = pdtj[0] + pdn[0] * digit;
+ b = pdtj[1] + pdm1[0] * pdm2[j + 1] + a * TwoToMinus16;
+ pdtj[1] = b;
+
+#pragma pipeloop(0)
+ for (i = 1; i < nlen; i++) {
+ pdtj[2 * i] += pdm1[i] * m2j + pdn[i] * digit;
+ }
+ /* Periodically push the accumulated high parts along with
+ * cleanup(); jj counts the iterations since the last time.
+ * Presumably this keeps the sums exactly representable in a
+ * double -- confirm. */
+ if ((jj == 30)) {
+ cleanup(dt, j / 2 + 1, 2 * nlen + 1);
+ jj = 0;
+ }
+
+ digit = mod(lower32(b, Zero) * dn0, TwoToMinus16, TwoTo16);
+ }
+ } else {
+ /* nlen == 16: dt[0] is seeded with the first product and dt[1..65]
+ * (the remaining 4*16 + 2 - 1 elements) are cleared. */
+ a = dt[0] = pdm1[0] * pdm2[0];
+
+ dt[65] = dt[64] = dt[63] = dt[62] = dt[61] = dt[60] =
+ dt[59] = dt[58] = dt[57] = dt[56] = dt[55] = dt[54] =
+ dt[53] = dt[52] = dt[51] = dt[50] = dt[49] = dt[48] =
+ dt[47] = dt[46] = dt[45] = dt[44] = dt[43] = dt[42] =
+ dt[41] = dt[40] = dt[39] = dt[38] = dt[37] = dt[36] =
+ dt[35] = dt[34] = dt[33] = dt[32] = dt[31] = dt[30] =
+ dt[29] = dt[28] = dt[27] = dt[26] = dt[25] = dt[24] =
+ dt[23] = dt[22] = dt[21] = dt[20] = dt[19] = dt[18] =
+ dt[17] = dt[16] = dt[15] = dt[14] = dt[13] = dt[12] =
+ dt[11] = dt[10] = dt[9] = dt[8] = dt[7] = dt[6] =
+ dt[5] = dt[4] = dt[3] = dt[2] = dt[1] = Zero;
+
+ pdn_0 = pdn[0];
+ pdm1_0 = pdm1[0];
+
+ digit = mod(lower32(a, Zero) * dn0, TwoToMinus16, TwoTo16);
+ pdtj = &(dt[0]);
+
+ /* 32 == 2 * nlen outer iterations. */
+ for (j = 0; j < 32; j++, pdtj++) {
+
+ m2j = pdm2[j];
+ a = pdtj[0] + pdn_0 * digit;
+ b = pdtj[1] + pdm1_0 * pdm2[j + 1] + a * TwoToMinus16;
+ pdtj[1] = b;
+
+ /**** this loop will be fully unrolled:
+ for(i=1;i<16;i++)
+ {
+ pdtj[2*i]+=pdm1[i]*m2j+pdn[i]*digit;
+ }
+ *************************************/
+ pdtj[2] += pdm1[1] * m2j + pdn[1] * digit;
+ pdtj[4] += pdm1[2] * m2j + pdn[2] * digit;
+ pdtj[6] += pdm1[3] * m2j + pdn[3] * digit;
+ pdtj[8] += pdm1[4] * m2j + pdn[4] * digit;
+ pdtj[10] += pdm1[5] * m2j + pdn[5] * digit;
+ pdtj[12] += pdm1[6] * m2j + pdn[6] * digit;
+ pdtj[14] += pdm1[7] * m2j + pdn[7] * digit;
+ pdtj[16] += pdm1[8] * m2j + pdn[8] * digit;
+ pdtj[18] += pdm1[9] * m2j + pdn[9] * digit;
+ pdtj[20] += pdm1[10] * m2j + pdn[10] * digit;
+ pdtj[22] += pdm1[11] * m2j + pdn[11] * digit;
+ pdtj[24] += pdm1[12] * m2j + pdn[12] * digit;
+ pdtj[26] += pdm1[13] * m2j + pdn[13] * digit;
+ pdtj[28] += pdm1[14] * m2j + pdn[14] * digit;
+ pdtj[30] += pdm1[15] * m2j + pdn[15] * digit;
+ /* no need for cleanup, cannot overflow */
+ digit = mod(lower32(b, Zero) * dn0, TwoToMinus16, TwoTo16);
+ }
+ }
+
+ /* The result is read from the upper part of dt; dt is also passed as
+ * the tmp argument of conv_d16_to_i32. */
+ conv_d16_to_i32(result, dt + 2 * nlen, (long long *)dt, nlen + 1);
+
+ adjust_montf_result(result, nint, nlen);
+}
diff --git a/security/nss/lib/freebl/mpi/montmulf.h b/security/nss/lib/freebl/mpi/montmulf.h
new file mode 100644
index 000000000..69bed4acb
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/montmulf.h
@@ -0,0 +1,65 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* The functions that are to be called from outside of the .s file have the
+ * following interfaces and array size requirements:
+ */
+
+void conv_i32_to_d32(double *d32, unsigned int *i32, int len);
+
+/* Converts an array of unsigned ints to an array of doubles, so that each
+ * double corresponds to one int. len is the number of items converted.
+ * Does not allocate the output array.
+ * The pointers d32 and i32 must point to arrays of at least len elements
+ * (doubles and unsigned ints, respectively).
+ */
+
+void conv_i32_to_d16(double *d16, unsigned int *i32, int len);
+
+/* Converts an array of unsigned ints to an array of doubles so that each
+ * element of the int array is converted to a pair of doubles, the first one
+ * corresponding to the lower (least significant) 16 bits of the int and
+ * the second one corresponding to the upper (most significant) 16 bits of
+ * the 32-bit int. len is the number of ints converted.
+ * Does not allocate the output array.
+ * The pointer d16 must point to an array of doubles of size at least
+ * 2*len, and i32 must point to an array of ints of size at least len.
+ */
+
+void conv_i32_to_d32_and_d16(double *d32, double *d16,
+ unsigned int *i32, int len);
+
+/* Performs both of the above conversions in a single call; this is much
+ * faster than calling the two functions in succession.
+ */
+
+void mont_mulf_noconv(unsigned int *result,
+ double *dm1, double *dm2, double *dt,
+ double *dn, unsigned int *nint,
+ int nlen, double dn0);
+
+/* Performs the Montgomery multiplication of the numbers stored in the arrays
+ * pointed to by dm1 and dm2, writing the result to the array pointed to by
+ * result. The array pointed to by dt is used as a temporary work area.
+ * nint must point to the modulus in the array-of-integers representation;
+ * dn must point to its array-of-doubles representation, as produced by the
+ * function call conv_i32_to_d32(dn, nint, nlen).
+ * nlen is the length of the array containing the modulus.
+ * The representation used for dm1 is the one produced by the function
+ * call conv_i32_to_d32(dm1, m1, nlen); the representation for dm2 is the
+ * one produced by the function call conv_i32_to_d16(dm2, m2, nlen).
+ * Note that m1 and m2 must both be of length nlen, so they must be
+ * padded with 0's if necessary before the conversion. The result comes in
+ * this form (int representation, padded with 0's).
+ * dn0 is the value of the 16 least significant bits of n0'.
+ * The function does not allocate memory for any of the arrays, so the
+ * pointers must point to arrays with at least the following sizes:
+ * result - nlen+1
+ * dm1 - nlen
+ * dm2 - 2*nlen+1 ( the +1 is necessary for technical reasons )
+ * dt - 4*nlen+2
+ * dn - nlen
+ * nint - nlen
+ * No two arrays may overlap in memory.
+ */
diff --git a/security/nss/lib/freebl/mpi/montmulf.il b/security/nss/lib/freebl/mpi/montmulf.il
new file mode 100644
index 000000000..4952d0fb8
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/montmulf.il
@@ -0,0 +1,108 @@
+!
+! This Source Code Form is subject to the terms of the Mozilla Public
+! License, v. 2.0. If a copy of the MPL was not distributed with this
+! file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+! Returns the upper 32 bits of the argument's 64-bit integer value.
+! double upper32(double /*frs1*/);
+! The result is returned as a double in %f0.
+ .inline upper32,8
+ std %o0,[%sp+0x48] ! spill the argument from %o0/%o1 to the stack
+ ldd [%sp+0x48],%f10 ! reload it as a double in %f10/%f11
+
+ fdtox %f10,%f10 ! double -> 64-bit integer
+ fitod %f10,%f0 ! first word of the pair (%f10) -> double
+ .end
+
+! Returns the lower 32 bits of the argument's 64-bit integer value.
+! double lower32(double /*frs1*/, double /* Zero */);
+! NOTE(review): argsize is 8 although two doubles are passed - confirm.
+ .inline lower32,8
+ std %o0,[%sp+0x48] ! spill the first argument from %o0/%o1
+ ldd [%sp+0x48],%f10 ! reload it as a double in %f10/%f11
+ std %o2,[%sp+0x48] ! spill the second argument from %o2/%o3
+ ldd [%sp+0x48],%f12 ! reload it as a double in %f12/%f13
+
+ fdtox %f10,%f10 ! double -> 64-bit integer
+ fmovs %f12,%f10 ! overwrite the first word with that of arg 2
+ fxtod %f10,%f0 ! 64-bit integer -> double result
+ .end
+
+! Returns x - m * trunc(x * (1/m)), with the truncation done by fdtox.
+! double mod(double /*x*/, double /*1/m*/, double /*m*/);
+! NOTE(review): argsize is 12 although three doubles are passed - confirm.
+ .inline mod,12
+ std %o0,[%sp+0x48] ! x: %o0/%o1 -> stack
+ ldd [%sp+0x48],%f2 ! x -> %f2
+ std %o2,[%sp+0x48] ! 1/m: %o2/%o3 -> stack
+ ldd [%sp+0x48],%f4 ! 1/m -> %f4
+ std %o4,[%sp+0x48] ! m: %o4/%o5 -> stack
+ ldd [%sp+0x48],%f6 ! m -> %f6
+
+ fmuld %f2,%f4,%f4 ! x * (1/m)
+ fdtox %f4,%f4 ! convert the quotient to a 64-bit integer
+ fxtod %f4,%f4 ! and back to a double
+ fmuld %f4,%f6,%f4 ! integer quotient * m
+ fsubd %f2,%f4,%f0 ! x - quotient * m
+ .end
+
+
+! Converts four 32-bit source words to doubles (result32) and to 16-bit halves (result16).
+! void i16_to_d16_and_d32x4(double * /*1/(2^16)*/, double * /* 2^16*/,
+! double * /* 0 */,
+! double * /*result16*/, double * /* result32 */,
+! float * /*source - should be unsigned int*
+! converted to float* */);
+! result16 holds, for each source word, the lower half followed by the upper half.
+ .inline i16_to_d16_and_d32x4,24
+ ldd [%o0],%f2 ! 1/(2^16)
+ ldd [%o1],%f4 ! 2^16
+ ldd [%o2],%f22 ! 0
+
+ fmovd %f22,%f6 ! start each register pair from the 0 constant
+ ld [%o5],%f7 ! source[0] into the second word of the pair
+ fmovd %f22,%f10
+ ld [%o5+4],%f11 ! source[1]
+ fmovd %f22,%f14
+ ld [%o5+8],%f15 ! source[2]
+ fmovd %f22,%f18
+ ld [%o5+12],%f19 ! source[3]
+ fxtod %f6,%f6 ! 64-bit integer -> double
+ std %f6,[%o4] ! result32[0]
+ fxtod %f10,%f10
+ std %f10,[%o4+8] ! result32[1]
+ fxtod %f14,%f14
+ std %f14,[%o4+16] ! result32[2]
+ fxtod %f18,%f18
+ std %f18,[%o4+24] ! result32[3]
+ fmuld %f2,%f6,%f8 ! value * 1/(2^16)
+ fmuld %f2,%f10,%f12
+ fmuld %f2,%f14,%f16
+ fmuld %f2,%f18,%f20
+ fdtox %f8,%f8 ! convert to integer: the upper half of the value
+ fdtox %f12,%f12
+ fdtox %f16,%f16
+ fdtox %f20,%f20
+ fxtod %f8,%f8 ! upper half back to double
+ std %f8,[%o3+8] ! result16[1]: upper half of source[0]
+ fxtod %f12,%f12
+ std %f12,[%o3+24] ! result16[3]: upper half of source[1]
+ fxtod %f16,%f16
+ std %f16,[%o3+40] ! result16[5]: upper half of source[2]
+ fxtod %f20,%f20
+ std %f20,[%o3+56] ! result16[7]: upper half of source[3]
+ fmuld %f8,%f4,%f8 ! upper half * 2^16
+ fmuld %f12,%f4,%f12
+ fmuld %f16,%f4,%f16
+ fmuld %f20,%f4,%f20
+ fsubd %f6,%f8,%f8 ! value - upper half * 2^16: the lower half
+ std %f8,[%o3] ! result16[0]: lower half of source[0]
+ fsubd %f10,%f12,%f12
+ std %f12,[%o3+16] ! result16[2]: lower half of source[1]
+ fsubd %f14,%f16,%f16
+ std %f16,[%o3+32] ! result16[4]: lower half of source[2]
+ fsubd %f18,%f20,%f20
+ std %f20,[%o3+48] ! result16[6]: lower half of source[3]
+ .end
+
+
diff --git a/security/nss/lib/freebl/mpi/montmulf.s b/security/nss/lib/freebl/mpi/montmulf.s
new file mode 100644
index 000000000..69d2a3c51
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/montmulf.s
@@ -0,0 +1,1938 @@
+!
+! This Source Code Form is subject to the terms of the Mozilla Public
+! License, v. 2.0. If a copy of the MPL was not distributed with this
+! file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+ .section ".text",#alloc,#execinstr
+ .file "montmulf.c"
+
+ .section ".data",#alloc,#write
+ .align 8
+TwoTo16: /* frequency 1.0 confidence 0.0 */
+ .word 1089470464
+ .word 0
+ .type TwoTo16,#object
+ .size TwoTo16,8
+TwoToMinus16: /* frequency 1.0 confidence 0.0 */
+ .word 1055916032
+ .word 0
+ .type TwoToMinus16,#object
+ .size TwoToMinus16,8
+Zero: /* frequency 1.0 confidence 0.0 */
+ .word 0
+ .word 0
+ .type Zero,#object
+ .size Zero,8
+TwoTo32: /* frequency 1.0 confidence 0.0 */
+ .word 1106247680
+ .word 0
+ .type TwoTo32,#object
+ .size TwoTo32,8
+TwoToMinus32: /* frequency 1.0 confidence 0.0 */
+ .word 1039138816
+ .word 0
+ .type TwoToMinus32,#object
+ .size TwoToMinus32,8
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 ( 0 0) */ .align 4
+!
+! SUBROUTINE cleanup
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME)
+
+ .global cleanup
+ cleanup: /* frequency 1.0 confidence 0.0 */
+! FILE montmulf.c
+
+! 1 !#define RF_INLINE_MACROS
+! 3 !static double TwoTo16=65536.0;
+! 4 !static double TwoToMinus16=1.0/65536.0;
+! 5 !static double Zero=0.0;
+! 6 !static double TwoTo32=65536.0*65536.0;
+! 7 !static double TwoToMinus32=1.0/(65536.0*65536.0);
+! 9 !#ifdef RF_INLINE_MACROS
+! 11 !double upper32(double);
+! 12 !double lower32(double, double);
+! 13 !double mod(double, double, double);
+! 15 !#else
+! 17 !static double upper32(double x)
+! 18 !{
+! 19 ! return floor(x*TwoToMinus32);
+! 20 !}
+! 22 !static double lower32(double x, double y)
+! 23 !{
+! 24 ! return x-TwoTo32*floor(x*TwoToMinus32);
+! 25 !}
+! 27 !static double mod(double x, double oneoverm, double m)
+! 28 !{
+! 29 ! return x-m*floor(x*oneoverm);
+! 30 !}
+! 32 !#endif
+! 35 !void cleanup(double *dt, int from, int tlen)
+! 36 !{
+! 37 ! int i;
+! 38 ! double tmp,tmp1,x,x1;
+! 40 ! tmp=tmp1=Zero;
+
+/* 000000 40 ( 0 1) */ sethi %hi(Zero),%g2
+
+! 41 ! /* original code **
+! 42 ! for(i=2*from;i<2*tlen-2;i++)
+! 43 ! {
+! 44 ! x=dt[i];
+! 45 ! dt[i]=lower32(x,Zero)+tmp1;
+! 46 ! tmp1=tmp;
+! 47 ! tmp=upper32(x);
+! 48 ! }
+! 49 ! dt[tlen-2]+=tmp1;
+! 50 ! dt[tlen-1]+=tmp;
+! 51 ! **end original code ***/
+! 52 ! /* new code ***/
+! 53 ! for(i=2*from;i<2*tlen;i+=2)
+
+/* 0x0004 53 ( 1 2) */ sll %o2,1,%g3
+/* 0x0008 40 ( 1 4) */ ldd [%g2+%lo(Zero)],%f0
+/* 0x000c ( 1 2) */ add %g2,%lo(Zero),%g2
+/* 0x0010 53 ( 2 3) */ sll %o1,1,%g4
+/* 0x0014 36 ( 3 4) */ sll %o1,4,%g1
+/* 0x0018 40 ( 3 4) */ fmovd %f0,%f4
+/* 0x001c 53 ( 3 4) */ cmp %g4,%g3
+/* 0x0020 ( 3 4) */ bge,pt %icc,.L77000116 ! tprob=0.56
+/* 0x0024 ( 4 5) */ fmovd %f0,%f2
+/* 0x0028 36 ( 4 5) */ add %o0,%g1,%g1
+/* 0x002c ( 4 5) */ sub %g3,1,%g3
+
+! 54 ! {
+! 55 ! x=dt[i];
+
+/* 0x0030 55 ( 5 8) */ ldd [%g1],%f8
+ .L900000114: /* frequency 6.4 confidence 0.0 */
+/* 0x0034 ( 0 3) */ fdtox %f8,%f6
+
+! 56 ! x1=dt[i+1];
+
+/* 0x0038 56 ( 0 3) */ ldd [%g1+8],%f10
+
+! 57 ! dt[i]=lower32(x,Zero)+tmp;
+! 58 ! dt[i+1]=lower32(x1,Zero)+tmp1;
+! 59 ! tmp=upper32(x);
+! 60 ! tmp1=upper32(x1);
+
+/* 0x003c 60 ( 0 1) */ add %g4,2,%g4
+/* 0x0040 ( 1 4) */ fdtox %f8,%f8
+/* 0x0044 ( 1 2) */ cmp %g4,%g3
+/* 0x0048 ( 5 6) */ fmovs %f0,%f6
+/* 0x004c ( 7 10) */ fxtod %f6,%f6
+/* 0x0050 ( 8 11) */ fdtox %f10,%f0
+/* 0x0054 57 (10 13) */ faddd %f6,%f2,%f2
+/* 0x0058 (10 11) */ std %f2,[%g1]
+/* 0x005c (12 15) */ ldd [%g2],%f2
+/* 0x0060 (14 15) */ fmovs %f2,%f0
+/* 0x0064 (16 19) */ fxtod %f0,%f6
+/* 0x0068 (17 20) */ fdtox %f10,%f0
+/* 0x006c (18 21) */ fitod %f8,%f2
+/* 0x0070 58 (19 22) */ faddd %f6,%f4,%f4
+/* 0x0074 (19 20) */ std %f4,[%g1+8]
+/* 0x0078 60 (19 20) */ add %g1,16,%g1
+/* 0x007c (20 23) */ fitod %f0,%f4
+/* 0x0080 (20 23) */ ldd [%g2],%f0
+/* 0x0084 (20 21) */ ble,a,pt %icc,.L900000114 ! tprob=0.86
+/* 0x0088 (21 24) */ ldd [%g1],%f8
+ .L77000116: /* frequency 1.0 confidence 0.0 */
+/* 0x008c ( 0 2) */ retl ! Result =
+/* 0x0090 ( 1 2) */ nop
+/* 0x0094 0 ( 0 0) */ .type cleanup,2
+/* 0x0094 ( 0 0) */ .size cleanup,(.-cleanup)
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 ( 0 0) */ .align 4
+!
+! SUBROUTINE conv_d16_to_i32
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME)
+
+ .global conv_d16_to_i32
+ conv_d16_to_i32: /* frequency 1.0 confidence 0.0 */
+/* 000000 ( 0 1) */ save %sp,-136,%sp
+
+! 61 ! }
+! 62 ! /** end new code **/
+! 63 !}
+! 66 !void conv_d16_to_i32(unsigned int *i32, double *d16, long long *tmp, int ilen)
+! 67 !{
+! 68 !int i;
+! 69 !long long t, t1, a, b, c, d;
+! 71 ! t1=0;
+! 72 ! a=(long long)d16[0];
+
+/* 0x0004 72 ( 1 4) */ ldd [%i1],%f0
+
+! 73 ! b=(long long)d16[1];
+! 74 ! for(i=0; i<ilen-1; i++)
+
+/* 0x0008 74 ( 1 2) */ sub %i3,1,%g2
+/* 0x000c 67 ( 1 2) */ or %g0,%i0,%g5
+/* 0x0010 74 ( 2 3) */ cmp %g2,0
+/* 0x0014 71 ( 2 3) */ or %g0,0,%o4
+/* 0x0018 72 ( 3 6) */ fdtox %f0,%f0
+/* 0x001c ( 3 4) */ std %f0,[%sp+120]
+/* 0x0020 74 ( 3 4) */ or %g0,0,%o7
+/* 0x0024 67 ( 4 5) */ or %g0,%i3,%o0
+/* 0x0028 ( 4 5) */ sub %i3,2,%o2
+/* 0x002c 73 ( 5 8) */ ldd [%i1+8],%f0
+/* 0x0030 67 ( 5 6) */ sethi %hi(0xfc00),%o0
+/* 0x0034 ( 5 6) */ add %o2,1,%g3
+/* 0x0038 ( 6 7) */ add %o0,1023,%o1
+/* 0x003c ( 6 7) */ or %g0,%g5,%o5
+/* 0x0040 73 ( 7 10) */ fdtox %f0,%f0
+/* 0x0044 ( 7 8) */ std %f0,[%sp+112]
+/* 0x0048 72 (11 13) */ ldx [%sp+120],%g4
+/* 0x004c 73 (12 14) */ ldx [%sp+112],%g1
+/* 0x0050 74 (12 13) */ ble,pt %icc,.L900000214 ! tprob=0.56
+/* 0x0054 (12 13) */ sethi %hi(0xfc00),%g2
+/* 0x0058 67 (13 14) */ or %g0,-1,%g2
+/* 0x005c 74 (13 14) */ cmp %g3,3
+/* 0x0060 67 (14 15) */ srl %g2,0,%o3
+/* 0x0064 (14 15) */ or %g0,%i1,%g2
+/* 0x0068 74 (14 15) */ bl,pn %icc,.L77000134 ! tprob=0.44
+/* 0x006c (15 18) */ ldd [%g2+16],%f0
+
+! 75 ! {
+! 76 ! c=(long long)d16[2*i+2];
+! 77 ! t1+=a&0xffffffff;
+! 78 ! t=(a>>32);
+! 79 ! d=(long long)d16[2*i+3];
+! 80 ! t1+=(b&0xffff)<<16;
+
+/* 0x0070 80 (15 16) */ and %g1,%o1,%o0
+
+! 81 ! t+=(b>>16)+(t1>>32);
+! 82 ! i32[i]=t1&0xffffffff;
+! 83 ! t1=t;
+! 84 ! a=c;
+! 85 ! b=d;
+
+/* 0x0074 85 (15 16) */ add %g2,16,%g2
+/* 0x0078 80 (16 17) */ sllx %o0,16,%g3
+/* 0x007c 77 (16 17) */ and %g4,%o3,%o0
+/* 0x0080 76 (17 20) */ fdtox %f0,%f0
+/* 0x0084 (17 18) */ std %f0,[%sp+104]
+/* 0x0088 74 (17 18) */ add %o0,%g3,%o4
+/* 0x008c 79 (18 21) */ ldd [%g2+8],%f2
+/* 0x0090 81 (18 19) */ srax %g1,16,%o0
+/* 0x0094 82 (18 19) */ and %o4,%o3,%o7
+/* 0x0098 81 (19 20) */ stx %o0,[%sp+112]
+/* 0x009c (19 20) */ srax %o4,32,%o0
+/* 0x00a0 85 (19 20) */ add %g5,4,%o5
+/* 0x00a4 81 (20 21) */ stx %o0,[%sp+120]
+/* 0x00a8 78 (20 21) */ srax %g4,32,%o4
+/* 0x00ac 79 (20 23) */ fdtox %f2,%f0
+/* 0x00b0 (21 22) */ std %f0,[%sp+96]
+/* 0x00b4 81 (22 24) */ ldx [%sp+112],%o0
+/* 0x00b8 (23 25) */ ldx [%sp+120],%g4
+/* 0x00bc 76 (25 27) */ ldx [%sp+104],%g3
+/* 0x00c0 81 (25 26) */ add %o0,%g4,%g4
+/* 0x00c4 79 (26 28) */ ldx [%sp+96],%g1
+/* 0x00c8 81 (26 27) */ add %o4,%g4,%o4
+/* 0x00cc 82 (27 28) */ st %o7,[%g5]
+/* 0x00d0 (27 28) */ or %g0,1,%o7
+/* 0x00d4 84 (27 28) */ or %g0,%g3,%g4
+ .L900000209: /* frequency 64.0 confidence 0.0 */
+/* 0x00d8 76 (17 19) */ ldd [%g2+16],%f0
+/* 0x00dc 85 (17 18) */ add %o7,1,%o7
+/* 0x00e0 (17 18) */ add %o5,4,%o5
+/* 0x00e4 (18 18) */ cmp %o7,%o2
+/* 0x00e8 (18 19) */ add %g2,16,%g2
+/* 0x00ec 76 (19 22) */ fdtox %f0,%f0
+/* 0x00f0 (20 21) */ std %f0,[%sp+104]
+/* 0x00f4 79 (21 23) */ ldd [%g2+8],%f0
+/* 0x00f8 (23 26) */ fdtox %f0,%f0
+/* 0x00fc (24 25) */ std %f0,[%sp+96]
+/* 0x0100 80 (25 26) */ and %g1,%o1,%g3
+/* 0x0104 (26 27) */ sllx %g3,16,%g3
+/* 0x0108 ( 0 0) */ stx %g3,[%sp+120]
+/* 0x010c 77 (26 27) */ and %g4,%o3,%g3
+/* 0x0110 74 ( 0 0) */ stx %o7,[%sp+128]
+/* 0x0114 ( 0 0) */ ldx [%sp+120],%o7
+/* 0x0118 (27 27) */ add %g3,%o7,%g3
+/* 0x011c ( 0 0) */ ldx [%sp+128],%o7
+/* 0x0120 81 (28 29) */ srax %g1,16,%g1
+/* 0x0124 74 (28 28) */ add %g3,%o4,%g3
+/* 0x0128 81 (29 30) */ srax %g3,32,%o4
+/* 0x012c ( 0 0) */ stx %o4,[%sp+112]
+/* 0x0130 78 (30 31) */ srax %g4,32,%o4
+/* 0x0134 81 ( 0 0) */ ldx [%sp+112],%g4
+/* 0x0138 (30 31) */ add %g1,%g4,%g4
+/* 0x013c 79 (31 33) */ ldx [%sp+96],%g1
+/* 0x0140 81 (31 32) */ add %o4,%g4,%o4
+/* 0x0144 82 (32 33) */ and %g3,%o3,%g3
+/* 0x0148 84 ( 0 0) */ ldx [%sp+104],%g4
+/* 0x014c 85 (33 34) */ ble,pt %icc,.L900000209 ! tprob=0.50
+/* 0x0150 (33 34) */ st %g3,[%o5-4]
+ .L900000212: /* frequency 8.0 confidence 0.0 */
+/* 0x0154 85 ( 0 1) */ ba .L900000214 ! tprob=1.00
+/* 0x0158 ( 0 1) */ sethi %hi(0xfc00),%g2
+ .L77000134: /* frequency 0.7 confidence 0.0 */
+ .L900000213: /* frequency 6.4 confidence 0.0 */
+/* 0x015c 77 ( 0 1) */ and %g4,%o3,%o0
+/* 0x0160 80 ( 0 1) */ and %g1,%o1,%g3
+/* 0x0164 76 ( 0 3) */ fdtox %f0,%f0
+/* 0x0168 77 ( 1 2) */ add %o4,%o0,%o0
+/* 0x016c 76 ( 1 2) */ std %f0,[%sp+104]
+/* 0x0170 85 ( 1 2) */ add %o7,1,%o7
+/* 0x0174 80 ( 2 3) */ sllx %g3,16,%o4
+/* 0x0178 79 ( 2 5) */ ldd [%g2+24],%f2
+/* 0x017c 85 ( 2 3) */ add %g2,16,%g2
+/* 0x0180 80 ( 3 4) */ add %o0,%o4,%o4
+/* 0x0184 81 ( 3 4) */ stx %o7,[%sp+128]
+/* 0x0188 ( 4 5) */ srax %g1,16,%o0
+/* 0x018c ( 4 5) */ stx %o0,[%sp+112]
+/* 0x0190 82 ( 4 5) */ and %o4,%o3,%g3
+/* 0x0194 81 ( 5 6) */ srax %o4,32,%o0
+/* 0x0198 ( 5 6) */ stx %o0,[%sp+120]
+/* 0x019c 79 ( 5 8) */ fdtox %f2,%f0
+/* 0x01a0 ( 6 7) */ std %f0,[%sp+96]
+/* 0x01a4 78 ( 6 7) */ srax %g4,32,%o4
+/* 0x01a8 81 ( 7 9) */ ldx [%sp+120],%o7
+/* 0x01ac ( 8 10) */ ldx [%sp+112],%g4
+/* 0x01b0 76 (10 12) */ ldx [%sp+104],%g1
+/* 0x01b4 81 (10 11) */ add %g4,%o7,%g4
+/* 0x01b8 (11 13) */ ldx [%sp+128],%o7
+/* 0x01bc (11 12) */ add %o4,%g4,%o4
+/* 0x01c0 79 (12 14) */ ldx [%sp+96],%o0
+/* 0x01c4 84 (12 13) */ or %g0,%g1,%g4
+/* 0x01c8 82 (13 14) */ st %g3,[%o5]
+/* 0x01cc 85 (13 14) */ add %o5,4,%o5
+/* 0x01d0 (13 14) */ cmp %o7,%o2
+/* 0x01d4 (14 15) */ or %g0,%o0,%g1
+/* 0x01d8 (14 15) */ ble,a,pt %icc,.L900000213 ! tprob=0.86
+/* 0x01dc (14 17) */ ldd [%g2+16],%f0
+ .L77000127: /* frequency 1.0 confidence 0.0 */
+
+! 86 ! }
+! 87 ! t1+=a&0xffffffff;
+! 88 ! t=(a>>32);
+! 89 ! t1+=(b&0xffff)<<16;
+! 90 ! i32[i]=t1&0xffffffff;
+
+/* 0x01e0 90 ( 0 1) */ sethi %hi(0xfc00),%g2
+ .L900000214: /* frequency 1.0 confidence 0.0 */
+/* 0x01e4 90 ( 0 1) */ or %g0,-1,%g3
+/* 0x01e8 ( 0 1) */ add %g2,1023,%g2
+/* 0x01ec ( 1 2) */ srl %g3,0,%g3
+/* 0x01f0 ( 1 2) */ and %g1,%g2,%g2
+/* 0x01f4 ( 2 3) */ and %g4,%g3,%g4
+/* 0x01f8 ( 3 4) */ sllx %g2,16,%g2
+/* 0x01fc ( 3 4) */ add %o4,%g4,%g4
+/* 0x0200 ( 4 5) */ add %g4,%g2,%g2
+/* 0x0204 ( 5 6) */ sll %o7,2,%g4
+/* 0x0208 ( 5 6) */ and %g2,%g3,%g2
+/* 0x020c ( 6 7) */ st %g2,[%g5+%g4]
+/* 0x0210 ( 7 9) */ ret ! Result =
+/* 0x0214 ( 9 10) */ restore %g0,%g0,%g0
+/* 0x0218 0 ( 0 0) */ .type conv_d16_to_i32,2
+/* 0x0218 ( 0 0) */ .size conv_d16_to_i32,(.-conv_d16_to_i32)
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 ( 0 0) */ .align 8
+!
+! CONSTANT POOL
+!
+ .L_const_seg_900000301: /* frequency 1.0 confidence 0.0 */
+/* 000000 0 ( 0 0) */ .word 1127219200,0
+/* 0x0008 0 ( 0 0) */ .align 4
+!
+! SUBROUTINE conv_i32_to_d32
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME)
+
+ .global conv_i32_to_d32
+ conv_i32_to_d32: /* frequency 1.0 confidence 0.0 */
+/* 000000 ( 0 1) */ orcc %g0,%o2,%g1
+
+! 92 !}
+! 94 !void conv_i32_to_d32(double *d32, unsigned int *i32, int len)
+! 95 !{
+! 96 !int i;
+! 98 !#pragma pipeloop(0)
+! 99 ! for(i=0;i<len;i++) d32[i]=(double)(i32[i]);
+
+/* 0x0004 99 ( 0 1) */ ble,pt %icc,.L77000140 ! tprob=0.56
+/* 0x0008 ( 0 1) */ nop
+/* 0x000c ( 1 2) */ sethi %hi(.L_const_seg_900000301),%g2
+/* 0x0010 95 ( 1 2) */ or %g0,%o1,%g4
+/* 0x0014 99 ( 2 3) */ add %g2,%lo(.L_const_seg_900000301),%g2
+/* 0x0018 ( 2 3) */ or %g0,0,%o5
+/* 0x001c 95 ( 3 4) */ or %g0,%o0,%g5
+/* 0x0020 99 ( 3 4) */ sub %o2,1,%g3
+/* 0x0024 ( 4 5) */ cmp %o2,9
+/* 0x0028 ( 4 5) */ bl,pn %icc,.L77000144 ! tprob=0.44
+/* 0x002c ( 4 7) */ ldd [%g2],%f8
+/* 0x0030 ( 5 8) */ ld [%o1],%f7
+/* 0x0034 ( 5 6) */ add %o1,16,%g4
+/* 0x0038 ( 5 6) */ sub %o2,5,%g1
+/* 0x003c ( 6 9) */ ld [%o1+4],%f5
+/* 0x0040 ( 6 7) */ or %g0,4,%o5
+/* 0x0044 ( 7 10) */ ld [%o1+8],%f3
+/* 0x0048 ( 7 8) */ fmovs %f8,%f6
+/* 0x004c ( 8 11) */ ld [%o1+12],%f1
+ .L900000305: /* frequency 64.0 confidence 0.0 */
+/* 0x0050 ( 8 16) */ ld [%g4],%f11
+/* 0x0054 ( 8 9) */ add %o5,5,%o5
+/* 0x0058 ( 8 9) */ add %g4,20,%g4
+/* 0x005c ( 8 11) */ fsubd %f6,%f8,%f6
+/* 0x0060 ( 9 10) */ std %f6,[%g5]
+/* 0x0064 ( 9 9) */ cmp %o5,%g1
+/* 0x0068 ( 9 10) */ add %g5,40,%g5
+/* 0x006c ( 0 0) */ fmovs %f8,%f4
+/* 0x0070 (10 18) */ ld [%g4-16],%f7
+/* 0x0074 (10 13) */ fsubd %f4,%f8,%f12
+/* 0x0078 ( 0 0) */ fmovs %f8,%f2
+/* 0x007c (11 12) */ std %f12,[%g5-32]
+/* 0x0080 (12 20) */ ld [%g4-12],%f5
+/* 0x0084 (12 15) */ fsubd %f2,%f8,%f12
+/* 0x0088 ( 0 0) */ fmovs %f8,%f0
+/* 0x008c (13 14) */ std %f12,[%g5-24]
+/* 0x0090 (14 22) */ ld [%g4-8],%f3
+/* 0x0094 (14 17) */ fsubd %f0,%f8,%f12
+/* 0x0098 ( 0 0) */ fmovs %f8,%f10
+/* 0x009c (15 16) */ std %f12,[%g5-16]
+/* 0x00a0 (16 24) */ ld [%g4-4],%f1
+/* 0x00a4 (16 19) */ fsubd %f10,%f8,%f10
+/* 0x00a8 ( 0 0) */ fmovs %f8,%f6
+/* 0x00ac (17 18) */ ble,pt %icc,.L900000305 ! tprob=0.50
+/* 0x00b0 (17 18) */ std %f10,[%g5-8]
+ .L900000308: /* frequency 8.0 confidence 0.0 */
+/* 0x00b4 ( 0 1) */ fmovs %f8,%f4
+/* 0x00b8 ( 0 1) */ add %g5,32,%g5
+/* 0x00bc ( 0 1) */ cmp %o5,%g3
+/* 0x00c0 ( 1 2) */ fmovs %f8,%f2
+/* 0x00c4 ( 2 3) */ fmovs %f8,%f0
+/* 0x00c8 ( 4 7) */ fsubd %f6,%f8,%f6
+/* 0x00cc ( 4 5) */ std %f6,[%g5-32]
+/* 0x00d0 ( 5 8) */ fsubd %f4,%f8,%f4
+/* 0x00d4 ( 5 6) */ std %f4,[%g5-24]
+/* 0x00d8 ( 6 9) */ fsubd %f2,%f8,%f2
+/* 0x00dc ( 6 7) */ std %f2,[%g5-16]
+/* 0x00e0 ( 7 10) */ fsubd %f0,%f8,%f0
+/* 0x00e4 ( 7 8) */ bg,pn %icc,.L77000140 ! tprob=0.14
+/* 0x00e8 ( 7 8) */ std %f0,[%g5-8]
+ .L77000144: /* frequency 0.7 confidence 0.0 */
+/* 0x00ec ( 0 3) */ ld [%g4],%f1
+ .L900000309: /* frequency 6.4 confidence 0.0 */
+/* 0x00f0 ( 0 3) */ ldd [%g2],%f8
+/* 0x00f4 ( 0 1) */ add %o5,1,%o5
+/* 0x00f8 ( 0 1) */ add %g4,4,%g4
+/* 0x00fc ( 1 2) */ cmp %o5,%g3
+/* 0x0100 ( 2 3) */ fmovs %f8,%f0
+/* 0x0104 ( 4 7) */ fsubd %f0,%f8,%f0
+/* 0x0108 ( 4 5) */ std %f0,[%g5]
+/* 0x010c ( 4 5) */ add %g5,8,%g5
+/* 0x0110 ( 4 5) */ ble,a,pt %icc,.L900000309 ! tprob=0.86
+/* 0x0114 ( 6 9) */ ld [%g4],%f1
+ .L77000140: /* frequency 1.0 confidence 0.0 */
+/* 0x0118 ( 0 2) */ retl ! Result =
+/* 0x011c ( 1 2) */ nop
+/* 0x0120 0 ( 0 0) */ .type conv_i32_to_d32,2
+/* 0x0120 ( 0 0) */ .size conv_i32_to_d32,(.-conv_i32_to_d32)
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 ( 0 0) */ .align 8
+!
+! CONSTANT POOL
+!
+ .L_const_seg_900000401: /* frequency 1.0 confidence 0.0 */
+/* 000000 0 ( 0 0) */ .word 1127219200,0
+/* 0x0008 0 ( 0 0) */ .align 4
+!
+! SUBROUTINE conv_i32_to_d16
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME)
+
+ .global conv_i32_to_d16
+ conv_i32_to_d16: /* frequency 1.0 confidence 0.0 */
+/* 000000 ( 0 1) */ save %sp,-104,%sp
+/* 0x0004 ( 1 2) */ orcc %g0,%i2,%o0
+
+! 100 !}
+! 103 !void conv_i32_to_d16(double *d16, unsigned int *i32, int len)
+! 104 !{
+! 105 !int i;
+! 106 !unsigned int a;
+! 108 !#pragma pipeloop(0)
+! 109 ! for(i=0;i<len;i++)
+
+/* 0x0008 109 ( 1 2) */ ble,pt %icc,.L77000150 ! tprob=0.56
+/* 0x000c ( 1 2) */ nop
+/* 0x0010 ( 2 3) */ sub %o0,1,%o5
+/* 0x0014 ( 2 3) */ sethi %hi(0xfc00),%g2
+
+! 110 ! {
+! 111 ! a=i32[i];
+! 112 ! d16[2*i]=(double)(a&0xffff);
+! 113 ! d16[2*i+1]=(double)(a>>16);
+
+/* 0x0018 113 ( 3 4) */ sethi %hi(.L_const_seg_900000401),%o0
+/* 0x001c ( 3 4) */ add %o5,1,%g3
+/* 0x0020 ( 4 5) */ add %g2,1023,%o4
+/* 0x0024 109 ( 4 5) */ or %g0,0,%g1
+/* 0x0028 ( 5 6) */ cmp %g3,3
+/* 0x002c ( 5 6) */ or %g0,%i1,%o7
+/* 0x0030 ( 6 7) */ add %o0,%lo(.L_const_seg_900000401),%o3
+/* 0x0034 ( 6 7) */ or %g0,%i0,%g2
+/* 0x0038 ( 6 7) */ bl,pn %icc,.L77000154 ! tprob=0.44
+/* 0x003c ( 7 8) */ add %o7,4,%o0
+/* 0x0040 112 ( 7 10) */ ldd [%o3],%f0
+/* 0x0044 113 ( 7 8) */ or %g0,1,%g1
+/* 0x0048 111 ( 8 11) */ ld [%o0-4],%o1
+/* 0x004c 0 ( 8 9) */ or %g0,%o0,%o7
+/* 0x0050 112 (10 11) */ and %o1,%o4,%o0
+ .L900000406: /* frequency 64.0 confidence 0.0 */
+/* 0x0054 112 (22 23) */ st %o0,[%sp+96]
+/* 0x0058 113 (22 23) */ add %g1,1,%g1
+/* 0x005c (22 23) */ add %g2,16,%g2
+/* 0x0060 (23 23) */ cmp %g1,%o5
+/* 0x0064 (23 24) */ add %o7,4,%o7
+/* 0x0068 112 (29 31) */ ld [%sp+96],%f3
+/* 0x006c ( 0 0) */ fmovs %f0,%f2
+/* 0x0070 (31 34) */ fsubd %f2,%f0,%f2
+/* 0x0074 113 (32 33) */ srl %o1,16,%o0
+/* 0x0078 112 (32 33) */ std %f2,[%g2-16]
+/* 0x007c 113 (33 34) */ st %o0,[%sp+92]
+/* 0x0080 (40 42) */ ld [%sp+92],%f3
+/* 0x0084 111 (41 43) */ ld [%o7-4],%o1
+/* 0x0088 113 ( 0 0) */ fmovs %f0,%f2
+/* 0x008c (42 45) */ fsubd %f2,%f0,%f2
+/* 0x0090 112 (43 44) */ and %o1,%o4,%o0
+/* 0x0094 113 (43 44) */ ble,pt %icc,.L900000406 ! tprob=0.50
+/* 0x0098 (43 44) */ std %f2,[%g2-8]
+ .L900000409: /* frequency 8.0 confidence 0.0 */
+/* 0x009c 112 ( 0 1) */ st %o0,[%sp+96]
+/* 0x00a0 ( 0 1) */ fmovs %f0,%f2
+/* 0x00a4 113 ( 0 1) */ add %g2,16,%g2
+/* 0x00a8 ( 1 2) */ srl %o1,16,%o0
+/* 0x00ac 112 ( 4 7) */ ld [%sp+96],%f3
+/* 0x00b0 ( 6 9) */ fsubd %f2,%f0,%f2
+/* 0x00b4 ( 6 7) */ std %f2,[%g2-16]
+/* 0x00b8 113 ( 7 8) */ st %o0,[%sp+92]
+/* 0x00bc (10 11) */ fmovs %f0,%f2
+/* 0x00c0 (11 14) */ ld [%sp+92],%f3
+/* 0x00c4 (13 16) */ fsubd %f2,%f0,%f0
+/* 0x00c8 (13 14) */ std %f0,[%g2-8]
+/* 0x00cc (14 16) */ ret ! Result =
+/* 0x00d0 (16 17) */ restore %g0,%g0,%g0
+ .L77000154: /* frequency 0.7 confidence 0.0 */
+/* 0x00d4 111 ( 0 3) */ ld [%o7],%o0
+ .L900000410: /* frequency 6.4 confidence 0.0 */
+/* 0x00d8 112 ( 0 1) */ and %o0,%o4,%o1
+/* 0x00dc ( 0 1) */ st %o1,[%sp+96]
+/* 0x00e0 113 ( 0 1) */ add %g1,1,%g1
+/* 0x00e4 112 ( 1 4) */ ldd [%o3],%f0
+/* 0x00e8 113 ( 1 2) */ srl %o0,16,%o0
+/* 0x00ec ( 1 2) */ add %o7,4,%o7
+/* 0x00f0 ( 2 3) */ cmp %g1,%o5
+/* 0x00f4 112 ( 3 4) */ fmovs %f0,%f2
+/* 0x00f8 ( 4 7) */ ld [%sp+96],%f3
+/* 0x00fc ( 6 9) */ fsubd %f2,%f0,%f2
+/* 0x0100 ( 6 7) */ std %f2,[%g2]
+/* 0x0104 113 ( 7 8) */ st %o0,[%sp+92]
+/* 0x0108 (10 11) */ fmovs %f0,%f2
+/* 0x010c (11 14) */ ld [%sp+92],%f3
+/* 0x0110 (13 16) */ fsubd %f2,%f0,%f0
+/* 0x0114 (13 14) */ std %f0,[%g2+8]
+/* 0x0118 (13 14) */ add %g2,16,%g2
+/* 0x011c (13 14) */ ble,a,pt %icc,.L900000410 ! tprob=0.86
+/* 0x0120 (14 17) */ ld [%o7],%o0
+ .L77000150: /* frequency 1.0 confidence 0.0 */
+/* 0x0124 ( 0 2) */ ret ! Result =
+/* 0x0128 ( 2 3) */ restore %g0,%g0,%g0
+/* 0x012c 0 ( 0 0) */ .type conv_i32_to_d16,2
+/* 0x012c ( 0 0) */ .size conv_i32_to_d16,(.-conv_i32_to_d16)
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 ( 0 0) */ .align 8
+!
+! CONSTANT POOL
+!
+ .L_const_seg_900000501: /* frequency 1.0 confidence 0.0 */
+/* 000000 0 ( 0 0) */ .word 1127219200,0
+/* 0x0008 0 ( 0 0) */ .align 4
+!
+! SUBROUTINE conv_i32_to_d32_and_d16
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME)
+
+ .global conv_i32_to_d32_and_d16
+ conv_i32_to_d32_and_d16: /* frequency 1.0 confidence 0.0 */
+/* 000000 ( 0 1) */ save %sp,-104,%sp
+/* 0x0004 ( 1 2) */ or %g0,%i3,%i4
+/* 0x0008 ( 1 2) */ or %g0,%i2,%g1
+
+! 114 ! }
+! 115 !}
+! 118 !void i16_to_d16_and_d32x4(double * /*1/(2^16)*/, double * /* 2^16*/,
+! 119 ! double * /* 0 */,
+! 120 ! double * /*result16*/, double * /* result32 */,
+! 121 ! float * /*source - should be unsigned int*
+! 122 ! converted to float* */);
+! 126 !void conv_i32_to_d32_and_d16(double *d32, double *d16,
+! 127 ! unsigned int *i32, int len)
+! 128 !{
+! 129 !int i;
+! 130 !unsigned int a;
+! 132 !#pragma pipeloop(0)
+! 133 ! for(i=0;i<len-3;i+=4)
+
+/* 0x000c 133 ( 2 3) */ sub %i4,3,%g2
+/* 0x0010 ( 2 3) */ or %g0,0,%o7
+/* 0x0014 ( 3 4) */ cmp %g2,0
+/* 0x0018 128 ( 3 4) */ or %g0,%i0,%i3
+/* 0x001c 133 ( 3 4) */ ble,pt %icc,.L900000515 ! tprob=0.56
+/* 0x0020 ( 4 5) */ cmp %o7,%i4
+
+! 134 ! {
+! 135 ! i16_to_d16_and_d32x4(&TwoToMinus16, &TwoTo16, &Zero,
+! 136 ! &(d16[2*i]), &(d32[i]), (float *)(&(i32[i])));
+
+/* 0x0024 136 ( 4 5) */ sethi %hi(Zero),%g2
+/* 0x0028 133 ( 5 6) */ or %g0,%g1,%o3
+/* 0x002c ( 5 6) */ sub %i4,4,%o2
+/* 0x0030 136 ( 6 7) */ add %g2,%lo(Zero),%o1
+/* 0x0034 133 ( 6 7) */ or %g0,0,%o5
+/* 0x0038 ( 7 8) */ or %g0,0,%o4
+/* 0x003c 136 ( 7 8) */ or %g0,%o3,%g4
+ .L900000514: /* frequency 6.4 confidence 0.0 */
+/* 0x0040 ( 0 3) */ ldd [%o1],%f2
+/* 0x0044 136 ( 0 1) */ add %i3,%o5,%g2
+/* 0x0048 ( 0 1) */ add %i1,%o4,%g3
+/* 0x004c ( 1 4) */ ldd [%o1-8],%f0
+/* 0x0050 ( 1 2) */ add %o7,4,%o7
+/* 0x0054 ( 1 2) */ add %o3,16,%o3
+/* 0x0058 ( 2 3) */ fmovd %f2,%f14
+/* 0x005c ( 2 5) */ ld [%g4],%f15
+/* 0x0060 ( 2 3) */ cmp %o7,%o2
+/* 0x0064 ( 3 4) */ fmovd %f2,%f10
+/* 0x0068 ( 3 6) */ ld [%g4+4],%f11
+/* 0x006c ( 4 5) */ fmovd %f2,%f6
+/* 0x0070 ( 4 7) */ ld [%g4+8],%f7
+/* 0x0074 ( 5 8) */ ld [%g4+12],%f3
+/* 0x0078 ( 5 8) */ fxtod %f14,%f14
+/* 0x007c ( 6 9) */ fxtod %f10,%f10
+/* 0x0080 ( 6 9) */ ldd [%o1-16],%f16
+/* 0x0084 ( 7 10) */ fxtod %f6,%f6
+/* 0x0088 ( 7 8) */ std %f14,[%i3+%o5]
+/* 0x008c ( 7 8) */ add %o5,32,%o5
+/* 0x0090 ( 8 11) */ fxtod %f2,%f2
+/* 0x0094 ( 8 11) */ fmuld %f0,%f14,%f12
+/* 0x0098 ( 8 9) */ std %f10,[%g2+8]
+/* 0x009c ( 9 12) */ fmuld %f0,%f10,%f8
+/* 0x00a0 ( 9 10) */ std %f6,[%g2+16]
+/* 0x00a4 (10 13) */ fmuld %f0,%f6,%f4
+/* 0x00a8 (10 11) */ std %f2,[%g2+24]
+/* 0x00ac (11 14) */ fmuld %f0,%f2,%f0
+/* 0x00b0 (11 14) */ fdtox %f12,%f12
+/* 0x00b4 (12 15) */ fdtox %f8,%f8
+/* 0x00b8 (13 16) */ fdtox %f4,%f4
+/* 0x00bc (14 17) */ fdtox %f0,%f0
+/* 0x00c0 (15 18) */ fxtod %f12,%f12
+/* 0x00c4 (15 16) */ std %f12,[%g3+8]
+/* 0x00c8 (16 19) */ fxtod %f8,%f8
+/* 0x00cc (16 17) */ std %f8,[%g3+24]
+/* 0x00d0 (17 20) */ fxtod %f4,%f4
+/* 0x00d4 (17 18) */ std %f4,[%g3+40]
+/* 0x00d8 (18 21) */ fxtod %f0,%f0
+/* 0x00dc (18 21) */ fmuld %f12,%f16,%f12
+/* 0x00e0 (18 19) */ std %f0,[%g3+56]
+/* 0x00e4 (19 22) */ fmuld %f8,%f16,%f8
+/* 0x00e8 (20 23) */ fmuld %f4,%f16,%f4
+/* 0x00ec (21 24) */ fmuld %f0,%f16,%f0
+/* 0x00f0 (21 24) */ fsubd %f14,%f12,%f12
+/* 0x00f4 (21 22) */ std %f12,[%i1+%o4]
+/* 0x00f8 (22 25) */ fsubd %f10,%f8,%f8
+/* 0x00fc (22 23) */ std %f8,[%g3+16]
+/* 0x0100 (22 23) */ add %o4,64,%o4
+/* 0x0104 (23 26) */ fsubd %f6,%f4,%f4
+/* 0x0108 (23 24) */ std %f4,[%g3+32]
+/* 0x010c (24 27) */ fsubd %f2,%f0,%f0
+/* 0x0110 (24 25) */ std %f0,[%g3+48]
+/* 0x0114 (24 25) */ ble,pt %icc,.L900000514 ! tprob=0.86
+/* 0x0118 (25 26) */ or %g0,%o3,%g4
+ .L77000159: /* frequency 1.0 confidence 0.0 */
+
+! 137 ! }
+! 138 ! for(;i<len;i++)
+
+/* 0x011c 138 ( 0 1) */ cmp %o7,%i4
+ .L900000515: /* frequency 1.0 confidence 0.0 */
+/* 0x0120 138 ( 0 1) */ bge,pt %icc,.L77000164 ! tprob=0.56
+/* 0x0124 ( 0 1) */ nop
+
+! 139 ! {
+! 140 ! a=i32[i];
+! 141 ! d32[i]=(double)(i32[i]);
+! 142 ! d16[2*i]=(double)(a&0xffff);
+! 143 ! d16[2*i+1]=(double)(a>>16);
+
+/* 0x0128 143 ( 0 1) */ sethi %hi(.L_const_seg_900000501),%o1
+/* 0x012c 138 ( 1 2) */ sethi %hi(0xfc00),%o0
+/* 0x0130 141 ( 1 4) */ ldd [%o1+%lo(.L_const_seg_900000501)],%f0
+/* 0x0134 138 ( 1 2) */ sub %i4,%o7,%g3
+/* 0x0138 ( 2 3) */ sll %o7,2,%g2
+/* 0x013c ( 2 3) */ add %o0,1023,%o3
+/* 0x0140 ( 3 4) */ sll %o7,3,%g4
+/* 0x0144 ( 3 4) */ cmp %g3,3
+/* 0x0148 ( 4 5) */ add %g1,%g2,%o0
+/* 0x014c ( 4 5) */ add %o1,%lo(.L_const_seg_900000501),%o2
+/* 0x0150 ( 5 6) */ add %i3,%g4,%o4
+/* 0x0154 ( 5 6) */ sub %i4,1,%o1
+/* 0x0158 ( 6 7) */ sll %o7,4,%g5
+/* 0x015c ( 6 7) */ bl,pn %icc,.L77000161 ! tprob=0.44
+/* 0x0160 ( 7 8) */ add %i1,%g5,%o5
+/* 0x0164 141 ( 7 10) */ ld [%g1+%g2],%f3
+/* 0x0168 143 ( 7 8) */ add %o4,8,%o4
+/* 0x016c 140 ( 8 11) */ ld [%g1+%g2],%g1
+/* 0x0170 143 ( 8 9) */ add %o5,16,%o5
+/* 0x0174 ( 8 9) */ add %o7,1,%o7
+/* 0x0178 141 ( 9 10) */ fmovs %f0,%f2
+/* 0x017c 143 ( 9 10) */ add %o0,4,%o0
+/* 0x0180 142 (10 11) */ and %g1,%o3,%g2
+/* 0x0184 141 (11 14) */ fsubd %f2,%f0,%f2
+/* 0x0188 (11 12) */ std %f2,[%o4-8]
+/* 0x018c 143 (11 12) */ srl %g1,16,%g1
+/* 0x0190 142 (12 13) */ st %g2,[%sp+96]
+/* 0x0194 (15 16) */ fmovs %f0,%f2
+/* 0x0198 (16 19) */ ld [%sp+96],%f3
+/* 0x019c (18 21) */ fsubd %f2,%f0,%f2
+/* 0x01a0 (18 19) */ std %f2,[%o5-16]
+/* 0x01a4 143 (19 20) */ st %g1,[%sp+92]
+/* 0x01a8 (22 23) */ fmovs %f0,%f2
+/* 0x01ac (23 26) */ ld [%sp+92],%f3
+/* 0x01b0 (25 28) */ fsubd %f2,%f0,%f2
+/* 0x01b4 (25 26) */ std %f2,[%o5-8]
+ .L900000509: /* frequency 64.0 confidence 0.0 */
+/* 0x01b8 141 (26 28) */ ld [%o0],%f3
+/* 0x01bc 143 (26 27) */ add %o7,2,%o7
+/* 0x01c0 (26 27) */ add %o5,32,%o5
+/* 0x01c4 140 (27 29) */ ld [%o0],%g1
+/* 0x01c8 143 (27 27) */ cmp %o7,%o1
+/* 0x01cc (27 28) */ add %o4,16,%o4
+/* 0x01d0 141 ( 0 0) */ fmovs %f0,%f2
+/* 0x01d4 (28 31) */ fsubd %f2,%f0,%f2
+/* 0x01d8 (29 30) */ std %f2,[%o4-16]
+/* 0x01dc 142 (29 30) */ and %g1,%o3,%g2
+/* 0x01e0 (30 31) */ st %g2,[%sp+96]
+/* 0x01e4 (37 39) */ ld [%sp+96],%f3
+/* 0x01e8 ( 0 0) */ fmovs %f0,%f2
+/* 0x01ec (39 42) */ fsubd %f2,%f0,%f2
+/* 0x01f0 143 (40 41) */ srl %g1,16,%g1
+/* 0x01f4 142 (40 41) */ std %f2,[%o5-32]
+/* 0x01f8 143 (41 42) */ st %g1,[%sp+92]
+/* 0x01fc (48 50) */ ld [%sp+92],%f3
+/* 0x0200 ( 0 0) */ fmovs %f0,%f2
+/* 0x0204 (50 53) */ fsubd %f2,%f0,%f2
+/* 0x0208 (51 52) */ std %f2,[%o5-24]
+/* 0x020c (51 52) */ add %o0,4,%o0
+/* 0x0210 141 (52 54) */ ld [%o0],%f3
+/* 0x0214 140 (53 55) */ ld [%o0],%g1
+/* 0x0218 141 ( 0 0) */ fmovs %f0,%f2
+/* 0x021c (54 57) */ fsubd %f2,%f0,%f2
+/* 0x0220 (55 56) */ std %f2,[%o4-8]
+/* 0x0224 142 (55 56) */ and %g1,%o3,%g2
+/* 0x0228 (56 57) */ st %g2,[%sp+96]
+/* 0x022c (63 65) */ ld [%sp+96],%f3
+/* 0x0230 ( 0 0) */ fmovs %f0,%f2
+/* 0x0234 (65 68) */ fsubd %f2,%f0,%f2
+/* 0x0238 143 (66 67) */ srl %g1,16,%g1
+/* 0x023c 142 (66 67) */ std %f2,[%o5-16]
+/* 0x0240 143 (67 68) */ st %g1,[%sp+92]
+/* 0x0244 (74 76) */ ld [%sp+92],%f3
+/* 0x0248 ( 0 0) */ fmovs %f0,%f2
+/* 0x024c (76 79) */ fsubd %f2,%f0,%f2
+/* 0x0250 (77 78) */ std %f2,[%o5-8]
+/* 0x0254 (77 78) */ bl,pt %icc,.L900000509 ! tprob=0.50
+/* 0x0258 (77 78) */ add %o0,4,%o0
+ .L900000512: /* frequency 8.0 confidence 0.0 */
+/* 0x025c 143 ( 0 1) */ cmp %o7,%i4
+/* 0x0260 ( 0 1) */ bge,pn %icc,.L77000164 ! tprob=0.14
+/* 0x0264 ( 0 1) */ nop
+ .L77000161: /* frequency 0.7 confidence 0.0 */
+/* 0x0268 141 ( 0 3) */ ld [%o0],%f3
+ .L900000513: /* frequency 6.4 confidence 0.0 */
+/* 0x026c 141 ( 0 3) */ ldd [%o2],%f0
+/* 0x0270 143 ( 0 1) */ add %o7,1,%o7
+/* 0x0274 140 ( 1 4) */ ld [%o0],%o1
+/* 0x0278 143 ( 1 2) */ add %o0,4,%o0
+/* 0x027c ( 1 2) */ cmp %o7,%i4
+/* 0x0280 141 ( 2 3) */ fmovs %f0,%f2
+/* 0x0284 142 ( 3 4) */ and %o1,%o3,%g1
+/* 0x0288 141 ( 4 7) */ fsubd %f2,%f0,%f2
+/* 0x028c ( 4 5) */ std %f2,[%o4]
+/* 0x0290 143 ( 4 5) */ srl %o1,16,%o1
+/* 0x0294 142 ( 5 6) */ st %g1,[%sp+96]
+/* 0x0298 143 ( 5 6) */ add %o4,8,%o4
+/* 0x029c 142 ( 8 9) */ fmovs %f0,%f2
+/* 0x02a0 ( 9 12) */ ld [%sp+96],%f3
+/* 0x02a4 (11 14) */ fsubd %f2,%f0,%f2
+/* 0x02a8 (11 12) */ std %f2,[%o5]
+/* 0x02ac 143 (12 13) */ st %o1,[%sp+92]
+/* 0x02b0 (15 16) */ fmovs %f0,%f2
+/* 0x02b4 (16 19) */ ld [%sp+92],%f3
+/* 0x02b8 (18 21) */ fsubd %f2,%f0,%f0
+/* 0x02bc (18 19) */ std %f0,[%o5+8]
+/* 0x02c0 (18 19) */ add %o5,16,%o5
+/* 0x02c4 (18 19) */ bl,a,pt %icc,.L900000513 ! tprob=0.86
+/* 0x02c8 (19 22) */ ld [%o0],%f3
+ .L77000164: /* frequency 1.0 confidence 0.0 */
+/* 0x02cc ( 0 2) */ ret ! Result =
+/* 0x02d0 ( 2 3) */ restore %g0,%g0,%g0
+/* 0x02d4 0 ( 0 0) */ .type conv_i32_to_d32_and_d16,2
+/* 0x02d4 ( 0 0) */ .size conv_i32_to_d32_and_d16,(.-conv_i32_to_d32_and_d16)
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 ( 0 0) */ .align 4
+!
+! SUBROUTINE adjust_montf_result
+!
+! void adjust_montf_result(unsigned int *i32, unsigned int *nint, int len)
+!
+! In: %o0 = i32, %o1 = nint, %o2 = len
+!
+! Conditionally subtracts nint[0..len-1] from i32[0..len-1] in place.
+! The subtraction happens when i32[len] is non-zero, or when the len-word
+! value in i32[] is greater than or equal to the one in nint[]; the two
+! are compared word by word from index len-1 down to index 0. Otherwise
+! i32[] is left untouched. i32[len] is only read, never written.
+!
+! Leaf routine: no register window is allocated and it returns with retl.
+! Registers written: %g1-%g5 and %o1-%o5.
+!
+! NOTE: the compare loop used to step upward (i++) from len-1, so it
+! never looked at the lower words and instead went on to compare
+! i32[len] with nint[len], one word past the len words of nint that
+! are otherwise used. It now steps downward (i--); the instruction
+! count and offsets are unchanged.
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME)
+
+ .global adjust_montf_result
+ adjust_montf_result: /* frequency 1.0 confidence 0.0 */
+
+! 144 ! }
+! 145 !}
+! 148 !void adjust_montf_result(unsigned int *i32, unsigned int *nint, int len)
+! 149 !{
+! 150 !long long acc;
+! 151 !int i;
+! 153 ! if(i32[len]>0) i=-1;
+
+/* 000000 153 ( 0 1) */ sll %o2,2,%g1
+/* 0x0004 ( 0 1) */ or %g0,-1,%g3
+/* 0x0008 ( 1 4) */ ld [%o0+%g1],%g1
+/* 0x000c ( 3 4) */ cmp %g1,0
+/* 0x0010 ( 3 4) */ bleu,pn %icc,.L77000175 ! tprob=0.50
+/* 0x0014 ( 3 4) */ or %g0,%o1,%o3
+/* 0x0018 ( 4 5) */ ba .L900000611 ! tprob=1.00
+/* 0x001c ( 4 5) */ cmp %g3,0
+ .L77000175: /* frequency 0.8 confidence 0.0 */
+
+! 154 ! else
+! 155 ! {
+! 156 ! for(i=len-1; i>=0; i--)
+
+/* 0x0020 156 ( 0 1) */ subcc %o2,1,%g3
+/* 0x0024 ( 0 1) */ bneg,pt %icc,.L900000611 ! tprob=0.60
+/* 0x0028 ( 1 2) */ cmp %g3,0
+/* 0x002c ( 1 2) */ sll %g3,2,%g1
+/* 0x0030 ( 2 3) */ add %o0,%g1,%g2
+/* 0x0034 ( 2 3) */ add %o1,%g1,%g1
+
+! 157 ! {
+! 158 ! if(i32[i]!=nint[i]) break;
+
+/* 0x0038 158 ( 3 6) */ ld [%g1],%g5
+ .L900000610: /* frequency 5.3 confidence 0.0 */
+/* 0x003c 158 ( 0 3) */ ld [%g2],%o5
+/* 0x0040 ( 0 1) */ sub %g1,4,%g1 ! step &nint[i] down one word
+/* 0x0044 ( 0 1) */ sub %g2,4,%g2 ! step &i32[i] down one word
+/* 0x0048 ( 2 3) */ cmp %o5,%g5
+/* 0x004c ( 2 3) */ bne,pn %icc,.L77000182 ! tprob=0.16
+/* 0x0050 ( 2 3) */ nop
+/* 0x0054 ( 3 4) */ subcc %g3,1,%g3 ! i--; N is set once i drops below 0
+/* 0x0058 ( 3 4) */ bpos,a,pt %icc,.L900000610 ! tprob=0.84
+/* 0x005c ( 3 6) */ ld [%g1],%g5
+ .L77000182: /* frequency 1.0 confidence 0.0 */
+
+! 159 ! }
+! 160 ! }
+! 161 ! if((i<0)||(i32[i]>nint[i]))
+
+/* 0x0060 161 ( 0 1) */ cmp %g3,0
+ .L900000611: /* frequency 1.0 confidence 0.0 */
+/* 0x0064 161 ( 0 1) */ bl,pn %icc,.L77000198 ! tprob=0.50
+/* 0x0068 ( 0 1) */ sll %g3,2,%g2
+/* 0x006c ( 1 4) */ ld [%o1+%g2],%g1
+/* 0x0070 ( 2 5) */ ld [%o0+%g2],%g2
+/* 0x0074 ( 4 5) */ cmp %g2,%g1
+/* 0x0078 ( 4 5) */ bleu,pt %icc,.L77000191 ! tprob=0.56
+/* 0x007c ( 4 5) */ nop
+ .L77000198: /* frequency 0.8 confidence 0.0 */
+
+! 162 ! {
+! 163 ! acc=0;
+! 164 ! for(i=0;i<len;i++)
+
+/* 0x0080 164 ( 0 1) */ cmp %o2,0
+/* 0x0084 ( 0 1) */ ble,pt %icc,.L77000191 ! tprob=0.60
+/* 0x0088 ( 0 1) */ nop
+/* 0x008c 161 ( 1 2) */ or %g0,-1,%g2
+/* 0x0090 ( 1 2) */ sub %o2,1,%g4
+/* 0x0094 ( 2 3) */ srl %g2,0,%g3
+/* 0x0098 163 ( 2 3) */ or %g0,0,%g5
+/* 0x009c 164 ( 3 4) */ or %g0,0,%o5
+/* 0x00a0 161 ( 3 4) */ or %g0,%o0,%o4
+/* 0x00a4 ( 4 5) */ cmp %o2,3
+/* 0x00a8 ( 4 5) */ add %o1,4,%g2
+/* 0x00ac 164 ( 4 5) */ bl,pn %icc,.L77000199 ! tprob=0.40
+/* 0x00b0 ( 5 6) */ add %o0,8,%g1
+
+! 165 ! {
+! 166 ! acc=acc+(unsigned long long)(i32[i])-(unsigned long long)(nint[i]);
+
+/* 0x00b4 166 ( 5 8) */ ld [%o0],%o2
+/* 0x00b8 0 ( 5 6) */ or %g0,%g2,%o3
+/* 0x00bc 166 ( 6 9) */ ld [%o1],%o1
+/* 0x00c0 0 ( 6 7) */ or %g0,%g1,%o4
+
+! 167 ! i32[i]=acc&0xffffffff;
+! 168 ! acc=acc>>32;
+
+/* 0x00c4 168 ( 6 7) */ or %g0,2,%o5
+/* 0x00c8 166 ( 7 10) */ ld [%o0+4],%g1
+/* 0x00cc 164 ( 8 9) */ sub %o2,%o1,%o2
+/* 0x00d0 ( 9 10) */ or %g0,%o2,%g5
+/* 0x00d4 167 ( 9 10) */ and %o2,%g3,%o2
+/* 0x00d8 ( 9 10) */ st %o2,[%o0]
+/* 0x00dc 168 (10 11) */ srax %g5,32,%g5
+ .L900000605: /* frequency 64.0 confidence 0.0 */
+/* 0x00e0 166 (12 20) */ ld [%o3],%o2
+/* 0x00e4 168 (12 13) */ add %o5,1,%o5
+/* 0x00e8 (12 13) */ add %o3,4,%o3
+/* 0x00ec (13 13) */ cmp %o5,%g4
+/* 0x00f0 (13 14) */ add %o4,4,%o4
+/* 0x00f4 164 (14 14) */ sub %g1,%o2,%g1
+/* 0x00f8 (15 15) */ add %g1,%g5,%g5
+/* 0x00fc 167 (16 17) */ and %g5,%g3,%o2
+/* 0x0100 166 (16 24) */ ld [%o4-4],%g1
+/* 0x0104 167 (17 18) */ st %o2,[%o4-8]
+/* 0x0108 168 (17 18) */ ble,pt %icc,.L900000605 ! tprob=0.50
+/* 0x010c (17 18) */ srax %g5,32,%g5
+ .L900000608: /* frequency 8.0 confidence 0.0 */
+/* 0x0110 166 ( 0 3) */ ld [%o3],%g2
+/* 0x0114 164 ( 2 3) */ sub %g1,%g2,%g1
+/* 0x0118 ( 3 4) */ add %g1,%g5,%g1
+/* 0x011c 167 ( 4 5) */ and %g1,%g3,%g2
+/* 0x0120 ( 5 7) */ retl ! Result =
+/* 0x0124 ( 6 7) */ st %g2,[%o4-4]
+ .L77000199: /* frequency 0.6 confidence 0.0 */
+/* 0x0128 166 ( 0 3) */ ld [%o4],%g1
+ .L900000609: /* frequency 5.3 confidence 0.0 */
+/* 0x012c 166 ( 0 3) */ ld [%o3],%g2
+/* 0x0130 ( 0 1) */ add %g5,%g1,%g1
+/* 0x0134 168 ( 0 1) */ add %o5,1,%o5
+/* 0x0138 ( 1 2) */ add %o3,4,%o3
+/* 0x013c ( 1 2) */ cmp %o5,%g4
+/* 0x0140 166 ( 2 3) */ sub %g1,%g2,%g1
+/* 0x0144 167 ( 3 4) */ and %g1,%g3,%g2
+/* 0x0148 ( 3 4) */ st %g2,[%o4]
+/* 0x014c 168 ( 3 4) */ add %o4,4,%o4
+/* 0x0150 ( 4 5) */ srax %g1,32,%g5
+/* 0x0154 ( 4 5) */ ble,a,pt %icc,.L900000609 ! tprob=0.84
+/* 0x0158 ( 4 7) */ ld [%o4],%g1
+ .L77000191: /* frequency 1.0 confidence 0.0 */
+/* 0x015c ( 0 2) */ retl ! Result =
+/* 0x0160 ( 1 2) */ nop
+/* 0x0164 0 ( 0 0) */ .type adjust_montf_result,2
+/* 0x0164 ( 0 0) */ .size adjust_montf_result,(.-adjust_montf_result)
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 ( 0 0) */ .align 32
+!
+! SUBROUTINE mont_mulf_noconv
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME)
+
+ .global mont_mulf_noconv
+ mont_mulf_noconv: /* frequency 1.0 confidence 0.0 */
+/* 000000 ( 0 1) */ save %sp,-144,%sp
+/* 0x0004 ( 1 2) */ st %i0,[%fp+68]
+
+! 169 ! }
+! 170 ! }
+! 171 !}
+! 175 !void cleanup(double *dt, int from, int tlen);
+! 177 !/*
+! 178 !** the lengths of the input arrays should be at least the following:
+! 179 !** result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen]
+! 180 !** all of them should be different from one another
+! 181 !**
+! 182 !*/
+! 183 !void mont_mulf_noconv(unsigned int *result,
+! 184 ! double *dm1, double *dm2, double *dt,
+! 185 ! double *dn, unsigned int *nint,
+! 186 ! int nlen, double dn0)
+! 187 !{
+! 188 ! int i, j, jj;
+! 189 ! int tmp;
+! 190 ! double digit, m2j, nextm2j, a, b;
+! 191 ! double *dptmp, *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0;
+! 193 ! pdm1=&(dm1[0]);
+! 194 ! pdm2=&(dm2[0]);
+! 195 ! pdn=&(dn[0]);
+! 196 ! pdm2[2*nlen]=Zero;
+
+/* 0x0008 196 ( 1 2) */ sethi %hi(Zero),%g2
+/* 0x000c 187 ( 1 2) */ or %g0,%i2,%o1
+/* 0x0010 ( 2 3) */ st %i5,[%fp+88]
+/* 0x0014 ( 2 3) */ or %g0,%i3,%o2
+/* 0x0018 196 ( 2 3) */ add %g2,%lo(Zero),%g4
+/* 0x001c ( 3 6) */ ldd [%g2+%lo(Zero)],%f2
+/* 0x0020 187 ( 3 4) */ or %g0,%o2,%g5
+/* 0x0024 196 ( 3 4) */ or %g0,%o1,%i0
+/* 0x0028 187 ( 4 5) */ or %g0,%i4,%i2
+
+! 198 ! if (nlen!=16)
+! 199 ! {
+! 200 ! for(i=0;i<4*nlen+2;i++) dt[i]=Zero;
+! 202 ! a=dt[0]=pdm1[0]*pdm2[0];
+! 203 ! digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16);
+! 205 ! pdtj=&(dt[0]);
+! 206 ! for(j=jj=0;j<2*nlen;j++,jj++,pdtj++)
+! 207 ! {
+! 208 ! m2j=pdm2[j];
+! 209 ! a=pdtj[0]+pdn[0]*digit;
+! 210 ! b=pdtj[1]+pdm1[0]*pdm2[j+1]+a*TwoToMinus16;
+! 211 ! pdtj[1]=b;
+! 213 !#pragma pipeloop(0)
+! 214 ! for(i=1;i<nlen;i++)
+! 215 ! {
+! 216 ! pdtj[2*i]+=pdm1[i]*m2j+pdn[i]*digit;
+! 217 ! }
+! 218 ! if((jj==30)) {cleanup(dt,j/2+1,2*nlen+1); jj=0;}
+! 219 !
+! 220 ! digit=mod(lower32(b,Zero)*dn0,TwoToMinus16,TwoTo16);
+! 221 ! }
+! 222 ! }
+! 223 ! else
+! 224 ! {
+! 225 ! a=dt[0]=pdm1[0]*pdm2[0];
+! 227 ! dt[65]= dt[64]= dt[63]= dt[62]= dt[61]= dt[60]=
+! 228 ! dt[59]= dt[58]= dt[57]= dt[56]= dt[55]= dt[54]=
+! 229 ! dt[53]= dt[52]= dt[51]= dt[50]= dt[49]= dt[48]=
+! 230 ! dt[47]= dt[46]= dt[45]= dt[44]= dt[43]= dt[42]=
+! 231 ! dt[41]= dt[40]= dt[39]= dt[38]= dt[37]= dt[36]=
+! 232 ! dt[35]= dt[34]= dt[33]= dt[32]= dt[31]= dt[30]=
+! 233 ! dt[29]= dt[28]= dt[27]= dt[26]= dt[25]= dt[24]=
+! 234 ! dt[23]= dt[22]= dt[21]= dt[20]= dt[19]= dt[18]=
+! 235 ! dt[17]= dt[16]= dt[15]= dt[14]= dt[13]= dt[12]=
+! 236 ! dt[11]= dt[10]= dt[ 9]= dt[ 8]= dt[ 7]= dt[ 6]=
+! 237 ! dt[ 5]= dt[ 4]= dt[ 3]= dt[ 2]= dt[ 1]=Zero;
+! 239 ! pdn_0=pdn[0];
+! 240 ! pdm1_0=pdm1[0];
+! 242 ! digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16);
+! 243 ! pdtj=&(dt[0]);
+! 245 ! for(j=0;j<32;j++,pdtj++)
+! 246 ! {
+! 248 ! m2j=pdm2[j];
+! 249 ! a=pdtj[0]+pdn_0*digit;
+! 250 ! b=pdtj[1]+pdm1_0*pdm2[j+1]+a*TwoToMinus16;
+! 251 ! pdtj[1]=b;
+! 253 ! /**** this loop will be fully unrolled:
+! 254 ! for(i=1;i<16;i++)
+! 255 ! {
+! 256 ! pdtj[2*i]+=pdm1[i]*m2j+pdn[i]*digit;
+! 257 ! }
+! 258 ! *************************************/
+! 259 ! pdtj[2]+=pdm1[1]*m2j+pdn[1]*digit;
+! 260 ! pdtj[4]+=pdm1[2]*m2j+pdn[2]*digit;
+! 261 ! pdtj[6]+=pdm1[3]*m2j+pdn[3]*digit;
+! 262 ! pdtj[8]+=pdm1[4]*m2j+pdn[4]*digit;
+! 263 ! pdtj[10]+=pdm1[5]*m2j+pdn[5]*digit;
+! 264 ! pdtj[12]+=pdm1[6]*m2j+pdn[6]*digit;
+! 265 ! pdtj[14]+=pdm1[7]*m2j+pdn[7]*digit;
+! 266 ! pdtj[16]+=pdm1[8]*m2j+pdn[8]*digit;
+! 267 ! pdtj[18]+=pdm1[9]*m2j+pdn[9]*digit;
+! 268 ! pdtj[20]+=pdm1[10]*m2j+pdn[10]*digit;
+! 269 ! pdtj[22]+=pdm1[11]*m2j+pdn[11]*digit;
+! 270 ! pdtj[24]+=pdm1[12]*m2j+pdn[12]*digit;
+! 271 ! pdtj[26]+=pdm1[13]*m2j+pdn[13]*digit;
+! 272 ! pdtj[28]+=pdm1[14]*m2j+pdn[14]*digit;
+! 273 ! pdtj[30]+=pdm1[15]*m2j+pdn[15]*digit;
+! 274 ! /* no need for cleanup, cannot overflow */
+! 275 ! digit=mod(lower32(b,Zero)*dn0,TwoToMinus16,TwoTo16);
+! 276 ! }
+! 277 ! }
+! 279 ! conv_d16_to_i32(result,dt+2*nlen,(long long *)dt,nlen+1);
+! 281 ! adjust_montf_result(result,nint,nlen);
+
+/* 0x002c 281 ( 4 5) */ or %g0,1,%o4
+/* 0x0030 187 ( 6 9) */ ldd [%fp+96],%f0
+/* 0x0034 196 ( 7 10) */ ld [%fp+92],%o0
+/* 0x0038 187 ( 8 9) */ fmovd %f0,%f16
+/* 0x003c 196 ( 9 10) */ sll %o0,4,%g2
+/* 0x0040 ( 9 10) */ or %g0,%o0,%g1
+/* 0x0044 198 (10 11) */ cmp %o0,16
+/* 0x0048 (10 11) */ be,pn %icc,.L77000289 ! tprob=0.50
+/* 0x004c (10 11) */ std %f2,[%o1+%g2]
+/* 0x0050 200 (11 12) */ sll %o0,2,%g2
+/* 0x0054 (11 14) */ ldd [%g4],%f2
+/* 0x0058 (12 13) */ add %g2,2,%o1
+/* 0x005c (12 13) */ add %g2,1,%o3
+/* 0x0060 196 (13 14) */ sll %o0,1,%o7
+/* 0x0064 200 (13 14) */ cmp %o1,0
+/* 0x0068 (13 14) */ ble,a,pt %icc,.L900000755 ! tprob=0.55
+/* 0x006c (14 17) */ ldd [%i1],%f0
+/* 0x0070 (14 15) */ cmp %o1,3
+/* 0x0074 281 (14 15) */ or %g0,1,%o1
+/* 0x0078 (14 15) */ bl,pn %icc,.L77000279 ! tprob=0.40
+/* 0x007c (15 16) */ add %o2,8,%o0
+/* 0x0080 (15 16) */ std %f2,[%g5]
+/* 0x0084 0 (16 17) */ or %g0,%o0,%o2
+ .L900000726: /* frequency 64.0 confidence 0.0 */
+/* 0x0088 ( 3 5) */ ldd [%g4],%f0
+/* 0x008c ( 3 4) */ add %o4,1,%o4
+/* 0x0090 ( 3 4) */ add %o2,8,%o2
+/* 0x0094 ( 4 4) */ cmp %o4,%o3
+/* 0x0098 ( 5 6) */ ble,pt %icc,.L900000726 ! tprob=0.50
+/* 0x009c ( 5 6) */ std %f0,[%o2-8]
+ .L900000729: /* frequency 8.0 confidence 0.0 */
+/* 0x00a0 ( 0 1) */ ba .L900000755 ! tprob=1.00
+/* 0x00a4 ( 0 3) */ ldd [%i1],%f0
+ .L77000279: /* frequency 0.6 confidence 0.0 */
+/* 0x00a8 ( 0 1) */ std %f2,[%o2]
+ .L900000754: /* frequency 5.3 confidence 0.0 */
+/* 0x00ac ( 0 3) */ ldd [%g4],%f2
+/* 0x00b0 ( 0 1) */ cmp %o1,%o3
+/* 0x00b4 ( 0 1) */ add %o2,8,%o2
+/* 0x00b8 ( 1 2) */ add %o1,1,%o1
+/* 0x00bc ( 1 2) */ ble,a,pt %icc,.L900000754 ! tprob=0.87
+/* 0x00c0 ( 3 4) */ std %f2,[%o2]
+ .L77000284: /* frequency 0.8 confidence 0.0 */
+/* 0x00c4 202 ( 0 3) */ ldd [%i1],%f0
+ .L900000755: /* frequency 0.8 confidence 0.0 */
+/* 0x00c8 202 ( 0 3) */ ldd [%i0],%f2
+/* 0x00cc ( 0 1) */ add %o7,1,%o2
+/* 0x00d0 206 ( 0 1) */ cmp %o7,0
+/* 0x00d4 ( 1 2) */ sll %o2,1,%o0
+/* 0x00d8 ( 1 2) */ sub %o7,1,%o1
+/* 0x00dc 202 ( 2 5) */ fmuld %f0,%f2,%f0
+/* 0x00e0 ( 2 3) */ std %f0,[%g5]
+/* 0x00e4 ( 2 3) */ sub %g1,1,%o7
+/* 0x00e8 ( 3 6) */ ldd [%g4],%f6
+/* 0x00ec 0 ( 3 4) */ or %g0,%o7,%g3
+/* 0x00f0 ( 3 4) */ or %g0,0,%l0
+/* 0x00f4 ( 4 7) */ ldd [%g4-8],%f2
+/* 0x00f8 ( 4 5) */ or %g0,0,%i5
+/* 0x00fc ( 4 5) */ or %g0,%o1,%o5
+/* 0x0100 ( 5 8) */ fdtox %f0,%f0
+/* 0x0104 ( 5 8) */ ldd [%g4-16],%f4
+/* 0x0108 ( 5 6) */ or %g0,%o0,%o3
+/* 0x010c 210 ( 6 7) */ add %i0,8,%o4
+/* 0x0110 ( 6 7) */ or %g0,0,%i4
+/* 0x0114 ( 9 10) */ fmovs %f6,%f0
+/* 0x0118 (11 14) */ fxtod %f0,%f0
+/* 0x011c 203 (14 17) */ fmuld %f0,%f16,%f0
+/* 0x0120 (17 20) */ fmuld %f0,%f2,%f2
+/* 0x0124 (20 23) */ fdtox %f2,%f2
+/* 0x0128 (23 26) */ fxtod %f2,%f2
+/* 0x012c (26 29) */ fmuld %f2,%f4,%f2
+/* 0x0130 (29 32) */ fsubd %f0,%f2,%f22
+/* 0x0134 206 (29 30) */ ble,pt %icc,.L900000748 ! tprob=0.60
+/* 0x0138 (29 30) */ sll %g1,4,%g2
+/* 0x013c 210 (30 33) */ ldd [%i2],%f0
+ .L900000749: /* frequency 5.3 confidence 0.0 */
+/* 0x0140 210 ( 0 3) */ fmuld %f0,%f22,%f8
+/* 0x0144 ( 0 3) */ ldd [%i1],%f0
+/* 0x0148 214 ( 0 1) */ cmp %g1,1
+/* 0x014c 210 ( 1 4) */ ldd [%o4+%i4],%f6
+/* 0x0150 ( 1 2) */ add %i1,8,%o0
+/* 0x0154 214 ( 1 2) */ or %g0,1,%o1
+/* 0x0158 210 ( 2 5) */ ldd [%i3],%f2
+/* 0x015c ( 2 3) */ add %i3,16,%l1
+/* 0x0160 ( 3 6) */ fmuld %f0,%f6,%f6
+/* 0x0164 ( 3 6) */ ldd [%g4-8],%f4
+/* 0x0168 ( 4 7) */ faddd %f2,%f8,%f2
+/* 0x016c ( 4 7) */ ldd [%i3+8],%f0
+/* 0x0170 208 ( 5 8) */ ldd [%i0+%i4],%f20
+/* 0x0174 210 ( 6 9) */ faddd %f0,%f6,%f0
+/* 0x0178 ( 7 10) */ fmuld %f2,%f4,%f2
+/* 0x017c (10 13) */ faddd %f0,%f2,%f18
+/* 0x0180 211 (10 11) */ std %f18,[%i3+8]
+/* 0x0184 214 (10 11) */ ble,pt %icc,.L900000753 ! tprob=0.54
+/* 0x0188 (11 12) */ srl %i5,31,%g2
+/* 0x018c (11 12) */ cmp %g3,7
+/* 0x0190 210 (12 13) */ add %i2,8,%g2
+/* 0x0194 214 (12 13) */ bl,pn %icc,.L77000281 ! tprob=0.36
+/* 0x0198 (13 14) */ add %g2,24,%o2
+/* 0x019c 216 (13 16) */ ldd [%o0+16],%f14
+/* 0x01a0 (13 14) */ add %i3,48,%l1
+/* 0x01a4 (14 17) */ ldd [%o0+24],%f12
+/* 0x01a8 0 (14 15) */ or %g0,%o2,%g2
+/* 0x01ac 214 (14 15) */ sub %g1,3,%o2
+/* 0x01b0 216 (15 18) */ ldd [%o0],%f2
+/* 0x01b4 (15 16) */ or %g0,5,%o1
+/* 0x01b8 (16 19) */ ldd [%g2-24],%f0
+/* 0x01bc (17 20) */ ldd [%o0+8],%f6
+/* 0x01c0 (17 20) */ fmuld %f2,%f20,%f2
+/* 0x01c4 (17 18) */ add %o0,32,%o0
+/* 0x01c8 (18 21) */ ldd [%g2-16],%f8
+/* 0x01cc (18 21) */ fmuld %f0,%f22,%f4
+/* 0x01d0 (19 22) */ ldd [%i3+16],%f0
+/* 0x01d4 (19 22) */ fmuld %f6,%f20,%f10
+/* 0x01d8 (20 23) */ ldd [%g2-8],%f6
+/* 0x01dc (21 24) */ faddd %f2,%f4,%f4
+/* 0x01e0 (21 24) */ ldd [%i3+32],%f2
+ .L900000738: /* frequency 512.0 confidence 0.0 */
+/* 0x01e4 216 (16 24) */ ldd [%g2],%f24
+/* 0x01e8 (16 17) */ add %o1,3,%o1
+/* 0x01ec (16 17) */ add %g2,24,%g2
+/* 0x01f0 (16 19) */ fmuld %f8,%f22,%f8
+/* 0x01f4 (17 25) */ ldd [%l1],%f28
+/* 0x01f8 (17 17) */ cmp %o1,%o2
+/* 0x01fc (17 18) */ add %o0,24,%o0
+/* 0x0200 (18 26) */ ldd [%o0-24],%f26
+/* 0x0204 (18 21) */ faddd %f0,%f4,%f0
+/* 0x0208 (18 19) */ add %l1,48,%l1
+/* 0x020c (19 22) */ faddd %f10,%f8,%f10
+/* 0x0210 (19 22) */ fmuld %f14,%f20,%f4
+/* 0x0214 (19 20) */ std %f0,[%l1-80]
+/* 0x0218 (20 28) */ ldd [%g2-16],%f8
+/* 0x021c (20 23) */ fmuld %f6,%f22,%f6
+/* 0x0220 (21 29) */ ldd [%l1-32],%f0
+/* 0x0224 (22 30) */ ldd [%o0-16],%f14
+/* 0x0228 (22 25) */ faddd %f2,%f10,%f2
+/* 0x022c (23 26) */ faddd %f4,%f6,%f10
+/* 0x0230 (23 26) */ fmuld %f12,%f20,%f4
+/* 0x0234 (23 24) */ std %f2,[%l1-64]
+/* 0x0238 (24 32) */ ldd [%g2-8],%f6
+/* 0x023c (24 27) */ fmuld %f24,%f22,%f24
+/* 0x0240 (25 33) */ ldd [%l1-16],%f2
+/* 0x0244 (26 34) */ ldd [%o0-8],%f12
+/* 0x0248 (26 29) */ faddd %f28,%f10,%f10
+/* 0x024c (27 28) */ std %f10,[%l1-48]
+/* 0x0250 (27 30) */ fmuld %f26,%f20,%f10
+/* 0x0254 (27 28) */ ble,pt %icc,.L900000738 ! tprob=0.50
+/* 0x0258 (27 30) */ faddd %f4,%f24,%f4
+ .L900000741: /* frequency 64.0 confidence 0.0 */
+/* 0x025c 216 ( 0 3) */ fmuld %f8,%f22,%f28
+/* 0x0260 ( 0 3) */ ldd [%g2],%f24
+/* 0x0264 ( 0 3) */ faddd %f0,%f4,%f26
+/* 0x0268 ( 1 4) */ fmuld %f12,%f20,%f8
+/* 0x026c ( 1 2) */ add %l1,32,%l1
+/* 0x0270 ( 1 2) */ cmp %o1,%g3
+/* 0x0274 ( 2 5) */ fmuld %f14,%f20,%f14
+/* 0x0278 ( 2 5) */ ldd [%l1-32],%f4
+/* 0x027c ( 2 3) */ add %g2,8,%g2
+/* 0x0280 ( 3 6) */ faddd %f10,%f28,%f12
+/* 0x0284 ( 3 6) */ fmuld %f6,%f22,%f6
+/* 0x0288 ( 3 6) */ ldd [%l1-16],%f0
+/* 0x028c ( 4 7) */ fmuld %f24,%f22,%f10
+/* 0x0290 ( 4 5) */ std %f26,[%l1-64]
+/* 0x0294 ( 6 9) */ faddd %f2,%f12,%f2
+/* 0x0298 ( 6 7) */ std %f2,[%l1-48]
+/* 0x029c ( 7 10) */ faddd %f14,%f6,%f6
+/* 0x02a0 ( 8 11) */ faddd %f8,%f10,%f2
+/* 0x02a4 (10 13) */ faddd %f4,%f6,%f4
+/* 0x02a8 (10 11) */ std %f4,[%l1-32]
+/* 0x02ac (11 14) */ faddd %f0,%f2,%f0
+/* 0x02b0 (11 12) */ bg,pn %icc,.L77000213 ! tprob=0.13
+/* 0x02b4 (11 12) */ std %f0,[%l1-16]
+ .L77000281: /* frequency 4.0 confidence 0.0 */
+/* 0x02b8 216 ( 0 3) */ ldd [%o0],%f0
+ .L900000752: /* frequency 36.6 confidence 0.0 */
+/* 0x02bc 216 ( 0 3) */ ldd [%g2],%f4
+/* 0x02c0 ( 0 3) */ fmuld %f0,%f20,%f2
+/* 0x02c4 ( 0 1) */ add %o1,1,%o1
+/* 0x02c8 ( 1 4) */ ldd [%l1],%f0
+/* 0x02cc ( 1 2) */ add %o0,8,%o0
+/* 0x02d0 ( 1 2) */ add %g2,8,%g2
+/* 0x02d4 ( 2 5) */ fmuld %f4,%f22,%f4
+/* 0x02d8 ( 2 3) */ cmp %o1,%g3
+/* 0x02dc ( 5 8) */ faddd %f2,%f4,%f2
+/* 0x02e0 ( 8 11) */ faddd %f0,%f2,%f0
+/* 0x02e4 ( 8 9) */ std %f0,[%l1]
+/* 0x02e8 ( 8 9) */ add %l1,16,%l1
+/* 0x02ec ( 8 9) */ ble,a,pt %icc,.L900000752 ! tprob=0.87
+/* 0x02f0 (10 13) */ ldd [%o0],%f0
+ .L77000213: /* frequency 5.3 confidence 0.0 */
+/* 0x02f4 ( 0 1) */ srl %i5,31,%g2
+ .L900000753: /* frequency 5.3 confidence 0.0 */
+/* 0x02f8 218 ( 0 1) */ cmp %l0,30
+/* 0x02fc ( 0 1) */ bne,a,pt %icc,.L900000751 ! tprob=0.54
+/* 0x0300 ( 0 3) */ fdtox %f18,%f0
+/* 0x0304 ( 1 2) */ add %i5,%g2,%g2
+/* 0x0308 ( 1 2) */ sub %o3,1,%o2
+/* 0x030c ( 2 3) */ sra %g2,1,%o0
+/* 0x0310 216 ( 2 5) */ ldd [%g4],%f0
+/* 0x0314 ( 3 4) */ add %o0,1,%g2
+/* 0x0318 ( 4 5) */ sll %g2,1,%o0
+/* 0x031c ( 4 5) */ fmovd %f0,%f2
+/* 0x0320 ( 5 6) */ sll %g2,4,%o1
+/* 0x0324 ( 5 6) */ cmp %o0,%o3
+/* 0x0328 ( 5 6) */ bge,pt %icc,.L77000215 ! tprob=0.53
+/* 0x032c ( 6 7) */ or %g0,0,%l0
+/* 0x0330 218 ( 6 7) */ add %g5,%o1,%o1
+/* 0x0334 216 ( 7 10) */ ldd [%o1],%f8
+ .L900000750: /* frequency 32.0 confidence 0.0 */
+/* 0x0338 ( 0 3) */ fdtox %f8,%f6
+/* 0x033c ( 0 3) */ ldd [%g4],%f10
+/* 0x0340 ( 0 1) */ add %o0,2,%o0
+/* 0x0344 ( 1 4) */ ldd [%o1+8],%f4
+/* 0x0348 ( 1 4) */ fdtox %f8,%f8
+/* 0x034c ( 1 2) */ cmp %o0,%o2
+/* 0x0350 ( 5 6) */ fmovs %f10,%f6
+/* 0x0354 ( 7 10) */ fxtod %f6,%f10
+/* 0x0358 ( 8 11) */ fdtox %f4,%f6
+/* 0x035c ( 9 12) */ fdtox %f4,%f4
+/* 0x0360 (10 13) */ faddd %f10,%f2,%f2
+/* 0x0364 (10 11) */ std %f2,[%o1]
+/* 0x0368 (12 15) */ ldd [%g4],%f2
+/* 0x036c (14 15) */ fmovs %f2,%f6
+/* 0x0370 (16 19) */ fxtod %f6,%f6
+/* 0x0374 (17 20) */ fitod %f8,%f2
+/* 0x0378 (19 22) */ faddd %f6,%f0,%f0
+/* 0x037c (19 20) */ std %f0,[%o1+8]
+/* 0x0380 (19 20) */ add %o1,16,%o1
+/* 0x0384 (20 23) */ fitod %f4,%f0
+/* 0x0388 (20 21) */ ble,a,pt %icc,.L900000750 ! tprob=0.87
+/* 0x038c (20 23) */ ldd [%o1],%f8
+ .L77000233: /* frequency 4.6 confidence 0.0 */
+/* 0x0390 ( 0 0) */ or %g0,0,%l0
+ .L77000215: /* frequency 5.3 confidence 0.0 */
+/* 0x0394 ( 0 3) */ fdtox %f18,%f0
+ .L900000751: /* frequency 5.3 confidence 0.0 */
+/* 0x0398 ( 0 3) */ ldd [%g4],%f6
+/* 0x039c 220 ( 0 1) */ add %i5,1,%i5
+/* 0x03a0 ( 0 1) */ add %i4,8,%i4
+/* 0x03a4 ( 1 4) */ ldd [%g4-8],%f2
+/* 0x03a8 ( 1 2) */ add %l0,1,%l0
+/* 0x03ac ( 1 2) */ add %i3,8,%i3
+/* 0x03b0 ( 2 3) */ fmovs %f6,%f0
+/* 0x03b4 ( 2 5) */ ldd [%g4-16],%f4
+/* 0x03b8 ( 2 3) */ cmp %i5,%o5
+/* 0x03bc ( 4 7) */ fxtod %f0,%f0
+/* 0x03c0 ( 7 10) */ fmuld %f0,%f16,%f0
+/* 0x03c4 (10 13) */ fmuld %f0,%f2,%f2
+/* 0x03c8 (13 16) */ fdtox %f2,%f2
+/* 0x03cc (16 19) */ fxtod %f2,%f2
+/* 0x03d0 (19 22) */ fmuld %f2,%f4,%f2
+/* 0x03d4 (22 25) */ fsubd %f0,%f2,%f22
+/* 0x03d8 (22 23) */ ble,a,pt %icc,.L900000749 ! tprob=0.89
+/* 0x03dc (22 25) */ ldd [%i2],%f0
+ .L900000725: /* frequency 0.7 confidence 0.0 */
+/* 0x03e0 220 ( 0 1) */ ba .L900000748 ! tprob=1.00
+/* 0x03e4 ( 0 1) */ sll %g1,4,%g2
+
+
+ .L77000289: /* frequency 0.8 confidence 0.0 */
+/* 0x03e8 225 ( 0 3) */ ldd [%o1],%f6
+/* 0x03ec 242 ( 0 1) */ add %g4,-8,%g2
+/* 0x03f0 ( 0 1) */ add %g4,-16,%g3
+/* 0x03f4 225 ( 1 4) */ ldd [%i1],%f2
+/* 0x03f8 245 ( 1 2) */ or %g0,0,%o3
+/* 0x03fc ( 1 2) */ or %g0,0,%o0
+/* 0x0400 225 ( 3 6) */ fmuld %f2,%f6,%f2
+/* 0x0404 ( 3 4) */ std %f2,[%o2]
+/* 0x0408 ( 4 7) */ ldd [%g4],%f6
+/* 0x040c 237 ( 7 8) */ std %f6,[%o2+8]
+/* 0x0410 ( 8 9) */ std %f6,[%o2+16]
+/* 0x0414 ( 9 10) */ std %f6,[%o2+24]
+/* 0x0418 (10 11) */ std %f6,[%o2+32]
+/* 0x041c (11 12) */ std %f6,[%o2+40]
+/* 0x0420 (12 13) */ std %f6,[%o2+48]
+/* 0x0424 (13 14) */ std %f6,[%o2+56]
+/* 0x0428 (14 15) */ std %f6,[%o2+64]
+/* 0x042c (15 16) */ std %f6,[%o2+72]
+! prefetch [%i4],0
+! prefetch [%i4+32],0
+! prefetch [%i4+64],0
+! prefetch [%i4+96],0
+! prefetch [%i4+120],0
+! prefetch [%i1],0
+! prefetch [%i1+32],0
+! prefetch [%i1+64],0
+! prefetch [%i1+96],0
+! prefetch [%i1+120],0
+/* 0x0430 (16 17) */ std %f6,[%o2+80]
+/* 0x0434 (17 18) */ std %f6,[%o2+88]
+/* 0x0438 (18 19) */ std %f6,[%o2+96]
+/* 0x043c (19 20) */ std %f6,[%o2+104]
+/* 0x0440 (20 21) */ std %f6,[%o2+112]
+/* 0x0444 (21 22) */ std %f6,[%o2+120]
+/* 0x0448 (22 23) */ std %f6,[%o2+128]
+/* 0x044c (23 24) */ std %f6,[%o2+136]
+/* 0x0450 (24 25) */ std %f6,[%o2+144]
+/* 0x0454 (25 26) */ std %f6,[%o2+152]
+/* 0x0458 (26 27) */ std %f6,[%o2+160]
+/* 0x045c (27 28) */ std %f6,[%o2+168]
+/* 0x0460 (27 30) */ fdtox %f2,%f2
+/* 0x0464 (28 29) */ std %f6,[%o2+176]
+/* 0x0468 (29 30) */ std %f6,[%o2+184]
+/* 0x046c (30 31) */ std %f6,[%o2+192]
+/* 0x0470 (31 32) */ std %f6,[%o2+200]
+/* 0x0474 (32 33) */ std %f6,[%o2+208]
+/* 0x0478 (33 34) */ std %f6,[%o2+216]
+/* 0x047c (34 35) */ std %f6,[%o2+224]
+/* 0x0480 (35 36) */ std %f6,[%o2+232]
+/* 0x0484 (36 37) */ std %f6,[%o2+240]
+/* 0x0488 (37 38) */ std %f6,[%o2+248]
+/* 0x048c (38 39) */ std %f6,[%o2+256]
+/* 0x0490 (39 40) */ std %f6,[%o2+264]
+/* 0x0494 (40 41) */ std %f6,[%o2+272]
+/* 0x0498 (41 42) */ std %f6,[%o2+280]
+/* 0x049c (42 43) */ std %f6,[%o2+288]
+/* 0x04a0 (43 44) */ std %f6,[%o2+296]
+/* 0x04a4 (44 45) */ std %f6,[%o2+304]
+/* 0x04a8 (45 46) */ std %f6,[%o2+312]
+/* 0x04ac (46 47) */ std %f6,[%o2+320]
+/* 0x04b0 (47 48) */ std %f6,[%o2+328]
+/* 0x04b4 (48 49) */ std %f6,[%o2+336]
+/* 0x04b8 (49 50) */ std %f6,[%o2+344]
+/* 0x04bc (50 51) */ std %f6,[%o2+352]
+/* 0x04c0 (51 52) */ std %f6,[%o2+360]
+/* 0x04c4 (52 53) */ std %f6,[%o2+368]
+/* 0x04c8 (53 54) */ std %f6,[%o2+376]
+/* 0x04cc (54 55) */ std %f6,[%o2+384]
+/* 0x04d0 (55 56) */ std %f6,[%o2+392]
+/* 0x04d4 (56 57) */ std %f6,[%o2+400]
+/* 0x04d8 (57 58) */ std %f6,[%o2+408]
+/* 0x04dc (58 59) */ std %f6,[%o2+416]
+/* 0x04e0 (59 60) */ std %f6,[%o2+424]
+/* 0x04e4 (60 61) */ std %f6,[%o2+432]
+/* 0x04e8 (61 62) */ std %f6,[%o2+440]
+/* 0x04ec (62 63) */ std %f6,[%o2+448]
+/* 0x04f0 (63 64) */ std %f6,[%o2+456]
+/* 0x04f4 (64 65) */ std %f6,[%o2+464]
+/* 0x04f8 (65 66) */ std %f6,[%o2+472]
+/* 0x04fc (66 67) */ std %f6,[%o2+480]
+/* 0x0500 (67 68) */ std %f6,[%o2+488]
+/* 0x0504 (68 69) */ std %f6,[%o2+496]
+/* 0x0508 (69 70) */ std %f6,[%o2+504]
+/* 0x050c (70 71) */ std %f6,[%o2+512]
+/* 0x0510 (71 72) */ std %f6,[%o2+520]
+/* 0x0514 242 (72 75) */ ld [%g4],%f2 ! dalign
+/* 0x0518 (73 76) */ ld [%g2],%f6 ! dalign
+/* 0x051c (74 77) */ fxtod %f2,%f10
+/* 0x0520 (74 77) */ ld [%g2+4],%f7
+/* 0x0524 (75 78) */ ld [%g3],%f8 ! dalign
+/* 0x0528 (76 79) */ ld [%g3+4],%f9
+/* 0x052c (77 80) */ fmuld %f10,%f0,%f0
+/* 0x0530 239 (77 80) */ ldd [%i4],%f4
+/* 0x0534 240 (78 81) */ ldd [%i1],%f2
+/* 0x0538 (80 83) */ fmuld %f0,%f6,%f6
+/* 0x053c (83 86) */ fdtox %f6,%f6
+/* 0x0540 (86 89) */ fxtod %f6,%f6
+/* 0x0544 (89 92) */ fmuld %f6,%f8,%f6
+/* 0x0548 (92 95) */ fsubd %f0,%f6,%f0
+/* 0x054c 250 (95 98) */ fmuld %f4,%f0,%f10
+ .L900000747: /* frequency 6.4 confidence 0.0 */
+
+
+ fmovd %f0,%f0
+ fmovd %f16,%f18
+ ldd [%i4],%f2
+ ldd [%o2],%f8
+ ldd [%i1],%f10
+ ldd [%g4-8],%f14
+ ldd [%g4-16],%f16
+ ldd [%o1],%f24
+
+ ldd [%i1+8],%f26
+ ldd [%i1+16],%f40
+ ldd [%i1+48],%f46
+ ldd [%i1+56],%f30
+ ldd [%i1+64],%f54
+ ldd [%i1+104],%f34
+ ldd [%i1+112],%f58
+
+ ldd [%i4+112],%f60
+ ldd [%i4+8],%f28
+ ldd [%i4+104],%f38
+
+ nop
+ nop
+!
+ .L99999999:
+!1
+!!!
+ ldd [%i1+24],%f32
+ fmuld %f0,%f2,%f4
+!2
+!!!
+ ldd [%i4+24],%f36
+ fmuld %f26,%f24,%f20
+!3
+!!!
+ ldd [%i1+40],%f42
+ fmuld %f28,%f0,%f22
+!4
+!!!
+ ldd [%i4+40],%f44
+ fmuld %f32,%f24,%f32
+!5
+!!!
+ ldd [%o1+8],%f6
+ faddd %f4,%f8,%f4
+ fmuld %f36,%f0,%f36
+!6
+!!!
+ add %o1,8,%o1
+ ldd [%i4+56],%f50
+ fmuld %f42,%f24,%f42
+!7
+!!!
+ ldd [%i1+72],%f52
+ faddd %f20,%f22,%f20
+ fmuld %f44,%f0,%f44
+!8
+!!!
+ ldd [%o2+16],%f22
+ fmuld %f10,%f6,%f12
+!9
+!!!
+ ldd [%i4+72],%f56
+ faddd %f32,%f36,%f32
+ fmuld %f14,%f4,%f4
+!10
+!!!
+ ldd [%o2+48],%f36
+ fmuld %f30,%f24,%f48
+!11
+!!!
+ ldd [%o2+8],%f8
+ faddd %f20,%f22,%f20
+ fmuld %f50,%f0,%f50
+!12
+!!!
+ std %f20,[%o2+16]
+ faddd %f42,%f44,%f42
+ fmuld %f52,%f24,%f52
+!13
+!!!
+ ldd [%o2+80],%f44
+ faddd %f4,%f12,%f4
+ fmuld %f56,%f0,%f56
+!14
+!!!
+ ldd [%i1+88],%f20
+ faddd %f32,%f36,%f32
+!15
+!!!
+ ldd [%i4+88],%f22
+ faddd %f48,%f50,%f48
+!16
+!!!
+ ldd [%o2+112],%f50
+ faddd %f52,%f56,%f52
+!17
+!!!
+ ldd [%o2+144],%f56
+ faddd %f4,%f8,%f8
+ fmuld %f20,%f24,%f20
+!18
+!!!
+ std %f32,[%o2+48]
+ faddd %f42,%f44,%f42
+ fmuld %f22,%f0,%f22
+!19
+!!!
+ std %f42,[%o2+80]
+ faddd %f48,%f50,%f48
+ fmuld %f34,%f24,%f32
+!20
+!!!
+ std %f48,[%o2+112]
+ faddd %f52,%f56,%f52
+ fmuld %f38,%f0,%f36
+!21
+!!!
+ ldd [%i1+120],%f42
+ fdtox %f8,%f4
+!22
+!!!
+ std %f52,[%o2+144]
+ faddd %f20,%f22,%f20
+!23
+!!!
+ ldd [%i4+120],%f44
+!24
+!!!
+ ldd [%o2+176],%f22
+ faddd %f32,%f36,%f32
+ fmuld %f42,%f24,%f42
+!25
+!!!
+ ldd [%i4+16],%f50
+ fmovs %f17,%f4
+!26
+!!!
+ ldd [%i1+32],%f52
+ fmuld %f44,%f0,%f44
+!27
+!!!
+ ldd [%i4+32],%f56
+ fmuld %f40,%f24,%f48
+!28
+!!!
+ ldd [%o2+208],%f36
+ faddd %f20,%f22,%f20
+ fmuld %f50,%f0,%f50
+!29
+!!!
+ std %f20,[%o2+176]
+ fxtod %f4,%f4
+ fmuld %f52,%f24,%f52
+!30
+!!!
+ ldd [%i4+48],%f22
+ faddd %f42,%f44,%f42
+ fmuld %f56,%f0,%f56
+!31
+!!!
+ ldd [%o2+240],%f44
+ faddd %f32,%f36,%f32
+!32
+!!!
+ std %f32,[%o2+208]
+ faddd %f48,%f50,%f48
+ fmuld %f46,%f24,%f20
+!33
+!!!
+ ldd [%o2+32],%f50
+ fmuld %f4,%f18,%f12
+!34
+!!!
+ ldd [%i4+64],%f36
+ faddd %f52,%f56,%f52
+ fmuld %f22,%f0,%f22
+!35
+!!!
+ ldd [%o2+64],%f56
+ faddd %f42,%f44,%f42
+!36
+!!!
+ std %f42,[%o2+240]
+ faddd %f48,%f50,%f48
+ fmuld %f54,%f24,%f32
+!37
+!!!
+ std %f48,[%o2+32]
+ fmuld %f12,%f14,%f4
+!38
+!!!
+ ldd [%i1+80],%f42
+ faddd %f52,%f56,%f56 ! yes, tmp52!
+ fmuld %f36,%f0,%f36
+!39
+!!!
+ ldd [%i4+80],%f44
+ faddd %f20,%f22,%f20
+!40
+!!!
+ ldd [%i1+96],%f48
+ fmuld %f58,%f24,%f52
+!41
+!!!
+ ldd [%i4+96],%f50
+ fdtox %f4,%f4
+ fmuld %f42,%f24,%f42
+!42
+!!!
+ std %f56,[%o2+64] ! yes, tmp52!
+ faddd %f32,%f36,%f32
+ fmuld %f44,%f0,%f44
+!43
+!!!
+ ldd [%o2+96],%f22
+ fmuld %f48,%f24,%f48
+!44
+!!!
+ ldd [%o2+128],%f36
+ fmovd %f6,%f24
+ fmuld %f50,%f0,%f50
+!45
+!!!
+ fxtod %f4,%f4
+ fmuld %f60,%f0,%f56
+!46
+!!!
+ add %o2,8,%o2
+ faddd %f42,%f44,%f42
+!47
+!!!
+ ldd [%o2+160-8],%f44
+ faddd %f20,%f22,%f20
+!48
+!!!
+ std %f20,[%o2+96-8]
+ faddd %f48,%f50,%f48
+!49
+!!!
+ ldd [%o2+192-8],%f50
+ faddd %f52,%f56,%f52
+ fmuld %f4,%f16,%f4
+!50
+!!!
+ ldd [%o2+224-8],%f56
+ faddd %f32,%f36,%f32
+!51
+!!!
+ std %f32,[%o2+128-8]
+ faddd %f42,%f44,%f42
+!52
+ add %o3,1,%o3
+ std %f42,[%o2+160-8]
+ faddd %f48,%f50,%f48
+!53
+!!!
+ cmp %o3,31
+ std %f48,[%o2+192-8]
+ faddd %f52,%f56,%f52
+!54
+ std %f52,[%o2+224-8]
+ ble,pt %icc,.L99999999
+ fsubd %f12,%f4,%f0
+
+
+
+!55
+ std %f8,[%o2]
+
+
+
+
+
+
+ .L77000285: /* frequency 1.0 confidence 0.0 */
+/* 0x07a8 279 ( 0 1) */ sll %g1,4,%g2
+ .L900000748: /* frequency 1.0 confidence 0.0 */
+/* 0x07ac 279 ( 0 3) */ ldd [%g5+%g2],%f0
+/* 0x07b0 ( 0 1) */ add %g5,%g2,%i1
+/* 0x07b4 ( 0 1) */ or %g0,0,%o4
+/* 0x07b8 206 ( 1 4) */ ld [%fp+68],%o0
+/* 0x07bc 279 ( 1 2) */ or %g0,0,%i0
+/* 0x07c0 ( 1 2) */ cmp %g1,0
+/* 0x07c4 ( 2 5) */ fdtox %f0,%f0
+/* 0x07c8 ( 2 3) */ std %f0,[%sp+120]
+/* 0x07cc 275 ( 2 3) */ sethi %hi(0xfc00),%o1
+/* 0x07d0 206 ( 3 4) */ or %g0,%o0,%o3
+/* 0x07d4 275 ( 3 4) */ sub %g1,1,%g4
+/* 0x07d8 279 ( 4 7) */ ldd [%i1+8],%f0
+/* 0x07dc ( 4 5) */ or %g0,%o0,%g5
+/* 0x07e0 ( 4 5) */ add %o1,1023,%o1
+/* 0x07e4 ( 6 9) */ fdtox %f0,%f0
+/* 0x07e8 ( 6 7) */ std %f0,[%sp+112]
+/* 0x07ec (10 12) */ ldx [%sp+112],%o5
+/* 0x07f0 (11 13) */ ldx [%sp+120],%o7
+/* 0x07f4 (11 12) */ ble,pt %icc,.L900000746 ! tprob=0.56
+/* 0x07f8 (11 12) */ sethi %hi(0xfc00),%g2
+/* 0x07fc 275 (12 13) */ or %g0,-1,%g2
+/* 0x0800 279 (12 13) */ cmp %g1,3
+/* 0x0804 275 (13 14) */ srl %g2,0,%o2
+/* 0x0808 279 (13 14) */ bl,pn %icc,.L77000286 ! tprob=0.44
+/* 0x080c (13 14) */ or %g0,%i1,%g2
+/* 0x0810 (14 17) */ ldd [%i1+16],%f0
+/* 0x0814 (14 15) */ and %o5,%o1,%o0
+/* 0x0818 (14 15) */ add %i1,16,%g2
+/* 0x081c (15 16) */ sllx %o0,16,%g3
+/* 0x0820 (15 16) */ and %o7,%o2,%o0
+/* 0x0824 (16 19) */ fdtox %f0,%f0
+/* 0x0828 (16 17) */ std %f0,[%sp+104]
+/* 0x082c (16 17) */ add %o0,%g3,%o4
+/* 0x0830 (17 20) */ ldd [%i1+24],%f2
+/* 0x0834 (17 18) */ srax %o5,16,%o0
+/* 0x0838 (17 18) */ add %o3,4,%g5
+/* 0x083c (18 19) */ stx %o0,[%sp+128]
+/* 0x0840 (18 19) */ and %o4,%o2,%o0
+/* 0x0844 (18 19) */ or %g0,1,%i0
+/* 0x0848 (19 20) */ stx %o0,[%sp+112]
+/* 0x084c (19 20) */ srax %o4,32,%o0
+/* 0x0850 (19 22) */ fdtox %f2,%f0
+/* 0x0854 (20 21) */ stx %o0,[%sp+136]
+/* 0x0858 (20 21) */ srax %o7,32,%o4
+/* 0x085c (21 22) */ std %f0,[%sp+96]
+/* 0x0860 (22 24) */ ldx [%sp+136],%o7
+/* 0x0864 (23 25) */ ldx [%sp+128],%o0
+/* 0x0868 (25 27) */ ldx [%sp+104],%g3
+/* 0x086c (25 26) */ add %o0,%o7,%o0
+/* 0x0870 (26 28) */ ldx [%sp+112],%o7
+/* 0x0874 (26 27) */ add %o4,%o0,%o4
+/* 0x0878 (27 29) */ ldx [%sp+96],%o5
+/* 0x087c (28 29) */ st %o7,[%o3]
+/* 0x0880 (28 29) */ or %g0,%g3,%o7
+ .L900000730: /* frequency 64.0 confidence 0.0 */
+/* 0x0884 (17 19) */ ldd [%g2+16],%f0
+/* 0x0888 (17 18) */ add %i0,1,%i0
+/* 0x088c (17 18) */ add %g5,4,%g5
+/* 0x0890 (18 18) */ cmp %i0,%g4
+/* 0x0894 (18 19) */ add %g2,16,%g2
+/* 0x0898 (19 22) */ fdtox %f0,%f0
+/* 0x089c (20 21) */ std %f0,[%sp+104]
+/* 0x08a0 (21 23) */ ldd [%g2+8],%f0
+/* 0x08a4 (23 26) */ fdtox %f0,%f0
+/* 0x08a8 (24 25) */ std %f0,[%sp+96]
+/* 0x08ac (25 26) */ and %o5,%o1,%g3
+/* 0x08b0 (26 27) */ sllx %g3,16,%g3
+/* 0x08b4 ( 0 0) */ stx %g3,[%sp+120]
+/* 0x08b8 (26 27) */ and %o7,%o2,%g3
+/* 0x08bc ( 0 0) */ stx %o7,[%sp+128]
+/* 0x08c0 ( 0 0) */ ldx [%sp+120],%o7
+/* 0x08c4 (27 27) */ add %g3,%o7,%g3
+/* 0x08c8 ( 0 0) */ ldx [%sp+128],%o7
+/* 0x08cc (28 29) */ srax %o5,16,%o5
+/* 0x08d0 (28 28) */ add %g3,%o4,%g3
+/* 0x08d4 (29 30) */ srax %g3,32,%o4
+/* 0x08d8 ( 0 0) */ stx %o4,[%sp+112]
+/* 0x08dc (30 31) */ srax %o7,32,%o4
+/* 0x08e0 ( 0 0) */ ldx [%sp+112],%o7
+/* 0x08e4 (30 31) */ add %o5,%o7,%o7
+/* 0x08e8 (31 33) */ ldx [%sp+96],%o5
+/* 0x08ec (31 32) */ add %o4,%o7,%o4
+/* 0x08f0 (32 33) */ and %g3,%o2,%g3
+/* 0x08f4 ( 0 0) */ ldx [%sp+104],%o7
+/* 0x08f8 (33 34) */ ble,pt %icc,.L900000730 ! tprob=0.50
+/* 0x08fc (33 34) */ st %g3,[%g5-4]
+ .L900000733: /* frequency 8.0 confidence 0.0 */
+/* 0x0900 ( 0 1) */ ba .L900000746 ! tprob=1.00
+/* 0x0904 ( 0 1) */ sethi %hi(0xfc00),%g2
+ .L77000286: /* frequency 0.7 confidence 0.0 */
+/* 0x0908 ( 0 3) */ ldd [%g2+16],%f0
+ .L900000745: /* frequency 6.4 confidence 0.0 */
+/* 0x090c ( 0 1) */ and %o7,%o2,%o0
+/* 0x0910 ( 0 1) */ and %o5,%o1,%g3
+/* 0x0914 ( 0 3) */ fdtox %f0,%f0
+/* 0x0918 ( 1 2) */ add %o4,%o0,%o0
+/* 0x091c ( 1 2) */ std %f0,[%sp+104]
+/* 0x0920 ( 1 2) */ add %i0,1,%i0
+/* 0x0924 ( 2 3) */ sllx %g3,16,%o4
+/* 0x0928 ( 2 5) */ ldd [%g2+24],%f2
+/* 0x092c ( 2 3) */ add %g2,16,%g2
+/* 0x0930 ( 3 4) */ add %o0,%o4,%o4
+/* 0x0934 ( 3 4) */ cmp %i0,%g4
+/* 0x0938 ( 4 5) */ srax %o5,16,%o0
+/* 0x093c ( 4 5) */ stx %o0,[%sp+112]
+/* 0x0940 ( 4 5) */ and %o4,%o2,%g3
+/* 0x0944 ( 5 6) */ srax %o4,32,%o5
+/* 0x0948 ( 5 8) */ fdtox %f2,%f0
+/* 0x094c ( 5 6) */ std %f0,[%sp+96]
+/* 0x0950 ( 6 7) */ srax %o7,32,%o4
+/* 0x0954 ( 6 8) */ ldx [%sp+112],%o7
+/* 0x0958 ( 8 9) */ add %o7,%o5,%o7
+/* 0x095c ( 9 11) */ ldx [%sp+104],%o5
+/* 0x0960 ( 9 10) */ add %o4,%o7,%o4
+/* 0x0964 (10 12) */ ldx [%sp+96],%o0
+/* 0x0968 (11 12) */ st %g3,[%g5]
+/* 0x096c (11 12) */ or %g0,%o5,%o7
+/* 0x0970 (11 12) */ add %g5,4,%g5
+/* 0x0974 (12 13) */ or %g0,%o0,%o5
+/* 0x0978 (12 13) */ ble,a,pt %icc,.L900000745 ! tprob=0.86
+/* 0x097c (12 15) */ ldd [%g2+16],%f0
+ .L77000236: /* frequency 1.0 confidence 0.0 */
+/* 0x0980 ( 0 1) */ sethi %hi(0xfc00),%g2
+ .L900000746: /* frequency 1.0 confidence 0.0 */
+/* 0x0984 ( 0 1) */ or %g0,-1,%o0
+/* 0x0988 ( 0 1) */ add %g2,1023,%g2
+/* 0x098c ( 0 3) */ ld [%fp+88],%o1
+/* 0x0990 ( 1 2) */ srl %o0,0,%g3
+/* 0x0994 ( 1 2) */ and %o5,%g2,%g2
+/* 0x0998 ( 2 3) */ and %o7,%g3,%g4
+/* 0x099c 281 ( 2 3) */ or %g0,-1,%o5
+/* 0x09a0 275 ( 3 4) */ sllx %g2,16,%g2
+/* 0x09a4 ( 3 4) */ add %o4,%g4,%g4
+/* 0x09a8 ( 4 5) */ add %g4,%g2,%g2
+/* 0x09ac ( 5 6) */ sll %i0,2,%g4
+/* 0x09b0 ( 5 6) */ and %g2,%g3,%g2
+/* 0x09b4 ( 6 7) */ st %g2,[%o3+%g4]
+/* 0x09b8 281 ( 6 7) */ sll %g1,2,%g2
+/* 0x09bc ( 7 10) */ ld [%o3+%g2],%g2
+/* 0x09c0 ( 9 10) */ cmp %g2,0
+/* 0x09c4 ( 9 10) */ bleu,pn %icc,.L77000241 ! tprob=0.50
+/* 0x09c8 ( 9 10) */ or %g0,%o1,%o2
+/* 0x09cc (10 11) */ ba .L900000744 ! tprob=1.00
+/* 0x09d0 (10 11) */ cmp %o5,0
+ .L77000241: /* frequency 0.8 confidence 0.0 */
+/* 0x09d4 ( 0 1) */ subcc %g1,1,%o5
+/* 0x09d8 ( 0 1) */ bneg,pt %icc,.L900000744 ! tprob=0.60
+/* 0x09dc ( 1 2) */ cmp %o5,0
+/* 0x09e0 ( 1 2) */ sll %o5,2,%g2
+/* 0x09e4 ( 2 3) */ add %o1,%g2,%o0
+/* 0x09e8 ( 2 3) */ add %o3,%g2,%o4
+/* 0x09ec ( 3 6) */ ld [%o0],%g2
+ .L900000743: /* frequency 5.3 confidence 0.0 */
+/* 0x09f0 ( 0 3) */ ld [%o4],%g3
+/* 0x09f4 ( 0 1) */ add %o0,4,%o0
+/* 0x09f8 ( 0 1) */ add %o4,4,%o4
+/* 0x09fc ( 2 3) */ cmp %g3,%g2
+/* 0x0a00 ( 2 3) */ bne,pn %icc,.L77000244 ! tprob=0.16
+/* 0x0a04 ( 2 3) */ nop
+/* 0x0a08 ( 3 4) */ addcc %o5,1,%o5
+/* 0x0a0c ( 3 4) */ bpos,a,pt %icc,.L900000743 ! tprob=0.84
+/* 0x0a10 ( 3 6) */ ld [%o0],%g2
+ .L77000244: /* frequency 1.0 confidence 0.0 */
+/* 0x0a14 ( 0 1) */ cmp %o5,0
+ .L900000744: /* frequency 1.0 confidence 0.0 */
+/* 0x0a18 ( 0 1) */ bl,pn %icc,.L77000287 ! tprob=0.50
+/* 0x0a1c ( 0 1) */ sll %o5,2,%g2
+/* 0x0a20 ( 1 4) */ ld [%o2+%g2],%g3
+/* 0x0a24 ( 2 5) */ ld [%o3+%g2],%g2
+/* 0x0a28 ( 4 5) */ cmp %g2,%g3
+/* 0x0a2c ( 4 5) */ bleu,pt %icc,.L77000224 ! tprob=0.56
+/* 0x0a30 ( 4 5) */ nop
+ .L77000287: /* frequency 0.8 confidence 0.0 */
+/* 0x0a34 ( 0 1) */ cmp %g1,0
+/* 0x0a38 ( 0 1) */ ble,pt %icc,.L77000224 ! tprob=0.60
+/* 0x0a3c ( 0 1) */ nop
+/* 0x0a40 281 ( 1 2) */ sub %g1,1,%o7
+/* 0x0a44 ( 1 2) */ or %g0,-1,%g2
+/* 0x0a48 ( 2 3) */ srl %g2,0,%o4
+/* 0x0a4c ( 2 3) */ add %o7,1,%o0
+/* 0x0a50 279 ( 3 4) */ or %g0,0,%o5
+/* 0x0a54 ( 3 4) */ or %g0,0,%g1
+/* 0x0a58 ( 4 5) */ cmp %o0,3
+/* 0x0a5c ( 4 5) */ bl,pn %icc,.L77000288 ! tprob=0.40
+/* 0x0a60 ( 4 5) */ add %o3,8,%o1
+/* 0x0a64 ( 5 6) */ add %o2,4,%o0
+/* 0x0a68 ( 5 8) */ ld [%o1-8],%g2
+/* 0x0a6c 0 ( 5 6) */ or %g0,%o1,%o3
+/* 0x0a70 279 ( 6 9) */ ld [%o0-4],%g3
+/* 0x0a74 0 ( 6 7) */ or %g0,%o0,%o2
+/* 0x0a78 279 ( 6 7) */ or %g0,2,%g1
+/* 0x0a7c ( 7 10) */ ld [%o3-4],%o0
+/* 0x0a80 ( 8 9) */ sub %g2,%g3,%g2
+/* 0x0a84 ( 9 10) */ or %g0,%g2,%o5
+/* 0x0a88 ( 9 10) */ and %g2,%o4,%g2
+/* 0x0a8c ( 9 10) */ st %g2,[%o3-8]
+/* 0x0a90 (10 11) */ srax %o5,32,%o5
+ .L900000734: /* frequency 64.0 confidence 0.0 */
+/* 0x0a94 (12 20) */ ld [%o2],%g2
+/* 0x0a98 (12 13) */ add %g1,1,%g1
+/* 0x0a9c (12 13) */ add %o2,4,%o2
+/* 0x0aa0 (13 13) */ cmp %g1,%o7
+/* 0x0aa4 (13 14) */ add %o3,4,%o3
+/* 0x0aa8 (14 14) */ sub %o0,%g2,%o0
+/* 0x0aac (15 15) */ add %o0,%o5,%o5
+/* 0x0ab0 (16 17) */ and %o5,%o4,%g2
+/* 0x0ab4 (16 24) */ ld [%o3-4],%o0
+/* 0x0ab8 (17 18) */ st %g2,[%o3-8]
+/* 0x0abc (17 18) */ ble,pt %icc,.L900000734 ! tprob=0.50
+/* 0x0ac0 (17 18) */ srax %o5,32,%o5
+ .L900000737: /* frequency 8.0 confidence 0.0 */
+/* 0x0ac4 ( 0 3) */ ld [%o2],%o1
+/* 0x0ac8 ( 2 3) */ sub %o0,%o1,%o0
+/* 0x0acc ( 3 4) */ add %o0,%o5,%o0
+/* 0x0ad0 ( 4 5) */ and %o0,%o4,%o1
+/* 0x0ad4 ( 4 5) */ st %o1,[%o3-4]
+/* 0x0ad8 ( 5 7) */ ret ! Result =
+/* 0x0adc ( 7 8) */ restore %g0,%g0,%g0
+ .L77000288: /* frequency 0.6 confidence 0.0 */
+/* 0x0ae0 ( 0 3) */ ld [%o3],%o0
+ .L900000742: /* frequency 5.3 confidence 0.0 */
+/* 0x0ae4 ( 0 3) */ ld [%o2],%o1
+/* 0x0ae8 ( 0 1) */ add %o5,%o0,%o0
+/* 0x0aec ( 0 1) */ add %g1,1,%g1
+/* 0x0af0 ( 1 2) */ add %o2,4,%o2
+/* 0x0af4 ( 1 2) */ cmp %g1,%o7
+/* 0x0af8 ( 2 3) */ sub %o0,%o1,%o0
+/* 0x0afc ( 3 4) */ and %o0,%o4,%o1
+/* 0x0b00 ( 3 4) */ st %o1,[%o3]
+/* 0x0b04 ( 3 4) */ add %o3,4,%o3
+/* 0x0b08 ( 4 5) */ srax %o0,32,%o5
+/* 0x0b0c ( 4 5) */ ble,a,pt %icc,.L900000742 ! tprob=0.84
+/* 0x0b10 ( 4 7) */ ld [%o3],%o0
+ .L77000224: /* frequency 1.0 confidence 0.0 */
+/* 0x0b14 ( 0 2) */ ret ! Result =
+/* 0x0b18 ( 2 3) */ restore %g0,%g0,%g0
+/* 0x0b1c 0 ( 0 0) */ .type mont_mulf_noconv,2
+/* 0x0b1c ( 0 0) */ .size mont_mulf_noconv,(.-mont_mulf_noconv)
+
diff --git a/security/nss/lib/freebl/mpi/montmulfv8.il b/security/nss/lib/freebl/mpi/montmulfv8.il
new file mode 100644
index 000000000..4952d0fb8
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/montmulfv8.il
@@ -0,0 +1,108 @@
+!
+! This Source Code Form is subject to the terms of the Mozilla Public
+! License, v. 2.0. If a copy of the MPL was not distributed with this
+! file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+! Inline template: returns the upper 32-bit word of (long long)x, converted back to double.
+! double upper32(double /*frs1*/);
+! The argument register pair %o0:%o1 is bounced through the stack into %f10:%f11.
+ .inline upper32,8
+ std %o0,[%sp+0x48] ! spill the argument register pair to a stack scratch slot
+ ldd [%sp+0x48],%f10 ! reload it as a double in %f10:%f11
+
+ fdtox %f10,%f10 ! double -> 64-bit integer; its high word lands in %f10
+ fitod %f10,%f0 ! high word, read as a signed 32-bit integer -> double result in %f0
+ .end
+
+! Inline template: keeps the low 32-bit word of (long long)x and converts it to double.
+! double lower32(double /*frs1*/, double /* Zero */);
+! The high word of the second argument (expected to be 0.0) overwrites the integer's high word.
+ .inline lower32,8
+ std %o0,[%sp+0x48] ! first argument: %o0:%o1 -> stack scratch slot
+ ldd [%sp+0x48],%f10 ! ... -> %f10:%f11
+ std %o2,[%sp+0x48] ! second argument: %o2:%o3 -> same scratch slot
+ ldd [%sp+0x48],%f12 ! ... -> %f12:%f13
+
+ fdtox %f10,%f10 ! double -> 64-bit integer in %f10:%f11
+ fmovs %f12,%f10 ! replace the high word with the high word of the second argument
+ fxtod %f10,%f0 ! 64-bit integer -> double result in %f0
+ .end
+
+! Inline template: computes x - m * trunc(x * (1/m)), where trunc is a round trip through a 64-bit integer.
+! double mod(double /*x*/, double /*1/m*/, double /*m*/);
+! x arrives in %o0:%o1, 1/m in %o2:%o3 and m in %o4:%o5; each is bounced through the stack.
+ .inline mod,12
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f2 ! %f2 = x
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f4 ! %f4 = 1/m
+ std %o4,[%sp+0x48]
+ ldd [%sp+0x48],%f6 ! %f6 = m
+
+ fmuld %f2,%f4,%f4 ! x * (1/m)
+ fdtox %f4,%f4 ! convert the quotient to a 64-bit integer ...
+ fxtod %f4,%f4 ! ... and back to double, dropping the fraction
+ fmuld %f4,%f6,%f4 ! integral quotient * m
+ fsubd %f2,%f4,%f0 ! result = x - that product
+ .end
+
+
+! Inline template: expands four 32-bit source words into doubles, both whole and as 16-bit halves.
+! void i16_to_d16_and_d32x4(double * /*1/(2^16)*/, double * /* 2^16*/,
+! double * /* 0 */,
+! double * /*result16*/, double * /* result32 */,
+! float * /*source - should be unsigned int*
+! converted to float* */);
+! result32[k] = word k; result16[2k] = its low 16 bits; result16[2k+1] = its high 16 bits (k = 0..3).
+ .inline i16_to_d16_and_d32x4,24
+ ldd [%o0],%f2 ! 1/(2^16)
+ ldd [%o1],%f4 ! 2^16
+ ldd [%o2],%f22 ! 0.0; supplies the zero high word for the 64-bit integers built below
+
+ fmovd %f22,%f6 ! %f6:%f7 = 0 ...
+ ld [%o5],%f7 ! ... then source word 0 goes into the low word
+ fmovd %f22,%f10
+ ld [%o5+4],%f11 ! source word 1
+ fmovd %f22,%f14
+ ld [%o5+8],%f15 ! source word 2
+ fmovd %f22,%f18
+ ld [%o5+12],%f19 ! source word 3
+ fxtod %f6,%f6 ! 64-bit integer -> double
+ std %f6,[%o4] ! result32[0]
+ fxtod %f10,%f10
+ std %f10,[%o4+8] ! result32[1]
+ fxtod %f14,%f14
+ std %f14,[%o4+16] ! result32[2]
+ fxtod %f18,%f18
+ std %f18,[%o4+24] ! result32[3]
+ fmuld %f2,%f6,%f8 ! word * 2^-16
+ fmuld %f2,%f10,%f12
+ fmuld %f2,%f14,%f16
+ fmuld %f2,%f18,%f20
+ fdtox %f8,%f8 ! integer conversion leaves the high 16 bits of the word
+ fdtox %f12,%f12
+ fdtox %f16,%f16
+ fdtox %f20,%f20
+ fxtod %f8,%f8 ! high half back to double
+ std %f8,[%o3+8] ! result16[1]
+ fxtod %f12,%f12
+ std %f12,[%o3+24] ! result16[3]
+ fxtod %f16,%f16
+ std %f16,[%o3+40] ! result16[5]
+ fxtod %f20,%f20
+ std %f20,[%o3+56] ! result16[7]
+ fmuld %f8,%f4,%f8 ! high half * 2^16
+ fmuld %f12,%f4,%f12
+ fmuld %f16,%f4,%f16
+ fmuld %f20,%f4,%f20
+ fsubd %f6,%f8,%f8 ! word - high*2^16 = low 16 bits
+ std %f8,[%o3] ! result16[0]
+ fsubd %f10,%f12,%f12
+ std %f12,[%o3+16] ! result16[2]
+ fsubd %f14,%f16,%f16
+ std %f16,[%o3+32] ! result16[4]
+ fsubd %f18,%f20,%f20
+ std %f20,[%o3+48] ! result16[6]
+ .end
+
+
diff --git a/security/nss/lib/freebl/mpi/montmulfv8.s b/security/nss/lib/freebl/mpi/montmulfv8.s
new file mode 100644
index 000000000..ca738880f
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/montmulfv8.s
@@ -0,0 +1,1818 @@
+!
+! This Source Code Form is subject to the terms of the Mozilla Public
+! License, v. 2.0. If a copy of the MPL was not distributed with this
+! file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+ .section ".text",#alloc,#execinstr
+ .file "montmulf.c"
+
+ .section ".rodata",#alloc
+ .global TwoTo16
+ .align 8
+! Each constant below is one 8-byte double emitted as two 32-bit words, high word first.
+! CONSTANT POOL
+!
+ .global TwoTo16
+TwoTo16:
+ .word 1089470464 ! 0x40F00000: high word of the double 65536.0 (2^16)
+ .word 0 ! low word
+ .type TwoTo16,#object
+ .size TwoTo16,8
+ .global TwoToMinus16
+!
+! CONSTANT POOL
+!
+ .global TwoToMinus16
+TwoToMinus16:
+ .word 1055916032 ! 0x3EF00000: high word of the double 1.0/65536.0 (2^-16)
+ .word 0 ! low word
+ .type TwoToMinus16,#object
+ .size TwoToMinus16,8
+ .global Zero
+!
+! CONSTANT POOL
+!
+ .global Zero
+Zero:
+ .word 0 ! high word of the double 0.0
+ .word 0 ! low word
+ .type Zero,#object
+ .size Zero,8
+ .global TwoTo32
+!
+! CONSTANT POOL
+!
+ .global TwoTo32
+TwoTo32:
+ .word 1106247680 ! 0x41F00000: high word of the double 4294967296.0 (2^32)
+ .word 0 ! low word
+ .type TwoTo32,#object
+ .size TwoTo32,8
+ .global TwoToMinus32
+!
+! CONSTANT POOL
+!
+ .global TwoToMinus32
+TwoToMinus32:
+ .word 1039138816 ! 0x3DF00000: high word of the double 2^-32
+ .word 0 ! low word
+ .type TwoToMinus32,#object
+ .size TwoToMinus32,8
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 */ .align 4
+! Folds each pair d16[2i], d16[2i+1] into the 32-bit word i32[i], carrying overflow into the next word.
+! SUBROUTINE conv_d16_to_i32
+! Register arguments after save: %i0 = i32, %i1 = d16, %i2 = tmp (never referenced), %i3 = ilen.
+! OFFSET SOURCE LINE LABEL INSTRUCTION
+
+ .global conv_d16_to_i32
+ conv_d16_to_i32:
+/* 000000 */ save %sp,-128,%sp ! new register window and a 128-byte stack frame
+! FILE montmulf.c
+
+! 36 !#define RF_INLINE_MACROS
+! 38 !static const double TwoTo16=65536.0;
+! 39 !static const double TwoToMinus16=1.0/65536.0;
+! 40 !static const double Zero=0.0;
+! 41 !static const double TwoTo32=65536.0*65536.0;
+! 42 !static const double TwoToMinus32=1.0/(65536.0*65536.0);
+! 44 !#ifdef RF_INLINE_MACROS
+! 46 !double upper32(double);
+! 47 !double lower32(double, double);
+! 48 !double mod(double, double, double);
+! 50 !void i16_to_d16_and_d32x4(const double * /*1/(2^16)*/,
+! 51 ! const double * /* 2^16*/,
+! 52 ! const double * /* 0 */,
+! 53 ! double * /*result16*/,
+! 54 ! double * /* result32 */,
+! 55 ! float * /*source - should be unsigned int*
+! 56 ! converted to float* */);
+! 58 !#else
+! 60 !static double upper32(double x)
+! 61 !{
+! 62 ! return floor(x*TwoToMinus32);
+! 63 !}
+! 65 !static double lower32(double x, double y)
+! 66 !{
+! 67 ! return x-TwoTo32*floor(x*TwoToMinus32);
+! 68 !}
+! 70 !static double mod(double x, double oneoverm, double m)
+! 71 !{
+! 72 ! return x-m*floor(x*oneoverm);
+! 73 !}
+! 75 !#endif
+! 78 !static void cleanup(double *dt, int from, int tlen)
+! 79 !{
+! 80 ! int i;
+! 81 ! double tmp,tmp1,x,x1;
+! 83 ! tmp=tmp1=Zero;
+! 84 ! /* original code **
+! 85 ! for(i=2*from;i<2*tlen-2;i++)
+! 86 ! {
+! 87 ! x=dt[i];
+! 88 ! dt[i]=lower32(x,Zero)+tmp1;
+! 89 ! tmp1=tmp;
+! 90 ! tmp=upper32(x);
+! 91 ! }
+! 92 ! dt[tlen-2]+=tmp1;
+! 93 ! dt[tlen-1]+=tmp;
+! 94 ! **end original code ***/
+! 95 ! /* new code ***/
+! 96 ! for(i=2*from;i<2*tlen;i+=2)
+! 97 ! {
+! 98 ! x=dt[i];
+! 99 ! x1=dt[i+1];
+! 100 ! dt[i]=lower32(x,Zero)+tmp;
+! 101 ! dt[i+1]=lower32(x1,Zero)+tmp1;
+! 102 ! tmp=upper32(x);
+! 103 ! tmp1=upper32(x1);
+! 104 ! }
+! 105 ! /** end new code **/
+! 106 !}
+! 109 !void conv_d16_to_i32(unsigned int *i32, double *d16, long long *tmp, int ilen)
+! 110 !{
+! 111 !int i;
+! 112 !long long t, t1, a, b, c, d;
+! 114 ! t1=0;
+! 115 ! a=(long long)d16[0];
+
+/* 0x0004 115 */ ldd [%i1],%f0 ! %f0 = d16[0]
+/* 0x0008 110 */ or %g0,%i1,%o0 ! %o0 = d16
+
+! 116 ! b=(long long)d16[1];
+! 117 ! for(i=0; i<ilen-1; i++)
+
+/* 0x000c 117 */ sub %i3,1,%g2 ! %g2 = ilen-1
+/* 0x0010 */ cmp %g2,0 ! flags consumed by the ble at 0x0050
+/* 0x0014 114 */ or %g0,0,%o4 ! t1 = 0
+/* 0x0018 115 */ fdtox %f0,%f0 ! (long long)d16[0]
+/* 0x001c */ std %f0,[%sp+120] ! bounce through the stack to reach an integer register
+/* 0x0020 117 */ or %g0,0,%o7 ! i = 0
+/* 0x0024 110 */ or %g0,%i3,%o1 ! value is overwritten by the sethi at 0x0030 before being read
+/* 0x0028 */ sub %i3,2,%o2 ! %o2 = ilen-2, the last loop index
+/* 0x002c 116 */ ldd [%o0+8],%f0 ! %f0 = d16[1]
+/* 0x0030 110 */ sethi %hi(0xfc00),%o1
+/* 0x0034 */ add %o2,1,%g3 ! %g3 = ilen-1, the loop trip count
+/* 0x0038 */ add %o1,1023,%o1 ! %o1 = 0xffff
+/* 0x003c */ or %g0,%i0,%o5 ! %o5 = write cursor into i32
+/* 0x0040 116 */ fdtox %f0,%f0 ! (long long)d16[1]
+/* 0x0044 */ std %f0,[%sp+112]
+/* 0x0048 */ ldx [%sp+112],%g1 ! %g1 = b
+/* 0x004c 115 */ ldx [%sp+120],%g4 ! %g4 = a
+/* 0x0050 117 */ ble,pt %icc,.L900000117 ! ilen-1 <= 0: no loop iterations, emit only the final word
+/* 0x0054 */ sethi %hi(0xfc00),%g2 ! (delay slot) first half of the 0xffff mask used at .L900000117
+/* 0x0058 110 */ or %g0,-1,%g2
+/* 0x005c 117 */ cmp %g3,3
+/* 0x0060 110 */ srl %g2,0,%o3 ! %o3 = 0xffffffff
+/* 0x0064 117 */ bl,pn %icc,.L77000134 ! fewer than 3 iterations: use the simple loop
+/* 0x0068 */ or %g0,%o0,%g2 ! (delay slot) %g2 = read cursor into d16
+
+! 118 ! {
+! 119 ! c=(long long)d16[2*i+2];
+
+/* 0x006c 119 */ ldd [%o0+16],%f0 ! first iteration, peeled ahead of the main loop
+
+! 120 ! t1+=a&0xffffffff;
+! 121 ! t=(a>>32);
+! 122 ! d=(long long)d16[2*i+3];
+! 123 ! t1+=(b&0xffff)<<16;
+! 124 ! t+=(b>>16)+(t1>>32);
+! 125 ! i32[i]=t1&0xffffffff;
+! 126 ! t1=t;
+! 127 ! a=c;
+! 128 ! b=d;
+
+/* 0x0070 128 */ add %o0,16,%g2 ! advance the d16 cursor by one pair of doubles
+/* 0x0074 123 */ and %g1,%o1,%o0 ! b & 0xffff
+/* 0x0078 */ sllx %o0,16,%g3
+/* 0x007c 120 */ and %g4,%o3,%o0 ! a & 0xffffffff
+/* 0x0080 117 */ add %o0,%g3,%o4 ! t1 for word 0 (t1 started at 0)
+/* 0x0084 119 */ fdtox %f0,%f0
+/* 0x0088 */ std %f0,[%sp+104] ! c
+/* 0x008c 125 */ and %o4,%o3,%g5 ! word to be stored in i32[0]
+/* 0x0090 122 */ ldd [%g2+8],%f2
+/* 0x0094 128 */ add %o5,4,%o5
+/* 0x0098 124 */ srax %o4,32,%o4 ! t1 >> 32
+/* 0x009c */ stx %o4,[%sp+112]
+/* 0x00a0 122 */ fdtox %f2,%f0
+/* 0x00a4 */ std %f0,[%sp+96] ! d
+/* 0x00a8 124 */ srax %g1,16,%o0 ! b >> 16
+/* 0x00ac */ ldx [%sp+112],%o7
+/* 0x00b0 121 */ srax %g4,32,%o4 ! a >> 32
+/* 0x00b4 124 */ add %o0,%o7,%g4
+/* 0x00b8 128 */ or %g0,1,%o7 ! i = 1
+/* 0x00bc 119 */ ldx [%sp+104],%g3
+/* 0x00c0 124 */ add %o4,%g4,%o4 ! carry into the next word: (a>>32) + (b>>16) + (t1>>32)
+/* 0x00c4 122 */ ldx [%sp+96],%g1 ! b = d
+/* 0x00c8 125 */ st %g5,[%o5-4] ! i32[0]
+/* 0x00cc 127 */ or %g0,%g3,%g4 ! a = c
+ .L900000112: ! main loop: one output word per pass, next pair converted early
+/* 0x00d0 119 */ ldd [%g2+16],%f0
+/* 0x00d4 128 */ add %o7,1,%o7 ! i++
+/* 0x00d8 */ add %o5,4,%o5
+/* 0x00dc */ cmp %o7,%o2 ! i against ilen-2; consumed by the ble at 0x0138
+/* 0x00e0 */ add %g2,16,%g2
+/* 0x00e4 119 */ fdtox %f0,%f0
+/* 0x00e8 */ std %f0,[%sp+104]
+/* 0x00ec 122 */ ldd [%g2+8],%f0
+/* 0x00f0 */ fdtox %f0,%f0
+/* 0x00f4 */ std %f0,[%sp+96]
+/* 0x00f8 123 */ and %g1,%o1,%g3 ! b & 0xffff
+/* 0x00fc */ sllx %g3,16,%g5
+/* 0x0100 120 */ and %g4,%o3,%g3 ! a & 0xffffffff
+/* 0x0104 117 */ add %g3,%g5,%g3
+/* 0x0108 124 */ srax %g1,16,%g1 ! b >> 16
+/* 0x010c 117 */ add %g3,%o4,%g3 ! add the carried-in t1
+/* 0x0110 124 */ srax %g3,32,%o4 ! t1 >> 32
+/* 0x0114 */ stx %o4,[%sp+112]
+/* 0x0118 119 */ ldx [%sp+104],%g5
+/* 0x011c 121 */ srax %g4,32,%o4 ! a >> 32
+/* 0x0120 124 */ ldx [%sp+112],%g4
+/* 0x0124 */ add %g1,%g4,%g4
+/* 0x0128 122 */ ldx [%sp+96],%g1 ! b = d
+/* 0x012c 124 */ add %o4,%g4,%o4 ! new carry
+/* 0x0130 125 */ and %g3,%o3,%g3
+/* 0x0134 127 */ or %g0,%g5,%g4 ! a = c
+/* 0x0138 128 */ ble,pt %icc,.L900000112 ! repeat while i <= ilen-2
+/* 0x013c */ st %g3,[%o5-4] ! (delay slot) store the word just computed
+ .L900000115:
+/* 0x0140 128 */ ba .L900000117 ! main loop finished: go emit the final word
+/* 0x0144 */ sethi %hi(0xfc00),%g2 ! (delay slot) first half of the 0xffff mask
+ .L77000134: ! simple loop entry, used for fewer than 3 iterations
+/* 0x0148 119 */ ldd [%g2+16],%f0
+ .L900000116:
+/* 0x014c 120 */ and %g4,%o3,%o0 ! a & 0xffffffff
+/* 0x0150 123 */ and %g1,%o1,%g3 ! b & 0xffff
+/* 0x0154 119 */ fdtox %f0,%f0
+/* 0x0158 120 */ add %o4,%o0,%o0 ! t1 += a & 0xffffffff
+/* 0x015c 119 */ std %f0,[%sp+104]
+/* 0x0160 128 */ add %o7,1,%o7 ! i++
+/* 0x0164 123 */ sllx %g3,16,%o4
+/* 0x0168 122 */ ldd [%g2+24],%f2
+/* 0x016c 128 */ add %g2,16,%g2
+/* 0x0170 123 */ add %o0,%o4,%o0 ! t1 += (b & 0xffff) << 16
+/* 0x0174 128 */ cmp %o7,%o2 ! i against ilen-2; consumed by the ble at 0x01b8
+/* 0x0178 125 */ and %o0,%o3,%g3
+/* 0x017c 122 */ fdtox %f2,%f0
+/* 0x0180 */ std %f0,[%sp+96]
+/* 0x0184 124 */ srax %o0,32,%o0 ! t1 >> 32
+/* 0x0188 */ stx %o0,[%sp+112]
+/* 0x018c 121 */ srax %g4,32,%o4 ! a >> 32
+/* 0x0190 122 */ ldx [%sp+96],%o0
+/* 0x0194 124 */ srax %g1,16,%g5 ! b >> 16
+/* 0x0198 */ ldx [%sp+112],%g4
+/* 0x019c 119 */ ldx [%sp+104],%g1
+/* 0x01a0 125 */ st %g3,[%o5] ! i32[i] = t1 & 0xffffffff
+/* 0x01a4 124 */ add %g5,%g4,%g4
+/* 0x01a8 128 */ add %o5,4,%o5
+/* 0x01ac 124 */ add %o4,%g4,%o4 ! new carry
+/* 0x01b0 127 */ or %g0,%g1,%g4 ! a = c
+/* 0x01b4 128 */ or %g0,%o0,%g1 ! b = d
+/* 0x01b8 */ ble,a,pt %icc,.L900000116 ! repeat while i <= ilen-2
+/* 0x01bc */ ldd [%g2+16],%f0 ! (annulled delay slot) next c, fetched only when looping
+ .L77000127:
+
+! 129 ! }
+! 130 ! t1+=a&0xffffffff;
+! 131 ! t=(a>>32);
+! 132 ! t1+=(b&0xffff)<<16;
+! 133 ! i32[i]=t1&0xffffffff;
+
+/* 0x01c0 133 */ sethi %hi(0xfc00),%g2
+ .L900000117: ! final word; every path arrives with %g2 = %hi(0xfc00)
+/* 0x01c4 133 */ or %g0,-1,%g3
+/* 0x01c8 */ add %g2,1023,%g2 ! %g2 = 0xffff
+/* 0x01cc */ srl %g3,0,%g3 ! %g3 = 0xffffffff
+/* 0x01d0 */ and %g1,%g2,%g2 ! b & 0xffff
+/* 0x01d4 */ and %g4,%g3,%g4 ! a & 0xffffffff
+/* 0x01d8 */ sllx %g2,16,%g2
+/* 0x01dc */ add %o4,%g4,%g4 ! t1 + (a & 0xffffffff)
+/* 0x01e0 */ add %g4,%g2,%g2 ! ... + ((b & 0xffff) << 16)
+/* 0x01e4 */ sll %o7,2,%g4 ! byte offset i*4
+/* 0x01e8 */ and %g2,%g3,%g2
+/* 0x01ec */ st %g2,[%i0+%g4] ! i32[i] = t1 & 0xffffffff
+/* 0x01f0 */ ret ! Result =
+/* 0x01f4 */ restore %g0,%g0,%g0
+/* 0x01f8 0 */ .type conv_d16_to_i32,2
+/* 0x01f8 */ .size conv_d16_to_i32,(.-conv_d16_to_i32)
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 */ .align 8
+!
+! CONSTANT POOL
+!
+ .L_const_seg_900000201:
+/* 000000 0 */ .word 1127219200,0 ! 0x43300000,0: the double 2^52, used for integer -> double conversion
+/* 0x0008 0 */ .align 4
+/* 0x0008 */ .skip 16
+! Converts the len 32-bit words at i32 into doubles at d32, treating each word as unsigned.
+! SUBROUTINE conv_i32_to_d32
+! Leaf routine (no save): %o0 = d32, %o1 = i32, %o2 = len.
+! OFFSET SOURCE LINE LABEL INSTRUCTION
+
+ .global conv_i32_to_d32
+ conv_i32_to_d32:
+/* 000000 */ or %g0,%o7,%g2 ! keep the return address; the call below overwrites %o7
+
+! 135 !}
+! 137 !void conv_i32_to_d32(double *d32, unsigned int *i32, int len)
+! 138 !{
+! 139 !int i;
+! 141 !#pragma pipeloop(0)
+! 142 ! for(i=0;i<len;i++) d32[i]=(double)(i32[i]);
+
+/* 0x0004 142 */ cmp %o2,0 ! flags consumed by the ble at 0x0020
+ .L900000210:
+/* 0x0008 */ call .+8 ! leaves this instruction's address in %o7 for the GOT computation
+/* 0x000c */ sethi /*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000210-.)),%g4
+/* 0x0010 142 */ or %g0,0,%o5 ! i = 0
+/* 0x0014 138 */ add %g4,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000210-.)),%g4
+/* 0x0018 */ or %g0,%o0,%g5 ! %g5 = write cursor into d32
+/* 0x001c */ add %g4,%o7,%g1 ! %g1 = address of the global offset table
+/* 0x0020 142 */ ble,pt %icc,.L77000140 ! len <= 0: nothing to convert
+/* 0x0024 */ or %g0,%g2,%o7 ! (delay slot) restore the return address
+/* 0x0028 */ sethi %hi(.L_const_seg_900000201),%g2
+/* 0x002c 138 */ or %g0,%o1,%g4 ! %g4 = read cursor into i32
+/* 0x0030 142 */ add %g2,%lo(.L_const_seg_900000201),%g2
+/* 0x0034 */ sub %o2,1,%g3 ! %g3 = len-1
+/* 0x0038 */ ld [%g1+%g2],%g2 ! %g2 = address of the 2^52 constant, fetched from the GOT
+/* 0x003c */ cmp %o2,9
+/* 0x0040 */ bl,pn %icc,.L77000144 ! fewer than 9 elements: one-at-a-time loop
+/* 0x0044 */ ldd [%g2],%f8 ! (delay slot) %f8:%f9 = 2^52
+/* 0x0048 */ add %o1,16,%g4 ! read cursor moves past the four words preloaded below
+/* 0x004c */ sub %o2,5,%g1 ! %g1 = len-5, bound for the main loop
+/* 0x0050 */ ld [%o1],%f7 ! word 0 into the low half of %f6:%f7
+/* 0x0054 */ or %g0,4,%o5 ! i = 4
+/* 0x0058 */ ld [%o1+4],%f5
+/* 0x005c */ ld [%o1+8],%f3
+/* 0x0060 */ fmovs %f8,%f6 ! high half = high word of 2^52, so %f6:%f7 = 2^52 + word
+/* 0x0064 */ ld [%o1+12],%f1
+ .L900000205: ! main loop: five elements per pass
+/* 0x0068 */ ld [%g4],%f11
+/* 0x006c */ add %o5,5,%o5
+/* 0x0070 */ add %g4,20,%g4
+/* 0x0074 */ fsubd %f6,%f8,%f6 ! (2^52 + word) - 2^52 = word as a double
+/* 0x0078 */ std %f6,[%g5]
+/* 0x007c */ cmp %o5,%g1 ! i against len-5; consumed by the ble at 0x00c4
+/* 0x0080 */ add %g5,40,%g5
+/* 0x0084 */ fmovs %f8,%f4
+/* 0x0088 */ ld [%g4-16],%f7
+/* 0x008c */ fsubd %f4,%f8,%f12
+/* 0x0090 */ fmovs %f8,%f2
+/* 0x0094 */ std %f12,[%g5-32]
+/* 0x0098 */ ld [%g4-12],%f5
+/* 0x009c */ fsubd %f2,%f8,%f12
+/* 0x00a0 */ fmovs %f8,%f0
+/* 0x00a4 */ std %f12,[%g5-24]
+/* 0x00a8 */ ld [%g4-8],%f3
+/* 0x00ac */ fsubd %f0,%f8,%f12
+/* 0x00b0 */ fmovs %f8,%f10
+/* 0x00b4 */ std %f12,[%g5-16]
+/* 0x00b8 */ ld [%g4-4],%f1
+/* 0x00bc */ fsubd %f10,%f8,%f10
+/* 0x00c0 */ fmovs %f8,%f6
+/* 0x00c4 */ ble,pt %icc,.L900000205 ! repeat while i <= len-5
+/* 0x00c8 */ std %f10,[%g5-8] ! (delay slot) fifth store of this pass
+ .L900000208: ! convert and store the four words that are already loaded
+/* 0x00cc */ fmovs %f8,%f4
+/* 0x00d0 */ add %g5,32,%g5
+/* 0x00d4 */ cmp %o5,%g3 ! i against len-1; consumed by the bg at 0x00fc
+/* 0x00d8 */ fmovs %f8,%f2
+/* 0x00dc */ fmovs %f8,%f0
+/* 0x00e0 */ fsubd %f6,%f8,%f6
+/* 0x00e4 */ std %f6,[%g5-32]
+/* 0x00e8 */ fsubd %f4,%f8,%f4
+/* 0x00ec */ std %f4,[%g5-24]
+/* 0x00f0 */ fsubd %f2,%f8,%f2
+/* 0x00f4 */ std %f2,[%g5-16]
+/* 0x00f8 */ fsubd %f0,%f8,%f0
+/* 0x00fc */ bg,pn %icc,.L77000140 ! i > len-1: every element is done
+/* 0x0100 */ std %f0,[%g5-8] ! (delay slot)
+ .L77000144: ! one-at-a-time loop for short inputs and leftovers
+/* 0x0104 */ ld [%g4],%f1
+ .L900000211:
+/* 0x0108 */ ldd [%g2],%f8 ! reload 2^52
+/* 0x010c */ add %o5,1,%o5 ! i++
+/* 0x0110 */ add %g4,4,%g4
+/* 0x0114 */ cmp %o5,%g3 ! i against len-1; consumed by the ble at 0x0128
+/* 0x0118 */ fmovs %f8,%f0 ! %f0:%f1 = 2^52 + word
+/* 0x011c */ fsubd %f0,%f8,%f0 ! word as a double
+/* 0x0120 */ std %f0,[%g5] ! d32[i]
+/* 0x0124 */ add %g5,8,%g5
+/* 0x0128 */ ble,a,pt %icc,.L900000211 ! repeat while i <= len-1
+/* 0x012c */ ld [%g4],%f1 ! (annulled delay slot) next word, fetched only when looping
+ .L77000140:
+/* 0x0130 */ retl ! Result =
+/* 0x0134 */ nop
+/* 0x0138 0 */ .type conv_i32_to_d32,2
+/* 0x0138 */ .size conv_i32_to_d32,(.-conv_i32_to_d32)
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 */ .align 8
+!
+! CONSTANT POOL
+!
+ .L_const_seg_900000301:
+/* 000000 0 */ .word 1127219200,0 ! 0x43300000,0: the double 2^52, used for integer -> double conversion
+/* 0x0008 0 */ .align 4
+! Splits each 32-bit word of i32 into two doubles: d16[2i] = low 16 bits, d16[2i+1] = high 16 bits.
+! SUBROUTINE conv_i32_to_d16
+! Register arguments after save: %i0 = d16, %i1 = i32, %i2 = len.
+! OFFSET SOURCE LINE LABEL INSTRUCTION
+
+ .global conv_i32_to_d16
+ conv_i32_to_d16:
+/* 000000 */ save %sp,-104,%sp ! new register window and a 104-byte stack frame
+/* 0x0004 */ or %g0,%i2,%o0 ! %o0 = len
+
+! 143 !}
+! 146 !void conv_i32_to_d16(double *d16, unsigned int *i32, int len)
+! 147 !{
+! 148 !int i;
+! 149 !unsigned int a;
+! 151 !#pragma pipeloop(0)
+! 152 ! for(i=0;i<len;i++)
+! 153 ! {
+! 154 ! a=i32[i];
+! 155 ! d16[2*i]=(double)(a&0xffff);
+! 156 ! d16[2*i+1]=(double)(a>>16);
+
+/* 0x0008 156 */ sethi %hi(.L_const_seg_900000301),%g2
+ .L900000310:
+/* 0x000c */ call .+8 ! leaves this instruction's address in %o7 for the GOT computation
+/* 0x0010 */ sethi /*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000310-.)),%g3
+/* 0x0014 152 */ cmp %o0,0 ! flags consumed by the ble at 0x001c
+/* 0x0018 147 */ add %g3,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000310-.)),%g3
+/* 0x001c 152 */ ble,pt %icc,.L77000150 ! len <= 0: return immediately
+/* 0x0020 */ add %g3,%o7,%o2 ! (delay slot) %o2 = address of the global offset table
+/* 0x0024 */ sub %i2,1,%o5 ! %o5 = len-1
+/* 0x0028 156 */ add %g2,%lo(.L_const_seg_900000301),%o1
+/* 0x002c 152 */ sethi %hi(0xfc00),%o0
+/* 0x0030 */ ld [%o2+%o1],%o3 ! %o3 = address of the 2^52 constant, fetched from the GOT
+/* 0x0034 */ add %o5,1,%g2
+/* 0x0038 */ or %g0,0,%g1 ! i = 0
+/* 0x003c */ cmp %g2,3
+/* 0x0040 */ or %g0,%i1,%o7 ! %o7 = read cursor into i32
+/* 0x0044 */ add %o0,1023,%o4 ! %o4 = 0xffff
+/* 0x0048 */ or %g0,%i0,%g3 ! %g3 = write cursor into d16
+/* 0x004c */ bl,pn %icc,.L77000154 ! len < 3: use the simple loop
+/* 0x0050 */ add %o7,4,%o0 ! (delay slot)
+/* 0x0054 155 */ ldd [%o3],%f0 ! %f0:%f1 = 2^52
+/* 0x0058 156 */ or %g0,1,%g1
+/* 0x005c 154 */ ld [%o0-4],%o1 ! a = i32[0]
+/* 0x0060 0 */ or %g0,%o0,%o7
+/* 0x0064 155 */ and %o1,%o4,%o0 ! a & 0xffff
+ .L900000306: ! main loop: one element per pass, next word loaded early
+/* 0x0068 155 */ st %o0,[%sp+96] ! move a & 0xffff to the FPU through the stack
+/* 0x006c 156 */ add %g1,1,%g1
+/* 0x0070 */ add %g3,16,%g3
+/* 0x0074 */ cmp %g1,%o5 ! counter against len-1; consumed by the ble at 0x00a8
+/* 0x0078 */ add %o7,4,%o7
+/* 0x007c 155 */ ld [%sp+96],%f3 ! low half of %f2:%f3
+/* 0x0080 */ fmovs %f0,%f2 ! high half = high word of 2^52
+/* 0x0084 */ fsubd %f2,%f0,%f2 ! (2^52 + value) - 2^52 = value as a double
+/* 0x0088 156 */ srl %o1,16,%o0 ! a >> 16
+/* 0x008c 155 */ std %f2,[%g3-16] ! d16[2i]
+/* 0x0090 156 */ st %o0,[%sp+92]
+/* 0x0094 */ ld [%sp+92],%f3
+/* 0x0098 154 */ ld [%o7-4],%o1 ! next a
+/* 0x009c 156 */ fmovs %f0,%f2
+/* 0x00a0 */ fsubd %f2,%f0,%f2
+/* 0x00a4 155 */ and %o1,%o4,%o0 ! next a & 0xffff
+/* 0x00a8 156 */ ble,pt %icc,.L900000306 ! repeat while the counter is <= len-1
+/* 0x00ac */ std %f2,[%g3-8] ! (delay slot) d16[2i+1]
+ .L900000309: ! last element, whose word is already loaded in %o1
+/* 0x00b0 155 */ st %o0,[%sp+96]
+/* 0x00b4 */ fmovs %f0,%f2
+/* 0x00b8 156 */ add %g3,16,%g3
+/* 0x00bc */ srl %o1,16,%o0
+/* 0x00c0 155 */ ld [%sp+96],%f3
+/* 0x00c4 */ fsubd %f2,%f0,%f2
+/* 0x00c8 */ std %f2,[%g3-16]
+/* 0x00cc 156 */ st %o0,[%sp+92]
+/* 0x00d0 */ fmovs %f0,%f2
+/* 0x00d4 */ ld [%sp+92],%f3
+/* 0x00d8 */ fsubd %f2,%f0,%f0
+/* 0x00dc */ std %f0,[%g3-8]
+/* 0x00e0 */ ret ! Result =
+/* 0x00e4 */ restore %g0,%g0,%g0
+ .L77000154: ! simple loop, used when len < 3
+/* 0x00e8 154 */ ld [%o7],%o0 ! a = i32[i]
+ .L900000311:
+/* 0x00ec 155 */ and %o0,%o4,%o1 ! a & 0xffff
+/* 0x00f0 */ st %o1,[%sp+96]
+/* 0x00f4 156 */ add %g1,1,%g1 ! i++
+/* 0x00f8 155 */ ldd [%o3],%f0 ! reload 2^52
+/* 0x00fc 156 */ srl %o0,16,%o0 ! a >> 16
+/* 0x0100 */ add %o7,4,%o7
+/* 0x0104 */ cmp %g1,%o5 ! i against len-1; consumed by the ble at 0x0130
+/* 0x0108 155 */ fmovs %f0,%f2
+/* 0x010c */ ld [%sp+96],%f3
+/* 0x0110 */ fsubd %f2,%f0,%f2
+/* 0x0114 */ std %f2,[%g3] ! d16[2i]
+/* 0x0118 156 */ st %o0,[%sp+92]
+/* 0x011c */ fmovs %f0,%f2
+/* 0x0120 */ ld [%sp+92],%f3
+/* 0x0124 */ fsubd %f2,%f0,%f0
+/* 0x0128 */ std %f0,[%g3+8] ! d16[2i+1]
+/* 0x012c */ add %g3,16,%g3
+/* 0x0130 */ ble,a,pt %icc,.L900000311 ! repeat while i <= len-1
+/* 0x0134 */ ld [%o7],%o0 ! (annulled delay slot) next word, fetched only when looping
+ .L77000150:
+/* 0x0138 */ ret ! Result =
+/* 0x013c */ restore %g0,%g0,%g0
+/* 0x0140 0 */ .type conv_i32_to_d16,2
+/* 0x0140 */ .size conv_i32_to_d16,(.-conv_i32_to_d16)
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 */ .align 8
+!
+! CONSTANT POOL
+!
+ .L_const_seg_900000401:
+/* 000000 0 */ .word 1127219200,0 ! 0x43300000,0: the double 2^52, used for integer -> double conversion
+/* 0x0008 0 */ .align 4
+/* 0x0008 */ .skip 16
+! Converts i32[0..len-1] to doubles in d32 and to 16-bit halves (low, high as doubles) in d16.
+! SUBROUTINE conv_i32_to_d32_and_d16
+! Register arguments after save: %i0 = d32, %i1 = d16, %i2 = i32, %i3 = len.
+! OFFSET SOURCE LINE LABEL INSTRUCTION
+
+ .global conv_i32_to_d32_and_d16
+ conv_i32_to_d32_and_d16:
+/* 000000 */ save %sp,-120,%sp ! new register window and a 120-byte stack frame
+ .L900000415:
+/* 0x0004 */ call .+8 ! leaves this instruction's address in %o7 for the GOT computation
+/* 0x0008 */ sethi /*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000415-.)),%g4
+
+! 157 ! }
+! 158 !}
+! 161 !void conv_i32_to_d32_and_d16(double *d32, double *d16,
+! 162 ! unsigned int *i32, int len)
+! 163 !{
+! 164 !int i = 0;
+! 165 !unsigned int a;
+! 167 !#pragma pipeloop(0)
+! 168 !#ifdef RF_INLINE_MACROS
+! 169 ! for(;i<len-3;i+=4)
+
+/* 0x000c 169 */ sub %i3,3,%g2 ! len-3
+/* 0x0010 */ cmp %g2,0 ! flags consumed by the ble at 0x004c
+/* 0x0014 163 */ add %g4,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000415-.)),%g4
+
+! 170 ! {
+! 171 ! i16_to_d16_and_d32x4(&TwoToMinus16, &TwoTo16, &Zero,
+! 172 ! &(d16[2*i]), &(d32[i]), (float *)(&(i32[i])));
+
+/* 0x0018 172 */ sethi %hi(Zero),%g2
+/* 0x001c 163 */ add %g4,%o7,%o4 ! %o4 = address of the global offset table
+/* 0x0020 172 */ add %g2,%lo(Zero),%g2
+/* 0x0024 */ sethi %hi(TwoToMinus16),%g3
+/* 0x0028 */ ld [%o4+%g2],%o1 ! %o1 = &Zero, fetched from the GOT
+/* 0x002c */ sethi %hi(TwoTo16),%g4
+/* 0x0030 */ add %g3,%lo(TwoToMinus16),%g2
+/* 0x0034 */ ld [%o4+%g2],%o3 ! %o3 = &TwoToMinus16
+/* 0x0038 164 */ or %g0,0,%g5 ! i = 0
+/* 0x003c 172 */ add %g4,%lo(TwoTo16),%g3
+/* 0x0040 */ ld [%o4+%g3],%o2 ! %o2 = &TwoTo16
+/* 0x0044 163 */ or %g0,%i0,%i4 ! %i4 = d32
+/* 0x0048 169 */ or %g0,%i2,%o7 ! %o7 = read cursor into i32
+/* 0x004c */ ble,pt %icc,.L900000418 ! len-3 <= 0: skip the four-at-a-time loop
+/* 0x0050 */ cmp %g5,%i3 ! (delay slot) i against len, for the bge at .L900000418
+/* 0x0054 172 */ stx %o7,[%sp+104] ! spill the i32 cursor
+/* 0x0058 169 */ sub %i3,4,%o5 ! %o5 = len-4
+/* 0x005c */ or %g0,0,%g4 ! byte offset into d32
+/* 0x0060 */ or %g0,0,%g1 ! byte offset into d16
+ .L900000417: ! four-at-a-time loop: inlined body of i16_to_d16_and_d32x4
+/* 0x0064 */ ldd [%o1],%f2 ! Zero; supplies the zero high word of each 64-bit integer
+/* 0x0068 172 */ add %i4,%g4,%g2 ! &d32[i]
+/* 0x006c */ add %i1,%g1,%g3 ! &d16[2*i]
+/* 0x0070 */ ldd [%o3],%f0 ! 2^-16
+/* 0x0074 */ add %g5,4,%g5 ! i += 4
+/* 0x0078 */ fmovd %f2,%f14
+/* 0x007c */ ld [%o7],%f15 ! source word 0 into the low half
+/* 0x0080 */ cmp %g5,%o5 ! i against len-4; consumed by the ble at 0x0144
+/* 0x0084 */ fmovd %f2,%f10
+/* 0x0088 */ ld [%o7+4],%f11 ! source word 1
+/* 0x008c */ add %o7,16,%o7
+/* 0x0090 */ ldx [%sp+104],%o0 ! cursor value from before the increment
+/* 0x0094 */ fmovd %f2,%f6
+/* 0x0098 */ stx %o7,[%sp+112]
+/* 0x009c */ fxtod %f14,%f14 ! 64-bit integer -> double
+/* 0x00a0 */ ld [%o0+8],%f7 ! source word 2
+/* 0x00a4 */ fxtod %f10,%f10
+/* 0x00a8 */ ld [%o0+12],%f3 ! source word 3, placed directly in the low half of %f2:%f3
+/* 0x00ac */ fxtod %f6,%f6
+/* 0x00b0 */ ldd [%o2],%f16 ! 2^16
+/* 0x00b4 */ fmuld %f0,%f14,%f12 ! word * 2^-16
+/* 0x00b8 */ fxtod %f2,%f2
+/* 0x00bc */ fmuld %f0,%f10,%f8
+/* 0x00c0 */ std %f14,[%i4+%g4] ! d32[i]
+/* 0x00c4 */ ldx [%sp+112],%o7
+/* 0x00c8 */ add %g4,32,%g4
+/* 0x00cc */ fmuld %f0,%f6,%f4
+/* 0x00d0 */ fdtox %f12,%f12 ! integer conversion leaves the high 16 bits
+/* 0x00d4 */ std %f10,[%g2+8] ! d32[i+1]
+/* 0x00d8 */ fmuld %f0,%f2,%f0
+/* 0x00dc */ fdtox %f8,%f8
+/* 0x00e0 */ std %f6,[%g2+16] ! d32[i+2]
+/* 0x00e4 */ std %f2,[%g2+24] ! d32[i+3]
+/* 0x00e8 */ fdtox %f4,%f4
+/* 0x00ec */ fdtox %f0,%f0
+/* 0x00f0 */ fxtod %f12,%f12
+/* 0x00f4 */ std %f12,[%g3+8] ! d16[2i+1]: high half of word 0
+/* 0x00f8 */ fxtod %f8,%f8
+/* 0x00fc */ std %f8,[%g3+24] ! high half of word 1
+/* 0x0100 */ fxtod %f4,%f4
+/* 0x0104 */ std %f4,[%g3+40] ! high half of word 2
+/* 0x0108 */ fxtod %f0,%f0
+/* 0x010c */ std %f0,[%g3+56] ! high half of word 3
+/* 0x0110 */ fmuld %f12,%f16,%f12 ! high half * 2^16
+/* 0x0114 */ fmuld %f8,%f16,%f8
+/* 0x0118 */ fmuld %f4,%f16,%f4
+/* 0x011c */ fsubd %f14,%f12,%f12 ! word - high*2^16 = low 16 bits
+/* 0x0120 */ std %f12,[%i1+%g1] ! d16[2i]: low half of word 0
+/* 0x0124 */ fmuld %f0,%f16,%f0
+/* 0x0128 */ fsubd %f10,%f8,%f8
+/* 0x012c */ std %f8,[%g3+16] ! low half of word 1
+/* 0x0130 */ add %g1,64,%g1
+/* 0x0134 */ fsubd %f6,%f4,%f4
+/* 0x0138 */ std %f4,[%g3+32] ! low half of word 2
+/* 0x013c */ fsubd %f2,%f0,%f0
+/* 0x0140 */ std %f0,[%g3+48] ! low half of word 3
+/* 0x0144 */ ble,a,pt %icc,.L900000417 ! repeat while i <= len-4
+/* 0x0148 */ stx %o7,[%sp+104] ! (annulled delay slot) spill the cursor only when looping
+ .L77000159:
+
+! 173 ! }
+! 174 !#endif
+! 175 ! for(;i<len;i++)
+
+/* 0x014c 175 */ cmp %g5,%i3
+ .L900000418:
+/* 0x0150 175 */ bge,pt %icc,.L77000164 ! i >= len: nothing left over
+/* 0x0154 */ nop
+
+! 176 ! {
+! 177 ! a=i32[i];
+! 178 ! d32[i]=(double)(i32[i]);
+! 179 ! d16[2*i]=(double)(a&0xffff);
+! 180 ! d16[2*i+1]=(double)(a>>16);
+
+/* 0x0158 180 */ sethi %hi(.L_const_seg_900000401),%g2
+/* 0x015c */ add %g2,%lo(.L_const_seg_900000401),%o1
+/* 0x0160 175 */ sethi %hi(0xfc00),%o0
+/* 0x0164 */ ld [%o4+%o1],%o2 ! %o2 = address of the 2^52 constant, fetched from the GOT
+/* 0x0168 */ sll %g5,2,%o3
+/* 0x016c */ sub %i3,%g5,%g3 ! %g3 = len-i, the number of elements left
+/* 0x0170 */ sll %g5,3,%g2
+/* 0x0174 */ add %o0,1023,%o4 ! %o4 = 0xffff
+/* 0x0178 178 */ ldd [%o2],%f0 ! %f0:%f1 = 2^52
+/* 0x017c */ add %i2,%o3,%o0 ! %o0 = &i32[i]
+/* 0x0180 175 */ cmp %g3,3
+/* 0x0184 */ add %i4,%g2,%o3 ! %o3 = &d32[i]
+/* 0x0188 */ sub %i3,1,%o1 ! %o1 = len-1
+/* 0x018c */ sll %g5,4,%g4
+/* 0x0190 */ bl,pn %icc,.L77000161 ! fewer than 3 left: use the simple loop
+/* 0x0194 */ add %i1,%g4,%o5 ! (delay slot) %o5 = &d16[2*i]
+/* 0x0198 178 */ ld [%o0],%f3 ! one element peeled ahead of the two-at-a-time loop
+/* 0x019c 180 */ add %o3,8,%o3
+/* 0x01a0 177 */ ld [%o0],%o7 ! a = i32[i]
+/* 0x01a4 180 */ add %o5,16,%o5
+/* 0x01a8 */ add %g5,1,%g5
+/* 0x01ac 178 */ fmovs %f0,%f2 ! %f2:%f3 = 2^52 + word
+/* 0x01b0 180 */ add %o0,4,%o0
+/* 0x01b4 179 */ and %o7,%o4,%g1 ! a & 0xffff
+/* 0x01b8 178 */ fsubd %f2,%f0,%f2 ! word as a double
+/* 0x01bc */ std %f2,[%o3-8] ! d32[i]
+/* 0x01c0 180 */ srl %o7,16,%o7 ! a >> 16
+/* 0x01c4 179 */ st %g1,[%sp+96]
+/* 0x01c8 */ fmovs %f0,%f2
+/* 0x01cc */ ld [%sp+96],%f3
+/* 0x01d0 */ fsubd %f2,%f0,%f2
+/* 0x01d4 */ std %f2,[%o5-16] ! d16[2i]
+/* 0x01d8 180 */ st %o7,[%sp+92]
+/* 0x01dc */ fmovs %f0,%f2
+/* 0x01e0 */ ld [%sp+92],%f3
+/* 0x01e4 */ fsubd %f2,%f0,%f2
+/* 0x01e8 */ std %f2,[%o5-8] ! d16[2i+1]
+ .L900000411: ! two elements per pass
+/* 0x01ec 178 */ ld [%o0],%f3
+/* 0x01f0 180 */ add %g5,2,%g5 ! i += 2
+/* 0x01f4 */ add %o5,32,%o5
+/* 0x01f8 177 */ ld [%o0],%o7
+/* 0x01fc 180 */ cmp %g5,%o1 ! i against len-1; consumed by the bl at 0x0288
+/* 0x0200 */ add %o3,16,%o3
+/* 0x0204 178 */ fmovs %f0,%f2
+/* 0x0208 */ fsubd %f2,%f0,%f2
+/* 0x020c */ std %f2,[%o3-16] ! d32, first element of the pair
+/* 0x0210 179 */ and %o7,%o4,%g1
+/* 0x0214 */ st %g1,[%sp+96]
+/* 0x0218 */ ld [%sp+96],%f3
+/* 0x021c */ fmovs %f0,%f2
+/* 0x0220 */ fsubd %f2,%f0,%f2
+/* 0x0224 180 */ srl %o7,16,%o7
+/* 0x0228 179 */ std %f2,[%o5-32] ! low half, first element
+/* 0x022c 180 */ st %o7,[%sp+92]
+/* 0x0230 */ ld [%sp+92],%f3
+/* 0x0234 */ fmovs %f0,%f2
+/* 0x0238 */ fsubd %f2,%f0,%f2
+/* 0x023c */ std %f2,[%o5-24] ! high half, first element
+/* 0x0240 */ add %o0,4,%o0
+/* 0x0244 178 */ ld [%o0],%f3
+/* 0x0248 177 */ ld [%o0],%o7
+/* 0x024c 178 */ fmovs %f0,%f2
+/* 0x0250 */ fsubd %f2,%f0,%f2
+/* 0x0254 */ std %f2,[%o3-8] ! d32, second element of the pair
+/* 0x0258 179 */ and %o7,%o4,%g1
+/* 0x025c */ st %g1,[%sp+96]
+/* 0x0260 */ ld [%sp+96],%f3
+/* 0x0264 */ fmovs %f0,%f2
+/* 0x0268 */ fsubd %f2,%f0,%f2
+/* 0x026c 180 */ srl %o7,16,%o7
+/* 0x0270 179 */ std %f2,[%o5-16] ! low half, second element
+/* 0x0274 180 */ st %o7,[%sp+92]
+/* 0x0278 */ ld [%sp+92],%f3
+/* 0x027c */ fmovs %f0,%f2
+/* 0x0280 */ fsubd %f2,%f0,%f2
+/* 0x0284 */ std %f2,[%o5-8] ! high half, second element
+/* 0x0288 */ bl,pt %icc,.L900000411 ! repeat while i < len-1
+/* 0x028c */ add %o0,4,%o0 ! (delay slot)
+ .L900000414:
+/* 0x0290 180 */ cmp %g5,%i3
+/* 0x0294 */ bge,pn %icc,.L77000164 ! i >= len: done
+/* 0x0298 */ nop
+ .L77000161: ! simple loop: one element per pass
+/* 0x029c 178 */ ld [%o0],%f3
+ .L900000416:
+/* 0x02a0 178 */ ldd [%o2],%f0 ! reload 2^52
+/* 0x02a4 180 */ add %g5,1,%g5 ! i++
+/* 0x02a8 177 */ ld [%o0],%o1 ! a = i32[i]
+/* 0x02ac 180 */ add %o0,4,%o0
+/* 0x02b0 */ cmp %g5,%i3 ! i against len; consumed by the bl at 0x02f8
+/* 0x02b4 178 */ fmovs %f0,%f2
+/* 0x02b8 179 */ and %o1,%o4,%o7 ! a & 0xffff
+/* 0x02bc 178 */ fsubd %f2,%f0,%f2
+/* 0x02c0 */ std %f2,[%o3] ! d32[i]
+/* 0x02c4 180 */ srl %o1,16,%o1 ! a >> 16
+/* 0x02c8 179 */ st %o7,[%sp+96]
+/* 0x02cc 180 */ add %o3,8,%o3
+/* 0x02d0 179 */ fmovs %f0,%f2
+/* 0x02d4 */ ld [%sp+96],%f3
+/* 0x02d8 */ fsubd %f2,%f0,%f2
+/* 0x02dc */ std %f2,[%o5] ! d16[2i]
+/* 0x02e0 180 */ st %o1,[%sp+92]
+/* 0x02e4 */ fmovs %f0,%f2
+/* 0x02e8 */ ld [%sp+92],%f3
+/* 0x02ec */ fsubd %f2,%f0,%f0
+/* 0x02f0 */ std %f0,[%o5+8] ! d16[2i+1]
+/* 0x02f4 */ add %o5,16,%o5
+/* 0x02f8 */ bl,a,pt %icc,.L900000416 ! repeat while i < len
+/* 0x02fc */ ld [%o0],%f3 ! (annulled delay slot) next word, fetched only when looping
+ .L77000164:
+/* 0x0300 */ ret ! Result =
+/* 0x0304 */ restore %g0,%g0,%g0
+/* 0x0308 0 */ .type conv_i32_to_d32_and_d16,2
+/* 0x0308 */ .size conv_i32_to_d32_and_d16,(.-conv_i32_to_d32_and_d16)
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 */ .align 4
+!
+! SUBROUTINE adjust_montf_result: leaf routine (%o0=i32, %o1=nint, %o2=len); subtracts nint from i32 in place when i32[len]!=0 or i32[0..len-1] >= nint[0..len-1] (see C source lines 185-208 below)
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION
+
+ .global adjust_montf_result
+ adjust_montf_result:
+/* 000000 */ or %g0,%o2,%g5 ! %g5 = len (%o2 is reused as a scratch register below)
+
+! 181 ! }
+! 182 !}
+! 185 !void adjust_montf_result(unsigned int *i32, unsigned int *nint, int len)
+! 186 !{
+! 187 !long long acc;
+! 188 !int i;
+! 190 ! if(i32[len]>0) i=-1;
+
+/* 0x0004 190 */ or %g0,-1,%g4 ! i = -1
+/* 0x0008 */ sll %o2,2,%g1
+/* 0x000c */ ld [%o0+%g1],%g1 ! %g1 = i32[len]
+/* 0x0010 */ cmp %g1,0
+/* 0x0014 */ bleu,pn %icc,.L77000175 ! unsigned <= 0, i.e. i32[len] == 0: go compare the words
+/* 0x0018 */ or %g0,%o1,%o3 ! delay slot (not annulled): %o3 = nint
+/* 0x001c */ ba .L900000511
+/* 0x0020 */ cmp %g4,0
+ .L77000175:
+
+! 191 ! else
+! 192 ! {
+! 193 ! for(i=len-1; i>=0; i--)
+
+/* 0x0024 193 */ sub %o2,1,%g4 ! i = len-1
+/* 0x0028 */ sll %g4,2,%g1
+/* 0x002c */ cmp %g4,0
+/* 0x0030 */ bl,pt %icc,.L900000511
+/* 0x0034 */ cmp %g4,0
+/* 0x0038 */ add %o1,%g1,%g2 ! %g2 = &nint[i]
+
+! 194 ! {
+! 195 ! if(i32[i]!=nint[i]) break;
+
+/* 0x003c 195 */ ld [%g2],%o5
+/* 0x0040 193 */ add %o0,%g1,%g3 ! %g3 = &i32[i]
+ .L900000510:
+/* 0x0044 195 */ ld [%g3],%o2
+/* 0x0048 */ sub %g4,1,%g1
+/* 0x004c */ sub %g2,4,%g2
+/* 0x0050 */ sub %g3,4,%g3
+/* 0x0054 */ cmp %o2,%o5
+/* 0x0058 */ bne,pn %icc,.L77000182 ! words differ: break, leaving i in %g4
+/* 0x005c */ nop
+/* 0x0060 0 */ or %g0,%g1,%g4
+/* 0x0064 195 */ cmp %g1,0
+/* 0x0068 */ bge,a,pt %icc,.L900000510
+/* 0x006c */ ld [%g2],%o5 ! annulled delay slot: next nint[i], executed only when looping
+ .L77000182:
+
+! 196 ! }
+! 197 ! }
+! 198 ! if((i<0)||(i32[i]>nint[i]))
+
+/* 0x0070 198 */ cmp %g4,0
+ .L900000511:
+/* 0x0074 198 */ bl,pn %icc,.L77000198 ! i < 0: subtract
+/* 0x0078 */ sll %g4,2,%g2
+/* 0x007c */ ld [%o1+%g2],%g1
+/* 0x0080 */ ld [%o0+%g2],%g2
+/* 0x0084 */ cmp %g2,%g1
+/* 0x0088 */ bleu,pt %icc,.L77000191 ! i32[i] <= nint[i] (unsigned): return without changes
+/* 0x008c */ nop
+ .L77000198:
+
+! 199 ! {
+! 200 ! acc=0;
+! 201 ! for(i=0;i<len;i++)
+
+/* 0x0090 201 */ cmp %g5,0
+/* 0x0094 */ ble,pt %icc,.L77000191 ! len <= 0: nothing to subtract
+/* 0x0098 */ nop
+/* 0x009c */ or %g0,%g5,%g1
+/* 0x00a0 198 */ or %g0,-1,%g2
+/* 0x00a4 */ srl %g2,0,%g3 ! %g3 = 0xffffffff mask for acc&0xffffffff
+/* 0x00a8 */ sub %g5,1,%g4 ! %g4 = len-1, loop bound
+/* 0x00ac 200 */ or %g0,0,%g5 ! acc = 0
+/* 0x00b0 201 */ or %g0,0,%o5 ! i = 0
+/* 0x00b4 198 */ or %g0,%o0,%o4
+/* 0x00b8 */ cmp %g1,3
+/* 0x00bc 201 */ bl,pn %icc,.L77000199 ! len < 3: use the one-word-per-iteration loop
+/* 0x00c0 */ add %o0,8,%g1
+/* 0x00c4 */ add %o1,4,%g2
+
+! 202 ! {
+! 203 ! acc=acc+(unsigned long long)(i32[i])-(unsigned long long)(nint[i]);
+
+/* 0x00c8 203 */ ld [%o0],%o2
+/* 0x00cc */ ld [%o1],%o1
+/* 0x00d0 0 */ or %g0,%g1,%o4
+/* 0x00d4 */ or %g0,%g2,%o3
+/* 0x00d8 203 */ ld [%o0+4],%g1
+
+! 204 ! i32[i]=acc&0xffffffff;
+! 205 ! acc=acc>>32;
+
+/* 0x00dc 205 */ or %g0,2,%o5
+/* 0x00e0 201 */ sub %o2,%o1,%o2
+/* 0x00e4 */ or %g0,%o2,%g5
+/* 0x00e8 204 */ and %o2,%g3,%o2
+/* 0x00ec */ st %o2,[%o0]
+/* 0x00f0 205 */ srax %g5,32,%g5 ! acc >>= 32 (arithmetic shift keeps the borrow)
+ .L900000505:
+/* 0x00f4 203 */ ld [%o3],%o2
+/* 0x00f8 205 */ add %o5,1,%o5
+/* 0x00fc */ add %o3,4,%o3
+/* 0x0100 */ cmp %o5,%g4
+/* 0x0104 */ add %o4,4,%o4
+/* 0x0108 201 */ sub %g1,%o2,%g1
+/* 0x010c */ add %g1,%g5,%g5
+/* 0x0110 204 */ and %g5,%g3,%o2
+/* 0x0114 203 */ ld [%o4-4],%g1
+/* 0x0118 204 */ st %o2,[%o4-8]
+/* 0x011c 205 */ ble,pt %icc,.L900000505
+/* 0x0120 */ srax %g5,32,%g5
+ .L900000508:
+/* 0x0124 203 */ ld [%o3],%g2
+/* 0x0128 201 */ sub %g1,%g2,%g1
+/* 0x012c */ add %g1,%g5,%g1
+/* 0x0130 204 */ and %g1,%g3,%g2
+/* 0x0134 */ retl ! Result =
+/* 0x0138 */ st %g2,[%o4-4]
+ .L77000199:
+/* 0x013c 203 */ ld [%o4],%g1
+ .L900000509:
+/* 0x0140 203 */ ld [%o3],%g2
+/* 0x0144 */ add %g5,%g1,%g1
+/* 0x0148 205 */ add %o5,1,%o5
+/* 0x014c */ add %o3,4,%o3
+/* 0x0150 */ cmp %o5,%g4
+/* 0x0154 203 */ sub %g1,%g2,%g1
+/* 0x0158 204 */ and %g1,%g3,%g2
+/* 0x015c */ st %g2,[%o4]
+/* 0x0160 205 */ add %o4,4,%o4
+/* 0x0164 */ srax %g1,32,%g5
+/* 0x0168 */ ble,a,pt %icc,.L900000509
+/* 0x016c */ ld [%o4],%g1
+ .L77000191:
+/* 0x0170 */ retl ! Result =
+/* 0x0174 */ nop
+/* 0x0178 0 */ .type adjust_montf_result,2
+/* 0x0178 */ .size adjust_montf_result,(.-adjust_montf_result)
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 */ .align 4
+/* 000000 */ .skip 16
+!
+! SUBROUTINE mont_mulf_noconv: see the embedded C source (lines 219-317); the conv_d16_to_i32 and adjust_montf_result steps are expanded in line at the end rather than called
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION
+
+ .global mont_mulf_noconv
+ mont_mulf_noconv:
+/* 000000 */ save %sp,-144,%sp ! new register window with a 144-byte frame
+ .L900000646:
+/* 0x0004 */ call .+8 ! PIC: leaves the address of this call in %o7
+/* 0x0008 */ sethi /*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000646-.)),%g5
+
+! 206 ! }
+! 207 ! }
+! 208 !}
+! 213 !/*
+! 214 !** the lengths of the input arrays should be at least the following:
+! 215 !** result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen]
+! 216 !** all of them should be different from one another
+! 217 !**
+! 218 !*/
+! 219 !void mont_mulf_noconv(unsigned int *result,
+! 220 ! double *dm1, double *dm2, double *dt,
+! 221 ! double *dn, unsigned int *nint,
+! 222 ! int nlen, double dn0)
+! 223 !{
+! 224 ! int i, j, jj;
+! 225 ! int tmp;
+! 226 ! double digit, m2j, nextm2j, a, b;
+! 227 ! double *dptmp, *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0;
+! 229 ! pdm1=&(dm1[0]);
+! 230 ! pdm2=&(dm2[0]);
+! 231 ! pdn=&(dn[0]);
+! 232 ! pdm2[2*nlen]=Zero;
+
+/* 0x000c 232 */ ld [%fp+92],%o1 ! %o1 = nlen (read from the caller's frame)
+/* 0x0010 */ sethi %hi(Zero),%g2
+/* 0x0014 223 */ ldd [%fp+96],%f2 ! %f2 = dn0 (moved to %f16 below)
+/* 0x0018 */ add %g5,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000646-.)),%g5
+/* 0x001c 232 */ add %g2,%lo(Zero),%g2
+/* 0x0020 223 */ st %i0,[%fp+68] ! spill result pointer; reloaded at .L900000653
+/* 0x0024 */ add %g5,%o7,%o3 ! %o3 = GOT base
+
+! 234 ! if (nlen!=16)
+! 235 ! {
+! 236 ! for(i=0;i<4*nlen+2;i++) dt[i]=Zero;
+! 238 ! a=dt[0]=pdm1[0]*pdm2[0];
+! 239 ! digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16);
+
+/* 0x0028 239 */ sethi %hi(TwoToMinus16),%g3
+/* 0x002c 232 */ ld [%o3+%g2],%l0 ! %l0 = &Zero (from the GOT)
+/* 0x0030 239 */ sethi %hi(TwoTo16),%g4
+/* 0x0034 223 */ or %g0,%i2,%o2
+/* 0x0038 */ fmovd %f2,%f16
+/* 0x003c */ st %i5,[%fp+88] ! spill nint; reloaded before the final adjust step
+/* 0x0040 239 */ add %g3,%lo(TwoToMinus16),%g2
+/* 0x0044 223 */ or %g0,%i1,%i2
+/* 0x0048 232 */ ldd [%l0],%f0
+/* 0x004c 239 */ add %g4,%lo(TwoTo16),%g3
+/* 0x0050 223 */ or %g0,%i3,%o0
+/* 0x0054 232 */ sll %o1,4,%g4
+/* 0x0058 239 */ ld [%o3+%g2],%g5 ! %g5 = &TwoToMinus16
+/* 0x005c 223 */ or %g0,%i3,%i1
+/* 0x0060 239 */ ld [%o3+%g3],%g1 ! %g1 = &TwoTo16
+/* 0x0064 232 */ or %g0,%o1,%i0
+/* 0x0068 */ or %g0,%o2,%i3
+/* 0x006c 234 */ cmp %o1,16
+/* 0x0070 */ be,pn %icc,.L77000279 ! nlen == 16: take the unrolled path
+/* 0x0074 */ std %f0,[%o2+%g4] ! delay slot: pdm2[2*nlen] = Zero
+/* 0x0078 236 */ sll %o1,2,%g2
+/* 0x007c */ or %g0,%o0,%o3
+/* 0x0080 232 */ sll %o1,1,%o1
+/* 0x0084 236 */ add %g2,2,%o2
+/* 0x0088 */ cmp %o2,0
+/* 0x008c */ ble,a,pt %icc,.L900000660
+/* 0x0090 */ ldd [%i2],%f0
+
+! 241 ! pdtj=&(dt[0]);
+! 242 ! for(j=jj=0;j<2*nlen;j++,jj++,pdtj++)
+! 243 ! {
+! 244 ! m2j=pdm2[j];
+! 245 ! a=pdtj[0]+pdn[0]*digit;
+! 246 ! b=pdtj[1]+pdm1[0]*pdm2[j+1]+a*TwoToMinus16;
+! 247 ! pdtj[1]=b;
+! 249 !#pragma pipeloop(0)
+! 250 ! for(i=1;i<nlen;i++)
+! 251 ! {
+! 252 ! pdtj[2*i]+=pdm1[i]*m2j+pdn[i]*digit;
+! 253 ! }
+! 254 ! if((jj==30)) {cleanup(dt,j/2+1,2*nlen+1); jj=0;}
+! 255 !
+! 256 ! digit=mod(lower32(b,Zero)*dn0,TwoToMinus16,TwoTo16);
+! 257 ! }
+! 258 ! }
+! 259 ! else
+! 260 ! {
+! 261 ! a=dt[0]=pdm1[0]*pdm2[0];
+! 263 ! dt[65]= dt[64]= dt[63]= dt[62]= dt[61]= dt[60]=
+! 264 ! dt[59]= dt[58]= dt[57]= dt[56]= dt[55]= dt[54]=
+! 265 ! dt[53]= dt[52]= dt[51]= dt[50]= dt[49]= dt[48]=
+! 266 ! dt[47]= dt[46]= dt[45]= dt[44]= dt[43]= dt[42]=
+! 267 ! dt[41]= dt[40]= dt[39]= dt[38]= dt[37]= dt[36]=
+! 268 ! dt[35]= dt[34]= dt[33]= dt[32]= dt[31]= dt[30]=
+! 269 ! dt[29]= dt[28]= dt[27]= dt[26]= dt[25]= dt[24]=
+! 270 ! dt[23]= dt[22]= dt[21]= dt[20]= dt[19]= dt[18]=
+! 271 ! dt[17]= dt[16]= dt[15]= dt[14]= dt[13]= dt[12]=
+! 272 ! dt[11]= dt[10]= dt[ 9]= dt[ 8]= dt[ 7]= dt[ 6]=
+! 273 ! dt[ 5]= dt[ 4]= dt[ 3]= dt[ 2]= dt[ 1]=Zero;
+! 275 ! pdn_0=pdn[0];
+! 276 ! pdm1_0=pdm1[0];
+! 278 ! digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16);
+! 279 ! pdtj=&(dt[0]);
+! 281 ! for(j=0;j<32;j++,pdtj++)
+
+/* 0x0094 281 */ add %g2,2,%o0
+/* 0x0098 236 */ add %g2,1,%o2
+/* 0x009c 281 */ cmp %o0,3
+/* 0x00a0 */ bl,pn %icc,.L77000280
+/* 0x00a4 */ or %g0,1,%o0
+/* 0x00a8 */ add %o3,8,%o3
+/* 0x00ac */ or %g0,1,%o4
+/* 0x00b0 */ std %f0,[%o3-8]
+ .L900000630:
+/* 0x00b4 */ std %f0,[%o3]
+/* 0x00b8 */ add %o4,2,%o4
+/* 0x00bc */ add %o3,16,%o3
+/* 0x00c0 */ cmp %o4,%g2
+/* 0x00c4 */ ble,pt %icc,.L900000630
+/* 0x00c8 */ std %f0,[%o3-8]
+ .L900000633:
+/* 0x00cc */ cmp %o4,%o2
+/* 0x00d0 */ bg,pn %icc,.L77000285
+/* 0x00d4 */ add %o4,1,%o0
+ .L77000280:
+/* 0x00d8 */ std %f0,[%o3]
+ .L900000659:
+/* 0x00dc */ ldd [%l0],%f0
+/* 0x00e0 */ cmp %o0,%o2
+/* 0x00e4 */ add %o3,8,%o3
+/* 0x00e8 */ add %o0,1,%o0
+/* 0x00ec */ ble,a,pt %icc,.L900000659
+/* 0x00f0 */ std %f0,[%o3]
+ .L77000285:
+/* 0x00f4 238 */ ldd [%i2],%f0
+ .L900000660:
+/* 0x00f8 238 */ ldd [%i3],%f2
+/* 0x00fc */ add %o1,1,%o2
+/* 0x0100 242 */ cmp %o1,0
+/* 0x0104 */ sll %o2,1,%o0
+/* 0x0108 */ sub %o1,1,%o1
+/* 0x010c 238 */ fmuld %f0,%f2,%f0
+/* 0x0110 */ std %f0,[%i1]
+/* 0x0114 0 */ or %g0,0,%l1
+/* 0x0118 */ ldd [%l0],%f6
+/* 0x011c */ or %g0,0,%g4
+/* 0x0120 */ or %g0,%o2,%i5
+/* 0x0124 */ ldd [%g5],%f2
+/* 0x0128 */ or %g0,%o1,%g3
+/* 0x012c */ or %g0,%o0,%o3
+/* 0x0130 */ fdtox %f0,%f0
+/* 0x0134 */ ldd [%g1],%f4
+/* 0x0138 246 */ add %i3,8,%o4
+/* 0x013c */ or %g0,0,%l2
+/* 0x0140 */ or %g0,%i1,%o5
+/* 0x0144 */ sub %i0,1,%o7
+/* 0x0148 */ fmovs %f6,%f0
+/* 0x014c */ fxtod %f0,%f0
+/* 0x0150 239 */ fmuld %f0,%f16,%f0
+/* 0x0154 */ fmuld %f0,%f2,%f2
+/* 0x0158 */ fdtox %f2,%f2
+/* 0x015c */ fxtod %f2,%f2
+/* 0x0160 */ fmuld %f2,%f4,%f2
+/* 0x0164 */ fsubd %f0,%f2,%f22
+/* 0x0168 242 */ ble,pt %icc,.L900000653
+/* 0x016c */ sll %i0,4,%g2
+/* 0x0170 246 */ ldd [%i4],%f0
+ .L900000654:
+/* 0x0174 246 */ fmuld %f0,%f22,%f8
+/* 0x0178 */ ldd [%i2],%f0
+/* 0x017c 250 */ cmp %i0,1
+/* 0x0180 246 */ ldd [%o4+%l2],%f6
+/* 0x0184 */ add %i2,8,%o0
+/* 0x0188 250 */ or %g0,1,%o1
+/* 0x018c 246 */ ldd [%o5],%f2
+/* 0x0190 */ add %o5,16,%l3
+/* 0x0194 */ fmuld %f0,%f6,%f6
+/* 0x0198 */ ldd [%g5],%f4
+/* 0x019c */ faddd %f2,%f8,%f2
+/* 0x01a0 */ ldd [%o5+8],%f0
+/* 0x01a4 244 */ ldd [%i3+%l2],%f20
+/* 0x01a8 246 */ faddd %f0,%f6,%f0
+/* 0x01ac */ fmuld %f2,%f4,%f2
+/* 0x01b0 */ faddd %f0,%f2,%f18
+/* 0x01b4 247 */ std %f18,[%o5+8]
+/* 0x01b8 250 */ ble,pt %icc,.L900000658
+/* 0x01bc */ srl %g4,31,%g2
+/* 0x01c0 */ cmp %o7,7
+/* 0x01c4 246 */ add %i4,8,%g2
+/* 0x01c8 250 */ bl,pn %icc,.L77000284
+/* 0x01cc */ add %g2,24,%o2
+/* 0x01d0 252 */ ldd [%o0+24],%f12
+/* 0x01d4 */ add %o5,48,%l3
+/* 0x01d8 */ ldd [%o0],%f2
+/* 0x01dc 0 */ or %g0,%o2,%g2
+/* 0x01e0 250 */ sub %o7,2,%o2
+/* 0x01e4 252 */ ldd [%g2-24],%f0
+/* 0x01e8 */ or %g0,5,%o1
+/* 0x01ec */ ldd [%o0+8],%f6
+/* 0x01f0 */ fmuld %f2,%f20,%f2
+/* 0x01f4 */ ldd [%o0+16],%f14
+/* 0x01f8 */ fmuld %f0,%f22,%f4
+/* 0x01fc */ add %o0,32,%o0
+/* 0x0200 */ ldd [%g2-16],%f8
+/* 0x0204 */ fmuld %f6,%f20,%f10
+/* 0x0208 */ ldd [%o5+16],%f0
+/* 0x020c */ ldd [%g2-8],%f6
+/* 0x0210 */ faddd %f2,%f4,%f4
+/* 0x0214 */ ldd [%o5+32],%f2
+ .L900000642:
+/* 0x0218 252 */ ldd [%g2],%f24
+/* 0x021c */ add %o1,3,%o1
+/* 0x0220 */ add %g2,24,%g2
+/* 0x0224 */ fmuld %f8,%f22,%f8
+/* 0x0228 */ ldd [%l3],%f28
+/* 0x022c */ cmp %o1,%o2
+/* 0x0230 */ add %o0,24,%o0
+/* 0x0234 */ ldd [%o0-24],%f26
+/* 0x0238 */ faddd %f0,%f4,%f0
+/* 0x023c */ add %l3,48,%l3
+/* 0x0240 */ faddd %f10,%f8,%f10
+/* 0x0244 */ fmuld %f14,%f20,%f4
+/* 0x0248 */ std %f0,[%l3-80]
+/* 0x024c */ ldd [%g2-16],%f8
+/* 0x0250 */ fmuld %f6,%f22,%f6
+/* 0x0254 */ ldd [%l3-32],%f0
+/* 0x0258 */ ldd [%o0-16],%f14
+/* 0x025c */ faddd %f2,%f10,%f2
+/* 0x0260 */ faddd %f4,%f6,%f10
+/* 0x0264 */ fmuld %f12,%f20,%f4
+/* 0x0268 */ std %f2,[%l3-64]
+/* 0x026c */ ldd [%g2-8],%f6
+/* 0x0270 */ fmuld %f24,%f22,%f24
+/* 0x0274 */ ldd [%l3-16],%f2
+/* 0x0278 */ ldd [%o0-8],%f12
+/* 0x027c */ faddd %f28,%f10,%f10
+/* 0x0280 */ std %f10,[%l3-48]
+/* 0x0284 */ fmuld %f26,%f20,%f10
+/* 0x0288 */ ble,pt %icc,.L900000642
+/* 0x028c */ faddd %f4,%f24,%f4
+ .L900000645:
+/* 0x0290 252 */ fmuld %f8,%f22,%f28
+/* 0x0294 */ ldd [%g2],%f24
+/* 0x0298 */ faddd %f0,%f4,%f26
+/* 0x029c */ fmuld %f12,%f20,%f8
+/* 0x02a0 */ add %l3,32,%l3
+/* 0x02a4 */ cmp %o1,%o7
+/* 0x02a8 */ fmuld %f14,%f20,%f14
+/* 0x02ac */ ldd [%l3-32],%f4
+/* 0x02b0 */ add %g2,8,%g2
+/* 0x02b4 */ faddd %f10,%f28,%f12
+/* 0x02b8 */ fmuld %f6,%f22,%f6
+/* 0x02bc */ ldd [%l3-16],%f0
+/* 0x02c0 */ fmuld %f24,%f22,%f10
+/* 0x02c4 */ std %f26,[%l3-64]
+/* 0x02c8 */ faddd %f2,%f12,%f2
+/* 0x02cc */ std %f2,[%l3-48]
+/* 0x02d0 */ faddd %f14,%f6,%f6
+/* 0x02d4 */ faddd %f8,%f10,%f2
+/* 0x02d8 */ faddd %f4,%f6,%f4
+/* 0x02dc */ std %f4,[%l3-32]
+/* 0x02e0 */ faddd %f0,%f2,%f0
+/* 0x02e4 */ bg,pn %icc,.L77000213
+/* 0x02e8 */ std %f0,[%l3-16]
+ .L77000284:
+/* 0x02ec 252 */ ldd [%o0],%f0
+ .L900000657:
+/* 0x02f0 252 */ ldd [%g2],%f4
+/* 0x02f4 */ fmuld %f0,%f20,%f2
+/* 0x02f8 */ add %o1,1,%o1
+/* 0x02fc */ ldd [%l3],%f0
+/* 0x0300 */ add %o0,8,%o0
+/* 0x0304 */ add %g2,8,%g2
+/* 0x0308 */ fmuld %f4,%f22,%f4
+/* 0x030c */ cmp %o1,%o7
+/* 0x0310 */ faddd %f2,%f4,%f2
+/* 0x0314 */ faddd %f0,%f2,%f0
+/* 0x0318 */ std %f0,[%l3]
+/* 0x031c */ add %l3,16,%l3
+/* 0x0320 */ ble,a,pt %icc,.L900000657
+/* 0x0324 */ ldd [%o0],%f0
+ .L77000213:
+/* 0x0328 */ srl %g4,31,%g2
+ .L900000658:
+/* 0x032c 254 */ cmp %l1,30 ! jj == 30 ?
+/* 0x0330 */ bne,a,pt %icc,.L900000656
+/* 0x0334 */ fdtox %f18,%f0
+/* 0x0338 */ add %g4,%g2,%g2
+/* 0x033c */ sra %g2,1,%o0
+/* 0x0340 281 */ ldd [%l0],%f0
+/* 0x0344 */ sll %i5,1,%o2
+/* 0x0348 */ add %o0,1,%g2
+/* 0x034c */ sll %g2,1,%o0
+/* 0x0350 254 */ sub %o2,1,%o2
+/* 0x0354 281 */ fmovd %f0,%f2
+/* 0x0358 */ sll %g2,4,%o1
+/* 0x035c */ cmp %o0,%o3
+/* 0x0360 */ bge,pt %icc,.L77000215
+/* 0x0364 */ or %g0,0,%l1
+/* 0x0368 254 */ add %i1,%o1,%o1
+/* 0x036c 281 */ ldd [%o1],%f6
+ .L900000655:
+/* 0x0370 */ fdtox %f6,%f10
+/* 0x0374 */ ldd [%o1+8],%f4
+/* 0x0378 */ add %o0,2,%o0
+/* 0x037c */ ldd [%l0],%f12
+/* 0x0380 */ fdtox %f6,%f6
+/* 0x0384 */ cmp %o0,%o2
+/* 0x0388 */ fdtox %f4,%f8
+/* 0x038c */ fdtox %f4,%f4
+/* 0x0390 */ fmovs %f12,%f10
+/* 0x0394 */ fmovs %f12,%f8
+/* 0x0398 */ fxtod %f10,%f10
+/* 0x039c */ fxtod %f8,%f8
+/* 0x03a0 */ faddd %f10,%f2,%f2
+/* 0x03a4 */ std %f2,[%o1]
+/* 0x03a8 */ faddd %f8,%f0,%f0
+/* 0x03ac */ std %f0,[%o1+8]
+/* 0x03b0 */ add %o1,16,%o1
+/* 0x03b4 */ fitod %f6,%f2
+/* 0x03b8 */ fitod %f4,%f0
+/* 0x03bc */ ble,a,pt %icc,.L900000655
+/* 0x03c0 */ ldd [%o1],%f6
+ .L77000233:
+/* 0x03c4 */ or %g0,0,%l1
+ .L77000215:
+/* 0x03c8 */ fdtox %f18,%f0
+ .L900000656:
+/* 0x03cc */ ldd [%l0],%f6
+/* 0x03d0 256 */ add %g4,1,%g4
+/* 0x03d4 */ add %l2,8,%l2
+/* 0x03d8 */ ldd [%g5],%f2
+/* 0x03dc */ add %l1,1,%l1
+/* 0x03e0 */ add %o5,8,%o5
+/* 0x03e4 */ fmovs %f6,%f0
+/* 0x03e8 */ ldd [%g1],%f4
+/* 0x03ec */ cmp %g4,%g3
+/* 0x03f0 */ fxtod %f0,%f0
+/* 0x03f4 */ fmuld %f0,%f16,%f0
+/* 0x03f8 */ fmuld %f0,%f2,%f2
+/* 0x03fc */ fdtox %f2,%f2
+/* 0x0400 */ fxtod %f2,%f2
+/* 0x0404 */ fmuld %f2,%f4,%f2
+/* 0x0408 */ fsubd %f0,%f2,%f22
+/* 0x040c */ ble,a,pt %icc,.L900000654
+/* 0x0410 */ ldd [%i4],%f0
+ .L900000629:
+/* 0x0414 256 */ ba .L900000653
+/* 0x0418 */ sll %i0,4,%g2
+ .L77000279: ! nlen == 16 path (source lines 261-311)
+/* 0x041c 261 */ ldd [%o2],%f6
+/* 0x0420 279 */ or %g0,%o0,%o4
+/* 0x0424 281 */ or %g0,0,%o3
+/* 0x0428 261 */ ldd [%i2],%f4
+/* 0x042c 273 */ std %f0,[%o0+8]
+/* 0x0430 */ std %f0,[%o0+16]
+/* 0x0434 261 */ fmuld %f4,%f6,%f4
+/* 0x0438 */ std %f4,[%o0]
+/* 0x043c 273 */ std %f0,[%o0+24]
+/* 0x0440 */ std %f0,[%o0+32]
+/* 0x0444 */ fdtox %f4,%f4
+/* 0x0448 */ std %f0,[%o0+40]
+/* 0x044c */ std %f0,[%o0+48]
+/* 0x0450 */ std %f0,[%o0+56]
+/* 0x0454 */ std %f0,[%o0+64]
+/* 0x0458 */ std %f0,[%o0+72]
+/* 0x045c */ std %f0,[%o0+80]
+/* 0x0460 */ std %f0,[%o0+88]
+/* 0x0464 */ std %f0,[%o0+96]
+/* 0x0468 */ std %f0,[%o0+104]
+/* 0x046c */ std %f0,[%o0+112]
+/* 0x0470 */ std %f0,[%o0+120]
+/* 0x0474 */ std %f0,[%o0+128]
+/* 0x0478 */ std %f0,[%o0+136]
+/* 0x047c */ std %f0,[%o0+144]
+/* 0x0480 */ std %f0,[%o0+152]
+/* 0x0484 */ std %f0,[%o0+160]
+/* 0x0488 */ std %f0,[%o0+168]
+/* 0x048c */ fmovs %f0,%f4
+/* 0x0490 */ std %f0,[%o0+176]
+/* 0x0494 281 */ or %g0,0,%o1
+/* 0x0498 273 */ std %f0,[%o0+184]
+/* 0x049c */ fxtod %f4,%f4
+/* 0x04a0 */ std %f0,[%o0+192]
+/* 0x04a4 */ std %f0,[%o0+200]
+/* 0x04a8 */ std %f0,[%o0+208]
+/* 0x04ac 278 */ fmuld %f4,%f2,%f2
+/* 0x04b0 273 */ std %f0,[%o0+216]
+/* 0x04b4 */ std %f0,[%o0+224]
+/* 0x04b8 */ std %f0,[%o0+232]
+/* 0x04bc */ std %f0,[%o0+240]
+/* 0x04c0 */ std %f0,[%o0+248]
+/* 0x04c4 */ std %f0,[%o0+256]
+/* 0x04c8 */ std %f0,[%o0+264]
+/* 0x04cc */ std %f0,[%o0+272]
+/* 0x04d0 */ std %f0,[%o0+280]
+/* 0x04d4 */ std %f0,[%o0+288]
+/* 0x04d8 */ std %f0,[%o0+296]
+/* 0x04dc */ std %f0,[%o0+304]
+/* 0x04e0 */ std %f0,[%o0+312]
+/* 0x04e4 */ std %f0,[%o0+320]
+/* 0x04e8 */ std %f0,[%o0+328]
+/* 0x04ec */ std %f0,[%o0+336]
+/* 0x04f0 */ std %f0,[%o0+344]
+/* 0x04f4 */ std %f0,[%o0+352]
+/* 0x04f8 */ std %f0,[%o0+360]
+/* 0x04fc */ std %f0,[%o0+368]
+/* 0x0500 */ std %f0,[%o0+376]
+/* 0x0504 */ std %f0,[%o0+384]
+/* 0x0508 */ std %f0,[%o0+392]
+/* 0x050c */ std %f0,[%o0+400]
+/* 0x0510 */ std %f0,[%o0+408]
+/* 0x0514 */ std %f0,[%o0+416]
+/* 0x0518 */ std %f0,[%o0+424]
+/* 0x051c */ std %f0,[%o0+432]
+/* 0x0520 */ std %f0,[%o0+440]
+/* 0x0524 */ std %f0,[%o0+448]
+/* 0x0528 */ std %f0,[%o0+456]
+/* 0x052c */ std %f0,[%o0+464]
+/* 0x0530 */ std %f0,[%o0+472]
+/* 0x0534 */ std %f0,[%o0+480]
+/* 0x0538 */ std %f0,[%o0+488]
+/* 0x053c */ std %f0,[%o0+496]
+/* 0x0540 */ std %f0,[%o0+504]
+/* 0x0544 */ std %f0,[%o0+512]
+/* 0x0548 */ std %f0,[%o0+520]
+/* 0x054c */ ldd [%g5],%f0
+/* 0x0550 */ ldd [%g1],%f8
+/* 0x0554 */ fmuld %f2,%f0,%f6
+/* 0x0558 275 */ ldd [%i4],%f4
+/* 0x055c 276 */ ldd [%i2],%f0
+/* 0x0560 */ fdtox %f6,%f6
+/* 0x0564 */ fxtod %f6,%f6
+/* 0x0568 */ fmuld %f6,%f8,%f6
+/* 0x056c */ fsubd %f2,%f6,%f2
+/* 0x0570 286 */ fmuld %f4,%f2,%f12
+
+! 282 ! {
+! 284 ! m2j=pdm2[j];
+! 285 ! a=pdtj[0]+pdn_0*digit;
+! 286 ! b=pdtj[1]+pdm1_0*pdm2[j+1]+a*TwoToMinus16;
+
+! 287 ! pdtj[1]=b;
+! 289 ! /**** this loop will be fully unrolled:
+! 290 ! for(i=1;i<16;i++)
+! 291 ! {
+! 292 ! pdtj[2*i]+=pdm1[i]*m2j+pdn[i]*digit;
+! 293 ! }
+! 294 ! *************************************/
+! 295 ! pdtj[2]+=pdm1[1]*m2j+pdn[1]*digit;
+! 296 ! pdtj[4]+=pdm1[2]*m2j+pdn[2]*digit;
+! 297 ! pdtj[6]+=pdm1[3]*m2j+pdn[3]*digit;
+! 298 ! pdtj[8]+=pdm1[4]*m2j+pdn[4]*digit;
+! 299 ! pdtj[10]+=pdm1[5]*m2j+pdn[5]*digit;
+! 300 ! pdtj[12]+=pdm1[6]*m2j+pdn[6]*digit;
+! 301 ! pdtj[14]+=pdm1[7]*m2j+pdn[7]*digit;
+! 302 ! pdtj[16]+=pdm1[8]*m2j+pdn[8]*digit;
+! 303 ! pdtj[18]+=pdm1[9]*m2j+pdn[9]*digit;
+! 304 ! pdtj[20]+=pdm1[10]*m2j+pdn[10]*digit;
+! 305 ! pdtj[22]+=pdm1[11]*m2j+pdn[11]*digit;
+! 306 ! pdtj[24]+=pdm1[12]*m2j+pdn[12]*digit;
+! 307 ! pdtj[26]+=pdm1[13]*m2j+pdn[13]*digit;
+! 308 ! pdtj[28]+=pdm1[14]*m2j+pdn[14]*digit;
+! 309 ! pdtj[30]+=pdm1[15]*m2j+pdn[15]*digit;
+! 310 ! /* no need for cleanup, cannot overflow */
+! 311 ! digit=mod(lower32(b,Zero)*dn0,TwoToMinus16,TwoTo16);
+
+ fmovd %f2,%f0 ! hand modified
+ fmovd %f16,%f18 ! hand modified
+ ldd [%i4],%f2
+ ldd [%o4],%f8
+ ldd [%i2],%f10
+ ldd [%g5],%f14 ! hand modified
+ ldd [%g1],%f16 ! hand modified
+ ldd [%i3],%f24
+
+ ldd [%i2+8],%f26
+ ldd [%i2+16],%f40
+ ldd [%i2+48],%f46
+ ldd [%i2+56],%f30
+ ldd [%i2+64],%f54
+ ldd [%i2+104],%f34
+ ldd [%i2+112],%f58
+
+ ldd [%i4+8],%f28
+ ldd [%i4+104],%f38
+ ldd [%i4+112],%f60
+
+ .L99999999: !1
+ ldd [%i2+24],%f32
+ fmuld %f0,%f2,%f4 !2
+ ldd [%i4+24],%f36
+ fmuld %f26,%f24,%f20 !3
+ ldd [%i2+40],%f42
+ fmuld %f28,%f0,%f22 !4
+ ldd [%i4+40],%f44
+ fmuld %f32,%f24,%f32 !5
+ ldd [%i3+8],%f6
+ faddd %f4,%f8,%f4
+ fmuld %f36,%f0,%f36 !6
+ add %i3,8,%i3
+ ldd [%i4+56],%f50
+ fmuld %f42,%f24,%f42 !7
+ ldd [%i2+72],%f52
+ faddd %f20,%f22,%f20
+ fmuld %f44,%f0,%f44 !8
+ ldd [%o4+16],%f22
+ fmuld %f10,%f6,%f12 !9
+ ldd [%i4+72],%f56
+ faddd %f32,%f36,%f32
+ fmuld %f14,%f4,%f4 !10
+ ldd [%o4+48],%f36
+ fmuld %f30,%f24,%f48 !11
+ ldd [%o4+8],%f8
+ faddd %f20,%f22,%f20
+ fmuld %f50,%f0,%f50 !12
+ std %f20,[%o4+16]
+ faddd %f42,%f44,%f42
+ fmuld %f52,%f24,%f52 !13
+ ldd [%o4+80],%f44
+ faddd %f4,%f12,%f4
+ fmuld %f56,%f0,%f56 !14
+ ldd [%i2+88],%f20
+ faddd %f32,%f36,%f32 !15
+ ldd [%i4+88],%f22
+ faddd %f48,%f50,%f48 !16
+ ldd [%o4+112],%f50
+ faddd %f52,%f56,%f52 !17
+ ldd [%o4+144],%f56
+ faddd %f4,%f8,%f8
+ fmuld %f20,%f24,%f20 !18
+ std %f32,[%o4+48]
+ faddd %f42,%f44,%f42
+ fmuld %f22,%f0,%f22 !19
+ std %f42,[%o4+80]
+ faddd %f48,%f50,%f48
+ fmuld %f34,%f24,%f32 !20
+ std %f48,[%o4+112]
+ faddd %f52,%f56,%f52
+ fmuld %f38,%f0,%f36 !21
+ ldd [%i2+120],%f42
+ fdtox %f8,%f4 !22
+ std %f52,[%o4+144]
+ faddd %f20,%f22,%f20 !23
+ ldd [%i4+120],%f44 !24
+ ldd [%o4+176],%f22
+ faddd %f32,%f36,%f32
+ fmuld %f42,%f24,%f42 !25
+ ldd [%i4+16],%f50
+ fmovs %f17,%f4 !26
+ ldd [%i2+32],%f52
+ fmuld %f44,%f0,%f44 !27
+ ldd [%i4+32],%f56
+ fmuld %f40,%f24,%f48 !28
+ ldd [%o4+208],%f36
+ faddd %f20,%f22,%f20
+ fmuld %f50,%f0,%f50 !29
+ std %f20,[%o4+176]
+ fxtod %f4,%f4
+ fmuld %f52,%f24,%f52 !30
+ ldd [%i4+48],%f22
+ faddd %f42,%f44,%f42
+ fmuld %f56,%f0,%f56 !31
+ ldd [%o4+240],%f44
+ faddd %f32,%f36,%f32 !32
+ std %f32,[%o4+208]
+ faddd %f48,%f50,%f48
+ fmuld %f46,%f24,%f20 !33
+ ldd [%o4+32],%f50
+ fmuld %f4,%f18,%f12 !34
+ ldd [%i4+64],%f36
+ faddd %f52,%f56,%f52
+ fmuld %f22,%f0,%f22 !35
+ ldd [%o4+64],%f56
+ faddd %f42,%f44,%f42 !36
+ std %f42,[%o4+240]
+ faddd %f48,%f50,%f48
+ fmuld %f54,%f24,%f32 !37
+ std %f48,[%o4+32]
+ fmuld %f12,%f14,%f4 !38
+ ldd [%i2+80],%f42
+ faddd %f52,%f56,%f56 ! yes, tmp52!
+ fmuld %f36,%f0,%f36 !39
+ ldd [%i4+80],%f44
+ faddd %f20,%f22,%f20 !40
+ ldd [%i2+96],%f48
+ fmuld %f58,%f24,%f52 !41
+ ldd [%i4+96],%f50
+ fdtox %f4,%f4
+ fmuld %f42,%f24,%f42 !42
+ std %f56,[%o4+64] ! yes, tmp52!
+ faddd %f32,%f36,%f32
+ fmuld %f44,%f0,%f44 !43
+ ldd [%o4+96],%f22
+ fmuld %f48,%f24,%f48 !44
+ ldd [%o4+128],%f36
+ fmovd %f6,%f24
+ fmuld %f50,%f0,%f50 !45
+ fxtod %f4,%f4
+ fmuld %f60,%f0,%f56 !46
+ add %o4,8,%o4
+ faddd %f42,%f44,%f42 !47
+ ldd [%o4+160-8],%f44
+ faddd %f20,%f22,%f20 !48
+ std %f20,[%o4+96-8]
+ faddd %f48,%f50,%f48 !49
+ ldd [%o4+192-8],%f50
+ faddd %f52,%f56,%f52
+ fmuld %f4,%f16,%f4 !50
+ ldd [%o4+224-8],%f56
+ faddd %f32,%f36,%f32 !51
+ std %f32,[%o4+128-8]
+ faddd %f42,%f44,%f42 !52
+ add %o3,1,%o3
+ std %f42,[%o4+160-8]
+ faddd %f48,%f50,%f48 !53
+ cmp %o3,31 ! %o3 starts at 0 and is incremented first, so the loop body runs 32 times
+ std %f48,[%o4+192-8]
+ fsubd %f12,%f4,%f0 !54
+ faddd %f52,%f56,%f52
+ ble,pt %icc,.L99999999
+ std %f52,[%o4+224-8] !55
+ std %f8,[%o4]
+
+! 312 ! }
+! 313 ! }
+! 315 ! conv_d16_to_i32(result,dt+2*nlen,(long long *)dt,nlen+1);
+
+/* 0x07c8 315 */ sll %i0,4,%g2
+ .L900000653: ! both paths join here; the source line 315 conversion follows with no call instruction
+/* 0x07cc 315 */ add %i1,%g2,%i1 ! %i1 = dt + 2*nlen (byte offset nlen*16)
+/* 0x07d0 242 */ ld [%fp+68],%o0 ! reload result pointer
+/* 0x07d4 315 */ or %g0,0,%o4
+/* 0x07d8 */ ldd [%i1],%f0
+/* 0x07dc */ or %g0,0,%g5
+/* 0x07e0 */ cmp %i0,0
+/* 0x07e4 242 */ or %g0,%o0,%o3
+/* 0x07e8 311 */ sub %i0,1,%g1
+/* 0x07ec 315 */ fdtox %f0,%f0
+/* 0x07f0 */ std %f0,[%sp+120]
+/* 0x07f4 311 */ sethi %hi(0xfc00),%o1
+/* 0x07f8 */ add %g1,1,%g3
+/* 0x07fc */ or %g0,%o0,%g4
+/* 0x0800 315 */ ldd [%i1+8],%f0
+/* 0x0804 */ add %o1,1023,%o1 ! %o1 = 0xfc00 + 0x3ff = 0xffff
+/* 0x0808 */ fdtox %f0,%f0
+/* 0x080c */ std %f0,[%sp+112]
+/* 0x0810 */ ldx [%sp+112],%o5
+/* 0x0814 */ ldx [%sp+120],%o7
+/* 0x0818 */ ble,pt %icc,.L900000651
+/* 0x081c */ sethi %hi(0xfc00),%g2
+/* 0x0820 311 */ or %g0,-1,%g2
+/* 0x0824 315 */ cmp %g3,3
+/* 0x0828 311 */ srl %g2,0,%o2 ! %o2 = 0xffffffff
+/* 0x082c 315 */ bl,pn %icc,.L77000287
+/* 0x0830 */ or %g0,%i1,%g2
+/* 0x0834 */ ldd [%i1+16],%f0
+/* 0x0838 */ and %o5,%o1,%o0
+/* 0x083c */ add %i1,16,%g2
+/* 0x0840 */ sllx %o0,16,%g3
+/* 0x0844 */ and %o7,%o2,%o0
+/* 0x0848 */ fdtox %f0,%f0
+/* 0x084c */ std %f0,[%sp+104]
+/* 0x0850 */ add %o0,%g3,%o4
+/* 0x0854 */ ldd [%i1+24],%f2
+/* 0x0858 */ srax %o5,16,%o0
+/* 0x085c */ add %o3,4,%g4
+/* 0x0860 */ stx %o0,[%sp+128]
+/* 0x0864 */ and %o4,%o2,%o0
+/* 0x0868 */ stx %o0,[%sp+112]
+/* 0x086c */ srax %o4,32,%o0
+/* 0x0870 */ fdtox %f2,%f0
+/* 0x0874 */ stx %o0,[%sp+136]
+/* 0x0878 */ srax %o7,32,%o4
+/* 0x087c */ std %f0,[%sp+96]
+/* 0x0880 */ ldx [%sp+128],%g5
+/* 0x0884 */ ldx [%sp+136],%o7
+/* 0x0888 */ ldx [%sp+104],%g3
+/* 0x088c */ add %g5,%o7,%o0
+/* 0x0890 */ or %g0,1,%g5
+/* 0x0894 */ ldx [%sp+112],%o7
+/* 0x0898 */ add %o4,%o0,%o4
+/* 0x089c */ ldx [%sp+96],%o5
+/* 0x08a0 */ st %o7,[%o3]
+/* 0x08a4 */ or %g0,%g3,%o7
+ .L900000634:
+/* 0x08a8 */ ldd [%g2+16],%f0
+/* 0x08ac */ add %g5,1,%g5
+/* 0x08b0 */ add %g4,4,%g4
+/* 0x08b4 */ cmp %g5,%g1
+/* 0x08b8 */ add %g2,16,%g2
+/* 0x08bc */ fdtox %f0,%f0
+/* 0x08c0 */ std %f0,[%sp+104]
+/* 0x08c4 */ ldd [%g2+8],%f0
+/* 0x08c8 */ fdtox %f0,%f0
+/* 0x08cc */ std %f0,[%sp+96]
+/* 0x08d0 */ and %o5,%o1,%g3
+/* 0x08d4 */ sllx %g3,16,%g3
+/* 0x08d8 */ stx %g3,[%sp+120]
+/* 0x08dc */ and %o7,%o2,%g3
+/* 0x08e0 */ stx %o7,[%sp+128]
+/* 0x08e4 */ ldx [%sp+120],%o7
+/* 0x08e8 */ add %g3,%o7,%g3
+/* 0x08ec */ ldx [%sp+128],%o7
+/* 0x08f0 */ srax %o5,16,%o5
+/* 0x08f4 */ add %g3,%o4,%g3
+/* 0x08f8 */ srax %g3,32,%o4
+/* 0x08fc */ stx %o4,[%sp+112]
+/* 0x0900 */ srax %o7,32,%o4
+/* 0x0904 */ ldx [%sp+112],%o7
+/* 0x0908 */ add %o5,%o7,%o7
+/* 0x090c */ ldx [%sp+96],%o5
+/* 0x0910 */ add %o4,%o7,%o4
+/* 0x0914 */ and %g3,%o2,%g3
+/* 0x0918 */ ldx [%sp+104],%o7
+/* 0x091c */ ble,pt %icc,.L900000634
+/* 0x0920 */ st %g3,[%g4-4]
+ .L900000637:
+/* 0x0924 */ ba .L900000651
+/* 0x0928 */ sethi %hi(0xfc00),%g2
+ .L77000287:
+/* 0x092c */ ldd [%g2+16],%f0
+ .L900000650:
+/* 0x0930 */ and %o7,%o2,%o0
+/* 0x0934 */ and %o5,%o1,%g3
+/* 0x0938 */ fdtox %f0,%f0
+/* 0x093c */ add %o4,%o0,%o0
+/* 0x0940 */ std %f0,[%sp+104]
+/* 0x0944 */ add %g5,1,%g5
+/* 0x0948 */ sllx %g3,16,%o4
+/* 0x094c */ ldd [%g2+24],%f2
+/* 0x0950 */ add %g2,16,%g2
+/* 0x0954 */ add %o0,%o4,%o4
+/* 0x0958 */ cmp %g5,%g1
+/* 0x095c */ srax %o5,16,%o0
+/* 0x0960 */ stx %o0,[%sp+112]
+/* 0x0964 */ and %o4,%o2,%g3
+/* 0x0968 */ srax %o4,32,%o5
+/* 0x096c */ fdtox %f2,%f0
+/* 0x0970 */ std %f0,[%sp+96]
+/* 0x0974 */ srax %o7,32,%o4
+/* 0x0978 */ ldx [%sp+112],%o7
+/* 0x097c */ add %o7,%o5,%o7
+/* 0x0980 */ ldx [%sp+104],%o5
+/* 0x0984 */ add %o4,%o7,%o4
+/* 0x0988 */ ldx [%sp+96],%o0
+/* 0x098c */ st %g3,[%g4]
+/* 0x0990 */ or %g0,%o5,%o7
+/* 0x0994 */ add %g4,4,%g4
+/* 0x0998 */ or %g0,%o0,%o5
+/* 0x099c */ ble,a,pt %icc,.L900000650
+/* 0x09a0 */ ldd [%g2+16],%f0
+ .L77000236:
+/* 0x09a4 */ sethi %hi(0xfc00),%g2
+ .L900000651:
+/* 0x09a8 */ or %g0,-1,%o0
+/* 0x09ac */ add %g2,1023,%g2
+/* 0x09b0 */ ld [%fp+88],%o1 ! reload nint
+/* 0x09b4 */ srl %o0,0,%g3
+/* 0x09b8 */ and %o5,%g2,%g2
+/* 0x09bc */ and %o7,%g3,%g4
+
+! 317 ! adjust_montf_result(result,nint,nlen);
+
+/* 0x09c0 317 */ or %g0,-1,%o5 ! i = -1; the adjust_montf_result logic is expanded here, not called
+/* 0x09c4 311 */ sllx %g2,16,%g2
+/* 0x09c8 */ add %o4,%g4,%g4
+/* 0x09cc */ add %g4,%g2,%g2
+/* 0x09d0 */ sll %g5,2,%g4
+/* 0x09d4 */ and %g2,%g3,%g2
+/* 0x09d8 */ st %g2,[%o3+%g4]
+/* 0x09dc 317 */ sll %i0,2,%g2
+/* 0x09e0 */ ld [%o3+%g2],%g2 ! %g2 = result[nlen]
+/* 0x09e4 */ cmp %g2,0
+/* 0x09e8 */ bleu,pn %icc,.L77000241
+/* 0x09ec */ or %g0,%o1,%o2
+/* 0x09f0 */ ba .L900000649
+/* 0x09f4 */ cmp %o5,0
+ .L77000241:
+/* 0x09f8 */ sub %i0,1,%o5
+/* 0x09fc */ sll %o5,2,%g2
+/* 0x0a00 */ cmp %o5,0
+/* 0x0a04 */ bl,pt %icc,.L900000649
+/* 0x0a08 */ cmp %o5,0
+/* 0x0a0c */ add %o1,%g2,%o1
+/* 0x0a10 */ add %o3,%g2,%o4
+/* 0x0a14 */ ld [%o1],%g2
+ .L900000648:
+/* 0x0a18 */ ld [%o4],%g3
+/* 0x0a1c */ sub %o5,1,%o0
+/* 0x0a20 */ sub %o1,4,%o1
+/* 0x0a24 */ sub %o4,4,%o4
+/* 0x0a28 */ cmp %g3,%g2
+/* 0x0a2c */ bne,pn %icc,.L77000244
+/* 0x0a30 */ nop
+/* 0x0a34 0 */ or %g0,%o0,%o5
+/* 0x0a38 317 */ cmp %o0,0
+/* 0x0a3c */ bge,a,pt %icc,.L900000648
+/* 0x0a40 */ ld [%o1],%g2
+ .L77000244:
+/* 0x0a44 */ cmp %o5,0
+ .L900000649:
+/* 0x0a48 */ bl,pn %icc,.L77000288
+/* 0x0a4c */ sll %o5,2,%g2
+/* 0x0a50 */ ld [%o2+%g2],%g3
+/* 0x0a54 */ ld [%o3+%g2],%g2
+/* 0x0a58 */ cmp %g2,%g3
+/* 0x0a5c */ bleu,pt %icc,.L77000224
+/* 0x0a60 */ nop
+ .L77000288:
+/* 0x0a64 */ cmp %i0,0
+/* 0x0a68 */ ble,pt %icc,.L77000224
+/* 0x0a6c */ nop
+/* 0x0a70 317 */ sub %i0,1,%o7
+/* 0x0a74 */ or %g0,-1,%g2
+/* 0x0a78 */ srl %g2,0,%o4
+/* 0x0a7c */ add %o7,1,%o0
+/* 0x0a80 315 */ or %g0,0,%o5
+/* 0x0a84 */ or %g0,0,%g1
+/* 0x0a88 */ cmp %o0,3
+/* 0x0a8c */ bl,pn %icc,.L77000289
+/* 0x0a90 */ add %o3,8,%o1
+/* 0x0a94 */ add %o2,4,%o0
+/* 0x0a98 */ ld [%o1-8],%g2
+/* 0x0a9c 0 */ or %g0,%o1,%o3
+/* 0x0aa0 315 */ ld [%o0-4],%g3
+/* 0x0aa4 0 */ or %g0,%o0,%o2
+/* 0x0aa8 315 */ or %g0,2,%g1
+/* 0x0aac */ ld [%o3-4],%o0
+/* 0x0ab0 */ sub %g2,%g3,%g2
+/* 0x0ab4 */ or %g0,%g2,%o5
+/* 0x0ab8 */ and %g2,%o4,%g2
+/* 0x0abc */ st %g2,[%o3-8]
+/* 0x0ac0 */ srax %o5,32,%o5
+ .L900000638:
+/* 0x0ac4 */ ld [%o2],%g2
+/* 0x0ac8 */ add %g1,1,%g1
+/* 0x0acc */ add %o2,4,%o2
+/* 0x0ad0 */ cmp %g1,%o7
+/* 0x0ad4 */ add %o3,4,%o3
+/* 0x0ad8 */ sub %o0,%g2,%o0
+/* 0x0adc */ add %o0,%o5,%o5
+/* 0x0ae0 */ and %o5,%o4,%g2
+/* 0x0ae4 */ ld [%o3-4],%o0
+/* 0x0ae8 */ st %g2,[%o3-8]
+/* 0x0aec */ ble,pt %icc,.L900000638
+/* 0x0af0 */ srax %o5,32,%o5
+ .L900000641:
+/* 0x0af4 */ ld [%o2],%o1
+/* 0x0af8 */ sub %o0,%o1,%o0
+/* 0x0afc */ add %o0,%o5,%o0
+/* 0x0b00 */ and %o0,%o4,%o1
+/* 0x0b04 */ st %o1,[%o3-4]
+/* 0x0b08 */ ret ! Result =
+/* 0x0b0c */ restore %g0,%g0,%g0
+ .L77000289:
+/* 0x0b10 */ ld [%o3],%o0
+ .L900000647:
+/* 0x0b14 */ ld [%o2],%o1
+/* 0x0b18 */ add %o5,%o0,%o0
+/* 0x0b1c */ add %g1,1,%g1
+/* 0x0b20 */ add %o2,4,%o2
+/* 0x0b24 */ cmp %g1,%o7
+/* 0x0b28 */ sub %o0,%o1,%o0
+/* 0x0b2c */ and %o0,%o4,%o1
+/* 0x0b30 */ st %o1,[%o3]
+/* 0x0b34 */ add %o3,4,%o3
+/* 0x0b38 */ srax %o0,32,%o5
+/* 0x0b3c */ ble,a,pt %icc,.L900000647
+/* 0x0b40 */ ld [%o3],%o0
+ .L77000224:
+/* 0x0b44 */ ret ! Result =
+/* 0x0b48 */ restore %g0,%g0,%g0
+/* 0x0b4c 0 */ .type mont_mulf_noconv,2
+/* 0x0b4c */ .size mont_mulf_noconv,(.-mont_mulf_noconv)
+
diff --git a/security/nss/lib/freebl/mpi/montmulfv9.il b/security/nss/lib/freebl/mpi/montmulfv9.il
new file mode 100644
index 000000000..006f47431
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/montmulfv9.il
@@ -0,0 +1,93 @@
+!
+! This Source Code Form is subject to the terms of the Mozilla Public
+! License, v. 2.0. If a copy of the MPL was not distributed with this
+! file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+!
+! double upper32(double /*frs1*/);
+! Inline template: truncates the argument in %f0 to a 64-bit integer and returns the upper 32-bit word of that integer, converted back to double, in %f0. Overwrites %f10/%f11.
+ .inline upper32,8
+ fdtox %f0,%f10 ! %f10:%f11 = (64-bit integer) x
+ fitod %f10,%f0 ! %f10 alone is the most-significant word; convert it to double
+ .end
+
+!
+! double lower32(double /*frs1*/, double /* Zero */);
+! Inline template: truncates the first argument (%f0) to a 64-bit integer, replaces its upper word with the upper word of the second argument (%f2), and returns the result as a double in %f0. Overwrites %f10/%f11.
+ .inline lower32,8
+ fdtox %f0,%f10 ! %f10:%f11 = (64-bit integer) x
+ fmovs %f2,%f10 ! overwrite the upper word; callers in this code pass Zero here
+ fxtod %f10,%f0 ! convert the remaining 64-bit integer back to double
+ .end
+
+!
+! double mod(double /*x*/, double /*1/m*/, double /*m*/);
+! Inline template: returns x - (integer part of x*(1/m))*m in %f0; arguments arrive in %f0, %f2, %f4 and %f2 is overwritten.
+ .inline mod,12
+ fmuld %f0,%f2,%f2 ! x * (1/m)
+ fdtox %f2,%f2 ! to 64-bit integer (drops the fraction)
+ fxtod %f2,%f2 ! back to double
+ fmuld %f2,%f4,%f2 ! times m
+ fsubd %f0,%f2,%f0 ! x minus that multiple of m
+ .end
+
+
+!
+! void i16_to_d16_and_d32x4(double * /*1/(2^16)*/, double * /* 2^16*/,
+! double * /* 0 */,
+! double * /*result16*/, double * /* result32 */,
+! float * /*source - should be unsigned int*
+! converted to float* */);
+! Converts four 32-bit words at source (%o5): result32[0..3] (%o4) get each word as a double; result16[2k] / result16[2k+1] (%o3) get its remainder and quotient with respect to 2^16
+ .inline i16_to_d16_and_d32x4,24
+ ldd [%o0],%f2 ! 1/(2^16)
+ ldd [%o1],%f4 ! 2^16
+ ldd [%o2],%f22 ! the 0 constant, used to clear each 64-bit pair
+
+ fmovd %f22,%f6
+ ld [%o5],%f7 ! source word 0 into the low half of the %f6 pair (upper half stays 0)
+ fmovd %f22,%f10
+ ld [%o5+4],%f11
+ fmovd %f22,%f14
+ ld [%o5+8],%f15
+ fmovd %f22,%f18
+ ld [%o5+12],%f19
+ fxtod %f6,%f6
+ std %f6,[%o4] ! result32[0]
+ fxtod %f10,%f10
+ std %f10,[%o4+8]
+ fxtod %f14,%f14
+ std %f14,[%o4+16]
+ fxtod %f18,%f18
+ std %f18,[%o4+24]
+ fmuld %f2,%f6,%f8 ! x * 2^-16
+ fmuld %f2,%f10,%f12
+ fmuld %f2,%f14,%f16
+ fmuld %f2,%f18,%f20
+ fdtox %f8,%f8 ! drop the fraction: integer quotient x / 2^16
+ fdtox %f12,%f12
+ fdtox %f16,%f16
+ fdtox %f20,%f20
+ fxtod %f8,%f8
+ std %f8,[%o3+8] ! result16[1] = quotient of word 0
+ fxtod %f12,%f12
+ std %f12,[%o3+24]
+ fxtod %f16,%f16
+ std %f16,[%o3+40]
+ fxtod %f20,%f20
+ std %f20,[%o3+56]
+ fmuld %f8,%f4,%f8 ! quotient * 2^16
+ fmuld %f12,%f4,%f12
+ fmuld %f16,%f4,%f16
+ fmuld %f20,%f4,%f20
+ fsubd %f6,%f8,%f8
+ std %f8,[%o3] ! result16[0] = x - quotient*2^16 (remainder of word 0)
+ fsubd %f10,%f12,%f12
+ std %f12,[%o3+16]
+ fsubd %f14,%f16,%f16
+ std %f16,[%o3+32]
+ fsubd %f18,%f20,%f20
+ std %f20,[%o3+48]
+ .end
+
+
diff --git a/security/nss/lib/freebl/mpi/montmulfv9.s b/security/nss/lib/freebl/mpi/montmulfv9.s
new file mode 100644
index 000000000..560e47f7b
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/montmulfv9.s
@@ -0,0 +1,2346 @@
+!
+! This Source Code Form is subject to the terms of the Mozilla Public
+! License, v. 2.0. If a copy of the MPL was not distributed with this
+! file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+ .section ".text",#alloc,#execinstr
+ .file "montmulf.c"
+
+ .section ".rodata",#alloc
+ .global TwoTo16
+ .align 8
+!
+! CONSTANT POOL
+! Each constant below is an 8-byte double emitted as two 32-bit words, most-significant word first.
+ .global TwoTo16
+TwoTo16:
+ .word 1089470464 ! 0x40F00000: high word of the double 65536.0 (2^16)
+ .word 0
+ .type TwoTo16,#object
+ .size TwoTo16,8
+ .global TwoToMinus16
+!
+! CONSTANT POOL
+!
+ .global TwoToMinus16
+TwoToMinus16:
+ .word 1055916032 ! 0x3EF00000: high word of the double 1.0/65536.0 (2^-16)
+ .word 0
+ .type TwoToMinus16,#object
+ .size TwoToMinus16,8
+ .global Zero
+!
+! CONSTANT POOL
+!
+ .global Zero
+Zero:
+ .word 0 ! the double 0.0
+ .word 0
+ .type Zero,#object
+ .size Zero,8
+ .global TwoTo32
+!
+! CONSTANT POOL
+!
+ .global TwoTo32
+TwoTo32:
+ .word 1106247680 ! 0x41F00000: high word of the double 2^32
+ .word 0
+ .type TwoTo32,#object
+ .size TwoTo32,8
+ .global TwoToMinus32
+!
+! CONSTANT POOL
+!
+ .global TwoToMinus32
+TwoToMinus32:
+ .word 1039138816 ! 0x3DF00000: high word of the double 2^-32
+ .word 0
+ .type TwoToMinus32,#object
+ .size TwoToMinus32,8
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 */ .register %g3,#scratch
+/* 000000 */ .register %g2,#scratch
+/* 000000 0 */ .align 8
+!
+! SUBROUTINE conv_d16_to_i32
+! Combines the doubles d16[2*i], d16[2*i+1] into 32-bit words i32[i] with carry propagation (C source interleaved below).
+! OFFSET SOURCE LINE LABEL INSTRUCTION
+
+ .global conv_d16_to_i32
+ conv_d16_to_i32:
+/* 000000 */ save %sp,-208,%sp
+! FILE montmulf.c
+
+! 1 !/*
+! 2 ! * The contents of this file are subject to the Mozilla Public
+! 3 ! * License Version 1.1 (the "License"); you may not use this file
+! 4 ! * except in compliance with the License. You may obtain a copy of
+! 5 ! * the License at http://www.mozilla.org/MPL/
+! 6 ! *
+! 7 ! * Software distributed under the License is distributed on an "AS
+! 8 ! * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
+! 9 ! * implied. See the License for the specific language governing
+! 10 ! * rights and limitations under the License.
+! 11 ! *
+! 12 ! * The Original Code is SPARC optimized Montgomery multiply functions.
+! 13 ! *
+! 14 ! * The Initial Developer of the Original Code is Sun Microsystems Inc.
+! 15 ! * Portions created by Sun Microsystems Inc. are
+! 16 ! * Copyright (C) 1999-2000 Sun Microsystems Inc. All Rights Reserved.
+! 17 ! *
+! 18 ! * Contributor(s):
+! 19 ! * Netscape Communications Corporation
+! 20 ! *
+! 21 ! * Alternatively, the contents of this file may be used under the
+! 22 ! * terms of the GNU General Public License Version 2 or later (the
+! 23 ! * "GPL"), in which case the provisions of the GPL are applicable
+! 24 ! * instead of those above. If you wish to allow use of your
+! 25 ! * version of this file only under the terms of the GPL and not to
+! 26 ! * allow others to use your version of this file under the MPL,
+! 27 ! * indicate your decision by deleting the provisions above and
+! 28 ! * replace them with the notice and other provisions required by
+! 29 ! * the GPL. If you do not delete the provisions above, a recipient
+! 30 ! * may use your version of this file under either the MPL or the
+! 31 ! * GPL.
+! 34 ! */
+! 36 !#define RF_INLINE_MACROS
+! 38 !static const double TwoTo16=65536.0;
+! 39 !static const double TwoToMinus16=1.0/65536.0;
+! 40 !static const double Zero=0.0;
+! 41 !static const double TwoTo32=65536.0*65536.0;
+! 42 !static const double TwoToMinus32=1.0/(65536.0*65536.0);
+! 44 !#ifdef RF_INLINE_MACROS
+! 46 !double upper32(double);
+! 47 !double lower32(double, double);
+! 48 !double mod(double, double, double);
+! 50 !void i16_to_d16_and_d32x4(const double * /*1/(2^16)*/,
+! 51 ! const double * /* 2^16*/,
+! 52 ! const double * /* 0 */,
+! 53 ! double * /*result16*/,
+! 54 ! double * /* result32 */,
+! 55 ! float * /*source - should be unsigned int*
+! 56 ! converted to float* */);
+! 58 !#else
+! 60 !static double upper32(double x)
+! 61 !{
+! 62 ! return floor(x*TwoToMinus32);
+! 63 !}
+! 65 !static double lower32(double x, double y)
+! 66 !{
+! 67 ! return x-TwoTo32*floor(x*TwoToMinus32);
+! 68 !}
+! 70 !static double mod(double x, double oneoverm, double m)
+! 71 !{
+! 72 ! return x-m*floor(x*oneoverm);
+! 73 !}
+! 75 !#endif
+! 78 !static void cleanup(double *dt, int from, int tlen)
+! 79 !{
+! 80 ! int i;
+! 81 ! double tmp,tmp1,x,x1;
+! 83 ! tmp=tmp1=Zero;
+! 84 ! /* original code **
+! 85 ! for(i=2*from;i<2*tlen-2;i++)
+! 86 ! {
+! 87 ! x=dt[i];
+! 88 ! dt[i]=lower32(x,Zero)+tmp1;
+! 89 ! tmp1=tmp;
+! 90 ! tmp=upper32(x);
+! 91 ! }
+! 92 ! dt[tlen-2]+=tmp1;
+! 93 ! dt[tlen-1]+=tmp;
+! 94 ! **end original code ***/
+! 95 ! /* new code ***/
+! 96 ! for(i=2*from;i<2*tlen;i+=2)
+! 97 ! {
+! 98 ! x=dt[i];
+! 99 ! x1=dt[i+1];
+! 100 ! dt[i]=lower32(x,Zero)+tmp;
+! 101 ! dt[i+1]=lower32(x1,Zero)+tmp1;
+! 102 ! tmp=upper32(x);
+! 103 ! tmp1=upper32(x1);
+! 104 ! }
+! 105 ! /** end new code **/
+! 106 !}
+! 109 !void conv_d16_to_i32(unsigned int *i32, double *d16, long long *tmp, int ilen)
+! 110 !{
+! 111 !int i;
+! 112 !long long t, t1, a, b, c, d;
+! 114 ! t1=0;
+! 115 ! a=(long long)d16[0];
+
+/* 0x0004 115 */ ldd [%i1],%f2 ! %f2 = d16[0]
+
+! 116 ! b=(long long)d16[1];
+! 117 ! for(i=0; i<ilen-1; i++)
+
+/* 0x0008 117 */ sub %i3,1,%o1 ! %o1 = ilen-1
+/* 0x000c 110 */ or %g0,%i0,%g1 ! %g1 = i32
+/* 0x0010 116 */ ldd [%i1+8],%f4 ! %f4 = d16[1]
+/* 0x0014 117 */ cmp %o1,0 ! condition codes consumed by the ble at 0x005c
+/* 0x0018 114 */ or %g0,0,%g5 ! t1 = 0
+/* 0x001c 115 */ fdtox %f2,%f2 ! a = (long long)d16[0]
+/* 0x0020 */ std %f2,[%sp+2247] ! pass a through a stack slot to reach an integer register
+/* 0x0024 117 */ or %g0,0,%o0 ! i = 0
+/* 0x0028 116 */ fdtox %f4,%f2 ! b = (long long)d16[1]
+/* 0x002c */ std %f2,[%sp+2239]
+/* 0x0030 110 */ sub %o1,1,%o7
+/* 0x0034 */ or %g0,%i1,%o4
+/* 0x0038 */ sethi %hi(0xfc00),%o3
+/* 0x003c */ or %g0,-1,%o1
+/* 0x0040 */ or %g0,2,%i1
+/* 0x0044 */ srl %o1,0,%g3 ! %g3 = 0xffffffff mask
+/* 0x0048 */ or %g0,%o4,%g4 ! %g4 = d16
+/* 0x004c 116 */ ldx [%sp+2239],%i2 ! %i2 = b
+/* 0x0050 */ add %o3,1023,%o5 ! %o5 = 0xfc00+1023 = 0xffff mask
+/* 0x0054 117 */ sub %o7,1,%o2
+/* 0x0058 115 */ ldx [%sp+2247],%i3 ! %i3 = a
+/* 0x005c 117 */ ble,pt %icc,.L900000113 ! ilen-1 <= 0: no loop iterations, go to the tail
+/* 0x0060 */ sethi %hi(0xfc00),%g2
+/* 0x0064 */ add %o7,1,%g2
+
+! 118 ! {
+! 119 ! c=(long long)d16[2*i+2];
+! 120 ! t1+=a&0xffffffff;
+! 121 ! t=(a>>32);
+! 122 ! d=(long long)d16[2*i+3];
+! 123 ! t1+=(b&0xffff)<<16;
+
+/* 0x0068 123 */ and %i2,%o5,%i4
+/* 0x006c */ sllx %i4,16,%o1
+/* 0x0070 117 */ cmp %g2,6
+/* 0x0074 */ bl,pn %icc,.L77000134 ! fewer than 6 iterations: use the one-iteration-per-pass loop
+/* 0x0078 */ or %g0,3,%i0
+/* 0x007c 119 */ ldd [%o4+16],%f0
+/* 0x0080 120 */ and %i3,%g3,%o3
+
+! 124 ! t+=(b>>16)+(t1>>32);
+
+/* 0x0084 124 */ srax %i2,16,%i5
+/* 0x0088 117 */ add %o3,%o1,%i4
+/* 0x008c 121 */ srax %i3,32,%i3
+/* 0x0090 119 */ fdtox %f0,%f0
+/* 0x0094 */ std %f0,[%sp+2231]
+
+! 125 ! i32[i]=t1&0xffffffff;
+
+/* 0x0098 125 */ and %i4,%g3,%l0
+/* 0x009c 117 */ or %g0,72,%o3
+/* 0x00a0 122 */ ldd [%g4+24],%f0
+/* 0x00a4 117 */ or %g0,64,%o4
+/* 0x00a8 */ or %g0,4,%o1
+
+! 126 ! t1=t;
+! 127 ! a=c;
+! 128 ! b=d;
+
+/* 0x00ac 128 */ or %g0,5,%i0
+/* 0x00b0 */ or %g0,4,%i1
+/* 0x00b4 119 */ ldx [%sp+2231],%g2
+/* 0x00b8 122 */ fdtox %f0,%f0
+/* 0x00bc 128 */ or %g0,4,%o0
+/* 0x00c0 122 */ std %f0,[%sp+2223]
+/* 0x00c4 */ ldd [%g4+40],%f2
+/* 0x00c8 120 */ and %g2,%g3,%i2
+/* 0x00cc 119 */ ldd [%g4+32],%f0
+/* 0x00d0 121 */ srax %g2,32,%g2
+/* 0x00d4 122 */ ldd [%g4+56],%f4
+/* 0x00d8 */ fdtox %f2,%f2
+/* 0x00dc */ ldx [%sp+2223],%g5
+/* 0x00e0 119 */ fdtox %f0,%f0
+/* 0x00e4 125 */ st %l0,[%g1]
+/* 0x00e8 124 */ srax %i4,32,%l0
+/* 0x00ec 122 */ fdtox %f4,%f4
+/* 0x00f0 */ std %f2,[%sp+2223]
+/* 0x00f4 123 */ and %g5,%o5,%i4
+/* 0x00f8 124 */ add %i5,%l0,%i5
+/* 0x00fc 119 */ std %f0,[%sp+2231]
+/* 0x0100 123 */ sllx %i4,16,%i4
+/* 0x0104 124 */ add %i3,%i5,%i3
+/* 0x0108 119 */ ldd [%g4+48],%f2
+/* 0x010c 124 */ srax %g5,16,%g5
+/* 0x0110 117 */ add %i2,%i4,%i2
+/* 0x0114 122 */ ldd [%g4+72],%f0
+/* 0x0118 117 */ add %i2,%i3,%i4
+/* 0x011c 124 */ srax %i4,32,%i5
+/* 0x0120 119 */ fdtox %f2,%f2
+/* 0x0124 125 */ and %i4,%g3,%i4
+/* 0x0128 122 */ ldx [%sp+2223],%i2
+/* 0x012c 124 */ add %g5,%i5,%g5
+/* 0x0130 119 */ ldx [%sp+2231],%i3
+/* 0x0134 124 */ add %g2,%g5,%g5
+/* 0x0138 119 */ std %f2,[%sp+2231]
+/* 0x013c 122 */ std %f4,[%sp+2223]
+/* 0x0140 119 */ ldd [%g4+64],%f2
+/* 0x0144 125 */ st %i4,[%g1+4]
+ .L900000108: ! main loop: %o0 advances by 2 and two words are stored per pass
+/* 0x0148 122 */ ldx [%sp+2223],%i4
+/* 0x014c 128 */ add %o0,2,%o0
+/* 0x0150 */ add %i0,4,%i0
+/* 0x0154 119 */ ldx [%sp+2231],%l0
+/* 0x0158 117 */ add %o3,16,%o3
+/* 0x015c 123 */ and %i2,%o5,%g2
+/* 0x0160 */ sllx %g2,16,%i5
+/* 0x0164 120 */ and %i3,%g3,%g2
+/* 0x0168 122 */ ldd [%g4+%o3],%f4
+/* 0x016c */ fdtox %f0,%f0
+/* 0x0170 */ std %f0,[%sp+2223]
+/* 0x0174 124 */ srax %i2,16,%i2
+/* 0x0178 117 */ add %g2,%i5,%g2
+/* 0x017c 119 */ fdtox %f2,%f0
+/* 0x0180 117 */ add %o4,16,%o4
+/* 0x0184 119 */ std %f0,[%sp+2231]
+/* 0x0188 117 */ add %g2,%g5,%g2
+/* 0x018c 119 */ ldd [%g4+%o4],%f2
+/* 0x0190 124 */ srax %g2,32,%i5
+/* 0x0194 128 */ cmp %o0,%o2
+/* 0x0198 121 */ srax %i3,32,%g5
+/* 0x019c 124 */ add %i2,%i5,%i2
+/* 0x01a0 */ add %g5,%i2,%i5
+/* 0x01a4 117 */ add %o1,4,%o1
+/* 0x01a8 125 */ and %g2,%g3,%g2
+/* 0x01ac 127 */ or %g0,%l0,%g5
+/* 0x01b0 125 */ st %g2,[%g1+%o1]
+/* 0x01b4 128 */ add %i1,4,%i1
+/* 0x01b8 122 */ ldx [%sp+2223],%i2
+/* 0x01bc 119 */ ldx [%sp+2231],%i3
+/* 0x01c0 117 */ add %o3,16,%o3
+/* 0x01c4 123 */ and %i4,%o5,%g2
+/* 0x01c8 */ sllx %g2,16,%l0
+/* 0x01cc 120 */ and %g5,%g3,%g2
+/* 0x01d0 122 */ ldd [%g4+%o3],%f0
+/* 0x01d4 */ fdtox %f4,%f4
+/* 0x01d8 */ std %f4,[%sp+2223]
+/* 0x01dc 124 */ srax %i4,16,%i4
+/* 0x01e0 117 */ add %g2,%l0,%g2
+/* 0x01e4 119 */ fdtox %f2,%f2
+/* 0x01e8 117 */ add %o4,16,%o4
+/* 0x01ec 119 */ std %f2,[%sp+2231]
+/* 0x01f0 117 */ add %g2,%i5,%g2
+/* 0x01f4 119 */ ldd [%g4+%o4],%f2
+/* 0x01f8 124 */ srax %g2,32,%i5
+/* 0x01fc 121 */ srax %g5,32,%g5
+/* 0x0200 124 */ add %i4,%i5,%i4
+/* 0x0204 */ add %g5,%i4,%g5
+/* 0x0208 117 */ add %o1,4,%o1
+/* 0x020c 125 */ and %g2,%g3,%g2
+/* 0x0210 128 */ ble,pt %icc,.L900000108
+/* 0x0214 */ st %g2,[%g1+%o1]
+ .L900000111: ! after the main loop: finishes the values already in flight (NOTE(review): appears to be the pipeline drain)
+/* 0x0218 122 */ ldx [%sp+2223],%o2
+/* 0x021c 123 */ and %i2,%o5,%i4
+/* 0x0220 120 */ and %i3,%g3,%g2
+/* 0x0224 123 */ sllx %i4,16,%i4
+/* 0x0228 119 */ ldx [%sp+2231],%i5
+/* 0x022c 128 */ cmp %o0,%o7
+/* 0x0230 124 */ srax %i2,16,%i2
+/* 0x0234 117 */ add %g2,%i4,%g2
+/* 0x0238 122 */ fdtox %f0,%f4
+/* 0x023c */ std %f4,[%sp+2223]
+/* 0x0240 117 */ add %g2,%g5,%g5
+/* 0x0244 123 */ and %o2,%o5,%l0
+/* 0x0248 124 */ srax %g5,32,%l1
+/* 0x024c 120 */ and %i5,%g3,%i4
+/* 0x0250 119 */ fdtox %f2,%f0
+/* 0x0254 121 */ srax %i3,32,%g2
+/* 0x0258 119 */ std %f0,[%sp+2231]
+/* 0x025c 124 */ add %i2,%l1,%i2
+/* 0x0260 123 */ sllx %l0,16,%i3
+/* 0x0264 124 */ add %g2,%i2,%i2
+/* 0x0268 */ srax %o2,16,%o2
+/* 0x026c 117 */ add %o1,4,%g2
+/* 0x0270 */ add %i4,%i3,%o1
+/* 0x0274 125 */ and %g5,%g3,%g5
+/* 0x0278 */ st %g5,[%g1+%g2]
+/* 0x027c 119 */ ldx [%sp+2231],%i3
+/* 0x0280 117 */ add %o1,%i2,%o1
+/* 0x0284 */ add %g2,4,%g2
+/* 0x0288 124 */ srax %o1,32,%i4
+/* 0x028c 122 */ ldx [%sp+2223],%i2
+/* 0x0290 125 */ and %o1,%g3,%g5
+/* 0x0294 121 */ srax %i5,32,%o1
+/* 0x0298 124 */ add %o2,%i4,%o2
+/* 0x029c 125 */ st %g5,[%g1+%g2]
+/* 0x02a0 128 */ bg,pn %icc,.L77000127
+/* 0x02a4 */ add %o1,%o2,%g5
+/* 0x02a8 */ add %i0,6,%i0
+/* 0x02ac */ add %i1,6,%i1
+ .L77000134:
+/* 0x02b0 119 */ sra %i1,0,%o2
+ .L900000112: ! simple loop: one source iteration per pass (%o0 = i)
+/* 0x02b4 119 */ sllx %o2,3,%o3
+/* 0x02b8 120 */ and %i3,%g3,%o1 ! a & 0xffffffff
+/* 0x02bc 119 */ ldd [%g4+%o3],%f0
+/* 0x02c0 122 */ sra %i0,0,%o3
+/* 0x02c4 123 */ and %i2,%o5,%o2 ! b & 0xffff
+/* 0x02c8 122 */ sllx %o3,3,%o3
+/* 0x02cc 120 */ add %g5,%o1,%o1 ! t1 += a & 0xffffffff
+/* 0x02d0 119 */ fdtox %f0,%f0
+/* 0x02d4 */ std %f0,[%sp+2231]
+/* 0x02d8 123 */ sllx %o2,16,%o2
+/* 0x02dc */ add %o1,%o2,%o2 ! t1 += (b & 0xffff) << 16
+/* 0x02e0 128 */ add %i1,2,%i1
+/* 0x02e4 122 */ ldd [%g4+%o3],%f0
+/* 0x02e8 124 */ srax %o2,32,%g2 ! t1 >> 32
+/* 0x02ec 125 */ and %o2,%g3,%o3 ! t1 & 0xffffffff
+/* 0x02f0 124 */ srax %i2,16,%o1 ! b >> 16
+/* 0x02f4 128 */ add %i0,2,%i0
+/* 0x02f8 122 */ fdtox %f0,%f0
+/* 0x02fc */ std %f0,[%sp+2223]
+/* 0x0300 125 */ sra %o0,0,%o2
+/* 0x0304 */ sllx %o2,2,%o2 ! byte offset of i32[i]
+/* 0x0308 124 */ add %o1,%g2,%g5
+/* 0x030c 121 */ srax %i3,32,%g2 ! a >> 32
+/* 0x0310 128 */ add %o0,1,%o0 ! i++
+/* 0x0314 124 */ add %g2,%g5,%g5 ! next t1 = (a>>32) + (b>>16) + (t1>>32)
+/* 0x0318 128 */ cmp %o0,%o7
+/* 0x031c 119 */ ldx [%sp+2231],%o4
+/* 0x0320 122 */ ldx [%sp+2223],%i2 ! b = d
+/* 0x0324 125 */ st %o3,[%g1+%o2] ! i32[i] = t1 & 0xffffffff
+/* 0x0328 127 */ or %g0,%o4,%i3 ! a = c
+/* 0x032c 128 */ ble,pt %icc,.L900000112
+/* 0x0330 */ sra %i1,0,%o2
+ .L77000127:
+
+! 129 ! }
+! 130 ! t1+=a&0xffffffff;
+! 131 ! t=(a>>32);
+! 132 ! t1+=(b&0xffff)<<16;
+! 133 ! i32[i]=t1&0xffffffff;
+
+/* 0x0334 133 */ sethi %hi(0xfc00),%g2
+ .L900000113: ! tail: final word, C source lines 130-133
+/* 0x0338 133 */ or %g0,-1,%g3
+/* 0x033c */ add %g2,1023,%g2 ! %g2 = 0xffff
+/* 0x0340 */ srl %g3,0,%g3 ! %g3 = 0xffffffff
+/* 0x0344 */ and %i2,%g2,%g2 ! b & 0xffff
+/* 0x0348 */ and %i3,%g3,%g4 ! a & 0xffffffff
+/* 0x034c */ sllx %g2,16,%g2
+/* 0x0350 */ add %g5,%g4,%g4
+/* 0x0354 */ sra %o0,0,%g5
+/* 0x0358 */ add %g4,%g2,%g4
+/* 0x035c */ sllx %g5,2,%g2
+/* 0x0360 */ and %g4,%g3,%g3
+/* 0x0364 */ st %g3,[%g1+%g2] ! i32[i] = t1 & 0xffffffff
+/* 0x0368 */ ret ! Result =
+/* 0x036c */ restore %g0,%g0,%g0
+/* 0x0370 0 */ .type conv_d16_to_i32,2
+/* 0x0370 */ .size conv_d16_to_i32,(.-conv_d16_to_i32)
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 */ .align 8
+!
+! CONSTANT POOL
+!
+ .L_const_seg_900000201:
+/* 000000 0 */ .word 1127219200,0 ! 0x43300000,0: the double 2^52, used below for integer-to-double conversion
+/* 0x0008 0 */ .align 8
+/* 0x0008 */ .skip 24
+!
+! SUBROUTINE conv_i32_to_d32
+! Stores (double)i32[i] into d32[i] for i in [0,len) (C source interleaved below).
+! OFFSET SOURCE LINE LABEL INSTRUCTION
+
+ .global conv_i32_to_d32
+ conv_i32_to_d32:
+/* 000000 */ or %g0,%o7,%g3 ! keep the return address; the call below overwrites %o7
+
+! 135 !}
+! 137 !void conv_i32_to_d32(double *d32, unsigned int *i32, int len)
+! 138 !{
+! 139 !int i;
+! 141 !#pragma pipeloop(0)
+! 142 ! for(i=0;i<len;i++) d32[i]=(double)(i32[i]);
+
+/* 0x0004 142 */ cmp %o2,0
+ .L900000210:
+/* 0x0008 */ call .+8 ! position-independent code: puts the current PC in %o7
+/* 0x000c */ sethi /*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000210-.)),%g4
+/* 0x0010 142 */ or %g0,0,%o3 ! i = 0
+/* 0x0014 138 */ add %g4,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000210-.)),%g4
+/* 0x0018 142 */ sub %o2,1,%o4 ! %o4 = len-1
+/* 0x001c 138 */ add %g4,%o7,%g1 ! %g1 = PC + PC-relative offset of _GLOBAL_OFFSET_TABLE_
+/* 0x0020 142 */ ble,pt %icc,.L77000140 ! len <= 0: nothing to do
+/* 0x0024 */ or %g0,%g3,%o7 ! restore the return address
+/* 0x0028 */ sethi %hi(.L_const_seg_900000201),%g3
+/* 0x002c */ cmp %o2,12
+/* 0x0030 */ add %g3,%lo(.L_const_seg_900000201),%g2
+/* 0x0034 */ or %g0,%o1,%g5 ! %g5 = i32
+/* 0x0038 */ ldx [%g1+%g2],%g4 ! %g4 = address of the 2^52 constant, read from the table entry
+/* 0x003c */ or %g0,0,%g1
+/* 0x0040 */ or %g0,24,%g2
+/* 0x0044 */ bl,pn %icc,.L77000144 ! len < 12: use the one-element-per-pass loop
+/* 0x0048 */ or %g0,0,%g3
+/* 0x004c */ ld [%o1],%f13 ! low word of %f12:%f13 = i32[0]
+/* 0x0050 */ or %g0,7,%o3
+/* 0x0054 */ ldd [%g4],%f8 ! %f8:%f9 = 2^52
+/* 0x0058 */ sub %o2,5,%g3
+/* 0x005c */ or %g0,8,%g1
+/* 0x0060 */ ld [%o1+4],%f11
+/* 0x0064 */ ld [%o1+8],%f7
+/* 0x0068 */ fmovs %f8,%f12 ! high word = 0x43300000, so %f12:%f13 = 2^52 + i32[0]
+/* 0x006c */ ld [%o1+12],%f5
+/* 0x0070 */ fmovs %f8,%f10
+/* 0x0074 */ ld [%o1+16],%f3
+/* 0x0078 */ fmovs %f8,%f6
+/* 0x007c */ ld [%o1+20],%f1
+/* 0x0080 */ fsubd %f12,%f8,%f12 ! subtract 2^52, leaving (double)i32[0]
+/* 0x0084 */ std %f12,[%o0] ! d32[0]
+/* 0x0088 */ fsubd %f10,%f8,%f10
+/* 0x008c */ std %f10,[%o0+8] ! d32[1]
+ .L900000205: ! main loop: five elements stored per pass (%o3 advances by 5)
+/* 0x0090 */ ld [%o1+%g2],%f11
+/* 0x0094 */ add %g1,8,%g1
+/* 0x0098 */ add %o3,5,%o3
+/* 0x009c */ fsubd %f6,%f8,%f6
+/* 0x00a0 */ add %g2,4,%g2
+/* 0x00a4 */ std %f6,[%o0+%g1]
+/* 0x00a8 */ cmp %o3,%g3
+/* 0x00ac */ fmovs %f8,%f4
+/* 0x00b0 */ ld [%o1+%g2],%f7
+/* 0x00b4 */ fsubd %f4,%f8,%f12
+/* 0x00b8 */ add %g1,8,%g1
+/* 0x00bc */ add %g2,4,%g2
+/* 0x00c0 */ fmovs %f8,%f2
+/* 0x00c4 */ std %f12,[%o0+%g1]
+/* 0x00c8 */ ld [%o1+%g2],%f5
+/* 0x00cc */ fsubd %f2,%f8,%f12
+/* 0x00d0 */ add %g1,8,%g1
+/* 0x00d4 */ add %g2,4,%g2
+/* 0x00d8 */ fmovs %f8,%f0
+/* 0x00dc */ std %f12,[%o0+%g1]
+/* 0x00e0 */ ld [%o1+%g2],%f3
+/* 0x00e4 */ fsubd %f0,%f8,%f12
+/* 0x00e8 */ add %g1,8,%g1
+/* 0x00ec */ add %g2,4,%g2
+/* 0x00f0 */ fmovs %f8,%f10
+/* 0x00f4 */ std %f12,[%o0+%g1]
+/* 0x00f8 */ ld [%o1+%g2],%f1
+/* 0x00fc */ fsubd %f10,%f8,%f10
+/* 0x0100 */ add %g1,8,%g1
+/* 0x0104 */ add %g2,4,%g2
+/* 0x0108 */ std %f10,[%o0+%g1]
+/* 0x010c */ ble,pt %icc,.L900000205
+/* 0x0110 */ fmovs %f8,%f6
+ .L900000208: ! after the main loop: converts and stores the elements already loaded
+/* 0x0114 */ fmovs %f8,%f4
+/* 0x0118 */ ld [%o1+%g2],%f11
+/* 0x011c */ add %g1,8,%g3
+/* 0x0120 */ fmovs %f8,%f2
+/* 0x0124 */ add %g1,16,%g1
+/* 0x0128 */ cmp %o3,%o4
+/* 0x012c */ fmovs %f8,%f0
+/* 0x0130 */ add %g1,8,%o1
+/* 0x0134 */ add %g1,16,%o2
+/* 0x0138 */ fmovs %f8,%f10
+/* 0x013c */ add %g1,24,%g2
+/* 0x0140 */ fsubd %f6,%f8,%f6
+/* 0x0144 */ std %f6,[%o0+%g3]
+/* 0x0148 */ fsubd %f4,%f8,%f4
+/* 0x014c */ std %f4,[%o0+%g1]
+/* 0x0150 */ sra %o3,0,%g1
+/* 0x0154 */ fsubd %f2,%f8,%f2
+/* 0x0158 */ std %f2,[%o0+%o1]
+/* 0x015c */ sllx %g1,2,%g3
+/* 0x0160 */ fsubd %f0,%f8,%f0
+/* 0x0164 */ std %f0,[%o0+%o2]
+/* 0x0168 */ fsubd %f10,%f8,%f0
+/* 0x016c */ bg,pn %icc,.L77000140
+/* 0x0170 */ std %f0,[%o0+%g2]
+ .L77000144:
+/* 0x0174 */ ldd [%g4],%f8
+ .L900000211: ! simple loop: one element per pass
+/* 0x0178 */ ld [%g5+%g3],%f13 ! low word = i32[i]
+/* 0x017c */ sllx %g1,3,%g2 ! byte offset of d32[i]
+/* 0x0180 */ add %o3,1,%o3 ! i++
+/* 0x0184 */ sra %o3,0,%g1
+/* 0x0188 */ cmp %o3,%o4
+/* 0x018c */ fmovs %f8,%f12 ! high word = 0x43300000
+/* 0x0190 */ sllx %g1,2,%g3 ! byte offset of the next i32 element
+/* 0x0194 */ fsubd %f12,%f8,%f0 ! (2^52 + i32[i]) - 2^52
+/* 0x0198 */ std %f0,[%o0+%g2] ! d32[i]
+/* 0x019c */ ble,a,pt %icc,.L900000211
+/* 0x01a0 */ ldd [%g4],%f8
+ .L77000140:
+/* 0x01a4 */ retl ! Result =
+/* 0x01a8 */ nop
+/* 0x01ac 0 */ .type conv_i32_to_d32,2
+/* 0x01ac */ .size conv_i32_to_d32,(.-conv_i32_to_d32)
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 */ .align 8
+!
+! CONSTANT POOL
+!
+ .L_const_seg_900000301:
+/* 000000 0 */ .word 1127219200,0 ! 0x43300000,0: the double 2^52, used below for integer-to-double conversion
+/* 0x0008 0 */ .align 8
+/* 0x0008 */ .skip 24
+!
+! SUBROUTINE conv_i32_to_d16
+! Splits each i32[i] into d16[2*i] = low 16 bits and d16[2*i+1] = high 16 bits, as doubles (C source interleaved below).
+! OFFSET SOURCE LINE LABEL INSTRUCTION
+
+ .global conv_i32_to_d16
+ conv_i32_to_d16:
+/* 000000 */ save %sp,-192,%sp
+ .L900000310:
+/* 0x0004 */ call .+8 ! position-independent code: puts the current PC in %o7
+/* 0x0008 */ sethi /*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000310-.)),%g3
+
+! 143 !}
+! 146 !void conv_i32_to_d16(double *d16, unsigned int *i32, int len)
+! 147 !{
+! 148 !int i;
+! 149 !unsigned int a;
+! 151 !#pragma pipeloop(0)
+! 152 ! for(i=0;i<len;i++)
+
+/* 0x000c 152 */ cmp %i2,0
+/* 0x0010 147 */ add %g3,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000310-.)),%g3
+/* 0x0014 152 */ ble,pt %icc,.L77000150 ! len <= 0: nothing to do
+/* 0x0018 */ add %g3,%o7,%o0 ! %o0 = PC + PC-relative offset of _GLOBAL_OFFSET_TABLE_
+
+! 153 ! {
+! 154 ! a=i32[i];
+! 155 ! d16[2*i]=(double)(a&0xffff);
+! 156 ! d16[2*i+1]=(double)(a>>16);
+
+/* 0x001c 156 */ sethi %hi(.L_const_seg_900000301),%g2
+/* 0x0020 147 */ or %g0,%i2,%o1 ! %o1 = len
+/* 0x0024 152 */ sethi %hi(0xfc00),%g3
+/* 0x0028 156 */ add %g2,%lo(.L_const_seg_900000301),%g2
+/* 0x002c 152 */ or %g0,%o1,%g4
+/* 0x0030 156 */ ldx [%o0+%g2],%o5 ! %o5 = address of the 2^52 constant, read from the table entry
+/* 0x0034 152 */ add %g3,1023,%g1 ! %g1 = 0xfc00+1023 = 0xffff mask
+/* 0x0038 147 */ or %g0,%i1,%o7 ! %o7 = i32
+/* 0x003c 152 */ or %g0,0,%i2 ! i = 0
+/* 0x0040 */ sub %o1,1,%g5 ! %g5 = len-1
+/* 0x0044 */ or %g0,0,%g3
+/* 0x0048 */ or %g0,1,%g2
+/* 0x004c 154 */ or %g0,0,%o2
+/* 0x0050 */ cmp %g4,6
+/* 0x0054 152 */ bl,pn %icc,.L77000154 ! len < 6: use the one-element-per-pass loop
+/* 0x0058 */ ldd [%o5],%f0 ! %f0:%f1 = 2^52
+/* 0x005c */ sub %o1,2,%o3
+/* 0x0060 */ or %g0,16,%o2
+/* 0x0064 154 */ ld [%i1],%o4 ! a = i32[0]
+/* 0x0068 156 */ or %g0,3,%g2
+/* 0x006c */ or %g0,2,%g3
+/* 0x0070 155 */ fmovs %f0,%f2
+/* 0x0074 156 */ or %g0,4,%i2
+/* 0x0078 155 */ and %o4,%g1,%o0 ! a & 0xffff
+/* 0x007c */ st %o0,[%sp+2227] ! pass the integer through a stack slot to reach an FP register
+/* 0x0080 */ fmovs %f0,%f4
+/* 0x0084 156 */ srl %o4,16,%i4 ! a >> 16
+/* 0x0088 152 */ or %g0,12,%o4
+/* 0x008c */ or %g0,24,%o0
+/* 0x0090 155 */ ld [%sp+2227],%f3 ! %f2:%f3 = 2^52 + (a & 0xffff)
+/* 0x0094 */ fsubd %f2,%f0,%f2 ! subtract 2^52, leaving (double)(a & 0xffff)
+/* 0x0098 */ std %f2,[%i0] ! d16[0]
+/* 0x009c 156 */ st %i4,[%sp+2223]
+/* 0x00a0 154 */ ld [%o7+4],%o1
+/* 0x00a4 156 */ fmovs %f0,%f2
+/* 0x00a8 155 */ and %o1,%g1,%i1
+/* 0x00ac 156 */ ld [%sp+2223],%f3
+/* 0x00b0 */ srl %o1,16,%o1
+/* 0x00b4 */ fsubd %f2,%f0,%f2
+/* 0x00b8 */ std %f2,[%i0+8] ! d16[1]
+/* 0x00bc */ st %o1,[%sp+2223]
+/* 0x00c0 155 */ st %i1,[%sp+2227]
+/* 0x00c4 154 */ ld [%o7+8],%o1
+/* 0x00c8 156 */ fmovs %f0,%f2
+/* 0x00cc 155 */ and %o1,%g1,%g4
+/* 0x00d0 */ ld [%sp+2227],%f5
+/* 0x00d4 156 */ srl %o1,16,%o1
+/* 0x00d8 */ ld [%sp+2223],%f3
+/* 0x00dc */ st %o1,[%sp+2223]
+/* 0x00e0 155 */ fsubd %f4,%f0,%f4
+/* 0x00e4 */ st %g4,[%sp+2227]
+/* 0x00e8 156 */ fsubd %f2,%f0,%f2
+/* 0x00ec 154 */ ld [%o7+12],%o1
+/* 0x00f0 155 */ std %f4,[%i0+16]
+/* 0x00f4 156 */ std %f2,[%i0+24]
+ .L900000306: ! main loop: %i2 advances by 2 and two input words are split per pass
+/* 0x00f8 155 */ ld [%sp+2227],%f5
+/* 0x00fc 156 */ add %i2,2,%i2
+/* 0x0100 */ add %g2,4,%g2
+/* 0x0104 */ ld [%sp+2223],%f3
+/* 0x0108 */ cmp %i2,%o3
+/* 0x010c */ add %g3,4,%g3
+/* 0x0110 155 */ and %o1,%g1,%g4
+/* 0x0114 156 */ srl %o1,16,%o1
+/* 0x0118 155 */ st %g4,[%sp+2227]
+/* 0x011c 156 */ st %o1,[%sp+2223]
+/* 0x0120 152 */ add %o4,4,%o1
+/* 0x0124 154 */ ld [%o7+%o1],%o4
+/* 0x0128 156 */ fmovs %f0,%f2
+/* 0x012c 155 */ fmovs %f0,%f4
+/* 0x0130 */ fsubd %f4,%f0,%f4
+/* 0x0134 152 */ add %o2,16,%o2
+/* 0x0138 156 */ fsubd %f2,%f0,%f2
+/* 0x013c 155 */ std %f4,[%i0+%o2]
+/* 0x0140 152 */ add %o0,16,%o0
+/* 0x0144 156 */ std %f2,[%i0+%o0]
+/* 0x0148 155 */ ld [%sp+2227],%f5
+/* 0x014c 156 */ ld [%sp+2223],%f3
+/* 0x0150 155 */ and %o4,%g1,%g4
+/* 0x0154 156 */ srl %o4,16,%o4
+/* 0x0158 155 */ st %g4,[%sp+2227]
+/* 0x015c 156 */ st %o4,[%sp+2223]
+/* 0x0160 152 */ add %o1,4,%o4
+/* 0x0164 154 */ ld [%o7+%o4],%o1
+/* 0x0168 156 */ fmovs %f0,%f2
+/* 0x016c 155 */ fmovs %f0,%f4
+/* 0x0170 */ fsubd %f4,%f0,%f4
+/* 0x0174 152 */ add %o2,16,%o2
+/* 0x0178 156 */ fsubd %f2,%f0,%f2
+/* 0x017c 155 */ std %f4,[%i0+%o2]
+/* 0x0180 152 */ add %o0,16,%o0
+/* 0x0184 156 */ ble,pt %icc,.L900000306
+/* 0x0188 */ std %f2,[%i0+%o0]
+ .L900000309: ! after the main loop: converts and stores the values already loaded
+/* 0x018c 155 */ ld [%sp+2227],%f5
+/* 0x0190 156 */ fmovs %f0,%f2
+/* 0x0194 */ srl %o1,16,%o3
+/* 0x0198 */ ld [%sp+2223],%f3
+/* 0x019c 155 */ and %o1,%g1,%i1
+/* 0x01a0 152 */ add %o2,16,%g4
+/* 0x01a4 155 */ fmovs %f0,%f4
+/* 0x01a8 */ st %i1,[%sp+2227]
+/* 0x01ac 152 */ add %o0,16,%o2
+/* 0x01b0 156 */ st %o3,[%sp+2223]
+/* 0x01b4 154 */ sra %i2,0,%o3
+/* 0x01b8 152 */ add %g4,16,%o1
+/* 0x01bc 155 */ fsubd %f4,%f0,%f4
+/* 0x01c0 */ std %f4,[%i0+%g4]
+/* 0x01c4 152 */ add %o0,32,%o0
+/* 0x01c8 156 */ fsubd %f2,%f0,%f2
+/* 0x01cc */ std %f2,[%i0+%o2]
+/* 0x01d0 */ sllx %o3,2,%o2
+/* 0x01d4 155 */ ld [%sp+2227],%f5
+/* 0x01d8 156 */ cmp %i2,%g5
+/* 0x01dc */ add %g2,6,%g2
+/* 0x01e0 */ ld [%sp+2223],%f3
+/* 0x01e4 */ add %g3,6,%g3
+/* 0x01e8 155 */ fmovs %f0,%f4
+/* 0x01ec 156 */ fmovs %f0,%f2
+/* 0x01f0 155 */ fsubd %f4,%f0,%f4
+/* 0x01f4 */ std %f4,[%i0+%o1]
+/* 0x01f8 156 */ fsubd %f2,%f0,%f0
+/* 0x01fc */ bg,pn %icc,.L77000150
+/* 0x0200 */ std %f0,[%i0+%o0]
+ .L77000154:
+/* 0x0204 155 */ ldd [%o5],%f0
+ .L900000311: ! simple loop: one input word per pass
+/* 0x0208 154 */ ld [%o7+%o2],%o0 ! a = i32[i]
+/* 0x020c 155 */ sra %g3,0,%o1
+/* 0x0210 */ fmovs %f0,%f2
+/* 0x0214 */ sllx %o1,3,%o2 ! byte offset of d16[2*i]
+/* 0x0218 156 */ add %i2,1,%i2 ! i++
+/* 0x021c 155 */ and %o0,%g1,%o1 ! a & 0xffff
+/* 0x0220 */ st %o1,[%sp+2227]
+/* 0x0224 156 */ add %g3,2,%g3
+/* 0x0228 */ srl %o0,16,%o1 ! a >> 16
+/* 0x022c */ cmp %i2,%g5
+/* 0x0230 */ sra %g2,0,%o0
+/* 0x0234 */ add %g2,2,%g2
+/* 0x0238 */ sllx %o0,3,%o0 ! byte offset of d16[2*i+1]
+/* 0x023c 155 */ ld [%sp+2227],%f3
+/* 0x0240 154 */ sra %i2,0,%o3
+/* 0x0244 155 */ fsubd %f2,%f0,%f2
+/* 0x0248 */ std %f2,[%i0+%o2] ! d16[2*i]
+/* 0x024c */ sllx %o3,2,%o2 ! byte offset of the next i32 element
+/* 0x0250 156 */ st %o1,[%sp+2223]
+/* 0x0254 */ fmovs %f0,%f2
+/* 0x0258 */ ld [%sp+2223],%f3
+/* 0x025c */ fsubd %f2,%f0,%f0
+/* 0x0260 */ std %f0,[%i0+%o0] ! d16[2*i+1]
+/* 0x0264 */ ble,a,pt %icc,.L900000311
+/* 0x0268 */ ldd [%o5],%f0
+ .L77000150:
+/* 0x026c */ ret ! Result =
+/* 0x0270 */ restore %g0,%g0,%g0
+/* 0x0274 0 */ .type conv_i32_to_d16,2
+/* 0x0274 */ .size conv_i32_to_d16,(.-conv_i32_to_d16)
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 */ .align 8
+!
+! CONSTANT POOL
+!
+ .L_const_seg_900000401:
+/* 000000 0 */ .word 1127219200,0 ! 0x43300000,0: the double 2^52, used by the remainder loop for integer-to-double conversion
+/* 0x0008 0 */ .align 8
+/* 0x0008 */ .skip 24
+!
+! SUBROUTINE conv_i32_to_d32_and_d16
+! For each i32[i]: d32[i] = value as double, d16[2*i] = low 16 bits, d16[2*i+1] = high 16 bits (C source interleaved below).
+! OFFSET SOURCE LINE LABEL INSTRUCTION
+
+ .global conv_i32_to_d32_and_d16
+ conv_i32_to_d32_and_d16:
+/* 000000 */ save %sp,-192,%sp
+ .L900000415:
+/* 0x0004 */ call .+8 ! position-independent code: puts the current PC in %o7
+/* 0x0008 */ sethi /*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000415-.)),%g3
+
+! 157 ! }
+! 158 !}
+! 161 !void conv_i32_to_d32_and_d16(double *d32, double *d16,
+! 162 ! unsigned int *i32, int len)
+! 163 !{
+! 164 !int i = 0;
+! 165 !unsigned int a;
+! 167 !#pragma pipeloop(0)
+! 168 !#ifdef RF_INLINE_MACROS
+! 169 ! for(;i<len-3;i+=4)
+! 170 ! {
+! 171 ! i16_to_d16_and_d32x4(&TwoToMinus16, &TwoTo16, &Zero,
+! 172 ! &(d16[2*i]), &(d32[i]), (float *)(&(i32[i])));
+
+/* 0x000c 172 */ sethi %hi(Zero),%g2
+/* 0x0010 163 */ add %g3,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000415-.)),%g3
+/* 0x0014 */ or %g0,%i3,%g5 ! %g5 = len
+/* 0x0018 */ add %g3,%o7,%o3 ! %o3 = PC + PC-relative offset of _GLOBAL_OFFSET_TABLE_
+/* 0x001c 172 */ add %g2,%lo(Zero),%g2
+/* 0x0020 */ ldx [%o3+%g2],%o0 ! %o0 = &Zero, read from the table entry
+/* 0x0024 */ sethi %hi(TwoToMinus16),%g3
+/* 0x0028 163 */ or %g0,%i0,%i3 ! %i3 = d32
+/* 0x002c 169 */ sub %g5,3,%o1 ! len-3
+/* 0x0030 172 */ sethi %hi(TwoTo16),%g4
+/* 0x0034 163 */ or %g0,%i2,%i0 ! %i0 = i32
+/* 0x0038 172 */ add %g3,%lo(TwoToMinus16),%g2
+/* 0x003c */ ldx [%o3+%g2],%o2 ! %o2 = &TwoToMinus16
+/* 0x0040 169 */ cmp %o1,0
+/* 0x0044 164 */ or %g0,0,%i2 ! i = 0
+/* 0x0048 169 */ ble,pt %icc,.L900000418 ! len-3 <= 0: skip the four-at-a-time loop
+/* 0x004c */ cmp %i2,%g5
+/* 0x0050 */ ldd [%o0],%f2
+/* 0x0054 172 */ add %g4,%lo(TwoTo16),%g3
+/* 0x0058 */ ldx [%o3+%g3],%o1 ! %o1 = &TwoTo16
+/* 0x005c 169 */ sub %g5,4,%o4
+/* 0x0060 */ or %g0,0,%o5
+ .L900000417: ! four-at-a-time loop: expanded i16_to_d16_and_d32x4 template, i advances by 4
+/* 0x0064 172 */ sra %i2,0,%g2
+/* 0x0068 */ fmovd %f2,%f14 ! start each value from Zero so its high word is 0
+/* 0x006c */ ldd [%o2],%f0 ! %f0 = 2^-16
+/* 0x0070 */ sllx %g2,2,%g3
+/* 0x0074 */ fmovd %f2,%f10
+/* 0x0078 */ ldd [%o1],%f16 ! %f16 = 2^16
+/* 0x007c */ ld [%g3+%i0],%f15 ! low word = i32[i]
+/* 0x0080 */ add %i0,%g3,%g3
+/* 0x0084 */ fmovd %f2,%f6
+/* 0x0088 */ ld [%g3+4],%f11 ! i32[i+1]
+/* 0x008c */ sra %o5,0,%g4
+/* 0x0090 */ add %i2,4,%i2 ! i += 4
+/* 0x0094 */ ld [%g3+8],%f7 ! i32[i+2]
+/* 0x0098 */ fxtod %f14,%f14 ! zero-extended 64-bit integer -> double
+/* 0x009c */ sllx %g2,3,%g2
+/* 0x00a0 */ ld [%g3+12],%f3 ! i32[i+3]
+/* 0x00a4 */ fxtod %f10,%f10
+/* 0x00a8 */ sllx %g4,3,%g3
+/* 0x00ac */ fxtod %f6,%f6
+/* 0x00b0 */ std %f14,[%g2+%i3] ! d32[i]
+/* 0x00b4 */ add %i3,%g2,%g4
+/* 0x00b8 */ fxtod %f2,%f2
+/* 0x00bc */ fmuld %f0,%f14,%f12 ! value * 2^-16
+/* 0x00c0 */ std %f2,[%g4+24] ! d32[i+3]
+/* 0x00c4 */ fmuld %f0,%f10,%f8
+/* 0x00c8 */ std %f10,[%g4+8] ! d32[i+1]
+/* 0x00cc */ add %i1,%g3,%g2
+/* 0x00d0 */ fmuld %f0,%f6,%f4
+/* 0x00d4 */ std %f6,[%g4+16] ! d32[i+2]
+/* 0x00d8 */ cmp %i2,%o4
+/* 0x00dc */ fmuld %f0,%f2,%f0
+/* 0x00e0 */ fdtox %f12,%f12 ! truncate: integer value of the high 16 bits
+/* 0x00e4 */ add %o5,8,%o5
+/* 0x00e8 */ fdtox %f8,%f8
+/* 0x00ec */ fdtox %f4,%f4
+/* 0x00f0 */ fdtox %f0,%f0
+/* 0x00f4 */ fxtod %f12,%f12
+/* 0x00f8 */ std %f12,[%g2+8] ! d16[2*i+1] (high half)
+/* 0x00fc */ fxtod %f8,%f8
+/* 0x0100 */ std %f8,[%g2+24]
+/* 0x0104 */ fxtod %f4,%f4
+/* 0x0108 */ std %f4,[%g2+40]
+/* 0x010c */ fxtod %f0,%f0
+/* 0x0110 */ std %f0,[%g2+56]
+/* 0x0114 */ fmuld %f12,%f16,%f12 ! high half * 2^16
+/* 0x0118 */ fmuld %f8,%f16,%f8
+/* 0x011c */ fmuld %f4,%f16,%f4
+/* 0x0120 */ fsubd %f14,%f12,%f12 ! value - high*2^16 = low 16 bits
+/* 0x0124 */ std %f12,[%g3+%i1] ! d16[2*i] (low half)
+/* 0x0128 */ fmuld %f0,%f16,%f0
+/* 0x012c */ fsubd %f10,%f8,%f8
+/* 0x0130 */ std %f8,[%g2+16]
+/* 0x0134 */ fsubd %f6,%f4,%f4
+/* 0x0138 */ std %f4,[%g2+32]
+/* 0x013c */ fsubd %f2,%f0,%f0
+/* 0x0140 */ std %f0,[%g2+48]
+/* 0x0144 */ ble,a,pt %icc,.L900000417
+/* 0x0148 */ ldd [%o0],%f2 ! reload Zero for the next pass
+ .L77000159:
+
+! 173 ! }
+! 174 !#endif
+! 175 ! for(;i<len;i++)
+
+/* 0x014c 175 */ cmp %i2,%g5
+ .L900000418:
+/* 0x0150 175 */ bge,pt %icc,.L77000164 ! i >= len: no remaining elements
+/* 0x0154 */ nop
+
+! 176 ! {
+! 177 ! a=i32[i];
+! 178 ! d32[i]=(double)(i32[i]);
+! 179 ! d16[2*i]=(double)(a&0xffff);
+! 180 ! d16[2*i+1]=(double)(a>>16);
+
+/* 0x0158 180 */ sethi %hi(.L_const_seg_900000401),%g2
+/* 0x015c */ add %g2,%lo(.L_const_seg_900000401),%g2
+/* 0x0160 175 */ sethi %hi(0xfc00),%g3
+/* 0x0164 180 */ ldx [%o3+%g2],%g1 ! %g1 = address of the 2^52 constant, read from the table entry
+/* 0x0168 175 */ sll %i2,1,%i4 ! %i4 = 2*i
+/* 0x016c */ sub %g5,%i2,%g4 ! remaining element count = len - i
+/* 0x0170 177 */ sra %i2,0,%o3
+/* 0x0174 175 */ add %g3,1023,%g3 ! %g3 = 0xfc00+1023 = 0xffff mask
+/* 0x0178 178 */ ldd [%g1],%f2 ! %f2:%f3 = 2^52
+/* 0x017c */ sllx %o3,2,%o2
+/* 0x0180 175 */ add %i4,1,%g2 ! %g2 = 2*i+1
+/* 0x0184 177 */ or %g0,%o3,%o1
+/* 0x0188 */ cmp %g4,6
+/* 0x018c 175 */ bl,pn %icc,.L77000161 ! fewer than 6 remaining: use the one-element-per-pass loop
+/* 0x0190 */ sra %i2,0,%o3
+/* 0x0194 177 */ or %g0,%o2,%o0
+/* 0x0198 178 */ ld [%i0+%o2],%f5
+/* 0x019c 179 */ fmovs %f2,%f8
+/* 0x01a0 175 */ add %o0,4,%o3
+/* 0x01a4 177 */ ld [%i0+%o0],%o7
+/* 0x01a8 180 */ fmovs %f2,%f6
+/* 0x01ac 178 */ fmovs %f2,%f4
+/* 0x01b0 */ sllx %o1,3,%o2
+/* 0x01b4 175 */ add %o3,4,%o5
+/* 0x01b8 179 */ sra %i4,0,%o0
+/* 0x01bc 175 */ add %o3,8,%o4
+/* 0x01c0 178 */ fsubd %f4,%f2,%f4
+/* 0x01c4 */ std %f4,[%i3+%o2]
+/* 0x01c8 179 */ sllx %o0,3,%i5
+/* 0x01cc */ and %o7,%g3,%o0
+/* 0x01d0 */ st %o0,[%sp+2227]
+/* 0x01d4 175 */ add %i5,16,%o1
+/* 0x01d8 180 */ srl %o7,16,%g4
+/* 0x01dc */ add %i2,1,%i2
+/* 0x01e0 */ sra %g2,0,%o0
+/* 0x01e4 175 */ add %o2,8,%o2
+/* 0x01e8 179 */ fmovs %f2,%f4
+/* 0x01ec 180 */ sllx %o0,3,%l0
+/* 0x01f0 */ add %i4,3,%g2
+/* 0x01f4 179 */ ld [%sp+2227],%f5
+/* 0x01f8 175 */ add %l0,16,%o0
+/* 0x01fc 180 */ add %i4,2,%i4
+/* 0x0200 175 */ sub %g5,1,%o7
+/* 0x0204 180 */ add %i2,3,%i2
+/* 0x0208 179 */ fsubd %f4,%f2,%f4
+/* 0x020c */ std %f4,[%i1+%i5]
+/* 0x0210 180 */ st %g4,[%sp+2223]
+/* 0x0214 177 */ ld [%i0+%o3],%i5
+/* 0x0218 180 */ fmovs %f2,%f4
+/* 0x021c */ srl %i5,16,%g4
+/* 0x0220 179 */ and %i5,%g3,%i5
+/* 0x0224 180 */ ld [%sp+2223],%f5
+/* 0x0228 */ fsubd %f4,%f2,%f4
+/* 0x022c */ std %f4,[%i1+%l0]
+/* 0x0230 */ st %g4,[%sp+2223]
+/* 0x0234 177 */ ld [%i0+%o5],%g4
+/* 0x0238 179 */ st %i5,[%sp+2227]
+/* 0x023c 178 */ fmovs %f2,%f4
+/* 0x0240 180 */ srl %g4,16,%i5
+/* 0x0244 179 */ and %g4,%g3,%g4
+/* 0x0248 180 */ ld [%sp+2223],%f7
+/* 0x024c */ st %i5,[%sp+2223]
+/* 0x0250 178 */ ld [%i0+%o3],%f5
+/* 0x0254 180 */ fsubd %f6,%f2,%f6
+/* 0x0258 177 */ ld [%i0+%o4],%o3
+/* 0x025c 178 */ fsubd %f4,%f2,%f4
+/* 0x0260 179 */ ld [%sp+2227],%f9
+/* 0x0264 180 */ ld [%sp+2223],%f1
+/* 0x0268 179 */ st %g4,[%sp+2227]
+/* 0x026c */ fsubd %f8,%f2,%f8
+/* 0x0270 */ std %f8,[%i1+%o1]
+/* 0x0274 180 */ std %f6,[%i1+%o0]
+/* 0x0278 178 */ std %f4,[%i3+%o2]
+ .L900000411: ! remainder main loop: %i2 advances by 2 per pass
+/* 0x027c 179 */ ld [%sp+2227],%f13
+/* 0x0280 180 */ srl %o3,16,%g4
+/* 0x0284 */ add %i2,2,%i2
+/* 0x0288 */ st %g4,[%sp+2223]
+/* 0x028c */ cmp %i2,%o7
+/* 0x0290 */ add %g2,4,%g2
+/* 0x0294 178 */ ld [%i0+%o5],%f11
+/* 0x0298 180 */ add %i4,4,%i4
+/* 0x029c 175 */ add %o4,4,%o5
+/* 0x02a0 177 */ ld [%i0+%o5],%g4
+/* 0x02a4 179 */ and %o3,%g3,%o3
+/* 0x02a8 */ st %o3,[%sp+2227]
+/* 0x02ac 180 */ fmovs %f2,%f0
+/* 0x02b0 179 */ fmovs %f2,%f12
+/* 0x02b4 180 */ fsubd %f0,%f2,%f8
+/* 0x02b8 179 */ fsubd %f12,%f2,%f4
+/* 0x02bc 175 */ add %o1,16,%o1
+/* 0x02c0 180 */ ld [%sp+2223],%f7
+/* 0x02c4 178 */ fmovs %f2,%f10
+/* 0x02c8 179 */ std %f4,[%i1+%o1]
+/* 0x02cc 175 */ add %o0,16,%o0
+/* 0x02d0 178 */ fsubd %f10,%f2,%f4
+/* 0x02d4 175 */ add %o2,8,%o2
+/* 0x02d8 180 */ std %f8,[%i1+%o0]
+/* 0x02dc 178 */ std %f4,[%i3+%o2]
+/* 0x02e0 179 */ ld [%sp+2227],%f9
+/* 0x02e4 180 */ srl %g4,16,%o3
+/* 0x02e8 */ st %o3,[%sp+2223]
+/* 0x02ec 178 */ ld [%i0+%o4],%f5
+/* 0x02f0 175 */ add %o4,8,%o4
+/* 0x02f4 177 */ ld [%i0+%o4],%o3
+/* 0x02f8 179 */ and %g4,%g3,%g4
+/* 0x02fc */ st %g4,[%sp+2227]
+/* 0x0300 180 */ fmovs %f2,%f6
+/* 0x0304 179 */ fmovs %f2,%f8
+/* 0x0308 180 */ fsubd %f6,%f2,%f6
+/* 0x030c 179 */ fsubd %f8,%f2,%f8
+/* 0x0310 175 */ add %o1,16,%o1
+/* 0x0314 180 */ ld [%sp+2223],%f1
+/* 0x0318 178 */ fmovs %f2,%f4
+/* 0x031c 179 */ std %f8,[%i1+%o1]
+/* 0x0320 175 */ add %o0,16,%o0
+/* 0x0324 178 */ fsubd %f4,%f2,%f4
+/* 0x0328 175 */ add %o2,8,%o2
+/* 0x032c 180 */ std %f6,[%i1+%o0]
+/* 0x0330 */ bl,pt %icc,.L900000411
+/* 0x0334 */ std %f4,[%i3+%o2]
+ .L900000414: ! after the remainder main loop: converts and stores the values already loaded
+/* 0x0338 180 */ srl %o3,16,%o7
+/* 0x033c */ st %o7,[%sp+2223]
+/* 0x0340 179 */ fmovs %f2,%f12
+/* 0x0344 178 */ ld [%i0+%o5],%f11
+/* 0x0348 180 */ fmovs %f2,%f0
+/* 0x034c 179 */ and %o3,%g3,%g4
+/* 0x0350 180 */ fmovs %f2,%f6
+/* 0x0354 175 */ add %o1,16,%o3
+/* 0x0358 */ add %o0,16,%o7
+/* 0x035c 178 */ fmovs %f2,%f10
+/* 0x0360 175 */ add %o2,8,%o2
+/* 0x0364 */ add %o1,32,%o5
+/* 0x0368 179 */ ld [%sp+2227],%f13
+/* 0x036c 178 */ fmovs %f2,%f4
+/* 0x0370 175 */ add %o0,32,%o1
+/* 0x0374 180 */ ld [%sp+2223],%f7
+/* 0x0378 175 */ add %o2,8,%o0
+/* 0x037c 180 */ cmp %i2,%g5
+/* 0x0380 179 */ st %g4,[%sp+2227]
+/* 0x0384 */ fsubd %f12,%f2,%f8
+/* 0x0388 180 */ add %g2,6,%g2
+/* 0x038c 179 */ std %f8,[%i1+%o3]
+/* 0x0390 180 */ fsubd %f0,%f2,%f0
+/* 0x0394 177 */ sra %i2,0,%o3
+/* 0x0398 180 */ std %f0,[%i1+%o7]
+/* 0x039c 178 */ fsubd %f10,%f2,%f0
+/* 0x03a0 180 */ add %i4,6,%i4
+/* 0x03a4 178 */ std %f0,[%i3+%o2]
+/* 0x03a8 */ sllx %o3,2,%o2
+/* 0x03ac 179 */ ld [%sp+2227],%f9
+/* 0x03b0 178 */ ld [%i0+%o4],%f5
+/* 0x03b4 179 */ fmovs %f2,%f8
+/* 0x03b8 */ fsubd %f8,%f2,%f0
+/* 0x03bc */ std %f0,[%i1+%o5]
+/* 0x03c0 180 */ fsubd %f6,%f2,%f0
+/* 0x03c4 */ std %f0,[%i1+%o1]
+/* 0x03c8 178 */ fsubd %f4,%f2,%f0
+/* 0x03cc 180 */ bge,pn %icc,.L77000164
+/* 0x03d0 */ std %f0,[%i3+%o0]
+ .L77000161:
+/* 0x03d4 178 */ ldd [%g1],%f2
+ .L900000416: ! simple remainder loop: one element per pass
+/* 0x03d8 178 */ ld [%i0+%o2],%f5 ! low word of %f4:%f5 = i32[i]
+/* 0x03dc 179 */ sra %i4,0,%o0
+/* 0x03e0 180 */ add %i2,1,%i2 ! i++
+/* 0x03e4 177 */ ld [%i0+%o2],%o1 ! a = i32[i]
+/* 0x03e8 178 */ sllx %o3,3,%o3 ! byte offset of d32[i]
+/* 0x03ec 180 */ add %i4,2,%i4
+/* 0x03f0 178 */ fmovs %f2,%f4 ! high word = 0x43300000
+/* 0x03f4 179 */ sllx %o0,3,%o4 ! byte offset of d16[2*i]
+/* 0x03f8 180 */ cmp %i2,%g5
+/* 0x03fc 179 */ and %o1,%g3,%o0 ! a & 0xffff
+/* 0x0400 178 */ fsubd %f4,%f2,%f0 ! (2^52 + i32[i]) - 2^52
+/* 0x0404 */ std %f0,[%i3+%o3] ! d32[i]
+/* 0x0408 180 */ srl %o1,16,%o1 ! a >> 16
+/* 0x040c 179 */ st %o0,[%sp+2227]
+/* 0x0410 180 */ sra %g2,0,%o0
+/* 0x0414 */ add %g2,2,%g2
+/* 0x0418 177 */ sra %i2,0,%o3
+/* 0x041c 180 */ sllx %o0,3,%o0 ! byte offset of d16[2*i+1]
+/* 0x0420 179 */ fmovs %f2,%f4
+/* 0x0424 */ sllx %o3,2,%o2 ! byte offset of the next i32 element
+/* 0x0428 */ ld [%sp+2227],%f5
+/* 0x042c */ fsubd %f4,%f2,%f0
+/* 0x0430 */ std %f0,[%i1+%o4] ! d16[2*i]
+/* 0x0434 180 */ st %o1,[%sp+2223]
+/* 0x0438 */ fmovs %f2,%f4
+/* 0x043c */ ld [%sp+2223],%f5
+/* 0x0440 */ fsubd %f4,%f2,%f0
+/* 0x0444 */ std %f0,[%i1+%o0] ! d16[2*i+1]
+/* 0x0448 */ bl,a,pt %icc,.L900000416
+/* 0x044c */ ldd [%g1],%f2
+ .L77000164:
+/* 0x0450 */ ret ! Result =
+/* 0x0454 */ restore %g0,%g0,%g0
+/* 0x0458 0 */ .type conv_i32_to_d32_and_d16,2
+/* 0x0458 */ .size conv_i32_to_d32_and_d16,(.-conv_i32_to_d32_and_d16)
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 */ .align 8
+!
+! SUBROUTINE adjust_montf_result
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION
+/* adjust_montf_result(i32, nint, len): per the interleaved C listing, subtracts nint[0..len-1] from i32[0..len-1] in place, with borrow propagation, when i32[len] is nonzero or when i32 compares >= nint scanning from word len-1 downwards; otherwise i32 is left unchanged. */
+ .global adjust_montf_result
+ adjust_montf_result:
+/* 000000 */ save %sp,-176,%sp /* open a new register window with a 176-byte frame */
+/* 0x0004 */ or %g0,%i2,%o1 /* %o1 = len (third argument) */
+/* 0x0008 */ or %g0,%i0,%i2 /* %i2 = i32 (first argument) */
+
+! 181 ! }
+! 182 !}
+! 185 !void adjust_montf_result(unsigned int *i32, unsigned int *nint, int len)
+! 186 !{
+! 187 !long long acc;
+! 188 !int i;
+! 190 ! if(i32[len]>0) i=-1;
+
+/* 0x000c 190 */ sra %o1,0,%g2 /* sign-extend len to 64 bits */
+/* 0x0010 */ or %g0,-1,%o2 /* %o2 = i = -1, the value used when i32[len] is nonzero */
+/* 0x0014 */ sllx %g2,2,%g2 /* byte offset of word index len */
+/* 0x0018 */ ld [%i2+%g2],%g2 /* %g2 = i32[len] */
+/* 0x001c */ cmp %g2,0
+/* 0x0020 */ bleu,pn %icc,.L77000175 /* unsigned <= 0, i.e. i32[len] == 0: go scan from the top word */
+/* 0x0024 */ or %g0,%i1,%i0 /* delay slot: %i0 = nint (second argument); %i1 still holds it too */
+/* 0x0028 */ ba .L900000511 /* i32[len] > 0: skip the scan, keeping i = -1 */
+/* 0x002c */ cmp %o2,0 /* delay slot: set the flags tested at .L900000511 */
+ .L77000175:
+
+! 191 ! else
+! 192 ! {
+! 193 ! for(i=len-1; i>=0; i--)
+
+/* 0x0030 193 */ sub %o1,1,%o2 /* i = len-1 */
+/* 0x0034 */ cmp %o2,0
+/* 0x0038 */ bl,pn %icc,.L77000182 /* i < 0 already: no words to compare */
+/* 0x003c */ sra %o2,0,%g2 /* delay slot: sign-extend i */
+ .L900000510:
+
+! 194 ! {
+! 195 ! if(i32[i]!=nint[i]) break;
+
+/* 0x0040 195 */ sllx %g2,2,%g2 /* byte offset of word i */
+/* 0x0044 */ sub %o2,1,%o0 /* %o0 = i-1, candidate next index */
+/* 0x0048 */ ld [%i1+%g2],%g3 /* %g3 = nint[i] */
+/* 0x004c */ ld [%i2+%g2],%g2 /* %g2 = i32[i] */
+/* 0x0050 */ cmp %g2,%g3
+/* 0x0054 */ bne,pn %icc,.L77000182 /* words differ: break with i at the differing index */
+/* 0x0058 */ nop
+/* 0x005c 0 */ or %g0,%o0,%o2 /* i = i-1 */
+/* 0x0060 195 */ cmp %o0,0
+/* 0x0064 */ bge,pt %icc,.L900000510 /* keep scanning while i >= 0 */
+/* 0x0068 */ sra %o2,0,%g2 /* delay slot: sign-extend the new i */
+ .L77000182:
+
+! 196 ! }
+! 197 ! }
+! 198 ! if((i<0)||(i32[i]>nint[i]))
+
+/* 0x006c 198 */ cmp %o2,0
+ .L900000511:
+/* 0x0070 198 */ bl,pn %icc,.L77000198 /* i < 0: go to the subtraction */
+/* 0x0074 */ sra %o2,0,%g2 /* delay slot: sign-extend i */
+/* 0x0078 */ sllx %g2,2,%g2 /* byte offset of word i */
+/* 0x007c */ ld [%i1+%g2],%g3 /* %g3 = nint[i] */
+/* 0x0080 */ ld [%i2+%g2],%g2 /* %g2 = i32[i] */
+/* 0x0084 */ cmp %g2,%g3
+/* 0x0088 */ bleu,pt %icc,.L77000191 /* unsigned i32[i] <= nint[i]: return without subtracting */
+/* 0x008c */ nop
+ .L77000198:
+
+! 199 ! {
+! 200 ! acc=0;
+! 201 ! for(i=0;i<len;i++)
+
+/* 0x0090 201 */ cmp %o1,0
+/* 0x0094 */ ble,pt %icc,.L77000191 /* len <= 0: loop body never runs, return */
+/* 0x0098 */ nop
+/* 0x009c 198 */ or %g0,-1,%g2
+/* 0x00a0 201 */ or %g0,%o1,%g3 /* %g3 = len */
+/* 0x00a4 198 */ srl %g2,0,%g2 /* %g2 = 0xffffffff mask (srl by 0 clears the upper 32 bits) */
+/* 0x00a8 */ sub %o1,1,%g4 /* %g4 = len-1, last valid word index */
+/* 0x00ac */ cmp %o1,9 /* flags are consumed by the bl below; the or instructions in between do not touch them */
+/* 0x00b0 201 */ or %g0,0,%i1 /* %i1 = loop index i = 0 (nint is addressed through %i0 from here on) */
+/* 0x00b4 200 */ or %g0,0,%g5 /* %g5 = acc = 0 (running borrow) */
+
+! 202 ! {
+! 203 ! acc=acc+(unsigned long long)(i32[i])-(unsigned long long)(nint[i]);
+
+/* 0x00b8 203 */ or %g0,0,%o1 /* %o1 = byte offset 0 for the simple loop */
+/* 0x00bc 201 */ bl,pn %icc,.L77000199 /* len < 9: use only the one-word-per-pass loop */
+/* 0x00c0 */ sub %g3,4,%o7 /* delay slot: %o7 = len-4, bound for the unrolled loop */
+/* 0x00c4 203 */ ld [%i2],%o1 /* unrolled path prologue: %o1 = i32[0] */
+
+! 204 ! i32[i]=acc&0xffffffff;
+! 205 ! acc=acc>>32;
+
+/* 0x00c8 205 */ or %g0,5,%i1 /* loop index starts at 5 for the unrolled loop */
+/* 0x00cc 203 */ ld [%i0],%o2 /* %o2 = nint[0] */
+/* 0x00d0 201 */ or %g0,8,%o5 /* %o5 = byte offset of word 2 */
+/* 0x00d4 */ or %g0,12,%o4 /* %o4 = byte offset of word 3 */
+/* 0x00d8 203 */ ld [%i0+4],%o3 /* %o3 = nint[1] */
+/* 0x00dc 201 */ or %g0,16,%g1 /* %g1 = byte offset of word 4 */
+/* 0x00e0 203 */ ld [%i2+4],%o0 /* %o0 = i32[1] */
+/* 0x00e4 201 */ sub %o1,%o2,%o1 /* i32[0] - nint[0] (acc starts at 0) */
+/* 0x00e8 203 */ ld [%i0+8],%i3 /* %i3 = nint[2] */
+/* 0x00ec 204 */ and %o1,%g2,%g5 /* low 32 bits of the difference */
+/* 0x00f0 */ st %g5,[%i2] /* i32[0] = low 32 bits */
+/* 0x00f4 205 */ srax %o1,32,%g5 /* acc >> 32 (arithmetic): borrow out of word 0 */
+/* 0x00f8 201 */ sub %o0,%o3,%o0 /* i32[1] - nint[1] */
+/* 0x00fc 203 */ ld [%i0+12],%o2 /* %o2 = nint[3] */
+/* 0x0100 201 */ add %o0,%g5,%o0 /* add the borrow from word 0 */
+/* 0x0104 204 */ and %o0,%g2,%g5
+/* 0x0108 */ st %g5,[%i2+4] /* i32[1] = low 32 bits */
+/* 0x010c 205 */ srax %o0,32,%o0 /* borrow out of word 1 */
+/* 0x0110 203 */ ld [%i2+8],%o1 /* %o1 = i32[2] */
+/* 0x0114 */ ld [%i2+12],%o3 /* %o3 = i32[3] */
+/* 0x0118 201 */ sub %o1,%i3,%o1 /* i32[2] - nint[2], borrow added inside the loop */
+ .L900000505: /* unrolled loop: each pass stores four result words; operands for later words are loaded ahead of those stores */
+/* 0x011c */ add %g1,4,%g3 /* %g3 = offset of the word after the one at %g1 */
+/* 0x0120 203 */ ld [%g1+%i2],%g5
+/* 0x0124 201 */ add %o1,%o0,%o0 /* pending difference + incoming borrow */
+/* 0x0128 203 */ ld [%i0+%g1],%i3
+/* 0x012c 201 */ sub %o3,%o2,%o1 /* difference for the following word */
+/* 0x0130 204 */ and %o0,%g2,%o2
+/* 0x0134 */ st %o2,[%o5+%i2] /* store result word at offset %o5 */
+/* 0x0138 205 */ srax %o0,32,%o2 /* borrow out */
+/* 0x013c */ add %i1,4,%i1 /* loop index advances by 4 per pass */
+/* 0x0140 201 */ add %g1,8,%o5
+/* 0x0144 203 */ ld [%g3+%i2],%o0
+/* 0x0148 201 */ add %o1,%o2,%o1
+/* 0x014c 203 */ ld [%i0+%g3],%o3
+/* 0x0150 201 */ sub %g5,%i3,%o2
+/* 0x0154 204 */ and %o1,%g2,%g5
+/* 0x0158 */ st %g5,[%o4+%i2] /* store result word at offset %o4 */
+/* 0x015c 205 */ srax %o1,32,%g5 /* borrow out */
+/* 0x0160 */ cmp %i1,%o7 /* flags for the ble at the bottom; nothing in between sets them */
+/* 0x0164 201 */ add %g1,12,%o4
+/* 0x0168 203 */ ld [%o5+%i2],%o1
+/* 0x016c 201 */ add %o2,%g5,%o2
+/* 0x0170 203 */ ld [%i0+%o5],%i3
+/* 0x0174 201 */ sub %o0,%o3,%o0
+/* 0x0178 204 */ and %o2,%g2,%o3
+/* 0x017c */ st %o3,[%g1+%i2] /* store result word at offset %g1 */
+/* 0x0180 205 */ srax %o2,32,%g5 /* borrow out */
+/* 0x0184 203 */ ld [%o4+%i2],%o3
+/* 0x0188 201 */ add %g1,16,%g1 /* advance the base offset by four words */
+/* 0x018c */ add %o0,%g5,%o0
+/* 0x0190 203 */ ld [%i0+%o4],%o2
+/* 0x0194 201 */ sub %o1,%i3,%o1
+/* 0x0198 204 */ and %o0,%g2,%g5
+/* 0x019c */ st %g5,[%g3+%i2] /* store result word at offset %g3 */
+/* 0x01a0 205 */ ble,pt %icc,.L900000505 /* repeat while index <= len-4 */
+/* 0x01a4 */ srax %o0,32,%o0 /* delay slot: borrow carried into the next pass or the epilogue */
+ .L900000508: /* epilogue: finish the words already loaded by the last pass (three stores) */
+/* 0x01a8 */ add %o1,%o0,%g3
+/* 0x01ac */ sub %o3,%o2,%o1
+/* 0x01b0 203 */ ld [%g1+%i2],%o0
+/* 0x01b4 */ ld [%i0+%g1],%o2
+/* 0x01b8 205 */ srax %g3,32,%o7 /* borrow out */
+/* 0x01bc 204 */ and %g3,%g2,%o3
+/* 0x01c0 201 */ add %o1,%o7,%o1
+/* 0x01c4 204 */ st %o3,[%o5+%i2]
+/* 0x01c8 205 */ cmp %i1,%g4 /* flags for the bg below: index vs len-1 */
+/* 0x01cc 201 */ sub %o0,%o2,%o0
+/* 0x01d0 204 */ and %o1,%g2,%o2
+/* 0x01d4 */ st %o2,[%o4+%i2]
+/* 0x01d8 205 */ srax %o1,32,%o1 /* borrow out */
+/* 0x01dc 203 */ sra %i1,0,%o2 /* sign-extend the index for the remainder loop */
+/* 0x01e0 201 */ add %o0,%o1,%o0
+/* 0x01e4 205 */ srax %o0,32,%g5 /* %g5 = borrow handed to the remainder loop */
+/* 0x01e8 204 */ and %o0,%g2,%o1
+/* 0x01ec */ st %o1,[%g1+%i2]
+/* 0x01f0 205 */ bg,pn %icc,.L77000191 /* index > len-1: all words done, return */
+/* 0x01f4 */ sllx %o2,2,%o1 /* delay slot: %o1 = byte offset of the next word */
+ .L77000199: /* simple loop: one word per pass, used for len < 9 and for leftover words */
+/* 0x01f8 0 */ or %g0,%o1,%g1 /* %g1 = byte offset of the word to store */
+ .L900000509:
+/* 0x01fc 203 */ ld [%o1+%i2],%o0 /* %o0 = i32[i] */
+/* 0x0200 205 */ add %i1,1,%i1 /* i++ */
+/* 0x0204 203 */ ld [%i0+%o1],%o1 /* %o1 = nint[i] */
+/* 0x0208 */ sra %i1,0,%o2 /* sign-extend the new index */
+/* 0x020c 205 */ cmp %i1,%g4 /* flags for the ble below: new index vs len-1 */
+/* 0x0210 203 */ add %g5,%o0,%o0 /* acc + i32[i] */
+/* 0x0214 */ sub %o0,%o1,%o0 /* ... - nint[i] */
+/* 0x0218 205 */ srax %o0,32,%g5 /* acc = acc >> 32 (arithmetic), the next borrow */
+/* 0x021c 204 */ and %o0,%g2,%o1 /* low 32 bits of the result */
+/* 0x0220 */ st %o1,[%g1+%i2] /* i32[i] = low 32 bits */
+/* 0x0224 */ sllx %o2,2,%o1 /* byte offset of the next word */
+/* 0x0228 205 */ ble,pt %icc,.L900000509 /* repeat while index <= len-1 */
+/* 0x022c */ or %g0,%o1,%g1 /* delay slot: %g1 = next store offset */
+ .L77000191:
+/* 0x0230 */ ret ! Result =
+/* 0x0234 */ restore %g0,%g0,%g0 /* delay slot of ret: pop the register window */
+/* 0x0238 0 */ .type adjust_montf_result,2
+/* 0x0238 */ .size adjust_montf_result,(.-adjust_montf_result)
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 */ .align 8
+/* 000000 */ .skip 24
+!
+! SUBROUTINE mont_mulf_noconv
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION
+
+ .global mont_mulf_noconv
+ mont_mulf_noconv:
+/* 000000 */ save %sp,-224,%sp
+ .L900000643:
+/* 0x0004 */ call .+8
+/* 0x0008 */ sethi /*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000643-.)),%g5
+/* 0x000c */ ldx [%fp+2223],%l0
+
+! 206 ! }
+! 207 ! }
+! 208 !}
+! 213 !/*
+! 214 !** the lengths of the input arrays should be at least the following:
+! 215 !** result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen]
+! 216 !** all of them should be different from one another
+! 217 !**
+! 218 !*/
+! 219 !void mont_mulf_noconv(unsigned int *result,
+! 220 ! double *dm1, double *dm2, double *dt,
+! 221 ! double *dn, unsigned int *nint,
+! 222 ! int nlen, double dn0)
+! 223 !{
+! 224 ! int i, j, jj;
+! 225 ! int tmp;
+! 226 ! double digit, m2j, nextm2j, a, b;
+! 227 ! double *dptmp, *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0;
+! 229 ! pdm1=&(dm1[0]);
+! 230 ! pdm2=&(dm2[0]);
+! 231 ! pdn=&(dn[0]);
+! 232 ! pdm2[2*nlen]=Zero;
+
+/* 0x0010 232 */ sethi %hi(Zero),%g2
+/* 0x0014 223 */ fmovd %f14,%f30
+/* 0x0018 */ add %g5,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000643-.)),%g5
+/* 0x001c 232 */ add %g2,%lo(Zero),%g2
+/* 0x0020 */ sll %l0,1,%o3
+/* 0x0024 223 */ add %g5,%o7,%o4
+/* 0x0028 232 */ sra %o3,0,%g5
+/* 0x002c */ ldx [%o4+%g2],%o7
+
+! 234 ! if (nlen!=16)
+! 235 ! {
+! 236 ! for(i=0;i<4*nlen+2;i++) dt[i]=Zero;
+! 238 ! a=dt[0]=pdm1[0]*pdm2[0];
+! 239 ! digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16);
+
+/* 0x0030 239 */ sethi %hi(TwoToMinus16),%g3
+/* 0x0034 */ sethi %hi(TwoTo16),%g4
+/* 0x0038 */ add %g3,%lo(TwoToMinus16),%g2
+/* 0x003c 232 */ ldd [%o7],%f0
+/* 0x0040 239 */ add %g4,%lo(TwoTo16),%g3
+/* 0x0044 223 */ or %g0,%i4,%o0
+/* 0x0048 232 */ sllx %g5,3,%g4
+/* 0x004c 239 */ ldx [%o4+%g2],%o5
+/* 0x0050 223 */ or %g0,%i5,%l3
+/* 0x0054 */ or %g0,%i0,%l2
+/* 0x0058 239 */ ldx [%o4+%g3],%o4
+/* 0x005c 234 */ cmp %l0,16
+/* 0x0060 232 */ std %f0,[%i2+%g4]
+/* 0x0064 234 */ be,pn %icc,.L77000279
+/* 0x0068 */ or %g0,%i3,%l4
+/* 0x006c 236 */ sll %l0,2,%g2
+/* 0x0070 223 */ or %g0,%o0,%i5
+/* 0x0074 236 */ add %g2,2,%o0
+/* 0x0078 223 */ or %g0,%i1,%i4
+/* 0x007c 236 */ cmp %o0,0
+/* 0x0080 223 */ or %g0,%i2,%l1
+/* 0x0084 236 */ ble,a,pt %icc,.L900000657
+/* 0x0088 */ ldd [%i1],%f6
+
+! 241 ! pdtj=&(dt[0]);
+! 242 ! for(j=jj=0;j<2*nlen;j++,jj++,pdtj++)
+! 243 ! {
+! 244 ! m2j=pdm2[j];
+! 245 ! a=pdtj[0]+pdn[0]*digit;
+! 246 ! b=pdtj[1]+pdm1[0]*pdm2[j+1]+a*TwoToMinus16;
+! 247 ! pdtj[1]=b;
+! 249 !#pragma pipeloop(0)
+! 250 ! for(i=1;i<nlen;i++)
+! 251 ! {
+! 252 ! pdtj[2*i]+=pdm1[i]*m2j+pdn[i]*digit;
+! 253 ! }
+! 254 ! if((jj==30)) {cleanup(dt,j/2+1,2*nlen+1); jj=0;}
+! 255 !
+! 256 ! digit=mod(lower32(b,Zero)*dn0,TwoToMinus16,TwoTo16);
+! 257 ! }
+! 258 ! }
+! 259 ! else
+! 260 ! {
+! 261 ! a=dt[0]=pdm1[0]*pdm2[0];
+! 263 ! dt[65]= dt[64]= dt[63]= dt[62]= dt[61]= dt[60]=
+! 264 ! dt[59]= dt[58]= dt[57]= dt[56]= dt[55]= dt[54]=
+! 265 ! dt[53]= dt[52]= dt[51]= dt[50]= dt[49]= dt[48]=
+! 266 ! dt[47]= dt[46]= dt[45]= dt[44]= dt[43]= dt[42]=
+! 267 ! dt[41]= dt[40]= dt[39]= dt[38]= dt[37]= dt[36]=
+! 268 ! dt[35]= dt[34]= dt[33]= dt[32]= dt[31]= dt[30]=
+! 269 ! dt[29]= dt[28]= dt[27]= dt[26]= dt[25]= dt[24]=
+! 270 ! dt[23]= dt[22]= dt[21]= dt[20]= dt[19]= dt[18]=
+! 271 ! dt[17]= dt[16]= dt[15]= dt[14]= dt[13]= dt[12]=
+! 272 ! dt[11]= dt[10]= dt[ 9]= dt[ 8]= dt[ 7]= dt[ 6]=
+! 273 ! dt[ 5]= dt[ 4]= dt[ 3]= dt[ 2]= dt[ 1]=Zero;
+! 275 ! pdn_0=pdn[0];
+! 276 ! pdm1_0=pdm1[0];
+! 278 ! digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16);
+! 279 ! pdtj=&(dt[0]);
+! 281 ! for(j=0;j<32;j++,pdtj++)
+
+/* 0x008c 281 */ or %g0,%o0,%o1
+/* 0x0090 236 */ sub %o0,1,%g1
+/* 0x0094 */ or %g0,0,%g2
+/* 0x0098 281 */ cmp %o1,5
+/* 0x009c */ bl,pn %icc,.L77000280
+/* 0x00a0 */ or %g0,8,%o0
+/* 0x00a4 */ std %f0,[%i3]
+/* 0x00a8 */ or %g0,2,%g2
+/* 0x00ac */ sub %g1,2,%o1
+ .L900000627:
+/* 0x00b0 */ add %o0,8,%g3
+/* 0x00b4 */ std %f0,[%i3+%o0]
+/* 0x00b8 */ add %g2,3,%g2
+/* 0x00bc */ add %o0,16,%o2
+/* 0x00c0 */ std %f0,[%i3+%g3]
+/* 0x00c4 */ cmp %g2,%o1
+/* 0x00c8 */ add %o0,24,%o0
+/* 0x00cc */ ble,pt %icc,.L900000627
+/* 0x00d0 */ std %f0,[%i3+%o2]
+ .L900000630:
+/* 0x00d4 */ cmp %g2,%g1
+/* 0x00d8 */ bg,pn %icc,.L77000285
+/* 0x00dc */ std %f0,[%i3+%o0]
+ .L77000280:
+/* 0x00e0 */ ldd [%o7],%f0
+ .L900000656:
+/* 0x00e4 */ sra %g2,0,%o0
+/* 0x00e8 */ add %g2,1,%g2
+/* 0x00ec */ sllx %o0,3,%o0
+/* 0x00f0 */ cmp %g2,%g1
+/* 0x00f4 */ std %f0,[%i3+%o0]
+/* 0x00f8 */ ble,a,pt %icc,.L900000656
+/* 0x00fc */ ldd [%o7],%f0
+ .L77000285:
+/* 0x0100 238 */ ldd [%i1],%f6
+ .L900000657:
+/* 0x0104 238 */ ldd [%i2],%f8
+/* 0x0108 242 */ cmp %o3,0
+/* 0x010c */ sub %o3,1,%o1
+/* 0x0110 239 */ ldd [%o7],%f10
+/* 0x0114 */ add %o3,1,%o2
+/* 0x0118 0 */ or %g0,0,%i2
+/* 0x011c 238 */ fmuld %f6,%f8,%f6
+/* 0x0120 */ std %f6,[%i3]
+/* 0x0124 0 */ or %g0,0,%g3
+/* 0x0128 239 */ ldd [%o5],%f8
+/* 0x012c 0 */ or %g0,%o2,%g1
+/* 0x0130 236 */ sub %l0,1,%i1
+/* 0x0134 239 */ ldd [%o4],%f12
+/* 0x0138 236 */ or %g0,1,%g4
+/* 0x013c */ fdtox %f6,%f0
+/* 0x0140 */ fmovs %f10,%f0
+/* 0x0144 */ fxtod %f0,%f6
+/* 0x0148 239 */ fmuld %f6,%f14,%f6
+/* 0x014c */ fmuld %f6,%f8,%f8
+/* 0x0150 */ fdtox %f8,%f8
+/* 0x0154 */ fxtod %f8,%f8
+/* 0x0158 */ fmuld %f8,%f12,%f8
+/* 0x015c */ fsubd %f6,%f8,%f20
+/* 0x0160 242 */ ble,pt %icc,.L900000650
+/* 0x0164 */ sllx %g5,3,%g2
+/* 0x0168 0 */ st %o1,[%sp+2223]
+/* 0x016c 246 */ ldd [%i5],%f6
+ .L900000651:
+/* 0x0170 246 */ sra %g4,0,%g2
+/* 0x0174 */ fmuld %f6,%f20,%f6
+/* 0x0178 */ ldd [%i3],%f12
+/* 0x017c */ sllx %g2,3,%g2
+/* 0x0180 */ ldd [%i4],%f8
+/* 0x0184 250 */ cmp %l0,1
+/* 0x0188 246 */ ldd [%l1+%g2],%f10
+/* 0x018c 244 */ sra %i2,0,%g2
+/* 0x0190 */ add %i2,1,%i0
+/* 0x0194 246 */ faddd %f12,%f6,%f6
+/* 0x0198 */ ldd [%o5],%f12
+/* 0x019c 244 */ sllx %g2,3,%g2
+/* 0x01a0 246 */ fmuld %f8,%f10,%f8
+/* 0x01a4 */ ldd [%i3+8],%f10
+/* 0x01a8 */ srl %i2,31,%o3
+/* 0x01ac 244 */ ldd [%l1+%g2],%f18
+/* 0x01b0 0 */ or %g0,1,%l5
+/* 0x01b4 236 */ or %g0,2,%g2
+/* 0x01b8 246 */ fmuld %f6,%f12,%f6
+/* 0x01bc 250 */ or %g0,32,%o1
+/* 0x01c0 */ or %g0,48,%o2
+/* 0x01c4 246 */ faddd %f10,%f8,%f8
+/* 0x01c8 */ faddd %f8,%f6,%f16
+/* 0x01cc 250 */ ble,pn %icc,.L77000213
+/* 0x01d0 */ std %f16,[%i3+8]
+/* 0x01d4 */ cmp %i1,8
+/* 0x01d8 */ sub %l0,3,%o3
+/* 0x01dc */ bl,pn %icc,.L77000284
+/* 0x01e0 */ or %g0,8,%o0
+/* 0x01e4 252 */ ldd [%i5+8],%f0
+/* 0x01e8 */ or %g0,6,%l5
+/* 0x01ec */ ldd [%i4+8],%f2
+/* 0x01f0 */ or %g0,4,%g2
+/* 0x01f4 250 */ or %g0,40,%o0
+/* 0x01f8 252 */ ldd [%i5+16],%f8
+/* 0x01fc */ fmuld %f0,%f20,%f10
+/* 0x0200 */ ldd [%i4+16],%f4
+/* 0x0204 */ fmuld %f2,%f18,%f2
+/* 0x0208 */ ldd [%i3+16],%f0
+/* 0x020c */ fmuld %f8,%f20,%f12
+/* 0x0210 */ ldd [%i4+24],%f6
+/* 0x0214 */ fmuld %f4,%f18,%f4
+/* 0x0218 */ ldd [%i5+24],%f8
+/* 0x021c */ faddd %f2,%f10,%f2
+/* 0x0220 */ ldd [%i4+32],%f14
+/* 0x0224 */ fmuld %f6,%f18,%f10
+/* 0x0228 */ ldd [%i5+32],%f6
+/* 0x022c */ faddd %f4,%f12,%f4
+/* 0x0230 */ ldd [%i4+40],%f12
+/* 0x0234 */ faddd %f0,%f2,%f0
+/* 0x0238 */ std %f0,[%i3+16]
+/* 0x023c */ ldd [%i3+32],%f0
+/* 0x0240 */ ldd [%i3+48],%f2
+ .L900000639:
+/* 0x0244 */ add %o2,16,%l6
+/* 0x0248 252 */ ldd [%i5+%o0],%f22
+/* 0x024c */ add %l5,3,%l5
+/* 0x0250 */ fmuld %f8,%f20,%f8
+/* 0x0254 250 */ add %o0,8,%o0
+/* 0x0258 252 */ ldd [%l6+%i3],%f26
+/* 0x025c */ cmp %l5,%o3
+/* 0x0260 */ ldd [%i4+%o0],%f24
+/* 0x0264 */ faddd %f0,%f4,%f0
+/* 0x0268 */ add %g2,6,%g2
+/* 0x026c */ faddd %f10,%f8,%f10
+/* 0x0270 */ fmuld %f14,%f18,%f4
+/* 0x0274 */ std %f0,[%o1+%i3]
+/* 0x0278 250 */ add %o2,32,%o1
+/* 0x027c 252 */ ldd [%i5+%o0],%f8
+/* 0x0280 */ fmuld %f6,%f20,%f6
+/* 0x0284 250 */ add %o0,8,%o0
+/* 0x0288 252 */ ldd [%o1+%i3],%f0
+/* 0x028c */ ldd [%i4+%o0],%f14
+/* 0x0290 */ faddd %f2,%f10,%f2
+/* 0x0294 */ faddd %f4,%f6,%f10
+/* 0x0298 */ fmuld %f12,%f18,%f4
+/* 0x029c */ std %f2,[%o2+%i3]
+/* 0x02a0 250 */ add %o2,48,%o2
+/* 0x02a4 252 */ ldd [%i5+%o0],%f6
+/* 0x02a8 */ fmuld %f22,%f20,%f22
+/* 0x02ac 250 */ add %o0,8,%o0
+/* 0x02b0 252 */ ldd [%o2+%i3],%f2
+/* 0x02b4 */ ldd [%i4+%o0],%f12
+/* 0x02b8 */ faddd %f26,%f10,%f10
+/* 0x02bc */ std %f10,[%l6+%i3]
+/* 0x02c0 */ fmuld %f24,%f18,%f10
+/* 0x02c4 */ ble,pt %icc,.L900000639
+/* 0x02c8 */ faddd %f4,%f22,%f4
+ .L900000642:
+/* 0x02cc 252 */ fmuld %f8,%f20,%f24
+/* 0x02d0 */ faddd %f0,%f4,%f8
+/* 0x02d4 250 */ add %o2,16,%o3
+/* 0x02d8 252 */ ldd [%o3+%i3],%f4
+/* 0x02dc */ fmuld %f14,%f18,%f0
+/* 0x02e0 */ cmp %l5,%i1
+/* 0x02e4 */ std %f8,[%o1+%i3]
+/* 0x02e8 */ fmuld %f12,%f18,%f8
+/* 0x02ec 250 */ add %o2,32,%o1
+/* 0x02f0 252 */ faddd %f10,%f24,%f12
+/* 0x02f4 */ ldd [%i5+%o0],%f22
+/* 0x02f8 */ fmuld %f6,%f20,%f6
+/* 0x02fc */ add %g2,8,%g2
+/* 0x0300 */ fmuld %f22,%f20,%f10
+/* 0x0304 */ faddd %f2,%f12,%f2
+/* 0x0308 */ faddd %f0,%f6,%f6
+/* 0x030c */ ldd [%o1+%i3],%f0
+/* 0x0310 */ std %f2,[%o2+%i3]
+/* 0x0314 */ faddd %f8,%f10,%f2
+/* 0x0318 */ sra %l5,0,%o2
+/* 0x031c */ sllx %o2,3,%o0
+/* 0x0320 */ faddd %f4,%f6,%f4
+/* 0x0324 */ std %f4,[%o3+%i3]
+/* 0x0328 */ faddd %f0,%f2,%f0
+/* 0x032c */ std %f0,[%o1+%i3]
+/* 0x0330 */ bg,a,pn %icc,.L77000213
+/* 0x0334 */ srl %i2,31,%o3
+ .L77000284:
+/* 0x0338 252 */ ldd [%i4+%o0],%f2
+ .L900000655:
+/* 0x033c 252 */ ldd [%i5+%o0],%f0
+/* 0x0340 */ fmuld %f2,%f18,%f2
+/* 0x0344 */ sra %g2,0,%o0
+/* 0x0348 */ sllx %o0,3,%o1
+/* 0x034c */ add %l5,1,%l5
+/* 0x0350 */ fmuld %f0,%f20,%f4
+/* 0x0354 */ ldd [%o1+%i3],%f0
+/* 0x0358 */ sra %l5,0,%o2
+/* 0x035c */ sllx %o2,3,%o0
+/* 0x0360 */ add %g2,2,%g2
+/* 0x0364 */ cmp %l5,%i1
+/* 0x0368 */ faddd %f2,%f4,%f2
+/* 0x036c */ faddd %f0,%f2,%f0
+/* 0x0370 */ std %f0,[%o1+%i3]
+/* 0x0374 */ ble,a,pt %icc,.L900000655
+/* 0x0378 */ ldd [%i4+%o0],%f2
+ .L900000626:
+/* 0x037c */ srl %i2,31,%o3
+/* 0x0380 252 */ ba .L900000654
+/* 0x0384 */ cmp %g3,30
+ .L77000213:
+/* 0x0388 254 */ cmp %g3,30
+ .L900000654:
+/* 0x038c */ add %i2,%o3,%o0
+/* 0x0390 254 */ bne,a,pt %icc,.L900000653
+/* 0x0394 */ fdtox %f16,%f0
+/* 0x0398 281 */ sra %o0,1,%g2
+/* 0x039c */ add %g2,1,%g2
+/* 0x03a0 */ ldd [%o7],%f0
+/* 0x03a4 */ sll %g2,1,%o1
+/* 0x03a8 */ sll %g1,1,%g2
+/* 0x03ac */ or %g0,%o1,%o2
+/* 0x03b0 */ fmovd %f0,%f2
+/* 0x03b4 */ or %g0,%g2,%o0
+/* 0x03b8 */ cmp %o1,%o0
+/* 0x03bc */ sub %g2,1,%o0
+/* 0x03c0 */ bge,pt %icc,.L77000215
+/* 0x03c4 */ or %g0,0,%g3
+/* 0x03c8 254 */ add %o1,1,%o1
+/* 0x03cc 281 */ sra %o2,0,%g2
+ .L900000652:
+/* 0x03d0 */ sllx %g2,3,%g2
+/* 0x03d4 */ ldd [%o7],%f6
+/* 0x03d8 */ add %o2,2,%o2
+/* 0x03dc */ sra %o1,0,%g3
+/* 0x03e0 */ ldd [%g2+%l4],%f8
+/* 0x03e4 */ cmp %o2,%o0
+/* 0x03e8 */ sllx %g3,3,%g3
+/* 0x03ec */ add %o1,2,%o1
+/* 0x03f0 */ ldd [%l4+%g3],%f10
+/* 0x03f4 */ fdtox %f8,%f12
+/* 0x03f8 */ fdtox %f10,%f4
+/* 0x03fc */ fmovd %f12,%f8
+/* 0x0400 */ fmovs %f6,%f12
+/* 0x0404 */ fmovs %f6,%f4
+/* 0x0408 */ fxtod %f12,%f6
+/* 0x040c */ fxtod %f4,%f12
+/* 0x0410 */ fdtox %f10,%f4
+/* 0x0414 */ faddd %f6,%f2,%f6
+/* 0x0418 */ std %f6,[%g2+%l4]
+/* 0x041c */ faddd %f12,%f0,%f6
+/* 0x0420 */ std %f6,[%l4+%g3]
+/* 0x0424 */ fitod %f8,%f2
+/* 0x0428 */ fitod %f4,%f0
+/* 0x042c */ ble,pt %icc,.L900000652
+/* 0x0430 */ sra %o2,0,%g2
+ .L77000233:
+/* 0x0434 */ or %g0,0,%g3
+ .L77000215:
+/* 0x0438 */ fdtox %f16,%f0
+ .L900000653:
+/* 0x043c 256 */ ldd [%o7],%f6
+/* 0x0440 */ add %g4,1,%g4
+/* 0x0444 */ or %g0,%i0,%i2
+/* 0x0448 */ ldd [%o5],%f8
+/* 0x044c */ add %g3,1,%g3
+/* 0x0450 */ add %i3,8,%i3
+/* 0x0454 */ fmovs %f6,%f0
+/* 0x0458 */ ldd [%o4],%f10
+/* 0x045c */ ld [%sp+2223],%o0
+/* 0x0460 */ fxtod %f0,%f6
+/* 0x0464 */ cmp %i0,%o0
+/* 0x0468 */ fmuld %f6,%f30,%f6
+/* 0x046c */ fmuld %f6,%f8,%f8
+/* 0x0470 */ fdtox %f8,%f8
+/* 0x0474 */ fxtod %f8,%f8
+/* 0x0478 */ fmuld %f8,%f10,%f8
+/* 0x047c */ fsubd %f6,%f8,%f20
+/* 0x0480 */ ble,a,pt %icc,.L900000651
+/* 0x0484 */ ldd [%i5],%f6
+ .L900000625:
+/* 0x0488 256 */ ba .L900000650
+/* 0x048c */ sllx %g5,3,%g2
+ .L77000279:
+/* 0x0490 261 */ ldd [%i1],%f4
+/* 0x0494 */ ldd [%i2],%f6
+/* 0x0498 273 */ std %f0,[%i3+8]
+/* 0x049c */ std %f0,[%i3+16]
+/* 0x04a0 261 */ fmuld %f4,%f6,%f6
+/* 0x04a4 */ std %f6,[%i3]
+/* 0x04a8 273 */ std %f0,[%i3+24]
+/* 0x04ac */ std %f0,[%i3+32]
+/* 0x04b0 */ fdtox %f6,%f2
+/* 0x04b4 */ std %f0,[%i3+40]
+/* 0x04b8 */ std %f0,[%i3+48]
+/* 0x04bc */ std %f0,[%i3+56]
+/* 0x04c0 */ std %f0,[%i3+64]
+/* 0x04c4 */ fmovs %f0,%f2
+/* 0x04c8 */ std %f0,[%i3+72]
+/* 0x04cc */ std %f0,[%i3+80]
+/* 0x04d0 */ std %f0,[%i3+88]
+/* 0x04d4 */ std %f0,[%i3+96]
+/* 0x04d8 */ std %f0,[%i3+104]
+/* 0x04dc */ std %f0,[%i3+112]
+/* 0x04e0 */ std %f0,[%i3+120]
+/* 0x04e4 */ std %f0,[%i3+128]
+/* 0x04e8 */ std %f0,[%i3+136]
+/* 0x04ec */ std %f0,[%i3+144]
+/* 0x04f0 */ std %f0,[%i3+152]
+/* 0x04f4 */ std %f0,[%i3+160]
+/* 0x04f8 */ std %f0,[%i3+168]
+/* 0x04fc */ fxtod %f2,%f6
+/* 0x0500 */ std %f0,[%i3+176]
+/* 0x0504 281 */ or %g0,1,%o2
+/* 0x0508 273 */ std %f0,[%i3+184]
+
+! 282 ! {
+! 284 ! m2j=pdm2[j];
+! 285 ! a=pdtj[0]+pdn_0*digit;
+! 286 ! b=pdtj[1]+pdm1_0*pdm2[j+1]+a*TwoToMinus16;
+
+/* 0x050c 286 */ sra %o2,0,%g2
+/* 0x0510 279 */ or %g0,%i3,%o3
+/* 0x0514 273 */ std %f0,[%i3+192]
+/* 0x0518 278 */ fmuld %f6,%f14,%f6
+/* 0x051c 281 */ or %g0,0,%g1
+/* 0x0520 273 */ std %f0,[%i3+200]
+/* 0x0524 */ std %f0,[%i3+208]
+/* 0x0528 */ std %f0,[%i3+216]
+/* 0x052c */ std %f0,[%i3+224]
+/* 0x0530 */ std %f0,[%i3+232]
+/* 0x0534 */ std %f0,[%i3+240]
+/* 0x0538 */ std %f0,[%i3+248]
+/* 0x053c */ std %f0,[%i3+256]
+/* 0x0540 */ std %f0,[%i3+264]
+/* 0x0544 */ std %f0,[%i3+272]
+/* 0x0548 */ std %f0,[%i3+280]
+/* 0x054c */ std %f0,[%i3+288]
+/* 0x0550 */ std %f0,[%i3+296]
+/* 0x0554 */ std %f0,[%i3+304]
+/* 0x0558 */ std %f0,[%i3+312]
+/* 0x055c */ std %f0,[%i3+320]
+/* 0x0560 */ std %f0,[%i3+328]
+/* 0x0564 */ std %f0,[%i3+336]
+/* 0x0568 */ std %f0,[%i3+344]
+/* 0x056c */ std %f0,[%i3+352]
+/* 0x0570 */ std %f0,[%i3+360]
+/* 0x0574 */ std %f0,[%i3+368]
+/* 0x0578 */ std %f0,[%i3+376]
+/* 0x057c */ std %f0,[%i3+384]
+/* 0x0580 */ std %f0,[%i3+392]
+/* 0x0584 */ std %f0,[%i3+400]
+/* 0x0588 */ std %f0,[%i3+408]
+/* 0x058c */ std %f0,[%i3+416]
+/* 0x0590 */ std %f0,[%i3+424]
+/* 0x0594 */ std %f0,[%i3+432]
+/* 0x0598 */ std %f0,[%i3+440]
+/* 0x059c */ std %f0,[%i3+448]
+/* 0x05a0 */ std %f0,[%i3+456]
+/* 0x05a4 */ std %f0,[%i3+464]
+/* 0x05a8 */ std %f0,[%i3+472]
+/* 0x05ac */ std %f0,[%i3+480]
+/* 0x05b0 */ std %f0,[%i3+488]
+/* 0x05b4 */ std %f0,[%i3+496]
+/* 0x05b8 278 */ ldd [%o5],%f8
+/* 0x05bc */ ldd [%o4],%f10
+/* 0x05c0 */ fmuld %f6,%f8,%f8
+/* 0x05c4 273 */ std %f0,[%i3+504]
+/* 0x05c8 */ std %f0,[%i3+512]
+/* 0x05cc */ std %f0,[%i3+520]
+/* 0x05d0 */ fdtox %f8,%f8
+/* 0x05d4 275 */ ldd [%o0],%f0
+/* 0x05d8 */ fxtod %f8,%f8
+/* 0x05dc */ fmuld %f8,%f10,%f8
+/* 0x05e0 */ fsubd %f6,%f8,%f2
+
+! 287 ! pdtj[1]=b;
+! 289 ! /**** this loop will be fully unrolled:
+! 290 ! for(i=1;i<16;i++)
+! 291 ! {
+! 292 ! pdtj[2*i]+=pdm1[i]*m2j+pdn[i]*digit;
+! 293 ! }
+! 294 ! *************************************/
+! 295 ! pdtj[2]+=pdm1[1]*m2j+pdn[1]*digit;
+! 296 ! pdtj[4]+=pdm1[2]*m2j+pdn[2]*digit;
+! 297 ! pdtj[6]+=pdm1[3]*m2j+pdn[3]*digit;
+! 298 ! pdtj[8]+=pdm1[4]*m2j+pdn[4]*digit;
+! 299 ! pdtj[10]+=pdm1[5]*m2j+pdn[5]*digit;
+! 300 ! pdtj[12]+=pdm1[6]*m2j+pdn[6]*digit;
+! 301 ! pdtj[14]+=pdm1[7]*m2j+pdn[7]*digit;
+! 302 ! pdtj[16]+=pdm1[8]*m2j+pdn[8]*digit;
+! 303 ! pdtj[18]+=pdm1[9]*m2j+pdn[9]*digit;
+! 304 ! pdtj[20]+=pdm1[10]*m2j+pdn[10]*digit;
+! 305 ! pdtj[22]+=pdm1[11]*m2j+pdn[11]*digit;
+! 306 ! pdtj[24]+=pdm1[12]*m2j+pdn[12]*digit;
+! 307 ! pdtj[26]+=pdm1[13]*m2j+pdn[13]*digit;
+! 308 ! pdtj[28]+=pdm1[14]*m2j+pdn[14]*digit;
+! 309 ! pdtj[30]+=pdm1[15]*m2j+pdn[15]*digit;
+! 310 ! /* no need for cleenup, cannot overflow */
+! 311 ! digit=mod(lower32(b,Zero)*dn0,TwoToMinus16,TwoTo16);
+
+
+ fmovd %f2,%f0 ! hand modified
+ fmovd %f30,%f18 ! hand modified
+ ldd [%o0],%f2
+ ldd [%o3],%f8
+ ldd [%i1],%f10
+ ldd [%o5],%f14 ! hand modified
+ ldd [%o4],%f16 ! hand modified
+ ldd [%i2],%f24
+
+ ldd [%i1+8],%f26
+ ldd [%i1+16],%f40
+ ldd [%i1+48],%f46
+ ldd [%i1+56],%f30
+ ldd [%i1+64],%f54
+ ldd [%i1+104],%f34
+ ldd [%i1+112],%f58
+
+ ldd [%o0+8],%f28
+ ldd [%o0+104],%f38
+ ldd [%o0+112],%f60
+
+ .L99999999: !1
+ ldd [%i1+24],%f32
+ fmuld %f0,%f2,%f4 !2
+ ldd [%o0+24],%f36
+ fmuld %f26,%f24,%f20 !3
+ ldd [%i1+40],%f42
+ fmuld %f28,%f0,%f22 !4
+ ldd [%o0+40],%f44
+ fmuld %f32,%f24,%f32 !5
+ ldd [%i2+8],%f6
+ faddd %f4,%f8,%f4
+ fmuld %f36,%f0,%f36 !6
+ add %i2,8,%i2
+ ldd [%o0+56],%f50
+ fmuld %f42,%f24,%f42 !7
+ ldd [%i1+72],%f52
+ faddd %f20,%f22,%f20
+ fmuld %f44,%f0,%f44 !8
+ ldd [%o3+16],%f22
+ fmuld %f10,%f6,%f12 !9
+ ldd [%o0+72],%f56
+ faddd %f32,%f36,%f32
+ fmuld %f14,%f4,%f4 !10
+ ldd [%o3+48],%f36
+ fmuld %f30,%f24,%f48 !11
+ ldd [%o3+8],%f8
+ faddd %f20,%f22,%f20
+ fmuld %f50,%f0,%f50 !12
+ std %f20,[%o3+16]
+ faddd %f42,%f44,%f42
+ fmuld %f52,%f24,%f52 !13
+ ldd [%o3+80],%f44
+ faddd %f4,%f12,%f4
+ fmuld %f56,%f0,%f56 !14
+ ldd [%i1+88],%f20
+ faddd %f32,%f36,%f32 !15
+ ldd [%o0+88],%f22
+ faddd %f48,%f50,%f48 !16
+ ldd [%o3+112],%f50
+ faddd %f52,%f56,%f52 !17
+ ldd [%o3+144],%f56
+ faddd %f4,%f8,%f8
+ fmuld %f20,%f24,%f20 !18
+ std %f32,[%o3+48]
+ faddd %f42,%f44,%f42
+ fmuld %f22,%f0,%f22 !19
+ std %f42,[%o3+80]
+ faddd %f48,%f50,%f48
+ fmuld %f34,%f24,%f32 !20
+ std %f48,[%o3+112]
+ faddd %f52,%f56,%f52
+ fmuld %f38,%f0,%f36 !21
+ ldd [%i1+120],%f42
+ fdtox %f8,%f4 !22
+ std %f52,[%o3+144]
+ faddd %f20,%f22,%f20 !23
+ ldd [%o0+120],%f44 !24
+ ldd [%o3+176],%f22
+ faddd %f32,%f36,%f32
+ fmuld %f42,%f24,%f42 !25
+ ldd [%o0+16],%f50
+ fmovs %f17,%f4 !26
+ ldd [%i1+32],%f52
+ fmuld %f44,%f0,%f44 !27
+ ldd [%o0+32],%f56
+ fmuld %f40,%f24,%f48 !28
+ ldd [%o3+208],%f36
+ faddd %f20,%f22,%f20
+ fmuld %f50,%f0,%f50 !29
+ std %f20,[%o3+176]
+ fxtod %f4,%f4
+ fmuld %f52,%f24,%f52 !30
+ ldd [%o0+48],%f22
+ faddd %f42,%f44,%f42
+ fmuld %f56,%f0,%f56 !31
+ ldd [%o3+240],%f44
+ faddd %f32,%f36,%f32 !32
+ std %f32,[%o3+208]
+ faddd %f48,%f50,%f48
+ fmuld %f46,%f24,%f20 !33
+ ldd [%o3+32],%f50
+ fmuld %f4,%f18,%f12 !34
+ ldd [%o0+64],%f36
+ faddd %f52,%f56,%f52
+ fmuld %f22,%f0,%f22 !35
+ ldd [%o3+64],%f56
+ faddd %f42,%f44,%f42 !36
+ std %f42,[%o3+240]
+ faddd %f48,%f50,%f48
+ fmuld %f54,%f24,%f32 !37
+ std %f48,[%o3+32]
+ fmuld %f12,%f14,%f4 !38
+ ldd [%i1+80],%f42
+ faddd %f52,%f56,%f56 ! yes, tmp52!
+ fmuld %f36,%f0,%f36 !39
+ ldd [%o0+80],%f44
+ faddd %f20,%f22,%f20 !40
+ ldd [%i1+96],%f48
+ fmuld %f58,%f24,%f52 !41
+ ldd [%o0+96],%f50
+ fdtox %f4,%f4
+ fmuld %f42,%f24,%f42 !42
+ std %f56,[%o3+64] ! yes, tmp52!
+ faddd %f32,%f36,%f32
+ fmuld %f44,%f0,%f44 !43
+ ldd [%o3+96],%f22
+ fmuld %f48,%f24,%f48 !44
+ ldd [%o3+128],%f36
+ fmovd %f6,%f24
+ fmuld %f50,%f0,%f50 !45
+ fxtod %f4,%f4
+ fmuld %f60,%f0,%f56 !46
+ add %o3,8,%o3
+ faddd %f42,%f44,%f42 !47
+ ldd [%o3+160-8],%f44
+ faddd %f20,%f22,%f20 !48
+ std %f20,[%o3+96-8]
+ faddd %f48,%f50,%f48 !49
+ ldd [%o3+192-8],%f50
+ faddd %f52,%f56,%f52
+ fmuld %f4,%f16,%f4 !50
+ ldd [%o3+224-8],%f56
+ faddd %f32,%f36,%f32 !51
+ std %f32,[%o3+128-8]
+ faddd %f42,%f44,%f42 !52
+ add %g1,1,%g1
+ std %f42,[%o3+160-8]
+ faddd %f48,%f50,%f48 !53
+ cmp %g1,31
+ std %f48,[%o3+192-8]
+ fsubd %f12,%f4,%f0 !54
+ faddd %f52,%f56,%f52
+ ble,pt %icc,.L99999999
+ std %f52,[%o3+224-8] !55
+ std %f8,[%o3]
+! 312 ! }
+! 313 ! }
+! 315 ! conv_d16_to_i32(result,dt+2*nlen,(long long *)dt,nlen+1);
+
+/* 0x0844 315 */ sllx %g5,3,%g2
+ .L900000650:
+/* 0x0848 315 */ ldd [%g2+%l4],%f2
+/* 0x084c */ add %l4,%g2,%o0
+/* 0x0850 */ or %g0,0,%g1
+/* 0x0854 */ ldd [%o0+8],%f4
+/* 0x0858 */ or %g0,0,%i2
+/* 0x085c */ cmp %l0,0
+/* 0x0860 */ fdtox %f2,%f2
+/* 0x0864 */ std %f2,[%sp+2255]
+/* 0x0868 311 */ sethi %hi(0xfc00),%o3
+/* 0x086c 315 */ fdtox %f4,%f2
+/* 0x0870 */ std %f2,[%sp+2247]
+/* 0x0874 311 */ or %g0,-1,%o2
+/* 0x0878 */ srl %o2,0,%o5
+/* 0x087c */ or %g0,2,%g5
+/* 0x0880 */ sub %l0,1,%g3
+/* 0x0884 */ or %g0,%o0,%o7
+/* 0x0888 */ add %o3,1023,%o4
+/* 0x088c 315 */ or %g0,64,%o3
+/* 0x0890 */ ldx [%sp+2255],%i0
+/* 0x0894 */ sub %l0,2,%o1
+/* 0x0898 */ ldx [%sp+2247],%i1
+/* 0x089c */ ble,pt %icc,.L900000648
+/* 0x08a0 */ sethi %hi(0xfc00),%g2
+/* 0x08a4 */ cmp %l0,6
+/* 0x08a8 */ and %i0,%o5,%o2
+/* 0x08ac */ bl,pn %icc,.L77000287
+/* 0x08b0 */ or %g0,3,%g4
+/* 0x08b4 */ ldd [%o7+16],%f0
+/* 0x08b8 */ and %i1,%o4,%i3
+/* 0x08bc */ sllx %i3,16,%o0
+/* 0x08c0 */ or %g0,5,%g4
+/* 0x08c4 */ srax %i1,16,%i4
+/* 0x08c8 */ fdtox %f0,%f0
+/* 0x08cc */ std %f0,[%sp+2239]
+/* 0x08d0 */ srax %i0,32,%i1
+/* 0x08d4 */ add %o2,%o0,%i5
+/* 0x08d8 */ ldd [%o7+24],%f0
+/* 0x08dc */ and %i5,%o5,%l1
+/* 0x08e0 */ or %g0,72,%o2
+/* 0x08e4 */ or %g0,4,%o0
+/* 0x08e8 */ or %g0,4,%g5
+/* 0x08ec */ ldx [%sp+2239],%g1
+/* 0x08f0 */ fdtox %f0,%f0
+/* 0x08f4 */ or %g0,4,%i2
+/* 0x08f8 */ std %f0,[%sp+2231]
+/* 0x08fc */ ldd [%o7+40],%f2
+/* 0x0900 */ and %g1,%o5,%i3
+/* 0x0904 */ ldd [%o7+32],%f0
+/* 0x0908 */ srax %g1,32,%g1
+/* 0x090c */ ldd [%o7+56],%f4
+/* 0x0910 */ fdtox %f2,%f2
+/* 0x0914 */ ldx [%sp+2231],%g2
+/* 0x0918 */ fdtox %f0,%f0
+/* 0x091c */ st %l1,[%l2]
+/* 0x0920 */ srax %i5,32,%l1
+/* 0x0924 */ fdtox %f4,%f4
+/* 0x0928 */ std %f2,[%sp+2231]
+/* 0x092c */ and %g2,%o4,%i5
+/* 0x0930 */ add %i4,%l1,%i4
+/* 0x0934 */ std %f0,[%sp+2239]
+/* 0x0938 */ sllx %i5,16,%i0
+/* 0x093c */ add %i1,%i4,%i1
+/* 0x0940 */ ldd [%o7+48],%f2
+/* 0x0944 */ srax %g2,16,%g2
+/* 0x0948 */ add %i3,%i0,%i0
+/* 0x094c */ ldd [%o7+72],%f0
+/* 0x0950 */ add %i0,%i1,%i3
+/* 0x0954 */ srax %i3,32,%i4
+/* 0x0958 */ fdtox %f2,%f2
+/* 0x095c */ and %i3,%o5,%i3
+/* 0x0960 */ ldx [%sp+2231],%i1
+/* 0x0964 */ add %g2,%i4,%g2
+/* 0x0968 */ ldx [%sp+2239],%i0
+/* 0x096c */ add %g1,%g2,%g1
+/* 0x0970 */ std %f2,[%sp+2239]
+/* 0x0974 */ std %f4,[%sp+2231]
+/* 0x0978 */ ldd [%o7+64],%f2
+/* 0x097c */ st %i3,[%l2+4]
+ .L900000631:
+/* 0x0980 */ ldx [%sp+2231],%i3
+/* 0x0984 */ add %i2,2,%i2
+/* 0x0988 */ add %g4,4,%g4
+/* 0x098c */ ldx [%sp+2239],%i5
+/* 0x0990 */ add %o2,16,%o2
+/* 0x0994 */ and %i1,%o4,%g2
+/* 0x0998 */ sllx %g2,16,%i4
+/* 0x099c */ and %i0,%o5,%g2
+/* 0x09a0 */ ldd [%o7+%o2],%f4
+/* 0x09a4 */ fdtox %f0,%f0
+/* 0x09a8 */ std %f0,[%sp+2231]
+/* 0x09ac */ srax %i1,16,%i1
+/* 0x09b0 */ add %g2,%i4,%g2
+/* 0x09b4 */ fdtox %f2,%f0
+/* 0x09b8 */ add %o3,16,%o3
+/* 0x09bc */ std %f0,[%sp+2239]
+/* 0x09c0 */ add %g2,%g1,%g1
+/* 0x09c4 */ ldd [%o7+%o3],%f2
+/* 0x09c8 */ srax %g1,32,%i4
+/* 0x09cc */ cmp %i2,%o1
+/* 0x09d0 */ srax %i0,32,%g2
+/* 0x09d4 */ add %i1,%i4,%i0
+/* 0x09d8 */ add %g2,%i0,%i4
+/* 0x09dc */ add %o0,4,%o0
+/* 0x09e0 */ and %g1,%o5,%g2
+/* 0x09e4 */ or %g0,%i5,%g1
+/* 0x09e8 */ st %g2,[%l2+%o0]
+/* 0x09ec */ add %g5,4,%g5
+/* 0x09f0 */ ldx [%sp+2231],%i1
+/* 0x09f4 */ ldx [%sp+2239],%i0
+/* 0x09f8 */ add %o2,16,%o2
+/* 0x09fc */ and %i3,%o4,%g2
+/* 0x0a00 */ sllx %g2,16,%i5
+/* 0x0a04 */ and %g1,%o5,%g2
+/* 0x0a08 */ ldd [%o7+%o2],%f0
+/* 0x0a0c */ fdtox %f4,%f4
+/* 0x0a10 */ std %f4,[%sp+2231]
+/* 0x0a14 */ srax %i3,16,%i3
+/* 0x0a18 */ add %g2,%i5,%g2
+/* 0x0a1c */ fdtox %f2,%f2
+/* 0x0a20 */ add %o3,16,%o3
+/* 0x0a24 */ std %f2,[%sp+2239]
+/* 0x0a28 */ add %g2,%i4,%g2
+/* 0x0a2c */ ldd [%o7+%o3],%f2
+/* 0x0a30 */ srax %g2,32,%i4
+/* 0x0a34 */ srax %g1,32,%g1
+/* 0x0a38 */ add %i3,%i4,%i3
+/* 0x0a3c */ add %g1,%i3,%g1
+/* 0x0a40 */ add %o0,4,%o0
+/* 0x0a44 */ and %g2,%o5,%g2
+/* 0x0a48 */ ble,pt %icc,.L900000631
+/* 0x0a4c */ st %g2,[%l2+%o0]
+ .L900000634:
+/* 0x0a50 */ srax %i1,16,%i5
+/* 0x0a54 */ ldx [%sp+2231],%o1
+/* 0x0a58 */ and %i1,%o4,%i3
+/* 0x0a5c */ sllx %i3,16,%i3
+/* 0x0a60 */ ldx [%sp+2239],%i4
+/* 0x0a64 */ and %i0,%o5,%g2
+/* 0x0a68 */ add %g2,%i3,%g2
+/* 0x0a6c */ and %o1,%o4,%i3
+/* 0x0a70 */ fdtox %f0,%f4
+/* 0x0a74 */ sllx %i3,16,%i3
+/* 0x0a78 */ std %f4,[%sp+2231]
+/* 0x0a7c */ add %g2,%g1,%g2
+/* 0x0a80 */ srax %g2,32,%l1
+/* 0x0a84 */ and %i4,%o5,%i1
+/* 0x0a88 */ fdtox %f2,%f0
+/* 0x0a8c */ srax %i0,32,%g1
+/* 0x0a90 */ std %f0,[%sp+2239]
+/* 0x0a94 */ add %i5,%l1,%i0
+/* 0x0a98 */ srax %o1,16,%o1
+/* 0x0a9c */ add %g1,%i0,%i0
+/* 0x0aa0 */ add %o0,4,%g1
+/* 0x0aa4 */ add %i1,%i3,%o0
+/* 0x0aa8 */ and %g2,%o5,%g2
+/* 0x0aac */ st %g2,[%l2+%g1]
+/* 0x0ab0 */ add %o0,%i0,%o0
+/* 0x0ab4 */ srax %o0,32,%i3
+/* 0x0ab8 */ ldx [%sp+2231],%i1
+/* 0x0abc */ add %g1,4,%g1
+/* 0x0ac0 */ ldx [%sp+2239],%i0
+/* 0x0ac4 */ and %o0,%o5,%g2
+/* 0x0ac8 */ add %o1,%i3,%o1
+/* 0x0acc */ srax %i4,32,%o0
+/* 0x0ad0 */ cmp %i2,%g3
+/* 0x0ad4 */ st %g2,[%l2+%g1]
+/* 0x0ad8 */ bg,pn %icc,.L77000236
+/* 0x0adc */ add %o0,%o1,%g1
+/* 0x0ae0 */ add %g4,6,%g4
+/* 0x0ae4 */ add %g5,6,%g5
+ .L77000287:
+/* 0x0ae8 */ sra %g5,0,%o1
+ .L900000647:
+/* 0x0aec */ sllx %o1,3,%o2
+/* 0x0af0 */ and %i0,%o5,%o0
+/* 0x0af4 */ ldd [%o7+%o2],%f0
+/* 0x0af8 */ sra %g4,0,%o2
+/* 0x0afc */ and %i1,%o4,%o1
+/* 0x0b00 */ sllx %o2,3,%o2
+/* 0x0b04 */ add %g1,%o0,%o0
+/* 0x0b08 */ fdtox %f0,%f0
+/* 0x0b0c */ std %f0,[%sp+2239]
+/* 0x0b10 */ sllx %o1,16,%o1
+/* 0x0b14 */ add %o0,%o1,%o1
+/* 0x0b18 */ add %g5,2,%g5
+/* 0x0b1c */ ldd [%o7+%o2],%f0
+/* 0x0b20 */ srax %o1,32,%g1
+/* 0x0b24 */ and %o1,%o5,%o2
+/* 0x0b28 */ srax %i1,16,%o0
+/* 0x0b2c */ add %g4,2,%g4
+/* 0x0b30 */ fdtox %f0,%f0
+/* 0x0b34 */ std %f0,[%sp+2231]
+/* 0x0b38 */ sra %i2,0,%o1
+/* 0x0b3c */ sllx %o1,2,%o1
+/* 0x0b40 */ add %o0,%g1,%g2
+/* 0x0b44 */ srax %i0,32,%g1
+/* 0x0b48 */ add %i2,1,%i2
+/* 0x0b4c */ add %g1,%g2,%g1
+/* 0x0b50 */ cmp %i2,%g3
+/* 0x0b54 */ ldx [%sp+2239],%o3
+/* 0x0b58 */ ldx [%sp+2231],%i1
+/* 0x0b5c */ st %o2,[%l2+%o1]
+/* 0x0b60 */ or %g0,%o3,%i0
+/* 0x0b64 */ ble,pt %icc,.L900000647
+/* 0x0b68 */ sra %g5,0,%o1
+ .L77000236:
+/* 0x0b6c */ sethi %hi(0xfc00),%g2
+ .L900000648:
+/* 0x0b70 */ or %g0,-1,%o0
+/* 0x0b74 */ add %g2,1023,%g2
+/* 0x0b78 */ srl %o0,0,%g3
+/* 0x0b7c */ and %i1,%g2,%g2
+/* 0x0b80 */ and %i0,%g3,%g4
+/* 0x0b84 */ sllx %g2,16,%g2
+/* 0x0b88 */ add %g1,%g4,%g4
+/* 0x0b8c */ sra %i2,0,%g5
+/* 0x0b90 */ add %g4,%g2,%g4
+/* 0x0b94 */ sllx %g5,2,%g2
+/* 0x0b98 */ and %g4,%g3,%g3
+/* 0x0b9c */ st %g3,[%l2+%g2]
+
+! 317 ! adjust_montf_result(result,nint,nlen);
+
+/* 0x0ba0 317 */ sra %l0,0,%g4
+/* 0x0ba4 */ sllx %g4,2,%g2
+/* 0x0ba8 */ ld [%l2+%g2],%g2
+/* 0x0bac */ cmp %g2,0
+/* 0x0bb0 */ bleu,pn %icc,.L77000241
+/* 0x0bb4 */ or %g0,-1,%o1
+/* 0x0bb8 */ ba .L900000646
+/* 0x0bbc */ cmp %o1,0
+ .L77000241:
+/* 0x0bc0 */ sub %l0,1,%o1
+/* 0x0bc4 */ cmp %o1,0
+/* 0x0bc8 */ bl,pn %icc,.L77000244
+/* 0x0bcc */ sra %o1,0,%g2
+ .L900000645:
+/* 0x0bd0 */ sllx %g2,2,%g2
+/* 0x0bd4 */ sub %o1,1,%o0
+/* 0x0bd8 */ ld [%l3+%g2],%g3
+/* 0x0bdc */ ld [%l2+%g2],%g2
+/* 0x0be0 */ cmp %g2,%g3
+/* 0x0be4 */ bne,pn %icc,.L77000244
+/* 0x0be8 */ nop
+/* 0x0bec 0 */ or %g0,%o0,%o1
+/* 0x0bf0 317 */ cmp %o0,0
+/* 0x0bf4 */ bge,pt %icc,.L900000645
+/* 0x0bf8 */ sra %o1,0,%g2
+ .L77000244:
+/* 0x0bfc */ cmp %o1,0
+ .L900000646:
+/* 0x0c00 */ bl,pn %icc,.L77000288
+/* 0x0c04 */ sra %o1,0,%g2
+/* 0x0c08 */ sllx %g2,2,%g2
+/* 0x0c0c */ ld [%l3+%g2],%g3
+/* 0x0c10 */ ld [%l2+%g2],%g2
+/* 0x0c14 */ cmp %g2,%g3
+/* 0x0c18 */ bleu,pt %icc,.L77000224
+/* 0x0c1c */ nop
+ .L77000288:
+/* 0x0c20 */ cmp %l0,0
+/* 0x0c24 */ ble,pt %icc,.L77000224
+/* 0x0c28 */ nop
+/* 0x0c2c 317 */ or %g0,-1,%g2
+/* 0x0c30 315 */ or %g0,0,%i0
+/* 0x0c34 317 */ srl %g2,0,%g2
+/* 0x0c38 315 */ or %g0,0,%g4
+/* 0x0c3c */ or %g0,0,%o1
+/* 0x0c40 317 */ sub %l0,1,%g5
+/* 0x0c44 */ cmp %l0,9
+/* 0x0c48 315 */ or %g0,8,%o5
+/* 0x0c4c */ bl,pn %icc,.L77000289
+/* 0x0c50 */ sub %l0,4,%o7
+/* 0x0c54 */ ld [%l2],%o1
+/* 0x0c58 */ or %g0,5,%i0
+/* 0x0c5c */ ld [%l3],%o2
+/* 0x0c60 */ or %g0,12,%o4
+/* 0x0c64 */ or %g0,16,%g1
+/* 0x0c68 */ ld [%l3+4],%o3
+/* 0x0c6c */ ld [%l2+4],%o0
+/* 0x0c70 */ sub %o1,%o2,%o1
+/* 0x0c74 */ ld [%l3+8],%i1
+/* 0x0c78 */ and %o1,%g2,%g4
+/* 0x0c7c */ st %g4,[%l2]
+/* 0x0c80 */ srax %o1,32,%g4
+/* 0x0c84 */ sub %o0,%o3,%o0
+/* 0x0c88 */ ld [%l3+12],%o2
+/* 0x0c8c */ add %o0,%g4,%o0
+/* 0x0c90 */ and %o0,%g2,%g4
+/* 0x0c94 */ st %g4,[%l2+4]
+/* 0x0c98 */ srax %o0,32,%o0
+/* 0x0c9c */ ld [%l2+8],%o1
+/* 0x0ca0 */ ld [%l2+12],%o3
+/* 0x0ca4 */ sub %o1,%i1,%o1
+ .L900000635:
+/* 0x0ca8 */ add %g1,4,%g3
+/* 0x0cac */ ld [%g1+%l2],%g4
+/* 0x0cb0 */ add %o1,%o0,%o0
+/* 0x0cb4 */ ld [%l3+%g1],%i1
+/* 0x0cb8 */ sub %o3,%o2,%o1
+/* 0x0cbc */ and %o0,%g2,%o2
+/* 0x0cc0 */ st %o2,[%o5+%l2]
+/* 0x0cc4 */ srax %o0,32,%o2
+/* 0x0cc8 */ add %i0,4,%i0
+/* 0x0ccc */ add %g1,8,%o5
+/* 0x0cd0 */ ld [%g3+%l2],%o0
+/* 0x0cd4 */ add %o1,%o2,%o1
+/* 0x0cd8 */ ld [%l3+%g3],%o3
+/* 0x0cdc */ sub %g4,%i1,%o2
+/* 0x0ce0 */ and %o1,%g2,%g4
+/* 0x0ce4 */ st %g4,[%o4+%l2]
+/* 0x0ce8 */ srax %o1,32,%g4
+/* 0x0cec */ cmp %i0,%o7
+/* 0x0cf0 */ add %g1,12,%o4
+/* 0x0cf4 */ ld [%o5+%l2],%o1
+/* 0x0cf8 */ add %o2,%g4,%o2
+/* 0x0cfc */ ld [%l3+%o5],%i1
+/* 0x0d00 */ sub %o0,%o3,%o0
+/* 0x0d04 */ and %o2,%g2,%o3
+/* 0x0d08 */ st %o3,[%g1+%l2]
+/* 0x0d0c */ srax %o2,32,%g4
+/* 0x0d10 */ ld [%o4+%l2],%o3
+/* 0x0d14 */ add %g1,16,%g1
+/* 0x0d18 */ add %o0,%g4,%o0
+/* 0x0d1c */ ld [%l3+%o4],%o2
+/* 0x0d20 */ sub %o1,%i1,%o1
+/* 0x0d24 */ and %o0,%g2,%g4
+/* 0x0d28 */ st %g4,[%g3+%l2]
+/* 0x0d2c */ ble,pt %icc,.L900000635
+/* 0x0d30 */ srax %o0,32,%o0
+ .L900000638:
+/* 0x0d34 */ add %o1,%o0,%g3
+/* 0x0d38 */ sub %o3,%o2,%o1
+/* 0x0d3c */ ld [%g1+%l2],%o0
+/* 0x0d40 */ ld [%l3+%g1],%o2
+/* 0x0d44 */ srax %g3,32,%o7
+/* 0x0d48 */ and %g3,%g2,%o3
+/* 0x0d4c */ add %o1,%o7,%o1
+/* 0x0d50 */ st %o3,[%o5+%l2]
+/* 0x0d54 */ cmp %i0,%g5
+/* 0x0d58 */ sub %o0,%o2,%o0
+/* 0x0d5c */ and %o1,%g2,%o2
+/* 0x0d60 */ st %o2,[%o4+%l2]
+/* 0x0d64 */ srax %o1,32,%o1
+/* 0x0d68 */ sra %i0,0,%o2
+/* 0x0d6c */ add %o0,%o1,%o0
+/* 0x0d70 */ srax %o0,32,%g4
+/* 0x0d74 */ and %o0,%g2,%o1
+/* 0x0d78 */ st %o1,[%g1+%l2]
+/* 0x0d7c */ bg,pn %icc,.L77000224
+/* 0x0d80 */ sllx %o2,2,%o1
+ .L77000289:
+/* 0x0d84 0 */ or %g0,%o1,%g1
+ .L900000644:
+/* 0x0d88 */ ld [%o1+%l2],%o0
+/* 0x0d8c */ add %i0,1,%i0
+/* 0x0d90 */ ld [%l3+%o1],%o1
+/* 0x0d94 */ sra %i0,0,%o2
+/* 0x0d98 */ cmp %i0,%g5
+/* 0x0d9c */ add %g4,%o0,%o0
+/* 0x0da0 */ sub %o0,%o1,%o0
+/* 0x0da4 */ srax %o0,32,%g4
+/* 0x0da8 */ and %o0,%g2,%o1
+/* 0x0dac */ st %o1,[%g1+%l2]
+/* 0x0db0 */ sllx %o2,2,%o1
+/* 0x0db4 */ ble,pt %icc,.L900000644
+/* 0x0db8 */ or %g0,%o1,%g1
+ .L77000224:
+/* 0x0dbc */ ret ! Result =
+/* 0x0dc0 */ restore %g0,%g0,%g0
+/* 0x0dc4 0 */ .type mont_mulf_noconv,2
+/* 0x0dc4 */ .size mont_mulf_noconv,(.-mont_mulf_noconv)
+
diff --git a/security/nss/lib/freebl/mpi/mp_comba.c b/security/nss/lib/freebl/mpi/mp_comba.c
new file mode 100644
index 000000000..3b4937b98
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mp_comba.c
@@ -0,0 +1,3235 @@
+/*
+ * The below file is derived from TFM v0.03.
+ * It contains code from fp_mul_comba.c and
+ * fp_sqr_comba.c, which contained the following license.
+ *
+ * Right now, the assembly in this file limits
+ * this code to AMD 64.
+ *
+ * This file is public domain.
+ */
+
+/* TomsFastMath, a fast ISO C bignum library.
+ *
+ * This project is meant to fill in where LibTomMath
+ * falls short. That is speed ;-)
+ *
+ * This project is public domain and free for all purposes.
+ *
+ * Tom St Denis, tomstdenis@iahu.ca
+ */
+
+#include "mpi-priv.h"
+
+/*
+ * clamp digits: drop zero digits from the most-significant end of (a) by
+ * decrementing (a)->used, then set the sign to ZPOS when no digits remain.
+ *
+ * The macro expands to a bare { } block and evaluates its argument several
+ * times, so pass a plain pointer expression without side effects.
+ */
+#define mp_clamp(a) \
+ { \
+ while ((a)->used && (a)->dp[(a)->used - 1] == 0) \
+ --((a)->used); \
+ (a)->sign = (a)->used ? (a)->sign : ZPOS; \
+ }
+
+/*
+ * The COMBA_* helpers below operate on three locals named c0, c1 and c2
+ * that must be declared by the function using them; together they hold a
+ * three-word column accumulator (c0 lowest, c2 highest).
+ */
+
+/* anything you need at the start (expands to nothing here) */
+#define COMBA_START
+
+/* clear the chaining variables: zero the whole c0/c1/c2 accumulator */
+#define COMBA_CLEAR \
+ c0 = c1 = c2 = 0;
+
+/*
+ * forward the carry to the next digit: shift the accumulator down one
+ * word (c0 <- c1, c1 <- c2) and zero c2.
+ * NOTE(review): the definition already ends in ';', so the usual
+ * "COMBA_FORWARD;" call expands to an extra empty statement and the
+ * do/while(0) wrapper does not protect an unbraced if/else.
+ */
+#define COMBA_FORWARD \
+ do { \
+ c0 = c1; \
+ c1 = c2; \
+ c2 = 0; \
+ } while (0);
+
+/* anything you need at the end (expands to nothing here) */
+#define COMBA_FINI
+
+/*
+ * MULADD(i, j): multiply i by j and add the double-width product into the
+ * three-word accumulator (c0, c1, c2).
+ *
+ * movq loads i into %rax and mulq multiplies it by j, leaving the product
+ * in %rdx:%rax. The low half is added to c0, the high half plus carry to
+ * c1, and the remaining carry to c2. Operands %0-%2 are c0/c1/c2 (tied to
+ * inputs %3-%5), %6 is i and %7 is j. %rax, %rdx and the flags are
+ * declared clobbered.
+ */
+#define MULADD(i, j) \
+ __asm__( \
+ "movq %6,%%rax \n\t" \
+ "mulq %7 \n\t" \
+ "addq %%rax,%0 \n\t" \
+ "adcq %%rdx,%1 \n\t" \
+ "adcq $0,%2 \n\t" \
+ : "=r"(c0), "=r"(c1), "=r"(c2) \
+ : "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) \
+ : "%rax", "%rdx", "cc");
+
+/*
+ * Accumulator helpers. CLEAR_CARRY and CARRY_FORWARD have the same bodies
+ * as COMBA_CLEAR and COMBA_FORWARD. COMBA_STORE and COMBA_STORE2 are also
+ * used by the s_mp_mul_comba_* multiply routines in this file, not only
+ * by squaring code.
+ */
+
+/* zero the c0/c1/c2 accumulator */
+#define CLEAR_CARRY \
+ c0 = c1 = c2 = 0;
+
+/* store the lowest accumulator word (c0) into x */
+#define COMBA_STORE(x) \
+ x = c0;
+
+/* store the middle accumulator word (c1) into x */
+#define COMBA_STORE2(x) \
+ x = c1;
+
+/*
+ * shift the accumulator down one word (c0 <- c1, c1 <- c2) and zero c2.
+ * NOTE(review): as with COMBA_FORWARD, the definition carries its own
+ * trailing ';' after while (0).
+ */
+#define CARRY_FORWARD \
+ do { \
+ c0 = c1; \
+ c1 = c2; \
+ c2 = 0; \
+ } while (0);
+
+/* identical (empty) redefinition of the COMBA_FINI macro defined earlier */
+#define COMBA_FINI
+
+/*
+ * SQRADD(i, j): square i and add the double-width result into the
+ * accumulator (c0, c1, c2).
+ *
+ * i is loaded into %rax and multiplied by %rax itself; the parameter j is
+ * not referenced by the expansion. %rax, %rdx and the flags are declared
+ * clobbered.
+ */
+#define SQRADD(i, j) \
+ __asm__( \
+ "movq %6,%%rax \n\t" \
+ "mulq %%rax \n\t" \
+ "addq %%rax,%0 \n\t" \
+ "adcq %%rdx,%1 \n\t" \
+ "adcq $0,%2 \n\t" \
+ : "=r"(c0), "=r"(c1), "=r"(c2) \
+ : "0"(c0), "1"(c1), "2"(c2), "g"(i) \
+ : "%rax", "%rdx", "cc");
+
+/*
+ * SQRADD2(i, j): multiply i by j once, then add the double-width product
+ * into the accumulator (c0, c1, c2) twice.
+ *
+ * The product stays in %rdx:%rax between the two add/adc/adc sequences, so
+ * the net effect is adding 2*i*j with carries propagated into c2.
+ */
+#define SQRADD2(i, j) \
+ __asm__( \
+ "movq %6,%%rax \n\t" \
+ "mulq %7 \n\t" \
+ "addq %%rax,%0 \n\t" \
+ "adcq %%rdx,%1 \n\t" \
+ "adcq $0,%2 \n\t" \
+ "addq %%rax,%0 \n\t" \
+ "adcq %%rdx,%1 \n\t" \
+ "adcq $0,%2 \n\t" \
+ : "=r"(c0), "=r"(c1), "=r"(c2) \
+ : "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) \
+ : "%rax", "%rdx", "cc");
+
+/*
+ * SQRADDSC(i, j): start a secondary accumulator (sc0, sc1, sc2) with the
+ * product i*j.
+ *
+ * sc0 receives the low half of the product, sc1 the high half, and sc2 is
+ * zeroed with xorq. Previous contents of sc0/sc1/sc2 are overwritten, not
+ * accumulated. The locals sc0, sc1 and sc2 must exist at the use site.
+ */
+#define SQRADDSC(i, j) \
+ __asm__( \
+ "movq %3,%%rax \n\t" \
+ "mulq %4 \n\t" \
+ "movq %%rax,%0 \n\t" \
+ "movq %%rdx,%1 \n\t" \
+ "xorq %2,%2 \n\t" \
+ : "=r"(sc0), "=r"(sc1), "=r"(sc2) \
+ : "g"(i), "g"(j) \
+ : "%rax", "%rdx", "cc");
+
+/*
+ * SQRADDAC(i, j): multiply i by j and add the double-width product into
+ * the secondary accumulator (sc0, sc1, sc2).
+ *
+ * Same instruction sequence as MULADD, but operating on sc0/sc1/sc2
+ * instead of c0/c1/c2.
+ */
+#define SQRADDAC(i, j) \
+ __asm__( \
+ "movq %6,%%rax \n\t" \
+ "mulq %7 \n\t" \
+ "addq %%rax,%0 \n\t" \
+ "adcq %%rdx,%1 \n\t" \
+ "adcq $0,%2 \n\t" \
+ : "=r"(sc0), "=r"(sc1), "=r"(sc2) \
+ : "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) \
+ : "%rax", "%rdx", "cc");
+
+/*
+ * SQRADDDB: add the secondary accumulator (sc0, sc1, sc2) into the main
+ * accumulator (c0, c1, c2) twice, i.e. add double its value.
+ *
+ * Operands %6-%8 are sc0/sc1/sc2 and are only read. The outputs are marked
+ * early-clobber ("=&r") so they are not placed in the same registers as
+ * the sc inputs. No scratch registers are used; only the flags are
+ * clobbered.
+ */
+#define SQRADDDB \
+ __asm__( \
+ "addq %6,%0 \n\t" \
+ "adcq %7,%1 \n\t" \
+ "adcq %8,%2 \n\t" \
+ "addq %6,%0 \n\t" \
+ "adcq %7,%1 \n\t" \
+ "adcq %8,%2 \n\t" \
+ : "=&r"(c0), "=&r"(c1), "=&r"(c2) \
+ : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) \
+ : "cc");
+
+/*
+ * s_mp_mul_comba_4: fully unrolled 4-digit by 4-digit multiply.
+ *
+ * Reads exactly 4 digits from A->dp and 4 from B->dp and writes the
+ * product to C->dp[0] .. C->dp[7]. For each output column k (numbered in
+ * the comments below) every product at[i] * at[4 + j] with i + j == k is
+ * added into the c0/c1/c2 accumulator, the low word is stored as digit k,
+ * and the accumulator is shifted down one word for the next column.
+ *
+ * On return C->used is 8 before clamping, C->sign is A->sign XOR B->sign,
+ * and mp_clamp() has stripped high-order zero digits. No size checks are
+ * made here and nothing is returned.
+ */
+void
+s_mp_mul_comba_4(const mp_int *A, const mp_int *B, mp_int *C)
+{
+ mp_digit c0, c1, c2, at[8];
+
+ /* at[0..3] = digits of A, at[4..7] = digits of B; copied before C->dp is written */
+ memcpy(at, A->dp, 4 * sizeof(mp_digit));
+ memcpy(at + 4, B->dp, 4 * sizeof(mp_digit));
+ COMBA_START;
+
+ COMBA_CLEAR;
+ /* 0 */
+ MULADD(at[0], at[4]);
+ COMBA_STORE(C->dp[0]);
+ /* 1 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[5]);
+ MULADD(at[1], at[4]);
+ COMBA_STORE(C->dp[1]);
+ /* 2 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[6]);
+ MULADD(at[1], at[5]);
+ MULADD(at[2], at[4]);
+ COMBA_STORE(C->dp[2]);
+ /* 3 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[7]);
+ MULADD(at[1], at[6]);
+ MULADD(at[2], at[5]);
+ MULADD(at[3], at[4]);
+ COMBA_STORE(C->dp[3]);
+ /* 4 */
+ COMBA_FORWARD;
+ MULADD(at[1], at[7]);
+ MULADD(at[2], at[6]);
+ MULADD(at[3], at[5]);
+ COMBA_STORE(C->dp[4]);
+ /* 5 */
+ COMBA_FORWARD;
+ MULADD(at[2], at[7]);
+ MULADD(at[3], at[6]);
+ COMBA_STORE(C->dp[5]);
+ /* 6 */
+ COMBA_FORWARD;
+ MULADD(at[3], at[7]);
+ COMBA_STORE(C->dp[6]);
+ /* the top digit is the word left in c1 after the last column */
+ COMBA_STORE2(C->dp[7]);
+ C->used = 8;
+ C->sign = A->sign ^ B->sign;
+ mp_clamp(C);
+ COMBA_FINI;
+}
+
+/*
+ * s_mp_mul_comba_8: fully unrolled 8-digit by 8-digit multiply.
+ *
+ * Reads exactly 8 digits from A->dp and 8 from B->dp and writes the
+ * product to C->dp[0] .. C->dp[15]. For each output column k (numbered in
+ * the comments below) every product at[i] * at[8 + j] with i + j == k is
+ * added into the c0/c1/c2 accumulator, the low word is stored as digit k,
+ * and the accumulator is shifted down one word for the next column.
+ *
+ * On return C->used is 16 before clamping, C->sign is A->sign XOR B->sign,
+ * and mp_clamp() has stripped high-order zero digits. No size checks are
+ * made here and nothing is returned.
+ */
+void
+s_mp_mul_comba_8(const mp_int *A, const mp_int *B, mp_int *C)
+{
+ mp_digit c0, c1, c2, at[16];
+
+ /* at[0..7] = digits of A, at[8..15] = digits of B; copied before C->dp is written */
+ memcpy(at, A->dp, 8 * sizeof(mp_digit));
+ memcpy(at + 8, B->dp, 8 * sizeof(mp_digit));
+ COMBA_START;
+
+ COMBA_CLEAR;
+ /* 0 */
+ MULADD(at[0], at[8]);
+ COMBA_STORE(C->dp[0]);
+ /* 1 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[9]);
+ MULADD(at[1], at[8]);
+ COMBA_STORE(C->dp[1]);
+ /* 2 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[10]);
+ MULADD(at[1], at[9]);
+ MULADD(at[2], at[8]);
+ COMBA_STORE(C->dp[2]);
+ /* 3 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[11]);
+ MULADD(at[1], at[10]);
+ MULADD(at[2], at[9]);
+ MULADD(at[3], at[8]);
+ COMBA_STORE(C->dp[3]);
+ /* 4 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[12]);
+ MULADD(at[1], at[11]);
+ MULADD(at[2], at[10]);
+ MULADD(at[3], at[9]);
+ MULADD(at[4], at[8]);
+ COMBA_STORE(C->dp[4]);
+ /* 5 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[13]);
+ MULADD(at[1], at[12]);
+ MULADD(at[2], at[11]);
+ MULADD(at[3], at[10]);
+ MULADD(at[4], at[9]);
+ MULADD(at[5], at[8]);
+ COMBA_STORE(C->dp[5]);
+ /* 6 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[14]);
+ MULADD(at[1], at[13]);
+ MULADD(at[2], at[12]);
+ MULADD(at[3], at[11]);
+ MULADD(at[4], at[10]);
+ MULADD(at[5], at[9]);
+ MULADD(at[6], at[8]);
+ COMBA_STORE(C->dp[6]);
+ /* 7 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[15]);
+ MULADD(at[1], at[14]);
+ MULADD(at[2], at[13]);
+ MULADD(at[3], at[12]);
+ MULADD(at[4], at[11]);
+ MULADD(at[5], at[10]);
+ MULADD(at[6], at[9]);
+ MULADD(at[7], at[8]);
+ COMBA_STORE(C->dp[7]);
+ /* 8 */
+ COMBA_FORWARD;
+ MULADD(at[1], at[15]);
+ MULADD(at[2], at[14]);
+ MULADD(at[3], at[13]);
+ MULADD(at[4], at[12]);
+ MULADD(at[5], at[11]);
+ MULADD(at[6], at[10]);
+ MULADD(at[7], at[9]);
+ COMBA_STORE(C->dp[8]);
+ /* 9 */
+ COMBA_FORWARD;
+ MULADD(at[2], at[15]);
+ MULADD(at[3], at[14]);
+ MULADD(at[4], at[13]);
+ MULADD(at[5], at[12]);
+ MULADD(at[6], at[11]);
+ MULADD(at[7], at[10]);
+ COMBA_STORE(C->dp[9]);
+ /* 10 */
+ COMBA_FORWARD;
+ MULADD(at[3], at[15]);
+ MULADD(at[4], at[14]);
+ MULADD(at[5], at[13]);
+ MULADD(at[6], at[12]);
+ MULADD(at[7], at[11]);
+ COMBA_STORE(C->dp[10]);
+ /* 11 */
+ COMBA_FORWARD;
+ MULADD(at[4], at[15]);
+ MULADD(at[5], at[14]);
+ MULADD(at[6], at[13]);
+ MULADD(at[7], at[12]);
+ COMBA_STORE(C->dp[11]);
+ /* 12 */
+ COMBA_FORWARD;
+ MULADD(at[5], at[15]);
+ MULADD(at[6], at[14]);
+ MULADD(at[7], at[13]);
+ COMBA_STORE(C->dp[12]);
+ /* 13 */
+ COMBA_FORWARD;
+ MULADD(at[6], at[15]);
+ MULADD(at[7], at[14]);
+ COMBA_STORE(C->dp[13]);
+ /* 14 */
+ COMBA_FORWARD;
+ MULADD(at[7], at[15]);
+ COMBA_STORE(C->dp[14]);
+ /* the top digit is the word left in c1 after the last column */
+ COMBA_STORE2(C->dp[15]);
+ C->used = 16;
+ C->sign = A->sign ^ B->sign;
+ mp_clamp(C);
+ COMBA_FINI;
+}
+
+/*
+ * s_mp_mul_comba_16: fully unrolled 16-digit by 16-digit multiply.
+ *
+ * Reads exactly 16 digits from A->dp and 16 from B->dp and writes the
+ * product to C->dp[0] .. C->dp[31]. For each output column k (numbered in
+ * the comments below) every product at[i] * at[16 + j] with i + j == k is
+ * added into the c0/c1/c2 accumulator, the low word is stored as digit k,
+ * and the accumulator is shifted down one word for the next column.
+ *
+ * On return C->used is 32 before clamping, C->sign is A->sign XOR B->sign,
+ * and mp_clamp() has stripped high-order zero digits. No size checks are
+ * made here and nothing is returned.
+ */
+void
+s_mp_mul_comba_16(const mp_int *A, const mp_int *B, mp_int *C)
+{
+ mp_digit c0, c1, c2, at[32];
+
+ /* at[0..15] = digits of A, at[16..31] = digits of B; copied before C->dp is written */
+ memcpy(at, A->dp, 16 * sizeof(mp_digit));
+ memcpy(at + 16, B->dp, 16 * sizeof(mp_digit));
+ COMBA_START;
+
+ COMBA_CLEAR;
+ /* 0 */
+ MULADD(at[0], at[16]);
+ COMBA_STORE(C->dp[0]);
+ /* 1 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[17]);
+ MULADD(at[1], at[16]);
+ COMBA_STORE(C->dp[1]);
+ /* 2 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[18]);
+ MULADD(at[1], at[17]);
+ MULADD(at[2], at[16]);
+ COMBA_STORE(C->dp[2]);
+ /* 3 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[19]);
+ MULADD(at[1], at[18]);
+ MULADD(at[2], at[17]);
+ MULADD(at[3], at[16]);
+ COMBA_STORE(C->dp[3]);
+ /* 4 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[20]);
+ MULADD(at[1], at[19]);
+ MULADD(at[2], at[18]);
+ MULADD(at[3], at[17]);
+ MULADD(at[4], at[16]);
+ COMBA_STORE(C->dp[4]);
+ /* 5 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[21]);
+ MULADD(at[1], at[20]);
+ MULADD(at[2], at[19]);
+ MULADD(at[3], at[18]);
+ MULADD(at[4], at[17]);
+ MULADD(at[5], at[16]);
+ COMBA_STORE(C->dp[5]);
+ /* 6 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[22]);
+ MULADD(at[1], at[21]);
+ MULADD(at[2], at[20]);
+ MULADD(at[3], at[19]);
+ MULADD(at[4], at[18]);
+ MULADD(at[5], at[17]);
+ MULADD(at[6], at[16]);
+ COMBA_STORE(C->dp[6]);
+ /* 7 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[23]);
+ MULADD(at[1], at[22]);
+ MULADD(at[2], at[21]);
+ MULADD(at[3], at[20]);
+ MULADD(at[4], at[19]);
+ MULADD(at[5], at[18]);
+ MULADD(at[6], at[17]);
+ MULADD(at[7], at[16]);
+ COMBA_STORE(C->dp[7]);
+ /* 8 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[24]);
+ MULADD(at[1], at[23]);
+ MULADD(at[2], at[22]);
+ MULADD(at[3], at[21]);
+ MULADD(at[4], at[20]);
+ MULADD(at[5], at[19]);
+ MULADD(at[6], at[18]);
+ MULADD(at[7], at[17]);
+ MULADD(at[8], at[16]);
+ COMBA_STORE(C->dp[8]);
+ /* 9 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[25]);
+ MULADD(at[1], at[24]);
+ MULADD(at[2], at[23]);
+ MULADD(at[3], at[22]);
+ MULADD(at[4], at[21]);
+ MULADD(at[5], at[20]);
+ MULADD(at[6], at[19]);
+ MULADD(at[7], at[18]);
+ MULADD(at[8], at[17]);
+ MULADD(at[9], at[16]);
+ COMBA_STORE(C->dp[9]);
+ /* 10 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[26]);
+ MULADD(at[1], at[25]);
+ MULADD(at[2], at[24]);
+ MULADD(at[3], at[23]);
+ MULADD(at[4], at[22]);
+ MULADD(at[5], at[21]);
+ MULADD(at[6], at[20]);
+ MULADD(at[7], at[19]);
+ MULADD(at[8], at[18]);
+ MULADD(at[9], at[17]);
+ MULADD(at[10], at[16]);
+ COMBA_STORE(C->dp[10]);
+ /* 11 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[27]);
+ MULADD(at[1], at[26]);
+ MULADD(at[2], at[25]);
+ MULADD(at[3], at[24]);
+ MULADD(at[4], at[23]);
+ MULADD(at[5], at[22]);
+ MULADD(at[6], at[21]);
+ MULADD(at[7], at[20]);
+ MULADD(at[8], at[19]);
+ MULADD(at[9], at[18]);
+ MULADD(at[10], at[17]);
+ MULADD(at[11], at[16]);
+ COMBA_STORE(C->dp[11]);
+ /* 12 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[28]);
+ MULADD(at[1], at[27]);
+ MULADD(at[2], at[26]);
+ MULADD(at[3], at[25]);
+ MULADD(at[4], at[24]);
+ MULADD(at[5], at[23]);
+ MULADD(at[6], at[22]);
+ MULADD(at[7], at[21]);
+ MULADD(at[8], at[20]);
+ MULADD(at[9], at[19]);
+ MULADD(at[10], at[18]);
+ MULADD(at[11], at[17]);
+ MULADD(at[12], at[16]);
+ COMBA_STORE(C->dp[12]);
+ /* 13 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[29]);
+ MULADD(at[1], at[28]);
+ MULADD(at[2], at[27]);
+ MULADD(at[3], at[26]);
+ MULADD(at[4], at[25]);
+ MULADD(at[5], at[24]);
+ MULADD(at[6], at[23]);
+ MULADD(at[7], at[22]);
+ MULADD(at[8], at[21]);
+ MULADD(at[9], at[20]);
+ MULADD(at[10], at[19]);
+ MULADD(at[11], at[18]);
+ MULADD(at[12], at[17]);
+ MULADD(at[13], at[16]);
+ COMBA_STORE(C->dp[13]);
+ /* 14 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[30]);
+ MULADD(at[1], at[29]);
+ MULADD(at[2], at[28]);
+ MULADD(at[3], at[27]);
+ MULADD(at[4], at[26]);
+ MULADD(at[5], at[25]);
+ MULADD(at[6], at[24]);
+ MULADD(at[7], at[23]);
+ MULADD(at[8], at[22]);
+ MULADD(at[9], at[21]);
+ MULADD(at[10], at[20]);
+ MULADD(at[11], at[19]);
+ MULADD(at[12], at[18]);
+ MULADD(at[13], at[17]);
+ MULADD(at[14], at[16]);
+ COMBA_STORE(C->dp[14]);
+ /* 15 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[31]);
+ MULADD(at[1], at[30]);
+ MULADD(at[2], at[29]);
+ MULADD(at[3], at[28]);
+ MULADD(at[4], at[27]);
+ MULADD(at[5], at[26]);
+ MULADD(at[6], at[25]);
+ MULADD(at[7], at[24]);
+ MULADD(at[8], at[23]);
+ MULADD(at[9], at[22]);
+ MULADD(at[10], at[21]);
+ MULADD(at[11], at[20]);
+ MULADD(at[12], at[19]);
+ MULADD(at[13], at[18]);
+ MULADD(at[14], at[17]);
+ MULADD(at[15], at[16]);
+ COMBA_STORE(C->dp[15]);
+ /* 16 */
+ COMBA_FORWARD;
+ MULADD(at[1], at[31]);
+ MULADD(at[2], at[30]);
+ MULADD(at[3], at[29]);
+ MULADD(at[4], at[28]);
+ MULADD(at[5], at[27]);
+ MULADD(at[6], at[26]);
+ MULADD(at[7], at[25]);
+ MULADD(at[8], at[24]);
+ MULADD(at[9], at[23]);
+ MULADD(at[10], at[22]);
+ MULADD(at[11], at[21]);
+ MULADD(at[12], at[20]);
+ MULADD(at[13], at[19]);
+ MULADD(at[14], at[18]);
+ MULADD(at[15], at[17]);
+ COMBA_STORE(C->dp[16]);
+ /* 17 */
+ COMBA_FORWARD;
+ MULADD(at[2], at[31]);
+ MULADD(at[3], at[30]);
+ MULADD(at[4], at[29]);
+ MULADD(at[5], at[28]);
+ MULADD(at[6], at[27]);
+ MULADD(at[7], at[26]);
+ MULADD(at[8], at[25]);
+ MULADD(at[9], at[24]);
+ MULADD(at[10], at[23]);
+ MULADD(at[11], at[22]);
+ MULADD(at[12], at[21]);
+ MULADD(at[13], at[20]);
+ MULADD(at[14], at[19]);
+ MULADD(at[15], at[18]);
+ COMBA_STORE(C->dp[17]);
+ /* 18 */
+ COMBA_FORWARD;
+ MULADD(at[3], at[31]);
+ MULADD(at[4], at[30]);
+ MULADD(at[5], at[29]);
+ MULADD(at[6], at[28]);
+ MULADD(at[7], at[27]);
+ MULADD(at[8], at[26]);
+ MULADD(at[9], at[25]);
+ MULADD(at[10], at[24]);
+ MULADD(at[11], at[23]);
+ MULADD(at[12], at[22]);
+ MULADD(at[13], at[21]);
+ MULADD(at[14], at[20]);
+ MULADD(at[15], at[19]);
+ COMBA_STORE(C->dp[18]);
+ /* 19 */
+ COMBA_FORWARD;
+ MULADD(at[4], at[31]);
+ MULADD(at[5], at[30]);
+ MULADD(at[6], at[29]);
+ MULADD(at[7], at[28]);
+ MULADD(at[8], at[27]);
+ MULADD(at[9], at[26]);
+ MULADD(at[10], at[25]);
+ MULADD(at[11], at[24]);
+ MULADD(at[12], at[23]);
+ MULADD(at[13], at[22]);
+ MULADD(at[14], at[21]);
+ MULADD(at[15], at[20]);
+ COMBA_STORE(C->dp[19]);
+ /* 20 */
+ COMBA_FORWARD;
+ MULADD(at[5], at[31]);
+ MULADD(at[6], at[30]);
+ MULADD(at[7], at[29]);
+ MULADD(at[8], at[28]);
+ MULADD(at[9], at[27]);
+ MULADD(at[10], at[26]);
+ MULADD(at[11], at[25]);
+ MULADD(at[12], at[24]);
+ MULADD(at[13], at[23]);
+ MULADD(at[14], at[22]);
+ MULADD(at[15], at[21]);
+ COMBA_STORE(C->dp[20]);
+ /* 21 */
+ COMBA_FORWARD;
+ MULADD(at[6], at[31]);
+ MULADD(at[7], at[30]);
+ MULADD(at[8], at[29]);
+ MULADD(at[9], at[28]);
+ MULADD(at[10], at[27]);
+ MULADD(at[11], at[26]);
+ MULADD(at[12], at[25]);
+ MULADD(at[13], at[24]);
+ MULADD(at[14], at[23]);
+ MULADD(at[15], at[22]);
+ COMBA_STORE(C->dp[21]);
+ /* 22 */
+ COMBA_FORWARD;
+ MULADD(at[7], at[31]);
+ MULADD(at[8], at[30]);
+ MULADD(at[9], at[29]);
+ MULADD(at[10], at[28]);
+ MULADD(at[11], at[27]);
+ MULADD(at[12], at[26]);
+ MULADD(at[13], at[25]);
+ MULADD(at[14], at[24]);
+ MULADD(at[15], at[23]);
+ COMBA_STORE(C->dp[22]);
+ /* 23 */
+ COMBA_FORWARD;
+ MULADD(at[8], at[31]);
+ MULADD(at[9], at[30]);
+ MULADD(at[10], at[29]);
+ MULADD(at[11], at[28]);
+ MULADD(at[12], at[27]);
+ MULADD(at[13], at[26]);
+ MULADD(at[14], at[25]);
+ MULADD(at[15], at[24]);
+ COMBA_STORE(C->dp[23]);
+ /* 24 */
+ COMBA_FORWARD;
+ MULADD(at[9], at[31]);
+ MULADD(at[10], at[30]);
+ MULADD(at[11], at[29]);
+ MULADD(at[12], at[28]);
+ MULADD(at[13], at[27]);
+ MULADD(at[14], at[26]);
+ MULADD(at[15], at[25]);
+ COMBA_STORE(C->dp[24]);
+ /* 25 */
+ COMBA_FORWARD;
+ MULADD(at[10], at[31]);
+ MULADD(at[11], at[30]);
+ MULADD(at[12], at[29]);
+ MULADD(at[13], at[28]);
+ MULADD(at[14], at[27]);
+ MULADD(at[15], at[26]);
+ COMBA_STORE(C->dp[25]);
+ /* 26 */
+ COMBA_FORWARD;
+ MULADD(at[11], at[31]);
+ MULADD(at[12], at[30]);
+ MULADD(at[13], at[29]);
+ MULADD(at[14], at[28]);
+ MULADD(at[15], at[27]);
+ COMBA_STORE(C->dp[26]);
+ /* 27 */
+ COMBA_FORWARD;
+ MULADD(at[12], at[31]);
+ MULADD(at[13], at[30]);
+ MULADD(at[14], at[29]);
+ MULADD(at[15], at[28]);
+ COMBA_STORE(C->dp[27]);
+ /* 28 */
+ COMBA_FORWARD;
+ MULADD(at[13], at[31]);
+ MULADD(at[14], at[30]);
+ MULADD(at[15], at[29]);
+ COMBA_STORE(C->dp[28]);
+ /* 29 */
+ COMBA_FORWARD;
+ MULADD(at[14], at[31]);
+ MULADD(at[15], at[30]);
+ COMBA_STORE(C->dp[29]);
+ /* 30 */
+ COMBA_FORWARD;
+ MULADD(at[15], at[31]);
+ COMBA_STORE(C->dp[30]);
+ /* the top digit is the word left in c1 after the last column */
+ COMBA_STORE2(C->dp[31]);
+ C->used = 32;
+ C->sign = A->sign ^ B->sign;
+ mp_clamp(C);
+ COMBA_FINI;
+}
+
+void
+s_mp_mul_comba_32(const mp_int *A, const mp_int *B, mp_int *C)
+{
+ mp_digit c0, c1, c2, at[64];
+
+ memcpy(at, A->dp, 32 * sizeof(mp_digit));
+ memcpy(at + 32, B->dp, 32 * sizeof(mp_digit));
+ COMBA_START;
+
+ COMBA_CLEAR;
+ /* 0 */
+ MULADD(at[0], at[32]);
+ COMBA_STORE(C->dp[0]);
+ /* 1 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[33]);
+ MULADD(at[1], at[32]);
+ COMBA_STORE(C->dp[1]);
+ /* 2 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[34]);
+ MULADD(at[1], at[33]);
+ MULADD(at[2], at[32]);
+ COMBA_STORE(C->dp[2]);
+ /* 3 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[35]);
+ MULADD(at[1], at[34]);
+ MULADD(at[2], at[33]);
+ MULADD(at[3], at[32]);
+ COMBA_STORE(C->dp[3]);
+ /* 4 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[36]);
+ MULADD(at[1], at[35]);
+ MULADD(at[2], at[34]);
+ MULADD(at[3], at[33]);
+ MULADD(at[4], at[32]);
+ COMBA_STORE(C->dp[4]);
+ /* 5 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[37]);
+ MULADD(at[1], at[36]);
+ MULADD(at[2], at[35]);
+ MULADD(at[3], at[34]);
+ MULADD(at[4], at[33]);
+ MULADD(at[5], at[32]);
+ COMBA_STORE(C->dp[5]);
+ /* 6 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[38]);
+ MULADD(at[1], at[37]);
+ MULADD(at[2], at[36]);
+ MULADD(at[3], at[35]);
+ MULADD(at[4], at[34]);
+ MULADD(at[5], at[33]);
+ MULADD(at[6], at[32]);
+ COMBA_STORE(C->dp[6]);
+ /* 7 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[39]);
+ MULADD(at[1], at[38]);
+ MULADD(at[2], at[37]);
+ MULADD(at[3], at[36]);
+ MULADD(at[4], at[35]);
+ MULADD(at[5], at[34]);
+ MULADD(at[6], at[33]);
+ MULADD(at[7], at[32]);
+ COMBA_STORE(C->dp[7]);
+ /* 8 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[40]);
+ MULADD(at[1], at[39]);
+ MULADD(at[2], at[38]);
+ MULADD(at[3], at[37]);
+ MULADD(at[4], at[36]);
+ MULADD(at[5], at[35]);
+ MULADD(at[6], at[34]);
+ MULADD(at[7], at[33]);
+ MULADD(at[8], at[32]);
+ COMBA_STORE(C->dp[8]);
+ /* 9 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[41]);
+ MULADD(at[1], at[40]);
+ MULADD(at[2], at[39]);
+ MULADD(at[3], at[38]);
+ MULADD(at[4], at[37]);
+ MULADD(at[5], at[36]);
+ MULADD(at[6], at[35]);
+ MULADD(at[7], at[34]);
+ MULADD(at[8], at[33]);
+ MULADD(at[9], at[32]);
+ COMBA_STORE(C->dp[9]);
+ /* 10 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[42]);
+ MULADD(at[1], at[41]);
+ MULADD(at[2], at[40]);
+ MULADD(at[3], at[39]);
+ MULADD(at[4], at[38]);
+ MULADD(at[5], at[37]);
+ MULADD(at[6], at[36]);
+ MULADD(at[7], at[35]);
+ MULADD(at[8], at[34]);
+ MULADD(at[9], at[33]);
+ MULADD(at[10], at[32]);
+ COMBA_STORE(C->dp[10]);
+ /* 11 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[43]);
+ MULADD(at[1], at[42]);
+ MULADD(at[2], at[41]);
+ MULADD(at[3], at[40]);
+ MULADD(at[4], at[39]);
+ MULADD(at[5], at[38]);
+ MULADD(at[6], at[37]);
+ MULADD(at[7], at[36]);
+ MULADD(at[8], at[35]);
+ MULADD(at[9], at[34]);
+ MULADD(at[10], at[33]);
+ MULADD(at[11], at[32]);
+ COMBA_STORE(C->dp[11]);
+ /* 12 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[44]);
+ MULADD(at[1], at[43]);
+ MULADD(at[2], at[42]);
+ MULADD(at[3], at[41]);
+ MULADD(at[4], at[40]);
+ MULADD(at[5], at[39]);
+ MULADD(at[6], at[38]);
+ MULADD(at[7], at[37]);
+ MULADD(at[8], at[36]);
+ MULADD(at[9], at[35]);
+ MULADD(at[10], at[34]);
+ MULADD(at[11], at[33]);
+ MULADD(at[12], at[32]);
+ COMBA_STORE(C->dp[12]);
+ /* 13 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[45]);
+ MULADD(at[1], at[44]);
+ MULADD(at[2], at[43]);
+ MULADD(at[3], at[42]);
+ MULADD(at[4], at[41]);
+ MULADD(at[5], at[40]);
+ MULADD(at[6], at[39]);
+ MULADD(at[7], at[38]);
+ MULADD(at[8], at[37]);
+ MULADD(at[9], at[36]);
+ MULADD(at[10], at[35]);
+ MULADD(at[11], at[34]);
+ MULADD(at[12], at[33]);
+ MULADD(at[13], at[32]);
+ COMBA_STORE(C->dp[13]);
+ /* 14 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[46]);
+ MULADD(at[1], at[45]);
+ MULADD(at[2], at[44]);
+ MULADD(at[3], at[43]);
+ MULADD(at[4], at[42]);
+ MULADD(at[5], at[41]);
+ MULADD(at[6], at[40]);
+ MULADD(at[7], at[39]);
+ MULADD(at[8], at[38]);
+ MULADD(at[9], at[37]);
+ MULADD(at[10], at[36]);
+ MULADD(at[11], at[35]);
+ MULADD(at[12], at[34]);
+ MULADD(at[13], at[33]);
+ MULADD(at[14], at[32]);
+ COMBA_STORE(C->dp[14]);
+ /* 15 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[47]);
+ MULADD(at[1], at[46]);
+ MULADD(at[2], at[45]);
+ MULADD(at[3], at[44]);
+ MULADD(at[4], at[43]);
+ MULADD(at[5], at[42]);
+ MULADD(at[6], at[41]);
+ MULADD(at[7], at[40]);
+ MULADD(at[8], at[39]);
+ MULADD(at[9], at[38]);
+ MULADD(at[10], at[37]);
+ MULADD(at[11], at[36]);
+ MULADD(at[12], at[35]);
+ MULADD(at[13], at[34]);
+ MULADD(at[14], at[33]);
+ MULADD(at[15], at[32]);
+ COMBA_STORE(C->dp[15]);
+ /* 16 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[48]);
+ MULADD(at[1], at[47]);
+ MULADD(at[2], at[46]);
+ MULADD(at[3], at[45]);
+ MULADD(at[4], at[44]);
+ MULADD(at[5], at[43]);
+ MULADD(at[6], at[42]);
+ MULADD(at[7], at[41]);
+ MULADD(at[8], at[40]);
+ MULADD(at[9], at[39]);
+ MULADD(at[10], at[38]);
+ MULADD(at[11], at[37]);
+ MULADD(at[12], at[36]);
+ MULADD(at[13], at[35]);
+ MULADD(at[14], at[34]);
+ MULADD(at[15], at[33]);
+ MULADD(at[16], at[32]);
+ COMBA_STORE(C->dp[16]);
+ /* 17 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[49]);
+ MULADD(at[1], at[48]);
+ MULADD(at[2], at[47]);
+ MULADD(at[3], at[46]);
+ MULADD(at[4], at[45]);
+ MULADD(at[5], at[44]);
+ MULADD(at[6], at[43]);
+ MULADD(at[7], at[42]);
+ MULADD(at[8], at[41]);
+ MULADD(at[9], at[40]);
+ MULADD(at[10], at[39]);
+ MULADD(at[11], at[38]);
+ MULADD(at[12], at[37]);
+ MULADD(at[13], at[36]);
+ MULADD(at[14], at[35]);
+ MULADD(at[15], at[34]);
+ MULADD(at[16], at[33]);
+ MULADD(at[17], at[32]);
+ COMBA_STORE(C->dp[17]);
+ /* 18 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[50]);
+ MULADD(at[1], at[49]);
+ MULADD(at[2], at[48]);
+ MULADD(at[3], at[47]);
+ MULADD(at[4], at[46]);
+ MULADD(at[5], at[45]);
+ MULADD(at[6], at[44]);
+ MULADD(at[7], at[43]);
+ MULADD(at[8], at[42]);
+ MULADD(at[9], at[41]);
+ MULADD(at[10], at[40]);
+ MULADD(at[11], at[39]);
+ MULADD(at[12], at[38]);
+ MULADD(at[13], at[37]);
+ MULADD(at[14], at[36]);
+ MULADD(at[15], at[35]);
+ MULADD(at[16], at[34]);
+ MULADD(at[17], at[33]);
+ MULADD(at[18], at[32]);
+ COMBA_STORE(C->dp[18]);
+ /* 19 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[51]);
+ MULADD(at[1], at[50]);
+ MULADD(at[2], at[49]);
+ MULADD(at[3], at[48]);
+ MULADD(at[4], at[47]);
+ MULADD(at[5], at[46]);
+ MULADD(at[6], at[45]);
+ MULADD(at[7], at[44]);
+ MULADD(at[8], at[43]);
+ MULADD(at[9], at[42]);
+ MULADD(at[10], at[41]);
+ MULADD(at[11], at[40]);
+ MULADD(at[12], at[39]);
+ MULADD(at[13], at[38]);
+ MULADD(at[14], at[37]);
+ MULADD(at[15], at[36]);
+ MULADD(at[16], at[35]);
+ MULADD(at[17], at[34]);
+ MULADD(at[18], at[33]);
+ MULADD(at[19], at[32]);
+ COMBA_STORE(C->dp[19]);
+ /* 20 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[52]);
+ MULADD(at[1], at[51]);
+ MULADD(at[2], at[50]);
+ MULADD(at[3], at[49]);
+ MULADD(at[4], at[48]);
+ MULADD(at[5], at[47]);
+ MULADD(at[6], at[46]);
+ MULADD(at[7], at[45]);
+ MULADD(at[8], at[44]);
+ MULADD(at[9], at[43]);
+ MULADD(at[10], at[42]);
+ MULADD(at[11], at[41]);
+ MULADD(at[12], at[40]);
+ MULADD(at[13], at[39]);
+ MULADD(at[14], at[38]);
+ MULADD(at[15], at[37]);
+ MULADD(at[16], at[36]);
+ MULADD(at[17], at[35]);
+ MULADD(at[18], at[34]);
+ MULADD(at[19], at[33]);
+ MULADD(at[20], at[32]);
+ COMBA_STORE(C->dp[20]);
+ /* 21 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[53]);
+ MULADD(at[1], at[52]);
+ MULADD(at[2], at[51]);
+ MULADD(at[3], at[50]);
+ MULADD(at[4], at[49]);
+ MULADD(at[5], at[48]);
+ MULADD(at[6], at[47]);
+ MULADD(at[7], at[46]);
+ MULADD(at[8], at[45]);
+ MULADD(at[9], at[44]);
+ MULADD(at[10], at[43]);
+ MULADD(at[11], at[42]);
+ MULADD(at[12], at[41]);
+ MULADD(at[13], at[40]);
+ MULADD(at[14], at[39]);
+ MULADD(at[15], at[38]);
+ MULADD(at[16], at[37]);
+ MULADD(at[17], at[36]);
+ MULADD(at[18], at[35]);
+ MULADD(at[19], at[34]);
+ MULADD(at[20], at[33]);
+ MULADD(at[21], at[32]);
+ COMBA_STORE(C->dp[21]);
+ /* 22 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[54]);
+ MULADD(at[1], at[53]);
+ MULADD(at[2], at[52]);
+ MULADD(at[3], at[51]);
+ MULADD(at[4], at[50]);
+ MULADD(at[5], at[49]);
+ MULADD(at[6], at[48]);
+ MULADD(at[7], at[47]);
+ MULADD(at[8], at[46]);
+ MULADD(at[9], at[45]);
+ MULADD(at[10], at[44]);
+ MULADD(at[11], at[43]);
+ MULADD(at[12], at[42]);
+ MULADD(at[13], at[41]);
+ MULADD(at[14], at[40]);
+ MULADD(at[15], at[39]);
+ MULADD(at[16], at[38]);
+ MULADD(at[17], at[37]);
+ MULADD(at[18], at[36]);
+ MULADD(at[19], at[35]);
+ MULADD(at[20], at[34]);
+ MULADD(at[21], at[33]);
+ MULADD(at[22], at[32]);
+ COMBA_STORE(C->dp[22]);
+ /* 23 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[55]);
+ MULADD(at[1], at[54]);
+ MULADD(at[2], at[53]);
+ MULADD(at[3], at[52]);
+ MULADD(at[4], at[51]);
+ MULADD(at[5], at[50]);
+ MULADD(at[6], at[49]);
+ MULADD(at[7], at[48]);
+ MULADD(at[8], at[47]);
+ MULADD(at[9], at[46]);
+ MULADD(at[10], at[45]);
+ MULADD(at[11], at[44]);
+ MULADD(at[12], at[43]);
+ MULADD(at[13], at[42]);
+ MULADD(at[14], at[41]);
+ MULADD(at[15], at[40]);
+ MULADD(at[16], at[39]);
+ MULADD(at[17], at[38]);
+ MULADD(at[18], at[37]);
+ MULADD(at[19], at[36]);
+ MULADD(at[20], at[35]);
+ MULADD(at[21], at[34]);
+ MULADD(at[22], at[33]);
+ MULADD(at[23], at[32]);
+ COMBA_STORE(C->dp[23]);
+ /* 24 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[56]);
+ MULADD(at[1], at[55]);
+ MULADD(at[2], at[54]);
+ MULADD(at[3], at[53]);
+ MULADD(at[4], at[52]);
+ MULADD(at[5], at[51]);
+ MULADD(at[6], at[50]);
+ MULADD(at[7], at[49]);
+ MULADD(at[8], at[48]);
+ MULADD(at[9], at[47]);
+ MULADD(at[10], at[46]);
+ MULADD(at[11], at[45]);
+ MULADD(at[12], at[44]);
+ MULADD(at[13], at[43]);
+ MULADD(at[14], at[42]);
+ MULADD(at[15], at[41]);
+ MULADD(at[16], at[40]);
+ MULADD(at[17], at[39]);
+ MULADD(at[18], at[38]);
+ MULADD(at[19], at[37]);
+ MULADD(at[20], at[36]);
+ MULADD(at[21], at[35]);
+ MULADD(at[22], at[34]);
+ MULADD(at[23], at[33]);
+ MULADD(at[24], at[32]);
+ COMBA_STORE(C->dp[24]);
+ /* 25 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[57]);
+ MULADD(at[1], at[56]);
+ MULADD(at[2], at[55]);
+ MULADD(at[3], at[54]);
+ MULADD(at[4], at[53]);
+ MULADD(at[5], at[52]);
+ MULADD(at[6], at[51]);
+ MULADD(at[7], at[50]);
+ MULADD(at[8], at[49]);
+ MULADD(at[9], at[48]);
+ MULADD(at[10], at[47]);
+ MULADD(at[11], at[46]);
+ MULADD(at[12], at[45]);
+ MULADD(at[13], at[44]);
+ MULADD(at[14], at[43]);
+ MULADD(at[15], at[42]);
+ MULADD(at[16], at[41]);
+ MULADD(at[17], at[40]);
+ MULADD(at[18], at[39]);
+ MULADD(at[19], at[38]);
+ MULADD(at[20], at[37]);
+ MULADD(at[21], at[36]);
+ MULADD(at[22], at[35]);
+ MULADD(at[23], at[34]);
+ MULADD(at[24], at[33]);
+ MULADD(at[25], at[32]);
+ COMBA_STORE(C->dp[25]);
+ /* 26 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[58]);
+ MULADD(at[1], at[57]);
+ MULADD(at[2], at[56]);
+ MULADD(at[3], at[55]);
+ MULADD(at[4], at[54]);
+ MULADD(at[5], at[53]);
+ MULADD(at[6], at[52]);
+ MULADD(at[7], at[51]);
+ MULADD(at[8], at[50]);
+ MULADD(at[9], at[49]);
+ MULADD(at[10], at[48]);
+ MULADD(at[11], at[47]);
+ MULADD(at[12], at[46]);
+ MULADD(at[13], at[45]);
+ MULADD(at[14], at[44]);
+ MULADD(at[15], at[43]);
+ MULADD(at[16], at[42]);
+ MULADD(at[17], at[41]);
+ MULADD(at[18], at[40]);
+ MULADD(at[19], at[39]);
+ MULADD(at[20], at[38]);
+ MULADD(at[21], at[37]);
+ MULADD(at[22], at[36]);
+ MULADD(at[23], at[35]);
+ MULADD(at[24], at[34]);
+ MULADD(at[25], at[33]);
+ MULADD(at[26], at[32]);
+ COMBA_STORE(C->dp[26]);
+ /* 27 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[59]);
+ MULADD(at[1], at[58]);
+ MULADD(at[2], at[57]);
+ MULADD(at[3], at[56]);
+ MULADD(at[4], at[55]);
+ MULADD(at[5], at[54]);
+ MULADD(at[6], at[53]);
+ MULADD(at[7], at[52]);
+ MULADD(at[8], at[51]);
+ MULADD(at[9], at[50]);
+ MULADD(at[10], at[49]);
+ MULADD(at[11], at[48]);
+ MULADD(at[12], at[47]);
+ MULADD(at[13], at[46]);
+ MULADD(at[14], at[45]);
+ MULADD(at[15], at[44]);
+ MULADD(at[16], at[43]);
+ MULADD(at[17], at[42]);
+ MULADD(at[18], at[41]);
+ MULADD(at[19], at[40]);
+ MULADD(at[20], at[39]);
+ MULADD(at[21], at[38]);
+ MULADD(at[22], at[37]);
+ MULADD(at[23], at[36]);
+ MULADD(at[24], at[35]);
+ MULADD(at[25], at[34]);
+ MULADD(at[26], at[33]);
+ MULADD(at[27], at[32]);
+ COMBA_STORE(C->dp[27]);
+ /* 28 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[60]);
+ MULADD(at[1], at[59]);
+ MULADD(at[2], at[58]);
+ MULADD(at[3], at[57]);
+ MULADD(at[4], at[56]);
+ MULADD(at[5], at[55]);
+ MULADD(at[6], at[54]);
+ MULADD(at[7], at[53]);
+ MULADD(at[8], at[52]);
+ MULADD(at[9], at[51]);
+ MULADD(at[10], at[50]);
+ MULADD(at[11], at[49]);
+ MULADD(at[12], at[48]);
+ MULADD(at[13], at[47]);
+ MULADD(at[14], at[46]);
+ MULADD(at[15], at[45]);
+ MULADD(at[16], at[44]);
+ MULADD(at[17], at[43]);
+ MULADD(at[18], at[42]);
+ MULADD(at[19], at[41]);
+ MULADD(at[20], at[40]);
+ MULADD(at[21], at[39]);
+ MULADD(at[22], at[38]);
+ MULADD(at[23], at[37]);
+ MULADD(at[24], at[36]);
+ MULADD(at[25], at[35]);
+ MULADD(at[26], at[34]);
+ MULADD(at[27], at[33]);
+ MULADD(at[28], at[32]);
+ COMBA_STORE(C->dp[28]);
+ /* 29 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[61]);
+ MULADD(at[1], at[60]);
+ MULADD(at[2], at[59]);
+ MULADD(at[3], at[58]);
+ MULADD(at[4], at[57]);
+ MULADD(at[5], at[56]);
+ MULADD(at[6], at[55]);
+ MULADD(at[7], at[54]);
+ MULADD(at[8], at[53]);
+ MULADD(at[9], at[52]);
+ MULADD(at[10], at[51]);
+ MULADD(at[11], at[50]);
+ MULADD(at[12], at[49]);
+ MULADD(at[13], at[48]);
+ MULADD(at[14], at[47]);
+ MULADD(at[15], at[46]);
+ MULADD(at[16], at[45]);
+ MULADD(at[17], at[44]);
+ MULADD(at[18], at[43]);
+ MULADD(at[19], at[42]);
+ MULADD(at[20], at[41]);
+ MULADD(at[21], at[40]);
+ MULADD(at[22], at[39]);
+ MULADD(at[23], at[38]);
+ MULADD(at[24], at[37]);
+ MULADD(at[25], at[36]);
+ MULADD(at[26], at[35]);
+ MULADD(at[27], at[34]);
+ MULADD(at[28], at[33]);
+ MULADD(at[29], at[32]);
+ COMBA_STORE(C->dp[29]);
+ /* 30 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[62]);
+ MULADD(at[1], at[61]);
+ MULADD(at[2], at[60]);
+ MULADD(at[3], at[59]);
+ MULADD(at[4], at[58]);
+ MULADD(at[5], at[57]);
+ MULADD(at[6], at[56]);
+ MULADD(at[7], at[55]);
+ MULADD(at[8], at[54]);
+ MULADD(at[9], at[53]);
+ MULADD(at[10], at[52]);
+ MULADD(at[11], at[51]);
+ MULADD(at[12], at[50]);
+ MULADD(at[13], at[49]);
+ MULADD(at[14], at[48]);
+ MULADD(at[15], at[47]);
+ MULADD(at[16], at[46]);
+ MULADD(at[17], at[45]);
+ MULADD(at[18], at[44]);
+ MULADD(at[19], at[43]);
+ MULADD(at[20], at[42]);
+ MULADD(at[21], at[41]);
+ MULADD(at[22], at[40]);
+ MULADD(at[23], at[39]);
+ MULADD(at[24], at[38]);
+ MULADD(at[25], at[37]);
+ MULADD(at[26], at[36]);
+ MULADD(at[27], at[35]);
+ MULADD(at[28], at[34]);
+ MULADD(at[29], at[33]);
+ MULADD(at[30], at[32]);
+ COMBA_STORE(C->dp[30]);
+ /* 31 */
+ COMBA_FORWARD;
+ MULADD(at[0], at[63]);
+ MULADD(at[1], at[62]);
+ MULADD(at[2], at[61]);
+ MULADD(at[3], at[60]);
+ MULADD(at[4], at[59]);
+ MULADD(at[5], at[58]);
+ MULADD(at[6], at[57]);
+ MULADD(at[7], at[56]);
+ MULADD(at[8], at[55]);
+ MULADD(at[9], at[54]);
+ MULADD(at[10], at[53]);
+ MULADD(at[11], at[52]);
+ MULADD(at[12], at[51]);
+ MULADD(at[13], at[50]);
+ MULADD(at[14], at[49]);
+ MULADD(at[15], at[48]);
+ MULADD(at[16], at[47]);
+ MULADD(at[17], at[46]);
+ MULADD(at[18], at[45]);
+ MULADD(at[19], at[44]);
+ MULADD(at[20], at[43]);
+ MULADD(at[21], at[42]);
+ MULADD(at[22], at[41]);
+ MULADD(at[23], at[40]);
+ MULADD(at[24], at[39]);
+ MULADD(at[25], at[38]);
+ MULADD(at[26], at[37]);
+ MULADD(at[27], at[36]);
+ MULADD(at[28], at[35]);
+ MULADD(at[29], at[34]);
+ MULADD(at[30], at[33]);
+ MULADD(at[31], at[32]);
+ COMBA_STORE(C->dp[31]);
+ /* 32 */
+ COMBA_FORWARD;
+ MULADD(at[1], at[63]);
+ MULADD(at[2], at[62]);
+ MULADD(at[3], at[61]);
+ MULADD(at[4], at[60]);
+ MULADD(at[5], at[59]);
+ MULADD(at[6], at[58]);
+ MULADD(at[7], at[57]);
+ MULADD(at[8], at[56]);
+ MULADD(at[9], at[55]);
+ MULADD(at[10], at[54]);
+ MULADD(at[11], at[53]);
+ MULADD(at[12], at[52]);
+ MULADD(at[13], at[51]);
+ MULADD(at[14], at[50]);
+ MULADD(at[15], at[49]);
+ MULADD(at[16], at[48]);
+ MULADD(at[17], at[47]);
+ MULADD(at[18], at[46]);
+ MULADD(at[19], at[45]);
+ MULADD(at[20], at[44]);
+ MULADD(at[21], at[43]);
+ MULADD(at[22], at[42]);
+ MULADD(at[23], at[41]);
+ MULADD(at[24], at[40]);
+ MULADD(at[25], at[39]);
+ MULADD(at[26], at[38]);
+ MULADD(at[27], at[37]);
+ MULADD(at[28], at[36]);
+ MULADD(at[29], at[35]);
+ MULADD(at[30], at[34]);
+ MULADD(at[31], at[33]);
+ COMBA_STORE(C->dp[32]);
+ /* 33 */
+ COMBA_FORWARD;
+ MULADD(at[2], at[63]);
+ MULADD(at[3], at[62]);
+ MULADD(at[4], at[61]);
+ MULADD(at[5], at[60]);
+ MULADD(at[6], at[59]);
+ MULADD(at[7], at[58]);
+ MULADD(at[8], at[57]);
+ MULADD(at[9], at[56]);
+ MULADD(at[10], at[55]);
+ MULADD(at[11], at[54]);
+ MULADD(at[12], at[53]);
+ MULADD(at[13], at[52]);
+ MULADD(at[14], at[51]);
+ MULADD(at[15], at[50]);
+ MULADD(at[16], at[49]);
+ MULADD(at[17], at[48]);
+ MULADD(at[18], at[47]);
+ MULADD(at[19], at[46]);
+ MULADD(at[20], at[45]);
+ MULADD(at[21], at[44]);
+ MULADD(at[22], at[43]);
+ MULADD(at[23], at[42]);
+ MULADD(at[24], at[41]);
+ MULADD(at[25], at[40]);
+ MULADD(at[26], at[39]);
+ MULADD(at[27], at[38]);
+ MULADD(at[28], at[37]);
+ MULADD(at[29], at[36]);
+ MULADD(at[30], at[35]);
+ MULADD(at[31], at[34]);
+ COMBA_STORE(C->dp[33]);
+ /* 34 */
+ COMBA_FORWARD;
+ MULADD(at[3], at[63]);
+ MULADD(at[4], at[62]);
+ MULADD(at[5], at[61]);
+ MULADD(at[6], at[60]);
+ MULADD(at[7], at[59]);
+ MULADD(at[8], at[58]);
+ MULADD(at[9], at[57]);
+ MULADD(at[10], at[56]);
+ MULADD(at[11], at[55]);
+ MULADD(at[12], at[54]);
+ MULADD(at[13], at[53]);
+ MULADD(at[14], at[52]);
+ MULADD(at[15], at[51]);
+ MULADD(at[16], at[50]);
+ MULADD(at[17], at[49]);
+ MULADD(at[18], at[48]);
+ MULADD(at[19], at[47]);
+ MULADD(at[20], at[46]);
+ MULADD(at[21], at[45]);
+ MULADD(at[22], at[44]);
+ MULADD(at[23], at[43]);
+ MULADD(at[24], at[42]);
+ MULADD(at[25], at[41]);
+ MULADD(at[26], at[40]);
+ MULADD(at[27], at[39]);
+ MULADD(at[28], at[38]);
+ MULADD(at[29], at[37]);
+ MULADD(at[30], at[36]);
+ MULADD(at[31], at[35]);
+ COMBA_STORE(C->dp[34]);
+ /* 35 */
+ COMBA_FORWARD;
+ MULADD(at[4], at[63]);
+ MULADD(at[5], at[62]);
+ MULADD(at[6], at[61]);
+ MULADD(at[7], at[60]);
+ MULADD(at[8], at[59]);
+ MULADD(at[9], at[58]);
+ MULADD(at[10], at[57]);
+ MULADD(at[11], at[56]);
+ MULADD(at[12], at[55]);
+ MULADD(at[13], at[54]);
+ MULADD(at[14], at[53]);
+ MULADD(at[15], at[52]);
+ MULADD(at[16], at[51]);
+ MULADD(at[17], at[50]);
+ MULADD(at[18], at[49]);
+ MULADD(at[19], at[48]);
+ MULADD(at[20], at[47]);
+ MULADD(at[21], at[46]);
+ MULADD(at[22], at[45]);
+ MULADD(at[23], at[44]);
+ MULADD(at[24], at[43]);
+ MULADD(at[25], at[42]);
+ MULADD(at[26], at[41]);
+ MULADD(at[27], at[40]);
+ MULADD(at[28], at[39]);
+ MULADD(at[29], at[38]);
+ MULADD(at[30], at[37]);
+ MULADD(at[31], at[36]);
+ COMBA_STORE(C->dp[35]);
+ /* 36 */
+ COMBA_FORWARD;
+ MULADD(at[5], at[63]);
+ MULADD(at[6], at[62]);
+ MULADD(at[7], at[61]);
+ MULADD(at[8], at[60]);
+ MULADD(at[9], at[59]);
+ MULADD(at[10], at[58]);
+ MULADD(at[11], at[57]);
+ MULADD(at[12], at[56]);
+ MULADD(at[13], at[55]);
+ MULADD(at[14], at[54]);
+ MULADD(at[15], at[53]);
+ MULADD(at[16], at[52]);
+ MULADD(at[17], at[51]);
+ MULADD(at[18], at[50]);
+ MULADD(at[19], at[49]);
+ MULADD(at[20], at[48]);
+ MULADD(at[21], at[47]);
+ MULADD(at[22], at[46]);
+ MULADD(at[23], at[45]);
+ MULADD(at[24], at[44]);
+ MULADD(at[25], at[43]);
+ MULADD(at[26], at[42]);
+ MULADD(at[27], at[41]);
+ MULADD(at[28], at[40]);
+ MULADD(at[29], at[39]);
+ MULADD(at[30], at[38]);
+ MULADD(at[31], at[37]);
+ COMBA_STORE(C->dp[36]);
+ /* 37 */
+ COMBA_FORWARD;
+ MULADD(at[6], at[63]);
+ MULADD(at[7], at[62]);
+ MULADD(at[8], at[61]);
+ MULADD(at[9], at[60]);
+ MULADD(at[10], at[59]);
+ MULADD(at[11], at[58]);
+ MULADD(at[12], at[57]);
+ MULADD(at[13], at[56]);
+ MULADD(at[14], at[55]);
+ MULADD(at[15], at[54]);
+ MULADD(at[16], at[53]);
+ MULADD(at[17], at[52]);
+ MULADD(at[18], at[51]);
+ MULADD(at[19], at[50]);
+ MULADD(at[20], at[49]);
+ MULADD(at[21], at[48]);
+ MULADD(at[22], at[47]);
+ MULADD(at[23], at[46]);
+ MULADD(at[24], at[45]);
+ MULADD(at[25], at[44]);
+ MULADD(at[26], at[43]);
+ MULADD(at[27], at[42]);
+ MULADD(at[28], at[41]);
+ MULADD(at[29], at[40]);
+ MULADD(at[30], at[39]);
+ MULADD(at[31], at[38]);
+ COMBA_STORE(C->dp[37]);
+ /* 38 */
+ COMBA_FORWARD;
+ MULADD(at[7], at[63]);
+ MULADD(at[8], at[62]);
+ MULADD(at[9], at[61]);
+ MULADD(at[10], at[60]);
+ MULADD(at[11], at[59]);
+ MULADD(at[12], at[58]);
+ MULADD(at[13], at[57]);
+ MULADD(at[14], at[56]);
+ MULADD(at[15], at[55]);
+ MULADD(at[16], at[54]);
+ MULADD(at[17], at[53]);
+ MULADD(at[18], at[52]);
+ MULADD(at[19], at[51]);
+ MULADD(at[20], at[50]);
+ MULADD(at[21], at[49]);
+ MULADD(at[22], at[48]);
+ MULADD(at[23], at[47]);
+ MULADD(at[24], at[46]);
+ MULADD(at[25], at[45]);
+ MULADD(at[26], at[44]);
+ MULADD(at[27], at[43]);
+ MULADD(at[28], at[42]);
+ MULADD(at[29], at[41]);
+ MULADD(at[30], at[40]);
+ MULADD(at[31], at[39]);
+ COMBA_STORE(C->dp[38]);
+ /* 39 */
+ COMBA_FORWARD;
+ MULADD(at[8], at[63]);
+ MULADD(at[9], at[62]);
+ MULADD(at[10], at[61]);
+ MULADD(at[11], at[60]);
+ MULADD(at[12], at[59]);
+ MULADD(at[13], at[58]);
+ MULADD(at[14], at[57]);
+ MULADD(at[15], at[56]);
+ MULADD(at[16], at[55]);
+ MULADD(at[17], at[54]);
+ MULADD(at[18], at[53]);
+ MULADD(at[19], at[52]);
+ MULADD(at[20], at[51]);
+ MULADD(at[21], at[50]);
+ MULADD(at[22], at[49]);
+ MULADD(at[23], at[48]);
+ MULADD(at[24], at[47]);
+ MULADD(at[25], at[46]);
+ MULADD(at[26], at[45]);
+ MULADD(at[27], at[44]);
+ MULADD(at[28], at[43]);
+ MULADD(at[29], at[42]);
+ MULADD(at[30], at[41]);
+ MULADD(at[31], at[40]);
+ COMBA_STORE(C->dp[39]);
+ /* 40 */
+ COMBA_FORWARD;
+ MULADD(at[9], at[63]);
+ MULADD(at[10], at[62]);
+ MULADD(at[11], at[61]);
+ MULADD(at[12], at[60]);
+ MULADD(at[13], at[59]);
+ MULADD(at[14], at[58]);
+ MULADD(at[15], at[57]);
+ MULADD(at[16], at[56]);
+ MULADD(at[17], at[55]);
+ MULADD(at[18], at[54]);
+ MULADD(at[19], at[53]);
+ MULADD(at[20], at[52]);
+ MULADD(at[21], at[51]);
+ MULADD(at[22], at[50]);
+ MULADD(at[23], at[49]);
+ MULADD(at[24], at[48]);
+ MULADD(at[25], at[47]);
+ MULADD(at[26], at[46]);
+ MULADD(at[27], at[45]);
+ MULADD(at[28], at[44]);
+ MULADD(at[29], at[43]);
+ MULADD(at[30], at[42]);
+ MULADD(at[31], at[41]);
+ COMBA_STORE(C->dp[40]);
+ /* 41 */
+ COMBA_FORWARD;
+ MULADD(at[10], at[63]);
+ MULADD(at[11], at[62]);
+ MULADD(at[12], at[61]);
+ MULADD(at[13], at[60]);
+ MULADD(at[14], at[59]);
+ MULADD(at[15], at[58]);
+ MULADD(at[16], at[57]);
+ MULADD(at[17], at[56]);
+ MULADD(at[18], at[55]);
+ MULADD(at[19], at[54]);
+ MULADD(at[20], at[53]);
+ MULADD(at[21], at[52]);
+ MULADD(at[22], at[51]);
+ MULADD(at[23], at[50]);
+ MULADD(at[24], at[49]);
+ MULADD(at[25], at[48]);
+ MULADD(at[26], at[47]);
+ MULADD(at[27], at[46]);
+ MULADD(at[28], at[45]);
+ MULADD(at[29], at[44]);
+ MULADD(at[30], at[43]);
+ MULADD(at[31], at[42]);
+ COMBA_STORE(C->dp[41]);
+ /* 42 */
+ COMBA_FORWARD;
+ MULADD(at[11], at[63]);
+ MULADD(at[12], at[62]);
+ MULADD(at[13], at[61]);
+ MULADD(at[14], at[60]);
+ MULADD(at[15], at[59]);
+ MULADD(at[16], at[58]);
+ MULADD(at[17], at[57]);
+ MULADD(at[18], at[56]);
+ MULADD(at[19], at[55]);
+ MULADD(at[20], at[54]);
+ MULADD(at[21], at[53]);
+ MULADD(at[22], at[52]);
+ MULADD(at[23], at[51]);
+ MULADD(at[24], at[50]);
+ MULADD(at[25], at[49]);
+ MULADD(at[26], at[48]);
+ MULADD(at[27], at[47]);
+ MULADD(at[28], at[46]);
+ MULADD(at[29], at[45]);
+ MULADD(at[30], at[44]);
+ MULADD(at[31], at[43]);
+ COMBA_STORE(C->dp[42]);
+ /* 43 */
+ COMBA_FORWARD;
+ MULADD(at[12], at[63]);
+ MULADD(at[13], at[62]);
+ MULADD(at[14], at[61]);
+ MULADD(at[15], at[60]);
+ MULADD(at[16], at[59]);
+ MULADD(at[17], at[58]);
+ MULADD(at[18], at[57]);
+ MULADD(at[19], at[56]);
+ MULADD(at[20], at[55]);
+ MULADD(at[21], at[54]);
+ MULADD(at[22], at[53]);
+ MULADD(at[23], at[52]);
+ MULADD(at[24], at[51]);
+ MULADD(at[25], at[50]);
+ MULADD(at[26], at[49]);
+ MULADD(at[27], at[48]);
+ MULADD(at[28], at[47]);
+ MULADD(at[29], at[46]);
+ MULADD(at[30], at[45]);
+ MULADD(at[31], at[44]);
+ COMBA_STORE(C->dp[43]);
+ /* 44 */
+ COMBA_FORWARD;
+ MULADD(at[13], at[63]);
+ MULADD(at[14], at[62]);
+ MULADD(at[15], at[61]);
+ MULADD(at[16], at[60]);
+ MULADD(at[17], at[59]);
+ MULADD(at[18], at[58]);
+ MULADD(at[19], at[57]);
+ MULADD(at[20], at[56]);
+ MULADD(at[21], at[55]);
+ MULADD(at[22], at[54]);
+ MULADD(at[23], at[53]);
+ MULADD(at[24], at[52]);
+ MULADD(at[25], at[51]);
+ MULADD(at[26], at[50]);
+ MULADD(at[27], at[49]);
+ MULADD(at[28], at[48]);
+ MULADD(at[29], at[47]);
+ MULADD(at[30], at[46]);
+ MULADD(at[31], at[45]);
+ COMBA_STORE(C->dp[44]);
+ /* 45 */
+ COMBA_FORWARD;
+ MULADD(at[14], at[63]);
+ MULADD(at[15], at[62]);
+ MULADD(at[16], at[61]);
+ MULADD(at[17], at[60]);
+ MULADD(at[18], at[59]);
+ MULADD(at[19], at[58]);
+ MULADD(at[20], at[57]);
+ MULADD(at[21], at[56]);
+ MULADD(at[22], at[55]);
+ MULADD(at[23], at[54]);
+ MULADD(at[24], at[53]);
+ MULADD(at[25], at[52]);
+ MULADD(at[26], at[51]);
+ MULADD(at[27], at[50]);
+ MULADD(at[28], at[49]);
+ MULADD(at[29], at[48]);
+ MULADD(at[30], at[47]);
+ MULADD(at[31], at[46]);
+ COMBA_STORE(C->dp[45]);
+ /* 46 */
+ COMBA_FORWARD;
+ MULADD(at[15], at[63]);
+ MULADD(at[16], at[62]);
+ MULADD(at[17], at[61]);
+ MULADD(at[18], at[60]);
+ MULADD(at[19], at[59]);
+ MULADD(at[20], at[58]);
+ MULADD(at[21], at[57]);
+ MULADD(at[22], at[56]);
+ MULADD(at[23], at[55]);
+ MULADD(at[24], at[54]);
+ MULADD(at[25], at[53]);
+ MULADD(at[26], at[52]);
+ MULADD(at[27], at[51]);
+ MULADD(at[28], at[50]);
+ MULADD(at[29], at[49]);
+ MULADD(at[30], at[48]);
+ MULADD(at[31], at[47]);
+ COMBA_STORE(C->dp[46]);
+ /* 47 */
+ COMBA_FORWARD;
+ MULADD(at[16], at[63]);
+ MULADD(at[17], at[62]);
+ MULADD(at[18], at[61]);
+ MULADD(at[19], at[60]);
+ MULADD(at[20], at[59]);
+ MULADD(at[21], at[58]);
+ MULADD(at[22], at[57]);
+ MULADD(at[23], at[56]);
+ MULADD(at[24], at[55]);
+ MULADD(at[25], at[54]);
+ MULADD(at[26], at[53]);
+ MULADD(at[27], at[52]);
+ MULADD(at[28], at[51]);
+ MULADD(at[29], at[50]);
+ MULADD(at[30], at[49]);
+ MULADD(at[31], at[48]);
+ COMBA_STORE(C->dp[47]);
+ /* 48 */
+ COMBA_FORWARD;
+ MULADD(at[17], at[63]);
+ MULADD(at[18], at[62]);
+ MULADD(at[19], at[61]);
+ MULADD(at[20], at[60]);
+ MULADD(at[21], at[59]);
+ MULADD(at[22], at[58]);
+ MULADD(at[23], at[57]);
+ MULADD(at[24], at[56]);
+ MULADD(at[25], at[55]);
+ MULADD(at[26], at[54]);
+ MULADD(at[27], at[53]);
+ MULADD(at[28], at[52]);
+ MULADD(at[29], at[51]);
+ MULADD(at[30], at[50]);
+ MULADD(at[31], at[49]);
+ COMBA_STORE(C->dp[48]);
+ /* 49 */
+ COMBA_FORWARD;
+ MULADD(at[18], at[63]);
+ MULADD(at[19], at[62]);
+ MULADD(at[20], at[61]);
+ MULADD(at[21], at[60]);
+ MULADD(at[22], at[59]);
+ MULADD(at[23], at[58]);
+ MULADD(at[24], at[57]);
+ MULADD(at[25], at[56]);
+ MULADD(at[26], at[55]);
+ MULADD(at[27], at[54]);
+ MULADD(at[28], at[53]);
+ MULADD(at[29], at[52]);
+ MULADD(at[30], at[51]);
+ MULADD(at[31], at[50]);
+ COMBA_STORE(C->dp[49]);
+ /* 50 */
+ COMBA_FORWARD;
+ MULADD(at[19], at[63]);
+ MULADD(at[20], at[62]);
+ MULADD(at[21], at[61]);
+ MULADD(at[22], at[60]);
+ MULADD(at[23], at[59]);
+ MULADD(at[24], at[58]);
+ MULADD(at[25], at[57]);
+ MULADD(at[26], at[56]);
+ MULADD(at[27], at[55]);
+ MULADD(at[28], at[54]);
+ MULADD(at[29], at[53]);
+ MULADD(at[30], at[52]);
+ MULADD(at[31], at[51]);
+ COMBA_STORE(C->dp[50]);
+ /* 51 */
+ COMBA_FORWARD;
+ MULADD(at[20], at[63]);
+ MULADD(at[21], at[62]);
+ MULADD(at[22], at[61]);
+ MULADD(at[23], at[60]);
+ MULADD(at[24], at[59]);
+ MULADD(at[25], at[58]);
+ MULADD(at[26], at[57]);
+ MULADD(at[27], at[56]);
+ MULADD(at[28], at[55]);
+ MULADD(at[29], at[54]);
+ MULADD(at[30], at[53]);
+ MULADD(at[31], at[52]);
+ COMBA_STORE(C->dp[51]);
+ /* 52 */
+ COMBA_FORWARD;
+ MULADD(at[21], at[63]);
+ MULADD(at[22], at[62]);
+ MULADD(at[23], at[61]);
+ MULADD(at[24], at[60]);
+ MULADD(at[25], at[59]);
+ MULADD(at[26], at[58]);
+ MULADD(at[27], at[57]);
+ MULADD(at[28], at[56]);
+ MULADD(at[29], at[55]);
+ MULADD(at[30], at[54]);
+ MULADD(at[31], at[53]);
+ COMBA_STORE(C->dp[52]);
+ /* 53 */
+ COMBA_FORWARD;
+ MULADD(at[22], at[63]);
+ MULADD(at[23], at[62]);
+ MULADD(at[24], at[61]);
+ MULADD(at[25], at[60]);
+ MULADD(at[26], at[59]);
+ MULADD(at[27], at[58]);
+ MULADD(at[28], at[57]);
+ MULADD(at[29], at[56]);
+ MULADD(at[30], at[55]);
+ MULADD(at[31], at[54]);
+ COMBA_STORE(C->dp[53]);
+ /* 54 */
+ COMBA_FORWARD;
+ MULADD(at[23], at[63]);
+ MULADD(at[24], at[62]);
+ MULADD(at[25], at[61]);
+ MULADD(at[26], at[60]);
+ MULADD(at[27], at[59]);
+ MULADD(at[28], at[58]);
+ MULADD(at[29], at[57]);
+ MULADD(at[30], at[56]);
+ MULADD(at[31], at[55]);
+ COMBA_STORE(C->dp[54]);
+ /* 55 */
+ COMBA_FORWARD;
+ MULADD(at[24], at[63]);
+ MULADD(at[25], at[62]);
+ MULADD(at[26], at[61]);
+ MULADD(at[27], at[60]);
+ MULADD(at[28], at[59]);
+ MULADD(at[29], at[58]);
+ MULADD(at[30], at[57]);
+ MULADD(at[31], at[56]);
+ COMBA_STORE(C->dp[55]);
+ /* 56 */
+ COMBA_FORWARD;
+ MULADD(at[25], at[63]);
+ MULADD(at[26], at[62]);
+ MULADD(at[27], at[61]);
+ MULADD(at[28], at[60]);
+ MULADD(at[29], at[59]);
+ MULADD(at[30], at[58]);
+ MULADD(at[31], at[57]);
+ COMBA_STORE(C->dp[56]);
+ /* 57 */
+ COMBA_FORWARD;
+ MULADD(at[26], at[63]);
+ MULADD(at[27], at[62]);
+ MULADD(at[28], at[61]);
+ MULADD(at[29], at[60]);
+ MULADD(at[30], at[59]);
+ MULADD(at[31], at[58]);
+ COMBA_STORE(C->dp[57]);
+ /* 58 */
+ COMBA_FORWARD;
+ MULADD(at[27], at[63]);
+ MULADD(at[28], at[62]);
+ MULADD(at[29], at[61]);
+ MULADD(at[30], at[60]);
+ MULADD(at[31], at[59]);
+ COMBA_STORE(C->dp[58]);
+ /* 59 */
+ COMBA_FORWARD;
+ MULADD(at[28], at[63]);
+ MULADD(at[29], at[62]);
+ MULADD(at[30], at[61]);
+ MULADD(at[31], at[60]);
+ COMBA_STORE(C->dp[59]);
+ /* 60 */
+ COMBA_FORWARD;
+ MULADD(at[29], at[63]);
+ MULADD(at[30], at[62]);
+ MULADD(at[31], at[61]);
+ COMBA_STORE(C->dp[60]);
+ /* 61 */
+ COMBA_FORWARD;
+ MULADD(at[30], at[63]);
+ MULADD(at[31], at[62]);
+ COMBA_STORE(C->dp[61]);
+ /* 62 */
+ COMBA_FORWARD;
+ MULADD(at[31], at[63]);
+ COMBA_STORE(C->dp[62]);
+ COMBA_STORE2(C->dp[63]);
+ C->used = 64;
+ C->sign = A->sign ^ B->sign;
+ mp_clamp(C);
+ COMBA_FINI;
+}
+
+void
+s_mp_sqr_comba_4(const mp_int *A, mp_int *B)
+{ /*
+ * Fixed-size squaring for 4-digit input: reads a[0]..a[3] from A->dp and
+ * stores 8 digits into B. Each "output k" section below lists every pair
+ * a[i], a[j] with i <= j and i + j == k, i.e. one column of the product.
+ * NOTE(review): the COMBA_START, COMBA_STORE, COMBA_STORE2, COMBA_FINI,
+ * CLEAR_CARRY, CARRY_FORWARD, SQRADD and SQRADD2 macros are defined
+ * outside this view. Presumably SQRADD adds a[i] * a[j] into the carry
+ * accumulator once and SQRADD2 adds it twice -- confirm against the
+ * macro definitions.
+ * A->used is not checked and B->dp is not resized here; the caller
+ * presumably guarantees 4 input digits and room for 8 -- confirm.
+ */
+ mp_digit *a, b[8], c0, c1, c2; /* b: local result buffer; c0..c2 are not named below, presumably the macros' carry state */
+
+ a = A->dp;
+ COMBA_START;
+
+ /* clear carries */
+ CLEAR_CARRY;
+
+ /* output 0: the square term a[0], a[0] only */
+ SQRADD(a[0], a[0]);
+ COMBA_STORE(b[0]);
+
+ /* output 1: one cross term */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[1]);
+ COMBA_STORE(b[1]);
+
+ /* output 2: one cross term plus the square term a[1], a[1] */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[2]);
+ SQRADD(a[1], a[1]);
+ COMBA_STORE(b[2]);
+
+ /* output 3: two cross terms, no square term (odd column) */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[3]);
+ SQRADD2(a[1], a[2]);
+ COMBA_STORE(b[3]);
+
+ /* output 4: indices start at a[1] because a[3] is the highest digit read */
+ CARRY_FORWARD;
+ SQRADD2(a[1], a[3]);
+ SQRADD(a[2], a[2]);
+ COMBA_STORE(b[4]);
+
+ /* output 5 */
+ CARRY_FORWARD;
+ SQRADD2(a[2], a[3]);
+ COMBA_STORE(b[5]);
+
+ /* output 6: the square term a[3], a[3] only */
+ CARRY_FORWARD;
+ SQRADD(a[3], a[3]);
+ COMBA_STORE(b[6]);
+ COMBA_STORE2(b[7]); /* b[7] gets no terms of its own; presumably the remaining carry -- confirm */
+ COMBA_FINI;
+
+ B->used = 8; /* full result width; mp_clamp(B) below presumably trims leading zero digits */
+ B->sign = ZPOS; /* set to ZPOS regardless of A->sign */
+ memcpy(B->dp, b, 8 * sizeof(mp_digit)); /* B->dp is written only here, after every read of a[] */
+ mp_clamp(B);
+}
+
+void
+s_mp_sqr_comba_8(const mp_int *A, mp_int *B)
+{ /*
+ * Fixed-size squaring for 8-digit input: reads a[0]..a[7] from A->dp and
+ * stores 16 digits into B. Each "output k" section below lists every pair
+ * a[i], a[j] with i <= j and i + j == k, i.e. one column of the product.
+ * Columns with one or two cross terms use SQRADD2; columns with three or
+ * more use the SQRADDSC / SQRADDAC / SQRADDDB sequence instead.
+ * NOTE(review): the macro bodies are outside this view. Presumably
+ * SQRADDSC starts a side sum in sc0..sc2, SQRADDAC adds to it, and
+ * SQRADDDB folds twice that sum into c0..c2 -- confirm against the
+ * macro definitions.
+ * A->used is not checked and B->dp is not resized here; the caller
+ * presumably guarantees 8 input digits and room for 16 -- confirm.
+ */
+ mp_digit *a, b[16], c0, c1, c2, sc0, sc1, sc2; /* b: local result buffer; c0..c2 and sc0..sc2 are not named below, presumably macro state */
+
+ a = A->dp;
+ COMBA_START;
+
+ /* clear carries */
+ CLEAR_CARRY;
+
+ /* output 0: the square term a[0], a[0] only */
+ SQRADD(a[0], a[0]);
+ COMBA_STORE(b[0]);
+
+ /* output 1 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[1]);
+ COMBA_STORE(b[1]);
+
+ /* output 2 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[2]);
+ SQRADD(a[1], a[1]);
+ COMBA_STORE(b[2]);
+
+ /* output 3 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[3]);
+ SQRADD2(a[1], a[2]);
+ COMBA_STORE(b[3]);
+
+ /* output 4 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[4]);
+ SQRADD2(a[1], a[3]);
+ SQRADD(a[2], a[2]);
+ COMBA_STORE(b[4]);
+
+ /* output 5: first column with three cross terms, so the SC/AC/DB form is used */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[5]);
+ SQRADDAC(a[1], a[4]);
+ SQRADDAC(a[2], a[3]);
+ SQRADDDB;
+ COMBA_STORE(b[5]);
+
+ /* output 6: the square term a[3], a[3] is added separately after SQRADDDB */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[6]);
+ SQRADDAC(a[1], a[5]);
+ SQRADDAC(a[2], a[4]);
+ SQRADDDB;
+ SQRADD(a[3], a[3]);
+ COMBA_STORE(b[6]);
+
+ /* output 7: widest column, four cross terms */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[7]);
+ SQRADDAC(a[1], a[6]);
+ SQRADDAC(a[2], a[5]);
+ SQRADDAC(a[3], a[4]);
+ SQRADDDB;
+ COMBA_STORE(b[7]);
+
+ /* output 8: indices start at a[1] because a[7] is the highest digit read */
+ CARRY_FORWARD;
+ SQRADDSC(a[1], a[7]);
+ SQRADDAC(a[2], a[6]);
+ SQRADDAC(a[3], a[5]);
+ SQRADDDB;
+ SQRADD(a[4], a[4]);
+ COMBA_STORE(b[8]);
+
+ /* output 9 */
+ CARRY_FORWARD;
+ SQRADDSC(a[2], a[7]);
+ SQRADDAC(a[3], a[6]);
+ SQRADDAC(a[4], a[5]);
+ SQRADDDB;
+ COMBA_STORE(b[9]);
+
+ /* output 10: back to two cross terms, so SQRADD2 is used again */
+ CARRY_FORWARD;
+ SQRADD2(a[3], a[7]);
+ SQRADD2(a[4], a[6]);
+ SQRADD(a[5], a[5]);
+ COMBA_STORE(b[10]);
+
+ /* output 11 */
+ CARRY_FORWARD;
+ SQRADD2(a[4], a[7]);
+ SQRADD2(a[5], a[6]);
+ COMBA_STORE(b[11]);
+
+ /* output 12 */
+ CARRY_FORWARD;
+ SQRADD2(a[5], a[7]);
+ SQRADD(a[6], a[6]);
+ COMBA_STORE(b[12]);
+
+ /* output 13 */
+ CARRY_FORWARD;
+ SQRADD2(a[6], a[7]);
+ COMBA_STORE(b[13]);
+
+ /* output 14: the square term a[7], a[7] only */
+ CARRY_FORWARD;
+ SQRADD(a[7], a[7]);
+ COMBA_STORE(b[14]);
+ COMBA_STORE2(b[15]); /* b[15] gets no terms of its own; presumably the remaining carry -- confirm */
+ COMBA_FINI;
+
+ B->used = 16; /* full result width; mp_clamp(B) below presumably trims leading zero digits */
+ B->sign = ZPOS; /* set to ZPOS regardless of A->sign */
+ memcpy(B->dp, b, 16 * sizeof(mp_digit)); /* B->dp is written only here, after every read of a[] */
+ mp_clamp(B);
+}
+
+void
+s_mp_sqr_comba_16(const mp_int *A, mp_int *B)
+{ /*
+ * Fixed-size squaring for 16-digit input: reads a[0]..a[15] from A->dp
+ * and stores 32 digits into B. Each "output k" section below lists every
+ * pair a[i], a[j] with i <= j and i + j == k, i.e. one column of the
+ * product. Columns 0..15 start at a[0]; columns 16..30 start at
+ * a[k - 15] because a[15] is the highest digit read.
+ * Columns with one or two cross terms use SQRADD2; columns with three or
+ * more use the SQRADDSC / SQRADDAC / SQRADDDB sequence, and on even
+ * columns the square term is added with SQRADD after SQRADDDB.
+ * NOTE(review): the macro bodies are outside this view. Presumably
+ * SQRADDSC starts a side sum in sc0..sc2, SQRADDAC adds to it, and
+ * SQRADDDB folds twice that sum into c0..c2 -- confirm against the
+ * macro definitions.
+ * A->used is not checked and B->dp is not resized here; the caller
+ * presumably guarantees 16 input digits and room for 32 -- confirm.
+ */
+ mp_digit *a, b[32], c0, c1, c2, sc0, sc1, sc2; /* b: local result buffer; c0..c2 and sc0..sc2 are not named below, presumably macro state */
+
+ a = A->dp;
+ COMBA_START;
+
+ /* clear carries */
+ CLEAR_CARRY;
+
+ /* output 0: the square term a[0], a[0] only */
+ SQRADD(a[0], a[0]);
+ COMBA_STORE(b[0]);
+
+ /* output 1 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[1]);
+ COMBA_STORE(b[1]);
+
+ /* output 2 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[2]);
+ SQRADD(a[1], a[1]);
+ COMBA_STORE(b[2]);
+
+ /* output 3 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[3]);
+ SQRADD2(a[1], a[2]);
+ COMBA_STORE(b[3]);
+
+ /* output 4 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[4]);
+ SQRADD2(a[1], a[3]);
+ SQRADD(a[2], a[2]);
+ COMBA_STORE(b[4]);
+
+ /* output 5: first column with three cross terms, so the SC/AC/DB form is used */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[5]);
+ SQRADDAC(a[1], a[4]);
+ SQRADDAC(a[2], a[3]);
+ SQRADDDB;
+ COMBA_STORE(b[5]);
+
+ /* output 6 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[6]);
+ SQRADDAC(a[1], a[5]);
+ SQRADDAC(a[2], a[4]);
+ SQRADDDB;
+ SQRADD(a[3], a[3]);
+ COMBA_STORE(b[6]);
+
+ /* output 7 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[7]);
+ SQRADDAC(a[1], a[6]);
+ SQRADDAC(a[2], a[5]);
+ SQRADDAC(a[3], a[4]);
+ SQRADDDB;
+ COMBA_STORE(b[7]);
+
+ /* output 8 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[8]);
+ SQRADDAC(a[1], a[7]);
+ SQRADDAC(a[2], a[6]);
+ SQRADDAC(a[3], a[5]);
+ SQRADDDB;
+ SQRADD(a[4], a[4]);
+ COMBA_STORE(b[8]);
+
+ /* output 9 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[9]);
+ SQRADDAC(a[1], a[8]);
+ SQRADDAC(a[2], a[7]);
+ SQRADDAC(a[3], a[6]);
+ SQRADDAC(a[4], a[5]);
+ SQRADDDB;
+ COMBA_STORE(b[9]);
+
+ /* output 10 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[10]);
+ SQRADDAC(a[1], a[9]);
+ SQRADDAC(a[2], a[8]);
+ SQRADDAC(a[3], a[7]);
+ SQRADDAC(a[4], a[6]);
+ SQRADDDB;
+ SQRADD(a[5], a[5]);
+ COMBA_STORE(b[10]);
+
+ /* output 11 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[11]);
+ SQRADDAC(a[1], a[10]);
+ SQRADDAC(a[2], a[9]);
+ SQRADDAC(a[3], a[8]);
+ SQRADDAC(a[4], a[7]);
+ SQRADDAC(a[5], a[6]);
+ SQRADDDB;
+ COMBA_STORE(b[11]);
+
+ /* output 12 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[12]);
+ SQRADDAC(a[1], a[11]);
+ SQRADDAC(a[2], a[10]);
+ SQRADDAC(a[3], a[9]);
+ SQRADDAC(a[4], a[8]);
+ SQRADDAC(a[5], a[7]);
+ SQRADDDB;
+ SQRADD(a[6], a[6]);
+ COMBA_STORE(b[12]);
+
+ /* output 13 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[13]);
+ SQRADDAC(a[1], a[12]);
+ SQRADDAC(a[2], a[11]);
+ SQRADDAC(a[3], a[10]);
+ SQRADDAC(a[4], a[9]);
+ SQRADDAC(a[5], a[8]);
+ SQRADDAC(a[6], a[7]);
+ SQRADDDB;
+ COMBA_STORE(b[13]);
+
+ /* output 14 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[14]);
+ SQRADDAC(a[1], a[13]);
+ SQRADDAC(a[2], a[12]);
+ SQRADDAC(a[3], a[11]);
+ SQRADDAC(a[4], a[10]);
+ SQRADDAC(a[5], a[9]);
+ SQRADDAC(a[6], a[8]);
+ SQRADDDB;
+ SQRADD(a[7], a[7]);
+ COMBA_STORE(b[14]);
+
+ /* output 15: widest column, eight cross terms */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[15]);
+ SQRADDAC(a[1], a[14]);
+ SQRADDAC(a[2], a[13]);
+ SQRADDAC(a[3], a[12]);
+ SQRADDAC(a[4], a[11]);
+ SQRADDAC(a[5], a[10]);
+ SQRADDAC(a[6], a[9]);
+ SQRADDAC(a[7], a[8]);
+ SQRADDDB;
+ COMBA_STORE(b[15]);
+
+ /* output 16: from here the lower index starts at k - 15 */
+ CARRY_FORWARD;
+ SQRADDSC(a[1], a[15]);
+ SQRADDAC(a[2], a[14]);
+ SQRADDAC(a[3], a[13]);
+ SQRADDAC(a[4], a[12]);
+ SQRADDAC(a[5], a[11]);
+ SQRADDAC(a[6], a[10]);
+ SQRADDAC(a[7], a[9]);
+ SQRADDDB;
+ SQRADD(a[8], a[8]);
+ COMBA_STORE(b[16]);
+
+ /* output 17 */
+ CARRY_FORWARD;
+ SQRADDSC(a[2], a[15]);
+ SQRADDAC(a[3], a[14]);
+ SQRADDAC(a[4], a[13]);
+ SQRADDAC(a[5], a[12]);
+ SQRADDAC(a[6], a[11]);
+ SQRADDAC(a[7], a[10]);
+ SQRADDAC(a[8], a[9]);
+ SQRADDDB;
+ COMBA_STORE(b[17]);
+
+ /* output 18 */
+ CARRY_FORWARD;
+ SQRADDSC(a[3], a[15]);
+ SQRADDAC(a[4], a[14]);
+ SQRADDAC(a[5], a[13]);
+ SQRADDAC(a[6], a[12]);
+ SQRADDAC(a[7], a[11]);
+ SQRADDAC(a[8], a[10]);
+ SQRADDDB;
+ SQRADD(a[9], a[9]);
+ COMBA_STORE(b[18]);
+
+ /* output 19 */
+ CARRY_FORWARD;
+ SQRADDSC(a[4], a[15]);
+ SQRADDAC(a[5], a[14]);
+ SQRADDAC(a[6], a[13]);
+ SQRADDAC(a[7], a[12]);
+ SQRADDAC(a[8], a[11]);
+ SQRADDAC(a[9], a[10]);
+ SQRADDDB;
+ COMBA_STORE(b[19]);
+
+ /* output 20 */
+ CARRY_FORWARD;
+ SQRADDSC(a[5], a[15]);
+ SQRADDAC(a[6], a[14]);
+ SQRADDAC(a[7], a[13]);
+ SQRADDAC(a[8], a[12]);
+ SQRADDAC(a[9], a[11]);
+ SQRADDDB;
+ SQRADD(a[10], a[10]);
+ COMBA_STORE(b[20]);
+
+ /* output 21 */
+ CARRY_FORWARD;
+ SQRADDSC(a[6], a[15]);
+ SQRADDAC(a[7], a[14]);
+ SQRADDAC(a[8], a[13]);
+ SQRADDAC(a[9], a[12]);
+ SQRADDAC(a[10], a[11]);
+ SQRADDDB;
+ COMBA_STORE(b[21]);
+
+ /* output 22 */
+ CARRY_FORWARD;
+ SQRADDSC(a[7], a[15]);
+ SQRADDAC(a[8], a[14]);
+ SQRADDAC(a[9], a[13]);
+ SQRADDAC(a[10], a[12]);
+ SQRADDDB;
+ SQRADD(a[11], a[11]);
+ COMBA_STORE(b[22]);
+
+ /* output 23 */
+ CARRY_FORWARD;
+ SQRADDSC(a[8], a[15]);
+ SQRADDAC(a[9], a[14]);
+ SQRADDAC(a[10], a[13]);
+ SQRADDAC(a[11], a[12]);
+ SQRADDDB;
+ COMBA_STORE(b[23]);
+
+ /* output 24 */
+ CARRY_FORWARD;
+ SQRADDSC(a[9], a[15]);
+ SQRADDAC(a[10], a[14]);
+ SQRADDAC(a[11], a[13]);
+ SQRADDDB;
+ SQRADD(a[12], a[12]);
+ COMBA_STORE(b[24]);
+
+ /* output 25: last column that uses the SC/AC/DB form */
+ CARRY_FORWARD;
+ SQRADDSC(a[10], a[15]);
+ SQRADDAC(a[11], a[14]);
+ SQRADDAC(a[12], a[13]);
+ SQRADDDB;
+ COMBA_STORE(b[25]);
+
+ /* output 26: back to two cross terms, so SQRADD2 is used again */
+ CARRY_FORWARD;
+ SQRADD2(a[11], a[15]);
+ SQRADD2(a[12], a[14]);
+ SQRADD(a[13], a[13]);
+ COMBA_STORE(b[26]);
+
+ /* output 27 */
+ CARRY_FORWARD;
+ SQRADD2(a[12], a[15]);
+ SQRADD2(a[13], a[14]);
+ COMBA_STORE(b[27]);
+
+ /* output 28 */
+ CARRY_FORWARD;
+ SQRADD2(a[13], a[15]);
+ SQRADD(a[14], a[14]);
+ COMBA_STORE(b[28]);
+
+ /* output 29 */
+ CARRY_FORWARD;
+ SQRADD2(a[14], a[15]);
+ COMBA_STORE(b[29]);
+
+ /* output 30: the square term a[15], a[15] only */
+ CARRY_FORWARD;
+ SQRADD(a[15], a[15]);
+ COMBA_STORE(b[30]);
+ COMBA_STORE2(b[31]); /* b[31] gets no terms of its own; presumably the remaining carry -- confirm */
+ COMBA_FINI;
+
+ B->used = 32; /* full result width; mp_clamp(B) below presumably trims leading zero digits */
+ B->sign = ZPOS; /* set to ZPOS regardless of A->sign */
+ memcpy(B->dp, b, 32 * sizeof(mp_digit)); /* B->dp is written only here, after every read of a[] */
+ mp_clamp(B);
+}
+
+void
+s_mp_sqr_comba_32(const mp_int *A, mp_int *B)
+{
+ mp_digit *a, b[64], c0, c1, c2, sc0, sc1, sc2;
+
+ a = A->dp;
+ COMBA_START;
+
+ /* clear carries */
+ CLEAR_CARRY;
+
+ /* output 0 */
+ SQRADD(a[0], a[0]);
+ COMBA_STORE(b[0]);
+
+ /* output 1 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[1]);
+ COMBA_STORE(b[1]);
+
+ /* output 2 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[2]);
+ SQRADD(a[1], a[1]);
+ COMBA_STORE(b[2]);
+
+ /* output 3 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[3]);
+ SQRADD2(a[1], a[2]);
+ COMBA_STORE(b[3]);
+
+ /* output 4 */
+ CARRY_FORWARD;
+ SQRADD2(a[0], a[4]);
+ SQRADD2(a[1], a[3]);
+ SQRADD(a[2], a[2]);
+ COMBA_STORE(b[4]);
+
+ /* output 5 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[5]);
+ SQRADDAC(a[1], a[4]);
+ SQRADDAC(a[2], a[3]);
+ SQRADDDB;
+ COMBA_STORE(b[5]);
+
+ /* output 6 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[6]);
+ SQRADDAC(a[1], a[5]);
+ SQRADDAC(a[2], a[4]);
+ SQRADDDB;
+ SQRADD(a[3], a[3]);
+ COMBA_STORE(b[6]);
+
+ /* output 7 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[7]);
+ SQRADDAC(a[1], a[6]);
+ SQRADDAC(a[2], a[5]);
+ SQRADDAC(a[3], a[4]);
+ SQRADDDB;
+ COMBA_STORE(b[7]);
+
+ /* output 8 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[8]);
+ SQRADDAC(a[1], a[7]);
+ SQRADDAC(a[2], a[6]);
+ SQRADDAC(a[3], a[5]);
+ SQRADDDB;
+ SQRADD(a[4], a[4]);
+ COMBA_STORE(b[8]);
+
+ /* output 9 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[9]);
+ SQRADDAC(a[1], a[8]);
+ SQRADDAC(a[2], a[7]);
+ SQRADDAC(a[3], a[6]);
+ SQRADDAC(a[4], a[5]);
+ SQRADDDB;
+ COMBA_STORE(b[9]);
+
+ /* output 10 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[10]);
+ SQRADDAC(a[1], a[9]);
+ SQRADDAC(a[2], a[8]);
+ SQRADDAC(a[3], a[7]);
+ SQRADDAC(a[4], a[6]);
+ SQRADDDB;
+ SQRADD(a[5], a[5]);
+ COMBA_STORE(b[10]);
+
+ /* output 11 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[11]);
+ SQRADDAC(a[1], a[10]);
+ SQRADDAC(a[2], a[9]);
+ SQRADDAC(a[3], a[8]);
+ SQRADDAC(a[4], a[7]);
+ SQRADDAC(a[5], a[6]);
+ SQRADDDB;
+ COMBA_STORE(b[11]);
+
+ /* output 12 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[12]);
+ SQRADDAC(a[1], a[11]);
+ SQRADDAC(a[2], a[10]);
+ SQRADDAC(a[3], a[9]);
+ SQRADDAC(a[4], a[8]);
+ SQRADDAC(a[5], a[7]);
+ SQRADDDB;
+ SQRADD(a[6], a[6]);
+ COMBA_STORE(b[12]);
+
+ /* output 13 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[13]);
+ SQRADDAC(a[1], a[12]);
+ SQRADDAC(a[2], a[11]);
+ SQRADDAC(a[3], a[10]);
+ SQRADDAC(a[4], a[9]);
+ SQRADDAC(a[5], a[8]);
+ SQRADDAC(a[6], a[7]);
+ SQRADDDB;
+ COMBA_STORE(b[13]);
+
+ /* output 14 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[14]);
+ SQRADDAC(a[1], a[13]);
+ SQRADDAC(a[2], a[12]);
+ SQRADDAC(a[3], a[11]);
+ SQRADDAC(a[4], a[10]);
+ SQRADDAC(a[5], a[9]);
+ SQRADDAC(a[6], a[8]);
+ SQRADDDB;
+ SQRADD(a[7], a[7]);
+ COMBA_STORE(b[14]);
+
+ /* output 15 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[15]);
+ SQRADDAC(a[1], a[14]);
+ SQRADDAC(a[2], a[13]);
+ SQRADDAC(a[3], a[12]);
+ SQRADDAC(a[4], a[11]);
+ SQRADDAC(a[5], a[10]);
+ SQRADDAC(a[6], a[9]);
+ SQRADDAC(a[7], a[8]);
+ SQRADDDB;
+ COMBA_STORE(b[15]);
+
+ /* output 16 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[16]);
+ SQRADDAC(a[1], a[15]);
+ SQRADDAC(a[2], a[14]);
+ SQRADDAC(a[3], a[13]);
+ SQRADDAC(a[4], a[12]);
+ SQRADDAC(a[5], a[11]);
+ SQRADDAC(a[6], a[10]);
+ SQRADDAC(a[7], a[9]);
+ SQRADDDB;
+ SQRADD(a[8], a[8]);
+ COMBA_STORE(b[16]);
+
+ /* output 17 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[17]);
+ SQRADDAC(a[1], a[16]);
+ SQRADDAC(a[2], a[15]);
+ SQRADDAC(a[3], a[14]);
+ SQRADDAC(a[4], a[13]);
+ SQRADDAC(a[5], a[12]);
+ SQRADDAC(a[6], a[11]);
+ SQRADDAC(a[7], a[10]);
+ SQRADDAC(a[8], a[9]);
+ SQRADDDB;
+ COMBA_STORE(b[17]);
+
+ /* output 18 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[18]);
+ SQRADDAC(a[1], a[17]);
+ SQRADDAC(a[2], a[16]);
+ SQRADDAC(a[3], a[15]);
+ SQRADDAC(a[4], a[14]);
+ SQRADDAC(a[5], a[13]);
+ SQRADDAC(a[6], a[12]);
+ SQRADDAC(a[7], a[11]);
+ SQRADDAC(a[8], a[10]);
+ SQRADDDB;
+ SQRADD(a[9], a[9]);
+ COMBA_STORE(b[18]);
+
+ /* output 19 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[19]);
+ SQRADDAC(a[1], a[18]);
+ SQRADDAC(a[2], a[17]);
+ SQRADDAC(a[3], a[16]);
+ SQRADDAC(a[4], a[15]);
+ SQRADDAC(a[5], a[14]);
+ SQRADDAC(a[6], a[13]);
+ SQRADDAC(a[7], a[12]);
+ SQRADDAC(a[8], a[11]);
+ SQRADDAC(a[9], a[10]);
+ SQRADDDB;
+ COMBA_STORE(b[19]);
+
+ /* output 20 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[20]);
+ SQRADDAC(a[1], a[19]);
+ SQRADDAC(a[2], a[18]);
+ SQRADDAC(a[3], a[17]);
+ SQRADDAC(a[4], a[16]);
+ SQRADDAC(a[5], a[15]);
+ SQRADDAC(a[6], a[14]);
+ SQRADDAC(a[7], a[13]);
+ SQRADDAC(a[8], a[12]);
+ SQRADDAC(a[9], a[11]);
+ SQRADDDB;
+ SQRADD(a[10], a[10]);
+ COMBA_STORE(b[20]);
+
+ /* output 21 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[21]);
+ SQRADDAC(a[1], a[20]);
+ SQRADDAC(a[2], a[19]);
+ SQRADDAC(a[3], a[18]);
+ SQRADDAC(a[4], a[17]);
+ SQRADDAC(a[5], a[16]);
+ SQRADDAC(a[6], a[15]);
+ SQRADDAC(a[7], a[14]);
+ SQRADDAC(a[8], a[13]);
+ SQRADDAC(a[9], a[12]);
+ SQRADDAC(a[10], a[11]);
+ SQRADDDB;
+ COMBA_STORE(b[21]);
+
+ /* output 22 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[22]);
+ SQRADDAC(a[1], a[21]);
+ SQRADDAC(a[2], a[20]);
+ SQRADDAC(a[3], a[19]);
+ SQRADDAC(a[4], a[18]);
+ SQRADDAC(a[5], a[17]);
+ SQRADDAC(a[6], a[16]);
+ SQRADDAC(a[7], a[15]);
+ SQRADDAC(a[8], a[14]);
+ SQRADDAC(a[9], a[13]);
+ SQRADDAC(a[10], a[12]);
+ SQRADDDB;
+ SQRADD(a[11], a[11]);
+ COMBA_STORE(b[22]);
+
+ /* output 23 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[23]);
+ SQRADDAC(a[1], a[22]);
+ SQRADDAC(a[2], a[21]);
+ SQRADDAC(a[3], a[20]);
+ SQRADDAC(a[4], a[19]);
+ SQRADDAC(a[5], a[18]);
+ SQRADDAC(a[6], a[17]);
+ SQRADDAC(a[7], a[16]);
+ SQRADDAC(a[8], a[15]);
+ SQRADDAC(a[9], a[14]);
+ SQRADDAC(a[10], a[13]);
+ SQRADDAC(a[11], a[12]);
+ SQRADDDB;
+ COMBA_STORE(b[23]);
+
+ /* output 24 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[24]);
+ SQRADDAC(a[1], a[23]);
+ SQRADDAC(a[2], a[22]);
+ SQRADDAC(a[3], a[21]);
+ SQRADDAC(a[4], a[20]);
+ SQRADDAC(a[5], a[19]);
+ SQRADDAC(a[6], a[18]);
+ SQRADDAC(a[7], a[17]);
+ SQRADDAC(a[8], a[16]);
+ SQRADDAC(a[9], a[15]);
+ SQRADDAC(a[10], a[14]);
+ SQRADDAC(a[11], a[13]);
+ SQRADDDB;
+ SQRADD(a[12], a[12]);
+ COMBA_STORE(b[24]);
+
+ /* output 25 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[25]);
+ SQRADDAC(a[1], a[24]);
+ SQRADDAC(a[2], a[23]);
+ SQRADDAC(a[3], a[22]);
+ SQRADDAC(a[4], a[21]);
+ SQRADDAC(a[5], a[20]);
+ SQRADDAC(a[6], a[19]);
+ SQRADDAC(a[7], a[18]);
+ SQRADDAC(a[8], a[17]);
+ SQRADDAC(a[9], a[16]);
+ SQRADDAC(a[10], a[15]);
+ SQRADDAC(a[11], a[14]);
+ SQRADDAC(a[12], a[13]);
+ SQRADDDB;
+ COMBA_STORE(b[25]);
+
+ /* output 26 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[26]);
+ SQRADDAC(a[1], a[25]);
+ SQRADDAC(a[2], a[24]);
+ SQRADDAC(a[3], a[23]);
+ SQRADDAC(a[4], a[22]);
+ SQRADDAC(a[5], a[21]);
+ SQRADDAC(a[6], a[20]);
+ SQRADDAC(a[7], a[19]);
+ SQRADDAC(a[8], a[18]);
+ SQRADDAC(a[9], a[17]);
+ SQRADDAC(a[10], a[16]);
+ SQRADDAC(a[11], a[15]);
+ SQRADDAC(a[12], a[14]);
+ SQRADDDB;
+ SQRADD(a[13], a[13]);
+ COMBA_STORE(b[26]);
+
+ /* output 27 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[27]);
+ SQRADDAC(a[1], a[26]);
+ SQRADDAC(a[2], a[25]);
+ SQRADDAC(a[3], a[24]);
+ SQRADDAC(a[4], a[23]);
+ SQRADDAC(a[5], a[22]);
+ SQRADDAC(a[6], a[21]);
+ SQRADDAC(a[7], a[20]);
+ SQRADDAC(a[8], a[19]);
+ SQRADDAC(a[9], a[18]);
+ SQRADDAC(a[10], a[17]);
+ SQRADDAC(a[11], a[16]);
+ SQRADDAC(a[12], a[15]);
+ SQRADDAC(a[13], a[14]);
+ SQRADDDB;
+ COMBA_STORE(b[27]);
+
+ /* output 28 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[28]);
+ SQRADDAC(a[1], a[27]);
+ SQRADDAC(a[2], a[26]);
+ SQRADDAC(a[3], a[25]);
+ SQRADDAC(a[4], a[24]);
+ SQRADDAC(a[5], a[23]);
+ SQRADDAC(a[6], a[22]);
+ SQRADDAC(a[7], a[21]);
+ SQRADDAC(a[8], a[20]);
+ SQRADDAC(a[9], a[19]);
+ SQRADDAC(a[10], a[18]);
+ SQRADDAC(a[11], a[17]);
+ SQRADDAC(a[12], a[16]);
+ SQRADDAC(a[13], a[15]);
+ SQRADDDB;
+ SQRADD(a[14], a[14]);
+ COMBA_STORE(b[28]);
+
+ /* output 29 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[29]);
+ SQRADDAC(a[1], a[28]);
+ SQRADDAC(a[2], a[27]);
+ SQRADDAC(a[3], a[26]);
+ SQRADDAC(a[4], a[25]);
+ SQRADDAC(a[5], a[24]);
+ SQRADDAC(a[6], a[23]);
+ SQRADDAC(a[7], a[22]);
+ SQRADDAC(a[8], a[21]);
+ SQRADDAC(a[9], a[20]);
+ SQRADDAC(a[10], a[19]);
+ SQRADDAC(a[11], a[18]);
+ SQRADDAC(a[12], a[17]);
+ SQRADDAC(a[13], a[16]);
+ SQRADDAC(a[14], a[15]);
+ SQRADDDB;
+ COMBA_STORE(b[29]);
+
+ /* output 30 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[30]);
+ SQRADDAC(a[1], a[29]);
+ SQRADDAC(a[2], a[28]);
+ SQRADDAC(a[3], a[27]);
+ SQRADDAC(a[4], a[26]);
+ SQRADDAC(a[5], a[25]);
+ SQRADDAC(a[6], a[24]);
+ SQRADDAC(a[7], a[23]);
+ SQRADDAC(a[8], a[22]);
+ SQRADDAC(a[9], a[21]);
+ SQRADDAC(a[10], a[20]);
+ SQRADDAC(a[11], a[19]);
+ SQRADDAC(a[12], a[18]);
+ SQRADDAC(a[13], a[17]);
+ SQRADDAC(a[14], a[16]);
+ SQRADDDB;
+ SQRADD(a[15], a[15]);
+ COMBA_STORE(b[30]);
+
+ /* output 31 */
+ CARRY_FORWARD;
+ SQRADDSC(a[0], a[31]);
+ SQRADDAC(a[1], a[30]);
+ SQRADDAC(a[2], a[29]);
+ SQRADDAC(a[3], a[28]);
+ SQRADDAC(a[4], a[27]);
+ SQRADDAC(a[5], a[26]);
+ SQRADDAC(a[6], a[25]);
+ SQRADDAC(a[7], a[24]);
+ SQRADDAC(a[8], a[23]);
+ SQRADDAC(a[9], a[22]);
+ SQRADDAC(a[10], a[21]);
+ SQRADDAC(a[11], a[20]);
+ SQRADDAC(a[12], a[19]);
+ SQRADDAC(a[13], a[18]);
+ SQRADDAC(a[14], a[17]);
+ SQRADDAC(a[15], a[16]);
+ SQRADDDB;
+ COMBA_STORE(b[31]);
+
+ /* output 32 */
+ CARRY_FORWARD;
+ SQRADDSC(a[1], a[31]);
+ SQRADDAC(a[2], a[30]);
+ SQRADDAC(a[3], a[29]);
+ SQRADDAC(a[4], a[28]);
+ SQRADDAC(a[5], a[27]);
+ SQRADDAC(a[6], a[26]);
+ SQRADDAC(a[7], a[25]);
+ SQRADDAC(a[8], a[24]);
+ SQRADDAC(a[9], a[23]);
+ SQRADDAC(a[10], a[22]);
+ SQRADDAC(a[11], a[21]);
+ SQRADDAC(a[12], a[20]);
+ SQRADDAC(a[13], a[19]);
+ SQRADDAC(a[14], a[18]);
+ SQRADDAC(a[15], a[17]);
+ SQRADDDB;
+ SQRADD(a[16], a[16]);
+ COMBA_STORE(b[32]);
+
+ /* output 33 */
+ CARRY_FORWARD;
+ SQRADDSC(a[2], a[31]);
+ SQRADDAC(a[3], a[30]);
+ SQRADDAC(a[4], a[29]);
+ SQRADDAC(a[5], a[28]);
+ SQRADDAC(a[6], a[27]);
+ SQRADDAC(a[7], a[26]);
+ SQRADDAC(a[8], a[25]);
+ SQRADDAC(a[9], a[24]);
+ SQRADDAC(a[10], a[23]);
+ SQRADDAC(a[11], a[22]);
+ SQRADDAC(a[12], a[21]);
+ SQRADDAC(a[13], a[20]);
+ SQRADDAC(a[14], a[19]);
+ SQRADDAC(a[15], a[18]);
+ SQRADDAC(a[16], a[17]);
+ SQRADDDB;
+ COMBA_STORE(b[33]);
+
+ /* output 34 */
+ CARRY_FORWARD;
+ SQRADDSC(a[3], a[31]);
+ SQRADDAC(a[4], a[30]);
+ SQRADDAC(a[5], a[29]);
+ SQRADDAC(a[6], a[28]);
+ SQRADDAC(a[7], a[27]);
+ SQRADDAC(a[8], a[26]);
+ SQRADDAC(a[9], a[25]);
+ SQRADDAC(a[10], a[24]);
+ SQRADDAC(a[11], a[23]);
+ SQRADDAC(a[12], a[22]);
+ SQRADDAC(a[13], a[21]);
+ SQRADDAC(a[14], a[20]);
+ SQRADDAC(a[15], a[19]);
+ SQRADDAC(a[16], a[18]);
+ SQRADDDB;
+ SQRADD(a[17], a[17]);
+ COMBA_STORE(b[34]);
+
+ /* output 35 */
+ CARRY_FORWARD;
+ SQRADDSC(a[4], a[31]);
+ SQRADDAC(a[5], a[30]);
+ SQRADDAC(a[6], a[29]);
+ SQRADDAC(a[7], a[28]);
+ SQRADDAC(a[8], a[27]);
+ SQRADDAC(a[9], a[26]);
+ SQRADDAC(a[10], a[25]);
+ SQRADDAC(a[11], a[24]);
+ SQRADDAC(a[12], a[23]);
+ SQRADDAC(a[13], a[22]);
+ SQRADDAC(a[14], a[21]);
+ SQRADDAC(a[15], a[20]);
+ SQRADDAC(a[16], a[19]);
+ SQRADDAC(a[17], a[18]);
+ SQRADDDB;
+ COMBA_STORE(b[35]);
+
+ /* output 36 */
+ CARRY_FORWARD;
+ SQRADDSC(a[5], a[31]);
+ SQRADDAC(a[6], a[30]);
+ SQRADDAC(a[7], a[29]);
+ SQRADDAC(a[8], a[28]);
+ SQRADDAC(a[9], a[27]);
+ SQRADDAC(a[10], a[26]);
+ SQRADDAC(a[11], a[25]);
+ SQRADDAC(a[12], a[24]);
+ SQRADDAC(a[13], a[23]);
+ SQRADDAC(a[14], a[22]);
+ SQRADDAC(a[15], a[21]);
+ SQRADDAC(a[16], a[20]);
+ SQRADDAC(a[17], a[19]);
+ SQRADDDB;
+ SQRADD(a[18], a[18]);
+ COMBA_STORE(b[36]);
+
+ /* output 37 */
+ CARRY_FORWARD;
+ SQRADDSC(a[6], a[31]);
+ SQRADDAC(a[7], a[30]);
+ SQRADDAC(a[8], a[29]);
+ SQRADDAC(a[9], a[28]);
+ SQRADDAC(a[10], a[27]);
+ SQRADDAC(a[11], a[26]);
+ SQRADDAC(a[12], a[25]);
+ SQRADDAC(a[13], a[24]);
+ SQRADDAC(a[14], a[23]);
+ SQRADDAC(a[15], a[22]);
+ SQRADDAC(a[16], a[21]);
+ SQRADDAC(a[17], a[20]);
+ SQRADDAC(a[18], a[19]);
+ SQRADDDB;
+ COMBA_STORE(b[37]);
+
+ /* output 38 */
+ CARRY_FORWARD;
+ SQRADDSC(a[7], a[31]);
+ SQRADDAC(a[8], a[30]);
+ SQRADDAC(a[9], a[29]);
+ SQRADDAC(a[10], a[28]);
+ SQRADDAC(a[11], a[27]);
+ SQRADDAC(a[12], a[26]);
+ SQRADDAC(a[13], a[25]);
+ SQRADDAC(a[14], a[24]);
+ SQRADDAC(a[15], a[23]);
+ SQRADDAC(a[16], a[22]);
+ SQRADDAC(a[17], a[21]);
+ SQRADDAC(a[18], a[20]);
+ SQRADDDB;
+ SQRADD(a[19], a[19]);
+ COMBA_STORE(b[38]);
+
+ /* output 39 */
+ CARRY_FORWARD;
+ SQRADDSC(a[8], a[31]);
+ SQRADDAC(a[9], a[30]);
+ SQRADDAC(a[10], a[29]);
+ SQRADDAC(a[11], a[28]);
+ SQRADDAC(a[12], a[27]);
+ SQRADDAC(a[13], a[26]);
+ SQRADDAC(a[14], a[25]);
+ SQRADDAC(a[15], a[24]);
+ SQRADDAC(a[16], a[23]);
+ SQRADDAC(a[17], a[22]);
+ SQRADDAC(a[18], a[21]);
+ SQRADDAC(a[19], a[20]);
+ SQRADDDB;
+ COMBA_STORE(b[39]);
+
+ /* output 40 */
+ CARRY_FORWARD;
+ SQRADDSC(a[9], a[31]);
+ SQRADDAC(a[10], a[30]);
+ SQRADDAC(a[11], a[29]);
+ SQRADDAC(a[12], a[28]);
+ SQRADDAC(a[13], a[27]);
+ SQRADDAC(a[14], a[26]);
+ SQRADDAC(a[15], a[25]);
+ SQRADDAC(a[16], a[24]);
+ SQRADDAC(a[17], a[23]);
+ SQRADDAC(a[18], a[22]);
+ SQRADDAC(a[19], a[21]);
+ SQRADDDB;
+ SQRADD(a[20], a[20]);
+ COMBA_STORE(b[40]);
+
+ /* output 41 */
+ CARRY_FORWARD;
+ SQRADDSC(a[10], a[31]);
+ SQRADDAC(a[11], a[30]);
+ SQRADDAC(a[12], a[29]);
+ SQRADDAC(a[13], a[28]);
+ SQRADDAC(a[14], a[27]);
+ SQRADDAC(a[15], a[26]);
+ SQRADDAC(a[16], a[25]);
+ SQRADDAC(a[17], a[24]);
+ SQRADDAC(a[18], a[23]);
+ SQRADDAC(a[19], a[22]);
+ SQRADDAC(a[20], a[21]);
+ SQRADDDB;
+ COMBA_STORE(b[41]);
+
+ /* output 42 */
+ CARRY_FORWARD;
+ SQRADDSC(a[11], a[31]);
+ SQRADDAC(a[12], a[30]);
+ SQRADDAC(a[13], a[29]);
+ SQRADDAC(a[14], a[28]);
+ SQRADDAC(a[15], a[27]);
+ SQRADDAC(a[16], a[26]);
+ SQRADDAC(a[17], a[25]);
+ SQRADDAC(a[18], a[24]);
+ SQRADDAC(a[19], a[23]);
+ SQRADDAC(a[20], a[22]);
+ SQRADDDB;
+ SQRADD(a[21], a[21]);
+ COMBA_STORE(b[42]);
+
+ /* output 43 */
+ CARRY_FORWARD;
+ SQRADDSC(a[12], a[31]);
+ SQRADDAC(a[13], a[30]);
+ SQRADDAC(a[14], a[29]);
+ SQRADDAC(a[15], a[28]);
+ SQRADDAC(a[16], a[27]);
+ SQRADDAC(a[17], a[26]);
+ SQRADDAC(a[18], a[25]);
+ SQRADDAC(a[19], a[24]);
+ SQRADDAC(a[20], a[23]);
+ SQRADDAC(a[21], a[22]);
+ SQRADDDB;
+ COMBA_STORE(b[43]);
+
+ /* output 44 */
+ CARRY_FORWARD;
+ SQRADDSC(a[13], a[31]);
+ SQRADDAC(a[14], a[30]);
+ SQRADDAC(a[15], a[29]);
+ SQRADDAC(a[16], a[28]);
+ SQRADDAC(a[17], a[27]);
+ SQRADDAC(a[18], a[26]);
+ SQRADDAC(a[19], a[25]);
+ SQRADDAC(a[20], a[24]);
+ SQRADDAC(a[21], a[23]);
+ SQRADDDB;
+ SQRADD(a[22], a[22]);
+ COMBA_STORE(b[44]);
+
+ /* output 45 */
+ CARRY_FORWARD;
+ SQRADDSC(a[14], a[31]);
+ SQRADDAC(a[15], a[30]);
+ SQRADDAC(a[16], a[29]);
+ SQRADDAC(a[17], a[28]);
+ SQRADDAC(a[18], a[27]);
+ SQRADDAC(a[19], a[26]);
+ SQRADDAC(a[20], a[25]);
+ SQRADDAC(a[21], a[24]);
+ SQRADDAC(a[22], a[23]);
+ SQRADDDB;
+ COMBA_STORE(b[45]);
+
+ /* output 46 */
+ CARRY_FORWARD;
+ SQRADDSC(a[15], a[31]);
+ SQRADDAC(a[16], a[30]);
+ SQRADDAC(a[17], a[29]);
+ SQRADDAC(a[18], a[28]);
+ SQRADDAC(a[19], a[27]);
+ SQRADDAC(a[20], a[26]);
+ SQRADDAC(a[21], a[25]);
+ SQRADDAC(a[22], a[24]);
+ SQRADDDB;
+ SQRADD(a[23], a[23]);
+ COMBA_STORE(b[46]);
+
+ /* output 47 */
+ CARRY_FORWARD;
+ SQRADDSC(a[16], a[31]);
+ SQRADDAC(a[17], a[30]);
+ SQRADDAC(a[18], a[29]);
+ SQRADDAC(a[19], a[28]);
+ SQRADDAC(a[20], a[27]);
+ SQRADDAC(a[21], a[26]);
+ SQRADDAC(a[22], a[25]);
+ SQRADDAC(a[23], a[24]);
+ SQRADDDB;
+ COMBA_STORE(b[47]);
+
+ /* output 48 */
+ CARRY_FORWARD;
+ SQRADDSC(a[17], a[31]);
+ SQRADDAC(a[18], a[30]);
+ SQRADDAC(a[19], a[29]);
+ SQRADDAC(a[20], a[28]);
+ SQRADDAC(a[21], a[27]);
+ SQRADDAC(a[22], a[26]);
+ SQRADDAC(a[23], a[25]);
+ SQRADDDB;
+ SQRADD(a[24], a[24]);
+ COMBA_STORE(b[48]);
+
+ /* output 49 */
+ CARRY_FORWARD;
+ SQRADDSC(a[18], a[31]);
+ SQRADDAC(a[19], a[30]);
+ SQRADDAC(a[20], a[29]);
+ SQRADDAC(a[21], a[28]);
+ SQRADDAC(a[22], a[27]);
+ SQRADDAC(a[23], a[26]);
+ SQRADDAC(a[24], a[25]);
+ SQRADDDB;
+ COMBA_STORE(b[49]);
+
+ /* output 50 */
+ CARRY_FORWARD;
+ SQRADDSC(a[19], a[31]);
+ SQRADDAC(a[20], a[30]);
+ SQRADDAC(a[21], a[29]);
+ SQRADDAC(a[22], a[28]);
+ SQRADDAC(a[23], a[27]);
+ SQRADDAC(a[24], a[26]);
+ SQRADDDB;
+ SQRADD(a[25], a[25]);
+ COMBA_STORE(b[50]);
+
+ /* output 51 */
+ CARRY_FORWARD;
+ SQRADDSC(a[20], a[31]);
+ SQRADDAC(a[21], a[30]);
+ SQRADDAC(a[22], a[29]);
+ SQRADDAC(a[23], a[28]);
+ SQRADDAC(a[24], a[27]);
+ SQRADDAC(a[25], a[26]);
+ SQRADDDB;
+ COMBA_STORE(b[51]);
+
+ /* output 52 */
+ CARRY_FORWARD;
+ SQRADDSC(a[21], a[31]);
+ SQRADDAC(a[22], a[30]);
+ SQRADDAC(a[23], a[29]);
+ SQRADDAC(a[24], a[28]);
+ SQRADDAC(a[25], a[27]);
+ SQRADDDB;
+ SQRADD(a[26], a[26]);
+ COMBA_STORE(b[52]);
+
+ /* output 53 */
+ CARRY_FORWARD;
+ SQRADDSC(a[22], a[31]);
+ SQRADDAC(a[23], a[30]);
+ SQRADDAC(a[24], a[29]);
+ SQRADDAC(a[25], a[28]);
+ SQRADDAC(a[26], a[27]);
+ SQRADDDB;
+ COMBA_STORE(b[53]);
+
+ /* output 54 */
+ CARRY_FORWARD;
+ SQRADDSC(a[23], a[31]);
+ SQRADDAC(a[24], a[30]);
+ SQRADDAC(a[25], a[29]);
+ SQRADDAC(a[26], a[28]);
+ SQRADDDB;
+ SQRADD(a[27], a[27]);
+ COMBA_STORE(b[54]);
+
+ /* output 55 */
+ CARRY_FORWARD;
+ SQRADDSC(a[24], a[31]);
+ SQRADDAC(a[25], a[30]);
+ SQRADDAC(a[26], a[29]);
+ SQRADDAC(a[27], a[28]);
+ SQRADDDB;
+ COMBA_STORE(b[55]);
+
+ /* output 56 */
+ CARRY_FORWARD;
+ SQRADDSC(a[25], a[31]);
+ SQRADDAC(a[26], a[30]);
+ SQRADDAC(a[27], a[29]);
+ SQRADDDB;
+ SQRADD(a[28], a[28]);
+ COMBA_STORE(b[56]);
+
+ /* output 57 */
+ CARRY_FORWARD;
+ SQRADDSC(a[26], a[31]);
+ SQRADDAC(a[27], a[30]);
+ SQRADDAC(a[28], a[29]);
+ SQRADDDB;
+ COMBA_STORE(b[57]);
+
+ /* output 58 */
+ CARRY_FORWARD;
+ SQRADD2(a[27], a[31]);
+ SQRADD2(a[28], a[30]);
+ SQRADD(a[29], a[29]);
+ COMBA_STORE(b[58]);
+
+ /* output 59 */
+ CARRY_FORWARD;
+ SQRADD2(a[28], a[31]);
+ SQRADD2(a[29], a[30]);
+ COMBA_STORE(b[59]);
+
+ /* output 60 */
+ CARRY_FORWARD;
+ SQRADD2(a[29], a[31]);
+ SQRADD(a[30], a[30]);
+ COMBA_STORE(b[60]);
+
+ /* output 61 */
+ CARRY_FORWARD;
+ SQRADD2(a[30], a[31]);
+ COMBA_STORE(b[61]);
+
+ /* output 62 */
+ CARRY_FORWARD;
+ SQRADD(a[31], a[31]);
+ COMBA_STORE(b[62]);
+ COMBA_STORE2(b[63]);
+ COMBA_FINI;
+
+ B->used = 64;
+ B->sign = ZPOS;
+ memcpy(B->dp, b, 64 * sizeof(mp_digit));
+ mp_clamp(B);
+}
diff --git a/security/nss/lib/freebl/mpi/mp_comba_amd64_masm.asm b/security/nss/lib/freebl/mpi/mp_comba_amd64_masm.asm
new file mode 100644
index 000000000..cb432583f
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mp_comba_amd64_masm.asm
@@ -0,0 +1,13066 @@
+; This Source Code Form is subject to the terms of the Mozilla Public
+; License, v. 2.0. If a copy of the MPL was not distributed with this
+; file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+;/* TomsFastMath, a fast ISO C bignum library.
+; *
+; * This project is meant to fill in where LibTomMath
+; * falls short. That is speed ;-)
+; *
+; * This project is public domain and free for all purposes.
+; *
+; * Tom St Denis, tomstdenis@iahu.ca
+; */
+
+;/*
+; * The source file from which this assembly was derived
+; * comes from TFM v0.03, which has the above license.
+; * This source was derived from mp_comba_amd64.sun.s and converted to
+; * MASM syntax.
+; */
+
+.CODE
+
+externdef memcpy:PROC
+
+public s_mp_mul_comba_4
+public s_mp_mul_comba_8
+public s_mp_mul_comba_16
+public s_mp_mul_comba_32
+public s_mp_sqr_comba_8
+public s_mp_sqr_comba_16
+public s_mp_sqr_comba_32
+
+
+; void s_mp_mul_comba_4(const mp_int *A, const mp_int *B, mp_int *C)
+;
+; Multiplies the four 64-bit digits of A by the four 64-bit digits of B,
+; one output column at a time, and writes the eight result digits through
+; the pointer held at offset 16 of C.  The dword at offset 8 of C (the
+; digit count) is then set to 8 and decremented while the top digit is
+; zero.  The dword at offset 0 of C receives (first dword of A) XOR
+; (first dword of B), or 0 when the digit count has dropped to 0.
+;
+; NOTE(review): offsets 0 / 8 / 16 presumably correspond to the mp_int
+; fields sign / used / dp -- confirm against the mp_int declaration.
+;
+; Arguments are taken from rcx, rdx and r8 and moved into rdi, rsi and rdx
+; before the body runs.  rdi, rsi, r12, rbp and rbx are saved on entry and
+; restored before returning.
+
+ ALIGN 16
+s_mp_mul_comba_4 PROC
+
+ push rdi
+ push rsi
+
+ ; rdi = A, rsi = B, rdx = C (copied to rbx below)
+ mov rdi, rcx
+ mov rsi, rdx
+ mov rdx, r8
+
+ push r12
+ push rbp
+ push rbx
+ ; 64 bytes of scratch: A's digits at [0..24+rsp], B's at [32..56+rsp]
+ sub rsp, 64
+ mov r9, qword ptr [16+rdi]
+ mov rbx, rdx
+ mov rdx, qword ptr [16+rsi]
+ mov rax, qword ptr [r9]
+ mov qword ptr [-64+64+rsp], rax
+ mov r8, qword ptr [8+r9]
+ mov qword ptr [-56+64+rsp], r8
+ mov rbp, qword ptr [16+r9]
+ mov qword ptr [-48+64+rsp], rbp
+ mov r12, qword ptr [24+r9]
+ mov qword ptr [-40+64+rsp], r12
+ mov rcx, qword ptr [rdx]
+ mov qword ptr [-32+64+rsp], rcx
+ mov r10, qword ptr [8+rdx]
+ mov qword ptr [-24+64+rsp], r10
+ mov r11, qword ptr [16+rdx]
+ ; r10 is kept at zero and used to clear the accumulator registers
+ xor r10d, r10d
+ mov r8, r10
+ mov r9, r10
+ mov rbp, r10
+ mov qword ptr [-16+64+rsp], r11
+ ; r11 = destination digit pointer read from offset 16 of C
+ mov r11, qword ptr [16+rbx]
+ mov rax, qword ptr [24+rdx]
+ mov qword ptr [-8+64+rsp], rax
+ mov rax, qword ptr [-64+64+rsp]
+ mul qword ptr [-32+64+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rbp, 0
+ mov qword ptr [r11], r8 ; result digit 0
+ mov r8, rbp
+ mov rbp, r10
+ mov rax, qword ptr [-64+64+rsp]
+ mul qword ptr [-24+64+rsp]
+ add r9, rax
+ adc r8, rdx
+ adc rbp, 0
+ mov r12, rbp
+ mov rax, qword ptr [-56+64+rsp]
+ mul qword ptr [-32+64+rsp]
+ add r9, rax
+ adc r8, rdx
+ adc r12, 0
+ mov qword ptr [8+r11], r9 ; result digit 1
+ mov r9, r12
+ mov r12, r10
+ mov rax, qword ptr [-64+64+rsp]
+ mul qword ptr [-16+64+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc r12, 0
+ mov rcx, r12
+ mov rax, qword ptr [-56+64+rsp]
+ mul qword ptr [-24+64+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [-48+64+rsp]
+ mul qword ptr [-32+64+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [16+r11], r8 ; result digit 2
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-64+64+rsp]
+ mul qword ptr [-8+64+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-56+64+rsp]
+ mul qword ptr [-16+64+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-48+64+rsp]
+ mul qword ptr [-24+64+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [-40+64+rsp]
+ mul qword ptr [-32+64+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [24+r11], rcx ; result digit 3
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-56+64+rsp]
+ mul qword ptr [-8+64+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-48+64+rsp]
+ mul qword ptr [-16+64+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [-40+64+rsp]
+ mul qword ptr [-24+64+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [32+r11], r8 ; result digit 4
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-48+64+rsp]
+ mul qword ptr [-8+64+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov r12, r8
+ mov rbp, r9
+ mov rax, qword ptr [-40+64+rsp]
+ mul qword ptr [-16+64+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [40+r11], rcx ; result digit 5
+ mov r8, rbp
+ mov rcx, r12
+ mov rax, qword ptr [-40+64+rsp]
+ mul qword ptr [-8+64+rsp]
+ add r8, rax
+ adc rcx, rdx
+ adc r10, 0
+ mov qword ptr [48+r11], r8 ; result digit 6
+ ; esi = first dword of B XOR first dword of A (stored to C at L9)
+ mov esi, dword ptr [rsi]
+ xor esi, dword ptr [rdi]
+ test rcx, rcx
+ mov qword ptr [56+r11], rcx ; result digit 7
+ mov dword ptr [8+rbx], 8
+ ; top digit non-zero: nothing to strip
+ jne L9
+ ALIGN 16
+ ; Strip leading zero digits: decrement the digit count and stop when it
+ ; reaches zero or when the new top digit is non-zero.
+L18:
+ mov edx, dword ptr [8+rbx]
+ lea edi, dword ptr [-1+rdx]
+ test edi, edi
+ mov dword ptr [8+rbx], edi
+ je L9
+ lea r10d, dword ptr [-2+rdx]
+ ; Digits are 64 bits wide, so the whole qword must be tested; a dword
+ ; compare would discard a digit whose low 32 bits happen to be zero.
+ cmp qword ptr [r11+r10*8], 0
+ je L18
+L9:
+ ; Store 0 at offset 0 of C when the digit count is 0, else the XOR value.
+ mov edx, dword ptr [8+rbx]
+ xor r11d, r11d
+ test edx, edx
+ cmovne r11d, esi
+ mov dword ptr [rbx], r11d
+ add rsp, 64
+ pop rbx
+ pop rbp
+ pop r12
+
+ pop rsi
+ pop rdi
+
+ ret
+
+s_mp_mul_comba_4 ENDP
+
+
+; void s_mp_mul_comba_8(const mp_int *A, const mp_int *B, mp_int *C)
+;
+; Multiplies the eight 64-bit digits of A by the eight 64-bit digits of B,
+; one output column at a time, and writes the sixteen result digits through
+; the pointer held at offset 16 of C.  The dword at offset 8 of C (the
+; digit count) is then set to 16 and decremented while the top digit is
+; zero.  The dword at offset 0 of C receives (first dword of A) XOR
+; (first dword of B), or 0 when the digit count has dropped to 0.
+;
+; NOTE(review): offsets 0 / 8 / 16 presumably correspond to the mp_int
+; fields sign / used / dp -- confirm against the mp_int declaration.
+;
+; Arguments are taken from rcx, rdx and r8 and moved into rdi, rsi and rdx
+; before the body runs.  rdi, rsi, r12, rbp and rbx are saved on entry and
+; restored before returning.
+
+ ALIGN 16
+s_mp_mul_comba_8 PROC
+
+ push rdi
+ push rsi
+
+ ; rdi = A, rsi = B, rdx = C (copied to rbx below)
+ mov rdi, rcx
+ mov rsi, rdx
+ mov rdx, r8
+
+ push r12
+ push rbp
+ push rbx
+ mov rbx, rdx
+ ; 136 bytes of scratch: A's digits at [8..64+rsp], B's at [72..128+rsp]
+ sub rsp, 8+128
+ mov rdx, qword ptr [16+rdi]
+ mov r8, qword ptr [rdx]
+ mov qword ptr [-120+128+rsp], r8
+ mov rbp, qword ptr [8+rdx]
+ mov qword ptr [-112+128+rsp], rbp
+ mov r9, qword ptr [16+rdx]
+ mov qword ptr [-104+128+rsp], r9
+ mov r12, qword ptr [24+rdx]
+ mov qword ptr [-96+128+rsp], r12
+ mov rcx, qword ptr [32+rdx]
+ mov qword ptr [-88+128+rsp], rcx
+ mov r10, qword ptr [40+rdx]
+ mov qword ptr [-80+128+rsp], r10
+ mov r11, qword ptr [48+rdx]
+ mov qword ptr [-72+128+rsp], r11
+ mov rax, qword ptr [56+rdx]
+ mov rdx, qword ptr [16+rsi]
+ mov qword ptr [-64+128+rsp], rax
+ mov r8, qword ptr [rdx]
+ mov qword ptr [-56+128+rsp], r8
+ mov rbp, qword ptr [8+rdx]
+ mov qword ptr [-48+128+rsp], rbp
+ mov r9, qword ptr [16+rdx]
+ mov qword ptr [-40+128+rsp], r9
+ mov r12, qword ptr [24+rdx]
+ mov qword ptr [-32+128+rsp], r12
+ mov rcx, qword ptr [32+rdx]
+ mov qword ptr [-24+128+rsp], rcx
+ mov r10, qword ptr [40+rdx]
+ mov qword ptr [-16+128+rsp], r10
+ mov r11, qword ptr [48+rdx]
+ ; r10 is kept at zero and used to clear the accumulator registers
+ xor r10d, r10d
+ mov r8, r10
+ mov r9, r10
+ mov rbp, r10
+ mov qword ptr [-8+128+rsp], r11
+ ; r11 = destination digit pointer read from offset 16 of C
+ mov r11, qword ptr [16+rbx]
+ mov rax, qword ptr [56+rdx]
+ mov qword ptr [128+rsp], rax
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [-56+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rbp, 0
+ mov qword ptr [r11], r8 ; result digit 0
+ mov r8, rbp
+ mov rbp, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [-48+128+rsp]
+ add r9, rax
+ adc r8, rdx
+ adc rbp, 0
+ mov r12, rbp
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [-56+128+rsp]
+ add r9, rax
+ adc r8, rdx
+ adc r12, 0
+ mov qword ptr [8+r11], r9 ; result digit 1
+ mov r9, r12
+ mov r12, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [-40+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc r12, 0
+ mov rcx, r12
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [-48+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [-56+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [16+r11], r8 ; result digit 2
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [-32+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [-40+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [-48+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [-56+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [24+r11], rcx ; result digit 3
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [-24+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [-32+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [-40+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [-48+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [-56+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [32+r11], r8 ; result digit 4
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [-16+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [-24+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [-32+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [-40+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [-48+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [-56+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [40+r11], rcx ; result digit 5
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [-8+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [-16+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [-24+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [-32+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [-40+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [-48+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [-56+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [48+r11], r8 ; result digit 6
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [-8+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [-16+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [-24+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [-32+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [-40+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [-48+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [-56+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [56+r11], rcx ; result digit 7
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [-8+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [-16+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [-24+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [-32+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [-40+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [-48+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [64+r11], r8 ; result digit 8
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [-8+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [-16+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [-24+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [-32+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [-40+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [72+r11], rcx ; result digit 9
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [-8+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [-16+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [-24+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [-32+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [80+r11], r8 ; result digit 10
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [-8+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [-16+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [-24+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [88+r11], rcx ; result digit 11
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [-8+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [-16+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [96+r11], r8 ; result digit 12
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov r12, r8
+ mov rbp, r9
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [-8+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [104+r11], rcx ; result digit 13
+ mov r8, rbp
+ mov rcx, r12
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [128+rsp]
+ add r8, rax
+ adc rcx, rdx
+ adc r10, 0
+ mov qword ptr [112+r11], r8 ; result digit 14
+ ; esi = first dword of B XOR first dword of A (stored to C at L35)
+ mov esi, dword ptr [rsi]
+ xor esi, dword ptr [rdi]
+ test rcx, rcx
+ mov qword ptr [120+r11], rcx ; result digit 15
+ mov dword ptr [8+rbx], 16
+ ; top digit non-zero: nothing to strip
+ jne L35
+ ALIGN 16
+ ; Strip leading zero digits: decrement the digit count and stop when it
+ ; reaches zero or when the new top digit is non-zero.
+L43:
+ mov edx, dword ptr [8+rbx]
+ lea edi, dword ptr [-1+rdx]
+ test edi, edi
+ mov dword ptr [8+rbx], edi
+ je L35
+ lea eax, dword ptr [-2+rdx]
+ ; Digits are 64 bits wide, so the whole qword must be tested; a dword
+ ; compare would discard a digit whose low 32 bits happen to be zero.
+ cmp qword ptr [r11+rax*8], 0
+ je L43
+L35:
+ ; Store 0 at offset 0 of C when the digit count is 0, else the XOR value.
+ mov r11d, dword ptr [8+rbx]
+ xor edx, edx
+ test r11d, r11d
+ cmovne edx, esi
+ mov dword ptr [rbx], edx
+ add rsp, 8+128
+ pop rbx
+ pop rbp
+ pop r12
+
+ pop rsi
+ pop rdi
+
+ ret
+
+s_mp_mul_comba_8 ENDP
+
+
+; void s_mp_mul_comba_16(const mp_int *A, const mp_int *B, mp_int *C);
+
+ ALIGN 16
+s_mp_mul_comba_16 PROC
+
+ push rdi
+ push rsi
+
+ mov rdi, rcx
+ mov rsi, rdx
+ mov rdx, r8
+
+ push r12
+ push rbp
+ push rbx
+ mov rbx, rdx
+ sub rsp, 136+128
+ mov rax, qword ptr [16+rdi]
+ mov r8, qword ptr [rax]
+ mov qword ptr [-120+128+rsp], r8
+ mov rbp, qword ptr [8+rax]
+ mov qword ptr [-112+128+rsp], rbp
+ mov r9, qword ptr [16+rax]
+ mov qword ptr [-104+128+rsp], r9
+ mov r12, qword ptr [24+rax]
+ mov qword ptr [-96+128+rsp], r12
+ mov rcx, qword ptr [32+rax]
+ mov qword ptr [-88+128+rsp], rcx
+ mov r10, qword ptr [40+rax]
+ mov qword ptr [-80+128+rsp], r10
+ mov rdx, qword ptr [48+rax]
+ mov qword ptr [-72+128+rsp], rdx
+ mov r11, qword ptr [56+rax]
+ mov qword ptr [-64+128+rsp], r11
+ mov r8, qword ptr [64+rax]
+ mov qword ptr [-56+128+rsp], r8
+ mov rbp, qword ptr [72+rax]
+ mov qword ptr [-48+128+rsp], rbp
+ mov r9, qword ptr [80+rax]
+ mov qword ptr [-40+128+rsp], r9
+ mov r12, qword ptr [88+rax]
+ mov qword ptr [-32+128+rsp], r12
+ mov rcx, qword ptr [96+rax]
+ mov qword ptr [-24+128+rsp], rcx
+ mov r10, qword ptr [104+rax]
+ mov qword ptr [-16+128+rsp], r10
+ mov rdx, qword ptr [112+rax]
+ mov qword ptr [-8+128+rsp], rdx
+ mov r11, qword ptr [120+rax]
+ mov qword ptr [128+rsp], r11
+ mov r11, qword ptr [16+rsi]
+ mov r8, qword ptr [r11]
+ mov qword ptr [8+128+rsp], r8
+ mov rbp, qword ptr [8+r11]
+ mov qword ptr [16+128+rsp], rbp
+ mov r9, qword ptr [16+r11]
+ mov qword ptr [24+128+rsp], r9
+ mov r12, qword ptr [24+r11]
+ mov qword ptr [32+128+rsp], r12
+ mov rcx, qword ptr [32+r11]
+ mov qword ptr [40+128+rsp], rcx
+ mov r10, qword ptr [40+r11]
+ mov qword ptr [48+128+rsp], r10
+ mov rdx, qword ptr [48+r11]
+ mov qword ptr [56+128+rsp], rdx
+ mov rax, qword ptr [56+r11]
+ mov qword ptr [64+128+rsp], rax
+ mov r8, qword ptr [64+r11]
+ mov qword ptr [72+128+rsp], r8
+ mov rbp, qword ptr [72+r11]
+ mov qword ptr [80+128+rsp], rbp
+ mov r9, qword ptr [80+r11]
+ mov qword ptr [88+128+rsp], r9
+ mov r12, qword ptr [88+r11]
+ mov qword ptr [96+128+rsp], r12
+ mov rcx, qword ptr [96+r11]
+ mov qword ptr [104+128+rsp], rcx
+ mov r10, qword ptr [104+r11]
+ mov qword ptr [112+128+rsp], r10
+ mov rdx, qword ptr [112+r11]
+ xor r10d, r10d
+ mov r8, r10
+ mov r9, r10
+ mov rbp, r10
+ mov qword ptr [120+128+rsp], rdx
+ mov rax, qword ptr [120+r11]
+ mov qword ptr [128+128+rsp], rax
+ mov r11, qword ptr [16+rbx]
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [8+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rbp, 0
+ mov qword ptr [r11], r8
+ mov r8, rbp
+ mov rbp, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [16+128+rsp]
+ add r9, rax
+ adc r8, rdx
+ adc rbp, 0
+ mov r12, rbp
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [8+128+rsp]
+ add r9, rax
+ adc r8, rdx
+ adc r12, 0
+ mov qword ptr [8+r11], r9
+ mov r9, r12
+ mov r12, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc r12, 0
+ mov rcx, r12
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [16+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [8+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [16+r11], r8
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [16+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [8+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [24+r11], rcx
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [16+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [8+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [32+r11], r8
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [16+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [8+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [40+r11], rcx
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [16+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [8+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [48+r11], r8
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [16+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [8+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [56+r11], rcx
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [16+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [8+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [64+r11], r8
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [16+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [8+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [72+r11], rcx
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [16+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [8+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [80+r11], r8
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [16+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [8+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [88+r11], rcx
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [16+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [8+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [96+r11], r8
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [16+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [8+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [104+r11], rcx
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [16+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [8+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [112+r11], r8
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-120+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [16+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [8+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [120+r11], rcx
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-112+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [24+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [16+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [128+r11], r8
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-104+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [32+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [24+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [136+r11], rcx
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-96+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [40+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [32+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [144+r11], r8
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-88+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [48+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [40+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [152+r11], rcx
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-80+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [56+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [48+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [160+r11], r8
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-72+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [64+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [56+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [168+r11], rcx
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-64+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [72+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [64+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [176+r11], r8
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-56+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [80+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [72+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [184+r11], rcx
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-48+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [88+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [80+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [192+r11], r8
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-40+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [96+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [88+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [200+r11], rcx
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-32+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [104+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [96+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [208+r11], r8
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-24+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [112+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbp, r9
+ mov r12, r8
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [104+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [216+r11], rcx
+ mov r9, r12
+ mov r8, rbp
+ mov rcx, r10
+ mov rax, qword ptr [-16+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [120+128+rsp]
+ add r8, rax
+ adc r9, rdx
+ adc rcx, 0
+ mov rbp, r9
+ mov r12, rcx
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [112+128+rsp]
+ add r8, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [224+r11], r8
+ mov r9, r12
+ mov rcx, rbp
+ mov r8, r10
+ mov rax, qword ptr [-8+128+rsp]
+ mul qword ptr [128+128+rsp]
+ add rcx, rax
+ adc r9, rdx
+ adc r8, 0
+ mov r12, r8
+ mov rbp, r9
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [120+128+rsp]
+ add rcx, rax
+ adc rbp, rdx
+ adc r12, 0
+ mov qword ptr [232+r11], rcx
+ mov r8, rbp
+ mov rcx, r12
+ mov rax, qword ptr [128+rsp]
+ mul qword ptr [128+128+rsp]
+ add r8, rax
+ adc rcx, rdx
+ adc r10, 0
+ mov qword ptr [240+r11], r8
+ mov esi, dword ptr [rsi]
+ xor esi, dword ptr [rdi]
+ test rcx, rcx
+ mov qword ptr [248+r11], rcx
+ mov dword ptr [8+rbx], 32
+ jne L76
+ ALIGN 16
+L84:
+ mov edx, dword ptr [8+rbx]
+ lea edi, dword ptr [-1+rdx]
+ test edi, edi
+ mov dword ptr [8+rbx], edi
+ je L76
+ lea eax, dword ptr [-2+rdx]
+ cmp dword ptr [r11+rax*8], 0
+ je L84
+L76:
+ mov edx, dword ptr [8+rbx]
+ xor r11d, r11d
+ test edx, edx
+ cmovne r11d, esi
+ mov dword ptr [rbx], r11d
+ add rsp, 136+128
+ pop rbx
+ pop rbp
+ pop r12
+
+ pop rsi
+ pop rdi
+
+ ret
+
+s_mp_mul_comba_16 ENDP
+
+; void s_mp_mul_comba_32(const mp_int *A, const mp_int *B, mp_int *C)
+
+
+ ALIGN 16
+s_mp_mul_comba_32 PROC ; a "FRAME" function
+
+ push rdi
+ push rsi
+
+ mov rdi, rcx
+ mov rsi, rdx
+ mov rdx, r8
+
+ push rbp
+ mov rbp, rsp
+ push r13
+ mov r13, rdx
+; mov edx, 256
+ mov r8d, 256
+ push r12
+ mov r12, rsi
+ push rbx
+ mov rbx, rdi
+ sub rsp, 520+32 ; +32 for "home" storage
+; mov rsi, qword ptr [16+rdi]
+; lea rdi, qword ptr [-544+rbp]
+ mov rdx, qword ptr [16+rdi]
+ lea rcx, qword ptr [-544+rbp]
+ call memcpy
+; mov rsi, qword ptr [16+r12]
+; lea rdi, qword ptr [-288+rbp]
+; mov edx, 256
+ mov rdx, qword ptr [16+r12]
+ lea rcx, qword ptr [-288+rbp]
+ mov r8d, 256
+ call memcpy
+ mov r9, qword ptr [16+r13]
+ xor r8d, r8d
+ mov rsi, r8
+ mov rdi, r8
+ mov r10, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov qword ptr [r9], rsi
+ mov rsi, r10
+ mov r10, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-280+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc r10, 0
+ mov r11, r10
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-288+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc r11, 0
+ mov qword ptr [8+r9], rdi
+ mov rdi, r11
+ mov r11, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc r11, 0
+ mov rcx, r11
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [16+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-280+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-288+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [24+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [32+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-280+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-288+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [40+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [48+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-280+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-288+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [56+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [64+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-280+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-288+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [72+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [80+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-280+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-288+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [88+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-192+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-200+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [96+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-192+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-280+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-288+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [104+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-176+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-184+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-192+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-200+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [112+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-176+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-192+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-280+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-288+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [120+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-160+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-168+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-176+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-184+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-192+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-200+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [128+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-160+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-176+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-192+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-280+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-288+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [136+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-144+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-152+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-160+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-168+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-176+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-184+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-192+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-200+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [144+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-144+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-160+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-176+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-192+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-280+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-288+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [152+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-128+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-136+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-144+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-152+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-160+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-168+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-176+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-184+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-192+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-200+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [160+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-128+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-144+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-160+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-176+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-192+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-280+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-288+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [168+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-112+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-120+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-128+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-136+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-144+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-152+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-160+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-168+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-176+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-184+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-192+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-200+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [176+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-112+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-128+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-144+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-160+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-176+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-192+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-280+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-288+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [184+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-96+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-104+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-112+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-120+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-128+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-136+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-144+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-152+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-160+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-168+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-176+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-184+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-192+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-200+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [192+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-96+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-112+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-128+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-144+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-160+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-176+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-192+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-280+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-288+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [200+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-80+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-88+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-96+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-104+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-112+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-120+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-128+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-136+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-144+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-152+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-160+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-168+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-176+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-184+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-192+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-200+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [208+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-80+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-96+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-112+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-128+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-144+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-160+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-176+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-192+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-280+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-288+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [216+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-64+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-72+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-80+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-88+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-96+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-104+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-112+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-120+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-128+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-136+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-144+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-152+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-160+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-168+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-176+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-184+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-192+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-200+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [224+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-64+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-80+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-96+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-112+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-128+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-144+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-160+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-176+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-192+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-280+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-288+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [232+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-48+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-56+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-64+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-72+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-80+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-88+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-96+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-104+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-112+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-120+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-128+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-136+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-144+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-152+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-160+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-168+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-176+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-184+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-192+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-200+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-288+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [240+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-544+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-48+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-64+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-80+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-96+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-112+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-128+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-144+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-160+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-176+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-192+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-280+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-288+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [248+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-536+rbp]
+ mul qword ptr [-40+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-48+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-56+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-64+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-72+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-80+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-88+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-96+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-104+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-112+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-120+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-128+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-136+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-144+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-152+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-160+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-168+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-176+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-184+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-192+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-200+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-272+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-280+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [256+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-528+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-48+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-64+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-80+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-96+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-112+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-128+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-144+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-160+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-176+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-192+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-264+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-272+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [264+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-520+rbp]
+ mul qword ptr [-40+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-48+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-56+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-64+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-72+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-80+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-88+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-96+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-104+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-112+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-120+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-128+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-136+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-144+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-152+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-160+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-168+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-176+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-184+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-192+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-200+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-256+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-264+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [272+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-512+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-48+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-64+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-80+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-96+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-112+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-128+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-144+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-160+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-176+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-192+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-248+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-256+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [280+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-504+rbp]
+ mul qword ptr [-40+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-48+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-56+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-64+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-72+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-80+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-88+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-96+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-104+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-112+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-120+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-128+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-136+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-144+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-152+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-160+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-168+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-176+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-184+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-192+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-200+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-240+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-248+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [288+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-496+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-48+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-64+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-80+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-96+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-112+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-128+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-144+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-160+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-176+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-192+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-232+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-240+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [296+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-488+rbp]
+ mul qword ptr [-40+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-48+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-56+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-64+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-72+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-80+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-88+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-96+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-104+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-112+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-120+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-128+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-136+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-144+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-152+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-160+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-168+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-176+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-184+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-192+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-200+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-224+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-232+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [304+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-480+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-48+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-64+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-80+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-96+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-112+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-128+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-144+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-160+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-176+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-192+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-216+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-224+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [312+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-472+rbp]
+ mul qword ptr [-40+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-48+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-56+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-64+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-72+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-80+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-88+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-96+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-104+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-112+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-120+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-128+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-136+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-144+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-152+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-160+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-168+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-176+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-184+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-192+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-200+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-208+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-216+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [320+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-464+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-48+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-64+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-80+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-96+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-112+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-128+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-144+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-160+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-176+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-192+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-200+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-208+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [328+r9], rcx
+ mov rdi, r11
+ mov r11, r10
+ mov r10, r8
+ mov rax, qword ptr [-456+rbp]
+ mul qword ptr [-40+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-48+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-56+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-64+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-72+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-80+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-88+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-96+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-104+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-112+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-120+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-128+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-136+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-144+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-152+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-160+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-168+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-176+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-184+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-192+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-200+rbp]
+ add r11, rax
+ adc rdi, rdx
+ adc r10, 0
+ mov qword ptr [336+r9], r11
+ mov rsi, r10
+ mov r10, r8
+ mov rax, qword ptr [-448+rbp]
+ mul qword ptr [-40+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc r10, 0
+ mov rcx, r10
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-48+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-56+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-64+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-72+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-80+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-88+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-96+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-104+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-112+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-120+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-128+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-136+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-144+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-152+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-160+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-168+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-176+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-184+rbp]
+ add rdi, rax
+ adc rsi, rdx
+ adc rcx, 0
+ mov r11, rsi
+ mov r10, rcx
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-192+rbp]
+ add rdi, rax
+ adc r11, rdx
+ adc r10, 0
+ mov qword ptr [344+r9], rdi
+ mov rcx, r11
+ mov rdi, r10
+ mov r11, r8
+ mov rax, qword ptr [-440+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc r11, 0
+ mov rsi, r11
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-48+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-64+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-80+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-96+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-112+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-128+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-144+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-160+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-176+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-184+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [352+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-432+rbp]
+ mul qword ptr [-40+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-48+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-56+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-64+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-72+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-80+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-88+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-96+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-104+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-112+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-120+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-128+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-136+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-144+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-152+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-160+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-168+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-176+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [360+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-424+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-48+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-64+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-80+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-96+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-112+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-128+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-144+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-160+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-168+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [368+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-416+rbp]
+ mul qword ptr [-40+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-48+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-56+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-64+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-72+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-80+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-88+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-96+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-104+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-112+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-120+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-128+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-136+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-144+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-152+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-160+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [376+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-408+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-48+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-64+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-80+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-96+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-112+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-128+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-144+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-152+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [384+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-400+rbp]
+ mul qword ptr [-40+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-48+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-56+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-64+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-72+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-80+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-88+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-96+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-104+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-112+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-120+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-128+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-136+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-144+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [392+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-392+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-48+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-64+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-80+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-96+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-112+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-128+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-136+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [400+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-384+rbp]
+ mul qword ptr [-40+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-48+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-56+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-64+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-72+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-80+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-88+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-96+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-104+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-112+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-120+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-128+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [408+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-376+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-48+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-64+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-80+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-96+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-112+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-120+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [416+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-368+rbp]
+ mul qword ptr [-40+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-48+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-56+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-64+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-72+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-80+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-88+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-96+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-104+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-112+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [424+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-360+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-48+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-64+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-80+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-96+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-104+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [432+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-352+rbp]
+ mul qword ptr [-40+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-48+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-56+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-64+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-72+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-80+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-88+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-96+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [440+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-344+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-48+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-64+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-80+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-88+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [448+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-336+rbp]
+ mul qword ptr [-40+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-48+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-56+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-64+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-72+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-80+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [456+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-328+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-48+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-64+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-72+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [464+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-320+rbp]
+ mul qword ptr [-40+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-48+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-56+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r10, rdi
+ mov r11, rcx
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-64+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [472+r9], rsi
+ mov rdi, r11
+ mov rcx, r10
+ mov rsi, r8
+ mov rax, qword ptr [-312+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-48+rbp]
+ add rcx, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r10, rdi
+ mov r11, rsi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-56+rbp]
+ add rcx, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [480+r9], rcx
+ mov rdi, r11
+ mov rsi, r10
+ mov rcx, r8
+ mov rax, qword ptr [-304+rbp]
+ mul qword ptr [-40+rbp]
+ add rsi, rax
+ adc rdi, rdx
+ adc rcx, 0
+ mov r11, rcx
+ mov r10, rdi
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-48+rbp]
+ add rsi, rax
+ adc r10, rdx
+ adc r11, 0
+ mov qword ptr [488+r9], rsi
+ mov rcx, r10
+ mov rsi, r11
+ mov rax, qword ptr [-296+rbp]
+ mul qword ptr [-40+rbp]
+ add rcx, rax
+ adc rsi, rdx
+ adc r8, 0
+ mov qword ptr [496+r9], rcx
+ mov ecx, dword ptr [r12]
+ xor ecx, dword ptr [rbx]
+ test rsi, rsi
+ mov qword ptr [504+r9], rsi
+ mov dword ptr [8+r13], 64
+ jne L149
+ ALIGN 16
+L157:
+ mov edx, dword ptr [8+r13]
+ lea ebx, dword ptr [-1+rdx]
+ test ebx, ebx
+ mov dword ptr [8+r13], ebx
+ je L149
+ lea r12d, dword ptr [-2+rdx]
+ cmp dword ptr [r9+r12*8], 0
+ je L157
+L149:
+ mov r9d, dword ptr [8+r13]
+ xor edx, edx
+ test r9d, r9d
+ cmovne edx, ecx
+ mov dword ptr [r13], edx
+ add rsp, 520+32 ; +32 for "home" storage
+ pop rbx
+ pop r12
+ pop r13
+ pop rbp
+ pop rsi
+ pop rdi
+
+ ret
+
+s_mp_mul_comba_32 ENDP
+
+
+ ; void s_mp_sqr_comba_4(const mp_int *A, mp_int *B);
+ ;
+ ; Squares the four 64-bit digits of A and stores the eight-digit result in
+ ; B's digit array (column-by-column "comba" accumulation).
+ ;
+ ; On entry rcx = A and rdx = B. The digit pointer is read from offset 16
+ ; of each structure. B's dword at offset 8 is set to 8 and then reduced
+ ; while the top digit is zero; B's dword at offset 0 is set to 0.
+ ; NOTE(review): offsets 0 / 8 / 16 are presumably mp_int sign / used / dp --
+ ; confirm against the mp_int declaration.
+ ;
+ ; All eight result digits are built in a stack buffer first and copied to B
+ ; only after the last read of A's digits.
+ ; rbx, rbp, rsi and rdi are saved and restored.
+
+ ALIGN 16
+s_mp_sqr_comba_4 PROC
+
+ push rdi
+ push rsi
+
+ mov rdi, rcx
+ mov rsi, rdx
+
+ push rbp
+ push rbx
+ sub rsp, 80
+ ; r11 = B; rsi = 0 is kept as the constant used to clear accumulators
+ mov r11, rsi
+ xor esi, esi
+ mov r10, rsi
+ mov rbp, rsi
+ mov r8, rsi
+ mov rbx, rsi
+ ; rcx = pointer to A's digits a0..a3
+ mov rcx, qword ptr [16+rdi]
+ mov rdi, rsi
+ ; column 0: a0*a0 (r10 keeps digit 0 until the final copy)
+ mov rax, qword ptr [rcx]
+ mul rax
+ add r10, rax
+ adc rbx, rdx
+ adc rdi, 0
+ mov qword ptr [-72+80+rsp], r10
+ ; column 1: 2*a0*a1 (each cross product is added twice)
+ mov rax, qword ptr [rcx]
+ mul qword ptr [8+rcx]
+ add rbx, rax
+ adc rdi, rdx
+ adc rbp, 0
+ add rbx, rax
+ adc rdi, rdx
+ adc rbp, 0
+ mov qword ptr [-64+80+rsp], rbx
+ ; column 2: 2*a0*a2 + a1*a1
+ mov rax, qword ptr [rcx]
+ mul qword ptr [16+rcx]
+ add rdi, rax
+ adc rbp, rdx
+ adc r8, 0
+ add rdi, rax
+ adc rbp, rdx
+ adc r8, 0
+ mov rbx, rbp
+ mov rbp, r8
+ mov rax, qword ptr [8+rcx]
+ mul rax
+ add rdi, rax
+ adc rbx, rdx
+ adc rbp, 0
+ mov qword ptr [-56+80+rsp], rdi
+ mov r9, rbp
+ mov r8, rbx
+ mov rdi, rsi
+ ; column 3: 2*a0*a3 + 2*a1*a2
+ mov rax, qword ptr [rcx]
+ mul qword ptr [24+rcx]
+ add r8, rax
+ adc r9, rdx
+ adc rdi, 0
+ add r8, rax
+ adc r9, rdx
+ adc rdi, 0
+ mov rbx, r9
+ mov rbp, rdi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [16+rcx]
+ add r8, rax
+ adc rbx, rdx
+ adc rbp, 0
+ add r8, rax
+ adc rbx, rdx
+ adc rbp, 0
+ mov qword ptr [-48+80+rsp], r8
+ mov r9, rbp
+ mov rdi, rbx
+ mov r8, rsi
+ ; B: digit count = 8, dword at offset 0 = 0
+ mov dword ptr [8+r11], 8
+ mov dword ptr [r11], 0
+ ; column 4: 2*a1*a3 + a2*a2
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [24+rcx]
+ add rdi, rax
+ adc r9, rdx
+ adc r8, 0
+ add rdi, rax
+ adc r9, rdx
+ adc r8, 0
+ mov rbx, r9
+ mov rbp, r8
+ mov rax, qword ptr [16+rcx]
+ mul rax
+ add rdi, rax
+ adc rbx, rdx
+ adc rbp, 0
+ mov rax, rbp
+ mov qword ptr [-40+80+rsp], rdi
+ mov rbp, rbx
+ mov rdi, rax
+ mov rbx, rsi
+ ; column 5: 2*a2*a3
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [24+rcx]
+ add rbp, rax
+ adc rdi, rdx
+ adc rbx, 0
+ add rbp, rax
+ adc rdi, rdx
+ adc rbx, 0
+ mov qword ptr [-32+80+rsp], rbp
+ mov r9, rbx
+ ; column 6: a3*a3; the carry left in r9 becomes digit 7
+ mov rax, qword ptr [24+rcx]
+ mul rax
+ add rdi, rax
+ adc r9, rdx
+ adc rsi, 0
+ ; copy the eight digits from the stack buffer into B's digit array
+ mov rdx, qword ptr [16+r11]
+ mov qword ptr [-24+80+rsp], rdi
+ mov qword ptr [-16+80+rsp], r9
+ mov qword ptr [rdx], r10
+ mov r8, qword ptr [-64+80+rsp]
+ mov qword ptr [8+rdx], r8
+ mov rbp, qword ptr [-56+80+rsp]
+ mov qword ptr [16+rdx], rbp
+ mov rdi, qword ptr [-48+80+rsp]
+ mov qword ptr [24+rdx], rdi
+ mov rsi, qword ptr [-40+80+rsp]
+ mov qword ptr [32+rdx], rsi
+ mov rbx, qword ptr [-32+80+rsp]
+ mov qword ptr [40+rdx], rbx
+ mov rcx, qword ptr [-24+80+rsp]
+ mov qword ptr [48+rdx], rcx
+ mov rax, qword ptr [-16+80+rsp]
+ mov qword ptr [56+rdx], rax
+ ; strip zero digits from the top of the result.
+ ; Digits are 64 bits wide, so the zero test must cover the whole qword:
+ ; testing only the low dword would discard a top digit whose low half
+ ; happens to be zero.
+ mov edx, dword ptr [8+r11]
+ test edx, edx
+ je L168
+ lea ecx, dword ptr [-1+rdx]
+ mov rsi, qword ptr [16+r11]
+ mov r10d, ecx
+ cmp qword ptr [rsi+r10*8], 0
+ jne L166
+ mov edx, ecx
+ ALIGN 16
+L167:
+ test edx, edx
+ mov ecx, edx
+ je L171
+ dec edx
+ mov eax, edx
+ cmp qword ptr [rsi+rax*8], 0
+ je L167
+ mov dword ptr [8+r11], ecx
+ mov edx, ecx
+L166:
+ ; non-zero count: keep the dword at offset 0; otherwise it is forced to 0
+ test edx, edx
+ je L168
+ mov eax, dword ptr [r11]
+ jmp L169
+
+L171:
+ mov dword ptr [8+r11], edx
+L168:
+ xor eax, eax
+L169:
+ add rsp, 80
+ pop rbx
+ pop rbp
+ mov dword ptr [r11], eax
+
+ pop rsi
+ pop rdi
+
+ ret
+
+s_mp_sqr_comba_4 ENDP
+
+
+ ; void s_mp_sqr_comba_8(const mp_int *A, mp_int *B);
+ ;
+ ; Squares the eight 64-bit digits of A and stores the sixteen-digit result
+ ; in B's digit array (column-by-column "comba" accumulation).
+ ;
+ ; On entry rcx = A and rdx = B. The digit pointer is read from offset 16
+ ; of each structure. B's dword at offset 8 is set to 16 and then reduced
+ ; while the top digit is zero; B's dword at offset 0 is set to 0.
+ ; NOTE(review): offsets 0 / 8 / 16 are presumably mp_int sign / used / dp --
+ ; confirm against the mp_int declaration.
+ ;
+ ; All sixteen result digits are built in a stack buffer first and copied to
+ ; B only after the last read of A's digits.
+ ; rbx, rbp, rsi, rdi, r12, r13 and r14 are saved and restored.
+
+ ALIGN 16
+s_mp_sqr_comba_8 PROC
+
+ push rdi
+ push rsi
+
+ mov rdi, rcx
+ mov rsi, rdx
+
+ ; r9 = 0 is kept as the constant used to clear accumulators; rbp = B
+ push r14
+ xor r9d, r9d
+ mov r14, r9
+ mov r10, r9
+ push r13
+ mov r13, r9
+ push r12
+ mov r12, r9
+ push rbp
+ mov rbp, rsi
+ mov rsi, r9
+ push rbx
+ mov rbx, r9
+ sub rsp, 8+128
+ ; rcx = pointer to A's digits a0..a7
+ mov rcx, qword ptr [16+rdi]
+ ; column 0: a0*a0 (r14 keeps digit 0 until the final copy)
+ mov rax, qword ptr [rcx]
+ mul rax
+ add r14, rax
+ adc rbx, rdx
+ adc r12, 0
+ mov qword ptr [-120+128+rsp], r14
+ ; column 1: 2*a0*a1 (each cross product is added twice)
+ mov rax, qword ptr [rcx]
+ mul qword ptr [8+rcx]
+ add rbx, rax
+ adc r12, rdx
+ adc r10, 0
+ add rbx, rax
+ adc r12, rdx
+ adc r10, 0
+ mov qword ptr [-112+128+rsp], rbx
+ ; column 2: 2*a0*a2 + a1*a1
+ mov rax, qword ptr [rcx]
+ mul qword ptr [16+rcx]
+ add r12, rax
+ adc r10, rdx
+ adc r13, 0
+ add r12, rax
+ adc r10, rdx
+ adc r13, 0
+ mov rbx, r10
+ mov r10, r13
+ mov r13, r9
+ mov rax, qword ptr [8+rcx]
+ mul rax
+ add r12, rax
+ adc rbx, rdx
+ adc r10, 0
+ mov qword ptr [-104+128+rsp], r12
+ mov rdi, r10
+ mov r11, rbx
+ ; column 3: 2*a0*a3 + 2*a1*a2
+ mov rax, qword ptr [rcx]
+ mul qword ptr [24+rcx]
+ add r11, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add r11, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rbx, rdi
+ mov r10, rsi
+ mov rdi, r9
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [16+rcx]
+ add r11, rax
+ adc rbx, rdx
+ adc r10, 0
+ add r11, rax
+ adc rbx, rdx
+ adc r10, 0
+ mov rsi, r9
+ mov qword ptr [-96+128+rsp], r11
+ mov r8, r10
+ mov r12, rbx
+ mov r11, r9
+ ; column 4: 2*a0*a4 + 2*a1*a3 + a2*a2
+ mov rax, qword ptr [rcx]
+ mul qword ptr [32+rcx]
+ add r12, rax
+ adc r8, rdx
+ adc r13, 0
+ add r12, rax
+ adc r8, rdx
+ adc r13, 0
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [24+rcx]
+ add r12, rax
+ adc r8, rdx
+ adc r13, 0
+ add r12, rax
+ adc r8, rdx
+ adc r13, 0
+ mov rbx, r8
+ mov r10, r13
+ mov r8, r9
+ mov rax, qword ptr [16+rcx]
+ mul rax
+ add r12, rax
+ adc rbx, rdx
+ adc r10, 0
+ mov qword ptr [-88+128+rsp], r12
+ ; column 5: 2*(a0*a5 + a1*a4 + a2*a3)
+ ; from here on the cross products are first summed in a separate
+ ; three-word accumulator, which is then added to the column twice
+ mov rax, qword ptr [rcx]
+ mul qword ptr [40+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [32+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [24+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add rbx, r8
+ adc r10, rdi
+ adc r11, rsi
+ add rbx, r8
+ adc r10, rdi
+ adc r11, rsi
+ mov qword ptr [-80+128+rsp], rbx
+ ; column 6: 2*(a0*a6 + a1*a5 + a2*a4) + a3*a3
+ mov rax, qword ptr [rcx]
+ mul qword ptr [48+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [40+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [32+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, r9
+ mov r13, rdi
+ mov r12, rsi
+ add r10, r8
+ adc r11, r13
+ adc rax, r12
+ add r10, r8
+ adc r11, r13
+ adc rax, r12
+ mov rdx, rax
+ mov rbx, r11
+ mov rdi, r13
+ mov r11, rdx
+ mov rsi, r12
+ mov rax, qword ptr [24+rcx]
+ mul rax
+ add r10, rax
+ adc rbx, rdx
+ adc r11, 0
+ mov qword ptr [-72+128+rsp], r10
+ mov r10, r11
+ ; column 7: 2*(a0*a7 + a1*a6 + a2*a5 + a3*a4)
+ mov rax, qword ptr [rcx]
+ mul qword ptr [56+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [48+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [40+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [32+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, r9
+ add rbx, r8
+ adc r10, rdi
+ adc rax, rsi
+ add rbx, r8
+ adc r10, rdi
+ adc rax, rsi
+ mov qword ptr [-64+128+rsp], rbx
+ mov r11, rax
+ mov rbx, r9
+ ; column 8: 2*(a1*a7 + a2*a6 + a3*a5) + a4*a4
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [56+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [48+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [40+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r13, rdi
+ mov r12, rsi
+ add r10, r8
+ adc r11, r13
+ adc rbx, r12
+ add r10, r8
+ adc r11, r13
+ adc rbx, r12
+ mov rsi, rbx
+ mov rdi, r13
+ mov rbx, r11
+ mov r13, r12
+ mov r11, rsi
+ mov rax, qword ptr [32+rcx]
+ mul rax
+ add r10, rax
+ adc rbx, rdx
+ adc r11, 0
+ mov qword ptr [-56+128+rsp], r10
+ mov r10, r9
+ ; column 9: 2*(a2*a7 + a3*a6 + a4*a5)
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [56+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor r13, r13
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [48+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc r13, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [40+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc r13, 0
+ mov r12, rdi
+ mov rax, r13
+ add rbx, r8
+ adc r11, r12
+ adc r10, rax
+ add rbx, r8
+ adc r11, r12
+ adc r10, rax
+ mov qword ptr [-48+128+rsp], rbx
+ mov r12, r11
+ mov rsi, r10
+ mov rbx, r9
+ mov r11, r9
+ ; column 10: 2*a3*a7 + 2*a4*a6 + a5*a5
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [56+rcx]
+ add r12, rax
+ adc rsi, rdx
+ adc rbx, 0
+ add r12, rax
+ adc rsi, rdx
+ adc rbx, 0
+ mov r13, rbx
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [48+rcx]
+ add r12, rax
+ adc rsi, rdx
+ adc r13, 0
+ add r12, rax
+ adc rsi, rdx
+ adc r13, 0
+ mov r10, rsi
+ mov rbx, r13
+ mov r13, r9
+ mov rax, qword ptr [40+rcx]
+ mul rax
+ add r12, rax
+ adc r10, rdx
+ adc rbx, 0
+ mov qword ptr [-40+128+rsp], r12
+ mov r8, rbx
+ mov rdi, r10
+ ; column 11: 2*a4*a7 + 2*a5*a6
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [56+rcx]
+ add rdi, rax
+ adc r8, rdx
+ adc r11, 0
+ add rdi, rax
+ adc r8, rdx
+ adc r11, 0
+ mov r10, r8
+ mov rbx, r11
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [48+rcx]
+ add rdi, rax
+ adc r10, rdx
+ adc rbx, 0
+ add rdi, rax
+ adc r10, rdx
+ adc rbx, 0
+ mov qword ptr [-32+128+rsp], rdi
+ mov rsi, rbx
+ mov r12, r10
+ ; column 12: 2*a5*a7 + a6*a6
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [56+rcx]
+ add r12, rax
+ adc rsi, rdx
+ adc r13, 0
+ add r12, rax
+ adc rsi, rdx
+ adc r13, 0
+ mov r10, rsi
+ mov rbx, r13
+ mov rax, qword ptr [48+rcx]
+ mul rax
+ add r12, rax
+ adc r10, rdx
+ adc rbx, 0
+ mov qword ptr [-24+128+rsp], r12
+ mov rdi, r10
+ mov rsi, rbx
+ mov r10, r9
+ ; B: digit count = 16, dword at offset 0 = 0
+ mov dword ptr [8+rbp], 16
+ mov dword ptr [rbp], 0
+ ; column 13: 2*a6*a7
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [56+rcx]
+ add rdi, rax
+ adc rsi, rdx
+ adc r10, 0
+ add rdi, rax
+ adc rsi, rdx
+ adc r10, 0
+ mov qword ptr [-16+128+rsp], rdi
+ mov r8, r10
+ ; column 14: a7*a7; the carry left in r8 becomes digit 15
+ mov rax, qword ptr [56+rcx]
+ mul rax
+ add rsi, rax
+ adc r8, rdx
+ adc r9, 0
+ ; copy the sixteen digits from the stack buffer into B's digit array
+ mov rax, qword ptr [16+rbp]
+ mov qword ptr [-8+128+rsp], rsi
+ mov qword ptr [128+rsp], r8
+ mov qword ptr [rax], r14
+ mov rbx, qword ptr [-112+128+rsp]
+ mov qword ptr [8+rax], rbx
+ mov rcx, qword ptr [-104+128+rsp]
+ mov qword ptr [16+rax], rcx
+ mov rdx, qword ptr [-96+128+rsp]
+ mov qword ptr [24+rax], rdx
+ mov r14, qword ptr [-88+128+rsp]
+ mov qword ptr [32+rax], r14
+ mov r13, qword ptr [-80+128+rsp]
+ mov qword ptr [40+rax], r13
+ mov r12, qword ptr [-72+128+rsp]
+ mov qword ptr [48+rax], r12
+ mov r11, qword ptr [-64+128+rsp]
+ mov qword ptr [56+rax], r11
+ mov r10, qword ptr [-56+128+rsp]
+ mov qword ptr [64+rax], r10
+ mov r9, qword ptr [-48+128+rsp]
+ mov qword ptr [72+rax], r9
+ mov r8, qword ptr [-40+128+rsp]
+ mov qword ptr [80+rax], r8
+ mov rdi, qword ptr [-32+128+rsp]
+ mov qword ptr [88+rax], rdi
+ mov rsi, qword ptr [-24+128+rsp]
+ mov qword ptr [96+rax], rsi
+ mov rbx, qword ptr [-16+128+rsp]
+ mov qword ptr [104+rax], rbx
+ mov rcx, qword ptr [-8+128+rsp]
+ mov qword ptr [112+rax], rcx
+ mov rdx, qword ptr [128+rsp]
+ mov qword ptr [120+rax], rdx
+ ; strip zero digits from the top of the result.
+ ; Digits are 64 bits wide, so the zero test must cover the whole qword:
+ ; testing only the low dword would discard a top digit whose low half
+ ; happens to be zero.
+ mov edx, dword ptr [8+rbp]
+ test edx, edx
+ je L192
+ lea ecx, dword ptr [-1+rdx]
+ mov rsi, qword ptr [16+rbp]
+ mov r14d, ecx
+ cmp qword ptr [rsi+r14*8], 0
+ jne L190
+ mov edx, ecx
+ ALIGN 16
+L191:
+ test edx, edx
+ mov ecx, edx
+ je L195
+ dec edx
+ mov r9d, edx
+ cmp qword ptr [rsi+r9*8], 0
+ je L191
+ mov dword ptr [8+rbp], ecx
+ mov edx, ecx
+L190:
+ ; non-zero count: keep the dword at offset 0; otherwise it is forced to 0
+ test edx, edx
+ je L192
+ mov eax, dword ptr [rbp]
+ jmp L193
+
+L195:
+ mov dword ptr [8+rbp], edx
+L192:
+ xor eax, eax
+L193:
+ mov dword ptr [rbp], eax
+ add rsp, 8+128
+ pop rbx
+ pop rbp
+ pop r12
+ pop r13
+ pop r14
+
+ pop rsi
+ pop rdi
+
+ ret
+
+s_mp_sqr_comba_8 ENDP
+
+
+; void s_mp_sqr_comba_16(const mp_int *A, mp_int *B)
+
+ ALIGN 16
+s_mp_sqr_comba_16 PROC ; A "FRAME" function
+
+ push rdi
+ push rsi
+
+ mov rdi, rcx
+ mov rsi, rdx
+
+ push rbp
+ xor r9d, r9d
+ mov r8, r9
+ mov r11, r9
+ mov rbp, rsp
+ push r14
+ mov r14, rsi
+ mov rsi, r9
+ push r13
+ mov r13, r9
+ push r12
+ mov r12, r9
+ push rbx
+ mov rbx, r9
+ sub rsp, 256+32 ; +32 for "home" storage
+ mov rcx, qword ptr [16+rdi]
+ mov rax, qword ptr [rcx]
+ mul rax
+ add r8, rax
+ adc rbx, rdx
+ adc rsi, 0
+ mov qword ptr [-288+rbp], r8
+ mov rax, qword ptr [rcx]
+ mul qword ptr [8+rcx]
+ add rbx, rax
+ adc rsi, rdx
+ adc r12, 0
+ add rbx, rax
+ adc rsi, rdx
+ adc r12, 0
+ mov qword ptr [-280+rbp], rbx
+ mov rax, qword ptr [rcx]
+ mul qword ptr [16+rcx]
+ add rsi, rax
+ adc r12, rdx
+ adc r13, 0
+ add rsi, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rbx, r12
+ mov r10, r13
+ mov rax, qword ptr [8+rcx]
+ mul rax
+ add rsi, rax
+ adc rbx, rdx
+ adc r10, 0
+ mov qword ptr [-272+rbp], rsi
+ mov rdi, r10
+ mov rsi, r9
+ mov r10, rbx
+ mov rax, qword ptr [rcx]
+ mul qword ptr [24+rcx]
+ add r10, rax
+ adc rdi, rdx
+ adc r11, 0
+ add r10, rax
+ adc rdi, rdx
+ adc r11, 0
+ mov r12, rdi
+ mov rbx, r11
+ mov rdi, r9
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [16+rcx]
+ add r10, rax
+ adc r12, rdx
+ adc rbx, 0
+ add r10, rax
+ adc r12, rdx
+ adc rbx, 0
+ mov r11, r9
+ mov qword ptr [-264+rbp], r10
+ mov r8, rbx
+ mov r13, r12
+ mov r12, r9
+ mov rax, qword ptr [rcx]
+ mul qword ptr [32+rcx]
+ add r13, rax
+ adc r8, rdx
+ adc r12, 0
+ add r13, rax
+ adc r8, rdx
+ adc r12, 0
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [24+rcx]
+ add r13, rax
+ adc r8, rdx
+ adc r12, 0
+ add r13, rax
+ adc r8, rdx
+ adc r12, 0
+ mov rbx, r8
+ mov r10, r12
+ mov r8, r9
+ mov rax, qword ptr [16+rcx]
+ mul rax
+ add r13, rax
+ adc rbx, rdx
+ adc r10, 0
+ mov qword ptr [-256+rbp], r13
+ mov rax, qword ptr [rcx]
+ mul qword ptr [40+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [32+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [24+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add rbx, r8
+ adc r10, rdi
+ adc r11, rsi
+ add rbx, r8
+ adc r10, rdi
+ adc r11, rsi
+ mov qword ptr [-248+rbp], rbx
+ mov rax, qword ptr [rcx]
+ mul qword ptr [48+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [40+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [32+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, r9
+ mov r13, rdi
+ mov r12, rsi
+ add r10, r8
+ adc r11, r13
+ adc rax, r12
+ add r10, r8
+ adc r11, r13
+ adc rax, r12
+ mov rdx, rax
+ mov rbx, r11
+ mov rdi, r13
+ mov r11, rdx
+ mov rsi, r12
+ mov rax, qword ptr [24+rcx]
+ mul rax
+ add r10, rax
+ adc rbx, rdx
+ adc r11, 0
+ mov qword ptr [-240+rbp], r10
+ mov r10, r11
+ mov rax, qword ptr [rcx]
+ mul qword ptr [56+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [48+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [40+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [32+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rdx, r9
+ add rbx, r8
+ adc r10, rdi
+ adc rdx, rsi
+ add rbx, r8
+ adc r10, rdi
+ adc rdx, rsi
+ mov r11, rdx
+ mov qword ptr [-232+rbp], rbx
+ mov rbx, r9
+ mov rax, qword ptr [rcx]
+ mul qword ptr [64+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [56+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [48+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [40+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r13, rdi
+ mov r12, rsi
+ add r10, r8
+ adc r11, r13
+ adc rbx, r12
+ add r10, r8
+ adc r11, r13
+ adc rbx, r12
+ mov rax, qword ptr [32+rcx]
+ mul rax
+ add r10, rax
+ adc r11, rdx
+ adc rbx, 0
+ mov rdi, r13
+ mov qword ptr [-224+rbp], r10
+ mov rsi, r12
+ mov r10, rbx
+ mov r12, r9
+ mov rax, qword ptr [rcx]
+ mul qword ptr [72+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [64+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [56+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [48+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [40+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add r11, r8
+ adc r10, rdi
+ adc r12, rsi
+ add r11, r8
+ adc r10, rdi
+ adc r12, rsi
+ mov qword ptr [-216+rbp], r11
+ mov rbx, r12
+ mov rax, qword ptr [rcx]
+ mul qword ptr [80+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [72+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [64+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [56+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [48+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, r9
+ mov r13, rdi
+ mov r12, rsi
+ add r10, r8
+ adc rbx, r13
+ adc rax, r12
+ add r10, r8
+ adc rbx, r13
+ adc rax, r12
+ mov rdx, rax
+ mov r11, rbx
+ mov rdi, r13
+ mov rbx, rdx
+ mov rsi, r12
+ mov rax, qword ptr [40+rcx]
+ mul rax
+ add r10, rax
+ adc r11, rdx
+ adc rbx, 0
+ mov qword ptr [-208+rbp], r10
+ mov r10, rbx
+ mov rax, qword ptr [rcx]
+ mul qword ptr [88+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [72+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [64+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [56+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [48+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rdx, r9
+ add r11, r8
+ adc r10, rdi
+ adc rdx, rsi
+ add r11, r8
+ adc r10, rdi
+ adc rdx, rsi
+ mov r13, rdx
+ mov qword ptr [-200+rbp], r11
+ mov r12, r13
+ mov rax, qword ptr [rcx]
+ mul qword ptr [96+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [72+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [64+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [56+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, r9
+ mov rdx, rdi
+ mov r11, rsi
+ add r10, r8
+ adc r12, rdx
+ adc rax, r11
+ add r10, r8
+ adc r12, rdx
+ adc rax, r11
+ mov rbx, rdx
+ mov r13, rax
+ mov rsi, r11
+ mov rax, qword ptr [48+rcx]
+ mul rax
+ add r10, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rdi, rbx
+ mov qword ptr [-192+rbp], r10
+ mov r10, r13
+ mov rax, qword ptr [rcx]
+ mul qword ptr [104+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov r13, r9
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [72+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [64+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [56+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add r12, r8
+ adc r10, rdi
+ adc r13, rsi
+ add r12, r8
+ adc r10, rdi
+ adc r13, rsi
+ mov qword ptr [-184+rbp], r12
+ mov r12, r13
+ mov rax, qword ptr [rcx]
+ mul qword ptr [112+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [72+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [64+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, r9
+ mov rbx, rdi
+ mov rdx, rsi
+ add r10, r8
+ adc r12, rbx
+ adc rax, rdx
+ add r10, r8
+ adc r12, rbx
+ adc rax, rdx
+ mov r11, rdx
+ mov r13, rax
+ mov rdi, rbx
+ mov rax, qword ptr [56+rcx]
+ mul rax
+ add r10, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rsi, r11
+ mov qword ptr [-176+rbp], r10
+ mov r10, r13
+ mov rax, qword ptr [rcx]
+ mul qword ptr [120+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov r13, r9
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [72+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [64+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add r12, r8
+ adc r10, rdi
+ adc r13, rsi
+ add r12, r8
+ adc r10, rdi
+ adc r13, rsi
+ mov qword ptr [-168+rbp], r12
+ mov r12, r13
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [120+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [72+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, r9
+ mov rbx, rdi
+ mov rdx, rsi
+ add r10, r8
+ adc r12, rbx
+ adc rax, rdx
+ add r10, r8
+ adc r12, rbx
+ adc rax, rdx
+ mov r11, rdx
+ mov r13, rax
+ mov rdi, rbx
+ mov rax, qword ptr [64+rcx]
+ mul rax
+ add r10, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rsi, r11
+ mov qword ptr [-160+rbp], r10
+ mov r11, r9
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [120+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov r10, r13
+ mov rbx, r9
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [72+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add r12, r8
+ adc r10, rdi
+ adc r11, rsi
+ add r12, r8
+ adc r10, rdi
+ adc r11, rsi
+ mov qword ptr [-152+rbp], r12
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [120+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r13, rdi
+ mov r12, rsi
+ add r10, r8
+ adc r11, r13
+ adc rbx, r12
+ add r10, r8
+ adc r11, r13
+ adc rbx, r12
+ mov rdx, rbx
+ mov rdi, r13
+ mov rbx, r11
+ mov rsi, r12
+ mov r11, rdx
+ mov r12, r9
+ mov rax, qword ptr [72+rcx]
+ mul rax
+ add r10, rax
+ adc rbx, rdx
+ adc r11, 0
+ mov qword ptr [-144+rbp], r10
+ mov r10, r11
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [120+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add rbx, r8
+ adc r10, rdi
+ adc r12, rsi
+ add rbx, r8
+ adc r10, rdi
+ adc r12, rsi
+ mov qword ptr [-136+rbp], rbx
+ mov r11, r12
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [120+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, r9
+ mov r13, rdi
+ mov r12, rsi
+ add r10, r8
+ adc r11, r13
+ adc rax, r12
+ add r10, r8
+ adc r11, r13
+ adc rax, r12
+ mov rdx, rax
+ mov rbx, r11
+ mov rdi, r13
+ mov r11, rdx
+ mov rsi, r12
+ mov rax, qword ptr [80+rcx]
+ mul rax
+ add r10, rax
+ adc rbx, rdx
+ adc r11, 0
+ mov qword ptr [-128+rbp], r10
+ mov r10, r11
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [120+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rdx, r9
+ add rbx, r8
+ adc r10, rdi
+ adc rdx, rsi
+ add rbx, r8
+ adc r10, rdi
+ adc rdx, rsi
+ mov qword ptr [-120+rbp], rbx
+ mov r11, rdx
+ mov rbx, r9
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [120+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r13, rdi
+ mov r12, rsi
+ add r10, r8
+ adc r11, r13
+ adc rbx, r12
+ add r10, r8
+ adc r11, r13
+ adc rbx, r12
+ mov rdx, rbx
+ mov rdi, r13
+ mov rbx, r11
+ mov rsi, r12
+ mov r11, rdx
+ mov r12, r9
+ mov rax, qword ptr [88+rcx]
+ mul rax
+ add r10, rax
+ adc rbx, rdx
+ adc r11, 0
+ mov qword ptr [-112+rbp], r10
+ mov r10, r11
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [120+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add rbx, r8
+ adc r10, rdi
+ adc r12, rsi
+ add rbx, r8
+ adc r10, rdi
+ adc r12, rsi
+ mov qword ptr [-104+rbp], rbx
+ mov r11, r12
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [120+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, r9
+ mov r13, rdi
+ mov r12, rsi
+ add r10, r8
+ adc r11, r13
+ adc rax, r12
+ add r10, r8
+ adc r11, r13
+ adc rax, r12
+ mov rdx, rax
+ mov rbx, r11
+ mov rdi, r13
+ mov r11, rdx
+ mov rsi, r12
+ mov rax, qword ptr [96+rcx]
+ mul rax
+ add r10, rax
+ adc rbx, rdx
+ adc r11, 0
+ mov qword ptr [-96+rbp], r10
+ mov r10, r9
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [120+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r12, rdi
+ mov rax, rsi
+ mov rsi, r9
+ add rbx, r8
+ adc r11, r12
+ adc r10, rax
+ add rbx, r8
+ adc r11, r12
+ adc r10, rax
+ mov r12, r9
+ mov qword ptr [-88+rbp], rbx
+ mov r13, r11
+ mov r11, r10
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [120+rcx]
+ add r13, rax
+ adc r11, rdx
+ adc r12, 0
+ add r13, rax
+ adc r11, rdx
+ adc r12, 0
+ mov rdi, r12
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [112+rcx]
+ add r13, rax
+ adc r11, rdx
+ adc rdi, 0
+ add r13, rax
+ adc r11, rdx
+ adc rdi, 0
+ mov rbx, r11
+ mov r10, rdi
+ mov r11, r9
+ mov rax, qword ptr [104+rcx]
+ mul rax
+ add r13, rax
+ adc rbx, rdx
+ adc r10, 0
+ mov qword ptr [-80+rbp], r13
+ mov r8, r10
+ mov r10, rbx
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [120+rcx]
+ add r10, rax
+ adc r8, rdx
+ adc rsi, 0
+ add r10, rax
+ adc r8, rdx
+ adc rsi, 0
+ mov r12, r8
+ mov rbx, rsi
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [112+rcx]
+ add r10, rax
+ adc r12, rdx
+ adc rbx, 0
+ add r10, rax
+ adc r12, rdx
+ adc rbx, 0
+ mov qword ptr [-72+rbp], r10
+ mov r13, rbx
+ mov rbx, r12
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [120+rcx]
+ add rbx, rax
+ adc r13, rdx
+ adc r11, 0
+ add rbx, rax
+ adc r13, rdx
+ adc r11, 0
+ mov r12, r11
+ mov r10, r13
+ mov rax, qword ptr [112+rcx]
+ mul rax
+ add rbx, rax
+ adc r10, rdx
+ adc r12, 0
+ mov qword ptr [-64+rbp], rbx
+ mov rdi, r10
+ mov rbx, r9
+ mov rsi, r12
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [120+rcx]
+ add rdi, rax
+ adc rsi, rdx
+ adc rbx, 0
+ add rdi, rax
+ adc rsi, rdx
+ adc rbx, 0
+ mov qword ptr [-56+rbp], rdi
+ mov r8, rbx
+ mov rax, qword ptr [120+rcx]
+ mul rax
+ add rsi, rax
+ adc r8, rdx
+ adc r9, 0
+ mov qword ptr [-48+rbp], rsi
+ mov qword ptr [-40+rbp], r8
+ mov dword ptr [8+r14], 32
+ mov dword ptr [r14], 0
+; mov rdi, qword ptr [16+r14]
+; lea rsi, qword ptr [-288+rbp]
+; mov edx, 256
+ mov rcx, qword ptr [16+r14]
+ lea rdx, qword ptr [-288+rbp]
+ mov r8d, 256
+ call memcpy
+ mov edx, dword ptr [8+r14]
+ test edx, edx
+ je L232
+ lea ecx, dword ptr [-1+rdx]
+ mov rsi, qword ptr [16+r14]
+ mov r9d, ecx
+ cmp dword ptr [rsi+r9*8], 0
+ jne L230
+ mov edx, ecx
+ ALIGN 16
+L231:
+ test edx, edx
+ mov ecx, edx
+ je L235
+ dec edx
+ mov eax, edx
+ cmp dword ptr [rsi+rax*8], 0
+ je L231
+ mov dword ptr [8+r14], ecx
+ mov edx, ecx
+L230:
+ test edx, edx
+ je L232
+ mov eax, dword ptr [r14]
+ jmp L233
+
+L235:
+ mov dword ptr [8+r14], edx
+L232:
+ xor eax, eax
+L233:
+ mov dword ptr [r14], eax
+ add rsp, 256+32 ; +32 for "home" storage
+ pop rbx
+ pop r12
+ pop r13
+ pop r14
+ pop rbp
+ pop rsi
+ pop rdi
+
+ ret
+
+s_mp_sqr_comba_16 ENDP
+
+
+; void s_mp_sqr_comba_32(const mp_int *A, mp_int *B);
+
+ ALIGN 16
+s_mp_sqr_comba_32 PROC ; A "FRAME" function
+
+ push rdi
+ push rsi
+
+ mov rdi, rcx
+ mov rsi, rdx
+
+ push rbp
+ xor r10d, r10d
+ mov r8, r10
+ mov r11, r10
+ mov rbp, rsp
+ push r14
+ mov r14, rsi
+ mov rsi, r10
+ push r13
+ mov r13, r10
+ push r12
+ mov r12, r10
+ push rbx
+ mov rbx, r10
+ sub rsp, 512+32 ; +32 for "home" storage
+ mov rcx, qword ptr [16+rdi]
+ mov rax, qword ptr [rcx]
+ mul rax
+ add r8, rax
+ adc rbx, rdx
+ adc rsi, 0
+ mov qword ptr [-544+rbp], r8
+ mov rax, qword ptr [rcx]
+ mul qword ptr [8+rcx]
+ add rbx, rax
+ adc rsi, rdx
+ adc r12, 0
+ add rbx, rax
+ adc rsi, rdx
+ adc r12, 0
+ mov qword ptr [-536+rbp], rbx
+ mov rax, qword ptr [rcx]
+ mul qword ptr [16+rcx]
+ add rsi, rax
+ adc r12, rdx
+ adc r13, 0
+ add rsi, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rbx, r12
+ mov r9, r13
+ mov rax, qword ptr [8+rcx]
+ mul rax
+ add rsi, rax
+ adc rbx, rdx
+ adc r9, 0
+ mov qword ptr [-528+rbp], rsi
+ mov rdi, r9
+ mov rsi, r10
+ mov r9, rbx
+ mov rax, qword ptr [rcx]
+ mul qword ptr [24+rcx]
+ add r9, rax
+ adc rdi, rdx
+ adc r11, 0
+ add r9, rax
+ adc rdi, rdx
+ adc r11, 0
+ mov r12, rdi
+ mov r13, r11
+ mov rdi, r10
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [16+rcx]
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov r11, r10
+ mov qword ptr [-520+rbp], r9
+ mov r8, r13
+ mov r13, r12
+ mov r12, r10
+ mov rax, qword ptr [rcx]
+ mul qword ptr [32+rcx]
+ add r13, rax
+ adc r8, rdx
+ adc r12, 0
+ add r13, rax
+ adc r8, rdx
+ adc r12, 0
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [24+rcx]
+ add r13, rax
+ adc r8, rdx
+ adc r12, 0
+ add r13, rax
+ adc r8, rdx
+ adc r12, 0
+ mov rbx, r8
+ mov r9, r12
+ mov r8, r10
+ mov rax, qword ptr [16+rcx]
+ mul rax
+ add r13, rax
+ adc rbx, rdx
+ adc r9, 0
+ mov qword ptr [-512+rbp], r13
+ mov rax, qword ptr [rcx]
+ mul qword ptr [40+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [32+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [24+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add rbx, r8
+ adc r9, rdi
+ adc r11, rsi
+ add rbx, r8
+ adc r9, rdi
+ adc r11, rsi
+ mov qword ptr [-504+rbp], rbx
+ mov rax, qword ptr [rcx]
+ mul qword ptr [48+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [40+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [32+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, r10
+ mov r13, rdi
+ mov r12, rsi
+ add r9, r8
+ adc r11, r13
+ adc rax, r12
+ add r9, r8
+ adc r11, r13
+ adc rax, r12
+ mov rdx, rax
+ mov rbx, r11
+ mov rdi, r13
+ mov r11, rdx
+ mov rsi, r12
+ mov rax, qword ptr [24+rcx]
+ mul rax
+ add r9, rax
+ adc rbx, rdx
+ adc r11, 0
+ mov qword ptr [-496+rbp], r9
+ mov r9, r11
+ mov rax, qword ptr [rcx]
+ mul qword ptr [56+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [48+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [40+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [32+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rdx, r10
+ add rbx, r8
+ adc r9, rdi
+ adc rdx, rsi
+ add rbx, r8
+ adc r9, rdi
+ adc rdx, rsi
+ mov r11, rdx
+ mov qword ptr [-488+rbp], rbx
+ mov rbx, r10
+ mov rax, qword ptr [rcx]
+ mul qword ptr [64+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [56+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [48+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [40+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r13, rdi
+ mov r12, rsi
+ add r9, r8
+ adc r11, r13
+ adc rbx, r12
+ add r9, r8
+ adc r11, r13
+ adc rbx, r12
+ mov rax, qword ptr [32+rcx]
+ mul rax
+ add r9, rax
+ adc r11, rdx
+ adc rbx, 0
+ mov rdi, r13
+ mov qword ptr [-480+rbp], r9
+ mov rsi, r12
+ mov r9, rbx
+ mov r12, r10
+ mov rax, qword ptr [rcx]
+ mul qword ptr [72+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [64+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [56+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [48+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [40+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add r11, r8
+ adc r9, rdi
+ adc r12, rsi
+ add r11, r8
+ adc r9, rdi
+ adc r12, rsi
+ mov qword ptr [-472+rbp], r11
+ mov rbx, r12
+ mov rax, qword ptr [rcx]
+ mul qword ptr [80+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [72+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [64+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [56+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [48+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, r10
+ mov r13, rdi
+ mov r12, rsi
+ add r9, r8
+ adc rbx, r13
+ adc rax, r12
+ add r9, r8
+ adc rbx, r13
+ adc rax, r12
+ mov rdx, rax
+ mov r11, rbx
+ mov rdi, r13
+ mov rbx, rdx
+ mov rsi, r12
+ mov rax, qword ptr [40+rcx]
+ mul rax
+ add r9, rax
+ adc r11, rdx
+ adc rbx, 0
+ mov qword ptr [-464+rbp], r9
+ mov r9, rbx
+ mov rax, qword ptr [rcx]
+ mul qword ptr [88+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [72+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [64+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [56+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [48+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rdx, r10
+ add r11, r8
+ adc r9, rdi
+ adc rdx, rsi
+ add r11, r8
+ adc r9, rdi
+ adc rdx, rsi
+ mov r13, rdx
+ mov qword ptr [-456+rbp], r11
+ mov r12, r13
+ mov r13, r10
+ mov rax, qword ptr [rcx]
+ mul qword ptr [96+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [72+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [64+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [56+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, rdi
+ mov r11, rsi
+ add r9, r8
+ adc r12, rax
+ adc r13, r11
+ add r9, r8
+ adc r12, rax
+ adc r13, r11
+ mov rbx, rax
+ mov rsi, r11
+ mov rax, qword ptr [48+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rdi, rbx
+ mov qword ptr [-448+rbp], r9
+ mov r9, r13
+ mov rax, qword ptr [rcx]
+ mul qword ptr [104+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov r13, r10
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [72+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [64+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [56+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add r12, r8
+ adc r9, rdi
+ adc r13, rsi
+ add r12, r8
+ adc r9, rdi
+ adc r13, rsi
+ mov qword ptr [-440+rbp], r12
+ mov r12, r10
+ mov rax, qword ptr [rcx]
+ mul qword ptr [112+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [72+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [64+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rdx, r13
+ mov rbx, rdi
+ mov r13, rsi
+ add r9, r8
+ adc rdx, rbx
+ adc r12, r13
+ add r9, r8
+ adc rdx, rbx
+ adc r12, r13
+ mov rax, r12
+ mov r11, r13
+ mov r12, rdx
+ mov r13, rax
+ mov rdi, rbx
+ mov rsi, r11
+ mov rax, qword ptr [56+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov qword ptr [-432+rbp], r9
+ mov r9, r13
+ mov r13, r10
+ mov rax, qword ptr [rcx]
+ mul qword ptr [120+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [72+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [64+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, r8
+ mov rdx, rdi
+ mov rbx, rsi
+ add r12, rax
+ adc r9, rdx
+ adc r13, rbx
+ add r12, rax
+ adc r9, rdx
+ adc r13, rbx
+ mov qword ptr [-424+rbp], r12
+ mov r8, rdx
+ mov rsi, rax
+ mov rdi, rbx
+ mov r12, r13
+ mov r13, r10
+ mov rax, qword ptr [rcx]
+ mul qword ptr [128+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [120+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [112+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [104+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [96+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [88+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [80+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [72+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, rsi
+ mov rbx, r8
+ mov rdx, rdi
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ mov r11, rdx
+ mov r8, rax
+ mov rdi, rbx
+ mov rax, qword ptr [64+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rsi, r11
+ mov qword ptr [-416+rbp], r9
+ mov r9, r13
+ mov rax, qword ptr [rcx]
+ mul qword ptr [136+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov r13, r10
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [128+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [120+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [72+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rbx, r8
+ mov rax, rdi
+ mov rdx, rsi
+ add r12, rbx
+ adc r9, rax
+ adc r13, rdx
+ add r12, rbx
+ adc r9, rax
+ adc r13, rdx
+ mov qword ptr [-408+rbp], r12
+ mov rdi, rdx
+ mov r8, rax
+ mov rsi, rbx
+ mov r12, r13
+ mov r13, r10
+ mov rax, qword ptr [rcx]
+ mul qword ptr [144+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [136+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [128+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [120+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [112+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [104+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [96+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [88+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [80+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, rsi
+ mov rbx, r8
+ mov rdx, rdi
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ mov r11, rdx
+ mov r8, rax
+ mov rdi, rbx
+ mov rax, qword ptr [72+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rsi, r11
+ mov qword ptr [-400+rbp], r9
+ mov r9, r13
+ mov rax, qword ptr [rcx]
+ mul qword ptr [152+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov r13, r10
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [144+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [136+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [128+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [120+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [80+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rbx, r8
+ mov rax, rdi
+ mov rdx, rsi
+ add r12, rbx
+ adc r9, rax
+ adc r13, rdx
+ add r12, rbx
+ adc r9, rax
+ adc r13, rdx
+ mov qword ptr [-392+rbp], r12
+ mov rdi, rdx
+ mov r8, rax
+ mov rsi, rbx
+ mov r12, r13
+ mov r13, r10
+ mov rax, qword ptr [rcx]
+ mul qword ptr [160+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [144+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [136+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [128+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [120+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [112+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [104+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [96+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [88+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, rsi
+ mov rbx, r8
+ mov rdx, rdi
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ mov r11, rdx
+ mov r8, rax
+ mov rdi, rbx
+ mov rax, qword ptr [80+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rsi, r11
+ mov qword ptr [-384+rbp], r9
+ mov r9, r13
+ mov rax, qword ptr [rcx]
+ mul qword ptr [168+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov r13, r10
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [160+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [152+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [144+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [136+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [128+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [120+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [88+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rbx, r8
+ mov rax, rdi
+ mov rdx, rsi
+ add r12, rbx
+ adc r9, rax
+ adc r13, rdx
+ add r12, rbx
+ adc r9, rax
+ adc r13, rdx
+ mov qword ptr [-376+rbp], r12
+ mov rdi, rdx
+ mov r8, rax
+ mov rsi, rbx
+ mov r12, r13
+ mov r13, r10
+ mov rax, qword ptr [rcx]
+ mul qword ptr [176+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [144+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [136+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [128+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [120+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [112+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [104+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [96+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, rsi
+ mov rbx, r8
+ mov rdx, rdi
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ mov r11, rdx
+ mov r8, rax
+ mov rdi, rbx
+ mov rax, qword ptr [88+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rsi, r11
+ mov qword ptr [-368+rbp], r9
+ mov r9, r13
+ mov rax, qword ptr [rcx]
+ mul qword ptr [184+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov r13, r10
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [176+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [168+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [160+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [152+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [144+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [136+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [128+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [120+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [112+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [104+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [96+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rbx, r8
+ mov rax, rdi
+ mov rdx, rsi
+ add r12, rbx
+ adc r9, rax
+ adc r13, rdx
+ add r12, rbx
+ adc r9, rax
+ adc r13, rdx
+ mov rdi, rdx
+ mov qword ptr [-360+rbp], r12
+ mov r8, rax
+ mov rsi, rbx
+ mov r12, r13
+ mov r13, r10
+ mov rax, qword ptr [rcx]
+ mul qword ptr [192+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [144+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [136+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [128+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [120+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [112+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [104+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rbx, r8
+ mov rax, rdi
+ add r9, rsi
+ adc r12, rbx
+ adc r13, rax
+ add r9, rsi
+ adc r12, rbx
+ adc r13, rax
+ mov r11, rax
+ mov r8, rbx
+ mov rax, qword ptr [96+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rdi, r11
+ mov qword ptr [-352+rbp], r9
+ mov r9, r13
+ mov rax, qword ptr [rcx]
+ mul qword ptr [200+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov r13, r10
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [144+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [136+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [128+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [120+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [112+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [104+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ add r12, rsi
+ adc r9, r8
+ adc r13, rdi
+ add r12, rsi
+ adc r9, r8
+ adc r13, rdi
+ mov qword ptr [-344+rbp], r12
+ mov r12, r10
+ mov rax, qword ptr [rcx]
+ mul qword ptr [208+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [144+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [136+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [128+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [120+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [112+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rdx, r13
+ mov rbx, r8
+ mov r13, rdi
+ add r9, rsi
+ adc rdx, rbx
+ adc r12, r13
+ add r9, rsi
+ adc rdx, rbx
+ adc r12, r13
+ mov rax, r12
+ mov r11, r13
+ mov r12, rdx
+ mov r13, rax
+ mov r8, rbx
+ mov rdi, r11
+ mov rax, qword ptr [104+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov qword ptr [-336+rbp], r9
+ mov r9, r13
+ mov r13, r10
+ mov rax, qword ptr [rcx]
+ mul qword ptr [216+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [144+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [136+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [128+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [120+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [112+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ add r12, rsi
+ adc r9, r8
+ adc r13, rdi
+ add r12, rsi
+ adc r9, r8
+ adc r13, rdi
+ mov qword ptr [-328+rbp], r12
+ mov rax, qword ptr [rcx]
+ mul qword ptr [224+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [216+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [144+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [136+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [128+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [120+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, r13
+ mov rdx, r10
+ mov rbx, r8
+ mov r12, rdi
+ add r9, rsi
+ adc rax, rbx
+ adc rdx, r12
+ add r9, rsi
+ adc rax, rbx
+ adc rdx, r12
+ mov rdi, rdx
+ mov r11, r12
+ mov r8, rbx
+ mov r12, rax
+ mov r13, rdi
+ mov rdi, r11
+ mov rax, qword ptr [112+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov qword ptr [-320+rbp], r9
+ mov rbx, r13
+ mov r9, r10
+ mov rax, qword ptr [rcx]
+ mul qword ptr [232+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [224+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [216+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [144+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [136+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [128+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [120+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ add r12, rsi
+ adc rbx, r8
+ adc r9, rdi
+ add r12, rsi
+ adc rbx, r8
+ adc r9, rdi
+ mov qword ptr [-312+rbp], r12
+ mov r13, r9
+ mov rax, qword ptr [rcx]
+ mul qword ptr [240+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [232+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [224+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [216+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [144+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [136+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [128+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, r10
+ mov r11, r8
+ mov rdx, rdi
+ add rbx, rsi
+ adc r13, r11
+ adc rax, rdx
+ add rbx, rsi
+ adc r13, r11
+ adc rax, rdx
+ mov r9, rdx
+ mov rdx, rax
+ mov r12, r13
+ mov r8, r11
+ mov r13, rdx
+ mov rdi, r9
+ mov rax, qword ptr [120+rcx]
+ mul rax
+ add rbx, rax
+ adc r12, rdx
+ adc r13, 0
+ mov qword ptr [-304+rbp], rbx
+ mov rbx, r13
+ mov r13, r10
+ mov rax, qword ptr [rcx]
+ mul qword ptr [248+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [240+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [232+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [224+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [216+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [144+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [136+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [128+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ add r12, rsi
+ adc rbx, r8
+ adc r13, rdi
+ add r12, rsi
+ adc rbx, r8
+ adc r13, rdi
+ mov qword ptr [-296+rbp], r12
+ mov r12, r13
+ mov r13, r10
+ mov rax, qword ptr [8+rcx]
+ mul qword ptr [248+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [240+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [232+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [224+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [216+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [144+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [136+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov r11, r8
+ mov rax, rdi
+ add rbx, rsi
+ adc r12, r11
+ adc r13, rax
+ add rbx, rsi
+ adc r12, r11
+ adc r13, rax
+ mov r9, rax
+ mov r8, r11
+ mov rax, qword ptr [128+rcx]
+ mul rax
+ add rbx, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rdi, r9
+ mov qword ptr [-288+rbp], rbx
+ mov r9, r13
+ mov rax, qword ptr [16+rcx]
+ mul qword ptr [248+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov r13, r10
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [240+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [232+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [224+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [216+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [144+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [128+rcx]
+ mul qword ptr [136+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ add r12, rsi
+ adc r9, r8
+ adc r13, rdi
+ add r12, rsi
+ adc r9, r8
+ adc r13, rdi
+ mov qword ptr [-280+rbp], r12
+ mov r12, r10
+ mov rax, qword ptr [24+rcx]
+ mul qword ptr [248+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [240+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [232+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [224+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [216+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [128+rcx]
+ mul qword ptr [144+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rdx, r13
+ mov rbx, r8
+ mov r13, rdi
+ add r9, rsi
+ adc rdx, rbx
+ adc r12, r13
+ add r9, rsi
+ adc rdx, rbx
+ adc r12, r13
+ mov rax, r12
+ mov r11, r13
+ mov r12, rdx
+ mov r13, rax
+ mov r8, rbx
+ mov rdi, r11
+ mov rax, qword ptr [136+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov qword ptr [-272+rbp], r9
+ mov r9, r13
+ mov r13, r10
+ mov rax, qword ptr [32+rcx]
+ mul qword ptr [248+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [240+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [232+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [224+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [216+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [128+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [136+rcx]
+ mul qword ptr [144+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ add r12, rsi
+ adc r9, r8
+ adc r13, rdi
+ add r12, rsi
+ adc r9, r8
+ adc r13, rdi
+ mov qword ptr [-264+rbp], r12
+ mov rax, qword ptr [40+rcx]
+ mul qword ptr [248+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [240+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [232+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [224+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [216+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [128+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [136+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, r13
+ mov rdx, r10
+ mov rbx, r8
+ mov r12, rdi
+ add r9, rsi
+ adc rax, rbx
+ adc rdx, r12
+ add r9, rsi
+ adc rax, rbx
+ adc rdx, r12
+ mov rdi, rdx
+ mov r11, r12
+ mov r8, rbx
+ mov r12, rax
+ mov r13, rdi
+ mov rdi, r11
+ mov rax, qword ptr [144+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov r11, r10
+ mov qword ptr [-256+rbp], r9
+ mov r9, r13
+ mov rax, qword ptr [48+rcx]
+ mul qword ptr [248+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [240+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [232+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [224+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [216+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [128+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [136+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [144+rcx]
+ mul qword ptr [152+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ add r12, rsi
+ adc r9, r8
+ adc r11, rdi
+ add r12, rsi
+ adc r9, r8
+ adc r11, rdi
+ mov qword ptr [-248+rbp], r12
+ mov r13, r11
+ mov rax, qword ptr [56+rcx]
+ mul qword ptr [248+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [240+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [232+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [224+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [216+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [128+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [136+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [144+rcx]
+ mul qword ptr [160+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, r10
+ mov rdx, rsi
+ mov rbx, r8
+ mov r12, rdi
+ add r9, rdx
+ adc r13, rbx
+ adc rax, r12
+ add r9, rdx
+ adc r13, rbx
+ adc rax, r12
+ mov r11, r12
+ mov r8, rdx
+ mov rdx, rax
+ mov r12, r13
+ mov rdi, rbx
+ mov r13, rdx
+ mov rsi, r11
+ mov rax, qword ptr [152+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov qword ptr [-240+rbp], r9
+ mov r9, r13
+ mov r13, r10
+ mov rax, qword ptr [64+rcx]
+ mul qword ptr [248+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [240+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [232+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [224+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [216+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [208+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [200+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [192+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [128+rcx]
+ mul qword ptr [184+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [136+rcx]
+ mul qword ptr [176+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [144+rcx]
+ mul qword ptr [168+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [152+rcx]
+ mul qword ptr [160+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, r8
+ mov rdx, rdi
+ mov rbx, rsi
+ add r12, rax
+ adc r9, rdx
+ adc r13, rbx
+ add r12, rax
+ adc r9, rdx
+ adc r13, rbx
+ mov qword ptr [-232+rbp], r12
+ mov r8, rdx
+ mov rsi, rax
+ mov rdi, rbx
+ mov r12, r13
+ mov r13, r10
+ mov rax, qword ptr [72+rcx]
+ mul qword ptr [248+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [240+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [232+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [224+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [216+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [128+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [136+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [144+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [152+rcx]
+ mul qword ptr [168+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, rsi
+ mov rbx, r8
+ mov rdx, rdi
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ mov r11, rdx
+ mov r8, rax
+ mov rdi, rbx
+ mov rax, qword ptr [160+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rsi, r11
+ mov qword ptr [-224+rbp], r9
+ mov r9, r13
+ mov rax, qword ptr [80+rcx]
+ mul qword ptr [248+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov r13, r10
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [240+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [232+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [224+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [216+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [208+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [128+rcx]
+ mul qword ptr [200+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [136+rcx]
+ mul qword ptr [192+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [144+rcx]
+ mul qword ptr [184+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [152+rcx]
+ mul qword ptr [176+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [160+rcx]
+ mul qword ptr [168+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rbx, r8
+ mov rax, rdi
+ mov rdx, rsi
+ add r12, rbx
+ adc r9, rax
+ adc r13, rdx
+ add r12, rbx
+ adc r9, rax
+ adc r13, rdx
+ mov qword ptr [-216+rbp], r12
+ mov rdi, rdx
+ mov r8, rax
+ mov rsi, rbx
+ mov r12, r13
+ mov r13, r10
+ mov rax, qword ptr [88+rcx]
+ mul qword ptr [248+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [240+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [232+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [224+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [216+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [128+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [136+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [144+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [152+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [160+rcx]
+ mul qword ptr [176+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, rsi
+ mov rbx, r8
+ mov rdx, rdi
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ mov r11, rdx
+ mov r8, rax
+ mov rdi, rbx
+ mov rax, qword ptr [168+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rsi, r11
+ mov qword ptr [-208+rbp], r9
+ mov r9, r13
+ mov rax, qword ptr [96+rcx]
+ mul qword ptr [248+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov r13, r10
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [240+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [232+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [224+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [128+rcx]
+ mul qword ptr [216+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [136+rcx]
+ mul qword ptr [208+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [144+rcx]
+ mul qword ptr [200+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [152+rcx]
+ mul qword ptr [192+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [160+rcx]
+ mul qword ptr [184+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [168+rcx]
+ mul qword ptr [176+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rbx, r8
+ mov rax, rdi
+ mov rdx, rsi
+ add r12, rbx
+ adc r9, rax
+ adc r13, rdx
+ add r12, rbx
+ adc r9, rax
+ adc r13, rdx
+ mov qword ptr [-200+rbp], r12
+ mov rdi, rdx
+ mov r8, rax
+ mov rsi, rbx
+ mov r12, r13
+ mov r13, r10
+ mov rax, qword ptr [104+rcx]
+ mul qword ptr [248+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [240+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [232+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [128+rcx]
+ mul qword ptr [224+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [136+rcx]
+ mul qword ptr [216+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [144+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [152+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [160+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [168+rcx]
+ mul qword ptr [184+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, rsi
+ mov rbx, r8
+ mov rdx, rdi
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ mov r11, rdx
+ mov r8, rax
+ mov rdi, rbx
+ mov rax, qword ptr [176+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rsi, r11
+ mov qword ptr [-192+rbp], r9
+ mov r9, r13
+ mov rax, qword ptr [112+rcx]
+ mul qword ptr [248+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov r13, r10
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [240+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [128+rcx]
+ mul qword ptr [232+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [136+rcx]
+ mul qword ptr [224+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [144+rcx]
+ mul qword ptr [216+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [152+rcx]
+ mul qword ptr [208+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [160+rcx]
+ mul qword ptr [200+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [168+rcx]
+ mul qword ptr [192+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [176+rcx]
+ mul qword ptr [184+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rbx, r8
+ mov rax, rdi
+ mov rdx, rsi
+ add r12, rbx
+ adc r9, rax
+ adc r13, rdx
+ add r12, rbx
+ adc r9, rax
+ adc r13, rdx
+ mov qword ptr [-184+rbp], r12
+ mov rdi, rdx
+ mov r8, rax
+ mov rsi, rbx
+ mov r12, r13
+ mov r13, r10
+ mov rax, qword ptr [120+rcx]
+ mul qword ptr [248+rcx]
+ mov rsi, rax
+ mov r8, rdx
+ xor rdi, rdi
+ mov rax, qword ptr [128+rcx]
+ mul qword ptr [240+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [136+rcx]
+ mul qword ptr [232+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [144+rcx]
+ mul qword ptr [224+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [152+rcx]
+ mul qword ptr [216+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [160+rcx]
+ mul qword ptr [208+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [168+rcx]
+ mul qword ptr [200+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, qword ptr [176+rcx]
+ mul qword ptr [192+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc rdi, 0
+ mov rax, rsi
+ mov rbx, r8
+ mov rdx, rdi
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ add r9, rax
+ adc r12, rbx
+ adc r13, rdx
+ mov r11, rdx
+ mov r8, rax
+ mov rdi, rbx
+ mov rax, qword ptr [184+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rsi, r11
+ mov qword ptr [-176+rbp], r9
+ mov r9, r13
+ mov rax, qword ptr [128+rcx]
+ mul qword ptr [248+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov r13, r10
+ mov rax, qword ptr [136+rcx]
+ mul qword ptr [240+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [144+rcx]
+ mul qword ptr [232+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [152+rcx]
+ mul qword ptr [224+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [160+rcx]
+ mul qword ptr [216+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [168+rcx]
+ mul qword ptr [208+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [176+rcx]
+ mul qword ptr [200+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [184+rcx]
+ mul qword ptr [192+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add r12, r8
+ adc r9, rdi
+ adc r13, rsi
+ add r12, r8
+ adc r9, rdi
+ adc r13, rsi
+ mov qword ptr [-168+rbp], r12
+ mov r12, r13
+ mov r13, r10
+ mov rax, qword ptr [136+rcx]
+ mul qword ptr [248+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [144+rcx]
+ mul qword ptr [240+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [152+rcx]
+ mul qword ptr [232+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [160+rcx]
+ mul qword ptr [224+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [168+rcx]
+ mul qword ptr [216+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [176+rcx]
+ mul qword ptr [208+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [184+rcx]
+ mul qword ptr [200+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rbx, rdi
+ mov rax, rsi
+ add r9, r8
+ adc r12, rbx
+ adc r13, rax
+ add r9, r8
+ adc r12, rbx
+ adc r13, rax
+ mov r11, rax
+ mov rdi, rbx
+ mov rbx, r10
+ mov rax, qword ptr [192+rcx]
+ mul rax
+ add r9, rax
+ adc r12, rdx
+ adc r13, 0
+ mov rsi, r11
+ mov qword ptr [-160+rbp], r9
+ mov r9, r13
+ mov rax, qword ptr [144+rcx]
+ mul qword ptr [248+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [152+rcx]
+ mul qword ptr [240+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [160+rcx]
+ mul qword ptr [232+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [168+rcx]
+ mul qword ptr [224+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [176+rcx]
+ mul qword ptr [216+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [184+rcx]
+ mul qword ptr [208+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [192+rcx]
+ mul qword ptr [200+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add r12, r8
+ adc r9, rdi
+ adc rbx, rsi
+ add r12, r8
+ adc r9, rdi
+ adc rbx, rsi
+ mov qword ptr [-152+rbp], r12
+ mov rax, qword ptr [152+rcx]
+ mul qword ptr [248+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [160+rcx]
+ mul qword ptr [240+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [168+rcx]
+ mul qword ptr [232+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [176+rcx]
+ mul qword ptr [224+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [184+rcx]
+ mul qword ptr [216+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [192+rcx]
+ mul qword ptr [208+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rdx, r10
+ mov r13, rdi
+ mov r12, rsi
+ add r9, r8
+ adc rbx, r13
+ adc rdx, r12
+ add r9, r8
+ adc rbx, r13
+ adc rdx, r12
+ mov rax, rdx
+ mov rdi, r13
+ mov rsi, r12
+ mov r11, rax
+ mov r12, r10
+ mov rax, qword ptr [200+rcx]
+ mul rax
+ add r9, rax
+ adc rbx, rdx
+ adc r11, 0
+ mov qword ptr [-144+rbp], r9
+ mov r9, r11
+ mov rax, qword ptr [160+rcx]
+ mul qword ptr [248+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [168+rcx]
+ mul qword ptr [240+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [176+rcx]
+ mul qword ptr [232+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [184+rcx]
+ mul qword ptr [224+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [192+rcx]
+ mul qword ptr [216+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [200+rcx]
+ mul qword ptr [208+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add rbx, r8
+ adc r9, rdi
+ adc r12, rsi
+ add rbx, r8
+ adc r9, rdi
+ adc r12, rsi
+ mov qword ptr [-136+rbp], rbx
+ mov r11, r12
+ mov rax, qword ptr [168+rcx]
+ mul qword ptr [248+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [176+rcx]
+ mul qword ptr [240+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [184+rcx]
+ mul qword ptr [232+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [192+rcx]
+ mul qword ptr [224+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [200+rcx]
+ mul qword ptr [216+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, r10
+ mov r13, rdi
+ mov r12, rsi
+ add r9, r8
+ adc r11, r13
+ adc rax, r12
+ add r9, r8
+ adc r11, r13
+ adc rax, r12
+ mov rdx, rax
+ mov rbx, r11
+ mov rdi, r13
+ mov r11, rdx
+ mov rsi, r12
+ mov rax, qword ptr [208+rcx]
+ mul rax
+ add r9, rax
+ adc rbx, rdx
+ adc r11, 0
+ mov qword ptr [-128+rbp], r9
+ mov r9, r11
+ mov rax, qword ptr [176+rcx]
+ mul qword ptr [248+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [184+rcx]
+ mul qword ptr [240+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [192+rcx]
+ mul qword ptr [232+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [200+rcx]
+ mul qword ptr [224+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [208+rcx]
+ mul qword ptr [216+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rdx, r10
+ add rbx, r8
+ adc r9, rdi
+ adc rdx, rsi
+ add rbx, r8
+ adc r9, rdi
+ adc rdx, rsi
+ mov qword ptr [-120+rbp], rbx
+ mov r11, rdx
+ mov rbx, r10
+ mov rax, qword ptr [184+rcx]
+ mul qword ptr [248+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [192+rcx]
+ mul qword ptr [240+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [200+rcx]
+ mul qword ptr [232+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [208+rcx]
+ mul qword ptr [224+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r13, rdi
+ mov r12, rsi
+ add r9, r8
+ adc r11, r13
+ adc rbx, r12
+ add r9, r8
+ adc r11, r13
+ adc rbx, r12
+ mov rdx, rbx
+ mov rdi, r13
+ mov rbx, r11
+ mov rsi, r12
+ mov r11, rdx
+ mov r12, r10
+ mov rax, qword ptr [216+rcx]
+ mul rax
+ add r9, rax
+ adc rbx, rdx
+ adc r11, 0
+ mov qword ptr [-112+rbp], r9
+ mov r9, r11
+ mov rax, qword ptr [192+rcx]
+ mul qword ptr [248+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [200+rcx]
+ mul qword ptr [240+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [208+rcx]
+ mul qword ptr [232+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [216+rcx]
+ mul qword ptr [224+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ add rbx, r8
+ adc r9, rdi
+ adc r12, rsi
+ add rbx, r8
+ adc r9, rdi
+ adc r12, rsi
+ mov qword ptr [-104+rbp], rbx
+ mov r11, r12
+ mov rax, qword ptr [200+rcx]
+ mul qword ptr [248+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [208+rcx]
+ mul qword ptr [240+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [216+rcx]
+ mul qword ptr [232+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, r10
+ mov r13, rdi
+ mov r12, rsi
+ add r9, r8
+ adc r11, r13
+ adc rax, r12
+ add r9, r8
+ adc r11, r13
+ adc rax, r12
+ mov rdx, rax
+ mov rbx, r11
+ mov rdi, r13
+ mov r11, rdx
+ mov rsi, r12
+ mov r12, r10
+ mov rax, qword ptr [224+rcx]
+ mul rax
+ add r9, rax
+ adc rbx, rdx
+ adc r11, 0
+ mov qword ptr [-96+rbp], r9
+ mov r9, r10
+ mov rax, qword ptr [208+rcx]
+ mul qword ptr [248+rcx]
+ mov r8, rax
+ mov rdi, rdx
+ xor rsi, rsi
+ mov rax, qword ptr [216+rcx]
+ mul qword ptr [240+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov rax, qword ptr [224+rcx]
+ mul qword ptr [232+rcx]
+ add r8, rax
+ adc rdi, rdx
+ adc rsi, 0
+ mov r13, rdi
+ mov rax, rsi
+ add rbx, r8
+ adc r11, r13
+ adc r9, rax
+ add rbx, r8
+ adc r11, r13
+ adc r9, rax
+ mov qword ptr [-88+rbp], rbx
+ mov rsi, r11
+ mov r8, r9
+ mov rax, qword ptr [216+rcx]
+ mul qword ptr [248+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc r12, 0
+ add rsi, rax
+ adc r8, rdx
+ adc r12, 0
+ mov r11, r12
+ mov rax, qword ptr [224+rcx]
+ mul qword ptr [240+rcx]
+ add rsi, rax
+ adc r8, rdx
+ adc r11, 0
+ add rsi, rax
+ adc r8, rdx
+ adc r11, 0
+ mov r13, r8
+ mov rbx, r11
+ mov rax, qword ptr [232+rcx]
+ mul rax
+ add rsi, rax
+ adc r13, rdx
+ adc rbx, 0
+ mov qword ptr [-80+rbp], rsi
+ mov r12, rbx
+ mov rdi, r13
+ mov r13, r10
+ mov rax, qword ptr [224+rcx]
+ mul qword ptr [248+rcx]
+ add rdi, rax
+ adc r12, rdx
+ adc r13, 0
+ add rdi, rax
+ adc r12, rdx
+ adc r13, 0
+ mov r9, r12
+ mov r12, r13
+ mov rax, qword ptr [232+rcx]
+ mul qword ptr [240+rcx]
+ add rdi, rax
+ adc r9, rdx
+ adc r12, 0
+ add rdi, rax
+ adc r9, rdx
+ adc r12, 0
+ mov qword ptr [-72+rbp], rdi
+ mov r11, r9
+ mov rbx, r12
+ mov r9, r10
+ mov rax, qword ptr [232+rcx]
+ mul qword ptr [248+rcx]
+ add r11, rax
+ adc rbx, rdx
+ adc r9, 0
+ add r11, rax
+ adc rbx, rdx
+ adc r9, 0
+ mov r13, rbx
+ mov rbx, r9
+ mov r9, r10
+ mov rax, qword ptr [240+rcx]
+ mul rax
+ add r11, rax
+ adc r13, rdx
+ adc rbx, 0
+ mov qword ptr [-64+rbp], r11
+ mov rdi, r13
+ mov rsi, rbx
+ mov rax, qword ptr [240+rcx]
+ mul qword ptr [248+rcx]
+ add rdi, rax
+ adc rsi, rdx
+ adc r9, 0
+ add rdi, rax
+ adc rsi, rdx
+ adc r9, 0
+ mov qword ptr [-56+rbp], rdi
+ mov r8, r9
+ mov rax, qword ptr [248+rcx]
+ mul rax
+ add rsi, rax
+ adc r8, rdx
+ adc r10, 0
+ mov qword ptr [-48+rbp], rsi
+ mov qword ptr [-40+rbp], r8
+ mov dword ptr [8+r14], 64
+ mov dword ptr [r14], 0
+; mov rdi, qword ptr [16+r14]
+; lea rsi, qword ptr [-544+rbp]
+; mov edx, 512
+ mov rcx, qword ptr [16+r14]
+ lea rdx, qword ptr [-544+rbp]
+ mov r8d, 512
+ call memcpy
+ mov edx, dword ptr [8+r14]
+ test edx, edx
+ je L304
+ lea ecx, dword ptr [-1+rdx]
+ mov rsi, qword ptr [16+r14]
+ mov r10d, ecx
+ cmp dword ptr [rsi+r10*8], 0
+ jne L302
+ mov edx, ecx
+ ALIGN 16
+L303:
+ test edx, edx
+ mov ecx, edx
+ je L307
+ dec edx
+ mov eax, edx
+ cmp dword ptr [rsi+rax*8], 0
+ je L303
+ mov dword ptr [8+r14], ecx
+ mov edx, ecx
+L302:
+ test edx, edx
+ je L304
+ mov eax, dword ptr [r14]
+ jmp L305
+
+L307:
+ mov dword ptr [8+r14], edx
+L304:
+ xor eax, eax
+L305:
+ mov dword ptr [r14], eax
+ add rsp, 512+32 ; +32 for "home" storage
+ pop rbx
+ pop r12
+ pop r13
+ pop r14
+ pop rbp
+
+ pop rsi
+ pop rdi
+
+ ret
+
+s_mp_sqr_comba_32 ENDP
+
+END
diff --git a/security/nss/lib/freebl/mpi/mp_comba_amd64_sun.s b/security/nss/lib/freebl/mpi/mp_comba_amd64_sun.s
new file mode 100644
index 000000000..a5181df33
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mp_comba_amd64_sun.s
@@ -0,0 +1,16097 @@
+//* TomsFastMath, a fast ISO C bignum library.
+/ *
+/ * This project is meant to fill in where LibTomMath
+/ * falls short. That is speed ;-)
+/ *
+/ * This project is public domain and free for all purposes.
+/ *
+/ * Tom St Denis, tomstdenis@iahu.ca
+/ */
+
+//*
+/ * The source file from which this assembly was derived
+/ * comes from TFM v0.03, which has the above license.
+/ * This source was compiled with an unnamed compiler at
+/ * the highest optimization level. Afterwards, the
+/ * trailing .section was removed because it causes errors
+/ * in the Studio 10 compiler on AMD 64.
+/ */
+
+ .file "mp_comba.c"
+ .text
+ .align 16
+/
+/ s_mp_mul_comba_4
+/
+/ Inputs: %rdi and %rsi point to the two operands, %rdx points to the
+/ result (kept in %rbx). In each structure this code touches a 32-bit
+/ field at offset 0 (presumably the sign), a 32-bit field at offset 8
+/ (presumably the used-word count) and, at offset 16, a pointer to an
+/ array of 64-bit words.
+/
+/ Four words of each operand are copied to the stack, then the 8-word
+/ product is built one output column at a time: every partial product
+/ a[i]*b[j] with i+j == k is added into a three-register accumulator
+/ (mulq / addq / adcq / adcq $0), the low register is stored as result
+/ word k, and the registers rotate for the next column. %r10 is zeroed
+/ up front and copied into whichever register becomes the new top of
+/ the accumulator.
+/
+/ The operand copies live at -64(%rsp)..-8(%rsp), i.e. below the stack
+/ pointer, which is never lowered for them; the function makes no calls.
+/ NOTE(review): this presumably relies on the System V AMD64 red zone -
+/ confirm for the target platform.
+/
+.globl s_mp_mul_comba_4
+ .type s_mp_mul_comba_4, @function
+s_mp_mul_comba_4:
+.LFB2:
+ pushq %r12
+.LCFI0:
+ pushq %rbp
+.LCFI1:
+ pushq %rbx
+.LCFI2:
+/ Copy the first operand's words 0..3 to -64..-40(%rsp) and the second
+/ operand's words 0..3 to -32..-8(%rsp); %r11 = result word array.
+ movq 16(%rdi), %r9
+ movq %rdx, %rbx
+ movq 16(%rsi), %rdx
+ movq (%r9), %rax
+ movq %rax, -64(%rsp)
+ movq 8(%r9), %r8
+ movq %r8, -56(%rsp)
+ movq 16(%r9), %rbp
+ movq %rbp, -48(%rsp)
+ movq 24(%r9), %r12
+ movq %r12, -40(%rsp)
+ movq (%rdx), %rcx
+ movq %rcx, -32(%rsp)
+ movq 8(%rdx), %r10
+ movq %r10, -24(%rsp)
+ movq 16(%rdx), %r11
+ xorl %r10d, %r10d
+ movq %r10, %r8
+ movq %r10, %r9
+ movq %r10, %rbp
+ movq %r11, -16(%rsp)
+ movq 16(%rbx), %r11
+ movq 24(%rdx), %rax
+ movq %rax, -8(%rsp)
+/APP
+ movq -64(%rsp),%rax
+ mulq -32(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rbp
+
+/NO_APP
+/ Column 0 done: store result word 0.
+ movq %r8, (%r11)
+ movq %rbp, %r8
+ movq %r10, %rbp
+/APP
+ movq -64(%rsp),%rax
+ mulq -24(%rsp)
+ addq %rax,%r9
+ adcq %rdx,%r8
+ adcq $0,%rbp
+
+/NO_APP
+ movq %rbp, %r12
+/APP
+ movq -56(%rsp),%rax
+ mulq -32(%rsp)
+ addq %rax,%r9
+ adcq %rdx,%r8
+ adcq $0,%r12
+
+/NO_APP
+/ Column 1 done: store result word 1.
+ movq %r9, 8(%r11)
+ movq %r12, %r9
+ movq %r10, %r12
+/APP
+ movq -64(%rsp),%rax
+ mulq -16(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%r12
+
+/NO_APP
+ movq %r12, %rcx
+/APP
+ movq -56(%rsp),%rax
+ mulq -24(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq -48(%rsp),%rax
+ mulq -32(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+/ Column 2 done: store result word 2.
+ movq %r8, 16(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -64(%rsp),%rax
+ mulq -8(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -56(%rsp),%rax
+ mulq -16(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -48(%rsp),%rax
+ mulq -24(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq -40(%rsp),%rax
+ mulq -32(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+/ Column 3 done: store result word 3.
+ movq %rcx, 24(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -56(%rsp),%rax
+ mulq -8(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -48(%rsp),%rax
+ mulq -16(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq -40(%rsp),%rax
+ mulq -24(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+/ Column 4 done: store result word 4.
+ movq %r8, 32(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -48(%rsp),%rax
+ mulq -8(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r8, %r12
+ movq %r9, %rbp
+/APP
+ movq -40(%rsp),%rax
+ mulq -16(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+/ Column 5 done: store result word 5.
+ movq %rcx, 40(%r11)
+ movq %rbp, %r8
+ movq %r12, %rcx
+/APP
+ movq -40(%rsp),%rax
+ mulq -8(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rcx
+ adcq $0,%r10
+
+/NO_APP
+/ Column 6 done: store result word 6; the remaining carry word in %rcx
+/ becomes result word 7. %esi = XOR of the operands' offset-0 fields.
+/ The count at 8(%rbx) starts at 8. The jne below still tests the flags
+/ from testq %rcx: the two stores in between do not modify flags.
+ movq %r8, 48(%r11)
+ movl (%rsi), %esi
+ xorl (%rdi), %esi
+ testq %rcx, %rcx
+ movq %rcx, 56(%r11)
+ movl $8, 8(%rbx)
+ jne .L9
+ .align 16
+/ Top word is zero: strip leading zero words. Each pass decrements the
+/ count, stops if it reached 0, and otherwise repeats while the word at
+/ index (new count - 1) is zero.
+.L18:
+ movl 8(%rbx), %edx
+ leal -1(%rdx), %edi
+ testl %edi, %edi
+ movl %edi, 8(%rbx)
+ je .L9
+ leal -2(%rdx), %r10d
+ cmpq $0, (%r11,%r10,8)
+ je .L18
+/ Store the XORed value at offset 0 of the result, or 0 when the final
+/ count is 0; then restore the saved registers and return.
+.L9:
+ movl 8(%rbx), %edx
+ xorl %r11d, %r11d
+ testl %edx, %edx
+ cmovne %esi, %r11d
+ movl %r11d, (%rbx)
+ popq %rbx
+ popq %rbp
+ popq %r12
+ ret
+.LFE2:
+ .size s_mp_mul_comba_4, .-s_mp_mul_comba_4
+ .align 16
+/
+/ s_mp_mul_comba_8
+/
+/ Inputs: %rdi and %rsi point to the two operands, %rdx points to the
+/ result (kept in %rbx). In each structure this code touches a 32-bit
+/ field at offset 0 (presumably the sign), a 32-bit field at offset 8
+/ (presumably the used-word count) and, at offset 16, a pointer to an
+/ array of 64-bit words.
+/
+/ Eight words of each operand are copied to the stack, then the 16-word
+/ product is built one output column at a time: every partial product
+/ a[i]*b[j] with i+j == k is added into a three-register accumulator
+/ (mulq / addq / adcq / adcq $0), the low register is stored as result
+/ word k, and the registers rotate for the next column. %r10 is zeroed
+/ up front and copied into whichever register becomes the new top of
+/ the accumulator.
+/
+/ After "subq $8, %rsp" the first operand's copy occupies
+/ -120(%rsp)..-64(%rsp) and the second operand's copy occupies
+/ -56(%rsp)..(%rsp); all but the last slot lie below the stack pointer,
+/ and the function makes no calls.
+/ NOTE(review): this presumably relies on the System V AMD64 red zone -
+/ confirm for the target platform.
+/
+.globl s_mp_mul_comba_8
+ .type s_mp_mul_comba_8, @function
+s_mp_mul_comba_8:
+.LFB3:
+ pushq %r12
+.LCFI3:
+ pushq %rbp
+.LCFI4:
+ pushq %rbx
+.LCFI5:
+ movq %rdx, %rbx
+ subq $8, %rsp
+.LCFI6:
+/ Copy the first operand's words 0..7 to -120..-64(%rsp).
+ movq 16(%rdi), %rdx
+ movq (%rdx), %r8
+ movq %r8, -120(%rsp)
+ movq 8(%rdx), %rbp
+ movq %rbp, -112(%rsp)
+ movq 16(%rdx), %r9
+ movq %r9, -104(%rsp)
+ movq 24(%rdx), %r12
+ movq %r12, -96(%rsp)
+ movq 32(%rdx), %rcx
+ movq %rcx, -88(%rsp)
+ movq 40(%rdx), %r10
+ movq %r10, -80(%rsp)
+ movq 48(%rdx), %r11
+ movq %r11, -72(%rsp)
+ movq 56(%rdx), %rax
+/ Copy the second operand's words 0..7 to -56..0(%rsp), zero the
+/ accumulator registers, and load the result word array into %r11.
+ movq 16(%rsi), %rdx
+ movq %rax, -64(%rsp)
+ movq (%rdx), %r8
+ movq %r8, -56(%rsp)
+ movq 8(%rdx), %rbp
+ movq %rbp, -48(%rsp)
+ movq 16(%rdx), %r9
+ movq %r9, -40(%rsp)
+ movq 24(%rdx), %r12
+ movq %r12, -32(%rsp)
+ movq 32(%rdx), %rcx
+ movq %rcx, -24(%rsp)
+ movq 40(%rdx), %r10
+ movq %r10, -16(%rsp)
+ movq 48(%rdx), %r11
+ xorl %r10d, %r10d
+ movq %r10, %r8
+ movq %r10, %r9
+ movq %r10, %rbp
+ movq %r11, -8(%rsp)
+ movq 16(%rbx), %r11
+ movq 56(%rdx), %rax
+ movq %rax, (%rsp)
+/APP
+ movq -120(%rsp),%rax
+ mulq -56(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rbp
+
+/NO_APP
+/ Column 0 done: store result word 0.
+ movq %r8, (%r11)
+ movq %rbp, %r8
+ movq %r10, %rbp
+/APP
+ movq -120(%rsp),%rax
+ mulq -48(%rsp)
+ addq %rax,%r9
+ adcq %rdx,%r8
+ adcq $0,%rbp
+
+/NO_APP
+ movq %rbp, %r12
+/APP
+ movq -112(%rsp),%rax
+ mulq -56(%rsp)
+ addq %rax,%r9
+ adcq %rdx,%r8
+ adcq $0,%r12
+
+/NO_APP
+/ Column 1 done: store result word 1.
+ movq %r9, 8(%r11)
+ movq %r12, %r9
+ movq %r10, %r12
+/APP
+ movq -120(%rsp),%rax
+ mulq -40(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%r12
+
+/NO_APP
+ movq %r12, %rcx
+/APP
+ movq -112(%rsp),%rax
+ mulq -48(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq -104(%rsp),%rax
+ mulq -56(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+/ Column 2 done: store result word 2.
+ movq %r8, 16(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -120(%rsp),%rax
+ mulq -32(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -112(%rsp),%rax
+ mulq -40(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -104(%rsp),%rax
+ mulq -48(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq -96(%rsp),%rax
+ mulq -56(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+/ Column 3 done: store result word 3.
+ movq %rcx, 24(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -120(%rsp),%rax
+ mulq -24(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -112(%rsp),%rax
+ mulq -32(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -104(%rsp),%rax
+ mulq -40(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -96(%rsp),%rax
+ mulq -48(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq -88(%rsp),%rax
+ mulq -56(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+/ Column 4 done: store result word 4.
+ movq %r8, 32(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -120(%rsp),%rax
+ mulq -16(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -112(%rsp),%rax
+ mulq -24(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -104(%rsp),%rax
+ mulq -32(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -96(%rsp),%rax
+ mulq -40(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -88(%rsp),%rax
+ mulq -48(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq -80(%rsp),%rax
+ mulq -56(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+/ Column 5 done: store result word 5.
+ movq %rcx, 40(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -120(%rsp),%rax
+ mulq -8(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -112(%rsp),%rax
+ mulq -16(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -104(%rsp),%rax
+ mulq -24(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -96(%rsp),%rax
+ mulq -32(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -88(%rsp),%rax
+ mulq -40(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -80(%rsp),%rax
+ mulq -48(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq -72(%rsp),%rax
+ mulq -56(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+/ Column 6 done: store result word 6.
+ movq %r8, 48(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -120(%rsp),%rax
+ mulq (%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -112(%rsp),%rax
+ mulq -8(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -104(%rsp),%rax
+ mulq -16(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -96(%rsp),%rax
+ mulq -24(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -88(%rsp),%rax
+ mulq -32(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -80(%rsp),%rax
+ mulq -40(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -72(%rsp),%rax
+ mulq -48(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq -64(%rsp),%rax
+ mulq -56(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+/ Column 7 done: store result word 7.
+ movq %rcx, 56(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -112(%rsp),%rax
+ mulq (%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -104(%rsp),%rax
+ mulq -8(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -96(%rsp),%rax
+ mulq -16(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -88(%rsp),%rax
+ mulq -24(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -80(%rsp),%rax
+ mulq -32(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -72(%rsp),%rax
+ mulq -40(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq -64(%rsp),%rax
+ mulq -48(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+/ Column 8 done: store result word 8.
+ movq %r8, 64(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -104(%rsp),%rax
+ mulq (%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -96(%rsp),%rax
+ mulq -8(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -88(%rsp),%rax
+ mulq -16(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -80(%rsp),%rax
+ mulq -24(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -72(%rsp),%rax
+ mulq -32(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq -64(%rsp),%rax
+ mulq -40(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+/ Column 9 done: store result word 9.
+ movq %rcx, 72(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -96(%rsp),%rax
+ mulq (%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -88(%rsp),%rax
+ mulq -8(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -80(%rsp),%rax
+ mulq -16(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -72(%rsp),%rax
+ mulq -24(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq -64(%rsp),%rax
+ mulq -32(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+/ Column 10 done: store result word 10.
+ movq %r8, 80(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -88(%rsp),%rax
+ mulq (%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -80(%rsp),%rax
+ mulq -8(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -72(%rsp),%rax
+ mulq -16(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq -64(%rsp),%rax
+ mulq -24(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+/ Column 11 done: store result word 11.
+ movq %rcx, 88(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -80(%rsp),%rax
+ mulq (%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -72(%rsp),%rax
+ mulq -8(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq -64(%rsp),%rax
+ mulq -16(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+/ Column 12 done: store result word 12.
+ movq %r8, 96(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -72(%rsp),%rax
+ mulq (%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r8, %r12
+ movq %r9, %rbp
+/APP
+ movq -64(%rsp),%rax
+ mulq -8(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+/ Column 13 done: store result word 13.
+ movq %rcx, 104(%r11)
+ movq %rbp, %r8
+ movq %r12, %rcx
+/APP
+ movq -64(%rsp),%rax
+ mulq (%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rcx
+ adcq $0,%r10
+
+/NO_APP
+/ Column 14 done: store result word 14; the remaining carry word in
+/ %rcx becomes result word 15. %esi = XOR of the operands' offset-0
+/ fields. The count at 8(%rbx) starts at 16. The jne below still tests
+/ the flags from testq %rcx: the two stores in between do not modify
+/ flags.
+ movq %r8, 112(%r11)
+ movl (%rsi), %esi
+ xorl (%rdi), %esi
+ testq %rcx, %rcx
+ movq %rcx, 120(%r11)
+ movl $16, 8(%rbx)
+ jne .L35
+ .align 16
+/ Top word is zero: strip leading zero words. Each pass decrements the
+/ count, stops if it reached 0, and otherwise repeats while the word at
+/ index (new count - 1) is zero.
+.L43:
+ movl 8(%rbx), %edx
+ leal -1(%rdx), %edi
+ testl %edi, %edi
+ movl %edi, 8(%rbx)
+ je .L35
+ leal -2(%rdx), %eax
+ cmpq $0, (%r11,%rax,8)
+ je .L43
+/ Store the XORed value at offset 0 of the result, or 0 when the final
+/ count is 0; then release the 8 bytes of stack, restore the saved
+/ registers and return.
+.L35:
+ movl 8(%rbx), %r11d
+ xorl %edx, %edx
+ testl %r11d, %r11d
+ cmovne %esi, %edx
+ movl %edx, (%rbx)
+ addq $8, %rsp
+ popq %rbx
+ popq %rbp
+ popq %r12
+ ret
+.LFE3:
+ .size s_mp_mul_comba_8, .-s_mp_mul_comba_8
+ .align 16
+.globl s_mp_mul_comba_16
+ .type s_mp_mul_comba_16, @function
+s_mp_mul_comba_16:
+.LFB4:
+ pushq %r12
+.LCFI7:
+ pushq %rbp
+.LCFI8:
+ pushq %rbx
+.LCFI9:
+ movq %rdx, %rbx
+ subq $136, %rsp
+.LCFI10:
+ movq 16(%rdi), %rax
+ movq (%rax), %r8
+ movq %r8, -120(%rsp)
+ movq 8(%rax), %rbp
+ movq %rbp, -112(%rsp)
+ movq 16(%rax), %r9
+ movq %r9, -104(%rsp)
+ movq 24(%rax), %r12
+ movq %r12, -96(%rsp)
+ movq 32(%rax), %rcx
+ movq %rcx, -88(%rsp)
+ movq 40(%rax), %r10
+ movq %r10, -80(%rsp)
+ movq 48(%rax), %rdx
+ movq %rdx, -72(%rsp)
+ movq 56(%rax), %r11
+ movq %r11, -64(%rsp)
+ movq 64(%rax), %r8
+ movq %r8, -56(%rsp)
+ movq 72(%rax), %rbp
+ movq %rbp, -48(%rsp)
+ movq 80(%rax), %r9
+ movq %r9, -40(%rsp)
+ movq 88(%rax), %r12
+ movq %r12, -32(%rsp)
+ movq 96(%rax), %rcx
+ movq %rcx, -24(%rsp)
+ movq 104(%rax), %r10
+ movq %r10, -16(%rsp)
+ movq 112(%rax), %rdx
+ movq %rdx, -8(%rsp)
+ movq 120(%rax), %r11
+ movq %r11, (%rsp)
+ movq 16(%rsi), %r11
+ movq (%r11), %r8
+ movq %r8, 8(%rsp)
+ movq 8(%r11), %rbp
+ movq %rbp, 16(%rsp)
+ movq 16(%r11), %r9
+ movq %r9, 24(%rsp)
+ movq 24(%r11), %r12
+ movq %r12, 32(%rsp)
+ movq 32(%r11), %rcx
+ movq %rcx, 40(%rsp)
+ movq 40(%r11), %r10
+ movq %r10, 48(%rsp)
+ movq 48(%r11), %rdx
+ movq %rdx, 56(%rsp)
+ movq 56(%r11), %rax
+ movq %rax, 64(%rsp)
+ movq 64(%r11), %r8
+ movq %r8, 72(%rsp)
+ movq 72(%r11), %rbp
+ movq %rbp, 80(%rsp)
+ movq 80(%r11), %r9
+ movq %r9, 88(%rsp)
+ movq 88(%r11), %r12
+ movq %r12, 96(%rsp)
+ movq 96(%r11), %rcx
+ movq %rcx, 104(%rsp)
+ movq 104(%r11), %r10
+ movq %r10, 112(%rsp)
+ movq 112(%r11), %rdx
+ xorl %r10d, %r10d
+ movq %r10, %r8
+ movq %r10, %r9
+ movq %r10, %rbp
+ movq %rdx, 120(%rsp)
+ movq 120(%r11), %rax
+ movq %rax, 128(%rsp)
+ movq 16(%rbx), %r11
+/APP
+ movq -120(%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rbp
+
+/NO_APP
+ movq %r8, (%r11)
+ movq %rbp, %r8
+ movq %r10, %rbp
+/APP
+ movq -120(%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%r9
+ adcq %rdx,%r8
+ adcq $0,%rbp
+
+/NO_APP
+ movq %rbp, %r12
+/APP
+ movq -112(%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%r9
+ adcq %rdx,%r8
+ adcq $0,%r12
+
+/NO_APP
+ movq %r9, 8(%r11)
+ movq %r12, %r9
+ movq %r10, %r12
+/APP
+ movq -120(%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%r12
+
+/NO_APP
+ movq %r12, %rcx
+/APP
+ movq -112(%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq -104(%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 16(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -120(%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -112(%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -104(%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq -96(%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 24(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -120(%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -112(%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -104(%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -96(%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq -88(%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 32(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -120(%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -112(%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -104(%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -96(%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -88(%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq -80(%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 40(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -120(%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -112(%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -104(%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -96(%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -88(%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -80(%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq -72(%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 48(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -120(%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -112(%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -104(%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -96(%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -88(%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -80(%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -72(%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq -64(%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 56(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -120(%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -112(%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -104(%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -96(%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -88(%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -80(%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -72(%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -64(%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq -56(%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 64(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -120(%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -112(%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -104(%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -96(%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -88(%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -80(%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -72(%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -64(%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -56(%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq -48(%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 72(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -120(%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -112(%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -104(%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -96(%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -88(%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -80(%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -72(%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -64(%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -56(%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -48(%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq -40(%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 80(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -120(%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -112(%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -104(%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -96(%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -88(%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -80(%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -72(%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -64(%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -56(%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -48(%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -40(%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq -32(%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 88(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -120(%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -112(%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -104(%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -96(%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -88(%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -80(%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -72(%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -64(%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -56(%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -48(%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -40(%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -32(%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq -24(%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 96(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -120(%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -112(%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -104(%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -96(%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -88(%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -80(%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -72(%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -64(%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -56(%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -48(%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -40(%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -32(%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -24(%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq -16(%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 104(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -120(%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -112(%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -104(%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -96(%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -88(%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -80(%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -72(%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -64(%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -56(%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -48(%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -40(%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -32(%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -24(%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -16(%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq -8(%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 112(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -120(%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -112(%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -104(%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -96(%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -88(%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -80(%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -72(%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -64(%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -56(%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -48(%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -40(%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -32(%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -24(%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -16(%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -8(%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq (%rsp),%rax
+ mulq 8(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 120(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -112(%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -104(%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -96(%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -88(%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -80(%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -72(%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -64(%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -56(%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -48(%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -40(%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -32(%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -24(%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -16(%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -8(%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq (%rsp),%rax
+ mulq 16(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 128(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -104(%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -96(%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -88(%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -80(%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -72(%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -64(%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -56(%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -48(%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -40(%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -32(%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -24(%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -16(%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -8(%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq (%rsp),%rax
+ mulq 24(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 136(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -96(%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -88(%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -80(%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -72(%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -64(%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -56(%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -48(%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -40(%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -32(%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -24(%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -16(%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -8(%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq (%rsp),%rax
+ mulq 32(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 144(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -88(%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -80(%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -72(%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -64(%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -56(%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -48(%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -40(%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -32(%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -24(%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -16(%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -8(%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq (%rsp),%rax
+ mulq 40(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 152(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -80(%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -72(%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -64(%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -56(%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -48(%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -40(%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -32(%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -24(%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -16(%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -8(%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq (%rsp),%rax
+ mulq 48(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 160(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -72(%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -64(%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -56(%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -48(%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -40(%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -32(%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -24(%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -16(%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -8(%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq (%rsp),%rax
+ mulq 56(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 168(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -64(%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -56(%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -48(%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -40(%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -32(%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -24(%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -16(%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -8(%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq (%rsp),%rax
+ mulq 64(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 176(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -56(%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -48(%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -40(%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -32(%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -24(%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -16(%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -8(%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq (%rsp),%rax
+ mulq 72(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 184(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -48(%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -40(%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -32(%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -24(%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -16(%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -8(%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq (%rsp),%rax
+ mulq 80(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 192(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -40(%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -32(%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -24(%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -16(%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -8(%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq (%rsp),%rax
+ mulq 88(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 200(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -32(%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -24(%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -16(%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -8(%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq (%rsp),%rax
+ mulq 96(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 208(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -24(%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -16(%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+ movq -8(%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r9, %rbp
+ movq %r8, %r12
+/APP
+ movq (%rsp),%rax
+ mulq 104(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 216(%r11)
+ movq %r12, %r9
+ movq %rbp, %r8
+ movq %r10, %rcx
+/APP
+ movq -16(%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+ movq -8(%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rcx
+
+/NO_APP
+ movq %r9, %rbp
+ movq %rcx, %r12
+/APP
+ movq (%rsp),%rax
+ mulq 112(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, 224(%r11)
+ movq %r12, %r9
+ movq %rbp, %rcx
+ movq %r10, %r8
+/APP
+ movq -8(%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+ movq %r8, %r12
+ movq %r9, %rbp
+/APP
+ movq (%rsp),%rax
+ mulq 120(%rsp)
+ addq %rax,%rcx
+ adcq %rdx,%rbp
+ adcq $0,%r12
+
+/NO_APP
+ movq %rcx, 232(%r11)
+ movq %rbp, %r8
+ movq %r12, %rcx
+/APP
+ movq (%rsp),%rax
+ mulq 128(%rsp)
+ addq %rax,%r8
+ adcq %rdx,%rcx
+ adcq $0,%r10
+
+/NO_APP
+ movq %r8, 240(%r11)
+ movl (%rsi), %esi
+ xorl (%rdi), %esi
+ testq %rcx, %rcx
+ movq %rcx, 248(%r11)
+ movl $32, 8(%rbx)
+ jne .L76
+ .align 16
+.L84:
+ movl 8(%rbx), %edx
+ leal -1(%rdx), %edi
+ testl %edi, %edi
+ movl %edi, 8(%rbx)
+ je .L76
+ leal -2(%rdx), %eax
+ cmpq $0, (%r11,%rax,8)
+ je .L84
+.L76:
+ movl 8(%rbx), %edx
+ xorl %r11d, %r11d
+ testl %edx, %edx
+ cmovne %esi, %r11d
+ movl %r11d, (%rbx)
+ addq $136, %rsp
+ popq %rbx
+ popq %rbp
+ popq %r12
+ ret
+.LFE4:
+ .size s_mp_mul_comba_16, .-s_mp_mul_comba_16
+ .align 16
+.globl s_mp_mul_comba_32
+ .type s_mp_mul_comba_32, @function
+s_mp_mul_comba_32:
+.LFB5:
+ pushq %rbp
+.LCFI11:
+ movq %rsp, %rbp
+.LCFI12:
+ pushq %r13
+.LCFI13:
+ movq %rdx, %r13
+ movl $256, %edx
+ pushq %r12
+.LCFI14:
+ movq %rsi, %r12
+ pushq %rbx
+.LCFI15:
+ movq %rdi, %rbx
+ subq $520, %rsp
+.LCFI16:
+ movq 16(%rdi), %rsi
+ leaq -544(%rbp), %rdi
+ call memcpy@PLT
+ movq 16(%r12), %rsi
+ leaq -288(%rbp), %rdi
+ movl $256, %edx
+ call memcpy@PLT
+ movq 16(%r13), %r9
+ xorl %r8d, %r8d
+ movq %r8, %rsi
+ movq %r8, %rdi
+ movq %r8, %r10
+/APP
+ movq -544(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+/NO_APP
+ movq %rsi, (%r9)
+ movq %r10, %rsi
+ movq %r8, %r10
+/APP
+ movq -544(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%r10
+
+/NO_APP
+ movq %r10, %r11
+/APP
+ movq -536(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%r11
+
+/NO_APP
+ movq %rdi, 8(%r9)
+ movq %r11, %rdi
+ movq %r8, %r11
+/APP
+ movq -544(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%r11
+
+/NO_APP
+ movq %r11, %rcx
+/APP
+ movq -536(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -528(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 16(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -544(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -536(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -528(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -520(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 24(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -544(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -536(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -528(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -520(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -512(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 32(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -544(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -536(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -528(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -520(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -512(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -504(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 40(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -544(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -536(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -528(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -520(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -512(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -504(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -496(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 48(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -544(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -536(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -528(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -520(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -512(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -504(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -496(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -488(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 56(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -544(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -536(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -528(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -520(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -512(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -504(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -496(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -488(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -480(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 64(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -544(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -536(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -528(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -520(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -512(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -504(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -496(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -488(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -480(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -472(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 72(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -544(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -536(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -528(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -520(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -512(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -504(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -496(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -488(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -480(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -472(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -464(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 80(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -544(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -536(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -528(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -520(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -512(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -504(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -496(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -488(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -480(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -472(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -464(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -456(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 88(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -544(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -536(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -528(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -520(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -512(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -504(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -496(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -488(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -480(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -472(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -464(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -456(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -448(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 96(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -544(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -536(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -528(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -520(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -512(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -504(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -496(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -488(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -480(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -472(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -464(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -456(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -448(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -440(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 104(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -544(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -536(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -528(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -520(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -512(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -504(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -496(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -488(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -480(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -472(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -464(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -456(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -448(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -440(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -432(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 112(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -544(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -536(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -528(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -520(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -512(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -504(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -496(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -488(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -480(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -472(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -464(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -456(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -448(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -440(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -432(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -424(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 120(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -544(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -536(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -528(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -520(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -512(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -504(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -496(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -488(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -480(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -472(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -464(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -456(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -448(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -440(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -432(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -424(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -416(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 128(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -544(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -536(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -528(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -520(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -512(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -504(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -496(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -488(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -480(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -472(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -464(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -456(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -448(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -440(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -432(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -424(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -416(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -408(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 136(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -544(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -536(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -528(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -520(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -512(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -504(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -496(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -488(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -480(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -472(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -464(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -456(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -448(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -440(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -432(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -424(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -416(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -408(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -400(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 144(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -544(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -536(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -528(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -520(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -512(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -504(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -496(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -488(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -480(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -472(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -464(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -456(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -448(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -440(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -432(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -424(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -416(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -408(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -400(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -392(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 152(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -544(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -536(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -528(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -520(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -512(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -504(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -496(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -488(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -480(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -472(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -464(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -456(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -448(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -440(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -432(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -424(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -416(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -408(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -400(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -392(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -384(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 160(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -544(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -536(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -528(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -520(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -512(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -504(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -496(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -488(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -480(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -472(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -464(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -456(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -448(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -440(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -432(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -424(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -416(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -408(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -400(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -392(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -384(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -376(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 168(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -544(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -536(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -528(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -520(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -512(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -504(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -496(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -488(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -480(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -472(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -464(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -456(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -448(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -440(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -432(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -424(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -416(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -408(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -400(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -392(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -384(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -376(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -368(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 176(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -544(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -536(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -528(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -520(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -512(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -504(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -496(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -488(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -480(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -472(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -464(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -456(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -448(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -440(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -432(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -424(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -416(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -408(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -400(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -392(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -384(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -376(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -368(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -360(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 184(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -544(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -536(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -528(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -520(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -512(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -504(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -496(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -488(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -480(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -472(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -464(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -456(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -448(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -440(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -432(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -424(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -416(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -408(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -400(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -392(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -384(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -376(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -368(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -360(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -352(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 192(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -544(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -536(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -528(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -520(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -512(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -504(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -496(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -488(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -480(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -472(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -464(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -456(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -448(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -440(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -432(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -424(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -416(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -408(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -400(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -392(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -384(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -376(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -368(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -360(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -352(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -344(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 200(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -544(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -536(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -528(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -520(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -512(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -504(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -496(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -488(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -480(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -472(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -464(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -456(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -448(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -440(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -432(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -424(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -416(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -408(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -400(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -392(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -384(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -376(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -368(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -360(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -352(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -344(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -336(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 208(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -544(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -536(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -528(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -520(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -512(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -504(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -496(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -488(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -480(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -472(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -464(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -456(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -448(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -440(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -432(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -424(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -416(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -408(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -400(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -392(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -384(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -376(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -368(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -360(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -352(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -344(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -336(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -328(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 216(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -544(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -536(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -528(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -520(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -512(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -504(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -496(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -488(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -480(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -472(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -464(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -456(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -448(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -440(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -432(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -424(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -416(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -408(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -400(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -392(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -384(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -376(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -368(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -360(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -352(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -344(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -336(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -328(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -320(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 224(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -544(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -536(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -528(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -520(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -512(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -504(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -496(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -488(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -480(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -472(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -464(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -456(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -448(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -440(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -432(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -424(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -416(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -408(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -400(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -392(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -384(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -376(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -368(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -360(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -352(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -344(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -336(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -328(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -320(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -312(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 232(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -544(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -536(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -528(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -520(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -512(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -504(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -496(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -488(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -480(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -472(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -464(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -456(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -448(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -440(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -432(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -424(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -416(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -408(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -400(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -392(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -384(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -376(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -368(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -360(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -352(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -344(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -336(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -328(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -320(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -312(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -304(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 240(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -544(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -536(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -528(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -520(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -512(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -504(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -496(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -488(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -480(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -472(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -464(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -456(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -448(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -440(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -432(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -424(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -416(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -408(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -400(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -392(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -384(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -376(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -368(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -360(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -352(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -344(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -336(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -328(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -320(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -312(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -304(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -288(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 248(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -536(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -528(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -520(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -512(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -504(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -496(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -488(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -480(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -472(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -464(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -456(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -448(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -440(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -432(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -424(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -416(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -408(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -400(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -392(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -384(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -376(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -368(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -360(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -352(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -344(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -336(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -328(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -320(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -312(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -304(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -280(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 256(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -528(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -520(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -512(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -504(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -496(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -488(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -480(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -472(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -464(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -456(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -448(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -440(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -432(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -424(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -416(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -408(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -400(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -392(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -384(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -376(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -368(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -360(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -352(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -344(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -336(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -328(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -320(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -312(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -304(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -272(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 264(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -520(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -512(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -504(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -496(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -488(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -480(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -472(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -464(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -456(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -448(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -440(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -432(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -424(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -416(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -408(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -400(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -392(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -384(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -376(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -368(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -360(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -352(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -344(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -336(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -328(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -320(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -312(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -304(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -264(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 272(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -512(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -504(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -496(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -488(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -480(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -472(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -464(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -456(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -448(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -440(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -432(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -424(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -416(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -408(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -400(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -392(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -384(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -376(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -368(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -360(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -352(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -344(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -336(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -328(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -320(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -312(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -304(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -256(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 280(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -504(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -496(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -488(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -480(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -472(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -464(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -456(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -448(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -440(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -432(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -424(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -416(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -408(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -400(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -392(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -384(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -376(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -368(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -360(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -352(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -344(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -336(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -328(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -320(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -312(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -304(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -248(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 288(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -496(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -488(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -480(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -472(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -464(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -456(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -448(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -440(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -432(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -424(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -416(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -408(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -400(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -392(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -384(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -376(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -368(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -360(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -352(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -344(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -336(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -328(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -320(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -312(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -304(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -240(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 296(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -488(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -480(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -472(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -464(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -456(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -448(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -440(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -432(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -424(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -416(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -408(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -400(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -392(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -384(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -376(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -368(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -360(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -352(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -344(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -336(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -328(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -320(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -312(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -304(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -232(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 304(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -480(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -472(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -464(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -456(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -448(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -440(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -432(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -424(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -416(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -408(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -400(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -392(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -384(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -376(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -368(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -360(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -352(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -344(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -336(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -328(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -320(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -312(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -304(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -224(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 312(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -472(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -464(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -456(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -448(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -440(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -432(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -424(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -416(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -408(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -400(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -392(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -384(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -376(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -368(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -360(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -352(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -344(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -336(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -328(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -320(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -312(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -304(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -216(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 320(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -464(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -456(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -448(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -440(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -432(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -424(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -416(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -408(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -400(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -392(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -384(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -376(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -368(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -360(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -352(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -344(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -336(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -328(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -320(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -312(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -304(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -208(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 328(%r9)
+ movq %r11, %rdi
+ movq %r10, %r11
+ movq %r8, %r10
+/APP
+ movq -456(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -448(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -440(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -432(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -424(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -416(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -408(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -400(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -392(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -384(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -376(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -368(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -360(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -352(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -344(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -336(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -328(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -320(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -312(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -304(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+ movq -296(%rbp),%rax
+ mulq -200(%rbp)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%r10
+
+/NO_APP
+ movq %r11, 336(%r9)
+ movq %r10, %rsi
+ movq %r8, %r10
+/APP
+ movq -448(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%r10
+
+/NO_APP
+ movq %r10, %rcx
+/APP
+ movq -440(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -432(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -424(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -416(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -408(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -400(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -392(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -384(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -376(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -368(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -360(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -352(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -344(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -336(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -328(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -320(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -312(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+ movq -304(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rsi, %r11
+ movq %rcx, %r10
+/APP
+ movq -296(%rbp),%rax
+ mulq -192(%rbp)
+ addq %rax,%rdi
+ adcq %rdx,%r11
+ adcq $0,%r10
+
+/NO_APP
+ movq %rdi, 344(%r9)
+ movq %r11, %rcx
+ movq %r10, %rdi
+ movq %r8, %r11
+/APP
+ movq -440(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%r11
+
+/NO_APP
+ movq %r11, %rsi
+/APP
+ movq -432(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -424(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -416(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -408(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -400(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -392(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -384(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -376(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -368(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -360(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -352(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -344(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -336(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -328(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -320(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -312(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -304(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -184(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 352(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -432(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -424(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -416(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -408(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -400(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -392(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -384(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -376(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -368(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -360(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -352(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -344(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -336(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -328(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -320(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -312(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -304(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -176(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 360(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -424(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -416(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -408(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -400(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -392(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -384(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -376(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -368(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -360(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -352(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -344(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -336(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -328(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -320(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -312(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -304(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -168(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 368(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -416(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -408(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -400(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -392(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -384(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -376(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -368(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -360(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -352(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -344(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -336(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -328(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -320(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -312(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -304(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -160(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 376(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -408(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -400(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -392(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -384(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -376(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -368(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -360(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -352(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -344(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -336(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -328(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -320(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -312(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -304(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -152(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 384(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -400(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -392(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -384(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -376(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -368(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -360(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -352(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -344(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -336(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -328(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -320(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -312(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -304(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -144(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 392(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -392(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -384(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -376(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -368(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -360(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -352(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -344(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -336(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -328(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -320(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -312(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -304(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -136(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 400(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -384(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -376(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -368(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -360(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -352(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -344(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -336(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -328(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -320(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -312(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -304(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -128(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 408(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -376(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -368(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -360(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -352(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -344(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -336(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -328(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -320(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -312(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -304(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -120(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 416(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -368(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -360(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -352(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -344(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -336(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -328(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -320(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -312(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -304(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -112(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 424(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -360(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -352(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -344(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -336(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -328(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -320(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -312(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -304(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -104(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 432(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -352(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -344(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -336(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -328(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -320(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -312(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -304(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -96(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 440(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -344(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -336(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -328(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -320(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -312(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -304(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -88(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 448(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -336(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -328(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -320(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -312(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -304(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -80(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 456(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -328(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -320(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -312(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -304(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -72(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 464(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -320(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -312(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+ movq -304(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rcx, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -64(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 472(%r9)
+ movq %r11, %rdi
+ movq %r10, %rcx
+ movq %r8, %rsi
+/APP
+ movq -312(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq -304(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r10
+ movq %rsi, %r11
+/APP
+ movq -296(%rbp),%rax
+ mulq -56(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rcx, 480(%r9)
+ movq %r11, %rdi
+ movq %r10, %rsi
+ movq %r8, %rcx
+/APP
+ movq -304(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%rdi
+ adcq $0,%rcx
+
+/NO_APP
+ movq %rcx, %r11
+ movq %rdi, %r10
+/APP
+ movq -296(%rbp),%rax
+ mulq -48(%rbp)
+ addq %rax,%rsi
+ adcq %rdx,%r10
+ adcq $0,%r11
+
+/NO_APP
+ movq %rsi, 488(%r9)
+ movq %r10, %rcx
+ movq %r11, %rsi
+/APP
+ movq -296(%rbp),%rax
+ mulq -40(%rbp)
+ addq %rax,%rcx
+ adcq %rdx,%rsi
+ adcq $0,%r8
+
+/NO_APP
+ movq %rcx, 496(%r9)
+ movl (%r12), %ecx
+ xorl (%rbx), %ecx
+ testq %rsi, %rsi
+ movq %rsi, 504(%r9)
+ movl $64, 8(%r13)
+ jne .L149
+ .align 16
+.L157:
+ movl 8(%r13), %edx
+ leal -1(%rdx), %ebx
+ testl %ebx, %ebx
+ movl %ebx, 8(%r13)
+ je .L149
+ leal -2(%rdx), %r12d
+ cmpq $0, (%r9,%r12,8)
+ je .L157
+.L149:
+ movl 8(%r13), %r9d
+ xorl %edx, %edx
+ testl %r9d, %r9d
+ cmovne %ecx, %edx
+ movl %edx, (%r13)
+ addq $520, %rsp
+ popq %rbx
+ popq %r12
+ popq %r13
+ leave
+ ret
+.LFE5:
+ .size s_mp_mul_comba_32, .-s_mp_mul_comba_32
+ .align 16
+/ s_mp_sqr_comba_4 -- square a 4-digit (4 x 64-bit) number into 8 digits.
+/
+/ Inputs (as used by the code below):
+/   %rdi  input operand;  the qword at 16(%rdi) is a pointer to four
+/         64-bit digits, least significant first.
+/   %rsi  output operand; the code stores a 32-bit 0 at offset 0, a
+/         32-bit 8 at offset 8, and eight 64-bit digits through the
+/         pointer held at offset 16.
+/   NOTE(review): these offsets look like the sign / used / dp fields
+/   of an mp_int -- confirm against the struct definition.
+/
+/ Method: column-wise ("comba") accumulation.  For each result column k
+/ the products a[i]*a[j] with i+j == k are summed into a three-register
+/ accumulator (low, high, carry) using addq/adcq/adcq.  Cross products
+/ (i != j) are added twice; squares a[i]*a[i] are added once.  After a
+/ column is finished its low word is stored and the accumulator is
+/ shifted down by one word for the next column.
+/
+/ The eight result words are staged at -72(%rsp)..-16(%rsp), i.e. below
+/ the stack pointer without adjusting it, and copied to the output digit
+/ array only after the last multiply.
+/ NOTE(review): this relies on the ABI guaranteeing that memory just
+/ below %rsp is not clobbered asynchronously (red zone) -- confirm for
+/ the target platform.
+/
+/ After the copy the digit count is reduced while the top digit is zero,
+/ and the sign word is forced to 0 if the count reaches 0.
+/ %rbx and %rbp are saved on entry and restored before returning.
+.globl s_mp_sqr_comba_4
+ .type s_mp_sqr_comba_4, @function
+s_mp_sqr_comba_4:
+.LFB6:
+ pushq %rbp
+.LCFI17:
+/ r11 = output operand; rsi is then cleared and used as a zero source.
+ movq %rsi, %r11
+ xorl %esi, %esi
+ movq %rsi, %r10
+ movq %rsi, %rbp
+ movq %rsi, %r8
+ pushq %rbx
+.LCFI18:
+ movq %rsi, %rbx
+/ rcx = pointer to the input digits a[0..3]; rdi is reused as a zero.
+ movq 16(%rdi), %rcx
+ movq %rsi, %rdi
+/ Column 0: a[0]*a[0] into accumulator (r10, rbx, rdi).
+/APP
+ movq (%rcx),%rax
+ mulq %rax
+ addq %rax,%r10
+ adcq %rdx,%rbx
+ adcq $0,%rdi
+
+/NO_APP
+/ Result word 0 (also kept in r10, which is what the copy-out uses).
+ movq %r10, -72(%rsp)
+/ Column 1: 2 * a[0]*a[1] into accumulator (rbx, rdi, rbp).
+/APP
+ movq (%rcx),%rax
+ mulq 8(%rcx)
+ addq %rax,%rbx
+ adcq %rdx,%rdi
+ adcq $0,%rbp
+ addq %rax,%rbx
+ adcq %rdx,%rdi
+ adcq $0,%rbp
+
+/NO_APP
+/ Result word 1.
+ movq %rbx, -64(%rsp)
+/ Column 2: 2 * a[0]*a[2] into accumulator (rdi, rbp, r8) ...
+/APP
+ movq (%rcx),%rax
+ mulq 16(%rcx)
+ addq %rax,%rdi
+ adcq %rdx,%rbp
+ adcq $0,%r8
+ addq %rax,%rdi
+ adcq %rdx,%rbp
+ adcq $0,%r8
+
+/NO_APP
+/ ... accumulator moves to (rdi, rbx, rbp), then + a[1]*a[1].
+ movq %rbp, %rbx
+ movq %r8, %rbp
+/APP
+ movq 8(%rcx),%rax
+ mulq %rax
+ addq %rax,%rdi
+ adcq %rdx,%rbx
+ adcq $0,%rbp
+
+/NO_APP
+/ Result word 2; shift accumulator down into (r8, r9, rdi=0).
+ movq %rdi, -56(%rsp)
+ movq %rbp, %r9
+ movq %rbx, %r8
+ movq %rsi, %rdi
+/ Column 3: 2 * a[0]*a[3] ...
+/APP
+ movq (%rcx),%rax
+ mulq 24(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rdi
+ addq %rax,%r8
+ adcq %rdx,%r9
+ adcq $0,%rdi
+
+/NO_APP
+/ ... accumulator moves to (r8, rbx, rbp), then + 2 * a[1]*a[2].
+ movq %r9, %rbx
+ movq %rdi, %rbp
+/APP
+ movq 8(%rcx),%rax
+ mulq 16(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rbx
+ adcq $0,%rbp
+ addq %rax,%r8
+ adcq %rdx,%rbx
+ adcq $0,%rbp
+
+/NO_APP
+/ Result word 3; shift accumulator down into (rdi, r9, r8=0).
+ movq %r8, -48(%rsp)
+ movq %rbp, %r9
+ movq %rbx, %rdi
+ movq %rsi, %r8
+/ Output header: 32-bit word at offset 8 := 8, at offset 0 := 0.
+/ Only the header is written here; the output digits are stored later.
+ movl $8, 8(%r11)
+ movl $0, (%r11)
+/ Column 4: 2 * a[1]*a[3] ...
+/APP
+ movq 8(%rcx),%rax
+ mulq 24(%rcx)
+ addq %rax,%rdi
+ adcq %rdx,%r9
+ adcq $0,%r8
+ addq %rax,%rdi
+ adcq %rdx,%r9
+ adcq $0,%r8
+
+/NO_APP
+/ ... accumulator moves to (rdi, rbx, rbp), then + a[2]*a[2].
+ movq %r9, %rbx
+ movq %r8, %rbp
+/APP
+ movq 16(%rcx),%rax
+ mulq %rax
+ addq %rax,%rdi
+ adcq %rdx,%rbx
+ adcq $0,%rbp
+
+/NO_APP
+/ Result word 4; shift accumulator down into (rbp, rdi, rbx=0),
+/ using rax as a temporary for the old carry word.
+ movq %rbp, %rax
+ movq %rdi, -40(%rsp)
+ movq %rbx, %rbp
+ movq %rax, %rdi
+ movq %rsi, %rbx
+/ Column 5: 2 * a[2]*a[3].
+/APP
+ movq 16(%rcx),%rax
+ mulq 24(%rcx)
+ addq %rax,%rbp
+ adcq %rdx,%rdi
+ adcq $0,%rbx
+ addq %rax,%rbp
+ adcq %rdx,%rdi
+ adcq $0,%rbx
+
+/NO_APP
+/ Result word 5; accumulator becomes (rdi, r9, rsi).
+ movq %rbp, -32(%rsp)
+ movq %rbx, %r9
+/ Column 6: a[3]*a[3].  The carry added into rsi is not read afterwards.
+/APP
+ movq 24(%rcx),%rax
+ mulq %rax
+ addq %rax,%rdi
+ adcq %rdx,%r9
+ adcq $0,%rsi
+
+/NO_APP
+/ rdx = output digit pointer.  Store result words 6 and 7 to the
+/ staging area, then copy all eight words out (word 0 comes straight
+/ from r10, words 1..7 from the stack slots).
+ movq 16(%r11), %rdx
+ movq %rdi, -24(%rsp)
+ movq %r9, -16(%rsp)
+ movq %r10, (%rdx)
+ movq -64(%rsp), %r8
+ movq %r8, 8(%rdx)
+ movq -56(%rsp), %rbp
+ movq %rbp, 16(%rdx)
+ movq -48(%rsp), %rdi
+ movq %rdi, 24(%rdx)
+ movq -40(%rsp), %rsi
+ movq %rsi, 32(%rdx)
+ movq -32(%rsp), %rbx
+ movq %rbx, 40(%rdx)
+ movq -24(%rsp), %rcx
+ movq %rcx, 48(%rdx)
+ movq -16(%rsp), %rax
+ movq %rax, 56(%rdx)
+/ Trim leading zero digits: edx = digit count from offset 8.
+ movl 8(%r11), %edx
+ testl %edx, %edx
+ je .L168
+/ Test the top digit, index count-1 (the 32-bit mov zero-extends the
+/ index into r10).  Non-zero top digit: nothing to trim.
+ leal -1(%rdx), %ecx
+ movq 16(%r11), %rsi
+ mov %ecx, %r10d
+ cmpq $0, (%rsi,%r10,8)
+ jne .L166
+ movl %ecx, %edx
+ .align 16
+/ Loop: ecx holds the candidate count, edx the index being tested.
+/ Exits to .L171 when the count has reached 0, or falls through with
+/ the count in ecx when a non-zero digit is found.
+.L167:
+ testl %edx, %edx
+ movl %edx, %ecx
+ je .L171
+ decl %edx
+ mov %edx, %eax
+ cmpq $0, (%rsi,%rax,8)
+ je .L167
+ movl %ecx, 8(%r11)
+ movl %ecx, %edx
+/ Non-zero count: restore callee-saved registers and return.  The
+/ load/store pair on (%r11) writes the sign word back unchanged.
+.L166:
+ testl %edx, %edx
+ je .L168
+ popq %rbx
+ popq %rbp
+ movl (%r11), %eax
+ movl %eax, (%r11)
+ ret
+/ Count reached zero inside the loop: record it (edx == 0 here).
+.L171:
+ movl %edx, 8(%r11)
+ .align 16
+/ Zero count: force the 32-bit word at offset 0 to 0 and return.
+.L168:
+ popq %rbx
+ popq %rbp
+ xorl %eax, %eax
+ movl %eax, (%r11)
+ ret
+.LFE6:
+ .size s_mp_sqr_comba_4, .-s_mp_sqr_comba_4
+ .align 16
+/ s_mp_sqr_comba_8: fully unrolled squaring of an 8-word number into 16 words.
+/
+/ As used below, %rdi addresses the input descriptor and %rsi the output
+/ descriptor (kept in %rbp). Each descriptor is accessed as a 32-bit field at
+/ offset 0, a 32-bit field at offset 8 and a pointer to 64-bit words at
+/ offset 16 -- presumably the sign/used/dp members of mp_int; confirm against
+/ the C declaration. a[k] below means the input word at byte offset 8*k.
+/
+/ Result columns are built one at a time in three-register carry chains and
+/ parked in the slots -120(%rsp) .. (%rsp), then copied to the output words.
+/ %r9 is zeroed at entry and used as a zero source until the last column.
+/ This routine makes no calls.
+/ NOTE(review): no check of the output buffer size is visible here, and the
+/ scratch slots are not cleared before returning.
+.globl s_mp_sqr_comba_8
+ .type s_mp_sqr_comba_8, @function
+s_mp_sqr_comba_8:
+.LFB7:
+ pushq %r14
+.LCFI19:
+ xorl %r9d, %r9d
+ movq %r9, %r14
+ movq %r9, %r10
+ pushq %r13
+.LCFI20:
+ movq %r9, %r13
+ pushq %r12
+.LCFI21:
+ movq %r9, %r12
+ pushq %rbp
+.LCFI22:
+ movq %rsi, %rbp
+ movq %r9, %rsi
+ pushq %rbx
+.LCFI23:
+ movq %r9, %rbx
+ subq $8, %rsp
+.LCFI24:
+ movq 16(%rdi), %rcx
+/ Column 0: a[0]*a[0].
+/APP
+ movq (%rcx),%rax
+ mulq %rax
+ addq %rax,%r14
+ adcq %rdx,%rbx
+ adcq $0,%r12
+
+/NO_APP
+ movq %r14, -120(%rsp)
+/ Column 1: the cross product a[0]*a[1] is added twice.
+/APP
+ movq (%rcx),%rax
+ mulq 8(%rcx)
+ addq %rax,%rbx
+ adcq %rdx,%r12
+ adcq $0,%r10
+ addq %rax,%rbx
+ adcq %rdx,%r12
+ adcq $0,%r10
+
+/NO_APP
+ movq %rbx, -112(%rsp)
+/APP
+ movq (%rcx),%rax
+ mulq 16(%rcx)
+ addq %rax,%r12
+ adcq %rdx,%r10
+ adcq $0,%r13
+ addq %rax,%r12
+ adcq %rdx,%r10
+ adcq $0,%r13
+
+/NO_APP
+ movq %r10, %rbx
+ movq %r13, %r10
+ movq %r9, %r13
+/APP
+ movq 8(%rcx),%rax
+ mulq %rax
+ addq %rax,%r12
+ adcq %rdx,%rbx
+ adcq $0,%r10
+
+/NO_APP
+ movq %r12, -104(%rsp)
+ movq %r10, %rdi
+ movq %rbx, %r11
+/APP
+ movq (%rcx),%rax
+ mulq 24(%rcx)
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+ addq %rax,%r11
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %rbx
+ movq %rsi, %r10
+ movq %r9, %rdi
+/APP
+ movq 8(%rcx),%rax
+ mulq 16(%rcx)
+ addq %rax,%r11
+ adcq %rdx,%rbx
+ adcq $0,%r10
+ addq %rax,%r11
+ adcq %rdx,%rbx
+ adcq $0,%r10
+
+/NO_APP
+ movq %r9, %rsi
+ movq %r11, -96(%rsp)
+ movq %r10, %r8
+ movq %rbx, %r12
+ movq %r9, %r11
+/APP
+ movq (%rcx),%rax
+ mulq 32(%rcx)
+ addq %rax,%r12
+ adcq %rdx,%r8
+ adcq $0,%r13
+ addq %rax,%r12
+ adcq %rdx,%r8
+ adcq $0,%r13
+
+ movq 8(%rcx),%rax
+ mulq 24(%rcx)
+ addq %rax,%r12
+ adcq %rdx,%r8
+ adcq $0,%r13
+ addq %rax,%r12
+ adcq %rdx,%r8
+ adcq $0,%r13
+
+/NO_APP
+ movq %r8, %rbx
+ movq %r13, %r10
+ movq %r9, %r8
+/APP
+ movq 16(%rcx),%rax
+ mulq %rax
+ addq %rax,%r12
+ adcq %rdx,%rbx
+ adcq $0,%r10
+
+/NO_APP
+ movq %r12, -88(%rsp)
+/ Column 5 (three cross products): the products are first summed into the
+/ temporary triple %r8/%rdi/%rsi, and that sum is then added to the column
+/ accumulator twice. The same shape is used for the other wide columns.
+/APP
+ movq (%rcx),%rax
+ mulq 40(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 32(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 24(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ addq %r8,%rbx
+ adcq %rdi,%r10
+ adcq %rsi,%r11
+ addq %r8,%rbx
+ adcq %rdi,%r10
+ adcq %rsi,%r11
+
+/NO_APP
+ movq %rbx, -80(%rsp)
+/APP
+ movq (%rcx),%rax
+ mulq 48(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 40(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 32(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r9, %rax
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rax
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rax
+
+/NO_APP
+ movq %rax, %rdx
+ movq %r11, %rbx
+ movq %r13, %rdi
+ movq %rdx, %r11
+ movq %r12, %rsi
+/APP
+ movq 24(%rcx),%rax
+ mulq %rax
+ addq %rax,%r10
+ adcq %rdx,%rbx
+ adcq $0,%r11
+
+/NO_APP
+ movq %r10, -72(%rsp)
+ movq %r11, %r10
+/APP
+ movq (%rcx),%rax
+ mulq 56(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 40(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 32(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r9, %rax
+/APP
+ addq %r8,%rbx
+ adcq %rdi,%r10
+ adcq %rsi,%rax
+ addq %r8,%rbx
+ adcq %rdi,%r10
+ adcq %rsi,%rax
+
+/NO_APP
+ movq %rbx, -64(%rsp)
+ movq %rax, %r11
+ movq %r9, %rbx
+/APP
+ movq 8(%rcx),%rax
+ mulq 56(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 40(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rbx
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rbx
+
+/NO_APP
+ movq %rbx, %rsi
+ movq %r13, %rdi
+ movq %r11, %rbx
+ movq %r12, %r13
+ movq %rsi, %r11
+/APP
+ movq 32(%rcx),%rax
+ mulq %rax
+ addq %rax,%r10
+ adcq %rdx,%rbx
+ adcq $0,%r11
+
+/NO_APP
+ movq %r10, -56(%rsp)
+ movq %r9, %r10
+/APP
+ movq 16(%rcx),%rax
+ mulq 56(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %r13,%r13
+
+ movq 24(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%r13
+
+ movq 32(%rcx),%rax
+ mulq 40(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%r13
+
+/NO_APP
+ movq %rdi, %r12
+ movq %r13, %rax
+/APP
+ addq %r8,%rbx
+ adcq %r12,%r11
+ adcq %rax,%r10
+ addq %r8,%rbx
+ adcq %r12,%r11
+ adcq %rax,%r10
+
+/NO_APP
+ movq %rbx, -48(%rsp)
+ movq %r11, %r12
+ movq %r10, %rsi
+ movq %r9, %rbx
+ movq %r9, %r11
+/APP
+ movq 24(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%r12
+ adcq %rdx,%rsi
+ adcq $0,%rbx
+ addq %rax,%r12
+ adcq %rdx,%rsi
+ adcq $0,%rbx
+
+/NO_APP
+ movq %rbx, %r13
+/APP
+ movq 32(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%r12
+ adcq %rdx,%rsi
+ adcq $0,%r13
+ addq %rax,%r12
+ adcq %rdx,%rsi
+ adcq $0,%r13
+
+/NO_APP
+ movq %rsi, %r10
+ movq %r13, %rbx
+ movq %r9, %r13
+/APP
+ movq 40(%rcx),%rax
+ mulq %rax
+ addq %rax,%r12
+ adcq %rdx,%r10
+ adcq $0,%rbx
+
+/NO_APP
+ movq %r12, -40(%rsp)
+ movq %rbx, %r8
+ movq %r10, %rdi
+/APP
+ movq 32(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%rdi
+ adcq %rdx,%r8
+ adcq $0,%r11
+ addq %rax,%rdi
+ adcq %rdx,%r8
+ adcq $0,%r11
+
+/NO_APP
+ movq %r8, %r10
+ movq %r11, %rbx
+/APP
+ movq 40(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%rdi
+ adcq %rdx,%r10
+ adcq $0,%rbx
+ addq %rax,%rdi
+ adcq %rdx,%r10
+ adcq $0,%rbx
+
+/NO_APP
+ movq %rdi, -32(%rsp)
+ movq %rbx, %rsi
+ movq %r10, %r12
+/APP
+ movq 40(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%r12
+ adcq %rdx,%rsi
+ adcq $0,%r13
+ addq %rax,%r12
+ adcq %rdx,%rsi
+ adcq $0,%r13
+
+/NO_APP
+ movq %rsi, %r10
+ movq %r13, %rbx
+/APP
+ movq 48(%rcx),%rax
+ mulq %rax
+ addq %rax,%r12
+ adcq %rdx,%r10
+ adcq $0,%rbx
+
+/NO_APP
+ movq %r12, -24(%rsp)
+ movq %r10, %rdi
+ movq %rbx, %rsi
+ movq %r9, %r10
+/ Output descriptor: field at offset 8 := 16 (result word count), field at
+/ offset 0 := 0.
+ movl $16, 8(%rbp)
+ movl $0, (%rbp)
+/APP
+ movq 48(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%r10
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%r10
+
+/NO_APP
+ movq %rdi, -16(%rsp)
+ movq %r10, %r8
+/ Last column: a[7]*a[7]; its carry (%r8) becomes the top word. The carry
+/ collected in %r9 is not used afterwards.
+/APP
+ movq 56(%rcx),%rax
+ mulq %rax
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%r9
+
+/NO_APP
+/ Copy the sixteen result words to the output word array. Word 0 is still
+/ live in %r14; the rest are reloaded from the scratch slots.
+ movq 16(%rbp), %rax
+ movq %rsi, -8(%rsp)
+ movq %r8, (%rsp)
+ movq %r14, (%rax)
+ movq -112(%rsp), %rbx
+ movq %rbx, 8(%rax)
+ movq -104(%rsp), %rcx
+ movq %rcx, 16(%rax)
+ movq -96(%rsp), %rdx
+ movq %rdx, 24(%rax)
+ movq -88(%rsp), %r14
+ movq %r14, 32(%rax)
+ movq -80(%rsp), %r13
+ movq %r13, 40(%rax)
+ movq -72(%rsp), %r12
+ movq %r12, 48(%rax)
+ movq -64(%rsp), %r11
+ movq %r11, 56(%rax)
+ movq -56(%rsp), %r10
+ movq %r10, 64(%rax)
+ movq -48(%rsp), %r9
+ movq %r9, 72(%rax)
+ movq -40(%rsp), %r8
+ movq %r8, 80(%rax)
+ movq -32(%rsp), %rdi
+ movq %rdi, 88(%rax)
+ movq -24(%rsp), %rsi
+ movq %rsi, 96(%rax)
+ movq -16(%rsp), %rbx
+ movq %rbx, 104(%rax)
+ movq -8(%rsp), %rcx
+ movq %rcx, 112(%rax)
+ movq (%rsp), %rdx
+ movq %rdx, 120(%rax)
+/ Trim the word count at offset 8 while the highest counted word is zero.
+ movl 8(%rbp), %edx
+ testl %edx, %edx
+ je .L192
+ leal -1(%rdx), %ecx
+ movq 16(%rbp), %rsi
+ mov %ecx, %r14d
+ cmpq $0, (%rsi,%r14,8)
+ jne .L190
+ movl %ecx, %edx
+ .align 16
+.L191:
+ testl %edx, %edx
+ movl %edx, %ecx
+ je .L195
+ decl %edx
+ mov %edx, %r9d
+ cmpq $0, (%rsi,%r9,8)
+ je .L191
+ movl %ecx, 8(%rbp)
+ movl %ecx, %edx
+.L190:
+ testl %edx, %edx
+ je .L192
+/ Non-zero count: the field at offset 0 is read and written back unchanged.
+ movl (%rbp), %eax
+ movl %eax, (%rbp)
+ addq $8, %rsp
+ popq %rbx
+ popq %rbp
+ popq %r12
+ popq %r13
+ popq %r14
+ ret
+.L195:
+ movl %edx, 8(%rbp)
+ .align 16
+/ Count is zero: store 0 into the field at offset 0.
+.L192:
+ xorl %eax, %eax
+ movl %eax, (%rbp)
+ addq $8, %rsp
+ popq %rbx
+ popq %rbp
+ popq %r12
+ popq %r13
+ popq %r14
+ ret
+.LFE7:
+ .size s_mp_sqr_comba_8, .-s_mp_sqr_comba_8
+ .align 16
+/ s_mp_sqr_comba_16: fully unrolled squaring of a 16-word number into 32 words.
+/
+/ As used below, %rdi addresses the input descriptor and %rsi the output
+/ descriptor (kept in %r14). Each descriptor is accessed as a 32-bit field at
+/ offset 0, a 32-bit field at offset 8 and a pointer to 64-bit words at
+/ offset 16 -- presumably the sign/used/dp members of mp_int; confirm against
+/ the C declaration. a[k] below means the input word at byte offset 8*k.
+/
+/ A frame pointer is set up and 256 bytes are reserved; the 32 result words
+/ are built column by column into -288(%rbp) .. -40(%rbp) and then copied to
+/ the output words with memcpy. %r9 is zeroed at entry and used as a zero
+/ source until the last column.
+/ NOTE(review): no check of the output buffer size is visible here, and the
+/ 256-byte scratch buffer is not cleared before returning.
+.globl s_mp_sqr_comba_16
+ .type s_mp_sqr_comba_16, @function
+s_mp_sqr_comba_16:
+.LFB8:
+ pushq %rbp
+.LCFI25:
+ xorl %r9d, %r9d
+ movq %r9, %r8
+ movq %r9, %r11
+ movq %rsp, %rbp
+.LCFI26:
+ pushq %r14
+.LCFI27:
+ movq %rsi, %r14
+ movq %r9, %rsi
+ pushq %r13
+.LCFI28:
+ movq %r9, %r13
+ pushq %r12
+.LCFI29:
+ movq %r9, %r12
+ pushq %rbx
+.LCFI30:
+ movq %r9, %rbx
+ subq $256, %rsp
+.LCFI31:
+ movq 16(%rdi), %rcx
+/ Column 0: a[0]*a[0].
+/APP
+ movq (%rcx),%rax
+ mulq %rax
+ addq %rax,%r8
+ adcq %rdx,%rbx
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r8, -288(%rbp)
+/ Column 1: the cross product a[0]*a[1] is added twice.
+/APP
+ movq (%rcx),%rax
+ mulq 8(%rcx)
+ addq %rax,%rbx
+ adcq %rdx,%rsi
+ adcq $0,%r12
+ addq %rax,%rbx
+ adcq %rdx,%rsi
+ adcq $0,%r12
+
+/NO_APP
+ movq %rbx, -280(%rbp)
+/APP
+ movq (%rcx),%rax
+ mulq 16(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r12
+ adcq $0,%r13
+ addq %rax,%rsi
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r12, %rbx
+ movq %r13, %r10
+/APP
+ movq 8(%rcx),%rax
+ mulq %rax
+ addq %rax,%rsi
+ adcq %rdx,%rbx
+ adcq $0,%r10
+
+/NO_APP
+ movq %rsi, -272(%rbp)
+ movq %r10, %rdi
+ movq %r9, %rsi
+ movq %rbx, %r10
+/APP
+ movq (%rcx),%rax
+ mulq 24(%rcx)
+ addq %rax,%r10
+ adcq %rdx,%rdi
+ adcq $0,%r11
+ addq %rax,%r10
+ adcq %rdx,%rdi
+ adcq $0,%r11
+
+/NO_APP
+ movq %rdi, %r12
+ movq %r11, %rbx
+ movq %r9, %rdi
+/APP
+ movq 8(%rcx),%rax
+ mulq 16(%rcx)
+ addq %rax,%r10
+ adcq %rdx,%r12
+ adcq $0,%rbx
+ addq %rax,%r10
+ adcq %rdx,%r12
+ adcq $0,%rbx
+
+/NO_APP
+ movq %r9, %r11
+ movq %r10, -264(%rbp)
+ movq %rbx, %r8
+ movq %r12, %r13
+ movq %r9, %r12
+/APP
+ movq (%rcx),%rax
+ mulq 32(%rcx)
+ addq %rax,%r13
+ adcq %rdx,%r8
+ adcq $0,%r12
+ addq %rax,%r13
+ adcq %rdx,%r8
+ adcq $0,%r12
+
+ movq 8(%rcx),%rax
+ mulq 24(%rcx)
+ addq %rax,%r13
+ adcq %rdx,%r8
+ adcq $0,%r12
+ addq %rax,%r13
+ adcq %rdx,%r8
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, %rbx
+ movq %r12, %r10
+ movq %r9, %r8
+/APP
+ movq 16(%rcx),%rax
+ mulq %rax
+ addq %rax,%r13
+ adcq %rdx,%rbx
+ adcq $0,%r10
+
+/NO_APP
+ movq %r13, -256(%rbp)
+/ Column 5 (three cross products): the products are first summed into the
+/ temporary triple %r8/%rdi/%rsi, and that sum is then added to the column
+/ accumulator twice. The same shape is used for the other wide columns.
+/APP
+ movq (%rcx),%rax
+ mulq 40(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 32(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 24(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ addq %r8,%rbx
+ adcq %rdi,%r10
+ adcq %rsi,%r11
+ addq %r8,%rbx
+ adcq %rdi,%r10
+ adcq %rsi,%r11
+
+/NO_APP
+ movq %rbx, -248(%rbp)
+/APP
+ movq (%rcx),%rax
+ mulq 48(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 40(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 32(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r9, %rax
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rax
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rax
+
+/NO_APP
+ movq %rax, %rdx
+ movq %r11, %rbx
+ movq %r13, %rdi
+ movq %rdx, %r11
+ movq %r12, %rsi
+/APP
+ movq 24(%rcx),%rax
+ mulq %rax
+ addq %rax,%r10
+ adcq %rdx,%rbx
+ adcq $0,%r11
+
+/NO_APP
+ movq %r10, -240(%rbp)
+ movq %r11, %r10
+/APP
+ movq (%rcx),%rax
+ mulq 56(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 40(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 32(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r9, %rdx
+/APP
+ addq %r8,%rbx
+ adcq %rdi,%r10
+ adcq %rsi,%rdx
+ addq %r8,%rbx
+ adcq %rdi,%r10
+ adcq %rsi,%rdx
+
+/NO_APP
+ movq %rdx, %r11
+ movq %rbx, -232(%rbp)
+ movq %r9, %rbx
+/APP
+ movq (%rcx),%rax
+ mulq 64(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 40(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rbx
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rbx
+
+ movq 32(%rcx),%rax
+ mulq %rax
+ addq %rax,%r10
+ adcq %rdx,%r11
+ adcq $0,%rbx
+
+/NO_APP
+ movq %r13, %rdi
+ movq %r10, -224(%rbp)
+ movq %r12, %rsi
+ movq %rbx, %r10
+ movq %r9, %r12
+/APP
+ movq (%rcx),%rax
+ mulq 72(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 64(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 40(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ addq %r8,%r11
+ adcq %rdi,%r10
+ adcq %rsi,%r12
+ addq %r8,%r11
+ adcq %rdi,%r10
+ adcq %rsi,%r12
+
+/NO_APP
+ movq %r11, -216(%rbp)
+ movq %r12, %rbx
+/APP
+ movq (%rcx),%rax
+ mulq 80(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 64(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r9, %rax
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r10
+ adcq %r13,%rbx
+ adcq %r12,%rax
+ addq %r8,%r10
+ adcq %r13,%rbx
+ adcq %r12,%rax
+
+/NO_APP
+ movq %rax, %rdx
+ movq %rbx, %r11
+ movq %r13, %rdi
+ movq %rdx, %rbx
+ movq %r12, %rsi
+/APP
+ movq 40(%rcx),%rax
+ mulq %rax
+ addq %rax,%r10
+ adcq %rdx,%r11
+ adcq $0,%rbx
+
+/NO_APP
+ movq %r10, -208(%rbp)
+ movq %rbx, %r10
+/APP
+ movq (%rcx),%rax
+ mulq 88(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 64(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r9, %rdx
+/APP
+ addq %r8,%r11
+ adcq %rdi,%r10
+ adcq %rsi,%rdx
+ addq %r8,%r11
+ adcq %rdi,%r10
+ adcq %rsi,%rdx
+
+/NO_APP
+ movq %rdx, %r13
+ movq %r11, -200(%rbp)
+ movq %r13, %r12
+/APP
+ movq (%rcx),%rax
+ mulq 96(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 64(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r9, %rax
+ movq %rdi, %rdx
+ movq %rsi, %r11
+/APP
+ addq %r8,%r10
+ adcq %rdx,%r12
+ adcq %r11,%rax
+ addq %r8,%r10
+ adcq %rdx,%r12
+ adcq %r11,%rax
+
+/NO_APP
+ movq %rdx, %rbx
+ movq %rax, %r13
+ movq %r11, %rsi
+/APP
+ movq 48(%rcx),%rax
+ mulq %rax
+ addq %rax,%r10
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %rbx, %rdi
+ movq %r10, -192(%rbp)
+ movq %r13, %r10
+/APP
+ movq (%rcx),%rax
+ mulq 104(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+/NO_APP
+ movq %r9, %r13
+/APP
+ movq 8(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 64(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 48(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ addq %r8,%r12
+ adcq %rdi,%r10
+ adcq %rsi,%r13
+ addq %r8,%r12
+ adcq %rdi,%r10
+ adcq %rsi,%r13
+
+/NO_APP
+ movq %r12, -184(%rbp)
+ movq %r13, %r12
+/APP
+ movq (%rcx),%rax
+ mulq 112(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 48(%rcx),%rax
+ mulq 64(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r9, %rax
+ movq %rdi, %rbx
+ movq %rsi, %rdx
+/APP
+ addq %r8,%r10
+ adcq %rbx,%r12
+ adcq %rdx,%rax
+ addq %r8,%r10
+ adcq %rbx,%r12
+ adcq %rdx,%rax
+
+/NO_APP
+ movq %rdx, %r11
+ movq %rax, %r13
+ movq %rbx, %rdi
+/APP
+ movq 56(%rcx),%rax
+ mulq %rax
+ addq %rax,%r10
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r11, %rsi
+ movq %r10, -176(%rbp)
+ movq %r13, %r10
+/APP
+ movq (%rcx),%rax
+ mulq 120(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+/NO_APP
+ movq %r9, %r13
+/APP
+ movq 8(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 48(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 56(%rcx),%rax
+ mulq 64(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ addq %r8,%r12
+ adcq %rdi,%r10
+ adcq %rsi,%r13
+ addq %r8,%r12
+ adcq %rdi,%r10
+ adcq %rsi,%r13
+
+/NO_APP
+ movq %r12, -168(%rbp)
+ movq %r13, %r12
+/APP
+ movq 8(%rcx),%rax
+ mulq 120(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 48(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 56(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r9, %rax
+ movq %rdi, %rbx
+ movq %rsi, %rdx
+/APP
+ addq %r8,%r10
+ adcq %rbx,%r12
+ adcq %rdx,%rax
+ addq %r8,%r10
+ adcq %rbx,%r12
+ adcq %rdx,%rax
+
+/NO_APP
+ movq %rdx, %r11
+ movq %rax, %r13
+ movq %rbx, %rdi
+/APP
+ movq 64(%rcx),%rax
+ mulq %rax
+ addq %rax,%r10
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r11, %rsi
+ movq %r10, -160(%rbp)
+ movq %r9, %r11
+/APP
+ movq 16(%rcx),%rax
+ mulq 120(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+/NO_APP
+ movq %r13, %r10
+ movq %r9, %rbx
+/APP
+ movq 24(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 48(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 56(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 64(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ addq %r8,%r12
+ adcq %rdi,%r10
+ adcq %rsi,%r11
+ addq %r8,%r12
+ adcq %rdi,%r10
+ adcq %rsi,%r11
+
+/NO_APP
+ movq %r12, -152(%rbp)
+/APP
+ movq 24(%rcx),%rax
+ mulq 120(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 48(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 56(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 64(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rbx
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rbx
+
+/NO_APP
+ movq %rbx, %rdx
+ movq %r13, %rdi
+ movq %r11, %rbx
+ movq %r12, %rsi
+ movq %rdx, %r11
+ movq %r9, %r12
+/APP
+ movq 72(%rcx),%rax
+ mulq %rax
+ addq %rax,%r10
+ adcq %rdx,%rbx
+ adcq $0,%r11
+
+/NO_APP
+ movq %r10, -144(%rbp)
+ movq %r11, %r10
+/APP
+ movq 32(%rcx),%rax
+ mulq 120(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 48(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 56(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 64(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 72(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ addq %r8,%rbx
+ adcq %rdi,%r10
+ adcq %rsi,%r12
+ addq %r8,%rbx
+ adcq %rdi,%r10
+ adcq %rsi,%r12
+
+/NO_APP
+ movq %rbx, -136(%rbp)
+ movq %r12, %r11
+/APP
+ movq 40(%rcx),%rax
+ mulq 120(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 48(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 56(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 64(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 72(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r9, %rax
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rax
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rax
+
+/NO_APP
+ movq %rax, %rdx
+ movq %r11, %rbx
+ movq %r13, %rdi
+ movq %rdx, %r11
+ movq %r12, %rsi
+/APP
+ movq 80(%rcx),%rax
+ mulq %rax
+ addq %rax,%r10
+ adcq %rdx,%rbx
+ adcq $0,%r11
+
+/NO_APP
+ movq %r10, -128(%rbp)
+ movq %r11, %r10
+/APP
+ movq 48(%rcx),%rax
+ mulq 120(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 56(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 64(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 72(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 80(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r9, %rdx
+/APP
+ addq %r8,%rbx
+ adcq %rdi,%r10
+ adcq %rsi,%rdx
+ addq %r8,%rbx
+ adcq %rdi,%r10
+ adcq %rsi,%rdx
+
+/NO_APP
+ movq %rbx, -120(%rbp)
+ movq %rdx, %r11
+ movq %r9, %rbx
+/APP
+ movq 56(%rcx),%rax
+ mulq 120(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 64(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 72(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 80(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rbx
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rbx
+
+/NO_APP
+ movq %rbx, %rdx
+ movq %r13, %rdi
+ movq %r11, %rbx
+ movq %r12, %rsi
+ movq %rdx, %r11
+ movq %r9, %r12
+/APP
+ movq 88(%rcx),%rax
+ mulq %rax
+ addq %rax,%r10
+ adcq %rdx,%rbx
+ adcq $0,%r11
+
+/NO_APP
+ movq %r10, -112(%rbp)
+ movq %r11, %r10
+/APP
+ movq 64(%rcx),%rax
+ mulq 120(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 72(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 80(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 88(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ addq %r8,%rbx
+ adcq %rdi,%r10
+ adcq %rsi,%r12
+ addq %r8,%rbx
+ adcq %rdi,%r10
+ adcq %rsi,%r12
+
+/NO_APP
+ movq %rbx, -104(%rbp)
+ movq %r12, %r11
+/APP
+ movq 72(%rcx),%rax
+ mulq 120(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 80(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 88(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r9, %rax
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rax
+ addq %r8,%r10
+ adcq %r13,%r11
+ adcq %r12,%rax
+
+/NO_APP
+ movq %rax, %rdx
+ movq %r11, %rbx
+ movq %r13, %rdi
+ movq %rdx, %r11
+ movq %r12, %rsi
+/APP
+ movq 96(%rcx),%rax
+ mulq %rax
+ addq %rax,%r10
+ adcq %rdx,%rbx
+ adcq $0,%r11
+
+/NO_APP
+ movq %r10, -96(%rbp)
+ movq %r9, %r10
+/APP
+ movq 80(%rcx),%rax
+ mulq 120(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 88(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 96(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r12
+ movq %rsi, %rax
+ movq %r9, %rsi
+/APP
+ addq %r8,%rbx
+ adcq %r12,%r11
+ adcq %rax,%r10
+ addq %r8,%rbx
+ adcq %r12,%r11
+ adcq %rax,%r10
+
+/NO_APP
+ movq %r9, %r12
+ movq %rbx, -88(%rbp)
+ movq %r11, %r13
+ movq %r10, %r11
+/APP
+ movq 88(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%r13
+ adcq %rdx,%r11
+ adcq $0,%r12
+ addq %rax,%r13
+ adcq %rdx,%r11
+ adcq $0,%r12
+
+/NO_APP
+ movq %r12, %rdi
+/APP
+ movq 96(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r13
+ adcq %rdx,%r11
+ adcq $0,%rdi
+ addq %rax,%r13
+ adcq %rdx,%r11
+ adcq $0,%rdi
+
+/NO_APP
+ movq %r11, %rbx
+ movq %rdi, %r10
+ movq %r9, %r11
+/APP
+ movq 104(%rcx),%rax
+ mulq %rax
+ addq %rax,%r13
+ adcq %rdx,%rbx
+ adcq $0,%r10
+
+/NO_APP
+ movq %r13, -80(%rbp)
+ movq %r10, %r8
+ movq %rbx, %r10
+/APP
+ movq 96(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%r10
+ adcq %rdx,%r8
+ adcq $0,%rsi
+ addq %rax,%r10
+ adcq %rdx,%r8
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r8, %r12
+ movq %rsi, %rbx
+/APP
+ movq 104(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r10
+ adcq %rdx,%r12
+ adcq $0,%rbx
+ addq %rax,%r10
+ adcq %rdx,%r12
+ adcq $0,%rbx
+
+/NO_APP
+ movq %r10, -72(%rbp)
+ movq %rbx, %r13
+ movq %r12, %rbx
+/APP
+ movq 104(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%rbx
+ adcq %rdx,%r13
+ adcq $0,%r11
+ addq %rax,%rbx
+ adcq %rdx,%r13
+ adcq $0,%r11
+
+/NO_APP
+ movq %r11, %r12
+ movq %r13, %r10
+/APP
+ movq 112(%rcx),%rax
+ mulq %rax
+ addq %rax,%rbx
+ adcq %rdx,%r10
+ adcq $0,%r12
+
+/NO_APP
+ movq %rbx, -64(%rbp)
+ movq %r10, %rdi
+ movq %r9, %rbx
+ movq %r12, %rsi
+/APP
+ movq 112(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rbx
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%rbx
+
+/NO_APP
+ movq %rdi, -56(%rbp)
+ movq %rbx, %r8
+/ Last column: a[15]*a[15]; its carry (%r8) becomes the top word. The carry
+/ collected in %r9 is not used afterwards.
+/APP
+ movq 120(%rcx),%rax
+ mulq %rax
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%r9
+
+/NO_APP
+/ Store the last two words, set the output descriptor (offset 8 := 32,
+/ offset 0 := 0) and copy the 256-byte scratch buffer to the output words:
+/ memcpy(dest = pointer at 16(%r14), src = -288(%rbp), n = 256).
+ movq %rsi, -48(%rbp)
+ movq 16(%r14), %rdi
+ leaq -288(%rbp), %rsi
+ movl $256, %edx
+ movq %r8, -40(%rbp)
+ movl $32, 8(%r14)
+ movl $0, (%r14)
+ call memcpy@PLT
+/ Trim the word count at offset 8 while the highest counted word is zero.
+ movl 8(%r14), %edx
+ testl %edx, %edx
+ je .L232
+ leal -1(%rdx), %ecx
+ movq 16(%r14), %rsi
+ mov %ecx, %r9d
+ cmpq $0, (%rsi,%r9,8)
+ jne .L230
+ movl %ecx, %edx
+ .align 16
+.L231:
+ testl %edx, %edx
+ movl %edx, %ecx
+ je .L235
+ decl %edx
+ mov %edx, %eax
+ cmpq $0, (%rsi,%rax,8)
+ je .L231
+ movl %ecx, 8(%r14)
+ movl %ecx, %edx
+.L230:
+ testl %edx, %edx
+ je .L232
+/ Non-zero count: the field at offset 0 is read and written back unchanged.
+ movl (%r14), %eax
+ movl %eax, (%r14)
+ addq $256, %rsp
+ popq %rbx
+ popq %r12
+ popq %r13
+ popq %r14
+ leave
+ ret
+.L235:
+ movl %edx, 8(%r14)
+ .align 16
+/ Count is zero: store 0 into the field at offset 0.
+.L232:
+ xorl %eax, %eax
+ movl %eax, (%r14)
+ addq $256, %rsp
+ popq %rbx
+ popq %r12
+ popq %r13
+ popq %r14
+ leave
+ ret
+.LFE8:
+ .size s_mp_sqr_comba_16, .-s_mp_sqr_comba_16
+ .align 16
+.globl s_mp_sqr_comba_32
+ .type s_mp_sqr_comba_32, @function
+s_mp_sqr_comba_32:
+.LFB9:
+ pushq %rbp
+.LCFI32:
+ xorl %r10d, %r10d
+ movq %r10, %r8
+ movq %r10, %r11
+ movq %rsp, %rbp
+.LCFI33:
+ pushq %r14
+.LCFI34:
+ movq %rsi, %r14
+ movq %r10, %rsi
+ pushq %r13
+.LCFI35:
+ movq %r10, %r13
+ pushq %r12
+.LCFI36:
+ movq %r10, %r12
+ pushq %rbx
+.LCFI37:
+ movq %r10, %rbx
+ subq $512, %rsp
+.LCFI38:
+ movq 16(%rdi), %rcx
+/APP
+ movq (%rcx),%rax
+ mulq %rax
+ addq %rax,%r8
+ adcq %rdx,%rbx
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r8, -544(%rbp)
+/APP
+ movq (%rcx),%rax
+ mulq 8(%rcx)
+ addq %rax,%rbx
+ adcq %rdx,%rsi
+ adcq $0,%r12
+ addq %rax,%rbx
+ adcq %rdx,%rsi
+ adcq $0,%r12
+
+/NO_APP
+ movq %rbx, -536(%rbp)
+/APP
+ movq (%rcx),%rax
+ mulq 16(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r12
+ adcq $0,%r13
+ addq %rax,%rsi
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r12, %rbx
+ movq %r13, %r9
+/APP
+ movq 8(%rcx),%rax
+ mulq %rax
+ addq %rax,%rsi
+ adcq %rdx,%rbx
+ adcq $0,%r9
+
+/NO_APP
+ movq %rsi, -528(%rbp)
+ movq %r9, %rdi
+ movq %r10, %rsi
+ movq %rbx, %r9
+/APP
+ movq (%rcx),%rax
+ mulq 24(%rcx)
+ addq %rax,%r9
+ adcq %rdx,%rdi
+ adcq $0,%r11
+ addq %rax,%r9
+ adcq %rdx,%rdi
+ adcq $0,%r11
+
+/NO_APP
+ movq %rdi, %r12
+ movq %r11, %r13
+ movq %r10, %rdi
+/APP
+ movq 8(%rcx),%rax
+ mulq 16(%rcx)
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r10, %r11
+ movq %r9, -520(%rbp)
+ movq %r13, %r8
+ movq %r12, %r13
+ movq %r10, %r12
+/APP
+ movq (%rcx),%rax
+ mulq 32(%rcx)
+ addq %rax,%r13
+ adcq %rdx,%r8
+ adcq $0,%r12
+ addq %rax,%r13
+ adcq %rdx,%r8
+ adcq $0,%r12
+
+ movq 8(%rcx),%rax
+ mulq 24(%rcx)
+ addq %rax,%r13
+ adcq %rdx,%r8
+ adcq $0,%r12
+ addq %rax,%r13
+ adcq %rdx,%r8
+ adcq $0,%r12
+
+/NO_APP
+ movq %r8, %rbx
+ movq %r12, %r9
+ movq %r10, %r8
+/APP
+ movq 16(%rcx),%rax
+ mulq %rax
+ addq %rax,%r13
+ adcq %rdx,%rbx
+ adcq $0,%r9
+
+/NO_APP
+ movq %r13, -512(%rbp)
+/APP
+ movq (%rcx),%rax
+ mulq 40(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 32(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 24(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ addq %r8,%rbx
+ adcq %rdi,%r9
+ adcq %rsi,%r11
+ addq %r8,%rbx
+ adcq %rdi,%r9
+ adcq %rsi,%r11
+
+/NO_APP
+ movq %rbx, -504(%rbp)
+/APP
+ movq (%rcx),%rax
+ mulq 48(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 40(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 32(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r10, %rax
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r9
+ adcq %r13,%r11
+ adcq %r12,%rax
+ addq %r8,%r9
+ adcq %r13,%r11
+ adcq %r12,%rax
+
+/NO_APP
+ movq %rax, %rdx
+ movq %r11, %rbx
+ movq %r13, %rdi
+ movq %rdx, %r11
+ movq %r12, %rsi
+/APP
+ movq 24(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%rbx
+ adcq $0,%r11
+
+/NO_APP
+ movq %r9, -496(%rbp)
+ movq %r11, %r9
+/APP
+ movq (%rcx),%rax
+ mulq 56(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 40(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 32(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r10, %rdx
+/APP
+ addq %r8,%rbx
+ adcq %rdi,%r9
+ adcq %rsi,%rdx
+ addq %r8,%rbx
+ adcq %rdi,%r9
+ adcq %rsi,%rdx
+
+/NO_APP
+ movq %rdx, %r11
+ movq %rbx, -488(%rbp)
+ movq %r10, %rbx
+/APP
+ movq (%rcx),%rax
+ mulq 64(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 40(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r9
+ adcq %r13,%r11
+ adcq %r12,%rbx
+ addq %r8,%r9
+ adcq %r13,%r11
+ adcq %r12,%rbx
+
+ movq 32(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r11
+ adcq $0,%rbx
+
+/NO_APP
+ movq %r13, %rdi
+ movq %r9, -480(%rbp)
+ movq %r12, %rsi
+ movq %rbx, %r9
+ movq %r10, %r12
+/APP
+ movq (%rcx),%rax
+ mulq 72(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 64(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 40(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ addq %r8,%r11
+ adcq %rdi,%r9
+ adcq %rsi,%r12
+ addq %r8,%r11
+ adcq %rdi,%r9
+ adcq %rsi,%r12
+
+/NO_APP
+ movq %r11, -472(%rbp)
+ movq %r12, %rbx
+/APP
+ movq (%rcx),%rax
+ mulq 80(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 64(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r10, %rax
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r9
+ adcq %r13,%rbx
+ adcq %r12,%rax
+ addq %r8,%r9
+ adcq %r13,%rbx
+ adcq %r12,%rax
+
+/NO_APP
+ movq %rax, %rdx
+ movq %rbx, %r11
+ movq %r13, %rdi
+ movq %rdx, %rbx
+ movq %r12, %rsi
+/APP
+ movq 40(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r11
+ adcq $0,%rbx
+
+/NO_APP
+ movq %r9, -464(%rbp)
+ movq %rbx, %r9
+/APP
+ movq (%rcx),%rax
+ mulq 88(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 64(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 48(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r10, %rdx
+/APP
+ addq %r8,%r11
+ adcq %rdi,%r9
+ adcq %rsi,%rdx
+ addq %r8,%r11
+ adcq %rdi,%r9
+ adcq %rsi,%rdx
+
+/NO_APP
+ movq %rdx, %r13
+ movq %r11, -456(%rbp)
+ movq %r13, %r12
+ movq %r10, %r13
+/APP
+ movq (%rcx),%rax
+ mulq 96(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 64(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %rax
+ movq %rsi, %r11
+/APP
+ addq %r8,%r9
+ adcq %rax,%r12
+ adcq %r11,%r13
+ addq %r8,%r9
+ adcq %rax,%r12
+ adcq %r11,%r13
+
+/NO_APP
+ movq %rax, %rbx
+ movq %r11, %rsi
+/APP
+ movq 48(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %rbx, %rdi
+ movq %r9, -448(%rbp)
+ movq %r13, %r9
+/APP
+ movq (%rcx),%rax
+ mulq 104(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+/NO_APP
+ movq %r10, %r13
+/APP
+ movq 8(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 64(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 48(%rcx),%rax
+ mulq 56(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ addq %r8,%r12
+ adcq %rdi,%r9
+ adcq %rsi,%r13
+ addq %r8,%r12
+ adcq %rdi,%r9
+ adcq %rsi,%r13
+
+/NO_APP
+ movq %r12, -440(%rbp)
+ movq %r10, %r12
+/APP
+ movq (%rcx),%rax
+ mulq 112(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 48(%rcx),%rax
+ mulq 64(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r13, %rdx
+ movq %rdi, %rbx
+ movq %rsi, %r13
+/APP
+ addq %r8,%r9
+ adcq %rbx,%rdx
+ adcq %r13,%r12
+ addq %r8,%r9
+ adcq %rbx,%rdx
+ adcq %r13,%r12
+
+/NO_APP
+ movq %r12, %rax
+ movq %r13, %r11
+ movq %rdx, %r12
+ movq %rax, %r13
+ movq %rbx, %rdi
+ movq %r11, %rsi
+/APP
+ movq 56(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r9, -432(%rbp)
+ movq %r13, %r9
+ movq %r10, %r13
+/APP
+ movq (%rcx),%rax
+ mulq 120(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 8(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 48(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 56(%rcx),%rax
+ mulq 64(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r8, %rax
+ movq %rdi, %rdx
+ movq %rsi, %rbx
+/APP
+ addq %rax,%r12
+ adcq %rdx,%r9
+ adcq %rbx,%r13
+ addq %rax,%r12
+ adcq %rdx,%r9
+ adcq %rbx,%r13
+
+/NO_APP
+ movq %r12, -424(%rbp)
+ movq %rdx, %r8
+ movq %rax, %rsi
+ movq %rbx, %rdi
+ movq %r13, %r12
+ movq %r10, %r13
+/APP
+ movq (%rcx),%rax
+ mulq 128(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 8(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 16(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 24(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 32(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %rsi, %rax
+ movq %r8, %rbx
+ movq %rdi, %rdx
+/APP
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+
+/NO_APP
+ movq %rdx, %r11
+ movq %rax, %r8
+ movq %rbx, %rdi
+/APP
+ movq 64(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r11, %rsi
+ movq %r9, -416(%rbp)
+ movq %r13, %r9
+/APP
+ movq (%rcx),%rax
+ mulq 136(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+/NO_APP
+ movq %r10, %r13
+/APP
+ movq 8(%rcx),%rax
+ mulq 128(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 48(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 56(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 64(%rcx),%rax
+ mulq 72(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r8, %rbx
+ movq %rdi, %rax
+ movq %rsi, %rdx
+/APP
+ addq %rbx,%r12
+ adcq %rax,%r9
+ adcq %rdx,%r13
+ addq %rbx,%r12
+ adcq %rax,%r9
+ adcq %rdx,%r13
+
+/NO_APP
+ movq %r12, -408(%rbp)
+ movq %rdx, %rdi
+ movq %rax, %r8
+ movq %rbx, %rsi
+ movq %r13, %r12
+ movq %r10, %r13
+/APP
+ movq (%rcx),%rax
+ mulq 144(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 8(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 16(%rcx),%rax
+ mulq 128(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 24(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 32(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %rsi, %rax
+ movq %r8, %rbx
+ movq %rdi, %rdx
+/APP
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+
+/NO_APP
+ movq %rdx, %r11
+ movq %rax, %r8
+ movq %rbx, %rdi
+/APP
+ movq 72(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r11, %rsi
+ movq %r9, -400(%rbp)
+ movq %r13, %r9
+/APP
+ movq (%rcx),%rax
+ mulq 152(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+/NO_APP
+ movq %r10, %r13
+/APP
+ movq 8(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 128(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 48(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 56(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 64(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 72(%rcx),%rax
+ mulq 80(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r8, %rbx
+ movq %rdi, %rax
+ movq %rsi, %rdx
+/APP
+ addq %rbx,%r12
+ adcq %rax,%r9
+ adcq %rdx,%r13
+ addq %rbx,%r12
+ adcq %rax,%r9
+ adcq %rdx,%r13
+
+/NO_APP
+ movq %r12, -392(%rbp)
+ movq %rdx, %rdi
+ movq %rax, %r8
+ movq %rbx, %rsi
+ movq %r13, %r12
+ movq %r10, %r13
+/APP
+ movq (%rcx),%rax
+ mulq 160(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 8(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 16(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 24(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 32(%rcx),%rax
+ mulq 128(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %rsi, %rax
+ movq %r8, %rbx
+ movq %rdi, %rdx
+/APP
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+
+/NO_APP
+ movq %rdx, %r11
+ movq %rax, %r8
+ movq %rbx, %rdi
+/APP
+ movq 80(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r11, %rsi
+ movq %r9, -384(%rbp)
+ movq %r13, %r9
+/APP
+ movq (%rcx),%rax
+ mulq 168(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+/NO_APP
+ movq %r10, %r13
+/APP
+ movq 8(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 128(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 48(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 56(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 64(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 72(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 80(%rcx),%rax
+ mulq 88(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r8, %rbx
+ movq %rdi, %rax
+ movq %rsi, %rdx
+/APP
+ addq %rbx,%r12
+ adcq %rax,%r9
+ adcq %rdx,%r13
+ addq %rbx,%r12
+ adcq %rax,%r9
+ adcq %rdx,%r13
+
+/NO_APP
+ movq %r12, -376(%rbp)
+ movq %rdx, %rdi
+ movq %rax, %r8
+ movq %rbx, %rsi
+ movq %r13, %r12
+ movq %r10, %r13
+/APP
+ movq (%rcx),%rax
+ mulq 176(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 8(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 16(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 24(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 32(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 128(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %rsi, %rax
+ movq %r8, %rbx
+ movq %rdi, %rdx
+/APP
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+
+/NO_APP
+ movq %rdx, %r11
+ movq %rax, %r8
+ movq %rbx, %rdi
+/APP
+ movq 88(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r11, %rsi
+ movq %r9, -368(%rbp)
+ movq %r13, %r9
+/APP
+ movq (%rcx),%rax
+ mulq 184(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+/NO_APP
+ movq %r10, %r13
+/APP
+ movq 8(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 16(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 24(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 32(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 40(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 48(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 56(%rcx),%rax
+ mulq 128(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 64(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 72(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 80(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 88(%rcx),%rax
+ mulq 96(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r8, %rbx
+ movq %rdi, %rax
+ movq %rsi, %rdx
+/APP
+ addq %rbx,%r12
+ adcq %rax,%r9
+ adcq %rdx,%r13
+ addq %rbx,%r12
+ adcq %rax,%r9
+ adcq %rdx,%r13
+
+/NO_APP
+ movq %rdx, %rdi
+ movq %r12, -360(%rbp)
+ movq %rax, %r8
+ movq %rbx, %rsi
+ movq %r13, %r12
+ movq %r10, %r13
+/APP
+ movq (%rcx),%rax
+ mulq 192(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 8(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 16(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 24(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 32(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 128(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %r8, %rbx
+ movq %rdi, %rax
+/APP
+ addq %rsi,%r9
+ adcq %rbx,%r12
+ adcq %rax,%r13
+ addq %rsi,%r9
+ adcq %rbx,%r12
+ adcq %rax,%r13
+
+/NO_APP
+ movq %rax, %r11
+ movq %rbx, %r8
+/APP
+ movq 96(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r11, %rdi
+ movq %r9, -352(%rbp)
+ movq %r13, %r9
+/APP
+ movq (%rcx),%rax
+ mulq 200(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+/NO_APP
+ movq %r10, %r13
+/APP
+ movq 8(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 16(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 24(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 32(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 128(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 104(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ addq %rsi,%r12
+ adcq %r8,%r9
+ adcq %rdi,%r13
+ addq %rsi,%r12
+ adcq %r8,%r9
+ adcq %rdi,%r13
+
+/NO_APP
+ movq %r12, -344(%rbp)
+ movq %r10, %r12
+/APP
+ movq (%rcx),%rax
+ mulq 208(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 8(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 16(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 24(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 32(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 128(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %r13, %rdx
+ movq %r8, %rbx
+ movq %rdi, %r13
+/APP
+ addq %rsi,%r9
+ adcq %rbx,%rdx
+ adcq %r13,%r12
+ addq %rsi,%r9
+ adcq %rbx,%rdx
+ adcq %r13,%r12
+
+/NO_APP
+ movq %r12, %rax
+ movq %r13, %r11
+ movq %rdx, %r12
+ movq %rax, %r13
+ movq %rbx, %r8
+ movq %r11, %rdi
+/APP
+ movq 104(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r9, -336(%rbp)
+ movq %r13, %r9
+ movq %r10, %r13
+/APP
+ movq (%rcx),%rax
+ mulq 216(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 8(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 16(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 24(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 32(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 128(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 104(%rcx),%rax
+ mulq 112(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ addq %rsi,%r12
+ adcq %r8,%r9
+ adcq %rdi,%r13
+ addq %rsi,%r12
+ adcq %r8,%r9
+ adcq %rdi,%r13
+
+/NO_APP
+ movq %r12, -328(%rbp)
+/APP
+ movq (%rcx),%rax
+ mulq 224(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 8(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 16(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 24(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 32(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 128(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 104(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %r13, %rax
+ movq %r10, %rdx
+ movq %r8, %rbx
+ movq %rdi, %r12
+/APP
+ addq %rsi,%r9
+ adcq %rbx,%rax
+ adcq %r12,%rdx
+ addq %rsi,%r9
+ adcq %rbx,%rax
+ adcq %r12,%rdx
+
+/NO_APP
+ movq %rdx, %rdi
+ movq %r12, %r11
+ movq %rbx, %r8
+ movq %rax, %r12
+ movq %rdi, %r13
+ movq %r11, %rdi
+/APP
+ movq 112(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r9, -320(%rbp)
+ movq %r13, %rbx
+ movq %r10, %r9
+/APP
+ movq (%rcx),%rax
+ mulq 232(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 8(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 16(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 24(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 32(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 104(%rcx),%rax
+ mulq 128(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 112(%rcx),%rax
+ mulq 120(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ addq %rsi,%r12
+ adcq %r8,%rbx
+ adcq %rdi,%r9
+ addq %rsi,%r12
+ adcq %r8,%rbx
+ adcq %rdi,%r9
+
+/NO_APP
+ movq %r12, -312(%rbp)
+ movq %r9, %r13
+/APP
+ movq (%rcx),%rax
+ mulq 240(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 8(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 16(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 24(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 32(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 104(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 112(%rcx),%rax
+ mulq 128(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %r10, %rax
+ movq %r8, %r11
+ movq %rdi, %rdx
+/APP
+ addq %rsi,%rbx
+ adcq %r11,%r13
+ adcq %rdx,%rax
+ addq %rsi,%rbx
+ adcq %r11,%r13
+ adcq %rdx,%rax
+
+/NO_APP
+ movq %rdx, %r9
+ movq %rax, %rdx
+ movq %r13, %r12
+ movq %r11, %r8
+ movq %rdx, %r13
+ movq %r9, %rdi
+/APP
+ movq 120(%rcx),%rax
+ mulq %rax
+ addq %rax,%rbx
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %rbx, -304(%rbp)
+ movq %r13, %rbx
+ movq %r10, %r13
+/APP
+ movq (%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 8(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 16(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 24(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 32(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 104(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 112(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 120(%rcx),%rax
+ mulq 128(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ addq %rsi,%r12
+ adcq %r8,%rbx
+ adcq %rdi,%r13
+ addq %rsi,%r12
+ adcq %r8,%rbx
+ adcq %rdi,%r13
+
+/NO_APP
+ movq %r12, -296(%rbp)
+ movq %r13, %r12
+ movq %r10, %r13
+/APP
+ movq 8(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 16(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 24(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 32(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 104(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 112(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 120(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %r8, %r11
+ movq %rdi, %rax
+/APP
+ addq %rsi,%rbx
+ adcq %r11,%r12
+ adcq %rax,%r13
+ addq %rsi,%rbx
+ adcq %r11,%r12
+ adcq %rax,%r13
+
+/NO_APP
+ movq %rax, %r9
+ movq %r11, %r8
+/APP
+ movq 128(%rcx),%rax
+ mulq %rax
+ addq %rax,%rbx
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r9, %rdi
+ movq %rbx, -288(%rbp)
+ movq %r13, %r9
+/APP
+ movq 16(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+/NO_APP
+ movq %r10, %r13
+/APP
+ movq 24(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 32(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 104(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 112(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 120(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 128(%rcx),%rax
+ mulq 136(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ addq %rsi,%r12
+ adcq %r8,%r9
+ adcq %rdi,%r13
+ addq %rsi,%r12
+ adcq %r8,%r9
+ adcq %rdi,%r13
+
+/NO_APP
+ movq %r12, -280(%rbp)
+ movq %r10, %r12
+/APP
+ movq 24(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 32(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 104(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 112(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 120(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 128(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %r13, %rdx
+ movq %r8, %rbx
+ movq %rdi, %r13
+/APP
+ addq %rsi,%r9
+ adcq %rbx,%rdx
+ adcq %r13,%r12
+ addq %rsi,%r9
+ adcq %rbx,%rdx
+ adcq %r13,%r12
+
+/NO_APP
+ movq %r12, %rax
+ movq %r13, %r11
+ movq %rdx, %r12
+ movq %rax, %r13
+ movq %rbx, %r8
+ movq %r11, %rdi
+/APP
+ movq 136(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r9, -272(%rbp)
+ movq %r13, %r9
+ movq %r10, %r13
+/APP
+ movq 32(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 40(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 104(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 112(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 120(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 128(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 136(%rcx),%rax
+ mulq 144(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ addq %rsi,%r12
+ adcq %r8,%r9
+ adcq %rdi,%r13
+ addq %rsi,%r12
+ adcq %r8,%r9
+ adcq %rdi,%r13
+
+/NO_APP
+ movq %r12, -264(%rbp)
+/APP
+ movq 40(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 48(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 104(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 112(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 120(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 128(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 136(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %r13, %rax
+ movq %r10, %rdx
+ movq %r8, %rbx
+ movq %rdi, %r12
+/APP
+ addq %rsi,%r9
+ adcq %rbx,%rax
+ adcq %r12,%rdx
+ addq %rsi,%r9
+ adcq %rbx,%rax
+ adcq %r12,%rdx
+
+/NO_APP
+ movq %rdx, %rdi
+ movq %r12, %r11
+ movq %rbx, %r8
+ movq %rax, %r12
+ movq %rdi, %r13
+ movq %r11, %rdi
+/APP
+ movq 144(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r10, %r11
+ movq %r9, -256(%rbp)
+ movq %r13, %r9
+/APP
+ movq 48(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 56(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 104(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 112(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 120(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 128(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 136(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 144(%rcx),%rax
+ mulq 152(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ addq %rsi,%r12
+ adcq %r8,%r9
+ adcq %rdi,%r11
+ addq %rsi,%r12
+ adcq %r8,%r9
+ adcq %rdi,%r11
+
+/NO_APP
+ movq %r12, -248(%rbp)
+ movq %r11, %r13
+/APP
+ movq 56(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 64(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 72(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 104(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 112(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 120(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 128(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 136(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 144(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %r10, %rax
+ movq %rsi, %rdx
+ movq %r8, %rbx
+ movq %rdi, %r12
+/APP
+ addq %rdx,%r9
+ adcq %rbx,%r13
+ adcq %r12,%rax
+ addq %rdx,%r9
+ adcq %rbx,%r13
+ adcq %r12,%rax
+
+/NO_APP
+ movq %r12, %r11
+ movq %rdx, %r8
+ movq %rax, %rdx
+ movq %r13, %r12
+ movq %rbx, %rdi
+ movq %rdx, %r13
+ movq %r11, %rsi
+/APP
+ movq 152(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r9, -240(%rbp)
+ movq %r13, %r9
+ movq %r10, %r13
+/APP
+ movq 64(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 72(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 80(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 88(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 96(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 104(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 112(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 120(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 128(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 136(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 144(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 152(%rcx),%rax
+ mulq 160(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r8, %rax
+ movq %rdi, %rdx
+ movq %rsi, %rbx
+/APP
+ addq %rax,%r12
+ adcq %rdx,%r9
+ adcq %rbx,%r13
+ addq %rax,%r12
+ adcq %rdx,%r9
+ adcq %rbx,%r13
+
+/NO_APP
+ movq %r12, -232(%rbp)
+ movq %rdx, %r8
+ movq %rax, %rsi
+ movq %rbx, %rdi
+ movq %r13, %r12
+ movq %r10, %r13
+/APP
+ movq 72(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 80(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 88(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 104(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 112(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 120(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 128(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 136(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 144(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 152(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %rsi, %rax
+ movq %r8, %rbx
+ movq %rdi, %rdx
+/APP
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+
+/NO_APP
+ movq %rdx, %r11
+ movq %rax, %r8
+ movq %rbx, %rdi
+/APP
+ movq 160(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r11, %rsi
+ movq %r9, -224(%rbp)
+ movq %r13, %r9
+/APP
+ movq 80(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+/NO_APP
+ movq %r10, %r13
+/APP
+ movq 88(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 96(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 104(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 112(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 120(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 128(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 136(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 144(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 152(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 160(%rcx),%rax
+ mulq 168(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r8, %rbx
+ movq %rdi, %rax
+ movq %rsi, %rdx
+/APP
+ addq %rbx,%r12
+ adcq %rax,%r9
+ adcq %rdx,%r13
+ addq %rbx,%r12
+ adcq %rax,%r9
+ adcq %rdx,%r13
+
+/NO_APP
+ movq %r12, -216(%rbp)
+ movq %rdx, %rdi
+ movq %rax, %r8
+ movq %rbx, %rsi
+ movq %r13, %r12
+ movq %r10, %r13
+/APP
+ movq 88(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 96(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 104(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 112(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 120(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 128(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 136(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 144(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 152(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 160(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %rsi, %rax
+ movq %r8, %rbx
+ movq %rdi, %rdx
+/APP
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+
+/NO_APP
+ movq %rdx, %r11
+ movq %rax, %r8
+ movq %rbx, %rdi
+/APP
+ movq 168(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r11, %rsi
+ movq %r9, -208(%rbp)
+ movq %r13, %r9
+/APP
+ movq 96(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+/NO_APP
+ movq %r10, %r13
+/APP
+ movq 104(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 112(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 120(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 128(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 136(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 144(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 152(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 160(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 168(%rcx),%rax
+ mulq 176(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r8, %rbx
+ movq %rdi, %rax
+ movq %rsi, %rdx
+/APP
+ addq %rbx,%r12
+ adcq %rax,%r9
+ adcq %rdx,%r13
+ addq %rbx,%r12
+ adcq %rax,%r9
+ adcq %rdx,%r13
+
+/NO_APP
+ movq %r12, -200(%rbp)
+ movq %rdx, %rdi
+ movq %rax, %r8
+ movq %rbx, %rsi
+ movq %r13, %r12
+ movq %r10, %r13
+/APP
+ movq 104(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 112(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 120(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 128(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 136(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 144(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 152(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 160(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 168(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %rsi, %rax
+ movq %r8, %rbx
+ movq %rdi, %rdx
+/APP
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+
+/NO_APP
+ movq %rdx, %r11
+ movq %rax, %r8
+ movq %rbx, %rdi
+/APP
+ movq 176(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r11, %rsi
+ movq %r9, -192(%rbp)
+ movq %r13, %r9
+/APP
+ movq 112(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+/NO_APP
+ movq %r10, %r13
+/APP
+ movq 120(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 128(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 136(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 144(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 152(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 160(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 168(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 176(%rcx),%rax
+ mulq 184(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r8, %rbx
+ movq %rdi, %rax
+ movq %rsi, %rdx
+/APP
+ addq %rbx,%r12
+ adcq %rax,%r9
+ adcq %rdx,%r13
+ addq %rbx,%r12
+ adcq %rax,%r9
+ adcq %rdx,%r13
+
+/NO_APP
+ movq %r12, -184(%rbp)
+ movq %rdx, %rdi
+ movq %rax, %r8
+ movq %rbx, %rsi
+ movq %r13, %r12
+ movq %r10, %r13
+/APP
+ movq 120(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%rsi
+ movq %rdx,%r8
+ xorq %rdi,%rdi
+
+ movq 128(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 136(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 144(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 152(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 160(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 168(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+ movq 176(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%rdi
+
+/NO_APP
+ movq %rsi, %rax
+ movq %r8, %rbx
+ movq %rdi, %rdx
+/APP
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+ addq %rax,%r9
+ adcq %rbx,%r12
+ adcq %rdx,%r13
+
+/NO_APP
+ movq %rdx, %r11
+ movq %rax, %r8
+ movq %rbx, %rdi
+/APP
+ movq 184(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r11, %rsi
+ movq %r9, -176(%rbp)
+ movq %r13, %r9
+/APP
+ movq 128(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+/NO_APP
+ movq %r10, %r13
+/APP
+ movq 136(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 144(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 152(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 160(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 168(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 176(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 184(%rcx),%rax
+ mulq 192(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ addq %r8,%r12
+ adcq %rdi,%r9
+ adcq %rsi,%r13
+ addq %r8,%r12
+ adcq %rdi,%r9
+ adcq %rsi,%r13
+
+/NO_APP
+ movq %r12, -168(%rbp)
+ movq %r13, %r12
+ movq %r10, %r13
+/APP
+ movq 136(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 144(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 152(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 160(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 168(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 176(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 184(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %rbx
+ movq %rsi, %rax
+/APP
+ addq %r8,%r9
+ adcq %rbx,%r12
+ adcq %rax,%r13
+ addq %r8,%r9
+ adcq %rbx,%r12
+ adcq %rax,%r13
+
+/NO_APP
+ movq %rax, %r11
+ movq %rbx, %rdi
+ movq %r10, %rbx
+/APP
+ movq 192(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r11, %rsi
+ movq %r9, -160(%rbp)
+ movq %r13, %r9
+/APP
+ movq 144(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 152(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 160(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 168(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 176(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 184(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 192(%rcx),%rax
+ mulq 200(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ addq %r8,%r12
+ adcq %rdi,%r9
+ adcq %rsi,%rbx
+ addq %r8,%r12
+ adcq %rdi,%r9
+ adcq %rsi,%rbx
+
+/NO_APP
+ movq %r12, -152(%rbp)
+/APP
+ movq 152(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 160(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 168(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 176(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 184(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 192(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r10, %rdx
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r9
+ adcq %r13,%rbx
+ adcq %r12,%rdx
+ addq %r8,%r9
+ adcq %r13,%rbx
+ adcq %r12,%rdx
+
+/NO_APP
+ movq %rdx, %rax
+ movq %r13, %rdi
+ movq %r12, %rsi
+ movq %rax, %r11
+ movq %r10, %r12
+/APP
+ movq 200(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%rbx
+ adcq $0,%r11
+
+/NO_APP
+ movq %r9, -144(%rbp)
+ movq %r11, %r9
+/APP
+ movq 160(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 168(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 176(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 184(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 192(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 200(%rcx),%rax
+ mulq 208(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ addq %r8,%rbx
+ adcq %rdi,%r9
+ adcq %rsi,%r12
+ addq %r8,%rbx
+ adcq %rdi,%r9
+ adcq %rsi,%r12
+
+/NO_APP
+ movq %rbx, -136(%rbp)
+ movq %r12, %r11
+/APP
+ movq 168(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 176(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 184(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 192(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 200(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r10, %rax
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r9
+ adcq %r13,%r11
+ adcq %r12,%rax
+ addq %r8,%r9
+ adcq %r13,%r11
+ adcq %r12,%rax
+
+/NO_APP
+ movq %rax, %rdx
+ movq %r11, %rbx
+ movq %r13, %rdi
+ movq %rdx, %r11
+ movq %r12, %rsi
+/APP
+ movq 208(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%rbx
+ adcq $0,%r11
+
+/NO_APP
+ movq %r9, -128(%rbp)
+ movq %r11, %r9
+/APP
+ movq 176(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 184(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 192(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 200(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 208(%rcx),%rax
+ mulq 216(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r10, %rdx
+/APP
+ addq %r8,%rbx
+ adcq %rdi,%r9
+ adcq %rsi,%rdx
+ addq %r8,%rbx
+ adcq %rdi,%r9
+ adcq %rsi,%rdx
+
+/NO_APP
+ movq %rbx, -120(%rbp)
+ movq %rdx, %r11
+ movq %r10, %rbx
+/APP
+ movq 184(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 192(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 200(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 208(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r9
+ adcq %r13,%r11
+ adcq %r12,%rbx
+ addq %r8,%r9
+ adcq %r13,%r11
+ adcq %r12,%rbx
+
+/NO_APP
+ movq %rbx, %rdx
+ movq %r13, %rdi
+ movq %r11, %rbx
+ movq %r12, %rsi
+ movq %rdx, %r11
+ movq %r10, %r12
+/APP
+ movq 216(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%rbx
+ adcq $0,%r11
+
+/NO_APP
+ movq %r9, -112(%rbp)
+ movq %r11, %r9
+/APP
+ movq 192(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 200(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 208(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 216(%rcx),%rax
+ mulq 224(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ addq %r8,%rbx
+ adcq %rdi,%r9
+ adcq %rsi,%r12
+ addq %r8,%rbx
+ adcq %rdi,%r9
+ adcq %rsi,%r12
+
+/NO_APP
+ movq %rbx, -104(%rbp)
+ movq %r12, %r11
+/APP
+ movq 200(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 208(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 216(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %r10, %rax
+ movq %rdi, %r13
+ movq %rsi, %r12
+/APP
+ addq %r8,%r9
+ adcq %r13,%r11
+ adcq %r12,%rax
+ addq %r8,%r9
+ adcq %r13,%r11
+ adcq %r12,%rax
+
+/NO_APP
+ movq %rax, %rdx
+ movq %r11, %rbx
+ movq %r13, %rdi
+ movq %rdx, %r11
+ movq %r12, %rsi
+ movq %r10, %r12
+/APP
+ movq 224(%rcx),%rax
+ mulq %rax
+ addq %rax,%r9
+ adcq %rdx,%rbx
+ adcq $0,%r11
+
+/NO_APP
+ movq %r9, -96(%rbp)
+ movq %r10, %r9
+/APP
+ movq 208(%rcx),%rax
+ mulq 248(%rcx)
+ movq %rax,%r8
+ movq %rdx,%rdi
+ xorq %rsi,%rsi
+
+ movq 216(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+ movq 224(%rcx),%rax
+ mulq 232(%rcx)
+ addq %rax,%r8
+ adcq %rdx,%rdi
+ adcq $0,%rsi
+
+/NO_APP
+ movq %rdi, %r13
+ movq %rsi, %rax
+/APP
+ addq %r8,%rbx
+ adcq %r13,%r11
+ adcq %rax,%r9
+ addq %r8,%rbx
+ adcq %r13,%r11
+ adcq %rax,%r9
+
+/NO_APP
+ movq %rbx, -88(%rbp)
+ movq %r11, %rsi
+ movq %r9, %r8
+/APP
+ movq 216(%rcx),%rax
+ mulq 248(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%r12
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%r12
+
+/NO_APP
+ movq %r12, %r11
+/APP
+ movq 224(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%r11
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%r11
+
+/NO_APP
+ movq %r8, %r13
+ movq %r11, %rbx
+/APP
+ movq 232(%rcx),%rax
+ mulq %rax
+ addq %rax,%rsi
+ adcq %rdx,%r13
+ adcq $0,%rbx
+
+/NO_APP
+ movq %rsi, -80(%rbp)
+ movq %rbx, %r12
+ movq %r13, %rdi
+ movq %r10, %r13
+/APP
+ movq 224(%rcx),%rax
+ mulq 248(%rcx)
+ addq %rax,%rdi
+ adcq %rdx,%r12
+ adcq $0,%r13
+ addq %rax,%rdi
+ adcq %rdx,%r12
+ adcq $0,%r13
+
+/NO_APP
+ movq %r12, %r9
+ movq %r13, %r12
+/APP
+ movq 232(%rcx),%rax
+ mulq 240(%rcx)
+ addq %rax,%rdi
+ adcq %rdx,%r9
+ adcq $0,%r12
+ addq %rax,%rdi
+ adcq %rdx,%r9
+ adcq $0,%r12
+
+/NO_APP
+ movq %rdi, -72(%rbp)
+ movq %r9, %r11
+ movq %r12, %rbx
+ movq %r10, %r9
+/APP
+ movq 232(%rcx),%rax
+ mulq 248(%rcx)
+ addq %rax,%r11
+ adcq %rdx,%rbx
+ adcq $0,%r9
+ addq %rax,%r11
+ adcq %rdx,%rbx
+ adcq $0,%r9
+
+/NO_APP
+ movq %rbx, %r13
+ movq %r9, %rbx
+ movq %r10, %r9
+/APP
+ movq 240(%rcx),%rax
+ mulq %rax
+ addq %rax,%r11
+ adcq %rdx,%r13
+ adcq $0,%rbx
+
+/NO_APP
+ movq %r11, -64(%rbp)
+ movq %r13, %rdi
+ movq %rbx, %rsi
+/APP
+ movq 240(%rcx),%rax
+ mulq 248(%rcx)
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%r9
+ addq %rax,%rdi
+ adcq %rdx,%rsi
+ adcq $0,%r9
+
+/NO_APP
+ movq %rdi, -56(%rbp)
+ movq %r9, %r8
+/APP
+ movq 248(%rcx),%rax
+ mulq %rax
+ addq %rax,%rsi
+ adcq %rdx,%r8
+ adcq $0,%r10
+
+/NO_APP
+ movq %rsi, -48(%rbp)
+ movq 16(%r14), %rdi
+ leaq -544(%rbp), %rsi
+ movl $512, %edx
+ movq %r8, -40(%rbp)
+ movl $64, 8(%r14)
+ movl $0, (%r14)
+ call memcpy@PLT
+ movl 8(%r14), %edx
+ testl %edx, %edx
+ je .L304
+ leal -1(%rdx), %ecx
+ movq 16(%r14), %rsi
+ mov %ecx, %r10d
+ cmpq $0, (%rsi,%r10,8)
+ jne .L302
+ movl %ecx, %edx
+ .align 16
+.L303:
+ testl %edx, %edx
+ movl %edx, %ecx
+ je .L307
+ decl %edx
+ mov %edx, %eax
+ cmpq $0, (%rsi,%rax,8)
+ je .L303
+ movl %ecx, 8(%r14)
+ movl %ecx, %edx
+.L302:
+ testl %edx, %edx
+ je .L304
+ movl (%r14), %eax
+ movl %eax, (%r14)
+ addq $512, %rsp
+ popq %rbx
+ popq %r12
+ popq %r13
+ popq %r14
+ leave
+ ret
+.L307:
+ movl %edx, 8(%r14)
+ .align 16
+.L304:
+ xorl %eax, %eax
+ movl %eax, (%r14)
+ addq $512, %rsp
+ popq %rbx
+ popq %r12
+ popq %r13
+ popq %r14
+ leave
+ ret
+.LFE9:
+ .size s_mp_sqr_comba_32, .-s_mp_sqr_comba_32
diff --git a/security/nss/lib/freebl/mpi/mp_gf2m-priv.h b/security/nss/lib/freebl/mpi/mp_gf2m-priv.h
new file mode 100644
index 000000000..5be4da4bf
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mp_gf2m-priv.h
@@ -0,0 +1,73 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _MP_GF2M_PRIV_H_
+#define _MP_GF2M_PRIV_H_
+
+#include "mpi-priv.h"
+
+extern const mp_digit mp_gf2m_sqr_tb[16]; /* 16-entry table, indexed by a 4-bit value in gf2m_SQR0/gf2m_SQR1 */
+
+#if defined(MP_USE_UINT_DIGIT)
+#define MP_DIGIT_BITS 32 /* digit width selected when MP_USE_UINT_DIGIT is defined */
+/* enable fast divide and mod operations on MP_DIGIT_BITS */
+#define MP_DIGIT_BITS_LOG_2 5 /* log2(MP_DIGIT_BITS): 2^5 == 32 */
+#define MP_DIGIT_BITS_MASK 0x1f /* MP_DIGIT_BITS - 1 == 31 */
+#else /* !MP_USE_UINT_DIGIT */
+#define MP_DIGIT_BITS 64 /* digit width used otherwise */
+/* enable fast divide and mod operations on MP_DIGIT_BITS */
+#define MP_DIGIT_BITS_LOG_2 6 /* log2(MP_DIGIT_BITS): 2^6 == 64 */
+#define MP_DIGIT_BITS_MASK 0x3f /* MP_DIGIT_BITS - 1 == 63 */
+#endif /* MP_USE_UINT_DIGIT */
+
+/* Platform-specific macros for fast binary polynomial squaring.
+ *
+ * Both macros split part of w into 4-bit groups, look each group up in
+ * mp_gf2m_sqr_tb, and OR the results together 8 bits apart:
+ *   gf2m_SQR0(w) uses the low half of w's bits  (bits 15..0 or 31..0),
+ *   gf2m_SQR1(w) uses the high half of w's bits (bits 31..16 or 63..32).
+ *
+ * NOTE(review): w is expanded once per 4-bit group and the expansion is
+ * not wrapped in outer parentheses. Pass an operand without side effects,
+ * and parenthesize the macro call when combining it with other operators.
+ */
+#if MP_DIGIT_BITS == 32
+#define gf2m_SQR1(w) \
+ mp_gf2m_sqr_tb[(w) >> 28 & 0xF] << 24 | mp_gf2m_sqr_tb[(w) >> 24 & 0xF] << 16 | \
+ mp_gf2m_sqr_tb[(w) >> 20 & 0xF] << 8 | mp_gf2m_sqr_tb[(w) >> 16 & 0xF]
+#define gf2m_SQR0(w) \
+ mp_gf2m_sqr_tb[(w) >> 12 & 0xF] << 24 | mp_gf2m_sqr_tb[(w) >> 8 & 0xF] << 16 | \
+ mp_gf2m_sqr_tb[(w) >> 4 & 0xF] << 8 | mp_gf2m_sqr_tb[(w)&0xF]
+#else /* MP_DIGIT_BITS != 32: the 64-bit forms follow */
+#define gf2m_SQR1(w) \
+ mp_gf2m_sqr_tb[(w) >> 60 & 0xF] << 56 | mp_gf2m_sqr_tb[(w) >> 56 & 0xF] << 48 | \
+ mp_gf2m_sqr_tb[(w) >> 52 & 0xF] << 40 | mp_gf2m_sqr_tb[(w) >> 48 & 0xF] << 32 | \
+ mp_gf2m_sqr_tb[(w) >> 44 & 0xF] << 24 | mp_gf2m_sqr_tb[(w) >> 40 & 0xF] << 16 | \
+ mp_gf2m_sqr_tb[(w) >> 36 & 0xF] << 8 | mp_gf2m_sqr_tb[(w) >> 32 & 0xF]
+#define gf2m_SQR0(w) \
+ mp_gf2m_sqr_tb[(w) >> 28 & 0xF] << 56 | mp_gf2m_sqr_tb[(w) >> 24 & 0xF] << 48 | \
+ mp_gf2m_sqr_tb[(w) >> 20 & 0xF] << 40 | mp_gf2m_sqr_tb[(w) >> 16 & 0xF] << 32 | \
+ mp_gf2m_sqr_tb[(w) >> 12 & 0xF] << 24 | mp_gf2m_sqr_tb[(w) >> 8 & 0xF] << 16 | \
+ mp_gf2m_sqr_tb[(w) >> 4 & 0xF] << 8 | mp_gf2m_sqr_tb[(w)&0xF]
+#endif /* MP_DIGIT_BITS == 32 */
+
+/* Multiply two binary polynomials mp_digits a, b.
+ * Result is a polynomial with degree < 2 * MP_DIGIT_BITS - 1.
+ * Output in two mp_digits rh, rl: the high digit of the product is
+ * stored through rh and the low digit through rl.
+ */
+void s_bmul_1x1(mp_digit *rh, mp_digit *rl, const mp_digit a, const mp_digit b);
+
+/* Compute xor-multiply of two binary polynomials (a1, a0) x (b1, b0)
+ * result is a binary polynomial in 4 mp_digits r[4].
+ * The caller MUST ensure that r has the right amount of space allocated
+ * (room for 4 mp_digits).
+ * NOTE(review): presumably a1/b1 are the more-significant digits; the
+ * digit order of the operands and of r is not stated here -- confirm
+ * against the definition.
+ */
+void s_bmul_2x2(mp_digit *r, const mp_digit a1, const mp_digit a0, const mp_digit b1,
+ const mp_digit b0);
+
+/* Compute xor-multiply of two binary polynomials (a2, a1, a0) x (b2, b1, b0)
+ * result is a binary polynomial in 6 mp_digits r[6].
+ * The caller MUST ensure that r has the right amount of space allocated
+ * (room for 6 mp_digits).
+ */
+void s_bmul_3x3(mp_digit *r, const mp_digit a2, const mp_digit a1, const mp_digit a0,
+ const mp_digit b2, const mp_digit b1, const mp_digit b0);
+
+/* Compute xor-multiply of two binary polynomials (a3, a2, a1, a0) x (b3, b2, b1, b0)
+ * result is a binary polynomial in 8 mp_digits r[8].
+ * The caller MUST ensure that r has the right amount of space allocated
+ * (room for 8 mp_digits).
+ */
+void s_bmul_4x4(mp_digit *r, const mp_digit a3, const mp_digit a2, const mp_digit a1,
+ const mp_digit a0, const mp_digit b3, const mp_digit b2, const mp_digit b1,
+ const mp_digit b0);
+
+#endif /* _MP_GF2M_PRIV_H_ */
diff --git a/security/nss/lib/freebl/mpi/mp_gf2m.c b/security/nss/lib/freebl/mpi/mp_gf2m.c
new file mode 100644
index 000000000..5a096adde
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mp_gf2m.c
@@ -0,0 +1,678 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mp_gf2m.h"
+#include "mp_gf2m-priv.h"
+#include "mplogic.h"
+#include "mpi-priv.h"
+
+const mp_digit mp_gf2m_sqr_tb[16] = /* squares of all 4-bit binary polynomials: entry i
+ * is i with bit k moved to bit 2k, e.g. 3 (t + 1) -> 5 (t^2 + 1) */
+ {
+ 0, 1, 4, 5, 16, 17, 20, 21,
+ 64, 65, 68, 69, 80, 81, 84, 85
+ };
+
+/* Multiply two binary polynomials mp_digits a, b.
+ * Result is a polynomial with degree < 2 * MP_DIGIT_BITS - 1.
+ * Output in two mp_digits rh, rl.
+ *
+ * *rh receives the high digit of the carry-less (XOR) product and *rl the
+ * low digit. Both variants use the same windowed scheme: a table holding the
+ * product of (a with its top bits masked off) by every window value is
+ * built, b is consumed one window at a time (3 bits per window when
+ * MP_DIGIT_BITS == 32, 4 bits otherwise), and the contribution of the
+ * masked-off top bits of a is XORed in separately at the end.
+ */
+#if MP_DIGIT_BITS == 32
+void
+s_bmul_1x1(mp_digit *rh, mp_digit *rl, const mp_digit a, const mp_digit b)
+{
+ register mp_digit h, l, s;
+ mp_digit tab[8], top2b = a >> 30; /* top2b: the two high bits of a, handled after the windows */
+ register mp_digit a1, a2, a4;
+
+ a1 = a & (0x3FFFFFFF); /* a without its top two bits, so a1 << 2 still fits in the digit */
+ a2 = a1 << 1;
+ a4 = a2 << 1;
+
+ tab[0] = 0; /* tab[i] = a1 times the 3-bit polynomial i */
+ tab[1] = a1;
+ tab[2] = a2;
+ tab[3] = a1 ^ a2;
+ tab[4] = a4;
+ tab[5] = a1 ^ a4;
+ tab[6] = a2 ^ a4;
+ tab[7] = a1 ^ a2 ^ a4;
+
+ s = tab[b & 0x7]; /* window 0: bits 0..2 of b */
+ l = s;
+ s = tab[b >> 3 & 0x7];
+ l ^= s << 3;
+ h = s >> 29; /* bits of (s << 3) that spill into the high digit */
+ s = tab[b >> 6 & 0x7];
+ l ^= s << 6;
+ h ^= s >> 26;
+ s = tab[b >> 9 & 0x7];
+ l ^= s << 9;
+ h ^= s >> 23;
+ s = tab[b >> 12 & 0x7];
+ l ^= s << 12;
+ h ^= s >> 20;
+ s = tab[b >> 15 & 0x7];
+ l ^= s << 15;
+ h ^= s >> 17;
+ s = tab[b >> 18 & 0x7];
+ l ^= s << 18;
+ h ^= s >> 14;
+ s = tab[b >> 21 & 0x7];
+ l ^= s << 21;
+ h ^= s >> 11;
+ s = tab[b >> 24 & 0x7];
+ l ^= s << 24;
+ h ^= s >> 8;
+ s = tab[b >> 27 & 0x7];
+ l ^= s << 27;
+ h ^= s >> 5;
+ s = tab[b >> 30]; /* last window: only the two top bits of b remain, no mask needed */
+ l ^= s << 30;
+ h ^= s >> 2;
+
+ /* compensate for the top two bits of a */
+
+ if (top2b & 01) { /* bit 30 of a: add b * t^30 */
+ l ^= b << 30;
+ h ^= b >> 2;
+ }
+ if (top2b & 02) { /* bit 31 of a: add b * t^31 */
+ l ^= b << 31;
+ h ^= b >> 1;
+ }
+
+ *rh = h;
+ *rl = l;
+}
+#else
+void
+s_bmul_1x1(mp_digit *rh, mp_digit *rl, const mp_digit a, const mp_digit b)
+{
+ register mp_digit h, l, s;
+ mp_digit tab[16], top3b = a >> 61; /* top3b: the three high bits of a, handled after the windows */
+ register mp_digit a1, a2, a4, a8;
+
+ a1 = a & (0x1FFFFFFFFFFFFFFFULL); /* a without its top three bits, so a1 << 3 still fits in the digit */
+ a2 = a1 << 1;
+ a4 = a2 << 1;
+ a8 = a4 << 1;
+ tab[0] = 0; /* tab[i] = a1 times the 4-bit polynomial i */
+ tab[1] = a1;
+ tab[2] = a2;
+ tab[3] = a1 ^ a2;
+ tab[4] = a4;
+ tab[5] = a1 ^ a4;
+ tab[6] = a2 ^ a4;
+ tab[7] = a1 ^ a2 ^ a4;
+ tab[8] = a8;
+ tab[9] = a1 ^ a8;
+ tab[10] = a2 ^ a8;
+ tab[11] = a1 ^ a2 ^ a8;
+ tab[12] = a4 ^ a8;
+ tab[13] = a1 ^ a4 ^ a8;
+ tab[14] = a2 ^ a4 ^ a8;
+ tab[15] = a1 ^ a2 ^ a4 ^ a8;
+
+ s = tab[b & 0xF]; /* window 0: bits 0..3 of b */
+ l = s;
+ s = tab[b >> 4 & 0xF];
+ l ^= s << 4;
+ h = s >> 60; /* bits of (s << 4) that spill into the high digit */
+ s = tab[b >> 8 & 0xF];
+ l ^= s << 8;
+ h ^= s >> 56;
+ s = tab[b >> 12 & 0xF];
+ l ^= s << 12;
+ h ^= s >> 52;
+ s = tab[b >> 16 & 0xF];
+ l ^= s << 16;
+ h ^= s >> 48;
+ s = tab[b >> 20 & 0xF];
+ l ^= s << 20;
+ h ^= s >> 44;
+ s = tab[b >> 24 & 0xF];
+ l ^= s << 24;
+ h ^= s >> 40;
+ s = tab[b >> 28 & 0xF];
+ l ^= s << 28;
+ h ^= s >> 36;
+ s = tab[b >> 32 & 0xF];
+ l ^= s << 32;
+ h ^= s >> 32;
+ s = tab[b >> 36 & 0xF];
+ l ^= s << 36;
+ h ^= s >> 28;
+ s = tab[b >> 40 & 0xF];
+ l ^= s << 40;
+ h ^= s >> 24;
+ s = tab[b >> 44 & 0xF];
+ l ^= s << 44;
+ h ^= s >> 20;
+ s = tab[b >> 48 & 0xF];
+ l ^= s << 48;
+ h ^= s >> 16;
+ s = tab[b >> 52 & 0xF];
+ l ^= s << 52;
+ h ^= s >> 12;
+ s = tab[b >> 56 & 0xF];
+ l ^= s << 56;
+ h ^= s >> 8;
+ s = tab[b >> 60]; /* last window: the four top bits of b, no mask needed */
+ l ^= s << 60;
+ h ^= s >> 4;
+
+ /* compensate for the top three bits of a */
+
+ if (top3b & 01) { /* bit 61 of a: add b * t^61 */
+ l ^= b << 61;
+ h ^= b >> 3;
+ }
+ if (top3b & 02) { /* bit 62 of a: add b * t^62 */
+ l ^= b << 62;
+ h ^= b >> 2;
+ }
+ if (top3b & 04) { /* bit 63 of a: add b * t^63 */
+ l ^= b << 63;
+ h ^= b >> 1;
+ }
+
+ *rh = h;
+ *rl = l;
+}
+#endif
+
+/* Compute xor-multiply of two binary polynomials (a1, a0) x (b1, b0)
+ * result is a binary polynomial in 4 mp_digits r[4].
+ * The caller MUST ensure that r has the right amount of space allocated.
+ */
+void
+s_bmul_2x2(mp_digit *r, const mp_digit a1, const mp_digit a0, const mp_digit b1,
+           const mp_digit b0)
+{
+    /* Karatsuba over GF(2): three 1x1 products instead of four.
+     *   high   = a1 * b1                 -> r[3]:r[2]
+     *   low    = a0 * b0                 -> r[1]:r[0]
+     *   middle = (a0 ^ a1) * (b0 ^ b1)   -> mid_hi:mid_lo
+     */
+    mp_digit mid_hi, mid_lo;
+
+    s_bmul_1x1(&r[3], &r[2], a1, b1);
+    s_bmul_1x1(&r[1], &r[0], a0, b0);
+    s_bmul_1x1(&mid_hi, &mid_lo, a0 ^ a1, b0 ^ b1);
+
+    /* Fold the middle term into the two central digits. r[2] has to be
+     * updated first: the new r[1] is derived from the already-updated r[2]. */
+    r[2] ^= mid_hi ^ r[1] ^ r[3];
+    r[1] = r[3] ^ r[2] ^ r[0] ^ mid_hi ^ mid_lo;
+}
+
+/* Compute xor-multiply of two binary polynomials (a2, a1, a0) x (b2, b1, b0)
+ * result is a binary polynomial in 6 mp_digits r[6].
+ * The caller MUST ensure that r has the right amount of space allocated.
+ */
+void
+s_bmul_3x3(mp_digit *r, const mp_digit a2, const mp_digit a1, const mp_digit a0,
+           const mp_digit b2, const mp_digit b1, const mp_digit b0)
+{
+    mp_digit mid[4]; /* middle partial product, folded into r[2..5] below */
+    int i;
+
+    s_bmul_1x1(&r[5], &r[4], a2, b2);          /* top two digits */
+    s_bmul_2x2(mid, a1, a2 ^ a0, b1, b2 ^ b0); /* middle four digits */
+    s_bmul_2x2(r, a1, a0, b1, b0);             /* bottom four digits */
+
+    /* Strip the parts of the middle product that r already contains. All of
+     * these reads happen before any digit of r is modified. */
+    mid[3] ^= r[3];
+    mid[2] ^= r[2];
+    mid[1] ^= r[1] ^ r[5];
+    mid[0] ^= r[0] ^ r[4];
+
+    /* Accumulate the corrected middle product two digits up. */
+    for (i = 0; i < 4; i++) {
+        r[i + 2] ^= mid[i];
+    }
+}
+
+/* Compute xor-multiply of two binary polynomials (a3, a2, a1, a0) x (b3, b2, b1, b0)
+ * result is a binary polynomial in 8 mp_digits r[8].
+ * The caller MUST ensure that r has the right amount of space allocated.
+ */
+void
+s_bmul_4x4(mp_digit *r, const mp_digit a3, const mp_digit a2, const mp_digit a1,
+           const mp_digit a0, const mp_digit b3, const mp_digit b2, const mp_digit b1,
+           const mp_digit b0)
+{
+    mp_digit mid[4]; /* product of the XORed halves */
+    int i;
+
+    s_bmul_2x2(r + 4, a3, a2, b3, b2);                   /* top four digits */
+    s_bmul_2x2(mid, a3 ^ a1, a2 ^ a0, b3 ^ b1, b2 ^ b0); /* middle four digits */
+    s_bmul_2x2(r, a1, a0, b1, b0);                       /* bottom four digits */
+
+    /* Remove the high and low products from the middle term. This pass only
+     * reads r, so it must complete before the accumulation pass below. */
+    for (i = 0; i < 4; i++) {
+        mid[i] ^= r[i] ^ r[i + 4];
+    }
+
+    /* Add the corrected middle term at an offset of two digits. */
+    for (i = 0; i < 4; i++) {
+        r[i + 2] ^= mid[i];
+    }
+}
+
+/* Compute addition of two binary polynomials a and b,
+ * store result in c; c could be a or b, a and b could be equal;
+ * c is the bitwise XOR of a and b.
+ */
+mp_err
+mp_badd(const mp_int *a, const mp_int *b, mp_int *c)
+{
+    /* Binary-polynomial addition: c = a XOR b, digit by digit.
+     *
+     * c may alias a or b, and a may equal b. Returns MP_OKAY on success or
+     * the error reported by s_mp_pad() when c cannot be enlarged.
+     *
+     * The digit pointers are fetched only AFTER c has been padded. Padding
+     * may have to enlarge c's digit array (NOTE(review): s_mp_pad is defined
+     * outside this file; confirm it can reallocate), and when c is also one
+     * of the operands a pointer taken earlier would be left pointing at the
+     * old storage.
+     */
+    const mp_int *longer, *shorter;
+    mp_digit *pa, *pb, *pc;
+    mp_size ix;
+    mp_size used_pa, used_pb;
+    mp_err res = MP_OKAY;
+
+    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+    /* Order the operands so that "longer" has at least as many digits. */
+    if (MP_USED(a) >= MP_USED(b)) {
+        longer = a;
+        shorter = b;
+    } else {
+        longer = b;
+        shorter = a;
+    }
+
+    /* Capture both lengths now: padding c changes MP_USED(c), and c may be
+     * the shorter operand. */
+    used_pa = MP_USED(longer);
+    used_pb = MP_USED(shorter);
+
+    /* Make sure c has enough precision for the output value */
+    MP_CHECKOK(s_mp_pad(c, used_pa));
+
+    pa = MP_DIGITS(longer);
+    pb = MP_DIGITS(shorter);
+    pc = MP_DIGITS(c);
+
+    /* XOR the digits both operands have in common ... */
+    for (ix = 0; ix < used_pb; ix++) {
+        (*pc++) = (*pa++) ^ (*pb++);
+    }
+
+    /* ... then copy the remaining high digits of the longer operand. */
+    for (; ix < used_pa; ++ix) {
+        *pc++ = *pa++;
+    }
+
+    MP_USED(c) = used_pa;
+    MP_SIGN(c) = ZPOS;
+    s_mp_clamp(c);
+
+CLEANUP:
+    return res;
+}
+
+/* Shift a right by one bit in place (divide the polynomial by t). Expands to
+ * an MP_CHECKOK() check, so it is only usable where MP_CHECKOK() is. The
+ * macro deliberately has no trailing semicolon: every call site supplies its
+ * own, and a second one would break an unbraced if/else around the call. */
+#define s_mp_div2(a) MP_CHECKOK(mpl_rsh((a), (a), 1))
+
+/* Compute binary polynomial multiply d = a * b */
+static void
+s_bmul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *d)
+{
+    /* d[0 .. a_len] = a[0 .. a_len-1] * b over GF(2). Writes a_len + 1
+     * digits; the high half of each 1x1 product is carried into the next
+     * output digit. */
+    mp_digit hi, lo;
+    mp_digit carry = 0;
+    mp_size i;
+
+    for (i = 0; i < a_len; i++) {
+        s_bmul_1x1(&hi, &lo, a[i], b);
+        d[i] = lo ^ carry;
+        carry = hi;
+    }
+    d[a_len] = carry;
+}
+
+/* Compute binary polynomial xor multiply accumulate d ^= a * b */
+static void
+s_bmul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *d)
+{
+    /* d[0 .. a_len] ^= a[0 .. a_len-1] * b over GF(2). Touches a_len + 1
+     * digits of d; the high half of each 1x1 product is carried into the
+     * next output digit. */
+    mp_digit hi, lo;
+    mp_digit carry = 0;
+    mp_size i;
+
+    for (i = 0; i < a_len; i++) {
+        s_bmul_1x1(&hi, &lo, a[i], b);
+        d[i] ^= lo ^ carry;
+        carry = hi;
+    }
+    d[a_len] ^= carry;
+}
+
+/* Compute binary polynomial xor multiply c = a * b.
+ * All parameters may be identical.
+ */
+mp_err
+mp_bmul(const mp_int *a, const mp_int *b, mp_int *c)
+{
+    /* Schoolbook binary-polynomial product c = a * b: one row per digit of
+     * the shorter operand, each row XORed into c at the matching offset.
+     * When c aliases an input, that input is first copied into tmp. */
+    mp_int tmp;
+    const mp_digit *b_digits;
+    mp_size a_used, b_used, row;
+    mp_err res = MP_OKAY;
+
+    MP_DIGITS(&tmp) = 0;
+
+    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+    if (a == c) {
+        MP_CHECKOK(mp_init_copy(&tmp, a));
+        if (a == b) {
+            b = &tmp;
+        }
+        a = &tmp;
+    } else if (b == c) {
+        MP_CHECKOK(mp_init_copy(&tmp, b));
+        b = &tmp;
+    }
+
+    /* Keep the longer operand in a so the outer loop is the short one. */
+    if (MP_USED(a) < MP_USED(b)) {
+        const mp_int *swap = b;
+        b = a;
+        a = swap;
+    }
+
+    /* Neither a nor b is c any more, so their lengths are stable. */
+    a_used = MP_USED(a);
+    b_used = MP_USED(b);
+
+    /* Reset c to zero, then give it room for the full product. */
+    MP_USED(c) = 1;
+    MP_DIGIT(c, 0) = 0;
+    MP_CHECKOK(s_mp_pad(c, a_used + b_used));
+
+    /* First row overwrites c, the remaining rows accumulate into it. */
+    b_digits = MP_DIGITS(b);
+    s_bmul_d(MP_DIGITS(a), a_used, b_digits[0], MP_DIGITS(c));
+
+    MP_USED(c) = a_used + b_used;
+    for (row = 1; row < b_used; row++) {
+        mp_digit b_i = b_digits[row];
+
+        if (b_i != 0) {
+            s_bmul_d_add(MP_DIGITS(a), a_used, b_i, MP_DIGITS(c) + row);
+        } else {
+            /* A zero digit contributes nothing; just clear the top digit
+             * this row would have produced. */
+            MP_DIGIT(c, row + a_used) = 0;
+        }
+    }
+
+    s_mp_clamp(c);
+
+    SIGN(c) = ZPOS;
+
+CLEANUP:
+    mp_clear(&tmp);
+    return res;
+}
+
+/* Compute modular reduction of a and store result in r.
+ * r could be a.
+ * For modular arithmetic, the irreducible polynomial f(t) is represented
+ * as an array of unsigned int, where f(t) is of the form:
+ * f(t) = t^p[0] + t^p[1] + ... + t^p[k]
+ * where m = p[0] > p[1] > ... > p[k] = 0.
+ *
+ * The exponent array is read up to its terminating 0 entry. The reduction
+ * works in place on the digits of r in two phases: first every digit above
+ * the one holding t^p[0] is folded down and cleared, then the bits at or
+ * above t^p[0] inside that digit are folded down until none remain.
+ * Returns MP_OKAY, or the error from mp_copy() when a != r.
+ */
+mp_err
+mp_bmod(const mp_int *a, const unsigned int p[], mp_int *r)
+{
+ int j, k;
+ int n, dN, d0, d1;
+ mp_digit zz, *z, tmp;
+ mp_size used;
+ mp_err res = MP_OKAY;
+
+ /* The algorithm does the reduction in place in r,
+ * if a != r, copy a into r first so reduction can be done in r
+ */
+ if (a != r) {
+ MP_CHECKOK(mp_copy(a, r));
+ }
+ z = MP_DIGITS(r);
+
+ /* start reduction */
+ /*dN = p[0] / MP_DIGIT_BITS; */
+ dN = p[0] >> MP_DIGIT_BITS_LOG_2; /* index of the digit that holds bit p[0] */
+ used = MP_USED(r);
+
+ for (j = used - 1; j > dN;) { /* phase 1: digits above dN, highest first */
+
+ zz = z[j];
+ if (zz == 0) {
+ j--; /* j only moves down once z[j] is zero; folding may write back into z[j] */
+ continue;
+ }
+ z[j] = 0; /* zz keeps the bits that are being folded down */
+
+ for (k = 1; p[k] > 0; k++) {
+ /* reducing component t^p[k] */
+ n = p[0] - p[k];
+ /*d0 = n % MP_DIGIT_BITS; */
+ d0 = n & MP_DIGIT_BITS_MASK;
+ d1 = MP_DIGIT_BITS - d0;
+ /*n /= MP_DIGIT_BITS; */
+ n >>= MP_DIGIT_BITS_LOG_2;
+ z[j - n] ^= (zz >> d0);
+ if (d0) /* skip when d0 == 0: the shift by d1 would equal the digit width */
+ z[j - n - 1] ^= (zz << d1);
+ }
+
+ /* reducing component t^0 */
+ n = dN;
+ /*d0 = p[0] % MP_DIGIT_BITS;*/
+ d0 = p[0] & MP_DIGIT_BITS_MASK;
+ d1 = MP_DIGIT_BITS - d0;
+ z[j - n] ^= (zz >> d0);
+ if (d0)
+ z[j - n - 1] ^= (zz << d1);
+ }
+
+ /* final round of reduction */
+ while (j == dN) { /* phase 2: runs only if r reaches digit dN; exits via the break below */
+
+ /* d0 = p[0] % MP_DIGIT_BITS; */
+ d0 = p[0] & MP_DIGIT_BITS_MASK;
+ zz = z[dN] >> d0; /* bits of digit dN at or above t^p[0] */
+ if (zz == 0)
+ break;
+ d1 = MP_DIGIT_BITS - d0;
+
+ /* clear up the top d1 bits */
+ if (d0) {
+ z[dN] = (z[dN] << d1) >> d1;
+ } else {
+ z[dN] = 0;
+ }
+ *z ^= zz; /* reduction t^0 component */
+
+ for (k = 1; p[k] > 0; k++) {
+ /* reducing component t^p[k]*/
+ /* n = p[k] / MP_DIGIT_BITS; */
+ n = p[k] >> MP_DIGIT_BITS_LOG_2;
+ /* d0 = p[k] % MP_DIGIT_BITS; */
+ d0 = p[k] & MP_DIGIT_BITS_MASK;
+ d1 = MP_DIGIT_BITS - d0;
+ z[n] ^= (zz << d0);
+ tmp = zz >> d1;
+ if (d0 && tmp) /* d0 guards the shift above; tmp avoids touching z[n + 1] needlessly */
+ z[n + 1] ^= tmp;
+ }
+ }
+
+ s_mp_clamp(r);
+CLEANUP:
+ return res;
+}
+
+/* Compute the product of two polynomials a and b, reduce modulo p,
+ * Store the result in r. r could be a or b; a could be b.
+ */
+mp_err
+mp_bmulmod(const mp_int *a, const mp_int *b, const unsigned int p[], mp_int *r)
+{
+    /* r = (a * b) mod p. A product of an operand with itself is routed to
+     * the dedicated squaring routine; otherwise multiply, then reduce r in
+     * place. The first error encountered is returned. */
+    mp_err res;
+
+    if (a == b) {
+        return mp_bsqrmod(a, p, r);
+    }
+
+    res = mp_bmul(a, b, r);
+    if (res == MP_OKAY) {
+        res = mp_bmod(r, p, r);
+    }
+    return res;
+}
+
+/* Compute binary polynomial squaring c = a*a mod p .
+ * Parameter r and a can be identical.
+ */
+
+mp_err
+mp_bsqrmod(const mp_int *a, const unsigned int p[], mp_int *r)
+{
+    /* r = a^2 mod p. Every input digit expands into two output digits via
+     * the gf2m_SQR0 / gf2m_SQR1 macros; the double-length result is then
+     * reduced in place. When r aliases a, a is copied into tmp first. */
+    mp_int tmp;
+    const mp_digit *src;
+    mp_digit *dst;
+    mp_size a_used, ia;
+    mp_err res;
+
+    ARGCHK(a != NULL && r != NULL, MP_BADARG);
+    MP_DIGITS(&tmp) = 0;
+
+    if (a == r) {
+        MP_CHECKOK(mp_init_copy(&tmp, a));
+        a = &tmp;
+    }
+
+    /* Reset r to zero, then make room for twice the digits of a. */
+    MP_USED(r) = 1;
+    MP_DIGIT(r, 0) = 0;
+    MP_CHECKOK(s_mp_pad(r, 2 * USED(a)));
+
+    src = MP_DIGITS(a);
+    dst = MP_DIGITS(r);
+    a_used = MP_USED(a);
+    MP_USED(r) = 2 * a_used;
+
+    for (ia = 0; ia < a_used; ia++) {
+        const mp_digit a_i = src[ia];
+
+        dst[2 * ia] = gf2m_SQR0(a_i);     /* square of the low half of a_i */
+        dst[2 * ia + 1] = gf2m_SQR1(a_i); /* square of the high half of a_i */
+    }
+
+    MP_CHECKOK(mp_bmod(r, p, r));
+    s_mp_clamp(r);
+    SIGN(r) = ZPOS;
+
+CLEANUP:
+    mp_clear(&tmp);
+    return res;
+}
+
+/* Compute binary polynomial y/x mod p, y divided by x, reduce modulo p.
+ * Store the result in r. r could be x or y, and x could equal y.
+ * Uses algorithm Modular_Division_GF(2^m) from
+ * Chang-Shantz, S. "From Euclid's GCD to Montgomery Multiplication to
+ * the Great Divide".
+ */
+mp_err
+mp_bdivmod(const mp_int *y, const mp_int *x, const mp_int *pp,
+           const unsigned int p[], mp_int *r)
+{
+    /* r = y / x mod p, where pp is the reduction polynomial as an mp_int
+     * and p[] is the same polynomial as an exponent array.
+     *
+     * x and y are copied before r is touched, so r may alias either of
+     * them. The return type is mp_err, matching the prototype in mp_gf2m.h.
+     *
+     * Returns:
+     *   MP_OKAY   on success.
+     *   MP_BADARG if an argument is NULL (when ARGCHK is enabled).
+     *   MP_RANGE  if x is zero modulo p. Without this check the first
+     *             halving loop below never terminates, because zero never
+     *             becomes odd.
+     *   MP_UNDEF  if a becomes zero during the main loop, which means x
+     *             has no inverse modulo pp. Without this check the inner
+     *             halving loop never terminates.
+     *   Any error propagated from the mp_* helpers.
+     * On failure the contents of r are unspecified.
+     */
+    mp_int aa, bb, uu;
+    mp_int *a, *b, *u, *v;
+    mp_err res = MP_OKAY;
+
+    MP_DIGITS(&aa) = 0;
+    MP_DIGITS(&bb) = 0;
+    MP_DIGITS(&uu) = 0;
+
+    ARGCHK(y != NULL && x != NULL && pp != NULL && p != NULL && r != NULL,
+           MP_BADARG);
+
+    MP_CHECKOK(mp_init_copy(&aa, x));
+    MP_CHECKOK(mp_init_copy(&uu, y));
+    MP_CHECKOK(mp_init_copy(&bb, pp));
+
+    a = &aa;
+    b = &bb;
+    u = &uu;
+    v = r;
+
+    /* reduce x and y mod p */
+    MP_CHECKOK(mp_bmod(a, p, a));
+    MP_CHECKOK(mp_bmod(u, p, u));
+
+    /* Division by zero: reject before r is modified. */
+    if ((MP_USED(a) == 1) && (MP_DIGIT(a, 0) == 0)) {
+        res = MP_RANGE;
+        goto CLEANUP;
+    }
+
+    /* v starts at zero, with room for a value the size of pp. */
+    MP_CHECKOK(s_mp_pad(r, USED(pp)));
+    MP_USED(r) = 1;
+    MP_DIGIT(r, 0) = 0;
+
+    /* Strip factors of t from a, applying the same division to u mod pp. */
+    while (!mp_isodd(a)) {
+        s_mp_div2(a);
+        if (mp_isodd(u)) {
+            MP_CHECKOK(mp_badd(u, pp, u));
+        }
+        s_mp_div2(u);
+    }
+
+    do {
+        if (mp_cmp_mag(b, a) > 0) {
+            MP_CHECKOK(mp_badd(b, a, b));
+            MP_CHECKOK(mp_badd(v, u, v));
+            do {
+                s_mp_div2(b);
+                if (mp_isodd(v)) {
+                    MP_CHECKOK(mp_badd(v, pp, v));
+                }
+                s_mp_div2(v);
+            } while (!mp_isodd(b));
+        } else if ((MP_DIGIT(a, 0) == 1) && (MP_USED(a) == 1)) {
+            /* a == 1: u now holds y / x. */
+            break;
+        } else {
+            MP_CHECKOK(mp_badd(a, b, a));
+            /* a == b before the addition: a and b share a non-trivial
+             * factor, so no inverse exists and halving would never end. */
+            if ((MP_USED(a) == 1) && (MP_DIGIT(a, 0) == 0)) {
+                res = MP_UNDEF;
+                goto CLEANUP;
+            }
+            MP_CHECKOK(mp_badd(u, v, u));
+            do {
+                s_mp_div2(a);
+                if (mp_isodd(u)) {
+                    MP_CHECKOK(mp_badd(u, pp, u));
+                }
+                s_mp_div2(u);
+            } while (!mp_isodd(a));
+        }
+    } while (1);
+
+    MP_CHECKOK(mp_copy(u, r));
+
+CLEANUP:
+    mp_clear(&aa);
+    mp_clear(&bb);
+    mp_clear(&uu);
+    return res;
+}
+
+/* Convert the bit-string representation of a polynomial a into an array
+ * of integers corresponding to the bits with non-zero coefficient.
+ * Up to max elements of the array will be filled. Return value is total
+ * number of coefficients that would be extracted if array was large enough.
+ */
+int
+mp_bpoly2arr(const mp_int *a, unsigned int p[], int max)
+{
+    /* Lists the exponents of the set bits of a, highest first. The first
+     * max entries of p are zeroed, then filled with as many exponents as
+     * fit. The return value counts every set bit, including those that did
+     * not fit. */
+    int count = 0;
+    int word, bit, idx;
+
+    for (idx = 0; idx < max; idx++) {
+        p[idx] = 0;
+    }
+
+    for (word = MP_USED(a) - 1; word >= 0; word--) {
+        const mp_digit digit = MP_DIGITS(a)[word];
+
+        for (bit = MP_DIGIT_BIT - 1; bit >= 0; bit--) {
+            if ((digit >> bit) & 1) {
+                if (count < max) {
+                    p[count] = MP_DIGIT_BIT * word + bit;
+                }
+                count++;
+            }
+        }
+    }
+
+    return count;
+}
+
+/* Convert the coefficient array representation of a polynomial to a
+ * bit-string. The array must be terminated by 0.
+ */
+mp_err
+mp_barr2poly(const unsigned int p[], mp_int *a)
+{
+    /* Rebuilds the bit-string form of a polynomial: a is cleared, one bit is
+     * set for every exponent preceding the terminating 0 entry of p, and
+     * finally bit 0 is set for the constant term. */
+    const unsigned int *term;
+    mp_err res = MP_OKAY;
+
+    mp_zero(a);
+    for (term = p; *term > 0; term++) {
+        MP_CHECKOK(mpl_set_bit(a, *term, 1));
+    }
+    MP_CHECKOK(mpl_set_bit(a, 0, 1));
+
+CLEANUP:
+    return res;
+}
diff --git a/security/nss/lib/freebl/mpi/mp_gf2m.h b/security/nss/lib/freebl/mpi/mp_gf2m.h
new file mode 100644
index 000000000..ed2c85493
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mp_gf2m.h
@@ -0,0 +1,28 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _MP_GF2M_H_
+#define _MP_GF2M_H_
+
+#include "mpi.h"
+
+mp_err mp_badd(const mp_int *a, const mp_int *b, mp_int *c);
+mp_err mp_bmul(const mp_int *a, const mp_int *b, mp_int *c);
+
+/* For modular arithmetic, the irreducible polynomial f(t) is represented
+ * as an array of unsigned int, where f(t) is of the form:
+ * f(t) = t^p[0] + t^p[1] + ... + t^p[k]
+ * where m = p[0] > p[1] > ... > p[k] = 0.
+ */
+mp_err mp_bmod(const mp_int *a, const unsigned int p[], mp_int *r);
+mp_err mp_bmulmod(const mp_int *a, const mp_int *b, const unsigned int p[],
+ mp_int *r);
+mp_err mp_bsqrmod(const mp_int *a, const unsigned int p[], mp_int *r);
+mp_err mp_bdivmod(const mp_int *y, const mp_int *x, const mp_int *pp,
+ const unsigned int p[], mp_int *r);
+
+int mp_bpoly2arr(const mp_int *a, unsigned int p[], int max);
+mp_err mp_barr2poly(const unsigned int p[], mp_int *a);
+
+#endif /* _MP_GF2M_H_ */
diff --git a/security/nss/lib/freebl/mpi/mpcpucache.c b/security/nss/lib/freebl/mpi/mpcpucache.c
new file mode 100644
index 000000000..6fed35239
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpcpucache.c
@@ -0,0 +1,808 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi.h"
+#include "prtypes.h"
+
+/*
+ * This file implements a single function: s_mpi_getProcessorLineSize();
+ * s_mpi_getProcessorLineSize() returns the size in bytes of the cache line
+ * if a cache exists, or zero if there is no cache. If more than one
+ * cache line exists, it should return the smallest line size (which is
+ * usually the L1 cache).
+ *
+ * mp_modexp uses this information to make sure that private key information
+ * isn't being leaked through the cache.
+ *
+ * Currently the file returns good data for most modern x86 processors, and
+ * reasonable data on 64-bit ppc processors. All other processors are assumed
+ * to have a cache line size of 32 bytes unless modified by target.mk.
+ *
+ */
+
+#if defined(i386) || defined(__i386) || defined(__X86__) || defined(_M_IX86) || defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
+/* X86 processors have special instructions that tell us about the cache */
+#include "string.h"
+
+#if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
+#define AMD_64 1
+#endif
+
+/* Generic CPUID function
+ *
+ * freebl_cpuid(op, eax, ebx, ecx, edx) executes the CPUID instruction with
+ * op loaded into eax and stores the resulting eax/ebx/ecx/edx register
+ * values through the four output pointers. The variant directly below is
+ * the GCC inline-assembly version used when AMD_64 is defined.
+ */
+#if defined(AMD_64)
+
+#if defined(__GNUC__)
+
+void
+freebl_cpuid(unsigned long op, unsigned long *eax,
+ unsigned long *ebx, unsigned long *ecx,
+ unsigned long *edx)
+{
+ __asm__("cpuid\n\t"
+ : "=a"(*eax),
+ "=b"(*ebx),
+ "=c"(*ecx),
+ "=d"(*edx)
+ : "0"(op)); /* "0": op is passed in the same register as output 0 (eax) */
+}
+
+#elif defined(_MSC_VER)
+
+#include <intrin.h>
+
+void
+freebl_cpuid(unsigned long op, unsigned long *eax,
+             unsigned long *ebx, unsigned long *ecx,
+             unsigned long *edx)
+{
+    /* MSVC build for AMD_64: call the __cpuid intrinsic, which fills a
+     * four-int array, and hand the four values back through the output
+     * pointers in eax, ebx, ecx, edx order. */
+    int regs[4];
+
+    __cpuid(regs, op);
+
+    *eax = regs[0];
+    *ebx = regs[1];
+    *ecx = regs[2];
+    *edx = regs[3];
+}
+
+#endif
+
+#else /* !defined(AMD_64) */
+
+/* x86 */
+
+#if defined(__GNUC__)
+void
+freebl_cpuid(unsigned long op, unsigned long *eax,
+ unsigned long *ebx, unsigned long *ecx,
+ unsigned long *edx)
+{
+ /* Some older processors don't fill the ecx register with cpuid, so clobber it
+ * before calling cpuid, so that there's no risk of picking random bits that
+ * erroneously indicate that absent CPU features are present.
+ * Also, GCC isn't smart enough to save the ebx PIC register on its own
+ * in this case, so do it by hand. Use edi to store ebx and pass the
+ * value returned in ebx from cpuid through edi.
+ *
+ * Net effect: CPUID is executed with op in eax and ecx zeroed, and the
+ * resulting eax/ebx/ecx/edx values are stored through the four pointers. */
+ __asm__("xor %%ecx, %%ecx\n\t"
+ "mov %%ebx,%%edi\n\t" /* park the caller's ebx in edi */
+ "cpuid\n\t"
+ "xchgl %%ebx,%%edi\n\t" /* restore ebx; edi now carries cpuid's ebx result */
+ : "=a"(*eax),
+ "=D"(*ebx), /* "D" = edi, see the exchange above */
+ "=c"(*ecx),
+ "=d"(*edx)
+ : "0"(op));
+}
+
+/*
+ * try flipping a processor flag to determine CPU type
+ *
+ * Reads the flags register, XORs it with the bits given in flag, writes the
+ * result back, reads the flags again and finally restores the original
+ * value. The return value is the set of bits that differ between the
+ * original flags and the re-read flags, i.e. zero when the processor did
+ * not let the requested bit change.
+ */
+static unsigned long
+changeFlag(unsigned long flag)
+{
+ unsigned long changedFlags, originalFlags;
+ __asm__("pushfl\n\t" /* get the flags */
+ "popl %0\n\t"
+ "movl %0,%1\n\t" /* save the original flags */
+ "xorl %2,%0\n\t" /* flip the bit */
+ "pushl %0\n\t" /* set the flags */
+ "popfl\n\t"
+ "pushfl\n\t" /* get the flags again (for return) */
+ "popl %0\n\t"
+ "pushl %1\n\t" /* restore the original flags */
+ "popfl\n\t"
+ : "=r"(changedFlags),
+ "=r"(originalFlags),
+ "=r"(flag)
+ : "2"(flag)); /* "2": flag enters in the register of operand 2 */
+ return changedFlags ^ originalFlags; /* bits that actually toggled */
+}
+
+#elif defined(_MSC_VER)
+
+/*
+ * windows versions of the above assembler
+ *
+ * wcpuid emits the two bytes 0f a2 (the CPUID opcode) directly.
+ * freebl_cpuid() saves all general registers, zeroes ecx, loads op into
+ * eax, executes CPUID, copies the four result registers into locals,
+ * restores the saved registers and then stores the locals through the
+ * output pointers.
+ */
+#define wcpuid __asm __emit 0fh __asm __emit 0a2h
+void
+freebl_cpuid(unsigned long op, unsigned long *Reax,
+ unsigned long *Rebx, unsigned long *Recx, unsigned long *Redx)
+{
+ unsigned long Leax, Lebx, Lecx, Ledx; /* locals the asm block can write by name */
+ __asm {
+ pushad
+ xor ecx,ecx
+ mov eax,op
+ wcpuid
+ mov Leax,eax
+ mov Lebx,ebx
+ mov Lecx,ecx
+ mov Ledx,edx
+ popad
+ }
+ *Reax = Leax;
+ *Rebx = Lebx;
+ *Recx = Lecx;
+ *Redx = Ledx;
+}
+
+static unsigned long
+changeFlag(unsigned long flag)
+{
+ unsigned long changedFlags, originalFlags; /* flags after / before the attempted flip */
+ __asm {
+ push eax
+ push ebx
+ pushfd /* get the flags */
+ pop eax
+ push eax /* save the flags on the stack */
+ mov originalFlags,eax /* save the original flags */
+ mov ebx,flag
+ xor eax,ebx /* flip the bit */
+ push eax /* set the flags */
+ popfd
+ pushfd /* get the flags again (for return) */
+ pop eax
+ popfd /* restore the original flags */
+ mov changedFlags,eax
+ pop ebx
+ pop eax
+ }
+ return changedFlags ^ originalFlags; /* bits that actually toggled; 0 if the flag could not be changed */
+}
+#endif
+
+#endif
+
+#if !defined(AMD_64)
+#define AC_FLAG 0x40000
+#define ID_FLAG 0x200000
+
+/* 386 processors can't flip the AC_FLAG, intel AP Note AP-485.
+ * Returns non-zero when toggling AC_FLAG had no effect.
+ * Declared with (void) so the definition is a proper prototype. */
+static int
+is386(void)
+{
+    return changeFlag(AC_FLAG) == 0;
+}
+
+/* 486 processors can't flip the ID_FLAG, intel AP Note AP-485.
+ * Returns non-zero when toggling ID_FLAG had no effect.
+ * Declared with (void) so the definition is a proper prototype. */
+static int
+is486(void)
+{
+    return changeFlag(ID_FLAG) == 0;
+}
+#endif
+
+/*
+ * table for Intel Cache.
+ * See Intel Application Note AP-485 for more information
+ */
+
+typedef unsigned char CacheTypeEntry;
+
+typedef enum { /* classification of a cache descriptor; values are stored in CacheMap as CacheTypeEntry bytes */
+ Cache_NONE = 0,
+ Cache_UNKNOWN = 1,
+ Cache_TLB = 2,
+ Cache_TLBi = 3,
+ Cache_TLBd = 4,
+ Cache_Trace = 5,
+ Cache_L1 = 6, /* L1, L1d, L2, L2d, L3, L3d are the types getIntelCacheEntryLineSize acts on */
+ Cache_L1i = 7, /* presumably the instruction-only variant (the "d" variants are the data caches) */
+ Cache_L1d = 8,
+ Cache_L2 = 9,
+ Cache_L2i = 10,
+ Cache_L2d = 11,
+ Cache_L3 = 12,
+ Cache_L3i = 13,
+ Cache_L3d = 14
+} CacheType;
+
+struct _cache { /* one CacheMap entry, indexed by descriptor byte */
+ CacheTypeEntry type; /* a CacheType value held in one byte */
+ unsigned char lineSize; /* line size for this descriptor (presumably bytes); 0 makes the lookup code ignore the entry */
+};
+/* Maps every possible descriptor byte (0x00-0xff) to a cache type and line
+ * size. Entries with a line size of 0 are skipped by
+ * getIntelCacheEntryLineSize(). Every entry spells out both fields; entry 04
+ * previously relied on implicit zero-initialization of lineSize. */
+static const struct _cache CacheMap[256] = {
+    /* 00 */ { Cache_NONE, 0 },
+    /* 01 */ { Cache_TLBi, 0 },
+    /* 02 */ { Cache_TLBi, 0 },
+    /* 03 */ { Cache_TLBd, 0 },
+    /* 04 */ { Cache_TLBd, 0 },
+    /* 05 */ { Cache_UNKNOWN, 0 },
+    /* 06 */ { Cache_L1i, 32 },
+    /* 07 */ { Cache_UNKNOWN, 0 },
+    /* 08 */ { Cache_L1i, 32 },
+    /* 09 */ { Cache_UNKNOWN, 0 },
+    /* 0a */ { Cache_L1d, 32 },
+    /* 0b */ { Cache_UNKNOWN, 0 },
+    /* 0c */ { Cache_L1d, 32 },
+    /* 0d */ { Cache_UNKNOWN, 0 },
+    /* 0e */ { Cache_UNKNOWN, 0 },
+    /* 0f */ { Cache_UNKNOWN, 0 },
+    /* 10 */ { Cache_UNKNOWN, 0 },
+    /* 11 */ { Cache_UNKNOWN, 0 },
+    /* 12 */ { Cache_UNKNOWN, 0 },
+    /* 13 */ { Cache_UNKNOWN, 0 },
+    /* 14 */ { Cache_UNKNOWN, 0 },
+    /* 15 */ { Cache_UNKNOWN, 0 },
+    /* 16 */ { Cache_UNKNOWN, 0 },
+    /* 17 */ { Cache_UNKNOWN, 0 },
+    /* 18 */ { Cache_UNKNOWN, 0 },
+    /* 19 */ { Cache_UNKNOWN, 0 },
+    /* 1a */ { Cache_UNKNOWN, 0 },
+    /* 1b */ { Cache_UNKNOWN, 0 },
+    /* 1c */ { Cache_UNKNOWN, 0 },
+    /* 1d */ { Cache_UNKNOWN, 0 },
+    /* 1e */ { Cache_UNKNOWN, 0 },
+    /* 1f */ { Cache_UNKNOWN, 0 },
+    /* 20 */ { Cache_UNKNOWN, 0 },
+    /* 21 */ { Cache_UNKNOWN, 0 },
+    /* 22 */ { Cache_L3, 64 },
+    /* 23 */ { Cache_L3, 64 },
+    /* 24 */ { Cache_UNKNOWN, 0 },
+    /* 25 */ { Cache_L3, 64 },
+    /* 26 */ { Cache_UNKNOWN, 0 },
+    /* 27 */ { Cache_UNKNOWN, 0 },
+    /* 28 */ { Cache_UNKNOWN, 0 },
+    /* 29 */ { Cache_L3, 64 },
+    /* 2a */ { Cache_UNKNOWN, 0 },
+    /* 2b */ { Cache_UNKNOWN, 0 },
+    /* 2c */ { Cache_L1d, 64 },
+    /* 2d */ { Cache_UNKNOWN, 0 },
+    /* 2e */ { Cache_UNKNOWN, 0 },
+    /* 2f */ { Cache_UNKNOWN, 0 },
+    /* 30 */ { Cache_L1i, 64 },
+    /* 31 */ { Cache_UNKNOWN, 0 },
+    /* 32 */ { Cache_UNKNOWN, 0 },
+    /* 33 */ { Cache_UNKNOWN, 0 },
+    /* 34 */ { Cache_UNKNOWN, 0 },
+    /* 35 */ { Cache_UNKNOWN, 0 },
+    /* 36 */ { Cache_UNKNOWN, 0 },
+    /* 37 */ { Cache_UNKNOWN, 0 },
+    /* 38 */ { Cache_UNKNOWN, 0 },
+    /* 39 */ { Cache_L2, 64 },
+    /* 3a */ { Cache_UNKNOWN, 0 },
+    /* 3b */ { Cache_L2, 64 },
+    /* 3c */ { Cache_L2, 64 },
+    /* 3d */ { Cache_UNKNOWN, 0 },
+    /* 3e */ { Cache_UNKNOWN, 0 },
+    /* 3f */ { Cache_UNKNOWN, 0 },
+    /* 40 */ { Cache_L2, 0 },
+    /* 41 */ { Cache_L2, 32 },
+    /* 42 */ { Cache_L2, 32 },
+    /* 43 */ { Cache_L2, 32 },
+    /* 44 */ { Cache_L2, 32 },
+    /* 45 */ { Cache_L2, 32 },
+    /* 46 */ { Cache_UNKNOWN, 0 },
+    /* 47 */ { Cache_UNKNOWN, 0 },
+    /* 48 */ { Cache_UNKNOWN, 0 },
+    /* 49 */ { Cache_UNKNOWN, 0 },
+    /* 4a */ { Cache_UNKNOWN, 0 },
+    /* 4b */ { Cache_UNKNOWN, 0 },
+    /* 4c */ { Cache_UNKNOWN, 0 },
+    /* 4d */ { Cache_UNKNOWN, 0 },
+    /* 4e */ { Cache_UNKNOWN, 0 },
+    /* 4f */ { Cache_UNKNOWN, 0 },
+    /* 50 */ { Cache_TLBi, 0 },
+    /* 51 */ { Cache_TLBi, 0 },
+    /* 52 */ { Cache_TLBi, 0 },
+    /* 53 */ { Cache_UNKNOWN, 0 },
+    /* 54 */ { Cache_UNKNOWN, 0 },
+    /* 55 */ { Cache_UNKNOWN, 0 },
+    /* 56 */ { Cache_UNKNOWN, 0 },
+    /* 57 */ { Cache_UNKNOWN, 0 },
+    /* 58 */ { Cache_UNKNOWN, 0 },
+    /* 59 */ { Cache_UNKNOWN, 0 },
+    /* 5a */ { Cache_UNKNOWN, 0 },
+    /* 5b */ { Cache_TLBd, 0 },
+    /* 5c */ { Cache_TLBd, 0 },
+    /* 5d */ { Cache_TLBd, 0 },
+    /* 5e */ { Cache_UNKNOWN, 0 },
+    /* 5f */ { Cache_UNKNOWN, 0 },
+    /* 60 */ { Cache_UNKNOWN, 0 },
+    /* 61 */ { Cache_UNKNOWN, 0 },
+    /* 62 */ { Cache_UNKNOWN, 0 },
+    /* 63 */ { Cache_UNKNOWN, 0 },
+    /* 64 */ { Cache_UNKNOWN, 0 },
+    /* 65 */ { Cache_UNKNOWN, 0 },
+    /* 66 */ { Cache_L1d, 64 },
+    /* 67 */ { Cache_L1d, 64 },
+    /* 68 */ { Cache_L1d, 64 },
+    /* 69 */ { Cache_UNKNOWN, 0 },
+    /* 6a */ { Cache_UNKNOWN, 0 },
+    /* 6b */ { Cache_UNKNOWN, 0 },
+    /* 6c */ { Cache_UNKNOWN, 0 },
+    /* 6d */ { Cache_UNKNOWN, 0 },
+    /* 6e */ { Cache_UNKNOWN, 0 },
+    /* 6f */ { Cache_UNKNOWN, 0 },
+    /* 70 */ { Cache_Trace, 1 },
+    /* 71 */ { Cache_Trace, 1 },
+    /* 72 */ { Cache_Trace, 1 },
+    /* 73 */ { Cache_UNKNOWN, 0 },
+    /* 74 */ { Cache_UNKNOWN, 0 },
+    /* 75 */ { Cache_UNKNOWN, 0 },
+    /* 76 */ { Cache_UNKNOWN, 0 },
+    /* 77 */ { Cache_UNKNOWN, 0 },
+    /* 78 */ { Cache_UNKNOWN, 0 },
+    /* 79 */ { Cache_L2, 64 },
+    /* 7a */ { Cache_L2, 64 },
+    /* 7b */ { Cache_L2, 64 },
+    /* 7c */ { Cache_L2, 64 },
+    /* 7d */ { Cache_UNKNOWN, 0 },
+    /* 7e */ { Cache_UNKNOWN, 0 },
+    /* 7f */ { Cache_UNKNOWN, 0 },
+    /* 80 */ { Cache_UNKNOWN, 0 },
+    /* 81 */ { Cache_UNKNOWN, 0 },
+    /* 82 */ { Cache_L2, 32 },
+    /* 83 */ { Cache_L2, 32 },
+    /* 84 */ { Cache_L2, 32 },
+    /* 85 */ { Cache_L2, 32 },
+    /* 86 */ { Cache_L2, 64 },
+    /* 87 */ { Cache_L2, 64 },
+    /* 88 */ { Cache_UNKNOWN, 0 },
+    /* 89 */ { Cache_UNKNOWN, 0 },
+    /* 8a */ { Cache_UNKNOWN, 0 },
+    /* 8b */ { Cache_UNKNOWN, 0 },
+    /* 8c */ { Cache_UNKNOWN, 0 },
+    /* 8d */ { Cache_UNKNOWN, 0 },
+    /* 8e */ { Cache_UNKNOWN, 0 },
+    /* 8f */ { Cache_UNKNOWN, 0 },
+    /* 90 */ { Cache_UNKNOWN, 0 },
+    /* 91 */ { Cache_UNKNOWN, 0 },
+    /* 92 */ { Cache_UNKNOWN, 0 },
+    /* 93 */ { Cache_UNKNOWN, 0 },
+    /* 94 */ { Cache_UNKNOWN, 0 },
+    /* 95 */ { Cache_UNKNOWN, 0 },
+    /* 96 */ { Cache_UNKNOWN, 0 },
+    /* 97 */ { Cache_UNKNOWN, 0 },
+    /* 98 */ { Cache_UNKNOWN, 0 },
+    /* 99 */ { Cache_UNKNOWN, 0 },
+    /* 9a */ { Cache_UNKNOWN, 0 },
+    /* 9b */ { Cache_UNKNOWN, 0 },
+    /* 9c */ { Cache_UNKNOWN, 0 },
+    /* 9d */ { Cache_UNKNOWN, 0 },
+    /* 9e */ { Cache_UNKNOWN, 0 },
+    /* 9f */ { Cache_UNKNOWN, 0 },
+    /* a0 */ { Cache_UNKNOWN, 0 },
+    /* a1 */ { Cache_UNKNOWN, 0 },
+    /* a2 */ { Cache_UNKNOWN, 0 },
+    /* a3 */ { Cache_UNKNOWN, 0 },
+    /* a4 */ { Cache_UNKNOWN, 0 },
+    /* a5 */ { Cache_UNKNOWN, 0 },
+    /* a6 */ { Cache_UNKNOWN, 0 },
+    /* a7 */ { Cache_UNKNOWN, 0 },
+    /* a8 */ { Cache_UNKNOWN, 0 },
+    /* a9 */ { Cache_UNKNOWN, 0 },
+    /* aa */ { Cache_UNKNOWN, 0 },
+    /* ab */ { Cache_UNKNOWN, 0 },
+    /* ac */ { Cache_UNKNOWN, 0 },
+    /* ad */ { Cache_UNKNOWN, 0 },
+    /* ae */ { Cache_UNKNOWN, 0 },
+    /* af */ { Cache_UNKNOWN, 0 },
+    /* b0 */ { Cache_TLBi, 0 },
+    /* b1 */ { Cache_UNKNOWN, 0 },
+    /* b2 */ { Cache_UNKNOWN, 0 },
+    /* b3 */ { Cache_TLBd, 0 },
+    /* b4 */ { Cache_UNKNOWN, 0 },
+    /* b5 */ { Cache_UNKNOWN, 0 },
+    /* b6 */ { Cache_UNKNOWN, 0 },
+    /* b7 */ { Cache_UNKNOWN, 0 },
+    /* b8 */ { Cache_UNKNOWN, 0 },
+    /* b9 */ { Cache_UNKNOWN, 0 },
+    /* ba */ { Cache_UNKNOWN, 0 },
+    /* bb */ { Cache_UNKNOWN, 0 },
+    /* bc */ { Cache_UNKNOWN, 0 },
+    /* bd */ { Cache_UNKNOWN, 0 },
+    /* be */ { Cache_UNKNOWN, 0 },
+    /* bf */ { Cache_UNKNOWN, 0 },
+    /* c0 */ { Cache_UNKNOWN, 0 },
+    /* c1 */ { Cache_UNKNOWN, 0 },
+    /* c2 */ { Cache_UNKNOWN, 0 },
+    /* c3 */ { Cache_UNKNOWN, 0 },
+    /* c4 */ { Cache_UNKNOWN, 0 },
+    /* c5 */ { Cache_UNKNOWN, 0 },
+    /* c6 */ { Cache_UNKNOWN, 0 },
+    /* c7 */ { Cache_UNKNOWN, 0 },
+    /* c8 */ { Cache_UNKNOWN, 0 },
+    /* c9 */ { Cache_UNKNOWN, 0 },
+    /* ca */ { Cache_UNKNOWN, 0 },
+    /* cb */ { Cache_UNKNOWN, 0 },
+    /* cc */ { Cache_UNKNOWN, 0 },
+    /* cd */ { Cache_UNKNOWN, 0 },
+    /* ce */ { Cache_UNKNOWN, 0 },
+    /* cf */ { Cache_UNKNOWN, 0 },
+    /* d0 */ { Cache_UNKNOWN, 0 },
+    /* d1 */ { Cache_UNKNOWN, 0 },
+    /* d2 */ { Cache_UNKNOWN, 0 },
+    /* d3 */ { Cache_UNKNOWN, 0 },
+    /* d4 */ { Cache_UNKNOWN, 0 },
+    /* d5 */ { Cache_UNKNOWN, 0 },
+    /* d6 */ { Cache_UNKNOWN, 0 },
+    /* d7 */ { Cache_UNKNOWN, 0 },
+    /* d8 */ { Cache_UNKNOWN, 0 },
+    /* d9 */ { Cache_UNKNOWN, 0 },
+    /* da */ { Cache_UNKNOWN, 0 },
+    /* db */ { Cache_UNKNOWN, 0 },
+    /* dc */ { Cache_UNKNOWN, 0 },
+    /* dd */ { Cache_UNKNOWN, 0 },
+    /* de */ { Cache_UNKNOWN, 0 },
+    /* df */ { Cache_UNKNOWN, 0 },
+    /* e0 */ { Cache_UNKNOWN, 0 },
+    /* e1 */ { Cache_UNKNOWN, 0 },
+    /* e2 */ { Cache_UNKNOWN, 0 },
+    /* e3 */ { Cache_UNKNOWN, 0 },
+    /* e4 */ { Cache_UNKNOWN, 0 },
+    /* e5 */ { Cache_UNKNOWN, 0 },
+    /* e6 */ { Cache_UNKNOWN, 0 },
+    /* e7 */ { Cache_UNKNOWN, 0 },
+    /* e8 */ { Cache_UNKNOWN, 0 },
+    /* e9 */ { Cache_UNKNOWN, 0 },
+    /* ea */ { Cache_UNKNOWN, 0 },
+    /* eb */ { Cache_UNKNOWN, 0 },
+    /* ec */ { Cache_UNKNOWN, 0 },
+    /* ed */ { Cache_UNKNOWN, 0 },
+    /* ee */ { Cache_UNKNOWN, 0 },
+    /* ef */ { Cache_UNKNOWN, 0 },
+    /* f0 */ { Cache_UNKNOWN, 0 },
+    /* f1 */ { Cache_UNKNOWN, 0 },
+    /* f2 */ { Cache_UNKNOWN, 0 },
+    /* f3 */ { Cache_UNKNOWN, 0 },
+    /* f4 */ { Cache_UNKNOWN, 0 },
+    /* f5 */ { Cache_UNKNOWN, 0 },
+    /* f6 */ { Cache_UNKNOWN, 0 },
+    /* f7 */ { Cache_UNKNOWN, 0 },
+    /* f8 */ { Cache_UNKNOWN, 0 },
+    /* f9 */ { Cache_UNKNOWN, 0 },
+    /* fa */ { Cache_UNKNOWN, 0 },
+    /* fb */ { Cache_UNKNOWN, 0 },
+    /* fc */ { Cache_UNKNOWN, 0 },
+    /* fd */ { Cache_UNKNOWN, 0 },
+    /* fe */ { Cache_UNKNOWN, 0 },
+    /* ff */ { Cache_UNKNOWN, 0 }
+};
+
+/*
+ * use the above table to determine the CacheEntryLineSize.
+ */
+static void
+getIntelCacheEntryLineSize(unsigned long val, int *level,
+                           unsigned long *lineSize)
+{
+    /* Looks up one descriptor byte in CacheMap and, if it describes a
+     * unified or data cache at a level no higher than the best one seen so
+     * far, records that level and its line size in *level / *lineSize. */
+    const struct _cache *entry = &CacheMap[val];
+    int entryLevel;
+
+    /* A zero line size marks entries we ignore. NOTE: descriptor 0x40 is a
+     * special value meaning "no L2 or L3 cache"; it is rejected here only
+     * because its table line size is 0. If that ever changes it will need
+     * an explicit test. */
+    if (entry->lineSize == 0) {
+        return;
+    }
+
+    /* Only unified and data caches are of interest. */
+    switch (entry->type) {
+        case Cache_L1:
+        case Cache_L1d:
+            entryLevel = 1;
+            break;
+        case Cache_L2:
+        case Cache_L2d:
+            entryLevel = 2;
+            break;
+        case Cache_L3:
+        case Cache_L3d:
+            entryLevel = 3;
+            break;
+        default:
+            return;
+    }
+
+    /* An L1 entry always wins; an L2/L3 entry is taken only while nothing
+     * from a lower level has been recorded yet. */
+    if (entryLevel == 1 || *level >= entryLevel) {
+        *level = entryLevel;
+        *lineSize = entry->lineSize;
+    }
+}
+
+static void
+getIntelRegisterCacheLineSize(unsigned long val,
+                              int *level, unsigned long *lineSize)
+{
+    /* Splits the low 32 bits of val into four descriptor bytes and feeds
+     * them to getIntelCacheEntryLineSize(), most significant byte first. */
+    int shift;
+
+    for (shift = 24; shift >= 0; shift -= 8) {
+        getIntelCacheEntryLineSize((val >> shift) & 0xff, level, lineSize);
+    }
+}
+
+/*
+ * returns '0' if no recognized cache is found, or if the cache
+ * information is not supported by this processor
+ */
+static unsigned long
+getIntelCacheLineSize(int cpuidLevel)
+{
+    /* cpuid command '2' is Intel's cache info call. Each byte of the four
+     * result registers may hold a cache descriptor, which CacheMap
+     * translates to a cache type and line size. The low bits of eax give
+     * the number of times cpuid '2' has to be issued to see every
+     * descriptor, so the low byte of eax is masked off before decoding. A
+     * register carries valid descriptors only when its high bit is clear.
+     * The line size of the lowest-level data/unified cache found is
+     * returned. */
+    unsigned long regs[4]; /* eax, ebx, ecx, edx */
+    unsigned long lineSize = 0;
+    int level = 4; /* above every real level: nothing found yet */
+    int repeat, count, i;
+
+    if (cpuidLevel < 2) {
+        return 0;
+    }
+
+    freebl_cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
+    repeat = regs[0] & 0xf;
+
+    for (count = 0; count < repeat; count++) {
+        /* The first iteration uses the values fetched above. */
+        if (count > 0) {
+            freebl_cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
+        }
+
+        for (i = 0; i < 4; i++) {
+            unsigned long descriptors = regs[i];
+
+            if ((descriptors & 0x80000000) != 0) {
+                continue;
+            }
+            if (i == 0) {
+                /* the low byte of eax is the repeat count, not a descriptor */
+                descriptors &= 0xffffff00;
+            }
+            getIntelRegisterCacheLineSize(descriptors, &level, &lineSize);
+        }
+    }
+    return lineSize;
+}
+
+/*
+ * Cache line lookup for processors that implement the AMD extended cache
+ * commands for cpuid.
+ * (see "AMD Processor Recognition Application Note" Publication 20734).
+ * Some other processors use the identical scheme.
+ * (see "Processor Recognition, Transmeta Corporation").
+ *
+ * returns '0' if the cache info is not supported by this processor.
+ */
+static unsigned long
+getOtherCacheLineSize(unsigned long cpuidLevel)
+{
+    unsigned long eax, ebx, ecx, edx;
+
+    /* the value passed in is not used: it is replaced by the
+     * Extended CPUID level */
+    freebl_cpuid(0x80000000, &eax, &ebx, &ecx, &edx);
+    cpuidLevel = eax;
+    if (cpuidLevel < 0x80000005) {
+        return 0;
+    }
+
+    freebl_cpuid(0x80000005, &eax, &ebx, &ecx, &edx);
+    return ecx & 0xff; /* line Size, L1 Data Cache */
+}
+
+/*
+ * Processor vendor identification strings, as compared against the
+ * twelve character string built from cpuid command '0'.
+ *
+ * Each macro names the table index of the string that follows it, so the
+ * values must stay consecutive and match the order of the entries.
+ * MAN_UNKNOWN is one past the last valid index and marks a vendor string
+ * that is not in the table.
+ */
+static const char *const manMap[] = {
+#define INTEL 0
+    "GenuineIntel",
+#define AMD 1
+    "AuthenticAMD",
+#define CYRIX 2
+    "CyrixInstead",
+#define CENTAUR 3
+    "CentaurHauls",
+#define NEXGEN 4
+    "NexGenDriven",
+#define TRANSMETA 5
+    "GenuineTMx86",
+#define RISE 6
+    "RiseRiseRise",
+#define UMC 7
+    "UMC UMC UMC ",
+#define SIS 8
+    "Sis Sis Sis ",
+#define NATIONAL 9
+    "Geode by NSC",
+};
+
+static const int n_manufacturers = sizeof(manMap) / sizeof(manMap[0]);
+
+#define MAN_UNKNOWN 10
+
+#if !defined(AMD_64)
+#define SSE2_FLAG (1 << 26)
+/*
+ * Report whether the processor advertises SSE2.
+ *
+ * Returns 1 when SSE2_FLAG is set in edx after cpuid command '1'.
+ * Returns 0 otherwise, including when is386() or is486() is true, or
+ * when cpuid command '0' returns 0 in eax.
+ */
+unsigned long
+s_mpi_is_sse2()
+{
+    unsigned long eax, ebx, ecx, edx;
+
+    if (!is386() && !is486()) {
+        freebl_cpuid(0, &eax, &ebx, &ecx, &edx);
+        /* eax == 0 means there are no SSE2 extensions */
+        if (eax != 0) {
+            freebl_cpuid(1, &eax, &ebx, &ecx, &edx);
+            return (edx & SSE2_FLAG) == SSE2_FLAG;
+        }
+    }
+    return 0;
+}
+#endif
+
+/*
+ * Determine the processor's cache line size, in bytes, using cpuid.
+ *
+ * On builds without AMD_64 a 386 reports 0 and a 486 reports 32 without
+ * cpuid being issued. Otherwise the vendor string from cpuid command '0'
+ * selects the lookup: getIntelCacheLineSize for "GenuineIntel",
+ * getOtherCacheLineSize for everything else. If the lookup yields 0 the
+ * result defaults to 32.
+ */
+unsigned long
+s_mpi_getProcessorLineSize()
+{
+    unsigned long eax, ebx, ecx, edx;
+    PRUint32 cpuid[3];
+    unsigned long cpuidLevel;
+    unsigned long cacheLineSize = 0;
+    int manufacturer = MAN_UNKNOWN;
+    int i;
+    char string[13];
+
+#if !defined(AMD_64)
+    if (is386()) {
+        return 0; /* 386 had no cache */
+    }
+    if (is486()) {
+        return 32; /* really? need more info */
+    }
+#endif
+
+    /* Pentium, cpuid command is available */
+    freebl_cpuid(0, &eax, &ebx, &ecx, &edx);
+    cpuidLevel = eax;
+    /* string holds the CPU's manufacturer ID string - a twelve
+     * character ASCII string stored in ebx, edx, ecx (in that order).
+     * The registers must be copied in exactly that order, otherwise the
+     * string never matches an entry of manMap.
+     */
+    cpuid[0] = ebx;
+    cpuid[1] = edx;
+    cpuid[2] = ecx;
+    memcpy(string, cpuid, sizeof(cpuid));
+    string[12] = 0;
+
+    /* the table entries are distinct, so the first match is the only one */
+    for (i = 0; i < n_manufacturers; i++) {
+        if (strcmp(manMap[i], string) == 0) {
+            manufacturer = i;
+            break;
+        }
+    }
+
+    if (manufacturer == INTEL) {
+        cacheLineSize = getIntelCacheLineSize(cpuidLevel);
+    } else {
+        cacheLineSize = getOtherCacheLineSize(cpuidLevel);
+    }
+    /* doesn't support cache info based on cpuid. This means
+     * an old pentium class processor, which have cache lines of
+     * 32. If we learn differently, we can use a switch based on
+     * the Manufacturer id */
+    if (cacheLineSize == 0) {
+        cacheLineSize = 32;
+    }
+    return cacheLineSize;
+}
+#define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
+#endif
+
+#if defined(__ppc64__)
+/*
+ * Sigh, The PPC has some really nice features to help us determine cache
+ * size, since it had lots of direct control functions to do so. The POWER
+ * processor even has an instruction to do this, but it was dropped in
+ * PowerPC. Unfortunately most of them are not available in user mode.
+ *
+ * The dcbz function would be a great way to determine cache line size except
+ * 1) it only works on write-back memory (it throws an exception otherwise),
+ * and 2) because so many mac programs 'knew' the processor cache size was
+ * 32 bytes, they used this instruction as a fast 'zero 32 bytes'. Now the new
+ * G5 processor has 128 byte cache, but dcbz only clears 32 bytes to keep
+ * these programs happy. dcbzl works if 64 bit instructions are supported.
+ * If you know 64 bit instructions are supported, and that stack is
+ * write-back, you can use this code.
+ */
+#include "memory.h"
+
+/* clear the cache line that contains 'array'
+ *
+ * Issues the "dcbzl" instruction through inline assembly. 'array' is
+ * copied into a local that is pinned to register r2 and is passed as both
+ * the output operand and the tied input operand of the asm statement.
+ * NOTE(review): the second instruction operand is written as the literal
+ * register r0; presumably it is expected to contribute zero to the
+ * effective address - confirm against the PowerPC instruction reference. */
+static inline void
+dcbzl(char *array)
+{
+ register char *a asm("r2") = array;
+ __asm__ __volatile__("dcbzl %0,r0"
+ : "=r"(a)
+ : "0"(a));
+}
+
+/* round address 'x' up to a multiple of 'y'; the mask arithmetic only
+ * works when 'y' is a power of two */
+#define PPC_DO_ALIGN(x, y) ((char *)((((long long)(x)) + ((y)-1)) & ~((y)-1)))
+
+/* largest cache line size, in bytes, that the probe below can detect */
+#define PPC_MAX_LINE_SIZE 256
+/*
+ * Measure the cache line size by filling an aligned buffer with 0xff,
+ * clearing one cache block at its start with dcbzl, and then looking for
+ * the largest power of two (PPC_MAX_LINE_SIZE, then half of that, and so
+ * on) whose last byte was cleared.
+ *
+ * Returns that size in bytes, or 0 if none of the probed bytes is zero.
+ */
+unsigned long
+s_mpi_getProcessorLineSize()
+{
+    /* twice the maximum line size plus one, so that an aligned window of
+     * PPC_MAX_LINE_SIZE bytes always fits inside the array */
+    char testArray[2 * PPC_MAX_LINE_SIZE + 1];
+    char *test = PPC_DO_ALIGN(testArray, PPC_MAX_LINE_SIZE);
+    int size = PPC_MAX_LINE_SIZE;
+
+    /* set all the values to 1's, then clear one cache block at 'test' */
+    memset(test, 0xff, PPC_MAX_LINE_SIZE);
+    dcbzl(test);
+
+    /* the size of the cleared area is our block size */
+    while (size != 0) {
+        if (test[size - 1] == 0) {
+            return size;
+        }
+        size = size / 2;
+    }
+    return 0;
+}
+
+#define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
+#endif
+
+/*
+ * put other processor and platform specific cache code here
+ * return the smallest cache line size in bytes on the processor
+ * (usually the L1 cache). If the OS has a call, this would be
+ * a great place to put it.
+ *
+ * If there is no cache, return 0;
+ *
+ * define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED so the generic functions
+ * below aren't compiled.
+ *
+ */
+
+/* target.mk can define MPI_CACHE_LINE_SIZE if it's common for the family or
+ * OS
+ *
+ * This variant is compiled only when MPI_CACHE_LINE_SIZE is defined and no
+ * earlier implementation has defined MPI_GET_PROCESSOR_LINE_SIZE_DEFINED.
+ * It returns the configured constant unchanged and then defines
+ * MPI_GET_PROCESSOR_LINE_SIZE_DEFINED itself. */
+#if defined(MPI_CACHE_LINE_SIZE) && !defined(MPI_GET_PROCESSOR_LINE_SIZE_DEFINED)
+
+unsigned long
+s_mpi_getProcessorLineSize()
+{
+ return MPI_CACHE_LINE_SIZE;
+}
+#define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1
+#endif
+
+/* If no way to get the processor cache line size has been defined, assume
+ * it's 32 bytes (most common value, does not significantly impact performance)
+ *
+ * This fallback is compiled only when MPI_GET_PROCESSOR_LINE_SIZE_DEFINED
+ * has not been defined by any of the implementations earlier in this file,
+ * and always returns the constant 32.
+ */
+#ifndef MPI_GET_PROCESSOR_LINE_SIZE_DEFINED
+unsigned long
+s_mpi_getProcessorLineSize()
+{
+ return 32;
+}
+#endif
+
+#ifdef TEST_IT
+#include <stdio.h>
+
+/*
+ * Stand-alone test driver, built only when TEST_IT is defined: prints the
+ * value returned by s_mpi_getProcessorLineSize and exits with status 0.
+ */
+int
+main(void)
+{
+    /* the function returns unsigned long, so the conversion must be %lu */
+    printf("line size = %lu\n", s_mpi_getProcessorLineSize());
+    return 0;
+}
+#endif
diff --git a/security/nss/lib/freebl/mpi/mpcpucache_amd64.s b/security/nss/lib/freebl/mpi/mpcpucache_amd64.s
new file mode 100644
index 000000000..d493b4762
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpcpucache_amd64.s
@@ -0,0 +1,861 @@
+/ This Source Code Form is subject to the terms of the Mozilla Public
+/ License, v. 2.0. If a copy of the MPL was not distributed with this
+/ file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+ .file "mpcpucache.c"
+/ .section .rodata.str1.1,"aMS",@progbits,1
+ .section .rodata
+.LC0:
+ .string "GenuineIntel"
+.LC1:
+ .string "AuthenticAMD"
+.LC2:
+ .string "CyrixInstead"
+.LC3:
+ .string "CentaurHauls"
+.LC4:
+ .string "NexGenDriven"
+.LC5:
+ .string "GenuineTMx86"
+.LC6:
+ .string "RiseRiseRise"
+.LC7:
+ .string "UMC UMC UMC "
+.LC8:
+ .string "Sis Sis Sis "
+.LC9:
+ .string "Geode by NSC"
+ .section .data.rel.ro.local,"aw",@progbits
+ .align 32
+ .type manMap, @object
+ .size manMap, 80
+manMap:
+ .quad .LC0
+ .quad .LC1
+ .quad .LC2
+ .quad .LC3
+ .quad .LC4
+ .quad .LC5
+ .quad .LC6
+ .quad .LC7
+ .quad .LC8
+ .quad .LC9
+ .section .rodata
+ .align 32
+ .type CacheMap, @object
+ .size CacheMap, 512
+CacheMap:
+ .byte 0
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 4
+ .byte 0
+ .byte 4
+ .zero 1
+ .byte 1
+ .byte 0
+ .byte 7
+ .byte 32
+ .byte 1
+ .byte 0
+ .byte 7
+ .byte 32
+ .byte 1
+ .byte 0
+ .byte 8
+ .byte 32
+ .byte 1
+ .byte 0
+ .byte 8
+ .byte 32
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 12
+ .byte 64
+ .byte 12
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 12
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 12
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 8
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 7
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 9
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 9
+ .byte 64
+ .byte 9
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 9
+ .byte 0
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 32
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 4
+ .byte 0
+ .byte 4
+ .byte 0
+ .byte 4
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 8
+ .byte 64
+ .byte 8
+ .byte 64
+ .byte 8
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 5
+ .byte 1
+ .byte 5
+ .byte 1
+ .byte 5
+ .byte 1
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 9
+ .byte 64
+ .byte 9
+ .byte 64
+ .byte 9
+ .byte 64
+ .byte 9
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 64
+ .byte 9
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 4
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .text
+ .align 16
+/ freebl_cpuid: run the cpuid instruction and store the four result registers.
+/ In:  %rdi = value loaded into %rax before cpuid is executed
+/      %rsi, %rdx, %rcx, %r8 = addresses that receive the resulting
+/      %rax, %rbx, %rcx and %rdx (each stored as a 64-bit value)
+/ The pointers arriving in %rdx and %rcx are first copied to %r10 and %r11,
+/ because those two registers are stored as cpuid results afterwards.
+/ %rbx is pushed before cpuid and popped again once its value is stored.
+.globl freebl_cpuid
+ .type freebl_cpuid, @function
+freebl_cpuid:
+.LFB2:
+ movq %rdx, %r10
+ pushq %rbx
+.LCFI0:
+ movq %rcx, %r11
+ movq %rdi, %rax
+/APP
+ cpuid
+
+/NO_APP
+ movq %rax, (%rsi)
+ movq %rbx, (%r10)
+ popq %rbx
+ movq %rcx, (%r11)
+ movq %rdx, (%r8)
+ ret
+.LFE2:
+ .size freebl_cpuid, .-freebl_cpuid
+ .align 16
+/ getIntelCacheEntryLineSize: look up one cache descriptor in CacheMap.
+/ In:  %rdi = descriptor value, used as an index into CacheMap
+/             (two bytes per entry: byte 0 = type code, byte 1 = line size)
+/      %rsi = address of the current cache level (32-bit)
+/      %rdx = address of the current line size (64-bit)
+/ An entry whose line size byte is zero is ignored.
+/ Type code 6 or 8 always stores level 1 and the entry's line size.
+/ Type code 9 or 11 stores level 2 when the current level is greater than 1.
+/ Type code 12 or 14 stores level 3 when the current level is greater than 2.
+/ Any other combination leaves both outputs untouched.
+ .type getIntelCacheEntryLineSize, @function
+getIntelCacheEntryLineSize:
+.LFB3:
+ leaq CacheMap(%rip), %r9
+ movq %rdx, %r10
+ movzbl 1(%r9,%rdi,2), %ecx
+ movzbl (%r9,%rdi,2), %r8d
+ testb %cl, %cl
+ je .L2
+ cmpl $6, %r8d
+ sete %dl
+ cmpl $8, %r8d
+ sete %al
+ orl %edx, %eax
+ testb $1, %al
+ je .L4
+ movl $1, (%rsi)
+.L9:
+ movzbl %cl, %eax
+ movq %rax, (%r10)
+ ret
+ .align 16
+.L4:
+ movl (%rsi), %r11d
+ cmpl $1, %r11d
+ jg .L11
+.L6:
+ cmpl $2, %r11d
+ jle .L2
+ cmpl $12, %r8d
+ sete %dl
+ cmpl $14, %r8d
+ sete %al
+ orl %edx, %eax
+ testb $1, %al
+ je .L2
+ movzbq 1(%r9,%rdi,2), %rax
+ movl $3, (%rsi)
+ movq %rax, (%r10)
+ .align 16
+.L2:
+ rep ; ret
+ .align 16
+.L11:
+ cmpl $9, %r8d
+ sete %dl
+ cmpl $11, %r8d
+ sete %al
+ orl %edx, %eax
+ testb $1, %al
+ je .L6
+ movl $2, (%rsi)
+ jmp .L9
+.LFE3:
+ .size getIntelCacheEntryLineSize, .-getIntelCacheEntryLineSize
+ .align 16
+/ getIntelRegisterCacheLineSize: split a register value into four descriptors.
+/ In:  %rdi = register value, %rsi = level address, %rdx = line size address
+/ Calls getIntelCacheEntryLineSize with bits 31-24, then bits 23-16, then
+/ bits 15-8 of the value, and finally jumps to it (tail call) with bits 7-0.
+/ The two addresses are kept in %r13 and %r12 across the calls and passed
+/ on unchanged each time; %rbx, %r12 and %r13 are saved on entry and
+/ reloaded before the final jump.
+ .type getIntelRegisterCacheLineSize, @function
+getIntelRegisterCacheLineSize:
+.LFB4:
+ pushq %rbp
+.LCFI1:
+ movq %rsp, %rbp
+.LCFI2:
+ movq %rbx, -24(%rbp)
+.LCFI3:
+ movq %rdi, %rbx
+ shrq $24, %rdi
+ movq %r12, -16(%rbp)
+.LCFI4:
+ movq %r13, -8(%rbp)
+.LCFI5:
+ andl $255, %edi
+ subq $24, %rsp
+.LCFI6:
+ movq %rsi, %r13
+ movq %rdx, %r12
+ call getIntelCacheEntryLineSize
+ movq %rbx, %rdi
+ movq %r12, %rdx
+ movq %r13, %rsi
+ shrq $16, %rdi
+ andl $255, %edi
+ call getIntelCacheEntryLineSize
+ movq %rbx, %rdi
+ movq %r12, %rdx
+ movq %r13, %rsi
+ shrq $8, %rdi
+ andl $255, %ebx
+ andl $255, %edi
+ call getIntelCacheEntryLineSize
+ movq %r12, %rdx
+ movq %r13, %rsi
+ movq %rbx, %rdi
+ movq 8(%rsp), %r12
+ movq (%rsp), %rbx
+ movq 16(%rsp), %r13
+ leave
+ jmp getIntelCacheEntryLineSize
+.LFE4:
+ .size getIntelRegisterCacheLineSize, .-getIntelRegisterCacheLineSize
+ .align 16
+/ s_mpi_getProcessorLineSize: return the processor cache line size in %rax.
+/ 1. Calls freebl_cpuid with 0 and builds a NUL-terminated 12-byte string at
+/    -128(%rbp) from the returned ebx, edx and ecx values, in that order.
+/ 2. Compares that string (strcmp) with each of the ten manMap entries;
+/    %r12d starts at 9 and is replaced by the index of a matching entry.
+/ 3. If the match is entry 0, and the value returned in eax by step 1 is
+/    greater than 1, calls freebl_cpuid with 2 and passes every returned
+/    register whose bit 31 is clear to getIntelRegisterCacheLineSize (the
+/    low byte of eax is masked off first). The low four bits of the first
+/    eax give the number of passes.
+/ 4. Otherwise (.L19) calls freebl_cpuid with 0x80000000 and, when the
+/    returned eax is above 0x80000004, calls it again with 0x80000005 and
+/    takes the low byte of the returned ecx as the line size.
+/ 5. A result of zero is replaced by 32 before returning.
+.globl s_mpi_getProcessorLineSize
+ .type s_mpi_getProcessorLineSize, @function
+s_mpi_getProcessorLineSize:
+.LFB7:
+ pushq %rbp
+.LCFI7:
+ xorl %edi, %edi
+ movq %rsp, %rbp
+.LCFI8:
+ pushq %r15
+.LCFI9:
+ leaq -136(%rbp), %r8
+ leaq -144(%rbp), %rcx
+ leaq -152(%rbp), %rdx
+ pushq %r14
+.LCFI10:
+ leaq -160(%rbp), %rsi
+ leaq -128(%rbp), %r14
+ pushq %r13
+.LCFI11:
+ leaq manMap(%rip), %r13
+ pushq %r12
+.LCFI12:
+ movl $9, %r12d
+ pushq %rbx
+.LCFI13:
+ xorl %ebx, %ebx
+ subq $200, %rsp
+.LCFI14:
+ call freebl_cpuid
+ movq -152(%rbp), %rax
+ movq -160(%rbp), %r15
+ movb $0, -116(%rbp)
+ movl %eax, -128(%rbp)
+ movq -136(%rbp), %rax
+ movl %eax, -124(%rbp)
+ movq -144(%rbp), %rax
+ movl %eax, -120(%rbp)
+ .align 16
+.L18:
+ movslq %ebx,%rax
+ movq %r14, %rsi
+ movq (%r13,%rax,8), %rdi
+ call strcmp@PLT
+ testl %eax, %eax
+ cmove %ebx, %r12d
+ incl %ebx
+ cmpl $9, %ebx
+ jle .L18
+ testl %r12d, %r12d
+ jne .L19
+ xorl %eax, %eax
+ decl %r15d
+ movl $4, -204(%rbp)
+ movq $0, -200(%rbp)
+ jle .L21
+ leaq -168(%rbp), %r8
+ leaq -176(%rbp), %rcx
+ leaq -184(%rbp), %rdx
+ leaq -192(%rbp), %rsi
+ movl $2, %edi
+ xorl %ebx, %ebx
+ call freebl_cpuid
+ movq -192(%rbp), %rdi
+ movl %edi, %r12d
+ andl $15, %r12d
+ cmpl %r12d, %ebx
+ jl .L30
+ jmp .L38
+ .align 16
+.L25:
+ movq -184(%rbp), %rdi
+ testl $2147483648, %edi
+ je .L40
+.L26:
+ movq -176(%rbp), %rdi
+ testl $2147483648, %edi
+ je .L41
+.L27:
+ movq -168(%rbp), %rdi
+ testl $2147483648, %edi
+ je .L42
+.L28:
+ incl %ebx
+ cmpl %r12d, %ebx
+ je .L24
+ leaq -168(%rbp), %r8
+ leaq -176(%rbp), %rcx
+ leaq -184(%rbp), %rdx
+ leaq -192(%rbp), %rsi
+ movl $2, %edi
+ call freebl_cpuid
+.L24:
+ cmpl %r12d, %ebx
+ jge .L38
+ movq -192(%rbp), %rdi
+.L30:
+ testl $2147483648, %edi
+ jne .L25
+ leaq -200(%rbp), %rdx
+ leaq -204(%rbp), %rsi
+ andl $4294967040, %edi
+ call getIntelRegisterCacheLineSize
+ movq -184(%rbp), %rdi
+ testl $2147483648, %edi
+ jne .L26
+.L40:
+ leaq -200(%rbp), %rdx
+ leaq -204(%rbp), %rsi
+ call getIntelRegisterCacheLineSize
+ movq -176(%rbp), %rdi
+ testl $2147483648, %edi
+ jne .L27
+.L41:
+ leaq -200(%rbp), %rdx
+ leaq -204(%rbp), %rsi
+ call getIntelRegisterCacheLineSize
+ movq -168(%rbp), %rdi
+ testl $2147483648, %edi
+ jne .L28
+.L42:
+ leaq -200(%rbp), %rdx
+ leaq -204(%rbp), %rsi
+ call getIntelRegisterCacheLineSize
+ jmp .L28
+.L38:
+ movq -200(%rbp), %rax
+.L21:
+ movq %rax, %rdx
+ movl $32, %eax
+ testq %rdx, %rdx
+ cmoveq %rax, %rdx
+ addq $200, %rsp
+ popq %rbx
+ popq %r12
+ popq %r13
+ popq %r14
+ popq %r15
+ leave
+ movq %rdx, %rax
+ ret
+.L19:
+ leaq -216(%rbp), %r8
+ leaq -224(%rbp), %rcx
+ leaq -232(%rbp), %rdx
+ leaq -240(%rbp), %rsi
+ movl $2147483648, %edi
+ xorl %ebx, %ebx
+ call freebl_cpuid
+ movl $2147483652, %eax
+ cmpq %rax, -240(%rbp)
+ ja .L43
+.L32:
+ movq %rbx, %rdx
+ movl $32, %eax
+ testq %rdx, %rdx
+ cmoveq %rax, %rdx
+ addq $200, %rsp
+ popq %rbx
+ popq %r12
+ popq %r13
+ popq %r14
+ popq %r15
+ leave
+ movq %rdx, %rax
+ ret
+.L43:
+ leaq -216(%rbp), %r8
+ leaq -224(%rbp), %rcx
+ leaq -232(%rbp), %rdx
+ leaq -240(%rbp), %rsi
+ movl $2147483653, %edi
+ call freebl_cpuid
+ movzbq -224(%rbp), %rbx
+ jmp .L32
+.LFE7:
+ .size s_mpi_getProcessorLineSize, .-s_mpi_getProcessorLineSize
diff --git a/security/nss/lib/freebl/mpi/mpcpucache_x86.s b/security/nss/lib/freebl/mpi/mpcpucache_x86.s
new file mode 100644
index 000000000..af17ebcb4
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpcpucache_x86.s
@@ -0,0 +1,902 @@
+/ This Source Code Form is subject to the terms of the Mozilla Public
+/ License, v. 2.0. If a copy of the MPL was not distributed with this
+/ file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+ .file "mpcpucache.c"
+/ .section .rodata.str1.1,"aMS",@progbits,1
+ .section .rodata
+.LC0:
+ .string "GenuineIntel"
+.LC1:
+ .string "AuthenticAMD"
+.LC2:
+ .string "CyrixInstead"
+.LC3:
+ .string "CentaurHauls"
+.LC4:
+ .string "NexGenDriven"
+.LC5:
+ .string "GenuineTMx86"
+.LC6:
+ .string "RiseRiseRise"
+.LC7:
+ .string "UMC UMC UMC "
+.LC8:
+ .string "Sis Sis Sis "
+.LC9:
+ .string "Geode by NSC"
+ .section .data.rel.ro.local,"aw",@progbits
+ .align 32
+ .type manMap, @object
+ .size manMap, 40
+manMap:
+ .long .LC0
+ .long .LC1
+ .long .LC2
+ .long .LC3
+ .long .LC4
+ .long .LC5
+ .long .LC6
+ .long .LC7
+ .long .LC8
+ .long .LC9
+ .section .rodata
+ .align 32
+ .type CacheMap, @object
+ .size CacheMap, 512
+CacheMap:
+ .byte 0
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 4
+ .byte 0
+ .byte 4
+ .zero 1
+ .byte 1
+ .byte 0
+ .byte 7
+ .byte 32
+ .byte 1
+ .byte 0
+ .byte 7
+ .byte 32
+ .byte 1
+ .byte 0
+ .byte 8
+ .byte 32
+ .byte 1
+ .byte 0
+ .byte 8
+ .byte 32
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 12
+ .byte 64
+ .byte 12
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 12
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 12
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 8
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 7
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 9
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 9
+ .byte 64
+ .byte 9
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 9
+ .byte 0
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 32
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 4
+ .byte 0
+ .byte 4
+ .byte 0
+ .byte 4
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 8
+ .byte 64
+ .byte 8
+ .byte 64
+ .byte 8
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 5
+ .byte 1
+ .byte 5
+ .byte 1
+ .byte 5
+ .byte 1
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 9
+ .byte 64
+ .byte 9
+ .byte 64
+ .byte 9
+ .byte 64
+ .byte 9
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 32
+ .byte 9
+ .byte 64
+ .byte 9
+ .byte 64
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 3
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 4
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .byte 1
+ .byte 0
+ .text
+ .align 4
+/ freebl_cpuid: run the cpuid instruction and store the four result registers.
+/ In:  %eax = value left in %eax when cpuid is executed
+/      %edx = address that receives the resulting %eax
+/      three stack arguments = addresses that receive the resulting
+/      %ebx, %ecx and %edx, in that order
+/ %ecx is cleared before cpuid is executed. %ebx is pushed and popped
+/ around the instruction and its result is carried out in %esi.
+.globl freebl_cpuid
+ .type freebl_cpuid, @function
+freebl_cpuid:
+ pushl %ebp
+ pushl %edi
+ pushl %esi
+ subl $8, %esp
+ movl %edx, %ebp
+/APP
+ pushl %ebx
+ xorl %ecx, %ecx
+ cpuid
+ mov %ebx,%esi
+ popl %ebx
+
+/NO_APP
+ movl %eax, (%ebp)
+ movl 24(%esp), %eax
+ movl %esi, (%eax)
+ movl 28(%esp), %eax
+ movl %ecx, (%eax)
+ movl 32(%esp), %eax
+ movl %edx, (%eax)
+ addl $8, %esp
+ popl %esi
+ popl %edi
+ popl %ebp
+ ret
+ .size freebl_cpuid, .-freebl_cpuid
+ .align 4
+/ changeFlag: test whether bits of the flags register can be toggled.
+/ In:  %eax = mask of the flag bits to toggle
+/ Reads the flags (pushfl/popl), xors the mask into them, writes them back
+/ (pushl/popfl), reads them again, and then restores the original flags.
+/ Out: %eax = re-read flags xor original flags, i.e. the bits that
+/      actually changed (zero when the toggle had no effect).
+ .type changeFlag, @function
+changeFlag:
+/APP
+ pushfl
+ popl %edx
+ movl %edx,%ecx
+ xorl %eax,%edx
+ pushl %edx
+ popfl
+ pushfl
+ popl %edx
+ pushl %ecx
+ popfl
+
+/NO_APP
+ xorl %ecx, %edx
+ movl %edx, %eax
+ ret
+ .size changeFlag, .-changeFlag
+ .align 4
+/ getIntelCacheEntryLineSize: look up one cache descriptor in CacheMap.
+/ In:  %eax = descriptor value, used as an index into CacheMap
+/             (two bytes per entry: byte 0 = type code, byte 1 = line size)
+/      %edx = address of the current cache level
+/      first stack argument = address of the current line size
+/ CacheMap is addressed relative to the GOT, whose address is computed
+/ into %ebx by the call/popl sequence at .L17.
+/ An entry whose line size byte is zero is ignored.
+/ Type code 6 or 8 always stores level 1 and the entry's line size (.L6).
+/ Type code 9 or 11 stores level 2 when the current level is greater
+/ than 1 (.L9).
+/ Type code 12 or 14 stores level 3 when the current level is greater
+/ than 2 (.L12).
+/ Any other combination leaves both outputs untouched (.L3).
+ .type getIntelCacheEntryLineSize, @function
+getIntelCacheEntryLineSize:
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+ call .L17
+.L17:
+ popl %ebx
+ addl $_GLOBAL_OFFSET_TABLE_+[.-.L17], %ebx
+ movzbl CacheMap@GOTOFF(%ebx,%eax,2), %ecx
+ movb 1+CacheMap@GOTOFF(%ebx,%eax,2), %al
+ testb %al, %al
+ movl 16(%esp), %edi
+ je .L3
+ cmpl $6, %ecx
+ je .L6
+ cmpl $8, %ecx
+ je .L6
+ movl (%edx), %esi
+ cmpl $1, %esi
+ jg .L15
+.L8:
+ cmpl $2, %esi
+ jle .L3
+ cmpl $12, %ecx
+ je .L12
+ cmpl $14, %ecx
+ je .L12
+ .align 4
+.L3:
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+ .align 4
+.L6:
+ movzbl %al, %eax
+ movl $1, (%edx)
+ movl %eax, (%edi)
+.L16:
+ popl %ebx
+ popl %esi
+ popl %edi
+ ret
+ .align 4
+.L15:
+ cmpl $9, %ecx
+ je .L9
+ cmpl $11, %ecx
+ jne .L8
+.L9:
+ movzbl %al, %eax
+ movl $2, (%edx)
+ movl %eax, (%edi)
+ jmp .L16
+.L12:
+ movzbl %al, %eax
+ movl $3, (%edx)
+ movl %eax, (%edi)
+ jmp .L16
+ .size getIntelCacheEntryLineSize, .-getIntelCacheEntryLineSize
+ .align 4
+/ getIntelRegisterCacheLineSize: split a register value into four descriptors.
+/ In:  %eax = register value, %edx = level address,
+/      first stack argument = line size address
+/ Calls getIntelCacheEntryLineSize with bits 31-24, then bits 23-16, then
+/ bits 15-8 of the value, and finally jumps to it (tail call) with bits 7-0.
+/ The register value is kept in %esi, the level address in -12(%ebp) and
+/ the line size address in %edi, and both addresses are passed on with
+/ every call.
+ .type getIntelRegisterCacheLineSize, @function
+getIntelRegisterCacheLineSize:
+ pushl %ebp
+ movl %esp, %ebp
+ pushl %edi
+ pushl %esi
+ pushl %ecx
+ movl 8(%ebp), %edi
+ movl %eax, %esi
+ movl %edx, -12(%ebp)
+ shrl $24, %eax
+ pushl %edi
+ call getIntelCacheEntryLineSize
+ movl %esi, %eax
+ pushl %edi
+ shrl $16, %eax
+ movl -12(%ebp), %edx
+ andl $255, %eax
+ call getIntelCacheEntryLineSize
+ pushl %edi
+ movl %esi, %edx
+ movzbl %dh, %eax
+ movl -12(%ebp), %edx
+ call getIntelCacheEntryLineSize
+ andl $255, %esi
+ movl %edi, 8(%ebp)
+ movl -12(%ebp), %edx
+ addl $12, %esp
+ leal -8(%ebp), %esp
+ movl %esi, %eax
+ popl %esi
+ popl %edi
+ leave
+ jmp getIntelCacheEntryLineSize
+ .size getIntelRegisterCacheLineSize, .-getIntelRegisterCacheLineSize
+ .align 4
+/ s_mpi_getProcessorLineSize: return the processor cache line size in %eax.
+/ 1. Calls changeFlag with 262144 (0x40000); if no flag bit changed the
+/    function returns 0.
+/ 2. Calls changeFlag with 2097152 (0x200000); if no flag bit changed the
+/    function returns 32.
+/ 3. Calls freebl_cpuid with 0 and builds a NUL-terminated 12-byte string at
+/    -104(%ebp) from the returned ebx, edx and ecx values, in that order.
+/ 4. Compares that string (strcmp) with each of the ten manMap entries;
+/    -168(%ebp) starts at 9 and is replaced by the index of a matching entry.
+/ 5. If the match is entry 0, and the value returned in eax by step 3 is
+/    greater than 1, calls freebl_cpuid with 2 and passes every returned
+/    register whose sign bit is clear to getIntelRegisterCacheLineSize (the
+/    low byte of eax is cleared first). The low four bits of the first eax
+/    give the number of passes.
+/ 6. Otherwise (.L29) calls freebl_cpuid with 0x80000000 and, when the
+/    returned eax is above 0x80000004, calls it again with 0x80000005 and
+/    takes the low byte of the returned ecx as the line size.
+/ 7. A result of zero is replaced by 32 before returning (.L41).
+.globl s_mpi_getProcessorLineSize
+ .type s_mpi_getProcessorLineSize, @function
+s_mpi_getProcessorLineSize:
+ pushl %ebp
+ movl %esp, %ebp
+ pushl %edi
+ pushl %esi
+ pushl %ebx
+ subl $188, %esp
+ call .L52
+.L52:
+ popl %ebx
+ addl $_GLOBAL_OFFSET_TABLE_+[.-.L52], %ebx
+ movl $9, -168(%ebp)
+ movl $262144, %eax
+ call changeFlag
+ xorl %edx, %edx
+ testl %eax, %eax
+ jne .L50
+.L19:
+ leal -12(%ebp), %esp
+ popl %ebx
+ popl %esi
+ movl %edx, %eax
+ popl %edi
+ leave
+ ret
+ .align 4
+.L50:
+ movl $2097152, %eax
+ call changeFlag
+ testl %eax, %eax
+ movl $32, %edx
+ je .L19
+ leal -108(%ebp), %eax
+ pushl %eax
+ leal -112(%ebp), %eax
+ pushl %eax
+ leal -116(%ebp), %eax
+ pushl %eax
+ leal -120(%ebp), %edx
+ xorl %eax, %eax
+ call freebl_cpuid
+ movl -120(%ebp), %eax
+ movl %eax, -164(%ebp)
+ movl -116(%ebp), %eax
+ movl %eax, -104(%ebp)
+ movl -108(%ebp), %eax
+ movl %eax, -100(%ebp)
+ movl -112(%ebp), %eax
+ movl %eax, -96(%ebp)
+ movb $0, -92(%ebp)
+ xorl %esi, %esi
+ addl $12, %esp
+ leal -104(%ebp), %edi
+ .align 4
+.L28:
+ subl $8, %esp
+ pushl %edi
+ pushl manMap@GOTOFF(%ebx,%esi,4)
+ call strcmp@PLT
+ addl $16, %esp
+ testl %eax, %eax
+ jne .L26
+ movl %esi, -168(%ebp)
+.L26:
+ incl %esi
+ cmpl $9, %esi
+ jle .L28
+ movl -168(%ebp), %eax
+ testl %eax, %eax
+ jne .L29
+ xorl %eax, %eax
+ cmpl $1, -164(%ebp)
+ movl $4, -144(%ebp)
+ movl $0, -140(%ebp)
+ jle .L41
+ leal -124(%ebp), %edx
+ movl %edx, -188(%ebp)
+ leal -128(%ebp), %eax
+ pushl %edx
+ movl %eax, -184(%ebp)
+ leal -132(%ebp), %edx
+ pushl %eax
+ movl %edx, -180(%ebp)
+ movl $2, %eax
+ pushl %edx
+ leal -136(%ebp), %edx
+ call freebl_cpuid
+ movl -136(%ebp), %eax
+ movl %eax, %edi
+ andl $15, %edi
+ xorl %esi, %esi
+ addl $12, %esp
+ leal -140(%ebp), %edx
+ cmpl %edi, %esi
+ movl %edx, -176(%ebp)
+ jl .L40
+ jmp .L48
+ .align 4
+.L49:
+ movl -136(%ebp), %eax
+.L40:
+ testl %eax, %eax
+ js .L35
+ xorb %al, %al
+ pushl -176(%ebp)
+ leal -144(%ebp), %edx
+ call getIntelRegisterCacheLineSize
+ popl %eax
+.L35:
+ movl -132(%ebp), %eax
+ testl %eax, %eax
+ js .L36
+ pushl -176(%ebp)
+ leal -144(%ebp), %edx
+ call getIntelRegisterCacheLineSize
+ popl %eax
+.L36:
+ movl -128(%ebp), %eax
+ testl %eax, %eax
+ js .L37
+ pushl -176(%ebp)
+ leal -144(%ebp), %edx
+ call getIntelRegisterCacheLineSize
+ popl %eax
+.L37:
+ movl -124(%ebp), %eax
+ testl %eax, %eax
+ js .L38
+ pushl -176(%ebp)
+ leal -144(%ebp), %edx
+ call getIntelRegisterCacheLineSize
+ popl %eax
+.L38:
+ incl %esi
+ cmpl %edi, %esi
+ je .L34
+ pushl -188(%ebp)
+ pushl -184(%ebp)
+ pushl -180(%ebp)
+ leal -136(%ebp), %edx
+ movl $2, %eax
+ call freebl_cpuid
+ addl $12, %esp
+.L34:
+ cmpl %edi, %esi
+ jl .L49
+.L48:
+ movl -140(%ebp), %eax
+.L41:
+ testl %eax, %eax
+ jne .L44
+ movb $32, %al
+.L44:
+ leal -12(%ebp), %esp
+ popl %ebx
+ popl %esi
+ movl %eax, %edx
+ movl %edx, %eax
+ popl %edi
+ leave
+ ret
+.L29:
+ leal -148(%ebp), %eax
+ movl %eax, -192(%ebp)
+ movl $0, -172(%ebp)
+ leal -152(%ebp), %edi
+ pushl %eax
+ pushl %edi
+ leal -156(%ebp), %esi
+ pushl %esi
+ leal -160(%ebp), %edx
+ movl $-2147483648, %eax
+ call freebl_cpuid
+ addl $12, %esp
+ cmpl $-2147483644, -160(%ebp)
+ ja .L51
+.L42:
+ movl -172(%ebp), %eax
+ jmp .L41
+.L51:
+ pushl -192(%ebp)
+ pushl %edi
+ pushl %esi
+ leal -160(%ebp), %edx
+ movl $-2147483643, %eax
+ call freebl_cpuid
+ movzbl -152(%ebp), %edx
+ addl $12, %esp
+ movl %edx, -172(%ebp)
+ jmp .L42
+ .size s_mpi_getProcessorLineSize, .-s_mpi_getProcessorLineSize
diff --git a/security/nss/lib/freebl/mpi/mpi-config.h b/security/nss/lib/freebl/mpi/mpi-config.h
new file mode 100644
index 000000000..f365592a4
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi-config.h
@@ -0,0 +1,68 @@
+/* Default configuration for MPI library
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MPI_CONFIG_H_
+#define MPI_CONFIG_H_
+
+/*
+ For boolean options,
+ 0 = no
+ 1 = yes
+
+ Other options are documented individually.
+
+ */
+
+#ifndef MP_IOFUNC
+#define MP_IOFUNC 0 /* include mp_print() ? */
+#endif
+
+#ifndef MP_MODARITH
+#define MP_MODARITH 1 /* include modular arithmetic ? */
+#endif
+
+#ifndef MP_NUMTH
+#define MP_NUMTH 1 /* include number theoretic functions? */
+#endif
+
+#ifndef MP_LOGTAB
+#define MP_LOGTAB 1 /* use table of logs instead of log()? */
+#endif
+
+#ifndef MP_MEMSET
+#define MP_MEMSET 1 /* use memset() to zero buffers? */
+#endif
+
+#ifndef MP_MEMCPY
+#define MP_MEMCPY 1 /* use memcpy() to copy buffers? */
+#endif
+
+#ifndef MP_ARGCHK
+/*
+ 0 = no parameter checks
+ 1 = runtime checks, continue execution and return an error to caller
+ 2 = assertions; dump core on parameter errors
+ */
+#ifdef DEBUG
+#define MP_ARGCHK 2 /* how to check input arguments */
+#else
+#define MP_ARGCHK 1 /* how to check input arguments */
+#endif
+#endif
+
+#ifndef MP_DEBUG
+#define MP_DEBUG 0 /* print diagnostic output? */
+#endif
+
+#ifndef MP_DEFPREC
+#define MP_DEFPREC 64 /* default precision, in digits */
+#endif
+
+#ifndef MP_SQUARE
+#define MP_SQUARE 1 /* use separate squaring code? */
+#endif
+
+#endif /* ifndef MPI_CONFIG_H_ */
diff --git a/security/nss/lib/freebl/mpi/mpi-priv.h b/security/nss/lib/freebl/mpi/mpi-priv.h
new file mode 100644
index 000000000..b34452c48
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi-priv.h
@@ -0,0 +1,243 @@
+/*
+ * mpi-priv.h - Private header file for MPI
+ * Arbitrary precision integer arithmetic library
+ *
+ * NOTE WELL: the content of this header file is NOT part of the "public"
+ * API for the MPI library, and may change at any time.
+ * Application programs that use libmpi should NOT include this header file.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#ifndef _MPI_PRIV_H_
+#define _MPI_PRIV_H_ 1
+
+#include "mpi.h"
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#if MP_DEBUG
+#include <stdio.h>
+
+#define DIAG(T, V) \
+ { \
+ fprintf(stderr, T); \
+ mp_print(V, stderr); \
+ fputc('\n', stderr); \
+ }
+#else
+#define DIAG(T, V)
+#endif
+
+/* If we aren't using a wired-in logarithm table, we need to include
+ the math library to get the log() function
+ */
+
+/* {{{ s_logv_2[] - log table for 2 in various bases */
+
+#if MP_LOGTAB
+/*
+ A table of the logs of 2 for various bases (the 0 and 1 entries of
+ this table are meaningless and should not be referenced).
+
+ This table is used to compute output lengths for the mp_toradix()
+ function. Since a number n in radix r takes up about log_r(n)
+ digits, we estimate the output size by taking the least integer
+ greater than log_r(n), where:
+
+ log_r(n) = log_2(n) * log_r(2)
+
+ This table, therefore, is a table of log_r(2) for 2 <= r <= 36,
+ which are the output bases supported.
+ */
+
+extern const float s_logv_2[];
+#define LOG_V_2(R) s_logv_2[(R)]
+
+#else
+
+/*
+ If MP_LOGTAB is not defined, use the math library to compute the
+ logarithms on the fly. Otherwise, use the table.
+ Pick which works best for your system.
+ */
+
+#include <math.h>
+#define LOG_V_2(R) (log(2.0) / log(R))
+
+#endif /* if MP_LOGTAB */
+
+/* }}} */
+
+/* {{{ Digit arithmetic macros */
+
+/*
+ When adding and multiplying digits, the results can be larger than
+ can be contained in an mp_digit. Thus, an mp_word is used. These
+ macros mask off the upper and lower digits of the mp_word (the
+ mp_word may be more than 2 mp_digits wide, but we only concern
+ ourselves with the low-order 2 mp_digits)
+ */
+
+#define CARRYOUT(W) (mp_digit)((W) >> DIGIT_BIT)
+#define ACCUM(W) (mp_digit)(W)
+
+#define MP_MIN(a, b) (((a) < (b)) ? (a) : (b))
+#define MP_MAX(a, b) (((a) > (b)) ? (a) : (b))
+#define MP_HOWMANY(a, b) (((a) + (b)-1) / (b))
+#define MP_ROUNDUP(a, b) (MP_HOWMANY(a, b) * (b))
+
+/* }}} */
+
+/* {{{ Comparison constants */
+
+#define MP_LT -1
+#define MP_EQ 0
+#define MP_GT 1
+
+/* }}} */
+
+/* {{{ private function declarations */
+
+void s_mp_setz(mp_digit *dp, mp_size count); /* zero digits */
+void s_mp_copy(const mp_digit *sp, mp_digit *dp, mp_size count); /* copy */
+void *s_mp_alloc(size_t nb, size_t ni); /* general allocator */
+void s_mp_free(void *ptr); /* general free function */
+
+mp_err s_mp_grow(mp_int *mp, mp_size min); /* increase allocated size */
+mp_err s_mp_pad(mp_int *mp, mp_size min); /* left pad with zeroes */
+
+void s_mp_clamp(mp_int *mp); /* clip leading zeroes */
+
+void s_mp_exch(mp_int *a, mp_int *b); /* swap a and b in place */
+
+mp_err s_mp_lshd(mp_int *mp, mp_size p); /* left-shift by p digits */
+void s_mp_rshd(mp_int *mp, mp_size p); /* right-shift by p digits */
+mp_err s_mp_mul_2d(mp_int *mp, mp_digit d); /* multiply by 2^d in place */
+void s_mp_div_2d(mp_int *mp, mp_digit d); /* divide by 2^d in place */
+void s_mp_mod_2d(mp_int *mp, mp_digit d); /* modulo 2^d in place */
+void s_mp_div_2(mp_int *mp); /* divide by 2 in place */
+mp_err s_mp_mul_2(mp_int *mp); /* multiply by 2 in place */
+mp_err s_mp_norm(mp_int *a, mp_int *b, mp_digit *pd);
+/* normalize for division */
+mp_err s_mp_add_d(mp_int *mp, mp_digit d); /* unsigned digit addition */
+mp_err s_mp_sub_d(mp_int *mp, mp_digit d); /* unsigned digit subtract */
+mp_err s_mp_mul_d(mp_int *mp, mp_digit d); /* unsigned digit multiply */
+mp_err s_mp_div_d(mp_int *mp, mp_digit d, mp_digit *r);
+/* unsigned digit divide */
+mp_err s_mp_reduce(mp_int *x, const mp_int *m, const mp_int *mu);
+/* Barrett reduction */
+mp_err s_mp_add(mp_int *a, const mp_int *b); /* magnitude addition */
+mp_err s_mp_add_3arg(const mp_int *a, const mp_int *b, mp_int *c);
+mp_err s_mp_sub(mp_int *a, const mp_int *b); /* magnitude subtract */
+mp_err s_mp_sub_3arg(const mp_int *a, const mp_int *b, mp_int *c);
+mp_err s_mp_add_offset(mp_int *a, mp_int *b, mp_size offset);
+/* a += b * RADIX^offset */
+mp_err s_mp_mul(mp_int *a, const mp_int *b); /* magnitude multiply */
+#if MP_SQUARE
+mp_err s_mp_sqr(mp_int *a); /* magnitude square */
+#else
+#define s_mp_sqr(a) s_mp_mul(a, a)
+#endif
+mp_err s_mp_div(mp_int *rem, mp_int *div, mp_int *quot); /* magnitude div */
+mp_err s_mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c);
+mp_err s_mp_2expt(mp_int *a, mp_digit k); /* a = 2^k */
+int s_mp_cmp(const mp_int *a, const mp_int *b); /* magnitude comparison */
+int s_mp_cmp_d(const mp_int *a, mp_digit d); /* magnitude digit compare */
+int s_mp_ispow2(const mp_int *v); /* is v a power of 2? */
+int s_mp_ispow2d(mp_digit d); /* is d a power of 2? */
+
+int s_mp_tovalue(char ch, int r); /* convert ch to value */
+char s_mp_todigit(mp_digit val, int r, int low); /* convert val to digit */
+int s_mp_outlen(int bits, int r); /* output length in bytes */
+mp_digit s_mp_invmod_radix(mp_digit P); /* returns (P ** -1) mod RADIX */
+mp_err s_mp_invmod_odd_m(const mp_int *a, const mp_int *m, mp_int *c);
+mp_err s_mp_invmod_2d(const mp_int *a, mp_size k, mp_int *c);
+mp_err s_mp_invmod_even_m(const mp_int *a, const mp_int *m, mp_int *c);
+
+#ifdef NSS_USE_COMBA
+
+#define IS_POWER_OF_2(a) ((a) && !((a) & ((a)-1)))
+
+void s_mp_mul_comba_4(const mp_int *A, const mp_int *B, mp_int *C);
+void s_mp_mul_comba_8(const mp_int *A, const mp_int *B, mp_int *C);
+void s_mp_mul_comba_16(const mp_int *A, const mp_int *B, mp_int *C);
+void s_mp_mul_comba_32(const mp_int *A, const mp_int *B, mp_int *C);
+
+void s_mp_sqr_comba_4(const mp_int *A, mp_int *B);
+void s_mp_sqr_comba_8(const mp_int *A, mp_int *B);
+void s_mp_sqr_comba_16(const mp_int *A, mp_int *B);
+void s_mp_sqr_comba_32(const mp_int *A, mp_int *B);
+
+#endif /* end NSS_USE_COMBA */
+
+/* ------ mpv functions, operate on arrays of digits, not on mp_int's ------ */
+#if defined(__OS2__) && defined(__IBMC__)
+#define MPI_ASM_DECL __cdecl
+#else
+#define MPI_ASM_DECL
+#endif
+
+#ifdef MPI_AMD64
+
+mp_digit MPI_ASM_DECL s_mpv_mul_set_vec64(mp_digit *, mp_digit *, mp_size, mp_digit);
+mp_digit MPI_ASM_DECL s_mpv_mul_add_vec64(mp_digit *, const mp_digit *, mp_size, mp_digit);
+
+/* c = a * b */
+#define s_mpv_mul_d(a, a_len, b, c) \
+ ((mp_digit *)c)[a_len] = s_mpv_mul_set_vec64(c, a, a_len, b)
+
+/* c += a * b */
+#define s_mpv_mul_d_add(a, a_len, b, c) \
+ ((mp_digit *)c)[a_len] = s_mpv_mul_add_vec64(c, a, a_len, b)
+
+#else
+
+void MPI_ASM_DECL s_mpv_mul_d(const mp_digit *a, mp_size a_len,
+ mp_digit b, mp_digit *c);
+void MPI_ASM_DECL s_mpv_mul_d_add(const mp_digit *a, mp_size a_len,
+ mp_digit b, mp_digit *c);
+
+#endif
+
+void MPI_ASM_DECL s_mpv_mul_d_add_prop(const mp_digit *a,
+ mp_size a_len, mp_digit b,
+ mp_digit *c);
+void MPI_ASM_DECL s_mpv_sqr_add_prop(const mp_digit *a,
+ mp_size a_len,
+ mp_digit *sqrs);
+
+mp_err MPI_ASM_DECL s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo,
+ mp_digit divisor, mp_digit *quot, mp_digit *rem);
+
+/* c += a * b * (MP_RADIX ** offset); */
+/* Callers of this macro should be aware that the return type might vary;
+ * it should be treated as a void function. */
+#define s_mp_mul_d_add_offset(a, b, c, off) \
+ s_mpv_mul_d_add_prop(MP_DIGITS(a), MP_USED(a), b, MP_DIGITS(c) + off)
+
+typedef struct {
+ mp_int N; /* modulus N */
+ mp_digit n0prime; /* n0' = - (n0 ** -1) mod MP_RADIX */
+} mp_mont_modulus;
+
+mp_err s_mp_mul_mont(const mp_int *a, const mp_int *b, mp_int *c,
+ mp_mont_modulus *mmm);
+mp_err s_mp_redc(mp_int *T, mp_mont_modulus *mmm);
+
+/*
+ * s_mpi_getProcessorLineSize() returns the size in bytes of the cache line
+ * if a cache exists, or zero if there is no cache. If more than one
+ * cache line exists, it should return the smallest line size (which is
+ * usually the L1 cache).
+ *
+ * mp_modexp uses this information to make sure that private key information
+ * isn't being leaked through the cache.
+ *
+ * The function takes no arguments; it is declared with (void) so that the
+ * compiler treats this as a real prototype and rejects calls that pass any.
+ *
+ * see mpcpucache.c for the implementation.
+ */
+unsigned long s_mpi_getProcessorLineSize(void);
+
+/* }}} */
+#endif
diff --git a/security/nss/lib/freebl/mpi/mpi.c b/security/nss/lib/freebl/mpi/mpi.c
new file mode 100644
index 000000000..f6f75439c
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi.c
@@ -0,0 +1,4839 @@
+/*
+ * mpi.c
+ *
+ * Arbitrary precision integer arithmetic library
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi-priv.h"
+#if defined(OSF1)
+#include <c_asm.h>
+#endif
+
+#if defined(__arm__) && \
+ ((defined(__thumb__) && !defined(__thumb2__)) || defined(__ARM_ARCH_3__))
+/* The inlined assembler version does not work on 16-bit Thumb or ARM v3 */
+#undef MP_ASSEMBLY_MULTIPLY
+#undef MP_ASSEMBLY_SQUARE
+#endif
+
+#if MP_LOGTAB
+/*
+ A table of the logs of 2 for various bases (the 0 and 1 entries of
+ this table are meaningless and should not be referenced).
+
+ This table is used to compute output lengths for the mp_toradix()
+ function. Since a number n in radix r takes up about log_r(n)
+ digits, we estimate the output size by taking the least integer
+ greater than log_r(n), where:
+
+ log_r(n) = log_2(n) * log_r(2)
+
+ This table, therefore, is a table of log_r(2) for 2 <= r <= 36,
+ which are the output bases supported.
+ */
+#include "logtab.h"
+#endif
+
+#ifdef CT_VERIF
+#include <valgrind/memcheck.h>
+#endif
+
+/* {{{ Constant strings */
+
+/* Constant strings returned by mp_strerror() */
+static const char *mp_err_string[] = {
+ "unknown result code", /* say what? */
+ "boolean true", /* MP_OKAY, MP_YES */
+ "boolean false", /* MP_NO */
+ "out of memory", /* MP_MEM */
+ "argument out of range", /* MP_RANGE */
+ "invalid input parameter", /* MP_BADARG */
+ "result is undefined" /* MP_UNDEF */
+};
+
+/* Value to digit maps for radix conversion */
+
+/* s_dmap_1 - standard digits and letters */
+static const char *s_dmap_1 =
+ "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
+
+/* }}} */
+
+/* {{{ Default precision manipulation */
+
+/* Default precision for newly created mp_int's */
+static mp_size s_mp_defprec = MP_DEFPREC;
+
+/* Report the default precision (in digits) currently applied to newly
+   created mp_int's. */
+mp_size
+mp_get_prec(void)
+{
+    mp_size current = s_mp_defprec;
+
+    return current;
+
+} /* end mp_get_prec() */
+
+/* Change the default precision applied to newly created mp_int's.
+   Passing zero restores the compile-time default, MP_DEFPREC. */
+void
+mp_set_prec(mp_size prec)
+{
+    s_mp_defprec = (prec != 0) ? prec : MP_DEFPREC;
+
+} /* end mp_set_prec() */
+
+/* }}} */
+
+#ifdef CT_VERIF
+void
+mp_taint(mp_int *mp)
+{
+ size_t i;
+ for (i = 0; i < mp->used; ++i) {
+ VALGRIND_MAKE_MEM_UNDEFINED(&(mp->dp[i]), sizeof(mp_digit));
+ }
+}
+
+/* Mark each used digit of mp as defined again for Valgrind's memcheck.
+   Only compiled when CT_VERIF is defined. */
+void
+mp_untaint(mp_int *mp)
+{
+    size_t pos = 0;
+
+    while (pos < mp->used) {
+        VALGRIND_MAKE_MEM_DEFINED(&(mp->dp[pos]), sizeof(mp_digit));
+        ++pos;
+    }
+}
+#endif
+
+/*------------------------------------------------------------------------*/
+/* {{{ mp_init(mp) */
+
+/*
+  mp_init(mp)
+
+  Set up mp as a new zero-valued mp_int, sized with the current default
+  precision (see mp_set_prec()).  The work is delegated to
+  mp_init_size(), whose result is handed straight back: MP_OKAY on
+  success, MP_MEM if the digit storage could not be allocated.
+ */
+
+mp_err
+mp_init(mp_int *mp)
+{
+    mp_size defaultPrec = s_mp_defprec;
+
+    return mp_init_size(mp, defaultPrec);
+
+} /* end mp_init() */
+
+/* }}} */
+
+/* {{{ mp_init_size(mp, prec) */
+
+/*
+  mp_init_size(mp, prec)
+
+  Set up mp as a new zero-valued mp_int with room for at least prec
+  digits.  The requested precision is rounded up to a multiple of the
+  current default precision before allocating.  Returns MP_OKAY on
+  success, or MP_MEM if the digit array could not be allocated.  The
+  arguments are validated through ARGCHK (mp must be non-NULL and prec
+  must be non-zero).
+ */
+
+mp_err
+mp_init_size(mp_int *mp, mp_size prec)
+{
+    mp_size rounded;
+
+    ARGCHK(mp != NULL && prec > 0, MP_BADARG);
+
+    rounded = MP_ROUNDUP(prec, s_mp_defprec);
+
+    DIGITS(mp) = s_mp_alloc(rounded, sizeof(mp_digit));
+    if (DIGITS(mp) == NULL)
+        return MP_MEM;
+
+    /* One used digit, non-negative sign. */
+    ALLOC(mp) = rounded;
+    USED(mp) = 1;
+    SIGN(mp) = ZPOS;
+
+    return MP_OKAY;
+
+} /* end mp_init_size() */
+
+/* }}} */
+
+/* {{{ mp_init_copy(mp, from) */
+
+/*
+  mp_init_copy(mp, from)
+
+  Set up mp as a copy of from: a digit array of the same allocated size
+  is obtained, the used digits are copied over, and the used count,
+  allocation size and sign are taken from 'from'.  If mp and from are
+  the same object nothing is done.  Returns MP_OKAY on success, MP_MEM
+  if the digit array could not be allocated.
+ */
+
+mp_err
+mp_init_copy(mp_int *mp, const mp_int *from)
+{
+    mp_digit *digits;
+
+    ARGCHK(mp != NULL && from != NULL, MP_BADARG);
+
+    if (mp == from)
+        return MP_OKAY;
+
+    digits = s_mp_alloc(ALLOC(from), sizeof(mp_digit));
+    DIGITS(mp) = digits;
+    if (digits == NULL)
+        return MP_MEM;
+
+    s_mp_copy(DIGITS(from), digits, USED(from));
+
+    SIGN(mp) = SIGN(from);
+    ALLOC(mp) = ALLOC(from);
+    USED(mp) = USED(from);
+
+    return MP_OKAY;
+
+} /* end mp_init_copy() */
+
+/* }}} */
+
+/* {{{ mp_copy(from, to) */
+
+/*
+  mp_copy(from, to)
+
+  Copy the value of 'from' into 'to'.  'to' is expected to have been
+  initialized already (use mp_init_copy() otherwise).  Copying an
+  mp_int onto itself is a no-op.  Returns MP_OKAY on success, or MP_MEM
+  if a larger digit array was needed and could not be allocated (in
+  which case 'to' is left untouched).
+ */
+
+mp_err
+mp_copy(const mp_int *from, mp_int *to)
+{
+    ARGCHK(from != NULL && to != NULL, MP_BADARG);
+
+    if (from == to)
+        return MP_OKAY;
+
+    if (ALLOC(to) < USED(from)) {
+        /*
+          The destination buffer cannot hold the source digits, so a
+          fresh buffer (as large as the source's allocation) is filled
+          first and then swapped in.
+         */
+        mp_digit *fresh = s_mp_alloc(ALLOC(from), sizeof(mp_digit));
+
+        if (fresh == NULL)
+            return MP_MEM;
+
+        s_mp_copy(DIGITS(from), fresh, USED(from));
+
+        /* Zero the old digits before handing the buffer back. */
+        if (DIGITS(to) != NULL) {
+            s_mp_setz(DIGITS(to), ALLOC(to));
+            s_mp_free(DIGITS(to));
+        }
+
+        DIGITS(to) = fresh;
+        ALLOC(to) = ALLOC(from);
+    } else {
+        /*
+          The existing buffer is big enough: re-use it, clearing the
+          digits above the ones about to be overwritten.
+         */
+        s_mp_setz(DIGITS(to) + USED(from), ALLOC(to) - USED(from));
+        s_mp_copy(DIGITS(from), DIGITS(to), USED(from));
+    }
+
+    /* The used count and sign always follow the source. */
+    USED(to) = USED(from);
+    SIGN(to) = SIGN(from);
+
+    return MP_OKAY;
+
+} /* end mp_copy() */
+
+/* }}} */
+
+/* {{{ mp_exch(mp1, mp2) */
+
+/*
+  mp_exch(mp1, mp2)
+
+  Swap the contents of mp1 and mp2 via s_mp_exch().  When MP_ARGCHK is
+  2 the arguments are asserted to be non-NULL; otherwise a NULL
+  argument makes the call a silent no-op.  There is no error return.
+ */
+
+void
+mp_exch(mp_int *mp1, mp_int *mp2)
+{
+#if MP_ARGCHK == 2
+    assert(mp1 != NULL && mp2 != NULL);
+#else
+    if (!(mp1 != NULL && mp2 != NULL))
+        return;
+#endif
+
+    s_mp_exch(mp1, mp2);
+
+} /* end mp_exch() */
+
+/* }}} */
+
+/* {{{ mp_clear(mp) */
+
+/*
+  mp_clear(mp)
+
+  Release the digit storage owned by an mp_int.  The digits are zeroed
+  before being freed, and the digit pointer, used count and allocation
+  size are reset so that a second mp_clear() on the same mp_int is
+  harmless.  A NULL mp is ignored.
+ */
+
+void
+mp_clear(mp_int *mp)
+{
+    mp_digit *digits;
+
+    if (mp == NULL)
+        return;
+
+    digits = DIGITS(mp);
+    if (digits != NULL) {
+        s_mp_setz(digits, ALLOC(mp));
+        s_mp_free(digits);
+        DIGITS(mp) = NULL;
+    }
+
+    ALLOC(mp) = 0;
+    USED(mp) = 0;
+
+} /* end mp_clear() */
+
+/* }}} */
+
+/* {{{ mp_zero(mp) */
+
+/*
+  mp_zero(mp)
+
+  Set mp to zero: every allocated digit is cleared, the used count
+  becomes 1 and the sign becomes ZPOS.  The allocation is left as it
+  is, so there is nothing that can fail; a NULL mp is silently ignored.
+ */
+void
+mp_zero(mp_int *mp)
+{
+    if (mp != NULL) {
+        s_mp_setz(DIGITS(mp), ALLOC(mp));
+        SIGN(mp) = ZPOS;
+        USED(mp) = 1;
+    }
+
+} /* end mp_zero() */
+
+/* }}} */
+
+/* {{{ mp_set(mp, d) */
+
+/* Set mp to the single-digit value d: mp is zeroed first (which also
+   makes it non-negative) and d is stored in the lowest digit.  A NULL
+   mp is silently ignored. */
+void
+mp_set(mp_int *mp, mp_digit d)
+{
+    if (mp != NULL) {
+        mp_zero(mp);
+        DIGIT(mp, 0) = d;
+    }
+
+} /* end mp_set() */
+
+/* }}} */
+
+/* {{{ mp_set_int(mp, z) */
+
+/*
+  mp_set_int(mp, z)
+
+  Set mp to the value of the signed long z.  Returns MP_OKAY on
+  success, or the error reported by s_mp_mul_d()/s_mp_add_d() when the
+  value has to be assembled byte by byte (i.e. when a long is wider
+  than an mp_digit).
+ */
+mp_err
+mp_set_int(mp_int *mp, long z)
+{
+    int ix;
+    unsigned long v;
+    mp_err res;
+
+    ARGCHK(mp != NULL, MP_BADARG);
+
+    /*
+      Take the magnitude in unsigned arithmetic.  labs(LONG_MIN) is
+      undefined because the result is not representable as a long,
+      whereas negating after conversion to unsigned long is well
+      defined and yields the correct magnitude for every input.
+     */
+    v = (z < 0) ? (0UL - (unsigned long)z) : (unsigned long)z;
+
+    mp_zero(mp);
+    if (z == 0)
+        return MP_OKAY; /* shortcut for zero */
+
+    if (sizeof v <= sizeof(mp_digit)) {
+        DIGIT(mp, 0) = v;
+    } else {
+        /* Feed the bytes of v in, most significant first. */
+        for (ix = sizeof(long) - 1; ix >= 0; ix--) {
+            if ((res = s_mp_mul_d(mp, (UCHAR_MAX + 1))) != MP_OKAY)
+                return res;
+
+            res = s_mp_add_d(mp, (mp_digit)((v >> (ix * CHAR_BIT)) & UCHAR_MAX));
+            if (res != MP_OKAY)
+                return res;
+        }
+    }
+    if (z < 0)
+        SIGN(mp) = NEG;
+
+    return MP_OKAY;
+
+} /* end mp_set_int() */
+
+/* }}} */
+
+/* {{{ mp_set_ulong(mp, z) */
+
+/*
+  mp_set_ulong(mp, z)
+
+  Set mp to the value of the unsigned long z.  Returns MP_OKAY on
+  success, or the error reported by s_mp_mul_d()/s_mp_add_d() when the
+  value has to be assembled byte by byte (i.e. when an unsigned long is
+  wider than an mp_digit).
+ */
+mp_err
+mp_set_ulong(mp_int *mp, unsigned long z)
+{
+    int shift;
+    mp_err res;
+
+    ARGCHK(mp != NULL, MP_BADARG);
+
+    mp_zero(mp);
+    if (z == 0)
+        return MP_OKAY; /* nothing more to do for zero */
+
+    if (sizeof z <= sizeof(mp_digit)) {
+        /* The whole value fits in the lowest digit. */
+        DIGIT(mp, 0) = z;
+        return MP_OKAY;
+    }
+
+    /* Feed the bytes of z in, most significant first. */
+    shift = (int)(sizeof(long) - 1) * CHAR_BIT;
+    while (shift >= 0) {
+        res = s_mp_mul_d(mp, (UCHAR_MAX + 1));
+        if (res != MP_OKAY)
+            return res;
+
+        res = s_mp_add_d(mp, (mp_digit)((z >> shift) & UCHAR_MAX));
+        if (res != MP_OKAY)
+            return res;
+
+        shift -= CHAR_BIT;
+    }
+
+    return MP_OKAY;
+} /* end mp_set_ulong() */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/* {{{ Digit arithmetic */
+
+/* {{{ mp_add_d(a, d, b) */
+
+/*
+  mp_add_d(a, d, b)
+
+  Compute b = a + d for a single (unsigned) digit d, taking the sign of
+  a into account.  The sum is built in a temporary copy of a and then
+  exchanged into b, so b is only modified on success.  Returns MP_OKAY,
+  or the error from the copy or the digit arithmetic.
+ */
+
+mp_err
+mp_add_d(const mp_int *a, mp_digit d, mp_int *b)
+{
+    mp_int sum;
+    mp_err res;
+
+    ARGCHK(a != NULL && b != NULL, MP_BADARG);
+
+    res = mp_init_copy(&sum, a);
+    if (res != MP_OKAY)
+        return res;
+
+    if (SIGN(&sum) == ZPOS) {
+        /* a >= 0: plain magnitude addition */
+        res = s_mp_add_d(&sum, d);
+    } else if (s_mp_cmp_d(&sum, d) >= 0) {
+        /* a < 0 and |a| >= d: the magnitude shrinks by d */
+        res = s_mp_sub_d(&sum, d);
+    } else {
+        /* a < 0 and |a| < d: the result is the non-negative d - |a| */
+        mp_neg(&sum, &sum);
+
+        DIGIT(&sum, 0) = d - DIGIT(&sum, 0);
+    }
+
+    if (res == MP_OKAY) {
+        /* zero is always stored as non-negative */
+        if (s_mp_cmp_d(&sum, 0) == 0)
+            SIGN(&sum) = ZPOS;
+
+        s_mp_exch(&sum, b);
+    }
+
+    mp_clear(&sum);
+    return res;
+
+} /* end mp_add_d() */
+
+/* }}} */
+
+/* {{{ mp_sub_d(a, d, b) */
+
+/*
+  mp_sub_d(a, d, b)
+
+  Compute b = a - d for a single (unsigned) digit d, taking the sign of
+  a into account.  The difference is built in a temporary copy of a and
+  then exchanged into b, so b is only modified on success.  Returns
+  MP_OKAY, or the error from the copy or the digit arithmetic.
+ */
+
+mp_err
+mp_sub_d(const mp_int *a, mp_digit d, mp_int *b)
+{
+    mp_int diff;
+    mp_err res;
+
+    ARGCHK(a != NULL && b != NULL, MP_BADARG);
+
+    res = mp_init_copy(&diff, a);
+    if (res != MP_OKAY)
+        return res;
+
+    if (SIGN(&diff) == NEG) {
+        /* a < 0: the magnitude grows by d */
+        res = s_mp_add_d(&diff, d);
+    } else if (s_mp_cmp_d(&diff, d) >= 0) {
+        /* a >= d: plain magnitude subtraction */
+        res = s_mp_sub_d(&diff, d);
+    } else {
+        /* 0 <= a < d: the result is -(d - a) */
+        mp_neg(&diff, &diff);
+
+        DIGIT(&diff, 0) = d - DIGIT(&diff, 0);
+        SIGN(&diff) = NEG;
+    }
+
+    if (res == MP_OKAY) {
+        /* zero is always stored as non-negative */
+        if (s_mp_cmp_d(&diff, 0) == 0)
+            SIGN(&diff) = ZPOS;
+
+        s_mp_exch(&diff, b);
+    }
+
+    mp_clear(&diff);
+    return res;
+
+} /* end mp_sub_d() */
+
+/* }}} */
+
+/* {{{ mp_mul_d(a, d, b) */
+
+/*
+  mp_mul_d(a, d, b)
+
+  Compute b = a * d for a single (unsigned) digit d.  A zero d simply
+  zeroes b; otherwise a is copied into b and b is multiplied in place.
+  Returns MP_OKAY, or the error from the copy or the multiplication.
+ */
+
+mp_err
+mp_mul_d(const mp_int *a, mp_digit d, mp_int *b)
+{
+    mp_err res;
+
+    ARGCHK(a != NULL && b != NULL, MP_BADARG);
+
+    if (d == 0) {
+        mp_zero(b);
+        return MP_OKAY;
+    }
+
+    res = mp_copy(a, b);
+    if (res == MP_OKAY)
+        res = s_mp_mul_d(b, d);
+
+    return res;
+
+} /* end mp_mul_d() */
+
+/* }}} */
+
+/* {{{ mp_mul_2(a, c) */
+
+/* Compute c = a * 2: a is copied into c, which is then doubled in
+   place.  Returns MP_OKAY, or the error from the copy or the doubling. */
+mp_err
+mp_mul_2(const mp_int *a, mp_int *c)
+{
+    mp_err res;
+
+    ARGCHK(a != NULL && c != NULL, MP_BADARG);
+
+    res = mp_copy(a, c);
+
+    return (res != MP_OKAY) ? res : s_mp_mul_2(c);
+
+} /* end mp_mul_2() */
+
+/* }}} */
+
+/* {{{ mp_div_d(a, d, q, r) */
+
+/*
+  mp_div_d(a, d, q, r)
+
+  Compute the quotient q = a / d and remainder r = a mod d, for a
+  single (unsigned) digit d.  Either of q and r may be NULL if that
+  result is not wanted.
+
+  Returns MP_RANGE if d is zero, MP_OKAY on success, or the error from
+  the copy or the digit division.
+ */
+
+mp_err
+mp_div_d(const mp_int *a, mp_digit d, mp_int *q, mp_digit *r)
+{
+    mp_err res;
+    mp_int qp;
+    mp_digit rem = 0;
+    int pow;
+
+    ARGCHK(a != NULL, MP_BADARG);
+
+    if (d == 0)
+        return MP_RANGE;
+
+    /* Shortcut for powers of two ... */
+    if ((pow = s_mp_ispow2d(d)) >= 0) {
+        mp_digit mask;
+
+        /* the remainder is just the low 'pow' bits of the lowest digit */
+        mask = ((mp_digit)1 << pow) - 1;
+        rem = DIGIT(a, 0) & mask;
+
+        if (q) {
+            if ((res = mp_copy(a, q)) != MP_OKAY) {
+                return res;
+            }
+            s_mp_div_2d(q, pow);
+        }
+
+        if (r)
+            *r = rem;
+
+        return MP_OKAY;
+    }
+
+    /* General case: divide a temporary copy in place. */
+    if ((res = mp_init_copy(&qp, a)) != MP_OKAY)
+        return res;
+
+    res = s_mp_div_d(&qp, d, &rem);
+
+    /*
+      A zero quotient is always stored as non-negative.  This has to be
+      applied to the temporary: q may legitimately be NULL, and qp is
+      what ends up in q after the exchange below.
+     */
+    if (s_mp_cmp_d(&qp, 0) == 0)
+        SIGN(&qp) = ZPOS;
+
+    if (r) {
+        *r = rem;
+    }
+
+    if (q)
+        s_mp_exch(&qp, q);
+
+    mp_clear(&qp);
+    return res;
+
+} /* end mp_div_d() */
+
+/* }}} */
+
+/* {{{ mp_div_2(a, c) */
+
+/*
+  mp_div_2(a, c)
+
+  Compute c = a / 2; the remainder is discarded.  a is copied into c,
+  which is then halved in place.  Returns MP_OKAY, or the error from
+  the copy.
+ */
+
+mp_err
+mp_div_2(const mp_int *a, mp_int *c)
+{
+    mp_err res;
+
+    ARGCHK(a != NULL && c != NULL, MP_BADARG);
+
+    res = mp_copy(a, c);
+    if (res == MP_OKAY)
+        s_mp_div_2(c);
+
+    return res;
+
+} /* end mp_div_2() */
+
+/* }}} */
+
+/* {{{ mp_expt_d(a, d, c) */
+
+/*
+  Compute c = a ** d for a single digit exponent d, by binary
+  square-and-multiply.  The result is accumulated in a temporary and
+  exchanged into c only when every step has succeeded.  Returns
+  MP_OKAY, or the first error reported while initializing, multiplying
+  or squaring.
+ */
+mp_err
+mp_expt_d(const mp_int *a, mp_digit d, mp_int *c)
+{
+    mp_int acc, base;
+    mp_err res;
+
+    ARGCHK(a != NULL && c != NULL, MP_BADARG);
+
+    res = mp_init(&acc);
+    if (res != MP_OKAY)
+        return res;
+
+    res = mp_init_copy(&base, a);
+    if (res != MP_OKAY) {
+        mp_clear(&acc);
+        return res;
+    }
+
+    /* acc starts out as 1 */
+    DIGIT(&acc, 0) = 1;
+
+    /* Walk the exponent from its least significant bit upwards. */
+    while (res == MP_OKAY && d != 0) {
+        if (d & 1)
+            res = s_mp_mul(&acc, &base);
+
+        d >>= 1;
+
+        if (res == MP_OKAY)
+            res = s_mp_sqr(&base);
+    }
+
+    if (res == MP_OKAY)
+        s_mp_exch(&acc, c);
+
+    mp_clear(&base);
+    mp_clear(&acc);
+
+    return res;
+
+} /* end mp_expt_d() */
+
+/* }}} */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/* {{{ Full arithmetic */
+
+/* {{{ mp_abs(a, b) */
+
+/*
+  mp_abs(a, b)
+
+  Compute b = |a|: a is copied into b and the sign of b is forced to
+  ZPOS.  'a' and 'b' may be the same mp_int.  Returns MP_OKAY, or the
+  error from the copy.
+ */
+
+mp_err
+mp_abs(const mp_int *a, mp_int *b)
+{
+    mp_err res;
+
+    ARGCHK(a != NULL && b != NULL, MP_BADARG);
+
+    res = mp_copy(a, b);
+    if (res == MP_OKAY)
+        SIGN(b) = ZPOS;
+
+    return res;
+
+} /* end mp_abs() */
+
+/* }}} */
+
+/* {{{ mp_neg(a, b) */
+
+/*
+  mp_neg(a, b)
+
+  Compute b = -a: a is copied into b and the sign of b is flipped,
+  except that zero is always left non-negative.  'a' and 'b' may be the
+  same mp_int.  Returns MP_OKAY, or the error from the copy.
+ */
+
+mp_err
+mp_neg(const mp_int *a, mp_int *b)
+{
+    mp_err res;
+
+    ARGCHK(a != NULL && b != NULL, MP_BADARG);
+
+    res = mp_copy(a, b);
+    if (res != MP_OKAY)
+        return res;
+
+    /* Only a non-zero, non-negative value becomes negative. */
+    if (s_mp_cmp_d(b, 0) != MP_EQ && SIGN(b) != NEG)
+        SIGN(b) = NEG;
+    else
+        SIGN(b) = ZPOS;
+
+    return MP_OKAY;
+
+} /* end mp_neg() */
+
+/* }}} */
+
+/* {{{ mp_add(a, b, c) */
+
+/*
+  mp_add(a, b, c)
+
+  Compute c = a + b.  All parameters may be identical.  Operands of
+  equal sign have their magnitudes added; otherwise the smaller
+  magnitude is subtracted from the larger one.  A zero result is always
+  stored as non-negative.  Returns the status of the underlying
+  three-argument magnitude operation.
+ */
+
+mp_err
+mp_add(const mp_int *a, const mp_int *b, mp_int *c)
+{
+    mp_err res;
+
+    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+    if (SIGN(a) != SIGN(b)) {
+        if (s_mp_cmp(a, b) < 0) {
+            /* different sign: |a| < |b| */
+            MP_CHECKOK(s_mp_sub_3arg(b, a, c));
+        } else {
+            /* different sign: |a| >= |b| */
+            MP_CHECKOK(s_mp_sub_3arg(a, b, c));
+        }
+    } else {
+        /* same sign: add values, keep sign */
+        MP_CHECKOK(s_mp_add_3arg(a, b, c));
+    }
+
+    if (s_mp_cmp_d(c, 0) == MP_EQ)
+        SIGN(c) = ZPOS;
+
+CLEANUP:
+    return res;
+
+} /* end mp_add() */
+
+/* }}} */
+
+/* {{{ mp_sub(a, b, c) */
+
+/*
+ mp_sub(a, b, c)
+
+ Compute c = a - b. All parameters may be identical.
+
+ Operands of opposite sign have their magnitudes added; operands of
+ equal sign have the smaller magnitude subtracted from the larger one.
+ A zero result is always stored as non-negative. Returns the status
+ of the underlying three-argument magnitude operation.
+ */
+
+mp_err
+mp_sub(const mp_int *a, const mp_int *b, mp_int *c)
+{
+ mp_err res;
+ int magDiff; /* result of the magnitude comparison of a and b */
+
+ ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+ if (a == b) { /* same object: the difference is zero */
+ mp_zero(c);
+ return MP_OKAY;
+ }
+
+ if (MP_SIGN(a) != MP_SIGN(b)) { /* opposite signs: magnitudes are added */
+ MP_CHECKOK(s_mp_add_3arg(a, b, c));
+ } else if (!(magDiff = s_mp_cmp(a, b))) { /* same sign, equal magnitude */
+ mp_zero(c);
+ res = MP_OKAY;
+ } else if (magDiff > 0) { /* same sign, |a| > |b| */
+ MP_CHECKOK(s_mp_sub_3arg(a, b, c));
+ } else { /* same sign, |a| < |b| */
+ MP_CHECKOK(s_mp_sub_3arg(b, a, c));
+ MP_SIGN(c) = !MP_SIGN(a); /* NOTE(review): a is read after c is written; confirm the c == a case */
+ }
+
+ if (s_mp_cmp_d(c, 0) == MP_EQ) /* zero is always non-negative */
+ MP_SIGN(c) = MP_ZPOS;
+
+CLEANUP:
+ return res;
+
+} /* end mp_sub() */
+
+/* }}} */
+
+/* {{{ mp_mul(a, b, c) */
+
+/*
+ mp_mul(a, b, c)
+
+ Compute c = a * b. All parameters may be identical.
+
+ When c is the same object as a or b, that operand is first copied
+ into a local temporary so that c can be overwritten while the
+ operands are still being read. The operands are swapped if needed so
+ that the outer loop runs over the one with fewer used digits.
+
+ Returns MP_OKAY on success, or the error from copying an aliased
+ operand or from padding c to USED(a) + USED(b) digits.
+ */
+mp_err
+mp_mul(const mp_int *a, const mp_int *b, mp_int *c)
+{
+ mp_digit *pb; /* walks the digits of b */
+ mp_int tmp; /* copy of an operand that aliases c, if any */
+ mp_err res;
+ mp_size ib;
+ mp_size useda, usedb;
+
+ ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+ if (a == c) {
+ if ((res = mp_init_copy(&tmp, a)) != MP_OKAY)
+ return res;
+ if (a == b) /* squaring in place: both operands use the copy */
+ b = &tmp;
+ a = &tmp;
+ } else if (b == c) {
+ if ((res = mp_init_copy(&tmp, b)) != MP_OKAY)
+ return res;
+ b = &tmp;
+ } else {
+ MP_DIGITS(&tmp) = 0; /* no temporary needed; lets mp_clear(&tmp) at CLEANUP skip the free */
+ }
+
+ if (MP_USED(a) < MP_USED(b)) {
+ const mp_int *xch = b; /* switch a and b, to do fewer outer loops */
+ b = a;
+ a = xch;
+ }
+
+ MP_USED(c) = 1;
+ MP_DIGIT(c, 0) = 0;
+ if ((res = s_mp_pad(c, USED(a) + USED(b))) != MP_OKAY)
+ goto CLEANUP;
+
+#ifdef NSS_USE_COMBA
+ if ((MP_USED(a) == MP_USED(b)) && IS_POWER_OF_2(MP_USED(b))) { /* NOTE(review): these paths jump to CLEANUP, bypassing the s_mp_clamp() and sign assignment below; confirm the comba routines cover both */
+ if (MP_USED(a) == 4) {
+ s_mp_mul_comba_4(a, b, c);
+ goto CLEANUP;
+ }
+ if (MP_USED(a) == 8) {
+ s_mp_mul_comba_8(a, b, c);
+ goto CLEANUP;
+ }
+ if (MP_USED(a) == 16) {
+ s_mp_mul_comba_16(a, b, c);
+ goto CLEANUP;
+ }
+ if (MP_USED(a) == 32) {
+ s_mp_mul_comba_32(a, b, c);
+ goto CLEANUP;
+ }
+ }
+#endif
+
+ pb = MP_DIGITS(b);
+ s_mpv_mul_d(MP_DIGITS(a), MP_USED(a), *pb++, MP_DIGITS(c)); /* first row: c = a * b[0] */
+
+ /* Outer loop: Digits of b */
+ useda = MP_USED(a);
+ usedb = MP_USED(b);
+ for (ib = 1; ib < usedb; ib++) {
+ mp_digit b_i = *pb++;
+
+ /* Inner product: Digits of a */
+ if (b_i)
+ s_mpv_mul_d_add(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib);
+ else
+ MP_DIGIT(c, ib + useda) = b_i; /* zero digit of b: only the top digit of this row is cleared */
+ }
+
+ s_mp_clamp(c);
+
+ if (SIGN(a) == SIGN(b) || s_mp_cmp_d(c, 0) == MP_EQ) /* equal signs or a zero product: non-negative */
+ SIGN(c) = ZPOS;
+ else
+ SIGN(c) = NEG;
+
+CLEANUP:
+ mp_clear(&tmp);
+ return res;
+} /* end mp_mul() */
+
+/* }}} */
+
+/* {{{ mp_sqr(a, sqr) */
+
+#if MP_SQUARE
+/*
+ Computes the square of a. This can be done more
+ efficiently than a general multiplication, because many of the
+ computation steps are redundant when squaring. The inner product
+ step is a bit more complicated, but we save a fair number of
+ iterations of the multiplication loop.
+
+ The cross products a[i] * a[j] (i < j) are accumulated first, the
+ running total is doubled, and finally the squares of the individual
+ digits are added in.
+ */
+
+/* sqr = a^2; a and sqr may be the same mp_int, in which case a is first
+ copied into a local temporary. Returns MP_OKAY, or the error from
+ that copy or from growing sqr to 2 * USED(a) digits. */
+mp_err
+mp_sqr(const mp_int *a, mp_int *sqr)
+{
+ mp_digit *pa; /* walks the digits of a */
+ mp_digit d;
+ mp_err res;
+ mp_size ix;
+ mp_int tmp; /* copy of a when a aliases sqr */
+ int count;
+
+ ARGCHK(a != NULL && sqr != NULL, MP_BADARG);
+
+ if (a == sqr) {
+ if ((res = mp_init_copy(&tmp, a)) != MP_OKAY)
+ return res;
+ a = &tmp;
+ } else {
+ DIGITS(&tmp) = 0; /* no temporary needed; lets mp_clear(&tmp) at CLEANUP skip the free */
+ res = MP_OKAY;
+ }
+
+ ix = 2 * MP_USED(a);
+ if (ix > MP_ALLOC(sqr)) {
+ MP_USED(sqr) = 1;
+ MP_CHECKOK(s_mp_grow(sqr, ix));
+ }
+ MP_USED(sqr) = ix;
+ MP_DIGIT(sqr, 0) = 0;
+
+#ifdef NSS_USE_COMBA
+ if (IS_POWER_OF_2(MP_USED(a))) { /* NOTE(review): these paths jump to CLEANUP, bypassing the sign assignment and s_mp_clamp() below; confirm the comba routines cover both */
+ if (MP_USED(a) == 4) {
+ s_mp_sqr_comba_4(a, sqr);
+ goto CLEANUP;
+ }
+ if (MP_USED(a) == 8) {
+ s_mp_sqr_comba_8(a, sqr);
+ goto CLEANUP;
+ }
+ if (MP_USED(a) == 16) {
+ s_mp_sqr_comba_16(a, sqr);
+ goto CLEANUP;
+ }
+ if (MP_USED(a) == 32) {
+ s_mp_sqr_comba_32(a, sqr);
+ goto CLEANUP;
+ }
+ }
+#endif
+
+ pa = MP_DIGITS(a);
+ count = MP_USED(a) - 1;
+ if (count > 0) {
+ d = *pa++;
+ s_mpv_mul_d(pa, count, d, MP_DIGITS(sqr) + 1); /* a[0] times the higher digits of a */
+ for (ix = 3; --count > 0; ix += 2) {
+ d = *pa++;
+ s_mpv_mul_d_add(pa, count, d, MP_DIGITS(sqr) + ix); /* next digit times the digits above it */
+ } /* for(ix ...) */
+ MP_DIGIT(sqr, MP_USED(sqr) - 1) = 0; /* above loop stopped short of this. */
+
+ /* now sqr *= 2 */
+ s_mp_mul_2(sqr); /* return value is not checked here */
+ } else {
+ MP_DIGIT(sqr, 1) = 0; /* single-digit a: there are no cross products */
+ }
+
+ /* now add the squares of the digits of a to sqr. */
+ s_mpv_sqr_add_prop(MP_DIGITS(a), MP_USED(a), MP_DIGITS(sqr));
+
+ SIGN(sqr) = ZPOS;
+ s_mp_clamp(sqr);
+
+CLEANUP:
+ mp_clear(&tmp);
+ return res;
+
+} /* end mp_sqr() */
+#endif
+
+/* }}} */
+
+/* {{{ mp_div(a, b, q, r) */
+
+/*
+ mp_div(a, b, q, r)
+
+ Compute q = a / b and r = a mod b. Input parameters may be re-used
+ as output parameters. If q or r is NULL, that portion of the
+ computation will be discarded (although it will still be computed)
+
+ The remainder takes the sign of a; the quotient is non-negative when
+ a and b have the same sign and negative otherwise. Zero results are
+ always stored as non-negative.
+
+ Returns MP_RANGE if b is zero, otherwise MP_OKAY or the first error
+ reported while setting up the temporaries or dividing.
+ */
+mp_err
+mp_div(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r)
+{
+ mp_err res;
+ mp_int *pQ, *pR; /* where the quotient and remainder are built */
+ mp_int qtmp, rtmp, btmp;
+ int cmp;
+ mp_sign signA;
+ mp_sign signB;
+
+ ARGCHK(a != NULL && b != NULL, MP_BADARG);
+
+ signA = MP_SIGN(a); /* saved now: a or b may be overwritten if they alias q or r */
+ signB = MP_SIGN(b);
+
+ if (mp_cmp_z(b) == MP_EQ)
+ return MP_RANGE;
+
+ DIGITS(&qtmp) = 0; /* mark the temporaries unused so the mp_clear() calls at CLEANUP skip the free */
+ DIGITS(&rtmp) = 0;
+ DIGITS(&btmp) = 0;
+
+ /* Set up some temporaries... */
+ if (!r || r == a || r == b) { /* no usable r: work in rtmp */
+ MP_CHECKOK(mp_init_copy(&rtmp, a));
+ pR = &rtmp;
+ } else {
+ MP_CHECKOK(mp_copy(a, r));
+ pR = r;
+ }
+
+ if (!q || q == a || q == b) { /* no usable q: work in qtmp */
+ MP_CHECKOK(mp_init_size(&qtmp, MP_USED(a)));
+ pQ = &qtmp;
+ } else {
+ MP_CHECKOK(s_mp_pad(q, MP_USED(a)));
+ pQ = q;
+ mp_zero(pQ);
+ }
+
+ /*
+ If |a| <= |b|, we can compute the solution without division;
+ otherwise, we actually do the work required.
+ */
+ if ((cmp = s_mp_cmp(a, b)) <= 0) {
+ if (cmp) { /* |a| < |b|: quotient 0, remainder a */
+ /* r was set to a above. */
+ mp_zero(pQ);
+ } else { /* |a| == |b|: quotient 1, remainder 0 */
+ mp_set(pQ, 1);
+ mp_zero(pR);
+ }
+ } else {
+ MP_CHECKOK(mp_init_copy(&btmp, b)); /* s_mp_div() is handed a private copy of b */
+ MP_CHECKOK(s_mp_div(pR, &btmp, pQ));
+ }
+
+ /* Compute the signs for the output */
+ MP_SIGN(pR) = signA; /* Sr = Sa */
+ /* Sq = ZPOS if Sa == Sb */ /* Sq = NEG if Sa != Sb */
+ MP_SIGN(pQ) = (signA == signB) ? ZPOS : NEG;
+
+ if (s_mp_cmp_d(pQ, 0) == MP_EQ) /* zero is always non-negative */
+ SIGN(pQ) = ZPOS;
+ if (s_mp_cmp_d(pR, 0) == MP_EQ)
+ SIGN(pR) = ZPOS;
+
+ /* Copy output, if it is needed */
+ if (q && q != pQ)
+ s_mp_exch(pQ, q);
+
+ if (r && r != pR)
+ s_mp_exch(pR, r);
+
+CLEANUP:
+ mp_clear(&btmp);
+ mp_clear(&rtmp);
+ mp_clear(&qtmp);
+
+ return res;
+
+} /* end mp_div() */
+
+/* }}} */
+
+/* {{{ mp_div_2d(a, d, q, r) */
+
+/*
+  Compute q = a / 2^d and r = a mod 2^d.  Either of q and r may be NULL
+  if that result is not wanted.  Both outputs are copied from a before
+  either of them is reduced in place.  Returns MP_OKAY, or the error
+  from copying a into q or r.
+ */
+mp_err
+mp_div_2d(const mp_int *a, mp_digit d, mp_int *q, mp_int *r)
+{
+    mp_err res;
+
+    ARGCHK(a != NULL, MP_BADARG);
+
+    /* First take the copies ... */
+    if (q != NULL) {
+        res = mp_copy(a, q);
+        if (res != MP_OKAY)
+            return res;
+    }
+    if (r != NULL) {
+        res = mp_copy(a, r);
+        if (res != MP_OKAY)
+            return res;
+    }
+
+    /* ... then reduce each of them in place. */
+    if (q != NULL)
+        s_mp_div_2d(q, d);
+    if (r != NULL)
+        s_mp_mod_2d(r, d);
+
+    return MP_OKAY;
+
+} /* end mp_div_2d() */
+
+/* }}} */
+
+/* {{{ mp_expt(a, b, c) */
+
+/*
+  mp_expt(a, b, c)
+
+  Compute c = a ** b, that is, raise a to the b power.  Uses a
+  standard iterative square-and-multiply technique, scanning the
+  exponent from its least significant bit upwards.
+
+  The result is negative only when a is negative and b is odd; in
+  particular a ** 0 is +1 for every a.
+
+  Returns MP_RANGE if b is negative, otherwise MP_OKAY or the first
+  error reported while initializing, multiplying, squaring or copying
+  the result into c.
+ */
+
+mp_err
+mp_expt(mp_int *a, mp_int *b, mp_int *c)
+{
+    mp_int s, x;
+    mp_err res;
+    mp_digit d;
+    unsigned int dig, bit;
+
+    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+    if (mp_cmp_z(b) < 0)
+        return MP_RANGE;
+
+    if ((res = mp_init(&s)) != MP_OKAY)
+        return res;
+
+    mp_set(&s, 1);
+
+    if ((res = mp_init_copy(&x, a)) != MP_OKAY)
+        goto X;
+
+    /* Loop over low-order digits in ascending order */
+    for (dig = 0; dig < (USED(b) - 1); dig++) {
+        d = DIGIT(b, dig);
+
+        /* Loop over bits of each non-maximal digit */
+        for (bit = 0; bit < DIGIT_BIT; bit++) {
+            if (d & 1) {
+                if ((res = s_mp_mul(&s, &x)) != MP_OKAY)
+                    goto CLEANUP;
+            }
+
+            d >>= 1;
+
+            if ((res = s_mp_sqr(&x)) != MP_OKAY)
+                goto CLEANUP;
+        }
+    }
+
+    /* Consider now the last digit... */
+    d = DIGIT(b, dig);
+
+    while (d) {
+        if (d & 1) {
+            if ((res = s_mp_mul(&s, &x)) != MP_OKAY)
+                goto CLEANUP;
+        }
+
+        d >>= 1;
+
+        if ((res = s_mp_sqr(&x)) != MP_OKAY)
+            goto CLEANUP;
+    }
+
+    /*
+      An odd power keeps the sign of the base; an even power (including
+      the zeroth) is never negative.  The previous test was inverted and
+      handed the base's sign to even powers instead.
+     */
+    if (!mp_iseven(b))
+        SIGN(&s) = SIGN(a);
+    else
+        SIGN(&s) = ZPOS;
+
+    res = mp_copy(&s, c);
+
+CLEANUP:
+    mp_clear(&x);
+X:
+    mp_clear(&s);
+
+    return res;
+
+} /* end mp_expt() */
+
+/* }}} */
+
+/* {{{ mp_2expt(a, k) */
+
+/* Set a = 2^k; public wrapper that validates a and defers to s_mp_2expt() */
+
+mp_err
+mp_2expt(mp_int *a, mp_digit k)
+{
+    mp_err res;
+
+    ARGCHK(a != NULL, MP_BADARG);
+
+    res = s_mp_2expt(a, k);
+    return res;
+
+} /* end mp_2expt() */
+
+/* }}} */
+
+/* {{{ mp_mod(a, m, c) */
+
+/*
+  mp_mod(a, m, c)
+
+  Compute c = a (mod m).  A negative modulus is rejected with MP_RANGE.
+
+  The magnitude of a is compared against m once:
+    |a| == m : the result is zero.
+    |a| >  m : divide, keeping only the remainder.
+    |a| <  m : a is copied unchanged.
+  In the last two cases m is added once when the intermediate value is
+  negative, so that the result ends up non-negative.
+ */
+
+mp_err
+mp_mod(const mp_int *a, const mp_int *m, mp_int *c)
+{
+    mp_err res;
+    int mag;
+
+    ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
+
+    if (SIGN(m) == NEG)
+        return MP_RANGE;
+
+    mag = s_mp_cmp(a, m);
+
+    if (mag == 0) {
+        mp_zero(c);
+        return MP_OKAY;
+    }
+
+    if (mag > 0) {
+        res = mp_div(a, m, NULL, c);
+        if (res != MP_OKAY)
+            return res;
+        if (SIGN(c) != NEG)
+            return MP_OKAY;
+    } else {
+        res = mp_copy(a, c);
+        if (res != MP_OKAY)
+            return res;
+        if (mp_cmp_z(a) >= 0)
+            return MP_OKAY;
+    }
+
+    /* Negative intermediate value: shift it up by one modulus */
+    return mp_add(c, m, c);
+
+} /* end mp_mod() */
+
+/* }}} */
+
+/* {{{ mp_mod_d(a, d, c) */
+
+/*
+  mp_mod_d(a, d, c)
+
+  Compute c = a (mod d) for a single-digit modulus d.
+
+  |a| is compared with d once:
+    |a| >  d : the remainder comes from mp_div_d().
+    |a| == d : a is a multiple of d, so the remainder is 0.  (Previously
+               a positive a equal to d produced d itself, which is
+               outside the documented range 0 <= c < d.)
+    |a| <  d : the remainder is a's single digit, or d minus that digit
+               when a is negative.
+
+  Returns MP_OKAY, or the error from mp_div_d().
+ */
+mp_err
+mp_mod_d(const mp_int *a, mp_digit d, mp_digit *c)
+{
+    mp_err res;
+    mp_digit rem;
+    int mag;
+
+    ARGCHK(a != NULL && c != NULL, MP_BADARG);
+
+    mag = s_mp_cmp_d(a, d);
+
+    if (mag > 0) {
+        if ((res = mp_div_d(a, d, NULL, &rem)) != MP_OKAY)
+            return res;
+
+    } else if (mag == 0) {
+        rem = 0;
+
+    } else if (SIGN(a) == NEG) {
+        rem = d - DIGIT(a, 0);
+
+    } else {
+        rem = DIGIT(a, 0);
+    }
+
+    /* Kept in case ARGCHK is compiled out */
+    if (c)
+        *c = rem;
+
+    return MP_OKAY;
+
+} /* end mp_mod_d() */
+
+/* }}} */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/* {{{ Modular arithmetic */
+
+#if MP_MODARITH
+/* {{{ mp_addmod(a, b, m, c) */
+
+/*
+  mp_addmod(a, b, m, c)
+
+  Compute c = (a + b) mod m: the sum is formed in c and then reduced
+  in place.  Returns the first non-MP_OKAY status encountered.
+ */
+
+mp_err
+mp_addmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
+{
+    mp_err res;
+
+    ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
+
+    res = mp_add(a, b, c);
+    if (res == MP_OKAY)
+        res = mp_mod(c, m, c);
+
+    return res;
+}
+
+/* }}} */
+
+/* {{{ mp_submod(a, b, m, c) */
+
+/*
+  mp_submod(a, b, m, c)
+
+  Compute c = (a - b) mod m: the difference is formed in c and then
+  reduced in place.  Returns the first non-MP_OKAY status encountered.
+ */
+
+mp_err
+mp_submod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
+{
+    mp_err res;
+
+    ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
+
+    res = mp_sub(a, b, c);
+    if (res == MP_OKAY)
+        res = mp_mod(c, m, c);
+
+    return res;
+}
+
+/* }}} */
+
+/* {{{ mp_mulmod(a, b, m, c) */
+
+/*
+  mp_mulmod(a, b, m, c)
+
+  Compute c = (a * b) mod m: the product is formed in c and then
+  reduced in place.  Returns the first non-MP_OKAY status encountered.
+ */
+
+mp_err
+mp_mulmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
+{
+    mp_err res;
+
+    ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
+
+    res = mp_mul(a, b, c);
+    if (res == MP_OKAY)
+        res = mp_mod(c, m, c);
+
+    return res;
+}
+
+/* }}} */
+
+/* {{{ mp_sqrmod(a, m, c) */
+
+#if MP_SQUARE
+/* Compute c = (a * a) mod m: square into c, then reduce c in place */
+mp_err
+mp_sqrmod(const mp_int *a, const mp_int *m, mp_int *c)
+{
+    mp_err res;
+
+    ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
+
+    res = mp_sqr(a, c);
+    if (res == MP_OKAY)
+        res = mp_mod(c, m, c);
+
+    return res;
+
+} /* end mp_sqrmod() */
+#endif
+
+/* }}} */
+
+/* {{{ s_mp_exptmod(a, b, m, c) */
+
+/*
+  s_mp_exptmod(a, b, m, c)
+
+  Compute c = (a ** b) mod m.  Uses a standard square-and-multiply
+  method with modular reductions at each step.  (This is basically the
+  same code as mp_expt(), except for the addition of the reductions)
+
+  The modular reductions are done using Barrett's algorithm (see
+  s_mp_reduce() below for details)
+
+  Returns MP_RANGE if b is negative or m is not positive, MP_OKAY on
+  success, or the error reported by the first helper that fails.  The
+  temporaries s, x and mu are released on every exit path.
+ */
+
+mp_err
+s_mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
+{
+    mp_int s, x, mu;
+    mp_err res;
+    mp_digit d;
+    unsigned int dig, bit;
+
+    /* m is dereferenced by mp_cmp_z() right below, so it must be checked too */
+    ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
+
+    if (mp_cmp_z(b) < 0 || mp_cmp_z(m) <= 0)
+        return MP_RANGE;
+
+    if ((res = mp_init(&s)) != MP_OKAY)
+        return res;
+    if ((res = mp_init_copy(&x, a)) != MP_OKAY)
+        goto X;
+    /* x is live from here on: failures must unwind through MU so it is freed */
+    if ((res = mp_mod(&x, m, &x)) != MP_OKAY)
+        goto MU;
+    if ((res = mp_init(&mu)) != MP_OKAY)
+        goto MU;
+
+    mp_set(&s, 1);
+
+    /* mu = b^2k / m */
+    if ((res = s_mp_add_d(&mu, 1)) != MP_OKAY)
+        goto CLEANUP;
+    if ((res = s_mp_lshd(&mu, 2 * USED(m))) != MP_OKAY)
+        goto CLEANUP;
+    if ((res = mp_div(&mu, m, &mu, NULL)) != MP_OKAY)
+        goto CLEANUP;
+
+    /* Loop over digits of b in ascending order, except highest order */
+    for (dig = 0; dig < (USED(b) - 1); dig++) {
+        d = DIGIT(b, dig);
+
+        /* Loop over the bits of the lower-order digits */
+        for (bit = 0; bit < DIGIT_BIT; bit++) {
+            if (d & 1) {
+                if ((res = s_mp_mul(&s, &x)) != MP_OKAY)
+                    goto CLEANUP;
+                if ((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY)
+                    goto CLEANUP;
+            }
+
+            d >>= 1;
+
+            if ((res = s_mp_sqr(&x)) != MP_OKAY)
+                goto CLEANUP;
+            if ((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY)
+                goto CLEANUP;
+        }
+    }
+
+    /* Now do the last digit; stop as soon as no set bits remain */
+    d = DIGIT(b, dig);
+
+    while (d) {
+        if (d & 1) {
+            if ((res = s_mp_mul(&s, &x)) != MP_OKAY)
+                goto CLEANUP;
+            if ((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY)
+                goto CLEANUP;
+        }
+
+        d >>= 1;
+
+        if ((res = s_mp_sqr(&x)) != MP_OKAY)
+            goto CLEANUP;
+        if ((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY)
+            goto CLEANUP;
+    }
+
+    /* Hand the result to the caller; c's old storage is freed with s below */
+    s_mp_exch(&s, c);
+
+CLEANUP:
+    mp_clear(&mu);
+MU:
+    mp_clear(&x);
+X:
+    mp_clear(&s);
+
+    return res;
+
+} /* end s_mp_exptmod() */
+
+/* }}} */
+
+/* {{{ mp_exptmod_d(a, d, m, c) */
+
+/*
+  Compute c = (a ** d) mod m for a single-digit exponent d, by
+  square-and-multiply with an mp_mod() reduction after every multiply
+  and every squaring.  Returns MP_OKAY or the first helper error.
+ */
+mp_err
+mp_exptmod_d(const mp_int *a, mp_digit d, const mp_int *m, mp_int *c)
+{
+    mp_int acc, base;
+    mp_err res;
+
+    ARGCHK(a != NULL && c != NULL, MP_BADARG);
+
+    res = mp_init(&acc);
+    if (res != MP_OKAY)
+        return res;
+
+    res = mp_init_copy(&base, a);
+    if (res != MP_OKAY)
+        goto FREE_ACC;
+
+    mp_set(&acc, 1);
+
+    for (; d != 0;) {
+        if (d & 1) {
+            res = s_mp_mul(&acc, &base);
+            if (res != MP_OKAY)
+                goto FREE_BASE;
+            res = mp_mod(&acc, m, &acc);
+            if (res != MP_OKAY)
+                goto FREE_BASE;
+        }
+
+        d >>= 1;
+
+        res = s_mp_sqr(&base);
+        if (res != MP_OKAY)
+            goto FREE_BASE;
+        res = mp_mod(&base, m, &base);
+        if (res != MP_OKAY)
+            goto FREE_BASE;
+    }
+
+    /* Give the accumulated value to c; c's previous storage is cleared below */
+    s_mp_exch(&acc, c);
+
+FREE_BASE:
+    mp_clear(&base);
+FREE_ACC:
+    mp_clear(&acc);
+
+    return res;
+
+} /* end mp_exptmod_d() */
+
+/* }}} */
+#endif /* if MP_MODARITH */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/* {{{ Comparison functions */
+
+/* {{{ mp_cmp_z(a) */
+
+/*
+  mp_cmp_z(a)
+
+  Compare a against zero.  Yields MP_LT when a carries the NEG sign,
+  MP_EQ when a consists of a single zero digit, and MP_GT otherwise.
+ */
+
+int
+mp_cmp_z(const mp_int *a)
+{
+    if (SIGN(a) == NEG)
+        return MP_LT;
+
+    return (USED(a) == 1 && DIGIT(a, 0) == 0) ? MP_EQ : MP_GT;
+
+} /* end mp_cmp_z() */
+
+/* }}} */
+
+/* {{{ mp_cmp_d(a, d) */
+
+/*
+  mp_cmp_d(a, d)
+
+  Compare a against the single digit d.  Any negative a is reported as
+  MP_LT; otherwise the result is that of s_mp_cmp_d().
+ */
+
+int
+mp_cmp_d(const mp_int *a, mp_digit d)
+{
+    ARGCHK(a != NULL, MP_EQ);
+
+    return (SIGN(a) == NEG) ? MP_LT : s_mp_cmp_d(a, d);
+
+} /* end mp_cmp_d() */
+
+/* }}} */
+
+/* {{{ mp_cmp(a, b) */
+
+/*
+  Signed comparison of a and b.  Operands with different signs are
+  ordered by sign alone; with equal signs the magnitude comparison from
+  s_mp_cmp() is used, negated when both operands are negative.
+ */
+int
+mp_cmp(const mp_int *a, const mp_int *b)
+{
+    int mag;
+
+    ARGCHK(a != NULL && b != NULL, MP_EQ);
+
+    if (SIGN(a) != SIGN(b))
+        return (SIGN(a) == ZPOS) ? MP_GT : MP_LT;
+
+    mag = s_mp_cmp(a, b);
+    if (mag == MP_EQ)
+        return MP_EQ;
+
+    return (SIGN(a) == ZPOS) ? mag : -mag;
+
+} /* end mp_cmp() */
+
+/* }}} */
+
+/* {{{ mp_cmp_mag(a, b) */
+
+/*
+  mp_cmp_mag(a, b)
+
+  Compare |a| with |b|, ignoring signs; a checked wrapper around
+  s_mp_cmp() whose result is returned unchanged.
+ */
+
+int
+mp_cmp_mag(const mp_int *a, const mp_int *b)
+{
+    int mag;
+
+    ARGCHK(a != NULL && b != NULL, MP_EQ);
+
+    mag = s_mp_cmp(a, b);
+    return mag;
+
+} /* end mp_cmp_mag() */
+
+/* }}} */
+
+/* {{{ mp_isodd(a) */
+
+/*
+  mp_isodd(a)
+
+  Returns 1 when the lowest bit of a's least significant digit is set
+  (a is odd), 0 otherwise.
+ */
+int
+mp_isodd(const mp_int *a)
+{
+    mp_digit low;
+
+    ARGCHK(a != NULL, 0);
+
+    low = DIGIT(a, 0);
+    return (int)(low & 1);
+
+} /* end mp_isodd() */
+
+/* }}} */
+
+/* {{{ mp_iseven(a) */
+
+/* Returns 1 when a is even, 0 when it is odd; the inverse of mp_isodd() */
+int
+mp_iseven(const mp_int *a)
+{
+    return mp_isodd(a) == 0;
+
+} /* end mp_iseven() */
+
+/* }}} */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/* {{{ Number theoretic functions */
+
+#if MP_NUMTH
+/* {{{ mp_gcd(a, b, c) */
+
+/*
+  mp_gcd(a, b, c)
+
+  Like the old mp_gcd() function, except computes the GCD using the
+  binary algorithm due to Josef Stein in 1961 (via Knuth).
+
+  Returns MP_RANGE when both a and b are zero.  When exactly one of
+  them is zero the other is copied to c unchanged.  Otherwise the
+  computation runs on copies with their signs forced to ZPOS, and c
+  receives u * 2^k, where k counts the factors of two removed from
+  both operands.  Helper failures are returned as-is.
+ */
+mp_err
+mp_gcd(mp_int *a, mp_int *b, mp_int *c)
+{
+    mp_err res;
+    mp_int u, v, t;
+    mp_size k = 0;
+
+    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+    if (mp_cmp_z(a) == MP_EQ && mp_cmp_z(b) == MP_EQ)
+        return MP_RANGE;
+    if (mp_cmp_z(a) == MP_EQ) {
+        return mp_copy(b, c);
+    } else if (mp_cmp_z(b) == MP_EQ) {
+        return mp_copy(a, c);
+    }
+
+    if ((res = mp_init(&t)) != MP_OKAY)
+        return res;
+    if ((res = mp_init_copy(&u, a)) != MP_OKAY)
+        goto U;
+    if ((res = mp_init_copy(&v, b)) != MP_OKAY)
+        goto V;
+
+    SIGN(&u) = ZPOS;
+    SIGN(&v) = ZPOS;
+
+    /* Divide out common factors of 2 until at least 1 of a, b is odd */
+    while (mp_iseven(&u) && mp_iseven(&v)) {
+        s_mp_div_2(&u);
+        s_mp_div_2(&v);
+        ++k;
+    }
+
+    /* Initialize t */
+    if (mp_isodd(&u)) {
+        if ((res = mp_copy(&v, &t)) != MP_OKAY)
+            goto CLEANUP;
+
+        /* t = -v */
+        if (SIGN(&v) == ZPOS)
+            SIGN(&t) = NEG;
+        else
+            SIGN(&t) = ZPOS;
+
+    } else {
+        if ((res = mp_copy(&u, &t)) != MP_OKAY)
+            goto CLEANUP;
+    }
+
+    for (;;) {
+        /* Strip remaining factors of two from t */
+        while (mp_iseven(&t)) {
+            s_mp_div_2(&t);
+        }
+
+        if (mp_cmp_z(&t) == MP_GT) {
+            if ((res = mp_copy(&t, &u)) != MP_OKAY)
+                goto CLEANUP;
+
+        } else {
+            if ((res = mp_copy(&t, &v)) != MP_OKAY)
+                goto CLEANUP;
+
+            /* v = -t */
+            if (SIGN(&t) == ZPOS)
+                SIGN(&v) = NEG;
+            else
+                SIGN(&v) = ZPOS;
+        }
+
+        if ((res = mp_sub(&u, &v, &t)) != MP_OKAY)
+            goto CLEANUP;
+
+        if (s_mp_cmp_d(&t, 0) == MP_EQ)
+            break;
+    }
+
+    /* v = 2^k; this can fail, so its status must not be dropped */
+    if ((res = s_mp_2expt(&v, k)) != MP_OKAY)
+        goto CLEANUP;
+    res = mp_mul(&u, &v, c); /* c = u * v */
+
+CLEANUP:
+    mp_clear(&v);
+V:
+    mp_clear(&u);
+U:
+    mp_clear(&t);
+
+    return res;
+
+} /* end mp_gcd() */
+
+/* }}} */
+
+/* {{{ mp_lcm(a, b, c) */
+
+/* The least common multiple follows from the identity
+
+   ab = [a, b](a, b)
+
+   so c is obtained as the product a * b divided by mp_gcd(a, b).
+   Returns MP_OKAY or the status of the first step that fails.
+ */
+
+mp_err
+mp_lcm(mp_int *a, mp_int *b, mp_int *c)
+{
+    mp_int gcd, prod;
+    mp_err res;
+
+    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+    /* Two temporaries: the gcd and the full product */
+    res = mp_init(&gcd);
+    if (res != MP_OKAY)
+        return res;
+
+    res = mp_init(&prod);
+    if (res != MP_OKAY)
+        goto FREE_GCD;
+
+    res = mp_mul(a, b, &prod);
+    if (res == MP_OKAY)
+        res = mp_gcd(a, b, &gcd);
+    if (res == MP_OKAY)
+        res = mp_div(&prod, &gcd, c, NULL);
+
+    mp_clear(&prod);
+FREE_GCD:
+    mp_clear(&gcd);
+
+    return res;
+
+} /* end mp_lcm() */
+
+/* }}} */
+
+/* {{{ mp_xgcd(a, b, g, x, y) */
+
+/*
+ mp_xgcd(a, b, g, x, y)
+
+ Compute g = (a, b) and values x and y satisfying Bezout's identity
+ (that is, ax + by = g). This uses the binary extended GCD algorithm
+ based on the Stein algorithm used for mp_gcd()
+ See algorithm 14.61 in Handbook of Applied Cryptography.
+
+ The work is done on copies xc and yc of a and b that are passed
+ through mp_abs(), so the signs of the inputs do not enter the
+ computation.
+
+ Any of g, x and y may be NULL; a NULL output is simply skipped.
+ On completion x receives C, y receives D and g receives gx * v.
+
+ Returns MP_RANGE when b is zero. Other failures come from the
+ helpers wrapped in MP_CHECKOK (defined elsewhere; it appears to store
+ the status in res and jump to CLEANUP on error -- confirm against
+ the macro).
+ */
+
+mp_err
+mp_xgcd(const mp_int *a, const mp_int *b, mp_int *g, mp_int *x, mp_int *y)
+{
+ mp_int gx, xc, yc, u, v, A, B, C, D;
+ mp_int *clean[9]; /* temporaries initialized so far, cleared at CLEANUP */
+ mp_err res;
+ int last = -1; /* index of the newest entry in clean[]; -1 when empty */
+
+ if (mp_cmp_z(b) == 0)
+ return MP_RANGE;
+
+ /* Initialize all these variables we need */
+ MP_CHECKOK(mp_init(&u));
+ clean[++last] = &u;
+ MP_CHECKOK(mp_init(&v));
+ clean[++last] = &v;
+ MP_CHECKOK(mp_init(&gx));
+ clean[++last] = &gx;
+ MP_CHECKOK(mp_init(&A));
+ clean[++last] = &A;
+ MP_CHECKOK(mp_init(&B));
+ clean[++last] = &B;
+ MP_CHECKOK(mp_init(&C));
+ clean[++last] = &C;
+ MP_CHECKOK(mp_init(&D));
+ clean[++last] = &D;
+ MP_CHECKOK(mp_init_copy(&xc, a));
+ clean[++last] = &xc;
+ mp_abs(&xc, &xc); /* return value is not checked here */
+ MP_CHECKOK(mp_init_copy(&yc, b));
+ clean[++last] = &yc;
+ mp_abs(&yc, &yc); /* return value is not checked here */
+
+ mp_set(&gx, 1);
+
+ /* Divide by two until at least one of them is odd */
+ while (mp_iseven(&xc) && mp_iseven(&yc)) {
+ mp_size nx = mp_trailing_zeros(&xc);
+ mp_size ny = mp_trailing_zeros(&yc);
+ mp_size n = MP_MIN(nx, ny); /* number of factors of two both share */
+ s_mp_div_2d(&xc, n);
+ s_mp_div_2d(&yc, n);
+ MP_CHECKOK(s_mp_mul_2d(&gx, n)); /* gx accumulates the removed power of two */
+ }
+
+ MP_CHECKOK(mp_copy(&xc, &u));
+ MP_CHECKOK(mp_copy(&yc, &v));
+ mp_set(&A, 1);
+ mp_set(&D, 1);
+
+ /* Loop through binary GCD algorithm */
+ do {
+ while (mp_iseven(&u)) {
+ s_mp_div_2(&u);
+
+ if (mp_iseven(&A) && mp_iseven(&B)) {
+ s_mp_div_2(&A);
+ s_mp_div_2(&B);
+ } else {
+ /* adjust A and B by yc and xc before halving them */
+ MP_CHECKOK(mp_add(&A, &yc, &A));
+ s_mp_div_2(&A);
+ MP_CHECKOK(mp_sub(&B, &xc, &B));
+ s_mp_div_2(&B);
+ }
+ }
+
+ while (mp_iseven(&v)) {
+ s_mp_div_2(&v);
+
+ if (mp_iseven(&C) && mp_iseven(&D)) {
+ s_mp_div_2(&C);
+ s_mp_div_2(&D);
+ } else {
+ /* same adjustment as above, applied to the (C, D) pair */
+ MP_CHECKOK(mp_add(&C, &yc, &C));
+ s_mp_div_2(&C);
+ MP_CHECKOK(mp_sub(&D, &xc, &D));
+ s_mp_div_2(&D);
+ }
+ }
+
+ /* subtract the smaller of u, v from the larger, with its coefficients */
+ if (mp_cmp(&u, &v) >= 0) {
+ MP_CHECKOK(mp_sub(&u, &v, &u));
+ MP_CHECKOK(mp_sub(&A, &C, &A));
+ MP_CHECKOK(mp_sub(&B, &D, &B));
+ } else {
+ MP_CHECKOK(mp_sub(&v, &u, &v));
+ MP_CHECKOK(mp_sub(&C, &A, &C));
+ MP_CHECKOK(mp_sub(&D, &B, &D));
+ }
+ } while (mp_cmp_z(&u) != 0);
+
+ /* copy results to output */
+ if (x)
+ MP_CHECKOK(mp_copy(&C, x));
+
+ if (y)
+ MP_CHECKOK(mp_copy(&D, y));
+
+ if (g)
+ MP_CHECKOK(mp_mul(&gx, &v, g));
+
+CLEANUP:
+ /* clear only what was successfully initialized, newest first */
+ while (last >= 0)
+ mp_clear(clean[last--]);
+
+ return res;
+
+} /* end mp_xgcd() */
+
+/* }}} */
+
+/*
+  Count the zero bits below the lowest set bit of mp.
+
+  Returns 0 when mp is NULL, has no digit storage, compares equal to
+  zero, or (unexpectedly) contains no non-zero digit within its used
+  range.  Otherwise whole zero digits contribute MP_DIGIT_BIT each and
+  the first non-zero digit is narrowed down by successive halving of
+  the inspected width.
+ */
+mp_size
+mp_trailing_zeros(const mp_int *mp)
+{
+    mp_digit d = 0;
+    mp_size n = 0;
+    unsigned int ix;
+
+    if (!mp || !MP_DIGITS(mp) || !mp_cmp_z(mp))
+        return n;
+
+    /*
+      Test the index before touching the digit: the earlier form read
+      MP_DIGIT(mp, ix) first and could look one digit past the used range
+      when every used digit was zero.
+     */
+    for (ix = 0; ix < MP_USED(mp); ++ix) {
+        d = MP_DIGIT(mp, ix);
+        if (d)
+            break;
+        n += MP_DIGIT_BIT;
+    }
+    if (!d)
+        return 0; /* shouldn't happen, but ... */
+#if !defined(MP_USE_UINT_DIGIT)
+    if (!(d & 0xffffffffU)) {
+        d >>= 32;
+        n += 32;
+    }
+#endif
+    if (!(d & 0xffffU)) {
+        d >>= 16;
+        n += 16;
+    }
+    if (!(d & 0xffU)) {
+        d >>= 8;
+        n += 8;
+    }
+    if (!(d & 0xfU)) {
+        d >>= 4;
+        n += 4;
+    }
+    if (!(d & 0x3U)) {
+        d >>= 2;
+        n += 2;
+    }
+    if (!(d & 0x1U)) {
+        d >>= 1;
+        n += 1;
+    }
+#if MP_ARGCHK == 2
+    assert(0 != (d & 1));
+#endif
+    return n;
+}
+
+/* Given a and prime p, computes c and k such that a*c == 2**k (mod p).
+** Returns k (positive) or error (negative).
+** This technique from the paper "Fast Modular Reciprocals" (unpublished)
+** by Richard Schroeppel (a.k.a. Captain Nemo).
+**
+** MP_UNDEF is produced when a is zero, when mp_trailing_zeros() reports
+** no trailing zero bits for an even f, or when f and g become equal
+** (a and p are not relatively prime).
+** On success c is raised by p until its sign is MP_ZPOS.
+** MP_CHECKOK is defined elsewhere; it appears to store the status in res
+** and jump to CLEANUP on error -- confirm against the macro.
+*/
+mp_err
+s_mp_almost_inverse(const mp_int *a, const mp_int *p, mp_int *c)
+{
+ mp_err res;
+ mp_err k = 0; /* running total of bits shifted out of f */
+ mp_int d, f, g;
+
+ ARGCHK(a && p && c, MP_BADARG);
+
+ /* mark all temporaries as unallocated before the first MP_CHECKOK */
+ MP_DIGITS(&d) = 0;
+ MP_DIGITS(&f) = 0;
+ MP_DIGITS(&g) = 0;
+ MP_CHECKOK(mp_init(&d));
+ MP_CHECKOK(mp_init_copy(&f, a)); /* f = a */
+ MP_CHECKOK(mp_init_copy(&g, p)); /* g = p */
+
+ mp_set(c, 1);
+ mp_zero(&d);
+
+ if (mp_cmp_z(&f) == 0) {
+ res = MP_UNDEF;
+ } else
+ for (;;) {
+ int diff_sign;
+ while (mp_iseven(&f)) {
+ mp_size n = mp_trailing_zeros(&f);
+ if (!n) {
+ res = MP_UNDEF;
+ goto CLEANUP;
+ }
+ s_mp_div_2d(&f, n); /* f /= 2**n */
+ MP_CHECKOK(s_mp_mul_2d(&d, n)); /* d *= 2**n */
+ k += n;
+ }
+ if (mp_cmp_d(&f, 1) == MP_EQ) { /* f == 1 */
+ res = k;
+ break;
+ }
+ diff_sign = mp_cmp(&f, &g);
+ if (diff_sign < 0) { /* f < g */
+ /* swap the pairs (f, g) and (c, d) together */
+ s_mp_exch(&f, &g);
+ s_mp_exch(c, &d);
+ } else if (diff_sign == 0) { /* f == g */
+ res = MP_UNDEF; /* a and p are not relatively prime */
+ break;
+ }
+ /* choose subtraction or addition from the low two bits of f and g */
+ if ((MP_DIGIT(&f, 0) % 4) == (MP_DIGIT(&g, 0) % 4)) {
+ MP_CHECKOK(mp_sub(&f, &g, &f)); /* f = f - g */
+ MP_CHECKOK(mp_sub(c, &d, c)); /* c = c - d */
+ } else {
+ MP_CHECKOK(mp_add(&f, &g, &f)); /* f = f + g */
+ MP_CHECKOK(mp_add(c, &d, c)); /* c = c + d */
+ }
+ }
+ if (res >= 0) {
+ /* bring c up to a non-negative value by adding p */
+ while (MP_SIGN(c) != MP_ZPOS) {
+ MP_CHECKOK(mp_add(c, p, c));
+ }
+ res = k; /* MP_CHECKOK above overwrote res; restore the shift count */
+ }
+
+CLEANUP:
+ mp_clear(&d);
+ mp_clear(&f);
+ mp_clear(&g);
+ return res;
+}
+
+/* Compute T = (P ** -1) mod MP_RADIX.  Also works for 16-bit mp_digits.
+** Starting from T = P, the refinement step T = T * (2 - P*T) is applied
+** four times, or six times when MP_USE_UINT_DIGIT is not defined.
+** This technique from the paper "Fast Modular Reciprocals" (unpublished)
+** by Richard Schroeppel (a.k.a. Captain Nemo).
+*/
+mp_digit
+s_mp_invmod_radix(mp_digit P)
+{
+#if !defined(MP_USE_UINT_DIGIT)
+    int rounds = 6;
+#else
+    int rounds = 4;
+#endif
+    mp_digit T = P;
+
+    while (rounds-- > 0)
+        T *= 2 - (P * T);
+
+    return T;
+}
+
+/* Given c, k, and prime p, where a*c == 2**k (mod p),
+** Compute x = (a ** -1) mod p. This is similar to Montgomery reduction.
+** This technique from the paper "Fast Modular Reciprocals" (unpublished)
+** by Richard Schroeppel (a.k.a. Captain Nemo).
+**
+** x starts as c (or c + p when c is negative). The k bits are then
+** consumed at most MP_DIGIT_BIT at a time: each step adds a multiple
+** of p at digit offset ix, and finally x is shifted right by the
+** original k bits.
+** Returns MP_OKAY, or the error from mp_add(), mp_copy() or s_mp_pad().
+*/
+mp_err
+s_mp_fixup_reciprocal(const mp_int *c, const mp_int *p, int k, mp_int *x)
+{
+ int k_orig = k; /* k is counted down below; keep the total for the final shift */
+ mp_digit r;
+ mp_size ix;
+ mp_err res;
+
+ if (mp_cmp_z(c) < 0) { /* c < 0 */
+ MP_CHECKOK(mp_add(c, p, x)); /* x = c + p */
+ } else {
+ MP_CHECKOK(mp_copy(c, x)); /* x = c */
+ }
+
+ /* make sure x is large enough */
+ ix = MP_HOWMANY(k, MP_DIGIT_BIT) + MP_USED(p) + 1; /* MP_HOWMANY: presumably a rounding-up division -- confirm */
+ ix = MP_MAX(ix, MP_USED(x));
+ MP_CHECKOK(s_mp_pad(x, ix));
+
+ r = 0 - s_mp_invmod_radix(MP_DIGIT(p, 0)); /* r = -(low digit of p)**-1 mod MP_RADIX */
+
+ for (ix = 0; k > 0; ix++) {
+ int j = MP_MIN(k, MP_DIGIT_BIT); /* bits handled in this round */
+ mp_digit v = r * MP_DIGIT(x, ix);
+ if (j < MP_DIGIT_BIT) {
+ v &= ((mp_digit)1 << j) - 1; /* v = v mod (2 ** j) */
+ }
+ s_mp_mul_d_add_offset(p, v, x, ix); /* x += p * v * (RADIX ** ix) */
+ k -= j;
+ }
+ s_mp_clamp(x);
+ s_mp_div_2d(x, k_orig); /* drop the k low-order bits */
+ res = MP_OKAY;
+
+CLEANUP:
+ return res;
+}
+
+/* Modular inverse by Schroeppel's method; usable only when m is odd.
+** Returns MP_RANGE if a or m is zero and MP_UNDEF if m is even.
+** When c aliases a or m, the aliased input is first copied into a local
+** so that writing c cannot disturb it.
+*/
+mp_err
+s_mp_invmod_odd_m(const mp_int *a, const mp_int *m, mp_int *c)
+{
+    mp_int scratch;
+    mp_err res;
+    int k;
+
+    ARGCHK(a && m && c, MP_BADARG);
+
+    if (mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
+        return MP_RANGE;
+    if (mp_iseven(m))
+        return MP_UNDEF;
+
+    /* Unallocated marker, so the mp_clear() at CLEANUP is always valid */
+    MP_DIGITS(&scratch) = 0;
+
+    if (a == c) {
+        res = mp_init_copy(&scratch, a);
+        if (res != MP_OKAY)
+            return res;
+        if (a == m)
+            m = &scratch;
+        a = &scratch;
+    } else if (m == c) {
+        res = mp_init_copy(&scratch, m);
+        if (res != MP_OKAY)
+            return res;
+        m = &scratch;
+    }
+
+    /* First stage yields the shift count k in res; second stage removes it */
+    MP_CHECKOK(s_mp_almost_inverse(a, m, c));
+    k = res;
+    MP_CHECKOK(s_mp_fixup_reciprocal(c, m, k, c));
+
+CLEANUP:
+    mp_clear(&scratch);
+    return res;
+}
+
+/* Reference modular inverse built on mp_xgcd(); known good, but slow.
+** Returns MP_RANGE if a or m is zero and MP_UNDEF when the gcd of a
+** and m is not 1.  Otherwise c = x mod m, with c's sign then set to
+** the sign of a.
+*/
+mp_err
+mp_invmod_xgcd(const mp_int *a, const mp_int *m, mp_int *c)
+{
+    mp_int g, x;
+    mp_err res;
+
+    ARGCHK(a && m && c, MP_BADARG);
+
+    if (mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
+        return MP_RANGE;
+
+    /* Unallocated markers, so CLEANUP may clear both unconditionally */
+    MP_DIGITS(&g) = 0;
+    MP_DIGITS(&x) = 0;
+    MP_CHECKOK(mp_init(&x));
+    MP_CHECKOK(mp_init(&g));
+
+    MP_CHECKOK(mp_xgcd(a, m, &g, &x, NULL));
+
+    if (mp_cmp_d(&g, 1) == MP_EQ) {
+        res = mp_mod(&x, m, c);
+        SIGN(c) = SIGN(a);
+    } else {
+        res = MP_UNDEF;
+    }
+
+CLEANUP:
+    mp_clear(&x);
+    mp_clear(&g);
+
+    return res;
+}
+
+/* modular inverse where modulus is 2**k. */
+/* c = a**-1 mod 2**k
+**
+** Returns MP_UNDEF when a is even.
+** For k <= MP_DIGIT_BIT the answer is the single digit produced by
+** s_mp_invmod_radix() on a's low digit, masked to k bits when
+** k < MP_DIGIT_BIT.
+** For larger k the value t1 is refined as t1 = t1 * (2 - val * t1)
+** mod 2**k until it no longer changes, for at most k + 4 rounds;
+** MP_UNDEF is returned if the rounds run out first.
+*/
+mp_err
+s_mp_invmod_2d(const mp_int *a, mp_size k, mp_int *c)
+{
+ mp_err res;
+ mp_size ix = k + 4; /* iteration budget for the refinement loop */
+ mp_int t0, t1, val, tmp, two2k;
+
+ static const mp_digit d2 = 2;
+ static const mp_int two = { MP_ZPOS, 1, 1, (mp_digit *)&d2 }; /* constant 2 backed by d2; field order presumably sign, alloc, used, digits -- confirm */
+
+ if (mp_iseven(a))
+ return MP_UNDEF;
+ if (k <= MP_DIGIT_BIT) {
+ mp_digit i = s_mp_invmod_radix(MP_DIGIT(a, 0));
+ if (k < MP_DIGIT_BIT)
+ i &= ((mp_digit)1 << k) - (mp_digit)1; /* keep only the low k bits */
+ mp_set(c, i);
+ return MP_OKAY;
+ }
+ /* mark all temporaries as unallocated before the first MP_CHECKOK */
+ MP_DIGITS(&t0) = 0;
+ MP_DIGITS(&t1) = 0;
+ MP_DIGITS(&val) = 0;
+ MP_DIGITS(&tmp) = 0;
+ MP_DIGITS(&two2k) = 0;
+ MP_CHECKOK(mp_init_copy(&val, a));
+ s_mp_mod_2d(&val, k); /* val = a mod 2**k */
+ MP_CHECKOK(mp_init_copy(&t0, &val));
+ MP_CHECKOK(mp_init_copy(&t1, &t0));
+ MP_CHECKOK(mp_init(&tmp));
+ MP_CHECKOK(mp_init(&two2k));
+ MP_CHECKOK(s_mp_2expt(&two2k, k)); /* two2k = 2**k */
+ do {
+ MP_CHECKOK(mp_mul(&val, &t1, &tmp)); /* tmp = val * t1 */
+ MP_CHECKOK(mp_sub(&two, &tmp, &tmp)); /* tmp = 2 - tmp */
+ MP_CHECKOK(mp_mul(&t1, &tmp, &t1)); /* t1 = t1 * tmp */
+ s_mp_mod_2d(&t1, k);
+ while (MP_SIGN(&t1) != MP_ZPOS) {
+ MP_CHECKOK(mp_add(&t1, &two2k, &t1));
+ }
+ if (mp_cmp(&t1, &t0) == MP_EQ) /* unchanged since the previous round */
+ break;
+ MP_CHECKOK(mp_copy(&t1, &t0));
+ } while (--ix > 0);
+ if (!ix) {
+ res = MP_UNDEF; /* budget exhausted without reaching a fixed point */
+ } else {
+ mp_exch(c, &t1); /* c takes the result; its old value is cleared as t1 below */
+ }
+
+CLEANUP:
+ mp_clear(&t0);
+ mp_clear(&t1);
+ mp_clear(&val);
+ mp_clear(&tmp);
+ mp_clear(&two2k);
+ return res;
+}
+
+mp_err
+s_mp_invmod_even_m(const mp_int *a, const mp_int *m, mp_int *c)
+{ /* Compute c = a**-1 mod m for a modulus m that is even.
+  ** When s_mp_ispow2(m) is non-negative, that value is used as k and
+  ** the work is handed to s_mp_invmod_2d(). Otherwise m is split as
+  ** oddFactor * 2**k, a is inverted modulo each factor, and the two
+  ** results are combined with the Chinese Remainder Theorem.
+  ** No argument checks are performed here.
+  */
+ mp_err res;
+ mp_size k;
+ mp_int oddFactor, evenFactor; /* factors of the modulus */
+ mp_int oddPart, evenPart; /* parts to combine via CRT. */
+ mp_int C2, tmp1, tmp2;
+
+ /*static const mp_digit d1 = 1; */
+ /*static const mp_int one = { MP_ZPOS, 1, 1, (mp_digit *)&d1 }; */
+
+ if ((res = s_mp_ispow2(m)) >= 0) { /* presumably the exponent when m is a power of two -- confirm */
+ k = res;
+ return s_mp_invmod_2d(a, k, c);
+ }
+ /* mark all temporaries as unallocated before the first MP_CHECKOK */
+ MP_DIGITS(&oddFactor) = 0;
+ MP_DIGITS(&evenFactor) = 0;
+ MP_DIGITS(&oddPart) = 0;
+ MP_DIGITS(&evenPart) = 0;
+ MP_DIGITS(&C2) = 0;
+ MP_DIGITS(&tmp1) = 0;
+ MP_DIGITS(&tmp2) = 0;
+
+ MP_CHECKOK(mp_init_copy(&oddFactor, m)); /* oddFactor = m */
+ MP_CHECKOK(mp_init(&evenFactor));
+ MP_CHECKOK(mp_init(&oddPart));
+ MP_CHECKOK(mp_init(&evenPart));
+ MP_CHECKOK(mp_init(&C2));
+ MP_CHECKOK(mp_init(&tmp1));
+ MP_CHECKOK(mp_init(&tmp2));
+
+ k = mp_trailing_zeros(m); /* number of factors of two in m */
+ s_mp_div_2d(&oddFactor, k); /* oddFactor = m / 2**k */
+ MP_CHECKOK(s_mp_2expt(&evenFactor, k)); /* evenFactor = 2**k */
+
+ /* compute a**-1 mod oddFactor. */
+ MP_CHECKOK(s_mp_invmod_odd_m(a, &oddFactor, &oddPart));
+ /* compute a**-1 mod evenFactor, where evenFactor == 2**k. */
+ MP_CHECKOK(s_mp_invmod_2d(a, k, &evenPart));
+
+ /* Use Chinese Remainder Theorem to compute a**-1 mod m. */
+ /* let m1 = oddFactor, v1 = oddPart,
+ * let m2 = evenFactor, v2 = evenPart.
+ */
+
+ /* Compute C2 = m1**-1 mod m2. */
+ MP_CHECKOK(s_mp_invmod_2d(&oddFactor, k, &C2));
+
+ /* compute u = (v2 - v1)*C2 mod m2 */
+ MP_CHECKOK(mp_sub(&evenPart, &oddPart, &tmp1));
+ MP_CHECKOK(mp_mul(&tmp1, &C2, &tmp2));
+ s_mp_mod_2d(&tmp2, k);
+ while (MP_SIGN(&tmp2) != MP_ZPOS) {
+ MP_CHECKOK(mp_add(&tmp2, &evenFactor, &tmp2));
+ }
+
+ /* compute answer = v1 + u*m1 */
+ MP_CHECKOK(mp_mul(&tmp2, &oddFactor, c));
+ MP_CHECKOK(mp_add(&oddPart, c, c));
+ /* not sure this is necessary, but it's low cost if not. */
+ MP_CHECKOK(mp_mod(c, m, c));
+
+CLEANUP:
+ mp_clear(&oddFactor);
+ mp_clear(&evenFactor);
+ mp_clear(&oddPart);
+ mp_clear(&evenPart);
+ mp_clear(&C2);
+ mp_clear(&tmp1);
+ mp_clear(&tmp2);
+ return res;
+}
+
+/* {{{ mp_invmod(a, m, c) */
+
+/*
+  mp_invmod(a, m, c)
+
+  Compute c = a^-1 (mod m), if there is an inverse for a (mod m).
+  This is equivalent to the question of whether (a, m) = 1.  If not,
+  MP_UNDEF is returned, and there is no inverse.
+
+  MP_RANGE is returned when a or m is zero.  Odd moduli are handled by
+  s_mp_invmod_odd_m(); for an even modulus an even a is rejected
+  immediately and everything else goes to s_mp_invmod_even_m().
+ */
+
+mp_err
+mp_invmod(const mp_int *a, const mp_int *m, mp_int *c)
+{
+
+    ARGCHK(a && m && c, MP_BADARG);
+
+    if (!(mp_cmp_z(a) != 0 && mp_cmp_z(m) != 0))
+        return MP_RANGE;
+
+    if (!mp_isodd(m)) {
+        /* a and m both even: they share the factor 2 */
+        if (mp_iseven(a))
+            return MP_UNDEF; /* not invertable */
+
+        return s_mp_invmod_even_m(a, m, c);
+    }
+
+    return s_mp_invmod_odd_m(a, m, c);
+
+} /* end mp_invmod() */
+
+/* }}} */
+#endif /* if MP_NUMTH */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/* {{{ mp_print(mp, ofp) */
+
+#if MP_IOFUNC
+/*
+  mp_print(mp, ofp)
+
+  Write mp to the stream 'ofp': a '-' or '+' sign character followed by
+  every used digit, most significant first, each formatted with
+  DIGIT_FMT.  Does nothing when either argument is NULL.
+ */
+
+void
+mp_print(mp_int *mp, FILE *ofp)
+{
+    int ix;
+
+    if (mp == NULL || ofp == NULL)
+        return;
+
+    if (SIGN(mp) == NEG)
+        fputc('-', ofp);
+    else
+        fputc('+', ofp);
+
+    ix = USED(mp) - 1;
+    while (ix >= 0) {
+        fprintf(ofp, DIGIT_FMT, DIGIT(mp, ix));
+        --ix;
+    }
+
+} /* end mp_print() */
+
+#endif /* if MP_IOFUNC */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/* {{{ More I/O Functions */
+
+/* {{{ mp_read_raw(mp, str, len) */
+
+/*
+  mp_read_raw(mp, str, len)
+
+  Load mp from a raw buffer: byte 0 selects the sign (non-zero means
+  negative) and bytes 1 .. len-1 are base-256 digits, most significant
+  first.  Returns MP_OKAY or the error from mp_mul_d() / mp_add_d().
+ */
+
+mp_err
+mp_read_raw(mp_int *mp, char *str, int len)
+{
+    unsigned char *ustr = (unsigned char *)str;
+    mp_err res;
+    int ix;
+
+    ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG);
+
+    mp_zero(mp);
+
+    /* The leading byte carries only the sign */
+    SIGN(mp) = ustr[0] ? NEG : ZPOS;
+
+    /* Accumulate the magnitude one byte at a time */
+    ix = 1;
+    while (ix < len) {
+        res = mp_mul_d(mp, 256, mp);
+        if (res != MP_OKAY)
+            return res;
+
+        res = mp_add_d(mp, ustr[ix], mp);
+        if (res != MP_OKAY)
+            return res;
+
+        ++ix;
+    }
+
+    return MP_OKAY;
+
+} /* end mp_read_raw() */
+
+/* }}} */
+
+/* {{{ mp_raw_size(mp) */
+
+/*
+  Number of bytes mp_toraw() writes for mp: one sign byte plus
+  sizeof(mp_digit) bytes for every used digit.
+ */
+int
+mp_raw_size(mp_int *mp)
+{
+    size_t digit_bytes;
+
+    ARGCHK(mp != NULL, 0);
+
+    digit_bytes = USED(mp) * sizeof(mp_digit);
+    return digit_bytes + 1;
+
+} /* end mp_raw_size() */
+
+/* }}} */
+
+/* {{{ mp_toraw(mp, str) */
+
+/*
+  Serialize mp into str: str[0] receives the sign value, followed by
+  every used digit from most to least significant, each digit emitted
+  high-order byte first.  The caller provides the buffer.
+ */
+mp_err
+mp_toraw(mp_int *mp, char *str)
+{
+    int ix, shift, pos = 1;
+
+    ARGCHK(mp != NULL && str != NULL, MP_BADARG);
+
+    str[0] = (char)SIGN(mp);
+
+    ix = USED(mp) - 1;
+    while (ix >= 0) {
+        mp_digit d = DIGIT(mp, ix);
+
+        /* Walk the digit from its top byte down to its bottom byte */
+        shift = ((int)sizeof(mp_digit) - 1) * CHAR_BIT;
+        while (shift >= 0) {
+            str[pos++] = (char)(d >> shift);
+            shift -= CHAR_BIT;
+        }
+
+        --ix;
+    }
+
+    return MP_OKAY;
+
+} /* end mp_toraw() */
+
+/* }}} */
+
+/* {{{ mp_read_radix(mp, str, radix) */
+
+/*
+  mp_read_radix(mp, str, radix)
+
+  Parse an integer written in base 'radix' from str and store it in
+  mp.  Leading characters are skipped until a digit of that radix, a
+  '-' or a '+' is found; one optional sign is consumed; digits are then
+  read until the first character that is not a digit of the radix
+  (which includes the terminating NUL).  A zero result is always given
+  the ZPOS sign.
+ */
+
+mp_err
+mp_read_radix(mp_int *mp, const char *str, int radix)
+{
+    int ix = 0, val = 0;
+    mp_err res;
+    mp_sign sig = ZPOS;
+
+    ARGCHK(mp != NULL && str != NULL && radix >= 2 && radix <= MAX_RADIX,
+           MP_BADARG);
+
+    mp_zero(mp);
+
+    /* Skip ahead to the first digit or sign character */
+    for (;;) {
+        char ch = str[ix];
+
+        if (!ch)
+            break;
+        if (s_mp_tovalue(ch, radix) >= 0)
+            break;
+        if (ch == '-' || ch == '+')
+            break;
+        ++ix;
+    }
+
+    /* A '+' is consumed but changes nothing: ZPOS is already the default */
+    if (str[ix] == '-') {
+        sig = NEG;
+        ++ix;
+    } else if (str[ix] == '+') {
+        ++ix;
+    }
+
+    /* mp = mp * radix + digit, for as long as digits keep coming */
+    for (;;) {
+        val = s_mp_tovalue(str[ix], radix);
+        if (val < 0)
+            break;
+
+        res = s_mp_mul_d(mp, radix);
+        if (res != MP_OKAY)
+            return res;
+
+        res = s_mp_add_d(mp, val);
+        if (res != MP_OKAY)
+            return res;
+
+        ++ix;
+    }
+
+    SIGN(mp) = (s_mp_cmp_d(mp, 0) == MP_EQ) ? ZPOS : sig;
+
+    return MP_OKAY;
+
+} /* end mp_read_radix() */
+
+/*
+  Parse an integer whose base may be given by a prefix: after optional
+  leading junk and one sign character, "0x" / "0X" selects base 16 and
+  any other leading '0' selects base 8; without a prefix default_radix
+  is used.  The digits are read by mp_read_radix() and the sign seen
+  here is applied afterwards (zero always stays ZPOS).
+ */
+mp_err
+mp_read_variable_radix(mp_int *a, const char *str, int default_radix)
+{
+    int radix = default_radix;
+    int cx;
+    mp_sign sig = ZPOS;
+    mp_err res;
+
+    /* Skip ahead to the first digit or sign character */
+    for (;;) {
+        cx = *str;
+        if (cx == 0)
+            break;
+        if (s_mp_tovalue(cx, radix) >= 0)
+            break;
+        if (cx == '-' || cx == '+')
+            break;
+        ++str;
+    }
+
+    /* A '+' is consumed but changes nothing: ZPOS is already the default */
+    if (cx == '-') {
+        sig = NEG;
+        ++str;
+    } else if (cx == '+') {
+        ++str;
+    }
+
+    if (str[0] == '0') {
+        /* OR-ing in 0x20 folds 'X' onto 'x' */
+        if ((str[1] | 0x20) == 'x') {
+            radix = 16;
+            str += 2;
+        } else {
+            radix = 8;
+            str++;
+        }
+    }
+
+    res = mp_read_radix(a, str, radix);
+    if (res == MP_OKAY) {
+        if (s_mp_cmp_d(a, 0) == MP_EQ)
+            MP_SIGN(a) = ZPOS;
+        else
+            MP_SIGN(a) = sig;
+    }
+
+    return res;
+}
+
+/* }}} */
+
+/* {{{ mp_radix_size(mp, radix) */
+
+/*
+  Output length for mp in the given radix, as computed by s_mp_outlen()
+  from the bit count USED(mp) * DIGIT_BIT - 1.  Returns 0 when mp is
+  NULL or radix lies outside 2 .. MAX_RADIX.
+ */
+int
+mp_radix_size(mp_int *mp, int radix)
+{
+    int bits;
+
+    if (mp != NULL && radix >= 2 && radix <= MAX_RADIX) {
+        bits = USED(mp) * DIGIT_BIT - 1;
+        return s_mp_outlen(bits, radix);
+    }
+
+    return 0;
+
+} /* end mp_radix_size() */
+
+/* }}} */
+
+/* {{{ mp_toradix(mp, str, radix) */
+
+/*
+  Write mp into str as a NUL-terminated string in the given radix.
+  Zero is written as "0".  Otherwise digits are produced least
+  significant first by repeated mp_div_d() on a scratch copy, a '-' is
+  appended for negative values, and the buffer is reversed in place.
+  The caller provides the buffer.
+ */
+mp_err
+mp_toradix(mp_int *mp, char *str, int radix)
+{
+    mp_int work;
+    mp_sign sgn;
+    mp_digit rem, rdx;
+    mp_err res;
+    int lo, hi, pos = 0;
+
+    ARGCHK(mp != NULL && str != NULL, MP_BADARG);
+    ARGCHK(radix > 1 && radix <= MAX_RADIX, MP_RANGE);
+
+    if (mp_cmp_z(mp) == MP_EQ) {
+        str[0] = '0';
+        str[1] = '\0';
+        return MP_OKAY;
+    }
+
+    rdx = (mp_digit)radix;
+
+    res = mp_init_copy(&work, mp);
+    if (res != MP_OKAY)
+        return res;
+
+    /* Remember the sign, then work on the magnitude only */
+    sgn = SIGN(&work);
+    SIGN(&work) = ZPOS;
+
+    /* Peel off output digits, lowest first */
+    while (mp_cmp_z(&work) != 0) {
+        res = mp_div_d(&work, rdx, &work, &rem);
+        if (res != MP_OKAY) {
+            mp_clear(&work);
+            return res;
+        }
+
+        /* Third argument 0 requests capital letters */
+        str[pos++] = s_mp_todigit(rem, radix, 0);
+    }
+
+    if (sgn == NEG)
+        str[pos++] = '-';
+
+    str[pos] = '\0';
+
+    /* Reverse digits and sign into reading order */
+    for (lo = 0, hi = pos - 1; lo < hi; ++lo, --hi) {
+        char swap = str[lo];
+
+        str[lo] = str[hi];
+        str[hi] = swap;
+    }
+
+    mp_clear(&work);
+
+    return MP_OKAY;
+
+} /* end mp_toradix() */
+
+/* }}} */
+
+/* {{{ mp_tovalue(ch, r) */
+
+/* Public wrapper: the value s_mp_tovalue() assigns to ch in radix r */
+int
+mp_tovalue(char ch, int r)
+{
+    int value = s_mp_tovalue(ch, r);
+
+    return value;
+
+} /* end mp_tovalue() */
+
+/* }}} */
+
+/* }}} */
+
+/* {{{ mp_strerror(ec) */
+
+/*
+  mp_strerror(ec)
+
+  Map error code 'ec' to its entry in mp_err_string.  Codes outside
+  MP_LAST_CODE .. MP_OKAY select entry 0, the "unknown" message.  The
+  returned string must not be modified or freed by the caller.
+ */
+const char *
+mp_strerror(mp_err ec)
+{
+    /* Code values are negative, so the valid range runs from
+       MP_LAST_CODE up to MP_OKAY */
+    if (ec >= MP_LAST_CODE && ec <= MP_OKAY) {
+        int aec = (ec < 0) ? -ec : ec;
+
+        return mp_err_string[aec + 1];
+    }
+
+    return mp_err_string[0]; /* unknown error code */
+
+} /* end mp_strerror() */
+
+/* }}} */
+
+/*========================================================================*/
+/*------------------------------------------------------------------------*/
+/* Static function definitions (internal use only) */
+
+/* {{{ Memory management */
+
+/* {{{ s_mp_grow(mp, min) */
+
+/* Ensure mp has room for at least 'min' digits.  When it must grow, the
+   size is rounded up to a multiple of s_mp_defprec, the used digits are
+   copied into the new buffer, and the old buffer is zeroed before it is
+   freed.  Returns MP_MEM if the allocation fails. */
+mp_err
+s_mp_grow(mp_int *mp, mp_size min)
+{
+    mp_digit *fresh;
+
+    if (min <= ALLOC(mp))
+        return MP_OKAY;
+
+    /* Round up to the next default precision block size */
+    min = MP_ROUNDUP(min, s_mp_defprec);
+
+    fresh = s_mp_alloc(min, sizeof(mp_digit));
+    if (fresh == NULL)
+        return MP_MEM;
+
+    s_mp_copy(DIGITS(mp), fresh, USED(mp));
+
+    /* Wipe the old digits before giving the memory back */
+    s_mp_setz(DIGITS(mp), ALLOC(mp));
+    s_mp_free(DIGITS(mp));
+
+    DIGITS(mp) = fresh;
+    ALLOC(mp) = min;
+
+    return MP_OKAY;
+
+} /* end s_mp_grow() */
+
+/* }}} */
+
+/* {{{ s_mp_pad(mp, min) */
+
+/* Raise the used size of mp to at least 'min' digits.  If the current
+   allocation suffices the newly exposed digits are zeroed here;
+   otherwise s_mp_grow() supplies a larger buffer. */
+mp_err
+s_mp_pad(mp_int *mp, mp_size min)
+{
+    mp_err res;
+
+    if (min <= USED(mp))
+        return MP_OKAY;
+
+    if (min <= ALLOC(mp)) {
+        s_mp_setz(DIGITS(mp) + USED(mp), min - USED(mp));
+    } else {
+        res = s_mp_grow(mp, min);
+        if (res != MP_OKAY)
+            return res;
+    }
+
+    /* Increase precision; should already be 0-filled */
+    USED(mp) = min;
+
+    return MP_OKAY;
+
+} /* end s_mp_pad() */
+
+/* }}} */
+
+/* {{{ s_mp_setz(dp, count) */
+
+/* Zero 'count' digits starting at dp: via memset() when MP_MEMSET is
+   non-zero, otherwise with a plain store loop */
+void
+s_mp_setz(mp_digit *dp, mp_size count)
+{
+#if MP_MEMSET != 0
+    memset(dp, 0, count * sizeof(mp_digit));
+#else
+    mp_digit *end = dp + count;
+
+    while (dp < end)
+        *dp++ = 0;
+#endif
+
+} /* end s_mp_setz() */
+
+/* }}} */
+
+/* {{{ s_mp_copy(sp, dp, count) */
+
+/* Copy 'count' digits from sp to dp: via memcpy() when MP_MEMCPY is
+   non-zero, otherwise with a plain element-by-element loop */
+void
+s_mp_copy(const mp_digit *sp, mp_digit *dp, mp_size count)
+{
+#if MP_MEMCPY != 0
+    memcpy(dp, sp, count * sizeof(mp_digit));
+#else
+    const mp_digit *end = sp + count;
+
+    while (sp < end)
+        *dp++ = *sp++;
+#endif
+} /* end s_mp_copy() */
+
+/* }}} */
+
+/* {{{ s_mp_alloc(nb, ni) */
+
+/* Allocate zero-filled storage for nb records of ni bytes each, and return
+   a pointer to that. Both arguments are passed to calloc() in that order
+   (element count first, element size second); the result is whatever
+   calloc() returns. */
+void *
+s_mp_alloc(size_t nb, size_t ni)
+{
+ return calloc(nb, ni);
+
+} /* end s_mp_alloc() */
+
+/* }}} */
+
+/* {{{ s_mp_free(ptr) */
+
+/* Free the memory pointed to by ptr; a NULL ptr is accepted and ignored */
+void
+s_mp_free(void *ptr)
+{
+    /* free(NULL) is defined to do nothing, so no guard is needed. */
+    free(ptr);
+} /* end s_mp_free() */
+
+/* }}} */
+
+/* {{{ s_mp_clamp(mp) */
+
+/* Remove leading zeroes from the given value */
+void
+s_mp_clamp(mp_int *mp)
+{
+    mp_size top;
+
+    /* Walk down from the most significant digit and stop at the first
+       nonzero one; at least one digit always stays in use. */
+    for (top = MP_USED(mp); top > 1; --top) {
+        if (DIGIT(mp, top - 1) != 0) {
+            break;
+        }
+    }
+    MP_USED(mp) = top;
+} /* end s_mp_clamp() */
+
+/* }}} */
+
+/* {{{ s_mp_exch(a, b) */
+
+/* Exchange the data for a and b; (b, a) = (a, b) */
+void
+s_mp_exch(mp_int *a, mp_int *b)
+{
+    /* Swap the two structures by value, going through a temporary copy. */
+    mp_int held = *b;
+
+    *b = *a;
+    *a = held;
+
+} /* end s_mp_exch() */
+
+/* }}} */
+
+/* }}} */
+
+/* {{{ Arithmetic helpers */
+
+/* {{{ s_mp_lshd(mp, p) */
+
+/*
+ Shift mp leftward by p digits, growing if needed, and zero-filling
+ the in-shifted digits at the right end. This is a convenient
+ alternative to multiplication by powers of the radix
+ */
+
+mp_err
+s_mp_lshd(mp_int *mp, mp_size p)
+{
+    mp_err status;
+    unsigned int pos;
+
+    /* A zero shift, or a single zero digit, leaves mp unchanged. */
+    if (p == 0 || (MP_USED(mp) == 1 && MP_DIGIT(mp, 0) == 0)) {
+        return MP_OKAY;
+    }
+
+    status = s_mp_pad(mp, USED(mp) + p);
+    if (status != MP_OKAY) {
+        return status;
+    }
+
+    /* Move every previously used digit up by p places, working from the
+       top down so that nothing is overwritten before it is read. */
+    pos = USED(mp) - p;
+    while (pos > 0) {
+        --pos;
+        DIGIT(mp, pos + p) = DIGIT(mp, pos);
+    }
+
+    /* Clear the p low-order places that were vacated. */
+    for (pos = 0; (mp_size)pos < p; ++pos) {
+        DIGIT(mp, pos) = 0;
+    }
+
+    return MP_OKAY;
+
+} /* end s_mp_lshd() */
+
+/* }}} */
+
+/* {{{ s_mp_mul_2d(mp, d) */
+
+/*
+ Multiply the integer by 2^d, where d is a number of bits. This
+ amounts to a bitwise shift of the value.
+ */
+/* Shifts mp left by d bits in place. Returns MP_OKAY on success or the
+   error from s_mp_pad()/s_mp_lshd() if more room cannot be obtained.
+   The result is clamped before returning. */
+mp_err
+s_mp_mul_2d(mp_int *mp, mp_digit d)
+{
+ mp_err res;
+ mp_digit dshift, bshift;
+ mp_digit mask;
+
+ ARGCHK(mp != NULL, MP_BADARG);
+
+ /* Split the shift into whole digits (dshift) and leftover bits (bshift) */
+ dshift = d / MP_DIGIT_BIT;
+ bshift = d % MP_DIGIT_BIT;
+ /* bits to be shifted out of the top word */
+ if (bshift) {
+ mask = (mp_digit)~0 << (MP_DIGIT_BIT - bshift);
+ mask &= MP_DIGIT(mp, MP_USED(mp) - 1);
+ } else {
+ mask = 0;
+ }
+
+ /* Reserve dshift more digits, plus one if bits spill out of the top digit */
+ if (MP_OKAY != (res = s_mp_pad(mp, MP_USED(mp) + dshift + (mask != 0))))
+ return res;
+
+ /* Whole-digit part of the shift; s_mp_lshd pads again by dshift itself,
+    the surplus high digits stay zero and are removed by the clamp below */
+ if (dshift && MP_OKAY != (res = s_mp_lshd(mp, dshift)))
+ return res;
+
+ if (bshift) {
+ mp_digit *pa = MP_DIGITS(mp);
+ mp_digit *alim = pa + MP_USED(mp);
+ mp_digit prev = 0;
+
+ /* Bit part of the shift: start above the dshift zero digits and carry
+    each digit's shifted-out high bits (prev) into the next digit */
+ for (pa += dshift; pa < alim;) {
+ mp_digit x = *pa;
+ *pa++ = (x << bshift) | prev;
+ prev = x >> (DIGIT_BIT - bshift);
+ }
+ }
+
+ s_mp_clamp(mp);
+ return MP_OKAY;
+} /* end s_mp_mul_2d() */
+
+/* {{{ s_mp_rshd(mp, p) */
+
+/*
+ Shift mp rightward by p digits. Maintains the invariant that
+ digits above the precision are all zero. Digits shifted off the
+ end are lost. Cannot fail.
+ */
+
+void
+s_mp_rshd(mp_int *mp, mp_size p)
+{
+    mp_digit *digits;
+    mp_size keep;
+    mp_size pos;
+
+    if (p == 0) {
+        return;
+    }
+
+    /* Every digit would be shifted off: the result is a positive zero. */
+    if (p >= USED(mp)) {
+        s_mp_setz(DIGITS(mp), ALLOC(mp));
+        USED(mp) = 1;
+        SIGN(mp) = ZPOS;
+        return;
+    }
+
+    /* Slide the surviving digits down by p places. */
+    digits = MP_DIGITS(mp);
+    keep = USED(mp) - p;
+    for (pos = 0; pos < keep; ++pos) {
+        digits[pos] = digits[pos + p];
+    }
+
+    MP_USED(mp) = keep;
+
+    /* Zero the p places just above the new top digit. */
+    for (pos = 0; pos < p; ++pos) {
+        digits[keep + pos] = 0;
+    }
+
+} /* end s_mp_rshd() */
+
+/* }}} */
+
+/* {{{ s_mp_div_2(mp) */
+
+/* Divide by two -- take advantage of radix properties to do it fast */
+void
+s_mp_div_2(mp_int *mp)
+{
+    /* Halving is a right shift by a single bit. */
+    const mp_digit one_bit = 1;
+
+    s_mp_div_2d(mp, one_bit);
+
+} /* end s_mp_div_2() */
+
+/* }}} */
+
+/* {{{ s_mp_mul_2(mp) */
+
+/* Double mp in place by shifting it left one bit. Returns MP_OKAY, or the
+   error from s_mp_grow() when a new top digit is needed and cannot be
+   allocated. */
+mp_err
+s_mp_mul_2(mp_int *mp)
+{
+    mp_digit *digits = MP_DIGITS(mp);
+    unsigned int count = MP_USED(mp);
+    unsigned int pos;
+    mp_digit carry = 0;
+
+    /* Shift each digit left one bit, feeding its old top bit into the
+       digit above. */
+    for (pos = 0; pos < count; ++pos) {
+        mp_digit cur = digits[pos];
+
+        digits[pos] = (cur << 1) | carry;
+        carry = (cur >> (DIGIT_BIT - 1));
+    }
+
+    if (!carry) {
+        return MP_OKAY;
+    }
+
+    /* A bit fell off the top digit: append it as a new digit, growing the
+       buffer first if it is full. */
+    if (pos >= ALLOC(mp)) {
+        mp_err status = s_mp_grow(mp, ALLOC(mp) + 1);
+
+        if (status != MP_OKAY) {
+            return status;
+        }
+    }
+
+    DIGIT(mp, pos) = carry;
+    USED(mp) += 1;
+
+    return MP_OKAY;
+
+} /* end s_mp_mul_2() */
+
+/* }}} */
+
+/* {{{ s_mp_mod_2d(mp, d) */
+
+/*
+ Remainder the integer by 2^d, where d is a number of bits. This
+ amounts to a bitwise AND of the value, and does not require the full
+ division code
+ */
+void
+s_mp_mod_2d(mp_int *mp, mp_digit d)
+{
+    mp_size whole = (d / DIGIT_BIT); /* index of the digit holding bit d */
+    mp_size part = (d % DIGIT_BIT);  /* position of bit d inside that digit */
+    mp_size pos;
+
+    /* The value has no bits at or above 2^d, so it is already reduced. */
+    if (whole >= USED(mp)) {
+        return;
+    }
+
+    /* Keep only the low 'part' bits of the boundary digit. */
+    DIGIT(mp, whole) &= (((mp_digit)1 << part) - 1);
+
+    /* Clear every digit above the boundary digit. */
+    pos = whole + 1;
+    while (pos < USED(mp)) {
+        DIGIT(mp, pos) = 0;
+        ++pos;
+    }
+
+    s_mp_clamp(mp);
+
+} /* end s_mp_mod_2d() */
+
+/* }}} */
+
+/* {{{ s_mp_div_2d(mp, d) */
+
+/*
+ Divide the integer by 2^d, where d is a number of bits. This
+ amounts to a bitwise shift of the value, and does not require the
+ full division code (used in Barrett reduction, see below)
+ */
+void
+s_mp_div_2d(mp_int *mp, mp_digit d)
+{
+    /* Whole digits first, then whatever bit count is left over. */
+    s_mp_rshd(mp, d / DIGIT_BIT);
+    d %= DIGIT_BIT;
+
+    if (d != 0) {
+        const mp_digit low_mask = ((mp_digit)1 << d) - 1;
+        mp_digit carry_in = 0;
+        int pos = USED(mp) - 1;
+
+        /* Work from the top digit down, handing each digit's low d bits
+           to the digit below it as that digit's new high bits. */
+        while (pos >= 0) {
+            mp_digit cur = DIGIT(mp, pos);
+
+            DIGIT(mp, pos) = (cur >> d) | (carry_in << (DIGIT_BIT - d));
+            carry_in = cur & low_mask;
+            --pos;
+        }
+    }
+    s_mp_clamp(mp);
+
+} /* end s_mp_div_2d() */
+
+/* }}} */
+
+/* {{{ s_mp_norm(a, b, *d) */
+
+/*
+ s_mp_norm(a, b, *d)
+
+ Normalize a and b for division, where b is the divisor. In order
+ that we might make good guesses for quotient digits, we want the
+ leading digit of b to be at least half the radix, which we
+ accomplish by multiplying a and b by a power of 2. The exponent
+ (shift count) is placed in *pd, so that the remainder can be shifted
+ back at the end of the division process.
+ */
+
+/* Returns MP_OKAY, or the error left in res by a failing s_mp_mul_2d();
+   *pd is assigned only after both shifts have been attempted without
+   jumping to CLEANUP (presumably what MP_CHECKOK does on failure).
+   NOTE(review): the scan loop below does not terminate if the top digit
+   of b is zero, so b must be nonzero and clamped -- confirm at call sites. */
+mp_err
+s_mp_norm(mp_int *a, mp_int *b, mp_digit *pd)
+{
+ mp_digit d;
+ mp_digit mask;
+ mp_digit b_msd;
+ mp_err res = MP_OKAY;
+
+ d = 0;
+ mask = DIGIT_MAX & ~(DIGIT_MAX >> 1); /* mask is msb of digit */
+ b_msd = DIGIT(b, USED(b) - 1);
+ /* Count the left shifts needed to bring a set bit into the msb of b_msd */
+ while (!(b_msd & mask)) {
+ b_msd <<= 1;
+ ++d;
+ }
+
+ /* Apply the same shift to both operands so their quotient is unchanged */
+ if (d) {
+ MP_CHECKOK(s_mp_mul_2d(a, d));
+ MP_CHECKOK(s_mp_mul_2d(b, d));
+ }
+
+ *pd = d;
+CLEANUP:
+ return res;
+
+} /* end s_mp_norm() */
+
+/* }}} */
+
+/* }}} */
+
+/* {{{ Primitive digit arithmetic */
+
+/* {{{ s_mp_add_d(mp, d) */
+
+/* Add d to |mp| in place */
+/* Two builds of the same operation: the first accumulates in the wider
+   mp_word type, the second uses only mp_digit arithmetic and detects
+   carries by comparison. Returns MP_OKAY, or the error from s_mp_pad()
+   when mp has to gain a digit to hold the final carry. */
+mp_err s_mp_add_d(mp_int *mp, mp_digit d) /* unsigned digit addition */
+{
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+ mp_word w, k = 0;
+ mp_size ix = 1;
+
+ /* Add d into the lowest digit; k receives the carry out of it */
+ w = (mp_word)DIGIT(mp, 0) + d;
+ DIGIT(mp, 0) = ACCUM(w);
+ k = CARRYOUT(w);
+
+ /* Ripple the carry upward for as long as there is one */
+ while (ix < USED(mp) && k) {
+ w = (mp_word)DIGIT(mp, ix) + k;
+ DIGIT(mp, ix) = ACCUM(w);
+ k = CARRYOUT(w);
+ ++ix;
+ }
+
+ /* Carry out of the top digit: ix equals the old USED(mp) here, so the
+    carry lands in the digit that s_mp_pad() just brought into use */
+ if (k != 0) {
+ mp_err res;
+
+ if ((res = s_mp_pad(mp, USED(mp) + 1)) != MP_OKAY)
+ return res;
+
+ DIGIT(mp, ix) = (mp_digit)k;
+ }
+
+ return MP_OKAY;
+#else
+ mp_digit *pmp = MP_DIGITS(mp);
+ mp_digit sum, mp_i, carry = 0;
+ mp_err res = MP_OKAY;
+ int used = (int)MP_USED(mp);
+
+ mp_i = *pmp;
+ *pmp++ = sum = d + mp_i;
+ carry = (sum < d); /* the sum wrapped around iff it is smaller than an addend */
+ while (carry && --used > 0) {
+ mp_i = *pmp;
+ *pmp++ = sum = carry + mp_i;
+ carry = !sum; /* adding 1 wraps exactly when the result is zero */
+ }
+ if (carry && !used) {
+ /* mp is growing */
+ used = MP_USED(mp);
+ MP_CHECKOK(s_mp_pad(mp, used + 1));
+ MP_DIGIT(mp, used) = carry;
+ }
+CLEANUP:
+ return res;
+#endif
+} /* end s_mp_add_d() */
+
+/* }}} */
+
+/* {{{ s_mp_sub_d(mp, d) */
+
+/* Subtract d from |mp| in place, assumes |mp| > d */
+/* Two builds of the same operation: the first works in the wider mp_word
+   type, the second uses only mp_digit arithmetic and detects borrows by
+   comparison. The result is clamped. Returns MP_RANGE when a borrow
+   remains after the top digit (the subtraction has still been carried
+   out on the digits), MP_OKAY otherwise. */
+mp_err s_mp_sub_d(mp_int *mp, mp_digit d) /* unsigned digit subtract */
+{
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
+ mp_word w, b = 0;
+ mp_size ix = 1;
+
+ /* Compute initial subtraction */
+ /* RADIX is added up front; if CARRYOUT(w) is then zero a borrow occurred */
+ w = (RADIX + (mp_word)DIGIT(mp, 0)) - d;
+ b = CARRYOUT(w) ? 0 : 1;
+ DIGIT(mp, 0) = ACCUM(w);
+
+ /* Propagate borrows leftward */
+ while (b && ix < USED(mp)) {
+ w = (RADIX + (mp_word)DIGIT(mp, ix)) - b;
+ b = CARRYOUT(w) ? 0 : 1;
+ DIGIT(mp, ix) = ACCUM(w);
+ ++ix;
+ }
+
+ /* Remove leading zeroes */
+ s_mp_clamp(mp);
+
+ /* If we have a borrow out, it's a violation of the input invariant */
+ if (b)
+ return MP_RANGE;
+ else
+ return MP_OKAY;
+#else
+ mp_digit *pmp = MP_DIGITS(mp);
+ mp_digit mp_i, diff, borrow;
+ mp_size used = MP_USED(mp);
+
+ mp_i = *pmp;
+ *pmp++ = diff = mp_i - d;
+ borrow = (diff > mp_i); /* the difference wrapped iff it exceeds the minuend */
+ while (borrow && --used) {
+ mp_i = *pmp;
+ *pmp++ = diff = mp_i - borrow;
+ borrow = (diff > mp_i);
+ }
+ s_mp_clamp(mp);
+ /* used reaches zero only when the borrow ran past the top digit */
+ return (borrow && !used) ? MP_RANGE : MP_OKAY;
+#endif
+} /* end s_mp_sub_d() */
+
+/* }}} */
+
+/* {{{ s_mp_mul_d(a, d) */
+
+/* Compute a = a * d, single digit multiplication */
+mp_err
+s_mp_mul_d(mp_int *a, mp_digit d)
+{
+    mp_err res;
+    mp_size ndigits;
+    int shift;
+
+    /* Multipliers 0 and 1 need no digit arithmetic. */
+    if (d == 0) {
+        mp_zero(a);
+        return MP_OKAY;
+    } else if (d == 1) {
+        return MP_OKAY;
+    }
+
+    /* When s_mp_ispow2d() reports a non-negative value for d, that value
+       is used as a bit-shift count instead of multiplying. */
+    shift = s_mp_ispow2d(d);
+    if (shift >= 0) {
+        return s_mp_mul_2d(a, (mp_digit)shift);
+    }
+
+    /* General case: bring one extra digit into use for the carry,
+       multiply in place, then trim a leading zero if there is one. */
+    ndigits = MP_USED(a);
+    MP_CHECKOK(s_mp_pad(a, ndigits + 1));
+
+    s_mpv_mul_d(MP_DIGITS(a), ndigits, d, MP_DIGITS(a));
+    s_mp_clamp(a);
+
+CLEANUP:
+    return res;
+
+} /* end s_mp_mul_d() */
+
+/* }}} */
+
+/* {{{ s_mp_div_d(mp, d, r) */
+
+/*
+  s_mp_div_d(mp, d, r)
+
+  Compute the quotient mp = mp / d and remainder r = mp mod d, for a
+  single digit d. If r is null, the remainder will be discarded.
+
+  Returns MP_RANGE when d is zero, MP_OKAY on success, or the error
+  reported by a failing helper. The quotient is built in a temporary and
+  only exchanged into mp at the very end, so mp keeps its original value
+  when an error is returned.
+ */
+
+mp_err
+s_mp_div_d(mp_int *mp, mp_digit d, mp_digit *r)
+{
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
+    mp_word w = 0, q;
+#else
+    mp_digit w = 0, q;
+#endif
+    int ix;
+    mp_err res;
+    mp_int quot;
+    mp_int rem;
+
+    if (d == 0)
+        return MP_RANGE;
+    if (d == 1) {
+        if (r)
+            *r = 0;
+        return MP_OKAY;
+    }
+    /* could check for power of 2 here, but mp_div_d does that. */
+    if (MP_USED(mp) == 1) {
+        /* Single-digit dividend: plain machine division, no temporaries */
+        mp_digit n = MP_DIGIT(mp, 0);
+        mp_digit n_mod_d; /* named so it does not shadow the mp_int 'rem' */
+
+        q = n / d;
+        n_mod_d = n % d;
+        MP_DIGIT(mp, 0) = q;
+        if (r)
+            *r = n_mod_d;
+        return MP_OKAY;
+    }
+
+    /* Null digit pointers so that CLEANUP can clear both temporaries even
+       if one of them was never initialized */
+    MP_DIGITS(&rem) = 0;
+    MP_DIGITS(&quot) = 0;
+    /* Make room for the quotient */
+    MP_CHECKOK(mp_init_size(&quot, USED(mp)));
+
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
+    /* Schoolbook division from the top digit down; w carries the running
+       remainder into the next step */
+    for (ix = USED(mp) - 1; ix >= 0; ix--) {
+        w = (w << DIGIT_BIT) | DIGIT(mp, ix);
+
+        if (w >= d) {
+            q = w / d;
+            w = w % d;
+        } else {
+            q = 0;
+        }
+
+        /* The shift can fail when it has to grow quot; storing q without
+           a successful shift would silently corrupt the quotient */
+        MP_CHECKOK(s_mp_lshd(&quot, 1));
+        DIGIT(&quot, 0) = (mp_digit)q;
+    }
+#else
+    {
+        mp_digit p;
+#if !defined(MP_ASSEMBLY_DIV_2DX1D)
+        mp_digit norm;
+#endif
+
+        MP_CHECKOK(mp_init_copy(&rem, mp));
+
+#if !defined(MP_ASSEMBLY_DIV_2DX1D)
+        /* Borrow quot as a one-digit holder for d so that s_mp_norm() can
+           shift the dividend copy and the divisor by the same amount */
+        MP_DIGIT(&quot, 0) = d;
+        MP_CHECKOK(s_mp_norm(&rem, &quot, &norm));
+        if (norm)
+            d <<= norm;
+        MP_DIGIT(&quot, 0) = 0;
+#endif
+
+        /* p is the remainder carried in from the previous (higher) digit */
+        p = 0;
+        for (ix = USED(&rem) - 1; ix >= 0; ix--) {
+            w = DIGIT(&rem, ix);
+
+            if (p) {
+                MP_CHECKOK(s_mpv_div_2dx1d(p, w, d, &q, &w));
+            } else if (w >= d) {
+                q = w / d;
+                w = w % d;
+            } else {
+                q = 0;
+            }
+
+            MP_CHECKOK(s_mp_lshd(&quot, 1));
+            DIGIT(&quot, 0) = q;
+            p = w;
+        }
+#if !defined(MP_ASSEMBLY_DIV_2DX1D)
+        /* Undo the normalization shift on the final remainder */
+        if (norm)
+            w >>= norm;
+#endif
+    }
+#endif
+
+    /* Deliver the remainder, if desired */
+    if (r) {
+        *r = (mp_digit)w;
+    }
+
+    s_mp_clamp(&quot);
+    mp_exch(&quot, mp);
+CLEANUP:
+    mp_clear(&quot);
+    mp_clear(&rem);
+
+    return res;
+} /* end s_mp_div_d() */
+
+/* }}} */
+
+/* }}} */
+
+/* {{{ Primitive full arithmetic */
+
+/* {{{ s_mp_add(a, b) */
+
+/* Compute a = |a| + |b| */
+/* Adds the digits of b into a in place; neither sign field is read or
+   written here. Two builds: one accumulating in the wider mp_word type,
+   one using only mp_digit arithmetic. Returns MP_OKAY, or the error from
+   s_mp_pad() if a cannot be enlarged. */
+mp_err s_mp_add(mp_int *a, const mp_int *b) /* magnitude addition */
+{
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+ mp_word w = 0;
+#else
+ mp_digit d, sum, carry = 0;
+#endif
+ mp_digit *pa, *pb;
+ mp_size ix;
+ mp_size used;
+ mp_err res;
+
+ /* Make sure a has enough precision for the output value */
+ if ((USED(b) > USED(a)) && (res = s_mp_pad(a, USED(b))) != MP_OKAY)
+ return res;
+
+ /*
+ Add up all digits up to the precision of b. If b had initially
+ the same precision as a, or greater, we took care of it by the
+ padding step above, so there is no problem. If b had initially
+ less precision, we'll have to make sure the carry out is duly
+ propagated upward among the higher-order digits of the sum.
+ */
+ pa = MP_DIGITS(a);
+ pb = MP_DIGITS(b);
+ used = MP_USED(b);
+ for (ix = 0; ix < used; ix++) {
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+ w = w + *pa + *pb++;
+ *pa++ = ACCUM(w);
+ w = CARRYOUT(w);
+#else
+ d = *pa;
+ sum = d + *pb++;
+ d = (sum < d); /* detect overflow */
+ *pa++ = sum += carry;
+ carry = d + (sum < carry); /* detect overflow */
+#endif
+ }
+
+ /* If we run out of 'b' digits before we're actually done, make
+ sure the carries get propagated upward...
+ */
+ used = MP_USED(a);
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+ while (w && ix < used) {
+ w = w + *pa;
+ *pa++ = ACCUM(w);
+ w = CARRYOUT(w);
+ ++ix;
+ }
+#else
+ while (carry && ix < used) {
+ sum = carry + *pa;
+ *pa++ = sum;
+ carry = !sum; /* adding the carry wraps exactly when the result is zero */
+ ++ix;
+ }
+#endif
+
+/* If there's an overall carry out, increase precision and include
+ it. We could have done this initially, but why touch the memory
+ allocator unless we're sure we have to?
+ */
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+ if (w) {
+ if ((res = s_mp_pad(a, used + 1)) != MP_OKAY)
+ return res;
+
+ /* a carry can only remain once ix has reached used, so ix is the new top digit */
+ DIGIT(a, ix) = (mp_digit)w;
+ }
+#else
+ if (carry) {
+ if ((res = s_mp_pad(a, used + 1)) != MP_OKAY)
+ return res;
+
+ DIGIT(a, used) = carry;
+ }
+#endif
+
+ return MP_OKAY;
+} /* end s_mp_add() */
+
+/* }}} */
+
+/* Compute c = |a| + |b| */ /* magnitude addition */
+/* Three-operand form: a and b are only read, the sum is written to c.
+   c takes the sign of the a that was passed in (copied before the
+   operands are possibly exchanged) and its used count is set to the
+   length of the sum. Returns MP_OKAY, or the error from s_mp_pad(). */
+mp_err
+s_mp_add_3arg(const mp_int *a, const mp_int *b, mp_int *c)
+{
+ mp_digit *pa, *pb, *pc;
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+ mp_word w = 0;
+#else
+ mp_digit sum, carry = 0, d;
+#endif
+ mp_size ix;
+ mp_size used;
+ mp_err res;
+
+ MP_SIGN(c) = MP_SIGN(a);
+ /* Arrange for a to be the operand with at least as many digits as b */
+ if (MP_USED(a) < MP_USED(b)) {
+ const mp_int *xch = a;
+ a = b;
+ b = xch;
+ }
+
+ /* Make sure c has enough precision for the output value */
+ if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a))))
+ return res;
+
+ /*
+ Add up all digits up to the precision of b. If b had initially
+ the same precision as a, or greater, we took care of it by the
+ exchange step above, so there is no problem. If b had initially
+ less precision, we'll have to make sure the carry out is duly
+ propagated upward among the higher-order digits of the sum.
+ */
+ pa = MP_DIGITS(a);
+ pb = MP_DIGITS(b);
+ pc = MP_DIGITS(c);
+ used = MP_USED(b);
+ for (ix = 0; ix < used; ix++) {
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+ w = w + *pa++ + *pb++;
+ *pc++ = ACCUM(w);
+ w = CARRYOUT(w);
+#else
+ d = *pa++;
+ sum = d + *pb++;
+ d = (sum < d); /* detect overflow */
+ *pc++ = sum += carry;
+ carry = d + (sum < carry); /* detect overflow */
+#endif
+ }
+
+ /* If we run out of 'b' digits before we're actually done, make
+ sure the carries get propagated upward...
+ */
+ /* Unlike the in-place version this loop always runs to the end of a,
+    because the remaining digits of a still have to be copied into c */
+ for (used = MP_USED(a); ix < used; ++ix) {
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+ w = w + *pa++;
+ *pc++ = ACCUM(w);
+ w = CARRYOUT(w);
+#else
+ *pc++ = sum = carry + *pa++;
+ carry = (sum < carry);
+#endif
+ }
+
+/* If there's an overall carry out, increase precision and include
+ it. We could have done this initially, but why touch the memory
+ allocator unless we're sure we have to?
+ */
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+ if (w) {
+ if ((res = s_mp_pad(c, used + 1)) != MP_OKAY)
+ return res;
+
+ DIGIT(c, used) = (mp_digit)w;
+ ++used;
+ }
+#else
+ if (carry) {
+ if ((res = s_mp_pad(c, used + 1)) != MP_OKAY)
+ return res;
+
+ DIGIT(c, used) = carry;
+ ++used;
+ }
+#endif
+ /* Record the exact length of the sum; c may have had more digits in use */
+ MP_USED(c) = used;
+ return MP_OKAY;
+}
+/* {{{ s_mp_add_offset(a, b, offset) */
+
+/* Compute a = |a| + ( |b| * (RADIX ** offset) ) */
+/* Adds b into a starting at digit position 'offset' of a, i.e. digit ib
+   of b is added to digit offset + ib of a. The result is clamped.
+   Returns MP_OKAY, or the error from s_mp_pad() if a cannot be enlarged. */
+mp_err
+s_mp_add_offset(mp_int *a, mp_int *b, mp_size offset)
+{
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+ mp_word w, k = 0;
+#else
+ mp_digit d, sum, carry = 0;
+#endif
+ mp_size ib;
+ mp_size ia;
+ mp_size lim;
+ mp_err res;
+
+ /* Make sure a has enough precision for the output value */
+ lim = MP_USED(b) + offset;
+ if ((lim > USED(a)) && (res = s_mp_pad(a, lim)) != MP_OKAY)
+ return res;
+
+ /*
+ Add up all digits up to the precision of b. If b had initially
+ the same precision as a, or greater, we took care of it by the
+ padding step above, so there is no problem. If b had initially
+ less precision, we'll have to make sure the carry out is duly
+ propagated upward among the higher-order digits of the sum.
+ */
+ lim = USED(b);
+ for (ib = 0, ia = offset; ib < lim; ib++, ia++) {
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+ w = (mp_word)DIGIT(a, ia) + DIGIT(b, ib) + k;
+ DIGIT(a, ia) = ACCUM(w);
+ k = CARRYOUT(w);
+#else
+ d = MP_DIGIT(a, ia);
+ sum = d + MP_DIGIT(b, ib);
+ d = (sum < d); /* carry out of a + b */
+ MP_DIGIT(a, ia) = sum += carry;
+ carry = d + (sum < carry); /* plus carry out of adding the carry in */
+#endif
+ }
+
+/* If we run out of 'b' digits before we're actually done, make
+ sure the carries get propagated upward...
+ */
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+ for (lim = MP_USED(a); k && (ia < lim); ++ia) {
+ w = (mp_word)DIGIT(a, ia) + k;
+ DIGIT(a, ia) = ACCUM(w);
+ k = CARRYOUT(w);
+ }
+#else
+ for (lim = MP_USED(a); carry && (ia < lim); ++ia) {
+ d = MP_DIGIT(a, ia);
+ MP_DIGIT(a, ia) = sum = d + carry;
+ carry = (sum < d);
+ }
+#endif
+
+/* If there's an overall carry out, increase precision and include
+ it. We could have done this initially, but why touch the memory
+ allocator unless we're sure we have to?
+ */
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD)
+ if (k) {
+ if ((res = s_mp_pad(a, USED(a) + 1)) != MP_OKAY)
+ return res;
+
+ /* a carry can only remain once ia has reached the old USED(a) */
+ DIGIT(a, ia) = (mp_digit)k;
+ }
+#else
+ if (carry) {
+ if ((res = s_mp_pad(a, lim + 1)) != MP_OKAY)
+ return res;
+
+ DIGIT(a, lim) = carry;
+ }
+#endif
+ s_mp_clamp(a);
+
+ return MP_OKAY;
+
+} /* end s_mp_add_offset() */
+
+/* }}} */
+
+/* {{{ s_mp_sub(a, b) */
+
+/* Compute a = |a| - |b|, assumes |a| >= |b| */
+/* Subtracts the digits of b from a in place and clamps the result; the
+   sign fields are not touched. Returns MP_RANGE if a borrow is left over
+   at the end (the digits of a have been modified regardless), MP_OKAY
+   otherwise. No allocation takes place here.
+   NOTE(review): the mp_word build keeps the borrow in a signed mp_sword
+   and right-shifts it while negative, which C leaves implementation-
+   defined -- presumably an arithmetic shift is assumed. */
+mp_err s_mp_sub(mp_int *a, const mp_int *b) /* magnitude subtract */
+{
+ mp_digit *pa, *pb, *limit;
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
+ mp_sword w = 0;
+#else
+ mp_digit d, diff, borrow = 0;
+#endif
+
+ /*
+ Subtract and propagate borrow. Up to the precision of b, this
+ accounts for the digits of b; after that, we just make sure the
+ carries get to the right place. This saves having to pad b out to
+ the precision of a just to make the loops work right...
+ */
+ pa = MP_DIGITS(a);
+ pb = MP_DIGITS(b);
+ limit = pb + MP_USED(b);
+ while (pb < limit) {
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
+ w = w + *pa - *pb++;
+ *pa++ = ACCUM(w);
+ w >>= MP_DIGIT_BIT;
+#else
+ d = *pa;
+ diff = d - *pb++;
+ d = (diff > d); /* detect borrow */
+ /* apply the incoming borrow; wrapping to MP_DIGIT_MAX means it borrows again */
+ if (borrow && --diff == MP_DIGIT_MAX)
+ ++d;
+ *pa++ = diff;
+ borrow = d;
+#endif
+ }
+ /* Continue through the remaining digits of a only while a borrow is pending */
+ limit = MP_DIGITS(a) + MP_USED(a);
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
+ while (w && pa < limit) {
+ w = w + *pa;
+ *pa++ = ACCUM(w);
+ w >>= MP_DIGIT_BIT;
+ }
+#else
+ while (borrow && pa < limit) {
+ d = *pa;
+ *pa++ = diff = d - borrow;
+ borrow = (diff > d);
+ }
+#endif
+
+ /* Clobber any leading zeroes we created */
+ s_mp_clamp(a);
+
+/*
+ If there was a borrow out, then |b| > |a| in violation
+ of our input invariant. We've already done the work,
+ but we'll at least complain about it...
+ */
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
+ return w ? MP_RANGE : MP_OKAY;
+#else
+ return borrow ? MP_RANGE : MP_OKAY;
+#endif
+} /* end s_mp_sub() */
+
+/* }}} */
+
+/* Compute c = |a| - |b|, assumes |a| >= |b| */ /* magnitude subtract */
+/* Three-operand form: a and b are only read, the difference is written
+   to c, which takes the sign of a and is clamped. Returns the error from
+   s_mp_pad() if c cannot be enlarged, MP_RANGE if a borrow is left over
+   at the end (c has been written regardless), MP_OKAY otherwise. */
+mp_err
+s_mp_sub_3arg(const mp_int *a, const mp_int *b, mp_int *c)
+{
+ mp_digit *pa, *pb, *pc;
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
+ mp_sword w = 0;
+#else
+ mp_digit d, diff, borrow = 0;
+#endif
+ int ix, limit;
+ mp_err res;
+
+ MP_SIGN(c) = MP_SIGN(a);
+
+ /* Make sure c has enough precision for the output value */
+ if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a))))
+ return res;
+
+ /*
+ Subtract and propagate borrow. Up to the precision of b, this
+ accounts for the digits of b; after that, we just make sure the
+ carries get to the right place. This saves having to pad b out to
+ the precision of a just to make the loops work right...
+ */
+ pa = MP_DIGITS(a);
+ pb = MP_DIGITS(b);
+ pc = MP_DIGITS(c);
+ limit = MP_USED(b);
+ for (ix = 0; ix < limit; ++ix) {
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
+ w = w + *pa++ - *pb++;
+ *pc++ = ACCUM(w);
+ w >>= MP_DIGIT_BIT;
+#else
+ d = *pa++;
+ diff = d - *pb++;
+ d = (diff > d); /* detect borrow */
+ /* apply the incoming borrow; wrapping to MP_DIGIT_MAX means it borrows again */
+ if (borrow && --diff == MP_DIGIT_MAX)
+ ++d;
+ *pc++ = diff;
+ borrow = d;
+#endif
+ }
+ /* This loop always runs to the end of a, because the remaining digits
+    of a still have to be copied into c */
+ for (limit = MP_USED(a); ix < limit; ++ix) {
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
+ w = w + *pa++;
+ *pc++ = ACCUM(w);
+ w >>= MP_DIGIT_BIT;
+#else
+ d = *pa++;
+ *pc++ = diff = d - borrow;
+ borrow = (diff > d);
+#endif
+ }
+
+ /* Clobber any leading zeroes we created */
+ MP_USED(c) = ix;
+ s_mp_clamp(c);
+
+/*
+ If there was a borrow out, then |b| > |a| in violation
+ of our input invariant. We've already done the work,
+ but we'll at least complain about it...
+ */
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD)
+ return w ? MP_RANGE : MP_OKAY;
+#else
+ return borrow ? MP_RANGE : MP_OKAY;
+#endif
+}
+/* {{{ s_mp_mul(a, b) */
+
+/* Compute a = |a| * |b| */
+mp_err
+s_mp_mul(mp_int *a, const mp_int *b)
+{
+    /* Delegate to mp_mul(), naming a as both an operand and the result;
+       its status is passed back unchanged. */
+    mp_err status = mp_mul(a, b, a);
+
+    return status;
+} /* end s_mp_mul() */
+
+/* }}} */
+
+/* MP_MUL_DxD(a, b, Phi, Plo): multiply the digits a and b, leaving the high
+   digit of the double-width product in Phi and the low digit in Plo.
+   One of three implementations is chosen at compile time. The arguments
+   are pasted in textually, unparenthesized and (in the generic version)
+   more than once, so pass simple variables without side effects. */
+#if defined(MP_USE_UINT_DIGIT) && defined(MP_USE_LONG_LONG_MULTIPLY)
+/* This trick works on Sparc V8 CPUs with the Workshop compilers. */
+#define MP_MUL_DxD(a, b, Phi, Plo) \
+ { \
+ unsigned long long product = (unsigned long long)a * b; \
+ Plo = (mp_digit)product; \
+ Phi = (mp_digit)(product >> MP_DIGIT_BIT); \
+ }
+#elif defined(OSF1)
+/* OSF1 build: low and high halves come from the mulq / umulh asm forms */
+#define MP_MUL_DxD(a, b, Phi, Plo) \
+ { \
+ Plo = asm("mulq %a0, %a1, %v0", a, b); \
+ Phi = asm("umulh %a0, %a1, %v0", a, b); \
+ }
+#else
+/* Generic build: split each operand into half digits, form the four
+   partial products, and fold the two cross products into Phi:Plo while
+   tracking the carries by comparison. */
+#define MP_MUL_DxD(a, b, Phi, Plo) \
+ { \
+ mp_digit a0b1, a1b0; \
+ Plo = (a & MP_HALF_DIGIT_MAX) * (b & MP_HALF_DIGIT_MAX); \
+ Phi = (a >> MP_HALF_DIGIT_BIT) * (b >> MP_HALF_DIGIT_BIT); \
+ a0b1 = (a & MP_HALF_DIGIT_MAX) * (b >> MP_HALF_DIGIT_BIT); \
+ a1b0 = (a >> MP_HALF_DIGIT_BIT) * (b & MP_HALF_DIGIT_MAX); \
+ a1b0 += a0b1; \
+ Phi += a1b0 >> MP_HALF_DIGIT_BIT; \
+ if (a1b0 < a0b1) \
+ Phi += MP_HALF_RADIX; \
+ a1b0 <<= MP_HALF_DIGIT_BIT; \
+ Plo += a1b0; \
+ if (Plo < a1b0) \
+ ++Phi; \
+ }
+#endif
+
+#if !defined(MP_ASSEMBLY_MULTIPLY)
+/* c = a * b */
+/* Multiplies the a_len digits at a by the single digit b and stores the
+   product at c. Exactly a_len + 1 digits of c are written: one per input
+   digit plus the final carry. Each input digit is read before the
+   matching output digit is stored, and s_mp_mul_d() calls this with a
+   and c pointing at the same array. */
+void
+s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
+ mp_digit d = 0; /* carry into the next digit */
+
+ /* Inner product: Digits of a */
+ while (a_len--) {
+ mp_word w = ((mp_word)b * *a++) + d;
+ *c++ = ACCUM(w);
+ d = CARRYOUT(w);
+ }
+ *c = d;
+#else
+ mp_digit carry = 0;
+ while (a_len--) {
+ mp_digit a_i = *a++;
+ mp_digit a0b0, a1b1;
+
+ /* a1b1:a0b0 = a_i * b (high digit : low digit) */
+ MP_MUL_DxD(a_i, b, a1b1, a0b0);
+
+ a0b0 += carry;
+ if (a0b0 < carry)
+ ++a1b1;
+ *c++ = a0b0;
+ carry = a1b1;
+ }
+ *c = carry;
+#endif
+}
+
+/* c += a * b */
+/* Adds a * b (a_len digits times one digit) into the first a_len digits
+   of c. The final carry is then STORED into c[a_len], replacing whatever
+   that digit held, rather than being added to it. */
+void
+s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b,
+ mp_digit *c)
+{
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
+ mp_digit d = 0; /* carry into the next digit */
+
+ /* Inner product: Digits of a */
+ while (a_len--) {
+ mp_word w = ((mp_word)b * *a++) + *c + d;
+ *c++ = ACCUM(w);
+ d = CARRYOUT(w);
+ }
+ *c = d;
+#else
+ mp_digit carry = 0;
+ while (a_len--) {
+ mp_digit a_i = *a++;
+ mp_digit a0b0, a1b1;
+
+ /* a1b1:a0b0 = a_i * b (high digit : low digit) */
+ MP_MUL_DxD(a_i, b, a1b1, a0b0);
+
+ a0b0 += carry;
+ if (a0b0 < carry)
+ ++a1b1;
+ /* a_i is reused here to hold the current digit of c */
+ a0b0 += a_i = *c;
+ if (a0b0 < a_i)
+ ++a1b1;
+ *c++ = a0b0;
+ carry = a1b1;
+ }
+ *c = carry;
+#endif
+}
+
+/* Presently, this is only used by the Montgomery arithmetic code. */
+/* c += a * b */
+/* Adds a * b (a_len digits times one digit) into c and then keeps adding
+   the carry into successive higher digits of c until it dies out. The
+   propagation loop has no length limit of its own, so c needs to extend
+   far enough for the carry to be absorbed. */
+void
+s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
+ mp_digit d = 0; /* carry into the next digit */
+
+ /* Inner product: Digits of a */
+ while (a_len--) {
+ mp_word w = ((mp_word)b * *a++) + *c + d;
+ *c++ = ACCUM(w);
+ d = CARRYOUT(w);
+ }
+
+ /* Propagate the remaining carry through the higher digits of c */
+ while (d) {
+ mp_word w = (mp_word)*c + d;
+ *c++ = ACCUM(w);
+ d = CARRYOUT(w);
+ }
+#else
+ mp_digit carry = 0;
+ while (a_len--) {
+ mp_digit a_i = *a++;
+ mp_digit a0b0, a1b1;
+
+ /* a1b1:a0b0 = a_i * b (high digit : low digit) */
+ MP_MUL_DxD(a_i, b, a1b1, a0b0);
+
+ a0b0 += carry;
+ if (a0b0 < carry)
+ ++a1b1;
+
+ /* a_i is reused here to hold the current digit of c */
+ a0b0 += a_i = *c;
+ if (a0b0 < a_i)
+ ++a1b1;
+
+ *c++ = a0b0;
+ carry = a1b1;
+ }
+ /* Propagate the remaining carry through the higher digits of c */
+ while (carry) {
+ mp_digit c_i = *c;
+ carry += c_i;
+ *c++ = carry;
+ carry = carry < c_i;
+ }
+#endif
+}
+#endif
+
+/* MP_SQR_D(a, Phi, Plo): square the digit a, leaving the high digit of the
+   double-width result in Phi and the low digit in Plo. One of three
+   implementations is chosen at compile time. The argument is pasted in
+   textually, unparenthesized and more than once, so pass a simple
+   variable without side effects. */
+#if defined(MP_USE_UINT_DIGIT) && defined(MP_USE_LONG_LONG_MULTIPLY)
+/* This trick works on Sparc V8 CPUs with the Workshop compilers. */
+#define MP_SQR_D(a, Phi, Plo) \
+ { \
+ unsigned long long square = (unsigned long long)a * a; \
+ Plo = (mp_digit)square; \
+ Phi = (mp_digit)(square >> MP_DIGIT_BIT); \
+ }
+#elif defined(OSF1)
+/* OSF1 build: low and high halves come from the mulq / umulh asm forms */
+#define MP_SQR_D(a, Phi, Plo) \
+ { \
+ Plo = asm("mulq %a0, %a0, %v0", a); \
+ Phi = asm("umulh %a0, %a0, %v0", a); \
+ }
+#else
+/* Generic build: with a split into half digits, the two cross products
+   are equal, so a single Pmid is computed and folded in doubled (hence
+   the shift counts of MP_HALF_DIGIT_BIT - 1 and MP_HALF_DIGIT_BIT + 1). */
+#define MP_SQR_D(a, Phi, Plo) \
+ { \
+ mp_digit Pmid; \
+ Plo = (a & MP_HALF_DIGIT_MAX) * (a & MP_HALF_DIGIT_MAX); \
+ Phi = (a >> MP_HALF_DIGIT_BIT) * (a >> MP_HALF_DIGIT_BIT); \
+ Pmid = (a & MP_HALF_DIGIT_MAX) * (a >> MP_HALF_DIGIT_BIT); \
+ Phi += Pmid >> (MP_HALF_DIGIT_BIT - 1); \
+ Pmid <<= (MP_HALF_DIGIT_BIT + 1); \
+ Plo += Pmid; \
+ if (Plo < Pmid) \
+ ++Phi; \
+ }
+#endif
+
+#if !defined(MP_ASSEMBLY_SQUARE)
+/* Add the squares of the digits of pa to the digits of ps: the two-digit
+   square of pa[i] is added into ps[2*i] and ps[2*i + 1], with the carry
+   running on into the following digits. After the last input digit any
+   remaining carry keeps propagating through higher digits of ps until it
+   dies out; that loop has no length limit of its own. */
+void
+s_mpv_sqr_add_prop(const mp_digit *pa, mp_size a_len, mp_digit *ps)
+{
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD)
+ mp_word w;
+ mp_digit d;
+ mp_size ix;
+
+ w = 0;
+/* ADD_SQUARE(n): add pa[n]^2 plus the running carry in w into ps[2n] and
+   ps[2n+1], leaving the new carry in w. The macro is not #undef'd in
+   this function. */
+#define ADD_SQUARE(n) \
+ d = pa[n]; \
+ w += (d * (mp_word)d) + ps[2 * n]; \
+ ps[2 * n] = ACCUM(w); \
+ w = (w >> DIGIT_BIT) + ps[2 * n + 1]; \
+ ps[2 * n + 1] = ACCUM(w); \
+ w = (w >> DIGIT_BIT)
+
+ /* Main loop handles four input digits per pass */
+ for (ix = a_len; ix >= 4; ix -= 4) {
+ ADD_SQUARE(0);
+ ADD_SQUARE(1);
+ ADD_SQUARE(2);
+ ADD_SQUARE(3);
+ pa += 4;
+ ps += 8;
+ }
+ /* Up to three digits are left over: advance both pointers past them
+    first, then address them with negative indices */
+ if (ix) {
+ ps += 2 * ix;
+ pa += ix;
+ switch (ix) {
+ case 3:
+ ADD_SQUARE(-3); /* FALLTHRU */
+ case 2:
+ ADD_SQUARE(-2); /* FALLTHRU */
+ case 1:
+ ADD_SQUARE(-1); /* FALLTHRU */
+ case 0:
+ break;
+ }
+ }
+ /* Propagate the remaining carry through the higher digits of ps */
+ while (w) {
+ w += *ps;
+ *ps++ = ACCUM(w);
+ w = (w >> DIGIT_BIT);
+ }
+#else
+ mp_digit carry = 0;
+ while (a_len--) {
+ mp_digit a_i = *pa++;
+ mp_digit a0a0, a1a1;
+
+ MP_SQR_D(a_i, a1a1, a0a0);
+
+ /* here a1a1 and a0a0 constitute a_i ** 2 */
+ a0a0 += carry;
+ if (a0a0 < carry)
+ ++a1a1;
+
+ /* now add to ps */
+ /* a_i is reused below to hold the current digit of ps */
+ a0a0 += a_i = *ps;
+ if (a0a0 < a_i)
+ ++a1a1;
+ *ps++ = a0a0;
+ a1a1 += a_i = *ps;
+ carry = (a1a1 < a_i);
+ *ps++ = a1a1;
+ }
+ /* Propagate the remaining carry through the higher digits of ps */
+ while (carry) {
+ mp_digit s_i = *ps;
+ carry += s_i;
+ *ps++ = carry;
+ carry = carry < s_i;
+ }
+#endif
+}
+#endif
+
+#if (defined(MP_NO_MP_WORD) || defined(MP_NO_DIV_WORD)) && !defined(MP_ASSEMBLY_DIV_2DX1D)
+/*
+** Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
+** so its high bit is 1. This code is from NSPR.
+*/
+/* Divides the two-digit value Nhi:Nlo by divisor, producing the quotient
+   one half digit at a time (q1 then q0). The quotient is stored through
+   qp and the remainder through rp; either pointer may be null to discard
+   that result. Always returns MP_OKAY.
+   NOTE(review): both steps divide by d1, the high half of divisor, so a
+   divisor whose high half is zero would divide by zero -- the
+   normalization requirement stated above rules that out. */
+mp_err
+s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
+ mp_digit *qp, mp_digit *rp)
+{
+ mp_digit d1, d0, q1, q0;
+ mp_digit r1, r0, m;
+
+ /* Split the divisor into its high (d1) and low (d0) halves */
+ d1 = divisor >> MP_HALF_DIGIT_BIT;
+ d0 = divisor & MP_HALF_DIGIT_MAX;
+ /* First quotient half: estimate from Nhi / d1, then correct the
+    estimate downward at most twice */
+ r1 = Nhi % d1;
+ q1 = Nhi / d1;
+ m = q1 * d0;
+ r1 = (r1 << MP_HALF_DIGIT_BIT) | (Nlo >> MP_HALF_DIGIT_BIT);
+ if (r1 < m) {
+ q1--, r1 += divisor;
+ if (r1 >= divisor && r1 < m) {
+ q1--, r1 += divisor;
+ }
+ }
+ r1 -= m;
+ /* Second quotient half: same procedure on the running remainder and
+    the low half of Nlo */
+ r0 = r1 % d1;
+ q0 = r1 / d1;
+ m = q0 * d0;
+ r0 = (r0 << MP_HALF_DIGIT_BIT) | (Nlo & MP_HALF_DIGIT_MAX);
+ if (r0 < m) {
+ q0--, r0 += divisor;
+ if (r0 >= divisor && r0 < m) {
+ q0--, r0 += divisor;
+ }
+ }
+ if (qp)
+ *qp = (q1 << MP_HALF_DIGIT_BIT) | q0;
+ if (rp)
+ *rp = r0 - m;
+ return MP_OKAY;
+}
+#endif
+
+#if MP_SQUARE
+/* {{{ s_mp_sqr(a) */
+
+/* Square a in place. The result is computed into a temporary sized for
+   twice as many digits as a uses and exchanged into a only when mp_sqr()
+   succeeds. Returns the status of mp_init_size() or mp_sqr(). */
+mp_err
+s_mp_sqr(mp_int *a)
+{
+    mp_int square;
+    mp_err status = mp_init_size(&square, 2 * USED(a));
+
+    if (status != MP_OKAY) {
+        return status;
+    }
+
+    status = mp_sqr(a, &square);
+    if (status == MP_OKAY) {
+        s_mp_exch(&square, a);
+    }
+
+    /* After a successful exchange this releases a's old digits. */
+    mp_clear(&square);
+    return status;
+}
+
+/* }}} */
+#endif
+
+/* {{{ s_mp_div(a, b) */
+
+/*
+  s_mp_div(rem, div, quot)
+
+  Unsigned long division. On entry rem holds the dividend and quot is
+  expected to hold zero; on successful return rem holds the remainder
+  and quot the quotient.
+
+  Returns MP_RANGE if div is zero or if a quotient digit estimate cannot
+  be corrected, MP_OKAY on success, or the error code of the first
+  helper that fails.
+
+  Side effects on the general path: the signs of rem and div are forced
+  to ZPOS, and div is left multiplied by the normalization factor 2^d
+  chosen by s_mp_norm() (only rem is shifted back at the end). The
+  power-of-two shortcut leaves div untouched.
+ */
+
+mp_err s_mp_div(mp_int *rem,  /* i: dividend, o: remainder */
+                mp_int *div,  /* i: divisor                */
+                mp_int *quot) /* i: 0;        o: quotient  */
+{
+    mp_int part, t;
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
+    mp_word q_msd;
+#else
+    mp_digit q_msd;
+#endif
+    mp_err res;
+    mp_digit d;
+    mp_digit div_msd;
+    int ix;
+
+    if (mp_cmp_z(div) == 0)
+        return MP_RANGE;
+
+    /* Null digit pointer so that CLEANUP can clear t even if it was
+       never initialized */
+    DIGITS(&t) = 0;
+    /* Shortcut if divisor is power of two */
+    if ((ix = s_mp_ispow2(div)) >= 0) {
+        MP_CHECKOK(mp_copy(rem, quot));
+        s_mp_div_2d(quot, (mp_digit)ix);
+        s_mp_mod_2d(rem, (mp_digit)ix);
+
+        return MP_OKAY;
+    }
+
+    MP_SIGN(rem) = ZPOS;
+    MP_SIGN(div) = ZPOS;
+    MP_SIGN(&part) = ZPOS;
+
+    /* A working temporary for division */
+    MP_CHECKOK(mp_init_size(&t, MP_ALLOC(rem)));
+
+    /* Normalize to optimize guessing */
+    MP_CHECKOK(s_mp_norm(rem, div, &d));
+
+    /* Perform the division itself...woo! */
+    MP_USED(quot) = MP_ALLOC(quot);
+
+    /* Find a partial substring of rem which is at least div */
+    /* If we didn't find one, we're finished dividing */
+    while (MP_USED(rem) > MP_USED(div) || s_mp_cmp(rem, div) >= 0) {
+        int i;
+        int unusedRem;
+        int partExtended = 0; /* set to true if we need to extend part */
+
+        /* part is a window onto the top digits of rem; it shares rem's
+           storage, so subtracting from part updates rem in place */
+        unusedRem = MP_USED(rem) - MP_USED(div);
+        MP_DIGITS(&part) = MP_DIGITS(rem) + unusedRem;
+        MP_ALLOC(&part) = MP_ALLOC(rem) - unusedRem;
+        MP_USED(&part) = MP_USED(div);
+
+        /* We have now truncated the part of the remainder to the same length as
+         * the divisor. If part is smaller than div, extend part by one digit. */
+        if (s_mp_cmp(&part, div) < 0) {
+            --unusedRem;
+#if MP_ARGCHK == 2
+            assert(unusedRem >= 0);
+#endif
+            --MP_DIGITS(&part);
+            ++MP_USED(&part);
+            ++MP_ALLOC(&part);
+            partExtended = 1;
+        }
+
+        /* Compute a guess for the next quotient digit */
+        q_msd = MP_DIGIT(&part, MP_USED(&part) - 1);
+        div_msd = MP_DIGIT(div, MP_USED(div) - 1);
+        if (!partExtended) {
+            /* In this case, q_msd /= div_msd is always 1. First, since div_msd is
+             * normalized to have the high bit set, 2*div_msd > MP_DIGIT_MAX. Since
+             * we didn't extend part, q_msd >= div_msd. Therefore we know that
+             * div_msd <= q_msd <= MP_DIGIT_MAX < 2*div_msd. Dividing by div_msd we
+             * get 1 <= q_msd/div_msd < 2. So q_msd /= div_msd must be 1. */
+            q_msd = 1;
+        } else {
+#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD)
+            q_msd = (q_msd << MP_DIGIT_BIT) | MP_DIGIT(&part, MP_USED(&part) - 2);
+            q_msd /= div_msd;
+            if (q_msd == RADIX)
+                --q_msd;
+#else
+            if (q_msd == div_msd) {
+                q_msd = MP_DIGIT_MAX;
+            } else {
+                mp_digit r;
+                MP_CHECKOK(s_mpv_div_2dx1d(q_msd, MP_DIGIT(&part, MP_USED(&part) - 2),
+                                           div_msd, &q_msd, &r));
+            }
+#endif
+        }
+#if MP_ARGCHK == 2
+        assert(q_msd > 0); /* This case should never occur any more. */
+#endif
+        if (q_msd <= 0)
+            break;
+
+        /* See what that multiplies out to */
+        /* A failed copy would leave stale digits in t, so check it */
+        MP_CHECKOK(mp_copy(div, &t));
+        MP_CHECKOK(s_mp_mul_d(&t, (mp_digit)q_msd));
+
+        /*
+           If it's too big, back it off.  We should not have to do this
+           more than once, or, in rare cases, twice.  Knuth describes a
+           method by which this could be reduced to a maximum of once, but
+           I didn't implement that here.
+         * When using s_mpv_div_2dx1d, we may have to do this 3 times.
+         */
+        for (i = 4; s_mp_cmp(&t, &part) > 0 && i > 0; --i) {
+            --q_msd;
+            MP_CHECKOK(s_mp_sub(&t, div)); /* t -= div */
+        }
+        /* The loop stops at i == 0 once its correction budget is spent,
+           whether or not the last correction was enough, so compare once
+           more to tell "fixed on the last try" from "still too big". */
+        if (i == 0 && s_mp_cmp(&t, &part) > 0) {
+            res = MP_RANGE;
+            goto CLEANUP;
+        }
+
+        /* At this point, q_msd should be the right next digit */
+        MP_CHECKOK(s_mp_sub(&part, &t)); /* part -= t */
+        s_mp_clamp(rem);
+
+        /*
+           Include the digit in the quotient.  We allocated enough memory
+           for any quotient we could ever possibly get, so we should not
+           have to check for failures here
+         */
+        MP_DIGIT(quot, unusedRem) = (mp_digit)q_msd;
+    }
+
+    /* Denormalize remainder */
+    if (d) {
+        s_mp_div_2d(rem, d);
+    }
+
+    s_mp_clamp(quot);
+
+CLEANUP:
+    mp_clear(&t);
+
+    return res;
+
+} /* end s_mp_div() */
+
+/* }}} */
+
+/* {{{ s_mp_2expt(a, k) */
+
+mp_err
+s_mp_2expt(mp_int *a, mp_digit k)
+{
+ /* Split bit position k into a digit index and a bit offset within it. */
+ const mp_size digit_ix = k / DIGIT_BIT;
+ const mp_size bit_ix = k % DIGIT_BIT;
+ mp_err status;
+
+ /* Clear a, then pad it out to digit_ix + 1 digits. */
+ mp_zero(a);
+ status = s_mp_pad(a, digit_ix + 1);
+ if (status != MP_OKAY)
+ return status;
+
+ /* Set the one bit that makes a equal to 2^k. */
+ DIGIT(a, digit_ix) |= ((mp_digit)1 << bit_ix);
+ return MP_OKAY;
+} /* end s_mp_2expt() */
+
+/* }}} */
+
+/* {{{ s_mp_reduce(x, m, mu) */
+
+/*
+ Compute Barrett reduction, x (mod m), given a precomputed value for
+ mu = b^(2k) / m, where b = RADIX and k = #digits(m). This should be
+ faster than straight division, when many reductions by the same
+ value of m are required (such as in modular exponentiation). This
+ can nearly halve the time required to do modular exponentiation,
+ as compared to using the full integer divide to reduce.
+
+ This algorithm was derived from the _Handbook of Applied
+ Cryptography_ by Menezes, van Oorschot and Vanstone, Ch. 14,
+ pp. 603-604.
+ */
+/* x is reduced in place. Returns MP_OKAY, or the error code of the step that failed. */
+mp_err
+s_mp_reduce(mp_int *x, const mp_int *m, const mp_int *mu)
+{
+ mp_int q;
+ mp_err res;
+
+ if ((res = mp_init_copy(&q, x)) != MP_OKAY)
+ return res;
+
+ s_mp_rshd(&q, USED(m) - 1); /* q1 = x / b^(k-1) */
+ MP_CHECKOK(s_mp_mul(&q, mu)); /* q2 = q1 * mu; a failure must not be ignored */
+ s_mp_rshd(&q, USED(m) + 1); /* q3 = q2 / b^(k+1) */
+
+ /* x = x mod b^(k+1), quick (no division) */
+ s_mp_mod_2d(x, DIGIT_BIT * (USED(m) + 1));
+
+ /* q = q * m mod b^(k+1), quick (no division) */
+ MP_CHECKOK(s_mp_mul(&q, m)); /* a failure must not be ignored here either */
+ s_mp_mod_2d(&q, DIGIT_BIT * (USED(m) + 1));
+
+ /* x = x - q */
+ if ((res = mp_sub(x, &q, x)) != MP_OKAY)
+ goto CLEANUP;
+
+ /* If x < 0, add b^(k+1) to it */
+ if (mp_cmp_z(x) < 0) {
+ mp_set(&q, 1);
+ if ((res = s_mp_lshd(&q, USED(m) + 1)) != MP_OKAY)
+ goto CLEANUP;
+ if ((res = mp_add(x, &q, x)) != MP_OKAY)
+ goto CLEANUP;
+ }
+
+ /* Back off if it's too big */
+ while (mp_cmp(x, m) >= 0) {
+ if ((res = s_mp_sub(x, m)) != MP_OKAY)
+ break;
+ }
+
+CLEANUP:
+ mp_clear(&q); /* q is a private copy; release it on every path */
+
+ return res;
+
+} /* end s_mp_reduce() */
+
+/* }}} */
+
+/* }}} */
+
+/* {{{ Primitive comparisons */
+
+/* {{{ s_mp_cmp(a, b) */
+
+/* Compare |a| <=> |b| (signs are not examined); returns MP_EQ, MP_LT or MP_GT */
+int
+s_mp_cmp(const mp_int *a, const mp_int *b)
+{
+ mp_size used_a = MP_USED(a);
+ {
+ mp_size used_b = MP_USED(b);
+
+ if (used_a > used_b) /* the operand with more digits in use is reported as larger */
+ goto IS_GT;
+ if (used_a < used_b)
+ goto IS_LT;
+ }
+ {
+ mp_digit *pa, *pb;
+ mp_digit da = 0, db = 0;
+ /* CMP_AB(n): load digit n of both operands and leave the loop at the first mismatch */
+#define CMP_AB(n) \
+ if ((da = pa[n]) != (db = pb[n])) \
+ goto done
+
+ pa = MP_DIGITS(a) + used_a; /* one past the highest digit in use */
+ pb = MP_DIGITS(b) + used_a; /* both operands use used_a digits at this point */
+ while (used_a >= 4) { /* four digits per pass, highest index first */
+ pa -= 4;
+ pb -= 4;
+ used_a -= 4;
+ CMP_AB(3);
+ CMP_AB(2);
+ CMP_AB(1);
+ CMP_AB(0);
+ }
+ while (used_a-- > 0 && ((da = *--pa) == (db = *--pb))) /* the remaining digits, one at a time */
+ /* do nothing */;
+ done: /* da/db hold the first differing pair, or an equal pair if no digits differ */
+ if (da > db)
+ goto IS_GT;
+ if (da < db)
+ goto IS_LT;
+ }
+ return MP_EQ;
+IS_LT:
+ return MP_LT;
+IS_GT:
+ return MP_GT;
+} /* end s_mp_cmp() */
+
+/* }}} */
+
+/* {{{ s_mp_cmp_d(a, d) */
+
+/* Compare |a| <=> d, return 0 if equal, <0 if a<d, >0 if a>d */
+int
+s_mp_cmp_d(const mp_int *a, mp_digit d)
+{
+ mp_digit low;
+
+ /* More than one digit in use is always reported as greater. */
+ if (USED(a) > 1)
+ return MP_GT;
+
+ low = DIGIT(a, 0);
+ if (low == d)
+ return MP_EQ;
+ return (low < d) ? MP_LT : MP_GT;
+} /* end s_mp_cmp_d() */
+
+/* }}} */
+
+/* {{{ s_mp_ispow2(v) */
+
+/*
+ Returns -1 if the value is not a power of two; otherwise, it returns
+ k such that v = 2^k, i.e. lg(v).
+ */
+int
+s_mp_ispow2(const mp_int *v)
+{
+ int top = MP_USED(v) - 1; /* index of the most significant digit */
+ int lg;
+ int pos;
+
+ /* The most significant digit must itself be a power of two. */
+ lg = s_mp_ispow2d(MP_DIGIT(v, top));
+ if (lg < 0)
+ return lg;
+
+ /* All lower digits must be zero; each one adds MP_DIGIT_BIT to lg(v). */
+ for (pos = top - 1; pos >= 0; --pos) {
+ if (DIGIT(v, pos) != 0)
+ return -1; /* not a power of two */
+ lg += MP_DIGIT_BIT;
+ }
+
+ return lg;
+
+} /* end s_mp_ispow2() */
+
+/* }}} */
+
+/* {{{ s_mp_ispow2d(d) */
+/* Returns k such that d == 2^k, or -1 if d is zero or has more than one bit set. */
+int
+s_mp_ispow2d(mp_digit d)
+{
+ if ((d != 0) && ((d & (d - 1)) == 0)) { /* exactly one bit set: d is a power of 2 */
+ int pow = 0; /* index of the set bit, assembled one binary place at a time */
+#if defined(MP_USE_UINT_DIGIT) /* 32-bit masks */
+ if (d & 0xffff0000U) /* the set bit is in the upper 16 bits */
+ pow += 16;
+ if (d & 0xff00ff00U) /* ...in the upper byte of a 16-bit group */
+ pow += 8;
+ if (d & 0xf0f0f0f0U) /* ...in the upper nibble of a byte */
+ pow += 4;
+ if (d & 0xccccccccU) /* ...in the upper pair of a nibble */
+ pow += 2;
+ if (d & 0xaaaaaaaaU) /* ...at an odd bit position */
+ pow += 1;
+#elif defined(MP_USE_LONG_LONG_DIGIT) /* 64-bit masks, same scheme with one more step */
+ if (d & 0xffffffff00000000ULL)
+ pow += 32;
+ if (d & 0xffff0000ffff0000ULL)
+ pow += 16;
+ if (d & 0xff00ff00ff00ff00ULL)
+ pow += 8;
+ if (d & 0xf0f0f0f0f0f0f0f0ULL)
+ pow += 4;
+ if (d & 0xccccccccccccccccULL)
+ pow += 2;
+ if (d & 0xaaaaaaaaaaaaaaaaULL)
+ pow += 1;
+#elif defined(MP_USE_LONG_DIGIT) /* 64-bit masks with unsigned long suffixes */
+ if (d & 0xffffffff00000000UL)
+ pow += 32;
+ if (d & 0xffff0000ffff0000UL)
+ pow += 16;
+ if (d & 0xff00ff00ff00ff00UL)
+ pow += 8;
+ if (d & 0xf0f0f0f0f0f0f0f0UL)
+ pow += 4;
+ if (d & 0xccccccccccccccccUL)
+ pow += 2;
+ if (d & 0xaaaaaaaaaaaaaaaaUL)
+ pow += 1;
+#else
+#error "unknown type for mp_digit"
+#endif
+ return pow;
+ }
+ return -1; /* zero, or more than one bit set */
+
+} /* end s_mp_ispow2d() */
+
+/* }}} */
+
+/* }}} */
+
+/* {{{ Primitive I/O helpers */
+
+/* {{{ s_mp_tovalue(ch, r) */
+
+/*
+ Convert the given character to its digit value, in the given radix.
+ If the given character is not understood in the given radix, -1 is
+ returned. Otherwise the digit's numeric value is returned.
+
+ The results will be odd if you use a radix < 2 or > 64, you are
+ expected to know what you're up to.
+ */
+int
+s_mp_tovalue(char ch, int r)
+{
+ int val, xch;
+
+ if (r > 36)
+ xch = (unsigned char)ch; /* <ctype.h> functions need an unsigned char value */
+ else
+ xch = toupper((unsigned char)ch); /* radix <= 36 is case-insensitive */
+
+ if (isdigit(xch))
+ val = xch - '0';
+ else if (isupper(xch))
+ val = xch - 'A' + 10;
+ else if (islower(xch))
+ val = xch - 'a' + 36;
+ else if (xch == '+')
+ val = 62;
+ else if (xch == '/')
+ val = 63;
+ else
+ return -1;
+
+ if (val < 0 || val >= r) /* a valid character, but not a digit of radix r */
+ return -1;
+
+ return val;
+
+} /* end s_mp_tovalue() */
+
+/* }}} */
+
+/* {{{ s_mp_todigit(val, r, low) */
+
+/*
+ Convert val to a radix-r digit, if possible. If val is out of range
+ for r, returns zero. Otherwise, returns an ASCII character denoting
+ the value in the given radix.
+
+ The results may be odd if you use a radix < 2 or > 64, you are
+ expected to know what you're doing.
+ */
+
+char
+s_mp_todigit(mp_digit val, int r, int low)
+{
+ char digit_ch;
+
+ /* A value that is not a digit of radix r maps to 0. */
+ if (val >= r)
+ return 0;
+
+ digit_ch = s_dmap_1[val];
+
+ /* Lower case is produced only on request, and only up to radix 36. */
+ if (low && r <= 36)
+ return tolower(digit_ch);
+ return digit_ch;
+} /* end s_mp_todigit() */
+
+/* }}} */
+
+/* {{{ s_mp_outlen(bits, radix) */
+
+/*
+ Return an estimate for how long a string is needed to hold a radix
+ r representation of a number with 'bits' significant bits, plus an
+ extra for a zero terminator (assuming C style strings here)
+ */
+int
+s_mp_outlen(int bits, int r)
+{
+ return (int)((double)bits * LOG_V_2(r) + 1.5) + 1; /* truncating cast; the final +1 is the terminator. NOTE(review): LOG_V_2(r) is presumably log_r(2) - confirm */
+
+} /* end s_mp_outlen() */
+
+/* }}} */
+
+/* }}} */
+
+/* {{{ mp_read_unsigned_octets(mp, str, len) */
+/* mp_read_unsigned_octets(mp, str, len)
+ Read in a raw value (base 256) into the given mp_int
+ No sign bit, number is positive. Leading zeros ignored.
+ */
+
+mp_err
+mp_read_unsigned_octets(mp_int *mp, const unsigned char *str, mp_size len)
+{
+ int count;
+ mp_err res;
+ mp_digit d;
+
+ ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG);
+
+ mp_zero(mp);
+
+ count = len % sizeof(mp_digit); /* leading octets that do not fill a whole digit */
+ if (count) {
+ for (d = 0; count-- > 0; --len) { /* pack them, first octet most significant */
+ d = (d << 8) | *str++;
+ }
+ MP_DIGIT(mp, 0) = d;
+ }
+
+ /* Read the rest of the digits; len is now a multiple of sizeof(mp_digit) */
+ for (; len > 0; len -= sizeof(mp_digit)) {
+ for (d = 0, count = sizeof(mp_digit); count > 0; --count) {
+ d = (d << 8) | *str++;
+ }
+ if (MP_EQ == mp_cmp_z(mp)) { /* only zeros have been read so far */
+ if (!d)
+ continue; /* skip a leading all-zero digit */
+ } else {
+ if ((res = s_mp_lshd(mp, 1)) != MP_OKAY) /* shift up one digit to make room */
+ return res;
+ }
+ MP_DIGIT(mp, 0) = d;
+ }
+ return MP_OKAY;
+} /* end mp_read_unsigned_octets() */
+/* }}} */
+
+/* {{{ mp_unsigned_octet_size(mp) */
+unsigned int
+mp_unsigned_octet_size(const mp_int *mp)
+{
+ unsigned int bytes; /* running count of octets needed */
+ int ix;
+ mp_digit d = 0;
+
+ ARGCHK(mp != NULL, MP_BADARG); /* NOTE(review): with MP_ARGCHK == 1 a failure returns MP_BADARG converted to unsigned int */
+ ARGCHK(MP_ZPOS == SIGN(mp), MP_BADARG);
+
+ bytes = (USED(mp) * sizeof(mp_digit)); /* upper bound: every used digit in full */
+
+ /* subtract leading zeros. */
+ /* Iterate over each digit... */
+ for (ix = USED(mp) - 1; ix >= 0; ix--) {
+ d = DIGIT(mp, ix);
+ if (d)
+ break; /* d now holds the highest non-zero digit */
+ bytes -= sizeof(d);
+ }
+ if (!bytes)
+ return 1; /* every digit was zero: report one octet */
+
+ /* Have MSD, check digit bytes, high order first */
+ for (ix = sizeof(mp_digit) - 1; ix >= 0; ix--) {
+ unsigned char x = (unsigned char)(d >> (ix * CHAR_BIT));
+ if (x)
+ break;
+ --bytes; /* one fewer octet for each leading zero byte of the top digit */
+ }
+ return bytes;
+} /* end mp_unsigned_octet_size() */
+/* }}} */
+
+/* {{{ mp_to_unsigned_octets(mp, str) */
+/* Write mp to str as big endian octets without leading zeros, no longer than maxlen. Returns the number of octets written, or MP_BADARG. */
+mp_err
+mp_to_unsigned_octets(const mp_int *mp, unsigned char *str, mp_size maxlen)
+{
+ int ix, jx, pos = 0;
+ unsigned int bytes;
+
+ ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
+
+ bytes = mp_unsigned_octet_size(mp);
+ ARGCHK(bytes <= maxlen, MP_BADARG);
+ if (bytes > maxlen) /* ARGCHK can compile to nothing; never write past str */
+ return MP_BADARG;
+
+ /* Iterate over each digit... */
+ for (ix = USED(mp) - 1; ix >= 0; ix--) {
+ mp_digit d = DIGIT(mp, ix);
+ /* Unpack digit bytes, high order first */
+ for (jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
+ unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
+ if (!pos && !x) /* suppress leading zeros */
+ continue;
+ str[pos++] = x;
+ }
+ }
+ if (!pos) /* a zero value is still written as a single 0x00 octet */
+ str[pos++] = 0;
+ return pos; /* success is reported as the octet count, not MP_OKAY */
+} /* end mp_to_unsigned_octets() */
+/* }}} */
+
+/* {{{ mp_to_signed_octets(mp, str) */
+/* Write mp to str as big endian octets, with a leading 0x00 if the top bit would be set. Returns the octet count (at most maxlen), or MP_BADARG. */
+mp_err
+mp_to_signed_octets(const mp_int *mp, unsigned char *str, mp_size maxlen)
+{
+ int ix, jx, pos = 0;
+ unsigned int bytes;
+
+ ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
+
+ bytes = mp_unsigned_octet_size(mp);
+ ARGCHK(bytes <= maxlen, MP_BADARG);
+ if (bytes > maxlen) /* ARGCHK can compile to nothing; never write past str */
+ return MP_BADARG;
+
+ /* Iterate over each digit... */
+ for (ix = USED(mp) - 1; ix >= 0; ix--) {
+ mp_digit d = DIGIT(mp, ix);
+ /* Unpack digit bytes, high order first */
+ for (jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
+ unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
+ if (!pos) {
+ if (!x) /* suppress leading zeros */
+ continue;
+ if (x & 0x80) { /* add one leading zero to make output positive. */
+ ARGCHK(bytes + 1 <= maxlen, MP_BADARG);
+ if (bytes + 1 > maxlen)
+ return MP_BADARG;
+ str[pos++] = 0;
+ }
+ }
+ str[pos++] = x;
+ }
+ }
+ if (!pos) /* a zero value is still written as a single 0x00 octet */
+ str[pos++] = 0;
+ return pos; /* success is reported as the octet count, not MP_OKAY */
+} /* end mp_to_signed_octets() */
+/* }}} */
+
+/* {{{ mp_to_fixlen_octets(mp, str) */
+/* Write mp to str as exactly `length` big endian octets, zero-padded on the left. Returns MP_OKAY, or MP_BADARG if mp does not fit. */
+mp_err
+mp_to_fixlen_octets(const mp_int *mp, unsigned char *str, mp_size length)
+{
+ int ix, jx, pos = 0;
+ unsigned int bytes;
+
+ ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG);
+
+ bytes = mp_unsigned_octet_size(mp);
+ ARGCHK(bytes <= length, MP_BADARG);
+ if (bytes > length) /* ARGCHK can compile to nothing; never write past str */
+ return MP_BADARG;
+
+ /* place any needed leading zeros */
+ for (; length > bytes; --length) {
+ *str++ = 0;
+ }
+
+ /* Iterate over each digit... */
+ for (ix = USED(mp) - 1; ix >= 0; ix--) {
+ mp_digit d = DIGIT(mp, ix);
+ /* Unpack digit bytes, high order first */
+ for (jx = sizeof(mp_digit) - 1; jx >= 0; jx--) {
+ unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT));
+ if (!pos && !x) /* suppress leading zeros */
+ continue;
+ str[pos++] = x;
+ }
+ }
+ if (!pos) /* a zero value still needs its final 0x00 octet */
+ str[pos++] = 0;
+ return MP_OKAY;
+} /* end mp_to_fixlen_octets() */
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/* HERE THERE BE DRAGONS */
diff --git a/security/nss/lib/freebl/mpi/mpi.h b/security/nss/lib/freebl/mpi/mpi.h
new file mode 100644
index 000000000..64ffe75d5
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi.h
@@ -0,0 +1,313 @@
+/*
+ * mpi.h
+ *
+ * Arbitrary precision integer arithmetic library
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _H_MPI_
+#define _H_MPI_
+
+#include "mpi-config.h"
+
+#include "seccomon.h"
+SEC_BEGIN_PROTOS
+
+#if MP_DEBUG
+#undef MP_IOFUNC
+#define MP_IOFUNC 1
+#endif
+
+#if MP_IOFUNC
+#include <stdio.h>
+#include <ctype.h>
+#endif
+
+#include <limits.h>
+
+#if defined(BSDI)
+#undef ULLONG_MAX
+#endif
+
+#include <sys/types.h>
+
+#define MP_NEG 1
+#define MP_ZPOS 0
+
+#define MP_OKAY 0 /* no error, all is well */
+#define MP_YES 0 /* yes (boolean result) */
+#define MP_NO -1 /* no (boolean result) */
+#define MP_MEM -2 /* out of memory */
+#define MP_RANGE -3 /* argument out of range */
+#define MP_BADARG -4 /* invalid parameter */
+#define MP_UNDEF -5 /* answer is undefined */
+#define MP_LAST_CODE MP_UNDEF
+
+typedef unsigned int mp_sign;
+typedef unsigned int mp_size;
+typedef int mp_err;
+
+#define MP_32BIT_MAX 4294967295U
+
+#if !defined(ULONG_MAX)
+#error "ULONG_MAX not defined"
+#elif !defined(UINT_MAX)
+#error "UINT_MAX not defined"
+#elif !defined(USHRT_MAX)
+#error "USHRT_MAX not defined"
+#endif
+
+#if defined(ULLONG_MAX) /* C99, Solaris */
+#define MP_ULONG_LONG_MAX ULLONG_MAX
+/* MP_ULONG_LONG_MAX was defined to be ULLONG_MAX */
+#elif defined(ULONG_LONG_MAX) /* HPUX */
+#define MP_ULONG_LONG_MAX ULONG_LONG_MAX
+#elif defined(ULONGLONG_MAX) /* IRIX, AIX */
+#define MP_ULONG_LONG_MAX ULONGLONG_MAX
+#endif
+
+/* We use unsigned long for mp_digit only if long is more than 32 bits and MP_USE_UINT_DIGIT is not set. */
+#if !defined(MP_USE_UINT_DIGIT) && ULONG_MAX > MP_32BIT_MAX
+typedef unsigned long mp_digit;
+#define MP_DIGIT_MAX ULONG_MAX
+#define MP_DIGIT_FMT "%016lX" /* printf() format for 1 digit */
+#define MP_HALF_DIGIT_MAX UINT_MAX
+#undef MP_NO_MP_WORD
+#define MP_NO_MP_WORD 1 /* disables the wide mp_word selection further down */
+#undef MP_USE_LONG_DIGIT
+#define MP_USE_LONG_DIGIT 1
+#undef MP_USE_LONG_LONG_DIGIT
+
+#elif !defined(MP_USE_UINT_DIGIT) && defined(MP_ULONG_LONG_MAX) /* otherwise unsigned long long, if the platform has one */
+typedef unsigned long long mp_digit;
+#define MP_DIGIT_MAX MP_ULONG_LONG_MAX
+#define MP_DIGIT_FMT "%016llX" /* printf() format for 1 digit */
+#define MP_HALF_DIGIT_MAX UINT_MAX
+#undef MP_NO_MP_WORD
+#define MP_NO_MP_WORD 1 /* disables the wide mp_word selection further down */
+#undef MP_USE_LONG_LONG_DIGIT
+#define MP_USE_LONG_LONG_DIGIT 1
+#undef MP_USE_LONG_DIGIT
+
+#else /* fall back to unsigned int digits */
+typedef unsigned int mp_digit;
+#define MP_DIGIT_MAX UINT_MAX
+#define MP_DIGIT_FMT "%08X" /* printf() format for 1 digit */
+#define MP_HALF_DIGIT_MAX USHRT_MAX
+#undef MP_USE_UINT_DIGIT
+#define MP_USE_UINT_DIGIT 1
+#undef MP_USE_LONG_LONG_DIGIT
+#undef MP_USE_LONG_DIGIT
+#endif
+
+#if !defined(MP_NO_MP_WORD)
+#if defined(MP_USE_UINT_DIGIT) && \
+ (defined(MP_ULONG_LONG_MAX) || (ULONG_MAX > UINT_MAX))
+
+#if (ULONG_MAX > UINT_MAX)
+typedef unsigned long mp_word;
+typedef long mp_sword;
+#define MP_WORD_MAX ULONG_MAX
+
+#else
+typedef unsigned long long mp_word;
+typedef long long mp_sword;
+#define MP_WORD_MAX MP_ULONG_LONG_MAX
+#endif
+
+#else
+#define MP_NO_MP_WORD 1
+#endif
+#endif /* !defined(MP_NO_MP_WORD) */
+
+#if !defined(MP_WORD_MAX) && defined(MP_DEFINE_SMALL_WORD)
+typedef unsigned int mp_word;
+typedef int mp_sword;
+#define MP_WORD_MAX UINT_MAX
+#endif
+
+#define MP_DIGIT_BIT (CHAR_BIT * sizeof(mp_digit))
+#define MP_WORD_BIT (CHAR_BIT * sizeof(mp_word))
+#define MP_RADIX (1 + (mp_word)MP_DIGIT_MAX)
+
+#define MP_HALF_DIGIT_BIT (MP_DIGIT_BIT / 2)
+#define MP_HALF_RADIX (1 + (mp_digit)MP_HALF_DIGIT_MAX)
+/* MP_HALF_RADIX really ought to be called MP_SQRT_RADIX, but it's named
+** MP_HALF_RADIX because it's the radix for MP_HALF_DIGITs, and it's
+** consistent with the other _HALF_ names.
+*/
+
+/* Macros for accessing the mp_int internals */
+#define MP_SIGN(MP) ((MP)->sign)
+#define MP_USED(MP) ((MP)->used)
+#define MP_ALLOC(MP) ((MP)->alloc)
+#define MP_DIGITS(MP) ((MP)->dp)
+#define MP_DIGIT(MP, N) (MP)->dp[(N)]
+
+/* This defines the maximum I/O base (minimum is 2) */
+#define MP_MAX_RADIX 64
+
+typedef struct {
+ mp_sign sign; /* sign of this quantity (MP_ZPOS or MP_NEG) */
+ mp_size alloc; /* how many digits allocated */
+ mp_size used; /* how many digits used */
+ mp_digit *dp; /* the digits themselves; dp[used - 1] is the most significant */
+} mp_int;
+
+/* Default precision */
+mp_size mp_get_prec(void);
+void mp_set_prec(mp_size prec);
+
+/* Memory management */
+mp_err mp_init(mp_int *mp);
+mp_err mp_init_size(mp_int *mp, mp_size prec);
+mp_err mp_init_copy(mp_int *mp, const mp_int *from);
+mp_err mp_copy(const mp_int *from, mp_int *to);
+void mp_exch(mp_int *mp1, mp_int *mp2);
+void mp_clear(mp_int *mp);
+void mp_zero(mp_int *mp);
+void mp_set(mp_int *mp, mp_digit d);
+mp_err mp_set_int(mp_int *mp, long z);
+#define mp_set_long(mp, z) mp_set_int(mp, z)
+mp_err mp_set_ulong(mp_int *mp, unsigned long z);
+
+/* Single digit arithmetic */
+mp_err mp_add_d(const mp_int *a, mp_digit d, mp_int *b);
+mp_err mp_sub_d(const mp_int *a, mp_digit d, mp_int *b);
+mp_err mp_mul_d(const mp_int *a, mp_digit d, mp_int *b);
+mp_err mp_mul_2(const mp_int *a, mp_int *c);
+mp_err mp_div_d(const mp_int *a, mp_digit d, mp_int *q, mp_digit *r);
+mp_err mp_div_2(const mp_int *a, mp_int *c);
+mp_err mp_expt_d(const mp_int *a, mp_digit d, mp_int *c);
+
+/* Sign manipulations */
+mp_err mp_abs(const mp_int *a, mp_int *b);
+mp_err mp_neg(const mp_int *a, mp_int *b);
+
+/* Full arithmetic */
+mp_err mp_add(const mp_int *a, const mp_int *b, mp_int *c);
+mp_err mp_sub(const mp_int *a, const mp_int *b, mp_int *c);
+mp_err mp_mul(const mp_int *a, const mp_int *b, mp_int *c);
+#if MP_SQUARE
+mp_err mp_sqr(const mp_int *a, mp_int *b);
+#else
+#define mp_sqr(a, b) mp_mul(a, a, b)
+#endif
+mp_err mp_div(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r);
+mp_err mp_div_2d(const mp_int *a, mp_digit d, mp_int *q, mp_int *r);
+mp_err mp_expt(mp_int *a, mp_int *b, mp_int *c);
+mp_err mp_2expt(mp_int *a, mp_digit k);
+
+/* Modular arithmetic */
+#if MP_MODARITH
+mp_err mp_mod(const mp_int *a, const mp_int *m, mp_int *c);
+mp_err mp_mod_d(const mp_int *a, mp_digit d, mp_digit *c);
+mp_err mp_addmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c);
+mp_err mp_submod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c);
+mp_err mp_mulmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c);
+#if MP_SQUARE
+mp_err mp_sqrmod(const mp_int *a, const mp_int *m, mp_int *c);
+#else
+#define mp_sqrmod(a, m, c) mp_mulmod(a, a, m, c)
+#endif
+mp_err mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c);
+mp_err mp_exptmod_d(const mp_int *a, mp_digit d, const mp_int *m, mp_int *c);
+#endif /* MP_MODARITH */
+
+/* Comparisons */
+int mp_cmp_z(const mp_int *a);
+int mp_cmp_d(const mp_int *a, mp_digit d);
+int mp_cmp(const mp_int *a, const mp_int *b);
+int mp_cmp_mag(const mp_int *a, const mp_int *b);
+int mp_isodd(const mp_int *a);
+int mp_iseven(const mp_int *a);
+
+/* Number theoretic */
+#if MP_NUMTH
+mp_err mp_gcd(mp_int *a, mp_int *b, mp_int *c);
+mp_err mp_lcm(mp_int *a, mp_int *b, mp_int *c);
+mp_err mp_xgcd(const mp_int *a, const mp_int *b, mp_int *g, mp_int *x, mp_int *y);
+mp_err mp_invmod(const mp_int *a, const mp_int *m, mp_int *c);
+mp_err mp_invmod_xgcd(const mp_int *a, const mp_int *m, mp_int *c);
+#endif /* end MP_NUMTH */
+
+/* Input and output */
+#if MP_IOFUNC
+void mp_print(mp_int *mp, FILE *ofp);
+#endif /* end MP_IOFUNC */
+
+/* Base conversion */
+mp_err mp_read_raw(mp_int *mp, char *str, int len);
+int mp_raw_size(mp_int *mp);
+mp_err mp_toraw(mp_int *mp, char *str);
+mp_err mp_read_radix(mp_int *mp, const char *str, int radix);
+mp_err mp_read_variable_radix(mp_int *a, const char *str, int default_radix);
+int mp_radix_size(mp_int *mp, int radix);
+mp_err mp_toradix(mp_int *mp, char *str, int radix);
+int mp_tovalue(char ch, int r);
+
+#define mp_tobinary(M, S) mp_toradix((M), (S), 2)
+#define mp_tooctal(M, S) mp_toradix((M), (S), 8)
+#define mp_todecimal(M, S) mp_toradix((M), (S), 10)
+#define mp_tohex(M, S) mp_toradix((M), (S), 16)
+
+/* Error strings */
+const char *mp_strerror(mp_err ec);
+
+/* Octet string conversion functions */
+mp_err mp_read_unsigned_octets(mp_int *mp, const unsigned char *str, mp_size len);
+unsigned int mp_unsigned_octet_size(const mp_int *mp);
+mp_err mp_to_unsigned_octets(const mp_int *mp, unsigned char *str, mp_size maxlen);
+mp_err mp_to_signed_octets(const mp_int *mp, unsigned char *str, mp_size maxlen);
+mp_err mp_to_fixlen_octets(const mp_int *mp, unsigned char *str, mp_size len);
+
+/* Miscellaneous */
+mp_size mp_trailing_zeros(const mp_int *mp);
+void freebl_cpuid(unsigned long op, unsigned long *eax,
+ unsigned long *ebx, unsigned long *ecx,
+ unsigned long *edx);
+
+#define MP_CHECKOK(x) \
+ if (MP_OKAY > (res = (x))) \
+ goto CLEANUP /* stores x in the caller's 'res'; jumps to its CLEANUP label when the code is below MP_OKAY */
+#define MP_CHECKERR(x) \
+ if (MP_OKAY > (res = (x))) \
+ goto CLEANUP /* same expansion as MP_CHECKOK */
+
+#define NEG MP_NEG
+#define ZPOS MP_ZPOS
+#define DIGIT_MAX MP_DIGIT_MAX
+#define DIGIT_BIT MP_DIGIT_BIT
+#define DIGIT_FMT MP_DIGIT_FMT
+#define RADIX MP_RADIX
+#define MAX_RADIX MP_MAX_RADIX
+#define SIGN(MP) MP_SIGN(MP)
+#define USED(MP) MP_USED(MP)
+#define ALLOC(MP) MP_ALLOC(MP)
+#define DIGITS(MP) MP_DIGITS(MP)
+#define DIGIT(MP, N) MP_DIGIT(MP, N)
+
+#if MP_ARGCHK == 1 /* a failed check makes the calling function return Y */
+#define ARGCHK(X, Y) \
+ { \
+ if (!(X)) { \
+ return (Y); \
+ } \
+ }
+#elif MP_ARGCHK == 2 /* a failed check trips assert(); Y is not used */
+#include <assert.h>
+#define ARGCHK(X, Y) assert(X)
+#else /* any other setting: the check expands to nothing */
+#define ARGCHK(X, Y) /* */
+#endif
+
+#ifdef CT_VERIF
+void mp_taint(mp_int *mp);
+void mp_untaint(mp_int *mp);
+#endif
+
+SEC_END_PROTOS
+
+#endif /* end _H_MPI_ */
diff --git a/security/nss/lib/freebl/mpi/mpi_amd64.c b/security/nss/lib/freebl/mpi/mpi_amd64.c
new file mode 100644
index 000000000..9e538bb6a
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_amd64.c
@@ -0,0 +1,32 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef MPI_AMD64
+#error This file only works on AMD64 platforms.
+#endif
+
+#include <mpi-priv.h>
+
+/*
+ * MPI glue
+ *
+ */
+
+/* Presently, this is only used by the Montgomery arithmetic code. */
+/* c += a * b */
+void MPI_ASM_DECL
+s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len,
+ mp_digit b, mp_digit *c)
+{
+ /* Add a * b into the low a_len digits of c; the helper hands back the carry. */
+ mp_digit carry = s_mpv_mul_add_vec64(c, a, a_len, b);
+ mp_digit *dst = c + a_len;
+
+ /* Ripple the carry into the higher digits until it is absorbed. */
+ for (; carry != 0; ++dst) {
+ const mp_digit sum = *dst + carry;
+ carry = (sum < *dst || sum < carry);
+ *dst = sum;
+ }
+}
diff --git a/security/nss/lib/freebl/mpi/mpi_amd64_gas.s b/security/nss/lib/freebl/mpi/mpi_amd64_gas.s
new file mode 100644
index 000000000..ad6e2b9d7
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_amd64_gas.s
@@ -0,0 +1,389 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+# ------------------------------------------------------------------------
+#
+# Implementation of s_mpv_mul_set_vec which exploits
+# the 64X64->128 bit unsigned multiply instruction.
+#
+# ------------------------------------------------------------------------
+
+# r = a * digit, r and a are vectors of length len
+# returns the carry digit
+# r and a are 64 bit aligned.
+# In: %rdi = r, %rsi = a, %rdx = len, %rcx = digit. Out: %rax = carry.
+# uint64_t
+# s_mpv_mul_set_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
+# NOTE(review): len is consumed as the full 64-bit %rdx; confirm callers zero-extend it.
+
+.text; .align 16; .globl s_mpv_mul_set_vec64; .type s_mpv_mul_set_vec64, @function; s_mpv_mul_set_vec64:
+
+ xorq %r9, %r9 # cy = 0 first, so that if (len == 0) .L17 returns (0)
+ testq %rdx, %rdx
+ jz .L17
+
+ movq %rdx, %r8 # Use r8 for len; %rdx is used by mul
+ # cy (%r9) was already cleared above
+
+.L15:
+ cmpq $8, %r8 # fewer than 8 digits left?
+ jb .L16
+ movq 0(%rsi), %rax # rax = a[0]
+ movq 8(%rsi), %r11 # prefetch a[1]
+ mulq %rcx # p = a[0] * digit
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 0(%rdi) # r[0] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ movq %r11, %rax
+ movq 16(%rsi), %r11 # prefetch a[2]
+ mulq %rcx # p = a[1] * digit
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 8(%rdi) # r[1] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ movq %r11, %rax
+ movq 24(%rsi), %r11 # prefetch a[3]
+ mulq %rcx # p = a[2] * digit
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 16(%rdi) # r[2] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ movq %r11, %rax
+ movq 32(%rsi), %r11 # prefetch a[4]
+ mulq %rcx # p = a[3] * digit
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 24(%rdi) # r[3] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ movq %r11, %rax
+ movq 40(%rsi), %r11 # prefetch a[5]
+ mulq %rcx # p = a[4] * digit
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 32(%rdi) # r[4] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ movq %r11, %rax
+ movq 48(%rsi), %r11 # prefetch a[6]
+ mulq %rcx # p = a[5] * digit
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 40(%rdi) # r[5] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ movq %r11, %rax
+ movq 56(%rsi), %r11 # prefetch a[7]
+ mulq %rcx # p = a[6] * digit
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 48(%rdi) # r[6] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ movq %r11, %rax
+ mulq %rcx # p = a[7] * digit
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 56(%rdi) # r[7] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ addq $64, %rsi
+ addq $64, %rdi
+ subq $8, %r8
+
+ jz .L17
+ jmp .L15
+
+.L16:
+ movq 0(%rsi), %rax
+ mulq %rcx # p = a[0] * digit
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 0(%rdi) # r[0] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+ decq %r8
+ jz .L17
+
+ movq 8(%rsi), %rax
+ mulq %rcx # p = a[1] * digit
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 8(%rdi) # r[1] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+ decq %r8
+ jz .L17
+
+ movq 16(%rsi), %rax
+ mulq %rcx # p = a[2] * digit
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 16(%rdi) # r[2] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+ decq %r8
+ jz .L17
+
+ movq 24(%rsi), %rax
+ mulq %rcx # p = a[3] * digit
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 24(%rdi) # r[3] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+ decq %r8
+ jz .L17
+
+ movq 32(%rsi), %rax
+ mulq %rcx # p = a[4] * digit
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 32(%rdi) # r[4] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+ decq %r8
+ jz .L17
+
+ movq 40(%rsi), %rax
+ mulq %rcx # p = a[5] * digit
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 40(%rdi) # r[5] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+ decq %r8
+ jz .L17
+
+ movq 48(%rsi), %rax
+ mulq %rcx # p = a[6] * digit
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 48(%rdi) # r[6] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+ decq %r8
+ jz .L17
+
+
+.L17:
+ movq %r9, %rax
+ ret
+
+.size s_mpv_mul_set_vec64, .-s_mpv_mul_set_vec64
+
+# ------------------------------------------------------------------------
+#
+# Implementation of s_mpv_mul_add_vec which exploits
+# the 64X64->128 bit unsigned multiply instruction.
+#
+# ------------------------------------------------------------------------
+
+# r += a * digit, r and a are vectors of length len
+# returns the carry digit
+# r and a are 64 bit aligned.
+# In: %rdi = r, %rsi = a, %rdx = len, %rcx = digit. Out: %rax = carry.
+# uint64_t
+# s_mpv_mul_add_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
+# NOTE(review): len is consumed as the full 64-bit %rdx; confirm callers zero-extend it.
+
+.text; .align 16; .globl s_mpv_mul_add_vec64; .type s_mpv_mul_add_vec64, @function; s_mpv_mul_add_vec64:
+
+ xorq %r9, %r9 # cy = 0 first, so that if (len == 0) .L27 returns (0)
+ testq %rdx, %rdx
+ jz .L27
+
+ movq %rdx, %r8 # Use r8 for len; %rdx is used by mul
+ # cy (%r9) was already cleared above
+
+.L25:
+ cmpq $8, %r8 # fewer than 8 digits left?
+ jb .L26
+ movq 0(%rsi), %rax # rax = a[0]
+ movq 0(%rdi), %r10 # r10 = r[0]
+ movq 8(%rsi), %r11 # prefetch a[1]
+ mulq %rcx # p = a[0] * digit
+ addq %r10, %rax
+ adcq $0, %rdx # p += r[0]
+ movq 8(%rdi), %r10 # prefetch r[1]
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 0(%rdi) # r[0] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ movq %r11, %rax
+ movq 16(%rsi), %r11 # prefetch a[2]
+ mulq %rcx # p = a[1] * digit
+ addq %r10, %rax
+ adcq $0, %rdx # p += r[1]
+ movq 16(%rdi), %r10 # prefetch r[2]
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 8(%rdi) # r[1] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ movq %r11, %rax
+ movq 24(%rsi), %r11 # prefetch a[3]
+ mulq %rcx # p = a[2] * digit
+ addq %r10, %rax
+ adcq $0, %rdx # p += r[2]
+ movq 24(%rdi), %r10 # prefetch r[3]
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 16(%rdi) # r[2] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ movq %r11, %rax
+ movq 32(%rsi), %r11 # prefetch a[4]
+ mulq %rcx # p = a[3] * digit
+ addq %r10, %rax
+ adcq $0, %rdx # p += r[3]
+ movq 32(%rdi), %r10 # prefetch r[4]
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 24(%rdi) # r[3] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ movq %r11, %rax
+ movq 40(%rsi), %r11 # prefetch a[5]
+ mulq %rcx # p = a[4] * digit
+ addq %r10, %rax
+ adcq $0, %rdx # p += r[4]
+ movq 40(%rdi), %r10 # prefetch r[5]
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 32(%rdi) # r[4] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ movq %r11, %rax
+ movq 48(%rsi), %r11 # prefetch a[6]
+ mulq %rcx # p = a[5] * digit
+ addq %r10, %rax
+ adcq $0, %rdx # p += r[5]
+ movq 48(%rdi), %r10 # prefetch r[6]
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 40(%rdi) # r[5] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ movq %r11, %rax
+ movq 56(%rsi), %r11 # prefetch a[7]
+ mulq %rcx # p = a[6] * digit
+ addq %r10, %rax
+ adcq $0, %rdx # p += r[6]
+ movq 56(%rdi), %r10 # prefetch r[7]
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 48(%rdi) # r[6] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ movq %r11, %rax
+ mulq %rcx # p = a[7] * digit
+ addq %r10, %rax
+ adcq $0, %rdx # p += r[7]
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 56(%rdi) # r[7] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+
+ addq $64, %rsi
+ addq $64, %rdi
+ subq $8, %r8
+
+ jz .L27
+ jmp .L25
+
+.L26:
+ movq 0(%rsi), %rax
+ movq 0(%rdi), %r10
+ mulq %rcx # p = a[0] * digit
+ addq %r10, %rax
+ adcq $0, %rdx # p += r[0]
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 0(%rdi) # r[0] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+ decq %r8
+ jz .L27
+
+ movq 8(%rsi), %rax
+ movq 8(%rdi), %r10
+ mulq %rcx # p = a[1] * digit
+ addq %r10, %rax
+ adcq $0, %rdx # p += r[1]
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 8(%rdi) # r[1] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+ decq %r8
+ jz .L27
+
+ movq 16(%rsi), %rax
+ movq 16(%rdi), %r10
+ mulq %rcx # p = a[2] * digit
+ addq %r10, %rax
+ adcq $0, %rdx # p += r[2]
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 16(%rdi) # r[2] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+ decq %r8
+ jz .L27
+
+ movq 24(%rsi), %rax
+ movq 24(%rdi), %r10
+ mulq %rcx # p = a[3] * digit
+ addq %r10, %rax
+ adcq $0, %rdx # p += r[3]
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 24(%rdi) # r[3] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+ decq %r8
+ jz .L27
+
+ movq 32(%rsi), %rax
+ movq 32(%rdi), %r10
+ mulq %rcx # p = a[4] * digit
+ addq %r10, %rax
+ adcq $0, %rdx # p += r[4]
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 32(%rdi) # r[4] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+ decq %r8
+ jz .L27
+
+ movq 40(%rsi), %rax
+ movq 40(%rdi), %r10
+ mulq %rcx # p = a[5] * digit
+ addq %r10, %rax
+ adcq $0, %rdx # p += r[5]
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 40(%rdi) # r[5] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+ decq %r8
+ jz .L27
+
+ movq 48(%rsi), %rax
+ movq 48(%rdi), %r10
+ mulq %rcx # p = a[6] * digit
+ addq %r10, %rax
+ adcq $0, %rdx # p += r[6]
+ addq %r9, %rax
+ adcq $0, %rdx # p += cy
+ movq %rax, 48(%rdi) # r[6] = lo(p)
+ movq %rdx, %r9 # cy = hi(p)
+ decq %r8
+ jz .L27
+
+
+.L27:
+ movq %r9, %rax
+ ret
+
+.size s_mpv_mul_add_vec64, .-s_mpv_mul_add_vec64
+
+# Magic indicating no need for an executable stack
+.section .note.GNU-stack, "", @progbits
+.previous
diff --git a/security/nss/lib/freebl/mpi/mpi_amd64_masm.asm b/security/nss/lib/freebl/mpi/mpi_amd64_masm.asm
new file mode 100644
index 000000000..2120c18f9
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_amd64_masm.asm
@@ -0,0 +1,388 @@
+; This Source Code Form is subject to the terms of the Mozilla Public
+; License, v. 2.0. If a copy of the MPL was not distributed with this
+; file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+;
+; This code is converted from mpi_amd64_gas.asm for MASM for x64.
+;
+
+; ------------------------------------------------------------------------
+;
+; Implementation of s_mpv_mul_set_vec which exploits
+; the 64X64->128 bit unsigned multiply instruction.
+;
+; ------------------------------------------------------------------------
+
+; r = a * digit, r and a are vectors of length len
+; returns the carry digit
+; r and a are 64 bit aligned.
+;
+; uint64_t
+; s_mpv_mul_set_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
+;
+
+.CODE
+
+s_mpv_mul_set_vec64 PROC
+
+ ; Computes r[i] = lo(a[i] * digit + cy) for i = 0 .. len-1, where cy is
+ ; the high half of the previous product, and returns the final cy in rax
+ ; (0 when len == 0).
+ ;
+ ; Parameter register compatibility
+ ;
+ ; GAS and MASM builds receive the parameters in different registers.
+ ; The body below uses rdi = r, rsi = a, rdx = len, rcx = digit, so the
+ ; incoming rcx, rdx, r8d and r9 are moved into those registers first.
+ ; rdi and rsi are saved here and restored before returning.
+
+ push rdi
+ push rsi
+
+ mov rdi, rcx
+ mov rsi, rdx
+ mov edx, r8d ; len is an int; the 32-bit move clears the upper half of rdx
+ mov rcx, r9
+
+ ; cy = 0. This has to happen before the len == 0 exit: L17 returns r9,
+ ; which would otherwise still hold the digit argument.
+ xor r9, r9
+ test rdx, rdx
+ jz L17
+ mov r8, rdx ; use r8 for len; rdx is overwritten by mul
+
+ ; Main loop: eight digits per iteration while at least eight remain.
+ ; r11 holds the next a[] digit, loaded ahead of the multiply.
+L15:
+ cmp r8, 8
+ jb L16
+ mov rax, [rsi]
+ mov r11, [8+rsi]
+ mul rcx
+ add rax, r9
+ adc rdx, 0
+ mov [0+rdi], rax
+ mov r9, rdx
+ mov rax,r11
+ mov r11, [16+rsi]
+ mul rcx
+ add rax,r9
+ adc rdx,0
+ mov [8+rdi],rax
+ mov r9,rdx
+ mov rax,r11
+ mov r11, [24+rsi]
+ mul rcx
+ add rax,r9
+ adc rdx,0
+ mov [16+rdi],rax
+ mov r9,rdx
+ mov rax,r11
+ mov r11, [32+rsi]
+ mul rcx
+ add rax,r9
+ adc rdx,0
+ mov [24+rdi],rax
+ mov r9,rdx
+ mov rax,r11
+ mov r11, [40+rsi]
+ mul rcx
+ add rax,r9
+ adc rdx,0
+ mov [32+rdi],rax
+ mov r9,rdx
+ mov rax,r11
+ mov r11, [48+rsi]
+ mul rcx
+ add rax,r9
+ adc rdx,0
+ mov [40+rdi],rax
+ mov r9,rdx
+ mov rax,r11
+ mov r11, [56+rsi]
+ mul rcx
+ add rax,r9
+ adc rdx,0
+ mov [48+rdi],rax
+ mov r9,rdx
+ mov rax,r11
+ mul rcx
+ add rax,r9
+ adc rdx,0
+ mov [56+rdi],rax
+ mov r9,rdx
+ add rsi, 64
+ add rdi, 64
+ sub r8, 8
+ jz L17
+ jmp L15
+
+ ; Tail: one to seven remaining digits, one at a time.
+L16:
+ mov rax, [0+rsi]
+ mul rcx
+ add rax, r9
+ adc rdx,0
+ mov [0+rdi],rax
+ mov r9,rdx
+ dec r8
+ jz L17
+ mov rax, [8+rsi]
+ mul rcx
+ add rax,r9
+ adc rdx,0
+ mov [8+rdi], rax
+ mov r9, rdx
+ dec r8
+ jz L17
+ mov rax, [16+rsi]
+ mul rcx
+ add rax, r9
+ adc rdx, 0
+ mov [16+rdi],rax
+ mov r9,rdx
+ dec r8
+ jz L17
+ mov rax, [24+rsi]
+ mul rcx
+ add rax, r9
+ adc rdx, 0
+ mov [24+rdi], rax
+ mov r9, rdx
+ dec r8
+ jz L17
+ mov rax, [32+rsi]
+ mul rcx
+ add rax, r9
+ adc rdx, 0
+ mov [32+rdi],rax
+ mov r9, rdx
+ dec r8
+ jz L17
+ mov rax, [40+rsi]
+ mul rcx
+ add rax, r9
+ adc rdx, 0
+ mov [40+rdi], rax
+ mov r9, rdx
+ dec r8
+ jz L17
+ mov rax, [48+rsi]
+ mul rcx
+ add rax, r9
+ adc rdx, 0
+ mov [48+rdi], rax
+ mov r9, rdx
+ dec r8
+ jz L17
+
+ ; Common exit: return the carry and restore the saved registers.
+L17:
+ mov rax, r9
+ pop rsi
+ pop rdi
+ ret
+
+s_mpv_mul_set_vec64 ENDP
+
+
+;------------------------------------------------------------------------
+;
+; Implementation of s_mpv_mul_add_vec which exploits
+; the 64X64->128 bit unsigned multiply instruction.
+;
+;------------------------------------------------------------------------
+
+; r += a * digit, r and a are vectors of length len
+; returns the carry digit
+; r and a are 64 bit aligned.
+;
+; uint64_t
+; s_mpv_mul_add_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
+;
+
+s_mpv_mul_add_vec64 PROC
+
+ ; Computes r[i] = lo(a[i] * digit + r[i] + cy) for i = 0 .. len-1, where
+ ; cy is the high half of the previous sum, and returns the final cy in
+ ; rax (0 when len == 0).
+ ;
+ ; Parameter register compatibility
+ ;
+ ; GAS and MASM builds receive the parameters in different registers.
+ ; The body below uses rdi = r, rsi = a, rdx = len, rcx = digit, so the
+ ; incoming rcx, rdx, r8d and r9 are moved into those registers first.
+ ; rdi and rsi are saved here and restored before returning.
+
+ push rdi
+ push rsi
+
+ mov rdi, rcx
+ mov rsi, rdx
+ mov edx, r8d ; len is an int; the 32-bit move clears the upper half of rdx
+ mov rcx, r9
+
+ ; cy = 0. This has to happen before the len == 0 exit: L27 returns r9,
+ ; which would otherwise still hold the digit argument.
+ xor r9, r9
+ test rdx, rdx
+ jz L27
+ mov r8, rdx ; use r8 for len; rdx is overwritten by mul
+
+ ; Main loop: eight digits per iteration while at least eight remain.
+ ; r11 holds the next a[] digit and r10 the next r[] digit, both loaded
+ ; ahead of their use.
+L25:
+ cmp r8, 8
+ jb L26
+ mov rax, [0+rsi]
+ mov r10, [0+rdi]
+ mov r11, [8+rsi]
+ mul rcx
+ add rax,r10
+ adc rdx,0
+ mov r10, [8+rdi]
+ add rax,r9
+ adc rdx,0
+ mov [0+rdi],rax
+ mov r9,rdx
+ mov rax,r11
+ mov r11, [16+rsi]
+ mul rcx
+ add rax,r10
+ adc rdx,0
+ mov r10, [16+rdi]
+ add rax,r9
+ adc rdx,0
+ mov [8+rdi],rax
+ mov r9,rdx
+ mov rax,r11
+ mov r11, [24+rsi]
+ mul rcx
+ add rax,r10
+ adc rdx,0
+ mov r10, [24+rdi]
+ add rax,r9
+ adc rdx,0
+ mov [16+rdi],rax
+ mov r9,rdx
+ mov rax,r11
+ mov r11, [32+rsi]
+ mul rcx
+ add rax,r10
+ adc rdx,0
+ mov r10, [32+rdi]
+ add rax,r9
+ adc rdx,0
+ mov [24+rdi],rax
+ mov r9,rdx
+ mov rax,r11
+ mov r11, [40+rsi]
+ mul rcx
+ add rax,r10
+ adc rdx,0
+ mov r10, [40+rdi]
+ add rax,r9
+ adc rdx,0
+ mov [32+rdi],rax
+ mov r9,rdx
+ mov rax,r11
+ mov r11, [48+rsi]
+ mul rcx
+ add rax,r10
+ adc rdx,0
+ mov r10, [48+rdi]
+ add rax,r9
+ adc rdx,0
+ mov [40+rdi],rax
+ mov r9,rdx
+ mov rax,r11
+ mov r11, [56+rsi]
+ mul rcx
+ add rax,r10
+ adc rdx,0
+ mov r10, [56+rdi]
+ add rax,r9
+ adc rdx,0
+ mov [48+rdi],rax
+ mov r9,rdx
+ mov rax,r11
+ mul rcx
+ add rax,r10
+ adc rdx,0
+ add rax,r9
+ adc rdx,0
+ mov [56+rdi],rax
+ mov r9,rdx
+ add rsi,64
+ add rdi,64
+ sub r8, 8
+ jz L27
+ jmp L25
+
+ ; Tail: one to seven remaining digits, one at a time.
+L26:
+ mov rax, [0+rsi]
+ mov r10, [0+rdi]
+ mul rcx
+ add rax,r10
+ adc rdx,0
+ add rax,r9
+ adc rdx,0
+ mov [0+rdi],rax
+ mov r9,rdx
+ dec r8
+ jz L27
+ mov rax, [8+rsi]
+ mov r10, [8+rdi]
+ mul rcx
+ add rax,r10
+ adc rdx,0
+ add rax,r9
+ adc rdx,0
+ mov [8+rdi],rax
+ mov r9,rdx
+ dec r8
+ jz L27
+ mov rax, [16+rsi]
+ mov r10, [16+rdi]
+ mul rcx
+ add rax,r10
+ adc rdx,0
+ add rax,r9
+ adc rdx,0
+ mov [16+rdi],rax
+ mov r9,rdx
+ dec r8
+ jz L27
+ mov rax, [24+rsi]
+ mov r10, [24+rdi]
+ mul rcx
+ add rax,r10
+ adc rdx,0
+ add rax,r9
+ adc rdx,0
+ mov [24+rdi],rax
+ mov r9,rdx
+ dec r8
+ jz L27
+ mov rax, [32+rsi]
+ mov r10, [32+rdi]
+ mul rcx
+ add rax,r10
+ adc rdx,0
+ add rax,r9
+ adc rdx,0
+ mov [32+rdi],rax
+ mov r9,rdx
+ dec r8
+ jz L27
+ mov rax, [40+rsi]
+ mov r10, [40+rdi]
+ mul rcx
+ add rax,r10
+ adc rdx,0
+ add rax,r9
+ adc rdx,0
+ mov [40+rdi],rax
+ mov r9,rdx
+ dec r8
+ jz L27
+ mov rax, [48+rsi]
+ mov r10, [48+rdi]
+ mul rcx
+ add rax,r10
+ adc rdx,0
+ add rax, r9
+ adc rdx, 0
+ mov [48+rdi], rax
+ mov r9, rdx
+ dec r8
+ jz L27
+
+ ; Common exit: return the carry and restore the saved registers.
+L27:
+ mov rax, r9
+
+ pop rsi
+ pop rdi
+ ret
+
+s_mpv_mul_add_vec64 ENDP
+
+END
diff --git a/security/nss/lib/freebl/mpi/mpi_amd64_sun.s b/security/nss/lib/freebl/mpi/mpi_amd64_sun.s
new file mode 100644
index 000000000..ddd5c40fd
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_amd64_sun.s
@@ -0,0 +1,385 @@
+/ This Source Code Form is subject to the terms of the Mozilla Public
+/ License, v. 2.0. If a copy of the MPL was not distributed with this
+/ file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+
+/ ------------------------------------------------------------------------
+/
+/ Implementation of s_mpv_mul_set_vec which exploits
+/ the 64X64->128 bit unsigned multiply instruction.
+/
+/ ------------------------------------------------------------------------
+
+/ r = a * digit, r and a are vectors of length len
+/ returns the carry digit
+/ r and a are 64 bit aligned.
+/
+/ uint64_t
+/ s_mpv_mul_set_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
+/
+
+.text; .align 16; .globl s_mpv_mul_set_vec64; .type s_mpv_mul_set_vec64, @function; s_mpv_mul_set_vec64:
+
+/ On entry: %rdi = r, %rsi = a, %edx = len, %rcx = digit.
+/ cy is cleared before the len == 0 exit because .L17 returns %r9.
+/ len is declared int, so only %edx is read; the 32-bit move zero-extends
+/ it into %r8 instead of relying on the upper half of %rdx.
+
+ xorq %r9, %r9 / cy = 0
+ movl %edx, %r8d / Use r8 for len; %rdx is used by mul
+ testq %r8, %r8 / if (len == 0) return (0)
+ jz .L17
+
+.L15:
+ cmpq $8, %r8 / 8 - len
+ jb .L16
+ movq 0(%rsi), %rax / rax = a[0]
+ movq 8(%rsi), %r11 / prefetch a[1]
+ mulq %rcx / p = a[0] * digit
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 0(%rdi) / r[0] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ movq %r11, %rax
+ movq 16(%rsi), %r11 / prefetch a[2]
+ mulq %rcx / p = a[1] * digit
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 8(%rdi) / r[1] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ movq %r11, %rax
+ movq 24(%rsi), %r11 / prefetch a[3]
+ mulq %rcx / p = a[2] * digit
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 16(%rdi) / r[2] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ movq %r11, %rax
+ movq 32(%rsi), %r11 / prefetch a[4]
+ mulq %rcx / p = a[3] * digit
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 24(%rdi) / r[3] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ movq %r11, %rax
+ movq 40(%rsi), %r11 / prefetch a[5]
+ mulq %rcx / p = a[4] * digit
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 32(%rdi) / r[4] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ movq %r11, %rax
+ movq 48(%rsi), %r11 / prefetch a[6]
+ mulq %rcx / p = a[5] * digit
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 40(%rdi) / r[5] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ movq %r11, %rax
+ movq 56(%rsi), %r11 / prefetch a[7]
+ mulq %rcx / p = a[6] * digit
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 48(%rdi) / r[6] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ movq %r11, %rax
+ mulq %rcx / p = a[7] * digit
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 56(%rdi) / r[7] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ addq $64, %rsi
+ addq $64, %rdi
+ subq $8, %r8
+
+ jz .L17
+ jmp .L15
+
+/ Tail: fewer than eight digits remain; handle them one at a time.
+.L16:
+ movq 0(%rsi), %rax
+ mulq %rcx / p = a[0] * digit
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 0(%rdi) / r[0] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+ decq %r8
+ jz .L17
+
+ movq 8(%rsi), %rax
+ mulq %rcx / p = a[1] * digit
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 8(%rdi) / r[1] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+ decq %r8
+ jz .L17
+
+ movq 16(%rsi), %rax
+ mulq %rcx / p = a[2] * digit
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 16(%rdi) / r[2] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+ decq %r8
+ jz .L17
+
+ movq 24(%rsi), %rax
+ mulq %rcx / p = a[3] * digit
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 24(%rdi) / r[3] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+ decq %r8
+ jz .L17
+
+ movq 32(%rsi), %rax
+ mulq %rcx / p = a[4] * digit
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 32(%rdi) / r[4] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+ decq %r8
+ jz .L17
+
+ movq 40(%rsi), %rax
+ mulq %rcx / p = a[5] * digit
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 40(%rdi) / r[5] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+ decq %r8
+ jz .L17
+
+ movq 48(%rsi), %rax
+ mulq %rcx / p = a[6] * digit
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 48(%rdi) / r[6] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+ decq %r8
+ jz .L17
+
+
+.L17:
+ movq %r9, %rax / return cy
+ ret
+
+.size s_mpv_mul_set_vec64, .-s_mpv_mul_set_vec64
+
+/ ------------------------------------------------------------------------
+/
+/ Implementation of s_mpv_mul_add_vec which exploits
+/ the 64X64->128 bit unsigned multiply instruction.
+/
+/ ------------------------------------------------------------------------
+
+/ r += a * digit, r and a are vectors of length len
+/ returns the carry digit
+/ r and a are 64 bit aligned.
+/
+/ uint64_t
+/ s_mpv_mul_add_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit)
+/
+
+.text; .align 16; .globl s_mpv_mul_add_vec64; .type s_mpv_mul_add_vec64, @function; s_mpv_mul_add_vec64:
+
+/ On entry: %rdi = r, %rsi = a, %edx = len, %rcx = digit.
+/ cy is cleared before the len == 0 exit because .L27 returns %r9.
+/ len is declared int, so only %edx is read; the 32-bit move zero-extends
+/ it into %r8 instead of relying on the upper half of %rdx.
+
+ xorq %r9, %r9 / cy = 0
+ movl %edx, %r8d / Use r8 for len; %rdx is used by mul
+ testq %r8, %r8 / if (len == 0) return (0)
+ jz .L27
+
+.L25:
+ cmpq $8, %r8 / 8 - len
+ jb .L26
+ movq 0(%rsi), %rax / rax = a[0]
+ movq 0(%rdi), %r10 / r10 = r[0]
+ movq 8(%rsi), %r11 / prefetch a[1]
+ mulq %rcx / p = a[0] * digit
+ addq %r10, %rax
+ adcq $0, %rdx / p += r[0]
+ movq 8(%rdi), %r10 / prefetch r[1]
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 0(%rdi) / r[0] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ movq %r11, %rax
+ movq 16(%rsi), %r11 / prefetch a[2]
+ mulq %rcx / p = a[1] * digit
+ addq %r10, %rax
+ adcq $0, %rdx / p += r[1]
+ movq 16(%rdi), %r10 / prefetch r[2]
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 8(%rdi) / r[1] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ movq %r11, %rax
+ movq 24(%rsi), %r11 / prefetch a[3]
+ mulq %rcx / p = a[2] * digit
+ addq %r10, %rax
+ adcq $0, %rdx / p += r[2]
+ movq 24(%rdi), %r10 / prefetch r[3]
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 16(%rdi) / r[2] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ movq %r11, %rax
+ movq 32(%rsi), %r11 / prefetch a[4]
+ mulq %rcx / p = a[3] * digit
+ addq %r10, %rax
+ adcq $0, %rdx / p += r[3]
+ movq 32(%rdi), %r10 / prefetch r[4]
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 24(%rdi) / r[3] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ movq %r11, %rax
+ movq 40(%rsi), %r11 / prefetch a[5]
+ mulq %rcx / p = a[4] * digit
+ addq %r10, %rax
+ adcq $0, %rdx / p += r[4]
+ movq 40(%rdi), %r10 / prefetch r[5]
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 32(%rdi) / r[4] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ movq %r11, %rax
+ movq 48(%rsi), %r11 / prefetch a[6]
+ mulq %rcx / p = a[5] * digit
+ addq %r10, %rax
+ adcq $0, %rdx / p += r[5]
+ movq 48(%rdi), %r10 / prefetch r[6]
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 40(%rdi) / r[5] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ movq %r11, %rax
+ movq 56(%rsi), %r11 / prefetch a[7]
+ mulq %rcx / p = a[6] * digit
+ addq %r10, %rax
+ adcq $0, %rdx / p += r[6]
+ movq 56(%rdi), %r10 / prefetch r[7]
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 48(%rdi) / r[6] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ movq %r11, %rax
+ mulq %rcx / p = a[7] * digit
+ addq %r10, %rax
+ adcq $0, %rdx / p += r[7]
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 56(%rdi) / r[7] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+
+ addq $64, %rsi
+ addq $64, %rdi
+ subq $8, %r8
+
+ jz .L27
+ jmp .L25
+
+/ Tail: fewer than eight digits remain; handle them one at a time.
+.L26:
+ movq 0(%rsi), %rax
+ movq 0(%rdi), %r10
+ mulq %rcx / p = a[0] * digit
+ addq %r10, %rax
+ adcq $0, %rdx / p += r[0]
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 0(%rdi) / r[0] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+ decq %r8
+ jz .L27
+
+ movq 8(%rsi), %rax
+ movq 8(%rdi), %r10
+ mulq %rcx / p = a[1] * digit
+ addq %r10, %rax
+ adcq $0, %rdx / p += r[1]
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 8(%rdi) / r[1] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+ decq %r8
+ jz .L27
+
+ movq 16(%rsi), %rax
+ movq 16(%rdi), %r10
+ mulq %rcx / p = a[2] * digit
+ addq %r10, %rax
+ adcq $0, %rdx / p += r[2]
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 16(%rdi) / r[2] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+ decq %r8
+ jz .L27
+
+ movq 24(%rsi), %rax
+ movq 24(%rdi), %r10
+ mulq %rcx / p = a[3] * digit
+ addq %r10, %rax
+ adcq $0, %rdx / p += r[3]
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 24(%rdi) / r[3] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+ decq %r8
+ jz .L27
+
+ movq 32(%rsi), %rax
+ movq 32(%rdi), %r10
+ mulq %rcx / p = a[4] * digit
+ addq %r10, %rax
+ adcq $0, %rdx / p += r[4]
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 32(%rdi) / r[4] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+ decq %r8
+ jz .L27
+
+ movq 40(%rsi), %rax
+ movq 40(%rdi), %r10
+ mulq %rcx / p = a[5] * digit
+ addq %r10, %rax
+ adcq $0, %rdx / p += r[5]
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 40(%rdi) / r[5] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+ decq %r8
+ jz .L27
+
+ movq 48(%rsi), %rax
+ movq 48(%rdi), %r10
+ mulq %rcx / p = a[6] * digit
+ addq %r10, %rax
+ adcq $0, %rdx / p += r[6]
+ addq %r9, %rax
+ adcq $0, %rdx / p += cy
+ movq %rax, 48(%rdi) / r[6] = lo(p)
+ movq %rdx, %r9 / cy = hi(p)
+ decq %r8
+ jz .L27
+
+
+.L27:
+ movq %r9, %rax / return cy
+ ret
+
+.size s_mpv_mul_add_vec64, .-s_mpv_mul_add_vec64
diff --git a/security/nss/lib/freebl/mpi/mpi_arm.c b/security/nss/lib/freebl/mpi/mpi_arm.c
new file mode 100644
index 000000000..b5139f28d
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_arm.c
@@ -0,0 +1,175 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* This inlined version is for 32-bit ARM platform only */
+
+#if !defined(__arm__)
+#error "This is for ARM only"
+#endif
+
+/* The inline assembler version does not work in 16-bit Thumb mode */
+#if (!defined(__thumb__) || defined(__thumb2__)) && !defined(__ARM_ARCH_3__)
+
+#include "mpi-priv.h"
+
+#ifdef MP_ASSEMBLY_MULTIPLY
+/*
+ * c = a * b, where a is a_len digits long. The low word of each
+ * a[i] * b + carry is stored to c[i] and the final carry to c[a_len].
+ *
+ * The asm advances a and c and counts a_len down to zero, so all three are
+ * declared as read-write operands; GCC requires that input-only operands
+ * are left unmodified. a_len uses the "l" constraint because cbz only
+ * accepts r0-r7 in Thumb state (in ARM state "l" behaves like "r").
+ * r5 holds the running carry, r4 the high word of the current product.
+ */
+void
+s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+    __asm__ __volatile__(
+        "mov     r5, #0\n"
+#ifdef __thumb2__
+        "cbz     %[len], 2f\n"
+#else
+        "cmp     %[len], r5\n" /* r5 is 0 now */
+        "beq     2f\n"
+#endif
+
+        "1:\n"
+        "mov     r4, #0\n"
+        "ldr     r6, [%[a]], #4\n"
+        "umlal   r5, r4, r6, %[b]\n" /* r4:r5 = carry + a[i] * b */
+        "str     r5, [%[c]], #4\n"
+        "mov     r5, r4\n"
+
+        "subs    %[len], #1\n"
+        "bne     1b\n"
+
+        "2:\n"
+        "str     r5, [%[c]]\n"
+        : [a] "+r"(a), [len] "+l"(a_len), [c] "+r"(c)
+        : [b] "r"(b)
+        : "memory", "cc", "%r4", "%r5", "%r6");
+}
+
+/*
+ * c += a * b, where a is a_len digits long. Each c[i] is replaced by the
+ * low word of c[i] + a[i] * b + carry, and the final carry is stored to
+ * c[a_len] (overwriting it, not adding to it).
+ *
+ * The asm advances a and c and counts a_len down to zero, so all three are
+ * declared as read-write operands; GCC requires that input-only operands
+ * are left unmodified. a_len uses the "l" constraint because cbz only
+ * accepts r0-r7 in Thumb state (in ARM state "l" behaves like "r").
+ * r5 holds the running carry, r4 the high word of the current sum.
+ */
+void
+s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+    __asm__ __volatile__(
+        "mov     r5, #0\n"
+#ifdef __thumb2__
+        "cbz     %[len], 2f\n"
+#else
+        "cmp     %[len], r5\n" /* r5 is 0 now */
+        "beq     2f\n"
+#endif
+
+        "1:\n"
+        "mov     r4, #0\n"
+        "ldr     r6, [%[c]]\n"
+        "adds    r5, r6\n" /* r4:r5 = carry + c[i] */
+        "adc     r4, r4, #0\n"
+
+        "ldr     r6, [%[a]], #4\n"
+        "umlal   r5, r4, r6, %[b]\n" /* r4:r5 += a[i] * b */
+        "str     r5, [%[c]], #4\n"
+        "mov     r5, r4\n"
+
+        "subs    %[len], #1\n"
+        "bne     1b\n"
+
+        "2:\n"
+        "str     r5, [%[c]]\n"
+        : [a] "+r"(a), [len] "+l"(a_len), [c] "+r"(c)
+        : [b] "r"(b)
+        : "memory", "cc", "%r4", "%r5", "%r6");
+}
+
+/*
+ * c += a * b, where a is a_len digits long, then keeps adding the carry
+ * into the following digits of c until no carry remains. Returns at once
+ * when a_len is 0.
+ *
+ * The asm advances a and c and counts a_len down to zero, so all three are
+ * declared as read-write operands; GCC requires that input-only operands
+ * are left unmodified. r5 holds the running carry, r4 the high word of
+ * the current sum.
+ */
+void
+s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+    if (!a_len)
+        return;
+
+    __asm__ __volatile__(
+        "mov     r5, #0\n"
+
+        "1:\n"
+        "mov     r4, #0\n"
+        "ldr     r6, [%[c]]\n"
+        "adds    r5, r6\n" /* r4:r5 = carry + c[i] */
+        "adc     r4, r4, #0\n"
+        "ldr     r6, [%[a]], #4\n"
+        "umlal   r5, r4, r6, %[b]\n" /* r4:r5 += a[i] * b */
+        "str     r5, [%[c]], #4\n"
+        "mov     r5, r4\n"
+
+        "subs    %[len], #1\n"
+        "bne     1b\n"
+
+/* r4 == r5 == carry out of the last digit; skip propagation if zero. */
+#ifdef __thumb2__
+        "cbz     r4, 3f\n"
+#else
+        "cmp     r4, #0\n"
+        "beq     3f\n"
+#endif
+
+        "2:\n"
+        "mov     r4, #0\n"
+        "ldr     r6, [%[c]]\n"
+        "adds    r5, r6\n"
+        "adc     r4, r4, #0\n"
+        "str     r5, [%[c]], #4\n"
+        "movs    r5, r4\n"
+        "bne     2b\n"
+
+        "3:\n"
+        : [a] "+r"(a), [len] "+r"(a_len), [c] "+r"(c)
+        : [b] "r"(b)
+        : "memory", "cc", "%r4", "%r5", "%r6");
+}
+#endif
+
+#ifdef MP_ASSEMBLY_SQUARE
+/*
+ * Adds the square of each digit pa[i] into the digit pair ps[2i], ps[2i+1],
+ * carrying from one pair into the next, then keeps adding the carry into
+ * the following digits of ps until no carry remains. Returns at once when
+ * a_len is 0.
+ *
+ * The asm advances pa and ps and counts a_len down to zero, so all three
+ * are declared as read-write operands; GCC requires that input-only
+ * operands are left unmodified. r3 holds the running carry.
+ */
+void
+s_mpv_sqr_add_prop(const mp_digit *pa, mp_size a_len, mp_digit *ps)
+{
+    if (!a_len)
+        return;
+
+    __asm__ __volatile__(
+        "mov     r3, #0\n"
+
+        "1:\n"
+        "mov     r4, #0\n"
+        "ldr     r6, [%[pa]], #4\n"
+        "ldr     r5, [%[ps]]\n"
+        "adds    r3, r5\n"
+        "adc     r4, r4, #0\n"
+        "umlal   r3, r4, r6, r6\n" /* w = r4:r3 (high:low) */
+        "str     r3, [%[ps]], #4\n"
+
+        "ldr     r5, [%[ps]]\n"
+        "adds    r3, r4, r5\n"
+        "mov     r4, #0\n"
+        "adc     r4, r4, #0\n"
+        "str     r3, [%[ps]], #4\n"
+        "mov     r3, r4\n"
+
+        "subs    %[len], #1\n"
+        "bne     1b\n"
+
+#ifdef __thumb2__
+        "cbz     r3, 3f\n"
+#else
+        "cmp     r3, #0\n"
+        "beq     3f\n"
+#endif
+
+        "2:\n"
+        "mov     r4, #0\n"
+        "ldr     r5, [%[ps]]\n"
+        "adds    r3, r5\n"
+        "adc     r4, r4, #0\n"
+        "str     r3, [%[ps]], #4\n"
+        "movs    r3, r4\n"
+        "bne     2b\n"
+
+        "3:"
+        : [pa] "+r"(pa), [len] "+r"(a_len), [ps] "+r"(ps)
+        :
+        : "memory", "cc", "%r3", "%r4", "%r5", "%r6");
+}
+#endif
+#endif
diff --git a/security/nss/lib/freebl/mpi/mpi_hp.c b/security/nss/lib/freebl/mpi/mpi_hp.c
new file mode 100644
index 000000000..0cea7685d
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_hp.c
@@ -0,0 +1,81 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* This file contains routines that perform vector multiplication. */
+
+#include "mpi-priv.h"
+#include <unistd.h>
+
+#include <stddef.h>
+/* #include <sys/systeminfo.h> */
+#include <strings.h>
+
+extern void multacc512(
+ int length, /* doublewords in multiplicand vector. */
+ const mp_digit *scalaraddr, /* Address of scalar. */
+ const mp_digit *multiplicand, /* The multiplicand vector. */
+ mp_digit *result); /* Where to accumulate the result. */
+
+extern void maxpy_little(
+ int length, /* doublewords in multiplicand vector. */
+ const mp_digit *scalaraddr, /* Address of scalar. */
+ const mp_digit *multiplicand, /* The multiplicand vector. */
+ mp_digit *result); /* Where to accumulate the result. */
+
+extern void add_diag_little(
+ int length, /* doublewords in input vector. */
+ const mp_digit *root, /* The vector to square. */
+ mp_digit *result); /* Where to accumulate the result. */
+
+/* Thin wrapper: hands the work to add_diag_little(), digit count first. */
+void
+s_mpv_sqr_add_prop(const mp_digit *pa, mp_size a_len, mp_digit *ps)
+{
+    const int length = a_len; /* add_diag_little() takes an int count */
+
+    add_diag_little(length, pa, ps);
+}
+
+#define MAX_STACK_DIGITS 258
+#define MULTACC512_LEN (512 / MP_DIGIT_BIT)
+#define HP_MPY_ADD_FN (a_len == MULTACC512_LEN ? multacc512 : maxpy_little)
+
+/*
+ * c = a * b, where a is a_len digits long; c receives a_len + 1 digits.
+ *
+ * c is zeroed and the product is then accumulated into it, so when a and c
+ * are the same buffer a is first copied to scratch space: the on-stack
+ * array for up to MAX_STACK_DIGITS digits, otherwise a heap allocation.
+ * The scratch copy is cleared before returning in both cases.
+ * If the heap allocation fails the function returns without writing c;
+ * the void return type leaves no way to report that.
+ */
+void
+s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+    mp_digit x[MAX_STACK_DIGITS];
+    mp_digit *px = NULL; /* scratch copy of a; stays NULL unless a == c */
+    size_t xSize = 0;    /* number of scratch bytes to clear afterwards */
+
+    if (a == c) {
+        if (a_len > MAX_STACK_DIGITS) {
+            xSize = sizeof(mp_digit) * (a_len + 2);
+            px = malloc(xSize);
+            if (!px)
+                return;
+        } else {
+            px = x;
+            xSize = sizeof(mp_digit) * a_len;
+        }
+        memcpy(px, a, a_len * sizeof(*a));
+        a = px;
+    }
+    s_mp_setz(c, a_len + 1);
+    HP_MPY_ADD_FN(a_len, &b, a, c);
+    if (px) {
+        /* Clear the operand copy whether it lives on the stack or heap. */
+        memset(px, 0, xSize);
+        if (px != x)
+            free(px);
+    }
+}
+
+/* c += a * b; a holds a_len digits and c[a_len] is reset to 0 first. */
+void
+s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+    /* Zero the digit just past the product so carry propagation stops here. */
+    *(c + a_len) = 0;
+
+    /* Same choice HP_MPY_ADD_FN makes, written out explicitly. */
+    if (a_len == MULTACC512_LEN) {
+        multacc512(a_len, &b, a, c);
+    } else {
+        maxpy_little(a_len, &b, a, c);
+    }
+}
+
+/* c += a * b, where a is a_len words long; c is accumulated into as-is. */
+void
+s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b,
+                     mp_digit *c)
+{
+    /* Same choice HP_MPY_ADD_FN makes, written out explicitly. */
+    if (a_len == MULTACC512_LEN) {
+        multacc512(a_len, &b, a, c);
+    } else {
+        maxpy_little(a_len, &b, a, c);
+    }
+}
diff --git a/security/nss/lib/freebl/mpi/mpi_i86pc.s b/security/nss/lib/freebl/mpi/mpi_i86pc.s
new file mode 100644
index 000000000..f80039659
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_i86pc.s
@@ -0,0 +1,313 @@
+/
+/ This Source Code Form is subject to the terms of the Mozilla Public
+/ License, v. 2.0. If a copy of the MPL was not distributed with this
+/ file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+.text
+
+ / s_mpv_mul_d(a, a_len, b, c): c = a * b
+ / For each of the a_len words of a, stores the low word of a[i] * b + carry
+ / to c[i]; the final carry is stored to c[a_len].
+ /
+ / ebp - 40: caller's ebx
+ / ebp - 36: caller's esi
+ / ebp - 32: caller's edi
+ / ebp - 28:
+ / ebp - 24:
+ / ebp - 20:
+ / ebp - 16:
+ / ebp - 12:
+ / ebp - 8:
+ / ebp - 4:
+ / ebp + 0: caller's ebp
+ / ebp + 4: return address
+ / ebp + 8: a argument
+ / ebp + 12: a_len argument
+ / ebp + 16: b argument
+ / ebp + 20: c argument
+ / registers:
+ / eax:
+ / ebx: carry
+ / ecx: a_len
+ / edx:
+ / esi: a ptr
+ / edi: c ptr
+.globl s_mpv_mul_d
+.type s_mpv_mul_d,@function
+s_mpv_mul_d:
+ push %ebp
+ mov %esp,%ebp
+ sub $28,%esp
+ push %edi
+ push %esi
+ push %ebx
+ movl $0,%ebx / carry = 0
+ mov 12(%ebp),%ecx / ecx = a_len
+ mov 20(%ebp),%edi / edi = c
+ cmp $0,%ecx
+ je L2 / jmp if a_len == 0
+ mov 8(%ebp),%esi / esi = a
+ cld / string ops below must count upwards
+L1:
+ lodsl / eax = [ds:esi]; esi += 4
+ mov 16(%ebp),%edx / edx = b
+ mull %edx / edx:eax = Phi:Plo = a_i * b
+
+ add %ebx,%eax / add carry (%ebx) to edx:eax
+ adc $0,%edx
+ mov %edx,%ebx / high half of product becomes next carry
+
+ stosl / [es:edi] = eax; edi += 4;
+ dec %ecx / --a_len
+ jnz L1 / jmp if a_len != 0
+L2:
+ mov %ebx,0(%edi) / *c = carry
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+ / s_mpv_mul_d_add(a, a_len, b, c): c += a * b
+ / For each of the a_len words of a, replaces c[i] with the low word of
+ / a[i] * b + carry + c[i]; the final carry is stored to (not added into)
+ / c[a_len].
+ /
+ / ebp - 40: caller's ebx
+ / ebp - 36: caller's esi
+ / ebp - 32: caller's edi
+ / ebp - 28:
+ / ebp - 24:
+ / ebp - 20:
+ / ebp - 16:
+ / ebp - 12:
+ / ebp - 8:
+ / ebp - 4:
+ / ebp + 0: caller's ebp
+ / ebp + 4: return address
+ / ebp + 8: a argument
+ / ebp + 12: a_len argument
+ / ebp + 16: b argument
+ / ebp + 20: c argument
+ / registers:
+ / eax:
+ / ebx: carry
+ / ecx: a_len
+ / edx:
+ / esi: a ptr
+ / edi: c ptr
+.globl s_mpv_mul_d_add
+.type s_mpv_mul_d_add,@function
+s_mpv_mul_d_add:
+ push %ebp
+ mov %esp,%ebp
+ sub $28,%esp
+ push %edi
+ push %esi
+ push %ebx
+ movl $0,%ebx / carry = 0
+ mov 12(%ebp),%ecx / ecx = a_len
+ mov 20(%ebp),%edi / edi = c
+ cmp $0,%ecx
+ je L4 / jmp if a_len == 0
+ mov 8(%ebp),%esi / esi = a
+ cld / string ops below must count upwards
+L3:
+ lodsl / eax = [ds:esi]; esi += 4
+ mov 16(%ebp),%edx / edx = b
+ mull %edx / edx:eax = Phi:Plo = a_i * b
+
+ add %ebx,%eax / add carry (%ebx) to edx:eax
+ adc $0,%edx
+ mov 0(%edi),%ebx / add in current word from *c
+ add %ebx,%eax
+ adc $0,%edx
+ mov %edx,%ebx / high half of product becomes next carry
+
+ stosl / [es:edi] = eax; edi += 4;
+ dec %ecx / --a_len
+ jnz L3 / jmp if a_len != 0
+L4:
+ mov %ebx,0(%edi) / *c = carry
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+ / s_mpv_mul_d_add_prop(a, a_len, b, c): c += a * b, propagating the carry
+ / Same multiply-accumulate loop as s_mpv_mul_d_add, but the final carry is
+ / added into the following words of c until no carry remains.
+ /
+ / ebp - 40: caller's ebx
+ / ebp - 36: caller's esi
+ / ebp - 32: caller's edi
+ / ebp - 28:
+ / ebp - 24:
+ / ebp - 20:
+ / ebp - 16:
+ / ebp - 12:
+ / ebp - 8:
+ / ebp - 4:
+ / ebp + 0: caller's ebp
+ / ebp + 4: return address
+ / ebp + 8: a argument
+ / ebp + 12: a_len argument
+ / ebp + 16: b argument
+ / ebp + 20: c argument
+ / registers:
+ / eax:
+ / ebx: carry
+ / ecx: a_len
+ / edx:
+ / esi: a ptr
+ / edi: c ptr
+.globl s_mpv_mul_d_add_prop
+.type s_mpv_mul_d_add_prop,@function
+s_mpv_mul_d_add_prop:
+ push %ebp
+ mov %esp,%ebp
+ sub $28,%esp
+ push %edi
+ push %esi
+ push %ebx
+ movl $0,%ebx / carry = 0
+ mov 12(%ebp),%ecx / ecx = a_len
+ mov 20(%ebp),%edi / edi = c
+ cmp $0,%ecx
+ je L6 / jmp if a_len == 0
+ cld / string ops below must count upwards
+ mov 8(%ebp),%esi / esi = a
+L5:
+ lodsl / eax = [ds:esi]; esi += 4
+ mov 16(%ebp),%edx / edx = b
+ mull %edx / edx:eax = Phi:Plo = a_i * b
+
+ add %ebx,%eax / add carry (%ebx) to edx:eax
+ adc $0,%edx
+ mov 0(%edi),%ebx / add in current word from *c
+ add %ebx,%eax
+ adc $0,%edx
+ mov %edx,%ebx / high half of product becomes next carry
+
+ stosl / [es:edi] = eax; edi += 4;
+ dec %ecx / --a_len
+ jnz L5 / jmp if a_len != 0
+L6:
+ cmp $0,%ebx / is carry zero?
+ jz L8
+ mov 0(%edi),%eax / add in current word from *c
+ add %ebx,%eax
+ stosl / [es:edi] = eax; edi += 4; (flags unchanged)
+ jnc L8
+L7:
+ mov 0(%edi),%eax / add in current word from *c
+ adc $0,%eax / CF is still set here, so this adds 1
+ stosl / [es:edi] = eax; edi += 4; (flags unchanged)
+ jc L7
+L8:
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+ / s_mpv_sqr_add_prop(pa, a_len, ps)
+ / For each of the a_len words of pa, adds pa[i] * pa[i] plus the running
+ / carry into the word pair ps[2i], ps[2i+1]; the final carry is then added
+ / into the following words of ps until no carry remains.
+ /
+ / ebp - 24: caller's ebx
+ / ebp - 20: caller's esi
+ / ebp - 16: caller's edi
+ / ebp - 12:
+ / ebp - 8: carry (stack slot is not referenced below; carry lives in ebx)
+ / ebp - 4: a_len local (stack slot is not referenced below; a_len lives in ecx)
+ / ebp + 0: caller's ebp
+ / ebp + 4: return address
+ / ebp + 8: pa argument
+ / ebp + 12: a_len argument
+ / ebp + 16: ps argument
+ / ebp + 20:
+ / registers:
+ / eax:
+ / ebx: carry
+ / ecx: a_len
+ / edx:
+ / esi: a ptr
+ / edi: c ptr
+
+.globl s_mpv_sqr_add_prop
+.type s_mpv_sqr_add_prop,@function
+s_mpv_sqr_add_prop:
+ push %ebp
+ mov %esp,%ebp
+ sub $12,%esp
+ push %edi
+ push %esi
+ push %ebx
+ movl $0,%ebx / carry = 0
+ mov 12(%ebp),%ecx / a_len
+ mov 16(%ebp),%edi / edi = ps
+ cmp $0,%ecx
+ je L11 / jump if a_len == 0
+ cld / string ops below must count upwards
+ mov 8(%ebp),%esi / esi = pa
+L10:
+ lodsl / %eax = [ds:esi]; esi += 4;
+ mull %eax / edx:eax = a_i * a_i
+
+ add %ebx,%eax / add "carry"
+ adc $0,%edx
+ mov 0(%edi),%ebx
+ add %ebx,%eax / add low word from result
+ mov 4(%edi),%ebx / mov and stosl leave CF from the add intact
+ stosl / [es:edi] = %eax; edi += 4;
+ adc %ebx,%edx / add high word from result
+ movl $0,%ebx
+ mov %edx,%eax
+ adc $0,%ebx / ebx = carry out of the high word (0 or 1)
+ stosl / [es:edi] = %eax; edi += 4;
+ dec %ecx / --a_len
+ jnz L10 / jmp if a_len != 0
+L11:
+ cmp $0,%ebx / is carry zero?
+ jz L14
+ mov 0(%edi),%eax / add in current word from *c
+ add %ebx,%eax
+ stosl / [es:edi] = eax; edi += 4; (flags unchanged)
+ jnc L14
+L12:
+ mov 0(%edi),%eax / add in current word from *c
+ adc $0,%eax / CF is still set here, so this adds 1
+ stosl / [es:edi] = eax; edi += 4; (flags unchanged)
+ jc L12
+L14:
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+ /
+ / Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
+ / so its high bit is 1. This code is from NSPR.
+ / Stores the quotient to *qp and the remainder to *rp; always returns 0.
+ / NOTE(review): div traps if the quotient does not fit in 32 bits, so
+ / callers presumably guarantee Nhi < divisor -- confirm against callers.
+ /
+ / mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
+ / mp_digit *qp, mp_digit *rp)
+
+ / Offsets after the push below:
+ / esp + 0: Caller's ebx
+ / esp + 4: return address
+ / esp + 8: Nhi argument
+ / esp + 12: Nlo argument
+ / esp + 16: divisor argument
+ / esp + 20: qp argument
+ / esp + 24: rp argument
+ / registers:
+ / eax: Nlo, then the quotient, then the return value (0)
+ / ebx: divisor, then qp, then rp
+ / edx: Nhi, then the remainder
+ /
+
+.globl s_mpv_div_2dx1d
+.type s_mpv_div_2dx1d,@function
+s_mpv_div_2dx1d:
+ push %ebx
+ mov 8(%esp),%edx / edx = Nhi
+ mov 12(%esp),%eax / eax = Nlo
+ mov 16(%esp),%ebx / ebx = divisor
+ div %ebx / eax = quotient, edx = remainder
+ mov 20(%esp),%ebx
+ mov %eax,0(%ebx) / *qp = quotient
+ mov 24(%esp),%ebx
+ mov %edx,0(%ebx) / *rp = remainder
+ xor %eax,%eax / return zero
+ pop %ebx
+ ret
+ nop
+
diff --git a/security/nss/lib/freebl/mpi/mpi_mips.s b/security/nss/lib/freebl/mpi/mpi_mips.s
new file mode 100644
index 000000000..455792bbb
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_mips.s
@@ -0,0 +1,472 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+#include <regdef.h>
+ .set noreorder
+ .set noat
+
+ .section .text, 1, 0x00000006, 4, 4
+.text:
+ .section .text
+
+ .ent s_mpv_mul_d_add
+ .globl s_mpv_mul_d_add
+
+ # Register use: a0 = a, a1 = a_len, a2 = b, a3 = c.
+ # The file sets noreorder, so the instruction written after each
+ # branch or jump is its delay slot and executes whether or not the
+ # branch is taken. Products are started with dmultu and collected
+ # later with mflo, so each multiply overlaps the following loads/stores.
+s_mpv_mul_d_add:
+ #/* c += a * b */
+ #void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b,
+ # mp_digit *c)
+ #{
+ # mp_digit a0, a1; regs a4, a5
+ # mp_digit c0, c1; regs a6, a7
+ # mp_digit cy = 0; reg t2
+ # mp_word w0, w1; regs t0, t1
+ #
+ # if (a_len) {
+ beq a1,zero,.L.1
+ move t2,zero # cy = 0
+ dsll32 a2,a2,0 # "b" is sometimes negative (?!?!)
+ dsrl32 a2,a2,0 # This clears the upper 32 bits.
+ # a0 = a[0];
+ lwu a4,0(a0)
+ # w0 = ((mp_word)b * a0);
+ dmultu a2,a4
+ # if (--a_len) {
+ addiu a1,a1,-1
+ beq a1,zero,.L.2
+ # while (a_len >= 2) {
+ sltiu t3,a1,2
+ bne t3,zero,.L.3
+ # a1 = a[1];
+ lwu a5,4(a0)
+.L.4:
+ # a_len -= 2;
+ addiu a1,a1,-2
+ # c0 = c[0];
+ lwu a6,0(a3)
+ # w0 += cy;
+ mflo t0
+ daddu t0,t0,t2
+ # w0 += c0;
+ daddu t0,t0,a6
+ # w1 = (mp_word)b * a1;
+ dmultu a2,a5 #
+ # cy = CARRYOUT(w0);
+ dsrl32 t2,t0,0
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # a0 = a[2];
+ lwu a4,8(a0)
+ # a += 2;
+ addiu a0,a0,8
+ # c1 = c[1];
+ lwu a7,4(a3)
+ # w1 += cy;
+ mflo t1
+ daddu t1,t1,t2
+ # w1 += c1;
+ daddu t1,t1,a7
+ # w0 = (mp_word)b * a0;
+ dmultu a2,a4 #
+ # cy = CARRYOUT(w1);
+ dsrl32 t2,t1,0
+ # c[1] = ACCUM(w1);
+ sw t1,4(a3)
+ # c += 2;
+ addiu a3,a3,8
+ sltiu t3,a1,2
+ beq t3,zero,.L.4
+ # a1 = a[1];
+ # NOTE(review): when the loop exits with a_len == 0 this delay-slot
+ # load reads the word just past the last digit of a; the value is
+ # not used on that path -- confirm the over-read is acceptable.
+ lwu a5,4(a0)
+ # }
+.L.3:
+ # c0 = c[0];
+ lwu a6,0(a3)
+ # w0 += cy;
+ # if (a_len) {
+ mflo t0
+ beq a1,zero,.L.5
+ daddu t0,t0,t2
+ # w1 = (mp_word)b * a1;
+ dmultu a2,a5
+ # w0 += c0;
+ daddu t0,t0,a6 #
+ # cy = CARRYOUT(w0);
+ dsrl32 t2,t0,0
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # c1 = c[1];
+ lwu a7,4(a3)
+ # w1 += cy;
+ mflo t1
+ daddu t1,t1,t2
+ # w1 += c1;
+ daddu t1,t1,a7
+ # c[1] = ACCUM(w1);
+ sw t1,4(a3)
+ # cy = CARRYOUT(w1);
+ dsrl32 t2,t1,0
+ # c += 1;
+ b .L.6
+ addiu a3,a3,4
+ # } else {
+.L.5:
+ # w0 += c0;
+ daddu t0,t0,a6
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # cy = CARRYOUT(w0);
+ b .L.6
+ dsrl32 t2,t0,0
+ # }
+ # } else {
+.L.2:
+ # c0 = c[0];
+ lwu a6,0(a3)
+ # w0 += c0;
+ mflo t0
+ daddu t0,t0,a6
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # cy = CARRYOUT(w0);
+ dsrl32 t2,t0,0
+ # }
+.L.6:
+ # c[1] = cy;
+ jr ra
+ sw t2,4(a3)
+ # }
+.L.1:
+ jr ra
+ nop
+ #}
+ #
+ .end s_mpv_mul_d_add
+
+ .ent s_mpv_mul_d_add_prop
+ .globl s_mpv_mul_d_add_prop
+
+ # Register use: a0 = a, a1 = a_len, a2 = b, a3 = c.
+ # The file sets noreorder, so the instruction written after each
+ # branch or jump is its delay slot and executes whether or not the
+ # branch is taken. Unlike s_mpv_mul_d_add, every path advances a3 past
+ # the last word written so that the carry loop at .M.6 can continue
+ # from there.
+s_mpv_mul_d_add_prop:
+ #/* c += a * b */
+ #void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b,
+ # mp_digit *c)
+ #{
+ # mp_digit a0, a1; regs a4, a5
+ # mp_digit c0, c1; regs a6, a7
+ # mp_digit cy = 0; reg t2
+ # mp_word w0, w1; regs t0, t1
+ #
+ # if (a_len) {
+ beq a1,zero,.M.1
+ move t2,zero # cy = 0
+ dsll32 a2,a2,0 # "b" is sometimes negative (?!?!)
+ dsrl32 a2,a2,0 # This clears the upper 32 bits.
+ # a0 = a[0];
+ lwu a4,0(a0)
+ # w0 = ((mp_word)b * a0);
+ dmultu a2,a4
+ # if (--a_len) {
+ addiu a1,a1,-1
+ beq a1,zero,.M.2
+ # while (a_len >= 2) {
+ sltiu t3,a1,2
+ bne t3,zero,.M.3
+ # a1 = a[1];
+ lwu a5,4(a0)
+.M.4:
+ # a_len -= 2;
+ addiu a1,a1,-2
+ # c0 = c[0];
+ lwu a6,0(a3)
+ # w0 += cy;
+ mflo t0
+ daddu t0,t0,t2
+ # w0 += c0;
+ daddu t0,t0,a6
+ # w1 = (mp_word)b * a1;
+ dmultu a2,a5 #
+ # cy = CARRYOUT(w0);
+ dsrl32 t2,t0,0
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # a0 = a[2];
+ lwu a4,8(a0)
+ # a += 2;
+ addiu a0,a0,8
+ # c1 = c[1];
+ lwu a7,4(a3)
+ # w1 += cy;
+ mflo t1
+ daddu t1,t1,t2
+ # w1 += c1;
+ daddu t1,t1,a7
+ # w0 = (mp_word)b * a0;
+ dmultu a2,a4 #
+ # cy = CARRYOUT(w1);
+ dsrl32 t2,t1,0
+ # c[1] = ACCUM(w1);
+ sw t1,4(a3)
+ # c += 2;
+ addiu a3,a3,8
+ sltiu t3,a1,2
+ beq t3,zero,.M.4
+ # a1 = a[1];
+ # NOTE(review): when the loop exits with a_len == 0 this delay-slot
+ # load reads the word just past the last digit of a; the value is
+ # not used on that path -- confirm the over-read is acceptable.
+ lwu a5,4(a0)
+ # }
+.M.3:
+ # c0 = c[0];
+ lwu a6,0(a3)
+ # w0 += cy;
+ # if (a_len) {
+ mflo t0
+ beq a1,zero,.M.5
+ daddu t0,t0,t2
+ # w1 = (mp_word)b * a1;
+ dmultu a2,a5
+ # w0 += c0;
+ daddu t0,t0,a6 #
+ # cy = CARRYOUT(w0);
+ dsrl32 t2,t0,0
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # c1 = c[1];
+ lwu a7,4(a3)
+ # w1 += cy;
+ mflo t1
+ daddu t1,t1,t2
+ # w1 += c1;
+ daddu t1,t1,a7
+ # c[1] = ACCUM(w1);
+ sw t1,4(a3)
+ # cy = CARRYOUT(w1);
+ dsrl32 t2,t1,0
+ # c += 2;
+ b .M.6
+ addiu a3,a3,8
+ # } else {
+.M.5:
+ # w0 += c0;
+ daddu t0,t0,a6
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # cy = CARRYOUT(w0);
+ dsrl32 t2,t0,0
+ # c += 1;
+ b .M.6
+ addiu a3,a3,4
+ # }
+ # } else {
+.M.2:
+ # c0 = c[0];
+ lwu a6,0(a3)
+ # w0 += c0;
+ mflo t0
+ daddu t0,t0,a6
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # cy = CARRYOUT(w0);
+ dsrl32 t2,t0,0
+ # c += 1;
+ addiu a3,a3,4
+ # }
+.M.6:
+
+ # while (cy) {
+ beq t2,zero,.M.1
+ nop
+.M.7:
+ # mp_word w = (mp_word)*c + cy;
+ lwu a6,0(a3)
+ daddu t2,t2,a6
+ # *c++ = ACCUM(w);
+ sw t2,0(a3)
+ # cy = CARRYOUT(w);
+ dsrl32 t2,t2,0
+ bne t2,zero,.M.7
+ addiu a3,a3,4
+
+ # }
+.M.1:
+ jr ra
+ nop
+ #}
+ #
+ .end s_mpv_mul_d_add_prop
+
+ .ent s_mpv_mul_d
+ .globl s_mpv_mul_d
+
+s_mpv_mul_d:
+ #/* c = a * b; when a_len != 0 the final carry is stored in the word after the last product digit */
+ #void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b,
+ # mp_digit *c)
+ #{
+ # mp_digit a0, a1; regs a4, a5
+ # mp_digit cy = 0; reg t2
+ # mp_word w0, w1; regs t0, t1
+ #
+ # if (a_len) {
+ beq a1,zero,.N.1
+ move t2,zero # cy = 0
+ dsll32 a2,a2,0 # "b" is sometimes negative (?!?!)
+ dsrl32 a2,a2,0 # This clears the upper 32 bits.
+ # a0 = a[0];
+ lwu a4,0(a0)
+ # w0 = ((mp_word)b * a0);
+ dmultu a2,a4
+ # if (--a_len) {
+ addiu a1,a1,-1
+ beq a1,zero,.N.2
+ # while (a_len >= 2) {
+ sltiu t3,a1,2
+ bne t3,zero,.N.3
+ # a1 = a[1];
+ lwu a5,4(a0)
+.N.4:
+ # a_len -= 2;
+ addiu a1,a1,-2
+ # w0 += cy;
+ mflo t0
+ daddu t0,t0,t2
+ # cy = CARRYOUT(w0);
+ dsrl32 t2,t0,0
+ # w1 = (mp_word)b * a1;
+ dmultu a2,a5
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # a0 = a[2];
+ lwu a4,8(a0)
+ # a += 2;
+ addiu a0,a0,8
+ # w1 += cy;
+ mflo t1
+ daddu t1,t1,t2
+ # cy = CARRYOUT(w1);
+ dsrl32 t2,t1,0
+ # w0 = (mp_word)b * a0;
+ dmultu a2,a4
+ # c[1] = ACCUM(w1);
+ sw t1,4(a3)
+ # c += 2;
+ addiu a3,a3,8
+ sltiu t3,a1,2
+ beq t3,zero,.N.4
+ # a1 = a[1]; NOTE(review): presumably a branch delay slot, so this load also runs on loop exit - confirm reading a[1] is safe when a_len is then 0
+ lwu a5,4(a0)
+ # }
+.N.3:
+ # w0 += cy;
+ # if (a_len) {
+ mflo t0
+ beq a1,zero,.N.5
+ daddu t0,t0,t2
+ # w1 = (mp_word)b * a1;
+ dmultu a2,a5 #
+ # cy = CARRYOUT(w0);
+ dsrl32 t2,t0,0
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # w1 += cy;
+ mflo t1
+ daddu t1,t1,t2
+ # c[1] = ACCUM(w1);
+ sw t1,4(a3)
+ # cy = CARRYOUT(w1);
+ dsrl32 t2,t1,0
+ # c += 1; (so the 4(a3) store at .N.6 lands just past the two words stored above)
+ b .N.6
+ addiu a3,a3,4
+ # } else {
+.N.5:
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # cy = CARRYOUT(w0);
+ b .N.6
+ dsrl32 t2,t0,0
+ # }
+ # } else {
+.N.2:
+ mflo t0 # w0 = product of b and the only digit
+ # c[0] = ACCUM(w0);
+ sw t0,0(a3)
+ # cy = CARRYOUT(w0);
+ dsrl32 t2,t0,0
+ # }
+.N.6:
+ # c[1] = cy;
+ jr ra
+ sw t2,4(a3)
+ # }
+.N.1:
+ jr ra
+ nop
+ #}
+ #
+ .end s_mpv_mul_d
+
+
+ .ent s_mpv_sqr_add_prop
+ .globl s_mpv_sqr_add_prop
+ #void s_mpv_sqr_add_prop(const mp_digit *a, mp_size a_len, mp_digit *sqrs);
+ # registers
+ # a0 *a
+ # a1 a_len
+ # a2 *sqr
+ # a3 digit from *a, a_i
+ # a4 square of digit from a, then the 64-bit sum that is stored back as two words
+ # a5,a6 next 2 digits in sqr (combined into one 64-bit value in a5)
+ # a7,t0 carry
+s_mpv_sqr_add_prop:
+ move a7,zero # NOTE(review): there is no a_len == 0 check before the decrement below; presumably callers pass a_len >= 1
+ move t0,zero
+ lwu a3,0(a0)
+ addiu a1,a1,-1 # --a_len
+ dmultu a3,a3
+ beq a1,zero,.P.3 # jump if we've already done the only sqr
+ addiu a0,a0,4 # ++a
+.P.2:
+ lwu a5,0(a2)
+ lwu a6,4(a2)
+ addiu a2,a2,8 # sqrs += 2;
+ dsll32 a6,a6,0
+ daddu a5,a5,a6 # a5 = 64-bit value formed from the two sqrs words
+ lwu a3,0(a0)
+ addiu a0,a0,4 # ++a
+ mflo a4 # a4 = square started by the previous dmultu
+ daddu a6,a5,a4
+ sltu a7,a6,a5 # a7 = a6 < a5 detect overflow
+ dmultu a3,a3
+ daddu a4,a6,t0 # add the carry from the previous pair
+ sltu t0,a4,a6 # overflow of that second addition
+ add t0,t0,a7 # t0 = combined carry out of the two 64-bit additions
+ sw a4,-8(a2)
+ addiu a1,a1,-1 # --a_len
+ dsrl32 a4,a4,0
+ bne a1,zero,.P.2 # loop if a_len > 0
+ sw a4,-4(a2)
+.P.3:
+ lwu a5,0(a2)
+ lwu a6,4(a2)
+ addiu a2,a2,8 # sqrs += 2;
+ dsll32 a6,a6,0
+ daddu a5,a5,a6
+ mflo a4
+ daddu a6,a5,a4
+ sltu a7,a6,a5 # a7 = a6 < a5 detect overflow
+ daddu a4,a6,t0
+ sltu t0,a4,a6
+ add t0,t0,a7
+ sw a4,-8(a2)
+ beq t0,zero,.P.9 # jump if no carry
+ dsrl32 a4,a4,0
+.P.8:
+ sw a4,-4(a2)
+ /* propagate final carry. NOTE(review): the sum in a6 is never stored, and the sltu compares full 64-bit values of a zero-extended word; confirm this loop really propagates the carry */
+ lwu a5,0(a2)
+ daddu a6,a5,t0
+ sltu t0,a6,a5
+ bne t0,zero,.P.8 # loop if carry persists
+ addiu a2,a2,4 # sqrs++
+.P.9:
+ jr ra
+ sw a4,-4(a2)
+
+ .end s_mpv_sqr_add_prop
diff --git a/security/nss/lib/freebl/mpi/mpi_sparc.c b/security/nss/lib/freebl/mpi/mpi_sparc.c
new file mode 100644
index 000000000..1e88357af
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_sparc.c
@@ -0,0 +1,226 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* Multiplication performance enhancements for sparc v8+vis CPUs. */
+
+#include "mpi-priv.h"
+#include <stddef.h>
+#include <sys/systeminfo.h>
+#include <strings.h>
+
+/* Contract shared by the two external routines declared below: */
+/* vector y must be 8-byte aligned, and n must be even */
+/* each returns the carry out of the high order word of the result */
+/* maximum n is 256 */
+
+/* vector x += vector y * scalar a; where y is of length n words. */
+extern mp_digit mul_add_inp(mp_digit *x, const mp_digit *y, int n, mp_digit a);
+
+/* vector z = vector x + vector y * scalar a; where y is of length n words. */
+extern mp_digit mul_add(mp_digit *z, const mp_digit *x, const mp_digit *y,
+ int n, mp_digit a);
+
+/* v8 versions of these functions run on any Sparc v8 CPU. */
+
+/* This trick works on Sparc V8 CPUs with the Workshop compilers. */
+#define MP_MUL_DxD(a, b, Phi, Plo) \
+ { \
+ unsigned long long product = (unsigned long long)a * b; \
+ Plo = (mp_digit)product; \
+ Phi = (mp_digit)(product >> MP_DIGIT_BIT); \
+ }
+
+/*
+ * c = a * b
+ *
+ * Multiplies the a_len digits at a by the single digit b.  The a_len
+ * product digits are written to c[0..a_len-1] and the final carry digit is
+ * written to c[a_len].
+ */
+static void
+v8_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+    mp_digit carry = 0;
+    mp_size ix;
+
+    for (ix = 0; ix < a_len; ++ix) {
+#if !defined(MP_NO_MP_WORD)
+        /* Double-width path: one mp_word holds the product plus carry. */
+        mp_word acc = ((mp_word)b * a[ix]) + carry;
+
+        c[ix] = ACCUM(acc);
+        carry = CARRYOUT(acc);
+#else
+        /* Single-width path: product arrives as separate high/low digits. */
+        mp_digit a_i = a[ix];
+        mp_digit lo, hi;
+
+        MP_MUL_DxD(a_i, b, hi, lo);
+
+        lo += carry;
+        if (lo < carry) {
+            ++hi; /* the low digit wrapped around */
+        }
+        c[ix] = lo;
+        carry = hi;
+#endif
+    }
+    c[a_len] = carry;
+}
+
+/*
+ * c += a * b
+ *
+ * Adds the product of the a_len digits at a and the single digit b into
+ * c[0..a_len-1].  The final carry digit overwrites c[a_len] (it is stored,
+ * not added).
+ */
+static void
+v8_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+    mp_digit carry = 0;
+    mp_size ix;
+
+    for (ix = 0; ix < a_len; ++ix) {
+#if !defined(MP_NO_MP_WORD)
+        /* Double-width path: product + existing digit + carry in one word. */
+        mp_word acc = ((mp_word)b * a[ix]) + c[ix] + carry;
+
+        c[ix] = ACCUM(acc);
+        carry = CARRYOUT(acc);
+#else
+        /* Single-width path: detect wrap-around after each addition. */
+        mp_digit a_i = a[ix];
+        mp_digit lo, hi;
+        mp_digit c_i;
+
+        MP_MUL_DxD(a_i, b, hi, lo);
+
+        lo += carry;
+        if (lo < carry) {
+            ++hi;
+        }
+        c_i = c[ix];
+        lo += c_i;
+        if (lo < c_i) {
+            ++hi;
+        }
+        c[ix] = lo;
+        carry = hi;
+#endif
+    }
+    c[a_len] = carry;
+}
+
+/*
+ * c += a * b, with carry propagation.
+ * Presently, this is only used by the Montgomery arithmetic code.
+ *
+ * Adds the product of the a_len digits at a and the single digit b into
+ * c[0..a_len-1], then keeps adding the remaining carry into the following
+ * digits of c until the carry becomes zero.
+ */
+static void
+v8_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+    mp_digit carry = 0;
+    mp_size ix;
+
+    /* Multiply-accumulate over the digits of a. */
+    for (ix = 0; ix < a_len; ++ix) {
+#if !defined(MP_NO_MP_WORD)
+        mp_word acc = ((mp_word)b * a[ix]) + c[ix] + carry;
+
+        c[ix] = ACCUM(acc);
+        carry = CARRYOUT(acc);
+#else
+        mp_digit a_i = a[ix];
+        mp_digit lo, hi;
+        mp_digit c_i;
+
+        MP_MUL_DxD(a_i, b, hi, lo);
+
+        lo += carry;
+        if (lo < carry) {
+            ++hi;
+        }
+
+        c_i = c[ix];
+        lo += c_i;
+        if (lo < c_i) {
+            ++hi;
+        }
+
+        c[ix] = lo;
+        carry = hi;
+#endif
+    }
+
+    /* Ripple the leftover carry through the higher digits of c. */
+    for (c += a_len; carry != 0; ++c) {
+#if !defined(MP_NO_MP_WORD)
+        mp_word acc = (mp_word)*c + carry;
+
+        *c = ACCUM(acc);
+        carry = CARRYOUT(acc);
+#else
+        mp_digit c_i = *c;
+
+        carry += c_i;
+        *c = carry;
+        carry = carry < c_i;
+#endif
+    }
+}
+
+/* These functions run only on v8plus+vis or v9+vis CPUs. */
+
+/*
+ * c = a * b
+ *
+ * For a_len <= 256 the work is done by mul_add_inp() on a zeroed c; longer
+ * inputs fall back to v8_mpv_mul_d().  The input is first copied into a
+ * local 8-byte-aligned buffer when it aliases c (c is zeroed before the
+ * multiply), when a is not 8-byte aligned, or when a_len is odd (a zero
+ * digit is then appended to the copy).
+ */
+void
+s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+    mp_digit scratch[258];
+    mp_digit carry;
+
+    if (a_len > 256) {
+        v8_mpv_mul_d(a, a_len, b, c);
+        return;
+    }
+
+    if (a == c || ((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {
+        /* Skip one digit of scratch if the array itself is misaligned. */
+        mp_digit *aligned = scratch;
+
+        if (((ptrdiff_t)scratch & 0x7) != 0) {
+            aligned = scratch + 1;
+        }
+        memcpy(aligned, a, a_len * sizeof(*a));
+        if ((a_len & 1) != 0) {
+            aligned[a_len] = 0;
+        }
+        a = aligned;
+    }
+
+    s_mp_setz(c, a_len + 1);
+    carry = mul_add_inp(c, a, a_len, b);
+    c[a_len] = carry;
+}
+
+/*
+ * c += a * b, where a is a_len words long.
+ *
+ * For a_len <= 256 the product is accumulated into c by mul_add_inp() and
+ * the returned carry is stored in c[a_len]; longer inputs fall back to
+ * v8_mpv_mul_d_add().  The input is first copied into a local
+ * 8-byte-aligned buffer when a is not 8-byte aligned or a_len is odd (a
+ * zero digit is then appended to the copy).
+ */
+void
+s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+    mp_digit scratch[258];
+    mp_digit carry;
+
+    if (a_len > 256) {
+        v8_mpv_mul_d_add(a, a_len, b, c);
+        return;
+    }
+
+    if (((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {
+        /* Skip one digit of scratch if the array itself is misaligned. */
+        mp_digit *aligned = scratch;
+
+        if (((ptrdiff_t)scratch & 0x7) != 0) {
+            aligned = scratch + 1;
+        }
+        memcpy(aligned, a, a_len * sizeof(*a));
+        if ((a_len & 1) != 0) {
+            aligned[a_len] = 0;
+        }
+        a = aligned;
+    }
+
+    carry = mul_add_inp(c, a, a_len, b);
+    c[a_len] = carry;
+}
+
+/*
+ * c += a * b, where a is a_len words long, with carry propagation.
+ *
+ * For a_len <= 256 the product is accumulated into c by mul_add_inp() and
+ * the returned carry is then added into c[a_len] and onward until it
+ * becomes zero; longer inputs fall back to v8_mpv_mul_d_add_prop().  The
+ * input is first copied into a local 8-byte-aligned buffer when a is not
+ * 8-byte aligned or a_len is odd (a zero digit is then appended to the
+ * copy).
+ */
+void
+s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+    mp_digit scratch[258];
+    mp_digit carry;
+
+    if (a_len > 256) {
+        v8_mpv_mul_d_add_prop(a, a_len, b, c);
+        return;
+    }
+
+    if (((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) {
+        /* Skip one digit of scratch if the array itself is misaligned. */
+        mp_digit *aligned = scratch;
+
+        if (((ptrdiff_t)scratch & 0x7) != 0) {
+            aligned = scratch + 1;
+        }
+        memcpy(aligned, a, a_len * sizeof(*a));
+        if ((a_len & 1) != 0) {
+            aligned[a_len] = 0;
+        }
+        a = aligned;
+    }
+
+    carry = mul_add_inp(c, a, a_len, b);
+
+    /* Ripple the carry upward; a wrapped sum means another carry. */
+    for (c += a_len; carry != 0; ++c) {
+        mp_digit sum = carry + *c;
+
+        *c = sum;
+        carry = sum < carry;
+    }
+}
diff --git a/security/nss/lib/freebl/mpi/mpi_sse2.s b/security/nss/lib/freebl/mpi/mpi_sse2.s
new file mode 100644
index 000000000..16a47019c
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_sse2.s
@@ -0,0 +1,294 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifdef DARWIN
+#define s_mpv_mul_d _s_mpv_mul_d
+#define s_mpv_mul_d_add _s_mpv_mul_d_add
+#define s_mpv_mul_d_add_prop _s_mpv_mul_d_add_prop
+#define s_mpv_sqr_add_prop _s_mpv_sqr_add_prop
+#define s_mpv_div_2dx1d _s_mpv_div_2dx1d
+#define TYPE_FUNCTION(x)
+#else
+#define TYPE_FUNCTION(x) .type x, @function
+#endif
+# With the Darwin macro set, the five entry points get a leading underscore and the type macro expands to nothing; otherwise it emits a .type directive.
+.text
+
+ # ebp - 8: caller's esi
+ # ebp - 4: caller's edi
+ # ebp + 0: caller's ebp
+ # ebp + 4: return address
+ # ebp + 8: a argument
+ # ebp + 12: a_len argument
+ # ebp + 16: b argument
+ # ebp + 20: c argument
+ # registers:
+ # ebx: (not used by this routine)
+ # ecx: a_len
+ # esi: a ptr
+ # edi: c ptr (mm1 holds b, mm2 holds the running carry)
+.globl s_mpv_mul_d
+.private_extern s_mpv_mul_d
+TYPE_FUNCTION(s_mpv_mul_d)
+s_mpv_mul_d:
+ push %ebp
+ mov %esp, %ebp
+ push %edi
+ push %esi
+ psubq %mm2, %mm2 # carry = 0
+ mov 12(%ebp), %ecx # ecx = a_len
+ movd 16(%ebp), %mm1 # mm1 = b
+ mov 20(%ebp), %edi # edi = c
+ cmp $0, %ecx
+ je 2f # jmp if a_len == 0
+ mov 8(%ebp), %esi # esi = a
+ cld # direction flag cleared (no string instruction follows in this routine)
+1:
+ movd 0(%esi), %mm0 # mm0 = *a++
+ add $4, %esi
+ pmuludq %mm1, %mm0 # mm0 = b * *a++
+ paddq %mm0, %mm2 # add the carry
+ movd %mm2, 0(%edi) # store the 32bit result
+ add $4, %edi
+ psrlq $32, %mm2 # save the carry
+ dec %ecx # --a_len
+ jnz 1b # jmp if a_len != 0
+2:
+ movd %mm2, 0(%edi) # *c = carry
+ emms
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+ # ebp - 8: caller's esi
+ # ebp - 4: caller's edi
+ # ebp + 0: caller's ebp
+ # ebp + 4: return address
+ # ebp + 8: a argument
+ # ebp + 12: a_len argument
+ # ebp + 16: b argument
+ # ebp + 20: c argument
+ # registers:
+ # ebx: (not used by this routine)
+ # ecx: a_len
+ # esi: a ptr
+ # edi: c ptr (mm1 holds b, mm2 holds the running carry)
+.globl s_mpv_mul_d_add
+.private_extern s_mpv_mul_d_add
+TYPE_FUNCTION(s_mpv_mul_d_add)
+s_mpv_mul_d_add:
+ push %ebp
+ mov %esp, %ebp
+ push %edi
+ push %esi
+ psubq %mm2, %mm2 # carry = 0
+ mov 12(%ebp), %ecx # ecx = a_len
+ movd 16(%ebp), %mm1 # mm1 = b
+ mov 20(%ebp), %edi # edi = c
+ cmp $0, %ecx
+ je 2f # jmp if a_len == 0
+ mov 8(%ebp), %esi # esi = a
+ cld # direction flag cleared (no string instruction follows in this routine)
+1:
+ movd 0(%esi), %mm0 # mm0 = *a++
+ add $4, %esi
+ pmuludq %mm1, %mm0 # mm0 = b * *a++
+ paddq %mm0, %mm2 # add the carry
+ movd 0(%edi), %mm0 # mm0 = *c
+ paddq %mm0, %mm2 # add the existing word *c
+ movd %mm2, 0(%edi) # store the 32bit result
+ add $4, %edi
+ psrlq $32, %mm2 # save the carry
+ dec %ecx # --a_len
+ jnz 1b # jmp if a_len != 0
+2:
+ movd %mm2, 0(%edi) # *c = carry
+ emms
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+ # ebp - 12: caller's ebx
+ # ebp - 8: caller's esi
+ # ebp - 4: caller's edi
+ # ebp + 0: caller's ebp
+ # ebp + 4: return address
+ # ebp + 8: a argument
+ # ebp + 12: a_len argument
+ # ebp + 16: b argument
+ # ebp + 20: c argument
+ # registers:
+ # eax: scratch word during the carry propagation
+ # ebx: carry
+ # ecx: a_len
+ # esi: a ptr
+ # edi: c ptr
+.globl s_mpv_mul_d_add_prop
+.private_extern s_mpv_mul_d_add_prop
+TYPE_FUNCTION(s_mpv_mul_d_add_prop)
+s_mpv_mul_d_add_prop:
+ push %ebp
+ mov %esp, %ebp
+ push %edi
+ push %esi
+ push %ebx
+ psubq %mm2, %mm2 # carry = 0
+ mov 12(%ebp), %ecx # ecx = a_len
+ movd 16(%ebp), %mm1 # mm1 = b
+ mov 20(%ebp), %edi # edi = c
+ cmp $0, %ecx
+ je 2f # jmp if a_len == 0
+ mov 8(%ebp), %esi # esi = a
+ cld # stosl below must advance edi upward
+1:
+ movd 0(%esi), %mm0 # mm0 = *a++
+ movd 0(%edi), %mm3 # fetch the sum
+ add $4, %esi
+ pmuludq %mm1, %mm0 # mm0 = b * *a++
+ paddq %mm0, %mm2 # add the carry
+ paddq %mm3, %mm2 # add *c++
+ movd %mm2, 0(%edi) # store the 32bit result
+ add $4, %edi
+ psrlq $32, %mm2 # save the carry
+ dec %ecx # --a_len
+ jnz 1b # jmp if a_len != 0
+2:
+ movd %mm2, %ebx # ebx = final carry of the multiply loop
+ cmp $0, %ebx # is carry zero?
+ jz 4f
+ mov 0(%edi), %eax # add in current word from *c
+ add %ebx, %eax
+ stosl # [es:edi] = eax; edi += 4;
+ jnc 4f # done unless that addition carried out
+3:
+ mov 0(%edi), %eax # add in current word from *c
+ adc $0, %eax
+ stosl # [es:edi] = eax; edi += 4;
+ jc 3b
+4:
+ emms
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+ # ebp - 12: caller's ebx
+ # ebp - 8: caller's esi
+ # ebp - 4: caller's edi
+ # ebp + 0: caller's ebp
+ # ebp + 4: return address
+ # ebp + 8: pa argument
+ # ebp + 12: a_len argument
+ # ebp + 16: ps argument
+ # registers:
+ # eax: scratch word during the carry propagation
+ # ebx: carry
+ # ecx: a_len
+ # esi: a ptr
+ # edi: ps ptr (advances two words per digit of a)
+.globl s_mpv_sqr_add_prop
+.private_extern s_mpv_sqr_add_prop
+TYPE_FUNCTION(s_mpv_sqr_add_prop)
+s_mpv_sqr_add_prop:
+ push %ebp
+ mov %esp, %ebp
+ push %edi
+ push %esi
+ push %ebx
+ psubq %mm2, %mm2 # carry = 0
+ mov 12(%ebp), %ecx # ecx = a_len
+ mov 16(%ebp), %edi # edi = ps
+ cmp $0, %ecx
+ je 2f # jmp if a_len == 0
+ mov 8(%ebp), %esi # esi = a
+ cld # stosl below must advance edi upward
+1:
+ movd 0(%esi), %mm0 # mm0 = *a
+ movd 0(%edi), %mm3 # fetch the sum
+ add $4, %esi
+ pmuludq %mm0, %mm0 # mm0 = sqr(a)
+ paddq %mm0, %mm2 # add the carry
+ paddq %mm3, %mm2 # add the low word
+ movd 4(%edi), %mm3 # mm3 = existing high word
+ movd %mm2, 0(%edi) # store the 32bit result
+ psrlq $32, %mm2 # keep the upper half as the running value
+ paddq %mm3, %mm2 # add the high word
+ movd %mm2, 4(%edi) # store the 32bit result
+ psrlq $32, %mm2 # save the carry.
+ add $8, %edi
+ dec %ecx # --a_len
+ jnz 1b # jmp if a_len != 0
+2:
+ movd %mm2, %ebx # ebx = final carry of the squaring loop
+ cmp $0, %ebx # is carry zero?
+ jz 4f
+ mov 0(%edi), %eax # add in current word from *c
+ add %ebx, %eax
+ stosl # [es:edi] = eax; edi += 4;
+ jnc 4f # done unless that addition carried out
+3:
+ mov 0(%edi), %eax # add in current word from *c
+ adc $0, %eax
+ stosl # [es:edi] = eax; edi += 4;
+ jc 3b
+4:
+ emms
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+ #
+ # Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
+ # so its high bit is 1. This code is from NSPR.
+ #
+ # mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
+ # mp_digit *qp, mp_digit *rp)
+
+ # esp + 0: Caller's ebx
+ # esp + 4: return address
+ # esp + 8: Nhi argument
+ # esp + 12: Nlo argument
+ # esp + 16: divisor argument
+ # esp + 20: qp argument
+ # esp + 24: rp argument
+ # registers:
+ # eax: Nlo, then the quotient, then the zero return value
+ # ebx: divisor, then qp, then rp
+ # ecx: (not used by this routine)
+ # edx: Nhi, then the remainder
+ # esi: (not used by this routine)
+ # edi: (not used by this routine)
+ #
+.globl s_mpv_div_2dx1d
+.private_extern s_mpv_div_2dx1d
+TYPE_FUNCTION(s_mpv_div_2dx1d)
+s_mpv_div_2dx1d:
+ push %ebx
+ mov 8(%esp), %edx # edx = Nhi
+ mov 12(%esp), %eax # eax = Nlo
+ mov 16(%esp), %ebx # ebx = divisor
+ div %ebx # eax = quotient, edx = remainder
+ mov 20(%esp), %ebx
+ mov %eax, 0(%ebx) # *qp = quotient
+ mov 24(%esp), %ebx
+ mov %edx, 0(%ebx) # *rp = remainder
+ xor %eax, %eax # return zero
+ pop %ebx
+ ret
+ nop
+
+#ifndef DARWIN
+ # Magic indicating no need for an executable stack
+.section .note.GNU-stack, "", @progbits
+.previous
+#endif
diff --git a/security/nss/lib/freebl/mpi/mpi_x86.s b/security/nss/lib/freebl/mpi/mpi_x86.s
new file mode 100644
index 000000000..8f7e2130c
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_x86.s
@@ -0,0 +1,541 @@
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+.data
+.align 4
+ #
+ # -1 means to call s_mpi_is_sse2 to determine if we support sse2
+ # instructions.
+ # 0 means to use x86 instructions
+ # 1 means to use sse2 instructions
+.type is_sse,@object
+.size is_sse,4
+is_sse: .long -1
+
+#
+# sigh, handle the difference between -fPIC and not PIC
+# default to pic, since this file seems to be exclusively
+# linux right now (solaris uses mpi_i86pc.s and windows uses
+# mpi_x86_asm.c)
+# NOTE(review): the PIC forms address the variable relative to %ebx; presumably %ebx must already hold the GOT address when GET/PUT run - confirm against callers
+.ifndef NO_PIC
+.macro GET var,reg
+ movl \var@GOTOFF(%ebx),\reg
+.endm
+.macro PUT reg,var
+ movl \reg,\var@GOTOFF(%ebx)
+.endm
+.else
+.macro GET var,reg
+ movl \var,\reg
+.endm
+.macro PUT reg,var
+ movl \reg,\var
+.endm
+.endif
+
+.text
+
+
+ # ebp - 36: caller's esi (x86 path; caller's ebx is pushed next, below it)
+ # ebp - 32: caller's edi
+ # ebp - 28: (this and the slots up to ebp - 4 are reserved by sub $28,%esp and not referenced)
+ # ebp - 24:
+ # ebp - 20:
+ # ebp - 16:
+ # ebp - 12:
+ # ebp - 8:
+ # ebp - 4:
+ # ebp + 0: caller's ebp
+ # ebp + 4: return address
+ # ebp + 8: a argument
+ # ebp + 12: a_len argument
+ # ebp + 16: b argument
+ # ebp + 20: c argument
+ # registers:
+ # eax: is_sse on entry, then the low half of each product
+ # ebx: carry
+ # ecx: a_len
+ # edx: b, then the high half of each product
+ # esi: a ptr
+ # edi: c ptr
+.globl s_mpv_mul_d
+.type s_mpv_mul_d,@function
+s_mpv_mul_d:
+ GET is_sse,%eax
+ cmp $0,%eax
+ je s_mpv_mul_d_x86 # 0: plain x86 path
+ jg s_mpv_mul_d_sse2 # positive: sse2 path
+ call s_mpi_is_sse2 # negative: not probed yet
+ PUT %eax,is_sse # remember the answer
+ cmp $0,%eax
+ jg s_mpv_mul_d_sse2
+s_mpv_mul_d_x86:
+ push %ebp
+ mov %esp,%ebp
+ sub $28,%esp
+ push %edi
+ push %esi
+ push %ebx
+ movl $0,%ebx # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ mov 20(%ebp),%edi # edi = c
+ cmp $0,%ecx
+ je 2f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld
+1:
+ lodsl # eax = [ds:esi]; esi += 4
+ mov 16(%ebp),%edx # edx = b
+ mull %edx # edx:eax = Phi:Plo = a_i * b
+
+ add %ebx,%eax # add carry (%ebx) to edx:eax
+ adc $0,%edx
+ mov %edx,%ebx # high half of product becomes next carry
+
+ stosl # [es:edi] = eax; edi += 4;
+ dec %ecx # --a_len
+ jnz 1b # jmp if a_len != 0
+2:
+ mov %ebx,0(%edi) # *c = carry
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+s_mpv_mul_d_sse2:
+ push %ebp
+ mov %esp,%ebp
+ push %edi
+ push %esi
+ psubq %mm2,%mm2 # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ movd 16(%ebp),%mm1 # mm1 = b
+ mov 20(%ebp),%edi # edi = c
+ cmp $0,%ecx
+ je 6f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld # (no string instruction follows on this path)
+5:
+ movd 0(%esi),%mm0 # mm0 = *a++
+ add $4,%esi
+ pmuludq %mm1,%mm0 # mm0 = b * *a++
+ paddq %mm0,%mm2 # add the carry
+ movd %mm2,0(%edi) # store the 32bit result
+ add $4,%edi
+ psrlq $32, %mm2 # save the carry
+ dec %ecx # --a_len
+ jnz 5b # jmp if a_len != 0
+6:
+ movd %mm2,0(%edi) # *c = carry
+ emms
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+ # ebp - 36: caller's esi (x86 path; caller's ebx is pushed next, below it)
+ # ebp - 32: caller's edi
+ # ebp - 28: (this and the slots up to ebp - 4 are reserved by sub $28,%esp and not referenced)
+ # ebp - 24:
+ # ebp - 20:
+ # ebp - 16:
+ # ebp - 12:
+ # ebp - 8:
+ # ebp - 4:
+ # ebp + 0: caller's ebp
+ # ebp + 4: return address
+ # ebp + 8: a argument
+ # ebp + 12: a_len argument
+ # ebp + 16: b argument
+ # ebp + 20: c argument
+ # registers:
+ # eax: is_sse on entry, then the low half of each product
+ # ebx: carry (also briefly holds the current word of c)
+ # ecx: a_len
+ # edx: b, then the high half of each product
+ # esi: a ptr
+ # edi: c ptr
+.globl s_mpv_mul_d_add
+.type s_mpv_mul_d_add,@function
+s_mpv_mul_d_add:
+ GET is_sse,%eax
+ cmp $0,%eax
+ je s_mpv_mul_d_add_x86 # 0: plain x86 path
+ jg s_mpv_mul_d_add_sse2 # positive: sse2 path
+ call s_mpi_is_sse2 # negative: not probed yet
+ PUT %eax,is_sse # remember the answer
+ cmp $0,%eax
+ jg s_mpv_mul_d_add_sse2
+s_mpv_mul_d_add_x86:
+ push %ebp
+ mov %esp,%ebp
+ sub $28,%esp
+ push %edi
+ push %esi
+ push %ebx
+ movl $0,%ebx # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ mov 20(%ebp),%edi # edi = c
+ cmp $0,%ecx
+ je 11f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld
+10:
+ lodsl # eax = [ds:esi]; esi += 4
+ mov 16(%ebp),%edx # edx = b
+ mull %edx # edx:eax = Phi:Plo = a_i * b
+
+ add %ebx,%eax # add carry (%ebx) to edx:eax
+ adc $0,%edx
+ mov 0(%edi),%ebx # add in current word from *c
+ add %ebx,%eax
+ adc $0,%edx
+ mov %edx,%ebx # high half of product becomes next carry
+
+ stosl # [es:edi] = eax; edi += 4;
+ dec %ecx # --a_len
+ jnz 10b # jmp if a_len != 0
+11:
+ mov %ebx,0(%edi) # *c = carry
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+s_mpv_mul_d_add_sse2:
+ push %ebp
+ mov %esp,%ebp
+ push %edi
+ push %esi
+ psubq %mm2,%mm2 # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ movd 16(%ebp),%mm1 # mm1 = b
+ mov 20(%ebp),%edi # edi = c
+ cmp $0,%ecx
+ je 16f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld # (no string instruction follows on this path)
+15:
+ movd 0(%esi),%mm0 # mm0 = *a++
+ add $4,%esi
+ pmuludq %mm1,%mm0 # mm0 = b * *a++
+ paddq %mm0,%mm2 # add the carry
+ movd 0(%edi),%mm0 # mm0 = *c
+ paddq %mm0,%mm2 # add the existing word *c
+ movd %mm2,0(%edi) # store the 32bit result
+ add $4,%edi
+ psrlq $32, %mm2 # save the carry
+ dec %ecx # --a_len
+ jnz 15b # jmp if a_len != 0
+16:
+ movd %mm2,0(%edi) # *c = carry
+ emms
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+ # ebp - 8: caller's esi (sse2 path; the x86 path first reserves 28 bytes with sub $28,%esp)
+ # ebp - 4: caller's edi (sse2 path)
+ # ebp + 0: caller's ebp
+ # ebp + 4: return address
+ # ebp + 8: a argument
+ # ebp + 12: a_len argument
+ # ebp + 16: b argument
+ # ebp + 20: c argument
+ # registers:
+ # eax: is_sse on entry, then product low half / scratch word
+ # ebx: carry
+ # ecx: a_len
+ # edx: b, then the high half of each product (x86 path)
+ # esi: a ptr
+ # edi: c ptr
+.globl s_mpv_mul_d_add_prop
+.type s_mpv_mul_d_add_prop,@function
+s_mpv_mul_d_add_prop:
+ GET is_sse,%eax
+ cmp $0,%eax
+ je s_mpv_mul_d_add_prop_x86 # 0: plain x86 path
+ jg s_mpv_mul_d_add_prop_sse2 # positive: sse2 path
+ call s_mpi_is_sse2 # negative: not probed yet
+ PUT %eax,is_sse # remember the answer
+ cmp $0,%eax
+ jg s_mpv_mul_d_add_prop_sse2
+s_mpv_mul_d_add_prop_x86:
+ push %ebp
+ mov %esp,%ebp
+ sub $28,%esp
+ push %edi
+ push %esi
+ push %ebx
+ movl $0,%ebx # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ mov 20(%ebp),%edi # edi = c
+ cmp $0,%ecx
+ je 21f # jmp if a_len == 0
+ cld
+ mov 8(%ebp),%esi # esi = a
+20:
+ lodsl # eax = [ds:esi]; esi += 4
+ mov 16(%ebp),%edx # edx = b
+ mull %edx # edx:eax = Phi:Plo = a_i * b
+
+ add %ebx,%eax # add carry (%ebx) to edx:eax
+ adc $0,%edx
+ mov 0(%edi),%ebx # add in current word from *c
+ add %ebx,%eax
+ adc $0,%edx
+ mov %edx,%ebx # high half of product becomes next carry
+
+ stosl # [es:edi] = eax; edi += 4;
+ dec %ecx # --a_len
+ jnz 20b # jmp if a_len != 0
+21:
+ cmp $0,%ebx # is carry zero?
+ jz 23f
+ mov 0(%edi),%eax # add in current word from *c
+ add %ebx,%eax
+ stosl # [es:edi] = eax; edi += 4;
+ jnc 23f # done unless that addition carried out
+22:
+ mov 0(%edi),%eax # add in current word from *c
+ adc $0,%eax
+ stosl # [es:edi] = eax; edi += 4;
+ jc 22b
+23:
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+s_mpv_mul_d_add_prop_sse2:
+ push %ebp
+ mov %esp,%ebp
+ push %edi
+ push %esi
+ push %ebx
+ psubq %mm2,%mm2 # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ movd 16(%ebp),%mm1 # mm1 = b
+ mov 20(%ebp),%edi # edi = c
+ cmp $0,%ecx
+ je 26f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld
+25:
+ movd 0(%esi),%mm0 # mm0 = *a++
+ movd 0(%edi),%mm3 # fetch the sum
+ add $4,%esi
+ pmuludq %mm1,%mm0 # mm0 = b * *a++
+ paddq %mm0,%mm2 # add the carry
+ paddq %mm3,%mm2 # add *c++
+ movd %mm2,0(%edi) # store the 32bit result
+ add $4,%edi
+ psrlq $32, %mm2 # save the carry
+ dec %ecx # --a_len
+ jnz 25b # jmp if a_len != 0
+26:
+ movd %mm2,%ebx # ebx = final carry of the multiply loop
+ cmp $0,%ebx # is carry zero?
+ jz 28f
+ mov 0(%edi),%eax # add in current word from *c
+ add %ebx, %eax
+ stosl # [es:edi] = eax; edi += 4;
+ jnc 28f # done unless that addition carried out
+27:
+ mov 0(%edi),%eax # add in current word from *c
+ adc $0,%eax
+ stosl # [es:edi] = eax; edi += 4;
+ jc 27b
+28:
+ emms
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+
+ # ebp - 20: caller's esi (x86 path; caller's ebx is pushed next, below it)
+ # ebp - 16: caller's edi
+ # ebp - 12: (this slot and the two above it are reserved by sub $12,%esp; the code below never references them)
+ # ebp - 8: carry
+ # ebp - 4: a_len local
+ # ebp + 0: caller's ebp
+ # ebp + 4: return address
+ # ebp + 8: pa argument
+ # ebp + 12: a_len argument
+ # ebp + 16: ps argument
+ # ebp + 20:
+ # registers:
+ # eax: is_sse on entry, then digit / product low half / scratch word
+ # ebx: carry (also briefly holds the current words of ps)
+ # ecx: a_len
+ # edx: high half of each square (x86 path)
+ # esi: a ptr
+ # edi: ps ptr (advances two words per digit of a)
+
+.globl s_mpv_sqr_add_prop
+.type s_mpv_sqr_add_prop,@function
+s_mpv_sqr_add_prop:
+ GET is_sse,%eax
+ cmp $0,%eax
+ je s_mpv_sqr_add_prop_x86 # 0: plain x86 path
+ jg s_mpv_sqr_add_prop_sse2 # positive: sse2 path
+ call s_mpi_is_sse2 # negative: not probed yet
+ PUT %eax,is_sse # remember the answer
+ cmp $0,%eax
+ jg s_mpv_sqr_add_prop_sse2
+s_mpv_sqr_add_prop_x86:
+ push %ebp
+ mov %esp,%ebp
+ sub $12,%esp
+ push %edi
+ push %esi
+ push %ebx
+ movl $0,%ebx # carry = 0
+ mov 12(%ebp),%ecx # a_len
+ mov 16(%ebp),%edi # edi = ps
+ cmp $0,%ecx
+ je 31f # jump if a_len == 0
+ cld
+ mov 8(%ebp),%esi # esi = pa
+30:
+ lodsl # %eax = [ds:si]; si += 4;
+ mull %eax # edx:eax = square of the digit
+
+ add %ebx,%eax # add "carry"
+ adc $0,%edx
+ mov 0(%edi),%ebx
+ add %ebx,%eax # add low word from result
+ mov 4(%edi),%ebx
+ stosl # [es:di] = %eax; di += 4;
+ adc %ebx,%edx # add high word from result
+ movl $0,%ebx
+ mov %edx,%eax
+ adc $0,%ebx # ebx = carry out of the high-word addition
+ stosl # [es:di] = %eax; di += 4;
+ dec %ecx # --a_len
+ jnz 30b # jmp if a_len != 0
+31:
+ cmp $0,%ebx # is carry zero?
+ jz 34f
+ mov 0(%edi),%eax # add in current word from *c
+ add %ebx,%eax
+ stosl # [es:edi] = eax; edi += 4;
+ jnc 34f # done unless that addition carried out
+32:
+ mov 0(%edi),%eax # add in current word from *c
+ adc $0,%eax
+ stosl # [es:edi] = eax; edi += 4;
+ jc 32b
+34:
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+s_mpv_sqr_add_prop_sse2:
+ push %ebp
+ mov %esp,%ebp
+ push %edi
+ push %esi
+ push %ebx
+ psubq %mm2,%mm2 # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ mov 16(%ebp),%edi # edi = ps
+ cmp $0,%ecx
+ je 36f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld
+35:
+ movd 0(%esi),%mm0 # mm0 = *a
+ movd 0(%edi),%mm3 # fetch the sum
+ add $4,%esi
+ pmuludq %mm0,%mm0 # mm0 = sqr(a)
+ paddq %mm0,%mm2 # add the carry
+ paddq %mm3,%mm2 # add the low word
+ movd 4(%edi),%mm3 # mm3 = existing high word
+ movd %mm2,0(%edi) # store the 32bit result
+ psrlq $32, %mm2 # keep the upper half as the running value
+ paddq %mm3,%mm2 # add the high word
+ movd %mm2,4(%edi) # store the 32bit result
+ psrlq $32, %mm2 # save the carry.
+ add $8,%edi
+ dec %ecx # --a_len
+ jnz 35b # jmp if a_len != 0
+36:
+ movd %mm2,%ebx # ebx = final carry of the squaring loop
+ cmp $0,%ebx # is carry zero?
+ jz 38f
+ mov 0(%edi),%eax # add in current word from *c
+ add %ebx, %eax
+ stosl # [es:edi] = eax; edi += 4;
+ jnc 38f # done unless that addition carried out
+37:
+ mov 0(%edi),%eax # add in current word from *c
+ adc $0,%eax
+ stosl # [es:edi] = eax; edi += 4;
+ jc 37b
+38:
+ emms
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+ #
+ # Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
+ # so its high bit is 1. This code is from NSPR.
+ #
+ # mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
+ # mp_digit *qp, mp_digit *rp)
+
+ # esp + 0: Caller's ebx
+ # esp + 4: return address
+ # esp + 8: Nhi argument
+ # esp + 12: Nlo argument
+ # esp + 16: divisor argument
+ # esp + 20: qp argument
+ # esp + 24: rp argument
+ # registers:
+ # eax: Nlo, then the quotient, then the zero return value
+ # ebx: divisor, then qp, then rp
+ # ecx: (not used by this routine)
+ # edx: Nhi, then the remainder
+ # esi: (not used by this routine)
+ # edi: (not used by this routine)
+ #
+
+.globl s_mpv_div_2dx1d
+.type s_mpv_div_2dx1d,@function
+s_mpv_div_2dx1d:
+ push %ebx
+ mov 8(%esp),%edx # edx = Nhi
+ mov 12(%esp),%eax # eax = Nlo
+ mov 16(%esp),%ebx # ebx = divisor
+ div %ebx # eax = quotient, edx = remainder
+ mov 20(%esp),%ebx
+ mov %eax,0(%ebx) # *qp = quotient
+ mov 24(%esp),%ebx
+ mov %edx,0(%ebx) # *rp = remainder
+ xor %eax,%eax # return zero
+ pop %ebx
+ ret
+ nop
+
+ # Magic indicating no need for an executable stack
+.section .note.GNU-stack, "", @progbits
+.previous
diff --git a/security/nss/lib/freebl/mpi/mpi_x86_asm.c b/security/nss/lib/freebl/mpi/mpi_x86_asm.c
new file mode 100644
index 000000000..4faeef30c
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_x86_asm.c
@@ -0,0 +1,531 @@
+/*
+ * mpi_x86_asm.c - MSVC inline assembly implementation of s_mpv_ functions.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi-priv.h"
+
+static int is_sse = -1; /* dispatch flag: 0 selects the x86 code, > 0 the SSE2 code, negative means call s_mpi_is_sse2() and store its result here */
+extern unsigned long s_mpi_is_sse2(); /* its return value is stored in is_sse by the routines below */
+
+/*
+ * s_mpv_mul_d: c = a * b.  Each product digit is stored through c and the
+ * final carry is stored in the word that follows.  The entry code reads
+ * is_sse to pick the plain x86 loop or the SSE2 loop, calling
+ * s_mpi_is_sse2() first when is_sse is still negative.
+ *
+ * Stack layout below is for the x86 path (which reserves 28 bytes with
+ * "sub esp,28" and then pushes edi, esi and ebx); the SSE2 path pushes
+ * only edi and esi directly below the saved ebp.
+ *
+ * ebp - 36: caller's esi
+ * ebp - 32: caller's edi
+ * ebp - 28:
+ * ebp - 24:
+ * ebp - 20:
+ * ebp - 16:
+ * ebp - 12:
+ * ebp - 8:
+ * ebp - 4:
+ * ebp + 0: caller's ebp
+ * ebp + 4: return address
+ * ebp + 8: a argument
+ * ebp + 12: a_len argument
+ * ebp + 16: b argument
+ * ebp + 20: c argument
+ * registers:
+ * eax: is_sse on entry, then the low half of each product
+ * ebx: carry
+ * ecx: a_len
+ * edx: b, then the high half of each product
+ * esi: a ptr
+ * edi: c ptr
+ */
+__declspec(naked) void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+ __asm {
+ mov eax, is_sse
+ cmp eax, 0
+ je s_mpv_mul_d_x86
+ jg s_mpv_mul_d_sse2
+ call s_mpi_is_sse2
+ mov is_sse, eax
+ cmp eax, 0
+ jg s_mpv_mul_d_sse2
+s_mpv_mul_d_x86:
+ push ebp
+ mov ebp,esp
+ sub esp,28
+ push edi
+ push esi
+ push ebx
+ mov ebx,0 ; carry = 0
+ mov ecx,[ebp+12] ; ecx = a_len
+ mov edi,[ebp+20]
+ cmp ecx,0
+ je L_2 ; jmp if a_len == 0
+ mov esi,[ebp+8] ; esi = a
+ cld
+L_1:
+ lodsd ; eax = [ds:esi]; esi += 4
+ mov edx,[ebp+16] ; edx = b
+ mul edx ; edx:eax = Phi:Plo = a_i * b
+
+ add eax,ebx ; add carry (ebx) to edx:eax
+ adc edx,0
+ mov ebx,edx ; high half of product becomes next carry
+
+ stosd ; [es:edi] = ax; edi += 4;
+ dec ecx ; --a_len
+ jnz L_1 ; jmp if a_len != 0
+L_2:
+ mov [edi],ebx ; *c = carry
+ pop ebx
+ pop esi
+ pop edi
+ leave
+ ret
+ nop
+s_mpv_mul_d_sse2:
+ push ebp
+ mov ebp, esp
+ push edi
+ push esi
+ psubq mm2, mm2 ; carry = 0
+ mov ecx, [ebp+12] ; ecx = a_len
+ movd mm1, [ebp+16] ; mm1 = b
+ mov edi, [ebp+20]
+ cmp ecx, 0
+ je L_6 ; jmp if a_len == 0
+ mov esi, [ebp+8] ; esi = a
+ cld
+L_5:
+ movd mm0, [esi] ; mm0 = *a++
+ add esi, 4
+ pmuludq mm0, mm1 ; mm0 = b * *a++
+ paddq mm2, mm0 ; add the carry
+ movd [edi], mm2 ; store the 32bit result
+ add edi, 4
+ psrlq mm2, 32 ; save the carry
+ dec ecx ; --a_len
+ jnz L_5 ; jmp if a_len != 0
+L_6:
+ movd [edi], mm2 ; *c = carry
+ emms
+ pop esi
+ pop edi
+ leave
+ ret
+ nop
+ }
+}
+
+/*
+ * s_mpv_mul_d_add: c += a * b.  Each product digit is added to the word
+ * already in c and stored back; the final carry is stored (not added) in
+ * the word that follows.  The entry code reads is_sse to pick the plain
+ * x86 loop or the SSE2 loop, calling s_mpi_is_sse2() first when is_sse is
+ * still negative.
+ *
+ * Stack layout below is for the x86 path (which reserves 28 bytes with
+ * "sub esp,28" and then pushes edi, esi and ebx); the SSE2 path pushes
+ * only edi and esi directly below the saved ebp.
+ *
+ * ebp - 36: caller's esi
+ * ebp - 32: caller's edi
+ * ebp - 28:
+ * ebp - 24:
+ * ebp - 20:
+ * ebp - 16:
+ * ebp - 12:
+ * ebp - 8:
+ * ebp - 4:
+ * ebp + 0: caller's ebp
+ * ebp + 4: return address
+ * ebp + 8: a argument
+ * ebp + 12: a_len argument
+ * ebp + 16: b argument
+ * ebp + 20: c argument
+ * registers:
+ * eax: is_sse on entry, then the low half of each product
+ * ebx: carry (also briefly holds the current word of c)
+ * ecx: a_len
+ * edx: b, then the high half of each product
+ * esi: a ptr
+ * edi: c ptr
+ */
+__declspec(naked) void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+ __asm {
+ mov eax, is_sse
+ cmp eax, 0
+ je s_mpv_mul_d_add_x86
+ jg s_mpv_mul_d_add_sse2
+ call s_mpi_is_sse2
+ mov is_sse, eax
+ cmp eax, 0
+ jg s_mpv_mul_d_add_sse2
+s_mpv_mul_d_add_x86:
+ push ebp
+ mov ebp,esp
+ sub esp,28
+ push edi
+ push esi
+ push ebx
+ mov ebx,0 ; carry = 0
+ mov ecx,[ebp+12] ; ecx = a_len
+ mov edi,[ebp+20]
+ cmp ecx,0
+ je L_11 ; jmp if a_len == 0
+ mov esi,[ebp+8] ; esi = a
+ cld
+L_10:
+ lodsd ; eax = [ds:esi]; esi += 4
+ mov edx,[ebp+16] ; edx = b
+ mul edx ; edx:eax = Phi:Plo = a_i * b
+
+ add eax,ebx ; add carry (ebx) to edx:eax
+ adc edx,0
+ mov ebx,[edi] ; add in current word from *c
+ add eax,ebx
+ adc edx,0
+ mov ebx,edx ; high half of product becomes next carry
+
+ stosd ; [es:edi] = ax; edi += 4;
+ dec ecx ; --a_len
+ jnz L_10 ; jmp if a_len != 0
+L_11:
+ mov [edi],ebx ; *c = carry
+ pop ebx
+ pop esi
+ pop edi
+ leave
+ ret
+ nop
+s_mpv_mul_d_add_sse2:
+ push ebp
+ mov ebp, esp
+ push edi
+ push esi
+ psubq mm2, mm2 ; carry = 0
+ mov ecx, [ebp+12] ; ecx = a_len
+ movd mm1, [ebp+16] ; mm1 = b
+ mov edi, [ebp+20]
+ cmp ecx, 0
+ je L_16 ; jmp if a_len == 0
+ mov esi, [ebp+8] ; esi = a
+ cld
+L_15:
+ movd mm0, [esi] ; mm0 = *a++
+ add esi, 4
+ pmuludq mm0, mm1 ; mm0 = b * *a++
+ paddq mm2, mm0 ; add the carry
+ movd mm0, [edi]
+ paddq mm2, mm0 ; add the carry
+ movd [edi], mm2 ; store the 32bit result
+ add edi, 4
+ psrlq mm2, 32 ; save the carry
+ dec ecx ; --a_len
+ jnz L_15 ; jmp if a_len != 0
+L_16:
+ movd [edi], mm2 ; *c = carry
+ emms
+ pop esi
+ pop edi
+ leave
+ ret
+ nop
+ }
+}
+
+/*
+ * s_mpv_mul_d_add_prop: c += a * b with carry propagation.  Each product
+ * digit is added to the word already in c and stored back; any final
+ * carry is then added into the following words of c until an addition
+ * produces no carry.  The entry code reads is_sse to pick the plain x86
+ * loop or the SSE2 loop, calling s_mpi_is_sse2() first when is_sse is
+ * still negative.
+ *
+ * Stack layout below is for the x86 path (which reserves 28 bytes with
+ * "sub esp,28" and then pushes edi, esi and ebx); the SSE2 path pushes
+ * edi, esi and ebx directly below the saved ebp.
+ *
+ * ebp - 36: caller's esi
+ * ebp - 32: caller's edi
+ * ebp - 28:
+ * ebp - 24:
+ * ebp - 20:
+ * ebp - 16:
+ * ebp - 12:
+ * ebp - 8:
+ * ebp - 4:
+ * ebp + 0: caller's ebp
+ * ebp + 4: return address
+ * ebp + 8: a argument
+ * ebp + 12: a_len argument
+ * ebp + 16: b argument
+ * ebp + 20: c argument
+ * registers:
+ * eax: is_sse on entry, then product low half / scratch word
+ * ebx: carry
+ * ecx: a_len
+ * edx: b, then the high half of each product (x86 path)
+ * esi: a ptr
+ * edi: c ptr
+ */
+__declspec(naked) void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+ __asm {
+ mov eax, is_sse
+ cmp eax, 0
+ je s_mpv_mul_d_add_prop_x86
+ jg s_mpv_mul_d_add_prop_sse2
+ call s_mpi_is_sse2
+ mov is_sse, eax
+ cmp eax, 0
+ jg s_mpv_mul_d_add_prop_sse2
+s_mpv_mul_d_add_prop_x86:
+ push ebp
+ mov ebp,esp
+ sub esp,28
+ push edi
+ push esi
+ push ebx
+ mov ebx,0 ; carry = 0
+ mov ecx,[ebp+12] ; ecx = a_len
+ mov edi,[ebp+20]
+ cmp ecx,0
+ je L_21 ; jmp if a_len == 0
+ cld
+ mov esi,[ebp+8] ; esi = a
+L_20:
+ lodsd ; eax = [ds:esi]; esi += 4
+ mov edx,[ebp+16] ; edx = b
+ mul edx ; edx:eax = Phi:Plo = a_i * b
+
+ add eax,ebx ; add carry (ebx) to edx:eax
+ adc edx,0
+ mov ebx,[edi] ; add in current word from *c
+ add eax,ebx
+ adc edx,0
+ mov ebx,edx ; high half of product becomes next carry
+
+ stosd ; [es:edi] = ax; edi += 4;
+ dec ecx ; --a_len
+ jnz L_20 ; jmp if a_len != 0
+L_21:
+ cmp ebx,0 ; is carry zero?
+ jz L_23
+ mov eax,[edi] ; add in current word from *c
+ add eax,ebx
+ stosd ; [es:edi] = ax; edi += 4;
+ jnc L_23
+L_22:
+ mov eax,[edi] ; add in current word from *c
+ adc eax,0
+ stosd ; [es:edi] = ax; edi += 4;
+ jc L_22
+L_23:
+ pop ebx
+ pop esi
+ pop edi
+ leave
+ ret
+ nop
+s_mpv_mul_d_add_prop_sse2:
+ push ebp
+ mov ebp, esp
+ push edi
+ push esi
+ push ebx
+ psubq mm2, mm2 ; carry = 0
+ mov ecx, [ebp+12] ; ecx = a_len
+ movd mm1, [ebp+16] ; mm1 = b
+ mov edi, [ebp+20]
+ cmp ecx, 0
+ je L_26 ; jmp if a_len == 0
+ mov esi, [ebp+8] ; esi = a
+ cld
+L_25:
+ movd mm0, [esi] ; mm0 = *a++
+ movd mm3, [edi] ; fetch the sum
+ add esi, 4
+ pmuludq mm0, mm1 ; mm0 = b * *a++
+ paddq mm2, mm0 ; add the carry
+ paddq mm2, mm3 ; add *c++
+ movd [edi], mm2 ; store the 32bit result
+ add edi, 4
+ psrlq mm2, 32 ; save the carry
+ dec ecx ; --a_len
+ jnz L_25 ; jmp if a_len != 0
+L_26:
+ movd ebx, mm2
+ cmp ebx, 0 ; is carry zero?
+ jz L_28
+ mov eax, [edi]
+ add eax, ebx
+ stosd
+ jnc L_28
+L_27:
+ mov eax, [edi] ; add in current word from *c
+ adc eax, 0
+ stosd ; [es:edi] = ax; edi += 4;
+ jc L_27
+L_28:
+ emms
+ pop ebx
+ pop esi
+ pop edi
+ leave
+ ret
+ nop
+ }
+}
+
+/*
+ * s_mpv_sqr_add_prop(a, a_len, sqrs)
+ *
+ * For each of the a_len 32-bit digits of a: squares the digit and adds
+ * the 64-bit square, plus the running carry, into the next two digits
+ * of sqrs (so sqrs advances two digits per digit of a).  After the loop
+ * a non-zero carry is added into the following digit of sqrs and
+ * propagated upward for as long as the addition carries out.
+ * With a_len == 0 nothing is written to sqrs.
+ *
+ * Dispatch on is_sse: 0 selects the plain x86 body, a positive value
+ * the SSE2 body; a negative value first calls s_mpi_is_sse2() and
+ * caches its result in is_sse.
+ *
+ * Stack frame of the x86 body (after "sub esp,12" and three pushes):
+ * ebp - 24: caller's ebx
+ * ebp - 20: caller's esi
+ * ebp - 16: caller's edi
+ * ebp - 12 .. ebp - 4: reserved by "sub esp,12", never referenced
+ *           (carry and a_len are kept in registers, not in these slots)
+ * ebp + 0: caller's ebp
+ * ebp + 4: return address
+ * ebp + 8: pa argument
+ * ebp + 12: a_len argument
+ * ebp + 16: ps argument
+ * The SSE2 body reserves no locals, so there the caller's edi, esi and
+ * ebx are saved at ebp - 4, ebp - 8 and ebp - 12.
+ * registers (x86 body):
+ * eax: digit loaded from a, then the word being stored
+ * ebx: carry (also used as scratch for the words read from sqrs)
+ * ecx: a_len (loop counter)
+ * edx: high half of the square
+ * esi: a ptr
+ * edi: sqrs ptr
+ * registers (SSE2 body):
+ * mm0: square, mm2: running 64-bit sum / carry, mm3: word from sqrs
+ * ebx/eax: only used for the final carry propagation
+ */
+__declspec(naked) void s_mpv_sqr_add_prop(const mp_digit *a, mp_size a_len, mp_digit *sqrs)
+{
+ __asm {
+ mov eax, is_sse
+ cmp eax, 0
+ je s_mpv_sqr_add_prop_x86
+ jg s_mpv_sqr_add_prop_sse2
+ call s_mpi_is_sse2
+ mov is_sse, eax
+ cmp eax, 0
+ jg s_mpv_sqr_add_prop_sse2
+s_mpv_sqr_add_prop_x86:
+ push ebp
+ mov ebp,esp
+ sub esp,12
+ push edi
+ push esi
+ push ebx
+ mov ebx,0 ; carry = 0
+ mov ecx,[ebp+12] ; a_len
+ mov edi,[ebp+16] ; edi = ps
+ cmp ecx,0
+ je L_31 ; jump if a_len == 0
+ cld
+ mov esi,[ebp+8] ; esi = pa
+L_30:
+ lodsd ; eax = [ds:si]; si += 4;
+ mul eax
+
+ add eax,ebx ; add "carry"
+ adc edx,0
+ mov ebx,[edi]
+ add eax,ebx ; add low word from result
+ mov ebx,[edi+4]
+ stosd ; [es:di] = eax; di += 4;
+ adc edx,ebx ; add high word from result
+ mov ebx,0
+ mov eax,edx
+ adc ebx,0
+ stosd ; [es:di] = eax; di += 4;
+ dec ecx ; --a_len
+ jnz L_30 ; jmp if a_len != 0
+L_31:
+ cmp ebx,0 ; is carry zero?
+ jz L_34
+ mov eax,[edi] ; add in current word from *c
+ add eax,ebx
+ stosd ; [es:edi] = ax; edi += 4;
+ jnc L_34
+L_32:
+ mov eax,[edi] ; add in current word from *c
+ adc eax,0
+ stosd ; [es:edi] = ax; edi += 4;
+ jc L_32
+L_34:
+ pop ebx
+ pop esi
+ pop edi
+ leave
+ ret
+ nop
+s_mpv_sqr_add_prop_sse2:
+ push ebp
+ mov ebp, esp
+ push edi
+ push esi
+ push ebx
+ psubq mm2, mm2 ; carry = 0
+ mov ecx, [ebp+12] ; ecx = a_len
+ mov edi, [ebp+16]
+ cmp ecx, 0
+ je L_36 ; jmp if a_len == 0
+ mov esi, [ebp+8] ; esi = a
+ cld
+L_35:
+ movd mm0, [esi] ; mm0 = *a
+ movd mm3, [edi] ; fetch the sum
+ add esi, 4
+ pmuludq mm0, mm0 ; mm0 = sqr(a)
+ paddq mm2, mm0 ; add the carry
+ paddq mm2, mm3 ; add the low word
+ movd mm3, [edi+4]
+ movd [edi], mm2 ; store the 32bit result
+ psrlq mm2, 32
+ paddq mm2, mm3 ; add the high word
+ movd [edi+4], mm2 ; store the 32bit result
+ psrlq mm2, 32 ; save the carry.
+ add edi, 8
+ dec ecx ; --a_len
+ jnz L_35 ; jmp if a_len != 0
+L_36:
+ movd ebx, mm2
+ cmp ebx, 0 ; is carry zero?
+ jz L_38
+ mov eax, [edi]
+ add eax, ebx
+ stosd
+ jnc L_38
+L_37:
+ mov eax, [edi] ; add in current word from *c
+ adc eax, 0
+ stosd ; [es:edi] = ax; edi += 4;
+ jc L_37
+L_38:
+ emms
+ pop ebx
+ pop esi
+ pop edi
+ leave
+ ret
+ nop
+ }
+}
+
+/*
+ * Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
+ * so its high bit is 1. This code is from NSPR.
+ *
+ * The quotient is stored through qp, the remainder through rp, and the
+ * function always returns zero in eax.
+ * NOTE(review): the x86 "div" instruction raises a divide error when the
+ * quotient does not fit in 32 bits (Nhi >= divisor); nothing here guards
+ * against that, so callers presumably guarantee Nhi < divisor - confirm.
+ *
+ * Dump of assembler code for function s_mpv_div_2dx1d:
+ *
+ * Stack layout after "push ebx" (no frame pointer is set up):
+ * esp + 0: Caller's ebx
+ * esp + 4: return address
+ * esp + 8: Nhi argument
+ * esp + 12: Nlo argument
+ * esp + 16: divisor argument
+ * esp + 20: qp argument
+ * esp + 24: rp argument
+ * registers:
+ * eax: Nlo, then the quotient, finally the zero return value
+ * ebx: divisor, then the qp pointer, then the rp pointer
+ * edx: Nhi, then the remainder
+ * ecx, esi, edi: not used
+ */
+__declspec(naked) mp_err
+ s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
+ mp_digit *qp, mp_digit *rp)
+{
+ __asm {
+ push ebx
+ mov edx,[esp+8]
+ mov eax,[esp+12]
+ mov ebx,[esp+16]
+ div ebx
+ mov ebx,[esp+20]
+ mov [ebx],eax
+ mov ebx,[esp+24]
+ mov [ebx],edx
+ xor eax,eax ; return zero
+ pop ebx
+ ret
+ nop
+ }
+}
diff --git a/security/nss/lib/freebl/mpi/mpi_x86_os2.s b/security/nss/lib/freebl/mpi/mpi_x86_os2.s
new file mode 100644
index 000000000..b903e2564
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_x86_os2.s
@@ -0,0 +1,538 @@
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+.data
+.align 4
+ #
+ # is_sse caches the result of the sse2 probe:
+ # -1 means to call _s_mpi_is_sse2 to determine if we support sse2
+ # instructions.
+ # 0 means to use x86 instructions
+ # 1 means to use sse2 instructions
+.type is_sse,@object
+.size is_sse,4
+is_sse: .long -1
+
+#
+# sigh, handle the difference between -fPIC and not PIC.
+# The PIC (GOTOFF-relative) versions of GET/PUT below are commented out,
+# so this file always uses the non-PIC, absolute-address versions.
+# (solaris uses mpi_i86pc.s and windows uses mpi_x86_asm.c)
+#
+#.ifndef NO_PIC
+#.macro GET var,reg
+# movl \var@GOTOFF(%ebx),\reg
+#.endm
+#.macro PUT reg,var
+# movl \reg,\var@GOTOFF(%ebx)
+#.endm
+#.else
+.macro GET var,reg
+ movl \var,\reg
+.endm
+.macro PUT reg,var
+ movl \reg,\var
+.endm
+#.endif
+
+.text
+
+
+ # _s_mpv_mul_d(a, a_len, b, c)
+ #
+ # For each of the a_len 32-bit digits of a, stores the low 32 bits of
+ # a[i] * b + carry into the matching digit of c and keeps the high
+ # 32 bits as the next carry; the final carry is stored into the digit
+ # of c that follows (c is overwritten, its old contents are not read).
+ #
+ # Dispatch on is_sse: 0 -> x86 body, > 0 -> sse2 body, negative ->
+ # call _s_mpi_is_sse2 first and cache its answer in is_sse.
+ #
+ # Stack frame of the x86 body (after "sub $28,%esp" and three pushes):
+ # ebp - 40: caller's ebx
+ # ebp - 36: caller's esi
+ # ebp - 32: caller's edi
+ # ebp - 28 .. ebp - 4: reserved by "sub $28,%esp", never referenced
+ # ebp + 0: caller's ebp
+ # ebp + 4: return address
+ # ebp + 8: a argument
+ # ebp + 12: a_len argument
+ # ebp + 16: b argument
+ # ebp + 20: c argument
+ # The sse2 body reserves no locals and does not touch ebx: there the
+ # caller's edi is at ebp - 4 and the caller's esi at ebp - 8.
+ # registers (x86 body):
+ # eax: digit loaded from a, then low half of the product
+ # ebx: carry
+ # ecx: a_len
+ # edx: b, then high half of the product
+ # esi: a ptr
+ # edi: c ptr
+ # registers (sse2 body):
+ # mm0: product, mm1: b, mm2: running 64-bit sum / carry
+.globl _s_mpv_mul_d
+.type _s_mpv_mul_d,@function
+_s_mpv_mul_d:
+ GET is_sse,%eax
+ cmp $0,%eax
+ je _s_mpv_mul_d_x86
+ jg _s_mpv_mul_d_sse2
+ call _s_mpi_is_sse2
+ PUT %eax,is_sse
+ cmp $0,%eax
+ jg _s_mpv_mul_d_sse2
+_s_mpv_mul_d_x86:
+ push %ebp
+ mov %esp,%ebp
+ sub $28,%esp
+ push %edi
+ push %esi
+ push %ebx
+ movl $0,%ebx # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ mov 20(%ebp),%edi
+ cmp $0,%ecx
+ je 2f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld
+1:
+ lodsl # eax = [ds:esi]; esi += 4
+ mov 16(%ebp),%edx # edx = b
+ mull %edx # edx:eax = Phi:Plo = a_i * b
+
+ add %ebx,%eax # add carry (%ebx) to edx:eax
+ adc $0,%edx
+ mov %edx,%ebx # high half of product becomes next carry
+
+ stosl # [es:edi] = eax; edi += 4;
+ dec %ecx # --a_len
+ jnz 1b # jmp if a_len != 0
+2:
+ mov %ebx,0(%edi) # *c = carry
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+_s_mpv_mul_d_sse2:
+ push %ebp
+ mov %esp,%ebp
+ push %edi
+ push %esi
+ psubq %mm2,%mm2 # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ movd 16(%ebp),%mm1 # mm1 = b
+ mov 20(%ebp),%edi
+ cmp $0,%ecx
+ je 6f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld
+5:
+ movd 0(%esi),%mm0 # mm0 = *a++
+ add $4,%esi
+ pmuludq %mm1,%mm0 # mm0 = b * *a++
+ paddq %mm0,%mm2 # add the carry
+ movd %mm2,0(%edi) # store the 32bit result
+ add $4,%edi
+ psrlq $32, %mm2 # save the carry
+ dec %ecx # --a_len
+ jnz 5b # jmp if a_len != 0
+6:
+ movd %mm2,0(%edi) # *c = carry
+ emms
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+ # _s_mpv_mul_d_add(a, a_len, b, c)
+ #
+ # For each of the a_len 32-bit digits of a, stores the low 32 bits of
+ # a[i] * b + carry + c[i] back into c[i] and keeps the high 32 bits as
+ # the next carry; the final carry is stored (not added) into the digit
+ # of c that follows.
+ #
+ # Dispatch on is_sse: 0 -> x86 body, > 0 -> sse2 body, negative ->
+ # call _s_mpi_is_sse2 first and cache its answer in is_sse.
+ #
+ # Stack frame of the x86 body (after "sub $28,%esp" and three pushes):
+ # ebp - 40: caller's ebx
+ # ebp - 36: caller's esi
+ # ebp - 32: caller's edi
+ # ebp - 28 .. ebp - 4: reserved by "sub $28,%esp", never referenced
+ # ebp + 0: caller's ebp
+ # ebp + 4: return address
+ # ebp + 8: a argument
+ # ebp + 12: a_len argument
+ # ebp + 16: b argument
+ # ebp + 20: c argument
+ # The sse2 body reserves no locals and does not touch ebx: there the
+ # caller's edi is at ebp - 4 and the caller's esi at ebp - 8.
+ # registers (x86 body):
+ # eax: digit loaded from a, then low half of the product / sum
+ # ebx: carry (also used as scratch for the digit read from c)
+ # ecx: a_len
+ # edx: b, then high half of the product
+ # esi: a ptr
+ # edi: c ptr
+ # registers (sse2 body):
+ # mm0: product, then the digit read from c
+ # mm1: b, mm2: running 64-bit sum / carry
+.globl _s_mpv_mul_d_add
+.type _s_mpv_mul_d_add,@function
+_s_mpv_mul_d_add:
+ GET is_sse,%eax
+ cmp $0,%eax
+ je _s_mpv_mul_d_add_x86
+ jg _s_mpv_mul_d_add_sse2
+ call _s_mpi_is_sse2
+ PUT %eax,is_sse
+ cmp $0,%eax
+ jg _s_mpv_mul_d_add_sse2
+_s_mpv_mul_d_add_x86:
+ push %ebp
+ mov %esp,%ebp
+ sub $28,%esp
+ push %edi
+ push %esi
+ push %ebx
+ movl $0,%ebx # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ mov 20(%ebp),%edi
+ cmp $0,%ecx
+ je 11f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld
+10:
+ lodsl # eax = [ds:esi]; esi += 4
+ mov 16(%ebp),%edx # edx = b
+ mull %edx # edx:eax = Phi:Plo = a_i * b
+
+ add %ebx,%eax # add carry (%ebx) to edx:eax
+ adc $0,%edx
+ mov 0(%edi),%ebx # add in current word from *c
+ add %ebx,%eax
+ adc $0,%edx
+ mov %edx,%ebx # high half of product becomes next carry
+
+ stosl # [es:edi] = eax; edi += 4;
+ dec %ecx # --a_len
+ jnz 10b # jmp if a_len != 0
+11:
+ mov %ebx,0(%edi) # *c = carry
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+_s_mpv_mul_d_add_sse2:
+ push %ebp
+ mov %esp,%ebp
+ push %edi
+ push %esi
+ psubq %mm2,%mm2 # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ movd 16(%ebp),%mm1 # mm1 = b
+ mov 20(%ebp),%edi
+ cmp $0,%ecx
+ je 16f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld
+15:
+ movd 0(%esi),%mm0 # mm0 = *a++
+ add $4,%esi
+ pmuludq %mm1,%mm0 # mm0 = b * *a++
+ paddq %mm0,%mm2 # add the carry
+ movd 0(%edi),%mm0 # mm0 = current word from *c
+ paddq %mm0,%mm2 # add the current word from *c
+ movd %mm2,0(%edi) # store the 32bit result
+ add $4,%edi
+ psrlq $32, %mm2 # save the carry
+ dec %ecx # --a_len
+ jnz 15b # jmp if a_len != 0
+16:
+ movd %mm2,0(%edi) # *c = carry
+ emms
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+ # _s_mpv_mul_d_add_prop(a, a_len, b, c)
+ #
+ # For each of the a_len 32-bit digits of a, stores the low 32 bits of
+ # a[i] * b + carry + c[i] back into c[i] and keeps the high 32 bits as
+ # the next carry.  After the loop a non-zero carry is added into the
+ # following digit of c and propagated upward for as long as the
+ # addition carries out.  With a_len == 0 nothing is written to c.
+ #
+ # Dispatch on is_sse: 0 -> x86 body, > 0 -> sse2 body, negative ->
+ # call _s_mpi_is_sse2 first and cache its answer in is_sse.
+ #
+ # Stack frame of the sse2 body (no locals are reserved):
+ # ebp - 12: caller's ebx
+ # ebp - 8: caller's esi
+ # ebp - 4: caller's edi
+ # ebp + 0: caller's ebp
+ # ebp + 4: return address
+ # ebp + 8: a argument
+ # ebp + 12: a_len argument
+ # ebp + 16: b argument
+ # ebp + 20: c argument
+ # The x86 body first reserves 28 bytes ("sub $28,%esp"), so there the
+ # caller's edi, esi and ebx are at ebp - 32, ebp - 36 and ebp - 40.
+ # registers (x86 body):
+ # eax: digit loaded from a, then low half of the product / sum
+ # ebx: carry (also used as scratch for the digit read from c)
+ # ecx: a_len
+ # edx: b, then high half of the product
+ # esi: a ptr
+ # edi: c ptr
+ # registers (sse2 body):
+ # mm0: product, mm1: b, mm2: running 64-bit sum / carry, mm3: *c
+ # ebx/eax: only used for the final carry propagation
+.globl _s_mpv_mul_d_add_prop
+.type _s_mpv_mul_d_add_prop,@function
+_s_mpv_mul_d_add_prop:
+ GET is_sse,%eax
+ cmp $0,%eax
+ je _s_mpv_mul_d_add_prop_x86
+ jg _s_mpv_mul_d_add_prop_sse2
+ call _s_mpi_is_sse2
+ PUT %eax,is_sse
+ cmp $0,%eax
+ jg _s_mpv_mul_d_add_prop_sse2
+_s_mpv_mul_d_add_prop_x86:
+ push %ebp
+ mov %esp,%ebp
+ sub $28,%esp
+ push %edi
+ push %esi
+ push %ebx
+ movl $0,%ebx # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ mov 20(%ebp),%edi
+ cmp $0,%ecx
+ je 21f # jmp if a_len == 0
+ cld
+ mov 8(%ebp),%esi # esi = a
+20:
+ lodsl # eax = [ds:esi]; esi += 4
+ mov 16(%ebp),%edx # edx = b
+ mull %edx # edx:eax = Phi:Plo = a_i * b
+
+ add %ebx,%eax # add carry (%ebx) to edx:eax
+ adc $0,%edx
+ mov 0(%edi),%ebx # add in current word from *c
+ add %ebx,%eax
+ adc $0,%edx
+ mov %edx,%ebx # high half of product becomes next carry
+
+ stosl # [es:edi] = eax; edi += 4;
+ dec %ecx # --a_len
+ jnz 20b # jmp if a_len != 0
+21:
+ cmp $0,%ebx # is carry zero?
+ jz 23f
+ mov 0(%edi),%eax # add in current word from *c
+ add %ebx,%eax
+ stosl # [es:edi] = eax; edi += 4;
+ jnc 23f
+22:
+ mov 0(%edi),%eax # add in current word from *c
+ adc $0,%eax
+ stosl # [es:edi] = eax; edi += 4;
+ jc 22b
+23:
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+_s_mpv_mul_d_add_prop_sse2:
+ push %ebp
+ mov %esp,%ebp
+ push %edi
+ push %esi
+ push %ebx
+ psubq %mm2,%mm2 # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ movd 16(%ebp),%mm1 # mm1 = b
+ mov 20(%ebp),%edi
+ cmp $0,%ecx
+ je 26f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld
+25:
+ movd 0(%esi),%mm0 # mm0 = *a++
+ movd 0(%edi),%mm3 # fetch the sum
+ add $4,%esi
+ pmuludq %mm1,%mm0 # mm0 = b * *a++
+ paddq %mm0,%mm2 # add the carry
+ paddq %mm3,%mm2 # add *c++
+ movd %mm2,0(%edi) # store the 32bit result
+ add $4,%edi
+ psrlq $32, %mm2 # save the carry
+ dec %ecx # --a_len
+ jnz 25b # jmp if a_len != 0
+26:
+ movd %mm2,%ebx
+ cmp $0,%ebx # is carry zero?
+ jz 28f
+ mov 0(%edi),%eax
+ add %ebx, %eax
+ stosl
+ jnc 28f
+27:
+ mov 0(%edi),%eax # add in current word from *c
+ adc $0,%eax
+ stosl # [es:edi] = eax; edi += 4;
+ jc 27b
+28:
+ emms
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+
+ # _s_mpv_sqr_add_prop(pa, a_len, ps)
+ #
+ # For each of the a_len 32-bit digits of pa: squares the digit and adds
+ # the 64-bit square, plus the running carry, into the next two digits
+ # of ps (ps advances two digits per digit of pa).  After the loop a
+ # non-zero carry is added into the following digit of ps and propagated
+ # upward for as long as the addition carries out.
+ # With a_len == 0 nothing is written to ps.
+ #
+ # Dispatch on is_sse: 0 -> x86 body, > 0 -> sse2 body, negative ->
+ # call _s_mpi_is_sse2 first and cache its answer in is_sse.
+ #
+ # Stack frame of the x86 body (after "sub $12,%esp" and three pushes):
+ # ebp - 24: caller's ebx
+ # ebp - 20: caller's esi
+ # ebp - 16: caller's edi
+ # ebp - 12 .. ebp - 4: reserved by "sub $12,%esp", never referenced
+ #           (carry and a_len are kept in registers, not in these slots)
+ # ebp + 0: caller's ebp
+ # ebp + 4: return address
+ # ebp + 8: pa argument
+ # ebp + 12: a_len argument
+ # ebp + 16: ps argument
+ # The sse2 body reserves no locals, so there the caller's edi, esi and
+ # ebx are at ebp - 4, ebp - 8 and ebp - 12.
+ # registers (x86 body):
+ # eax: digit loaded from pa, then the word being stored
+ # ebx: carry (also used as scratch for the words read from ps)
+ # ecx: a_len
+ # edx: high half of the square
+ # esi: pa ptr
+ # edi: ps ptr
+ # registers (sse2 body):
+ # mm0: square, mm2: running 64-bit sum / carry, mm3: word from ps
+ # ebx/eax: only used for the final carry propagation
+
+.globl _s_mpv_sqr_add_prop
+.type _s_mpv_sqr_add_prop,@function
+_s_mpv_sqr_add_prop:
+ GET is_sse,%eax
+ cmp $0,%eax
+ je _s_mpv_sqr_add_prop_x86
+ jg _s_mpv_sqr_add_prop_sse2
+ call _s_mpi_is_sse2
+ PUT %eax,is_sse
+ cmp $0,%eax
+ jg _s_mpv_sqr_add_prop_sse2
+_s_mpv_sqr_add_prop_x86:
+ push %ebp
+ mov %esp,%ebp
+ sub $12,%esp
+ push %edi
+ push %esi
+ push %ebx
+ movl $0,%ebx # carry = 0
+ mov 12(%ebp),%ecx # a_len
+ mov 16(%ebp),%edi # edi = ps
+ cmp $0,%ecx
+ je 31f # jump if a_len == 0
+ cld
+ mov 8(%ebp),%esi # esi = pa
+30:
+ lodsl # %eax = [ds:si]; si += 4;
+ mull %eax
+
+ add %ebx,%eax # add "carry"
+ adc $0,%edx
+ mov 0(%edi),%ebx
+ add %ebx,%eax # add low word from result
+ mov 4(%edi),%ebx
+ stosl # [es:di] = %eax; di += 4;
+ adc %ebx,%edx # add high word from result
+ movl $0,%ebx
+ mov %edx,%eax
+ adc $0,%ebx
+ stosl # [es:di] = %eax; di += 4;
+ dec %ecx # --a_len
+ jnz 30b # jmp if a_len != 0
+31:
+ cmp $0,%ebx # is carry zero?
+ jz 34f
+ mov 0(%edi),%eax # add in current word from *c
+ add %ebx,%eax
+ stosl # [es:edi] = eax; edi += 4;
+ jnc 34f
+32:
+ mov 0(%edi),%eax # add in current word from *c
+ adc $0,%eax
+ stosl # [es:edi] = eax; edi += 4;
+ jc 32b
+34:
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+_s_mpv_sqr_add_prop_sse2:
+ push %ebp
+ mov %esp,%ebp
+ push %edi
+ push %esi
+ push %ebx
+ psubq %mm2,%mm2 # carry = 0
+ mov 12(%ebp),%ecx # ecx = a_len
+ mov 16(%ebp),%edi
+ cmp $0,%ecx
+ je 36f # jmp if a_len == 0
+ mov 8(%ebp),%esi # esi = a
+ cld
+35:
+ movd 0(%esi),%mm0 # mm0 = *a
+ movd 0(%edi),%mm3 # fetch the sum
+ add $4,%esi
+ pmuludq %mm0,%mm0 # mm0 = sqr(a)
+ paddq %mm0,%mm2 # add the carry
+ paddq %mm3,%mm2 # add the low word
+ movd 4(%edi),%mm3
+ movd %mm2,0(%edi) # store the 32bit result
+ psrlq $32, %mm2
+ paddq %mm3,%mm2 # add the high word
+ movd %mm2,4(%edi) # store the 32bit result
+ psrlq $32, %mm2 # save the carry.
+ add $8,%edi
+ dec %ecx # --a_len
+ jnz 35b # jmp if a_len != 0
+36:
+ movd %mm2,%ebx
+ cmp $0,%ebx # is carry zero?
+ jz 38f
+ mov 0(%edi),%eax
+ add %ebx, %eax
+ stosl
+ jnc 38f
+37:
+ mov 0(%edi),%eax # add in current word from *c
+ adc $0,%eax
+ stosl # [es:edi] = eax; edi += 4;
+ jc 37b
+38:
+ emms
+ pop %ebx
+ pop %esi
+ pop %edi
+ leave
+ ret
+ nop
+
+ #
+ # Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
+ # so its high bit is 1. This code is from NSPR.
+ #
+ # mp_err _s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
+ # mp_digit *qp, mp_digit *rp)
+ #
+ # The quotient is stored through qp, the remainder through rp, and the
+ # function always returns zero in eax.
+ # NOTE(review): the x86 "div" instruction raises a divide error when the
+ # quotient does not fit in 32 bits (Nhi >= divisor); nothing here guards
+ # against that, so callers presumably guarantee Nhi < divisor - confirm.
+
+ # Stack layout after "push %ebx" (no frame pointer is set up):
+ # esp + 0: Caller's ebx
+ # esp + 4: return address
+ # esp + 8: Nhi argument
+ # esp + 12: Nlo argument
+ # esp + 16: divisor argument
+ # esp + 20: qp argument
+ # esp + 24: rp argument
+ # registers:
+ # eax: Nlo, then the quotient, finally the zero return value
+ # ebx: divisor, then the qp pointer, then the rp pointer
+ # edx: Nhi, then the remainder
+ # ecx, esi, edi: not used
+ #
+
+.globl _s_mpv_div_2dx1d
+.type _s_mpv_div_2dx1d,@function
+_s_mpv_div_2dx1d:
+ push %ebx
+ mov 8(%esp),%edx
+ mov 12(%esp),%eax
+ mov 16(%esp),%ebx
+ div %ebx
+ mov 20(%esp),%ebx
+ mov %eax,0(%ebx)
+ mov 24(%esp),%ebx
+ mov %edx,0(%ebx)
+ xor %eax,%eax # return zero
+ pop %ebx
+ ret
+ nop
+
diff --git a/security/nss/lib/freebl/mpi/mplogic.c b/security/nss/lib/freebl/mpi/mplogic.c
new file mode 100644
index 000000000..89fd03ae8
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mplogic.c
@@ -0,0 +1,443 @@
+/*
+ * mplogic.c
+ *
+ * Bitwise logical operations on MPI values
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi-priv.h"
+#include "mplogic.h"
+
+/* {{{ Lookup table for population count */
+
+/*
+ * bitc[v] is the number of 1 bits in the byte value v (0..255).
+ * The table is only ever read, so it is declared const.
+ */
+static const unsigned char bitc[] = {
+    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+    4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
+};
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/*
+ mpl_not(a, b) - compute b = ~a
+ mpl_and(a, b, c) - compute c = a & b
+ mpl_or(a, b, c) - compute c = a | b
+ mpl_xor(a, b, c) - compute c = a ^ b
+ */
+
+/* {{{ mpl_not(a, b) */
+
+/*
+ * mpl_not(a, b) - compute b = ~a
+ *
+ * Copies a into b, complements every used digit of b and clamps the
+ * result.  Returns MP_OKAY, or the error reported by mp_copy().
+ */
+mp_err
+mpl_not(mp_int *a, mp_int *b)
+{
+    mp_err status;
+    unsigned int pos = 0;
+
+    ARGCHK(a != NULL && b != NULL, MP_BADARG);
+
+    status = mp_copy(a, b);
+    if (status != MP_OKAY)
+        return status;
+
+    /* The digit type is unsigned, so ~ is a plain bit inversion. */
+    while (pos < USED(b)) {
+        DIGIT(b, pos) = ~DIGIT(b, pos);
+        ++pos;
+    }
+
+    s_mp_clamp(b);
+
+    return MP_OKAY;
+
+} /* end mpl_not() */
+
+/* }}} */
+
+/* {{{ mpl_and(a, b, c) */
+
+/*
+ * mpl_and(a, b, c) - compute c = a & b
+ *
+ * The operand with fewer used digits is copied into c and each of its
+ * digits is then masked with the matching digit of the other operand;
+ * the result is clamped.  Returns MP_OKAY, or the error reported by
+ * mp_copy().
+ * NOTE(review): if c is the same object as the longer operand, the copy
+ * presumably overwrites it before it is read - confirm before aliasing.
+ */
+mp_err
+mpl_and(mp_int *a, mp_int *b, mp_int *c)
+{
+    mp_int *shorter = b;
+    mp_int *longer = a;
+    mp_err status;
+    unsigned int pos = 0;
+
+    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+    if (USED(a) <= USED(b)) {
+        shorter = a;
+        longer = b;
+    }
+
+    status = mp_copy(shorter, c);
+    if (status != MP_OKAY)
+        return status;
+
+    while (pos < USED(shorter)) {
+        DIGIT(c, pos) &= DIGIT(longer, pos);
+        ++pos;
+    }
+
+    s_mp_clamp(c);
+
+    return MP_OKAY;
+
+} /* end mpl_and() */
+
+/* }}} */
+
+/* {{{ mpl_or(a, b, c) */
+
+/*
+ * mpl_or(a, b, c) - compute c = a | b
+ *
+ * The operand with more used digits ('which') is copied into c, then
+ * the digits of the shorter operand ('other') are OR-ed in.  Digits
+ * above USED(other) are zero by definition, so they leave c unchanged
+ * and must not be read: the loop is bounded by USED(other), not by
+ * USED(which), to avoid reading past the shorter operand's digits.
+ *
+ * Returns MP_OKAY, or the error reported by mp_copy().
+ * NOTE(review): if c is the same object as the shorter operand, the copy
+ * presumably overwrites it before it is read - confirm before aliasing.
+ */
+mp_err
+mpl_or(mp_int *a, mp_int *b, mp_int *c)
+{
+    mp_int *which, *other;
+    mp_err res;
+    unsigned int ix;
+
+    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+    if (USED(a) >= USED(b)) {
+        which = a;
+        other = b;
+    } else {
+        which = b;
+        other = a;
+    }
+
+    if ((res = mp_copy(which, c)) != MP_OKAY)
+        return res;
+
+    /* Only the digits 'other' actually has take part in the OR. */
+    for (ix = 0; ix < USED(other); ix++)
+        DIGIT(c, ix) |= DIGIT(other, ix);
+
+    return MP_OKAY;
+
+} /* end mpl_or() */
+
+/* }}} */
+
+/* {{{ mpl_xor(a, b, c) */
+
+/*
+ * mpl_xor(a, b, c) - compute c = a ^ b
+ *
+ * The operand with more used digits ('which') is copied into c, then
+ * the digits of the shorter operand ('other') are XOR-ed in and the
+ * result is clamped.  Digits above USED(other) are zero by definition,
+ * so they leave c unchanged and must not be read: the loop is bounded
+ * by USED(other), not by USED(which), to avoid reading past the shorter
+ * operand's digits.
+ *
+ * Returns MP_OKAY, or the error reported by mp_copy().
+ * NOTE(review): if c is the same object as the shorter operand, the copy
+ * presumably overwrites it before it is read - confirm before aliasing.
+ */
+mp_err
+mpl_xor(mp_int *a, mp_int *b, mp_int *c)
+{
+    mp_int *which, *other;
+    mp_err res;
+    unsigned int ix;
+
+    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+    if (USED(a) >= USED(b)) {
+        which = a;
+        other = b;
+    } else {
+        which = b;
+        other = a;
+    }
+
+    if ((res = mp_copy(which, c)) != MP_OKAY)
+        return res;
+
+    /* Only the digits 'other' actually has take part in the XOR. */
+    for (ix = 0; ix < USED(other); ix++)
+        DIGIT(c, ix) ^= DIGIT(other, ix);
+
+    /* Equal high digits cancel, so the top of c may now be zero. */
+    s_mp_clamp(c);
+
+    return MP_OKAY;
+
+} /* end mpl_xor() */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/*
+ mpl_rsh(a, b, d) - b = a >> d
+ mpl_lsh(a, b, d) - b = a << d
+ */
+
+/* {{{ mpl_rsh(a, b, d) */
+
+/*
+ * mpl_rsh(a, b, d) - b = a >> d
+ *
+ * Copies a into b and shifts b right by d bits with s_mp_div_2d().
+ * Returns MP_OKAY, or the error reported by mp_copy().
+ */
+mp_err
+mpl_rsh(const mp_int *a, mp_int *b, mp_digit d)
+{
+    mp_err status;
+
+    ARGCHK(a != NULL && b != NULL, MP_BADARG);
+
+    status = mp_copy(a, b);
+    if (status == MP_OKAY)
+        s_mp_div_2d(b, d);
+
+    return status;
+
+} /* end mpl_rsh() */
+
+/* }}} */
+
+/* {{{ mpl_lsh(a, b, d) */
+
+/*
+ * mpl_lsh(a, b, d) - b = a << d
+ *
+ * Copies a into b and shifts b left by d bits with s_mp_mul_2d().
+ * Returns the error from mp_copy() if the copy fails, otherwise the
+ * result of s_mp_mul_2d().
+ */
+mp_err
+mpl_lsh(const mp_int *a, mp_int *b, mp_digit d)
+{
+    mp_err status;
+
+    ARGCHK(a != NULL && b != NULL, MP_BADARG);
+
+    status = mp_copy(a, b);
+
+    return (status == MP_OKAY) ? s_mp_mul_2d(b, d) : status;
+
+} /* end mpl_lsh() */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/*
+ mpl_num_set(a, num)
+
+ Count the number of set bits in the binary representation of a.
+ Returns MP_OKAY and sets 'num' to be the number of such bits, if
+ possible. If num is NULL, the result is thrown away, but it is
+ not considered an error.
+
+ mpl_num_clear() does basically the same thing for clear bits.
+ */
+
+/* {{{ mpl_num_set(a, num) */
+
+/*
+ * Counts the 1 bits in the used digits of a, one byte at a time through
+ * the bitc[] table.  The total is stored in *num when num is not NULL.
+ * Always returns MP_OKAY once the argument check has passed.
+ */
+mp_err
+mpl_num_set(mp_int *a, int *num)
+{
+    unsigned int ix;
+    int total = 0;
+
+    ARGCHK(a != NULL, MP_BADARG);
+
+    for (ix = 0; ix < USED(a); ix++) {
+        mp_digit rest = DIGIT(a, ix);
+        int left;
+
+        /* Consume the digit from its low byte upward. */
+        for (left = sizeof(mp_digit); left > 0; left--) {
+            total += bitc[(unsigned char)rest];
+            rest >>= CHAR_BIT;
+        }
+    }
+
+    if (num != NULL)
+        *num = total;
+
+    return MP_OKAY;
+
+} /* end mpl_num_set() */
+
+/* }}} */
+
+/* {{{ mpl_num_clear(a, num) */
+
+/*
+ * Counts the 0 bits in the used digits of a (bits above the used
+ * digits are not counted), one byte at a time through the bitc[]
+ * table.  The total is stored in *num when num is not NULL.
+ * Always returns MP_OKAY once the argument check has passed.
+ */
+mp_err
+mpl_num_clear(mp_int *a, int *num)
+{
+    unsigned int ix;
+    int total = 0;
+
+    ARGCHK(a != NULL, MP_BADARG);
+
+    for (ix = 0; ix < USED(a); ix++) {
+        mp_digit rest = DIGIT(a, ix);
+        int left;
+
+        /* Look up the complement of each byte, low byte first. */
+        for (left = sizeof(mp_digit); left > 0; left--) {
+            total += bitc[UCHAR_MAX - (unsigned char)rest];
+            rest >>= CHAR_BIT;
+        }
+    }
+
+    if (num != NULL)
+        *num = total;
+
+    return MP_OKAY;
+
+} /* end mpl_num_clear() */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/*
+ mpl_parity(a)
+
+ Determines the bitwise parity of the value given. Returns MP_EVEN
+ if an even number of bits are set, MP_ODD if an odd number are
+ set.
+ */
+
+/* {{{ mpl_parity(a) */
+
+/*
+ * Returns MP_ODD when the used digits of a contain an odd number of
+ * 1 bits, MP_EVEN otherwise.
+ */
+mp_err
+mpl_parity(mp_int *a)
+{
+    unsigned int ix;
+    int par = 0;
+
+    ARGCHK(a != NULL, MP_BADARG);
+
+    for (ix = 0; ix < USED(a); ix++) {
+        mp_digit folded = DIGIT(a, ix);
+        int width;
+
+        /* Fold the digit onto itself, halving the width each time,
+           until bit 0 holds the parity of the whole digit. */
+        for (width = (sizeof(mp_digit) * CHAR_BIT) / 2; width != 0; width >>= 1)
+            folded ^= (folded >> width);
+
+        /* Accumulate into the running parity. */
+        par ^= (folded & 1);
+    }
+
+    return par ? MP_ODD : MP_EVEN;
+
+} /* end mpl_parity() */
+
+/* }}} */
+
+/*
+  mpl_set_bit
+
+  Sets bit number bitNum of a to 1 when value is non-zero, or clears it
+  when value is zero.  a is padded first if the bit lies above its used
+  digits, and clamped afterwards.
+  Returns MP_OKAY, or the error code reported by s_mp_pad().
+ */
+mp_err
+mpl_set_bit(mp_int *a, mp_size bitNum, mp_size value)
+{
+    mp_size digitIx;
+    mp_digit bitMask;
+    mp_err status;
+
+    ARGCHK(a != NULL, MP_BADARG);
+
+    digitIx = bitNum / MP_DIGIT_BIT;
+    if (digitIx + 1 > MP_USED(a)) {
+        status = s_mp_pad(a, digitIx + 1);
+        if (status != MP_OKAY)
+            return status;
+    }
+
+    bitMask = (mp_digit)1 << (bitNum % MP_DIGIT_BIT);
+    if (!value)
+        MP_DIGIT(a, digitIx) &= ~bitMask;
+    else
+        MP_DIGIT(a, digitIx) |= bitMask;
+
+    s_mp_clamp(a);
+    return MP_OKAY;
+}
+
+/*
+  mpl_get_bit
+
+  returns 0 or 1 (the value of bit number bitNum of a) or some
+  (negative) error code: MP_BADARG when a is NULL, MP_RANGE when bitNum
+  lies above the used digits of a.
+
+  The range test is written as "ix < MP_USED(a)" rather than
+  "ix <= MP_USED(a) - 1": the subtraction wraps around when the used
+  count is 0, which would let every index through.
+ */
+mp_err
+mpl_get_bit(const mp_int *a, mp_size bitNum)
+{
+    mp_size bit, ix;
+
+    ARGCHK(a != NULL, MP_BADARG);
+
+    ix = bitNum / MP_DIGIT_BIT;
+    ARGCHK(ix < MP_USED(a), MP_RANGE);
+
+    bit = bitNum % MP_DIGIT_BIT;
+    return (mp_err)((MP_DIGIT(a, ix) >> bit) & 1);
+}
+
+/*
+  mpl_get_bits
+  - Extracts numBits bits from a, where the least significant extracted bit
+    is bit lsbNum.  Returns a negative value if error occurs.
+  - Because sign bit is used to indicate error, maximum number of bits to
+    be returned is the lesser of (a) the number of bits in an mp_digit, or
+    (b) one less than the number of bits in an mp_err.
+    NOTE(review): only numBits < bits-in-mp_digit is actually enforced
+    below; limit (b) is left to the caller.
+  - lsbNum + numbits can be greater than the number of significant bits in
+    integer a, as long as bit lsbNum is in the high order digit of a.
+
+  Changes from the previous version:
+  - the mask is built from an mp_digit-wide 1 and only after numBits has
+    been validated; the old "(1 << numBits)" shifted a plain int, which is
+    undefined for large numBits and loses bits when mp_digit is wider
+    than int.
+  - a is checked for NULL before it is dereferenced.
+  - the digit holding lsbNum must be a used digit (lsWndx < MP_USED(a));
+    the old test also accepted lsbNum == MP_USED(a) * MP_DIGIT_BIT and
+    then read one digit past the used ones.
+ */
+mp_err
+mpl_get_bits(const mp_int *a, mp_size lsbNum, mp_size numBits)
+{
+    mp_size rshift = (lsbNum % MP_DIGIT_BIT);
+    mp_size lsWndx = (lsbNum / MP_DIGIT_BIT);
+    mp_digit *digit;
+    mp_digit mask;
+
+    ARGCHK(a != NULL, MP_BADARG);
+    ARGCHK(numBits < CHAR_BIT * sizeof mask, MP_BADARG);
+    ARGCHK(lsWndx < MP_USED(a), MP_RANGE);
+
+    digit = MP_DIGITS(a) + lsWndx;
+    mask = ((mp_digit)1 << numBits) - 1;
+
+    if ((numBits + lsbNum % MP_DIGIT_BIT <= MP_DIGIT_BIT) ||
+        (lsWndx + 1 >= MP_USED(a))) {
+        /* All requested bits that exist live in this one digit. */
+        mask &= (digit[0] >> rshift);
+    } else {
+        /* The window straddles two digits; rshift is non-zero here
+           because numBits is smaller than MP_DIGIT_BIT. */
+        mask &= ((digit[0] >> rshift) | (digit[1] << (MP_DIGIT_BIT - rshift)));
+    }
+    return (mp_err)mask;
+}
+
+/*
+  mpl_significant_bits
+  returns number of significant bits in abs(a).
+  returns 1 if value is zero.
+  NOTE(review): a NULL argument is reported through ARGCHK with
+  MP_BADARG although the return type is mp_size - confirm how callers
+  are expected to detect that.
+ */
+mp_size
+mpl_significant_bits(const mp_int *a)
+{
+    mp_size count = 0;
+    int top;
+
+    ARGCHK(a != NULL, MP_BADARG);
+
+    /* Walk down from the highest used digit to the first non-zero one. */
+    top = MP_USED(a);
+    while (top > 0) {
+        mp_digit word = MP_DIGIT(a, top - 1);
+
+        --top;
+        if (word != 0) {
+            /* Count the bits of that digit up to its highest 1 bit. */
+            do {
+                ++count;
+                word >>= 1;
+            } while (word != 0);
+            break;
+        }
+    }
+
+    /* Every digit below the one found contributes all of its bits. */
+    count += top * MP_DIGIT_BIT;
+
+    return count ? count : 1;
+}
+
+/*------------------------------------------------------------------------*/
+/* HERE THERE BE DRAGONS */
diff --git a/security/nss/lib/freebl/mpi/mplogic.h b/security/nss/lib/freebl/mpi/mplogic.h
new file mode 100644
index 000000000..a4a6b7735
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mplogic.h
@@ -0,0 +1,52 @@
+/*
+ * mplogic.h
+ *
+ * Bitwise logical operations on MPI values
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _H_MPLOGIC_
+#define _H_MPLOGIC_
+
+#include "mpi.h"
+
+/*
+ The logical operations treat an mp_int as if it were a bit vector,
+ without regard to its sign (an mp_int is represented in a signed
+ magnitude format). Values are treated as if they had an infinite
+ string of zeros left of the most-significant bit.
+ */
+
+/* Parity results, as returned by mpl_parity() */
+
+#define MP_EVEN MP_YES
+#define MP_ODD MP_NO
+
+/* Bitwise functions: the last argument receives the result */
+
+mp_err mpl_not(mp_int *a, mp_int *b); /* b = ~a, one's complement of the used digits */
+mp_err mpl_and(mp_int *a, mp_int *b, mp_int *c); /* c = a & b, bitwise AND */
+mp_err mpl_or(mp_int *a, mp_int *b, mp_int *c); /* c = a | b, bitwise OR */
+mp_err mpl_xor(mp_int *a, mp_int *b, mp_int *c); /* c = a ^ b, bitwise XOR */
+
+/* Shift functions: b receives a shifted by d bits */
+
+mp_err mpl_rsh(const mp_int *a, mp_int *b, mp_digit d); /* b = a >> d, right shift */
+mp_err mpl_lsh(const mp_int *a, mp_int *b, mp_digit d); /* b = a << d, left shift */
+
+/* Bit count and parity; counts cover the used digits only and num may be NULL */
+
+mp_err mpl_num_set(mp_int *a, int *num); /* count set bits */
+mp_err mpl_num_clear(mp_int *a, int *num); /* count clear bits */
+mp_err mpl_parity(mp_int *a); /* determine parity: MP_EVEN or MP_ODD */
+
+/* Get & Set the value of a bit */
+
+/* Sets (value != 0) or clears (value == 0) bit bitNum, growing a if needed. */
+mp_err mpl_set_bit(mp_int *a, mp_size bitNum, mp_size value);
+/* Returns bit bitNum of a as 0 or 1, or a negative error code. */
+mp_err mpl_get_bit(const mp_int *a, mp_size bitNum);
+/* Returns numBits bits of a starting at bit lsbNum, or a negative error code. */
+mp_err mpl_get_bits(const mp_int *a, mp_size lsbNum, mp_size numBits);
+/* Returns the number of significant bits in abs(a); 1 when a is zero. */
+mp_size mpl_significant_bits(const mp_int *a);
+
+#endif /* end _H_MPLOGIC_ */
diff --git a/security/nss/lib/freebl/mpi/mpmontg.c b/security/nss/lib/freebl/mpi/mpmontg.c
new file mode 100644
index 000000000..06fd41b3a
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpmontg.c
@@ -0,0 +1,1141 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* This file implements modular exponentiation using Montgomery's
+ * method for modular reduction. This file implements the method
+ * described as "Improvement 2" in the paper "A Cryptographic Library for
+ * the Motorola DSP56000" by Stephen R. Dusse' and Burton S. Kaliski Jr.
+ * published in "Advances in Cryptology: Proceedings of EUROCRYPT '90"
+ * "Lecture Notes in Computer Science" volume 473, 1991, pg 230-244,
+ * published by Springer Verlag.
+ */
+
+#define MP_USING_CACHE_SAFE_MOD_EXP 1
+#include <string.h>
+#include "mpi-priv.h"
+#include "mplogic.h"
+#include "mpprime.h"
+#ifdef MP_USING_MONT_MULF
+#include "montmulf.h"
+#endif
+#include <stddef.h> /* ptrdiff_t */
+#include <assert.h>
+
+#define STATIC
+
+#define MAX_ODD_INTS 32 /* 2 ** (WINDOW_BITS - 1) */
+
+/*! Montgomery reduction in place: T = REDC(T), with R == 2^b being
+    MP_RADIX raised to the number of digits used by N.
+    \param T   value to reduce, T < RN
+    \param mmm modulus N together with n0' of N
+    \return MP_OKAY on success, otherwise the error reported by a helper;
+            DEBUG builds return MP_UNDEF if the result is still >= N
+*/
+mp_err
+s_mp_redc(mp_int *T, mp_mont_modulus *mmm)
+{
+    mp_err res;
+    mp_size digit;
+    mp_int *modN = &mmm->N;
+    mp_size nUsed = MP_USED(modN);
+
+    /* Make room for twice the modulus length plus one extra digit. */
+    MP_CHECKOK(s_mp_pad(T, (nUsed << 1) + 1));
+
+    /* For each of the low digits: T += N * m * (MP_RADIX ** digit). */
+    for (digit = 0; digit < nUsed; ++digit) {
+        mp_digit m = MP_DIGIT(T, digit) * mmm->n0prime;
+        s_mp_mul_d_add_offset(modN, m, T, digit);
+    }
+    s_mp_clamp(T);
+
+    /* T /= R */
+    s_mp_rshd(T, nUsed);
+
+    /* A single conditional subtraction finishes the reduction. */
+    res = s_mp_cmp(T, modN);
+    if (res >= 0) {
+        /* T = T - N */
+        MP_CHECKOK(s_mp_sub(T, modN));
+#ifdef DEBUG
+        res = mp_cmp(T, modN);
+        if (res >= 0) {
+            res = MP_UNDEF;
+            goto CLEANUP;
+        }
+#endif
+    }
+    res = MP_OKAY;
+CLEANUP:
+    return res;
+}
+
+#if !defined(MP_MONT_USE_MP_MUL)
+
+/*! c <- REDC( a * b ) mod N
+ \param a < N i.e. "reduced"
+ \param b < N i.e. "reduced"
+ \param c receives the product; it is reset before a and b are read,
+ so it must not be the same mp_int as a or b
+ \param mmm modulus N and n0' of N
+ \return MP_OKAY, MP_BADARG from the argument check, or the error
+ reported by s_mp_pad / s_mp_sub
+
+ Multiplication and reduction are interleaved: each time one digit of b
+ has been multiplied in, a multiple of N is added at the same digit
+ position of c.
+*/
+mp_err
+s_mp_mul_mont(const mp_int *a, const mp_int *b, mp_int *c,
+ mp_mont_modulus *mmm)
+{
+ mp_digit *pb;
+ mp_digit m_i;
+ mp_err res;
+ mp_size ib; /* "index b": index of current digit of B */
+ mp_size useda, usedb;
+
+ ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+ if (MP_USED(a) < MP_USED(b)) {
+ const mp_int *xch = b; /* switch a and b, to do fewer outer loops */
+ b = a;
+ a = xch;
+ }
+
+ /* Reset c to zero, then grow it to 2 * used(N) + 1 digits. */
+ MP_USED(c) = 1;
+ MP_DIGIT(c, 0) = 0;
+ ib = (MP_USED(&mmm->N) << 1) + 1;
+ if ((res = s_mp_pad(c, ib)) != MP_OKAY)
+ goto CLEANUP;
+
+ /* First digit of b: c = a * b[0], remaining digits of c cleared,
+ * followed by the first reduction step at offset 0. */
+ useda = MP_USED(a);
+ pb = MP_DIGITS(b);
+ s_mpv_mul_d(MP_DIGITS(a), useda, *pb++, MP_DIGITS(c));
+ s_mp_setz(MP_DIGITS(c) + useda + 1, ib - (useda + 1));
+ m_i = MP_DIGIT(c, 0) * mmm->n0prime;
+ s_mp_mul_d_add_offset(&mmm->N, m_i, c, 0);
+
+ /* Outer loop: Digits of b */
+ usedb = MP_USED(b);
+ for (ib = 1; ib < usedb; ib++) {
+ mp_digit b_i = *pb++;
+
+ /* Inner product: Digits of a */
+ /* NOTE(review): the multiply is skipped for zero digits of b, so the
+ * work done here depends on the value of b. */
+ if (b_i)
+ s_mpv_mul_d_add_prop(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib);
+ m_i = MP_DIGIT(c, ib) * mmm->n0prime;
+ s_mp_mul_d_add_offset(&mmm->N, m_i, c, ib);
+ }
+ /* b had fewer digits than N: run the remaining reduction steps alone. */
+ if (usedb < MP_USED(&mmm->N)) {
+ for (usedb = MP_USED(&mmm->N); ib < usedb; ++ib) {
+ m_i = MP_DIGIT(c, ib) * mmm->n0prime;
+ s_mp_mul_d_add_offset(&mmm->N, m_i, c, ib);
+ }
+ }
+ s_mp_clamp(c);
+ s_mp_rshd(c, MP_USED(&mmm->N)); /* c /= R */
+ /* Final conditional subtraction: c -= N when c >= N. */
+ if (s_mp_cmp(c, &mmm->N) >= 0) {
+ MP_CHECKOK(s_mp_sub(c, &mmm->N));
+ }
+ res = MP_OKAY;
+
+CLEANUP:
+ return res;
+}
+#endif
+
+/* Convert x to Montgomery form: xMont = (x * R) mod N, where N is the
+ * modulus stored in mmm and R is MP_RADIX raised to the number of digits
+ * used by N. x and xMont may be the same mp_int (callers in this file
+ * do that).
+ */
+STATIC
+mp_err
+s_mp_to_mont(const mp_int *x, mp_mont_modulus *mmm, mp_int *xMont)
+{
+    mp_err res;
+    mp_int *modN = &mmm->N;
+
+    /* Start from a copy of x ... */
+    MP_CHECKOK(mp_copy(x, xMont));
+    /* ... multiply by R with a whole-digit left shift (xMont = x << b) ... */
+    MP_CHECKOK(s_mp_lshd(xMont, MP_USED(modN)));
+    /* ... and keep only the remainder mod N; the quotient is not wanted. */
+    MP_CHECKOK(mp_div(xMont, modN, NULL, xMont));
+CLEANUP:
+    return res;
+}
+
+#ifdef MP_USING_MONT_MULF
+
+/* the floating point multiply is already cache safe,
+ * don't turn on cache safe unless we specifically
+ * force it */
+#ifndef MP_FORCE_CACHE_SAFE
+#undef MP_USING_CACHE_SAFE_MOD_EXP
+#endif
+
+unsigned int mp_using_mont_mulf = 1;
+
+/* computes montgomery square of the integer in mResult */
+/* Takes no arguments: it works on the locals of mp_exptmod_f (dm1, d16Tmp,
+ * mResult, dTmp, dn, modulus, nLen, dn0). mResult is first converted into
+ * dm1 and d16Tmp, then mont_mulf_noconv() writes the result back into
+ * mResult. Expands to two statements, so it must be used inside braces. */
+#define SQR \
+ conv_i32_to_d32_and_d16(dm1, d16Tmp, mResult, nLen); \
+ mont_mulf_noconv(mResult, dm1, d16Tmp, \
+ dTmp, dn, MP_DIGITS(modulus), nLen, dn0)
+
+/* computes montgomery product of x and the integer in mResult */
+/* x is an index into oddPowers[]; the other operand is mResult, converted
+ * into dm1 first. The result is written back into mResult. Also expands to
+ * two statements. */
+#define MUL(x) \
+ conv_i32_to_d32(dm1, mResult, nLen); \
+ mont_mulf_noconv(mResult, dm1, oddPowers[x], \
+ dTmp, dn, MP_DIGITS(modulus), nLen, dn0)
+
+/* Do modular exponentiation using floating point multiply code.
+ *
+ * Fixed-window exponentiation over a table of odd powers of the base. The
+ * running value lives in mResult and is updated by the SQR / MUL(x) macros
+ * defined just above, which call the mont_mulf conversion and multiply
+ * routines.
+ *
+ * montBase         base in Montgomery form; mp_exptmod pads it to nLen
+ *                  digits before calling
+ * exponent         exponent, consumed window_bits bits at a time, from the
+ *                  most significant window down
+ * modulus          the modulus; its digits are handed to mont_mulf_noconv
+ * result           receives the answer (exchanged with the accumulator)
+ * mmm              Montgomery data for modulus (N and n0')
+ * nLen             number of digits used to size every work buffer
+ * bits_in_exponent bit offset just above the first window; mp_exptmod
+ *                  rounds it up to a multiple of window_bits
+ * window_bits      must be 1, 4, 5 or 6; anything else calls abort()
+ * odd_ints         number of odd powers to precompute (at most
+ *                  MAX_ODD_INTS, the size of the local pointer table)
+ *
+ * Returns MP_MEM if the work buffer cannot be allocated, the first error
+ * seen by MP_CHECKOK, or otherwise the status of the final s_mp_redc().
+ */
+mp_err
+mp_exptmod_f(const mp_int *montBase,
+             const mp_int *exponent,
+             const mp_int *modulus,
+             mp_int *result,
+             mp_mont_modulus *mmm,
+             int nLen,
+             mp_size bits_in_exponent,
+             mp_size window_bits,
+             mp_size odd_ints)
+{
+    mp_digit *mResult;
+    double *dBuf = 0, *dm1, *dn, *dSqr, *d16Tmp, *dTmp;
+    double dn0;
+    mp_size i;
+    mp_err res;
+    int expOff;
+    int dSize = 0, oddPowSize, dTmpSize;
+    mp_int accum1;
+    double *oddPowers[MAX_ODD_INTS];
+
+    /* Give accum1 and the table defined values before anything can jump
+     * to CLEANUP. */
+    MP_DIGITS(&accum1) = 0;
+
+    for (i = 0; i < MAX_ODD_INTS; ++i)
+        oddPowers[i] = 0;
+
+    MP_CHECKOK(mp_init_size(&accum1, 3 * nLen + 2));
+
+    /* accum1 = Montgomery form of 1, padded to nLen digits */
+    mp_set(&accum1, 1);
+    MP_CHECKOK(s_mp_to_mont(&accum1, mmm, &accum1));
+    MP_CHECKOK(s_mp_pad(&accum1, nLen));
+
+    /* One allocation is carved into dm1, dn, dSqr, d16Tmp, the odd-power
+     * table, dTmp and finally mResult. */
+    oddPowSize = 2 * nLen + 1;
+    dTmpSize = 2 * oddPowSize;
+    dSize = sizeof(double) * (nLen * 4 + 1 +
+                              ((odd_ints + 1) * oddPowSize) + dTmpSize);
+    dBuf = (double *)malloc(dSize);
+    if (!dBuf) {
+        /* Previously unchecked: every pointer below is derived from dBuf. */
+        res = MP_MEM;
+        goto CLEANUP;
+    }
+    dm1 = dBuf;           /* array of d32 */
+    dn = dBuf + nLen;     /* array of d32 */
+    dSqr = dn + nLen;     /* array of d32 */
+    d16Tmp = dSqr + nLen; /* array of d16 */
+    dTmp = d16Tmp + oddPowSize;
+
+    for (i = 0; i < odd_ints; ++i) {
+        oddPowers[i] = dTmp;
+        dTmp += oddPowSize;
+    }
+    mResult = (mp_digit *)(dTmp + dTmpSize); /* size is nLen + 1 */
+
+    /* Make dn and dn0 */
+    conv_i32_to_d32(dn, MP_DIGITS(modulus), nLen);
+    dn0 = (double)(mmm->n0prime & 0xffff);
+
+    /* Make dSqr */
+    conv_i32_to_d32_and_d16(dm1, oddPowers[0], MP_DIGITS(montBase), nLen);
+    mont_mulf_noconv(mResult, dm1, oddPowers[0],
+                     dTmp, dn, MP_DIGITS(modulus), nLen, dn0);
+    conv_i32_to_d32(dSqr, mResult, nLen);
+
+    /* Each further odd power is the previous one times dSqr. */
+    for (i = 1; i < odd_ints; ++i) {
+        mont_mulf_noconv(mResult, dSqr, oddPowers[i - 1],
+                         dTmp, dn, MP_DIGITS(modulus), nLen, dn0);
+        conv_i32_to_d16(oddPowers[i], mResult, nLen);
+    }
+
+    s_mp_copy(MP_DIGITS(&accum1), mResult, nLen); /* from, to, len */
+
+    /* Walk the exponent one window at a time. A non-zero window value is
+     * odd * 2**k: square (window_bits - k) times, multiply by the odd power
+     * selected by odd / 2, then square k more times. */
+    for (expOff = bits_in_exponent - window_bits; expOff >= 0; expOff -= window_bits) {
+        mp_size smallExp;
+        MP_CHECKOK(mpl_get_bits(exponent, expOff, window_bits));
+        smallExp = (mp_size)res;
+
+        if (window_bits == 1) {
+            if (!smallExp) {
+                SQR;
+            } else if (smallExp & 1) {
+                SQR;
+                MUL(0);
+            } else {
+                abort();
+            }
+        } else if (window_bits == 4) {
+            if (!smallExp) {
+                SQR;
+                SQR;
+                SQR;
+                SQR;
+            } else if (smallExp & 1) {
+                SQR;
+                SQR;
+                SQR;
+                SQR;
+                MUL(smallExp / 2);
+            } else if (smallExp & 2) {
+                SQR;
+                SQR;
+                SQR;
+                MUL(smallExp / 4);
+                SQR;
+            } else if (smallExp & 4) {
+                SQR;
+                SQR;
+                MUL(smallExp / 8);
+                SQR;
+                SQR;
+            } else if (smallExp & 8) {
+                SQR;
+                MUL(smallExp / 16);
+                SQR;
+                SQR;
+                SQR;
+            } else {
+                abort();
+            }
+        } else if (window_bits == 5) {
+            if (!smallExp) {
+                SQR;
+                SQR;
+                SQR;
+                SQR;
+                SQR;
+            } else if (smallExp & 1) {
+                SQR;
+                SQR;
+                SQR;
+                SQR;
+                SQR;
+                MUL(smallExp / 2);
+            } else if (smallExp & 2) {
+                SQR;
+                SQR;
+                SQR;
+                SQR;
+                MUL(smallExp / 4);
+                SQR;
+            } else if (smallExp & 4) {
+                SQR;
+                SQR;
+                SQR;
+                MUL(smallExp / 8);
+                SQR;
+                SQR;
+            } else if (smallExp & 8) {
+                SQR;
+                SQR;
+                MUL(smallExp / 16);
+                SQR;
+                SQR;
+                SQR;
+            } else if (smallExp & 0x10) {
+                SQR;
+                MUL(smallExp / 32);
+                SQR;
+                SQR;
+                SQR;
+                SQR;
+            } else {
+                abort();
+            }
+        } else if (window_bits == 6) {
+            if (!smallExp) {
+                SQR;
+                SQR;
+                SQR;
+                SQR;
+                SQR;
+                SQR;
+            } else if (smallExp & 1) {
+                SQR;
+                SQR;
+                SQR;
+                SQR;
+                SQR;
+                SQR;
+                MUL(smallExp / 2);
+            } else if (smallExp & 2) {
+                SQR;
+                SQR;
+                SQR;
+                SQR;
+                SQR;
+                MUL(smallExp / 4);
+                SQR;
+            } else if (smallExp & 4) {
+                SQR;
+                SQR;
+                SQR;
+                SQR;
+                MUL(smallExp / 8);
+                SQR;
+                SQR;
+            } else if (smallExp & 8) {
+                SQR;
+                SQR;
+                SQR;
+                MUL(smallExp / 16);
+                SQR;
+                SQR;
+                SQR;
+            } else if (smallExp & 0x10) {
+                SQR;
+                SQR;
+                MUL(smallExp / 32);
+                SQR;
+                SQR;
+                SQR;
+                SQR;
+            } else if (smallExp & 0x20) {
+                SQR;
+                MUL(smallExp / 64);
+                SQR;
+                SQR;
+                SQR;
+                SQR;
+                SQR;
+            } else {
+                abort();
+            }
+        } else {
+            abort();
+        }
+    }
+
+    s_mp_copy(mResult, MP_DIGITS(&accum1), nLen); /* from, to, len */
+
+    /* Leave Montgomery form and hand the value to the caller. */
+    res = s_mp_redc(&accum1, mmm);
+    mp_exch(&accum1, result);
+
+CLEANUP:
+    mp_clear(&accum1);
+    if (dBuf) {
+        /* Wipe the intermediate values before releasing the buffer. */
+        if (dSize)
+            memset(dBuf, 0, dSize);
+        free(dBuf);
+    }
+
+    return res;
+}
+#undef SQR
+#undef MUL
+#endif
+
+/* SQR(a, b): b = Montgomery square of a (mp_sqr followed by s_mp_redc).
+ * Expands to two MP_CHECKOK statements and refers to mmm, so it must be
+ * used inside braces in a function that provides mmm. */
+#define SQR(a, b) \
+ MP_CHECKOK(mp_sqr(a, b)); \
+ MP_CHECKOK(s_mp_redc(b, mmm))
+
+/* MUL(x, a, b): b = Montgomery product of a and oddPowers[x]. Either a plain
+ * multiply followed by a reduction, or the combined s_mp_mul_mont(),
+ * depending on MP_MONT_USE_MP_MUL. */
+#if defined(MP_MONT_USE_MP_MUL)
+#define MUL(x, a, b) \
+ MP_CHECKOK(mp_mul(a, oddPowers + (x), b)); \
+ MP_CHECKOK(s_mp_redc(b, mmm))
+#else
+#define MUL(x, a, b) \
+ MP_CHECKOK(s_mp_mul_mont(a, oddPowers + (x), b, mmm))
+#endif
+
+/* SWAPPA: exchange the accumulator pointers pa1 and pa2 through ptmp.
+ * Expands to three statements. */
+#define SWAPPA \
+ ptmp = pa1; \
+ pa1 = pa2; \
+ pa2 = ptmp
+
+/* Do modular exponentiation using integer multiply code.
+ *
+ * Fixed-window exponentiation over a table of odd powers of the base; every
+ * product is reduced with Montgomery reduction via the SQR / MUL macros.
+ *
+ * montBase: base that seeds the odd-power table (mp_exptmod passes the
+ * Montgomery form of the base)
+ * exponent: exponent, read window_bits bits at a time from the top
+ * modulus: not referenced in this function body
+ * result: receives the answer (exchanged with the accumulator)
+ * mmm: Montgomery data handed to s_mp_redc / s_mp_mul_mont
+ * nLen: digit count used to size the temporaries
+ * bits_in_exponent: bit offset just above the first window; mp_exptmod
+ * rounds it up to a multiple of window_bits
+ * window_bits: must be 1, 4, 5 or 6; any other value calls abort()
+ * odd_ints: number of oddPowers[] entries to build; the local array holds
+ * MAX_ODD_INTS of them
+ *
+ * Returns the status of the final s_mp_redc(), or the first error seen by
+ * MP_CHECKOK.
+ */
+mp_err
+mp_exptmod_i(const mp_int *montBase,
+ const mp_int *exponent,
+ const mp_int *modulus,
+ mp_int *result,
+ mp_mont_modulus *mmm,
+ int nLen,
+ mp_size bits_in_exponent,
+ mp_size window_bits,
+ mp_size odd_ints)
+{
+ mp_int *pa1, *pa2, *ptmp;
+ mp_size i;
+ mp_err res;
+ int expOff;
+ mp_int accum1, accum2, power2, oddPowers[MAX_ODD_INTS];
+
+ /* power2 = base ** 2; oddPowers[i] = base ** (2*i + 1); */
+
+ /* Give every mp_int a null digit pointer before anything can jump to
+ * CLEANUP, which clears all of them. */
+ MP_DIGITS(&accum1) = 0;
+ MP_DIGITS(&accum2) = 0;
+ MP_DIGITS(&power2) = 0;
+ for (i = 0; i < MAX_ODD_INTS; ++i) {
+ MP_DIGITS(oddPowers + i) = 0;
+ }
+
+ MP_CHECKOK(mp_init_size(&accum1, 3 * nLen + 2));
+ MP_CHECKOK(mp_init_size(&accum2, 3 * nLen + 2));
+
+ MP_CHECKOK(mp_init_copy(&oddPowers[0], montBase));
+
+ MP_CHECKOK(mp_init_size(&power2, nLen + 2 * MP_USED(montBase) + 2));
+ MP_CHECKOK(mp_sqr(montBase, &power2)); /* power2 = montBase ** 2 */
+ MP_CHECKOK(s_mp_redc(&power2, mmm));
+
+ /* Each further odd power is the previous one times power2. */
+ for (i = 1; i < odd_ints; ++i) {
+ MP_CHECKOK(mp_init_size(oddPowers + i, nLen + 2 * MP_USED(&power2) + 2));
+ MP_CHECKOK(mp_mul(oddPowers + (i - 1), &power2, oddPowers + i));
+ MP_CHECKOK(s_mp_redc(oddPowers + i, mmm));
+ }
+
+ /* set accumulator to montgomery residue of 1 */
+ mp_set(&accum1, 1);
+ MP_CHECKOK(s_mp_to_mont(&accum1, mmm, &accum1));
+ pa1 = &accum1;
+ pa2 = &accum2;
+
+ /* One window per iteration. A non-zero window value is odd * 2**k:
+ * square (window_bits - k) times, multiply by oddPowers[odd / 2], then
+ * square k more times. The operations alternate between the two
+ * accumulators; SWAPPA is used where the last one wrote through pa2, so
+ * that pa1 names the running value again at the end of every branch. */
+ for (expOff = bits_in_exponent - window_bits; expOff >= 0; expOff -= window_bits) {
+ mp_size smallExp;
+ MP_CHECKOK(mpl_get_bits(exponent, expOff, window_bits));
+ smallExp = (mp_size)res;
+
+ if (window_bits == 1) {
+ if (!smallExp) {
+ SQR(pa1, pa2);
+ SWAPPA;
+ } else if (smallExp & 1) {
+ SQR(pa1, pa2);
+ MUL(0, pa2, pa1);
+ } else {
+ abort();
+ }
+ } else if (window_bits == 4) {
+ if (!smallExp) {
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ } else if (smallExp & 1) {
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ MUL(smallExp / 2, pa1, pa2);
+ SWAPPA;
+ } else if (smallExp & 2) {
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ MUL(smallExp / 4, pa2, pa1);
+ SQR(pa1, pa2);
+ SWAPPA;
+ } else if (smallExp & 4) {
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ MUL(smallExp / 8, pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SWAPPA;
+ } else if (smallExp & 8) {
+ SQR(pa1, pa2);
+ MUL(smallExp / 16, pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SWAPPA;
+ } else {
+ abort();
+ }
+ } else if (window_bits == 5) {
+ if (!smallExp) {
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SWAPPA;
+ } else if (smallExp & 1) {
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ MUL(smallExp / 2, pa2, pa1);
+ } else if (smallExp & 2) {
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ MUL(smallExp / 4, pa1, pa2);
+ SQR(pa2, pa1);
+ } else if (smallExp & 4) {
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ MUL(smallExp / 8, pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ } else if (smallExp & 8) {
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ MUL(smallExp / 16, pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ } else if (smallExp & 0x10) {
+ SQR(pa1, pa2);
+ MUL(smallExp / 32, pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ } else {
+ abort();
+ }
+ } else if (window_bits == 6) {
+ if (!smallExp) {
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ } else if (smallExp & 1) {
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ MUL(smallExp / 2, pa1, pa2);
+ SWAPPA;
+ } else if (smallExp & 2) {
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ MUL(smallExp / 4, pa2, pa1);
+ SQR(pa1, pa2);
+ SWAPPA;
+ } else if (smallExp & 4) {
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ MUL(smallExp / 8, pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SWAPPA;
+ } else if (smallExp & 8) {
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ MUL(smallExp / 16, pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SWAPPA;
+ } else if (smallExp & 0x10) {
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ MUL(smallExp / 32, pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SWAPPA;
+ } else if (smallExp & 0x20) {
+ SQR(pa1, pa2);
+ MUL(smallExp / 64, pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SWAPPA;
+ } else {
+ abort();
+ }
+ } else {
+ abort();
+ }
+ }
+
+ /* Leave Montgomery form. NOTE(review): the exchange with result happens
+ * whether or not s_mp_redc() succeeded. */
+ res = s_mp_redc(pa1, mmm);
+ mp_exch(pa1, result);
+
+CLEANUP:
+ mp_clear(&accum1);
+ mp_clear(&accum2);
+ mp_clear(&power2);
+ for (i = 0; i < odd_ints; ++i) {
+ mp_clear(oddPowers + i);
+ }
+ return res;
+}
+#undef SQR
+#undef MUL
+
+#ifdef MP_USING_CACHE_SAFE_MOD_EXP
+unsigned int mp_using_cache_safe_exp = 1;
+#endif
+
+/* Switch the cache-safe modular exponentiation code on (non-zero value)
+ * or off (zero). Builds without MP_USING_CACHE_SAFE_MOD_EXP accept only
+ * zero and answer MP_BADARG to anything else.
+ */
+mp_err
+mp_set_safe_modexp(int value)
+{
+#ifndef MP_USING_CACHE_SAFE_MOD_EXP
+    /* No cache-safe code in this build: only "off" can be honoured. */
+    return (value == 0) ? MP_OKAY : MP_BADARG;
+#else
+    mp_using_cache_safe_exp = value;
+    return MP_OKAY;
+#endif
+}
+
+#ifdef MP_USING_CACHE_SAFE_MOD_EXP
+#define WEAVE_WORD_SIZE 4
+
+/*
+ * mpi_to_weave interleaves ("weaves") WEAVE_WORD_SIZE bignums into a matrix
+ * of mp_digits that holds one bignum per column:
+ *
+ *     weaved[d * nBignums + k] = digit d of bignums[k]
+ *
+ * Digit 0 of bignums[0] therefore lands in weaved[0], and consecutive
+ * digits of one bignum are nBignums elements apart. Since every row of the
+ * matrix holds the same digit position of all the bignums, the pattern of
+ * cache misses seen while a value is read back is meant not to reveal
+ * which bignum was wanted.
+ *
+ * One call handles only WEAVE_WORD_SIZE (4) neighbouring columns; callers
+ * fill a wider matrix by calling again with "weaved" advanced by four
+ * elements.
+ *
+ * Whole mp_digits are copied, so the byte order inside a digit does not
+ * matter.
+ *
+ * "bignums"  points at the four mp_ints to store. Each must be
+ *            non-negative and use at most nDigits digits, otherwise
+ *            MP_BADARG is returned (when argument checking is enabled).
+ * "weaved"   points at the element that receives digit 0 of bignums[0].
+ * "nDigits"  is the number of rows; shorter bignums are zero-padded.
+ * "nBignums" is the total number of columns in the matrix, i.e. the
+ *            distance between rows.
+ */
+mp_err mpi_to_weave(const mp_int *bignums,
+                    mp_digit *weaved,
+                    mp_size nDigits,  /* in each mp_int of input */
+                    mp_size nBignums) /* in the entire source array */
+{
+    mp_size col;
+    mp_digit *limit = weaved + (nDigits * nBignums);
+
+    for (col = 0; col < WEAVE_WORD_SIZE; col++) {
+        const mp_int *src = &bignums[col];
+        mp_size used = MP_USED(src);
+        mp_digit *out = weaved + col;
+        mp_size row;
+
+        ARGCHK(MP_SIGN(src) == MP_ZPOS, MP_BADARG);
+        ARGCHK(used <= nDigits, MP_BADARG);
+
+        /* Copy the digits in use down the column ... */
+        for (row = 0; row < used; row++) {
+            *out = MP_DIGIT(src, row);
+            out += nBignums;
+        }
+        /* ... and zero whatever is left of it. */
+        for (; out < limit; out += nBignums) {
+            *out = 0;
+        }
+    }
+
+    return MP_OKAY;
+}
+
+/*
+ * These macros evaluate to an all-ones mask if the condition is true, and
+ * to 0 otherwise, without branching.
+ *   CONST_TIME_MSB(x)   - mask taken from the most significant bit of x
+ *   CONST_TIME_EQ_Z(x)  - mask for x == 0
+ *   CONST_TIME_EQ(a, b) - mask for a == b
+ * They rely on unsigned operands (the shift must yield 0 or 1);
+ * weave_to_mpi() applies them to mp_digit values.
+ */
+#define CONST_TIME_MSB(x) (0L - ((x) >> (8 * sizeof(x) - 1)))
+#define CONST_TIME_EQ_Z(x) CONST_TIME_MSB(~(x) & ((x)-1))
+#define CONST_TIME_EQ(a, b) CONST_TIME_EQ_Z((a) ^ (b))
+
+/* Reverse mpi_to_weave() for one mp_int.
+ * Reconstruct one mp_int from its column in the weaved array.
+ * Every read accesses every element of the weaved array, in order to
+ * avoid timing attacks based on patterns of memory accesses.
+ *
+ * The digit buffer of "a" is written directly for nDigits digits and is
+ * not grown here, so the caller must have allocated at least that many.
+ * Always returns MP_OKAY.
+ */
+mp_err weave_to_mpi(mp_int *a, /* out, result */
+ const mp_digit *weaved, /* in, digit matrix */
+ mp_size index, /* which column to read */
+ mp_size nDigits, /* number of mp_digits in each bignum */
+ mp_size nBignums) /* width of the matrix */
+{
+ /* these are indices, but need to be the same size as mp_digit
+ * because of the CONST_TIME operations */
+ mp_digit i, j;
+ mp_digit d;
+ mp_digit *pDest = MP_DIGITS(a);
+
+ MP_SIGN(a) = MP_ZPOS;
+ MP_USED(a) = nDigits;
+
+ assert(weaved != NULL);
+
+ /* Fetch the proper column in constant time, indexing over the whole array */
+ for (i = 0; i < nDigits; ++i) {
+ d = 0;
+ for (j = 0; j < nBignums; ++j) {
+ /* the mask is all ones only for j == index, so exactly one element
+ * of the row survives the AND */
+ d |= weaved[i * nBignums + j] & CONST_TIME_EQ(j, index);
+ }
+ pDest[i] = d;
+ }
+
+ /* drop the zero padding that mpi_to_weave() stored for short values */
+ s_mp_clamp(a);
+ return MP_OKAY;
+}
+
+/* SQR(a, b): b = Montgomery square of a (mp_sqr followed by s_mp_redc).
+ * Expands to two MP_CHECKOK statements and refers to mmm, so it must be
+ * used inside braces in a function that provides mmm. */
+#define SQR(a, b) \
+ MP_CHECKOK(mp_sqr(a, b)); \
+ MP_CHECKOK(s_mp_redc(b, mmm))
+
+/* MUL_NOWEAVE(x, a, b): b = Montgomery product of a and the mp_int that x
+ * points to. Either a plain multiply followed by a reduction, or the
+ * combined s_mp_mul_mont(), depending on MP_MONT_USE_MP_MUL. */
+#if defined(MP_MONT_USE_MP_MUL)
+#define MUL_NOWEAVE(x, a, b) \
+ MP_CHECKOK(mp_mul(a, x, b)); \
+ MP_CHECKOK(s_mp_redc(b, mmm))
+#else
+#define MUL_NOWEAVE(x, a, b) \
+ MP_CHECKOK(s_mp_mul_mont(a, x, b, mmm))
+#endif
+
+/* MUL(x, a, b): fetch power number x from the woven table into tmp with
+ * weave_to_mpi(), then b = Montgomery product of a and tmp. Uses the locals
+ * tmp, powers, nLen and num_powers of the calling function. */
+#define MUL(x, a, b) \
+ MP_CHECKOK(weave_to_mpi(&tmp, powers, (x), nLen, num_powers)); \
+ MUL_NOWEAVE(&tmp, a, b)
+
+/* SWAPPA: exchange the accumulator pointers pa1 and pa2 through ptmp.
+ * Expands to three statements. */
+#define SWAPPA \
+ ptmp = pa1; \
+ pa1 = pa2; \
+ pa2 = ptmp
+/* MP_ALIGN(x, y): round the address x up to a multiple of y. The mask
+ * arithmetic only works when y is a power of two. */
+#define MP_ALIGN(x, y) ((((ptrdiff_t)(x)) + ((y)-1)) & (((ptrdiff_t)0) - (y)))
+
+/* Do modular exponentiation using integer multiply code.
+ *
+ * Cache-safe variant: all powers base**0 .. base**(num_powers - 1) are
+ * computed and stored interleaved in one table (mpi_to_weave), and each
+ * window's multiplier is read back with weave_to_mpi(), which touches the
+ * whole table on every fetch.
+ *
+ * montBase: base; mp_exptmod passes it in Montgomery form
+ * exponent: exponent, read window_bits bits at a time from the top
+ * modulus: not referenced in this function body
+ * result: receives the answer (exchanged with the accumulator)
+ * mmm: Montgomery data handed to s_mp_redc / s_mp_mul_mont
+ * nLen: digit count used to size the temporaries and the table rows
+ * bits_in_exponent: bit offset just above the first window; mp_exptmod
+ * rounds it up to a multiple of window_bits
+ * window_bits: must be 1, 4, 5 or 6; any other value calls abort() once
+ * the main loop runs
+ * num_powers: number of table entries; mp_exptmod passes 1 << window_bits
+ *
+ * Returns MP_MEM if the table cannot be allocated, the first error seen by
+ * MP_CHECKOK, or otherwise the status of the final s_mp_redc().
+ */
+mp_err
+mp_exptmod_safe_i(const mp_int *montBase,
+ const mp_int *exponent,
+ const mp_int *modulus,
+ mp_int *result,
+ mp_mont_modulus *mmm,
+ int nLen,
+ mp_size bits_in_exponent,
+ mp_size window_bits,
+ mp_size num_powers)
+{
+ mp_int *pa1, *pa2, *ptmp;
+ mp_size i;
+ mp_size first_window;
+ mp_err res;
+ int expOff;
+ mp_int accum1, accum2, accum[WEAVE_WORD_SIZE];
+ mp_int tmp;
+ mp_digit *powersArray = NULL;
+ mp_digit *powers = NULL;
+
+ /* Give every mp_int a null digit pointer before anything can jump to
+ * CLEANUP, which clears all of them. */
+ MP_DIGITS(&accum1) = 0;
+ MP_DIGITS(&accum2) = 0;
+ MP_DIGITS(&accum[0]) = 0;
+ MP_DIGITS(&accum[1]) = 0;
+ MP_DIGITS(&accum[2]) = 0;
+ MP_DIGITS(&accum[3]) = 0;
+ MP_DIGITS(&tmp) = 0;
+
+ /* grab the first window value. This allows us to preload accumulator1
+ * and save a conversion, some squares and a multiple*/
+ MP_CHECKOK(mpl_get_bits(exponent,
+ bits_in_exponent - window_bits, window_bits));
+ first_window = (mp_size)res;
+
+ MP_CHECKOK(mp_init_size(&accum1, 3 * nLen + 2));
+ MP_CHECKOK(mp_init_size(&accum2, 3 * nLen + 2));
+
+ /* build the first WEAVE_WORD powers inline */
+ /* if WEAVE_WORD_SIZE is not 4, this code will have to change */
+ if (num_powers > 2) {
+ MP_CHECKOK(mp_init_size(&accum[0], 3 * nLen + 2));
+ MP_CHECKOK(mp_init_size(&accum[1], 3 * nLen + 2));
+ MP_CHECKOK(mp_init_size(&accum[2], 3 * nLen + 2));
+ MP_CHECKOK(mp_init_size(&accum[3], 3 * nLen + 2));
+ /* accum[0..3] = base ** 0 .. base ** 3, all in Montgomery form */
+ mp_set(&accum[0], 1);
+ MP_CHECKOK(s_mp_to_mont(&accum[0], mmm, &accum[0]));
+ MP_CHECKOK(mp_copy(montBase, &accum[1]));
+ SQR(montBase, &accum[2]);
+ MUL_NOWEAVE(montBase, &accum[2], &accum[3]);
+ /* the extra num_powers bytes leave room for the alignment below */
+ powersArray = (mp_digit *)malloc(num_powers * (nLen * sizeof(mp_digit) + 1));
+ if (!powersArray) {
+ res = MP_MEM;
+ goto CLEANUP;
+ }
+ /* powers[i] = base ** (i); */
+ powers = (mp_digit *)MP_ALIGN(powersArray, num_powers);
+ MP_CHECKOK(mpi_to_weave(accum, powers, nLen, num_powers));
+ /* first_window == num_powers marks "accum1 has been preloaded" */
+ if (first_window < 4) {
+ MP_CHECKOK(mp_copy(&accum[first_window], &accum1));
+ first_window = num_powers;
+ }
+ } else {
+ if (first_window == 0) {
+ mp_set(&accum1, 1);
+ MP_CHECKOK(s_mp_to_mont(&accum1, mmm, &accum1));
+ } else {
+ /* assert first_window == 1? */
+ MP_CHECKOK(mp_copy(montBase, &accum1));
+ }
+ }
+
+ /*
+ * calculate all the powers in the powers array.
+ * this adds 2**(k-1)-2 square operations over just calculating the
+ * odd powers where k is the window size in the two other mp_modexpt
+ * implementations in this file. We will get some of that
+ * back by not needing the first 'k' squares and one multiply for the
+ * first window.
+ * Given the value of 4 for WEAVE_WORD_SIZE, this loop will only execute if
+ * num_powers > 2, in which case powers will have been allocated.
+ */
+ for (i = WEAVE_WORD_SIZE; i < num_powers; i++) {
+ int acc_index = i & (WEAVE_WORD_SIZE - 1); /* i % WEAVE_WORD_SIZE */
+ if (i & 1) {
+ /* odd power: base ** i = base * base ** (i - 1) */
+ MUL_NOWEAVE(montBase, &accum[acc_index - 1], &accum[acc_index]);
+ /* we've filled the array do our 'per array' processing */
+ if (acc_index == (WEAVE_WORD_SIZE - 1)) {
+ MP_CHECKOK(mpi_to_weave(accum, powers + i - (WEAVE_WORD_SIZE - 1),
+ nLen, num_powers));
+
+ if (first_window <= i) {
+ MP_CHECKOK(mp_copy(&accum[first_window & (WEAVE_WORD_SIZE - 1)],
+ &accum1));
+ first_window = num_powers;
+ }
+ }
+ } else {
+ /* even power: base ** i = (base ** (i / 2)) ** 2 */
+ /* up to 8 we can find 2^i-1 in the accum array, but at 8 we our source
+ * and target are the same so we need to copy.. After that, the
+ * value is overwritten, so we need to fetch it from the stored
+ * weave array */
+ if (i > 2 * WEAVE_WORD_SIZE) {
+ MP_CHECKOK(weave_to_mpi(&accum2, powers, i / 2, nLen, num_powers));
+ SQR(&accum2, &accum[acc_index]);
+ } else {
+ int half_power_index = (i / 2) & (WEAVE_WORD_SIZE - 1);
+ if (half_power_index == acc_index) {
+ /* copy is cheaper than weave_to_mpi */
+ MP_CHECKOK(mp_copy(&accum[half_power_index], &accum2));
+ SQR(&accum2, &accum[acc_index]);
+ } else {
+ SQR(&accum[half_power_index], &accum[acc_index]);
+ }
+ }
+ }
+ }
+/* if the accum1 isn't set, Then there is something wrong with our logic
+ * above and is an internal programming error.
+ */
+#if MP_ARGCHK == 2
+ assert(MP_USED(&accum1) != 0);
+#endif
+
+ /* accum1 was preloaded above with base ** (first window value) */
+ pa1 = &accum1;
+ pa2 = &accum2;
+
+ /* tmp is not used if window_bits == 1. */
+ if (window_bits != 1) {
+ MP_CHECKOK(mp_init_size(&tmp, 3 * nLen + 2));
+ }
+
+ /* The top window is already in accum1, so start one window lower. */
+ for (expOff = bits_in_exponent - window_bits * 2; expOff >= 0; expOff -= window_bits) {
+ mp_size smallExp;
+ MP_CHECKOK(mpl_get_bits(exponent, expOff, window_bits));
+ smallExp = (mp_size)res;
+
+ /* handle unroll the loops */
+ /* For windows of 4, 5 and 6 bits the same operations run whatever the
+ * window value is: window_bits squarings, then one multiply by
+ * powers[smallExp]. */
+ switch (window_bits) {
+ case 1:
+ if (!smallExp) {
+ SQR(pa1, pa2);
+ SWAPPA;
+ } else if (smallExp & 1) {
+ SQR(pa1, pa2);
+ MUL_NOWEAVE(montBase, pa2, pa1);
+ } else {
+ abort();
+ }
+ break;
+ case 6:
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ /* fall through */
+ case 4:
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ MUL(smallExp, pa1, pa2);
+ SWAPPA;
+ break;
+ case 5:
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ SQR(pa2, pa1);
+ SQR(pa1, pa2);
+ MUL(smallExp, pa2, pa1);
+ break;
+ default:
+ abort(); /* could do a loop? */
+ }
+ }
+
+ /* Leave Montgomery form. NOTE(review): the exchange with result happens
+ * whether or not s_mp_redc() succeeded. */
+ res = s_mp_redc(pa1, mmm);
+ mp_exch(pa1, result);
+
+CLEANUP:
+ mp_clear(&accum1);
+ mp_clear(&accum2);
+ mp_clear(&accum[0]);
+ mp_clear(&accum[1]);
+ mp_clear(&accum[2]);
+ mp_clear(&accum[3]);
+ mp_clear(&tmp);
+ /* NOTE(review): the wipe below is commented out, so the table of powers
+ * is freed without being cleared. */
+ /* PORT_Memset(powers,0,num_powers*nLen*sizeof(mp_digit)); */
+ free(powersArray);
+ return res;
+}
+#undef SQR
+#undef MUL
+#endif
+
+/* Compute result = (inBase ** exponent) mod modulus.
+ *
+ * Even moduli are handed straight to s_mp_exptmod(). For odd moduli the
+ * base is reduced below the modulus if necessary, converted to Montgomery
+ * form, and passed to one of the windowed helpers in this file:
+ * mp_exptmod_f (MP_USING_MONT_MULF builds with mp_using_mont_mulf set),
+ * mp_exptmod_safe_i (when mp_using_cache_safe_exp is set), or mp_exptmod_i.
+ *
+ * Returns the helper's status, or the first error seen by MP_CHECKOK.
+ */
+mp_err
+mp_exptmod(const mp_int *inBase, const mp_int *exponent,
+ const mp_int *modulus, mp_int *result)
+{
+ const mp_int *base;
+ mp_size bits_in_exponent, i, window_bits, odd_ints;
+ mp_err res;
+ int nLen;
+ mp_int montBase, goodBase;
+ mp_mont_modulus mmm;
+#ifdef MP_USING_CACHE_SAFE_MOD_EXP
+ /* 0 means "not determined yet"; filled in on the first call below.
+ * NOTE(review): no locking around this lazy initialization is visible
+ * here. */
+ static unsigned int max_window_bits;
+#endif
+
+ /* function for computing n0prime only works if n0 is odd */
+ if (!mp_isodd(modulus))
+ return s_mp_exptmod(inBase, exponent, modulus, result);
+
+ MP_DIGITS(&montBase) = 0;
+ MP_DIGITS(&goodBase) = 0;
+
+ /* Use the base as given when it is already below the modulus; otherwise
+ * work with goodBase = inBase mod modulus. */
+ if (mp_cmp(inBase, modulus) < 0) {
+ base = inBase;
+ } else {
+ MP_CHECKOK(mp_init(&goodBase));
+ base = &goodBase;
+ MP_CHECKOK(mp_mod(inBase, modulus, &goodBase));
+ }
+
+ nLen = MP_USED(modulus);
+ MP_CHECKOK(mp_init_size(&montBase, 2 * nLen + 2));
+
+ mmm.N = *modulus; /* a copy of the mp_int struct */
+
+ /* compute n0', given n0, n0' = -(n0 ** -1) mod MP_RADIX
+ ** where n0 = least significant mp_digit of N, the modulus.
+ */
+ mmm.n0prime = 0 - s_mp_invmod_radix(MP_DIGIT(modulus, 0));
+
+ MP_CHECKOK(s_mp_to_mont(base, &mmm, &montBase));
+
+ /* Pick the window width from the exponent length; the cache-safe code
+ * uses higher thresholds than the other paths. */
+ bits_in_exponent = mpl_significant_bits(exponent);
+#ifdef MP_USING_CACHE_SAFE_MOD_EXP
+ if (mp_using_cache_safe_exp) {
+ if (bits_in_exponent > 780)
+ window_bits = 6;
+ else if (bits_in_exponent > 256)
+ window_bits = 5;
+ else if (bits_in_exponent > 20)
+ window_bits = 4;
+ /* RSA public key exponents are typically under 20 bits (common values
+ * are: 3, 17, 65537) and a 4-bit window is inefficient
+ */
+ else
+ window_bits = 1;
+ } else
+#endif
+ if (bits_in_exponent > 480)
+ window_bits = 6;
+ else if (bits_in_exponent > 160)
+ window_bits = 5;
+ else if (bits_in_exponent > 20)
+ window_bits = 4;
+ /* RSA public key exponents are typically under 20 bits (common values
+ * are: 3, 17, 65537) and a 4-bit window is inefficient
+ */
+ else
+ window_bits = 1;
+
+#ifdef MP_USING_CACHE_SAFE_MOD_EXP
+ /*
+ * clamp the window size based on
+ * the cache line size.
+ */
+ if (!max_window_bits) {
+ unsigned long cache_size = s_mpi_getProcessorLineSize();
+ /* processor has no cache, use 'fast' code always */
+ if (cache_size == 0) {
+ mp_using_cache_safe_exp = 0;
+ }
+ if ((cache_size == 0) || (cache_size >= 64)) {
+ max_window_bits = 6;
+ } else if (cache_size >= 32) {
+ max_window_bits = 5;
+ } else if (cache_size >= 16) {
+ max_window_bits = 4;
+ } else
+ max_window_bits = 1; /* should this be an assert? */
+ }
+
+ /* clamp the window size down before we calculate bits_in_exponent */
+ if (mp_using_cache_safe_exp) {
+ if (window_bits > max_window_bits) {
+ window_bits = max_window_bits;
+ }
+ }
+#endif
+
+ /* Round bits_in_exponent up to a whole number of windows. */
+ odd_ints = 1 << (window_bits - 1);
+ i = bits_in_exponent % window_bits;
+ if (i != 0) {
+ bits_in_exponent += window_bits - i;
+ }
+
+#ifdef MP_USING_MONT_MULF
+ if (mp_using_mont_mulf) {
+ MP_CHECKOK(s_mp_pad(&montBase, nLen));
+ res = mp_exptmod_f(&montBase, exponent, modulus, result, &mmm, nLen,
+ bits_in_exponent, window_bits, odd_ints);
+ } else
+#endif
+#ifdef MP_USING_CACHE_SAFE_MOD_EXP
+ if (mp_using_cache_safe_exp) {
+ /* the cache-safe code keeps every power, not only the odd ones */
+ res = mp_exptmod_safe_i(&montBase, exponent, modulus, result, &mmm, nLen,
+ bits_in_exponent, window_bits, 1 << window_bits);
+ } else
+#endif
+ res = mp_exptmod_i(&montBase, exponent, modulus, result, &mmm, nLen,
+ bits_in_exponent, window_bits, odd_ints);
+
+CLEANUP:
+ mp_clear(&montBase);
+ mp_clear(&goodBase);
+ /* Don't mp_clear mmm.N because it is merely a copy of modulus.
+ ** Just zap it.
+ */
+ memset(&mmm, 0, sizeof mmm);
+ return res;
+}
diff --git a/security/nss/lib/freebl/mpi/mpprime.c b/security/nss/lib/freebl/mpi/mpprime.c
new file mode 100644
index 000000000..58287192e
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpprime.c
@@ -0,0 +1,599 @@
+/*
+ * mpprime.c
+ *
+ * Utilities for finding and working with prime and pseudo-prime
+ * integers
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi-priv.h"
+#include "mpprime.h"
+#include "mplogic.h"
+#include <stdlib.h>
+#include <string.h>
+
+#define SMALL_TABLE 0 /* determines size of hard-wired prime table */
+
+#define RANDOM() rand()
+
+#include "primes.c" /* pull in the prime digit table */
+
+/*
+ Test if any of a given vector of digits divides a. If not, MP_NO
+ is returned; otherwise, MP_YES is returned and 'which' is set to
+ the index of the integer in the vector which divided a.
+ */
+mp_err s_mpp_divp(mp_int *a, const mp_digit *vec, int size, int *which);
+
+/* {{{ mpp_divis(a, b) */
+
+/*
+ mpp_divis(a, b)
+
+ Returns MP_YES if a is divisible by b, or MP_NO if it is not.
+ */
+
+mp_err
+mpp_divis(mp_int *a, mp_int *b)
+{
+ mp_err res;
+ mp_int rem;
+
+ if ((res = mp_init(&rem)) != MP_OKAY)
+ return res;
+
+ if ((res = mp_mod(a, b, &rem)) != MP_OKAY)
+ goto CLEANUP;
+
+ if (mp_cmp_z(&rem) == 0)
+ res = MP_YES;
+ else
+ res = MP_NO;
+
+CLEANUP:
+ mp_clear(&rem);
+ return res;
+
+} /* end mpp_divis() */
+
+/* }}} */
+
+/* {{{ mpp_divis_d(a, d) */
+
+/*
+ mpp_divis_d(a, d)
+
+ Return MP_YES if a is divisible by d, or MP_NO if it is not.
+ */
+
+mp_err
+mpp_divis_d(mp_int *a, mp_digit d)
+{
+ mp_err res;
+ mp_digit rem;
+
+ ARGCHK(a != NULL, MP_BADARG);
+
+ if (d == 0)
+ return MP_NO;
+
+ if ((res = mp_mod_d(a, d, &rem)) != MP_OKAY)
+ return res;
+
+ if (rem == 0)
+ return MP_YES;
+ else
+ return MP_NO;
+
+} /* end mpp_divis_d() */
+
+/* }}} */
+
+/* {{{ mpp_random(a) */
+
+/*
+ mpp_random(a)
+
+ Assigns a random value to a. This value is generated using the
+ standard C library's rand() function, so it should not be used for
+ cryptographic purposes, but it should be fine for primality testing,
+ since all we really care about there is good statistical properties.
+
+ As many digits as a currently has are filled with random digits.
+ */
+
+mp_err
+mpp_random(mp_int *a)
+
+{
+ mp_digit next = 0;
+ unsigned int ix, jx;
+
+ ARGCHK(a != NULL, MP_BADARG);
+
+ for (ix = 0; ix < USED(a); ix++) {
+ for (jx = 0; jx < sizeof(mp_digit); jx++) {
+ next = (next << CHAR_BIT) | (RANDOM() & UCHAR_MAX);
+ }
+ DIGIT(a, ix) = next;
+ }
+
+ return MP_OKAY;
+
+} /* end mpp_random() */
+
+/* }}} */
+
+/* {{{ mpp_random_size(a, prec) */
+
+ /*
+   mpp_random_size(a, prec)
+
+   Pads a with s_mp_pad(a, prec) and then fills it with mpp_random().
+   prec must be greater than zero.
+
+   Returns the s_mp_pad() error if padding fails, otherwise the result
+   of mpp_random().
+  */
+ mp_err
+ mpp_random_size(mp_int *a, mp_size prec)
+ {
+     mp_err status;
+
+     ARGCHK(a != NULL && prec > 0, MP_BADARG);
+
+     status = s_mp_pad(a, prec);
+     if (status == MP_OKAY) {
+         status = mpp_random(a);
+     }
+
+     return status;
+
+ } /* end mpp_random_size() */
+
+/* }}} */
+
+/* {{{ mpp_divis_vector(a, vec, size, which) */
+
+/*
+ mpp_divis_vector(a, vec, size, which)
+
+ Determines if a is divisible by any of the 'size' digits in vec.
+ Returns MP_YES and sets 'which' to the index of the offending digit,
+ if it is; returns MP_NO if it is not.
+ */
+
+mp_err
+mpp_divis_vector(mp_int *a, const mp_digit *vec, int size, int *which)
+{
+ ARGCHK(a != NULL && vec != NULL && size > 0, MP_BADARG);
+
+ return s_mpp_divp(a, vec, size, which);
+
+} /* end mpp_divis_vector() */
+
+/* }}} */
+
+/* {{{ mpp_divis_primes(a, np) */
+
+/*
+ mpp_divis_primes(a, np)
+
+ Test whether a is divisible by any of the first 'np' primes. If it
+ is, returns MP_YES and sets *np to the value of the digit that did
+ it. If not, returns MP_NO.
+ */
+mp_err
+mpp_divis_primes(mp_int *a, mp_digit *np)
+{
+ int size, which;
+ mp_err res;
+
+ ARGCHK(a != NULL && np != NULL, MP_BADARG);
+
+ size = (int)*np;
+ if (size > prime_tab_size)
+ size = prime_tab_size;
+
+ res = mpp_divis_vector(a, prime_tab, size, &which);
+ if (res == MP_YES)
+ *np = prime_tab[which];
+
+ return res;
+
+} /* end mpp_divis_primes() */
+
+/* }}} */
+
+/* {{{ mpp_fermat(a, w) */
+
+/*
+ Using w as a witness, try pseudo-primality testing based on Fermat's
+ little theorem. If a is prime, and (w, a) = 1, then w^a == w (mod
+ a). So, we compute z = w^a (mod a) and compare z to w; if they are
+ equal, the test passes and we return MP_YES. Otherwise, we return
+ MP_NO.
+ */
+mp_err
+mpp_fermat(mp_int *a, mp_digit w)
+{
+ mp_int base, test;
+ mp_err res;
+
+ if ((res = mp_init(&base)) != MP_OKAY)
+ return res;
+
+ mp_set(&base, w);
+
+ if ((res = mp_init(&test)) != MP_OKAY)
+ goto TEST;
+
+ /* Compute test = base^a (mod a) */
+ if ((res = mp_exptmod(&base, a, a, &test)) != MP_OKAY)
+ goto CLEANUP;
+
+ if (mp_cmp(&base, &test) == 0)
+ res = MP_YES;
+ else
+ res = MP_NO;
+
+CLEANUP:
+ mp_clear(&test);
+TEST:
+ mp_clear(&base);
+
+ return res;
+
+} /* end mpp_fermat() */
+
+/* }}} */
+
+/*
+ Perform the fermat test on each of the primes in a list until
+ a) one of them shows a is not prime, or
+ b) the list is exhausted.
+ Returns: MP_YES if it passes tests.
+ MP_NO if fermat test reveals it is composite
+ Some MP error code if some other error occurs.
+ */
+mp_err
+mpp_fermat_list(mp_int *a, const mp_digit *primes, mp_size nPrimes)
+{
+ mp_err rv = MP_YES;
+
+ while (nPrimes-- > 0 && rv == MP_YES) {
+ rv = mpp_fermat(a, *primes++);
+ }
+ return rv;
+}
+
+/* {{{ mpp_pprime(a, nt) */
+
+/*
+ mpp_pprime(a, nt)
+
+ Performs nt iteration of the Miller-Rabin probabilistic primality
+ test on a. Returns MP_YES if the tests pass, MP_NO if one fails.
+ If MP_NO is returned, the number is definitely composite. If MP_YES
+ is returned, it is probably prime (but that is not guaranteed).
+ */
+
+mp_err
+mpp_pprime(mp_int *a, int nt)
+{
+ mp_err res;
+ mp_int x, amo, m, z; /* "amo" = "a minus one" */
+ int iter;
+ unsigned int jx;
+ mp_size b;
+
+ ARGCHK(a != NULL, MP_BADARG);
+
+ MP_DIGITS(&x) = 0;
+ MP_DIGITS(&amo) = 0;
+ MP_DIGITS(&m) = 0;
+ MP_DIGITS(&z) = 0;
+
+ /* Initialize temporaries... */
+ MP_CHECKOK(mp_init(&amo));
+ /* Compute amo = a - 1 for what follows... */
+ MP_CHECKOK(mp_sub_d(a, 1, &amo));
+
+ b = mp_trailing_zeros(&amo);
+ if (!b) { /* a was even ? */
+ res = MP_NO;
+ goto CLEANUP;
+ }
+
+ MP_CHECKOK(mp_init_size(&x, MP_USED(a)));
+ MP_CHECKOK(mp_init(&z));
+ MP_CHECKOK(mp_init(&m));
+ MP_CHECKOK(mp_div_2d(&amo, b, &m, 0));
+
+ /* Do the test nt times... */
+ for (iter = 0; iter < nt; iter++) {
+
+ /* Choose a random value for 1 < x < a */
+ MP_CHECKOK(s_mp_pad(&x, USED(a)));
+ mpp_random(&x);
+ MP_CHECKOK(mp_mod(&x, a, &x));
+ if (mp_cmp_d(&x, 1) <= 0) {
+ iter--; /* don't count this iteration */
+ continue; /* choose a new x */
+ }
+
+ /* Compute z = (x ** m) mod a */
+ MP_CHECKOK(mp_exptmod(&x, &m, a, &z));
+
+ if (mp_cmp_d(&z, 1) == 0 || mp_cmp(&z, &amo) == 0) {
+ res = MP_YES;
+ continue;
+ }
+
+ res = MP_NO; /* just in case the following for loop never executes. */
+ for (jx = 1; jx < b; jx++) {
+ /* z = z^2 (mod a) */
+ MP_CHECKOK(mp_sqrmod(&z, a, &z));
+ res = MP_NO; /* previous line set res to MP_YES */
+
+ if (mp_cmp_d(&z, 1) == 0) {
+ break;
+ }
+ if (mp_cmp(&z, &amo) == 0) {
+ res = MP_YES;
+ break;
+ }
+ } /* end testing loop */
+
+ /* If the test passes, we will continue iterating, but a failed
+ test means the candidate is definitely NOT prime, so we will
+ immediately break out of this loop
+ */
+ if (res == MP_NO)
+ break;
+
+ } /* end iterations loop */
+
+CLEANUP:
+ mp_clear(&m);
+ mp_clear(&z);
+ mp_clear(&x);
+ mp_clear(&amo);
+ return res;
+
+} /* end mpp_pprime() */
+
+/* }}} */
+
+ /* Build a table of known composites around an odd trial value.
+ **
+ ** trial   - odd starting value of the search.
+ ** primes  - list of nPrimes small primes; it must not include 2.
+ ** sieve   - output table of nSieve bytes, zeroed before use.  It should
+ **           have dimension >= MAXPRIME/2, where MAXPRIME is the largest
+ **           prime in the list.
+ **
+ ** After this function is finished, if sieve[i] is non-zero, then
+ ** (trial + 2*i) is composite.  Each prime used in the sieve costs one
+ ** division of trial and eliminates one or more values from the search
+ ** space (3 alone eliminates 1/3 of the values), while each value left
+ ** in the search space costs one or more modular exponentiations.  So
+ ** these divisions are a bargain!
+ **
+ ** Returns MP_OKAY, or the error reported by mp_mod_d().
+ */
+ mp_err
+ mpp_sieve(mp_int *trial, const mp_digit *primes, mp_size nPrimes,
+           unsigned char *sieve, mp_size nSieve)
+ {
+     mp_size which;
+
+     memset(sieve, 0, nSieve);
+
+     for (which = 0; which < nPrimes; which++) {
+         const mp_digit prime = primes[which];
+         mp_digit rem;
+         unsigned long first;
+         mp_size pos;
+         mp_err res = mp_mod_d(trial, prime, &rem);
+
+         if (res != MP_OKAY)
+             return res;
+
+         /* distance from trial up to the next multiple of this prime */
+         first = (rem == 0) ? 0 : prime - rem;
+
+         /* walk the multiples; only even distances map to a sieve slot */
+         pos = first;
+         while (pos < nSieve * 2) {
+             if ((pos & 1) == 0) {
+                 sieve[pos >> 1] = 1;
+             }
+             pos += prime;
+         }
+     }
+
+     return MP_OKAY;
+ }
+
+#define SIEVE_SIZE (32 * 1024)
+
+ /*
+   mpp_make_prime(start, nBits, strong, nTries)
+
+   Search upward from 'start' for a probable prime of nBits bits.
+
+   start  - in: seed value; out: the prime found (only on MP_YES).  The
+            seed is first forced odd, given a top bit at position
+            nBits - 1, and has any higher bits cleared.
+   nBits  - requested size in bits; must be greater than 16.
+   strong - non-zero to search for p such that q = 2p + 1 is also
+            probably prime.  In that case p is searched with nBits - 1
+            bits and q is what is returned in 'start'.
+   nTries - optional; incremented by the value of the loop index when
+            the function leaves.
+
+   Candidates are start + 2*i for i in [0, SIEVE_SIZE).  Each one must
+   survive the small-prime sieve, a base-2 Fermat test and num_tests
+   rounds of Miller-Rabin.
+
+   Returns MP_YES on success, MP_NO if no suitable candidate exists in
+   the sieve window, MP_MEM if the sieve cannot be allocated, or another
+   MP error code.
+  */
+ mp_err
+ mpp_make_prime(mp_int *start, mp_size nBits, mp_size strong,
+                unsigned long *nTries)
+ {
+     mp_digit np;
+     mp_err res;
+     unsigned int i = 0;
+     mp_int trial;
+     mp_int q;
+     mp_size num_tests;
+     unsigned char *sieve;
+
+     ARGCHK(start != 0, MP_BADARG);
+     ARGCHK(nBits > 16, MP_RANGE);
+
+     sieve = malloc(SIEVE_SIZE);
+     /* Check the allocation explicitly.  ARGCHK is an argument-validation
+      * macro and presumably is not active in every build configuration
+      * (confirm against its definition); an allocation failure must be
+      * caught regardless.
+      */
+     if (sieve == NULL)
+         return MP_MEM;
+
+     MP_DIGITS(&trial) = 0;
+     MP_DIGITS(&q) = 0;
+     MP_CHECKOK(mp_init(&trial));
+     MP_CHECKOK(mp_init(&q));
+     /* values originally taken from table 4.4,
+      * HandBook of Applied Cryptography, augmented by FIPS-186
+      * requirements, Table C.2 and C.3 */
+     if (nBits >= 2000) {
+         num_tests = 3;
+     } else if (nBits >= 1536) {
+         num_tests = 4;
+     } else if (nBits >= 1024) {
+         num_tests = 5;
+     } else if (nBits >= 550) {
+         num_tests = 6;
+     } else if (nBits >= 450) {
+         num_tests = 7;
+     } else if (nBits >= 400) {
+         num_tests = 8;
+     } else if (nBits >= 350) {
+         num_tests = 9;
+     } else if (nBits >= 300) {
+         num_tests = 10;
+     } else if (nBits >= 250) {
+         num_tests = 20;
+     } else if (nBits >= 200) {
+         num_tests = 41;
+     } else if (nBits >= 100) {
+         num_tests = 38; /* funny anomaly in the FIPS tables: for aux primes,
+                          * they require more iterations for larger primes */
+     } else
+         num_tests = 50;
+
+     if (strong)
+         --nBits;
+     /* force the candidate to be odd and exactly nBits long */
+     MP_CHECKOK(mpl_set_bit(start, nBits - 1, 1));
+     MP_CHECKOK(mpl_set_bit(start, 0, 1));
+     for (i = mpl_significant_bits(start) - 1; i >= nBits; --i) {
+         MP_CHECKOK(mpl_set_bit(start, i, 0));
+     }
+     /* start sieving with prime value of 3. */
+     MP_CHECKOK(mpp_sieve(start, prime_tab + 1, prime_tab_size - 1,
+                          sieve, SIEVE_SIZE));
+
+#ifdef DEBUG_SIEVE
+     res = 0;
+     for (i = 0; i < SIEVE_SIZE; ++i) {
+         if (!sieve[i])
+             ++res;
+     }
+     fprintf(stderr, "sieve found %d potential primes.\n", res);
+#define FPUTC(x, y) fputc(x, y)
+#else
+#define FPUTC(x, y)
+#endif
+
+     res = MP_NO;
+     for (i = 0; i < SIEVE_SIZE; ++i) {
+         if (sieve[i]) /* this number is composite */
+             continue;
+         MP_CHECKOK(mp_add_d(start, 2 * i, &trial));
+         FPUTC('.', stderr);
+         /* run a Fermat test */
+         res = mpp_fermat(&trial, 2);
+         if (res != MP_OKAY) {
+             if (res == MP_NO)
+                 continue; /* was composite */
+             goto CLEANUP;
+         }
+
+         FPUTC('+', stderr);
+         /* If that passed, run some Miller-Rabin tests */
+         res = mpp_pprime(&trial, num_tests);
+         if (res != MP_OKAY) {
+             if (res == MP_NO)
+                 continue; /* was composite */
+             goto CLEANUP;
+         }
+         FPUTC('!', stderr);
+
+         if (!strong)
+             break; /* success !! */
+
+         /* At this point, we have strong evidence that our candidate
+            is itself prime.  If we want a strong prime, we need now
+            to test q = 2p + 1 for primality...
+          */
+         MP_CHECKOK(mp_mul_2(&trial, &q));
+         MP_CHECKOK(mp_add_d(&q, 1, &q));
+
+         /* Test q for small prime divisors ... */
+         np = prime_tab_size;
+         res = mpp_divis_primes(&q, &np);
+         if (res == MP_YES) { /* is composite */
+             mp_clear(&q);
+             /* MP_YES here means "q has a small divisor", i.e. this
+              * candidate is rejected.  Record the rejection so that
+              * running off the end of the sieve is not mistaken for
+              * success by the res == MP_YES check after the loop.
+              */
+             res = MP_NO;
+             continue;
+         }
+         if (res != MP_NO)
+             goto CLEANUP;
+
+         /* And test with Fermat, as with its parent ... */
+         res = mpp_fermat(&q, 2);
+         if (res != MP_YES) {
+             mp_clear(&q);
+             if (res == MP_NO)
+                 continue; /* was composite */
+             goto CLEANUP;
+         }
+
+         /* And test with Miller-Rabin, as with its parent ... */
+         res = mpp_pprime(&q, num_tests);
+         if (res != MP_YES) {
+             mp_clear(&q);
+             if (res == MP_NO)
+                 continue; /* was composite */
+             goto CLEANUP;
+         }
+
+         /* If it passed, we've got a winner */
+         mp_exch(&q, &trial);
+         mp_clear(&q);
+         break;
+
+     } /* end of loop through sieved values */
+     if (res == MP_YES)
+         mp_exch(&trial, start);
+CLEANUP:
+     mp_clear(&trial);
+     mp_clear(&q);
+     if (nTries)
+         *nTries += i;
+     if (sieve != NULL) {
+         /* wipe the sieve before releasing it */
+         memset(sieve, 0, SIEVE_SIZE);
+         free(sieve);
+     }
+     return res;
+ }
+
+/*========================================================================*/
+/*------------------------------------------------------------------------*/
+/* Static functions visible only to the library internally */
+
+/* {{{ s_mpp_divp(a, vec, size, which) */
+
+/*
+ Test for divisibility by members of a vector of digits. Returns
+ MP_NO if a is not divisible by any of them; returns MP_YES and sets
+ 'which' to the index of the offender, if it is. Will stop on the
+ first digit against which a is divisible.
+ */
+
+mp_err
+s_mpp_divp(mp_int *a, const mp_digit *vec, int size, int *which)
+{
+ mp_err res;
+ mp_digit rem;
+
+ int ix;
+
+ for (ix = 0; ix < size; ix++) {
+ if ((res = mp_mod_d(a, vec[ix], &rem)) != MP_OKAY)
+ return res;
+
+ if (rem == 0) {
+ if (which)
+ *which = ix;
+ return MP_YES;
+ }
+ }
+
+ return MP_NO;
+
+} /* end s_mpp_divp() */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/* HERE THERE BE DRAGONS */
diff --git a/security/nss/lib/freebl/mpi/mpprime.h b/security/nss/lib/freebl/mpi/mpprime.h
new file mode 100644
index 000000000..c47c61836
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpprime.h
@@ -0,0 +1,38 @@
+/*
+ * mpprime.h
+ *
+ * Utilities for finding and working with prime and pseudo-prime
+ * integers
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _H_MP_PRIME_
+#define _H_MP_PRIME_
+
+#include "mpi.h"
+
+ /* Built-in table of small primes used by the routines below. */
+ extern const int prime_tab_size; /* number of primes available */
+ extern const mp_digit prime_tab[];
+
+ /* Tests for divisibility */
+ /* MP_YES if b divides a, MP_NO if not, otherwise an MP error code. */
+ mp_err mpp_divis(mp_int *a, mp_int *b);
+ /* Same test against a single digit d; d == 0 yields MP_NO. */
+ mp_err mpp_divis_d(mp_int *a, mp_digit d);
+
+ /* Random selection */
+ /* Fill the digits a currently uses with rand()-based values
+    (not for cryptographic use). */
+ mp_err mpp_random(mp_int *a);
+ /* Pad a to prec digits (prec > 0) and then fill it as mpp_random() does. */
+ mp_err mpp_random_size(mp_int *a, mp_size prec);
+
+ /* Pseudo-primality testing */
+ /* MP_YES and *which = index if one of the 'size' digits in vec divides a. */
+ mp_err mpp_divis_vector(mp_int *a, const mp_digit *vec, int size, int *which);
+ /* In: *np = how many table primes to try; out on MP_YES: the dividing prime. */
+ mp_err mpp_divis_primes(mp_int *a, mp_digit *np);
+ /* Fermat test of a with witness w: MP_YES if w^a == w (mod a). */
+ mp_err mpp_fermat(mp_int *a, mp_digit w);
+ /* Fermat test with each listed witness in turn; stops at the first non-MP_YES. */
+ mp_err mpp_fermat_list(mp_int *a, const mp_digit *primes, mp_size nPrimes);
+ /* nt rounds of Miller-Rabin: MP_NO = composite, MP_YES = probably prime. */
+ mp_err mpp_pprime(mp_int *a, int nt);
+ /* Mark sieve[i] non-zero when (trial + 2*i) is a multiple of a listed prime. */
+ mp_err mpp_sieve(mp_int *trial, const mp_digit *primes, mp_size nPrimes,
+ unsigned char *sieve, mp_size nSieve);
+ /* Search upward from start for an nBits-bit probable prime; when 'strong'
+    is non-zero the result is q = 2p + 1 with p also probably prime. */
+ mp_err mpp_make_prime(mp_int *start, mp_size nBits, mp_size strong,
+ unsigned long *nTries);
+
+#endif /* end _H_MP_PRIME_ */
diff --git a/security/nss/lib/freebl/mpi/mpv_sparc.c b/security/nss/lib/freebl/mpi/mpv_sparc.c
new file mode 100644
index 000000000..423311b65
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpv_sparc.c
@@ -0,0 +1,221 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "vis_proto.h"
+
+/***************************************************************/
+
+ /* Local fixed-width aliases.  The 64-bit integer types are 'long' when
+    __sparcv9 is defined and 'long long' otherwise. */
+ typedef int t_s32;
+ typedef unsigned int t_u32;
+#if defined(__sparcv9)
+ typedef long t_s64;
+ typedef unsigned long t_u64;
+#else
+ typedef long long t_s64;
+ typedef unsigned long long t_u64;
+#endif
+ typedef double t_d64;
+
+ /***************************************************************/
+
+ /* Overlays one double-sized value with two signed 32-bit integers so the
+    result of a 64-bit VIS operation can be read back one word at a time
+    (i0 is the first word in memory, i1 the second). */
+ typedef union {
+ t_d64 d64;
+ struct {
+ t_s32 i0;
+ t_s32 i1;
+ } i32s;
+ } d64_2_i32;
+
+ /***************************************************************/
+
+ /* Scratch-buffer length (in t_s64 entries) used by the general-n paths
+    of mul_add(); the wide-multiplier path uses twice this. */
+#define BUFF_SIZE 256
+
+ /* Multipliers below 2^A_BITS use one floating-point product per word;
+    larger ones are split into a low A_BITS-bit part (A_MASK) and a high
+    part.  Presumably 19 bits keeps each 32-bit x A_BITS-bit product
+    exactly representable in a double -- TODO confirm. */
+#define A_BITS 19
+#define A_MASK ((1 << A_BITS) - 1)
+
+ /***************************************************************/
+
+ /* 0x80000000 in both 32-bit halves: the mask operand passed to
+    vis_fxnor() by the MUL_U32_S64_2* macros. */
+ static t_u64 mask_cnst[] = {
+ 0x8000000080000000ull
+ };
+
+/***************************************************************/
+
+ /* Declares the locals shared by every mul_add() variant.  Expects the
+    enclosing function to have parameters named y and a.
+      py   - y viewed as an array of 64-bit words (two t_u32 per element);
+             NOTE(review): presumably requires y to be 8-byte aligned.
+      mask - the mask_cnst bit pattern loaded as a double.
+      ca   - 2^31 - 1 as a double.
+      da   - the multiplier a as a double.
+      buff - N-entry t_s64 scratch array holding the per-word products.
+      s    - running 64-bit sum used by the ADD_* macros.
+      dy   - union used to split a 64-bit word into two 32-bit integers. */
+#define DEF_VARS(N) \
+ t_d64 *py = (t_d64 *)y; \
+ t_d64 mask = *((t_d64 *)mask_cnst); \
+ t_d64 ca = (1u << 31) - 1; \
+ t_d64 da = (t_d64)a; \
+ t_s64 buff[N], s; \
+ d64_2_i32 dy
+
+/***************************************************************/
+
+ /* MUL_U32_S64_2(i): multiply the two 32-bit words held in py[i] by da and
+    store the products in buff[2*i] and buff[2*i + 1].
+
+    Assuming vis_fxnor() is the VIS bitwise XNOR (TODO confirm), each
+    32-bit half u of py[i] becomes ~(u ^ 0x80000000), which read as a
+    signed int equals (2^31 - 1) - u.  Subtracting that from ca
+    (= 2^31 - 1) therefore recovers u as a non-negative double, i.e. an
+    unsigned-to-double conversion built from signed conversions only. */
+#define MUL_U32_S64_2(i) \
+ dy.d64 = vis_fxnor(mask, py[i]); \
+ buff[2 * (i)] = (ca - (t_d64)dy.i32s.i0) * da; \
+ buff[2 * (i) + 1] = (ca - (t_d64)dy.i32s.i1) * da
+
+ /* MUL_U32_S64_2_D(i): same word recovery, but each word is multiplied by
+    both da and db (the low and high parts of a wide multiplier), giving
+    four products in buff[4*i .. 4*i + 3].  Uses the locals d0, d1 and db
+    declared by the caller. */
+#define MUL_U32_S64_2_D(i) \
+ dy.d64 = vis_fxnor(mask, py[i]); \
+ d0 = ca - (t_d64)dy.i32s.i0; \
+ d1 = ca - (t_d64)dy.i32s.i1; \
+ buff[4 * (i)] = (t_s64)(d0 * da); \
+ buff[4 * (i) + 1] = (t_s64)(d0 * db); \
+ buff[4 * (i) + 2] = (t_s64)(d1 * da); \
+ buff[4 * (i) + 3] = (t_s64)(d1 * db)
+
+/***************************************************************/
+
+ /* ADD_S64_U32(i): add product i, the addend word x[i] and the incoming
+    carry c; store the sum in z[i] (a t_u32, so only the low 32 bits are
+    kept) and leave s >> 32 in c as the carry for the next word. */
+#define ADD_S64_U32(i) \
+ s = buff[i] + x[i] + c; \
+ z[i] = s; \
+ c = (s >> 32)
+
+ /* ADD_S64_U32_D(i): wide-multiplier variant.  Recombines the two partial
+    products of word i as low + (high << A_BITS) before adding x[i] and the
+    carry uc; the new carry is taken from the sum viewed as unsigned. */
+#define ADD_S64_U32_D(i) \
+ s = buff[2 * (i)] + (((t_s64)(buff[2 * (i) + 1])) << A_BITS) + x[i] + uc; \
+ z[i] = s; \
+ uc = ((t_u64)s >> 32)
+
+/***************************************************************/
+
+ /* Unrolled multiply step: four consecutive MUL_U32_S64_2 expansions
+    starting at 64-bit word index i, i.e. eight 32-bit words of y. */
+#define MUL_U32_S64_8(i) \
+ MUL_U32_S64_2(i); \
+ MUL_U32_S64_2(i + 1); \
+ MUL_U32_S64_2(i + 2); \
+ MUL_U32_S64_2(i + 3)
+
+ /* Same unrolling for the wide-multiplier variant. */
+#define MUL_U32_S64_D_8(i) \
+ MUL_U32_S64_2_D(i); \
+ MUL_U32_S64_2_D(i + 1); \
+ MUL_U32_S64_2_D(i + 2); \
+ MUL_U32_S64_2_D(i + 3)
+
+ /***************************************************************/
+
+ /* Unrolled accumulate step: eight consecutive ADD_S64_U32 expansions
+    starting at 32-bit word index i, carrying from each word to the next. */
+#define ADD_S64_U32_8(i) \
+ ADD_S64_U32(i); \
+ ADD_S64_U32(i + 1); \
+ ADD_S64_U32(i + 2); \
+ ADD_S64_U32(i + 3); \
+ ADD_S64_U32(i + 4); \
+ ADD_S64_U32(i + 5); \
+ ADD_S64_U32(i + 6); \
+ ADD_S64_U32(i + 7)
+
+ /* Same unrolling for the wide-multiplier variant. */
+#define ADD_S64_U32_D_8(i) \
+ ADD_S64_U32_D(i); \
+ ADD_S64_U32_D(i + 1); \
+ ADD_S64_U32_D(i + 2); \
+ ADD_S64_U32_D(i + 3); \
+ ADD_S64_U32_D(i + 4); \
+ ADD_S64_U32_D(i + 5); \
+ ADD_S64_U32_D(i + 6); \
+ ADD_S64_U32_D(i + 7)
+
+/***************************************************************/
+
+ /*
+  * mul_add(z, x, y, n, a)
+  *
+  * Multiply-accumulate over n 32-bit words.  As far as the macros above
+  * show (and assuming vis_fxnor() is a bitwise XNOR), for each i in [0, n)
+  * it forms s = y[i] * a + x[i] + carry, stores the low 32 bits of s in
+  * z[i] and carries s >> 32 into the next word.
+  *
+  *   z - destination, n words
+  *   x - addend, n words
+  *   y - multiplicand, n words; read through a 64-bit pointer
+  *   n - word count; 8 and 16 have fully unrolled paths
+  *   a - 32-bit multiplier
+  *
+  * Returns the carry left after the last word.
+  *
+  * The products are computed in double precision.  When a < 2^A_BITS one
+  * product per word is used; otherwise a is split into its low A_BITS
+  * bits (da) and the remaining high bits (db), and the two partial
+  * products are recombined in ADD_S64_U32_D.
+  *
+  * NOTE(review): the general-n paths use a fixed scratch buffer
+  * (BUFF_SIZE, or 2 * BUFF_SIZE entries) and do not check n against it,
+  * so callers presumably guarantee n <= BUFF_SIZE -- confirm.  For odd n
+  * the multiply loop runs (n + 1) / 2 times and therefore reads one
+  * 32-bit word past y[n - 1].
+  */
+ t_u32
+ mul_add(t_u32 *z, t_u32 *x, t_u32 *y, int n, t_u32 a)
+ {
+ /* narrow multiplier: a single product per word */
+ if (a < (1 << A_BITS)) {
+
+ if (n == 8) {
+ DEF_VARS(8);
+ t_s32 c = 0;
+
+ MUL_U32_S64_8(0);
+ ADD_S64_U32_8(0);
+
+ return c;
+
+ } else if (n == 16) {
+ DEF_VARS(16);
+ t_s32 c = 0;
+
+ MUL_U32_S64_8(0);
+ MUL_U32_S64_8(4);
+ ADD_S64_U32_8(0);
+ ADD_S64_U32_8(8);
+
+ return c;
+
+ } else {
+ DEF_VARS(BUFF_SIZE);
+ t_s32 i, c = 0;
+
+ /* first pass: all products, two words of y per iteration */
+#pragma pipeloop(0)
+ for (i = 0; i < (n + 1) / 2; i++) {
+ MUL_U32_S64_2(i);
+ }
+
+ /* second pass: add x and propagate the carry word by word */
+#pragma pipeloop(0)
+ for (i = 0; i < n; i++) {
+ ADD_S64_U32(i);
+ }
+
+ return c;
+ }
+ } else {
+ /* wide multiplier: split a into da (low A_BITS bits) and db (the rest) */
+
+ if (n == 8) {
+ DEF_VARS(2 * 8);
+ t_d64 d0, d1, db;
+ t_u32 uc = 0;
+
+ da = (t_d64)(a & A_MASK);
+ db = (t_d64)(a >> A_BITS);
+
+ MUL_U32_S64_D_8(0);
+ ADD_S64_U32_D_8(0);
+
+ return uc;
+
+ } else if (n == 16) {
+ DEF_VARS(2 * 16);
+ t_d64 d0, d1, db;
+ t_u32 uc = 0;
+
+ da = (t_d64)(a & A_MASK);
+ db = (t_d64)(a >> A_BITS);
+
+ MUL_U32_S64_D_8(0);
+ MUL_U32_S64_D_8(4);
+ ADD_S64_U32_D_8(0);
+ ADD_S64_U32_D_8(8);
+
+ return uc;
+
+ } else {
+ DEF_VARS(2 * BUFF_SIZE);
+ t_d64 d0, d1, db;
+ t_u32 i, uc = 0;
+
+ da = (t_d64)(a & A_MASK);
+ db = (t_d64)(a >> A_BITS);
+
+ /* first pass: two partial products per word */
+#pragma pipeloop(0)
+ for (i = 0; i < (n + 1) / 2; i++) {
+ MUL_U32_S64_2_D(i);
+ }
+
+ /* second pass: recombine, add x and propagate the carry */
+#pragma pipeloop(0)
+ for (i = 0; i < n; i++) {
+ ADD_S64_U32_D(i);
+ }
+
+ return uc;
+ }
+ }
+ }
+
+/***************************************************************/
+
+ /* In-place form of mul_add(): x serves as both the addend and the
+    destination.  Returns whatever mul_add() returns. */
+ t_u32
+ mul_add_inp(t_u32 *x, t_u32 *y, int n, t_u32 a)
+ {
+     t_u32 *const dest = x; /* the result overwrites the addend */
+     t_u32 carry_out;
+
+     carry_out = mul_add(dest, x, y, n, a);
+     return carry_out;
+ }
+
+/***************************************************************/
diff --git a/security/nss/lib/freebl/mpi/mpv_sparcv8.s b/security/nss/lib/freebl/mpi/mpv_sparcv8.s
new file mode 100644
index 000000000..66122a1d9
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpv_sparcv8.s
@@ -0,0 +1,1607 @@
+! Inner multiply loop functions for hybrid 32/64-bit Sparc v8plus CPUs.
+! This Source Code Form is subject to the terms of the Mozilla Public
+! License, v. 2.0. If a copy of the MPL was not distributed with this
+! file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+ .section ".text",#alloc,#execinstr
+/* 000000 3 ( 0 0) */ .file "mpv_sparc.c"
+/* 000000 14 ( 0 0) */ .align 8
+!
+! SUBROUTINE .L_const_seg_900000106
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME)
+
+ .L_const_seg_900000106: /* frequency 1.0 confidence 0.0 */
+/* 000000 19 ( 0 0) */ .word 1127219200,0
+/* 0x0008 20 ( 0 0) */ .word 1105199103,-4194304
+/* 0x0010 21 ( 0 0) */ .align 16
+/* 0x0010 27 ( 0 0) */ .global mul_add
+
+!
+! ENTRY mul_add
+!
+
+ .global mul_add
+ mul_add: /* frequency 1.0 confidence 0.0 */
+/* 0x0010 29 ( 0 1) */ sethi %hi(0x1800),%g1
+/* 0x0014 30 ( 0 1) */ sethi %hi(mask_cnst),%g2
+/* 0x0018 31 ( 1 2) */ xor %g1,-984,%g1
+/* 0x001c 32 ( 1 2) */ add %g2,%lo(mask_cnst),%g2
+/* 0x0020 33 ( 2 4) */ save %sp,%g1,%sp
+
+!
+! ENTRY .L900000154
+!
+
+ .L900000154: /* frequency 1.0 confidence 0.0 */
+/* 0x0024 35 ( 0 2) */ call (.+0x8) ! params = ! Result =
+/* 0x0028 ( 1 2) */ sethi %hi((_GLOBAL_OFFSET_TABLE_-(.L900000154-.))),%g5
+/* 0x002c 177 ( 2 3) */ sethi %hi(.L_const_seg_900000106),%g3
+/* 0x0030 178 ( 2 3) */ add %g5,%lo((_GLOBAL_OFFSET_TABLE_-(.L900000154-.))),%g5
+/* 0x0034 179 ( 3 4) */ or %g0,%i4,%o1
+/* 0x0038 180 ( 3 4) */ st %o1,[%fp+84]
+/* 0x003c 181 ( 3 4) */ add %g5,%o7,%o3
+/* 0x0040 182 ( 4 5) */ add %g3,%lo(.L_const_seg_900000106),%g3
+/* 0x0044 183 ( 4 6) */ ld [%o3+%g2],%g2
+/* 0x0048 184 ( 4 5) */ or %g0,%i3,%o2
+/* 0x004c 185 ( 5 6) */ sethi %hi(0x80000),%g4
+/* 0x0050 186 ( 5 7) */ ld [%o3+%g3],%o0
+/* 0x0054 187 ( 5 6) */ or %g0,%i2,%g5
+/* 0x0058 188 ( 6 7) */ or %g0,%o2,%o3
+/* 0x005c 189 ( 6 10) */ ldd [%g2],%f0
+/* 0x0060 190 ( 6 7) */ subcc %o1,%g4,%g0
+/* 0x0064 191 ( 6 7) */ bcc,pn %icc,.L77000048 ! tprob=0.50
+/* 0x0068 ( 7 8) */ subcc %o2,8,%g0
+/* 0x006c 193 ( 7 8) */ bne,pn %icc,.L77000037 ! tprob=0.50
+/* 0x0070 ( 8 12) */ ldd [%o0],%f8
+/* 0x0074 195 ( 9 13) */ ldd [%g5],%f4
+/* 0x0078 196 (10 14) */ ldd [%g5+8],%f6
+/* 0x007c 197 (11 15) */ ldd [%g5+16],%f10
+/* 0x0080 198 (11 14) */ fmovs %f8,%f12
+/* 0x0084 199 (12 16) */ fxnor %f0,%f4,%f4
+/* 0x0088 200 (12 14) */ ld [%fp+84],%f13
+/* 0x008c 201 (13 17) */ ldd [%o0+8],%f14
+/* 0x0090 202 (13 17) */ fxnor %f0,%f6,%f6
+/* 0x0094 203 (14 18) */ ldd [%g5+24],%f16
+/* 0x0098 204 (14 18) */ fxnor %f0,%f10,%f10
+/* 0x009c 208 (15 17) */ ld [%i1],%g2
+/* 0x00a0 209 (15 20) */ fsubd %f12,%f8,%f8
+/* 0x00a4 210 (16 21) */ fitod %f4,%f18
+/* 0x00a8 211 (16 18) */ ld [%i1+4],%g3
+/* 0x00ac 212 (17 22) */ fitod %f5,%f4
+/* 0x00b0 213 (17 19) */ ld [%i1+8],%g4
+/* 0x00b4 214 (18 23) */ fitod %f6,%f20
+/* 0x00b8 215 (18 20) */ ld [%i1+12],%g5
+/* 0x00bc 216 (19 21) */ ld [%i1+16],%o0
+/* 0x00c0 217 (19 24) */ fitod %f7,%f6
+/* 0x00c4 218 (20 22) */ ld [%i1+20],%o1
+/* 0x00c8 219 (20 24) */ fxnor %f0,%f16,%f16
+/* 0x00cc 220 (21 26) */ fsubd %f14,%f18,%f12
+/* 0x00d0 221 (21 23) */ ld [%i1+24],%o2
+/* 0x00d4 222 (22 27) */ fsubd %f14,%f4,%f4
+/* 0x00d8 223 (22 24) */ ld [%i1+28],%o3
+/* 0x00dc 224 (23 28) */ fitod %f10,%f18
+/* 0x00e0 225 (24 29) */ fsubd %f14,%f20,%f20
+/* 0x00e4 226 (25 30) */ fitod %f11,%f10
+/* 0x00e8 227 (26 31) */ fsubd %f14,%f6,%f6
+/* 0x00ec 228 (26 31) */ fmuld %f12,%f8,%f12
+/* 0x00f0 229 (27 32) */ fitod %f16,%f22
+/* 0x00f4 230 (27 32) */ fmuld %f4,%f8,%f4
+/* 0x00f8 231 (28 33) */ fsubd %f14,%f18,%f18
+/* 0x00fc 232 (29 34) */ fitod %f17,%f16
+/* 0x0100 233 (29 34) */ fmuld %f20,%f8,%f20
+/* 0x0104 234 (30 35) */ fsubd %f14,%f10,%f10
+/* 0x0108 235 (31 36) */ fdtox %f12,%f12
+/* 0x010c 236 (31 32) */ std %f12,[%sp+152]
+/* 0x0110 237 (31 36) */ fmuld %f6,%f8,%f6
+/* 0x0114 238 (32 37) */ fdtox %f4,%f4
+/* 0x0118 239 (32 33) */ std %f4,[%sp+144]
+/* 0x011c 240 (33 38) */ fsubd %f14,%f22,%f4
+/* 0x0120 241 (33 38) */ fmuld %f18,%f8,%f12
+/* 0x0124 242 (34 39) */ fdtox %f20,%f18
+/* 0x0128 243 (34 35) */ std %f18,[%sp+136]
+/* 0x012c 244 (35 37) */ ldx [%sp+152],%o4
+/* 0x0130 245 (35 40) */ fsubd %f14,%f16,%f14
+/* 0x0134 246 (35 40) */ fmuld %f10,%f8,%f10
+/* 0x0138 247 (36 41) */ fdtox %f6,%f6
+/* 0x013c 248 (36 37) */ std %f6,[%sp+128]
+/* 0x0140 249 (37 39) */ ldx [%sp+144],%o5
+/* 0x0144 250 (37 38) */ add %o4,%g2,%o4
+/* 0x0148 251 (38 39) */ st %o4,[%i0]
+/* 0x014c 252 (38 39) */ srax %o4,32,%g2
+/* 0x0150 253 (38 43) */ fdtox %f12,%f6
+/* 0x0154 254 (38 43) */ fmuld %f4,%f8,%f4
+/* 0x0158 255 (39 40) */ std %f6,[%sp+120]
+/* 0x015c 256 (39 40) */ add %o5,%g3,%g3
+/* 0x0160 257 (40 42) */ ldx [%sp+136],%o7
+/* 0x0164 258 (40 41) */ add %g3,%g2,%g2
+/* 0x0168 259 (40 45) */ fmuld %f14,%f8,%f6
+/* 0x016c 260 (40 45) */ fdtox %f10,%f8
+/* 0x0170 261 (41 42) */ std %f8,[%sp+112]
+/* 0x0174 262 (41 42) */ srax %g2,32,%o5
+/* 0x0178 263 (42 44) */ ldx [%sp+128],%g3
+/* 0x017c 264 (42 43) */ add %o7,%g4,%g4
+/* 0x0180 265 (43 44) */ st %g2,[%i0+4]
+/* 0x0184 266 (43 44) */ add %g4,%o5,%g4
+/* 0x0188 267 (43 48) */ fdtox %f4,%f4
+/* 0x018c 268 (44 46) */ ldx [%sp+120],%o5
+/* 0x0190 269 (44 45) */ add %g3,%g5,%g3
+/* 0x0194 270 (44 45) */ srax %g4,32,%g5
+/* 0x0198 271 (45 46) */ std %f4,[%sp+104]
+/* 0x019c 272 (45 46) */ add %g3,%g5,%g3
+/* 0x01a0 273 (45 50) */ fdtox %f6,%f4
+/* 0x01a4 274 (46 47) */ std %f4,[%sp+96]
+/* 0x01a8 275 (46 47) */ add %o5,%o0,%o0
+/* 0x01ac 276 (46 47) */ srax %g3,32,%o5
+/* 0x01b0 277 (47 49) */ ldx [%sp+112],%g5
+/* 0x01b4 278 (47 48) */ add %o0,%o5,%o0
+/* 0x01b8 279 (48 49) */ st %g4,[%i0+8]
+/* 0x01bc 280 (49 51) */ ldx [%sp+104],%o5
+/* 0x01c0 281 (49 50) */ add %g5,%o1,%o1
+/* 0x01c4 282 (49 50) */ srax %o0,32,%g5
+/* 0x01c8 283 (50 51) */ st %o0,[%i0+16]
+/* 0x01cc 284 (50 51) */ add %o1,%g5,%o1
+/* 0x01d0 285 (51 53) */ ldx [%sp+96],%g5
+/* 0x01d4 286 (51 52) */ add %o5,%o2,%o2
+/* 0x01d8 287 (51 52) */ srax %o1,32,%o5
+/* 0x01dc 288 (52 53) */ st %o1,[%i0+20]
+/* 0x01e0 289 (52 53) */ add %o2,%o5,%o2
+/* 0x01e4 290 (53 54) */ st %o2,[%i0+24]
+/* 0x01e8 291 (53 54) */ srax %o2,32,%g4
+/* 0x01ec 292 (53 54) */ add %g5,%o3,%g2
+/* 0x01f0 293 (54 55) */ st %g3,[%i0+12]
+/* 0x01f4 294 (54 55) */ add %g2,%g4,%g2
+/* 0x01f8 295 (55 56) */ st %g2,[%i0+28]
+/* 0x01fc 299 (55 56) */ srax %g2,32,%o7
+/* 0x0200 300 (56 57) */ or %g0,%o7,%i0
+/* 0x0204 (57 64) */ ret ! Result = %o1 %o0 %f0 %f1
+/* 0x0208 (59 61) */ restore %g0,%g0,%g0
+
+!
+! ENTRY .L77000037
+!
+
+ .L77000037: /* frequency 1.0 confidence 0.0 */
+/* 0x020c 307 ( 0 1) */ subcc %o2,16,%g0
+/* 0x0210 308 ( 0 1) */ bne,pn %icc,.L77000076 ! tprob=0.50
+/* 0x0214 ( 1 5) */ ldd [%o0],%f8
+/* 0x0218 310 ( 2 6) */ ldd [%g5],%f4
+/* 0x021c 311 ( 3 7) */ ldd [%g5+8],%f6
+/* 0x0220 317 ( 4 8) */ ldd [%o0+8],%f14
+/* 0x0224 318 ( 4 7) */ fmovs %f8,%f12
+/* 0x0228 319 ( 5 7) */ ld [%fp+84],%f13
+/* 0x022c 320 ( 5 9) */ fxnor %f0,%f4,%f4
+/* 0x0230 321 ( 6 10) */ ldd [%g5+16],%f10
+/* 0x0234 322 ( 6 10) */ fxnor %f0,%f6,%f6
+/* 0x0238 323 ( 7 11) */ ldd [%g5+24],%f16
+/* 0x023c 324 ( 8 12) */ ldd [%g5+32],%f20
+/* 0x0240 325 ( 8 13) */ fsubd %f12,%f8,%f8
+/* 0x0244 331 ( 9 11) */ ld [%i1+40],%o7
+/* 0x0248 332 ( 9 14) */ fitod %f4,%f18
+/* 0x024c 333 (10 14) */ ldd [%g5+40],%f22
+/* 0x0250 334 (10 15) */ fitod %f5,%f4
+/* 0x0254 335 (11 12) */ stx %o7,[%sp+96]
+/* 0x0258 336 (11 16) */ fitod %f6,%f24
+/* 0x025c 337 (12 14) */ ld [%i1+44],%o7
+/* 0x0260 338 (12 16) */ fxnor %f0,%f10,%f10
+/* 0x0264 339 (13 17) */ ldd [%g5+48],%f26
+/* 0x0268 340 (13 18) */ fitod %f7,%f6
+/* 0x026c 341 (14 15) */ stx %o7,[%sp+104]
+/* 0x0270 342 (14 19) */ fsubd %f14,%f18,%f18
+/* 0x0274 343 (15 17) */ ld [%i1+48],%o7
+/* 0x0278 344 (15 20) */ fsubd %f14,%f4,%f4
+/* 0x027c 345 (16 18) */ ld [%i1+36],%o5
+/* 0x0280 346 (16 21) */ fitod %f10,%f28
+/* 0x0284 347 (17 18) */ stx %o7,[%sp+112]
+/* 0x0288 348 (17 21) */ fxnor %f0,%f16,%f16
+/* 0x028c 349 (18 20) */ ld [%i1],%g2
+/* 0x0290 350 (18 23) */ fsubd %f14,%f24,%f24
+/* 0x0294 351 (19 20) */ stx %o5,[%sp+120]
+/* 0x0298 352 (19 24) */ fitod %f11,%f10
+/* 0x029c 353 (19 24) */ fmuld %f18,%f8,%f18
+/* 0x02a0 354 (20 22) */ ld [%i1+52],%o5
+/* 0x02a4 355 (20 25) */ fsubd %f14,%f6,%f6
+/* 0x02a8 356 (20 25) */ fmuld %f4,%f8,%f4
+/* 0x02ac 357 (21 26) */ fitod %f16,%f30
+/* 0x02b0 358 (22 26) */ fxnor %f0,%f20,%f20
+/* 0x02b4 359 (22 24) */ ld [%i1+4],%g3
+/* 0x02b8 360 (23 27) */ ldd [%g5+56],%f2
+/* 0x02bc 361 (23 28) */ fsubd %f14,%f28,%f28
+/* 0x02c0 362 (23 28) */ fmuld %f24,%f8,%f24
+/* 0x02c4 363 (24 25) */ stx %o5,[%sp+128]
+/* 0x02c8 364 (24 29) */ fdtox %f18,%f18
+/* 0x02cc 365 (25 26) */ std %f18,[%sp+272]
+/* 0x02d0 366 (25 30) */ fitod %f17,%f16
+/* 0x02d4 367 (25 30) */ fmuld %f6,%f8,%f6
+/* 0x02d8 368 (26 31) */ fsubd %f14,%f10,%f10
+/* 0x02dc 369 (27 32) */ fitod %f20,%f18
+/* 0x02e0 370 (28 33) */ fdtox %f4,%f4
+/* 0x02e4 371 (28 29) */ std %f4,[%sp+264]
+/* 0x02e8 372 (28 33) */ fmuld %f28,%f8,%f28
+/* 0x02ec 373 (29 31) */ ld [%i1+8],%g4
+/* 0x02f0 374 (29 34) */ fsubd %f14,%f30,%f4
+/* 0x02f4 375 (30 34) */ fxnor %f0,%f22,%f22
+/* 0x02f8 376 (30 32) */ ld [%i1+12],%g5
+/* 0x02fc 377 (31 33) */ ld [%i1+16],%o0
+/* 0x0300 378 (31 36) */ fitod %f21,%f20
+/* 0x0304 379 (31 36) */ fmuld %f10,%f8,%f10
+/* 0x0308 380 (32 34) */ ld [%i1+20],%o1
+/* 0x030c 381 (32 37) */ fdtox %f24,%f24
+/* 0x0310 382 (33 34) */ std %f24,[%sp+256]
+/* 0x0314 383 (33 38) */ fsubd %f14,%f16,%f16
+/* 0x0318 384 (34 36) */ ldx [%sp+272],%o7
+/* 0x031c 385 (34 39) */ fdtox %f6,%f6
+/* 0x0320 386 (34 39) */ fmuld %f4,%f8,%f4
+/* 0x0324 387 (35 36) */ std %f6,[%sp+248]
+/* 0x0328 388 (35 40) */ fitod %f22,%f24
+/* 0x032c 389 (36 38) */ ld [%i1+32],%o4
+/* 0x0330 390 (36 41) */ fsubd %f14,%f18,%f6
+/* 0x0334 391 (36 37) */ add %o7,%g2,%g2
+/* 0x0338 392 (37 39) */ ldx [%sp+264],%o7
+/* 0x033c 393 (37 41) */ fxnor %f0,%f26,%f26
+/* 0x0340 394 (37 38) */ srax %g2,32,%o5
+/* 0x0344 395 (38 39) */ st %g2,[%i0]
+/* 0x0348 396 (38 43) */ fitod %f23,%f18
+/* 0x034c 397 (38 43) */ fmuld %f16,%f8,%f16
+/* 0x0350 398 (39 41) */ ldx [%sp+248],%g2
+/* 0x0354 399 (39 44) */ fdtox %f28,%f22
+/* 0x0358 400 (39 40) */ add %o7,%g3,%g3
+/* 0x035c 401 (40 42) */ ldx [%sp+256],%o7
+/* 0x0360 402 (40 45) */ fsubd %f14,%f20,%f20
+/* 0x0364 403 (40 41) */ add %g3,%o5,%g3
+/* 0x0368 404 (41 42) */ std %f22,[%sp+240]
+/* 0x036c 405 (41 46) */ fitod %f26,%f22
+/* 0x0370 406 (41 42) */ srax %g3,32,%o5
+/* 0x0374 407 (41 42) */ add %g2,%g5,%g2
+/* 0x0378 408 (42 43) */ st %g3,[%i0+4]
+/* 0x037c 409 (42 47) */ fdtox %f10,%f10
+/* 0x0380 410 (42 43) */ add %o7,%g4,%g4
+/* 0x0384 411 (42 47) */ fmuld %f6,%f8,%f6
+/* 0x0388 412 (43 44) */ std %f10,[%sp+232]
+/* 0x038c 413 (43 47) */ fxnor %f0,%f2,%f12
+/* 0x0390 414 (43 44) */ add %g4,%o5,%g4
+/* 0x0394 415 (44 45) */ st %g4,[%i0+8]
+/* 0x0398 416 (44 45) */ srax %g4,32,%o5
+/* 0x039c 417 (44 49) */ fsubd %f14,%f24,%f10
+/* 0x03a0 418 (45 47) */ ldx [%sp+240],%o7
+/* 0x03a4 419 (45 50) */ fdtox %f4,%f4
+/* 0x03a8 420 (45 46) */ add %g2,%o5,%g2
+/* 0x03ac 421 (45 50) */ fmuld %f20,%f8,%f20
+/* 0x03b0 422 (46 47) */ std %f4,[%sp+224]
+/* 0x03b4 423 (46 47) */ srax %g2,32,%g5
+/* 0x03b8 424 (46 51) */ fsubd %f14,%f18,%f4
+/* 0x03bc 425 (47 48) */ st %g2,[%i0+12]
+/* 0x03c0 426 (47 52) */ fitod %f27,%f24
+/* 0x03c4 427 (47 48) */ add %o7,%o0,%g3
+/* 0x03c8 428 (48 50) */ ldx [%sp+232],%o5
+/* 0x03cc 429 (48 53) */ fdtox %f16,%f16
+/* 0x03d0 430 (48 49) */ add %g3,%g5,%g2
+/* 0x03d4 431 (49 50) */ std %f16,[%sp+216]
+/* 0x03d8 432 (49 50) */ srax %g2,32,%g4
+/* 0x03dc 433 (49 54) */ fitod %f12,%f18
+/* 0x03e0 434 (49 54) */ fmuld %f10,%f8,%f10
+/* 0x03e4 435 (50 51) */ st %g2,[%i0+16]
+/* 0x03e8 436 (50 55) */ fsubd %f14,%f22,%f16
+/* 0x03ec 437 (50 51) */ add %o5,%o1,%g2
+/* 0x03f0 438 (51 53) */ ld [%i1+24],%o2
+/* 0x03f4 439 (51 56) */ fitod %f13,%f12
+/* 0x03f8 440 (51 52) */ add %g2,%g4,%g2
+/* 0x03fc 441 (51 56) */ fmuld %f4,%f8,%f22
+/* 0x0400 442 (52 54) */ ldx [%sp+224],%g3
+/* 0x0404 443 (52 53) */ srax %g2,32,%g4
+/* 0x0408 444 (52 57) */ fdtox %f6,%f6
+/* 0x040c 445 (53 54) */ std %f6,[%sp+208]
+/* 0x0410 446 (53 58) */ fdtox %f20,%f6
+/* 0x0414 447 (54 55) */ stx %o4,[%sp+136]
+/* 0x0418 448 (54 59) */ fsubd %f14,%f24,%f4
+/* 0x041c 449 (55 56) */ std %f6,[%sp+200]
+/* 0x0420 450 (55 60) */ fsubd %f14,%f18,%f6
+/* 0x0424 451 (55 60) */ fmuld %f16,%f8,%f16
+/* 0x0428 452 (56 57) */ st %g2,[%i0+20]
+/* 0x042c 453 (56 57) */ add %g3,%o2,%g2
+/* 0x0430 454 (56 61) */ fdtox %f10,%f10
+/* 0x0434 455 (57 59) */ ld [%i1+28],%o3
+/* 0x0438 456 (57 58) */ add %g2,%g4,%g2
+/* 0x043c 457 (58 60) */ ldx [%sp+216],%g5
+/* 0x0440 458 (58 59) */ srax %g2,32,%g4
+/* 0x0444 459 (59 60) */ std %f10,[%sp+192]
+/* 0x0448 460 (59 64) */ fsubd %f14,%f12,%f10
+/* 0x044c 461 (59 64) */ fmuld %f4,%f8,%f4
+/* 0x0450 462 (60 61) */ st %g2,[%i0+24]
+/* 0x0454 463 (60 61) */ add %g5,%o3,%g2
+/* 0x0458 464 (60 65) */ fdtox %f22,%f12
+/* 0x045c 465 (60 65) */ fmuld %f6,%f8,%f6
+/* 0x0460 466 (61 63) */ ldx [%sp+136],%o0
+/* 0x0464 467 (61 62) */ add %g2,%g4,%g2
+/* 0x0468 468 (62 64) */ ldx [%sp+208],%g3
+/* 0x046c 469 (62 63) */ srax %g2,32,%g4
+/* 0x0470 470 (63 65) */ ldx [%sp+120],%o1
+/* 0x0474 471 (64 66) */ ldx [%sp+200],%g5
+/* 0x0478 472 (64 65) */ add %g3,%o0,%g3
+/* 0x047c 473 (64 69) */ fdtox %f4,%f4
+/* 0x0480 474 (64 69) */ fmuld %f10,%f8,%f8
+/* 0x0484 475 (65 66) */ std %f12,[%sp+184]
+/* 0x0488 476 (65 66) */ add %g3,%g4,%g3
+/* 0x048c 477 (65 70) */ fdtox %f16,%f12
+/* 0x0490 478 (66 67) */ std %f12,[%sp+176]
+/* 0x0494 479 (66 67) */ srax %g3,32,%o0
+/* 0x0498 480 (66 67) */ add %g5,%o1,%g5
+/* 0x049c 481 (67 69) */ ldx [%sp+192],%o2
+/* 0x04a0 482 (67 68) */ add %g5,%o0,%g5
+/* 0x04a4 483 (68 70) */ ldx [%sp+96],%g4
+/* 0x04a8 484 (68 69) */ srax %g5,32,%o1
+/* 0x04ac 485 (69 71) */ ld [%i1+56],%o4
+/* 0x04b0 486 (70 72) */ ldx [%sp+104],%o0
+/* 0x04b4 487 (70 71) */ add %o2,%g4,%g4
+/* 0x04b8 488 (71 72) */ std %f4,[%sp+168]
+/* 0x04bc 489 (71 72) */ add %g4,%o1,%g4
+/* 0x04c0 490 (71 76) */ fdtox %f6,%f4
+/* 0x04c4 491 (72 74) */ ldx [%sp+184],%o3
+/* 0x04c8 492 (72 73) */ srax %g4,32,%o2
+/* 0x04cc 493 (73 75) */ ldx [%sp+112],%o1
+/* 0x04d0 494 (74 75) */ std %f4,[%sp+160]
+/* 0x04d4 495 (74 75) */ add %o3,%o0,%o0
+/* 0x04d8 496 (74 79) */ fdtox %f8,%f4
+/* 0x04dc 497 (75 77) */ ldx [%sp+176],%o5
+/* 0x04e0 498 (75 76) */ add %o0,%o2,%o0
+/* 0x04e4 499 (76 77) */ stx %o4,[%sp+144]
+/* 0x04e8 500 (77 78) */ st %g2,[%i0+28]
+/* 0x04ec 501 (77 78) */ add %o5,%o1,%g2
+/* 0x04f0 502 (77 78) */ srax %o0,32,%o1
+/* 0x04f4 503 (78 79) */ std %f4,[%sp+152]
+/* 0x04f8 504 (78 79) */ add %g2,%o1,%o1
+/* 0x04fc 505 (79 81) */ ldx [%sp+168],%o7
+/* 0x0500 506 (79 80) */ srax %o1,32,%o3
+/* 0x0504 507 (80 82) */ ldx [%sp+128],%o2
+/* 0x0508 508 (81 83) */ ld [%i1+60],%o4
+/* 0x050c 509 (82 83) */ add %o7,%o2,%o2
+/* 0x0510 510 (83 84) */ add %o2,%o3,%o2
+/* 0x0514 511 (83 85) */ ldx [%sp+144],%o5
+/* 0x0518 512 (84 86) */ ldx [%sp+160],%g2
+/* 0x051c 513 (85 87) */ ldx [%sp+152],%o3
+/* 0x0520 514 (86 87) */ st %g3,[%i0+32]
+/* 0x0524 515 (86 87) */ add %g2,%o5,%g2
+/* 0x0528 516 (86 87) */ srax %o2,32,%o5
+/* 0x052c 517 (87 88) */ st %g5,[%i0+36]
+/* 0x0530 518 (87 88) */ add %g2,%o5,%g2
+/* 0x0534 519 (87 88) */ add %o3,%o4,%g3
+/* 0x0538 520 (88 89) */ st %o0,[%i0+44]
+/* 0x053c 521 (88 89) */ srax %g2,32,%g5
+/* 0x0540 522 (89 90) */ st %o1,[%i0+48]
+/* 0x0544 523 (89 90) */ add %g3,%g5,%g3
+/* 0x0548 524 (90 91) */ st %o2,[%i0+52]
+/* 0x054c 528 (90 91) */ srax %g3,32,%o7
+/* 0x0550 529 (91 92) */ st %g4,[%i0+40]
+/* 0x0554 530 (92 93) */ st %g2,[%i0+56]
+/* 0x0558 531 (93 94) */ st %g3,[%i0+60]
+/* 0x055c 532 (93 94) */ or %g0,%o7,%i0
+/* 0x0560 (94 101) */ ret ! Result = %o1 %o0 %f0 %f1
+/* 0x0564 (96 98) */ restore %g0,%g0,%g0
+
+!
+! ENTRY .L77000076
+!
+! Set-up for the floating-point product loops that follow.
+!   * %o7 and %g2 are cleared (%g2 is the loop index used below).
+!   * %o5 = %fp-2264 and %o4 = %fp-2256 are the two store cursors.
+!   * %o1 = (%o2 + 1) / 2, computed with srl/add/sra so that the signed
+!     divide rounds toward zero; %o2 is then overwritten with %o1 - 1.
+!   * %f14 = copy of %f0, %f18 = double at [%o0+8],
+!     %f16 = (high word of [%o0] : word at [%fp+84]) - double at [%o0].
+!     NOTE(review): this looks like the usual "integer dropped into the
+!     low word of a biased double, then subtract the bias" conversion;
+!     the constants behind %o0 are not visible here -- confirm.
+!   * %o0 is then reused as the load cursor (%g5 + 32).
+! Dispatch: %o1 <= 0 goes to .L900000161, %o1 < 7 goes to the simple
+! loop at .L77000077, otherwise the loads/conversions at the end of this
+! block prime the unrolled loop at .L900000149 (%g2 starts at 5).
+!
+
+ .L77000076: /* frequency 1.0 confidence 0.0 */
+/* 0x0568 540 ( 0 4) */ ldd [%o0],%f6
+/* 0x056c 546 ( 0 1) */ add %o2,1,%g2
+/* 0x0570 547 ( 0 3) */ fmovd %f0,%f14
+/* 0x0574 548 ( 0 1) */ or %g0,0,%o7
+/* 0x0578 549 ( 1 3) */ ld [%fp+84],%f9
+/* 0x057c 550 ( 1 2) */ srl %g2,31,%g3
+/* 0x0580 551 ( 1 2) */ add %fp,-2264,%o5
+/* 0x0584 552 ( 2 3) */ add %g2,%g3,%g2
+/* 0x0588 553 ( 2 6) */ ldd [%o0+8],%f18
+/* 0x058c 554 ( 2 3) */ add %fp,-2256,%o4
+/* 0x0590 555 ( 3 6) */ fmovs %f6,%f8
+/* 0x0594 556 ( 3 4) */ sra %g2,1,%o1
+/* 0x0598 557 ( 3 4) */ or %g0,0,%g2
+/* 0x059c 558 ( 4 5) */ subcc %o1,0,%g0
+/* 0x05a0 559 ( 4 5) */ sub %o1,1,%o2
+/* 0x05a4 563 ( 5 6) */ add %g5,32,%o0
+/* 0x05a8 564 ( 6 11) */ fsubd %f8,%f6,%f16
+! The delay slot below (subcc %o3,0) is not annulled: it runs on both
+! paths and supplies the condition codes tested at .L900000161.
+/* 0x05ac 565 ( 6 7) */ ble,pt %icc,.L900000161 ! tprob=0.50
+/* 0x05b0 ( 6 7) */ subcc %o3,0,%g0
+/* 0x05b4 567 ( 7 8) */ subcc %o1,7,%g0
+! Delay slot: %o1 becomes (pair count - 2) on both paths; it is the loop
+! bound compared against %g2 in .L900000149.
+/* 0x05b8 568 ( 7 8) */ bl,pn %icc,.L77000077 ! tprob=0.50
+/* 0x05bc ( 7 8) */ sub %o1,2,%o1
+/* 0x05c0 570 ( 8 12) */ ldd [%g5],%f2
+/* 0x05c4 571 ( 9 13) */ ldd [%g5+8],%f4
+/* 0x05c8 572 ( 9 10) */ or %g0,5,%g2
+/* 0x05cc 573 (10 14) */ ldd [%g5+16],%f0
+/* 0x05d0 574 (11 15) */ fxnor %f14,%f2,%f2
+/* 0x05d4 575 (11 15) */ ldd [%g5+24],%f12
+/* 0x05d8 576 (12 16) */ fxnor %f14,%f4,%f6
+/* 0x05dc 577 (12 16) */ ldd [%g5+32],%f10
+/* 0x05e0 578 (13 17) */ fxnor %f14,%f0,%f8
+/* 0x05e4 579 (15 20) */ fitod %f3,%f0
+/* 0x05e8 580 (16 21) */ fitod %f2,%f4
+/* 0x05ec 581 (17 22) */ fitod %f7,%f2
+/* 0x05f0 582 (18 23) */ fitod %f6,%f6
+/* 0x05f4 583 (20 25) */ fsubd %f18,%f0,%f0
+/* 0x05f8 584 (21 26) */ fsubd %f18,%f4,%f4
+
+!
+! ENTRY .L900000149
+!
+! Unrolled body of the product loop.  Work belonging to several input
+! doublewords is interleaved (it appears to be software-pipelined: the
+! values consumed here were loaded/converted by the preceding block or by
+! an earlier pass).
+! Per pass:
+!   * %g2 += 2, %o5 += 32, %o4 += 32, %o0 += 16 (two 8-byte steps, each
+!     followed by an ldd of the next input doubleword);
+!   * every 32-bit half goes through
+!       fxnor with %f14 -> fitod -> fsubd from %f18 -> fmuld by %f16
+!       -> fdtox;
+!   * four 64-bit results are stored at [%o5-32], [%o4-32], [%o5-16] and
+!     [%o4-16] (offsets relative to the already-advanced cursors).
+! The loop repeats while %g2 <= %o1.
+!
+
+ .L900000149: /* frequency 1.0 confidence 0.0 */
+/* 0x05fc 586 ( 0 4) */ fxnor %f14,%f12,%f22
+/* 0x0600 587 ( 0 5) */ fmuld %f4,%f16,%f4
+/* 0x0604 588 ( 0 1) */ add %g2,2,%g2
+/* 0x0608 589 ( 0 1) */ add %o4,32,%o4
+/* 0x060c 590 ( 1 6) */ fitod %f9,%f24
+/* 0x0610 591 ( 1 6) */ fmuld %f0,%f16,%f20
+/* 0x0614 592 ( 1 2) */ add %o0,8,%o0
+/* 0x0618 593 ( 1 2) */ subcc %g2,%o1,%g0
+/* 0x061c 594 ( 2 6) */ ldd [%o0],%f12
+/* 0x0620 595 ( 2 7) */ fsubd %f18,%f2,%f0
+/* 0x0624 596 ( 2 3) */ add %o5,32,%o5
+/* 0x0628 597 ( 3 8) */ fsubd %f18,%f6,%f2
+/* 0x062c 598 ( 5 10) */ fdtox %f4,%f4
+/* 0x0630 599 ( 6 11) */ fdtox %f20,%f6
+/* 0x0634 600 ( 6 7) */ std %f4,[%o5-32]
+/* 0x0638 601 ( 7 12) */ fitod %f8,%f4
+/* 0x063c 602 ( 7 8) */ std %f6,[%o4-32]
+/* 0x0640 603 ( 8 12) */ fxnor %f14,%f10,%f8
+/* 0x0644 604 ( 8 13) */ fmuld %f2,%f16,%f6
+/* 0x0648 605 ( 9 14) */ fitod %f23,%f2
+/* 0x064c 606 ( 9 14) */ fmuld %f0,%f16,%f20
+/* 0x0650 607 ( 9 10) */ add %o0,8,%o0
+/* 0x0654 608 (10 14) */ ldd [%o0],%f10
+/* 0x0658 609 (10 15) */ fsubd %f18,%f24,%f0
+/* 0x065c 610 (12 17) */ fsubd %f18,%f4,%f4
+/* 0x0660 611 (13 18) */ fdtox %f6,%f6
+/* 0x0664 612 (14 19) */ fdtox %f20,%f20
+/* 0x0668 613 (14 15) */ std %f6,[%o5-16]
+/* 0x066c 614 (15 20) */ fitod %f22,%f6
+! The branch tests the subcc %g2,%o1 issued earlier in the pass.  The
+! store in the delay slot is not annulled, so it also runs on loop exit.
+/* 0x0670 615 (15 16) */ ble,pt %icc,.L900000149 ! tprob=0.50
+/* 0x0674 (15 16) */ std %f20,[%o4-16]
+
+!
+! ENTRY .L900000152
+!
+! Drain code reached when the unrolled loop at .L900000149 falls through.
+! It finishes the values still in flight in the FP registers using the
+! same per-half sequence (fxnor with %f14, fitod, fsubd from %f18, fmuld
+! by %f16, fdtox) and stores ten 64-bit results at [%o5-80] .. [%o4-16]
+! after advancing both cursors by 80.
+! %g5 is set to %o0 + 8 so that the simple loop can continue from the
+! next input doubleword.  If %g2 > %o2 everything has been processed and
+! control goes to .L77000043; otherwise execution falls through into
+! .L77000077.
+!
+
+ .L900000152: /* frequency 1.0 confidence 0.0 */
+/* 0x0678 618 ( 0 4) */ fxnor %f14,%f12,%f12
+/* 0x067c 619 ( 0 5) */ fmuld %f0,%f16,%f22
+/* 0x0680 620 ( 0 1) */ add %o5,80,%o5
+/* 0x0684 621 ( 0 1) */ add %o4,80,%o4
+/* 0x0688 622 ( 1 5) */ fxnor %f14,%f10,%f0
+/* 0x068c 623 ( 1 6) */ fmuld %f4,%f16,%f24
+/* 0x0690 624 ( 1 2) */ subcc %g2,%o2,%g0
+/* 0x0694 625 ( 1 2) */ add %o0,8,%g5
+/* 0x0698 626 ( 2 7) */ fitod %f8,%f20
+/* 0x069c 627 ( 3 8) */ fitod %f9,%f8
+/* 0x06a0 628 ( 4 9) */ fsubd %f18,%f6,%f6
+/* 0x06a4 629 ( 5 10) */ fitod %f12,%f26
+/* 0x06a8 630 ( 6 11) */ fitod %f13,%f4
+/* 0x06ac 631 ( 7 12) */ fsubd %f18,%f2,%f12
+/* 0x06b0 632 ( 8 13) */ fitod %f0,%f2
+/* 0x06b4 633 ( 9 14) */ fitod %f1,%f0
+/* 0x06b8 634 (10 15) */ fsubd %f18,%f20,%f10
+/* 0x06bc 635 (10 15) */ fmuld %f6,%f16,%f20
+/* 0x06c0 636 (11 16) */ fsubd %f18,%f8,%f8
+/* 0x06c4 637 (12 17) */ fsubd %f18,%f26,%f6
+/* 0x06c8 638 (12 17) */ fmuld %f12,%f16,%f12
+/* 0x06cc 639 (13 18) */ fsubd %f18,%f4,%f4
+/* 0x06d0 640 (14 19) */ fsubd %f18,%f2,%f2
+/* 0x06d4 641 (15 20) */ fsubd %f18,%f0,%f0
+/* 0x06d8 642 (15 20) */ fmuld %f10,%f16,%f10
+/* 0x06dc 643 (16 21) */ fdtox %f24,%f24
+/* 0x06e0 644 (16 17) */ std %f24,[%o5-80]
+/* 0x06e4 645 (16 21) */ fmuld %f8,%f16,%f8
+/* 0x06e8 646 (17 22) */ fdtox %f22,%f22
+/* 0x06ec 647 (17 18) */ std %f22,[%o4-80]
+/* 0x06f0 648 (17 22) */ fmuld %f6,%f16,%f6
+/* 0x06f4 649 (18 23) */ fdtox %f20,%f20
+/* 0x06f8 650 (18 19) */ std %f20,[%o5-64]
+/* 0x06fc 651 (18 23) */ fmuld %f4,%f16,%f4
+/* 0x0700 652 (19 24) */ fdtox %f12,%f12
+/* 0x0704 653 (19 20) */ std %f12,[%o4-64]
+/* 0x0708 654 (19 24) */ fmuld %f2,%f16,%f2
+/* 0x070c 655 (20 25) */ fdtox %f10,%f10
+/* 0x0710 656 (20 21) */ std %f10,[%o5-48]
+/* 0x0714 657 (20 25) */ fmuld %f0,%f16,%f0
+/* 0x0718 658 (21 26) */ fdtox %f8,%f8
+/* 0x071c 659 (21 22) */ std %f8,[%o4-48]
+/* 0x0720 660 (22 27) */ fdtox %f6,%f6
+/* 0x0724 661 (22 23) */ std %f6,[%o5-32]
+/* 0x0728 662 (23 28) */ fdtox %f4,%f4
+/* 0x072c 663 (23 24) */ std %f4,[%o4-32]
+/* 0x0730 664 (24 29) */ fdtox %f2,%f2
+/* 0x0734 665 (24 25) */ std %f2,[%o5-16]
+/* 0x0738 666 (25 30) */ fdtox %f0,%f0
+! Tests the subcc %g2,%o2 issued near the top of this block; the final
+! store sits in the (non-annulled) delay slot and always executes.
+/* 0x073c 667 (25 26) */ bg,pn %icc,.L77000043 ! tprob=0.50
+/* 0x0740 (25 26) */ std %f0,[%o4-16]
+
+!
+! ENTRY .L77000077
+!
+! Entry to the simple (one doubleword per pass) product loop: preloads the
+! first input doubleword from [%g5] into %f0 and falls through into
+! .L900000160, which expects %f0 to be loaded already.
+!
+
+ .L77000077: /* frequency 1.0 confidence 0.0 */
+/* 0x0744 670 ( 0 4) */ ldd [%g5],%f0
+
+!
+! ENTRY .L900000160
+!
+! Simple product loop, one input doubleword (two 32-bit halves) per pass.
+! On entry %f0 holds the doubleword most recently loaded from [%g5].
+! Per pass:
+!   * both halves: fxnor with %f14, fitod, fsubd from %f18, fmuld by
+!     %f16, fdtox;
+!   * the result for the %f0 half is stored at [%o5], the result for the
+!     %f1 half at [%o4]; both cursors then advance by 16;
+!   * %g2 += 1 and %g5 += 8.
+! The loop repeats while %g2 <= %o2.
+!
+
+ .L900000160: /* frequency 1.0 confidence 0.0 */
+/* 0x0748 672 ( 0 4) */ fxnor %f14,%f0,%f0
+/* 0x074c 673 ( 0 1) */ add %g2,1,%g2
+/* 0x0750 674 ( 0 1) */ add %g5,8,%g5
+/* 0x0754 675 ( 1 2) */ subcc %g2,%o2,%g0
+/* 0x0758 676 ( 4 9) */ fitod %f0,%f2
+/* 0x075c 677 ( 5 10) */ fitod %f1,%f0
+/* 0x0760 678 ( 9 14) */ fsubd %f18,%f2,%f2
+/* 0x0764 679 (10 15) */ fsubd %f18,%f0,%f0
+/* 0x0768 680 (14 19) */ fmuld %f2,%f16,%f2
+/* 0x076c 681 (15 20) */ fmuld %f0,%f16,%f0
+/* 0x0770 682 (19 24) */ fdtox %f2,%f2
+/* 0x0774 683 (19 20) */ std %f2,[%o5]
+/* 0x0778 684 (19 20) */ add %o5,16,%o5
+/* 0x077c 685 (20 25) */ fdtox %f0,%f0
+/* 0x0780 686 (20 21) */ std %f0,[%o4]
+/* 0x0784 687 (20 21) */ add %o4,16,%o4
+! Annulled branch (",a"): the load of the next doubleword in the delay
+! slot is performed only when the loop is taken again, so no load is
+! issued past the last element on exit.
+/* 0x0788 688 (20 21) */ ble,a,pt %icc,.L900000160 ! tprob=0.50
+/* 0x078c (23 27) */ ldd [%g5],%f0
+
+!
+! ENTRY .L77000043
+!
+! Join point after the product loops: compares %o3 with zero so that the
+! branch at the top of .L900000161 (fall-through) sees fresh condition
+! codes.
+!
+
+ .L77000043: /* frequency 1.0 confidence 0.0 */
+/* 0x0790 696 ( 0 1) */ subcc %o3,0,%g0
+
+!
+! ENTRY .L900000161
+!
+! Set-up for the integer accumulate/carry pass.  Entered with condition
+! codes from a "subcc %o3,0" (issued at .L77000043 or in the delay slot of
+! the first branch in .L77000076).
+!   * %o3 <= 0: leave for .L900000159; the annulled delay slot copies %o7
+!     into %i0 on that taken path only.
+!   * otherwise: %g3 = %i1 (cursor over 32-bit words to add),
+!     %g2 = %i0 (cursor over 32-bit outputs), %g5 = %fp-2264 (cursor over
+!     the 64-bit values stored by the product loops), %g4 = 0 (index),
+!     %o5 = %o3 - 1 and %o4 = %o3 - 3 (loop bounds).
+!   * %o3 < 6: use the simple loop at .L77000078.
+!   * otherwise the tail of this block handles element 0 (sum of the
+!     64-bit value at [%fp-2264] and the word at [%g3], low 32 bits stored
+!     through %g2, bits 32 and up kept in %o0 as carry) and preloads
+!     operands for the three-way unrolled loop at .L900000145
+!     (%g4 = 3, %g5 = %fp-2240).  %o3 is reused as a data register from
+!     here on.
+!
+
+ .L900000161: /* frequency 1.0 confidence 0.0 */
+/* 0x0794 698 ( 0 1) */ ble,a,pt %icc,.L900000159 ! tprob=0.50
+/* 0x0798 ( 0 1) */ or %g0,%o7,%i0
+/* 0x079c 703 ( 0 2) */ ldx [%fp-2256],%o2
+/* 0x07a0 704 ( 0 1) */ or %g0,%i1,%g3
+/* 0x07a4 705 ( 1 2) */ sub %o3,1,%o5
+/* 0x07a8 706 ( 1 2) */ or %g0,0,%g4
+/* 0x07ac 707 ( 2 3) */ add %fp,-2264,%g5
+/* 0x07b0 708 ( 2 3) */ or %g0,%i0,%g2
+/* 0x07b4 709 ( 3 4) */ subcc %o3,6,%g0
+/* 0x07b8 710 ( 3 4) */ sub %o5,2,%o4
+! Non-annulled delay slot: the ldx runs on both paths.
+/* 0x07bc 711 ( 3 4) */ bl,pn %icc,.L77000078 ! tprob=0.50
+/* 0x07c0 ( 3 5) */ ldx [%fp-2264],%o0
+/* 0x07c4 713 ( 4 6) */ ld [%g3],%o1
+/* 0x07c8 714 ( 4 5) */ add %g2,4,%g2
+/* 0x07cc 715 ( 4 5) */ or %g0,3,%g4
+/* 0x07d0 716 ( 5 7) */ ld [%g3+4],%o3
+/* 0x07d4 717 ( 5 6) */ add %g3,8,%g3
+/* 0x07d8 718 ( 5 6) */ add %fp,-2240,%g5
+/* 0x07dc 719 ( 6 7) */ add %o0,%o1,%o0
+/* 0x07e0 720 ( 6 8) */ ldx [%fp-2248],%o1
+/* 0x07e4 721 ( 7 8) */ st %o0,[%g2-4]
+/* 0x07e8 722 ( 7 8) */ srax %o0,32,%o0
+
+!
+! ENTRY .L900000145
+!
+! Accumulate/carry loop, unrolled by three.
+! Each pass reads three 64-bit values through %g5 and three 32-bit words
+! through %g3, and writes three 32-bit results through %g2.  Every result
+! is (64-bit value + 32-bit word + carry); the low 32 bits are stored and
+! the sum shifted right arithmetically by 32 (srax) becomes the carry for
+! the next element.  Operands are loaded one step ahead of their use.
+! Per pass: %g4 += 3, %g2 += 12, %g3 += 12, %g5 += 24.
+! [%sp+96] is used as a scratch slot to spill one loaded word.
+! The loop repeats while %g4 <= %o4.
+!
+
+ .L900000145: /* frequency 1.0 confidence 0.0 */
+/* 0x07ec 724 ( 0 2) */ ld [%g3],%o7
+/* 0x07f0 725 ( 0 1) */ add %o2,%o3,%o2
+/* 0x07f4 726 ( 0 1) */ sra %o0,0,%o3
+/* 0x07f8 727 ( 1 3) */ ldx [%g5],%o0
+/* 0x07fc 728 ( 1 2) */ add %o2,%o3,%o2
+/* 0x0800 729 ( 1 2) */ add %g4,3,%g4
+/* 0x0804 730 ( 2 3) */ st %o2,[%g2]
+/* 0x0808 731 ( 2 3) */ srax %o2,32,%o3
+/* 0x080c 732 ( 2 3) */ subcc %g4,%o4,%g0
+/* 0x0810 733 ( 3 5) */ ld [%g3+4],%o2
+/* 0x0814 734 ( 4 5) */ stx %o2,[%sp+96]
+/* 0x0818 735 ( 4 5) */ add %o1,%o7,%o1
+/* 0x081c 736 ( 5 7) */ ldx [%g5+8],%o2
+/* 0x0820 737 ( 5 6) */ add %o1,%o3,%o1
+/* 0x0824 738 ( 5 6) */ add %g2,12,%g2
+/* 0x0828 739 ( 6 7) */ st %o1,[%g2-8]
+/* 0x082c 740 ( 6 7) */ srax %o1,32,%o7
+/* 0x0830 741 ( 6 7) */ add %g3,12,%g3
+/* 0x0834 742 ( 7 9) */ ld [%g3-4],%o3
+/* 0x0838 743 ( 8 10) */ ldx [%sp+96],%o1
+/* 0x083c 744 (10 11) */ add %o0,%o1,%o0
+/* 0x0840 745 (10 12) */ ldx [%g5+16],%o1
+/* 0x0844 746 (11 12) */ add %o0,%o7,%o0
+/* 0x0848 747 (11 12) */ add %g5,24,%g5
+/* 0x084c 748 (11 12) */ st %o0,[%g2-4]
+! Tests the subcc %g4,%o4 issued earlier in the pass.  The delay slot
+! (carry extraction into %o0) is not annulled and runs on exit as well.
+/* 0x0850 749 (11 12) */ ble,pt %icc,.L900000145 ! tprob=0.50
+/* 0x0854 (12 13) */ srax %o0,32,%o0
+
+!
+! ENTRY .L900000148
+!
+! Drain code for the unrolled accumulate/carry loop: completes the two
+! elements whose operands were already loaded, storing their low 32 bits
+! at [%g2-8] and [%g2-4] after advancing %g2 by 8.
+! If %g4 > %o5 nothing is left and control goes to .L77000061 (outside
+! this view); the delay slot leaves the final carry (sum >> 32) in %o7 on
+! both paths.  Otherwise %g3 is stepped by 4 and execution falls through
+! into the simple loop entry .L77000078.
+!
+
+ .L900000148: /* frequency 1.0 confidence 0.0 */
+/* 0x0858 752 ( 0 1) */ add %o2,%o3,%o2
+/* 0x085c 753 ( 0 1) */ sra %o0,0,%o3
+/* 0x0860 754 ( 0 2) */ ld [%g3],%o0
+/* 0x0864 755 ( 1 2) */ add %o2,%o3,%o3
+/* 0x0868 756 ( 1 2) */ add %g2,8,%g2
+/* 0x086c 757 ( 2 3) */ srax %o3,32,%o2
+/* 0x0870 758 ( 2 3) */ st %o3,[%g2-8]
+/* 0x0874 759 ( 2 3) */ add %o1,%o0,%o0
+/* 0x0878 760 ( 3 4) */ add %o0,%o2,%o0
+/* 0x087c 761 ( 3 4) */ st %o0,[%g2-4]
+/* 0x0880 762 ( 3 4) */ subcc %g4,%o5,%g0
+/* 0x0884 763 ( 3 4) */ bg,pn %icc,.L77000061 ! tprob=0.50
+/* 0x0888 ( 4 5) */ srax %o0,32,%o7
+/* 0x088c 765 ( 4 5) */ add %g3,4,%g3
+
+!
+! ENTRY .L77000078
+!
+! Entry to the simple accumulate/carry loop: preloads the first 32-bit
+! word from [%g3] into %o2 and falls through into .L900000158, which
+! expects %o2 to be loaded already.
+!
+
+ .L77000078: /* frequency 1.0 confidence 0.0 */
+/* 0x0890 767 ( 0 2) */ ld [%g3],%o2
+
+!
+! ENTRY .L900000158
+!
+! Simple accumulate/carry loop, one element per pass.
+! On entry %o2 holds the 32-bit word just loaded from [%g3] and %o7 holds
+! the incoming carry.
+! Per pass:
+!   sum   = (64-bit value at [%g5]) + %o2 + sign-extended low 32 bits
+!           of %o7
+!   [%g2] = low 32 bits of sum
+!   %o7   = sum >> 32 (arithmetic shift)
+!   %g4 += 1, %g3 += 4, %g5 += 8, %g2 += 4
+! The loop repeats while %g4 <= %o5.
+!
+
+ .L900000158: /* frequency 1.0 confidence 0.0 */
+/* 0x0894 769 ( 0 2) */ ldx [%g5],%o0
+/* 0x0898 770 ( 0 1) */ sra %o7,0,%o1
+/* 0x089c 771 ( 0 1) */ add %g4,1,%g4
+/* 0x08a0 772 ( 1 2) */ add %g3,4,%g3
+/* 0x08a4 773 ( 1 2) */ add %g5,8,%g5
+/* 0x08a8 774 ( 2 3) */ add %o0,%o2,%o0
+/* 0x08ac 775 ( 2 3) */ subcc %g4,%o5,%g0
+/* 0x08b0 776 ( 3 4) */ add %o0,%o1,%o0
+/* 0x08b4 777 ( 3 4) */ st %o0,[%g2]
+/* 0x08b8 778 ( 3 4) */ add %g2,4,%g2
+/* 0x08bc 779 ( 4 5) */ srax %o0,32,%o7
+! Annulled branch: the next word is loaded in the delay slot only when
+! the loop is taken again.
+/* 0x08c0 780 ( 4 5) */ ble,a,pt %icc,.L900000158 ! tprob=0.50
+/* 0x08c4 ( 4 6) */ ld [%g3],%o2
+
+!
+! ENTRY .L77000047
+!
+! Function exit for the generic path: the final carry in %o7 is copied to
+! %i0 (the return-value register of this register window) and the
+! function returns; "restore" in the delay slot of "ret" pops the
+! register window.
+!
+
+ .L77000047: /* frequency 1.0 confidence 0.0 */
+/* 0x08c8 783 ( 0 1) */ or %g0,%o7,%i0
+/* 0x08cc ( 1 8) */ ret ! Result = %o1 %o0 %f0 %f1
+/* 0x08d0 ( 3 5) */ restore %g0,%g0,%g0
+
+!
+! ENTRY .L77000048
+!
+! Straight-line special case that processes exactly eight 32-bit words
+! (reads [%i1] .. [%i1+28], writes [%i0] .. [%i0+28]).
+! The compare that feeds the first branch is outside this block: on
+! "not equal" control leaves for .L77000050.  NOTE(review): presumably
+! this is the "length == 8" test -- confirm against the preceding code.
+!
+! Multiplier split: %o1 is cut into %o1 >> 19 and %o1 & 0x7ffff (andn
+! with 0xfff80000).  Both pieces travel through [%sp+240]/[%sp+244] into
+! the low words of %f12:%f13 and %f16:%f17 (high words copied from the
+! double at [%o0]); subtracting that double yields %f12 (upper piece) and
+! %f8 (lower piece).
+!
+! FP side: four doublewords at [%g5] .. [%g5+24] provide eight 32-bit
+! inputs.  Each is XNORed with %f0, converted with fitod and subtracted
+! from the double at [%o0+8]; the result is multiplied by both %f12 and
+! %f8, each product converted with fdtox and spilled to the stack area
+! [%sp+112] .. [%sp+232].
+!
+! Integer side, for k = 0..7:
+!   sum = (upper product << 19) + lower product + word [%i1+4k] + carry
+!   [%i0+4k] = low 32 bits of sum,  carry = sum >> 32 (logical, srlx)
+! [%sp+96] and [%sp+104] are scratch slots for the word at [%i1+28] and
+! for the fifth result.  The final carry is returned in %i0.
+!
+
+ .L77000048: /* frequency 1.0 confidence 0.0 */
+! Non-annulled delay slot: the sethi runs whether or not the branch is
+! taken.
+/* 0x08d4 794 ( 0 1) */ bne,pn %icc,.L77000050 ! tprob=0.50
+/* 0x08d8 ( 0 1) */ sethi %hi(0xfff80000),%g2
+/* 0x08dc 796 ( 0 4) */ ldd [%g5],%f4
+/* 0x08e0 804 ( 0 1) */ srl %o1,19,%g3
+/* 0x08e4 805 ( 1 2) */ st %g3,[%sp+240]
+/* 0x08e8 806 ( 1 2) */ andn %o1,%g2,%g2
+/* 0x08ec 807 ( 2 6) */ ldd [%o0],%f8
+/* 0x08f0 808 ( 3 4) */ st %g2,[%sp+244]
+/* 0x08f4 809 ( 3 7) */ fxnor %f0,%f4,%f4
+/* 0x08f8 810 ( 4 8) */ ldd [%g5+8],%f6
+/* 0x08fc 814 ( 5 9) */ ldd [%o0+8],%f18
+/* 0x0900 815 ( 5 8) */ fmovs %f8,%f12
+/* 0x0904 816 ( 6 10) */ ldd [%g5+16],%f10
+/* 0x0908 817 ( 6 9) */ fmovs %f8,%f16
+/* 0x090c 818 ( 7 11) */ ldd [%g5+24],%f20
+/* 0x0910 819 ( 7 12) */ fitod %f4,%f14
+/* 0x0914 823 ( 8 10) */ ld [%i1],%g2
+/* 0x0918 824 ( 8 13) */ fitod %f5,%f4
+/* 0x091c 825 ( 9 11) */ ld [%sp+240],%f13
+/* 0x0920 826 ( 9 13) */ fxnor %f0,%f6,%f6
+/* 0x0924 827 (10 12) */ ld [%sp+244],%f17
+/* 0x0928 828 (10 14) */ fxnor %f0,%f10,%f10
+/* 0x092c 829 (11 13) */ ld [%i1+28],%o3
+/* 0x0930 830 (11 15) */ fxnor %f0,%f20,%f20
+/* 0x0934 831 (12 14) */ ld [%i1+4],%g3
+/* 0x0938 832 (12 17) */ fsubd %f12,%f8,%f12
+/* 0x093c 833 (13 14) */ stx %o3,[%sp+96]
+/* 0x0940 834 (13 18) */ fsubd %f18,%f14,%f14
+/* 0x0944 835 (14 16) */ ld [%i1+8],%g4
+/* 0x0948 836 (14 19) */ fsubd %f16,%f8,%f8
+/* 0x094c 837 (15 17) */ ld [%i1+12],%g5
+/* 0x0950 838 (15 20) */ fsubd %f18,%f4,%f4
+/* 0x0954 839 (16 18) */ ld [%i1+16],%o0
+/* 0x0958 840 (16 21) */ fitod %f6,%f22
+/* 0x095c 841 (17 19) */ ld [%i1+20],%o1
+/* 0x0960 842 (17 22) */ fitod %f7,%f6
+/* 0x0964 843 (18 20) */ ld [%i1+24],%o2
+/* 0x0968 844 (18 23) */ fitod %f10,%f16
+/* 0x096c 845 (18 23) */ fmuld %f14,%f12,%f24
+/* 0x0970 846 (19 24) */ fitod %f20,%f28
+/* 0x0974 847 (19 24) */ fmuld %f14,%f8,%f14
+/* 0x0978 848 (20 25) */ fitod %f11,%f10
+/* 0x097c 849 (20 25) */ fmuld %f4,%f12,%f26
+/* 0x0980 850 (21 26) */ fsubd %f18,%f22,%f22
+/* 0x0984 851 (21 26) */ fmuld %f4,%f8,%f4
+/* 0x0988 852 (22 27) */ fsubd %f18,%f6,%f6
+/* 0x098c 853 (23 28) */ fdtox %f24,%f24
+/* 0x0990 854 (23 24) */ std %f24,[%sp+224]
+/* 0x0994 855 (24 29) */ fdtox %f14,%f14
+/* 0x0998 856 (24 25) */ std %f14,[%sp+232]
+/* 0x099c 857 (25 30) */ fdtox %f26,%f14
+/* 0x09a0 858 (25 26) */ std %f14,[%sp+208]
+/* 0x09a4 859 (26 28) */ ldx [%sp+224],%o4
+/* 0x09a8 860 (26 31) */ fitod %f21,%f20
+/* 0x09ac 861 (26 31) */ fmuld %f22,%f12,%f30
+/* 0x09b0 862 (27 29) */ ldx [%sp+232],%o5
+/* 0x09b4 863 (27 32) */ fsubd %f18,%f16,%f16
+/* 0x09b8 864 (27 32) */ fmuld %f22,%f8,%f22
+/* 0x09bc 865 (28 29) */ sllx %o4,19,%o4
+/* 0x09c0 866 (28 33) */ fdtox %f4,%f4
+/* 0x09c4 867 (28 29) */ std %f4,[%sp+216]
+/* 0x09c8 868 (28 33) */ fmuld %f6,%f12,%f24
+/* 0x09cc 869 (29 34) */ fsubd %f18,%f28,%f26
+/* 0x09d0 870 (29 30) */ add %o5,%o4,%o4
+/* 0x09d4 871 (29 34) */ fmuld %f6,%f8,%f6
+/* 0x09d8 872 (30 35) */ fsubd %f18,%f10,%f10
+/* 0x09dc 873 (30 31) */ add %o4,%g2,%g2
+/* 0x09e0 874 (30 31) */ st %g2,[%i0]
+/* 0x09e4 875 (31 33) */ ldx [%sp+208],%o7
+/* 0x09e8 876 (31 32) */ srlx %g2,32,%o5
+/* 0x09ec 877 (31 36) */ fsubd %f18,%f20,%f18
+/* 0x09f0 878 (32 37) */ fdtox %f30,%f28
+/* 0x09f4 879 (32 33) */ std %f28,[%sp+192]
+/* 0x09f8 880 (32 37) */ fmuld %f16,%f12,%f14
+/* 0x09fc 881 (33 34) */ sllx %o7,19,%o4
+/* 0x0a00 882 (33 35) */ ldx [%sp+216],%o7
+/* 0x0a04 883 (33 38) */ fdtox %f22,%f20
+/* 0x0a08 884 (33 38) */ fmuld %f16,%f8,%f16
+/* 0x0a0c 885 (34 35) */ std %f20,[%sp+200]
+/* 0x0a10 886 (34 39) */ fdtox %f24,%f20
+/* 0x0a14 887 (34 39) */ fmuld %f26,%f12,%f22
+/* 0x0a18 888 (35 36) */ std %f20,[%sp+176]
+/* 0x0a1c 889 (35 36) */ add %o7,%o4,%o4
+/* 0x0a20 890 (35 40) */ fdtox %f6,%f6
+/* 0x0a24 891 (35 40) */ fmuld %f10,%f12,%f4
+/* 0x0a28 892 (36 38) */ ldx [%sp+192],%o3
+/* 0x0a2c 893 (36 37) */ add %o4,%g3,%g3
+/* 0x0a30 894 (36 41) */ fmuld %f10,%f8,%f10
+/* 0x0a34 895 (37 38) */ std %f6,[%sp+184]
+/* 0x0a38 896 (37 38) */ add %g3,%o5,%g3
+/* 0x0a3c 897 (37 42) */ fdtox %f14,%f6
+/* 0x0a40 898 (37 42) */ fmuld %f26,%f8,%f20
+/* 0x0a44 899 (38 40) */ ldx [%sp+200],%o4
+/* 0x0a48 900 (38 39) */ sllx %o3,19,%o3
+/* 0x0a4c 901 (38 39) */ srlx %g3,32,%o5
+/* 0x0a50 902 (38 43) */ fdtox %f16,%f14
+/* 0x0a54 903 (39 40) */ std %f6,[%sp+160]
+/* 0x0a58 904 (39 44) */ fmuld %f18,%f12,%f12
+/* 0x0a5c 905 (40 42) */ ldx [%sp+176],%o7
+/* 0x0a60 906 (40 41) */ add %o4,%o3,%o3
+/* 0x0a64 907 (40 45) */ fdtox %f4,%f16
+/* 0x0a68 908 (40 45) */ fmuld %f18,%f8,%f18
+/* 0x0a6c 909 (41 42) */ std %f14,[%sp+168]
+/* 0x0a70 910 (41 42) */ add %o3,%g4,%g4
+/* 0x0a74 911 (41 46) */ fdtox %f10,%f4
+/* 0x0a78 912 (42 44) */ ldx [%sp+184],%o3
+/* 0x0a7c 913 (42 43) */ sllx %o7,19,%o4
+/* 0x0a80 914 (42 43) */ add %g4,%o5,%g4
+/* 0x0a84 915 (42 47) */ fdtox %f22,%f14
+/* 0x0a88 916 (43 44) */ std %f16,[%sp+144]
+/* 0x0a8c 917 (43 44) */ srlx %g4,32,%o5
+/* 0x0a90 918 (43 48) */ fdtox %f20,%f6
+/* 0x0a94 919 (44 46) */ ldx [%sp+160],%o7
+/* 0x0a98 920 (44 45) */ add %o3,%o4,%o3
+/* 0x0a9c 921 (44 49) */ fdtox %f12,%f16
+/* 0x0aa0 922 (45 46) */ std %f4,[%sp+152]
+/* 0x0aa4 923 (45 46) */ add %o3,%g5,%g5
+/* 0x0aa8 924 (45 50) */ fdtox %f18,%f8
+/* 0x0aac 925 (46 48) */ ldx [%sp+168],%o3
+/* 0x0ab0 926 (46 47) */ sllx %o7,19,%o4
+/* 0x0ab4 927 (46 47) */ add %g5,%o5,%g5
+/* 0x0ab8 928 (47 48) */ std %f14,[%sp+128]
+/* 0x0abc 929 (47 48) */ srlx %g5,32,%o5
+/* 0x0ac0 930 (48 49) */ std %f6,[%sp+136]
+/* 0x0ac4 931 (48 49) */ add %o3,%o4,%o3
+/* 0x0ac8 932 (49 50) */ std %f16,[%sp+112]
+/* 0x0acc 933 (49 50) */ add %o3,%o0,%o0
+/* 0x0ad0 934 (50 52) */ ldx [%sp+144],%o7
+/* 0x0ad4 935 (50 51) */ add %o0,%o5,%o0
+/* 0x0ad8 936 (51 53) */ ldx [%sp+152],%o3
+/* 0x0adc 937 (52 53) */ std %f8,[%sp+120]
+/* 0x0ae0 938 (52 53) */ sllx %o7,19,%o4
+/* 0x0ae4 939 (52 53) */ srlx %o0,32,%o7
+/* 0x0ae8 940 (53 54) */ stx %o0,[%sp+104]
+/* 0x0aec 941 (53 54) */ add %o3,%o4,%o3
+/* 0x0af0 942 (54 56) */ ldx [%sp+128],%o5
+/* 0x0af4 943 (54 55) */ add %o3,%o1,%o1
+/* 0x0af8 944 (55 57) */ ldx [%sp+136],%o0
+/* 0x0afc 945 (55 56) */ add %o1,%o7,%o1
+/* 0x0b00 946 (56 57) */ st %g3,[%i0+4]
+/* 0x0b04 947 (56 57) */ sllx %o5,19,%o3
+/* 0x0b08 948 (57 59) */ ldx [%sp+112],%o4
+/* 0x0b0c 949 (57 58) */ add %o0,%o3,%o3
+/* 0x0b10 950 (58 60) */ ldx [%sp+120],%o0
+/* 0x0b14 951 (58 59) */ add %o3,%o2,%o2
+/* 0x0b18 952 (58 59) */ srlx %o1,32,%o3
+/* 0x0b1c 953 (59 60) */ st %o1,[%i0+20]
+/* 0x0b20 954 (59 60) */ sllx %o4,19,%g2
+/* 0x0b24 955 (59 60) */ add %o2,%o3,%o2
+/* 0x0b28 956 (60 62) */ ldx [%sp+96],%o4
+/* 0x0b2c 957 (60 61) */ srlx %o2,32,%g3
+/* 0x0b30 958 (60 61) */ add %o0,%g2,%g2
+/* 0x0b34 959 (61 63) */ ldx [%sp+104],%o0
+/* 0x0b38 960 (62 63) */ st %o2,[%i0+24]
+/* 0x0b3c 961 (62 63) */ add %g2,%o4,%g2
+/* 0x0b40 962 (63 64) */ st %o0,[%i0+16]
+/* 0x0b44 963 (63 64) */ add %g2,%g3,%g2
+/* 0x0b48 964 (64 65) */ st %g4,[%i0+8]
+/* 0x0b4c 968 (64 65) */ srlx %g2,32,%o7
+/* 0x0b50 969 (65 66) */ st %g5,[%i0+12]
+/* 0x0b54 970 (66 67) */ st %g2,[%i0+28]
+/* 0x0b58 971 (66 67) */ or %g0,%o7,%i0
+/* 0x0b5c (67 74) */ ret ! Result = %o1 %o0 %f0 %f1
+/* 0x0b60 (69 71) */ restore %g0,%g0,%g0
+
+!
+! ENTRY .L77000050
+!
+
+ .L77000050: /* frequency 1.0 confidence 0.0 */
+/* 0x0b64 978 ( 0 1) */ subcc %o2,16,%g0
+/* 0x0b68 979 ( 0 1) */ bne,pn %icc,.L77000073 ! tprob=0.50
+/* 0x0b6c ( 0 1) */ sethi %hi(0xfff80000),%g2
+/* 0x0b70 981 ( 1 5) */ ldd [%g5],%f4
+/* 0x0b74 982 ( 2 6) */ ldd [%g5+8],%f6
+/* 0x0b78 989 ( 2 3) */ andn %o1,%g2,%g2
+/* 0x0b7c 993 ( 2 3) */ srl %o1,19,%g3
+/* 0x0b80 994 ( 3 7) */ ldd [%g5+16],%f8
+/* 0x0b84 995 ( 4 8) */ fxnor %f0,%f4,%f4
+/* 0x0b88 996 ( 4 5) */ st %g2,[%sp+356]
+/* 0x0b8c 997 ( 5 9) */ ldd [%o0],%f20
+/* 0x0b90 998 ( 5 9) */ fxnor %f0,%f6,%f6
+/* 0x0b94 999 ( 6 7) */ st %g3,[%sp+352]
+/* 0x0b98 1000 ( 6 10) */ fxnor %f0,%f8,%f8
+/* 0x0b9c 1005 ( 7 11) */ ldd [%o0+8],%f30
+/* 0x0ba0 1006 ( 8 13) */ fitod %f4,%f22
+/* 0x0ba4 1007 ( 8 12) */ ldd [%g5+24],%f10
+/* 0x0ba8 1008 ( 9 12) */ fmovs %f20,%f24
+/* 0x0bac 1009 ( 9 13) */ ldd [%g5+32],%f12
+/* 0x0bb0 1010 (10 15) */ fitod %f5,%f4
+/* 0x0bb4 1011 (10 14) */ ldd [%g5+40],%f14
+/* 0x0bb8 1012 (11 14) */ fmovs %f20,%f26
+/* 0x0bbc 1013 (11 15) */ ldd [%g5+48],%f16
+/* 0x0bc0 1014 (12 14) */ ld [%sp+356],%f25
+/* 0x0bc4 1015 (12 17) */ fitod %f6,%f28
+/* 0x0bc8 1016 (13 15) */ ld [%sp+352],%f27
+/* 0x0bcc 1017 (13 18) */ fitod %f8,%f32
+/* 0x0bd0 1018 (14 19) */ fsubd %f30,%f22,%f22
+/* 0x0bd4 1019 (14 18) */ ldd [%g5+56],%f18
+/* 0x0bd8 1020 (15 20) */ fsubd %f24,%f20,%f24
+/* 0x0bdc 1021 (16 21) */ fsubd %f26,%f20,%f20
+/* 0x0be0 1022 (17 22) */ fsubd %f30,%f4,%f4
+/* 0x0be4 1023 (18 23) */ fsubd %f30,%f28,%f26
+/* 0x0be8 1024 (19 24) */ fitod %f7,%f6
+/* 0x0bec 1025 (20 25) */ fsubd %f30,%f32,%f28
+/* 0x0bf0 1026 (20 25) */ fmuld %f22,%f24,%f32
+/* 0x0bf4 1027 (21 26) */ fmuld %f22,%f20,%f22
+/* 0x0bf8 1028 (21 25) */ fxnor %f0,%f10,%f10
+/* 0x0bfc 1029 (22 27) */ fmuld %f4,%f24,%f44
+/* 0x0c00 1030 (22 27) */ fitod %f9,%f8
+/* 0x0c04 1031 (23 28) */ fmuld %f4,%f20,%f4
+/* 0x0c08 1032 (23 27) */ fxnor %f0,%f12,%f12
+/* 0x0c0c 1033 (24 29) */ fsubd %f30,%f6,%f6
+/* 0x0c10 1034 (24 29) */ fmuld %f26,%f24,%f46
+/* 0x0c14 1035 (25 30) */ fitod %f10,%f34
+/* 0x0c18 1036 (26 31) */ fdtox %f22,%f22
+/* 0x0c1c 1037 (26 27) */ std %f22,[%sp+336]
+/* 0x0c20 1038 (27 32) */ fmuld %f26,%f20,%f22
+/* 0x0c24 1039 (27 32) */ fdtox %f44,%f26
+/* 0x0c28 1040 (27 28) */ std %f26,[%sp+328]
+/* 0x0c2c 1041 (28 33) */ fdtox %f4,%f4
+/* 0x0c30 1042 (28 29) */ std %f4,[%sp+320]
+/* 0x0c34 1043 (29 34) */ fmuld %f6,%f24,%f26
+/* 0x0c38 1044 (29 34) */ fsubd %f30,%f8,%f8
+/* 0x0c3c 1045 (30 35) */ fdtox %f46,%f4
+/* 0x0c40 1046 (30 31) */ std %f4,[%sp+312]
+/* 0x0c44 1047 (31 36) */ fmuld %f28,%f24,%f4
+/* 0x0c48 1048 (31 36) */ fdtox %f32,%f32
+/* 0x0c4c 1049 (31 32) */ std %f32,[%sp+344]
+/* 0x0c50 1050 (32 37) */ fitod %f11,%f10
+/* 0x0c54 1051 (32 37) */ fmuld %f6,%f20,%f32
+/* 0x0c58 1052 (33 38) */ fsubd %f30,%f34,%f34
+/* 0x0c5c 1053 (34 39) */ fdtox %f22,%f6
+/* 0x0c60 1054 (34 35) */ std %f6,[%sp+304]
+/* 0x0c64 1058 (35 40) */ fitod %f12,%f36
+/* 0x0c68 1059 (35 40) */ fmuld %f28,%f20,%f6
+/* 0x0c6c 1060 (36 41) */ fdtox %f26,%f22
+/* 0x0c70 1061 (36 37) */ std %f22,[%sp+296]
+/* 0x0c74 1062 (37 42) */ fmuld %f8,%f24,%f22
+/* 0x0c78 1063 (37 42) */ fdtox %f4,%f4
+/* 0x0c7c 1064 (37 38) */ std %f4,[%sp+280]
+/* 0x0c80 1065 (38 43) */ fmuld %f8,%f20,%f8
+/* 0x0c84 1066 (38 43) */ fsubd %f30,%f10,%f10
+/* 0x0c88 1067 (39 44) */ fmuld %f34,%f24,%f4
+/* 0x0c8c 1068 (39 44) */ fitod %f13,%f12
+/* 0x0c90 1069 (40 45) */ fsubd %f30,%f36,%f36
+/* 0x0c94 1070 (41 46) */ fdtox %f6,%f6
+/* 0x0c98 1071 (41 42) */ std %f6,[%sp+272]
+/* 0x0c9c 1072 (42 46) */ fxnor %f0,%f14,%f14
+/* 0x0ca0 1073 (42 47) */ fmuld %f34,%f20,%f6
+/* 0x0ca4 1074 (43 48) */ fdtox %f22,%f22
+/* 0x0ca8 1075 (43 44) */ std %f22,[%sp+264]
+/* 0x0cac 1076 (44 49) */ fdtox %f8,%f8
+/* 0x0cb0 1077 (44 45) */ std %f8,[%sp+256]
+/* 0x0cb4 1078 (44 49) */ fmuld %f10,%f24,%f22
+/* 0x0cb8 1079 (45 50) */ fdtox %f4,%f4
+/* 0x0cbc 1080 (45 46) */ std %f4,[%sp+248]
+/* 0x0cc0 1081 (45 50) */ fmuld %f10,%f20,%f8
+/* 0x0cc4 1082 (46 51) */ fsubd %f30,%f12,%f4
+/* 0x0cc8 1083 (46 51) */ fmuld %f36,%f24,%f10
+/* 0x0ccc 1084 (47 52) */ fitod %f14,%f38
+/* 0x0cd0 1085 (48 53) */ fdtox %f6,%f6
+/* 0x0cd4 1086 (48 49) */ std %f6,[%sp+240]
+/* 0x0cd8 1087 (49 54) */ fdtox %f22,%f12
+/* 0x0cdc 1088 (49 50) */ std %f12,[%sp+232]
+/* 0x0ce0 1089 (49 54) */ fmuld %f36,%f20,%f6
+/* 0x0ce4 1090 (50 55) */ fdtox %f8,%f8
+/* 0x0ce8 1091 (50 51) */ std %f8,[%sp+224]
+/* 0x0cec 1092 (51 56) */ fdtox %f10,%f22
+/* 0x0cf0 1093 (51 52) */ std %f22,[%sp+216]
+/* 0x0cf4 1094 (51 56) */ fmuld %f4,%f24,%f8
+/* 0x0cf8 1095 (52 57) */ fitod %f15,%f14
+/* 0x0cfc 1096 (52 57) */ fmuld %f4,%f20,%f4
+/* 0x0d00 1097 (53 58) */ fsubd %f30,%f38,%f22
+/* 0x0d04 1098 (54 58) */ fxnor %f0,%f16,%f16
+/* 0x0d08 1099 (55 60) */ fdtox %f6,%f6
+/* 0x0d0c 1100 (55 56) */ std %f6,[%sp+208]
+/* 0x0d10 1101 (56 61) */ fdtox %f8,%f6
+/* 0x0d14 1102 (56 57) */ std %f6,[%sp+200]
+/* 0x0d18 1103 (57 62) */ fsubd %f30,%f14,%f10
+/* 0x0d1c 1104 (58 63) */ fitod %f16,%f40
+/* 0x0d20 1105 (58 63) */ fmuld %f22,%f24,%f6
+/* 0x0d24 1106 (59 64) */ fdtox %f4,%f4
+/* 0x0d28 1107 (59 60) */ std %f4,[%sp+192]
+/* 0x0d2c 1108 (60 65) */ fitod %f17,%f16
+/* 0x0d30 1109 (60 65) */ fmuld %f22,%f20,%f4
+/* 0x0d34 1110 (61 65) */ fxnor %f0,%f18,%f18
+/* 0x0d38 1111 (62 67) */ fdtox %f32,%f32
+/* 0x0d3c 1112 (62 63) */ std %f32,[%sp+288]
+/* 0x0d40 1113 (62 67) */ fmuld %f10,%f24,%f8
+/* 0x0d44 1114 (63 68) */ fdtox %f6,%f6
+/* 0x0d48 1115 (63 64) */ std %f6,[%sp+184]
+/* 0x0d4c 1116 (63 68) */ fmuld %f10,%f20,%f22
+/* 0x0d50 1117 (64 69) */ fsubd %f30,%f40,%f6
+/* 0x0d54 1118 (65 70) */ fdtox %f4,%f4
+/* 0x0d58 1119 (65 66) */ std %f4,[%sp+176]
+/* 0x0d5c 1120 (66 71) */ fsubd %f30,%f16,%f10
+/* 0x0d60 1121 (67 72) */ fdtox %f8,%f4
+/* 0x0d64 1122 (67 68) */ std %f4,[%sp+168]
+/* 0x0d68 1123 (68 73) */ fdtox %f22,%f4
+/* 0x0d6c 1124 (68 69) */ std %f4,[%sp+160]
+/* 0x0d70 1125 (69 74) */ fitod %f18,%f42
+/* 0x0d74 1126 (69 74) */ fmuld %f6,%f24,%f4
+/* 0x0d78 1127 (70 75) */ fmuld %f6,%f20,%f22
+/* 0x0d7c 1128 (71 76) */ fmuld %f10,%f24,%f6
+/* 0x0d80 1129 (72 77) */ fmuld %f10,%f20,%f8
+/* 0x0d84 1130 (74 79) */ fdtox %f4,%f4
+/* 0x0d88 1131 (74 75) */ std %f4,[%sp+152]
+/* 0x0d8c 1132 (75 80) */ fsubd %f30,%f42,%f4
+/* 0x0d90 1133 (76 81) */ fdtox %f6,%f6
+/* 0x0d94 1134 (76 77) */ std %f6,[%sp+136]
+/* 0x0d98 1135 (77 82) */ fdtox %f22,%f22
+/* 0x0d9c 1136 (77 78) */ std %f22,[%sp+144]
+/* 0x0da0 1137 (78 83) */ fdtox %f8,%f22
+/* 0x0da4 1138 (78 79) */ std %f22,[%sp+128]
+/* 0x0da8 1139 (79 84) */ fitod %f19,%f22
+/* 0x0dac 1140 (80 85) */ fmuld %f4,%f24,%f6
+/* 0x0db0 1141 (81 86) */ fmuld %f4,%f20,%f4
+/* 0x0db4 1142 (84 89) */ fsubd %f30,%f22,%f22
+/* 0x0db8 1143 (85 90) */ fdtox %f6,%f6
+/* 0x0dbc 1144 (85 86) */ std %f6,[%sp+120]
+/* 0x0dc0 1145 (86 91) */ fdtox %f4,%f4
+/* 0x0dc4 1146 (86 87) */ std %f4,[%sp+112]
+/* 0x0dc8 1150 (87 89) */ ldx [%sp+336],%g2
+/* 0x0dcc 1151 (88 90) */ ldx [%sp+344],%g3
+/* 0x0dd0 1152 (89 91) */ ld [%i1],%g4
+/* 0x0dd4 1153 (89 90) */ sllx %g2,19,%g2
+/* 0x0dd8 1154 (89 94) */ fmuld %f22,%f20,%f4
+/* 0x0ddc 1155 (90 92) */ ldx [%sp+328],%g5
+/* 0x0de0 1156 (90 91) */ add %g3,%g2,%g2
+/* 0x0de4 1157 (90 95) */ fmuld %f22,%f24,%f6
+/* 0x0de8 1158 (91 93) */ ldx [%sp+320],%g3
+/* 0x0dec 1159 (91 92) */ add %g2,%g4,%g4
+/* 0x0df0 1160 (92 94) */ ldx [%sp+304],%o0
+/* 0x0df4 1161 (93 94) */ st %g4,[%i0]
+/* 0x0df8 1162 (93 94) */ sllx %g3,19,%g2
+/* 0x0dfc 1163 (93 94) */ srlx %g4,32,%g4
+/* 0x0e00 1164 (94 96) */ ld [%i1+4],%g3
+/* 0x0e04 1165 (94 95) */ add %g5,%g2,%g2
+/* 0x0e08 1166 (94 99) */ fdtox %f4,%f4
+/* 0x0e0c 1167 (95 97) */ ldx [%sp+312],%g5
+/* 0x0e10 1168 (95 100) */ fdtox %f6,%f6
+/* 0x0e14 1169 (96 98) */ ldx [%sp+288],%o1
+/* 0x0e18 1170 (96 97) */ add %g2,%g3,%g2
+/* 0x0e1c 1171 (96 97) */ sllx %o0,19,%g3
+/* 0x0e20 1172 (97 99) */ ldx [%sp+272],%o2
+/* 0x0e24 1173 (97 98) */ add %g2,%g4,%g2
+/* 0x0e28 1174 (97 98) */ add %g5,%g3,%g3
+/* 0x0e2c 1175 (98 100) */ ld [%i1+8],%g4
+/* 0x0e30 1176 (98 99) */ srlx %g2,32,%o0
+/* 0x0e34 1177 (99 101) */ ldx [%sp+296],%g5
+/* 0x0e38 1178 (100 101) */ st %g2,[%i0+4]
+/* 0x0e3c 1179 (100 101) */ sllx %o2,19,%g2
+/* 0x0e40 1180 (100 101) */ add %g3,%g4,%g3
+/* 0x0e44 1181 (101 103) */ ldx [%sp+256],%o2
+/* 0x0e48 1182 (101 102) */ sllx %o1,19,%g4
+/* 0x0e4c 1183 (101 102) */ add %g3,%o0,%g3
+/* 0x0e50 1184 (102 104) */ ld [%i1+12],%o0
+/* 0x0e54 1185 (102 103) */ srlx %g3,32,%o1
+/* 0x0e58 1186 (102 103) */ add %g5,%g4,%g4
+/* 0x0e5c 1187 (103 105) */ ldx [%sp+280],%g5
+/* 0x0e60 1188 (104 105) */ st %g3,[%i0+8]
+/* 0x0e64 1189 (104 105) */ sllx %o2,19,%g3
+/* 0x0e68 1190 (104 105) */ add %g4,%o0,%g4
+/* 0x0e6c 1191 (105 107) */ ld [%i1+16],%o0
+/* 0x0e70 1192 (105 106) */ add %g5,%g2,%g2
+/* 0x0e74 1193 (105 106) */ add %g4,%o1,%g4
+/* 0x0e78 1194 (106 108) */ ldx [%sp+264],%g5
+/* 0x0e7c 1195 (106 107) */ srlx %g4,32,%o1
+/* 0x0e80 1196 (107 109) */ ldx [%sp+240],%o2
+/* 0x0e84 1197 (107 108) */ add %g2,%o0,%g2
+/* 0x0e88 1198 (108 110) */ ld [%i1+20],%o0
+/* 0x0e8c 1199 (108 109) */ add %g5,%g3,%g3
+/* 0x0e90 1200 (108 109) */ add %g2,%o1,%g2
+/* 0x0e94 1201 (109 111) */ ldx [%sp+248],%g5
+/* 0x0e98 1202 (109 110) */ srlx %g2,32,%o1
+/* 0x0e9c 1203 (110 111) */ st %g4,[%i0+12]
+/* 0x0ea0 1204 (110 111) */ sllx %o2,19,%g4
+/* 0x0ea4 1205 (110 111) */ add %g3,%o0,%g3
+/* 0x0ea8 1206 (111 113) */ ld [%i1+24],%o0
+/* 0x0eac 1207 (111 112) */ add %g5,%g4,%g4
+/* 0x0eb0 1208 (111 112) */ add %g3,%o1,%g3
+/* 0x0eb4 1209 (112 114) */ ldx [%sp+224],%o2
+/* 0x0eb8 1210 (112 113) */ srlx %g3,32,%o1
+/* 0x0ebc 1211 (113 115) */ ldx [%sp+232],%g5
+/* 0x0ec0 1212 (113 114) */ add %g4,%o0,%g4
+/* 0x0ec4 1213 (114 115) */ st %g2,[%i0+16]
+/* 0x0ec8 1214 (114 115) */ sllx %o2,19,%g2
+/* 0x0ecc 1215 (114 115) */ add %g4,%o1,%g4
+/* 0x0ed0 1216 (115 117) */ ld [%i1+28],%o0
+/* 0x0ed4 1217 (115 116) */ srlx %g4,32,%o1
+/* 0x0ed8 1218 (115 116) */ add %g5,%g2,%g2
+/* 0x0edc 1222 (116 118) */ ldx [%sp+208],%o2
+/* 0x0ee0 1223 (117 119) */ ldx [%sp+216],%g5
+/* 0x0ee4 1224 (117 118) */ add %g2,%o0,%g2
+/* 0x0ee8 1225 (118 119) */ st %g3,[%i0+20]
+/* 0x0eec 1226 (118 119) */ sllx %o2,19,%g3
+/* 0x0ef0 1227 (118 119) */ add %g2,%o1,%g2
+/* 0x0ef4 1228 (119 121) */ ld [%i1+32],%o0
+/* 0x0ef8 1229 (119 120) */ srlx %g2,32,%o1
+/* 0x0efc 1230 (119 120) */ add %g5,%g3,%g3
+/* 0x0f00 1231 (120 122) */ ldx [%sp+192],%o2
+/* 0x0f04 1232 (121 123) */ ldx [%sp+200],%g5
+/* 0x0f08 1233 (121 122) */ add %g3,%o0,%g3
+/* 0x0f0c 1234 (122 123) */ st %g4,[%i0+24]
+/* 0x0f10 1235 (122 123) */ sllx %o2,19,%g4
+/* 0x0f14 1236 (122 123) */ add %g3,%o1,%g3
+/* 0x0f18 1237 (123 125) */ ld [%i1+36],%o0
+/* 0x0f1c 1238 (123 124) */ srlx %g3,32,%o1
+/* 0x0f20 1239 (123 124) */ add %g5,%g4,%g4
+/* 0x0f24 1240 (124 126) */ ldx [%sp+176],%o2
+/* 0x0f28 1241 (125 127) */ ldx [%sp+184],%g5
+/* 0x0f2c 1242 (125 126) */ add %g4,%o0,%g4
+/* 0x0f30 1243 (126 127) */ st %g2,[%i0+28]
+/* 0x0f34 1244 (126 127) */ sllx %o2,19,%g2
+/* 0x0f38 1245 (126 127) */ add %g4,%o1,%g4
+/* 0x0f3c 1246 (127 129) */ ld [%i1+40],%o0
+/* 0x0f40 1247 (127 128) */ srlx %g4,32,%o1
+/* 0x0f44 1248 (127 128) */ add %g5,%g2,%g2
+/* 0x0f48 1249 (128 130) */ ldx [%sp+160],%o2
+/* 0x0f4c 1250 (129 131) */ ldx [%sp+168],%g5
+/* 0x0f50 1251 (129 130) */ add %g2,%o0,%g2
+/* 0x0f54 1252 (130 131) */ st %g3,[%i0+32]
+/* 0x0f58 1253 (130 131) */ sllx %o2,19,%g3
+/* 0x0f5c 1254 (130 131) */ add %g2,%o1,%g2
+/* 0x0f60 1255 (131 133) */ ld [%i1+44],%o0
+/* 0x0f64 1256 (131 132) */ srlx %g2,32,%o1
+/* 0x0f68 1257 (131 132) */ add %g5,%g3,%g3
+/* 0x0f6c 1258 (132 134) */ ldx [%sp+144],%o2
+/* 0x0f70 1259 (133 135) */ ldx [%sp+152],%g5
+/* 0x0f74 1260 (133 134) */ add %g3,%o0,%g3
+/* 0x0f78 1261 (134 135) */ st %g4,[%i0+36]
+/* 0x0f7c 1262 (134 135) */ sllx %o2,19,%g4
+/* 0x0f80 1263 (134 135) */ add %g3,%o1,%g3
+/* 0x0f84 1264 (135 137) */ ld [%i1+48],%o0
+/* 0x0f88 1265 (135 136) */ srlx %g3,32,%o1
+/* 0x0f8c 1266 (135 136) */ add %g5,%g4,%g4
+/* 0x0f90 1267 (136 138) */ ldx [%sp+128],%o2
+/* 0x0f94 1268 (137 139) */ ldx [%sp+136],%g5
+/* 0x0f98 1269 (137 138) */ add %g4,%o0,%g4
+/* 0x0f9c 1270 (138 139) */ std %f4,[%sp+96]
+/* 0x0fa0 1271 (138 139) */ add %g4,%o1,%g4
+/* 0x0fa4 1272 (139 140) */ st %g2,[%i0+40]
+/* 0x0fa8 1273 (139 140) */ sllx %o2,19,%g2
+/* 0x0fac 1274 (139 140) */ srlx %g4,32,%o1
+/* 0x0fb0 1275 (140 142) */ ld [%i1+52],%o0
+/* 0x0fb4 1276 (140 141) */ add %g5,%g2,%g2
+/* 0x0fb8 1277 (141 142) */ std %f6,[%sp+104]
+/* 0x0fbc 1278 (142 144) */ ldx [%sp+120],%g5
+/* 0x0fc0 1279 (142 143) */ add %g2,%o0,%g2
+/* 0x0fc4 1280 (143 144) */ st %g3,[%i0+44]
+/* 0x0fc8 1281 (143 144) */ add %g2,%o1,%g2
+/* 0x0fcc 1282 (144 146) */ ldx [%sp+112],%o2
+/* 0x0fd0 1283 (144 145) */ srlx %g2,32,%o1
+/* 0x0fd4 1284 (145 147) */ ld [%i1+56],%o0
+/* 0x0fd8 1285 (146 147) */ st %g4,[%i0+48]
+/* 0x0fdc 1286 (146 147) */ sllx %o2,19,%g3
+/* 0x0fe0 1287 (147 149) */ ldx [%sp+96],%o2
+/* 0x0fe4 1288 (147 148) */ add %g5,%g3,%g3
+/* 0x0fe8 1289 (148 150) */ ldx [%sp+104],%g5
+/* 0x0fec 1290 (148 149) */ add %g3,%o0,%g3
+/* 0x0ff0 1291 (149 151) */ ld [%i1+60],%o0
+/* 0x0ff4 1292 (149 150) */ sllx %o2,19,%g4
+/* 0x0ff8 1293 (149 150) */ add %g3,%o1,%g3
+/* 0x0ffc 1294 (150 151) */ st %g2,[%i0+52]
+/* 0x1000 1295 (150 151) */ srlx %g3,32,%o1
+/* 0x1004 1296 (150 151) */ add %g5,%g4,%g4
+/* 0x1008 1297 (151 152) */ st %g3,[%i0+56]
+/* 0x100c 1298 (151 152) */ add %g4,%o0,%g2
+/* 0x1010 1299 (152 153) */ add %g2,%o1,%g2
+/* 0x1014 1300 (152 153) */ st %g2,[%i0+60]
+/* 0x1018 1304 (153 154) */ srlx %g2,32,%o7
+
+!
+! ENTRY .L77000061
+!
+
+ .L77000061: /* frequency 1.0 confidence 0.0 */
+/* 0x119c 1437 ( 0 1) */ or %g0,%o7,%i0
+
+!
+! ENTRY .L900000159
+!
+
+ .L900000159: /* frequency 1.0 confidence 0.0 */
+/* 0x11a0 ( 0 7) */ ret ! Result = %o1 %o0 %f0 %f1
+/* 0x11a4 ( 2 4) */ restore %g0,%g0,%g0
+
+!
+! ENTRY .L77000073
+!
+
+ .L77000073: /* frequency 1.0 confidence 0.0 */
+ or %g0, %i4, %o2
+ or %g0, %o0, %o1
+ or %g0, %i3, %o0
+
+!
+! ENTRY .L77000052
+!
+
+ .L77000052: /* frequency 1.0 confidence 0.0 */
+/* 0x1028 1318 ( 0 1) */ andn %o2,%g2,%g2
+/* 0x102c 1319 ( 0 1) */ st %g2,[%sp+96]
+/* 0x1030 1325 ( 0 1) */ add %o0,1,%g3
+/* 0x1034 1326 ( 0 1) */ fmovd %f0,%f14
+/* 0x1038 1327 ( 1 2) */ srl %o2,19,%g2
+/* 0x103c 1328 ( 1 2) */ st %g2,[%sp+92]
+/* 0x1040 1329 ( 1 2) */ or %g0,0,%o5
+/* 0x1044 1330 ( 2 3) */ srl %g3,31,%g2
+/* 0x1048 1331 ( 2 5) */ ldd [%o1],%f6
+/* 0x104c 1335 ( 2 3) */ sethi %hi(0x1800),%g1
+/* 0x1050 1336 ( 3 4) */ add %g3,%g2,%g2
+/* 0x1054 1337 ( 3 4) */ xor %g1,-304,%g1
+/* 0x1058 1338 ( 3 6) */ ldd [%o1+8],%f20
+/* 0x105c 1339 ( 4 5) */ sra %g2,1,%o3
+/* 0x1060 1340 ( 4 5) */ fmovs %f6,%f8
+/* 0x1064 1341 ( 4 5) */ add %g1,%fp,%g3
+/* 0x1068 1342 ( 5 6) */ fmovs %f6,%f10
+/* 0x106c 1343 ( 5 7) */ ld [%sp+96],%f9
+/* 0x1070 1344 ( 5 6) */ subcc %o3,0,%g0
+/* 0x1074 1345 ( 6 8) */ ld [%sp+92],%f11
+/* 0x1078 1346 ( 6 7) */ sethi %hi(0x1800),%g1
+/* 0x107c 1347 ( 6 7) */ or %g0,%i2,%o1
+/* 0x1080 1348 ( 7 10) */ fsubd %f8,%f6,%f18
+/* 0x1084 1349 ( 7 8) */ xor %g1,-296,%g1
+/* 0x1088 1350 ( 7 8) */ or %g0,0,%g4
+/* 0x108c 1351 ( 8 11) */ fsubd %f10,%f6,%f16
+/* 0x1090 1352 ( 8 9) */ bleu,pt %icc,.L990000162 ! tprob=0.50
+/* 0x1094 ( 8 9) */ subcc %o0,0,%g0
+/* 0x1098 1354 ( 9 10) */ add %g1,%fp,%g2
+/* 0x109c 1355 ( 9 10) */ sethi %hi(0x1800),%g1
+/* 0x10a0 1356 (10 11) */ xor %g1,-288,%g1
+/* 0x10a4 1357 (10 11) */ subcc %o3,7,%g0
+/* 0x10a8 1358 (11 12) */ add %g1,%fp,%o7
+/* 0x10ac 1359 (11 12) */ sethi %hi(0x1800),%g1
+/* 0x10b0 1360 (12 13) */ xor %g1,-280,%g1
+/* 0x10b4 1361 (13 14) */ add %g1,%fp,%o4
+/* 0x10b8 1362 (13 14) */ bl,pn %icc,.L77000054 ! tprob=0.50
+/* 0x10bc (13 14) */ sub %o3,2,%o2
+/* 0x10c0 1364 (14 17) */ ldd [%o1],%f2
+/* 0x10c4 1365 (14 15) */ add %o1,16,%g5
+/* 0x10c8 1366 (14 15) */ or %g0,4,%g4
+/* 0x10cc 1367 (15 18) */ ldd [%o1+8],%f0
+/* 0x10d0 1368 (15 16) */ add %o1,8,%o1
+/* 0x10d4 1369 (16 18) */ fxnor %f14,%f2,%f6
+/* 0x10d8 1370 (16 19) */ ldd [%g5],%f4
+/* 0x10dc 1371 (16 17) */ add %o1,16,%o1
+/* 0x10e0 1372 (17 19) */ fxnor %f14,%f0,%f12
+/* 0x10e4 1373 (17 20) */ ldd [%o1],%f0
+/* 0x10e8 1374 (17 18) */ add %o1,8,%o1
+/* 0x10ec 1375 (18 21) */ fitod %f7,%f2
+/* 0x10f0 1376 (19 22) */ fitod %f6,%f6
+/* 0x10f4 1377 (20 22) */ fxnor %f14,%f4,%f10
+/* 0x10f8 1378 (21 24) */ fsubd %f20,%f2,%f2
+/* 0x10fc 1379 (22 24) */ fxnor %f14,%f0,%f8
+/* 0x1100 1380 (23 26) */ fitod %f13,%f4
+/* 0x1104 1381 (24 27) */ fsubd %f20,%f6,%f6
+/* 0x1108 1382 (24 27) */ fmuld %f2,%f16,%f0
+
+!
+! ENTRY .L990000154
+!
+
+ .L990000154: /* frequency 1.0 confidence 0.0 */
+/* 0x110c 1384 ( 0 3) */ ldd [%o1],%f24
+/* 0x1110 1385 ( 0 1) */ add %g4,3,%g4
+/* 0x1114 1386 ( 0 1) */ add %o4,96,%o4
+/* 0x1118 1387 ( 1 4) */ fitod %f11,%f22
+/* 0x111c 1388 ( 2 5) */ fsubd %f20,%f4,%f26
+/* 0x1120 1389 ( 2 3) */ subcc %g4,%o2,%g0
+/* 0x1124 1390 ( 2 3) */ add %o7,96,%o7
+/* 0x1128 1391 ( 2 5) */ fmuld %f6,%f18,%f28
+/* 0x112c 1392 ( 3 6) */ fmuld %f6,%f16,%f6
+/* 0x1130 1393 ( 3 4) */ add %g2,96,%g2
+/* 0x1134 1394 ( 3 4) */ add %g3,96,%g3
+/* 0x1138 1395 ( 4 7) */ fdtox %f0,%f0
+/* 0x113c 1396 ( 5 8) */ fitod %f12,%f4
+/* 0x1140 1397 ( 5 8) */ fmuld %f2,%f18,%f2
+/* 0x1144 1398 ( 6 9) */ fdtox %f28,%f12
+/* 0x1148 1399 ( 7 10) */ fdtox %f6,%f6
+/* 0x114c 1400 ( 7 8) */ std %f12,[%g3-96]
+/* 0x1150 1401 ( 8 9) */ std %f6,[%g2-96]
+/* 0x1154 1402 ( 8 11) */ fdtox %f2,%f2
+/* 0x1158 1403 ( 9 12) */ fsubd %f20,%f4,%f6
+/* 0x115c 1404 ( 9 10) */ std %f2,[%o7-96]
+/* 0x1160 1405 ( 9 10) */ add %o1,8,%o1
+/* 0x1164 1406 (10 12) */ fxnor %f14,%f24,%f12
+/* 0x1168 1407 (10 13) */ fmuld %f26,%f16,%f4
+/* 0x116c 1408 (10 11) */ std %f0,[%o4-96]
+/* 0x1170 1409 (11 14) */ ldd [%o1],%f0
+/* 0x1174 1410 (11 14) */ fitod %f9,%f2
+/* 0x1178 1411 (12 15) */ fsubd %f20,%f22,%f28
+/* 0x117c 1412 (12 15) */ fmuld %f6,%f18,%f24
+/* 0x1180 1413 (13 16) */ fmuld %f6,%f16,%f22
+/* 0x1184 1414 (13 16) */ fdtox %f4,%f4
+/* 0x1188 1415 (14 17) */ fitod %f10,%f6
+/* 0x118c 1416 (14 17) */ fmuld %f26,%f18,%f10
+/* 0x1190 1417 (15 18) */ fdtox %f24,%f24
+/* 0x1194 1418 (16 19) */ fdtox %f22,%f22
+/* 0x1198 1419 (16 17) */ std %f24,[%g3-64]
+/* 0x119c 1420 (17 18) */ std %f22,[%g2-64]
+/* 0x11a0 1421 (17 20) */ fdtox %f10,%f10
+/* 0x11a4 1422 (18 21) */ fsubd %f20,%f6,%f6
+/* 0x11a8 1423 (18 19) */ std %f10,[%o7-64]
+/* 0x11ac 1424 (18 19) */ add %o1,8,%o1
+/* 0x11b0 1425 (19 21) */ fxnor %f14,%f0,%f10
+/* 0x11b4 1426 (19 22) */ fmuld %f28,%f16,%f0
+/* 0x11b8 1427 (19 20) */ std %f4,[%o4-64]
+/* 0x11bc 1428 (20 23) */ ldd [%o1],%f22
+/* 0x11c0 1429 (20 23) */ fitod %f13,%f4
+/* 0x11c4 1430 (21 24) */ fsubd %f20,%f2,%f2
+/* 0x11c8 1431 (21 24) */ fmuld %f6,%f18,%f26
+/* 0x11cc 1432 (22 25) */ fmuld %f6,%f16,%f24
+/* 0x11d0 1433 (22 25) */ fdtox %f0,%f0
+/* 0x11d4 1434 (23 26) */ fitod %f8,%f6
+/* 0x11d8 1435 (23 26) */ fmuld %f28,%f18,%f8
+/* 0x11dc 1436 (24 27) */ fdtox %f26,%f26
+/* 0x11e0 1437 (25 28) */ fdtox %f24,%f24
+/* 0x11e4 1438 (25 26) */ std %f26,[%g3-32]
+/* 0x11e8 1439 (26 27) */ std %f24,[%g2-32]
+/* 0x11ec 1440 (26 29) */ fdtox %f8,%f8
+/* 0x11f0 1441 (27 30) */ fsubd %f20,%f6,%f6
+/* 0x11f4 1442 (27 28) */ std %f8,[%o7-32]
+/* 0x11f8 1443 (27 28) */ add %o1,8,%o1
+/* 0x11fc 1444 (28 30) */ fxnor %f14,%f22,%f8
+/* 0x1200 1445 (28 29) */ std %f0,[%o4-32]
+/* 0x1204 1446 (28 29) */ bcs,pt %icc,.L990000154 ! tprob=0.50
+/* 0x1208 (28 31) */ fmuld %f2,%f16,%f0
+
+!
+! ENTRY .L990000157
+!
+
+ .L990000157: /* frequency 1.0 confidence 0.0 */
+/* 0x120c 1449 ( 0 3) */ fitod %f12,%f28
+/* 0x1210 1450 ( 0 3) */ fmuld %f6,%f18,%f24
+/* 0x1214 1451 ( 0 1) */ add %g3,128,%g3
+/* 0x1218 1452 ( 1 4) */ fitod %f10,%f12
+/* 0x121c 1453 ( 1 4) */ fmuld %f6,%f16,%f26
+/* 0x1220 1454 ( 1 2) */ add %g2,128,%g2
+/* 0x1224 1455 ( 2 5) */ fsubd %f20,%f4,%f4
+/* 0x1228 1456 ( 2 5) */ fmuld %f2,%f18,%f22
+/* 0x122c 1457 ( 2 3) */ add %o7,128,%o7
+/* 0x1230 1458 ( 3 6) */ fdtox %f24,%f6
+/* 0x1234 1459 ( 3 4) */ std %f6,[%g3-128]
+/* 0x1238 1460 ( 3 4) */ add %o4,128,%o4
+/* 0x123c 1461 ( 4 7) */ fsubd %f20,%f28,%f2
+/* 0x1240 1462 ( 4 5) */ subcc %g4,%o3,%g0
+/* 0x1244 1463 ( 5 8) */ fitod %f11,%f6
+/* 0x1248 1464 ( 5 8) */ fmuld %f4,%f18,%f24
+/* 0x124c 1465 ( 6 9) */ fdtox %f26,%f10
+/* 0x1250 1466 ( 6 7) */ std %f10,[%g2-128]
+/* 0x1254 1467 ( 7 10) */ fdtox %f22,%f10
+/* 0x1258 1468 ( 7 8) */ std %f10,[%o7-128]
+/* 0x125c 1469 ( 7 10) */ fmuld %f2,%f18,%f26
+/* 0x1260 1470 ( 8 11) */ fsubd %f20,%f12,%f10
+/* 0x1264 1471 ( 8 11) */ fmuld %f2,%f16,%f2
+/* 0x1268 1472 ( 9 12) */ fsubd %f20,%f6,%f22
+/* 0x126c 1473 ( 9 12) */ fmuld %f4,%f16,%f12
+/* 0x1270 1474 (10 13) */ fdtox %f0,%f0
+/* 0x1274 1475 (10 11) */ std %f0,[%o4-128]
+/* 0x1278 1476 (11 14) */ fitod %f8,%f4
+/* 0x127c 1477 (11 14) */ fmuld %f10,%f18,%f6
+/* 0x1280 1478 (12 15) */ fdtox %f26,%f0
+/* 0x1284 1479 (12 13) */ std %f0,[%g3-96]
+/* 0x1288 1480 (12 15) */ fmuld %f10,%f16,%f10
+/* 0x128c 1481 (13 16) */ fdtox %f2,%f2
+/* 0x1290 1482 (13 14) */ std %f2,[%g2-96]
+/* 0x1294 1483 (14 17) */ fitod %f9,%f0
+/* 0x1298 1484 (14 17) */ fmuld %f22,%f18,%f2
+/* 0x129c 1485 (15 18) */ fdtox %f24,%f8
+/* 0x12a0 1486 (15 16) */ std %f8,[%o7-96]
+/* 0x12a4 1487 (16 19) */ fsubd %f20,%f4,%f4
+/* 0x12a8 1488 (16 19) */ fmuld %f22,%f16,%f8
+/* 0x12ac 1489 (17 20) */ fdtox %f12,%f12
+/* 0x12b0 1490 (17 18) */ std %f12,[%o4-96]
+/* 0x12b4 1491 (18 21) */ fsubd %f20,%f0,%f0
+/* 0x12b8 1492 (19 22) */ fdtox %f6,%f6
+/* 0x12bc 1493 (19 20) */ std %f6,[%g3-64]
+/* 0x12c0 1494 (20 23) */ fdtox %f10,%f10
+/* 0x12c4 1495 (20 21) */ std %f10,[%g2-64]
+/* 0x12c8 1496 (20 23) */ fmuld %f4,%f18,%f6
+/* 0x12cc 1497 (21 24) */ fdtox %f2,%f2
+/* 0x12d0 1498 (21 22) */ std %f2,[%o7-64]
+/* 0x12d4 1499 (21 24) */ fmuld %f4,%f16,%f4
+/* 0x12d8 1500 (22 25) */ fmuld %f0,%f18,%f2
+/* 0x12dc 1501 (22 25) */ fdtox %f8,%f8
+/* 0x12e0 1502 (22 23) */ std %f8,[%o4-64]
+/* 0x12e4 1503 (23 26) */ fdtox %f6,%f6
+/* 0x12e8 1504 (23 24) */ std %f6,[%g3-32]
+/* 0x12ec 1505 (23 26) */ fmuld %f0,%f16,%f0
+/* 0x12f0 1506 (24 27) */ fdtox %f4,%f4
+/* 0x12f4 1507 (24 25) */ std %f4,[%g2-32]
+/* 0x12f8 1508 (25 28) */ fdtox %f2,%f2
+/* 0x12fc 1509 (25 26) */ std %f2,[%o7-32]
+/* 0x1300 1510 (26 29) */ fdtox %f0,%f0
+/* 0x1304 1511 (26 27) */ bcc,pn %icc,.L77000056 ! tprob=0.50
+/* 0x1308 (26 27) */ std %f0,[%o4-32]
+
+!
+! ENTRY .L77000054
+!
+
+ .L77000054: /* frequency 1.0 confidence 0.0 */
+/* 0x130c 1514 ( 0 3) */ ldd [%o1],%f0
+
+!
+! ENTRY .L990000161
+!
+
+ .L990000161: /* frequency 1.0 confidence 0.0 */
+/* 0x1310 1516 ( 0 2) */ fxnor %f14,%f0,%f0
+/* 0x1314 1517 ( 0 1) */ add %g4,1,%g4
+/* 0x1318 1518 ( 0 1) */ add %o1,8,%o1
+/* 0x131c 1519 ( 1 2) */ subcc %g4,%o3,%g0
+/* 0x1320 1520 ( 2 5) */ fitod %f0,%f2
+/* 0x1324 1521 ( 3 6) */ fitod %f1,%f0
+/* 0x1328 1522 ( 5 8) */ fsubd %f20,%f2,%f2
+/* 0x132c 1523 ( 6 9) */ fsubd %f20,%f0,%f0
+/* 0x1330 1524 ( 8 11) */ fmuld %f2,%f18,%f6
+/* 0x1334 1525 ( 9 12) */ fmuld %f2,%f16,%f4
+/* 0x1338 1526 (10 13) */ fmuld %f0,%f18,%f2
+/* 0x133c 1527 (11 14) */ fdtox %f6,%f6
+/* 0x1340 1528 (11 12) */ std %f6,[%g3]
+/* 0x1344 1529 (11 14) */ fmuld %f0,%f16,%f0
+/* 0x1348 1530 (12 15) */ fdtox %f4,%f4
+/* 0x134c 1531 (12 13) */ std %f4,[%g2]
+/* 0x1350 1532 (12 13) */ add %g2,32,%g2
+/* 0x1354 1533 (13 16) */ fdtox %f2,%f2
+/* 0x1358 1534 (13 14) */ std %f2,[%o7]
+/* 0x135c 1535 (13 14) */ add %o7,32,%o7
+/* 0x1360 1536 (14 17) */ fdtox %f0,%f0
+/* 0x1364 1537 (14 15) */ std %f0,[%o4]
+/* 0x1368 1538 (14 15) */ add %o4,32,%o4
+/* 0x136c 1539 (15 16) */ add %g3,32,%g3
+/* 0x1370 1540 (15 16) */ bcs,a,pt %icc,.L990000161 ! tprob=0.50
+/* 0x1374 (16 19) */ ldd [%o1],%f0
+
+!
+! ENTRY .L77000056
+!
+
+ .L77000056: /* frequency 1.0 confidence 0.0 */
+/* 0x1378 1548 ( 0 1) */ subcc %o0,0,%g0
+
+!
+! ENTRY .L990000162
+!
+
+ .L990000162: /* frequency 1.0 confidence 0.0 */
+/* 0x137c 1550 ( 0 1) */ bleu,pt %icc,.L77770061 ! tprob=0.50
+/* 0x1380 ( 0 1) */ nop
+/* 0x1384 1555 ( 0 1) */ sethi %hi(0x1800),%g1
+/* 0x1388 1556 ( 1 2) */ xor %g1,-304,%g1
+/* 0x138c 1557 ( 1 2) */ or %g0,%i1,%g4
+/* 0x1390 1558 ( 2 3) */ add %g1,%fp,%g5
+/* 0x1394 1559 ( 2 3) */ sethi %hi(0x1800),%g1
+/* 0x1398 1560 ( 3 4) */ xor %g1,-296,%g1
+/* 0x139c 1561 ( 3 4) */ or %g0,%o0,%o7
+/* 0x13a0 1562 ( 4 5) */ add %g1,%fp,%g2
+/* 0x13a4 1563 ( 4 5) */ or %g0,0,%i2
+/* 0x13a8 1564 ( 5 6) */ or %g0,%i0,%g3
+/* 0x13ac 1565 ( 5 6) */ subcc %o0,6,%g0
+/* 0x13b0 1566 ( 5 6) */ bl,pn %icc,.L77000058 ! tprob=0.50
+/* 0x13b4 ( 6 7) */ sethi %hi(0x1800),%g1
+/* 0x13b8 1568 ( 6 8) */ ld [%g4],%o2
+/* 0x13bc 1569 ( 6 7) */ add %g3,4,%g3
+/* 0x13c0 1570 ( 7 8) */ xor %g1,-264,%g1
+/* 0x13c4 1571 ( 7 8) */ sub %o7,3,%o4
+/* 0x13c8 1572 ( 8 9) */ add %g1,%fp,%g2
+/* 0x13cc 1573 ( 8 9) */ sethi %hi(0x1800),%g1
+/* 0x13d0 1574 ( 9 10) */ xor %g1,-272,%g1
+/* 0x13d4 1575 ( 9 10) */ or %g0,2,%i2
+/* 0x13d8 1576 (10 11) */ add %g1,%fp,%g5
+/* 0x13dc 1577 (10 11) */ sethi %hi(0x1800),%g1
+/* 0x13e0 1578 (11 12) */ xor %g1,-296,%g1
+/* 0x13e4 1579 (12 13) */ add %g1,%fp,%g1
+/* 0x13e8 1580 (13 15) */ ldx [%g1],%o1
+/* 0x13ec 1581 (14 16) */ ldx [%g1-8],%o0
+/* 0x13f0 1582 (15 16) */ sllx %o1,19,%o1
+/* 0x13f4 1583 (15 17) */ ldx [%g1+16],%o3
+/* 0x13f8 1584 (16 17) */ add %o0,%o1,%o0
+/* 0x13fc 1585 (16 18) */ ld [%g4+4],%o1
+/* 0x1400 1586 (16 17) */ add %g4,8,%g4
+/* 0x1404 1587 (17 18) */ sllx %o3,19,%o3
+/* 0x1408 1588 (17 18) */ add %o0,%o2,%o0
+/* 0x140c 1589 (17 19) */ ldx [%g1+8],%o2
+/* 0x1410 1590 (18 19) */ st %o0,[%g3-4]
+/* 0x1414 1591 (18 19) */ srlx %o0,32,%o0
+
+!
+! ENTRY .L990000142
+!
+
+ .L990000142: /* frequency 1.0 confidence 0.0 */
+/* 0x1418 1593 ( 0 1) */ add %o2,%o3,%o2
+/* 0x141c 1594 ( 0 1) */ add %i2,4,%i2
+/* 0x1420 1595 ( 0 2) */ ld [%g4],%o3
+/* 0x1424 1596 ( 1 2) */ srl %o0,0,%o5
+/* 0x1428 1597 ( 1 2) */ add %o2,%o1,%o1
+/* 0x142c 1598 ( 1 3) */ ldx [%g2],%o0
+/* 0x1430 1599 ( 3 4) */ sllx %o0,19,%o2
+/* 0x1434 1600 ( 3 5) */ ldx [%g5],%o0
+/* 0x1438 1601 ( 3 4) */ add %o1,%o5,%o1
+/* 0x143c 1602 ( 4 5) */ st %o1,[%g3]
+/* 0x1440 1603 ( 4 5) */ srlx %o1,32,%o5
+/* 0x1444 1604 ( 4 5) */ subcc %i2,%o4,%g0
+/* 0x1448 1605 ( 5 7) */ ldx [%g2+16],%o1
+/* 0x144c 1606 ( 5 6) */ add %o0,%o2,%o0
+/* 0x1450 1607 ( 5 6) */ add %g3,16,%g3
+/* 0x1454 1608 ( 6 8) */ ld [%g4+4],%o2
+/* 0x1458 1609 ( 6 7) */ add %o0,%o3,%o0
+/* 0x145c 1610 ( 7 8) */ sllx %o1,19,%o3
+/* 0x1460 1611 ( 7 9) */ ldx [%g5+16],%o1
+/* 0x1464 1612 ( 7 8) */ add %o0,%o5,%o0
+/* 0x1468 1613 ( 8 9) */ st %o0,[%g3-12]
+/* 0x146c 1614 ( 8 9) */ srlx %o0,32,%o5
+/* 0x1470 1615 ( 8 9) */ add %g4,16,%g4
+/* 0x1474 1616 ( 9 11) */ ldx [%g2+32],%o0
+/* 0x1478 1617 ( 9 10) */ add %o1,%o3,%o1
+/* 0x147c 1618 ( 9 10) */ add %g2,64,%g2
+/* 0x1480 1619 (10 12) */ ld [%g4-8],%o3
+/* 0x1484 1620 (10 11) */ add %o1,%o2,%o2
+/* 0x1488 1621 (11 12) */ sllx %o0,19,%o1
+/* 0x148c 1622 (11 13) */ ldx [%g5+32],%o0
+/* 0x1490 1623 (11 12) */ add %o2,%o5,%o2
+/* 0x1494 1624 (12 13) */ st %o2,[%g3-8]
+/* 0x1498 1625 (12 13) */ srlx %o2,32,%o5
+/* 0x149c 1626 (12 13) */ add %g5,64,%g5
+/* 0x14a0 1627 (13 15) */ ldx [%g2-16],%o2
+/* 0x14a4 1628 (13 14) */ add %o0,%o1,%o0
+/* 0x14a8 1629 (14 16) */ ld [%g4-4],%o1
+/* 0x14ac 1630 (14 15) */ add %o0,%o3,%o0
+/* 0x14b0 1631 (15 16) */ sllx %o2,19,%o3
+/* 0x14b4 1632 (15 17) */ ldx [%g5-16],%o2
+/* 0x14b8 1633 (15 16) */ add %o0,%o5,%o0
+/* 0x14bc 1634 (16 17) */ st %o0,[%g3-4]
+/* 0x14c0 1635 (16 17) */ bcs,pt %icc,.L990000142 ! tprob=0.50
+/* 0x14c4 (16 17) */ srlx %o0,32,%o0
+
+!
+! ENTRY .L990000145
+!
+
+ .L990000145: /* frequency 1.0 confidence 0.0 */
+/* 0x14c8 1638 ( 0 1) */ add %o2,%o3,%o3
+/* 0x14cc 1639 ( 0 1) */ add %g3,4,%g3
+/* 0x14d0 1640 ( 1 2) */ srl %o0,0,%o2
+/* 0x14d4 1641 ( 1 2) */ add %o3,%o1,%o0
+/* 0x14d8 1642 ( 2 3) */ add %o0,%o2,%o0
+/* 0x14dc 1643 ( 2 3) */ st %o0,[%g3-4]
+/* 0x14e0 1644 ( 2 3) */ subcc %i2,%o7,%g0
+/* 0x14e4 1645 ( 2 3) */ bcc,pn %icc,.L77770061 ! tprob=0.50
+/* 0x14e8 ( 3 4) */ srlx %o0,32,%o5
+
+!
+! ENTRY .L77000058
+!
+
+ .L77000058: /* frequency 1.0 confidence 0.0 */
+/* 0x14ec 1648 ( 0 2) */ ldx [%g2],%o2
+
+!
+! ENTRY .L990000160
+!
+
+ .L990000160: /* frequency 1.0 confidence 0.0 */
+/* 0x14f0 1650 ( 0 1) */ sllx %o2,19,%o3
+/* 0x14f4 1651 ( 0 2) */ ldx [%g5],%o0
+/* 0x14f8 1652 ( 0 1) */ add %i2,1,%i2
+/* 0x14fc 1653 ( 1 2) */ srl %o5,0,%o1
+/* 0x1500 1654 ( 1 3) */ ld [%g4],%o2
+/* 0x1504 1655 ( 1 2) */ add %g2,16,%g2
+/* 0x1508 1656 ( 2 3) */ add %o0,%o3,%o0
+/* 0x150c 1657 ( 2 3) */ add %g5,16,%g5
+/* 0x1510 1658 ( 3 4) */ add %o0,%o2,%o0
+/* 0x1514 1659 ( 3 4) */ add %g4,4,%g4
+/* 0x1518 1660 ( 4 5) */ add %o0,%o1,%o0
+/* 0x151c 1661 ( 4 5) */ st %o0,[%g3]
+/* 0x1520 1662 ( 4 5) */ subcc %i2,%o7,%g0
+/* 0x1524 1663 ( 5 6) */ srlx %o0,32,%o5
+/* 0x1528 1664 ( 5 6) */ add %g3,4,%g3
+/* 0x152c 1665 ( 5 6) */ bcs,a,pt %icc,.L990000160 ! tprob=0.50
+/* 0x1530 ( 6 8) */ ldx [%g2],%o2
+
+!
+! ENTRY .L77770061
+!
+
+ .L77770061: /* frequency 1.0 confidence 0.0 */
+/* 0x1534 ( 0 2) */ ret ! Result = %o1 %o0 %f0 %f1
+/* 0x1538 ( 2 3) */ restore %g0,%o5,%o0
+
+/* 0x11a8 1441 ( 0 0) */ .type mul_add,2
+/* 0x11a8 1442 ( 0 0) */ .size mul_add,(.-mul_add)
+/* 0x11a8 1445 ( 0 0) */ .align 16
+/* 0x11b0 1451 ( 0 0) */ .global mul_add_inp
+
+! mul_add_inp: rearranges its four incoming argument registers (%o0-%o3) and tail-calls mul_add.
+! ENTRY mul_add_inp
+! At the call: %o0 is untouched, %o1 = incoming %o0, %o2 = incoming %o1, %o3 = incoming %o2, %o4 = incoming %o3.
+
+ .global mul_add_inp
+ mul_add_inp: /* frequency 1.0 confidence 0.0 */
+/* 0x11b0 1453 ( 0 1) */ or %g0,%o2,%g1 /* stash incoming %o2 in %g1 */
+/* 0x11b4 1454 ( 0 1) */ or %g0,%o3,%o4 /* %o4 = incoming %o3 */
+/* 0x11b8 1455 ( 1 2) */ or %g0,%o0,%g3 /* copy incoming %o0 to %g3 */
+/* 0x11bc 1456 ( 1 2) */ or %g0,%o1,%g2 /* stash incoming %o1 in %g2 */
+/* 0x11c0 1466 ( 2 3) */ or %g0,%g1,%o3 /* %o3 = incoming %o2 */
+/* 0x11c4 1467 ( 2 3) */ or %g0,%g3,%o1 /* %o1 = incoming %o0 (so %o0 and %o1 now hold the same value) */
+/* 0x11c8 1468 ( 3 4) */ or %g0,%g2,%o2 /* %o2 = incoming %o1 */
+/* 0x11cc 1469 ( 3 4) */ or %g0,%o7,%g1 /* save the return address: the call below overwrites %o7 */
+/* 0x11d0 1470 ( 4 6) */ call mul_add ! params = ! Result =
+/* 0x11d4 ( 5 6) */ or %g0,%g1,%o7 /* delay slot: put the saved %o7 back so mul_add returns straight to our caller */
+/* 0x11d8 1472 ( 0 0) */ .type mul_add_inp,2
+/* 0x11d8 1473 ( 0 0) */ .size mul_add_inp,(.-mul_add_inp)
+
+ .section ".data",#alloc,#write
+/* 0x11d8 6 ( 0 0) */ .align 8
+
+! mask_cnst: writable, 8-byte-aligned, 8-byte data object made of two identical 32-bit words, each 0x80000000.
+! ENTRY mask_cnst
+! NOTE(review): presumably the mask operand of the fxnor instructions in mul_add - confirm against the load at the start of mul_add.
+
+ mask_cnst: /* frequency 1.0 confidence 0.0 */
+/* 0x11d8 8 ( 0 0) */ .word -2147483648 /* 0x80000000: only the top bit of the word is set */
+/* 0x11dc 9 ( 0 0) */ .word -2147483648 /* 0x80000000: same value for the second word */
+/* 0x11e0 10 ( 0 0) */ .type mask_cnst,#object
+/* 0x11e0 11 ( 0 0) */ .size mask_cnst,8
+
diff --git a/security/nss/lib/freebl/mpi/mpv_sparcv9.s b/security/nss/lib/freebl/mpi/mpv_sparcv9.s
new file mode 100644
index 000000000..e2fbe0bd0
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpv_sparcv9.s
@@ -0,0 +1,1645 @@
+!
+! This Source Code Form is subject to the terms of the Mozilla Public
+! License, v. 2.0. If a copy of the MPL was not distributed with this
+! file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+ .section ".text",#alloc,#execinstr
+/* 000000 0 ( 0 0) */ .register %g2,#scratch
+/* 000000 ( 0 0) */ .register %g3,#scratch
+/* 000000 3 ( 0 0) */ .file "mpv_sparc.c"
+/* 000000 15 ( 0 0) */ .align 8
+! Constant pool: two 8-byte entries emitted in .text, 8-byte aligned, immediately ahead of mul_add.
+! SUBROUTINE .L_const_seg_900000101
+! The visible part of mul_add loads offset 0 and offset 8 with ldd into FP registers; read as big-endian IEEE-754 doubles they are 2^52 and 2^31-1.
+! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME)
+
+ .L_const_seg_900000101: /* frequency 1.0 confidence 0.0 */
+/* 000000 20 ( 0 0) */ .word 1127219200,0 /* 0x43300000,0x00000000 = 4503599627370496.0 (2^52) as a double */
+/* 0x0008 21 ( 0 0) */ .word 1105199103,-4194304 /* 0x41DFFFFF,0xFFC00000 = 2147483647.0 (2^31-1) as a double */
+/* 0x0010 22 ( 0 0) */ .align 8
+/* 0x0010 28 ( 0 0) */ .global mul_add
+
+!
+! ENTRY mul_add
+!
+
+ .global mul_add
+ mul_add: /* frequency 1.0 confidence 0.0 */
+/* 0x0010 30 ( 0 1) */ sethi %hi(0x1c00),%g1
+/* 0x0014 31 ( 0 1) */ sethi %hi(mask_cnst),%g2
+/* 0x0018 32 ( 1 2) */ xor %g1,-48,%g1
+/* 0x001c 33 ( 1 2) */ add %g2,%lo(mask_cnst),%g2
+/* 0x0020 34 ( 2 3) */ save %sp,%g1,%sp
+
+!
+! ENTRY .L900000149
+!
+
+ .L900000149: /* frequency 1.0 confidence 0.0 */
+/* 0x0024 36 ( 0 2) */ call (.+0x8) ! params = ! Result =
+/* 0x0028 ( 1 2) */ sethi %hi((_GLOBAL_OFFSET_TABLE_-(.L900000149-.))),%g5
+/* 0x002c 178 ( 2 3) */ sethi %hi(.L_const_seg_900000101),%g3
+/* 0x0030 179 ( 2 3) */ add %g5,%lo((_GLOBAL_OFFSET_TABLE_-(.L900000149-.))),%g5
+/* 0x0034 180 ( 3 4) */ add %g3,%lo(.L_const_seg_900000101),%g3
+/* 0x0038 181 ( 3 4) */ add %g5,%o7,%o1
+/* 0x003c 182 ( 4 5) */ sethi %hi(0x80000),%g4
+/* 0x0040 183 ( 4 6) */ ldx [%o1+%g2],%g2
+/* 0x0044 184 ( 4 5) */ or %g0,%i2,%o2
+/* 0x0048 185 ( 5 6) */ subcc %i4,%g4,%g0
+/* 0x004c 186 ( 5 7) */ ldx [%o1+%g3],%o0
+/* 0x0050 187 ( 6 7) */ or %g0,%i0,%o7
+/* 0x0054 188 ( 6 7) */ or %g0,%i1,%o5
+/* 0x0058 189 ( 6 9) */ ldd [%g2],%f0
+/* 0x005c 190 ( 6 7) */ bcc,pn %icc,.L77000048 ! tprob=0.50
+/* 0x0060 ( 7 8) */ subcc %i3,8,%g0
+/* 0x0064 192 ( 7 8) */ bne,pn %icc,.L900000158 ! tprob=0.50
+/* 0x0068 ( 8 9) */ subcc %i3,16,%g0
+/* 0x006c 194 ( 9 12) */ ldd [%o2],%f4
+/* 0x0070 195 (10 11) */ st %i4,[%sp+2287]
+/* 0x0074 196 (11 14) */ ldd [%o0],%f8
+/* 0x0078 197 (11 13) */ fxnor %f0,%f4,%f4
+/* 0x007c 198 (12 15) */ ldd [%o2+8],%f10
+/* 0x0080 199 (13 16) */ fitod %f4,%f12
+/* 0x0084 200 (13 16) */ ldd [%o0+8],%f14
+/* 0x0088 201 (14 17) */ ld [%sp+2287],%f7
+/* 0x008c 202 (14 17) */ fitod %f5,%f4
+/* 0x0090 203 (15 17) */ fxnor %f0,%f10,%f10
+/* 0x0094 204 (15 18) */ ldd [%o2+16],%f16
+/* 0x0098 205 (16 19) */ ldd [%o2+24],%f18
+/* 0x009c 206 (17 20) */ fsubd %f14,%f4,%f4
+/* 0x00a0 210 (17 20) */ ld [%i1],%g2
+/* 0x00a4 211 (18 20) */ fxnor %f0,%f16,%f16
+/* 0x00a8 212 (18 21) */ ld [%i1+4],%g3
+/* 0x00ac 213 (19 22) */ ld [%i1+8],%g4
+/* 0x00b0 214 (20 23) */ fitod %f16,%f20
+/* 0x00b4 215 (20 23) */ ld [%i1+16],%o0
+/* 0x00b8 216 (21 24) */ ld [%i1+12],%g5
+/* 0x00bc 217 (22 25) */ ld [%i1+20],%o1
+/* 0x00c0 218 (23 26) */ ld [%i1+24],%o2
+/* 0x00c4 219 (24 25) */ fmovs %f8,%f6
+/* 0x00c8 220 (24 27) */ ld [%i1+28],%o3
+/* 0x00cc 221 (26 29) */ fsubd %f6,%f8,%f6
+/* 0x00d0 222 (27 30) */ fsubd %f14,%f12,%f8
+/* 0x00d4 223 (28 31) */ fitod %f10,%f12
+/* 0x00d8 224 (29 32) */ fmuld %f4,%f6,%f4
+/* 0x00dc 225 (29 32) */ fitod %f11,%f10
+/* 0x00e0 226 (30 33) */ fmuld %f8,%f6,%f8
+/* 0x00e4 227 (31 34) */ fsubd %f14,%f12,%f12
+/* 0x00e8 228 (32 35) */ fdtox %f4,%f4
+/* 0x00ec 229 (32 33) */ std %f4,[%sp+2271]
+/* 0x00f0 230 (33 36) */ fdtox %f8,%f8
+/* 0x00f4 231 (33 34) */ std %f8,[%sp+2279]
+/* 0x00f8 232 (34 37) */ fmuld %f12,%f6,%f12
+/* 0x00fc 233 (34 37) */ fsubd %f14,%f10,%f10
+/* 0x0100 234 (35 38) */ fsubd %f14,%f20,%f4
+/* 0x0104 235 (36 39) */ fitod %f17,%f8
+/* 0x0108 236 (37 39) */ fxnor %f0,%f18,%f16
+/* 0x010c 237 (37 39) */ ldx [%sp+2279],%o4
+/* 0x0110 238 (37 40) */ fmuld %f10,%f6,%f10
+/* 0x0114 239 (38 41) */ fdtox %f12,%f12
+/* 0x0118 240 (38 39) */ std %f12,[%sp+2263]
+/* 0x011c 241 (38 41) */ fmuld %f4,%f6,%f4
+/* 0x0120 242 (39 42) */ fitod %f16,%f18
+/* 0x0124 243 (39 40) */ add %o4,%g2,%g2
+/* 0x0128 244 (39 40) */ st %g2,[%i0]
+/* 0x012c 245 (40 42) */ ldx [%sp+2271],%o4
+/* 0x0130 246 (40 43) */ fsubd %f14,%f8,%f8
+/* 0x0134 247 (40 41) */ srax %g2,32,%o5
+/* 0x0138 248 (41 44) */ fdtox %f10,%f10
+/* 0x013c 249 (41 42) */ std %f10,[%sp+2255]
+/* 0x0140 250 (42 45) */ fdtox %f4,%f4
+/* 0x0144 251 (42 43) */ std %f4,[%sp+2247]
+/* 0x0148 252 (42 43) */ add %o4,%g3,%o4
+/* 0x014c 253 (43 46) */ fitod %f17,%f12
+/* 0x0150 254 (43 45) */ ldx [%sp+2263],%g2
+/* 0x0154 255 (43 44) */ add %o4,%o5,%g3
+/* 0x0158 256 (43 46) */ fmuld %f8,%f6,%f8
+/* 0x015c 257 (44 47) */ fsubd %f14,%f18,%f10
+/* 0x0160 258 (44 45) */ st %g3,[%i0+4]
+/* 0x0164 259 (44 45) */ srax %g3,32,%g3
+/* 0x0168 260 (45 46) */ add %g2,%g4,%g4
+/* 0x016c 261 (45 47) */ ldx [%sp+2255],%g2
+/* 0x0170 262 (46 49) */ fsubd %f14,%f12,%f4
+/* 0x0174 263 (46 47) */ add %g4,%g3,%g3
+/* 0x0178 264 (46 48) */ ldx [%sp+2247],%g4
+/* 0x017c 265 (47 50) */ fmuld %f10,%f6,%f10
+/* 0x0180 266 (47 50) */ fdtox %f8,%f8
+/* 0x0184 267 (47 48) */ std %f8,[%sp+2239]
+/* 0x0188 268 (48 49) */ add %g4,%o0,%g4
+/* 0x018c 269 (48 49) */ add %g2,%g5,%g2
+/* 0x0190 270 (48 49) */ st %g3,[%i0+8]
+/* 0x0194 271 (49 52) */ fmuld %f4,%f6,%f4
+/* 0x0198 272 (49 50) */ srax %g3,32,%o0
+/* 0x019c 273 (49 51) */ ldx [%sp+2239],%g5
+/* 0x01a0 274 (50 53) */ fdtox %f10,%f6
+/* 0x01a4 275 (50 51) */ std %f6,[%sp+2231]
+/* 0x01a8 276 (50 51) */ add %g2,%o0,%g2
+/* 0x01ac 277 (51 52) */ srax %g2,32,%g3
+/* 0x01b0 278 (51 52) */ add %g5,%o1,%o1
+/* 0x01b4 279 (51 52) */ st %g2,[%i0+12]
+/* 0x01b8 280 (52 55) */ fdtox %f4,%f4
+/* 0x01bc 281 (52 53) */ std %f4,[%sp+2223]
+/* 0x01c0 282 (52 53) */ add %g4,%g3,%g3
+/* 0x01c4 283 (53 54) */ srax %g3,32,%g4
+/* 0x01c8 284 (53 54) */ st %g3,[%i0+16]
+/* 0x01cc 285 (54 56) */ ldx [%sp+2231],%o0
+/* 0x01d0 286 (54 55) */ add %o1,%g4,%g4
+/* 0x01d4 287 (55 56) */ srax %g4,32,%g2
+/* 0x01d8 288 (55 57) */ ldx [%sp+2223],%g5
+/* 0x01dc 289 (56 57) */ add %o0,%o2,%o2
+/* 0x01e0 290 (56 57) */ st %g4,[%i0+20]
+/* 0x01e4 291 (57 58) */ add %o2,%g2,%g2
+/* 0x01e8 292 (57 58) */ add %g5,%o3,%g5
+/* 0x01ec 293 (57 58) */ st %g2,[%i0+24]
+/* 0x01f0 294 (58 59) */ srax %g2,32,%g3
+/* 0x01f4 295 (59 60) */ add %g5,%g3,%g2
+/* 0x01f8 296 (59 60) */ st %g2,[%i0+28]
+/* 0x01fc 300 (60 61) */ srax %g2,32,%o3
+/* 0x0200 301 (61 62) */ srl %o3,0,%i0
+/* 0x0204 (62 64) */ ret ! Result = %o1 %o0 %f0 %f1
+/* 0x0208 (64 65) */ restore %g0,%g0,%g0
+
+!
+! ENTRY .L900000158
+!
+
+ .L900000158: /* frequency 1.0 confidence 0.0 */
+/* 0x020c 308 ( 0 1) */ bne,a,pn %icc,.L900000157 ! tprob=0.50
+/* 0x0210 ( 0 1) */ st %i4,[%sp+2223]
+/* 0x0214 315 ( 1 4) */ ldd [%o2],%f4
+/* 0x0218 316 ( 2 3) */ st %i4,[%sp+2351]
+/* 0x021c 317 ( 3 6) */ ldd [%o0],%f8
+/* 0x0220 318 ( 3 5) */ fxnor %f0,%f4,%f4
+/* 0x0224 319 ( 4 7) */ ldd [%o2+8],%f10
+/* 0x0228 320 ( 5 8) */ ldd [%o0+8],%f14
+/* 0x022c 321 ( 5 8) */ fitod %f4,%f12
+/* 0x0230 322 ( 6 9) */ ld [%sp+2351],%f7
+/* 0x0234 323 ( 6 8) */ fxnor %f0,%f10,%f10
+/* 0x0238 324 ( 7 10) */ ldd [%o2+16],%f16
+/* 0x023c 325 ( 7 10) */ fitod %f5,%f4
+/* 0x0240 326 ( 8 11) */ ldd [%o2+24],%f18
+/* 0x0244 330 ( 9 12) */ ldd [%o2+32],%f20
+/* 0x0248 331 ( 9 11) */ fxnor %f0,%f16,%f16
+/* 0x024c 335 (10 13) */ ld [%i1],%g2
+/* 0x0250 336 (10 13) */ fsubd %f14,%f4,%f4
+/* 0x0254 337 (11 14) */ ldd [%o2+40],%f22
+/* 0x0258 338 (11 14) */ fitod %f16,%f28
+/* 0x025c 339 (12 15) */ ld [%i1+4],%g3
+/* 0x0260 340 (13 16) */ ld [%i1+8],%g4
+/* 0x0264 341 (13 15) */ fxnor %f0,%f22,%f22
+/* 0x0268 342 (14 17) */ ld [%i1+12],%g5
+/* 0x026c 343 (15 18) */ ld [%i1+16],%o0
+/* 0x0270 344 (16 19) */ ldd [%o2+48],%f24
+/* 0x0274 345 (17 20) */ ld [%i1+20],%o1
+/* 0x0278 346 (17 18) */ fmovs %f8,%f6
+/* 0x027c 347 (18 21) */ ldd [%o2+56],%f26
+/* 0x0280 348 (19 22) */ ld [%i1+24],%o2
+/* 0x0284 349 (19 22) */ fsubd %f6,%f8,%f6
+/* 0x0288 350 (20 23) */ ld [%i1+28],%o3
+/* 0x028c 351 (20 23) */ fsubd %f14,%f12,%f8
+/* 0x0290 355 (21 24) */ ld [%i1+32],%o4
+/* 0x0294 356 (21 24) */ fitod %f10,%f12
+/* 0x0298 357 (22 25) */ ld [%i1+36],%o7
+/* 0x029c 358 (22 25) */ fitod %f11,%f10
+/* 0x02a0 359 (22 25) */ fmuld %f4,%f6,%f4
+/* 0x02a4 360 (23 26) */ ld [%i1+40],%l1
+/* 0x02a8 361 (23 26) */ fmuld %f8,%f6,%f8
+/* 0x02ac 362 (24 27) */ ld [%i1+56],%l5
+/* 0x02b0 363 (24 27) */ fsubd %f14,%f12,%f12
+/* 0x02b4 364 (25 28) */ fsubd %f14,%f10,%f10
+/* 0x02b8 365 (26 29) */ fdtox %f8,%f8
+/* 0x02bc 366 (26 27) */ std %f8,[%sp+2343]
+/* 0x02c0 367 (27 30) */ fitod %f17,%f8
+/* 0x02c4 368 (27 30) */ fmuld %f12,%f6,%f12
+/* 0x02c8 369 (28 31) */ fdtox %f4,%f4
+/* 0x02cc 370 (28 29) */ std %f4,[%sp+2335]
+/* 0x02d0 371 (28 31) */ fmuld %f10,%f6,%f10
+/* 0x02d4 372 (29 31) */ fxnor %f0,%f18,%f16
+/* 0x02d8 373 (30 33) */ fdtox %f12,%f12
+/* 0x02dc 374 (30 31) */ std %f12,[%sp+2327]
+/* 0x02e0 375 (31 33) */ ldx [%sp+2343],%o5
+/* 0x02e4 376 (31 34) */ fsubd %f14,%f8,%f8
+/* 0x02e8 377 (32 35) */ fsubd %f14,%f28,%f4
+/* 0x02ec 378 (33 36) */ fitod %f17,%f12
+/* 0x02f0 379 (33 34) */ add %o5,%g2,%g2
+/* 0x02f4 380 (33 34) */ st %g2,[%i0]
+/* 0x02f8 381 (34 36) */ ldx [%sp+2335],%o5
+/* 0x02fc 382 (34 37) */ fitod %f16,%f18
+/* 0x0300 383 (34 35) */ srax %g2,32,%l0
+/* 0x0304 384 (35 37) */ fxnor %f0,%f20,%f16
+/* 0x0308 385 (35 38) */ fmuld %f8,%f6,%f20
+/* 0x030c 386 (36 39) */ fdtox %f10,%f10
+/* 0x0310 387 (36 37) */ std %f10,[%sp+2319]
+/* 0x0314 388 (36 37) */ add %o5,%g3,%g3
+/* 0x0318 389 (36 39) */ fmuld %f4,%f6,%f4
+/* 0x031c 390 (37 40) */ fitod %f16,%f8
+/* 0x0320 391 (37 38) */ add %g3,%l0,%g3
+/* 0x0324 392 (37 38) */ st %g3,[%i0+4]
+/* 0x0328 393 (38 40) */ ldx [%sp+2327],%o5
+/* 0x032c 394 (38 41) */ fsubd %f14,%f18,%f18
+/* 0x0330 395 (38 39) */ srax %g3,32,%l3
+/* 0x0334 396 (39 41) */ ldx [%sp+2319],%l2
+/* 0x0338 397 (39 42) */ fdtox %f4,%f4
+/* 0x033c 398 (40 41) */ std %f4,[%sp+2311]
+/* 0x0340 399 (40 43) */ fdtox %f20,%f20
+/* 0x0344 400 (40 41) */ add %o5,%g4,%g4
+/* 0x0348 401 (41 42) */ std %f20,[%sp+2303]
+/* 0x034c 402 (41 44) */ fsubd %f14,%f12,%f4
+/* 0x0350 403 (41 42) */ add %g4,%l3,%g4
+/* 0x0354 404 (41 44) */ fmuld %f18,%f6,%f18
+/* 0x0358 405 (42 43) */ st %g4,[%i0+8]
+/* 0x035c 406 (42 45) */ fitod %f17,%f16
+/* 0x0360 407 (42 43) */ srax %g4,32,%l4
+/* 0x0364 408 (43 46) */ ld [%i1+44],%l0
+/* 0x0368 409 (43 46) */ fsubd %f14,%f8,%f20
+/* 0x036c 410 (43 44) */ add %l2,%g5,%l2
+/* 0x0370 411 (44 46) */ ldx [%sp+2311],%g5
+/* 0x0374 412 (44 47) */ fitod %f22,%f8
+/* 0x0378 413 (44 45) */ add %l2,%l4,%l2
+/* 0x037c 414 (44 47) */ fmuld %f4,%f6,%f4
+/* 0x0380 415 (45 46) */ st %l2,[%i0+12]
+/* 0x0384 416 (45 48) */ fsubd %f14,%f16,%f10
+/* 0x0388 417 (46 49) */ ld [%i1+52],%l3
+/* 0x038c 418 (46 49) */ fdtox %f18,%f18
+/* 0x0390 419 (46 47) */ add %g5,%o0,%l4
+/* 0x0394 420 (46 49) */ fmuld %f20,%f6,%f12
+/* 0x0398 421 (47 48) */ std %f18,[%sp+2295]
+/* 0x039c 422 (47 48) */ srax %l2,32,%o0
+/* 0x03a0 423 (47 50) */ fitod %f23,%f16
+/* 0x03a4 424 (48 51) */ ld [%i1+48],%o5
+/* 0x03a8 425 (48 51) */ fsubd %f14,%f8,%f8
+/* 0x03ac 426 (48 49) */ add %l4,%o0,%l4
+/* 0x03b0 427 (49 50) */ st %l4,[%i0+16]
+/* 0x03b4 428 (49 50) */ srax %l4,32,%o0
+/* 0x03b8 429 (49 51) */ fxnor %f0,%f24,%f18
+/* 0x03bc 430 (50 52) */ ldx [%sp+2303],%g5
+/* 0x03c0 431 (50 53) */ fdtox %f4,%f4
+/* 0x03c4 432 (51 52) */ std %f4,[%sp+2287]
+/* 0x03c8 433 (51 54) */ fdtox %f12,%f12
+/* 0x03cc 434 (51 54) */ fmuld %f10,%f6,%f4
+/* 0x03d0 435 (52 53) */ std %f12,[%sp+2279]
+/* 0x03d4 436 (52 55) */ fsubd %f14,%f16,%f12
+/* 0x03d8 437 (52 53) */ add %g5,%o1,%g2
+/* 0x03dc 438 (52 55) */ fmuld %f8,%f6,%f8
+/* 0x03e0 439 (53 55) */ ldx [%sp+2295],%g5
+/* 0x03e4 440 (53 56) */ fitod %f18,%f10
+/* 0x03e8 441 (53 54) */ add %g2,%o0,%g2
+/* 0x03ec 442 (54 55) */ st %g2,[%i0+20]
+/* 0x03f0 443 (54 57) */ fitod %f19,%f16
+/* 0x03f4 444 (54 55) */ srax %g2,32,%o0
+/* 0x03f8 445 (55 58) */ fdtox %f8,%f8
+/* 0x03fc 446 (55 56) */ std %f8,[%sp+2263]
+/* 0x0400 447 (55 56) */ add %g5,%o2,%g3
+/* 0x0404 448 (56 58) */ ldx [%sp+2287],%g5
+/* 0x0408 449 (56 59) */ fsubd %f14,%f10,%f10
+/* 0x040c 450 (56 57) */ add %g3,%o0,%g3
+/* 0x0410 451 (57 58) */ st %g3,[%i0+24]
+/* 0x0414 452 (57 60) */ fsubd %f14,%f16,%f8
+/* 0x0418 453 (57 58) */ srax %g3,32,%o0
+/* 0x041c 454 (58 61) */ fdtox %f4,%f4
+/* 0x0420 455 (58 59) */ std %f4,[%sp+2271]
+/* 0x0424 456 (58 59) */ add %g5,%o3,%g4
+/* 0x0428 457 (59 61) */ fxnor %f0,%f26,%f18
+/* 0x042c 458 (59 62) */ fmuld %f12,%f6,%f4
+/* 0x0430 459 (59 60) */ add %g4,%o0,%g4
+/* 0x0434 460 (60 61) */ st %g4,[%i0+28]
+/* 0x0438 461 (60 63) */ fmuld %f10,%f6,%f10
+/* 0x043c 462 (60 61) */ srax %g4,32,%o0
+/* 0x0440 463 (61 63) */ ldx [%sp+2279],%g5
+/* 0x0444 464 (61 64) */ fitod %f18,%f12
+/* 0x0448 465 (61 64) */ fmuld %f8,%f6,%f8
+/* 0x044c 466 (62 65) */ fdtox %f4,%f4
+/* 0x0450 467 (62 63) */ std %f4,[%sp+2255]
+/* 0x0454 468 (63 64) */ add %g5,%o4,%l2
+/* 0x0458 469 (63 65) */ ldx [%sp+2271],%g5
+/* 0x045c 470 (63 66) */ fdtox %f10,%f16
+/* 0x0460 471 (64 67) */ fsubd %f14,%f12,%f4
+/* 0x0464 472 (64 65) */ std %f16,[%sp+2247]
+/* 0x0468 473 (64 65) */ add %l2,%o0,%l2
+/* 0x046c 474 (65 68) */ fdtox %f8,%f8
+/* 0x0470 475 (65 66) */ std %f8,[%sp+2239]
+/* 0x0474 476 (65 66) */ add %g5,%o7,%l4
+/* 0x0478 477 (66 69) */ fitod %f19,%f10
+/* 0x047c 478 (66 68) */ ldx [%sp+2263],%g5
+/* 0x0480 479 (66 67) */ srax %l2,32,%o0
+/* 0x0484 480 (67 68) */ add %l4,%o0,%l4
+/* 0x0488 481 (67 70) */ fmuld %f4,%f6,%f4
+/* 0x048c 482 (67 69) */ ldx [%sp+2255],%o0
+/* 0x0490 483 (68 69) */ srax %l4,32,%o1
+/* 0x0494 484 (68 69) */ add %g5,%l1,%l1
+/* 0x0498 485 (68 69) */ st %l2,[%i0+32]
+/* 0x049c 486 (69 72) */ fsubd %f14,%f10,%f8
+/* 0x04a0 487 (69 71) */ ldx [%sp+2239],%o3
+/* 0x04a4 488 (69 70) */ add %l1,%o1,%o1
+/* 0x04a8 489 (70 72) */ ldx [%sp+2247],%g5
+/* 0x04ac 490 (70 71) */ srax %o1,32,%o2
+/* 0x04b0 491 (70 71) */ add %o0,%l0,%o0
+/* 0x04b4 492 (71 74) */ fdtox %f4,%f4
+/* 0x04b8 493 (71 72) */ std %f4,[%sp+2231]
+/* 0x04bc 494 (71 72) */ add %o0,%o2,%o2
+/* 0x04c0 495 (72 73) */ add %o3,%l3,%l3
+/* 0x04c4 496 (72 75) */ fmuld %f8,%f6,%f4
+/* 0x04c8 497 (72 73) */ add %g5,%o5,%g5
+/* 0x04cc 498 (73 74) */ srax %o2,32,%o3
+/* 0x04d0 499 (73 74) */ st %l4,[%i0+36]
+/* 0x04d4 500 (74 75) */ add %g5,%o3,%g2
+/* 0x04d8 501 (74 76) */ ldx [%sp+2231],%o0
+/* 0x04dc 502 (75 76) */ srax %g2,32,%g3
+/* 0x04e0 503 (75 78) */ fdtox %f4,%f4
+/* 0x04e4 504 (75 76) */ std %f4,[%sp+2223]
+/* 0x04e8 505 (76 77) */ st %o1,[%i0+40]
+/* 0x04ec 506 (76 77) */ add %l3,%g3,%g3
+/* 0x04f0 507 (76 77) */ add %o0,%l5,%g5
+/* 0x04f4 508 (77 78) */ st %o2,[%i0+44]
+/* 0x04f8 509 (77 78) */ srax %g3,32,%g4
+/* 0x04fc 510 (78 79) */ st %g2,[%i0+48]
+/* 0x0500 511 (78 79) */ add %g5,%g4,%g4
+/* 0x0504 512 (79 80) */ st %g3,[%i0+52]
+/* 0x0508 513 (79 80) */ srax %g4,32,%g5
+/* 0x050c 514 (80 83) */ ld [%i1+60],%g3
+/* 0x0510 515 (81 83) */ ldx [%sp+2223],%g2
+/* 0x0514 516 (82 83) */ st %g4,[%i0+56]
+/* 0x0518 517 (83 84) */ add %g2,%g3,%g2
+/* 0x051c 518 (84 85) */ add %g2,%g5,%g2
+/* 0x0520 519 (84 85) */ st %g2,[%i0+60]
+/* 0x0524 523 (85 86) */ srax %g2,32,%o3
+/* 0x0528 524 (86 87) */ srl %o3,0,%i0
+/* 0x052c (87 89) */ ret ! Result = %o1 %o0 %f0 %f1
+/* 0x0530 (89 90) */ restore %g0,%g0,%g0
+
+!
+! ENTRY .L900000157
+!
+! Setup for the general-length conversion pass.
+!  - %f14 keeps a copy of %f0 (the fxnor operand), because %f0 is reused
+!    below as a load destination.
+!  - %f16 = (%f6:%f7) - %f8, where %f6 is the high word of the double at
+!    [%o0] and %f7 is the 32-bit word at [%sp+2223].
+!    NOTE(review): presumably the usual "integer in the mantissa minus the
+!    bias constant" int->double trick; the constants at [%o0] are not
+!    visible in this chunk - confirm.
+!  - %f18 = double at [%o0+8], the minuend used for every input word.
+!  - %o1 = %o3 = (%i3 + 1) / 2 (signed, rounded toward zero): the number of
+!    64-bit loads from [%i2]; %g2 = that count - 1.
+!  - %g4 = %fp-217 is the base of the 64-bit result buffer on the stack;
+!    %g5 = 0.
+! Every 32-bit input word w is transformed as
+!    fdtox( (%f18 - fitod(fxnor(%f14, w))) * %f16 )
+! and stored as a 64-bit integer in the buffer at %g4.
+! Fewer than 10 doubles: go to the simple loop at .L77000077 with %o1 = 0.
+! Otherwise the code below loads seven doubles ([%i2] .. [%i2+48]), stores
+! the first four results ([%fp-217] .. [%fp-193]) and falls through into
+! the unrolled loop at .L900000144.
+!
+
+ .L900000157: /* frequency 1.0 confidence 0.0 */
+/* 0x0534 532 ( 0 1) */ fmovd %f0,%f14
+/* 0x0538 533 ( 0 3) */ ldd [%o0],%f8
+/* 0x053c 539 ( 0 1) */ add %i3,1,%g2
+/* 0x0540 540 ( 1 4) */ ld [%sp+2223],%f7
+! srl/add/sra below: add the sign bit before halving, so the signed
+! division by two rounds toward zero.
+/* 0x0544 541 ( 1 2) */ srl %g2,31,%g3
+/* 0x0548 545 ( 1 2) */ add %fp,-217,%g4
+/* 0x054c 546 ( 2 3) */ add %g2,%g3,%g2
+/* 0x0550 547 ( 2 3) */ or %g0,0,%g5
+/* 0x0554 548 ( 2 5) */ ldd [%o0+8],%f18
+/* 0x0558 549 ( 3 4) */ fmovs %f8,%f6
+/* 0x055c 550 ( 3 4) */ sra %g2,1,%o1
+/* 0x0560 551 ( 3 4) */ or %g0,0,%o0
+/* 0x0564 552 ( 4 5) */ subcc %o1,0,%g0
+/* 0x0568 553 ( 5 6) */ or %g0,%o1,%o3
+/* 0x056c 554 ( 5 8) */ fsubd %f6,%f8,%f16
+! Nothing to convert when the double count is <= 0. The delay slot sets
+! %icc from %i3 for the branch at .L900000156.
+/* 0x0570 555 ( 5 6) */ ble,pt %icc,.L900000156 ! tprob=0.50
+/* 0x0574 ( 6 7) */ subcc %i3,0,%g0
+/* 0x0578 557 ( 6 7) */ sub %o1,1,%g2
+! %i0 / %g3 are the buffer slot indices for the first / second word of
+! each loaded double.
+/* 0x057c 558 ( 7 8) */ or %g0,0,%i0
+/* 0x0580 559 ( 7 8) */ or %g0,1,%g3
+/* 0x0584 560 ( 8 9) */ subcc %o3,10,%g0
+/* 0x0588 561 ( 8 9) */ bl,pn %icc,.L77000077 ! tprob=0.50
+/* 0x058c ( 9 10) */ or %g0,0,%o1
+! Pipeline prologue for the unrolled loop.
+/* 0x0590 563 ( 9 12) */ ldd [%i2+8],%f0
+/* 0x0594 564 ( 9 10) */ sub %o3,3,%o3
+/* 0x0598 565 (10 13) */ ldd [%i2],%f2
+/* 0x059c 566 (10 11) */ or %g0,7,%o0
+/* 0x05a0 567 (10 11) */ or %g0,2,%i0
+/* 0x05a4 568 (11 13) */ fxnor %f14,%f0,%f8
+/* 0x05a8 569 (11 14) */ ldd [%i2+16],%f4
+/* 0x05ac 570 (11 12) */ or %g0,16,%o2
+/* 0x05b0 571 (12 14) */ fxnor %f14,%f2,%f2
+/* 0x05b4 572 (12 15) */ ldd [%i2+24],%f6
+/* 0x05b8 573 (12 13) */ or %g0,48,%o4
+/* 0x05bc 574 (13 16) */ fitod %f8,%f12
+/* 0x05c0 575 (13 14) */ or %g0,24,%o1
+/* 0x05c4 576 (13 14) */ or %g0,3,%g3
+/* 0x05c8 577 (14 17) */ fitod %f2,%f0
+/* 0x05cc 578 (15 18) */ fitod %f3,%f20
+/* 0x05d0 579 (15 18) */ ldd [%i2+32],%f2
+/* 0x05d4 580 (16 19) */ fitod %f9,%f10
+/* 0x05d8 581 (16 19) */ ldd [%i2+40],%f8
+/* 0x05dc 582 (17 20) */ fsubd %f18,%f0,%f0
+/* 0x05e0 583 (18 21) */ fsubd %f18,%f20,%f22
+/* 0x05e4 584 (19 22) */ fsubd %f18,%f12,%f20
+/* 0x05e8 585 (19 22) */ ldd [%i2+48],%f12
+/* 0x05ec 586 (20 23) */ fsubd %f18,%f10,%f10
+/* 0x05f0 587 (20 23) */ fmuld %f0,%f16,%f0
+/* 0x05f4 588 (21 23) */ fxnor %f14,%f4,%f4
+/* 0x05f8 589 (21 24) */ fmuld %f22,%f16,%f22
+/* 0x05fc 590 (22 24) */ fxnor %f14,%f6,%f6
+/* 0x0600 591 (22 25) */ fmuld %f20,%f16,%f20
+/* 0x0604 592 (23 26) */ fdtox %f0,%f0
+/* 0x0608 593 (23 24) */ std %f0,[%fp-217]
+/* 0x060c 594 (23 26) */ fmuld %f10,%f16,%f10
+/* 0x0610 595 (24 27) */ fdtox %f22,%f22
+/* 0x0614 596 (24 25) */ std %f22,[%fp-209]
+/* 0x0618 597 (25 28) */ fitod %f5,%f0
+/* 0x061c 598 (26 29) */ fdtox %f10,%f10
+/* 0x0620 599 (27 30) */ fdtox %f20,%f20
+/* 0x0624 600 (27 28) */ std %f20,[%fp-201]
+/* 0x0628 601 (28 31) */ fitod %f4,%f4
+/* 0x062c 602 (28 29) */ std %f10,[%fp-193]
+/* 0x0630 603 (29 31) */ fxnor %f14,%f2,%f10
+/* 0x0634 604 (30 33) */ fitod %f7,%f2
+/* 0x0638 605 (31 34) */ fsubd %f18,%f0,%f0
+/* 0x063c 606 (32 35) */ fsubd %f18,%f4,%f4
+/* 0x0640 607 (33 35) */ fxnor %f14,%f8,%f8
+
+!
+! ENTRY .L900000144
+!
+! Steady-state body of the software-pipelined conversion loop.
+! Each pass loads three new doubles from %i2 (six 32-bit words), stores
+! six converted 64-bit results into the buffer at %g4, and advances
+! %o0 by 3 and %g3 / %i0 by 6. The byte offsets rotate through
+! %o1, %o2, %o4, %i1 and %i4. Per-word arithmetic is
+!    fdtox( (%f18 - fitod(fxnor(%f14, w))) * %f16 ).
+! The loop repeats while %o0 <= %o3 (signed compare at 0x0658).
+!
+
+ .L900000144: /* frequency 1.0 confidence 0.0 */
+/* 0x0644 609 ( 0 3) */ fitod %f11,%f22
+/* 0x0648 610 ( 0 1) */ add %o0,3,%o0
+/* 0x064c 611 ( 0 1) */ add %g3,6,%g3
+/* 0x0650 612 ( 0 3) */ fmuld %f0,%f16,%f0
+/* 0x0654 613 ( 1 4) */ fmuld %f4,%f16,%f24
+! Loop test; no later instruction in the body writes %icc.
+/* 0x0658 614 ( 1 2) */ subcc %o0,%o3,%g0
+/* 0x065c 615 ( 1 2) */ add %i0,6,%i0
+/* 0x0660 616 ( 1 4) */ fsubd %f18,%f2,%f2
+/* 0x0664 617 ( 2 5) */ fitod %f6,%f4
+/* 0x0668 618 ( 3 6) */ fdtox %f0,%f0
+/* 0x066c 619 ( 3 4) */ add %o4,8,%i1
+/* 0x0670 620 ( 4 7) */ ldd [%i2+%i1],%f20
+/* 0x0674 621 ( 4 7) */ fdtox %f24,%f6
+/* 0x0678 622 ( 4 5) */ add %o2,16,%o4
+/* 0x067c 623 ( 5 8) */ fsubd %f18,%f4,%f4
+/* 0x0680 624 ( 5 6) */ std %f6,[%o4+%g4]
+/* 0x0684 625 ( 5 6) */ add %o1,16,%o2
+/* 0x0688 626 ( 6 8) */ fxnor %f14,%f12,%f6
+/* 0x068c 627 ( 6 7) */ std %f0,[%o2+%g4]
+/* 0x0690 628 ( 7 10) */ fitod %f9,%f0
+/* 0x0694 629 ( 7 10) */ fmuld %f2,%f16,%f2
+/* 0x0698 630 ( 8 11) */ fmuld %f4,%f16,%f24
+/* 0x069c 631 ( 8 11) */ fsubd %f18,%f22,%f12
+/* 0x06a0 632 ( 9 12) */ fitod %f10,%f4
+/* 0x06a4 633 (10 13) */ fdtox %f2,%f2
+/* 0x06a8 634 (10 11) */ add %i1,8,%o1
+/* 0x06ac 635 (11 14) */ ldd [%i2+%o1],%f22
+/* 0x06b0 636 (11 14) */ fdtox %f24,%f10
+/* 0x06b4 637 (11 12) */ add %o4,16,%i4
+/* 0x06b8 638 (12 15) */ fsubd %f18,%f4,%f4
+/* 0x06bc 639 (12 13) */ std %f10,[%i4+%g4]
+/* 0x06c0 640 (12 13) */ add %o2,16,%i1
+/* 0x06c4 641 (13 15) */ fxnor %f14,%f20,%f10
+/* 0x06c8 642 (13 14) */ std %f2,[%i1+%g4]
+/* 0x06cc 643 (14 17) */ fitod %f7,%f2
+/* 0x06d0 644 (14 17) */ fmuld %f12,%f16,%f12
+/* 0x06d4 645 (15 18) */ fmuld %f4,%f16,%f24
+/* 0x06d8 646 (15 18) */ fsubd %f18,%f0,%f0
+/* 0x06dc 647 (16 19) */ fitod %f8,%f4
+/* 0x06e0 648 (17 20) */ fdtox %f12,%f20
+/* 0x06e4 649 (17 18) */ add %o1,8,%o4
+/* 0x06e8 650 (18 21) */ ldd [%i2+%o4],%f12
+/* 0x06ec 651 (18 21) */ fdtox %f24,%f8
+/* 0x06f0 652 (18 19) */ add %i4,16,%o2
+/* 0x06f4 653 (19 22) */ fsubd %f18,%f4,%f4
+/* 0x06f8 654 (19 20) */ std %f8,[%o2+%g4]
+/* 0x06fc 655 (19 20) */ add %i1,16,%o1
+/* 0x0700 656 (20 22) */ fxnor %f14,%f22,%f8
+! Non-annulled branch: the store in the delay slot runs on every pass.
+/* 0x0704 657 (20 21) */ ble,pt %icc,.L900000144 ! tprob=0.50
+/* 0x0708 (20 21) */ std %f20,[%o1+%g4]
+
+!
+! ENTRY .L900000147
+!
+! Drain of the software-pipelined conversion loop: no new loads, the
+! values still in flight are finished and the last ten 64-bit results
+! are stored into the buffer at %g4 (offsets derived from %i4 / %i1).
+! It also advances %g3 and %i0 by 12 and sets %o1 = sign-extended %o0 * 8,
+! the byte offset of the next double to load.
+! If %o0 > %g2 every double has been converted and control goes to
+! .L77000043; otherwise it falls through into the one-double-per-pass
+! loop at .L77000077.
+!
+
+ .L900000147: /* frequency 1.0 confidence 0.0 */
+/* 0x070c 660 ( 0 3) */ fitod %f6,%f6
+/* 0x0710 661 ( 0 3) */ fmuld %f4,%f16,%f24
+/* 0x0714 662 ( 0 1) */ add %i4,32,%l4
+/* 0x0718 663 ( 1 4) */ fsubd %f18,%f2,%f2
+/* 0x071c 664 ( 1 4) */ fmuld %f0,%f16,%f22
+/* 0x0720 665 ( 1 2) */ add %i1,32,%l3
+/* 0x0724 666 ( 2 5) */ fitod %f10,%f28
+/* 0x0728 667 ( 2 3) */ sra %o0,0,%o2
+/* 0x072c 668 ( 2 3) */ add %i4,48,%l2
+/* 0x0730 669 ( 3 6) */ fsubd %f18,%f6,%f4
+/* 0x0734 670 ( 3 4) */ add %i1,48,%l1
+/* 0x0738 671 ( 3 4) */ add %i4,64,%l0
+/* 0x073c 672 ( 4 7) */ fitod %f11,%f26
+/* 0x0740 673 ( 4 5) */ sllx %o2,3,%o1
+/* 0x0744 674 ( 4 5) */ add %i1,64,%i5
+/* 0x0748 675 ( 5 8) */ fitod %f8,%f6
+/* 0x074c 676 ( 5 6) */ add %i4,80,%i4
+/* 0x0750 677 ( 5 6) */ add %i1,80,%i1
+/* 0x0754 678 ( 6 8) */ fxnor %f14,%f12,%f0
+/* 0x0758 679 ( 6 9) */ fmuld %f4,%f16,%f20
+/* 0x075c 680 ( 6 7) */ add %i4,16,%o4
+/* 0x0760 681 ( 7 10) */ fitod %f9,%f4
+/* 0x0764 682 ( 7 10) */ fmuld %f2,%f16,%f12
+/* 0x0768 683 ( 7 8) */ add %i1,16,%o3
+/* 0x076c 684 ( 8 11) */ fsubd %f18,%f28,%f10
+! Sets %icc for the bg at the end of this block.
+/* 0x0770 685 ( 8 9) */ subcc %o0,%g2,%g0
+/* 0x0774 686 ( 8 9) */ add %g3,12,%g3
+/* 0x0778 687 ( 9 12) */ fitod %f0,%f2
+/* 0x077c 688 (10 13) */ fsubd %f18,%f26,%f8
+/* 0x0780 689 (11 14) */ fitod %f1,%f0
+/* 0x0784 690 (11 14) */ fmuld %f10,%f16,%f10
+/* 0x0788 691 (12 15) */ fdtox %f24,%f24
+/* 0x078c 692 (12 13) */ std %f24,[%l4+%g4]
+/* 0x0790 693 (12 13) */ add %i0,12,%i0
+/* 0x0794 694 (13 16) */ fsubd %f18,%f6,%f6
+/* 0x0798 695 (13 16) */ fmuld %f8,%f16,%f8
+/* 0x079c 696 (14 17) */ fdtox %f22,%f22
+/* 0x07a0 697 (14 15) */ std %f22,[%l3+%g4]
+/* 0x07a4 698 (15 18) */ fsubd %f18,%f4,%f4
+/* 0x07a8 699 (16 19) */ fdtox %f20,%f20
+/* 0x07ac 700 (16 17) */ std %f20,[%l2+%g4]
+/* 0x07b0 701 (16 19) */ fmuld %f6,%f16,%f6
+/* 0x07b4 702 (17 20) */ fsubd %f18,%f2,%f2
+/* 0x07b8 703 (18 21) */ fsubd %f18,%f0,%f0
+/* 0x07bc 704 (18 21) */ fmuld %f4,%f16,%f4
+/* 0x07c0 705 (19 22) */ fdtox %f12,%f12
+/* 0x07c4 706 (19 20) */ std %f12,[%l1+%g4]
+/* 0x07c8 707 (20 23) */ fdtox %f10,%f10
+/* 0x07cc 708 (20 21) */ std %f10,[%l0+%g4]
+/* 0x07d0 709 (20 23) */ fmuld %f2,%f16,%f2
+/* 0x07d4 710 (21 24) */ fdtox %f8,%f8
+/* 0x07d8 711 (21 22) */ std %f8,[%i5+%g4]
+/* 0x07dc 712 (21 24) */ fmuld %f0,%f16,%f0
+/* 0x07e0 713 (22 25) */ fdtox %f6,%f6
+/* 0x07e4 714 (22 23) */ std %f6,[%i4+%g4]
+/* 0x07e8 715 (23 26) */ fdtox %f4,%f4
+/* 0x07ec 716 (23 24) */ std %f4,[%i1+%g4]
+/* 0x07f0 717 (24 27) */ fdtox %f2,%f2
+/* 0x07f4 718 (24 25) */ std %f2,[%o4+%g4]
+/* 0x07f8 719 (25 28) */ fdtox %f0,%f0
+/* 0x07fc 720 (25 26) */ bg,pn %icc,.L77000043 ! tprob=0.50
+/* 0x0800 (25 26) */ std %f0,[%o3+%g4]
+
+!
+! ENTRY .L77000077
+!
+! Simple conversion loop, one double (two 32-bit words) per pass.
+! Used for short inputs and for what the unrolled loop leaves over.
+! On entry %o1 is the byte offset of the next double in %i2, %o0 the
+! double index, %i0 / %g3 the buffer slot indices for its first /
+! second word, and %g2 the last double index.
+!
+
+ .L77000077: /* frequency 1.0 confidence 0.0 */
+/* 0x0804 723 ( 0 3) */ ldd [%i2+%o1],%f0
+
+!
+! ENTRY .L900000155
+!
+! Loop body: both words of %f0:%f1 go through
+!    fdtox( (%f18 - fitod(fxnor(%f14, w))) * %f16 )
+! and are stored as 64-bit integers at %g4 + 8*%i0 and %g4 + 8*%g3.
+! Then %i0 and %g3 advance by 2 and %o0 by 1. Repeats while %o0 <= %g2.
+!
+
+ .L900000155: /* frequency 1.0 confidence 0.0 */
+/* 0x0808 725 ( 0 2) */ fxnor %f14,%f0,%f0
+/* 0x080c 726 ( 0 1) */ sra %i0,0,%o1
+/* 0x0810 727 ( 0 1) */ add %o0,1,%o0
+/* 0x0814 728 ( 1 2) */ sllx %o1,3,%i4
+/* 0x0818 729 ( 1 2) */ add %i0,2,%i0
+/* 0x081c 730 ( 2 5) */ fitod %f0,%f2
+/* 0x0820 731 ( 2 3) */ sra %g3,0,%o1
+/* 0x0824 732 ( 2 3) */ add %g3,2,%g3
+/* 0x0828 733 ( 3 6) */ fitod %f1,%f0
+/* 0x082c 734 ( 3 4) */ sllx %o1,3,%i1
+/* 0x0830 735 ( 3 4) */ subcc %o0,%g2,%g0
+/* 0x0834 736 ( 4 5) */ sra %o0,0,%o2
+/* 0x0838 737 ( 5 8) */ fsubd %f18,%f2,%f2
+! %o1 = byte offset of the next double (incremented %o0 * 8).
+/* 0x083c 738 ( 5 6) */ sllx %o2,3,%o1
+/* 0x0840 739 ( 6 9) */ fsubd %f18,%f0,%f0
+/* 0x0844 740 ( 8 11) */ fmuld %f2,%f16,%f2
+/* 0x0848 741 ( 9 12) */ fmuld %f0,%f16,%f0
+/* 0x084c 742 (11 14) */ fdtox %f2,%f2
+/* 0x0850 743 (11 12) */ std %f2,[%i4+%g4]
+/* 0x0854 744 (12 15) */ fdtox %f0,%f0
+/* 0x0858 745 (12 13) */ std %f0,[%i1+%g4]
+! Annulled branch: the next double is loaded only when the loop repeats,
+! so nothing is read past the last index.
+/* 0x085c 746 (12 13) */ ble,a,pt %icc,.L900000155 ! tprob=0.50
+/* 0x0860 (14 17) */ ldd [%i2+%o1],%f0
+
+!
+! ENTRY .L77000043
+!
+! Conversion finished: set %icc from the word count in %i3 for the
+! branch at .L900000156.
+!
+
+ .L77000043: /* frequency 1.0 confidence 0.0 */
+/* 0x0864 754 ( 0 1) */ subcc %i3,0,%g0
+
+!
+! ENTRY .L900000156
+!
+! Start of the carry-propagating add pass. For each index i it forms
+!    sum = (64-bit buffer entry i at %g4) + (32-bit word i at %o5) + carry
+! stores the low 32 bits of sum at %o7 + 4*i, and keeps sum >> 32
+! (arithmetic shift) as the next carry.
+! With %i3 <= 0 it returns immediately, using %g5 as the result.
+! With %i3 < 5 it goes to the one-word loop at .L77000078, %i0 = 0.
+! Otherwise words 0 and 1 are handled here, the operands for word 2 are
+! preloaded (%o2 = buffer entry, %o1 = word, %o0 = carry), %i0 is set
+! to 3, and control falls through to .L900000140.
+! %g3 = %i3 - 1 is the last index.
+!
+
+ .L900000156: /* frequency 1.0 confidence 0.0 */
+! Annulled branch: %o3 = %g5 is executed only when the branch is taken.
+/* 0x0868 756 ( 0 1) */ ble,a,pt %icc,.L77000061 ! tprob=0.50
+/* 0x086c ( 0 1) */ or %g0,%g5,%o3
+/* 0x0870 761 ( 0 2) */ ldx [%fp-209],%i1
+/* 0x0874 762 ( 1 2) */ sub %i3,1,%g3
+/* 0x0878 763 ( 1 2) */ or %g0,0,%i0
+/* 0x087c 764 ( 2 3) */ subcc %i3,5,%g0
+! Non-annulled branch: %i2 = buffer entry 0 is loaded on both paths.
+/* 0x0880 765 ( 2 3) */ bl,pn %icc,.L77000078 ! tprob=0.50
+/* 0x0884 ( 2 4) */ ldx [%fp-217],%i2
+! From here on %i3 no longer holds the count; it is reused for word 0.
+/* 0x0888 767 ( 3 6) */ ld [%o5],%i3
+/* 0x088c 768 ( 3 4) */ or %g0,8,%g2
+/* 0x0890 769 ( 3 4) */ or %g0,16,%o4
+/* 0x0894 770 ( 4 5) */ sub %g3,1,%o3
+/* 0x0898 771 ( 4 5) */ or %g0,3,%i0
+/* 0x089c 772 ( 5 6) */ add %i2,%i3,%o1
+/* 0x08a0 773 ( 5 8) */ ld [%o5+4],%i2
+/* 0x08a4 774 ( 6 7) */ st %o1,[%o7]
+/* 0x08a8 775 ( 6 7) */ srax %o1,32,%o1
+/* 0x08ac 776 ( 7 9) */ ldx [%fp-201],%o2
+/* 0x08b0 777 ( 7 8) */ add %i1,%i2,%o0
+/* 0x08b4 778 ( 7 8) */ or %g0,%o1,%i1
+/* 0x08b8 779 ( 8 11) */ ld [%o5+8],%o1
+/* 0x08bc 780 ( 8 9) */ add %o0,%i1,%o0
+/* 0x08c0 781 ( 9 10) */ st %o0,[%o7+4]
+/* 0x08c4 782 ( 9 10) */ srax %o0,32,%o0
+
+!
+! ENTRY .L900000140
+!
+! Add loop unrolled by two. On entry and at the end of every pass:
+!    %o2 = buffer entry and %o1 = 32-bit word for the pending index,
+!    %o0 = carry from the previous sum,
+!    %g2 = byte offset of the pending word in %o5 / %o7,
+!    %o4 = byte offset of the pending entry in the buffer at %g4.
+! Each pass stores two result words (at %o7+%g2 and %o7+%g2+4), preloads
+! the operands for the following word and advances %i0 by 2.
+! Repeats while %i0 <= %o3.
+!
+
+ .L900000140: /* frequency 1.0 confidence 0.0 */
+/* 0x08c8 784 ( 0 1) */ add %g2,4,%i1
+/* 0x08cc 785 ( 0 1) */ add %o4,8,%o4
+/* 0x08d0 786 ( 1 3) */ ldx [%o4+%g4],%i2
+! sra by 0 sign-extends the low 32 bits of the carry.
+/* 0x08d4 787 ( 1 2) */ sra %o0,0,%g5
+/* 0x08d8 788 ( 1 2) */ add %o2,%o1,%o1
+/* 0x08dc 789 ( 2 5) */ ld [%o5+%i1],%o0
+/* 0x08e0 790 ( 2 3) */ add %o1,%g5,%o1
+/* 0x08e4 791 ( 2 3) */ add %i0,2,%i0
+/* 0x08e8 792 ( 3 4) */ st %o1,[%o7+%g2]
+/* 0x08ec 793 ( 3 4) */ srax %o1,32,%g5
+/* 0x08f0 794 ( 3 4) */ subcc %i0,%o3,%g0
+/* 0x08f4 795 ( 4 5) */ add %g2,8,%g2
+/* 0x08f8 796 ( 4 5) */ add %o4,8,%o4
+/* 0x08fc 797 ( 5 7) */ ldx [%o4+%g4],%o2
+/* 0x0900 798 ( 5 6) */ add %i2,%o0,%o0
+/* 0x0904 799 ( 6 9) */ ld [%o5+%g2],%o1
+/* 0x0908 800 ( 6 7) */ add %o0,%g5,%o0
+/* 0x090c 801 ( 7 8) */ st %o0,[%o7+%i1]
+! Non-annulled branch: the carry is extracted on every pass.
+/* 0x0910 802 ( 7 8) */ ble,pt %icc,.L900000140 ! tprob=0.50
+/* 0x0914 ( 7 8) */ srax %o0,32,%o0
+
+!
+! ENTRY .L900000143
+!
+! Exit of the unrolled add loop: completes the word whose operands were
+! preloaded (%o2 + %o1 + sign-extended carry in %o0), stores it at
+! %o7+%g2 and leaves the new carry in %g5.
+! If %i0 > %g3 all words are done and control goes to the return at
+! .L77000061 with %o3 = %g5 (annulled delay slot, executed only when
+! taken); otherwise it falls through to the one-word loop.
+!
+
+ .L900000143: /* frequency 1.0 confidence 0.0 */
+/* 0x0918 805 ( 0 1) */ sra %o0,0,%o3
+/* 0x091c 806 ( 0 1) */ add %o2,%o1,%o0
+/* 0x0920 807 ( 1 2) */ add %o0,%o3,%o0
+/* 0x0924 808 ( 1 2) */ st %o0,[%o7+%g2]
+/* 0x0928 809 ( 1 2) */ subcc %i0,%g3,%g0
+/* 0x092c 810 ( 2 3) */ srax %o0,32,%g5
+/* 0x0930 811 ( 2 3) */ bg,a,pn %icc,.L77000061 ! tprob=0.50
+/* 0x0934 ( 3 4) */ or %g0,%g5,%o3
+
+!
+! ENTRY .L77000078
+!
+! Entry to the one-word-per-pass add loop: %o0 = sign-extended index %i0.
+!
+
+ .L77000078: /* frequency 1.0 confidence 0.0 */
+/* 0x0938 814 ( 0 1) */ sra %i0,0,%o0
+
+!
+! ENTRY .L900000154
+!
+! One word per pass, for index i = %o0:
+!    sum = (64-bit entry at %g4 + 8*i) + (32-bit word at %o5 + 4*i)
+!          + sign-extended low 32 bits of %g5 (incoming carry)
+!    low 32 bits of sum -> %o7 + 4*i,  %g5 = sum >> 32 (arithmetic)
+! Repeats while the incremented %i0 <= %g3.
+!
+
+ .L900000154: /* frequency 1.0 confidence 0.0 */
+/* 0x093c 816 ( 0 1) */ sllx %o0,2,%g2
+/* 0x0940 817 ( 0 1) */ add %i0,1,%i0
+/* 0x0944 818 ( 1 2) */ sllx %o0,3,%o4
+/* 0x0948 819 ( 1 4) */ ld [%o5+%g2],%o2
+/* 0x094c 820 ( 1 2) */ subcc %i0,%g3,%g0
+/* 0x0950 821 ( 2 4) */ ldx [%o4+%g4],%o0
+/* 0x0954 822 ( 2 3) */ sra %g5,0,%o1
+/* 0x0958 823 ( 4 5) */ add %o0,%o2,%o0
+/* 0x095c 824 ( 5 6) */ add %o0,%o1,%o0
+/* 0x0960 825 ( 5 6) */ st %o0,[%o7+%g2]
+/* 0x0964 826 ( 6 7) */ srax %o0,32,%g5
+! Non-annulled branch: the delay slot refreshes %o0 with the next index.
+/* 0x0968 827 ( 6 7) */ ble,pt %icc,.L900000154 ! tprob=0.50
+/* 0x096c ( 7 8) */ sra %i0,0,%o0
+
+!
+! ENTRY .L77000047
+!
+! Fall-through exit of the one-word add loop: the final carry in %g5
+! becomes the result candidate in %o3.
+!
+
+ .L77000047: /* frequency 1.0 confidence 0.0 */
+/* 0x0970 834 ( 0 1) */ or %g0,%g5,%o3
+
+!
+! ENTRY .L77000061
+!
+! Common return: the result is the low 32 bits of %o3, zero-extended
+! (srl by 0) into %i0. The register window is restored in the delay
+! slot of ret.
+!
+
+ .L77000061: /* frequency 1.0 confidence 0.0 */
+
+/* 0x0974 835 ( 1 2) */ srl %o3,0,%i0
+/* 0x0978 ( 2 4) */ ret ! Result = %o1 %o0 %f0 %f1
+/* 0x097c ( 4 5) */ restore %g0,%g0,%g0
+
+!
+! ENTRY .L77000048
+!
+! Fully unrolled special case. The condition codes tested by the first
+! branch are set before this chunk. NOTE(review): this path reads four
+! doubles from %o2 and writes exactly eight words ([%i0] .. [%i0+28]),
+! so the preceding compare is presumably "count == 8" - confirm.
+!
+! The 32-bit value in %i4 is split into its upper 13 bits (%i4 >> 19)
+! and lower 19 bits (%i4 & ~0xfff80000). Each part is moved through the
+! stack into the low word of an FP register pair whose high word comes
+! from the double at [%o0]; subtracting that double then gives
+!    %f8 = upper part and %f6 = lower part, as doubles.
+! (NOTE(review): relies on the constant at [%o0], not visible here.)
+!
+! Each input word w from %o2 becomes x = %f16 - fitod(fxnor(%f0, w)),
+! with %f16 = double at [%o0+8]. The products x*%f8 and x*%f6 are
+! converted with fdtox, spilled to the stack and reloaded as integers.
+! For every word k:
+!    sum = (x*%f6) + ((x*%f8) << 19) + (word k at %i1) + carry
+!    low 32 bits of sum -> [%i0 + 4*k],  carry = sum >> 32 (logical, srlx)
+! All eight words at [%i1] .. [%i1+28] are loaded before the first store
+! to %i0. The final carry, truncated to 32 bits, is returned in %i0.
+!
+
+ .L77000048: /* frequency 1.0 confidence 0.0 */
+! Non-annulled branch: %g2 = 0xfff80000 is set on both paths.
+/* 0x0980 844 ( 0 1) */ bne,pn %icc,.L77000050 ! tprob=0.50
+/* 0x0984 ( 0 1) */ sethi %hi(0xfff80000),%g2
+/* 0x0988 854 ( 0 3) */ ldd [%o2],%f4
+/* 0x098c 855 ( 1 4) */ ldd [%o0],%f6
+/* 0x0990 856 ( 1 2) */ srl %i4,19,%g3
+/* 0x0994 857 ( 1 2) */ andn %i4,%g2,%g2
+/* 0x0998 858 ( 2 3) */ st %g3,[%sp+2351]
+/* 0x099c 859 ( 2 4) */ fxnor %f0,%f4,%f4
+/* 0x09a0 860 ( 3 4) */ st %g2,[%sp+2355]
+/* 0x09a4 861 ( 4 7) */ ldd [%o2+8],%f12
+/* 0x09a8 862 ( 4 7) */ fitod %f4,%f10
+/* 0x09ac 863 ( 5 8) */ ldd [%o0+8],%f16
+/* 0x09b0 864 ( 5 8) */ fitod %f5,%f4
+/* 0x09b4 865 ( 6 9) */ ldd [%o2+16],%f18
+/* 0x09b8 866 ( 6 8) */ fxnor %f0,%f12,%f12
+! Reload the two halves of %i4 into the low words of %f8:%f9 / %f14:%f15.
+/* 0x09bc 867 ( 7 10) */ ld [%sp+2351],%f9
+/* 0x09c0 868 ( 7 10) */ fsubd %f16,%f10,%f10
+/* 0x09c4 869 ( 8 11) */ ld [%sp+2355],%f15
+/* 0x09c8 870 ( 8 11) */ fitod %f12,%f22
+/* 0x09cc 871 ( 9 12) */ ldd [%o2+24],%f20
+/* 0x09d0 872 ( 9 12) */ fitod %f13,%f12
+! Load all eight addend words from %i1 up front.
+/* 0x09d4 876 (10 13) */ ld [%i1],%g2
+/* 0x09d8 877 (10 13) */ fsubd %f16,%f4,%f4
+/* 0x09dc 878 (11 14) */ ld [%i1+4],%g3
+/* 0x09e0 879 (11 14) */ fsubd %f16,%f22,%f22
+/* 0x09e4 880 (12 15) */ ld [%i1+8],%g4
+/* 0x09e8 881 (12 14) */ fxnor %f0,%f18,%f18
+/* 0x09ec 882 (13 16) */ ld [%i1+12],%g5
+/* 0x09f0 883 (13 16) */ fsubd %f16,%f12,%f12
+/* 0x09f4 884 (14 17) */ ld [%i1+16],%o0
+/* 0x09f8 885 (14 17) */ fitod %f18,%f26
+/* 0x09fc 886 (15 18) */ ld [%i1+20],%o1
+/* 0x0a00 887 (15 17) */ fxnor %f0,%f20,%f20
+/* 0x0a04 888 (16 19) */ ld [%i1+24],%o2
+/* 0x0a08 889 (17 20) */ ld [%i1+28],%o3
+! Complete the pairs with the high word of the [%o0] double, then
+! subtract that double: %f8 = upper part, %f6 = lower part of %i4.
+/* 0x0a0c 890 (19 20) */ fmovs %f6,%f8
+/* 0x0a10 891 (20 21) */ fmovs %f6,%f14
+/* 0x0a14 892 (22 25) */ fsubd %f8,%f6,%f8
+/* 0x0a18 893 (23 26) */ fsubd %f14,%f6,%f6
+/* 0x0a1c 894 (25 28) */ fmuld %f10,%f8,%f14
+/* 0x0a20 895 (26 29) */ fmuld %f10,%f6,%f10
+/* 0x0a24 896 (27 30) */ fmuld %f4,%f8,%f24
+/* 0x0a28 897 (28 31) */ fdtox %f14,%f14
+/* 0x0a2c 898 (28 29) */ std %f14,[%sp+2335]
+/* 0x0a30 899 (28 31) */ fmuld %f22,%f8,%f28
+/* 0x0a34 900 (29 32) */ fitod %f19,%f14
+/* 0x0a38 901 (29 32) */ fmuld %f22,%f6,%f18
+/* 0x0a3c 902 (30 33) */ fdtox %f10,%f10
+/* 0x0a40 903 (30 31) */ std %f10,[%sp+2343]
+/* 0x0a44 904 (30 33) */ fmuld %f4,%f6,%f4
+/* 0x0a48 905 (31 34) */ fmuld %f12,%f8,%f22
+/* 0x0a4c 906 (32 35) */ fdtox %f18,%f18
+/* 0x0a50 907 (32 33) */ std %f18,[%sp+2311]
+/* 0x0a54 908 (32 35) */ fmuld %f12,%f6,%f10
+/* 0x0a58 909 (33 35) */ ldx [%sp+2335],%o4
+/* 0x0a5c 910 (33 36) */ fdtox %f24,%f12
+/* 0x0a60 911 (34 35) */ std %f12,[%sp+2319]
+/* 0x0a64 912 (34 37) */ fsubd %f16,%f26,%f12
+/* 0x0a68 913 (35 37) */ ldx [%sp+2343],%o5
+! Word 0: (upper-part product << 19) + lower-part product + [%i1].
+/* 0x0a6c 914 (35 36) */ sllx %o4,19,%o4
+/* 0x0a70 915 (35 38) */ fdtox %f4,%f4
+/* 0x0a74 916 (36 37) */ std %f4,[%sp+2327]
+/* 0x0a78 917 (36 39) */ fdtox %f28,%f24
+/* 0x0a7c 918 (37 38) */ std %f24,[%sp+2303]
+/* 0x0a80 919 (37 40) */ fitod %f20,%f4
+/* 0x0a84 920 (37 38) */ add %o5,%o4,%o4
+/* 0x0a88 921 (37 40) */ fmuld %f12,%f8,%f24
+/* 0x0a8c 922 (38 40) */ ldx [%sp+2319],%o7
+/* 0x0a90 923 (38 41) */ fsubd %f16,%f14,%f14
+/* 0x0a94 924 (38 39) */ add %o4,%g2,%o4
+/* 0x0a98 925 (38 41) */ fmuld %f12,%f6,%f12
+/* 0x0a9c 926 (39 41) */ ldx [%sp+2327],%o5
+/* 0x0aa0 927 (39 42) */ fitod %f21,%f18
+/* 0x0aa4 928 (40 41) */ st %o4,[%i0]
+/* 0x0aa8 929 (40 41) */ sllx %o7,19,%o7
+/* 0x0aac 930 (40 43) */ fdtox %f22,%f20
+/* 0x0ab0 931 (41 42) */ std %f20,[%sp+2287]
+/* 0x0ab4 932 (41 44) */ fdtox %f10,%f10
+/* 0x0ab8 933 (41 42) */ add %o5,%o7,%o5
+/* 0x0abc 934 (41 44) */ fmuld %f14,%f8,%f20
+/* 0x0ac0 935 (42 43) */ std %f10,[%sp+2295]
+/* 0x0ac4 936 (42 43) */ srlx %o4,32,%o7
+/* 0x0ac8 937 (42 45) */ fsubd %f16,%f4,%f4
+/* 0x0acc 938 (42 45) */ fmuld %f14,%f6,%f14
+/* 0x0ad0 939 (43 45) */ ldx [%sp+2311],%g2
+/* 0x0ad4 940 (43 46) */ fdtox %f24,%f10
+/* 0x0ad8 941 (43 44) */ add %o5,%g3,%g3
+/* 0x0adc 942 (44 45) */ std %f10,[%sp+2271]
+/* 0x0ae0 943 (44 45) */ add %g3,%o7,%g3
+/* 0x0ae4 944 (44 47) */ fdtox %f12,%f12
+/* 0x0ae8 945 (45 47) */ ldx [%sp+2303],%l0
+/* 0x0aec 946 (45 48) */ fsubd %f16,%f18,%f10
+! Last use of %f16 as the constant was just above; it is reused as a
+! product destination from here on.
+/* 0x0af0 947 (45 48) */ fmuld %f4,%f8,%f16
+/* 0x0af4 948 (46 47) */ std %f12,[%sp+2279]
+/* 0x0af8 949 (46 49) */ fdtox %f20,%f12
+/* 0x0afc 950 (46 49) */ fmuld %f4,%f6,%f4
+/* 0x0b00 951 (47 48) */ std %f12,[%sp+2255]
+/* 0x0b04 952 (47 48) */ sllx %l0,19,%l0
+/* 0x0b08 953 (47 50) */ fdtox %f14,%f12
+/* 0x0b0c 954 (48 50) */ ldx [%sp+2287],%o5
+/* 0x0b10 955 (48 49) */ add %g2,%l0,%g2
+/* 0x0b14 956 (48 51) */ fmuld %f10,%f8,%f8
+/* 0x0b18 957 (49 51) */ ldx [%sp+2295],%l1
+/* 0x0b1c 958 (49 50) */ srlx %g3,32,%l0
+/* 0x0b20 959 (49 50) */ add %g2,%g4,%g4
+/* 0x0b24 960 (49 52) */ fmuld %f10,%f6,%f6
+/* 0x0b28 961 (50 51) */ std %f12,[%sp+2263]
+/* 0x0b2c 962 (50 51) */ sllx %o5,19,%g2
+/* 0x0b30 963 (50 51) */ add %g4,%l0,%g4
+/* 0x0b34 964 (51 53) */ ldx [%sp+2279],%l0
+/* 0x0b38 965 (51 52) */ srlx %g4,32,%o5
+/* 0x0b3c 966 (51 52) */ add %l1,%g2,%g2
+/* 0x0b40 967 (52 53) */ st %g3,[%i0+4]
+/* 0x0b44 968 (52 53) */ add %g2,%g5,%g2
+/* 0x0b48 969 (52 55) */ fdtox %f16,%f10
+/* 0x0b4c 970 (53 55) */ ldx [%sp+2271],%o7
+/* 0x0b50 971 (53 54) */ add %g2,%o5,%g2
+/* 0x0b54 972 (53 56) */ fdtox %f4,%f4
+/* 0x0b58 973 (54 55) */ std %f10,[%sp+2239]
+/* 0x0b5c 974 (55 56) */ sllx %o7,19,%o7
+/* 0x0b60 975 (55 56) */ std %f4,[%sp+2247]
+/* 0x0b64 976 (55 58) */ fdtox %f8,%f4
+/* 0x0b68 977 (56 57) */ add %l0,%o7,%o7
+/* 0x0b6c 978 (56 58) */ ldx [%sp+2263],%o5
+/* 0x0b70 979 (57 58) */ add %o7,%o0,%o0
+/* 0x0b74 980 (57 58) */ std %f4,[%sp+2223]
+/* 0x0b78 981 (57 60) */ fdtox %f6,%f4
+/* 0x0b7c 982 (58 60) */ ldx [%sp+2255],%g5
+/* 0x0b80 983 (58 59) */ srlx %g2,32,%o7
+/* 0x0b84 984 (59 60) */ std %f4,[%sp+2231]
+/* 0x0b88 985 (59 60) */ add %o0,%o7,%o0
+/* 0x0b8c 986 (60 61) */ sllx %g5,19,%g5
+/* 0x0b90 987 (60 62) */ ldx [%sp+2247],%l1
+/* 0x0b94 988 (61 62) */ add %o5,%g5,%g5
+/* 0x0b98 989 (61 62) */ st %g2,[%i0+12]
+/* 0x0b9c 990 (62 64) */ ldx [%sp+2239],%l0
+/* 0x0ba0 991 (62 63) */ srlx %o0,32,%o4
+/* 0x0ba4 992 (62 63) */ add %g5,%o1,%o1
+/* 0x0ba8 993 (63 64) */ add %o1,%o4,%o1
+/* 0x0bac 994 (63 65) */ ldx [%sp+2223],%o7
+/* 0x0bb0 995 (64 65) */ sllx %l0,19,%g3
+/* 0x0bb4 996 (64 66) */ ldx [%sp+2231],%o5
+/* 0x0bb8 997 (65 66) */ add %l1,%g3,%o4
+/* 0x0bbc 998 (65 66) */ st %o0,[%i0+16]
+/* 0x0bc0 999 (66 67) */ add %o4,%o2,%o2
+/* 0x0bc4 1000 (66 67) */ st %o1,[%i0+20]
+/* 0x0bc8 1001 (67 68) */ srlx %o1,32,%o4
+/* 0x0bcc 1002 (67 68) */ st %g4,[%i0+8]
+/* 0x0bd0 1003 (68 69) */ sllx %o7,19,%g2
+/* 0x0bd4 1004 (68 69) */ add %o2,%o4,%o4
+/* 0x0bd8 1005 (68 69) */ st %o4,[%i0+24]
+/* 0x0bdc 1006 (69 70) */ add %o5,%g2,%g2
+/* 0x0be0 1007 (70 71) */ srlx %o4,32,%g3
+/* 0x0be4 1008 (70 71) */ add %g2,%o3,%g2
+/* 0x0be8 1009 (71 72) */ add %g2,%g3,%g2
+/* 0x0bec 1010 (71 72) */ st %g2,[%i0+28]
+! Return the final carry, truncated to 32 bits.
+/* 0x0bf0 1014 (72 73) */ srlx %g2,32,%o3
+/* 0x0bf4 1015 (73 74) */ srl %o3,0,%i0
+/* 0x0bf8 (74 76) */ ret ! Result = %o1 %o0 %f0 %f1
+/* 0x0bfc (76 77) */ restore %g0,%g0,%g0
+
+!
+! ENTRY .L77000050
+!
+
+ .L77000050: /* frequency 1.0 confidence 0.0 */
+/* 0x0c00 1022 ( 0 1) */ subcc %i3,16,%g0
+/* 0x0c04 1023 ( 0 1) */ bne,pn %icc,.L77000073 ! tprob=0.50
+/* 0x0c08 ( 0 1) */ sethi %hi(0xfff80000),%g2
+/* 0x0c0c 1034 ( 1 4) */ ldd [%o2],%f4
+/* 0x0c10 1035 ( 1 2) */ andn %i4,%g2,%g2
+/* 0x0c14 1036 ( 2 3) */ st %g2,[%sp+2483]
+/* 0x0c18 1037 ( 2 3) */ srl %i4,19,%g2
+/* 0x0c1c 1038 ( 3 4) */ st %g2,[%sp+2479]
+/* 0x0c20 1039 ( 3 5) */ fxnor %f0,%f4,%f4
+/* 0x0c24 1040 ( 4 7) */ ldd [%o0],%f8
+/* 0x0c28 1041 ( 5 8) */ fitod %f4,%f10
+/* 0x0c2c 1042 ( 5 8) */ ldd [%o0+8],%f16
+/* 0x0c30 1043 ( 6 9) */ ldd [%o2+8],%f14
+/* 0x0c34 1044 ( 6 9) */ fitod %f5,%f4
+/* 0x0c38 1045 ( 7 10) */ ld [%sp+2483],%f13
+/* 0x0c3c 1046 ( 8 11) */ ld [%sp+2479],%f7
+/* 0x0c40 1047 ( 8 11) */ fsubd %f16,%f10,%f10
+/* 0x0c44 1048 ( 9 11) */ fxnor %f0,%f14,%f14
+/* 0x0c48 1049 (10 13) */ fsubd %f16,%f4,%f4
+/* 0x0c4c 1050 (14 15) */ fmovs %f8,%f12
+/* 0x0c50 1051 (15 16) */ fmovs %f8,%f6
+/* 0x0c54 1052 (17 20) */ fsubd %f12,%f8,%f12
+/* 0x0c58 1053 (18 21) */ fsubd %f6,%f8,%f6
+/* 0x0c5c 1054 (19 22) */ fitod %f14,%f8
+/* 0x0c60 1055 (20 23) */ fmuld %f10,%f12,%f18
+/* 0x0c64 1056 (20 23) */ fitod %f15,%f14
+/* 0x0c68 1057 (21 24) */ fmuld %f10,%f6,%f10
+/* 0x0c6c 1058 (22 25) */ fsubd %f16,%f8,%f8
+/* 0x0c70 1059 (22 25) */ fmuld %f4,%f12,%f20
+/* 0x0c74 1060 (23 26) */ fmuld %f4,%f6,%f4
+/* 0x0c78 1061 (23 26) */ fsubd %f16,%f14,%f14
+/* 0x0c7c 1062 (24 27) */ fdtox %f10,%f10
+/* 0x0c80 1063 (24 25) */ std %f10,[%sp+2463]
+/* 0x0c84 1064 (25 28) */ fmuld %f8,%f12,%f10
+/* 0x0c88 1065 (25 28) */ fdtox %f18,%f18
+/* 0x0c8c 1066 (25 26) */ std %f18,[%sp+2471]
+/* 0x0c90 1067 (26 29) */ fmuld %f8,%f6,%f8
+/* 0x0c94 1068 (26 29) */ fdtox %f4,%f4
+/* 0x0c98 1069 (26 27) */ std %f4,[%sp+2447]
+/* 0x0c9c 1070 (27 30) */ fmuld %f14,%f12,%f4
+/* 0x0ca0 1071 (27 30) */ fdtox %f20,%f18
+/* 0x0ca4 1072 (27 28) */ std %f18,[%sp+2455]
+/* 0x0ca8 1073 (28 31) */ fdtox %f10,%f10
+/* 0x0cac 1074 (28 29) */ std %f10,[%sp+2439]
+/* 0x0cb0 1075 (28 31) */ fmuld %f14,%f6,%f14
+/* 0x0cb4 1076 (29 32) */ fdtox %f8,%f8
+/* 0x0cb8 1077 (29 30) */ std %f8,[%sp+2431]
+/* 0x0cbc 1078 (30 33) */ ldd [%o2+16],%f10
+/* 0x0cc0 1079 (30 33) */ fdtox %f4,%f4
+/* 0x0cc4 1080 (31 34) */ ldd [%o2+24],%f8
+/* 0x0cc8 1081 (31 34) */ fdtox %f14,%f14
+/* 0x0ccc 1082 (32 33) */ std %f4,[%sp+2423]
+/* 0x0cd0 1083 (32 34) */ fxnor %f0,%f10,%f10
+/* 0x0cd4 1084 (33 35) */ fxnor %f0,%f8,%f4
+/* 0x0cd8 1085 (33 34) */ std %f14,[%sp+2415]
+/* 0x0cdc 1086 (34 37) */ fitod %f10,%f8
+/* 0x0ce0 1087 (35 38) */ fitod %f11,%f10
+/* 0x0ce4 1088 (36 39) */ fitod %f4,%f14
+/* 0x0ce8 1089 (37 40) */ fsubd %f16,%f8,%f8
+/* 0x0cec 1090 (38 41) */ fsubd %f16,%f10,%f10
+/* 0x0cf0 1091 (39 42) */ fsubd %f16,%f14,%f14
+/* 0x0cf4 1092 (40 43) */ fmuld %f8,%f12,%f18
+/* 0x0cf8 1093 (40 43) */ fitod %f5,%f4
+/* 0x0cfc 1094 (41 44) */ fmuld %f8,%f6,%f8
+/* 0x0d00 1095 (42 45) */ fmuld %f10,%f12,%f20
+/* 0x0d04 1096 (43 46) */ fmuld %f10,%f6,%f10
+/* 0x0d08 1097 (43 46) */ fsubd %f16,%f4,%f4
+/* 0x0d0c 1098 (44 47) */ fdtox %f8,%f8
+/* 0x0d10 1099 (44 45) */ std %f8,[%sp+2399]
+/* 0x0d14 1100 (45 48) */ fmuld %f14,%f12,%f8
+/* 0x0d18 1101 (45 48) */ fdtox %f18,%f18
+/* 0x0d1c 1102 (45 46) */ std %f18,[%sp+2407]
+/* 0x0d20 1103 (46 49) */ fdtox %f10,%f10
+/* 0x0d24 1104 (46 47) */ std %f10,[%sp+2383]
+/* 0x0d28 1105 (46 49) */ fmuld %f14,%f6,%f14
+/* 0x0d2c 1106 (47 50) */ fmuld %f4,%f12,%f10
+/* 0x0d30 1107 (47 50) */ fdtox %f20,%f18
+/* 0x0d34 1108 (47 48) */ std %f18,[%sp+2391]
+/* 0x0d38 1109 (48 51) */ fdtox %f8,%f8
+/* 0x0d3c 1110 (48 49) */ std %f8,[%sp+2375]
+/* 0x0d40 1111 (48 51) */ fmuld %f4,%f6,%f4
+/* 0x0d44 1112 (49 52) */ fdtox %f14,%f14
+/* 0x0d48 1113 (49 50) */ std %f14,[%sp+2367]
+/* 0x0d4c 1117 (50 53) */ ldd [%o2+32],%f8
+/* 0x0d50 1118 (50 53) */ fdtox %f10,%f10
+/* 0x0d54 1119 (51 54) */ fdtox %f4,%f4
+/* 0x0d58 1120 (51 52) */ std %f4,[%sp+2351]
+/* 0x0d5c 1121 (52 54) */ fxnor %f0,%f8,%f8
+/* 0x0d60 1122 (52 55) */ ldd [%o2+40],%f14
+/* 0x0d64 1123 (53 54) */ std %f10,[%sp+2359]
+/* 0x0d68 1124 (54 57) */ fitod %f8,%f4
+/* 0x0d6c 1125 (55 57) */ fxnor %f0,%f14,%f10
+/* 0x0d70 1126 (56 59) */ fitod %f9,%f8
+/* 0x0d74 1127 (57 60) */ fsubd %f16,%f4,%f4
+/* 0x0d78 1128 (58 61) */ fitod %f10,%f14
+/* 0x0d7c 1129 (59 62) */ fsubd %f16,%f8,%f8
+/* 0x0d80 1130 (60 63) */ fmuld %f4,%f12,%f18
+/* 0x0d84 1131 (60 63) */ fitod %f11,%f10
+/* 0x0d88 1132 (61 64) */ fmuld %f4,%f6,%f4
+/* 0x0d8c 1133 (61 64) */ fsubd %f16,%f14,%f14
+/* 0x0d90 1134 (62 65) */ fmuld %f8,%f12,%f20
+/* 0x0d94 1135 (63 66) */ fmuld %f8,%f6,%f8
+/* 0x0d98 1136 (63 66) */ fsubd %f16,%f10,%f10
+/* 0x0d9c 1137 (64 67) */ fdtox %f4,%f4
+/* 0x0da0 1138 (64 65) */ std %f4,[%sp+2335]
+/* 0x0da4 1139 (65 68) */ fmuld %f14,%f12,%f4
+/* 0x0da8 1140 (65 68) */ fdtox %f18,%f18
+/* 0x0dac 1141 (65 66) */ std %f18,[%sp+2343]
+/* 0x0db0 1142 (66 69) */ fdtox %f8,%f8
+/* 0x0db4 1143 (66 67) */ std %f8,[%sp+2319]
+/* 0x0db8 1144 (66 69) */ fmuld %f14,%f6,%f14
+/* 0x0dbc 1145 (67 70) */ fmuld %f10,%f12,%f8
+/* 0x0dc0 1146 (67 70) */ fdtox %f20,%f18
+/* 0x0dc4 1147 (67 68) */ std %f18,[%sp+2327]
+/* 0x0dc8 1148 (68 71) */ fdtox %f4,%f4
+/* 0x0dcc 1149 (68 69) */ std %f4,[%sp+2311]
+/* 0x0dd0 1150 (68 71) */ fmuld %f10,%f6,%f10
+/* 0x0dd4 1151 (69 72) */ fdtox %f14,%f14
+/* 0x0dd8 1152 (69 70) */ std %f14,[%sp+2303]
+/* 0x0ddc 1153 (70 73) */ ldd [%o2+48],%f4
+/* 0x0de0 1154 (70 73) */ fdtox %f8,%f8
+/* 0x0de4 1155 (71 74) */ fdtox %f10,%f10
+/* 0x0de8 1156 (71 72) */ std %f10,[%sp+2287]
+/* 0x0dec 1157 (72 74) */ fxnor %f0,%f4,%f4
+/* 0x0df0 1158 (72 75) */ ldd [%o2+56],%f14
+/* 0x0df4 1159 (73 74) */ std %f8,[%sp+2295]
+/* 0x0df8 1160 (74 77) */ fitod %f4,%f10
+/* 0x0dfc 1161 (75 78) */ fitod %f5,%f4
+/* 0x0e00 1162 (76 78) */ fxnor %f0,%f14,%f8
+/* 0x0e04 1163 (77 80) */ fsubd %f16,%f10,%f10
+/* 0x0e08 1164 (78 81) */ fsubd %f16,%f4,%f4
+/* 0x0e0c 1165 (79 82) */ fitod %f8,%f14
+/* 0x0e10 1166 (80 83) */ fmuld %f10,%f12,%f18
+/* 0x0e14 1167 (80 83) */ fitod %f9,%f8
+/* 0x0e18 1168 (81 84) */ fmuld %f10,%f6,%f10
+/* 0x0e1c 1169 (82 85) */ fmuld %f4,%f12,%f20
+/* 0x0e20 1170 (82 85) */ fsubd %f16,%f14,%f14
+/* 0x0e24 1171 (83 86) */ fdtox %f18,%f18
+/* 0x0e28 1172 (83 84) */ std %f18,[%sp+2279]
+/* 0x0e2c 1173 (83 86) */ fmuld %f4,%f6,%f4
+/* 0x0e30 1174 (84 87) */ fdtox %f10,%f10
+/* 0x0e34 1175 (84 85) */ std %f10,[%sp+2271]
+/* 0x0e38 1176 (85 88) */ fdtox %f20,%f10
+/* 0x0e3c 1177 (85 86) */ std %f10,[%sp+2263]
+/* 0x0e40 1178 (86 89) */ fdtox %f4,%f4
+/* 0x0e44 1179 (86 87) */ std %f4,[%sp+2255]
+/* 0x0e48 1180 (86 89) */ fmuld %f14,%f12,%f10
+/* 0x0e4c 1181 (87 90) */ fmuld %f14,%f6,%f4
+/* 0x0e50 1182 (89 92) */ fdtox %f10,%f10
+/* 0x0e54 1183 (89 90) */ std %f10,[%sp+2247]
+/* 0x0e58 1184 (90 93) */ fdtox %f4,%f4
+/* 0x0e5c 1185 (90 91) */ std %f4,[%sp+2239]
+/* 0x0e60 1189 (91 93) */ ldx [%sp+2463],%g2
+/* 0x0e64 1190 (91 94) */ fsubd %f16,%f8,%f4
+/* 0x0e68 1191 (92 94) */ ldx [%sp+2471],%g3
+/* 0x0e6c 1192 (93 96) */ ld [%i1],%g4
+/* 0x0e70 1193 (93 94) */ sllx %g2,19,%g2
+/* 0x0e74 1194 (94 96) */ ldx [%sp+2455],%g5
+/* 0x0e78 1195 (94 95) */ add %g3,%g2,%g2
+/* 0x0e7c 1196 (94 97) */ fmuld %f4,%f6,%f6
+/* 0x0e80 1197 (95 97) */ ldx [%sp+2447],%g3
+/* 0x0e84 1198 (95 96) */ add %g2,%g4,%g4
+/* 0x0e88 1199 (95 98) */ fmuld %f4,%f12,%f4
+/* 0x0e8c 1200 (96 97) */ st %g4,[%i0]
+/* 0x0e90 1201 (96 97) */ srlx %g4,32,%g4
+/* 0x0e94 1202 (97 100) */ ld [%i1+8],%o0
+/* 0x0e98 1203 (97 98) */ sllx %g3,19,%g2
+/* 0x0e9c 1204 (97 100) */ fdtox %f6,%f6
+/* 0x0ea0 1205 (98 101) */ ld [%i1+4],%g3
+/* 0x0ea4 1206 (98 99) */ add %g5,%g2,%g2
+/* 0x0ea8 1207 (98 101) */ fdtox %f4,%f4
+/* 0x0eac 1208 (99 101) */ ldx [%sp+2439],%g5
+/* 0x0eb0 1209 (100 103) */ ld [%i1+12],%o1
+/* 0x0eb4 1210 (100 101) */ add %g2,%g3,%g2
+/* 0x0eb8 1211 (101 103) */ ldx [%sp+2431],%g3
+/* 0x0ebc 1212 (101 102) */ add %g2,%g4,%g4
+/* 0x0ec0 1213 (102 103) */ st %g4,[%i0+4]
+/* 0x0ec4 1214 (103 104) */ std %f6,[%sp+2223]
+/* 0x0ec8 1215 (103 104) */ sllx %g3,19,%g2
+/* 0x0ecc 1216 (104 106) */ ldx [%sp+2423],%g3
+/* 0x0ed0 1217 (104 105) */ add %g5,%g2,%g2
+/* 0x0ed4 1218 (105 107) */ ldx [%sp+2415],%g5
+/* 0x0ed8 1219 (105 106) */ add %g2,%o0,%g2
+/* 0x0edc 1220 (106 107) */ std %f4,[%sp+2231]
+/* 0x0ee0 1221 (106 107) */ srlx %g4,32,%o0
+/* 0x0ee4 1222 (107 109) */ ldx [%sp+2407],%g4
+/* 0x0ee8 1223 (107 108) */ sllx %g5,19,%g5
+/* 0x0eec 1224 (107 108) */ add %g2,%o0,%g2
+/* 0x0ef0 1225 (108 109) */ st %g2,[%i0+8]
+/* 0x0ef4 1226 (108 109) */ srlx %g2,32,%o0
+/* 0x0ef8 1227 (108 109) */ add %g3,%g5,%g3
+/* 0x0efc 1228 (109 111) */ ldx [%sp+2399],%g5
+/* 0x0f00 1229 (109 110) */ add %g3,%o1,%g3
+/* 0x0f04 1230 (110 113) */ ld [%i1+16],%o1
+/* 0x0f08 1231 (110 111) */ add %g3,%o0,%g3
+/* 0x0f0c 1232 (111 112) */ st %g3,[%i0+12]
+/* 0x0f10 1233 (111 112) */ sllx %g5,19,%g5
+/* 0x0f14 1234 (112 113) */ srlx %g3,32,%o0
+/* 0x0f18 1235 (112 113) */ add %g4,%g5,%g2
+/* 0x0f1c 1236 (112 114) */ ldx [%sp+2383],%g5
+/* 0x0f20 1237 (113 115) */ ldx [%sp+2391],%g4
+/* 0x0f24 1238 (113 114) */ add %g2,%o1,%g2
+/* 0x0f28 1239 (114 117) */ ld [%i1+20],%o1
+/* 0x0f2c 1240 (114 115) */ sllx %g5,19,%g5
+/* 0x0f30 1241 (114 115) */ add %g2,%o0,%g2
+/* 0x0f34 1242 (115 116) */ st %g2,[%i0+16]
+/* 0x0f38 1243 (115 116) */ srlx %g2,32,%o0
+/* 0x0f3c 1244 (115 116) */ add %g4,%g5,%g3
+/* 0x0f40 1245 (116 118) */ ldx [%sp+2367],%g5
+/* 0x0f44 1246 (116 117) */ add %g3,%o1,%g3
+/* 0x0f48 1247 (117 119) */ ldx [%sp+2375],%g4
+/* 0x0f4c 1248 (117 118) */ add %g3,%o0,%g3
+/* 0x0f50 1249 (118 121) */ ld [%i1+24],%o1
+/* 0x0f54 1250 (118 119) */ sllx %g5,19,%g5
+/* 0x0f58 1251 (119 120) */ st %g3,[%i0+20]
+/* 0x0f5c 1252 (119 120) */ add %g4,%g5,%g2
+/* 0x0f60 1253 (120 122) */ ldx [%sp+2351],%g5
+/* 0x0f64 1254 (120 121) */ srlx %g3,32,%o0
+/* 0x0f68 1255 (120 121) */ add %g2,%o1,%g2
+/* 0x0f6c 1256 (121 123) */ ldx [%sp+2359],%g4
+/* 0x0f70 1257 (121 122) */ add %g2,%o0,%g2
+/* 0x0f74 1258 (122 125) */ ld [%i1+28],%o1
+/* 0x0f78 1259 (122 123) */ sllx %g5,19,%g5
+/* 0x0f7c 1260 (123 124) */ st %g2,[%i0+24]
+/* 0x0f80 1261 (123 124) */ add %g4,%g5,%g3
+/* 0x0f84 1265 (124 126) */ ldx [%sp+2335],%g5
+/* 0x0f88 1266 (124 125) */ srlx %g2,32,%o0
+/* 0x0f8c 1267 (124 125) */ add %g3,%o1,%g3
+/* 0x0f90 1268 (125 127) */ ldx [%sp+2343],%g4
+/* 0x0f94 1269 (125 126) */ add %g3,%o0,%g3
+/* 0x0f98 1270 (126 127) */ sllx %g5,19,%g5
+/* 0x0f9c 1271 (126 129) */ ld [%i1+32],%o1
+/* 0x0fa0 1272 (127 128) */ add %g4,%g5,%g2
+/* 0x0fa4 1273 (127 129) */ ldx [%sp+2319],%g5
+/* 0x0fa8 1274 (128 130) */ ldx [%sp+2327],%g4
+/* 0x0fac 1275 (128 129) */ srlx %g3,32,%o0
+/* 0x0fb0 1276 (128 129) */ add %g2,%o1,%g2
+/* 0x0fb4 1277 (129 130) */ st %g3,[%i0+28]
+/* 0x0fb8 1278 (129 130) */ sllx %g5,19,%g5
+/* 0x0fbc 1279 (129 130) */ add %g2,%o0,%g2
+/* 0x0fc0 1280 (130 133) */ ld [%i1+36],%o1
+/* 0x0fc4 1281 (130 131) */ add %g4,%g5,%g3
+/* 0x0fc8 1282 (131 133) */ ldx [%sp+2303],%g5
+/* 0x0fcc 1283 (131 132) */ srlx %g2,32,%o0
+/* 0x0fd0 1284 (132 134) */ ldx [%sp+2311],%g4
+/* 0x0fd4 1285 (132 133) */ add %g3,%o1,%g3
+/* 0x0fd8 1286 (133 134) */ sllx %g5,19,%g5
+/* 0x0fdc 1287 (133 134) */ st %g2,[%i0+32]
+/* 0x0fe0 1288 (133 134) */ add %g3,%o0,%g3
+/* 0x0fe4 1289 (134 135) */ add %g4,%g5,%g2
+/* 0x0fe8 1290 (134 136) */ ldx [%sp+2287],%g5
+/* 0x0fec 1291 (135 137) */ ldx [%sp+2295],%g4
+/* 0x0ff0 1292 (135 136) */ srlx %g3,32,%o0
+/* 0x0ff4 1293 (136 139) */ ld [%i1+40],%o1
+/* 0x0ff8 1294 (136 137) */ sllx %g5,19,%g5
+/* 0x0ffc 1295 (137 138) */ st %g3,[%i0+36]
+/* 0x1000 1296 (137 138) */ add %g4,%g5,%g3
+/* 0x1004 1297 (138 140) */ ldx [%sp+2271],%g5
+/* 0x1008 1298 (138 139) */ add %g2,%o1,%g2
+/* 0x100c 1299 (139 141) */ ldx [%sp+2279],%g4
+/* 0x1010 1300 (139 140) */ add %g2,%o0,%g2
+/* 0x1014 1301 (140 143) */ ld [%i1+44],%o1
+/* 0x1018 1302 (140 141) */ sllx %g5,19,%g5
+/* 0x101c 1303 (141 142) */ st %g2,[%i0+40]
+/* 0x1020 1304 (141 142) */ srlx %g2,32,%o0
+/* 0x1024 1305 (141 142) */ add %g4,%g5,%g2
+/* 0x1028 1306 (142 144) */ ldx [%sp+2255],%g5
+/* 0x102c 1307 (142 143) */ add %g3,%o1,%g3
+/* 0x1030 1308 (143 145) */ ldx [%sp+2263],%g4
+/* 0x1034 1309 (143 144) */ add %g3,%o0,%g3
+/* 0x1038 1310 (144 147) */ ld [%i1+48],%o1
+/* 0x103c 1311 (144 145) */ sllx %g5,19,%g5
+/* 0x1040 1312 (145 146) */ srlx %g3,32,%o0
+/* 0x1044 1313 (145 146) */ st %g3,[%i0+44]
+/* 0x1048 1314 (145 146) */ add %g4,%g5,%g3
+/* 0x104c 1315 (146 148) */ ldx [%sp+2239],%g5
+/* 0x1050 1316 (146 147) */ add %g2,%o1,%g2
+/* 0x1054 1317 (147 150) */ ld [%i1+52],%o1
+/* 0x1058 1318 (147 148) */ add %g2,%o0,%g2
+/* 0x105c 1319 (148 150) */ ldx [%sp+2247],%g4
+/* 0x1060 1320 (148 149) */ sllx %g5,19,%g5
+/* 0x1064 1321 (149 150) */ srlx %g2,32,%o0
+/* 0x1068 1322 (149 150) */ st %g2,[%i0+48]
+/* 0x106c 1323 (149 150) */ add %g3,%o1,%g3
+/* 0x1070 1324 (150 153) */ ld [%i1+56],%o1
+/* 0x1074 1325 (150 151) */ add %g4,%g5,%g2
+/* 0x1078 1326 (150 151) */ add %g3,%o0,%g3
+/* 0x107c 1327 (151 153) */ ldx [%sp+2223],%g5
+/* 0x1080 1328 (151 152) */ srlx %g3,32,%o0
+/* 0x1084 1329 (152 154) */ ldx [%sp+2231],%g4
+/* 0x1088 1330 (152 153) */ add %g2,%o1,%g2
+/* 0x108c 1331 (153 154) */ sllx %g5,19,%g5
+/* 0x1090 1332 (153 156) */ ld [%i1+60],%o1
+/* 0x1094 1333 (153 154) */ add %g2,%o0,%g2
+/* 0x1098 1334 (154 155) */ st %g3,[%i0+52]
+/* 0x109c 1335 (154 155) */ add %g4,%g5,%g3
+/* 0x10a0 1336 (155 156) */ st %g2,[%i0+56]
+/* 0x10a4 1337 (155 156) */ srlx %g2,32,%g2
+/* 0x10a8 1338 (155 156) */ add %g3,%o1,%g3
+/* 0x10ac 1339 (156 157) */ add %g3,%g2,%g2
+/* 0x10b0 1340 (156 157) */ st %g2,[%i0+60]
+/* 0x10b4 1344 (157 158) */ srlx %g2,32,%o3
+/* 0x10b8 1345 (158 159) */ srl %o3,0,%i0
+/* 0x10bc (159 161) */ ret ! Result = %o1 %o0 %f0 %f1
+/* 0x10c0 (161 162) */ restore %g0,%g0,%g0
+
+!
+! ENTRY .L77000073
+!
+
+ .L77000073: /* frequency 1.0 confidence 0.0 */
+
+
+ or %g0, %i4, %o2
+ or %g0, %o0, %o1
+ or %g0, %i3, %o0
+
+!
+! ENTRY .L77000052
+!
+
+ .L77000052: /* frequency 1.0 confidence 0.0 */
+/* 0x1028 1318 ( 0 1) */ andn %o2,%g2,%g2
+/* 0x102c 1319 ( 0 1) */ st %g2,[%sp+2227]
+/* 0x1030 1325 ( 0 1) */ add %o0,1,%g3
+/* 0x1034 1326 ( 0 1) */ fmovd %f0,%f14
+/* 0x1038 1327 ( 1 2) */ srl %o2,19,%g2
+/* 0x103c 1328 ( 1 2) */ st %g2,[%sp+2223]
+/* 0x1040 1329 ( 1 2) */ or %g0,0,%o5
+/* 0x1044 1330 ( 2 3) */ srl %g3,31,%g2
+/* 0x1048 1331 ( 2 5) */ ldd [%o1],%f6
+/* 0x104c 1335 ( 2 3) */ sethi %hi(0x1000),%g1
+/* 0x1050 1336 ( 3 4) */ add %g3,%g2,%g2
+/* 0x1054 1337 ( 3 4) */ xor %g1,-625,%g1
+/* 0x1058 1338 ( 3 6) */ ldd [%o1+8],%f20
+/* 0x105c 1339 ( 4 5) */ sra %g2,1,%o3
+/* 0x1060 1340 ( 4 5) */ fmovs %f6,%f8
+/* 0x1064 1341 ( 4 5) */ add %g1,%fp,%g3
+/* 0x1068 1342 ( 5 6) */ fmovs %f6,%f10
+/* 0x106c 1343 ( 5 7) */ ld [%sp+2227],%f9
+/* 0x1070 1344 ( 5 6) */ subcc %o3,0,%g0
+/* 0x1074 1345 ( 6 8) */ ld [%sp+2223],%f11
+/* 0x1078 1346 ( 6 7) */ sethi %hi(0x1000),%g1
+/* 0x107c 1347 ( 6 7) */ or %g0,%i2,%o1
+/* 0x1080 1348 ( 7 10) */ fsubd %f8,%f6,%f18
+/* 0x1084 1349 ( 7 8) */ xor %g1,-617,%g1
+/* 0x1088 1350 ( 7 8) */ or %g0,0,%g4
+/* 0x108c 1351 ( 8 11) */ fsubd %f10,%f6,%f16
+/* 0x1090 1352 ( 8 9) */ bleu,pt %icc,.L990000162 ! tprob=0.50
+/* 0x1094 ( 8 9) */ subcc %o0,0,%g0
+/* 0x1098 1354 ( 9 10) */ add %g1,%fp,%g2
+/* 0x109c 1355 ( 9 10) */ sethi %hi(0x1000),%g1
+/* 0x10a0 1356 (10 11) */ xor %g1,-609,%g1
+/* 0x10a4 1357 (10 11) */ subcc %o3,7,%g0
+/* 0x10a8 1358 (11 12) */ add %g1,%fp,%o7
+/* 0x10ac 1359 (11 12) */ sethi %hi(0x1000),%g1
+/* 0x10b0 1360 (12 13) */ xor %g1,-601,%g1
+/* 0x10b4 1361 (13 14) */ add %g1,%fp,%o4
+/* 0x10b8 1362 (13 14) */ bl,pn %icc,.L77000054 ! tprob=0.50
+/* 0x10bc (13 14) */ sub %o3,2,%o2
+/* 0x10c0 1364 (14 17) */ ldd [%o1],%f2
+/* 0x10c4 1365 (14 15) */ add %o1,16,%g5
+/* 0x10c8 1366 (14 15) */ or %g0,4,%g4
+/* 0x10cc 1367 (15 18) */ ldd [%o1+8],%f0
+/* 0x10d0 1368 (15 16) */ add %o1,8,%o1
+/* 0x10d4 1369 (16 18) */ fxnor %f14,%f2,%f6
+/* 0x10d8 1370 (16 19) */ ldd [%g5],%f4
+/* 0x10dc 1371 (16 17) */ add %o1,16,%o1
+/* 0x10e0 1372 (17 19) */ fxnor %f14,%f0,%f12
+/* 0x10e4 1373 (17 20) */ ldd [%o1],%f0
+/* 0x10e8 1374 (17 18) */ add %o1,8,%o1
+/* 0x10ec 1375 (18 21) */ fitod %f7,%f2
+/* 0x10f0 1376 (19 22) */ fitod %f6,%f6
+/* 0x10f4 1377 (20 22) */ fxnor %f14,%f4,%f10
+/* 0x10f8 1378 (21 24) */ fsubd %f20,%f2,%f2
+/* 0x10fc 1379 (22 24) */ fxnor %f14,%f0,%f8
+/* 0x1100 1380 (23 26) */ fitod %f13,%f4
+/* 0x1104 1381 (24 27) */ fsubd %f20,%f6,%f6
+/* 0x1108 1382 (24 27) */ fmuld %f2,%f16,%f0
+
+!
+! ENTRY .L990000154
+!
+
+ .L990000154: /* frequency 1.0 confidence 0.0 */
+/* 0x110c 1384 ( 0 3) */ ldd [%o1],%f24
+/* 0x1110 1385 ( 0 1) */ add %g4,3,%g4
+/* 0x1114 1386 ( 0 1) */ add %o4,96,%o4
+/* 0x1118 1387 ( 1 4) */ fitod %f11,%f22
+/* 0x111c 1388 ( 2 5) */ fsubd %f20,%f4,%f26
+/* 0x1120 1389 ( 2 3) */ subcc %g4,%o2,%g0
+/* 0x1124 1390 ( 2 3) */ add %o7,96,%o7
+/* 0x1128 1391 ( 2 5) */ fmuld %f6,%f18,%f28
+/* 0x112c 1392 ( 3 6) */ fmuld %f6,%f16,%f6
+/* 0x1130 1393 ( 3 4) */ add %g2,96,%g2
+/* 0x1134 1394 ( 3 4) */ add %g3,96,%g3
+/* 0x1138 1395 ( 4 7) */ fdtox %f0,%f0
+/* 0x113c 1396 ( 5 8) */ fitod %f12,%f4
+/* 0x1140 1397 ( 5 8) */ fmuld %f2,%f18,%f2
+/* 0x1144 1398 ( 6 9) */ fdtox %f28,%f12
+/* 0x1148 1399 ( 7 10) */ fdtox %f6,%f6
+/* 0x114c 1400 ( 7 8) */ std %f12,[%g3-96]
+/* 0x1150 1401 ( 8 9) */ std %f6,[%g2-96]
+/* 0x1154 1402 ( 8 11) */ fdtox %f2,%f2
+/* 0x1158 1403 ( 9 12) */ fsubd %f20,%f4,%f6
+/* 0x115c 1404 ( 9 10) */ std %f2,[%o7-96]
+/* 0x1160 1405 ( 9 10) */ add %o1,8,%o1
+/* 0x1164 1406 (10 12) */ fxnor %f14,%f24,%f12
+/* 0x1168 1407 (10 13) */ fmuld %f26,%f16,%f4
+/* 0x116c 1408 (10 11) */ std %f0,[%o4-96]
+/* 0x1170 1409 (11 14) */ ldd [%o1],%f0
+/* 0x1174 1410 (11 14) */ fitod %f9,%f2
+/* 0x1178 1411 (12 15) */ fsubd %f20,%f22,%f28
+/* 0x117c 1412 (12 15) */ fmuld %f6,%f18,%f24
+/* 0x1180 1413 (13 16) */ fmuld %f6,%f16,%f22
+/* 0x1184 1414 (13 16) */ fdtox %f4,%f4
+/* 0x1188 1415 (14 17) */ fitod %f10,%f6
+/* 0x118c 1416 (14 17) */ fmuld %f26,%f18,%f10
+/* 0x1190 1417 (15 18) */ fdtox %f24,%f24
+/* 0x1194 1418 (16 19) */ fdtox %f22,%f22
+/* 0x1198 1419 (16 17) */ std %f24,[%g3-64]
+/* 0x119c 1420 (17 18) */ std %f22,[%g2-64]
+/* 0x11a0 1421 (17 20) */ fdtox %f10,%f10
+/* 0x11a4 1422 (18 21) */ fsubd %f20,%f6,%f6
+/* 0x11a8 1423 (18 19) */ std %f10,[%o7-64]
+/* 0x11ac 1424 (18 19) */ add %o1,8,%o1
+/* 0x11b0 1425 (19 21) */ fxnor %f14,%f0,%f10
+/* 0x11b4 1426 (19 22) */ fmuld %f28,%f16,%f0
+/* 0x11b8 1427 (19 20) */ std %f4,[%o4-64]
+/* 0x11bc 1428 (20 23) */ ldd [%o1],%f22
+/* 0x11c0 1429 (20 23) */ fitod %f13,%f4
+/* 0x11c4 1430 (21 24) */ fsubd %f20,%f2,%f2
+/* 0x11c8 1431 (21 24) */ fmuld %f6,%f18,%f26
+/* 0x11cc 1432 (22 25) */ fmuld %f6,%f16,%f24
+/* 0x11d0 1433 (22 25) */ fdtox %f0,%f0
+/* 0x11d4 1434 (23 26) */ fitod %f8,%f6
+/* 0x11d8 1435 (23 26) */ fmuld %f28,%f18,%f8
+/* 0x11dc 1436 (24 27) */ fdtox %f26,%f26
+/* 0x11e0 1437 (25 28) */ fdtox %f24,%f24
+/* 0x11e4 1438 (25 26) */ std %f26,[%g3-32]
+/* 0x11e8 1439 (26 27) */ std %f24,[%g2-32]
+/* 0x11ec 1440 (26 29) */ fdtox %f8,%f8
+/* 0x11f0 1441 (27 30) */ fsubd %f20,%f6,%f6
+/* 0x11f4 1442 (27 28) */ std %f8,[%o7-32]
+/* 0x11f8 1443 (27 28) */ add %o1,8,%o1
+/* 0x11fc 1444 (28 30) */ fxnor %f14,%f22,%f8
+/* 0x1200 1445 (28 29) */ std %f0,[%o4-32]
+/* 0x1204 1446 (28 29) */ bcs,pt %icc,.L990000154 ! tprob=0.50
+/* 0x1208 (28 31) */ fmuld %f2,%f16,%f0
+
+!
+! ENTRY .L990000157
+!
+
+ .L990000157: /* frequency 1.0 confidence 0.0 */
+/* 0x120c 1449 ( 0 3) */ fitod %f12,%f28
+/* 0x1210 1450 ( 0 3) */ fmuld %f6,%f18,%f24
+/* 0x1214 1451 ( 0 1) */ add %g3,128,%g3
+/* 0x1218 1452 ( 1 4) */ fitod %f10,%f12
+/* 0x121c 1453 ( 1 4) */ fmuld %f6,%f16,%f26
+/* 0x1220 1454 ( 1 2) */ add %g2,128,%g2
+/* 0x1224 1455 ( 2 5) */ fsubd %f20,%f4,%f4
+/* 0x1228 1456 ( 2 5) */ fmuld %f2,%f18,%f22
+/* 0x122c 1457 ( 2 3) */ add %o7,128,%o7
+/* 0x1230 1458 ( 3 6) */ fdtox %f24,%f6
+/* 0x1234 1459 ( 3 4) */ std %f6,[%g3-128]
+/* 0x1238 1460 ( 3 4) */ add %o4,128,%o4
+/* 0x123c 1461 ( 4 7) */ fsubd %f20,%f28,%f2
+/* 0x1240 1462 ( 4 5) */ subcc %g4,%o3,%g0
+/* 0x1244 1463 ( 5 8) */ fitod %f11,%f6
+/* 0x1248 1464 ( 5 8) */ fmuld %f4,%f18,%f24
+/* 0x124c 1465 ( 6 9) */ fdtox %f26,%f10
+/* 0x1250 1466 ( 6 7) */ std %f10,[%g2-128]
+/* 0x1254 1467 ( 7 10) */ fdtox %f22,%f10
+/* 0x1258 1468 ( 7 8) */ std %f10,[%o7-128]
+/* 0x125c 1469 ( 7 10) */ fmuld %f2,%f18,%f26
+/* 0x1260 1470 ( 8 11) */ fsubd %f20,%f12,%f10
+/* 0x1264 1471 ( 8 11) */ fmuld %f2,%f16,%f2
+/* 0x1268 1472 ( 9 12) */ fsubd %f20,%f6,%f22
+/* 0x126c 1473 ( 9 12) */ fmuld %f4,%f16,%f12
+/* 0x1270 1474 (10 13) */ fdtox %f0,%f0
+/* 0x1274 1475 (10 11) */ std %f0,[%o4-128]
+/* 0x1278 1476 (11 14) */ fitod %f8,%f4
+/* 0x127c 1477 (11 14) */ fmuld %f10,%f18,%f6
+/* 0x1280 1478 (12 15) */ fdtox %f26,%f0
+/* 0x1284 1479 (12 13) */ std %f0,[%g3-96]
+/* 0x1288 1480 (12 15) */ fmuld %f10,%f16,%f10
+/* 0x128c 1481 (13 16) */ fdtox %f2,%f2
+/* 0x1290 1482 (13 14) */ std %f2,[%g2-96]
+/* 0x1294 1483 (14 17) */ fitod %f9,%f0
+/* 0x1298 1484 (14 17) */ fmuld %f22,%f18,%f2
+/* 0x129c 1485 (15 18) */ fdtox %f24,%f8
+/* 0x12a0 1486 (15 16) */ std %f8,[%o7-96]
+/* 0x12a4 1487 (16 19) */ fsubd %f20,%f4,%f4
+/* 0x12a8 1488 (16 19) */ fmuld %f22,%f16,%f8
+/* 0x12ac 1489 (17 20) */ fdtox %f12,%f12
+/* 0x12b0 1490 (17 18) */ std %f12,[%o4-96]
+/* 0x12b4 1491 (18 21) */ fsubd %f20,%f0,%f0
+/* 0x12b8 1492 (19 22) */ fdtox %f6,%f6
+/* 0x12bc 1493 (19 20) */ std %f6,[%g3-64]
+/* 0x12c0 1494 (20 23) */ fdtox %f10,%f10
+/* 0x12c4 1495 (20 21) */ std %f10,[%g2-64]
+/* 0x12c8 1496 (20 23) */ fmuld %f4,%f18,%f6
+/* 0x12cc 1497 (21 24) */ fdtox %f2,%f2
+/* 0x12d0 1498 (21 22) */ std %f2,[%o7-64]
+/* 0x12d4 1499 (21 24) */ fmuld %f4,%f16,%f4
+/* 0x12d8 1500 (22 25) */ fmuld %f0,%f18,%f2
+/* 0x12dc 1501 (22 25) */ fdtox %f8,%f8
+/* 0x12e0 1502 (22 23) */ std %f8,[%o4-64]
+/* 0x12e4 1503 (23 26) */ fdtox %f6,%f6
+/* 0x12e8 1504 (23 24) */ std %f6,[%g3-32]
+/* 0x12ec 1505 (23 26) */ fmuld %f0,%f16,%f0
+/* 0x12f0 1506 (24 27) */ fdtox %f4,%f4
+/* 0x12f4 1507 (24 25) */ std %f4,[%g2-32]
+/* 0x12f8 1508 (25 28) */ fdtox %f2,%f2
+/* 0x12fc 1509 (25 26) */ std %f2,[%o7-32]
+/* 0x1300 1510 (26 29) */ fdtox %f0,%f0
+/* 0x1304 1511 (26 27) */ bcc,pn %icc,.L77000056 ! tprob=0.50
+/* 0x1308 (26 27) */ std %f0,[%o4-32]
+
+!
+! ENTRY .L77000054
+!
+
+ .L77000054: /* frequency 1.0 confidence 0.0 */
+/* 0x130c 1514 ( 0 3) */ ldd [%o1],%f0
+
+!
+! ENTRY .L990000161
+!
+
+ .L990000161: /* frequency 1.0 confidence 0.0 */
+/* 0x1310 1516 ( 0 2) */ fxnor %f14,%f0,%f0
+/* 0x1314 1517 ( 0 1) */ add %g4,1,%g4
+/* 0x1318 1518 ( 0 1) */ add %o1,8,%o1
+/* 0x131c 1519 ( 1 2) */ subcc %g4,%o3,%g0
+/* 0x1320 1520 ( 2 5) */ fitod %f0,%f2
+/* 0x1324 1521 ( 3 6) */ fitod %f1,%f0
+/* 0x1328 1522 ( 5 8) */ fsubd %f20,%f2,%f2
+/* 0x132c 1523 ( 6 9) */ fsubd %f20,%f0,%f0
+/* 0x1330 1524 ( 8 11) */ fmuld %f2,%f18,%f6
+/* 0x1334 1525 ( 9 12) */ fmuld %f2,%f16,%f4
+/* 0x1338 1526 (10 13) */ fmuld %f0,%f18,%f2
+/* 0x133c 1527 (11 14) */ fdtox %f6,%f6
+/* 0x1340 1528 (11 12) */ std %f6,[%g3]
+/* 0x1344 1529 (11 14) */ fmuld %f0,%f16,%f0
+/* 0x1348 1530 (12 15) */ fdtox %f4,%f4
+/* 0x134c 1531 (12 13) */ std %f4,[%g2]
+/* 0x1350 1532 (12 13) */ add %g2,32,%g2
+/* 0x1354 1533 (13 16) */ fdtox %f2,%f2
+/* 0x1358 1534 (13 14) */ std %f2,[%o7]
+/* 0x135c 1535 (13 14) */ add %o7,32,%o7
+/* 0x1360 1536 (14 17) */ fdtox %f0,%f0
+/* 0x1364 1537 (14 15) */ std %f0,[%o4]
+/* 0x1368 1538 (14 15) */ add %o4,32,%o4
+/* 0x136c 1539 (15 16) */ add %g3,32,%g3
+/* 0x1370 1540 (15 16) */ bcs,a,pt %icc,.L990000161 ! tprob=0.50
+/* 0x1374 (16 19) */ ldd [%o1],%f0
+
+!
+! ENTRY .L77000056
+!
+
+ .L77000056: /* frequency 1.0 confidence 0.0 */
+/* 0x1378 1548 ( 0 1) */ subcc %o0,0,%g0
+
+!
+! ENTRY .L990000162
+!
+
+ .L990000162: /* frequency 1.0 confidence 0.0 */
+/* 0x137c 1550 ( 0 1) */ bleu,pt %icc,.L77770061 ! tprob=0.50
+/* 0x1380 ( 0 1) */ nop
+/* 0x1384 1555 ( 0 1) */ sethi %hi(0x1000),%g1
+/* 0x1388 1556 ( 1 2) */ xor %g1,-625,%g1
+/* 0x138c 1557 ( 1 2) */ or %g0,%i1,%g4
+/* 0x1390 1558 ( 2 3) */ add %g1,%fp,%g5
+/* 0x1394 1559 ( 2 3) */ sethi %hi(0x1000),%g1
+/* 0x1398 1560 ( 3 4) */ xor %g1,-617,%g1
+/* 0x139c 1561 ( 3 4) */ or %g0,%o0,%o7
+/* 0x13a0 1562 ( 4 5) */ add %g1,%fp,%g2
+/* 0x13a4 1563 ( 4 5) */ or %g0,0,%i2
+/* 0x13a8 1564 ( 5 6) */ or %g0,%i0,%g3
+/* 0x13ac 1565 ( 5 6) */ subcc %o0,6,%g0
+/* 0x13b0 1566 ( 5 6) */ bl,pn %icc,.L77000058 ! tprob=0.50
+/* 0x13b4 ( 6 7) */ sethi %hi(0x1000),%g1
+/* 0x13b8 1568 ( 6 8) */ ld [%g4],%o2
+/* 0x13bc 1569 ( 6 7) */ add %g3,4,%g3
+/* 0x13c0 1570 ( 7 8) */ xor %g1,-585,%g1
+/* 0x13c4 1571 ( 7 8) */ sub %o7,3,%o4
+/* 0x13c8 1572 ( 8 9) */ add %g1,%fp,%g2
+/* 0x13cc 1573 ( 8 9) */ sethi %hi(0x1000),%g1
+/* 0x13d0 1574 ( 9 10) */ xor %g1,-593,%g1
+/* 0x13d4 1575 ( 9 10) */ or %g0,2,%i2
+/* 0x13d8 1576 (10 11) */ add %g1,%fp,%g5
+/* 0x13dc 1577 (10 11) */ sethi %hi(0x1000),%g1
+/* 0x13e0 1578 (11 12) */ xor %g1,-617,%g1
+/* 0x13e4 1579 (12 13) */ add %g1,%fp,%g1
+/* 0x13e8 1580 (13 15) */ ldx [%g1],%o1
+/* 0x13ec 1581 (14 16) */ ldx [%g1-8],%o0
+/* 0x13f0 1582 (15 16) */ sllx %o1,19,%o1
+/* 0x13f4 1583 (15 17) */ ldx [%g1+16],%o3
+/* 0x13f8 1584 (16 17) */ add %o0,%o1,%o0
+/* 0x13fc 1585 (16 18) */ ld [%g4+4],%o1
+/* 0x1400 1586 (16 17) */ add %g4,8,%g4
+/* 0x1404 1587 (17 18) */ sllx %o3,19,%o3
+/* 0x1408 1588 (17 18) */ add %o0,%o2,%o0
+/* 0x140c 1589 (17 19) */ ldx [%g1+8],%o2
+/* 0x1410 1590 (18 19) */ st %o0,[%g3-4]
+/* 0x1414 1591 (18 19) */ srlx %o0,32,%o0
+
+!
+! ENTRY .L990000142
+!
+
+ .L990000142: /* frequency 1.0 confidence 0.0 */
+/* 0x1418 1593 ( 0 1) */ add %o2,%o3,%o2
+/* 0x141c 1594 ( 0 1) */ add %i2,4,%i2
+/* 0x1420 1595 ( 0 2) */ ld [%g4],%o3
+/* 0x1424 1596 ( 1 2) */ srl %o0,0,%o5
+/* 0x1428 1597 ( 1 2) */ add %o2,%o1,%o1
+/* 0x142c 1598 ( 1 3) */ ldx [%g2],%o0
+/* 0x1430 1599 ( 3 4) */ sllx %o0,19,%o2
+/* 0x1434 1600 ( 3 5) */ ldx [%g5],%o0
+/* 0x1438 1601 ( 3 4) */ add %o1,%o5,%o1
+/* 0x143c 1602 ( 4 5) */ st %o1,[%g3]
+/* 0x1440 1603 ( 4 5) */ srlx %o1,32,%o5
+/* 0x1444 1604 ( 4 5) */ subcc %i2,%o4,%g0
+/* 0x1448 1605 ( 5 7) */ ldx [%g2+16],%o1
+/* 0x144c 1606 ( 5 6) */ add %o0,%o2,%o0
+/* 0x1450 1607 ( 5 6) */ add %g3,16,%g3
+/* 0x1454 1608 ( 6 8) */ ld [%g4+4],%o2
+/* 0x1458 1609 ( 6 7) */ add %o0,%o3,%o0
+/* 0x145c 1610 ( 7 8) */ sllx %o1,19,%o3
+/* 0x1460 1611 ( 7 9) */ ldx [%g5+16],%o1
+/* 0x1464 1612 ( 7 8) */ add %o0,%o5,%o0
+/* 0x1468 1613 ( 8 9) */ st %o0,[%g3-12]
+/* 0x146c 1614 ( 8 9) */ srlx %o0,32,%o5
+/* 0x1470 1615 ( 8 9) */ add %g4,16,%g4
+/* 0x1474 1616 ( 9 11) */ ldx [%g2+32],%o0
+/* 0x1478 1617 ( 9 10) */ add %o1,%o3,%o1
+/* 0x147c 1618 ( 9 10) */ add %g2,64,%g2
+/* 0x1480 1619 (10 12) */ ld [%g4-8],%o3
+/* 0x1484 1620 (10 11) */ add %o1,%o2,%o2
+/* 0x1488 1621 (11 12) */ sllx %o0,19,%o1
+/* 0x148c 1622 (11 13) */ ldx [%g5+32],%o0
+/* 0x1490 1623 (11 12) */ add %o2,%o5,%o2
+/* 0x1494 1624 (12 13) */ st %o2,[%g3-8]
+/* 0x1498 1625 (12 13) */ srlx %o2,32,%o5
+/* 0x149c 1626 (12 13) */ add %g5,64,%g5
+/* 0x14a0 1627 (13 15) */ ldx [%g2-16],%o2
+/* 0x14a4 1628 (13 14) */ add %o0,%o1,%o0
+/* 0x14a8 1629 (14 16) */ ld [%g4-4],%o1
+/* 0x14ac 1630 (14 15) */ add %o0,%o3,%o0
+/* 0x14b0 1631 (15 16) */ sllx %o2,19,%o3
+/* 0x14b4 1632 (15 17) */ ldx [%g5-16],%o2
+/* 0x14b8 1633 (15 16) */ add %o0,%o5,%o0
+/* 0x14bc 1634 (16 17) */ st %o0,[%g3-4]
+/* 0x14c0 1635 (16 17) */ bcs,pt %icc,.L990000142 ! tprob=0.50
+/* 0x14c4 (16 17) */ srlx %o0,32,%o0
+
+!
+! ENTRY .L990000145
+!
+
+ .L990000145: /* frequency 1.0 confidence 0.0 */
+/* 0x14c8 1638 ( 0 1) */ add %o2,%o3,%o3
+/* 0x14cc 1639 ( 0 1) */ add %g3,4,%g3
+/* 0x14d0 1640 ( 1 2) */ srl %o0,0,%o2
+/* 0x14d4 1641 ( 1 2) */ add %o3,%o1,%o0
+/* 0x14d8 1642 ( 2 3) */ add %o0,%o2,%o0
+/* 0x14dc 1643 ( 2 3) */ st %o0,[%g3-4]
+/* 0x14e0 1644 ( 2 3) */ subcc %i2,%o7,%g0
+/* 0x14e4 1645 ( 2 3) */ bcc,pn %icc,.L77770061 ! tprob=0.50
+/* 0x14e8 ( 3 4) */ srlx %o0,32,%o5
+
+!
+! ENTRY .L77000058
+!
+
+ .L77000058: /* frequency 1.0 confidence 0.0 */
+/* 0x14ec 1648 ( 0 2) */ ldx [%g2],%o2
+
+!
+! ENTRY .L990000160
+!
+
+ .L990000160: /* frequency 1.0 confidence 0.0 */
+/* 0x14f0 1650 ( 0 1) */ sllx %o2,19,%o3
+/* 0x14f4 1651 ( 0 2) */ ldx [%g5],%o0
+/* 0x14f8 1652 ( 0 1) */ add %i2,1,%i2
+/* 0x14fc 1653 ( 1 2) */ srl %o5,0,%o1
+/* 0x1500 1654 ( 1 3) */ ld [%g4],%o2
+/* 0x1504 1655 ( 1 2) */ add %g2,16,%g2
+/* 0x1508 1656 ( 2 3) */ add %o0,%o3,%o0
+/* 0x150c 1657 ( 2 3) */ add %g5,16,%g5
+/* 0x1510 1658 ( 3 4) */ add %o0,%o2,%o0
+/* 0x1514 1659 ( 3 4) */ add %g4,4,%g4
+/* 0x1518 1660 ( 4 5) */ add %o0,%o1,%o0
+/* 0x151c 1661 ( 4 5) */ st %o0,[%g3]
+/* 0x1520 1662 ( 4 5) */ subcc %i2,%o7,%g0
+/* 0x1524 1663 ( 5 6) */ srlx %o0,32,%o5
+/* 0x1528 1664 ( 5 6) */ add %g3,4,%g3
+/* 0x152c 1665 ( 5 6) */ bcs,a,pt %icc,.L990000160 ! tprob=0.50
+/* 0x1530 ( 6 8) */ ldx [%g2],%o2
+
+!
+! ENTRY .L77770061
+!
+
+ .L77770061: /* frequency 1.0 confidence 0.0 */
+/* 0x1534 ( 0 2) */ ret ! Result = %o1 %o0 %f0 %f1
+/* 0x1538 ( 2 3) */ restore %g0,%o5,%o0
+
+
+/* 0x124c 1476 ( 0 0) */ .type mul_add,2
+/* 0x124c 1477 ( 0 0) */ .size mul_add,(.-mul_add)
+/* 0x124c 1480 ( 0 0) */ .align 8
+/* 0x1250 1486 ( 0 0) */ .global mul_add_inp
+
+!
+! ENTRY mul_add_inp
+!
+
+ .global mul_add_inp
+ mul_add_inp: /* frequency 1.0 confidence 0.0 */
+/* 0x1250 1488 ( 0 1) */ save %sp,-176,%sp
+/* 0x1254 1500 ( 1 2) */ sra %i2,0,%o3
+/* 0x1258 1501 ( 1 2) */ or %g0,%i1,%o2
+/* 0x125c 1502 ( 2 3) */ or %g0,%i0,%o0
+/* 0x1260 1503 ( 2 3) */ or %g0,%i0,%o1
+/* 0x1264 1504 ( 3 5) */ call mul_add ! params = ! Result =
+/* 0x1268 ( 4 5) */ srl %i3,0,%o4
+/* 0x126c 1506 ( 5 6) */ srl %o0,0,%i0
+/* 0x1270 ( 6 8) */ ret ! Result = %o1 %o0 %f0 %f1
+/* 0x1274 ( 8 9) */ restore %g0,%g0,%g0
+/* 0x1278 1509 ( 0 0) */ .type mul_add_inp,2
+/* 0x1278 1510 ( 0 0) */ .size mul_add_inp,(.-mul_add_inp)
+
+ .section ".data",#alloc,#write
+/* 0x1278 6 ( 0 0) */ .align 8
+
+!
+! ENTRY mask_cnst
+!
+
+ mask_cnst: /* frequency 1.0 confidence 0.0 */
+/* 0x1278 8 ( 0 0) */ .xword -9223372034707292160
+/* 0x1280 9 ( 0 0) */ .type mask_cnst,#object
+/* 0x1280 10 ( 0 0) */ .size mask_cnst,8
+
diff --git a/security/nss/lib/freebl/mpi/mpvalpha.c b/security/nss/lib/freebl/mpi/mpvalpha.c
new file mode 100644
index 000000000..94e86eedb
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpvalpha.c
@@ -0,0 +1,183 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi-priv.h"
+#include <c_asm.h>
+
+#define MP_MUL_DxD(a, b, Phi, Plo) \
+ { \
+ Plo = asm("mulq %a0, %a1, %v0", a, b); \
+ Phi = asm("umulh %a0, %a1, %v0", a, b); \
+ }
+
+/* This is empty for the loop in s_mpv_mul_d */
+#define CARRY_ADD
+
+#define ONE_MUL \
+ a_i = *a++; \
+ MP_MUL_DxD(a_i, b, a1b1, a0b0); \
+ a0b0 += carry; \
+ if (a0b0 < carry) \
+ ++a1b1; \
+ CARRY_ADD \
+ *c++ = a0b0; \
+ carry = a1b1;
+
+#define FOUR_MUL \
+ ONE_MUL \
+ ONE_MUL \
+ ONE_MUL \
+ ONE_MUL
+
+#define SIXTEEN_MUL \
+ FOUR_MUL \
+ FOUR_MUL \
+ FOUR_MUL \
+ FOUR_MUL
+
+#define THIRTYTWO_MUL \
+ SIXTEEN_MUL \
+ SIXTEEN_MUL
+
+#define ONETWENTYEIGHT_MUL \
+ THIRTYTWO_MUL \
+ THIRTYTWO_MUL \
+ THIRTYTWO_MUL \
+ THIRTYTWO_MUL
+
+#define EXPAND_256(CALL) \
+ mp_digit carry = 0; \
+ mp_digit a_i; \
+ mp_digit a0b0, a1b1; \
+ if (a_len & 255) { \
+ if (a_len & 1) { \
+ ONE_MUL \
+ } \
+ if (a_len & 2) { \
+ ONE_MUL \
+ ONE_MUL \
+ } \
+ if (a_len & 4) { \
+ FOUR_MUL \
+ } \
+ if (a_len & 8) { \
+ FOUR_MUL \
+ FOUR_MUL \
+ } \
+ if (a_len & 16) { \
+ SIXTEEN_MUL \
+ } \
+ if (a_len & 32) { \
+ THIRTYTWO_MUL \
+ } \
+ if (a_len & 64) { \
+ THIRTYTWO_MUL \
+ THIRTYTWO_MUL \
+ } \
+ if (a_len & 128) { \
+ ONETWENTYEIGHT_MUL \
+ } \
+ a_len = a_len & (-256); \
+ } \
+ if (a_len >= 256) { \
+ carry = CALL(a, a_len, b, c, carry); \
+ c += a_len; \
+ }
+
+#define FUNC_NAME(NAME) \
+ mp_digit NAME(const mp_digit *a, \
+ mp_size a_len, \
+ mp_digit b, mp_digit *c, \
+ mp_digit carry)
+
+#define DECLARE_MUL_256(FNAME) \
+ FUNC_NAME(FNAME) \
+ { \
+ mp_digit a_i; \
+ mp_digit a0b0, a1b1; \
+ while (a_len) { \
+ ONETWENTYEIGHT_MUL \
+ ONETWENTYEIGHT_MUL \
+ a_len -= 256; \
+ } \
+ return carry; \
+ }
+
+/* Expanding the loop in s_mpv_mul_d appeared to slow down the
+ (admittedly) small number of tests (i.e., timetest) used to
+ measure performance, so this define disables that optimization. */
+#define DO_NOT_EXPAND 1
+
+/* Need forward declaration so it can be instantiated after
+ the routine that uses it; this helps locality somewhat */
+#if !defined(DO_NOT_EXPAND)
+FUNC_NAME(s_mpv_mul_d_MUL256);
+#endif
+
+/* c = a * b
+ *
+ * Multiplies the a_len digits starting at a by the single digit b.  For
+ * each digit, the low word of the product plus the carry from the previous
+ * position is stored to the next digit of c; the final carry is stored to
+ * the digit after those, so a_len + 1 digits of c are written in total.
+ *
+ * DO_NOT_EXPAND is defined earlier in this file, so the plain loop below
+ * is the variant that gets compiled; the EXPAND_256 branch is not.
+ */
+void
+s_mpv_mul_d(const mp_digit *a, mp_size a_len,
+ mp_digit b, mp_digit *c)
+{
+#if defined(DO_NOT_EXPAND)
+ mp_digit carry = 0;
+ while (a_len--) {
+ mp_digit a_i = *a++;
+ mp_digit a0b0, a1b1;
+
+ MP_MUL_DxD(a_i, b, a1b1, a0b0); /* a1b1 = high word, a0b0 = low word */
+
+ a0b0 += carry;
+ if (a0b0 < carry) /* the low word wrapped: carry into the high word */
+ ++a1b1;
+ *c++ = a0b0;
+ carry = a1b1;
+ }
+#else
+ EXPAND_256(s_mpv_mul_d_MUL256)
+#endif
+ *c = carry; /* most significant digit of the result */
+}
+
+#if !defined(DO_NOT_EXPAND)
+DECLARE_MUL_256(s_mpv_mul_d_MUL256)
+#endif
+
+#undef CARRY_ADD
+/* This is redefined for the loop in s_mpv_mul_d_add */
+#define CARRY_ADD \
+ a0b0 += a_i = *c; \
+ if (a0b0 < a_i) \
+ ++a1b1;
+
+/* Need forward declaration so it can be instantiated between the
+ two routines that use it; this helps locality somewhat */
+FUNC_NAME(s_mpv_mul_d_add_MUL256);
+
+/* c += a * b
+ *
+ * Expands EXPAND_256 with CARRY_ADD defined (just above) to add the
+ * existing digit *c into each partial product, so every one of the a_len
+ * digits of c is updated in place.  The macro also declares the local
+ * `carry` and leaves c pointing just past the digits it processed.
+ * The final carry is stored to (not added into) that next digit.
+ */
+void
+s_mpv_mul_d_add(const mp_digit *a, mp_size a_len,
+ mp_digit b, mp_digit *c)
+{
+ EXPAND_256(s_mpv_mul_d_add_MUL256)
+ *c = carry; /* overwrites the digit following the updated range */
+}
+
+/* Instantiate multiply 256 routine here */
+DECLARE_MUL_256(s_mpv_mul_d_add_MUL256)
+
+/* Presently, this is only used by the Montgomery arithmetic code. */
+/* c += a * b
+ *
+ * Performs the same accumulation as s_mpv_mul_d_add, but instead of
+ * storing the final carry it adds it into the following digits of c,
+ * continuing for as long as each addition itself produces a carry.
+ * No bound on c is checked in this routine.
+ */
+void
+s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len,
+ mp_digit b, mp_digit *c)
+{
+ EXPAND_256(s_mpv_mul_d_add_MUL256)
+ while (carry) {
+ mp_digit c_i = *c;
+ carry += c_i;
+ *c++ = carry;
+ carry = carry < c_i; /* 1 if the sum wrapped around, otherwise 0 */
+ }
+}
diff --git a/security/nss/lib/freebl/mpi/mulsqr.c b/security/nss/lib/freebl/mpi/mulsqr.c
new file mode 100644
index 000000000..461d40ab3
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mulsqr.c
@@ -0,0 +1,84 @@
+/*
+ * Test whether to include squaring code given the current settings
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <time.h>
+
+#define MP_SQUARE 1 /* make sure squaring code is included */
+
+#include "mpi.h"
+#include "mpprime.h"
+
+/*
+ * Usage: mulsqr <ntests> <nbits>
+ *
+ * Times ntests calls of mp_mul(&a, &a, &c) against ntests calls of
+ * mp_sqr(&a, &c), refilling a with mpp_random_size() before every call,
+ * and prints both times, the relative speedup and the faster method.
+ * nbits is rounded up to a whole number of digits.
+ *
+ * Returns 0 on success, 1 on a usage error or if the operands cannot be
+ * initialized.
+ */
+int
+main(int argc, char *argv[])
+{
+    int ntests, prec, ix;
+    unsigned int seed;
+    clock_t start, stop;
+    double multime, sqrtime, speedup;
+    mp_int a, c;
+
+    seed = (unsigned int)time(NULL);
+
+    if (argc < 3) {
+        fprintf(stderr, "Usage: %s <ntests> <nbits>\n", argv[0]);
+        return 1;
+    }
+
+    if ((ntests = abs(atoi(argv[1]))) == 0) {
+        fprintf(stderr, "%s: must request at least 1 test.\n", argv[0]);
+        return 1;
+    }
+    if ((prec = abs(atoi(argv[2]))) < CHAR_BIT) {
+        fprintf(stderr, "%s: must request at least %d bits.\n", argv[0],
+                CHAR_BIT);
+        return 1;
+    }
+
+    /* Convert the requested bit count to a digit count, rounding up. */
+    prec = (prec + (DIGIT_BIT - 1)) / DIGIT_BIT;
+
+    /* Do not run the benchmark on operands that failed to initialize. */
+    if (mp_init_size(&a, prec) != MP_OKAY) {
+        fprintf(stderr, "%s: unable to initialize operands.\n", argv[0]);
+        return 1;
+    }
+    if (mp_init_size(&c, 2 * prec) != MP_OKAY) {
+        fprintf(stderr, "%s: unable to initialize operands.\n", argv[0]);
+        mp_clear(&a);
+        return 1;
+    }
+
+    /* Test multiplication by self */
+    srand(seed);
+    start = clock();
+    for (ix = 0; ix < ntests; ix++) {
+        mpp_random_size(&a, prec);
+        mp_mul(&a, &a, &c);
+    }
+    stop = clock();
+
+    multime = (double)(stop - start) / CLOCKS_PER_SEC;
+
+    /* Test squaring; the same seed is reused so that (presumably) both
+       loops are fed the same sequence of operands. */
+    srand(seed);
+    start = clock();
+    for (ix = 0; ix < ntests; ix++) {
+        mpp_random_size(&a, prec);
+        mp_sqr(&a, &c);
+    }
+    stop = clock();
+
+    sqrtime = (double)(stop - start) / CLOCKS_PER_SEC;
+
+    printf("Multiply: %.4f\n", multime);
+    printf("Square: %.4f\n", sqrtime);
+    if (multime < sqrtime) {
+        /* sqrtime > multime >= 0 here, so the divisor is non-zero. */
+        speedup = 100.0 * (1.0 - multime / sqrtime);
+        printf("Speedup: %.1f%%\n", speedup);
+        printf("Prefer: multiply\n");
+    } else {
+        /* Both timings can be zero when the run is shorter than the
+           clock resolution; report 0% rather than dividing 0 by 0. */
+        speedup = (multime > 0.0) ? 100.0 * (1.0 - sqrtime / multime) : 0.0;
+        printf("Speedup: %.1f%%\n", speedup);
+        printf("Prefer: square\n");
+    }
+
+    mp_clear(&a);
+    mp_clear(&c);
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/multest b/security/nss/lib/freebl/mpi/multest
new file mode 100755
index 000000000..24752e019
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/multest
@@ -0,0 +1,76 @@
+#!/bin/sh
+#
+# multest
+#
+# Run multiply and square timing tests, to compute a chart for the
+# current processor and compiler combination.
+
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+ECHO=/bin/echo
+MAKE=gmake
+
+$ECHO "\n** Running multiply and square timing tests\n"
+
+$ECHO "Bringing 'mulsqr' up to date ... "
+if $MAKE mulsqr ; then
+ :
+else
+ $ECHO "\nMake failed to build mulsqr.\n"
+ exit 1
+fi
+
+if [ ! -x ./mulsqr ] ; then
+ $ECHO "\nCannot find 'mulsqr' program, testing cannot continue.\n"
+ exit 1
+fi
+
+sizes='64 128 192 256 320 384 448 512 640 768 896 1024 1536 2048'
+ntests=500000
+
+$ECHO "Running timing tests, please wait ... "
+
+trap 'echo "oop!";rm -f tt*.tmp;exit 0' INT HUP
+
+touch tt$$.tmp
+$ECHO $ntests tests >> tt$$.tmp
+for size in $sizes ; do
+ $ECHO "$size bits ... \c"
+ set -A res `./mulsqr $ntests $size|head -3|tr -d '%'|awk '{print $2}'`
+ $ECHO $size"\t"${res[0]}"\t"${res[1]}"\t"${res[2]} >> tt$$.tmp
+ $ECHO "(done)"
+done
+mv tt$$.tmp mulsqr-results.txt
+rm -f tt$$.tmp
+
+$ECHO "\n** Running Karatsuba-Ofman multiplication tests\n"
+
+# Rebuild the karatsuba benchmark and stop if the build fails.
+$ECHO "Bringing 'karatsuba' up to date ... "
+if $MAKE karatsuba ; then
+    :
+else
+    $ECHO "\nMake failed to build karatsuba.\n"
+    exit 1
+fi
+
+if [ ! -x ./karatsuba ] ; then
+ $ECHO "\nCannot find 'karatsuba' program, testing cannot continue.\n"
+ exit 1
+fi
+
+ntests=100000
+
+trap 'echo "oop!";rm -f tt*.tmp;exit 0' INT HUP
+
+touch tt$$.tmp
+for size in $sizes ; do
+ $ECHO "$size bits ... "
+ ./karatsuba $ntests $size >> tt$$.tmp
+ tail -2 tt$$.tmp
+done
+mv tt$$.tmp karatsuba-results.txt
+rm -f tt$$.tmp
+
+exit 0
diff --git a/security/nss/lib/freebl/mpi/primes.c b/security/nss/lib/freebl/mpi/primes.c
new file mode 100644
index 000000000..c8bd93ff9
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/primes.c
@@ -0,0 +1,841 @@
+/*
+ * These tables of primes were generated using the 'sieve' program
+ * (sieve.c) and converted to this format with 'ptab.pl'.
+ *
+ * The 'small' table is just the first 128 primes. The 'large' table
+ * is a table of all the prime values that will fit into a single
+ * mp_digit (given the current size of an mp_digit, which is two bytes).
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#if SMALL_TABLE
+#define MP_PRIME_TAB_SIZE 128 /* 'small' table: just the first 128 primes */
+#else
+#define MP_PRIME_TAB_SIZE 6542 /* 'large' table: every prime that fits in a two-byte mp_digit */
+#endif
+
+const int prime_tab_size = MP_PRIME_TAB_SIZE; /* presumably the entry count of prime_tab below; keep in sync with the table */
+const mp_digit prime_tab[] = {
+ 0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
+ 0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
+ 0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
+ 0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083,
+ 0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
+ 0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
+ 0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
+ 0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
+ 0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
+ 0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
+ 0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
+ 0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
+ 0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
+ 0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
+ 0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
+ 0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
+#if !SMALL_TABLE
+ 0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
+ 0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
+ 0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
+ 0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
+ 0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
+ 0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
+ 0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
+ 0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
+ 0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
+ 0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
+ 0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
+ 0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
+ 0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
+ 0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
+ 0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
+ 0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653,
+ 0x0655, 0x065B, 0x0665, 0x0679, 0x067F, 0x0683, 0x0685, 0x069D,
+ 0x06A1, 0x06A3, 0x06AD, 0x06B9, 0x06BB, 0x06C5, 0x06CD, 0x06D3,
+ 0x06D9, 0x06DF, 0x06F1, 0x06F7, 0x06FB, 0x06FD, 0x0709, 0x0713,
+ 0x071F, 0x0727, 0x0737, 0x0745, 0x074B, 0x074F, 0x0751, 0x0755,
+ 0x0757, 0x0761, 0x076D, 0x0773, 0x0779, 0x078B, 0x078D, 0x079D,
+ 0x079F, 0x07B5, 0x07BB, 0x07C3, 0x07C9, 0x07CD, 0x07CF, 0x07D3,
+ 0x07DB, 0x07E1, 0x07EB, 0x07ED, 0x07F7, 0x0805, 0x080F, 0x0815,
+ 0x0821, 0x0823, 0x0827, 0x0829, 0x0833, 0x083F, 0x0841, 0x0851,
+ 0x0853, 0x0859, 0x085D, 0x085F, 0x0869, 0x0871, 0x0883, 0x089B,
+ 0x089F, 0x08A5, 0x08AD, 0x08BD, 0x08BF, 0x08C3, 0x08CB, 0x08DB,
+ 0x08DD, 0x08E1, 0x08E9, 0x08EF, 0x08F5, 0x08F9, 0x0905, 0x0907,
+ 0x091D, 0x0923, 0x0925, 0x092B, 0x092F, 0x0935, 0x0943, 0x0949,
+ 0x094D, 0x094F, 0x0955, 0x0959, 0x095F, 0x096B, 0x0971, 0x0977,
+ 0x0985, 0x0989, 0x098F, 0x099B, 0x09A3, 0x09A9, 0x09AD, 0x09C7,
+ 0x09D9, 0x09E3, 0x09EB, 0x09EF, 0x09F5, 0x09F7, 0x09FD, 0x0A13,
+ 0x0A1F, 0x0A21, 0x0A31, 0x0A39, 0x0A3D, 0x0A49, 0x0A57, 0x0A61,
+ 0x0A63, 0x0A67, 0x0A6F, 0x0A75, 0x0A7B, 0x0A7F, 0x0A81, 0x0A85,
+ 0x0A8B, 0x0A93, 0x0A97, 0x0A99, 0x0A9F, 0x0AA9, 0x0AAB, 0x0AB5,
+ 0x0ABD, 0x0AC1, 0x0ACF, 0x0AD9, 0x0AE5, 0x0AE7, 0x0AED, 0x0AF1,
+ 0x0AF3, 0x0B03, 0x0B11, 0x0B15, 0x0B1B, 0x0B23, 0x0B29, 0x0B2D,
+ 0x0B3F, 0x0B47, 0x0B51, 0x0B57, 0x0B5D, 0x0B65, 0x0B6F, 0x0B7B,
+ 0x0B89, 0x0B8D, 0x0B93, 0x0B99, 0x0B9B, 0x0BB7, 0x0BB9, 0x0BC3,
+ 0x0BCB, 0x0BCF, 0x0BDD, 0x0BE1, 0x0BE9, 0x0BF5, 0x0BFB, 0x0C07,
+ 0x0C0B, 0x0C11, 0x0C25, 0x0C2F, 0x0C31, 0x0C41, 0x0C5B, 0x0C5F,
+ 0x0C61, 0x0C6D, 0x0C73, 0x0C77, 0x0C83, 0x0C89, 0x0C91, 0x0C95,
+ 0x0C9D, 0x0CB3, 0x0CB5, 0x0CB9, 0x0CBB, 0x0CC7, 0x0CE3, 0x0CE5,
+ 0x0CEB, 0x0CF1, 0x0CF7, 0x0CFB, 0x0D01, 0x0D03, 0x0D0F, 0x0D13,
+ 0x0D1F, 0x0D21, 0x0D2B, 0x0D2D, 0x0D3D, 0x0D3F, 0x0D4F, 0x0D55,
+ 0x0D69, 0x0D79, 0x0D81, 0x0D85, 0x0D87, 0x0D8B, 0x0D8D, 0x0DA3,
+ 0x0DAB, 0x0DB7, 0x0DBD, 0x0DC7, 0x0DC9, 0x0DCD, 0x0DD3, 0x0DD5,
+ 0x0DDB, 0x0DE5, 0x0DE7, 0x0DF3, 0x0DFD, 0x0DFF, 0x0E09, 0x0E17,
+ 0x0E1D, 0x0E21, 0x0E27, 0x0E2F, 0x0E35, 0x0E3B, 0x0E4B, 0x0E57,
+ 0x0E59, 0x0E5D, 0x0E6B, 0x0E71, 0x0E75, 0x0E7D, 0x0E87, 0x0E8F,
+ 0x0E95, 0x0E9B, 0x0EB1, 0x0EB7, 0x0EB9, 0x0EC3, 0x0ED1, 0x0ED5,
+ 0x0EDB, 0x0EED, 0x0EEF, 0x0EF9, 0x0F07, 0x0F0B, 0x0F0D, 0x0F17,
+ 0x0F25, 0x0F29, 0x0F31, 0x0F43, 0x0F47, 0x0F4D, 0x0F4F, 0x0F53,
+ 0x0F59, 0x0F5B, 0x0F67, 0x0F6B, 0x0F7F, 0x0F95, 0x0FA1, 0x0FA3,
+ 0x0FA7, 0x0FAD, 0x0FB3, 0x0FB5, 0x0FBB, 0x0FD1, 0x0FD3, 0x0FD9,
+ 0x0FE9, 0x0FEF, 0x0FFB, 0x0FFD, 0x1003, 0x100F, 0x101F, 0x1021,
+ 0x1025, 0x102B, 0x1039, 0x103D, 0x103F, 0x1051, 0x1069, 0x1073,
+ 0x1079, 0x107B, 0x1085, 0x1087, 0x1091, 0x1093, 0x109D, 0x10A3,
+ 0x10A5, 0x10AF, 0x10B1, 0x10BB, 0x10C1, 0x10C9, 0x10E7, 0x10F1,
+ 0x10F3, 0x10FD, 0x1105, 0x110B, 0x1115, 0x1127, 0x112D, 0x1139,
+ 0x1145, 0x1147, 0x1159, 0x115F, 0x1163, 0x1169, 0x116F, 0x1181,
+ 0x1183, 0x118D, 0x119B, 0x11A1, 0x11A5, 0x11A7, 0x11AB, 0x11C3,
+ 0x11C5, 0x11D1, 0x11D7, 0x11E7, 0x11EF, 0x11F5, 0x11FB, 0x120D,
+ 0x121D, 0x121F, 0x1223, 0x1229, 0x122B, 0x1231, 0x1237, 0x1241,
+ 0x1247, 0x1253, 0x125F, 0x1271, 0x1273, 0x1279, 0x127D, 0x128F,
+ 0x1297, 0x12AF, 0x12B3, 0x12B5, 0x12B9, 0x12BF, 0x12C1, 0x12CD,
+ 0x12D1, 0x12DF, 0x12FD, 0x1307, 0x130D, 0x1319, 0x1327, 0x132D,
+ 0x1337, 0x1343, 0x1345, 0x1349, 0x134F, 0x1357, 0x135D, 0x1367,
+ 0x1369, 0x136D, 0x137B, 0x1381, 0x1387, 0x138B, 0x1391, 0x1393,
+ 0x139D, 0x139F, 0x13AF, 0x13BB, 0x13C3, 0x13D5, 0x13D9, 0x13DF,
+ 0x13EB, 0x13ED, 0x13F3, 0x13F9, 0x13FF, 0x141B, 0x1421, 0x142F,
+ 0x1433, 0x143B, 0x1445, 0x144D, 0x1459, 0x146B, 0x146F, 0x1471,
+ 0x1475, 0x148D, 0x1499, 0x149F, 0x14A1, 0x14B1, 0x14B7, 0x14BD,
+ 0x14CB, 0x14D5, 0x14E3, 0x14E7, 0x1505, 0x150B, 0x1511, 0x1517,
+ 0x151F, 0x1525, 0x1529, 0x152B, 0x1537, 0x153D, 0x1541, 0x1543,
+ 0x1549, 0x155F, 0x1565, 0x1567, 0x156B, 0x157D, 0x157F, 0x1583,
+ 0x158F, 0x1591, 0x1597, 0x159B, 0x15B5, 0x15BB, 0x15C1, 0x15C5,
+ 0x15CD, 0x15D7, 0x15F7, 0x1607, 0x1609, 0x160F, 0x1613, 0x1615,
+ 0x1619, 0x161B, 0x1625, 0x1633, 0x1639, 0x163D, 0x1645, 0x164F,
+ 0x1655, 0x1669, 0x166D, 0x166F, 0x1675, 0x1693, 0x1697, 0x169F,
+ 0x16A9, 0x16AF, 0x16B5, 0x16BD, 0x16C3, 0x16CF, 0x16D3, 0x16D9,
+ 0x16DB, 0x16E1, 0x16E5, 0x16EB, 0x16ED, 0x16F7, 0x16F9, 0x1709,
+ 0x170F, 0x1723, 0x1727, 0x1733, 0x1741, 0x175D, 0x1763, 0x1777,
+ 0x177B, 0x178D, 0x1795, 0x179B, 0x179F, 0x17A5, 0x17B3, 0x17B9,
+ 0x17BF, 0x17C9, 0x17CB, 0x17D5, 0x17E1, 0x17E9, 0x17F3, 0x17F5,
+ 0x17FF, 0x1807, 0x1813, 0x181D, 0x1835, 0x1837, 0x183B, 0x1843,
+ 0x1849, 0x184D, 0x1855, 0x1867, 0x1871, 0x1877, 0x187D, 0x187F,
+ 0x1885, 0x188F, 0x189B, 0x189D, 0x18A7, 0x18AD, 0x18B3, 0x18B9,
+ 0x18C1, 0x18C7, 0x18D1, 0x18D7, 0x18D9, 0x18DF, 0x18E5, 0x18EB,
+ 0x18F5, 0x18FD, 0x1915, 0x191B, 0x1931, 0x1933, 0x1945, 0x1949,
+ 0x1951, 0x195B, 0x1979, 0x1981, 0x1993, 0x1997, 0x1999, 0x19A3,
+ 0x19A9, 0x19AB, 0x19B1, 0x19B5, 0x19C7, 0x19CF, 0x19DB, 0x19ED,
+ 0x19FD, 0x1A03, 0x1A05, 0x1A11, 0x1A17, 0x1A21, 0x1A23, 0x1A2D,
+ 0x1A2F, 0x1A35, 0x1A3F, 0x1A4D, 0x1A51, 0x1A69, 0x1A6B, 0x1A7B,
+ 0x1A7D, 0x1A87, 0x1A89, 0x1A93, 0x1AA7, 0x1AAB, 0x1AAD, 0x1AB1,
+ 0x1AB9, 0x1AC9, 0x1ACF, 0x1AD5, 0x1AD7, 0x1AE3, 0x1AF3, 0x1AFB,
+ 0x1AFF, 0x1B05, 0x1B23, 0x1B25, 0x1B2F, 0x1B31, 0x1B37, 0x1B3B,
+ 0x1B41, 0x1B47, 0x1B4F, 0x1B55, 0x1B59, 0x1B65, 0x1B6B, 0x1B73,
+ 0x1B7F, 0x1B83, 0x1B91, 0x1B9D, 0x1BA7, 0x1BBF, 0x1BC5, 0x1BD1,
+ 0x1BD7, 0x1BD9, 0x1BEF, 0x1BF7, 0x1C09, 0x1C13, 0x1C19, 0x1C27,
+ 0x1C2B, 0x1C2D, 0x1C33, 0x1C3D, 0x1C45, 0x1C4B, 0x1C4F, 0x1C55,
+ 0x1C73, 0x1C81, 0x1C8B, 0x1C8D, 0x1C99, 0x1CA3, 0x1CA5, 0x1CB5,
+ 0x1CB7, 0x1CC9, 0x1CE1, 0x1CF3, 0x1CF9, 0x1D09, 0x1D1B, 0x1D21,
+ 0x1D23, 0x1D35, 0x1D39, 0x1D3F, 0x1D41, 0x1D4B, 0x1D53, 0x1D5D,
+ 0x1D63, 0x1D69, 0x1D71, 0x1D75, 0x1D7B, 0x1D7D, 0x1D87, 0x1D89,
+ 0x1D95, 0x1D99, 0x1D9F, 0x1DA5, 0x1DA7, 0x1DB3, 0x1DB7, 0x1DC5,
+ 0x1DD7, 0x1DDB, 0x1DE1, 0x1DF5, 0x1DF9, 0x1E01, 0x1E07, 0x1E0B,
+ 0x1E13, 0x1E17, 0x1E25, 0x1E2B, 0x1E2F, 0x1E3D, 0x1E49, 0x1E4D,
+ 0x1E4F, 0x1E6D, 0x1E71, 0x1E89, 0x1E8F, 0x1E95, 0x1EA1, 0x1EAD,
+ 0x1EBB, 0x1EC1, 0x1EC5, 0x1EC7, 0x1ECB, 0x1EDD, 0x1EE3, 0x1EEF,
+ 0x1EF7, 0x1EFD, 0x1F01, 0x1F0D, 0x1F0F, 0x1F1B, 0x1F39, 0x1F49,
+ 0x1F4B, 0x1F51, 0x1F67, 0x1F75, 0x1F7B, 0x1F85, 0x1F91, 0x1F97,
+ 0x1F99, 0x1F9D, 0x1FA5, 0x1FAF, 0x1FB5, 0x1FBB, 0x1FD3, 0x1FE1,
+ 0x1FE7, 0x1FEB, 0x1FF3, 0x1FFF, 0x2011, 0x201B, 0x201D, 0x2027,
+ 0x2029, 0x202D, 0x2033, 0x2047, 0x204D, 0x2051, 0x205F, 0x2063,
+ 0x2065, 0x2069, 0x2077, 0x207D, 0x2089, 0x20A1, 0x20AB, 0x20B1,
+ 0x20B9, 0x20C3, 0x20C5, 0x20E3, 0x20E7, 0x20ED, 0x20EF, 0x20FB,
+ 0x20FF, 0x210D, 0x2113, 0x2135, 0x2141, 0x2149, 0x214F, 0x2159,
+ 0x215B, 0x215F, 0x2173, 0x217D, 0x2185, 0x2195, 0x2197, 0x21A1,
+ 0x21AF, 0x21B3, 0x21B5, 0x21C1, 0x21C7, 0x21D7, 0x21DD, 0x21E5,
+ 0x21E9, 0x21F1, 0x21F5, 0x21FB, 0x2203, 0x2209, 0x220F, 0x221B,
+ 0x2221, 0x2225, 0x222B, 0x2231, 0x2239, 0x224B, 0x224F, 0x2263,
+ 0x2267, 0x2273, 0x2275, 0x227F, 0x2285, 0x2287, 0x2291, 0x229D,
+ 0x229F, 0x22A3, 0x22B7, 0x22BD, 0x22DB, 0x22E1, 0x22E5, 0x22ED,
+ 0x22F7, 0x2303, 0x2309, 0x230B, 0x2327, 0x2329, 0x232F, 0x2333,
+ 0x2335, 0x2345, 0x2351, 0x2353, 0x2359, 0x2363, 0x236B, 0x2383,
+ 0x238F, 0x2395, 0x23A7, 0x23AD, 0x23B1, 0x23BF, 0x23C5, 0x23C9,
+ 0x23D5, 0x23DD, 0x23E3, 0x23EF, 0x23F3, 0x23F9, 0x2405, 0x240B,
+ 0x2417, 0x2419, 0x2429, 0x243D, 0x2441, 0x2443, 0x244D, 0x245F,
+ 0x2467, 0x246B, 0x2479, 0x247D, 0x247F, 0x2485, 0x249B, 0x24A1,
+ 0x24AF, 0x24B5, 0x24BB, 0x24C5, 0x24CB, 0x24CD, 0x24D7, 0x24D9,
+ 0x24DD, 0x24DF, 0x24F5, 0x24F7, 0x24FB, 0x2501, 0x2507, 0x2513,
+ 0x2519, 0x2527, 0x2531, 0x253D, 0x2543, 0x254B, 0x254F, 0x2573,
+ 0x2581, 0x258D, 0x2593, 0x2597, 0x259D, 0x259F, 0x25AB, 0x25B1,
+ 0x25BD, 0x25CD, 0x25CF, 0x25D9, 0x25E1, 0x25F7, 0x25F9, 0x2605,
+ 0x260B, 0x260F, 0x2615, 0x2627, 0x2629, 0x2635, 0x263B, 0x263F,
+ 0x264B, 0x2653, 0x2659, 0x2665, 0x2669, 0x266F, 0x267B, 0x2681,
+ 0x2683, 0x268F, 0x269B, 0x269F, 0x26AD, 0x26B3, 0x26C3, 0x26C9,
+ 0x26CB, 0x26D5, 0x26DD, 0x26EF, 0x26F5, 0x2717, 0x2719, 0x2735,
+ 0x2737, 0x274D, 0x2753, 0x2755, 0x275F, 0x276B, 0x276D, 0x2773,
+ 0x2777, 0x277F, 0x2795, 0x279B, 0x279D, 0x27A7, 0x27AF, 0x27B3,
+ 0x27B9, 0x27C1, 0x27C5, 0x27D1, 0x27E3, 0x27EF, 0x2803, 0x2807,
+ 0x280D, 0x2813, 0x281B, 0x281F, 0x2821, 0x2831, 0x283D, 0x283F,
+ 0x2849, 0x2851, 0x285B, 0x285D, 0x2861, 0x2867, 0x2875, 0x2881,
+ 0x2897, 0x289F, 0x28BB, 0x28BD, 0x28C1, 0x28D5, 0x28D9, 0x28DB,
+ 0x28DF, 0x28ED, 0x28F7, 0x2903, 0x2905, 0x2911, 0x2921, 0x2923,
+ 0x293F, 0x2947, 0x295D, 0x2965, 0x2969, 0x296F, 0x2975, 0x2983,
+ 0x2987, 0x298F, 0x299B, 0x29A1, 0x29A7, 0x29AB, 0x29BF, 0x29C3,
+ 0x29D5, 0x29D7, 0x29E3, 0x29E9, 0x29ED, 0x29F3, 0x2A01, 0x2A13,
+ 0x2A1D, 0x2A25, 0x2A2F, 0x2A4F, 0x2A55, 0x2A5F, 0x2A65, 0x2A6B,
+ 0x2A6D, 0x2A73, 0x2A83, 0x2A89, 0x2A8B, 0x2A97, 0x2A9D, 0x2AB9,
+ 0x2ABB, 0x2AC5, 0x2ACD, 0x2ADD, 0x2AE3, 0x2AEB, 0x2AF1, 0x2AFB,
+ 0x2B13, 0x2B27, 0x2B31, 0x2B33, 0x2B3D, 0x2B3F, 0x2B4B, 0x2B4F,
+ 0x2B55, 0x2B69, 0x2B6D, 0x2B6F, 0x2B7B, 0x2B8D, 0x2B97, 0x2B99,
+ 0x2BA3, 0x2BA5, 0x2BA9, 0x2BBD, 0x2BCD, 0x2BE7, 0x2BEB, 0x2BF3,
+ 0x2BF9, 0x2BFD, 0x2C09, 0x2C0F, 0x2C17, 0x2C23, 0x2C2F, 0x2C35,
+ 0x2C39, 0x2C41, 0x2C57, 0x2C59, 0x2C69, 0x2C77, 0x2C81, 0x2C87,
+ 0x2C93, 0x2C9F, 0x2CAD, 0x2CB3, 0x2CB7, 0x2CCB, 0x2CCF, 0x2CDB,
+ 0x2CE1, 0x2CE3, 0x2CE9, 0x2CEF, 0x2CFF, 0x2D07, 0x2D1D, 0x2D1F,
+ 0x2D3B, 0x2D43, 0x2D49, 0x2D4D, 0x2D61, 0x2D65, 0x2D71, 0x2D89,
+ 0x2D9D, 0x2DA1, 0x2DA9, 0x2DB3, 0x2DB5, 0x2DC5, 0x2DC7, 0x2DD3,
+ 0x2DDF, 0x2E01, 0x2E03, 0x2E07, 0x2E0D, 0x2E19, 0x2E1F, 0x2E25,
+ 0x2E2D, 0x2E33, 0x2E37, 0x2E39, 0x2E3F, 0x2E57, 0x2E5B, 0x2E6F,
+ 0x2E79, 0x2E7F, 0x2E85, 0x2E93, 0x2E97, 0x2E9D, 0x2EA3, 0x2EA5,
+ 0x2EB1, 0x2EB7, 0x2EC1, 0x2EC3, 0x2ECD, 0x2ED3, 0x2EE7, 0x2EEB,
+ 0x2F05, 0x2F09, 0x2F0B, 0x2F11, 0x2F27, 0x2F29, 0x2F41, 0x2F45,
+ 0x2F4B, 0x2F4D, 0x2F51, 0x2F57, 0x2F6F, 0x2F75, 0x2F7D, 0x2F81,
+ 0x2F83, 0x2FA5, 0x2FAB, 0x2FB3, 0x2FC3, 0x2FCF, 0x2FD1, 0x2FDB,
+ 0x2FDD, 0x2FE7, 0x2FED, 0x2FF5, 0x2FF9, 0x3001, 0x300D, 0x3023,
+ 0x3029, 0x3037, 0x303B, 0x3055, 0x3059, 0x305B, 0x3067, 0x3071,
+ 0x3079, 0x307D, 0x3085, 0x3091, 0x3095, 0x30A3, 0x30A9, 0x30B9,
+ 0x30BF, 0x30C7, 0x30CB, 0x30D1, 0x30D7, 0x30DF, 0x30E5, 0x30EF,
+ 0x30FB, 0x30FD, 0x3103, 0x3109, 0x3119, 0x3121, 0x3127, 0x312D,
+ 0x3139, 0x3143, 0x3145, 0x314B, 0x315D, 0x3161, 0x3167, 0x316D,
+ 0x3173, 0x317F, 0x3191, 0x3199, 0x319F, 0x31A9, 0x31B1, 0x31C3,
+ 0x31C7, 0x31D5, 0x31DB, 0x31ED, 0x31F7, 0x31FF, 0x3209, 0x3215,
+ 0x3217, 0x321D, 0x3229, 0x3235, 0x3259, 0x325D, 0x3263, 0x326B,
+ 0x326F, 0x3275, 0x3277, 0x327B, 0x328D, 0x3299, 0x329F, 0x32A7,
+ 0x32AD, 0x32B3, 0x32B7, 0x32C9, 0x32CB, 0x32CF, 0x32D1, 0x32E9,
+ 0x32ED, 0x32F3, 0x32F9, 0x3307, 0x3325, 0x332B, 0x332F, 0x3335,
+ 0x3341, 0x3347, 0x335B, 0x335F, 0x3367, 0x336B, 0x3373, 0x3379,
+ 0x337F, 0x3383, 0x33A1, 0x33A3, 0x33AD, 0x33B9, 0x33C1, 0x33CB,
+ 0x33D3, 0x33EB, 0x33F1, 0x33FD, 0x3401, 0x340F, 0x3413, 0x3419,
+ 0x341B, 0x3437, 0x3445, 0x3455, 0x3457, 0x3463, 0x3469, 0x346D,
+ 0x3481, 0x348B, 0x3491, 0x3497, 0x349D, 0x34A5, 0x34AF, 0x34BB,
+ 0x34C9, 0x34D3, 0x34E1, 0x34F1, 0x34FF, 0x3509, 0x3517, 0x351D,
+ 0x352D, 0x3533, 0x353B, 0x3541, 0x3551, 0x3565, 0x356F, 0x3571,
+ 0x3577, 0x357B, 0x357D, 0x3581, 0x358D, 0x358F, 0x3599, 0x359B,
+ 0x35A1, 0x35B7, 0x35BD, 0x35BF, 0x35C3, 0x35D5, 0x35DD, 0x35E7,
+ 0x35EF, 0x3605, 0x3607, 0x3611, 0x3623, 0x3631, 0x3635, 0x3637,
+ 0x363B, 0x364D, 0x364F, 0x3653, 0x3659, 0x3661, 0x366B, 0x366D,
+ 0x368B, 0x368F, 0x36AD, 0x36AF, 0x36B9, 0x36BB, 0x36CD, 0x36D1,
+ 0x36E3, 0x36E9, 0x36F7, 0x3701, 0x3703, 0x3707, 0x371B, 0x373F,
+ 0x3745, 0x3749, 0x374F, 0x375D, 0x3761, 0x3775, 0x377F, 0x378D,
+ 0x37A3, 0x37A9, 0x37AB, 0x37C9, 0x37D5, 0x37DF, 0x37F1, 0x37F3,
+ 0x37F7, 0x3805, 0x380B, 0x3821, 0x3833, 0x3835, 0x3841, 0x3847,
+ 0x384B, 0x3853, 0x3857, 0x385F, 0x3865, 0x386F, 0x3871, 0x387D,
+ 0x388F, 0x3899, 0x38A7, 0x38B7, 0x38C5, 0x38C9, 0x38CF, 0x38D5,
+ 0x38D7, 0x38DD, 0x38E1, 0x38E3, 0x38FF, 0x3901, 0x391D, 0x3923,
+ 0x3925, 0x3929, 0x392F, 0x393D, 0x3941, 0x394D, 0x395B, 0x396B,
+ 0x3979, 0x397D, 0x3983, 0x398B, 0x3991, 0x3995, 0x399B, 0x39A1,
+ 0x39A7, 0x39AF, 0x39B3, 0x39BB, 0x39BF, 0x39CD, 0x39DD, 0x39E5,
+ 0x39EB, 0x39EF, 0x39FB, 0x3A03, 0x3A13, 0x3A15, 0x3A1F, 0x3A27,
+ 0x3A2B, 0x3A31, 0x3A4B, 0x3A51, 0x3A5B, 0x3A63, 0x3A67, 0x3A6D,
+ 0x3A79, 0x3A87, 0x3AA5, 0x3AA9, 0x3AB7, 0x3ACD, 0x3AD5, 0x3AE1,
+ 0x3AE5, 0x3AEB, 0x3AF3, 0x3AFD, 0x3B03, 0x3B11, 0x3B1B, 0x3B21,
+ 0x3B23, 0x3B2D, 0x3B39, 0x3B45, 0x3B53, 0x3B59, 0x3B5F, 0x3B71,
+ 0x3B7B, 0x3B81, 0x3B89, 0x3B9B, 0x3B9F, 0x3BA5, 0x3BA7, 0x3BAD,
+ 0x3BB7, 0x3BB9, 0x3BC3, 0x3BCB, 0x3BD1, 0x3BD7, 0x3BE1, 0x3BE3,
+ 0x3BF5, 0x3BFF, 0x3C01, 0x3C0D, 0x3C11, 0x3C17, 0x3C1F, 0x3C29,
+ 0x3C35, 0x3C43, 0x3C4F, 0x3C53, 0x3C5B, 0x3C65, 0x3C6B, 0x3C71,
+ 0x3C85, 0x3C89, 0x3C97, 0x3CA7, 0x3CB5, 0x3CBF, 0x3CC7, 0x3CD1,
+ 0x3CDD, 0x3CDF, 0x3CF1, 0x3CF7, 0x3D03, 0x3D0D, 0x3D19, 0x3D1B,
+ 0x3D1F, 0x3D21, 0x3D2D, 0x3D33, 0x3D37, 0x3D3F, 0x3D43, 0x3D6F,
+ 0x3D73, 0x3D75, 0x3D79, 0x3D7B, 0x3D85, 0x3D91, 0x3D97, 0x3D9D,
+ 0x3DAB, 0x3DAF, 0x3DB5, 0x3DBB, 0x3DC1, 0x3DC9, 0x3DCF, 0x3DF3,
+ 0x3E05, 0x3E09, 0x3E0F, 0x3E11, 0x3E1D, 0x3E23, 0x3E29, 0x3E2F,
+ 0x3E33, 0x3E41, 0x3E57, 0x3E63, 0x3E65, 0x3E77, 0x3E81, 0x3E87,
+ 0x3EA1, 0x3EB9, 0x3EBD, 0x3EBF, 0x3EC3, 0x3EC5, 0x3EC9, 0x3ED7,
+ 0x3EDB, 0x3EE1, 0x3EE7, 0x3EEF, 0x3EFF, 0x3F0B, 0x3F0D, 0x3F37,
+ 0x3F3B, 0x3F3D, 0x3F41, 0x3F59, 0x3F5F, 0x3F65, 0x3F67, 0x3F79,
+ 0x3F7D, 0x3F8B, 0x3F91, 0x3FAD, 0x3FBF, 0x3FCD, 0x3FD3, 0x3FDD,
+ 0x3FE9, 0x3FEB, 0x3FF1, 0x3FFD, 0x401B, 0x4021, 0x4025, 0x402B,
+ 0x4031, 0x403F, 0x4043, 0x4045, 0x405D, 0x4061, 0x4067, 0x406D,
+ 0x4087, 0x4091, 0x40A3, 0x40A9, 0x40B1, 0x40B7, 0x40BD, 0x40DB,
+ 0x40DF, 0x40EB, 0x40F7, 0x40F9, 0x4109, 0x410B, 0x4111, 0x4115,
+ 0x4121, 0x4133, 0x4135, 0x413B, 0x413F, 0x4159, 0x4165, 0x416B,
+ 0x4177, 0x417B, 0x4193, 0x41AB, 0x41B7, 0x41BD, 0x41BF, 0x41CB,
+ 0x41E7, 0x41EF, 0x41F3, 0x41F9, 0x4205, 0x4207, 0x4219, 0x421F,
+ 0x4223, 0x4229, 0x422F, 0x4243, 0x4253, 0x4255, 0x425B, 0x4261,
+ 0x4273, 0x427D, 0x4283, 0x4285, 0x4289, 0x4291, 0x4297, 0x429D,
+ 0x42B5, 0x42C5, 0x42CB, 0x42D3, 0x42DD, 0x42E3, 0x42F1, 0x4307,
+ 0x430F, 0x431F, 0x4325, 0x4327, 0x4333, 0x4337, 0x4339, 0x434F,
+ 0x4357, 0x4369, 0x438B, 0x438D, 0x4393, 0x43A5, 0x43A9, 0x43AF,
+ 0x43B5, 0x43BD, 0x43C7, 0x43CF, 0x43E1, 0x43E7, 0x43EB, 0x43ED,
+ 0x43F1, 0x43F9, 0x4409, 0x440B, 0x4417, 0x4423, 0x4429, 0x443B,
+ 0x443F, 0x4445, 0x444B, 0x4451, 0x4453, 0x4459, 0x4465, 0x446F,
+ 0x4483, 0x448F, 0x44A1, 0x44A5, 0x44AB, 0x44AD, 0x44BD, 0x44BF,
+ 0x44C9, 0x44D7, 0x44DB, 0x44F9, 0x44FB, 0x4505, 0x4511, 0x4513,
+ 0x452B, 0x4531, 0x4541, 0x4549, 0x4553, 0x4555, 0x4561, 0x4577,
+ 0x457D, 0x457F, 0x458F, 0x45A3, 0x45AD, 0x45AF, 0x45BB, 0x45C7,
+ 0x45D9, 0x45E3, 0x45EF, 0x45F5, 0x45F7, 0x4601, 0x4603, 0x4609,
+ 0x4613, 0x4625, 0x4627, 0x4633, 0x4639, 0x463D, 0x4643, 0x4645,
+ 0x465D, 0x4679, 0x467B, 0x467F, 0x4681, 0x468B, 0x468D, 0x469D,
+ 0x46A9, 0x46B1, 0x46C7, 0x46C9, 0x46CF, 0x46D3, 0x46D5, 0x46DF,
+ 0x46E5, 0x46F9, 0x4705, 0x470F, 0x4717, 0x4723, 0x4729, 0x472F,
+ 0x4735, 0x4739, 0x474B, 0x474D, 0x4751, 0x475D, 0x476F, 0x4771,
+ 0x477D, 0x4783, 0x4787, 0x4789, 0x4799, 0x47A5, 0x47B1, 0x47BF,
+ 0x47C3, 0x47CB, 0x47DD, 0x47E1, 0x47ED, 0x47FB, 0x4801, 0x4807,
+ 0x480B, 0x4813, 0x4819, 0x481D, 0x4831, 0x483D, 0x4847, 0x4855,
+ 0x4859, 0x485B, 0x486B, 0x486D, 0x4879, 0x4897, 0x489B, 0x48A1,
+ 0x48B9, 0x48CD, 0x48E5, 0x48EF, 0x48F7, 0x4903, 0x490D, 0x4919,
+ 0x491F, 0x492B, 0x4937, 0x493D, 0x4945, 0x4955, 0x4963, 0x4969,
+ 0x496D, 0x4973, 0x4997, 0x49AB, 0x49B5, 0x49D3, 0x49DF, 0x49E1,
+ 0x49E5, 0x49E7, 0x4A03, 0x4A0F, 0x4A1D, 0x4A23, 0x4A39, 0x4A41,
+ 0x4A45, 0x4A57, 0x4A5D, 0x4A6B, 0x4A7D, 0x4A81, 0x4A87, 0x4A89,
+ 0x4A8F, 0x4AB1, 0x4AC3, 0x4AC5, 0x4AD5, 0x4ADB, 0x4AED, 0x4AEF,
+ 0x4B07, 0x4B0B, 0x4B0D, 0x4B13, 0x4B1F, 0x4B25, 0x4B31, 0x4B3B,
+ 0x4B43, 0x4B49, 0x4B59, 0x4B65, 0x4B6D, 0x4B77, 0x4B85, 0x4BAD,
+ 0x4BB3, 0x4BB5, 0x4BBB, 0x4BBF, 0x4BCB, 0x4BD9, 0x4BDD, 0x4BDF,
+ 0x4BE3, 0x4BE5, 0x4BE9, 0x4BF1, 0x4BF7, 0x4C01, 0x4C07, 0x4C0D,
+ 0x4C0F, 0x4C15, 0x4C1B, 0x4C21, 0x4C2D, 0x4C33, 0x4C4B, 0x4C55,
+ 0x4C57, 0x4C61, 0x4C67, 0x4C73, 0x4C79, 0x4C7F, 0x4C8D, 0x4C93,
+ 0x4C99, 0x4CCD, 0x4CE1, 0x4CE7, 0x4CF1, 0x4CF3, 0x4CFD, 0x4D05,
+ 0x4D0F, 0x4D1B, 0x4D27, 0x4D29, 0x4D2F, 0x4D33, 0x4D41, 0x4D51,
+ 0x4D59, 0x4D65, 0x4D6B, 0x4D81, 0x4D83, 0x4D8D, 0x4D95, 0x4D9B,
+ 0x4DB1, 0x4DB3, 0x4DC9, 0x4DCF, 0x4DD7, 0x4DE1, 0x4DED, 0x4DF9,
+ 0x4DFB, 0x4E05, 0x4E0B, 0x4E17, 0x4E19, 0x4E1D, 0x4E2B, 0x4E35,
+ 0x4E37, 0x4E3D, 0x4E4F, 0x4E53, 0x4E5F, 0x4E67, 0x4E79, 0x4E85,
+ 0x4E8B, 0x4E91, 0x4E95, 0x4E9B, 0x4EA1, 0x4EAF, 0x4EB3, 0x4EB5,
+ 0x4EC1, 0x4ECD, 0x4ED1, 0x4ED7, 0x4EE9, 0x4EFB, 0x4F07, 0x4F09,
+ 0x4F19, 0x4F25, 0x4F2D, 0x4F3F, 0x4F49, 0x4F63, 0x4F67, 0x4F6D,
+ 0x4F75, 0x4F7B, 0x4F81, 0x4F85, 0x4F87, 0x4F91, 0x4FA5, 0x4FA9,
+ 0x4FAF, 0x4FB7, 0x4FBB, 0x4FCF, 0x4FD9, 0x4FDB, 0x4FFD, 0x4FFF,
+ 0x5003, 0x501B, 0x501D, 0x5029, 0x5035, 0x503F, 0x5045, 0x5047,
+ 0x5053, 0x5071, 0x5077, 0x5083, 0x5093, 0x509F, 0x50A1, 0x50B7,
+ 0x50C9, 0x50D5, 0x50E3, 0x50ED, 0x50EF, 0x50FB, 0x5107, 0x510B,
+ 0x510D, 0x5111, 0x5117, 0x5123, 0x5125, 0x5135, 0x5147, 0x5149,
+ 0x5171, 0x5179, 0x5189, 0x518F, 0x5197, 0x51A1, 0x51A3, 0x51A7,
+ 0x51B9, 0x51C1, 0x51CB, 0x51D3, 0x51DF, 0x51E3, 0x51F5, 0x51F7,
+ 0x5209, 0x5213, 0x5215, 0x5219, 0x521B, 0x521F, 0x5227, 0x5243,
+ 0x5245, 0x524B, 0x5261, 0x526D, 0x5273, 0x5281, 0x5293, 0x5297,
+ 0x529D, 0x52A5, 0x52AB, 0x52B1, 0x52BB, 0x52C3, 0x52C7, 0x52C9,
+ 0x52DB, 0x52E5, 0x52EB, 0x52FF, 0x5315, 0x531D, 0x5323, 0x5341,
+ 0x5345, 0x5347, 0x534B, 0x535D, 0x5363, 0x5381, 0x5383, 0x5387,
+ 0x538F, 0x5395, 0x5399, 0x539F, 0x53AB, 0x53B9, 0x53DB, 0x53E9,
+ 0x53EF, 0x53F3, 0x53F5, 0x53FB, 0x53FF, 0x540D, 0x5411, 0x5413,
+ 0x5419, 0x5435, 0x5437, 0x543B, 0x5441, 0x5449, 0x5453, 0x5455,
+ 0x545F, 0x5461, 0x546B, 0x546D, 0x5471, 0x548F, 0x5491, 0x549D,
+ 0x54A9, 0x54B3, 0x54C5, 0x54D1, 0x54DF, 0x54E9, 0x54EB, 0x54F7,
+ 0x54FD, 0x5507, 0x550D, 0x551B, 0x5527, 0x552B, 0x5539, 0x553D,
+ 0x554F, 0x5551, 0x555B, 0x5563, 0x5567, 0x556F, 0x5579, 0x5585,
+ 0x5597, 0x55A9, 0x55B1, 0x55B7, 0x55C9, 0x55D9, 0x55E7, 0x55ED,
+ 0x55F3, 0x55FD, 0x560B, 0x560F, 0x5615, 0x5617, 0x5623, 0x562F,
+ 0x5633, 0x5639, 0x563F, 0x564B, 0x564D, 0x565D, 0x565F, 0x566B,
+ 0x5671, 0x5675, 0x5683, 0x5689, 0x568D, 0x568F, 0x569B, 0x56AD,
+ 0x56B1, 0x56D5, 0x56E7, 0x56F3, 0x56FF, 0x5701, 0x5705, 0x5707,
+ 0x570B, 0x5713, 0x571F, 0x5723, 0x5747, 0x574D, 0x575F, 0x5761,
+ 0x576D, 0x5777, 0x577D, 0x5789, 0x57A1, 0x57A9, 0x57AF, 0x57B5,
+ 0x57C5, 0x57D1, 0x57D3, 0x57E5, 0x57EF, 0x5803, 0x580D, 0x580F,
+ 0x5815, 0x5827, 0x582B, 0x582D, 0x5855, 0x585B, 0x585D, 0x586D,
+ 0x586F, 0x5873, 0x587B, 0x588D, 0x5897, 0x58A3, 0x58A9, 0x58AB,
+ 0x58B5, 0x58BD, 0x58C1, 0x58C7, 0x58D3, 0x58D5, 0x58DF, 0x58F1,
+ 0x58F9, 0x58FF, 0x5903, 0x5917, 0x591B, 0x5921, 0x5945, 0x594B,
+ 0x594D, 0x5957, 0x595D, 0x5975, 0x597B, 0x5989, 0x5999, 0x599F,
+ 0x59B1, 0x59B3, 0x59BD, 0x59D1, 0x59DB, 0x59E3, 0x59E9, 0x59ED,
+ 0x59F3, 0x59F5, 0x59FF, 0x5A01, 0x5A0D, 0x5A11, 0x5A13, 0x5A17,
+ 0x5A1F, 0x5A29, 0x5A2F, 0x5A3B, 0x5A4D, 0x5A5B, 0x5A67, 0x5A77,
+ 0x5A7F, 0x5A85, 0x5A95, 0x5A9D, 0x5AA1, 0x5AA3, 0x5AA9, 0x5ABB,
+ 0x5AD3, 0x5AE5, 0x5AEF, 0x5AFB, 0x5AFD, 0x5B01, 0x5B0F, 0x5B19,
+ 0x5B1F, 0x5B25, 0x5B2B, 0x5B3D, 0x5B49, 0x5B4B, 0x5B67, 0x5B79,
+ 0x5B87, 0x5B97, 0x5BA3, 0x5BB1, 0x5BC9, 0x5BD5, 0x5BEB, 0x5BF1,
+ 0x5BF3, 0x5BFD, 0x5C05, 0x5C09, 0x5C0B, 0x5C0F, 0x5C1D, 0x5C29,
+ 0x5C2F, 0x5C33, 0x5C39, 0x5C47, 0x5C4B, 0x5C4D, 0x5C51, 0x5C6F,
+ 0x5C75, 0x5C77, 0x5C7D, 0x5C87, 0x5C89, 0x5CA7, 0x5CBD, 0x5CBF,
+ 0x5CC3, 0x5CC9, 0x5CD1, 0x5CD7, 0x5CDD, 0x5CED, 0x5CF9, 0x5D05,
+ 0x5D0B, 0x5D13, 0x5D17, 0x5D19, 0x5D31, 0x5D3D, 0x5D41, 0x5D47,
+ 0x5D4F, 0x5D55, 0x5D5B, 0x5D65, 0x5D67, 0x5D6D, 0x5D79, 0x5D95,
+ 0x5DA3, 0x5DA9, 0x5DAD, 0x5DB9, 0x5DC1, 0x5DC7, 0x5DD3, 0x5DD7,
+ 0x5DDD, 0x5DEB, 0x5DF1, 0x5DFD, 0x5E07, 0x5E0D, 0x5E13, 0x5E1B,
+ 0x5E21, 0x5E27, 0x5E2B, 0x5E2D, 0x5E31, 0x5E39, 0x5E45, 0x5E49,
+ 0x5E57, 0x5E69, 0x5E73, 0x5E75, 0x5E85, 0x5E8B, 0x5E9F, 0x5EA5,
+ 0x5EAF, 0x5EB7, 0x5EBB, 0x5ED9, 0x5EFD, 0x5F09, 0x5F11, 0x5F27,
+ 0x5F33, 0x5F35, 0x5F3B, 0x5F47, 0x5F57, 0x5F5D, 0x5F63, 0x5F65,
+ 0x5F77, 0x5F7B, 0x5F95, 0x5F99, 0x5FA1, 0x5FB3, 0x5FBD, 0x5FC5,
+ 0x5FCF, 0x5FD5, 0x5FE3, 0x5FE7, 0x5FFB, 0x6011, 0x6023, 0x602F,
+ 0x6037, 0x6053, 0x605F, 0x6065, 0x606B, 0x6073, 0x6079, 0x6085,
+ 0x609D, 0x60AD, 0x60BB, 0x60BF, 0x60CD, 0x60D9, 0x60DF, 0x60E9,
+ 0x60F5, 0x6109, 0x610F, 0x6113, 0x611B, 0x612D, 0x6139, 0x614B,
+ 0x6155, 0x6157, 0x615B, 0x616F, 0x6179, 0x6187, 0x618B, 0x6191,
+ 0x6193, 0x619D, 0x61B5, 0x61C7, 0x61C9, 0x61CD, 0x61E1, 0x61F1,
+ 0x61FF, 0x6209, 0x6217, 0x621D, 0x6221, 0x6227, 0x623B, 0x6241,
+ 0x624B, 0x6251, 0x6253, 0x625F, 0x6265, 0x6283, 0x628D, 0x6295,
+ 0x629B, 0x629F, 0x62A5, 0x62AD, 0x62D5, 0x62D7, 0x62DB, 0x62DD,
+ 0x62E9, 0x62FB, 0x62FF, 0x6305, 0x630D, 0x6317, 0x631D, 0x632F,
+ 0x6341, 0x6343, 0x634F, 0x635F, 0x6367, 0x636D, 0x6371, 0x6377,
+ 0x637D, 0x637F, 0x63B3, 0x63C1, 0x63C5, 0x63D9, 0x63E9, 0x63EB,
+ 0x63EF, 0x63F5, 0x6401, 0x6403, 0x6409, 0x6415, 0x6421, 0x6427,
+ 0x642B, 0x6439, 0x6443, 0x6449, 0x644F, 0x645D, 0x6467, 0x6475,
+ 0x6485, 0x648D, 0x6493, 0x649F, 0x64A3, 0x64AB, 0x64C1, 0x64C7,
+ 0x64C9, 0x64DB, 0x64F1, 0x64F7, 0x64F9, 0x650B, 0x6511, 0x6521,
+ 0x652F, 0x6539, 0x653F, 0x654B, 0x654D, 0x6553, 0x6557, 0x655F,
+ 0x6571, 0x657D, 0x658D, 0x658F, 0x6593, 0x65A1, 0x65A5, 0x65AD,
+ 0x65B9, 0x65C5, 0x65E3, 0x65F3, 0x65FB, 0x65FF, 0x6601, 0x6607,
+ 0x661D, 0x6629, 0x6631, 0x663B, 0x6641, 0x6647, 0x664D, 0x665B,
+ 0x6661, 0x6673, 0x667D, 0x6689, 0x668B, 0x6695, 0x6697, 0x669B,
+ 0x66B5, 0x66B9, 0x66C5, 0x66CD, 0x66D1, 0x66E3, 0x66EB, 0x66F5,
+ 0x6703, 0x6713, 0x6719, 0x671F, 0x6727, 0x6731, 0x6737, 0x673F,
+ 0x6745, 0x6751, 0x675B, 0x676F, 0x6779, 0x6781, 0x6785, 0x6791,
+ 0x67AB, 0x67BD, 0x67C1, 0x67CD, 0x67DF, 0x67E5, 0x6803, 0x6809,
+ 0x6811, 0x6817, 0x682D, 0x6839, 0x683B, 0x683F, 0x6845, 0x684B,
+ 0x684D, 0x6857, 0x6859, 0x685D, 0x6863, 0x6869, 0x686B, 0x6871,
+ 0x6887, 0x6899, 0x689F, 0x68B1, 0x68BD, 0x68C5, 0x68D1, 0x68D7,
+ 0x68E1, 0x68ED, 0x68EF, 0x68FF, 0x6901, 0x690B, 0x690D, 0x6917,
+ 0x6929, 0x692F, 0x6943, 0x6947, 0x6949, 0x694F, 0x6965, 0x696B,
+ 0x6971, 0x6983, 0x6989, 0x6997, 0x69A3, 0x69B3, 0x69B5, 0x69BB,
+ 0x69C1, 0x69C5, 0x69D3, 0x69DF, 0x69E3, 0x69E5, 0x69F7, 0x6A07,
+ 0x6A2B, 0x6A37, 0x6A3D, 0x6A4B, 0x6A67, 0x6A69, 0x6A75, 0x6A7B,
+ 0x6A87, 0x6A8D, 0x6A91, 0x6A93, 0x6AA3, 0x6AC1, 0x6AC9, 0x6AE1,
+ 0x6AE7, 0x6B05, 0x6B0F, 0x6B11, 0x6B23, 0x6B27, 0x6B2D, 0x6B39,
+ 0x6B41, 0x6B57, 0x6B59, 0x6B5F, 0x6B75, 0x6B87, 0x6B89, 0x6B93,
+ 0x6B95, 0x6B9F, 0x6BBD, 0x6BBF, 0x6BDB, 0x6BE1, 0x6BEF, 0x6BFF,
+ 0x6C05, 0x6C19, 0x6C29, 0x6C2B, 0x6C31, 0x6C35, 0x6C55, 0x6C59,
+ 0x6C5B, 0x6C5F, 0x6C65, 0x6C67, 0x6C73, 0x6C77, 0x6C7D, 0x6C83,
+ 0x6C8F, 0x6C91, 0x6C97, 0x6C9B, 0x6CA1, 0x6CA9, 0x6CAF, 0x6CB3,
+ 0x6CC7, 0x6CCB, 0x6CEB, 0x6CF5, 0x6CFD, 0x6D0D, 0x6D0F, 0x6D25,
+ 0x6D27, 0x6D2B, 0x6D31, 0x6D39, 0x6D3F, 0x6D4F, 0x6D5D, 0x6D61,
+ 0x6D73, 0x6D7B, 0x6D7F, 0x6D93, 0x6D99, 0x6DA5, 0x6DB1, 0x6DB7,
+ 0x6DC1, 0x6DC3, 0x6DCD, 0x6DCF, 0x6DDB, 0x6DF7, 0x6E03, 0x6E15,
+ 0x6E17, 0x6E29, 0x6E33, 0x6E3B, 0x6E45, 0x6E75, 0x6E77, 0x6E7B,
+ 0x6E81, 0x6E89, 0x6E93, 0x6E95, 0x6E9F, 0x6EBD, 0x6EBF, 0x6EE3,
+ 0x6EE9, 0x6EF3, 0x6EF9, 0x6EFB, 0x6F0D, 0x6F11, 0x6F17, 0x6F1F,
+ 0x6F2F, 0x6F3D, 0x6F4D, 0x6F53, 0x6F61, 0x6F65, 0x6F79, 0x6F7D,
+ 0x6F83, 0x6F85, 0x6F8F, 0x6F9B, 0x6F9D, 0x6FA3, 0x6FAF, 0x6FB5,
+ 0x6FBB, 0x6FBF, 0x6FCB, 0x6FCD, 0x6FD3, 0x6FD7, 0x6FE3, 0x6FE9,
+ 0x6FF1, 0x6FF5, 0x6FF7, 0x6FFD, 0x700F, 0x7019, 0x701F, 0x7027,
+ 0x7033, 0x7039, 0x704F, 0x7051, 0x7057, 0x7063, 0x7075, 0x7079,
+ 0x7087, 0x708D, 0x7091, 0x70A5, 0x70AB, 0x70BB, 0x70C3, 0x70C7,
+ 0x70CF, 0x70E5, 0x70ED, 0x70F9, 0x70FF, 0x7105, 0x7115, 0x7121,
+ 0x7133, 0x7151, 0x7159, 0x715D, 0x715F, 0x7163, 0x7169, 0x7183,
+ 0x7187, 0x7195, 0x71AD, 0x71C3, 0x71C9, 0x71CB, 0x71D1, 0x71DB,
+ 0x71E1, 0x71EF, 0x71F5, 0x71FB, 0x7207, 0x7211, 0x7217, 0x7219,
+ 0x7225, 0x722F, 0x723B, 0x7243, 0x7255, 0x7267, 0x7271, 0x7277,
+ 0x727F, 0x728F, 0x7295, 0x729B, 0x72A3, 0x72B3, 0x72C7, 0x72CB,
+ 0x72CD, 0x72D7, 0x72D9, 0x72E3, 0x72EF, 0x72F5, 0x72FD, 0x7303,
+ 0x730D, 0x7321, 0x732B, 0x733D, 0x7357, 0x735B, 0x7361, 0x737F,
+ 0x7381, 0x7385, 0x738D, 0x7393, 0x739F, 0x73AB, 0x73BD, 0x73C1,
+ 0x73C9, 0x73DF, 0x73E5, 0x73E7, 0x73F3, 0x7415, 0x741B, 0x742D,
+ 0x7439, 0x743F, 0x7441, 0x745D, 0x746B, 0x747B, 0x7489, 0x748D,
+ 0x749B, 0x74A7, 0x74AB, 0x74B1, 0x74B7, 0x74B9, 0x74DD, 0x74E1,
+ 0x74E7, 0x74FB, 0x7507, 0x751F, 0x7525, 0x753B, 0x753D, 0x754D,
+ 0x755F, 0x756B, 0x7577, 0x7589, 0x758B, 0x7591, 0x7597, 0x759D,
+ 0x75A1, 0x75A7, 0x75B5, 0x75B9, 0x75BB, 0x75D1, 0x75D9, 0x75E5,
+ 0x75EB, 0x75F5, 0x75FB, 0x7603, 0x760F, 0x7621, 0x762D, 0x7633,
+ 0x763D, 0x763F, 0x7655, 0x7663, 0x7669, 0x766F, 0x7673, 0x7685,
+ 0x768B, 0x769F, 0x76B5, 0x76B7, 0x76C3, 0x76DB, 0x76DF, 0x76F1,
+ 0x7703, 0x7705, 0x771B, 0x771D, 0x7721, 0x772D, 0x7735, 0x7741,
+ 0x774B, 0x7759, 0x775D, 0x775F, 0x7771, 0x7781, 0x77A7, 0x77AD,
+ 0x77B3, 0x77B9, 0x77C5, 0x77CF, 0x77D5, 0x77E1, 0x77E9, 0x77EF,
+ 0x77F3, 0x77F9, 0x7807, 0x7825, 0x782B, 0x7835, 0x783D, 0x7853,
+ 0x7859, 0x7861, 0x786D, 0x7877, 0x7879, 0x7883, 0x7885, 0x788B,
+ 0x7895, 0x7897, 0x78A1, 0x78AD, 0x78BF, 0x78D3, 0x78D9, 0x78DD,
+ 0x78E5, 0x78FB, 0x7901, 0x7907, 0x7925, 0x792B, 0x7939, 0x793F,
+ 0x794B, 0x7957, 0x795D, 0x7967, 0x7969, 0x7973, 0x7991, 0x7993,
+ 0x79A3, 0x79AB, 0x79AF, 0x79B1, 0x79B7, 0x79C9, 0x79CD, 0x79CF,
+ 0x79D5, 0x79D9, 0x79F3, 0x79F7, 0x79FF, 0x7A05, 0x7A0F, 0x7A11,
+ 0x7A15, 0x7A1B, 0x7A23, 0x7A27, 0x7A2D, 0x7A4B, 0x7A57, 0x7A59,
+ 0x7A5F, 0x7A65, 0x7A69, 0x7A7D, 0x7A93, 0x7A9B, 0x7A9F, 0x7AA1,
+ 0x7AA5, 0x7AED, 0x7AF5, 0x7AF9, 0x7B01, 0x7B17, 0x7B19, 0x7B1D,
+ 0x7B2B, 0x7B35, 0x7B37, 0x7B3B, 0x7B4F, 0x7B55, 0x7B5F, 0x7B71,
+ 0x7B77, 0x7B8B, 0x7B9B, 0x7BA1, 0x7BA9, 0x7BAF, 0x7BB3, 0x7BC7,
+ 0x7BD3, 0x7BE9, 0x7BEB, 0x7BEF, 0x7BF1, 0x7BFD, 0x7C07, 0x7C19,
+ 0x7C1B, 0x7C31, 0x7C37, 0x7C49, 0x7C67, 0x7C69, 0x7C73, 0x7C81,
+ 0x7C8B, 0x7C93, 0x7CA3, 0x7CD5, 0x7CDB, 0x7CE5, 0x7CED, 0x7CF7,
+ 0x7D03, 0x7D09, 0x7D1B, 0x7D1D, 0x7D33, 0x7D39, 0x7D3B, 0x7D3F,
+ 0x7D45, 0x7D4D, 0x7D53, 0x7D59, 0x7D63, 0x7D75, 0x7D77, 0x7D8D,
+ 0x7D8F, 0x7D9F, 0x7DAD, 0x7DB7, 0x7DBD, 0x7DBF, 0x7DCB, 0x7DD5,
+ 0x7DE9, 0x7DED, 0x7DFB, 0x7E01, 0x7E05, 0x7E29, 0x7E2B, 0x7E2F,
+ 0x7E35, 0x7E41, 0x7E43, 0x7E47, 0x7E55, 0x7E61, 0x7E67, 0x7E6B,
+ 0x7E71, 0x7E73, 0x7E79, 0x7E7D, 0x7E91, 0x7E9B, 0x7E9D, 0x7EA7,
+ 0x7EAD, 0x7EB9, 0x7EBB, 0x7ED3, 0x7EDF, 0x7EEB, 0x7EF1, 0x7EF7,
+ 0x7EFB, 0x7F13, 0x7F15, 0x7F19, 0x7F31, 0x7F33, 0x7F39, 0x7F3D,
+ 0x7F43, 0x7F4B, 0x7F5B, 0x7F61, 0x7F63, 0x7F6D, 0x7F79, 0x7F87,
+ 0x7F8D, 0x7FAF, 0x7FB5, 0x7FC3, 0x7FC9, 0x7FCD, 0x7FCF, 0x7FED,
+ 0x8003, 0x800B, 0x800F, 0x8015, 0x801D, 0x8021, 0x8023, 0x803F,
+ 0x8041, 0x8047, 0x804B, 0x8065, 0x8077, 0x808D, 0x808F, 0x8095,
+ 0x80A5, 0x80AB, 0x80AD, 0x80BD, 0x80C9, 0x80CB, 0x80D7, 0x80DB,
+ 0x80E1, 0x80E7, 0x80F5, 0x80FF, 0x8105, 0x810D, 0x8119, 0x811D,
+ 0x812F, 0x8131, 0x813B, 0x8143, 0x8153, 0x8159, 0x815F, 0x817D,
+ 0x817F, 0x8189, 0x819B, 0x819D, 0x81A7, 0x81AF, 0x81B3, 0x81BB,
+ 0x81C7, 0x81DF, 0x8207, 0x8209, 0x8215, 0x821F, 0x8225, 0x8231,
+ 0x8233, 0x823F, 0x8243, 0x8245, 0x8249, 0x824F, 0x8261, 0x826F,
+ 0x827B, 0x8281, 0x8285, 0x8293, 0x82B1, 0x82B5, 0x82BD, 0x82C7,
+ 0x82CF, 0x82D5, 0x82DF, 0x82F1, 0x82F9, 0x82FD, 0x830B, 0x831B,
+ 0x8321, 0x8329, 0x832D, 0x8333, 0x8335, 0x833F, 0x8341, 0x834D,
+ 0x8351, 0x8353, 0x8357, 0x835D, 0x8365, 0x8369, 0x836F, 0x838F,
+ 0x83A7, 0x83B1, 0x83B9, 0x83CB, 0x83D5, 0x83D7, 0x83DD, 0x83E7,
+ 0x83E9, 0x83ED, 0x83FF, 0x8405, 0x8411, 0x8413, 0x8423, 0x8425,
+ 0x843B, 0x8441, 0x8447, 0x844F, 0x8461, 0x8465, 0x8477, 0x8483,
+ 0x848B, 0x8491, 0x8495, 0x84A9, 0x84AF, 0x84CD, 0x84E3, 0x84EF,
+ 0x84F1, 0x84F7, 0x8509, 0x850D, 0x854B, 0x854F, 0x8551, 0x855D,
+ 0x8563, 0x856D, 0x856F, 0x857B, 0x8587, 0x85A3, 0x85A5, 0x85A9,
+ 0x85B7, 0x85CD, 0x85D3, 0x85D5, 0x85DB, 0x85E1, 0x85EB, 0x85F9,
+ 0x85FD, 0x85FF, 0x8609, 0x860F, 0x8617, 0x8621, 0x862F, 0x8639,
+ 0x863F, 0x8641, 0x864D, 0x8663, 0x8675, 0x867D, 0x8687, 0x8699,
+ 0x86A5, 0x86A7, 0x86B3, 0x86B7, 0x86C3, 0x86C5, 0x86CF, 0x86D1,
+ 0x86D7, 0x86E9, 0x86EF, 0x86F5, 0x8717, 0x871D, 0x871F, 0x872B,
+ 0x872F, 0x8735, 0x8747, 0x8759, 0x875B, 0x876B, 0x8771, 0x8777,
+ 0x877F, 0x8785, 0x878F, 0x87A1, 0x87A9, 0x87B3, 0x87BB, 0x87C5,
+ 0x87C7, 0x87CB, 0x87DD, 0x87F7, 0x8803, 0x8819, 0x881B, 0x881F,
+ 0x8821, 0x8837, 0x883D, 0x8843, 0x8851, 0x8861, 0x8867, 0x887B,
+ 0x8885, 0x8891, 0x8893, 0x88A5, 0x88CF, 0x88D3, 0x88EB, 0x88ED,
+ 0x88F3, 0x88FD, 0x8909, 0x890B, 0x8911, 0x891B, 0x8923, 0x8927,
+ 0x892D, 0x8939, 0x8945, 0x894D, 0x8951, 0x8957, 0x8963, 0x8981,
+ 0x8995, 0x899B, 0x89B3, 0x89B9, 0x89C3, 0x89CF, 0x89D1, 0x89DB,
+ 0x89EF, 0x89F5, 0x89FB, 0x89FF, 0x8A0B, 0x8A19, 0x8A23, 0x8A35,
+ 0x8A41, 0x8A49, 0x8A4F, 0x8A5B, 0x8A5F, 0x8A6D, 0x8A77, 0x8A79,
+ 0x8A85, 0x8AA3, 0x8AB3, 0x8AB5, 0x8AC1, 0x8AC7, 0x8ACB, 0x8ACD,
+ 0x8AD1, 0x8AD7, 0x8AF1, 0x8AF5, 0x8B07, 0x8B09, 0x8B0D, 0x8B13,
+ 0x8B21, 0x8B57, 0x8B5D, 0x8B91, 0x8B93, 0x8BA3, 0x8BA9, 0x8BAF,
+ 0x8BBB, 0x8BD5, 0x8BD9, 0x8BDB, 0x8BE1, 0x8BF7, 0x8BFD, 0x8BFF,
+ 0x8C0B, 0x8C17, 0x8C1D, 0x8C27, 0x8C39, 0x8C3B, 0x8C47, 0x8C53,
+ 0x8C5D, 0x8C6F, 0x8C7B, 0x8C81, 0x8C89, 0x8C8F, 0x8C99, 0x8C9F,
+ 0x8CA7, 0x8CAB, 0x8CAD, 0x8CB1, 0x8CC5, 0x8CDD, 0x8CE3, 0x8CE9,
+ 0x8CF3, 0x8D01, 0x8D0B, 0x8D0D, 0x8D23, 0x8D29, 0x8D37, 0x8D41,
+ 0x8D5B, 0x8D5F, 0x8D71, 0x8D79, 0x8D85, 0x8D91, 0x8D9B, 0x8DA7,
+ 0x8DAD, 0x8DB5, 0x8DC5, 0x8DCB, 0x8DD3, 0x8DD9, 0x8DDF, 0x8DF5,
+ 0x8DF7, 0x8E01, 0x8E15, 0x8E1F, 0x8E25, 0x8E51, 0x8E63, 0x8E69,
+ 0x8E73, 0x8E75, 0x8E79, 0x8E7F, 0x8E8D, 0x8E91, 0x8EAB, 0x8EAF,
+ 0x8EB1, 0x8EBD, 0x8EC7, 0x8ECF, 0x8ED3, 0x8EDB, 0x8EE7, 0x8EEB,
+ 0x8EF7, 0x8EFF, 0x8F15, 0x8F1D, 0x8F23, 0x8F2D, 0x8F3F, 0x8F45,
+ 0x8F4B, 0x8F53, 0x8F59, 0x8F65, 0x8F69, 0x8F71, 0x8F83, 0x8F8D,
+ 0x8F99, 0x8F9F, 0x8FAB, 0x8FAD, 0x8FB3, 0x8FB7, 0x8FB9, 0x8FC9,
+ 0x8FD5, 0x8FE1, 0x8FEF, 0x8FF9, 0x9007, 0x900D, 0x9017, 0x9023,
+ 0x9025, 0x9031, 0x9037, 0x903B, 0x9041, 0x9043, 0x904F, 0x9053,
+ 0x906D, 0x9073, 0x9085, 0x908B, 0x9095, 0x909B, 0x909D, 0x90AF,
+ 0x90B9, 0x90C1, 0x90C5, 0x90DF, 0x90E9, 0x90FD, 0x9103, 0x9113,
+ 0x9127, 0x9133, 0x913D, 0x9145, 0x914F, 0x9151, 0x9161, 0x9167,
+ 0x917B, 0x9185, 0x9199, 0x919D, 0x91BB, 0x91BD, 0x91C1, 0x91C9,
+ 0x91D9, 0x91DB, 0x91ED, 0x91F1, 0x91F3, 0x91F9, 0x9203, 0x9215,
+ 0x9221, 0x922F, 0x9241, 0x9247, 0x9257, 0x926B, 0x9271, 0x9275,
+ 0x927D, 0x9283, 0x9287, 0x928D, 0x9299, 0x92A1, 0x92AB, 0x92AD,
+ 0x92B9, 0x92BF, 0x92C3, 0x92C5, 0x92CB, 0x92D5, 0x92D7, 0x92E7,
+ 0x92F3, 0x9301, 0x930B, 0x9311, 0x9319, 0x931F, 0x933B, 0x933D,
+ 0x9343, 0x9355, 0x9373, 0x9395, 0x9397, 0x93A7, 0x93B3, 0x93B5,
+ 0x93C7, 0x93D7, 0x93DD, 0x93E5, 0x93EF, 0x93F7, 0x9401, 0x9409,
+ 0x9413, 0x943F, 0x9445, 0x944B, 0x944F, 0x9463, 0x9467, 0x9469,
+ 0x946D, 0x947B, 0x9497, 0x949F, 0x94A5, 0x94B5, 0x94C3, 0x94E1,
+ 0x94E7, 0x9505, 0x9509, 0x9517, 0x9521, 0x9527, 0x952D, 0x9535,
+ 0x9539, 0x954B, 0x9557, 0x955D, 0x955F, 0x9575, 0x9581, 0x9589,
+ 0x958F, 0x959B, 0x959F, 0x95AD, 0x95B1, 0x95B7, 0x95B9, 0x95BD,
+ 0x95CF, 0x95E3, 0x95E9, 0x95F9, 0x961F, 0x962F, 0x9631, 0x9635,
+ 0x963B, 0x963D, 0x9665, 0x968F, 0x969D, 0x96A1, 0x96A7, 0x96A9,
+ 0x96C1, 0x96CB, 0x96D1, 0x96D3, 0x96E5, 0x96EF, 0x96FB, 0x96FD,
+ 0x970D, 0x970F, 0x9715, 0x9725, 0x972B, 0x9733, 0x9737, 0x9739,
+ 0x9743, 0x9749, 0x9751, 0x975B, 0x975D, 0x976F, 0x977F, 0x9787,
+ 0x9793, 0x97A5, 0x97B1, 0x97B7, 0x97C3, 0x97CD, 0x97D3, 0x97D9,
+ 0x97EB, 0x97F7, 0x9805, 0x9809, 0x980B, 0x9815, 0x9829, 0x982F,
+ 0x983B, 0x9841, 0x9851, 0x986B, 0x986F, 0x9881, 0x9883, 0x9887,
+ 0x98A7, 0x98B1, 0x98B9, 0x98BF, 0x98C3, 0x98C9, 0x98CF, 0x98DD,
+ 0x98E3, 0x98F5, 0x98F9, 0x98FB, 0x990D, 0x9917, 0x991F, 0x9929,
+ 0x9931, 0x993B, 0x993D, 0x9941, 0x9947, 0x9949, 0x9953, 0x997D,
+ 0x9985, 0x9991, 0x9995, 0x999B, 0x99AD, 0x99AF, 0x99BF, 0x99C7,
+ 0x99CB, 0x99CD, 0x99D7, 0x99E5, 0x99F1, 0x99FB, 0x9A0F, 0x9A13,
+ 0x9A1B, 0x9A25, 0x9A4B, 0x9A4F, 0x9A55, 0x9A57, 0x9A61, 0x9A75,
+ 0x9A7F, 0x9A8B, 0x9A91, 0x9A9D, 0x9AB7, 0x9AC3, 0x9AC7, 0x9ACF,
+ 0x9AEB, 0x9AF3, 0x9AF7, 0x9AFF, 0x9B17, 0x9B1D, 0x9B27, 0x9B2F,
+ 0x9B35, 0x9B45, 0x9B51, 0x9B59, 0x9B63, 0x9B6F, 0x9B77, 0x9B8D,
+ 0x9B93, 0x9B95, 0x9B9F, 0x9BA1, 0x9BA7, 0x9BB1, 0x9BB7, 0x9BBD,
+ 0x9BC5, 0x9BCB, 0x9BCF, 0x9BDD, 0x9BF9, 0x9C01, 0x9C11, 0x9C23,
+ 0x9C2B, 0x9C2F, 0x9C35, 0x9C49, 0x9C4D, 0x9C5F, 0x9C65, 0x9C67,
+ 0x9C7F, 0x9C97, 0x9C9D, 0x9CA3, 0x9CAF, 0x9CBB, 0x9CBF, 0x9CC1,
+ 0x9CD7, 0x9CD9, 0x9CE3, 0x9CE9, 0x9CF1, 0x9CFD, 0x9D01, 0x9D15,
+ 0x9D27, 0x9D2D, 0x9D31, 0x9D3D, 0x9D55, 0x9D5B, 0x9D61, 0x9D97,
+ 0x9D9F, 0x9DA5, 0x9DA9, 0x9DC3, 0x9DE7, 0x9DEB, 0x9DED, 0x9DF1,
+ 0x9E0B, 0x9E17, 0x9E23, 0x9E27, 0x9E2D, 0x9E33, 0x9E3B, 0x9E47,
+ 0x9E51, 0x9E53, 0x9E5F, 0x9E6F, 0x9E81, 0x9E87, 0x9E8F, 0x9E95,
+ 0x9EA1, 0x9EB3, 0x9EBD, 0x9EBF, 0x9EF5, 0x9EF9, 0x9EFB, 0x9F05,
+ 0x9F23, 0x9F2F, 0x9F37, 0x9F3B, 0x9F43, 0x9F53, 0x9F61, 0x9F6D,
+ 0x9F73, 0x9F77, 0x9F7D, 0x9F89, 0x9F8F, 0x9F91, 0x9F95, 0x9FA3,
+ 0x9FAF, 0x9FB3, 0x9FC1, 0x9FC7, 0x9FDF, 0x9FE5, 0x9FEB, 0x9FF5,
+ 0xA001, 0xA00D, 0xA021, 0xA033, 0xA039, 0xA03F, 0xA04F, 0xA057,
+ 0xA05B, 0xA061, 0xA075, 0xA079, 0xA099, 0xA09D, 0xA0AB, 0xA0B5,
+ 0xA0B7, 0xA0BD, 0xA0C9, 0xA0D9, 0xA0DB, 0xA0DF, 0xA0E5, 0xA0F1,
+ 0xA0F3, 0xA0FD, 0xA105, 0xA10B, 0xA10F, 0xA111, 0xA11B, 0xA129,
+ 0xA12F, 0xA135, 0xA141, 0xA153, 0xA175, 0xA17D, 0xA187, 0xA18D,
+ 0xA1A5, 0xA1AB, 0xA1AD, 0xA1B7, 0xA1C3, 0xA1C5, 0xA1E3, 0xA1ED,
+ 0xA1FB, 0xA207, 0xA213, 0xA223, 0xA229, 0xA22F, 0xA231, 0xA243,
+ 0xA247, 0xA24D, 0xA26B, 0xA279, 0xA27D, 0xA283, 0xA289, 0xA28B,
+ 0xA291, 0xA295, 0xA29B, 0xA2A9, 0xA2AF, 0xA2B3, 0xA2BB, 0xA2C5,
+ 0xA2D1, 0xA2D7, 0xA2F7, 0xA301, 0xA309, 0xA31F, 0xA321, 0xA32B,
+ 0xA331, 0xA349, 0xA351, 0xA355, 0xA373, 0xA379, 0xA37B, 0xA387,
+ 0xA397, 0xA39F, 0xA3A5, 0xA3A9, 0xA3AF, 0xA3B7, 0xA3C7, 0xA3D5,
+ 0xA3DB, 0xA3E1, 0xA3E5, 0xA3E7, 0xA3F1, 0xA3FD, 0xA3FF, 0xA40F,
+ 0xA41D, 0xA421, 0xA423, 0xA427, 0xA43B, 0xA44D, 0xA457, 0xA459,
+ 0xA463, 0xA469, 0xA475, 0xA493, 0xA49B, 0xA4AD, 0xA4B9, 0xA4C3,
+ 0xA4C5, 0xA4CB, 0xA4D1, 0xA4D5, 0xA4E1, 0xA4ED, 0xA4EF, 0xA4F3,
+ 0xA4FF, 0xA511, 0xA529, 0xA52B, 0xA535, 0xA53B, 0xA543, 0xA553,
+ 0xA55B, 0xA561, 0xA56D, 0xA577, 0xA585, 0xA58B, 0xA597, 0xA59D,
+ 0xA5A3, 0xA5A7, 0xA5A9, 0xA5C1, 0xA5C5, 0xA5CB, 0xA5D3, 0xA5D9,
+ 0xA5DD, 0xA5DF, 0xA5E3, 0xA5E9, 0xA5F7, 0xA5FB, 0xA603, 0xA60D,
+ 0xA625, 0xA63D, 0xA649, 0xA64B, 0xA651, 0xA65D, 0xA673, 0xA691,
+ 0xA693, 0xA699, 0xA6AB, 0xA6B5, 0xA6BB, 0xA6C1, 0xA6C9, 0xA6CD,
+ 0xA6CF, 0xA6D5, 0xA6DF, 0xA6E7, 0xA6F1, 0xA6F7, 0xA6FF, 0xA70F,
+ 0xA715, 0xA723, 0xA729, 0xA72D, 0xA745, 0xA74D, 0xA757, 0xA759,
+ 0xA765, 0xA76B, 0xA76F, 0xA793, 0xA795, 0xA7AB, 0xA7B1, 0xA7B9,
+ 0xA7BF, 0xA7C9, 0xA7D1, 0xA7D7, 0xA7E3, 0xA7ED, 0xA7FB, 0xA805,
+ 0xA80B, 0xA81D, 0xA829, 0xA82B, 0xA837, 0xA83B, 0xA855, 0xA85F,
+ 0xA86D, 0xA87D, 0xA88F, 0xA897, 0xA8A9, 0xA8B5, 0xA8C1, 0xA8C7,
+ 0xA8D7, 0xA8E5, 0xA8FD, 0xA907, 0xA913, 0xA91B, 0xA931, 0xA937,
+ 0xA939, 0xA943, 0xA97F, 0xA985, 0xA987, 0xA98B, 0xA993, 0xA9A3,
+ 0xA9B1, 0xA9BB, 0xA9C1, 0xA9D9, 0xA9DF, 0xA9EB, 0xA9FD, 0xAA15,
+ 0xAA17, 0xAA35, 0xAA39, 0xAA3B, 0xAA47, 0xAA4D, 0xAA57, 0xAA59,
+ 0xAA5D, 0xAA6B, 0xAA71, 0xAA81, 0xAA83, 0xAA8D, 0xAA95, 0xAAAB,
+ 0xAABF, 0xAAC5, 0xAAC9, 0xAAE9, 0xAAEF, 0xAB01, 0xAB05, 0xAB07,
+ 0xAB0B, 0xAB0D, 0xAB11, 0xAB19, 0xAB4D, 0xAB5B, 0xAB71, 0xAB73,
+ 0xAB89, 0xAB9D, 0xABA7, 0xABAF, 0xABB9, 0xABBB, 0xABC1, 0xABC5,
+ 0xABD3, 0xABD7, 0xABDD, 0xABF1, 0xABF5, 0xABFB, 0xABFD, 0xAC09,
+ 0xAC15, 0xAC1B, 0xAC27, 0xAC37, 0xAC39, 0xAC45, 0xAC4F, 0xAC57,
+ 0xAC5B, 0xAC61, 0xAC63, 0xAC7F, 0xAC8B, 0xAC93, 0xAC9D, 0xACA9,
+ 0xACAB, 0xACAF, 0xACBD, 0xACD9, 0xACE1, 0xACE7, 0xACEB, 0xACED,
+ 0xACF1, 0xACF7, 0xACF9, 0xAD05, 0xAD3F, 0xAD45, 0xAD53, 0xAD5D,
+ 0xAD5F, 0xAD65, 0xAD81, 0xADA1, 0xADA5, 0xADC3, 0xADCB, 0xADD1,
+ 0xADD5, 0xADDB, 0xADE7, 0xADF3, 0xADF5, 0xADF9, 0xADFF, 0xAE05,
+ 0xAE13, 0xAE23, 0xAE2B, 0xAE49, 0xAE4D, 0xAE4F, 0xAE59, 0xAE61,
+ 0xAE67, 0xAE6B, 0xAE71, 0xAE8B, 0xAE8F, 0xAE9B, 0xAE9D, 0xAEA7,
+ 0xAEB9, 0xAEC5, 0xAED1, 0xAEE3, 0xAEE5, 0xAEE9, 0xAEF5, 0xAEFD,
+ 0xAF09, 0xAF13, 0xAF27, 0xAF2B, 0xAF33, 0xAF43, 0xAF4F, 0xAF57,
+ 0xAF5D, 0xAF6D, 0xAF75, 0xAF7F, 0xAF8B, 0xAF99, 0xAF9F, 0xAFA3,
+ 0xAFAB, 0xAFB7, 0xAFBB, 0xAFCF, 0xAFD5, 0xAFFD, 0xB005, 0xB015,
+ 0xB01B, 0xB03F, 0xB041, 0xB047, 0xB04B, 0xB051, 0xB053, 0xB069,
+ 0xB07B, 0xB07D, 0xB087, 0xB08D, 0xB0B1, 0xB0BF, 0xB0CB, 0xB0CF,
+ 0xB0E1, 0xB0E9, 0xB0ED, 0xB0FB, 0xB105, 0xB107, 0xB111, 0xB119,
+ 0xB11D, 0xB11F, 0xB131, 0xB141, 0xB14D, 0xB15B, 0xB165, 0xB173,
+ 0xB179, 0xB17F, 0xB1A9, 0xB1B3, 0xB1B9, 0xB1BF, 0xB1D3, 0xB1DD,
+ 0xB1E5, 0xB1F1, 0xB1F5, 0xB201, 0xB213, 0xB215, 0xB21F, 0xB22D,
+ 0xB23F, 0xB249, 0xB25B, 0xB263, 0xB269, 0xB26D, 0xB27B, 0xB281,
+ 0xB28B, 0xB2A9, 0xB2B7, 0xB2BD, 0xB2C3, 0xB2C7, 0xB2D3, 0xB2F9,
+ 0xB2FD, 0xB2FF, 0xB303, 0xB309, 0xB311, 0xB31D, 0xB327, 0xB32D,
+ 0xB33F, 0xB345, 0xB377, 0xB37D, 0xB381, 0xB387, 0xB393, 0xB39B,
+ 0xB3A5, 0xB3C5, 0xB3CB, 0xB3E1, 0xB3E3, 0xB3ED, 0xB3F9, 0xB40B,
+ 0xB40D, 0xB413, 0xB417, 0xB435, 0xB43D, 0xB443, 0xB449, 0xB45B,
+ 0xB465, 0xB467, 0xB46B, 0xB477, 0xB48B, 0xB495, 0xB49D, 0xB4B5,
+ 0xB4BF, 0xB4C1, 0xB4C7, 0xB4DD, 0xB4E3, 0xB4E5, 0xB4F7, 0xB501,
+ 0xB50D, 0xB50F, 0xB52D, 0xB53F, 0xB54B, 0xB567, 0xB569, 0xB56F,
+ 0xB573, 0xB579, 0xB587, 0xB58D, 0xB599, 0xB5A3, 0xB5AB, 0xB5AF,
+ 0xB5BB, 0xB5D5, 0xB5DF, 0xB5E7, 0xB5ED, 0xB5FD, 0xB5FF, 0xB609,
+ 0xB61B, 0xB629, 0xB62F, 0xB633, 0xB639, 0xB647, 0xB657, 0xB659,
+ 0xB65F, 0xB663, 0xB66F, 0xB683, 0xB687, 0xB69B, 0xB69F, 0xB6A5,
+ 0xB6B1, 0xB6B3, 0xB6D7, 0xB6DB, 0xB6E1, 0xB6E3, 0xB6ED, 0xB6EF,
+ 0xB705, 0xB70D, 0xB713, 0xB71D, 0xB729, 0xB735, 0xB747, 0xB755,
+ 0xB76D, 0xB791, 0xB795, 0xB7A9, 0xB7C1, 0xB7CB, 0xB7D1, 0xB7D3,
+ 0xB7EF, 0xB7F5, 0xB807, 0xB80F, 0xB813, 0xB819, 0xB821, 0xB827,
+ 0xB82B, 0xB82D, 0xB839, 0xB855, 0xB867, 0xB875, 0xB885, 0xB893,
+ 0xB8A5, 0xB8AF, 0xB8B7, 0xB8BD, 0xB8C1, 0xB8C7, 0xB8CD, 0xB8D5,
+ 0xB8EB, 0xB8F7, 0xB8F9, 0xB903, 0xB915, 0xB91B, 0xB91D, 0xB92F,
+ 0xB939, 0xB93B, 0xB947, 0xB951, 0xB963, 0xB983, 0xB989, 0xB98D,
+ 0xB993, 0xB999, 0xB9A1, 0xB9A7, 0xB9AD, 0xB9B7, 0xB9CB, 0xB9D1,
+ 0xB9DD, 0xB9E7, 0xB9EF, 0xB9F9, 0xBA07, 0xBA0D, 0xBA17, 0xBA25,
+ 0xBA29, 0xBA2B, 0xBA41, 0xBA53, 0xBA55, 0xBA5F, 0xBA61, 0xBA65,
+ 0xBA79, 0xBA7D, 0xBA7F, 0xBAA1, 0xBAA3, 0xBAAF, 0xBAB5, 0xBABF,
+ 0xBAC1, 0xBACB, 0xBADD, 0xBAE3, 0xBAF1, 0xBAFD, 0xBB09, 0xBB1F,
+ 0xBB27, 0xBB2D, 0xBB3D, 0xBB43, 0xBB4B, 0xBB4F, 0xBB5B, 0xBB61,
+ 0xBB69, 0xBB6D, 0xBB91, 0xBB97, 0xBB9D, 0xBBB1, 0xBBC9, 0xBBCF,
+ 0xBBDB, 0xBBED, 0xBBF7, 0xBBF9, 0xBC03, 0xBC1D, 0xBC23, 0xBC33,
+ 0xBC3B, 0xBC41, 0xBC45, 0xBC5D, 0xBC6F, 0xBC77, 0xBC83, 0xBC8F,
+ 0xBC99, 0xBCAB, 0xBCB7, 0xBCB9, 0xBCD1, 0xBCD5, 0xBCE1, 0xBCF3,
+ 0xBCFF, 0xBD0D, 0xBD17, 0xBD19, 0xBD1D, 0xBD35, 0xBD41, 0xBD4F,
+ 0xBD59, 0xBD5F, 0xBD61, 0xBD67, 0xBD6B, 0xBD71, 0xBD8B, 0xBD8F,
+ 0xBD95, 0xBD9B, 0xBD9D, 0xBDB3, 0xBDBB, 0xBDCD, 0xBDD1, 0xBDE3,
+ 0xBDEB, 0xBDEF, 0xBE07, 0xBE09, 0xBE15, 0xBE21, 0xBE25, 0xBE27,
+ 0xBE5B, 0xBE5D, 0xBE6F, 0xBE75, 0xBE79, 0xBE7F, 0xBE8B, 0xBE8D,
+ 0xBE93, 0xBE9F, 0xBEA9, 0xBEB1, 0xBEB5, 0xBEB7, 0xBECF, 0xBED9,
+ 0xBEDB, 0xBEE5, 0xBEE7, 0xBEF3, 0xBEF9, 0xBF0B, 0xBF33, 0xBF39,
+ 0xBF4D, 0xBF5D, 0xBF5F, 0xBF6B, 0xBF71, 0xBF7B, 0xBF87, 0xBF89,
+ 0xBF8D, 0xBF93, 0xBFA1, 0xBFAD, 0xBFB9, 0xBFCF, 0xBFD5, 0xBFDD,
+ 0xBFE1, 0xBFE3, 0xBFF3, 0xC005, 0xC011, 0xC013, 0xC019, 0xC029,
+ 0xC02F, 0xC031, 0xC037, 0xC03B, 0xC047, 0xC065, 0xC06D, 0xC07D,
+ 0xC07F, 0xC091, 0xC09B, 0xC0B3, 0xC0B5, 0xC0BB, 0xC0D3, 0xC0D7,
+ 0xC0D9, 0xC0EF, 0xC0F1, 0xC101, 0xC103, 0xC109, 0xC115, 0xC119,
+ 0xC12B, 0xC133, 0xC137, 0xC145, 0xC149, 0xC15B, 0xC173, 0xC179,
+ 0xC17B, 0xC181, 0xC18B, 0xC18D, 0xC197, 0xC1BD, 0xC1C3, 0xC1CD,
+ 0xC1DB, 0xC1E1, 0xC1E7, 0xC1FF, 0xC203, 0xC205, 0xC211, 0xC221,
+ 0xC22F, 0xC23F, 0xC24B, 0xC24D, 0xC253, 0xC25D, 0xC277, 0xC27B,
+ 0xC27D, 0xC289, 0xC28F, 0xC293, 0xC29F, 0xC2A7, 0xC2B3, 0xC2BD,
+ 0xC2CF, 0xC2D5, 0xC2E3, 0xC2FF, 0xC301, 0xC307, 0xC311, 0xC313,
+ 0xC317, 0xC325, 0xC347, 0xC349, 0xC34F, 0xC365, 0xC367, 0xC371,
+ 0xC37F, 0xC383, 0xC385, 0xC395, 0xC39D, 0xC3A7, 0xC3AD, 0xC3B5,
+ 0xC3BF, 0xC3C7, 0xC3CB, 0xC3D1, 0xC3D3, 0xC3E3, 0xC3E9, 0xC3EF,
+ 0xC401, 0xC41F, 0xC42D, 0xC433, 0xC437, 0xC455, 0xC457, 0xC461,
+ 0xC46F, 0xC473, 0xC487, 0xC491, 0xC499, 0xC49D, 0xC4A5, 0xC4B7,
+ 0xC4BB, 0xC4C9, 0xC4CF, 0xC4D3, 0xC4EB, 0xC4F1, 0xC4F7, 0xC509,
+ 0xC51B, 0xC51D, 0xC541, 0xC547, 0xC551, 0xC55F, 0xC56B, 0xC56F,
+ 0xC575, 0xC577, 0xC595, 0xC59B, 0xC59F, 0xC5A1, 0xC5A7, 0xC5C3,
+ 0xC5D7, 0xC5DB, 0xC5EF, 0xC5FB, 0xC613, 0xC623, 0xC635, 0xC641,
+ 0xC64F, 0xC655, 0xC659, 0xC665, 0xC685, 0xC691, 0xC697, 0xC6A1,
+ 0xC6A9, 0xC6B3, 0xC6B9, 0xC6CB, 0xC6CD, 0xC6DD, 0xC6EB, 0xC6F1,
+ 0xC707, 0xC70D, 0xC719, 0xC71B, 0xC72D, 0xC731, 0xC739, 0xC757,
+ 0xC763, 0xC767, 0xC773, 0xC775, 0xC77F, 0xC7A5, 0xC7BB, 0xC7BD,
+ 0xC7C1, 0xC7CF, 0xC7D5, 0xC7E1, 0xC7F9, 0xC7FD, 0xC7FF, 0xC803,
+ 0xC811, 0xC81D, 0xC827, 0xC829, 0xC839, 0xC83F, 0xC853, 0xC857,
+ 0xC86B, 0xC881, 0xC88D, 0xC88F, 0xC893, 0xC895, 0xC8A1, 0xC8B7,
+ 0xC8CF, 0xC8D5, 0xC8DB, 0xC8DD, 0xC8E3, 0xC8E7, 0xC8ED, 0xC8EF,
+ 0xC8F9, 0xC905, 0xC911, 0xC917, 0xC919, 0xC91F, 0xC92F, 0xC937,
+ 0xC93D, 0xC941, 0xC953, 0xC95F, 0xC96B, 0xC979, 0xC97D, 0xC989,
+ 0xC98F, 0xC997, 0xC99D, 0xC9AF, 0xC9B5, 0xC9BF, 0xC9CB, 0xC9D9,
+ 0xC9DF, 0xC9E3, 0xC9EB, 0xCA01, 0xCA07, 0xCA09, 0xCA25, 0xCA37,
+ 0xCA39, 0xCA4B, 0xCA55, 0xCA5B, 0xCA69, 0xCA73, 0xCA75, 0xCA7F,
+ 0xCA8D, 0xCA93, 0xCA9D, 0xCA9F, 0xCAB5, 0xCABB, 0xCAC3, 0xCAC9,
+ 0xCAD9, 0xCAE5, 0xCAED, 0xCB03, 0xCB05, 0xCB09, 0xCB17, 0xCB29,
+ 0xCB35, 0xCB3B, 0xCB53, 0xCB59, 0xCB63, 0xCB65, 0xCB71, 0xCB87,
+ 0xCB99, 0xCB9F, 0xCBB3, 0xCBB9, 0xCBC3, 0xCBD1, 0xCBD5, 0xCBD7,
+ 0xCBDD, 0xCBE9, 0xCBFF, 0xCC0D, 0xCC19, 0xCC1D, 0xCC23, 0xCC2B,
+ 0xCC41, 0xCC43, 0xCC4D, 0xCC59, 0xCC61, 0xCC89, 0xCC8B, 0xCC91,
+ 0xCC9B, 0xCCA3, 0xCCA7, 0xCCD1, 0xCCE5, 0xCCE9, 0xCD09, 0xCD15,
+ 0xCD1F, 0xCD25, 0xCD31, 0xCD3D, 0xCD3F, 0xCD49, 0xCD51, 0xCD57,
+ 0xCD5B, 0xCD63, 0xCD67, 0xCD81, 0xCD93, 0xCD97, 0xCD9F, 0xCDBB,
+ 0xCDC1, 0xCDD3, 0xCDD9, 0xCDE5, 0xCDE7, 0xCDF1, 0xCDF7, 0xCDFD,
+ 0xCE0B, 0xCE15, 0xCE21, 0xCE2F, 0xCE47, 0xCE4D, 0xCE51, 0xCE65,
+ 0xCE7B, 0xCE7D, 0xCE8F, 0xCE93, 0xCE99, 0xCEA5, 0xCEA7, 0xCEB7,
+ 0xCEC9, 0xCED7, 0xCEDD, 0xCEE3, 0xCEE7, 0xCEED, 0xCEF5, 0xCF07,
+ 0xCF0B, 0xCF19, 0xCF37, 0xCF3B, 0xCF4D, 0xCF55, 0xCF5F, 0xCF61,
+ 0xCF65, 0xCF6D, 0xCF79, 0xCF7D, 0xCF89, 0xCF9B, 0xCF9D, 0xCFA9,
+ 0xCFB3, 0xCFB5, 0xCFC5, 0xCFCD, 0xCFD1, 0xCFEF, 0xCFF1, 0xCFF7,
+ 0xD013, 0xD015, 0xD01F, 0xD021, 0xD033, 0xD03D, 0xD04B, 0xD04F,
+ 0xD069, 0xD06F, 0xD081, 0xD085, 0xD099, 0xD09F, 0xD0A3, 0xD0AB,
+ 0xD0BD, 0xD0C1, 0xD0CD, 0xD0E7, 0xD0FF, 0xD103, 0xD117, 0xD12D,
+ 0xD12F, 0xD141, 0xD157, 0xD159, 0xD15D, 0xD169, 0xD16B, 0xD171,
+ 0xD177, 0xD17D, 0xD181, 0xD187, 0xD195, 0xD199, 0xD1B1, 0xD1BD,
+ 0xD1C3, 0xD1D5, 0xD1D7, 0xD1E3, 0xD1FF, 0xD20D, 0xD211, 0xD217,
+ 0xD21F, 0xD235, 0xD23B, 0xD247, 0xD259, 0xD261, 0xD265, 0xD279,
+ 0xD27F, 0xD283, 0xD289, 0xD28B, 0xD29D, 0xD2A3, 0xD2A7, 0xD2B3,
+ 0xD2BF, 0xD2C7, 0xD2E3, 0xD2E9, 0xD2F1, 0xD2FB, 0xD2FD, 0xD315,
+ 0xD321, 0xD32B, 0xD343, 0xD34B, 0xD355, 0xD369, 0xD375, 0xD37B,
+ 0xD387, 0xD393, 0xD397, 0xD3A5, 0xD3B1, 0xD3C9, 0xD3EB, 0xD3FD,
+ 0xD405, 0xD40F, 0xD415, 0xD427, 0xD42F, 0xD433, 0xD43B, 0xD44B,
+ 0xD459, 0xD45F, 0xD463, 0xD469, 0xD481, 0xD483, 0xD489, 0xD48D,
+ 0xD493, 0xD495, 0xD4A5, 0xD4AB, 0xD4B1, 0xD4C5, 0xD4DD, 0xD4E1,
+ 0xD4E3, 0xD4E7, 0xD4F5, 0xD4F9, 0xD50B, 0xD50D, 0xD513, 0xD51F,
+ 0xD523, 0xD531, 0xD535, 0xD537, 0xD549, 0xD559, 0xD55F, 0xD565,
+ 0xD567, 0xD577, 0xD58B, 0xD591, 0xD597, 0xD5B5, 0xD5B9, 0xD5C1,
+ 0xD5C7, 0xD5DF, 0xD5EF, 0xD5F5, 0xD5FB, 0xD603, 0xD60F, 0xD62D,
+ 0xD631, 0xD643, 0xD655, 0xD65D, 0xD661, 0xD67B, 0xD685, 0xD687,
+ 0xD69D, 0xD6A5, 0xD6AF, 0xD6BD, 0xD6C3, 0xD6C7, 0xD6D9, 0xD6E1,
+ 0xD6ED, 0xD709, 0xD70B, 0xD711, 0xD715, 0xD721, 0xD727, 0xD73F,
+ 0xD745, 0xD74D, 0xD757, 0xD76B, 0xD77B, 0xD783, 0xD7A1, 0xD7A7,
+ 0xD7AD, 0xD7B1, 0xD7B3, 0xD7BD, 0xD7CB, 0xD7D1, 0xD7DB, 0xD7FB,
+ 0xD811, 0xD823, 0xD825, 0xD829, 0xD82B, 0xD82F, 0xD837, 0xD84D,
+ 0xD855, 0xD867, 0xD873, 0xD88F, 0xD891, 0xD8A1, 0xD8AD, 0xD8BF,
+ 0xD8CD, 0xD8D7, 0xD8E9, 0xD8F5, 0xD8FB, 0xD91B, 0xD925, 0xD933,
+ 0xD939, 0xD943, 0xD945, 0xD94F, 0xD951, 0xD957, 0xD96D, 0xD96F,
+ 0xD973, 0xD979, 0xD981, 0xD98B, 0xD991, 0xD99F, 0xD9A5, 0xD9A9,
+ 0xD9B5, 0xD9D3, 0xD9EB, 0xD9F1, 0xD9F7, 0xD9FF, 0xDA05, 0xDA09,
+ 0xDA0B, 0xDA0F, 0xDA15, 0xDA1D, 0xDA23, 0xDA29, 0xDA3F, 0xDA51,
+ 0xDA59, 0xDA5D, 0xDA5F, 0xDA71, 0xDA77, 0xDA7B, 0xDA7D, 0xDA8D,
+ 0xDA9F, 0xDAB3, 0xDABD, 0xDAC3, 0xDAC9, 0xDAE7, 0xDAE9, 0xDAF5,
+ 0xDB11, 0xDB17, 0xDB1D, 0xDB23, 0xDB25, 0xDB31, 0xDB3B, 0xDB43,
+ 0xDB55, 0xDB67, 0xDB6B, 0xDB73, 0xDB85, 0xDB8F, 0xDB91, 0xDBAD,
+ 0xDBAF, 0xDBB9, 0xDBC7, 0xDBCB, 0xDBCD, 0xDBEB, 0xDBF7, 0xDC0D,
+ 0xDC27, 0xDC31, 0xDC39, 0xDC3F, 0xDC49, 0xDC51, 0xDC61, 0xDC6F,
+ 0xDC75, 0xDC7B, 0xDC85, 0xDC93, 0xDC99, 0xDC9D, 0xDC9F, 0xDCA9,
+ 0xDCB5, 0xDCB7, 0xDCBD, 0xDCC7, 0xDCCF, 0xDCD3, 0xDCD5, 0xDCDF,
+ 0xDCF9, 0xDD0F, 0xDD15, 0xDD17, 0xDD23, 0xDD35, 0xDD39, 0xDD53,
+ 0xDD57, 0xDD5F, 0xDD69, 0xDD6F, 0xDD7D, 0xDD87, 0xDD89, 0xDD9B,
+ 0xDDA1, 0xDDAB, 0xDDBF, 0xDDC5, 0xDDCB, 0xDDCF, 0xDDE7, 0xDDE9,
+ 0xDDED, 0xDDF5, 0xDDFB, 0xDE0B, 0xDE19, 0xDE29, 0xDE3B, 0xDE3D,
+ 0xDE41, 0xDE4D, 0xDE4F, 0xDE59, 0xDE5B, 0xDE61, 0xDE6D, 0xDE77,
+ 0xDE7D, 0xDE83, 0xDE97, 0xDE9D, 0xDEA1, 0xDEA7, 0xDECD, 0xDED1,
+ 0xDED7, 0xDEE3, 0xDEF1, 0xDEF5, 0xDF01, 0xDF09, 0xDF13, 0xDF1F,
+ 0xDF2B, 0xDF33, 0xDF37, 0xDF3D, 0xDF4B, 0xDF55, 0xDF5B, 0xDF67,
+ 0xDF69, 0xDF73, 0xDF85, 0xDF87, 0xDF99, 0xDFA3, 0xDFAB, 0xDFB5,
+ 0xDFB7, 0xDFC3, 0xDFC7, 0xDFD5, 0xDFF1, 0xDFF3, 0xE003, 0xE005,
+ 0xE017, 0xE01D, 0xE027, 0xE02D, 0xE035, 0xE045, 0xE053, 0xE071,
+ 0xE07B, 0xE08F, 0xE095, 0xE09F, 0xE0B7, 0xE0B9, 0xE0D5, 0xE0D7,
+ 0xE0E3, 0xE0F3, 0xE0F9, 0xE101, 0xE125, 0xE129, 0xE131, 0xE135,
+ 0xE143, 0xE14F, 0xE159, 0xE161, 0xE16D, 0xE171, 0xE177, 0xE17F,
+ 0xE183, 0xE189, 0xE197, 0xE1AD, 0xE1B5, 0xE1BB, 0xE1BF, 0xE1C1,
+ 0xE1CB, 0xE1D1, 0xE1E5, 0xE1EF, 0xE1F7, 0xE1FD, 0xE203, 0xE219,
+ 0xE22B, 0xE22D, 0xE23D, 0xE243, 0xE257, 0xE25B, 0xE275, 0xE279,
+ 0xE287, 0xE29D, 0xE2AB, 0xE2AF, 0xE2BB, 0xE2C1, 0xE2C9, 0xE2CD,
+ 0xE2D3, 0xE2D9, 0xE2F3, 0xE2FD, 0xE2FF, 0xE311, 0xE323, 0xE327,
+ 0xE329, 0xE339, 0xE33B, 0xE34D, 0xE351, 0xE357, 0xE35F, 0xE363,
+ 0xE369, 0xE375, 0xE377, 0xE37D, 0xE383, 0xE39F, 0xE3C5, 0xE3C9,
+ 0xE3D1, 0xE3E1, 0xE3FB, 0xE3FF, 0xE401, 0xE40B, 0xE417, 0xE419,
+ 0xE423, 0xE42B, 0xE431, 0xE43B, 0xE447, 0xE449, 0xE453, 0xE455,
+ 0xE46D, 0xE471, 0xE48F, 0xE4A9, 0xE4AF, 0xE4B5, 0xE4C7, 0xE4CD,
+ 0xE4D3, 0xE4E9, 0xE4EB, 0xE4F5, 0xE507, 0xE521, 0xE525, 0xE537,
+ 0xE53F, 0xE545, 0xE54B, 0xE557, 0xE567, 0xE56D, 0xE575, 0xE585,
+ 0xE58B, 0xE593, 0xE5A3, 0xE5A5, 0xE5CF, 0xE609, 0xE611, 0xE615,
+ 0xE61B, 0xE61D, 0xE621, 0xE629, 0xE639, 0xE63F, 0xE653, 0xE657,
+ 0xE663, 0xE66F, 0xE675, 0xE681, 0xE683, 0xE68D, 0xE68F, 0xE695,
+ 0xE6AB, 0xE6AD, 0xE6B7, 0xE6BD, 0xE6C5, 0xE6CB, 0xE6D5, 0xE6E3,
+ 0xE6E9, 0xE6EF, 0xE6F3, 0xE705, 0xE70D, 0xE717, 0xE71F, 0xE72F,
+ 0xE73D, 0xE747, 0xE749, 0xE753, 0xE755, 0xE761, 0xE767, 0xE76B,
+ 0xE77F, 0xE789, 0xE791, 0xE7C5, 0xE7CD, 0xE7D7, 0xE7DD, 0xE7DF,
+ 0xE7E9, 0xE7F1, 0xE7FB, 0xE801, 0xE807, 0xE80F, 0xE819, 0xE81B,
+ 0xE831, 0xE833, 0xE837, 0xE83D, 0xE84B, 0xE84F, 0xE851, 0xE869,
+ 0xE875, 0xE879, 0xE893, 0xE8A5, 0xE8A9, 0xE8AF, 0xE8BD, 0xE8DB,
+ 0xE8E1, 0xE8E5, 0xE8EB, 0xE8ED, 0xE903, 0xE90B, 0xE90F, 0xE915,
+ 0xE917, 0xE92D, 0xE933, 0xE93B, 0xE94B, 0xE951, 0xE95F, 0xE963,
+ 0xE969, 0xE97B, 0xE983, 0xE98F, 0xE995, 0xE9A1, 0xE9B9, 0xE9D7,
+ 0xE9E7, 0xE9EF, 0xEA11, 0xEA19, 0xEA2F, 0xEA35, 0xEA43, 0xEA4D,
+ 0xEA5F, 0xEA6D, 0xEA71, 0xEA7D, 0xEA85, 0xEA89, 0xEAAD, 0xEAB3,
+ 0xEAB9, 0xEABB, 0xEAC5, 0xEAC7, 0xEACB, 0xEADF, 0xEAE5, 0xEAEB,
+ 0xEAF5, 0xEB01, 0xEB07, 0xEB09, 0xEB31, 0xEB39, 0xEB3F, 0xEB5B,
+ 0xEB61, 0xEB63, 0xEB6F, 0xEB81, 0xEB85, 0xEB9D, 0xEBAB, 0xEBB1,
+ 0xEBB7, 0xEBC1, 0xEBD5, 0xEBDF, 0xEBED, 0xEBFD, 0xEC0B, 0xEC1B,
+ 0xEC21, 0xEC29, 0xEC4D, 0xEC51, 0xEC5D, 0xEC69, 0xEC6F, 0xEC7B,
+ 0xECAD, 0xECB9, 0xECBF, 0xECC3, 0xECC9, 0xECCF, 0xECD7, 0xECDD,
+ 0xECE7, 0xECE9, 0xECF3, 0xECF5, 0xED07, 0xED11, 0xED1F, 0xED2F,
+ 0xED37, 0xED3D, 0xED41, 0xED55, 0xED59, 0xED5B, 0xED65, 0xED6B,
+ 0xED79, 0xED8B, 0xED95, 0xEDBB, 0xEDC5, 0xEDD7, 0xEDD9, 0xEDE3,
+ 0xEDE5, 0xEDF1, 0xEDF5, 0xEDF7, 0xEDFB, 0xEE09, 0xEE0F, 0xEE19,
+ 0xEE21, 0xEE49, 0xEE4F, 0xEE63, 0xEE67, 0xEE73, 0xEE7B, 0xEE81,
+ 0xEEA3, 0xEEAB, 0xEEC1, 0xEEC9, 0xEED5, 0xEEDF, 0xEEE1, 0xEEF1,
+ 0xEF1B, 0xEF27, 0xEF2F, 0xEF45, 0xEF4D, 0xEF63, 0xEF6B, 0xEF71,
+ 0xEF93, 0xEF95, 0xEF9B, 0xEF9F, 0xEFAD, 0xEFB3, 0xEFC3, 0xEFC5,
+ 0xEFDB, 0xEFE1, 0xEFE9, 0xF001, 0xF017, 0xF01D, 0xF01F, 0xF02B,
+ 0xF02F, 0xF035, 0xF043, 0xF047, 0xF04F, 0xF067, 0xF06B, 0xF071,
+ 0xF077, 0xF079, 0xF08F, 0xF0A3, 0xF0A9, 0xF0AD, 0xF0BB, 0xF0BF,
+ 0xF0C5, 0xF0CB, 0xF0D3, 0xF0D9, 0xF0E3, 0xF0E9, 0xF0F1, 0xF0F7,
+ 0xF107, 0xF115, 0xF11B, 0xF121, 0xF137, 0xF13D, 0xF155, 0xF175,
+ 0xF17B, 0xF18D, 0xF193, 0xF1A5, 0xF1AF, 0xF1B7, 0xF1D5, 0xF1E7,
+ 0xF1ED, 0xF1FD, 0xF209, 0xF20F, 0xF21B, 0xF21D, 0xF223, 0xF227,
+ 0xF233, 0xF23B, 0xF241, 0xF257, 0xF25F, 0xF265, 0xF269, 0xF277,
+ 0xF281, 0xF293, 0xF2A7, 0xF2B1, 0xF2B3, 0xF2B9, 0xF2BD, 0xF2BF,
+ 0xF2DB, 0xF2ED, 0xF2EF, 0xF2F9, 0xF2FF, 0xF305, 0xF30B, 0xF319,
+ 0xF341, 0xF359, 0xF35B, 0xF35F, 0xF367, 0xF373, 0xF377, 0xF38B,
+ 0xF38F, 0xF3AF, 0xF3C1, 0xF3D1, 0xF3D7, 0xF3FB, 0xF403, 0xF409,
+ 0xF40D, 0xF413, 0xF421, 0xF425, 0xF42B, 0xF445, 0xF44B, 0xF455,
+ 0xF463, 0xF475, 0xF47F, 0xF485, 0xF48B, 0xF499, 0xF4A3, 0xF4A9,
+ 0xF4AF, 0xF4BD, 0xF4C3, 0xF4DB, 0xF4DF, 0xF4ED, 0xF503, 0xF50B,
+ 0xF517, 0xF521, 0xF529, 0xF535, 0xF547, 0xF551, 0xF563, 0xF56B,
+ 0xF583, 0xF58D, 0xF595, 0xF599, 0xF5B1, 0xF5B7, 0xF5C9, 0xF5CF,
+ 0xF5D1, 0xF5DB, 0xF5F9, 0xF5FB, 0xF605, 0xF607, 0xF60B, 0xF60D,
+ 0xF635, 0xF637, 0xF653, 0xF65B, 0xF661, 0xF667, 0xF679, 0xF67F,
+ 0xF689, 0xF697, 0xF69B, 0xF6AD, 0xF6CB, 0xF6DD, 0xF6DF, 0xF6EB,
+ 0xF709, 0xF70F, 0xF72D, 0xF731, 0xF743, 0xF74F, 0xF751, 0xF755,
+ 0xF763, 0xF769, 0xF773, 0xF779, 0xF781, 0xF787, 0xF791, 0xF79D,
+ 0xF79F, 0xF7A5, 0xF7B1, 0xF7BB, 0xF7BD, 0xF7CF, 0xF7D3, 0xF7E7,
+ 0xF7EB, 0xF7F1, 0xF7FF, 0xF805, 0xF80B, 0xF821, 0xF827, 0xF82D,
+ 0xF835, 0xF847, 0xF859, 0xF863, 0xF865, 0xF86F, 0xF871, 0xF877,
+ 0xF87B, 0xF881, 0xF88D, 0xF89F, 0xF8A1, 0xF8AB, 0xF8B3, 0xF8B7,
+ 0xF8C9, 0xF8CB, 0xF8D1, 0xF8D7, 0xF8DD, 0xF8E7, 0xF8EF, 0xF8F9,
+ 0xF8FF, 0xF911, 0xF91D, 0xF925, 0xF931, 0xF937, 0xF93B, 0xF941,
+ 0xF94F, 0xF95F, 0xF961, 0xF96D, 0xF971, 0xF977, 0xF99D, 0xF9A3,
+ 0xF9A9, 0xF9B9, 0xF9CD, 0xF9E9, 0xF9FD, 0xFA07, 0xFA0D, 0xFA13,
+ 0xFA21, 0xFA25, 0xFA3F, 0xFA43, 0xFA51, 0xFA5B, 0xFA6D, 0xFA7B,
+ 0xFA97, 0xFA99, 0xFA9D, 0xFAAB, 0xFABB, 0xFABD, 0xFAD9, 0xFADF,
+ 0xFAE7, 0xFAED, 0xFB0F, 0xFB17, 0xFB1B, 0xFB2D, 0xFB2F, 0xFB3F,
+ 0xFB47, 0xFB4D, 0xFB75, 0xFB7D, 0xFB8F, 0xFB93, 0xFBB1, 0xFBB7,
+ 0xFBC3, 0xFBC5, 0xFBE3, 0xFBE9, 0xFBF3, 0xFC01, 0xFC29, 0xFC37,
+ 0xFC41, 0xFC43, 0xFC4F, 0xFC59, 0xFC61, 0xFC65, 0xFC6D, 0xFC73,
+ 0xFC79, 0xFC95, 0xFC97, 0xFC9B, 0xFCA7, 0xFCB5, 0xFCC5, 0xFCCD,
+ 0xFCEB, 0xFCFB, 0xFD0D, 0xFD0F, 0xFD19, 0xFD2B, 0xFD31, 0xFD51,
+ 0xFD55, 0xFD67, 0xFD6D, 0xFD6F, 0xFD7B, 0xFD85, 0xFD97, 0xFD99,
+ 0xFD9F, 0xFDA9, 0xFDB7, 0xFDC9, 0xFDE5, 0xFDEB, 0xFDF3, 0xFE03,
+ 0xFE05, 0xFE09, 0xFE1D, 0xFE27, 0xFE2F, 0xFE41, 0xFE4B, 0xFE4D,
+ 0xFE57, 0xFE5F, 0xFE63, 0xFE69, 0xFE75, 0xFE7B, 0xFE8F, 0xFE93,
+ 0xFE95, 0xFE9B, 0xFE9F, 0xFEB3, 0xFEBD, 0xFED7, 0xFEE9, 0xFEF3,
+ 0xFEF5, 0xFF07, 0xFF0D, 0xFF1D, 0xFF2B, 0xFF2F, 0xFF49, 0xFF4D,
+ 0xFF5B, 0xFF65, 0xFF71, 0xFF7F, 0xFF85, 0xFF8B, 0xFF8F, 0xFF9D,
+ 0xFFA7, 0xFFA9, 0xFFC7, 0xFFD9, 0xFFEF, 0xFFF1,
+#endif
+};
diff --git a/security/nss/lib/freebl/mpi/stats b/security/nss/lib/freebl/mpi/stats
new file mode 100755
index 000000000..a5deb94c0
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/stats
@@ -0,0 +1,39 @@
+#!/usr/bin/perl
+
+#
+# Treat each line as a sequence of comma and/or space delimited
+# floating point numbers, and compute basic statistics on them.
+# The count, min, average, max and sum are written to standard output.
+
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Seed min/max with the extreme finite doubles so any real value replaces them.
+$min = 1.7976931348623157E+308;
+$max = -1.7976931348623157E+308;
+$sum = $num = $avg = 0;
+while(<>) {
+    chomp;
+    # A leading delimiter makes split() return an empty first field; drop it.
+    @nums = grep { length } split(/[\s,]+/, $_);
+    next unless @nums;
+
+    $num += scalar @nums;
+    foreach (@nums) {
+        $min = $_ if($_ < $min);
+        $max = $_ if($_ > $max);
+        $sum += $_;
+    }
+}
+
+if($num) {
+    $avg = $sum / $num;
+} else {
+    $min = $max = 0;    # no input: report zeros rather than the seed values
+}
+
+printf "%d\tmin=%.2f, avg=%.2f, max=%.2f, sum=%.2f\n",
+    $num, $min, $avg, $max, $sum;
+
+# end
diff --git a/security/nss/lib/freebl/mpi/target.mk b/security/nss/lib/freebl/mpi/target.mk
new file mode 100644
index 000000000..dd74564b1
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/target.mk
@@ -0,0 +1,233 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+##
+## Define CFLAGS to contain any local options your compiler
+## setup requires.
+##
+## Conditional compilation options are no longer here; see
+## the file 'mpi-config.h' instead.
+## MPICMN holds the common include/define flags; per-TARGET sections append to it.
+MPICMN = -I. -DMP_API_COMPATIBLE -DMP_IOFUNC
+CFLAGS= -O $(MPICMN)
+#CFLAGS=-ansi -fullwarn -woff 1521 -O3 $(MPICMN)
+#CFLAGS=-ansi -pedantic -Wall -O3 $(MPICMN)
+#CFLAGS=-ansi -pedantic -Wall -g -O2 -DMP_DEBUG=1 $(MPICMN)
+
+ifeq ($(TARGET),mipsIRIX)
+#IRIX on MIPS: builds with -n32 -mips3 and links the assembly object mpi_mips.o
+#MPICMN += -DMP_MONT_USE_MP_MUL
+MPICMN += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
+MPICMN += -DMP_USE_UINT_DIGIT
+#MPICMN += -DMP_NO_MP_WORD
+AS_OBJS = mpi_mips.o
+#ASFLAGS = -O -OPT:Olimit=4000 -dollar -fullwarn -xansi -n32 -mips3 -exceptions
+ASFLAGS = -O -OPT:Olimit=4000 -dollar -fullwarn -xansi -n32 -mips3
+#CFLAGS=-ansi -n32 -O3 -fullwarn -woff 1429 -D_SGI_SOURCE $(MPICMN)
+CFLAGS=-ansi -n32 -O2 -fullwarn -woff 1429 -D_SGI_SOURCE $(MPICMN)
+#CFLAGS=-ansi -n32 -g -fullwarn -woff 1429 -D_SGI_SOURCE $(MPICMN)
+#CFLAGS=-ansi -64 -O2 -fullwarn -woff 1429 -D_SGI_SOURCE -DMP_NO_MP_WORD \
+ $(MPICMN)
+endif
+
+ifeq ($(TARGET),alphaOSF1)
+#Alpha/OSF1: defines MP_ASSEMBLY_MULTIPLY and MP_NO_MP_WORD, adds mpvalpha.o
+MPICMN += -DMP_ASSEMBLY_MULTIPLY
+AS_OBJS+= mpvalpha.o
+#CFLAGS= -O -Olimit 4000 -ieee_with_inexact -std1 -DOSF1 -D_REENTRANT $(MPICMN)
+CFLAGS= -O -Olimit 4000 -ieee_with_inexact -std1 -DOSF1 -D_REENTRANT \
+ -DMP_NO_MP_WORD $(MPICMN)
+endif
+
+ifeq ($(TARGET),v9SOLARIS)
+#Solaris 64 (-xarch=v9): adds montmulfv9.o, mpi_sparc.o and mpv_sparcv9.o
+SOLARIS_FPU_FLAGS = -fast -xO5 -xrestrict=%all -xchip=ultra -xarch=v9a -KPIC -mt
+#SOLARIS_FPU_FLAGS = -fast -xO5 -xrestrict=%all -xdepend -xchip=ultra -xarch=v9a -KPIC -mt
+SOLARIS_ASM_FLAGS = -xchip=ultra -xarch=v9a -KPIC -mt
+AS_OBJS += montmulfv9.o
+AS_OBJS += mpi_sparc.o mpv_sparcv9.o
+MPICMN += -DMP_USE_UINT_DIGIT
+#MPICMN += -DMP_NO_MP_WORD
+MPICMN += -DMP_ASSEMBLY_MULTIPLY
+MPICMN += -DMP_USING_MONT_MULF
+CFLAGS= -O -KPIC -DSVR4 -DSYSV -D__svr4 -D__svr4__ -DSOLARIS -D_REENTRANT \
+ -DSOLARIS2_8 -xarch=v9 -DXP_UNIX $(MPICMN)
+#CFLAGS= -g -KPIC -DSVR4 -DSYSV -D__svr4 -D__svr4__ -DSOLARIS -D_REENTRANT \
+ -DSOLARIS2_8 -xarch=v9 -DXP_UNIX $(MPICMN)
+endif
+
+ifeq ($(TARGET),v8plusSOLARIS)
+#Solaris 32 (-xarch=v8plus): adds montmulfv8.o, mpi_sparc.o and mpv_sparcv8.o
+SOLARIS_FPU_FLAGS = -fast -xO5 -xrestrict=%all -xdepend -xchip=ultra -xarch=v8plusa -KPIC -mt
+SOLARIS_ASM_FLAGS = -xchip=ultra -xarch=v8plusa -KPIC -mt
+AS_OBJS += montmulfv8.o
+AS_OBJS += mpi_sparc.o mpv_sparcv8.o
+#AS_OBJS = montmulf.o
+MPICMN += -DMP_ASSEMBLY_MULTIPLY
+MPICMN += -DMP_USING_MONT_MULF
+MPICMN += -DMP_USE_UINT_DIGIT
+MPICMN += -DMP_NO_MP_WORD
+CFLAGS=-O -KPIC -DSVR4 -DSYSV -D__svr4 -D__svr4__ -DSOLARIS -D_REENTRANT \
+ -DSOLARIS2_6 -xarch=v8plus -DXP_UNIX $(MPICMN)
+endif
+
+ifeq ($(TARGET),v8SOLARIS)
+#Solaris 32 (-xarch=v8): every assembly-object line in this section is commented out
+#SOLARIS_FPU_FLAGS = -fast -xO5 -xrestrict=%all -xdepend -xchip=ultra -xarch=v8 -KPIC -mt
+#SOLARIS_ASM_FLAGS = -xchip=ultra -xarch=v8plusa -KPIC -mt
+#AS_OBJS = montmulfv8.o mpi_sparc.o mpv_sparcv8.o
+#AS_OBJS = montmulf.o
+#MPICMN += -DMP_USING_MONT_MULF
+#MPICMN += -DMP_ASSEMBLY_MULTIPLY
+MPICMN += -DMP_USE_LONG_LONG_MULTIPLY -DMP_USE_UINT_DIGIT
+MPICMN += -DMP_NO_MP_WORD
+CFLAGS=-O -KPIC -DSVR4 -DSYSV -D__svr4 -D__svr4__ -DSOLARIS -D_REENTRANT \
+ -DSOLARIS2_6 -xarch=v8 -DXP_UNIX $(MPICMN)
+endif
+
+ifeq ($(TARGET),ia64HPUX)
+#HPUX 32 on ia64 -- 64 bit digits SCREAM.
+# This one is for +DD32 (see CFLAGS), which is the 32-bit ABI with 64-bit registers.
+CFLAGS= +O3 -DHPUX10 -D_POSIX_C_SOURCE=199506L -Aa +Z -DHPUX -Dhppa \
+ -D_HPUX_SOURCE -Aa +e -z +p +DD32 -DHPUX11 -DXP_UNIX -Wl,+k $(MPICMN)
+#CFLAGS= -O -DHPUX10 -D_POSIX_C_SOURCE=199506L -Aa +Z -DHPUX -Dhppa \
+ -D_HPUX_SOURCE -Aa +e -z +p +DD32 -DHPUX11 -DXP_UNIX -Wl,+k $(MPICMN)
+#CFLAGS= -g -DHPUX10 -D_POSIX_C_SOURCE=199506L -Ae +Z -DHPUX -Dhppa \
+ -D_HPUX_SOURCE -Aa +e -z +p +DD32 -DHPUX11 -DXP_UNIX -Wl,+k $(MPICMN)
+endif
+
+ifeq ($(TARGET),ia64HPUX64)
+#HPUX 64 on ia64
+# This one is for +DD64 (see CFLAGS), which is the 64-bit ABI
+CFLAGS= +O3 -DHPUX10 -D_POSIX_C_SOURCE=199506L -Aa +Z -DHPUX -Dhppa \
+ -D_HPUX_SOURCE -Aa +e -z +p +DD64 -DHPUX11 -DXP_UNIX -Wl,+k $(MPICMN)
+#CFLAGS= -g -DHPUX10 -D_POSIX_C_SOURCE=199506L -Ae +Z -DHPUX -Dhppa \
+ -D_HPUX_SOURCE -Aa +e -z +p +DD64 -DHPUX11 -DXP_UNIX -Wl,+k $(MPICMN)
+endif
+
+ifeq ($(TARGET),PA2.0WHPUX)
+#HPUX64 (HP PA 2.0 Wide) using MAXPY and 64-bit digits; AS is set to $(CC) $(CFLAGS) -c
+MPICMN += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
+AS_OBJS = mpi_hp.o hpma512.o hppa20.o
+CFLAGS= -O -DHPUX10 -D_POSIX_C_SOURCE=199506L -Ae +Z -DHPUX -Dhppa \
+ -D_HPUX_SOURCE -Aa +e -z +DA2.0W +DS2.0 +O3 +DChpux -DHPUX11 -DXP_UNIX \
+ $(MPICMN)
+#CFLAGS= -g -DHPUX10 -D_POSIX_C_SOURCE=199506L -Ae +Z -DHPUX -Dhppa \
+ -D_HPUX_SOURCE -Aa +e -z +DA2.0W +DS2.0 +DChpux -DHPUX11 -DXP_UNIX \
+ $(MPICMN)
+AS = $(CC) $(CFLAGS) -c
+endif
+
+ifeq ($(TARGET),PA2.0NHPUX)
+#HPUX32 (HP PA 2.0 Narrow) hybrid model, using 32-bit digits; AS is set to $(CC) $(CFLAGS) -c
+# This one is for +DA2.0 (N) which is the 32-bit ABI with 64-bit registers.
+MPICMN += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE
+AS_OBJS = mpi_hp.o hpma512.o hppa20.o
+CFLAGS= +O3 -DHPUX10 -D_POSIX_C_SOURCE=199506L -Ae +Z -DHPUX -Dhppa \
+ -D_HPUX_SOURCE -Aa +e -z +DA2.0 +DS2.0 +DChpux -DHPUX11 -DXP_UNIX \
+ -Wl,+k $(MPICMN)
+#CFLAGS= -g -DHPUX10 -D_POSIX_C_SOURCE=199506L -Ae +Z -DHPUX -Dhppa \
+ -D_HPUX_SOURCE -Aa +e -z +DA2.0 +DS2.0 +DChpux -DHPUX11 -DXP_UNIX \
+ -Wl,+k $(MPICMN)
+AS = $(CC) $(CFLAGS) -c
+endif
+
+ifeq ($(TARGET),PA1.1HPUX)
+#HPUX32 (HP PA 1.1) Pure 32 bit; no assembly objects are listed for this target
+MPICMN += -DMP_USE_UINT_DIGIT -DMP_NO_MP_WORD
+#MPICMN += -DMP_USE_LONG_LONG_MULTIPLY
+CFLAGS= -O -DHPUX10 -D_POSIX_C_SOURCE=199506L -Ae +Z -DHPUX -Dhppa \
+ -D_HPUX_SOURCE +DAportable +DS1.1 -DHPUX11 -DXP_UNIX $(MPICMN)
+##CFLAGS= -g -DHPUX10 -D_POSIX_C_SOURCE=199506L -Ae +Z -DHPUX -Dhppa \
+# -D_HPUX_SOURCE +DAportable +DS1.1 -DHPUX11 -DXP_UNIX $(MPICMN)
+endif
+
+ifeq ($(TARGET),32AIX)
+# AIX (32AIX target): compiles with xlC_r; defines MP_NO_DIV_WORD, MP_NO_ADD_WORD, MP_NO_SUB_WORD
+CC = xlC_r
+MPICMN += -DMP_USE_UINT_DIGIT
+MPICMN += -DMP_NO_DIV_WORD
+#MPICMN += -DMP_NO_MUL_WORD
+MPICMN += -DMP_NO_ADD_WORD
+MPICMN += -DMP_NO_SUB_WORD
+#MPICMN += -DMP_NO_MP_WORD
+#MPICMN += -DMP_USE_LONG_LONG_MULTIPLY
+CFLAGS = -O -DAIX -DSYSV -qarch=com -DAIX4_3 -DXP_UNIX -UDEBUG -DNDEBUG $(MPICMN)
+#CFLAGS = -g -DAIX -DSYSV -qarch=com -DAIX4_3 -DXP_UNIX -UDEBUG -DNDEBUG $(MPICMN)
+#CFLAGS += -pg
+endif
+
+ifeq ($(TARGET),64AIX)
+# AIX (64AIX target): compiles with xlC_r and exports OBJECT_MODE=64 to the tools' environment
+CC = xlC_r
+MPICMN += -DMP_USE_UINT_DIGIT
+CFLAGS = -O -O2 -DAIX -DSYSV -qarch=com -DAIX_64BIT -DAIX4_3 -DXP_UNIX -UDEBUG -DNDEBUG $(MPICMN)
+OBJECT_MODE=64
+export OBJECT_MODE
+endif
+
+ifeq ($(TARGET),x86LINUX)
+#Linux on x86: defines the MP_ASSEMBLY_* macros and links mpi_x86.o; commented CFLAGS are -g variants
+AS_OBJS = mpi_x86.o
+MPICMN += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE -DMP_ASSEMBLY_DIV_2DX1D
+MPICMN += -DMP_MONT_USE_MP_MUL -DMP_IS_LITTLE_ENDIAN
+CFLAGS= -O2 -fPIC -DLINUX1_2 -Di386 -D_XOPEN_SOURCE -DLINUX2_1 -ansi -Wall \
+ -pipe -DLINUX -Dlinux -D_POSIX_SOURCE -D_BSD_SOURCE -DHAVE_STRERROR \
+ -DXP_UNIX -UDEBUG -DNDEBUG -D_REENTRANT $(MPICMN)
+#CFLAGS= -g -fPIC -DLINUX1_2 -Di386 -D_XOPEN_SOURCE -DLINUX2_1 -ansi -Wall \
+ -pipe -DLINUX -Dlinux -D_POSIX_SOURCE -D_BSD_SOURCE -DHAVE_STRERROR \
+ -DXP_UNIX -DDEBUG -UNDEBUG -D_REENTRANT $(MPICMN)
+#CFLAGS= -g -fPIC -DLINUX1_2 -Di386 -D_XOPEN_SOURCE -DLINUX2_1 -ansi -Wall \
+ -pipe -DLINUX -Dlinux -D_POSIX_SOURCE -D_BSD_SOURCE -DHAVE_STRERROR \
+ -DXP_UNIX -UDEBUG -DNDEBUG -D_REENTRANT $(MPICMN)
+endif
+
+ifeq ($(TARGET),armLINUX)
+# Linux on ARM: adds mpi_arm.o and defines the MP_ASSEMBLY_MULTIPLY/SQUARE macros.
+AS_OBJS += mpi_arm.o
+MPICMN += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE -DMP_USE_UINT_DIGIT
+endif
+
+ifeq ($(TARGET),AMD64SOLARIS)
+ASFLAGS += -xarch=generic64
+AS_OBJS = mpi_amd64.o mpi_amd64_sun.o
+MP_CONFIG = -DMP_ASSEMBLY_MULTIPLY -DMPI_AMD64
+MP_CONFIG += -DMP_IS_LITTLE_ENDIAN
+CFLAGS = -xarch=generic64 -xO4 -I. -DMP_API_COMPATIBLE -DMP_IOFUNC $(MP_CONFIG)
+MPICMN += $(MP_CONFIG)
+# The rule target must be the object named in AS_OBJS so this recipe (-P -D_ASM) is used for it.
+mpi_amd64_sun.o: mpi_amd64_sun.s
+	$(AS) -xarch=generic64 -P -D_ASM mpi_amd64_sun.s
+endif
+
+ifeq ($(TARGET),WIN32)
+ifeq ($(CPU_ARCH),x86_64)
+AS_OBJS = mpi_amd64.obj mpi_amd64_masm.obj mp_comba_amd64_masm.obj
+CFLAGS = -Od -Z7 -MDd -W3 -nologo -DDEBUG -D_DEBUG -UNDEBUG -DDEBUG_$(USER)
+CFLAGS += -DWIN32 -DWIN64 -D_WINDOWS -D_AMD_64_ -D_M_AMD64 -DWIN95 -DXP_PC
+CFLAGS += $(MPICMN)
+# x86_64: every AS_OBJS entry must end in .obj to match the static pattern rule (built by ml64).
+$(AS_OBJS): %.obj : %.asm
+	ml64 -Cp -Sn -Zi -coff -nologo -c $<
+
+$(LIBOBJS): %.obj : %.c
+	cl $(CFLAGS) -Fo$@ -c $<
+else
+AS_OBJS = mpi_x86.obj
+MPICMN += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE -DMP_ASSEMBLY_DIV_2DX1D
+MPICMN += -DMP_USE_UINT_DIGIT -DMP_NO_MP_WORD -DMP_API_COMPATIBLE
+MPICMN += -DMP_MONT_USE_MP_MUL
+MPICMN += -DMP_CHAR_STORE_SLOW -DMP_IS_LITTLE_ENDIAN
+CFLAGS = -Od -Z7 -MDd -W3 -nologo -DDEBUG -D_DEBUG -UNDEBUG -DDEBUG_$(USER)
+CFLAGS += -DWIN32 -D_WINDOWS -D_X86_ -DWIN95 -DXP_PC
+CFLAGS += $(MPICMN)
+# Any other CPU_ARCH: the x86 assembly object is built from its .asm file with ml.
+$(AS_OBJS): %.obj : %.asm
+	ml -Cp -Sn -Zi -coff -nologo -c $<
+
+$(LIBOBJS): %.obj : %.c
+	cl $(CFLAGS) -Fo$@ -c $<
+
+endif
+endif
diff --git a/security/nss/lib/freebl/mpi/test-arrays.txt b/security/nss/lib/freebl/mpi/test-arrays.txt
new file mode 100644
index 000000000..6c8908c1a
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/test-arrays.txt
@@ -0,0 +1,55 @@
+#
+# Test suite table for MPI library
+#
+# Format of entries:
+# suite-name:function-name:description
+#
+# suite-name The name used to identify this test in mpi-test
+# function-name The function called to perform this test in mpi-test.c
+# description A brief description of what the suite tests
+
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+list:test_list:print out a list of the available test suites
+copy:test_copy:test assignment of mp-int structures
+exchange:test_exch:test exchange of mp-int structures
+zero:test_zero:test zeroing of an mp-int
+set:test_set:test setting an mp-int to a small constant
+absolute-value:test_abs:test the absolute value function
+negate:test_neg:test the arithmetic negation function
+add-digit:test_add_d:test digit addition
+add:test_add:test full addition
+subtract-digit:test_sub_d:test digit subtraction
+subtract:test_sub:test full subtraction
+multiply-digit:test_mul_d:test digit multiplication
+multiply:test_mul:test full multiplication
+square:test_sqr:test full squaring function
+divide-digit:test_div_d:test digit division
+divide-2:test_div_2:test division by two
+divide-2d:test_div_2d:test division & remainder by 2^d
+divide:test_div:test full division
+expt-digit:test_expt_d:test digit exponentiation
+expt:test_expt:test full exponentiation
+expt-2:test_2expt:test power-of-two exponentiation
+modulo-digit:test_mod_d:test digit modular reduction
+modulo:test_mod:test full modular reduction
+mod-add:test_addmod:test modular addition
+mod-subtract:test_submod:test modular subtraction
+mod-multiply:test_mulmod:test modular multiplication
+mod-square:test_sqrmod:test modular squaring function
+mod-expt:test_exptmod:test full modular exponentiation
+mod-expt-digit:test_exptmod_d:test digit modular exponentiation
+mod-inverse:test_invmod:test modular inverse function
+compare-digit:test_cmp_d:test digit comparison function
+compare-zero:test_cmp_z:test zero comparison function
+compare:test_cmp:test general signed comparison
+compare-magnitude:test_cmp_mag:test general magnitude comparison
+parity:test_parity:test parity comparison functions
+gcd:test_gcd:test greatest common divisor functions
+lcm:test_lcm:test least common multiple function
+conversion:test_convert:test general radix conversion facilities
+binary:test_raw:test raw output format
+pprime:test_pprime:test probabilistic primality tester
+fermat:test_fermat:test Fermat pseudoprimality tester
diff --git a/security/nss/lib/freebl/mpi/tests/LICENSE b/security/nss/lib/freebl/mpi/tests/LICENSE
new file mode 100644
index 000000000..c2c5d0190
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/tests/LICENSE
@@ -0,0 +1,6 @@
+Within this directory, each of the files listed below is licensed under
+the terms given in the file LICENSE-MPL, also in this directory.
+
+pi1k.txt
+pi2k.txt
+pi5k.txt
diff --git a/security/nss/lib/freebl/mpi/tests/LICENSE-MPL b/security/nss/lib/freebl/mpi/tests/LICENSE-MPL
new file mode 100644
index 000000000..41dc2327f
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/tests/LICENSE-MPL
@@ -0,0 +1,3 @@
+This Source Code Form is subject to the terms of the Mozilla Public
+License, v. 2.0. If a copy of the MPL was not distributed with this
+file, You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/security/nss/lib/freebl/mpi/tests/mptest-1.c b/security/nss/lib/freebl/mpi/tests/mptest-1.c
new file mode 100644
index 000000000..449134668
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/tests/mptest-1.c
@@ -0,0 +1,43 @@
+/*
+ * Simple test driver for MPI library
+ *
+ * Test 1: Simple input test (drives single-digit multiply and add,
+ * as well as I/O routines)
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+
+#ifdef MAC_CW_SIOUX
+#include <console.h>
+#endif
+
+#include "mpi.h"
+
+/*
+ * Test 1 driver: for every command-line argument, parse it as a base-10
+ * integer with mp_read_radix and print the parsed value on its own line.
+ * Always returns 0.
+ */
+int
+main(int argc, char *argv[])
+{
+    mp_int value;
+    int arg = 1;
+
+#ifdef MAC_CW_SIOUX
+    /* NOTE(review): presumably obtains argv from the SIOUX console -- confirm. */
+    argc = ccommand(&argv);
+#endif
+
+    mp_init(&value);
+
+    while (arg < argc) {
+        mp_read_radix(&value, argv[arg], 10);
+        mp_print(&value, stdout);
+        putchar('\n');
+        arg++;
+    }
+
+    mp_clear(&value);
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/tests/mptest-2.c b/security/nss/lib/freebl/mpi/tests/mptest-2.c
new file mode 100644
index 000000000..1505e6afd
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/tests/mptest-2.c
@@ -0,0 +1,62 @@
+/*
+ * Simple test driver for MPI library
+ *
+ * Test 2: Basic addition and subtraction test
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+
+#include "mpi.h"
+
+/* Write a label, the value of *val as rendered by mp_print, and a newline. */
+static void
+show(const char *label, mp_int *val)
+{
+    fputs(label, stdout);
+    mp_print(val, stdout);
+    putchar('\n');
+}
+
+/*
+ * Test 2 driver: reads a and b (base 10) from the command line and prints
+ * them followed by a + b and a - b.
+ *
+ * Returns 1 when fewer than two operands are given, 0 otherwise.
+ */
+int
+main(int argc, char *argv[])
+{
+    mp_int a, b, c;
+
+    if (argc < 3) {
+        fprintf(stderr, "Usage: %s <a> <b>\n", argv[0]);
+        return 1;
+    }
+
+    puts("Test 2: Basic addition and subtraction\n");
+
+    mp_init(&a);
+    mp_init(&b);
+    mp_init(&c);
+
+    mp_read_radix(&a, argv[1], 10);
+    mp_read_radix(&b, argv[2], 10);
+    show("a = ", &a);
+    show("b = ", &b);
+
+    puts("c = a + b");
+    mp_add(&a, &b, &c);
+    show("c = ", &c);
+
+    puts("c = a - b");
+    mp_sub(&a, &b, &c);
+    show("c = ", &c);
+
+    mp_clear(&c);
+    mp_clear(&b);
+    mp_clear(&a);
+
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/tests/mptest-3.c b/security/nss/lib/freebl/mpi/tests/mptest-3.c
new file mode 100644
index 000000000..86fb24654
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/tests/mptest-3.c
@@ -0,0 +1,105 @@
+/*
+ * Simple test driver for MPI library
+ *
+ * Test 3: Multiplication, division, and exponentiation test
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+
+#include <time.h>
+
+#include "mpi.h"
+
+#define EXPT 0 /* define nonzero to get exponentiate test */
+
+/* Write a label, the value of *val as rendered by mp_print, and a newline. */
+static void
+show(const char *label, mp_int *val)
+{
+    fputs(label, stdout);
+    mp_print(val, stdout);
+    fputc('\n', stdout);
+}
+
+/*
+ * Test 3 driver: multiplication, division and power-of-two checks.
+ *
+ * argv[1] and argv[2] are read with mp_read_variable_radix(..., 10) into a
+ * and b.  The program then prints a * b, b * 32523, a / b with a mod b,
+ * a divided by a random single digit, optionally a ** b (when EXPT is
+ * nonzero), and 2 raised to a random power below 256.
+ *
+ * Returns 1 when fewer than two operands are given, 0 otherwise.
+ */
+int
+main(int argc, char *argv[])
+{
+    int ix;
+    mp_int a, b, c, d;
+    mp_digit r = 0; /* defined even if the digit division reports an error */
+
+    if (argc < 3) {
+        fprintf(stderr, "Usage: %s <a> <b>\n", argv[0]);
+        return 1;
+    }
+
+    printf("Test 3: Multiplication and division\n\n");
+    srand(time(NULL));
+
+    mp_init(&a);
+    mp_init(&b);
+
+    mp_read_variable_radix(&a, argv[1], 10);
+    mp_read_variable_radix(&b, argv[2], 10);
+    show("a = ", &a);
+    show("b = ", &b);
+
+    mp_init(&c);
+    printf("\nc = a * b\n");
+
+    mp_mul(&a, &b, &c);
+    show("c = ", &c);
+
+    printf("\nc = b * 32523\n");
+
+    mp_mul_d(&b, 32523, &c);
+    show("c = ", &c);
+
+    mp_init(&d);
+    printf("\nc = a / b, d = a mod b\n");
+
+    mp_div(&a, &b, &c, &d);
+    show("c = ", &c);
+    show("d = ", &d);
+
+    /* Pick a divisor in 1..255 so the test never divides by zero. */
+    ix = rand() % 255 + 1;
+    printf("\nc = a / %d, r = a mod %d\n", ix, ix);
+    mp_div_d(&a, (mp_digit)ix, &c, &r);
+    show("c = ", &c);
+    /*
+     * The width of mp_digit depends on the build configuration, so convert
+     * explicitly to the type the conversion specifier expects.  r is a
+     * remainder modulo a value below 256 and always fits.
+     */
+    printf("r = %04lX\n", (unsigned long)r);
+
+#if EXPT
+    printf("\nc = a ** b\n");
+    mp_expt(&a, &b, &c);
+    show("c = ", &c);
+#endif
+
+    ix = rand() % 256;
+    printf("\nc = 2^%d\n", ix);
+    mp_2expt(&c, ix);
+    show("c = ", &c);
+
+    mp_clear(&d);
+    mp_clear(&c);
+    mp_clear(&b);
+    mp_clear(&a);
+
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/tests/mptest-3a.c b/security/nss/lib/freebl/mpi/tests/mptest-3a.c
new file mode 100644
index 000000000..c6cea7046
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/tests/mptest-3a.c
@@ -0,0 +1,123 @@
+/*
+ * Simple test driver for MPI library
+ *
+ * Test 3a: Multiplication vs. squaring timing test
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+
+#include <time.h>
+
+#include "mpi.h"
+#include "mpprime.h"
+
+/* Write a label, the value of *val as rendered by mp_print, and a newline. */
+static void
+show(const char *label, mp_int *val)
+{
+    fputs(label, stdout);
+    mp_print(val, stdout);
+    putchar('\n');
+}
+
+/*
+ * Test 3a driver: compares mp_mul(a, a) against mp_sqr(a).
+ *
+ * argv[1] is the number of samples (its absolute value is used; zero is
+ * rejected).  argv[2], when present and positive, is a size in bits that
+ * is rounded up to whole digits; otherwise 8 digits are used.
+ *
+ * The run first checks that both routines agree on every sample, then
+ * times each routine over the same number of iterations, reseeding rand()
+ * with the same seed before each phase.
+ *
+ * Returns 0 on success and 1 on a usage error or a result mismatch.
+ */
+int
+main(int argc, char *argv[])
+{
+    mp_int a, c, d;
+    clock_t start, finish;
+    double mul_secs, sqr_secs;
+    time_t seed = time(NULL);
+    int left, num, prec = 8;
+
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s <num-tests> [<precision>]\n", argv[0]);
+        return 1;
+    }
+
+    num = atoi(argv[1]);
+    if (num < 0)
+        num = -num;
+
+    if (num == 0) {
+        fprintf(stderr, "%s: must perform at least 1 test\n", argv[0]);
+        return 1;
+    }
+
+    if (argc > 2) {
+        prec = atoi(argv[2]);
+        /* Non-positive input falls back to 8 digits; bits round up to digits. */
+        prec = (prec > 0) ? (prec + (DIGIT_BIT - 1)) / DIGIT_BIT : 8;
+    }
+
+    printf("Test 3a: Multiplication vs squaring timing test\n"
+           "Precision: %d digits (%u bits)\n"
+           "# of tests: %d\n\n",
+           prec, prec * DIGIT_BIT, num);
+
+    mp_init_size(&a, prec);
+    mp_init(&c);
+    mp_init(&d);
+
+    puts("Verifying accuracy ... ");
+    srand((unsigned int)seed);
+    for (left = num; left > 0; left--) {
+        mpp_random_size(&a, prec);
+        mp_mul(&a, &a, &c);
+        mp_sqr(&a, &d);
+
+        if (mp_cmp(&c, &d) == 0)
+            continue;
+
+        /* Mismatch: dump the operands and their difference, then give up. */
+        puts("Error! Results not accurate:");
+        show("a = ", &a);
+        show("c = ", &c);
+        show("d = ", &d);
+        mp_sub(&c, &d, &d);
+        show("dif ", &d);
+        mp_clear(&c);
+        mp_clear(&d);
+        mp_clear(&a);
+        return 1;
+    }
+    printf("Accuracy is confirmed for the %d test samples\n", num);
+    mp_clear(&d);
+
+    puts("Testing squaring ... ");
+    srand((unsigned int)seed);
+    start = clock();
+    for (left = num; left > 0; left--) {
+        mpp_random_size(&a, prec);
+        mp_sqr(&a, &c);
+    }
+    finish = clock();
+    sqr_secs = (double)(finish - start) / CLOCKS_PER_SEC;
+
+    puts("Testing multiplication ... ");
+    srand((unsigned int)seed);
+    start = clock();
+    for (left = num; left > 0; left--) {
+        mpp_random(&a);
+        mp_mul(&a, &a, &c);
+    }
+    finish = clock();
+    mul_secs = (double)(finish - start) / CLOCKS_PER_SEC;
+
+    printf("Multiplication time: %.3f sec (%.3f each)\n", mul_secs, mul_secs / num);
+    printf("Squaring time: %.3f sec (%.3f each)\n", sqr_secs, sqr_secs / num);
+    printf("Improvement: %.2f%%\n", (1.0 - (sqr_secs / mul_secs)) * 100.0);
+
+    mp_clear(&c);
+    mp_clear(&a);
+
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/tests/mptest-4.c b/security/nss/lib/freebl/mpi/tests/mptest-4.c
new file mode 100644
index 000000000..0f326ac2c
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/tests/mptest-4.c
@@ -0,0 +1,111 @@
+/*
+ * Simple test driver for MPI library
+ *
+ * Test 4: Modular arithmetic tests
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+
+#include "mpi.h"
+
+/* Write a label, the value of *val as rendered by mp_print, and a newline. */
+static void
+show(const char *label, mp_int *val)
+{
+    fputs(label, stdout);
+    mp_print(val, stdout);
+    fputc('\n', stdout);
+}
+
+/*
+ * Test 4 driver: modular arithmetic.
+ *
+ * Reads a, b and the modulus m (all base 10) from the command line and
+ * prints a mod m, b mod m, b mod 1853, (a + b) mod m, (a - b) mod m,
+ * (a * b) mod m and (a ** b) mod m, followed by five rounds of in-place
+ * modular squaring of a.
+ *
+ * Returns 1 when fewer than three operands are given, 0 otherwise.
+ */
+int
+main(int argc, char *argv[])
+{
+    int ix;
+    mp_int a, b, c, m;
+    mp_digit r = 0;
+
+    if (argc < 4) {
+        fprintf(stderr, "Usage: %s <a> <b> <m>\n", argv[0]);
+        return 1;
+    }
+
+    printf("Test 4: Modular arithmetic\n\n");
+
+    mp_init(&a);
+    mp_init(&b);
+    mp_init(&m);
+
+    mp_read_radix(&a, argv[1], 10);
+    mp_read_radix(&b, argv[2], 10);
+    mp_read_radix(&m, argv[3], 10);
+    show("a = ", &a);
+    show("b = ", &b);
+    show("m = ", &m);
+
+    mp_init(&c);
+    printf("\nc = a (mod m)\n");
+
+    mp_mod(&a, &m, &c);
+    show("c = ", &c);
+
+    printf("\nc = b (mod m)\n");
+
+    mp_mod(&b, &m, &c);
+    show("c = ", &c);
+
+    printf("\nc = b (mod 1853)\n");
+
+    mp_mod_d(&b, 1853, &r);
+    /*
+     * The width of mp_digit depends on the build configuration, so convert
+     * explicitly to the type the conversion specifier expects.  r is a
+     * remainder modulo 1853 and always fits.
+     */
+    printf("c = %04lX\n", (unsigned long)r);
+
+    printf("\nc = (a + b) mod m\n");
+
+    mp_addmod(&a, &b, &m, &c);
+    show("c = ", &c);
+
+    printf("\nc = (a - b) mod m\n");
+
+    mp_submod(&a, &b, &m, &c);
+    show("c = ", &c);
+
+    printf("\nc = (a * b) mod m\n");
+
+    mp_mulmod(&a, &b, &m, &c);
+    show("c = ", &c);
+
+    printf("\nc = (a ** b) mod m\n");
+
+    mp_exptmod(&a, &b, &m, &c);
+    show("c = ", &c);
+
+    printf("\nIn-place modular squaring test:\n");
+    for (ix = 0; ix < 5; ix++) {
+        printf("a = (a * a) mod m a = ");
+        /* Input and output are the same mp_int on purpose. */
+        mp_sqrmod(&a, &m, &a);
+        mp_print(&a, stdout);
+        fputc('\n', stdout);
+    }
+
+    mp_clear(&c);
+    mp_clear(&m);
+    mp_clear(&b);
+    mp_clear(&a);
+
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/tests/mptest-4a.c b/security/nss/lib/freebl/mpi/tests/mptest-4a.c
new file mode 100644
index 000000000..0c8e18872
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/tests/mptest-4a.c
@@ -0,0 +1,109 @@
+/*
+ * mptest4a - modular exponentiation speed test
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <time.h>
+
+#include <sys/time.h>
+
+#include "mpi.h"
+#include "mpprime.h"
+
+/* A point in time, split into whole seconds and microseconds. */
+typedef struct {
+    unsigned int sec;
+    unsigned int usec;
+} instant_t;
+
+/*
+ * Sample the current time of day with gettimeofday().
+ *
+ * Returns the sampled instant, or an instant with both fields zero when
+ * gettimeofday() reports failure.
+ */
+instant_t
+now(void)
+{
+    instant_t stamp = { 0, 0 };
+    struct timeval tv;
+
+    if (gettimeofday(&tv, NULL) == 0) {
+        stamp.sec = tv.tv_sec;
+        stamp.usec = tv.tv_usec;
+    }
+
+    return stamp;
+}
+
+extern mp_err s_mp_pad();
+
+/*
+ * Test 4a driver: modular exponentiation timing.
+ *
+ * argv[1] is the number of exponentiations to run (its absolute value is
+ * used; zero is rejected).  argv[2], when present and positive, is the
+ * operand size in digits; otherwise 8 digits are used.
+ *
+ * Each iteration fills a, c and m with random values and computes
+ * c = (a ** c) mod m.  The total and per-operation elapsed time are
+ * printed in microseconds.
+ *
+ * Returns 0 on completion and 1 on a usage error.
+ */
+int
+main(int argc, char *argv[])
+{
+    int ix, num, prec = 8;
+    unsigned int d;
+    instant_t start, finish;
+    time_t seed;
+    mp_int a, m, c;
+
+    seed = time(NULL);
+
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s <num-tests> [<precision>]\n", argv[0]);
+        return 1;
+    }
+
+    if ((num = atoi(argv[1])) < 0)
+        num = -num;
+
+    if (!num) {
+        fprintf(stderr, "%s: must perform at least 1 test\n", argv[0]);
+        return 1;
+    }
+
+    if (argc > 2) {
+        if ((prec = atoi(argv[2])) <= 0)
+            prec = 8;
+    }
+
+    /* The banner previously said "Test 3a", copied from the squaring test. */
+    printf("Test 4a: Modular exponentiation timing test\n"
+           "Precision: %d digits (%d bits)\n"
+           "# of tests: %d\n\n",
+           prec, prec * DIGIT_BIT, num);
+
+    mp_init_size(&a, prec);
+    mp_init_size(&m, prec);
+    mp_init_size(&c, prec);
+    /* NOTE(review): presumably extends each value to prec used digits so
+     * that mpp_random fills all of them -- confirm against mpi.c. */
+    s_mp_pad(&a, prec);
+    s_mp_pad(&m, prec);
+    s_mp_pad(&c, prec);
+
+    printf("Testing modular exponentiation ... \n");
+    srand((unsigned int)seed);
+
+    start = now();
+    for (ix = 0; ix < num; ix++) {
+        mpp_random(&a);
+        mpp_random(&c);
+        mpp_random(&m);
+        mp_exptmod(&a, &c, &m, &c);
+    }
+    finish = now();
+
+    /* Elapsed microseconds, computed in unsigned arithmetic. */
+    d = (finish.sec - start.sec) * 1000000;
+    d -= start.usec;
+    d += finish.usec;
+
+    printf("Total time elapsed: %u usec\n", d);
+    printf("Time per exponentiation: %u usec (%.3f sec)\n",
+           (d / num), (double)(d / num) / 1000000);
+
+    mp_clear(&c);
+    mp_clear(&a);
+    mp_clear(&m);
+
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/tests/mptest-4b.c b/security/nss/lib/freebl/mpi/tests/mptest-4b.c
new file mode 100644
index 000000000..1bb2f911f
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/tests/mptest-4b.c
@@ -0,0 +1,107 @@
+/*
+ * mptest-4b.c
+ *
+ * Test speed of a large modular exponentiation of a primitive element
+ * modulo a prime.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <time.h>
+
+#include <sys/time.h>
+
+#include "mpi.h"
+#include "mpprime.h"
+
+char *g_prime = /* modulus for the timing run, as a hex string; main() reads it with radix 16 */
+ "34BD53C07350E817CCD49721020F1754527959C421C1533244769D4CF060A8B1C3DA"
+ "25094BE723FB1E2369B55FEEBBE0FAC16425161BF82684062B5EC5D7D47D1B23C117"
+ "0FA19745E44A55E148314E582EB813AC9EE5126295E2E380CACC2F6D206B293E5ED9"
+ "23B54EE961A8C69CD625CE4EC38B70C649D7F014432AEF3A1C93";
+char *g_gen = "5"; /* base of the exponentiation; main() also reads it with radix 16 */
+
+/* A point in time, split into whole seconds and microseconds. */
+typedef struct {
+    unsigned int sec;
+    unsigned int usec;
+} instant_t;
+
+/*
+ * Sample the current time of day with gettimeofday().
+ *
+ * Returns the sampled instant, or an instant with both fields zero when
+ * gettimeofday() reports failure.
+ */
+instant_t
+now(void)
+{
+    instant_t stamp = { 0, 0 };
+    struct timeval tv;
+
+    if (gettimeofday(&tv, NULL) == 0) {
+        stamp.sec = tv.tv_sec;
+        stamp.usec = tv.tv_usec;
+    }
+
+    return stamp;
+}
+
+extern mp_err s_mp_pad();
+
+/*
+ * Test 4b driver: times repeated modular exponentiations of g_gen modulo
+ * g_prime with random exponents.
+ *
+ * argv[1] is the number of exponentiations (its absolute value is used,
+ * and zero is bumped to one).  The exponent is sized one digit shorter
+ * than the modulus.
+ *
+ * Returns 0 on completion and 1 when no count is supplied.
+ */
+int
+main(int argc, char *argv[])
+{
+    mp_int prime, gen, expt, res;
+    instant_t start, finish;
+    unsigned int left, diff;
+    double secs;
+    int num;
+
+    srand(time(NULL));
+
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s <num-tests>\n", argv[0]);
+        return 1;
+    }
+
+    num = atoi(argv[1]);
+    if (num < 0)
+        num = -num;
+    if (num == 0)
+        num = 1;
+
+    mp_init(&prime);
+    mp_init(&gen);
+    mp_init(&res);
+    mp_read_radix(&prime, g_prime, 16);
+    mp_read_radix(&gen, g_gen, 16);
+
+    mp_init_size(&expt, USED(&prime) - 1);
+    s_mp_pad(&expt, USED(&prime) - 1);
+
+    printf("Testing %d modular exponentations ... \n", num);
+
+    start = now();
+    /* Counted in unsigned arithmetic, as the iteration count always was. */
+    for (left = num; left != 0; left--) {
+        mpp_random(&expt);
+        mp_exptmod(&gen, &expt, &prime, &res);
+    }
+    finish = now();
+
+    /* Elapsed microseconds; wraps modulo UINT_MAX + 1 like any unsigned sum. */
+    diff = (finish.sec - start.sec) * 1000000 + finish.usec - start.usec;
+    secs = (double)diff / 1000000.0;
+
+    printf("%d operations took %u usec (%.3f sec)\n", num, diff, secs);
+    printf("That is %.3f sec per operation.\n", secs / num);
+
+    mp_clear(&expt);
+    mp_clear(&res);
+    mp_clear(&gen);
+    mp_clear(&prime);
+
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/tests/mptest-5.c b/security/nss/lib/freebl/mpi/tests/mptest-5.c
new file mode 100644
index 000000000..dff3ed470
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/tests/mptest-5.c
@@ -0,0 +1,85 @@
+/*
+ * Simple test driver for MPI library
+ *
+ * Test 5: Other number theoretic functions
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+
+#include "mpi.h"
+
+/* Write a label, the value of *val as rendered by mp_print, and a newline. */
+static void
+show(const char *label, mp_int *val)
+{
+    fputs(label, stdout);
+    mp_print(val, stdout);
+    putchar('\n');
+}
+
+/*
+ * Test 5 driver: number-theoretic functions.
+ *
+ * Reads a and b (base 10) from the command line and prints their GCD, the
+ * extended-GCD triple (c, x, y) and the inverse of a modulo b, or a note
+ * when mp_invmod reports MP_UNDEF.  The binary GCD routine is not
+ * exercised here.
+ *
+ * Returns 1 when fewer than two operands are given, 0 otherwise.
+ */
+int
+main(int argc, char *argv[])
+{
+    mp_int a, b, c, x, y;
+
+    if (argc < 3) {
+        fprintf(stderr, "Usage: %s <a> <b>\n", argv[0]);
+        return 1;
+    }
+
+    puts("Test 5: Number theoretic functions\n");
+
+    mp_init(&a);
+    mp_init(&b);
+    mp_init(&c);
+    mp_init(&x);
+    mp_init(&y);
+
+    mp_read_radix(&a, argv[1], 10);
+    mp_read_radix(&b, argv[2], 10);
+
+    show("a = ", &a);
+    show("b = ", &b);
+
+    puts("\nc = (a, b)");
+    mp_gcd(&a, &b, &c);
+    show("Euclid: c = ", &c);
+
+    puts("\nc = (a, b) = ax + by");
+    mp_xgcd(&a, &b, &c, &x, &y);
+    show("c = ", &c);
+    show("x = ", &x);
+    show("y = ", &y);
+
+    puts("\nc = a^-1 (mod b)");
+    if (mp_invmod(&a, &b, &c) != MP_UNDEF)
+        show("c = ", &c);
+    else
+        puts("a has no inverse mod b");
+
+    mp_clear(&y);
+    mp_clear(&x);
+    mp_clear(&c);
+    mp_clear(&b);
+    mp_clear(&a);
+
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/tests/mptest-5a.c b/security/nss/lib/freebl/mpi/tests/mptest-5a.c
new file mode 100644
index 000000000..c410a6a84
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/tests/mptest-5a.c
@@ -0,0 +1,147 @@
+/*
+ * Simple test driver for MPI library
+ *
+ * Test 5a: Greatest common divisor speed test, binary vs. Euclid
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+#include <time.h>
+
+#include <sys/time.h>
+
+#include "mpi.h"
+#include "mpprime.h"
+
+/* A point in time, split into whole seconds and microseconds. */
+typedef struct {
+    unsigned int sec;
+    unsigned int usec;
+} instant_t;
+
+/*
+ * Sample the current time of day with gettimeofday().
+ *
+ * Returns the sampled instant, or an instant with both fields zero when
+ * gettimeofday() reports failure.
+ */
+instant_t
+now(void)
+{
+    instant_t stamp = { 0, 0 };
+    struct timeval tv;
+
+    if (gettimeofday(&tv, NULL) == 0) {
+        stamp.sec = tv.tv_sec;
+        stamp.usec = tv.tv_usec;
+    }
+
+    return stamp;
+}
+
+#define PRECISION 16
+
+/* Write a label, the value of *val as rendered by mp_print, and a newline. */
+static void
+show(const char *label, mp_int *val)
+{
+    fputs(label, stdout);
+    mp_print(val, stdout);
+    fputc('\n', stdout);
+}
+
+/*
+ * Test 5a driver: compares mp_gcd against mp_bgcd for agreement and speed.
+ *
+ * argv[1] is the number of random operand pairs to try (its absolute value
+ * is used; zero is rejected, as in the other timing drivers).  Operands are
+ * PRECISION digits long.
+ *
+ * The run first checks that both routines agree on every pair, then times
+ * each routine over the same number of pairs, reseeding rand() with the
+ * same seed before each phase.
+ *
+ * Returns 0 on success and 1 on a usage error or a result mismatch.
+ */
+int
+main(int argc, char *argv[])
+{
+    int ix, num, prec = PRECISION;
+    mp_int a, b, c, d;
+    instant_t start, finish;
+    time_t seed;
+    unsigned int d1, d2;
+
+    seed = time(NULL);
+
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s <num-tests>\n", argv[0]);
+        return 1;
+    }
+
+    if ((num = atoi(argv[1])) < 0)
+        num = -num;
+
+    /*
+     * With no samples nothing is verified or timed, and the final ratio
+     * would be computed from two empty measurements.
+     */
+    if (!num) {
+        fprintf(stderr, "%s: must perform at least 1 test\n", argv[0]);
+        return 1;
+    }
+
+    printf("Test 5a: Euclid vs. Binary, a GCD speed test\n\n"
+           "Number of tests: %d\n"
+           "Precision: %d digits\n\n",
+           num, prec);
+
+    mp_init_size(&a, prec);
+    mp_init_size(&b, prec);
+    mp_init(&c);
+    mp_init(&d);
+
+    printf("Verifying accuracy ... \n");
+    srand((unsigned int)seed);
+    for (ix = 0; ix < num; ix++) {
+        mpp_random_size(&a, prec);
+        mpp_random_size(&b, prec);
+
+        mp_gcd(&a, &b, &c);
+        mp_bgcd(&a, &b, &d);
+
+        if (mp_cmp(&c, &d) != 0) {
+            /* Mismatch: dump the operands and both results, then give up. */
+            printf("Error! Results not accurate:\n");
+            show("a = ", &a);
+            show("b = ", &b);
+            show("c = ", &c);
+            show("d = ", &d);
+
+            mp_clear(&a);
+            mp_clear(&b);
+            mp_clear(&c);
+            mp_clear(&d);
+            return 1;
+        }
+    }
+    mp_clear(&d);
+    printf("Accuracy confirmed for the %d test samples\n", num);
+
+    printf("Testing Euclid ... \n");
+    srand((unsigned int)seed);
+    start = now();
+    for (ix = 0; ix < num; ix++) {
+        mpp_random_size(&a, prec);
+        mpp_random_size(&b, prec);
+        mp_gcd(&a, &b, &c);
+    }
+    finish = now();
+
+    /* Elapsed microseconds, computed in unsigned arithmetic. */
+    d1 = (finish.sec - start.sec) * 1000000;
+    d1 -= start.usec;
+    d1 += finish.usec;
+
+    printf("Testing binary ... \n");
+    srand((unsigned int)seed);
+    start = now();
+    for (ix = 0; ix < num; ix++) {
+        mpp_random_size(&a, prec);
+        mpp_random_size(&b, prec);
+        mp_bgcd(&a, &b, &c);
+    }
+    finish = now();
+
+    d2 = (finish.sec - start.sec) * 1000000;
+    d2 -= start.usec;
+    d2 += finish.usec;
+
+    printf("Euclidean algorithm time: %u usec\n", d1);
+    printf("Binary algorithm time: %u usec\n", d2);
+    printf("Improvement: %.2f%%\n",
+           (1.0 - ((double)d2 / (double)d1)) * 100.0);
+
+    mp_clear(&c);
+    mp_clear(&b);
+    mp_clear(&a);
+
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/tests/mptest-6.c b/security/nss/lib/freebl/mpi/tests/mptest-6.c
new file mode 100644
index 000000000..4febf39c5
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/tests/mptest-6.c
@@ -0,0 +1,78 @@
+/*
+ * Simple test driver for MPI library
+ *
+ * Test 6: Output functions
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+
+#include "mpi.h"
+
+/*
+ * Dump a byte buffer as hexadecimal text.
+ *
+ * ofp  stream to write to
+ * buf  bytes to dump
+ * len  number of bytes in buf
+ *
+ * Each byte is written as two uppercase hex digits followed by a space,
+ * sixteen bytes per line.  A closing newline is added when the last line
+ * holds fewer than sixteen bytes.
+ */
+void
+print_buf(FILE *ofp, char *buf, int len)
+{
+    int ix, brk = 0;
+
+    for (ix = 0; ix < len; ix++) {
+        /*
+         * Go through unsigned char: where plain char is signed, a byte of
+         * 0x80 or above would be sign-extended by the default argument
+         * promotion and print as FFFFFF80 instead of 80.
+         */
+        fprintf(ofp, "%02X ", (unsigned int)(unsigned char)buf[ix]);
+
+        /* brk counts bytes on the current line, wrapping at 16. */
+        brk = (brk + 1) & 0xF;
+        if (!brk)
+            fputc('\n', ofp);
+    }
+
+    if (brk)
+        fputc('\n', ofp);
+}
+
+/*
+ * Test 6 driver: output functions.
+ *
+ * Reads a (base 10) from the command line, prints it in every radix from 2
+ * to MAX_RADIX together with the buffer size reported by mp_radix_size,
+ * then prints the raw form produced by mp_toraw as a hex dump.
+ *
+ * Returns 0 on success and 1 on a usage error or allocation failure.
+ */
+int
+main(int argc, char *argv[])
+{
+    int ix, size;
+    mp_int a;
+    char *buf;
+
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s <a>\n", argv[0]);
+        return 1;
+    }
+
+    printf("Test 6: Output functions\n\n");
+
+    mp_init(&a);
+
+    mp_read_radix(&a, argv[1], 10);
+
+    printf("\nConverting to a string:\n");
+
+    printf("Rx Size Representation\n");
+    for (ix = 2; ix <= MAX_RADIX; ix++) {
+        size = mp_radix_size(&a, ix);
+
+        buf = calloc(size, sizeof(char));
+        if (buf == NULL) {
+            /* Do not hand a null buffer to the conversion routine. */
+            fprintf(stderr, "%s: out of memory\n", argv[0]);
+            mp_clear(&a);
+            return 1;
+        }
+        mp_toradix(&a, buf, ix);
+        printf("%2d: %3d: %s\n", ix, size, buf);
+        free(buf);
+    }
+
+    printf("\nRaw output:\n");
+    size = mp_raw_size(&a);
+    buf = calloc(size, sizeof(char));
+    if (buf == NULL) {
+        fprintf(stderr, "%s: out of memory\n", argv[0]);
+        mp_clear(&a);
+        return 1;
+    }
+
+    printf("Size: %d bytes\n", size);
+
+    mp_toraw(&a, buf);
+    print_buf(stdout, buf, size);
+    free(buf);
+
+    mp_clear(&a);
+
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/tests/mptest-7.c b/security/nss/lib/freebl/mpi/tests/mptest-7.c
new file mode 100644
index 000000000..1e83fbf96
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/tests/mptest-7.c
@@ -0,0 +1,85 @@
+/*
+ * Simple test driver for MPI library
+ *
+ * Test 7: Random and divisibility tests
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+#include <time.h>
+
+#define MP_IOFUNC 1
+#include "mpi.h"
+
+#include "mpprime.h"
+
+/* Write a label, the value of *val as rendered by mp_print, and a newline. */
+static void
+show(const char *label, mp_int *val)
+{
+    fputs(label, stdout);
+    mp_print(val, stdout);
+    putchar('\n');
+}
+
+/*
+ * Test 7 driver: random values and divisibility.
+ *
+ * Reads a and b (base 10) from the command line, reports whether each
+ * divides the other, overwrites b with three successive mpp_random values,
+ * and finally checks a against a table of 170 small primes.
+ *
+ * Returns 1 when fewer than two operands are given, 0 otherwise.
+ */
+int
+main(int argc, char *argv[])
+{
+    mp_int a, b;
+    mp_digit num;
+    int round;
+
+    srand(time(NULL));
+
+    if (argc < 3) {
+        fprintf(stderr, "Usage: %s <a> <b>\n", argv[0]);
+        return 1;
+    }
+
+    puts("Test 7: Random & divisibility tests\n");
+
+    mp_init(&a);
+    mp_init(&b);
+
+    mp_read_radix(&a, argv[1], 10);
+    mp_read_radix(&b, argv[2], 10);
+
+    show("a = ", &a);
+    show("b = ", &b);
+
+    puts(mpp_divis(&a, &b) == MP_YES ? "a is divisible by b"
+                                     : "a is not divisible by b");
+    puts(mpp_divis(&b, &a) == MP_YES ? "b is divisible by a"
+                                     : "b is not divisible by a");
+
+    puts("\nb = mpp_random()");
+    for (round = 0; round < 3; round++) {
+        mpp_random(&b);
+        show("b = ", &b);
+    }
+
+    puts("\nTesting a for divisibility by first 170 primes");
+    num = 170;
+    puts(mpp_divis_primes(&a, &num) == MP_YES
+             ? "It is divisible by at least one of them"
+             : "It is not divisible by any of them");
+
+    mp_clear(&b);
+    mp_clear(&a);
+
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/tests/mptest-8.c b/security/nss/lib/freebl/mpi/tests/mptest-8.c
new file mode 100644
index 000000000..a9d3afff9
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/tests/mptest-8.c
@@ -0,0 +1,68 @@
+/*
+ * Simple test driver for MPI library
+ *
+ * Test 8: Probabilistic primality tester
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+#include <time.h>
+
+#define MP_IOFUNC 1
+#include "mpi.h"
+
+#include "mpprime.h"
+
+/*
+ * Test 8 driver: probabilistic primality testing.
+ *
+ * Reads a (base 10) from the command line, screens it against a table of
+ * 170 small primes and, if that finds no divisor, calls mpp_pprime(&a, 5)
+ * up to ten times, stopping at the first MP_NO.
+ *
+ * Returns 1 when no operand is given, 0 otherwise (whatever the verdict).
+ */
+int
+main(int argc, char *argv[])
+{
+    mp_int a;
+    mp_digit num = 170;
+    int round, composite;
+
+    srand(time(NULL));
+
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s <a>\n", argv[0]);
+        return 1;
+    }
+
+    puts("Test 8: Probabilistic primality testing\n");
+
+    mp_init(&a);
+    mp_read_radix(&a, argv[1], 10);
+
+    fputs("a = ", stdout);
+    mp_print(&a, stdout);
+    putchar('\n');
+
+    puts("\nChecking for divisibility by small primes ... ");
+    composite = (mpp_divis_primes(&a, &num) == MP_YES);
+
+    if (!composite) {
+        puts("Passed that test (not divisible by any small primes).");
+
+        /* Stop at the first round that rejects the candidate. */
+        for (round = 1; round <= 10 && !composite; round++) {
+            printf("\nPerforming Rabin-Miller test, iteration %d\n", round);
+            composite = (mpp_pprime(&a, 5) == MP_NO);
+        }
+    }
+
+    puts(composite ? "it is not prime"
+                   : "All tests passed; a is probably prime");
+
+    mp_clear(&a);
+
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/tests/mptest-9.c b/security/nss/lib/freebl/mpi/tests/mptest-9.c
new file mode 100644
index 000000000..133264e89
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/tests/mptest-9.c
@@ -0,0 +1,109 @@
+/*
+ * mptest-9.c
+ *
+ * Test logical functions
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+#include <time.h>
+
+#include "mpi.h"
+#include "mplogic.h"
+
+/* Write a label, the value of *val as rendered by mp_print, and a newline. */
+static void
+show(const char *label, mp_int *val)
+{
+    fputs(label, stdout);
+    mp_print(val, stdout);
+    putchar('\n');
+}
+
+/*
+ * Test 9 driver: logical (bitwise) functions.
+ *
+ * Reads a and b as base-16 values from the command line and prints ~a,
+ * a & b, a | b, a ^ b, a shifted right and left by 1, 5 and 16 bits, the
+ * count reported by mpl_num_set for a and b, and the parity of a.
+ *
+ * The banner is printed before the argument check.  Returns 1 when fewer
+ * than two operands are given, 0 otherwise.
+ */
+int
+main(int argc, char *argv[])
+{
+    static const int shifts[3] = { 1, 5, 16 };
+    static const char *const rsh_label[3] = { "a >> 1 = ", "a >> 5 = ", "a >> 16 = " };
+    static const char *const lsh_label[3] = { "a << 1 = ", "a << 5 = ", "a << 16 = " };
+    mp_int a, b, c;
+    mp_err res;
+    int pco, k;
+
+    puts("Test 9: Logical functions\n");
+
+    if (argc < 3) {
+        fprintf(stderr, "Usage: %s <a> <b>\n", argv[0]);
+        return 1;
+    }
+
+    mp_init(&a);
+    mp_init(&b);
+    mp_init(&c);
+    mp_read_radix(&a, argv[1], 16);
+    mp_read_radix(&b, argv[2], 16);
+
+    show("a = ", &a);
+    show("b = ", &b);
+
+    mpl_not(&a, &c);
+    show("~a = ", &c);
+
+    mpl_and(&a, &b, &c);
+    show("a & b = ", &c);
+
+    mpl_or(&a, &b, &c);
+    show("a | b = ", &c);
+
+    mpl_xor(&a, &b, &c);
+    show("a ^ b = ", &c);
+
+    for (k = 0; k < 3; k++) {
+        mpl_rsh(&a, &c, shifts[k]);
+        show(rsh_label[k], &c);
+    }
+
+    for (k = 0; k < 3; k++) {
+        mpl_lsh(&a, &c, shifts[k]);
+        show(lsh_label[k], &c);
+    }
+
+    mpl_num_set(&a, &pco);
+    printf("population(a) = %d\n", pco);
+    mpl_num_set(&b, &pco);
+    printf("population(b) = %d\n", pco);
+
+    res = mpl_parity(&a);
+    puts(res == MP_EVEN ? "a has even parity" : "a has odd parity");
+
+    mp_clear(&c);
+    mp_clear(&b);
+    mp_clear(&a);
+
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/tests/mptest-b.c b/security/nss/lib/freebl/mpi/tests/mptest-b.c
new file mode 100644
index 000000000..07f30eaf8
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/tests/mptest-b.c
@@ -0,0 +1,230 @@
+/*
+ * Simple test driver for MPI library
+ *
+ * Test GF2m: Binary Polynomial Arithmetic
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+
+#include "mp_gf2m.h"
+
+/* Write `label`, then the value of `v`, then a newline, to stdout. */
+static void
+show(const char *label, mp_int *v)
+{
+    fputs(label, stdout);
+    mp_print(v, stdout);
+    fputc('\n', stdout);
+}
+
+/*
+ * Test b driver: exercises the binary-polynomial routines declared in
+ * mp_gf2m.h against fixed operands using self-consistency identities
+ * (a+a == 0, (b*p) mod p == 0, (b/x)*x == b, ...).
+ *
+ * Returns 0 when every check passes, and -1 when a check fails or when
+ * a call wrapped in MP_CHECKOK reports an error.  Every mp_int is
+ * released on every exit path.
+ */
+int
+main(int argc, char *argv[])
+{
+    int ix;
+    int rv = 0; /* exit status; becomes -1 on any failure */
+    mp_int pp, a, b, x, y, order;
+    mp_int c, d, e;
+    /* MP_CHECKOK (mpi.h) stores each call's status here and jumps to
+     * CLEANUP when it is below MP_OKAY. */
+    mp_err res = MP_OKAY;
+    /* Set-bit positions of pp, highest first, 0-terminated; the first
+     * conversion test below checks pp against exactly this array. */
+    unsigned int p[] = { 163, 7, 6, 3, 0 };
+    unsigned int ptemp[10];
+
+    printf("Test b: Binary Polynomial Arithmetic\n\n");
+
+    /* Initialise everything up front so CLEANUP can clear all nine
+     * values no matter where a failure occurs. */
+    mp_init(&pp);
+    mp_init(&a);
+    mp_init(&b);
+    mp_init(&x);
+    mp_init(&y);
+    mp_init(&order);
+    mp_init(&c);
+    mp_init(&d);
+    mp_init(&e);
+
+    mp_read_radix(&pp, "0800000000000000000000000000000000000000C9", 16);
+    mp_read_radix(&a, "1", 16);
+    mp_read_radix(&b, "020A601907B8C953CA1481EB10512F78744A3205FD", 16);
+    mp_read_radix(&x, "03F0EBA16286A2D57EA0991168D4994637E8343E36", 16);
+    mp_read_radix(&y, "00D51FBC6C71A0094FA2CDD545B11C5C0C797324F1", 16);
+    mp_read_radix(&order, "040000000000000000000292FE77E70C12A4234C33", 16);
+    show("pp = ", &pp);
+    show("a = ", &a);
+    show("b = ", &b);
+    show("x = ", &x);
+    show("y = ", &y);
+    show("order = ", &order);
+
+    /* Test polynomial conversion */
+    ix = mp_bpoly2arr(&pp, ptemp, 10);
+    if (
+        (ix != 5) ||
+        (ptemp[0] != p[0]) ||
+        (ptemp[1] != p[1]) ||
+        (ptemp[2] != p[2]) ||
+        (ptemp[3] != p[3]) ||
+        (ptemp[4] != p[4])) {
+        printf("Polynomial to array conversion not correct\n");
+        rv = -1;
+        goto CLEANUP;
+    }
+
+    printf("Polynomial conversion test #1 successful.\n");
+    MP_CHECKOK(mp_barr2poly(p, &c));
+    if (mp_cmp(&pp, &c) != 0) {
+        printf("Array to polynomial conversion not correct\n");
+        rv = -1;
+        goto CLEANUP;
+    }
+    printf("Polynomial conversion test #2 successful.\n");
+
+    /* Test addition */
+    MP_CHECKOK(mp_badd(&a, &a, &c));
+    if (mp_cmp_z(&c) != 0) {
+        printf("a+a should equal zero\n");
+        rv = -1;
+        goto CLEANUP;
+    }
+    printf("Addition test #1 successful.\n");
+    MP_CHECKOK(mp_badd(&a, &b, &c));
+    MP_CHECKOK(mp_badd(&b, &c, &c));
+    if (mp_cmp(&c, &a) != 0) {
+        printf("c = (a + b) + b should equal a\n");
+        show("a = ", &a);
+        show("c = ", &c);
+        rv = -1;
+        goto CLEANUP;
+    }
+    printf("Addition test #2 successful.\n");
+
+    /* Test multiplication */
+    mp_set(&c, 2);
+    MP_CHECKOK(mp_bmul(&b, &c, &c));
+    MP_CHECKOK(mp_badd(&b, &c, &c));
+    mp_set(&d, 3);
+    MP_CHECKOK(mp_bmul(&b, &d, &d));
+    if (mp_cmp(&c, &d) != 0) {
+        /* d, not c, holds 3 * b; the message now names it correctly. */
+        printf("c = (2 * b) + b should equal d = 3 * b\n");
+        show("c = ", &c);
+        show("d = ", &d);
+        rv = -1;
+        goto CLEANUP;
+    }
+    printf("Multiplication test #1 successful.\n");
+
+    /* Test modular reduction */
+    MP_CHECKOK(mp_bmod(&b, p, &c));
+    if (mp_cmp(&b, &c) != 0) {
+        printf("c = b mod p should equal b\n");
+        show("b = ", &b);
+        show("c = ", &c);
+        rv = -1;
+        goto CLEANUP;
+    }
+    printf("Modular reduction test #1 successful.\n");
+    MP_CHECKOK(mp_badd(&b, &pp, &c));
+    MP_CHECKOK(mp_bmod(&c, p, &c));
+    if (mp_cmp(&b, &c) != 0) {
+        printf("c = (b + p) mod p should equal b\n");
+        show("b = ", &b);
+        show("c = ", &c);
+        rv = -1;
+        goto CLEANUP;
+    }
+    printf("Modular reduction test #2 successful.\n");
+    MP_CHECKOK(mp_bmul(&b, &pp, &c));
+    MP_CHECKOK(mp_bmod(&c, p, &c));
+    if (mp_cmp_z(&c) != 0) {
+        printf("c = (b * p) mod p should equal 0\n");
+        show("c = ", &c);
+        rv = -1;
+        goto CLEANUP;
+    }
+    printf("Modular reduction test #3 successful.\n");
+
+    /* Test modular multiplication */
+    MP_CHECKOK(mp_bmulmod(&b, &pp, p, &c));
+    if (mp_cmp_z(&c) != 0) {
+        printf("c = (b * p) mod p should equal 0\n");
+        show("c = ", &c);
+        rv = -1;
+        goto CLEANUP;
+    }
+    printf("Modular multiplication test #1 successful.\n");
+    mp_set(&c, 1);
+    MP_CHECKOK(mp_badd(&pp, &c, &c));
+    MP_CHECKOK(mp_bmulmod(&b, &c, p, &c));
+    if (mp_cmp(&b, &c) != 0) {
+        printf("c = (b * (p + 1)) mod p should equal b\n");
+        show("b = ", &b);
+        show("c = ", &c);
+        rv = -1;
+        goto CLEANUP;
+    }
+    printf("Modular multiplication test #2 successful.\n");
+
+    /* Test modular squaring */
+    MP_CHECKOK(mp_copy(&b, &c));
+    MP_CHECKOK(mp_bmulmod(&b, &c, p, &c));
+    MP_CHECKOK(mp_bsqrmod(&b, p, &d));
+    if (mp_cmp(&c, &d) != 0) {
+        printf("c = (b * b) mod p should equal d = b^2 mod p\n");
+        show("c = ", &c);
+        show("d = ", &d);
+        rv = -1;
+        goto CLEANUP;
+    }
+    printf("Modular squaring test #1 successful.\n");
+
+    /* Test modular division */
+    MP_CHECKOK(mp_bdivmod(&b, &x, &pp, p, &c));
+    MP_CHECKOK(mp_bmulmod(&c, &x, p, &c));
+    if (mp_cmp(&b, &c) != 0) {
+        printf("c = (b / x) * x mod p should equal b\n");
+        show("b = ", &b);
+        show("c = ", &c);
+        rv = -1;
+        goto CLEANUP;
+    }
+    printf("Modular division test #1 successful.\n");
+
+CLEANUP:
+    /* A negative res means MP_CHECKOK bailed out on a library error;
+     * report it instead of exiting with a success status. */
+    if (res < MP_OKAY) {
+        fprintf(stderr, "MPI operation failed (error %d)\n", (int)res);
+        rv = -1;
+    }
+
+    mp_clear(&e);
+    mp_clear(&d);
+    mp_clear(&c);
+    mp_clear(&order);
+    mp_clear(&y);
+    mp_clear(&x);
+    mp_clear(&b);
+    mp_clear(&a);
+    mp_clear(&pp);
+
+    return rv;
+}
diff --git a/security/nss/lib/freebl/mpi/tests/pi1k.txt b/security/nss/lib/freebl/mpi/tests/pi1k.txt
new file mode 100644
index 000000000..5ff6209ff
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/tests/pi1k.txt
@@ -0,0 +1 @@
+31415926535897932384626433832795028841971693993751058209749445923078164062862089986280348253421170679821480865132823066470938446095505822317253594081284811174502841027019385211055596446229489549303819644288109756659334461284756482337867831652712019091456485669234603486104543266482133936072602491412737245870066063155881748815209209628292540917153643678925903600113305305488204665213841469519415116094330572703657595919530921861173819326117931051185480744623799627495673518857527248912279381830119491298336733624406566430860213949463952247371907021798609437027705392171762931767523846748184676694051320005681271452635608277857713427577896091736371787214684409012249534301465495853710507922796892589235420199561121290219608640344181598136297747713099605187072113499999983729780499510597317328160963185950244594553469083026425223082533446850352619311881710100031378387528865875332083814206171776691473035982534904287554687311595628638823537875937519577818577805321712268066130019278766111959092164201989
diff --git a/security/nss/lib/freebl/mpi/tests/pi2k.txt b/security/nss/lib/freebl/mpi/tests/pi2k.txt
new file mode 100644
index 000000000..9ce82acd1
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/tests/pi2k.txt
@@ -0,0 +1 @@
+3141592653589793238462643383279502884197169399375105820974944592307816406286208998628034825342117067982148086513282306647093844609550582231725359408128481117450284102701938521105559644622948954930381964428810975665933446128475648233786783165271201909145648566923460348610454326648213393607260249141273724587006606315588174881520920962829254091715364367892590360011330530548820466521384146951941511609433057270365759591953092186117381932611793105118548074462379962749567351885752724891227938183011949129833673362440656643086021394946395224737190702179860943702770539217176293176752384674818467669405132000568127145263560827785771342757789609173637178721468440901224953430146549585371050792279689258923542019956112129021960864034418159813629774771309960518707211349999998372978049951059731732816096318595024459455346908302642522308253344685035261931188171010003137838752886587533208381420617177669147303598253490428755468731159562863882353787593751957781857780532171226806613001927876611195909216420198938095257201065485863278865936153381827968230301952035301852968995773622599413891249721775283479131515574857242454150695950829533116861727855889075098381754637464939319255060400927701671139009848824012858361603563707660104710181942955596198946767837449448255379774726847104047534646208046684259069491293313677028989152104752162056966024058038150193511253382430035587640247496473263914199272604269922796782354781636009341721641219924586315030286182974555706749838505494588586926995690927210797509302955321165344987202755960236480665499119881834797753566369807426542527862551818417574672890977772793800081647060016145249192173217214772350141441973568548161361157352552133475741849468438523323907394143334547762416862518983569485562099219222184272550254256887671790494601653466804988627232791786085784383827967976681454100953883786360950680064225125205117392984896084128488626945604241965285022210661186306744278622039194945047123713786960956364371917287467764657573962413890865832645995813390478027590
10
diff --git a/security/nss/lib/freebl/mpi/tests/pi5k.txt b/security/nss/lib/freebl/mpi/tests/pi5k.txt
new file mode 100644
index 000000000..901fac2ea
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/tests/pi5k.txt
@@ -0,0 +1 @@
+3141592653589793238462643383279502884197169399375105820974944592307816406286208998628034825342117067982148086513282306647093844609550582231725359408128481117450284102701938521105559644622948954930381964428810975665933446128475648233786783165271201909145648566923460348610454326648213393607260249141273724587006606315588174881520920962829254091715364367892590360011330530548820466521384146951941511609433057270365759591953092186117381932611793105118548074462379962749567351885752724891227938183011949129833673362440656643086021394946395224737190702179860943702770539217176293176752384674818467669405132000568127145263560827785771342757789609173637178721468440901224953430146549585371050792279689258923542019956112129021960864034418159813629774771309960518707211349999998372978049951059731732816096318595024459455346908302642522308253344685035261931188171010003137838752886587533208381420617177669147303598253490428755468731159562863882353787593751957781857780532171226806613001927876611195909216420198938095257201065485863278865936153381827968230301952035301852968995773622599413891249721775283479131515574857242454150695950829533116861727855889075098381754637464939319255060400927701671139009848824012858361603563707660104710181942955596198946767837449448255379774726847104047534646208046684259069491293313677028989152104752162056966024058038150193511253382430035587640247496473263914199272604269922796782354781636009341721641219924586315030286182974555706749838505494588586926995690927210797509302955321165344987202755960236480665499119881834797753566369807426542527862551818417574672890977772793800081647060016145249192173217214772350141441973568548161361157352552133475741849468438523323907394143334547762416862518983569485562099219222184272550254256887671790494601653466804988627232791786085784383827967976681454100953883786360950680064225125205117392984896084128488626945604241965285022210661186306744278622039194945047123713786960956364371917287467764657573962413890865832645995813390478027590
09946576407895126946839835259570982582262052248940772671947826848260147699090264013639443745530506820349625245174939965143142980919065925093722169646151570985838741059788595977297549893016175392846813826868386894277415599185592524595395943104997252468084598727364469584865383673622262609912460805124388439045124413654976278079771569143599770012961608944169486855584840635342207222582848864815845602850601684273945226746767889525213852254995466672782398645659611635488623057745649803559363456817432411251507606947945109659609402522887971089314566913686722874894056010150330861792868092087476091782493858900971490967598526136554978189312978482168299894872265880485756401427047755513237964145152374623436454285844479526586782105114135473573952311342716610213596953623144295248493718711014576540359027993440374200731057853906219838744780847848968332144571386875194350643021845319104848100537061468067491927819119793995206141966342875444064374512371819217999839101591956181467514269123974894090718649423196156794520809514655022523160388193014209376213785595663893778708303906979207734672218256259966150142150306803844773454920260541466592520149744285073251866600213243408819071048633173464965145390579626856100550810665879699816357473638405257145910289706414011097120628043903975951567715770042033786993600723055876317635942187312514712053292819182618612586732157919841484882916447060957527069572209175671167229109816909152801735067127485832228718352093539657251210835791513698820914442100675103346711031412671113699086585163983150197016515116851714376576183515565088490998985998238734552833163550764791853589322618548963213293308985706420467525907091548141654985946163718027098199430992448895757128289059232332609729971208443357326548938239119325974636673058360414281388303203824903758985243744170291327656180937734440307074692112019130203303801976211011004492932151608424448596376698389522868478312355265821314495768572624334418930396864262434107732269780280731891544110104468232527162010526522721116603
966655730925471105578537634668206531098965269186205647693125705863566201855810072936065987648611791045334885034611365768675324944166803962657978771855608455296541266540853061434443185867697514566140680070023787765913440171274947042056223053899456131407112700040785473326993908145466464588079727082668306343285878569830523580893306575740679545716377525420211495576158140025012622859413021647155097925923099079654737612551765675135751782966645477917450112996148903046399471329621073404375189573596145890193897131117904297828564750320319869151402870808599048010941214722131794764777262241425485454033215718530614228813758504306332175182979866223717215916077166925474873898665494945011465406284336639379003976926567214638530673609657120918076383271664162748888007869256029022847210403172118608204190004229661711963779213375751149595015660496318629472654736425230817703675159067350235072835405670403867435136222247715891504953098444893330963408780769325993978054193414473774418426312986080998886874132604721
diff --git a/security/nss/lib/freebl/mpi/timetest b/security/nss/lib/freebl/mpi/timetest
new file mode 100755
index 000000000..c6f07bb30
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/timetest
@@ -0,0 +1,99 @@
+#!/bin/sh
+
+# Simple timing test for the MPI library. Basically, we use prime
+# generation as a timing test, since it exercises most of the pathways
+# of the library fairly heavily. The 'primegen' tool outputs a line
+# summarizing timing results. We gather these and process them for
+# statistical information, which is collected into a file.
+
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Avoid using built-in shell echoes
+ECHO=/bin/echo
+MAKE=gmake
+PERL=perl
+
+# Use a fixed seed so timings will be more consistent
+# This one is the 11th-18th decimal digits of 'e'
+#export SEED=45904523
+SEED=45904523; export SEED
+
+#------------------------------------------------------------------------
+
+$ECHO "\n** Running timing tests for MPI library\n"
+
+$ECHO "Bringing 'metime' up to date ... "
+if $MAKE metime ; then
+ :
+else
+ $ECHO "\nMake failed to build metime.\n"
+ exit 1
+fi
+
+if [ ! -x ./metime ] ; then
+ $ECHO "\nCannot find 'metime' program, testing cannot continue.\n"
+ exit 1
+fi
+
+#------------------------------------------------------------------------
+
+$ECHO "Bringing 'primegen' up to date ... "
+if $MAKE primegen ; then
+ :
+else
+ $ECHO "\nMake failed to build primegen.\n"
+ exit 1
+fi
+
+if [ ! -x ./primegen ] ; then
+ $ECHO "\nCannot find 'primegen' program, testing cannot continue.\n"
+ exit 1
+fi
+
+#------------------------------------------------------------------------
+
+rm -f timing-results.txt
+touch timing-results.txt
+
+sizes="256 512 1024 2048"
+ntests=10
+
+trap 'echo "oop!";rm -f tt*.tmp timing-results.txt;exit 0' INT HUP
+
+$ECHO "\n-- Modular exponentiation\n"
+$ECHO "Modular exponentiation:" >> timing-results.txt
+
+$ECHO "Running $ntests modular exponentiations per test:"
+for size in $sizes ; do
+ $ECHO "- Gathering statistics for $size bits ... "
+ secs=`./metime $ntests $size | tail -1 | awk '{print $2}'`
+ $ECHO "$size: " $secs " seconds per op" >> timing-results.txt
+ tail -1 timing-results.txt
+done
+
+$ECHO "<done>";
+
+sizes="256 512 1024"
+ntests=1
+
+$ECHO "\n-- Prime generation\n"
+$ECHO "Prime generation:" >> timing-results.txt
+
+$ECHO "Generating $ntests prime values per test:"
+for size in $sizes ; do
+ $ECHO "- Gathering statistics for $size bits ... "
+ ./primegen $size $ntests | grep ticks | awk '{print $7}' | tr -d '(' > tt$$.tmp
+ $ECHO "$size:" >> timing-results.txt
+ $PERL stats tt$$.tmp >> timing-results.txt
+ tail -1 timing-results.txt
+ rm -f tt$$.tmp
+done
+
+$ECHO "<done>"
+
+trap 'rm -f tt*.tmp timing-results.txt' INT HUP
+
+exit 0
+
diff --git a/security/nss/lib/freebl/mpi/types.pl b/security/nss/lib/freebl/mpi/types.pl
new file mode 100755
index 000000000..c5f38afa5
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/types.pl
@@ -0,0 +1,127 @@
+#!/usr/bin/perl
+
+#
+# types.pl - find recommended type definitions for digits and words
+#
+# This script scans the Makefile for the C compiler and compilation
+# flags currently in use, and using this combination, attempts to
+# compile a simple test program that outputs the sizes of the various
+# unsigned integer types, in bytes. Armed with these, it finds all
+# the "viable" type combinations for mp_digit and mp_word, where
+# viability is defined by the requirement that mp_word be at least two
+# times the precision of mp_digit.
+#
+# Of these, the one with the largest digit size is chosen, and
+# appropriate typedef statements are written to standard output.
+
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Name of this script (last path component of $0), used in diagnostics.
+@_=split(/\//,$0);chomp($prog=pop(@_));
+
+# The array of integer types to be considered...
+@TYPES = (
+    "unsigned char",
+    "unsigned short",
+    "unsigned int",
+    "unsigned long"
+);
+
+# Macro names for the maximum unsigned value of each type
+%TMAX = (
+    "unsigned char" => "UCHAR_MAX",
+    "unsigned short" => "USHRT_MAX",
+    "unsigned int" => "UINT_MAX",
+    "unsigned long" => "ULONG_MAX"
+);
+
+# Read the Makefile to find out which C compiler to use.  Only lines of
+# the exact form "CC=..." and "CFLAGS=..." are recognised; scanning
+# stops as soon as both have been seen.
+open(MFP, "<Makefile") or die "$prog: Makefile: $!\n";
+while(<MFP>) {
+    chomp;
+    if(/^CC=(.*)$/) {
+        $cc = $1;
+        last if $cflags;
+    } elsif(/^CFLAGS=(.*)$/) {
+        $cflags = $1;
+        last if $cc;
+    }
+}
+close(MFP);
+
+# If we couldn't find that, use 'cc' by default
+$cc = "cc" unless $cc;
+
+printf STDERR "Using '%s' as the C compiler.\n", $cc;
+
+# Generate a throw-away C program that prints sizeof() for each entry
+# of @TYPES, one per line, in the same order as @TYPES.
+print STDERR "Determining type sizes ... \n";
+open(OFP, ">tc$$.c") or die "$prog: tc$$.c: $!\n";
+print OFP "#include <stdio.h>\n\nint main(void)\n{\n";
+foreach $type (@TYPES) {
+    printf OFP "\tprintf(\"%%d\\n\", (int)sizeof(%s));\n", $type;
+}
+print OFP "\n\treturn 0;\n}\n";
+close(OFP);
+
+system("$cc $cflags -o tc$$ tc$$.c");
+
+# Do not leave the generated source behind when the build fails.
+unless(-x "tc$$") {
+    unlink("tc$$.c");
+    die "$prog: unable to build test program\n";
+}
+
+# Output line N of the test program is the size of $TYPES[N].
+open(IFP, "./tc$$|") or die "$prog: can't execute test program\n";
+$ix = 0;
+while(<IFP>) {
+    chomp;
+    $size{$TYPES[$ix++]} = $_;
+}
+close(IFP);
+
+unlink("tc$$");
+unlink("tc$$.c");
+
+print STDERR "Selecting viable combinations ... \n";
+while(($type, $size) = each(%size)) {
+    push(@ts, [ $size, $type ]);
+}
+
+# Sort them ascending by size
+@ts = sort { $a->[0] <=> $b->[0] } @ts;
+
+# Try all possible combinations, finding pairs in which the word size
+# is twice the digit size. The number of possible pairs is too small
+# to bother doing this more efficiently than by brute force
+for($ix = 0; $ix <= $#ts; $ix++) {
+    $w = $ts[$ix];
+
+    for($jx = 0; $jx <= $#ts; $jx++) {
+        $d = $ts[$jx];
+
+        if($w->[0] == 2 * $d->[0]) {
+            push(@valid, [ $d, $w ]);
+        }
+    }
+}
+
+# Sort descending by digit size
+@valid = sort { $b->[0]->[0] <=> $a->[0]->[0] } @valid;
+
+# Select the maximum as the recommended combination
+$rec = shift(@valid);
+
+# Without a viable pair the typedefs below would be emitted with empty
+# type names; fail loudly instead.
+die "$prog: no viable mp_digit/mp_word combination found\n" unless $rec;
+
+# $rec is [ digit, word ], each of which is [ size-in-bytes, type-name ].
+printf("typedef %-18s mp_sign;\n", "char");
+printf("typedef %-18s mp_digit; /* %d byte type */\n",
+       $rec->[0]->[1], $rec->[0]->[0]);
+printf("typedef %-18s mp_word; /* %d byte type */\n",
+       $rec->[1]->[1], $rec->[1]->[0]);
+printf("typedef %-18s mp_size;\n", "unsigned int");
+printf("typedef %-18s mp_err;\n\n", "int");
+
+printf("#define %-18s (CHAR_BIT*sizeof(mp_digit))\n", "DIGIT_BIT");
+printf("#define %-18s %s\n", "DIGIT_MAX", $TMAX{$rec->[0]->[1]});
+printf("#define %-18s (CHAR_BIT*sizeof(mp_word))\n", "MP_WORD_BIT");
+printf("#define %-18s %s\n\n", "MP_WORD_MAX", $TMAX{$rec->[1]->[1]});
+printf("#define %-18s (DIGIT_MAX+1)\n\n", "RADIX");
+
+# Two hex characters per byte of mp_digit.
+printf("#define %-18s \"%%0%dX\"\n", "DIGIT_FMT", (2 * $rec->[0]->[0]));
+
+exit 0;
diff --git a/security/nss/lib/freebl/mpi/utils/LICENSE b/security/nss/lib/freebl/mpi/utils/LICENSE
new file mode 100644
index 000000000..5f96df7ab
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/LICENSE
@@ -0,0 +1,4 @@
+Within this directory, each of the files listed below is licensed under
+the terms given in the file LICENSE-MPL, also in this directory.
+
+PRIMES
diff --git a/security/nss/lib/freebl/mpi/utils/LICENSE-MPL b/security/nss/lib/freebl/mpi/utils/LICENSE-MPL
new file mode 100644
index 000000000..41dc2327f
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/LICENSE-MPL
@@ -0,0 +1,3 @@
+This Source Code Form is subject to the terms of the Mozilla Public
+License, v. 2.0. If a copy of the MPL was not distributed with this
+file, You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/security/nss/lib/freebl/mpi/utils/PRIMES b/security/nss/lib/freebl/mpi/utils/PRIMES
new file mode 100644
index 000000000..ed65703ff
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/PRIMES
@@ -0,0 +1,41 @@
+Probable primes (sorted by number of significant bits)
+
+ 128: 81386202757205669562183851789305348631
+
+ 128: 180241813863264101444573802809858694397
+
+ 128: 245274683055224433281596312431122059021
+
+ 128: 187522309397665259809392608791686659539
+
+ 256: 83252422946206411852330647237287722547866360773229941071371588246436\
+ 513990159
+
+ 256: 79132571131322331023736933767063051273085304521895229780914612117520\
+ 058517909
+
+ 256: 72081815425552909748220041100909735706208853818662000557743644603407\
+ 965465527
+
+ 256: 87504602391905701494845474079163412737334477797316409702279059573654\
+ 274811271
+
+ 512: 12233064210800062190450937494718705259777386009095453001870729392786\
+ 63450255179083524798507997690270500580265258111668148238355016411719\
+ 9168737693316468563
+
+ 512: 12003639081420725322369909586347545220275253633035565716386136197501\
+ 88208318984400479275215620499883521216480724155582768193682335576385\
+ 2069481074929084063
+
+1024: 16467877625718912296741904171202513097057724053648819680815842057593\
+ 20371835940722471475475803725455063836431454757000451907612224427007\
+ 63984592414360595161051906727075047683803534852982766542661204179549\
+ 77327573530800542562611753617736693359790119074768292178493884576587\
+ 0230450429880021317876149636714743053
+
+1024: 16602953991090311275234291158294516471009930684624948451178742895360\
+ 86073703307475884280944414508444679430090561246728195735962931545473\
+ 40743240318558456247740186704660778277799687988031119436541068736925\
+ 20563780233711166724859277827382391527748470939542560819625727876091\
+ 5372193745283891895989104479029844957
diff --git a/security/nss/lib/freebl/mpi/utils/README b/security/nss/lib/freebl/mpi/utils/README
new file mode 100644
index 000000000..61c8e2efa
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/README
@@ -0,0 +1,206 @@
+This Source Code Form is subject to the terms of the Mozilla Public
+License, v. 2.0. If a copy of the MPL was not distributed with this
+file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+Additional MPI utilities
+------------------------
+
+The files 'mpprime.h' and 'mpprime.c' define some useful extensions to
+the MPI library for dealing with prime numbers (in particular, testing
+for divisibility, and the Rabin-Miller probabilistic primality test).
+
+The files 'mplogic.h' and 'mplogic.c' define extensions to the MPI
+library for doing bitwise logical operations and shifting.
+
+This document assumes you have read the help file for the MPI library
+and understand its conventions.
+
+Divisibility (mpprime.h)
+------------
+
+To test a number for divisibility by another number:
+
+mpp_divis(a, b) - test if b|a
+mpp_divis_d(a, d) - test if d|a
+
+Each of these functions returns MP_YES if its initial argument is
+divisible by its second, or MP_NO if it is not. Other errors may be
+returned as appropriate (such as MP_RANGE if you try to test for
+divisibility by zero).
+
+Randomness (mpprime.h)
+----------
+
+To generate random data:
+
+mpp_random(a) - fill a with random data
+mpp_random_size(a, p) - fill a with p digits of random data
+
+The mpp_random_size() function increases the precision of a to at
+least p, then fills all those digits randomly. The mpp_random()
+function fills a to its current precision (as determined by the number
+of significant digits, USED(a)).
+
+Note that these functions simply use the C library's rand() function
+to fill a with random digits up to its precision. This should be
+adequate for primality testing, but should not be used for
+cryptographic applications where truly random values are required for
+security.
+
+You should call srand() in your driver program in order to seed the
+random generator; this function doesn't call it.
+
+Primality Testing (mpprime.h)
+-----------------
+
+mpp_divis_vector(a, v, s, w) - is a divisible by any of the s values
+ in v, and if so, w = which.
+mpp_divis_primes(a, np) - is a divisible by any of the first np primes?
+mpp_fermat(a, w) - is a pseudoprime with respect to witness w?
+mpp_pprime(a, nt) - run nt iterations of Rabin-Miller on a.
+
+The mpp_divis_vector() function tests a for divisibility by each
+member of an array of digits. The array is v, the size of that array
+is s. Returns MP_YES if a is divisible, and stores the index of the
+offending digit in w. Returns MP_NO if a is not divisible by any of
+the digits in the array.
+
+A small table of primes is compiled into the library (typically the
+first 128 primes, although you can change this by editing the file
+'primes.c' before you build). The global variable prime_tab_size
+contains the number of primes in the table, and the values themselves
+are in the array prime_tab[], which is an array of mp_digit.
+
+The mpp_divis_primes() function is basically just a wrapper around
+mpp_divis_vector() that uses prime_tab[] as the test vector. The np
+parameter is a pointer to an mp_digit -- on input, it should specify
+the number of primes to be tested against. If a is divisible by any
+of the primes, MP_YES is returned and np is given the prime value that
+divided a (you can use this if you're factoring, for example).
+Otherwise, MP_NO is returned and np is untouched.
+
+The function mpp_fermat() performs Fermat's test, using w as a
+witness. This test basically relies on the fact that if a is prime,
+and w is relatively prime to a, then:
+
+ w^a = w (mod a)
+
+That is,
+
+ w^(a - 1) = 1 (mod a)
+
+The function returns MP_YES if the test passes, MP_NO if it fails. If
+w is relatively prime to a, and the test fails, a is definitely
+composite. If w is relatively prime to a and the test passes, then a
+is either prime, or w is a false witness (the probability of this
+happening depends on the choice of w and of a ... consult a number
+theory textbook for more information about this).
+
+Note: If (w, a) != 1, the output of this test is meaningless.
+----
+
+The function mpp_pprime() performs the Rabin-Miller probabilistic
+primality test for nt rounds. If all the tests pass, MP_YES is
+returned, and a is probably prime. The probability that an answer of
+MP_YES is incorrect is no greater than 1 in 4^nt, and in fact is
+usually much less than that (this is a pessimistic estimate). If any
+test fails, MP_NO is returned, and a is definitely composite.
+
+Bruce Schneier recommends at least 5 iterations of this test for most
+cryptographic applications; Knuth suggests that 25 are reasonable.
+Run it as many times as you feel are necessary.
+
+See the programs 'makeprime.c' and 'isprime.c' for reasonable examples
+of how to use these functions for primality testing.
+
+
+Bitwise Logic (mplogic.c)
+-------------
+
+The four commonest logical operations are implemented as:
+
+mpl_not(a, b) - Compute bitwise (one's) complement, b = ~a
+
+mpl_and(a, b, c) - Compute bitwise AND, c = a & b
+
+mpl_or(a, b, c) - Compute bitwise OR, c = a | b
+
+mpl_xor(a, b, c) - Compute bitwise XOR, c = a ^ b
+
+Left and right shifts are available as well. These take a number to
+shift, a destination, and a shift amount. The shift amount must be a
+digit value between 0 and DIGIT_BIT inclusive; if it is not, MP_RANGE
+will be returned and the shift will not happen.
+
+mpl_rsh(a, b, d) - Compute logical right shift, b = a >> d
+
+mpl_lsh(a, b, d) - Compute logical left shift, b = a << d
+
+Since these are logical shifts, they fill with zeroes (the library
+uses a signed magnitude representation, so there are no sign bits to
+extend anyway).
+
+
+Command-line Utilities
+----------------------
+
+A handful of interesting command-line utilities are provided. These
+are:
+
+lap.c - Find the order of a mod m. Usage is 'lap <a> <m>'.
+ This uses a dumb algorithm, so don't use it for
+ a really big modulus.
+
+invmod.c - Find the inverse of a mod m, if it exists. Usage
+ is 'invmod <a> <m>'
+
+sieve.c - A simple bitmap-based implementation of the Sieve
+ of Eratosthenes. Used to generate the table of
+ primes in primes.c. Usage is 'sieve <nbits>'
+
+prng.c - Uses the routines in bbs_rand.{h,c} to generate
+ one or more 32-bit pseudo-random integers. This
+ is mainly an example, not intended for use in a
+ cryptographic application (the system time is
+ the only source of entropy used)
+
+dec2hex.c - Convert decimal to hexadecimal
+
+hex2dec.c - Convert hexadecimal to decimal
+
+basecvt.c - General radix conversion tool (supports 2-64)
+
+isprime.c - Probabilistically test an integer for primality
+ using the Rabin-Miller pseudoprime test combined
+ with division by small primes.
+
+primegen.c - Generate primes at random.
+
+exptmod.c - Perform modular exponentiation
+
+ptab.pl - A Perl script to munge the output of the sieve
+ program into a compilable C structure.
+
+
+Other Files
+-----------
+
+PRIMES - Some randomly generated numbers which are prime with
+ extremely high probability.
+
+README - You're reading me already.
+
+
+About the Author
+----------------
+
+This software was written by Michael J. Fromberger. You can contact
+the author as follows:
+
+E-mail: <sting@linguist.dartmouth.edu>
+
+Postal: 8000 Cummings Hall, Thayer School of Engineering
+ Dartmouth College, Hanover, New Hampshire, USA
+
+PGP key: http://linguist.dartmouth.edu/~sting/keys/mjf.html
+ 9736 188B 5AFA 23D6 D6AA BE0D 5856 4525 289D 9907
diff --git a/security/nss/lib/freebl/mpi/utils/basecvt.c b/security/nss/lib/freebl/mpi/utils/basecvt.c
new file mode 100644
index 000000000..0e9915406
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/basecvt.c
@@ -0,0 +1,68 @@
+/*
+ * basecvt.c
+ *
+ * Convert integer values specified on the command line from one input
+ * base to another. Accepts input and output bases between 2 and 64
+ * inclusive.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "mpi.h"
+
+#define IBASE 10
+#define OBASE 16
+#define USAGE "Usage: %s ibase obase [value]\n"
+#define MAXBASE 64
+#define MINBASE 2
+
+int
+main(int argc, char *argv[])
+{
+ int ix, ibase = IBASE, obase = OBASE;
+ mp_int val;
+
+ ix = 1;
+ if (ix < argc) {
+ ibase = atoi(argv[ix++]);
+
+ if (ibase < MINBASE || ibase > MAXBASE) {
+ fprintf(stderr, "%s: input radix must be between %d and %d inclusive\n",
+ argv[0], MINBASE, MAXBASE);
+ return 1;
+ }
+ }
+ if (ix < argc) {
+ obase = atoi(argv[ix++]);
+
+ if (obase < MINBASE || obase > MAXBASE) {
+ fprintf(stderr, "%s: output radix must be between %d and %d inclusive\n",
+ argv[0], MINBASE, MAXBASE);
+ return 1;
+ }
+ }
+
+ mp_init(&val);
+ while (ix < argc) {
+ char *out;
+ int outlen;
+
+ mp_read_radix(&val, argv[ix++], ibase);
+
+ outlen = mp_radix_size(&val, obase);
+ out = calloc(outlen, sizeof(char));
+ mp_toradix(&val, out, obase);
+
+ printf("%s\n", out);
+ free(out);
+ }
+
+ mp_clear(&val);
+
+ return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/utils/bbs_rand.c b/security/nss/lib/freebl/mpi/utils/bbs_rand.c
new file mode 100644
index 000000000..fed2fe2e6
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/bbs_rand.c
@@ -0,0 +1,65 @@
+/*
+ * Blum, Blum & Shub PRNG using the MPI library
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "bbs_rand.h"
+
+#define SEED 1
+#define MODULUS 2
+
+/* This modulus is the product of two randomly generated 512-bit
+ prime integers, each of which is congruent to 3 (mod 4). */
+static char *bbs_modulus =
+ "75A2A6E1D27393B86562B9CE7279A8403CB4258A637DAB5233465373E37837383EDC"
+ "332282B8575927BC4172CE8C147B4894050EE9D2BDEED355C121037270CA2570D127"
+ "7D2390CD1002263326635CC6B259148DE3A1A03201980A925E395E646A5E9164B0EC"
+ "28559EBA58C87447245ADD0651EDA507056A1129E3A3E16E903D64B437";
+
+static int bbs_init = 0; /* flag set when library is initialized */
+static mp_int bbs_state; /* the current state of the generator */
+
+/* Suggested size of random seed data */
+int bbs_seed_size = (sizeof(bbs_modulus) / 2);
+
+void
+bbs_srand(unsigned char *data, int len)
+{
+ if ((bbs_init & SEED) == 0) {
+ mp_init(&bbs_state);
+ bbs_init |= SEED;
+ }
+
+ mp_read_raw(&bbs_state, (char *)data, len);
+
+} /* end bbs_srand() */
+
+unsigned int
+bbs_rand(void)
+{
+ static mp_int modulus;
+ unsigned int result = 0, ix;
+
+ if ((bbs_init & MODULUS) == 0) {
+ mp_init(&modulus);
+ mp_read_radix(&modulus, bbs_modulus, 16);
+ bbs_init |= MODULUS;
+ }
+
+ for (ix = 0; ix < sizeof(unsigned int); ix++) {
+ mp_digit d;
+
+ mp_sqrmod(&bbs_state, &modulus, &bbs_state);
+ d = DIGIT(&bbs_state, 0);
+
+ result = (result << CHAR_BIT) | (d & UCHAR_MAX);
+ }
+
+ return result;
+
+} /* end bbs_rand() */
+
+/*------------------------------------------------------------------------*/
+/* HERE THERE BE DRAGONS */
diff --git a/security/nss/lib/freebl/mpi/utils/bbs_rand.h b/security/nss/lib/freebl/mpi/utils/bbs_rand.h
new file mode 100644
index 000000000..d12269bf9
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/bbs_rand.h
@@ -0,0 +1,24 @@
+/*
+ * bbs_rand.h
+ *
+ * Blum, Blum & Shub PRNG using the MPI library
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _H_BBSRAND_
+#define _H_BBSRAND_
+
+#include <limits.h>
+#include "mpi.h"
+
+#define BBS_RAND_MAX UINT_MAX
+
+/* Suggested length of seed data */
+extern int bbs_seed_size;
+
+void bbs_srand(unsigned char *data, int len);
+unsigned int bbs_rand(void);
+
+#endif /* end _H_BBSRAND_ */
diff --git a/security/nss/lib/freebl/mpi/utils/bbsrand.c b/security/nss/lib/freebl/mpi/utils/bbsrand.c
new file mode 100644
index 000000000..d9151e005
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/bbsrand.c
@@ -0,0 +1,35 @@
+/*
+ * bbsrand.c
+ *
+ * Test driver for routines in bbs_rand.h
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <limits.h>
+
+#include "bbs_rand.h"
+
+#define NUM_TESTS 100
+
+int
+main(void)
+{
+ unsigned int seed, result, ix;
+
+ seed = time(NULL);
+ bbs_srand((unsigned char *)&seed, sizeof(seed));
+
+ for (ix = 0; ix < NUM_TESTS; ix++) {
+ result = bbs_rand();
+
+ printf("Test %3u: %08X\n", ix + 1, result);
+ }
+
+ return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/utils/dec2hex.c b/security/nss/lib/freebl/mpi/utils/dec2hex.c
new file mode 100644
index 000000000..ef3a52095
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/dec2hex.c
@@ -0,0 +1,40 @@
+/*
+ * dec2hex.c
+ *
+ * Convert decimal integers into hexadecimal
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "mpi.h"
+
+int
+main(int argc, char *argv[])
+{
+ mp_int a;
+ char *buf;
+ int len;
+
+ if (argc < 2) {
+ fprintf(stderr, "Usage: %s <a>\n", argv[0]);
+ return 1;
+ }
+
+ mp_init(&a);
+ mp_read_radix(&a, argv[1], 10);
+ len = mp_radix_size(&a, 16);
+ buf = malloc(len);
+ mp_toradix(&a, buf, 16);
+
+ printf("%s\n", buf);
+
+ free(buf);
+ mp_clear(&a);
+
+ return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/utils/exptmod.c b/security/nss/lib/freebl/mpi/utils/exptmod.c
new file mode 100644
index 000000000..3ac9078f4
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/exptmod.c
@@ -0,0 +1,55 @@
+/*
+ * exptmod.c
+ *
+ * Command line tool to perform modular exponentiation on arbitrary
+ * precision integers.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "mpi.h"
+
+int
+main(int argc, char *argv[])
+{
+ mp_int a, b, m;
+ mp_err res;
+ char *str;
+ int len, rval = 0;
+
+ if (argc < 3) {
+ fprintf(stderr, "Usage: %s <a> <b> <m>\n", argv[0]);
+ return 1;
+ }
+
+ mp_init(&a);
+ mp_init(&b);
+ mp_init(&m);
+ mp_read_radix(&a, argv[1], 10);
+ mp_read_radix(&b, argv[2], 10);
+ mp_read_radix(&m, argv[3], 10);
+
+ if ((res = mp_exptmod(&a, &b, &m, &a)) != MP_OKAY) {
+ fprintf(stderr, "%s: error: %s\n", argv[0], mp_strerror(res));
+ rval = 1;
+ } else {
+ len = mp_radix_size(&a, 10);
+ str = calloc(len, sizeof(char));
+ mp_toradix(&a, str, 10);
+
+ printf("%s\n", str);
+
+ free(str);
+ }
+
+ mp_clear(&a);
+ mp_clear(&b);
+ mp_clear(&m);
+
+ return rval;
+}
diff --git a/security/nss/lib/freebl/mpi/utils/fact.c b/security/nss/lib/freebl/mpi/utils/fact.c
new file mode 100644
index 000000000..da8e61a32
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/fact.c
@@ -0,0 +1,84 @@
+/*
+ * fact.c
+ *
+ * Compute factorial of input integer
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "mpi.h"
+
+mp_err mp_fact(mp_int *a, mp_int *b);
+
+int
+main(int argc, char *argv[])
+{
+ mp_int a;
+ mp_err res;
+
+ if (argc < 2) {
+ fprintf(stderr, "Usage: %s <number>\n", argv[0]);
+ return 1;
+ }
+
+ mp_init(&a);
+ mp_read_radix(&a, argv[1], 10);
+
+ if ((res = mp_fact(&a, &a)) != MP_OKAY) {
+ fprintf(stderr, "%s: error: %s\n", argv[0],
+ mp_strerror(res));
+ mp_clear(&a);
+ return 1;
+ }
+
+ {
+ char *buf;
+ int len;
+
+ len = mp_radix_size(&a, 10);
+ buf = malloc(len);
+ mp_todecimal(&a, buf);
+
+ puts(buf);
+
+ free(buf);
+ }
+
+ mp_clear(&a);
+ return 0;
+}
+
+mp_err
+mp_fact(mp_int *a, mp_int *b)
+{
+ mp_int ix, s;
+ mp_err res = MP_OKAY;
+
+ if (mp_cmp_z(a) < 0)
+ return MP_UNDEF;
+
+ mp_init(&s);
+ mp_add_d(&s, 1, &s); /* s = 1 */
+ mp_init(&ix);
+ mp_add_d(&ix, 1, &ix); /* ix = 1 */
+
+ for (/* */; mp_cmp(&ix, a) <= 0; mp_add_d(&ix, 1, &ix)) {
+ if ((res = mp_mul(&s, &ix, &s)) != MP_OKAY)
+ break;
+ }
+
+ mp_clear(&ix);
+
+ /* Copy out results if we got them */
+ if (res == MP_OKAY)
+ mp_copy(&s, b);
+
+ mp_clear(&s);
+
+ return res;
+}
diff --git a/security/nss/lib/freebl/mpi/utils/gcd.c b/security/nss/lib/freebl/mpi/utils/gcd.c
new file mode 100644
index 000000000..9f11a250b
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/gcd.c
@@ -0,0 +1,95 @@
+/*
+ * gcd.c
+ *
+ * Greatest common divisor
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "mpi.h"
+
+char *g_prog = NULL;
+
+void print_mp_int(mp_int *mp, FILE *ofp);
+
+int
+main(int argc, char *argv[])
+{
+ mp_int a, b, x, y;
+ mp_err res;
+ int ext = 0;
+
+ g_prog = argv[0];
+
+ if (argc < 3) {
+ fprintf(stderr, "Usage: %s <a> <b>\n", g_prog);
+ return 1;
+ }
+
+ mp_init(&a);
+ mp_read_radix(&a, argv[1], 10);
+ mp_init(&b);
+ mp_read_radix(&b, argv[2], 10);
+
+ /* If we were called 'xgcd', compute x, y so that g = ax + by */
+ if (strcmp(g_prog, "xgcd") == 0) {
+ ext = 1;
+ mp_init(&x);
+ mp_init(&y);
+ }
+
+ if (ext) {
+ if ((res = mp_xgcd(&a, &b, &a, &x, &y)) != MP_OKAY) {
+ fprintf(stderr, "%s: error: %s\n", g_prog, mp_strerror(res));
+ mp_clear(&a);
+ mp_clear(&b);
+ mp_clear(&x);
+ mp_clear(&y);
+ return 1;
+ }
+ } else {
+ if ((res = mp_gcd(&a, &b, &a)) != MP_OKAY) {
+ fprintf(stderr, "%s: error: %s\n", g_prog,
+ mp_strerror(res));
+ mp_clear(&a);
+ mp_clear(&b);
+ return 1;
+ }
+ }
+
+ print_mp_int(&a, stdout);
+ if (ext) {
+ fputs("x = ", stdout);
+ print_mp_int(&x, stdout);
+ fputs("y = ", stdout);
+ print_mp_int(&y, stdout);
+ }
+
+ mp_clear(&a);
+ mp_clear(&b);
+
+ if (ext) {
+ mp_clear(&x);
+ mp_clear(&y);
+ }
+
+ return 0;
+}
+
+void
+print_mp_int(mp_int *mp, FILE *ofp)
+{
+ char *buf;
+ int len;
+
+ len = mp_radix_size(mp, 10);
+ buf = calloc(len, sizeof(char));
+ mp_todecimal(mp, buf);
+ fprintf(ofp, "%s\n", buf);
+ free(buf);
+}
diff --git a/security/nss/lib/freebl/mpi/utils/hex2dec.c b/security/nss/lib/freebl/mpi/utils/hex2dec.c
new file mode 100644
index 000000000..9b21d22e0
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/hex2dec.c
@@ -0,0 +1,40 @@
+/*
+ * hex2dec.c
+ *
+ * Convert hexadecimal integers into decimal
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "mpi.h"
+
+int
+main(int argc, char *argv[])
+{
+ mp_int a;
+ char *buf;
+ int len;
+
+ if (argc < 2) {
+ fprintf(stderr, "Usage: %s <a>\n", argv[0]);
+ return 1;
+ }
+
+ mp_init(&a);
+ mp_read_radix(&a, argv[1], 16);
+ len = mp_radix_size(&a, 10);
+ buf = malloc(len);
+ mp_toradix(&a, buf, 10);
+
+ printf("%s\n", buf);
+
+ free(buf);
+ mp_clear(&a);
+
+ return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/utils/identest.c b/security/nss/lib/freebl/mpi/utils/identest.c
new file mode 100644
index 000000000..321d2c2b0
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/identest.c
@@ -0,0 +1,84 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "mpi.h"
+#include "mpprime.h"
+#include <sys/types.h>
+#include <time.h>
+
+#define MAX_PREC (4096 / MP_DIGIT_BIT)
+
+mp_err
+identity_test(void)
+{
+ mp_size preca, precb;
+ mp_err res;
+ mp_int a, b;
+ mp_int t1, t2, t3, t4, t5;
+
+ preca = (rand() % MAX_PREC) + 1;
+ precb = (rand() % MAX_PREC) + 1;
+
+ MP_DIGITS(&a) = 0;
+ MP_DIGITS(&b) = 0;
+ MP_DIGITS(&t1) = 0;
+ MP_DIGITS(&t2) = 0;
+ MP_DIGITS(&t3) = 0;
+ MP_DIGITS(&t4) = 0;
+ MP_DIGITS(&t5) = 0;
+
+ MP_CHECKOK(mp_init(&a));
+ MP_CHECKOK(mp_init(&b));
+ MP_CHECKOK(mp_init(&t1));
+ MP_CHECKOK(mp_init(&t2));
+ MP_CHECKOK(mp_init(&t3));
+ MP_CHECKOK(mp_init(&t4));
+ MP_CHECKOK(mp_init(&t5));
+
+ MP_CHECKOK(mpp_random_size(&a, preca));
+ MP_CHECKOK(mpp_random_size(&b, precb));
+
+ if (mp_cmp(&a, &b) < 0)
+ mp_exch(&a, &b);
+
+ MP_CHECKOK(mp_mod(&a, &b, &t1)); /* t1 = a%b */
+ MP_CHECKOK(mp_div(&a, &b, &t2, NULL)); /* t2 = a/b */
+ MP_CHECKOK(mp_mul(&b, &t2, &t3)); /* t3 = (a/b)*b */
+ MP_CHECKOK(mp_add(&t1, &t3, &t4)); /* t4 = a%b + (a/b)*b */
+ MP_CHECKOK(mp_sub(&t4, &a, &t5)); /* t5 = a%b + (a/b)*b - a */
+ if (mp_cmp_z(&t5) != 0) {
+ res = MP_UNDEF;
+ goto CLEANUP;
+ }
+
+CLEANUP:
+ mp_clear(&t5);
+ mp_clear(&t4);
+ mp_clear(&t3);
+ mp_clear(&t2);
+ mp_clear(&t1);
+ mp_clear(&b);
+ mp_clear(&a);
+ return res;
+}
+
+int
+main(void)
+{
+ unsigned int seed = (unsigned int)time(NULL);
+ unsigned long count = 0;
+ mp_err res;
+
+ srand(seed);
+
+ while (MP_OKAY == (res = identity_test())) {
+ if ((++count % 100) == 0)
+ fputc('.', stderr);
+ }
+
+ fprintf(stderr, "\ntest failed, err %d\n", res);
+ return res;
+}
diff --git a/security/nss/lib/freebl/mpi/utils/invmod.c b/security/nss/lib/freebl/mpi/utils/invmod.c
new file mode 100644
index 000000000..9b4b04d3f
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/invmod.c
@@ -0,0 +1,61 @@
+/*
+ * invmod.c
+ *
+ * Compute modular inverses
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "mpi.h"
+
+int
+main(int argc, char *argv[])
+{
+ mp_int a, m;
+ mp_err res;
+ char *buf;
+ int len, out = 0;
+
+ if (argc < 3) {
+ fprintf(stderr, "Usage: %s <a> <m>\n", argv[0]);
+ return 1;
+ }
+
+ mp_init(&a);
+ mp_init(&m);
+ mp_read_radix(&a, argv[1], 10);
+ mp_read_radix(&m, argv[2], 10);
+
+ if (mp_cmp(&a, &m) > 0)
+ mp_mod(&a, &m, &a);
+
+ switch ((res = mp_invmod(&a, &m, &a))) {
+ case MP_OKAY:
+ len = mp_radix_size(&a, 10);
+ buf = malloc(len);
+
+ mp_toradix(&a, buf, 10);
+ printf("%s\n", buf);
+ free(buf);
+ break;
+
+ case MP_UNDEF:
+ printf("No inverse\n");
+ out = 1;
+ break;
+
+ default:
+ printf("error: %s (%d)\n", mp_strerror(res), res);
+ out = 2;
+ break;
+ }
+
+ mp_clear(&a);
+ mp_clear(&m);
+
+ return out;
+}
diff --git a/security/nss/lib/freebl/mpi/utils/isprime.c b/security/nss/lib/freebl/mpi/utils/isprime.c
new file mode 100644
index 000000000..d2d86957e
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/isprime.c
@@ -0,0 +1,89 @@
+/*
+ * isprime.c
+ *
+ * Probabilistic primality tester command-line tool
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "mpi.h"
+#include "mpprime.h"
+
+#define RM_TESTS 15 /* how many iterations of Rabin-Miller? */
+#define MINIMUM 1024 /* don't bother us with a < this */
+
+int g_tests = RM_TESTS;
+char *g_prog = NULL;
+
+int
+main(int argc, char *argv[])
+{
+ mp_int a;
+ mp_digit np = prime_tab_size; /* from mpprime.h */
+ int res = 0;
+
+ g_prog = argv[0];
+
+ if (argc < 2) {
+ fprintf(stderr, "Usage: %s <a>, where <a> is a decimal integer\n"
+ "Use '0x' prefix for a hexadecimal value\n",
+ g_prog);
+ return 1;
+ }
+
+ /* Read number of tests from environment, if present */
+ {
+ char *tmp;
+
+ if ((tmp = PR_GetEnvSecure("RM_TESTS")) != NULL) {
+ if ((g_tests = atoi(tmp)) <= 0)
+ g_tests = RM_TESTS;
+ }
+ }
+
+ mp_init(&a);
+ if (argv[1][0] == '0' && argv[1][1] == 'x')
+ mp_read_radix(&a, argv[1] + 2, 16);
+ else
+ mp_read_radix(&a, argv[1], 10);
+
+ if (mp_cmp_d(&a, MINIMUM) <= 0) {
+ fprintf(stderr, "%s: please use a value greater than %d\n",
+ g_prog, MINIMUM);
+ mp_clear(&a);
+ return 1;
+ }
+
+ /* Test for divisibility by small primes */
+ if (mpp_divis_primes(&a, &np) != MP_NO) {
+ printf("Not prime (divisible by small prime %d)\n", np);
+ res = 2;
+ goto CLEANUP;
+ }
+
+ /* Test with Fermat's test, using 2 as a witness */
+ if (mpp_fermat(&a, 2) != MP_YES) {
+ printf("Not prime (failed Fermat test)\n");
+ res = 2;
+ goto CLEANUP;
+ }
+
+ /* Test with Rabin-Miller probabilistic test */
+ if (mpp_pprime(&a, g_tests) == MP_NO) {
+ printf("Not prime (failed pseudoprime test)\n");
+ res = 2;
+ goto CLEANUP;
+ }
+
+ printf("Probably prime, 1 in 4^%d chance of false positive\n", g_tests);
+
+CLEANUP:
+ mp_clear(&a);
+
+ return res;
+}
diff --git a/security/nss/lib/freebl/mpi/utils/lap.c b/security/nss/lib/freebl/mpi/utils/lap.c
new file mode 100644
index 000000000..501e4531d
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/lap.c
@@ -0,0 +1,90 @@
+/*
+ * lap.c
+ *
+ * Find least annihilating power of a mod m
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+
+#include "mpi.h"
+
+void sig_catch(int ign);
+
+int g_quit = 0;
+
+int
+main(int argc, char *argv[])
+{
+ mp_int a, m, p, k;
+
+ if (argc < 3) {
+ fprintf(stderr, "Usage: %s <a> <m>\n", argv[0]);
+ return 1;
+ }
+
+ mp_init(&a);
+ mp_init(&m);
+ mp_init(&p);
+ mp_add_d(&p, 1, &p);
+
+ mp_read_radix(&a, argv[1], 10);
+ mp_read_radix(&m, argv[2], 10);
+
+ mp_init_copy(&k, &a);
+
+ signal(SIGINT, sig_catch);
+#ifndef __OS2__
+ signal(SIGHUP, sig_catch);
+#endif
+ signal(SIGTERM, sig_catch);
+
+ while (mp_cmp(&p, &m) < 0) {
+ if (g_quit) {
+ int len;
+ char *buf;
+
+ len = mp_radix_size(&p, 10);
+ buf = malloc(len);
+ mp_toradix(&p, buf, 10);
+
+ fprintf(stderr, "Terminated at: %s\n", buf);
+ free(buf);
+ return 1;
+ }
+ if (mp_cmp_d(&k, 1) == 0) {
+ int len;
+ char *buf;
+
+ len = mp_radix_size(&p, 10);
+ buf = malloc(len);
+ mp_toradix(&p, buf, 10);
+
+ printf("%s\n", buf);
+
+ free(buf);
+ break;
+ }
+
+ mp_mulmod(&k, &a, &m, &k);
+ mp_add_d(&p, 1, &p);
+ }
+
+ if (mp_cmp(&p, &m) >= 0)
+ printf("No annihilating power.\n");
+
+ mp_clear(&p);
+ mp_clear(&m);
+ mp_clear(&a);
+ return 0;
+}
+
+void
+sig_catch(int ign)
+{
+ g_quit = 1;
+}
diff --git a/security/nss/lib/freebl/mpi/utils/makeprime.c b/security/nss/lib/freebl/mpi/utils/makeprime.c
new file mode 100644
index 000000000..401b7532b
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/makeprime.c
@@ -0,0 +1,116 @@
+/*
+ * makeprime.c
+ *
+ * A simple prime generator function (and test driver). Prints out the
+ * first prime it finds greater than or equal to the starting value.
+ *
+ * Usage: makeprime <start>
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+/* These two must be included for make_prime() to work */
+
+#include "mpi.h"
+#include "mpprime.h"
+
+/*
+ make_prime(p, nr)
+
+ Find the smallest prime integer greater than or equal to p, where
+ primality is verified by 'nr' iterations of the Rabin-Miller
+ probabilistic primality test. The caller is responsible for
+ generating the initial value of p.
+
+ Returns MP_OKAY if a prime has been generated, otherwise the error
+ code indicates some other problem. The value of p is clobbered; the
+ caller should keep a copy if the value is needed.
+ */
+mp_err make_prime(mp_int *p, int nr);
+
+/* The main() is not required -- it's just a test driver */
+int
+main(int argc, char *argv[])
+{
+ mp_int start;
+ mp_err res;
+
+ if (argc < 2) {
+ fprintf(stderr, "Usage: %s <start-value>\n", argv[0]);
+ return 1;
+ }
+
+ mp_init(&start);
+ if (argv[1][0] == '0' && tolower(argv[1][1]) == 'x') {
+ mp_read_radix(&start, argv[1] + 2, 16);
+ } else {
+ mp_read_radix(&start, argv[1], 10);
+ }
+ mp_abs(&start, &start);
+
+ if ((res = make_prime(&start, 5)) != MP_OKAY) {
+ fprintf(stderr, "%s: error: %s\n", argv[0], mp_strerror(res));
+ mp_clear(&start);
+
+ return 1;
+
+ } else {
+ char *buf = malloc(mp_radix_size(&start, 10));
+
+ mp_todecimal(&start, buf);
+ printf("%s\n", buf);
+ free(buf);
+
+ mp_clear(&start);
+
+ return 0;
+ }
+
+} /* end main() */
+
+/*------------------------------------------------------------------------*/
+
+mp_err
+make_prime(mp_int *p, int nr)
+{
+ mp_err res;
+
+ if (mp_iseven(p)) {
+ mp_add_d(p, 1, p);
+ }
+
+ do {
+ mp_digit which = prime_tab_size;
+
+ /* First test for divisibility by a few small primes */
+ if ((res = mpp_divis_primes(p, &which)) == MP_YES)
+ continue;
+ else if (res != MP_NO)
+ goto CLEANUP;
+
+ /* If that passes, try one iteration of Fermat's test */
+ if ((res = mpp_fermat(p, 2)) == MP_NO)
+ continue;
+ else if (res != MP_YES)
+ goto CLEANUP;
+
+ /* If that passes, run Rabin-Miller as often as requested */
+ if ((res = mpp_pprime(p, nr)) == MP_YES)
+ break;
+ else if (res != MP_NO)
+ goto CLEANUP;
+
+ } while ((res = mp_add_d(p, 2, p)) == MP_OKAY);
+
+CLEANUP:
+ return res;
+
+} /* end make_prime() */
+
+/*------------------------------------------------------------------------*/
+/* HERE THERE BE DRAGONS */
diff --git a/security/nss/lib/freebl/mpi/utils/metime.c b/security/nss/lib/freebl/mpi/utils/metime.c
new file mode 100644
index 000000000..122875ee0
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/metime.c
@@ -0,0 +1,102 @@
+/*
+ * metime.c
+ *
+ * Modular exponentiation timing test
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <time.h>
+
+#include "mpi.h"
+#include "mpprime.h"
+
+double clk_to_sec(clock_t start, clock_t stop);
+
+int
+main(int argc, char *argv[])
+{
+ int ix, num, prec = 8;
+ unsigned int seed;
+ clock_t start, stop;
+ double sec;
+
+ mp_int a, m, c;
+
+ if (PR_GetEnvSecure("SEED") != NULL)
+ seed = abs(atoi(PR_GetEnvSecure("SEED")));
+ else
+ seed = (unsigned int)time(NULL);
+
+ if (argc < 2) {
+ fprintf(stderr, "Usage: %s <num-tests> [<nbits>]\n", argv[0]);
+ return 1;
+ }
+
+ if ((num = atoi(argv[1])) < 0)
+ num = -num;
+
+ if (!num) {
+ fprintf(stderr, "%s: must perform at least 1 test\n", argv[0]);
+ return 1;
+ }
+
+ if (argc > 2) {
+ if ((prec = atoi(argv[2])) <= 0)
+ prec = 8;
+ else
+ prec = (prec + (DIGIT_BIT - 1)) / DIGIT_BIT;
+ }
+
+ printf("Modular exponentiation timing test\n"
+ "Precision: %d digits (%d bits)\n"
+ "# of tests: %d\n\n",
+ prec, prec * DIGIT_BIT, num);
+
+ mp_init_size(&a, prec);
+ mp_init_size(&m, prec);
+ mp_init_size(&c, prec);
+
+ srand(seed);
+
+ start = clock();
+ for (ix = 0; ix < num; ix++) {
+
+ mpp_random_size(&a, prec);
+ mpp_random_size(&c, prec);
+ mpp_random_size(&m, prec);
+ /* set msb and lsb of m */
+ DIGIT(&m, 0) |= 1;
+ DIGIT(&m, USED(&m) - 1) |= (mp_digit)1 << (DIGIT_BIT - 1);
+ if (mp_cmp(&a, &m) > 0)
+ mp_sub(&a, &m, &a);
+
+ mp_exptmod(&a, &c, &m, &c);
+ }
+ stop = clock();
+
+ sec = clk_to_sec(start, stop);
+
+ printf("Total: %.3f seconds\n", sec);
+ printf("Individual: %.3f seconds\n", sec / num);
+
+ mp_clear(&c);
+ mp_clear(&a);
+ mp_clear(&m);
+
+ return 0;
+}
+
+double
+clk_to_sec(clock_t start, clock_t stop)
+{
+ return (double)(stop - start) / CLOCKS_PER_SEC;
+}
+
+/*------------------------------------------------------------------------*/
+/* HERE THERE BE DRAGONS */
diff --git a/security/nss/lib/freebl/mpi/utils/pi.c b/security/nss/lib/freebl/mpi/utils/pi.c
new file mode 100644
index 000000000..7e3109786
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/pi.c
@@ -0,0 +1,171 @@
+/*
+ * pi.c
+ *
+ * Compute pi to an arbitrary number of digits. Uses Machin's formula,
+ * like everyone else on the planet:
+ *
+ * pi = 16 * arctan(1/5) - 4 * arctan(1/239)
+ *
+ * This is pretty effective for up to a few thousand digits, but it
+ * gets pretty slow after that.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <time.h>
+
+#include "mpi.h"
+
+mp_err arctan(mp_digit mul, mp_digit x, mp_digit prec, mp_int *sum);
+
+int
+main(int argc, char *argv[])
+{
+ mp_err res;
+ mp_digit ndigits;
+ mp_int sum1, sum2;
+ clock_t start, stop;
+ int out = 0;
+
+ /* Make the user specify precision on the command line */
+ if (argc < 2) {
+ fprintf(stderr, "Usage: %s <num-digits>\n", argv[0]);
+ return 1;
+ }
+
+ if ((ndigits = abs(atoi(argv[1]))) == 0) {
+ fprintf(stderr, "%s: you must request at least 1 digit\n", argv[0]);
+ return 1;
+ }
+
+ start = clock();
+ mp_init(&sum1);
+ mp_init(&sum2);
+
+ /* sum1 = 16 * arctan(1/5) */
+ if ((res = arctan(16, 5, ndigits, &sum1)) != MP_OKAY) {
+ fprintf(stderr, "%s: arctan: %s\n", argv[0], mp_strerror(res));
+ out = 1;
+ goto CLEANUP;
+ }
+
+ /* sum2 = 4 * arctan(1/239) */
+ if ((res = arctan(4, 239, ndigits, &sum2)) != MP_OKAY) {
+ fprintf(stderr, "%s: arctan: %s\n", argv[0], mp_strerror(res));
+ out = 1;
+ goto CLEANUP;
+ }
+
+ /* pi = sum1 - sum2 */
+ if ((res = mp_sub(&sum1, &sum2, &sum1)) != MP_OKAY) {
+ fprintf(stderr, "%s: mp_sub: %s\n", argv[0], mp_strerror(res));
+ out = 1;
+ goto CLEANUP;
+ }
+ stop = clock();
+
+ /* Write the output in decimal */
+ {
+ char *buf = malloc(mp_radix_size(&sum1, 10));
+
+ if (buf == NULL) {
+ fprintf(stderr, "%s: out of memory\n", argv[0]);
+ out = 1;
+ goto CLEANUP;
+ }
+ mp_todecimal(&sum1, buf);
+ printf("%s\n", buf);
+ free(buf);
+ }
+
+ fprintf(stderr, "Computation took %.2f sec.\n",
+ (double)(stop - start) / CLOCKS_PER_SEC);
+
+CLEANUP:
+ mp_clear(&sum1);
+ mp_clear(&sum2);
+
+ return out;
+}
+
+/* Compute sum := mul * arctan(1/x), to 'prec' digits of precision */
+mp_err
+arctan(mp_digit mul, mp_digit x, mp_digit prec, mp_int *sum)
+{
+ mp_int t, v;
+ mp_digit q = 1, rd;
+ mp_err res;
+ int sign = 1;
+
+ prec += 3; /* push inaccuracies off the end */
+
+ mp_init(&t);
+ mp_set(&t, 10);
+ mp_init(&v);
+ if ((res = mp_expt_d(&t, prec, &t)) != MP_OKAY || /* get 10^prec */
+ (res = mp_mul_d(&t, mul, &t)) != MP_OKAY || /* ... times mul */
+ (res = mp_mul_d(&t, x, &t)) != MP_OKAY) /* ... times x */
+ goto CLEANUP;
+
+ /*
+ The extra multiplication by x in the above takes care of what
+ would otherwise have to be a special case for 1 / x^1 during the
+ first loop iteration. A little sneaky, but effective.
+
+ We compute arctan(1/x) by the formula:
+
+      1     1       1       1
+      - - ----- + ----- - ----- + ...
+      x   3 x^3   5 x^5   7 x^7
+
+ We multiply through by 'mul' beforehand, which gives us a couple
+ more iterations and more precision
+ */
+
+ x *= x; /* works as long as x < sqrt(RADIX), which it is here */
+
+ mp_zero(sum);
+
+ do {
+ if ((res = mp_div_d(&t, x, &t, &rd)) != MP_OKAY)
+ goto CLEANUP;
+
+ if (sign < 0 && rd != 0)
+ mp_add_d(&t, 1, &t);
+
+ if ((res = mp_div_d(&t, q, &v, &rd)) != MP_OKAY)
+ goto CLEANUP;
+
+ if (sign < 0 && rd != 0)
+ mp_add_d(&v, 1, &v);
+
+ if (sign > 0)
+ res = mp_add(sum, &v, sum);
+ else
+ res = mp_sub(sum, &v, sum);
+
+ if (res != MP_OKAY)
+ goto CLEANUP;
+
+ sign *= -1;
+ q += 2;
+
+ } while (mp_cmp_z(&t) != 0);
+
+ /* Chop off inaccurate low-order digits */
+ mp_div_d(sum, 1000, sum, NULL);
+
+CLEANUP:
+ mp_clear(&v);
+ mp_clear(&t);
+
+ return res;
+}
+
+/*------------------------------------------------------------------------*/
+/* HERE THERE BE DRAGONS */
diff --git a/security/nss/lib/freebl/mpi/utils/primegen.c b/security/nss/lib/freebl/mpi/utils/primegen.c
new file mode 100644
index 000000000..f62a56a4e
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/primegen.c
@@ -0,0 +1,159 @@
+/*
+ * primegen.c
+ *
+ * Generates random integers which are prime with a high degree of
+ * probability using the Miller-Rabin probabilistic primality testing
+ * algorithm.
+ *
+ * Usage:
+ * primegen <bits> [<num> [strong]]
+ *
+ * <bits> - number of significant bits each prime should have
+ * <num> - number of primes to generate
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <time.h>
+
+#include "mpi.h"
+#include "mplogic.h"
+#include "mpprime.h"
+
+#define NUM_TESTS 5 /* Number of Rabin-Miller iterations to test with */
+
+#ifdef DEBUG
+#define FPUTC(x, y) fputc(x, y)
+#else
+#define FPUTC(x, y)
+#endif
+
+int
+main(int argc, char *argv[])
+{
+ unsigned char *raw;
+ char *out;
+ unsigned long nTries;
+ int rawlen, bits, outlen, ngen, ix, jx;
+ int g_strong = 0;
+ mp_int testval;
+ mp_err res;
+ clock_t start, end;
+
+ /* We'll just use the C library's rand() for now, although this
+ won't be good enough for cryptographic purposes */
+ if ((out = PR_GetEnvSecure("SEED")) == NULL) {
+ srand((unsigned int)time(NULL));
+ } else {
+ srand((unsigned int)atoi(out));
+ }
+
+ if (argc < 2) {
+ fprintf(stderr, "Usage: %s <bits> [<count> [strong]]\n", argv[0]);
+ return 1;
+ }
+
+ if ((bits = abs(atoi(argv[1]))) < CHAR_BIT) {
+ fprintf(stderr, "%s: please request at least %d bits.\n",
+ argv[0], CHAR_BIT);
+ return 1;
+ }
+
+ /* If optional third argument is given, use that as the number of
+ primes to generate; otherwise generate one prime only.
+ */
+ if (argc < 3) {
+ ngen = 1;
+ } else {
+ ngen = abs(atoi(argv[2]));
+ }
+
+ /* If fourth argument is given, and is the word "strong", we'll
+ generate strong (Sophie Germain) primes.
+ */
+ if (argc > 3 && strcmp(argv[3], "strong") == 0)
+ g_strong = 1;
+
+ /* testval - candidate being tested; nTries - number tried so far */
+ if ((res = mp_init(&testval)) != MP_OKAY) {
+ fprintf(stderr, "%s: error: %s\n", argv[0], mp_strerror(res));
+ return 1;
+ }
+
+ if (g_strong) {
+ printf("Requested %d strong prime value(s) of %d bits.\n",
+ ngen, bits);
+ } else {
+ printf("Requested %d prime value(s) of %d bits.\n", ngen, bits);
+ }
+
+ rawlen = (bits / CHAR_BIT) + ((bits % CHAR_BIT) ? 1 : 0) + 1;
+
+ if ((raw = calloc(rawlen, sizeof(unsigned char))) == NULL) {
+ fprintf(stderr, "%s: out of memory, sorry.\n", argv[0]);
+ return 1;
+ }
+
+ /* This loop is one for each prime we need to generate */
+ for (jx = 0; jx < ngen; jx++) {
+
+ raw[0] = 0; /* sign is positive */
+
+ /* Pack the initializer with random bytes */
+ for (ix = 1; ix < rawlen; ix++)
+ raw[ix] = (rand() * rand()) & UCHAR_MAX;
+
+ raw[1] |= 0x80; /* set high-order bit of test value */
+ raw[rawlen - 1] |= 1; /* set low-order bit of test value */
+
+ /* Make an mp_int out of the initializer */
+ mp_read_raw(&testval, (char *)raw, rawlen);
+
+ /* Initialize candidate counter */
+ nTries = 0;
+
+ start = clock(); /* time generation for this prime */
+ do {
+ res = mpp_make_prime(&testval, bits, g_strong, &nTries);
+ if (res != MP_NO)
+ break;
+ /* This code works whether digits are 16 or 32 bits */
+ res = mp_add_d(&testval, 32 * 1024, &testval);
+ res = mp_add_d(&testval, 32 * 1024, &testval);
+ FPUTC(',', stderr);
+ } while (1);
+ end = clock();
+
+ if (res != MP_YES) {
+ break;
+ }
+ FPUTC('\n', stderr);
+ puts("The following value is probably prime:");
+ outlen = mp_radix_size(&testval, 10);
+ out = calloc(outlen, sizeof(unsigned char));
+ mp_toradix(&testval, (char *)out, 10);
+ printf("10: %s\n", out);
+ mp_toradix(&testval, (char *)out, 16);
+ printf("16: %s\n\n", out);
+ free(out);
+
+ printf("Number of candidates tried: %lu\n", nTries);
+ printf("This computation took %ld clock ticks (%.2f seconds)\n",
+ (end - start), ((double)(end - start) / CLOCKS_PER_SEC));
+
+ FPUTC('\n', stderr);
+ } /* end of loop to generate all requested primes */
+
+ if (res != MP_OKAY)
+ fprintf(stderr, "%s: error: %s\n", argv[0], mp_strerror(res));
+
+ free(raw);
+ mp_clear(&testval);
+
+ return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/utils/prng.c b/security/nss/lib/freebl/mpi/utils/prng.c
new file mode 100644
index 000000000..38748d18e
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/prng.c
@@ -0,0 +1,57 @@
+/*
+ * prng.c
+ *
+ * Command-line pseudo-random number generator
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <time.h>
+
+#ifdef __OS2__
+#include <types.h>
+#include <process.h>
+#else
+#include <unistd.h>
+#endif
+
+#include "bbs_rand.h"
+
+/*
+ * Seed the generator declared in bbs_rand.h and print pseudo-random
+ * numbers from bbs_rand(), one per line.
+ *
+ * argv[1], when present, is the number of values to print.  A missing,
+ * unparsable, non-positive or out-of-range count falls back to 1.
+ *
+ * Returns 0 on success, 1 if the seed buffer cannot be allocated.
+ */
+int
+main(int argc, char *argv[])
+{
+    unsigned char *seed;
+    volatile unsigned char *wipe;
+    unsigned int ix, num = 1;
+    pid_t pid;
+
+    if (argc > 1) {
+        /* Parse as a signed value: num itself is unsigned, so a negative
+         * request could never be detected once stored in it and would
+         * wrap around to an enormous count. */
+        long req = strtol(argv[1], NULL, 10);
+
+        if (req > 0 && (unsigned long)req <= UINT_MAX)
+            num = (unsigned int)req;
+    }
+
+    pid = getpid();
+    srand((unsigned int)time(NULL) * (unsigned int)pid);
+
+    /* Not a perfect seed, but not bad */
+    seed = malloc(bbs_seed_size);
+    if (seed == NULL) {
+        fprintf(stderr, "%s: out of memory\n", argv[0]);
+        return 1;
+    }
+    for (ix = 0; ix < bbs_seed_size; ix++) {
+        /* Reduce modulo UCHAR_MAX + 1 so that every byte value,
+         * including UCHAR_MAX itself, can occur. */
+        seed[ix] = (unsigned char)(rand() % (UCHAR_MAX + 1));
+    }
+
+    bbs_srand(seed, bbs_seed_size);
+
+    /* Scrub the seed through a volatile pointer so the stores cannot be
+     * discarded as dead writes ahead of free(). */
+    wipe = seed;
+    for (ix = 0; ix < bbs_seed_size; ix++) {
+        wipe[ix] = 0;
+    }
+    free(seed);
+
+    while (num-- > 0) {
+        ix = bbs_rand();
+
+        printf("%u\n", ix);
+    }
+
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/utils/ptab.pl b/security/nss/lib/freebl/mpi/utils/ptab.pl
new file mode 100755
index 000000000..ef2e565be
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/ptab.pl
@@ -0,0 +1,26 @@
+#!/usr/bin/perl
+
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# Reads one prime per line (from the files named on the command line, or
+# from standard input) and prints a C definition of prime_tab[] together
+# with its element count prime_tab_size.
+use strict;
+use warnings;
+
+my @primes;
+while (my $line = <>) {
+    chomp $line;
+    push(@primes, $line);
+}
+
+# An empty table would otherwise be emitted with a bogus 0x0000 entry.
+die "$0: no primes supplied\n" unless @primes;
+
+# Sort before splitting off the final entry, so that the element written
+# without a trailing comma is the largest one even for unsorted input.
+@primes = sort { $a <=> $b } @primes;
+my $last = pop(@primes);
+
+printf("mp_size prime_tab_size = %d;\n", scalar(@primes) + 1);
+print "mp_digit prime_tab[] = {\n";
+
+print "\t";
+my $brk = 0;    # entries already written on the current row, modulo 8
+foreach my $prime (@primes) {
+    printf("0x%04X, ", $prime);
+    $brk = ($brk + 1) % 8;
+    print "\n\t" if (!$brk);
+}
+printf("0x%04X", $last);
+# Always end the last row, so the closing brace starts its own line.
+print "\n";
+print "};\n\n";
+
+exit 0;
diff --git a/security/nss/lib/freebl/mpi/utils/sieve.c b/security/nss/lib/freebl/mpi/utils/sieve.c
new file mode 100644
index 000000000..57768af9e
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/sieve.c
@@ -0,0 +1,243 @@
+/*
+ * sieve.c
+ *
+ * Finds prime numbers using the Sieve of Eratosthenes
+ *
+ * This implementation uses a bitmap to represent all odd integers in a
+ * given range. We iterate over this bitmap, crossing off the
+ * multiples of each prime we find. At the end, all the remaining set
+ * bits correspond to prime integers.
+ *
+ * Here, we make two passes -- once we have generated a sieve-ful of
+ * primes, we copy them out, reset the sieve using the highest
+ * generated prime from the first pass as a base. Then we cross out
+ * all the multiples of all the primes we found the first time through,
+ * and re-sieve. In this way, we get double use of the memory we
+ * allocated for the sieve the first time through. Since we also
+ * implicitly ignore multiples of 2, this amounts to 4 times the
+ * values.
+ *
+ * This could (and probably will) be generalized to re-use the sieve a
+ * few more times.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+
+typedef unsigned char byte;
+
+/* Bitmap sieve over odd integers: bit B stands for the value base + 2*B. */
+typedef struct {
+ int size; /* number of bytes in bits[] */
+ byte *bits; /* the bitmap; a set bit means "not crossed off" */
+ long base; /* value represented by bit 0 */
+ int next; /* index of the bit sieve_next() will process next */
+ int nbits; /* number of bits in the map (size * CHAR_BIT) */
+} sieve;
+
+void sieve_init(sieve *sp, long base, int nbits);
+void sieve_grow(sieve *sp, int nbits);
+long sieve_next(sieve *sp);
+void sieve_reset(sieve *sp, long base);
+void sieve_cross(sieve *sp, long val);
+void sieve_clear(sieve *sp);
+
+/*
+ * Bit-level helpers.  S is a sieve pointer, B a bit index into (S)->bits
+ * and V an integer value.
+ *   S_ISSET / S_SET / S_CLR  test, set and clear bit B.
+ *   S_VAL  maps bit index B to the value it represents, base + 2*B.
+ *   S_BIT  maps value V back to a bit index, (V - base) / 2; the division
+ *          truncates, so it is only exact when V - base is even.
+ * Arguments may be evaluated more than once.
+ */
+#define S_ISSET(S, B) (((S)->bits[(B) / CHAR_BIT] >> ((B) % CHAR_BIT)) & 1)
+#define S_SET(S, B) ((S)->bits[(B) / CHAR_BIT] |= (1 << ((B) % CHAR_BIT)))
+#define S_CLR(S, B) ((S)->bits[(B) / CHAR_BIT] &= ~(1 << ((B) % CHAR_BIT)))
+#define S_VAL(S, B) ((S)->base + (2 * (B)))
+#define S_BIT(S, V) (((V) - ((S)->base)) / 2)
+
+/*
+ * Two-pass sieve driver.
+ *
+ * argv[1] gives the sieve width in bit positions; its sign is ignored.
+ * Pass one sieves the odd numbers starting at 3 and prints every prime
+ * found.  The sieve is then re-based on the largest of those primes, the
+ * multiples of all first-pass primes are crossed off, and pass two sieves
+ * and prints the primes of the new range.
+ *
+ * Primes go to stdout, one per line; progress messages go to stderr.
+ * Returns 0 on success and 1 on a usage error or allocation failure.
+ */
+int
+main(int argc, char *argv[])
+{
+    /* S_VAL doubles the bit index in int arithmetic, so the bit count
+     * (after rounding up to whole bytes) must stay below INT_MAX / 2. */
+    const long max_width = INT_MAX / 2 - CHAR_BIT;
+    sieve s;
+    long width, last;
+    long *p;
+    int c, ix, cur = 0;
+
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s <width>\n", argv[0]);
+        return 1;
+    }
+
+    /* Range-check before negating, so that no value can overflow, and
+     * reject 0, which would leave no prime to re-base the sieve on. */
+    width = strtol(argv[1], NULL, 10);
+    if (width == 0 || width > max_width || width < -max_width) {
+        fprintf(stderr, "%s: width must be a nonzero value of magnitude "
+                        "at most %ld\n",
+                argv[0], max_width);
+        return 1;
+    }
+    c = (int)(width < 0 ? -width : width);
+
+    fprintf(stderr, "%s: sieving to %d positions\n", argv[0], c);
+
+    sieve_init(&s, 3, c);
+    if (s.bits == NULL) {
+        fprintf(stderr, "%s: out of memory creating sieve\n", argv[0]);
+        return 1;
+    }
+
+    /* Pass one: run the sieve to completion.  The number of calls is an
+     * upper bound on the number of primes left standing, so it is used
+     * to size the array that holds them. */
+    c = 0;
+    while (sieve_next(&s) > 0) {
+        ++c;
+    }
+
+    p = calloc(c, sizeof *p);
+    if (!p) {
+        fprintf(stderr, "%s: out of memory after first half\n", argv[0]);
+        sieve_clear(&s);
+        return 1;
+    }
+
+    fprintf(stderr, "%s: half done ... \n", argv[0]);
+
+    for (ix = 0; ix < s.nbits; ix++) {
+        if (S_ISSET(&s, ix)) {
+            p[cur] = S_VAL(&s, ix);
+            printf("%ld\n", p[cur]);
+            ++cur;
+        }
+    }
+
+    if (cur == 0) {
+        /* Nothing survived, so there is no prime to re-base on. */
+        free(p);
+        sieve_clear(&s);
+        return 0;
+    }
+
+    /* Keep a copy of the largest prime: it is still needed for the
+     * progress message after the array has been released. */
+    last = p[cur - 1];
+
+    sieve_reset(&s, last);
+    fprintf(stderr, "%s: crossing off %d found primes ... \n", argv[0], cur);
+    for (ix = 0; ix < cur; ix++) {
+        sieve_cross(&s, p[ix]);
+        if (!(ix % 1000))
+            fputc('.', stderr);
+    }
+    fputc('\n', stderr);
+
+    free(p);
+
+    fprintf(stderr, "%s: sieving again from %ld ... \n", argv[0], last);
+    while (sieve_next(&s) > 0) {
+        /* pass two only needs the side effect on the bitmap */
+    }
+
+    fprintf(stderr, "%s: done!\n", argv[0]);
+    for (ix = 0; ix < s.nbits; ix++) {
+        if (S_ISSET(&s, ix)) {
+            printf("%ld\n", S_VAL(&s, ix));
+        }
+    }
+
+    sieve_clear(&s);
+
+    return 0;
+}
+
+/*
+ * Initialise *sp as a sieve of at least nbits bits, all set, whose bit 0
+ * stands for base.  The bit count is rounded up to a whole number of
+ * bytes and an even base is bumped to the next odd value.
+ *
+ * A non-positive nbits yields an empty sieve.  If the bitmap cannot be
+ * allocated, an error is reported on stderr and the sieve is also left
+ * empty (bits == NULL, size == 0, nbits == 0), which callers can test.
+ */
+void
+sieve_init(sieve *sp, long base, int nbits)
+{
+    int nbytes = 0;
+
+    if (nbits > 0)
+        nbytes = nbits / CHAR_BIT + ((nbits % CHAR_BIT) ? 1 : 0);
+
+    sp->bits = NULL;
+    if (nbytes > 0) {
+        /* Every byte is overwritten just below, so zero-filling
+         * allocation would be wasted work. */
+        sp->bits = malloc((size_t)nbytes);
+        if (sp->bits == NULL) {
+            fprintf(stderr, "Error: out of memory in sieve_init\n");
+            nbytes = 0;
+        } else {
+            memset(sp->bits, UCHAR_MAX, (size_t)nbytes);
+        }
+    }
+    sp->size = nbytes;
+
+    if (!(base & 1))
+        ++base;
+    sp->base = base;
+
+    sp->next = 0;
+    sp->nbits = sp->size * CHAR_BIT;
+}
+
+/*
+ * Enlarge the bitmap of *sp so that it holds at least nbits bits.  Bits
+ * already present keep their state and the newly added bytes are filled
+ * with set bits.  A request that does not need more bytes than the sieve
+ * already has is a no-op.  On allocation failure an error is printed to
+ * stderr and the sieve is left untouched.
+ */
+void
+sieve_grow(sieve *sp, int nbits)
+{
+    int wanted = (nbits / CHAR_BIT) + ((nbits % CHAR_BIT) ? 1 : 0);
+    byte *grown;
+    int pos;
+
+    if (wanted <= sp->size)
+        return;
+
+    grown = calloc(wanted, sizeof(byte));
+    if (grown == NULL) {
+        fprintf(stderr, "Error: out of memory in sieve_grow\n");
+        return;
+    }
+
+    /* Carry the old contents over, then mark the tail as all set. */
+    memcpy(grown, sp->bits, sp->size);
+    pos = sp->size;
+    while (pos < wanted) {
+        grown[pos] = UCHAR_MAX;
+        ++pos;
+    }
+
+    free(sp->bits);
+    sp->bits = grown;
+    sp->size = wanted;
+    sp->nbits = wanted * CHAR_BIT;
+}
+
+/*
+ * Process the bit at sp->next: clear the bits of every odd multiple of
+ * its value that falls inside the sieve, then advance sp->next to the
+ * following bit that is still set.
+ *
+ * Returns the value that was processed, or -1 once sp->next has moved
+ * past the last bit of the map.
+ */
+long
+sieve_next(sieve *sp)
+{
+    long out;
+    int ix = 0;
+    long val;
+
+    /* Valid bit indices are 0 .. nbits - 1, so next == nbits is already
+     * past the end; letting it through would report a value the sieve
+     * never represented. */
+    if (sp->next >= sp->nbits)
+        return -1;
+
+    out = S_VAL(sp, sp->next);
+#ifdef DEBUG
+    fprintf(stderr, "Sieving %ld\n", out);
+#endif
+
+    /* Sieve out all multiples of the current prime */
+    val = out;
+    while (ix < sp->nbits) {
+        val += out;
+        ix = S_BIT(sp, val);
+        /* Even multiples have no bit of their own; S_BIT would round
+         * them onto a neighbouring odd value, so skip them. */
+        if ((val & 1) && ix < sp->nbits) { /* && S_ISSET(sp, ix)) { */
+            S_CLR(sp, ix);
+#ifdef DEBUG
+            fprintf(stderr, "Crossing out %ld (bit %d)\n", val, ix);
+#endif
+        }
+    }
+
+    /* Scan ahead to the next prime */
+    ++sp->next;
+    while (sp->next < sp->nbits && !S_ISSET(sp, sp->next))
+        ++sp->next;
+
+    return out;
+}
+
+/*
+ * Clear the bit of every odd multiple of val that lies at or above
+ * sp->base and inside the sieve.  val itself counts as a multiple when
+ * it is not below the base.  A non-positive val is ignored.
+ */
+void
+sieve_cross(sieve *sp, long val)
+{
+    int ix = 0;
+    long cur = val;
+
+    /* Stepping by a non-positive amount could never reach the base. */
+    if (val <= 0)
+        return;
+
+    /* Jump straight to the first multiple of val that is >= base
+     * instead of walking there one multiple at a time. */
+    if (cur < sp->base) {
+        long rem = sp->base % val;
+
+        cur = rem ? sp->base + (val - rem) : sp->base;
+    }
+
+    ix = S_BIT(sp, cur);
+    while (ix < sp->nbits) {
+        /* Only odd values have a bit in the map. */
+        if (cur & 1)
+            S_CLR(sp, ix);
+        cur += val;
+        ix = S_BIT(sp, cur);
+    }
+}
+
+/*
+ * Reuse an existing sieve for a new range: every bit is set again, bit 0
+ * now stands for base, and scanning restarts at bit 0.  The base is
+ * stored as given; it is not adjusted to be odd here.
+ */
+void
+sieve_reset(sieve *sp, long base)
+{
+    sp->next = 0;
+    sp->base = base;
+    memset(sp->bits, UCHAR_MAX, sp->size);
+}
+
+/*
+ * Release the bitmap owned by *sp and null the pointer, so that a second
+ * call is harmless.
+ */
+void
+sieve_clear(sieve *sp)
+{
+    /* free() accepts a null pointer, so no guard is required. */
+    free(sp->bits);
+    sp->bits = NULL;
+}
diff --git a/security/nss/lib/freebl/mpi/vis_32.il b/security/nss/lib/freebl/mpi/vis_32.il
new file mode 100644
index 000000000..d2e8024ac
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/vis_32.il
@@ -0,0 +1,1291 @@
+!
+! This Source Code Form is subject to the terms of the Mozilla Public
+! License, v. 2.0. If a copy of the MPL was not distributed with this
+! file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+! The interface to the VIS instructions as declared below (and in the VIS
+! User's Manual) will not change, but the macro implementation might change
+! in the future.
+
+!--------------------------------------------------------------------
+! Pure edge handling instructions
+!
+! Each template is delimited by ".inline <name>,<size>" and ".end".
+! The two pointer arguments are read from %o0 and %o1 and the result of
+! the edge instruction is written back to %o0.
+!
+! int vis_edge8(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge8,8
+ edge8 %o0,%o1,%o0
+ .end
+!
+! int vis_edge8l(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge8l,8
+ edge8l %o0,%o1,%o0
+ .end
+!
+! int vis_edge16(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge16,8
+ edge16 %o0,%o1,%o0
+ .end
+!
+! int vis_edge16l(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge16l,8
+ edge16l %o0,%o1,%o0
+ .end
+!
+! int vis_edge32(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge32,8
+ edge32 %o0,%o1,%o0
+ .end
+!
+! int vis_edge32l(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge32l,8
+ edge32l %o0,%o1,%o0
+ .end
+
+!--------------------------------------------------------------------
+! Edge handling instructions with negative return values if cc set
+!
+! After the edge instruction has written its result to %o0, %o1 is
+! cleared and then replaced by -1024 by movgu when the %icc condition
+! codes satisfy "greater than, unsigned".  OR-ing %o1 into %o0 is what
+! turns the returned value negative in that case.
+!
+! int vis_edge8cc(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge8cc,8
+ edge8 %o0,%o1,%o0
+ mov 0,%o1
+ movgu %icc,-1024,%o1
+ or %o1,%o0,%o0
+ .end
+!
+! int vis_edge8lcc(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge8lcc,8
+ edge8l %o0,%o1,%o0
+ mov 0,%o1
+ movgu %icc,-1024,%o1
+ or %o1,%o0,%o0
+ .end
+!
+! int vis_edge16cc(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge16cc,8
+ edge16 %o0,%o1,%o0
+ mov 0,%o1
+ movgu %icc,-1024,%o1
+ or %o1,%o0,%o0
+ .end
+!
+! int vis_edge16lcc(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge16lcc,8
+ edge16l %o0,%o1,%o0
+ mov 0,%o1
+ movgu %icc,-1024,%o1
+ or %o1,%o0,%o0
+ .end
+!
+! int vis_edge32cc(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge32cc,8
+ edge32 %o0,%o1,%o0
+ mov 0,%o1
+ movgu %icc,-1024,%o1
+ or %o1,%o0,%o0
+ .end
+!
+! int vis_edge32lcc(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge32lcc,8
+ edge32l %o0,%o1,%o0
+ mov 0,%o1
+ movgu %icc,-1024,%o1
+ or %o1,%o0,%o0
+ .end
+
+!--------------------------------------------------------------------
+! Alignment instructions
+!
+! The alignaddr templates work entirely in the integer registers
+! (%o0, %o1 -> %o0).  vis_faligndata reads its two double arguments from
+! the register pairs %o0:%o1 and %o2:%o3, moves each one into a
+! floating-point register by storing it to the stack slot at [%sp+0x48]
+! and loading it back, and leaves its result in %f0.
+!
+! void *vis_alignaddr(void */*rs1*/, int /*rs2*/);
+!
+ .inline vis_alignaddr,8
+ alignaddr %o0,%o1,%o0
+ .end
+!
+! void *vis_alignaddrl(void */*rs1*/, int /*rs2*/);
+!
+ .inline vis_alignaddrl,8
+ alignaddrl %o0,%o1,%o0
+ .end
+!
+! double vis_faligndata(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_faligndata,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ faligndata %f4,%f10,%f0
+ .end
+
+!--------------------------------------------------------------------
+! Partitioned comparison instructions
+!
+! Both double operands are read from %o0:%o1 and %o2:%o3 and moved into
+! %f4 and %f10 through the stack slot at [%sp+0x48].  The comparison
+! writes its integer result directly to %o0.
+!
+! int vis_fcmple16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmple16,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fcmple16 %f4,%f10,%o0
+ .end
+!
+! int vis_fcmpne16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmpne16,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fcmpne16 %f4,%f10,%o0
+ .end
+!
+! int vis_fcmple32(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmple32,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fcmple32 %f4,%f10,%o0
+ .end
+!
+! int vis_fcmpne32(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmpne32,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fcmpne32 %f4,%f10,%o0
+ .end
+!
+! int vis_fcmpgt16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmpgt16,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fcmpgt16 %f4,%f10,%o0
+ .end
+!
+! int vis_fcmpeq16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmpeq16,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fcmpeq16 %f4,%f10,%o0
+ .end
+!
+! int vis_fcmpgt32(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmpgt32,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fcmpgt32 %f4,%f10,%o0
+ .end
+!
+! int vis_fcmpeq32(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmpeq32,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fcmpeq32 %f4,%f10,%o0
+ .end
+
+!--------------------------------------------------------------------
+! Partitioned arithmetic
+!
+! Operands are read from the %o registers and moved into floating-point
+! registers through a stack slot: float operands with st/ld (one word),
+! double operands with std/ldd (a register pair).  Results are left in
+! %f0.
+!
+! double vis_fmul8x16(float /*frs1*/, double /*frs2*/);
+!
+! The double follows a single-word float, so it is read from %o1:%o2 and
+! stored one word at a time before being reloaded as a pair.
+!
+ .inline vis_fmul8x16,12
+ st %o0,[%sp+0x44]
+ ld [%sp+0x44],%f4
+ st %o1,[%sp+0x48]
+ st %o2,[%sp+0x4c]
+ ldd [%sp+0x48],%f10
+ fmul8x16 %f4,%f10,%f0
+ .end
+!
+! double vis_fmul8x16_dummy(float /*frs1*/, int /*dummy*/, double /*frs2*/);
+!
+! The unused int argument moves the double to %o2:%o3, where a single
+! std can store it.
+!
+ .inline vis_fmul8x16_dummy,16
+ st %o0,[%sp+0x44]
+ ld [%sp+0x44],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fmul8x16 %f4,%f10,%f0
+ .end
+!
+! double vis_fmul8x16au(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fmul8x16au,8
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f4
+ st %o1,[%sp+0x48]
+ ld [%sp+0x48],%f10
+ fmul8x16au %f4,%f10,%f0
+ .end
+!
+! double vis_fmul8x16al(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fmul8x16al,8
+ st %o0,[%sp+0x44]
+ ld [%sp+0x44],%f4
+ st %o1,[%sp+0x48]
+ ld [%sp+0x48],%f10
+ fmul8x16al %f4,%f10,%f0
+ .end
+!
+! double vis_fmul8sux16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fmul8sux16,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fmul8sux16 %f4,%f10,%f0
+ .end
+!
+! double vis_fmul8ulx16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fmul8ulx16,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fmul8ulx16 %f4,%f10,%f0
+ .end
+!
+! double vis_fmuld8sux16(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fmuld8sux16,8
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f4
+ st %o1,[%sp+0x48]
+ ld [%sp+0x48],%f10
+ fmuld8sux16 %f4,%f10,%f0
+ .end
+!
+! double vis_fmuld8ulx16(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fmuld8ulx16,8
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f4
+ st %o1,[%sp+0x48]
+ ld [%sp+0x48],%f10
+ fmuld8ulx16 %f4,%f10,%f0
+ .end
+!
+! double vis_fpadd16(double /*frs1*/, double /*frs2*/);
+!
+! NOTE(review): the first operand is spilled to [%sp+0x40] here, unlike
+! the neighbouring double-argument templates, which use [%sp+0x48] for
+! both operands -- confirm that this slot is free to overwrite.
+!
+ .inline vis_fpadd16,16
+ std %o0,[%sp+0x40]
+ ldd [%sp+0x40],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fpadd16 %f4,%f10,%f0
+ .end
+!
+! float vis_fpadd16s(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fpadd16s,8
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f4
+ st %o1,[%sp+0x48]
+ ld [%sp+0x48],%f10
+ fpadd16s %f4,%f10,%f0
+ .end
+!
+! double vis_fpadd32(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fpadd32,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fpadd32 %f4,%f10,%f0
+ .end
+!
+! float vis_fpadd32s(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fpadd32s,8
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f4
+ st %o1,[%sp+0x48]
+ ld [%sp+0x48],%f10
+ fpadd32s %f4,%f10,%f0
+ .end
+!
+! double vis_fpsub16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fpsub16,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fpsub16 %f4,%f10,%f0
+ .end
+!
+! float vis_fpsub16s(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fpsub16s,8
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f4
+ st %o1,[%sp+0x48]
+ ld [%sp+0x48],%f10
+ fpsub16s %f4,%f10,%f0
+ .end
+!
+! double vis_fpsub32(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fpsub32,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fpsub32 %f4,%f10,%f0
+ .end
+!
+! float vis_fpsub32s(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fpsub32s,8
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f4
+ st %o1,[%sp+0x48]
+ ld [%sp+0x48],%f10
+ fpsub32s %f4,%f10,%f0
+ .end
+
+!--------------------------------------------------------------------
+! Pixel packing
+!
+! Double operands are read from %o0:%o1 and %o2:%o3 and moved into
+! floating-point registers through the stack slot at [%sp+0x48].
+! Single-word pack results go to %f0.  The "_pair" templates pack their
+! two operands into %f0 and %f1 respectively, and the st2/std templates
+! then store those two words through the pointer argument in %o4.
+!
+! float vis_fpack16(double /*frs2*/);
+!
+ .inline vis_fpack16,8
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ fpack16 %f4,%f0
+ .end
+
+!
+! double vis_fpack16_pair(double /*frs2*/, double /*frs2*/);
+!
+ .inline vis_fpack16_pair,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fpack16 %f4,%f0
+ fpack16 %f10,%f1
+ .end
+!
+! void vis_st2_fpack16(double, double, double *)
+!
+ .inline vis_st2_fpack16,20
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fpack16 %f4,%f0
+ fpack16 %f10,%f1
+ st %f0,[%o4+0]
+ st %f1,[%o4+4]
+ .end
+!
+! void vis_std_fpack16(double, double, double *)
+!
+ .inline vis_std_fpack16,20
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fpack16 %f4,%f0
+ fpack16 %f10,%f1
+ std %f0,[%o4]
+ .end
+!
+! void vis_st2_fpackfix(double, double, double *)
+!
+ .inline vis_st2_fpackfix,20
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fpackfix %f4,%f0
+ fpackfix %f10,%f1
+ st %f0,[%o4+0]
+ st %f1,[%o4+4]
+ .end
+!
+! double vis_fpack16_to_hi(double /*frs1*/, double /*frs2*/);
+!
+! frs1 is loaded into %f0:%f1 first; packing frs2 into %f0 then replaces
+! only the upper word and leaves %f1 holding the lower word of frs1.
+!
+ .inline vis_fpack16_to_hi,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f0
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ fpack16 %f4,%f0
+ .end
+
+! double vis_fpack16_to_lo(double /*frs1*/, double /*frs2*/);
+!
+! Same idea for the lower word: frs2 is packed into the scratch register
+! %f3 and then copied into %f1.
+!
+ .inline vis_fpack16_to_lo,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f0
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ fpack16 %f4,%f3
+ fmovs %f3,%f1 /* without this, optimizer goes wrong */
+ .end
+
+!
+! double vis_fpack32(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fpack32,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fpack32 %f4,%f10,%f0
+ .end
+!
+! float vis_fpackfix(double /*frs2*/);
+!
+ .inline vis_fpackfix,8
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ fpackfix %f4,%f0
+ .end
+!
+! double vis_fpackfix_pair(double /*frs2*/, double /*frs2*/);
+!
+ .inline vis_fpackfix_pair,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f6
+ fpackfix %f4,%f0
+ fpackfix %f6,%f1
+ .end
+
+!--------------------------------------------------------------------
+! Motion estimation
+!
+! The third argument (frd, read from %o4:%o5) is loaded into %f0 before
+! the two source operands are set up, because pdist uses its destination
+! register as an input as well as an output.
+!
+! double vis_pdist(double /*frs1*/, double /*frs2*/, double /*frd*/);
+!
+ .inline vis_pdist,24
+ std %o4,[%sp+0x48]
+ ldd [%sp+0x48],%f0
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ pdist %f4,%f10,%f0
+ .end
+
+!--------------------------------------------------------------------
+! Channel merging
+!
+! The two float operands are read from %o0 and %o1, moved into %f4 and
+! %f10 through the stack slot at [%sp+0x48], and merged into %f0.
+!
+! double vis_fpmerge(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fpmerge,8
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f4
+ st %o1,[%sp+0x48]
+ ld [%sp+0x48],%f10
+ fpmerge %f4,%f10,%f0
+ .end
+
+!--------------------------------------------------------------------
+! Pixel expansion
+!
+! fexpand is applied to a single-word register in all three templates.
+! The "_hi" and "_lo" variants load a double into %f4:%f5 and expand its
+! upper word (%f4) or its lower word (%f5, copied to %f2 first).
+!
+! double vis_fexpand(float /*frs2*/);
+!
+ .inline vis_fexpand,4
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f4
+ fexpand %f4,%f0
+ .end
+
+! double vis_fexpand_hi(double /*frs2*/);
+!
+ .inline vis_fexpand_hi,8
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ fexpand %f4,%f0
+ .end
+
+! double vis_fexpand_lo(double /*frs2*/);
+!
+ .inline vis_fexpand_lo,8
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ fmovs %f5, %f2
+ fexpand %f2,%f0
+ .end
+
+!--------------------------------------------------------------------
+! Bitwise logical operations
+!
+! Every operation comes as a double form and a float form (name ending
+! in "s").  Operands are read from the %o registers and moved into %f4
+! and %f10 through the stack slot at [%sp+0x48] (std/ldd for doubles,
+! st/ld for floats); the result is left in %f0.
+! vis_fandnot, vis_fnot, vis_fsrc and vis_fornot (and their "s" forms)
+! are implemented with the "1" variants of the instructions (fandnot1,
+! fnot1, fsrc1, fornot1).
+!
+! double vis_fnor(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fnor,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fnor %f4,%f10,%f0
+ .end
+!
+! float vis_fnors(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fnors,8
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f4
+ st %o1,[%sp+0x48]
+ ld [%sp+0x48],%f10
+ fnors %f4,%f10,%f0
+ .end
+!
+! double vis_fandnot(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fandnot,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fandnot1 %f4,%f10,%f0
+ .end
+!
+! float vis_fandnots(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fandnots,8
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f4
+ st %o1,[%sp+0x48]
+ ld [%sp+0x48],%f10
+ fandnot1s %f4,%f10,%f0
+ .end
+!
+! double vis_fnot(double /*frs1*/);
+!
+ .inline vis_fnot,8
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ fnot1 %f4,%f0
+ .end
+!
+! float vis_fnots(float /*frs1*/);
+!
+ .inline vis_fnots,4
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f4
+ fnot1s %f4,%f0
+ .end
+!
+! double vis_fxor(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fxor,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fxor %f4,%f10,%f0
+ .end
+!
+! float vis_fxors(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fxors,8
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f4
+ st %o1,[%sp+0x48]
+ ld [%sp+0x48],%f10
+ fxors %f4,%f10,%f0
+ .end
+!
+! double vis_fnand(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fnand,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fnand %f4,%f10,%f0
+ .end
+!
+! float vis_fnands(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fnands,8
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f4
+ st %o1,[%sp+0x48]
+ ld [%sp+0x48],%f10
+ fnands %f4,%f10,%f0
+ .end
+!
+! double vis_fand(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fand,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fand %f4,%f10,%f0
+ .end
+!
+! float vis_fands(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fands,8
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f4
+ st %o1,[%sp+0x48]
+ ld [%sp+0x48],%f10
+ fands %f4,%f10,%f0
+ .end
+!
+! double vis_fxnor(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fxnor,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fxnor %f4,%f10,%f0
+ .end
+!
+! float vis_fxnors(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fxnors,8
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f4
+ st %o1,[%sp+0x48]
+ ld [%sp+0x48],%f10
+ fxnors %f4,%f10,%f0
+ .end
+!
+! double vis_fsrc(double /*frs1*/);
+!
+ .inline vis_fsrc,8
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ fsrc1 %f4,%f0
+ .end
+!
+! float vis_fsrcs(float /*frs1*/);
+!
+ .inline vis_fsrcs,4
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f4
+ fsrc1s %f4,%f0
+ .end
+!
+! double vis_fornot(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fornot,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ fornot1 %f4,%f10,%f0
+ .end
+!
+! float vis_fornots(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fornots,8
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f4
+ st %o1,[%sp+0x48]
+ ld [%sp+0x48],%f10
+ fornot1s %f4,%f10,%f0
+ .end
+!
+! double vis_for(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_for,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ std %o2,[%sp+0x48]
+ ldd [%sp+0x48],%f10
+ for %f4,%f10,%f0
+ .end
+!
+! float vis_fors(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fors,8
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f4
+ st %o1,[%sp+0x48]
+ ld [%sp+0x48],%f10
+ fors %f4,%f10,%f0
+ .end
+!
+! double vis_fzero(/* void */)
+!
+ .inline vis_fzero,0
+ fzero %f0
+ .end
+!
+! float vis_fzeros(/* void */)
+!
+ .inline vis_fzeros,0
+ fzeros %f0
+ .end
+!
+! double vis_fone(/* void */)
+!
+ .inline vis_fone,0
+ fone %f0
+ .end
+!
+! float vis_fones(/* void */)
+!
+ .inline vis_fones,0
+ fones %f0
+ .end
+
+!--------------------------------------------------------------------
+! Partial store instructions
+!
+! The double to store is read from %o0:%o1 and moved into %f4 through
+! the stack slot at [%sp+0x48].  The stda then takes the destination
+! address from %o2 and the mask from %o3; the ASI is the immediate named
+! in the trailing comment.  The "_int_pair" variant instead builds
+! %f4:%f5 from two words loaded through the pointers in %o0 and %o1.
+!
+! vis_stdfa_ASI_PST8P(double frd, void *rs1, int rmask)
+!
+ .inline vis_stdfa_ASI_PST8P,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ stda %f4,[%o2]%o3,0xc0 ! ASI_PST8_P
+ .end
+!
+! vis_stdfa_ASI_PST8PL(double frd, void *rs1, int rmask)
+!
+ .inline vis_stdfa_ASI_PST8PL,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ stda %f4,[%o2]%o3,0xc8 ! ASI_PST8_PL
+ .end
+!
+! vis_stdfa_ASI_PST8P_int_pair(void *rs1, void *rs2, void *rs3, int rmask);
+!
+ .inline vis_stdfa_ASI_PST8P_int_pair,16
+ ld [%o0],%f4
+ ld [%o1],%f5
+ stda %f4,[%o2]%o3,0xc0 ! ASI_PST8_P
+ .end
+!
+! vis_stdfa_ASI_PST8S(double frd, void *rs1, int rmask)
+!
+ .inline vis_stdfa_ASI_PST8S,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ stda %f4,[%o2]%o3,0xc1 ! ASI_PST8_S
+ .end
+!
+! vis_stdfa_ASI_PST16P(double frd, void *rs1, int rmask)
+!
+ .inline vis_stdfa_ASI_PST16P,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ stda %f4,[%o2]%o3,0xc2 ! ASI_PST16_P
+ .end
+!
+! vis_stdfa_ASI_PST16S(double frd, void *rs1, int rmask)
+!
+ .inline vis_stdfa_ASI_PST16S,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ stda %f4,[%o2]%o3,0xc3 ! ASI_PST16_S
+ .end
+!
+! vis_stdfa_ASI_PST32P(double frd, void *rs1, int rmask)
+!
+ .inline vis_stdfa_ASI_PST32P,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ stda %f4,[%o2]%o3,0xc4 ! ASI_PST32_P
+ .end
+!
+! vis_stdfa_ASI_PST32S(double frd, void *rs1, int rmask)
+!
+ .inline vis_stdfa_ASI_PST32S,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ stda %f4,[%o2]%o3,0xc5 ! ASI_PST32_S
+ .end
+
+!--------------------------------------------------------------------
+! Short store instructions
+!
+! The double to store is read from %o0:%o1 and moved into %f4 through
+! the stack slot at [%sp+0x48].  The destination address is %o2, or
+! %o2+%o3 in the "_index" variants; the ASI is the immediate named in
+! the trailing comment.
+!
+! vis_stdfa_ASI_FL8P(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL8P,12
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ stda %f4,[%o2]0xd0 ! ASI_FL8_P
+ .end
+!
+! vis_stdfa_ASI_FL8P_index(double frd, void *rs1, long index)
+!
+ .inline vis_stdfa_ASI_FL8P_index,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ stda %f4,[%o2+%o3]0xd0 ! ASI_FL8_P
+ .end
+!
+! vis_stdfa_ASI_FL8S(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL8S,12
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ stda %f4,[%o2]0xd1 ! ASI_FL8_S
+ .end
+!
+! vis_stdfa_ASI_FL16P(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL16P,12
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ stda %f4,[%o2]0xd2 ! ASI_FL16_P
+ .end
+!
+! vis_stdfa_ASI_FL16P_index(double frd, void *rs1, long index)
+!
+ .inline vis_stdfa_ASI_FL16P_index,16
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ stda %f4,[%o2+%o3]0xd2 ! ASI_FL16_P
+ .end
+!
+! vis_stdfa_ASI_FL16S(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL16S,12
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ stda %f4,[%o2]0xd3 ! ASI_FL16_S
+ .end
+!
+! vis_stdfa_ASI_FL8PL(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL8PL,12
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ stda %f4,[%o2]0xd8 ! ASI_FL8_PL
+ .end
+!
+! vis_stdfa_ASI_FL8SL(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL8SL,12
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ stda %f4,[%o2]0xd9 ! ASI_FL8_SL
+ .end
+!
+! vis_stdfa_ASI_FL16PL(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL16PL,12
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ stda %f4,[%o2]0xda ! ASI_FL16_PL
+ .end
+!
+! vis_stdfa_ASI_FL16SL(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL16SL,12
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ stda %f4,[%o2]0xdb ! ASI_FL16_SL
+ .end
+
+!--------------------------------------------------------------------
+! Short load instructions
+!
+! Each template loads through the address in %o0 (plus the index in %o1
+! for the indexed variants) into %f4 and then copies %f4 to %f0.
+! In the "_hi" variant the index actually used is %o1 shifted right
+! arithmetically by 16; in the "_lo" variant it is the low 16 bits of
+! %o1, sign-extended by the sll/sra pair.
+!
+! double vis_lddfa_ASI_FL8P(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL8P,4
+ ldda [%o0]0xd0,%f4 ! ASI_FL8_P
+ fmovd %f4,%f0 ! Compiler can clean this up
+ .end
+!
+! double vis_lddfa_ASI_FL8P_index(void *rs1, long index)
+!
+ .inline vis_lddfa_ASI_FL8P_index,8
+ ldda [%o0+%o1]0xd0,%f4
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL8P_hi(void *rs1, unsigned int index)
+!
+ .inline vis_lddfa_ASI_FL8P_hi,8
+ sra %o1,16,%o1
+ ldda [%o0+%o1]0xd0,%f4
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL8P_lo(void *rs1, unsigned int index)
+!
+ .inline vis_lddfa_ASI_FL8P_lo,8
+ sll %o1,16,%o1
+ sra %o1,16,%o1
+ ldda [%o0+%o1]0xd0,%f4
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL8S(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL8S,4
+ ldda [%o0]0xd1,%f4 ! ASI_FL8_S
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL16P(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL16P,4
+ ldda [%o0]0xd2,%f4 ! ASI_FL16_P
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL16P_index(void *rs1, long index)
+!
+ .inline vis_lddfa_ASI_FL16P_index,8
+ ldda [%o0+%o1]0xd2,%f4 ! ASI_FL16_P
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL16S(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL16S,4
+ ldda [%o0]0xd3,%f4 ! ASI_FL16_S
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL8PL(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL8PL,4
+ ldda [%o0]0xd8,%f4 ! ASI_FL8_PL
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL8PL_index(void *rs1, long index)
+!
+ .inline vis_lddfa_ASI_FL8PL_index,8
+ ldda [%o0+%o1]0xd8,%f4 ! ASI_FL8_PL
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL8SL(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL8SL,4
+ ldda [%o0]0xd9,%f4 ! ASI_FL8_SL
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL16PL(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL16PL,4
+ ldda [%o0]0xda,%f4 ! ASI_FL16_PL
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL16PL_index(void *rs1, long index)
+!
+ .inline vis_lddfa_ASI_FL16PL_index,8
+ ldda [%o0+%o1]0xda,%f4 ! ASI_FL16_PL
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL16SL(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL16SL,4
+ ldda [%o0]0xdb,%f4 ! ASI_FL16_SL
+ fmovd %f4,%f0
+ .end
+
+!--------------------------------------------------------------------
+! Graphics status register
+!
+! vis_read_gsr copies %gsr into %o0; vis_write_gsr writes the value
+! passed in %o0 to %gsr.
+!
+! unsigned int vis_read_gsr(void)
+!
+ .inline vis_read_gsr,0
+ rd %gsr,%o0
+ .end
+!
+! void vis_write_gsr(unsigned int /* GSR */)
+!
+ .inline vis_write_gsr,4
+ wr %g0,%o0,%gsr
+ .end
+
+!--------------------------------------------------------------------
+! Voxel texture mapping
+!
+! The 64-bit first argument is read as two 32-bit halves, %o0 (upper)
+! and %o1 (lower), which are joined into %o3 before the array
+! instruction runs.  The second argument is read from %o2 and the result
+! is written to %o0.
+!
+! unsigned long vis_array8(unsigned long long /*rs1 */, int /*rs2*/)
+!
+ .inline vis_array8,12
+ sllx %o0,32,%o0
+ srl %o1,0,%o1 ! clear the most significant 32 bits of %o1
+ or %o0,%o1,%o3 ! join %o0 and %o1 into %o3
+ array8 %o3,%o2,%o0
+ .end
+!
+! unsigned long vis_array16(unsigned long long /*rs1*/, int /*rs2*/)
+!
+ .inline vis_array16,12
+ sllx %o0,32,%o0
+ srl %o1,0,%o1 ! clear the most significant 32 bits of %o1
+ or %o0,%o1,%o3 ! join %o0 and %o1 into %o3
+ array16 %o3,%o2,%o0
+ .end
+!
+! unsigned long vis_array32(unsigned long long /*rs1*/, int /*rs2*/)
+!
+ .inline vis_array32,12
+ sllx %o0,32,%o0
+ srl %o1,0,%o1 ! clear the most significant 32 bits of %o1
+ or %o0,%o1,%o3 ! join %o0 and %o1 into %o3
+ array32 %o3,%o2,%o0
+ .end
+
+!--------------------------------------------------------------------
+! Register aliasing and type casts
+!
+! These templates contain no VIS arithmetic: they only move bits from
+! the %o argument registers into %f0 (float result) or %f0:%f1 (double
+! result) by storing them to a stack slot and loading them back, with an
+! fmovs where a single word has to change places.
+!
+! float vis_read_hi(double /* frs1 */);
+!
+ .inline vis_read_hi,8
+ std %o0,[%sp+0x48] ! store double frs1
+ ldd [%sp+0x48],%f0 ! %f0:%f1 = double frs1; return %f0;
+ .end
+!
+! float vis_read_lo(double /* frs1 */);
+!
+ .inline vis_read_lo,8
+ std %o0,[%sp+0x48] ! store double frs1
+ ldd [%sp+0x48],%f0 ! %f0:%f1 = double frs1;
+ fmovs %f1,%f0 ! %f0 = low word (frs1); return %f0;
+ .end
+!
+! double vis_write_hi(double /* frs1 */, float /* frs2 */);
+!
+ .inline vis_write_hi,12
+ std %o0,[%sp+0x48] ! store double frs1;
+ ldd [%sp+0x48],%f0 ! %f0:%f1 = double frs1;
+ st %o2,[%sp+0x44] ! store float frs2;
+ ld [%sp+0x44],%f2 ! %f2 = float frs2;
+ fmovs %f2,%f0 ! %f0 = float frs2; return %f0:f1;
+ .end
+!
+! double vis_write_lo(double /* frs1 */, float /* frs2 */);
+!
+ .inline vis_write_lo,12
+ std %o0,[%sp+0x48] ! store double frs1;
+ ldd [%sp+0x48],%f0 ! %f0:%f1 = double frs1;
+ st %o2,[%sp+0x44] ! store float frs2;
+ ld [%sp+0x44],%f2 ! %f2 = float frs2;
+ fmovs %f2,%f1 ! %f1 = float frs2; return %f0:f1;
+ .end
+!
+! double vis_freg_pair(float /* frs1 */, float /* frs2 */);
+!
+ .inline vis_freg_pair,8
+ st %o0,[%sp+0x48] ! store float frs1
+ ld [%sp+0x48],%f0
+ st %o1,[%sp+0x48] ! store float frs2
+ ld [%sp+0x48],%f1
+ .end
+!
+! float vis_to_float(unsigned int /*value*/);
+!
+ .inline vis_to_float,4
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f0
+ .end
+!
+! double vis_to_double(unsigned int /*value1*/, unsigned int /*value2*/);
+!
+ .inline vis_to_double,8
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f0
+ .end
+!
+! double vis_to_double_dup(unsigned int /*value*/);
+!
+ .inline vis_to_double_dup,4
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f1
+ fmovs %f1,%f0 ! duplicate value
+ .end
+!
+! double vis_ll_to_double(unsigned long long /*value*/);
+!
+ .inline vis_ll_to_double,8
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f0
+ .end
+
+!--------------------------------------------------------------------
+! Address space identifier (ASI) register
+!
+! vis_read_asi copies %asi into %o0; vis_write_asi writes the value
+! passed in %o0 to %asi.
+!
+! unsigned int vis_read_asi(void)
+!
+ .inline vis_read_asi,0
+ rd %asi,%o0
+ .end
+!
+! void vis_write_asi(unsigned int /* ASI */)
+!
+ .inline vis_write_asi,4
+ wr %g0,%o0,%asi
+ .end
+
+!--------------------------------------------------------------------
+! Load/store from/into alternate space
+!
+! The "_REG" templates take the ASI from the %asi register; the "_P" and
+! "_PL" templates use the immediate ASIs 0x80 and 0x88.
+! Floating-point loads land in %f4 and are copied to %f0.  For stores,
+! the value is read from the %o registers and moved into %f4 through the
+! stack slot at [%sp+0x48]; the address is read from %o1 (float stores)
+! or %o2 (double stores).  The lduha templates load straight into %o0.
+!
+! float vis_ldfa_ASI_REG(void *rs1)
+!
+ .inline vis_ldfa_ASI_REG,4
+ lda [%o0+0]%asi,%f4
+ fmovs %f4,%f0 ! Compiler can clean this up
+ .end
+!
+! float vis_ldfa_ASI_P(void *rs1)
+!
+ .inline vis_ldfa_ASI_P,4
+ lda [%o0]0x80,%f4 ! ASI_P
+ fmovs %f4,%f0 ! Compiler can clean this up
+ .end
+!
+! float vis_ldfa_ASI_PL(void *rs1)
+!
+ .inline vis_ldfa_ASI_PL,4
+ lda [%o0]0x88,%f4 ! ASI_PL
+ fmovs %f4,%f0 ! Compiler can clean this up
+ .end
+!
+! double vis_lddfa_ASI_REG(void *rs1)
+!
+ .inline vis_lddfa_ASI_REG,4
+ ldda [%o0+0]%asi,%f4
+ fmovd %f4,%f0 ! Compiler can clean this up
+ .end
+!
+! double vis_lddfa_ASI_P(void *rs1)
+!
+ .inline vis_lddfa_ASI_P,4
+ ldda [%o0]0x80,%f4 ! ASI_P
+ fmovd %f4,%f0 ! Compiler can clean this up
+ .end
+!
+! double vis_lddfa_ASI_PL(void *rs1)
+!
+ .inline vis_lddfa_ASI_PL,4
+ ldda [%o0]0x88,%f4 ! ASI_PL
+ fmovd %f4,%f0 ! Compiler can clean this up
+ .end
+!
+! vis_stfa_ASI_REG(float frs, void *rs1)
+!
+ .inline vis_stfa_ASI_REG,8
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f4
+ sta %f4,[%o1+0]%asi
+ .end
+!
+! vis_stfa_ASI_P(float frs, void *rs1)
+!
+ .inline vis_stfa_ASI_P,8
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f4
+ sta %f4,[%o1]0x80 ! ASI_P
+ .end
+!
+! vis_stfa_ASI_PL(float frs, void *rs1)
+!
+ .inline vis_stfa_ASI_PL,8
+ st %o0,[%sp+0x48]
+ ld [%sp+0x48],%f4
+ sta %f4,[%o1]0x88 ! ASI_PL
+ .end
+!
+! vis_stdfa_ASI_REG(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_REG,12
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ stda %f4,[%o2+0]%asi
+ .end
+!
+! vis_stdfa_ASI_P(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_P,12
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ stda %f4,[%o2]0x80 ! ASI_P
+ .end
+!
+! vis_stdfa_ASI_PL(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_PL,12
+ std %o0,[%sp+0x48]
+ ldd [%sp+0x48],%f4
+ stda %f4,[%o2]0x88 ! ASI_PL
+ .end
+!
+! unsigned short vis_lduha_ASI_REG(void *rs1)
+!
+ .inline vis_lduha_ASI_REG,4
+ lduha [%o0+0]%asi,%o0
+ .end
+!
+! unsigned short vis_lduha_ASI_P(void *rs1)
+!
+ .inline vis_lduha_ASI_P,4
+ lduha [%o0]0x80,%o0 ! ASI_P
+ .end
+!
+! unsigned short vis_lduha_ASI_PL(void *rs1)
+!
+ .inline vis_lduha_ASI_PL,4
+ lduha [%o0]0x88,%o0 ! ASI_PL
+ .end
+!
+! unsigned short vis_lduha_ASI_P_index(void *rs1, long index)
+!
+ .inline vis_lduha_ASI_P_index,8
+ lduha [%o0+%o1]0x80,%o0 ! ASI_P
+ .end
+!
+! unsigned short vis_lduha_ASI_PL_index(void *rs1, long index)
+!
+ .inline vis_lduha_ASI_PL_index,8
+ lduha [%o0+%o1]0x88,%o0 ! ASI_PL
+ .end
+
+!--------------------------------------------------------------------
+! Prefetch
+!
+! Both templates issue a prefetch for the address in %o0; the read
+! variant uses function code 0 and the write variant function code 2.
+!
+! void vis_prefetch_read(void * /*address*/);
+!
+ .inline vis_prefetch_read,4
+ prefetch [%o0+0],0
+ .end
+!
+! void vis_prefetch_write(void * /*address*/);
+!
+ .inline vis_prefetch_write,4
+ prefetch [%o0+0],2
+ .end
diff --git a/security/nss/lib/freebl/mpi/vis_64.il b/security/nss/lib/freebl/mpi/vis_64.il
new file mode 100644
index 000000000..cbe2b5aa2
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/vis_64.il
@@ -0,0 +1,997 @@
+!
+! This Source Code Form is subject to the terms of the Mozilla Public
+! License, v. 2.0. If a copy of the MPL was not distributed with this
+! file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+! This file is to be used in place of vis.il in 64-bit builds.
+
+!--------------------------------------------------------------------
+! Pure edge handling instructions
+!
+! int vis_edge8(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge8,16
+ edge8 %o0,%o1,%o0
+ .end
+!
+! int vis_edge8l(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge8l,16
+ edge8l %o0,%o1,%o0
+ .end
+!
+! int vis_edge16(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge16,16
+ edge16 %o0,%o1,%o0
+ .end
+!
+! int vis_edge16l(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge16l,16
+ edge16l %o0,%o1,%o0
+ .end
+!
+! int vis_edge32(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge32,16
+ edge32 %o0,%o1,%o0
+ .end
+!
+! int vis_edge32l(void */*frs1*/, void */*frs2*/);
+!
+ .inline vis_edge32l,16
+ edge32l %o0,%o1,%o0
+ .end
+
+!--------------------------------------------------------------------
+! Edge handling instructions with negative return values if cc set
+!
+! int vis_edge8cc(void */*frs1*/, void */*frs2*/);
+!
+! edge8 leaves its mask in %o0; %o1 is then reused as scratch: it is
+! zeroed and replaced by -1024 only when the greater-unsigned condition
+! holds on %xcc. ORing %o1 into %o0 therefore returns the plain mask, or
+! the mask with the sign bits set (a negative int) in that case.
+!
+ .inline vis_edge8cc,16
+ edge8 %o0,%o1,%o0
+ mov 0,%o1
+ movgu %xcc,-1024,%o1
+ or %o1,%o0,%o0
+ .end
+!
+! int vis_edge8lcc(void */*frs1*/, void */*frs2*/);
+!
+! edge8l leaves its mask in %o0; %o1 is then reused as scratch: it is
+! zeroed and replaced by -1024 only when the greater-unsigned condition
+! holds on %xcc. ORing %o1 into %o0 therefore returns the plain mask, or
+! the mask with the sign bits set (a negative int) in that case.
+!
+ .inline vis_edge8lcc,16
+ edge8l %o0,%o1,%o0
+ mov 0,%o1
+ movgu %xcc,-1024,%o1
+ or %o1,%o0,%o0
+ .end
+!
+! int vis_edge16cc(void */*frs1*/, void */*frs2*/);
+!
+! edge16 leaves its mask in %o0; %o1 is then reused as scratch: it is
+! zeroed and replaced by -1024 only when the greater-unsigned condition
+! holds on %xcc. ORing %o1 into %o0 therefore returns the plain mask, or
+! the mask with the sign bits set (a negative int) in that case.
+!
+ .inline vis_edge16cc,16
+ edge16 %o0,%o1,%o0
+ mov 0,%o1
+ movgu %xcc,-1024,%o1
+ or %o1,%o0,%o0
+ .end
+!
+! int vis_edge16lcc(void */*frs1*/, void */*frs2*/);
+!
+! edge16l leaves its mask in %o0; %o1 is then reused as scratch: it is
+! zeroed and replaced by -1024 only when the greater-unsigned condition
+! holds on %xcc. ORing %o1 into %o0 therefore returns the plain mask, or
+! the mask with the sign bits set (a negative int) in that case.
+!
+ .inline vis_edge16lcc,16
+ edge16l %o0,%o1,%o0
+ mov 0,%o1
+ movgu %xcc,-1024,%o1
+ or %o1,%o0,%o0
+ .end
+!
+! int vis_edge32cc(void */*frs1*/, void */*frs2*/);
+!
+! edge32 leaves its mask in %o0; %o1 is then reused as scratch: it is
+! zeroed and replaced by -1024 only when the greater-unsigned condition
+! holds on %xcc. ORing %o1 into %o0 therefore returns the plain mask, or
+! the mask with the sign bits set (a negative int) in that case.
+!
+ .inline vis_edge32cc,16
+ edge32 %o0,%o1,%o0
+ mov 0,%o1
+ movgu %xcc,-1024,%o1
+ or %o1,%o0,%o0
+ .end
+!
+! int vis_edge32lcc(void */*frs1*/, void */*frs2*/);
+!
+! edge32l leaves its mask in %o0; %o1 is then reused as scratch: it is
+! zeroed and replaced by -1024 only when the greater-unsigned condition
+! holds on %xcc. ORing %o1 into %o0 therefore returns the plain mask, or
+! the mask with the sign bits set (a negative int) in that case.
+!
+ .inline vis_edge32lcc,16
+ edge32l %o0,%o1,%o0
+ mov 0,%o1
+ movgu %xcc,-1024,%o1
+ or %o1,%o0,%o0
+ .end
+
+!--------------------------------------------------------------------
+! Alignment instructions
+!
+! void *vis_alignaddr(void */*rs1*/, int /*rs2*/);
+!
+ .inline vis_alignaddr,12
+ alignaddr %o0,%o1,%o0
+ .end
+!
+! void *vis_alignaddrl(void */*rs1*/, int /*rs2*/);
+!
+ .inline vis_alignaddrl,12
+ alignaddrl %o0,%o1,%o0
+ .end
+!
+! double vis_faligndata(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_faligndata,16
+ faligndata %f0,%f2,%f0
+ .end
+
+!--------------------------------------------------------------------
+! Partitioned comparison instructions
+!
+! int vis_fcmple16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmple16,16
+ fcmple16 %f0,%f2,%o0
+ .end
+!
+! int vis_fcmpne16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmpne16,16
+ fcmpne16 %f0,%f2,%o0
+ .end
+!
+! int vis_fcmple32(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmple32,16
+ fcmple32 %f0,%f2,%o0
+ .end
+!
+! int vis_fcmpne32(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmpne32,16
+ fcmpne32 %f0,%f2,%o0
+ .end
+!
+! int vis_fcmpgt16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmpgt16,16
+ fcmpgt16 %f0,%f2,%o0
+ .end
+!
+! int vis_fcmpeq16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmpeq16,16
+ fcmpeq16 %f0,%f2,%o0
+ .end
+!
+! int vis_fcmpgt32(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmpgt32,16
+ fcmpgt32 %f0,%f2,%o0
+ .end
+!
+! int vis_fcmpeq32(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fcmpeq32,16
+ fcmpeq32 %f0,%f2,%o0
+ .end
+
+!--------------------------------------------------------------------
+! Partitioned arithmetic
+!
+! double vis_fmul8x16(float /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fmul8x16,12
+ fmul8x16 %f1,%f2,%f0
+ .end
+!
+! double vis_fmul8x16_dummy(float /*frs1*/, int /*dummy*/, double /*frs2*/);
+!
+! Same fmul8x16 operation as vis_fmul8x16, but the template reads the
+! double operand from %f4 instead of %f2; the float operand is still
+! read from %f1 and the result is produced in %f0. The dummy argument is
+! not referenced by the template -- presumably it exists only to shift
+! the double into the next argument slot (TODO confirm against the ABI).
+!
+ .inline vis_fmul8x16_dummy,16
+ fmul8x16 %f1,%f4,%f0
+ .end
+!
+! double vis_fmul8x16au(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fmul8x16au,8
+ fmul8x16au %f1,%f3,%f0
+ .end
+!
+! double vis_fmul8x16al(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fmul8x16al,8
+ fmul8x16al %f1,%f3,%f0
+ .end
+!
+! double vis_fmul8sux16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fmul8sux16,16
+ fmul8sux16 %f0,%f2,%f0
+ .end
+!
+! double vis_fmul8ulx16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fmul8ulx16,16
+ fmul8ulx16 %f0,%f2,%f0
+ .end
+!
+! double vis_fmuld8sux16(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fmuld8sux16,8
+ fmuld8sux16 %f1,%f3,%f0
+ .end
+!
+! double vis_fmuld8ulx16(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fmuld8ulx16,8
+ fmuld8ulx16 %f1,%f3,%f0
+ .end
+!
+! double vis_fpadd16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fpadd16,16
+ fpadd16 %f0,%f2,%f0
+ .end
+!
+! float vis_fpadd16s(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fpadd16s,8
+ fpadd16s %f1,%f3,%f0
+ .end
+!
+! double vis_fpadd32(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fpadd32,16
+ fpadd32 %f0,%f2,%f0
+ .end
+!
+! float vis_fpadd32s(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fpadd32s,8
+ fpadd32s %f1,%f3,%f0
+ .end
+!
+! double vis_fpsub16(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fpsub16,16
+ fpsub16 %f0,%f2,%f0
+ .end
+!
+! float vis_fpsub16s(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fpsub16s,8
+ fpsub16s %f1,%f3,%f0
+ .end
+!
+! double vis_fpsub32(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fpsub32,16
+ fpsub32 %f0,%f2,%f0
+ .end
+!
+! float vis_fpsub32s(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fpsub32s,8
+ fpsub32s %f1,%f3,%f0
+ .end
+
+!--------------------------------------------------------------------
+! Pixel packing
+!
+! float vis_fpack16(double /*frs2*/);
+!
+ .inline vis_fpack16,8
+ fpack16 %f0,%f0
+ .end
+!
+! double vis_fpack16_pair(double /*frs2*/, double /*frs2*/);
+!
+! Packs the first double argument (%f0) into the single register %f0 and
+! the second (%f2) into %f1, so the two packed words sit side by side in
+! the %f0/%f1 pair that is returned as one double. The first pack has to
+! come first: it reads the %f0 double before %f1 is overwritten.
+!
+ .inline vis_fpack16_pair,16
+ fpack16 %f0,%f0
+ fpack16 %f2,%f1
+ .end
+!
+! void vis_st2_fpack16(double, double, double *)
+!
+! Packs the first double (%f0) into %f0 and the second (%f2) into %f1,
+! then writes the two packed words to the destination in %o2 with two
+! separate 32-bit stores, at offsets 0 and 4.
+!
+ .inline vis_st2_fpack16,24
+ fpack16 %f0,%f0
+ fpack16 %f2,%f1
+ st %f0,[%o2+0]
+ st %f1,[%o2+4]
+ .end
+!
+! void vis_std_fpack16(double, double, double *)
+!
+! Packs the first double (%f0) into %f0 and the second (%f2) into %f1,
+! then writes the %f0/%f1 pair to the destination in %o2 with a single
+! 64-bit std (unlike vis_st2_fpack16, which uses two 32-bit stores).
+!
+ .inline vis_std_fpack16,24
+ fpack16 %f0,%f0
+ fpack16 %f2,%f1
+ std %f0,[%o2]
+ .end
+!
+! void vis_st2_fpackfix(double, double, double *)
+!
+! Applies fpackfix to the first double (%f0 -> %f0) and to the second
+! (%f2 -> %f1), then writes the two result words to the destination in
+! %o2 with two separate 32-bit stores, at offsets 0 and 4.
+!
+ .inline vis_st2_fpackfix,24
+ fpackfix %f0,%f0
+ fpackfix %f2,%f1
+ st %f0,[%o2+0]
+ st %f1,[%o2+4]
+ .end
+!
+! double vis_fpack16_to_hi(double /*frs1*/, double /*frs2*/);
+!
+! Packs frs2 (%f2) into %f0, i.e. into the high word of the returned
+! %f0/%f1 pair. %f1 is not written, so the low word of the result is
+! whatever frs1 carried there on entry.
+!
+ .inline vis_fpack16_to_hi,16
+ fpack16 %f2,%f0
+ .end
+
+! double vis_fpack16_to_lo(double /*frs1*/, double /*frs2*/);
+!
+! Packs frs2 (%f2) into the scratch register %f3 and then copies it into
+! %f1, the low word of the returned %f0/%f1 pair. %f0 is not written, so
+! the high word of the result is whatever frs1 carried there on entry.
+! The detour through %f3 is deliberate; see the note on the fmovs line.
+!
+ .inline vis_fpack16_to_lo,16
+ fpack16 %f2,%f3
+ fmovs %f3,%f1 /* without this, optimizer goes wrong */
+ .end
+
+!
+! double vis_fpack32(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fpack32,16
+ fpack32 %f0,%f2,%f0
+ .end
+!
+! float vis_fpackfix(double /*frs2*/);
+!
+ .inline vis_fpackfix,8
+ fpackfix %f0,%f0
+ .end
+!
+! double vis_fpackfix_pair(double /*frs2*/, double /*frs2*/);
+!
+! Applies fpackfix to the first double argument (%f0 -> %f0) and to the
+! second (%f2 -> %f1), so the two result words sit side by side in the
+! %f0/%f1 pair that is returned as one double. The first fpackfix has to
+! come first: it reads the %f0 double before %f1 is overwritten.
+!
+ .inline vis_fpackfix_pair,16
+ fpackfix %f0,%f0
+ fpackfix %f2,%f1
+ .end
+
+!--------------------------------------------------------------------
+! Motion estimation
+!
+! double vis_pxldist64(double accum /*frd*/, double pxls1 /*frs1*/,
+! double pxls2 /*frs2*/);
+!
+ .inline vis_pxldist64,24
+ pdist %f2,%f4,%f0
+ .end
+
+!--------------------------------------------------------------------
+! Channel merging
+!
+! double vis_fpmerge(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fpmerge,8
+ fpmerge %f1,%f3,%f0
+ .end
+
+!--------------------------------------------------------------------
+! Pixel expansion
+!
+! double vis_fexpand(float /*frs2*/);
+!
+ .inline vis_fexpand,4
+ fexpand %f1,%f0
+ .end
+
+! double vis_fexpand_hi(double /*frs2*/);
+!
+ .inline vis_fexpand_hi,8
+ fexpand %f0,%f0
+ .end
+
+! double vis_fexpand_lo(double /*frs2*/);
+!
+ .inline vis_fexpand_lo,8
+ fexpand %f1,%f0
+ .end
+
+!--------------------------------------------------------------------
+! Bitwise logical operations
+!
+! double vis_fnor(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fnor,16
+ fnor %f0,%f2,%f0
+ .end
+!
+! float vis_fnors(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fnors,8
+ fnors %f1,%f3,%f0
+ .end
+!
+! double vis_fandnot(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fandnot,16
+ fandnot1 %f0,%f2,%f0
+ .end
+!
+! float vis_fandnots(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fandnots,8
+ fandnot1s %f1,%f3,%f0
+ .end
+!
+! double vis_fnot(double /*frs1*/);
+!
+ .inline vis_fnot,8
+ fnot1 %f0,%f0
+ .end
+!
+! float vis_fnots(float /*frs1*/);
+!
+ .inline vis_fnots,4
+ fnot1s %f1,%f0
+ .end
+!
+! double vis_fxor(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fxor,16
+ fxor %f0,%f2,%f0
+ .end
+!
+! float vis_fxors(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fxors,8
+ fxors %f1,%f3,%f0
+ .end
+!
+! double vis_fnand(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fnand,16
+ fnand %f0,%f2,%f0
+ .end
+!
+! float vis_fnands(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fnands,8
+ fnands %f1,%f3,%f0
+ .end
+!
+! double vis_fand(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fand,16
+ fand %f0,%f2,%f0
+ .end
+!
+! float vis_fands(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fands,8
+ fands %f1,%f3,%f0
+ .end
+!
+! double vis_fxnor(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fxnor,16
+ fxnor %f0,%f2,%f0
+ .end
+!
+! float vis_fxnors(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fxnors,8
+ fxnors %f1,%f3,%f0
+ .end
+!
+! double vis_fsrc(double /*frs1*/);
+!
+ .inline vis_fsrc,8
+ fsrc1 %f0,%f0
+ .end
+!
+! float vis_fsrcs(float /*frs1*/);
+!
+ .inline vis_fsrcs,4
+ fsrc1s %f1,%f0
+ .end
+!
+! double vis_fornot(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_fornot,16
+ fornot1 %f0,%f2,%f0
+ .end
+!
+! float vis_fornots(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fornots,8
+ fornot1s %f1,%f3,%f0
+ .end
+!
+! double vis_for(double /*frs1*/, double /*frs2*/);
+!
+ .inline vis_for,16
+ for %f0,%f2,%f0
+ .end
+!
+! float vis_fors(float /*frs1*/, float /*frs2*/);
+!
+ .inline vis_fors,8
+ fors %f1,%f3,%f0
+ .end
+!
+! double vis_fzero(/* void */)
+!
+ .inline vis_fzero,0
+ fzero %f0
+ .end
+!
+! float vis_fzeros(/* void */)
+!
+ .inline vis_fzeros,0
+ fzeros %f0
+ .end
+!
+! double vis_fone(/* void */)
+!
+ .inline vis_fone,0
+ fone %f0
+ .end
+!
+! float vis_fones(/* void */)
+!
+ .inline vis_fones,0
+ fones %f0
+ .end
+
+!--------------------------------------------------------------------
+! Partial store instructions
+!
+! vis_stdfa_ASI_PST8P(double frd, void *rs1, int rmask)
+!
+ .inline vis_stdfa_ASI_PST8P,20
+ stda %f0,[%o1]%o2,0xc0 ! ASI_PST8_P
+ .end
+!
+! vis_stdfa_ASI_PST8PL(double frd, void *rs1, int rmask)
+!
+ .inline vis_stdfa_ASI_PST8PL,20
+ stda %f0,[%o1]%o2,0xc8 ! ASI_PST8_PL
+ .end
+!
+! vis_stdfa_ASI_PST8P_int_pair(void *rs1, void *rs2, void *rs3, int rmask);
+!
+! Loads one 32-bit word from the address in %o0 into %f4 and one from
+! the address in %o1 into %f5, then issues a partial store of the
+! %f4/%f5 pair to the address in %o2 with %o3 as the mask operand,
+! using ASI 0xc0.
+!
+ .inline vis_stdfa_ASI_PST8P_int_pair,28
+ ld [%o0],%f4
+ ld [%o1],%f5
+ stda %f4,[%o2]%o3,0xc0 ! ASI_PST8_P
+ .end
+!
+! vis_stdfa_ASI_PST8S(double frd, void *rs1, int rmask)
+!
+ .inline vis_stdfa_ASI_PST8S,20
+ stda %f0,[%o1]%o2,0xc1 ! ASI_PST8_S
+ .end
+!
+! vis_stdfa_ASI_PST16P(double frd, void *rs1, int rmask)
+!
+ .inline vis_stdfa_ASI_PST16P,20
+ stda %f0,[%o1]%o2,0xc2 ! ASI_PST16_P
+ .end
+!
+! vis_stdfa_ASI_PST16S(double frd, void *rs1, int rmask)
+!
+ .inline vis_stdfa_ASI_PST16S,20
+ stda %f0,[%o1]%o2,0xc3 ! ASI_PST16_S
+ .end
+!
+! vis_stdfa_ASI_PST32P(double frd, void *rs1, int rmask)
+!
+ .inline vis_stdfa_ASI_PST32P,20
+ stda %f0,[%o1]%o2,0xc4 ! ASI_PST32_P
+ .end
+!
+! vis_stdfa_ASI_PST32S(double frd, void *rs1, int rmask)
+!
+ .inline vis_stdfa_ASI_PST32S,20
+ stda %f0,[%o1]%o2,0xc5 ! ASI_PST32_S
+ .end
+
+!--------------------------------------------------------------------
+! Short store instructions
+!
+! vis_stdfa_ASI_FL8P(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL8P,16
+ stda %f0,[%o1]0xd0 ! ASI_FL8_P
+ .end
+!
+! vis_stdfa_ASI_FL8P_index(double frd, void *rs1, long index)
+!
+ .inline vis_stdfa_ASI_FL8P_index,24
+ stda %f0,[%o1+%o2]0xd0 ! ASI_FL8_P
+ .end
+!
+! vis_stdfa_ASI_FL8S(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL8S,16
+ stda %f0,[%o1]0xd1 ! ASI_FL8_S
+ .end
+!
+! vis_stdfa_ASI_FL16P(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL16P,16
+ stda %f0,[%o1]0xd2 ! ASI_FL16_P
+ .end
+!
+! vis_stdfa_ASI_FL16P_index(double frd, void *rs1, long index)
+!
+ .inline vis_stdfa_ASI_FL16P_index,24
+ stda %f0,[%o1+%o2]0xd2 ! ASI_FL16_P
+ .end
+!
+! vis_stdfa_ASI_FL16S(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL16S,16
+ stda %f0,[%o1]0xd3 ! ASI_FL16_S
+ .end
+!
+! vis_stdfa_ASI_FL8PL(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL8PL,16
+ stda %f0,[%o1]0xd8 ! ASI_FL8_PL
+ .end
+!
+! vis_stdfa_ASI_FL8SL(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL8SL,16
+ stda %f0,[%o1]0xd9 ! ASI_FL8_SL
+ .end
+!
+! vis_stdfa_ASI_FL16PL(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL16PL,16
+ stda %f0,[%o1]0xda ! ASI_FL16_PL
+ .end
+!
+! vis_stdfa_ASI_FL16SL(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_FL16SL,16
+ stda %f0,[%o1]0xdb ! ASI_FL16_SL
+ .end
+
+!--------------------------------------------------------------------
+! Short load instructions
+!
+! double vis_lddfa_ASI_FL8P(void *rs1)
+!
+! Loads from the address in %o0 through ASI 0xd0 into the scratch
+! register %f4, then copies %f4 into the return register %f0.
+!
+ .inline vis_lddfa_ASI_FL8P,8
+ ldda [%o0]0xd0,%f4 ! ASI_FL8_P
+ fmovd %f4,%f0 ! Compiler can clean this up
+ .end
+!
+! double vis_lddfa_ASI_FL8P_index(void *rs1, long index)
+!
+ .inline vis_lddfa_ASI_FL8P_index,16
+ ldda [%o0+%o1]0xd0,%f4
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL8P_hi(void *rs1, unsigned int index)
+!
+! Uses the upper 16 bits of the 32-bit index as the offset: sra shifts
+! %o1 right by 16 arithmetically, so the offset is sign-extended even
+! though the prototype declares index unsigned. The load itself is the
+! ASI 0xd0 load from rs1 + offset into %f4, copied to %f0 for return.
+!
+ .inline vis_lddfa_ASI_FL8P_hi,12
+ sra %o1,16,%o1
+ ldda [%o0+%o1]0xd0,%f4
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL8P_lo(void *rs1, unsigned int index)
+!
+! Uses the lower 16 bits of index as the offset: shifting left by 16 and
+! then arithmetically right by 16 discards the upper half and
+! sign-extends the lower half. The load itself is the ASI 0xd0 load from
+! rs1 + offset into %f4, copied to %f0 for return.
+!
+ .inline vis_lddfa_ASI_FL8P_lo,12
+ sll %o1,16,%o1
+ sra %o1,16,%o1
+ ldda [%o0+%o1]0xd0,%f4
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL8S(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL8S,8
+ ldda [%o0]0xd1,%f4 ! ASI_FL8_S
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL16P(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL16P,8
+ ldda [%o0]0xd2,%f4 ! ASI_FL16_P
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL16P_index(void *rs1, long index)
+!
+ .inline vis_lddfa_ASI_FL16P_index,16
+ ldda [%o0+%o1]0xd2,%f4 ! ASI_FL16_P
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL16S(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL16S,8
+ ldda [%o0]0xd3,%f4 ! ASI_FL16_S
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL8PL(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL8PL,8
+ ldda [%o0]0xd8,%f4 ! ASI_FL8_PL
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL8PL_index(void *rs1, long index)
+!
+ .inline vis_lddfa_ASI_FL8PL_index,16
+ ldda [%o0+%o1]0xd8,%f4 ! ASI_FL8_PL
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL8SL(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL8SL,8
+ ldda [%o0]0xd9,%f4 ! ASI_FL8_SL
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL16PL(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL16PL,8
+ ldda [%o0]0xda,%f4 ! ASI_FL16_PL
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL16PL_index(void *rs1, long index)
+!
+ .inline vis_lddfa_ASI_FL16PL_index,16
+ ldda [%o0+%o1]0xda,%f4 ! ASI_FL16_PL
+ fmovd %f4,%f0
+ .end
+!
+! double vis_lddfa_ASI_FL16SL(void *rs1)
+!
+ .inline vis_lddfa_ASI_FL16SL,8
+ ldda [%o0]0xdb,%f4 ! ASI_FL16_SL
+ fmovd %f4,%f0
+ .end
+
+!--------------------------------------------------------------------
+! Graphics status register
+!
+! unsigned int vis_read_gsr(void)
+!
+ .inline vis_read_gsr,0
+ rd %gsr,%o0
+ .end
+!
+! void vis_write_gsr(unsigned int /* GSR */)
+!
+ .inline vis_write_gsr,4
+ wr %g0,%o0,%gsr
+ .end
+
+!--------------------------------------------------------------------
+! Voxel texture mapping
+!
+! unsigned long vis_array8(unsigned long long /*rs1 */, int /*rs2*/)
+!
+ .inline vis_array8,12
+ array8 %o0,%o1,%o0
+ .end
+!
+! unsigned long vis_array16(unsigned long long /*rs1*/, int /*rs2*/)
+!
+ .inline vis_array16,12
+ array16 %o0,%o1,%o0
+ .end
+!
+! unsigned long vis_array32(unsigned long long /*rs1*/, int /*rs2*/)
+!
+ .inline vis_array32,12
+ array32 %o0,%o1,%o0
+ .end
+
+!--------------------------------------------------------------------
+! Register aliasing and type casts
+!
+! float vis_read_hi(double /* frs1 */);
+!
+ .inline vis_read_hi,8
+ fmovs %f0,%f0
+ .end
+!
+! float vis_read_lo(double /* frs1 */);
+!
+ .inline vis_read_lo,8
+ fmovs %f1,%f0 ! %f0 = low word (frs1); return %f0;
+ .end
+!
+! double vis_write_hi(double /* frs1 */, float /* frs2 */);
+!
+! Overwrites the high word (%f0) of frs1 with frs2; %f1 is not written,
+! so the low word of the returned double is unchanged from frs1.
+!
+ .inline vis_write_hi,12
+ fmovs %f3,%f0 ! %f3 = float frs2; return %f0:f1;
+ .end
+!
+! double vis_write_lo(double /* frs1 */, float /* frs2 */);
+!
+! Overwrites the low word (%f1) of frs1 with frs2; %f0 is not written,
+! so the high word of the returned double is unchanged from frs1.
+!
+ .inline vis_write_lo,12
+ fmovs %f3,%f1 ! %f3 = float frs2; return %f0:f1;
+ .end
+!
+! double vis_freg_pair(float /* frs1 */, float /* frs2 */);
+!
+! Builds a double from two floats: frs1 becomes the high word (%f0) and
+! frs2 the low word (%f1). The order of the two moves matters, because
+! frs1 arrives in %f1 and must be copied out before %f1 is overwritten.
+!
+ .inline vis_freg_pair,8
+ fmovs %f1,%f0 ! %f1 = float frs1; put in hi;
+ fmovs %f3,%f1 ! %f3 = float frs2; put in lo; return %f0:f1;
+ .end
+!
+! float vis_to_float(unsigned int /*value*/);
+!
+! Reinterprets the 32 bits of value as a float: %o0 is stored to a
+! scratch word on the stack and reloaded into %f0. No numeric conversion
+! takes place.
+! NOTE(review): the offset 2183 presumably is the 64-bit stack bias
+! (2047) plus 0x88 -- confirm against the ABI.
+!
+ .inline vis_to_float,4
+ st %o0,[%sp+2183]
+ ld [%sp+2183],%f0
+ .end
+!
+! double vis_to_double(unsigned int /*value1*/, unsigned int /*value2*/);
+!
+! Assembles a double from two raw 32-bit words: value1 (%o0) becomes the
+! high word (%f0) and value2 (%o1) the low word (%f1). Each word is
+! bounced through the same scratch word on the stack, one after the
+! other; no numeric conversion takes place.
+!
+ .inline vis_to_double,8
+ st %o0,[%sp+2183]
+ ld [%sp+2183],%f0
+ st %o1,[%sp+2183]
+ ld [%sp+2183],%f1
+ .end
+!
+! double vis_to_double_dup(unsigned int /*value*/);
+!
+! Builds a double whose high and low words are both the raw 32 bits of
+! value: %o0 is bounced through a scratch word on the stack into %f1 and
+! then copied into %f0. No numeric conversion takes place.
+!
+ .inline vis_to_double_dup,4
+ st %o0,[%sp+2183]
+ ld [%sp+2183],%f1
+ fmovs %f1,%f0 ! duplicate value
+ .end
+!
+! double vis_ll_to_double(unsigned long long /*value*/);
+!
+! Reinterprets the 64 bits of value as a double: stx writes %o0 to a
+! scratch doubleword on the stack and ldd reloads it into %f0. No
+! numeric conversion takes place.
+!
+ .inline vis_ll_to_double,8
+ stx %o0,[%sp+2183]
+ ldd [%sp+2183],%f0
+ .end
+
+!--------------------------------------------------------------------
+! Address space identifier (ASI) register
+!
+! unsigned int vis_read_asi(void)
+!
+ .inline vis_read_asi,0
+ rd %asi,%o0
+ .end
+!
+! void vis_write_asi(unsigned int /* ASI */)
+!
+ .inline vis_write_asi,4
+ wr %g0,%o0,%asi
+ .end
+
+!--------------------------------------------------------------------
+! Load/store from/into alternate space
+!
+! float vis_ldfa_ASI_REG(void *rs1)
+!
+ .inline vis_ldfa_ASI_REG,8
+ lda [%o0+0]%asi,%f4
+ fmovs %f4,%f0 ! Compiler can clean this up
+ .end
+!
+! float vis_ldfa_ASI_P(void *rs1)
+!
+ .inline vis_ldfa_ASI_P,8
+ lda [%o0]0x80,%f4 ! ASI_P
+ fmovs %f4,%f0 ! Compiler can clean this up
+ .end
+!
+! float vis_ldfa_ASI_PL(void *rs1)
+!
+ .inline vis_ldfa_ASI_PL,8
+ lda [%o0]0x88,%f4 ! ASI_PL
+ fmovs %f4,%f0 ! Compiler can clean this up
+ .end
+!
+! double vis_lddfa_ASI_REG(void *rs1)
+!
+ .inline vis_lddfa_ASI_REG,8
+ ldda [%o0+0]%asi,%f4
+ fmovd %f4,%f0 ! Compiler can clean this up
+ .end
+!
+! double vis_lddfa_ASI_P(void *rs1)
+!
+ .inline vis_lddfa_ASI_P,8
+ ldda [%o0]0x80,%f4 ! ASI_P
+ fmovd %f4,%f0 ! Compiler can clean this up
+ .end
+!
+! double vis_lddfa_ASI_PL(void *rs1)
+!
+ .inline vis_lddfa_ASI_PL,8
+ ldda [%o0]0x88,%f4 ! ASI_PL
+ fmovd %f4,%f0 ! Compiler can clean this up
+ .end
+!
+! vis_stfa_ASI_REG(float frs, void *rs1)
+!
+ .inline vis_stfa_ASI_REG,12
+ sta %f1,[%o1+0]%asi
+ .end
+!
+! vis_stfa_ASI_P(float frs, void *rs1)
+!
+ .inline vis_stfa_ASI_P,12
+ sta %f1,[%o1]0x80 ! ASI_P
+ .end
+!
+! vis_stfa_ASI_PL(float frs, void *rs1)
+!
+ .inline vis_stfa_ASI_PL,12
+ sta %f1,[%o1]0x88 ! ASI_PL
+ .end
+!
+! vis_stdfa_ASI_REG(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_REG,16
+ stda %f0,[%o1+0]%asi
+ .end
+!
+! vis_stdfa_ASI_P(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_P,16
+ stda %f0,[%o1]0x80 ! ASI_P
+ .end
+!
+! vis_stdfa_ASI_PL(double frd, void *rs1)
+!
+ .inline vis_stdfa_ASI_PL,16
+ stda %f0,[%o1]0x88 ! ASI_PL
+ .end
+!
+! unsigned short vis_lduha_ASI_REG(void *rs1)
+!
+ .inline vis_lduha_ASI_REG,8
+ lduha [%o0+0]%asi,%o0
+ .end
+!
+! unsigned short vis_lduha_ASI_P(void *rs1)
+!
+ .inline vis_lduha_ASI_P,8
+ lduha [%o0]0x80,%o0 ! ASI_P
+ .end
+!
+! unsigned short vis_lduha_ASI_PL(void *rs1)
+!
+ .inline vis_lduha_ASI_PL,8
+ lduha [%o0]0x88,%o0 ! ASI_PL
+ .end
+!
+! unsigned short vis_lduha_ASI_P_index(void *rs1, long index)
+!
+ .inline vis_lduha_ASI_P_index,16
+ lduha [%o0+%o1]0x80,%o0 ! ASI_P
+ .end
+!
+! unsigned short vis_lduha_ASI_PL_index(void *rs1, long index)
+!
+ .inline vis_lduha_ASI_PL_index,16
+ lduha [%o0+%o1]0x88,%o0 ! ASI_PL
+ .end
+
+!--------------------------------------------------------------------
+! Prefetch
+!
+! void vis_prefetch_read(void * /*address*/);
+!
+ .inline vis_prefetch_read,8
+ prefetch [%o0+0],0
+ .end
+!
+! void vis_prefetch_write(void * /*address*/);
+!
+ .inline vis_prefetch_write,8
+ prefetch [%o0+0],2
+ .end
diff --git a/security/nss/lib/freebl/mpi/vis_proto.h b/security/nss/lib/freebl/mpi/vis_proto.h
new file mode 100644
index 000000000..275de59df
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/vis_proto.h
@@ -0,0 +1,234 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * Prototypes for the inline templates in vis.il (vis_64.il in 64-bit builds)
+ */
+
+#ifndef VIS_PROTO_H
+#define VIS_PROTO_H
+
+#pragma ident "@(#)vis_proto.h 1.3 97/03/30 SMI"
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+/* Pure edge handling instructions */
+int vis_edge8(void * /*frs1*/, void * /*frs2*/);
+int vis_edge8l(void * /*frs1*/, void * /*frs2*/);
+int vis_edge16(void * /*frs1*/, void * /*frs2*/);
+int vis_edge16l(void * /*frs1*/, void * /*frs2*/);
+int vis_edge32(void * /*frs1*/, void * /*frs2*/);
+int vis_edge32l(void * /*frs1*/, void * /*frs2*/);
+
+/* Edge handling instructions with negative return values if cc set. */
+int vis_edge8cc(void * /*frs1*/, void * /*frs2*/);
+int vis_edge8lcc(void * /*frs1*/, void * /*frs2*/);
+int vis_edge16cc(void * /*frs1*/, void * /*frs2*/);
+int vis_edge16lcc(void * /*frs1*/, void * /*frs2*/);
+int vis_edge32cc(void * /*frs1*/, void * /*frs2*/);
+int vis_edge32lcc(void * /*frs1*/, void * /*frs2*/);
+
+/* Alignment instructions. */
+void *vis_alignaddr(void * /*rs1*/, int /*rs2*/);
+void *vis_alignaddrl(void * /*rs1*/, int /*rs2*/);
+double vis_faligndata(double /*frs1*/, double /*frs2*/);
+
+/* Partitioned comparison instructions. */
+int vis_fcmple16(double /*frs1*/, double /*frs2*/);
+int vis_fcmpne16(double /*frs1*/, double /*frs2*/);
+int vis_fcmple32(double /*frs1*/, double /*frs2*/);
+int vis_fcmpne32(double /*frs1*/, double /*frs2*/);
+int vis_fcmpgt16(double /*frs1*/, double /*frs2*/);
+int vis_fcmpeq16(double /*frs1*/, double /*frs2*/);
+int vis_fcmpgt32(double /*frs1*/, double /*frs2*/);
+int vis_fcmpeq32(double /*frs1*/, double /*frs2*/);
+
+/* Partitioned multiplication. */
+#if 0
+double vis_fmul8x16(float /*frs1*/, double /*frs2*/);
+#endif
+double vis_fmul8x16_dummy(float /*frs1*/, int /*dummy*/, double /*frs2*/);
+double vis_fmul8x16au(float /*frs1*/, float /*frs2*/);
+double vis_fmul8x16al(float /*frs1*/, float /*frs2*/);
+double vis_fmul8sux16(double /*frs1*/, double /*frs2*/);
+double vis_fmul8ulx16(double /*frs1*/, double /*frs2*/);
+double vis_fmuld8ulx16(float /*frs1*/, float /*frs2*/);
+double vis_fmuld8sux16(float /*frs1*/, float /*frs2*/);
+
+/* Partitioned addition & subtraction. */
+double vis_fpadd16(double /*frs1*/, double /*frs2*/);
+float vis_fpadd16s(float /*frs1*/, float /*frs2*/);
+double vis_fpadd32(double /*frs1*/, double /*frs2*/);
+float vis_fpadd32s(float /*frs1*/, float /*frs2*/);
+double vis_fpsub16(double /*frs1*/, double /*frs2*/);
+float vis_fpsub16s(float /*frs1*/, float /*frs2*/);
+double vis_fpsub32(double /*frs1*/, double /*frs2*/);
+float vis_fpsub32s(float /*frs1*/, float /*frs2*/);
+
+/* Pixel packing & clamping. */
+float vis_fpack16(double /*frs2*/);
+double vis_fpack32(double /*frs1*/, double /*frs2*/);
+float vis_fpackfix(double /*frs2*/);
+
+/* Combined pack ops. */
+double vis_fpack16_pair(double /*frs2*/, double /*frs2*/);
+double vis_fpackfix_pair(double /*frs2*/, double /*frs2*/);
+void vis_st2_fpack16(double, double, double *);
+void vis_std_fpack16(double, double, double *);
+void vis_st2_fpackfix(double, double, double *);
+
+double vis_fpack16_to_hi(double /*frs1*/, double /*frs2*/);
+double vis_fpack16_to_lo(double /*frs1*/, double /*frs2*/);
+
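+/* Motion estimation.
+ * NOTE(review): vis_64.il names its pdist template
+ * vis_pxldist64(accum, pxls1, pxls2) and defines no vis_pdist -- confirm. */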
+double vis_pdist(double /*frs1*/, double /*frs2*/, double /*frd*/);
+
+/* Channel merging. */
+double vis_fpmerge(float /*frs1*/, float /*frs2*/);
+
+/* Pixel expansion. */
+double vis_fexpand(float /*frs2*/);
+double vis_fexpand_hi(double /*frs2*/);
+double vis_fexpand_lo(double /*frs2*/);
+
+/* Bitwise logical operators. */
+double vis_fnor(double /*frs1*/, double /*frs2*/);
+float vis_fnors(float /*frs1*/, float /*frs2*/);
+double vis_fandnot(double /*frs1*/, double /*frs2*/);
+float vis_fandnots(float /*frs1*/, float /*frs2*/);
+double vis_fnot(double /*frs1*/);
+float vis_fnots(float /*frs1*/);
+double vis_fxor(double /*frs1*/, double /*frs2*/);
+float vis_fxors(float /*frs1*/, float /*frs2*/);
+double vis_fnand(double /*frs1*/, double /*frs2*/);
+float vis_fnands(float /*frs1*/, float /*frs2*/);
+double vis_fand(double /*frs1*/, double /*frs2*/);
+float vis_fands(float /*frs1*/, float /*frs2*/);
+double vis_fxnor(double /*frs1*/, double /*frs2*/);
+float vis_fxnors(float /*frs1*/, float /*frs2*/);
+double vis_fsrc(double /*frs1*/);
+float vis_fsrcs(float /*frs1*/);
+double vis_fornot(double /*frs1*/, double /*frs2*/);
+float vis_fornots(float /*frs1*/, float /*frs2*/);
+double vis_for(double /*frs1*/, double /*frs2*/);
+float vis_fors(float /*frs1*/, float /*frs2*/);
+double vis_fzero(void);
+float vis_fzeros(void);
+double vis_fone(void);
+float vis_fones(void);
+
+/* Partial stores. */
+void vis_stdfa_ASI_PST8P(double /*frd*/, void * /*rs1*/, int /*rmask*/);
+void vis_stdfa_ASI_PST8PL(double /*frd*/, void * /*rs1*/, int /*rmask*/);
+void vis_stdfa_ASI_PST8P_int_pair(void * /*rs1*/, void * /*rs2*/,
+ void * /*rs3*/, int /*rmask*/);
+void vis_stdfa_ASI_PST8S(double /*frd*/, void * /*rs1*/, int /*rmask*/);
+void vis_stdfa_ASI_PST16P(double /*frd*/, void * /*rs1*/, int /*rmask*/);
+void vis_stdfa_ASI_PST16S(double /*frd*/, void * /*rs1*/, int /*rmask*/);
+void vis_stdfa_ASI_PST32P(double /*frd*/, void * /*rs1*/, int /*rmask*/);
+void vis_stdfa_ASI_PST32S(double /*frd*/, void * /*rs1*/, int /*rmask*/);
+
+/* Byte & short stores. */
+void vis_stdfa_ASI_FL8P(double /*frd*/, void * /*rs1*/);
+void vis_stdfa_ASI_FL8P_index(double /*frd*/, void * /*rs1*/, long /*index*/);
+void vis_stdfa_ASI_FL8S(double /*frd*/, void * /*rs1*/);
+void vis_stdfa_ASI_FL16P(double /*frd*/, void * /*rs1*/);
+void vis_stdfa_ASI_FL16P_index(double /*frd*/, void * /*rs1*/, long /*index*/);
+void vis_stdfa_ASI_FL16S(double /*frd*/, void * /*rs1*/);
+void vis_stdfa_ASI_FL8PL(double /*frd*/, void * /*rs1*/);
+void vis_stdfa_ASI_FL8SL(double /*frd*/, void * /*rs1*/);
+void vis_stdfa_ASI_FL16PL(double /*frd*/, void * /*rs1*/);
+void vis_stdfa_ASI_FL16SL(double /*frd*/, void * /*rs1*/);
+
+/* Byte & short loads. */
+double vis_lddfa_ASI_FL8P(void * /*rs1*/);
+double vis_lddfa_ASI_FL8P_index(void * /*rs1*/, long /*index*/);
+double vis_lddfa_ASI_FL8P_hi(void * /*rs1*/, unsigned int /*index*/);
+double vis_lddfa_ASI_FL8P_lo(void * /*rs1*/, unsigned int /*index*/);
+double vis_lddfa_ASI_FL8S(void * /*rs1*/);
+double vis_lddfa_ASI_FL16P(void * /*rs1*/);
+double vis_lddfa_ASI_FL16P_index(void * /*rs1*/, long /*index*/);
+double vis_lddfa_ASI_FL16S(void * /*rs1*/);
+double vis_lddfa_ASI_FL8PL(void * /*rs1*/);
+double vis_lddfa_ASI_FL8SL(void * /*rs1*/);
+double vis_lddfa_ASI_FL16PL(void * /*rs1*/);
+double vis_lddfa_ASI_FL16SL(void * /*rs1*/);
+
+/* Direct write to GSR, read from GSR */
+void vis_write_gsr(unsigned int /*GSR*/);
+unsigned int vis_read_gsr(void);
+
+/* Voxel texture mapping. */
+#if !defined(_NO_LONGLONG)
+unsigned long vis_array8(unsigned long long /*rs1*/, int /*rs2*/);
+unsigned long vis_array16(unsigned long long /*rs1*/, int /*rs2*/);
+unsigned long vis_array32(unsigned long long /*rs1*/, int /*rs2*/);
+#endif /* !defined(_NO_LONGLONG) */
+
+/* Register aliasing and type casts. */
+float vis_read_hi(double /*frs1*/);
+float vis_read_lo(double /*frs1*/);
+double vis_write_hi(double /*frs1*/, float /*frs2*/);
+double vis_write_lo(double /*frs1*/, float /*frs2*/);
+double vis_freg_pair(float /*frs1*/, float /*frs2*/);
+float vis_to_float(unsigned int /*value*/);
+double vis_to_double(unsigned int /*value1*/, unsigned int /*value2*/);
+double vis_to_double_dup(unsigned int /*value*/);
+#if !defined(_NO_LONGLONG)
+double vis_ll_to_double(unsigned long long /*value*/);
+#endif /* !defined(_NO_LONGLONG) */
+
+/* Miscellany (no inlines) */
+void vis_error(char * /*fmt*/, int /*a0*/);
+void vis_sim_init(void);
+
+/* For better performance */
+#define vis_fmul8x16(farg, darg) vis_fmul8x16_dummy((farg), 0, (darg))
+
+/* Nicknames for explicit ASI loads and stores. */
+#define vis_st_u8 vis_stdfa_ASI_FL8P
+#define vis_st_u8_i vis_stdfa_ASI_FL8P_index
+#define vis_st_u8_le vis_stdfa_ASI_FL8PL
+#define vis_st_u16 vis_stdfa_ASI_FL16P
+#define vis_st_u16_i vis_stdfa_ASI_FL16P_index
+#define vis_st_u16_le vis_stdfa_ASI_FL16PL
+
+#define vis_ld_u8 vis_lddfa_ASI_FL8P
+#define vis_ld_u8_i vis_lddfa_ASI_FL8P_index
+#define vis_ld_u8_le vis_lddfa_ASI_FL8PL
+#define vis_ld_u16 vis_lddfa_ASI_FL16P
+#define vis_ld_u16_i vis_lddfa_ASI_FL16P_index
+#define vis_ld_u16_le vis_lddfa_ASI_FL16PL
+
+#define vis_pst_8 vis_stdfa_ASI_PST8P
+#define vis_pst_16 vis_stdfa_ASI_PST16P
+#define vis_pst_32 vis_stdfa_ASI_PST32P
+
+#define vis_st_u8s vis_stdfa_ASI_FL8S
+#define vis_st_u8s_le vis_stdfa_ASI_FL8SL
+#define vis_st_u16s vis_stdfa_ASI_FL16S
+#define vis_st_u16s_le vis_stdfa_ASI_FL16SL
+
+#define vis_ld_u8s vis_lddfa_ASI_FL8S
+#define vis_ld_u8s_le vis_lddfa_ASI_FL8SL
+#define vis_ld_u16s vis_lddfa_ASI_FL16S
+#define vis_ld_u16s_le vis_lddfa_ASI_FL16SL
+
+#define vis_pst_8s vis_stdfa_ASI_PST8S
+#define vis_pst_16s vis_stdfa_ASI_PST16S
+#define vis_pst_32s vis_stdfa_ASI_PST32S
+
+/* "<" and ">=" may be implemented in terms of ">" and "<=". */
+#define vis_fcmplt16(a, b) vis_fcmpgt16((b), (a))
+#define vis_fcmplt32(a, b) vis_fcmpgt32((b), (a))
+#define vis_fcmpge16(a, b) vis_fcmple16((b), (a))
+#define vis_fcmpge32(a, b) vis_fcmple32((b), (a))
+
+#ifdef __cplusplus
+} // End of extern "C"
+#endif /* __cplusplus */
+
+#endif /* VIS_PROTO_H */
diff --git a/security/nss/lib/freebl/nsslowhash.c b/security/nss/lib/freebl/nsslowhash.c
new file mode 100644
index 000000000..5ed039689
--- /dev/null
+++ b/security/nss/lib/freebl/nsslowhash.c
@@ -0,0 +1,150 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+#include "prtypes.h"
+#include "secerr.h"
+#include "blapi.h"
+#include "hasht.h"
+#include "plhash.h"
+#include "nsslowhash.h"
+#include "blapii.h"
+
+/* Library-level context; the only instance in this file is dummyContext. */
+struct NSSLOWInitContextStr {
+ int count; /* zero-initialized; not otherwise read or written in this file */
+};
+
+/* Per-digest state: the hash's method table plus the context it created. */
+struct NSSLOWHASHContextStr {
+ const SECHashObject *hashObj; /* obtained from HASH_GetRawHashObject() */
+ void *hashCtxt; /* returned by hashObj->create(), released by hashObj->destroy() */
+};
+
+/*
+ * Report whether FIPS mode should be assumed.
+ *
+ * On LINUX builds the first byte of /proc/sys/crypto/fips_enabled is read
+ * and 1 is returned only when that byte is '1'; a missing, unreadable or
+ * empty file yields 0.  On every other build the answer is always 1.
+ */
+static int
+nsslow_GetFIPSEnabled(void)
+{
+#ifdef LINUX
+    char flag;
+    size_t got;
+    FILE *fp = fopen("/proc/sys/crypto/fips_enabled", "r");
+
+    if (fp == NULL) {
+        return 0;
+    }
+
+    got = fread(&flag, 1, 1, fp);
+    fclose(fp);
+
+    if (got != 1 || flag != '1') {
+        return 0;
+    }
+#endif
+    return 1;
+}
+
+/* Singleton handed out by NSSLOW_Init(); the other entry points expect this exact pointer. */
+static NSSLOWInitContext dummyContext = { 0 };
+/* PR_TRUE until NSSLOW_Init() succeeds; NSSLOWHASH_NewContext() fails while it is set. */
+static PRBool post_failed = PR_TRUE;
+
+/*
+ * Initialize the low-level hash interface.
+ *
+ * When nsslow_GetFIPSEnabled() reports FIPS mode, BL_FIPSEntryOK() must
+ * succeed; otherwise SEC_ERROR_LIBRARY_FAILURE is set, post_failed stays
+ * PR_TRUE and NULL is returned.  On success post_failed is cleared and the
+ * address of the file-scope dummyContext is returned.
+ */
+NSSLOWInitContext *
+NSSLOW_Init(void)
+{
+    PRBool entryOK = PR_TRUE;
+
+#ifdef FREEBL_NO_DEPEND
+    (void)FREEBL_InitStubs();
+#endif
+
+    /* make sure the FIPS product is installed if we are trying to
+     * go into FIPS mode */
+    if (nsslow_GetFIPSEnabled() && BL_FIPSEntryOK(PR_TRUE) != SECSuccess) {
+        entryOK = PR_FALSE;
+    }
+
+    if (!entryOK) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        post_failed = PR_TRUE;
+        return NULL;
+    }
+
+    post_failed = PR_FALSE;
+    return &dummyContext;
+}
+
+/* Shut the interface down; only asserts that |context| is the singleton. */
+void
+NSSLOW_Shutdown(NSSLOWInitContext *context)
+{
+    PORT_Assert(&dummyContext == context);
+}
+
+/* Reset the interface; only asserts that |context| is the singleton. */
+void
+NSSLOW_Reset(NSSLOWInitContext *context)
+{
+    PORT_Assert(&dummyContext == context);
+}
+
+/*
+ * Allocate a hashing context for |hashType|.
+ *
+ * Fails with SEC_ERROR_PKCS11_DEVICE_ERROR while post_failed is set (i.e.
+ * NSSLOW_Init() has not succeeded) and with SEC_ERROR_INVALID_ARGS when
+ * |initContext| is not the pointer NSSLOW_Init() returns.  Also returns
+ * NULL when allocation, the hash-object lookup, or the hash's create()
+ * call fails.  The caller releases the result with NSSLOWHASH_Destroy().
+ */
+NSSLOWHASHContext *
+NSSLOWHASH_NewContext(NSSLOWInitContext *initContext,
+                      HASH_HashType hashType)
+{
+    NSSLOWHASHContext *ctx = NULL;
+
+    if (post_failed) {
+        PORT_SetError(SEC_ERROR_PKCS11_DEVICE_ERROR);
+        return NULL;
+    }
+    if (&dummyContext != initContext) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return NULL;
+    }
+
+    ctx = PORT_ZNew(NSSLOWHASHContext);
+    if (ctx == NULL) {
+        return NULL;
+    }
+
+    /* hashCtxt starts out NULL (zeroed allocation) and is only filled in
+     * once a hash object has been found. */
+    ctx->hashObj = HASH_GetRawHashObject(hashType);
+    if (ctx->hashObj != NULL) {
+        ctx->hashCtxt = ctx->hashObj->create();
+    }
+    if (ctx->hashObj == NULL || ctx->hashCtxt == NULL) {
+        PORT_Free(ctx);
+        return NULL;
+    }
+
+    return ctx;
+}
+
+/*
+ * Invoke the begin() method of the hash bound to |context|.
+ * |context| must be a live result of NSSLOWHASH_NewContext(); it is
+ * dereferenced without a NULL check.
+ */
+void
+NSSLOWHASH_Begin(NSSLOWHASHContext *context)
+{
+    /* No "return <expr>;" here: a void function may not return an
+     * expression in ISO C. */
+    context->hashObj->begin(context->hashCtxt);
+}
+
+/*
+ * Feed |len| bytes starting at |buf| to the update() method of the hash
+ * bound to |context|.  |context| is dereferenced without a NULL check.
+ */
+void
+NSSLOWHASH_Update(NSSLOWHASHContext *context, const unsigned char *buf,
+                  unsigned int len)
+{
+    /* No "return <expr>;" here: a void function may not return an
+     * expression in ISO C. */
+    context->hashObj->update(context->hashCtxt, buf, len);
+}
+
+/*
+ * Invoke the end() method of the hash bound to |context|, passing the
+ * output buffer |buf|, the length out-parameter |ret| and the buffer
+ * capacity |len| straight through.  |context| is dereferenced without a
+ * NULL check.
+ */
+void
+NSSLOWHASH_End(NSSLOWHASHContext *context, unsigned char *buf,
+               unsigned int *ret, unsigned int len)
+{
+    /* No "return <expr>;" here: a void function may not return an
+     * expression in ISO C. */
+    context->hashObj->end(context->hashCtxt, buf, ret, len);
+}
+
+/*
+ * Release |context|: the hash's own destroy() is called on the inner
+ * context (with PR_TRUE as its second argument), then the wrapper is freed.
+ */
+void
+NSSLOWHASH_Destroy(NSSLOWHASHContext *context)
+{
+    const SECHashObject *obj = context->hashObj;
+
+    obj->destroy(context->hashCtxt, PR_TRUE);
+    PORT_Free(context);
+}
+
+/* Return the `length` field of the hash object bound to |context|. */
+unsigned int
+NSSLOWHASH_Length(NSSLOWHASHContext *context)
+{
+    const SECHashObject *obj = context->hashObj;
+
+    return obj->length;
+}
diff --git a/security/nss/lib/freebl/nsslowhash.h b/security/nss/lib/freebl/nsslowhash.h
new file mode 100644
index 000000000..d8f058715
--- /dev/null
+++ b/security/nss/lib/freebl/nsslowhash.h
@@ -0,0 +1,33 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * Provide FIPS validated hashing for applications that only need hashing.
+ * NOTE: mac'ing requires keys and will not work in this interface.
+ * Also NOTE: this only works with Hashing. Only the FIPS interface is enabled.
+ */
+
+#ifndef _NSSLOWHASH_H_
+#define _NSSLOWHASH_H_
+
+/* Opaque handles; callers only ever hold pointers to them. */
+typedef struct NSSLOWInitContextStr NSSLOWInitContext;
+typedef struct NSSLOWHASHContextStr NSSLOWHASHContext;
+
+/* Returns the library context, or NULL when initialization fails. */
+NSSLOWInitContext *NSSLOW_Init(void);
+/* |context| must be the pointer returned by NSSLOW_Init(). */
+void NSSLOW_Shutdown(NSSLOWInitContext *context);
+void NSSLOW_Reset(NSSLOWInitContext *context);
+/* Returns a new hash context for |hashType|, or NULL on failure. */
+NSSLOWHASHContext *NSSLOWHASH_NewContext(
+ NSSLOWInitContext *initContext,
+ HASH_HashType hashType);
+/* Begin/Update/End forward to the selected hash's begin/update/end methods. */
+void NSSLOWHASH_Begin(NSSLOWHASHContext *context);
+void NSSLOWHASH_Update(NSSLOWHASHContext *context,
+ const unsigned char *buf,
+ unsigned int len);
+/* NOTE(review): presumably |buf| receives the digest, |ret| its length and
+ * |len| is the capacity of |buf| -- confirm against the hash objects. */
+void NSSLOWHASH_End(NSSLOWHASHContext *context,
+ unsigned char *buf,
+ unsigned int *ret, unsigned int len);
+/* Releases a context obtained from NSSLOWHASH_NewContext(). */
+void NSSLOWHASH_Destroy(NSSLOWHASHContext *context);
+/* Returns the selected hash object's `length` field. */
+unsigned int NSSLOWHASH_Length(NSSLOWHASHContext *context);
+
+#endif
diff --git a/security/nss/lib/freebl/os2_rand.c b/security/nss/lib/freebl/os2_rand.c
new file mode 100644
index 000000000..407b08014
--- /dev/null
+++ b/security/nss/lib/freebl/os2_rand.c
@@ -0,0 +1,334 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#define INCL_DOS
+#define INCL_DOSERRORS
+#include <os2.h>
+#include "secrng.h"
+#include "prerror.h"
+#include <stdlib.h>
+#include <time.h>
+#include <stdio.h>
+#include <sys/stat.h>
+
+/*
+ * Read the timer via DosTmrQueryTime() and split the 64-bit result into
+ * *phigh / *plow.  Returns FALSE (leaving both outputs untouched) when the
+ * query fails, TRUE otherwise.
+ */
+static BOOL
+clockTickTime(unsigned long *phigh, unsigned long *plow)
+{
+    QWORD ticks = { 0, 0 };
+
+    if (DosTmrQueryTime(&ticks) != NO_ERROR) {
+        return FALSE;
+    }
+
+    *phigh = ticks.ulHi;
+    *plow = ticks.ulLo;
+    return TRUE;
+}
+
+/*
+ * Fill |buf| with up to |maxbuf| bytes of cheap timing noise and return
+ * the number of bytes written.
+ *
+ * Sources are appended in this order, each truncated to the space left:
+ * low timer word, high timer word, clock(), time().  Later sources are
+ * only sampled if room remains after the earlier ones.
+ */
+size_t
+RNG_GetNoise(void *buf, size_t maxbuf)
+{
+    unsigned long tickHigh = 0;
+    unsigned long tickLow = 0;
+    clock_t cpuClock = 0;
+    time_t now;
+    char *out = (char *)buf;
+    size_t written = 0;
+    size_t room = maxbuf;
+    size_t chunk;
+
+    if (room == 0)
+        return 0;
+
+    clockTickTime(&tickHigh, &tickLow);
+
+    /* get the maximally changing bits first */
+    chunk = (room < sizeof(tickLow)) ? room : sizeof(tickLow);
+    memcpy(out, &tickLow, chunk);
+    written += chunk;
+    room -= chunk;
+    if (room == 0)
+        return written;
+
+    chunk = (room < sizeof(tickHigh)) ? room : sizeof(tickHigh);
+    memcpy(out + written, &tickHigh, chunk);
+    written += chunk;
+    room -= chunk;
+    if (room == 0)
+        return written;
+
+    /* value of clock() for this process */
+    cpuClock = clock();
+    chunk = (room < sizeof(cpuClock)) ? room : sizeof(cpuClock);
+    memcpy(out + written, &cpuClock, chunk);
+    written += chunk;
+    room -= chunk;
+    if (room == 0)
+        return written;
+
+    /* calendar time as reported by time() */
+    time(&now);
+    chunk = (room < sizeof(now)) ? room : sizeof(now);
+    memcpy(out + written, &now, chunk);
+    written += chunk;
+
+    return written;
+}
+
+/*
+ * Call |func| once for every normal file in <boot drive>:\OS2\, passing
+ * the full path name of each file.
+ *
+ * Returns FALSE when the boot drive cannot be determined or the directory
+ * search cannot be started (|func| is then never called); returns TRUE
+ * once the enumeration has run.
+ */
+static BOOL
+EnumSystemFiles(void (*func)(const char *))
+{
+    APIRET rc;
+    ULONG sysInfo = 0;
+    char sysDir[_MAX_PATH] = "";
+    char filename[_MAX_PATH];
+    int dirLen;
+    HDIR hdir = HDIR_CREATE;
+    ULONG numFiles = 1;
+    FILEFINDBUF3 fileBuf = { 0 };
+    ULONG buflen = sizeof(FILEFINDBUF3);
+
+    /* Without the boot drive there is no search pattern to build, so give
+     * up instead of searching with an unset buffer. */
+    if (DosQuerySysInfo(QSV_BOOT_DRIVE, QSV_BOOT_DRIVE, (PVOID)&sysInfo,
+                        sizeof(ULONG)) != NO_ERROR) {
+        return FALSE;
+    }
+
+    /* The drive number is mapped so that 1 becomes 'A', 2 becomes 'B', ... */
+    dirLen = sprintf(sysDir, "%c:\\OS2\\", (int)(sysInfo + 'A' - 1));
+    sprintf(filename, "%s*.*", sysDir);
+
+    rc = DosFindFirst(filename, &hdir, FILE_NORMAL, &fileBuf, buflen,
+                      &numFiles, FIL_STANDARD);
+    if (rc != NO_ERROR) {
+        printf("DosFindFirst error code = %lu", (unsigned long)rc);
+        return FALSE;
+    }
+
+    do {
+        /* pass the full pathname to the callback; the precision caps the
+         * file name so that sysDir + name always fits in filename[] */
+        sprintf(filename, "%s%.*s", sysDir,
+                (int)sizeof(filename) - dirLen - 1, fileBuf.achName);
+        (*func)(filename);
+
+        numFiles = 1;
+        rc = DosFindNext(hdir, &fileBuf, buflen, &numFiles);
+        if (rc != NO_ERROR && rc != ERROR_NO_MORE_FILES)
+            printf("DosFindNext error code = %lu\n", (unsigned long)rc);
+    } while (rc == NO_ERROR);
+
+    rc = DosFindClose(hdir);
+    if (rc != NO_ERROR)
+        printf("DosFindClose error code = %lu", (unsigned long)rc);
+
+    return TRUE;
+}
+
+/*
+ * State shared by the EnumSystemFiles() callbacks in this file:
+ *   dwNumFiles   - number of files seen so far in the current pass
+ *   dwReadEvery  - ReadFiles() feeds every dwReadEvery-th file to the RNG
+ *   dwFileToRead - index of the one file ReadOneFile() reads in a pass
+ */
+static int dwNumFiles, dwReadEvery, dwFileToRead = 0;
+
+/* Enumeration callback: count the file, ignoring its name. */
+static void
+CountFiles(const char *file)
+{
+    (void)file;
+    dwNumFiles += 1;
+}
+
+/*
+ * Enumeration callback: hand every dwReadEvery-th file (by running index
+ * dwNumFiles) to RNG_FileForRNG(), then advance the index.
+ */
+static void
+ReadFiles(const char *file)
+{
+    int isSampled = (dwNumFiles % dwReadEvery) == 0;
+
+    if (isSampled) {
+        RNG_FileForRNG(file);
+    }
+    ++dwNumFiles;
+}
+
+/*
+ * Read |filename| from start to end in 1024-byte chunks and discard the
+ * data.  A file that cannot be opened is silently skipped.
+ */
+static void
+ReadSingleFile(const char *filename)
+{
+    unsigned char scratch[1024];
+    FILE *fp = fopen((char *)filename, "rb");
+
+    if (fp == NULL) {
+        return;
+    }
+
+    for (;;) {
+        if (fread(scratch, 1, sizeof(scratch), fp) == 0) {
+            break;
+        }
+    }
+    fclose(fp);
+}
+
+/*
+ * Enumeration callback: read the file whose running index equals
+ * dwFileToRead, then advance the index.
+ */
+static void
+ReadOneFile(const char *file)
+{
+    int isChosen = (dwFileToRead == dwNumFiles);
+
+    if (isChosen) {
+        ReadSingleFile(file);
+    }
+    ++dwNumFiles;
+}
+
+/*
+ * Mix a sample of the system directory into the RNG: count the files, feed
+ * the count to RNG_RandomUpdate(), then enumerate again so that ReadFiles()
+ * picks roughly every (count / 10)-th file.
+ */
+static void
+ReadSystemFiles(void)
+{
+    int total;
+
+    /* first pass: count the number of files */
+    dwNumFiles = 0;
+    if (EnumSystemFiles(CountFiles) == FALSE)
+        return;
+
+    RNG_RandomUpdate(&dwNumFiles, sizeof(dwNumFiles));
+
+    total = dwNumFiles;
+    if (total == 0)
+        return;
+
+    /* second pass: read about 10 files (every file when fewer than 10) */
+    dwReadEvery = (total / 10 == 0) ? 1 : total / 10;
+
+    dwNumFiles = 0;
+    EnumSystemFiles(ReadFiles);
+}
+
+/*
+ * Feed assorted system state into the RNG through RNG_RandomUpdate():
+ * timing noise, a heap address and its contents, process/thread info
+ * blocks, date/time, country info, current directory and its status,
+ * file-system allocation data, drive map, the QSV system values, and a
+ * sample of system files.  Each OS/2 query is used only when it succeeds.
+ */
+void
+RNG_SystemInfoForRNG(void)
+{
+    unsigned long *plong = 0;
+    PTIB ptib;
+    PPIB ppib;
+    APIRET rc = NO_ERROR;
+    DATETIME dt;
+    COUNTRYCODE cc = { 0 };
+    COUNTRYINFO ci = { 0 };
+    unsigned long actual = 0;
+    char path[_MAX_PATH] = "";
+    char fullpath[_MAX_PATH] = "";
+    /* fullpath is "\" + path, so cap path one byte short of the buffer */
+    unsigned long pathlength = sizeof(path) - 1;
+    FSALLOCATE fsallocate;
+    FILESTATUS3 fstatus;
+    unsigned long defaultdrive = 0;
+    unsigned long logicaldrives = 0;
+    unsigned long sysInfo[QSV_MAX] = { 0 };
+    char buffer[20];
+    int nBytes = 0;
+
+    nBytes = RNG_GetNoise(buffer, sizeof(buffer));
+    RNG_RandomUpdate(buffer, nBytes);
+
+    /* allocate memory and use address and memory; the contents are only
+     * touched when the allocation actually succeeded */
+    plong = (unsigned long *)malloc(sizeof(*plong));
+    RNG_RandomUpdate(&plong, sizeof(plong));
+    if (plong != NULL) {
+        RNG_RandomUpdate(plong, sizeof(*plong));
+        free(plong);
+    }
+
+    /* process info */
+    rc = DosGetInfoBlocks(&ptib, &ppib);
+    if (rc == NO_ERROR) {
+        RNG_RandomUpdate(ptib, sizeof(*ptib));
+        RNG_RandomUpdate(ppib, sizeof(*ppib));
+    }
+
+    /* time */
+    rc = DosGetDateTime(&dt);
+    if (rc == NO_ERROR) {
+        RNG_RandomUpdate(&dt, sizeof(dt));
+    }
+
+    /* country */
+    rc = DosQueryCtryInfo(sizeof(ci), &cc, &ci, &actual);
+    if (rc == NO_ERROR) {
+        RNG_RandomUpdate(&cc, sizeof(cc));
+        RNG_RandomUpdate(&ci, sizeof(ci));
+        RNG_RandomUpdate(&actual, sizeof(actual));
+    }
+
+    /* current directory; path is only consumed when the query succeeded */
+    rc = DosQueryCurrentDir(0, path, &pathlength);
+    if (rc == NO_ERROR) {
+        strcat(fullpath, "\\");
+        strcat(fullpath, path);
+        RNG_RandomUpdate(fullpath, strlen(fullpath));
+        /* path info */
+        rc = DosQueryPathInfo(fullpath, FIL_STANDARD, &fstatus, sizeof(fstatus));
+        if (rc == NO_ERROR) {
+            RNG_RandomUpdate(&fstatus, sizeof(fstatus));
+        }
+    }
+
+    /* file system info */
+    rc = DosQueryFSInfo(0, FSIL_ALLOC, &fsallocate, sizeof(fsallocate));
+    if (rc == NO_ERROR) {
+        RNG_RandomUpdate(&fsallocate, sizeof(fsallocate));
+    }
+
+    /* drive info */
+    rc = DosQueryCurrentDisk(&defaultdrive, &logicaldrives);
+    if (rc == NO_ERROR) {
+        RNG_RandomUpdate(&defaultdrive, sizeof(defaultdrive));
+        RNG_RandomUpdate(&logicaldrives, sizeof(logicaldrives));
+    }
+
+    /* system info */
+    rc = DosQuerySysInfo(1L, QSV_MAX, (PVOID)&sysInfo, sizeof(ULONG) * QSV_MAX);
+    if (rc == NO_ERROR) {
+        RNG_RandomUpdate(&sysInfo, sizeof(sysInfo));
+    }
+
+    /* now let's do some files */
+    ReadSystemFiles();
+
+    /* more noise */
+    nBytes = RNG_GetNoise(buffer, sizeof(buffer));
+    RNG_RandomUpdate(buffer, nBytes);
+}
+
+/*
+ * Mix |filename| into the RNG: first its stat() record, then its contents
+ * in 1024-byte chunks, then 20 bytes of timing noise.
+ *
+ * Nothing happens if stat() fails.  totalFileBytes persists across calls;
+ * once it has passed 250000 each later call stops after its first chunk.
+ */
+void
+RNG_FileForRNG(const char *filename)
+{
+    struct stat stat_buf;
+    unsigned char buffer[1024];
+    FILE *file = 0;
+    int nBytes = 0;
+    static int totalFileBytes = 0;
+
+    if (stat((char *)filename, &stat_buf) < 0)
+        return;
+
+    RNG_RandomUpdate((unsigned char *)&stat_buf, sizeof(stat_buf));
+
+    /* Binary mode, as in ReadSingleFile(): the raw bytes are wanted, with
+     * no text-mode translation applied by the C runtime. */
+    file = fopen((char *)filename, "rb");
+    if (file != NULL) {
+        for (;;) {
+            size_t bytes = fread(buffer, 1, sizeof(buffer), file);
+
+            if (bytes == 0)
+                break;
+
+            RNG_RandomUpdate(buffer, bytes);
+            totalFileBytes += bytes;
+            if (totalFileBytes > 250000)
+                break;
+        }
+        fclose(file);
+    }
+
+    nBytes = RNG_GetNoise(buffer, 20);
+    RNG_RandomUpdate(buffer, nBytes);
+}
+
+/*
+ * Enumerate the system directory, reading only the file at index
+ * dwFileToRead, then move that index on by one, wrapping to 0 once it
+ * reaches the number of files seen.
+ */
+static void
+rng_systemJitter(void)
+{
+    int next;
+
+    dwNumFiles = 0;
+    EnumSystemFiles(ReadOneFile);
+
+    next = dwFileToRead + 1;
+    dwFileToRead = (next >= dwNumFiles) ? 0 : next;
+}
+
+/* Produce up to |maxLen| bytes in |dest| by delegating to rng_systemFromNoise(). */
+size_t
+RNG_SystemRNG(void *dest, size_t maxLen)
+{
+    size_t produced = rng_systemFromNoise(dest, maxLen);
+
+    return produced;
+}
diff --git a/security/nss/lib/freebl/poly1305-donna-x64-sse2-incremental-source.c b/security/nss/lib/freebl/poly1305-donna-x64-sse2-incremental-source.c
new file mode 100644
index 000000000..3c803c167
--- /dev/null
+++ b/security/nss/lib/freebl/poly1305-donna-x64-sse2-incremental-source.c
@@ -0,0 +1,881 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* This implementation of poly1305 is by Andrew Moon
+ * (https://github.com/floodyberry/poly1305-donna) and released as public
+ * domain. It implements SIMD vectorization based on the algorithm described in
+ * http://cr.yp.to/papers.html#neoncrypto. Unrolled to 2 powers, i.e. 64 byte
+ * block size. */
+
+#include <emmintrin.h>
+#include <stdint.h>
+
+#include "poly1305.h"
+#include "blapii.h"
+
+/* Compiler helpers: alignment attribute and inline keyword. */
+#define ALIGN(x) __attribute__((aligned(x)))
+#define INLINE inline
+/*
+ * Load/store helpers implemented as direct integer accesses through a
+ * cast pointer, so the byte order is whatever the host uses.
+ * NOTE(review): the _LE names assume a little-endian target -- confirm.
+ * U64TO8_LE is fully parenthesized so it behaves as a single expression
+ * wherever it is expanded.
+ */
+#define U8TO64_LE(m) (*(uint64_t *)(m))
+#define U8TO32_LE(m) (*(uint32_t *)(m))
+#define U64TO8_LE(m, v) (*(uint64_t *)(m) = (v))
+
+/* Short names for the 128-bit SSE2 vector and the 128-bit integer type. */
+typedef __m128i xmmi;
+typedef unsigned __int128 uint128_t;
+
+/*
+ * 16-byte aligned constants, each holding the same value in the low 32 bits
+ * of both 64-bit lanes: the 26-bit limb mask, the multiplier 5, and 1 << 24.
+ */
+static const uint32_t ALIGN(16) poly1305_x64_sse2_message_mask[4] = { (1 << 26) - 1, 0, (1 << 26) - 1, 0 };
+static const uint32_t ALIGN(16) poly1305_x64_sse2_5[4] = { 5, 0, 5, 0 };
+static const uint32_t ALIGN(16) poly1305_x64_sse2_1shl128[4] = { (1 << 24), 0, (1 << 24), 0 };
+
+/* 128-bit + 128-bit addition (wraps modulo 2^128). */
+static uint128_t INLINE
+add128(uint128_t a, uint128_t b)
+{
+    uint128_t sum = a;
+
+    sum += b;
+    return sum;
+}
+
+/* 128-bit + 64-bit addition; |b| is widened before the add. */
+static uint128_t INLINE
+add128_64(uint128_t a, uint64_t b)
+{
+    uint128_t sum = a;
+
+    sum += b;
+    return sum;
+}
+
+/* Full 64 x 64 -> 128-bit unsigned product. */
+static uint128_t INLINE
+mul64x64_128(uint64_t a, uint64_t b)
+{
+    uint128_t product = a;
+
+    product *= b;
+    return product;
+}
+
+/* Low 64 bits of a 128-bit value. */
+static uint64_t INLINE
+lo128(uint128_t a)
+{
+    uint64_t low = (uint64_t)a;
+
+    return low;
+}
+
+/* Shift |v| right by |shift| bits and keep the low 64 bits of the result. */
+static uint64_t INLINE
+shr128(uint128_t v, const int shift)
+{
+    uint128_t shifted = v >> shift;
+
+    return (uint64_t)shifted;
+}
+
+/*
+ * Treat hi:lo as one 128-bit value, shift it right by |shift| bits and
+ * keep the low 64 bits of the result.
+ */
+static uint64_t INLINE
+shr128_pair(uint64_t hi, uint64_t lo, const int shift)
+{
+    uint128_t wide = hi;
+
+    wide <<= 64;
+    wide |= lo;
+    return (uint64_t)(wide >> shift);
+}
+
+/*
+ * One power of r prepared for the SIMD multiply.  R20..R24 each hold one
+ * 26-bit limb in the low 32 bits of both 64-bit lanes; S21..S24 are
+ * R21..R24 multiplied by 5.  The union lets a slot be accessed as a vector
+ * (v), as two 64-bit words (u) or as four 32-bit words (d).
+ */
+typedef struct poly1305_power_t {
+ union {
+ xmmi v;
+ uint64_t u[2];
+ uint32_t d[4];
+ } R20, R21, R22, R23, R24, S21, S22, S23, S24;
+} poly1305_power;
+
+/*
+ * Working state, located inside the caller's poly1305_state at the next
+ * 64-byte boundary (see poly1305_aligned_state).  The odd 32-bit words of
+ * P[1] double as storage for r and the pad: Poly1305Init writes them to
+ * d[1]/d[3] of P[1].R20..R24.
+ */
+typedef struct poly1305_state_internal_t {
+ poly1305_power P[2]; /* 288 bytes, top 32 bit halves unused = 144 bytes of free storage */
+ union {
+ xmmi H[5]; /* 80 bytes */
+ uint64_t HH[10];
+ };
+ /* uint64_t r0,r1,r2; [24 bytes] */
+ /* uint64_t pad0,pad1; [16 bytes] */
+ uint64_t started; /* 8 bytes */
+ uint64_t leftover; /* 8 bytes */
+ uint8_t buffer[64]; /* 64 bytes */
+} poly1305_state_internal; /* 448 bytes total + 63 bytes for alignment = 511 bytes raw */
+
+/*
+ * Round |state| up to the next 64-byte boundary and return it viewed as
+ * the internal state structure.  The address is converted through
+ * uintptr_t, the integer type intended for pointer round-trips.
+ */
+static INLINE poly1305_state_internal *
+poly1305_aligned_state(poly1305_state *state)
+{
+    uintptr_t addr = (uintptr_t)state;
+
+    addr = (addr + 63) & ~(uintptr_t)63;
+    return (poly1305_state_internal *)addr;
+}
+
+/*
+ * Copy 0-63 bytes from |src| to |dst|.
+ *
+ * |bytes| is decomposed by its bits (32, 16, 8, 4, 2, 1) and each set bit
+ * triggers one copy of that width via unaligned vector or integer
+ * accesses.  Both pointers are advanced together, so no pointer
+ * difference between the two (possibly unrelated) buffers is formed.
+ */
+static void INLINE NO_SANITIZE_ALIGNMENT
+poly1305_block_copy(uint8_t *dst, const uint8_t *src, size_t bytes)
+{
+    if (bytes & 32) {
+        _mm_storeu_si128((xmmi *)(dst + 0), _mm_loadu_si128((const xmmi *)(src + 0)));
+        _mm_storeu_si128((xmmi *)(dst + 16), _mm_loadu_si128((const xmmi *)(src + 16)));
+        dst += 32;
+        src += 32;
+    }
+    if (bytes & 16) {
+        _mm_storeu_si128((xmmi *)dst, _mm_loadu_si128((const xmmi *)src));
+        dst += 16;
+        src += 16;
+    }
+    if (bytes & 8) {
+        *(uint64_t *)dst = *(const uint64_t *)src;
+        dst += 8;
+        src += 8;
+    }
+    if (bytes & 4) {
+        *(uint32_t *)dst = *(const uint32_t *)src;
+        dst += 4;
+        src += 4;
+    }
+    if (bytes & 2) {
+        *(uint16_t *)dst = *(const uint16_t *)src;
+        dst += 2;
+        src += 2;
+    }
+    if (bytes & 1) {
+        *dst = *src;
+    }
+}
+
+/*
+ * Zero 0-15 bytes at |dst|: for each set bit of |bytes| (8, 4, 2, 1, in
+ * that order) one store of that width is issued and |dst| advances.
+ */
+static void INLINE
+poly1305_block_zero(uint8_t *dst, size_t bytes)
+{
+    size_t width;
+
+    for (width = 8; width != 0; width >>= 1) {
+        if ((bytes & width) == 0) {
+            continue;
+        }
+        switch (width) {
+            case 8:
+                *(uint64_t *)dst = 0;
+                break;
+            case 4:
+                *(uint32_t *)dst = 0;
+                break;
+            case 2:
+                *(uint16_t *)dst = 0;
+                break;
+            default:
+                *(uint8_t *)dst = 0;
+                break;
+        }
+        dst += width;
+    }
+}
+
+/* Smaller of |a| and |b| (|b| when they are equal). */
+static size_t INLINE
+poly1305_min(size_t a, size_t b)
+{
+    if (a < b) {
+        return a;
+    }
+    return b;
+}
+
+/*
+ * Prepare |state| for a new computation keyed by the 32-byte |key|.
+ *
+ * The first 16 key bytes are clamped and split into three limbs
+ * (r0, r1, r2); the last 16 bytes are the pad.  Both are parked in the odd
+ * 32-bit words (d[1], d[3]) of st->P[1].R20..R24.  The accumulator H is
+ * zeroed and the started/leftover counters are reset.
+ */
+void
+Poly1305Init(poly1305_state *state, const unsigned char key[32])
+{
+    poly1305_state_internal *st = poly1305_aligned_state(state);
+    poly1305_power *saved = &st->P[1];
+    uint64_t k0 = U8TO64_LE(key + 0);
+    uint64_t k1 = U8TO64_LE(key + 8);
+    uint64_t r0, r1, r2;
+    int i;
+
+    /* clamp key */
+    r0 = k0 & 0xffc0fffffff;
+    k0 = (k0 >> 44) | (k1 << 20);
+    r1 = k0 & 0xfffffc0ffff;
+    r2 = (k1 >> 24) & 0x00ffffffc0f;
+
+    /* store r in un-used space of st->P[1] */
+    saved->R20.d[1] = (uint32_t)(r0);
+    saved->R20.d[3] = (uint32_t)(r0 >> 32);
+    saved->R21.d[1] = (uint32_t)(r1);
+    saved->R21.d[3] = (uint32_t)(r1 >> 32);
+    saved->R22.d[1] = (uint32_t)(r2);
+    saved->R22.d[3] = (uint32_t)(r2 >> 32);
+
+    /* store pad */
+    saved->R23.d[1] = U8TO32_LE(key + 16);
+    saved->R23.d[3] = U8TO32_LE(key + 20);
+    saved->R24.d[1] = U8TO32_LE(key + 24);
+    saved->R24.d[3] = U8TO32_LE(key + 28);
+
+    /* H = 0 */
+    for (i = 0; i < 5; i++) {
+        st->H[i] = _mm_setzero_si128();
+    }
+
+    st->started = 0;
+    st->leftover = 0;
+}
+
+/*
+ * Handle the first 32 bytes of message: build the power tables used by the
+ * SIMD code and load the two 16-byte blocks at |m| into st->H.
+ *
+ * r and the pad are first read back from the spare words of st->P[1].  The
+ * loop then squares the running value twice; the first result is written
+ * to st->P[1] and the second to st->P[0] (|p| is decremented after each
+ * pass).  Writing the vectors overwrites the spare words, so r and the pad
+ * are stored back into st->P[1] afterwards.
+ */
+static void
+poly1305_first_block(poly1305_state_internal *st, const uint8_t *m)
+{
+ const xmmi MMASK = _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask);
+ const xmmi FIVE = _mm_load_si128((xmmi *)poly1305_x64_sse2_5);
+ const xmmi HIBIT = _mm_load_si128((xmmi *)poly1305_x64_sse2_1shl128);
+ xmmi T5, T6;
+ poly1305_power *p;
+ uint128_t d[3];
+ uint64_t r0, r1, r2;
+ uint64_t r20, r21, r22, s22;
+ uint64_t pad0, pad1;
+ uint64_t c;
+ uint64_t i;
+
+ /* pull out stored info */
+ p = &st->P[1];
+
+ r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1];
+ r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1];
+ r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1];
+ pad0 = ((uint64_t)p->R23.d[3] << 32) | (uint64_t)p->R23.d[1];
+ pad1 = ((uint64_t)p->R24.d[3] << 32) | (uint64_t)p->R24.d[1];
+
+ /* compute powers r^2,r^4 */
+ r20 = r0;
+ r21 = r1;
+ r22 = r2;
+ for (i = 0; i < 2; i++) {
+ s22 = r22 * (5 << 2);
+
+ /* square the three-limb value (r20, r21, r22) */
+ d[0] = add128(mul64x64_128(r20, r20), mul64x64_128(r21 * 2, s22));
+ d[1] = add128(mul64x64_128(r22, s22), mul64x64_128(r20 * 2, r21));
+ d[2] = add128(mul64x64_128(r21, r21), mul64x64_128(r22 * 2, r20));
+
+ /* carry back down to 44/44/42-bit limbs */
+ r20 = lo128(d[0]) & 0xfffffffffff;
+ c = shr128(d[0], 44);
+ d[1] = add128_64(d[1], c);
+ r21 = lo128(d[1]) & 0xfffffffffff;
+ c = shr128(d[1], 44);
+ d[2] = add128_64(d[2], c);
+ r22 = lo128(d[2]) & 0x3ffffffffff;
+ c = shr128(d[2], 42);
+ r20 += c * 5;
+ c = (r20 >> 44);
+ r20 = r20 & 0xfffffffffff;
+ r21 += c;
+
+ /* re-split into five 26-bit limbs, copied into both 64-bit lanes */
+ p->R20.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)(r20)&0x3ffffff), _MM_SHUFFLE(1, 0, 1, 0));
+ p->R21.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r20 >> 26) | (r21 << 18)) & 0x3ffffff), _MM_SHUFFLE(1, 0, 1, 0));
+ p->R22.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r21 >> 8)) & 0x3ffffff), _MM_SHUFFLE(1, 0, 1, 0));
+ p->R23.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r21 >> 34) | (r22 << 10)) & 0x3ffffff), _MM_SHUFFLE(1, 0, 1, 0));
+ p->R24.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r22 >> 16))), _MM_SHUFFLE(1, 0, 1, 0));
+ p->S21.v = _mm_mul_epu32(p->R21.v, FIVE);
+ p->S22.v = _mm_mul_epu32(p->R22.v, FIVE);
+ p->S23.v = _mm_mul_epu32(p->R23.v, FIVE);
+ p->S24.v = _mm_mul_epu32(p->R24.v, FIVE);
+ p--;
+ }
+
+ /* put saved info back */
+ p = &st->P[1];
+ p->R20.d[1] = (uint32_t)(r0);
+ p->R20.d[3] = (uint32_t)(r0 >> 32);
+ p->R21.d[1] = (uint32_t)(r1);
+ p->R21.d[3] = (uint32_t)(r1 >> 32);
+ p->R22.d[1] = (uint32_t)(r2);
+ p->R22.d[3] = (uint32_t)(r2 >> 32);
+ p->R23.d[1] = (uint32_t)(pad0);
+ p->R23.d[3] = (uint32_t)(pad0 >> 32);
+ p->R24.d[1] = (uint32_t)(pad1);
+ p->R24.d[3] = (uint32_t)(pad1 >> 32);
+
+ /* H = [Mx,My] */
+ T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)), _mm_loadl_epi64((xmmi *)(m + 16)));
+ T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)), _mm_loadl_epi64((xmmi *)(m + 24)));
+ st->H[0] = _mm_and_si128(MMASK, T5);
+ st->H[1] = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
+ T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12));
+ st->H[2] = _mm_and_si128(MMASK, T5);
+ st->H[3] = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
+ st->H[4] = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT);
+}
+
+/*
+ * Absorb message data from |m| in 64-byte strides; a tail shorter than 64
+ * bytes is left untouched.
+ *
+ * Each stride evaluates H = H*[r^4,r^4] + [Mx,My]*[r^2,r^2] + [Mx,My] on
+ * the two 64-bit lanes, using st->P[0] for the first multiply and st->P[1]
+ * for the second, and then runs a partial carry pass over the five limbs.
+ * The accumulator is read from st->H on entry and written back on exit.
+ */
+static void
+poly1305_blocks(poly1305_state_internal *st, const uint8_t *m, size_t bytes)
+{
+ const xmmi MMASK = _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask);
+ const xmmi FIVE = _mm_load_si128((xmmi *)poly1305_x64_sse2_5);
+ const xmmi HIBIT = _mm_load_si128((xmmi *)poly1305_x64_sse2_1shl128);
+
+ poly1305_power *p;
+ xmmi H0, H1, H2, H3, H4;
+ xmmi T0, T1, T2, T3, T4, T5, T6;
+ xmmi M0, M1, M2, M3, M4;
+ xmmi C1, C2;
+
+ H0 = st->H[0];
+ H1 = st->H[1];
+ H2 = st->H[2];
+ H3 = st->H[3];
+ H4 = st->H[4];
+
+ while (bytes >= 64) {
+ /* H *= [r^4,r^4] */
+ p = &st->P[0];
+ T0 = _mm_mul_epu32(H0, p->R20.v);
+ T1 = _mm_mul_epu32(H0, p->R21.v);
+ T2 = _mm_mul_epu32(H0, p->R22.v);
+ T3 = _mm_mul_epu32(H0, p->R23.v);
+ T4 = _mm_mul_epu32(H0, p->R24.v);
+ T5 = _mm_mul_epu32(H1, p->S24.v);
+ T6 = _mm_mul_epu32(H1, p->R20.v);
+ T0 = _mm_add_epi64(T0, T5);
+ T1 = _mm_add_epi64(T1, T6);
+ T5 = _mm_mul_epu32(H2, p->S23.v);
+ T6 = _mm_mul_epu32(H2, p->S24.v);
+ T0 = _mm_add_epi64(T0, T5);
+ T1 = _mm_add_epi64(T1, T6);
+ T5 = _mm_mul_epu32(H3, p->S22.v);
+ T6 = _mm_mul_epu32(H3, p->S23.v);
+ T0 = _mm_add_epi64(T0, T5);
+ T1 = _mm_add_epi64(T1, T6);
+ T5 = _mm_mul_epu32(H4, p->S21.v);
+ T6 = _mm_mul_epu32(H4, p->S22.v);
+ T0 = _mm_add_epi64(T0, T5);
+ T1 = _mm_add_epi64(T1, T6);
+ T5 = _mm_mul_epu32(H1, p->R21.v);
+ T6 = _mm_mul_epu32(H1, p->R22.v);
+ T2 = _mm_add_epi64(T2, T5);
+ T3 = _mm_add_epi64(T3, T6);
+ T5 = _mm_mul_epu32(H2, p->R20.v);
+ T6 = _mm_mul_epu32(H2, p->R21.v);
+ T2 = _mm_add_epi64(T2, T5);
+ T3 = _mm_add_epi64(T3, T6);
+ T5 = _mm_mul_epu32(H3, p->S24.v);
+ T6 = _mm_mul_epu32(H3, p->R20.v);
+ T2 = _mm_add_epi64(T2, T5);
+ T3 = _mm_add_epi64(T3, T6);
+ T5 = _mm_mul_epu32(H4, p->S23.v);
+ T6 = _mm_mul_epu32(H4, p->S24.v);
+ T2 = _mm_add_epi64(T2, T5);
+ T3 = _mm_add_epi64(T3, T6);
+ T5 = _mm_mul_epu32(H1, p->R23.v);
+ T4 = _mm_add_epi64(T4, T5);
+ T5 = _mm_mul_epu32(H2, p->R22.v);
+ T4 = _mm_add_epi64(T4, T5);
+ T5 = _mm_mul_epu32(H3, p->R21.v);
+ T4 = _mm_add_epi64(T4, T5);
+ T5 = _mm_mul_epu32(H4, p->R20.v);
+ T4 = _mm_add_epi64(T4, T5);
+
+ /* H += [Mx,My]*[r^2,r^2] */
+ /* bytes 0..31 of the stride, split into five 26-bit limbs per lane */
+ T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)), _mm_loadl_epi64((xmmi *)(m + 16)));
+ T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)), _mm_loadl_epi64((xmmi *)(m + 24)));
+ M0 = _mm_and_si128(MMASK, T5);
+ M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
+ T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12));
+ M2 = _mm_and_si128(MMASK, T5);
+ M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
+ M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT);
+
+ p = &st->P[1];
+ T5 = _mm_mul_epu32(M0, p->R20.v);
+ T6 = _mm_mul_epu32(M0, p->R21.v);
+ T0 = _mm_add_epi64(T0, T5);
+ T1 = _mm_add_epi64(T1, T6);
+ T5 = _mm_mul_epu32(M1, p->S24.v);
+ T6 = _mm_mul_epu32(M1, p->R20.v);
+ T0 = _mm_add_epi64(T0, T5);
+ T1 = _mm_add_epi64(T1, T6);
+ T5 = _mm_mul_epu32(M2, p->S23.v);
+ T6 = _mm_mul_epu32(M2, p->S24.v);
+ T0 = _mm_add_epi64(T0, T5);
+ T1 = _mm_add_epi64(T1, T6);
+ T5 = _mm_mul_epu32(M3, p->S22.v);
+ T6 = _mm_mul_epu32(M3, p->S23.v);
+ T0 = _mm_add_epi64(T0, T5);
+ T1 = _mm_add_epi64(T1, T6);
+ T5 = _mm_mul_epu32(M4, p->S21.v);
+ T6 = _mm_mul_epu32(M4, p->S22.v);
+ T0 = _mm_add_epi64(T0, T5);
+ T1 = _mm_add_epi64(T1, T6);
+ T5 = _mm_mul_epu32(M0, p->R22.v);
+ T6 = _mm_mul_epu32(M0, p->R23.v);
+ T2 = _mm_add_epi64(T2, T5);
+ T3 = _mm_add_epi64(T3, T6);
+ T5 = _mm_mul_epu32(M1, p->R21.v);
+ T6 = _mm_mul_epu32(M1, p->R22.v);
+ T2 = _mm_add_epi64(T2, T5);
+ T3 = _mm_add_epi64(T3, T6);
+ T5 = _mm_mul_epu32(M2, p->R20.v);
+ T6 = _mm_mul_epu32(M2, p->R21.v);
+ T2 = _mm_add_epi64(T2, T5);
+ T3 = _mm_add_epi64(T3, T6);
+ T5 = _mm_mul_epu32(M3, p->S24.v);
+ T6 = _mm_mul_epu32(M3, p->R20.v);
+ T2 = _mm_add_epi64(T2, T5);
+ T3 = _mm_add_epi64(T3, T6);
+ T5 = _mm_mul_epu32(M4, p->S23.v);
+ T6 = _mm_mul_epu32(M4, p->S24.v);
+ T2 = _mm_add_epi64(T2, T5);
+ T3 = _mm_add_epi64(T3, T6);
+ T5 = _mm_mul_epu32(M0, p->R24.v);
+ T4 = _mm_add_epi64(T4, T5);
+ T5 = _mm_mul_epu32(M1, p->R23.v);
+ T4 = _mm_add_epi64(T4, T5);
+ T5 = _mm_mul_epu32(M2, p->R22.v);
+ T4 = _mm_add_epi64(T4, T5);
+ T5 = _mm_mul_epu32(M3, p->R21.v);
+ T4 = _mm_add_epi64(T4, T5);
+ T5 = _mm_mul_epu32(M4, p->R20.v);
+ T4 = _mm_add_epi64(T4, T5);
+
+ /* H += [Mx,My] */
+ /* bytes 32..63 of the stride, added without a multiply */
+ T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 32)), _mm_loadl_epi64((xmmi *)(m + 48)));
+ T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 40)), _mm_loadl_epi64((xmmi *)(m + 56)));
+ M0 = _mm_and_si128(MMASK, T5);
+ M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
+ T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12));
+ M2 = _mm_and_si128(MMASK, T5);
+ M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
+ M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT);
+
+ T0 = _mm_add_epi64(T0, M0);
+ T1 = _mm_add_epi64(T1, M1);
+ T2 = _mm_add_epi64(T2, M2);
+ T3 = _mm_add_epi64(T3, M3);
+ T4 = _mm_add_epi64(T4, M4);
+
+ /* reduce */
+ /* two interleaved carry chains; the carry out of T4 re-enters T0 times 5 */
+ C1 = _mm_srli_epi64(T0, 26);
+ C2 = _mm_srli_epi64(T3, 26);
+ T0 = _mm_and_si128(T0, MMASK);
+ T3 = _mm_and_si128(T3, MMASK);
+ T1 = _mm_add_epi64(T1, C1);
+ T4 = _mm_add_epi64(T4, C2);
+ C1 = _mm_srli_epi64(T1, 26);
+ C2 = _mm_srli_epi64(T4, 26);
+ T1 = _mm_and_si128(T1, MMASK);
+ T4 = _mm_and_si128(T4, MMASK);
+ T2 = _mm_add_epi64(T2, C1);
+ T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE));
+ C1 = _mm_srli_epi64(T2, 26);
+ C2 = _mm_srli_epi64(T0, 26);
+ T2 = _mm_and_si128(T2, MMASK);
+ T0 = _mm_and_si128(T0, MMASK);
+ T3 = _mm_add_epi64(T3, C1);
+ T1 = _mm_add_epi64(T1, C2);
+ C1 = _mm_srli_epi64(T3, 26);
+ T3 = _mm_and_si128(T3, MMASK);
+ T4 = _mm_add_epi64(T4, C1);
+
+ /* H = (H*[r^4,r^4] + [Mx,My]*[r^2,r^2] + [Mx,My]) */
+ H0 = T0;
+ H1 = T1;
+ H2 = T2;
+ H3 = T3;
+ H4 = T4;
+
+ m += 64;
+ bytes -= 64;
+ }
+
+ st->H[0] = H0;
+ st->H[1] = H1;
+ st->H[2] = H2;
+ st->H[3] = H3;
+ st->H[4] = H4;
+}
+
+/*
+ * Collapse the two-lane SIMD accumulator in st->H into a single value.
+ *
+ * If at least 32 bytes are available at |m| they are absorbed first
+ * (H = H*[r^2,r^2] + [Mx,My]).  The r saved in the spare words of st->P[1]
+ * is then expanded into word d[2] of each R2x/S2x slot so that a single
+ * multiply applies [r^2,r].  The two lanes are added together, carried,
+ * and repacked into st->HH[0..2] as 44/44/42-bit limbs.
+ *
+ * Returns the number of message bytes consumed: 32 or 0.
+ */
+static size_t
+poly1305_combine(poly1305_state_internal *st, const uint8_t *m, size_t bytes)
+{
+ const xmmi MMASK = _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask);
+ const xmmi HIBIT = _mm_load_si128((xmmi *)poly1305_x64_sse2_1shl128);
+ const xmmi FIVE = _mm_load_si128((xmmi *)poly1305_x64_sse2_5);
+
+ poly1305_power *p;
+ xmmi H0, H1, H2, H3, H4;
+ xmmi M0, M1, M2, M3, M4;
+ xmmi T0, T1, T2, T3, T4, T5, T6;
+ xmmi C1, C2;
+
+ uint64_t r0, r1, r2;
+ uint64_t t0, t1, t2, t3, t4;
+ uint64_t c;
+ size_t consumed = 0;
+
+ H0 = st->H[0];
+ H1 = st->H[1];
+ H2 = st->H[2];
+ H3 = st->H[3];
+ H4 = st->H[4];
+
+ /* p = [r^2,r^2] */
+ p = &st->P[1];
+
+ if (bytes >= 32) {
+ /* H *= [r^2,r^2] */
+ T0 = _mm_mul_epu32(H0, p->R20.v);
+ T1 = _mm_mul_epu32(H0, p->R21.v);
+ T2 = _mm_mul_epu32(H0, p->R22.v);
+ T3 = _mm_mul_epu32(H0, p->R23.v);
+ T4 = _mm_mul_epu32(H0, p->R24.v);
+ T5 = _mm_mul_epu32(H1, p->S24.v);
+ T6 = _mm_mul_epu32(H1, p->R20.v);
+ T0 = _mm_add_epi64(T0, T5);
+ T1 = _mm_add_epi64(T1, T6);
+ T5 = _mm_mul_epu32(H2, p->S23.v);
+ T6 = _mm_mul_epu32(H2, p->S24.v);
+ T0 = _mm_add_epi64(T0, T5);
+ T1 = _mm_add_epi64(T1, T6);
+ T5 = _mm_mul_epu32(H3, p->S22.v);
+ T6 = _mm_mul_epu32(H3, p->S23.v);
+ T0 = _mm_add_epi64(T0, T5);
+ T1 = _mm_add_epi64(T1, T6);
+ T5 = _mm_mul_epu32(H4, p->S21.v);
+ T6 = _mm_mul_epu32(H4, p->S22.v);
+ T0 = _mm_add_epi64(T0, T5);
+ T1 = _mm_add_epi64(T1, T6);
+ T5 = _mm_mul_epu32(H1, p->R21.v);
+ T6 = _mm_mul_epu32(H1, p->R22.v);
+ T2 = _mm_add_epi64(T2, T5);
+ T3 = _mm_add_epi64(T3, T6);
+ T5 = _mm_mul_epu32(H2, p->R20.v);
+ T6 = _mm_mul_epu32(H2, p->R21.v);
+ T2 = _mm_add_epi64(T2, T5);
+ T3 = _mm_add_epi64(T3, T6);
+ T5 = _mm_mul_epu32(H3, p->S24.v);
+ T6 = _mm_mul_epu32(H3, p->R20.v);
+ T2 = _mm_add_epi64(T2, T5);
+ T3 = _mm_add_epi64(T3, T6);
+ T5 = _mm_mul_epu32(H4, p->S23.v);
+ T6 = _mm_mul_epu32(H4, p->S24.v);
+ T2 = _mm_add_epi64(T2, T5);
+ T3 = _mm_add_epi64(T3, T6);
+ T5 = _mm_mul_epu32(H1, p->R23.v);
+ T4 = _mm_add_epi64(T4, T5);
+ T5 = _mm_mul_epu32(H2, p->R22.v);
+ T4 = _mm_add_epi64(T4, T5);
+ T5 = _mm_mul_epu32(H3, p->R21.v);
+ T4 = _mm_add_epi64(T4, T5);
+ T5 = _mm_mul_epu32(H4, p->R20.v);
+ T4 = _mm_add_epi64(T4, T5);
+
+ /* H += [Mx,My] */
+ T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)), _mm_loadl_epi64((xmmi *)(m + 16)));
+ T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)), _mm_loadl_epi64((xmmi *)(m + 24)));
+ M0 = _mm_and_si128(MMASK, T5);
+ M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
+ T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12));
+ M2 = _mm_and_si128(MMASK, T5);
+ M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26));
+ M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT);
+
+ T0 = _mm_add_epi64(T0, M0);
+ T1 = _mm_add_epi64(T1, M1);
+ T2 = _mm_add_epi64(T2, M2);
+ T3 = _mm_add_epi64(T3, M3);
+ T4 = _mm_add_epi64(T4, M4);
+
+ /* reduce */
+ C1 = _mm_srli_epi64(T0, 26);
+ C2 = _mm_srli_epi64(T3, 26);
+ T0 = _mm_and_si128(T0, MMASK);
+ T3 = _mm_and_si128(T3, MMASK);
+ T1 = _mm_add_epi64(T1, C1);
+ T4 = _mm_add_epi64(T4, C2);
+ C1 = _mm_srli_epi64(T1, 26);
+ C2 = _mm_srli_epi64(T4, 26);
+ T1 = _mm_and_si128(T1, MMASK);
+ T4 = _mm_and_si128(T4, MMASK);
+ T2 = _mm_add_epi64(T2, C1);
+ T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE));
+ C1 = _mm_srli_epi64(T2, 26);
+ C2 = _mm_srli_epi64(T0, 26);
+ T2 = _mm_and_si128(T2, MMASK);
+ T0 = _mm_and_si128(T0, MMASK);
+ T3 = _mm_add_epi64(T3, C1);
+ T1 = _mm_add_epi64(T1, C2);
+ C1 = _mm_srli_epi64(T3, 26);
+ T3 = _mm_and_si128(T3, MMASK);
+ T4 = _mm_add_epi64(T4, C1);
+
+ /* H = (H*[r^2,r^2] + [Mx,My]) */
+ H0 = T0;
+ H1 = T1;
+ H2 = T2;
+ H3 = T3;
+ H4 = T4;
+
+ consumed = 32;
+ }
+
+ /* finalize, H *= [r^2,r] */
+ /* recover r from the spare words of P[1] ... */
+ r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1];
+ r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1];
+ r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1];
+
+ /* ... and write its 26-bit limbs (and 5x multiples) into word d[2] only */
+ p->R20.d[2] = (uint32_t)(r0)&0x3ffffff;
+ p->R21.d[2] = (uint32_t)((r0 >> 26) | (r1 << 18)) & 0x3ffffff;
+ p->R22.d[2] = (uint32_t)((r1 >> 8)) & 0x3ffffff;
+ p->R23.d[2] = (uint32_t)((r1 >> 34) | (r2 << 10)) & 0x3ffffff;
+ p->R24.d[2] = (uint32_t)((r2 >> 16));
+ p->S21.d[2] = p->R21.d[2] * 5;
+ p->S22.d[2] = p->R22.d[2] * 5;
+ p->S23.d[2] = p->R23.d[2] * 5;
+ p->S24.d[2] = p->R24.d[2] * 5;
+
+ /* H *= [r^2,r] */
+ T0 = _mm_mul_epu32(H0, p->R20.v);
+ T1 = _mm_mul_epu32(H0, p->R21.v);
+ T2 = _mm_mul_epu32(H0, p->R22.v);
+ T3 = _mm_mul_epu32(H0, p->R23.v);
+ T4 = _mm_mul_epu32(H0, p->R24.v);
+ T5 = _mm_mul_epu32(H1, p->S24.v);
+ T6 = _mm_mul_epu32(H1, p->R20.v);
+ T0 = _mm_add_epi64(T0, T5);
+ T1 = _mm_add_epi64(T1, T6);
+ T5 = _mm_mul_epu32(H2, p->S23.v);
+ T6 = _mm_mul_epu32(H2, p->S24.v);
+ T0 = _mm_add_epi64(T0, T5);
+ T1 = _mm_add_epi64(T1, T6);
+ T5 = _mm_mul_epu32(H3, p->S22.v);
+ T6 = _mm_mul_epu32(H3, p->S23.v);
+ T0 = _mm_add_epi64(T0, T5);
+ T1 = _mm_add_epi64(T1, T6);
+ T5 = _mm_mul_epu32(H4, p->S21.v);
+ T6 = _mm_mul_epu32(H4, p->S22.v);
+ T0 = _mm_add_epi64(T0, T5);
+ T1 = _mm_add_epi64(T1, T6);
+ T5 = _mm_mul_epu32(H1, p->R21.v);
+ T6 = _mm_mul_epu32(H1, p->R22.v);
+ T2 = _mm_add_epi64(T2, T5);
+ T3 = _mm_add_epi64(T3, T6);
+ T5 = _mm_mul_epu32(H2, p->R20.v);
+ T6 = _mm_mul_epu32(H2, p->R21.v);
+ T2 = _mm_add_epi64(T2, T5);
+ T3 = _mm_add_epi64(T3, T6);
+ T5 = _mm_mul_epu32(H3, p->S24.v);
+ T6 = _mm_mul_epu32(H3, p->R20.v);
+ T2 = _mm_add_epi64(T2, T5);
+ T3 = _mm_add_epi64(T3, T6);
+ T5 = _mm_mul_epu32(H4, p->S23.v);
+ T6 = _mm_mul_epu32(H4, p->S24.v);
+ T2 = _mm_add_epi64(T2, T5);
+ T3 = _mm_add_epi64(T3, T6);
+ T5 = _mm_mul_epu32(H1, p->R23.v);
+ T4 = _mm_add_epi64(T4, T5);
+ T5 = _mm_mul_epu32(H2, p->R22.v);
+ T4 = _mm_add_epi64(T4, T5);
+ T5 = _mm_mul_epu32(H3, p->R21.v);
+ T4 = _mm_add_epi64(T4, T5);
+ T5 = _mm_mul_epu32(H4, p->R20.v);
+ T4 = _mm_add_epi64(T4, T5);
+
+ /* same partial carry pass as after the multiply above */
+ C1 = _mm_srli_epi64(T0, 26);
+ C2 = _mm_srli_epi64(T3, 26);
+ T0 = _mm_and_si128(T0, MMASK);
+ T3 = _mm_and_si128(T3, MMASK);
+ T1 = _mm_add_epi64(T1, C1);
+ T4 = _mm_add_epi64(T4, C2);
+ C1 = _mm_srli_epi64(T1, 26);
+ C2 = _mm_srli_epi64(T4, 26);
+ T1 = _mm_and_si128(T1, MMASK);
+ T4 = _mm_and_si128(T4, MMASK);
+ T2 = _mm_add_epi64(T2, C1);
+ T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE));
+ C1 = _mm_srli_epi64(T2, 26);
+ C2 = _mm_srli_epi64(T0, 26);
+ T2 = _mm_and_si128(T2, MMASK);
+ T0 = _mm_and_si128(T0, MMASK);
+ T3 = _mm_add_epi64(T3, C1);
+ T1 = _mm_add_epi64(T1, C2);
+ C1 = _mm_srli_epi64(T3, 26);
+ T3 = _mm_and_si128(T3, MMASK);
+ T4 = _mm_add_epi64(T4, C1);
+
+ /* H = H[0]+H[1] */
+ H0 = _mm_add_epi64(T0, _mm_srli_si128(T0, 8));
+ H1 = _mm_add_epi64(T1, _mm_srli_si128(T1, 8));
+ H2 = _mm_add_epi64(T2, _mm_srli_si128(T2, 8));
+ H3 = _mm_add_epi64(T3, _mm_srli_si128(T3, 8));
+ H4 = _mm_add_epi64(T4, _mm_srli_si128(T4, 8));
+
+ /* scalar carry chain over the five 26-bit limbs */
+ t0 = _mm_cvtsi128_si32(H0);
+ c = (t0 >> 26);
+ t0 &= 0x3ffffff;
+ t1 = _mm_cvtsi128_si32(H1) + c;
+ c = (t1 >> 26);
+ t1 &= 0x3ffffff;
+ t2 = _mm_cvtsi128_si32(H2) + c;
+ c = (t2 >> 26);
+ t2 &= 0x3ffffff;
+ t3 = _mm_cvtsi128_si32(H3) + c;
+ c = (t3 >> 26);
+ t3 &= 0x3ffffff;
+ t4 = _mm_cvtsi128_si32(H4) + c;
+ c = (t4 >> 26);
+ t4 &= 0x3ffffff;
+ t0 = t0 + (c * 5);
+ c = (t0 >> 26);
+ t0 &= 0x3ffffff;
+ t1 = t1 + c;
+
+ /* repack 5 x 26-bit limbs as 44/44/42-bit limbs */
+ st->HH[0] = ((t0) | (t1 << 26)) & 0xfffffffffffull;
+ st->HH[1] = ((t1 >> 18) | (t2 << 8) | (t3 << 34)) & 0xfffffffffffull;
+ st->HH[2] = ((t3 >> 10) | (t4 << 16)) & 0x3ffffffffffull;
+
+ return consumed;
+}
+
+/*
+ * Poly1305Update: feed |bytes| bytes of message data at |m| into the MAC.
+ *
+ * Input is staged in st->buffer until enough is available: 32 bytes for the
+ * one-time poly1305_first_block() call, then 64-byte multiples for
+ * poly1305_blocks(). Whatever does not fill a 64-byte chunk stays buffered
+ * (st->leftover counts it) for the next call or for Poly1305Finish.
+ */
+void
+Poly1305Update(poly1305_state *state, const unsigned char *m, size_t bytes)
+{
+    poly1305_state_internal *st = poly1305_aligned_state(state);
+    size_t chunk;
+
+    /* need at least 32 initial bytes to start the accelerated branch */
+    if (!st->started) {
+        if ((bytes > 32) && (st->leftover == 0)) {
+            /* nothing buffered and more than 32 bytes supplied: start
+             * straight from the caller's data */
+            poly1305_first_block(st, m);
+            bytes -= 32;
+            m += 32;
+        } else {
+            /* top the staging buffer up towards 32 bytes */
+            chunk = poly1305_min(32 - st->leftover, bytes);
+            poly1305_block_copy(st->buffer + st->leftover, m, chunk);
+            st->leftover += chunk;
+            m += chunk;
+            bytes -= chunk;
+            /* stay un-started unless 32 bytes are buffered AND more input
+             * follows in this call */
+            if ((bytes == 0) || (st->leftover < 32))
+                return;
+            poly1305_first_block(st, st->buffer);
+            st->leftover = 0;
+        }
+        st->started = 1;
+    }
+
+    /* complete a previously buffered partial 64-byte chunk first */
+    if (st->leftover != 0) {
+        chunk = poly1305_min(64 - st->leftover, bytes);
+        poly1305_block_copy(st->buffer + st->leftover, m, chunk);
+        st->leftover += chunk;
+        m += chunk;
+        bytes -= chunk;
+        if (st->leftover < 64)
+            return;
+        poly1305_blocks(st, st->buffer, 64);
+        st->leftover = 0;
+    }
+
+    /* bulk path: every whole 64-byte chunk directly from the input */
+    if (bytes >= 64) {
+        chunk = bytes & ~(size_t)63;
+        poly1305_blocks(st, m, chunk);
+        m += chunk;
+        bytes -= chunk;
+    }
+
+    /* stash the tail for later */
+    if (bytes != 0) {
+        poly1305_block_copy(st->buffer + st->leftover, m, bytes);
+        st->leftover += bytes;
+    }
+}
+
+/*
+ * Poly1305Finish: fold any still-buffered input into the accumulator, do the
+ * final reduction, add the pad and write the 16-byte tag to |mac|.
+ *
+ * state - caller's opaque state; the working state is obtained through
+ *         poly1305_aligned_state().
+ * mac   - receives the 16-byte tag.
+ *
+ * The scalar tail below keeps the accumulator in three limbs h0/h1/h2 that
+ * are masked to 44, 44 and 42 bits respectively.
+ */
+void
+Poly1305Finish(poly1305_state *state, unsigned char mac[16])
+{
+ poly1305_state_internal *st = poly1305_aligned_state(state);
+ size_t leftover = st->leftover;
+ uint8_t *m = st->buffer;
+ uint128_t d[3];
+ uint64_t h0, h1, h2;
+ uint64_t t0, t1;
+ uint64_t g0, g1, g2, c, nc;
+ uint64_t r0, r1, r2, s1, s2;
+ poly1305_power *p;
+
+ /* If the accelerated path was started, poly1305_combine() stores the
+ * combined accumulator in st->HH and reports how many of the buffered
+ * bytes it consumed; only the remainder is handled below. */
+ if (st->started) {
+ size_t consumed = poly1305_combine(st, m, leftover);
+ leftover -= consumed;
+ m += consumed;
+ }
+
+ /* st->HH will either be 0 or have the combined result */
+ h0 = st->HH[0];
+ h1 = st->HH[1];
+ h2 = st->HH[2];
+
+ /* r0..r2 are reassembled from 32-bit lanes 1 and 3 of the R20..R22
+ * entries of P[1]; s1 and s2 are r1 and r2 times 20 (5 << 2). */
+ p = &st->P[1];
+ r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1];
+ r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1];
+ r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1];
+ s1 = r1 * (5 << 2);
+ s2 = r2 * (5 << 2);
+
+ if (leftover < 16)
+ goto poly1305_donna_atmost15bytes;
+
+/* Absorb one full 16-byte block: split it over the 44/44/40-bit limbs and
+ * set bit 40 of h2, i.e. bit 128 of the block (44 + 44 + 40 = 128). */
+poly1305_donna_atleast16bytes:
+ t0 = U8TO64_LE(m + 0);
+ t1 = U8TO64_LE(m + 8);
+ h0 += t0 & 0xfffffffffff;
+ t0 = shr128_pair(t1, t0, 44);
+ h1 += t0 & 0xfffffffffff;
+ h2 += (t1 >> 24) | ((uint64_t)1 << 40);
+
+/* h = h * r: three 128-bit column sums, then carries are pushed from limb
+ * to limb; the carry out of the 42-bit top limb re-enters h0 times 5. */
+poly1305_donna_mul:
+ d[0] = add128(add128(mul64x64_128(h0, r0), mul64x64_128(h1, s2)), mul64x64_128(h2, s1));
+ d[1] = add128(add128(mul64x64_128(h0, r1), mul64x64_128(h1, r0)), mul64x64_128(h2, s2));
+ d[2] = add128(add128(mul64x64_128(h0, r2), mul64x64_128(h1, r1)), mul64x64_128(h2, r0));
+ h0 = lo128(d[0]) & 0xfffffffffff;
+ c = shr128(d[0], 44);
+ d[1] = add128_64(d[1], c);
+ h1 = lo128(d[1]) & 0xfffffffffff;
+ c = shr128(d[1], 44);
+ d[2] = add128_64(d[2], c);
+ h2 = lo128(d[2]) & 0x3ffffffffff;
+ c = shr128(d[2], 42);
+ h0 += c * 5;
+
+ m += 16;
+ leftover -= 16;
+ if (leftover >= 16)
+ goto poly1305_donna_atleast16bytes;
+
+/* final bytes */
+poly1305_donna_atmost15bytes:
+ if (!leftover)
+ goto poly1305_donna_finish;
+
+ /* Pad the partial block in place: a single 1 byte, then zeros up to 16
+ * bytes. Setting leftover to 16 makes the shared multiply step above
+ * leave it at 0 afterwards, so control then reaches the finish label. */
+ m[leftover++] = 1;
+ poly1305_block_zero(m + leftover, 16 - leftover);
+ leftover = 16;
+
+ /* Same limb split as a full block, but without the extra bit 40 on h2:
+ * the appended 1 byte already marks the end of the data. */
+ t0 = U8TO64_LE(m + 0);
+ t1 = U8TO64_LE(m + 8);
+ h0 += t0 & 0xfffffffffff;
+ t0 = shr128_pair(t1, t0, 44);
+ h1 += t0 & 0xfffffffffff;
+ h2 += (t1 >> 24);
+
+ goto poly1305_donna_mul;
+
+poly1305_donna_finish:
+ /* full carry propagation so every limb is within its mask */
+ c = (h0 >> 44);
+ h0 &= 0xfffffffffff;
+ h1 += c;
+ c = (h1 >> 44);
+ h1 &= 0xfffffffffff;
+ h2 += c;
+ c = (h2 >> 42);
+ h2 &= 0x3ffffffffff;
+ h0 += c * 5;
+
+ /* g = h + 5 - 2^130 (the 2^130 shows up as 1 << 42 on the top limb) */
+ g0 = h0 + 5;
+ c = (g0 >> 44);
+ g0 &= 0xfffffffffff;
+ g1 = h1 + c;
+ c = (g1 >> 44);
+ g1 &= 0xfffffffffff;
+ g2 = h2 + c - ((uint64_t)1 << 42);
+
+ /* Branch-free select: if the subtraction wrapped (top bit of g2 set)
+ * c becomes 0 and h is kept; otherwise c is all ones and g replaces h. */
+ c = (g2 >> 63) - 1;
+ nc = ~c;
+ h0 = (h0 & nc) | (g0 & c);
+ h1 = (h1 & nc) | (g1 & c);
+ h2 = (h2 & nc) | (g2 & c);
+
+ /* pad */
+ /* the two 64-bit pad words are rebuilt from lanes 1 and 3 of the R23 and
+ * R24 entries of P[1], then added limb by limb with carry */
+ t0 = ((uint64_t)p->R23.d[3] << 32) | (uint64_t)p->R23.d[1];
+ t1 = ((uint64_t)p->R24.d[3] << 32) | (uint64_t)p->R24.d[1];
+ h0 += (t0 & 0xfffffffffff);
+ c = (h0 >> 44);
+ h0 &= 0xfffffffffff;
+ t0 = shr128_pair(t1, t0, 44);
+ h1 += (t0 & 0xfffffffffff) + c;
+ c = (h1 >> 44);
+ h1 &= 0xfffffffffff;
+ t1 = (t1 >> 24);
+ h2 += (t1) + c;
+
+ /* repack the 44/44/42-bit limbs into two 64-bit words and emit the tag */
+ U64TO8_LE(mac + 0, ((h0) | (h1 << 44)));
+ U64TO8_LE(mac + 8, ((h1 >> 20) | (h2 << 24)));
+}
diff --git a/security/nss/lib/freebl/poly1305.c b/security/nss/lib/freebl/poly1305.c
new file mode 100644
index 000000000..eb3e3cd55
--- /dev/null
+++ b/security/nss/lib/freebl/poly1305.c
@@ -0,0 +1,314 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* This implementation of poly1305 is by Andrew Moon
+ * (https://github.com/floodyberry/poly1305-donna) and released as public
+ * domain. */
+
+#include <string.h>
+
+#include "poly1305.h"
+
+#if defined(_MSC_VER) && _MSC_VER < 1600
+#include "prtypes.h"
+typedef PRUint32 uint32_t;
+typedef PRUint64 uint64_t;
+#else
+#include <stdint.h>
+#endif
+
+#if defined(NSS_X86) || defined(NSS_X64)
+/* These targets are little-endian, so the in-memory byte order already is
+ * the wire order and a plain memcpy does the conversion. */
+
+/* Read a little-endian 32-bit word from the 4 bytes at |m|. */
+static uint32_t
+U8TO32_LE(const unsigned char *m)
+{
+    uint32_t word;
+
+    memcpy(&word, m, sizeof(word));
+    return word;
+}
+
+/* Store |v| as 4 little-endian bytes at |m|. */
+static void
+U32TO8_LE(unsigned char *m, uint32_t v)
+{
+    memcpy(m, &v, sizeof(v));
+}
+#else
+/* Byte-order independent versions for every other target. */
+
+/* Read a little-endian 32-bit word from the 4 bytes at |m|. */
+static uint32_t
+U8TO32_LE(const unsigned char *m)
+{
+    uint32_t word = 0;
+    int idx;
+
+    /* start at the most significant byte and shift it up as we go */
+    for (idx = 3; idx >= 0; idx--) {
+        word = (word << 8) | (uint32_t)m[idx];
+    }
+    return word;
+}
+
+/* Store |v| as 4 little-endian bytes at |m|. */
+static void
+U32TO8_LE(unsigned char *m, uint32_t v)
+{
+    int idx;
+
+    for (idx = 0; idx < 4; idx++) {
+        m[idx] = (unsigned char)v; /* keeps the low 8 bits */
+        v >>= 8;
+    }
+}
+#endif
+
+/* Widening multiply: the full 64-bit product of two 32-bit operands. */
+static uint64_t
+mul32x32_64(uint32_t a, uint32_t b)
+{
+    uint64_t product = a; /* widen first so the multiply cannot truncate */
+
+    product *= b;
+    return product;
+}
+
+/* Working state of the portable implementation. The public poly1305_state
+ * buffer is cast to this struct by Poly1305Init/Update/Finish. */
+struct poly1305_state_st {
+ /* multiplier r from the first 16 key bytes, as five limbs of at most
+ * 26 bits each (masked in Poly1305Init) */
+ uint32_t r0, r1, r2, r3, r4;
+ /* s_i = 5 * r_i, precomputed in Poly1305Init for use in update() */
+ uint32_t s1, s2, s3, s4;
+ /* running accumulator, five 26-bit limbs */
+ uint32_t h0, h1, h2, h3, h4;
+ /* input bytes that do not yet form a whole 16-byte block */
+ unsigned char buf[16];
+ /* number of valid bytes in buf */
+ unsigned int buf_used;
+ /* last 16 bytes of the one-time key; added to the result in
+ * Poly1305Finish */
+ unsigned char key[16];
+};
+
+/* update updates |state| given some amount of input data. This function may
+ * only be called with a |len| that is not a multiple of 16 at the end of the
+ * data. Otherwise the input must be buffered into 16 byte blocks.
+ *
+ * state - accumulator h and multiplier r/s to use.
+ * in    - input bytes.
+ * len   - number of input bytes; whole 16-byte blocks are absorbed first
+ *         and a trailing 1..15 bytes are padded and absorbed as the final
+ *         block.
+ */
+static void
+update(struct poly1305_state_st *state, const unsigned char *in,
+ size_t len)
+{
+ uint32_t t0, t1, t2, t3;
+ uint64_t t[5];
+ uint32_t b;
+ uint64_t c;
+ size_t j;
+ unsigned char mp[16];
+
+ if (len < 16)
+ goto poly1305_donna_atmost15bytes;
+
+/* Absorb one full block: load it as four little-endian words and add it to
+ * h as five 26-bit limbs. */
+poly1305_donna_16bytes:
+ t0 = U8TO32_LE(in);
+ t1 = U8TO32_LE(in + 4);
+ t2 = U8TO32_LE(in + 8);
+ t3 = U8TO32_LE(in + 12);
+
+ in += 16;
+ len -= 16;
+
+ state->h0 += t0 & 0x3ffffff;
+ state->h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff;
+ state->h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff;
+ state->h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff;
+ /* (1 << 24) on the top limb is bit 128 of the block (4 * 26 + 24) */
+ state->h4 += (t3 >> 8) | (1 << 24);
+
+/* h = h * r: each t[k] is one column of the limb-by-limb product; the
+ * s1..s4 values (5 * r_i, set in Poly1305Init) are used for the terms that
+ * wrap around past the top limb. */
+poly1305_donna_mul:
+ t[0] = mul32x32_64(state->h0, state->r0) +
+ mul32x32_64(state->h1, state->s4) +
+ mul32x32_64(state->h2, state->s3) +
+ mul32x32_64(state->h3, state->s2) +
+ mul32x32_64(state->h4, state->s1);
+ t[1] = mul32x32_64(state->h0, state->r1) +
+ mul32x32_64(state->h1, state->r0) +
+ mul32x32_64(state->h2, state->s4) +
+ mul32x32_64(state->h3, state->s3) +
+ mul32x32_64(state->h4, state->s2);
+ t[2] = mul32x32_64(state->h0, state->r2) +
+ mul32x32_64(state->h1, state->r1) +
+ mul32x32_64(state->h2, state->r0) +
+ mul32x32_64(state->h3, state->s4) +
+ mul32x32_64(state->h4, state->s3);
+ t[3] = mul32x32_64(state->h0, state->r3) +
+ mul32x32_64(state->h1, state->r2) +
+ mul32x32_64(state->h2, state->r1) +
+ mul32x32_64(state->h3, state->r0) +
+ mul32x32_64(state->h4, state->s4);
+ t[4] = mul32x32_64(state->h0, state->r4) +
+ mul32x32_64(state->h1, state->r3) +
+ mul32x32_64(state->h2, state->r2) +
+ mul32x32_64(state->h3, state->r1) +
+ mul32x32_64(state->h4, state->r0);
+
+ /* Partial reduction: keep 26 bits per limb and push the excess into the
+ * next column; the carry out of the top limb re-enters h0 times 5. h0
+ * is not re-masked here, so it may exceed 26 bits until the next pass
+ * or until Poly1305Finish normalises it. */
+ state->h0 = (uint32_t)t[0] & 0x3ffffff;
+ c = (t[0] >> 26);
+ t[1] += c;
+ state->h1 = (uint32_t)t[1] & 0x3ffffff;
+ b = (uint32_t)(t[1] >> 26);
+ t[2] += b;
+ state->h2 = (uint32_t)t[2] & 0x3ffffff;
+ b = (uint32_t)(t[2] >> 26);
+ t[3] += b;
+ state->h3 = (uint32_t)t[3] & 0x3ffffff;
+ b = (uint32_t)(t[3] >> 26);
+ t[4] += b;
+ state->h4 = (uint32_t)t[4] & 0x3ffffff;
+ b = (uint32_t)(t[4] >> 26);
+ state->h0 += b * 5;
+
+ if (len >= 16)
+ goto poly1305_donna_16bytes;
+
+/* final bytes */
+poly1305_donna_atmost15bytes:
+ if (!len)
+ return;
+
+ /* Build the last block in a scratch buffer: the remaining input, a
+ * single 1 byte, then zero padding up to 16 bytes. */
+ for (j = 0; j < len; j++)
+ mp[j] = in[j];
+ mp[j++] = 1;
+ for (; j < 16; j++)
+ mp[j] = 0;
+ /* len = 0 makes the checks after the multiply fall through to the
+ * return above once this block has been absorbed */
+ len = 0;
+
+ t0 = U8TO32_LE(mp + 0);
+ t1 = U8TO32_LE(mp + 4);
+ t2 = U8TO32_LE(mp + 8);
+ t3 = U8TO32_LE(mp + 12);
+
+ state->h0 += t0 & 0x3ffffff;
+ state->h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff;
+ state->h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff;
+ state->h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff;
+ /* no (1 << 24) here: the appended 1 byte already terminates the data */
+ state->h4 += (t3 >> 8);
+
+ goto poly1305_donna_mul;
+}
+
+/*
+ * Poly1305Init: prepare |statep| for a new MAC computation under the 32-byte
+ * one-time |key|.
+ *
+ * The first 16 key bytes become the multiplier r, stored as five limbs of at
+ * most 26 bits after applying the masks below; the last 16 bytes are saved
+ * unchanged in state->key for Poly1305Finish. The accumulator and the
+ * partial-block buffer count are reset to zero.
+ */
+void
+Poly1305Init(poly1305_state *statep, const unsigned char key[32])
+{
+    struct poly1305_state_st *state = (struct poly1305_state_st *)statep;
+    const uint32_t k0 = U8TO32_LE(key + 0);
+    const uint32_t k1 = U8TO32_LE(key + 4);
+    const uint32_t k2 = U8TO32_LE(key + 8);
+    const uint32_t k3 = U8TO32_LE(key + 12);
+
+    /* precompute multipliers: each limb is a 26-bit window of the 128-bit
+     * little-endian value k3:k2:k1:k0, taken across word boundaries by
+     * pairing adjacent words, then masked */
+    state->r0 = k0 & 0x3ffffff;
+    state->r1 = (uint32_t)((((uint64_t)k1 << 32) | k0) >> 26) & 0x3ffff03;
+    state->r2 = (uint32_t)((((uint64_t)k2 << 32) | k1) >> 20) & 0x3ffc0ff;
+    state->r3 = (uint32_t)((((uint64_t)k3 << 32) | k2) >> 14) & 0x3f03fff;
+    state->r4 = (k3 >> 8) & 0x00fffff;
+
+    /* 5 * r_i, used by update() for the wrapped product terms */
+    state->s1 = 5 * state->r1;
+    state->s2 = 5 * state->r2;
+    state->s3 = 5 * state->r3;
+    state->s4 = 5 * state->r4;
+
+    /* init state */
+    state->h0 = state->h1 = state->h2 = state->h3 = state->h4 = 0;
+    state->buf_used = 0;
+
+    /* second key half is only needed when the tag is produced */
+    memcpy(state->key, key + 16, sizeof(state->key));
+}
+
+/*
+ * Poly1305Update: absorb |in_len| bytes from |in|.
+ *
+ * Only whole 16-byte blocks are handed to update(); a pending partial block
+ * is completed first, and any trailing bytes (fewer than 16) are kept in
+ * state->buf for the next call or for Poly1305Finish.
+ */
+void
+Poly1305Update(poly1305_state *statep, const unsigned char *in,
+               size_t in_len)
+{
+    struct poly1305_state_st *state = (struct poly1305_state_st *)statep;
+    size_t whole;
+
+    /* finish the block left incomplete by an earlier call */
+    if (state->buf_used != 0) {
+        unsigned int room = 16 - state->buf_used;
+        unsigned int take = (in_len < room) ? (unsigned int)in_len : room;
+
+        if (take != 0) {
+            memcpy(state->buf + state->buf_used, in, take);
+        }
+        state->buf_used += take;
+        in += take;
+        in_len -= take;
+
+        if (state->buf_used == 16) {
+            update(state, state->buf, 16);
+            state->buf_used = 0;
+        }
+    }
+
+    /* all complete blocks straight from the caller's buffer */
+    whole = in_len & ~(size_t)0xf;
+    if (whole != 0) {
+        update(state, in, whole);
+        in += whole;
+        in_len -= whole;
+    }
+
+    /* remember the tail */
+    if (in_len != 0) {
+        memcpy(state->buf, in, in_len);
+        state->buf_used = (unsigned int)in_len;
+    }
+}
+
+/*
+ * Poly1305Finish: absorb any buffered partial block, reduce the accumulator,
+ * add the saved second key half and write the 16-byte tag to |mac|.
+ *
+ * statep - state previously set up by Poly1305Init.
+ * mac    - receives the 16-byte tag.
+ */
+void
+Poly1305Finish(poly1305_state *statep, unsigned char mac[16])
+{
+ struct poly1305_state_st *state = (struct poly1305_state_st *)statep;
+ uint64_t f0, f1, f2, f3;
+ uint32_t g0, g1, g2, g3, g4;
+ uint32_t b, nb;
+
+ /* a pending 1..15 byte tail is padded and absorbed by update() */
+ if (state->buf_used)
+ update(state, state->buf, state->buf_used);
+
+ /* full carry propagation so every limb fits in 26 bits; the carry out
+ * of the top limb re-enters h0 times 5 */
+ b = state->h0 >> 26;
+ state->h0 = state->h0 & 0x3ffffff;
+ state->h1 += b;
+ b = state->h1 >> 26;
+ state->h1 = state->h1 & 0x3ffffff;
+ state->h2 += b;
+ b = state->h2 >> 26;
+ state->h2 = state->h2 & 0x3ffffff;
+ state->h3 += b;
+ b = state->h3 >> 26;
+ state->h3 = state->h3 & 0x3ffffff;
+ state->h4 += b;
+ b = state->h4 >> 26;
+ state->h4 = state->h4 & 0x3ffffff;
+ state->h0 += b * 5;
+
+ /* g = h + 5 - 2^130 (the 2^130 appears as 1 << 26 on the top limb) */
+ g0 = state->h0 + 5;
+ b = g0 >> 26;
+ g0 &= 0x3ffffff;
+ g1 = state->h1 + b;
+ b = g1 >> 26;
+ g1 &= 0x3ffffff;
+ g2 = state->h2 + b;
+ b = g2 >> 26;
+ g2 &= 0x3ffffff;
+ g3 = state->h3 + b;
+ b = g3 >> 26;
+ g3 &= 0x3ffffff;
+ g4 = state->h4 + b - (1 << 26);
+
+ /* Branch-free select: if the subtraction wrapped (top bit of g4 set)
+ * b becomes 0 and h is kept; otherwise b is all ones and g replaces h. */
+ b = (g4 >> 31) - 1;
+ nb = ~b;
+ state->h0 = (state->h0 & nb) | (g0 & b);
+ state->h1 = (state->h1 & nb) | (g1 & b);
+ state->h2 = (state->h2 & nb) | (g2 & b);
+ state->h3 = (state->h3 & nb) | (g3 & b);
+ state->h4 = (state->h4 & nb) | (g4 & b);
+
+ /* repack the 26-bit limbs into four 32-bit words and add the matching
+ * word of the saved key half; f0..f3 are 64-bit so the carry of each
+ * addition is available in bit 32 */
+ f0 = ((state->h0) | (state->h1 << 26)) + (uint64_t)U8TO32_LE(&state->key[0]);
+ f1 = ((state->h1 >> 6) | (state->h2 << 20)) + (uint64_t)U8TO32_LE(&state->key[4]);
+ f2 = ((state->h2 >> 12) | (state->h3 << 14)) + (uint64_t)U8TO32_LE(&state->key[8]);
+ f3 = ((state->h3 >> 18) | (state->h4 << 8)) + (uint64_t)U8TO32_LE(&state->key[12]);
+
+ /* emit little-endian, rippling the carry into the next word; the carry
+ * out of f3 is discarded */
+ U32TO8_LE(&mac[0], (uint32_t)f0);
+ f1 += (f0 >> 32);
+ U32TO8_LE(&mac[4], (uint32_t)f1);
+ f2 += (f1 >> 32);
+ U32TO8_LE(&mac[8], (uint32_t)f2);
+ f3 += (f2 >> 32);
+ U32TO8_LE(&mac[12], (uint32_t)f3);
+}
diff --git a/security/nss/lib/freebl/poly1305.h b/security/nss/lib/freebl/poly1305.h
new file mode 100644
index 000000000..0a463483f
--- /dev/null
+++ b/security/nss/lib/freebl/poly1305.h
@@ -0,0 +1,28 @@
+/*
+ * poly1305.h - header file for Poly1305 implementation.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef FREEBL_POLY1305_H_
+#define FREEBL_POLY1305_H_
+
+#include <stddef.h> /* size_t, used by Poly1305Update below */
+
+/* Caller-allocated, opaque working storage for one MAC computation. The
+ * implementations reinterpret these 512 bytes as their own internal state. */
+typedef unsigned char poly1305_state[512];
+
+/* Poly1305Init sets up |state| so that it can be used to calculate an
+ * authentication tag with the one-time key |key|. Note that |key| is a
+ * one-time key and therefore there is no `reset' method because that would
+ * enable several messages to be authenticated with the same key. */
+extern void Poly1305Init(poly1305_state* state, const unsigned char key[32]);
+
+/* Poly1305Update processes |inLen| bytes from |in|. It can be called zero or
+ * more times after Poly1305Init. */
+extern void Poly1305Update(poly1305_state* state, const unsigned char* in,
+                           size_t inLen);
+
+/* Poly1305Finish completes the poly1305 calculation and writes a 16 byte
+ * authentication tag to |mac|. */
+extern void Poly1305Finish(poly1305_state* state, unsigned char mac[16]);
+
+#endif /* FREEBL_POLY1305_H_ */
diff --git a/security/nss/lib/freebl/pqg.c b/security/nss/lib/freebl/pqg.c
new file mode 100644
index 000000000..2f24afd24
--- /dev/null
+++ b/security/nss/lib/freebl/pqg.c
@@ -0,0 +1,1878 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * PQG parameter generation/verification. Based on FIPS 186-3.
+ */
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "prerr.h"
+#include "secerr.h"
+
+#include "prtypes.h"
+#include "blapi.h"
+#include "secitem.h"
+#include "mpi.h"
+#include "mpprime.h"
+#include "mplogic.h"
+#include "secmpi.h"
+
+#define MAX_ITERATIONS 1000 /* Maximum number of iterations of primegen */
+
+/* Tags for the PQG generation methods; the per-value notes say what kind of
+ * primality evidence each one gives. NOTE(review): the code that switches on
+ * this type is outside this excerpt - confirm usage there. */
+typedef enum {
+ FIPS186_1_TYPE, /* Probabilistic */
+ FIPS186_3_TYPE, /* Probabilistic */
+ FIPS186_3_ST_TYPE /* Shawe-Taylor provable */
+} pqgGenType;
+
+/*
+ * These test iteration counts are quite a bit larger than we previously had.
+ * This is because FIPS 186-3 is worried about the primes in PQG generation:
+ * it may be possible to purposefully construct composites which need more
+ * iterations of Miller-Rabin than your normal randomly selected numbers do.
+ * There are 3 ways to counter this: 1) use one of the cool provably
+ * prime algorithms (which would require a lot more work than DSA-2 deserves),
+ * 2) add a Lucas primality test (which requires coding a Lucas primality
+ * test), or 3) use a larger M-R test count. I chose the latter. It increases
+ * the time that it takes to prove the selected prime, but it shouldn't
+ * increase the overall time to run the algorithm (non-primes should still
+ * fail M-R relatively quickly). If you want to get that last bit of
+ * performance, implement Lucas and adjust these two functions. See FIPS 186-3
+ * Appendix C and F for more information.
+ *
+ * Returns the Miller-Rabin iteration count for a prime p of L bits. N is
+ * accepted for symmetry with prime_testcount_q() but is not consulted.
+ */
+static int
+prime_testcount_p(int L, int N)
+{
+    if (L == 1024) {
+        return 40;
+    }
+    if (L == 2048) {
+        return 56;
+    }
+    if (L == 3072) {
+        return 64;
+    }
+    return 50; /* L = 512-960 */
+}
+
+/* The q numbers are different if you run M-R followed by Lucas. I created
+ * a separate function so if someone wanted to add the Lucas check, they
+ * could do so fairly easily.
+ *
+ * As written this simply forwards to prime_testcount_p(), so the counts for
+ * p and q are currently identical. */
+static int
+prime_testcount_q(int L, int N)
+{
+ return prime_testcount_p(L, N);
+}
+
+/*
+ * Generic check that an (L, N) pair meets the DSA2 requirements. Keeping it
+ * in one function gives us a single place to change if the requirements are
+ * bumped in the future.
+ *
+ * Accepted pairs: (1024, DSA1_Q_BITS), (2048, 224), (2048, 256), (3072, 256).
+ * Returns SECSuccess for those; otherwise sets SEC_ERROR_INVALID_ARGS and
+ * returns SECFailure.
+ */
+static SECStatus
+pqg_validate_dsa2(unsigned int L, unsigned int N)
+{
+    int pairOk;
+
+    if (L == 1024) {
+        pairOk = (N == DSA1_Q_BITS);
+    } else if (L == 2048) {
+        pairOk = (N == 224) || (N == 256);
+    } else if (L == 3072) {
+        pairOk = (N == 256);
+    } else {
+        pairOk = 0;
+    }
+
+    if (!pairOk) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    return SECSuccess;
+}
+
+/*
+ * Default subprime size N (in bits) for a prime size of L bits:
+ * 1024 -> DSA1_Q_BITS, 2048 -> 224, 3072 -> 256. For any other L this sets
+ * SEC_ERROR_INVALID_ARGS and returns 0.
+ */
+static unsigned int
+pqg_get_default_N(unsigned int L)
+{
+    if (L == 1024) {
+        return DSA1_Q_BITS;
+    }
+    if (L == 2048) {
+        return 224;
+    }
+    if (L == 3072) {
+        return 256;
+    }
+
+    PORT_SetError(SEC_ERROR_INVALID_ARGS);
+    return 0;
+}
+
+/*
+ * Select the lowest hash algorithm usable for a subprime of N bits: the
+ * first of SHA-1, SHA-224, SHA-256, SHA-384 whose threshold (224, 256, 384,
+ * 512) is still above N, and SHA-512 once N reaches 512. L is not consulted.
+ */
+static HASH_HashType
+getFirstHash(unsigned int L, unsigned int N)
+{
+    HASH_HashType lowest;
+
+    if (N >= 512) {
+        lowest = HASH_AlgSHA512;
+    } else if (N >= 384) {
+        lowest = HASH_AlgSHA384;
+    } else if (N >= 256) {
+        lowest = HASH_AlgSHA256;
+    } else if (N >= 224) {
+        lowest = HASH_AlgSHA224;
+    } else {
+        lowest = HASH_AlgSHA1;
+    }
+    return lowest;
+}
+
+/*
+ * Find the next usable hash algorithm after |hashtype| in the order
+ * SHA-1, SHA-224, SHA-256, SHA-384, SHA-512. SHA-512 and any value outside
+ * that list map to HASH_AlgTOTAL.
+ */
+static HASH_HashType
+getNextHash(HASH_HashType hashtype)
+{
+    if (hashtype == HASH_AlgSHA1) {
+        return HASH_AlgSHA224;
+    }
+    if (hashtype == HASH_AlgSHA224) {
+        return HASH_AlgSHA256;
+    }
+    if (hashtype == HASH_AlgSHA256) {
+        return HASH_AlgSHA384;
+    }
+    if (hashtype == HASH_AlgSHA384) {
+        return HASH_AlgSHA512;
+    }
+    /* HASH_AlgSHA512 and everything else: nothing further to try */
+    return HASH_AlgTOTAL;
+}
+
+/*
+ * Length field of the raw hash object for |type|. If no hash object is
+ * found this asserts (debug builds), sets SEC_ERROR_LIBRARY_FAILURE and
+ * returns 0.
+ */
+static unsigned int
+HASH_ResultLen(HASH_HashType type)
+{
+    const SECHashObject *obj = HASH_GetRawHashObject(type);
+
+    PORT_Assert(obj != NULL);
+    if (obj != NULL) {
+        PORT_Assert(obj->length != 0);
+        return obj->length;
+    }
+
+    /* type is always a valid HashType. Thus a null object must be a bug */
+    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+    return 0;
+}
+
+/*
+ * One-shot hash: run the |type| hash over |src_len| bytes at |src| and write
+ * the result to |dest|, which is passed to the hash's end() with a capacity
+ * of the hash object's length.
+ *
+ * Returns SECFailure if the hash object or a hash context cannot be
+ * obtained, SECSuccess otherwise.
+ */
+static SECStatus
+HASH_HashBuf(HASH_HashType type, unsigned char *dest,
+             const unsigned char *src, PRUint32 src_len)
+{
+    const SECHashObject *obj = HASH_GetRawHashObject(type);
+    unsigned int written; /* required by end(); value is not used */
+    void *ctx;
+
+    ctx = (obj != NULL) ? obj->create() : NULL;
+    if (ctx == NULL) {
+        return SECFailure;
+    }
+
+    obj->begin(ctx);
+    obj->update(ctx, src, src_len);
+    obj->end(ctx, dest, &written, obj->length);
+    obj->destroy(ctx, PR_TRUE);
+    return SECSuccess;
+}
+
+/*
+ * Length in bytes of the value held in |obj|, not counting one leading zero
+ * byte when the item is longer than a single byte. Returns 0 when the item
+ * has no data.
+ */
+unsigned int
+PQG_GetLength(const SECItem *obj)
+{
+    if (obj->data == NULL) {
+        return 0;
+    }
+    if ((obj->len > 1) && (obj->data[0] == 0)) {
+        return obj->len - 1;
+    }
+    return obj->len;
+}
+
+/*
+ * Check that the sizes of the prime and subprime in |params| form an
+ * acceptable combination.
+ *
+ * Primes below 1024 bits are treated as DSA1: the subprime must be
+ * DSA1_Q_BITS bits and PQG_PBITS_TO_INDEX() must accept the prime size.
+ * Everything else is checked by pqg_validate_dsa2().
+ *
+ * Returns SECSuccess, or SECFailure with SEC_ERROR_INVALID_ARGS set.
+ */
+SECStatus
+PQG_Check(const PQGParams *params)
+{
+    unsigned int pBits, qBits;
+    int idx;
+
+    if (params == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    pBits = PQG_GetLength(&params->prime) * PR_BITS_PER_BYTE;
+    qBits = PQG_GetLength(&params->subPrime) * PR_BITS_PER_BYTE;
+
+    /* handle DSA2 parameters (includes DSA1, 1024 bits) */
+    if (pBits >= 1024) {
+        return pqg_validate_dsa2(pBits, qBits);
+    }
+
+    /* handle DSA1 pqg parameters with less than 1024 bits */
+    if (qBits != DSA1_Q_BITS) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    idx = PQG_PBITS_TO_INDEX(pBits);
+    if (idx < 0) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    return SECSuccess;
+}
+
+/*
+ * Lowest usable hash algorithm for |params|, as chosen by getFirstHash()
+ * from the bit sizes of its prime and subprime. A NULL |params| sets
+ * SEC_ERROR_INVALID_ARGS and yields HASH_AlgNULL.
+ */
+HASH_HashType
+PQG_GetHashType(const PQGParams *params)
+{
+    unsigned int pBits, qBits;
+
+    if (!params) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return HASH_AlgNULL;
+    }
+
+    pBits = PQG_GetLength(&params->prime) * PR_BITS_PER_BYTE;
+    qBits = PQG_GetLength(&params->subPrime) * PR_BITS_PER_BYTE;
+    return getFirstHash(pBits, qBits);
+}
+
+/* Get a seed for generating P and Q: fill |seed| with seed->len bytes from
+** the global random number generator. If seed->data is not set yet, the
+** buffer is allocated (zeroed) from |arena| first.
+**
+** Returns SECFailure with SEC_ERROR_NO_MEMORY if no buffer is available,
+** otherwise the status reported by the random number generator.
+*/
+static SECStatus
+getPQseed(SECItem *seed, PLArenaPool *arena)
+{
+    SECStatus status;
+
+    if (seed->data == NULL) {
+        seed->data = (unsigned char *)PORT_ArenaZAlloc(arena, seed->len);
+        if (seed->data == NULL) {
+            PORT_SetError(SEC_ERROR_NO_MEMORY);
+            return SECFailure;
+        }
+    }
+
+    status = RNG_GenerateGlobalRandomBytes(seed->data, seed->len);
+    /*
+     * NIST CMVP disallows a sequence of 20 bytes with the most
+     * significant byte equal to 0. Perhaps they interpret
+     * "a sequence of at least 160 bits" as "a number >= 2^159".
+     * So we always set the most significant bit to 1. (bug 334533)
+     */
+    seed->data[0] |= 0x80;
+    return status;
+}
+
+/* Generate a candidate h value in |hit| and load it into |H|.
+** When built with FIPS_186_1_A5_TEST the value is fixed at h = 2 (the h
+** specified in FIPS 186-1 appendix 5); otherwise hit->len bytes are taken
+** from the global random number generator.
+**
+** Returns SECFailure if the generator fails or the bytes cannot be read
+** into |H| (the MPI error is translated with MP_TO_SEC_ERROR).
+*/
+static SECStatus
+generate_h_candidate(SECItem *hit, mp_int *H)
+{
+    SECStatus rngStatus = SECSuccess;
+    mp_err err = MP_OKAY;
+
+#ifdef FIPS_186_1_A5_TEST
+    memset(hit->data, 0, hit->len);
+    hit->data[hit->len - 1] = 0x02;
+#else
+    rngStatus = RNG_GenerateGlobalRandomBytes(hit->data, hit->len);
+#endif
+    if (rngStatus != SECSuccess) {
+        return SECFailure;
+    }
+
+    err = mp_read_unsigned_octets(H, hit->data, hit->len);
+    if (err != MP_OKAY) {
+        MP_TO_SEC_ERROR(err);
+        return SECFailure;
+    }
+    return SECSuccess;
+}
+
+/*
+ * addToSeed: compute (seed + addend) mod 2**seedlen and store the result in
+ * |seedout|.
+ *
+ * seed    - input value.
+ * addend  - amount to add.
+ * seedlen - modulus exponent in bits (g in 186-1).
+ * seedout - receives a freshly allocated item; any data it already holds is
+ *           zeroed and freed first. |seed| is read into an mp_int before
+ *           that happens, and callers in this file do pass the same item
+ *           for |seed| and |seedout|.
+ *
+ * Returns SECSuccess, or SECFailure after translating the MPI error with
+ * MP_TO_SEC_ERROR.
+ */
+static SECStatus
+addToSeed(const SECItem *seed,
+          unsigned long addend,
+          int seedlen, /* g in 186-1 */
+          SECItem *seedout)
+{
+    mp_int s, sum, tmp;
+    mp_err err = MP_OKAY;
+    SECStatus rv = SECSuccess;
+    /* zero the digit pointers so the mp_clear() calls in cleanup are safe
+     * even for values that were never initialised */
+    MP_DIGITS(&s) = 0;
+    MP_DIGITS(&sum) = 0;
+    MP_DIGITS(&tmp) = 0;
+    CHECK_MPI_OK(mp_init(&s));
+    CHECK_MPI_OK(mp_init(&sum));
+    SECITEM_TO_MPINT(*seed, &s); /* s = seed */
+    /* seed += addend */
+    if (addend < MP_DIGIT_MAX) {
+        CHECK_MPI_OK(mp_add_d(&s, (mp_digit)addend, &s));
+    } else {
+        /* addend does not fit the single-digit add; go through an mp_int */
+        CHECK_MPI_OK(mp_init(&tmp));
+        CHECK_MPI_OK(mp_set_ulong(&tmp, addend));
+        CHECK_MPI_OK(mp_add(&s, &tmp, &s));
+    }
+    /* sum = s mod 2**seedlen (the remainder output of the 2**d division) */
+    CHECK_MPI_OK(mp_div_2d(&s, (mp_digit)seedlen, NULL, &sum));
+    if (seedout->data != NULL) {
+        SECITEM_ZfreeItem(seedout, PR_FALSE);
+    }
+    MPINT_TO_SECITEM(&sum, seedout, NULL);
+cleanup:
+    mp_clear(&s);
+    mp_clear(&sum);
+    mp_clear(&tmp);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        return SECFailure;
+    }
+    return rv;
+}
+
+/* Compute Hash[(SEED + addend) mod 2**g]
+** Result is placed in hashOutBuf.
+** This computation is used in steps 2 and 7 of FIPS 186 Appendix 2.2 and
+** step 11.2 of FIPS 186-3 Appendix A.1.1.2 .
+**
+** The intermediate sum is zeroed and freed after hashing. Returns the
+** status of addToSeed() if that fails, otherwise the status of the hash.
+*/
+static SECStatus
+addToSeedThenHash(HASH_HashType hashtype,
+                  const SECItem *seed,
+                  unsigned long addend,
+                  int seedlen, /* g in 186-1 */
+                  unsigned char *hashOutBuf)
+{
+    SECItem sum = { 0, 0, 0 };
+    SECStatus status = addToSeed(seed, addend, seedlen, &sum);
+
+    if (status == SECSuccess) {
+        status = HASH_HashBuf(hashtype, hashOutBuf, sum.data, sum.len); /* hash result */
+        if (sum.data != NULL) {
+            SECITEM_ZfreeItem(&sum, PR_FALSE);
+        }
+    }
+    return status;
+}
+
+/*
+** Perform steps 2 and 3 of FIPS 186-1, appendix 2.2.
+** Generate Q from seed.
+**
+** Returns SECSuccess with the candidate in Q, or SECFailure if hashing or
+** the MPI conversion fails. The local hash buffers are cleared before
+** returning on every path.
+*/
+static SECStatus
+makeQfromSeed(
+    unsigned int g,      /* input.  Length of seed in bits. */
+    const SECItem *seed, /* input.  */
+    mp_int *Q)           /* output. */
+{
+    unsigned char hashSeed[SHA1_LENGTH];      /* SHA[SEED] */
+    unsigned char hashSeedPlus1[SHA1_LENGTH]; /* SHA[(SEED+1) mod 2**g] */
+    unsigned char U[SHA1_LENGTH];
+    SECStatus rv = SECSuccess;
+    mp_err err = MP_OKAY;
+    int k;
+
+    /* ******************************************************************
+    ** Step 2.
+    ** "Compute U = SHA[SEED] XOR SHA[(SEED+1) mod 2**g]."
+    **/
+    CHECK_SEC_OK(SHA1_HashBuf(hashSeed, seed->data, seed->len));
+    CHECK_SEC_OK(addToSeedThenHash(HASH_AlgSHA1, seed, 1, g, hashSeedPlus1));
+    for (k = 0; k < SHA1_LENGTH; ++k) {
+        U[k] = hashSeed[k] ^ hashSeedPlus1[k];
+    }
+    /* ******************************************************************
+    ** Step 3.
+    ** "Form Q from U by setting the most significant bit (the 2**159 bit)
+    ** and the least significant bit to 1. In terms of boolean operations,
+    ** Q = U OR 2**159 OR 1. Note that 2**159 < Q < 2**160."
+    */
+    U[0] |= 0x80; /* U is MSB first */
+    U[SHA1_LENGTH - 1] |= 0x01;
+    err = mp_read_unsigned_octets(Q, U, SHA1_LENGTH);
+cleanup:
+    memset(U, 0, sizeof(U));
+    memset(hashSeed, 0, sizeof(hashSeed));
+    memset(hashSeedPlus1, 0, sizeof(hashSeedPlus1));
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        return SECFailure;
+    }
+    return rv;
+}
+
+/*
+** Perform steps 6 and 7 of FIPS 186-3, appendix A.1.1.2.
+** Generate Q from seed.
+**
+** Q is built from the trailing N/8 bytes of the hash of the seed, with the
+** top and bottom bits forced to 1.
+** NOTE(review): this relies on the hash output being at least N bits long;
+** presumably callers pick |hashtype| accordingly - confirm at the call
+** sites.
+*/
+static SECStatus
+makeQ2fromSeed(
+    HASH_HashType hashtype, /* selected Hashing algorithm */
+    unsigned int N,         /* input.  Length of q in bits. */
+    const SECItem *seed,    /* input.  */
+    mp_int *Q)              /* output. */
+{
+    unsigned char U[HASH_LENGTH_MAX];
+    SECStatus rv = SECSuccess;
+    mp_err err = MP_OKAY;
+    int qBytes = N / PR_BITS_PER_BYTE; /* length of q in bytes, not bits */
+    int digestLen = HASH_ResultLen(hashtype);
+    /* mod 2**N: skip the leading digest bytes that lie above bit N. Step 7
+     * will explicitly set the top bit to 1, so no need to handle
+     * mod 2**N-1 */
+    int skip = (digestLen > qBytes) ? (digestLen - qBytes) : 0;
+
+    /* ******************************************************************
+    ** Step 6.
+    ** "Compute U = hash[SEED] mod 2**N-1]."
+    **/
+    CHECK_SEC_OK(HASH_HashBuf(hashtype, U, seed->data, seed->len));
+    /* ******************************************************************
+    ** Step 7.
+    ** computed_q = 2**(N-1) + U + 1 - (U mod 2)
+    **
+    ** This is the same as:
+    ** computed_q = 2**(N-1) | U | 1;
+    */
+    U[skip] |= 0x80; /* U is MSB first */
+    U[digestLen - 1] |= 0x01;
+    err = mp_read_unsigned_octets(Q, &U[skip], qBytes);
+cleanup:
+    memset(U, 0, sizeof(U));
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        return SECFailure;
+    }
+    return rv;
+}
+
+/*
+** Perform steps from FIPS 186-3, Appendix A.1.2.1 and Appendix C.6
+**
+** This generates a provable prime from two smaller primes. The resulting
+** prime p will have p-1 as a multiple of q0. q0 can be 1.
+**
+** This implements steps 4 through 22 of FIPS 186-3 A.1.2.1 and
+** steps 16 through 34 of FIPS 186-3 C.6
+*/
+#define MAX_ST_SEED_BITS (HASH_LENGTH_MAX * PR_BITS_PER_BYTE)
+static SECStatus
+makePrimefromPrimesShaweTaylor(
+ HASH_HashType hashtype, /* selected Hashing algorithm */
+ unsigned int length, /* input. Length of prime in bits. */
+ mp_int *c0, /* seed prime */
+ mp_int *q, /* sub prime, can be 1 */
+ mp_int *prime, /* output. */
+ SECItem *prime_seed, /* input/output. */
+ unsigned int *prime_gen_counter) /* input/output. */
+{
+ mp_int c;
+ mp_int c0_2;
+ mp_int t;
+ mp_int a;
+ mp_int z;
+ mp_int two_length_minus_1;
+ SECStatus rv = SECFailure;
+ int hashlen = HASH_ResultLen(hashtype);
+ int outlen = hashlen * PR_BITS_PER_BYTE;
+ int offset;
+ unsigned char bit, mask;
+ /* x needs to hold roundup(L/outlen)*outlen.
+ * This can be no larger than L+outlen-1, So we set it's size to
+ * our max L + max outlen and know we are safe */
+ unsigned char x[DSA_MAX_P_BITS / 8 + HASH_LENGTH_MAX];
+ mp_err err = MP_OKAY;
+ int i;
+ int iterations;
+ int old_counter;
+
+ MP_DIGITS(&c) = 0;
+ MP_DIGITS(&c0_2) = 0;
+ MP_DIGITS(&t) = 0;
+ MP_DIGITS(&a) = 0;
+ MP_DIGITS(&z) = 0;
+ MP_DIGITS(&two_length_minus_1) = 0;
+ CHECK_MPI_OK(mp_init(&c));
+ CHECK_MPI_OK(mp_init(&c0_2));
+ CHECK_MPI_OK(mp_init(&t));
+ CHECK_MPI_OK(mp_init(&a));
+ CHECK_MPI_OK(mp_init(&z));
+ CHECK_MPI_OK(mp_init(&two_length_minus_1));
+
+ /*
+ ** There is a slight mapping of variable names depending on which
+ ** FIPS 186 steps are being carried out. The mapping is as follows:
+ ** variable A.1.2.1 C.6
+ ** c0 p0 c0
+ ** q q 1
+ ** c p c
+ ** c0_2 2*p0*q 2*c0
+ ** length L length
+ ** prime_seed pseed prime_seed
+ ** prime_gen_counter pgen_counter prime_gen_counter
+ **
+ ** Also note: or iterations variable is actually iterations+1, since
+ ** iterations+1 works better in C.
+ */
+
+ /* Step 4/16 iterations = ceiling(length/outlen)-1 */
+ iterations = (length + outlen - 1) / outlen; /* NOTE: iterations +1 */
+ /* Step 5/17 old_counter = prime_gen_counter */
+ old_counter = *prime_gen_counter;
+ /*
+ ** Comment: Generate a pseudorandom integer x in the interval
+ ** [2**(lenght-1), 2**length].
+ **
+ ** Step 6/18 x = 0
+ */
+ PORT_Memset(x, 0, sizeof(x));
+ /*
+ ** Step 7/19 for i = 0 to iterations do
+ ** x = x + (HASH(prime_seed + i) * 2^(i*outlen))
+ */
+ for (i = 0; i < iterations; i++) {
+ /* is bigger than prime_seed should get to */
+ CHECK_SEC_OK(addToSeedThenHash(hashtype, prime_seed, i,
+ MAX_ST_SEED_BITS, &x[(iterations - i - 1) * hashlen]));
+ }
+ /* Step 8/20 prime_seed = prime_seed + iterations + 1 */
+ CHECK_SEC_OK(addToSeed(prime_seed, iterations, MAX_ST_SEED_BITS,
+ prime_seed));
+ /*
+ ** Step 9/21 x = 2 ** (length-1) + x mod 2 ** (length-1)
+ **
+ ** This step mathematically sets the high bit and clears out
+ ** all the other bits higher than length. 'x' is stored
+ ** in the x array, MSB first. The above formula gives us an 'x'
+ ** which is length bytes long and has the high bit set. We also know
+ ** that length <= iterations*outlen since
+ ** iterations=ceiling(length/outlen). First we find the offset in
+ ** bytes into the array where the high bit is.
+ */
+ offset = (outlen * iterations - length) / PR_BITS_PER_BYTE;
+ /* now we want to set the 'high bit', since length may not be a
+ * multiple of 8,*/
+ bit = 1 << ((length - 1) & 0x7); /* select the proper bit in the byte */
+ /* we need to zero out the rest of the bits in the byte above */
+ mask = (bit - 1);
+ /* now we set it */
+ x[offset] = (mask & x[offset]) | bit;
+ /*
+ ** Comment: Generate a candidate prime c in the interval
+ ** [2**(lenght-1), 2**length].
+ **
+ ** Step 10 t = ceiling(x/(2q(p0)))
+ ** Step 22 t = ceiling(x/(2(c0)))
+ */
+ CHECK_MPI_OK(mp_read_unsigned_octets(&t, &x[offset],
+ hashlen * iterations - offset)); /* t = x */
+ CHECK_MPI_OK(mp_mul(c0, q, &c0_2)); /* c0_2 is now c0*q */
+ CHECK_MPI_OK(mp_add(&c0_2, &c0_2, &c0_2)); /* c0_2 is now 2*q*c0 */
+ CHECK_MPI_OK(mp_add(&t, &c0_2, &t)); /* t = x+2*q*c0 */
+ CHECK_MPI_OK(mp_sub_d(&t, (mp_digit)1, &t)); /* t = x+2*q*c0 -1 */
+ /* t = floor((x+2qc0-1)/2qc0) = ceil(x/2qc0) */
+ CHECK_MPI_OK(mp_div(&t, &c0_2, &t, NULL));
+ /*
+ ** step 11: if (2tqp0 +1 > 2**length), then t = ceiling(2**(length-1)/2qp0)
+ ** step 12: t = 2tqp0 +1.
+ **
+ ** step 23: if (2tc0 +1 > 2**length), then t = ceiling(2**(length-1)/2c0)
+ ** step 24: t = 2tc0 +1.
+ */
+ CHECK_MPI_OK(mp_2expt(&two_length_minus_1, length - 1));
+step_23:
+ CHECK_MPI_OK(mp_mul(&t, &c0_2, &c)); /* c = t*2qc0 */
+ CHECK_MPI_OK(mp_add_d(&c, (mp_digit)1, &c)); /* c= 2tqc0 + 1*/
+ if (mpl_significant_bits(&c) > length) { /* if c > 2**length */
+ CHECK_MPI_OK(mp_sub_d(&c0_2, (mp_digit)1, &t)); /* t = 2qc0-1 */
+ /* t = 2**(length-1) + 2qc0 -1 */
+ CHECK_MPI_OK(mp_add(&two_length_minus_1, &t, &t));
+ /* t = floor((2**(length-1)+2qc0 -1)/2qc0)
+ * = ceil(2**(length-1)/2qc0) */
+ CHECK_MPI_OK(mp_div(&t, &c0_2, &t, NULL));
+ CHECK_MPI_OK(mp_mul(&t, &c0_2, &c));
+ CHECK_MPI_OK(mp_add_d(&c, (mp_digit)1, &c)); /* c= 2tqc0 + 1*/
+ }
+ /* Step 13/25 prime_gen_counter = prime_gen_counter + 1*/
+ (*prime_gen_counter)++;
+ /*
+ ** Comment: Test the candidate prime c for primality; first pick an
+ ** integer a between 2 and c-2.
+ **
+ ** Step 14/26 a=0
+ */
+ PORT_Memset(x, 0, sizeof(x)); /* use x for a */
+ /*
+ ** Step 15/27 for i = 0 to iterations do
+ ** a = a + (HASH(prime_seed + i) * 2^(i*outlen))
+ **
+ ** NOTE: we reuse the x array for 'a' initially.
+ */
+ for (i = 0; i < iterations; i++) {
+ /* MAX_ST_SEED_BITS is bigger than prime_seed should get to */
+ CHECK_SEC_OK(addToSeedThenHash(hashtype, prime_seed, i,
+ MAX_ST_SEED_BITS, &x[(iterations - i - 1) * hashlen]));
+ }
+ /* Step 16/28 prime_seed = prime_seed + iterations + 1 */
+ CHECK_SEC_OK(addToSeed(prime_seed, iterations, MAX_ST_SEED_BITS,
+ prime_seed));
+ /* Step 17/29 a = 2 + (a mod (c-3)). */
+ CHECK_MPI_OK(mp_read_unsigned_octets(&a, x, iterations * hashlen));
+ CHECK_MPI_OK(mp_sub_d(&c, (mp_digit)3, &z)); /* z = c -3 */
+ CHECK_MPI_OK(mp_mod(&a, &z, &a)); /* a = a mod c -3 */
+ CHECK_MPI_OK(mp_add_d(&a, (mp_digit)2, &a)); /* a = 2 + a mod c -3 */
+ /*
+ ** Step 18 z = a**(2tq) mod p.
+ ** Step 30 z = a**(2t) mod c.
+ */
+ CHECK_MPI_OK(mp_mul(&t, q, &z)); /* z = tq */
+ CHECK_MPI_OK(mp_add(&z, &z, &z)); /* z = 2tq */
+ CHECK_MPI_OK(mp_exptmod(&a, &z, &c, &z)); /* z = a**(2tq) mod c */
+ /*
+ ** Step 19 if (( 1 == GCD(z-1,p)) and ( 1 == z**p0 mod p )), then
+ ** Step 31 if (( 1 == GCD(z-1,c)) and ( 1 == z**c0 mod c )), then
+ */
+ CHECK_MPI_OK(mp_sub_d(&z, (mp_digit)1, &a));
+ CHECK_MPI_OK(mp_gcd(&a, &c, &a));
+ if (mp_cmp_d(&a, (mp_digit)1) == 0) {
+ CHECK_MPI_OK(mp_exptmod(&z, c0, &c, &a));
+ if (mp_cmp_d(&a, (mp_digit)1) == 0) {
+ /* Step 31.1 prime = c */
+ CHECK_MPI_OK(mp_copy(&c, prime));
+ /*
+ ** Step 31.2 return Success, prime, prime_seed,
+ ** prime_gen_counter
+ */
+ rv = SECSuccess;
+ goto cleanup;
+ }
+ }
+ /*
+ ** Step 20/32 If (prime_gen_counter > 4 * length + old_counter then
+ ** return (FAILURE, 0, 0, 0).
+ ** NOTE: the test is reversed, so we fall through on failure to the
+ ** cleanup routine
+ */
+ if (*prime_gen_counter < (4 * length + old_counter)) {
+ /* Step 21/33 t = t + 1 */
+ CHECK_MPI_OK(mp_add_d(&t, (mp_digit)1, &t));
+ /* Step 22/34 Go to step 23/11 */
+ goto step_23;
+ }
+
+ /* if (prime_gencont > (4*length + old_counter), fall through to failure */
+ rv = SECFailure; /* really is already set, but paranoia is good */
+
+cleanup:
+ mp_clear(&c);
+ mp_clear(&c0_2);
+ mp_clear(&t);
+ mp_clear(&a);
+ mp_clear(&z);
+ mp_clear(&two_length_minus_1);
+ PORT_Memset(x, 0, sizeof(x));
+ if (err) {
+ MP_TO_SEC_ERROR(err);
+ rv = SECFailure;
+ }
+ if (rv == SECFailure) {
+ mp_zero(prime);
+ if (prime_seed->data) {
+ SECITEM_FreeItem(prime_seed, PR_FALSE);
+ }
+ *prime_gen_counter = 0;
+ }
+ return rv;
+}
+
+/*
+** Perform steps from FIPS 186-3, Appendix C.6
+**
+** This generates a provable prime from a seed
+**
+** Behaviour visible in this function:
+** - length < 2 fails immediately.
+** - length >= 33 recurses to build a prime c0 of (length + 1) / 2 + 1 bits
+** from input_seed, then hands c0 (with q == 1) to
+** makePrimefromPrimesShaweTaylor() to produce the final prime.
+** - otherwise (2 <= length <= 32) prime_seed is set to a copy of input_seed
+** and candidates are derived from hashes of the seed and tested by
+** trial division until one passes or prime_gen_counter exceeds 4*length.
+**
+** On any failure the outputs are reset: prime is zeroed, prime_seed is freed
+** (when it holds data) and *prime_gen_counter is set to 0. The scratch
+** buffer x is always wiped before returning.
+**
+** Returns SECSuccess with prime, prime_seed and prime_gen_counter filled in,
+** or SECFailure.
+*/
+static SECStatus
+makePrimefromSeedShaweTaylor(
+ HASH_HashType hashtype, /* selected Hashing algorithm */
+ unsigned int length, /* input. Length of prime in bits. */
+ const SECItem *input_seed, /* input. */
+ mp_int *prime, /* output. */
+ SECItem *prime_seed, /* output. */
+ unsigned int *prime_gen_counter) /* output. */
+{
+ mp_int c; /* candidate prime on the small-length path */
+ mp_int c0; /* prime returned by the recursive call (length >= 33) */
+ mp_int one; /* constant 1, passed as the q argument on the recursive path */
+ SECStatus rv = SECFailure;
+ int hashlen = HASH_ResultLen(hashtype);
+ int outlen = hashlen * PR_BITS_PER_BYTE;
+ int offset;
+ unsigned char bit, mask;
+ unsigned char x[HASH_LENGTH_MAX * 2]; /* two hash outputs, back to back */
+ mp_digit dummy;
+ mp_err err = MP_OKAY;
+ int i;
+
+ MP_DIGITS(&c) = 0; /* presumably so mp_clear() in cleanup is harmless if an mp_init fails */
+ MP_DIGITS(&c0) = 0;
+ MP_DIGITS(&one) = 0;
+ CHECK_MPI_OK(mp_init(&c));
+ CHECK_MPI_OK(mp_init(&c0));
+ CHECK_MPI_OK(mp_init(&one));
+
+ /* Step 1. if length < 2 then return (FAILURE, 0, 0, 0) */
+ if (length < 2) {
+ rv = SECFailure;
+ goto cleanup;
+ }
+ /* Step 2. if length >= 33 then goto step 14 */
+ if (length >= 33) {
+ mp_zero(&one);
+ CHECK_MPI_OK(mp_add_d(&one, (mp_digit)1, &one)); /* one = 1 */
+
+ /* Step 14 (status, c0, prime_seed, prime_gen_counter) =
+ ** (ST_Random_Prime((ceil(length/2)+1, input_seed)
+ */
+ rv = makePrimefromSeedShaweTaylor(hashtype, (length + 1) / 2 + 1,
+ input_seed, &c0, prime_seed, prime_gen_counter);
+ /* Step 15 if FAILURE is returned, return (FAILURE, 0, 0, 0). */
+ if (rv != SECSuccess) {
+ goto cleanup;
+ }
+ /* Steps 16-34 */
+ rv = makePrimefromPrimesShaweTaylor(hashtype, length, &c0, &one,
+ prime, prime_seed, prime_gen_counter);
+ goto cleanup; /* we're done, one way or the other */
+ }
+ /* Step 3 prime_seed = input_seed */
+ CHECK_SEC_OK(SECITEM_CopyItem(NULL, prime_seed, input_seed));
+ /* Step 4 prime_gen_count = 0 */
+ *prime_gen_counter = 0;
+
+step_5:
+ /* Step 5 c = Hash(prime_seed) xor Hash(prime_seed+1). */
+ CHECK_SEC_OK(HASH_HashBuf(hashtype, x, prime_seed->data, prime_seed->len));
+ CHECK_SEC_OK(addToSeedThenHash(hashtype, prime_seed, 1,
+ MAX_ST_SEED_BITS, &x[hashlen]));
+ for (i = 0; i < hashlen; i++) {
+ x[i] = x[i] ^ x[i + hashlen];
+ }
+ /* Step 6 c = 2**(length-1) + (c mod 2**(length-1)) */
+ /* This step mathematically sets the high bit and clears out
+ ** all the other bits higher than length. Right now c is stored
+ ** in the x array, MSB first. The above formula gives us a c which
+ ** is length bits long and has the high bit set. We also know that
+ ** length < outlen since the smallest outlen is 160 bits and the largest
+ ** length at this point is 32 bits. So first we find the offset in bytes
+ ** into the array where the high bit is.
+ */
+ offset = (outlen - length) / PR_BITS_PER_BYTE;
+ /* now we want to set the 'high bit'. We have to calculate this since
+ * length may not be a multiple of 8.*/
+ bit = 1 << ((length - 1) & 0x7); /* select the proper bit in the byte */
+ /* we need to zero out the rest of the bits in the byte above */
+ mask = (bit - 1);
+ /* now we set it */
+ x[offset] = (mask & x[offset]) | bit;
+ /* Step 7 c = 2*floor(c/2) + 1 */
+ /* set the low bit. much easier to find (the end of the array) */
+ x[hashlen - 1] |= 1;
+ /* now that we've set our bits, we can create our candidate "c" */
+ CHECK_MPI_OK(mp_read_unsigned_octets(&c, &x[offset], hashlen - offset));
+ /* Step 8 prime_gen_counter = prime_gen_counter + 1 */
+ (*prime_gen_counter)++;
+ /* Step 9 prime_seed = prime_seed + 2 */
+ CHECK_SEC_OK(addToSeed(prime_seed, 2, MAX_ST_SEED_BITS, prime_seed));
+ /* Step 10 Perform deterministic primality test on c. For example, since
+ ** c is small, its primality can be tested by trial division, see
+ ** Appendix C.7.
+ **
+ ** We in fact test with trial division. mpi has a built-in trial divider
+ ** that divides all divisors up to 2^16.
+ */
+ if (prime_tab[prime_tab_size - 1] < 0xFFF1) {
+ /* we aren't testing all the primes between 0 and 2^16, we really
+ * can't use this construction. Just fail. */
+ rv = SECFailure;
+ goto cleanup;
+ }
+ dummy = prime_tab_size;
+ err = mpp_divis_primes(&c, &dummy); /* MP_NO is treated as "no small divisor found" below */
+ /* Step 11 if c is prime then */
+ if (err == MP_NO) {
+ /* Step 11.1 prime = c */
+ CHECK_MPI_OK(mp_copy(&c, prime));
+ /* Step 11.2 return SUCCESS prime, prime_seed, prime_gen_counter */
+ err = MP_OKAY;
+ rv = SECSuccess;
+ goto cleanup;
+ } else if (err != MP_YES) {
+ goto cleanup; /* function failed, bail out */
+ } else {
+ /* reset mp_err so the MP_YES answer is not mistaken for an error
+ * by the cleanup code */
+ err = MP_OKAY;
+ }
+ /*
+ ** Step 12 if (prime_gen_counter > (4*len))
+ ** then return (FAILURE, 0, 0, 0))
+ ** Step 13 goto step 5
+ */
+ if (*prime_gen_counter <= (4 * length)) {
+ goto step_5;
+ }
+ /* if (prime_gencont > 4*length), fall through to failure */
+ rv = SECFailure; /* really is already set, but paranoia is good */
+
+cleanup:
+ mp_clear(&c);
+ mp_clear(&c0);
+ mp_clear(&one);
+ PORT_Memset(x, 0, sizeof(x)); /* wipe the hash scratch buffer */
+ if (err) {
+ MP_TO_SEC_ERROR(err);
+ rv = SECFailure;
+ }
+ if (rv == SECFailure) {
+ mp_zero(prime); /* return (FAILURE, 0, 0, 0): clear every output */
+ if (prime_seed->data) {
+ SECITEM_FreeItem(prime_seed, PR_FALSE);
+ }
+ *prime_gen_counter = 0;
+ }
+ return rv;
+}
+
+/*
+ * Work out which generation method (and hash) turns the given seed into Q.
+ *
+ * The supported methods are tried in order: FIPS 186-1 (only for L <= 1024),
+ * FIPS 186-3 probable primes with each acceptable hash, and finally
+ * FIPS 186-3 Shawe-Taylor with each acceptable hash. The first method whose
+ * output equals Q is reported through hashtypePtr/typePtr.
+ *
+ * Q_ is used as the scratch result for every attempt. *qseed_len is only
+ * non-zero when the Shawe-Taylor method matched.
+ */
+static SECStatus
+findQfromSeed(
+    unsigned int L,             /* input. Length of p in bits. */
+    unsigned int N,             /* input. Length of q in bits. */
+    unsigned int g,             /* input. Length of seed in bits. */
+    const SECItem *seed,        /* input. */
+    mp_int *Q,                  /* input. */
+    mp_int *Q_,                 /* output. */
+    unsigned int *qseed_len,    /* output */
+    HASH_HashType *hashtypePtr, /* output. Hash uses */
+    pqgGenType *typePtr)        /* output. Generation Type used */
+{
+    HASH_HashType candidate;
+    SECItem stFirstSeed = { 0, 0, 0 };
+    SECItem stQSeed = { 0, 0, 0 };
+
+    *qseed_len = 0; /* only set if FIPS186_3_ST_TYPE */
+
+    /* FIPS 186-1 is the only option below 1024 bits, and one of several
+     * options at exactly 1024 bits. */
+    if (L <= 1024) {
+        if ((makeQfromSeed(g, seed, Q_) == SECSuccess) &&
+            (mp_cmp(Q, Q_) == 0)) {
+            *hashtypePtr = HASH_AlgSHA1;
+            *typePtr = FIPS186_1_TYPE;
+            return SECSuccess;
+        }
+        if (L < 1024) {
+            /* legacy small DSA has no other method to fall back on */
+            return SECFailure;
+        }
+        /* L == 1024: fall through and try the FIPS186_3 types */
+    }
+
+    /* FIPS 186-3 probable primes, one attempt per acceptable hash */
+    candidate = getFirstHash(L, N);
+    while (candidate != HASH_AlgTOTAL) {
+        if ((makeQ2fromSeed(candidate, N, seed, Q_) == SECSuccess) &&
+            (mp_cmp(Q, Q_) == 0)) {
+            *hashtypePtr = candidate;
+            *typePtr = FIPS186_3_TYPE;
+            return SECSuccess;
+        }
+        candidate = getNextHash(candidate);
+    }
+
+    /*
+     * OK finally try FIPS186_3 Shawe-Taylor. The first third of the seed
+     * is used as the Shawe-Taylor input seed.
+     */
+    stFirstSeed = *seed;
+    stFirstSeed.len = seed->len / 3;
+    for (candidate = getFirstHash(L, N); candidate != HASH_AlgTOTAL;
+         candidate = getNextHash(candidate)) {
+        unsigned int genCounter;
+        int tailStart;
+        SECStatus verdict;
+
+        if (makePrimefromSeedShaweTaylor(candidate, N, &stFirstSeed, Q_,
+                                         &stQSeed, &genCounter) != SECSuccess) {
+            continue;
+        }
+        if (mp_cmp(Q, Q_) != 0) {
+            SECITEM_FreeItem(&stQSeed, PR_FALSE);
+            continue;
+        }
+        /* q matches; the regenerated qseed must also match the tail of the
+         * supplied seed. If it doesn't, this isn't an accident: someone
+         * has been tweaking the seeds, so just fail at this point. */
+        verdict = SECFailure;
+        tailStart = seed->len - stQSeed.len;
+        if ((tailStart >= 0) &&
+            (PORT_Memcmp(&seed->data[tailStart], stQSeed.data,
+                         stQSeed.len) == 0)) {
+            *qseed_len = stQSeed.len;
+            *hashtypePtr = candidate;
+            *typePtr = FIPS186_3_ST_TYPE;
+            verdict = SECSuccess;
+        }
+        SECITEM_FreeItem(&stQSeed, PR_FALSE);
+        return verdict;
+    }
+    /* no hash algorithms found which match seed to Q, fail */
+    return SECFailure;
+}
+
+/*
+** Perform steps 7, 8 and 9 of FIPS 186, appendix 2.2.
+** which are the same as steps 11.1-11.5 of FIPS 186-2, App A.1.1.2
+** Generate P from Q, seed, L, and offset.
+**
+** The candidate is built as X = 2**(L-1) + W, where W concatenates n + 1
+** hashes of (seed + offset + j) and keeps only the low b bits of the last
+** one, then P = X - ((X mod 2q) - 1). No primality test is done here.
+**
+** N is accepted for symmetry with the callers but is not read in the body.
+**
+** Returns SECSuccess with P set, or SECFailure (an MPI error is translated
+** with MP_TO_SEC_ERROR first).
+*/
+static SECStatus
+makePfromQandSeed(
+ HASH_HashType hashtype, /* selected Hashing algorithm */
+ unsigned int L, /* Length of P in bits. Per FIPS 186. */
+ unsigned int N, /* Length of Q in bits. Per FIPS 186. */
+ unsigned int offset, /* Per FIPS 186, App 2.2. & 186-3 App A.1.1.2 */
+ unsigned int seedlen, /* input. Length of seed in bits. (g in 186-1)*/
+ const SECItem *seed, /* input. */
+ const mp_int *Q, /* input. */
+ mp_int *P) /* output. */
+{
+ unsigned int j; /* Per FIPS 186-3 App. A.1.1.2 (k in 186-1)*/
+ unsigned int n; /* Per FIPS 186, appendix 2.2. */
+ mp_digit b; /* Per FIPS 186, appendix 2.2. */
+ unsigned int outlen; /* Per FIPS 186-3 App. A.1.1.2 */
+ unsigned int hashlen; /* outlen in bytes */
+ unsigned char V_j[HASH_LENGTH_MAX]; /* one hash output, reused for every j */
+ mp_int W, X, c, twoQ, V_n, tmp;
+ mp_err err = MP_OKAY;
+ SECStatus rv = SECSuccess;
+ /* Initialize bignums */
+ MP_DIGITS(&W) = 0;
+ MP_DIGITS(&X) = 0;
+ MP_DIGITS(&c) = 0;
+ MP_DIGITS(&twoQ) = 0;
+ MP_DIGITS(&V_n) = 0;
+ MP_DIGITS(&tmp) = 0;
+ CHECK_MPI_OK(mp_init(&W));
+ CHECK_MPI_OK(mp_init(&X));
+ CHECK_MPI_OK(mp_init(&c));
+ CHECK_MPI_OK(mp_init(&twoQ));
+ CHECK_MPI_OK(mp_init(&tmp));
+ CHECK_MPI_OK(mp_init(&V_n));
+
+ hashlen = HASH_ResultLen(hashtype);
+ outlen = hashlen * PR_BITS_PER_BYTE;
+
+ /* L - 1 = n*outlen + b */
+ n = (L - 1) / outlen;
+ b = (L - 1) % outlen;
+
+ /* ******************************************************************
+ ** Step 11.1 (Step 7 in 186-1)
+ ** "for j = 0 ... n let
+ ** V_j = SHA[(SEED + offset + j) mod 2**seedlen]."
+ **
+ ** Step 11.2 (Step 8 in 186-1)
+ ** "W = V_0 + (V_1 * 2**outlen) + ... + (V_n-1 * 2**((n-1)*outlen))
+ ** + ((V_n mod 2**b) * 2**(n*outlen))
+ */
+ for (j = 0; j < n; ++j) { /* Do the first n terms of V_j */
+ /* Do step 11.1 for iteration j.
+ ** V_j = HASH[(seed + offset + j) mod 2**g]
+ */
+ CHECK_SEC_OK(addToSeedThenHash(hashtype, seed, offset + j, seedlen, V_j));
+ /* Do step 11.2 for iteration j.
+ ** W += V_j * 2**(j*outlen)
+ */
+ OCTETS_TO_MPINT(V_j, &tmp, hashlen); /* get bignum V_j */
+ CHECK_MPI_OK(mpl_lsh(&tmp, &tmp, j * outlen)); /* tmp=V_j << j*outlen */
+ CHECK_MPI_OK(mp_add(&W, &tmp, &W)); /* W += tmp */
+ }
+ /* Step 11.2, continued.
+ ** [W += ((V_n mod 2**b) * 2**(n*outlen))]
+ ** The last term is handled outside the loop because only its low b
+ ** bits are used.
+ */
+ CHECK_SEC_OK(addToSeedThenHash(hashtype, seed, offset + n, seedlen, V_j));
+ OCTETS_TO_MPINT(V_j, &V_n, hashlen); /* get bignum V_n */
+ CHECK_MPI_OK(mp_div_2d(&V_n, b, NULL, &tmp)); /* tmp = V_n mod 2**b */
+ CHECK_MPI_OK(mpl_lsh(&tmp, &tmp, n * outlen)); /* tmp = tmp << n*outlen */
+ CHECK_MPI_OK(mp_add(&W, &tmp, &W)); /* W += tmp */
+ /* Step 11.3, (Step 8 in 186-1)
+ ** "X = W + 2**(L-1).
+ ** Note that 0 <= W < 2**(L-1) and hence 2**(L-1) <= X < 2**L."
+ */
+ CHECK_MPI_OK(mpl_set_bit(&X, (mp_size)(L - 1), 1)); /* X = 2**(L-1) */
+ CHECK_MPI_OK(mp_add(&X, &W, &X)); /* X += W */
+ /*************************************************************
+ ** Step 11.4. (Step 9 in 186-1)
+ ** "c = X mod 2q"
+ */
+ CHECK_MPI_OK(mp_mul_2(Q, &twoQ)); /* 2q */
+ CHECK_MPI_OK(mp_mod(&X, &twoQ, &c)); /* c = X mod 2q */
+ /*************************************************************
+ ** Step 11.5. (Step 9 in 186-1)
+ ** "p = X - (c - 1).
+ ** Note that p is congruent to 1 mod 2q."
+ */
+ CHECK_MPI_OK(mp_sub_d(&c, 1, &c)); /* c -= 1 */
+ CHECK_MPI_OK(mp_sub(&X, &c, P)); /* P = X - c */
+cleanup:
+ mp_clear(&W);
+ mp_clear(&X);
+ mp_clear(&c);
+ mp_clear(&twoQ);
+ mp_clear(&V_n);
+ mp_clear(&tmp);
+ if (err) {
+ MP_TO_SEC_ERROR(err);
+ return SECFailure;
+ }
+ return rv; /* SECSuccess unless a CHECK_SEC_OK call failed (presumably the macro stores the status in rv) */
+}
+
+/*
+** Generate G from h, P, and Q.
+**
+** H is first reduced into the range [0, P-1) with a single subtraction,
+** then G = H ** ((P-1)/Q) mod P is computed.
+**
+** *passed is PR_TRUE only when both H and G end up greater than 1. When
+** either is 0 or 1 the function returns SECFailure so the caller can pick a
+** new H. MPI errors are translated with MP_TO_SEC_ERROR and also return
+** SECFailure.
+*/
+static SECStatus
+makeGfromH(const mp_int *P, /* input. */
+           const mp_int *Q, /* input. */
+           mp_int *H,       /* input and output. */
+           mp_int *G,       /* output. */
+           PRBool *passed)
+{
+    mp_int cofactor, pMinus1;
+    mp_err err = MP_OKAY;
+    SECStatus rv = SECSuccess;
+
+    *passed = PR_FALSE;
+    MP_DIGITS(&cofactor) = 0;
+    MP_DIGITS(&pMinus1) = 0;
+    CHECK_MPI_OK(mp_init(&cofactor));
+    CHECK_MPI_OK(mp_init(&pMinus1));
+
+    CHECK_MPI_OK(mp_sub_d(P, 1, &pMinus1)); /* P - 1 */
+    /* Let b = 2**n (smallest power of 2 greater than P).
+    ** Since P-1 >= b/2, and H < b, quotient(H/(P-1)) = 0 or 1
+    ** so one conditional subtraction safely computes H mod (P-1)
+    */
+    if (mp_cmp(H, &pMinus1) >= 0) {
+        CHECK_MPI_OK(mp_sub(H, &pMinus1, H));
+    }
+
+    if (mp_cmp_d(H, 1) > 0) {
+        /* G = (H ** ((P-1)/Q)) mod P */
+        CHECK_MPI_OK(mp_div(&pMinus1, Q, &cofactor, NULL));
+        CHECK_MPI_OK(mp_exptmod(H, &cofactor, P, G));
+        if (mp_cmp_d(G, 1) > 0) {
+            *passed = PR_TRUE;
+        } else {
+            /* G == 0 or G == 1: report an error */
+            rv = SECFailure;
+        }
+    } else {
+        /* H == 0 or H == 1: regen H (regen means return error) */
+        rv = SECFailure;
+    }
+
+cleanup:
+    mp_clear(&cofactor);
+    mp_clear(&pMinus1);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    return rv;
+}
+
+/*
+** Generate G from seed, index, P, and Q.
+**
+** For count = 1, 2, ... the value
+**     W = HASH(seed || "ggen" || index || count)
+** is computed (count encoded as two big-endian bytes) and G = W**e mod P
+** with e = (P-1)/Q. The first G that is >= 2 is returned. If no count
+** below MAX_COUNT produces one, the function fails.
+**
+** Returns SECSuccess with G set, or SECFailure.
+*/
+static SECStatus
+makeGfromIndex(HASH_HashType hashtype,
+               const mp_int *P,     /* input. */
+               const mp_int *Q,     /* input. */
+               const SECItem *seed, /* input. */
+               unsigned char index, /* input. */
+               mp_int *G)           /* input/output */
+{
+    const SECHashObject *hasher = NULL;
+    void *hashState = NULL;
+    unsigned char digest[HASH_LENGTH_MAX];
+    unsigned int digestLen;
+    unsigned int count;
+    mp_int e, pMinus1, W;
+    mp_err err = MP_OKAY;
+    SECStatus rv = SECSuccess;
+
+    MP_DIGITS(&e) = 0;
+    MP_DIGITS(&pMinus1) = 0;
+    MP_DIGITS(&W) = 0;
+    CHECK_MPI_OK(mp_init(&e));
+    CHECK_MPI_OK(mp_init(&pMinus1));
+    CHECK_MPI_OK(mp_init(&W));
+
+    /* set up the hash context that is reused for every count */
+    hasher = HASH_GetRawHashObject(hashtype);
+    if (hasher == NULL) {
+        /* shouldn't happen */
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        rv = SECFailure;
+        goto cleanup;
+    }
+    hashState = hasher->create();
+    if (hashState == NULL) {
+        rv = SECFailure;
+        goto cleanup;
+    }
+
+    /* Step 3 e = (p-1)/q */
+    CHECK_MPI_OK(mp_sub_d(P, 1, &pMinus1));
+    CHECK_MPI_OK(mp_div(&pMinus1, Q, &e, NULL));
+
+/* Steps 4, 5, and 6 */
+/* count is a 16 bit value in the spec. We actually represent count
+ * as more than 16 bits so we can easily detect the 16 bit overflow */
+#define MAX_COUNT 0x10000
+    for (count = 1; count < MAX_COUNT; count++) {
+        /* step 7: U = domain_param_seed || "ggen" || index || count
+         * step 8: W = HASH(U)
+         */
+        hasher->begin(hashState);
+        hasher->update(hashState, seed->data, seed->len);
+        hasher->update(hashState, (unsigned char *)"ggen", 4);
+        hasher->update(hashState, &index, 1);
+        /* the digest buffer doubles as scratch for the 2-byte counter */
+        digest[0] = (count >> 8) & 0xff;
+        digest[1] = count & 0xff;
+        hasher->update(hashState, digest, 2);
+        hasher->end(hashState, digest, &digestLen, sizeof(digest));
+        OCTETS_TO_MPINT(digest, &W, digestLen);
+        /* step 9. g = W**e mod p */
+        CHECK_MPI_OK(mp_exptmod(&W, &e, P, G));
+        /* step 10. g < 2 sends us back to step 5 (the next count);
+         * otherwise we have our generator (step 11) */
+        if (mp_cmp_d(G, 2) >= 0) {
+            break;
+        }
+    }
+    if (count >= MAX_COUNT) {
+        rv = SECFailure; /* last part of step 6: counter wrapped */
+    }
+
+cleanup:
+    PORT_Memset(digest, 0, sizeof(digest));
+    if (hashState) {
+        hasher->destroy(hashState, PR_TRUE);
+    }
+    mp_clear(&e);
+    mp_clear(&pMinus1);
+    mp_clear(&W);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    return rv;
+}
+
+/*
+** Generate DSA domain parameters (P, Q, G) and the matching verification
+** data (seed, counter, h).
+**
+** This code uses labels and gotos, so that it can follow the numbered
+** steps in the algorithms from FIPS 186-3 appendix A.1.1.2 very closely,
+** and so that the correctness of this code can be easily verified.
+** So, please forgive the ugly c code.
+**
+** L, N      bit lengths of P and Q (validated by the caller).
+** type      FIPS186_1_TYPE, FIPS186_3_TYPE or FIPS186_3_ST_TYPE.
+** seedBytes length of the seed in bytes; must be at least N/8.
+** pParams,
+** pVfy      in/out.
+**   - *pParams != NULL: P and Q are read from *pParams and only G is
+**     generated, from (*pVfy)->seed and the one-byte index in (*pVfy)->h.
+**     This mode is rejected for FIPS186_1_TYPE.
+**   - *pParams == NULL: new PQGParams and PQGVerify objects are allocated
+**     in their own arenas and returned through pParams/pVfy on success.
+**
+** Returns SECSuccess or SECFailure. Every exit after the bignums have been
+** initialised goes through the cleanup label, and every failure exit sets
+** rv to SECFailure explicitly. The status left behind by an earlier
+** successful CHECK_SEC_OK call is never relied upon.
+**
+** NOTE(review): on failure the cleanup code frees params->arena and
+** verify->arena. In the G-only mode these alias the caller's *pParams and
+** *pVfy, which are left pointing at freed memory. Confirm callers expect
+** this.
+**/
+static SECStatus
+pqg_ParamGen(unsigned int L, unsigned int N, pqgGenType type,
+             unsigned int seedBytes, PQGParams **pParams, PQGVerify **pVfy)
+{
+    unsigned int n;       /* Per FIPS 186, app 2.2. 186-3 app A.1.1.2 */
+    unsigned int seedlen; /* Per FIPS 186-3 app A.1.1.2 (was 'g' 186-1)*/
+    unsigned int counter; /* Per FIPS 186, app 2.2. 186-3 app A.1.1.2 */
+    unsigned int offset;  /* Per FIPS 186, app 2.2. 186-3 app A.1.1.2 */
+    unsigned int outlen;  /* Per FIPS 186-3, appendix A.1.1.2. */
+    unsigned int maxCount;
+    HASH_HashType hashtype;
+    SECItem *seed; /* Per FIPS 186, app 2.2. 186-3 app A.1.1.2 */
+    PLArenaPool *arena = NULL;
+    PQGParams *params = NULL;
+    PQGVerify *verify = NULL;
+    PRBool passed;
+    SECItem hit = { 0, 0, 0 };
+    SECItem firstseed = { 0, 0, 0 };
+    SECItem qseed = { 0, 0, 0 };
+    SECItem pseed = { 0, 0, 0 };
+    mp_int P, Q, G, H, l, p0;
+    mp_err err = MP_OKAY;
+    SECStatus rv = SECFailure;
+    int iterations = 0;
+
+    /* Step 1. L and N already checked by caller*/
+    /* Step 2. if (seedlen < N) return INVALID; */
+    if (seedBytes < N / PR_BITS_PER_BYTE || !pParams || !pVfy) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure; /* nothing allocated yet, plain return is fine */
+    }
+
+    /* Initialize bignums */
+    MP_DIGITS(&P) = 0;
+    MP_DIGITS(&Q) = 0;
+    MP_DIGITS(&G) = 0;
+    MP_DIGITS(&H) = 0;
+    MP_DIGITS(&l) = 0;
+    MP_DIGITS(&p0) = 0;
+    CHECK_MPI_OK(mp_init(&P));
+    CHECK_MPI_OK(mp_init(&Q));
+    CHECK_MPI_OK(mp_init(&G));
+    CHECK_MPI_OK(mp_init(&H));
+    CHECK_MPI_OK(mp_init(&l));
+    CHECK_MPI_OK(mp_init(&p0));
+
+    /* From here on the bignums own memory: leave only via cleanup. */
+
+    /* parameters have been passed in, only generate G */
+    if (*pParams != NULL) {
+        /* we only support G index generation if generating separate from PQ */
+        if ((*pVfy == NULL) || (type == FIPS186_1_TYPE) ||
+            ((*pVfy)->h.len != 1) || ((*pVfy)->h.data == NULL) ||
+            ((*pVfy)->seed.data == NULL) || ((*pVfy)->seed.len == 0)) {
+            PORT_SetError(SEC_ERROR_INVALID_ARGS);
+            /* params/verify are still NULL here, so cleanup will not
+             * touch the caller's objects */
+            rv = SECFailure;
+            goto cleanup;
+        }
+        params = *pParams;
+        verify = *pVfy;
+
+        /* fill in P Q, */
+        SECITEM_TO_MPINT((*pParams)->prime, &P);
+        SECITEM_TO_MPINT((*pParams)->subPrime, &Q);
+        hashtype = getFirstHash(L, N);
+        CHECK_SEC_OK(makeGfromIndex(hashtype, &P, &Q, &(*pVfy)->seed,
+                                    (*pVfy)->h.data[0], &G));
+        MPINT_TO_SECITEM(&G, &(*pParams)->base, (*pParams)->arena);
+        goto cleanup;
+    }
+    /* Initialize an arena for the params. */
+    arena = PORT_NewArena(NSS_FREEBL_DEFAULT_CHUNKSIZE);
+    if (!arena) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        rv = SECFailure;
+        goto cleanup;
+    }
+    params = (PQGParams *)PORT_ArenaZAlloc(arena, sizeof(PQGParams));
+    if (!params) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        /* params is NULL, so cleanup cannot reach this arena: free it here */
+        PORT_FreeArena(arena, PR_TRUE);
+        rv = SECFailure;
+        goto cleanup;
+    }
+    params->arena = arena;
+    /* Initialize an arena for the verify. */
+    arena = PORT_NewArena(NSS_FREEBL_DEFAULT_CHUNKSIZE);
+    if (!arena) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        rv = SECFailure; /* cleanup releases params->arena */
+        goto cleanup;
+    }
+    verify = (PQGVerify *)PORT_ArenaZAlloc(arena, sizeof(PQGVerify));
+    if (!verify) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        /* verify is NULL, so cleanup cannot reach this arena: free it here */
+        PORT_FreeArena(arena, PR_TRUE);
+        rv = SECFailure; /* cleanup releases params->arena */
+        goto cleanup;
+    }
+    verify->arena = arena;
+    seed = &verify->seed;
+    arena = NULL;
+
+    /* Select Hash and Compute lengths. */
+    /* getFirstHash gives us the smallest acceptable hash for this key
+     * strength */
+    hashtype = getFirstHash(L, N);
+    outlen = HASH_ResultLen(hashtype) * PR_BITS_PER_BYTE;
+
+    /* Step 3: n = Ceil(L/outlen)-1; (same as n = Floor((L-1)/outlen)) */
+    n = (L - 1) / outlen;
+    /* Step 4: (skipped since we don't use b): b = L -1 - (n*outlen); */
+    seedlen = seedBytes * PR_BITS_PER_BYTE; /* bits in seed */
+step_5:
+    /* ******************************************************************
+    ** Step 5. (Step 1 in 186-1)
+    ** "Choose an arbitrary sequence of at least N bits and call it SEED.
+    **  Let g be the length of SEED in bits."
+    */
+    if (++iterations > MAX_ITERATIONS) { /* give up after a while */
+        PORT_SetError(SEC_ERROR_NEED_RANDOM);
+        /* We get here via "goto step_5", where rv holds the SECSuccess of
+         * the last helper call and err may hold the MP_NO answer of a
+         * primality test. Reset both so that this is reported as a
+         * failure and the error code set above is not overwritten. */
+        err = MP_OKAY;
+        rv = SECFailure;
+        goto cleanup;
+    }
+    seed->len = seedBytes;
+    CHECK_SEC_OK(getPQseed(seed, verify->arena));
+    /* ******************************************************************
+    ** Step 6. (Step 2 in 186-1)
+    **
+    ** "Compute U = SHA[SEED] XOR SHA[(SEED+1) mod 2**g]. (186-1)"
+    ** "Compute U = HASH[SEED] mod 2**(N-1). (186-3)"
+    **
+    ** Step 7. (Step 3 in 186-1)
+    ** "Form Q from U by setting the most significant bit (the 2**159 bit)
+    **  and the least significant bit to 1. In terms of boolean operations,
+    **  Q = U OR 2**159 OR 1. Note that 2**159 < Q < 2**160. (186-1)"
+    **
+    ** "q = 2**(N-1) + U + 1 - (U mod 2) (186-3)
+    **
+    ** Note: Both formulations are the same for U < 2**(N-1) and N=160
+    **
+    ** If using Shawe-Taylor, we do the entire A.1.2.1.2 steps in the block
+    ** FIPS186_3_ST_TYPE.
+    */
+    if (type == FIPS186_1_TYPE) {
+        CHECK_SEC_OK(makeQfromSeed(seedlen, seed, &Q));
+    } else if (type == FIPS186_3_TYPE) {
+        CHECK_SEC_OK(makeQ2fromSeed(hashtype, N, seed, &Q));
+    } else {
+        /* FIPS186_3_ST_TYPE */
+        unsigned int qgen_counter, pgen_counter;
+
+        /* Step 1 (L,N) already checked for acceptability */
+
+        firstseed = *seed;
+        qgen_counter = 0;
+        /* Step 2. Use N and firstseed to generate random prime q
+         * using Appendix C.6 */
+        CHECK_SEC_OK(makePrimefromSeedShaweTaylor(hashtype, N, &firstseed, &Q,
+                                                  &qseed, &qgen_counter));
+        /* Step 3. Use floor(L/2+1) and qseed to generate random prime p0
+         * using Appendix C.6 */
+        pgen_counter = 0;
+        CHECK_SEC_OK(makePrimefromSeedShaweTaylor(hashtype, (L + 1) / 2 + 1,
+                                                  &qseed, &p0, &pseed, &pgen_counter));
+        /* Steps 4-22 FIPS 186-3 appendix A.1.2.1.2 */
+        CHECK_SEC_OK(makePrimefromPrimesShaweTaylor(hashtype, L,
+                                                    &p0, &Q, &P, &pseed, &pgen_counter));
+
+        /* combine all the seeds: firstseed || pseed || qseed */
+        seed->len = firstseed.len + qseed.len + pseed.len;
+        seed->data = PORT_ArenaZAlloc(verify->arena, seed->len);
+        if (seed->data == NULL) {
+            /* rv is SECSuccess from the call above; make this a failure */
+            PORT_SetError(SEC_ERROR_NO_MEMORY);
+            rv = SECFailure;
+            goto cleanup;
+        }
+        PORT_Memcpy(seed->data, firstseed.data, firstseed.len);
+        PORT_Memcpy(seed->data + firstseed.len, pseed.data, pseed.len);
+        PORT_Memcpy(seed->data + firstseed.len + pseed.len, qseed.data, qseed.len);
+        counter = 0; /* (qgen_counter << 16) | pgen_counter; */
+
+        /* we've generated both P and Q now, skip to generating G */
+        goto generate_G;
+    }
+    /* ******************************************************************
+    ** Step 8. (Step 4 in 186-1)
+    ** "Use a robust primality testing algorithm to test whether q is prime."
+    **
+    ** Appendix 2.1 states that a Rabin test with at least 50 iterations
+    ** "will give an acceptable probability of error."
+    */
+    /*CHECK_SEC_OK( prm_RabinTest(&Q, &passed) );*/
+    err = mpp_pprime(&Q, prime_testcount_q(L, N));
+    passed = (err == MP_YES) ? SECSuccess : SECFailure;
+    /* ******************************************************************
+    ** Step 9. (Step 5 in 186-1) "If q is not prime, goto step 5 (1 in 186-1)."
+    */
+    if (passed != SECSuccess)
+        goto step_5;
+    /* ******************************************************************
+    ** Step 10.
+    **      offset = 1;
+    **( Step 6b 186-1)"Let counter = 0 and offset = 2."
+    */
+    offset = (type == FIPS186_1_TYPE) ? 2 : 1;
+    /*
+    ** Step 11. (Step 6a,13a,14 in 186-1)
+    **  For counter - 0 to (4L-1) do
+    **
+    */
+    maxCount = L >= 1024 ? (4 * L - 1) : 4095;
+    for (counter = 0; counter <= maxCount; counter++) {
+        /* ******************************************************************
+        ** Step 11.1  (Step 7 in 186-1)
+        ** "for j = 0 ... n let
+        **          V_j = HASH[(SEED + offset + j) mod 2**seedlen]."
+        **
+        ** Step 11.2 (Step 8 in 186-1)
+        ** "W = V_0 + V_1*2**outlen+...+ V_n-1 * 2**((n-1)*outlen) +
+        **                               ((Vn* mod 2**b)*2**(n*outlen))"
+        ** Step 11.3 (Step 8 in 186-1)
+        ** "X = W + 2**(L-1)
+        **  Note that 0 <= W < 2**(L-1) and hence 2**(L-1) <= X < 2**L."
+        **
+        ** Step 11.4 (Step 9 in 186-1).
+        ** "c = X mod 2q"
+        **
+        ** Step 11.5 (Step 9 in 186-1).
+        ** " p = X - (c - 1).
+        **  Note that p is congruent to 1 mod 2q."
+        */
+        CHECK_SEC_OK(makePfromQandSeed(hashtype, L, N, offset, seedlen,
+                                       seed, &Q, &P));
+        /*************************************************************
+        ** Step 11.6. (Step 10 in 186-1)
+        ** "if p < 2**(L-1), then goto step 11.9. (step 13 in 186-1)"
+        */
+        CHECK_MPI_OK(mpl_set_bit(&l, (mp_size)(L - 1), 1)); /* l = 2**(L-1) */
+        if (mp_cmp(&P, &l) < 0)
+            goto step_11_9;
+        /************************************************************
+        ** Step 11.7 (step 11 in 186-1)
+        ** "Perform a robust primality test on p."
+        */
+        /*CHECK_SEC_OK( prm_RabinTest(&P, &passed) );*/
+        err = mpp_pprime(&P, prime_testcount_p(L, N));
+        passed = (err == MP_YES) ? SECSuccess : SECFailure;
+        /* ******************************************************************
+        ** Step 11.8. "If p is determined to be primed return VALID
+        ** values of p, q, seed and counter."
+        */
+        if (passed == SECSuccess)
+            break;
+    step_11_9:
+        /* ******************************************************************
+        ** Step 11.9.  "offset = offset + n + 1."
+        */
+        offset += n + 1;
+    }
+    /* ******************************************************************
+    ** Step 12.  "goto step 5."
+    **
+    ** NOTE: if counter <= maxCount, then we exited the loop at Step 11.8
+    ** and now need to return p,q, seed, and counter.
+    */
+    if (counter > maxCount)
+        goto step_5;
+
+generate_G:
+    /* ******************************************************************
+    ** returning p, q, seed and counter
+    */
+    if (type == FIPS186_1_TYPE) {
+        /* Generate g, This is called the "Unverifiable Generation of g
+         * in FIPS186-3 Appendix A.2.1. For compatibility we maintain
+         * this version of the code */
+        SECITEM_AllocItem(NULL, &hit, L / 8); /* h is no longer than p */
+        if (!hit.data) {
+            /* rv is SECSuccess from makePfromQandSeed; make this a failure */
+            PORT_SetError(SEC_ERROR_NO_MEMORY);
+            rv = SECFailure;
+            goto cleanup;
+        }
+        /* NOTE(review): makeGfromH returns SECFailure when H or G must be
+         * regenerated, so CHECK_SEC_OK leaves for cleanup before the loop
+         * condition can ask for another candidate. */
+        do {
+            /* loop generate h until 1<h<p-1 and (h**[(p-1)/q])mod p > 1 */
+            CHECK_SEC_OK(generate_h_candidate(&hit, &H));
+            CHECK_SEC_OK(makeGfromH(&P, &Q, &H, &G, &passed));
+        } while (passed != PR_TRUE);
+        MPINT_TO_SECITEM(&H, &verify->h, verify->arena);
+    } else {
+        unsigned char index = 1; /* default to 1 */
+        verify->h.data = (unsigned char *)PORT_ArenaZAlloc(verify->arena, 1);
+        if (verify->h.data == NULL) {
+            /* rv is SECSuccess from an earlier call; make this a failure */
+            PORT_SetError(SEC_ERROR_NO_MEMORY);
+            rv = SECFailure;
+            goto cleanup;
+        }
+        verify->h.len = 1;
+        verify->h.data[0] = index;
+        /* Generate g, using the FIPS 186-3 Appendix A.23 */
+        CHECK_SEC_OK(makeGfromIndex(hashtype, &P, &Q, seed, index, &G));
+    }
+    /* All generation is done.  Now, save the PQG params. */
+    MPINT_TO_SECITEM(&P, &params->prime, params->arena);
+    MPINT_TO_SECITEM(&Q, &params->subPrime, params->arena);
+    MPINT_TO_SECITEM(&G, &params->base, params->arena);
+    verify->counter = counter;
+    *pParams = params;
+    *pVfy = verify;
+cleanup:
+    if (pseed.data) {
+        PORT_Free(pseed.data);
+    }
+    if (qseed.data) {
+        PORT_Free(qseed.data);
+    }
+    mp_clear(&P);
+    mp_clear(&Q);
+    mp_clear(&G);
+    mp_clear(&H);
+    mp_clear(&l);
+    mp_clear(&p0);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    if (rv) {
+        if (params) {
+            PORT_FreeArena(params->arena, PR_TRUE);
+        }
+        if (verify) {
+            PORT_FreeArena(verify->arena, PR_TRUE);
+        }
+    }
+    if (hit.data) {
+        SECITEM_FreeItem(&hit, PR_FALSE);
+    }
+    return rv;
+}
+
+/*
+** Generate FIPS 186-1 style parameters for index j (0..8), which selects a
+** prime length of 512 + 64*j bits. The seed is as long as P.
+*/
+SECStatus
+PQG_ParamGen(unsigned int j, PQGParams **pParams, PQGVerify **pVfy)
+{
+    unsigned int pBits; /* Length of P in bits. Per FIPS 186. */
+
+    if (!pParams || !pVfy || j > 8) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    pBits = 512 + (j * 64);
+    /* seed length in bytes == length of P in bytes */
+    return pqg_ParamGen(pBits, DSA1_Q_BITS, FIPS186_1_TYPE, pBits / 8,
+                        pParams, pVfy);
+}
+
+/*
+** Same as PQG_ParamGen, but the caller chooses the seed length (in bytes).
+** Index j (0..8) selects a prime length of 512 + 64*j bits.
+*/
+SECStatus
+PQG_ParamGenSeedLen(unsigned int j, unsigned int seedBytes,
+                    PQGParams **pParams, PQGVerify **pVfy)
+{
+    if (!pParams || !pVfy || j > 8) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    /* first argument: bits in P */
+    return pqg_ParamGen(512 + (j * 64), DSA1_Q_BITS, FIPS186_1_TYPE,
+                        seedBytes, pParams, pVfy);
+}
+
+/*
+** Generate parameters for an (L, N) pair using the Shawe-Taylor method.
+** N == 0 selects the default N for L; seedBytes == 0 selects N/8.
+*/
+SECStatus
+PQG_ParamGenV2(unsigned int L, unsigned int N, unsigned int seedBytes,
+               PQGParams **pParams, PQGVerify **pVfy)
+{
+    unsigned int qBits = (N != 0) ? N : pqg_get_default_N(L);
+    unsigned int seedLen = seedBytes;
+
+    if (pqg_validate_dsa2(L, qBits) != SECSuccess) {
+        /* error code already set */
+        return SECFailure;
+    }
+    if (seedLen == 0) {
+        /* seedBytes == L/8 for probable primes, N/8 for Shawe-Taylor Primes */
+        seedLen = qBits / 8;
+    }
+    return pqg_ParamGen(L, qBits, FIPS186_3_ST_TYPE, seedLen, pParams, pVfy);
+}
+
+/*
+ * PQG_VerifyParams
+ *
+ * Check DSA domain parameters (P, Q and, when present, G) against the
+ * verification data in vfy (seed, counter, h).
+ * verify can use vfy structures returned from either FIPS186-1 or
+ * FIPS186-2, and can handle differences in selected Hash functions to
+ * generate the parameters.
+ *
+ * Return value:
+ *   SECFailure  - the arguments are unusable (SEC_ERROR_INVALID_ARGS) or
+ *                 an internal computation failed; *result is not
+ *                 meaningful in that case.
+ *   SECSuccess  - the checks ran; the verdict is in *result:
+ *                   SECSuccess    every check that was made passed
+ *                   SECFailure    the parameters failed a check
+ *                   SECWouldBlock P, Q and G passed but no h/index was
+ *                                 supplied for parameters that were not
+ *                                 generated with FIPS186_1_TYPE, so the
+ *                                 derivation of G was not verified
+ */
+SECStatus
+PQG_VerifyParams(const PQGParams *params,
+                 const PQGVerify *vfy, SECStatus *result)
+{
+    SECStatus rv = SECSuccess;
+    unsigned int g, n, L, N, offset, outlen;
+    mp_int p0, P, Q, G, P_, Q_, G_, r, h;
+    mp_err err = MP_OKAY;
+    int j;
+    unsigned int counter_max = 0; /* handle legacy L < 1024 */
+    unsigned int qseed_len;
+    SECItem pseed_ = { 0, 0, 0 };
+    HASH_HashType hashtype;
+    pqgGenType type;
+
+/* A failed parameter check is a verdict, not an error: record it in
+ * *result and leave rv untouched. */
+#define CHECKPARAM(cond)      \
+    if (!(cond)) {            \
+        *result = SECFailure; \
+        goto cleanup;         \
+    }
+    if (!params || !vfy || !result) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    /* always need at least p, q, and seed for any meaningful check */
+    if ((params->prime.len == 0) || (params->subPrime.len == 0) ||
+        (vfy->seed.len == 0)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    /* we want to either check PQ or G or both. If we don't have G, make
+     * sure we have count so we can check P. */
+    if ((params->base.len == 0) && (vfy->counter == -1)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* zero the digit pointers first so that cleanup can mp_clear() every
+     * variable even if one of the mp_init() calls below fails */
+    MP_DIGITS(&p0) = 0;
+    MP_DIGITS(&P) = 0;
+    MP_DIGITS(&Q) = 0;
+    MP_DIGITS(&G) = 0;
+    MP_DIGITS(&P_) = 0;
+    MP_DIGITS(&Q_) = 0;
+    MP_DIGITS(&G_) = 0;
+    MP_DIGITS(&r) = 0;
+    MP_DIGITS(&h) = 0;
+    CHECK_MPI_OK(mp_init(&p0));
+    CHECK_MPI_OK(mp_init(&P));
+    CHECK_MPI_OK(mp_init(&Q));
+    CHECK_MPI_OK(mp_init(&G));
+    CHECK_MPI_OK(mp_init(&P_));
+    CHECK_MPI_OK(mp_init(&Q_));
+    CHECK_MPI_OK(mp_init(&G_));
+    CHECK_MPI_OK(mp_init(&r));
+    CHECK_MPI_OK(mp_init(&h));
+    *result = SECSuccess;
+    SECITEM_TO_MPINT(params->prime, &P);
+    SECITEM_TO_MPINT(params->subPrime, &Q);
+    /* if G isn't specified, just check P and Q */
+    if (params->base.len != 0) {
+        SECITEM_TO_MPINT(params->base, &G);
+    }
+    /* 1. Check (L,N) pair */
+    N = mpl_significant_bits(&Q);
+    L = mpl_significant_bits(&P);
+    if (L < 1024) {
+        /* handle DSA1 pqg parameters with less than 1024 bits */
+        CHECKPARAM(N == DSA1_Q_BITS);
+        j = PQG_PBITS_TO_INDEX(L);
+        CHECKPARAM(j >= 0 && j <= 8);
+        counter_max = 4096;
+    } else {
+        /* handle DSA2 parameters (includes DSA1, 1024 bits) */
+        CHECKPARAM(pqg_validate_dsa2(L, N) == SECSuccess);
+        counter_max = 4 * L;
+    }
+    /* 3. G < P */
+    if (params->base.len != 0) {
+        CHECKPARAM(mp_cmp(&G, &P) < 0);
+    }
+    /* 4. P % Q == 1 */
+    CHECK_MPI_OK(mp_mod(&P, &Q, &r));
+    CHECKPARAM(mp_cmp_d(&r, 1) == 0);
+    /* 5. Q is prime */
+    CHECKPARAM(mpp_pprime(&Q, prime_testcount_q(L, N)) == MP_YES);
+    /* 6. P is prime */
+    CHECKPARAM(mpp_pprime(&P, prime_testcount_p(L, N)) == MP_YES);
+    /* Steps 7-12 are done only if the optional PQGVerify is supplied. */
+    /* continue processing P */
+    /* 7. counter < 4*L */
+    CHECKPARAM((vfy->counter == -1) || (vfy->counter < counter_max));
+    /* 8. g >= N and g < 2*L   (g is length of seed in bits) */
+    g = vfy->seed.len * 8;
+    CHECKPARAM(g >= N && g < counter_max / 2);
+    /* 9. Q generated from SEED matches Q in PQGParams. */
+    /* This function checks all possible hash and generation types to
+     * find a Q_ which matches Q. */
+    CHECKPARAM(findQfromSeed(L, N, g, &vfy->seed, &Q, &Q_, &qseed_len,
+                             &hashtype, &type) == SECSuccess);
+    CHECKPARAM(mp_cmp(&Q, &Q_) == 0);
+    if (type == FIPS186_3_ST_TYPE) {
+        SECItem qseed = { 0, 0, 0 };
+        SECItem pseed = { 0, 0, 0 };
+        unsigned int first_seed_len;
+        unsigned int pgen_counter = 0;
+
+        /* extract pseed and qseed from domain_parameter_seed, which is
+         * first_seed || pseed || qseed. qseed is first_seed + small_integer
+         * pseed is qseed + small_integer. This means most of the time
+         * first_seed.len == qseed.len == pseed.len. Rarely qseed.len and/or
+         * pseed.len will be one greater than first_seed.len, so we can
+         * depend on the fact that
+         *   first_seed.len = floor(domain_parameter_seed.len/3).
+         * findQfromSeed returned qseed.len, so we can calculate pseed.len as
+         *   pseed.len = domain_parameter_seed.len - first_seed.len - qseed.len
+         * this is probably over kill, since 99.999% of the time they will all
+         * be equal.
+         *
+         * With the lengths, we can now find the offsets;
+         * first_seed.data = domain_parameter_seed.data + 0
+         * pseed.data = domain_parameter_seed.data + first_seed.len
+         * qseed.data = domain_parameter_seed.data
+         *         + domain_parameter_seed.len - qseed.len
+         *
+         */
+        first_seed_len = vfy->seed.len / 3;
+        CHECKPARAM(qseed_len < vfy->seed.len);
+        CHECKPARAM(first_seed_len * 8 > N - 1);
+        CHECKPARAM(first_seed_len + qseed_len < vfy->seed.len);
+        qseed.len = qseed_len;
+        qseed.data = vfy->seed.data + vfy->seed.len - qseed.len;
+        pseed.len = vfy->seed.len - (first_seed_len + qseed_len);
+        pseed.data = vfy->seed.data + first_seed_len;
+
+        /*
+         * now complete FIPS 186-3 A.1.2.1.2. Step 1 was completed
+         * above in our initial checks, Step 2 was completed by
+         * findQfromSeed */
+
+        /* Step 3 (status, c0, prime_seed, prime_gen_counter) =
+        ** (ST_Random_Prime((ceil(length/2)+1, input_seed)
+        */
+        CHECK_SEC_OK(makePrimefromSeedShaweTaylor(hashtype, (L + 1) / 2 + 1,
+                                                  &qseed, &p0, &pseed_, &pgen_counter));
+        /* Steps 4-22 FIPS 186-3 appendix A.1.2.1.2 */
+        CHECK_SEC_OK(makePrimefromPrimesShaweTaylor(hashtype, L,
+                                                    &p0, &Q_, &P_, &pseed_, &pgen_counter));
+        CHECKPARAM(mp_cmp(&P, &P_) == 0);
+        /* make sure pseed wasn't tampered with (since it is part of
+         * calculating G) */
+        CHECKPARAM(SECITEM_CompareItem(&pseed, &pseed_) == SECEqual);
+    } else if (vfy->counter == -1) {
+        /* If counter is set to -1, we are really only verifying G, skip
+         * the remainder of the checks for P */
+        CHECKPARAM(type != FIPS186_1_TYPE); /* we only do this for DSA2 */
+    } else {
+        /* 10. P generated from (L, counter, g, SEED, Q) matches P
+         * in PQGParams. */
+        outlen = HASH_ResultLen(hashtype) * PR_BITS_PER_BYTE;
+        n = (L - 1) / outlen;
+        offset = vfy->counter * (n + 1) + ((type == FIPS186_1_TYPE) ? 2 : 1);
+        CHECK_SEC_OK(makePfromQandSeed(hashtype, L, N, offset, g, &vfy->seed,
+                                       &Q, &P_));
+        CHECKPARAM(mp_cmp(&P, &P_) == 0);
+    }
+
+    /* now check G, skip if don't have a g */
+    if (params->base.len == 0)
+        goto cleanup;
+
+    /* first Always check that G is OK  FIPS186-3 A.2.2  & A.2.4*/
+    /* 1. 2 < G < P-1 */
+    /* P is prime, p-1 == zero 1st bit */
+    CHECK_MPI_OK(mpl_set_bit(&P, 0, 0));
+    CHECKPARAM(mp_cmp_d(&G, 2) > 0 && mp_cmp(&G, &P) < 0);
+    CHECK_MPI_OK(mpl_set_bit(&P, 0, 1)); /* set it back */
+    /* 2. verify g**q mod p == 1 */
+    CHECK_MPI_OK(mp_exptmod(&G, &Q, &P, &h)); /* h = G ** Q mod P */
+    CHECKPARAM(mp_cmp_d(&h, 1) == 0);
+
+    /* no h, the above is the best we can do */
+    if (vfy->h.len == 0) {
+        if (type != FIPS186_1_TYPE) {
+            *result = SECWouldBlock;
+        }
+        goto cleanup;
+    }
+
+    /*
+     * If h is one byte and FIPS186-3 was used to generate Q (we've verified
+     * Q was generated from seed already, then we assume that FIPS 186-3
+     * appendix A.2.3 was used to generate G. Otherwise we assume A.2.1 was
+     * used to generate G.
+     */
+    if ((vfy->h.len == 1) && (type != FIPS186_1_TYPE)) {
+        /* A.2.3 */
+        CHECK_SEC_OK(makeGfromIndex(hashtype, &P, &Q, &vfy->seed,
+                                    vfy->h.data[0], &G_));
+        CHECKPARAM(mp_cmp(&G, &G_) == 0);
+    } else {
+        int passed;
+        /* A.2.1 */
+        SECITEM_TO_MPINT(vfy->h, &h);
+        /* 11. 1 < h < P-1 */
+        /* P is prime, p-1 == zero 1st bit */
+        CHECK_MPI_OK(mpl_set_bit(&P, 0, 0));
+        /* The range check applies to the supplied h; G was already
+         * range-checked above. */
+        CHECKPARAM(mp_cmp_d(&h, 1) > 0 && mp_cmp(&h, &P) < 0);
+        CHECK_MPI_OK(mpl_set_bit(&P, 0, 1)); /* set it back */
+        /* 12. G generated from h matches G in PQGParams. */
+        CHECK_SEC_OK(makeGfromH(&P, &Q, &h, &G_, &passed));
+        CHECKPARAM(passed && mp_cmp(&G, &G_) == 0);
+    }
+cleanup:
+    mp_clear(&p0);
+    mp_clear(&P);
+    mp_clear(&Q);
+    mp_clear(&G);
+    mp_clear(&P_);
+    mp_clear(&Q_);
+    mp_clear(&G_);
+    mp_clear(&r);
+    mp_clear(&h);
+    if (pseed_.data) {
+        SECITEM_FreeItem(&pseed_, PR_FALSE);
+    }
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    return rv;
+}
+
+/**************************************************************************
+ *  Release a PQGParams struct and the things it points to.               *
+ *  A NULL argument is accepted and ignored.                               *
+ **************************************************************************/
+void
+PQG_DestroyParams(PQGParams *params)
+{
+    if (params == NULL) {
+        return;
+    }
+    if (params->arena == NULL) {
+        /* No arena: release each item's data, then the struct.  The
+         * SECItems are members of *params, so PR_FALSE is passed to keep
+         * SECITEM_FreeItem from freeing the SECItem structures themselves. */
+        SECITEM_FreeItem(&params->prime, PR_FALSE);
+        SECITEM_FreeItem(&params->subPrime, PR_FALSE);
+        SECITEM_FreeItem(&params->base, PR_FALSE);
+        PORT_Free(params);
+        return;
+    }
+    /* Arena-backed: freeing the arena is the only release performed
+     * (presumably the struct itself lives in it); PR_FALSE = don't zero it */
+    PORT_FreeArena(params->arena, PR_FALSE);
+}
+
+/**************************************************************************
+ *  Release a PQGVerify struct and the things it points to.               *
+ *  A NULL argument is accepted and ignored.                               *
+ **************************************************************************/
+
+void
+PQG_DestroyVerify(PQGVerify *vfy)
+{
+    if (vfy == NULL) {
+        return;
+    }
+    if (vfy->arena == NULL) {
+        /* No arena: release each item's data, then the struct.  The
+         * SECItems are members of *vfy, so PR_FALSE is passed to keep
+         * SECITEM_FreeItem from freeing the SECItem structures themselves. */
+        SECITEM_FreeItem(&vfy->seed, PR_FALSE);
+        SECITEM_FreeItem(&vfy->h, PR_FALSE);
+        PORT_Free(vfy);
+        return;
+    }
+    /* Arena-backed: freeing the arena is the only release performed
+     * (presumably the struct itself lives in it); PR_FALSE = don't zero it */
+    PORT_FreeArena(vfy->arena, PR_FALSE);
+}
diff --git a/security/nss/lib/freebl/pqg.h b/security/nss/lib/freebl/pqg.h
new file mode 100644
index 000000000..c4eecd590
--- /dev/null
+++ b/security/nss/lib/freebl/pqg.h
@@ -0,0 +1,25 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * pqg.h
+ *
+ * header file for pqg functions exported just to freebl
+ */
+
+#ifndef _PQG_H_
+#define _PQG_H_ 1
+
+/* PQG_GetLength returns the number of significant bytes in the SECItem
+ * object (that is, the length of the object minus any leading zeros). Any
+ * SECItem may be used, though this function is usually used for P, Q, or G
+ * values. */
+unsigned int PQG_GetLength(const SECItem *obj);
+/* Check that the PQG parameters match a NIST-defined DSA size;
+ * returns SECFailure and sets SEC_ERROR_INVALID_ARGS if they don't.
+ * See blapi.h for legal DSA PQG sizes. */
+SECStatus PQG_Check(const PQGParams *params);
+/* Return the preferred hash algorithm for the given PQGParams. */
+HASH_HashType PQG_GetHashType(const PQGParams *params);
+
+#endif /* _PQG_H_ */
diff --git a/security/nss/lib/freebl/rawhash.c b/security/nss/lib/freebl/rawhash.c
new file mode 100644
index 000000000..551727b89
--- /dev/null
+++ b/security/nss/lib/freebl/rawhash.c
@@ -0,0 +1,154 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "nspr.h"
+#include "hasht.h"
+#include "blapi.h" /* below the line */
+#include "secerr.h"
+
+static void *
+null_hash_new_context(void)
+{
+ return NULL; /* the null hash keeps no state, so no context object is created */
+}
+
+static void *
+null_hash_clone_context(void *v)
+{
+ PORT_Assert(v == NULL); /* null_hash_new_context only ever hands out NULL */
+ return NULL;
+}
+
+static void
+null_hash_begin(void *v)
+{ /* intentionally empty: there is no state to reset */
+}
+
+static void
+null_hash_update(void *v, const unsigned char *input, unsigned int length)
+{ /* intentionally empty: the input is ignored */
+}
+
+static void
+null_hash_end(void *v, unsigned char *output, unsigned int *outLen,
+ unsigned int maxOut)
+{
+ *outLen = 0; /* report an empty digest; output and maxOut are not used */
+}
+
+static void
+null_hash_destroy_context(void *v, PRBool b)
+{
+ PORT_Assert(v == NULL); /* nothing was allocated, so nothing is released */
+}
+
+const SECHashObject SECRawHashObjects[] = { /* indexed by HASH_HashType, see HASH_GetRawHashObject */
+ { 0, /* HASH_AlgNULL entry: zero-length digest, every hook is a null_hash_* stub */
+ (void *(*)(void))null_hash_new_context,
+ (void *(*)(void *))null_hash_clone_context,
+ (void (*)(void *, PRBool))null_hash_destroy_context,
+ (void (*)(void *))null_hash_begin,
+ (void (*)(void *, const unsigned char *, unsigned int))null_hash_update,
+ (void (*)(void *, unsigned char *, unsigned int *,
+ unsigned int))null_hash_end,
+ 0, /* block length */
+ HASH_AlgNULL,
+ (void (*)(void *, unsigned char *, unsigned int *,
+ unsigned int))null_hash_end },
+ {
+ MD2_LENGTH, /* MD2 entry; slots are in the same order for every entry below */
+ (void *(*)(void))MD2_NewContext,
+ (void *(*)(void *))null_hash_clone_context, /* NOTE(review): every entry installs this stub, which asserts its argument is NULL -- presumably raw hash contexts are never cloned; confirm */
+ (void (*)(void *, PRBool))MD2_DestroyContext,
+ (void (*)(void *))MD2_Begin,
+ (void (*)(void *, const unsigned char *, unsigned int))MD2_Update,
+ (void (*)(void *, unsigned char *, unsigned int *, unsigned int))MD2_End,
+ MD2_BLOCK_LENGTH,
+ HASH_AlgMD2,
+ NULL /* end_raw: MD2 is the only entry without a raw-end hook */
+ },
+ { MD5_LENGTH, /* MD5 entry */
+ (void *(*)(void))MD5_NewContext,
+ (void *(*)(void *))null_hash_clone_context,
+ (void (*)(void *, PRBool))MD5_DestroyContext,
+ (void (*)(void *))MD5_Begin,
+ (void (*)(void *, const unsigned char *, unsigned int))MD5_Update,
+ (void (*)(void *, unsigned char *, unsigned int *, unsigned int))MD5_End,
+ MD5_BLOCK_LENGTH,
+ HASH_AlgMD5,
+ (void (*)(void *, unsigned char *, unsigned int *, unsigned int))MD5_EndRaw },
+ { SHA1_LENGTH, /* SHA-1 entry */
+ (void *(*)(void))SHA1_NewContext,
+ (void *(*)(void *))null_hash_clone_context,
+ (void (*)(void *, PRBool))SHA1_DestroyContext,
+ (void (*)(void *))SHA1_Begin,
+ (void (*)(void *, const unsigned char *, unsigned int))SHA1_Update,
+ (void (*)(void *, unsigned char *, unsigned int *, unsigned int))SHA1_End,
+ SHA1_BLOCK_LENGTH,
+ HASH_AlgSHA1,
+ (void (*)(void *, unsigned char *, unsigned int *, unsigned int))
+ SHA1_EndRaw },
+ { SHA256_LENGTH, /* SHA-256 entry */
+ (void *(*)(void))SHA256_NewContext,
+ (void *(*)(void *))null_hash_clone_context,
+ (void (*)(void *, PRBool))SHA256_DestroyContext,
+ (void (*)(void *))SHA256_Begin,
+ (void (*)(void *, const unsigned char *, unsigned int))SHA256_Update,
+ (void (*)(void *, unsigned char *, unsigned int *,
+ unsigned int))SHA256_End,
+ SHA256_BLOCK_LENGTH,
+ HASH_AlgSHA256,
+ (void (*)(void *, unsigned char *, unsigned int *,
+ unsigned int))SHA256_EndRaw },
+ { SHA384_LENGTH, /* SHA-384 entry */
+ (void *(*)(void))SHA384_NewContext,
+ (void *(*)(void *))null_hash_clone_context,
+ (void (*)(void *, PRBool))SHA384_DestroyContext,
+ (void (*)(void *))SHA384_Begin,
+ (void (*)(void *, const unsigned char *, unsigned int))SHA384_Update,
+ (void (*)(void *, unsigned char *, unsigned int *,
+ unsigned int))SHA384_End,
+ SHA384_BLOCK_LENGTH,
+ HASH_AlgSHA384,
+ (void (*)(void *, unsigned char *, unsigned int *,
+ unsigned int))SHA384_EndRaw },
+ { SHA512_LENGTH, /* SHA-512 entry */
+ (void *(*)(void))SHA512_NewContext,
+ (void *(*)(void *))null_hash_clone_context,
+ (void (*)(void *, PRBool))SHA512_DestroyContext,
+ (void (*)(void *))SHA512_Begin,
+ (void (*)(void *, const unsigned char *, unsigned int))SHA512_Update,
+ (void (*)(void *, unsigned char *, unsigned int *,
+ unsigned int))SHA512_End,
+ SHA512_BLOCK_LENGTH,
+ HASH_AlgSHA512,
+ (void (*)(void *, unsigned char *, unsigned int *,
+ unsigned int))SHA512_EndRaw },
+ { SHA224_LENGTH, /* SHA-224 entry; listed after SHA-512, so the table order is not sorted by digest size */
+ (void *(*)(void))SHA224_NewContext,
+ (void *(*)(void *))null_hash_clone_context,
+ (void (*)(void *, PRBool))SHA224_DestroyContext,
+ (void (*)(void *))SHA224_Begin,
+ (void (*)(void *, const unsigned char *, unsigned int))SHA224_Update,
+ (void (*)(void *, unsigned char *, unsigned int *,
+ unsigned int))SHA224_End,
+ SHA224_BLOCK_LENGTH,
+ HASH_AlgSHA224,
+ (void (*)(void *, unsigned char *, unsigned int *,
+ unsigned int))SHA224_EndRaw },
+};
+
+/*
+ * Return the SECRawHashObjects entry for hashType.
+ * Only values strictly between HASH_AlgNULL and HASH_AlgTOTAL are
+ * accepted; anything else (including HASH_AlgNULL itself) sets
+ * SEC_ERROR_INVALID_ARGS and yields NULL.
+ */
+const SECHashObject *
+HASH_GetRawHashObject(HASH_HashType hashType)
+{
+    if (hashType > HASH_AlgNULL && hashType < HASH_AlgTOTAL) {
+        return &SECRawHashObjects[hashType];
+    }
+    PORT_SetError(SEC_ERROR_INVALID_ARGS);
+    return NULL;
+}
diff --git a/security/nss/lib/freebl/ret_cr16.s b/security/nss/lib/freebl/ret_cr16.s
new file mode 100644
index 000000000..1f53fc900
--- /dev/null
+++ b/security/nss/lib/freebl/ret_cr16.s
@@ -0,0 +1,27 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef __LP64__
+ .LEVEL 2.0W
+#else
+ .LEVEL 1.1
+#endif
+
+ .CODE ; equivalent to the following two lines
+; .SPACE $TEXT$,SORT=8
+; .SUBSPA $CODE$,QUAD=0,ALIGN=4,ACCESS=0x2c,CODE_ONLY,SORT=24
+
+ret_cr16
+ .PROC
+ .CALLINFO FRAME=0, NO_CALLS
+ .EXPORT ret_cr16,ENTRY
+ .ENTRY
+; BV %r0(%rp)
+ BV 0(%rp) ; branch back through the return pointer
+ MFCTL %cr16,%ret0 ; copy control register 16 into the return-value register (presumably executed in the delay slot of the BV above -- TODO confirm)
+ BV %r0(%rp) ; NOTE(review): second return branch following the first one; looks redundant -- confirm intent
+ .EXIT
+ NOP
+ .PROCEND
+ .END
diff --git a/security/nss/lib/freebl/rijndael.c b/security/nss/lib/freebl/rijndael.c
new file mode 100644
index 000000000..4bb182693
--- /dev/null
+++ b/security/nss/lib/freebl/rijndael.c
@@ -0,0 +1,1375 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "prinit.h"
+#include "prenv.h"
+#include "prerr.h"
+#include "secerr.h"
+
+#include "prtypes.h"
+#include "blapi.h"
+#include "rijndael.h"
+
+#include "cts.h"
+#include "ctr.h"
+#include "gcm.h"
+
+#ifdef USE_HW_AES
+#include "intel-aes.h"
+#endif
+
+#include "mpi.h"
+
+#ifdef USE_HW_AES
+static int has_intel_aes = 0;
+static PRBool use_hw_aes = PR_FALSE;
+
+#ifdef INTEL_GCM
+#include "intel-gcm.h"
+static int has_intel_avx = 0;
+static int has_intel_clmul = 0;
+static PRBool use_hw_gcm = PR_FALSE;
+#if defined(_MSC_VER) && !defined(_M_IX86)
+#include <intrin.h> /* for _xgetbv() */
+#endif
+#endif
+#endif /* USE_HW_AES */
+
+/*
+ * There are currently five ways to build this code, varying in performance
+ * and code size.
+ *
+ * RIJNDAEL_INCLUDE_TABLES Include all tables from rijndael32.tab
+ * RIJNDAEL_GENERATE_TABLES Generate tables on first
+ * encryption/decryption, then store them;
+ * use the function gfm
+ * RIJNDAEL_GENERATE_TABLES_MACRO Same as above, but use macros to do
+ * the generation
+ * RIJNDAEL_GENERATE_VALUES Do not store tables, generate the table
+ * values "on-the-fly", using gfm
+ * RIJNDAEL_GENERATE_VALUES_MACRO Same as above, but use macros
+ *
+ * The default is RIJNDAEL_INCLUDE_TABLES.
+ */
+
+/*
+ * When building RIJNDAEL_INCLUDE_TABLES, includes S**-1, Rcon, T[0..4],
+ * T**-1[0..4], IMXC[0..4]
+ * When building anything else, includes S, S**-1, Rcon
+ */
+#include "rijndael32.tab"
+
+#if defined(RIJNDAEL_INCLUDE_TABLES)
+/*
+ * RIJNDAEL_INCLUDE_TABLES
+ */
+#define T0(i) _T0[i]
+#define T1(i) _T1[i]
+#define T2(i) _T2[i]
+#define T3(i) _T3[i]
+#define TInv0(i) _TInv0[i]
+#define TInv1(i) _TInv1[i]
+#define TInv2(i) _TInv2[i]
+#define TInv3(i) _TInv3[i]
+#define IMXC0(b) _IMXC0[b]
+#define IMXC1(b) _IMXC1[b]
+#define IMXC2(b) _IMXC2[b]
+#define IMXC3(b) _IMXC3[b]
+/* The S-box can be recovered from the T-tables */
+#ifdef IS_LITTLE_ENDIAN
+#define SBOX(b) ((PRUint8)_T3[b])
+#else
+#define SBOX(b) ((PRUint8)_T1[b])
+#endif
+#define SINV(b) (_SInv[b])
+
+#else /* not RIJNDAEL_INCLUDE_TABLES */
+
+/*
+ * Code for generating T-table values.
+ */
+
+#ifdef IS_LITTLE_ENDIAN
+#define WORD4(b0, b1, b2, b3) \
+ ((((PRUint32)b3) << 24) | \
+ (((PRUint32)b2) << 16) | \
+ (((PRUint32)b1) << 8) | \
+ ((PRUint32)b0))
+#else
+#define WORD4(b0, b1, b2, b3) \
+ ((((PRUint32)b0) << 24) | \
+ (((PRUint32)b1) << 16) | \
+ (((PRUint32)b2) << 8) | \
+ ((PRUint32)b3))
+#endif
+
+/*
+ * Define the S and S**-1 tables (both have been stored)
+ */
+#define SBOX(b) (_S[b])
+#define SINV(b) (_SInv[b])
+
+/*
+ * The function xtime, used for Galois field multiplication: shift a left
+ * by one bit and, when bit 7 of a was set, fold in the reduction constant
+ * 0x1b.  The result is NOT masked to 8 bits; callers either store it in a
+ * PRUint8 or mask it themselves.  The argument is evaluated more than
+ * once, so it must be free of side effects.
+ */
+#define XTIME(a) \
+    (((a)&0x80) ? (((a) << 1) ^ 0x1b) : ((a) << 1))
+
+/* Choose GFM method (macros or function) */
+#if defined(RIJNDAEL_GENERATE_TABLES_MACRO) || \
+ defined(RIJNDAEL_GENERATE_VALUES_MACRO)
+
+/*
+ * Galois field GF(2**8) multipliers, in macro form
+ */
+#define GFM01(a) \
+ (a) /* a * 01 = a, the identity */
+#define GFM02(a) \
+ (XTIME(a) & 0xff) /* a * 02 = xtime(a) */
+#define GFM04(a) \
+ (GFM02(GFM02(a))) /* a * 04 = xtime**2(a) */
+#define GFM08(a) \
+ (GFM02(GFM04(a))) /* a * 08 = xtime**3(a) */
+#define GFM03(a) \
+ (GFM01(a) ^ GFM02(a)) /* a * 03 = a * (01 + 02) */
+#define GFM09(a) \
+ (GFM01(a) ^ GFM08(a)) /* a * 09 = a * (01 + 08) */
+#define GFM0B(a) \
+ (GFM01(a) ^ GFM02(a) ^ GFM08(a)) /* a * 0B = a * (01 + 02 + 08) */
+#define GFM0D(a) \
+ (GFM01(a) ^ GFM04(a) ^ GFM08(a)) /* a * 0D = a * (01 + 04 + 08) */
+#define GFM0E(a) \
+ (GFM02(a) ^ GFM04(a) ^ GFM08(a)) /* a * 0E = a * (02 + 04 + 08) */
+
+#else /* RIJNDAEL_GENERATE_TABLES or RIJNDAEL_GENERATE_VALUES */
+
+/* GF_MULTIPLY
+ *
+ * Multiply two bytes as elements of GF(2**8) by shift-and-add: for every
+ * set bit of b (least significant first) the current multiple of a is
+ * XORed into the product, and a is advanced with XTIME, which applies the
+ * field reduction (constant 0x1b).
+ */
+PRUint8
+gfm(PRUint8 a, PRUint8 b)
+{
+    PRUint8 product = 0;
+
+    for (; b != 0; b >>= 1) {
+        if (b & 0x01) {
+            product ^= a;
+        }
+        a = XTIME(a); /* truncated back to 8 bits by the assignment */
+    }
+    return product;
+}
+
+#define GFM01(a) \
+ (a) /* a * 01 = a, the identity */
+#define GFM02(a) \
+ (XTIME(a) & 0xff) /* a * 02 = xtime(a) */
+#define GFM03(a) \
+ (gfm(a, 0x03)) /* a * 03 */
+#define GFM09(a) \
+ (gfm(a, 0x09)) /* a * 09 */
+#define GFM0B(a) \
+ (gfm(a, 0x0B)) /* a * 0B */
+#define GFM0D(a) \
+ (gfm(a, 0x0D)) /* a * 0D */
+#define GFM0E(a) \
+ (gfm(a, 0x0E)) /* a * 0E */
+
+#endif /* choosing GFM function */
+
+/*
+ * The T-tables
+ */
+#define G_T0(i) \
+ (WORD4(GFM02(SBOX(i)), GFM01(SBOX(i)), GFM01(SBOX(i)), GFM03(SBOX(i))))
+#define G_T1(i) \
+ (WORD4(GFM03(SBOX(i)), GFM02(SBOX(i)), GFM01(SBOX(i)), GFM01(SBOX(i))))
+#define G_T2(i) \
+ (WORD4(GFM01(SBOX(i)), GFM03(SBOX(i)), GFM02(SBOX(i)), GFM01(SBOX(i))))
+#define G_T3(i) \
+ (WORD4(GFM01(SBOX(i)), GFM01(SBOX(i)), GFM03(SBOX(i)), GFM02(SBOX(i))))
+
+/*
+ * The inverse T-tables
+ */
+#define G_TInv0(i) \
+ (WORD4(GFM0E(SINV(i)), GFM09(SINV(i)), GFM0D(SINV(i)), GFM0B(SINV(i))))
+#define G_TInv1(i) \
+ (WORD4(GFM0B(SINV(i)), GFM0E(SINV(i)), GFM09(SINV(i)), GFM0D(SINV(i))))
+#define G_TInv2(i) \
+ (WORD4(GFM0D(SINV(i)), GFM0B(SINV(i)), GFM0E(SINV(i)), GFM09(SINV(i))))
+#define G_TInv3(i) \
+ (WORD4(GFM09(SINV(i)), GFM0D(SINV(i)), GFM0B(SINV(i)), GFM0E(SINV(i))))
+
+/*
+ * The inverse mix column tables
+ */
+#define G_IMXC0(i) \
+ (WORD4(GFM0E(i), GFM09(i), GFM0D(i), GFM0B(i)))
+#define G_IMXC1(i) \
+ (WORD4(GFM0B(i), GFM0E(i), GFM09(i), GFM0D(i)))
+#define G_IMXC2(i) \
+ (WORD4(GFM0D(i), GFM0B(i), GFM0E(i), GFM09(i)))
+#define G_IMXC3(i) \
+ (WORD4(GFM09(i), GFM0D(i), GFM0B(i), GFM0E(i)))
+
+/* Now choose the T-table indexing method */
+#if defined(RIJNDAEL_GENERATE_VALUES)
+/* generate values for the tables with a function*/
+/*
+ * Compute entry i of inverse T-table number tx (0..3) on the fly.
+ * The multiples 09, 0B, 0D and 0E of SINV(i) are built from repeated
+ * XTIME doublings and packed with WORD4 in the rotation that belongs to
+ * the requested table.  Any other tx yields (PRUint32)-1.
+ */
+static PRUint32
+gen_TInvXi(PRUint8 tx, PRUint8 i)
+{
+    const PRUint8 x1 = SINV(i);
+    const PRUint8 x2 = XTIME(x1);
+    const PRUint8 x4 = XTIME(x2);
+    const PRUint8 x8 = XTIME(x4);
+    const PRUint8 m09 = x8 ^ x1;
+    const PRUint8 m0B = x8 ^ x2 ^ x1;
+    const PRUint8 m0D = x8 ^ x4 ^ x1;
+    const PRUint8 m0E = x8 ^ x4 ^ x2;
+
+    if (tx == 0) {
+        return WORD4(m0E, m09, m0D, m0B);
+    }
+    if (tx == 1) {
+        return WORD4(m0B, m0E, m09, m0D);
+    }
+    if (tx == 2) {
+        return WORD4(m0D, m0B, m0E, m09);
+    }
+    if (tx == 3) {
+        return WORD4(m09, m0D, m0B, m0E);
+    }
+    return (PRUint32)-1;
+}
+#define T0(i) G_T0(i)
+#define T1(i) G_T1(i)
+#define T2(i) G_T2(i)
+#define T3(i) G_T3(i)
+#define TInv0(i) gen_TInvXi(0, i)
+#define TInv1(i) gen_TInvXi(1, i)
+#define TInv2(i) gen_TInvXi(2, i)
+#define TInv3(i) gen_TInvXi(3, i)
+#define IMXC0(b) G_IMXC0(b)
+#define IMXC1(b) G_IMXC1(b)
+#define IMXC2(b) G_IMXC2(b)
+#define IMXC3(b) G_IMXC3(b)
+#elif defined(RIJNDAEL_GENERATE_VALUES_MACRO)
+/* generate values for the tables with macros */
+#define T0(i) G_T0(i)
+#define T1(i) G_T1(i)
+#define T2(i) G_T2(i)
+#define T3(i) G_T3(i)
+#define TInv0(i) G_TInv0(i)
+#define TInv1(i) G_TInv1(i)
+#define TInv2(i) G_TInv2(i)
+#define TInv3(i) G_TInv3(i)
+#define IMXC0(b) G_IMXC0(b)
+#define IMXC1(b) G_IMXC1(b)
+#define IMXC2(b) G_IMXC2(b)
+#define IMXC3(b) G_IMXC3(b)
+#else /* RIJNDAEL_GENERATE_TABLES or RIJNDAEL_GENERATE_TABLES_MACRO */
+/* Generate T and T**-1 table values and store, then index */
+/* The inverse mix column tables are still generated */
+#define T0(i) rijndaelTables->T0[i]
+#define T1(i) rijndaelTables->T1[i]
+#define T2(i) rijndaelTables->T2[i]
+#define T3(i) rijndaelTables->T3[i]
+#define TInv0(i) rijndaelTables->TInv0[i]
+#define TInv1(i) rijndaelTables->TInv1[i]
+#define TInv2(i) rijndaelTables->TInv2[i]
+#define TInv3(i) rijndaelTables->TInv3[i]
+#define IMXC0(b) G_IMXC0(b)
+#define IMXC1(b) G_IMXC1(b)
+#define IMXC2(b) G_IMXC2(b)
+#define IMXC3(b) G_IMXC3(b)
+#endif /* choose T-table indexing method */
+
+#endif /* not RIJNDAEL_INCLUDE_TABLES */
+
+#if defined(RIJNDAEL_GENERATE_TABLES) || \
+ defined(RIJNDAEL_GENERATE_TABLES_MACRO)
+
+/* Code to generate and store the tables */
+
+struct rijndael_tables_str {
+ PRUint32 T0[256]; /* forward T-tables, one word per byte value; filled by init_rijndael_tables */
+ PRUint32 T1[256];
+ PRUint32 T2[256];
+ PRUint32 T3[256];
+ PRUint32 TInv0[256]; /* inverse T-tables, same layout */
+ PRUint32 TInv1[256];
+ PRUint32 TInv2[256];
+ PRUint32 TInv3[256];
+};
+
+static struct rijndael_tables_str *rijndaelTables = NULL; /* set by init_rijndael_tables only after every entry is computed */
+static PRCallOnceType coRTInit = { 0, 0, 0 }; /* NOTE(review): presumably the call-once guard for init_rijndael_tables -- confirm at the call site */
+/*
+ * Allocate the T / T**-1 tables, compute all 256 entries of each, and
+ * publish them through rijndaelTables.
+ * Returns PR_FAILURE if the allocation fails, PR_SUCCESS otherwise.
+ */
+static PRStatus
+init_rijndael_tables(void)
+{
+    struct rijndael_tables_str *tables;
+    PRUint32 idx;
+
+    tables = (struct rijndael_tables_str *)
+        PORT_Alloc(sizeof(struct rijndael_tables_str));
+    if (tables == NULL) {
+        return PR_FAILURE;
+    }
+    for (idx = 0; idx < 256; idx++) {
+        PRUint8 x1, x2, x3, x4, x8, x9, xB, xD, xE;
+
+        /* forward tables: multiples 01, 02, 03 of the S-box value */
+        x1 = SBOX(idx);
+        x2 = XTIME(x1);
+        x3 = x2 ^ x1;
+        tables->T0[idx] = WORD4(x2, x1, x1, x3);
+        tables->T1[idx] = WORD4(x3, x2, x1, x1);
+        tables->T2[idx] = WORD4(x1, x3, x2, x1);
+        tables->T3[idx] = WORD4(x1, x1, x3, x2);
+
+        /* inverse tables: multiples 09, 0B, 0D, 0E of the inverse
+         * S-box value */
+        x1 = SINV(idx);
+        x2 = XTIME(x1);
+        x4 = XTIME(x2);
+        x8 = XTIME(x4);
+        x3 = x2 ^ x1;
+        x9 = x8 ^ x1;
+        xB = x8 ^ x3;
+        xD = x9 ^ x4;
+        xE = x8 ^ x4 ^ x2;
+        tables->TInv0[idx] = WORD4(xE, x9, xD, xB);
+        tables->TInv1[idx] = WORD4(xB, xE, x9, xD);
+        tables->TInv2[idx] = WORD4(xD, xB, xE, x9);
+        tables->TInv3[idx] = WORD4(x9, xD, xB, xE);
+    }
+    /* publish only once every value is in place */
+    rijndaelTables = tables;
+    return PR_SUCCESS;
+}
+
+#endif /* code to generate tables */
+
+/**************************************************************************
+ *
+ * Stuff related to the Rijndael key schedule
+ *
+ *************************************************************************/
+
+/*
+ * SUBBYTE applies the S-box to each of the four bytes of a 32-bit word.
+ * ROTBYTE rotates a 32-bit word by one byte; the direction depends on the
+ * host byte order.
+ * Both macros evaluate their argument more than once, so it must be free
+ * of side effects; the argument is parenthesized so that compound
+ * expressions can be passed safely.
+ */
+#define SUBBYTE(w)                                    \
+    ((((PRUint32)SBOX(((w) >> 24) & 0xff)) << 24) |   \
+     (((PRUint32)SBOX(((w) >> 16) & 0xff)) << 16) |   \
+     (((PRUint32)SBOX(((w) >> 8) & 0xff)) << 8) |     \
+     (((PRUint32)SBOX((w)&0xff))))
+
+#ifdef IS_LITTLE_ENDIAN
+#define ROTBYTE(b) \
+    (((b) >> 8) | ((b) << 24))
+#else
+#define ROTBYTE(b) \
+    (((b) << 8) | ((b) >> 24))
+#endif
+
+/* rijndael_key_expansion7
+ *
+ * Generate the expanded key from the key input by the user.
+ * XXX
+ * Nk == 7 (224 key bits) is a weird case.  Since Nk > 6, an added SubByte
+ * transformation is done periodically.  The period is every 4 bytes, and
+ * since 7%4 != 0 this happens at different times for each key word (unlike
+ * Nk == 8 where it happens twice in every key word, in the same positions).
+ * This case is therefore written as one plain word-by-word loop, without
+ * any unrolling.  Always returns SECSuccess.
+ */
+static SECStatus
+rijndael_key_expansion7(AESContext *cx, const unsigned char *key, unsigned int Nk)
+{
+    PRUint32 *const W = cx->expandedKey;
+    unsigned int i;
+
+    /* 1. the first Nk words contain the cipher key */
+    memcpy(W, key, Nk * 4);
+    /* 2. every further word combines its predecessor (possibly
+     *    transformed) with the word Nk positions back */
+    for (i = Nk; i < cx->Nb * (cx->Nr + 1); ++i) {
+        PRUint32 prev = W[i - 1];
+
+        if (i % Nk == 0) {
+            prev = SUBBYTE(ROTBYTE(prev)) ^ Rcon[i / Nk - 1];
+        } else if (i % Nk == 4) {
+            prev = SUBBYTE(prev);
+        }
+        W[i] = W[i - Nk] ^ prev;
+    }
+    return SECSuccess;
+}
+
+/* rijndael_key_expansion
+ *
+ * Generate the expanded key from the key input by the user.
+ *
+ * Fills cx->expandedKey with cx->Nb * (cx->Nr + 1) 32-bit words, starting
+ * with a verbatim copy of the Nk key words. Nk == 7 is handed off to
+ * rijndael_key_expansion7. Throughout the body pW points at word i - 1
+ * before each "tmp = *pW++" and at word i when it is written, so pW and i
+ * must stay in step. Always returns SECSuccess.
+ */
+static SECStatus
+rijndael_key_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk)
+{
+ unsigned int i;
+ PRUint32 *W;
+ PRUint32 *pW;
+ PRUint32 tmp;
+ unsigned int round_key_words = cx->Nb * (cx->Nr + 1);
+ if (Nk == 7)
+ return rijndael_key_expansion7(cx, key, Nk);
+ W = cx->expandedKey;
+ /* The first Nk words contain the input cipher key */
+ memcpy(W, key, Nk * 4);
+ i = Nk;
+ pW = W + i - 1;
+ /* Loop over all sets of Nk words, except the last */
+ while (i < round_key_words - Nk) {
+ tmp = *pW++;
+ tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1]; /* first word of a set: rotate, substitute, add round constant */
+ *pW = W[i++ - Nk] ^ tmp;
+ tmp = *pW++;
+ *pW = W[i++ - Nk] ^ tmp;
+ tmp = *pW++;
+ *pW = W[i++ - Nk] ^ tmp;
+ tmp = *pW++;
+ *pW = W[i++ - Nk] ^ tmp;
+ if (Nk == 4)
+ continue;
+ switch (Nk) { /* remaining Nk - 4 words of the set; each case falls through to the next */
+ case 8:
+ tmp = *pW++;
+ tmp = SUBBYTE(tmp);
+ *pW = W[i++ - Nk] ^ tmp; /* fall through */
+ case 7: /* not entered directly: Nk == 7 returned above */
+ tmp = *pW++;
+ *pW = W[i++ - Nk] ^ tmp; /* fall through */
+ case 6:
+ tmp = *pW++;
+ *pW = W[i++ - Nk] ^ tmp; /* fall through */
+ case 5:
+ tmp = *pW++;
+ *pW = W[i++ - Nk] ^ tmp;
+ }
+ }
+ /* Generate the last word */
+ tmp = *pW++;
+ tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1];
+ *pW = W[i++ - Nk] ^ tmp;
+ /* There may be overflow here, if Nk % (Nb * (Nr + 1)) > 0. However,
+ * since the above loop generated all but the last Nk key words, there
+ * is no more need for the SubByte transformation.
+ */
+ if (Nk < 8) {
+ for (; i < round_key_words; ++i) {
+ tmp = *pW++;
+ *pW = W[i - Nk] ^ tmp;
+ }
+ } else {
+ /* except in the case when Nk == 8. Then one more SubByte may have
+ * to be performed, at i % Nk == 4.
+ */
+ for (; i < round_key_words; ++i) {
+ tmp = *pW++;
+ if (i % Nk == 4)
+ tmp = SUBBYTE(tmp);
+ *pW = W[i - Nk] ^ tmp;
+ }
+ }
+ return SECSuccess;
+}
+
+/* rijndael_invkey_expansion
+ *
+ * Generate the expanded key for the inverse cipher from the key input by
+ * the user.
+ *
+ * Runs rijndael_key_expansion first, then rewrites round keys 1 .. Nr-1 in
+ * place in cx->expandedKey by passing every key word through the IMXC
+ * (inverse mix column) tables. Returns SECFailure only if the forward
+ * expansion reports failure, otherwise SECSuccess.
+ */
+static SECStatus
+rijndael_invkey_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk)
+{
+ unsigned int r;
+ PRUint32 *roundkeyw;
+ PRUint8 *b;
+ int Nb = cx->Nb;
+ /* begins like usual key expansion ... */
+ if (rijndael_key_expansion(cx, key, Nk) != SECSuccess)
+ return SECFailure;
+ /* ... but has the additional step of InvMixColumn,
+ * excepting the first and last round keys.
+ */
+ roundkeyw = cx->expandedKey + cx->Nb; /* skip round key 0 */
+ for (r = 1; r < cx->Nr; ++r) {
+ /* each key word, roundkeyw, represents a column in the key
+ * matrix. Each column is multiplied by the InvMixColumn matrix.
+ * [ 0E 0B 0D 09 ] [ b0 ]
+ * [ 09 0E 0B 0D ] * [ b1 ]
+ * [ 0D 09 0E 0B ] [ b2 ]
+ * [ 0B 0D 09 0E ] [ b3 ]
+ */
+ b = (PRUint8 *)roundkeyw; /* bytes are read before the word is overwritten */
+ *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]);
+ b = (PRUint8 *)roundkeyw;
+ *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]);
+ b = (PRUint8 *)roundkeyw;
+ *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]);
+ b = (PRUint8 *)roundkeyw;
+ *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^ IMXC2(b[2]) ^ IMXC3(b[3]);
+ if (Nb <= 4)
+ continue;
+ switch (Nb) { /* remaining Nb - 4 columns of this round key; each case falls through to the next */
+ case 8:
+ b = (PRUint8 *)roundkeyw;
+ *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^
+ IMXC2(b[2]) ^ IMXC3(b[3]); /* fall through */
+ case 7:
+ b = (PRUint8 *)roundkeyw;
+ *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^
+ IMXC2(b[2]) ^ IMXC3(b[3]); /* fall through */
+ case 6:
+ b = (PRUint8 *)roundkeyw;
+ *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^
+ IMXC2(b[2]) ^ IMXC3(b[3]); /* fall through */
+ case 5:
+ b = (PRUint8 *)roundkeyw;
+ *roundkeyw++ = IMXC0(b[0]) ^ IMXC1(b[1]) ^
+ IMXC2(b[2]) ^ IMXC3(b[3]);
+ }
+ }
+ return SECSuccess;
+}
+/**************************************************************************
+ *
+ * Stuff related to Rijndael encryption/decryption, optimized for
+ * a 128-bit blocksize.
+ *
+ *************************************************************************/
+
+#ifdef IS_LITTLE_ENDIAN
+#define BYTE0WORD(w) ((w)&0x000000ff)
+#define BYTE1WORD(w) ((w)&0x0000ff00)
+#define BYTE2WORD(w) ((w)&0x00ff0000)
+#define BYTE3WORD(w) ((w)&0xff000000)
+#else
+#define BYTE0WORD(w) ((w)&0xff000000)
+#define BYTE1WORD(w) ((w)&0x00ff0000)
+#define BYTE2WORD(w) ((w)&0x0000ff00)
+#define BYTE3WORD(w) ((w)&0x000000ff)
+#endif
+
+typedef union {
+ PRUint32 w[4];
+ PRUint8 b[16];
+} rijndael_state;
+
+#define COLUMN_0(state) state.w[0]
+#define COLUMN_1(state) state.w[1]
+#define COLUMN_2(state) state.w[2]
+#define COLUMN_3(state) state.w[3]
+
+#define STATE_BYTE(i) state.b[i]
+
+static SECStatus NO_SANITIZE_ALIGNMENT /* encrypts one 16-byte block from input into output with cx->expandedKey; always returns SECSuccess */
+rijndael_encryptBlock128(AESContext *cx,
+ unsigned char *output,
+ const unsigned char *input)
+{
+ unsigned int r;
+ PRUint32 *roundkeyw;
+ rijndael_state state;
+ PRUint32 C0, C1, C2, C3;
+#if defined(NSS_X86_OR_X64)
+#define pIn input /* on these targets the caller's buffers are accessed as 32-bit words directly */
+#define pOut output
+#else
+ unsigned char *pIn, *pOut;
+ PRUint32 inBuf[4], outBuf[4];
+
+ if ((ptrdiff_t)input & 0x3) { /* input is not 4-byte aligned: work on an aligned copy */
+ memcpy(inBuf, input, sizeof inBuf);
+ pIn = (unsigned char *)inBuf;
+ } else {
+ pIn = (unsigned char *)input;
+ }
+ if ((ptrdiff_t)output & 0x3) { /* output is not 4-byte aligned: write to outBuf, copied out at the end */
+ pOut = (unsigned char *)outBuf;
+ } else {
+ pOut = (unsigned char *)output;
+ }
+#endif
+ roundkeyw = cx->expandedKey;
+ /* Step 1: Add Round Key 0 to initial state */
+ COLUMN_0(state) = *((PRUint32 *)(pIn)) ^ *roundkeyw++;
+ COLUMN_1(state) = *((PRUint32 *)(pIn + 4)) ^ *roundkeyw++;
+ COLUMN_2(state) = *((PRUint32 *)(pIn + 8)) ^ *roundkeyw++;
+ COLUMN_3(state) = *((PRUint32 *)(pIn + 12)) ^ *roundkeyw++;
+ /* Step 2: Loop over rounds [1..NR-1] */
+ for (r = 1; r < cx->Nr; ++r) {
+ /* Do ShiftRow, ByteSub, and MixColumn all at once */
+ C0 = T0(STATE_BYTE(0)) ^
+ T1(STATE_BYTE(5)) ^
+ T2(STATE_BYTE(10)) ^
+ T3(STATE_BYTE(15));
+ C1 = T0(STATE_BYTE(4)) ^
+ T1(STATE_BYTE(9)) ^
+ T2(STATE_BYTE(14)) ^
+ T3(STATE_BYTE(3));
+ C2 = T0(STATE_BYTE(8)) ^
+ T1(STATE_BYTE(13)) ^
+ T2(STATE_BYTE(2)) ^
+ T3(STATE_BYTE(7));
+ C3 = T0(STATE_BYTE(12)) ^
+ T1(STATE_BYTE(1)) ^
+ T2(STATE_BYTE(6)) ^
+ T3(STATE_BYTE(11));
+ /* Round key addition */
+ COLUMN_0(state) = C0 ^ *roundkeyw++; /* state is only overwritten after all four columns were computed from the old state */
+ COLUMN_1(state) = C1 ^ *roundkeyw++;
+ COLUMN_2(state) = C2 ^ *roundkeyw++;
+ COLUMN_3(state) = C3 ^ *roundkeyw++;
+ }
+ /* Step 3: Do the last round */
+ /* Final round does not employ MixColumn */
+ C0 = ((BYTE0WORD(T2(STATE_BYTE(0)))) | /* each BYTEnWORD mask keeps a single byte lane of the T-table word */
+ (BYTE1WORD(T3(STATE_BYTE(5)))) |
+ (BYTE2WORD(T0(STATE_BYTE(10)))) |
+ (BYTE3WORD(T1(STATE_BYTE(15))))) ^
+ *roundkeyw++;
+ C1 = ((BYTE0WORD(T2(STATE_BYTE(4)))) |
+ (BYTE1WORD(T3(STATE_BYTE(9)))) |
+ (BYTE2WORD(T0(STATE_BYTE(14)))) |
+ (BYTE3WORD(T1(STATE_BYTE(3))))) ^
+ *roundkeyw++;
+ C2 = ((BYTE0WORD(T2(STATE_BYTE(8)))) |
+ (BYTE1WORD(T3(STATE_BYTE(13)))) |
+ (BYTE2WORD(T0(STATE_BYTE(2)))) |
+ (BYTE3WORD(T1(STATE_BYTE(7))))) ^
+ *roundkeyw++;
+ C3 = ((BYTE0WORD(T2(STATE_BYTE(12)))) |
+ (BYTE1WORD(T3(STATE_BYTE(1)))) |
+ (BYTE2WORD(T0(STATE_BYTE(6)))) |
+ (BYTE3WORD(T1(STATE_BYTE(11))))) ^
+ *roundkeyw++;
+ *((PRUint32 *)pOut) = C0;
+ *((PRUint32 *)(pOut + 4)) = C1;
+ *((PRUint32 *)(pOut + 8)) = C2;
+ *((PRUint32 *)(pOut + 12)) = C3;
+#if defined(NSS_X86_OR_X64)
+#undef pIn
+#undef pOut
+#else
+ if ((ptrdiff_t)output & 0x3) { /* result was produced in outBuf; deliver it to the unaligned caller buffer */
+ memcpy(output, outBuf, sizeof outBuf);
+ }
+#endif
+ return SECSuccess;
+}
+
+/*
+ * Decrypt a single 16-byte block.
+ *
+ * cx     - context supplying the round keys (cx->expandedKey), cx->Nb and
+ *          the round count (cx->Nr)
+ * output - receives 16 bytes of plaintext
+ * input  - 16 bytes of ciphertext
+ *
+ * The round keys are consumed from the end of the expanded key towards the
+ * start. Always returns SECSuccess.
+ */
+static SECStatus NO_SANITIZE_ALIGNMENT
+rijndael_decryptBlock128(AESContext *cx,
+ unsigned char *output,
+ const unsigned char *input)
+{
+ int r;
+ PRUint32 *roundkeyw;
+ rijndael_state state;
+ PRUint32 C0, C1, C2, C3;
+#if defined(NSS_X86_OR_X64)
+ /* On x86/x64 the caller's buffers are accessed as words directly. */
+#define pIn input
+#define pOut output
+#else
+ unsigned char *pIn, *pOut;
+ PRUint32 inBuf[4], outBuf[4];
+
+ /* Elsewhere, word accesses go through 4-byte aligned storage: a
+ * misaligned input is copied into inBuf first, and a misaligned output
+ * is produced in outBuf and copied out at the end of the function. */
+ if ((ptrdiff_t)input & 0x3) {
+ memcpy(inBuf, input, sizeof inBuf);
+ pIn = (unsigned char *)inBuf;
+ } else {
+ pIn = (unsigned char *)input;
+ }
+ if ((ptrdiff_t)output & 0x3) {
+ pOut = (unsigned char *)outBuf;
+ } else {
+ pOut = (unsigned char *)output;
+ }
+#endif
+ /* Start at word index Nb*Nr + 3 and walk backwards; with Nb == 4 that is
+ * the last word of the final round key. */
+ roundkeyw = cx->expandedKey + cx->Nb * cx->Nr + 3;
+ /* reverse the final key addition */
+ COLUMN_3(state) = *((PRUint32 *)(pIn + 12)) ^ *roundkeyw--;
+ COLUMN_2(state) = *((PRUint32 *)(pIn + 8)) ^ *roundkeyw--;
+ COLUMN_1(state) = *((PRUint32 *)(pIn + 4)) ^ *roundkeyw--;
+ COLUMN_0(state) = *((PRUint32 *)(pIn)) ^ *roundkeyw--;
+ /* Loop over rounds in reverse [NR..1] */
+ for (r = cx->Nr; r > 1; --r) {
+ /* Invert the (InvByteSub*InvMixColumn)(InvShiftRow(state)) */
+ /* All four new columns are computed into C0..C3 before the state is
+ * overwritten, because each one reads bytes from every old column. */
+ C0 = TInv0(STATE_BYTE(0)) ^
+ TInv1(STATE_BYTE(13)) ^
+ TInv2(STATE_BYTE(10)) ^
+ TInv3(STATE_BYTE(7));
+ C1 = TInv0(STATE_BYTE(4)) ^
+ TInv1(STATE_BYTE(1)) ^
+ TInv2(STATE_BYTE(14)) ^
+ TInv3(STATE_BYTE(11));
+ C2 = TInv0(STATE_BYTE(8)) ^
+ TInv1(STATE_BYTE(5)) ^
+ TInv2(STATE_BYTE(2)) ^
+ TInv3(STATE_BYTE(15));
+ C3 = TInv0(STATE_BYTE(12)) ^
+ TInv1(STATE_BYTE(9)) ^
+ TInv2(STATE_BYTE(6)) ^
+ TInv3(STATE_BYTE(3));
+ /* Invert the key addition step */
+ COLUMN_3(state) = C3 ^ *roundkeyw--;
+ COLUMN_2(state) = C2 ^ *roundkeyw--;
+ COLUMN_1(state) = C1 ^ *roundkeyw--;
+ COLUMN_0(state) = C0 ^ *roundkeyw--;
+ }
+ /* inverse sub */
+ /* The byte permutation used in the rounds above is applied here as well
+ * (output byte 1 comes from state byte 13, and so on). */
+ pOut[0] = SINV(STATE_BYTE(0));
+ pOut[1] = SINV(STATE_BYTE(13));
+ pOut[2] = SINV(STATE_BYTE(10));
+ pOut[3] = SINV(STATE_BYTE(7));
+ pOut[4] = SINV(STATE_BYTE(4));
+ pOut[5] = SINV(STATE_BYTE(1));
+ pOut[6] = SINV(STATE_BYTE(14));
+ pOut[7] = SINV(STATE_BYTE(11));
+ pOut[8] = SINV(STATE_BYTE(8));
+ pOut[9] = SINV(STATE_BYTE(5));
+ pOut[10] = SINV(STATE_BYTE(2));
+ pOut[11] = SINV(STATE_BYTE(15));
+ pOut[12] = SINV(STATE_BYTE(12));
+ pOut[13] = SINV(STATE_BYTE(9));
+ pOut[14] = SINV(STATE_BYTE(6));
+ pOut[15] = SINV(STATE_BYTE(3));
+ /* final key addition */
+ /* XORs the remaining four key words into the output in place. */
+ *((PRUint32 *)(pOut + 12)) ^= *roundkeyw--;
+ *((PRUint32 *)(pOut + 8)) ^= *roundkeyw--;
+ *((PRUint32 *)(pOut + 4)) ^= *roundkeyw--;
+ *((PRUint32 *)pOut) ^= *roundkeyw--;
+#if defined(NSS_X86_OR_X64)
+#undef pIn
+#undef pOut
+#else
+ /* Deliver the result when it was produced in the aligned scratch buffer. */
+ if ((ptrdiff_t)output & 0x3) {
+ memcpy(output, outBuf, sizeof outBuf);
+ }
+#endif
+ return SECSuccess;
+}
+
+/**************************************************************************
+ *
+ * Stuff related to general Rijndael encryption/decryption, for blocksizes
+ * greater than 128 bits.
+ *
+ * XXX This code is currently untested! So far, AES specs have only been
+ * released for 128 bit blocksizes. This will be tested, but for now
+ * only the code above has been tested using known values.
+ *
+ *************************************************************************/
+
+/* Access a PRUint32 at offset j from array (a byte offset when array is a
+ * byte pointer). This is an unchecked pointer cast. */
+#define COLUMN(array, j) *((PRUint32 *)(array + j))
+
+/*
+ * Block encryption entry point for block sizes other than 128 bits.
+ *
+ * Currently disabled: the function returns SECFailure unconditionally. The
+ * remaining body is compiled only when rijndael_large_blocks_fixed is
+ * defined, and even then it sits after the return statement.
+ */
+SECStatus
+rijndael_encryptBlock(AESContext *cx,
+ unsigned char *output,
+ const unsigned char *input)
+{
+ return SECFailure;
+#ifdef rijndael_large_blocks_fixed
+ /* NOTE(review): the code below uses STATE_BYTE(), which expands to
+ * state.b[i], but only "clone" is declared here - it looks like it would
+ * not compile as written if enabled. Confirm before reviving it. */
+ unsigned int j, r, Nb;
+ unsigned int c2 = 0, c3 = 0;
+ PRUint32 *roundkeyw;
+ PRUint8 clone[RIJNDAEL_MAX_STATE_SIZE];
+ Nb = cx->Nb;
+ roundkeyw = cx->expandedKey;
+ /* Step 1: Add Round Key 0 to initial state */
+ for (j = 0; j < 4 * Nb; j += 4) {
+ COLUMN(clone, j) = COLUMN(input, j) ^ *roundkeyw++;
+ }
+ /* Step 2: Loop over rounds [1..NR-1] */
+ for (r = 1; r < cx->Nr; ++r) {
+ /* NOTE(review): COLUMN(output, j) is indexed by column number here
+ * but by byte offset (j += 4) in the loop that follows. */
+ for (j = 0; j < Nb; ++j) {
+ COLUMN(output, j) = T0(STATE_BYTE(4 * j)) ^
+ T1(STATE_BYTE(4 * ((j + 1) % Nb) + 1)) ^
+ T2(STATE_BYTE(4 * ((j + c2) % Nb) + 2)) ^
+ T3(STATE_BYTE(4 * ((j + c3) % Nb) + 3));
+ }
+ for (j = 0; j < 4 * Nb; j += 4) {
+ COLUMN(clone, j) = COLUMN(output, j) ^ *roundkeyw++;
+ }
+ }
+ /* Step 3: Do the last round */
+ /* Final round does not employ MixColumn */
+ /* NOTE(review): the index expressions below are parenthesized differently
+ * from the main loop (4 * (j + 1) % Nb, with "+ 1" outside STATE_BYTE). */
+ for (j = 0; j < Nb; ++j) {
+ COLUMN(output, j) = ((BYTE0WORD(T2(STATE_BYTE(4 * j)))) |
+ (BYTE1WORD(T3(STATE_BYTE(4 * (j + 1) % Nb) + 1))) |
+ (BYTE2WORD(T0(STATE_BYTE(4 * (j + c2) % Nb) + 2))) |
+ (BYTE3WORD(T1(STATE_BYTE(4 * (j + c3) % Nb) + 3)))) ^
+ *roundkeyw++;
+ }
+ return SECSuccess;
+#endif
+}
+
+/*
+ * Block decryption entry point for block sizes other than 128 bits.
+ *
+ * Currently disabled: the function returns SECFailure unconditionally. The
+ * remaining body is compiled only when rijndael_large_blocks_fixed is
+ * defined, and even then it sits after the return statement.
+ */
+SECStatus
+rijndael_decryptBlock(AESContext *cx,
+ unsigned char *output,
+ const unsigned char *input)
+{
+ return SECFailure;
+#ifdef rijndael_large_blocks_fixed
+ /* NOTE(review): the code below uses STATE_BYTE(), which expands to
+ * state.b[i], but only "clone" is declared here - it looks like it would
+ * not compile as written if enabled. Confirm before reviving it. */
+ int j, r, Nb;
+ int c2 = 0, c3 = 0;
+ PRUint32 *roundkeyw;
+ PRUint8 clone[RIJNDAEL_MAX_STATE_SIZE];
+ Nb = cx->Nb;
+ roundkeyw = cx->expandedKey + cx->Nb * cx->Nr + 3;
+ /* reverse key addition */
+ /* NOTE(review): j runs from 4*Nb down to 0 inclusive, i.e. Nb + 1 words
+ * starting one word past the block - looks like an off-by-one; the same
+ * pattern is repeated in the two loops further down. */
+ for (j = 4 * Nb; j >= 0; j -= 4) {
+ COLUMN(clone, j) = COLUMN(input, j) ^ *roundkeyw--;
+ }
+ /* Loop over rounds in reverse [NR..1] */
+ for (r = cx->Nr; r > 1; --r) {
+ /* Invert the (InvByteSub*InvMixColumn)(InvShiftRow(state)) */
+ for (j = 0; j < Nb; ++j) {
+ COLUMN(output, 4 * j) = TInv0(STATE_BYTE(4 * j)) ^
+ TInv1(STATE_BYTE(4 * (j + Nb - 1) % Nb) + 1) ^
+ TInv2(STATE_BYTE(4 * (j + Nb - c2) % Nb) + 2) ^
+ TInv3(STATE_BYTE(4 * (j + Nb - c3) % Nb) + 3);
+ }
+ /* Invert the key addition step */
+ for (j = 4 * Nb; j >= 0; j -= 4) {
+ COLUMN(clone, j) = COLUMN(output, j) ^ *roundkeyw--;
+ }
+ }
+ /* inverse sub */
+ for (j = 0; j < 4 * Nb; ++j) {
+ output[j] = SINV(clone[j]);
+ }
+ /* final key addition */
+ for (j = 4 * Nb; j >= 0; j -= 4) {
+ COLUMN(output, j) ^= *roundkeyw--;
+ }
+ return SECSuccess;
+#endif
+}
+
+/**************************************************************************
+ *
+ * Rijndael modes of operation (ECB and CBC)
+ *
+ *************************************************************************/
+
+/*
+ * ECB encryption worker: runs the block encryptor over input one block at
+ * a time, writing each result to the matching position in output.
+ *
+ * outputLen and maxOutputLen are not used here. inputLen is consumed in
+ * steps of blocksize, so it is expected to be a whole number of blocks.
+ * Returns SECSuccess, or the first non-success status from the block
+ * function.
+ */
+static SECStatus
+rijndael_encryptECB(AESContext *cx, unsigned char *output,
+                    unsigned int *outputLen, unsigned int maxOutputLen,
+                    const unsigned char *input, unsigned int inputLen,
+                    unsigned int blocksize)
+{
+    AESBlockFunc *blockFn = &rijndael_encryptBlock;
+    unsigned int remaining;
+
+    /* The 128-bit block size has its own dedicated routine. */
+    if (blocksize == RIJNDAEL_MIN_BLOCKSIZE) {
+        blockFn = &rijndael_encryptBlock128;
+    }
+    for (remaining = inputLen; remaining > 0; remaining -= blocksize) {
+        SECStatus status = (*blockFn)(cx, output, input);
+
+        if (status != SECSuccess) {
+            return status;
+        }
+        output += blocksize;
+        input += blocksize;
+    }
+    return SECSuccess;
+}
+
+/*
+ * CBC encryption worker: each plaintext block is XOR-ed with the previous
+ * ciphertext block (cx->iv for the first block) and then encrypted.
+ *
+ * On success the last ciphertext block is stored in cx->iv so that a later
+ * call continues the chain. outputLen and maxOutputLen are not used here.
+ * Returns SECSuccess (also for inputLen == 0), or the first non-success
+ * status from the block function, in which case cx->iv is left untouched.
+ */
+static SECStatus
+rijndael_encryptCBC(AESContext *cx, unsigned char *output,
+                    unsigned int *outputLen, unsigned int maxOutputLen,
+                    const unsigned char *input, unsigned int inputLen,
+                    unsigned int blocksize)
+{
+    unsigned char xored[RIJNDAEL_MAX_STATE_SIZE * 8];
+    unsigned char *chain = cx->iv;
+    AESBlockFunc *blockFn = &rijndael_encryptBlock;
+    unsigned int remaining;
+    unsigned int i;
+
+    if (inputLen == 0) {
+        return SECSuccess;
+    }
+    /* The 128-bit block size has its own dedicated routine. */
+    if (blocksize == RIJNDAEL_MIN_BLOCKSIZE) {
+        blockFn = &rijndael_encryptBlock128;
+    }
+    for (remaining = inputLen; remaining > 0; remaining -= blocksize) {
+        SECStatus status;
+
+        /* Mix in the previous ciphertext block (or the IV). */
+        for (i = 0; i < blocksize; ++i) {
+            xored[i] = input[i] ^ chain[i];
+        }
+        status = (*blockFn)(cx, output, xored);
+        if (status != SECSuccess) {
+            return status;
+        }
+        /* The block just written becomes the chaining value. */
+        chain = output;
+        output += blocksize;
+        input += blocksize;
+    }
+    memcpy(cx->iv, chain, blocksize);
+    return SECSuccess;
+}
+
+/*
+ * ECB decryption worker: runs the block decryptor over input one block at
+ * a time, writing each result to the matching position in output.
+ *
+ * outputLen and maxOutputLen are not used here. inputLen is consumed in
+ * steps of blocksize, so it is expected to be a whole number of blocks.
+ * Returns SECSuccess, or the first non-success status from the block
+ * function.
+ */
+static SECStatus
+rijndael_decryptECB(AESContext *cx, unsigned char *output,
+                    unsigned int *outputLen, unsigned int maxOutputLen,
+                    const unsigned char *input, unsigned int inputLen,
+                    unsigned int blocksize)
+{
+    AESBlockFunc *blockFn = &rijndael_decryptBlock;
+    unsigned int remaining;
+
+    /* The 128-bit block size has its own dedicated routine. */
+    if (blocksize == RIJNDAEL_MIN_BLOCKSIZE) {
+        blockFn = &rijndael_decryptBlock128;
+    }
+    for (remaining = inputLen; remaining > 0; remaining -= blocksize) {
+        SECStatus status = (*blockFn)(cx, output, input);
+
+        if (status != SECSuccess) {
+            return status;
+        }
+        output += blocksize;
+        input += blocksize;
+    }
+    return SECSuccess;
+}
+
+/*
+ * CBC decryption worker. Blocks are processed from the last one back to
+ * the first, so every ciphertext block is still intact in the input buffer
+ * when it is needed as the XOR value for the block after it.
+ *
+ * The last ciphertext block is saved up front and stored in cx->iv on
+ * success, so that a later call continues the chain. outputLen and
+ * maxOutputLen are not used here. Returns SECSuccess (also for
+ * inputLen == 0), or the first non-success status from the block function,
+ * in which case cx->iv is left untouched.
+ */
+static SECStatus
+rijndael_decryptCBC(AESContext *cx, unsigned char *output,
+                    unsigned int *outputLen, unsigned int maxOutputLen,
+                    const unsigned char *input, unsigned int inputLen,
+                    unsigned int blocksize)
+{
+    unsigned char savedIV[RIJNDAEL_MAX_BLOCKSIZE];
+    AESBlockFunc *blockFn;
+    const unsigned char *src;
+    unsigned char *dst;
+    unsigned int remaining;
+    unsigned int i;
+    SECStatus status;
+
+    if (inputLen == 0) {
+        return SECSuccess;
+    }
+    PORT_Assert(output - input >= 0 || input - output >= (int)inputLen);
+    if (blocksize == RIJNDAEL_MIN_BLOCKSIZE) {
+        blockFn = &rijndael_decryptBlock128;
+    } else {
+        blockFn = &rijndael_decryptBlock;
+    }
+    /* Point at the final block of each buffer. */
+    src = input + (inputLen - blocksize);
+    dst = output + (inputLen - blocksize);
+    /* Remember the next IV before the input can be overwritten. */
+    memcpy(savedIV, src, blocksize);
+    /* Every block except the first is chained to the ciphertext block
+     * that precedes it. */
+    for (remaining = inputLen; remaining > blocksize; remaining -= blocksize) {
+        const unsigned char *prev = src - blocksize;
+
+        status = (*blockFn)(cx, dst, src);
+        if (status != SECSuccess) {
+            return status;
+        }
+        for (i = 0; i < blocksize; ++i) {
+            dst[i] ^= prev[i];
+        }
+        dst -= blocksize;
+        src = prev;
+    }
+    /* The first block is chained to the IV held in the context. */
+    if (src == input) {
+        status = (*blockFn)(cx, dst, src);
+        if (status != SECSuccess) {
+            return status;
+        }
+        for (i = 0; i < blocksize; ++i) {
+            dst[i] ^= cx->iv[i];
+        }
+    }
+    memcpy(cx->iv, savedIV, blocksize);
+    return SECSuccess;
+}
+
+/************************************************************************
+ *
+ * BLAPI Interface functions
+ *
+ * The following functions implement the encryption routines defined in
+ * BLAPI for the AES cipher, Rijndael.
+ *
+ ***********************************************************************/
+
+/* Allocate an AESContext with PORT_ZNew and hand it back to the caller
+ * unchanged; the result is whatever PORT_ZNew produced. */
+AESContext *
+AES_AllocateContext(void)
+{
+    AESContext *fresh = PORT_ZNew(AESContext);
+
+    return fresh;
+}
+
+#ifdef INTEL_GCM
+/*
+ * Adapted from the example code in "How to detect New Instruction support in
+ * the 4th generation Intel Core processor family" by Max Locktyukhin.
+ *
+ * XGETBV:
+ * Reads an extended control register (XCR) specified by ECX into EDX:EAX.
+ */
+/* Returns true when bits 1 and 2 of the low 32 bits of XCR0 are both set. */
+static PRBool
+check_xcr0_ymm()
+{
+ PRUint32 xcr0;
+#if defined(_MSC_VER)
+#if defined(_M_IX86)
+ /* 32-bit MSVC: inline assembly, XCR index 0 in ECX, result taken from EAX. */
+ __asm {
+ mov ecx, 0
+ xgetbv
+ mov xcr0, eax
+ }
+#else
+ xcr0 = (PRUint32)_xgetbv(0); /* Requires VS2010 SP1 or later. */
+#endif
+#else
+ /* Other compilers: extended asm with EAX as output, ECX = 0 as input,
+ * and EDX declared as clobbered (the upper half of the result is unused). */
+ __asm__("xgetbv"
+ : "=a"(xcr0)
+ : "c"(0)
+ : "%edx");
+#endif
+ /* Check if xmm and ymm state are enabled in XCR0. */
+ return (xcr0 & 6) == 6;
+}
+#endif
+
+/*
+** Initialize a new AES context suitable for AES encryption/decryption in
+** the ECB or CBC mode.
+**
+** "cx"        the context to initialize; must not be NULL
+** "key"       the raw key; must not be NULL
+** "keysize"   key length in bytes; RIJNDAEL_MIN_BLOCKSIZE through
+**             RIJNDAEL_MAX_BLOCKSIZE and a multiple of 4
+** "iv"        initialization vector of "blocksize" bytes; required when
+**             "mode" is NSS_AES_CBC, not read otherwise
+** "mode"      the mode of operation, which must be NSS_AES or NSS_AES_CBC
+** "encrypt"   non-zero to set up for encryption, zero for decryption
+** "blocksize" block length in bytes; same range rules as "keysize"
+**
+** Returns SECSuccess, or SECFailure when an argument is rejected
+** (SEC_ERROR_INVALID_ARGS), the expanded key would not fit in the context
+** (SEC_ERROR_LIBRARY_FAILURE), table generation fails, or key expansion
+** fails.
+*/
+static SECStatus
+aes_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize,
+                const unsigned char *iv, int mode, unsigned int encrypt,
+                unsigned int blocksize)
+{
+    unsigned int Nk;
+    /* According to Rijndael AES Proposal, section 12.1, block and key
+     * lengths between 128 and 256 bits are supported, as long as the
+     * length in bytes is divisible by 4.
+     */
+    if (key == NULL ||
+        keysize < RIJNDAEL_MIN_BLOCKSIZE ||
+        keysize > RIJNDAEL_MAX_BLOCKSIZE ||
+        keysize % 4 != 0 ||
+        blocksize < RIJNDAEL_MIN_BLOCKSIZE ||
+        blocksize > RIJNDAEL_MAX_BLOCKSIZE ||
+        blocksize % 4 != 0) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    if (mode != NSS_AES && mode != NSS_AES_CBC) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    if (mode == NSS_AES_CBC && iv == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    if (!cx) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+#ifdef USE_HW_AES
+    /* has_intel_aes == 0 means "not probed yet"; the probe stores 1 or -1. */
+    if (has_intel_aes == 0) {
+        unsigned long eax, ebx, ecx, edx;
+        char *disable_hw_aes = PR_GetEnvSecure("NSS_DISABLE_HW_AES");
+
+        if (disable_hw_aes == NULL) {
+            freebl_cpuid(1, &eax, &ebx, &ecx, &edx);
+            has_intel_aes = (ecx & (1 << 25)) != 0 ? 1 : -1;
+#ifdef INTEL_GCM
+            has_intel_clmul = (ecx & (1 << 1)) != 0 ? 1 : -1;
+            if ((ecx & (1 << 27)) != 0 && (ecx & (1 << 28)) != 0 &&
+                check_xcr0_ymm()) {
+                has_intel_avx = 1;
+            } else {
+                has_intel_avx = -1;
+            }
+#endif
+        } else {
+            /* The environment variable turns every hardware path off. */
+            has_intel_aes = -1;
+#ifdef INTEL_GCM
+            has_intel_avx = -1;
+            has_intel_clmul = -1;
+#endif
+        }
+    }
+    use_hw_aes = (PRBool)(has_intel_aes > 0 && (keysize % 8) == 0 && blocksize == 16);
+#ifdef INTEL_GCM
+    use_hw_gcm = (PRBool)(use_hw_aes && has_intel_avx > 0 && has_intel_clmul > 0);
+#endif
+#endif /* USE_HW_AES */
+    /* Nb = (block size in bits) / 32 */
+    cx->Nb = blocksize / 4;
+    /* Nk = (key size in bits) / 32 */
+    Nk = keysize / 4;
+    /* Obtain number of rounds from "table" */
+    cx->Nr = RIJNDAEL_NUM_ROUNDS(Nk, cx->Nb);
+    /* copy in the iv, if necessary */
+    if (mode == NSS_AES_CBC) {
+        memcpy(cx->iv, iv, blocksize);
+#ifdef USE_HW_AES
+        if (use_hw_aes) {
+            cx->worker = (freeblCipherFunc)
+                intel_aes_cbc_worker(encrypt, keysize);
+        } else
+#endif
+        {
+            cx->worker = (freeblCipherFunc)(encrypt
+                                                ? &rijndael_encryptCBC
+                                                : &rijndael_decryptCBC);
+        }
+    } else {
+#ifdef USE_HW_AES
+        if (use_hw_aes) {
+            cx->worker = (freeblCipherFunc)
+                intel_aes_ecb_worker(encrypt, keysize);
+        } else
+#endif
+        {
+            cx->worker = (freeblCipherFunc)(encrypt
+                                                ? &rijndael_encryptECB
+                                                : &rijndael_decryptECB);
+        }
+    }
+    /* The round keys occupy Nb * (Nr + 1) words of cx->expandedKey. */
+    PORT_Assert((cx->Nb * (cx->Nr + 1)) <= RIJNDAEL_MAX_EXP_KEY_SIZE);
+    if ((cx->Nb * (cx->Nr + 1)) > RIJNDAEL_MAX_EXP_KEY_SIZE) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        goto cleanup;
+    }
+#ifdef USE_HW_AES
+    if (use_hw_aes) {
+        intel_aes_init(encrypt, keysize);
+    } else
+#endif
+    {
+
+#if defined(RIJNDAEL_GENERATE_TABLES) || \
+    defined(RIJNDAEL_GENERATE_TABLES_MACRO)
+        if (rijndaelTables == NULL) {
+            if (PR_CallOnce(&coRTInit, init_rijndael_tables) != PR_SUCCESS) {
+                /* Was spelled "SecFailure", which is not a declared name. */
+                return SECFailure;
+            }
+        }
+#endif
+        /* Generate expanded key */
+        if (encrypt) {
+            if (rijndael_key_expansion(cx, key, Nk) != SECSuccess)
+                goto cleanup;
+        } else {
+            if (rijndael_invkey_expansion(cx, key, Nk) != SECSuccess)
+                goto cleanup;
+        }
+    }
+    /* In the base modes the context serves as its own worker context and
+     * needs no separate destroy callback. */
+    cx->worker_cx = cx;
+    cx->destroy = NULL;
+    cx->isBlock = PR_TRUE;
+    return SECSuccess;
+cleanup:
+    return SECFailure;
+}
+
+/*
+ * AES_InitContext
+ *
+ * Initialize an already allocated context for any supported mode.
+ *
+ * NSS_AES_CTS is layered on the CBC worker; NSS_AES_GCM and NSS_AES_CTR are
+ * layered on the ECB worker and always use it in the encrypt direction. For
+ * those three modes a mode-specific worker context is created and installed
+ * together with its update and destroy functions. Any other mode value is
+ * passed straight to aes_InitContext.
+ *
+ * Returns SECSuccess, or SECFailure when cx is NULL, when aes_InitContext
+ * fails, or when the mode-specific context cannot be created.
+ */
+SECStatus
+AES_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize,
+                const unsigned char *iv, int mode, unsigned int encrypt,
+                unsigned int blocksize)
+{
+    int basemode = mode;
+    PRBool baseencrypt = encrypt;
+    SECStatus rv;
+
+    /* cx is written to below, before aes_InitContext gets the chance to
+     * reject a NULL context, so it has to be checked here. */
+    if (cx == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    switch (mode) {
+        case NSS_AES_CTS:
+            basemode = NSS_AES_CBC;
+            break;
+        case NSS_AES_GCM:
+        case NSS_AES_CTR:
+            basemode = NSS_AES;
+            baseencrypt = PR_TRUE;
+            break;
+    }
+    /* make sure enough is initialized so we can safely call Destroy */
+    cx->worker_cx = NULL;
+    cx->destroy = NULL;
+    rv = aes_InitContext(cx, key, keysize, iv, basemode,
+                         baseencrypt, blocksize);
+    if (rv != SECSuccess) {
+        AES_DestroyContext(cx, PR_FALSE);
+        return rv;
+    }
+    cx->mode = mode;
+
+    /* finally, set up any mode specific contexts */
+    switch (mode) {
+        case NSS_AES_CTS:
+            cx->worker_cx = CTS_CreateContext(cx, cx->worker, iv, blocksize);
+            cx->worker = (freeblCipherFunc)(encrypt ? CTS_EncryptUpdate : CTS_DecryptUpdate);
+            cx->destroy = (freeblDestroyFunc)CTS_DestroyContext;
+            cx->isBlock = PR_FALSE;
+            break;
+        case NSS_AES_GCM:
+#ifdef INTEL_GCM
+            if (use_hw_gcm) {
+                cx->worker_cx = intel_AES_GCM_CreateContext(cx, cx->worker, iv, blocksize);
+                cx->worker = (freeblCipherFunc)(encrypt ? intel_AES_GCM_EncryptUpdate : intel_AES_GCM_DecryptUpdate);
+                cx->destroy = (freeblDestroyFunc)intel_AES_GCM_DestroyContext;
+                cx->isBlock = PR_FALSE;
+            } else
+#endif
+            {
+                cx->worker_cx = GCM_CreateContext(cx, cx->worker, iv, blocksize);
+                cx->worker = (freeblCipherFunc)(encrypt ? GCM_EncryptUpdate : GCM_DecryptUpdate);
+                cx->destroy = (freeblDestroyFunc)GCM_DestroyContext;
+                cx->isBlock = PR_FALSE;
+            }
+            break;
+        case NSS_AES_CTR:
+            cx->worker_cx = CTR_CreateContext(cx, cx->worker, iv, blocksize);
+#if defined(USE_HW_AES) && defined(_MSC_VER)
+            if (use_hw_aes) {
+                cx->worker = (freeblCipherFunc)CTR_Update_HW_AES;
+            } else
+#endif
+            {
+                cx->worker = (freeblCipherFunc)CTR_Update;
+            }
+            cx->destroy = (freeblDestroyFunc)CTR_DestroyContext;
+            cx->isBlock = PR_FALSE;
+            break;
+        default:
+            /* everything has already been set up by aes_InitContext, just
+             * return */
+            return SECSuccess;
+    }
+    /* check to see if we succeeded in getting the worker context */
+    if (cx->worker_cx == NULL) {
+        /* no, just destroy the existing context */
+        cx->destroy = NULL; /* paranoia: with worker_cx NULL the destroy
+                             * callback would not be invoked anyway */
+        AES_DestroyContext(cx, PR_FALSE);
+        return SECFailure;
+    }
+    return SECSuccess;
+}
+
+/* AES_CreateContext
+ *
+ * Allocate a context and initialize it in one step.
+ *
+ * Returns the ready-to-use context, or NULL when allocation or
+ * initialization fails; in the latter case the context is destroyed and
+ * freed before returning.
+ */
+AESContext *
+AES_CreateContext(const unsigned char *key, const unsigned char *iv,
+                  int mode, int encrypt,
+                  unsigned int keysize, unsigned int blocksize)
+{
+    AESContext *created = AES_AllocateContext();
+
+    if (!created) {
+        return NULL;
+    }
+    if (AES_InitContext(created, key, keysize, iv, mode, encrypt,
+                        blocksize) != SECSuccess) {
+        AES_DestroyContext(created, PR_TRUE);
+        return NULL;
+    }
+    return created;
+}
+
+/*
+ * AES_DestroyContext
+ *
+ * Tear down an AES cipher context: destroy the mode-specific worker context
+ * (when one with a destroy callback is installed), then zero the context so
+ * that the IV and expanded key do not linger in memory. If freeit is true,
+ * also free the pointer to the context. A NULL cx is ignored.
+ */
+void
+AES_DestroyContext(AESContext *cx, PRBool freeit)
+{
+    if (cx == NULL) {
+        return;
+    }
+    if (cx->worker_cx && cx->destroy) {
+        (*cx->destroy)(cx->worker_cx, PR_TRUE);
+        cx->worker_cx = NULL;
+        cx->destroy = NULL;
+    }
+    /* Scrub key material. The context must be initialized again before it
+     * can be reused. */
+    memset(cx, 0, sizeof *cx);
+    if (freeit)
+        PORT_Free(cx);
+}
+
+/*
+ * AES_Encrypt
+ *
+ * Encrypt an arbitrary-length buffer. The output buffer must already be
+ * allocated to at least inputLen.
+ *
+ * Fails with SEC_ERROR_INVALID_ARGS when cx, output or outputLen is NULL,
+ * or when input is NULL while inputLen is non-zero; with
+ * SEC_ERROR_INPUT_LEN when a block mode is given a partial block; and with
+ * SEC_ERROR_OUTPUT_LEN when maxOutputLen is smaller than inputLen.
+ * Otherwise *outputLen is preset to inputLen and the result of the
+ * context's worker function is returned.
+ */
+SECStatus
+AES_Encrypt(AESContext *cx, unsigned char *output,
+            unsigned int *outputLen, unsigned int maxOutputLen,
+            const unsigned char *input, unsigned int inputLen)
+{
+    int blocksize;
+    /* Check args; outputLen is written unconditionally further down. */
+    if (cx == NULL || output == NULL || outputLen == NULL ||
+        (input == NULL && inputLen != 0)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    /* cx->Nb counts 32-bit words. */
+    blocksize = 4 * cx->Nb;
+    if (cx->isBlock && (inputLen % blocksize != 0)) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+    if (maxOutputLen < inputLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+    *outputLen = inputLen;
+#if UINT_MAX > MP_32BIT_MAX
+    /*
+     * we can guarantee that GCM won't overflow if we limit the input to
+     * 2^36 bytes. For simplicity, we are limiting it to 2^32 for now.
+     *
+     * We do it here to cover both hardware and software GCM operations.
+     */
+    {
+        PR_STATIC_ASSERT(sizeof(unsigned int) > 4);
+    }
+    if ((cx->mode == NSS_AES_GCM) && (inputLen > MP_32BIT_MAX)) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+#else
+    /* if we can't pass in a 32_bit number, then no such check needed */
+    {
+        PR_STATIC_ASSERT(sizeof(unsigned int) <= 4);
+    }
+#endif
+
+    return (*cx->worker)(cx->worker_cx, output, outputLen, maxOutputLen,
+                         input, inputLen, blocksize);
+}
+
+/*
+ * AES_Decrypt
+ *
+ * Decrypt an arbitrary-length buffer. The output buffer must already be
+ * allocated to at least inputLen.
+ *
+ * Fails with SEC_ERROR_INVALID_ARGS when cx, output or outputLen is NULL,
+ * or when input is NULL while inputLen is non-zero; with
+ * SEC_ERROR_INPUT_LEN when a block mode is given a partial block; and with
+ * SEC_ERROR_OUTPUT_LEN when maxOutputLen is smaller than inputLen.
+ * Otherwise *outputLen is preset to inputLen and the result of the
+ * context's worker function is returned.
+ */
+SECStatus
+AES_Decrypt(AESContext *cx, unsigned char *output,
+            unsigned int *outputLen, unsigned int maxOutputLen,
+            const unsigned char *input, unsigned int inputLen)
+{
+    int blocksize;
+    /* Check args; outputLen is written unconditionally further down. */
+    if (cx == NULL || output == NULL || outputLen == NULL ||
+        (input == NULL && inputLen != 0)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    /* cx->Nb counts 32-bit words. */
+    blocksize = 4 * cx->Nb;
+    if (cx->isBlock && (inputLen % blocksize != 0)) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+    if (maxOutputLen < inputLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+    *outputLen = inputLen;
+    return (*cx->worker)(cx->worker_cx, output, outputLen, maxOutputLen,
+                         input, inputLen, blocksize);
+}
diff --git a/security/nss/lib/freebl/rijndael.h b/security/nss/lib/freebl/rijndael.h
new file mode 100644
index 000000000..0e14ec2fc
--- /dev/null
+++ b/security/nss/lib/freebl/rijndael.h
@@ -0,0 +1,67 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _RIJNDAEL_H_
+#define _RIJNDAEL_H_ 1
+
+#include "blapii.h"
+
+#define RIJNDAEL_MIN_BLOCKSIZE 16 /* bytes */
+#define RIJNDAEL_MAX_BLOCKSIZE 32 /* bytes */
+
+/* Signature shared by the single-block encrypt and decrypt routines: read
+ * one block from input, write one block to output, report a SECStatus. */
+typedef SECStatus AESBlockFunc(AESContext *cx,
+ unsigned char *output,
+ const unsigned char *input);
+
+/* RIJNDAEL_NUM_ROUNDS
+ *
+ * Number of rounds per execution
+ * Nk - key length in 32-bit words (key bytes / 4)
+ * Nb - block length in 32-bit words (block bytes / 4)
+ */
+#define RIJNDAEL_NUM_ROUNDS(Nk, Nb) \
+ (PR_MAX(Nk, Nb) + 6)
+
+/* RIJNDAEL_MAX_STATE_SIZE
+ *
+ * Maximum number of bytes in the state (spec includes up to 256-bit block
+ * size)
+ */
+#define RIJNDAEL_MAX_STATE_SIZE 32
+
+/*
+ * This magic number is (Nb_max * (Nr_max + 1))
+ * where Nb_max is the maximum block size in 32-bit words,
+ * Nr_max is the maximum number of rounds, which is Nb_max + 6
+ */
+#define RIJNDAEL_MAX_EXP_KEY_SIZE (8 * 15)
+
+/* AESContextStr
+ *
+ * Values which maintain the state for Rijndael encryption/decryption.
+ *
+ * Nb - the block size in 32-bit words (block size in bytes / 4)
+ * Nr - the number of rounds, from RIJNDAEL_NUM_ROUNDS
+ * worker - the encryption/decryption function to use with worker_cx
+ * iv - initialization vector for CBC mode
+ * expandedKey - the round keys in 4-byte words; Nb * (Nr + 1) words are used
+ * destroy - if not NULL, the destroy function to use with worker_cx
+ * worker_cx - the context for worker and destroy
+ * isBlock - is the mode of operation a block cipher or a stream cipher?
+ * mode - the mode of operation the context was initialized for
+ */
+struct AESContextStr {
+ unsigned int Nb;
+ unsigned int Nr;
+ freeblCipherFunc worker;
+ /* NOTE: The offsets of iv and expandedKey are hardcoded in intel-aes.s.
+ * Don't add new members before them without updating intel-aes.s. */
+ unsigned char iv[RIJNDAEL_MAX_BLOCKSIZE];
+ PRUint32 expandedKey[RIJNDAEL_MAX_EXP_KEY_SIZE];
+ freeblDestroyFunc destroy;
+ void *worker_cx;
+ PRBool isBlock;
+ int mode;
+};
+
+#endif /* _RIJNDAEL_H_ */
diff --git a/security/nss/lib/freebl/rijndael32.tab b/security/nss/lib/freebl/rijndael32.tab
new file mode 100644
index 000000000..59be7c2c0
--- /dev/null
+++ b/security/nss/lib/freebl/rijndael32.tab
@@ -0,0 +1,1219 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef RIJNDAEL_INCLUDE_TABLES
+static const PRUint8 _S[256] =
+{
+ 99, 124, 119, 123, 242, 107, 111, 197, 48, 1, 103, 43, 254, 215, 171, 118,
+202, 130, 201, 125, 250, 89, 71, 240, 173, 212, 162, 175, 156, 164, 114, 192,
+183, 253, 147, 38, 54, 63, 247, 204, 52, 165, 229, 241, 113, 216, 49, 21,
+ 4, 199, 35, 195, 24, 150, 5, 154, 7, 18, 128, 226, 235, 39, 178, 117,
+ 9, 131, 44, 26, 27, 110, 90, 160, 82, 59, 214, 179, 41, 227, 47, 132,
+ 83, 209, 0, 237, 32, 252, 177, 91, 106, 203, 190, 57, 74, 76, 88, 207,
+208, 239, 170, 251, 67, 77, 51, 133, 69, 249, 2, 127, 80, 60, 159, 168,
+ 81, 163, 64, 143, 146, 157, 56, 245, 188, 182, 218, 33, 16, 255, 243, 210,
+205, 12, 19, 236, 95, 151, 68, 23, 196, 167, 126, 61, 100, 93, 25, 115,
+ 96, 129, 79, 220, 34, 42, 144, 136, 70, 238, 184, 20, 222, 94, 11, 219,
+224, 50, 58, 10, 73, 6, 36, 92, 194, 211, 172, 98, 145, 149, 228, 121,
+231, 200, 55, 109, 141, 213, 78, 169, 108, 86, 244, 234, 101, 122, 174, 8,
+186, 120, 37, 46, 28, 166, 180, 198, 232, 221, 116, 31, 75, 189, 139, 138,
+112, 62, 181, 102, 72, 3, 246, 14, 97, 53, 87, 185, 134, 193, 29, 158,
+225, 248, 152, 17, 105, 217, 142, 148, 155, 30, 135, 233, 206, 85, 40, 223,
+140, 161, 137, 13, 191, 230, 66, 104, 65, 153, 45, 15, 176, 84, 187, 22
+};
+#endif /* not RIJNDAEL_INCLUDE_TABLES */
+
+static const PRUint8 _SInv[256] =
+{
+ 82, 9, 106, 213, 48, 54, 165, 56, 191, 64, 163, 158, 129, 243, 215, 251,
+124, 227, 57, 130, 155, 47, 255, 135, 52, 142, 67, 68, 196, 222, 233, 203,
+ 84, 123, 148, 50, 166, 194, 35, 61, 238, 76, 149, 11, 66, 250, 195, 78,
+ 8, 46, 161, 102, 40, 217, 36, 178, 118, 91, 162, 73, 109, 139, 209, 37,
+114, 248, 246, 100, 134, 104, 152, 22, 212, 164, 92, 204, 93, 101, 182, 146,
+108, 112, 72, 80, 253, 237, 185, 218, 94, 21, 70, 87, 167, 141, 157, 132,
+144, 216, 171, 0, 140, 188, 211, 10, 247, 228, 88, 5, 184, 179, 69, 6,
+208, 44, 30, 143, 202, 63, 15, 2, 193, 175, 189, 3, 1, 19, 138, 107,
+ 58, 145, 17, 65, 79, 103, 220, 234, 151, 242, 207, 206, 240, 180, 230, 115,
+150, 172, 116, 34, 231, 173, 53, 133, 226, 249, 55, 232, 28, 117, 223, 110,
+ 71, 241, 26, 113, 29, 41, 197, 137, 111, 183, 98, 14, 170, 24, 190, 27,
+252, 86, 62, 75, 198, 210, 121, 32, 154, 219, 192, 254, 120, 205, 90, 244,
+ 31, 221, 168, 51, 136, 7, 199, 49, 177, 18, 16, 89, 39, 128, 236, 95,
+ 96, 81, 127, 169, 25, 181, 74, 13, 45, 229, 122, 159, 147, 201, 156, 239,
+160, 224, 59, 77, 174, 42, 245, 176, 200, 235, 187, 60, 131, 83, 153, 97,
+ 23, 43, 4, 126, 186, 119, 214, 38, 225, 105, 20, 99, 85, 33, 12, 125
+};
+
+#ifdef RIJNDAEL_INCLUDE_TABLES
+#ifdef IS_LITTLE_ENDIAN
+static const PRUint32 _T0[256] =
+{
+0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6, 0x0df2f2ff, 0xbd6b6bd6,
+0xb16f6fde, 0x54c5c591, 0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56,
+0x19fefee7, 0x62d7d7b5, 0xe6abab4d, 0x9a7676ec, 0x45caca8f, 0x9d82821f,
+0x40c9c989, 0x877d7dfa, 0x15fafaef, 0xeb5959b2, 0xc947478e, 0x0bf0f0fb,
+0xecadad41, 0x67d4d4b3, 0xfda2a25f, 0xeaafaf45, 0xbf9c9c23, 0xf7a4a453,
+0x967272e4, 0x5bc0c09b, 0xc2b7b775, 0x1cfdfde1, 0xae93933d, 0x6a26264c,
+0x5a36366c, 0x413f3f7e, 0x02f7f7f5, 0x4fcccc83, 0x5c343468, 0xf4a5a551,
+0x34e5e5d1, 0x08f1f1f9, 0x937171e2, 0x73d8d8ab, 0x53313162, 0x3f15152a,
+0x0c040408, 0x52c7c795, 0x65232346, 0x5ec3c39d, 0x28181830, 0xa1969637,
+0x0f05050a, 0xb59a9a2f, 0x0907070e, 0x36121224, 0x9b80801b, 0x3de2e2df,
+0x26ebebcd, 0x6927274e, 0xcdb2b27f, 0x9f7575ea, 0x1b090912, 0x9e83831d,
+0x742c2c58, 0x2e1a1a34, 0x2d1b1b36, 0xb26e6edc, 0xee5a5ab4, 0xfba0a05b,
+0xf65252a4, 0x4d3b3b76, 0x61d6d6b7, 0xceb3b37d, 0x7b292952, 0x3ee3e3dd,
+0x712f2f5e, 0x97848413, 0xf55353a6, 0x68d1d1b9, 0x00000000, 0x2cededc1,
+0x60202040, 0x1ffcfce3, 0xc8b1b179, 0xed5b5bb6, 0xbe6a6ad4, 0x46cbcb8d,
+0xd9bebe67, 0x4b393972, 0xde4a4a94, 0xd44c4c98, 0xe85858b0, 0x4acfcf85,
+0x6bd0d0bb, 0x2aefefc5, 0xe5aaaa4f, 0x16fbfbed, 0xc5434386, 0xd74d4d9a,
+0x55333366, 0x94858511, 0xcf45458a, 0x10f9f9e9, 0x06020204, 0x817f7ffe,
+0xf05050a0, 0x443c3c78, 0xba9f9f25, 0xe3a8a84b, 0xf35151a2, 0xfea3a35d,
+0xc0404080, 0x8a8f8f05, 0xad92923f, 0xbc9d9d21, 0x48383870, 0x04f5f5f1,
+0xdfbcbc63, 0xc1b6b677, 0x75dadaaf, 0x63212142, 0x30101020, 0x1affffe5,
+0x0ef3f3fd, 0x6dd2d2bf, 0x4ccdcd81, 0x140c0c18, 0x35131326, 0x2fececc3,
+0xe15f5fbe, 0xa2979735, 0xcc444488, 0x3917172e, 0x57c4c493, 0xf2a7a755,
+0x827e7efc, 0x473d3d7a, 0xac6464c8, 0xe75d5dba, 0x2b191932, 0x957373e6,
+0xa06060c0, 0x98818119, 0xd14f4f9e, 0x7fdcdca3, 0x66222244, 0x7e2a2a54,
+0xab90903b, 0x8388880b, 0xca46468c, 0x29eeeec7, 0xd3b8b86b, 0x3c141428,
+0x79dedea7, 0xe25e5ebc, 0x1d0b0b16, 0x76dbdbad, 0x3be0e0db, 0x56323264,
+0x4e3a3a74, 0x1e0a0a14, 0xdb494992, 0x0a06060c, 0x6c242448, 0xe45c5cb8,
+0x5dc2c29f, 0x6ed3d3bd, 0xefacac43, 0xa66262c4, 0xa8919139, 0xa4959531,
+0x37e4e4d3, 0x8b7979f2, 0x32e7e7d5, 0x43c8c88b, 0x5937376e, 0xb76d6dda,
+0x8c8d8d01, 0x64d5d5b1, 0xd24e4e9c, 0xe0a9a949, 0xb46c6cd8, 0xfa5656ac,
+0x07f4f4f3, 0x25eaeacf, 0xaf6565ca, 0x8e7a7af4, 0xe9aeae47, 0x18080810,
+0xd5baba6f, 0x887878f0, 0x6f25254a, 0x722e2e5c, 0x241c1c38, 0xf1a6a657,
+0xc7b4b473, 0x51c6c697, 0x23e8e8cb, 0x7cdddda1, 0x9c7474e8, 0x211f1f3e,
+0xdd4b4b96, 0xdcbdbd61, 0x868b8b0d, 0x858a8a0f, 0x907070e0, 0x423e3e7c,
+0xc4b5b571, 0xaa6666cc, 0xd8484890, 0x05030306, 0x01f6f6f7, 0x120e0e1c,
+0xa36161c2, 0x5f35356a, 0xf95757ae, 0xd0b9b969, 0x91868617, 0x58c1c199,
+0x271d1d3a, 0xb99e9e27, 0x38e1e1d9, 0x13f8f8eb, 0xb398982b, 0x33111122,
+0xbb6969d2, 0x70d9d9a9, 0x898e8e07, 0xa7949433, 0xb69b9b2d, 0x221e1e3c,
+0x92878715, 0x20e9e9c9, 0x49cece87, 0xff5555aa, 0x78282850, 0x7adfdfa5,
+0x8f8c8c03, 0xf8a1a159, 0x80898909, 0x170d0d1a, 0xdabfbf65, 0x31e6e6d7,
+0xc6424284, 0xb86868d0, 0xc3414182, 0xb0999929, 0x772d2d5a, 0x110f0f1e,
+0xcbb0b07b, 0xfc5454a8, 0xd6bbbb6d, 0x3a16162c
+};
+#else
+static const PRUint32 _T0[256] =
+{
+0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, 0xfff2f20d, 0xd66b6bbd,
+0xde6f6fb1, 0x91c5c554, 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
+0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, 0x8fcaca45, 0x1f82829d,
+0x89c9c940, 0xfa7d7d87, 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
+0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, 0x239c9cbf, 0x53a4a4f7,
+0xe4727296, 0x9bc0c05b, 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
+0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, 0x6834345c, 0x51a5a5f4,
+0xd1e5e534, 0xf9f1f108, 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
+0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, 0x30181828, 0x379696a1,
+0x0a05050f, 0x2f9a9ab5, 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
+0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, 0x1209091b, 0x1d83839e,
+0x582c2c74, 0x341a1a2e, 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
+0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, 0x5229297b, 0xdde3e33e,
+0x5e2f2f71, 0x13848497, 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
+0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, 0xd46a6abe, 0x8dcbcb46,
+0x67bebed9, 0x7239394b, 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
+0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, 0x864343c5, 0x9a4d4dd7,
+0x66333355, 0x11858594, 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
+0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, 0xa25151f3, 0x5da3a3fe,
+0x804040c0, 0x058f8f8a, 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
+0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, 0x20101030, 0xe5ffff1a,
+0xfdf3f30e, 0xbfd2d26d, 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
+0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, 0x93c4c457, 0x55a7a7f2,
+0xfc7e7e82, 0x7a3d3d47, 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
+0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, 0x44222266, 0x542a2a7e,
+0x3b9090ab, 0x0b888883, 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
+0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, 0xdbe0e03b, 0x64323256,
+0x743a3a4e, 0x140a0a1e, 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
+0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, 0x399191a8, 0x319595a4,
+0xd3e4e437, 0xf279798b, 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
+0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, 0xd86c6cb4, 0xac5656fa,
+0xf3f4f407, 0xcfeaea25, 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
+0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, 0x381c1c24, 0x57a6a6f1,
+0x73b4b4c7, 0x97c6c651, 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
+0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, 0xe0707090, 0x7c3e3e42,
+0x71b5b5c4, 0xcc6666aa, 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
+0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, 0x17868691, 0x99c1c158,
+0x3a1d1d27, 0x279e9eb9, 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
+0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, 0x2d9b9bb6, 0x3c1e1e22,
+0x15878792, 0xc9e9e920, 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
+0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, 0x65bfbfda, 0xd7e6e631,
+0x844242c6, 0xd06868b8, 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
+0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a
+};
+#endif
+
+#ifdef IS_LITTLE_ENDIAN
+static const PRUint32 _T1[256] =
+{
+0x6363c6a5, 0x7c7cf884, 0x7777ee99, 0x7b7bf68d, 0xf2f2ff0d, 0x6b6bd6bd,
+0x6f6fdeb1, 0xc5c59154, 0x30306050, 0x01010203, 0x6767cea9, 0x2b2b567d,
+0xfefee719, 0xd7d7b562, 0xabab4de6, 0x7676ec9a, 0xcaca8f45, 0x82821f9d,
+0xc9c98940, 0x7d7dfa87, 0xfafaef15, 0x5959b2eb, 0x47478ec9, 0xf0f0fb0b,
+0xadad41ec, 0xd4d4b367, 0xa2a25ffd, 0xafaf45ea, 0x9c9c23bf, 0xa4a453f7,
+0x7272e496, 0xc0c09b5b, 0xb7b775c2, 0xfdfde11c, 0x93933dae, 0x26264c6a,
+0x36366c5a, 0x3f3f7e41, 0xf7f7f502, 0xcccc834f, 0x3434685c, 0xa5a551f4,
+0xe5e5d134, 0xf1f1f908, 0x7171e293, 0xd8d8ab73, 0x31316253, 0x15152a3f,
+0x0404080c, 0xc7c79552, 0x23234665, 0xc3c39d5e, 0x18183028, 0x969637a1,
+0x05050a0f, 0x9a9a2fb5, 0x07070e09, 0x12122436, 0x80801b9b, 0xe2e2df3d,
+0xebebcd26, 0x27274e69, 0xb2b27fcd, 0x7575ea9f, 0x0909121b, 0x83831d9e,
+0x2c2c5874, 0x1a1a342e, 0x1b1b362d, 0x6e6edcb2, 0x5a5ab4ee, 0xa0a05bfb,
+0x5252a4f6, 0x3b3b764d, 0xd6d6b761, 0xb3b37dce, 0x2929527b, 0xe3e3dd3e,
+0x2f2f5e71, 0x84841397, 0x5353a6f5, 0xd1d1b968, 0x00000000, 0xededc12c,
+0x20204060, 0xfcfce31f, 0xb1b179c8, 0x5b5bb6ed, 0x6a6ad4be, 0xcbcb8d46,
+0xbebe67d9, 0x3939724b, 0x4a4a94de, 0x4c4c98d4, 0x5858b0e8, 0xcfcf854a,
+0xd0d0bb6b, 0xefefc52a, 0xaaaa4fe5, 0xfbfbed16, 0x434386c5, 0x4d4d9ad7,
+0x33336655, 0x85851194, 0x45458acf, 0xf9f9e910, 0x02020406, 0x7f7ffe81,
+0x5050a0f0, 0x3c3c7844, 0x9f9f25ba, 0xa8a84be3, 0x5151a2f3, 0xa3a35dfe,
+0x404080c0, 0x8f8f058a, 0x92923fad, 0x9d9d21bc, 0x38387048, 0xf5f5f104,
+0xbcbc63df, 0xb6b677c1, 0xdadaaf75, 0x21214263, 0x10102030, 0xffffe51a,
+0xf3f3fd0e, 0xd2d2bf6d, 0xcdcd814c, 0x0c0c1814, 0x13132635, 0xececc32f,
+0x5f5fbee1, 0x979735a2, 0x444488cc, 0x17172e39, 0xc4c49357, 0xa7a755f2,
+0x7e7efc82, 0x3d3d7a47, 0x6464c8ac, 0x5d5dbae7, 0x1919322b, 0x7373e695,
+0x6060c0a0, 0x81811998, 0x4f4f9ed1, 0xdcdca37f, 0x22224466, 0x2a2a547e,
+0x90903bab, 0x88880b83, 0x46468cca, 0xeeeec729, 0xb8b86bd3, 0x1414283c,
+0xdedea779, 0x5e5ebce2, 0x0b0b161d, 0xdbdbad76, 0xe0e0db3b, 0x32326456,
+0x3a3a744e, 0x0a0a141e, 0x494992db, 0x06060c0a, 0x2424486c, 0x5c5cb8e4,
+0xc2c29f5d, 0xd3d3bd6e, 0xacac43ef, 0x6262c4a6, 0x919139a8, 0x959531a4,
+0xe4e4d337, 0x7979f28b, 0xe7e7d532, 0xc8c88b43, 0x37376e59, 0x6d6ddab7,
+0x8d8d018c, 0xd5d5b164, 0x4e4e9cd2, 0xa9a949e0, 0x6c6cd8b4, 0x5656acfa,
+0xf4f4f307, 0xeaeacf25, 0x6565caaf, 0x7a7af48e, 0xaeae47e9, 0x08081018,
+0xbaba6fd5, 0x7878f088, 0x25254a6f, 0x2e2e5c72, 0x1c1c3824, 0xa6a657f1,
+0xb4b473c7, 0xc6c69751, 0xe8e8cb23, 0xdddda17c, 0x7474e89c, 0x1f1f3e21,
+0x4b4b96dd, 0xbdbd61dc, 0x8b8b0d86, 0x8a8a0f85, 0x7070e090, 0x3e3e7c42,
+0xb5b571c4, 0x6666ccaa, 0x484890d8, 0x03030605, 0xf6f6f701, 0x0e0e1c12,
+0x6161c2a3, 0x35356a5f, 0x5757aef9, 0xb9b969d0, 0x86861791, 0xc1c19958,
+0x1d1d3a27, 0x9e9e27b9, 0xe1e1d938, 0xf8f8eb13, 0x98982bb3, 0x11112233,
+0x6969d2bb, 0xd9d9a970, 0x8e8e0789, 0x949433a7, 0x9b9b2db6, 0x1e1e3c22,
+0x87871592, 0xe9e9c920, 0xcece8749, 0x5555aaff, 0x28285078, 0xdfdfa57a,
+0x8c8c038f, 0xa1a159f8, 0x89890980, 0x0d0d1a17, 0xbfbf65da, 0xe6e6d731,
+0x424284c6, 0x6868d0b8, 0x414182c3, 0x999929b0, 0x2d2d5a77, 0x0f0f1e11,
+0xb0b07bcb, 0x5454a8fc, 0xbbbb6dd6, 0x16162c3a
+};
+#else
+static const PRUint32 _T1[256] =
+{
+0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b, 0x0dfff2f2, 0xbdd66b6b,
+0xb1de6f6f, 0x5491c5c5, 0x50603030, 0x03020101, 0xa9ce6767, 0x7d562b2b,
+0x19e7fefe, 0x62b5d7d7, 0xe64dabab, 0x9aec7676, 0x458fcaca, 0x9d1f8282,
+0x4089c9c9, 0x87fa7d7d, 0x15effafa, 0xebb25959, 0xc98e4747, 0x0bfbf0f0,
+0xec41adad, 0x67b3d4d4, 0xfd5fa2a2, 0xea45afaf, 0xbf239c9c, 0xf753a4a4,
+0x96e47272, 0x5b9bc0c0, 0xc275b7b7, 0x1ce1fdfd, 0xae3d9393, 0x6a4c2626,
+0x5a6c3636, 0x417e3f3f, 0x02f5f7f7, 0x4f83cccc, 0x5c683434, 0xf451a5a5,
+0x34d1e5e5, 0x08f9f1f1, 0x93e27171, 0x73abd8d8, 0x53623131, 0x3f2a1515,
+0x0c080404, 0x5295c7c7, 0x65462323, 0x5e9dc3c3, 0x28301818, 0xa1379696,
+0x0f0a0505, 0xb52f9a9a, 0x090e0707, 0x36241212, 0x9b1b8080, 0x3ddfe2e2,
+0x26cdebeb, 0x694e2727, 0xcd7fb2b2, 0x9fea7575, 0x1b120909, 0x9e1d8383,
+0x74582c2c, 0x2e341a1a, 0x2d361b1b, 0xb2dc6e6e, 0xeeb45a5a, 0xfb5ba0a0,
+0xf6a45252, 0x4d763b3b, 0x61b7d6d6, 0xce7db3b3, 0x7b522929, 0x3edde3e3,
+0x715e2f2f, 0x97138484, 0xf5a65353, 0x68b9d1d1, 0x00000000, 0x2cc1eded,
+0x60402020, 0x1fe3fcfc, 0xc879b1b1, 0xedb65b5b, 0xbed46a6a, 0x468dcbcb,
+0xd967bebe, 0x4b723939, 0xde944a4a, 0xd4984c4c, 0xe8b05858, 0x4a85cfcf,
+0x6bbbd0d0, 0x2ac5efef, 0xe54faaaa, 0x16edfbfb, 0xc5864343, 0xd79a4d4d,
+0x55663333, 0x94118585, 0xcf8a4545, 0x10e9f9f9, 0x06040202, 0x81fe7f7f,
+0xf0a05050, 0x44783c3c, 0xba259f9f, 0xe34ba8a8, 0xf3a25151, 0xfe5da3a3,
+0xc0804040, 0x8a058f8f, 0xad3f9292, 0xbc219d9d, 0x48703838, 0x04f1f5f5,
+0xdf63bcbc, 0xc177b6b6, 0x75afdada, 0x63422121, 0x30201010, 0x1ae5ffff,
+0x0efdf3f3, 0x6dbfd2d2, 0x4c81cdcd, 0x14180c0c, 0x35261313, 0x2fc3ecec,
+0xe1be5f5f, 0xa2359797, 0xcc884444, 0x392e1717, 0x5793c4c4, 0xf255a7a7,
+0x82fc7e7e, 0x477a3d3d, 0xacc86464, 0xe7ba5d5d, 0x2b321919, 0x95e67373,
+0xa0c06060, 0x98198181, 0xd19e4f4f, 0x7fa3dcdc, 0x66442222, 0x7e542a2a,
+0xab3b9090, 0x830b8888, 0xca8c4646, 0x29c7eeee, 0xd36bb8b8, 0x3c281414,
+0x79a7dede, 0xe2bc5e5e, 0x1d160b0b, 0x76addbdb, 0x3bdbe0e0, 0x56643232,
+0x4e743a3a, 0x1e140a0a, 0xdb924949, 0x0a0c0606, 0x6c482424, 0xe4b85c5c,
+0x5d9fc2c2, 0x6ebdd3d3, 0xef43acac, 0xa6c46262, 0xa8399191, 0xa4319595,
+0x37d3e4e4, 0x8bf27979, 0x32d5e7e7, 0x438bc8c8, 0x596e3737, 0xb7da6d6d,
+0x8c018d8d, 0x64b1d5d5, 0xd29c4e4e, 0xe049a9a9, 0xb4d86c6c, 0xfaac5656,
+0x07f3f4f4, 0x25cfeaea, 0xafca6565, 0x8ef47a7a, 0xe947aeae, 0x18100808,
+0xd56fbaba, 0x88f07878, 0x6f4a2525, 0x725c2e2e, 0x24381c1c, 0xf157a6a6,
+0xc773b4b4, 0x5197c6c6, 0x23cbe8e8, 0x7ca1dddd, 0x9ce87474, 0x213e1f1f,
+0xdd964b4b, 0xdc61bdbd, 0x860d8b8b, 0x850f8a8a, 0x90e07070, 0x427c3e3e,
+0xc471b5b5, 0xaacc6666, 0xd8904848, 0x05060303, 0x01f7f6f6, 0x121c0e0e,
+0xa3c26161, 0x5f6a3535, 0xf9ae5757, 0xd069b9b9, 0x91178686, 0x5899c1c1,
+0x273a1d1d, 0xb9279e9e, 0x38d9e1e1, 0x13ebf8f8, 0xb32b9898, 0x33221111,
+0xbbd26969, 0x70a9d9d9, 0x89078e8e, 0xa7339494, 0xb62d9b9b, 0x223c1e1e,
+0x92158787, 0x20c9e9e9, 0x4987cece, 0xffaa5555, 0x78502828, 0x7aa5dfdf,
+0x8f038c8c, 0xf859a1a1, 0x80098989, 0x171a0d0d, 0xda65bfbf, 0x31d7e6e6,
+0xc6844242, 0xb8d06868, 0xc3824141, 0xb0299999, 0x775a2d2d, 0x111e0f0f,
+0xcb7bb0b0, 0xfca85454, 0xd66dbbbb, 0x3a2c1616
+};
+#endif
+
+#ifdef IS_LITTLE_ENDIAN
+static const PRUint32 _T2[256] =
+{
+0x63c6a563, 0x7cf8847c, 0x77ee9977, 0x7bf68d7b, 0xf2ff0df2, 0x6bd6bd6b,
+0x6fdeb16f, 0xc59154c5, 0x30605030, 0x01020301, 0x67cea967, 0x2b567d2b,
+0xfee719fe, 0xd7b562d7, 0xab4de6ab, 0x76ec9a76, 0xca8f45ca, 0x821f9d82,
+0xc98940c9, 0x7dfa877d, 0xfaef15fa, 0x59b2eb59, 0x478ec947, 0xf0fb0bf0,
+0xad41ecad, 0xd4b367d4, 0xa25ffda2, 0xaf45eaaf, 0x9c23bf9c, 0xa453f7a4,
+0x72e49672, 0xc09b5bc0, 0xb775c2b7, 0xfde11cfd, 0x933dae93, 0x264c6a26,
+0x366c5a36, 0x3f7e413f, 0xf7f502f7, 0xcc834fcc, 0x34685c34, 0xa551f4a5,
+0xe5d134e5, 0xf1f908f1, 0x71e29371, 0xd8ab73d8, 0x31625331, 0x152a3f15,
+0x04080c04, 0xc79552c7, 0x23466523, 0xc39d5ec3, 0x18302818, 0x9637a196,
+0x050a0f05, 0x9a2fb59a, 0x070e0907, 0x12243612, 0x801b9b80, 0xe2df3de2,
+0xebcd26eb, 0x274e6927, 0xb27fcdb2, 0x75ea9f75, 0x09121b09, 0x831d9e83,
+0x2c58742c, 0x1a342e1a, 0x1b362d1b, 0x6edcb26e, 0x5ab4ee5a, 0xa05bfba0,
+0x52a4f652, 0x3b764d3b, 0xd6b761d6, 0xb37dceb3, 0x29527b29, 0xe3dd3ee3,
+0x2f5e712f, 0x84139784, 0x53a6f553, 0xd1b968d1, 0x00000000, 0xedc12ced,
+0x20406020, 0xfce31ffc, 0xb179c8b1, 0x5bb6ed5b, 0x6ad4be6a, 0xcb8d46cb,
+0xbe67d9be, 0x39724b39, 0x4a94de4a, 0x4c98d44c, 0x58b0e858, 0xcf854acf,
+0xd0bb6bd0, 0xefc52aef, 0xaa4fe5aa, 0xfbed16fb, 0x4386c543, 0x4d9ad74d,
+0x33665533, 0x85119485, 0x458acf45, 0xf9e910f9, 0x02040602, 0x7ffe817f,
+0x50a0f050, 0x3c78443c, 0x9f25ba9f, 0xa84be3a8, 0x51a2f351, 0xa35dfea3,
+0x4080c040, 0x8f058a8f, 0x923fad92, 0x9d21bc9d, 0x38704838, 0xf5f104f5,
+0xbc63dfbc, 0xb677c1b6, 0xdaaf75da, 0x21426321, 0x10203010, 0xffe51aff,
+0xf3fd0ef3, 0xd2bf6dd2, 0xcd814ccd, 0x0c18140c, 0x13263513, 0xecc32fec,
+0x5fbee15f, 0x9735a297, 0x4488cc44, 0x172e3917, 0xc49357c4, 0xa755f2a7,
+0x7efc827e, 0x3d7a473d, 0x64c8ac64, 0x5dbae75d, 0x19322b19, 0x73e69573,
+0x60c0a060, 0x81199881, 0x4f9ed14f, 0xdca37fdc, 0x22446622, 0x2a547e2a,
+0x903bab90, 0x880b8388, 0x468cca46, 0xeec729ee, 0xb86bd3b8, 0x14283c14,
+0xdea779de, 0x5ebce25e, 0x0b161d0b, 0xdbad76db, 0xe0db3be0, 0x32645632,
+0x3a744e3a, 0x0a141e0a, 0x4992db49, 0x060c0a06, 0x24486c24, 0x5cb8e45c,
+0xc29f5dc2, 0xd3bd6ed3, 0xac43efac, 0x62c4a662, 0x9139a891, 0x9531a495,
+0xe4d337e4, 0x79f28b79, 0xe7d532e7, 0xc88b43c8, 0x376e5937, 0x6ddab76d,
+0x8d018c8d, 0xd5b164d5, 0x4e9cd24e, 0xa949e0a9, 0x6cd8b46c, 0x56acfa56,
+0xf4f307f4, 0xeacf25ea, 0x65caaf65, 0x7af48e7a, 0xae47e9ae, 0x08101808,
+0xba6fd5ba, 0x78f08878, 0x254a6f25, 0x2e5c722e, 0x1c38241c, 0xa657f1a6,
+0xb473c7b4, 0xc69751c6, 0xe8cb23e8, 0xdda17cdd, 0x74e89c74, 0x1f3e211f,
+0x4b96dd4b, 0xbd61dcbd, 0x8b0d868b, 0x8a0f858a, 0x70e09070, 0x3e7c423e,
+0xb571c4b5, 0x66ccaa66, 0x4890d848, 0x03060503, 0xf6f701f6, 0x0e1c120e,
+0x61c2a361, 0x356a5f35, 0x57aef957, 0xb969d0b9, 0x86179186, 0xc19958c1,
+0x1d3a271d, 0x9e27b99e, 0xe1d938e1, 0xf8eb13f8, 0x982bb398, 0x11223311,
+0x69d2bb69, 0xd9a970d9, 0x8e07898e, 0x9433a794, 0x9b2db69b, 0x1e3c221e,
+0x87159287, 0xe9c920e9, 0xce8749ce, 0x55aaff55, 0x28507828, 0xdfa57adf,
+0x8c038f8c, 0xa159f8a1, 0x89098089, 0x0d1a170d, 0xbf65dabf, 0xe6d731e6,
+0x4284c642, 0x68d0b868, 0x4182c341, 0x9929b099, 0x2d5a772d, 0x0f1e110f,
+0xb07bcbb0, 0x54a8fc54, 0xbb6dd6bb, 0x162c3a16
+};
+#else
+static const PRUint32 _T2[256] =
+{
+0x63a5c663, 0x7c84f87c, 0x7799ee77, 0x7b8df67b, 0xf20dfff2, 0x6bbdd66b,
+0x6fb1de6f, 0xc55491c5, 0x30506030, 0x01030201, 0x67a9ce67, 0x2b7d562b,
+0xfe19e7fe, 0xd762b5d7, 0xabe64dab, 0x769aec76, 0xca458fca, 0x829d1f82,
+0xc94089c9, 0x7d87fa7d, 0xfa15effa, 0x59ebb259, 0x47c98e47, 0xf00bfbf0,
+0xadec41ad, 0xd467b3d4, 0xa2fd5fa2, 0xafea45af, 0x9cbf239c, 0xa4f753a4,
+0x7296e472, 0xc05b9bc0, 0xb7c275b7, 0xfd1ce1fd, 0x93ae3d93, 0x266a4c26,
+0x365a6c36, 0x3f417e3f, 0xf702f5f7, 0xcc4f83cc, 0x345c6834, 0xa5f451a5,
+0xe534d1e5, 0xf108f9f1, 0x7193e271, 0xd873abd8, 0x31536231, 0x153f2a15,
+0x040c0804, 0xc75295c7, 0x23654623, 0xc35e9dc3, 0x18283018, 0x96a13796,
+0x050f0a05, 0x9ab52f9a, 0x07090e07, 0x12362412, 0x809b1b80, 0xe23ddfe2,
+0xeb26cdeb, 0x27694e27, 0xb2cd7fb2, 0x759fea75, 0x091b1209, 0x839e1d83,
+0x2c74582c, 0x1a2e341a, 0x1b2d361b, 0x6eb2dc6e, 0x5aeeb45a, 0xa0fb5ba0,
+0x52f6a452, 0x3b4d763b, 0xd661b7d6, 0xb3ce7db3, 0x297b5229, 0xe33edde3,
+0x2f715e2f, 0x84971384, 0x53f5a653, 0xd168b9d1, 0x00000000, 0xed2cc1ed,
+0x20604020, 0xfc1fe3fc, 0xb1c879b1, 0x5bedb65b, 0x6abed46a, 0xcb468dcb,
+0xbed967be, 0x394b7239, 0x4ade944a, 0x4cd4984c, 0x58e8b058, 0xcf4a85cf,
+0xd06bbbd0, 0xef2ac5ef, 0xaae54faa, 0xfb16edfb, 0x43c58643, 0x4dd79a4d,
+0x33556633, 0x85941185, 0x45cf8a45, 0xf910e9f9, 0x02060402, 0x7f81fe7f,
+0x50f0a050, 0x3c44783c, 0x9fba259f, 0xa8e34ba8, 0x51f3a251, 0xa3fe5da3,
+0x40c08040, 0x8f8a058f, 0x92ad3f92, 0x9dbc219d, 0x38487038, 0xf504f1f5,
+0xbcdf63bc, 0xb6c177b6, 0xda75afda, 0x21634221, 0x10302010, 0xff1ae5ff,
+0xf30efdf3, 0xd26dbfd2, 0xcd4c81cd, 0x0c14180c, 0x13352613, 0xec2fc3ec,
+0x5fe1be5f, 0x97a23597, 0x44cc8844, 0x17392e17, 0xc45793c4, 0xa7f255a7,
+0x7e82fc7e, 0x3d477a3d, 0x64acc864, 0x5de7ba5d, 0x192b3219, 0x7395e673,
+0x60a0c060, 0x81981981, 0x4fd19e4f, 0xdc7fa3dc, 0x22664422, 0x2a7e542a,
+0x90ab3b90, 0x88830b88, 0x46ca8c46, 0xee29c7ee, 0xb8d36bb8, 0x143c2814,
+0xde79a7de, 0x5ee2bc5e, 0x0b1d160b, 0xdb76addb, 0xe03bdbe0, 0x32566432,
+0x3a4e743a, 0x0a1e140a, 0x49db9249, 0x060a0c06, 0x246c4824, 0x5ce4b85c,
+0xc25d9fc2, 0xd36ebdd3, 0xacef43ac, 0x62a6c462, 0x91a83991, 0x95a43195,
+0xe437d3e4, 0x798bf279, 0xe732d5e7, 0xc8438bc8, 0x37596e37, 0x6db7da6d,
+0x8d8c018d, 0xd564b1d5, 0x4ed29c4e, 0xa9e049a9, 0x6cb4d86c, 0x56faac56,
+0xf407f3f4, 0xea25cfea, 0x65afca65, 0x7a8ef47a, 0xaee947ae, 0x08181008,
+0xbad56fba, 0x7888f078, 0x256f4a25, 0x2e725c2e, 0x1c24381c, 0xa6f157a6,
+0xb4c773b4, 0xc65197c6, 0xe823cbe8, 0xdd7ca1dd, 0x749ce874, 0x1f213e1f,
+0x4bdd964b, 0xbddc61bd, 0x8b860d8b, 0x8a850f8a, 0x7090e070, 0x3e427c3e,
+0xb5c471b5, 0x66aacc66, 0x48d89048, 0x03050603, 0xf601f7f6, 0x0e121c0e,
+0x61a3c261, 0x355f6a35, 0x57f9ae57, 0xb9d069b9, 0x86911786, 0xc15899c1,
+0x1d273a1d, 0x9eb9279e, 0xe138d9e1, 0xf813ebf8, 0x98b32b98, 0x11332211,
+0x69bbd269, 0xd970a9d9, 0x8e89078e, 0x94a73394, 0x9bb62d9b, 0x1e223c1e,
+0x87921587, 0xe920c9e9, 0xce4987ce, 0x55ffaa55, 0x28785028, 0xdf7aa5df,
+0x8c8f038c, 0xa1f859a1, 0x89800989, 0x0d171a0d, 0xbfda65bf, 0xe631d7e6,
+0x42c68442, 0x68b8d068, 0x41c38241, 0x99b02999, 0x2d775a2d, 0x0f111e0f,
+0xb0cb7bb0, 0x54fca854, 0xbbd66dbb, 0x163a2c16
+};
+#endif
+
+#ifdef IS_LITTLE_ENDIAN
+static const PRUint32 _T3[256] =
+{
+0xc6a56363, 0xf8847c7c, 0xee997777, 0xf68d7b7b, 0xff0df2f2, 0xd6bd6b6b,
+0xdeb16f6f, 0x9154c5c5, 0x60503030, 0x02030101, 0xcea96767, 0x567d2b2b,
+0xe719fefe, 0xb562d7d7, 0x4de6abab, 0xec9a7676, 0x8f45caca, 0x1f9d8282,
+0x8940c9c9, 0xfa877d7d, 0xef15fafa, 0xb2eb5959, 0x8ec94747, 0xfb0bf0f0,
+0x41ecadad, 0xb367d4d4, 0x5ffda2a2, 0x45eaafaf, 0x23bf9c9c, 0x53f7a4a4,
+0xe4967272, 0x9b5bc0c0, 0x75c2b7b7, 0xe11cfdfd, 0x3dae9393, 0x4c6a2626,
+0x6c5a3636, 0x7e413f3f, 0xf502f7f7, 0x834fcccc, 0x685c3434, 0x51f4a5a5,
+0xd134e5e5, 0xf908f1f1, 0xe2937171, 0xab73d8d8, 0x62533131, 0x2a3f1515,
+0x080c0404, 0x9552c7c7, 0x46652323, 0x9d5ec3c3, 0x30281818, 0x37a19696,
+0x0a0f0505, 0x2fb59a9a, 0x0e090707, 0x24361212, 0x1b9b8080, 0xdf3de2e2,
+0xcd26ebeb, 0x4e692727, 0x7fcdb2b2, 0xea9f7575, 0x121b0909, 0x1d9e8383,
+0x58742c2c, 0x342e1a1a, 0x362d1b1b, 0xdcb26e6e, 0xb4ee5a5a, 0x5bfba0a0,
+0xa4f65252, 0x764d3b3b, 0xb761d6d6, 0x7dceb3b3, 0x527b2929, 0xdd3ee3e3,
+0x5e712f2f, 0x13978484, 0xa6f55353, 0xb968d1d1, 0x00000000, 0xc12ceded,
+0x40602020, 0xe31ffcfc, 0x79c8b1b1, 0xb6ed5b5b, 0xd4be6a6a, 0x8d46cbcb,
+0x67d9bebe, 0x724b3939, 0x94de4a4a, 0x98d44c4c, 0xb0e85858, 0x854acfcf,
+0xbb6bd0d0, 0xc52aefef, 0x4fe5aaaa, 0xed16fbfb, 0x86c54343, 0x9ad74d4d,
+0x66553333, 0x11948585, 0x8acf4545, 0xe910f9f9, 0x04060202, 0xfe817f7f,
+0xa0f05050, 0x78443c3c, 0x25ba9f9f, 0x4be3a8a8, 0xa2f35151, 0x5dfea3a3,
+0x80c04040, 0x058a8f8f, 0x3fad9292, 0x21bc9d9d, 0x70483838, 0xf104f5f5,
+0x63dfbcbc, 0x77c1b6b6, 0xaf75dada, 0x42632121, 0x20301010, 0xe51affff,
+0xfd0ef3f3, 0xbf6dd2d2, 0x814ccdcd, 0x18140c0c, 0x26351313, 0xc32fecec,
+0xbee15f5f, 0x35a29797, 0x88cc4444, 0x2e391717, 0x9357c4c4, 0x55f2a7a7,
+0xfc827e7e, 0x7a473d3d, 0xc8ac6464, 0xbae75d5d, 0x322b1919, 0xe6957373,
+0xc0a06060, 0x19988181, 0x9ed14f4f, 0xa37fdcdc, 0x44662222, 0x547e2a2a,
+0x3bab9090, 0x0b838888, 0x8cca4646, 0xc729eeee, 0x6bd3b8b8, 0x283c1414,
+0xa779dede, 0xbce25e5e, 0x161d0b0b, 0xad76dbdb, 0xdb3be0e0, 0x64563232,
+0x744e3a3a, 0x141e0a0a, 0x92db4949, 0x0c0a0606, 0x486c2424, 0xb8e45c5c,
+0x9f5dc2c2, 0xbd6ed3d3, 0x43efacac, 0xc4a66262, 0x39a89191, 0x31a49595,
+0xd337e4e4, 0xf28b7979, 0xd532e7e7, 0x8b43c8c8, 0x6e593737, 0xdab76d6d,
+0x018c8d8d, 0xb164d5d5, 0x9cd24e4e, 0x49e0a9a9, 0xd8b46c6c, 0xacfa5656,
+0xf307f4f4, 0xcf25eaea, 0xcaaf6565, 0xf48e7a7a, 0x47e9aeae, 0x10180808,
+0x6fd5baba, 0xf0887878, 0x4a6f2525, 0x5c722e2e, 0x38241c1c, 0x57f1a6a6,
+0x73c7b4b4, 0x9751c6c6, 0xcb23e8e8, 0xa17cdddd, 0xe89c7474, 0x3e211f1f,
+0x96dd4b4b, 0x61dcbdbd, 0x0d868b8b, 0x0f858a8a, 0xe0907070, 0x7c423e3e,
+0x71c4b5b5, 0xccaa6666, 0x90d84848, 0x06050303, 0xf701f6f6, 0x1c120e0e,
+0xc2a36161, 0x6a5f3535, 0xaef95757, 0x69d0b9b9, 0x17918686, 0x9958c1c1,
+0x3a271d1d, 0x27b99e9e, 0xd938e1e1, 0xeb13f8f8, 0x2bb39898, 0x22331111,
+0xd2bb6969, 0xa970d9d9, 0x07898e8e, 0x33a79494, 0x2db69b9b, 0x3c221e1e,
+0x15928787, 0xc920e9e9, 0x8749cece, 0xaaff5555, 0x50782828, 0xa57adfdf,
+0x038f8c8c, 0x59f8a1a1, 0x09808989, 0x1a170d0d, 0x65dabfbf, 0xd731e6e6,
+0x84c64242, 0xd0b86868, 0x82c34141, 0x29b09999, 0x5a772d2d, 0x1e110f0f,
+0x7bcbb0b0, 0xa8fc5454, 0x6dd6bbbb, 0x2c3a1616
+};
+#else
+static const PRUint32 _T3[256] =
+{
+0x6363a5c6, 0x7c7c84f8, 0x777799ee, 0x7b7b8df6, 0xf2f20dff, 0x6b6bbdd6,
+0x6f6fb1de, 0xc5c55491, 0x30305060, 0x01010302, 0x6767a9ce, 0x2b2b7d56,
+0xfefe19e7, 0xd7d762b5, 0xababe64d, 0x76769aec, 0xcaca458f, 0x82829d1f,
+0xc9c94089, 0x7d7d87fa, 0xfafa15ef, 0x5959ebb2, 0x4747c98e, 0xf0f00bfb,
+0xadadec41, 0xd4d467b3, 0xa2a2fd5f, 0xafafea45, 0x9c9cbf23, 0xa4a4f753,
+0x727296e4, 0xc0c05b9b, 0xb7b7c275, 0xfdfd1ce1, 0x9393ae3d, 0x26266a4c,
+0x36365a6c, 0x3f3f417e, 0xf7f702f5, 0xcccc4f83, 0x34345c68, 0xa5a5f451,
+0xe5e534d1, 0xf1f108f9, 0x717193e2, 0xd8d873ab, 0x31315362, 0x15153f2a,
+0x04040c08, 0xc7c75295, 0x23236546, 0xc3c35e9d, 0x18182830, 0x9696a137,
+0x05050f0a, 0x9a9ab52f, 0x0707090e, 0x12123624, 0x80809b1b, 0xe2e23ddf,
+0xebeb26cd, 0x2727694e, 0xb2b2cd7f, 0x75759fea, 0x09091b12, 0x83839e1d,
+0x2c2c7458, 0x1a1a2e34, 0x1b1b2d36, 0x6e6eb2dc, 0x5a5aeeb4, 0xa0a0fb5b,
+0x5252f6a4, 0x3b3b4d76, 0xd6d661b7, 0xb3b3ce7d, 0x29297b52, 0xe3e33edd,
+0x2f2f715e, 0x84849713, 0x5353f5a6, 0xd1d168b9, 0x00000000, 0xeded2cc1,
+0x20206040, 0xfcfc1fe3, 0xb1b1c879, 0x5b5bedb6, 0x6a6abed4, 0xcbcb468d,
+0xbebed967, 0x39394b72, 0x4a4ade94, 0x4c4cd498, 0x5858e8b0, 0xcfcf4a85,
+0xd0d06bbb, 0xefef2ac5, 0xaaaae54f, 0xfbfb16ed, 0x4343c586, 0x4d4dd79a,
+0x33335566, 0x85859411, 0x4545cf8a, 0xf9f910e9, 0x02020604, 0x7f7f81fe,
+0x5050f0a0, 0x3c3c4478, 0x9f9fba25, 0xa8a8e34b, 0x5151f3a2, 0xa3a3fe5d,
+0x4040c080, 0x8f8f8a05, 0x9292ad3f, 0x9d9dbc21, 0x38384870, 0xf5f504f1,
+0xbcbcdf63, 0xb6b6c177, 0xdada75af, 0x21216342, 0x10103020, 0xffff1ae5,
+0xf3f30efd, 0xd2d26dbf, 0xcdcd4c81, 0x0c0c1418, 0x13133526, 0xecec2fc3,
+0x5f5fe1be, 0x9797a235, 0x4444cc88, 0x1717392e, 0xc4c45793, 0xa7a7f255,
+0x7e7e82fc, 0x3d3d477a, 0x6464acc8, 0x5d5de7ba, 0x19192b32, 0x737395e6,
+0x6060a0c0, 0x81819819, 0x4f4fd19e, 0xdcdc7fa3, 0x22226644, 0x2a2a7e54,
+0x9090ab3b, 0x8888830b, 0x4646ca8c, 0xeeee29c7, 0xb8b8d36b, 0x14143c28,
+0xdede79a7, 0x5e5ee2bc, 0x0b0b1d16, 0xdbdb76ad, 0xe0e03bdb, 0x32325664,
+0x3a3a4e74, 0x0a0a1e14, 0x4949db92, 0x06060a0c, 0x24246c48, 0x5c5ce4b8,
+0xc2c25d9f, 0xd3d36ebd, 0xacacef43, 0x6262a6c4, 0x9191a839, 0x9595a431,
+0xe4e437d3, 0x79798bf2, 0xe7e732d5, 0xc8c8438b, 0x3737596e, 0x6d6db7da,
+0x8d8d8c01, 0xd5d564b1, 0x4e4ed29c, 0xa9a9e049, 0x6c6cb4d8, 0x5656faac,
+0xf4f407f3, 0xeaea25cf, 0x6565afca, 0x7a7a8ef4, 0xaeaee947, 0x08081810,
+0xbabad56f, 0x787888f0, 0x25256f4a, 0x2e2e725c, 0x1c1c2438, 0xa6a6f157,
+0xb4b4c773, 0xc6c65197, 0xe8e823cb, 0xdddd7ca1, 0x74749ce8, 0x1f1f213e,
+0x4b4bdd96, 0xbdbddc61, 0x8b8b860d, 0x8a8a850f, 0x707090e0, 0x3e3e427c,
+0xb5b5c471, 0x6666aacc, 0x4848d890, 0x03030506, 0xf6f601f7, 0x0e0e121c,
+0x6161a3c2, 0x35355f6a, 0x5757f9ae, 0xb9b9d069, 0x86869117, 0xc1c15899,
+0x1d1d273a, 0x9e9eb927, 0xe1e138d9, 0xf8f813eb, 0x9898b32b, 0x11113322,
+0x6969bbd2, 0xd9d970a9, 0x8e8e8907, 0x9494a733, 0x9b9bb62d, 0x1e1e223c,
+0x87879215, 0xe9e920c9, 0xcece4987, 0x5555ffaa, 0x28287850, 0xdfdf7aa5,
+0x8c8c8f03, 0xa1a1f859, 0x89898009, 0x0d0d171a, 0xbfbfda65, 0xe6e631d7,
+0x4242c684, 0x6868b8d0, 0x4141c382, 0x9999b029, 0x2d2d775a, 0x0f0f111e,
+0xb0b0cb7b, 0x5454fca8, 0xbbbbd66d, 0x16163a2c
+};
+#endif
+
+#ifdef IS_LITTLE_ENDIAN
+static const PRUint32 _TInv0[256] =
+{
+0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a, 0xcb6bab3b, 0xf1459d1f,
+0xab58faac, 0x9303e34b, 0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5,
+0xfcd7e54f, 0xd7cb2ac5, 0x80443526, 0x8fa362b5, 0x495ab1de, 0x671bba25,
+0x980eea45, 0xe1c0fe5d, 0x02752fc3, 0x12f04c81, 0xa397468d, 0xc6f9d36b,
+0xe75f8f03, 0x959c9215, 0xeb7a6dbf, 0xda595295, 0x2d83bed4, 0xd3217458,
+0x2969e049, 0x44c8c98e, 0x6a89c275, 0x78798ef4, 0x6b3e5899, 0xdd71b927,
+0xb64fe1be, 0x17ad88f0, 0x66ac20c9, 0xb43ace7d, 0x184adf63, 0x82311ae5,
+0x60335197, 0x457f5362, 0xe07764b1, 0x84ae6bbb, 0x1ca081fe, 0x942b08f9,
+0x58684870, 0x19fd458f, 0x876cde94, 0xb7f87b52, 0x23d373ab, 0xe2024b72,
+0x578f1fe3, 0x2aab5566, 0x0728ebb2, 0x03c2b52f, 0x9a7bc586, 0xa50837d3,
+0xf2872830, 0xb2a5bf23, 0xba6a0302, 0x5c8216ed, 0x2b1ccf8a, 0x92b479a7,
+0xf0f207f3, 0xa1e2694e, 0xcdf4da65, 0xd5be0506, 0x1f6234d1, 0x8afea6c4,
+0x9d532e34, 0xa055f3a2, 0x32e18a05, 0x75ebf6a4, 0x39ec830b, 0xaaef6040,
+0x069f715e, 0x51106ebd, 0xf98a213e, 0x3d06dd96, 0xae053edd, 0x46bde64d,
+0xb58d5491, 0x055dc471, 0x6fd40604, 0xff155060, 0x24fb9819, 0x97e9bdd6,
+0xcc434089, 0x779ed967, 0xbd42e8b0, 0x888b8907, 0x385b19e7, 0xdbeec879,
+0x470a7ca1, 0xe90f427c, 0xc91e84f8, 0x00000000, 0x83868009, 0x48ed2b32,
+0xac70111e, 0x4e725a6c, 0xfbff0efd, 0x5638850f, 0x1ed5ae3d, 0x27392d36,
+0x64d90f0a, 0x21a65c68, 0xd1545b9b, 0x3a2e3624, 0xb1670a0c, 0x0fe75793,
+0xd296eeb4, 0x9e919b1b, 0x4fc5c080, 0xa220dc61, 0x694b775a, 0x161a121c,
+0x0aba93e2, 0xe52aa0c0, 0x43e0223c, 0x1d171b12, 0x0b0d090e, 0xadc78bf2,
+0xb9a8b62d, 0xc8a91e14, 0x8519f157, 0x4c0775af, 0xbbdd99ee, 0xfd607fa3,
+0x9f2601f7, 0xbcf5725c, 0xc53b6644, 0x347efb5b, 0x7629438b, 0xdcc623cb,
+0x68fcedb6, 0x63f1e4b8, 0xcadc31d7, 0x10856342, 0x40229713, 0x2011c684,
+0x7d244a85, 0xf83dbbd2, 0x1132f9ae, 0x6da129c7, 0x4b2f9e1d, 0xf330b2dc,
+0xec52860d, 0xd0e3c177, 0x6c16b32b, 0x99b970a9, 0xfa489411, 0x2264e947,
+0xc48cfca8, 0x1a3ff0a0, 0xd82c7d56, 0xef903322, 0xc74e4987, 0xc1d138d9,
+0xfea2ca8c, 0x360bd498, 0xcf81f5a6, 0x28de7aa5, 0x268eb7da, 0xa4bfad3f,
+0xe49d3a2c, 0x0d927850, 0x9bcc5f6a, 0x62467e54, 0xc2138df6, 0xe8b8d890,
+0x5ef7392e, 0xf5afc382, 0xbe805d9f, 0x7c93d069, 0xa92dd56f, 0xb31225cf,
+0x3b99acc8, 0xa77d1810, 0x6e639ce8, 0x7bbb3bdb, 0x097826cd, 0xf418596e,
+0x01b79aec, 0xa89a4f83, 0x656e95e6, 0x7ee6ffaa, 0x08cfbc21, 0xe6e815ef,
+0xd99be7ba, 0xce366f4a, 0xd4099fea, 0xd67cb029, 0xafb2a431, 0x31233f2a,
+0x3094a5c6, 0xc066a235, 0x37bc4e74, 0xa6ca82fc, 0xb0d090e0, 0x15d8a733,
+0x4a9804f1, 0xf7daec41, 0x0e50cd7f, 0x2ff69117, 0x8dd64d76, 0x4db0ef43,
+0x544daacc, 0xdf0496e4, 0xe3b5d19e, 0x1b886a4c, 0xb81f2cc1, 0x7f516546,
+0x04ea5e9d, 0x5d358c01, 0x737487fa, 0x2e410bfb, 0x5a1d67b3, 0x52d2db92,
+0x335610e9, 0x1347d66d, 0x8c61d79a, 0x7a0ca137, 0x8e14f859, 0x893c13eb,
+0xee27a9ce, 0x35c961b7, 0xede51ce1, 0x3cb1477a, 0x59dfd29c, 0x3f73f255,
+0x79ce1418, 0xbf37c773, 0xeacdf753, 0x5baafd5f, 0x146f3ddf, 0x86db4478,
+0x81f3afca, 0x3ec468b9, 0x2c342438, 0x5f40a3c2, 0x72c31d16, 0x0c25e2bc,
+0x8b493c28, 0x41950dff, 0x7101a839, 0xdeb30c08, 0x9ce4b4d8, 0x90c15664,
+0x6184cb7b, 0x70b632d5, 0x745c6c48, 0x4257b8d0
+};
+#else
+static const PRUint32 _TInv0[256] =
+{
+0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, 0x3bab6bcb, 0x1f9d45f1,
+0xacfa58ab, 0x4be30393, 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
+0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, 0xdeb15a49, 0x25ba1b67,
+0x45ea0e98, 0x5dfec0e1, 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
+0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, 0xd4be832d, 0x587421d3,
+0x49e06929, 0x8ec9c844, 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
+0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, 0x63df4a18, 0xe51a3182,
+0x97513360, 0x62537f45, 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
+0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, 0xab73d323, 0x724b02e2,
+0xe31f8f57, 0x6655ab2a, 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
+0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, 0x8acf1c2b, 0xa779b492,
+0xf307f2f0, 0x4e69e2a1, 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
+0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, 0x0b83ec39, 0x4060efaa,
+0x5e719f06, 0xbd6e1051, 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
+0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, 0x1998fb24, 0xd6bde997,
+0x894043cc, 0x67d99e77, 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
+0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, 0x09808683, 0x322bed48,
+0x1e1170ac, 0x6c5a724e, 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
+0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, 0x0c0a67b1, 0x9357e70f,
+0xb4ee96d2, 0x1b9b919e, 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
+0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, 0x0e090d0b, 0xf28bc7ad,
+0x2db6a8b9, 0x141ea9c8, 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
+0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, 0x8b432976, 0xcb23c6dc,
+0xb6edfc68, 0xb8e4f163, 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
+0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, 0x1d9e2f4b, 0xdcb230f3,
+0x0d8652ec, 0x77c1e3d0, 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
+0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, 0x87494ec7, 0xd938d1c1,
+0x8ccaa2fe, 0x98d40b36, 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
+0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662, 0xf68d13c2, 0x90d8b8e8,
+0x2e39f75e, 0x82c3aff5, 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
+0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, 0xcd267809, 0x6e5918f4,
+0xec9ab701, 0x834f9aa8, 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
+0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, 0x31a4b2af, 0x2a3f2331,
+0xc6a59430, 0x35a266c0, 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
+0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, 0x764dd68d, 0x43efb04d,
+0xccaa4d54, 0xe49604df, 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
+0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, 0xb3671d5a, 0x92dbd252,
+0xe9105633, 0x6dd64713, 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
+0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, 0x9cd2df59, 0x55f2733f,
+0x1814ce79, 0x73c737bf, 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
+0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, 0x161dc372, 0xbce2250c,
+0x283c498b, 0xff0d9541, 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
+0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742
+};
+#endif
+
+#ifdef IS_LITTLE_ENDIAN
+static const PRUint32 _TInv1[256] =
+{
+0xa7f45150, 0x65417e53, 0xa4171ac3, 0x5e273a96, 0x6bab3bcb, 0x459d1ff1,
+0x58faacab, 0x03e34b93, 0xfa302055, 0x6d76adf6, 0x76cc8891, 0x4c02f525,
+0xd7e54ffc, 0xcb2ac5d7, 0x44352680, 0xa362b58f, 0x5ab1de49, 0x1bba2567,
+0x0eea4598, 0xc0fe5de1, 0x752fc302, 0xf04c8112, 0x97468da3, 0xf9d36bc6,
+0x5f8f03e7, 0x9c921595, 0x7a6dbfeb, 0x595295da, 0x83bed42d, 0x217458d3,
+0x69e04929, 0xc8c98e44, 0x89c2756a, 0x798ef478, 0x3e58996b, 0x71b927dd,
+0x4fe1beb6, 0xad88f017, 0xac20c966, 0x3ace7db4, 0x4adf6318, 0x311ae582,
+0x33519760, 0x7f536245, 0x7764b1e0, 0xae6bbb84, 0xa081fe1c, 0x2b08f994,
+0x68487058, 0xfd458f19, 0x6cde9487, 0xf87b52b7, 0xd373ab23, 0x024b72e2,
+0x8f1fe357, 0xab55662a, 0x28ebb207, 0xc2b52f03, 0x7bc5869a, 0x0837d3a5,
+0x872830f2, 0xa5bf23b2, 0x6a0302ba, 0x8216ed5c, 0x1ccf8a2b, 0xb479a792,
+0xf207f3f0, 0xe2694ea1, 0xf4da65cd, 0xbe0506d5, 0x6234d11f, 0xfea6c48a,
+0x532e349d, 0x55f3a2a0, 0xe18a0532, 0xebf6a475, 0xec830b39, 0xef6040aa,
+0x9f715e06, 0x106ebd51, 0x8a213ef9, 0x06dd963d, 0x053eddae, 0xbde64d46,
+0x8d5491b5, 0x5dc47105, 0xd406046f, 0x155060ff, 0xfb981924, 0xe9bdd697,
+0x434089cc, 0x9ed96777, 0x42e8b0bd, 0x8b890788, 0x5b19e738, 0xeec879db,
+0x0a7ca147, 0x0f427ce9, 0x1e84f8c9, 0x00000000, 0x86800983, 0xed2b3248,
+0x70111eac, 0x725a6c4e, 0xff0efdfb, 0x38850f56, 0xd5ae3d1e, 0x392d3627,
+0xd90f0a64, 0xa65c6821, 0x545b9bd1, 0x2e36243a, 0x670a0cb1, 0xe757930f,
+0x96eeb4d2, 0x919b1b9e, 0xc5c0804f, 0x20dc61a2, 0x4b775a69, 0x1a121c16,
+0xba93e20a, 0x2aa0c0e5, 0xe0223c43, 0x171b121d, 0x0d090e0b, 0xc78bf2ad,
+0xa8b62db9, 0xa91e14c8, 0x19f15785, 0x0775af4c, 0xdd99eebb, 0x607fa3fd,
+0x2601f79f, 0xf5725cbc, 0x3b6644c5, 0x7efb5b34, 0x29438b76, 0xc623cbdc,
+0xfcedb668, 0xf1e4b863, 0xdc31d7ca, 0x85634210, 0x22971340, 0x11c68420,
+0x244a857d, 0x3dbbd2f8, 0x32f9ae11, 0xa129c76d, 0x2f9e1d4b, 0x30b2dcf3,
+0x52860dec, 0xe3c177d0, 0x16b32b6c, 0xb970a999, 0x489411fa, 0x64e94722,
+0x8cfca8c4, 0x3ff0a01a, 0x2c7d56d8, 0x903322ef, 0x4e4987c7, 0xd138d9c1,
+0xa2ca8cfe, 0x0bd49836, 0x81f5a6cf, 0xde7aa528, 0x8eb7da26, 0xbfad3fa4,
+0x9d3a2ce4, 0x9278500d, 0xcc5f6a9b, 0x467e5462, 0x138df6c2, 0xb8d890e8,
+0xf7392e5e, 0xafc382f5, 0x805d9fbe, 0x93d0697c, 0x2dd56fa9, 0x1225cfb3,
+0x99acc83b, 0x7d1810a7, 0x639ce86e, 0xbb3bdb7b, 0x7826cd09, 0x18596ef4,
+0xb79aec01, 0x9a4f83a8, 0x6e95e665, 0xe6ffaa7e, 0xcfbc2108, 0xe815efe6,
+0x9be7bad9, 0x366f4ace, 0x099fead4, 0x7cb029d6, 0xb2a431af, 0x233f2a31,
+0x94a5c630, 0x66a235c0, 0xbc4e7437, 0xca82fca6, 0xd090e0b0, 0xd8a73315,
+0x9804f14a, 0xdaec41f7, 0x50cd7f0e, 0xf691172f, 0xd64d768d, 0xb0ef434d,
+0x4daacc54, 0x0496e4df, 0xb5d19ee3, 0x886a4c1b, 0x1f2cc1b8, 0x5165467f,
+0xea5e9d04, 0x358c015d, 0x7487fa73, 0x410bfb2e, 0x1d67b35a, 0xd2db9252,
+0x5610e933, 0x47d66d13, 0x61d79a8c, 0x0ca1377a, 0x14f8598e, 0x3c13eb89,
+0x27a9ceee, 0xc961b735, 0xe51ce1ed, 0xb1477a3c, 0xdfd29c59, 0x73f2553f,
+0xce141879, 0x37c773bf, 0xcdf753ea, 0xaafd5f5b, 0x6f3ddf14, 0xdb447886,
+0xf3afca81, 0xc468b93e, 0x3424382c, 0x40a3c25f, 0xc31d1672, 0x25e2bc0c,
+0x493c288b, 0x950dff41, 0x01a83971, 0xb30c08de, 0xe4b4d89c, 0xc1566490,
+0x84cb7b61, 0xb632d570, 0x5c6c4874, 0x57b8d042
+};
+#else
+static const PRUint32 _TInv1[256] =
+{
+0x5051f4a7, 0x537e4165, 0xc31a17a4, 0x963a275e, 0xcb3bab6b, 0xf11f9d45,
+0xabacfa58, 0x934be303, 0x552030fa, 0xf6ad766d, 0x9188cc76, 0x25f5024c,
+0xfc4fe5d7, 0xd7c52acb, 0x80263544, 0x8fb562a3, 0x49deb15a, 0x6725ba1b,
+0x9845ea0e, 0xe15dfec0, 0x02c32f75, 0x12814cf0, 0xa38d4697, 0xc66bd3f9,
+0xe7038f5f, 0x9515929c, 0xebbf6d7a, 0xda955259, 0x2dd4be83, 0xd3587421,
+0x2949e069, 0x448ec9c8, 0x6a75c289, 0x78f48e79, 0x6b99583e, 0xdd27b971,
+0xb6bee14f, 0x17f088ad, 0x66c920ac, 0xb47dce3a, 0x1863df4a, 0x82e51a31,
+0x60975133, 0x4562537f, 0xe0b16477, 0x84bb6bae, 0x1cfe81a0, 0x94f9082b,
+0x58704868, 0x198f45fd, 0x8794de6c, 0xb7527bf8, 0x23ab73d3, 0xe2724b02,
+0x57e31f8f, 0x2a6655ab, 0x07b2eb28, 0x032fb5c2, 0x9a86c57b, 0xa5d33708,
+0xf2302887, 0xb223bfa5, 0xba02036a, 0x5ced1682, 0x2b8acf1c, 0x92a779b4,
+0xf0f307f2, 0xa14e69e2, 0xcd65daf4, 0xd50605be, 0x1fd13462, 0x8ac4a6fe,
+0x9d342e53, 0xa0a2f355, 0x32058ae1, 0x75a4f6eb, 0x390b83ec, 0xaa4060ef,
+0x065e719f, 0x51bd6e10, 0xf93e218a, 0x3d96dd06, 0xaedd3e05, 0x464de6bd,
+0xb591548d, 0x0571c45d, 0x6f0406d4, 0xff605015, 0x241998fb, 0x97d6bde9,
+0xcc894043, 0x7767d99e, 0xbdb0e842, 0x8807898b, 0x38e7195b, 0xdb79c8ee,
+0x47a17c0a, 0xe97c420f, 0xc9f8841e, 0x00000000, 0x83098086, 0x48322bed,
+0xac1e1170, 0x4e6c5a72, 0xfbfd0eff, 0x560f8538, 0x1e3daed5, 0x27362d39,
+0x640a0fd9, 0x21685ca6, 0xd19b5b54, 0x3a24362e, 0xb10c0a67, 0x0f9357e7,
+0xd2b4ee96, 0x9e1b9b91, 0x4f80c0c5, 0xa261dc20, 0x695a774b, 0x161c121a,
+0x0ae293ba, 0xe5c0a02a, 0x433c22e0, 0x1d121b17, 0x0b0e090d, 0xadf28bc7,
+0xb92db6a8, 0xc8141ea9, 0x8557f119, 0x4caf7507, 0xbbee99dd, 0xfda37f60,
+0x9ff70126, 0xbc5c72f5, 0xc544663b, 0x345bfb7e, 0x768b4329, 0xdccb23c6,
+0x68b6edfc, 0x63b8e4f1, 0xcad731dc, 0x10426385, 0x40139722, 0x2084c611,
+0x7d854a24, 0xf8d2bb3d, 0x11aef932, 0x6dc729a1, 0x4b1d9e2f, 0xf3dcb230,
+0xec0d8652, 0xd077c1e3, 0x6c2bb316, 0x99a970b9, 0xfa119448, 0x2247e964,
+0xc4a8fc8c, 0x1aa0f03f, 0xd8567d2c, 0xef223390, 0xc787494e, 0xc1d938d1,
+0xfe8ccaa2, 0x3698d40b, 0xcfa6f581, 0x28a57ade, 0x26dab78e, 0xa43fadbf,
+0xe42c3a9d, 0x0d507892, 0x9b6a5fcc, 0x62547e46, 0xc2f68d13, 0xe890d8b8,
+0x5e2e39f7, 0xf582c3af, 0xbe9f5d80, 0x7c69d093, 0xa96fd52d, 0xb3cf2512,
+0x3bc8ac99, 0xa710187d, 0x6ee89c63, 0x7bdb3bbb, 0x09cd2678, 0xf46e5918,
+0x01ec9ab7, 0xa8834f9a, 0x65e6956e, 0x7eaaffe6, 0x0821bccf, 0xe6ef15e8,
+0xd9bae79b, 0xce4a6f36, 0xd4ea9f09, 0xd629b07c, 0xaf31a4b2, 0x312a3f23,
+0x30c6a594, 0xc035a266, 0x37744ebc, 0xa6fc82ca, 0xb0e090d0, 0x1533a7d8,
+0x4af10498, 0xf741ecda, 0x0e7fcd50, 0x2f1791f6, 0x8d764dd6, 0x4d43efb0,
+0x54ccaa4d, 0xdfe49604, 0xe39ed1b5, 0x1b4c6a88, 0xb8c12c1f, 0x7f466551,
+0x049d5eea, 0x5d018c35, 0x73fa8774, 0x2efb0b41, 0x5ab3671d, 0x5292dbd2,
+0x33e91056, 0x136dd647, 0x8c9ad761, 0x7a37a10c, 0x8e59f814, 0x89eb133c,
+0xeecea927, 0x35b761c9, 0xede11ce5, 0x3c7a47b1, 0x599cd2df, 0x3f55f273,
+0x791814ce, 0xbf73c737, 0xea53f7cd, 0x5b5ffdaa, 0x14df3d6f, 0x867844db,
+0x81caaff3, 0x3eb968c4, 0x2c382434, 0x5fc2a340, 0x72161dc3, 0x0cbce225,
+0x8b283c49, 0x41ff0d95, 0x7139a801, 0xde080cb3, 0x9cd8b4e4, 0x906456c1,
+0x617bcb84, 0x70d532b6, 0x74486c5c, 0x42d0b857
+};
+#endif
+
+#ifdef IS_LITTLE_ENDIAN
+static const PRUint32 _TInv2[256] =
+{
+0xf45150a7, 0x417e5365, 0x171ac3a4, 0x273a965e, 0xab3bcb6b, 0x9d1ff145,
+0xfaacab58, 0xe34b9303, 0x302055fa, 0x76adf66d, 0xcc889176, 0x02f5254c,
+0xe54ffcd7, 0x2ac5d7cb, 0x35268044, 0x62b58fa3, 0xb1de495a, 0xba25671b,
+0xea45980e, 0xfe5de1c0, 0x2fc30275, 0x4c8112f0, 0x468da397, 0xd36bc6f9,
+0x8f03e75f, 0x9215959c, 0x6dbfeb7a, 0x5295da59, 0xbed42d83, 0x7458d321,
+0xe0492969, 0xc98e44c8, 0xc2756a89, 0x8ef47879, 0x58996b3e, 0xb927dd71,
+0xe1beb64f, 0x88f017ad, 0x20c966ac, 0xce7db43a, 0xdf63184a, 0x1ae58231,
+0x51976033, 0x5362457f, 0x64b1e077, 0x6bbb84ae, 0x81fe1ca0, 0x08f9942b,
+0x48705868, 0x458f19fd, 0xde94876c, 0x7b52b7f8, 0x73ab23d3, 0x4b72e202,
+0x1fe3578f, 0x55662aab, 0xebb20728, 0xb52f03c2, 0xc5869a7b, 0x37d3a508,
+0x2830f287, 0xbf23b2a5, 0x0302ba6a, 0x16ed5c82, 0xcf8a2b1c, 0x79a792b4,
+0x07f3f0f2, 0x694ea1e2, 0xda65cdf4, 0x0506d5be, 0x34d11f62, 0xa6c48afe,
+0x2e349d53, 0xf3a2a055, 0x8a0532e1, 0xf6a475eb, 0x830b39ec, 0x6040aaef,
+0x715e069f, 0x6ebd5110, 0x213ef98a, 0xdd963d06, 0x3eddae05, 0xe64d46bd,
+0x5491b58d, 0xc471055d, 0x06046fd4, 0x5060ff15, 0x981924fb, 0xbdd697e9,
+0x4089cc43, 0xd967779e, 0xe8b0bd42, 0x8907888b, 0x19e7385b, 0xc879dbee,
+0x7ca1470a, 0x427ce90f, 0x84f8c91e, 0x00000000, 0x80098386, 0x2b3248ed,
+0x111eac70, 0x5a6c4e72, 0x0efdfbff, 0x850f5638, 0xae3d1ed5, 0x2d362739,
+0x0f0a64d9, 0x5c6821a6, 0x5b9bd154, 0x36243a2e, 0x0a0cb167, 0x57930fe7,
+0xeeb4d296, 0x9b1b9e91, 0xc0804fc5, 0xdc61a220, 0x775a694b, 0x121c161a,
+0x93e20aba, 0xa0c0e52a, 0x223c43e0, 0x1b121d17, 0x090e0b0d, 0x8bf2adc7,
+0xb62db9a8, 0x1e14c8a9, 0xf1578519, 0x75af4c07, 0x99eebbdd, 0x7fa3fd60,
+0x01f79f26, 0x725cbcf5, 0x6644c53b, 0xfb5b347e, 0x438b7629, 0x23cbdcc6,
+0xedb668fc, 0xe4b863f1, 0x31d7cadc, 0x63421085, 0x97134022, 0xc6842011,
+0x4a857d24, 0xbbd2f83d, 0xf9ae1132, 0x29c76da1, 0x9e1d4b2f, 0xb2dcf330,
+0x860dec52, 0xc177d0e3, 0xb32b6c16, 0x70a999b9, 0x9411fa48, 0xe9472264,
+0xfca8c48c, 0xf0a01a3f, 0x7d56d82c, 0x3322ef90, 0x4987c74e, 0x38d9c1d1,
+0xca8cfea2, 0xd498360b, 0xf5a6cf81, 0x7aa528de, 0xb7da268e, 0xad3fa4bf,
+0x3a2ce49d, 0x78500d92, 0x5f6a9bcc, 0x7e546246, 0x8df6c213, 0xd890e8b8,
+0x392e5ef7, 0xc382f5af, 0x5d9fbe80, 0xd0697c93, 0xd56fa92d, 0x25cfb312,
+0xacc83b99, 0x1810a77d, 0x9ce86e63, 0x3bdb7bbb, 0x26cd0978, 0x596ef418,
+0x9aec01b7, 0x4f83a89a, 0x95e6656e, 0xffaa7ee6, 0xbc2108cf, 0x15efe6e8,
+0xe7bad99b, 0x6f4ace36, 0x9fead409, 0xb029d67c, 0xa431afb2, 0x3f2a3123,
+0xa5c63094, 0xa235c066, 0x4e7437bc, 0x82fca6ca, 0x90e0b0d0, 0xa73315d8,
+0x04f14a98, 0xec41f7da, 0xcd7f0e50, 0x91172ff6, 0x4d768dd6, 0xef434db0,
+0xaacc544d, 0x96e4df04, 0xd19ee3b5, 0x6a4c1b88, 0x2cc1b81f, 0x65467f51,
+0x5e9d04ea, 0x8c015d35, 0x87fa7374, 0x0bfb2e41, 0x67b35a1d, 0xdb9252d2,
+0x10e93356, 0xd66d1347, 0xd79a8c61, 0xa1377a0c, 0xf8598e14, 0x13eb893c,
+0xa9ceee27, 0x61b735c9, 0x1ce1ede5, 0x477a3cb1, 0xd29c59df, 0xf2553f73,
+0x141879ce, 0xc773bf37, 0xf753eacd, 0xfd5f5baa, 0x3ddf146f, 0x447886db,
+0xafca81f3, 0x68b93ec4, 0x24382c34, 0xa3c25f40, 0x1d1672c3, 0xe2bc0c25,
+0x3c288b49, 0x0dff4195, 0xa8397101, 0x0c08deb3, 0xb4d89ce4, 0x566490c1,
+0xcb7b6184, 0x32d570b6, 0x6c48745c, 0xb8d04257
+};
+#else
+static const PRUint32 _TInv2[256] =
+{
+0xa75051f4, 0x65537e41, 0xa4c31a17, 0x5e963a27, 0x6bcb3bab, 0x45f11f9d,
+0x58abacfa, 0x03934be3, 0xfa552030, 0x6df6ad76, 0x769188cc, 0x4c25f502,
+0xd7fc4fe5, 0xcbd7c52a, 0x44802635, 0xa38fb562, 0x5a49deb1, 0x1b6725ba,
+0x0e9845ea, 0xc0e15dfe, 0x7502c32f, 0xf012814c, 0x97a38d46, 0xf9c66bd3,
+0x5fe7038f, 0x9c951592, 0x7aebbf6d, 0x59da9552, 0x832dd4be, 0x21d35874,
+0x692949e0, 0xc8448ec9, 0x896a75c2, 0x7978f48e, 0x3e6b9958, 0x71dd27b9,
+0x4fb6bee1, 0xad17f088, 0xac66c920, 0x3ab47dce, 0x4a1863df, 0x3182e51a,
+0x33609751, 0x7f456253, 0x77e0b164, 0xae84bb6b, 0xa01cfe81, 0x2b94f908,
+0x68587048, 0xfd198f45, 0x6c8794de, 0xf8b7527b, 0xd323ab73, 0x02e2724b,
+0x8f57e31f, 0xab2a6655, 0x2807b2eb, 0xc2032fb5, 0x7b9a86c5, 0x08a5d337,
+0x87f23028, 0xa5b223bf, 0x6aba0203, 0x825ced16, 0x1c2b8acf, 0xb492a779,
+0xf2f0f307, 0xe2a14e69, 0xf4cd65da, 0xbed50605, 0x621fd134, 0xfe8ac4a6,
+0x539d342e, 0x55a0a2f3, 0xe132058a, 0xeb75a4f6, 0xec390b83, 0xefaa4060,
+0x9f065e71, 0x1051bd6e, 0x8af93e21, 0x063d96dd, 0x05aedd3e, 0xbd464de6,
+0x8db59154, 0x5d0571c4, 0xd46f0406, 0x15ff6050, 0xfb241998, 0xe997d6bd,
+0x43cc8940, 0x9e7767d9, 0x42bdb0e8, 0x8b880789, 0x5b38e719, 0xeedb79c8,
+0x0a47a17c, 0x0fe97c42, 0x1ec9f884, 0x00000000, 0x86830980, 0xed48322b,
+0x70ac1e11, 0x724e6c5a, 0xfffbfd0e, 0x38560f85, 0xd51e3dae, 0x3927362d,
+0xd9640a0f, 0xa621685c, 0x54d19b5b, 0x2e3a2436, 0x67b10c0a, 0xe70f9357,
+0x96d2b4ee, 0x919e1b9b, 0xc54f80c0, 0x20a261dc, 0x4b695a77, 0x1a161c12,
+0xba0ae293, 0x2ae5c0a0, 0xe0433c22, 0x171d121b, 0x0d0b0e09, 0xc7adf28b,
+0xa8b92db6, 0xa9c8141e, 0x198557f1, 0x074caf75, 0xddbbee99, 0x60fda37f,
+0x269ff701, 0xf5bc5c72, 0x3bc54466, 0x7e345bfb, 0x29768b43, 0xc6dccb23,
+0xfc68b6ed, 0xf163b8e4, 0xdccad731, 0x85104263, 0x22401397, 0x112084c6,
+0x247d854a, 0x3df8d2bb, 0x3211aef9, 0xa16dc729, 0x2f4b1d9e, 0x30f3dcb2,
+0x52ec0d86, 0xe3d077c1, 0x166c2bb3, 0xb999a970, 0x48fa1194, 0x642247e9,
+0x8cc4a8fc, 0x3f1aa0f0, 0x2cd8567d, 0x90ef2233, 0x4ec78749, 0xd1c1d938,
+0xa2fe8cca, 0x0b3698d4, 0x81cfa6f5, 0xde28a57a, 0x8e26dab7, 0xbfa43fad,
+0x9de42c3a, 0x920d5078, 0xcc9b6a5f, 0x4662547e, 0x13c2f68d, 0xb8e890d8,
+0xf75e2e39, 0xaff582c3, 0x80be9f5d, 0x937c69d0, 0x2da96fd5, 0x12b3cf25,
+0x993bc8ac, 0x7da71018, 0x636ee89c, 0xbb7bdb3b, 0x7809cd26, 0x18f46e59,
+0xb701ec9a, 0x9aa8834f, 0x6e65e695, 0xe67eaaff, 0xcf0821bc, 0xe8e6ef15,
+0x9bd9bae7, 0x36ce4a6f, 0x09d4ea9f, 0x7cd629b0, 0xb2af31a4, 0x23312a3f,
+0x9430c6a5, 0x66c035a2, 0xbc37744e, 0xcaa6fc82, 0xd0b0e090, 0xd81533a7,
+0x984af104, 0xdaf741ec, 0x500e7fcd, 0xf62f1791, 0xd68d764d, 0xb04d43ef,
+0x4d54ccaa, 0x04dfe496, 0xb5e39ed1, 0x881b4c6a, 0x1fb8c12c, 0x517f4665,
+0xea049d5e, 0x355d018c, 0x7473fa87, 0x412efb0b, 0x1d5ab367, 0xd25292db,
+0x5633e910, 0x47136dd6, 0x618c9ad7, 0x0c7a37a1, 0x148e59f8, 0x3c89eb13,
+0x27eecea9, 0xc935b761, 0xe5ede11c, 0xb13c7a47, 0xdf599cd2, 0x733f55f2,
+0xce791814, 0x37bf73c7, 0xcdea53f7, 0xaa5b5ffd, 0x6f14df3d, 0xdb867844,
+0xf381caaf, 0xc43eb968, 0x342c3824, 0x405fc2a3, 0xc372161d, 0x250cbce2,
+0x498b283c, 0x9541ff0d, 0x017139a8, 0xb3de080c, 0xe49cd8b4, 0xc1906456,
+0x84617bcb, 0xb670d532, 0x5c74486c, 0x5742d0b8
+};
+#endif
+
+#ifdef IS_LITTLE_ENDIAN
+static const PRUint32 _TInv3[256] =
+{
+0x5150a7f4, 0x7e536541, 0x1ac3a417, 0x3a965e27, 0x3bcb6bab, 0x1ff1459d,
+0xacab58fa, 0x4b9303e3, 0x2055fa30, 0xadf66d76, 0x889176cc, 0xf5254c02,
+0x4ffcd7e5, 0xc5d7cb2a, 0x26804435, 0xb58fa362, 0xde495ab1, 0x25671bba,
+0x45980eea, 0x5de1c0fe, 0xc302752f, 0x8112f04c, 0x8da39746, 0x6bc6f9d3,
+0x03e75f8f, 0x15959c92, 0xbfeb7a6d, 0x95da5952, 0xd42d83be, 0x58d32174,
+0x492969e0, 0x8e44c8c9, 0x756a89c2, 0xf478798e, 0x996b3e58, 0x27dd71b9,
+0xbeb64fe1, 0xf017ad88, 0xc966ac20, 0x7db43ace, 0x63184adf, 0xe582311a,
+0x97603351, 0x62457f53, 0xb1e07764, 0xbb84ae6b, 0xfe1ca081, 0xf9942b08,
+0x70586848, 0x8f19fd45, 0x94876cde, 0x52b7f87b, 0xab23d373, 0x72e2024b,
+0xe3578f1f, 0x662aab55, 0xb20728eb, 0x2f03c2b5, 0x869a7bc5, 0xd3a50837,
+0x30f28728, 0x23b2a5bf, 0x02ba6a03, 0xed5c8216, 0x8a2b1ccf, 0xa792b479,
+0xf3f0f207, 0x4ea1e269, 0x65cdf4da, 0x06d5be05, 0xd11f6234, 0xc48afea6,
+0x349d532e, 0xa2a055f3, 0x0532e18a, 0xa475ebf6, 0x0b39ec83, 0x40aaef60,
+0x5e069f71, 0xbd51106e, 0x3ef98a21, 0x963d06dd, 0xddae053e, 0x4d46bde6,
+0x91b58d54, 0x71055dc4, 0x046fd406, 0x60ff1550, 0x1924fb98, 0xd697e9bd,
+0x89cc4340, 0x67779ed9, 0xb0bd42e8, 0x07888b89, 0xe7385b19, 0x79dbeec8,
+0xa1470a7c, 0x7ce90f42, 0xf8c91e84, 0x00000000, 0x09838680, 0x3248ed2b,
+0x1eac7011, 0x6c4e725a, 0xfdfbff0e, 0x0f563885, 0x3d1ed5ae, 0x3627392d,
+0x0a64d90f, 0x6821a65c, 0x9bd1545b, 0x243a2e36, 0x0cb1670a, 0x930fe757,
+0xb4d296ee, 0x1b9e919b, 0x804fc5c0, 0x61a220dc, 0x5a694b77, 0x1c161a12,
+0xe20aba93, 0xc0e52aa0, 0x3c43e022, 0x121d171b, 0x0e0b0d09, 0xf2adc78b,
+0x2db9a8b6, 0x14c8a91e, 0x578519f1, 0xaf4c0775, 0xeebbdd99, 0xa3fd607f,
+0xf79f2601, 0x5cbcf572, 0x44c53b66, 0x5b347efb, 0x8b762943, 0xcbdcc623,
+0xb668fced, 0xb863f1e4, 0xd7cadc31, 0x42108563, 0x13402297, 0x842011c6,
+0x857d244a, 0xd2f83dbb, 0xae1132f9, 0xc76da129, 0x1d4b2f9e, 0xdcf330b2,
+0x0dec5286, 0x77d0e3c1, 0x2b6c16b3, 0xa999b970, 0x11fa4894, 0x472264e9,
+0xa8c48cfc, 0xa01a3ff0, 0x56d82c7d, 0x22ef9033, 0x87c74e49, 0xd9c1d138,
+0x8cfea2ca, 0x98360bd4, 0xa6cf81f5, 0xa528de7a, 0xda268eb7, 0x3fa4bfad,
+0x2ce49d3a, 0x500d9278, 0x6a9bcc5f, 0x5462467e, 0xf6c2138d, 0x90e8b8d8,
+0x2e5ef739, 0x82f5afc3, 0x9fbe805d, 0x697c93d0, 0x6fa92dd5, 0xcfb31225,
+0xc83b99ac, 0x10a77d18, 0xe86e639c, 0xdb7bbb3b, 0xcd097826, 0x6ef41859,
+0xec01b79a, 0x83a89a4f, 0xe6656e95, 0xaa7ee6ff, 0x2108cfbc, 0xefe6e815,
+0xbad99be7, 0x4ace366f, 0xead4099f, 0x29d67cb0, 0x31afb2a4, 0x2a31233f,
+0xc63094a5, 0x35c066a2, 0x7437bc4e, 0xfca6ca82, 0xe0b0d090, 0x3315d8a7,
+0xf14a9804, 0x41f7daec, 0x7f0e50cd, 0x172ff691, 0x768dd64d, 0x434db0ef,
+0xcc544daa, 0xe4df0496, 0x9ee3b5d1, 0x4c1b886a, 0xc1b81f2c, 0x467f5165,
+0x9d04ea5e, 0x015d358c, 0xfa737487, 0xfb2e410b, 0xb35a1d67, 0x9252d2db,
+0xe9335610, 0x6d1347d6, 0x9a8c61d7, 0x377a0ca1, 0x598e14f8, 0xeb893c13,
+0xceee27a9, 0xb735c961, 0xe1ede51c, 0x7a3cb147, 0x9c59dfd2, 0x553f73f2,
+0x1879ce14, 0x73bf37c7, 0x53eacdf7, 0x5f5baafd, 0xdf146f3d, 0x7886db44,
+0xca81f3af, 0xb93ec468, 0x382c3424, 0xc25f40a3, 0x1672c31d, 0xbc0c25e2,
+0x288b493c, 0xff41950d, 0x397101a8, 0x08deb30c, 0xd89ce4b4, 0x6490c156,
+0x7b6184cb, 0xd570b632, 0x48745c6c, 0xd04257b8
+};
+#else
+static const PRUint32 _TInv3[256] =
+{
+0xf4a75051, 0x4165537e, 0x17a4c31a, 0x275e963a, 0xab6bcb3b, 0x9d45f11f,
+0xfa58abac, 0xe303934b, 0x30fa5520, 0x766df6ad, 0xcc769188, 0x024c25f5,
+0xe5d7fc4f, 0x2acbd7c5, 0x35448026, 0x62a38fb5, 0xb15a49de, 0xba1b6725,
+0xea0e9845, 0xfec0e15d, 0x2f7502c3, 0x4cf01281, 0x4697a38d, 0xd3f9c66b,
+0x8f5fe703, 0x929c9515, 0x6d7aebbf, 0x5259da95, 0xbe832dd4, 0x7421d358,
+0xe0692949, 0xc9c8448e, 0xc2896a75, 0x8e7978f4, 0x583e6b99, 0xb971dd27,
+0xe14fb6be, 0x88ad17f0, 0x20ac66c9, 0xce3ab47d, 0xdf4a1863, 0x1a3182e5,
+0x51336097, 0x537f4562, 0x6477e0b1, 0x6bae84bb, 0x81a01cfe, 0x082b94f9,
+0x48685870, 0x45fd198f, 0xde6c8794, 0x7bf8b752, 0x73d323ab, 0x4b02e272,
+0x1f8f57e3, 0x55ab2a66, 0xeb2807b2, 0xb5c2032f, 0xc57b9a86, 0x3708a5d3,
+0x2887f230, 0xbfa5b223, 0x036aba02, 0x16825ced, 0xcf1c2b8a, 0x79b492a7,
+0x07f2f0f3, 0x69e2a14e, 0xdaf4cd65, 0x05bed506, 0x34621fd1, 0xa6fe8ac4,
+0x2e539d34, 0xf355a0a2, 0x8ae13205, 0xf6eb75a4, 0x83ec390b, 0x60efaa40,
+0x719f065e, 0x6e1051bd, 0x218af93e, 0xdd063d96, 0x3e05aedd, 0xe6bd464d,
+0x548db591, 0xc45d0571, 0x06d46f04, 0x5015ff60, 0x98fb2419, 0xbde997d6,
+0x4043cc89, 0xd99e7767, 0xe842bdb0, 0x898b8807, 0x195b38e7, 0xc8eedb79,
+0x7c0a47a1, 0x420fe97c, 0x841ec9f8, 0x00000000, 0x80868309, 0x2bed4832,
+0x1170ac1e, 0x5a724e6c, 0x0efffbfd, 0x8538560f, 0xaed51e3d, 0x2d392736,
+0x0fd9640a, 0x5ca62168, 0x5b54d19b, 0x362e3a24, 0x0a67b10c, 0x57e70f93,
+0xee96d2b4, 0x9b919e1b, 0xc0c54f80, 0xdc20a261, 0x774b695a, 0x121a161c,
+0x93ba0ae2, 0xa02ae5c0, 0x22e0433c, 0x1b171d12, 0x090d0b0e, 0x8bc7adf2,
+0xb6a8b92d, 0x1ea9c814, 0xf1198557, 0x75074caf, 0x99ddbbee, 0x7f60fda3,
+0x01269ff7, 0x72f5bc5c, 0x663bc544, 0xfb7e345b, 0x4329768b, 0x23c6dccb,
+0xedfc68b6, 0xe4f163b8, 0x31dccad7, 0x63851042, 0x97224013, 0xc6112084,
+0x4a247d85, 0xbb3df8d2, 0xf93211ae, 0x29a16dc7, 0x9e2f4b1d, 0xb230f3dc,
+0x8652ec0d, 0xc1e3d077, 0xb3166c2b, 0x70b999a9, 0x9448fa11, 0xe9642247,
+0xfc8cc4a8, 0xf03f1aa0, 0x7d2cd856, 0x3390ef22, 0x494ec787, 0x38d1c1d9,
+0xcaa2fe8c, 0xd40b3698, 0xf581cfa6, 0x7ade28a5, 0xb78e26da, 0xadbfa43f,
+0x3a9de42c, 0x78920d50, 0x5fcc9b6a, 0x7e466254, 0x8d13c2f6, 0xd8b8e890,
+0x39f75e2e, 0xc3aff582, 0x5d80be9f, 0xd0937c69, 0xd52da96f, 0x2512b3cf,
+0xac993bc8, 0x187da710, 0x9c636ee8, 0x3bbb7bdb, 0x267809cd, 0x5918f46e,
+0x9ab701ec, 0x4f9aa883, 0x956e65e6, 0xffe67eaa, 0xbccf0821, 0x15e8e6ef,
+0xe79bd9ba, 0x6f36ce4a, 0x9f09d4ea, 0xb07cd629, 0xa4b2af31, 0x3f23312a,
+0xa59430c6, 0xa266c035, 0x4ebc3774, 0x82caa6fc, 0x90d0b0e0, 0xa7d81533,
+0x04984af1, 0xecdaf741, 0xcd500e7f, 0x91f62f17, 0x4dd68d76, 0xefb04d43,
+0xaa4d54cc, 0x9604dfe4, 0xd1b5e39e, 0x6a881b4c, 0x2c1fb8c1, 0x65517f46,
+0x5eea049d, 0x8c355d01, 0x877473fa, 0x0b412efb, 0x671d5ab3, 0xdbd25292,
+0x105633e9, 0xd647136d, 0xd7618c9a, 0xa10c7a37, 0xf8148e59, 0x133c89eb,
+0xa927eece, 0x61c935b7, 0x1ce5ede1, 0x47b13c7a, 0xd2df599c, 0xf2733f55,
+0x14ce7918, 0xc737bf73, 0xf7cdea53, 0xfdaa5b5f, 0x3d6f14df, 0x44db8678,
+0xaff381ca, 0x68c43eb9, 0x24342c38, 0xa3405fc2, 0x1dc37216, 0xe2250cbc,
+0x3c498b28, 0x0d9541ff, 0xa8017139, 0x0cb3de08, 0xb4e49cd8, 0x56c19064,
+0xcb84617b, 0x32b670d5, 0x6c5c7448, 0xb85742d0
+};
+#endif
+
+#ifdef IS_LITTLE_ENDIAN
+static const PRUint32 _IMXC0[256] =
+{
+0x00000000, 0x0b0d090e, 0x161a121c, 0x1d171b12, 0x2c342438, 0x27392d36,
+0x3a2e3624, 0x31233f2a, 0x58684870, 0x5365417e, 0x4e725a6c, 0x457f5362,
+0x745c6c48, 0x7f516546, 0x62467e54, 0x694b775a, 0xb0d090e0, 0xbbdd99ee,
+0xa6ca82fc, 0xadc78bf2, 0x9ce4b4d8, 0x97e9bdd6, 0x8afea6c4, 0x81f3afca,
+0xe8b8d890, 0xe3b5d19e, 0xfea2ca8c, 0xf5afc382, 0xc48cfca8, 0xcf81f5a6,
+0xd296eeb4, 0xd99be7ba, 0x7bbb3bdb, 0x70b632d5, 0x6da129c7, 0x66ac20c9,
+0x578f1fe3, 0x5c8216ed, 0x41950dff, 0x4a9804f1, 0x23d373ab, 0x28de7aa5,
+0x35c961b7, 0x3ec468b9, 0x0fe75793, 0x04ea5e9d, 0x19fd458f, 0x12f04c81,
+0xcb6bab3b, 0xc066a235, 0xdd71b927, 0xd67cb029, 0xe75f8f03, 0xec52860d,
+0xf1459d1f, 0xfa489411, 0x9303e34b, 0x980eea45, 0x8519f157, 0x8e14f859,
+0xbf37c773, 0xb43ace7d, 0xa92dd56f, 0xa220dc61, 0xf66d76ad, 0xfd607fa3,
+0xe07764b1, 0xeb7a6dbf, 0xda595295, 0xd1545b9b, 0xcc434089, 0xc74e4987,
+0xae053edd, 0xa50837d3, 0xb81f2cc1, 0xb31225cf, 0x82311ae5, 0x893c13eb,
+0x942b08f9, 0x9f2601f7, 0x46bde64d, 0x4db0ef43, 0x50a7f451, 0x5baafd5f,
+0x6a89c275, 0x6184cb7b, 0x7c93d069, 0x779ed967, 0x1ed5ae3d, 0x15d8a733,
+0x08cfbc21, 0x03c2b52f, 0x32e18a05, 0x39ec830b, 0x24fb9819, 0x2ff69117,
+0x8dd64d76, 0x86db4478, 0x9bcc5f6a, 0x90c15664, 0xa1e2694e, 0xaaef6040,
+0xb7f87b52, 0xbcf5725c, 0xd5be0506, 0xdeb30c08, 0xc3a4171a, 0xc8a91e14,
+0xf98a213e, 0xf2872830, 0xef903322, 0xe49d3a2c, 0x3d06dd96, 0x360bd498,
+0x2b1ccf8a, 0x2011c684, 0x1132f9ae, 0x1a3ff0a0, 0x0728ebb2, 0x0c25e2bc,
+0x656e95e6, 0x6e639ce8, 0x737487fa, 0x78798ef4, 0x495ab1de, 0x4257b8d0,
+0x5f40a3c2, 0x544daacc, 0xf7daec41, 0xfcd7e54f, 0xe1c0fe5d, 0xeacdf753,
+0xdbeec879, 0xd0e3c177, 0xcdf4da65, 0xc6f9d36b, 0xafb2a431, 0xa4bfad3f,
+0xb9a8b62d, 0xb2a5bf23, 0x83868009, 0x888b8907, 0x959c9215, 0x9e919b1b,
+0x470a7ca1, 0x4c0775af, 0x51106ebd, 0x5a1d67b3, 0x6b3e5899, 0x60335197,
+0x7d244a85, 0x7629438b, 0x1f6234d1, 0x146f3ddf, 0x097826cd, 0x02752fc3,
+0x335610e9, 0x385b19e7, 0x254c02f5, 0x2e410bfb, 0x8c61d79a, 0x876cde94,
+0x9a7bc586, 0x9176cc88, 0xa055f3a2, 0xab58faac, 0xb64fe1be, 0xbd42e8b0,
+0xd4099fea, 0xdf0496e4, 0xc2138df6, 0xc91e84f8, 0xf83dbbd2, 0xf330b2dc,
+0xee27a9ce, 0xe52aa0c0, 0x3cb1477a, 0x37bc4e74, 0x2aab5566, 0x21a65c68,
+0x10856342, 0x1b886a4c, 0x069f715e, 0x0d927850, 0x64d90f0a, 0x6fd40604,
+0x72c31d16, 0x79ce1418, 0x48ed2b32, 0x43e0223c, 0x5ef7392e, 0x55fa3020,
+0x01b79aec, 0x0aba93e2, 0x17ad88f0, 0x1ca081fe, 0x2d83bed4, 0x268eb7da,
+0x3b99acc8, 0x3094a5c6, 0x59dfd29c, 0x52d2db92, 0x4fc5c080, 0x44c8c98e,
+0x75ebf6a4, 0x7ee6ffaa, 0x63f1e4b8, 0x68fcedb6, 0xb1670a0c, 0xba6a0302,
+0xa77d1810, 0xac70111e, 0x9d532e34, 0x965e273a, 0x8b493c28, 0x80443526,
+0xe90f427c, 0xe2024b72, 0xff155060, 0xf418596e, 0xc53b6644, 0xce366f4a,
+0xd3217458, 0xd82c7d56, 0x7a0ca137, 0x7101a839, 0x6c16b32b, 0x671bba25,
+0x5638850f, 0x5d358c01, 0x40229713, 0x4b2f9e1d, 0x2264e947, 0x2969e049,
+0x347efb5b, 0x3f73f255, 0x0e50cd7f, 0x055dc471, 0x184adf63, 0x1347d66d,
+0xcadc31d7, 0xc1d138d9, 0xdcc623cb, 0xd7cb2ac5, 0xe6e815ef, 0xede51ce1,
+0xf0f207f3, 0xfbff0efd, 0x92b479a7, 0x99b970a9, 0x84ae6bbb, 0x8fa362b5,
+0xbe805d9f, 0xb58d5491, 0xa89a4f83, 0xa397468d
+};
+#else
+static const PRUint32 _IMXC0[256] =
+{
+0x00000000, 0x0e090d0b, 0x1c121a16, 0x121b171d, 0x3824342c, 0x362d3927,
+0x24362e3a, 0x2a3f2331, 0x70486858, 0x7e416553, 0x6c5a724e, 0x62537f45,
+0x486c5c74, 0x4665517f, 0x547e4662, 0x5a774b69, 0xe090d0b0, 0xee99ddbb,
+0xfc82caa6, 0xf28bc7ad, 0xd8b4e49c, 0xd6bde997, 0xc4a6fe8a, 0xcaaff381,
+0x90d8b8e8, 0x9ed1b5e3, 0x8ccaa2fe, 0x82c3aff5, 0xa8fc8cc4, 0xa6f581cf,
+0xb4ee96d2, 0xbae79bd9, 0xdb3bbb7b, 0xd532b670, 0xc729a16d, 0xc920ac66,
+0xe31f8f57, 0xed16825c, 0xff0d9541, 0xf104984a, 0xab73d323, 0xa57ade28,
+0xb761c935, 0xb968c43e, 0x9357e70f, 0x9d5eea04, 0x8f45fd19, 0x814cf012,
+0x3bab6bcb, 0x35a266c0, 0x27b971dd, 0x29b07cd6, 0x038f5fe7, 0x0d8652ec,
+0x1f9d45f1, 0x119448fa, 0x4be30393, 0x45ea0e98, 0x57f11985, 0x59f8148e,
+0x73c737bf, 0x7dce3ab4, 0x6fd52da9, 0x61dc20a2, 0xad766df6, 0xa37f60fd,
+0xb16477e0, 0xbf6d7aeb, 0x955259da, 0x9b5b54d1, 0x894043cc, 0x87494ec7,
+0xdd3e05ae, 0xd33708a5, 0xc12c1fb8, 0xcf2512b3, 0xe51a3182, 0xeb133c89,
+0xf9082b94, 0xf701269f, 0x4de6bd46, 0x43efb04d, 0x51f4a750, 0x5ffdaa5b,
+0x75c2896a, 0x7bcb8461, 0x69d0937c, 0x67d99e77, 0x3daed51e, 0x33a7d815,
+0x21bccf08, 0x2fb5c203, 0x058ae132, 0x0b83ec39, 0x1998fb24, 0x1791f62f,
+0x764dd68d, 0x7844db86, 0x6a5fcc9b, 0x6456c190, 0x4e69e2a1, 0x4060efaa,
+0x527bf8b7, 0x5c72f5bc, 0x0605bed5, 0x080cb3de, 0x1a17a4c3, 0x141ea9c8,
+0x3e218af9, 0x302887f2, 0x223390ef, 0x2c3a9de4, 0x96dd063d, 0x98d40b36,
+0x8acf1c2b, 0x84c61120, 0xaef93211, 0xa0f03f1a, 0xb2eb2807, 0xbce2250c,
+0xe6956e65, 0xe89c636e, 0xfa877473, 0xf48e7978, 0xdeb15a49, 0xd0b85742,
+0xc2a3405f, 0xccaa4d54, 0x41ecdaf7, 0x4fe5d7fc, 0x5dfec0e1, 0x53f7cdea,
+0x79c8eedb, 0x77c1e3d0, 0x65daf4cd, 0x6bd3f9c6, 0x31a4b2af, 0x3fadbfa4,
+0x2db6a8b9, 0x23bfa5b2, 0x09808683, 0x07898b88, 0x15929c95, 0x1b9b919e,
+0xa17c0a47, 0xaf75074c, 0xbd6e1051, 0xb3671d5a, 0x99583e6b, 0x97513360,
+0x854a247d, 0x8b432976, 0xd134621f, 0xdf3d6f14, 0xcd267809, 0xc32f7502,
+0xe9105633, 0xe7195b38, 0xf5024c25, 0xfb0b412e, 0x9ad7618c, 0x94de6c87,
+0x86c57b9a, 0x88cc7691, 0xa2f355a0, 0xacfa58ab, 0xbee14fb6, 0xb0e842bd,
+0xea9f09d4, 0xe49604df, 0xf68d13c2, 0xf8841ec9, 0xd2bb3df8, 0xdcb230f3,
+0xcea927ee, 0xc0a02ae5, 0x7a47b13c, 0x744ebc37, 0x6655ab2a, 0x685ca621,
+0x42638510, 0x4c6a881b, 0x5e719f06, 0x5078920d, 0x0a0fd964, 0x0406d46f,
+0x161dc372, 0x1814ce79, 0x322bed48, 0x3c22e043, 0x2e39f75e, 0x2030fa55,
+0xec9ab701, 0xe293ba0a, 0xf088ad17, 0xfe81a01c, 0xd4be832d, 0xdab78e26,
+0xc8ac993b, 0xc6a59430, 0x9cd2df59, 0x92dbd252, 0x80c0c54f, 0x8ec9c844,
+0xa4f6eb75, 0xaaffe67e, 0xb8e4f163, 0xb6edfc68, 0x0c0a67b1, 0x02036aba,
+0x10187da7, 0x1e1170ac, 0x342e539d, 0x3a275e96, 0x283c498b, 0x26354480,
+0x7c420fe9, 0x724b02e2, 0x605015ff, 0x6e5918f4, 0x44663bc5, 0x4a6f36ce,
+0x587421d3, 0x567d2cd8, 0x37a10c7a, 0x39a80171, 0x2bb3166c, 0x25ba1b67,
+0x0f853856, 0x018c355d, 0x13972240, 0x1d9e2f4b, 0x47e96422, 0x49e06929,
+0x5bfb7e34, 0x55f2733f, 0x7fcd500e, 0x71c45d05, 0x63df4a18, 0x6dd64713,
+0xd731dcca, 0xd938d1c1, 0xcb23c6dc, 0xc52acbd7, 0xef15e8e6, 0xe11ce5ed,
+0xf307f2f0, 0xfd0efffb, 0xa779b492, 0xa970b999, 0xbb6bae84, 0xb562a38f,
+0x9f5d80be, 0x91548db5, 0x834f9aa8, 0x8d4697a3
+};
+#endif
+
+#ifdef IS_LITTLE_ENDIAN
+static const PRUint32 _IMXC1[256] =
+{
+0x00000000, 0x0d090e0b, 0x1a121c16, 0x171b121d, 0x3424382c, 0x392d3627,
+0x2e36243a, 0x233f2a31, 0x68487058, 0x65417e53, 0x725a6c4e, 0x7f536245,
+0x5c6c4874, 0x5165467f, 0x467e5462, 0x4b775a69, 0xd090e0b0, 0xdd99eebb,
+0xca82fca6, 0xc78bf2ad, 0xe4b4d89c, 0xe9bdd697, 0xfea6c48a, 0xf3afca81,
+0xb8d890e8, 0xb5d19ee3, 0xa2ca8cfe, 0xafc382f5, 0x8cfca8c4, 0x81f5a6cf,
+0x96eeb4d2, 0x9be7bad9, 0xbb3bdb7b, 0xb632d570, 0xa129c76d, 0xac20c966,
+0x8f1fe357, 0x8216ed5c, 0x950dff41, 0x9804f14a, 0xd373ab23, 0xde7aa528,
+0xc961b735, 0xc468b93e, 0xe757930f, 0xea5e9d04, 0xfd458f19, 0xf04c8112,
+0x6bab3bcb, 0x66a235c0, 0x71b927dd, 0x7cb029d6, 0x5f8f03e7, 0x52860dec,
+0x459d1ff1, 0x489411fa, 0x03e34b93, 0x0eea4598, 0x19f15785, 0x14f8598e,
+0x37c773bf, 0x3ace7db4, 0x2dd56fa9, 0x20dc61a2, 0x6d76adf6, 0x607fa3fd,
+0x7764b1e0, 0x7a6dbfeb, 0x595295da, 0x545b9bd1, 0x434089cc, 0x4e4987c7,
+0x053eddae, 0x0837d3a5, 0x1f2cc1b8, 0x1225cfb3, 0x311ae582, 0x3c13eb89,
+0x2b08f994, 0x2601f79f, 0xbde64d46, 0xb0ef434d, 0xa7f45150, 0xaafd5f5b,
+0x89c2756a, 0x84cb7b61, 0x93d0697c, 0x9ed96777, 0xd5ae3d1e, 0xd8a73315,
+0xcfbc2108, 0xc2b52f03, 0xe18a0532, 0xec830b39, 0xfb981924, 0xf691172f,
+0xd64d768d, 0xdb447886, 0xcc5f6a9b, 0xc1566490, 0xe2694ea1, 0xef6040aa,
+0xf87b52b7, 0xf5725cbc, 0xbe0506d5, 0xb30c08de, 0xa4171ac3, 0xa91e14c8,
+0x8a213ef9, 0x872830f2, 0x903322ef, 0x9d3a2ce4, 0x06dd963d, 0x0bd49836,
+0x1ccf8a2b, 0x11c68420, 0x32f9ae11, 0x3ff0a01a, 0x28ebb207, 0x25e2bc0c,
+0x6e95e665, 0x639ce86e, 0x7487fa73, 0x798ef478, 0x5ab1de49, 0x57b8d042,
+0x40a3c25f, 0x4daacc54, 0xdaec41f7, 0xd7e54ffc, 0xc0fe5de1, 0xcdf753ea,
+0xeec879db, 0xe3c177d0, 0xf4da65cd, 0xf9d36bc6, 0xb2a431af, 0xbfad3fa4,
+0xa8b62db9, 0xa5bf23b2, 0x86800983, 0x8b890788, 0x9c921595, 0x919b1b9e,
+0x0a7ca147, 0x0775af4c, 0x106ebd51, 0x1d67b35a, 0x3e58996b, 0x33519760,
+0x244a857d, 0x29438b76, 0x6234d11f, 0x6f3ddf14, 0x7826cd09, 0x752fc302,
+0x5610e933, 0x5b19e738, 0x4c02f525, 0x410bfb2e, 0x61d79a8c, 0x6cde9487,
+0x7bc5869a, 0x76cc8891, 0x55f3a2a0, 0x58faacab, 0x4fe1beb6, 0x42e8b0bd,
+0x099fead4, 0x0496e4df, 0x138df6c2, 0x1e84f8c9, 0x3dbbd2f8, 0x30b2dcf3,
+0x27a9ceee, 0x2aa0c0e5, 0xb1477a3c, 0xbc4e7437, 0xab55662a, 0xa65c6821,
+0x85634210, 0x886a4c1b, 0x9f715e06, 0x9278500d, 0xd90f0a64, 0xd406046f,
+0xc31d1672, 0xce141879, 0xed2b3248, 0xe0223c43, 0xf7392e5e, 0xfa302055,
+0xb79aec01, 0xba93e20a, 0xad88f017, 0xa081fe1c, 0x83bed42d, 0x8eb7da26,
+0x99acc83b, 0x94a5c630, 0xdfd29c59, 0xd2db9252, 0xc5c0804f, 0xc8c98e44,
+0xebf6a475, 0xe6ffaa7e, 0xf1e4b863, 0xfcedb668, 0x670a0cb1, 0x6a0302ba,
+0x7d1810a7, 0x70111eac, 0x532e349d, 0x5e273a96, 0x493c288b, 0x44352680,
+0x0f427ce9, 0x024b72e2, 0x155060ff, 0x18596ef4, 0x3b6644c5, 0x366f4ace,
+0x217458d3, 0x2c7d56d8, 0x0ca1377a, 0x01a83971, 0x16b32b6c, 0x1bba2567,
+0x38850f56, 0x358c015d, 0x22971340, 0x2f9e1d4b, 0x64e94722, 0x69e04929,
+0x7efb5b34, 0x73f2553f, 0x50cd7f0e, 0x5dc47105, 0x4adf6318, 0x47d66d13,
+0xdc31d7ca, 0xd138d9c1, 0xc623cbdc, 0xcb2ac5d7, 0xe815efe6, 0xe51ce1ed,
+0xf207f3f0, 0xff0efdfb, 0xb479a792, 0xb970a999, 0xae6bbb84, 0xa362b58f,
+0x805d9fbe, 0x8d5491b5, 0x9a4f83a8, 0x97468da3
+};
+#else
+static const PRUint32 _IMXC1[256] = /* Big-endian variant of the InvMixColumn
+ * byte-multiply table 1.  Matches the output of make_InvMixCol_Table(1, ...)
+ * in rijndael_tables.c: entry i packs the GF(2**8) products of byte i with
+ * 0x0b, 0x0e, 0x09 and 0x0d, first product in the most significant byte. */
+{
+0x00000000, 0x0b0e090d, 0x161c121a, 0x1d121b17, 0x2c382434, 0x27362d39,
+0x3a24362e, 0x312a3f23, 0x58704868, 0x537e4165, 0x4e6c5a72, 0x4562537f,
+0x74486c5c, 0x7f466551, 0x62547e46, 0x695a774b, 0xb0e090d0, 0xbbee99dd,
+0xa6fc82ca, 0xadf28bc7, 0x9cd8b4e4, 0x97d6bde9, 0x8ac4a6fe, 0x81caaff3,
+0xe890d8b8, 0xe39ed1b5, 0xfe8ccaa2, 0xf582c3af, 0xc4a8fc8c, 0xcfa6f581,
+0xd2b4ee96, 0xd9bae79b, 0x7bdb3bbb, 0x70d532b6, 0x6dc729a1, 0x66c920ac,
+0x57e31f8f, 0x5ced1682, 0x41ff0d95, 0x4af10498, 0x23ab73d3, 0x28a57ade,
+0x35b761c9, 0x3eb968c4, 0x0f9357e7, 0x049d5eea, 0x198f45fd, 0x12814cf0,
+0xcb3bab6b, 0xc035a266, 0xdd27b971, 0xd629b07c, 0xe7038f5f, 0xec0d8652,
+0xf11f9d45, 0xfa119448, 0x934be303, 0x9845ea0e, 0x8557f119, 0x8e59f814,
+0xbf73c737, 0xb47dce3a, 0xa96fd52d, 0xa261dc20, 0xf6ad766d, 0xfda37f60,
+0xe0b16477, 0xebbf6d7a, 0xda955259, 0xd19b5b54, 0xcc894043, 0xc787494e,
+0xaedd3e05, 0xa5d33708, 0xb8c12c1f, 0xb3cf2512, 0x82e51a31, 0x89eb133c,
+0x94f9082b, 0x9ff70126, 0x464de6bd, 0x4d43efb0, 0x5051f4a7, 0x5b5ffdaa,
+0x6a75c289, 0x617bcb84, 0x7c69d093, 0x7767d99e, 0x1e3daed5, 0x1533a7d8,
+0x0821bccf, 0x032fb5c2, 0x32058ae1, 0x390b83ec, 0x241998fb, 0x2f1791f6,
+0x8d764dd6, 0x867844db, 0x9b6a5fcc, 0x906456c1, 0xa14e69e2, 0xaa4060ef,
+0xb7527bf8, 0xbc5c72f5, 0xd50605be, 0xde080cb3, 0xc31a17a4, 0xc8141ea9,
+0xf93e218a, 0xf2302887, 0xef223390, 0xe42c3a9d, 0x3d96dd06, 0x3698d40b,
+0x2b8acf1c, 0x2084c611, 0x11aef932, 0x1aa0f03f, 0x07b2eb28, 0x0cbce225,
+0x65e6956e, 0x6ee89c63, 0x73fa8774, 0x78f48e79, 0x49deb15a, 0x42d0b857,
+0x5fc2a340, 0x54ccaa4d, 0xf741ecda, 0xfc4fe5d7, 0xe15dfec0, 0xea53f7cd,
+0xdb79c8ee, 0xd077c1e3, 0xcd65daf4, 0xc66bd3f9, 0xaf31a4b2, 0xa43fadbf,
+0xb92db6a8, 0xb223bfa5, 0x83098086, 0x8807898b, 0x9515929c, 0x9e1b9b91,
+0x47a17c0a, 0x4caf7507, 0x51bd6e10, 0x5ab3671d, 0x6b99583e, 0x60975133,
+0x7d854a24, 0x768b4329, 0x1fd13462, 0x14df3d6f, 0x09cd2678, 0x02c32f75,
+0x33e91056, 0x38e7195b, 0x25f5024c, 0x2efb0b41, 0x8c9ad761, 0x8794de6c,
+0x9a86c57b, 0x9188cc76, 0xa0a2f355, 0xabacfa58, 0xb6bee14f, 0xbdb0e842,
+0xd4ea9f09, 0xdfe49604, 0xc2f68d13, 0xc9f8841e, 0xf8d2bb3d, 0xf3dcb230,
+0xeecea927, 0xe5c0a02a, 0x3c7a47b1, 0x37744ebc, 0x2a6655ab, 0x21685ca6,
+0x10426385, 0x1b4c6a88, 0x065e719f, 0x0d507892, 0x640a0fd9, 0x6f0406d4,
+0x72161dc3, 0x791814ce, 0x48322bed, 0x433c22e0, 0x5e2e39f7, 0x552030fa,
+0x01ec9ab7, 0x0ae293ba, 0x17f088ad, 0x1cfe81a0, 0x2dd4be83, 0x26dab78e,
+0x3bc8ac99, 0x30c6a594, 0x599cd2df, 0x5292dbd2, 0x4f80c0c5, 0x448ec9c8,
+0x75a4f6eb, 0x7eaaffe6, 0x63b8e4f1, 0x68b6edfc, 0xb10c0a67, 0xba02036a,
+0xa710187d, 0xac1e1170, 0x9d342e53, 0x963a275e, 0x8b283c49, 0x80263544,
+0xe97c420f, 0xe2724b02, 0xff605015, 0xf46e5918, 0xc544663b, 0xce4a6f36,
+0xd3587421, 0xd8567d2c, 0x7a37a10c, 0x7139a801, 0x6c2bb316, 0x6725ba1b,
+0x560f8538, 0x5d018c35, 0x40139722, 0x4b1d9e2f, 0x2247e964, 0x2949e069,
+0x345bfb7e, 0x3f55f273, 0x0e7fcd50, 0x0571c45d, 0x1863df4a, 0x136dd647,
+0xcad731dc, 0xc1d938d1, 0xdccb23c6, 0xd7c52acb, 0xe6ef15e8, 0xede11ce5,
+0xf0f307f2, 0xfbfd0eff, 0x92a779b4, 0x99a970b9, 0x84bb6bae, 0x8fb562a3,
+0xbe9f5d80, 0xb591548d, 0xa8834f9a, 0xa38d4697
+};
+#endif
+
+#ifdef IS_LITTLE_ENDIAN
+static const PRUint32 _IMXC2[256] = /* Little-endian variant of the
+ * InvMixColumn byte-multiply table 2.  Matches the output of
+ * make_InvMixCol_Table(2, ...) in rijndael_tables.c: entry i packs the
+ * GF(2**8) products of byte i with 0x0d, 0x0b, 0x0e and 0x09, first product
+ * in the least significant byte. */
+{
+0x00000000, 0x090e0b0d, 0x121c161a, 0x1b121d17, 0x24382c34, 0x2d362739,
+0x36243a2e, 0x3f2a3123, 0x48705868, 0x417e5365, 0x5a6c4e72, 0x5362457f,
+0x6c48745c, 0x65467f51, 0x7e546246, 0x775a694b, 0x90e0b0d0, 0x99eebbdd,
+0x82fca6ca, 0x8bf2adc7, 0xb4d89ce4, 0xbdd697e9, 0xa6c48afe, 0xafca81f3,
+0xd890e8b8, 0xd19ee3b5, 0xca8cfea2, 0xc382f5af, 0xfca8c48c, 0xf5a6cf81,
+0xeeb4d296, 0xe7bad99b, 0x3bdb7bbb, 0x32d570b6, 0x29c76da1, 0x20c966ac,
+0x1fe3578f, 0x16ed5c82, 0x0dff4195, 0x04f14a98, 0x73ab23d3, 0x7aa528de,
+0x61b735c9, 0x68b93ec4, 0x57930fe7, 0x5e9d04ea, 0x458f19fd, 0x4c8112f0,
+0xab3bcb6b, 0xa235c066, 0xb927dd71, 0xb029d67c, 0x8f03e75f, 0x860dec52,
+0x9d1ff145, 0x9411fa48, 0xe34b9303, 0xea45980e, 0xf1578519, 0xf8598e14,
+0xc773bf37, 0xce7db43a, 0xd56fa92d, 0xdc61a220, 0x76adf66d, 0x7fa3fd60,
+0x64b1e077, 0x6dbfeb7a, 0x5295da59, 0x5b9bd154, 0x4089cc43, 0x4987c74e,
+0x3eddae05, 0x37d3a508, 0x2cc1b81f, 0x25cfb312, 0x1ae58231, 0x13eb893c,
+0x08f9942b, 0x01f79f26, 0xe64d46bd, 0xef434db0, 0xf45150a7, 0xfd5f5baa,
+0xc2756a89, 0xcb7b6184, 0xd0697c93, 0xd967779e, 0xae3d1ed5, 0xa73315d8,
+0xbc2108cf, 0xb52f03c2, 0x8a0532e1, 0x830b39ec, 0x981924fb, 0x91172ff6,
+0x4d768dd6, 0x447886db, 0x5f6a9bcc, 0x566490c1, 0x694ea1e2, 0x6040aaef,
+0x7b52b7f8, 0x725cbcf5, 0x0506d5be, 0x0c08deb3, 0x171ac3a4, 0x1e14c8a9,
+0x213ef98a, 0x2830f287, 0x3322ef90, 0x3a2ce49d, 0xdd963d06, 0xd498360b,
+0xcf8a2b1c, 0xc6842011, 0xf9ae1132, 0xf0a01a3f, 0xebb20728, 0xe2bc0c25,
+0x95e6656e, 0x9ce86e63, 0x87fa7374, 0x8ef47879, 0xb1de495a, 0xb8d04257,
+0xa3c25f40, 0xaacc544d, 0xec41f7da, 0xe54ffcd7, 0xfe5de1c0, 0xf753eacd,
+0xc879dbee, 0xc177d0e3, 0xda65cdf4, 0xd36bc6f9, 0xa431afb2, 0xad3fa4bf,
+0xb62db9a8, 0xbf23b2a5, 0x80098386, 0x8907888b, 0x9215959c, 0x9b1b9e91,
+0x7ca1470a, 0x75af4c07, 0x6ebd5110, 0x67b35a1d, 0x58996b3e, 0x51976033,
+0x4a857d24, 0x438b7629, 0x34d11f62, 0x3ddf146f, 0x26cd0978, 0x2fc30275,
+0x10e93356, 0x19e7385b, 0x02f5254c, 0x0bfb2e41, 0xd79a8c61, 0xde94876c,
+0xc5869a7b, 0xcc889176, 0xf3a2a055, 0xfaacab58, 0xe1beb64f, 0xe8b0bd42,
+0x9fead409, 0x96e4df04, 0x8df6c213, 0x84f8c91e, 0xbbd2f83d, 0xb2dcf330,
+0xa9ceee27, 0xa0c0e52a, 0x477a3cb1, 0x4e7437bc, 0x55662aab, 0x5c6821a6,
+0x63421085, 0x6a4c1b88, 0x715e069f, 0x78500d92, 0x0f0a64d9, 0x06046fd4,
+0x1d1672c3, 0x141879ce, 0x2b3248ed, 0x223c43e0, 0x392e5ef7, 0x302055fa,
+0x9aec01b7, 0x93e20aba, 0x88f017ad, 0x81fe1ca0, 0xbed42d83, 0xb7da268e,
+0xacc83b99, 0xa5c63094, 0xd29c59df, 0xdb9252d2, 0xc0804fc5, 0xc98e44c8,
+0xf6a475eb, 0xffaa7ee6, 0xe4b863f1, 0xedb668fc, 0x0a0cb167, 0x0302ba6a,
+0x1810a77d, 0x111eac70, 0x2e349d53, 0x273a965e, 0x3c288b49, 0x35268044,
+0x427ce90f, 0x4b72e202, 0x5060ff15, 0x596ef418, 0x6644c53b, 0x6f4ace36,
+0x7458d321, 0x7d56d82c, 0xa1377a0c, 0xa8397101, 0xb32b6c16, 0xba25671b,
+0x850f5638, 0x8c015d35, 0x97134022, 0x9e1d4b2f, 0xe9472264, 0xe0492969,
+0xfb5b347e, 0xf2553f73, 0xcd7f0e50, 0xc471055d, 0xdf63184a, 0xd66d1347,
+0x31d7cadc, 0x38d9c1d1, 0x23cbdcc6, 0x2ac5d7cb, 0x15efe6e8, 0x1ce1ede5,
+0x07f3f0f2, 0x0efdfbff, 0x79a792b4, 0x70a999b9, 0x6bbb84ae, 0x62b58fa3,
+0x5d9fbe80, 0x5491b58d, 0x4f83a89a, 0x468da397
+};
+#else
+static const PRUint32 _IMXC2[256] = /* Big-endian variant of the InvMixColumn
+ * byte-multiply table 2.  Matches the output of make_InvMixCol_Table(2, ...)
+ * in rijndael_tables.c: entry i packs the GF(2**8) products of byte i with
+ * 0x0d, 0x0b, 0x0e and 0x09, first product in the most significant byte. */
+{
+0x00000000, 0x0d0b0e09, 0x1a161c12, 0x171d121b, 0x342c3824, 0x3927362d,
+0x2e3a2436, 0x23312a3f, 0x68587048, 0x65537e41, 0x724e6c5a, 0x7f456253,
+0x5c74486c, 0x517f4665, 0x4662547e, 0x4b695a77, 0xd0b0e090, 0xddbbee99,
+0xcaa6fc82, 0xc7adf28b, 0xe49cd8b4, 0xe997d6bd, 0xfe8ac4a6, 0xf381caaf,
+0xb8e890d8, 0xb5e39ed1, 0xa2fe8cca, 0xaff582c3, 0x8cc4a8fc, 0x81cfa6f5,
+0x96d2b4ee, 0x9bd9bae7, 0xbb7bdb3b, 0xb670d532, 0xa16dc729, 0xac66c920,
+0x8f57e31f, 0x825ced16, 0x9541ff0d, 0x984af104, 0xd323ab73, 0xde28a57a,
+0xc935b761, 0xc43eb968, 0xe70f9357, 0xea049d5e, 0xfd198f45, 0xf012814c,
+0x6bcb3bab, 0x66c035a2, 0x71dd27b9, 0x7cd629b0, 0x5fe7038f, 0x52ec0d86,
+0x45f11f9d, 0x48fa1194, 0x03934be3, 0x0e9845ea, 0x198557f1, 0x148e59f8,
+0x37bf73c7, 0x3ab47dce, 0x2da96fd5, 0x20a261dc, 0x6df6ad76, 0x60fda37f,
+0x77e0b164, 0x7aebbf6d, 0x59da9552, 0x54d19b5b, 0x43cc8940, 0x4ec78749,
+0x05aedd3e, 0x08a5d337, 0x1fb8c12c, 0x12b3cf25, 0x3182e51a, 0x3c89eb13,
+0x2b94f908, 0x269ff701, 0xbd464de6, 0xb04d43ef, 0xa75051f4, 0xaa5b5ffd,
+0x896a75c2, 0x84617bcb, 0x937c69d0, 0x9e7767d9, 0xd51e3dae, 0xd81533a7,
+0xcf0821bc, 0xc2032fb5, 0xe132058a, 0xec390b83, 0xfb241998, 0xf62f1791,
+0xd68d764d, 0xdb867844, 0xcc9b6a5f, 0xc1906456, 0xe2a14e69, 0xefaa4060,
+0xf8b7527b, 0xf5bc5c72, 0xbed50605, 0xb3de080c, 0xa4c31a17, 0xa9c8141e,
+0x8af93e21, 0x87f23028, 0x90ef2233, 0x9de42c3a, 0x063d96dd, 0x0b3698d4,
+0x1c2b8acf, 0x112084c6, 0x3211aef9, 0x3f1aa0f0, 0x2807b2eb, 0x250cbce2,
+0x6e65e695, 0x636ee89c, 0x7473fa87, 0x7978f48e, 0x5a49deb1, 0x5742d0b8,
+0x405fc2a3, 0x4d54ccaa, 0xdaf741ec, 0xd7fc4fe5, 0xc0e15dfe, 0xcdea53f7,
+0xeedb79c8, 0xe3d077c1, 0xf4cd65da, 0xf9c66bd3, 0xb2af31a4, 0xbfa43fad,
+0xa8b92db6, 0xa5b223bf, 0x86830980, 0x8b880789, 0x9c951592, 0x919e1b9b,
+0x0a47a17c, 0x074caf75, 0x1051bd6e, 0x1d5ab367, 0x3e6b9958, 0x33609751,
+0x247d854a, 0x29768b43, 0x621fd134, 0x6f14df3d, 0x7809cd26, 0x7502c32f,
+0x5633e910, 0x5b38e719, 0x4c25f502, 0x412efb0b, 0x618c9ad7, 0x6c8794de,
+0x7b9a86c5, 0x769188cc, 0x55a0a2f3, 0x58abacfa, 0x4fb6bee1, 0x42bdb0e8,
+0x09d4ea9f, 0x04dfe496, 0x13c2f68d, 0x1ec9f884, 0x3df8d2bb, 0x30f3dcb2,
+0x27eecea9, 0x2ae5c0a0, 0xb13c7a47, 0xbc37744e, 0xab2a6655, 0xa621685c,
+0x85104263, 0x881b4c6a, 0x9f065e71, 0x920d5078, 0xd9640a0f, 0xd46f0406,
+0xc372161d, 0xce791814, 0xed48322b, 0xe0433c22, 0xf75e2e39, 0xfa552030,
+0xb701ec9a, 0xba0ae293, 0xad17f088, 0xa01cfe81, 0x832dd4be, 0x8e26dab7,
+0x993bc8ac, 0x9430c6a5, 0xdf599cd2, 0xd25292db, 0xc54f80c0, 0xc8448ec9,
+0xeb75a4f6, 0xe67eaaff, 0xf163b8e4, 0xfc68b6ed, 0x67b10c0a, 0x6aba0203,
+0x7da71018, 0x70ac1e11, 0x539d342e, 0x5e963a27, 0x498b283c, 0x44802635,
+0x0fe97c42, 0x02e2724b, 0x15ff6050, 0x18f46e59, 0x3bc54466, 0x36ce4a6f,
+0x21d35874, 0x2cd8567d, 0x0c7a37a1, 0x017139a8, 0x166c2bb3, 0x1b6725ba,
+0x38560f85, 0x355d018c, 0x22401397, 0x2f4b1d9e, 0x642247e9, 0x692949e0,
+0x7e345bfb, 0x733f55f2, 0x500e7fcd, 0x5d0571c4, 0x4a1863df, 0x47136dd6,
+0xdccad731, 0xd1c1d938, 0xc6dccb23, 0xcbd7c52a, 0xe8e6ef15, 0xe5ede11c,
+0xf2f0f307, 0xfffbfd0e, 0xb492a779, 0xb999a970, 0xae84bb6b, 0xa38fb562,
+0x80be9f5d, 0x8db59154, 0x9aa8834f, 0x97a38d46
+};
+#endif
+
+#ifdef IS_LITTLE_ENDIAN
+static const PRUint32 _IMXC3[256] = /* Little-endian variant of the
+ * InvMixColumn byte-multiply table 3.  Matches the output of
+ * make_InvMixCol_Table(3, ...) in rijndael_tables.c: entry i packs the
+ * GF(2**8) products of byte i with 0x09, 0x0d, 0x0b and 0x0e, first product
+ * in the least significant byte. */
+{
+0x00000000, 0x0e0b0d09, 0x1c161a12, 0x121d171b, 0x382c3424, 0x3627392d,
+0x243a2e36, 0x2a31233f, 0x70586848, 0x7e536541, 0x6c4e725a, 0x62457f53,
+0x48745c6c, 0x467f5165, 0x5462467e, 0x5a694b77, 0xe0b0d090, 0xeebbdd99,
+0xfca6ca82, 0xf2adc78b, 0xd89ce4b4, 0xd697e9bd, 0xc48afea6, 0xca81f3af,
+0x90e8b8d8, 0x9ee3b5d1, 0x8cfea2ca, 0x82f5afc3, 0xa8c48cfc, 0xa6cf81f5,
+0xb4d296ee, 0xbad99be7, 0xdb7bbb3b, 0xd570b632, 0xc76da129, 0xc966ac20,
+0xe3578f1f, 0xed5c8216, 0xff41950d, 0xf14a9804, 0xab23d373, 0xa528de7a,
+0xb735c961, 0xb93ec468, 0x930fe757, 0x9d04ea5e, 0x8f19fd45, 0x8112f04c,
+0x3bcb6bab, 0x35c066a2, 0x27dd71b9, 0x29d67cb0, 0x03e75f8f, 0x0dec5286,
+0x1ff1459d, 0x11fa4894, 0x4b9303e3, 0x45980eea, 0x578519f1, 0x598e14f8,
+0x73bf37c7, 0x7db43ace, 0x6fa92dd5, 0x61a220dc, 0xadf66d76, 0xa3fd607f,
+0xb1e07764, 0xbfeb7a6d, 0x95da5952, 0x9bd1545b, 0x89cc4340, 0x87c74e49,
+0xddae053e, 0xd3a50837, 0xc1b81f2c, 0xcfb31225, 0xe582311a, 0xeb893c13,
+0xf9942b08, 0xf79f2601, 0x4d46bde6, 0x434db0ef, 0x5150a7f4, 0x5f5baafd,
+0x756a89c2, 0x7b6184cb, 0x697c93d0, 0x67779ed9, 0x3d1ed5ae, 0x3315d8a7,
+0x2108cfbc, 0x2f03c2b5, 0x0532e18a, 0x0b39ec83, 0x1924fb98, 0x172ff691,
+0x768dd64d, 0x7886db44, 0x6a9bcc5f, 0x6490c156, 0x4ea1e269, 0x40aaef60,
+0x52b7f87b, 0x5cbcf572, 0x06d5be05, 0x08deb30c, 0x1ac3a417, 0x14c8a91e,
+0x3ef98a21, 0x30f28728, 0x22ef9033, 0x2ce49d3a, 0x963d06dd, 0x98360bd4,
+0x8a2b1ccf, 0x842011c6, 0xae1132f9, 0xa01a3ff0, 0xb20728eb, 0xbc0c25e2,
+0xe6656e95, 0xe86e639c, 0xfa737487, 0xf478798e, 0xde495ab1, 0xd04257b8,
+0xc25f40a3, 0xcc544daa, 0x41f7daec, 0x4ffcd7e5, 0x5de1c0fe, 0x53eacdf7,
+0x79dbeec8, 0x77d0e3c1, 0x65cdf4da, 0x6bc6f9d3, 0x31afb2a4, 0x3fa4bfad,
+0x2db9a8b6, 0x23b2a5bf, 0x09838680, 0x07888b89, 0x15959c92, 0x1b9e919b,
+0xa1470a7c, 0xaf4c0775, 0xbd51106e, 0xb35a1d67, 0x996b3e58, 0x97603351,
+0x857d244a, 0x8b762943, 0xd11f6234, 0xdf146f3d, 0xcd097826, 0xc302752f,
+0xe9335610, 0xe7385b19, 0xf5254c02, 0xfb2e410b, 0x9a8c61d7, 0x94876cde,
+0x869a7bc5, 0x889176cc, 0xa2a055f3, 0xacab58fa, 0xbeb64fe1, 0xb0bd42e8,
+0xead4099f, 0xe4df0496, 0xf6c2138d, 0xf8c91e84, 0xd2f83dbb, 0xdcf330b2,
+0xceee27a9, 0xc0e52aa0, 0x7a3cb147, 0x7437bc4e, 0x662aab55, 0x6821a65c,
+0x42108563, 0x4c1b886a, 0x5e069f71, 0x500d9278, 0x0a64d90f, 0x046fd406,
+0x1672c31d, 0x1879ce14, 0x3248ed2b, 0x3c43e022, 0x2e5ef739, 0x2055fa30,
+0xec01b79a, 0xe20aba93, 0xf017ad88, 0xfe1ca081, 0xd42d83be, 0xda268eb7,
+0xc83b99ac, 0xc63094a5, 0x9c59dfd2, 0x9252d2db, 0x804fc5c0, 0x8e44c8c9,
+0xa475ebf6, 0xaa7ee6ff, 0xb863f1e4, 0xb668fced, 0x0cb1670a, 0x02ba6a03,
+0x10a77d18, 0x1eac7011, 0x349d532e, 0x3a965e27, 0x288b493c, 0x26804435,
+0x7ce90f42, 0x72e2024b, 0x60ff1550, 0x6ef41859, 0x44c53b66, 0x4ace366f,
+0x58d32174, 0x56d82c7d, 0x377a0ca1, 0x397101a8, 0x2b6c16b3, 0x25671bba,
+0x0f563885, 0x015d358c, 0x13402297, 0x1d4b2f9e, 0x472264e9, 0x492969e0,
+0x5b347efb, 0x553f73f2, 0x7f0e50cd, 0x71055dc4, 0x63184adf, 0x6d1347d6,
+0xd7cadc31, 0xd9c1d138, 0xcbdcc623, 0xc5d7cb2a, 0xefe6e815, 0xe1ede51c,
+0xf3f0f207, 0xfdfbff0e, 0xa792b479, 0xa999b970, 0xbb84ae6b, 0xb58fa362,
+0x9fbe805d, 0x91b58d54, 0x83a89a4f, 0x8da39746
+};
+#else
+static const PRUint32 _IMXC3[256] = /* Big-endian variant of the InvMixColumn
+ * byte-multiply table 3.  Matches the output of make_InvMixCol_Table(3, ...)
+ * in rijndael_tables.c: entry i packs the GF(2**8) products of byte i with
+ * 0x09, 0x0d, 0x0b and 0x0e, first product in the most significant byte. */
+{
+0x00000000, 0x090d0b0e, 0x121a161c, 0x1b171d12, 0x24342c38, 0x2d392736,
+0x362e3a24, 0x3f23312a, 0x48685870, 0x4165537e, 0x5a724e6c, 0x537f4562,
+0x6c5c7448, 0x65517f46, 0x7e466254, 0x774b695a, 0x90d0b0e0, 0x99ddbbee,
+0x82caa6fc, 0x8bc7adf2, 0xb4e49cd8, 0xbde997d6, 0xa6fe8ac4, 0xaff381ca,
+0xd8b8e890, 0xd1b5e39e, 0xcaa2fe8c, 0xc3aff582, 0xfc8cc4a8, 0xf581cfa6,
+0xee96d2b4, 0xe79bd9ba, 0x3bbb7bdb, 0x32b670d5, 0x29a16dc7, 0x20ac66c9,
+0x1f8f57e3, 0x16825ced, 0x0d9541ff, 0x04984af1, 0x73d323ab, 0x7ade28a5,
+0x61c935b7, 0x68c43eb9, 0x57e70f93, 0x5eea049d, 0x45fd198f, 0x4cf01281,
+0xab6bcb3b, 0xa266c035, 0xb971dd27, 0xb07cd629, 0x8f5fe703, 0x8652ec0d,
+0x9d45f11f, 0x9448fa11, 0xe303934b, 0xea0e9845, 0xf1198557, 0xf8148e59,
+0xc737bf73, 0xce3ab47d, 0xd52da96f, 0xdc20a261, 0x766df6ad, 0x7f60fda3,
+0x6477e0b1, 0x6d7aebbf, 0x5259da95, 0x5b54d19b, 0x4043cc89, 0x494ec787,
+0x3e05aedd, 0x3708a5d3, 0x2c1fb8c1, 0x2512b3cf, 0x1a3182e5, 0x133c89eb,
+0x082b94f9, 0x01269ff7, 0xe6bd464d, 0xefb04d43, 0xf4a75051, 0xfdaa5b5f,
+0xc2896a75, 0xcb84617b, 0xd0937c69, 0xd99e7767, 0xaed51e3d, 0xa7d81533,
+0xbccf0821, 0xb5c2032f, 0x8ae13205, 0x83ec390b, 0x98fb2419, 0x91f62f17,
+0x4dd68d76, 0x44db8678, 0x5fcc9b6a, 0x56c19064, 0x69e2a14e, 0x60efaa40,
+0x7bf8b752, 0x72f5bc5c, 0x05bed506, 0x0cb3de08, 0x17a4c31a, 0x1ea9c814,
+0x218af93e, 0x2887f230, 0x3390ef22, 0x3a9de42c, 0xdd063d96, 0xd40b3698,
+0xcf1c2b8a, 0xc6112084, 0xf93211ae, 0xf03f1aa0, 0xeb2807b2, 0xe2250cbc,
+0x956e65e6, 0x9c636ee8, 0x877473fa, 0x8e7978f4, 0xb15a49de, 0xb85742d0,
+0xa3405fc2, 0xaa4d54cc, 0xecdaf741, 0xe5d7fc4f, 0xfec0e15d, 0xf7cdea53,
+0xc8eedb79, 0xc1e3d077, 0xdaf4cd65, 0xd3f9c66b, 0xa4b2af31, 0xadbfa43f,
+0xb6a8b92d, 0xbfa5b223, 0x80868309, 0x898b8807, 0x929c9515, 0x9b919e1b,
+0x7c0a47a1, 0x75074caf, 0x6e1051bd, 0x671d5ab3, 0x583e6b99, 0x51336097,
+0x4a247d85, 0x4329768b, 0x34621fd1, 0x3d6f14df, 0x267809cd, 0x2f7502c3,
+0x105633e9, 0x195b38e7, 0x024c25f5, 0x0b412efb, 0xd7618c9a, 0xde6c8794,
+0xc57b9a86, 0xcc769188, 0xf355a0a2, 0xfa58abac, 0xe14fb6be, 0xe842bdb0,
+0x9f09d4ea, 0x9604dfe4, 0x8d13c2f6, 0x841ec9f8, 0xbb3df8d2, 0xb230f3dc,
+0xa927eece, 0xa02ae5c0, 0x47b13c7a, 0x4ebc3774, 0x55ab2a66, 0x5ca62168,
+0x63851042, 0x6a881b4c, 0x719f065e, 0x78920d50, 0x0fd9640a, 0x06d46f04,
+0x1dc37216, 0x14ce7918, 0x2bed4832, 0x22e0433c, 0x39f75e2e, 0x30fa5520,
+0x9ab701ec, 0x93ba0ae2, 0x88ad17f0, 0x81a01cfe, 0xbe832dd4, 0xb78e26da,
+0xac993bc8, 0xa59430c6, 0xd2df599c, 0xdbd25292, 0xc0c54f80, 0xc9c8448e,
+0xf6eb75a4, 0xffe67eaa, 0xe4f163b8, 0xedfc68b6, 0x0a67b10c, 0x036aba02,
+0x187da710, 0x1170ac1e, 0x2e539d34, 0x275e963a, 0x3c498b28, 0x35448026,
+0x420fe97c, 0x4b02e272, 0x5015ff60, 0x5918f46e, 0x663bc544, 0x6f36ce4a,
+0x7421d358, 0x7d2cd856, 0xa10c7a37, 0xa8017139, 0xb3166c2b, 0xba1b6725,
+0x8538560f, 0x8c355d01, 0x97224013, 0x9e2f4b1d, 0xe9642247, 0xe0692949,
+0xfb7e345b, 0xf2733f55, 0xcd500e7f, 0xc45d0571, 0xdf4a1863, 0xd647136d,
+0x31dccad7, 0x38d1c1d9, 0x23c6dccb, 0x2acbd7c5, 0x15e8e6ef, 0x1ce5ede1,
+0x07f2f0f3, 0x0efffbfd, 0x79b492a7, 0x70b999a9, 0x6bae84bb, 0x62a38fb5,
+0x5d80be9f, 0x548db591, 0x4f9aa883, 0x4697a38d
+};
+#endif
+
+#endif /* RIJNDAEL_INCLUDE_TABLES */
+
+#ifdef IS_LITTLE_ENDIAN
+static const PRUint32 Rcon[30] = { /* Little-endian variant of the round
+ * constants for key expansion.  Matches the generator loop in
+ * rijndael_tables.c: start at 0x01 and multiply by 0x02 in GF(2**8) for each
+ * following entry, storing the byte in the least significant position. */
+0x00000001, 0x00000002, 0x00000004, 0x00000008, 0x00000010, 0x00000020,
+0x00000040, 0x00000080, 0x0000001b, 0x00000036, 0x0000006c, 0x000000d8,
+0x000000ab, 0x0000004d, 0x0000009a, 0x0000002f, 0x0000005e, 0x000000bc,
+0x00000063, 0x000000c6, 0x00000097, 0x00000035, 0x0000006a, 0x000000d4,
+0x000000b3, 0x0000007d, 0x000000fa, 0x000000ef, 0x000000c5, 0x00000091
+};
+#else
+static const PRUint32 Rcon[30] = { /* Big-endian variant of the round
+ * constants for key expansion.  Matches the generator loop in
+ * rijndael_tables.c: start at 0x01 and multiply by 0x02 in GF(2**8) for each
+ * following entry, storing the byte in the most significant position. */
+0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000, 0x20000000,
+0x40000000, 0x80000000, 0x1b000000, 0x36000000, 0x6c000000, 0xd8000000,
+0xab000000, 0x4d000000, 0x9a000000, 0x2f000000, 0x5e000000, 0xbc000000,
+0x63000000, 0xc6000000, 0x97000000, 0x35000000, 0x6a000000, 0xd4000000,
+0xb3000000, 0x7d000000, 0xfa000000, 0xef000000, 0xc5000000, 0x91000000
+};
+#endif
+
diff --git a/security/nss/lib/freebl/rijndael_tables.c b/security/nss/lib/freebl/rijndael_tables.c
new file mode 100644
index 000000000..78dd85a96
--- /dev/null
+++ b/security/nss/lib/freebl/rijndael_tables.c
@@ -0,0 +1,215 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "stdio.h"
+#include "prtypes.h"
+#include "blapi.h"
+
+/*
+ * what follows is code thrown together to generate the myriad of tables
+ * used by Rijndael, the AES cipher.
+ */
+
+/*
+ * Pack four byte values into one 32-bit word with b0 in the least
+ * significant byte and b3 in the most significant byte.
+ *
+ * Every operand is converted to PRUint32 before shifting: a PRUint8 argument
+ * would otherwise be promoted to signed int, and shifting a value >= 0x80
+ * left by 24 overflows int (undefined behaviour).  All arguments are
+ * parenthesized so arbitrary expressions may be passed.
+ */
+#define WORD_LE(b0, b1, b2, b3)                           \
+    (((PRUint32)(b3) << 24) | ((PRUint32)(b2) << 16) |    \
+     ((PRUint32)(b1) << 8) | (PRUint32)(b0))
+
+/*
+ * Pack four byte values into one 32-bit word with b0 in the most
+ * significant byte and b3 in the least significant byte.
+ *
+ * Every operand is converted to PRUint32 before shifting: a PRUint8 argument
+ * would otherwise be promoted to signed int, and shifting a value >= 0x80
+ * left by 24 overflows int (undefined behaviour).  All arguments are
+ * parenthesized so arbitrary expressions may be passed.
+ */
+#define WORD_BE(b0, b1, b2, b3)                           \
+    (((PRUint32)(b0) << 24) | ((PRUint32)(b1) << 16) |    \
+     ((PRUint32)(b2) << 8) | (PRUint32)(b3))
+
+static const PRUint8 __S[256] = /* Rijndael S-box (forward byte substitution),
+ * in decimal.  main() writes it out as _S and passes it to make_T_Table()
+ * to build the _T0.._T3 tables. */
+ {
+ 99, 124, 119, 123, 242, 107, 111, 197, 48, 1, 103, 43, 254, 215, 171, 118,
+ 202, 130, 201, 125, 250, 89, 71, 240, 173, 212, 162, 175, 156, 164, 114, 192,
+ 183, 253, 147, 38, 54, 63, 247, 204, 52, 165, 229, 241, 113, 216, 49, 21,
+ 4, 199, 35, 195, 24, 150, 5, 154, 7, 18, 128, 226, 235, 39, 178, 117,
+ 9, 131, 44, 26, 27, 110, 90, 160, 82, 59, 214, 179, 41, 227, 47, 132,
+ 83, 209, 0, 237, 32, 252, 177, 91, 106, 203, 190, 57, 74, 76, 88, 207,
+ 208, 239, 170, 251, 67, 77, 51, 133, 69, 249, 2, 127, 80, 60, 159, 168,
+ 81, 163, 64, 143, 146, 157, 56, 245, 188, 182, 218, 33, 16, 255, 243, 210,
+ 205, 12, 19, 236, 95, 151, 68, 23, 196, 167, 126, 61, 100, 93, 25, 115,
+ 96, 129, 79, 220, 34, 42, 144, 136, 70, 238, 184, 20, 222, 94, 11, 219,
+ 224, 50, 58, 10, 73, 6, 36, 92, 194, 211, 172, 98, 145, 149, 228, 121,
+ 231, 200, 55, 109, 141, 213, 78, 169, 108, 86, 244, 234, 101, 122, 174, 8,
+ 186, 120, 37, 46, 28, 166, 180, 198, 232, 221, 116, 31, 75, 189, 139, 138,
+ 112, 62, 181, 102, 72, 3, 246, 14, 97, 53, 87, 185, 134, 193, 29, 158,
+ 225, 248, 152, 17, 105, 217, 142, 148, 155, 30, 135, 233, 206, 85, 40, 223,
+ 140, 161, 137, 13, 191, 230, 66, 104, 65, 153, 45, 15, 176, 84, 187, 22,
+ };
+
+static const PRUint8 __SInv[256] = /* Inverse Rijndael S-box ("S**-1" in
+ * main()), in decimal.  main() writes it out as _SInv and passes it to
+ * make_T_Table() to build the _TInv0.._TInv3 tables. */
+ {
+ 82, 9, 106, 213, 48, 54, 165, 56, 191, 64, 163, 158, 129, 243, 215, 251,
+ 124, 227, 57, 130, 155, 47, 255, 135, 52, 142, 67, 68, 196, 222, 233, 203,
+ 84, 123, 148, 50, 166, 194, 35, 61, 238, 76, 149, 11, 66, 250, 195, 78,
+ 8, 46, 161, 102, 40, 217, 36, 178, 118, 91, 162, 73, 109, 139, 209, 37,
+ 114, 248, 246, 100, 134, 104, 152, 22, 212, 164, 92, 204, 93, 101, 182, 146,
+ 108, 112, 72, 80, 253, 237, 185, 218, 94, 21, 70, 87, 167, 141, 157, 132,
+ 144, 216, 171, 0, 140, 188, 211, 10, 247, 228, 88, 5, 184, 179, 69, 6,
+ 208, 44, 30, 143, 202, 63, 15, 2, 193, 175, 189, 3, 1, 19, 138, 107,
+ 58, 145, 17, 65, 79, 103, 220, 234, 151, 242, 207, 206, 240, 180, 230, 115,
+ 150, 172, 116, 34, 231, 173, 53, 133, 226, 249, 55, 232, 28, 117, 223, 110,
+ 71, 241, 26, 113, 29, 41, 197, 137, 111, 183, 98, 14, 170, 24, 190, 27,
+ 252, 86, 62, 75, 198, 210, 121, 32, 154, 219, 192, 254, 120, 205, 90, 244,
+ 31, 221, 168, 51, 136, 7, 199, 49, 177, 18, 16, 89, 39, 128, 236, 95,
+ 96, 81, 127, 169, 25, 181, 74, 13, 45, 229, 122, 159, 147, 201, 156, 239,
+ 160, 224, 59, 77, 174, 42, 245, 176, 200, 235, 187, 60, 131, 83, 153, 97,
+ 23, 43, 4, 126, 186, 119, 214, 38, 225, 105, 20, 99, 85, 33, 12, 125
+ };
+
+/* GF_MULTIPLY
+ *
+ * Multiply two bytes as polynomials over GF(2), i.e. as elements of
+ * GF(2**8).  Whenever doubling the multiplicand carries out of bit 7 the
+ * result is reduced by XOR with 0x1b, which is reduction modulo
+ * x**8 + x**4 + x**3 + x + 1.
+ *
+ * Shift-and-add: each set bit of b, from least to most significant,
+ * contributes the current (repeatedly doubled) value of a to the product.
+ */
+PRUint8
+gf_multiply(PRUint8 a, PRUint8 b)
+{
+    PRUint8 product = 0;
+
+    for (; b != 0; b >>= 1) {
+        if (b & 0x01) {
+            product ^= a;
+        }
+        /* double a in the field; the assignment truncates back to 8 bits */
+        if (a & 0x80) {
+            a = (a << 1) ^ 0x1b;
+        } else {
+            a <<= 1;
+        }
+    }
+    return product;
+}
+
+/*
+ * Write a pair of 256-entry word tables named _T<table> to `file`: a
+ * little-endian variant under "#ifdef IS_LITTLE_ENDIAN" followed by a
+ * big-endian variant under "#else".
+ *
+ * Entry i packs the four GF(2**8) products Sx[i]*m0 .. Sx[i]*m3 into one
+ * word; the m0 product lands in the low byte of the little-endian table
+ * and in the high byte of the big-endian table.  Six entries are written
+ * per output line.
+ */
+void
+make_T_Table(char *table, const PRUint8 Sx[256], FILE *file,
+             unsigned char m0, unsigned char m1,
+             unsigned char m2, unsigned char m3)
+{
+    int bigEndian;
+    int idx;
+
+    /* pass 0 emits the little-endian table, pass 1 the big-endian one */
+    for (bigEndian = 0; bigEndian < 2; bigEndian++) {
+        fputs(bigEndian ? "#else\n" : "#ifdef IS_LITTLE_ENDIAN\n", file);
+        fprintf(file, "static const PRUint32 _T%s[256] = \n{\n", table);
+        for (idx = 0; idx < 256; idx++) {
+            PRUint8 p0 = gf_multiply(Sx[idx], m0);
+            PRUint8 p1 = gf_multiply(Sx[idx], m1);
+            PRUint8 p2 = gf_multiply(Sx[idx], m2);
+            PRUint8 p3 = gf_multiply(Sx[idx], m3);
+            PRUint32 word = bigEndian ? WORD_BE(p0, p1, p2, p3)
+                                      : WORD_LE(p0, p1, p2, p3);
+            char sep = (idx == 255) ? ' ' : ',';
+            char brk = (idx % 6 == 5) ? '\n' : ' ';
+
+            if (word != 0) {
+                fprintf(file, "%#.8x%c%c", word, sep, brk);
+            } else {
+                /* "%#x" prints no 0x prefix for zero, so spell it out */
+                fprintf(file, "0x00000000%c%c", sep, brk);
+            }
+        }
+        fputs("\n};\n", file);
+    }
+    fputs("#endif\n\n", file);
+}
+
+/*
+ * Write a pair of 256-entry word tables named _IMXC<num> to `file`: a
+ * little-endian variant under "#ifdef IS_LITTLE_ENDIAN" followed by a
+ * big-endian variant under "#else".
+ *
+ * Entry i holds the GF(2**8) products i*m0 .. i*m3, one per byte.  The
+ * little-endian table prints them with the m3 product first (most
+ * significant hex digits); the big-endian table prints the m0 product first.
+ */
+void
+make_InvMixCol_Table(int num, FILE *file, PRUint8 m0, PRUint8 m1, PRUint8 m2, PRUint8 m3)
+{
+    int bigEndian;
+    PRUint16 val;
+
+    /* pass 0 emits the little-endian table, pass 1 the big-endian one */
+    for (bigEndian = 0; bigEndian < 2; bigEndian++) {
+        fputs(bigEndian ? "#else\n" : "#ifdef IS_LITTLE_ENDIAN\n", file);
+        fprintf(file, "static const PRUint32 _IMXC%d[256] = \n{\n", num);
+        for (val = 0; val < 256; val++) {
+            PRUint8 prod[4];
+            char sep = (val == 255) ? ' ' : ',';
+            char brk = (val % 6 == 5) ? '\n' : ' ';
+
+            prod[0] = gf_multiply(val, m0);
+            prod[1] = gf_multiply(val, m1);
+            prod[2] = gf_multiply(val, m2);
+            prod[3] = gf_multiply(val, m3);
+            if (bigEndian) {
+                fprintf(file, "0x%.2x%.2x%.2x%.2x%c%c",
+                        prod[0], prod[1], prod[2], prod[3], sep, brk);
+            } else {
+                fprintf(file, "0x%.2x%.2x%.2x%.2x%c%c",
+                        prod[3], prod[2], prod[1], prod[0], sep, brk);
+            }
+        }
+        fputs("\n};\n", file);
+    }
+    fputs("#endif\n\n", file);
+}
+
+/*
+ * Generate "rijndael32.tab" in the current directory.  The file holds, in
+ * order: the S-box (only when RIJNDAEL_INCLUDE_TABLES is not defined), the
+ * inverse S-box, the T / TInv / IMXC word tables (only when
+ * RIJNDAEL_INCLUDE_TABLES is defined), and the Rcon round constants in
+ * little- and big-endian variants.
+ *
+ * Returns 0 on success and 1 if the output file cannot be opened, written
+ * or closed.
+ */
+int
+main(void)
+{
+    int i;
+    int failed;
+    PRUint8 cur;
+    PRUint32 word;
+    FILE *optfile;
+
+    optfile = fopen("rijndael32.tab", "w");
+    if (optfile == NULL) {
+        /* without this check every fprintf below would be handed NULL */
+        perror("rijndael32.tab");
+        return 1;
+    }
+    /* output S, if there are no T tables */
+    fprintf(optfile, "#ifndef RIJNDAEL_INCLUDE_TABLES\n");
+    fprintf(optfile, "static const PRUint8 _S[256] = \n{\n");
+    for (i = 0; i < 256; i++) {
+        fprintf(optfile, "%3d%c%c", __S[i], (i == 255) ? ' ' : ',',
+                (i % 16 == 15) ? '\n' : ' ');
+    }
+    fprintf(optfile, "};\n#endif /* not RIJNDAEL_INCLUDE_TABLES */\n\n");
+    /* output S**-1 */
+    fprintf(optfile, "static const PRUint8 _SInv[256] = \n{\n");
+    for (i = 0; i < 256; i++) {
+        fprintf(optfile, "%3d%c%c", __SInv[i], (i == 255) ? ' ' : ',',
+                (i % 16 == 15) ? '\n' : ' ');
+    }
+    fprintf(optfile, "};\n\n");
+    fprintf(optfile, "#ifdef RIJNDAEL_INCLUDE_TABLES\n");
+    /* The 32-bit word tables for optimized implementation */
+    /* T0 = [ S[a] * 02, S[a], S[a], S[a] * 03 ] */
+    make_T_Table("0", __S, optfile, 0x02, 0x01, 0x01, 0x03);
+    /* T1 = [ S[a] * 03, S[a] * 02, S[a], S[a] ] */
+    make_T_Table("1", __S, optfile, 0x03, 0x02, 0x01, 0x01);
+    /* T2 = [ S[a], S[a] * 03, S[a] * 02, S[a] ] */
+    make_T_Table("2", __S, optfile, 0x01, 0x03, 0x02, 0x01);
+    /* T3 = [ S[a], S[a], S[a] * 03, S[a] * 02 ] */
+    make_T_Table("3", __S, optfile, 0x01, 0x01, 0x03, 0x02);
+    /* TInv0 = [ Si[a] * 0E, Si[a] * 09, Si[a] * 0D, Si[a] * 0B ] */
+    make_T_Table("Inv0", __SInv, optfile, 0x0e, 0x09, 0x0d, 0x0b);
+    /* TInv1 = [ Si[a] * 0B, Si[a] * 0E, Si[a] * 09, Si[a] * 0D ] */
+    make_T_Table("Inv1", __SInv, optfile, 0x0b, 0x0e, 0x09, 0x0d);
+    /* TInv2 = [ Si[a] * 0D, Si[a] * 0B, Si[a] * 0E, Si[a] * 09 ] */
+    make_T_Table("Inv2", __SInv, optfile, 0x0d, 0x0b, 0x0e, 0x09);
+    /* TInv3 = [ Si[a] * 09, Si[a] * 0D, Si[a] * 0B, Si[a] * 0E ] */
+    make_T_Table("Inv3", __SInv, optfile, 0x09, 0x0d, 0x0b, 0x0e);
+    /* byte multiply tables for inverse key expansion (mimics InvMixColumn) */
+    make_InvMixCol_Table(0, optfile, 0x0e, 0x09, 0x0d, 0x0b);
+    make_InvMixCol_Table(1, optfile, 0x0b, 0x0E, 0x09, 0x0d);
+    make_InvMixCol_Table(2, optfile, 0x0d, 0x0b, 0x0e, 0x09);
+    make_InvMixCol_Table(3, optfile, 0x09, 0x0d, 0x0b, 0x0e);
+    fprintf(optfile, "#endif /* RIJNDAEL_INCLUDE_TABLES */\n\n");
+    /* round constants for key expansion: successive powers of 0x02 */
+    fprintf(optfile, "#ifdef IS_LITTLE_ENDIAN\n");
+    fprintf(optfile, "static const PRUint32 Rcon[30] = {\n");
+    cur = 0x01;
+    for (i = 0; i < 30; i++) {
+        /* widen before packing so no signed int is shifted */
+        word = WORD_LE((PRUint32)cur, 0, 0, 0);
+        fprintf(optfile, "%#.8x%c%c", word,
+                (i == 29) ? ' ' : ',', (i % 6 == 5) ? '\n' : ' ');
+        cur = gf_multiply(cur, 0x02);
+    }
+    fprintf(optfile, "};\n");
+    fprintf(optfile, "#else\n");
+    fprintf(optfile, "static const PRUint32 Rcon[30] = {\n");
+    cur = 0x01;
+    for (i = 0; i < 30; i++) {
+        /* widen first: cur << 24 on a promoted int overflows for cur >= 0x80 */
+        word = WORD_BE((PRUint32)cur, 0, 0, 0);
+        fprintf(optfile, "%#.8x%c%c", word,
+                (i == 29) ? ' ' : ',', (i % 6 == 5) ? '\n' : ' ');
+        cur = gf_multiply(cur, 0x02);
+    }
+    fprintf(optfile, "};\n");
+    fprintf(optfile, "#endif\n\n");
+    /* report write errors instead of silently leaving a truncated table */
+    failed = ferror(optfile);
+    if (fclose(optfile) != 0) {
+        failed = 1;
+    }
+    if (failed) {
+        fprintf(stderr, "error writing rijndael32.tab\n");
+        return 1;
+    }
+    return 0;
+}
diff --git a/security/nss/lib/freebl/rsa.c b/security/nss/lib/freebl/rsa.c
new file mode 100644
index 000000000..ff8c40ed9
--- /dev/null
+++ b/security/nss/lib/freebl/rsa.c
@@ -0,0 +1,1625 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * RSA key generation, public key op, private key op.
+ */
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "secerr.h"
+
+#include "prclist.h"
+#include "nssilock.h"
+#include "prinit.h"
+#include "blapi.h"
+#include "mpi.h"
+#include "mpprime.h"
+#include "mplogic.h"
+#include "secmpi.h"
+#include "secitem.h"
+#include "blapii.h"
+
+/*
+** Number of times to attempt to generate a prime (p or q) from a random
+** seed (the seed changes for each iteration).
+*/
+#define MAX_PRIME_GEN_ATTEMPTS 10
+/*
+** Number of times to attempt to generate a key. The primes p and q change
+** for each attempt.
+** NOTE(review): RSA_NewKey as visible in this file bounds its retries by
+** 5 * (keySizeInBits / 2) rather than by this constant -- confirm whether
+** the macro is still referenced anywhere.
+*/
+#define MAX_KEY_GEN_ATTEMPTS 10
+
+/* Blinding Parameters max cache size: the number of entries in the
+ * per-modulus array of RSABlindingParamsStr. */
+#define RSA_BLINDING_PARAMS_MAX_CACHE_SIZE 20
+
+/* Evaluates to true when the two lengths (both in bytes) describe a key
+ * this code refuses to use: the exponent is longer than the modulus, or
+ * either length exceeds its RSA_MAX_*_BITS limit divided by 8. */
+#define BAD_RSA_KEY_SIZE(modLen, expLen) \
+ ((expLen) > (modLen) || (modLen) > RSA_MAX_MODULUS_BITS / 8 || \
+ (expLen) > RSA_MAX_EXPONENT_BITS / 8)
+
+struct blindingParamsStr;
+typedef struct blindingParamsStr blindingParams;
+
+struct blindingParamsStr { /* one (f, g) blinding pair plus its use count */
+ blindingParams *next; /* next entry in the chain this pair is linked on */
+ mp_int f, g; /* blinding parameter */
+ int counter; /* number of remaining uses of (f, g) */
+};
+
+/*
+** RSABlindingParamsStr
+**
+** Per-modulus record of blinding parameters; records are linked together
+** through 'link' and identified by 'modulus'.
+**
+** For discussion of Paul Kocher's timing attack against an RSA private key
+** operation, see http://www.cryptography.com/timingattack/paper.html. The
+** countermeasure to this attack, known as blinding, is also discussed in
+** the Handbook of Applied Cryptography, 11.118-11.119.
+*/
+struct RSABlindingParamsStr {
+ /* Blinding-specific parameters */
+ PRCList link; /* link to list of structs */
+ SECItem modulus; /* list element "key" */
+ blindingParams *free, *bp; /* Blinding parameters queue */
+ blindingParams array[RSA_BLINDING_PARAMS_MAX_CACHE_SIZE]; /* fixed-size
+ * storage; presumably the entries that 'free' and 'bp' chain
+ * together -- confirm against the code that fills them */
+};
+typedef struct RSABlindingParamsStr RSABlindingParams;
+
+/*
+** RSABlindingParamsListStr
+**
+** List of key-specific blinding params. The arena holds the volatile pool
+** of memory for each entry and the list itself. The lock is for list
+** operations, in this case insertions and iterations, as well as control
+** of the counter for each set of blinding parameters.
+**
+** NOTE(review): the struct below has no arena member, so the sentence about
+** the arena looks stale -- confirm where list entries are allocated.
+*/
+struct RSABlindingParamsListStr {
+ PZLock *lock; /* Lock for the list */
+ PRCondVar *cVar; /* Condition Variable */
+ int waitCount; /* Number of threads waiting on cVar */
+ PRCList head; /* Pointer to the list */
+};
+
+/*
+** The master blinding params list.
+** Zero-initialized here; its lock, condition variable, wait count and list
+** head are set up by init_blinding_params_list().
+*/
+static struct RSABlindingParamsListStr blindingParamsList = { 0 };
+
+/* Number of times to reuse (f, g). Suggested by Paul Kocher */
+#define RSA_BLINDING_PARAMS_MAX_REUSE 50
+
+/* Global, allows optional use of blinding. On by default. */
+/* Cannot be changed at the moment, due to thread-safety issues. */
+static PRBool nssRSAUseBlinding = PR_TRUE;
+
+static SECStatus
+rsa_build_from_primes(const mp_int *p, const mp_int *q,
+ mp_int *e, PRBool needPublicExponent,
+ mp_int *d, PRBool needPrivateExponent,
+ RSAPrivateKey *key, unsigned int keySizeInBits)
+{ /* Derive the remaining RSA private-key fields from the primes p and q and
+ * whichever of e / d the caller already has, and store them in 'key'
+ * using key->arena.
+ * needPublicExponent: e is missing; it is computed as d**-1 mod lcm.
+ * needPrivateExponent: d is missing; it is computed as e**-1 mod lcm.
+ * (lcm = lcm(p-1, q-1); when both flags are false no inversion is done.)
+ * keySizeInBits: required number of significant bits of n = p*q.
+ * Returns SECSuccess, or SECFailure with SEC_ERROR_NEED_RANDOM when
+ * p == q, when n has the wrong size, or when the inversion reports
+ * MP_UNDEF; other MPI errors are mapped through MP_TO_SEC_ERROR.
+ * NOTE(review): exponent1, exponent2 and coefficient are stored before the
+ * later conversions run, so 'key' may be partly written when a later step
+ * fails -- depends on MPINT_TO_SECITEM, confirm. */
+ mp_int n, phi;
+ mp_int psub1, qsub1, tmp;
+ mp_err err = MP_OKAY;
+ SECStatus rv = SECSuccess;
+ MP_DIGITS(&n) = 0; /* lets cleanup call mp_clear() even if mp_init() never ran */
+ MP_DIGITS(&phi) = 0;
+ MP_DIGITS(&psub1) = 0;
+ MP_DIGITS(&qsub1) = 0;
+ MP_DIGITS(&tmp) = 0;
+ CHECK_MPI_OK(mp_init(&n));
+ CHECK_MPI_OK(mp_init(&phi));
+ CHECK_MPI_OK(mp_init(&psub1));
+ CHECK_MPI_OK(mp_init(&qsub1));
+ CHECK_MPI_OK(mp_init(&tmp));
+ /* p and q must be distinct. */
+ if (mp_cmp(p, q) == 0) {
+ PORT_SetError(SEC_ERROR_NEED_RANDOM);
+ rv = SECFailure;
+ goto cleanup;
+ }
+ /* 1. Compute n = p*q */
+ CHECK_MPI_OK(mp_mul(p, q, &n));
+ /* verify that the modulus has the desired number of bits */
+ if ((unsigned)mpl_significant_bits(&n) != keySizeInBits) {
+ PORT_SetError(SEC_ERROR_NEED_RANDOM);
+ rv = SECFailure;
+ goto cleanup;
+ }
+
+ /* at least one exponent must already be known: the caller may not ask
+ * for both e and d to be computed */
+ PORT_Assert(!(needPublicExponent && needPrivateExponent));
+
+ /* 2. Compute p-1 and q-1; the value kept in 'phi' is lcm(p-1, q-1),
+ * not the product (p-1)*(q-1) */
+ CHECK_MPI_OK(mp_sub_d(p, 1, &psub1));
+ CHECK_MPI_OK(mp_sub_d(q, 1, &qsub1));
+ if (needPublicExponent || needPrivateExponent) {
+ CHECK_MPI_OK(mp_lcm(&psub1, &qsub1, &phi));
+ /* 3. Compute d = e**-1 mod(phi) */
+ /* or e = d**-1 mod(phi) as necessary */
+ if (needPublicExponent) {
+ err = mp_invmod(d, &phi, e);
+ } else {
+ err = mp_invmod(e, &phi, d);
+ }
+ } else {
+ err = MP_OKAY;
+ }
+ /* Verify that phi(n) and e have no common divisors: an MP_UNDEF result
+ * from the inversion is reported as "try other primes"
+ * (SEC_ERROR_NEED_RANDOM); any other error falls through to the generic
+ * MPI error mapping in cleanup */
+ if (err != MP_OKAY) {
+ if (err == MP_UNDEF) {
+ PORT_SetError(SEC_ERROR_NEED_RANDOM);
+ err = MP_OKAY; /* to keep PORT_SetError from being called again */
+ rv = SECFailure;
+ }
+ goto cleanup;
+ }
+
+ /* 4. Compute exponent1 = d mod (p-1) */
+ CHECK_MPI_OK(mp_mod(d, &psub1, &tmp));
+ MPINT_TO_SECITEM(&tmp, &key->exponent1, key->arena);
+ /* 5. Compute exponent2 = d mod (q-1) */
+ CHECK_MPI_OK(mp_mod(d, &qsub1, &tmp));
+ MPINT_TO_SECITEM(&tmp, &key->exponent2, key->arena);
+ /* 6. Compute coefficient = q**-1 mod p */
+ CHECK_MPI_OK(mp_invmod(q, p, &tmp));
+ MPINT_TO_SECITEM(&tmp, &key->coefficient, key->arena);
+
+ /* copy our calculated results, overwrite what is there */
+ key->modulus.data = NULL;
+ MPINT_TO_SECITEM(&n, &key->modulus, key->arena);
+ key->privateExponent.data = NULL;
+ MPINT_TO_SECITEM(d, &key->privateExponent, key->arena);
+ key->publicExponent.data = NULL;
+ MPINT_TO_SECITEM(e, &key->publicExponent, key->arena);
+ key->prime1.data = NULL;
+ MPINT_TO_SECITEM(p, &key->prime1, key->arena);
+ key->prime2.data = NULL;
+ MPINT_TO_SECITEM(q, &key->prime2, key->arena);
+cleanup:
+ mp_clear(&n);
+ mp_clear(&phi);
+ mp_clear(&psub1);
+ mp_clear(&qsub1);
+ mp_clear(&tmp);
+ if (err) {
+ MP_TO_SEC_ERROR(err);
+ rv = SECFailure;
+ }
+ return rv;
+}
+/*
+ * Fill 'prime' with a random probable prime that is primeLen bytes long.
+ *
+ * Each attempt draws primeLen random bytes, forces the two high-order bits
+ * and the low-order bit to 1 (so the candidate is odd and the product of two
+ * such primes has the full 2 * primeLen * 8 bits), and hands the value to
+ * mpp_make_prime(). Up to MAX_PRIME_GEN_ATTEMPTS fresh seeds are tried while
+ * that call reports MP_NO.
+ *
+ * Returns SECSuccess when a prime was produced. Returns SECFailure with
+ * SEC_ERROR_INVALID_ARGS for a non-positive primeLen, with
+ * SEC_ERROR_NO_MEMORY when the scratch buffer cannot be allocated, or with
+ * the error reported by the random generator / MPI layer otherwise.
+ */
+static SECStatus
+generate_prime(mp_int *prime, int primeLen)
+{
+    mp_err err = MP_OKAY;
+    SECStatus rv = SECSuccess;
+    unsigned long counter = 0;
+    int piter;
+    unsigned char *pb = NULL;
+
+    /* pb[primeLen - 1] below needs at least one byte */
+    if (primeLen <= 0) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+    pb = PORT_Alloc(primeLen);
+    if (!pb) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        /* without this the function reported success with no prime */
+        rv = SECFailure;
+        goto cleanup;
+    }
+    for (piter = 0; piter < MAX_PRIME_GEN_ATTEMPTS; piter++) {
+        CHECK_SEC_OK(RNG_GenerateGlobalRandomBytes(pb, primeLen));
+        pb[0] |= 0xC0;            /* set two high-order bits */
+        pb[primeLen - 1] |= 0x01; /* set low-order bit       */
+        CHECK_MPI_OK(mp_read_unsigned_octets(prime, pb, primeLen));
+        err = mpp_make_prime(prime, primeLen * 8, PR_FALSE, &counter);
+        if (err != MP_NO) {
+            goto cleanup;
+        }
+        /* keep going while err == MP_NO */
+    }
+cleanup:
+    if (pb) {
+        PORT_ZFree(pb, primeLen);
+    }
+    if (err) {
+        /* also reached with err == MP_NO when every attempt failed */
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    return rv;
+}
+
+/*
+ * make sure the key components meet fips186 requirements.
+ *
+ * Returns PR_TRUE when keySizeInBits is below 250 (no checks are made), or
+ * when both p - q and d have enough significant bits. Returns PR_FALSE when
+ * either check fails or when an MPI call fails.
+ */
+static PRBool
+rsa_fips186_verify(mp_int *p, mp_int *q, mp_int *d, int keySizeInBits)
+{
+ mp_int pq_diff;
+ mp_err err = MP_OKAY; /* written by CHECK_MPI_OK; not otherwise inspected here */
+ PRBool ret = PR_FALSE;
+
+ if (keySizeInBits < 250) {
+ /* not a valid FIPS length, no point in our other tests */
+ /* if you are here, and in FIPS mode, you are outside the security
+ * policy */
+ return PR_TRUE;
+ }
+
+ /* p & q are already known to be greater than sqrt(2)*2^(keySize/2-1) */
+ /* we also know that gcd(p-1,e) = 1 and gcd(q-1,e) = 1 because the
+ * mp_invmod() function will fail. */
+ /* now check p-q > 2^(keysize/2-100) */
+ MP_DIGITS(&pq_diff) = 0;
+ CHECK_MPI_OK(mp_init(&pq_diff));
+ /* NSS always has p > q, so we know pq_diff is positive */
+ CHECK_MPI_OK(mp_sub(p, q, &pq_diff));
+ if ((unsigned)mpl_significant_bits(&pq_diff) < (keySizeInBits / 2 - 100)) {
+ goto cleanup;
+ }
+ /* now verify d is large enough: at least keySizeInBits / 2 bits */
+ if ((unsigned)mpl_significant_bits(d) < (keySizeInBits / 2)) {
+ goto cleanup;
+ }
+ ret = PR_TRUE;
+
+cleanup:
+ mp_clear(&pq_diff);
+ return ret;
+}
+
+/*
+** Generate and return a new RSA public and private key.
+** Both keys are encoded in a single RSAPrivateKey structure.
+** "keySizeInBits" is the size of the key to be generated, in bits.
+**     512, 1024, etc. It must be a positive multiple of 16.
+** "publicExponent" must not be NULL; it points to some data that
+**     represents the public exponent to use. The data is a byte
+**     encoded integer, in "big endian" order.
+**
+** Returns the new key, allocated from its own arena (key->arena), or NULL
+** with the port error set:
+**     SEC_ERROR_INVALID_ARGS  bad key size or missing/oversized exponent
+**     SEC_ERROR_NO_MEMORY     arena, key or version allocation failed
+**     SEC_ERROR_NEED_RANDOM   no acceptable pair of primes was found within
+**                             the retry budget
+**     or whatever error a failing helper reported.
+*/
+RSAPrivateKey *
+RSA_NewKey(int keySizeInBits, SECItem *publicExponent)
+{
+    unsigned int primeLen;
+    mp_int p, q, e, d;
+    int kiter;
+    int max_attempts;
+    mp_err err = MP_OKAY;
+    SECStatus rv = SECSuccess;
+    int prerr = 0;
+    RSAPrivateKey *key = NULL;
+    PLArenaPool *arena = NULL;
+    /* Require key size to be a positive multiple of 16 bits. */
+    if (!publicExponent || keySizeInBits <= 0 || keySizeInBits % 16 != 0 ||
+        BAD_RSA_KEY_SIZE((unsigned int)keySizeInBits / 8, publicExponent->len)) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return NULL;
+    }
+    /* 1. Allocate arena & key */
+    arena = PORT_NewArena(NSS_FREEBL_DEFAULT_CHUNKSIZE);
+    if (!arena) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return NULL;
+    }
+    key = PORT_ArenaZNew(arena, RSAPrivateKey);
+    if (!key) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        PORT_FreeArena(arena, PR_TRUE);
+        return NULL;
+    }
+    key->arena = arena;
+    /* length of primes p and q (in bytes) */
+    primeLen = keySizeInBits / (2 * PR_BITS_PER_BYTE);
+    MP_DIGITS(&p) = 0;
+    MP_DIGITS(&q) = 0;
+    MP_DIGITS(&e) = 0;
+    MP_DIGITS(&d) = 0;
+    CHECK_MPI_OK(mp_init(&p));
+    CHECK_MPI_OK(mp_init(&q));
+    CHECK_MPI_OK(mp_init(&e));
+    CHECK_MPI_OK(mp_init(&d));
+    /* 2.  Set the version number (PKCS1 v1.5 says it should be zero) */
+    SECITEM_AllocItem(arena, &key->version, 1);
+    /* key was zero-allocated, so a NULL data pointer here means the
+     * allocation did not succeed; do not write through it */
+    if (!key->version.data) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        rv = SECFailure;
+        goto cleanup;
+    }
+    key->version.data[0] = 0;
+    /* 3.  Set the public exponent */
+    SECITEM_TO_MPINT(*publicExponent, &e);
+    kiter = 0;
+    max_attempts = 5 * (keySizeInBits / 2); /* FIPS 186-4 B.3.3 steps 4.7 and 5.8 */
+    do {
+        prerr = 0;
+        PORT_SetError(0);
+        CHECK_SEC_OK(generate_prime(&p, primeLen));
+        CHECK_SEC_OK(generate_prime(&q, primeLen));
+        /* Assure p > q */
+        /* NOTE: PKCS #1 does not require p > q, and NSS doesn't use any
+         * implementation optimization that requires p > q. We can remove
+         * this code in the future.
+         */
+        if (mp_cmp(&p, &q) < 0)
+            mp_exch(&p, &q);
+        /* Attempt to use these primes to generate a key */
+        rv = rsa_build_from_primes(&p, &q,
+                                   &e, PR_FALSE, /* needPublicExponent=false */
+                                   &d, PR_TRUE,  /* needPrivateExponent=true */
+                                   key, keySizeInBits);
+        if (rv == SECSuccess) {
+            if (rsa_fips186_verify(&p, &q, &d, keySizeInBits)) {
+                break;
+            }
+            prerr = SEC_ERROR_NEED_RANDOM; /* retry with different values */
+        } else {
+            prerr = PORT_GetError();
+        }
+        kiter++;
+        /* loop until have primes */
+    } while (prerr == SEC_ERROR_NEED_RANDOM && kiter < max_attempts);
+    if (prerr) {
+        /* Either a hard failure or the retry budget ran out. In the latter
+         * case the last build may have succeeded (rv == SECSuccess) while the
+         * FIPS check rejected it, so force a failure instead of returning
+         * the rejected key. */
+        PORT_SetError(prerr);
+        rv = SECFailure;
+    }
+cleanup:
+    mp_clear(&p);
+    mp_clear(&q);
+    mp_clear(&e);
+    mp_clear(&d);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    if (rv && arena) {
+        /* the key lives in the arena, so freeing it discards the key too */
+        PORT_FreeArena(arena, PR_TRUE);
+        key = NULL;
+    }
+    return key;
+}
+
+/*
+ * Probabilistic primality check for p.
+ *
+ * A Fermat test is run first; only when it answers MP_OKAY are the
+ * Miller-Rabin tests run. The result of the last test executed is returned
+ * unchanged.
+ */
+mp_err
+rsa_is_prime(mp_int *p)
+{
+    int status = mpp_fermat(p, 2);
+
+    if (status == MP_OKAY) {
+        /* Fermat passed, follow up with some Miller-Rabin tests */
+        status = mpp_pprime(p, 2);
+    }
+    return status;
+}
+
+/*
+ * Factorize a RSA modulus n into p and q by using the exponents e and d.
+ *
+ * In: e, d, n
+ * Out: p, q
+ *
+ * See Handbook of Applied Cryptography, 8.2.2(i).
+ *
+ * The algorithm is probabilistic, it is run 64 times (bases a = 2, 4, ...,
+ * 128) and each run has a 50% chance of succeeding with a runtime of
+ * O(log(e*d)).
+ *
+ * The returned p might be smaller than q.
+ *
+ * Returns MP_OKAY when a factorization was found, MP_RANGE when e*d - 1 is
+ * not positive or when no base produced a factor (d is most likely wrong),
+ * or the error of a failing MPI call.
+ */
+static mp_err
+rsa_factorize_n_from_exponents(mp_int *e, mp_int *d, mp_int *p, mp_int *q,
+                               mp_int *n)
+{
+    /* lambda is the private modulus: e*d = 1 mod lambda */
+    /* so: e*d - 1 = k*lambda = t*2^s where t is odd */
+    mp_int klambda;
+    mp_int t, onetwentyeight;
+    unsigned long s = 0;
+    unsigned long i;
+
+    /* cand = a^(t * 2^i) mod n, next_cand = a^(t * 2^(i+1)) mod n */
+    mp_int a;
+    mp_int cand;
+    mp_int next_cand;
+
+    mp_int n_minus_one;
+    mp_err err = MP_OKAY;
+
+    MP_DIGITS(&klambda) = 0;
+    MP_DIGITS(&t) = 0;
+    MP_DIGITS(&a) = 0;
+    MP_DIGITS(&cand) = 0;
+    MP_DIGITS(&n_minus_one) = 0;
+    MP_DIGITS(&next_cand) = 0;
+    MP_DIGITS(&onetwentyeight) = 0;
+    CHECK_MPI_OK(mp_init(&klambda));
+    CHECK_MPI_OK(mp_init(&t));
+    CHECK_MPI_OK(mp_init(&a));
+    CHECK_MPI_OK(mp_init(&cand));
+    CHECK_MPI_OK(mp_init(&n_minus_one));
+    CHECK_MPI_OK(mp_init(&next_cand));
+    CHECK_MPI_OK(mp_init(&onetwentyeight));
+
+    /* checked like the mp_set_int() call for 'a' further down */
+    CHECK_MPI_OK(mp_set_int(&onetwentyeight, 128));
+
+    /* calculate k*lambda = e*d - 1 */
+    CHECK_MPI_OK(mp_mul(e, d, &klambda));
+    CHECK_MPI_OK(mp_sub_d(&klambda, 1, &klambda));
+
+    /* e*d - 1 must be positive. With e*d == 1 the value is zero, and zero
+     * stays even however often it is halved, so the loop below would not
+     * terminate. Reject such exponents up front. */
+    if (mp_cmp_z(&klambda) <= 0) {
+        err = MP_RANGE;
+        goto cleanup;
+    }
+
+    /* factorize klambda into t*2^s */
+    CHECK_MPI_OK(mp_copy(&klambda, &t));
+    while (mpp_divis_d(&t, 2) == MP_YES) {
+        CHECK_MPI_OK(mp_div_2(&t, &t));
+        s += 1;
+    }
+
+    /* precompute n_minus_one = n - 1 */
+    CHECK_MPI_OK(mp_copy(n, &n_minus_one));
+    CHECK_MPI_OK(mp_sub_d(&n_minus_one, 1, &n_minus_one));
+
+    /* try successive bases a, each one has a 50% chance of leading to a
+     * factorization */
+    CHECK_MPI_OK(mp_set_int(&a, 2));
+    /* The following is equivalent to for (a=2, a <= 128, a+=2) */
+    while (mp_cmp(&a, &onetwentyeight) <= 0) {
+        /* compute the base cand = a^(t * 2^0) [i = 0] */
+        CHECK_MPI_OK(mp_exptmod(&a, &t, n, &cand));
+
+        for (i = 0; i < s; i++) {
+            /* condition 1: skip the base if we hit a trivial factor of n */
+            if (mp_cmp(&cand, &n_minus_one) == 0 || mp_cmp_d(&cand, 1) == 0) {
+                break;
+            }
+
+            /* increase i in a^(t * 2^i) by squaring the number */
+            CHECK_MPI_OK(mp_exptmod_d(&cand, 2, n, &next_cand));
+
+            /* condition 2: a^(t * 2^(i+1)) = 1 mod n */
+            if (mp_cmp_d(&next_cand, 1) == 0) {
+                /* conditions verified, gcd(a^(t * 2^i) - 1, n) is a factor */
+                CHECK_MPI_OK(mp_sub_d(&cand, 1, &cand));
+                CHECK_MPI_OK(mp_gcd(&cand, n, p));
+                if (mp_cmp_d(p, 1) == 0) {
+                    /* only the trivial divisor: restore cand, next base */
+                    CHECK_MPI_OK(mp_add_d(&cand, 1, &cand));
+                    break;
+                }
+                CHECK_MPI_OK(mp_div(n, p, q, NULL));
+                goto cleanup;
+            }
+            CHECK_MPI_OK(mp_copy(&next_cand, &cand));
+        }
+
+        CHECK_MPI_OK(mp_add_d(&a, 2, &a));
+    }
+
+    /* if we reach here it's likely ((2^64 - 1) / 2^64) that d is wrong */
+    err = MP_RANGE;
+
+cleanup:
+    mp_clear(&klambda);
+    mp_clear(&t);
+    mp_clear(&a);
+    mp_clear(&cand);
+    mp_clear(&n_minus_one);
+    mp_clear(&next_cand);
+    mp_clear(&onetwentyeight);
+    return err;
+}
+
+/*
+ * Try to find the two primes based on 2 exponents plus a prime.
+ *
+ * In: e, d and p.
+ * Out: p,q.
+ *
+ * Step 1, Since d = e**-1 mod phi, we know that d*e == 1 mod phi, or
+ * d*e = 1+k*phi, or d*e-1 = k*phi. since d is less than phi and e is
+ * usually less than d, then k must be an integer between e-1 and 1
+ * (probably on the order of e).
+ * Step 1a, We can divide k*phi by prime-1 and get k*(q-1). This will reduce
+ * the size of our division through the rest of the loop.
+ * Step 2, Loop through the values k=e-1 to 1 looking for k. k should be on
+ * the order of e, and e is typically small. This may take a while for
+ * a large random e. We are looking for a k that divides kphi
+ * evenly. Once we find a k that divides kphi evenly, we assume it
+ * is the true k. It's possible this k is not the 'true' k but has
+ * swapped factors of p-1 and/or q-1. Because of this, we
+ * tentatively continue Steps 3-6 inside this loop, and may return looking
+ * for another k on failure.
+ * Step 3, Calculate our tentative phi=kphi/k. Note: real phi is (p-1)*(q-1).
+ * Step 4a, kphi is k*(q-1), so phi is our tentative q-1. q = phi+1.
+ * If k is correct, q should be the right length and prime.
+ * Step 4b, It's possible q-1 and k could have swapped factors. We now have a
+ * possible solution that meets our criteria. It may not be the only
+ * solution, however, so we keep looking. If we find more than one,
+ * we will fail since we cannot determine which is the correct
+ * solution, and returning the wrong modulus will compromise both
+ * moduli. If no other solution is found, we return the unique solution.
+ *
+ * This will return p & q. q may be larger than p in the case that p was given
+ * and it was the smaller prime.
+ */
+static mp_err
+rsa_get_prime_from_exponents(mp_int *e, mp_int *d, mp_int *p, mp_int *q,
+ mp_int *n, unsigned int keySizeInBits)
+{ /* Searches for the second prime q given e, d and one prime p.
+ * Returns MP_OKAY with q set when exactly one candidate was found,
+ * MP_RANGE when e has more than 23 significant bits, when p-1 does not
+ * divide e*d-1, or when zero or several candidates were found; other MPI
+ * errors are passed through. 'n' is not referenced in this body. */
+ mp_int kphi; /* k*phi */
+ mp_int k; /* current guess at 'k' */
+ mp_int phi; /* (p-1)(q-1) */
+ mp_int r; /* remainder */
+ mp_int tmp; /* p-1 if p is given */
+ mp_err err = MP_OKAY;
+ unsigned int order_k;
+
+ MP_DIGITS(&kphi) = 0;
+ MP_DIGITS(&phi) = 0;
+ MP_DIGITS(&k) = 0;
+ MP_DIGITS(&r) = 0;
+ MP_DIGITS(&tmp) = 0;
+ CHECK_MPI_OK(mp_init(&kphi));
+ CHECK_MPI_OK(mp_init(&phi));
+ CHECK_MPI_OK(mp_init(&k));
+ CHECK_MPI_OK(mp_init(&r));
+ CHECK_MPI_OK(mp_init(&tmp));
+
+ /* our algorithm looks for a factor k whose maximum size is dependent
+ * on the size of our smallest exponent, which had better be the public
+ * exponent (if it's the private, the key is vulnerable to a brute force
+ * attack).
+ *
+ * since our factor search is linear, we need to limit the maximum
+ * size of the public key. this should not be a problem normally, since
+ * public keys are usually small.
+ *
+ * if we want to handle larger public key sizes, we should have
+ * a version which tries to 'completely' factor k*phi (where completely
+ * means 'factor into primes, or composites with which are products of
+ * large primes). Once we have all the factors, we can sort them out and
+ * try different combinations to form our phi. The risk is if (p-1)/2,
+ * (q-1)/2, and k are all large primes. In any case if the public key
+ * is small (order of 20 some bits), then a linear search for k is
+ * manageable.
+ */
+ if (mpl_significant_bits(e) > 23) {
+ err = MP_RANGE;
+ goto cleanup;
+ }
+
+ /* calculate k*phi = e*d - 1 */
+ CHECK_MPI_OK(mp_mul(e, d, &kphi));
+ CHECK_MPI_OK(mp_sub_d(&kphi, 1, &kphi));
+
+ /* kphi is (e*d)-1, which is the same as k*(p-1)(q-1)
+ * d < (p-1)(q-1), therefore k must be less than e-1
+ * We can narrow down k even more, though. Since p and q are odd and both
+ * have their high bit set, then we know that phi must be on order of
+ * keySizeBits.
+ */
+ order_k = (unsigned)mpl_significant_bits(&kphi) - keySizeInBits; /* unsigned:
+ * wraps to a huge value if kphi has fewer bits than keySizeInBits, in
+ * which case the search loop below never runs */
+
+ /* for (k=kinit; order(k) >= order_k; k--) { */
+ /* k=kinit: k can't be bigger than kphi/2^(keySizeInBits -1) */
+ CHECK_MPI_OK(mp_2expt(&k, keySizeInBits - 1));
+ CHECK_MPI_OK(mp_div(&kphi, &k, &k, NULL));
+ if (mp_cmp(&k, e) >= 0) {
+ /* also can't be bigger than e-1 */
+ CHECK_MPI_OK(mp_sub_d(e, 1, &k));
+ }
+
+ /* calculate our temp value */
+ /* This saves recalculating this value when the k guess is wrong, which
+ * is reasonably frequent. */
+ /* tmp = p-1 (used to calculate q-1= phi/tmp) */
+ CHECK_MPI_OK(mp_sub_d(p, 1, &tmp));
+ CHECK_MPI_OK(mp_div(&kphi, &tmp, &kphi, &r));
+ if (mp_cmp_z(&r) != 0) {
+ /* p-1 doesn't divide kphi, some parameter wasn't correct */
+ err = MP_RANGE;
+ goto cleanup;
+ }
+ mp_zero(q); /* q == 0 is the "no candidate found yet" marker used below */
+ /* kphi is now k*(q-1) */
+
+ /* rest of the for loop */
+ for (; (err == MP_OKAY) && (mpl_significant_bits(&k) >= order_k);
+ err = mp_sub_d(&k, 1, &k)) {
+ CHECK_MPI_OK(err);
+ /* looking for k as a factor of kphi */
+ CHECK_MPI_OK(mp_div(&kphi, &k, &phi, &r));
+ if (mp_cmp_z(&r) != 0) {
+ /* not a factor, try the next one */
+ continue;
+ }
+ /* we have a possible phi, see if it works */
+ if ((unsigned)mpl_significant_bits(&phi) != keySizeInBits / 2) {
+ /* phi is not the right size */
+ continue;
+ }
+ /* phi should be divisible by 2, since
+ * q is odd and phi=(q-1). */
+ if (mpp_divis_d(&phi, 2) == MP_NO) {
+ /* phi is odd, so phi+1 would be even and cannot be q */
+ continue;
+ }
+ /* we now have a candidate for the second prime */
+ CHECK_MPI_OK(mp_add_d(&phi, 1, &tmp));
+
+ /* check to make sure it is prime */
+ err = rsa_is_prime(&tmp);
+ if (err != MP_OKAY) {
+ if (err == MP_NO) {
+ /* No, then we still have the wrong phi */
+ continue;
+ }
+ goto cleanup;
+ }
+ /*
+ * It is possible that we have the wrong phi if
+ * k_guess*(q_guess-1) = k*(q-1) (k and q-1 have swapped factors).
+ * since our q_guess is prime, however. We have found a valid
+ * rsa key because:
+ * q is the correct order of magnitude.
+ * phi = (p-1)(q-1) where p and q are both primes.
+ * e*d mod phi = 1.
+ * There is no way to know from the info given if this is the
+ * original key. We never want to return the wrong key because if
+ * two moduli with the same factor is known, then euclid's gcd
+ * algorithm can be used to find that factor. Even though the
+ * caller didn't pass the original modulus, it doesn't mean the
+ * modulus wasn't known or isn't available somewhere. So to be safe
+ * if we can't be sure we have the right q, we don't return any.
+ *
+ * So to make sure we continue looking for other valid q's. If none
+ * are found, then we can safely return this one, otherwise we just
+ * fail */
+ if (mp_cmp_z(q) != 0) {
+ /* this is the second valid q, don't return either,
+ * just fail */
+ err = MP_RANGE;
+ break;
+ }
+ /* we only have one q so far, save it and if no others are found,
+ * it's safe to return it */
+ CHECK_MPI_OK(mp_copy(&tmp, q));
+ continue;
+ }
+ if ((unsigned)mpl_significant_bits(&k) < order_k) {
+ if (mp_cmp_z(q) == 0) {
+ /* If we get here, something was wrong with the parameters we
+ * were given */
+ err = MP_RANGE;
+ }
+ }
+cleanup:
+ mp_clear(&kphi);
+ mp_clear(&phi);
+ mp_clear(&k);
+ mp_clear(&r);
+ mp_clear(&tmp);
+ return err;
+}
+
+/*
+ * take a private key with only a few elements and fill out the missing pieces.
+ *
+ * All the entries will be overwritten with data allocated out of the arena
+ * If no arena is supplied, one will be created.
+ *
+ * The following fields must be supplied in order for this function
+ * to succeed:
+ * one of either publicExponent or privateExponent
+ * two more of the following 5 parameters.
+ * modulus (n)
+ * prime1 (p)
+ * prime2 (q)
+ * publicExponent (e)
+ * privateExponent (d)
+ *
+ * NOTE: if only the publicExponent, privateExponent, and one prime is given,
+ * then there may be more than one RSA key that matches that combination.
+ *
+ * All parameters will be replaced in the key structure with new parameters
+ * Allocated out of the arena. There is no attempt to free the old structures.
+ * Prime1 will always be greater than prime2 (even if the caller supplies the
+ * smaller prime as prime1 or the larger prime as prime2). The parameters are
+ * not overwritten on failure.
+ *
+ * How it works:
+ * We can generate all the parameters from one of the exponents, plus the
+ * two primes. (rsa_build_from_primes)
+ * If we are given one of the exponents and both primes, we are done.
+ * If we are given one of the exponents, the modulus and one prime, we
+ * calculate the second prime by dividing the modulus by the given
+ * prime, giving us an exponent and 2 primes.
+ * If we are given 2 exponents and one of the primes we calculate
+ * k*phi = d*e-1, where k is an integer less than d which
+ * divides d*e-1. We find factor k so we can isolate phi.
+ * phi = (p-1)(q-1)
+ * We can use phi to find the other prime as follows:
+ * q = (phi/(p-1)) + 1. We now have 2 primes and an exponent.
+ * (NOTE: if more than one prime meets this condition, the operation
+ * will fail. See comments elsewhere in this file about this).
+ * (rsa_get_prime_from_exponents)
+ * If we are given 2 exponents and the modulus we factor the modulus to
+ * get the 2 missing primes (rsa_factorize_n_from_exponents)
+ *
+ */
+SECStatus
+RSA_PopulatePrivateKey(RSAPrivateKey *key)
+{
+ PLArenaPool *arena = NULL; /* non-NULL only when this call created the arena */
+ PRBool needPublicExponent = PR_TRUE;
+ PRBool needPrivateExponent = PR_TRUE;
+ PRBool hasModulus = PR_FALSE;
+ unsigned int keySizeInBits = 0;
+ int prime_count = 0;
+ /* standard RSA nomenclature */
+ mp_int p, q, e, d, n;
+ /* remainder */
+ mp_int r;
+ mp_err err = 0;
+ SECStatus rv = SECFailure;
+
+ MP_DIGITS(&p) = 0;
+ MP_DIGITS(&q) = 0;
+ MP_DIGITS(&e) = 0;
+ MP_DIGITS(&d) = 0;
+ MP_DIGITS(&n) = 0;
+ MP_DIGITS(&r) = 0;
+ CHECK_MPI_OK(mp_init(&p));
+ CHECK_MPI_OK(mp_init(&q));
+ CHECK_MPI_OK(mp_init(&e));
+ CHECK_MPI_OK(mp_init(&d));
+ CHECK_MPI_OK(mp_init(&n));
+ CHECK_MPI_OK(mp_init(&r));
+
+ /* if the key didn't already have an arena, create one. */
+ if (key->arena == NULL) {
+ arena = PORT_NewArena(NSS_FREEBL_DEFAULT_CHUNKSIZE);
+ if (!arena) {
+ goto cleanup; /* rv is still SECFailure; no error code is set here */
+ }
+ key->arena = arena;
+ }
+
+ /* load up the known exponents */
+ if (key->publicExponent.data) {
+ SECITEM_TO_MPINT(key->publicExponent, &e);
+ needPublicExponent = PR_FALSE;
+ }
+ if (key->privateExponent.data) {
+ SECITEM_TO_MPINT(key->privateExponent, &d);
+ needPrivateExponent = PR_FALSE;
+ }
+ if (needPrivateExponent && needPublicExponent) {
+ /* Not enough information, we need at least one exponent */
+ err = MP_BADARG;
+ goto cleanup;
+ }
+
+ /* load up the known primes. If only one prime is given, it will be
+ * assigned 'p'. Once we have both primes, we'll make sure p is the larger.
+ * The value prime_count tells us how many we have acquired.
+ * keySizeInBits is estimated as twice the prime length (ignoring one
+ * leading zero byte) and is replaced by the modulus length further down
+ * when a modulus is supplied.
+ */
+ if (key->prime1.data) {
+ int primeLen = key->prime1.len;
+ if (key->prime1.data[0] == 0) {
+ primeLen--;
+ }
+ keySizeInBits = primeLen * 2 * PR_BITS_PER_BYTE;
+ SECITEM_TO_MPINT(key->prime1, &p);
+ prime_count++;
+ }
+ if (key->prime2.data) {
+ int primeLen = key->prime2.len;
+ if (key->prime2.data[0] == 0) {
+ primeLen--;
+ }
+ keySizeInBits = primeLen * 2 * PR_BITS_PER_BYTE;
+ SECITEM_TO_MPINT(key->prime2, prime_count ? &q : &p); /* lands in p when prime1 was absent */
+ prime_count++;
+ }
+ /* load up the modulus */
+ if (key->modulus.data) {
+ int modLen = key->modulus.len;
+ if (key->modulus.data[0] == 0) {
+ modLen--;
+ }
+ keySizeInBits = modLen * PR_BITS_PER_BYTE;
+ SECITEM_TO_MPINT(key->modulus, &n);
+ hasModulus = PR_TRUE;
+ }
+ /* if we have the modulus and one prime, calculate the second. */
+ if ((prime_count == 1) && (hasModulus)) {
+ if (mp_div(&n, &p, &q, &r) != MP_OKAY || mp_cmp_z(&r) != 0) {
+ /* p is not a factor of n, fail */
+ err = MP_BADARG;
+ goto cleanup;
+ }
+ prime_count++;
+ }
+
+ /* If we didn't have enough primes try to calculate the primes from
+ * the exponents */
+ if (prime_count < 2) {
+ /* if we don't have at least 2 primes at this point, then we need both
+ * exponents and one prime or a modulus*/
+ if (!needPublicExponent && !needPrivateExponent &&
+ (prime_count > 0)) {
+ CHECK_MPI_OK(rsa_get_prime_from_exponents(&e, &d, &p, &q, &n,
+ keySizeInBits));
+ } else if (!needPublicExponent && !needPrivateExponent && hasModulus) {
+ CHECK_MPI_OK(rsa_factorize_n_from_exponents(&e, &d, &p, &q, &n));
+ } else {
+ /* not enough given parameters to get both primes */
+ err = MP_BADARG;
+ goto cleanup;
+ }
+ }
+
+ /* Assure p > q */
+ /* NOTE: PKCS #1 does not require p > q, and NSS doesn't use any
+ * implementation optimization that requires p > q. We can remove
+ * this code in the future.
+ */
+ if (mp_cmp(&p, &q) < 0)
+ mp_exch(&p, &q);
+
+ /* we now have our 2 primes and at least one exponent, we can fill
+ * in the key */
+ rv = rsa_build_from_primes(&p, &q,
+ &e, needPublicExponent,
+ &d, needPrivateExponent,
+ key, keySizeInBits);
+cleanup:
+ mp_clear(&p);
+ mp_clear(&q);
+ mp_clear(&e);
+ mp_clear(&d);
+ mp_clear(&n);
+ mp_clear(&r);
+ if (err) {
+ MP_TO_SEC_ERROR(err);
+ rv = SECFailure;
+ }
+ if (rv && arena) {
+ /* only an arena created by this call is released; a caller-supplied
+ * key->arena is left untouched */
+ PORT_FreeArena(arena, PR_TRUE);
+ key->arena = NULL;
+ }
+ return rv;
+}
+
+/*
+ * Length in bytes of the big-endian integer held in 'modulus', not counting
+ * a single leading zero byte. Reads modulus->data[0] unconditionally.
+ */
+static unsigned int
+rsa_modulusLen(SECItem *modulus)
+{
+    unsigned int significantLen = modulus->len;
+
+    if (modulus->data[0] == 0) {
+        /* drop the leading zero byte from the count */
+        significantLen -= 1;
+    }
+    return significantLen;
+}
+
+/*
+** Perform a raw public-key operation
+** Length of input and output buffers are equal to key's modulus len.
+**
+** Computes output = input**e mod n. Returns SECFailure with
+** SEC_ERROR_INVALID_ARGS for a NULL argument, SEC_ERROR_INVALID_KEY for
+** unacceptable modulus/exponent sizes, SEC_ERROR_INPUT_LEN when input is
+** not below the modulus, or a mapped MPI error.
+*/
+SECStatus
+RSA_PublicKeyOp(RSAPublicKey *key,
+ unsigned char *output,
+ const unsigned char *input)
+{
+ unsigned int modLen, expLen, offset;
+ mp_int n, e, m, c;
+ mp_err err = MP_OKAY;
+ SECStatus rv = SECSuccess;
+ if (!key || !output || !input) {
+ PORT_SetError(SEC_ERROR_INVALID_ARGS);
+ return SECFailure;
+ }
+ MP_DIGITS(&n) = 0;
+ MP_DIGITS(&e) = 0;
+ MP_DIGITS(&m) = 0;
+ MP_DIGITS(&c) = 0;
+ CHECK_MPI_OK(mp_init(&n));
+ CHECK_MPI_OK(mp_init(&e));
+ CHECK_MPI_OK(mp_init(&m));
+ CHECK_MPI_OK(mp_init(&c));
+ modLen = rsa_modulusLen(&key->modulus); /* byte lengths without a leading zero byte */
+ expLen = rsa_modulusLen(&key->publicExponent);
+ /* 1. Obtain public key (n, e) */
+ if (BAD_RSA_KEY_SIZE(modLen, expLen)) {
+ PORT_SetError(SEC_ERROR_INVALID_KEY);
+ rv = SECFailure;
+ goto cleanup;
+ }
+ SECITEM_TO_MPINT(key->modulus, &n);
+ SECITEM_TO_MPINT(key->publicExponent, &e);
+ if (e.used > n.used) {
+ /* exponent should not be greater than modulus */
+ PORT_SetError(SEC_ERROR_INVALID_KEY);
+ rv = SECFailure;
+ goto cleanup;
+ }
+ /* 2. check input out of range (needs to be in range [0..n-1]):
+ * both buffers are modLen bytes, compared byte by byte from the front */
+ offset = (key->modulus.data[0] == 0) ? 1 : 0; /* may be leading 0 */
+ if (memcmp(input, key->modulus.data + offset, modLen) >= 0) {
+ PORT_SetError(SEC_ERROR_INPUT_LEN);
+ rv = SECFailure;
+ goto cleanup;
+ }
+ /* 2 bis. Represent message as integer in range [0..n-1] */
+ CHECK_MPI_OK(mp_read_unsigned_octets(&m, input, modLen));
+/* 3. Compute c = m**e mod n */
+#ifdef USE_MPI_EXPT_D
+ /* XXX see which is faster */
+ if (MP_USED(&e) == 1) {
+ CHECK_MPI_OK(mp_exptmod_d(&m, MP_DIGIT(&e, 0), &n, &c));
+ } else /* this 'else' binds to the mp_exptmod call after the #endif */
+#endif
+ CHECK_MPI_OK(mp_exptmod(&m, &e, &n, &c));
+ /* 4. result c is ciphertext */
+ err = mp_to_fixlen_octets(&c, output, modLen);
+ if (err >= 0) /* any non-negative result is treated as success */
+ err = MP_OKAY;
+cleanup:
+ mp_clear(&n);
+ mp_clear(&e);
+ mp_clear(&m);
+ mp_clear(&c);
+ if (err) {
+ MP_TO_SEC_ERROR(err);
+ rv = SECFailure;
+ }
+ return rv;
+}
+
+/*
+** RSA Private key operation (no CRT).
+**
+** Computes m = c**d mod n with d taken from key->privateExponent.
+** 'modLen' is not referenced in this body.
+** Returns SECSuccess, or SECFailure with a mapped MPI error.
+*/
+static SECStatus
+rsa_PrivateKeyOpNoCRT(RSAPrivateKey *key, mp_int *m, mp_int *c, mp_int *n,
+ unsigned int modLen)
+{
+ mp_int d;
+ mp_err err = MP_OKAY;
+ SECStatus rv = SECSuccess;
+ MP_DIGITS(&d) = 0;
+ CHECK_MPI_OK(mp_init(&d));
+ SECITEM_TO_MPINT(key->privateExponent, &d);
+ /* 1. m = c**d mod n */
+ CHECK_MPI_OK(mp_exptmod(c, &d, n, m));
+cleanup:
+ mp_clear(&d);
+ if (err) {
+ MP_TO_SEC_ERROR(err);
+ rv = SECFailure;
+ }
+ return rv;
+}
+
+/*
+** RSA Private key operation using CRT.
+**
+** Computes m from c using prime1, prime2, exponent1, exponent2 and
+** coefficient of 'key': two exponentiations modulo p and q, recombined in
+** steps 3 and 4 below. The result is not verified here.
+** Returns SECSuccess, or SECFailure with a mapped MPI error.
+*/
+static SECStatus
+rsa_PrivateKeyOpCRTNoCheck(RSAPrivateKey *key, mp_int *m, mp_int *c)
+{
+ mp_int p, q, d_p, d_q, qInv;
+ mp_int m1, m2, h, ctmp;
+ mp_err err = MP_OKAY;
+ SECStatus rv = SECSuccess;
+ MP_DIGITS(&p) = 0;
+ MP_DIGITS(&q) = 0;
+ MP_DIGITS(&d_p) = 0;
+ MP_DIGITS(&d_q) = 0;
+ MP_DIGITS(&qInv) = 0;
+ MP_DIGITS(&m1) = 0;
+ MP_DIGITS(&m2) = 0;
+ MP_DIGITS(&h) = 0;
+ MP_DIGITS(&ctmp) = 0;
+ CHECK_MPI_OK(mp_init(&p));
+ CHECK_MPI_OK(mp_init(&q));
+ CHECK_MPI_OK(mp_init(&d_p));
+ CHECK_MPI_OK(mp_init(&d_q));
+ CHECK_MPI_OK(mp_init(&qInv));
+ CHECK_MPI_OK(mp_init(&m1));
+ CHECK_MPI_OK(mp_init(&m2));
+ CHECK_MPI_OK(mp_init(&h));
+ CHECK_MPI_OK(mp_init(&ctmp));
+ /* copy private key parameters into mp integers */
+ SECITEM_TO_MPINT(key->prime1, &p); /* p */
+ SECITEM_TO_MPINT(key->prime2, &q); /* q */
+ SECITEM_TO_MPINT(key->exponent1, &d_p); /* d_p = d mod (p-1) */
+ SECITEM_TO_MPINT(key->exponent2, &d_q); /* d_q = d mod (q-1) */
+ SECITEM_TO_MPINT(key->coefficient, &qInv); /* qInv = q**-1 mod p */
+ /* 1. m1 = c**d_p mod p (c is reduced mod p first) */
+ CHECK_MPI_OK(mp_mod(c, &p, &ctmp));
+ CHECK_MPI_OK(mp_exptmod(&ctmp, &d_p, &p, &m1));
+ /* 2. m2 = c**d_q mod q (c is reduced mod q first) */
+ CHECK_MPI_OK(mp_mod(c, &q, &ctmp));
+ CHECK_MPI_OK(mp_exptmod(&ctmp, &d_q, &q, &m2));
+ /* 3. h = (m1 - m2) * qInv mod p */
+ CHECK_MPI_OK(mp_submod(&m1, &m2, &p, &h));
+ CHECK_MPI_OK(mp_mulmod(&h, &qInv, &p, &h));
+ /* 4. m = m2 + h * q */
+ CHECK_MPI_OK(mp_mul(&h, &q, m));
+ CHECK_MPI_OK(mp_add(m, &m2, m));
+cleanup:
+ mp_clear(&p);
+ mp_clear(&q);
+ mp_clear(&d_p);
+ mp_clear(&d_q);
+ mp_clear(&qInv);
+ mp_clear(&m1);
+ mp_clear(&m2);
+ mp_clear(&h);
+ mp_clear(&ctmp);
+ if (err) {
+ MP_TO_SEC_ERROR(err);
+ rv = SECFailure;
+ }
+ return rv;
+}
+
+/*
+** An attack against RSA CRT was described by Boneh, DeMillo, and Lipton in:
+** "On the Importance of Eliminating Errors in Cryptographic Computations",
+** http://theory.stanford.edu/~dabo/papers/faults.ps.gz
+**
+** As a defense against the attack, carry out the private key operation,
+** followed up with a public key operation to invert the result.
+** Verify that result against the input.
+**
+** Returns SECSuccess only when m**e mod n equals c. On a mismatch it
+** returns SECFailure; no PORT_SetError call is made on that path here.
+*/
+static SECStatus
+rsa_PrivateKeyOpCRTCheckedPubKey(RSAPrivateKey *key, mp_int *m, mp_int *c)
+{
+ mp_int n, e, v;
+ mp_err err = MP_OKAY;
+ SECStatus rv = SECSuccess;
+ MP_DIGITS(&n) = 0;
+ MP_DIGITS(&e) = 0;
+ MP_DIGITS(&v) = 0;
+ CHECK_MPI_OK(mp_init(&n));
+ CHECK_MPI_OK(mp_init(&e));
+ CHECK_MPI_OK(mp_init(&v));
+ CHECK_SEC_OK(rsa_PrivateKeyOpCRTNoCheck(key, m, c));
+ SECITEM_TO_MPINT(key->modulus, &n);
+ SECITEM_TO_MPINT(key->publicExponent, &e);
+ /* Perform a public key operation v = m ** e mod n */
+ CHECK_MPI_OK(mp_exptmod(m, &e, &n, &v));
+ if (mp_cmp(&v, c) != 0) {
+ rv = SECFailure; /* the CRT result does not invert back to c */
+ }
+cleanup:
+ mp_clear(&n);
+ mp_clear(&e);
+ mp_clear(&v);
+ if (err) {
+ MP_TO_SEC_ERROR(err);
+ rv = SECFailure;
+ }
+ return rv;
+}
+
+/* Call-once control block guarding init_blinding_params_list (see RSA_Init). */
+static PRCallOnceType coBPInit = { 0, 0, 0 };
+
+/*
+ * One-time initializer for the global blindingParamsList: creates its lock
+ * and condition variable, resets the waiter count and empties the list head.
+ *
+ * Returns PR_SUCCESS, or PR_FAILURE with SEC_ERROR_NO_MEMORY set when either
+ * object cannot be created. On failure blindingParamsList.lock is left NULL,
+ * which is the condition get_blinding_params tests before using the list.
+ */
+static PRStatus
+init_blinding_params_list(void)
+{
+    blindingParamsList.lock = PZ_NewLock(nssILockOther);
+    if (!blindingParamsList.lock) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return PR_FAILURE;
+    }
+    blindingParamsList.cVar = PR_NewCondVar(blindingParamsList.lock);
+    if (!blindingParamsList.cVar) {
+        /* Do not leave a half-initialized list behind: without this the
+         * lock would be leaked and, being non-NULL, would make the list
+         * look usable even though there is no condition variable. */
+        PZ_DestroyLock(blindingParamsList.lock);
+        blindingParamsList.lock = NULL;
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return PR_FAILURE;
+    }
+    blindingParamsList.waitCount = 0;
+    PR_INIT_CLIST(&blindingParamsList.head);
+    return PR_SUCCESS;
+}
+
+/*
+ * Generate a fresh RSA blinding pair for the given key:
+ *   k = random value reduced mod n
+ *   f = k ** e mod n   (multiplied into the input before the private op)
+ *   g = k ** -1 mod n  (multiplied into the result afterwards)
+ *
+ * key    - supplies the public exponent e
+ * f, g   - outputs, written only on success
+ * n      - the modulus as an mp_int
+ * modLen - number of random bytes drawn for k
+ *
+ * Returns SECSuccess, or SECFailure when allocation, random generation or
+ * any MPI step fails. The temporary random buffer is zeroized before it is
+ * released.
+ */
+static SECStatus
+generate_blinding_params(RSAPrivateKey *key, mp_int *f, mp_int *g, mp_int *n,
+                         unsigned int modLen)
+{
+    SECStatus rv = SECSuccess;
+    mp_int e, k;
+    mp_err err = MP_OKAY;
+    unsigned char *kb = NULL;
+
+    MP_DIGITS(&e) = 0;
+    MP_DIGITS(&k) = 0;
+    CHECK_MPI_OK(mp_init(&e));
+    CHECK_MPI_OK(mp_init(&k));
+    SECITEM_TO_MPINT(key->publicExponent, &e);
+    /* generate random k < n */
+    kb = PORT_Alloc(modLen);
+    if (!kb) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        /* err is still MP_OKAY here, so the failure must be recorded in rv
+         * explicitly; otherwise the caller would be told f and g are valid. */
+        rv = SECFailure;
+        goto cleanup;
+    }
+    CHECK_SEC_OK(RNG_GenerateGlobalRandomBytes(kb, modLen));
+    CHECK_MPI_OK(mp_read_unsigned_octets(&k, kb, modLen));
+    /* k < n */
+    CHECK_MPI_OK(mp_mod(&k, n, &k));
+    /* f = k**e mod n */
+    CHECK_MPI_OK(mp_exptmod(&k, &e, n, f));
+    /* g = k**-1 mod n */
+    CHECK_MPI_OK(mp_invmod(&k, n, g));
+cleanup:
+    if (kb)
+        PORT_ZFree(kb, modLen);
+    mp_clear(&k);
+    mp_clear(&e);
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+    return rv;
+}
+
+/*
+ * Prepare a freshly allocated RSABlindingParams entry for a key.
+ *
+ * Every slot of rsabp->array is chained into the free list with cleared
+ * f/g digit pointers and a zero use counter, the list of ready parameters
+ * is left empty, and the key's modulus is copied into rsabp->modulus, which
+ * is what list entries are keyed on.
+ *
+ * The n and modLen parameters are not used by this function.
+ *
+ * Returns the status of SECITEM_CopyItem.
+ */
+static SECStatus
+init_blinding_params(RSABlindingParams *rsabp, RSAPrivateKey *key,
+                     mp_int *n, unsigned int modLen)
+{
+    const int lastIdx = RSA_BLINDING_PARAMS_MAX_CACHE_SIZE - 1;
+    int idx;
+
+    /* Initialize the list pointer for the element */
+    PR_INIT_CLIST(&rsabp->link);
+
+    for (idx = 0; idx <= lastIdx; idx++) {
+        blindingParams *slot = &rsabp->array[idx];
+
+        /* Each slot points at its successor; the final slot ends the chain. */
+        slot->next = (idx < lastIdx) ? slot + 1 : NULL;
+        MP_DIGITS(&slot->f) = 0;
+        MP_DIGITS(&slot->g) = 0;
+        slot->counter = 0;
+    }
+
+    /* Nothing is ready yet; every slot is available. */
+    rsabp->bp = NULL;
+    rsabp->free = rsabp->array;
+
+    /* List elements are keyed using the modulus */
+    return SECITEM_CopyItem(NULL, &rsabp->modulus, &key->modulus);
+}
+
+/*
+ * Obtain a blinding pair (f, g) for the given key.
+ *
+ * The global blindingParamsList holds one RSABlindingParams entry per key,
+ * ordered by modulus (SECITEM_CompareItem); an entry is created and inserted
+ * the first time a key is seen. Each entry has a list of ready pairs
+ * (rsabp->bp) and a list of unused slots (rsabp->free). Inside a retry loop
+ * this function:
+ *   1. uses the head ready pair if there is one, decrementing its counter;
+ *      when no uses remain the pair is handed to the caller and its slot is
+ *      moved to the free list;
+ *   2. otherwise takes a free slot, releases the list lock, generates a new
+ *      pair with generate_blinding_params(), caches a copy and returns it;
+ *   3. otherwise waits on blindingParamsList.cVar and starts over.
+ *
+ * key    - private key; its modulus identifies the cache entry
+ * n      - modulus as an mp_int, passed on to the helpers
+ * modLen - passed on to the helpers
+ * f, g   - receive the blinding pair
+ *
+ * Returns SECSuccess with f and g set. Returns SECFailure if the list was
+ * never initialized (SEC_ERROR_LIBRARY_FAILURE) or if allocation, parameter
+ * generation or an MPI call fails; a slot unlinked for generation is put
+ * back on the free list in that case.
+ */
+static SECStatus
+get_blinding_params(RSAPrivateKey *key, mp_int *n, unsigned int modLen,
+ mp_int *f, mp_int *g)
+{
+ RSABlindingParams *rsabp = NULL;
+ blindingParams *bpUnlinked = NULL;
+ blindingParams *bp;
+ PRCList *el;
+ SECStatus rv = SECSuccess;
+ mp_err err = MP_OKAY;
+ int cmp = -1;
+ PRBool holdingLock = PR_FALSE;
+
+ do {
+ /* A NULL lock means init_blinding_params_list has not succeeded. */
+ if (blindingParamsList.lock == NULL) {
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return SECFailure;
+ }
+ /* Acquire the list lock */
+ PZ_Lock(blindingParamsList.lock);
+ holdingLock = PR_TRUE;
+
+ /* Walk the list looking for the private key */
+ for (el = PR_NEXT_LINK(&blindingParamsList.head);
+ el != &blindingParamsList.head;
+ el = PR_NEXT_LINK(el)) {
+ rsabp = (RSABlindingParams *)el;
+ cmp = SECITEM_CompareItem(&rsabp->modulus, &key->modulus);
+ if (cmp >= 0) {
+ /* The key is found or not in the list. */
+ break;
+ }
+ }
+
+ /* cmp is 0 only when an entry with this modulus was found; it keeps
+ * its initial -1 when the list is empty. */
+ if (cmp) {
+ /* At this point, the key is not in the list. el should point to
+ ** the list element before which this key should be inserted.
+ */
+ rsabp = PORT_ZNew(RSABlindingParams);
+ if (!rsabp) {
+ PORT_SetError(SEC_ERROR_NO_MEMORY);
+ goto cleanup;
+ }
+
+ rv = init_blinding_params(rsabp, key, n, modLen);
+ if (rv != SECSuccess) {
+ PORT_ZFree(rsabp, sizeof(RSABlindingParams));
+ goto cleanup;
+ }
+
+ /* Insert the new element into the list
+ ** If inserting in the middle of the list, el points to the link
+ ** to insert before. Otherwise, the link needs to be appended to
+ ** the end of the list, which is the same as inserting before the
+ ** head (since el would have looped back to the head).
+ */
+ PR_INSERT_BEFORE(&rsabp->link, el);
+ }
+
+ /* We've found (or created) the RSAblindingParams struct for this key.
+ * Now, search its list of ready blinding params for a usable one.
+ */
+ /* Both branches of the body return, so at most the head of the ready
+ * list is examined on each pass. */
+ while (0 != (bp = rsabp->bp)) {
+ if (--(bp->counter) > 0) {
+ /* Found a match and there are still remaining uses left */
+ /* Return the parameters */
+ CHECK_MPI_OK(mp_copy(&bp->f, f));
+ CHECK_MPI_OK(mp_copy(&bp->g, g));
+
+ PZ_Unlock(blindingParamsList.lock);
+ return SECSuccess;
+ }
+ /* exhausted this one, give its values to caller, and
+ * then retire it.
+ */
+ mp_exch(&bp->f, f);
+ mp_exch(&bp->g, g);
+ mp_clear(&bp->f);
+ mp_clear(&bp->g);
+ bp->counter = 0;
+ /* Move to free list */
+ rsabp->bp = bp->next;
+ bp->next = rsabp->free;
+ rsabp->free = bp;
+ /* In case there're threads waiting for new blinding
+ * value - notify 1 thread the value is ready
+ */
+ if (blindingParamsList.waitCount > 0) {
+ PR_NotifyCondVar(blindingParamsList.cVar);
+ blindingParamsList.waitCount--;
+ }
+ PZ_Unlock(blindingParamsList.lock);
+ return SECSuccess;
+ }
+ /* We did not find a usable set of blinding params. Can we make one? */
+ /* Find a free bp struct. */
+ if ((bp = rsabp->free) != NULL) {
+ /* unlink this bp */
+ rsabp->free = bp->next;
+ bp->next = NULL;
+ bpUnlinked = bp; /* In case we fail */
+
+ /* The list lock is released while the parameters are generated. */
+ PZ_Unlock(blindingParamsList.lock);
+ holdingLock = PR_FALSE;
+ /* generate blinding parameter values for the current thread */
+ CHECK_SEC_OK(generate_blinding_params(key, f, g, n, modLen));
+
+ /* put the blinding parameter values into cache */
+ CHECK_MPI_OK(mp_init(&bp->f));
+ CHECK_MPI_OK(mp_init(&bp->g));
+ CHECK_MPI_OK(mp_copy(f, &bp->f));
+ CHECK_MPI_OK(mp_copy(g, &bp->g));
+
+ /* Put this at head of queue of usable params. */
+ PZ_Lock(blindingParamsList.lock);
+ holdingLock = PR_TRUE;
+ (void)holdingLock;
+ /* initialize RSABlindingParamsStr */
+ bp->counter = RSA_BLINDING_PARAMS_MAX_REUSE;
+ bp->next = rsabp->bp;
+ rsabp->bp = bp;
+ bpUnlinked = NULL;
+ /* In case there're threads waiting for new blinding value
+ * just notify them the value is ready
+ */
+ if (blindingParamsList.waitCount > 0) {
+ PR_NotifyAllCondVar(blindingParamsList.cVar);
+ blindingParamsList.waitCount = 0;
+ }
+ PZ_Unlock(blindingParamsList.lock);
+ return SECSuccess;
+ }
+ /* Here, there are no usable blinding parameters available,
+ * and no free bp blocks, presumably because they're all
+ * actively having parameters generated for them.
+ * So, we need to wait here and not eat up CPU until some
+ * change happens.
+ */
+ blindingParamsList.waitCount++;
+ PR_WaitCondVar(blindingParamsList.cVar, PR_INTERVAL_NO_TIMEOUT);
+ PZ_Unlock(blindingParamsList.lock);
+ holdingLock = PR_FALSE;
+ (void)holdingLock;
+ } while (1);
+
+/* Failure exit only: every successful path returns from inside the loop. */
+cleanup:
+ /* It is possible to reach this after the lock is already released. */
+ if (bpUnlinked) {
+ if (!holdingLock) {
+ PZ_Lock(blindingParamsList.lock);
+ holdingLock = PR_TRUE;
+ }
+ bp = bpUnlinked;
+ mp_clear(&bp->f);
+ mp_clear(&bp->g);
+ bp->counter = 0;
+ /* Must put the unlinked bp back on the free list */
+ bp->next = rsabp->free;
+ rsabp->free = bp;
+ }
+ if (holdingLock) {
+ PZ_Unlock(blindingParamsList.lock);
+ }
+ if (err) {
+ MP_TO_SEC_ERROR(err);
+ }
+ return SECFailure;
+}
+
+/*
+** Perform a raw private-key operation.
+** The lengths of the input and output buffers are equal to the key's
+** modulus length.
+**
+** key    - private key; when any of prime1, prime2, exponent1, exponent2
+**          or coefficient is empty the non-CRT path is used
+** output - receives the result as a fixed-length octet string
+** input  - value to transform; must compare below the modulus
+** check  - when true and the CRT components are present, the result is
+**          verified with a public-key operation
+**          (rsa_PrivateKeyOpCRTCheckedPubKey)
+**
+** When nssRSAUseBlinding is set, the input is multiplied by f before the
+** private-key operation and the result by g afterwards, both mod n.
+**
+** Returns SECSuccess, or SECFailure with SEC_ERROR_INVALID_ARGS for a NULL
+** argument or an out-of-range input; other failures come from the helpers
+** or from MPI errors passed to MP_TO_SEC_ERROR.
+*/
+static SECStatus
+rsa_PrivateKeyOp(RSAPrivateKey *key,
+ unsigned char *output,
+ const unsigned char *input,
+ PRBool check)
+{
+ unsigned int modLen;
+ unsigned int offset;
+ SECStatus rv = SECSuccess;
+ mp_err err;
+ mp_int n, c, m;
+ mp_int f, g;
+ if (!key || !output || !input) {
+ PORT_SetError(SEC_ERROR_INVALID_ARGS);
+ return SECFailure;
+ }
+ /* check input out of range (needs to be in range [0..n-1]) */
+ modLen = rsa_modulusLen(&key->modulus);
+ offset = (key->modulus.data[0] == 0) ? 1 : 0; /* may be leading 0 */
+ /* Byte-wise comparison of input against the modulus (skipping a leading
+ * zero octet); both sides span modLen bytes. */
+ if (memcmp(input, key->modulus.data + offset, modLen) >= 0) {
+ PORT_SetError(SEC_ERROR_INVALID_ARGS);
+ return SECFailure;
+ }
+ MP_DIGITS(&n) = 0;
+ MP_DIGITS(&c) = 0;
+ MP_DIGITS(&m) = 0;
+ MP_DIGITS(&f) = 0;
+ MP_DIGITS(&g) = 0;
+ CHECK_MPI_OK(mp_init(&n));
+ CHECK_MPI_OK(mp_init(&c));
+ CHECK_MPI_OK(mp_init(&m));
+ CHECK_MPI_OK(mp_init(&f));
+ CHECK_MPI_OK(mp_init(&g));
+ SECITEM_TO_MPINT(key->modulus, &n);
+ OCTETS_TO_MPINT(input, &c, modLen);
+ /* If blinding, compute pre-image of ciphertext by multiplying by
+ ** blinding factor
+ */
+ if (nssRSAUseBlinding) {
+ CHECK_SEC_OK(get_blinding_params(key, &n, modLen, &f, &g));
+ /* c' = c*f mod n */
+ CHECK_MPI_OK(mp_mulmod(&c, &f, &n, &c));
+ }
+ /* Do the private key operation m = c**d mod n */
+ if (key->prime1.len == 0 ||
+ key->prime2.len == 0 ||
+ key->exponent1.len == 0 ||
+ key->exponent2.len == 0 ||
+ key->coefficient.len == 0) {
+ /* At least one CRT component is missing: use the plain path. */
+ CHECK_SEC_OK(rsa_PrivateKeyOpNoCRT(key, &m, &c, &n, modLen));
+ } else if (check) {
+ CHECK_SEC_OK(rsa_PrivateKeyOpCRTCheckedPubKey(key, &m, &c));
+ } else {
+ CHECK_SEC_OK(rsa_PrivateKeyOpCRTNoCheck(key, &m, &c));
+ }
+ /* If blinding, compute post-image of plaintext by multiplying by
+ ** blinding factor
+ */
+ if (nssRSAUseBlinding) {
+ /* m = m'*g mod n */
+ CHECK_MPI_OK(mp_mulmod(&m, &g, &n, &m));
+ }
+ err = mp_to_fixlen_octets(&m, output, modLen);
+ /* Any non-negative return from the conversion is treated as success. */
+ if (err >= 0)
+ err = MP_OKAY;
+cleanup:
+ mp_clear(&n);
+ mp_clear(&c);
+ mp_clear(&m);
+ mp_clear(&f);
+ mp_clear(&g);
+ if (err) {
+ MP_TO_SEC_ERROR(err);
+ rv = SECFailure;
+ }
+ return rv;
+}
+
+/*
+** Raw RSA private-key operation without the public-key verification step:
+** forwards to rsa_PrivateKeyOp with check set to PR_FALSE and returns its
+** status unchanged.
+*/
+SECStatus
+RSA_PrivateKeyOp(RSAPrivateKey *key,
+                 unsigned char *output,
+                 const unsigned char *input)
+{
+    const PRBool verifyResult = PR_FALSE;
+
+    return rsa_PrivateKeyOp(key, output, input, verifyResult);
+}
+
+/*
+** Raw RSA private-key operation with the verification step requested:
+** forwards to rsa_PrivateKeyOp with check set to PR_TRUE and returns its
+** status unchanged.
+*/
+SECStatus
+RSA_PrivateKeyOpDoubleChecked(RSAPrivateKey *key,
+                              unsigned char *output,
+                              const unsigned char *input)
+{
+    const PRBool verifyResult = PR_TRUE;
+
+    return rsa_PrivateKeyOp(key, output, input, verifyResult);
+}
+
+/*
+ * Check the internal consistency of a fully populated RSA private key.
+ *
+ * All of modulus, prime1, prime2, publicExponent, privateExponent,
+ * exponent1, exponent2 and coefficient must be present; otherwise the
+ * function fails with MP_BADARG passed to MP_TO_SEC_ERROR.
+ *
+ * The following relations are then verified, in this order:
+ *   p != q
+ *   n == p * q
+ *   gcd(e, p-1) == 1 and gcd(e, q-1) == 1
+ *   d*e == 1 mod p-1 and d*e == 1 mod q-1
+ *   d_p == d mod p-1 and d_q == d mod q-1
+ *   q * qInv == 1 mod p
+ *
+ * Returns SECSuccess when every relation holds, SECFailure otherwise. When
+ * a relation fails (as opposed to an MPI call failing) no error code is set
+ * by this function.
+ */
+SECStatus
+RSA_PrivateKeyCheck(const RSAPrivateKey *key)
+{
+ mp_int p, q, n, psub1, qsub1, e, d, d_p, d_q, qInv, res;
+ mp_err err = MP_OKAY;
+ SECStatus rv = SECSuccess;
+ MP_DIGITS(&p) = 0;
+ MP_DIGITS(&q) = 0;
+ MP_DIGITS(&n) = 0;
+ MP_DIGITS(&psub1) = 0;
+ MP_DIGITS(&qsub1) = 0;
+ MP_DIGITS(&e) = 0;
+ MP_DIGITS(&d) = 0;
+ MP_DIGITS(&d_p) = 0;
+ MP_DIGITS(&d_q) = 0;
+ MP_DIGITS(&qInv) = 0;
+ MP_DIGITS(&res) = 0;
+ CHECK_MPI_OK(mp_init(&p));
+ CHECK_MPI_OK(mp_init(&q));
+ CHECK_MPI_OK(mp_init(&n));
+ CHECK_MPI_OK(mp_init(&psub1));
+ CHECK_MPI_OK(mp_init(&qsub1));
+ CHECK_MPI_OK(mp_init(&e));
+ CHECK_MPI_OK(mp_init(&d));
+ CHECK_MPI_OK(mp_init(&d_p));
+ CHECK_MPI_OK(mp_init(&d_q));
+ CHECK_MPI_OK(mp_init(&qInv));
+ CHECK_MPI_OK(mp_init(&res));
+
+ if (!key->modulus.data || !key->prime1.data || !key->prime2.data ||
+ !key->publicExponent.data || !key->privateExponent.data ||
+ !key->exponent1.data || !key->exponent2.data ||
+ !key->coefficient.data) {
+ /* call RSA_PopulatePrivateKey first, if the application wishes to
+ * recover these parameters */
+ err = MP_BADARG;
+ goto cleanup;
+ }
+
+ SECITEM_TO_MPINT(key->modulus, &n);
+ SECITEM_TO_MPINT(key->prime1, &p);
+ SECITEM_TO_MPINT(key->prime2, &q);
+ SECITEM_TO_MPINT(key->publicExponent, &e);
+ SECITEM_TO_MPINT(key->privateExponent, &d);
+ SECITEM_TO_MPINT(key->exponent1, &d_p);
+ SECITEM_TO_MPINT(key->exponent2, &d_q);
+ SECITEM_TO_MPINT(key->coefficient, &qInv);
+ /* p and q must be distinct. */
+ if (mp_cmp(&p, &q) == 0) {
+ rv = SECFailure;
+ goto cleanup;
+ }
+/* Helper macros for the checks below: fail the whole check (rv = SECFailure,
+ * jump to cleanup) unless the two values are equal / the value equals 1. */
+#define VERIFY_MPI_EQUAL(m1, m2) \
+ if (mp_cmp(m1, m2) != 0) { \
+ rv = SECFailure; \
+ goto cleanup; \
+ }
+#define VERIFY_MPI_EQUAL_1(m) \
+ if (mp_cmp_d(m, 1) != 0) { \
+ rv = SECFailure; \
+ goto cleanup; \
+ }
+ /* n == p * q */
+ CHECK_MPI_OK(mp_mul(&p, &q, &res));
+ VERIFY_MPI_EQUAL(&res, &n);
+ /* gcd(e, p-1) == 1 */
+ CHECK_MPI_OK(mp_sub_d(&p, 1, &psub1));
+ CHECK_MPI_OK(mp_gcd(&e, &psub1, &res));
+ VERIFY_MPI_EQUAL_1(&res);
+ /* gcd(e, q-1) == 1 */
+ CHECK_MPI_OK(mp_sub_d(&q, 1, &qsub1));
+ CHECK_MPI_OK(mp_gcd(&e, &qsub1, &res));
+ VERIFY_MPI_EQUAL_1(&res);
+ /* d*e == 1 mod p-1 */
+ CHECK_MPI_OK(mp_mulmod(&d, &e, &psub1, &res));
+ VERIFY_MPI_EQUAL_1(&res);
+ /* d*e == 1 mod q-1 */
+ CHECK_MPI_OK(mp_mulmod(&d, &e, &qsub1, &res));
+ VERIFY_MPI_EQUAL_1(&res);
+ /* d_p == d mod p-1 */
+ CHECK_MPI_OK(mp_mod(&d, &psub1, &res));
+ VERIFY_MPI_EQUAL(&res, &d_p);
+ /* d_q == d mod q-1 */
+ CHECK_MPI_OK(mp_mod(&d, &qsub1, &res));
+ VERIFY_MPI_EQUAL(&res, &d_q);
+ /* q * q**-1 == 1 mod p */
+ CHECK_MPI_OK(mp_mulmod(&q, &qInv, &p, &res));
+ VERIFY_MPI_EQUAL_1(&res);
+
+cleanup:
+ mp_clear(&n);
+ mp_clear(&p);
+ mp_clear(&q);
+ mp_clear(&psub1);
+ mp_clear(&qsub1);
+ mp_clear(&e);
+ mp_clear(&d);
+ mp_clear(&d_p);
+ mp_clear(&d_q);
+ mp_clear(&qInv);
+ mp_clear(&res);
+ if (err) {
+ MP_TO_SEC_ERROR(err);
+ rv = SECFailure;
+ }
+ return rv;
+}
+
+/*
+ * Run init_blinding_params_list through PR_CallOnce.
+ * Returns SECSuccess when the call-once reports PR_SUCCESS; otherwise sets
+ * SEC_ERROR_LIBRARY_FAILURE and returns SECFailure.
+ */
+static SECStatus
+RSA_Init(void)
+{
+    PRStatus onceStatus = PR_CallOnce(&coBPInit, init_blinding_params_list);
+
+    if (onceStatus == PR_SUCCESS) {
+        return SECSuccess;
+    }
+    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+    return SECFailure;
+}
+
+/* Library initialization entry point; RSA_Init is its only step here. */
+SECStatus
+BL_Init(void)
+{
+    SECStatus status = RSA_Init();
+
+    return status;
+}
+
+/*
+ * Shutdown cleanup for the RSA blinding-parameter cache.
+ *
+ * Does nothing unless the call-once initializer has run. Otherwise every
+ * per-key entry is unlinked from blindingParamsList, the f and g values on
+ * its ready list are cleared, and its modulus copy and the entry itself are
+ * freed. The condition variable and lock are then destroyed (the lock
+ * through SKIP_AFTER_FORK) and coBPInit is reset to its initial state.
+ */
+void
+RSA_Cleanup(void)
+{
+    if (!coBPInit.initialized)
+        return;
+
+    while (!PR_CLIST_IS_EMPTY(&blindingParamsList.head)) {
+        RSABlindingParams *entry =
+            (RSABlindingParams *)PR_LIST_HEAD(&blindingParamsList.head);
+        blindingParams *cur;
+
+        PR_REMOVE_LINK(&entry->link);
+        /* Pop each ready parameter set off the entry and clear its values. */
+        for (cur = entry->bp; cur != NULL; cur = entry->bp) {
+            entry->bp = cur->next;
+            mp_clear(&cur->f);
+            mp_clear(&cur->g);
+        }
+        SECITEM_FreeItem(&entry->modulus, PR_FALSE);
+        PORT_Free(entry);
+    }
+
+    if (blindingParamsList.cVar) {
+        PR_DestroyCondVar(blindingParamsList.cVar);
+        blindingParamsList.cVar = NULL;
+    }
+
+    if (blindingParamsList.lock) {
+        SKIP_AFTER_FORK(PZ_DestroyLock(blindingParamsList.lock));
+        blindingParamsList.lock = NULL;
+    }
+
+    /* Put the call-once block back to { 0, 0, 0 }, as it was declared. */
+    coBPInit.initialized = 0;
+    coBPInit.inProgress = 0;
+    coBPInit.status = 0;
+}
+
+/*
+ * A central place is needed for a function that frees all the memory
+ * freebl may have allocated along the way. Currently only RSA does this,
+ * so it has been put here for now.
+ */
+void
+BL_Cleanup(void)
+{
+ RSA_Cleanup();
+}
+
+/* Fork-state flag; written only by BL_SetForkState in the code shown here. */
+PRBool bl_parentForkedAfterC_Initialize;
+
+/*
+ * Set fork flag so it can be tested in SKIP_AFTER_FORK on relevant platforms.
+ *
+ * forked - value stored into bl_parentForkedAfterC_Initialize
+ */
+void
+BL_SetForkState(PRBool forked)
+{
+ bl_parentForkedAfterC_Initialize = forked;
+}
diff --git a/security/nss/lib/freebl/rsapkcs.c b/security/nss/lib/freebl/rsapkcs.c
new file mode 100644
index 000000000..577fe1f61
--- /dev/null
+++ b/security/nss/lib/freebl/rsapkcs.c
@@ -0,0 +1,1385 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * RSA PKCS#1 v2.1 (RFC 3447) operations
+ */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "secerr.h"
+
+#include "blapi.h"
+#include "secitem.h"
+#include "blapii.h"
+
+/*
+ * Layout constants for the padded block built by rsa_FormatOneBlock:
+ *   0x00 || BlockType || Pad || 0x00 || ActualData
+ */
+/* Minimum number of Pad octets; shorter padding is rejected. */
+#define RSA_BLOCK_MIN_PAD_LEN 8
+/* First octet of every formatted block. */
+#define RSA_BLOCK_FIRST_OCTET 0x00
+/* Pad octet value used for RSA_BlockPrivate blocks. */
+#define RSA_BLOCK_PRIVATE_PAD_OCTET 0xff
+/* Octet separating Pad from ActualData; Pad octets of a public block
+ * must differ from it. */
+#define RSA_BLOCK_AFTER_PAD_OCTET 0x00
+
+/*
+ * RSA block types
+ *
+ * The values of RSA_BlockPrivate and RSA_BlockPublic are fixed.
+ * The value of RSA_BlockRaw isn't fixed by definition, but we are keeping
+ * the value that NSS has been using in the past.
+ *
+ * The numeric value of the block type is written as the second octet of a
+ * formatted private or public block.
+ */
+typedef enum {
+ RSA_BlockPrivate = 1, /* pad for a private-key operation */
+ RSA_BlockPublic = 2, /* pad for a public-key operation */
+ RSA_BlockRaw = 4 /* simply justify the block appropriately */
+} RSA_BlockType;
+
+/* Needed for RSA-PSS functions */
+static const unsigned char eightZeros[] = { 0, 0, 0, 0, 0, 0, 0, 0 };
+
+/* Branch-free equality test for two bytes.
+ * Returns 1 iff a == b, otherwise returns 0.
+ * Note: For ranges of bytes, use constantTimeCompare.
+ *
+ * (a - b) | (b - a) has bit 7 set exactly when the bytes differ, so the
+ * complement of that bit is the answer.
+ */
+static unsigned char
+constantTimeEQ8(unsigned char a, unsigned char b)
+{
+    unsigned char differs = (unsigned char)((a - b) | (b - a));
+    unsigned char inverted = (unsigned char)~differs;
+
+    return (unsigned char)(inverted >> 7);
+}
+
+/* Compare two byte ranges without an early exit.
+ * Returns 1 iff the first len bytes of a equal the first len bytes of b,
+ * otherwise returns 0. Every byte pair is always visited; the differences
+ * are OR-ed together and the accumulator is tested once at the end.
+ */
+static unsigned char
+constantTimeCompare(const unsigned char *a,
+                    const unsigned char *b,
+                    unsigned int len)
+{
+    unsigned char accum = 0;
+    unsigned int pos = 0;
+
+    while (pos < len) {
+        accum |= a[pos] ^ b[pos];
+        ++pos;
+    }
+    return constantTimeEQ8(0x00, accum);
+}
+
+/* Branch-free select.
+ * Returns a if c is 1, or b if c is 0. The result is undefined if c is
+ * not 0 or 1.
+ *
+ * c - 1 is all zero bits when c is 1 and all one bits when c is 0, so it
+ * serves as the mask for b and its complement as the mask for a.
+ */
+static unsigned int
+constantTimeCondition(unsigned int c,
+                      unsigned int a,
+                      unsigned int b)
+{
+    unsigned int pickB = c - 1;
+    unsigned int pickA = ~pickB;
+
+    return (pickA & a) | (pickB & b);
+}
+
+/*
+ * Length of the modulus in bytes, not counting a leading zero octet:
+ * modulus->len, minus one when modulus->data[0] is zero.
+ */
+static unsigned int
+rsa_modulusLen(SECItem *modulus)
+{
+    unsigned int len = modulus->len;
+
+    if (modulus->data[0] == 0) {
+        len -= 1;
+    }
+    return len;
+}
+
+/*
+ * Format one block of data for public/private key encryption using
+ * the rules defined in PKCS #1.
+ *
+ * modulusLen - size in bytes of the block to build
+ * blockType  - RSA_BlockPrivate or RSA_BlockPublic; any other value fails
+ * data       - payload placed at the end of the block
+ *
+ * Returns a newly allocated block of modulusLen bytes laid out as
+ *   0x00 || blockType || Pad || 0x00 || data
+ * which the caller must free, or NULL when allocation fails, when fewer
+ * than RSA_BLOCK_MIN_PAD_LEN pad bytes would fit, when random generation
+ * fails (SEC_ERROR_LIBRARY_FAILURE is set in that case), or when blockType
+ * is not supported.
+ */
+static unsigned char *
+rsa_FormatOneBlock(unsigned modulusLen,
+ RSA_BlockType blockType,
+ SECItem *data)
+{
+ unsigned char *block;
+ unsigned char *bp;
+ int padLen;
+ int i, j;
+ SECStatus rv;
+
+ block = (unsigned char *)PORT_Alloc(modulusLen);
+ if (block == NULL)
+ return NULL;
+
+ bp = block;
+
+ /*
+ * All RSA blocks start with two octets:
+ * 0x00 || BlockType
+ */
+ *bp++ = RSA_BLOCK_FIRST_OCTET;
+ *bp++ = (unsigned char)blockType;
+
+ switch (blockType) {
+
+ /*
+ * Blocks intended for private-key operation.
+ */
+ case RSA_BlockPrivate: /* preferred method */
+ /*
+ * 0x00 || BT || Pad || 0x00 || ActualData
+ * 1 1 padLen 1 data->len
+ * Pad is all 0xff bytes (RSA_BLOCK_PRIVATE_PAD_OCTET).
+ */
+ padLen = modulusLen - data->len - 3;
+ PORT_Assert(padLen >= RSA_BLOCK_MIN_PAD_LEN);
+ if (padLen < RSA_BLOCK_MIN_PAD_LEN) {
+ PORT_Free(block);
+ return NULL;
+ }
+ PORT_Memset(bp, RSA_BLOCK_PRIVATE_PAD_OCTET, padLen);
+ bp += padLen;
+ *bp++ = RSA_BLOCK_AFTER_PAD_OCTET;
+ PORT_Memcpy(bp, data->data, data->len);
+ break;
+
+ /*
+ * Blocks intended for public-key operation.
+ */
+ case RSA_BlockPublic:
+ /*
+ * 0x00 || BT || Pad || 0x00 || ActualData
+ * 1 1 padLen 1 data->len
+ * Pad is all non-zero random bytes.
+ *
+ * Build the block left to right.
+ * Fill the entire block from Pad to the end with random bytes.
+ * Use the bytes after Pad as a supply of extra random bytes from
+ * which to find replacements for the zero bytes in Pad.
+ * If we need more than that, refill the bytes after Pad with
+ * new random bytes as necessary.
+ */
+ padLen = modulusLen - (data->len + 3);
+ PORT_Assert(padLen >= RSA_BLOCK_MIN_PAD_LEN);
+ if (padLen < RSA_BLOCK_MIN_PAD_LEN) {
+ PORT_Free(block);
+ return NULL;
+ }
+ /* j walks backwards from the end of the block over the spare
+ * random bytes that follow Pad. */
+ j = modulusLen - 2;
+ rv = RNG_GenerateGlobalRandomBytes(bp, j);
+ if (rv == SECSuccess) {
+ for (i = 0; i < padLen;) {
+ unsigned char repl;
+ /* Pad with non-zero random data. */
+ if (bp[i] != RSA_BLOCK_AFTER_PAD_OCTET) {
+ ++i;
+ continue;
+ }
+ /* Spare bytes used up: refill the region after Pad. */
+ if (j <= padLen) {
+ rv = RNG_GenerateGlobalRandomBytes(bp + padLen,
+ modulusLen - (2 + padLen));
+ if (rv != SECSuccess)
+ break;
+ j = modulusLen - 2;
+ }
+ do {
+ repl = bp[--j];
+ } while (repl == RSA_BLOCK_AFTER_PAD_OCTET && j > padLen);
+ /* i only advances once a non-zero replacement is stored;
+ * otherwise the same position is retried. */
+ if (repl != RSA_BLOCK_AFTER_PAD_OCTET) {
+ bp[i++] = repl;
+ }
+ }
+ }
+ if (rv != SECSuccess) {
+ PORT_Free(block);
+ PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+ return NULL;
+ }
+ bp += padLen;
+ *bp++ = RSA_BLOCK_AFTER_PAD_OCTET;
+ PORT_Memcpy(bp, data->data, data->len);
+ break;
+
+ default:
+ PORT_Assert(0);
+ PORT_Free(block);
+ return NULL;
+ }
+
+ return block;
+}
+
+/*
+ * Build a modulusLen-byte block from data according to blockType and store
+ * it in result.
+ *
+ * RSA_BlockPrivate / RSA_BlockPublic: the block is produced by
+ * rsa_FormatOneBlock (0x00 || BT || Pad || 0x00 || ActualData).
+ * RSA_BlockRaw: the data is right-justified in a zero-filled block.
+ *
+ * On success result->data is a newly allocated buffer of result->len ==
+ * modulusLen bytes that the caller must free. On allocation or formatting
+ * failure result->data is NULL and result->len is 0. When raw data is
+ * longer than modulusLen, SECFailure is returned and result is left
+ * untouched, so callers should initialize result->data to NULL beforehand.
+ */
+static SECStatus
+rsa_FormatBlock(SECItem *result,
+                unsigned modulusLen,
+                RSA_BlockType blockType,
+                SECItem *data)
+{
+    switch (blockType) {
+        case RSA_BlockPrivate:
+        case RSA_BlockPublic:
+            /*
+             * 0x00 || BT || Pad || 0x00 || ActualData
+             *
+             * The "3" below is the first octet + the second octet + the 0x00
+             * octet that always comes just before the ActualData.
+             */
+            PORT_Assert(data->len <= (modulusLen - (3 + RSA_BLOCK_MIN_PAD_LEN)));
+
+            result->data = rsa_FormatOneBlock(modulusLen, blockType, data);
+            if (result->data == NULL) {
+                result->len = 0;
+                return SECFailure;
+            }
+            result->len = modulusLen;
+
+            break;
+
+        case RSA_BlockRaw:
+            /*
+             * Pad || ActualData
+             * Pad is zeros. The application is responsible for recovering
+             * the actual data.
+             */
+            if (data->len > modulusLen) {
+                return SECFailure;
+            }
+            result->data = (unsigned char *)PORT_ZAlloc(modulusLen);
+            if (result->data == NULL) {
+                /* Without this check the copy below would write through a
+                 * NULL pointer when the allocation fails. */
+                result->len = 0;
+                return SECFailure;
+            }
+            result->len = modulusLen;
+            PORT_Memcpy(result->data + (modulusLen - data->len),
+                        data->data, data->len);
+            break;
+
+        default:
+            PORT_Assert(0);
+            result->data = NULL;
+            result->len = 0;
+            return SECFailure;
+    }
+
+    return SECSuccess;
+}
+
+/*
+ * Mask generation function MGF1 as defined in PKCS #1 v2.1 / RFC 3447.
+ *
+ * hashAlg    - hash used to expand the seed
+ * mask       - output buffer, receives exactly maskLen bytes
+ * maskLen    - number of mask bytes to produce
+ * mgfSeed    - seed bytes
+ * mgfSeedLen - length of the seed
+ *
+ * For counter = 0, 1, ... the hash of mgfSeed || counter (as four
+ * big-endian octets) is appended to mask; the final digest is truncated so
+ * that no more than maskLen bytes are written.
+ *
+ * Returns SECSuccess, or SECFailure when no hash object exists for hashAlg
+ * or when the hash context or the scratch buffer cannot be allocated
+ * (SEC_ERROR_NO_MEMORY is set for the allocation failures). On failure the
+ * contents of mask are unspecified.
+ */
+static SECStatus
+MGF1(HASH_HashType hashAlg,
+     unsigned char *mask,
+     unsigned int maskLen,
+     const unsigned char *mgfSeed,
+     unsigned int mgfSeedLen)
+{
+    unsigned int digestLen;
+    PRUint32 counter;
+    PRUint32 rounds;
+    unsigned char *tempHash;
+    unsigned char *temp;
+    const SECHashObject *hash;
+    void *hashContext;
+    unsigned char C[4];
+    SECStatus rv = SECSuccess;
+
+    hash = HASH_GetRawHashObject(hashAlg);
+    if (hash == NULL)
+        return SECFailure;
+
+    hashContext = (*hash->create)();
+    if (hashContext == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+    rounds = (maskLen + hash->length - 1) / hash->length;
+    for (counter = 0; counter < rounds; counter++) {
+        C[0] = (unsigned char)((counter >> 24) & 0xff);
+        C[1] = (unsigned char)((counter >> 16) & 0xff);
+        C[2] = (unsigned char)((counter >> 8) & 0xff);
+        C[3] = (unsigned char)(counter & 0xff);
+
+        /* This could be optimized when the clone functions in
+         * rawhash.c are implemented. */
+        (*hash->begin)(hashContext);
+        (*hash->update)(hashContext, mgfSeed, mgfSeedLen);
+        (*hash->update)(hashContext, C, sizeof C);
+
+        tempHash = mask + counter * hash->length;
+        if (counter != (rounds - 1)) {
+            (*hash->end)(hashContext, tempHash, &digestLen, hash->length);
+        } else { /* we're in the last round and need to cut the hash */
+            temp = (unsigned char *)PORT_Alloc(hash->length);
+            if (temp == NULL) {
+                /* Leave the loop so the hash context is still destroyed. */
+                PORT_SetError(SEC_ERROR_NO_MEMORY);
+                rv = SECFailure;
+                break;
+            }
+            (*hash->end)(hashContext, temp, &digestLen, hash->length);
+            PORT_Memcpy(tempHash, temp, maskLen - counter * hash->length);
+            PORT_Free(temp);
+        }
+    }
+    (*hash->destroy)(hashContext, PR_TRUE);
+
+    return rv;
+}
+
+/*
+ * Raw (unpadded) RSA signature: the data is right-justified in a
+ * zero-filled block of modulus length and passed through the
+ * double-checked private-key operation.
+ *
+ * Fails when maxOutputLen is smaller than the modulus length or when the
+ * block cannot be formatted. *outputLen is set to the modulus length
+ * whenever the private-key operation was attempted.
+ *
+ * XXX Doesn't set error code
+ */
+SECStatus
+RSA_SignRaw(RSAPrivateKey *key,
+            unsigned char *output,
+            unsigned int *outputLen,
+            unsigned int maxOutputLen,
+            const unsigned char *data,
+            unsigned int dataLen)
+{
+    SECStatus rv = SECSuccess;
+    unsigned int modulusLen = rsa_modulusLen(&key->modulus);
+    SECItem padded;
+    SECItem plain;
+
+    if (maxOutputLen < modulusLen)
+        return SECFailure;
+
+    plain.data = (unsigned char *)data;
+    plain.len = dataLen;
+    padded.data = NULL;
+
+    rv = rsa_FormatBlock(&padded, modulusLen, RSA_BlockRaw, &plain);
+    if (rv == SECSuccess) {
+        rv = RSA_PrivateKeyOpDoubleChecked(key, output, padded.data);
+        *outputLen = modulusLen;
+    }
+
+    /* The formatted block is zeroized before it is released. */
+    if (padded.data != NULL)
+        PORT_ZFree(padded.data, modulusLen);
+    return rv;
+}
+
+/*
+ * Verify a raw (unpadded) RSA signature: apply the public-key operation to
+ * sig and compare the last hashLen bytes of the result with hash.
+ *
+ * Fails when sigLen differs from the modulus length, when hashLen exceeds
+ * it, when the scratch buffer cannot be allocated, when the public-key
+ * operation fails, or when the bytes do not match.
+ *
+ * XXX Doesn't set error code
+ */
+SECStatus
+RSA_CheckSignRaw(RSAPublicKey *key,
+                 const unsigned char *sig,
+                 unsigned int sigLen,
+                 const unsigned char *hash,
+                 unsigned int hashLen)
+{
+    SECStatus verdict = SECFailure;
+    unsigned int modulusLen = rsa_modulusLen(&key->modulus);
+    unsigned char *recovered;
+
+    if (sigLen != modulusLen || hashLen > modulusLen)
+        return SECFailure;
+
+    recovered = (unsigned char *)PORT_Alloc(modulusLen + 1);
+    if (!recovered)
+        return SECFailure;
+
+    /*
+     * make sure we get the same results
+     */
+    /* XXX(rsleevi): Constant time */
+    /* NOTE: should we verify the leading zeros? */
+    if (RSA_PublicKeyOp(key, recovered, sig) == SECSuccess &&
+        PORT_Memcmp(recovered + (modulusLen - hashLen), hash, hashLen) == 0) {
+        verdict = SECSuccess;
+    }
+
+    PORT_Free(recovered);
+    return verdict;
+}
+
+/*
+ * Recover the full block behind a raw RSA signature by applying the
+ * public-key operation to sig, writing modulus-length bytes to data.
+ *
+ * Fails when sigLen differs from the modulus length, when maxDataLen is
+ * smaller than it, or when the public-key operation fails. *dataLen is set
+ * only on success.
+ *
+ * XXX Doesn't set error code
+ */
+SECStatus
+RSA_CheckSignRecoverRaw(RSAPublicKey *key,
+                        unsigned char *data,
+                        unsigned int *dataLen,
+                        unsigned int maxDataLen,
+                        const unsigned char *sig,
+                        unsigned int sigLen)
+{
+    unsigned int modulusLen = rsa_modulusLen(&key->modulus);
+
+    if (sigLen != modulusLen || maxDataLen < modulusLen)
+        return SECFailure;
+
+    if (RSA_PublicKeyOp(key, data, sig) != SECSuccess)
+        return SECFailure;
+
+    *dataLen = modulusLen;
+    return SECSuccess;
+}
+
+/*
+ * Raw (unpadded) RSA encryption: the input is right-justified in a
+ * zero-filled block of modulus length and passed through the public-key
+ * operation.
+ *
+ * Fails when maxOutputLen is smaller than the modulus length, when the
+ * block cannot be formatted, or when the public-key operation fails.
+ * *outputLen is set only on success. The formatted block is zeroized
+ * before it is released.
+ *
+ * XXX Doesn't set error code
+ */
+SECStatus
+RSA_EncryptRaw(RSAPublicKey *key,
+               unsigned char *output,
+               unsigned int *outputLen,
+               unsigned int maxOutputLen,
+               const unsigned char *input,
+               unsigned int inputLen)
+{
+    SECStatus status;
+    unsigned int modulusLen = rsa_modulusLen(&key->modulus);
+    SECItem padded;
+    SECItem plain;
+
+    padded.data = NULL;
+    if (maxOutputLen < modulusLen)
+        return SECFailure;
+
+    plain.data = (unsigned char *)input;
+    plain.len = inputLen;
+
+    status = rsa_FormatBlock(&padded, modulusLen, RSA_BlockRaw, &plain);
+    if (status == SECSuccess)
+        status = RSA_PublicKeyOp(key, output, padded.data);
+
+    if (padded.data != NULL)
+        PORT_ZFree(padded.data, modulusLen);
+
+    if (status != SECSuccess)
+        return SECFailure;
+
+    *outputLen = modulusLen;
+    return SECSuccess;
+}
+
+/*
+ * Raw (unpadded) RSA decryption: applies the private-key operation to
+ * input and writes modulus-length bytes to output.
+ *
+ * Fails when maxOutputLen is smaller than the modulus length, when
+ * inputLen differs from it, or when the private-key operation fails.
+ * *outputLen is set only on success.
+ *
+ * XXX Doesn't set error code
+ */
+SECStatus
+RSA_DecryptRaw(RSAPrivateKey *key,
+               unsigned char *output,
+               unsigned int *outputLen,
+               unsigned int maxOutputLen,
+               const unsigned char *input,
+               unsigned int inputLen)
+{
+    unsigned int modulusLen = rsa_modulusLen(&key->modulus);
+
+    if (modulusLen > maxOutputLen || inputLen != modulusLen)
+        return SECFailure;
+
+    if (RSA_PrivateKeyOp(key, output, input) != SECSuccess)
+        return SECFailure;
+
+    *outputLen = modulusLen;
+    return SECSuccess;
+}
+
+/*
+ * Decodes an EME-OAEP encoded block, validating the encoding in constant
+ * time.
+ * Described in RFC 3447, section 7.1.2.
+ *
+ * output, maxOutputLen - destination buffer and its capacity
+ * outputLen            - receives the recovered message length
+ * input, inputLen      - the encoded block, after decryption
+ * hashAlg              - hash used for the label
+ * maskHashAlg          - hash used by MGF1
+ * label, labelLen      - the optional value L that was associated with the
+ *                        message
+ *
+ * On success, the original message and message length will be stored in
+ * output and outputLen.
+ *
+ * Returns SECFailure with SEC_ERROR_INVALID_ALGORITHM when hashAlg has no
+ * hash object, SEC_ERROR_INPUT_LEN when the block is too short,
+ * SEC_ERROR_NO_MEMORY on allocation failure, SEC_ERROR_BAD_DATA when the
+ * encoding is invalid and SEC_ERROR_OUTPUT_LEN when output is too small.
+ * It also fails when MGF1 fails; the error code is then whatever MGF1 left.
+ * The temporary buffers are zeroized before they are released.
+ */
+static SECStatus
+eme_oaep_decode(unsigned char *output,
+                unsigned int *outputLen,
+                unsigned int maxOutputLen,
+                const unsigned char *input,
+                unsigned int inputLen,
+                HASH_HashType hashAlg,
+                HASH_HashType maskHashAlg,
+                const unsigned char *label,
+                unsigned int labelLen)
+{
+    const SECHashObject *hash;
+    void *hashContext;
+    SECStatus rv = SECFailure;
+    unsigned char labelHash[HASH_LENGTH_MAX];
+    unsigned int i;
+    unsigned int maskLen;
+    unsigned int paddingOffset;
+    unsigned char *mask = NULL;
+    unsigned char *tmpOutput = NULL;
+    unsigned char isGood;
+    unsigned char foundPaddingEnd;
+
+    hash = HASH_GetRawHashObject(hashAlg);
+    if (hash == NULL) {
+        /* An unknown algorithm must not be dereferenced below. */
+        PORT_SetError(SEC_ERROR_INVALID_ALGORITHM);
+        return SECFailure;
+    }
+
+    /* 1.c */
+    if (inputLen < (hash->length * 2) + 2) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+
+    /* Step 3.a - Generate lHash */
+    hashContext = (*hash->create)();
+    if (hashContext == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+    (*hash->begin)(hashContext);
+    if (labelLen > 0)
+        (*hash->update)(hashContext, label, labelLen);
+    (*hash->end)(hashContext, labelHash, &i, sizeof(labelHash));
+    (*hash->destroy)(hashContext, PR_TRUE);
+
+    tmpOutput = (unsigned char *)PORT_Alloc(inputLen);
+    if (tmpOutput == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        goto done;
+    }
+
+    maskLen = inputLen - hash->length - 1;
+    mask = (unsigned char *)PORT_Alloc(maskLen);
+    if (mask == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        goto done;
+    }
+
+    PORT_Memcpy(tmpOutput, input, inputLen);
+
+    /* 3.c - Generate seedMask
+     * A failed MGF1 leaves mask unspecified, so it must never be used to
+     * unmask; this failure does not depend on the decrypted data. */
+    if (MGF1(maskHashAlg, mask, hash->length, &tmpOutput[1 + hash->length],
+             inputLen - hash->length - 1) != SECSuccess) {
+        goto done;
+    }
+    /* 3.d - Unmask seed */
+    for (i = 0; i < hash->length; ++i)
+        tmpOutput[1 + i] ^= mask[i];
+
+    /* 3.e - Generate dbMask */
+    if (MGF1(maskHashAlg, mask, maskLen, &tmpOutput[1],
+             hash->length) != SECSuccess) {
+        goto done;
+    }
+    /* 3.f - Unmask DB */
+    for (i = 0; i < maskLen; ++i)
+        tmpOutput[1 + hash->length + i] ^= mask[i];
+
+    /* 3.g - Compare Y, lHash, and PS in constant time
+     * Warning: This code is timing dependent and must not disclose which of
+     * these were invalid.
+     */
+    paddingOffset = 0;
+    isGood = 1;
+    foundPaddingEnd = 0;
+
+    /* Compare Y */
+    isGood &= constantTimeEQ8(0x00, tmpOutput[0]);
+
+    /* Compare lHash and lHash' */
+    isGood &= constantTimeCompare(&labelHash[0],
+                                  &tmpOutput[1 + hash->length],
+                                  hash->length);
+
+    /* Compare that the padding is zero or more zero octets, followed by a
+     * 0x01 octet */
+    for (i = 1 + (hash->length * 2); i < inputLen; ++i) {
+        unsigned char isZero = constantTimeEQ8(0x00, tmpOutput[i]);
+        unsigned char isOne = constantTimeEQ8(0x01, tmpOutput[i]);
+        /* non-constant time equivalent:
+         * if (tmpOutput[i] == 0x01 && !foundPaddingEnd)
+         *     paddingOffset = i;
+         */
+        paddingOffset = constantTimeCondition(isOne & ~foundPaddingEnd, i,
+                                              paddingOffset);
+        /* non-constant time equivalent:
+         * if (tmpOutput[i] == 0x01)
+         *    foundPaddingEnd = true;
+         *
+         * Note: This may yield false positives, as it will be set whenever
+         * a 0x01 byte is encountered. If there was bad padding (eg:
+         * 0x03 0x02 0x01), foundPaddingEnd will still be set to true, and
+         * paddingOffset will still be set to 2.
+         */
+        foundPaddingEnd = constantTimeCondition(isOne, 1, foundPaddingEnd);
+        /* non-constant time equivalent:
+         * if (tmpOutput[i] != 0x00 && tmpOutput[i] != 0x01 &&
+         *     !foundPaddingEnd) {
+         *    isGood = false;
+         * }
+         *
+         * Note: This may yield false positives, as a message (and padding)
+         * that is entirely zeros will result in isGood still being true. Thus
+         * it's necessary to check foundPaddingEnd is positive below.
+         */
+        isGood = constantTimeCondition(~foundPaddingEnd & ~isZero, 0, isGood);
+    }
+
+    /* While both isGood and foundPaddingEnd may have false positives, they
+     * cannot BOTH have false positives. If both are not true, then an invalid
+     * message was received. Note, this comparison must still be done in constant
+     * time so as not to leak either condition.
+     */
+    if (!(isGood & foundPaddingEnd)) {
+        PORT_SetError(SEC_ERROR_BAD_DATA);
+        goto done;
+    }
+
+    /* End timing dependent code */
+
+    ++paddingOffset; /* Skip the 0x01 following the end of PS */
+
+    *outputLen = inputLen - paddingOffset;
+    if (*outputLen > maxOutputLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        goto done;
+    }
+
+    if (*outputLen)
+        PORT_Memcpy(output, &tmpOutput[paddingOffset], *outputLen);
+    rv = SECSuccess;
+
+done:
+    /* maskLen is only read when mask was allocated, i.e. after it was set. */
+    if (mask)
+        PORT_ZFree(mask, maskLen);
+    if (tmpOutput)
+        PORT_ZFree(tmpOutput, inputLen);
+    return rv;
+}
+
+/*
+ * Generate an EME-OAEP encoded block for encryption.
+ * Described in RFC 3447, section 7.1.1.
+ * We use input instead of M for the message to be encrypted;
+ * label is the optional value L to be associated with the message.
+ *
+ *   em, emLen        output buffer; exactly emLen bytes are produced.
+ *   input, inputLen  message to encode; must fit in
+ *                    emLen - 2 * hashLen - 2 bytes.
+ *   hashAlg          hash used for lHash; its digest length sizes the seed.
+ *   maskHashAlg      hash handed to MGF1.
+ *   label, labelLen  optional label; only hashed when labelLen > 0.
+ *   seed, seedLen    optional caller-supplied seed (Known Answer Tests).
+ *                    When seed is NULL a random seed is generated.
+ *
+ * Returns SECSuccess on success.  The checks made here fail with SECFailure
+ * and an error code; a failing random number generator's status is returned
+ * unchanged.
+ */
+static SECStatus
+eme_oaep_encode(unsigned char *em,
+                unsigned int emLen,
+                const unsigned char *input,
+                unsigned int inputLen,
+                HASH_HashType hashAlg,
+                HASH_HashType maskHashAlg,
+                const unsigned char *label,
+                unsigned int labelLen,
+                const unsigned char *seed,
+                unsigned int seedLen)
+{
+    const SECHashObject *hash;
+    void *hashContext;
+    SECStatus rv;
+    unsigned char *mask;
+    unsigned int reservedLen;
+    unsigned int dbMaskLen;
+    unsigned int i;
+
+    hash = HASH_GetRawHashObject(hashAlg);
+    if (hash == NULL) {
+        /* Unknown hash type: nothing below can be sized without it. */
+        PORT_SetError(SEC_ERROR_INVALID_ALGORITHM);
+        return SECFailure;
+    }
+    PORT_Assert(seed == NULL || seedLen == hash->length);
+    /* The assertion vanishes in optimized builds, so enforce it at run time:
+     * a seed of the wrong length would either leave part of the seed field
+     * unset or overrun it when copied below. */
+    if (seed != NULL && seedLen != hash->length) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* Step 1.b */
+    reservedLen = (2 * hash->length) + 2;
+    if (emLen < reservedLen || inputLen > (emLen - reservedLen)) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+
+    /*
+     * From RFC 3447, Section 7.1
+     *                     +----------+---------+-------+
+     *                DB = |  lHash   |    PS   |   M   |
+     *                     +----------+---------+-------+
+     *                                    |
+     *          +----------+              V
+     *          |   seed   |--> MGF ---> xor
+     *          +----------+              |
+     *                |                   |
+     *       +--+     V                   |
+     *       |00|    xor <----- MGF <-----|
+     *       +--+     |                   |
+     *         |      |                   |
+     *         V      V                   V
+     *       +--+----------+----------------------------+
+     * EM =  |00|maskedSeed|          maskedDB          |
+     *       +--+----------+----------------------------+
+     *
+     * We use mask to hold the result of the MGF functions, and all other
+     * values are generated in their final resting place.
+     */
+    *em = 0x00;
+
+    /* Step 2.a - Generate lHash */
+    hashContext = (*hash->create)();
+    if (hashContext == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+    (*hash->begin)(hashContext);
+    if (labelLen > 0) {
+        (*hash->update)(hashContext, label, labelLen);
+    }
+    (*hash->end)(hashContext, &em[1 + hash->length], &i, hash->length);
+    (*hash->destroy)(hashContext, PR_TRUE);
+
+    /* Step 2.b - Generate PS */
+    if (emLen - reservedLen - inputLen > 0) {
+        PORT_Memset(em + 1 + (hash->length * 2), 0x00,
+                    emLen - reservedLen - inputLen);
+    }
+
+    /* Step 2.c. - Generate DB
+     * DB = lHash || PS || 0x01 || M
+     * Note that PS and lHash have already been placed into em at their
+     * appropriate offsets. This just copies M into place
+     */
+    em[emLen - inputLen - 1] = 0x01;
+    if (inputLen) {
+        PORT_Memcpy(em + emLen - inputLen, input, inputLen);
+    }
+
+    if (seed == NULL) {
+        /* Step 2.d - Generate seed */
+        rv = RNG_GenerateGlobalRandomBytes(em + 1, hash->length);
+        if (rv != SECSuccess) {
+            return rv;
+        }
+    } else {
+        /* For Known Answer Tests, copy the supplied seed. */
+        PORT_Memcpy(em + 1, seed, seedLen);
+    }
+
+    /* Step 2.e - Generate dbMask */
+    dbMaskLen = emLen - hash->length - 1;
+    mask = (unsigned char *)PORT_Alloc(dbMaskLen);
+    if (mask == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+    MGF1(maskHashAlg, mask, dbMaskLen, em + 1, hash->length);
+    /* Step 2.f - Compute maskedDB */
+    for (i = 0; i < dbMaskLen; ++i) {
+        em[1 + hash->length + i] ^= mask[i];
+    }
+
+    /* Step 2.g - Generate seedMask */
+    MGF1(maskHashAlg, mask, hash->length, &em[1 + hash->length], dbMaskLen);
+    /* Step 2.h - Compute maskedSeed */
+    for (i = 0; i < hash->length; ++i) {
+        em[1 + i] ^= mask[i];
+    }
+
+    /* The mask is derived from the seed; wipe it before release. */
+    PORT_ZFree(mask, dbMaskLen);
+    return SECSuccess;
+}
+
+/*
+ * RSA-OAEP encryption: EME-OAEP encode the input into a modulus-sized block
+ * and apply the RSA public-key operation to it.
+ *
+ *   key                  public key; its modulus length fixes the block size.
+ *   hashAlg, maskHashAlg OAEP hash and MGF1 hash; neither may be HASH_AlgNULL.
+ *   label, labelLen      optional label; pointer and length must agree
+ *                        (both empty or both present).
+ *   seed, seedLen        optional fixed seed, passed through to the encoder.
+ *   output, maxOutputLen destination; must hold at least modulusLen bytes.
+ *   outputLen            set to modulusLen on success only.
+ *   input, inputLen      plaintext to encrypt.
+ *
+ * Returns SECSuccess, or a failure status from the checks or helpers below.
+ */
+SECStatus
+RSA_EncryptOAEP(RSAPublicKey *key,
+                HASH_HashType hashAlg,
+                HASH_HashType maskHashAlg,
+                const unsigned char *label,
+                unsigned int labelLen,
+                const unsigned char *seed,
+                unsigned int seedLen,
+                unsigned char *output,
+                unsigned int *outputLen,
+                unsigned int maxOutputLen,
+                const unsigned char *input,
+                unsigned int inputLen)
+{
+    SECStatus rv = SECFailure;
+    unsigned int modulusLen = rsa_modulusLen(&key->modulus);
+    unsigned char *oaepEncoded = NULL;
+
+    if (maxOutputLen < modulusLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    if ((hashAlg == HASH_AlgNULL) || (maskHashAlg == HASH_AlgNULL)) {
+        PORT_SetError(SEC_ERROR_INVALID_ALGORITHM);
+        return SECFailure;
+    }
+
+    if ((labelLen == 0 && label != NULL) ||
+        (labelLen > 0 && label == NULL)) {
+        PORT_SetError(SEC_ERROR_INVALID_ALGORITHM);
+        return SECFailure;
+    }
+
+    oaepEncoded = (unsigned char *)PORT_Alloc(modulusLen);
+    if (oaepEncoded == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+    rv = eme_oaep_encode(oaepEncoded, modulusLen, input, inputLen,
+                         hashAlg, maskHashAlg, label, labelLen, seed, seedLen);
+    if (rv != SECSuccess) {
+        goto done;
+    }
+
+    rv = RSA_PublicKeyOp(key, output, oaepEncoded);
+    if (rv != SECSuccess) {
+        goto done;
+    }
+    *outputLen = modulusLen;
+
+done:
+    /* The encoded block carries the plaintext and the seed, so wipe it
+     * before release, as RSA_DecryptOAEP and RSA_EncryptBlock do. */
+    PORT_ZFree(oaepEncoded, modulusLen);
+    return rv;
+}
+
+/*
+ * RSA-OAEP decryption: run the (double-checked) private-key operation on a
+ * modulus-sized ciphertext, then strip the EME-OAEP encoding.
+ *
+ * Argument checks are made in this order: hash algorithms, input length,
+ * label pointer/length agreement.  The intermediate block is zeroized before
+ * it is released.
+ */
+SECStatus
+RSA_DecryptOAEP(RSAPrivateKey *key,
+                HASH_HashType hashAlg,
+                HASH_HashType maskHashAlg,
+                const unsigned char *label,
+                unsigned int labelLen,
+                unsigned char *output,
+                unsigned int *outputLen,
+                unsigned int maxOutputLen,
+                const unsigned char *input,
+                unsigned int inputLen)
+{
+    const unsigned int blockLen = rsa_modulusLen(&key->modulus);
+    unsigned char *block;
+    SECStatus status;
+
+    if (hashAlg == HASH_AlgNULL || maskHashAlg == HASH_AlgNULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ALGORITHM);
+        return SECFailure;
+    }
+
+    if (inputLen != blockLen) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+
+    /* A label pointer must be present exactly when its length is non-zero. */
+    if ((labelLen == 0) != (label == NULL)) {
+        PORT_SetError(SEC_ERROR_INVALID_ALGORITHM);
+        return SECFailure;
+    }
+
+    block = (unsigned char *)PORT_Alloc(blockLen);
+    if (block == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+
+    status = RSA_PrivateKeyOpDoubleChecked(key, block, input);
+    if (status == SECSuccess) {
+        status = eme_oaep_decode(output, outputLen, maxOutputLen, block,
+                                 blockLen, hashAlg, maskHashAlg, label,
+                                 labelLen);
+    }
+
+    PORT_ZFree(block, blockLen);
+    return status;
+}
+
+/*
+ * PKCS #1 block-formatted public-key encryption: format the input as an
+ * RSA_BlockPublic block of modulus length and apply the public-key operation.
+ * *outputLen is written only on success; the formatted block is zeroized
+ * before release.
+ *
+ * XXX Doesn't set error code
+ */
+SECStatus
+RSA_EncryptBlock(RSAPublicKey *key,
+                 unsigned char *output,
+                 unsigned int *outputLen,
+                 unsigned int maxOutputLen,
+                 const unsigned char *input,
+                 unsigned int inputLen)
+{
+    const unsigned int blockLen = rsa_modulusLen(&key->modulus);
+    SECStatus result = SECFailure;
+    SECItem padded;
+    SECItem plain;
+
+    padded.data = NULL;
+
+    if (maxOutputLen >= blockLen) {
+        plain.len = inputLen;
+        plain.data = (unsigned char *)input;
+
+        if (rsa_FormatBlock(&padded, blockLen, RSA_BlockPublic,
+                            &plain) == SECSuccess &&
+            RSA_PublicKeyOp(key, output, padded.data) == SECSuccess) {
+            *outputLen = blockLen;
+            result = SECSuccess;
+        }
+    }
+
+    if (padded.data != NULL) {
+        PORT_ZFree(padded.data, blockLen);
+    }
+    return result;
+}
+
+/*
+ * PKCS #1 block-formatted private-key decryption.
+ *
+ * Applies the private-key operation to a modulus-sized input and expects
+ *     0x00 || BT (RSA_BlockPublic) || Pad || 0x00 || ActualData
+ * The data after the first zero octet following the block type is copied to
+ * output and its length stored in *outputLen.
+ *
+ * Fails when the input length differs from the modulus length, the leading
+ * octets are wrong, no non-empty payload is found, the padding is shorter
+ * than RSA_BLOCK_MIN_PAD_LEN octets, or the payload exceeds maxOutputLen.
+ *
+ * XXX Doesn't set error code
+ */
+SECStatus
+RSA_DecryptBlock(RSAPrivateKey *key,
+                 unsigned char *output,
+                 unsigned int *outputLen,
+                 unsigned int maxOutputLen,
+                 const unsigned char *input,
+                 unsigned int inputLen)
+{
+    SECStatus rv;
+    unsigned int modulusLen = rsa_modulusLen(&key->modulus);
+    unsigned int i;
+    unsigned char *buffer;
+
+    if (inputLen != modulusLen) {
+        goto failure;
+    }
+
+    buffer = (unsigned char *)PORT_Alloc(modulusLen + 1);
+    if (!buffer) {
+        goto failure;
+    }
+
+    rv = RSA_PrivateKeyOp(key, buffer, input);
+    if (rv != SECSuccess) {
+        goto loser;
+    }
+
+    /* XXX(rsleevi): Constant time */
+    if (buffer[0] != RSA_BLOCK_FIRST_OCTET ||
+        buffer[1] != (unsigned char)RSA_BlockPublic) {
+        goto loser;
+    }
+    *outputLen = 0;
+    for (i = 2; i < modulusLen; i++) {
+        if (buffer[i] == RSA_BLOCK_AFTER_PAD_OCTET) {
+            *outputLen = modulusLen - i - 1;
+            break;
+        }
+    }
+    if (*outputLen == 0) {
+        goto loser;
+    }
+    /* The padding occupies buffer[2 .. i-1]; RFC 3447 section 7.2.2 rejects
+     * blocks whose padding string is shorter than eight octets. */
+    if (i - 2 < RSA_BLOCK_MIN_PAD_LEN) {
+        goto loser;
+    }
+    if (*outputLen > maxOutputLen) {
+        goto loser;
+    }
+
+    PORT_Memcpy(output, buffer + modulusLen - *outputLen, *outputLen);
+
+    /* buffer holds the recovered plaintext: wipe it before release. */
+    PORT_ZFree(buffer, modulusLen + 1);
+    return SECSuccess;
+
+loser:
+    PORT_ZFree(buffer, modulusLen + 1);
+failure:
+    return SECFailure;
+}
+
+/*
+ * EMSA-PSS encoding, RFC 3447 section 9.1.1.
+ * The caller supplies mHash (the message digest) rather than M.
+ * emBits from the RFC is modBits - 1 (section 8.1.1); MGF1 is the only
+ * supported mask generation function.
+ *
+ * When salt is NULL, saltLen random bytes are generated; otherwise the
+ * caller's salt is copied.  Salt and H are written straight into their
+ * final positions inside em.
+ *
+ * NOTE: this code assumes modBits is a multiple of 8.
+ */
+static SECStatus
+emsa_pss_encode(unsigned char *em,
+                unsigned int emLen,
+                const unsigned char *mHash,
+                HASH_HashType hashAlg,
+                HASH_HashType maskHashAlg,
+                const unsigned char *salt,
+                unsigned int saltLen)
+{
+    const SECHashObject *hashObj = HASH_GetRawHashObject(hashAlg);
+    const unsigned int hLen = hashObj->length;
+    unsigned int dbLen;
+    unsigned int padLen;
+    unsigned int digestLen;
+    unsigned int pos;
+    unsigned char *saltPos;
+    unsigned char *hPos;
+    unsigned char *maskBuf;
+    void *ctx;
+    SECStatus rv;
+
+    /* Step 3 */
+    if (emLen < hLen + saltLen + 2) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    /* Layout of em: DB (dbLen bytes) || H (hLen bytes) || 0xbc,
+     * with the salt occupying the tail of DB. */
+    dbLen = emLen - hLen - 1;
+    padLen = dbLen - saltLen - 1;
+    hPos = em + dbLen;
+    saltPos = hPos - saltLen;
+
+    /* Step 4 */
+    if (salt != NULL) {
+        PORT_Memcpy(saltPos, salt, saltLen);
+    } else {
+        rv = RNG_GenerateGlobalRandomBytes(saltPos, saltLen);
+        if (rv != SECSuccess) {
+            return rv;
+        }
+    }
+
+    /* Step 5 + 6: H = Hash(eight zero octets || mHash || salt) */
+    ctx = (*hashObj->create)();
+    if (ctx == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+    (*hashObj->begin)(ctx);
+    (*hashObj->update)(ctx, eightZeros, 8);
+    (*hashObj->update)(ctx, mHash, hLen);
+    (*hashObj->update)(ctx, saltPos, saltLen);
+    (*hashObj->end)(ctx, hPos, &digestLen, hLen);
+    (*hashObj->destroy)(ctx, PR_TRUE);
+
+    /* Step 7 + 8: zero padding followed by the 0x01 separator */
+    PORT_Memset(em, 0, padLen);
+    em[padLen] = 0x01;
+
+    /* Step 9 */
+    maskBuf = (unsigned char *)PORT_Alloc(dbLen);
+    if (maskBuf == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+    MGF1(maskHashAlg, maskBuf, dbLen, hPos, hLen);
+
+    /* Step 10 */
+    pos = 0;
+    while (pos < dbLen) {
+        em[pos] ^= maskBuf[pos];
+        pos++;
+    }
+    PORT_Free(maskBuf);
+
+    /* Step 11 */
+    em[0] &= 0x7f;
+
+    /* Step 12 */
+    em[emLen - 1] = 0xbc;
+
+    return SECSuccess;
+}
+
+/*
+ * EMSA-PSS verification, RFC 3447 section 9.1.2.
+ * The caller supplies mHash (the message digest) rather than M.
+ * emBits from the RFC is modBits - 1 (section 8.1.2); MGF1 is the only
+ * supported mask generation function.
+ *
+ * Returns SECSuccess when em is a consistent encoding of mHash with a salt
+ * of saltLen bytes.  Otherwise returns SECFailure with
+ * SEC_ERROR_BAD_SIGNATURE, or SEC_ERROR_NO_MEMORY on allocation failure.
+ *
+ * NOTE: this code assumes modBits is a multiple of 8.
+ */
+static SECStatus
+emsa_pss_verify(const unsigned char *mHash,
+                const unsigned char *em,
+                unsigned int emLen,
+                HASH_HashType hashAlg,
+                HASH_HashType maskHashAlg,
+                unsigned int saltLen)
+{
+    const SECHashObject *hashObj = HASH_GetRawHashObject(hashAlg);
+    const unsigned int hLen = hashObj->length;
+    const unsigned int dbLen = emLen - hLen - 1;
+    unsigned char *unmasked = NULL;
+    unsigned char *hPrime = NULL; /* H' from the RFC */
+    unsigned int padLen;
+    unsigned int digestLen;
+    unsigned int pos;
+    void *ctx;
+    SECStatus result = SECFailure;
+
+    /* Step 3 + 4 + 6 */
+    if (emLen < hLen + saltLen + 2 ||
+        em[emLen - 1] != 0xbc ||
+        (em[0] & 0x80) != 0) {
+        PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+        return SECFailure;
+    }
+
+    /* Step 7: em + dbLen points to H, used as the MGF seed */
+    unmasked = (unsigned char *)PORT_Alloc(dbLen);
+    if (unmasked == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+    MGF1(maskHashAlg, unmasked, dbLen, em + dbLen, hLen);
+
+    /* Step 8 */
+    for (pos = 0; pos < dbLen; pos++) {
+        unmasked[pos] ^= em[pos];
+    }
+
+    /* Step 9 */
+    unmasked[0] &= 0x7f;
+
+    /* Step 10: zero padding, then a single 0x01 separator before the salt */
+    padLen = dbLen - saltLen - 1;
+    for (pos = 0; pos < padLen; pos++) {
+        if (unmasked[pos] != 0) {
+            PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+            goto cleanup;
+        }
+    }
+    if (unmasked[padLen] != 0x01) {
+        PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+        goto cleanup;
+    }
+
+    /* Step 12 + 13: H' = Hash(eight zero octets || mHash || salt) */
+    hPrime = (unsigned char *)PORT_Alloc(hLen);
+    if (hPrime == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        goto cleanup;
+    }
+    ctx = (*hashObj->create)();
+    if (ctx == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        goto cleanup;
+    }
+    (*hashObj->begin)(ctx);
+    (*hashObj->update)(ctx, eightZeros, 8);
+    (*hashObj->update)(ctx, mHash, hLen);
+    (*hashObj->update)(ctx, &unmasked[dbLen - saltLen], saltLen);
+    (*hashObj->end)(ctx, hPrime, &digestLen, hLen);
+    (*hashObj->destroy)(ctx, PR_TRUE);
+
+    /* Step 14 */
+    if (PORT_Memcmp(hPrime, em + dbLen, hLen) == 0) {
+        result = SECSuccess;
+    } else {
+        PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+    }
+
+cleanup:
+    if (unmasked != NULL) {
+        PORT_Free(unmasked);
+    }
+    if (hPrime != NULL) {
+        PORT_Free(hPrime);
+    }
+    return result;
+}
+
+/*
+ * Produce an RSA-PSS signature over a message digest.
+ *
+ *   key                  private key; its modulus length fixes the size.
+ *   hashAlg, maskHashAlg digest and MGF1 hash; neither may be HASH_AlgNULL.
+ *   salt, saltLength     optional fixed salt; a NULL salt is passed through
+ *                        to the encoder, which then generates one.
+ *   output, maxOutputLen destination; must hold at least modulusLen bytes.
+ *   outputLen            set to modulusLen on success only.
+ *   input, inputLen      the digest to sign; inputLen must equal the digest
+ *                        length of hashAlg.
+ *
+ * Returns SECSuccess, or a failure status from the checks or helpers below.
+ */
+SECStatus
+RSA_SignPSS(RSAPrivateKey *key,
+            HASH_HashType hashAlg,
+            HASH_HashType maskHashAlg,
+            const unsigned char *salt,
+            unsigned int saltLength,
+            unsigned char *output,
+            unsigned int *outputLen,
+            unsigned int maxOutputLen,
+            const unsigned char *input,
+            unsigned int inputLen)
+{
+    SECStatus rv = SECSuccess;
+    unsigned int modulusLen = rsa_modulusLen(&key->modulus);
+    unsigned char *pssEncoded = NULL;
+    const SECHashObject *hashObj;
+
+    if (maxOutputLen < modulusLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    if ((hashAlg == HASH_AlgNULL) || (maskHashAlg == HASH_AlgNULL)) {
+        PORT_SetError(SEC_ERROR_INVALID_ALGORITHM);
+        return SECFailure;
+    }
+
+    /* The encoder reads exactly one digest's worth of bytes from input, so
+     * a caller-supplied length that disagrees must be rejected here. */
+    hashObj = HASH_GetRawHashObject(hashAlg);
+    if (hashObj == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ALGORITHM);
+        return SECFailure;
+    }
+    if (inputLen != hashObj->length) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+
+    pssEncoded = (unsigned char *)PORT_Alloc(modulusLen);
+    if (pssEncoded == NULL) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+    rv = emsa_pss_encode(pssEncoded, modulusLen, input, hashAlg,
+                         maskHashAlg, salt, saltLength);
+    if (rv != SECSuccess) {
+        goto done;
+    }
+
+    rv = RSA_PrivateKeyOpDoubleChecked(key, output, pssEncoded);
+    if (rv != SECSuccess) {
+        goto done;
+    }
+    /* Report a length only when output really holds a signature. */
+    *outputLen = modulusLen;
+
+done:
+    PORT_Free(pssEncoded);
+    return rv;
+}
+
+/*
+ * Verify an RSA-PSS signature against a message digest.
+ *
+ *   key                  public key; sigLen must equal its modulus length.
+ *   hashAlg, maskHashAlg digest and MGF1 hash; neither may be HASH_AlgNULL.
+ *   saltLength           expected salt length in the encoding.
+ *   sig, sigLen          signature to check.
+ *   hash, hashLen        message digest; hashLen must equal the digest
+ *                        length of hashAlg.
+ *
+ * Returns SECSuccess when the signature verifies, otherwise SECFailure with
+ * an error code set.
+ */
+SECStatus
+RSA_CheckSignPSS(RSAPublicKey *key,
+                 HASH_HashType hashAlg,
+                 HASH_HashType maskHashAlg,
+                 unsigned int saltLength,
+                 const unsigned char *sig,
+                 unsigned int sigLen,
+                 const unsigned char *hash,
+                 unsigned int hashLen)
+{
+    SECStatus rv;
+    unsigned int modulusLen = rsa_modulusLen(&key->modulus);
+    unsigned char *buffer;
+    const SECHashObject *hashObj;
+
+    if (sigLen != modulusLen) {
+        PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+        return SECFailure;
+    }
+
+    if ((hashAlg == HASH_AlgNULL) || (maskHashAlg == HASH_AlgNULL)) {
+        PORT_SetError(SEC_ERROR_INVALID_ALGORITHM);
+        return SECFailure;
+    }
+
+    /* The verifier reads exactly one digest's worth of bytes from hash, so
+     * a caller-supplied length that disagrees must be rejected here. */
+    hashObj = HASH_GetRawHashObject(hashAlg);
+    if (hashObj == NULL) {
+        PORT_SetError(SEC_ERROR_INVALID_ALGORITHM);
+        return SECFailure;
+    }
+    if (hashLen != hashObj->length) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+
+    buffer = (unsigned char *)PORT_Alloc(modulusLen);
+    if (!buffer) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY);
+        return SECFailure;
+    }
+
+    rv = RSA_PublicKeyOp(key, buffer, sig);
+    if (rv != SECSuccess) {
+        PORT_Free(buffer);
+        PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+        return SECFailure;
+    }
+
+    rv = emsa_pss_verify(hash, buffer, modulusLen, hashAlg,
+                         maskHashAlg, saltLength);
+    PORT_Free(buffer);
+
+    return rv;
+}
+
+/*
+ * PKCS #1 block-formatted signing: format the input as an RSA_BlockPrivate
+ * block of modulus length and apply the (double-checked) private-key
+ * operation.
+ *
+ *   output, maxOutputLen destination; must hold at least modulusLen bytes.
+ *   outputLen            set to modulusLen on success only.
+ *   input, inputLen      data to sign.
+ *
+ * Returns SECSuccess, SECFailure when the output buffer is too small, or the
+ * failing status of the formatting or private-key step.
+ *
+ * XXX Doesn't set error code
+ */
+SECStatus
+RSA_Sign(RSAPrivateKey *key,
+         unsigned char *output,
+         unsigned int *outputLen,
+         unsigned int maxOutputLen,
+         const unsigned char *input,
+         unsigned int inputLen)
+{
+    SECStatus rv = SECSuccess;
+    unsigned int modulusLen = rsa_modulusLen(&key->modulus);
+    SECItem formatted;
+    SECItem unformatted;
+
+    if (maxOutputLen < modulusLen) {
+        return SECFailure;
+    }
+
+    unformatted.len = inputLen;
+    unformatted.data = (unsigned char *)input;
+    formatted.data = NULL;
+    rv = rsa_FormatBlock(&formatted, modulusLen, RSA_BlockPrivate,
+                         &unformatted);
+    if (rv != SECSuccess) {
+        goto done;
+    }
+
+    rv = RSA_PrivateKeyOpDoubleChecked(key, output, formatted.data);
+    if (rv == SECSuccess) {
+        /* Report a length only when output really holds a signature. */
+        *outputLen = modulusLen;
+    }
+
+done:
+    if (formatted.data != NULL) {
+        PORT_ZFree(formatted.data, modulusLen);
+    }
+    return rv;
+}
+
+/*
+ * Verify a PKCS #1 block-formatted signature against the expected data.
+ *
+ * Applies the public-key operation to sig and requires the result to be
+ *     0x00 || BT (RSA_BlockPrivate) || Pad || 0x00 || data
+ * where every Pad octet is RSA_BLOCK_PRIVATE_PAD_OCTET and the trailing
+ * dataLen octets equal data.
+ *
+ * Returns SECSuccess on a match, SECFailure otherwise.
+ *
+ * XXX Doesn't set error code
+ */
+SECStatus
+RSA_CheckSign(RSAPublicKey *key,
+              const unsigned char *sig,
+              unsigned int sigLen,
+              const unsigned char *data,
+              unsigned int dataLen)
+{
+    SECStatus rv;
+    unsigned int modulusLen = rsa_modulusLen(&key->modulus);
+    unsigned int i;
+    unsigned char *buffer;
+
+    if (sigLen != modulusLen) {
+        goto failure;
+    }
+    /*
+     * 0x00 || BT || Pad || 0x00 || ActualData
+     *
+     * The "3" below is the first octet + the second octet + the 0x00
+     * octet that always comes just before the ActualData.
+     *
+     * The modulus itself must be large enough for that overhead; without
+     * this test the unsigned subtraction below would wrap for a tiny
+     * modulus and let an oversized dataLen through.
+     */
+    if (modulusLen < (3 + RSA_BLOCK_MIN_PAD_LEN)) {
+        goto failure;
+    }
+    if (dataLen > modulusLen - (3 + RSA_BLOCK_MIN_PAD_LEN)) {
+        goto failure;
+    }
+
+    buffer = (unsigned char *)PORT_Alloc(modulusLen + 1);
+    if (!buffer) {
+        goto failure;
+    }
+
+    rv = RSA_PublicKeyOp(key, buffer, sig);
+    if (rv != SECSuccess) {
+        goto loser;
+    }
+
+    /*
+     * check the padding that was used
+     */
+    if (buffer[0] != RSA_BLOCK_FIRST_OCTET ||
+        buffer[1] != (unsigned char)RSA_BlockPrivate) {
+        goto loser;
+    }
+    for (i = 2; i < modulusLen - dataLen - 1; i++) {
+        if (buffer[i] != RSA_BLOCK_PRIVATE_PAD_OCTET) {
+            goto loser;
+        }
+    }
+    /* i now indexes the separator octet just before the data. */
+    if (buffer[i] != RSA_BLOCK_AFTER_PAD_OCTET) {
+        goto loser;
+    }
+
+    /*
+     * make sure we get the same results
+     */
+    if (PORT_Memcmp(buffer + modulusLen - dataLen, data, dataLen) != 0) {
+        goto loser;
+    }
+
+    PORT_Free(buffer);
+    return SECSuccess;
+
+loser:
+    PORT_Free(buffer);
+failure:
+    return SECFailure;
+}
+
+/*
+ * Verify a PKCS #1 block-formatted signature and recover the signed data.
+ *
+ * Applies the public-key operation to sig, checks the leading octets and
+ * that each padding octet is RSA_BLOCK_PRIVATE_PAD_OCTET, then copies what
+ * follows the first RSA_BLOCK_AFTER_PAD_OCTET into output.
+ * *outputLen is reset to 0 once the public-key operation has succeeded and
+ * holds the recovered length on success.
+ *
+ * XXX Doesn't set error code
+ */
+SECStatus
+RSA_CheckSignRecover(RSAPublicKey *key,
+                     unsigned char *output,
+                     unsigned int *outputLen,
+                     unsigned int maxOutputLen,
+                     const unsigned char *sig,
+                     unsigned int sigLen)
+{
+    const unsigned int blockLen = rsa_modulusLen(&key->modulus);
+    SECStatus result = SECFailure;
+    unsigned char *decoded;
+    unsigned int pos;
+
+    if (sigLen != blockLen) {
+        return SECFailure;
+    }
+
+    decoded = (unsigned char *)PORT_Alloc(blockLen + 1);
+    if (decoded == NULL) {
+        return SECFailure;
+    }
+
+    if (RSA_PublicKeyOp(key, decoded, sig) != SECSuccess) {
+        goto cleanup;
+    }
+    *outputLen = 0;
+
+    /*
+     * check the padding that was used
+     */
+    if (decoded[0] != RSA_BLOCK_FIRST_OCTET ||
+        decoded[1] != (unsigned char)RSA_BlockPrivate) {
+        goto cleanup;
+    }
+    pos = 2;
+    while (pos < blockLen) {
+        unsigned char octet = decoded[pos];
+
+        if (octet == RSA_BLOCK_AFTER_PAD_OCTET) {
+            /* Everything after the separator is the recovered data. */
+            *outputLen = blockLen - pos - 1;
+            break;
+        }
+        if (octet != RSA_BLOCK_PRIVATE_PAD_OCTET) {
+            goto cleanup;
+        }
+        pos++;
+    }
+    if (*outputLen == 0 || *outputLen > maxOutputLen) {
+        goto cleanup;
+    }
+
+    PORT_Memcpy(output, decoded + blockLen - *outputLen, *outputLen);
+    result = SECSuccess;
+
+cleanup:
+    PORT_Free(decoded);
+    return result;
+}
diff --git a/security/nss/lib/freebl/secmpi.h b/security/nss/lib/freebl/secmpi.h
new file mode 100644
index 000000000..5e8fd1105
--- /dev/null
+++ b/security/nss/lib/freebl/secmpi.h
@@ -0,0 +1,54 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi.h"
+
+/*
+ * Helper macros shared by the MPI-based code.  They rely on names supplied
+ * by the expanding function: a SECStatus "rv" (CHECK_SEC_OK), an mp_err
+ * style "err" (the MPI macros) and a "cleanup" label to jump to on failure.
+ */
+
+/*
+ * Evaluate func, store its result in rv, and jump to cleanup unless it is
+ * SECSuccess.  Wrapped in do { } while (0) so the expansion is a single
+ * statement and cannot capture a following "else".
+ */
+#define CHECK_SEC_OK(func)               \
+    do {                                 \
+        if (SECSuccess != (rv = (func))) \
+            goto cleanup;                \
+    } while (0)
+
+/*
+ * Evaluate func, store its result in err, and jump to cleanup when the
+ * result is below MP_OKAY.
+ */
+#define CHECK_MPI_OK(func)            \
+    do {                              \
+        if (MP_OKAY > (err = (func))) \
+            goto cleanup;             \
+    } while (0)
+
+/* Read len big-endian octets from oc into the mp_int pointed to by mp. */
+#define OCTETS_TO_MPINT(oc, mp, len) \
+    CHECK_MPI_OK(mp_read_unsigned_octets((mp), (oc), (len)))
+
+/* Read the contents of the SECItem it (not a pointer) into mp. */
+#define SECITEM_TO_MPINT(it, mp) \
+    CHECK_MPI_OK(mp_read_unsigned_octets((mp), (it).data, (it).len))
+
+/*
+ * Serialize mp into the SECItem pointed to by it, allocating the item's
+ * data (from arena, as interpreted by SECITEM_AllocItem).  Sets err to
+ * MP_RANGE for a non-positive size, MP_MEM on allocation failure, and
+ * MP_OKAY on success; jumps to cleanup on any failure.
+ */
+#define MPINT_TO_SECITEM(mp, it, arena)                           \
+    do {                                                          \
+        int mpintLen = mp_unsigned_octet_size((mp));              \
+        if (mpintLen <= 0) {                                      \
+            err = MP_RANGE;                                       \
+            goto cleanup;                                         \
+        }                                                         \
+        SECITEM_AllocItem((arena), (it), mpintLen);               \
+        if ((it)->data == NULL) {                                 \
+            err = MP_MEM;                                         \
+            goto cleanup;                                         \
+        }                                                         \
+        err = mp_to_unsigned_octets((mp), (it)->data, (it)->len); \
+        if (err < 0)                                              \
+            goto cleanup;                                         \
+        else                                                      \
+            err = MP_OKAY;                                        \
+    } while (0)
+
+/*
+ * Translate an MPI error code into the matching SEC error via PORT_SetError.
+ * Deliberately left as a bare switch statement: call sites written without
+ * a trailing semicolon keep compiling.
+ */
+#define MP_TO_SEC_ERROR(err)                          \
+    switch (err) {                                    \
+        case MP_MEM:                                  \
+            PORT_SetError(SEC_ERROR_NO_MEMORY);       \
+            break;                                    \
+        case MP_RANGE:                                \
+            PORT_SetError(SEC_ERROR_BAD_DATA);        \
+            break;                                    \
+        case MP_BADARG:                               \
+            PORT_SetError(SEC_ERROR_INVALID_ARGS);    \
+            break;                                    \
+        default:                                      \
+            PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); \
+            break;                                    \
+    }
diff --git a/security/nss/lib/freebl/secrng.h b/security/nss/lib/freebl/secrng.h
new file mode 100644
index 000000000..19eae4833
--- /dev/null
+++ b/security/nss/lib/freebl/secrng.h
@@ -0,0 +1,65 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _SECRNG_H_
+#define _SECRNG_H_
+/*
+ * secrng.h - public data structures and prototypes for the secure random
+ * number generator
+ */
+
+/******************************************/
+/*
+** Random number generation. A cryptographically strong random number
+** generator.
+*/
+
+#include "blapi.h"
+
+/* the number of bytes to read from the system random number generator */
+#define SYSTEM_RNG_SEED_COUNT 1024
+
+SEC_BEGIN_PROTOS
+
+/*
+** The following functions are provided by the security library
+** but are differently implemented for the UNIX, Win, and OS/2
+** versions
+*/
+
+/*
+** Get the "noisiest" information available on the system.
+** The amount of data returned depends on the system implementation.
+** It will not exceed maxbytes, but may be (much) less.
+**   buf      - destination for the noise bytes
+**   maxbytes - upper bound on the number of bytes copied into buf
+** Returns number of noise bytes copied into buf, or zero if error.
+*/
+extern size_t RNG_GetNoise(void *buf, size_t maxbytes);
+
+/*
+** RNG_SystemInfoForRNG should be called before any use of SSL. It
+** gathers up the system specific information to help seed the
+** state of the global random number generator.
+** Takes no arguments and returns nothing.
+*/
+extern void RNG_SystemInfoForRNG(void);
+
+/*
+** Use the contents (and stat) of a file to help seed the
+** global random number generator.
+**   filename - name of the file to read
+** Returns nothing.
+*/
+extern void RNG_FileForRNG(const char *filename);
+
+/*
+** Get maxbytes bytes of random data from the system random number
+** generator.
+**   buf      - destination for the random bytes
+**   maxbytes - number of bytes requested
+** Returns the number of bytes copied into buf -- maxbytes if success
+** or zero if error.
+** Errors:
+** PR_NOT_IMPLEMENTED_ERROR There is no system RNG on the platform.
+** SEC_ERROR_NEED_RANDOM The system RNG failed.
+*/
+extern size_t RNG_SystemRNG(void *buf, size_t maxbytes);
+
+SEC_END_PROTOS
+
+#endif /* _SECRNG_H_ */
diff --git a/security/nss/lib/freebl/seed.c b/security/nss/lib/freebl/seed.c
new file mode 100644
index 000000000..f198cce45
--- /dev/null
+++ b/security/nss/lib/freebl/seed.c
@@ -0,0 +1,641 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stddef.h>
+#ifdef WIN32
+#include <memory.h>
+#endif
+
+#include "seed.h"
+#include "secerr.h"
+
+static const seed_word SS[4][256] = {
+ { 0x2989a1a8, 0x05858184, 0x16c6d2d4, 0x13c3d3d0,
+ 0x14445054, 0x1d0d111c, 0x2c8ca0ac, 0x25052124,
+ 0x1d4d515c, 0x03434340, 0x18081018, 0x1e0e121c,
+ 0x11415150, 0x3cccf0fc, 0x0acac2c8, 0x23436360,
+ 0x28082028, 0x04444044, 0x20002020, 0x1d8d919c,
+ 0x20c0e0e0, 0x22c2e2e0, 0x08c8c0c8, 0x17071314,
+ 0x2585a1a4, 0x0f8f838c, 0x03030300, 0x3b4b7378,
+ 0x3b8bb3b8, 0x13031310, 0x12c2d2d0, 0x2ecee2ec,
+ 0x30407070, 0x0c8c808c, 0x3f0f333c, 0x2888a0a8,
+ 0x32023230, 0x1dcdd1dc, 0x36c6f2f4, 0x34447074,
+ 0x2ccce0ec, 0x15859194, 0x0b0b0308, 0x17475354,
+ 0x1c4c505c, 0x1b4b5358, 0x3d8db1bc, 0x01010100,
+ 0x24042024, 0x1c0c101c, 0x33437370, 0x18889098,
+ 0x10001010, 0x0cccc0cc, 0x32c2f2f0, 0x19c9d1d8,
+ 0x2c0c202c, 0x27c7e3e4, 0x32427270, 0x03838380,
+ 0x1b8b9398, 0x11c1d1d0, 0x06868284, 0x09c9c1c8,
+ 0x20406060, 0x10405050, 0x2383a3a0, 0x2bcbe3e8,
+ 0x0d0d010c, 0x3686b2b4, 0x1e8e929c, 0x0f4f434c,
+ 0x3787b3b4, 0x1a4a5258, 0x06c6c2c4, 0x38487078,
+ 0x2686a2a4, 0x12021210, 0x2f8fa3ac, 0x15c5d1d4,
+ 0x21416160, 0x03c3c3c0, 0x3484b0b4, 0x01414140,
+ 0x12425250, 0x3d4d717c, 0x0d8d818c, 0x08080008,
+ 0x1f0f131c, 0x19899198, 0x00000000, 0x19091118,
+ 0x04040004, 0x13435350, 0x37c7f3f4, 0x21c1e1e0,
+ 0x3dcdf1fc, 0x36467274, 0x2f0f232c, 0x27072324,
+ 0x3080b0b0, 0x0b8b8388, 0x0e0e020c, 0x2b8ba3a8,
+ 0x2282a2a0, 0x2e4e626c, 0x13839390, 0x0d4d414c,
+ 0x29496168, 0x3c4c707c, 0x09090108, 0x0a0a0208,
+ 0x3f8fb3bc, 0x2fcfe3ec, 0x33c3f3f0, 0x05c5c1c4,
+ 0x07878384, 0x14041014, 0x3ecef2fc, 0x24446064,
+ 0x1eced2dc, 0x2e0e222c, 0x0b4b4348, 0x1a0a1218,
+ 0x06060204, 0x21012120, 0x2b4b6368, 0x26466264,
+ 0x02020200, 0x35c5f1f4, 0x12829290, 0x0a8a8288,
+ 0x0c0c000c, 0x3383b3b0, 0x3e4e727c, 0x10c0d0d0,
+ 0x3a4a7278, 0x07474344, 0x16869294, 0x25c5e1e4,
+ 0x26062224, 0x00808080, 0x2d8da1ac, 0x1fcfd3dc,
+ 0x2181a1a0, 0x30003030, 0x37073334, 0x2e8ea2ac,
+ 0x36063234, 0x15051114, 0x22022220, 0x38083038,
+ 0x34c4f0f4, 0x2787a3a4, 0x05454144, 0x0c4c404c,
+ 0x01818180, 0x29c9e1e8, 0x04848084, 0x17879394,
+ 0x35053134, 0x0bcbc3c8, 0x0ecec2cc, 0x3c0c303c,
+ 0x31417170, 0x11011110, 0x07c7c3c4, 0x09898188,
+ 0x35457174, 0x3bcbf3f8, 0x1acad2d8, 0x38c8f0f8,
+ 0x14849094, 0x19495158, 0x02828280, 0x04c4c0c4,
+ 0x3fcff3fc, 0x09494148, 0x39093138, 0x27476364,
+ 0x00c0c0c0, 0x0fcfc3cc, 0x17c7d3d4, 0x3888b0b8,
+ 0x0f0f030c, 0x0e8e828c, 0x02424240, 0x23032320,
+ 0x11819190, 0x2c4c606c, 0x1bcbd3d8, 0x2484a0a4,
+ 0x34043034, 0x31c1f1f0, 0x08484048, 0x02c2c2c0,
+ 0x2f4f636c, 0x3d0d313c, 0x2d0d212c, 0x00404040,
+ 0x3e8eb2bc, 0x3e0e323c, 0x3c8cb0bc, 0x01c1c1c0,
+ 0x2a8aa2a8, 0x3a8ab2b8, 0x0e4e424c, 0x15455154,
+ 0x3b0b3338, 0x1cccd0dc, 0x28486068, 0x3f4f737c,
+ 0x1c8c909c, 0x18c8d0d8, 0x0a4a4248, 0x16465254,
+ 0x37477374, 0x2080a0a0, 0x2dcde1ec, 0x06464244,
+ 0x3585b1b4, 0x2b0b2328, 0x25456164, 0x3acaf2f8,
+ 0x23c3e3e0, 0x3989b1b8, 0x3181b1b0, 0x1f8f939c,
+ 0x1e4e525c, 0x39c9f1f8, 0x26c6e2e4, 0x3282b2b0,
+ 0x31013130, 0x2acae2e8, 0x2d4d616c, 0x1f4f535c,
+ 0x24c4e0e4, 0x30c0f0f0, 0x0dcdc1cc, 0x08888088,
+ 0x16061214, 0x3a0a3238, 0x18485058, 0x14c4d0d4,
+ 0x22426260, 0x29092128, 0x07070304, 0x33033330,
+ 0x28c8e0e8, 0x1b0b1318, 0x05050104, 0x39497178,
+ 0x10809090, 0x2a4a6268, 0x2a0a2228, 0x1a8a9298 },
+ { 0x38380830, 0xe828c8e0, 0x2c2d0d21, 0xa42686a2,
+ 0xcc0fcfc3, 0xdc1eced2, 0xb03383b3, 0xb83888b0,
+ 0xac2f8fa3, 0x60204060, 0x54154551, 0xc407c7c3,
+ 0x44044440, 0x6c2f4f63, 0x682b4b63, 0x581b4b53,
+ 0xc003c3c3, 0x60224262, 0x30330333, 0xb43585b1,
+ 0x28290921, 0xa02080a0, 0xe022c2e2, 0xa42787a3,
+ 0xd013c3d3, 0x90118191, 0x10110111, 0x04060602,
+ 0x1c1c0c10, 0xbc3c8cb0, 0x34360632, 0x480b4b43,
+ 0xec2fcfe3, 0x88088880, 0x6c2c4c60, 0xa82888a0,
+ 0x14170713, 0xc404c4c0, 0x14160612, 0xf434c4f0,
+ 0xc002c2c2, 0x44054541, 0xe021c1e1, 0xd416c6d2,
+ 0x3c3f0f33, 0x3c3d0d31, 0x8c0e8e82, 0x98188890,
+ 0x28280820, 0x4c0e4e42, 0xf436c6f2, 0x3c3e0e32,
+ 0xa42585a1, 0xf839c9f1, 0x0c0d0d01, 0xdc1fcfd3,
+ 0xd818c8d0, 0x282b0b23, 0x64264662, 0x783a4a72,
+ 0x24270723, 0x2c2f0f23, 0xf031c1f1, 0x70324272,
+ 0x40024242, 0xd414c4d0, 0x40014141, 0xc000c0c0,
+ 0x70334373, 0x64274763, 0xac2c8ca0, 0x880b8b83,
+ 0xf437c7f3, 0xac2d8da1, 0x80008080, 0x1c1f0f13,
+ 0xc80acac2, 0x2c2c0c20, 0xa82a8aa2, 0x34340430,
+ 0xd012c2d2, 0x080b0b03, 0xec2ecee2, 0xe829c9e1,
+ 0x5c1d4d51, 0x94148490, 0x18180810, 0xf838c8f0,
+ 0x54174753, 0xac2e8ea2, 0x08080800, 0xc405c5c1,
+ 0x10130313, 0xcc0dcdc1, 0x84068682, 0xb83989b1,
+ 0xfc3fcff3, 0x7c3d4d71, 0xc001c1c1, 0x30310131,
+ 0xf435c5f1, 0x880a8a82, 0x682a4a62, 0xb03181b1,
+ 0xd011c1d1, 0x20200020, 0xd417c7d3, 0x00020202,
+ 0x20220222, 0x04040400, 0x68284860, 0x70314171,
+ 0x04070703, 0xd81bcbd3, 0x9c1d8d91, 0x98198991,
+ 0x60214161, 0xbc3e8eb2, 0xe426c6e2, 0x58194951,
+ 0xdc1dcdd1, 0x50114151, 0x90108090, 0xdc1cccd0,
+ 0x981a8a92, 0xa02383a3, 0xa82b8ba3, 0xd010c0d0,
+ 0x80018181, 0x0c0f0f03, 0x44074743, 0x181a0a12,
+ 0xe023c3e3, 0xec2ccce0, 0x8c0d8d81, 0xbc3f8fb3,
+ 0x94168692, 0x783b4b73, 0x5c1c4c50, 0xa02282a2,
+ 0xa02181a1, 0x60234363, 0x20230323, 0x4c0d4d41,
+ 0xc808c8c0, 0x9c1e8e92, 0x9c1c8c90, 0x383a0a32,
+ 0x0c0c0c00, 0x2c2e0e22, 0xb83a8ab2, 0x6c2e4e62,
+ 0x9c1f8f93, 0x581a4a52, 0xf032c2f2, 0x90128292,
+ 0xf033c3f3, 0x48094941, 0x78384870, 0xcc0cccc0,
+ 0x14150511, 0xf83bcbf3, 0x70304070, 0x74354571,
+ 0x7c3f4f73, 0x34350531, 0x10100010, 0x00030303,
+ 0x64244460, 0x6c2d4d61, 0xc406c6c2, 0x74344470,
+ 0xd415c5d1, 0xb43484b0, 0xe82acae2, 0x08090901,
+ 0x74364672, 0x18190911, 0xfc3ecef2, 0x40004040,
+ 0x10120212, 0xe020c0e0, 0xbc3d8db1, 0x04050501,
+ 0xf83acaf2, 0x00010101, 0xf030c0f0, 0x282a0a22,
+ 0x5c1e4e52, 0xa82989a1, 0x54164652, 0x40034343,
+ 0x84058581, 0x14140410, 0x88098981, 0x981b8b93,
+ 0xb03080b0, 0xe425c5e1, 0x48084840, 0x78394971,
+ 0x94178793, 0xfc3cccf0, 0x1c1e0e12, 0x80028282,
+ 0x20210121, 0x8c0c8c80, 0x181b0b13, 0x5c1f4f53,
+ 0x74374773, 0x54144450, 0xb03282b2, 0x1c1d0d11,
+ 0x24250521, 0x4c0f4f43, 0x00000000, 0x44064642,
+ 0xec2dcde1, 0x58184850, 0x50124252, 0xe82bcbe3,
+ 0x7c3e4e72, 0xd81acad2, 0xc809c9c1, 0xfc3dcdf1,
+ 0x30300030, 0x94158591, 0x64254561, 0x3c3c0c30,
+ 0xb43686b2, 0xe424c4e0, 0xb83b8bb3, 0x7c3c4c70,
+ 0x0c0e0e02, 0x50104050, 0x38390931, 0x24260622,
+ 0x30320232, 0x84048480, 0x68294961, 0x90138393,
+ 0x34370733, 0xe427c7e3, 0x24240420, 0xa42484a0,
+ 0xc80bcbc3, 0x50134353, 0x080a0a02, 0x84078783,
+ 0xd819c9d1, 0x4c0c4c40, 0x80038383, 0x8c0f8f83,
+ 0xcc0ecec2, 0x383b0b33, 0x480a4a42, 0xb43787b3 },
+ { 0xa1a82989, 0x81840585, 0xd2d416c6, 0xd3d013c3,
+ 0x50541444, 0x111c1d0d, 0xa0ac2c8c, 0x21242505,
+ 0x515c1d4d, 0x43400343, 0x10181808, 0x121c1e0e,
+ 0x51501141, 0xf0fc3ccc, 0xc2c80aca, 0x63602343,
+ 0x20282808, 0x40440444, 0x20202000, 0x919c1d8d,
+ 0xe0e020c0, 0xe2e022c2, 0xc0c808c8, 0x13141707,
+ 0xa1a42585, 0x838c0f8f, 0x03000303, 0x73783b4b,
+ 0xb3b83b8b, 0x13101303, 0xd2d012c2, 0xe2ec2ece,
+ 0x70703040, 0x808c0c8c, 0x333c3f0f, 0xa0a82888,
+ 0x32303202, 0xd1dc1dcd, 0xf2f436c6, 0x70743444,
+ 0xe0ec2ccc, 0x91941585, 0x03080b0b, 0x53541747,
+ 0x505c1c4c, 0x53581b4b, 0xb1bc3d8d, 0x01000101,
+ 0x20242404, 0x101c1c0c, 0x73703343, 0x90981888,
+ 0x10101000, 0xc0cc0ccc, 0xf2f032c2, 0xd1d819c9,
+ 0x202c2c0c, 0xe3e427c7, 0x72703242, 0x83800383,
+ 0x93981b8b, 0xd1d011c1, 0x82840686, 0xc1c809c9,
+ 0x60602040, 0x50501040, 0xa3a02383, 0xe3e82bcb,
+ 0x010c0d0d, 0xb2b43686, 0x929c1e8e, 0x434c0f4f,
+ 0xb3b43787, 0x52581a4a, 0xc2c406c6, 0x70783848,
+ 0xa2a42686, 0x12101202, 0xa3ac2f8f, 0xd1d415c5,
+ 0x61602141, 0xc3c003c3, 0xb0b43484, 0x41400141,
+ 0x52501242, 0x717c3d4d, 0x818c0d8d, 0x00080808,
+ 0x131c1f0f, 0x91981989, 0x00000000, 0x11181909,
+ 0x00040404, 0x53501343, 0xf3f437c7, 0xe1e021c1,
+ 0xf1fc3dcd, 0x72743646, 0x232c2f0f, 0x23242707,
+ 0xb0b03080, 0x83880b8b, 0x020c0e0e, 0xa3a82b8b,
+ 0xa2a02282, 0x626c2e4e, 0x93901383, 0x414c0d4d,
+ 0x61682949, 0x707c3c4c, 0x01080909, 0x02080a0a,
+ 0xb3bc3f8f, 0xe3ec2fcf, 0xf3f033c3, 0xc1c405c5,
+ 0x83840787, 0x10141404, 0xf2fc3ece, 0x60642444,
+ 0xd2dc1ece, 0x222c2e0e, 0x43480b4b, 0x12181a0a,
+ 0x02040606, 0x21202101, 0x63682b4b, 0x62642646,
+ 0x02000202, 0xf1f435c5, 0x92901282, 0x82880a8a,
+ 0x000c0c0c, 0xb3b03383, 0x727c3e4e, 0xd0d010c0,
+ 0x72783a4a, 0x43440747, 0x92941686, 0xe1e425c5,
+ 0x22242606, 0x80800080, 0xa1ac2d8d, 0xd3dc1fcf,
+ 0xa1a02181, 0x30303000, 0x33343707, 0xa2ac2e8e,
+ 0x32343606, 0x11141505, 0x22202202, 0x30383808,
+ 0xf0f434c4, 0xa3a42787, 0x41440545, 0x404c0c4c,
+ 0x81800181, 0xe1e829c9, 0x80840484, 0x93941787,
+ 0x31343505, 0xc3c80bcb, 0xc2cc0ece, 0x303c3c0c,
+ 0x71703141, 0x11101101, 0xc3c407c7, 0x81880989,
+ 0x71743545, 0xf3f83bcb, 0xd2d81aca, 0xf0f838c8,
+ 0x90941484, 0x51581949, 0x82800282, 0xc0c404c4,
+ 0xf3fc3fcf, 0x41480949, 0x31383909, 0x63642747,
+ 0xc0c000c0, 0xc3cc0fcf, 0xd3d417c7, 0xb0b83888,
+ 0x030c0f0f, 0x828c0e8e, 0x42400242, 0x23202303,
+ 0x91901181, 0x606c2c4c, 0xd3d81bcb, 0xa0a42484,
+ 0x30343404, 0xf1f031c1, 0x40480848, 0xc2c002c2,
+ 0x636c2f4f, 0x313c3d0d, 0x212c2d0d, 0x40400040,
+ 0xb2bc3e8e, 0x323c3e0e, 0xb0bc3c8c, 0xc1c001c1,
+ 0xa2a82a8a, 0xb2b83a8a, 0x424c0e4e, 0x51541545,
+ 0x33383b0b, 0xd0dc1ccc, 0x60682848, 0x737c3f4f,
+ 0x909c1c8c, 0xd0d818c8, 0x42480a4a, 0x52541646,
+ 0x73743747, 0xa0a02080, 0xe1ec2dcd, 0x42440646,
+ 0xb1b43585, 0x23282b0b, 0x61642545, 0xf2f83aca,
+ 0xe3e023c3, 0xb1b83989, 0xb1b03181, 0x939c1f8f,
+ 0x525c1e4e, 0xf1f839c9, 0xe2e426c6, 0xb2b03282,
+ 0x31303101, 0xe2e82aca, 0x616c2d4d, 0x535c1f4f,
+ 0xe0e424c4, 0xf0f030c0, 0xc1cc0dcd, 0x80880888,
+ 0x12141606, 0x32383a0a, 0x50581848, 0xd0d414c4,
+ 0x62602242, 0x21282909, 0x03040707, 0x33303303,
+ 0xe0e828c8, 0x13181b0b, 0x01040505, 0x71783949,
+ 0x90901080, 0x62682a4a, 0x22282a0a, 0x92981a8a },
+ { 0x08303838, 0xc8e0e828, 0x0d212c2d, 0x86a2a426,
+ 0xcfc3cc0f, 0xced2dc1e, 0x83b3b033, 0x88b0b838,
+ 0x8fa3ac2f, 0x40606020, 0x45515415, 0xc7c3c407,
+ 0x44404404, 0x4f636c2f, 0x4b63682b, 0x4b53581b,
+ 0xc3c3c003, 0x42626022, 0x03333033, 0x85b1b435,
+ 0x09212829, 0x80a0a020, 0xc2e2e022, 0x87a3a427,
+ 0xc3d3d013, 0x81919011, 0x01111011, 0x06020406,
+ 0x0c101c1c, 0x8cb0bc3c, 0x06323436, 0x4b43480b,
+ 0xcfe3ec2f, 0x88808808, 0x4c606c2c, 0x88a0a828,
+ 0x07131417, 0xc4c0c404, 0x06121416, 0xc4f0f434,
+ 0xc2c2c002, 0x45414405, 0xc1e1e021, 0xc6d2d416,
+ 0x0f333c3f, 0x0d313c3d, 0x8e828c0e, 0x88909818,
+ 0x08202828, 0x4e424c0e, 0xc6f2f436, 0x0e323c3e,
+ 0x85a1a425, 0xc9f1f839, 0x0d010c0d, 0xcfd3dc1f,
+ 0xc8d0d818, 0x0b23282b, 0x46626426, 0x4a72783a,
+ 0x07232427, 0x0f232c2f, 0xc1f1f031, 0x42727032,
+ 0x42424002, 0xc4d0d414, 0x41414001, 0xc0c0c000,
+ 0x43737033, 0x47636427, 0x8ca0ac2c, 0x8b83880b,
+ 0xc7f3f437, 0x8da1ac2d, 0x80808000, 0x0f131c1f,
+ 0xcac2c80a, 0x0c202c2c, 0x8aa2a82a, 0x04303434,
+ 0xc2d2d012, 0x0b03080b, 0xcee2ec2e, 0xc9e1e829,
+ 0x4d515c1d, 0x84909414, 0x08101818, 0xc8f0f838,
+ 0x47535417, 0x8ea2ac2e, 0x08000808, 0xc5c1c405,
+ 0x03131013, 0xcdc1cc0d, 0x86828406, 0x89b1b839,
+ 0xcff3fc3f, 0x4d717c3d, 0xc1c1c001, 0x01313031,
+ 0xc5f1f435, 0x8a82880a, 0x4a62682a, 0x81b1b031,
+ 0xc1d1d011, 0x00202020, 0xc7d3d417, 0x02020002,
+ 0x02222022, 0x04000404, 0x48606828, 0x41717031,
+ 0x07030407, 0xcbd3d81b, 0x8d919c1d, 0x89919819,
+ 0x41616021, 0x8eb2bc3e, 0xc6e2e426, 0x49515819,
+ 0xcdd1dc1d, 0x41515011, 0x80909010, 0xccd0dc1c,
+ 0x8a92981a, 0x83a3a023, 0x8ba3a82b, 0xc0d0d010,
+ 0x81818001, 0x0f030c0f, 0x47434407, 0x0a12181a,
+ 0xc3e3e023, 0xcce0ec2c, 0x8d818c0d, 0x8fb3bc3f,
+ 0x86929416, 0x4b73783b, 0x4c505c1c, 0x82a2a022,
+ 0x81a1a021, 0x43636023, 0x03232023, 0x4d414c0d,
+ 0xc8c0c808, 0x8e929c1e, 0x8c909c1c, 0x0a32383a,
+ 0x0c000c0c, 0x0e222c2e, 0x8ab2b83a, 0x4e626c2e,
+ 0x8f939c1f, 0x4a52581a, 0xc2f2f032, 0x82929012,
+ 0xc3f3f033, 0x49414809, 0x48707838, 0xccc0cc0c,
+ 0x05111415, 0xcbf3f83b, 0x40707030, 0x45717435,
+ 0x4f737c3f, 0x05313435, 0x00101010, 0x03030003,
+ 0x44606424, 0x4d616c2d, 0xc6c2c406, 0x44707434,
+ 0xc5d1d415, 0x84b0b434, 0xcae2e82a, 0x09010809,
+ 0x46727436, 0x09111819, 0xcef2fc3e, 0x40404000,
+ 0x02121012, 0xc0e0e020, 0x8db1bc3d, 0x05010405,
+ 0xcaf2f83a, 0x01010001, 0xc0f0f030, 0x0a22282a,
+ 0x4e525c1e, 0x89a1a829, 0x46525416, 0x43434003,
+ 0x85818405, 0x04101414, 0x89818809, 0x8b93981b,
+ 0x80b0b030, 0xc5e1e425, 0x48404808, 0x49717839,
+ 0x87939417, 0xccf0fc3c, 0x0e121c1e, 0x82828002,
+ 0x01212021, 0x8c808c0c, 0x0b13181b, 0x4f535c1f,
+ 0x47737437, 0x44505414, 0x82b2b032, 0x0d111c1d,
+ 0x05212425, 0x4f434c0f, 0x00000000, 0x46424406,
+ 0xcde1ec2d, 0x48505818, 0x42525012, 0xcbe3e82b,
+ 0x4e727c3e, 0xcad2d81a, 0xc9c1c809, 0xcdf1fc3d,
+ 0x00303030, 0x85919415, 0x45616425, 0x0c303c3c,
+ 0x86b2b436, 0xc4e0e424, 0x8bb3b83b, 0x4c707c3c,
+ 0x0e020c0e, 0x40505010, 0x09313839, 0x06222426,
+ 0x02323032, 0x84808404, 0x49616829, 0x83939013,
+ 0x07333437, 0xc7e3e427, 0x04202424, 0x84a0a424,
+ 0xcbc3c80b, 0x43535013, 0x0a02080a, 0x87838407,
+ 0xc9d1d819, 0x4c404c0c, 0x83838003, 0x8f838c0f,
+ 0xcec2cc0e, 0x0b33383b, 0x4a42480a, 0x87b3b437 }
+};
+
+/*
+ * Key schedule constants - golden ratio.
+ *
+ * KC0 is the base constant; every following KCn is KC(n-1) rotated left
+ * by one bit (e.g. 0x9e3779b9 -> 0x3c6ef373). SEED_set_key consumes them
+ * in order, one constant per pair of round-key words.
+ */
+#define KC0 0x9e3779b9
+#define KC1 0x3c6ef373
+#define KC2 0x78dde6e6
+#define KC3 0xf1bbcdcc
+#define KC4 0xe3779b99
+#define KC5 0xc6ef3733
+#define KC6 0x8dde6e67
+#define KC7 0x1bbcdccf
+#define KC8 0x3779b99e
+#define KC9 0x6ef3733c
+#define KC10 0xdde6e678
+#define KC11 0xbbcdccf1
+#define KC12 0x779b99e3
+#define KC13 0xef3733c6
+#define KC14 0xde6e678d
+#define KC15 0xbcdccf1b
+
+/*
+ * Expand a raw key into the round-key schedule.
+ *  "rawkey" supplies 16 bytes, read as four big-endian 32-bit words
+ *           K0..K3
+ *  "ks" receives 32 words, written in pairs to ks->data[0] ..
+ *       ks->data[31]
+ *
+ * Each pair is G_FUNC applied to two temporaries built from the current
+ * K0..K3 and one KCn constant. Between pairs the key words are rotated:
+ * KEYSCHEDULE_UPDATE1 rotates the 64-bit value K0:K1 right by 8 bits,
+ * KEYSCHEDULE_UPDATE0 rotates K2:K3 left by 8 bits, and the two
+ * alternate, starting with UPDATE1.
+ */
+void
+SEED_set_key(const unsigned char rawkey[SEED_KEY_LENGTH],
+ SEED_KEY_SCHEDULE *ks)
+{
+ seed_word K0, K1, K2, K3;
+ seed_word t0, t1;
+
+ /* load the key as four big-endian words */
+ char2word(rawkey, K0);
+ char2word(rawkey + 4, K1);
+ char2word(rawkey + 8, K2);
+ char2word(rawkey + 12, K3);
+
+ /* first pair uses the unrotated key words and KC0 */
+ t0 = (K0 + K2 - KC0);
+ t1 = (K1 - K3 + KC0);
+ KEYUPDATE_TEMP(t0, t1, &ks->data[0]);
+ /* remaining 15 pairs: rotate, recompute t0/t1 with KCn, store */
+ KEYSCHEDULE_UPDATE1(t0, t1, K0, K1, K2, K3, KC1);
+ KEYUPDATE_TEMP(t0, t1, &ks->data[2]);
+ KEYSCHEDULE_UPDATE0(t0, t1, K0, K1, K2, K3, KC2);
+ KEYUPDATE_TEMP(t0, t1, &ks->data[4]);
+ KEYSCHEDULE_UPDATE1(t0, t1, K0, K1, K2, K3, KC3);
+ KEYUPDATE_TEMP(t0, t1, &ks->data[6]);
+ KEYSCHEDULE_UPDATE0(t0, t1, K0, K1, K2, K3, KC4);
+ KEYUPDATE_TEMP(t0, t1, &ks->data[8]);
+ KEYSCHEDULE_UPDATE1(t0, t1, K0, K1, K2, K3, KC5);
+ KEYUPDATE_TEMP(t0, t1, &ks->data[10]);
+ KEYSCHEDULE_UPDATE0(t0, t1, K0, K1, K2, K3, KC6);
+ KEYUPDATE_TEMP(t0, t1, &ks->data[12]);
+ KEYSCHEDULE_UPDATE1(t0, t1, K0, K1, K2, K3, KC7);
+ KEYUPDATE_TEMP(t0, t1, &ks->data[14]);
+ KEYSCHEDULE_UPDATE0(t0, t1, K0, K1, K2, K3, KC8);
+ KEYUPDATE_TEMP(t0, t1, &ks->data[16]);
+ KEYSCHEDULE_UPDATE1(t0, t1, K0, K1, K2, K3, KC9);
+ KEYUPDATE_TEMP(t0, t1, &ks->data[18]);
+ KEYSCHEDULE_UPDATE0(t0, t1, K0, K1, K2, K3, KC10);
+ KEYUPDATE_TEMP(t0, t1, &ks->data[20]);
+ KEYSCHEDULE_UPDATE1(t0, t1, K0, K1, K2, K3, KC11);
+ KEYUPDATE_TEMP(t0, t1, &ks->data[22]);
+ KEYSCHEDULE_UPDATE0(t0, t1, K0, K1, K2, K3, KC12);
+ KEYUPDATE_TEMP(t0, t1, &ks->data[24]);
+ KEYSCHEDULE_UPDATE1(t0, t1, K0, K1, K2, K3, KC13);
+ KEYUPDATE_TEMP(t0, t1, &ks->data[26]);
+ KEYSCHEDULE_UPDATE0(t0, t1, K0, K1, K2, K3, KC14);
+ KEYUPDATE_TEMP(t0, t1, &ks->data[28]);
+ KEYSCHEDULE_UPDATE1(t0, t1, K0, K1, K2, K3, KC15);
+ KEYUPDATE_TEMP(t0, t1, &ks->data[30]);
+}
+
+/*
+ * Encrypt one 16-byte block.
+ *  "s" is the input block, read as four big-endian words L0, L1, R0, R1
+ *  "d" receives the 16-byte output block
+ *  "ks" is the schedule produced by SEED_set_key (E_SEED refers to it by
+ *       this exact name)
+ *
+ * The input is fully loaded into locals before anything is written to
+ * "d", so "s" and "d" may be the same buffer.
+ */
+void
+SEED_encrypt(const unsigned char s[SEED_BLOCK_SIZE],
+ unsigned char d[SEED_BLOCK_SIZE],
+ const SEED_KEY_SCHEDULE *ks)
+{
+ seed_word L0, L1, R0, R1;
+ seed_word t0, t1;
+
+ char2word(s, L0);
+ char2word(s + 4, L1);
+ char2word(s + 8, R0);
+ char2word(s + 12, R1);
+
+ /*
+  * 16 steps. Each step derives t0/t1 from one half and the key pair
+  * ks->data[n], ks->data[n + 1], then XORs them into the other half;
+  * the roles of the halves alternate and n advances from 0 to 30.
+  */
+ E_SEED(t0, t1, L0, L1, R0, R1, 0);
+ E_SEED(t0, t1, R0, R1, L0, L1, 2);
+ E_SEED(t0, t1, L0, L1, R0, R1, 4);
+ E_SEED(t0, t1, R0, R1, L0, L1, 6);
+ E_SEED(t0, t1, L0, L1, R0, R1, 8);
+ E_SEED(t0, t1, R0, R1, L0, L1, 10);
+ E_SEED(t0, t1, L0, L1, R0, R1, 12);
+ E_SEED(t0, t1, R0, R1, L0, L1, 14);
+ E_SEED(t0, t1, L0, L1, R0, R1, 16);
+ E_SEED(t0, t1, R0, R1, L0, L1, 18);
+ E_SEED(t0, t1, L0, L1, R0, R1, 20);
+ E_SEED(t0, t1, R0, R1, L0, L1, 22);
+ E_SEED(t0, t1, L0, L1, R0, R1, 24);
+ E_SEED(t0, t1, R0, R1, L0, L1, 26);
+ E_SEED(t0, t1, L0, L1, R0, R1, 28);
+ E_SEED(t0, t1, R0, R1, L0, L1, 30);
+
+ /* the two halves are emitted in swapped order: R first, then L */
+ word2char(R0, d);
+ word2char(R1, d + 4);
+ word2char(L0, d + 8);
+ word2char(L1, d + 12);
+}
+
+/*
+ * Decrypt one 16-byte block.
+ *  "s" is the input block, read as four big-endian words L0, L1, R0, R1
+ *  "d" receives the 16-byte output block
+ *  "ks" is the schedule produced by SEED_set_key (E_SEED refers to it by
+ *       this exact name)
+ *
+ * Same step sequence as SEED_encrypt, but the key pairs are consumed in
+ * the opposite order (30 down to 0). The input is fully loaded before
+ * "d" is written, so "s" and "d" may be the same buffer.
+ */
+void
+SEED_decrypt(const unsigned char s[SEED_BLOCK_SIZE],
+ unsigned char d[SEED_BLOCK_SIZE],
+ const SEED_KEY_SCHEDULE *ks)
+{
+ seed_word L0, L1, R0, R1;
+ seed_word t0, t1;
+
+ char2word(s, L0);
+ char2word(s + 4, L1);
+ char2word(s + 8, R0);
+ char2word(s + 12, R1);
+
+ /* 16 steps with the round-key pairs walked backwards */
+ E_SEED(t0, t1, L0, L1, R0, R1, 30);
+ E_SEED(t0, t1, R0, R1, L0, L1, 28);
+ E_SEED(t0, t1, L0, L1, R0, R1, 26);
+ E_SEED(t0, t1, R0, R1, L0, L1, 24);
+ E_SEED(t0, t1, L0, L1, R0, R1, 22);
+ E_SEED(t0, t1, R0, R1, L0, L1, 20);
+ E_SEED(t0, t1, L0, L1, R0, R1, 18);
+ E_SEED(t0, t1, R0, R1, L0, L1, 16);
+ E_SEED(t0, t1, L0, L1, R0, R1, 14);
+ E_SEED(t0, t1, R0, R1, L0, L1, 12);
+ E_SEED(t0, t1, L0, L1, R0, R1, 10);
+ E_SEED(t0, t1, R0, R1, L0, L1, 8);
+ E_SEED(t0, t1, L0, L1, R0, R1, 6);
+ E_SEED(t0, t1, R0, R1, L0, L1, 4);
+ E_SEED(t0, t1, L0, L1, R0, R1, 2);
+ E_SEED(t0, t1, R0, R1, L0, L1, 0);
+
+ /* the two halves are emitted in swapped order: R first, then L */
+ word2char(R0, d);
+ word2char(R1, d + 4);
+ word2char(L0, d + 8);
+ word2char(L1, d + 12);
+}
+
+/*
+ * Process exactly one block from "in" into "out" with schedule "ks":
+ * the block is encrypted when "enc" is non-zero and decrypted otherwise.
+ */
+void
+SEED_ecb_encrypt(const unsigned char *in,
+                 unsigned char *out,
+                 const SEED_KEY_SCHEDULE *ks, int enc)
+{
+    if (!enc) {
+        SEED_decrypt(in, out, ks);
+        return;
+    }
+
+    SEED_encrypt(in, out, ks);
+}
+
+/*
+ * CBC-mode encryption/decryption.
+ *  "in" is the input data
+ *  "out" receives the output; it may be the same pointer as "in"
+ *  "len" is the number of input bytes
+ *  "ks" is the schedule produced by SEED_set_key
+ *  "ivec" is the chaining value; on return it holds the last ciphertext
+ *         block so that a following call continues the chain
+ *  "enc" selects encryption (non-zero) or decryption (zero)
+ *
+ * A trailing partial block is tolerated: when encrypting, the missing
+ * bytes are taken from the chaining value and a FULL block is written
+ * to "out"; when decrypting, a FULL block is read from "in" and only
+ * "len" bytes are written. Callers that pass such lengths must size
+ * their buffers accordingly.
+ */
+void
+SEED_cbc_encrypt(const unsigned char *in, unsigned char *out,
+                 size_t len, const SEED_KEY_SCHEDULE *ks,
+                 unsigned char ivec[SEED_BLOCK_SIZE], int enc)
+{
+    size_t n;
+    unsigned char tmp[SEED_BLOCK_SIZE];
+    unsigned char plain[SEED_BLOCK_SIZE];
+    const unsigned char *iv = ivec;
+
+    if (enc) {
+        while (len >= SEED_BLOCK_SIZE) {
+            for (n = 0; n < SEED_BLOCK_SIZE; ++n)
+                out[n] = in[n] ^ iv[n];
+
+            SEED_encrypt(out, out, ks);
+            iv = out;
+            len -= SEED_BLOCK_SIZE;
+            in += SEED_BLOCK_SIZE;
+            out += SEED_BLOCK_SIZE;
+        }
+
+        if (len) {
+            for (n = 0; n < len; ++n)
+                out[n] = in[n] ^ iv[n];
+
+            for (n = len; n < SEED_BLOCK_SIZE; ++n)
+                out[n] = iv[n];
+
+            SEED_encrypt(out, out, ks);
+            iv = out;
+        }
+
+        /* iv still equals ivec when nothing was processed; memcpy must
+         * not be called with overlapping source and destination. */
+        if (iv != ivec)
+            memcpy(ivec, iv, SEED_BLOCK_SIZE);
+    } else if (in != out) {
+        while (len >= SEED_BLOCK_SIZE) {
+            SEED_decrypt(in, out, ks);
+
+            for (n = 0; n < SEED_BLOCK_SIZE; ++n)
+                out[n] ^= iv[n];
+
+            iv = in;
+            len -= SEED_BLOCK_SIZE;
+            in += SEED_BLOCK_SIZE;
+            out += SEED_BLOCK_SIZE;
+        }
+
+        if (len) {
+            SEED_decrypt(in, tmp, ks);
+
+            for (n = 0; n < len; ++n)
+                out[n] = tmp[n] ^ iv[n];
+
+            iv = in;
+        }
+
+        /* same overlap guard as in the encrypt branch */
+        if (iv != ivec)
+            memcpy(ivec, iv, SEED_BLOCK_SIZE);
+    } else {
+        /* in-place: the ciphertext block must be saved before it is
+         * overwritten, because it is the next chaining value */
+        while (len >= SEED_BLOCK_SIZE) {
+            memcpy(tmp, in, SEED_BLOCK_SIZE);
+            SEED_decrypt(in, out, ks);
+
+            for (n = 0; n < SEED_BLOCK_SIZE; ++n)
+                out[n] ^= ivec[n];
+
+            memcpy(ivec, tmp, SEED_BLOCK_SIZE);
+            len -= SEED_BLOCK_SIZE;
+            in += SEED_BLOCK_SIZE;
+            out += SEED_BLOCK_SIZE;
+        }
+
+        if (len) {
+            memcpy(tmp, in, SEED_BLOCK_SIZE);
+            SEED_decrypt(tmp, plain, ks);
+
+            for (n = 0; n < len; ++n)
+                out[n] = plain[n] ^ ivec[n];
+
+            /* chain on the saved ciphertext, matching the out-of-place
+             * branch (the decrypted bytes are not a chaining value) */
+            memcpy(ivec, tmp, SEED_BLOCK_SIZE);
+        }
+    }
+}
+
+/*
+ * Allocate a SEEDContext with PORT_ZNew and hand it back unchanged;
+ * the result is whatever PORT_ZNew returned.
+ */
+SEEDContext *
+SEED_AllocateContext(void)
+{
+    SEEDContext *fresh = PORT_ZNew(SEEDContext);
+
+    return fresh;
+}
+
+/*
+ * Initialize an already allocated context.
+ *  "cx" is the context to fill in
+ *  "key" is the raw key; "keylen" must be SEED_KEY_LENGTH
+ *  "iv" is the SEED_BLOCK_SIZE-byte initialization vector; it is only
+ *       read (and only required) when mode is NSS_SEED_CBC
+ *  "mode" is NSS_SEED (ECB) or NSS_SEED_CBC
+ *  "encrypt" is stored in the context; SEED_Encrypt requires it to be
+ *            non-zero and SEED_Decrypt requires it to be zero
+ *  "unused" is ignored
+ *
+ * Returns SECSuccess, or SECFailure with SEC_ERROR_INVALID_ARGS when an
+ * argument is missing, the key length is wrong or the mode is unknown.
+ * The context is left untouched on failure.
+ */
+SECStatus
+SEED_InitContext(SEEDContext *cx, const unsigned char *key,
+                 unsigned int keylen, const unsigned char *iv,
+                 int mode, unsigned int encrypt, unsigned int unused)
+{
+    /* SEED_set_key always reads SEED_KEY_LENGTH bytes from key */
+    if (!cx || !key || keylen != SEED_KEY_LENGTH) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    switch (mode) {
+        case NSS_SEED:
+            break;
+
+        case NSS_SEED_CBC:
+            if (!iv) {
+                PORT_SetError(SEC_ERROR_INVALID_ARGS);
+                return SECFailure;
+            }
+            memcpy(cx->iv, iv, sizeof cx->iv);
+            break;
+
+        default:
+            PORT_SetError(SEC_ERROR_INVALID_ARGS);
+            return SECFailure;
+    }
+
+    SEED_set_key(key, &cx->ks);
+    cx->mode = mode;
+    cx->encrypt = encrypt;
+
+    return SECSuccess;
+}
+
+/*
+ * Allocate a context and initialize it in one step.
+ *  "key" is a SEED_KEY_LENGTH-byte key
+ *  "iv" is the initialization vector (used for NSS_SEED_CBC)
+ *  "mode" is NSS_SEED or NSS_SEED_CBC
+ *  "encrypt" selects the direction the context will be used for
+ *
+ * Returns the new context, or NULL when allocation or initialization
+ * fails. A context whose initialization failed is zeroed and freed
+ * before returning.
+ */
+SEEDContext *
+SEED_CreateContext(const unsigned char *key, const unsigned char *iv,
+                   int mode, PRBool encrypt)
+{
+    SEEDContext *cx = PORT_ZNew(SEEDContext);
+
+    /* Stop here on allocation failure so that SEED_InitContext does not
+     * replace the allocator's error with SEC_ERROR_INVALID_ARGS. */
+    if (!cx) {
+        return NULL;
+    }
+
+    if (SEED_InitContext(cx, key, SEED_KEY_LENGTH, iv, mode,
+                         encrypt, 0) != SECSuccess) {
+        PORT_ZFree(cx, sizeof *cx);
+        return NULL;
+    }
+
+    return cx;
+}
+
+/*
+ * Wipe a context and optionally release it.
+ *  "cx" is the context; NULL is accepted and ignored
+ *  "freeit" when true, the context memory is freed after being zeroed
+ */
+void
+SEED_DestroyContext(SEEDContext *cx, PRBool freeit)
+{
+    if (!cx) {
+        return;
+    }
+
+    /* clear key schedule and IV before the memory can be reused */
+    memset(cx, 0, sizeof *cx);
+
+    if (freeit) {
+        PORT_Free(cx);
+    }
+}
+
+/*
+ * Encrypt "inLen" bytes with an initialized encryption context.
+ *  "cx" is a context initialized with encrypt != 0
+ *  "out" receives the ciphertext
+ *  "outLen" receives the number of bytes written (equal to inLen)
+ *  "maxOutLen" is the capacity of "out"
+ *  "in" is the plaintext; "inLen" must be a multiple of SEED_BLOCK_SIZE
+ *
+ * Returns SECSuccess, or SECFailure with
+ *  SEC_ERROR_INVALID_ARGS  cx is NULL, is a decryption context, or has
+ *                          an unknown mode
+ *  SEC_ERROR_INPUT_LEN     inLen is not a whole number of blocks
+ *  SEC_ERROR_OUTPUT_LEN    out is too small for inLen bytes
+ */
+SECStatus
+SEED_Encrypt(SEEDContext *cx, unsigned char *out, unsigned int *outLen,
+             unsigned int maxOutLen, const unsigned char *in,
+             unsigned int inLen)
+{
+    unsigned int off;
+
+    if (!cx) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    if (!cx->encrypt) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* Both modes work on whole blocks; a partial block would make the
+     * CBC helper write past the inLen bytes reported to the caller. */
+    if (inLen % SEED_BLOCK_SIZE != 0) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+
+    if (maxOutLen < inLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    switch (cx->mode) {
+        case NSS_SEED:
+            /* SEED_ecb_encrypt handles a single block, so walk the
+             * input block by block to honor the reported outLen */
+            for (off = 0; off < inLen; off += SEED_BLOCK_SIZE) {
+                SEED_ecb_encrypt(in + off, out + off, &cx->ks, 1);
+            }
+            *outLen = inLen;
+            break;
+
+        case NSS_SEED_CBC:
+            SEED_cbc_encrypt(in, out, inLen, &cx->ks, cx->iv, 1);
+            *outLen = inLen;
+            break;
+
+        default:
+            PORT_SetError(SEC_ERROR_INVALID_ARGS);
+            return SECFailure;
+    }
+
+    return SECSuccess;
+}
+
+/*
+ * Decrypt "inLen" bytes with an initialized decryption context.
+ *  "cx" is a context initialized with encrypt == 0
+ *  "out" receives the plaintext
+ *  "outLen" receives the number of bytes written (equal to inLen)
+ *  "maxOutLen" is the capacity of "out"
+ *  "in" is the ciphertext; "inLen" must be a multiple of SEED_BLOCK_SIZE
+ *
+ * Returns SECSuccess, or SECFailure with
+ *  SEC_ERROR_INVALID_ARGS  cx is NULL, is an encryption context, or has
+ *                          an unknown mode
+ *  SEC_ERROR_INPUT_LEN     inLen is not a whole number of blocks
+ *  SEC_ERROR_OUTPUT_LEN    out is too small for inLen bytes
+ */
+SECStatus
+SEED_Decrypt(SEEDContext *cx, unsigned char *out, unsigned int *outLen,
+             unsigned int maxOutLen, const unsigned char *in,
+             unsigned int inLen)
+{
+    unsigned int off;
+
+    if (!cx) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    if (cx->encrypt) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* Both modes work on whole blocks; a partial block would make the
+     * CBC helper read past the inLen bytes supplied by the caller. */
+    if (inLen % SEED_BLOCK_SIZE != 0) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+
+    if (maxOutLen < inLen) {
+        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
+        return SECFailure;
+    }
+
+    switch (cx->mode) {
+        case NSS_SEED:
+            /* SEED_ecb_encrypt handles a single block, so walk the
+             * input block by block to honor the reported outLen */
+            for (off = 0; off < inLen; off += SEED_BLOCK_SIZE) {
+                SEED_ecb_encrypt(in + off, out + off, &cx->ks, 0);
+            }
+            *outLen = inLen;
+            break;
+
+        case NSS_SEED_CBC:
+            SEED_cbc_encrypt(in, out, inLen, &cx->ks, cx->iv, 0);
+            *outLen = inLen;
+            break;
+
+        default:
+            PORT_SetError(SEC_ERROR_INVALID_ARGS);
+            return SECFailure;
+    }
+
+    return SECSuccess;
+}
diff --git a/security/nss/lib/freebl/seed.h b/security/nss/lib/freebl/seed.h
new file mode 100644
index 000000000..f527165b7
--- /dev/null
+++ b/security/nss/lib/freebl/seed.h
@@ -0,0 +1,125 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef HEADER_SEED_H
+#define HEADER_SEED_H
+
+#include <string.h>
+#include "blapi.h"
+
+#if !defined(NO_SYS_TYPES_H)
+#include <sys/types.h>
+#endif
+
+typedef PRUint32 seed_word;
+
+/*
+ * Helper macros for the SEED implementation.
+ *
+ * All of them evaluate their arguments more than once, so pass only
+ * side-effect-free expressions. The multi-statement macros are wrapped
+ * in do { } while (0) so that each one behaves as a single statement
+ * (safe under an unbraced if/else); use them followed by a semicolon.
+ */
+
+/*
+ * G function: each byte of v indexes its own table (byte 0 -> SS[0],
+ * ... byte 3 -> SS[3]) and the four table words are XORed together.
+ * A table named SS must be in scope where the macro is used.
+ */
+#define G_FUNC(v)                  \
+    (SS[0][((v)&0xff)] ^           \
+     SS[1][((v) >> 8 & 0xff)] ^    \
+     SS[2][((v) >> 16 & 0xff)] ^   \
+     SS[3][((v) >> 24 & 0xff)])
+
+/* Load four bytes at c into the word i, most significant byte first. */
+#define char2word(c, i)                      \
+    ((i) = ((((seed_word)((c)[0])) << 24) |  \
+            (((seed_word)((c)[1])) << 16) |  \
+            (((seed_word)((c)[2])) << 8) |   \
+            ((seed_word)((c)[3]))))
+
+/* Store the word l into four bytes at c, most significant byte first. */
+#define word2char(l, c)                           \
+    do {                                          \
+        *((c) + 0) = (unsigned char)((l) >> 24);  \
+        *((c) + 1) = (unsigned char)((l) >> 16);  \
+        *((c) + 2) = (unsigned char)((l) >> 8);   \
+        *((c) + 3) = (unsigned char)((l));        \
+    } while (0)
+
+/*
+ * Key schedule step: rotate the 64-bit value K2:K3 left by 8 bits, then
+ * recompute the temporaries T0/T1 from the key words and constant KC.
+ */
+#define KEYSCHEDULE_UPDATE0(T0, T1, K0, K1, K2, K3, KC) \
+    do {                                                \
+        (T0) = (K2);                                    \
+        (K2) = (((K2) << 8) ^ ((K3) >> 24));            \
+        (K3) = (((K3) << 8) ^ ((T0) >> 24));            \
+        (T0) = ((K0) + (K2) - (KC));                    \
+        (T1) = ((K1) + (KC) - (K3));                    \
+    } while (0)
+
+/*
+ * Key schedule step: rotate the 64-bit value K0:K1 right by 8 bits, then
+ * recompute the temporaries T0/T1 from the key words and constant KC.
+ */
+#define KEYSCHEDULE_UPDATE1(T0, T1, K0, K1, K2, K3, KC) \
+    do {                                                \
+        (T0) = (K0);                                    \
+        (K0) = (((K0) >> 8) ^ ((K1) << 24));            \
+        (K1) = (((K1) >> 8) ^ ((T0) << 24));            \
+        (T0) = ((K0) + (K2) - (KC));                    \
+        (T1) = ((K1) + (KC) - (K3));                    \
+    } while (0)
+
+/* Store G_FUNC(T0), G_FUNC(T1) as one pair of round-key words at K. */
+#define KEYUPDATE_TEMP(T0, T1, K)  \
+    do {                           \
+        (K)[0] = G_FUNC((T0));     \
+        (K)[1] = G_FUNC((T1));     \
+    } while (0)
+
+/* DST ^= SRC for a block held as four words. */
+#define XOR_SEEDBLOCK(DST, SRC)   \
+    do {                          \
+        (DST)[0] ^= (SRC)[0];     \
+        (DST)[1] ^= (SRC)[1];     \
+        (DST)[2] ^= (SRC)[2];     \
+        (DST)[3] ^= (SRC)[3];     \
+    } while (0)
+
+/* DST = SRC for a block held as four words. */
+#define MOV_SEEDBLOCK(DST, SRC)   \
+    do {                          \
+        (DST)[0] = (SRC)[0];      \
+        (DST)[1] = (SRC)[1];      \
+        (DST)[2] = (SRC)[2];      \
+        (DST)[3] = (SRC)[3];      \
+    } while (0)
+
+/* Load 16 bytes at C into the four-word array I. */
+#define CHAR2WORD(C, I)                \
+    do {                               \
+        char2word((C), (I)[0]);        \
+        char2word((C) + 4, (I)[1]);    \
+        char2word((C) + 8, (I)[2]);    \
+        char2word((C) + 12, (I)[3]);   \
+    } while (0)
+
+/* Store the four-word array I into 16 bytes at C. */
+#define WORD2CHAR(I, C)                \
+    do {                               \
+        word2char((I)[0], (C));        \
+        word2char((I)[1], (C) + 4);    \
+        word2char((I)[2], (C) + 8);    \
+        word2char((I)[3], (C) + 12);   \
+    } while (0)
+
+/*
+ * One cipher step: derive T0/T1 from the half X3:X4 and the key pair
+ * ks->data[rbase], ks->data[rbase + 1], then XOR them into X1:X2.
+ * Relies on a variable named "ks" (pointer to SEED_KEY_SCHEDULE) being
+ * in scope at the point of use.
+ */
+#define E_SEED(T0, T1, X1, X2, X3, X4, rbase)     \
+    do {                                          \
+        (T0) = (X3) ^ (ks->data)[(rbase)];        \
+        (T1) = (X4) ^ (ks->data)[(rbase) + 1];    \
+        (T1) ^= (T0);                             \
+        (T1) = G_FUNC(T1);                        \
+        (T0) += (T1);                             \
+        (T0) = G_FUNC(T0);                        \
+        (T1) += (T0);                             \
+        (T1) = G_FUNC(T1);                        \
+        (T0) += (T1);                             \
+        (X1) ^= (T0);                             \
+        (X2) ^= (T1);                             \
+    } while (0)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Expanded key: 32 words filled by SEED_set_key and consumed two at a
+ * time (data[n], data[n + 1]) by the E_SEED macro.
+ */
+typedef struct seed_key_st {
+ PRUint32 data[32];
+} SEED_KEY_SCHEDULE;
+
+/* Per-operation state kept by the SEED_*Context functions in seed.c. */
+struct SEEDContextStr {
+ unsigned char iv[SEED_BLOCK_SIZE]; /* CBC chaining value */
+ SEED_KEY_SCHEDULE ks; /* expanded key */
+ int mode; /* NSS_SEED or NSS_SEED_CBC */
+ unsigned int encrypt; /* non-zero: encrypt context; zero: decrypt */
+};
+
+/* Expand the SEED_KEY_LENGTH-byte "rawkey" into the schedule "ks". */
+void SEED_set_key(const unsigned char rawkey[SEED_KEY_LENGTH],
+ SEED_KEY_SCHEDULE *ks);
+
+/*
+ * Encrypt / decrypt a single block "s" into "d" using schedule "ks".
+ */
+void SEED_encrypt(const unsigned char s[SEED_BLOCK_SIZE],
+ unsigned char d[SEED_BLOCK_SIZE],
+ const SEED_KEY_SCHEDULE *ks);
+void SEED_decrypt(const unsigned char s[SEED_BLOCK_SIZE],
+ unsigned char d[SEED_BLOCK_SIZE],
+ const SEED_KEY_SCHEDULE *ks);
+
+/*
+ * Single-block ECB helper: encrypts when "enc" is non-zero, decrypts
+ * otherwise.
+ */
+void SEED_ecb_encrypt(const unsigned char *in, unsigned char *out,
+ const SEED_KEY_SCHEDULE *ks, int enc);
+/*
+ * CBC over "len" bytes; "ivec" is the chaining value and is updated in
+ * place. Encrypts when "enc" is non-zero, decrypts otherwise.
+ */
+void SEED_cbc_encrypt(const unsigned char *in, unsigned char *out,
+ size_t len, const SEED_KEY_SCHEDULE *ks,
+ unsigned char ivec[SEED_BLOCK_SIZE], int enc);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* HEADER_SEED_H */
diff --git a/security/nss/lib/freebl/sha-fast-amd64-sun.s b/security/nss/lib/freebl/sha-fast-amd64-sun.s
new file mode 100644
index 000000000..6430469a4
--- /dev/null
+++ b/security/nss/lib/freebl/sha-fast-amd64-sun.s
@@ -0,0 +1,2151 @@
+/ This Source Code Form is subject to the terms of the Mozilla Public
+/ License, v. 2.0. If a copy of the MPL was not distributed with this
+/ file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+ .file "sha_fast.c"
+ .text
+ .align 16
+/ SHA1_Begin: reset the state block addressed by %rdi.
+/ Writes six 64-bit slots at offsets 64..104: one zeroed slot followed by
+/ five words whose values match the SHA-1 initial hash values
+/ 0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0.
+/ NOTE(review): %rdi is presumably the SHA-1 context pointer and the
+/ zeroed slot its length counter -- confirm against sha_fast.c.
+.globl SHA1_Begin
+ .type SHA1_Begin, @function
+SHA1_Begin:
+.LFB4:
+/ Constants that do not fit a sign-extended 32-bit immediate go through
+/ a register first: movl zero-extends into the full 64-bit register.
+ movl $4023233417, %ecx
+ movl $2562383102, %edx
+ movl $3285377520, %eax
+ movq $0, 64(%rdi)
+ movq $1732584193, 72(%rdi)
+ movq %rcx, 80(%rdi)
+ movq %rdx, 88(%rdi)
+ movq $271733878, 96(%rdi)
+ movq %rax, 104(%rdi)
+ ret
+.LFE4:
+ .size SHA1_Begin, .-SHA1_Begin
+ .align 16
+ .type shaCompress, @function
+shaCompress:
+.LFB7:
+ pushq %r15
+.LCFI0:
+ pushq %r14
+.LCFI1:
+ pushq %r13
+.LCFI2:
+ pushq %r12
+.LCFI3:
+ movq -88(%rdi), %r12
+ movq -80(%rdi), %r10
+ movq -72(%rdi), %r13
+ movq -64(%rdi), %r8
+ pushq %rbx
+.LCFI4:
+ movq -56(%rdi), %rcx
+ movl (%rsi), %eax
+ movl %r12d, %edx
+ movq %r13, %r9
+ roll $5, %edx
+ movl 4(%rsi), %ebx
+ xorq %r8, %r9
+/APP
+ bswap %eax
+/NO_APP
+ andq %r10, %r9
+ mov %eax, %r15d
+ roll $30, %r10d
+ movq %r15, -48(%rdi)
+ xorq %r8, %r9
+ movq -48(%rdi), %r14
+ addq %r9, %rdx
+ movq %r10, %rax
+ movl %r12d, %r15d
+ addq %rcx, %rdx
+ xorq %r13, %rax
+ roll $30, %r15d
+ leaq 1518500249(%rdx,%r14), %rdx
+ andq %r12, %rax
+ movq %r15, %r12
+/APP
+ bswap %ebx
+/NO_APP
+ movl %edx, %ecx
+ mov %ebx, %r11d
+ xorq %r13, %rax
+ movq %r11, -40(%rdi)
+ roll $5, %ecx
+ movq -40(%rdi), %r9
+ addq %rax, %rcx
+ xorq %r10, %r12
+ movl 8(%rsi), %r14d
+ addq %r8, %rcx
+ andq %rdx, %r12
+ movl %edx, %r11d
+ leaq 1518500249(%rcx,%r9), %rcx
+ xorq %r10, %r12
+ roll $30, %r11d
+/APP
+ bswap %r14d
+/NO_APP
+ movl %ecx, %r8d
+ mov %r14d, %ebx
+ movl 12(%rsi), %r9d
+ movq %rbx, -32(%rdi)
+ roll $5, %r8d
+ movq -32(%rdi), %rax
+ addq %r12, %r8
+ movq %r11, %r12
+ movl %ecx, %ebx
+ addq %r13, %r8
+ xorq %r15, %r12
+ roll $30, %ebx
+ leaq 1518500249(%r8,%rax), %r8
+ andq %rcx, %r12
+ movl 16(%rsi), %eax
+/APP
+ bswap %r9d
+/NO_APP
+ movl %r8d, %edx
+ mov %r9d, %r14d
+ xorq %r15, %r12
+ movq %r14, -24(%rdi)
+ roll $5, %edx
+ movq -24(%rdi), %r13
+ addq %r12, %rdx
+ movq %rbx, %r12
+ movl %r8d, %r14d
+ addq %r10, %rdx
+ leaq 1518500249(%rdx,%r13), %rdx
+ movl 20(%rsi), %r13d
+/APP
+ bswap %eax
+/NO_APP
+ movl %edx, %ecx
+ mov %eax, %r9d
+ roll $5, %ecx
+ xorq %r11, %r12
+ movq %r9, -16(%rdi)
+ andq %r8, %r12
+ movq -16(%rdi), %r10
+ roll $30, %r14d
+ xorq %r11, %r12
+ movq %r14, %rax
+ movl %edx, %r9d
+ addq %r12, %rcx
+ xorq %rbx, %rax
+ roll $30, %r9d
+ addq %r15, %rcx
+ andq %rdx, %rax
+ leaq 1518500249(%rcx,%r10), %rcx
+ xorq %rbx, %rax
+ movl 24(%rsi), %r10d
+/APP
+ bswap %r13d
+/NO_APP
+ movl %ecx, %r8d
+ mov %r13d, %r15d
+ movq %r15, -8(%rdi)
+ roll $5, %r8d
+ movq -8(%rdi), %r12
+ addq %rax, %r8
+ movl %ecx, %r15d
+ addq %r11, %r8
+ movq %r9, %r11
+ roll $30, %r15d
+ leaq 1518500249(%r8,%r12), %r8
+ xorq %r14, %r11
+ movl 28(%rsi), %r12d
+/APP
+ bswap %r10d
+/NO_APP
+ andq %rcx, %r11
+ mov %r10d, %r13d
+ movl %r8d, %edx
+ movq %r13, (%rdi)
+ xorq %r14, %r11
+ movq (%rdi), %rax
+ roll $5, %edx
+ movq %r15, %r10
+ movl %r8d, %r13d
+ addq %r11, %rdx
+ xorq %r9, %r10
+ roll $30, %r13d
+ addq %rbx, %rdx
+ andq %r8, %r10
+ leaq 1518500249(%rdx,%rax), %rdx
+ xorq %r9, %r10
+ movl 32(%rsi), %eax
+/APP
+ bswap %r12d
+/NO_APP
+ movl %edx, %ecx
+ mov %r12d, %ebx
+ movq %rbx, 8(%rdi)
+ roll $5, %ecx
+ movq 8(%rdi), %r11
+ addq %r10, %rcx
+ movq %r13, %r10
+ movl %edx, %ebx
+ addq %r14, %rcx
+ leaq 1518500249(%rcx,%r11), %rcx
+/APP
+ bswap %eax
+/NO_APP
+ movl %ecx, %r8d
+ mov %eax, %r12d
+ roll $5, %r8d
+ xorq %r15, %r10
+ movq %r12, 16(%rdi)
+ andq %rdx, %r10
+ movq 16(%rdi), %r14
+ roll $30, %ebx
+ xorq %r15, %r10
+ movq %rbx, %rax
+ movl 36(%rsi), %r11d
+ addq %r10, %r8
+ xorq %r13, %rax
+ movl %ecx, %r12d
+ addq %r9, %r8
+ andq %rcx, %rax
+ roll $30, %r12d
+ leaq 1518500249(%r8,%r14), %r8
+ xorq %r13, %rax
+ movl 40(%rsi), %r14d
+/APP
+ bswap %r11d
+/NO_APP
+ movl %r8d, %edx
+ mov %r11d, %r9d
+ movq %r12, %r11
+ movq %r9, 24(%rdi)
+ roll $5, %edx
+ movq 24(%rdi), %r10
+ addq %rax, %rdx
+ xorq %rbx, %r11
+ movl %r8d, %r9d
+ addq %r15, %rdx
+ andq %r8, %r11
+ roll $30, %r9d
+ leaq 1518500249(%rdx,%r10), %rdx
+ xorq %rbx, %r11
+ movl 44(%rsi), %r10d
+/APP
+ bswap %r14d
+/NO_APP
+ movl %edx, %ecx
+ mov %r14d, %r15d
+ movq %r15, 32(%rdi)
+ roll $5, %ecx
+ movq 32(%rdi), %rax
+ addq %r11, %rcx
+ movq %r9, %r11
+ movl %edx, %r15d
+ addq %r13, %rcx
+ xorq %r12, %r11
+ roll $30, %r15d
+ leaq 1518500249(%rcx,%rax), %rcx
+ andq %rdx, %r11
+ movl 48(%rsi), %eax
+/APP
+ bswap %r10d
+/NO_APP
+ movl %ecx, %r8d
+ mov %r10d, %r14d
+ xorq %r12, %r11
+ movq %r14, 40(%rdi)
+ roll $5, %r8d
+ movq 40(%rdi), %r13
+ addq %r11, %r8
+ movq %r15, %r10
+ movl %ecx, %r14d
+ addq %rbx, %r8
+ xorq %r9, %r10
+ leaq 1518500249(%r8,%r13), %r8
+ movl 52(%rsi), %r13d
+/APP
+ bswap %eax
+/NO_APP
+ movl %r8d, %edx
+ mov %eax, %ebx
+ roll $5, %edx
+ andq %rcx, %r10
+ movq %rbx, 48(%rdi)
+ xorq %r9, %r10
+ movq 48(%rdi), %r11
+ roll $30, %r14d
+ addq %r10, %rdx
+ movq %r14, %rax
+ movl %r8d, %ebx
+ addq %r12, %rdx
+ xorq %r15, %rax
+ roll $30, %ebx
+ leaq 1518500249(%rdx,%r11), %rdx
+ andq %r8, %rax
+ movl 56(%rsi), %r11d
+/APP
+ bswap %r13d
+/NO_APP
+ movl %edx, %ecx
+ mov %r13d, %r12d
+ xorq %r15, %rax
+ movq %r12, 56(%rdi)
+ roll $5, %ecx
+ movq 56(%rdi), %r10
+ addq %rax, %rcx
+ movl %edx, %r12d
+ addq %r9, %rcx
+ movq %rbx, %r9
+ roll $30, %r12d
+ leaq 1518500249(%rcx,%r10), %rcx
+ xorq %r14, %r9
+ movl 60(%rsi), %r10d
+/APP
+ bswap %r11d
+/NO_APP
+ andq %rdx, %r9
+ mov %r11d, %r13d
+ movl %ecx, %r8d
+ movq %r13, 64(%rdi)
+ xorq %r14, %r9
+ movq 64(%rdi), %rax
+ roll $5, %r8d
+ movq %r12, %r11
+ movl %ecx, %r13d
+ addq %r9, %r8
+ xorq %rbx, %r11
+ roll $30, %r13d
+ addq %r15, %r8
+ andq %rcx, %r11
+ leaq 1518500249(%r8,%rax), %r8
+ xorq %rbx, %r11
+/APP
+ bswap %r10d
+/NO_APP
+ movl %r8d, %esi
+ mov %r10d, %r15d
+ movq %r15, 72(%rdi)
+ roll $5, %esi
+ movq 72(%rdi), %r9
+ movq 56(%rdi), %r10
+ movq 16(%rdi), %rcx
+ addq %r11, %rsi
+ movq -32(%rdi), %rdx
+ addq %r14, %rsi
+ movq -48(%rdi), %rax
+ leaq 1518500249(%rsi,%r9), %r14
+ movq %r13, %r11
+ movl %r8d, %r15d
+ xorq %rcx, %r10
+ xorq %rdx, %r10
+ movl %r14d, %ecx
+ xorl %eax, %r10d
+ roll %r10d
+ roll $5, %ecx
+ xorq %r12, %r11
+ andq %r8, %r11
+ movq %r10, -48(%rdi)
+ movq -48(%rdi), %r9
+ xorq %r12, %r11
+ roll $30, %r15d
+ movl %r14d, %r10d
+ addq %r11, %rcx
+ movq 64(%rdi), %r11
+ movq 24(%rdi), %rdx
+ addq %rbx, %rcx
+ movq -24(%rdi), %rbx
+ movq -40(%rdi), %rax
+ leaq 1518500249(%rcx,%r9), %rcx
+ movq %r15, %r8
+ roll $30, %r10d
+ xorq %rdx, %r11
+ xorq %r13, %r8
+ xorq %rbx, %r11
+ andq %r14, %r8
+ movl %ecx, %r9d
+ xorl %eax, %r11d
+ xorq %r13, %r8
+ roll $5, %r9d
+ roll %r11d
+ addq %r8, %r9
+ movq %r10, %rax
+ movq %r11, -40(%rdi)
+ movq -40(%rdi), %rsi
+ addq %r12, %r9
+ movq 72(%rdi), %rbx
+ movq 32(%rdi), %rdx
+ xorq %r15, %rax
+ movq -16(%rdi), %r14
+ movq -32(%rdi), %r12
+ andq %rcx, %rax
+ leaq 1518500249(%r9,%rsi), %r9
+ xorq %r15, %rax
+ movl %ecx, %r11d
+ xorq %rdx, %rbx
+ roll $30, %r11d
+ xorq %r14, %rbx
+ movl %r9d, %esi
+ xorl %r12d, %ebx
+ roll $5, %esi
+ roll %ebx
+ addq %rax, %rsi
+ movq %rbx, -32(%rdi)
+ movq -32(%rdi), %r8
+ addq %r13, %rsi
+ movq -48(%rdi), %r12
+ movq 40(%rdi), %rdx
+ movq %r11, %r13
+ movq -8(%rdi), %r14
+ movq -24(%rdi), %rcx
+ movl %r9d, %ebx
+ leaq 1518500249(%rsi,%r8), %rsi
+ xorq %rdx, %r12
+ xorq %r14, %r12
+ movl %esi, %r8d
+ xorl %ecx, %r12d
+ roll %r12d
+ roll $5, %r8d
+ xorq %r10, %r13
+ andq %r9, %r13
+ movq %r12, -24(%rdi)
+ movq -24(%rdi), %rax
+ xorq %r10, %r13
+ roll $30, %ebx
+ movl %esi, %r12d
+ addq %r13, %r8
+ xorq %rbx, %rsi
+ roll $30, %r12d
+ addq %r15, %r8
+ movq -40(%rdi), %r15
+ movq 48(%rdi), %rdx
+ movq (%rdi), %r14
+ movq -16(%rdi), %r9
+ leaq 1518500249(%r8,%rax), %r13
+ xorq %r11, %rsi
+ xorq %rdx, %r15
+ movl %r13d, %ecx
+ xorq %r14, %r15
+ roll $5, %ecx
+ xorl %r9d, %r15d
+ addq %rsi, %rcx
+ roll %r15d
+ addq %r10, %rcx
+ movq %r15, -16(%rdi)
+ movq -16(%rdi), %rsi
+ movl %r13d, %r15d
+ movq -32(%rdi), %r14
+ movq 56(%rdi), %rax
+ xorq %r12, %r13
+ movq 8(%rdi), %rdx
+ movq -8(%rdi), %r10
+ xorq %rbx, %r13
+ leaq 1859775393(%rcx,%rsi), %r9
+ roll $30, %r15d
+ xorq %rax, %r14
+ xorq %rdx, %r14
+ movl %r9d, %esi
+ xorl %r10d, %r14d
+ roll $5, %esi
+ roll %r14d
+ addq %r13, %rsi
+ movq %r14, -8(%rdi)
+ movq -8(%rdi), %r8
+ addq %r11, %rsi
+ movq -24(%rdi), %r13
+ movq 64(%rdi), %rax
+ movl %r9d, %r14d
+ movq 16(%rdi), %rdx
+ movq (%rdi), %r11
+ xorq %r15, %r9
+ leaq 1859775393(%rsi,%r8), %r10
+ xorq %rax, %r13
+ xorq %rdx, %r13
+ movl %r10d, %r8d
+ xorl %r11d, %r13d
+ roll $5, %r8d
+ roll %r13d
+ xorq %r12, %r9
+ roll $30, %r14d
+ addq %r9, %r8
+ movq %r13, (%rdi)
+ movq (%rdi), %rcx
+ addq %rbx, %r8
+ movq -16(%rdi), %rbx
+ movq 72(%rdi), %rax
+ movq 24(%rdi), %rdx
+ movq 8(%rdi), %r9
+ movl %r10d, %r13d
+ leaq 1859775393(%r8,%rcx), %r11
+ xorq %r14, %r10
+ roll $30, %r13d
+ xorq %rax, %rbx
+ xorq %r15, %r10
+ xorq %rdx, %rbx
+ movl %r11d, %ecx
+ xorl %r9d, %ebx
+ roll $5, %ecx
+ roll %ebx
+ addq %r10, %rcx
+ movq %rbx, 8(%rdi)
+ movq 8(%rdi), %rsi
+ addq %r12, %rcx
+ movq -8(%rdi), %r12
+ movq -48(%rdi), %rax
+ movl %r11d, %ebx
+ movq 32(%rdi), %rdx
+ movq 16(%rdi), %r9
+ xorq %r13, %r11
+ leaq 1859775393(%rcx,%rsi), %r10
+ xorq %r14, %r11
+ roll $30, %ebx
+ xorq %rax, %r12
+ xorq %rdx, %r12
+ movl %r10d, %esi
+ xorl %r9d, %r12d
+ roll $5, %esi
+ roll %r12d
+ addq %r11, %rsi
+ movq %r12, 16(%rdi)
+ addq %r15, %rsi
+ movq 16(%rdi), %r8
+ movq (%rdi), %r15
+ movq -40(%rdi), %rax
+ movl %r10d, %r12d
+ movq 40(%rdi), %rdx
+ movq 24(%rdi), %r9
+ xorq %rbx, %r10
+ leaq 1859775393(%rsi,%r8), %r11
+ xorq %r13, %r10
+ xorq %rax, %r15
+ xorq %rdx, %r15
+ movl %r11d, %r8d
+ xorl %r9d, %r15d
+ roll $5, %r8d
+ roll %r15d
+ addq %r10, %r8
+ movq %r15, 24(%rdi)
+ movq 24(%rdi), %rcx
+ addq %r14, %r8
+ movq 8(%rdi), %r14
+ movq -32(%rdi), %rax
+ roll $30, %r12d
+ movq 48(%rdi), %rdx
+ movq 32(%rdi), %r10
+ movl %r11d, %r15d
+ leaq 1859775393(%r8,%rcx), %r9
+ xorq %r12, %r11
+ roll $30, %r15d
+ xorq %rax, %r14
+ xorq %rbx, %r11
+ xorq %rdx, %r14
+ movl %r9d, %ecx
+ xorl %r10d, %r14d
+ roll $5, %ecx
+ roll %r14d
+ addq %r11, %rcx
+ movq %r14, 32(%rdi)
+ addq %r13, %rcx
+ movq 32(%rdi), %rsi
+ movq 16(%rdi), %r13
+ movq -24(%rdi), %rax
+ movl %r9d, %r14d
+ movq 56(%rdi), %rdx
+ movq 40(%rdi), %r11
+ xorq %r15, %r9
+ leaq 1859775393(%rcx,%rsi), %r10
+ xorq %r12, %r9
+ roll $30, %r14d
+ xorq %rax, %r13
+ xorq %rdx, %r13
+ movl %r10d, %esi
+ xorl %r11d, %r13d
+ roll $5, %esi
+ roll %r13d
+ addq %r9, %rsi
+ movq %r13, 40(%rdi)
+ movq 40(%rdi), %r8
+ addq %rbx, %rsi
+ movq 24(%rdi), %rbx
+ movq -16(%rdi), %rax
+ movl %r10d, %r13d
+ movq 64(%rdi), %rdx
+ movq 48(%rdi), %r9
+ xorq %r14, %r10
+ leaq 1859775393(%rsi,%r8), %r11
+ xorq %r15, %r10
+ roll $30, %r13d
+ xorq %rax, %rbx
+ xorq %rdx, %rbx
+ movl %r11d, %r8d
+ xorl %r9d, %ebx
+ roll $5, %r8d
+ roll %ebx
+ addq %r10, %r8
+ movq %rbx, 48(%rdi)
+ addq %r12, %r8
+ movq 48(%rdi), %rcx
+ movq 32(%rdi), %r12
+ movq -8(%rdi), %rax
+ movl %r11d, %ebx
+ movq 72(%rdi), %rdx
+ movq 56(%rdi), %r9
+ leaq 1859775393(%r8,%rcx), %r10
+ xorq %rax, %r12
+ xorq %rdx, %r12
+ movl %r10d, %ecx
+ xorl %r9d, %r12d
+ xorq %r13, %r11
+ roll $5, %ecx
+ xorq %r14, %r11
+ roll %r12d
+ roll $30, %ebx
+ addq %r11, %rcx
+ movq %r12, 56(%rdi)
+ movq 56(%rdi), %rsi
+ addq %r15, %rcx
+ movq 40(%rdi), %r15
+ movq (%rdi), %rax
+ movq -48(%rdi), %rdx
+ movq 64(%rdi), %r9
+ movl %r10d, %r12d
+ leaq 1859775393(%rcx,%rsi), %r11
+ xorq %rbx, %r10
+ roll $30, %r12d
+ xorq %rax, %r15
+ xorq %r13, %r10
+ xorq %rdx, %r15
+ movl %r11d, %esi
+ xorl %r9d, %r15d
+ roll $5, %esi
+ roll %r15d
+ addq %r10, %rsi
+ movq %r15, 64(%rdi)
+ movq 64(%rdi), %r8
+ addq %r14, %rsi
+ movq 48(%rdi), %r14
+ movq 8(%rdi), %rax
+ movl %r11d, %r15d
+ movq -40(%rdi), %rdx
+ movq 72(%rdi), %r10
+ xorq %r12, %r11
+ leaq 1859775393(%rsi,%r8), %r9
+ xorq %rbx, %r11
+ roll $30, %r15d
+ xorq %rax, %r14
+ xorq %rdx, %r14
+ movl %r9d, %r8d
+ xorl %r10d, %r14d
+ roll $5, %r8d
+ roll %r14d
+ addq %r11, %r8
+ movq %r14, 72(%rdi)
+ addq %r13, %r8
+ movq 72(%rdi), %rcx
+ movq 56(%rdi), %r13
+ movq 16(%rdi), %rax
+ movl %r9d, %r14d
+ movq -32(%rdi), %rdx
+ movq -48(%rdi), %r11
+ leaq 1859775393(%r8,%rcx), %r10
+ xorq %rax, %r13
+ xorq %rdx, %r13
+ movl %r10d, %ecx
+ xorl %r11d, %r13d
+ roll $5, %ecx
+ roll %r13d
+ xorq %r15, %r9
+ roll $30, %r14d
+ xorq %r12, %r9
+ movq %r13, -48(%rdi)
+ movq -48(%rdi), %rsi
+ addq %r9, %rcx
+ movl %r10d, %r13d
+ xorq %r14, %r10
+ addq %rbx, %rcx
+ movq 64(%rdi), %rbx
+ movq 24(%rdi), %rax
+ movq -24(%rdi), %rdx
+ leaq 1859775393(%rcx,%rsi), %r11
+ movq -40(%rdi), %r9
+ xorq %r15, %r10
+ roll $30, %r13d
+ xorq %rax, %rbx
+ movl %r11d, %esi
+ xorq %rdx, %rbx
+ roll $5, %esi
+ xorl %r9d, %ebx
+ addq %r10, %rsi
+ roll %ebx
+ addq %r12, %rsi
+ movq %rbx, -40(%rdi)
+ movq -40(%rdi), %r8
+ movl %r11d, %ebx
+ movq 72(%rdi), %r12
+ movq 32(%rdi), %rax
+ xorq %r13, %r11
+ movq -16(%rdi), %rdx
+ movq -32(%rdi), %r9
+ xorq %r14, %r11
+ leaq 1859775393(%rsi,%r8), %r10
+ roll $30, %ebx
+ xorq %rax, %r12
+ xorq %rdx, %r12
+ movl %r10d, %r8d
+ xorl %r9d, %r12d
+ roll $5, %r8d
+ roll %r12d
+ addq %r11, %r8
+ movq %r12, -32(%rdi)
+ movq -32(%rdi), %rcx
+ addq %r15, %r8
+ movq -48(%rdi), %r15
+ movq 40(%rdi), %rax
+ movl %r10d, %r12d
+ movq -8(%rdi), %rdx
+ movq -24(%rdi), %r9
+ xorq %rbx, %r10
+ leaq 1859775393(%r8,%rcx), %r11
+ xorq %r13, %r10
+ xorq %rax, %r15
+ xorq %rdx, %r15
+ movl %r11d, %ecx
+ xorl %r9d, %r15d
+ roll $5, %ecx
+ roll %r15d
+ addq %r10, %rcx
+ addq %r14, %rcx
+ movq %r15, -24(%rdi)
+ movq -24(%rdi), %rsi
+ movq -40(%rdi), %r14
+ movq 48(%rdi), %rax
+ roll $30, %r12d
+ movq (%rdi), %rdx
+ movq -16(%rdi), %r10
+ movl %r11d, %r15d
+ leaq 1859775393(%rcx,%rsi), %r9
+ xorq %r12, %r11
+ roll $30, %r15d
+ xorq %rax, %r14
+ xorq %rbx, %r11
+ xorq %rdx, %r14
+ movl %r9d, %esi
+ xorl %r10d, %r14d
+ roll $5, %esi
+ roll %r14d
+ addq %r11, %rsi
+ movq %r14, -16(%rdi)
+ movq -16(%rdi), %r8
+ addq %r13, %rsi
+ movq -32(%rdi), %r11
+ movq 56(%rdi), %rax
+ movl %r9d, %r14d
+ movq 8(%rdi), %rdx
+ movq -8(%rdi), %r10
+ xorq %r15, %r9
+ leaq 1859775393(%rsi,%r8), %r13
+ xorq %r12, %r9
+ roll $30, %r14d
+ xorq %rax, %r11
+ xorq %rdx, %r11
+ movl %r13d, %r8d
+ xorl %r10d, %r11d
+ roll $5, %r8d
+ movl %r13d, %r10d
+ roll %r11d
+ addq %r9, %r8
+ xorq %r14, %r13
+ movq %r11, -8(%rdi)
+ addq %rbx, %r8
+ movq -8(%rdi), %rbx
+ movq -24(%rdi), %r9
+ movq 64(%rdi), %rax
+ xorq %r15, %r13
+ movq 16(%rdi), %rdx
+ movq (%rdi), %rcx
+ leaq 1859775393(%r8,%rbx), %r11
+ xorq %rax, %r9
+ xorq %rdx, %r9
+ movl %r11d, %ebx
+ xorl %ecx, %r9d
+ roll $5, %ebx
+ roll %r9d
+ addq %r13, %rbx
+ movq %r9, (%rdi)
+ movq (%rdi), %rsi
+ addq %r12, %rbx
+ movq -16(%rdi), %r12
+ movq 72(%rdi), %r13
+ movl %r11d, %r9d
+ leaq 1859775393(%rbx,%rsi), %rcx
+ movl %r10d, %ebx
+ movq 24(%rdi), %r10
+ movq 8(%rdi), %rax
+ xorq %r13, %r12
+ roll $30, %ebx
+ movl %ecx, %esi
+ xorq %r10, %r12
+ xorq %rbx, %r11
+ roll $5, %esi
+ xorl %eax, %r12d
+ xorq %r14, %r11
+ roll $30, %r9d
+ roll %r12d
+ addq %r11, %rsi
+ movq %rcx, %rax
+ movq %r12, 8(%rdi)
+ movq 8(%rdi), %rdx
+ addq %r15, %rsi
+ movq -8(%rdi), %r11
+ movq -48(%rdi), %r13
+ movl %ecx, %r12d
+ movq 32(%rdi), %r10
+ movq 16(%rdi), %r8
+ orq %r9, %rcx
+ leaq 1859775393(%rsi,%rdx), %rsi
+ andq %rbx, %rcx
+ andq %r9, %rax
+ xorq %r13, %r11
+ orq %rcx, %rax
+ roll $30, %r12d
+ xorq %r10, %r11
+ movq %rsi, %r10
+ xorl %r8d, %r11d
+ movl %esi, %r8d
+ andq %r12, %r10
+ roll %r11d
+ roll $5, %r8d
+ movq %r11, 16(%rdi)
+ addq %rax, %r8
+ movq 16(%rdi), %r15
+ movq (%rdi), %r13
+ movq -40(%rdi), %rdx
+ addq %r14, %r8
+ movq 40(%rdi), %r14
+ movq 24(%rdi), %rcx
+ movl %esi, %r11d
+ addq %r15, %r8
+ movl $2400959708, %r15d
+ orq %r12, %rsi
+ xorq %rdx, %r13
+ addq %r15, %r8
+ andq %r9, %rsi
+ xorq %r14, %r13
+ orq %rsi, %r10
+ xorl %ecx, %r13d
+ movl %r8d, %ecx
+ roll %r13d
+ roll $5, %ecx
+ movq %r13, 24(%rdi)
+ addq %r10, %rcx
+ movq 24(%rdi), %rax
+ movq 8(%rdi), %r14
+ movq -32(%rdi), %rdx
+ addq %rbx, %rcx
+ movq 48(%rdi), %rbx
+ movq 32(%rdi), %rsi
+ roll $30, %r11d
+ addq %rax, %rcx
+ movl %r8d, %r13d
+ movq %r8, %r10
+ xorq %rdx, %r14
+ addq %r15, %rcx
+ orq %r11, %r8
+ xorq %rbx, %r14
+ andq %r12, %r8
+ andq %r11, %r10
+ xorl %esi, %r14d
+ movl %ecx, %esi
+ orq %r8, %r10
+ roll $5, %esi
+ roll %r14d
+ roll $30, %r13d
+ addq %r10, %rsi
+ movq %r14, 32(%rdi)
+ movq 32(%rdi), %rax
+ addq %r9, %rsi
+ movq 16(%rdi), %r9
+ movq -24(%rdi), %rdx
+ movq 56(%rdi), %rbx
+ movq 40(%rdi), %r8
+ movl %ecx, %r14d
+ addq %rax, %rsi
+ movq %rcx, %r10
+ orq %r13, %rcx
+ xorq %rdx, %r9
+ addq %r15, %rsi
+ andq %r11, %rcx
+ xorq %rbx, %r9
+ andq %r13, %r10
+ roll $30, %r14d
+ xorl %r8d, %r9d
+ movl %esi, %r8d
+ orq %rcx, %r10
+ roll %r9d
+ roll $5, %r8d
+ movq %r9, 40(%rdi)
+ addq %r10, %r8
+ movq 40(%rdi), %rax
+ movq 24(%rdi), %r10
+ movq -16(%rdi), %rdx
+ addq %r12, %r8
+ movq 64(%rdi), %rbx
+ movq 48(%rdi), %rcx
+ movl %esi, %r9d
+ addq %rax, %r8
+ movq %rsi, %r12
+ xorq %rdx, %r10
+ addq %r15, %r8
+ xorq %rbx, %r10
+ orq %r14, %rsi
+ andq %r14, %r12
+ andq %r13, %rsi
+ xorl %ecx, %r10d
+ movl %r8d, %ecx
+ orq %rsi, %r12
+ roll %r10d
+ roll $5, %ecx
+ movq %r10, 48(%rdi)
+ addq %r12, %rcx
+ movq 48(%rdi), %rax
+ movq 32(%rdi), %r12
+ movq -8(%rdi), %rdx
+ addq %r11, %rcx
+ movq 72(%rdi), %rbx
+ movq 56(%rdi), %rsi
+ roll $30, %r9d
+ addq %rax, %rcx
+ movl %r8d, %r10d
+ movq %r8, %r11
+ xorq %rdx, %r12
+ addq %r15, %rcx
+ orq %r9, %r8
+ xorq %rbx, %r12
+ andq %r14, %r8
+ andq %r9, %r11
+ xorl %esi, %r12d
+ movl %ecx, %esi
+ orq %r8, %r11
+ roll %r12d
+ roll $5, %esi
+ roll $30, %r10d
+ movq %r12, 56(%rdi)
+ addq %r11, %rsi
+ movq 56(%rdi), %rax
+ movq 40(%rdi), %r11
+ movq (%rdi), %rdx
+ addq %r13, %rsi
+ movq -48(%rdi), %rbx
+ movq 64(%rdi), %r8
+ movq %rcx, %r13
+ addq %rax, %rsi
+ andq %r10, %r13
+ movl %ecx, %r12d
+ xorq %rdx, %r11
+ addq %r15, %rsi
+ xorq %rbx, %r11
+ xorl %r8d, %r11d
+ movl %esi, %r8d
+ roll %r11d
+ roll $5, %r8d
+ orq %r10, %rcx
+ andq %r9, %rcx
+ movq %r11, 64(%rdi)
+ movq 64(%rdi), %rax
+ orq %rcx, %r13
+ roll $30, %r12d
+ movl %esi, %r11d
+ addq %r13, %r8
+ movq 48(%rdi), %r13
+ movq 8(%rdi), %rdx
+ movq -40(%rdi), %rbx
+ addq %r14, %r8
+ movq 72(%rdi), %rcx
+ addq %rax, %r8
+ movq %rsi, %r14
+ orq %r12, %rsi
+ xorq %rdx, %r13
+ addq %r15, %r8
+ andq %r10, %rsi
+ xorq %rbx, %r13
+ andq %r12, %r14
+ roll $30, %r11d
+ xorl %ecx, %r13d
+ movl %r8d, %ecx
+ orq %rsi, %r14
+ roll %r13d
+ roll $5, %ecx
+ movq %r13, 72(%rdi)
+ addq %r14, %rcx
+ movq 72(%rdi), %rax
+ movq 56(%rdi), %r14
+ movq 16(%rdi), %rdx
+ addq %r9, %rcx
+ movq -32(%rdi), %rbx
+ movq -48(%rdi), %rsi
+ movl %r8d, %r13d
+ addq %rax, %rcx
+ movq %r8, %r9
+ orq %r11, %r8
+ xorq %rdx, %r14
+ addq %r15, %rcx
+ andq %r12, %r8
+ xorq %rbx, %r14
+ andq %r11, %r9
+ xorl %esi, %r14d
+ movl %ecx, %esi
+ orq %r8, %r9
+ roll $5, %esi
+ roll %r14d
+ addq %r9, %rsi
+ movq %r14, -48(%rdi)
+ movq -48(%rdi), %rax
+ addq %r10, %rsi
+ movq 64(%rdi), %r10
+ movq 24(%rdi), %rdx
+ movq -24(%rdi), %rbx
+ movq -40(%rdi), %r8
+ movl %ecx, %r14d
+ addq %rax, %rsi
+ roll $30, %r13d
+ movq %rcx, %r9
+ xorq %rdx, %r10
+ addq %r15, %rsi
+ orq %r13, %rcx
+ xorq %rbx, %r10
+ andq %r11, %rcx
+ andq %r13, %r9
+ xorl %r8d, %r10d
+ movl %esi, %r8d
+ orq %rcx, %r9
+ roll $5, %r8d
+ roll %r10d
+ roll $30, %r14d
+ addq %r9, %r8
+ movq %r10, -40(%rdi)
+ movq -40(%rdi), %rax
+ addq %r12, %r8
+ movq 72(%rdi), %r12
+ movq 32(%rdi), %rdx
+ movq -16(%rdi), %rbx
+ movq -32(%rdi), %rcx
+ movl %esi, %r10d
+ addq %rax, %r8
+ movq %rsi, %r9
+ orq %r14, %rsi
+ xorq %rdx, %r12
+ addq %r15, %r8
+ andq %r13, %rsi
+ xorq %rbx, %r12
+ andq %r14, %r9
+ roll $30, %r10d
+ xorl %ecx, %r12d
+ movl %r8d, %ecx
+ orq %rsi, %r9
+ roll $5, %ecx
+ roll %r12d
+ addq %r9, %rcx
+ movq %r12, -32(%rdi)
+ movq -32(%rdi), %rax
+ addq %r11, %rcx
+ movq -48(%rdi), %r11
+ movq 40(%rdi), %rdx
+ movq -8(%rdi), %rbx
+ movq -24(%rdi), %rsi
+ movl %r8d, %r12d
+ addq %rax, %rcx
+ movq %r8, %r9
+ xorq %rdx, %r11
+ addq %r15, %rcx
+ xorq %rbx, %r11
+ xorl %esi, %r11d
+ orq %r10, %r8
+ andq %r10, %r9
+ andq %r14, %r8
+ movl %ecx, %esi
+ roll %r11d
+ orq %r8, %r9
+ roll $5, %esi
+ movq %r11, -24(%rdi)
+ addq %r9, %rsi
+ movq -24(%rdi), %rax
+ roll $30, %r12d
+ addq %r13, %rsi
+ movq -40(%rdi), %r13
+ movq 48(%rdi), %rdx
+ movq (%rdi), %rbx
+ movq -16(%rdi), %r8
+ movl %ecx, %r11d
+ addq %rax, %rsi
+ movq %rcx, %r9
+ orq %r12, %rcx
+ xorq %rdx, %r13
+ addq %r15, %rsi
+ andq %r10, %rcx
+ xorq %rbx, %r13
+ andq %r12, %r9
+ roll $30, %r11d
+ xorl %r8d, %r13d
+ movl %esi, %r8d
+ orq %rcx, %r9
+ roll %r13d
+ roll $5, %r8d
+ movq %r13, -16(%rdi)
+ addq %r9, %r8
+ movq -16(%rdi), %rax
+ movq -32(%rdi), %r9
+ movq 56(%rdi), %rdx
+ addq %r14, %r8
+ movq 8(%rdi), %rcx
+ movq -8(%rdi), %rbx
+ movl %esi, %r13d
+ addq %rax, %r8
+ movq %rsi, %r14
+ orq %r11, %rsi
+ xorq %rdx, %r9
+ addq %r15, %r8
+ andq %r11, %r14
+ xorq %rcx, %r9
+ xorl %ebx, %r9d
+ movl %r8d, %ebx
+ roll %r9d
+ roll $5, %ebx
+ andq %r12, %rsi
+ orq %rsi, %r14
+ movq %r9, -8(%rdi)
+ movq -8(%rdi), %rax
+ addq %r14, %rbx
+ movq -24(%rdi), %r14
+ movq 64(%rdi), %rdx
+ movq 16(%rdi), %rcx
+ addq %r10, %rbx
+ movq (%rdi), %rsi
+ roll $30, %r13d
+ addq %rax, %rbx
+ movl %r8d, %r9d
+ xorq %rdx, %r14
+ addq %r15, %rbx
+ movq %r8, %r10
+ xorq %rcx, %r14
+ orq %r13, %r8
+ andq %r13, %r10
+ andq %r11, %r8
+ xorl %esi, %r14d
+ movl %ebx, %esi
+ orq %r8, %r10
+ roll $5, %esi
+ roll %r14d
+ addq %r10, %rsi
+ movq %r14, (%rdi)
+ movq (%rdi), %rax
+ addq %r12, %rsi
+ movq -16(%rdi), %r12
+ movq 72(%rdi), %rdx
+ movq 24(%rdi), %rcx
+ movq 8(%rdi), %r8
+ roll $30, %r9d
+ addq %rax, %rsi
+ movl %ebx, %r14d
+ movq %rbx, %r10
+ xorq %rdx, %r12
+ addq %r15, %rsi
+ orq %r9, %rbx
+ xorq %rcx, %r12
+ andq %r13, %rbx
+ andq %r9, %r10
+ xorl %r8d, %r12d
+ movl %esi, %r8d
+ orq %rbx, %r10
+ roll %r12d
+ roll $5, %r8d
+ movq %r12, 8(%rdi)
+ movq 8(%rdi), %rax
+ addq %r10, %r8
+ movq -8(%rdi), %rbx
+ movq -48(%rdi), %rdx
+ addq %r11, %r8
+ movq 32(%rdi), %r11
+ movq 16(%rdi), %rcx
+ movl %esi, %r12d
+ addq %rax, %r8
+ movq %rsi, %r10
+ addq %r15, %r8
+ xorq %rdx, %rbx
+ roll $30, %r14d
+ xorq %r11, %rbx
+ orq %r14, %rsi
+ andq %r14, %r10
+ xorl %ecx, %ebx
+ andq %r9, %rsi
+ movl %r8d, %ecx
+ roll %ebx
+ orq %rsi, %r10
+ roll $5, %ecx
+ movq %rbx, 16(%rdi)
+ movq 16(%rdi), %rsi
+ addq %r10, %rcx
+ movq (%rdi), %r11
+ movq -40(%rdi), %rax
+ addq %r13, %rcx
+ movq 40(%rdi), %rdx
+ movq 24(%rdi), %r13
+ roll $30, %r12d
+ addq %rsi, %rcx
+ movl %r8d, %ebx
+ movq %r8, %r10
+ xorq %rax, %r11
+ addq %r15, %rcx
+ orq %r12, %r8
+ xorq %rdx, %r11
+ andq %r14, %r8
+ andq %r12, %r10
+ xorl %r13d, %r11d
+ movl %ecx, %r13d
+ orq %r8, %r10
+ roll %r11d
+ roll $5, %r13d
+ roll $30, %ebx
+ movq %r11, 24(%rdi)
+ addq %r10, %r13
+ movq 24(%rdi), %rsi
+ movq 8(%rdi), %r10
+ movq -32(%rdi), %rax
+ addq %r9, %r13
+ movq 48(%rdi), %rdx
+ movq 32(%rdi), %r8
+ movl %ecx, %r11d
+ addq %rsi, %r13
+ movq %rcx, %r9
+ xorq %rax, %r10
+ addq %r15, %r13
+ xorq %rdx, %r10
+ xorl %r8d, %r10d
+ movl %r13d, %r8d
+ roll %r10d
+ orq %rbx, %rcx
+ andq %rbx, %r9
+ movq %r10, 32(%rdi)
+ andq %r12, %rcx
+ movl %r13d, %r10d
+ orq %rcx, %r9
+ roll $5, %r10d
+ movq 32(%rdi), %rsi
+ addq %r9, %r10
+ roll $30, %r11d
+ movq %r13, %rcx
+ addq %r14, %r10
+ movq 16(%rdi), %r14
+ movq -24(%rdi), %rax
+ movq 56(%rdi), %rdx
+ movq 40(%rdi), %r9
+ addq %rsi, %r10
+ addq %r15, %r10
+ orq %r11, %r13
+ andq %r11, %rcx
+ xorq %rax, %r14
+ andq %rbx, %r13
+ xorq %rdx, %r14
+ orq %r13, %rcx
+ xorl %r9d, %r14d
+ movl %r10d, %r9d
+ roll %r14d
+ roll $5, %r9d
+ movq %r14, 40(%rdi)
+ movq 40(%rdi), %rsi
+ addq %rcx, %r9
+ movq 24(%rdi), %r13
+ addq %r12, %r9
+ movq -16(%rdi), %r12
+ movq 64(%rdi), %rax
+ movl %r10d, %r14d
+ addq %rsi, %r9
+ movl %r8d, %esi
+ addq %r15, %r9
+ movq 48(%rdi), %r15
+ xorq %r12, %r13
+ roll $30, %esi
+ xorq %rax, %r13
+ xorq %rsi, %r10
+ xorl %r15d, %r13d
+ movl %r9d, %r15d
+ xorq %r11, %r10
+ roll $5, %r15d
+ roll %r13d
+ addq %r10, %r15
+ movq %r13, 48(%rdi)
+ movq 48(%rdi), %r10
+ addq %rbx, %r15
+ movq 32(%rdi), %rbx
+ movq -8(%rdi), %r8
+ movq 72(%rdi), %rdx
+ movq 56(%rdi), %rcx
+ roll $30, %r14d
+ addq %r10, %r15
+ movl $3395469782, %r10d
+ movl %r9d, %r13d
+ xorq %r8, %rbx
+ addq %r10, %r15
+ xorq %r14, %r9
+ xorq %rdx, %rbx
+ xorq %rsi, %r9
+ roll $30, %r13d
+ xorl %ecx, %ebx
+ movl %r15d, %ecx
+ roll %ebx
+ roll $5, %ecx
+ movq %rbx, 56(%rdi)
+ addq %r9, %rcx
+ movq 56(%rdi), %r12
+ movq 40(%rdi), %r9
+ movq (%rdi), %rax
+ addq %r11, %rcx
+ movq -48(%rdi), %r8
+ movq 64(%rdi), %r11
+ movl %r15d, %ebx
+ addq %r12, %rcx
+ xorq %r13, %r15
+ roll $30, %ebx
+ xorq %rax, %r9
+ addq %r10, %rcx
+ xorq %r14, %r15
+ xorq %r8, %r9
+ xorl %r11d, %r9d
+ movl %ecx, %r11d
+ roll %r9d
+ roll $5, %r11d
+ movq %r9, 64(%rdi)
+ addq %r15, %r11
+ movq 64(%rdi), %rdx
+ movq 48(%rdi), %r15
+ movq 8(%rdi), %r12
+ addq %rsi, %r11
+ movq -40(%rdi), %rax
+ movq 72(%rdi), %r8
+ movl %ecx, %r9d
+ addq %rdx, %r11
+ xorq %r12, %r15
+ addq %r10, %r11
+ xorq %rax, %r15
+ xorl %r8d, %r15d
+ movl %r11d, %r8d
+ roll %r15d
+ roll $5, %r8d
+ xorq %rbx, %rcx
+ xorq %r13, %rcx
+ movq %r15, 72(%rdi)
+ movq 72(%rdi), %rsi
+ addq %rcx, %r8
+ movq 56(%rdi), %r12
+ movq 16(%rdi), %rcx
+ movq -32(%rdi), %rdx
+ addq %r14, %r8
+ movq -48(%rdi), %r14
+ addq %rsi, %r8
+ roll $30, %r9d
+ movl %r11d, %r15d
+ xorq %rcx, %r12
+ addq %r10, %r8
+ xorq %r9, %r11
+ xorq %rdx, %r12
+ xorq %rbx, %r11
+ roll $30, %r15d
+ xorl %r14d, %r12d
+ movl %r8d, %r14d
+ roll $5, %r14d
+ roll %r12d
+ addq %r11, %r14
+ movq %r12, -48(%rdi)
+ movq -48(%rdi), %rax
+ addq %r13, %r14
+ movq 64(%rdi), %r13
+ movq 24(%rdi), %rsi
+ movq -24(%rdi), %rcx
+ movq -40(%rdi), %r11
+ movl %r8d, %r12d
+ addq %rax, %r14
+ xorq %r15, %r8
+ roll $30, %r12d
+ xorq %rsi, %r13
+ addq %r10, %r14
+ xorq %r9, %r8
+ xorq %rcx, %r13
+ xorl %r11d, %r13d
+ movl %r14d, %r11d
+ roll $5, %r11d
+ roll %r13d
+ addq %r8, %r11
+ movq %r13, -40(%rdi)
+ movq -40(%rdi), %rdx
+ addq %rbx, %r11
+ movq 72(%rdi), %rbx
+ movq 32(%rdi), %rax
+ movq -16(%rdi), %rsi
+ movq -32(%rdi), %r8
+ movl %r14d, %r13d
+ addq %rdx, %r11
+ xorq %rax, %rbx
+ addq %r10, %r11
+ xorq %rsi, %rbx
+ xorl %r8d, %ebx
+ xorq %r12, %r14
+ movl %r11d, %r8d
+ xorq %r15, %r14
+ roll %ebx
+ roll $5, %r8d
+ movq %rbx, -32(%rdi)
+ addq %r14, %r8
+ movq -32(%rdi), %rcx
+ movq -48(%rdi), %r14
+ movq 40(%rdi), %rdx
+ addq %r9, %r8
+ movq -8(%rdi), %rax
+ movq -24(%rdi), %r9
+ roll $30, %r13d
+ addq %rcx, %r8
+ movl %r11d, %ebx
+ xorq %r13, %r11
+ xorq %rdx, %r14
+ addq %r10, %r8
+ xorq %r12, %r11
+ xorq %rax, %r14
+ roll $30, %ebx
+ xorl %r9d, %r14d
+ movl %r8d, %r9d
+ roll $5, %r9d
+ roll %r14d
+ addq %r11, %r9
+ movq %r14, -24(%rdi)
+ movq -24(%rdi), %rsi
+ addq %r15, %r9
+ movq -40(%rdi), %r15
+ movq 48(%rdi), %rcx
+ movq (%rdi), %rdx
+ movq -16(%rdi), %r11
+ movl %r8d, %r14d
+ addq %rsi, %r9
+ xorq %rbx, %r8
+ xorq %rcx, %r15
+ addq %r10, %r9
+ xorq %r13, %r8
+ xorq %rdx, %r15
+ xorl %r11d, %r15d
+ movl %r9d, %r11d
+ roll %r15d
+ roll $5, %r11d
+ movq %r15, -16(%rdi)
+ addq %r8, %r11
+ movq -16(%rdi), %rax
+ addq %r12, %r11
+ movq -32(%rdi), %r12
+ movq 56(%rdi), %rsi
+ movq 8(%rdi), %rcx
+ movq -8(%rdi), %r8
+ movl %r9d, %r15d
+ addq %rax, %r11
+ addq %r10, %r11
+ roll $30, %r14d
+ xorq %rsi, %r12
+ xorq %rcx, %r12
+ xorq %r14, %r9
+ roll $30, %r15d
+ xorl %r8d, %r12d
+ movl %r11d, %r8d
+ xorq %rbx, %r9
+ roll $5, %r8d
+ roll %r12d
+ addq %r9, %r8
+ movq %r12, -8(%rdi)
+ movq -8(%rdi), %rdx
+ addq %r13, %r8
+ movq -24(%rdi), %r13
+ movq 64(%rdi), %rax
+ movq 16(%rdi), %rsi
+ movq (%rdi), %rcx
+ movl %r11d, %r12d
+ addq %rdx, %r8
+ xorq %r15, %r11
+ roll $30, %r12d
+ xorq %rax, %r13
+ addq %r10, %r8
+ xorq %r14, %r11
+ xorq %rsi, %r13
+ xorl %ecx, %r13d
+ movl %r8d, %ecx
+ roll $5, %ecx
+ roll %r13d
+ addq %r11, %rcx
+ movq %r13, (%rdi)
+ movq (%rdi), %r9
+ addq %rbx, %rcx
+ movq -16(%rdi), %rbx
+ movq 72(%rdi), %rdx
+ movq 24(%rdi), %rax
+ movq 8(%rdi), %rsi
+ movl %r8d, %r13d
+ addq %r9, %rcx
+ xorq %r12, %r8
+ xorq %rdx, %rbx
+ addq %r10, %rcx
+ xorq %r15, %r8
+ xorq %rax, %rbx
+ xorl %esi, %ebx
+ movl %ecx, %esi
+ roll $5, %esi
+ roll %ebx
+ addq %r8, %rsi
+ movq %rbx, 8(%rdi)
+ movq 8(%rdi), %r11
+ addq %r14, %rsi
+ movq -8(%rdi), %r14
+ movq -48(%rdi), %r9
+ movq 32(%rdi), %rdx
+ movq 16(%rdi), %r8
+ roll $30, %r13d
+ addq %r11, %rsi
+ movl %ecx, %ebx
+ xorq %r13, %rcx
+ xorq %r9, %r14
+ addq %r10, %rsi
+ xorq %r12, %rcx
+ xorq %rdx, %r14
+ roll $30, %ebx
+ xorl %r8d, %r14d
+ movl %esi, %r8d
+ roll $5, %r8d
+ roll %r14d
+ addq %rcx, %r8
+ movq %r14, 16(%rdi)
+ movq 16(%rdi), %rax
+ addq %r15, %r8
+ movq (%rdi), %r15
+ movq -40(%rdi), %r11
+ movq 40(%rdi), %r9
+ movq 24(%rdi), %rcx
+ movl %esi, %r14d
+ addq %rax, %r8
+ xorq %rbx, %rsi
+ roll $30, %r14d
+ xorq %r11, %r15
+ addq %r10, %r8
+ xorq %r13, %rsi
+ xorq %r9, %r15
+ xorl %ecx, %r15d
+ movl %r8d, %ecx
+ roll %r15d
+ roll $5, %ecx
+ movq %r15, 24(%rdi)
+ addq %rsi, %rcx
+ movq 24(%rdi), %rdx
+ movq 8(%rdi), %r11
+ movq -32(%rdi), %rax
+ addq %r12, %rcx
+ movq 48(%rdi), %r12
+ movq 32(%rdi), %rsi
+ movl %r8d, %r15d
+ addq %rdx, %rcx
+ xorq %rax, %r11
+ addq %r10, %rcx
+ xorq %r12, %r11
+ xorl %esi, %r11d
+ movl %ecx, %esi
+ roll %r11d
+ movq %r11, 32(%rdi)
+ movl %ecx, %r11d
+ movq 32(%rdi), %r9
+ roll $5, %r11d
+ xorq %r14, %r8
+ movq 16(%rdi), %r12
+ xorq %rbx, %r8
+ movq -24(%rdi), %rdx
+ movq 56(%rdi), %rax
+ addq %r8, %r11
+ movq 40(%rdi), %r8
+ roll $30, %r15d
+ addq %r13, %r11
+ xorq %r15, %rcx
+ addq %r9, %r11
+ xorq %rdx, %r12
+ xorq %r14, %rcx
+ addq %r10, %r11
+ xorq %rax, %r12
+ xorl %r8d, %r12d
+ movl %r11d, %r8d
+ roll $5, %r8d
+ roll %r12d
+ addq %rcx, %r8
+ movq %r12, 40(%rdi)
+ movq 40(%rdi), %r13
+ addq %rbx, %r8
+ movq 24(%rdi), %rbx
+ movq -16(%rdi), %r9
+ movq 64(%rdi), %rdx
+ movq 48(%rdi), %rcx
+ movl %r11d, %r12d
+ addq %r13, %r8
+ movl %esi, %r13d
+ roll $30, %r12d
+ xorq %r9, %rbx
+ addq %r10, %r8
+ roll $30, %r13d
+ xorq %rdx, %rbx
+ xorq %r13, %r11
+ xorl %ecx, %ebx
+ movl %r8d, %ecx
+ xorq %r15, %r11
+ roll %ebx
+ roll $5, %ecx
+ movq %rbx, 48(%rdi)
+ addq %r11, %rcx
+ movq 48(%rdi), %rax
+ movq 32(%rdi), %r11
+ movq -8(%rdi), %rsi
+ addq %r14, %rcx
+ movq 72(%rdi), %r9
+ movq 56(%rdi), %r14
+ movl %r8d, %ebx
+ addq %rax, %rcx
+ xorq %rsi, %r11
+ addq %r10, %rcx
+ xorq %r9, %r11
+ xorl %r14d, %r11d
+ xorq %r12, %r8
+ movl %ecx, %r14d
+ xorq %r13, %r8
+ roll %r11d
+ roll $5, %r14d
+ movq %r11, 56(%rdi)
+ addq %r8, %r14
+ movq 56(%rdi), %rdx
+ movq 40(%rdi), %r8
+ movq (%rdi), %rax
+ addq %r15, %r14
+ movq -48(%rdi), %r15
+ movq 64(%rdi), %rsi
+ roll $30, %ebx
+ addq %rdx, %r14
+ movl %ecx, %r11d
+ xorq %rbx, %rcx
+ xorq %rax, %r8
+ addq %r10, %r14
+ xorq %r12, %rcx
+ xorq %r15, %r8
+ roll $30, %r11d
+ xorl %esi, %r8d
+ movl %r14d, %esi
+ roll %r8d
+ roll $5, %esi
+ movq %r8, 64(%rdi)
+ movq 64(%rdi), %r9
+ addq %rcx, %rsi
+ movq 48(%rdi), %r15
+ movq 8(%rdi), %rcx
+ addq %r13, %rsi
+ movq -40(%rdi), %rdx
+ movq 72(%rdi), %rax
+ movl %r14d, %r8d
+ addq %r9, %rsi
+ xorq %r11, %r14
+ addq %r10, %rsi
+ xorq %rcx, %r15
+ xorq %rbx, %r14
+ xorq %rdx, %r15
+ movl %esi, %r13d
+ xorl %eax, %r15d
+ roll $5, %r13d
+ roll %r15d
+ addq %r14, %r13
+ movq %r15, 72(%rdi)
+ addq %r12, %r13
+ movq 72(%rdi), %r12
+ addq %r12, %r13
+ addq %r10, %r13
+ movq -88(%rdi), %r10
+ roll $30, %r8d
+ addq %r13, %r10
+ movq %r10, -88(%rdi)
+ movq -80(%rdi), %r9
+ addq %rsi, %r9
+ movq %r9, -80(%rdi)
+ movq -72(%rdi), %rcx
+ addq %r8, %rcx
+ movq %rcx, -72(%rdi)
+ movq -64(%rdi), %rdx
+ addq %r11, %rdx
+ movq %rdx, -64(%rdi)
+ movq -56(%rdi), %rax
+ addq %rbx, %rax
+ popq %rbx
+ popq %r12
+ popq %r13
+ popq %r14
+ popq %r15
+ movq %rax, -56(%rdi)
+ ret
+.LFE7:
+ .size shaCompress, .-shaCompress
+ .align 16
+/ SHA1_Update: feed a buffer of input bytes into a hash context.
+/   %rdi = context, %rsi = input bytes, %edx = input length (32-bit).
+/ The context is addressed by raw offset: bytes 0..63 hold a partially
+/ filled input block, the 64-bit running byte count lives at offset 64,
+/ and shaCompress is always handed context+160 as its first argument.
+.globl SHA1_Update
+ .type SHA1_Update, @function
+SHA1_Update:
+.LFB5:
+ pushq %rbp
+.LCFI5:
+ movq %rsp, %rbp
+.LCFI6:
+/ Callee-saved registers are stored below %rbp before %rsp is lowered.
+ movq %r13, -24(%rbp)
+.LCFI7:
+ movq %r14, -16(%rbp)
+.LCFI8:
+/ %r13d = bytes still to consume.
+ movl %edx, %r13d
+ movq %r15, -8(%rbp)
+.LCFI9:
+ movq %rbx, -40(%rbp)
+.LCFI10:
+/ %r15 = context.
+ movq %rdi, %r15
+ movq %r12, -32(%rbp)
+.LCFI11:
+ subq $48, %rsp
+.LCFI12:
+ testl %edx, %edx
+/ %r14 = input cursor.
+ movq %rsi, %r14
+/ Zero-length input: nothing to do.
+ je .L243
+/ Add the length to the byte count at offset 64.  The low six bits of the
+/ OLD count say how many bytes are already buffered; keep that at -44(%rbp).
+ movq 64(%rdi), %rdx
+ mov %r13d, %ecx
+ leaq (%rdx,%rcx), %rax
+ movq %rax, 64(%rdi)
+ movl %edx, %eax
+ andl $63, %eax
+ movl %eax, -44(%rbp)
+/ A partial block is pending: top it up first.
+ jne .L256
+.L245:
+/ Fewer than 64 bytes left: skip the bulk loop.
+ cmpl $63, %r13d
+ jbe .L253
+ leaq 160(%r15), %rbx
+ .align 16
+.L250:
+/ Bulk loop: compress whole 64-byte blocks straight from the input.
+ movq %r14, %rsi
+ subl $64, %r13d
+ movq %rbx, %rdi
+ call shaCompress
+ addq $64, %r14
+ cmpl $63, %r13d
+ ja .L250
+.L253:
+ testl %r13d, %r13d
+ je .L243
+/ Stash the remaining (< 64) bytes at the start of the context.  The frame
+/ is torn down first so memcpy can be reached with a tail call.
+ mov %r13d, %edx
+ movq %r14, %rsi
+ movq %r15, %rdi
+ movq -40(%rbp), %rbx
+ movq -32(%rbp), %r12
+ movq -24(%rbp), %r13
+ movq -16(%rbp), %r14
+ movq -8(%rbp), %r15
+ leave
+ jmp memcpy@PLT
+ .align 16
+.L243:
+/ Common exit: restore callee-saved registers and return.
+ movq -40(%rbp), %rbx
+ movq -32(%rbp), %r12
+ movq -24(%rbp), %r13
+ movq -16(%rbp), %r14
+ movq -8(%rbp), %r15
+ leave
+ ret
+.L256:
+/ Top up the pending block: copy min(64 - buffered, length) bytes to
+/ context + buffered, then advance the input cursor and shrink the length.
+/ %rsi still holds the original input pointer for the memcpy call.
+ movl $64, %ebx
+ mov %eax, %edi
+ subl %eax, %ebx
+ cmpl %ebx, %r13d
+ cmovb %r13d, %ebx
+ addq %r15, %rdi
+ mov %ebx, %r12d
+ subl %ebx, %r13d
+ movq %r12, %rdx
+ addq %r12, %r14
+ call memcpy@PLT
+/ (buffered + copied) mod 64 != 0 means the block is still incomplete.
+ addl -44(%rbp), %ebx
+ andl $63, %ebx
+ jne .L245
+/ The buffered block is now full: compress it, then rejoin the main path.
+ leaq 160(%r15), %rdi
+ movq %r15, %rsi
+ call shaCompress
+ jmp .L245
+.LFE5:
+ .size SHA1_Update, .-SHA1_Update
+ .section .rodata
+ .align 32
+ .type bulk_pad.0, @object
+ .size bulk_pad.0, 64
+bulk_pad.0:
+ .byte -128
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .byte 0
+ .text
+ .align 16
+/ SHA1_End: pad the message, run the final compression and emit the
+/ 20-byte digest.
+/   %rdi = context, %rsi = digest output buffer,
+/   %rdx = optional pointer that receives the digest length (may be NULL).
+/ %ecx is never read here, even though SHA1_HashBuf loads 20 into it
+/ before calling.
+.globl SHA1_End
+ .type SHA1_End, @function
+SHA1_End:
+.LFB6:
+ pushq %rbp
+.LCFI13:
+ movq %rsp, %rbp
+.LCFI14:
+ movq %r12, -24(%rbp)
+.LCFI15:
+ movq %r13, -16(%rbp)
+.LCFI16:
+/ %r13 = digest output buffer.
+ movq %rsi, %r13
+ movq %r14, -8(%rbp)
+.LCFI17:
+ movq %rbx, -32(%rbp)
+.LCFI18:
+ subq $32, %rsp
+.LCFI19:
+/ Read the byte count (offset 64) BEFORE padding, since SHA1_Update adds
+/ the padding length to it.  %rbx becomes the message length in bits.
+ movq 64(%rdi), %rbx
+/ %r14 = optional length-out pointer, %r12 = context.
+ movq %rdx, %r14
+ movl $119, %edx
+ leaq bulk_pad.0(%rip), %rsi
+ movq %rdi, %r12
+ movl %ebx, %r8d
+ salq $3, %rbx
+/ Pad length = ((119 - (count mod 64)) mod 64) + 1, i.e. 1..64 bytes,
+/ which leaves the buffered length at 56 mod 64.
+ andl $63, %r8d
+ subl %r8d, %edx
+ andl $63, %edx
+ incl %edx
+ call SHA1_Update@PLT
+/ Store the 64-bit bit count big-endian in the last 8 bytes of the block
+/ (offsets 56 and 60), then compress the block held in the context.
+ movq %rbx, %rdi
+ movq %r12, %rsi
+ shrq $32, %rdi
+/APP
+ bswap %edi
+/NO_APP
+ movl %edi, 56(%r12)
+ leaq 160(%r12), %rdi
+/APP
+ bswap %ebx
+/NO_APP
+ movl %ebx, 60(%r12)
+ call shaCompress
+/ Load the low 32 bits of the five 64-bit slots at offsets 72..104 and
+/ write them byte-swapped to the output buffer.  Register restores are
+/ interleaved with the stores.
+ movl 72(%r12), %esi
+ movl 80(%r12), %ebx
+ movl 88(%r12), %ecx
+ movl 96(%r12), %edx
+ movl 104(%r12), %eax
+ movq 8(%rsp), %r12
+/APP
+ bswap %ebx
+ bswap %esi
+/NO_APP
+ movl %ebx, 4(%r13)
+ movl %esi, (%r13)
+/APP
+ bswap %ecx
+ bswap %edx
+/NO_APP
+ movl %ecx, 8(%r13)
+ movl %edx, 12(%r13)
+/APP
+ bswap %eax
+/NO_APP
+ movq (%rsp), %rbx
+ movl %eax, 16(%r13)
+/ Report the digest length (20) only when a length pointer was supplied.
+ cmpq $0, %r14
+ je .L133
+ movl $20, (%r14)
+.L133:
+ movq 16(%rsp), %r13
+ movq 24(%rsp), %r14
+ leave
+ ret
+.LFE6:
+ .size SHA1_End, .-SHA1_End
+ .align 16
+/ SHA1_NewContext: takes no arguments; tail-calls PORT_Alloc_Util(248) and
+/ so returns whatever that allocator returns.  248 is the same byte count
+/ the other context routines in this file copy and clear.
+.globl SHA1_NewContext
+ .type SHA1_NewContext, @function
+SHA1_NewContext:
+.LFB8:
+ movl $248, %edi
+ jmp PORT_Alloc_Util@PLT
+.LFE8:
+ .size SHA1_NewContext, .-SHA1_NewContext
+ .align 16
+/ SHA1_DestroyContext: zero a context and optionally release it.
+/   %rdi = context, %esi = flag; nonzero means also free the context.
+.globl SHA1_DestroyContext
+ .type SHA1_DestroyContext, @function
+SHA1_DestroyContext:
+.LFB9:
+ pushq %rbp
+.LCFI20:
+ movl $248, %edx
+ movq %rsp, %rbp
+.LCFI21:
+ movq %rbx, -16(%rbp)
+.LCFI22:
+ movq %r12, -8(%rbp)
+.LCFI23:
+/ Keep the flag in %ebx and the context in %r12 across the memset call.
+ movl %esi, %ebx
+ subq $16, %rsp
+.LCFI24:
+/ memset(context, 0, 248)
+ xorl %esi, %esi
+ movq %rdi, %r12
+ call memset@PLT
+ testl %ebx, %ebx
+ jne .L268
+/ Flag was zero: leave the (now cleared) storage to the caller.
+ movq (%rsp), %rbx
+ movq 8(%rsp), %r12
+ leave
+ ret
+ .align 16
+.L268:
+/ Flag was nonzero: tail-call PORT_Free_Util(context).
+ movq %r12, %rdi
+ movq (%rsp), %rbx
+ movq 8(%rsp), %r12
+ leave
+ jmp PORT_Free_Util@PLT
+.LFE9:
+ .size SHA1_DestroyContext, .-SHA1_DestroyContext
+ .align 16
+/ SHA1_HashBuf: one-shot hash of a buffer using a context on the stack.
+/   %rdi = digest output, %rsi = input bytes, %edx = input length.
+/ Always returns 0 in %eax.
+.globl SHA1_HashBuf
+ .type SHA1_HashBuf, @function
+SHA1_HashBuf:
+.LFB10:
+ pushq %rbp
+.LCFI25:
+ movq %rsp, %rbp
+.LCFI26:
+ movq %rbx, -32(%rbp)
+.LCFI27:
+/ %rbx = address of the local context at -288(%rbp).
+ leaq -288(%rbp), %rbx
+ movq %r12, -24(%rbp)
+.LCFI28:
+ movq %r13, -16(%rbp)
+.LCFI29:
+ movq %r14, -8(%rbp)
+.LCFI30:
+/ %r13 = input, %r14 = digest output, %r12d = input length.
+ movq %rsi, %r13
+ subq $304, %rsp
+.LCFI31:
+ movq %rdi, %r14
+ movl %edx, %r12d
+/ SHA1_Begin(&ctx)
+ movq %rbx, %rdi
+ call SHA1_Begin@PLT
+/ SHA1_Update(&ctx, input, length)
+ movl %r12d, %edx
+ movq %r13, %rsi
+ movq %rbx, %rdi
+ call SHA1_Update@PLT
+/ SHA1_End(&ctx, output, &len, 20); the length written to the 4-byte
+/ local at -292(%rbp) is discarded.
+ leaq -292(%rbp), %rdx
+ movq %r14, %rsi
+ movq %rbx, %rdi
+ movl $20, %ecx
+ call SHA1_End@PLT
+ movq -32(%rbp), %rbx
+ movq -24(%rbp), %r12
+ xorl %eax, %eax
+ movq -16(%rbp), %r13
+ movq -8(%rbp), %r14
+ leave
+ ret
+.LFE10:
+ .size SHA1_HashBuf, .-SHA1_HashBuf
+ .align 16
+/ SHA1_Hash: hash a NUL-terminated string.
+/   %rdi = digest output, %rsi = string.
+/ Measures the string with strlen and tail-calls
+/ SHA1_HashBuf(output, string, length); only the low 32 bits of the
+/ strlen result are passed on.
+.globl SHA1_Hash
+ .type SHA1_Hash, @function
+SHA1_Hash:
+.LFB11:
+ pushq %rbp
+.LCFI32:
+ movq %rsp, %rbp
+.LCFI33:
+ movq %rbx, -16(%rbp)
+.LCFI34:
+ movq %r12, -8(%rbp)
+.LCFI35:
+/ Preserve both arguments across the strlen call.
+ movq %rsi, %rbx
+ subq $16, %rsp
+.LCFI36:
+ movq %rdi, %r12
+ movq %rsi, %rdi
+ call strlen@PLT
+/ Rebuild the argument registers, unwind the frame, then tail-call.
+ movq %rbx, %rsi
+ movq %r12, %rdi
+ movq (%rsp), %rbx
+ movq 8(%rsp), %r12
+ leave
+ movl %eax, %edx
+ jmp SHA1_HashBuf@PLT
+.LFE11:
+ .size SHA1_Hash, .-SHA1_Hash
+ .align 16
+/ SHA1_FlattenSize: returns the constant 248 without reading any argument.
+/ 248 is the byte count SHA1_Flatten copies.
+.globl SHA1_FlattenSize
+ .type SHA1_FlattenSize, @function
+SHA1_FlattenSize:
+.LFB12:
+ movl $248, %eax
+ ret
+.LFE12:
+ .size SHA1_FlattenSize, .-SHA1_FlattenSize
+ .align 16
+/ SHA1_Flatten: copy the raw 248-byte context image into a caller buffer.
+/   %rdi = context (source), %rsi = destination buffer.
+/ Always returns 0 in %eax.
+.globl SHA1_Flatten
+ .type SHA1_Flatten, @function
+SHA1_Flatten:
+.LFB13:
+ pushq %rbp
+.LCFI37:
+/ Swap the two arguments into memcpy order: memcpy(destination, context, 248).
+ movq %rsi, %rax
+ movl $248, %edx
+ movq %rdi, %rsi
+ movq %rax, %rdi
+ movq %rsp, %rbp
+.LCFI38:
+ call memcpy@PLT
+ leave
+ xorl %eax, %eax
+ ret
+.LFE13:
+ .size SHA1_Flatten, .-SHA1_Flatten
+ .align 16
+/ SHA1_Resurrect: build a new context from a 248-byte image.
+/   %rdi = source image.  No other argument register is read.
+/ Returns the new context, or 0 (NULL) when SHA1_NewContext returns NULL.
+.globl SHA1_Resurrect
+ .type SHA1_Resurrect, @function
+SHA1_Resurrect:
+.LFB14:
+ pushq %rbp
+.LCFI39:
+ movq %rsp, %rbp
+.LCFI40:
+ movq %rbx, -16(%rbp)
+.LCFI41:
+ movq %r12, -8(%rbp)
+.LCFI42:
+ subq $16, %rsp
+.LCFI43:
+/ %r12 = source image, kept across the allocation call.
+ movq %rdi, %r12
+ call SHA1_NewContext@PLT
+/ %rbx = new context; %eax is pre-cleared so the failure path returns NULL.
+ movq %rax, %rbx
+ xorl %eax, %eax
+ testq %rbx, %rbx
+ je .L273
+/ memcpy(new context, source image, 248)
+ movl $248, %edx
+ movq %r12, %rsi
+ movq %rbx, %rdi
+ call memcpy@PLT
+ movq %rbx, %rax
+.L273:
+ movq (%rsp), %rbx
+ movq 8(%rsp), %r12
+ leave
+ ret
+.LFE14:
+ .size SHA1_Resurrect, .-SHA1_Resurrect
+ .align 16
+/ SHA1_Clone: copy one context over another.
+/   %rdi = destination, %rsi = source; both pass through untouched.
+/ Implemented as a tail call to memcpy(destination, source, 248).
+.globl SHA1_Clone
+ .type SHA1_Clone, @function
+SHA1_Clone:
+.LFB15:
+ movl $248, %edx
+ jmp memcpy@PLT
+.LFE15:
+ .size SHA1_Clone, .-SHA1_Clone
+ .align 16
+/ SHA1_TraceState: does no tracing.  It ignores its argument and tail-calls
+/ PORT_SetError_Util(-5992).
+/ NOTE(review): -5992 is presumably NSPR's PR_NOT_IMPLEMENTED_ERROR --
+/ confirm against prerr.h.
+.globl SHA1_TraceState
+ .type SHA1_TraceState, @function
+SHA1_TraceState:
+.LFB16:
+ movl $-5992, %edi
+ jmp PORT_SetError_Util@PLT
+.LFE16:
+ .size SHA1_TraceState, .-SHA1_TraceState
+ .align 16
+/ SHA1_EndRaw: emit the current state words as 20 bytes, with no padding
+/ and no compression step.
+/   %rdi = context, %rsi = output buffer,
+/   %rdx = optional pointer that receives the length 20 (may be NULL).
+/ Each 64-bit slot at context offsets 72, 80, 88, 96 and 104 is loaded, its
+/ low 32 bits are byte-swapped, and the result is stored to the output.
+.globl SHA1_EndRaw
+ .type SHA1_EndRaw, @function
+SHA1_EndRaw:
+.LFB50:
+ movq 72(%rdi), %rax
+/APP
+ bswap %eax
+/NO_APP
+ movl %eax, (%rsi)
+ movq 80(%rdi), %rax
+/APP
+ bswap %eax
+/NO_APP
+ movl %eax, 4(%rsi)
+ movq 88(%rdi), %rax
+/APP
+ bswap %eax
+/NO_APP
+ movl %eax, 8(%rsi)
+ movq 96(%rdi), %rax
+/APP
+ bswap %eax
+/NO_APP
+ movl %eax, 12(%rsi)
+ movq 104(%rdi), %rax
+/APP
+ bswap %eax
+/NO_APP
+/ The NULL test is done before the last store; movl leaves the flags intact.
+ testq %rdx, %rdx
+ movl %eax, 16(%rsi)
+ je .L14
+ movl $20, (%rdx)
+.L14:
+/ "rep; ret" encodes the return as a two-byte instruction.
+ rep
+ ret
+.LFE50:
+ .size SHA1_EndRaw, .-SHA1_EndRaw
diff --git a/security/nss/lib/freebl/sha256.h b/security/nss/lib/freebl/sha256.h
new file mode 100644
index 000000000..c65ca152d
--- /dev/null
+++ b/security/nss/lib/freebl/sha256.h
@@ -0,0 +1,19 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _SHA_256_H_
+#define _SHA_256_H_
+
+#include "prtypes.h"
+
+/*
+ * Hashing state used by the SHA-256 and SHA-224 routines in sha512.c.
+ * The union gives word-wise (w) and byte-wise (b) access to one buffer:
+ * incoming bytes are copied into b[], and the compression step reads and
+ * extends the same storage through w[].
+ */
+struct SHA256ContextStr {
+ union {
+ PRUint32 w[64]; /* message schedule, input buffer, plus 48 words */
+ PRUint8 b[256]; /* byte view of the same 256 bytes as w[] */
+ } u;
+ PRUint32 h[8]; /* 8 state variables */
+ PRUint32 sizeHi, sizeLo; /* 64-bit count of hashed bytes. */
+};
+
+#endif /* _SHA_256_H_ */
diff --git a/security/nss/lib/freebl/sha512.c b/security/nss/lib/freebl/sha512.c
new file mode 100644
index 000000000..528f884b2
--- /dev/null
+++ b/security/nss/lib/freebl/sha512.c
@@ -0,0 +1,1655 @@
+/*
+ * sha512.c - implementation of SHA224, SHA256, SHA384 and SHA512
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "prcpucfg.h"
+#if defined(NSS_X86) || defined(SHA_NO_LONG_LONG)
+#define NOUNROLL512 1
+#undef HAVE_LONG_LONG
+#endif
+#include "prtypes.h" /* for PRUintXX */
+#include "prlong.h"
+#include "secport.h" /* for PORT_XXX */
+#include "blapi.h"
+#include "sha256.h" /* for struct SHA256ContextStr */
+
+/* ============= Common constants and defines ======================= */
+
+/*
+ * Shorthand for fields of the context; these expand in terms of a local
+ * variable that must be named "ctx" in the function using them.
+ */
+#define W (ctx->u.w)
+#define B (ctx->u.b)
+#define H (ctx->h)
+
+/*
+ * Bit-manipulation helpers.  Every argument and every expansion is fully
+ * parenthesized so that compound arguments such as SHR(a | b, n) or
+ * SHA_MIN(a | b, c) group the way the call site reads.  Arguments may be
+ * evaluated more than once; do not pass expressions with side effects.
+ */
+#define SHR(x, n) ((x) >> (n))
+#define SHL(x, n) ((x) << (n))
+#define Ch(x, y, z) (((x) & (y)) ^ (~(x) & (z)))
+#define Maj(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))
+#define SHA_MIN(a, b) ((a) < (b) ? (a) : (b))
+
+/*
+ * Padding source shared by all SHA flavors in this file: one 0x80 marker
+ * byte followed by zeros.  Only the first element is spelled out; the
+ * remaining 239 bytes are zero-initialized by the language rules.
+ */
+static const PRUint8 pad[240] = {
+    0x80
+};
+
+/* ============= SHA256 implementation ================================== */
+
+/*
+ * SHA-256 constants, K256: one 32-bit additive constant per round.
+ * SHA256_Compress indexes this table with the round number 0..63.
+ */
+static const PRUint32 K256[64] = {
+ 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
+ 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
+ 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
+ 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
+ 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
+ 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+ 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
+ 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
+ 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
+ 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
+ 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
+ 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+ 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
+ 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
+ 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
+ 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+};
+
+/* SHA-256 initial hash values; SHA256_Begin copies these into ctx->h. */
+static const PRUint32 H256[8] = {
+ 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
+ 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
+};
+
+#if defined(IS_LITTLE_ENDIAN)
+#if (_MSC_VER >= 1300)
+#include <stdlib.h>
+#pragma intrinsic(_byteswap_ulong)
+#define SHA_HTONL(x) _byteswap_ulong(x)
+#elif defined(_MSC_VER) && defined(NSS_X86_OR_X64)
+#ifndef FORCEINLINE
+#if (_MSC_VER >= 1200)
+#define FORCEINLINE __forceinline
+#else
+#define FORCEINLINE __inline
+#endif
+#endif
+#define FASTCALL __fastcall
+
+/*
+ * MSVC inline-assembly variant: loads the argument into eax and reverses
+ * its four bytes with bswap.
+ * NOTE(review): there is no return statement; this presumably relies on
+ * the compiler taking the function result from eax - confirm for the
+ * compilers this branch is meant for.
+ */
+static FORCEINLINE PRUint32 FASTCALL
+swap4b(PRUint32 dwd)
+{
+ __asm {
+ mov eax,dwd
+ bswap eax
+ }
+}
+
+#define SHA_HTONL(x) swap4b(x)
+
+#elif defined(__GNUC__) && defined(NSS_X86_OR_X64)
+/*
+ * GCC x86/x64 variant: reverses the four bytes of value with a single
+ * bswap instruction.  The "+r" constraint makes value both the input and
+ * the output register.
+ */
+static __inline__ PRUint32
+swap4b(PRUint32 value)
+{
+ __asm__("bswap %0"
+ : "+r"(value));
+ return (value);
+}
+#define SHA_HTONL(x) swap4b(x)
+
+#elif defined(__GNUC__) && (defined(__thumb2__) || \
+ (!defined(__thumb__) && \
+ (defined(__ARM_ARCH_6__) || \
+ defined(__ARM_ARCH_6J__) || \
+ defined(__ARM_ARCH_6K__) || \
+ defined(__ARM_ARCH_6Z__) || \
+ defined(__ARM_ARCH_6ZK__) || \
+ defined(__ARM_ARCH_6T2__) || \
+ defined(__ARM_ARCH_7__) || \
+ defined(__ARM_ARCH_7A__) || \
+ defined(__ARM_ARCH_7R__))))
+/*
+ * GCC ARM variant (selected by the architecture tests above): reverses the
+ * four bytes of value with the rev instruction, writing the result to a
+ * separate output register.
+ */
+static __inline__ PRUint32
+swap4b(PRUint32 value)
+{
+ PRUint32 ret;
+ __asm__("rev %0, %1"
+ : "=r"(ret)
+ : "r"(value));
+ return ret;
+}
+#define SHA_HTONL(x) swap4b(x)
+
+#else
+#define SWAP4MASK 0x00FF00FF
+/*
+ * Portable fallback: reverse the four bytes of value using shifts and masks.
+ * First exchange the two 16-bit halves, then exchange the bytes inside each
+ * half with the help of SWAP4MASK (0x00FF00FF).
+ */
+static PRUint32
+swap4b(PRUint32 value)
+{
+    PRUint32 halvesSwapped = (value >> 16) | (value << 16);
+    PRUint32 movedUp = (halvesSwapped & SWAP4MASK) << 8;
+    PRUint32 movedDown = (halvesSwapped >> 8) & SWAP4MASK;
+
+    return movedUp | movedDown;
+}
+#define SHA_HTONL(x) swap4b(x)
+#endif
+#define BYTESWAP4(x) x = SHA_HTONL(x)
+#endif /* defined(IS_LITTLE_ENDIAN) */
+
+#if defined(_MSC_VER)
+#pragma intrinsic(_lrotr, _lrotl)
+#define ROTR32(x, n) _lrotr(x, n)
+#define ROTL32(x, n) _lrotl(x, n)
+#else
+/*
+ * Portable rotates.  The width is taken from the operand's own type, so n
+ * must be greater than zero and less than that width (a shift by the full
+ * width would be undefined).  Arguments are parenthesized so that compound
+ * expressions rotate as a whole; they are evaluated more than once.
+ */
+#define ROTR32(x, n) (((x) >> (n)) | ((x) << ((8 * sizeof(x)) - (n))))
+#define ROTL32(x, n) (((x) << (n)) | ((x) >> ((8 * sizeof(x)) - (n))))
+#endif
+
+/* Capital Sigma and lower case sigma functions */
+#define S0(x) (ROTR32(x, 2) ^ ROTR32(x, 13) ^ ROTR32(x, 22))
+#define S1(x) (ROTR32(x, 6) ^ ROTR32(x, 11) ^ ROTR32(x, 25))
+#define s0(x) (ROTR32(x, 7) ^ ROTR32(x, 18) ^ SHR(x, 3))
+#define s1(x) (ROTR32(x, 17) ^ ROTR32(x, 19) ^ SHR(x, 10))
+
+/*
+ * Allocate storage for one SHA-256 context with PORT_New and hand it back
+ * as-is; the contents are not initialized here (see SHA256_Begin).
+ * The result may be NULL - SHA256_Resurrect checks for that.
+ */
+SHA256Context *
+SHA256_NewContext(void)
+{
+    return PORT_New(SHA256Context);
+}
+
+/*
+ * Wipe a SHA-256 context and optionally release it.
+ *
+ * ctx     context to clear; a NULL pointer is accepted and ignored.
+ * freeit  when true, the storage is also returned with PORT_Free.
+ */
+void
+SHA256_DestroyContext(SHA256Context *ctx, PRBool freeit)
+{
+    /* Nothing to wipe or free; avoids handing NULL to memset. */
+    if (!ctx) {
+        return;
+    }
+    memset(ctx, 0, sizeof *ctx);
+    if (freeit) {
+        PORT_Free(ctx);
+    }
+}
+
+/*
+ * Reset ctx for a new SHA-256 computation: zero the whole context, then
+ * load the eight initial hash words from H256.
+ */
+void
+SHA256_Begin(SHA256Context *ctx)
+{
+    memset(ctx, 0, sizeof(*ctx));
+    memcpy(ctx->h, H256, sizeof(H256));
+}
+
+/*
+ * Process the 64-byte block currently held in the first 16 words of
+ * ctx->u.w and fold the result into the eight state words ctx->h.
+ * W[16..63] are overwritten with the expanded message schedule.
+ * Defining NOUNROLL256 selects the loop forms instead of the unrolled ones.
+ */
+static void
+SHA256_Compress(SHA256Context *ctx)
+{
+ {
+#if defined(IS_LITTLE_ENDIAN)
+ /* Byte-swap each of the 16 input words in place. */
+ BYTESWAP4(W[0]);
+ BYTESWAP4(W[1]);
+ BYTESWAP4(W[2]);
+ BYTESWAP4(W[3]);
+ BYTESWAP4(W[4]);
+ BYTESWAP4(W[5]);
+ BYTESWAP4(W[6]);
+ BYTESWAP4(W[7]);
+ BYTESWAP4(W[8]);
+ BYTESWAP4(W[9]);
+ BYTESWAP4(W[10]);
+ BYTESWAP4(W[11]);
+ BYTESWAP4(W[12]);
+ BYTESWAP4(W[13]);
+ BYTESWAP4(W[14]);
+ BYTESWAP4(W[15]);
+#endif
+
+/* Each schedule word depends on four earlier ones, so order matters. */
+#define INITW(t) W[t] = (s1(W[t - 2]) + W[t - 7] + s0(W[t - 15]) + W[t - 16])
+
+/* prepare the "message schedule" */
+#ifdef NOUNROLL256
+ {
+ int t;
+ for (t = 16; t < 64; ++t) {
+ INITW(t);
+ }
+ }
+#else
+ INITW(16);
+ INITW(17);
+ INITW(18);
+ INITW(19);
+
+ INITW(20);
+ INITW(21);
+ INITW(22);
+ INITW(23);
+ INITW(24);
+ INITW(25);
+ INITW(26);
+ INITW(27);
+ INITW(28);
+ INITW(29);
+
+ INITW(30);
+ INITW(31);
+ INITW(32);
+ INITW(33);
+ INITW(34);
+ INITW(35);
+ INITW(36);
+ INITW(37);
+ INITW(38);
+ INITW(39);
+
+ INITW(40);
+ INITW(41);
+ INITW(42);
+ INITW(43);
+ INITW(44);
+ INITW(45);
+ INITW(46);
+ INITW(47);
+ INITW(48);
+ INITW(49);
+
+ INITW(50);
+ INITW(51);
+ INITW(52);
+ INITW(53);
+ INITW(54);
+ INITW(55);
+ INITW(56);
+ INITW(57);
+ INITW(58);
+ INITW(59);
+
+ INITW(60);
+ INITW(61);
+ INITW(62);
+ INITW(63);
+
+#endif
+#undef INITW
+ }
+ {
+ PRUint32 a, b, c, d, e, f, g, h;
+
+ /* Work on local copies of the state words. */
+ a = H[0];
+ b = H[1];
+ c = H[2];
+ d = H[3];
+ e = H[4];
+ f = H[5];
+ g = H[6];
+ h = H[7];
+
+/*
+ * One round.  Instead of moving values between the eight variables after
+ * each round, successive invocations pass the variables in rotated order.
+ */
+#define ROUND(n, a, b, c, d, e, f, g, h) \
+ h += S1(e) + Ch(e, f, g) + K256[n] + W[n]; \
+ d += h; \
+ h += S0(a) + Maj(a, b, c);
+
+#ifdef NOUNROLL256
+ {
+ int t;
+ for (t = 0; t < 64; t += 8) {
+ ROUND(t + 0, a, b, c, d, e, f, g, h)
+ ROUND(t + 1, h, a, b, c, d, e, f, g)
+ ROUND(t + 2, g, h, a, b, c, d, e, f)
+ ROUND(t + 3, f, g, h, a, b, c, d, e)
+ ROUND(t + 4, e, f, g, h, a, b, c, d)
+ ROUND(t + 5, d, e, f, g, h, a, b, c)
+ ROUND(t + 6, c, d, e, f, g, h, a, b)
+ ROUND(t + 7, b, c, d, e, f, g, h, a)
+ }
+ }
+#else
+ ROUND(0, a, b, c, d, e, f, g, h)
+ ROUND(1, h, a, b, c, d, e, f, g)
+ ROUND(2, g, h, a, b, c, d, e, f)
+ ROUND(3, f, g, h, a, b, c, d, e)
+ ROUND(4, e, f, g, h, a, b, c, d)
+ ROUND(5, d, e, f, g, h, a, b, c)
+ ROUND(6, c, d, e, f, g, h, a, b)
+ ROUND(7, b, c, d, e, f, g, h, a)
+
+ ROUND(8, a, b, c, d, e, f, g, h)
+ ROUND(9, h, a, b, c, d, e, f, g)
+ ROUND(10, g, h, a, b, c, d, e, f)
+ ROUND(11, f, g, h, a, b, c, d, e)
+ ROUND(12, e, f, g, h, a, b, c, d)
+ ROUND(13, d, e, f, g, h, a, b, c)
+ ROUND(14, c, d, e, f, g, h, a, b)
+ ROUND(15, b, c, d, e, f, g, h, a)
+
+ ROUND(16, a, b, c, d, e, f, g, h)
+ ROUND(17, h, a, b, c, d, e, f, g)
+ ROUND(18, g, h, a, b, c, d, e, f)
+ ROUND(19, f, g, h, a, b, c, d, e)
+ ROUND(20, e, f, g, h, a, b, c, d)
+ ROUND(21, d, e, f, g, h, a, b, c)
+ ROUND(22, c, d, e, f, g, h, a, b)
+ ROUND(23, b, c, d, e, f, g, h, a)
+
+ ROUND(24, a, b, c, d, e, f, g, h)
+ ROUND(25, h, a, b, c, d, e, f, g)
+ ROUND(26, g, h, a, b, c, d, e, f)
+ ROUND(27, f, g, h, a, b, c, d, e)
+ ROUND(28, e, f, g, h, a, b, c, d)
+ ROUND(29, d, e, f, g, h, a, b, c)
+ ROUND(30, c, d, e, f, g, h, a, b)
+ ROUND(31, b, c, d, e, f, g, h, a)
+
+ ROUND(32, a, b, c, d, e, f, g, h)
+ ROUND(33, h, a, b, c, d, e, f, g)
+ ROUND(34, g, h, a, b, c, d, e, f)
+ ROUND(35, f, g, h, a, b, c, d, e)
+ ROUND(36, e, f, g, h, a, b, c, d)
+ ROUND(37, d, e, f, g, h, a, b, c)
+ ROUND(38, c, d, e, f, g, h, a, b)
+ ROUND(39, b, c, d, e, f, g, h, a)
+
+ ROUND(40, a, b, c, d, e, f, g, h)
+ ROUND(41, h, a, b, c, d, e, f, g)
+ ROUND(42, g, h, a, b, c, d, e, f)
+ ROUND(43, f, g, h, a, b, c, d, e)
+ ROUND(44, e, f, g, h, a, b, c, d)
+ ROUND(45, d, e, f, g, h, a, b, c)
+ ROUND(46, c, d, e, f, g, h, a, b)
+ ROUND(47, b, c, d, e, f, g, h, a)
+
+ ROUND(48, a, b, c, d, e, f, g, h)
+ ROUND(49, h, a, b, c, d, e, f, g)
+ ROUND(50, g, h, a, b, c, d, e, f)
+ ROUND(51, f, g, h, a, b, c, d, e)
+ ROUND(52, e, f, g, h, a, b, c, d)
+ ROUND(53, d, e, f, g, h, a, b, c)
+ ROUND(54, c, d, e, f, g, h, a, b)
+ ROUND(55, b, c, d, e, f, g, h, a)
+
+ ROUND(56, a, b, c, d, e, f, g, h)
+ ROUND(57, h, a, b, c, d, e, f, g)
+ ROUND(58, g, h, a, b, c, d, e, f)
+ ROUND(59, f, g, h, a, b, c, d, e)
+ ROUND(60, e, f, g, h, a, b, c, d)
+ ROUND(61, d, e, f, g, h, a, b, c)
+ ROUND(62, c, d, e, f, g, h, a, b)
+ ROUND(63, b, c, d, e, f, g, h, a)
+#endif
+
+ /* Add the round results back into the running state. */
+ H[0] += a;
+ H[1] += b;
+ H[2] += c;
+ H[3] += d;
+ H[4] += e;
+ H[5] += f;
+ H[6] += g;
+ H[7] += h;
+ }
+#undef ROUND
+}
+
+#undef s0
+#undef s1
+#undef S0
+#undef S1
+
+/*
+ * Feed inputLen bytes from input into the hash.
+ *
+ * Bytes are collected in the context buffer; each time 64 bytes
+ * (SHA256_BLOCK_LENGTH) are available the block is compressed.  Any
+ * remainder stays buffered for the next call.  A zero inputLen is a no-op.
+ */
+void
+SHA256_Update(SHA256Context *ctx, const unsigned char *input,
+              unsigned int inputLen)
+{
+    const unsigned int buffered = ctx->sizeLo & 0x3f;
+
+    if (inputLen == 0) {
+        return;
+    }
+
+    /* Count the new bytes up front, carrying into sizeHi on wrap-around. */
+    ctx->sizeLo += inputLen;
+    if (ctx->sizeLo < inputLen) {
+        ctx->sizeHi++;
+    }
+
+    /* A partially filled buffer is topped up first. */
+    if (buffered != 0) {
+        const unsigned int room = SHA256_BLOCK_LENGTH - buffered;
+        const unsigned int take = (inputLen < room) ? inputLen : room;
+
+        memcpy(B + buffered, input, take);
+        input += take;
+        inputLen -= take;
+        if (take == room) {
+            SHA256_Compress(ctx);
+        }
+    }
+
+    /* Then every complete block is processed in turn. */
+    for (; inputLen >= SHA256_BLOCK_LENGTH; inputLen -= SHA256_BLOCK_LENGTH) {
+        memcpy(B, input, SHA256_BLOCK_LENGTH);
+        input += SHA256_BLOCK_LENGTH;
+        SHA256_Compress(ctx);
+    }
+
+    /* Whatever is left waits in the buffer. */
+    if (inputLen != 0) {
+        memcpy(B, input, inputLen);
+    }
+}
+
+/*
+ * Finish the hash: pad the message, append the 64-bit bit count, run the
+ * last compression and copy the result out.
+ *
+ * digest        receives min(SHA256_LENGTH, maxDigestLen) bytes.
+ * digestLen     if non-NULL, receives the number of bytes written.
+ * maxDigestLen  capacity of digest.
+ *
+ * The state words in ctx are byte-swapped in place on little-endian
+ * builds, so the context is not usable for further updates afterwards.
+ */
+void
+SHA256_End(SHA256Context *ctx, unsigned char *digest,
+           unsigned int *digestLen, unsigned int maxDigestLen)
+{
+    const unsigned int tail = ctx->sizeLo & 0x3f;
+    unsigned int padBytes;
+    unsigned int outLen;
+    PRUint32 bitsHi, bitsLo;
+#if defined(IS_LITTLE_ENDIAN)
+    unsigned int i;
+#endif
+
+    /* Pad so that exactly 8 bytes remain free at the end of a block. */
+    if (tail < 56) {
+        padBytes = 56 - tail;
+    } else {
+        padBytes = 56 + 64 - tail;
+    }
+
+    /* Take the bit count before padding changes the byte counters. */
+    bitsHi = (ctx->sizeHi << 3) | (ctx->sizeLo >> 29);
+    bitsLo = (ctx->sizeLo << 3);
+
+    SHA256_Update(ctx, pad, padBytes);
+
+#if defined(IS_LITTLE_ENDIAN)
+    W[14] = SHA_HTONL(bitsHi);
+    W[15] = SHA_HTONL(bitsLo);
+#else
+    W[14] = bitsHi;
+    W[15] = bitsLo;
+#endif
+    SHA256_Compress(ctx);
+
+/* now output the answer */
+#if defined(IS_LITTLE_ENDIAN)
+    for (i = 0; i < 8; ++i) {
+        BYTESWAP4(H[i]);
+    }
+#endif
+    outLen = PR_MIN(SHA256_LENGTH, maxDigestLen);
+    memcpy(digest, H, outLen);
+    if (digestLen) {
+        *digestLen = outLen;
+    }
+}
+
+/*
+ * Export the current state words without padding or finalizing.
+ *
+ * A private copy of ctx->h is byte-swapped (little-endian builds only) and
+ * min(SHA256_LENGTH, maxDigestLen) bytes of it are written to digest; the
+ * context itself is left untouched.  digestLen, if non-NULL, receives the
+ * number of bytes written.
+ */
+void
+SHA256_EndRaw(SHA256Context *ctx, unsigned char *digest,
+              unsigned int *digestLen, unsigned int maxDigestLen)
+{
+    PRUint32 snapshot[8];
+    unsigned int outLen;
+#if defined(IS_LITTLE_ENDIAN)
+    unsigned int i;
+#endif
+
+    memcpy(snapshot, ctx->h, sizeof(snapshot));
+
+#if defined(IS_LITTLE_ENDIAN)
+    for (i = 0; i < 8; ++i) {
+        BYTESWAP4(snapshot[i]);
+    }
+#endif
+
+    outLen = PR_MIN(SHA256_LENGTH, maxDigestLen);
+    memcpy(digest, snapshot, outLen);
+    if (digestLen) {
+        *digestLen = outLen;
+    }
+}
+
+/*
+ * One-shot SHA-256 of src[0..src_length) into dest, which must have room
+ * for SHA256_LENGTH bytes.  The temporary context is cleared before
+ * returning.  Always returns SECSuccess.
+ */
+SECStatus
+SHA256_HashBuf(unsigned char *dest, const unsigned char *src,
+               PRUint32 src_length)
+{
+    SHA256Context scratch;
+    unsigned int written;
+
+    SHA256_Begin(&scratch);
+    SHA256_Update(&scratch, src, src_length);
+    SHA256_End(&scratch, dest, &written, SHA256_LENGTH);
+
+    /* Do not leave hash state behind on the stack. */
+    memset(&scratch, 0, sizeof(scratch));
+
+    return SECSuccess;
+}
+
+/*
+ * SHA-256 of a NUL-terminated string: the length is taken with PORT_Strlen
+ * and the work is delegated to SHA256_HashBuf.
+ */
+SECStatus
+SHA256_Hash(unsigned char *dest, const char *src)
+{
+    const unsigned char *bytes = (const unsigned char *)src;
+
+    return SHA256_HashBuf(dest, bytes, PORT_Strlen(src));
+}
+
+/* Intentionally empty: no trace output is produced for SHA-256 here. */
+void
+SHA256_TraceState(SHA256Context *ctx)
+{
+}
+
+/*
+ * Number of bytes SHA256_Flatten writes: the size of the context
+ * structure itself.  ctx is only used for its type.
+ */
+unsigned int
+SHA256_FlattenSize(SHA256Context *ctx)
+{
+    unsigned int flatSize = sizeof(*ctx);
+
+    return flatSize;
+}
+
+/*
+ * Serialize ctx by copying the raw structure bytes into space, which must
+ * hold at least SHA256_FlattenSize() bytes.  Always returns SECSuccess.
+ */
+SECStatus
+SHA256_Flatten(SHA256Context *ctx, unsigned char *space)
+{
+    PORT_Memcpy(space, ctx, sizeof(*ctx));
+
+    return SECSuccess;
+}
+
+/*
+ * Rebuild a context from bytes produced by SHA256_Flatten.
+ *
+ * Returns a newly allocated context holding a copy of space, or NULL if
+ * the allocation failed.  arg is not used.
+ */
+SHA256Context *
+SHA256_Resurrect(unsigned char *space, void *arg)
+{
+    SHA256Context *restored = SHA256_NewContext();
+
+    if (!restored) {
+        return restored;
+    }
+    PORT_Memcpy(restored, space, sizeof(*restored));
+    return restored;
+}
+
+/* Duplicate the complete hashing state of src into dest, byte for byte. */
+void
+SHA256_Clone(SHA256Context *dest, SHA256Context *src)
+{
+    memcpy(dest, src, sizeof(*dest));
+}
+
+/* ============= SHA224 implementation ================================== */
+
+/* SHA-224 initial hash values; SHA224_Begin copies these into ctx->h. */
+static const PRUint32 H224[8] = {
+ 0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939,
+ 0xffc00b31, 0x68581511, 0x64f98fa7, 0xbefa4fa4
+};
+
+/* SHA-224 uses the SHA-256 context; allocation is delegated unchanged. */
+SHA224Context *
+SHA224_NewContext(void)
+{
+    SHA224Context *fresh = SHA256_NewContext();
+
+    return fresh;
+}
+
+/* Forwards to SHA256_DestroyContext with the same arguments. */
+void
+SHA224_DestroyContext(SHA224Context *ctx, PRBool freeit)
+{
+ SHA256_DestroyContext(ctx, freeit);
+}
+
+/*
+ * Reset ctx for a new SHA-224 computation: zero the whole context, then
+ * load the SHA-224 initial hash words from H224.
+ */
+void
+SHA224_Begin(SHA224Context *ctx)
+{
+    memset(ctx, 0, sizeof(*ctx));
+    memcpy(ctx->h, H224, sizeof(H224));
+}
+
+/* Forwards to SHA256_Update; input handling is the same for SHA-224. */
+void
+SHA224_Update(SHA224Context *ctx, const unsigned char *input,
+ unsigned int inputLen)
+{
+ SHA256_Update(ctx, input, inputLen);
+}
+
+/*
+ * Finish a SHA-224 hash.  The output is limited to SHA224_LENGTH bytes (or
+ * fewer if maxDigestLen is smaller) and the rest is done by SHA256_End.
+ */
+void
+SHA224_End(SHA256Context *ctx, unsigned char *digest,
+           unsigned int *digestLen, unsigned int maxDigestLen)
+{
+    unsigned int limit = maxDigestLen;
+
+    if (!(limit < SHA224_LENGTH)) {
+        limit = SHA224_LENGTH;
+    }
+    SHA256_End(ctx, digest, digestLen, limit);
+}
+
+/*
+ * Export the raw state for SHA-224.  The output is limited to
+ * SHA224_LENGTH bytes (or fewer if maxDigestLen is smaller) and the rest is
+ * done by SHA256_EndRaw.
+ */
+void
+SHA224_EndRaw(SHA256Context *ctx, unsigned char *digest,
+              unsigned int *digestLen, unsigned int maxDigestLen)
+{
+    unsigned int limit = maxDigestLen;
+
+    if (!(limit < SHA224_LENGTH)) {
+        limit = SHA224_LENGTH;
+    }
+    SHA256_EndRaw(ctx, digest, digestLen, limit);
+}
+
+/*
+ * One-shot SHA-224 of src[0..src_length) into dest, which must have room
+ * for SHA224_LENGTH bytes.  The temporary context is cleared before
+ * returning.  Always returns SECSuccess.
+ */
+SECStatus
+SHA224_HashBuf(unsigned char *dest, const unsigned char *src,
+               PRUint32 src_length)
+{
+    SHA256Context scratch;
+    unsigned int written;
+
+    SHA224_Begin(&scratch);
+    SHA256_Update(&scratch, src, src_length);
+    SHA256_End(&scratch, dest, &written, SHA224_LENGTH);
+
+    /* Do not leave hash state behind on the stack. */
+    memset(&scratch, 0, sizeof(scratch));
+
+    return SECSuccess;
+}
+
+/*
+ * SHA-224 of a NUL-terminated string: the length is taken with PORT_Strlen
+ * and the work is delegated to SHA224_HashBuf.
+ */
+SECStatus
+SHA224_Hash(unsigned char *dest, const char *src)
+{
+    const unsigned char *bytes = (const unsigned char *)src;
+
+    return SHA224_HashBuf(dest, bytes, PORT_Strlen(src));
+}
+
+/* Intentionally empty: no trace output is produced for SHA-224 here. */
+void
+SHA224_TraceState(SHA224Context *ctx)
+{
+}
+
+/* Same serialized size as SHA-256, since the context type is shared. */
+unsigned int
+SHA224_FlattenSize(SHA224Context *ctx)
+{
+    unsigned int flatSize = SHA256_FlattenSize(ctx);
+
+    return flatSize;
+}
+
+/* Serialize ctx into space; delegated to SHA256_Flatten. */
+SECStatus
+SHA224_Flatten(SHA224Context *ctx, unsigned char *space)
+{
+    SECStatus status = SHA256_Flatten(ctx, space);
+
+    return status;
+}
+
+/* Rebuild a context from flattened bytes; delegated to SHA256_Resurrect. */
+SHA224Context *
+SHA224_Resurrect(unsigned char *space, void *arg)
+{
+    SHA224Context *restored = SHA256_Resurrect(space, arg);
+
+    return restored;
+}
+
+/* Forwards to SHA256_Clone, which copies the whole context. */
+void
+SHA224_Clone(SHA224Context *dest, SHA224Context *src)
+{
+ SHA256_Clone(dest, src);
+}
+
+/* ======= SHA512 and SHA384 common constants and defines ================= */
+
+/* common #defines for SHA512 and SHA384 */
+#if defined(HAVE_LONG_LONG)
+#if defined(_MSC_VER)
+#pragma intrinsic(_rotr64, _rotl64)
+#define ROTR64(x, n) _rotr64(x, n)
+#define ROTL64(x, n) _rotl64(x, n)
+#else
+#define ROTR64(x, n) ((x >> n) | (x << (64 - n)))
+#define ROTL64(x, n) ((x << n) | (x >> (64 - n)))
+#endif
+
+#define S0(x) (ROTR64(x, 28) ^ ROTR64(x, 34) ^ ROTR64(x, 39))
+#define S1(x) (ROTR64(x, 14) ^ ROTR64(x, 18) ^ ROTR64(x, 41))
+#define s0(x) (ROTR64(x, 1) ^ ROTR64(x, 8) ^ SHR(x, 7))
+#define s1(x) (ROTR64(x, 19) ^ ROTR64(x, 61) ^ SHR(x, 6))
+
+#if PR_BYTES_PER_LONG == 8
+#define ULLC(hi, lo) 0x##hi##lo##UL
+#elif defined(_MSC_VER)
+#define ULLC(hi, lo) 0x##hi##lo##ui64
+#else
+#define ULLC(hi, lo) 0x##hi##lo##ULL
+#endif
+
+#if defined(IS_LITTLE_ENDIAN)
+#if defined(_MSC_VER)
+#pragma intrinsic(_byteswap_uint64)
+#define SHA_HTONLL(x) _byteswap_uint64(x)
+
+#elif defined(__GNUC__) && (defined(__x86_64__) || defined(__x86_64))
+/*
+ * GCC x86_64 variant: reverses the eight bytes of value with a single
+ * bswapq instruction.  The "+r" constraint makes value both the input and
+ * the output register.
+ */
+static __inline__ PRUint64
+swap8b(PRUint64 value)
+{
+ __asm__("bswapq %0"
+ : "+r"(value));
+ return (value);
+}
+#define SHA_HTONLL(x) swap8b(x)
+
+#else
+#define SHA_MASK16 ULLC(0000FFFF, 0000FFFF)
+#define SHA_MASK8 ULLC(00FF00FF, 00FF00FF)
+/*
+ * Portable fallback: reverse the eight bytes of x in three steps - swap
+ * adjacent bytes, then adjacent 16-bit units, then the two 32-bit halves.
+ */
+static PRUint64
+swap8b(PRUint64 x)
+{
+    PRUint64 bytesDone = ((x & SHA_MASK8) << 8) | ((x >> 8) & SHA_MASK8);
+    PRUint64 pairsDone = ((bytesDone & SHA_MASK16) << 16) |
+                         ((bytesDone >> 16) & SHA_MASK16);
+
+    return (pairsDone >> 32) | (pairsDone << 32);
+}
+#define SHA_HTONLL(x) swap8b(x)
+#endif
+#define BYTESWAP8(x) x = SHA_HTONLL(x)
+#endif /* defined(IS_LITTLE_ENDIAN) */
+
+#else /* no long long */
+
+#if defined(IS_LITTLE_ENDIAN)
+#define ULLC(hi, lo) \
+ { \
+ 0x##lo##U, 0x##hi##U \
+ }
+/*
+ * Byte-reverse a two-word 64-bit value in place and yield it: swap the
+ * bytes inside each 32-bit half, then exchange the halves with an XOR swap.
+ * The three XOR steps are separated by comma operators so each one is
+ * sequenced before the next; chaining them as a single assignment
+ * expression would modify x.hi twice without sequencing, which is
+ * undefined behavior.
+ */
+#define SHA_HTONLL(x) (BYTESWAP4(x.lo), BYTESWAP4(x.hi), \
+                       x.hi ^= x.lo, x.lo ^= x.hi, x.hi ^= x.lo, x)
+#define BYTESWAP8(x) \
+ do { \
+ PRUint32 tmp; \
+ BYTESWAP4(x.lo); \
+ BYTESWAP4(x.hi); \
+ tmp = x.lo; \
+ x.lo = x.hi; \
+ x.hi = tmp; \
+ } while (0)
+#else
+#define ULLC(hi, lo) \
+ { \
+ 0x##hi##U, 0x##lo##U \
+ }
+#endif
+
+#endif
+
+/*
+ * SHA-384 and SHA-512 constants, K512: one 64-bit additive constant per
+ * round, indexed by the round number 0..79 in the ROUND macro.
+ * The table is spelled twice: as plain UL literals when long is 8 bytes,
+ * and otherwise through ULLC(hi, lo), which expands either to a suffixed
+ * 64-bit literal or to a two-word initializer depending on the build.
+ */
+static const PRUint64 K512[80] = {
+#if PR_BYTES_PER_LONG == 8
+ 0x428a2f98d728ae22UL, 0x7137449123ef65cdUL,
+ 0xb5c0fbcfec4d3b2fUL, 0xe9b5dba58189dbbcUL,
+ 0x3956c25bf348b538UL, 0x59f111f1b605d019UL,
+ 0x923f82a4af194f9bUL, 0xab1c5ed5da6d8118UL,
+ 0xd807aa98a3030242UL, 0x12835b0145706fbeUL,
+ 0x243185be4ee4b28cUL, 0x550c7dc3d5ffb4e2UL,
+ 0x72be5d74f27b896fUL, 0x80deb1fe3b1696b1UL,
+ 0x9bdc06a725c71235UL, 0xc19bf174cf692694UL,
+ 0xe49b69c19ef14ad2UL, 0xefbe4786384f25e3UL,
+ 0x0fc19dc68b8cd5b5UL, 0x240ca1cc77ac9c65UL,
+ 0x2de92c6f592b0275UL, 0x4a7484aa6ea6e483UL,
+ 0x5cb0a9dcbd41fbd4UL, 0x76f988da831153b5UL,
+ 0x983e5152ee66dfabUL, 0xa831c66d2db43210UL,
+ 0xb00327c898fb213fUL, 0xbf597fc7beef0ee4UL,
+ 0xc6e00bf33da88fc2UL, 0xd5a79147930aa725UL,
+ 0x06ca6351e003826fUL, 0x142929670a0e6e70UL,
+ 0x27b70a8546d22ffcUL, 0x2e1b21385c26c926UL,
+ 0x4d2c6dfc5ac42aedUL, 0x53380d139d95b3dfUL,
+ 0x650a73548baf63deUL, 0x766a0abb3c77b2a8UL,
+ 0x81c2c92e47edaee6UL, 0x92722c851482353bUL,
+ 0xa2bfe8a14cf10364UL, 0xa81a664bbc423001UL,
+ 0xc24b8b70d0f89791UL, 0xc76c51a30654be30UL,
+ 0xd192e819d6ef5218UL, 0xd69906245565a910UL,
+ 0xf40e35855771202aUL, 0x106aa07032bbd1b8UL,
+ 0x19a4c116b8d2d0c8UL, 0x1e376c085141ab53UL,
+ 0x2748774cdf8eeb99UL, 0x34b0bcb5e19b48a8UL,
+ 0x391c0cb3c5c95a63UL, 0x4ed8aa4ae3418acbUL,
+ 0x5b9cca4f7763e373UL, 0x682e6ff3d6b2b8a3UL,
+ 0x748f82ee5defb2fcUL, 0x78a5636f43172f60UL,
+ 0x84c87814a1f0ab72UL, 0x8cc702081a6439ecUL,
+ 0x90befffa23631e28UL, 0xa4506cebde82bde9UL,
+ 0xbef9a3f7b2c67915UL, 0xc67178f2e372532bUL,
+ 0xca273eceea26619cUL, 0xd186b8c721c0c207UL,
+ 0xeada7dd6cde0eb1eUL, 0xf57d4f7fee6ed178UL,
+ 0x06f067aa72176fbaUL, 0x0a637dc5a2c898a6UL,
+ 0x113f9804bef90daeUL, 0x1b710b35131c471bUL,
+ 0x28db77f523047d84UL, 0x32caab7b40c72493UL,
+ 0x3c9ebe0a15c9bebcUL, 0x431d67c49c100d4cUL,
+ 0x4cc5d4becb3e42b6UL, 0x597f299cfc657e2aUL,
+ 0x5fcb6fab3ad6faecUL, 0x6c44198c4a475817UL
+#else
+ ULLC(428a2f98, d728ae22), ULLC(71374491, 23ef65cd),
+ ULLC(b5c0fbcf, ec4d3b2f), ULLC(e9b5dba5, 8189dbbc),
+ ULLC(3956c25b, f348b538), ULLC(59f111f1, b605d019),
+ ULLC(923f82a4, af194f9b), ULLC(ab1c5ed5, da6d8118),
+ ULLC(d807aa98, a3030242), ULLC(12835b01, 45706fbe),
+ ULLC(243185be, 4ee4b28c), ULLC(550c7dc3, d5ffb4e2),
+ ULLC(72be5d74, f27b896f), ULLC(80deb1fe, 3b1696b1),
+ ULLC(9bdc06a7, 25c71235), ULLC(c19bf174, cf692694),
+ ULLC(e49b69c1, 9ef14ad2), ULLC(efbe4786, 384f25e3),
+ ULLC(0fc19dc6, 8b8cd5b5), ULLC(240ca1cc, 77ac9c65),
+ ULLC(2de92c6f, 592b0275), ULLC(4a7484aa, 6ea6e483),
+ ULLC(5cb0a9dc, bd41fbd4), ULLC(76f988da, 831153b5),
+ ULLC(983e5152, ee66dfab), ULLC(a831c66d, 2db43210),
+ ULLC(b00327c8, 98fb213f), ULLC(bf597fc7, beef0ee4),
+ ULLC(c6e00bf3, 3da88fc2), ULLC(d5a79147, 930aa725),
+ ULLC(06ca6351, e003826f), ULLC(14292967, 0a0e6e70),
+ ULLC(27b70a85, 46d22ffc), ULLC(2e1b2138, 5c26c926),
+ ULLC(4d2c6dfc, 5ac42aed), ULLC(53380d13, 9d95b3df),
+ ULLC(650a7354, 8baf63de), ULLC(766a0abb, 3c77b2a8),
+ ULLC(81c2c92e, 47edaee6), ULLC(92722c85, 1482353b),
+ ULLC(a2bfe8a1, 4cf10364), ULLC(a81a664b, bc423001),
+ ULLC(c24b8b70, d0f89791), ULLC(c76c51a3, 0654be30),
+ ULLC(d192e819, d6ef5218), ULLC(d6990624, 5565a910),
+ ULLC(f40e3585, 5771202a), ULLC(106aa070, 32bbd1b8),
+ ULLC(19a4c116, b8d2d0c8), ULLC(1e376c08, 5141ab53),
+ ULLC(2748774c, df8eeb99), ULLC(34b0bcb5, e19b48a8),
+ ULLC(391c0cb3, c5c95a63), ULLC(4ed8aa4a, e3418acb),
+ ULLC(5b9cca4f, 7763e373), ULLC(682e6ff3, d6b2b8a3),
+ ULLC(748f82ee, 5defb2fc), ULLC(78a5636f, 43172f60),
+ ULLC(84c87814, a1f0ab72), ULLC(8cc70208, 1a6439ec),
+ ULLC(90befffa, 23631e28), ULLC(a4506ceb, de82bde9),
+ ULLC(bef9a3f7, b2c67915), ULLC(c67178f2, e372532b),
+ ULLC(ca273ece, ea26619c), ULLC(d186b8c7, 21c0c207),
+ ULLC(eada7dd6, cde0eb1e), ULLC(f57d4f7f, ee6ed178),
+ ULLC(06f067aa, 72176fba), ULLC(0a637dc5, a2c898a6),
+ ULLC(113f9804, bef90dae), ULLC(1b710b35, 131c471b),
+ ULLC(28db77f5, 23047d84), ULLC(32caab7b, 40c72493),
+ ULLC(3c9ebe0a, 15c9bebc), ULLC(431d67c4, 9c100d4c),
+ ULLC(4cc5d4be, cb3e42b6), ULLC(597f299c, fc657e2a),
+ ULLC(5fcb6fab, 3ad6faec), ULLC(6c44198c, 4a475817)
+#endif
+};
+
+/*
+ * Hashing state used by the SHA-512 routines below.  The union gives
+ * 64-bit (w), 32-bit (l) and byte (b) views of one 640-byte buffer:
+ * incoming bytes are copied into b[], and the compression step reads and
+ * extends the same storage through w[].
+ */
+struct SHA512ContextStr {
+ union {
+ PRUint64 w[80]; /* message schedule, input buffer, plus 64 words */
+ PRUint32 l[160]; /* 32-bit view of the same storage */
+ PRUint8 b[640]; /* byte view of the same storage */
+ } u;
+ PRUint64 h[8]; /* 8 state variables */
+ PRUint64 sizeLo; /* 64-bit count of hashed bytes. */
+};
+
+/* =========== SHA512 implementation ===================================== */
+
+/*
+ * SHA-512 initial hash values; SHA512_Begin copies these into ctx->h.
+ * Spelled as UL literals when long is 8 bytes, otherwise through ULLC().
+ */
+static const PRUint64 H512[8] = {
+#if PR_BYTES_PER_LONG == 8
+ 0x6a09e667f3bcc908UL, 0xbb67ae8584caa73bUL,
+ 0x3c6ef372fe94f82bUL, 0xa54ff53a5f1d36f1UL,
+ 0x510e527fade682d1UL, 0x9b05688c2b3e6c1fUL,
+ 0x1f83d9abfb41bd6bUL, 0x5be0cd19137e2179UL
+#else
+ ULLC(6a09e667, f3bcc908), ULLC(bb67ae85, 84caa73b),
+ ULLC(3c6ef372, fe94f82b), ULLC(a54ff53a, 5f1d36f1),
+ ULLC(510e527f, ade682d1), ULLC(9b05688c, 2b3e6c1f),
+ ULLC(1f83d9ab, fb41bd6b), ULLC(5be0cd19, 137e2179)
+#endif
+};
+
+/*
+ * Allocate storage for one SHA-512 context with PORT_New and hand it back
+ * as-is; the contents are not initialized here (see SHA512_Begin).
+ */
+SHA512Context *
+SHA512_NewContext(void)
+{
+    return PORT_New(SHA512Context);
+}
+
+/*
+ * Wipe a SHA-512 context and optionally release it.
+ *
+ * ctx     context to clear; a NULL pointer is accepted and ignored.
+ * freeit  when true, the storage is also returned with PORT_Free.
+ */
+void
+SHA512_DestroyContext(SHA512Context *ctx, PRBool freeit)
+{
+    /* Nothing to wipe or free; avoids handing NULL to memset. */
+    if (!ctx) {
+        return;
+    }
+    memset(ctx, 0, sizeof *ctx);
+    if (freeit) {
+        PORT_Free(ctx);
+    }
+}
+
+/*
+ * Reset ctx for a new SHA-512 computation: zero the whole context, then
+ * load the eight initial hash words from H512.
+ */
+void
+SHA512_Begin(SHA512Context *ctx)
+{
+    memset(ctx, 0, sizeof(*ctx));
+    memcpy(ctx->h, H512, sizeof(H512));
+}
+
+/*
+ * DUMP(n, a, d, e, h): per-round trace output, active only when
+ * SHA512_TRACE is defined; otherwise it expands to nothing.  It prints the
+ * round number and the values of d and h, labelled with the names passed
+ * in the e and a positions.
+ *
+ * With native 64-bit integers the values are cast to unsigned long long
+ * and printed with %llx, so the format matches the argument even where
+ * long is only 32 bits wide.
+ */
+#if defined(SHA512_TRACE)
+#if defined(HAVE_LONG_LONG)
+#define DUMP(n, a, d, e, h) printf(" t = %2d, %s = %016llx, %s = %016llx\n", \
+                                   n, #e, (unsigned long long)(d),          \
+                                   #a, (unsigned long long)(h));
+#else
+#define DUMP(n, a, d, e, h) printf(" t = %2d, %s = %08x%08x, %s = %08x%08x\n", \
+                                   n, #e, d.hi, d.lo, #a, h.hi, h.lo);
+#endif
+#else
+#define DUMP(n, a, d, e, h)
+#endif
+
+#if defined(HAVE_LONG_LONG)
+
+#define ADDTO(x, y) y += x
+
+#define INITW(t) W[t] = (s1(W[t - 2]) + W[t - 7] + s0(W[t - 15]) + W[t - 16])
+
+#define ROUND(n, a, b, c, d, e, f, g, h) \
+ h += S1(e) + Ch(e, f, g) + K512[n] + W[n]; \
+ d += h; \
+ h += S0(a) + Maj(a, b, c); \
+ DUMP(n, a, d, e, h)
+
+#else /* use only 32-bit variables, and don't unroll loops */
+
+#undef NOUNROLL512
+#define NOUNROLL512 1
+
+#define ADDTO(x, y) \
+ y.lo += x.lo; \
+ y.hi += x.hi + (x.lo > y.lo)
+
+#define ROTR64a(x, n, lo, hi) (x.lo >> n | x.hi << (32 - n))
+#define ROTR64A(x, n, lo, hi) (x.lo << (64 - n) | x.hi >> (n - 32))
+#define SHR64a(x, n, lo, hi) (x.lo >> n | x.hi << (32 - n))
+
+/* Capital Sigma and lower case sigma functions */
+#define s0lo(x) (ROTR64a(x, 1, lo, hi) ^ ROTR64a(x, 8, lo, hi) ^ SHR64a(x, 7, lo, hi))
+#define s0hi(x) (ROTR64a(x, 1, hi, lo) ^ ROTR64a(x, 8, hi, lo) ^ (x.hi >> 7))
+
+#define s1lo(x) (ROTR64a(x, 19, lo, hi) ^ ROTR64A(x, 61, lo, hi) ^ SHR64a(x, 6, lo, hi))
+#define s1hi(x) (ROTR64a(x, 19, hi, lo) ^ ROTR64A(x, 61, hi, lo) ^ (x.hi >> 6))
+
+#define S0lo(x) (ROTR64a(x, 28, lo, hi) ^ ROTR64A(x, 34, lo, hi) ^ ROTR64A(x, 39, lo, hi))
+#define S0hi(x) (ROTR64a(x, 28, hi, lo) ^ ROTR64A(x, 34, hi, lo) ^ ROTR64A(x, 39, hi, lo))
+
+#define S1lo(x) (ROTR64a(x, 14, lo, hi) ^ ROTR64a(x, 18, lo, hi) ^ ROTR64A(x, 41, lo, hi))
+#define S1hi(x) (ROTR64a(x, 14, hi, lo) ^ ROTR64a(x, 18, hi, lo) ^ ROTR64A(x, 41, hi, lo))
+
+/* 32-bit versions of Ch and Maj */
+#define Chxx(x, y, z, lo) ((x.lo & y.lo) ^ (~x.lo & z.lo))
+#define Majx(x, y, z, lo) ((x.lo & y.lo) ^ (x.lo & z.lo) ^ (y.lo & z.lo))
+
+#define INITW(t) \
+ do { \
+ PRUint32 lo, tm; \
+ PRUint32 cy = 0; \
+ lo = s1lo(W[t - 2]); \
+ lo += (tm = W[t - 7].lo); \
+ if (lo < tm) \
+ cy++; \
+ lo += (tm = s0lo(W[t - 15])); \
+ if (lo < tm) \
+ cy++; \
+ lo += (tm = W[t - 16].lo); \
+ if (lo < tm) \
+ cy++; \
+ W[t].lo = lo; \
+ W[t].hi = cy + s1hi(W[t - 2]) + W[t - 7].hi + s0hi(W[t - 15]) + W[t - 16].hi; \
+ } while (0)
+
+#define ROUND(n, a, b, c, d, e, f, g, h) \
+ { \
+ PRUint32 lo, tm, cy; \
+ lo = S1lo(e); \
+ lo += (tm = Chxx(e, f, g, lo)); \
+ cy = (lo < tm); \
+ lo += (tm = K512[n].lo); \
+ if (lo < tm) \
+ cy++; \
+ lo += (tm = W[n].lo); \
+ if (lo < tm) \
+ cy++; \
+ h.lo += lo; \
+ if (h.lo < lo) \
+ cy++; \
+ h.hi += cy + S1hi(e) + Chxx(e, f, g, hi) + K512[n].hi + W[n].hi; \
+ d.lo += h.lo; \
+ d.hi += h.hi + (d.lo < h.lo); \
+ lo = S0lo(a); \
+ lo += (tm = Majx(a, b, c, lo)); \
+ cy = (lo < tm); \
+ h.lo += lo; \
+ if (h.lo < lo) \
+ cy++; \
+ h.hi += cy + S0hi(a) + Majx(a, b, c, hi); \
+ DUMP(n, a, d, e, h) \
+ }
+#endif
+
+/*
+ * Process the 128-byte block currently held in the first 16 words of
+ * ctx->u.w and fold the result into the eight state words ctx->h.
+ * W[16..79] are overwritten with the expanded message schedule.
+ * When NOUNROLL512 is defined the loop forms are used instead of the
+ * unrolled ones.  With SHA512_TRACE defined the schedule is printed; the
+ * 64-bit values are cast to unsigned long long so the %llx format matches
+ * the argument on every platform.
+ */
+static void
+SHA512_Compress(SHA512Context *ctx)
+{
+#if defined(IS_LITTLE_ENDIAN)
+    {
+        /* Byte-swap each of the 16 input words in place. */
+        BYTESWAP8(W[0]);
+        BYTESWAP8(W[1]);
+        BYTESWAP8(W[2]);
+        BYTESWAP8(W[3]);
+        BYTESWAP8(W[4]);
+        BYTESWAP8(W[5]);
+        BYTESWAP8(W[6]);
+        BYTESWAP8(W[7]);
+        BYTESWAP8(W[8]);
+        BYTESWAP8(W[9]);
+        BYTESWAP8(W[10]);
+        BYTESWAP8(W[11]);
+        BYTESWAP8(W[12]);
+        BYTESWAP8(W[13]);
+        BYTESWAP8(W[14]);
+        BYTESWAP8(W[15]);
+    }
+#endif
+
+    {
+#ifdef NOUNROLL512
+        {
+            /* prepare the "message schedule" */
+            int t;
+            for (t = 16; t < 80; ++t) {
+                INITW(t);
+            }
+        }
+#else
+        INITW(16);
+        INITW(17);
+        INITW(18);
+        INITW(19);
+
+        INITW(20);
+        INITW(21);
+        INITW(22);
+        INITW(23);
+        INITW(24);
+        INITW(25);
+        INITW(26);
+        INITW(27);
+        INITW(28);
+        INITW(29);
+
+        INITW(30);
+        INITW(31);
+        INITW(32);
+        INITW(33);
+        INITW(34);
+        INITW(35);
+        INITW(36);
+        INITW(37);
+        INITW(38);
+        INITW(39);
+
+        INITW(40);
+        INITW(41);
+        INITW(42);
+        INITW(43);
+        INITW(44);
+        INITW(45);
+        INITW(46);
+        INITW(47);
+        INITW(48);
+        INITW(49);
+
+        INITW(50);
+        INITW(51);
+        INITW(52);
+        INITW(53);
+        INITW(54);
+        INITW(55);
+        INITW(56);
+        INITW(57);
+        INITW(58);
+        INITW(59);
+
+        INITW(60);
+        INITW(61);
+        INITW(62);
+        INITW(63);
+        INITW(64);
+        INITW(65);
+        INITW(66);
+        INITW(67);
+        INITW(68);
+        INITW(69);
+
+        INITW(70);
+        INITW(71);
+        INITW(72);
+        INITW(73);
+        INITW(74);
+        INITW(75);
+        INITW(76);
+        INITW(77);
+        INITW(78);
+        INITW(79);
+#endif
+    }
+#ifdef SHA512_TRACE
+    {
+        int i;
+        for (i = 0; i < 80; ++i) {
+#ifdef HAVE_LONG_LONG
+            printf("W[%2d] = %016llx\n", i, (unsigned long long)W[i]);
+#else
+            printf("W[%2d] = %08x%08x\n", i, W[i].hi, W[i].lo);
+#endif
+        }
+    }
+#endif
+    {
+        PRUint64 a, b, c, d, e, f, g, h;
+
+        /* Work on local copies of the state words. */
+        a = H[0];
+        b = H[1];
+        c = H[2];
+        d = H[3];
+        e = H[4];
+        f = H[5];
+        g = H[6];
+        h = H[7];
+
+        /*
+         * Successive rounds pass the eight variables in rotated order
+         * instead of moving values between them.
+         */
+#ifdef NOUNROLL512
+        {
+            int t;
+            for (t = 0; t < 80; t += 8) {
+                ROUND(t + 0, a, b, c, d, e, f, g, h)
+                ROUND(t + 1, h, a, b, c, d, e, f, g)
+                ROUND(t + 2, g, h, a, b, c, d, e, f)
+                ROUND(t + 3, f, g, h, a, b, c, d, e)
+                ROUND(t + 4, e, f, g, h, a, b, c, d)
+                ROUND(t + 5, d, e, f, g, h, a, b, c)
+                ROUND(t + 6, c, d, e, f, g, h, a, b)
+                ROUND(t + 7, b, c, d, e, f, g, h, a)
+            }
+        }
+#else
+        ROUND(0, a, b, c, d, e, f, g, h)
+        ROUND(1, h, a, b, c, d, e, f, g)
+        ROUND(2, g, h, a, b, c, d, e, f)
+        ROUND(3, f, g, h, a, b, c, d, e)
+        ROUND(4, e, f, g, h, a, b, c, d)
+        ROUND(5, d, e, f, g, h, a, b, c)
+        ROUND(6, c, d, e, f, g, h, a, b)
+        ROUND(7, b, c, d, e, f, g, h, a)
+
+        ROUND(8, a, b, c, d, e, f, g, h)
+        ROUND(9, h, a, b, c, d, e, f, g)
+        ROUND(10, g, h, a, b, c, d, e, f)
+        ROUND(11, f, g, h, a, b, c, d, e)
+        ROUND(12, e, f, g, h, a, b, c, d)
+        ROUND(13, d, e, f, g, h, a, b, c)
+        ROUND(14, c, d, e, f, g, h, a, b)
+        ROUND(15, b, c, d, e, f, g, h, a)
+
+        ROUND(16, a, b, c, d, e, f, g, h)
+        ROUND(17, h, a, b, c, d, e, f, g)
+        ROUND(18, g, h, a, b, c, d, e, f)
+        ROUND(19, f, g, h, a, b, c, d, e)
+        ROUND(20, e, f, g, h, a, b, c, d)
+        ROUND(21, d, e, f, g, h, a, b, c)
+        ROUND(22, c, d, e, f, g, h, a, b)
+        ROUND(23, b, c, d, e, f, g, h, a)
+
+        ROUND(24, a, b, c, d, e, f, g, h)
+        ROUND(25, h, a, b, c, d, e, f, g)
+        ROUND(26, g, h, a, b, c, d, e, f)
+        ROUND(27, f, g, h, a, b, c, d, e)
+        ROUND(28, e, f, g, h, a, b, c, d)
+        ROUND(29, d, e, f, g, h, a, b, c)
+        ROUND(30, c, d, e, f, g, h, a, b)
+        ROUND(31, b, c, d, e, f, g, h, a)
+
+        ROUND(32, a, b, c, d, e, f, g, h)
+        ROUND(33, h, a, b, c, d, e, f, g)
+        ROUND(34, g, h, a, b, c, d, e, f)
+        ROUND(35, f, g, h, a, b, c, d, e)
+        ROUND(36, e, f, g, h, a, b, c, d)
+        ROUND(37, d, e, f, g, h, a, b, c)
+        ROUND(38, c, d, e, f, g, h, a, b)
+        ROUND(39, b, c, d, e, f, g, h, a)
+
+        ROUND(40, a, b, c, d, e, f, g, h)
+        ROUND(41, h, a, b, c, d, e, f, g)
+        ROUND(42, g, h, a, b, c, d, e, f)
+        ROUND(43, f, g, h, a, b, c, d, e)
+        ROUND(44, e, f, g, h, a, b, c, d)
+        ROUND(45, d, e, f, g, h, a, b, c)
+        ROUND(46, c, d, e, f, g, h, a, b)
+        ROUND(47, b, c, d, e, f, g, h, a)
+
+        ROUND(48, a, b, c, d, e, f, g, h)
+        ROUND(49, h, a, b, c, d, e, f, g)
+        ROUND(50, g, h, a, b, c, d, e, f)
+        ROUND(51, f, g, h, a, b, c, d, e)
+        ROUND(52, e, f, g, h, a, b, c, d)
+        ROUND(53, d, e, f, g, h, a, b, c)
+        ROUND(54, c, d, e, f, g, h, a, b)
+        ROUND(55, b, c, d, e, f, g, h, a)
+
+        ROUND(56, a, b, c, d, e, f, g, h)
+        ROUND(57, h, a, b, c, d, e, f, g)
+        ROUND(58, g, h, a, b, c, d, e, f)
+        ROUND(59, f, g, h, a, b, c, d, e)
+        ROUND(60, e, f, g, h, a, b, c, d)
+        ROUND(61, d, e, f, g, h, a, b, c)
+        ROUND(62, c, d, e, f, g, h, a, b)
+        ROUND(63, b, c, d, e, f, g, h, a)
+
+        ROUND(64, a, b, c, d, e, f, g, h)
+        ROUND(65, h, a, b, c, d, e, f, g)
+        ROUND(66, g, h, a, b, c, d, e, f)
+        ROUND(67, f, g, h, a, b, c, d, e)
+        ROUND(68, e, f, g, h, a, b, c, d)
+        ROUND(69, d, e, f, g, h, a, b, c)
+        ROUND(70, c, d, e, f, g, h, a, b)
+        ROUND(71, b, c, d, e, f, g, h, a)
+
+        ROUND(72, a, b, c, d, e, f, g, h)
+        ROUND(73, h, a, b, c, d, e, f, g)
+        ROUND(74, g, h, a, b, c, d, e, f)
+        ROUND(75, f, g, h, a, b, c, d, e)
+        ROUND(76, e, f, g, h, a, b, c, d)
+        ROUND(77, d, e, f, g, h, a, b, c)
+        ROUND(78, c, d, e, f, g, h, a, b)
+        ROUND(79, b, c, d, e, f, g, h, a)
+#endif
+
+        /* Add the round results back into the running state. */
+        ADDTO(a, H[0]);
+        ADDTO(b, H[1]);
+        ADDTO(c, H[2]);
+        ADDTO(d, H[3]);
+        ADDTO(e, H[4]);
+        ADDTO(f, H[5]);
+        ADDTO(g, H[6]);
+        ADDTO(h, H[7]);
+    }
+}
+
+/*
+ * Feed inputLen bytes from input into the hash.
+ *
+ * Bytes are collected in the context buffer; each time 128 bytes
+ * (SHA512_BLOCK_LENGTH) are available the block is compressed.  Any
+ * remainder stays buffered for the next call.  A zero inputLen is a no-op.
+ */
+void
+SHA512_Update(SHA512Context *ctx, const unsigned char *input,
+              unsigned int inputLen)
+{
+    unsigned int buffered;
+
+    if (inputLen == 0) {
+        return;
+    }
+
+    /* Note the current fill level, then count the new bytes up front. */
+#if defined(HAVE_LONG_LONG)
+    buffered = (unsigned int)ctx->sizeLo & 0x7f;
+    ctx->sizeLo += inputLen;
+#else
+    buffered = (unsigned int)ctx->sizeLo.lo & 0x7f;
+    ctx->sizeLo.lo += inputLen;
+    if (ctx->sizeLo.lo < inputLen) {
+        ctx->sizeLo.hi++;
+    }
+#endif
+
+    /* A partially filled buffer is topped up first. */
+    if (buffered != 0) {
+        const unsigned int room = SHA512_BLOCK_LENGTH - buffered;
+        const unsigned int take = (inputLen < room) ? inputLen : room;
+
+        memcpy(B + buffered, input, take);
+        input += take;
+        inputLen -= take;
+        if (take == room) {
+            SHA512_Compress(ctx);
+        }
+    }
+
+    /* Then every complete block is processed in turn. */
+    for (; inputLen >= SHA512_BLOCK_LENGTH; inputLen -= SHA512_BLOCK_LENGTH) {
+        memcpy(B, input, SHA512_BLOCK_LENGTH);
+        input += SHA512_BLOCK_LENGTH;
+        SHA512_Compress(ctx);
+    }
+
+    /* Whatever is left waits in the buffer. */
+    if (inputLen != 0) {
+        memcpy(B, input, inputLen);
+    }
+}
+
+/*
+ * Finish the hash: append the padding and the message length in bits, run
+ * the last compression, and copy up to maxDigestLen bytes (never more than
+ * SHA512_LENGTH) of H into digest. If digestLen is non-NULL it receives the
+ * number of bytes written. On little-endian builds the H words are
+ * byte-swapped in place before the copy, so the context is modified.
+ */
+void
+SHA512_End(SHA512Context *ctx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen)
+{
+#if defined(HAVE_LONG_LONG)
+ unsigned int inBuf = (unsigned int)ctx->sizeLo & 0x7f;
+#else
+ unsigned int inBuf = (unsigned int)ctx->sizeLo.lo & 0x7f;
+#endif
+ /* Pad so that the buffered byte count becomes 112 (mod 128). */
+ unsigned int padLen = (inBuf < 112) ? (112 - inBuf) : (112 + 128 - inBuf);
+ PRUint64 lo;
+ /* Byte count << 3 = bit count; taken before the padding update below
+ * advances sizeLo. */
+ LL_SHL(lo, ctx->sizeLo, 3);
+
+ SHA512_Update(ctx, pad, padLen);
+
+ /* Last two words of the block hold the length: upper word zero ... */
+#if defined(HAVE_LONG_LONG)
+ W[14] = 0;
+#else
+ W[14].lo = 0;
+ W[14].hi = 0;
+#endif
+
+ /* ... lower word is the bit count computed above. */
+ W[15] = lo;
+#if defined(IS_LITTLE_ENDIAN)
+ BYTESWAP8(W[15]);
+#endif
+ SHA512_Compress(ctx);
+
+/* now output the answer */
+#if defined(IS_LITTLE_ENDIAN)
+ BYTESWAP8(H[0]);
+ BYTESWAP8(H[1]);
+ BYTESWAP8(H[2]);
+ BYTESWAP8(H[3]);
+ BYTESWAP8(H[4]);
+ BYTESWAP8(H[5]);
+ BYTESWAP8(H[6]);
+ BYTESWAP8(H[7]);
+#endif
+ /* padLen is reused here as the number of digest bytes to emit. */
+ padLen = PR_MIN(SHA512_LENGTH, maxDigestLen);
+ memcpy(digest, H, padLen);
+ if (digestLen)
+ *digestLen = padLen;
+}
+
+/*
+ * Emit the current chaining value without padding or a final compression.
+ * Works on a private copy of ctx->h, so the context itself is not modified.
+ * At most PR_MIN(SHA512_LENGTH, maxDigestLen) bytes are written to digest;
+ * that count is also stored in *digestLen when digestLen is non-NULL.
+ */
+void
+SHA512_EndRaw(SHA512Context *ctx, unsigned char *digest,
+              unsigned int *digestLen, unsigned int maxDigestLen)
+{
+    PRUint64 state[8];
+    unsigned int outLen = PR_MIN(SHA512_LENGTH, maxDigestLen);
+
+    memcpy(state, ctx->h, sizeof(state));
+
+#if defined(IS_LITTLE_ENDIAN)
+    {
+        /* Convert each word of the copy to big-endian byte order. */
+        unsigned int i;
+        for (i = 0; i < 8; i++) {
+            BYTESWAP8(state[i]);
+        }
+    }
+#endif
+
+    memcpy(digest, state, outLen);
+    if (digestLen)
+        *digestLen = outLen;
+}
+
+/*
+ * One-shot SHA-512: hash src_length bytes at src and write SHA512_LENGTH
+ * bytes to dest. The temporary context is cleared before returning.
+ * Always returns SECSuccess.
+ */
+SECStatus
+SHA512_HashBuf(unsigned char *dest, const unsigned char *src,
+               PRUint32 src_length)
+{
+    SHA512Context hashCtx;
+    unsigned int written;
+
+    SHA512_Begin(&hashCtx);
+    SHA512_Update(&hashCtx, src, src_length);
+    SHA512_End(&hashCtx, dest, &written, SHA512_LENGTH);
+
+    /* Do not leave hash state behind on the stack. */
+    memset(&hashCtx, 0, sizeof(hashCtx));
+    return SECSuccess;
+}
+
+/*
+ * Hash the string src, whose length is taken with PORT_Strlen, and write
+ * the digest to dest. Returns the result of SHA512_HashBuf.
+ */
+SECStatus
+SHA512_Hash(unsigned char *dest, const char *src)
+{
+ return SHA512_HashBuf(dest, (const unsigned char *)src, PORT_Strlen(src));
+}
+
+/* Tracing hook: the body is empty, so calling it has no effect. */
+void
+SHA512_TraceState(SHA512Context *ctx)
+{
+}
+
+/* Number of bytes SHA512_Flatten writes: the size of the context structure. */
+unsigned int
+SHA512_FlattenSize(SHA512Context *ctx)
+{
+ return sizeof *ctx;
+}
+
+/*
+ * Serialize the context by copying the whole structure into space, which
+ * must hold at least sizeof *ctx bytes. Always returns SECSuccess.
+ */
+SECStatus
+SHA512_Flatten(SHA512Context *ctx, unsigned char *space)
+{
+ PORT_Memcpy(space, ctx, sizeof *ctx);
+ return SECSuccess;
+}
+
+/*
+ * Rebuild a context from a buffer written by SHA512_Flatten. A fresh context
+ * is obtained from SHA512_NewContext and sizeof(SHA512Context) bytes are
+ * copied into it from space. If no context could be obtained, the NULL
+ * result is passed straight back. arg is not used.
+ */
+SHA512Context *
+SHA512_Resurrect(unsigned char *space, void *arg)
+{
+    SHA512Context *restored = SHA512_NewContext();
+
+    if (!restored)
+        return restored;
+
+    PORT_Memcpy(restored, space, sizeof(SHA512Context));
+    return restored;
+}
+
+/* Copy the complete hashing state from src into dest (bytewise copy). */
+void
+SHA512_Clone(SHA512Context *dest, SHA512Context *src)
+{
+ memcpy(dest, src, sizeof *dest);
+}
+
+/* ======================================================================= */
+/* SHA384 uses a SHA512Context as the real context.
+** The only differences between SHA384 and SHA512 are:
+** a) the initialization values for the context, and
+** b) the number of bytes of data produced as output.
+*/
+
+/*
+ * SHA-384 initial hash values: the eight 64-bit words that SHA384_Begin
+ * copies into the context. Where PR_BYTES_PER_LONG is 8 they are written as
+ * plain UL literals; otherwise each constant is spelled through ULLC() as a
+ * pair of 32-bit halves.
+ */
+static const PRUint64 H384[8] = {
+#if PR_BYTES_PER_LONG == 8
+ 0xcbbb9d5dc1059ed8UL, 0x629a292a367cd507UL,
+ 0x9159015a3070dd17UL, 0x152fecd8f70e5939UL,
+ 0x67332667ffc00b31UL, 0x8eb44a8768581511UL,
+ 0xdb0c2e0d64f98fa7UL, 0x47b5481dbefa4fa4UL
+#else
+ ULLC(cbbb9d5d, c1059ed8), ULLC(629a292a, 367cd507),
+ ULLC(9159015a, 3070dd17), ULLC(152fecd8, f70e5939),
+ ULLC(67332667, ffc00b31), ULLC(8eb44a87, 68581511),
+ ULLC(db0c2e0d, 64f98fa7), ULLC(47b5481d, befa4fa4)
+#endif
+};
+
+/* Allocate a SHA-384 context; this is simply a SHA-512 context. */
+SHA384Context *
+SHA384_NewContext(void)
+{
+ return SHA512_NewContext();
+}
+
+/* Dispose of a SHA-384 context by forwarding to SHA512_DestroyContext. */
+void
+SHA384_DestroyContext(SHA384Context *ctx, PRBool freeit)
+{
+ SHA512_DestroyContext(ctx, freeit);
+}
+
+/*
+ * Reset a context for a new SHA-384 computation: zero the whole structure,
+ * then load H with the SHA-384 initial values from H384.
+ */
+void
+SHA384_Begin(SHA384Context *ctx)
+{
+ memset(ctx, 0, sizeof *ctx);
+ memcpy(H, H384, sizeof H384);
+}
+
+/* Feed inputLen bytes into the hash; identical to SHA512_Update. */
+void
+SHA384_Update(SHA384Context *ctx, const unsigned char *input,
+ unsigned int inputLen)
+{
+ SHA512_Update(ctx, input, inputLen);
+}
+
+/*
+ * Finish the hash via SHA512_End, but cap the output at SHA384_LENGTH bytes
+ * (or maxDigestLen, whichever is smaller).
+ */
+void
+SHA384_End(SHA384Context *ctx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen)
+{
+ unsigned int maxLen = SHA_MIN(maxDigestLen, SHA384_LENGTH);
+ SHA512_End(ctx, digest, digestLen, maxLen);
+}
+
+/*
+ * Emit the raw state via SHA512_EndRaw, capped at SHA384_LENGTH bytes
+ * (or maxDigestLen, whichever is smaller).
+ */
+void
+SHA384_EndRaw(SHA384Context *ctx, unsigned char *digest,
+ unsigned int *digestLen, unsigned int maxDigestLen)
+{
+ unsigned int maxLen = SHA_MIN(maxDigestLen, SHA384_LENGTH);
+ SHA512_EndRaw(ctx, digest, digestLen, maxLen);
+}
+
+/*
+ * One-shot SHA-384: hash src_length bytes at src and write SHA384_LENGTH
+ * bytes to dest. Uses the SHA-512 update/end routines on a context that was
+ * initialised by SHA384_Begin, and clears that context before returning.
+ * Always returns SECSuccess.
+ */
+SECStatus
+SHA384_HashBuf(unsigned char *dest, const unsigned char *src,
+               PRUint32 src_length)
+{
+    SHA512Context hashCtx;
+    unsigned int written;
+
+    SHA384_Begin(&hashCtx);
+    SHA512_Update(&hashCtx, src, src_length);
+    SHA512_End(&hashCtx, dest, &written, SHA384_LENGTH);
+
+    /* Do not leave hash state behind on the stack. */
+    memset(&hashCtx, 0, sizeof(hashCtx));
+    return SECSuccess;
+}
+
+/*
+ * Hash the string src, whose length is taken with PORT_Strlen, and write
+ * the digest to dest. Returns the result of SHA384_HashBuf.
+ */
+SECStatus
+SHA384_Hash(unsigned char *dest, const char *src)
+{
+ return SHA384_HashBuf(dest, (const unsigned char *)src, PORT_Strlen(src));
+}
+
+/* Tracing hook: the body is empty, so calling it has no effect. */
+void
+SHA384_TraceState(SHA384Context *ctx)
+{
+}
+
+/* Number of bytes needed to hold a flattened SHA-384 context. */
+unsigned int
+SHA384_FlattenSize(SHA384Context *ctx)
+{
+ return sizeof(SHA384Context);
+}
+
+/* Serialize the context into space; forwards to SHA512_Flatten. */
+SECStatus
+SHA384_Flatten(SHA384Context *ctx, unsigned char *space)
+{
+ return SHA512_Flatten(ctx, space);
+}
+
+/* Rebuild a context from flattened bytes; forwards to SHA512_Resurrect. */
+SHA384Context *
+SHA384_Resurrect(unsigned char *space, void *arg)
+{
+ return SHA512_Resurrect(space, arg);
+}
+
+/* Copy the complete hashing state from src into dest (bytewise copy). */
+void
+SHA384_Clone(SHA384Context *dest, SHA384Context *src)
+{
+ memcpy(dest, src, sizeof *dest);
+}
+
+/* ======================================================================= */
+#ifdef SELFTEST
+#include <stdio.h>
+
+/* Input strings for the self-test routines below. */
+/* Short message, hashed by every testNNN() routine. */
+static const char abc[] = { "abc" };
+/* Longer message used by test256() and test224(). */
+static const char abcdbc[] = {
+ "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"
+};
+/* Longer message used by test512(), time512() and test384(). */
+static const char abcdef[] = {
+ "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmn"
+ "hijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu"
+};
+
+/*
+ * Print bufLen bytes of buf as hex, grouped into space-separated 32-bit
+ * words, followed by a newline. Output for lengths that are a multiple of 4
+ * is unchanged; a trailing partial word is now printed byte by byte instead
+ * of reading up to three bytes past the end of buf.
+ */
+void
+dumpHash32(const unsigned char *buf, unsigned int bufLen)
+{
+    unsigned int i = 0;
+
+    /* Whole 4-byte groups; "bufLen - i" cannot wrap because i <= bufLen. */
+    for (; bufLen - i >= 4; i += 4) {
+        printf(" %02x%02x%02x%02x", buf[i], buf[i + 1], buf[i + 2], buf[i + 3]);
+    }
+    /* Leftover bytes (fewer than 4), if any. */
+    if (i < bufLen) {
+        printf(" ");
+        for (; i < bufLen; i++) {
+            printf("%02x", buf[i]);
+        }
+    }
+    printf("\n");
+}
+
+/* Print the SHA-256 digests of the abc and abcdbc test strings. */
+void
+test256(void)
+{
+ unsigned char outBuf[SHA256_LENGTH];
+
+ printf("SHA256, input = %s\n", abc);
+ SHA256_Hash(outBuf, abc);
+ dumpHash32(outBuf, sizeof outBuf);
+
+ printf("SHA256, input = %s\n", abcdbc);
+ SHA256_Hash(outBuf, abcdbc);
+ dumpHash32(outBuf, sizeof outBuf);
+}
+
+/*
+ * Print the SHA-224 digests of three inputs: the abc string, the abcdbc
+ * string, and one million 'a' characters fed through the incremental API.
+ */
+void
+test224(void)
+{
+    unsigned char digest[SHA224_LENGTH];
+    unsigned char chunk[1000];
+    unsigned int digestLen;
+    SHA224Context ctx;
+    int pass;
+
+    /* Test Vector 1 */
+    printf("SHA224, input = %s\n", abc);
+    SHA224_Hash(digest, abc);
+    dumpHash32(digest, sizeof digest);
+
+    /* Test Vector 2 */
+    printf("SHA224, input = %s\n", abcdbc);
+    SHA224_Hash(digest, abcdbc);
+    dumpHash32(digest, sizeof digest);
+
+    /* Test Vector 3: 1000 updates of a 1000-byte chunk of 'a's. */
+    memset(chunk, 'a', sizeof chunk);
+    printf("SHA224, input = %s\n", "a one million times");
+    SHA224_Begin(&ctx);
+    for (pass = 0; pass < 1000; pass++) {
+        SHA224_Update(&ctx, chunk, sizeof chunk);
+    }
+    SHA224_End(&ctx, digest, &digestLen, SHA224_LENGTH);
+    dumpHash32(digest, sizeof digest);
+}
+
+/*
+ * Print bufLen bytes of buf as hex, grouped into space-separated 64-bit
+ * words with a line break before every 32 bytes, followed by a newline.
+ * Output for lengths that are a multiple of 8 is unchanged; a trailing
+ * partial word is now printed byte by byte instead of reading up to seven
+ * bytes past the end of buf.
+ */
+void
+dumpHash64(const unsigned char *buf, unsigned int bufLen)
+{
+    unsigned int i = 0;
+
+    /* Whole 8-byte groups; "bufLen - i" cannot wrap because i <= bufLen. */
+    for (; bufLen - i >= 8; i += 8) {
+        if (i % 32 == 0)
+            printf("\n");
+        printf(" %02x%02x%02x%02x%02x%02x%02x%02x",
+               buf[i], buf[i + 1], buf[i + 2], buf[i + 3],
+               buf[i + 4], buf[i + 5], buf[i + 6], buf[i + 7]);
+    }
+    /* Leftover bytes (fewer than 8), if any. */
+    if (i < bufLen) {
+        if (i % 32 == 0)
+            printf("\n");
+        printf(" ");
+        for (; i < bufLen; i++) {
+            printf("%02x", buf[i]);
+        }
+    }
+    printf("\n");
+}
+
+/* Print the SHA-512 digests of the abc and abcdef test strings. */
+void
+test512(void)
+{
+ unsigned char outBuf[SHA512_LENGTH];
+
+ printf("SHA512, input = %s\n", abc);
+ SHA512_Hash(outBuf, abc);
+ dumpHash64(outBuf, sizeof outBuf);
+
+ printf("SHA512, input = %s\n", abcdef);
+ SHA512_Hash(outBuf, abcdef);
+ dumpHash64(outBuf, sizeof outBuf);
+}
+
+/*
+ * Same two SHA-512 computations as test512() but without any printing;
+ * main() calls this in a loop.
+ */
+void
+time512(void)
+{
+ unsigned char outBuf[SHA512_LENGTH];
+
+ SHA512_Hash(outBuf, abc);
+ SHA512_Hash(outBuf, abcdef);
+}
+
+/* Print the SHA-384 digests of the abc and abcdef test strings. */
+void
+test384(void)
+{
+ unsigned char outBuf[SHA384_LENGTH];
+
+ printf("SHA384, input = %s\n", abc);
+ SHA384_Hash(outBuf, abc);
+ dumpHash64(outBuf, sizeof outBuf);
+
+ printf("SHA384, input = %s\n", abcdef);
+ SHA384_Hash(outBuf, abcdef);
+ dumpHash64(outBuf, sizeof outBuf);
+}
+
+/*
+ * Self-test driver. With no argument, or an argument whose atoi() value is
+ * below 2, print the digests from all four test routines. Otherwise run
+ * time512() that many times and print "done".
+ */
+int
+main(int argc, char *argv[], char *envp[])
+{
+    int iterations = (argc > 1) ? atoi(argv[1]) : 1;
+
+    if (iterations >= 2) {
+        for (; iterations > 0; iterations--) {
+            time512();
+        }
+        printf("done\n");
+        return 0;
+    }
+
+    test224();
+    test256();
+    test384();
+    test512();
+    return 0;
+}
+
+/* Self-test stand-in for PORT_Alloc: plain malloc. */
+void *
+PORT_Alloc(size_t len)
+{
+ return malloc(len);
+}
+/* Self-test stand-in for PORT_Free: plain free. */
+void
+PORT_Free(void *ptr)
+{
+ free(ptr);
+}
+/*
+ * Self-test stand-in for PORT_ZFree: clear len bytes at ptr, then release
+ * the block. A NULL ptr is ignored so that memset is never handed a null
+ * pointer (free(NULL) alone would have been harmless, memset(NULL, ...) is
+ * not).
+ */
+void
+PORT_ZFree(void *ptr, size_t len)
+{
+    if (ptr == NULL)
+        return;
+    memset(ptr, 0, len);
+    free(ptr);
+}
+#endif
diff --git a/security/nss/lib/freebl/sha_fast.c b/security/nss/lib/freebl/sha_fast.c
new file mode 100644
index 000000000..52071f0c9
--- /dev/null
+++ b/security/nss/lib/freebl/sha_fast.c
@@ -0,0 +1,545 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include <memory.h>
+#include "blapi.h"
+#include "sha_fast.h"
+#include "prerror.h"
+
+#ifdef TRACING_SSL
+#include "ssl.h"
+#include "ssltrace.h"
+#endif
+
+static void shaCompress(volatile SHA_HW_t *X, const PRUint32 *datain);
+
+/* Shorthand for the two views of the context's input buffer union. */
+#define W u.w
+#define B u.b
+
+/* Bitwise round functions used by the four SHA_RNDx macros. */
+/* F1: for each bit, pick Y where X is 1 and Z where X is 0. */
+#define SHA_F1(X, Y, Z) ((((Y) ^ (Z)) & (X)) ^ (Z))
+/* F2: bitwise parity of the three inputs. */
+#define SHA_F2(X, Y, Z) ((X) ^ (Y) ^ (Z))
+/* F3: bitwise majority of the three inputs. */
+#define SHA_F3(X, Y, Z) (((X) & (Y)) | ((Z) & ((X) | (Y))))
+/* F4: same expression as F2. */
+#define SHA_F4(X, Y, Z) ((X) ^ (Y) ^ (Z))
+
+/* Replace schedule word n with the XOR of words a, b, c and n, rotated
+ * left by one bit. XW() is defined inside shaCompress. */
+#define SHA_MIX(n, a, b, c) XW(n) = SHA_ROTL(XW(a) ^ XW(b) ^ XW(c) ^ XW(n), 1)
+
+/*
+ * SHA: initialize context. Clears the byte counter and loads the five
+ * state words with the constants from FIPS180-1.
+ */
+void
+SHA1_Begin(SHA1Context *ctx)
+{
+    static const PRUint32 initialState[5] = {
+        0x67452301L, 0xefcdab89L, 0x98badcfeL, 0x10325476L, 0xc3d2e1f0L
+    };
+    int i;
+
+    for (i = 0; i < 5; i++) {
+        ctx->H[i] = initialState[i];
+    }
+    ctx->size = 0;
+}
+
+/* Explanation of H array and index values:
+ * The context's H array is actually the concatenation of two arrays
+ * defined by SHA1, the H array of state variables (5 elements),
+ * and the W array of intermediate values, of which there are 16 elements.
+ * The W array starts at H[5], that is W[0] is H[5].
+ * Although these values are defined as 32-bit values, we use 64-bit
+ * variables to hold them because the AMD64 stores 64 bit values in
+ * memory MUCH faster than it stores any smaller values.
+ *
+ * Rather than passing the context structure to shaCompress, we pass
+ * this combined array of H and W values. We do not pass the address
+ * of the first element of this array, but rather pass the address of an
+ * element in the middle of the array, element X. Presently X[0] is H[11].
+ * So we pass the address of H[11] as the address of array X to shaCompress.
+ * Then shaCompress accesses the members of the array using positive AND
+ * negative indexes.
+ *
+ * Pictorially: (each element is 8 bytes)
+ * H | H0 H1 H2 H3 H4 W0 W1 W2 W3 W4 W5 W6 W7 W8 W9 Wa Wb Wc Wd We Wf |
+ * X |-11-10 -9 -8 -7 -6 -5 -4 -3 -2 -1 X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 |
+ *
+ * The byte offset from X[0] to any member of H and W is always
+ * representable in a signed 8-bit value, which will be encoded
+ * as a single byte offset in the X86-64 instruction set.
+ * If we didn't pass the address of H[11], and instead passed the
+ * address of H[0], the offsets to elements H[16] and above would be
+ * greater than 127, not representable in a signed 8-bit value, and the
+ * x86-64 instruction set would encode every such offset as a 32-bit
+ * signed number in each instruction that accessed element H[16] or
+ * higher. This results in much bigger and slower code.
+ */
+#if !defined(SHA_PUT_W_IN_STACK)
+#define H2X 11 /* X[0] is H[11], and H[0] is X[-11] */
+#define W2X 6 /* X[0] is W[6], and W[0] is X[-6] */
+#else
+#define H2X 0
+#endif
+
+/*
+ * SHA: Add data to context.
+ *
+ * Appends len bytes from dataIn to the hash. Bytes are collected in the
+ * context's 64-byte input buffer and shaCompress() is run for every full
+ * 64-byte block; leftover bytes stay buffered for the next call.
+ * A zero-length call is a no-op.
+ */
+void
+SHA1_Update(SHA1Context *ctx, const unsigned char *dataIn, unsigned int len)
+{
+ register unsigned int lenB;
+ register unsigned int togo;
+
+ if (!len)
+ return;
+
+ /* accumulate the byte count. */
+ /* lenB = number of bytes already sitting in the input buffer */
+ lenB = (unsigned int)(ctx->size) & 63U;
+
+ ctx->size += len;
+
+ /*
+ * Read the data into W and process blocks as they get full
+ */
+ if (lenB > 0) {
+ togo = 64U - lenB;
+ if (len < togo)
+ togo = len;
+ memcpy(ctx->B + lenB, dataIn, togo);
+ len -= togo;
+ dataIn += togo;
+ lenB = (lenB + togo) & 63U;
+ /* lenB wrapped to 0: the buffer is full, so compress it */
+ if (!lenB) {
+ shaCompress(&ctx->H[H2X], ctx->W);
+ }
+ }
+#if !defined(HAVE_UNALIGNED_ACCESS)
+ /* Input not aligned for 32-bit loads: stage each block through the
+ * context buffer before compressing. */
+ if ((ptrdiff_t)dataIn % sizeof(PRUint32)) {
+ while (len >= 64U) {
+ memcpy(ctx->B, dataIn, 64);
+ len -= 64U;
+ shaCompress(&ctx->H[H2X], ctx->W);
+ dataIn += 64U;
+ }
+ } else
+#endif
+ {
+ /* Compress whole blocks straight out of the caller's buffer. */
+ while (len >= 64U) {
+ len -= 64U;
+ shaCompress(&ctx->H[H2X], (PRUint32 *)dataIn);
+ dataIn += 64U;
+ }
+ }
+ /* Keep the remaining (fewer than 64) bytes for later. */
+ if (len) {
+ memcpy(ctx->B, dataIn, len);
+ }
+}
+
+/*
+ * SHA: Generate hash value from context
+ *
+ * Pads the message, appends the 64-bit length in bits, runs the final
+ * compression and stores the SHA1_LENGTH-byte digest at hashout via
+ * SHA_STORE_RESULT. *pDigestLen, if non-NULL, is set to SHA1_LENGTH.
+ * maxDigestLen is only checked by PORT_Assert.
+ */
+void NO_SANITIZE_ALIGNMENT
+SHA1_End(SHA1Context *ctx, unsigned char *hashout,
+ unsigned int *pDigestLen, unsigned int maxDigestLen)
+{
+ register PRUint64 size;
+ register PRUint32 lenB;
+
+ static const unsigned char bulk_pad[64] = { 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+/* The fallback SHA_HTONL/SHA_ROTL macros in sha_fast.h use a scratch
+ * variable called tmp; lenB is reused for that once it is no longer needed. */
+#define tmp lenB
+
+ PORT_Assert(maxDigestLen >= SHA1_LENGTH);
+
+ /*
+ * Pad with a binary 1 (i.e. 0x80), then zeroes, then length in bits
+ */
+ /* Remember the unpadded length; SHA1_Update below changes ctx->size. */
+ size = ctx->size;
+
+ lenB = (PRUint32)size & 63;
+ /* Pad length is 1..64 bytes and leaves 56 bytes in the buffer. */
+ SHA1_Update(ctx, bulk_pad, (((55 + 64) - lenB) & 63) + 1);
+ PORT_Assert(((PRUint32)ctx->size & 63) == 56);
+ /* Convert size from bytes to bits. */
+ size <<= 3;
+ /* Length goes into the last two buffer words, high half first. */
+ ctx->W[14] = SHA_HTONL((PRUint32)(size >> 32));
+ ctx->W[15] = SHA_HTONL((PRUint32)size);
+ shaCompress(&ctx->H[H2X], ctx->W);
+
+ /*
+ * Output hash
+ */
+ SHA_STORE_RESULT;
+ if (pDigestLen) {
+ *pDigestLen = SHA1_LENGTH;
+ }
+#undef tmp
+}
+
+/*
+ * Store the current state words H[0..4] at hashout via SHA_STORE_RESULT,
+ * without padding or a final compression. *pDigestLen, if non-NULL, is set
+ * to SHA1_LENGTH. maxDigestLen is only checked by PORT_Assert.
+ */
+void
+SHA1_EndRaw(SHA1Context *ctx, unsigned char *hashout,
+ unsigned int *pDigestLen, unsigned int maxDigestLen)
+{
+#if defined(SHA_NEED_TMP_VARIABLE)
+ /* scratch variable required by the fallback SHA_HTONL macro */
+ register PRUint32 tmp;
+#endif
+ PORT_Assert(maxDigestLen >= SHA1_LENGTH);
+
+ SHA_STORE_RESULT;
+ if (pDigestLen)
+ *pDigestLen = SHA1_LENGTH;
+}
+
+#undef B
+/*
+ * SHA: Compression function, unrolled.
+ *
+ * Some operations in shaCompress are done as 5 groups of 16 operations.
+ * Others are done as 4 groups of 20 operations.
+ * The code below shows that structure.
+ *
+ * The functions that compute the new values of the 5 state variables
+ * A-E are done in 4 groups of 20 operations (or you may also think
+ * of them as being done in 16 groups of 5 operations). They are
+ * done by the SHA_RNDx macros below, in the right column.
+ *
+ * The functions that set the 16 values of the W array are done in
+ * 5 groups of 16 operations. The first group is done by the
+ * LOAD macros below, the latter 4 groups are done by SHA_MIX below,
+ * in the left column.
+ *
+ * gcc's optimizer observes that each member of the W array is assigned
+ * a value 5 times in this code. It reduces the number of store
+ * operations done to the W array in the context (that is, in the X array)
+ * by creating a W array on the stack, and storing the W values there for
+ * the first 4 groups of operations on W, and storing the values in the
+ * context's W array only in the fifth group. This is undesirable.
+ * It is MUCH bigger code than simply using the context's W array, because
+ * all the offsets to the W array in the stack are 32-bit signed offsets,
+ * and it is no faster than storing the values in the context's W array.
+ *
+ * The original code for sha_fast.c prevented this creation of a separate
+ * W array in the stack by creating a W array of 80 members, each of
+ * whose elements is assigned only once. It also separated the computations
+ * of the W array values and the computations of the values for the 5
+ * state variables into two separate passes, W's, then A-E's so that the
+ * second pass could be done all in registers (except for accessing the W
+ * array) on machines with fewer registers. The method is suboptimal
+ * for machines with enough registers to do it all in one pass, and it
+ * necessitates using many instructions with 32-bit offsets.
+ *
+ * This code eliminates the separate W array on the stack by a completely
+ * different means: by declaring the X array volatile. This prevents
+ * the optimizer from trying to reduce the use of the X array by the
+ * creation of a MORE expensive W array on the stack. The result is
+ * that all instructions use signed 8-bit offsets and not 32-bit offsets.
+ *
+ * The combination of this code and the -O3 optimizer flag on GCC 3.4.3
+ * results in code that is 3 times faster than the previous NSS sha_fast
+ * code on AMD64.
+ */
+/*
+ * Process one 64-byte block.
+ * X     points into the context's H array, offset by H2X elements.
+ * inbuf holds the sixteen 32-bit input words; each is passed through
+ *       SHA_HTONL as it is loaded.
+ * On return the five state words have been updated in place.
+ */
+static void NO_SANITIZE_ALIGNMENT
+shaCompress(volatile SHA_HW_t *X, const PRUint32 *inbuf)
+{
+ register SHA_HW_t A, B, C, D, E;
+
+#if defined(SHA_NEED_TMP_VARIABLE)
+ register PRUint32 tmp;
+#endif
+
+#if !defined(SHA_PUT_W_IN_STACK)
+#define XH(n) X[n - H2X]
+#define XW(n) X[n - W2X]
+#else
+ SHA_HW_t w_0, w_1, w_2, w_3, w_4, w_5, w_6, w_7,
+ w_8, w_9, w_10, w_11, w_12, w_13, w_14, w_15;
+#define XW(n) w_##n
+#define XH(n) X[n]
+#endif
+
+/* Additive constants, one per group of 20 rounds. */
+#define K0 0x5a827999L
+#define K1 0x6ed9eba1L
+#define K2 0x8f1bbcdcL
+#define K3 0xca62c1d6L
+
+/* One round each. Instead of shifting values between the five variables,
+ * the call sites rotate the order in which A-E are passed. */
+#define SHA_RND1(a, b, c, d, e, n) \
+ a = SHA_ROTL(b, 5) + SHA_F1(c, d, e) + a + XW(n) + K0; \
+ c = SHA_ROTL(c, 30)
+#define SHA_RND2(a, b, c, d, e, n) \
+ a = SHA_ROTL(b, 5) + SHA_F2(c, d, e) + a + XW(n) + K1; \
+ c = SHA_ROTL(c, 30)
+#define SHA_RND3(a, b, c, d, e, n) \
+ a = SHA_ROTL(b, 5) + SHA_F3(c, d, e) + a + XW(n) + K2; \
+ c = SHA_ROTL(c, 30)
+#define SHA_RND4(a, b, c, d, e, n) \
+ a = SHA_ROTL(b, 5) + SHA_F4(c, d, e) + a + XW(n) + K3; \
+ c = SHA_ROTL(c, 30)
+
+#define LOAD(n) XW(n) = SHA_HTONL(inbuf[n])
+
+ A = XH(0);
+ B = XH(1);
+ C = XH(2);
+ D = XH(3);
+ E = XH(4);
+
+ /* Rounds 0-15: schedule words come straight from the input block. */
+ LOAD(0);
+ SHA_RND1(E, A, B, C, D, 0);
+ LOAD(1);
+ SHA_RND1(D, E, A, B, C, 1);
+ LOAD(2);
+ SHA_RND1(C, D, E, A, B, 2);
+ LOAD(3);
+ SHA_RND1(B, C, D, E, A, 3);
+ LOAD(4);
+ SHA_RND1(A, B, C, D, E, 4);
+ LOAD(5);
+ SHA_RND1(E, A, B, C, D, 5);
+ LOAD(6);
+ SHA_RND1(D, E, A, B, C, 6);
+ LOAD(7);
+ SHA_RND1(C, D, E, A, B, 7);
+ LOAD(8);
+ SHA_RND1(B, C, D, E, A, 8);
+ LOAD(9);
+ SHA_RND1(A, B, C, D, E, 9);
+ LOAD(10);
+ SHA_RND1(E, A, B, C, D, 10);
+ LOAD(11);
+ SHA_RND1(D, E, A, B, C, 11);
+ LOAD(12);
+ SHA_RND1(C, D, E, A, B, 12);
+ LOAD(13);
+ SHA_RND1(B, C, D, E, A, 13);
+ LOAD(14);
+ SHA_RND1(A, B, C, D, E, 14);
+ LOAD(15);
+ SHA_RND1(E, A, B, C, D, 15);
+
+ /* Rounds 16-19: last four F1/K0 rounds, schedule now produced by SHA_MIX. */
+ SHA_MIX(0, 13, 8, 2);
+ SHA_RND1(D, E, A, B, C, 0);
+ SHA_MIX(1, 14, 9, 3);
+ SHA_RND1(C, D, E, A, B, 1);
+ SHA_MIX(2, 15, 10, 4);
+ SHA_RND1(B, C, D, E, A, 2);
+ SHA_MIX(3, 0, 11, 5);
+ SHA_RND1(A, B, C, D, E, 3);
+
+ /* Rounds 20-39: F2/K1. */
+ SHA_MIX(4, 1, 12, 6);
+ SHA_RND2(E, A, B, C, D, 4);
+ SHA_MIX(5, 2, 13, 7);
+ SHA_RND2(D, E, A, B, C, 5);
+ SHA_MIX(6, 3, 14, 8);
+ SHA_RND2(C, D, E, A, B, 6);
+ SHA_MIX(7, 4, 15, 9);
+ SHA_RND2(B, C, D, E, A, 7);
+ SHA_MIX(8, 5, 0, 10);
+ SHA_RND2(A, B, C, D, E, 8);
+ SHA_MIX(9, 6, 1, 11);
+ SHA_RND2(E, A, B, C, D, 9);
+ SHA_MIX(10, 7, 2, 12);
+ SHA_RND2(D, E, A, B, C, 10);
+ SHA_MIX(11, 8, 3, 13);
+ SHA_RND2(C, D, E, A, B, 11);
+ SHA_MIX(12, 9, 4, 14);
+ SHA_RND2(B, C, D, E, A, 12);
+ SHA_MIX(13, 10, 5, 15);
+ SHA_RND2(A, B, C, D, E, 13);
+ SHA_MIX(14, 11, 6, 0);
+ SHA_RND2(E, A, B, C, D, 14);
+ SHA_MIX(15, 12, 7, 1);
+ SHA_RND2(D, E, A, B, C, 15);
+
+ SHA_MIX(0, 13, 8, 2);
+ SHA_RND2(C, D, E, A, B, 0);
+ SHA_MIX(1, 14, 9, 3);
+ SHA_RND2(B, C, D, E, A, 1);
+ SHA_MIX(2, 15, 10, 4);
+ SHA_RND2(A, B, C, D, E, 2);
+ SHA_MIX(3, 0, 11, 5);
+ SHA_RND2(E, A, B, C, D, 3);
+ SHA_MIX(4, 1, 12, 6);
+ SHA_RND2(D, E, A, B, C, 4);
+ SHA_MIX(5, 2, 13, 7);
+ SHA_RND2(C, D, E, A, B, 5);
+ SHA_MIX(6, 3, 14, 8);
+ SHA_RND2(B, C, D, E, A, 6);
+ SHA_MIX(7, 4, 15, 9);
+ SHA_RND2(A, B, C, D, E, 7);
+
+ /* Rounds 40-59: F3/K2. */
+ SHA_MIX(8, 5, 0, 10);
+ SHA_RND3(E, A, B, C, D, 8);
+ SHA_MIX(9, 6, 1, 11);
+ SHA_RND3(D, E, A, B, C, 9);
+ SHA_MIX(10, 7, 2, 12);
+ SHA_RND3(C, D, E, A, B, 10);
+ SHA_MIX(11, 8, 3, 13);
+ SHA_RND3(B, C, D, E, A, 11);
+ SHA_MIX(12, 9, 4, 14);
+ SHA_RND3(A, B, C, D, E, 12);
+ SHA_MIX(13, 10, 5, 15);
+ SHA_RND3(E, A, B, C, D, 13);
+ SHA_MIX(14, 11, 6, 0);
+ SHA_RND3(D, E, A, B, C, 14);
+ SHA_MIX(15, 12, 7, 1);
+ SHA_RND3(C, D, E, A, B, 15);
+
+ SHA_MIX(0, 13, 8, 2);
+ SHA_RND3(B, C, D, E, A, 0);
+ SHA_MIX(1, 14, 9, 3);
+ SHA_RND3(A, B, C, D, E, 1);
+ SHA_MIX(2, 15, 10, 4);
+ SHA_RND3(E, A, B, C, D, 2);
+ SHA_MIX(3, 0, 11, 5);
+ SHA_RND3(D, E, A, B, C, 3);
+ SHA_MIX(4, 1, 12, 6);
+ SHA_RND3(C, D, E, A, B, 4);
+ SHA_MIX(5, 2, 13, 7);
+ SHA_RND3(B, C, D, E, A, 5);
+ SHA_MIX(6, 3, 14, 8);
+ SHA_RND3(A, B, C, D, E, 6);
+ SHA_MIX(7, 4, 15, 9);
+ SHA_RND3(E, A, B, C, D, 7);
+ SHA_MIX(8, 5, 0, 10);
+ SHA_RND3(D, E, A, B, C, 8);
+ SHA_MIX(9, 6, 1, 11);
+ SHA_RND3(C, D, E, A, B, 9);
+ SHA_MIX(10, 7, 2, 12);
+ SHA_RND3(B, C, D, E, A, 10);
+ SHA_MIX(11, 8, 3, 13);
+ SHA_RND3(A, B, C, D, E, 11);
+
+ /* Rounds 60-79: F4/K3. */
+ SHA_MIX(12, 9, 4, 14);
+ SHA_RND4(E, A, B, C, D, 12);
+ SHA_MIX(13, 10, 5, 15);
+ SHA_RND4(D, E, A, B, C, 13);
+ SHA_MIX(14, 11, 6, 0);
+ SHA_RND4(C, D, E, A, B, 14);
+ SHA_MIX(15, 12, 7, 1);
+ SHA_RND4(B, C, D, E, A, 15);
+
+ SHA_MIX(0, 13, 8, 2);
+ SHA_RND4(A, B, C, D, E, 0);
+ SHA_MIX(1, 14, 9, 3);
+ SHA_RND4(E, A, B, C, D, 1);
+ SHA_MIX(2, 15, 10, 4);
+ SHA_RND4(D, E, A, B, C, 2);
+ SHA_MIX(3, 0, 11, 5);
+ SHA_RND4(C, D, E, A, B, 3);
+ SHA_MIX(4, 1, 12, 6);
+ SHA_RND4(B, C, D, E, A, 4);
+ SHA_MIX(5, 2, 13, 7);
+ SHA_RND4(A, B, C, D, E, 5);
+ SHA_MIX(6, 3, 14, 8);
+ SHA_RND4(E, A, B, C, D, 6);
+ SHA_MIX(7, 4, 15, 9);
+ SHA_RND4(D, E, A, B, C, 7);
+ SHA_MIX(8, 5, 0, 10);
+ SHA_RND4(C, D, E, A, B, 8);
+ SHA_MIX(9, 6, 1, 11);
+ SHA_RND4(B, C, D, E, A, 9);
+ SHA_MIX(10, 7, 2, 12);
+ SHA_RND4(A, B, C, D, E, 10);
+ SHA_MIX(11, 8, 3, 13);
+ SHA_RND4(E, A, B, C, D, 11);
+ SHA_MIX(12, 9, 4, 14);
+ SHA_RND4(D, E, A, B, C, 12);
+ SHA_MIX(13, 10, 5, 15);
+ SHA_RND4(C, D, E, A, B, 13);
+ SHA_MIX(14, 11, 6, 0);
+ SHA_RND4(B, C, D, E, A, 14);
+ SHA_MIX(15, 12, 7, 1);
+ SHA_RND4(A, B, C, D, E, 15);
+
+ /* Fold the working variables back into the state. */
+ XH(0) += A;
+ XH(1) += B;
+ XH(2) += C;
+ XH(3) += D;
+ XH(4) += E;
+}
+
+/*************************************************************************
+** Code below this line added to make SHA code support BLAPI interface
+*/
+
+/*
+ * Allocate an uninitialised SHA-1 context with PORT_New and return it
+ * (whatever PORT_New yields is passed back unchanged). The memory is not
+ * zeroed because SHA1_Begin sets up the context.
+ */
+SHA1Context *
+SHA1_NewContext(void)
+{
+    return PORT_New(SHA1Context);
+}
+
+/*
+ * Zero the context and, when freeit is true, release it with PORT_Free.
+ * cx is dereferenced unconditionally, so it must not be NULL.
+ */
+void
+SHA1_DestroyContext(SHA1Context *cx, PRBool freeit)
+{
+ memset(cx, 0, sizeof *cx);
+ if (freeit) {
+ PORT_Free(cx);
+ }
+}
+
+/*
+ * One-shot SHA-1: hash src_length bytes at src and write the SHA1_LENGTH-byte
+ * digest to dest. The temporary context is zeroed before returning.
+ * Always returns SECSuccess.
+ */
+SECStatus
+SHA1_HashBuf(unsigned char *dest, const unsigned char *src, PRUint32 src_length)
+{
+ SHA1Context ctx;
+ unsigned int outLen;
+
+ SHA1_Begin(&ctx);
+ SHA1_Update(&ctx, src, src_length);
+ SHA1_End(&ctx, dest, &outLen, SHA1_LENGTH);
+ /* wipe the hash state left on the stack */
+ memset(&ctx, 0, sizeof ctx);
+ return SECSuccess;
+}
+
+/*
+ * Hash a null-terminated character string. The length is taken with
+ * PORT_Strlen and the work is done by SHA1_HashBuf.
+ */
+SECStatus
+SHA1_Hash(unsigned char *dest, const char *src)
+{
+ return SHA1_HashBuf(dest, (const unsigned char *)src, PORT_Strlen(src));
+}
+
+/*
+ * need to support save/restore state in pkcs11. Stores all the info necessary
+ * for a structure into just a stream of bytes.
+ *
+ * SHA1_FlattenSize reports how many bytes SHA1_Flatten writes: the size of
+ * the context structure. cx is not examined.
+ */
+unsigned int
+SHA1_FlattenSize(SHA1Context *cx)
+{
+ return sizeof(SHA1Context);
+}
+
+/*
+ * Serialize the context by copying the whole structure into space, which
+ * must hold at least sizeof(SHA1Context) bytes. Always returns SECSuccess.
+ */
+SECStatus
+SHA1_Flatten(SHA1Context *cx, unsigned char *space)
+{
+ PORT_Memcpy(space, cx, sizeof(SHA1Context));
+ return SECSuccess;
+}
+
+/*
+ * Rebuild a context from a buffer written by SHA1_Flatten. Returns a newly
+ * allocated context, or NULL if SHA1_NewContext fails. arg is not used.
+ */
+SHA1Context *
+SHA1_Resurrect(unsigned char *space, void *arg)
+{
+ SHA1Context *cx = SHA1_NewContext();
+ if (cx == NULL)
+ return NULL;
+
+ PORT_Memcpy(cx, space, sizeof(SHA1Context));
+ return cx;
+}
+
+/* Copy the complete hashing state from src into dest (bytewise copy). */
+void
+SHA1_Clone(SHA1Context *dest, SHA1Context *src)
+{
+ memcpy(dest, src, sizeof *dest);
+}
+
+/* Not implemented: only records PR_NOT_IMPLEMENTED_ERROR via PORT_SetError. */
+void
+SHA1_TraceState(SHA1Context *ctx)
+{
+ PORT_SetError(PR_NOT_IMPLEMENTED_ERROR);
+}
diff --git a/security/nss/lib/freebl/sha_fast.h b/security/nss/lib/freebl/sha_fast.h
new file mode 100644
index 000000000..4f37d13d0
--- /dev/null
+++ b/security/nss/lib/freebl/sha_fast.h
@@ -0,0 +1,176 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _SHA_FAST_H_
+#define _SHA_FAST_H_
+
+#include "prlong.h"
+#include "blapii.h"
+
+#define SHA1_INPUT_LEN 64
+
+/*
+ * SHA_HW_t is the integer type used for the state and schedule words:
+ * 64 bits wide on IS_64 builds other than __sparc (which also sets
+ * SHA1_USING_64_BIT), 32 bits wide everywhere else.
+ */
+#if defined(IS_64) && !defined(__sparc)
+typedef PRUint64 SHA_HW_t;
+#define SHA1_USING_64_BIT 1
+#else
+typedef PRUint32 SHA_HW_t;
+#endif
+
+/* State of one SHA-1 computation. */
+struct SHA1ContextStr {
+ union {
+ PRUint32 w[16]; /* input buffer */
+ PRUint8 b[64]; /* the same 64 bytes, addressed bytewise */
+ } u;
+ PRUint64 size; /* count of hashed bytes. */
+ SHA_HW_t H[22]; /* 5 state variables, 16 tmp values, 1 extra */
+};
+
+#if defined(_MSC_VER)
+#include <stdlib.h>
+#if defined(IS_LITTLE_ENDIAN)
+#if (_MSC_VER >= 1300)
+#pragma intrinsic(_byteswap_ulong)
+#define SHA_HTONL(x) _byteswap_ulong(x)
+#elif defined(NSS_X86_OR_X64)
+#ifndef FORCEINLINE
+#if (_MSC_VER >= 1200)
+#define FORCEINLINE __forceinline
+#else
+#define FORCEINLINE __inline
+#endif /* _MSC_VER */
+#endif /* !defined FORCEINLINE */
+#define FASTCALL __fastcall
+
+static FORCEINLINE PRUint32 FASTCALL
+swap4b(PRUint32 dwd)
+{
+ __asm {
+ mov eax,dwd
+ bswap eax
+ }
+}
+
+#define SHA_HTONL(x) swap4b(x)
+#endif /* NSS_X86_OR_X64 */
+#endif /* IS_LITTLE_ENDIAN */
+
+#pragma intrinsic(_lrotr, _lrotl)
+#define SHA_ROTL(x, n) _lrotl(x, n)
+#define SHA_ROTL_IS_DEFINED 1
+#endif /* _MSC_VER */
+
+#if defined(__GNUC__)
+/* __x86_64__ and __x86_64 are defined by GCC on x86_64 CPUs */
+/*
+ * SHA_ROTL: rotate a 32-bit value left by n bits. The shift by (32 - n)
+ * means n must lie strictly between 0 and 32.
+ * In the 64-bit variant only the low 32 bits of x take part; the rotated
+ * 32-bit result is widened to 64 bits on return.
+ */
+#if defined(SHA1_USING_64_BIT)
+static __inline__ PRUint64
+SHA_ROTL(PRUint64 x, PRUint32 n)
+{
+ PRUint32 t = (PRUint32)x;
+ return ((t << n) | (t >> (32 - n)));
+}
+#else
+static __inline__ PRUint32
+SHA_ROTL(PRUint32 t, PRUint32 n)
+{
+ return ((t << n) | (t >> (32 - n)));
+}
+#endif
+#define SHA_ROTL_IS_DEFINED 1
+
+#if defined(NSS_X86_OR_X64)
+/* Reverse the byte order of a 32-bit value with the x86 bswap instruction. */
+static __inline__ PRUint32
+swap4b(PRUint32 value)
+{
+ __asm__("bswap %0"
+ : "+r"(value));
+ return (value);
+}
+#define SHA_HTONL(x) swap4b(x)
+
+#elif defined(__thumb2__) || \
+ (!defined(__thumb__) && \
+ (defined(__ARM_ARCH_6__) || \
+ defined(__ARM_ARCH_6J__) || \
+ defined(__ARM_ARCH_6K__) || \
+ defined(__ARM_ARCH_6Z__) || \
+ defined(__ARM_ARCH_6ZK__) || \
+ defined(__ARM_ARCH_6T2__) || \
+ defined(__ARM_ARCH_7__) || \
+ defined(__ARM_ARCH_7A__) || \
+ defined(__ARM_ARCH_7R__)))
+/* Reverse the byte order of a 32-bit value with the ARM rev instruction. */
+static __inline__ PRUint32
+swap4b(PRUint32 value)
+{
+ PRUint32 ret;
+ __asm__("rev %0, %1"
+ : "=r"(ret)
+ : "r"(value));
+ return ret;
+}
+#define SHA_HTONL(x) swap4b(x)
+
+#endif /* x86 family */
+
+#endif /* __GNUC__ */
+
+/*
+ * Portable fallbacks, used when no compiler-specific version was selected
+ * above. Both macros evaluate their argument once by parking it in a
+ * variable named tmp, which the using function must provide;
+ * SHA_NEED_TMP_VARIABLE signals that requirement.
+ */
+#if !defined(SHA_ROTL_IS_DEFINED)
+#define SHA_NEED_TMP_VARIABLE 1
+#define SHA_ROTL(X, n) (tmp = (X), ((tmp) << (n)) | ((tmp) >> (32 - (n))))
+#endif
+
+#if !defined(SHA_HTONL)
+#define SHA_MASK 0x00FF00FF
+#if defined(IS_LITTLE_ENDIAN)
+#undef SHA_NEED_TMP_VARIABLE
+#define SHA_NEED_TMP_VARIABLE 1
+/* Swap the two 16-bit halves, then swap the bytes inside each half. */
+#define SHA_HTONL(x) (tmp = (x), tmp = (tmp << 16) | (tmp >> 16), \
+ ((tmp & SHA_MASK) << 8) | ((tmp >> 8) & SHA_MASK))
+#else
+/* No little-endian swap needed: the value is used as is. */
+#define SHA_HTONL(x) (x)
+#endif
+#endif
+
+/* Byte-swap x in place (a no-op where SHA_HTONL is the identity). */
+#define SHA_BYTESWAP(x) x = SHA_HTONL(x)
+
+/*
+ * SHA_STORE(n) writes state word n to the output as a 32-bit word.
+ * SHA_STORE_RESULT writes all five. Both expect variables named hashout and
+ * ctx in the enclosing function.
+ */
+#define SHA_STORE(n) ((PRUint32*)hashout)[n] = SHA_HTONL(ctx->H[n])
+#if defined(HAVE_UNALIGNED_ACCESS)
+/* Unaligned 32-bit stores are allowed: always store directly. */
+#define SHA_STORE_RESULT \
+ SHA_STORE(0); \
+ SHA_STORE(1); \
+ SHA_STORE(2); \
+ SHA_STORE(3); \
+ SHA_STORE(4);
+
+#elif defined(IS_LITTLE_ENDIAN) || defined(SHA1_USING_64_BIT)
+/* Store directly when hashout is 32-bit aligned; otherwise build the
+ * digest in an aligned temporary and memcpy it out. */
+#define SHA_STORE_RESULT \
+ if (!((ptrdiff_t)hashout % sizeof(PRUint32))) { \
+ SHA_STORE(0); \
+ SHA_STORE(1); \
+ SHA_STORE(2); \
+ SHA_STORE(3); \
+ SHA_STORE(4); \
+ } else { \
+ PRUint32 tmpbuf[5]; \
+ tmpbuf[0] = SHA_HTONL(ctx->H[0]); \
+ tmpbuf[1] = SHA_HTONL(ctx->H[1]); \
+ tmpbuf[2] = SHA_HTONL(ctx->H[2]); \
+ tmpbuf[3] = SHA_HTONL(ctx->H[3]); \
+ tmpbuf[4] = SHA_HTONL(ctx->H[4]); \
+ memcpy(hashout, tmpbuf, SHA1_LENGTH); \
+ }
+
+#else
+/* Remaining case (not little-endian, 32-bit state words): for an unaligned
+ * hashout the first SHA1_LENGTH bytes of H are copied out unchanged. */
+#define SHA_STORE_RESULT \
+ if (!((ptrdiff_t)hashout % sizeof(PRUint32))) { \
+ SHA_STORE(0); \
+ SHA_STORE(1); \
+ SHA_STORE(2); \
+ SHA_STORE(3); \
+ SHA_STORE(4); \
+ } else { \
+ memcpy(hashout, ctx->H, SHA1_LENGTH); \
+ }
+#endif
+
+#endif /* _SHA_FAST_H_ */
diff --git a/security/nss/lib/freebl/shsign.h b/security/nss/lib/freebl/shsign.h
new file mode 100644
index 000000000..590c0e6b3
--- /dev/null
+++ b/security/nss/lib/freebl/shsign.h
@@ -0,0 +1,14 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _SHSIGN_H_
+#define _SHSIGN_H_
+
+#define SGN_SUFFIX ".chk"
+#define NSS_SIGN_CHK_MAGIC1 0xf1
+#define NSS_SIGN_CHK_MAGIC2 0xc5
+#define NSS_SIGN_CHK_MAJOR_VERSION 0x01
+#define NSS_SIGN_CHK_MINOR_VERSION 0x02
+
+#endif /* _SHSIGN_H_ */
diff --git a/security/nss/lib/freebl/shvfy.c b/security/nss/lib/freebl/shvfy.c
new file mode 100644
index 000000000..af4a34fb0
--- /dev/null
+++ b/security/nss/lib/freebl/shvfy.c
@@ -0,0 +1,534 @@
+
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "shsign.h"
+#include "prlink.h"
+#include "prio.h"
+#include "blapi.h"
+#include "seccomon.h"
+#include "stdio.h"
+#include "prmem.h"
+#include "hasht.h"
+#include "pqg.h"
+#include "blapii.h"
+
+/*
+ * Most modern versions of Linux support a speed optimization scheme where an
+ * application called prelink modifies programs and shared libraries to quickly
+ * load if they fit into an already designed address space. In short, prelink
+ * scans the list of programs and libraries on your system, assigns them a
+ * predefined space in the address space, then provides the fixups to the
+ * library.
+ *
+ * The modification of the shared library is correctly detected by the freebl
+ * FIPS checksum scheme where we check a signed hash of the library against the
+ * library itself.
+ *
+ * The prelink command itself can reverse the process of modification and
+ * output the pristine shared library as it was before prelink made its
+ * changes. If FREEBL_USE_PRELINK is set Freebl uses prelink to output the
+ * original copy of the shared library before prelink modified it.
+ */
+#ifdef FREEBL_USE_PRELINK
+/* Default prelink command line; a build may predefine FREEBL_PRELINK_COMMAND
+ * to override it. The first space-separated word is the executable, the
+ * remaining words are its arguments. */
+#ifndef FREEBL_PRELINK_COMMAND
+#define FREEBL_PRELINK_COMMAND "/usr/sbin/prelink -u -o -"
+#endif
+#include "private/pprio.h"
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+
+/*
+ * This function returns an NSPR PRFileDesc * which the caller can read to
+ * obtain the pristine value of the shared library, before any OS related
+ * changes to it (usually address fixups).
+ *
+ * If prelink is installed, this file descriptor is a pipe connecting the
+ * output of
+ *      /usr/sbin/prelink -u -o - {Library}
+ * and *pid returns the process id of the prelink child.
+ *
+ * If prelink is not installed, it returns a normal readonly handle to the
+ * library itself and *pid is set to '0'.
+ *
+ * Returns NULL if a memory allocation, pipe() or vfork() fails.
+ */
+PRFileDesc *
+bl_OpenUnPrelink(const char *shName, int *pid)
+{
+    char *command = strdup(FREEBL_PRELINK_COMMAND);
+    char *argString = NULL;
+    char **argv = NULL;
+    char *shNameArg = NULL;
+    char *cp;
+    pid_t child;
+    int argc = 0, argNext = 0;
+    struct stat statBuf;
+    int pipefd[2] = { -1, -1 };
+    int ret;
+
+    *pid = 0;
+
+    /* strdup() can fail; the scan below would otherwise dereference NULL */
+    if (command == NULL) {
+        return NULL;
+    }
+
+    /* make sure the prelink command exists first. If not, fall back to
+     * just reading the file */
+    for (cp = command; *cp; cp++) {
+        if (*cp == ' ') {
+            *cp++ = 0;
+            argString = cp;
+            break;
+        }
+    }
+    memset(&statBuf, 0, sizeof(statBuf));
+    /* stat the file, follow the link */
+    ret = stat(command, &statBuf);
+    if (ret < 0) {
+        free(command);
+        return PR_Open(shName, PR_RDONLY, 0);
+    }
+    /* file exists, make sure it's an executable */
+    if (!S_ISREG(statBuf.st_mode) ||
+        ((statBuf.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0)) {
+        free(command);
+        return PR_Open(shName, PR_RDONLY, 0);
+    }
+
+    /* OK, the prelink command exists and looks correct, use it */
+    /* build the arglist while we can still malloc */
+    /* count the args if any. Note there is no provision for escaped
+     * spaces here. Each pass skips a run of spaces, then one word, and
+     * never advances past the terminating NUL (trailing spaces are safe). */
+    if (argString) {
+        cp = argString;
+        while (*cp) {
+            while (*cp == ' ') {
+                cp++;
+            }
+            if (*cp == 0) {
+                break;
+            }
+            argc++;
+            while (*cp && *cp != ' ') {
+                cp++;
+            }
+        }
+    }
+
+    /* add the additional args: argv[0] (command), shName, NULL*/
+    argc += 3;
+    argv = PORT_NewArray(char *, argc);
+    if (argv == NULL) {
+        goto loser;
+    }
+
+    /* fill in the arglist, NUL-terminating each word in place */
+    argv[argNext++] = command;
+    if (argString) {
+        cp = argString;
+        while (*cp) {
+            while (*cp == ' ') {
+                cp++;
+            }
+            if (*cp == 0) {
+                break;
+            }
+            argv[argNext++] = cp;
+            while (*cp && *cp != ' ') {
+                cp++;
+            }
+            if (*cp) {
+                *cp++ = 0;
+            }
+        }
+    }
+    /* exec doesn't advertise taking const char **argv, do the paranoid
+     * copy */
+    shNameArg = strdup(shName);
+    if (shNameArg == NULL) {
+        goto loser;
+    }
+    argv[argNext++] = shNameArg;
+    argv[argNext++] = 0;
+
+    ret = pipe(pipefd);
+    if (ret < 0) {
+        goto loser;
+    }
+
+    /* use vfork() so we don't trigger the pthread_at_fork() handlers */
+    child = vfork();
+    if (child < 0)
+        goto loser;
+    if (child == 0) {
+        /* set up the file descriptors */
+        /* if we need to support BSD, this will need to be an open of
+         * /dev/null and dup2(nullFD, 0)*/
+        close(0);
+        /* associate pipefd[1] with stdout */
+        if (pipefd[1] != 1)
+            dup2(pipefd[1], 1);
+        close(2);
+        close(pipefd[0]);
+        /* should probably close the other file descriptors? */
+
+        execv(command, argv);
+        /* avoid at_exit() handlers */
+        _exit(1); /* shouldn't reach here except on an error */
+    }
+    close(pipefd[1]);
+    pipefd[1] = -1;
+
+    /* this is safe because either vfork() has full fork() semantics, and thus
+     * already has its own address space, or because vfork() has paused
+     * the parent until the exec or exit */
+    free(command);
+    free(shNameArg);
+    PORT_Free(argv);
+
+    *pid = child;
+
+    return PR_ImportPipe(pipefd[0]);
+
+loser:
+    if (pipefd[0] != -1) {
+        close(pipefd[0]);
+    }
+    if (pipefd[1] != -1) {
+        close(pipefd[1]);
+    }
+    free(command);
+    free(shNameArg);
+    PORT_Free(argv);
+
+    return NULL;
+}
+
+/*
+ * bl_CloseUnPrelink -
+ *
+ * Closes a file descriptor obtained from bl_OpenUnPrelink and, when a child
+ * pid is supplied, waits for that child. Compared with a plain close, the
+ * extra waitpid() keeps the child from hanging around as a zombie.
+ */
+void
+bl_CloseUnPrelink(PRFileDesc *file, int pid)
+{
+    PR_Close(file);
+
+    /* a pid of zero means there is no child to reap */
+    if (pid == 0) {
+        return;
+    }
+    waitpid(pid, NULL, 0);
+}
+#endif
+
+/* #define DEBUG_SHVERIFY 1 */
+
+/*
+ * Build the name of the check file for libName: a trailing
+ * "." SHLIB_SUFFIX, if present, is replaced by SGN_SUFFIX; otherwise
+ * SGN_SUFFIX is simply appended.
+ *
+ * Returns a PORT_Alloc'ed string that the caller frees with PORT_Free, or
+ * NULL if the allocation fails.
+ */
+static char *
+mkCheckFileName(const char *libName)
+{
+    int ln_len = PORT_Strlen(libName);
+    /* sizeof counts the terminating NUL, so index is where a trailing
+     * "." SHLIB_SUFFIX would start */
+    int index = ln_len + 1 - sizeof("." SHLIB_SUFFIX);
+    char *output = PORT_Alloc(ln_len + sizeof(SGN_SUFFIX));
+
+    if (output == NULL) {
+        return NULL;
+    }
+
+    /* comparing sizeof bytes includes the NUL, so the suffix must be at
+     * the very end of libName to match */
+    if ((index > 0) &&
+        (PORT_Strncmp(&libName[index],
+                      "." SHLIB_SUFFIX, sizeof("." SHLIB_SUFFIX)) == 0)) {
+        ln_len = index;
+    }
+    PORT_Memcpy(output, libName, ln_len);
+    PORT_Memcpy(&output[ln_len], SGN_SUFFIX, sizeof(SGN_SUFFIX));
+    return output;
+}
+
+/*
+ * Decode the 4-byte big-endian integer stored at buf.
+ *
+ * Each byte is widened to unsigned int before shifting: a plain
+ * buf[0] << 24 shifts a promoted signed int into the sign bit whenever
+ * buf[0] >= 0x80, which is undefined behaviour.
+ */
+static int
+decodeInt(unsigned char *buf)
+{
+    return (int)(((unsigned int)buf[0] << 24) |
+                 ((unsigned int)buf[1] << 16) |
+                 ((unsigned int)buf[2] << 8) |
+                 (unsigned int)buf[3]);
+}
+
+/*
+ * Read one length-prefixed item from fd: a 4-byte big-endian length
+ * followed by that many bytes of data.
+ *
+ * On success item->data is a PORT_Alloc'ed buffer of item->len bytes that
+ * the caller must free. On failure SECFailure is returned and no buffer is
+ * left allocated.
+ */
+static SECStatus
+readItem(PRFileDesc *fd, SECItem *item)
+{
+    unsigned char buf[4];
+    int bytesRead;
+    int len;
+
+    bytesRead = PR_Read(fd, buf, 4);
+    if (bytesRead != 4) {
+        return SECFailure;
+    }
+    len = decodeInt(buf);
+    /* the length comes from the file; a negative value would turn into a
+     * huge unsigned allocation request */
+    if (len < 0) {
+        return SECFailure;
+    }
+    item->len = len;
+
+    item->data = PORT_Alloc(item->len);
+    if (item->data == NULL) {
+        item->len = 0;
+        return SECFailure;
+    }
+    bytesRead = PR_Read(fd, item->data, item->len);
+    if (bytesRead != len) {
+        PORT_Free(item->data);
+        item->data = NULL;
+        item->len = 0;
+        return SECFailure;
+    }
+    return SECSuccess;
+}
+
+static PRBool blapi_SHVerifyFile(const char *shName, PRBool self);
+
+/*
+ * Look up the path of the shared library identified by name/addr with
+ * PR_GetLibraryFilePathname() and verify that file. If the path cannot be
+ * found the signature does not verify and PR_FALSE is returned.
+ */
+static PRBool
+blapi_SHVerify(const char *name, PRFuncPtr addr, PRBool self)
+{
+    PRBool verified;
+    char *libPath = PR_GetLibraryFilePathname(name, addr);
+
+    if (libPath == NULL) {
+        return PR_FALSE;
+    }
+
+    verified = blapi_SHVerifyFile(libPath, self);
+    PR_Free(libPath);
+
+    return verified;
+}
+/*
+ * Verify the shared library identified by name/addr. Calls blapi_SHVerify
+ * with self set to PR_FALSE.
+ */
+PRBool
+BLAPI_SHVerify(const char *name, PRFuncPtr addr)
+{
+ return blapi_SHVerify(name, addr, PR_FALSE);
+}
+/*
+ * Verify the shared library file at path shName. Calls blapi_SHVerifyFile
+ * with self set to PR_FALSE.
+ */
+PRBool
+BLAPI_SHVerifyFile(const char *shName)
+{
+ return blapi_SHVerifyFile(shName, PR_FALSE);
+}
+
+/*
+ * Verify the shared library file shName against its check file.
+ *
+ * The check file (named by mkCheckFileName) holds a 12-byte header, a DSA
+ * public key (prime, subPrime, base, publicValue) and a signature. The
+ * library is hashed with the hash that PQG_GetHashType() selects for the
+ * key's parameters, and the digest is checked with DSA_VerifyDigest().
+ *
+ * shName - path of the library to verify.
+ * self   - when PR_FALSE, the check is refused unless
+ *          BL_FIPSEntryOK(PR_FALSE) succeeds.
+ *
+ * Returns PR_TRUE only if the signature verifies; any failure along the
+ * way yields PR_FALSE.
+ */
+static PRBool
+blapi_SHVerifyFile(const char *shName, PRBool self)
+{
+    char *checkName = NULL;
+    PRFileDesc *checkFD = NULL;
+    PRFileDesc *shFD = NULL;
+    void *hashcx = NULL;
+    const SECHashObject *hashObj = NULL;
+    SECItem signature = { 0, NULL, 0 };
+    SECItem hash;
+    int bytesRead, offset;
+    SECStatus rv;
+    DSAPublicKey key;
+    int count;
+#ifdef FREEBL_USE_PRELINK
+    int pid = 0;
+#endif
+
+    PRBool result = PR_FALSE; /* if anything goes wrong,
+                               * the signature does not verify */
+    unsigned char buf[4096];
+    unsigned char hashBuf[HASH_LENGTH_MAX];
+
+    PORT_Memset(&key, 0, sizeof(key));
+    hash.data = hashBuf;
+    hash.len = sizeof(hashBuf);
+
+    /* If our integrity check was never ran or failed, fail any other
+     * integrity checks to prevent any token going into FIPS mode. */
+    if (!self && (BL_FIPSEntryOK(PR_FALSE) != SECSuccess)) {
+        return PR_FALSE;
+    }
+
+    if (!shName) {
+        goto loser;
+    }
+
+    /* figure out the name of our check file */
+    checkName = mkCheckFileName(shName);
+    if (!checkName) {
+        goto loser;
+    }
+
+    /* open the check File */
+    checkFD = PR_Open(checkName, PR_RDONLY, 0);
+    if (checkFD == NULL) {
+#ifdef DEBUG_SHVERIFY
+        fprintf(stderr, "Failed to open the check file %s: (%d, %d)\n",
+                checkName, (int)PR_GetError(), (int)PR_GetOSError());
+#endif /* DEBUG_SHVERIFY */
+        goto loser;
+    }
+
+    /* read and verify the header */
+    bytesRead = PR_Read(checkFD, buf, 12);
+    if (bytesRead != 12) {
+        goto loser;
+    }
+    if ((buf[0] != NSS_SIGN_CHK_MAGIC1) || (buf[1] != NSS_SIGN_CHK_MAGIC2)) {
+        goto loser;
+    }
+    if ((buf[2] != NSS_SIGN_CHK_MAJOR_VERSION) ||
+        (buf[3] < NSS_SIGN_CHK_MINOR_VERSION)) {
+        goto loser;
+    }
+#ifdef notdef
+    if (decodeInt(&buf[8]) != CKK_DSA) {
+        goto loser;
+    }
+#endif
+
+    /* seek past any future header extensions */
+    offset = decodeInt(&buf[4]);
+    if (PR_Seek(checkFD, offset, PR_SEEK_SET) < 0) {
+        goto loser;
+    }
+
+    /* read the key */
+    rv = readItem(checkFD, &key.params.prime);
+    if (rv != SECSuccess) {
+        goto loser;
+    }
+    rv = readItem(checkFD, &key.params.subPrime);
+    if (rv != SECSuccess) {
+        goto loser;
+    }
+    rv = readItem(checkFD, &key.params.base);
+    if (rv != SECSuccess) {
+        goto loser;
+    }
+    rv = readItem(checkFD, &key.publicValue);
+    if (rv != SECSuccess) {
+        goto loser;
+    }
+    /* read the signature */
+    rv = readItem(checkFD, &signature);
+    if (rv != SECSuccess) {
+        goto loser;
+    }
+
+    /* done with the check file */
+    PR_Close(checkFD);
+    checkFD = NULL;
+
+    hashObj = HASH_GetRawHashObject(PQG_GetHashType(&key.params));
+    if (hashObj == NULL) {
+        goto loser;
+    }
+
+/* open our library file */
+#ifdef FREEBL_USE_PRELINK
+    shFD = bl_OpenUnPrelink(shName, &pid);
+#else
+    shFD = PR_Open(shName, PR_RDONLY, 0);
+#endif
+    if (shFD == NULL) {
+#ifdef DEBUG_SHVERIFY
+        fprintf(stderr, "Failed to open the library file %s: (%d, %d)\n",
+                shName, (int)PR_GetError(), (int)PR_GetOSError());
+#endif /* DEBUG_SHVERIFY */
+        goto loser;
+    }
+
+    /* hash our library file with the hash selected above */
+    hashcx = hashObj->create();
+    if (hashcx == NULL) {
+        goto loser;
+    }
+    hashObj->begin(hashcx);
+
+    count = 0;
+    while ((bytesRead = PR_Read(shFD, buf, sizeof(buf))) > 0) {
+        hashObj->update(hashcx, buf, bytesRead);
+        count += bytesRead;
+    }
+#ifdef FREEBL_USE_PRELINK
+    bl_CloseUnPrelink(shFD, pid);
+#else
+    PR_Close(shFD);
+#endif
+    shFD = NULL;
+
+    /* a read error means only part of the library was hashed */
+    if (bytesRead < 0) {
+        goto loser;
+    }
+
+    hashObj->end(hashcx, hash.data, &hash.len, hash.len);
+
+    /* verify the hash against the check file */
+    if (DSA_VerifyDigest(&key, &signature, &hash) == SECSuccess) {
+        result = PR_TRUE;
+    }
+#ifdef DEBUG_SHVERIFY
+    {
+        int i, j;
+        fprintf(stderr, "File %s: %d bytes\n", shName, count);
+        fprintf(stderr, " hash: %d bytes\n", hash.len);
+#define STEP 10
+        for (i = 0; i < hash.len; i += STEP) {
+            fprintf(stderr, " ");
+            for (j = 0; j < STEP && (i + j) < hash.len; j++) {
+                fprintf(stderr, " %02x", hash.data[i + j]);
+            }
+            fprintf(stderr, "\n");
+        }
+        fprintf(stderr, " signature: %d bytes\n", signature.len);
+        for (i = 0; i < signature.len; i += STEP) {
+            fprintf(stderr, " ");
+            for (j = 0; j < STEP && (i + j) < signature.len; j++) {
+                fprintf(stderr, " %02x", signature.data[i + j]);
+            }
+            fprintf(stderr, "\n");
+        }
+        fprintf(stderr, "Verified : %s\n", result ? "TRUE" : "FALSE");
+    }
+#endif /* DEBUG_SHVERIFY */
+
+loser:
+    if (checkName != NULL) {
+        PORT_Free(checkName);
+    }
+    if (checkFD != NULL) {
+        PR_Close(checkFD);
+    }
+    if (shFD != NULL) {
+        /* with prelink the descriptor may be a pipe from a child process,
+         * which must be reaped as well as closed */
+#ifdef FREEBL_USE_PRELINK
+        bl_CloseUnPrelink(shFD, pid);
+#else
+        PR_Close(shFD);
+#endif
+    }
+    if (hashcx != NULL) {
+        if (hashObj) {
+            hashObj->destroy(hashcx, PR_TRUE);
+        }
+    }
+    if (signature.data != NULL) {
+        PORT_Free(signature.data);
+    }
+    if (key.params.prime.data != NULL) {
+        PORT_Free(key.params.prime.data);
+    }
+    if (key.params.subPrime.data != NULL) {
+        PORT_Free(key.params.subPrime.data);
+    }
+    if (key.params.base.data != NULL) {
+        PORT_Free(key.params.base.data);
+    }
+    if (key.publicValue.data != NULL) {
+        PORT_Free(key.publicValue.data);
+    }
+
+    return result;
+}
+
+/*
+ * Verify the library that contains this code. The address of decodeInt is
+ * passed as the address used to look up the library's path.
+ */
+PRBool
+BLAPI_VerifySelf(const char *name)
+{
+    if (name != NULL) {
+        return blapi_SHVerify(name, (PRFuncPtr)decodeInt, PR_TRUE);
+    }
+    /*
+     * If name is NULL, freebl is statically linked into softoken.
+     * softoken will call BLAPI_SHVerify next to verify itself.
+     */
+    return PR_TRUE;
+}
diff --git a/security/nss/lib/freebl/stubs.c b/security/nss/lib/freebl/stubs.c
new file mode 100644
index 000000000..8e0784935
--- /dev/null
+++ b/security/nss/lib/freebl/stubs.c
@@ -0,0 +1,711 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * Allow freebl and softoken to be loaded without util or NSPR.
+ *
+ * These symbols are overridden once real NSPR, and libutil are attached.
+ */
+#define _GNU_SOURCE 1
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <fcntl.h>
+#include <time.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <dlfcn.h>
+#include <prio.h>
+#include <prlink.h>
+#include <prlog.h>
+#include <prthread.h>
+#include <plstr.h>
+#include <prinit.h>
+#include <prlock.h>
+#include <prmem.h>
+#include <prerror.h>
+#include <prmon.h>
+#include <pratom.h>
+#include <prsystem.h>
+#include <prinrval.h>
+#include <prtime.h>
+#include <prcvar.h>
+#include <secasn1.h>
+#include <secdig.h>
+#include <secport.h>
+#include <secitem.h>
+#include <blapi.h>
+#include <private/pprio.h>
+
+#define FREEBL_NO_WEAK 1
+
+#define WEAK __attribute__((weak))
+
+#ifdef FREEBL_NO_WEAK
+
+/*
+ * This uses function pointers.
+ *
+ * CONS: A separate function is needed to
+ * fill in the function pointers.
+ *
+ * PROS: it works on all platforms.
+ * it allows for dynamically finding nspr and libutil, even once
+ * softoken is loaded and running. (NOTE: this may be a problem if
+ * we switch between the stubs and real NSPR on the fly. NSPR will
+ * do bad things if passed an _FakeArena to free or allocate from).
+ *
+ * STUB_DECLARE defines a function-pointer type type_<fn> and a static
+ * pointer ptr_<fn>, initially NULL.
+ * STUB_SAFE_CALLn returns from the ENCLOSING function with the result of
+ * ptr_<fn>(...) when the pointer is set; otherwise control falls through to
+ * whatever code follows the macro.
+ * STUB_FETCH_FUNCTION looks <fn> up with dlsym() in a variable named lib
+ * and returns SECFailure from the enclosing function if it is not found.
+ */
+#define STUB_DECLARE(ret, fn, args) \
+ typedef ret(*type_##fn) args; \
+ static type_##fn ptr_##fn = NULL
+
+#define STUB_SAFE_CALL0(fn) \
+ if (ptr_##fn) { \
+ return ptr_##fn(); \
+ }
+#define STUB_SAFE_CALL1(fn, a1) \
+ if (ptr_##fn) { \
+ return ptr_##fn(a1); \
+ }
+#define STUB_SAFE_CALL2(fn, a1, a2) \
+ if (ptr_##fn) { \
+ return ptr_##fn(a1, a2); \
+ }
+#define STUB_SAFE_CALL3(fn, a1, a2, a3) \
+ if (ptr_##fn) { \
+ return ptr_##fn(a1, a2, a3); \
+ }
+#define STUB_SAFE_CALL4(fn, a1, a2, a3, a4) \
+ if (ptr_##fn) { \
+ return ptr_##fn(a1, a2, a3, a4); \
+ }
+#define STUB_SAFE_CALL6(fn, a1, a2, a3, a4, a5, a6) \
+ if (ptr_##fn) { \
+ return ptr_##fn(a1, a2, a3, a4, a5, a6); \
+ }
+
+#define STUB_FETCH_FUNCTION(fn) \
+ ptr_##fn = (type_##fn)dlsym(lib, #fn); \
+ if (ptr_##fn == NULL) { \
+ return SECFailure; \
+ }
+
+#else
+/*
+ * this uses the loader weak attribute. it works automatically, but once
+ * freebl is loaded, the symbols are 'fixed' (later loading of NSPR or
+ * libutil will not resolve these symbols).
+ *
+ * Here STUB_DECLARE declares <fn> itself as a weak extern, and
+ * STUB_SAFE_CALLn tests and calls <fn> directly instead of a pointer.
+ */
+
+#define STUB_DECLARE(ret, fn, args) \
+ WEAK extern ret fn args
+
+#define STUB_SAFE_CALL0(fn) \
+ if (fn) { \
+ return fn(); \
+ }
+#define STUB_SAFE_CALL1(fn, a1) \
+ if (fn) { \
+ return fn(a1); \
+ }
+#define STUB_SAFE_CALL2(fn, a1, a2) \
+ if (fn) { \
+ return fn(a1, a2); \
+ }
+#define STUB_SAFE_CALL3(fn, a1, a2, a3) \
+ if (fn) { \
+ return fn(a1, a2, a3); \
+ }
+#define STUB_SAFE_CALL4(fn, a1, a2, a3, a4) \
+ if (fn) { \
+ return fn(a1, a2, a3, a4); \
+ }
+#define STUB_SAFE_CALL6(fn, a1, a2, a3, a4, a5, a6) \
+ if (fn) { \
+ return fn(a1, a2, a3, a4, a5, a6); \
+ }
+#endif
+/*
+ * One STUB_DECLARE per real libutil / NSPR function that a *_stub wrapper
+ * in this file may forward to.
+ */
+STUB_DECLARE(void *, PORT_Alloc_Util, (size_t len));
+STUB_DECLARE(void *, PORT_ArenaAlloc_Util, (PLArenaPool * arena, size_t size));
+STUB_DECLARE(void *, PORT_ArenaZAlloc_Util, (PLArenaPool * arena, size_t size));
+STUB_DECLARE(void, PORT_Free_Util, (void *ptr));
+STUB_DECLARE(void, PORT_FreeArena_Util, (PLArenaPool * arena, PRBool zero));
+STUB_DECLARE(int, PORT_GetError_Util, (void));
+STUB_DECLARE(PLArenaPool *, PORT_NewArena_Util, (unsigned long chunksize));
+STUB_DECLARE(void, PORT_SetError_Util, (int value));
+STUB_DECLARE(void *, PORT_ZAlloc_Util, (size_t len));
+STUB_DECLARE(void, PORT_ZFree_Util, (void *ptr, size_t len));
+
+STUB_DECLARE(void, PR_Assert, (const char *s, const char *file, PRIntn ln));
+STUB_DECLARE(PRStatus, PR_Access, (const char *name, PRAccessHow how));
+STUB_DECLARE(PRStatus, PR_CallOnce, (PRCallOnceType * once, PRCallOnceFN func));
+STUB_DECLARE(PRStatus, PR_Close, (PRFileDesc * fd));
+STUB_DECLARE(void, PR_DestroyLock, (PRLock * lock));
+STUB_DECLARE(void, PR_DestroyCondVar, (PRCondVar * cvar));
+STUB_DECLARE(void, PR_Free, (void *ptr));
+STUB_DECLARE(char *, PR_GetLibraryFilePathname, (const char *name,
+ PRFuncPtr addr));
+STUB_DECLARE(PRFileDesc *, PR_ImportPipe, (PROsfd osfd));
+STUB_DECLARE(void, PR_Lock, (PRLock * lock));
+STUB_DECLARE(PRCondVar *, PR_NewCondVar, (PRLock * lock));
+STUB_DECLARE(PRLock *, PR_NewLock, (void));
+STUB_DECLARE(PRStatus, PR_NotifyCondVar, (PRCondVar * cvar));
+STUB_DECLARE(PRStatus, PR_NotifyAllCondVar, (PRCondVar * cvar));
+STUB_DECLARE(PRFileDesc *, PR_Open, (const char *name, PRIntn flags,
+ PRIntn mode));
+STUB_DECLARE(PRInt32, PR_Read, (PRFileDesc * fd, void *buf, PRInt32 amount));
+STUB_DECLARE(PROffset32, PR_Seek, (PRFileDesc * fd, PROffset32 offset,
+ PRSeekWhence whence));
+STUB_DECLARE(PRStatus, PR_Sleep, (PRIntervalTime ticks));
+STUB_DECLARE(PRStatus, PR_Unlock, (PRLock * lock));
+STUB_DECLARE(PRStatus, PR_WaitCondVar, (PRCondVar * cvar,
+ PRIntervalTime timeout));
+STUB_DECLARE(char *, PR_GetEnvSecure, (const char *));
+
+STUB_DECLARE(SECItem *, SECITEM_AllocItem_Util, (PLArenaPool * arena,
+ SECItem *item, unsigned int len));
+STUB_DECLARE(SECComparison, SECITEM_CompareItem_Util, (const SECItem *a,
+ const SECItem *b));
+STUB_DECLARE(SECStatus, SECITEM_CopyItem_Util, (PLArenaPool * arena,
+ SECItem *to, const SECItem *from));
+STUB_DECLARE(void, SECITEM_FreeItem_Util, (SECItem * zap, PRBool freeit));
+STUB_DECLARE(void, SECITEM_ZfreeItem_Util, (SECItem * zap, PRBool freeit));
+STUB_DECLARE(SECOidTag, SECOID_FindOIDTag_Util, (const SECItem *oid));
+STUB_DECLARE(int, NSS_SecureMemcmp, (const void *a, const void *b, size_t n));
+
+#define PORT_ZNew_stub(type) (type *)PORT_ZAlloc_stub(sizeof(type)) /* one zeroed object of the given type */
+#define PORT_New_stub(type) (type *)PORT_Alloc_stub(sizeof(type)) /* one object, contents not initialized */
+#define PORT_ZNewArray_stub(type, num) \
+ (type *)PORT_ZAlloc_stub(sizeof(type) * (num))
+
+/*
+ * NOTE: in order to support hashing only the memory allocation stubs,
+ * the get library name stubs, and the file io stubs are needed (the latter
+ * two are for the library verification). The remaining stubs are simply to
+ * compile. Attempts to use the library for other operations without NSPR
+ * will most likely fail.
+ */
+
+/* memory
+ *
+ * PORT_Alloc_stub: forward to PORT_Alloc_Util when it is available,
+ * otherwise allocate len bytes with malloc().
+ */
+extern void *
+PORT_Alloc_stub(size_t len)
+{
+ STUB_SAFE_CALL1(PORT_Alloc_Util, len);
+ return malloc(len);
+}
+
+/*
+ * Free ptr with PORT_Free_Util when it is available, otherwise with free().
+ */
+extern void
+PORT_Free_stub(void *ptr)
+{
+    STUB_SAFE_CALL1(PORT_Free_Util, ptr);
+    /* plain call: ISO C forbids "return expr;" in a void function */
+    free(ptr);
+}
+
+/*
+ * Allocate len zero-filled bytes: via PORT_ZAlloc_Util when it is
+ * available, otherwise from the C library. Returns NULL on failure.
+ */
+extern void *
+PORT_ZAlloc_stub(size_t len)
+{
+    STUB_SAFE_CALL1(PORT_ZAlloc_Util, len);
+    /* calloc hands back memory that is already zeroed */
+    return calloc(1, len);
+}
+
+/*
+ * Zero len bytes at ptr and then free it: via PORT_ZFree_Util when it is
+ * available, otherwise with memset() and free(). A NULL ptr is a no-op in
+ * the fallback path.
+ */
+extern void
+PORT_ZFree_stub(void *ptr, size_t len)
+{
+    STUB_SAFE_CALL2(PORT_ZFree_Util, ptr, len);
+    /* memset() on a NULL pointer is undefined, so guard it */
+    if (ptr != NULL) {
+        memset(ptr, 0, len);
+        free(ptr);
+    }
+}
+
+/*
+ * Free ptr with PR_Free when it is available, otherwise with free().
+ */
+extern void
+PR_Free_stub(void *ptr)
+{
+    STUB_SAFE_CALL1(PR_Free, ptr);
+    /* plain call: ISO C forbids "return expr;" in a void function */
+    free(ptr);
+}
+
+/*
+ * arenas
+ *
+ * There is no stand-alone arena fallback: each stub forwards to the
+ * corresponding *_Util function when it has been resolved and calls abort()
+ * otherwise.
+ */
+extern PLArenaPool *
+PORT_NewArena_stub(unsigned long chunksize)
+{
+    STUB_SAFE_CALL1(PORT_NewArena_Util, chunksize);
+    abort();
+    return NULL;
+}
+
+extern void *
+PORT_ArenaAlloc_stub(PLArenaPool *arena, size_t size)
+{
+    /* forward to the non-zeroing allocator that this stub stands in for */
+    STUB_SAFE_CALL2(PORT_ArenaAlloc_Util, arena, size);
+    abort();
+    return NULL;
+}
+
+extern void *
+PORT_ArenaZAlloc_stub(PLArenaPool *arena, size_t size)
+{
+    STUB_SAFE_CALL2(PORT_ArenaZAlloc_Util, arena, size);
+    abort();
+    return NULL;
+}
+
+extern void
+PORT_FreeArena_stub(PLArenaPool *arena, PRBool zero)
+{
+    STUB_SAFE_CALL2(PORT_FreeArena_Util, arena, zero);
+    abort();
+}
+
+/* io */
+
+/*
+ * Open a file. Forwards to PR_Open when it is available. Otherwise the NSPR
+ * flags are translated to open(2) flags and the resulting descriptor is
+ * stored in a heap-allocated int that stands in for the PRFileDesc.
+ *
+ * Returns NULL if open() or the allocation fails.
+ */
+extern PRFileDesc *
+PR_Open_stub(const char *name, PRIntn flags, PRIntn mode)
+{
+    int *lfd = NULL;
+    int fd;
+    int lflags = 0;
+
+    STUB_SAFE_CALL3(PR_Open, name, flags, mode);
+
+    if (flags & PR_RDWR) {
+        lflags = O_RDWR;
+    } else if (flags & PR_WRONLY) {
+        lflags = O_WRONLY;
+    } else {
+        lflags = O_RDONLY;
+    }
+
+    /* without O_CREAT a caller asking for file creation would never get
+     * it, and O_EXCL on its own has no defined meaning */
+    if (flags & PR_CREATE_FILE)
+        lflags |= O_CREAT;
+    if (flags & PR_EXCL)
+        lflags |= O_EXCL;
+    if (flags & PR_APPEND)
+        lflags |= O_APPEND;
+    if (flags & PR_TRUNCATE)
+        lflags |= O_TRUNC;
+
+    fd = open(name, lflags, mode);
+    if (fd >= 0) {
+        lfd = PORT_New_stub(int);
+        if (lfd != NULL) {
+            *lfd = fd;
+        } else {
+            /* no wrapper to hand back, so do not leak the descriptor */
+            close(fd);
+        }
+    }
+    return (PRFileDesc *)lfd;
+}
+
+/*
+ * Wrap an already-open OS descriptor. Forwards to PR_ImportPipe when it is
+ * available; otherwise the descriptor is stored in a heap-allocated int that
+ * stands in for the PRFileDesc. Returns NULL if that allocation fails.
+ */
+extern PRFileDesc *
+PR_ImportPipe_stub(PROsfd fd)
+{
+    int *holder;
+
+    STUB_SAFE_CALL1(PR_ImportPipe, fd);
+
+    holder = PORT_New_stub(int);
+    if (holder == NULL) {
+        return NULL;
+    }
+    *holder = fd;
+    return (PRFileDesc *)holder;
+}
+
+/*
+ * Close a descriptor. Forwards to PR_Close when it is available; otherwise
+ * fd is treated as a pointer to an int holding the OS descriptor, which is
+ * closed and then released. The fallback always reports PR_SUCCESS.
+ */
+extern PRStatus
+PR_Close_stub(PRFileDesc *fd)
+{
+    int *holder = (int *)fd;
+
+    STUB_SAFE_CALL1(PR_Close, fd);
+
+    close(*holder);
+    PORT_Free_stub(holder);
+    return PR_SUCCESS;
+}
+
+/*
+ * Read up to amount bytes into buf. Forwards to PR_Read when it is
+ * available; otherwise fd is treated as a pointer to an int holding the OS
+ * descriptor and the result of read() is returned.
+ */
+extern PRInt32
+PR_Read_stub(PRFileDesc *fd, void *buf, PRInt32 amount)
+{
+    STUB_SAFE_CALL3(PR_Read, fd, buf, amount);
+
+    return read(*(int *)fd, buf, amount);
+}
+
+/*
+ * Reposition a descriptor. Forwards to PR_Seek when it is available;
+ * otherwise the NSPR whence value is mapped to its lseek() counterpart
+ * (anything other than PR_SEEK_CUR / PR_SEEK_END is treated as SEEK_SET)
+ * and the result of lseek() is returned.
+ */
+extern PROffset32
+PR_Seek_stub(PRFileDesc *fd, PROffset32 offset, PRSeekWhence whence)
+{
+    int osWhence;
+
+    STUB_SAFE_CALL3(PR_Seek, fd, offset, whence);
+
+    if (whence == PR_SEEK_CUR) {
+        osWhence = SEEK_CUR;
+    } else if (whence == PR_SEEK_END) {
+        osWhence = SEEK_END;
+    } else {
+        osWhence = SEEK_SET;
+    }
+
+    return lseek(*(int *)fd, offset, osWhence);
+}
+
+/*
+ * Test access to a file. Forwards to PR_Access when it is available;
+ * otherwise maps how to an access() mode (write, read, or plain existence
+ * for every other value) and returns PR_SUCCESS if access() returns 0.
+ */
+PRStatus
+PR_Access_stub(const char *name, PRAccessHow how)
+{
+    int mode;
+
+    STUB_SAFE_CALL2(PR_Access, name, how);
+
+    if (how == PR_ACCESS_WRITE_OK) {
+        mode = W_OK;
+    } else if (how == PR_ACCESS_READ_OK) {
+        mode = R_OK;
+    } else {
+        /* assume F_OK for all others */
+        mode = F_OK;
+    }
+
+    return (access(name, mode) == 0) ? PR_SUCCESS : PR_FAILURE;
+}
+
+/*
+ * library
+ *
+ * Return a newly allocated copy of the path of the shared object that
+ * contains addr. Forwards to PR_GetLibraryFilePathname when it is
+ * available; otherwise uses dladdr(). Returns NULL if dladdr() fails or the
+ * copy cannot be allocated.
+ */
+extern char *
+PR_GetLibraryFilePathname_stub(const char *name, PRFuncPtr addr)
+{
+    Dl_info info;
+    char *copy = NULL;
+
+    STUB_SAFE_CALL2(PR_GetLibraryFilePathname, name, addr);
+
+    if (dladdr((void *)addr, &info) != 0) {
+        copy = PORT_Alloc_stub(strlen(info.dli_fname) + 1);
+        if (copy != NULL) {
+            strcpy(copy, info.dli_fname);
+        }
+    }
+    return copy;
+}
+
+#include <errno.h>
+
+/* errors
+ *
+ * Forward to PORT_GetError_Util / PORT_SetError_Util when available;
+ * otherwise errno is used as the error code storage.
+ */
+extern int
+PORT_GetError_stub(void)
+{
+ STUB_SAFE_CALL0(PORT_GetError_Util);
+ return errno;
+}
+
+extern void
+PORT_SetError_stub(int value)
+{
+ STUB_SAFE_CALL1(PORT_SetError_Util, value);
+ errno = value;
+}
+
+/* misc
+ *
+ * PR_Assert_stub: forward to PR_Assert when available; otherwise print the
+ * file, line and message to stderr and abort().
+ */
+extern void
+PR_Assert_stub(const char *s, const char *file, PRIntn ln)
+{
+ STUB_SAFE_CALL3(PR_Assert, s, file, ln);
+ fprintf(stderr, "%s line %d: %s\n", file, ln, s);
+ abort();
+}
+
+/* time
+ *
+ * PR_Sleep_stub: forward to PR_Sleep when available; otherwise sleep with
+ * usleep(ticks * 1000) and report PR_SUCCESS.
+ * NOTE(review): the fallback treats one tick as one millisecond -- confirm
+ * that matches the PRIntervalTime unit callers pass.
+ */
+extern PRStatus
+PR_Sleep_stub(PRIntervalTime ticks)
+{
+ STUB_SAFE_CALL1(PR_Sleep, ticks);
+ usleep(ticks * 1000);
+ return PR_SUCCESS;
+}
+
+/* locking
+ *
+ * The lock, condition-variable, PR_GetEnvSecure and PR_CallOnce stubs below
+ * have no stand-alone fallback: each forwards to the real function when it
+ * has been resolved and calls abort() otherwise. The return statements
+ * after abort() are never reached.
+ */
+extern PRLock *
+PR_NewLock_stub(void)
+{
+ STUB_SAFE_CALL0(PR_NewLock);
+ abort();
+ return NULL;
+}
+
+extern PRStatus
+PR_Unlock_stub(PRLock *lock)
+{
+ STUB_SAFE_CALL1(PR_Unlock, lock);
+ abort();
+ return PR_FAILURE;
+}
+
+extern void
+PR_Lock_stub(PRLock *lock)
+{
+ STUB_SAFE_CALL1(PR_Lock, lock);
+ abort();
+ return;
+}
+
+extern void
+PR_DestroyLock_stub(PRLock *lock)
+{
+ STUB_SAFE_CALL1(PR_DestroyLock, lock);
+ abort();
+ return;
+}
+
+extern PRCondVar *
+PR_NewCondVar_stub(PRLock *lock)
+{
+ STUB_SAFE_CALL1(PR_NewCondVar, lock);
+ abort();
+ return NULL;
+}
+
+extern PRStatus
+PR_NotifyCondVar_stub(PRCondVar *cvar)
+{
+ STUB_SAFE_CALL1(PR_NotifyCondVar, cvar);
+ abort();
+ return PR_FAILURE;
+}
+
+extern PRStatus
+PR_NotifyAllCondVar_stub(PRCondVar *cvar)
+{
+ STUB_SAFE_CALL1(PR_NotifyAllCondVar, cvar);
+ abort();
+ return PR_FAILURE;
+}
+
+extern PRStatus
+PR_WaitCondVar_stub(PRCondVar *cvar, PRIntervalTime timeout)
+{
+ STUB_SAFE_CALL2(PR_WaitCondVar, cvar, timeout);
+ abort();
+ return PR_FAILURE;
+}
+
+extern char *
+PR_GetEnvSecure_stub(const char *var)
+{
+ STUB_SAFE_CALL1(PR_GetEnvSecure, var);
+ abort();
+ return NULL;
+}
+
+extern void
+PR_DestroyCondVar_stub(PRCondVar *cvar)
+{
+ STUB_SAFE_CALL1(PR_DestroyCondVar, cvar);
+ abort();
+ return;
+}
+
+/*
+ * NOTE: this presupposes GCC 4.1
+ */
+extern PRStatus
+PR_CallOnce_stub(PRCallOnceType *once, PRCallOnceFN func)
+{
+ STUB_SAFE_CALL2(PR_CallOnce, once, func);
+ abort();
+ return PR_FAILURE;
+}
+
+/*
+ * SECITEMS implement Item Utilities
+ *
+ * None of these stubs has a stand-alone fallback: each forwards to the
+ * real function when it has been resolved and calls abort() otherwise.
+ */
+extern void
+SECITEM_FreeItem_stub(SECItem *zap, PRBool freeit)
+{
+ STUB_SAFE_CALL2(SECITEM_FreeItem_Util, zap, freeit);
+ abort();
+}
+
+extern SECItem *
+SECITEM_AllocItem_stub(PLArenaPool *arena, SECItem *item, unsigned int len)
+{
+ STUB_SAFE_CALL3(SECITEM_AllocItem_Util, arena, item, len);
+ abort();
+ return NULL;
+}
+
+extern SECComparison
+SECITEM_CompareItem_stub(const SECItem *a, const SECItem *b)
+{
+ STUB_SAFE_CALL2(SECITEM_CompareItem_Util, a, b);
+ abort();
+ return SECEqual;
+}
+
+extern SECStatus
+SECITEM_CopyItem_stub(PLArenaPool *arena, SECItem *to, const SECItem *from)
+{
+ STUB_SAFE_CALL3(SECITEM_CopyItem_Util, arena, to, from);
+ abort();
+ return SECFailure;
+}
+
+extern SECOidTag
+SECOID_FindOIDTag_stub(const SECItem *oid)
+{
+ STUB_SAFE_CALL1(SECOID_FindOIDTag_Util, oid);
+ abort();
+ return SEC_OID_UNKNOWN;
+}
+
+extern void
+SECITEM_ZfreeItem_stub(SECItem *zap, PRBool freeit)
+{
+ STUB_SAFE_CALL2(SECITEM_ZfreeItem_Util, zap, freeit);
+ abort();
+}
+
+extern int
+NSS_SecureMemcmp_stub(const void *a, const void *b, size_t n)
+{
+ STUB_SAFE_CALL3(NSS_SecureMemcmp, a, b, n);
+ abort(); /* no return statement follows; abort() does not return */
+}
+
+#ifdef FREEBL_NO_WEAK
+/*
+ * File names of the NSPR and nssutil shared libraries the stubs bind to.
+ */
+static const char *nsprLibName = SHLIB_PREFIX "nspr4." SHLIB_SUFFIX;
+static const char *nssutilLibName = SHLIB_PREFIX "nssutil3." SHLIB_SUFFIX;
+/*
+ * Resolve every NSPR entry point used by the stubs from lib.
+ * Returns SECFailure at the first symbol that cannot be found (pointers
+ * fetched before that point stay set); SECSuccess otherwise.
+ */
+static SECStatus
+freebl_InitNSPR(void *lib)
+{
+ STUB_FETCH_FUNCTION(PR_Free);
+ STUB_FETCH_FUNCTION(PR_Open);
+ STUB_FETCH_FUNCTION(PR_ImportPipe);
+ STUB_FETCH_FUNCTION(PR_Close);
+ STUB_FETCH_FUNCTION(PR_Read);
+ STUB_FETCH_FUNCTION(PR_Seek);
+ STUB_FETCH_FUNCTION(PR_GetLibraryFilePathname);
+ STUB_FETCH_FUNCTION(PR_Assert);
+ STUB_FETCH_FUNCTION(PR_Access);
+ STUB_FETCH_FUNCTION(PR_Sleep);
+ STUB_FETCH_FUNCTION(PR_CallOnce);
+ STUB_FETCH_FUNCTION(PR_NewCondVar);
+ STUB_FETCH_FUNCTION(PR_NotifyCondVar);
+ STUB_FETCH_FUNCTION(PR_NotifyAllCondVar);
+ STUB_FETCH_FUNCTION(PR_WaitCondVar);
+ STUB_FETCH_FUNCTION(PR_DestroyCondVar);
+ STUB_FETCH_FUNCTION(PR_NewLock);
+ STUB_FETCH_FUNCTION(PR_Unlock);
+ STUB_FETCH_FUNCTION(PR_Lock);
+ STUB_FETCH_FUNCTION(PR_DestroyLock);
+ STUB_FETCH_FUNCTION(PR_GetEnvSecure);
+ return SECSuccess;
+}
+/*
+ * Resolve every nssutil entry point used by the stubs from lib.
+ * Returns SECFailure at the first symbol that cannot be found; SECSuccess
+ * otherwise.
+ */
+static SECStatus
+freebl_InitNSSUtil(void *lib)
+{
+ STUB_FETCH_FUNCTION(PORT_Alloc_Util);
+ STUB_FETCH_FUNCTION(PORT_Free_Util);
+ STUB_FETCH_FUNCTION(PORT_ZAlloc_Util);
+ STUB_FETCH_FUNCTION(PORT_ZFree_Util);
+ STUB_FETCH_FUNCTION(PORT_NewArena_Util);
+ STUB_FETCH_FUNCTION(PORT_ArenaAlloc_Util);
+ STUB_FETCH_FUNCTION(PORT_ArenaZAlloc_Util);
+ STUB_FETCH_FUNCTION(PORT_FreeArena_Util);
+ STUB_FETCH_FUNCTION(PORT_GetError_Util);
+ STUB_FETCH_FUNCTION(PORT_SetError_Util);
+ STUB_FETCH_FUNCTION(SECITEM_FreeItem_Util);
+ STUB_FETCH_FUNCTION(SECITEM_AllocItem_Util);
+ STUB_FETCH_FUNCTION(SECITEM_CompareItem_Util);
+ STUB_FETCH_FUNCTION(SECITEM_CopyItem_Util);
+ STUB_FETCH_FUNCTION(SECITEM_ZfreeItem_Util);
+ STUB_FETCH_FUNCTION(SECOID_FindOIDTag_Util);
+ STUB_FETCH_FUNCTION(NSS_SecureMemcmp);
+ return SECSuccess;
+}
+
+/*
+ * fetch the library if it's loaded. For NSS it should already be loaded
+ */
+#define freebl_getLibrary(libName) \
+ dlopen(libName, RTLD_LAZY | RTLD_NOLOAD)
+
+#define freebl_releaseLibrary(lib) \
+ if (lib) \
+ dlclose(lib)
+
+static void *FREEBLnsprGlobalLib = NULL;
+static void *FREEBLnssutilGlobalLib = NULL;
+
+void __attribute((destructor)) FREEBL_unload()
+{
+ freebl_releaseLibrary(FREEBLnsprGlobalLib);
+ freebl_releaseLibrary(FREEBLnssutilGlobalLib);
+}
+#endif
+
+/*
+ * load the symbols from the real libraries if available.
+ *
+ * With FREEBL_NO_WEAK defined, NSPR and then nssutil are looked up with
+ * freebl_getLibrary() and their entry points are resolved. Returns
+ * SECFailure if either library cannot be found or is missing a required
+ * symbol. A library that was set up by an earlier call is not looked up
+ * again. Without FREEBL_NO_WEAK this does nothing and returns SECSuccess.
+ */
+extern SECStatus
+FREEBL_InitStubs()
+{
+ SECStatus rv = SECSuccess;
+#ifdef FREEBL_NO_WEAK
+ void *nspr = NULL;
+ void *nssutil = NULL;
+
+ /* NSPR should be first */
+ if (!FREEBLnsprGlobalLib) {
+ nspr = freebl_getLibrary(nsprLibName);
+ if (!nspr) {
+ return SECFailure;
+ }
+ rv = freebl_InitNSPR(nspr);
+ if (rv != SECSuccess) {
+ freebl_releaseLibrary(nspr);
+ return rv;
+ }
+ FREEBLnsprGlobalLib = nspr; /* adopt */
+ }
+ /* now load NSSUTIL */
+ if (!FREEBLnssutilGlobalLib) {
+ nssutil = freebl_getLibrary(nssutilLibName);
+ if (!nssutil) {
+ return SECFailure;
+ }
+ rv = freebl_InitNSSUtil(nssutil);
+ if (rv != SECSuccess) {
+ freebl_releaseLibrary(nssutil);
+ return rv;
+ }
+ FREEBLnssutilGlobalLib = nssutil; /* adopt */
+ }
+#endif
+
+ return rv;
+}
diff --git a/security/nss/lib/freebl/stubs.h b/security/nss/lib/freebl/stubs.h
new file mode 100644
index 000000000..25ec394ec
--- /dev/null
+++ b/security/nss/lib/freebl/stubs.h
@@ -0,0 +1,66 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * Allow freebl and softoken to be loaded without util or NSPR.
+ *
+ * These symbols are overridden once real NSPR, and libutil are attached.
+ */
+
+/* The guard tests the same macro that it defines, so a second inclusion of
+ * this header is skipped. */
+#ifndef _STUBS_H_
+#define _STUBS_H_ 1
+
+#ifdef _LIBUTIL_H_
+/* must be included before util */
+/*#error stubs.h included too late */
+#define MP_DIGITES(x) "stubs included too late"
+#endif
+
+/* hide libutil rename */
+#define _LIBUTIL_H_ 1
+
+/* Redirect the libutil and NSPR names used by freebl to the *_stub
+ * wrappers. */
+#define PORT_Alloc PORT_Alloc_stub
+#define PORT_ArenaAlloc PORT_ArenaAlloc_stub
+#define PORT_ArenaZAlloc PORT_ArenaZAlloc_stub
+#define PORT_Free PORT_Free_stub
+#define PORT_FreeArena PORT_FreeArena_stub
+#define PORT_GetError PORT_GetError_stub
+#define PORT_NewArena PORT_NewArena_stub
+#define PORT_SetError PORT_SetError_stub
+#define PORT_ZAlloc PORT_ZAlloc_stub
+#define PORT_ZFree PORT_ZFree_stub
+
+#define SECITEM_AllocItem SECITEM_AllocItem_stub
+#define SECITEM_CompareItem SECITEM_CompareItem_stub
+#define SECITEM_CopyItem SECITEM_CopyItem_stub
+#define SECITEM_FreeItem SECITEM_FreeItem_stub
+#define SECITEM_ZfreeItem SECITEM_ZfreeItem_stub
+#define SECOID_FindOIDTag SECOID_FindOIDTag_stub
+#define NSS_SecureMemcmp NSS_SecureMemcmp_stub
+
+#define PR_Assert PR_Assert_stub
+#define PR_Access PR_Access_stub
+#define PR_CallOnce PR_CallOnce_stub
+#define PR_Close PR_Close_stub
+#define PR_DestroyCondVar PR_DestroyCondVar_stub
+#define PR_DestroyLock PR_DestroyLock_stub
+#define PR_Free PR_Free_stub
+#define PR_GetLibraryFilePathname PR_GetLibraryFilePathname_stub
+#define PR_ImportPipe PR_ImportPipe_stub
+#define PR_Lock PR_Lock_stub
+#define PR_NewCondVar PR_NewCondVar_stub
+#define PR_NewLock PR_NewLock_stub
+#define PR_NotifyCondVar PR_NotifyCondVar_stub
+#define PR_NotifyAllCondVar PR_NotifyAllCondVar_stub
+#define PR_Open PR_Open_stub
+#define PR_Read PR_Read_stub
+#define PR_Seek PR_Seek_stub
+#define PR_Sleep PR_Sleep_stub
+#define PR_Unlock PR_Unlock_stub
+#define PR_WaitCondVar PR_WaitCondVar_stub
+#define PR_GetEnvSecure PR_GetEnvSecure_stub
+
+/* Binds the stubs to the real libraries where possible. */
+extern int FREEBL_InitStubs(void);
+
+#endif /* _STUBS_H_ */
diff --git a/security/nss/lib/freebl/sysrand.c b/security/nss/lib/freebl/sysrand.c
new file mode 100644
index 000000000..0128fa0ee
--- /dev/null
+++ b/security/nss/lib/freebl/sysrand.c
@@ -0,0 +1,49 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "seccomon.h"
+
+#ifndef XP_WIN
+static size_t rng_systemFromNoise(unsigned char *dest, size_t maxLen);
+#endif
+
+#if defined(XP_UNIX) || defined(XP_BEOS)
+#include "unix_rand.c"
+#endif
+#ifdef XP_WIN
+#include "win_rand.c"
+#endif
+#ifdef XP_OS2
+#include "os2_rand.c"
+#endif
+
+#ifndef XP_WIN
+/*
+ * Normal RNG_SystemRNG() isn't available, use the system noise to collect
+ * the required amount of entropy.
+ */
+static size_t
+rng_systemFromNoise(unsigned char *dest, size_t maxLen)
+{
+    /* Fills dest with maxLen bytes gathered from repeated RNG_GetNoise()
+     * calls and always reports the full requested length. */
+    const size_t requested = maxLen;
+    unsigned char *cursor = dest;
+    size_t outstanding;
+
+    for (outstanding = maxLen; outstanding != 0;) {
+        size_t got = RNG_GetNoise(cursor, outstanding);
+
+        PORT_Assert(got != 0);
+        cursor += got;
+        outstanding -= got;
+        rng_systemJitter(); /* perturb the system before the next sample */
+    }
+    return requested;
+}
+#endif
diff --git a/security/nss/lib/freebl/tlsprfalg.c b/security/nss/lib/freebl/tlsprfalg.c
new file mode 100644
index 000000000..1e5e67886
--- /dev/null
+++ b/security/nss/lib/freebl/tlsprfalg.c
@@ -0,0 +1,134 @@
+/* tlsprfalg.c - TLS Pseudo Random Function (PRF) implementation
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "blapi.h"
+#include "hasht.h"
+#include "alghmac.h"
+
+#define PHASH_STATE_MAX_LEN HASH_LENGTH_MAX
+
+/* TLS P_hash data expansion: fills result->data with result->len bytes of
+ * HMAC(hashType) output keyed by secret over the optional label and the seed.
+ * Returns SECFailure if HMAC_Create or any HMAC_Finish call fails. */
+SECStatus
+TLS_P_hash(HASH_HashType hashType, const SECItem *secret, const char *label,
+           SECItem *seed, SECItem *result, PRBool isFIPS)
+{
+    unsigned char state[PHASH_STATE_MAX_LEN];
+    unsigned char outbuf[PHASH_STATE_MAX_LEN];
+    unsigned int state_len = 0, label_len = 0, outbuf_len = 0, chunk_size;
+    unsigned int remaining;
+    unsigned char *res;
+    HMACContext *cx;
+    SECStatus rv = SECFailure, status;
+    const SECHashObject *hashObj = HASH_GetRawHashObject(hashType);
+
+    PORT_Assert((secret != NULL) && (secret->data != NULL || !secret->len));
+    PORT_Assert((seed != NULL) && (seed->data != NULL));
+    PORT_Assert((result != NULL) && (result->data != NULL));
+
+    remaining = result->len;
+    res = result->data;
+    if (label != NULL)
+        label_len = PORT_Strlen(label);
+
+    cx = HMAC_Create(hashObj, secret->data, secret->len, isFIPS);
+    if (cx == NULL)
+        goto loser;
+
+    /* initialize the state = A(1) = HMAC_hash(secret, label + seed) */
+    HMAC_Begin(cx);
+    if (label_len) /* never hand a NULL or empty label to HMAC_Update */
+        HMAC_Update(cx, (unsigned char *)label, label_len);
+    HMAC_Update(cx, seed->data, seed->len);
+    status = HMAC_Finish(cx, state, &state_len, sizeof(state));
+    if (status != SECSuccess)
+        goto loser;
+
+    /* generate a block at a time until we're done */
+    while (remaining > 0) {
+        /* output block = HMAC_hash(secret, state + label + seed) */
+        HMAC_Begin(cx);
+        HMAC_Update(cx, state, state_len);
+        if (label_len)
+            HMAC_Update(cx, (unsigned char *)label, label_len);
+        HMAC_Update(cx, seed->data, seed->len);
+        status = HMAC_Finish(cx, outbuf, &outbuf_len, sizeof(outbuf));
+        if (status != SECSuccess)
+            goto loser;
+
+        /* Update the state = A(i) = HMAC_hash(secret, A(i-1)) */
+        HMAC_Begin(cx);
+        HMAC_Update(cx, state, state_len);
+        status = HMAC_Finish(cx, state, &state_len, sizeof(state));
+        if (status != SECSuccess)
+            goto loser;
+
+        chunk_size = PR_MIN(outbuf_len, remaining); /* last block may be cut */
+        PORT_Memcpy(res, outbuf, chunk_size);
+        res += chunk_size;
+        remaining -= chunk_size;
+    }
+    rv = SECSuccess;
+
+loser:
+    /* clear out state so it's not left on the stack */
+    if (cx)
+        HMAC_Destroy(cx, PR_TRUE);
+    PORT_Memset(state, 0, sizeof(state));
+    PORT_Memset(outbuf, 0, sizeof(outbuf));
+    return rv;
+}
+
+SECStatus
+TLS_PRF(const SECItem *secret, const char *label, SECItem *seed,
+        SECItem *result, PRBool isFIPS)
+{
+    /* result = P_hash(MD5, first half of secret) XOR P_hash(SHA-1, second
+     * half of secret). For an odd-length secret both halves are rounded up,
+     * so they share the middle byte. */
+    SECItem md5Key = { siBuffer, NULL, 0 };
+    SECItem sha1Key = { siBuffer, NULL, 0 };
+    SECItem sha1Out = { siBuffer, NULL, 0 };
+    SECStatus rv = SECFailure;
+    unsigned int halfLen, idx;
+
+    PORT_Assert((secret != NULL) && (secret->data != NULL || !secret->len));
+    PORT_Assert((seed != NULL) && (seed->data != NULL));
+    PORT_Assert((result != NULL) && (result->data != NULL));
+
+    halfLen = (secret->len / 2) + (secret->len & 1);
+    md5Key.data = secret->data;
+    md5Key.len = halfLen;
+    sha1Key.data = secret->data + (secret->len - halfLen);
+    sha1Key.len = halfLen;
+
+    /* scratch buffer for the SHA-1 stream; the MD5 stream goes to result */
+    sha1Out.data = (unsigned char *)PORT_Alloc(result->len);
+    if (sha1Out.data == NULL)
+        goto loser;
+    sha1Out.len = result->len;
+
+    if (TLS_P_hash(HASH_AlgMD5, &md5Key, label, seed, result, isFIPS) !=
+        SECSuccess)
+        goto loser;
+    if (TLS_P_hash(HASH_AlgSHA1, &sha1Key, label, seed, &sha1Out, isFIPS) !=
+        SECSuccess)
+        goto loser;
+
+    for (idx = 0; idx < result->len; idx++)
+        result->data[idx] ^= sha1Out.data[idx];
+    rv = SECSuccess;
+
+loser:
+    if (sha1Out.data != NULL)
+        PORT_ZFree(sha1Out.data, sha1Out.len);
+    return rv;
+}
diff --git a/security/nss/lib/freebl/unix_rand.c b/security/nss/lib/freebl/unix_rand.c
new file mode 100644
index 000000000..ea3b6af3d
--- /dev/null
+++ b/security/nss/lib/freebl/unix_rand.c
@@ -0,0 +1,1176 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <string.h>
+#include <signal.h>
+#include <unistd.h>
+#include <limits.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include "secrng.h"
+#include "secerr.h"
+#include "prerror.h"
+#include "prthread.h"
+#include "prprf.h"
+#include "prenv.h"
+
+size_t RNG_FileUpdate(const char *fileName, size_t limit);
+
+/*
+ * When copying data to the buffer we want the least significant bytes
+ * from the input since those bits are changing the fastest. The address
+ * of least significant byte depends upon whether we are running on
+ * a big-endian or little-endian machine.
+ *
+ * Does this mean the least significant bytes are the most significant
+ * to us? :-)
+ */
+
+static size_t
+CopyLowBits(void *dst, size_t dstlen, void *src, size_t srclen)
+{
+    /* Copy the low-order min(dstlen, srclen) bytes of src; return the count. */
+    union {
+        PRInt32 word;
+        char bytes[4];
+    } probe;
+    const char *from = (const char *)src;
+    if (srclen > dstlen) {
+        probe.word = 0x01020304;
+        if (probe.bytes[0] == 0x01) {
+            /* big-endian: the low-order bytes sit at the end of src */
+            from += srclen - dstlen;
+        }
+        /* little-endian: the low-order bytes are already at the start */
+        memcpy(dst, from, dstlen);
+        return dstlen;
+    }
+    memcpy(dst, src, srclen);
+    return srclen;
+}
+
+#ifdef SOLARIS
+
+#include <kstat.h>
+
+static const PRUint32 entropy_buf_len = 4096; /* buffer up to 4 KB */
+
+/* Buffer entropy data, and feed it to the RNG, entropy_buf_len bytes at a time.
+ * Returns error if RNG_RandomUpdate fails. Also increments *total_fed
+ * by the number of bytes successfully buffered.
+ */
+static SECStatus
+BufferEntropy(char *inbuf, PRUint32 inlen,
+              char *entropy_buf, PRUint32 *entropy_buffered,
+              PRUint32 *total_fed)
+{
+    SECStatus status = SECSuccess;
+
+    while (inlen > 0) {
+        PRUint32 room;
+        PRUint32 chunk;
+
+        if (*entropy_buffered == entropy_buf_len) {
+            /* Staging buffer is full: hand it to the RNG and start over. */
+            status = RNG_RandomUpdate(entropy_buf, entropy_buf_len);
+            if (status != SECSuccess) {
+                break;
+            }
+            *entropy_buffered = 0;
+        }
+        room = entropy_buf_len - *entropy_buffered;
+        chunk = PR_MIN(room, inlen);
+        memcpy(entropy_buf + *entropy_buffered, inbuf, chunk);
+        *entropy_buffered += chunk;
+        *total_fed += chunk;
+        inbuf += chunk;
+        inlen -= chunk;
+    }
+    return status;
+}
+
+/* Feed kernel statistics structures and ks_data field to the RNG.
+ * Returns status as well as the number of bytes successfully fed to the RNG.
+ */
+static SECStatus
+RNG_kstat(PRUint32 *fed)
+{
+ kstat_ctl_t *kc = NULL;
+ kstat_t *ksp = NULL;
+ PRUint32 entropy_buffered = 0;
+ char *entropy_buf = NULL;
+ SECStatus rv = SECSuccess;
+ /* Walks every kstat in the chain, staging its header and data for the RNG. */
+ PORT_Assert(fed);
+ if (!fed) {
+ return SECFailure;
+ }
+ *fed = 0;
+
+ kc = kstat_open();
+ PORT_Assert(kc);
+ if (!kc) {
+ return SECFailure;
+ }
+ entropy_buf = (char *)PORT_Alloc(entropy_buf_len);
+ PORT_Assert(entropy_buf);
+ if (entropy_buf) {
+ for (ksp = kc->kc_chain; ksp != NULL; ksp = ksp->ks_next) {
+ if (-1 == kstat_read(kc, ksp, NULL)) {
+ /* missing data from a single kstat shouldn't be fatal */
+ continue;
+ }
+ rv = BufferEntropy((char *)ksp, sizeof(kstat_t),
+ entropy_buf, &entropy_buffered,
+ fed);
+ if (SECSuccess != rv) {
+ break;
+ }
+ /* then the ks_data payload, when this kstat actually carries one */
+ if (ksp->ks_data && ksp->ks_data_size > 0 && ksp->ks_ndata > 0) {
+ rv = BufferEntropy((char *)ksp->ks_data, ksp->ks_data_size,
+ entropy_buf, &entropy_buffered,
+ fed);
+ if (SECSuccess != rv) {
+ break;
+ }
+ }
+ }
+ if (SECSuccess == rv && entropy_buffered) {
+ /* Buffer is not empty, time to feed it to the RNG */
+ rv = RNG_RandomUpdate(entropy_buf, entropy_buffered);
+ }
+ PORT_Free(entropy_buf);
+ } else {
+ rv = SECFailure; /* staging buffer allocation failed */
+ }
+ if (kstat_close(kc)) { /* kc is closed on every path that opened it */
+ PORT_Assert(0);
+ rv = SECFailure;
+ }
+ return rv;
+}
+
+#endif
+
+#if defined(SCO) || defined(UNIXWARE) || defined(BSDI) || defined(FREEBSD) || defined(NETBSD) || defined(DARWIN) || defined(OPENBSD) || defined(NTO) || defined(__riscos__)
+#include <sys/times.h>
+
+#define getdtablesize() sysconf(_SC_OPEN_MAX)
+
+static size_t
+GetHighResClock(void *buf, size_t maxbytes)
+{
+    /* The clock-tick count returned by times() serves as the clock sample. */
+    struct tms unusedTimes;
+    int tickCount = times(&unusedTimes);
+
+    return CopyLowBits(buf, maxbytes, &tickCount, sizeof(tickCount));
+}
+
+static void
+GiveSystemInfo(void)
+{
+    /*
+     * Mix a few sysconf() limits into the RNG.
+     * Is this really necessary? Why not use rand48 or something?
+     */
+
+    static const int names[] = { _SC_CHILD_MAX, _SC_STREAM_MAX, _SC_OPEN_MAX };
+    size_t idx;
+
+    for (idx = 0; idx < sizeof(names) / sizeof(names[0]); idx++) {
+        long si = sysconf(names[idx]);
+
+        RNG_RandomUpdate(&si, sizeof(si));
+    }
+}
+#endif
+
+#if defined(__sun)
+#if defined(__svr4) || defined(SVR4)
+#include <sys/systeminfo.h>
+
+#define getdtablesize() sysconf(_SC_OPEN_MAX)
+
+static void
+GiveSystemInfo(void)
+{
+    /* Feed the SI_MACHINE, SI_RELEASE and SI_HW_SERIAL sysinfo() results to
+     * the RNG, in that order. A query whose return value is not positive
+     * contributes nothing. */
+    static const int queries[] = { SI_MACHINE, SI_RELEASE, SI_HW_SERIAL };
+    char buf[2000];
+    size_t idx;
+
+    for (idx = 0; idx < sizeof(queries) / sizeof(queries[0]); idx++) {
+        int len = sysinfo(queries[idx], buf, sizeof(buf));
+
+        if (len <= 0) {
+            continue;
+        }
+        RNG_RandomUpdate(buf, len);
+    }
+}
+
+static size_t
+GetHighResClock(void *buf, size_t maxbytes)
+{
+    /* Low-order bytes of gethrtime(); a zero reading contributes nothing. */
+    hrtime_t now = gethrtime();
+    if (!now) {
+        return 0;
+    }
+    return CopyLowBits(buf, maxbytes, &now, sizeof(now));
+}
+#else /* SunOS (Sun, but not SVR4) */
+
+extern long sysconf(int name);
+
+static size_t
+GetHighResClock(void *buf, size_t maxbytes)
+{
+ return 0; /* no clock is sampled in this variant: buf is untouched, 0 bytes reported */
+}
+
+static void
+GiveSystemInfo(void)
+{
+    /* This is not very good: the only value fed to the RNG here is the
+     * _SC_CHILD_MAX limit reported by sysconf(). */
+    long childMax = sysconf(_SC_CHILD_MAX);
+
+    RNG_RandomUpdate(&childMax, sizeof(childMax));
+}
+#endif
+#endif /* Sun */
+
+#if defined(__hpux)
+#include <sys/unistd.h>
+
+#define getdtablesize() sysconf(_SC_OPEN_MAX)
+
+#if defined(__ia64)
+#include <ia64/sys/inline.h>
+
+static size_t
+GetHighResClock(void *buf, size_t maxbytes)
+{
+    /* Sample the register selected by _AREG44 through the compiler intrinsic. */
+    PRUint64 counter = _Asm_mov_from_ar(_AREG44);
+
+    return CopyLowBits(buf, maxbytes, &counter, sizeof(counter));
+}
+#else
+static size_t
+GetHighResClock(void *buf, size_t maxbytes)
+{
+    /* ret_cr16() is defined outside this file; its result is the sample. */
+    extern int ret_cr16();
+    int cr16val = ret_cr16();
+
+    return CopyLowBits(buf, maxbytes, &cr16val, sizeof(cr16val));
+}
+#endif
+
+static void
+GiveSystemInfo(void)
+{
+    /* This is not very good: just two sysconf() values, fed in order. */
+    static const int names[] = { _AES_OS_VERSION, _SC_CPU_VERSION };
+    size_t idx;
+    for (idx = 0; idx < sizeof(names) / sizeof(names[0]); idx++) {
+        long si = sysconf(names[idx]);
+        RNG_RandomUpdate(&si, sizeof(si));
+    }
+}
+#endif /* HPUX */
+
+#if defined(OSF1)
+#include <sys/types.h>
+#include <sys/sysinfo.h>
+#include <sys/systeminfo.h>
+#include <c_asm.h>
+
+static void
+GiveSystemInfo(void)
+{
+    /*
+     * Feed the SI_MACHINE, SI_RELEASE and SI_HW_SERIAL sysinfo() results to
+     * the RNG, in that order. A query whose return value is not positive
+     * contributes nothing.
+     */
+    static const int queries[] = { SI_MACHINE, SI_RELEASE, SI_HW_SERIAL };
+    char buf[BUFSIZ];
+    size_t idx;
+
+    for (idx = 0; idx < sizeof(queries) / sizeof(queries[0]); idx++) {
+        int rv = sysinfo(queries[idx], buf, sizeof(buf));
+
+        if (rv > 0) {
+            RNG_RandomUpdate(buf, rv);
+        }
+    }
+}
+
+/*
+ * Use the "get the cycle counter" instruction on the alpha.
+ * The low 32 bits completely turn over in less than a minute.
+ * The high 32 bits are some non-counter gunk that changes sometimes.
+ */
+static size_t
+GetHighResClock(void *buf, size_t maxbytes)
+{
+    /* rpcc result; CopyLowBits keeps its low-order bytes when truncating */
+    unsigned long cycles = asm("rpcc %v0");
+
+    return CopyLowBits(buf, maxbytes, &cycles, sizeof(cycles));
+}
+
+#endif /* Alpha */
+
+#if defined(_IBMR2)
+static size_t
+GetHighResClock(void *buf, size_t maxbytes)
+{
+ return 0; /* no clock is sampled in this variant: buf is untouched, 0 bytes reported */
+}
+
+static void
+GiveSystemInfo(void)
+{
+ /* XXX no system-information source has been found for this platform yet, so nothing is fed to the RNG */
+}
+#endif /* IBM R2 */
+
+#if defined(LINUX)
+#include <sys/sysinfo.h>
+
+static size_t
+GetHighResClock(void *buf, size_t maxbytes)
+{
+ return 0; /* no clock is sampled in this variant: buf is untouched, 0 bytes reported */
+}
+
+static void
+GiveSystemInfo(void)
+{
+#ifndef NO_SYSINFO
+    struct sysinfo snapshot; /* fed to the RNG as raw bytes when the call works */
+    if (sysinfo(&snapshot) != 0)
+        return;
+    RNG_RandomUpdate(&snapshot, sizeof(snapshot));
+#endif
+}
+#endif /* LINUX */
+
+#if defined(NCR)
+
+#include <sys/utsname.h>
+#include <sys/systeminfo.h>
+
+#define getdtablesize() sysconf(_SC_OPEN_MAX)
+
+static size_t
+GetHighResClock(void *buf, size_t maxbytes)
+{
+ return 0; /* no clock is sampled in this variant: buf is untouched, 0 bytes reported */
+}
+
+static void
+GiveSystemInfo(void)
+{
+    /* Feed the SI_MACHINE, SI_RELEASE and SI_HW_SERIAL sysinfo() results to
+     * the RNG, in that order. A query whose return value is not positive
+     * contributes nothing. */
+    static const int queries[] = { SI_MACHINE, SI_RELEASE, SI_HW_SERIAL };
+    char buf[2000];
+    size_t idx;
+
+    for (idx = 0; idx < sizeof(queries) / sizeof(queries[0]); idx++) {
+        int len = sysinfo(queries[idx], buf, sizeof(buf));
+
+        if (len <= 0) {
+            continue;
+        }
+        RNG_RandomUpdate(buf, len);
+    }
+}
+
+#endif /* NCR */
+
+#if defined(sgi)
+#include <fcntl.h>
+#undef PRIVATE
+#include <sys/mman.h>
+#include <sys/syssgi.h>
+#include <sys/immu.h>
+#include <sys/systeminfo.h>
+#include <sys/utsname.h>
+#include <wait.h>
+
+static void
+GiveSystemInfo(void)
+{
+ int rv;
+ char buf[4096];
+ /* Feeds several syssgi()/sysinfo() results to the RNG; a query with rv <= 0 is skipped. */
+ rv = syssgi(SGI_SYSID, &buf[0]);
+ if (rv > 0) {
+ RNG_RandomUpdate(buf, MAXSYSIDSIZE); /* fixed MAXSYSIDSIZE bytes, not rv */
+ }
+#ifdef SGI_RDUBLK
+ rv = syssgi(SGI_RDUBLK, getpid(), &buf[0], sizeof(buf));
+ if (rv > 0) {
+ RNG_RandomUpdate(buf, sizeof(buf)); /* the whole buffer is fed, whatever rv is */
+ }
+#endif /* SGI_RDUBLK */
+ rv = syssgi(SGI_INVENT, SGI_INV_READ, buf, sizeof(buf));
+ if (rv > 0) {
+ RNG_RandomUpdate(buf, sizeof(buf)); /* the whole buffer is fed, whatever rv is */
+ }
+ rv = sysinfo(SI_MACHINE, buf, sizeof(buf));
+ if (rv > 0) {
+ RNG_RandomUpdate(buf, rv); /* sysinfo results are fed with length rv */
+ }
+ rv = sysinfo(SI_RELEASE, buf, sizeof(buf));
+ if (rv > 0) {
+ RNG_RandomUpdate(buf, rv);
+ }
+ rv = sysinfo(SI_HW_SERIAL, buf, sizeof(buf));
+ if (rv > 0) {
+ RNG_RandomUpdate(buf, rv);
+ }
+}
+
+static size_t
+GetHighResClock(void *buf, size_t maxbuf)
+{
+    /* Copies the low-order bytes of the memory-mapped SGI cycle counter into
+     * buf (at most maxbuf bytes). Returns 0 if no counter can be mapped. */
+    unsigned phys_addr, raddr, cycleval;
+    static volatile unsigned *iotimer_addr = NULL;
+    static int tries = 0;
+    static int cntr_size;
+    int mfd;
+    long s0[2] = { 0, 0 };
+
+#ifndef SGI_CYCLECNTR_SIZE
+#define SGI_CYCLECNTR_SIZE 165 /* Size user needs to use to read CC */
+#endif
+
+    if (iotimer_addr == NULL) {
+        if (tries++ > 1) {
+            /* Don't keep trying if it didn't work */
+            return 0;
+        }
+
+        /* For SGI machines we can use the cycle counter, if it has one,
+        ** to generate some truly random numbers */
+        phys_addr = syssgi(SGI_QUERY_CYCLECNTR, &cycleval);
+        if (phys_addr) {
+            int pgsz = getpagesize();
+            int pgoffmask = pgsz - 1;
+
+            raddr = phys_addr & ~pgoffmask;
+            mfd = open("/dev/mmem", O_RDONLY);
+            if (mfd < 0) {
+                return 0;
+            }
+            iotimer_addr = (unsigned *)
+                mmap(0, pgoffmask, PROT_READ, MAP_PRIVATE, mfd, (int)raddr);
+            if (iotimer_addr == (void *)-1) {
+                close(mfd);
+                iotimer_addr = NULL;
+                return 0;
+            }
+            iotimer_addr = (unsigned *)((__psint_t)iotimer_addr | (phys_addr & pgoffmask));
+            /* The file 'mfd' is purposefully not closed. */
+            cntr_size = syssgi(SGI_CYCLECNTR_SIZE);
+            if (cntr_size < 0) {
+                struct utsname utsinfo;
+
+                /*
+                 * We must be executing on a 6.0 or earlier system, since the
+                 * SGI_CYCLECNTR_SIZE call is not supported.
+                 *
+                 * The only pre-6.1 platforms with 64-bit counters are
+                 * IP19 and IP21 (Challenge, PowerChallenge, Onyx).
+                 */
+                uname(&utsinfo);
+                if (!strncmp(utsinfo.machine, "IP19", 4) ||
+                    !strncmp(utsinfo.machine, "IP21", 4))
+                    cntr_size = 64;
+                else
+                    cntr_size = 32;
+            }
+            cntr_size /= 8; /* Convert from bits to bytes */
+        }
+    }
+    if (iotimer_addr == NULL) {
+        return 0; /* no cycle counter was reported: nothing to sample */
+    }
+
+    s0[0] = *iotimer_addr;
+    if (cntr_size > 4)
+        s0[1] = *(iotimer_addr + 1);
+    /* CopyLowBits bounds the copy by maxbuf; never write cntr_size blindly */
+    return CopyLowBits(buf, maxbuf, &s0, cntr_size);
+}
+#endif
+
+#if defined(sony)
+#include <sys/systeminfo.h>
+
+#define getdtablesize() sysconf(_SC_OPEN_MAX)
+
+static size_t
+GetHighResClock(void *buf, size_t maxbytes)
+{
+ return 0; /* no clock is sampled in this variant: buf is untouched, 0 bytes reported */
+}
+
+static void
+GiveSystemInfo(void)
+{
+    /* Feed the SI_MACHINE, SI_RELEASE and SI_HW_SERIAL sysinfo() results to
+     * the RNG, in that order. A query whose return value is not positive
+     * contributes nothing. */
+    static const int queries[] = { SI_MACHINE, SI_RELEASE, SI_HW_SERIAL };
+    char buf[2000];
+    size_t idx;
+
+    for (idx = 0; idx < sizeof(queries) / sizeof(queries[0]); idx++) {
+        int len = sysinfo(queries[idx], buf, sizeof(buf));
+
+        if (len <= 0) {
+            continue;
+        }
+        RNG_RandomUpdate(buf, len);
+    }
+}
+#endif /* sony */
+
+#if defined(sinix)
+#include <sys/systeminfo.h>
+#include <sys/times.h>
+
+int gettimeofday(struct timeval *, struct timezone *);
+int gethostname(char *, int);
+
+#define getdtablesize() sysconf(_SC_OPEN_MAX)
+
+static size_t
+GetHighResClock(void *buf, size_t maxbytes)
+{
+    /* The clock-tick count returned by times() serves as the clock sample. */
+    struct tms unusedTimes;
+    int tickCount = times(&unusedTimes);
+
+    return CopyLowBits(buf, maxbytes, &tickCount, sizeof(tickCount));
+}
+
+static void
+GiveSystemInfo(void)
+{
+    /* Feed the SI_MACHINE, SI_RELEASE and SI_HW_SERIAL sysinfo() results to
+     * the RNG, in that order. A query whose return value is not positive
+     * contributes nothing. */
+    static const int queries[] = { SI_MACHINE, SI_RELEASE, SI_HW_SERIAL };
+    char buf[2000];
+    size_t idx;
+
+    for (idx = 0; idx < sizeof(queries) / sizeof(queries[0]); idx++) {
+        int len = sysinfo(queries[idx], buf, sizeof(buf));
+
+        if (len <= 0) {
+            continue;
+        }
+        RNG_RandomUpdate(buf, len);
+    }
+}
+#endif /* sinix */
+
+#ifdef BEOS
+#include <be/kernel/OS.h>
+
+static size_t
+GetHighResClock(void *buf, size_t maxbytes)
+{
+    /* real_time_clock_usecs() yields a bigtime_t (actually an int64) */
+    bigtime_t usecs = real_time_clock_usecs();
+
+    return CopyLowBits(buf, maxbytes, &usecs, sizeof(usecs));
+}
+
+static void
+GiveSystemInfo(void)
+{
+    /* get_system_info() fills a caller-provided structure; skip on failure */
+    system_info info;
+    PRInt32 val;
+    if (get_system_info(&info) != B_OK)
+        return;
+    val = info.boot_time;
+    RNG_RandomUpdate(&val, sizeof(val));
+    val = info.used_pages;
+    RNG_RandomUpdate(&val, sizeof(val));
+    val = info.used_ports;
+    RNG_RandomUpdate(&val, sizeof(val));
+    val = info.used_threads;
+    RNG_RandomUpdate(&val, sizeof(val));
+    val = info.used_teams;
+    RNG_RandomUpdate(&val, sizeof(val));
+}
+#endif /* BEOS */
+
+#if defined(nec_ews)
+#include <sys/systeminfo.h>
+
+#define getdtablesize() sysconf(_SC_OPEN_MAX)
+
+static size_t
+GetHighResClock(void *buf, size_t maxbytes)
+{
+ return 0; /* no clock is sampled in this variant: buf is untouched, 0 bytes reported */
+}
+
+static void
+GiveSystemInfo(void)
+{
+    /* Feed the SI_MACHINE, SI_RELEASE and SI_HW_SERIAL sysinfo() results to
+     * the RNG, in that order. A query whose return value is not positive
+     * contributes nothing. */
+    static const int queries[] = { SI_MACHINE, SI_RELEASE, SI_HW_SERIAL };
+    char buf[2000];
+    size_t idx;
+
+    for (idx = 0; idx < sizeof(queries) / sizeof(queries[0]); idx++) {
+        int len = sysinfo(queries[idx], buf, sizeof(buf));
+
+        if (len <= 0) {
+            continue;
+        }
+        RNG_RandomUpdate(buf, len);
+    }
+}
+#endif /* nec_ews */
+
+size_t
+RNG_GetNoise(void *buf, size_t maxbytes)
+{
+    /* Packs, in order: the GetHighResClock() sample, then the low bytes of
+     * tv_usec and tv_sec from gettimeofday(). Returns the bytes written. */
+    char *out = (char *)buf;
+    struct timeval now;
+    int used, step;
+    used = GetHighResClock(out, maxbytes);
+    maxbytes -= used;
+    (void)gettimeofday(&now, 0);
+    step = CopyLowBits(out + used, maxbytes, &now.tv_usec, sizeof(now.tv_usec));
+    used += step;
+    maxbytes -= step;
+    step = CopyLowBits(out + used, maxbytes, &now.tv_sec, sizeof(now.tv_sec));
+    used += step;
+    return used;
+}
+
+#define SAFE_POPEN_MAXARGS 10 /* must be at least 2 */
+
+/*
+ * safe_popen is static to this module and we know what arguments it is
+ * called with. Note that this version only supports a single open child
+ * process at any time.
+ */
+static pid_t safe_popen_pid;
+static struct sigaction oldact;
+
+static FILE *
+safe_popen(char *cmd)
+{
+    /* Runs cmd (split on blanks, no shell) in a child whose stdout/stderr feed
+     * the returned read stream; returns 0 if pipe, fdopen or fork fails. */
+    int p[2], fd, argc, ndesc;
+    pid_t pid;
+    char *argv[SAFE_POPEN_MAXARGS + 1];
+    FILE *fp;
+    static char blank[] = " \t";
+    static struct sigaction newact;
+
+    if (pipe(p) < 0)
+        return 0;
+
+    fp = fdopen(p[0], "r");
+    if (fp == 0) {
+        close(p[0]);
+        close(p[1]);
+        return 0;
+    }
+
+    /* Restore default SIGCHLD handling so that we can waitpid() the child */
+    newact.sa_handler = SIG_DFL;
+    newact.sa_flags = 0;
+    sigfillset(&newact.sa_mask);
+    sigaction(SIGCHLD, &newact, &oldact);
+
+    pid = fork();
+    switch (pid) {
+        case -1:
+            fclose(fp); /* this closes p[0], the fd associated with fp */
+            close(p[1]);
+            sigaction(SIGCHLD, &oldact, NULL);
+            return 0;
+
+        case 0:
+            /* dup write-side of pipe to stderr and stdout */
+            if (p[1] != 1)
+                dup2(p[1], 1);
+            if (p[1] != 2)
+                dup2(p[1], 2);
+
+            /* close the other file descriptors, except stdin which we
+             * try reassociating with /dev/null, first (bug 174993) */
+            if (!freopen("/dev/null", "r", stdin))
+                close(0);
+            ndesc = getdtablesize();
+            for (fd = PR_MIN(65536, ndesc); --fd > 2; close(fd))
+                ;
+
+            /* clean up environment in the child process */
+            putenv("PATH=/bin:/usr/bin:/sbin:/usr/sbin:/etc:/usr/etc");
+            putenv("SHELL=/bin/sh");
+            putenv("IFS= \t");
+
+            /* The caller may have passed us a string that is in text
+             * space, where modifying it may be illegal: tokenize a copy. */
+            cmd = strdup(cmd);
+            if (cmd == NULL)
+                _exit(127);
+            /* format argv */
+            argv[0] = strtok(cmd, blank);
+            if (argv[0] == NULL)
+                _exit(127); /* blank command: nothing to exec */
+            argc = 1;
+            while ((argv[argc] = strtok(0, blank)) != 0) {
+                if (++argc == SAFE_POPEN_MAXARGS) {
+                    argv[argc] = 0;
+                    break;
+                }
+            }
+            /* and away we go; _exit() keeps a failed child from flushing
+             * stdio buffers and running atexit handlers of the parent */
+            execvp(argv[0], argv);
+            _exit(127);
+            break;
+
+        default:
+            close(p[1]);
+            break;
+    }
+
+    /* non-zero means there's a cmd running */
+    safe_popen_pid = pid;
+    return fp;
+}
+
+static int
+safe_pclose(FILE *fp)
+{
+ pid_t pid;
+ int status = -1, rv;
+ /* Closes fp and reaps the child recorded in safe_popen_pid; returns status (initially -1). */
+ if ((pid = safe_popen_pid) == 0)
+ return -1; /* no child is recorded as running */
+ safe_popen_pid = 0;
+
+ fclose(fp);
+
+ /* yield the processor so the child gets some time to exit normally */
+ PR_Sleep(PR_INTERVAL_NO_WAIT);
+
+ /* if the child hasn't exited, kill it -- we're done with its output */
+ while ((rv = waitpid(pid, &status, WNOHANG)) == -1 && errno == EINTR)
+ ;
+ if (rv == 0) { /* WNOHANG returned 0: the child is still running */
+ kill(pid, SIGKILL);
+ while ((rv = waitpid(pid, &status, 0)) == -1 && errno == EINTR)
+ ;
+ }
+
+ /* Reset SIGCHLD signal handler before returning */
+ sigaction(SIGCHLD, &oldact, NULL);
+
+ return status;
+}
+
+#ifdef DARWIN
+#include <TargetConditionals.h>
+#if !TARGET_OS_IPHONE
+#include <crt_externs.h>
+#endif
+#endif
+
+/* Fork netstat to collect its output by default. Do not unset this unless
+ * another source of entropy is available
+ */
+#define DO_NETSTAT 1
+
+void
+RNG_SystemInfoForRNG(void)
+{
+ FILE *fp; /* used only in the DO_NETSTAT section at the end */
+ char buf[BUFSIZ];
+ size_t bytes;
+ const char *const *cp;
+ char *randfile;
+#ifdef DARWIN
+#if TARGET_OS_IPHONE
+ /* iOS does not expose a way to access environ. */
+ char **environ = NULL;
+#else
+ char **environ = *_NSGetEnviron();
+#endif
+#else
+ extern char **environ;
+#endif
+#ifdef BEOS
+ static const char *const files[] = {
+ "/boot/var/swap",
+ "/boot/var/log/syslog",
+ "/boot/var/tmp",
+ "/boot/home/config/settings",
+ "/boot/home",
+ 0
+ };
+#else
+ static const char *const files[] = {
+ "/etc/passwd",
+ "/etc/utmp",
+ "/tmp",
+ "/var/tmp",
+ "/usr/tmp",
+ 0
+ };
+#endif
+ /* Sources fed below, in order: system info, clock noise, environment, hostname, /dev/urandom, NSRANDFILE, files[], netstat. */
+#if defined(BSDI)
+ static char netstat_ni_cmd[] = "netstat -nis";
+#else
+ static char netstat_ni_cmd[] = "netstat -ni";
+#endif
+
+ GiveSystemInfo(); /* first of two calls; repeated after gethostname() below */
+
+ bytes = RNG_GetNoise(buf, sizeof(buf));
+ RNG_RandomUpdate(buf, bytes);
+
+ /*
+ * Pass the C environment and the addresses of the pointers to the
+ * hash function. This makes the random number function depend on the
+ * execution environment of the user and on the platform the program
+ * is running on.
+ */
+ if (environ != NULL) {
+ cp = (const char *const *)environ;
+ while (*cp) {
+ RNG_RandomUpdate(*cp, strlen(*cp));
+ cp++;
+ }
+ RNG_RandomUpdate(environ, (char *)cp - (char *)environ); /* the pointer array itself */
+ }
+
+ /* Give in system information */
+ if (gethostname(buf, sizeof(buf)) == 0) {
+ RNG_RandomUpdate(buf, strlen(buf));
+ }
+ GiveSystemInfo();
+
+ /* grab some data from system's PRNG before any other files. */
+ bytes = RNG_FileUpdate("/dev/urandom", SYSTEM_RNG_SEED_COUNT); /* bytes != 0 gates the netstat fallback below */
+
+ /* If the user points us to a random file, pass it through the rng */
+ randfile = PR_GetEnvSecure("NSRANDFILE");
+ if ((randfile != NULL) && (randfile[0] != '\0')) {
+ char *randCountString = PR_GetEnvSecure("NSRANDCOUNT");
+ int randCount = randCountString ? atoi(randCountString) : 0; /* 0 selects RNG_FileForRNG below */
+ if (randCount != 0) {
+ RNG_FileUpdate(randfile, randCount);
+ } else {
+ RNG_FileForRNG(randfile);
+ }
+ }
+
+ /* pass other files through */
+ for (cp = files; *cp; cp++)
+ RNG_FileForRNG(*cp);
+
+/*
+ * Bug 100447: On BSD/OS 4.2 and 4.3, we have problem calling safe_popen
+ * in a pthreads environment. Therefore, we call safe_popen last and on
+ * BSD/OS we do not call safe_popen when we succeeded in getting data
+ * from /dev/urandom.
+ *
+ * Bug 174993: On platforms providing /dev/urandom, don't fork netstat
+ * either, if data has been gathered successfully.
+ */
+
+#if defined(BSDI) || defined(FREEBSD) || defined(NETBSD) || defined(OPENBSD) || defined(DARWIN) || defined(LINUX) || defined(HPUX)
+ if (bytes)
+ return;
+#endif
+
+#ifdef SOLARIS
+
+/*
+ * On Solaris, NSS may be initialized automatically from libldap in
+ * applications that are unaware of the use of NSS. safe_popen forks, and
+ * sometimes creates issues with some applications' pthread_atfork handlers.
+ * We always have /dev/urandom on Solaris 9 and above as an entropy source,
+ * and for Solaris 8 we have the libkstat interface, so we don't need to
+ * fork netstat.
+ */
+
+#undef DO_NETSTAT
+ if (!bytes) {
+ /* On Solaris 8, /dev/urandom isn't available, so we use libkstat. */
+ PRUint32 kstat_bytes = 0;
+ if (SECSuccess != RNG_kstat(&kstat_bytes)) {
+ PORT_Assert(0);
+ }
+ bytes += kstat_bytes;
+ PORT_Assert(bytes);
+ }
+#endif
+
+#ifdef DO_NETSTAT
+ fp = safe_popen(netstat_ni_cmd);
+ if (fp != NULL) {
+ while ((bytes = fread(buf, 1, sizeof(buf), fp)) > 0)
+ RNG_RandomUpdate(buf, bytes);
+ safe_pclose(fp);
+ }
+#endif
+}
+
+#define TOTAL_FILE_LIMIT 1000000 /* one million */
+
+size_t
+RNG_FileUpdate(const char *fileName, size_t limit)
+{
+    /* Feeds the file's stat record, then up to `limit` bytes of its contents,
+     * then a fresh RNG_GetNoise() sample into the RNG. Returns the number of
+     * content bytes consumed (0 when stat() fails or nothing is readable). */
+    static size_t totalFileBytes = 0;
+    unsigned char buffer[BUFSIZ];
+    struct stat info;
+    size_t consumed = 0;
+    FILE *stream;
+    int got;
+
+    /* suppress valgrind warnings due to holes in struct stat */
+    memset(&info, 0, sizeof(info));
+
+    if (stat((char *)fileName, &info) < 0)
+        return consumed;
+    RNG_RandomUpdate(&info, sizeof(info));
+
+    stream = fopen(fileName, "r");
+    if (stream != NULL) {
+        /* Use read() on the raw descriptor rather than fread(): stdio
+         * buffering would pull more bytes than needed from /dev/urandom,
+         * and unbuffered fread may return EOF on Android. Reading straight
+         * into a BUFSIZ buffer, buffered I/O has no performance advantage. */
+        int fd = fileno(stream);
+
+        /* 'stream' was just opened, so this should not fail. */
+        PORT_Assert(fd != -1);
+        while (fd != -1 && consumed < limit) {
+            got = PR_MIN(sizeof buffer, limit - consumed);
+            got = read(fd, buffer, got);
+            if (got <= 0)
+                break;
+            RNG_RandomUpdate(buffer, got);
+            consumed += got;
+            totalFileBytes += got;
+            /* once TOTAL_FILE_LIMIT is exceeded overall, stop after the
+             * first buffer of each subsequent file */
+            if (totalFileBytes > TOTAL_FILE_LIMIT)
+                break;
+        }
+        fclose(stream);
+    }
+
+    /*
+     * Pass yet another snapshot of our highest resolution clock into
+     * the hash function.
+     */
+    got = RNG_GetNoise(buffer, sizeof(buffer));
+    RNG_RandomUpdate(buffer, got);
+    return consumed;
+}
+
+void
+RNG_FileForRNG(const char *fileName)
+{
+ RNG_FileUpdate(fileName, TOTAL_FILE_LIMIT); /* same as RNG_FileUpdate with the default cap; byte count is discarded */
+}
+
+void
+ReadSingleFile(const char *fileName)
+{
+    /* Reads the file to EOF and discards the data; open failures are ignored. */
+    unsigned char scratch[BUFSIZ];
+    FILE *stream = fopen(fileName, "rb");
+
+    if (stream == NULL)
+        return;
+    while (fread(scratch, 1, sizeof(scratch), stream) > 0)
+        ;
+    fclose(stream);
+}
+
+#define _POSIX_PTHREAD_SEMANTICS
+#include <dirent.h>
+
+PRBool
+ReadFileOK(char *dir, char *file)
+{
+    /* PR_TRUE only when "dir/file" fits in PATH_MAX and is a regular file. */
+    struct stat stat_buf;
+    char filename[PATH_MAX];
+    int count = snprintf(filename, sizeof filename, "%s/%s", dir, file);
+    /* snprintf reports the untruncated length, so >= size means truncated */
+    if (count <= 0 || (size_t)count >= sizeof filename) {
+        return PR_FALSE; /* name too long, can't read it anyway */
+    }
+    if (stat(filename, &stat_buf) < 0)
+        return PR_FALSE; /* can't stat, probably can't read it then as well */
+    return S_ISREG(stat_buf.st_mode) ? PR_TRUE : PR_FALSE;
+}
+
+/*
+ * read one file out of either /etc or the user's home directory.
+ * fileToRead tells which file to read.
+ *
+ * return 1 if it's time to reset the fileToRead (no more files to read).
+ */
+static int
+ReadOneFile(int fileToRead)
+{
+    char *dir = "/etc";
+    DIR *fd = opendir(dir);
+    int resetCount = 0;
+    struct dirent *entry;
+#if defined(__sun)
+    char firstName[256];
+#else
+    char firstName[NAME_MAX + 1];
+#endif
+    const char *name = NULL;
+    int i;
+
+    if (fd == NULL) {
+        dir = PR_GetEnvSecure("HOME");
+        if (dir) {
+            fd = opendir(dir);
+        }
+    }
+    if (fd == NULL) {
+        return 1;
+    }
+    /* advance to regular-file entry number fileToRead (counting from 0) */
+    firstName[0] = '\0';
+    for (i = 0; i <= fileToRead; i++) {
+        do {
+            /* readdir() isn't guaranteed to be thread safe on every platform;
+             * this code assumes the same directory isn't read concurrently.
+             * This usage is confirmed safe on Linux, see bug 1254334. */
+            entry = readdir(fd);
+        } while (entry != NULL && !ReadFileOK(dir, &entry->d_name[0]));
+        if (entry == NULL) {
+            resetCount = 1; /* read to the end, start again at the beginning */
+            if (firstName[0]) {
+                /* ran out of entries in the directory, use the first one */
+                name = firstName;
+            }
+            break;
+        }
+        name = entry->d_name;
+        if (i == 0) {
+            /* copy the name of the first in case we run out of entries */
+            PORT_Assert(PORT_Strlen(name) < sizeof(firstName));
+            PORT_Strncpy(firstName, name, sizeof(firstName) - 1);
+            firstName[sizeof(firstName) - 1] = '\0';
+        }
+    }
+    /* name must be consumed before closedir(): it may point into entry */
+    if (name) {
+        char filename[PATH_MAX];
+        int count = snprintf(filename, sizeof(filename), "%s/%s", dir, name);
+        if (count >= 1 && (size_t)count < sizeof(filename)) {
+            ReadSingleFile(filename); /* only when the path was not truncated */
+        }
+    }
+
+    closedir(fd);
+    return resetCount;
+}
+
+/*
+ * do something to try to introduce more noise into the 'GetNoise' call
+ */
+static void
+rng_systemJitter(void)
+{
+    /* Index handed to ReadOneFile() on the next call. It advances by one
+     * per call and goes back to 1 whenever ReadOneFile() reports that it
+     * is time to reset. */
+    static int fileToRead = 1;
+    const int wrapped = ReadOneFile(fileToRead);
+
+    fileToRead = wrapped ? 1 : fileToRead + 1;
+}
+
+size_t
+RNG_SystemRNG(void *dest, size_t maxLen)
+{
+    /* Fill dest with exactly maxLen bytes from /dev/urandom. If the device
+     * cannot be opened, fall back to rng_systemFromNoise(). A short read
+     * sets SEC_ERROR_NEED_RANDOM and returns 0; otherwise returns maxLen. */
+    unsigned char *cursor = dest;
+    size_t filled = 0;
+    FILE *urandom;
+    int fd;
+    int got;
+
+    urandom = fopen("/dev/urandom", "r");
+    if (urandom == NULL) {
+        return rng_systemFromNoise(dest, maxLen);
+    }
+
+    /* Go through read() on the raw descriptor so stdio buffering does not
+     * drain more of /dev/urandom than requested. NOTE: fread on an
+     * unbuffered stream is avoided because it may return EOF on Android. */
+    fd = fileno(urandom);
+    /* 'urandom' was just opened, so this should not fail. */
+    PORT_Assert(fd != -1);
+    for (; fd != -1 && filled < maxLen; filled += got, cursor += got) {
+        got = maxLen - filled;
+        got = read(fd, cursor, got);
+        if (got <= 0)
+            break;
+    }
+    fclose(urandom);
+    if (filled == maxLen) {
+        return filled;
+    }
+    PORT_SetError(SEC_ERROR_NEED_RANDOM); /* system RNG failed */
+    return 0;
+}
diff --git a/security/nss/lib/freebl/win_rand.c b/security/nss/lib/freebl/win_rand.c
new file mode 100644
index 000000000..b863776d2
--- /dev/null
+++ b/security/nss/lib/freebl/win_rand.c
@@ -0,0 +1,161 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "secrng.h"
+
+#ifdef XP_WIN
+#include <windows.h>
+#include <time.h>
+
+/*
+ * Sample the high-resolution performance counter.
+ *
+ * On success the upper 32 bits are stored in *lpdwHigh, the lower 32 bits
+ * in *lpdwLow, and TRUE is returned. If QueryPerformanceCounter() fails,
+ * FALSE is returned and neither output is written.
+ */
+static BOOL
+CurrentClockTickTime(LPDWORD lpdwHigh, LPDWORD lpdwLow)
+{
+    LARGE_INTEGER counter;
+
+    if (QueryPerformanceCounter(&counter)) {
+        *lpdwHigh = counter.u.HighPart;
+        *lpdwLow = counter.u.LowPart;
+        return TRUE;
+    }
+    return FALSE;
+}
+
+/*
+ * Write up to 'maxbuf' bytes of cheap, fast-changing system state into
+ * 'buf' and return the number of bytes written.
+ *
+ * Sources are appended in this order until the buffer is full: the low
+ * word of the performance counter, its high word, GetTickCount(), and
+ * time(). Later sources are not sampled once the buffer is full.
+ */
+size_t
+RNG_GetNoise(void *buf, size_t maxbuf)
+{
+    // Zero-initialized: CurrentClockTickTime() leaves its outputs untouched
+    // when QueryPerformanceCounter() fails, and we must not copy
+    // indeterminate stack contents into the caller's buffer.
+    DWORD dwHigh = 0, dwLow = 0, dwVal;
+    size_t n = 0;
+    size_t nBytes;
+    time_t sTime;
+
+    if (maxbuf == 0) {
+        return 0;
+    }
+
+    (void)CurrentClockTickTime(&dwHigh, &dwLow);
+
+    // get the maximally changing bits first
+    nBytes = sizeof(dwLow) > maxbuf ? maxbuf : sizeof(dwLow);
+    memcpy((char *)buf, &dwLow, nBytes);
+    n += nBytes;
+    maxbuf -= nBytes;
+
+    if (maxbuf == 0) {
+        return n;
+    }
+
+    nBytes = sizeof(dwHigh) > maxbuf ? maxbuf : sizeof(dwHigh);
+    memcpy(((char *)buf) + n, &dwHigh, nBytes);
+    n += nBytes;
+    maxbuf -= nBytes;
+
+    if (maxbuf == 0) {
+        return n;
+    }
+
+    // get the number of milliseconds that have elapsed since Windows started
+    dwVal = GetTickCount();
+
+    nBytes = sizeof(dwVal) > maxbuf ? maxbuf : sizeof(dwVal);
+    memcpy(((char *)buf) + n, &dwVal, nBytes);
+    n += nBytes;
+    maxbuf -= nBytes;
+
+    if (maxbuf == 0) {
+        return n;
+    }
+
+    // get the time in seconds since midnight Jan 1, 1970
+    time(&sTime);
+    nBytes = sizeof(sTime) > maxbuf ? maxbuf : sizeof(sTime);
+    memcpy(((char *)buf) + n, &sTime, nBytes);
+    n += nBytes;
+
+    return n;
+}
+
+/*
+ * Feed assorted machine- and process-specific values into the RNG via
+ * RNG_RandomUpdate(): clock noise, memory statistics, the logical drive
+ * mask, the computer name, process/thread identifiers, volume information
+ * and disk geometry of the current drive, and a final round of clock noise.
+ *
+ * Results of queries that can fail are only mixed in when the query
+ * succeeded, so uninitialized locals are never passed to the RNG.
+ */
+void
+RNG_SystemInfoForRNG(void)
+{
+    DWORD dwVal;
+    char buffer[256];
+    int nBytes;
+    MEMORYSTATUS sMem;
+    HANDLE hVal;
+    DWORD dwSerialNum;
+    DWORD dwComponentLen;
+    DWORD dwSysFlags;
+    char volName[128];
+    DWORD dwSectors, dwBytes, dwFreeClusters, dwNumClusters;
+
+    nBytes = RNG_GetNoise(buffer, 20); // get up to 20 bytes
+    RNG_RandomUpdate(buffer, nBytes);
+
+    sMem.dwLength = sizeof(sMem);
+    GlobalMemoryStatus(&sMem); // assorted memory stats
+    RNG_RandomUpdate(&sMem, sizeof(sMem));
+
+    dwVal = GetLogicalDrives();
+    RNG_RandomUpdate(&dwVal, sizeof(dwVal)); // bitfields in bits 0-25
+
+    dwVal = sizeof(buffer);
+    if (GetComputerName(buffer, &dwVal))
+        RNG_RandomUpdate(buffer, dwVal);
+
+    hVal = GetCurrentProcess(); // 4 or 8 byte pseudo handle (a
+                                // constant!) of current process
+    RNG_RandomUpdate(&hVal, sizeof(hVal));
+
+    dwVal = GetCurrentProcessId(); // process ID (4 bytes)
+    RNG_RandomUpdate(&dwVal, sizeof(dwVal));
+
+    dwVal = GetCurrentThreadId(); // thread ID (4 bytes)
+    RNG_RandomUpdate(&dwVal, sizeof(dwVal));
+
+    // Pre-terminate both strings so strlen() below is safe even if the
+    // call reports success without writing a name.
+    volName[0] = '\0';
+    buffer[0] = '\0';
+    // NULL root path: query the root of the current directory's drive.
+    // Only use the outputs on success; on failure they are not guaranteed
+    // to have been written.
+    if (GetVolumeInformation(NULL,
+                             volName,
+                             sizeof(volName),
+                             &dwSerialNum,
+                             &dwComponentLen,
+                             &dwSysFlags,
+                             buffer,
+                             sizeof(buffer))) {
+        RNG_RandomUpdate(volName, strlen(volName));
+        RNG_RandomUpdate(&dwSerialNum, sizeof(dwSerialNum));
+        RNG_RandomUpdate(&dwComponentLen, sizeof(dwComponentLen));
+        RNG_RandomUpdate(&dwSysFlags, sizeof(dwSysFlags));
+        RNG_RandomUpdate(buffer, strlen(buffer));
+    }
+
+    if (GetDiskFreeSpace(NULL, &dwSectors, &dwBytes, &dwFreeClusters,
+                         &dwNumClusters)) {
+        RNG_RandomUpdate(&dwSectors, sizeof(dwSectors));
+        RNG_RandomUpdate(&dwBytes, sizeof(dwBytes));
+        RNG_RandomUpdate(&dwFreeClusters, sizeof(dwFreeClusters));
+        RNG_RandomUpdate(&dwNumClusters, sizeof(dwNumClusters));
+    }
+
+    nBytes = RNG_GetNoise(buffer, 20); // get up to 20 bytes
+    RNG_RandomUpdate(buffer, nBytes);
+}
+
+/*
+ * The RtlGenRandom function is declared in <ntsecapi.h>, but the
+ * declaration is missing a calling convention specifier. So we
+ * declare it manually here.
+ */
+#define RtlGenRandom SystemFunction036
+DECLSPEC_IMPORT BOOLEAN WINAPI RtlGenRandom(
+    PVOID RandomBuffer,
+    ULONG RandomBufferLength);
+
+/*
+ * Fill 'dest' with 'maxLen' bytes from the system RNG (RtlGenRandom).
+ *
+ * Returns maxLen on success and 0 if any RtlGenRandom call fails.
+ *
+ * RtlGenRandom takes its length as a ULONG, which is narrower than size_t
+ * on 64-bit Windows, so the request is issued in ULONG-sized chunks rather
+ * than silently truncating the length while still reporting maxLen bytes.
+ */
+size_t
+RNG_SystemRNG(void *dest, size_t maxLen)
+{
+    const ULONG maxChunk = ~(ULONG)0;
+    unsigned char *out = dest;
+    size_t remaining = maxLen;
+
+    while (remaining > 0) {
+        ULONG chunk = remaining > maxChunk ? maxChunk : (ULONG)remaining;
+
+        if (!RtlGenRandom(out, chunk)) {
+            return 0;
+        }
+        out += chunk;
+        remaining -= chunk;
+    }
+    return maxLen;
+}
+#endif /* is XP_WIN */