From 5f8de423f190bbb79a62f804151bc24824fa32d8 Mon Sep 17 00:00:00 2001 From: "Matt A. Tobin" Date: Fri, 2 Feb 2018 04:16:08 -0500 Subject: Add m-esr52 at 52.6.0 --- security/nss/lib/freebl/Makefile | 764 + security/nss/lib/freebl/aeskeywrap.c | 389 + security/nss/lib/freebl/alg2268.c | 509 + security/nss/lib/freebl/alghmac.c | 165 + security/nss/lib/freebl/alghmac.h | 64 + security/nss/lib/freebl/arcfive.c | 87 + security/nss/lib/freebl/arcfour-amd64-gas.s | 88 + security/nss/lib/freebl/arcfour-amd64-masm.asm | 107 + security/nss/lib/freebl/arcfour-amd64-sun.s | 84 + security/nss/lib/freebl/arcfour.c | 594 + security/nss/lib/freebl/blapi.h | 1625 ++ security/nss/lib/freebl/blapii.h | 61 + security/nss/lib/freebl/blapit.h | 414 + security/nss/lib/freebl/blname.c | 100 + security/nss/lib/freebl/camellia.c | 1896 +++ security/nss/lib/freebl/camellia.h | 42 + security/nss/lib/freebl/chacha20.c | 119 + security/nss/lib/freebl/chacha20.h | 26 + security/nss/lib/freebl/chacha20_vec.c | 327 + security/nss/lib/freebl/chacha20poly1305.c | 198 + security/nss/lib/freebl/chacha20poly1305.h | 15 + security/nss/lib/freebl/config.mk | 97 + security/nss/lib/freebl/ctr.c | 246 + security/nss/lib/freebl/ctr.h | 53 + security/nss/lib/freebl/cts.c | 307 + security/nss/lib/freebl/cts.h | 33 + security/nss/lib/freebl/des.c | 676 + security/nss/lib/freebl/des.h | 43 + security/nss/lib/freebl/desblapi.c | 256 + security/nss/lib/freebl/det_rng.c | 67 + security/nss/lib/freebl/det_rng.h | 12 + security/nss/lib/freebl/dh.c | 452 + security/nss/lib/freebl/drbg.c | 968 ++ security/nss/lib/freebl/dsa.c | 647 + security/nss/lib/freebl/ec.c | 1159 ++ security/nss/lib/freebl/ec.h | 21 + security/nss/lib/freebl/ecdecode.c | 311 + security/nss/lib/freebl/ecl/README | 267 + security/nss/lib/freebl/ecl/curve25519_32.c | 390 + security/nss/lib/freebl/ecl/curve25519_64.c | 514 + security/nss/lib/freebl/ecl/ec_naf.c | 68 + security/nss/lib/freebl/ecl/ecl-curve.h | 123 + 
security/nss/lib/freebl/ecl/ecl-exp.h | 167 + security/nss/lib/freebl/ecl/ecl-priv.h | 257 + security/nss/lib/freebl/ecl/ecl.c | 301 + security/nss/lib/freebl/ecl/ecl.h | 60 + security/nss/lib/freebl/ecl/ecl_curve.c | 93 + security/nss/lib/freebl/ecl/ecl_gf.c | 958 ++ security/nss/lib/freebl/ecl/ecl_mult.c | 305 + security/nss/lib/freebl/ecl/ecp.h | 106 + security/nss/lib/freebl/ecl/ecp_25519.c | 120 + security/nss/lib/freebl/ecl/ecp_256.c | 401 + security/nss/lib/freebl/ecl/ecp_256_32.c | 1535 ++ security/nss/lib/freebl/ecl/ecp_384.c | 258 + security/nss/lib/freebl/ecl/ecp_521.c | 137 + security/nss/lib/freebl/ecl/ecp_aff.c | 308 + security/nss/lib/freebl/ecl/ecp_jac.c | 513 + security/nss/lib/freebl/ecl/ecp_jm.c | 283 + security/nss/lib/freebl/ecl/ecp_mont.c | 154 + security/nss/lib/freebl/ecl/tests/ec_naft.c | 121 + security/nss/lib/freebl/ecl/tests/ecp_test.c | 409 + security/nss/lib/freebl/ecl/uint128.c | 87 + security/nss/lib/freebl/ecl/uint128.h | 35 + security/nss/lib/freebl/exports.gyp | 48 + security/nss/lib/freebl/fipsfreebl.c | 1715 ++ security/nss/lib/freebl/freebl.def | 26 + security/nss/lib/freebl/freebl.gyp | 408 + security/nss/lib/freebl/freebl.rc | 68 + security/nss/lib/freebl/freebl_hash.def | 39 + security/nss/lib/freebl/freebl_hash_vector.def | 34 + security/nss/lib/freebl/freeblver.c | 18 + security/nss/lib/freebl/gcm.c | 860 + security/nss/lib/freebl/gcm.h | 31 + security/nss/lib/freebl/genload.c | 167 + security/nss/lib/freebl/hmacct.c | 335 + security/nss/lib/freebl/hmacct.h | 38 + security/nss/lib/freebl/intel-aes-x64-masm.asm | 971 ++ security/nss/lib/freebl/intel-aes-x86-masm.asm | 949 ++ security/nss/lib/freebl/intel-aes.h | 143 + security/nss/lib/freebl/intel-aes.s | 2514 +++ security/nss/lib/freebl/intel-gcm-wrap.c | 254 + security/nss/lib/freebl/intel-gcm-x64-masm.asm | 1295 ++ security/nss/lib/freebl/intel-gcm-x86-masm.asm | 1209 ++ security/nss/lib/freebl/intel-gcm.h | 83 + security/nss/lib/freebl/intel-gcm.s | 1340 ++ 
security/nss/lib/freebl/jpake.c | 495 + security/nss/lib/freebl/ldvector.c | 353 + security/nss/lib/freebl/loader.c | 2126 +++ security/nss/lib/freebl/loader.h | 788 + security/nss/lib/freebl/lowhash_vector.c | 217 + security/nss/lib/freebl/manifest.mn | 195 + security/nss/lib/freebl/md2.c | 269 + security/nss/lib/freebl/md5.c | 598 + security/nss/lib/freebl/mknewpc2.c | 208 + security/nss/lib/freebl/mksp.c | 119 + security/nss/lib/freebl/mpi/Makefile | 244 + security/nss/lib/freebl/mpi/Makefile.os2 | 243 + security/nss/lib/freebl/mpi/Makefile.win | 254 + security/nss/lib/freebl/mpi/README | 749 + security/nss/lib/freebl/mpi/all-tests | 83 + security/nss/lib/freebl/mpi/doc/LICENSE | 11 + security/nss/lib/freebl/mpi/doc/LICENSE-MPL | 3 + security/nss/lib/freebl/mpi/doc/basecvt.pod | 65 + security/nss/lib/freebl/mpi/doc/build | 30 + security/nss/lib/freebl/mpi/doc/div.txt | 64 + security/nss/lib/freebl/mpi/doc/expt.txt | 94 + security/nss/lib/freebl/mpi/doc/gcd.pod | 28 + security/nss/lib/freebl/mpi/doc/invmod.pod | 34 + security/nss/lib/freebl/mpi/doc/isprime.pod | 63 + security/nss/lib/freebl/mpi/doc/lap.pod | 36 + security/nss/lib/freebl/mpi/doc/mpi-test.pod | 51 + security/nss/lib/freebl/mpi/doc/mul.txt | 77 + security/nss/lib/freebl/mpi/doc/pi.txt | 53 + security/nss/lib/freebl/mpi/doc/prime.txt | 6542 ++++++++ security/nss/lib/freebl/mpi/doc/prng.pod | 38 + security/nss/lib/freebl/mpi/doc/redux.txt | 86 + security/nss/lib/freebl/mpi/doc/sqrt.txt | 50 + security/nss/lib/freebl/mpi/doc/square.txt | 72 + security/nss/lib/freebl/mpi/doc/timing.txt | 213 + security/nss/lib/freebl/mpi/hpma512.s | 615 + security/nss/lib/freebl/mpi/hppa20.s | 904 ++ security/nss/lib/freebl/mpi/hppatch.adb | 21 + security/nss/lib/freebl/mpi/logtab.h | 28 + security/nss/lib/freebl/mpi/make-logtab | 29 + security/nss/lib/freebl/mpi/make-test-arrays | 98 + security/nss/lib/freebl/mpi/mdxptest.c | 306 + security/nss/lib/freebl/mpi/montmulf.c | 286 + security/nss/lib/freebl/mpi/montmulf.h | 
65 + security/nss/lib/freebl/mpi/montmulf.il | 108 + security/nss/lib/freebl/mpi/montmulf.s | 1938 +++ security/nss/lib/freebl/mpi/montmulfv8.il | 108 + security/nss/lib/freebl/mpi/montmulfv8.s | 1818 +++ security/nss/lib/freebl/mpi/montmulfv9.il | 93 + security/nss/lib/freebl/mpi/montmulfv9.s | 2346 +++ security/nss/lib/freebl/mpi/mp_comba.c | 3235 ++++ .../nss/lib/freebl/mpi/mp_comba_amd64_masm.asm | 13066 +++++++++++++++ security/nss/lib/freebl/mpi/mp_comba_amd64_sun.s | 16097 +++++++++++++++++++ security/nss/lib/freebl/mpi/mp_gf2m-priv.h | 73 + security/nss/lib/freebl/mpi/mp_gf2m.c | 678 + security/nss/lib/freebl/mpi/mp_gf2m.h | 28 + security/nss/lib/freebl/mpi/mpcpucache.c | 808 + security/nss/lib/freebl/mpi/mpcpucache_amd64.s | 861 + security/nss/lib/freebl/mpi/mpcpucache_x86.s | 902 ++ security/nss/lib/freebl/mpi/mpi-config.h | 68 + security/nss/lib/freebl/mpi/mpi-priv.h | 243 + security/nss/lib/freebl/mpi/mpi.c | 4839 ++++++ security/nss/lib/freebl/mpi/mpi.h | 313 + security/nss/lib/freebl/mpi/mpi_amd64.c | 32 + security/nss/lib/freebl/mpi/mpi_amd64_gas.s | 389 + security/nss/lib/freebl/mpi/mpi_amd64_masm.asm | 388 + security/nss/lib/freebl/mpi/mpi_amd64_sun.s | 385 + security/nss/lib/freebl/mpi/mpi_arm.c | 175 + security/nss/lib/freebl/mpi/mpi_hp.c | 81 + security/nss/lib/freebl/mpi/mpi_i86pc.s | 313 + security/nss/lib/freebl/mpi/mpi_mips.s | 472 + security/nss/lib/freebl/mpi/mpi_sparc.c | 226 + security/nss/lib/freebl/mpi/mpi_sse2.s | 294 + security/nss/lib/freebl/mpi/mpi_x86.s | 541 + security/nss/lib/freebl/mpi/mpi_x86_asm.c | 531 + security/nss/lib/freebl/mpi/mpi_x86_os2.s | 538 + security/nss/lib/freebl/mpi/mplogic.c | 443 + security/nss/lib/freebl/mpi/mplogic.h | 52 + security/nss/lib/freebl/mpi/mpmontg.c | 1141 ++ security/nss/lib/freebl/mpi/mpprime.c | 599 + security/nss/lib/freebl/mpi/mpprime.h | 38 + security/nss/lib/freebl/mpi/mpv_sparc.c | 221 + security/nss/lib/freebl/mpi/mpv_sparcv8.s | 1607 ++ security/nss/lib/freebl/mpi/mpv_sparcv9.s | 1645 
++ security/nss/lib/freebl/mpi/mpvalpha.c | 183 + security/nss/lib/freebl/mpi/mulsqr.c | 84 + security/nss/lib/freebl/mpi/multest | 76 + security/nss/lib/freebl/mpi/primes.c | 841 + security/nss/lib/freebl/mpi/stats | 39 + security/nss/lib/freebl/mpi/target.mk | 233 + security/nss/lib/freebl/mpi/test-arrays.txt | 55 + security/nss/lib/freebl/mpi/tests/LICENSE | 6 + security/nss/lib/freebl/mpi/tests/LICENSE-MPL | 3 + security/nss/lib/freebl/mpi/tests/mptest-1.c | 43 + security/nss/lib/freebl/mpi/tests/mptest-2.c | 62 + security/nss/lib/freebl/mpi/tests/mptest-3.c | 105 + security/nss/lib/freebl/mpi/tests/mptest-3a.c | 123 + security/nss/lib/freebl/mpi/tests/mptest-4.c | 111 + security/nss/lib/freebl/mpi/tests/mptest-4a.c | 109 + security/nss/lib/freebl/mpi/tests/mptest-4b.c | 107 + security/nss/lib/freebl/mpi/tests/mptest-5.c | 85 + security/nss/lib/freebl/mpi/tests/mptest-5a.c | 147 + security/nss/lib/freebl/mpi/tests/mptest-6.c | 78 + security/nss/lib/freebl/mpi/tests/mptest-7.c | 85 + security/nss/lib/freebl/mpi/tests/mptest-8.c | 68 + security/nss/lib/freebl/mpi/tests/mptest-9.c | 109 + security/nss/lib/freebl/mpi/tests/mptest-b.c | 230 + security/nss/lib/freebl/mpi/tests/pi1k.txt | 1 + security/nss/lib/freebl/mpi/tests/pi2k.txt | 1 + security/nss/lib/freebl/mpi/tests/pi5k.txt | 1 + security/nss/lib/freebl/mpi/timetest | 99 + security/nss/lib/freebl/mpi/types.pl | 127 + security/nss/lib/freebl/mpi/utils/LICENSE | 4 + security/nss/lib/freebl/mpi/utils/LICENSE-MPL | 3 + security/nss/lib/freebl/mpi/utils/PRIMES | 41 + security/nss/lib/freebl/mpi/utils/README | 206 + security/nss/lib/freebl/mpi/utils/basecvt.c | 68 + security/nss/lib/freebl/mpi/utils/bbs_rand.c | 65 + security/nss/lib/freebl/mpi/utils/bbs_rand.h | 24 + security/nss/lib/freebl/mpi/utils/bbsrand.c | 35 + security/nss/lib/freebl/mpi/utils/dec2hex.c | 40 + security/nss/lib/freebl/mpi/utils/exptmod.c | 55 + security/nss/lib/freebl/mpi/utils/fact.c | 84 + security/nss/lib/freebl/mpi/utils/gcd.c | 95 + 
security/nss/lib/freebl/mpi/utils/hex2dec.c | 40 + security/nss/lib/freebl/mpi/utils/identest.c | 84 + security/nss/lib/freebl/mpi/utils/invmod.c | 61 + security/nss/lib/freebl/mpi/utils/isprime.c | 89 + security/nss/lib/freebl/mpi/utils/lap.c | 90 + security/nss/lib/freebl/mpi/utils/makeprime.c | 116 + security/nss/lib/freebl/mpi/utils/metime.c | 102 + security/nss/lib/freebl/mpi/utils/pi.c | 171 + security/nss/lib/freebl/mpi/utils/primegen.c | 159 + security/nss/lib/freebl/mpi/utils/prng.c | 57 + security/nss/lib/freebl/mpi/utils/ptab.pl | 26 + security/nss/lib/freebl/mpi/utils/sieve.c | 243 + security/nss/lib/freebl/mpi/vis_32.il | 1291 ++ security/nss/lib/freebl/mpi/vis_64.il | 997 ++ security/nss/lib/freebl/mpi/vis_proto.h | 234 + security/nss/lib/freebl/nsslowhash.c | 150 + security/nss/lib/freebl/nsslowhash.h | 33 + security/nss/lib/freebl/os2_rand.c | 334 + .../poly1305-donna-x64-sse2-incremental-source.c | 881 + security/nss/lib/freebl/poly1305.c | 314 + security/nss/lib/freebl/poly1305.h | 28 + security/nss/lib/freebl/pqg.c | 1878 +++ security/nss/lib/freebl/pqg.h | 25 + security/nss/lib/freebl/rawhash.c | 154 + security/nss/lib/freebl/ret_cr16.s | 27 + security/nss/lib/freebl/rijndael.c | 1375 ++ security/nss/lib/freebl/rijndael.h | 67 + security/nss/lib/freebl/rijndael32.tab | 1219 ++ security/nss/lib/freebl/rijndael_tables.c | 215 + security/nss/lib/freebl/rsa.c | 1625 ++ security/nss/lib/freebl/rsapkcs.c | 1385 ++ security/nss/lib/freebl/secmpi.h | 54 + security/nss/lib/freebl/secrng.h | 65 + security/nss/lib/freebl/seed.c | 641 + security/nss/lib/freebl/seed.h | 125 + security/nss/lib/freebl/sha-fast-amd64-sun.s | 2151 +++ security/nss/lib/freebl/sha256.h | 19 + security/nss/lib/freebl/sha512.c | 1655 ++ security/nss/lib/freebl/sha_fast.c | 545 + security/nss/lib/freebl/sha_fast.h | 176 + security/nss/lib/freebl/shsign.h | 14 + security/nss/lib/freebl/shvfy.c | 534 + security/nss/lib/freebl/stubs.c | 711 + security/nss/lib/freebl/stubs.h | 66 + 
security/nss/lib/freebl/sysrand.c | 49 + security/nss/lib/freebl/tlsprfalg.c | 134 + security/nss/lib/freebl/unix_rand.c | 1176 ++ security/nss/lib/freebl/win_rand.c | 161 + 256 files changed, 133850 insertions(+) create mode 100644 security/nss/lib/freebl/Makefile create mode 100644 security/nss/lib/freebl/aeskeywrap.c create mode 100644 security/nss/lib/freebl/alg2268.c create mode 100644 security/nss/lib/freebl/alghmac.c create mode 100644 security/nss/lib/freebl/alghmac.h create mode 100644 security/nss/lib/freebl/arcfive.c create mode 100644 security/nss/lib/freebl/arcfour-amd64-gas.s create mode 100644 security/nss/lib/freebl/arcfour-amd64-masm.asm create mode 100644 security/nss/lib/freebl/arcfour-amd64-sun.s create mode 100644 security/nss/lib/freebl/arcfour.c create mode 100644 security/nss/lib/freebl/blapi.h create mode 100644 security/nss/lib/freebl/blapii.h create mode 100644 security/nss/lib/freebl/blapit.h create mode 100644 security/nss/lib/freebl/blname.c create mode 100644 security/nss/lib/freebl/camellia.c create mode 100644 security/nss/lib/freebl/camellia.h create mode 100644 security/nss/lib/freebl/chacha20.c create mode 100644 security/nss/lib/freebl/chacha20.h create mode 100644 security/nss/lib/freebl/chacha20_vec.c create mode 100644 security/nss/lib/freebl/chacha20poly1305.c create mode 100644 security/nss/lib/freebl/chacha20poly1305.h create mode 100644 security/nss/lib/freebl/config.mk create mode 100644 security/nss/lib/freebl/ctr.c create mode 100644 security/nss/lib/freebl/ctr.h create mode 100644 security/nss/lib/freebl/cts.c create mode 100644 security/nss/lib/freebl/cts.h create mode 100644 security/nss/lib/freebl/des.c create mode 100644 security/nss/lib/freebl/des.h create mode 100644 security/nss/lib/freebl/desblapi.c create mode 100644 security/nss/lib/freebl/det_rng.c create mode 100644 security/nss/lib/freebl/det_rng.h create mode 100644 security/nss/lib/freebl/dh.c create mode 100644 security/nss/lib/freebl/drbg.c create 
mode 100644 security/nss/lib/freebl/dsa.c create mode 100644 security/nss/lib/freebl/ec.c create mode 100644 security/nss/lib/freebl/ec.h create mode 100644 security/nss/lib/freebl/ecdecode.c create mode 100644 security/nss/lib/freebl/ecl/README create mode 100644 security/nss/lib/freebl/ecl/curve25519_32.c create mode 100644 security/nss/lib/freebl/ecl/curve25519_64.c create mode 100644 security/nss/lib/freebl/ecl/ec_naf.c create mode 100644 security/nss/lib/freebl/ecl/ecl-curve.h create mode 100644 security/nss/lib/freebl/ecl/ecl-exp.h create mode 100644 security/nss/lib/freebl/ecl/ecl-priv.h create mode 100644 security/nss/lib/freebl/ecl/ecl.c create mode 100644 security/nss/lib/freebl/ecl/ecl.h create mode 100644 security/nss/lib/freebl/ecl/ecl_curve.c create mode 100644 security/nss/lib/freebl/ecl/ecl_gf.c create mode 100644 security/nss/lib/freebl/ecl/ecl_mult.c create mode 100644 security/nss/lib/freebl/ecl/ecp.h create mode 100644 security/nss/lib/freebl/ecl/ecp_25519.c create mode 100644 security/nss/lib/freebl/ecl/ecp_256.c create mode 100644 security/nss/lib/freebl/ecl/ecp_256_32.c create mode 100644 security/nss/lib/freebl/ecl/ecp_384.c create mode 100644 security/nss/lib/freebl/ecl/ecp_521.c create mode 100644 security/nss/lib/freebl/ecl/ecp_aff.c create mode 100644 security/nss/lib/freebl/ecl/ecp_jac.c create mode 100644 security/nss/lib/freebl/ecl/ecp_jm.c create mode 100644 security/nss/lib/freebl/ecl/ecp_mont.c create mode 100644 security/nss/lib/freebl/ecl/tests/ec_naft.c create mode 100644 security/nss/lib/freebl/ecl/tests/ecp_test.c create mode 100644 security/nss/lib/freebl/ecl/uint128.c create mode 100644 security/nss/lib/freebl/ecl/uint128.h create mode 100644 security/nss/lib/freebl/exports.gyp create mode 100644 security/nss/lib/freebl/fipsfreebl.c create mode 100644 security/nss/lib/freebl/freebl.def create mode 100644 security/nss/lib/freebl/freebl.gyp create mode 100644 security/nss/lib/freebl/freebl.rc create mode 100644 
security/nss/lib/freebl/freebl_hash.def create mode 100644 security/nss/lib/freebl/freebl_hash_vector.def create mode 100644 security/nss/lib/freebl/freeblver.c create mode 100644 security/nss/lib/freebl/gcm.c create mode 100644 security/nss/lib/freebl/gcm.h create mode 100644 security/nss/lib/freebl/genload.c create mode 100644 security/nss/lib/freebl/hmacct.c create mode 100644 security/nss/lib/freebl/hmacct.h create mode 100644 security/nss/lib/freebl/intel-aes-x64-masm.asm create mode 100644 security/nss/lib/freebl/intel-aes-x86-masm.asm create mode 100644 security/nss/lib/freebl/intel-aes.h create mode 100644 security/nss/lib/freebl/intel-aes.s create mode 100644 security/nss/lib/freebl/intel-gcm-wrap.c create mode 100644 security/nss/lib/freebl/intel-gcm-x64-masm.asm create mode 100644 security/nss/lib/freebl/intel-gcm-x86-masm.asm create mode 100644 security/nss/lib/freebl/intel-gcm.h create mode 100644 security/nss/lib/freebl/intel-gcm.s create mode 100644 security/nss/lib/freebl/jpake.c create mode 100644 security/nss/lib/freebl/ldvector.c create mode 100644 security/nss/lib/freebl/loader.c create mode 100644 security/nss/lib/freebl/loader.h create mode 100644 security/nss/lib/freebl/lowhash_vector.c create mode 100644 security/nss/lib/freebl/manifest.mn create mode 100644 security/nss/lib/freebl/md2.c create mode 100644 security/nss/lib/freebl/md5.c create mode 100644 security/nss/lib/freebl/mknewpc2.c create mode 100644 security/nss/lib/freebl/mksp.c create mode 100644 security/nss/lib/freebl/mpi/Makefile create mode 100644 security/nss/lib/freebl/mpi/Makefile.os2 create mode 100644 security/nss/lib/freebl/mpi/Makefile.win create mode 100644 security/nss/lib/freebl/mpi/README create mode 100755 security/nss/lib/freebl/mpi/all-tests create mode 100644 security/nss/lib/freebl/mpi/doc/LICENSE create mode 100644 security/nss/lib/freebl/mpi/doc/LICENSE-MPL create mode 100644 security/nss/lib/freebl/mpi/doc/basecvt.pod create mode 100755 
security/nss/lib/freebl/mpi/doc/build create mode 100644 security/nss/lib/freebl/mpi/doc/div.txt create mode 100644 security/nss/lib/freebl/mpi/doc/expt.txt create mode 100644 security/nss/lib/freebl/mpi/doc/gcd.pod create mode 100644 security/nss/lib/freebl/mpi/doc/invmod.pod create mode 100644 security/nss/lib/freebl/mpi/doc/isprime.pod create mode 100644 security/nss/lib/freebl/mpi/doc/lap.pod create mode 100644 security/nss/lib/freebl/mpi/doc/mpi-test.pod create mode 100644 security/nss/lib/freebl/mpi/doc/mul.txt create mode 100644 security/nss/lib/freebl/mpi/doc/pi.txt create mode 100644 security/nss/lib/freebl/mpi/doc/prime.txt create mode 100644 security/nss/lib/freebl/mpi/doc/prng.pod create mode 100644 security/nss/lib/freebl/mpi/doc/redux.txt create mode 100644 security/nss/lib/freebl/mpi/doc/sqrt.txt create mode 100644 security/nss/lib/freebl/mpi/doc/square.txt create mode 100644 security/nss/lib/freebl/mpi/doc/timing.txt create mode 100644 security/nss/lib/freebl/mpi/hpma512.s create mode 100644 security/nss/lib/freebl/mpi/hppa20.s create mode 100644 security/nss/lib/freebl/mpi/hppatch.adb create mode 100644 security/nss/lib/freebl/mpi/logtab.h create mode 100755 security/nss/lib/freebl/mpi/make-logtab create mode 100755 security/nss/lib/freebl/mpi/make-test-arrays create mode 100644 security/nss/lib/freebl/mpi/mdxptest.c create mode 100644 security/nss/lib/freebl/mpi/montmulf.c create mode 100644 security/nss/lib/freebl/mpi/montmulf.h create mode 100644 security/nss/lib/freebl/mpi/montmulf.il create mode 100644 security/nss/lib/freebl/mpi/montmulf.s create mode 100644 security/nss/lib/freebl/mpi/montmulfv8.il create mode 100644 security/nss/lib/freebl/mpi/montmulfv8.s create mode 100644 security/nss/lib/freebl/mpi/montmulfv9.il create mode 100644 security/nss/lib/freebl/mpi/montmulfv9.s create mode 100644 security/nss/lib/freebl/mpi/mp_comba.c create mode 100644 security/nss/lib/freebl/mpi/mp_comba_amd64_masm.asm create mode 100644 
security/nss/lib/freebl/mpi/mp_comba_amd64_sun.s create mode 100644 security/nss/lib/freebl/mpi/mp_gf2m-priv.h create mode 100644 security/nss/lib/freebl/mpi/mp_gf2m.c create mode 100644 security/nss/lib/freebl/mpi/mp_gf2m.h create mode 100644 security/nss/lib/freebl/mpi/mpcpucache.c create mode 100644 security/nss/lib/freebl/mpi/mpcpucache_amd64.s create mode 100644 security/nss/lib/freebl/mpi/mpcpucache_x86.s create mode 100644 security/nss/lib/freebl/mpi/mpi-config.h create mode 100644 security/nss/lib/freebl/mpi/mpi-priv.h create mode 100644 security/nss/lib/freebl/mpi/mpi.c create mode 100644 security/nss/lib/freebl/mpi/mpi.h create mode 100644 security/nss/lib/freebl/mpi/mpi_amd64.c create mode 100644 security/nss/lib/freebl/mpi/mpi_amd64_gas.s create mode 100644 security/nss/lib/freebl/mpi/mpi_amd64_masm.asm create mode 100644 security/nss/lib/freebl/mpi/mpi_amd64_sun.s create mode 100644 security/nss/lib/freebl/mpi/mpi_arm.c create mode 100644 security/nss/lib/freebl/mpi/mpi_hp.c create mode 100644 security/nss/lib/freebl/mpi/mpi_i86pc.s create mode 100644 security/nss/lib/freebl/mpi/mpi_mips.s create mode 100644 security/nss/lib/freebl/mpi/mpi_sparc.c create mode 100644 security/nss/lib/freebl/mpi/mpi_sse2.s create mode 100644 security/nss/lib/freebl/mpi/mpi_x86.s create mode 100644 security/nss/lib/freebl/mpi/mpi_x86_asm.c create mode 100644 security/nss/lib/freebl/mpi/mpi_x86_os2.s create mode 100644 security/nss/lib/freebl/mpi/mplogic.c create mode 100644 security/nss/lib/freebl/mpi/mplogic.h create mode 100644 security/nss/lib/freebl/mpi/mpmontg.c create mode 100644 security/nss/lib/freebl/mpi/mpprime.c create mode 100644 security/nss/lib/freebl/mpi/mpprime.h create mode 100644 security/nss/lib/freebl/mpi/mpv_sparc.c create mode 100644 security/nss/lib/freebl/mpi/mpv_sparcv8.s create mode 100644 security/nss/lib/freebl/mpi/mpv_sparcv9.s create mode 100644 security/nss/lib/freebl/mpi/mpvalpha.c create mode 100644 security/nss/lib/freebl/mpi/mulsqr.c 
create mode 100755 security/nss/lib/freebl/mpi/multest create mode 100644 security/nss/lib/freebl/mpi/primes.c create mode 100755 security/nss/lib/freebl/mpi/stats create mode 100644 security/nss/lib/freebl/mpi/target.mk create mode 100644 security/nss/lib/freebl/mpi/test-arrays.txt create mode 100644 security/nss/lib/freebl/mpi/tests/LICENSE create mode 100644 security/nss/lib/freebl/mpi/tests/LICENSE-MPL create mode 100644 security/nss/lib/freebl/mpi/tests/mptest-1.c create mode 100644 security/nss/lib/freebl/mpi/tests/mptest-2.c create mode 100644 security/nss/lib/freebl/mpi/tests/mptest-3.c create mode 100644 security/nss/lib/freebl/mpi/tests/mptest-3a.c create mode 100644 security/nss/lib/freebl/mpi/tests/mptest-4.c create mode 100644 security/nss/lib/freebl/mpi/tests/mptest-4a.c create mode 100644 security/nss/lib/freebl/mpi/tests/mptest-4b.c create mode 100644 security/nss/lib/freebl/mpi/tests/mptest-5.c create mode 100644 security/nss/lib/freebl/mpi/tests/mptest-5a.c create mode 100644 security/nss/lib/freebl/mpi/tests/mptest-6.c create mode 100644 security/nss/lib/freebl/mpi/tests/mptest-7.c create mode 100644 security/nss/lib/freebl/mpi/tests/mptest-8.c create mode 100644 security/nss/lib/freebl/mpi/tests/mptest-9.c create mode 100644 security/nss/lib/freebl/mpi/tests/mptest-b.c create mode 100644 security/nss/lib/freebl/mpi/tests/pi1k.txt create mode 100644 security/nss/lib/freebl/mpi/tests/pi2k.txt create mode 100644 security/nss/lib/freebl/mpi/tests/pi5k.txt create mode 100755 security/nss/lib/freebl/mpi/timetest create mode 100755 security/nss/lib/freebl/mpi/types.pl create mode 100644 security/nss/lib/freebl/mpi/utils/LICENSE create mode 100644 security/nss/lib/freebl/mpi/utils/LICENSE-MPL create mode 100644 security/nss/lib/freebl/mpi/utils/PRIMES create mode 100644 security/nss/lib/freebl/mpi/utils/README create mode 100644 security/nss/lib/freebl/mpi/utils/basecvt.c create mode 100644 security/nss/lib/freebl/mpi/utils/bbs_rand.c create mode 100644 
security/nss/lib/freebl/mpi/utils/bbs_rand.h create mode 100644 security/nss/lib/freebl/mpi/utils/bbsrand.c create mode 100644 security/nss/lib/freebl/mpi/utils/dec2hex.c create mode 100644 security/nss/lib/freebl/mpi/utils/exptmod.c create mode 100644 security/nss/lib/freebl/mpi/utils/fact.c create mode 100644 security/nss/lib/freebl/mpi/utils/gcd.c create mode 100644 security/nss/lib/freebl/mpi/utils/hex2dec.c create mode 100644 security/nss/lib/freebl/mpi/utils/identest.c create mode 100644 security/nss/lib/freebl/mpi/utils/invmod.c create mode 100644 security/nss/lib/freebl/mpi/utils/isprime.c create mode 100644 security/nss/lib/freebl/mpi/utils/lap.c create mode 100644 security/nss/lib/freebl/mpi/utils/makeprime.c create mode 100644 security/nss/lib/freebl/mpi/utils/metime.c create mode 100644 security/nss/lib/freebl/mpi/utils/pi.c create mode 100644 security/nss/lib/freebl/mpi/utils/primegen.c create mode 100644 security/nss/lib/freebl/mpi/utils/prng.c create mode 100755 security/nss/lib/freebl/mpi/utils/ptab.pl create mode 100644 security/nss/lib/freebl/mpi/utils/sieve.c create mode 100644 security/nss/lib/freebl/mpi/vis_32.il create mode 100644 security/nss/lib/freebl/mpi/vis_64.il create mode 100644 security/nss/lib/freebl/mpi/vis_proto.h create mode 100644 security/nss/lib/freebl/nsslowhash.c create mode 100644 security/nss/lib/freebl/nsslowhash.h create mode 100644 security/nss/lib/freebl/os2_rand.c create mode 100644 security/nss/lib/freebl/poly1305-donna-x64-sse2-incremental-source.c create mode 100644 security/nss/lib/freebl/poly1305.c create mode 100644 security/nss/lib/freebl/poly1305.h create mode 100644 security/nss/lib/freebl/pqg.c create mode 100644 security/nss/lib/freebl/pqg.h create mode 100644 security/nss/lib/freebl/rawhash.c create mode 100644 security/nss/lib/freebl/ret_cr16.s create mode 100644 security/nss/lib/freebl/rijndael.c create mode 100644 security/nss/lib/freebl/rijndael.h create mode 100644 
security/nss/lib/freebl/rijndael32.tab create mode 100644 security/nss/lib/freebl/rijndael_tables.c create mode 100644 security/nss/lib/freebl/rsa.c create mode 100644 security/nss/lib/freebl/rsapkcs.c create mode 100644 security/nss/lib/freebl/secmpi.h create mode 100644 security/nss/lib/freebl/secrng.h create mode 100644 security/nss/lib/freebl/seed.c create mode 100644 security/nss/lib/freebl/seed.h create mode 100644 security/nss/lib/freebl/sha-fast-amd64-sun.s create mode 100644 security/nss/lib/freebl/sha256.h create mode 100644 security/nss/lib/freebl/sha512.c create mode 100644 security/nss/lib/freebl/sha_fast.c create mode 100644 security/nss/lib/freebl/sha_fast.h create mode 100644 security/nss/lib/freebl/shsign.h create mode 100644 security/nss/lib/freebl/shvfy.c create mode 100644 security/nss/lib/freebl/stubs.c create mode 100644 security/nss/lib/freebl/stubs.h create mode 100644 security/nss/lib/freebl/sysrand.c create mode 100644 security/nss/lib/freebl/tlsprfalg.c create mode 100644 security/nss/lib/freebl/unix_rand.c create mode 100644 security/nss/lib/freebl/win_rand.c (limited to 'security/nss/lib/freebl') diff --git a/security/nss/lib/freebl/Makefile b/security/nss/lib/freebl/Makefile new file mode 100644 index 000000000..0ce1425f1 --- /dev/null +++ b/security/nss/lib/freebl/Makefile @@ -0,0 +1,764 @@ +#! gmake +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +####################################################################### +# (1) Include initial platform-independent assignments (MANDATORY). # +####################################################################### + +include manifest.mn + +####################################################################### +# (2) Include "global" configuration information. 
(OPTIONAL) # +####################################################################### + +include $(CORE_DEPTH)/coreconf/config.mk + +####################################################################### +# (3) Include "component" configuration information. (OPTIONAL) # +####################################################################### + + + +####################################################################### +# (4) Include "local" platform-dependent assignments (OPTIONAL). # +####################################################################### + +include config.mk + +# default for all platforms +# unset this on those that have multiple freebl libraries +FREEBL_BUILD_SINGLE_SHLIB = 1 + +ifdef USE_64 + DEFINES += -DNSS_USE_64 +endif + +ifdef USE_ABI32_FPU + DEFINES += -DNSS_USE_ABI32_FPU +endif + +ifeq ($(FREEBL_NO_DEPEND),1) + DEFINES += -DFREEBL_NO_DEPEND + STUBS_SRCS = stubs.c +endif + +ifeq ($(FREEBL_LOWHASH),1) + DEFINES += -DFREEBL_LOWHASH + LOWHASH_SRCS = nsslowhash.c + LOWHASH_EXPORTS = nsslowhash.h + MAPFILE_SOURCE = freebl_hash_vector.def + NEED_STUB_BUILD = 1 +else + MAPFILE_SOURCE = freebl.def +endif + +ifdef USE_STUB_BUILD + CSRCS = lowhash_vector.c + SIMPLE_OBJS = $(CSRCS:.c=$(OBJ_SUFFIX)) + OBJS = $(addprefix $(OBJDIR)/$(PROG_PREFIX), $(SIMPLE_OBJS)) + ALL_TRASH := $(TARGETS) $(OBJS) $(OBJDIR) LOGS TAGS $(GARBAGE) \ + $(NOSUCHFILE) so_locations + MAPFILE_SOURCE = freebl_hash.def +endif + +# FREEBL_USE_PRELINK +# +# Most modern version of Linux support a speed optimization scheme where an +# application called prelink modifies programs and shared libraries to quickly +# load if they fit into an already designed address space. In short, prelink +# scans the list of programs and libraries on your system, assigns them a +# predefined space in the the address space, then provides the fixups to the +# library. 
+# +# The modification of the shared library is correctly detected by the freebl +# FIPS checksum scheme where we check a signed hash of the library against the +# library itself. +# +# The prelink command itself can reverse the process of modification and output +# the pristine shared library as it was before prelink made its changes. +# This option tells Freebl that it can use prelink to output the original copy +# of the shared library before prelink modified it. +# +# FREEBL_PRELINK_COMMAND +# +# This is an optional environment variable which can override the default +# prelink command. It could be used on systems that did something similar to +# prelink but used a different command and syntax. The only requirement is the +# program must take the library as the last argument, the program must output +# the original library to standard out, and the program does not need to take +# any quoted or embedded spaces in its arguments (except the path to the +# library itself, which can have embedded spaces or special characters). 
+# +ifdef FREEBL_USE_PRELINK + DEFINES += -DFREEBL_USE_PRELINK +ifdef LINUX + DEFINES += -D__GNU_SOURCE=1 +endif +endif +ifdef NSS_NO_INIT_SUPPORT + DEFINES += -DNSS_NO_INIT_SUPPORT +endif + +ifdef FREEBL_PRELINK_COMMAND + DEFINES +=-DFREEBL_PRELINK_COMMAND=\"$(FREEBL_PRELINK_COMMAND)\" +endif +# NSS_X86 means the target is a 32-bits x86 CPU architecture +# NSS_X64 means the target is a 64-bits 64 CPU architecture +# NSS_X86_OR_X64 means the target is either x86 or x64 +ifeq (,$(filter-out i386 x386 x86 x86_64,$(CPU_ARCH))) + DEFINES += -DNSS_X86_OR_X64 +ifneq (,$(USE_64)$(USE_X32)) + DEFINES += -DNSS_X64 +else + DEFINES += -DNSS_X86 +endif +endif + +ifeq ($(OS_TARGET),OSF1) + DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_NO_MP_WORD + MPI_SRCS += mpvalpha.c +endif + +ifeq (OS2,$(OS_TARGET)) + ASFILES = mpi_x86_os2.s + DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE + DEFINES += -DMP_ASSEMBLY_DIV_2DX1D + DEFINES += -DMP_USE_UINT_DIGIT -DMP_NO_MP_WORD + DEFINES += -DMP_IS_LITTLE_ENDIAN +endif + +ifeq (,$(filter-out WINNT WIN95,$(OS_TARGET))) +ifndef USE_64 +# 32-bit Windows +ifdef NS_USE_GCC +# Ideally, we want to use assembler +# ASFILES = mpi_x86.s +# DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE \ +# -DMP_ASSEMBLY_DIV_2DX1D +# but we haven't figured out how to make it work, so we are not +# using assembler right now. + ASFILES = + DEFINES += -DMP_NO_MP_WORD -DMP_USE_UINT_DIGIT +else +# MSVC + MPI_SRCS += mpi_x86_asm.c + DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE + DEFINES += -DMP_ASSEMBLY_DIV_2DX1D -DMP_USE_UINT_DIGIT -DMP_NO_MP_WORD + ifdef BUILD_OPT + OPTIMIZER += -Ox # maximum optimization for freebl + endif + # The Intel AES assembly code requires Visual C++ 2010. 
+ # if $(_MSC_VER) >= 1600 (Visual C++ 2010) + ifeq ($(firstword $(sort $(_MSC_VER) 1600)),1600) + DEFINES += -DUSE_HW_AES -DINTEL_GCM + ASFILES += intel-aes-x86-masm.asm intel-gcm-x86-masm.asm + EXTRA_SRCS += intel-gcm-wrap.c + ifeq ($(CLANG_CL),1) + INTEL_GCM_CLANG_CL = 1 + endif + endif +endif +else + # -DMP_NO_MP_WORD + DEFINES += -DMP_IS_LITTLE_ENDIAN +ifdef NS_USE_GCC +# Ideally, we should use amd64 assembly code, but it's not yet mingw-w64 +# compatible. +else +# MSVC + ifdef BUILD_OPT + OPTIMIZER += -Ox # maximum optimization for freebl + endif + ASFILES = arcfour-amd64-masm.asm mpi_amd64_masm.asm mp_comba_amd64_masm.asm + DEFINES += -DNSS_BEVAND_ARCFOUR -DMPI_AMD64 -DMP_ASSEMBLY_MULTIPLY + DEFINES += -DNSS_USE_COMBA + # The Intel AES assembly code requires Visual C++ 2010 (10.0). The _xgetbv + # compiler intrinsic function requires Visual C++ 2010 (10.0) SP1. + ifeq ($(_MSC_VER_GE_10SP1),1) + DEFINES += -DUSE_HW_AES -DINTEL_GCM + ASFILES += intel-aes-x64-masm.asm intel-gcm-x64-masm.asm + EXTRA_SRCS += intel-gcm-wrap.c + ifeq ($(CLANG_CL),1) + INTEL_GCM_CLANG_CL = 1 + endif + endif + MPI_SRCS += mpi_amd64.c +endif +endif +endif + +ifeq ($(OS_TARGET),IRIX) +ifeq ($(USE_N32),1) + ASFILES = mpi_mips.s + ifeq ($(NS_USE_GCC),1) + ASFLAGS = -Wp,-P -Wp,-traditional -O -mips3 + else + ASFLAGS = -O -OPT:Olimit=4000 -dollar -fullwarn -xansi -n32 -mips3 + endif + DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE + DEFINES += -DMP_USE_UINT_DIGIT +endif +endif + +ifeq ($(OS_TARGET),Darwin) +ifeq ($(CPU_ARCH),x86) + ASFILES = mpi_sse2.s + DEFINES += -DMP_USE_UINT_DIGIT + DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE + DEFINES += -DMP_ASSEMBLY_DIV_2DX1D +endif +endif # Darwin + +ifeq ($(OS_TARGET),Linux) +ifeq ($(CPU_ARCH),x86_64) + ASFILES = arcfour-amd64-gas.s mpi_amd64_gas.s + ASFLAGS += -fPIC -Wa,--noexecstack + DEFINES += -DNSS_BEVAND_ARCFOUR -DMPI_AMD64 -DMP_ASSEMBLY_MULTIPLY + DEFINES += -DNSS_USE_COMBA + DEFINES += -DMP_IS_LITTLE_ENDIAN +# DEFINES += 
-DMPI_AMD64_ADD + # comment the next four lines to turn off Intel HW acceleration. + DEFINES += -DUSE_HW_AES -DINTEL_GCM + ASFILES += intel-aes.s intel-gcm.s + EXTRA_SRCS += intel-gcm-wrap.c + INTEL_GCM = 1 + MPI_SRCS += mpi_amd64.c mp_comba.c +endif +ifeq ($(CPU_ARCH),x86) + ASFILES = mpi_x86.s + DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE + DEFINES += -DMP_ASSEMBLY_DIV_2DX1D -DMP_USE_UINT_DIGIT + DEFINES += -DMP_IS_LITTLE_ENDIAN + # The floating point ECC code doesn't work on Linux x86 (bug 311432). + #ECL_USE_FP = 1 +endif +ifeq ($(CPU_ARCH),arm) + DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE + DEFINES += -DMP_USE_UINT_DIGIT + DEFINES += -DSHA_NO_LONG_LONG # avoid 64-bit arithmetic in SHA512 + MPI_SRCS += mpi_arm.c +endif +ifeq ($(CPU_ARCH),ppc) +ifdef USE_64 + DEFINES += -DNSS_NO_INIT_SUPPORT +endif # USE_64 +endif # ppc +endif # Linux + +ifeq ($(OS_TARGET),AIX) + DEFINES += -DMP_USE_UINT_DIGIT + ifndef USE_64 + DEFINES += -DMP_NO_DIV_WORD -DMP_NO_ADD_WORD -DMP_NO_SUB_WORD + endif +endif # AIX + +ifeq ($(OS_TARGET), HP-UX) +ifneq ($(OS_TEST), ia64) +# PA-RISC +ASFILES += ret_cr16.s +ifndef USE_64 + FREEBL_BUILD_SINGLE_SHLIB = + HAVE_ABI32_INT32 = 1 + HAVE_ABI32_FPU = 1 +endif +ifdef FREEBL_CHILD_BUILD +ifdef USE_ABI32_INT32 +# build for DA1.1 (HP PA 1.1) 32-bit ABI build with 32-bit arithmetic + DEFINES += -DMP_USE_UINT_DIGIT -DMP_NO_MP_WORD + DEFINES += -DSHA_NO_LONG_LONG # avoid 64-bit arithmetic in SHA512 +else +ifdef USE_64 +# this builds for DA2.0W (HP PA 2.0 Wide), the LP64 ABI, using 64-bit digits + MPI_SRCS += mpi_hp.c + ASFILES += hpma512.s hppa20.s + DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE +else +# this builds for DA2.0 (HP PA 2.0 Narrow) ABI32_FPU model +# (the 32-bit ABI with 64-bit registers) using 64-bit digits + MPI_SRCS += mpi_hp.c + ASFILES += hpma512.s hppa20.s + DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE +ifndef NS_USE_GCC + ARCHFLAG = -Aa +e +DA2.0 +DS2.0 +endif +endif +endif +endif +endif 
+endif + +# The blapi functions are defined not only in the freebl shared +# libraries but also in the shared libraries linked with loader.c +# (libsoftokn3.so and libssl3.so). We need to use GNU ld's +# -Bsymbolic option or the equivalent option for other linkers +# to bind the blapi function references in FREEBLVector vector +# (ldvector.c) to the blapi functions defined in the freebl +# shared libraries. +ifeq (,$(filter-out BSD_OS FreeBSD Linux NetBSD OpenBSD, $(OS_TARGET))) + MKSHLIB += -Wl,-Bsymbolic +endif + +ifeq ($(OS_TARGET),SunOS) + +ifdef NS_USE_GCC + ifdef GCC_USE_GNU_LD + MKSHLIB += -Wl,-Bsymbolic,-z,now,-z,text + else + MKSHLIB += -Wl,-B,symbolic,-z,now,-z,text + endif # GCC_USE_GNU_LD +else + MKSHLIB += -B symbolic -z now -z text +endif # NS_USE_GCC + +# Sun's WorkShop defines v8, v8plus and v9 architectures. +# gcc on Solaris defines v8 and v9 "cpus". +# gcc's v9 is equivalent to Workshop's v8plus. +# gcc's -m64 is equivalent to Workshop's v9 +# We always use Sun's assembler, which uses Sun's naming convention. 
+ifeq ($(CPU_ARCH),sparc) + FREEBL_BUILD_SINGLE_SHLIB= + ifdef USE_64 + HAVE_ABI64_INT = 1 + HAVE_ABI64_FPU = 1 + else + HAVE_ABI32_FPU = 1 + HAVE_ABI32_INT64 = 1 + endif + SYSV_SPARC = 1 + SOLARIS_AS = /usr/ccs/bin/as + #### set arch, asm, c flags + ifdef NS_USE_GCC + ifdef USE_ABI32_INT64 + ARCHFLAG=-mcpu=v9 -Wa,-xarch=v8plus + SOLARIS_AS_FLAGS = -xarch=v8plus -K PIC + endif + ifdef USE_ABI32_FPU + ARCHFLAG=-mcpu=v9 -Wa,-xarch=v8plusa + SOLARIS_AS_FLAGS = -xarch=v8plusa -K PIC + endif # USE_ABI32_FPU + ifdef USE_ABI64_INT + # this builds for Sparc v9a pure 64-bit architecture + ARCHFLAG += -mcpu=v9 -Wa,-xarch=v9 + SOLARIS_AS_FLAGS = -xarch=v9 -K PIC + endif + ifdef USE_ABI64_FPU + # this builds for Sparc v9a pure 64-bit architecture + # It uses floating point, and 32-bit word size + ARCHFLAG += -mcpu=v9 -Wa,-xarch=v9a + SOLARIS_AS_FLAGS = -xarch=v9a -K PIC + endif + else # NS_USE_GCC + # FPU_TARGET_OPTIMIZER specifies the target processor and cache + # properties of the ABI32_FPU and ABI64_FPU architectures for use + # by the optimizer. + ifeq (,$(findstring Sun WorkShop 6,$(shell $(CC) -V 2>&1))) + # if the compiler is not Forte 6 + FPU_TARGET_OPTIMIZER = -xcache=64/32/4:1024/64/4 -xchip=ultra3 + else + # Forte 6 C compiler generates incorrect code for rijndael.c + # if -xchip=ultra3 is used (Bugzilla bug 333925). So we revert + # to what we used in NSS 3.10. + FPU_TARGET_OPTIMIZER = -xchip=ultra2 + endif + ifdef USE_ABI32_INT64 + # this builds for Sparc v8+a ABI32_FPU architecture, 64-bit registers, + # 32-bit ABI, it uses 64-bit words, integer arithmetic, + # no FPU (non-VIS cpus). + # These flags were suggested by the compiler group for building + # with SunStudio 10. 
+ ifdef BUILD_OPT + SOL_CFLAGS += -xO4 + endif + SOL_CFLAGS += -xtarget=generic + ARCHFLAG = -xarch=v8plus + SOLARIS_AS_FLAGS = -xarch=v8plus -K PIC + endif + ifdef USE_ABI32_FPU + # this builds for Sparc v8+a ABI32_FPU architecture, 64-bit registers, + # 32-bit ABI, it uses FPU code, and 32-bit word size. + # these flags were determined by running cc -### -fast and copying + # the generated flag settings + SOL_CFLAGS += -fsingle -xmemalign=8s + ifdef BUILD_OPT + SOL_CFLAGS += -D__MATHERR_ERRNO_DONTCARE -fsimple=1 + SOL_CFLAGS += -xalias_level=basic -xbuiltin=%all + SOL_CFLAGS += $(FPU_TARGET_OPTIMIZER) -xdepend + SOL_CFLAGS += -xlibmil -xO5 + endif + ARCHFLAG = -xarch=v8plusa + SOLARIS_AS_FLAGS = -xarch=v8plusa -K PIC + endif + ifdef USE_ABI64_INT + # this builds for Sparc v9a pure 64-bit architecture, + # no FPU (non-VIS cpus). For building with SunStudio 10. + ifdef BUILD_OPT + SOL_CFLAGS += -xO4 + endif + SOL_CFLAGS += -xtarget=generic + ARCHFLAG = -xarch=v9 + SOLARIS_AS_FLAGS = -xarch=v9 -K PIC + endif + ifdef USE_ABI64_FPU + # this builds for Sparc v9a pure 64-bit architecture + # It uses floating point, and 32-bit word size. + # See comment for USE_ABI32_FPU. 
+ SOL_CFLAGS += -fsingle -xmemalign=8s + ifdef BUILD_OPT + SOL_CFLAGS += -D__MATHERR_ERRNO_DONTCARE -fsimple=1 + SOL_CFLAGS += -xalias_level=basic -xbuiltin=%all + SOL_CFLAGS += $(FPU_TARGET_OPTIMIZER) -xdepend + SOL_CFLAGS += -xlibmil -xO5 + endif + ARCHFLAG = -xarch=v9a + SOLARIS_AS_FLAGS = -xarch=v9a -K PIC + endif + endif # NS_USE_GCC + + ### set flags for both GCC and Sun cc + ifdef USE_ABI32_INT64 + # this builds for Sparc v8+a ABI32_FPU architecture, 64-bit registers, + # 32-bit ABI, it uses 64-bit words, integer arithmetic, no FPU + # best times are with no MP_ flags specified + endif + ifdef USE_ABI32_FPU + # this builds for Sparc v8+a ABI32_FPU architecture, 64-bit registers, + # 32-bit ABI, it uses FPU code, and 32-bit word size + MPI_SRCS += mpi_sparc.c + ASFILES = mpv_sparcv8.s montmulfv8.s + DEFINES += -DMP_NO_MP_WORD -DMP_USE_UINT_DIGIT -DMP_ASSEMBLY_MULTIPLY + DEFINES += -DMP_USING_MONT_MULF -DMP_MONT_USE_MP_MUL + ECL_USE_FP = 1 + endif + ifdef USE_ABI64_INT + # this builds for Sparc v9a pure 64-bit architecture + # best times are with no MP_ flags specified + endif + ifdef USE_ABI64_FPU + # this builds for Sparc v9a pure 64-bit architecture + # It uses floating point, and 32-bit word size + MPI_SRCS += mpi_sparc.c + ASFILES = mpv_sparcv9.s montmulfv9.s + DEFINES += -DMP_NO_MP_WORD -DMP_USE_UINT_DIGIT -DMP_ASSEMBLY_MULTIPLY + DEFINES += -DMP_USING_MONT_MULF -DMP_MONT_USE_MP_MUL + ECL_USE_FP = 1 + endif + +else + # Solaris for non-sparc family CPUs + ifdef NS_USE_GCC + LD = gcc + AS = gcc + ASFLAGS = -x assembler-with-cpp + endif + ifeq ($(USE_64),1) + # Solaris for AMD64 + ifdef NS_USE_GCC + ASFILES = arcfour-amd64-gas.s mpi_amd64_gas.s + ASFLAGS += -march=opteron -m64 -fPIC + MPI_SRCS += mp_comba.c + # comment the next four lines to turn off Intel HW acceleration + ASFILES += intel-gcm.s + EXTRA_SRCS += intel-gcm-wrap.c + INTEL_GCM = 1 + DEFINES += -DINTEL_GCM + else + ASFILES = arcfour-amd64-sun.s mpi_amd64_sun.s sha-fast-amd64-sun.s + ASFILES += 
mp_comba_amd64_sun.s mpcpucache_amd64.s + ASFLAGS += -xarch=generic64 -K PIC + SOL_CFLAGS += -xprefetch=no + SHA_SRCS = + MPCPU_SRCS = + # Intel acceleration for GCM does not build currently with Studio + endif + DEFINES += -DNSS_BEVAND_ARCFOUR -DMPI_AMD64 -DMP_ASSEMBLY_MULTIPLY + DEFINES += -DNSS_USE_COMBA -DMP_IS_LITTLE_ENDIAN + # comment the next two lines to turn off Intel HW acceleration + DEFINES += -DUSE_HW_AES + ASFILES += intel-aes.s + MPI_SRCS += mpi_amd64.c + else + # Solaris x86 + DEFINES += -DMP_USE_UINT_DIGIT + DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE + DEFINES += -DMP_ASSEMBLY_DIV_2DX1D + ASFILES = mpi_i86pc.s + ifndef NS_USE_GCC + MPCPU_SRCS = + ASFILES += mpcpucache_x86.s + endif + endif +endif # Solaris for non-sparc family CPUs +endif # target == SunOS + +ifndef NSS_DISABLE_ECC + ifdef ECL_USE_FP + #enable floating point ECC code + DEFINES += -DECL_USE_FP + ECL_SRCS += ecp_fp160.c ecp_fp192.c ecp_fp224.c ecp_fp.c + ECL_HDRS += ecp_fp.h + endif +endif + +# poly1305-donna-x64-sse2-incremental-source.c requires __int128 support +# in GCC 4.6.0. 
+ifdef USE_64 + ifdef CC_IS_CLANG + HAVE_INT128_SUPPORT = 1 + DEFINES += -DHAVE_INT128_SUPPORT + else ifeq (1,$(CC_IS_GCC)) + ifneq (,$(filter 4.6 4.7 4.8 4.9,$(word 1,$(GCC_VERSION)).$(word 2,$(GCC_VERSION)))) + HAVE_INT128_SUPPORT = 1 + DEFINES += -DHAVE_INT128_SUPPORT + endif + ifeq (,$(filter 0 1 2 3 4,$(word 1,$(GCC_VERSION)))) + HAVE_INT128_SUPPORT = 1 + DEFINES += -DHAVE_INT128_SUPPORT + endif + endif +endif + +ifndef NSS_DISABLE_CHACHAPOLY + ifeq ($(CPU_ARCH),x86_64) + ifdef HAVE_INT128_SUPPORT + EXTRA_SRCS += poly1305-donna-x64-sse2-incremental-source.c + else + EXTRA_SRCS += poly1305.c + endif + + ifneq (1,$(CC_IS_GCC)) + EXTRA_SRCS += chacha20.c + else + EXTRA_SRCS += chacha20_vec.c + endif + else + EXTRA_SRCS += poly1305.c + EXTRA_SRCS += chacha20.c + endif # x86_64 +endif # NSS_DISABLE_CHACHAPOLY + +ifeq (,$(filter-out i386 x386 x86 x86_64,$(CPU_ARCH))) + # All intel architectures get the 64 bit version + # With custom uint128 if necessary (faster than generic 32 bit version). + ECL_SRCS += curve25519_64.c +else + # All non intel architectures get the generic 32 bit implementation (slow!) + ECL_SRCS += curve25519_32.c +endif + +ifndef HAVE_INT128_SUPPORT + ECL_SRCS += uint128.c +endif + +####################################################################### +# (5) Execute "global" rules. (OPTIONAL) # +####################################################################### + +include $(CORE_DEPTH)/coreconf/rules.mk + +####################################################################### +# (6) Execute "component" rules. (OPTIONAL) # +####################################################################### + + + +####################################################################### +# (7) Execute "local" rules. (OPTIONAL). 
# +####################################################################### + +export:: private_export + +rijndael_tables: + $(CC) -o $(OBJDIR)/make_rijndael_tab rijndael_tables.c \ + $(DEFINES) $(INCLUDES) $(OBJDIR)/libfreebl.a + $(OBJDIR)/make_rijndael_tab + +vpath %.h mpi ecl +vpath %.c mpi ecl +vpath %.S mpi ecl +vpath %.s mpi ecl +vpath %.asm mpi ecl +INCLUDES += -Impi -Iecl + + +DEFINES += -DMP_API_COMPATIBLE + +MPI_USERS = dh.c pqg.c dsa.c rsa.c ec.c + +MPI_OBJS = $(addprefix $(OBJDIR)/$(PROG_PREFIX), $(MPI_SRCS:.c=$(OBJ_SUFFIX))) +MPI_OBJS += $(addprefix $(OBJDIR)/$(PROG_PREFIX), $(MPI_USERS:.c=$(OBJ_SUFFIX))) + +$(MPI_OBJS): $(MPI_HDRS) + +ECL_USERS = ec.c + +ECL_OBJS = $(addprefix $(OBJDIR)/$(PROG_PREFIX), $(ECL_SRCS:.c=$(OBJ_SUFFIX)) $(ECL_ASM_SRCS:$(ASM_SUFFIX)=$(OBJ_SUFFIX))) +ECL_OBJS += $(addprefix $(OBJDIR)/$(PROG_PREFIX), $(ECL_USERS:.c=$(OBJ_SUFFIX))) + +$(ECL_OBJS): $(ECL_HDRS) + + + +$(OBJDIR)/sysrand$(OBJ_SUFFIX): sysrand.c unix_rand.c win_rand.c os2_rand.c + +$(OBJDIR)/$(PROG_PREFIX)mpprime$(OBJ_SUFFIX): primes.c + +$(OBJDIR)/ldvector$(OBJ_SUFFIX) $(OBJDIR)/loader$(OBJ_SUFFIX) : loader.h + +ifeq ($(SYSV_SPARC),1) + +$(OBJDIR)/mpv_sparcv8.o $(OBJDIR)/mpv_sparcv8x.o $(OBJDIR)/montmulfv8.o : $(OBJDIR)/%.o : %.s + @$(MAKE_OBJDIR) + $(SOLARIS_AS) -o $@ $(SOLARIS_AS_FLAGS) $< + +$(OBJDIR)/mpv_sparcv9.o $(OBJDIR)/montmulfv9.o : $(OBJDIR)/%.o : %.s + @$(MAKE_OBJDIR) + $(SOLARIS_AS) -o $@ $(SOLARIS_AS_FLAGS) $< + +$(OBJDIR)/mpmontg.o: mpmontg.c montmulf.h + +endif + +ifndef FREEBL_CHILD_BUILD + +# Parent build. 
This is where we decide which shared libraries to build + +ifdef FREEBL_BUILD_SINGLE_SHLIB + +################### Single shared lib stuff ######################### +SINGLE_SHLIB_DIR = $(OBJDIR)/$(OS_TARGET)_SINGLE_SHLIB +ALL_TRASH += $(SINGLE_SHLIB_DIR) + +$(SINGLE_SHLIB_DIR): + -mkdir -p $(SINGLE_SHLIB_DIR) + +release_md libs:: $(SINGLE_SHLIB_DIR) + $(MAKE) FREEBL_CHILD_BUILD=1 \ + OBJDIR=$(SINGLE_SHLIB_DIR) $@ +######################## common stuff ######################### + +endif + +ifdef NEED_STUB_BUILD +SINGLE_SHLIB_DIR = $(OBJDIR)/$(OS_TARGET)_SINGLE_SHLIB +ALL_TRASH += $(SINGLE_SHLIB_DIR) +$(SINGLE_SHLIB_DIR): + -mkdir $(SINGLE_SHLIB_DIR) + +release_md libs:: $(SINGLE_SHLIB_DIR) + $(MAKE) FREEBL_CHILD_BUILD=1 USE_STUB_BUILD=1 \ + OBJDIR=$(SINGLE_SHLIB_DIR) $@ +endif + +# multiple shared libraries + +######################## ABI32_FPU stuff ######################### +ifdef HAVE_ABI32_FPU +ABI32_FPU_DIR = $(OBJDIR)/$(OS_TARGET)_ABI32_FPU +ALL_TRASH += $(ABI32_FPU_DIR) + +$(ABI32_FPU_DIR): + -mkdir $(ABI32_FPU_DIR) + +release_md libs:: $(ABI32_FPU_DIR) + $(MAKE) FREEBL_CHILD_BUILD=1 USE_ABI32_FPU=1 \ + OBJDIR=$(ABI32_FPU_DIR) $@ +endif + +######################## ABI32_INT32 stuff ######################### +ifdef HAVE_ABI32_INT32 +ABI32_INT32_DIR = $(OBJDIR)/$(OS_TARGET)_ABI32_INT32 +ALL_TRASH += $(ABI32_INT32_DIR) + +$(ABI32_INT32_DIR): + -mkdir $(ABI32_INT32_DIR) + +release_md libs:: $(ABI32_INT32_DIR) + $(MAKE) FREEBL_CHILD_BUILD=1 USE_ABI32_INT32=1 \ + OBJDIR=$(ABI32_INT32_DIR) $@ +endif + +######################## ABI32_INT64 stuff ######################### +ifdef HAVE_ABI32_INT64 +ABI32_INT64_DIR = $(OBJDIR)/$(OS_TARGET)_ABI32_INT64 +ALL_TRASH += $(ABI32_INT64_DIR) + +$(ABI32_INT64_DIR): + -mkdir $(ABI32_INT64_DIR) + +release_md libs:: $(ABI32_INT64_DIR) + $(MAKE) FREEBL_CHILD_BUILD=1 USE_ABI32_INT64=1\ + OBJDIR=$(ABI32_INT64_DIR) $@ +endif + +######################## END of 32-bit stuff ######################### + +# above is 32-bit builds, below is 
64-bit builds + +######################## ABI64_FPU stuff ######################### +ifdef HAVE_ABI64_FPU +ABI64_FPU_DIR = $(OBJDIR)/$(OS_TARGET)_ABI64_FPU +ALL_TRASH += $(ABI64_FPU_DIR) + +$(ABI64_FPU_DIR): + -mkdir $(ABI64_FPU_DIR) + +release_md libs:: $(ABI64_FPU_DIR) + $(MAKE) FREEBL_CHILD_BUILD=1 USE_ABI64_FPU=1 \ + OBJDIR=$(ABI64_FPU_DIR) $@ +endif + +######################## ABI64_INT stuff ######################### +ifdef HAVE_ABI64_INT +ABI64_INT_DIR = $(OBJDIR)/$(OS_TARGET)_ABI64_INT +ALL_TRASH += $(ABI64_INT_DIR) + +$(ABI64_INT_DIR): + -mkdir $(ABI64_INT_DIR) + +release_md libs:: $(ABI64_INT_DIR) + $(MAKE) FREEBL_CHILD_BUILD=1 USE_ABI64_INT=1 \ + OBJDIR=$(ABI64_INT_DIR) $@ +endif + +endif # FREEBL_CHILD_BUILD + + +# Bugzilla Bug 333917: the non-x86 code in desblapi.c seems to violate +# ANSI C's strict aliasing rules. +ifeq ($(OS_TARGET),Linux) +ifneq ($(CPU_ARCH),x86) +$(OBJDIR)/$(PROG_PREFIX)desblapi$(OBJ_SUFFIX): desblapi.c + @$(MAKE_OBJDIR) +ifdef NEED_ABSOLUTE_PATH + $(CC) -o $@ -c $(CFLAGS) -fno-strict-aliasing $(call core_abspath,$<) +else + $(CC) -o $@ -c $(CFLAGS) -fno-strict-aliasing $< +endif +endif +endif + +ifdef INTEL_GCM +# +# GCM binary needs -mssse3 +# +$(OBJDIR)/$(PROG_PREFIX)intel-gcm-wrap$(OBJ_SUFFIX): CFLAGS += -mssse3 + +# The integrated assembler in Clang 3.2 does not support % in the +# expression of a .set directive. intel-gcm.s uses .set to give +# symbolic names to registers, for example, +# .set Htbl, %rdi +# So we can't use Clang's integrated assembler with intel-gcm.s. 
+ifdef CC_IS_CLANG +$(OBJDIR)/$(PROG_PREFIX)intel-gcm$(OBJ_SUFFIX): CFLAGS += -no-integrated-as +endif +endif + +ifdef INTEL_GCM_CLANG_CL +# +# clang-cl needs -mssse3 +# +$(OBJDIR)/$(PROG_PREFIX)intel-gcm-wrap$(OBJ_SUFFIX): CFLAGS += -mssse3 +endif diff --git a/security/nss/lib/freebl/aeskeywrap.c b/security/nss/lib/freebl/aeskeywrap.c new file mode 100644 index 000000000..79ff8a852 --- /dev/null +++ b/security/nss/lib/freebl/aeskeywrap.c @@ -0,0 +1,389 @@ +/* + * aeskeywrap.c - implement AES Key Wrap algorithm from RFC 3394 + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "prcpucfg.h" +#if defined(IS_LITTLE_ENDIAN) || defined(SHA_NO_LONG_LONG) +#define BIG_ENDIAN_WITH_64_BIT_REGISTERS 0 +#else +#define BIG_ENDIAN_WITH_64_BIT_REGISTERS 1 +#endif +#include "prtypes.h" /* for PRUintXX */ +#include "secport.h" /* for PORT_XXX */ +#include "secerr.h" +#include "blapi.h" /* for AES_ functions */ +#include "rijndael.h" + +struct AESKeyWrapContextStr { + unsigned char iv[AES_KEY_WRAP_IV_BYTES]; + AESContext aescx; +}; + +/******************************************/ +/* +** AES key wrap algorithm, RFC 3394 +*/ + +AESKeyWrapContext * +AESKeyWrap_AllocateContext(void) +{ + AESKeyWrapContext *cx = PORT_New(AESKeyWrapContext); + return cx; +} + +SECStatus +AESKeyWrap_InitContext(AESKeyWrapContext *cx, + const unsigned char *key, + unsigned int keylen, + const unsigned char *iv, + int x1, + unsigned int encrypt, + unsigned int x2) +{ + SECStatus rv = SECFailure; + if (!cx) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + if (iv) { + memcpy(cx->iv, iv, sizeof cx->iv); + } else { + memset(cx->iv, 0xA6, sizeof cx->iv); + } + rv = AES_InitContext(&cx->aescx, key, keylen, NULL, NSS_AES, encrypt, + AES_BLOCK_SIZE); + return rv; +} + +/* 
+** Create a new AES context suitable for AES encryption/decryption. +** "key" raw key data +** "keylen" the number of bytes of key data (16, 24, or 32) +*/ +extern AESKeyWrapContext * +AESKeyWrap_CreateContext(const unsigned char *key, const unsigned char *iv, + int encrypt, unsigned int keylen) +{ + SECStatus rv; + AESKeyWrapContext *cx = AESKeyWrap_AllocateContext(); + if (!cx) + return NULL; /* error is already set */ + rv = AESKeyWrap_InitContext(cx, key, keylen, iv, 0, encrypt, 0); /* the 0s fill InitContext's unused x1/x2 parameters */ + if (rv != SECSuccess) { + PORT_Free(cx); + cx = NULL; /* error should already be set */ + } + return cx; +} + +/* +** Destroy an AES KeyWrap context. +** "cx" the context +** "freeit" if PR_TRUE then free the object as well as its sub-objects +*/ +extern void +AESKeyWrap_DestroyContext(AESKeyWrapContext *cx, PRBool freeit) +{ + if (cx) { + AES_DestroyContext(&cx->aescx, PR_FALSE); /* aescx is embedded in *cx, so it is released with cx below */ + /* memset(cx, 0, sizeof *cx); */ + if (freeit) + PORT_Free(cx); + } +} + +#if !BIG_ENDIAN_WITH_64_BIT_REGISTERS + +/* The AES Key Wrap algorithm has 64-bit values that are ALWAYS big-endian +** (Most significant byte first) in memory. The only ALU operations done +** on them are increment, decrement, and XOR. So, on little-endian CPUs, +** and on CPUs that lack 64-bit registers, these big-endian 64-bit operations +** are simulated in the following code. This is thought to be faster and +** simpler than trying to convert the data to little-endian and back. +*/ + +/* A and T point to two 64-bit values stored most significant byte first +** (big endian). This function increments the 64-bit value T, and then +** XORs it with A, changing A. 
+*/ +static void +increment_and_xor(unsigned char *A, unsigned char *T) +{ + if (!++T[7]) + if (!++T[6]) + if (!++T[5]) + if (!++T[4]) + if (!++T[3]) + if (!++T[2]) + if (!++T[1]) + ++T[0]; + + A[0] ^= T[0]; + A[1] ^= T[1]; + A[2] ^= T[2]; + A[3] ^= T[3]; + A[4] ^= T[4]; + A[5] ^= T[5]; + A[6] ^= T[6]; + A[7] ^= T[7]; +} + +/* A and T point to two 64-bit values stored most significant byte first +** (big endian). This function XORs T with A, giving a new A, then +** decrements the 64-bit value T. The XOR is written one byte lane at a +** time, so both operands of a lane must use the same mask shift. +*/ +static void +xor_and_decrement(PRUint64 *A, PRUint64 *T) +{ + unsigned char *TP = (unsigned char *)T; + const PRUint64 mask = 0xFF; + *A = ((*A & mask << 56) ^ (*T & mask << 56)) | + ((*A & mask << 48) ^ (*T & mask << 48)) | + ((*A & mask << 40) ^ (*T & mask << 40)) | + ((*A & mask << 32) ^ (*T & mask << 32)) | + ((*A & mask << 24) ^ (*T & mask << 24)) | + ((*A & mask << 16) ^ (*T & mask << 16)) | + ((*A & mask << 8) ^ (*T & mask << 8)) | + ((*A & mask) ^ (*T & mask)); + + if (!TP[7]--) + if (!TP[6]--) + if (!TP[5]--) + if (!TP[4]--) + if (!TP[3]--) + if (!TP[2]--) + if (!TP[1]--) + TP[0]--; +} + +/* Given an unsigned long t (in host byte order), store this value as a +** 64-bit big-endian value (MSB first) in *pt. +*/ +static void +set_t(unsigned char *pt, unsigned long t) +{ + pt[7] = (unsigned char)t; + t >>= 8; + pt[6] = (unsigned char)t; + t >>= 8; + pt[5] = (unsigned char)t; + t >>= 8; + pt[4] = (unsigned char)t; + t >>= 8; + pt[3] = (unsigned char)t; + t >>= 8; + pt[2] = (unsigned char)t; + t >>= 8; + pt[1] = (unsigned char)t; + t >>= 8; + pt[0] = (unsigned char)t; +} + +#endif + +/* +** Perform AES key wrap. +** "cx" the context +** "output" the output buffer to store the encrypted data. +** "pOutputLen" how much data is stored in "output". Set by the routine +** after some data is stored in output. 
+** "maxOutputLen" the maximum amount of data that can ever be +** stored in "output" +** "input" the input data +** "inputLen" the amount of input data +*/ +extern SECStatus +AESKeyWrap_Encrypt(AESKeyWrapContext *cx, unsigned char *output, + unsigned int *pOutputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + PRUint64 *R = NULL; + unsigned int nBlocks; + unsigned int i, j; + unsigned int aesLen = AES_BLOCK_SIZE; + unsigned int outLen = inputLen + AES_KEY_WRAP_BLOCK_SIZE; + SECStatus s = SECFailure; + /* These PRUint64s are ALWAYS big endian, regardless of CPU orientation. */ + PRUint64 t; + PRUint64 B[2]; + +#define A B[0] + + /* Check args */ + if (!inputLen || 0 != inputLen % AES_KEY_WRAP_BLOCK_SIZE) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return s; + } +#ifdef maybe + if (!output && pOutputLen) { /* caller is asking for output size */ + *pOutputLen = outLen; + return SECSuccess; + } +#endif + if (maxOutputLen < outLen) { + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return s; + } + if (cx == NULL || output == NULL || input == NULL) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return s; + } + nBlocks = inputLen / AES_KEY_WRAP_BLOCK_SIZE; + R = PORT_NewArray(PRUint64, nBlocks + 1); + if (!R) + return s; /* error is already set. */ + /* + ** 1) Initialize variables. + */ + memcpy(&A, cx->iv, AES_KEY_WRAP_IV_BYTES); + memcpy(&R[1], input, inputLen); +#if BIG_ENDIAN_WITH_64_BIT_REGISTERS + t = 0; +#else + memset(&t, 0, sizeof t); +#endif + /* + ** 2) Calculate intermediate values. 
+ */ + for (j = 0; j < 6; ++j) { + for (i = 1; i <= nBlocks; ++i) { + B[1] = R[i]; + s = AES_Encrypt(&cx->aescx, (unsigned char *)B, &aesLen, + sizeof B, (unsigned char *)B, sizeof B); + if (s != SECSuccess) + break; + R[i] = B[1]; +/* here, increment t and XOR A with t (in big endian order); */ +#if BIG_ENDIAN_WITH_64_BIT_REGISTERS + A ^= ++t; +#else + increment_and_xor((unsigned char *)&A, (unsigned char *)&t); +#endif + } + if (s != SECSuccess) + break; /* the break above only left the i loop; stop the remaining rounds too */ + } + /* + ** 3) Output the results. + */ + if (s == SECSuccess) { + R[0] = A; + memcpy(output, &R[0], outLen); + if (pOutputLen) + *pOutputLen = outLen; + } else if (pOutputLen) { + *pOutputLen = 0; + } + PORT_ZFree(R, outLen); + return s; +} +#undef A + +/* +** Perform AES key unwrap. +** "cx" the context +** "output" the output buffer to store the decrypted data. +** "pOutputLen" how much data is stored in "output". Set by the routine +** after some data is stored in output. +** "maxOutputLen" the maximum amount of data that can ever be +** stored in "output" +** "input" the input data +** "inputLen" the amount of input data +*/ +extern SECStatus +AESKeyWrap_Decrypt(AESKeyWrapContext *cx, unsigned char *output, + unsigned int *pOutputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + PRUint64 *R = NULL; + unsigned int nBlocks; + unsigned int i, j; + unsigned int aesLen = AES_BLOCK_SIZE; + unsigned int outLen; + SECStatus s = SECFailure; + /* These PRUint64s are ALWAYS big endian, regardless of CPU orientation. 
+ */ + PRUint64 t; + PRUint64 B[2]; + + /* Check args */ + if (inputLen < 3 * AES_KEY_WRAP_BLOCK_SIZE || + 0 != inputLen % AES_KEY_WRAP_BLOCK_SIZE) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return s; + } + outLen = inputLen - AES_KEY_WRAP_BLOCK_SIZE; +#ifdef maybe + if (!output && pOutputLen) { /* caller is asking for output size */ + *pOutputLen = outLen; + return SECSuccess; + } +#endif + if (maxOutputLen < outLen) { + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return s; + } + if (cx == NULL || output == NULL || input == NULL) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return s; + } + nBlocks = inputLen / AES_KEY_WRAP_BLOCK_SIZE; + R = PORT_NewArray(PRUint64, nBlocks); + if (!R) + return s; /* error is already set. */ + nBlocks--; /* R[0] holds the integrity block; only the rest are data blocks */ + /* + ** 1) Initialize variables. + */ + memcpy(&R[0], input, inputLen); + B[0] = R[0]; +#if BIG_ENDIAN_WITH_64_BIT_REGISTERS + t = 6UL * nBlocks; +#else + set_t((unsigned char *)&t, 6UL * nBlocks); +#endif + /* + ** 2) Calculate intermediate values. + */ + for (j = 0; j < 6; ++j) { + for (i = nBlocks; i; --i) { +/* here, XOR A with t (in big endian order) and decrement t; */ +#if BIG_ENDIAN_WITH_64_BIT_REGISTERS + B[0] ^= t--; +#else + xor_and_decrement(&B[0], &t); +#endif + B[1] = R[i]; + s = AES_Decrypt(&cx->aescx, (unsigned char *)B, &aesLen, + sizeof B, (unsigned char *)B, sizeof B); + if (s != SECSuccess) + break; + R[i] = B[1]; + } + if (s != SECSuccess) + break; /* the break above only left the i loop; stop the remaining rounds too */ + } + /* + ** 3) Output the results. 
+ */ + if (s == SECSuccess) { + int bad = memcmp(&B[0], cx->iv, AES_KEY_WRAP_IV_BYTES); + if (!bad) { + memcpy(output, &R[1], outLen); + if (pOutputLen) + *pOutputLen = outLen; + } else { + s = SECFailure; + PORT_SetError(SEC_ERROR_BAD_DATA); + if (pOutputLen) + *pOutputLen = 0; + } + } else if (pOutputLen) { + *pOutputLen = 0; + } + PORT_ZFree(R, inputLen); + return s; +} +#undef A diff --git a/security/nss/lib/freebl/alg2268.c b/security/nss/lib/freebl/alg2268.c new file mode 100644 index 000000000..54c6f4dff --- /dev/null +++ b/security/nss/lib/freebl/alg2268.c @@ -0,0 +1,509 @@ +/* + * alg2268.c - implementation of the algorithm in RFC 2268 + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "blapi.h" +#include "blapii.h" +#include "secerr.h" +#ifdef XP_UNIX_XXX +#include <stddef.h> /* for ptrdiff_t */ +#endif + +/* +** RC2 symmetric block cypher +*/ + +typedef SECStatus(rc2Func)(RC2Context *cx, unsigned char *output, + const unsigned char *input, unsigned int inputLen); + +/* forward declarations */ +static rc2Func rc2_EncryptECB; +static rc2Func rc2_DecryptECB; +static rc2Func rc2_EncryptCBC; +static rc2Func rc2_DecryptCBC; + +typedef union { + PRUint32 l[2]; + PRUint16 s[4]; + PRUint8 b[8]; +} RC2Block; + +struct RC2ContextStr { + union { + PRUint8 Kb[128]; + PRUint16 Kw[64]; + } u; + RC2Block iv; + rc2Func *enc; + rc2Func *dec; +}; + +#define B u.Kb +#define K u.Kw +#define BYTESWAP(x) ((x) << 8 | (x) >> 8) +#define SWAPK(i) cx->K[i] = (tmpS = cx->K[i], BYTESWAP(tmpS)) +#define RC2_BLOCK_SIZE 8 + +#define LOAD_HARD(R) \ + R[0] = (PRUint16)input[1] << 8 | input[0]; \ + R[1] = (PRUint16)input[3] << 8 | input[2]; \ + R[2] = (PRUint16)input[5] << 8 | input[4]; \ + R[3] = (PRUint16)input[7] << 8 | input[6]; +#define LOAD_EASY(R) \ + R[0] = ((PRUint16 
*)input)[0]; \ + R[1] = ((PRUint16 *)input)[1]; \ + R[2] = ((PRUint16 *)input)[2]; \ + R[3] = ((PRUint16 *)input)[3]; +#define STORE_HARD(R) \ + output[0] = (PRUint8)(R[0]); \ + output[1] = (PRUint8)(R[0] >> 8); \ + output[2] = (PRUint8)(R[1]); \ + output[3] = (PRUint8)(R[1] >> 8); \ + output[4] = (PRUint8)(R[2]); \ + output[5] = (PRUint8)(R[2] >> 8); \ + output[6] = (PRUint8)(R[3]); \ + output[7] = (PRUint8)(R[3] >> 8); +#define STORE_EASY(R) \ + ((PRUint16 *)output)[0] = R[0]; \ + ((PRUint16 *)output)[1] = R[1]; \ + ((PRUint16 *)output)[2] = R[2]; \ + ((PRUint16 *)output)[3] = R[3]; + +#if defined(NSS_X86_OR_X64) +#define LOAD(R) LOAD_EASY(R) +#define STORE(R) STORE_EASY(R) +#elif !defined(IS_LITTLE_ENDIAN) +#define LOAD(R) LOAD_HARD(R) +#define STORE(R) STORE_HARD(R) +#else +/* LOAD reads through input, so input's alignment picks the byte-wise or 16-bit form. */ +#define LOAD(R) \ + if ((ptrdiff_t)input & 1) { \ + LOAD_HARD(R) \ + } else { \ + LOAD_EASY(R) \ + } +/* STORE writes through output, so it is output's alignment that must be tested. */ +#define STORE(R) \ + if ((ptrdiff_t)output & 1) { \ + STORE_HARD(R) \ + } else { \ + STORE_EASY(R) \ + } +#endif + +static const PRUint8 S[256] = { + 0331, 0170, 0371, 0304, 0031, 0335, 0265, 0355, 0050, 0351, 0375, 0171, 0112, 0240, 0330, 0235, + 0306, 0176, 0067, 0203, 0053, 0166, 0123, 0216, 0142, 0114, 0144, 0210, 0104, 0213, 0373, 0242, + 0027, 0232, 0131, 0365, 0207, 0263, 0117, 0023, 0141, 0105, 0155, 0215, 0011, 0201, 0175, 0062, + 0275, 0217, 0100, 0353, 0206, 0267, 0173, 0013, 0360, 0225, 0041, 0042, 0134, 0153, 0116, 0202, + 0124, 0326, 0145, 0223, 0316, 0140, 0262, 0034, 0163, 0126, 0300, 0024, 0247, 0214, 0361, 0334, + 0022, 0165, 0312, 0037, 0073, 0276, 0344, 0321, 0102, 0075, 0324, 0060, 0243, 0074, 0266, 0046, + 0157, 0277, 0016, 0332, 0106, 0151, 0007, 0127, 0047, 0362, 0035, 0233, 0274, 0224, 0103, 0003, + 0370, 0021, 0307, 0366, 0220, 0357, 0076, 0347, 0006, 0303, 0325, 0057, 0310, 0146, 0036, 0327, + 0010, 0350, 0352, 0336, 0200, 0122, 0356, 0367, 0204, 0252, 0162, 0254, 0065, 0115, 0152, 0052, + 0226, 0032, 0322, 0161, 0132, 0025, 0111, 0164, 0113, 0237, 0320, 0136, 0004, 
0030, 0244, 0354, + 0302, 0340, 0101, 0156, 0017, 0121, 0313, 0314, 0044, 0221, 0257, 0120, 0241, 0364, 0160, 0071, + 0231, 0174, 0072, 0205, 0043, 0270, 0264, 0172, 0374, 0002, 0066, 0133, 0045, 0125, 0227, 0061, + 0055, 0135, 0372, 0230, 0343, 0212, 0222, 0256, 0005, 0337, 0051, 0020, 0147, 0154, 0272, 0311, + 0323, 0000, 0346, 0317, 0341, 0236, 0250, 0054, 0143, 0026, 0001, 0077, 0130, 0342, 0211, 0251, + 0015, 0070, 0064, 0033, 0253, 0063, 0377, 0260, 0273, 0110, 0014, 0137, 0271, 0261, 0315, 0056, + 0305, 0363, 0333, 0107, 0345, 0245, 0234, 0167, 0012, 0246, 0040, 0150, 0376, 0177, 0301, 0255 +}; + +RC2Context * +RC2_AllocateContext(void) +{ + return PORT_ZNew(RC2Context); +} +/* +** Initialize an RC2 context: validate the arguments, select the ECB or CBC +** routines, and expand the key into cx's key table. +** "key" raw key data +** "len" the number of bytes of key data (1 to 128) +** "input" the CBC initialization vector; required when mode is NSS_RC2_CBC +** "mode" one of NSS_RC2 or NSS_RC2_CBC +** "efLen8" effective key length in bytes (1 to 128); 0 is rejected because +** step 2 would otherwise index one byte past the end of the key table +** "unused" ignored +** Returns SECFailure with SEC_ERROR_INVALID_ARGS on a bad argument. +*/ +SECStatus +RC2_InitContext(RC2Context *cx, const unsigned char *key, unsigned int len, + const unsigned char *input, int mode, unsigned int efLen8, + unsigned int unused) +{ + PRUint8 *L, *L2; + int i; +#if !defined(IS_LITTLE_ENDIAN) + PRUint16 tmpS; +#endif + PRUint8 tmpB; + + if (!key || !cx || !len || len > (sizeof cx->B) || + !efLen8 || efLen8 > (sizeof cx->B)) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + if (mode == NSS_RC2) { + /* groovy */ + } else if (mode == NSS_RC2_CBC) { + if (!input) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + } else { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + if (mode == NSS_RC2_CBC) { + cx->enc = &rc2_EncryptCBC; + cx->dec = &rc2_DecryptCBC; + LOAD(cx->iv.s); + } else { + cx->enc = &rc2_EncryptECB; + cx->dec = &rc2_DecryptECB; + } + + /* Step 0. Copy key into table. */ + memcpy(cx->B, key, len); + + /* Step 1. Compute all values to the right of the key. */ + L2 = cx->B; + L = L2 + len; + tmpB = L[-1]; + for (i = (sizeof cx->B) - len; i > 0; --i) { + *L++ = tmpB = S[(PRUint8)(tmpB + *L2++)]; + } + + /* step 2. Adjust left most byte of effective key. */ + i = (sizeof cx->B) - efLen8; + L = cx->B + i; + *L = tmpB = S[*L]; /* mask is always 0xff */ + + /* step 3. 
Recompute all values to the left of effective key. */ + L2 = --L + efLen8; + while (L >= cx->B) { + *L-- = tmpB = S[tmpB ^ *L2--]; + } + +#if !defined(IS_LITTLE_ENDIAN) + for (i = 63; i >= 0; --i) { + SWAPK(i); /* candidate for unrolling */ + } +#endif + return SECSuccess; +} + +/* +** Create a new RC2 context suitable for RC2 encryption/decryption. +** "key" raw key data +** "len" the number of bytes of key data +** "iv" is the CBC initialization vector (if mode is NSS_RC2_CBC) +** "mode" one of NSS_RC2 or NSS_RC2_CBC +** "effectiveKeyLen" in bytes, not bits. +** +** When mode is set to NSS_RC2_CBC the RC2 cipher is run in "cipher block +** chaining" mode. +*/ +RC2Context * +RC2_CreateContext(const unsigned char *key, unsigned int len, + const unsigned char *iv, int mode, unsigned efLen8) +{ + RC2Context *cx = PORT_ZNew(RC2Context); + if (cx) { + SECStatus rv = RC2_InitContext(cx, key, len, iv, mode, efLen8, 0); + if (rv != SECSuccess) { + RC2_DestroyContext(cx, PR_TRUE); + cx = NULL; + } + } + return cx; +} + +/* +** Destroy an RC2 encryption/decryption context. +** "cx" the context +** "freeit" if PR_TRUE then free the object as well as its sub-objects +*/ +void +RC2_DestroyContext(RC2Context *cx, PRBool freeit) +{ + if (cx) { + memset(cx, 0, sizeof *cx); + if (freeit) { + PORT_Free(cx); + } + } +} + +#define ROL(x, k) (x << k | x >> (16 - k)) +#define MIX(j) \ + R0 = R0 + cx->K[4 * j + 0] + (R3 & R2) + (~R3 & R1); \ + R0 = ROL(R0, 1); \ + R1 = R1 + cx->K[4 * j + 1] + (R0 & R3) + (~R0 & R2); \ + R1 = ROL(R1, 2); \ + R2 = R2 + cx->K[4 * j + 2] + (R1 & R0) + (~R1 & R3); \ + R2 = ROL(R2, 3); \ + R3 = R3 + cx->K[4 * j + 3] + (R2 & R1) + (~R2 & R0); \ + R3 = ROL(R3, 5) +#define MASH \ + R0 = R0 + cx->K[R3 & 63]; \ + R1 = R1 + cx->K[R0 & 63]; \ + R2 = R2 + cx->K[R1 & 63]; \ + R3 = R3 + cx->K[R2 & 63] + +/* Encrypt one block */ +static void +rc2_Encrypt1Block(RC2Context *cx, RC2Block *output, RC2Block *input) +{ + register PRUint16 R0, R1, R2, R3; + + /* step 1. 
/* Decrypt one block */
*/ + R_MIX(4); + R_MIX(3); + R_MIX(2); + R_MIX(1); + R_MIX(0); + + /* output results */ + output->s[0] = R0; + output->s[1] = R1; + output->s[2] = R2; + output->s[3] = R3; +} + +static SECStatus NO_SANITIZE_ALIGNMENT +rc2_EncryptECB(RC2Context *cx, unsigned char *output, + const unsigned char *input, unsigned int inputLen) +{ + RC2Block iBlock; + + while (inputLen > 0) { + LOAD(iBlock.s) + rc2_Encrypt1Block(cx, &iBlock, &iBlock); + STORE(iBlock.s) + output += RC2_BLOCK_SIZE; + input += RC2_BLOCK_SIZE; + inputLen -= RC2_BLOCK_SIZE; + } + return SECSuccess; +} + +static SECStatus NO_SANITIZE_ALIGNMENT +rc2_DecryptECB(RC2Context *cx, unsigned char *output, + const unsigned char *input, unsigned int inputLen) +{ + RC2Block iBlock; + + while (inputLen > 0) { + LOAD(iBlock.s) + rc2_Decrypt1Block(cx, &iBlock, &iBlock); + STORE(iBlock.s) + output += RC2_BLOCK_SIZE; + input += RC2_BLOCK_SIZE; + inputLen -= RC2_BLOCK_SIZE; + } + return SECSuccess; +} + +static SECStatus NO_SANITIZE_ALIGNMENT +rc2_EncryptCBC(RC2Context *cx, unsigned char *output, + const unsigned char *input, unsigned int inputLen) +{ + RC2Block iBlock; + + while (inputLen > 0) { + + LOAD(iBlock.s) + iBlock.l[0] ^= cx->iv.l[0]; + iBlock.l[1] ^= cx->iv.l[1]; + rc2_Encrypt1Block(cx, &iBlock, &iBlock); + cx->iv = iBlock; + STORE(iBlock.s) + output += RC2_BLOCK_SIZE; + input += RC2_BLOCK_SIZE; + inputLen -= RC2_BLOCK_SIZE; + } + return SECSuccess; +} + +static SECStatus NO_SANITIZE_ALIGNMENT +rc2_DecryptCBC(RC2Context *cx, unsigned char *output, + const unsigned char *input, unsigned int inputLen) +{ + RC2Block iBlock; + RC2Block oBlock; + + while (inputLen > 0) { + LOAD(iBlock.s) + rc2_Decrypt1Block(cx, &oBlock, &iBlock); + oBlock.l[0] ^= cx->iv.l[0]; + oBlock.l[1] ^= cx->iv.l[1]; + cx->iv = iBlock; + STORE(oBlock.s) + output += RC2_BLOCK_SIZE; + input += RC2_BLOCK_SIZE; + inputLen -= RC2_BLOCK_SIZE; + } + return SECSuccess; +} + +/* +** Perform RC2 encryption. 
+** "cx" the context +** "output" the output buffer to store the encrypted data. +** "outputLen" how much data is stored in "output". Set by the routine +** after some data is stored in output. +** "maxOutputLen" the maximum amount of data that can ever be +** stored in "output" +** "input" the input data +** "inputLen" the amount of input data +*/ +SECStatus +RC2_Encrypt(RC2Context *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + SECStatus rv = SECSuccess; + if (inputLen) { + if (inputLen % RC2_BLOCK_SIZE) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + if (maxOutputLen < inputLen) { + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + rv = (*cx->enc)(cx, output, input, inputLen); + } + if (rv == SECSuccess) { + *outputLen = inputLen; + } + return rv; +} + +/* +** Perform RC2 decryption. +** "cx" the context +** "output" the output buffer to store the decrypted data. +** "outputLen" how much data is stored in "output". Set by the routine +** after some data is stored in output. 
+** "maxOutputLen" the maximum amount of data that can ever be +** stored in "output" +** "input" the input data +** "inputLen" the amount of input data +*/ +SECStatus +RC2_Decrypt(RC2Context *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + SECStatus rv = SECSuccess; + if (inputLen) { + if (inputLen % RC2_BLOCK_SIZE) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + if (maxOutputLen < inputLen) { + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + rv = (*cx->dec)(cx, output, input, inputLen); + } + if (rv == SECSuccess) { + *outputLen = inputLen; + } + return rv; +} diff --git a/security/nss/lib/freebl/alghmac.c b/security/nss/lib/freebl/alghmac.c new file mode 100644 index 000000000..dd8b73c5f --- /dev/null +++ b/security/nss/lib/freebl/alghmac.c @@ -0,0 +1,165 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "secport.h" +#include "hasht.h" +#include "blapit.h" +#include "alghmac.h" +#include "secerr.h" + +#define HMAC_PAD_SIZE HASH_BLOCK_LENGTH_MAX + +struct HMACContextStr { + void *hash; + const SECHashObject *hashobj; + PRBool wasAllocated; + unsigned char ipad[HMAC_PAD_SIZE]; + unsigned char opad[HMAC_PAD_SIZE]; +}; + +void +HMAC_Destroy(HMACContext *cx, PRBool freeit) +{ + if (cx == NULL) + return; + + PORT_Assert(!freeit == !cx->wasAllocated); + if (cx->hash != NULL) { + cx->hashobj->destroy(cx->hash, PR_TRUE); + PORT_Memset(cx, 0, sizeof *cx); + } + if (freeit) + PORT_Free(cx); +} + +SECStatus +HMAC_Init(HMACContext *cx, const SECHashObject *hash_obj, + const unsigned char *secret, unsigned int secret_len, PRBool isFIPS) +{ + unsigned int i; + unsigned char hashed_secret[HASH_LENGTH_MAX]; + + /* required by FIPS 198 Section 3 */ + if (isFIPS && secret_len < hash_obj->length / 2) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + if (cx == NULL) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + cx->wasAllocated = PR_FALSE; + cx->hashobj = hash_obj; + cx->hash = cx->hashobj->create(); + if (cx->hash == NULL) + goto loser; + + if (secret_len > cx->hashobj->blocklength) { + cx->hashobj->begin(cx->hash); + cx->hashobj->update(cx->hash, secret, secret_len); + PORT_Assert(cx->hashobj->length <= sizeof hashed_secret); + cx->hashobj->end(cx->hash, hashed_secret, &secret_len, + sizeof hashed_secret); + if (secret_len != cx->hashobj->length) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + goto loser; + } + secret = (const unsigned char *)&hashed_secret[0]; + } + + PORT_Memset(cx->ipad, 0x36, cx->hashobj->blocklength); + PORT_Memset(cx->opad, 0x5c, cx->hashobj->blocklength); + + /* fold secret into padding */ + for (i = 0; i < secret_len; i++) { + cx->ipad[i] ^= secret[i]; + cx->opad[i] ^= secret[i]; + } + PORT_Memset(hashed_secret, 0, sizeof hashed_secret); + return 
SECSuccess; + +loser: + PORT_Memset(hashed_secret, 0, sizeof hashed_secret); + if (cx->hash != NULL) + cx->hashobj->destroy(cx->hash, PR_TRUE); + return SECFailure; +} + +HMACContext * +HMAC_Create(const SECHashObject *hash_obj, const unsigned char *secret, + unsigned int secret_len, PRBool isFIPS) +{ + SECStatus rv; + HMACContext *cx = PORT_ZNew(HMACContext); + if (cx == NULL) + return NULL; + rv = HMAC_Init(cx, hash_obj, secret, secret_len, isFIPS); + cx->wasAllocated = PR_TRUE; + if (rv != SECSuccess) { + PORT_Free(cx); /* contains no secret info */ + cx = NULL; + } + return cx; +} + +void +HMAC_Begin(HMACContext *cx) +{ + /* start inner hash */ + cx->hashobj->begin(cx->hash); + cx->hashobj->update(cx->hash, cx->ipad, cx->hashobj->blocklength); +} + +void +HMAC_Update(HMACContext *cx, const unsigned char *data, unsigned int data_len) +{ + cx->hashobj->update(cx->hash, data, data_len); +} + +SECStatus +HMAC_Finish(HMACContext *cx, unsigned char *result, unsigned int *result_len, + unsigned int max_result_len) +{ + if (max_result_len < cx->hashobj->length) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + cx->hashobj->end(cx->hash, result, result_len, max_result_len); + if (*result_len != cx->hashobj->length) + return SECFailure; + + cx->hashobj->begin(cx->hash); + cx->hashobj->update(cx->hash, cx->opad, cx->hashobj->blocklength); + cx->hashobj->update(cx->hash, result, *result_len); + cx->hashobj->end(cx->hash, result, result_len, max_result_len); + return SECSuccess; +} + +HMACContext * +HMAC_Clone(HMACContext *cx) +{ + HMACContext *newcx; + + newcx = (HMACContext *)PORT_ZAlloc(sizeof(HMACContext)); + if (newcx == NULL) + goto loser; + + newcx->wasAllocated = PR_TRUE; + newcx->hashobj = cx->hashobj; + newcx->hash = cx->hashobj->clone(cx->hash); + if (newcx->hash == NULL) + goto loser; + PORT_Memcpy(newcx->ipad, cx->ipad, cx->hashobj->blocklength); + PORT_Memcpy(newcx->opad, cx->opad, cx->hashobj->blocklength); + return newcx; + +loser: + 
HMAC_Destroy(newcx, PR_TRUE); + return NULL; +} diff --git a/security/nss/lib/freebl/alghmac.h b/security/nss/lib/freebl/alghmac.h new file mode 100644 index 000000000..462526ac4 --- /dev/null +++ b/security/nss/lib/freebl/alghmac.h @@ -0,0 +1,64 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef _ALGHMAC_H_ +#define _ALGHMAC_H_ + +typedef struct HMACContextStr HMACContext; + +SEC_BEGIN_PROTOS + +/* destroy HMAC context */ +extern void +HMAC_Destroy(HMACContext *cx, PRBool freeit); + +/* create HMAC context + * hash_obj hash object from SECRawHashObjects[] + * secret the secret with which the HMAC is performed. + * secret_len the length of the secret. + * isFIPS true if conforming to FIPS 198. + * + * NULL is returned if an error occurs. + */ +extern HMACContext * +HMAC_Create(const SECHashObject *hash_obj, const unsigned char *secret, + unsigned int secret_len, PRBool isFIPS); + +/* like HMAC_Create, except caller allocates HMACContext. */ +SECStatus +HMAC_Init(HMACContext *cx, const SECHashObject *hash_obj, + const unsigned char *secret, unsigned int secret_len, PRBool isFIPS); + +/* reset HMAC for a fresh round */ +extern void +HMAC_Begin(HMACContext *cx); + +/* update HMAC + * cx HMAC Context + * data the data to perform HMAC on + * data_len the length of the data to process + */ +extern void +HMAC_Update(HMACContext *cx, const unsigned char *data, unsigned int data_len); + +/* Finish HMAC -- place the results within result + * cx HMAC context + * result buffer for resulting hmac'd data + * result_len where the resultant hmac length is stored + * max_result_len maximum possible length that can be stored in result + */ +extern SECStatus +HMAC_Finish(HMACContext *cx, unsigned char *result, unsigned int *result_len, + unsigned int max_result_len); + +/* clone a copy of the HMAC state. 
this is useful when you would
 * need to keep a running hmac but also need to extract portions
 * partway through the process.
 */
+** "maxOutputLen" the maximum amount of data that can ever be +** stored in "output" +** "input" the input data +** "inputLen" the amount of input data +*/ +SECStatus +RC5_Encrypt(RC5Context *cx, unsigned char *output, unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + PORT_SetError(PR_NOT_IMPLEMENTED_ERROR); + return SECFailure; +} + +/* +** Perform RC5 decryption. +** "cx" the context +** "output" the output buffer to store the decrypted data. +** "outputLen" how much data is stored in "output". Set by the routine +** after some data is stored in output. +** "maxOutputLen" the maximum amount of data that can ever be +** stored in "output" +** "input" the input data +** "inputLen" the amount of input data +*/ +SECStatus +RC5_Decrypt(RC5Context *cx, unsigned char *output, unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + PORT_SetError(PR_NOT_IMPLEMENTED_ERROR); + return SECFailure; +} diff --git a/security/nss/lib/freebl/arcfour-amd64-gas.s b/security/nss/lib/freebl/arcfour-amd64-gas.s new file mode 100644 index 000000000..7c4f5358f --- /dev/null +++ b/security/nss/lib/freebl/arcfour-amd64-gas.s @@ -0,0 +1,88 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# ** ARCFOUR implementation optimized for AMD64. +# ** +# ** The throughput achieved by this code is about 320 MBytes/sec, on +# ** a 1.8 GHz AMD Opteron (rev C0) processor. 
# ARCFOUR -- apply the RC4 key stream to a buffer.
#
# Arguments as consumed below:
#   %rdi  key  pointer to the cipher state
#   %rsi  len  number of bytes to process
#   %rdx  in   input buffer
#   %rcx  out  output buffer
#
# State layout as accessed here: x at offset 0, y at offset 8, and the
# permutation table from offset 16 on, indexed with an 8-byte stride.
#
# Register use after the prologue:
#   %rbp = d (table base)   %rcx = x        %rdx = y
#   %rax = tx (d[x], fetched one round ahead)
#   %rsi = in, %rdi = out   %r9 = in+len-8 (later in+len)
#   %r8  = key-stream accumulator, %r11 = byte counter, %rbx = scratch
#
# %rbp and %rbx are saved on entry and restored before returning.
.text
.align 16
.globl ARCFOUR
.type ARCFOUR,@function
ARCFOUR:
	pushq	%rbp
	pushq	%rbx
	movq	%rdi,		%rbp	# key = ARG(key)
	movq	%rsi,		%rbx	# rbx = ARG(len)
	movq	%rdx,		%rsi	# in = ARG(in)
	movq	%rcx,		%rdi	# out = ARG(out)
	movq	(%rbp),		%rcx	# x = key->x
	movq	8(%rbp),	%rdx	# y = key->y
	addq	$16,		%rbp	# d = key->data
	# x is advanced here, one step ahead of the byte being produced;
	# .Lfinished undoes this look-ahead before storing x back.
	incq	%rcx			# x++
	andq	$255,		%rcx	# x &= 0xff
	leaq	-8(%rbx,%rsi),	%rbx	# rbx = in+len-8
	movq	%rbx,		%r9	# tmp = in+len-8
	movq	0(%rbp,%rcx,8),	%rax	# tx = d[x]
	cmpq	%rsi,		%rbx	# cmp in with in+len-8
	jl	.Lend			# jump if (in+len-8 < in)

# Main loop: runs while at least 8 input bytes remain.
.Lstart:
	addq	$8,		%rsi		# increment in
	addq	$8,		%rdi		# increment out

	# generate the next 8 bytes of the rc4 stream into %r8
	movq	$8,		%r11		# byte counter
1:	addb	%al,		%dl		# y += tx
	movl	0(%rbp,%rdx,8),	%ebx		# ty = d[y]
	movl	%ebx,		0(%rbp,%rcx,8)	# d[x] = ty
	addb	%al,		%bl		# val = ty + tx
	movl	%eax,		0(%rbp,%rdx,8)	# d[y] = tx
	incb	%cl				# x++ (NEXT ROUND)
	movl	0(%rbp,%rcx,8),	%eax		# tx = d[x] (NEXT ROUND)
	movb	0(%rbp,%rbx,8),	%r8b		# val = d[val]
	decb	%r11b
	# The rotate moves the new byte up and makes room for the next one;
	# the jnz below still tests the flags produced by decb.
	rorq	$8,		%r8		# (ror does not change ZF)
	jnz 1b

	# xor 8 bytes
	xorq	-8(%rsi),	%r8
	cmpq	%r9,		%rsi		# cmp in+len-8 with in
	movq	%r8,		-8(%rdi)
	jle	.Lstart				# jump if (in <= in+len-8)

.Lend:
	addq	$8,		%r9		# tmp = in+len

	# handle the last bytes, one by one
1:	cmpq	%rsi,		%r9		# cmp in with in+len
	jle	.Lfinished			# jump if (in+len <= in)
	addb	%al,		%dl		# y += tx
	movl	0(%rbp,%rdx,8),	%ebx		# ty = d[y]
	movl	%ebx,		0(%rbp,%rcx,8)	# d[x] = ty
	addb	%al,		%bl		# val = ty + tx
	movl	%eax,		0(%rbp,%rdx,8)	# d[y] = tx
	incb	%cl				# x++ (NEXT ROUND)
	movl	0(%rbp,%rcx,8),	%eax		# tx = d[x] (NEXT ROUND)
	movb	0(%rbp,%rbx,8),	%r8b		# val = d[val]
	xorb	(%rsi),		%r8b		# xor 1 byte
	movb	%r8b,		(%rdi)
	incq	%rsi				# in++
	incq	%rdi				# out++
	jmp 1b

# Write x and y back; only their low bytes are stored.
.Lfinished:
	decq	%rcx				# x--
	movb	%dl,		-8(%rbp)	# key->y = y
	movb	%cl,		-16(%rbp)	# key->x = x
	popq	%rbx
	popq	%rbp
	ret
.L_ARCFOUR_end:
.size ARCFOUR,.L_ARCFOUR_end-ARCFOUR
jle Lstart + +Lend: + add r9, 8 ; tmp = in+len + + ; + ; handle the last bytes, one by one + ; + +@@: + cmp r9, rsi ; cmp in with in+len + jle Lfinished ; jump if (in+len <= in) + add dl, al ; y += tx + mov ebx, [rbp+rdx*8] ; ty = d[y] + mov [rbp+rcx*8], ebx ; d[x] = ty + add bl, al ; val = ty + tx + mov [rbp+rdx*8], eax ; d[y] = tx + inc cl ; x++ (NEXT ROUND) + mov eax, [rbp+rcx*8] ; tx = d[x] (NEXT ROUND) + mov r8b, [rbp+rbx*8] ; val = d[val] + xor r8b, [rsi] ; xor 1 byte + mov [rdi], r8b + inc rsi ; in++ + inc rdi + jmp @b + +Lfinished: + dec rcx ; x-- + mov [rbp-8], dl ; key->y = y + mov [rbp-16], cl ; key->x = x + + pop rdi + pop rsi + pop rbx + pop rbp + ret + +ARCFOUR ENDP + +END diff --git a/security/nss/lib/freebl/arcfour-amd64-sun.s b/security/nss/lib/freebl/arcfour-amd64-sun.s new file mode 100644 index 000000000..8b649f901 --- /dev/null +++ b/security/nss/lib/freebl/arcfour-amd64-sun.s @@ -0,0 +1,84 @@ +/ This Source Code Form is subject to the terms of the Mozilla Public +/ License, v. 2.0. If a copy of the MPL was not distributed with this +/ file, You can obtain one at http://mozilla.org/MPL/2.0/. + +/ ** ARCFOUR implementation optimized for AMD64. +/ ** +/ ** The throughput achieved by this code is about 320 MBytes/sec, on +/ ** a 1.8 GHz AMD Opteron (rev C0) processor. 
/ ARCFOUR -- apply the RC4 key stream to a buffer.
/
/ Arguments as consumed below:
/   %rdi  key  pointer to the cipher state
/   %rsi  len  number of bytes to process
/   %rdx  in   input buffer
/   %rcx  out  output buffer
/
/ State layout as accessed here: x at offset 0, y at offset 8, and the
/ permutation table from offset 16 on, indexed with an 8-byte stride.
/
/ Register use after the prologue:
/   %rbp = d (table base)   %rcx = x        %rdx = y
/   %rax = tx (d[x], fetched one round ahead)
/   %rsi = in, %rdi = out   %r9 = in+len-8 (later in+len)
/   %r8  = key-stream accumulator, %r11 = byte counter, %rbx = scratch
/
/ %rbp and %rbx are saved on entry and restored before returning.
.text
.align 16
.globl ARCFOUR
.type ARCFOUR,@function
ARCFOUR:
	pushq	%rbp
	pushq	%rbx
	movq	%rdi,		%rbp	/ key = ARG(key)
	movq	%rsi,		%rbx	/ rbx = ARG(len)
	movq	%rdx,		%rsi	/ in = ARG(in)
	movq	%rcx,		%rdi	/ out = ARG(out)
	movq	(%rbp),		%rcx	/ x = key->x
	movq	8(%rbp),	%rdx	/ y = key->y
	addq	$16,		%rbp	/ d = key->data
	/ x is advanced here, one step ahead of the byte being produced;
	/ .Lfinished undoes this look-ahead before storing x back.
	incq	%rcx			/ x++
	andq	$255,		%rcx	/ x &= 0xff
	leaq	-8(%rbx,%rsi),	%rbx	/ rbx = in+len-8
	movq	%rbx,		%r9	/ tmp = in+len-8
	movq	0(%rbp,%rcx,8),	%rax	/ tx = d[x]
	cmpq	%rsi,		%rbx	/ cmp in with in+len-8
	jl	.Lend			/ jump if (in+len-8 < in)

/ Main loop: runs while at least 8 input bytes remain.
.Lstart:
	addq	$8,		%rsi		/ increment in
	addq	$8,		%rdi		/ increment out

	/ generate the next 8 bytes of the rc4 stream into %r8
	movq	$8,		%r11		/ byte counter
1:	addb	%al,		%dl		/ y += tx
	movl	0(%rbp,%rdx,8),	%ebx		/ ty = d[y]
	movl	%ebx,		0(%rbp,%rcx,8)	/ d[x] = ty
	addb	%al,		%bl		/ val = ty + tx
	movl	%eax,		0(%rbp,%rdx,8)	/ d[y] = tx
	incb	%cl				/ x++ (NEXT ROUND)
	movl	0(%rbp,%rcx,8),	%eax		/ tx = d[x] (NEXT ROUND)
	movb	0(%rbp,%rbx,8),	%r8b		/ val = d[val]
	decb	%r11b
	/ The rotate moves the new byte up and makes room for the next one;
	/ the jnz below still tests the flags produced by decb.
	rorq	$8,		%r8		/ (ror does not change ZF)
	jnz 1b

	/ xor 8 bytes
	xorq	-8(%rsi),	%r8
	cmpq	%r9,		%rsi		/ cmp in+len-8 with in
	movq	%r8,		-8(%rdi)
	jle	.Lstart				/ jump if (in <= in+len-8)

.Lend:
	addq	$8,		%r9		/ tmp = in+len

	/ handle the last bytes, one by one
1:	cmpq	%rsi,		%r9		/ cmp in with in+len
	jle	.Lfinished			/ jump if (in+len <= in)
	addb	%al,		%dl		/ y += tx
	movl	0(%rbp,%rdx,8),	%ebx		/ ty = d[y]
	movl	%ebx,		0(%rbp,%rcx,8)	/ d[x] = ty
	addb	%al,		%bl		/ val = ty + tx
	movl	%eax,		0(%rbp,%rdx,8)	/ d[y] = tx
	incb	%cl				/ x++ (NEXT ROUND)
	movl	0(%rbp,%rcx,8),	%eax		/ tx = d[x] (NEXT ROUND)
	movb	0(%rbp,%rbx,8),	%r8b		/ val = d[val]
	xorb	(%rsi),		%r8b		/ xor 1 byte
	movb	%r8b,		(%rdi)
	incq	%rsi				/ in++
	incq	%rdi				/ out++
	jmp 1b

/ Write x and y back; only their low bytes are stored.
.Lfinished:
	decq	%rcx				/ x--
	movb	%dl,		-8(%rbp)	/ key->y = y
	movb	%cl,		-16(%rbp)	/ key->x = x
	popq	%rbx
	popq	%rbp
	ret
.L_ARCFOUR_end:
.size ARCFOUR,.L_ARCFOUR_end-ARCFOUR
+ */ +static const Stype Kinit[256] = { + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, + 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, + 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, + 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, + 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, + 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, + 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, + 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, + 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, + 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, + 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, + 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, + 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, + 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, + 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, + 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, + 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, + 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, + 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, + 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, + 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, + 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, + 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, + 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, + 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, + 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, + 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, + 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff +}; + +RC4Context * +RC4_AllocateContext(void) +{ + return PORT_ZNew(RC4Context); +} + +SECStatus +RC4_InitContext(RC4Context *cx, const unsigned char *key, unsigned int len, + const unsigned char *unused1, int unused2, + unsigned int unused3, unsigned int unused4) +{ + unsigned int i; + PRUint8 j, tmp; + PRUint8 K[256]; + PRUint8 *L; + + /* verify the key 
length. */ + PORT_Assert(len > 0 && len < ARCFOUR_STATE_SIZE); + if (len == 0 || len >= ARCFOUR_STATE_SIZE) { + PORT_SetError(SEC_ERROR_BAD_KEY); + return SECFailure; + } + if (cx == NULL) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + /* Initialize the state using array indices. */ + memcpy(cx->S, Kinit, sizeof cx->S); + /* Fill in K repeatedly with values from key. */ + L = K; + for (i = sizeof K; i > len; i -= len) { + memcpy(L, key, len); + L += len; + } + memcpy(L, key, i); + /* Stir the state of the generator. At this point it is assumed + * that the key is the size of the state buffer. If this is not + * the case, the key bytes are repeated to fill the buffer. + */ + j = 0; +#define ARCFOUR_STATE_STIR(ii) \ + j = j + cx->S[ii] + K[ii]; \ + SWAP(cx->S[ii], cx->S[j]); + for (i = 0; i < ARCFOUR_STATE_SIZE; i++) { + ARCFOUR_STATE_STIR(i); + } + cx->i = 0; + cx->j = 0; + return SECSuccess; +} + +/* + * Initialize a new generator. + */ +RC4Context * +RC4_CreateContext(const unsigned char *key, int len) +{ + RC4Context *cx = RC4_AllocateContext(); + if (cx) { + SECStatus rv = RC4_InitContext(cx, key, len, NULL, 0, 0, 0); + if (rv != SECSuccess) { + PORT_ZFree(cx, sizeof(*cx)); + cx = NULL; + } + } + return cx; +} + +void +RC4_DestroyContext(RC4Context *cx, PRBool freeit) +{ + if (freeit) + PORT_ZFree(cx, sizeof(*cx)); +} + +#if defined(NSS_BEVAND_ARCFOUR) +extern void ARCFOUR(RC4Context *cx, WORD inputLen, + const unsigned char *input, unsigned char *output); +#else +/* + * Generate the next byte in the stream. + */ +#define ARCFOUR_NEXT_BYTE() \ + tmpSi = cx->S[++tmpi]; \ + tmpj += tmpSi; \ + tmpSj = cx->S[tmpj]; \ + cx->S[tmpi] = tmpSj; \ + cx->S[tmpj] = tmpSi; \ + t = tmpSi + tmpSj; + +#ifdef CONVERT_TO_WORDS +/* + * Straight ARCFOUR op. No optimization. 
+ */ +static SECStatus +rc4_no_opt(RC4Context *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + PRUint8 t; + Stype tmpSi, tmpSj; + register PRUint8 tmpi = cx->i; /* work on local copies of the stream indices */ + register PRUint8 tmpj = cx->j; + unsigned int index; + PORT_Assert(maxOutputLen >= inputLen); + if (maxOutputLen < inputLen) { /* output buffer too small; cx is not modified */ + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + for (index = 0; index < inputLen; index++) { + /* Generate next byte from stream. */ + ARCFOUR_NEXT_BYTE(); + /* output = next stream byte XOR next input byte */ + output[index] = cx->S[t] ^ input[index]; + } + *outputLen = inputLen; /* exactly as many bytes are written as were read */ + cx->i = tmpi; /* store the advanced indices back in the context */ + cx->j = tmpj; + return SECSuccess; +} + +#else +/* !CONVERT_TO_WORDS */ + +/* + * Byte-at-a-time ARCFOUR, unrolling the loop into 8 pieces. + * + * Whole groups of 8 bytes go through the unrolled loop. The remaining + * inputLen % 8 bytes are handled by a fall-through switch that indexes + * backwards from the advanced input and output pointers. + * + * Fails with SEC_ERROR_OUTPUT_LEN when maxOutputLen < inputLen. Otherwise + * stores inputLen in *outputLen, writes the advanced stream indices back + * to cx, and returns SECSuccess. + */ +static SECStatus +rc4_unrolled(RC4Context *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + PRUint8 t; + Stype tmpSi, tmpSj; + register PRUint8 tmpi = cx->i; + register PRUint8 tmpj = cx->j; + int index; + PORT_Assert(maxOutputLen >= inputLen); + if (maxOutputLen < inputLen) { + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + for (index = inputLen / 8; index-- > 0; input += 8, output += 8) { + ARCFOUR_NEXT_BYTE(); + output[0] = cx->S[t] ^ input[0]; + ARCFOUR_NEXT_BYTE(); + output[1] = cx->S[t] ^ input[1]; + ARCFOUR_NEXT_BYTE(); + output[2] = cx->S[t] ^ input[2]; + ARCFOUR_NEXT_BYTE(); + output[3] = cx->S[t] ^ input[3]; + ARCFOUR_NEXT_BYTE(); + output[4] = cx->S[t] ^ input[4]; + ARCFOUR_NEXT_BYTE(); + output[5] = cx->S[t] ^ input[5]; + ARCFOUR_NEXT_BYTE(); + output[6] = cx->S[t] ^ input[6]; + ARCFOUR_NEXT_BYTE(); + output[7] = cx->S[t] ^ input[7]; + } + index = inputLen % 8; + if (index) { + input += index; + output += index; /* point past the tail so the cases below can index backwards */ + switch (index) { + case 7: + ARCFOUR_NEXT_BYTE(); + output[-7] = cx->S[t] ^ input[-7]; /* FALLTHRU */ + 
case 6: + ARCFOUR_NEXT_BYTE(); + output[-6] = cx->S[t] ^ input[-6]; /* FALLTHRU */ + case 5: + ARCFOUR_NEXT_BYTE(); + output[-5] = cx->S[t] ^ input[-5]; /* FALLTHRU */ + case 4: + ARCFOUR_NEXT_BYTE(); + output[-4] = cx->S[t] ^ input[-4]; /* FALLTHRU */ + case 3: + ARCFOUR_NEXT_BYTE(); + output[-3] = cx->S[t] ^ input[-3]; /* FALLTHRU */ + case 2: + ARCFOUR_NEXT_BYTE(); + output[-2] = cx->S[t] ^ input[-2]; /* FALLTHRU */ + case 1: + ARCFOUR_NEXT_BYTE(); + output[-1] = cx->S[t] ^ input[-1]; /* FALLTHRU */ + default: + /* FALLTHRU */ + ; /* hp-ux build breaks without this */ + } + } + cx->i = tmpi; + cx->j = tmpj; + *outputLen = inputLen; + return SECSuccess; +} +#endif /* CONVERT_TO_WORDS */ +/* + * ARCFOUR_NEXT4BYTES_L(n) / ARCFOUR_NEXT4BYTES_B(n): generate four keystream + * bytes and OR them into streamWord at bit offsets n, n + 8, n + 16, n + 24. + * The _L form puts the first generated byte at offset n; the _B form puts it + * at offset n + 24. Only the form matching the build endianness is defined. + */ +#ifdef IS_LITTLE_ENDIAN +#define ARCFOUR_NEXT4BYTES_L(n) \ + ARCFOUR_NEXT_BYTE(); \ + streamWord |= (WORD)cx->S[t] << (n); \ + ARCFOUR_NEXT_BYTE(); \ + streamWord |= (WORD)cx->S[t] << (n + 8); \ + ARCFOUR_NEXT_BYTE(); \ + streamWord |= (WORD)cx->S[t] << (n + 16); \ + ARCFOUR_NEXT_BYTE(); \ + streamWord |= (WORD)cx->S[t] << (n + 24); +#else +#define ARCFOUR_NEXT4BYTES_B(n) \ + ARCFOUR_NEXT_BYTE(); \ + streamWord |= (WORD)cx->S[t] << (n + 24); \ + ARCFOUR_NEXT_BYTE(); \ + streamWord |= (WORD)cx->S[t] << (n + 16); \ + ARCFOUR_NEXT_BYTE(); \ + streamWord |= (WORD)cx->S[t] << (n + 8); \ + ARCFOUR_NEXT_BYTE(); \ + streamWord |= (WORD)cx->S[t] << (n); +#endif + +#if (defined(IS_64) && !defined(__sparc)) || defined(NSS_USE_64) +/* 64-bit wordsize: ARCFOUR_NEXT_WORD() clears streamWord and gathers eight keystream bytes */ +#ifdef IS_LITTLE_ENDIAN +#define ARCFOUR_NEXT_WORD() \ + { \ + streamWord = 0; \ + ARCFOUR_NEXT4BYTES_L(0); \ + ARCFOUR_NEXT4BYTES_L(32); \ + } +#else +#define ARCFOUR_NEXT_WORD() \ + { \ + streamWord = 0; \ + ARCFOUR_NEXT4BYTES_B(32); \ + ARCFOUR_NEXT4BYTES_B(0); \ + } +#endif +#else +/* 32-bit wordsize: ARCFOUR_NEXT_WORD() clears streamWord and gathers four keystream bytes */ +#ifdef IS_LITTLE_ENDIAN +#define ARCFOUR_NEXT_WORD() \ + { \ + streamWord = 0; \ + ARCFOUR_NEXT4BYTES_L(0); \ + } +#else +#define ARCFOUR_NEXT_WORD() \ + { \ + streamWord = 0; \ + ARCFOUR_NEXT4BYTES_B(0); \ + } +#endif +#endif + +#ifdef IS_LITTLE_ENDIAN 
+#define RSH << +#define LSH >> +#else +#define RSH >> +#define LSH << +#endif + +/* + * Shift bookkeeping for assembling a WORD from individual bytes. + * LEFTMOST_BYTE_SHIFT is the bit position used for the first byte placed + * into a word; NEXT_BYTE_SHIFT(shift) gives the position for the byte after + * it. Both expansions are fully parenthesized so that they keep their + * meaning when used inside a larger expression. + */ +#ifdef IS_LITTLE_ENDIAN +#define LEFTMOST_BYTE_SHIFT 0 +#define NEXT_BYTE_SHIFT(shift) ((shift) + 8) +#else +#define LEFTMOST_BYTE_SHIFT (8 * (WORDSIZE - 1)) +#define NEXT_BYTE_SHIFT(shift) ((shift) - 8) +#endif + +#ifdef CONVERT_TO_WORDS +/* + * Word-at-a-time ARCFOUR. + * + * Fails with SEC_ERROR_OUTPUT_LEN when maxOutputLen < inputLen, and hands + * inputs shorter than 2 * WORDSIZE to rc4_no_opt. Otherwise *outputLen is + * set to inputLen and the work is done in three steps: bytes up to the + * first word boundary of output are produced bytewise, whole output words + * are produced with ARCFOUR_NEXT_WORD(), and the trailing partial word is + * produced bytewise again. + * + * inOffset and outOffset are the buffer addresses modulo WORDSIZE. pInWord + * and pOutWord are the buffer pointers moved back by those offsets. + * bufShift and invBufShift are the bit counts used to realign input words + * when the two offsets differ; bufShift is 0 when they are equal. + * + * NOTE(review): pInWord and pOutWord are cast from unsigned char pointers + * and pInWord may start before input; confirm this is acceptable for the + * compilers and platforms that enable CONVERT_TO_WORDS. + */ +static SECStatus +rc4_wordconv(RC4Context *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + PR_STATIC_ASSERT(sizeof(PRUword) == sizeof(ptrdiff_t)); + unsigned int inOffset = (PRUword)input % WORDSIZE; + unsigned int outOffset = (PRUword)output % WORDSIZE; + register WORD streamWord; + register const WORD *pInWord; + register WORD *pOutWord; + register WORD inWord, nextInWord; + PRUint8 t; + register Stype tmpSi, tmpSj; + register PRUint8 tmpi = cx->i; + register PRUint8 tmpj = cx->j; + unsigned int bufShift, invBufShift; + unsigned int i; + const unsigned char *finalIn; + unsigned char *finalOut; + + PORT_Assert(maxOutputLen >= inputLen); + if (maxOutputLen < inputLen) { + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + if (inputLen < 2 * WORDSIZE) { + /* Ignore word conversion, do byte-at-a-time */ + return rc4_no_opt(cx, output, outputLen, maxOutputLen, input, inputLen); + } + *outputLen = inputLen; + pInWord = (const WORD *)(input - inOffset); + pOutWord = (WORD *)(output - outOffset); + if (inOffset <= outOffset) { + bufShift = 8 * (outOffset - inOffset); + invBufShift = 8 * WORDSIZE - bufShift; + } else { + invBufShift = 8 * (inOffset - outOffset); + bufShift = 8 * WORDSIZE - invBufShift; + } + /*****************************************************************/ + /* Step 1: */ + /* If the first output word is partial, consume the bytes in the */ + /* first partial output word by loading one or two words of */ + /* input and shifting them accordingly. Otherwise, just load */ + /* in the first word of input. 
At the end of this block, at */ + /* least one partial word of input should ALWAYS be loaded. */ + /*****************************************************************/ + if (outOffset) { + unsigned int byteCount = WORDSIZE - outOffset; /* bytes up to the next output word boundary */ + for (i = 0; i < byteCount; i++) { + ARCFOUR_NEXT_BYTE(); + output[i] = cx->S[t] ^ input[i]; + } + /* Consumed byteCount bytes of input */ + inputLen -= byteCount; + pInWord++; + + /* move to next word of output */ + pOutWord++; + + /* If buffers are relatively misaligned, shift the bytes in inWord + * to be aligned to the output buffer. + */ + if (inOffset < outOffset) { + /* The first input word (which may be partial) has more bytes + * than needed. Copy the remainder to inWord. + */ + unsigned int shift = LEFTMOST_BYTE_SHIFT; + inWord = 0; + for (i = 0; i < outOffset - inOffset; i++) { + inWord |= (WORD)input[byteCount + i] << shift; + shift = NEXT_BYTE_SHIFT(shift); + } + } else if (inOffset > outOffset) { + /* Consumed some bytes in the second input word. Copy the + * remainder to inWord. + */ + inWord = *pInWord++; + inWord = inWord LSH invBufShift; + } else { + inWord = 0; /* offsets are equal: no input bytes are carried over */ + } + } else { + /* output is word-aligned */ + if (inOffset) { + /* Input is not word-aligned. The first word load of input + * will not produce a full word of input bytes, so one word + * must be pre-loaded. The main loop below will load in the + * next input word and shift some of its bytes into inWord + * in order to create a full input word. Note that the main + * loop must execute at least once because the input must + * be at least two words. + */ + unsigned int shift = LEFTMOST_BYTE_SHIFT; + inWord = 0; + for (i = 0; i < WORDSIZE - inOffset; i++) { + inWord |= (WORD)input[i] << shift; + shift = NEXT_BYTE_SHIFT(shift); + } + pInWord++; + } else { + /* Input is word-aligned. The first word load of input + * will produce a full word of input bytes, so nothing + * needs to be loaded here. 
+ */ + inWord = 0; + } + } + /*****************************************************************/ + /* Step 2: main loop */ + /* At this point the output buffer is word-aligned. Any unused */ + /* bytes from above will be in inWord (shifted correctly). If */ + /* the input buffer is unaligned relative to the output buffer, */ + /* shifting has to be done. */ + /*****************************************************************/ + if (bufShift) { + /* preloadedByteCount is the number of input bytes pre-loaded + * in inWord. + */ + unsigned int preloadedByteCount = bufShift / 8; + for (; inputLen >= preloadedByteCount + WORDSIZE; + inputLen -= WORDSIZE) { + nextInWord = *pInWord++; + inWord |= nextInWord RSH bufShift; /* complete the pending word with bytes of the new one */ + nextInWord = nextInWord LSH invBufShift; /* keep the bytes of the new word not used yet */ + ARCFOUR_NEXT_WORD(); + *pOutWord++ = inWord ^ streamWord; + inWord = nextInWord; + } + if (inputLen == 0) { + /* Nothing left to do: save the stream indices and return. */ + cx->i = tmpi; + cx->j = tmpj; + return SECSuccess; + } + finalIn = (const unsigned char *)pInWord - preloadedByteCount; /* step back over the bytes still pending in inWord */ + } else { + for (; inputLen >= WORDSIZE; inputLen -= WORDSIZE) { + inWord = *pInWord++; + ARCFOUR_NEXT_WORD(); + *pOutWord++ = inWord ^ streamWord; + } + if (inputLen == 0) { + /* Nothing left to do: save the stream indices and return. */ + cx->i = tmpi; + cx->j = tmpj; + return SECSuccess; + } + finalIn = (const unsigned char *)pInWord; + } + /*****************************************************************/ + /* Step 3: */ + /* Do the remaining partial word of input one byte at a time. 
*/ + /*****************************************************************/ + finalOut = (unsigned char *)pOutWord; + for (i = 0; i < inputLen; i++) { + ARCFOUR_NEXT_BYTE(); + finalOut[i] = cx->S[t] ^ finalIn[i]; + } + cx->i = tmpi; + cx->j = tmpj; + return SECSuccess; +} +#endif /* CONVERT_TO_WORDS */ +#endif /* NSS_BEVAND_ARCFOUR */ +/* + * RC4_Encrypt: process inputLen bytes from input into output. + * + * Fails with SEC_ERROR_OUTPUT_LEN when maxOutputLen < inputLen. Otherwise + * the work is delegated according to the build configuration: to the + * external ARCFOUR() routine when NSS_BEVAND_ARCFOUR is defined, to + * rc4_wordconv when CONVERT_TO_WORDS is defined, and to rc4_unrolled in + * all other builds. + */ +SECStatus +RC4_Encrypt(RC4Context *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + PORT_Assert(maxOutputLen >= inputLen); + if (maxOutputLen < inputLen) { + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } +#if defined(NSS_BEVAND_ARCFOUR) + ARCFOUR(cx, inputLen, input, output); + *outputLen = inputLen; + return SECSuccess; +#elif defined(CONVERT_TO_WORDS) + /* Convert the byte-stream to a word-stream */ + return rc4_wordconv(cx, output, outputLen, maxOutputLen, input, inputLen); +#else + /* Operate on bytes, but unroll the main loop */ + return rc4_unrolled(cx, output, outputLen, maxOutputLen, input, inputLen); +#endif +} +/* + * RC4_Decrypt: performs the same length check as RC4_Encrypt before + * processing the input. + */ +SECStatus +RC4_Decrypt(RC4Context *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + PORT_Assert(maxOutputLen >= inputLen); + if (maxOutputLen < inputLen) { + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } +/* decrypt and encrypt are same operation. 
*/ +#if defined(NSS_BEVAND_ARCFOUR) + ARCFOUR(cx, inputLen, input, output); + *outputLen = inputLen; + return SECSuccess; +#elif defined(CONVERT_TO_WORDS) + /* Convert the byte-stream to a word-stream */ + return rc4_wordconv(cx, output, outputLen, maxOutputLen, input, inputLen); +#else + /* Operate on bytes, but unroll the main loop */ + return rc4_unrolled(cx, output, outputLen, maxOutputLen, input, inputLen); +#endif +} + +#undef CONVERT_TO_WORDS +#undef USE_WORD diff --git a/security/nss/lib/freebl/blapi.h b/security/nss/lib/freebl/blapi.h new file mode 100644 index 000000000..e5a6cf30e --- /dev/null +++ b/security/nss/lib/freebl/blapi.h @@ -0,0 +1,1625 @@ +/* + * blapi.h - public prototypes for the freebl library + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef _BLAPI_H_ +#define _BLAPI_H_ + +#include "blapit.h" +#include "hasht.h" +#include "alghmac.h" + +SEC_BEGIN_PROTOS + +/* +** RSA encryption/decryption. When encrypting/decrypting the output +** buffer must be at least the size of the public key modulus. +*/ + +extern SECStatus BL_Init(void); + +/* +** Generate and return a new RSA public and private key. +** Both keys are encoded in a single RSAPrivateKey structure. +** "cx" is the random number generator context +** "keySizeInBits" is the size of the key to be generated, in bits. +** 512, 1024, etc. +** "publicExponent" when not NULL is a pointer to some data that +** represents the public exponent to use. The data is a byte +** encoded integer, in "big endian" order. +*/ +extern RSAPrivateKey *RSA_NewKey(int keySizeInBits, + SECItem *publicExponent); + +/* +** Perform a raw public-key operation +** Length of input and output buffers are equal to key's modulus len. 
+*/ +extern SECStatus RSA_PublicKeyOp(RSAPublicKey *key, + unsigned char *output, + const unsigned char *input); + +/* +** Perform a raw private-key operation +** Length of input and output buffers are equal to key's modulus len. +*/ +extern SECStatus RSA_PrivateKeyOp(RSAPrivateKey *key, + unsigned char *output, + const unsigned char *input); + +/* +** Perform a raw private-key operation, and check the parameters used in +** the operation for validity by performing a test operation first. +** Length of input and output buffers are equal to key's modulus len. +*/ +extern SECStatus RSA_PrivateKeyOpDoubleChecked(RSAPrivateKey *key, + unsigned char *output, + const unsigned char *input); + +/* +** Perform a check of private key parameters for consistency. +*/ +extern SECStatus RSA_PrivateKeyCheck(const RSAPrivateKey *key); + +/* +** Given only minimal private key parameters, fill in the rest of the +** parameters. +** +** +** All the entries, including those supplied by the caller, will be +** overwritten with data allocated out of the arena. +** +** If no arena is supplied, one will be created. +** +** The following fields must be supplied in order for this function +** to succeed: +** one of either publicExponent or privateExponent +** two more of the following 5 parameters (not counting the above). +** modulus (n) +** prime1 (p) +** prime2 (q) +** publicExponent (e) +** privateExponent (d) +** +** NOTE: if only the publicExponent, privateExponent, and one prime is given, +** then there may be more than one RSA key that matches that combination. If +** we find 2 possible valid keys that meet these criteria, we return an error. +** If we return the wrong key, and the original modulus is compared to the +** new modulus, both can be factored by calculating gcd(n_old,n_new) to get +** the common prime. +** +** NOTE: in some cases the publicExponent must be less than 2^23 for this +** function to work correctly. 
(The case where we have only one of: modulus, +** prime1 and prime2). +** +** All parameters will be replaced in the key structure with new parameters +** allocated out of the arena. There is no attempt to free the old structures. +** prime1 will always be greater than prime2 (even if the caller supplies the +** smaller prime as prime1 or the larger prime as prime2). The parameters are +** not overwritten on failure. +** +** While the remaining Chinese remainder theorem parameters (dp, dq, and qinv) +** can also be used in reconstructing the private key, they are currently +** ignored in this implementation. +*/ +extern SECStatus RSA_PopulatePrivateKey(RSAPrivateKey *key); + +/******************************************************************** +** RSA algorithm +*/ + +/******************************************************************** +** Raw signing/encryption/decryption operations. +** +** No padding or formatting will be applied. +** inputLen MUST be equivalent to the modulus size (in bytes). 
+*/ +extern SECStatus +RSA_SignRaw(RSAPrivateKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen); + +extern SECStatus +RSA_CheckSignRaw(RSAPublicKey *key, + const unsigned char *sig, + unsigned int sigLen, + const unsigned char *hash, + unsigned int hashLen); + +extern SECStatus +RSA_CheckSignRecoverRaw(RSAPublicKey *key, + unsigned char *data, + unsigned int *dataLen, + unsigned int maxDataLen, + const unsigned char *sig, + unsigned int sigLen); + +extern SECStatus +RSA_EncryptRaw(RSAPublicKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen); + +extern SECStatus +RSA_DecryptRaw(RSAPrivateKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen); + +/******************************************************************** +** RSAES-OAEP encryption/decryption, as defined in RFC 3447, Section 7.1. +** +** Note: Only MGF1 is supported as the mask generation function. It will be +** used with maskHashAlg as the inner hash function. +** +** Unless performing Known Answer Tests, "seed" should be NULL, indicating that +** freebl should generate a random value. Otherwise, it should be an octet +** string of seedLen bytes, which should be the same size as the output of +** hashAlg. 
+*/ +extern SECStatus +RSA_EncryptOAEP(RSAPublicKey *key, + HASH_HashType hashAlg, + HASH_HashType maskHashAlg, + const unsigned char *label, + unsigned int labelLen, + const unsigned char *seed, + unsigned int seedLen, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen); + +extern SECStatus +RSA_DecryptOAEP(RSAPrivateKey *key, + HASH_HashType hashAlg, + HASH_HashType maskHashAlg, + const unsigned char *label, + unsigned int labelLen, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen); + +/******************************************************************** +** RSAES-PKCS1-v1_5 encryption/decryption, as defined in RFC 3447, Section 7.2. +*/ +extern SECStatus +RSA_EncryptBlock(RSAPublicKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen); + +extern SECStatus +RSA_DecryptBlock(RSAPrivateKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen); + +/******************************************************************** +** RSASSA-PSS signing/verifying, as defined in RFC 3447, Section 8.1. +** +** Note: Only MGF1 is supported as the mask generation function. It will be +** used with maskHashAlg as the inner hash function. +** +** Unless performing Known Answer Tests, "salt" should be NULL, indicating that +** freebl should generate a random value. 
+*/ +extern SECStatus +RSA_SignPSS(RSAPrivateKey *key, + HASH_HashType hashAlg, + HASH_HashType maskHashAlg, + const unsigned char *salt, + unsigned int saltLen, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen); + +extern SECStatus +RSA_CheckSignPSS(RSAPublicKey *key, + HASH_HashType hashAlg, + HASH_HashType maskHashAlg, + unsigned int saltLen, + const unsigned char *sig, + unsigned int sigLen, + const unsigned char *hash, + unsigned int hashLen); + +/******************************************************************** +** RSASSA-PKCS1-v1_5 signing/verifying, as defined in RFC 3447, Section 8.2. +** +** These functions expect as input to be the raw value to be signed. For most +** cases using PKCS1-v1_5, this should be the value of T, the DER-encoded +** DigestInfo structure defined in Section 9.2, Step 2. +** Note: This can also be used for signatures that use PKCS1-v1_5 padding, such +** as the signatures used in SSL/TLS, which sign a raw hash. +*/ +extern SECStatus +RSA_Sign(RSAPrivateKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *data, + unsigned int dataLen); + +extern SECStatus +RSA_CheckSign(RSAPublicKey *key, + const unsigned char *sig, + unsigned int sigLen, + const unsigned char *data, + unsigned int dataLen); + +extern SECStatus +RSA_CheckSignRecover(RSAPublicKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *sig, + unsigned int sigLen); + +/******************************************************************** +** DSA signing algorithm +*/ + +/* Generate a new random value within the interval [2, q-1]. +*/ +extern SECStatus DSA_NewRandom(PLArenaPool *arena, const SECItem *q, + SECItem *random); + +/* +** Generate and return a new DSA public and private key pair, +** both of which are encoded into a single DSAPrivateKey struct. 
+** "params" is a pointer to the PQG parameters for the domain +** Uses a random seed. +*/ +extern SECStatus DSA_NewKey(const PQGParams *params, + DSAPrivateKey **privKey); + +/* signature is caller-supplied buffer of at least 20 bytes. +** On input, signature->len == size of buffer to hold signature. +** digest->len == size of digest. +** On output, signature->len == size of signature in buffer. +** Uses a random seed. +*/ +extern SECStatus DSA_SignDigest(DSAPrivateKey *key, + SECItem *signature, + const SECItem *digest); + +/* signature is caller-supplied buffer of at least 20 bytes. +** On input, signature->len == size of buffer to hold signature. +** digest->len == size of digest. +*/ +extern SECStatus DSA_VerifyDigest(DSAPublicKey *key, + const SECItem *signature, + const SECItem *digest); + +/* For FIPS compliance testing. Seed must be exactly 20 bytes long */ +extern SECStatus DSA_NewKeyFromSeed(const PQGParams *params, + const unsigned char *seed, + DSAPrivateKey **privKey); + +/* For FIPS compliance testing. Seed must be exactly 20 bytes. */ +extern SECStatus DSA_SignDigestWithSeed(DSAPrivateKey *key, + SECItem *signature, + const SECItem *digest, + const unsigned char *seed); + +/****************************************************** +** Diffie-Hellman key exchange algorithm +*/ + +/* Generates parameters for Diffie-Hellman key generation. +** primeLen is the length in bytes of prime P to be generated. +*/ +extern SECStatus DH_GenParam(int primeLen, DHParams **params); + +/* Generates a public and private key, both of which are encoded in a single +** DHPrivateKey struct. Params is input, privKey is output. +** This is Phase 1 of Diffie-Hellman. +*/ +extern SECStatus DH_NewKey(DHParams *params, + DHPrivateKey **privKey); + +/* +** DH_Derive does the Diffie-Hellman phase 2 calculation, using the +** other party's publicValue, and the prime and our privateValue. +** outBytes is the requested length of the generated secret in bytes. 
+** A zero value means produce a value of any length up to the size of +** the prime. If successful, derivedSecret->data is set +** to the address of the newly allocated buffer containing the derived +** secret, and derivedSecret->len is the size of the secret produced. +** The size of the secret produced will depend on the value of outBytes. +** If outBytes is 0, the key length will be all the significant bytes of +** the derived secret (leading zeros are dropped). This length could be less +** than the length of the prime. If outBytes is nonzero, the length of the +** produced key will be outBytes long. If the key is truncated, the most +** significant bytes are truncated. If it is expanded, zero bytes are added +** at the beginning. +** It is the caller's responsibility to free the allocated buffer +** containing the derived secret. +*/ +extern SECStatus DH_Derive(SECItem *publicValue, + SECItem *prime, + SECItem *privateValue, + SECItem *derivedSecret, + unsigned int outBytes); + +/* +** KEA_Derive returns an octet string (presumably in derivedSecret; confirm +** against the implementation) with the private key for a dual +** Diffie-Hellman key generation as specified for government key exchange. +*/ +extern SECStatus KEA_Derive(SECItem *prime, + SECItem *public1, + SECItem *public2, + SECItem *private1, + SECItem *private2, + SECItem *derivedSecret); + +/* + * verify that a KEA or DSA public key is a valid key for this prime and + * subprime domain. + */ +extern PRBool KEA_Verify(SECItem *Y, SECItem *prime, SECItem *subPrime); + +/**************************************** + * J-PAKE key transport + */ + +/* Given gx == g^x, create a Schnorr zero-knowledge proof for the value x + * using the specified hash algorithm and signer ID. The signature is + * returned in the values gv and r. testRandom must be NULL for a PRNG + * generated random commitment to be used in the signature. 
When testRandom + * is non-NULL, that value must contain a value in the subgroup q; that + * value will be used instead of a PRNG-generated commitment in order to + * facilitate known-answer tests. + * + * If gxIn is non-NULL then it must contain a pre-computed value of g^x that + * will be used by the function; in this case, the gxOut parameter must be NULL. + * If the gxIn parameter is NULL then gxOut must be non-NULL; in this case + * gxOut will contain the value g^x on output. + * + * gx (if not supplied by the caller), gv, and r will be allocated in the arena. + * The arena is *not* optional so do not pass NULL for the arena parameter. + * The arena should be zeroed when it is freed. + */ +SECStatus +JPAKE_Sign(PLArenaPool *arena, const PQGParams *pqg, HASH_HashType hashType, + const SECItem *signerID, const SECItem *x, + const SECItem *testRandom, const SECItem *gxIn, SECItem *gxOut, + SECItem *gv, SECItem *r); + +/* Given gx == g^x, verify the Schnorr zero-knowledge proof (gv, r) for the + * value x using the specified hash algorithm and signer ID. + * + * The arena is *not* optional so do not pass NULL for the arena parameter. + */ +SECStatus +JPAKE_Verify(PLArenaPool *arena, const PQGParams *pqg, + HASH_HashType hashType, const SECItem *signerID, + const SECItem *peerID, const SECItem *gx, + const SECItem *gv, const SECItem *r); + +/* Call before round 2 with x2, s, and x2s all non-NULL. This will calculate + * base = g^(x1+x3+x4) (mod p) and x2s = x2*s (mod q). The values to send in + * round 2 (A and the proof of knowledge of x2s) can then be calculated with + * JPAKE_Sign using pqg->base = base and x = x2s. + * + * Call after round 2 with x2, s, and x2s all NULL, and passing (gx1, gx2, gx3) + * instead of (gx1, gx3, gx4). This will calculate base = g^(x1+x2+x3). Then call + * JPAKE_Verify with pqg->base = base and then JPAKE_Final. + * + * base and x2s will be allocated in the arena. 
The arena is *not* optional so + * do not pass NULL for the arena parameter. The arena should be zeroed when it + * is freed. +*/ +SECStatus +JPAKE_Round2(PLArenaPool *arena, const SECItem *p, const SECItem *q, + const SECItem *gx1, const SECItem *gx3, const SECItem *gx4, + SECItem *base, const SECItem *x2, const SECItem *s, SECItem *x2s); + +/* K = (B/g^(x2*x4*s))^x2 (mod p) + * + * K will be allocated in the arena. The arena is *not* optional so do not pass + * NULL for the arena parameter. The arena should be zeroed when it is freed. + */ +SECStatus +JPAKE_Final(PLArenaPool *arena, const SECItem *p, const SECItem *q, + const SECItem *x2, const SECItem *gx4, const SECItem *x2s, + const SECItem *B, SECItem *K); + +/****************************************************** +** Elliptic Curve algorithms +*/ + +/* Generates a public and private key, both of which are encoded +** in a single ECPrivateKey struct. Params is input, privKey are +** output. +*/ +extern SECStatus EC_NewKey(ECParams *params, + ECPrivateKey **privKey); + +extern SECStatus EC_NewKeyFromSeed(ECParams *params, + ECPrivateKey **privKey, + const unsigned char *seed, + int seedlen); + +/* Validates an EC public key as described in Section 5.2.2 of + * X9.62. Such validation prevents against small subgroup attacks + * when the ECDH primitive is used with the cofactor. + */ +extern SECStatus EC_ValidatePublicKey(ECParams *params, + SECItem *publicValue); + +/* +** ECDH_Derive performs a scalar point multiplication of a point +** representing a (peer's) public key and a large integer representing +** a private key (its own). Both keys must use the same elliptic curve +** parameters. If the withCofactor parameter is true, the +** multiplication also uses the cofactor associated with the curve +** parameters. The output of this scheme is the x-coordinate of the +** resulting point. 
If successful, derivedSecret->data is set to the +** address of the newly allocated buffer containing the derived +** secret, and derivedSecret->len is the size of the secret +** produced. It is the caller's responsibility to free the allocated +** buffer containing the derived secret. +*/ +extern SECStatus ECDH_Derive(SECItem *publicValue, + ECParams *params, + SECItem *privateValue, + PRBool withCofactor, + SECItem *derivedSecret); + +/* On input, signature->len == size of buffer to hold signature. +** digest->len == size of digest. +** On output, signature->len == size of signature in buffer. +** Uses a random seed. +*/ +extern SECStatus ECDSA_SignDigest(ECPrivateKey *key, + SECItem *signature, + const SECItem *digest); + +/* On input, signature->len == size of buffer to hold signature. +** digest->len == size of digest. +*/ +extern SECStatus ECDSA_VerifyDigest(ECPublicKey *key, + const SECItem *signature, + const SECItem *digest); + +/* Uses the provided seed. */ +extern SECStatus ECDSA_SignDigestWithSeed(ECPrivateKey *key, + SECItem *signature, + const SECItem *digest, + const unsigned char *seed, + const int seedlen); + +/******************************************/ +/* +** RC4 symmetric stream cypher +*/ + +/* +** Create a new RC4 context suitable for RC4 encryption/decryption. +** "key" raw key data +** "len" the number of bytes of key data +*/ +extern RC4Context *RC4_CreateContext(const unsigned char *key, int len); + +extern RC4Context *RC4_AllocateContext(void); +extern SECStatus RC4_InitContext(RC4Context *cx, + const unsigned char *key, + unsigned int keylen, + const unsigned char *, + int, + unsigned int, + unsigned int); + +/* +** Destroy an RC4 encryption/decryption context. +** "cx" the context +** "freeit" if PR_TRUE then free the object as well as its sub-objects +*/ +extern void RC4_DestroyContext(RC4Context *cx, PRBool freeit); + +/* +** Perform RC4 encryption. +** "cx" the context +** "output" the output buffer to store the encrypted data. 
+** "outputLen" how much data is stored in "output". Set by the routine +** after some data is stored in output. +** "maxOutputLen" the maximum amount of data that can ever be +** stored in "output" +** "input" the input data +** "inputLen" the amount of input data +*/ +extern SECStatus RC4_Encrypt(RC4Context *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + +/* +** Perform RC4 decryption. +** "cx" the context +** "output" the output buffer to store the decrypted data. +** "outputLen" how much data is stored in "output". Set by the routine +** after some data is stored in output. +** "maxOutputLen" the maximum amount of data that can ever be +** stored in "output" +** "input" the input data +** "inputLen" the amount of input data +*/ +extern SECStatus RC4_Decrypt(RC4Context *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + +/******************************************/ +/* +** RC2 symmetric block cypher +*/ + +/* +** Create a new RC2 context suitable for RC2 encryption/decryption. +** "key" raw key data +** "len" the number of bytes of key data +** "iv" is the CBC initialization vector (if mode is NSS_RC2_CBC) +** "mode" one of NSS_RC2 or NSS_RC2_CBC +** "effectiveKeyLen" is the effective key length (as specified in +** RFC 2268) in bytes (not bits). +** +** When mode is set to NSS_RC2_CBC the RC2 cipher is run in "cipher block +** chaining" mode. +*/ +extern RC2Context *RC2_CreateContext(const unsigned char *key, unsigned int len, + const unsigned char *iv, int mode, + unsigned effectiveKeyLen); +extern RC2Context *RC2_AllocateContext(void); +extern SECStatus RC2_InitContext(RC2Context *cx, + const unsigned char *key, + unsigned int keylen, + const unsigned char *iv, + int mode, + unsigned int effectiveKeyLen, + unsigned int); + +/* +** Destroy an RC2 encryption/decryption context. 
+** "cx" the context +** "freeit" if PR_TRUE then free the object as well as its sub-objects +*/ +extern void RC2_DestroyContext(RC2Context *cx, PRBool freeit); + +/* +** Perform RC2 encryption. +** "cx" the context +** "output" the output buffer to store the encrypted data. +** "outputLen" how much data is stored in "output". Set by the routine +** after some data is stored in output. +** "maxOutputLen" the maximum amount of data that can ever be +** stored in "output" +** "input" the input data +** "inputLen" the amount of input data +*/ +extern SECStatus RC2_Encrypt(RC2Context *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + +/* +** Perform RC2 decryption. +** "cx" the context +** "output" the output buffer to store the decrypted data. +** "outputLen" how much data is stored in "output". Set by the routine +** after some data is stored in output. +** "maxOutputLen" the maximum amount of data that can ever be +** stored in "output" +** "input" the input data +** "inputLen" the amount of input data +*/ +extern SECStatus RC2_Decrypt(RC2Context *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + +/******************************************/ +/* +** RC5 symmetric block cypher -- 64-bit block size +*/ + +/* +** Create a new RC5 context suitable for RC5 encryption/decryption. +** "key" raw key data +** "len" the number of bytes of key data +** "iv" is the CBC initialization vector (if mode is NSS_RC5_CBC) +** "mode" one of NSS_RC5 or NSS_RC5_CBC +** +** When mode is set to NSS_RC5_CBC the RC5 cipher is run in "cipher block +** chaining" mode. 
+*/ +extern RC5Context *RC5_CreateContext(const SECItem *key, unsigned int rounds, + unsigned int wordSize, const unsigned char *iv, int mode); +extern RC5Context *RC5_AllocateContext(void); +extern SECStatus RC5_InitContext(RC5Context *cx, + const unsigned char *key, + unsigned int keylen, + const unsigned char *iv, + int mode, + unsigned int rounds, + unsigned int wordSize); + +/* +** Destroy an RC5 encryption/decryption context. +** "cx" the context +** "freeit" if PR_TRUE then free the object as well as its sub-objects +*/ +extern void RC5_DestroyContext(RC5Context *cx, PRBool freeit); + +/* +** Perform RC5 encryption. +** "cx" the context +** "output" the output buffer to store the encrypted data. +** "outputLen" how much data is stored in "output". Set by the routine +** after some data is stored in output. +** "maxOutputLen" the maximum amount of data that can ever be +** stored in "output" +** "input" the input data +** "inputLen" the amount of input data +*/ +extern SECStatus RC5_Encrypt(RC5Context *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + +/* +** Perform RC5 decryption. +** "cx" the context +** "output" the output buffer to store the decrypted data. +** "outputLen" how much data is stored in "output". Set by the routine +** after some data is stored in output. +** "maxOutputLen" the maximum amount of data that can ever be +** stored in "output" +** "input" the input data +** "inputLen" the amount of input data +*/ + +extern SECStatus RC5_Decrypt(RC5Context *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + +/******************************************/ +/* +** DES symmetric block cypher +*/ + +/* +** Create a new DES context suitable for DES encryption/decryption. 
+** "key" raw key data +** "len" the number of bytes of key data +** "iv" is the CBC initialization vector (if mode is NSS_DES_CBC or +** mode is NSS_DES_EDE3_CBC) +** "mode" one of NSS_DES, NSS_DES_CBC, NSS_DES_EDE3 or NSS_DES_EDE3_CBC +** "encrypt" is PR_TRUE if the context will be used for encryption +** +** When mode is set to NSS_DES_CBC or NSS_DES_EDE3_CBC then the DES +** cipher is run in "cipher block chaining" mode. +*/ +extern DESContext *DES_CreateContext(const unsigned char *key, + const unsigned char *iv, + int mode, PRBool encrypt); +extern DESContext *DES_AllocateContext(void); +extern SECStatus DES_InitContext(DESContext *cx, + const unsigned char *key, + unsigned int keylen, + const unsigned char *iv, + int mode, + unsigned int encrypt, + unsigned int); + +/* +** Destroy a DES encryption/decryption context. +** "cx" the context +** "freeit" if PR_TRUE then free the object as well as its sub-objects +*/ +extern void DES_DestroyContext(DESContext *cx, PRBool freeit); + +/* +** Perform DES encryption. +** "cx" the context +** "output" the output buffer to store the encrypted data. +** "outputLen" how much data is stored in "output". Set by the routine +** after some data is stored in output. +** "maxOutputLen" the maximum amount of data that can ever be +** stored in "output" +** "input" the input data +** "inputLen" the amount of input data +** +** NOTE: the inputLen must be a multiple of DES_KEY_LENGTH +*/ +extern SECStatus DES_Encrypt(DESContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + +/* +** Perform DES decryption. +** "cx" the context +** "output" the output buffer to store the decrypted data. +** "outputLen" how much data is stored in "output". Set by the routine +** after some data is stored in output.
+** "maxOutputLen" the maximum amount of data that can ever be +** stored in "output" +** "input" the input data +** "inputLen" the amount of input data +** +** NOTE: the inputLen must be a multiple of DES_KEY_LENGTH +*/ +extern SECStatus DES_Decrypt(DESContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + +/******************************************/ +/* +** SEED symmetric block cypher +*/ +extern SEEDContext * +SEED_CreateContext(const unsigned char *key, const unsigned char *iv, + int mode, PRBool encrypt); +extern SEEDContext *SEED_AllocateContext(void); +extern SECStatus SEED_InitContext(SEEDContext *cx, + const unsigned char *key, + unsigned int keylen, + const unsigned char *iv, + int mode, unsigned int encrypt, + unsigned int); +extern void SEED_DestroyContext(SEEDContext *cx, PRBool freeit); +extern SECStatus +SEED_Encrypt(SEEDContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); +extern SECStatus +SEED_Decrypt(SEEDContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + +/******************************************/ +/* +** AES symmetric block cypher (Rijndael) +*/ + +/* +** Create a new AES context suitable for AES encryption/decryption. +** "key" raw key data +** "keylen" the number of bytes of key data (16, 24, or 32) +** "blocklen" is the blocksize to use (16, 24, or 32) +** XXX currently only blocksize==16 has been tested! 
+*/ +extern AESContext * +AES_CreateContext(const unsigned char *key, const unsigned char *iv, + int mode, int encrypt, + unsigned int keylen, unsigned int blocklen); +extern AESContext *AES_AllocateContext(void); +extern SECStatus AES_InitContext(AESContext *cx, + const unsigned char *key, + unsigned int keylen, + const unsigned char *iv, + int mode, + unsigned int encrypt, + unsigned int blocklen); + +/* +** Destroy an AES encryption/decryption context. +** "cx" the context +** "freeit" if PR_TRUE then free the object as well as its sub-objects +*/ +extern void +AES_DestroyContext(AESContext *cx, PRBool freeit); + +/* +** Perform AES encryption. +** "cx" the context +** "output" the output buffer to store the encrypted data. +** "outputLen" how much data is stored in "output". Set by the routine +** after some data is stored in output. +** "maxOutputLen" the maximum amount of data that can ever be +** stored in "output" +** "input" the input data +** "inputLen" the amount of input data +*/ +extern SECStatus +AES_Encrypt(AESContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + +/* +** Perform AES decryption. +** "cx" the context +** "output" the output buffer to store the decrypted data. +** "outputLen" how much data is stored in "output". Set by the routine +** after some data is stored in output. +** "maxOutputLen" the maximum amount of data that can ever be +** stored in "output" +** "input" the input data +** "inputLen" the amount of input data +*/ +extern SECStatus +AES_Decrypt(AESContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + +/******************************************/ +/* +** AES key wrap algorithm, RFC 3394 +*/ + +/* +** Create a new AES KeyWrap context suitable for AES key wrapping/unwrapping.
+** "key" raw key data +** "iv" The 8 byte "initial value" +** "encrypt", a boolean, true for key wrapping, false for unwrapping. +** "keylen" the number of bytes of key data (16, 24, or 32) +*/ +extern AESKeyWrapContext * +AESKeyWrap_CreateContext(const unsigned char *key, const unsigned char *iv, + int encrypt, unsigned int keylen); +extern AESKeyWrapContext *AESKeyWrap_AllocateContext(void); +extern SECStatus +AESKeyWrap_InitContext(AESKeyWrapContext *cx, + const unsigned char *key, + unsigned int keylen, + const unsigned char *iv, + int, + unsigned int encrypt, + unsigned int); + +/* +** Destroy an AES KeyWrap context. +** "cx" the context +** "freeit" if PR_TRUE then free the object as well as its sub-objects +*/ +extern void +AESKeyWrap_DestroyContext(AESKeyWrapContext *cx, PRBool freeit); + +/* +** Perform AES key wrap. +** "cx" the context +** "output" the output buffer to store the encrypted data. +** "outputLen" how much data is stored in "output". Set by the routine +** after some data is stored in output. +** "maxOutputLen" the maximum amount of data that can ever be +** stored in "output" +** "input" the input data +** "inputLen" the amount of input data +*/ +extern SECStatus +AESKeyWrap_Encrypt(AESKeyWrapContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + +/* +** Perform AES key unwrap. +** "cx" the context +** "output" the output buffer to store the decrypted data. +** "outputLen" how much data is stored in "output". Set by the routine +** after some data is stored in output.
+** "maxOutputLen" the maximum amount of data that can ever be +** stored in "output" +** "input" the input data +** "inputLen" the amount of input data +*/ +extern SECStatus +AESKeyWrap_Decrypt(AESKeyWrapContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + +/******************************************/ +/* +** Camellia symmetric block cypher +*/ + +/* +** Create a new Camellia context suitable for Camellia encryption/decryption. +** "key" raw key data +** "keylen" the number of bytes of key data (16, 24, or 32) +*/ +extern CamelliaContext * +Camellia_CreateContext(const unsigned char *key, const unsigned char *iv, + int mode, int encrypt, unsigned int keylen); + +extern CamelliaContext *Camellia_AllocateContext(void); +extern SECStatus Camellia_InitContext(CamelliaContext *cx, + const unsigned char *key, + unsigned int keylen, + const unsigned char *iv, + int mode, + unsigned int encrypt, + unsigned int unused); +/* +** Destroy a Camellia encryption/decryption context. +** "cx" the context +** "freeit" if PR_TRUE then free the object as well as its sub-objects +*/ +extern void +Camellia_DestroyContext(CamelliaContext *cx, PRBool freeit); + +/* +** Perform Camellia encryption. +** "cx" the context +** "output" the output buffer to store the encrypted data. +** "outputLen" how much data is stored in "output". Set by the routine +** after some data is stored in output. +** "maxOutputLen" the maximum amount of data that can ever be +** stored in "output" +** "input" the input data +** "inputLen" the amount of input data +*/ +extern SECStatus +Camellia_Encrypt(CamelliaContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + +/* +** Perform Camellia decryption. +** "cx" the context +** "output" the output buffer to store the decrypted data. +** "outputLen" how much data is stored in "output". 
Set by the routine +** after some data is stored in output. +** "maxOutputLen" the maximum amount of data that can ever be +** stored in "output" +** "input" the input data +** "inputLen" the amount of input data +*/ +extern SECStatus +Camellia_Decrypt(CamelliaContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + +/******************************************/ +/* +** ChaCha20+Poly1305 AEAD +*/ + +extern SECStatus ChaCha20Poly1305_InitContext(ChaCha20Poly1305Context *ctx, + const unsigned char *key, + unsigned int keyLen, + unsigned int tagLen); + +extern ChaCha20Poly1305Context *ChaCha20Poly1305_CreateContext( + const unsigned char *key, unsigned int keyLen, unsigned int tagLen); + +extern void ChaCha20Poly1305_DestroyContext(ChaCha20Poly1305Context *ctx, + PRBool freeit); + +extern SECStatus ChaCha20Poly1305_Seal( + const ChaCha20Poly1305Context *ctx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen, + const unsigned char *nonce, unsigned int nonceLen, + const unsigned char *ad, unsigned int adLen); + +extern SECStatus ChaCha20Poly1305_Open( + const ChaCha20Poly1305Context *ctx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen, + const unsigned char *nonce, unsigned int nonceLen, + const unsigned char *ad, unsigned int adLen); + +/******************************************/ +/* +** MD5 secure hash function +*/ + +/* +** Hash a null terminated string "src" into "dest" using MD5 +*/ +extern SECStatus MD5_Hash(unsigned char *dest, const char *src); + +/* +** Hash a non-null terminated string "src" into "dest" using MD5 +*/ +extern SECStatus MD5_HashBuf(unsigned char *dest, const unsigned char *src, + PRUint32 src_length); + +/* +** Create a new MD5 context +*/ +extern MD5Context *MD5_NewContext(void); + +/* +** Destroy 
an MD5 secure hash context. +** "cx" the context +** "freeit" if PR_TRUE then free the object as well as its sub-objects +*/ +extern void MD5_DestroyContext(MD5Context *cx, PRBool freeit); + +/* +** Reset an MD5 context, preparing it for a fresh round of hashing +*/ +extern void MD5_Begin(MD5Context *cx); + +/* +** Update the MD5 hash function with more data. +** "cx" the context +** "input" the data to hash +** "inputLen" the amount of data to hash +*/ +extern void MD5_Update(MD5Context *cx, + const unsigned char *input, unsigned int inputLen); + +/* +** Finish the MD5 hash function. Produce the digested results in "digest" +** "cx" the context +** "digest" where the 16 bytes of digest data are stored +** "digestLen" where the digest length (16) is stored +** "maxDigestLen" the maximum amount of data that can ever be +** stored in "digest" +*/ +extern void MD5_End(MD5Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen); + +/* +** Export the current state of the MD5 hash without appending the standard +** padding and length bytes. 
Produce the digested results in "digest" +** "cx" the context +** "digest" where the 16 bytes of digest data are stored +** "digestLen" where the digest length (16) is stored (optional) +** "maxDigestLen" the maximum amount of data that can ever be +** stored in "digest" +*/ +extern void MD5_EndRaw(MD5Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen); + +/* + * Return the size of a buffer needed to flatten the MD5 Context into + * "cx" the context + * returns size; + */ +extern unsigned int MD5_FlattenSize(MD5Context *cx); + +/* + * Flatten the MD5 Context into a buffer: + * "cx" the context + * "space" the buffer to flatten to + * returns status; + */ +extern SECStatus MD5_Flatten(MD5Context *cx, unsigned char *space); + +/* + * Resurrect a flattened context into an MD5 Context + * "space" the buffer holding the flattened context + * "arg" ptr to void used by cryptographic resurrect + * returns resurrected context; + */ +extern MD5Context *MD5_Resurrect(unsigned char *space, void *arg); +extern void MD5_Clone(MD5Context *dest, MD5Context *src); + +/* +** trace the intermediate state info of the MD5 hash. +*/ +extern void MD5_TraceState(MD5Context *cx); + +/******************************************/ +/* +** MD2 secure hash function +*/ + +/* +** Hash a null terminated string "src" into "dest" using MD2 +*/ +extern SECStatus MD2_Hash(unsigned char *dest, const char *src); + +/* +** Create a new MD2 context +*/ +extern MD2Context *MD2_NewContext(void); + +/* +** Destroy an MD2 secure hash context. +** "cx" the context +** "freeit" if PR_TRUE then free the object as well as its sub-objects +*/ +extern void MD2_DestroyContext(MD2Context *cx, PRBool freeit); + +/* +** Reset an MD2 context, preparing it for a fresh round of hashing +*/ +extern void MD2_Begin(MD2Context *cx); + +/* +** Update the MD2 hash function with more data.
+** "cx" the context +** "input" the data to hash +** "inputLen" the amount of data to hash +*/ +extern void MD2_Update(MD2Context *cx, + const unsigned char *input, unsigned int inputLen); + +/* +** Finish the MD2 hash function. Produce the digested results in "digest" +** "cx" the context +** "digest" where the 16 bytes of digest data are stored +** "digestLen" where the digest length (16) is stored +** "maxDigestLen" the maximum amount of data that can ever be +** stored in "digest" +*/ +extern void MD2_End(MD2Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen); + +/* + * Return the size of a buffer needed to flatten the MD2 Context into + * "cx" the context + * returns size; + */ +extern unsigned int MD2_FlattenSize(MD2Context *cx); + +/* + * Flatten the MD2 Context into a buffer: + * "cx" the context + * "space" the buffer to flatten to + * returns status; + */ +extern SECStatus MD2_Flatten(MD2Context *cx, unsigned char *space); + +/* + * Resurrect a flattened context into an MD2 Context + * "space" the buffer holding the flattened context + * "arg" ptr to void used by cryptographic resurrect + * returns resurrected context; + */ +extern MD2Context *MD2_Resurrect(unsigned char *space, void *arg); +extern void MD2_Clone(MD2Context *dest, MD2Context *src); + +/******************************************/ +/* +** SHA-1 secure hash function +*/ + +/* +** Hash a null terminated string "src" into "dest" using SHA-1 +*/ +extern SECStatus SHA1_Hash(unsigned char *dest, const char *src); + +/* +** Hash a non-null terminated string "src" into "dest" using SHA-1 +*/ +extern SECStatus SHA1_HashBuf(unsigned char *dest, const unsigned char *src, + PRUint32 src_length); + +/* +** Create a new SHA-1 context +*/ +extern SHA1Context *SHA1_NewContext(void); + +/* +** Destroy a SHA-1 secure hash context.
+** "cx" the context +** "freeit" if PR_TRUE then free the object as well as its sub-objects +*/ +extern void SHA1_DestroyContext(SHA1Context *cx, PRBool freeit); + +/* +** Reset a SHA-1 context, preparing it for a fresh round of hashing +*/ +extern void SHA1_Begin(SHA1Context *cx); + +/* +** Update the SHA-1 hash function with more data. +** "cx" the context +** "input" the data to hash +** "inputLen" the amount of data to hash +*/ +extern void SHA1_Update(SHA1Context *cx, const unsigned char *input, + unsigned int inputLen); + +/* +** Finish the SHA-1 hash function. Produce the digested results in "digest" +** "cx" the context +** "digest" where the 20 bytes of digest data are stored +** "digestLen" where the digest length (20) is stored +** "maxDigestLen" the maximum amount of data that can ever be +** stored in "digest" +*/ +extern void SHA1_End(SHA1Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen); + +/* +** Export the current state of the SHA-1 hash without appending the standard +** padding and length bytes. Produce the digested results in "digest" +** "cx" the context +** "digest" where the 20 bytes of digest data are stored +** "digestLen" where the digest length (20) is stored (optional) +** "maxDigestLen" the maximum amount of data that can ever be +** stored in "digest" +*/ +extern void SHA1_EndRaw(SHA1Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen); + +/* +** trace the intermediate state info of the SHA1 hash.
+*/ +extern void SHA1_TraceState(SHA1Context *cx); + +/* + * Return the size of a buffer needed to flatten the SHA-1 Context into + * "cx" the context + * returns size; + */ +extern unsigned int SHA1_FlattenSize(SHA1Context *cx); + +/* + * Flatten the SHA-1 Context into a buffer: + * "cx" the context + * "space" the buffer to flatten to + * returns status; + */ +extern SECStatus SHA1_Flatten(SHA1Context *cx, unsigned char *space); + +/* + * Resurrect a flattened context into a SHA-1 Context + * "space" the buffer holding the flattened context + * "arg" ptr to void used by cryptographic resurrect + * returns resurrected context; + */ +extern SHA1Context *SHA1_Resurrect(unsigned char *space, void *arg); +extern void SHA1_Clone(SHA1Context *dest, SHA1Context *src); + +/******************************************/ + +extern SHA224Context *SHA224_NewContext(void); +extern void SHA224_DestroyContext(SHA224Context *cx, PRBool freeit); +extern void SHA224_Begin(SHA224Context *cx); +extern void SHA224_Update(SHA224Context *cx, const unsigned char *input, + unsigned int inputLen); +extern void SHA224_End(SHA224Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen); +/* +** Export the current state of the SHA-224 hash without appending the standard +** padding and length bytes.
Produce the digested results in "digest" +** "cx" the context +** "digest" where the 28 bytes of digest data are stored +** "digestLen" where the digest length (28) is stored (optional) +** "maxDigestLen" the maximum amount of data that can ever be +** stored in "digest" +*/ +extern void SHA224_EndRaw(SHA224Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen); +extern SECStatus SHA224_HashBuf(unsigned char *dest, const unsigned char *src, + PRUint32 src_length); +extern SECStatus SHA224_Hash(unsigned char *dest, const char *src); +extern void SHA224_TraceState(SHA224Context *cx); +extern unsigned int SHA224_FlattenSize(SHA224Context *cx); +extern SECStatus SHA224_Flatten(SHA224Context *cx, unsigned char *space); +extern SHA224Context *SHA224_Resurrect(unsigned char *space, void *arg); +extern void SHA224_Clone(SHA224Context *dest, SHA224Context *src); + +/******************************************/ + +extern SHA256Context *SHA256_NewContext(void); +extern void SHA256_DestroyContext(SHA256Context *cx, PRBool freeit); +extern void SHA256_Begin(SHA256Context *cx); +extern void SHA256_Update(SHA256Context *cx, const unsigned char *input, + unsigned int inputLen); +extern void SHA256_End(SHA256Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen); +/* +** Export the current state of the SHA-256 hash without appending the standard +** padding and length bytes. 
Produce the digested results in "digest" +** "cx" the context +** "digest" where the 32 bytes of digest data are stored +** "digestLen" where the digest length (32) is stored (optional) +** "maxDigestLen" the maximum amount of data that can ever be +** stored in "digest" +*/ +extern void SHA256_EndRaw(SHA256Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen); +extern SECStatus SHA256_HashBuf(unsigned char *dest, const unsigned char *src, + PRUint32 src_length); +extern SECStatus SHA256_Hash(unsigned char *dest, const char *src); +extern void SHA256_TraceState(SHA256Context *cx); +extern unsigned int SHA256_FlattenSize(SHA256Context *cx); +extern SECStatus SHA256_Flatten(SHA256Context *cx, unsigned char *space); +extern SHA256Context *SHA256_Resurrect(unsigned char *space, void *arg); +extern void SHA256_Clone(SHA256Context *dest, SHA256Context *src); + +/******************************************/ + +extern SHA512Context *SHA512_NewContext(void); +extern void SHA512_DestroyContext(SHA512Context *cx, PRBool freeit); +extern void SHA512_Begin(SHA512Context *cx); +extern void SHA512_Update(SHA512Context *cx, const unsigned char *input, + unsigned int inputLen); +/* +** Export the current state of the SHA-512 hash without appending the standard +** padding and length bytes. 
Produce the digested results in "digest" +** "cx" the context +** "digest" where the 64 bytes of digest data are stored +** "digestLen" where the digest length (64) is stored (optional) +** "maxDigestLen" the maximum amount of data that can ever be +** stored in "digest" +*/ +extern void SHA512_EndRaw(SHA512Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen); +extern void SHA512_End(SHA512Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen); +extern SECStatus SHA512_HashBuf(unsigned char *dest, const unsigned char *src, + PRUint32 src_length); +extern SECStatus SHA512_Hash(unsigned char *dest, const char *src); +extern void SHA512_TraceState(SHA512Context *cx); +extern unsigned int SHA512_FlattenSize(SHA512Context *cx); +extern SECStatus SHA512_Flatten(SHA512Context *cx, unsigned char *space); +extern SHA512Context *SHA512_Resurrect(unsigned char *space, void *arg); +extern void SHA512_Clone(SHA512Context *dest, SHA512Context *src); + +/******************************************/ + +extern SHA384Context *SHA384_NewContext(void); +extern void SHA384_DestroyContext(SHA384Context *cx, PRBool freeit); +extern void SHA384_Begin(SHA384Context *cx); +extern void SHA384_Update(SHA384Context *cx, const unsigned char *input, + unsigned int inputLen); +extern void SHA384_End(SHA384Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen); +/* +** Export the current state of the SHA-384 hash without appending the standard +** padding and length bytes. 
Produce the digested results in "digest" +** "cx" the context +** "digest" where the 48 bytes of digest data are stored +** "digestLen" where the digest length (48) is stored (optional) +** "maxDigestLen" the maximum amount of data that can ever be +** stored in "digest" +*/ +extern void SHA384_EndRaw(SHA384Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen); +extern SECStatus SHA384_HashBuf(unsigned char *dest, const unsigned char *src, + PRUint32 src_length); +extern SECStatus SHA384_Hash(unsigned char *dest, const char *src); +extern void SHA384_TraceState(SHA384Context *cx); +extern unsigned int SHA384_FlattenSize(SHA384Context *cx); +extern SECStatus SHA384_Flatten(SHA384Context *cx, unsigned char *space); +extern SHA384Context *SHA384_Resurrect(unsigned char *space, void *arg); +extern void SHA384_Clone(SHA384Context *dest, SHA384Context *src); + +/**************************************** + * implement TLS 1.0 Pseudo Random Function (PRF) and TLS P_hash function + */ + +extern SECStatus +TLS_PRF(const SECItem *secret, const char *label, SECItem *seed, + SECItem *result, PRBool isFIPS); + +extern SECStatus +TLS_P_hash(HASH_HashType hashAlg, const SECItem *secret, const char *label, + SECItem *seed, SECItem *result, PRBool isFIPS); + +/******************************************/ +/* +** Pseudo Random Number Generation. FIPS compliance desirable. +*/ + +/* +** Initialize the global RNG context and give it some seed input taken +** from the system. This function is thread-safe and will only allow +** the global context to be initialized once. The seed input is likely +** small, so it is imperative that RNG_RandomUpdate() be called with +** additional seed data before the generator is used. A good way to +** provide the generator with additional entropy is to call +** RNG_SystemInfoForRNG(). Note that NSS_Init() does exactly that. 
+*/ +extern SECStatus RNG_RNGInit(void); + +/* +** Update the global random number generator with more seeding +** material +*/ +extern SECStatus RNG_RandomUpdate(const void *data, size_t bytes); + +/* +** Generate some random bytes, using the global random number generator +** object. +*/ +extern SECStatus RNG_GenerateGlobalRandomBytes(void *dest, size_t len); + +extern SECStatus RNG_ResetForFuzzing(void); + +/* Destroy the global RNG context. After a call to RNG_RNGShutdown() +** a call to RNG_RNGInit() is required in order to use the generator again, +** along with seed data (see the comment above RNG_RNGInit()). +*/ +extern void RNG_RNGShutdown(void); + +extern void RNG_SystemInfoForRNG(void); + +/* + * FIPS 186-2 Change Notice 1 RNG Algorithm 1, used both to + * generate the DSA X parameter and as a generic purpose RNG. + * + * The following two FIPS186Change functions are needed for + * NIST RNG Validation System. + */ + +/* + * FIPS186Change_GenerateX is now deprecated. It will return SECFailure with + * the error set to PR_NOT_IMPLEMENTED_ERROR. + */ +extern SECStatus +FIPS186Change_GenerateX(unsigned char *XKEY, + const unsigned char *XSEEDj, + unsigned char *x_j); + +/* + * When generating the DSA X parameter, we generate 2*GSIZE bytes + * of random output and reduce it mod q. + * + * Input: w, 2*GSIZE bytes + * q, DSA_SUBPRIME_LEN bytes + * Output: xj, DSA_SUBPRIME_LEN bytes + */ +extern SECStatus +FIPS186Change_ReduceModQForDSA(const unsigned char *w, + const unsigned char *q, + unsigned char *xj); + +/* To allow NIST KAT tests */ +extern SECStatus +PRNGTEST_Instantiate_Kat(const PRUint8 *entropy, unsigned int entropy_len, + const PRUint8 *nonce, unsigned int nonce_len, + const PRUint8 *personal_string, unsigned int ps_len); + +/* + * The following functions are for FIPS poweron self test and FIPS algorithm + * testing. 
+ */ +extern SECStatus +PRNGTEST_Instantiate(const PRUint8 *entropy, unsigned int entropy_len, + const PRUint8 *nonce, unsigned int nonce_len, + const PRUint8 *personal_string, unsigned int ps_len); + +extern SECStatus +PRNGTEST_Reseed(const PRUint8 *entropy, unsigned int entropy_len, + const PRUint8 *additional, unsigned int additional_len); + +extern SECStatus +PRNGTEST_Generate(PRUint8 *bytes, unsigned int bytes_len, + const PRUint8 *additional, unsigned int additional_len); + +extern SECStatus +PRNGTEST_Uninstantiate(void); + +extern SECStatus +PRNGTEST_RunHealthTests(void); + +/* Generate PQGParams and PQGVerify structs. + * Length of seed and length of h both equal length of P. + * All lengths are specified by "j", according to the table above. + * + * The verify parameters will conform to FIPS186-1. + */ +extern SECStatus +PQG_ParamGen(unsigned int j, /* input : determines length of P. */ + PQGParams **pParams, /* output: P Q and G returned here */ + PQGVerify **pVfy); /* output: counter and seed. */ + +/* Generate PQGParams and PQGVerify structs. + * Length of P specified by j. Length of h will match length of P. + * Length of SEED in bytes specified in seedBytes. + * seedBytes must be in the range [20..255] or an error will result. + * + * The verify parameters will conform to FIPS186-1. + */ +extern SECStatus +PQG_ParamGenSeedLen( + unsigned int j, /* input : determines length of P. */ + unsigned int seedBytes, /* input : length of seed in bytes.*/ + PQGParams **pParams, /* output: P Q and G returned here */ + PQGVerify **pVfy); /* output: counter and seed. */ + +/* Generate PQGParams and PQGVerify structs. + * Length of P specified by L in bits. + * Length of Q specified by N in bits. + * Length of SEED in bytes specified in seedBytes. + * seedBytes must be in the range [N..L*2] or an error will result. + * + * Note that J uses the above table, L is the exact length.
L and N must + * match the table below or an error will result: + * + * L N + * 1024 160 + * 2048 224 + * 2048 256 + * 3072 256 + * + * If N or seedBytes are set to zero, then PQG_ParamGenV2 will + * pick a default value (typically the smallest secure value for these + * variables). + * + * The verify parameters will conform to FIPS186-3 using the smallest + * permissible hash for the key strength. + */ +extern SECStatus +PQG_ParamGenV2( + unsigned int L, /* input : determines length of P. */ + unsigned int N, /* input : determines length of Q. */ + unsigned int seedBytes, /* input : length of seed in bytes.*/ + PQGParams **pParams, /* output: P Q and G returned here */ + PQGVerify **pVfy); /* output: counter and seed. */ + +/* Test PQGParams for validity as DSS PQG values. + * If vfy is non-NULL, test PQGParams to make sure they were generated + * using the specified seed, counter, and h values. + * + * Return value indicates whether Verification operation ran successfully + * to completion, but does not indicate if PQGParams are valid or not. + * If return value is SECSuccess, then *result has these meanings: + * SECSuccess: PQGParams are valid. + * SECFailure: PQGParams are invalid. + * + * Verify the PQG against the counter, SEED and h. + * These tests are specified in FIPS 186-3 Appendix A.1.1.1, A.1.1.3, and A.2.2 + * PQG_VerifyParams will automatically choose the appropriate test. + */ + +extern SECStatus PQG_VerifyParams(const PQGParams *params, + const PQGVerify *vfy, SECStatus *result); + +extern void PQG_DestroyParams(PQGParams *params); + +extern void PQG_DestroyVerify(PQGVerify *vfy); + +/* + * clean-up any global tables freebl may have allocated after it starts up. + * This function is not thread safe and should be called only after the + * library has been quiesced.
+ */ +extern void BL_Cleanup(void); + +/* unload freebl shared library from memory */ +extern void BL_Unload(void); + +/************************************************************************** + * Verify a given Shared library signature * + **************************************************************************/ +PRBool BLAPI_SHVerify(const char *name, PRFuncPtr addr); + +/************************************************************************** + * Verify a given filename's signature * + **************************************************************************/ +PRBool BLAPI_SHVerifyFile(const char *shName); + +/************************************************************************** + * Verify Our Own Shared library signature * + **************************************************************************/ +PRBool BLAPI_VerifySelf(const char *name); + +/*********************************************************************/ +extern const SECHashObject *HASH_GetRawHashObject(HASH_HashType hashType); + +extern void BL_SetForkState(PRBool forked); + +/* +** prepare an ECParams structure from DEREncoded params + */ +extern SECStatus EC_FillParams(PLArenaPool *arena, + const SECItem *encodedParams, ECParams *params); +extern SECStatus EC_DecodeParams(const SECItem *encodedParams, + ECParams **ecparams); +extern SECStatus EC_CopyParams(PLArenaPool *arena, ECParams *dstParams, + const ECParams *srcParams); + +/* + * use the internal table to get the size in bytes of a single EC point + */ +extern int EC_GetPointSize(const ECParams *params); + +SEC_END_PROTOS + +#endif /* _BLAPI_H_ */ diff --git a/security/nss/lib/freebl/blapii.h b/security/nss/lib/freebl/blapii.h new file mode 100644 index 000000000..6ad2e2892 --- /dev/null +++ b/security/nss/lib/freebl/blapii.h @@ -0,0 +1,61 @@ +/* + * blapii.h - private data structures and prototypes for the freebl library + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0.
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef _BLAPII_H_ +#define _BLAPII_H_ + +#include "blapit.h" + +/* max block size of supported block ciphers */ +#define MAX_BLOCK_SIZE 16 + +typedef SECStatus (*freeblCipherFunc)(void *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen, + unsigned int blocksize); +typedef void (*freeblDestroyFunc)(void *cx, PRBool freeit); + +SEC_BEGIN_PROTOS + +SECStatus BL_FIPSEntryOK(PRBool freeblOnly); +PRBool BL_POSTRan(PRBool freeblOnly); + +#if defined(XP_UNIX) && !defined(NO_FORK_CHECK) + +extern PRBool bl_parentForkedAfterC_Initialize; + +#define SKIP_AFTER_FORK(x) \ + if (!bl_parentForkedAfterC_Initialize) \ + x + +#else + +#define SKIP_AFTER_FORK(x) x + +#endif + +SEC_END_PROTOS + +#if defined(NSS_X86_OR_X64) +#define HAVE_UNALIGNED_ACCESS 1 +#endif + +#if defined(__clang__) +#define HAVE_NO_SANITIZE_ATTR __has_attribute(no_sanitize) +#else +#define HAVE_NO_SANITIZE_ATTR 0 +#endif + +#if defined(HAVE_UNALIGNED_ACCESS) && HAVE_NO_SANITIZE_ATTR +#define NO_SANITIZE_ALIGNMENT __attribute__((no_sanitize("alignment"))) +#else +#define NO_SANITIZE_ALIGNMENT +#endif + +#undef HAVE_NO_SANITIZE_ATTR + +#endif /* _BLAPII_H_ */ diff --git a/security/nss/lib/freebl/blapit.h b/security/nss/lib/freebl/blapit.h new file mode 100644 index 000000000..2a17b5f46 --- /dev/null +++ b/security/nss/lib/freebl/blapit.h @@ -0,0 +1,414 @@ +/* + * blapit.h - public data structures for the freebl library + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef _BLAPIT_H_ +#define _BLAPIT_H_ + +#include "seccomon.h" +#include "prlink.h" +#include "plarena.h" +#include "ecl-exp.h" + +/* RC2 operation modes */ +#define NSS_RC2 0 +#define NSS_RC2_CBC 1 + +/* RC5 operation modes */ +#define NSS_RC5 0 +#define NSS_RC5_CBC 1 + +/* DES operation modes */ +#define NSS_DES 0 +#define NSS_DES_CBC 1 +#define NSS_DES_EDE3 2 +#define NSS_DES_EDE3_CBC 3 + +#define DES_KEY_LENGTH 8 /* Bytes */ + +/* AES operation modes */ +#define NSS_AES 0 +#define NSS_AES_CBC 1 +#define NSS_AES_CTS 2 +#define NSS_AES_CTR 3 +#define NSS_AES_GCM 4 + +/* Camellia operation modes */ +#define NSS_CAMELLIA 0 +#define NSS_CAMELLIA_CBC 1 + +/* SEED operation modes */ +#define NSS_SEED 0 +#define NSS_SEED_CBC 1 + +#define DSA1_SUBPRIME_LEN 20 /* Bytes */ +#define DSA1_SIGNATURE_LEN (DSA1_SUBPRIME_LEN * 2) /* Bytes */ +#define DSA_MAX_SUBPRIME_LEN 32 /* Bytes */ +#define DSA_MAX_SIGNATURE_LEN (DSA_MAX_SUBPRIME_LEN * 2) /* Bytes */ + +/* + * Mark the old defines as deprecated. This will warn code that expected + * DSA1 only that they need to change if they are to support DSA2. + */ +#if defined(__GNUC__) && (__GNUC__ > 3) +/* make GCC warn when we use these #defines */ +typedef int __BLAPI_DEPRECATED __attribute__((deprecated)); +#define DSA_SUBPRIME_LEN ((__BLAPI_DEPRECATED)DSA1_SUBPRIME_LEN) +#define DSA_SIGNATURE_LEN ((__BLAPI_DEPRECATED)DSA1_SIGNATURE_LEN) +#define DSA_Q_BITS ((__BLAPI_DEPRECATED)(DSA1_SUBPRIME_LEN * 8)) +#else +#ifdef _WIN32 +/* This magic gets the windows compiler to give us a deprecation + * warning; the list must name exactly the legacy macros defined below */ +#pragma deprecated(DSA_SUBPRIME_LEN, DSA_SIGNATURE_LEN, DSA_Q_BITS) +#endif +#define DSA_SUBPRIME_LEN DSA1_SUBPRIME_LEN +#define DSA_SIGNATURE_LEN DSA1_SIGNATURE_LEN +#define DSA_Q_BITS (DSA1_SUBPRIME_LEN * 8) +#endif + +/* XXX We shouldn't have to hard code this limit. For + * now, this is the quickest way to support ECDSA signature + * processing (ECDSA signature lengths depend on curve + * size).
This limit is sufficient for curves up to + * 576 bits. + */ +#define MAX_ECKEY_LEN 72 /* Bytes */ + +#define EC_MAX_KEY_BITS 521 /* in bits */ +#define EC_MIN_KEY_BITS 256 /* in bits */ + +/* EC point compression format */ +#define EC_POINT_FORM_COMPRESSED_Y0 0x02 +#define EC_POINT_FORM_COMPRESSED_Y1 0x03 +#define EC_POINT_FORM_UNCOMPRESSED 0x04 +#define EC_POINT_FORM_HYBRID_Y0 0x06 +#define EC_POINT_FORM_HYBRID_Y1 0x07 + +/* + * Number of bytes each hash algorithm produces + */ +#define MD2_LENGTH 16 /* Bytes */ +#define MD5_LENGTH 16 /* Bytes */ +#define SHA1_LENGTH 20 /* Bytes */ +#define SHA256_LENGTH 32 /* bytes */ +#define SHA384_LENGTH 48 /* bytes */ +#define SHA512_LENGTH 64 /* bytes */ +#define HASH_LENGTH_MAX SHA512_LENGTH + +/* + * Input block size for each hash algorithm. + */ + +#define MD2_BLOCK_LENGTH 64 /* bytes */ +#define MD5_BLOCK_LENGTH 64 /* bytes */ +#define SHA1_BLOCK_LENGTH 64 /* bytes */ +#define SHA224_BLOCK_LENGTH 64 /* bytes */ +#define SHA256_BLOCK_LENGTH 64 /* bytes */ +#define SHA384_BLOCK_LENGTH 128 /* bytes */ +#define SHA512_BLOCK_LENGTH 128 /* bytes */ +#define HASH_BLOCK_LENGTH_MAX SHA512_BLOCK_LENGTH + +#define AES_KEY_WRAP_IV_BYTES 8 +#define AES_KEY_WRAP_BLOCK_SIZE 8 /* bytes */ +#define AES_BLOCK_SIZE 16 /* bytes */ + +#define AES_128_KEY_LENGTH 16 /* bytes */ +#define AES_192_KEY_LENGTH 24 /* bytes */ +#define AES_256_KEY_LENGTH 32 /* bytes */ + +#define CAMELLIA_BLOCK_SIZE 16 /* bytes */ + +#define SEED_BLOCK_SIZE 16 /* bytes */ +#define SEED_KEY_LENGTH 16 /* bytes */ + +#define NSS_FREEBL_DEFAULT_CHUNKSIZE 2048 + +/* + * These values come from the initial key size limits from the PKCS #11 + * module. They may be arbitrarily adjusted to any value freebl supports.
+ */ +#define RSA_MIN_MODULUS_BITS 128 +#define RSA_MAX_MODULUS_BITS 16384 +#define RSA_MAX_EXPONENT_BITS 64 +#define DH_MIN_P_BITS 128 +#define DH_MAX_P_BITS 16384 + +/* + * The FIPS 186-1 algorithm for generating primes P and Q allows only 9 + * distinct values for the length of P, and only one value for the + * length of Q. + * The algorithm uses a variable j to indicate which of the 9 lengths + * of P is to be used. + * The following table relates j to the lengths of P and Q in bits. + * + * j bits in P bits in Q + * _ _________ _________ + * 0 512 160 + * 1 576 160 + * 2 640 160 + * 3 704 160 + * 4 768 160 + * 5 832 160 + * 6 896 160 + * 7 960 160 + * 8 1024 160 + * + * The FIPS-186-1 compliant PQG generator takes j as an input parameter. + * + * The FIPS 186-3 algorithm specifies 4 distinct P and Q sizes: + * + * bits in P bits in Q + * _________ _________ + * 1024 160 + * 2048 224 + * 2048 256 + * 3072 256 + * + * The FIPS-186-3 compliant PQG generator (PQG V2) takes arbitrary p and q + * lengths as input and returns an error if they aren't in this list. + */ + +#define DSA1_Q_BITS 160 +#define DSA_MAX_P_BITS 3072 +#define DSA_MIN_P_BITS 512 +#define DSA_MAX_Q_BITS 256 +#define DSA_MIN_Q_BITS 160 + +#if DSA_MAX_Q_BITS != DSA_MAX_SUBPRIME_LEN * 8 +#error "Inconsistent declaration of DSA SUBPRIME/Q parameters in blapit.h" +#endif + +/* + * function takes desired number of bits in P, + * returns index (0..8) or -1 if number of bits is invalid. + */ +#define PQG_PBITS_TO_INDEX(bits) \ + (((bits) < 512 || (bits) > 1024 || (bits) % 64) ? -1 : (int)((bits)-512) / 64) + +/* + * function takes index (0-8) + * returns number of bits in P for that index, or -1 if index is invalid. + */ +#define PQG_INDEX_TO_PBITS(j) (((unsigned)(j) > 8) ?
-1 : (512 + 64 * (j))) + +/*************************************************************************** +** Opaque objects +*/ + +struct DESContextStr; +struct RC2ContextStr; +struct RC4ContextStr; +struct RC5ContextStr; +struct AESContextStr; +struct CamelliaContextStr; +struct MD2ContextStr; +struct MD5ContextStr; +struct SHA1ContextStr; +struct SHA256ContextStr; +struct SHA512ContextStr; +struct AESKeyWrapContextStr; +struct SEEDContextStr; +struct ChaCha20Poly1305ContextStr; + +typedef struct DESContextStr DESContext; +typedef struct RC2ContextStr RC2Context; +typedef struct RC4ContextStr RC4Context; +typedef struct RC5ContextStr RC5Context; +typedef struct AESContextStr AESContext; +typedef struct CamelliaContextStr CamelliaContext; +typedef struct MD2ContextStr MD2Context; +typedef struct MD5ContextStr MD5Context; +typedef struct SHA1ContextStr SHA1Context; +typedef struct SHA256ContextStr SHA256Context; +/* SHA224Context is really a SHA256ContextStr. This is not a mistake. */ +typedef struct SHA256ContextStr SHA224Context; +typedef struct SHA512ContextStr SHA512Context; +/* SHA384Context is really a SHA512ContextStr. This is not a mistake. 
*/ +typedef struct SHA512ContextStr SHA384Context; +typedef struct AESKeyWrapContextStr AESKeyWrapContext; +typedef struct SEEDContextStr SEEDContext; +typedef struct ChaCha20Poly1305ContextStr ChaCha20Poly1305Context; + +/*************************************************************************** +** RSA Public and Private Key structures +*/ + +/* member names from PKCS#1, section 7.1 */ +struct RSAPublicKeyStr { + PLArenaPool *arena; + SECItem modulus; + SECItem publicExponent; +}; +typedef struct RSAPublicKeyStr RSAPublicKey; + +/* member names from PKCS#1, section 7.2 */ +struct RSAPrivateKeyStr { + PLArenaPool *arena; + SECItem version; + SECItem modulus; + SECItem publicExponent; + SECItem privateExponent; + SECItem prime1; + SECItem prime2; + SECItem exponent1; + SECItem exponent2; + SECItem coefficient; +}; +typedef struct RSAPrivateKeyStr RSAPrivateKey; + +/*************************************************************************** +** DSA Public and Private Key and related structures +*/ + +struct PQGParamsStr { + PLArenaPool *arena; + SECItem prime; /* p */ + SECItem subPrime; /* q */ + SECItem base; /* g */ + /* XXX chrisk: this needs to be expanded to hold j and validationParms (RFC2459 7.3.2) */ +}; +typedef struct PQGParamsStr PQGParams; + +struct PQGVerifyStr { + PLArenaPool *arena; /* includes this struct, seed, & h. */ + unsigned int counter; + SECItem seed; + SECItem h; +}; +typedef struct PQGVerifyStr PQGVerify; + +struct DSAPublicKeyStr { + PQGParams params; + SECItem publicValue; +}; +typedef struct DSAPublicKeyStr DSAPublicKey; + +struct DSAPrivateKeyStr { + PQGParams params; + SECItem publicValue; + SECItem privateValue; +}; +typedef struct DSAPrivateKeyStr DSAPrivateKey; + +/*************************************************************************** +** Diffie-Hellman Public and Private Key and related structures +** Structure member names suggested by PKCS#3. 
+*/ + +struct DHParamsStr { + PLArenaPool *arena; + SECItem prime; /* p */ + SECItem base; /* g */ +}; +typedef struct DHParamsStr DHParams; + +struct DHPublicKeyStr { + PLArenaPool *arena; + SECItem prime; + SECItem base; + SECItem publicValue; +}; +typedef struct DHPublicKeyStr DHPublicKey; + +struct DHPrivateKeyStr { + PLArenaPool *arena; + SECItem prime; + SECItem base; + SECItem publicValue; + SECItem privateValue; +}; +typedef struct DHPrivateKeyStr DHPrivateKey; + +/*************************************************************************** +** Data structures used for elliptic curve parameters and +** public and private keys. +*/ + +/* +** The ECParams data structures can encode elliptic curve +** parameters for both GFp and GF2m curves. +*/ + +typedef enum { ec_params_explicit, + ec_params_named +} ECParamsType; + +typedef enum { ec_field_GFp = 1, + ec_field_GF2m, + ec_field_plain +} ECFieldType; + +struct ECFieldIDStr { + int size; /* field size in bits */ + ECFieldType type; + union { + SECItem prime; /* prime p for (GFp) */ + SECItem poly; /* irreducible binary polynomial for (GF2m) */ + } u; + int k1; /* first coefficient of pentanomial or + * the only coefficient of trinomial + */ + int k2; /* two remaining coefficients of pentanomial */ + int k3; +}; +typedef struct ECFieldIDStr ECFieldID; + +struct ECCurveStr { + SECItem a; /* contains octet stream encoding of + * field element (X9.62 section 4.3.3) + */ + SECItem b; + SECItem seed; +}; +typedef struct ECCurveStr ECCurve; + +struct ECParamsStr { + PLArenaPool *arena; + ECParamsType type; + ECFieldID fieldID; + ECCurve curve; + SECItem base; + SECItem order; + int cofactor; + SECItem DEREncoding; + ECCurveName name; + SECItem curveOID; +}; +typedef struct ECParamsStr ECParams; + +struct ECPublicKeyStr { + ECParams ecParams; + SECItem publicValue; /* elliptic curve point encoded as + * octet stream. 
+ */ +}; +typedef struct ECPublicKeyStr ECPublicKey; + +struct ECPrivateKeyStr { + ECParams ecParams; + SECItem publicValue; /* encoded ec point */ + SECItem privateValue; /* private big integer */ + SECItem version; /* As per SEC 1, Appendix C, Section C.4 */ +}; +typedef struct ECPrivateKeyStr ECPrivateKey; + +typedef void *(*BLapiAllocateFunc)(void); +typedef void (*BLapiDestroyContextFunc)(void *cx, PRBool freeit); +typedef SECStatus (*BLapiInitContextFunc)(void *cx, + const unsigned char *key, + unsigned int keylen, + const unsigned char *, + int, + unsigned int, + unsigned int); +typedef SECStatus (*BLapiEncrypt)(void *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen); + +#endif /* _BLAPIT_H_ */ diff --git a/security/nss/lib/freebl/blname.c b/security/nss/lib/freebl/blname.c new file mode 100644 index 000000000..4bad74ada --- /dev/null +++ b/security/nss/lib/freebl/blname.c @@ -0,0 +1,100 @@ +/* + * blname.c - determine the freebl library name. + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#if defined(FREEBL_LOWHASH) +static const char* default_name = + SHLIB_PREFIX "freeblpriv" SHLIB_VERSION "." SHLIB_SUFFIX; +#else +static const char* default_name = + SHLIB_PREFIX "freebl" SHLIB_VERSION "." SHLIB_SUFFIX; +#endif + +/* getLibName() returns the name of the library to load. 
*/ + +#if defined(SOLARIS) && defined(__sparc) +#include +#include +#include + +#if defined(NSS_USE_64) + +const static char fpu_hybrid_shared_lib[] = "libfreebl_64fpu_3.so"; +const static char int_hybrid_shared_lib[] = "libfreebl_64int_3.so"; +const static char non_hybrid_shared_lib[] = "libfreebl_64fpu_3.so"; + +const static char int_hybrid_isa[] = "sparcv9"; +const static char fpu_hybrid_isa[] = "sparcv9+vis"; + +#else + +const static char fpu_hybrid_shared_lib[] = "libfreebl_32fpu_3.so"; +const static char int_hybrid_shared_lib[] = "libfreebl_32int64_3.so"; +/* This was for SPARC V8, now obsolete. */ +const static char* const non_hybrid_shared_lib = NULL; + +const static char int_hybrid_isa[] = "sparcv8plus"; +const static char fpu_hybrid_isa[] = "sparcv8plus+vis"; + +#endif + +static const char* +getLibName(void) +{ + char* found_int_hybrid; + char* found_fpu_hybrid; + long buflen; + char buf[256]; + + buflen = sysinfo(SI_ISALIST, buf, sizeof buf); + if (buflen <= 0) + return NULL; + /* sysinfo output is always supposed to be NUL terminated, but ... */ + if (buflen < sizeof buf) + buf[buflen] = '\0'; + else + buf[(sizeof buf) - 1] = '\0'; + /* The ISA list is a space separated string of names of ISAs and + * ISA extensions, in order of decreasing performance. + * There are two different ISAs with which NSS's crypto code can be + * accelerated. If both are in the list, we take the first one. + * If one is in the list, we use it, and if neither then we use + * the base unaccelerated code. + */ + found_int_hybrid = strstr(buf, int_hybrid_isa); + found_fpu_hybrid = strstr(buf, fpu_hybrid_isa); + if (found_fpu_hybrid && + (!found_int_hybrid || + (found_int_hybrid - found_fpu_hybrid) >= 0)) { + return fpu_hybrid_shared_lib; + } + if (found_int_hybrid) { + return int_hybrid_shared_lib; + } + return non_hybrid_shared_lib; +} + +#elif defined(HPUX) && !defined(NSS_USE_64) && !defined(__ia64) +#include + +/* This code tests to see if we're running on a PA2.x CPU. 
+** It returns true (1) if so, and false (0) otherwise. +*/ +static const char* +getLibName(void) +{ + long cpu = sysconf(_SC_CPU_VERSION); + return (cpu == CPU_PA_RISC2_0) + ? "libfreebl_32fpu_3.sl" + : "libfreebl_32int_3.sl"; +} +#else +/* default case, for platforms/ABIs that have only one freebl shared lib. */ +static const char* +getLibName(void) +{ + return default_name; +} +#endif diff --git a/security/nss/lib/freebl/camellia.c b/security/nss/lib/freebl/camellia.c new file mode 100644 index 000000000..8a7bcb0fe --- /dev/null +++ b/security/nss/lib/freebl/camellia.c @@ -0,0 +1,1896 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "prinit.h" +#include "prerr.h" +#include "secerr.h" + +#include "prtypes.h" +#include "blapi.h" +#include "camellia.h" +#include "sha_fast.h" /* for SHA_HTONL and related configuration macros */ + +/* key constants */ + +#define CAMELLIA_SIGMA1L (0xA09E667FL) +#define CAMELLIA_SIGMA1R (0x3BCC908BL) +#define CAMELLIA_SIGMA2L (0xB67AE858L) +#define CAMELLIA_SIGMA2R (0x4CAA73B2L) +#define CAMELLIA_SIGMA3L (0xC6EF372FL) +#define CAMELLIA_SIGMA3R (0xE94F82BEL) +#define CAMELLIA_SIGMA4L (0x54FF53A5L) +#define CAMELLIA_SIGMA4R (0xF1D36F1CL) +#define CAMELLIA_SIGMA5L (0x10E527FAL) +#define CAMELLIA_SIGMA5R (0xDE682D1DL) +#define CAMELLIA_SIGMA6L (0xB05688C2L) +#define CAMELLIA_SIGMA6R (0xB3E6C1FDL) + +/* + * macros + */ + +#if defined(HAVE_UNALIGNED_ACCESS) + +/* require a CPU that allows unaligned access */ + +#if defined(SHA_NEED_TMP_VARIABLE) +#define CAMELLIA_NEED_TMP_VARIABLE 1 +#endif + +#define GETU32(p) SHA_HTONL(*((PRUint32 *)(p))) +#define PUTU32(ct, st) \ + { \ + *((PRUint32 *)(ct)) = SHA_HTONL(st); \ + } + +#else /* no unaligned access */ + +#define GETU32(pt) \ + (((PRUint32)(pt)[0] << 24) ^ 
((PRUint32)(pt)[1] << 16) ^ ((PRUint32)(pt)[2] << 8) ^ ((PRUint32)(pt)[3])) + +#define PUTU32(ct, st) \ + { \ + (ct)[0] = (PRUint8)((st) >> 24); \ + (ct)[1] = (PRUint8)((st) >> 16); \ + (ct)[2] = (PRUint8)((st) >> 8); \ + (ct)[3] = (PRUint8)(st); \ + } + +#endif + +#define CamelliaSubkeyL(INDEX) (subkey[(INDEX)*2]) +#define CamelliaSubkeyR(INDEX) (subkey[(INDEX)*2 + 1]) + +/* rotation right shift 1byte */ +#define CAMELLIA_RR8(x) (((x) >> 8) + ((x) << 24)) +/* rotation left shift 1bit */ +#define CAMELLIA_RL1(x) (((x) << 1) + ((x) >> 31)) +/* rotation left shift 1byte */ +#define CAMELLIA_RL8(x) (((x) << 8) + ((x) >> 24)) + +#define CAMELLIA_ROLDQ(ll, lr, rl, rr, w0, w1, bits) \ + do { \ + w0 = ll; \ + ll = (ll << bits) + (lr >> (32 - bits)); \ + lr = (lr << bits) + (rl >> (32 - bits)); \ + rl = (rl << bits) + (rr >> (32 - bits)); \ + rr = (rr << bits) + (w0 >> (32 - bits)); \ + } while (0) + +#define CAMELLIA_ROLDQo32(ll, lr, rl, rr, w0, w1, bits) \ + do { \ + w0 = ll; \ + w1 = lr; \ + ll = (lr << (bits - 32)) + (rl >> (64 - bits)); \ + lr = (rl << (bits - 32)) + (rr >> (64 - bits)); \ + rl = (rr << (bits - 32)) + (w0 >> (64 - bits)); \ + rr = (w0 << (bits - 32)) + (w1 >> (64 - bits)); \ + } while (0) + +#define CAMELLIA_SP1110(INDEX) (camellia_sp1110[(INDEX)]) +#define CAMELLIA_SP0222(INDEX) (camellia_sp0222[(INDEX)]) +#define CAMELLIA_SP3033(INDEX) (camellia_sp3033[(INDEX)]) +#define CAMELLIA_SP4404(INDEX) (camellia_sp4404[(INDEX)]) + +#define CAMELLIA_F(xl, xr, kl, kr, yl, yr, il, ir, t0, t1) \ + do { \ + il = xl ^ kl; \ + ir = xr ^ kr; \ + t0 = il >> 16; \ + t1 = ir >> 16; \ + yl = CAMELLIA_SP1110(ir & 0xff) ^ \ + CAMELLIA_SP0222((t1 >> 8) & 0xff) ^ \ + CAMELLIA_SP3033(t1 & 0xff) ^ \ + CAMELLIA_SP4404((ir >> 8) & 0xff); \ + yr = CAMELLIA_SP1110((t0 >> 8) & 0xff) ^ \ + CAMELLIA_SP0222(t0 & 0xff) ^ \ + CAMELLIA_SP3033((il >> 8) & 0xff) ^ \ + CAMELLIA_SP4404(il & 0xff); \ + yl ^= yr; \ + yr = CAMELLIA_RR8(yr); \ + yr ^= yl; \ + } while (0) + +/* + * for speed up 
+ * + */ +#define CAMELLIA_FLS(ll, lr, rl, rr, kll, klr, krl, krr, t0, t1, t2, t3) \ + do { \ + t0 = kll; \ + t0 &= ll; \ + lr ^= CAMELLIA_RL1(t0); \ + t1 = klr; \ + t1 |= lr; \ + ll ^= t1; \ + \ + t2 = krr; \ + t2 |= rr; \ + rl ^= t2; \ + t3 = krl; \ + t3 &= rl; \ + rr ^= CAMELLIA_RL1(t3); \ + } while (0) + +#define CAMELLIA_ROUNDSM(xl, xr, kl, kr, yl, yr, il, ir, t0, t1) \ + do { \ + ir = CAMELLIA_SP1110(xr & 0xff) ^ \ + CAMELLIA_SP0222((xr >> 24) & 0xff) ^ \ + CAMELLIA_SP3033((xr >> 16) & 0xff) ^ \ + CAMELLIA_SP4404((xr >> 8) & 0xff); \ + il = CAMELLIA_SP1110((xl >> 24) & 0xff) ^ \ + CAMELLIA_SP0222((xl >> 16) & 0xff) ^ \ + CAMELLIA_SP3033((xl >> 8) & 0xff) ^ \ + CAMELLIA_SP4404(xl & 0xff); \ + il ^= kl; \ + ir ^= kr; \ + ir ^= il; \ + il = CAMELLIA_RR8(il); \ + il ^= ir; \ + yl ^= ir; \ + yr ^= il; \ + } while (0) + +static const PRUint32 camellia_sp1110[256] = { + 0x70707000, 0x82828200, 0x2c2c2c00, 0xececec00, + 0xb3b3b300, 0x27272700, 0xc0c0c000, 0xe5e5e500, + 0xe4e4e400, 0x85858500, 0x57575700, 0x35353500, + 0xeaeaea00, 0x0c0c0c00, 0xaeaeae00, 0x41414100, + 0x23232300, 0xefefef00, 0x6b6b6b00, 0x93939300, + 0x45454500, 0x19191900, 0xa5a5a500, 0x21212100, + 0xededed00, 0x0e0e0e00, 0x4f4f4f00, 0x4e4e4e00, + 0x1d1d1d00, 0x65656500, 0x92929200, 0xbdbdbd00, + 0x86868600, 0xb8b8b800, 0xafafaf00, 0x8f8f8f00, + 0x7c7c7c00, 0xebebeb00, 0x1f1f1f00, 0xcecece00, + 0x3e3e3e00, 0x30303000, 0xdcdcdc00, 0x5f5f5f00, + 0x5e5e5e00, 0xc5c5c500, 0x0b0b0b00, 0x1a1a1a00, + 0xa6a6a600, 0xe1e1e100, 0x39393900, 0xcacaca00, + 0xd5d5d500, 0x47474700, 0x5d5d5d00, 0x3d3d3d00, + 0xd9d9d900, 0x01010100, 0x5a5a5a00, 0xd6d6d600, + 0x51515100, 0x56565600, 0x6c6c6c00, 0x4d4d4d00, + 0x8b8b8b00, 0x0d0d0d00, 0x9a9a9a00, 0x66666600, + 0xfbfbfb00, 0xcccccc00, 0xb0b0b000, 0x2d2d2d00, + 0x74747400, 0x12121200, 0x2b2b2b00, 0x20202000, + 0xf0f0f000, 0xb1b1b100, 0x84848400, 0x99999900, + 0xdfdfdf00, 0x4c4c4c00, 0xcbcbcb00, 0xc2c2c200, + 0x34343400, 0x7e7e7e00, 0x76767600, 0x05050500, + 0x6d6d6d00, 
0xb7b7b700, 0xa9a9a900, 0x31313100, + 0xd1d1d100, 0x17171700, 0x04040400, 0xd7d7d700, + 0x14141400, 0x58585800, 0x3a3a3a00, 0x61616100, + 0xdedede00, 0x1b1b1b00, 0x11111100, 0x1c1c1c00, + 0x32323200, 0x0f0f0f00, 0x9c9c9c00, 0x16161600, + 0x53535300, 0x18181800, 0xf2f2f200, 0x22222200, + 0xfefefe00, 0x44444400, 0xcfcfcf00, 0xb2b2b200, + 0xc3c3c300, 0xb5b5b500, 0x7a7a7a00, 0x91919100, + 0x24242400, 0x08080800, 0xe8e8e800, 0xa8a8a800, + 0x60606000, 0xfcfcfc00, 0x69696900, 0x50505000, + 0xaaaaaa00, 0xd0d0d000, 0xa0a0a000, 0x7d7d7d00, + 0xa1a1a100, 0x89898900, 0x62626200, 0x97979700, + 0x54545400, 0x5b5b5b00, 0x1e1e1e00, 0x95959500, + 0xe0e0e000, 0xffffff00, 0x64646400, 0xd2d2d200, + 0x10101000, 0xc4c4c400, 0x00000000, 0x48484800, + 0xa3a3a300, 0xf7f7f700, 0x75757500, 0xdbdbdb00, + 0x8a8a8a00, 0x03030300, 0xe6e6e600, 0xdadada00, + 0x09090900, 0x3f3f3f00, 0xdddddd00, 0x94949400, + 0x87878700, 0x5c5c5c00, 0x83838300, 0x02020200, + 0xcdcdcd00, 0x4a4a4a00, 0x90909000, 0x33333300, + 0x73737300, 0x67676700, 0xf6f6f600, 0xf3f3f300, + 0x9d9d9d00, 0x7f7f7f00, 0xbfbfbf00, 0xe2e2e200, + 0x52525200, 0x9b9b9b00, 0xd8d8d800, 0x26262600, + 0xc8c8c800, 0x37373700, 0xc6c6c600, 0x3b3b3b00, + 0x81818100, 0x96969600, 0x6f6f6f00, 0x4b4b4b00, + 0x13131300, 0xbebebe00, 0x63636300, 0x2e2e2e00, + 0xe9e9e900, 0x79797900, 0xa7a7a700, 0x8c8c8c00, + 0x9f9f9f00, 0x6e6e6e00, 0xbcbcbc00, 0x8e8e8e00, + 0x29292900, 0xf5f5f500, 0xf9f9f900, 0xb6b6b600, + 0x2f2f2f00, 0xfdfdfd00, 0xb4b4b400, 0x59595900, + 0x78787800, 0x98989800, 0x06060600, 0x6a6a6a00, + 0xe7e7e700, 0x46464600, 0x71717100, 0xbababa00, + 0xd4d4d400, 0x25252500, 0xababab00, 0x42424200, + 0x88888800, 0xa2a2a200, 0x8d8d8d00, 0xfafafa00, + 0x72727200, 0x07070700, 0xb9b9b900, 0x55555500, + 0xf8f8f800, 0xeeeeee00, 0xacacac00, 0x0a0a0a00, + 0x36363600, 0x49494900, 0x2a2a2a00, 0x68686800, + 0x3c3c3c00, 0x38383800, 0xf1f1f100, 0xa4a4a400, + 0x40404000, 0x28282800, 0xd3d3d300, 0x7b7b7b00, + 0xbbbbbb00, 0xc9c9c900, 0x43434300, 0xc1c1c100, + 0x15151500, 
0xe3e3e300, 0xadadad00, 0xf4f4f400, + 0x77777700, 0xc7c7c700, 0x80808000, 0x9e9e9e00, +}; + +static const PRUint32 camellia_sp0222[256] = { + 0x00e0e0e0, 0x00050505, 0x00585858, 0x00d9d9d9, + 0x00676767, 0x004e4e4e, 0x00818181, 0x00cbcbcb, + 0x00c9c9c9, 0x000b0b0b, 0x00aeaeae, 0x006a6a6a, + 0x00d5d5d5, 0x00181818, 0x005d5d5d, 0x00828282, + 0x00464646, 0x00dfdfdf, 0x00d6d6d6, 0x00272727, + 0x008a8a8a, 0x00323232, 0x004b4b4b, 0x00424242, + 0x00dbdbdb, 0x001c1c1c, 0x009e9e9e, 0x009c9c9c, + 0x003a3a3a, 0x00cacaca, 0x00252525, 0x007b7b7b, + 0x000d0d0d, 0x00717171, 0x005f5f5f, 0x001f1f1f, + 0x00f8f8f8, 0x00d7d7d7, 0x003e3e3e, 0x009d9d9d, + 0x007c7c7c, 0x00606060, 0x00b9b9b9, 0x00bebebe, + 0x00bcbcbc, 0x008b8b8b, 0x00161616, 0x00343434, + 0x004d4d4d, 0x00c3c3c3, 0x00727272, 0x00959595, + 0x00ababab, 0x008e8e8e, 0x00bababa, 0x007a7a7a, + 0x00b3b3b3, 0x00020202, 0x00b4b4b4, 0x00adadad, + 0x00a2a2a2, 0x00acacac, 0x00d8d8d8, 0x009a9a9a, + 0x00171717, 0x001a1a1a, 0x00353535, 0x00cccccc, + 0x00f7f7f7, 0x00999999, 0x00616161, 0x005a5a5a, + 0x00e8e8e8, 0x00242424, 0x00565656, 0x00404040, + 0x00e1e1e1, 0x00636363, 0x00090909, 0x00333333, + 0x00bfbfbf, 0x00989898, 0x00979797, 0x00858585, + 0x00686868, 0x00fcfcfc, 0x00ececec, 0x000a0a0a, + 0x00dadada, 0x006f6f6f, 0x00535353, 0x00626262, + 0x00a3a3a3, 0x002e2e2e, 0x00080808, 0x00afafaf, + 0x00282828, 0x00b0b0b0, 0x00747474, 0x00c2c2c2, + 0x00bdbdbd, 0x00363636, 0x00222222, 0x00383838, + 0x00646464, 0x001e1e1e, 0x00393939, 0x002c2c2c, + 0x00a6a6a6, 0x00303030, 0x00e5e5e5, 0x00444444, + 0x00fdfdfd, 0x00888888, 0x009f9f9f, 0x00656565, + 0x00878787, 0x006b6b6b, 0x00f4f4f4, 0x00232323, + 0x00484848, 0x00101010, 0x00d1d1d1, 0x00515151, + 0x00c0c0c0, 0x00f9f9f9, 0x00d2d2d2, 0x00a0a0a0, + 0x00555555, 0x00a1a1a1, 0x00414141, 0x00fafafa, + 0x00434343, 0x00131313, 0x00c4c4c4, 0x002f2f2f, + 0x00a8a8a8, 0x00b6b6b6, 0x003c3c3c, 0x002b2b2b, + 0x00c1c1c1, 0x00ffffff, 0x00c8c8c8, 0x00a5a5a5, + 0x00202020, 0x00898989, 0x00000000, 0x00909090, + 
0x00474747, 0x00efefef, 0x00eaeaea, 0x00b7b7b7, + 0x00151515, 0x00060606, 0x00cdcdcd, 0x00b5b5b5, + 0x00121212, 0x007e7e7e, 0x00bbbbbb, 0x00292929, + 0x000f0f0f, 0x00b8b8b8, 0x00070707, 0x00040404, + 0x009b9b9b, 0x00949494, 0x00212121, 0x00666666, + 0x00e6e6e6, 0x00cecece, 0x00ededed, 0x00e7e7e7, + 0x003b3b3b, 0x00fefefe, 0x007f7f7f, 0x00c5c5c5, + 0x00a4a4a4, 0x00373737, 0x00b1b1b1, 0x004c4c4c, + 0x00919191, 0x006e6e6e, 0x008d8d8d, 0x00767676, + 0x00030303, 0x002d2d2d, 0x00dedede, 0x00969696, + 0x00262626, 0x007d7d7d, 0x00c6c6c6, 0x005c5c5c, + 0x00d3d3d3, 0x00f2f2f2, 0x004f4f4f, 0x00191919, + 0x003f3f3f, 0x00dcdcdc, 0x00797979, 0x001d1d1d, + 0x00525252, 0x00ebebeb, 0x00f3f3f3, 0x006d6d6d, + 0x005e5e5e, 0x00fbfbfb, 0x00696969, 0x00b2b2b2, + 0x00f0f0f0, 0x00313131, 0x000c0c0c, 0x00d4d4d4, + 0x00cfcfcf, 0x008c8c8c, 0x00e2e2e2, 0x00757575, + 0x00a9a9a9, 0x004a4a4a, 0x00575757, 0x00848484, + 0x00111111, 0x00454545, 0x001b1b1b, 0x00f5f5f5, + 0x00e4e4e4, 0x000e0e0e, 0x00737373, 0x00aaaaaa, + 0x00f1f1f1, 0x00dddddd, 0x00595959, 0x00141414, + 0x006c6c6c, 0x00929292, 0x00545454, 0x00d0d0d0, + 0x00787878, 0x00707070, 0x00e3e3e3, 0x00494949, + 0x00808080, 0x00505050, 0x00a7a7a7, 0x00f6f6f6, + 0x00777777, 0x00939393, 0x00868686, 0x00838383, + 0x002a2a2a, 0x00c7c7c7, 0x005b5b5b, 0x00e9e9e9, + 0x00eeeeee, 0x008f8f8f, 0x00010101, 0x003d3d3d, +}; + +static const PRUint32 camellia_sp3033[256] = { + 0x38003838, 0x41004141, 0x16001616, 0x76007676, + 0xd900d9d9, 0x93009393, 0x60006060, 0xf200f2f2, + 0x72007272, 0xc200c2c2, 0xab00abab, 0x9a009a9a, + 0x75007575, 0x06000606, 0x57005757, 0xa000a0a0, + 0x91009191, 0xf700f7f7, 0xb500b5b5, 0xc900c9c9, + 0xa200a2a2, 0x8c008c8c, 0xd200d2d2, 0x90009090, + 0xf600f6f6, 0x07000707, 0xa700a7a7, 0x27002727, + 0x8e008e8e, 0xb200b2b2, 0x49004949, 0xde00dede, + 0x43004343, 0x5c005c5c, 0xd700d7d7, 0xc700c7c7, + 0x3e003e3e, 0xf500f5f5, 0x8f008f8f, 0x67006767, + 0x1f001f1f, 0x18001818, 0x6e006e6e, 0xaf00afaf, + 0x2f002f2f, 0xe200e2e2, 0x85008585, 
0x0d000d0d, + 0x53005353, 0xf000f0f0, 0x9c009c9c, 0x65006565, + 0xea00eaea, 0xa300a3a3, 0xae00aeae, 0x9e009e9e, + 0xec00ecec, 0x80008080, 0x2d002d2d, 0x6b006b6b, + 0xa800a8a8, 0x2b002b2b, 0x36003636, 0xa600a6a6, + 0xc500c5c5, 0x86008686, 0x4d004d4d, 0x33003333, + 0xfd00fdfd, 0x66006666, 0x58005858, 0x96009696, + 0x3a003a3a, 0x09000909, 0x95009595, 0x10001010, + 0x78007878, 0xd800d8d8, 0x42004242, 0xcc00cccc, + 0xef00efef, 0x26002626, 0xe500e5e5, 0x61006161, + 0x1a001a1a, 0x3f003f3f, 0x3b003b3b, 0x82008282, + 0xb600b6b6, 0xdb00dbdb, 0xd400d4d4, 0x98009898, + 0xe800e8e8, 0x8b008b8b, 0x02000202, 0xeb00ebeb, + 0x0a000a0a, 0x2c002c2c, 0x1d001d1d, 0xb000b0b0, + 0x6f006f6f, 0x8d008d8d, 0x88008888, 0x0e000e0e, + 0x19001919, 0x87008787, 0x4e004e4e, 0x0b000b0b, + 0xa900a9a9, 0x0c000c0c, 0x79007979, 0x11001111, + 0x7f007f7f, 0x22002222, 0xe700e7e7, 0x59005959, + 0xe100e1e1, 0xda00dada, 0x3d003d3d, 0xc800c8c8, + 0x12001212, 0x04000404, 0x74007474, 0x54005454, + 0x30003030, 0x7e007e7e, 0xb400b4b4, 0x28002828, + 0x55005555, 0x68006868, 0x50005050, 0xbe00bebe, + 0xd000d0d0, 0xc400c4c4, 0x31003131, 0xcb00cbcb, + 0x2a002a2a, 0xad00adad, 0x0f000f0f, 0xca00caca, + 0x70007070, 0xff00ffff, 0x32003232, 0x69006969, + 0x08000808, 0x62006262, 0x00000000, 0x24002424, + 0xd100d1d1, 0xfb00fbfb, 0xba00baba, 0xed00eded, + 0x45004545, 0x81008181, 0x73007373, 0x6d006d6d, + 0x84008484, 0x9f009f9f, 0xee00eeee, 0x4a004a4a, + 0xc300c3c3, 0x2e002e2e, 0xc100c1c1, 0x01000101, + 0xe600e6e6, 0x25002525, 0x48004848, 0x99009999, + 0xb900b9b9, 0xb300b3b3, 0x7b007b7b, 0xf900f9f9, + 0xce00cece, 0xbf00bfbf, 0xdf00dfdf, 0x71007171, + 0x29002929, 0xcd00cdcd, 0x6c006c6c, 0x13001313, + 0x64006464, 0x9b009b9b, 0x63006363, 0x9d009d9d, + 0xc000c0c0, 0x4b004b4b, 0xb700b7b7, 0xa500a5a5, + 0x89008989, 0x5f005f5f, 0xb100b1b1, 0x17001717, + 0xf400f4f4, 0xbc00bcbc, 0xd300d3d3, 0x46004646, + 0xcf00cfcf, 0x37003737, 0x5e005e5e, 0x47004747, + 0x94009494, 0xfa00fafa, 0xfc00fcfc, 0x5b005b5b, + 0x97009797, 0xfe00fefe, 0x5a005a5a, 
0xac00acac, + 0x3c003c3c, 0x4c004c4c, 0x03000303, 0x35003535, + 0xf300f3f3, 0x23002323, 0xb800b8b8, 0x5d005d5d, + 0x6a006a6a, 0x92009292, 0xd500d5d5, 0x21002121, + 0x44004444, 0x51005151, 0xc600c6c6, 0x7d007d7d, + 0x39003939, 0x83008383, 0xdc00dcdc, 0xaa00aaaa, + 0x7c007c7c, 0x77007777, 0x56005656, 0x05000505, + 0x1b001b1b, 0xa400a4a4, 0x15001515, 0x34003434, + 0x1e001e1e, 0x1c001c1c, 0xf800f8f8, 0x52005252, + 0x20002020, 0x14001414, 0xe900e9e9, 0xbd00bdbd, + 0xdd00dddd, 0xe400e4e4, 0xa100a1a1, 0xe000e0e0, + 0x8a008a8a, 0xf100f1f1, 0xd600d6d6, 0x7a007a7a, + 0xbb00bbbb, 0xe300e3e3, 0x40004040, 0x4f004f4f, +}; + +static const PRUint32 camellia_sp4404[256] = { + 0x70700070, 0x2c2c002c, 0xb3b300b3, 0xc0c000c0, + 0xe4e400e4, 0x57570057, 0xeaea00ea, 0xaeae00ae, + 0x23230023, 0x6b6b006b, 0x45450045, 0xa5a500a5, + 0xeded00ed, 0x4f4f004f, 0x1d1d001d, 0x92920092, + 0x86860086, 0xafaf00af, 0x7c7c007c, 0x1f1f001f, + 0x3e3e003e, 0xdcdc00dc, 0x5e5e005e, 0x0b0b000b, + 0xa6a600a6, 0x39390039, 0xd5d500d5, 0x5d5d005d, + 0xd9d900d9, 0x5a5a005a, 0x51510051, 0x6c6c006c, + 0x8b8b008b, 0x9a9a009a, 0xfbfb00fb, 0xb0b000b0, + 0x74740074, 0x2b2b002b, 0xf0f000f0, 0x84840084, + 0xdfdf00df, 0xcbcb00cb, 0x34340034, 0x76760076, + 0x6d6d006d, 0xa9a900a9, 0xd1d100d1, 0x04040004, + 0x14140014, 0x3a3a003a, 0xdede00de, 0x11110011, + 0x32320032, 0x9c9c009c, 0x53530053, 0xf2f200f2, + 0xfefe00fe, 0xcfcf00cf, 0xc3c300c3, 0x7a7a007a, + 0x24240024, 0xe8e800e8, 0x60600060, 0x69690069, + 0xaaaa00aa, 0xa0a000a0, 0xa1a100a1, 0x62620062, + 0x54540054, 0x1e1e001e, 0xe0e000e0, 0x64640064, + 0x10100010, 0x00000000, 0xa3a300a3, 0x75750075, + 0x8a8a008a, 0xe6e600e6, 0x09090009, 0xdddd00dd, + 0x87870087, 0x83830083, 0xcdcd00cd, 0x90900090, + 0x73730073, 0xf6f600f6, 0x9d9d009d, 0xbfbf00bf, + 0x52520052, 0xd8d800d8, 0xc8c800c8, 0xc6c600c6, + 0x81810081, 0x6f6f006f, 0x13130013, 0x63630063, + 0xe9e900e9, 0xa7a700a7, 0x9f9f009f, 0xbcbc00bc, + 0x29290029, 0xf9f900f9, 0x2f2f002f, 0xb4b400b4, + 0x78780078, 0x06060006, 
0xe7e700e7, 0x71710071, + 0xd4d400d4, 0xabab00ab, 0x88880088, 0x8d8d008d, + 0x72720072, 0xb9b900b9, 0xf8f800f8, 0xacac00ac, + 0x36360036, 0x2a2a002a, 0x3c3c003c, 0xf1f100f1, + 0x40400040, 0xd3d300d3, 0xbbbb00bb, 0x43430043, + 0x15150015, 0xadad00ad, 0x77770077, 0x80800080, + 0x82820082, 0xecec00ec, 0x27270027, 0xe5e500e5, + 0x85850085, 0x35350035, 0x0c0c000c, 0x41410041, + 0xefef00ef, 0x93930093, 0x19190019, 0x21210021, + 0x0e0e000e, 0x4e4e004e, 0x65650065, 0xbdbd00bd, + 0xb8b800b8, 0x8f8f008f, 0xebeb00eb, 0xcece00ce, + 0x30300030, 0x5f5f005f, 0xc5c500c5, 0x1a1a001a, + 0xe1e100e1, 0xcaca00ca, 0x47470047, 0x3d3d003d, + 0x01010001, 0xd6d600d6, 0x56560056, 0x4d4d004d, + 0x0d0d000d, 0x66660066, 0xcccc00cc, 0x2d2d002d, + 0x12120012, 0x20200020, 0xb1b100b1, 0x99990099, + 0x4c4c004c, 0xc2c200c2, 0x7e7e007e, 0x05050005, + 0xb7b700b7, 0x31310031, 0x17170017, 0xd7d700d7, + 0x58580058, 0x61610061, 0x1b1b001b, 0x1c1c001c, + 0x0f0f000f, 0x16160016, 0x18180018, 0x22220022, + 0x44440044, 0xb2b200b2, 0xb5b500b5, 0x91910091, + 0x08080008, 0xa8a800a8, 0xfcfc00fc, 0x50500050, + 0xd0d000d0, 0x7d7d007d, 0x89890089, 0x97970097, + 0x5b5b005b, 0x95950095, 0xffff00ff, 0xd2d200d2, + 0xc4c400c4, 0x48480048, 0xf7f700f7, 0xdbdb00db, + 0x03030003, 0xdada00da, 0x3f3f003f, 0x94940094, + 0x5c5c005c, 0x02020002, 0x4a4a004a, 0x33330033, + 0x67670067, 0xf3f300f3, 0x7f7f007f, 0xe2e200e2, + 0x9b9b009b, 0x26260026, 0x37370037, 0x3b3b003b, + 0x96960096, 0x4b4b004b, 0xbebe00be, 0x2e2e002e, + 0x79790079, 0x8c8c008c, 0x6e6e006e, 0x8e8e008e, + 0xf5f500f5, 0xb6b600b6, 0xfdfd00fd, 0x59590059, + 0x98980098, 0x6a6a006a, 0x46460046, 0xbaba00ba, + 0x25250025, 0x42420042, 0xa2a200a2, 0xfafa00fa, + 0x07070007, 0x55550055, 0xeeee00ee, 0x0a0a000a, + 0x49490049, 0x68680068, 0x38380038, 0xa4a400a4, + 0x28280028, 0x7b7b007b, 0xc9c900c9, 0xc1c100c1, + 0xe3e300e3, 0xf4f400f4, 0xc7c700c7, 0x9e9e009e, +}; + +/** + * Stuff related to the Camellia key schedule + */ +#define subl(x) subL[(x)] +#define subr(x) subR[(x)] + +void 
+camellia_setup128(const unsigned char *key, PRUint32 *subkey) +{ + PRUint32 kll, klr, krl, krr; + PRUint32 il, ir, t0, t1, w0, w1; + PRUint32 kw4l, kw4r, dw, tl, tr; + PRUint32 subL[26]; + PRUint32 subR[26]; +#if defined(CAMELLIA_NEED_TMP_VARIABLE) + PRUint32 tmp; +#endif + + /** + * k == kll || klr || krl || krr (|| is concatination) + */ + kll = GETU32(key); + klr = GETU32(key + 4); + krl = GETU32(key + 8); + krr = GETU32(key + 12); + /** + * generate KL dependent subkeys + */ + subl(0) = kll; + subr(0) = klr; + subl(1) = krl; + subr(1) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15); + subl(4) = kll; + subr(4) = klr; + subl(5) = krl; + subr(5) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 30); + subl(10) = kll; + subr(10) = klr; + subl(11) = krl; + subr(11) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15); + subl(13) = krl; + subr(13) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17); + subl(16) = kll; + subr(16) = klr; + subl(17) = krl; + subr(17) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17); + subl(18) = kll; + subr(18) = klr; + subl(19) = krl; + subr(19) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17); + subl(22) = kll; + subr(22) = klr; + subl(23) = krl; + subr(23) = krr; + + /* generate KA */ + kll = subl(0); + klr = subr(0); + krl = subl(1); + krr = subr(1); + CAMELLIA_F(kll, klr, + CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R, + w0, w1, il, ir, t0, t1); + krl ^= w0; + krr ^= w1; + CAMELLIA_F(krl, krr, + CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R, + kll, klr, il, ir, t0, t1); + CAMELLIA_F(kll, klr, + CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R, + krl, krr, il, ir, t0, t1); + krl ^= w0; + krr ^= w1; + CAMELLIA_F(krl, krr, + CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R, + w0, w1, il, ir, t0, t1); + kll ^= w0; + klr ^= w1; + + /* generate KA dependent subkeys */ + subl(2) = kll; + subr(2) = klr; + subl(3) = krl; + subr(3) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15); + subl(6) = kll; + subr(6) = klr; + subl(7) = krl; + subr(7) = krr; + 
CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15); + subl(8) = kll; + subr(8) = klr; + subl(9) = krl; + subr(9) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15); + subl(12) = kll; + subr(12) = klr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15); + subl(14) = kll; + subr(14) = klr; + subl(15) = krl; + subr(15) = krr; + CAMELLIA_ROLDQo32(kll, klr, krl, krr, w0, w1, 34); + subl(20) = kll; + subr(20) = klr; + subl(21) = krl; + subr(21) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17); + subl(24) = kll; + subr(24) = klr; + subl(25) = krl; + subr(25) = krr; + + /* absorb kw2 to other subkeys */ + subl(3) ^= subl(1); + subr(3) ^= subr(1); + subl(5) ^= subl(1); + subr(5) ^= subr(1); + subl(7) ^= subl(1); + subr(7) ^= subr(1); + subl(1) ^= subr(1) & ~subr(9); + dw = subl(1) & subl(9), subr(1) ^= CAMELLIA_RL1(dw); + subl(11) ^= subl(1); + subr(11) ^= subr(1); + subl(13) ^= subl(1); + subr(13) ^= subr(1); + subl(15) ^= subl(1); + subr(15) ^= subr(1); + subl(1) ^= subr(1) & ~subr(17); + dw = subl(1) & subl(17), subr(1) ^= CAMELLIA_RL1(dw); + subl(19) ^= subl(1); + subr(19) ^= subr(1); + subl(21) ^= subl(1); + subr(21) ^= subr(1); + subl(23) ^= subl(1); + subr(23) ^= subr(1); + subl(24) ^= subl(1); + subr(24) ^= subr(1); + + /* absorb kw4 to other subkeys */ + kw4l = subl(25); + kw4r = subr(25); + subl(22) ^= kw4l; + subr(22) ^= kw4r; + subl(20) ^= kw4l; + subr(20) ^= kw4r; + subl(18) ^= kw4l; + subr(18) ^= kw4r; + kw4l ^= kw4r & ~subr(16); + dw = kw4l & subl(16), kw4r ^= CAMELLIA_RL1(dw); + subl(14) ^= kw4l; + subr(14) ^= kw4r; + subl(12) ^= kw4l; + subr(12) ^= kw4r; + subl(10) ^= kw4l; + subr(10) ^= kw4r; + kw4l ^= kw4r & ~subr(8); + dw = kw4l & subl(8), kw4r ^= CAMELLIA_RL1(dw); + subl(6) ^= kw4l; + subr(6) ^= kw4r; + subl(4) ^= kw4l; + subr(4) ^= kw4r; + subl(2) ^= kw4l; + subr(2) ^= kw4r; + subl(0) ^= kw4l; + subr(0) ^= kw4r; + + /* key XOR is end of F-function */ + CamelliaSubkeyL(0) = subl(0) ^ subl(2); + CamelliaSubkeyR(0) = subr(0) ^ subr(2); + 
CamelliaSubkeyL(2) = subl(3); + CamelliaSubkeyR(2) = subr(3); + CamelliaSubkeyL(3) = subl(2) ^ subl(4); + CamelliaSubkeyR(3) = subr(2) ^ subr(4); + CamelliaSubkeyL(4) = subl(3) ^ subl(5); + CamelliaSubkeyR(4) = subr(3) ^ subr(5); + CamelliaSubkeyL(5) = subl(4) ^ subl(6); + CamelliaSubkeyR(5) = subr(4) ^ subr(6); + CamelliaSubkeyL(6) = subl(5) ^ subl(7); + CamelliaSubkeyR(6) = subr(5) ^ subr(7); + tl = subl(10) ^ (subr(10) & ~subr(8)); + dw = tl & subl(8), tr = subr(10) ^ CAMELLIA_RL1(dw); + CamelliaSubkeyL(7) = subl(6) ^ tl; + CamelliaSubkeyR(7) = subr(6) ^ tr; + CamelliaSubkeyL(8) = subl(8); + CamelliaSubkeyR(8) = subr(8); + CamelliaSubkeyL(9) = subl(9); + CamelliaSubkeyR(9) = subr(9); + tl = subl(7) ^ (subr(7) & ~subr(9)); + dw = tl & subl(9), tr = subr(7) ^ CAMELLIA_RL1(dw); + CamelliaSubkeyL(10) = tl ^ subl(11); + CamelliaSubkeyR(10) = tr ^ subr(11); + CamelliaSubkeyL(11) = subl(10) ^ subl(12); + CamelliaSubkeyR(11) = subr(10) ^ subr(12); + CamelliaSubkeyL(12) = subl(11) ^ subl(13); + CamelliaSubkeyR(12) = subr(11) ^ subr(13); + CamelliaSubkeyL(13) = subl(12) ^ subl(14); + CamelliaSubkeyR(13) = subr(12) ^ subr(14); + CamelliaSubkeyL(14) = subl(13) ^ subl(15); + CamelliaSubkeyR(14) = subr(13) ^ subr(15); + tl = subl(18) ^ (subr(18) & ~subr(16)); + dw = tl & subl(16), tr = subr(18) ^ CAMELLIA_RL1(dw); + CamelliaSubkeyL(15) = subl(14) ^ tl; + CamelliaSubkeyR(15) = subr(14) ^ tr; + CamelliaSubkeyL(16) = subl(16); + CamelliaSubkeyR(16) = subr(16); + CamelliaSubkeyL(17) = subl(17); + CamelliaSubkeyR(17) = subr(17); + tl = subl(15) ^ (subr(15) & ~subr(17)); + dw = tl & subl(17), tr = subr(15) ^ CAMELLIA_RL1(dw); + CamelliaSubkeyL(18) = tl ^ subl(19); + CamelliaSubkeyR(18) = tr ^ subr(19); + CamelliaSubkeyL(19) = subl(18) ^ subl(20); + CamelliaSubkeyR(19) = subr(18) ^ subr(20); + CamelliaSubkeyL(20) = subl(19) ^ subl(21); + CamelliaSubkeyR(20) = subr(19) ^ subr(21); + CamelliaSubkeyL(21) = subl(20) ^ subl(22); + CamelliaSubkeyR(21) = subr(20) ^ subr(22); + 
CamelliaSubkeyL(22) = subl(21) ^ subl(23); + CamelliaSubkeyR(22) = subr(21) ^ subr(23); + CamelliaSubkeyL(23) = subl(22); + CamelliaSubkeyR(23) = subr(22); + CamelliaSubkeyL(24) = subl(24) ^ subl(23); + CamelliaSubkeyR(24) = subr(24) ^ subr(23); + + /* apply the inverse of the last half of P-function */ + dw = CamelliaSubkeyL(2) ^ CamelliaSubkeyR(2), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(2) = CamelliaSubkeyL(2) ^ dw, CamelliaSubkeyL(2) = dw; + dw = CamelliaSubkeyL(3) ^ CamelliaSubkeyR(3), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(3) = CamelliaSubkeyL(3) ^ dw, CamelliaSubkeyL(3) = dw; + dw = CamelliaSubkeyL(4) ^ CamelliaSubkeyR(4), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(4) = CamelliaSubkeyL(4) ^ dw, CamelliaSubkeyL(4) = dw; + dw = CamelliaSubkeyL(5) ^ CamelliaSubkeyR(5), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(5) = CamelliaSubkeyL(5) ^ dw, CamelliaSubkeyL(5) = dw; + dw = CamelliaSubkeyL(6) ^ CamelliaSubkeyR(6), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(6) = CamelliaSubkeyL(6) ^ dw, CamelliaSubkeyL(6) = dw; + dw = CamelliaSubkeyL(7) ^ CamelliaSubkeyR(7), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(7) = CamelliaSubkeyL(7) ^ dw, CamelliaSubkeyL(7) = dw; + dw = CamelliaSubkeyL(10) ^ CamelliaSubkeyR(10), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(10) = CamelliaSubkeyL(10) ^ dw, CamelliaSubkeyL(10) = dw; + dw = CamelliaSubkeyL(11) ^ CamelliaSubkeyR(11), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(11) = CamelliaSubkeyL(11) ^ dw, CamelliaSubkeyL(11) = dw; + dw = CamelliaSubkeyL(12) ^ CamelliaSubkeyR(12), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(12) = CamelliaSubkeyL(12) ^ dw, CamelliaSubkeyL(12) = dw; + dw = CamelliaSubkeyL(13) ^ CamelliaSubkeyR(13), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(13) = CamelliaSubkeyL(13) ^ dw, CamelliaSubkeyL(13) = dw; + dw = CamelliaSubkeyL(14) ^ CamelliaSubkeyR(14), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(14) = CamelliaSubkeyL(14) ^ dw, CamelliaSubkeyL(14) = dw; + dw = CamelliaSubkeyL(15) ^ CamelliaSubkeyR(15), dw = CAMELLIA_RL8(dw); + 
CamelliaSubkeyR(15) = CamelliaSubkeyL(15) ^ dw, CamelliaSubkeyL(15) = dw; + dw = CamelliaSubkeyL(18) ^ CamelliaSubkeyR(18), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(18) = CamelliaSubkeyL(18) ^ dw, CamelliaSubkeyL(18) = dw; + dw = CamelliaSubkeyL(19) ^ CamelliaSubkeyR(19), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(19) = CamelliaSubkeyL(19) ^ dw, CamelliaSubkeyL(19) = dw; + dw = CamelliaSubkeyL(20) ^ CamelliaSubkeyR(20), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(20) = CamelliaSubkeyL(20) ^ dw, CamelliaSubkeyL(20) = dw; + dw = CamelliaSubkeyL(21) ^ CamelliaSubkeyR(21), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(21) = CamelliaSubkeyL(21) ^ dw, CamelliaSubkeyL(21) = dw; + dw = CamelliaSubkeyL(22) ^ CamelliaSubkeyR(22), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(22) = CamelliaSubkeyL(22) ^ dw, CamelliaSubkeyL(22) = dw; + dw = CamelliaSubkeyL(23) ^ CamelliaSubkeyR(23), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(23) = CamelliaSubkeyL(23) ^ dw, CamelliaSubkeyL(23) = dw; + + return; +} + +void +camellia_setup256(const unsigned char *key, PRUint32 *subkey) +{ + PRUint32 kll, klr, krl, krr; /* left half of key */ + PRUint32 krll, krlr, krrl, krrr; /* right half of key */ + PRUint32 il, ir, t0, t1, w0, w1; /* temporary variables */ + PRUint32 kw4l, kw4r, dw, tl, tr; + PRUint32 subL[34]; + PRUint32 subR[34]; +#if defined(CAMELLIA_NEED_TMP_VARIABLE) + PRUint32 tmp; +#endif + + /** + * key = (kll || klr || krl || krr || krll || krlr || krrl || krrr) + * (|| is concatination) + */ + + kll = GETU32(key); + klr = GETU32(key + 4); + krl = GETU32(key + 8); + krr = GETU32(key + 12); + krll = GETU32(key + 16); + krlr = GETU32(key + 20); + krrl = GETU32(key + 24); + krrr = GETU32(key + 28); + + /* generate KL dependent subkeys */ + subl(0) = kll; + subr(0) = klr; + subl(1) = krl; + subr(1) = krr; + CAMELLIA_ROLDQo32(kll, klr, krl, krr, w0, w1, 45); + subl(12) = kll; + subr(12) = klr; + subl(13) = krl; + subr(13) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15); + subl(16) = kll; + subr(16) 
= klr; + subl(17) = krl; + subr(17) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17); + subl(22) = kll; + subr(22) = klr; + subl(23) = krl; + subr(23) = krr; + CAMELLIA_ROLDQo32(kll, klr, krl, krr, w0, w1, 34); + subl(30) = kll; + subr(30) = klr; + subl(31) = krl; + subr(31) = krr; + + /* generate KR dependent subkeys */ + CAMELLIA_ROLDQ(krll, krlr, krrl, krrr, w0, w1, 15); + subl(4) = krll; + subr(4) = krlr; + subl(5) = krrl; + subr(5) = krrr; + CAMELLIA_ROLDQ(krll, krlr, krrl, krrr, w0, w1, 15); + subl(8) = krll; + subr(8) = krlr; + subl(9) = krrl; + subr(9) = krrr; + CAMELLIA_ROLDQ(krll, krlr, krrl, krrr, w0, w1, 30); + subl(18) = krll; + subr(18) = krlr; + subl(19) = krrl; + subr(19) = krrr; + CAMELLIA_ROLDQo32(krll, krlr, krrl, krrr, w0, w1, 34); + subl(26) = krll; + subr(26) = krlr; + subl(27) = krrl; + subr(27) = krrr; + CAMELLIA_ROLDQo32(krll, krlr, krrl, krrr, w0, w1, 34); + + /* generate KA */ + kll = subl(0) ^ krll; + klr = subr(0) ^ krlr; + krl = subl(1) ^ krrl; + krr = subr(1) ^ krrr; + CAMELLIA_F(kll, klr, + CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R, + w0, w1, il, ir, t0, t1); + krl ^= w0; + krr ^= w1; + CAMELLIA_F(krl, krr, + CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R, + kll, klr, il, ir, t0, t1); + kll ^= krll; + klr ^= krlr; + CAMELLIA_F(kll, klr, + CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R, + krl, krr, il, ir, t0, t1); + krl ^= w0 ^ krrl; + krr ^= w1 ^ krrr; + CAMELLIA_F(krl, krr, + CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R, + w0, w1, il, ir, t0, t1); + kll ^= w0; + klr ^= w1; + + /* generate KB */ + krll ^= kll; + krlr ^= klr; + krrl ^= krl; + krrr ^= krr; + CAMELLIA_F(krll, krlr, + CAMELLIA_SIGMA5L, CAMELLIA_SIGMA5R, + w0, w1, il, ir, t0, t1); + krrl ^= w0; + krrr ^= w1; + CAMELLIA_F(krrl, krrr, + CAMELLIA_SIGMA6L, CAMELLIA_SIGMA6R, + w0, w1, il, ir, t0, t1); + krll ^= w0; + krlr ^= w1; + + /* generate KA dependent subkeys */ + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15); + subl(6) = kll; + subr(6) = klr; + subl(7) = krl; + subr(7) = krr; + CAMELLIA_ROLDQ(kll, 
klr, krl, krr, w0, w1, 30); + subl(14) = kll; + subr(14) = klr; + subl(15) = krl; + subr(15) = krr; + subl(24) = klr; + subr(24) = krl; + subl(25) = krr; + subr(25) = kll; + CAMELLIA_ROLDQo32(kll, klr, krl, krr, w0, w1, 49); + subl(28) = kll; + subr(28) = klr; + subl(29) = krl; + subr(29) = krr; + + /* generate KB dependent subkeys */ + subl(2) = krll; + subr(2) = krlr; + subl(3) = krrl; + subr(3) = krrr; + CAMELLIA_ROLDQ(krll, krlr, krrl, krrr, w0, w1, 30); + subl(10) = krll; + subr(10) = krlr; + subl(11) = krrl; + subr(11) = krrr; + CAMELLIA_ROLDQ(krll, krlr, krrl, krrr, w0, w1, 30); + subl(20) = krll; + subr(20) = krlr; + subl(21) = krrl; + subr(21) = krrr; + CAMELLIA_ROLDQo32(krll, krlr, krrl, krrr, w0, w1, 51); + subl(32) = krll; + subr(32) = krlr; + subl(33) = krrl; + subr(33) = krrr; + + /* absorb kw2 to other subkeys */ + subl(3) ^= subl(1); + subr(3) ^= subr(1); + subl(5) ^= subl(1); + subr(5) ^= subr(1); + subl(7) ^= subl(1); + subr(7) ^= subr(1); + subl(1) ^= subr(1) & ~subr(9); + dw = subl(1) & subl(9), subr(1) ^= CAMELLIA_RL1(dw); + subl(11) ^= subl(1); + subr(11) ^= subr(1); + subl(13) ^= subl(1); + subr(13) ^= subr(1); + subl(15) ^= subl(1); + subr(15) ^= subr(1); + subl(1) ^= subr(1) & ~subr(17); + dw = subl(1) & subl(17), subr(1) ^= CAMELLIA_RL1(dw); + subl(19) ^= subl(1); + subr(19) ^= subr(1); + subl(21) ^= subl(1); + subr(21) ^= subr(1); + subl(23) ^= subl(1); + subr(23) ^= subr(1); + subl(1) ^= subr(1) & ~subr(25); + dw = subl(1) & subl(25), subr(1) ^= CAMELLIA_RL1(dw); + subl(27) ^= subl(1); + subr(27) ^= subr(1); + subl(29) ^= subl(1); + subr(29) ^= subr(1); + subl(31) ^= subl(1); + subr(31) ^= subr(1); + subl(32) ^= subl(1); + subr(32) ^= subr(1); + + /* absorb kw4 to other subkeys */ + kw4l = subl(33); + kw4r = subr(33); + subl(30) ^= kw4l; + subr(30) ^= kw4r; + subl(28) ^= kw4l; + subr(28) ^= kw4r; + subl(26) ^= kw4l; + subr(26) ^= kw4r; + kw4l ^= kw4r & ~subr(24); + dw = kw4l & subl(24), kw4r ^= CAMELLIA_RL1(dw); + subl(22) ^= kw4l; + 
subr(22) ^= kw4r; + subl(20) ^= kw4l; + subr(20) ^= kw4r; + subl(18) ^= kw4l; + subr(18) ^= kw4r; + kw4l ^= kw4r & ~subr(16); + dw = kw4l & subl(16), kw4r ^= CAMELLIA_RL1(dw); + subl(14) ^= kw4l; + subr(14) ^= kw4r; + subl(12) ^= kw4l; + subr(12) ^= kw4r; + subl(10) ^= kw4l; + subr(10) ^= kw4r; + kw4l ^= kw4r & ~subr(8); + dw = kw4l & subl(8), kw4r ^= CAMELLIA_RL1(dw); + subl(6) ^= kw4l; + subr(6) ^= kw4r; + subl(4) ^= kw4l; + subr(4) ^= kw4r; + subl(2) ^= kw4l; + subr(2) ^= kw4r; + subl(0) ^= kw4l; + subr(0) ^= kw4r; + + /* key XOR is end of F-function */ + CamelliaSubkeyL(0) = subl(0) ^ subl(2); + CamelliaSubkeyR(0) = subr(0) ^ subr(2); + CamelliaSubkeyL(2) = subl(3); + CamelliaSubkeyR(2) = subr(3); + CamelliaSubkeyL(3) = subl(2) ^ subl(4); + CamelliaSubkeyR(3) = subr(2) ^ subr(4); + CamelliaSubkeyL(4) = subl(3) ^ subl(5); + CamelliaSubkeyR(4) = subr(3) ^ subr(5); + CamelliaSubkeyL(5) = subl(4) ^ subl(6); + CamelliaSubkeyR(5) = subr(4) ^ subr(6); + CamelliaSubkeyL(6) = subl(5) ^ subl(7); + CamelliaSubkeyR(6) = subr(5) ^ subr(7); + tl = subl(10) ^ (subr(10) & ~subr(8)); + dw = tl & subl(8), tr = subr(10) ^ CAMELLIA_RL1(dw); + CamelliaSubkeyL(7) = subl(6) ^ tl; + CamelliaSubkeyR(7) = subr(6) ^ tr; + CamelliaSubkeyL(8) = subl(8); + CamelliaSubkeyR(8) = subr(8); + CamelliaSubkeyL(9) = subl(9); + CamelliaSubkeyR(9) = subr(9); + tl = subl(7) ^ (subr(7) & ~subr(9)); + dw = tl & subl(9), tr = subr(7) ^ CAMELLIA_RL1(dw); + CamelliaSubkeyL(10) = tl ^ subl(11); + CamelliaSubkeyR(10) = tr ^ subr(11); + CamelliaSubkeyL(11) = subl(10) ^ subl(12); + CamelliaSubkeyR(11) = subr(10) ^ subr(12); + CamelliaSubkeyL(12) = subl(11) ^ subl(13); + CamelliaSubkeyR(12) = subr(11) ^ subr(13); + CamelliaSubkeyL(13) = subl(12) ^ subl(14); + CamelliaSubkeyR(13) = subr(12) ^ subr(14); + CamelliaSubkeyL(14) = subl(13) ^ subl(15); + CamelliaSubkeyR(14) = subr(13) ^ subr(15); + tl = subl(18) ^ (subr(18) & ~subr(16)); + dw = tl & subl(16), tr = subr(18) ^ CAMELLIA_RL1(dw); + CamelliaSubkeyL(15) = 
subl(14) ^ tl; + CamelliaSubkeyR(15) = subr(14) ^ tr; + CamelliaSubkeyL(16) = subl(16); + CamelliaSubkeyR(16) = subr(16); + CamelliaSubkeyL(17) = subl(17); + CamelliaSubkeyR(17) = subr(17); + tl = subl(15) ^ (subr(15) & ~subr(17)); + dw = tl & subl(17), tr = subr(15) ^ CAMELLIA_RL1(dw); + CamelliaSubkeyL(18) = tl ^ subl(19); + CamelliaSubkeyR(18) = tr ^ subr(19); + CamelliaSubkeyL(19) = subl(18) ^ subl(20); + CamelliaSubkeyR(19) = subr(18) ^ subr(20); + CamelliaSubkeyL(20) = subl(19) ^ subl(21); + CamelliaSubkeyR(20) = subr(19) ^ subr(21); + CamelliaSubkeyL(21) = subl(20) ^ subl(22); + CamelliaSubkeyR(21) = subr(20) ^ subr(22); + CamelliaSubkeyL(22) = subl(21) ^ subl(23); + CamelliaSubkeyR(22) = subr(21) ^ subr(23); + tl = subl(26) ^ (subr(26) & ~subr(24)); + dw = tl & subl(24), tr = subr(26) ^ CAMELLIA_RL1(dw); + CamelliaSubkeyL(23) = subl(22) ^ tl; + CamelliaSubkeyR(23) = subr(22) ^ tr; + CamelliaSubkeyL(24) = subl(24); + CamelliaSubkeyR(24) = subr(24); + CamelliaSubkeyL(25) = subl(25); + CamelliaSubkeyR(25) = subr(25); + tl = subl(23) ^ (subr(23) & ~subr(25)); + dw = tl & subl(25), tr = subr(23) ^ CAMELLIA_RL1(dw); + CamelliaSubkeyL(26) = tl ^ subl(27); + CamelliaSubkeyR(26) = tr ^ subr(27); + CamelliaSubkeyL(27) = subl(26) ^ subl(28); + CamelliaSubkeyR(27) = subr(26) ^ subr(28); + CamelliaSubkeyL(28) = subl(27) ^ subl(29); + CamelliaSubkeyR(28) = subr(27) ^ subr(29); + CamelliaSubkeyL(29) = subl(28) ^ subl(30); + CamelliaSubkeyR(29) = subr(28) ^ subr(30); + CamelliaSubkeyL(30) = subl(29) ^ subl(31); + CamelliaSubkeyR(30) = subr(29) ^ subr(31); + CamelliaSubkeyL(31) = subl(30); + CamelliaSubkeyR(31) = subr(30); + CamelliaSubkeyL(32) = subl(32) ^ subl(31); + CamelliaSubkeyR(32) = subr(32) ^ subr(31); + + /* apply the inverse of the last half of P-function */ + dw = CamelliaSubkeyL(2) ^ CamelliaSubkeyR(2), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(2) = CamelliaSubkeyL(2) ^ dw, CamelliaSubkeyL(2) = dw; + dw = CamelliaSubkeyL(3) ^ CamelliaSubkeyR(3), dw = 
CAMELLIA_RL8(dw); + CamelliaSubkeyR(3) = CamelliaSubkeyL(3) ^ dw, CamelliaSubkeyL(3) = dw; + dw = CamelliaSubkeyL(4) ^ CamelliaSubkeyR(4), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(4) = CamelliaSubkeyL(4) ^ dw, CamelliaSubkeyL(4) = dw; + dw = CamelliaSubkeyL(5) ^ CamelliaSubkeyR(5), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(5) = CamelliaSubkeyL(5) ^ dw, CamelliaSubkeyL(5) = dw; + dw = CamelliaSubkeyL(6) ^ CamelliaSubkeyR(6), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(6) = CamelliaSubkeyL(6) ^ dw, CamelliaSubkeyL(6) = dw; + dw = CamelliaSubkeyL(7) ^ CamelliaSubkeyR(7), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(7) = CamelliaSubkeyL(7) ^ dw, CamelliaSubkeyL(7) = dw; + dw = CamelliaSubkeyL(10) ^ CamelliaSubkeyR(10), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(10) = CamelliaSubkeyL(10) ^ dw, CamelliaSubkeyL(10) = dw; + dw = CamelliaSubkeyL(11) ^ CamelliaSubkeyR(11), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(11) = CamelliaSubkeyL(11) ^ dw, CamelliaSubkeyL(11) = dw; + dw = CamelliaSubkeyL(12) ^ CamelliaSubkeyR(12), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(12) = CamelliaSubkeyL(12) ^ dw, CamelliaSubkeyL(12) = dw; + dw = CamelliaSubkeyL(13) ^ CamelliaSubkeyR(13), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(13) = CamelliaSubkeyL(13) ^ dw, CamelliaSubkeyL(13) = dw; + dw = CamelliaSubkeyL(14) ^ CamelliaSubkeyR(14), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(14) = CamelliaSubkeyL(14) ^ dw, CamelliaSubkeyL(14) = dw; + dw = CamelliaSubkeyL(15) ^ CamelliaSubkeyR(15), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(15) = CamelliaSubkeyL(15) ^ dw, CamelliaSubkeyL(15) = dw; + dw = CamelliaSubkeyL(18) ^ CamelliaSubkeyR(18), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(18) = CamelliaSubkeyL(18) ^ dw, CamelliaSubkeyL(18) = dw; + dw = CamelliaSubkeyL(19) ^ CamelliaSubkeyR(19), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(19) = CamelliaSubkeyL(19) ^ dw, CamelliaSubkeyL(19) = dw; + dw = CamelliaSubkeyL(20) ^ CamelliaSubkeyR(20), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(20) = CamelliaSubkeyL(20) ^ dw, 
CamelliaSubkeyL(20) = dw; + dw = CamelliaSubkeyL(21) ^ CamelliaSubkeyR(21), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(21) = CamelliaSubkeyL(21) ^ dw, CamelliaSubkeyL(21) = dw; + dw = CamelliaSubkeyL(22) ^ CamelliaSubkeyR(22), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(22) = CamelliaSubkeyL(22) ^ dw, CamelliaSubkeyL(22) = dw; + dw = CamelliaSubkeyL(23) ^ CamelliaSubkeyR(23), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(23) = CamelliaSubkeyL(23) ^ dw, CamelliaSubkeyL(23) = dw; + dw = CamelliaSubkeyL(26) ^ CamelliaSubkeyR(26), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(26) = CamelliaSubkeyL(26) ^ dw, CamelliaSubkeyL(26) = dw; + dw = CamelliaSubkeyL(27) ^ CamelliaSubkeyR(27), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(27) = CamelliaSubkeyL(27) ^ dw, CamelliaSubkeyL(27) = dw; + dw = CamelliaSubkeyL(28) ^ CamelliaSubkeyR(28), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(28) = CamelliaSubkeyL(28) ^ dw, CamelliaSubkeyL(28) = dw; + dw = CamelliaSubkeyL(29) ^ CamelliaSubkeyR(29), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(29) = CamelliaSubkeyL(29) ^ dw, CamelliaSubkeyL(29) = dw; + dw = CamelliaSubkeyL(30) ^ CamelliaSubkeyR(30), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(30) = CamelliaSubkeyL(30) ^ dw, CamelliaSubkeyL(30) = dw; + dw = CamelliaSubkeyL(31) ^ CamelliaSubkeyR(31), dw = CAMELLIA_RL8(dw); + CamelliaSubkeyR(31) = CamelliaSubkeyL(31) ^ dw, CamelliaSubkeyL(31) = dw; + + return; +} + +void +camellia_setup192(const unsigned char *key, PRUint32 *subkey) +{ + unsigned char kk[32]; + PRUint32 krll, krlr, krrl, krrr; + + memcpy(kk, key, 24); + memcpy((unsigned char *)&krll, key + 16, 4); + memcpy((unsigned char *)&krlr, key + 20, 4); + krrl = ~krll; + krrr = ~krlr; + memcpy(kk + 24, (unsigned char *)&krrl, 4); + memcpy(kk + 28, (unsigned char *)&krrr, 4); + camellia_setup256(kk, subkey); + return; +} + +/** + * Stuff related to camellia encryption/decryption + * + */ +SECStatus NO_SANITIZE_ALIGNMENT +camellia_encrypt128(const PRUint32 *subkey, + unsigned char *output, + const unsigned char 
*input) +{ + PRUint32 il, ir, t0, t1; + PRUint32 io[4]; +#if defined(CAMELLIA_NEED_TMP_VARIABLE) + PRUint32 tmp; +#endif + + io[0] = GETU32(input); + io[1] = GETU32(input + 4); + io[2] = GETU32(input + 8); + io[3] = GETU32(input + 12); + + /* pre whitening but absorb kw2*/ + io[0] ^= CamelliaSubkeyL(0); + io[1] ^= CamelliaSubkeyR(0); + /* main iteration */ + + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(2), CamelliaSubkeyR(2), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(3), CamelliaSubkeyR(3), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(4), CamelliaSubkeyR(4), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(5), CamelliaSubkeyR(5), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(6), CamelliaSubkeyR(6), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(7), CamelliaSubkeyR(7), + io[0], io[1], il, ir, t0, t1); + + CAMELLIA_FLS(io[0], io[1], io[2], io[3], + CamelliaSubkeyL(8), CamelliaSubkeyR(8), + CamelliaSubkeyL(9), CamelliaSubkeyR(9), + t0, t1, il, ir); + + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(10), CamelliaSubkeyR(10), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(11), CamelliaSubkeyR(11), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(12), CamelliaSubkeyR(12), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(13), CamelliaSubkeyR(13), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(14), CamelliaSubkeyR(14), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(15), CamelliaSubkeyR(15), + io[0], io[1], il, ir, t0, t1); + + CAMELLIA_FLS(io[0], io[1], io[2], io[3], + CamelliaSubkeyL(16), CamelliaSubkeyR(16), + CamelliaSubkeyL(17), CamelliaSubkeyR(17), + t0, t1, il, ir); + + CAMELLIA_ROUNDSM(io[0], 
io[1], + CamelliaSubkeyL(18), CamelliaSubkeyR(18), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(19), CamelliaSubkeyR(19), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(20), CamelliaSubkeyR(20), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(21), CamelliaSubkeyR(21), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(22), CamelliaSubkeyR(22), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(23), CamelliaSubkeyR(23), + io[0], io[1], il, ir, t0, t1); + + /* post whitening but kw4 */ + io[2] ^= CamelliaSubkeyL(24); + io[3] ^= CamelliaSubkeyR(24); + + t0 = io[0]; + t1 = io[1]; + io[0] = io[2]; + io[1] = io[3]; + io[2] = t0; + io[3] = t1; + + PUTU32(output, io[0]); + PUTU32(output + 4, io[1]); + PUTU32(output + 8, io[2]); + PUTU32(output + 12, io[3]); + + return SECSuccess; +} + +SECStatus NO_SANITIZE_ALIGNMENT +camellia_decrypt128(const PRUint32 *subkey, + unsigned char *output, + const unsigned char *input) +{ + PRUint32 il, ir, t0, t1; /* temporary valiables */ + PRUint32 io[4]; +#if defined(CAMELLIA_NEED_TMP_VARIABLE) + PRUint32 tmp; +#endif + + io[0] = GETU32(input); + io[1] = GETU32(input + 4); + io[2] = GETU32(input + 8); + io[3] = GETU32(input + 12); + + /* pre whitening but absorb kw2*/ + io[0] ^= CamelliaSubkeyL(24); + io[1] ^= CamelliaSubkeyR(24); + + /* main iteration */ + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(23), CamelliaSubkeyR(23), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(22), CamelliaSubkeyR(22), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(21), CamelliaSubkeyR(21), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(20), CamelliaSubkeyR(20), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(19), CamelliaSubkeyR(19), + 
io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(18), CamelliaSubkeyR(18), + io[0], io[1], il, ir, t0, t1); + + CAMELLIA_FLS(io[0], io[1], io[2], io[3], + CamelliaSubkeyL(17), CamelliaSubkeyR(17), + CamelliaSubkeyL(16), CamelliaSubkeyR(16), + t0, t1, il, ir); + + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(15), CamelliaSubkeyR(15), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(14), CamelliaSubkeyR(14), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(13), CamelliaSubkeyR(13), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(12), CamelliaSubkeyR(12), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(11), CamelliaSubkeyR(11), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(10), CamelliaSubkeyR(10), + io[0], io[1], il, ir, t0, t1); + + CAMELLIA_FLS(io[0], io[1], io[2], io[3], + CamelliaSubkeyL(9), CamelliaSubkeyR(9), + CamelliaSubkeyL(8), CamelliaSubkeyR(8), + t0, t1, il, ir); + + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(7), CamelliaSubkeyR(7), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(6), CamelliaSubkeyR(6), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(5), CamelliaSubkeyR(5), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(4), CamelliaSubkeyR(4), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(3), CamelliaSubkeyR(3), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(2), CamelliaSubkeyR(2), + io[0], io[1], il, ir, t0, t1); + + /* post whitening but kw4 */ + io[2] ^= CamelliaSubkeyL(0); + io[3] ^= CamelliaSubkeyR(0); + + t0 = io[0]; + t1 = io[1]; + io[0] = io[2]; + io[1] = io[3]; + io[2] = t0; + io[3] = t1; + + PUTU32(output, io[0]); + PUTU32(output + 4, io[1]); + 
PUTU32(output + 8, io[2]); + PUTU32(output + 12, io[3]); + + return SECSuccess; +} + +/** + * stuff for 192 and 256bit encryption/decryption + */ +SECStatus NO_SANITIZE_ALIGNMENT +camellia_encrypt256(const PRUint32 *subkey, + unsigned char *output, + const unsigned char *input) +{ + PRUint32 il, ir, t0, t1; /* temporary valiables */ + PRUint32 io[4]; +#if defined(CAMELLIA_NEED_TMP_VARIABLE) + PRUint32 tmp; +#endif + + io[0] = GETU32(input); + io[1] = GETU32(input + 4); + io[2] = GETU32(input + 8); + io[3] = GETU32(input + 12); + + /* pre whitening but absorb kw2*/ + io[0] ^= CamelliaSubkeyL(0); + io[1] ^= CamelliaSubkeyR(0); + + /* main iteration */ + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(2), CamelliaSubkeyR(2), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(3), CamelliaSubkeyR(3), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(4), CamelliaSubkeyR(4), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(5), CamelliaSubkeyR(5), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(6), CamelliaSubkeyR(6), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(7), CamelliaSubkeyR(7), + io[0], io[1], il, ir, t0, t1); + + CAMELLIA_FLS(io[0], io[1], io[2], io[3], + CamelliaSubkeyL(8), CamelliaSubkeyR(8), + CamelliaSubkeyL(9), CamelliaSubkeyR(9), + t0, t1, il, ir); + + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(10), CamelliaSubkeyR(10), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(11), CamelliaSubkeyR(11), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(12), CamelliaSubkeyR(12), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(13), CamelliaSubkeyR(13), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(14), CamelliaSubkeyR(14), + io[2], io[3], il, ir, 
t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(15), CamelliaSubkeyR(15), + io[0], io[1], il, ir, t0, t1); + + CAMELLIA_FLS(io[0], io[1], io[2], io[3], + CamelliaSubkeyL(16), CamelliaSubkeyR(16), + CamelliaSubkeyL(17), CamelliaSubkeyR(17), + t0, t1, il, ir); + + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(18), CamelliaSubkeyR(18), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(19), CamelliaSubkeyR(19), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(20), CamelliaSubkeyR(20), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(21), CamelliaSubkeyR(21), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(22), CamelliaSubkeyR(22), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(23), CamelliaSubkeyR(23), + io[0], io[1], il, ir, t0, t1); + + CAMELLIA_FLS(io[0], io[1], io[2], io[3], + CamelliaSubkeyL(24), CamelliaSubkeyR(24), + CamelliaSubkeyL(25), CamelliaSubkeyR(25), + t0, t1, il, ir); + + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(26), CamelliaSubkeyR(26), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(27), CamelliaSubkeyR(27), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(28), CamelliaSubkeyR(28), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(29), CamelliaSubkeyR(29), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(30), CamelliaSubkeyR(30), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(31), CamelliaSubkeyR(31), + io[0], io[1], il, ir, t0, t1); + + /* post whitening but kw4 */ + io[2] ^= CamelliaSubkeyL(32); + io[3] ^= CamelliaSubkeyR(32); + + t0 = io[0]; + t1 = io[1]; + io[0] = io[2]; + io[1] = io[3]; + io[2] = t0; + io[3] = t1; + + PUTU32(output, io[0]); + PUTU32(output + 4, io[1]); + 
PUTU32(output + 8, io[2]); + PUTU32(output + 12, io[3]); + + return SECSuccess; +} + +SECStatus NO_SANITIZE_ALIGNMENT +camellia_decrypt256(const PRUint32 *subkey, + unsigned char *output, + const unsigned char *input) +{ + PRUint32 il, ir, t0, t1; /* temporary valiables */ + PRUint32 io[4]; +#if defined(CAMELLIA_NEED_TMP_VARIABLE) + PRUint32 tmp; +#endif + + io[0] = GETU32(input); + io[1] = GETU32(input + 4); + io[2] = GETU32(input + 8); + io[3] = GETU32(input + 12); + + /* pre whitening but absorb kw2*/ + io[0] ^= CamelliaSubkeyL(32); + io[1] ^= CamelliaSubkeyR(32); + + /* main iteration */ + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(31), CamelliaSubkeyR(31), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(30), CamelliaSubkeyR(30), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(29), CamelliaSubkeyR(29), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(28), CamelliaSubkeyR(28), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(27), CamelliaSubkeyR(27), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(26), CamelliaSubkeyR(26), + io[0], io[1], il, ir, t0, t1); + + CAMELLIA_FLS(io[0], io[1], io[2], io[3], + CamelliaSubkeyL(25), CamelliaSubkeyR(25), + CamelliaSubkeyL(24), CamelliaSubkeyR(24), + t0, t1, il, ir); + + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(23), CamelliaSubkeyR(23), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(22), CamelliaSubkeyR(22), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(21), CamelliaSubkeyR(21), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(20), CamelliaSubkeyR(20), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(19), CamelliaSubkeyR(19), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + 
CamelliaSubkeyL(18), CamelliaSubkeyR(18), + io[0], io[1], il, ir, t0, t1); + + CAMELLIA_FLS(io[0], io[1], io[2], io[3], + CamelliaSubkeyL(17), CamelliaSubkeyR(17), + CamelliaSubkeyL(16), CamelliaSubkeyR(16), + t0, t1, il, ir); + + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(15), CamelliaSubkeyR(15), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(14), CamelliaSubkeyR(14), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(13), CamelliaSubkeyR(13), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(12), CamelliaSubkeyR(12), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(11), CamelliaSubkeyR(11), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(10), CamelliaSubkeyR(10), + io[0], io[1], il, ir, t0, t1); + + CAMELLIA_FLS(io[0], io[1], io[2], io[3], + CamelliaSubkeyL(9), CamelliaSubkeyR(9), + CamelliaSubkeyL(8), CamelliaSubkeyR(8), + t0, t1, il, ir); + + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(7), CamelliaSubkeyR(7), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(6), CamelliaSubkeyR(6), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(5), CamelliaSubkeyR(5), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(4), CamelliaSubkeyR(4), + io[0], io[1], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[0], io[1], + CamelliaSubkeyL(3), CamelliaSubkeyR(3), + io[2], io[3], il, ir, t0, t1); + CAMELLIA_ROUNDSM(io[2], io[3], + CamelliaSubkeyL(2), CamelliaSubkeyR(2), + io[0], io[1], il, ir, t0, t1); + + /* post whitening but kw4 */ + io[2] ^= CamelliaSubkeyL(0); + io[3] ^= CamelliaSubkeyR(0); + + t0 = io[0]; + t1 = io[1]; + io[0] = io[2]; + io[1] = io[3]; + io[2] = t0; + io[3] = t1; + + PUTU32(output, io[0]); + PUTU32(output + 4, io[1]); + PUTU32(output + 8, io[2]); + PUTU32(output + 12, io[3]); + + return 
SECSuccess; +} + +/************************************************************************** + * + * Stuff related to the Camellia key schedule + * + *************************************************************************/ + +/* + * camellia_key_expansion + * + * Store keysize (in bytes) in cx and fill cx->expandedKey using the setup + * routine that matches the key length (16, 24 or 32 bytes). Any other + * length fails with SEC_ERROR_INVALID_ARGS. + */ +SECStatus +camellia_key_expansion(CamelliaContext *cx, + const unsigned char *key, + const unsigned int keysize) +{ + cx->keysize = keysize; + + switch (keysize) { + case 16: + camellia_setup128(key, cx->expandedKey); + break; + case 24: + camellia_setup192(key, cx->expandedKey); + break; + case 32: + camellia_setup256(key, cx->expandedKey); + break; + default: + /* No subkeys were generated; report it instead of claiming success. */ + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + return SECSuccess; +} + +/************************************************************************** + * + * Camellia modes of operation (ECB and CBC) + * + *************************************************************************/ + +SECStatus +camellia_encryptECB(CamelliaContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + CamelliaBlockFunc *encryptor; + + encryptor = (cx->keysize == 16) + ? &camellia_encrypt128 + : &camellia_encrypt256; + + while (inputLen > 0) { + (*encryptor)(cx->expandedKey, output, input); + + output += CAMELLIA_BLOCK_SIZE; + input += CAMELLIA_BLOCK_SIZE; + inputLen -= CAMELLIA_BLOCK_SIZE; + } + return SECSuccess; +} + +SECStatus +camellia_encryptCBC(CamelliaContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + unsigned int j; + unsigned char *lastblock; + unsigned char inblock[CAMELLIA_BLOCK_SIZE]; + CamelliaBlockFunc *encryptor; + + if (!inputLen) + return SECSuccess; + lastblock = cx->iv; + + encryptor = (cx->keysize == 16) + ?
&camellia_encrypt128 + : &camellia_encrypt256; + + while (inputLen > 0) { + /* XOR with the last block (IV if first block) */ + for (j = 0; j < CAMELLIA_BLOCK_SIZE; ++j) + inblock[j] = input[j] ^ lastblock[j]; + /* encrypt */ + (*encryptor)(cx->expandedKey, output, inblock); + + /* move to the next block */ + lastblock = output; + output += CAMELLIA_BLOCK_SIZE; + input += CAMELLIA_BLOCK_SIZE; + inputLen -= CAMELLIA_BLOCK_SIZE; + } + memcpy(cx->iv, lastblock, CAMELLIA_BLOCK_SIZE); + return SECSuccess; +} + +SECStatus +camellia_decryptECB(CamelliaContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + CamelliaBlockFunc *decryptor; + + decryptor = (cx->keysize == 16) + ? &camellia_decrypt128 + : &camellia_decrypt256; + + while (inputLen > 0) { + + (*decryptor)(cx->expandedKey, output, input); + + output += CAMELLIA_BLOCK_SIZE; + input += CAMELLIA_BLOCK_SIZE; + inputLen -= CAMELLIA_BLOCK_SIZE; + } + return SECSuccess; +} + +SECStatus +camellia_decryptCBC(CamelliaContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + const unsigned char *in; + unsigned char *out; + unsigned int j; + unsigned char newIV[CAMELLIA_BLOCK_SIZE]; + CamelliaBlockFunc *decryptor; + + if (!inputLen) + return SECSuccess; + + PORT_Assert(output - input >= 0 || input - output >= (int)inputLen); + + in = input + (inputLen - CAMELLIA_BLOCK_SIZE); + memcpy(newIV, in, CAMELLIA_BLOCK_SIZE); + out = output + (inputLen - CAMELLIA_BLOCK_SIZE); + + decryptor = (cx->keysize == 16) + ? 
&camellia_decrypt128 + : &camellia_decrypt256; + + while (inputLen > CAMELLIA_BLOCK_SIZE) { + (*decryptor)(cx->expandedKey, out, in); + + for (j = 0; j < CAMELLIA_BLOCK_SIZE; ++j) + out[j] ^= in[(int)(j - CAMELLIA_BLOCK_SIZE)]; + + out -= CAMELLIA_BLOCK_SIZE; + in -= CAMELLIA_BLOCK_SIZE; + inputLen -= CAMELLIA_BLOCK_SIZE; + } + if (in == input) { + (*decryptor)(cx->expandedKey, out, in); + + for (j = 0; j < CAMELLIA_BLOCK_SIZE; ++j) + out[j] ^= cx->iv[j]; + } + memcpy(cx->iv, newIV, CAMELLIA_BLOCK_SIZE); + return SECSuccess; +} + +/************************************************************************** + * + * BLAPI Interface functions + * + *************************************************************************/ + +CamelliaContext * +Camellia_AllocateContext(void) +{ + return PORT_ZNew(CamelliaContext); +} + +SECStatus +Camellia_InitContext(CamelliaContext *cx, const unsigned char *key, + unsigned int keysize, + const unsigned char *iv, int mode, unsigned int encrypt, + unsigned int unused) +{ + if (key == NULL || + (keysize != 16 && keysize != 24 && keysize != 32)) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + if (mode != NSS_CAMELLIA && mode != NSS_CAMELLIA_CBC) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + if (mode == NSS_CAMELLIA_CBC && iv == NULL) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + if (!cx) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + if (mode == NSS_CAMELLIA_CBC) { + memcpy(cx->iv, iv, CAMELLIA_BLOCK_SIZE); + cx->worker = (encrypt) ? &camellia_encryptCBC : &camellia_decryptCBC; + } else { + cx->worker = (encrypt) ? 
&camellia_encryptECB : &camellia_decryptECB; + } + + /* Generate expanded key */ + if (camellia_key_expansion(cx, key, keysize) != SECSuccess) + goto cleanup; + + return SECSuccess; +cleanup: + return SECFailure; +} + +/* + * Camellia_CreateContext + * + * Allocate and initialize a new context for Camellia operations. + * key/keysize (16, 24 or 32 bytes), mode and, for CBC mode, iv are + * validated before allocation. Returns NULL on any failure; a context + * whose key expansion fails is zeroed and freed before returning. + */ + +CamelliaContext * +Camellia_CreateContext(const unsigned char *key, const unsigned char *iv, + int mode, int encrypt, + unsigned int keysize) +{ + CamelliaContext *cx; + + if (key == NULL || + (keysize != 16 && keysize != 24 && keysize != 32)) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return NULL; + } + if (mode != NSS_CAMELLIA && mode != NSS_CAMELLIA_CBC) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return NULL; + } + if (mode == NSS_CAMELLIA_CBC && iv == NULL) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return NULL; + } + cx = PORT_ZNew(CamelliaContext); + if (!cx) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return NULL; + } + + /* copy in the iv, if necessary, and pick the worker for mode/direction */ + if (mode == NSS_CAMELLIA_CBC) { + memcpy(cx->iv, iv, CAMELLIA_BLOCK_SIZE); + cx->worker = (encrypt) ? &camellia_encryptCBC : &camellia_decryptCBC; + } else { + cx->worker = (encrypt) ? &camellia_encryptECB : &camellia_decryptECB; + } + /* copy keysize */ + cx->keysize = keysize; + + /* Generate expanded key */ + if (camellia_key_expansion(cx, key, keysize) != SECSuccess) + goto cleanup; + + return cx; +cleanup: + PORT_ZFree(cx, sizeof *cx); + return NULL; +} + +/* + * Camellia_DestroyContext + * + * Zero a Camellia cipher context (only when cx is non-NULL). If freeit is + * true, also free the pointer to the context. + */ +void +Camellia_DestroyContext(CamelliaContext *cx, PRBool freeit) +{ + if (cx) + memset(cx, 0, sizeof *cx); + if (freeit) + PORT_Free(cx); +} + +/* + * Camellia_Encrypt + * + * Encrypt an arbitrary-length buffer. The output buffer must already be + * allocated to at least inputLen.
+ */ +SECStatus +Camellia_Encrypt(CamelliaContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + + /* Check args */ + if (cx == NULL || output == NULL || input == NULL || + outputLen == NULL) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + /* Only whole CAMELLIA_BLOCK_SIZE blocks are accepted. */ + if (inputLen % CAMELLIA_BLOCK_SIZE != 0) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + if (maxOutputLen < inputLen) { + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + /* Output is exactly as long as the input. */ + *outputLen = inputLen; + + /* Dispatch to the mode/direction handler stored in the context. */ + return (*cx->worker)(cx, output, outputLen, maxOutputLen, + input, inputLen); +} + +/* + * Camellia_Decrypt + * + * Decrypt a buffer whose length is a multiple of CAMELLIA_BLOCK_SIZE. + * The output buffer must already be allocated to at least inputLen. + * Fails with SEC_ERROR_INVALID_ARGS, SEC_ERROR_INPUT_LEN or + * SEC_ERROR_OUTPUT_LEN when the corresponding check below is violated. + */ +SECStatus +Camellia_Decrypt(CamelliaContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + + /* Check args */ + if (cx == NULL || output == NULL || input == NULL || outputLen == NULL) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + /* Only whole CAMELLIA_BLOCK_SIZE blocks are accepted. */ + if (inputLen % CAMELLIA_BLOCK_SIZE != 0) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + if (maxOutputLen < inputLen) { + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + /* Output is exactly as long as the input. */ + *outputLen = inputLen; + + /* Dispatch to the mode/direction handler stored in the context. */ + return (*cx->worker)(cx, output, outputLen, maxOutputLen, + input, inputLen); +} diff --git a/security/nss/lib/freebl/camellia.h b/security/nss/lib/freebl/camellia.h new file mode 100644 index 000000000..15114db9a --- /dev/null +++ b/security/nss/lib/freebl/camellia.h @@ -0,0 +1,42 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/ + +#ifndef _CAMELLIA_H_ +#define _CAMELLIA_H_ 1 + +#define CAMELLIA_BLOCK_SIZE 16 /* bytes */ +#define CAMELLIA_MIN_KEYSIZE 16 /* bytes */ +#define CAMELLIA_MAX_KEYSIZE 32 /* bytes */ + +#define CAMELLIA_MAX_EXPANDEDKEY (34 * 2) /* 32bit unit */ + +typedef PRUint32 KEY_TABLE_TYPE[CAMELLIA_MAX_EXPANDEDKEY]; + +typedef SECStatus CamelliaFunc(CamelliaContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen); + +typedef SECStatus CamelliaBlockFunc(const PRUint32 *subkey, + unsigned char *output, + const unsigned char *input); + +/* CamelliaContextStr + * + * Values which maintain the state for Camellia encryption/decryption. + * + * keysize - the key length in bytes + * worker - the encryption/decryption function to use with this context + * iv - initialization vector for CBC mode + * expandedKey - the round keys in 4-byte words + */ +struct CamelliaContextStr { + PRUint32 keysize; /* bytes */ + CamelliaFunc *worker; + PRUint32 expandedKey[CAMELLIA_MAX_EXPANDEDKEY]; + PRUint8 iv[CAMELLIA_BLOCK_SIZE]; +}; + +#endif /* _CAMELLIA_H_ */ diff --git a/security/nss/lib/freebl/chacha20.c b/security/nss/lib/freebl/chacha20.c new file mode 100644 index 000000000..f55d1e670 --- /dev/null +++ b/security/nss/lib/freebl/chacha20.c @@ -0,0 +1,119 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* Adopted from the public domain code in NaCl by djb.
*/ + +#include +#include + +#include "prtypes.h" +#include "secport.h" +#include "chacha20.h" + +#if defined(_MSC_VER) +#pragma intrinsic(_lrotl) +#define ROTL32(x, n) _lrotl(x, n) +#else +#define ROTL32(x, n) ((x << n) | (x >> ((8 * sizeof x) - n))) +#endif + +#define ROTATE(v, c) ROTL32((v), (c)) + +#define U32TO8_LITTLE(p, v) \ + { \ + (p)[0] = ((v)) & 0xff; \ + (p)[1] = ((v) >> 8) & 0xff; \ + (p)[2] = ((v) >> 16) & 0xff; \ + (p)[3] = ((v) >> 24) & 0xff; \ + } +#define U8TO32_LITTLE(p) \ + (((PRUint32)((p)[0])) | ((PRUint32)((p)[1]) << 8) | \ + ((PRUint32)((p)[2]) << 16) | ((PRUint32)((p)[3]) << 24)) + +#define QUARTERROUND(x, a, b, c, d) \ + x[a] = x[a] + x[b]; \ + x[d] = ROTATE(x[d] ^ x[a], 16); \ + x[c] = x[c] + x[d]; \ + x[b] = ROTATE(x[b] ^ x[c], 12); \ + x[a] = x[a] + x[b]; \ + x[d] = ROTATE(x[d] ^ x[a], 8); \ + x[c] = x[c] + x[d]; \ + x[b] = ROTATE(x[b] ^ x[c], 7); + +static void +ChaChaCore(unsigned char output[64], const PRUint32 input[16], int num_rounds) +{ + PRUint32 x[16]; + int i; + + PORT_Memcpy(x, input, sizeof(PRUint32) * 16); + for (i = num_rounds; i > 0; i -= 2) { + QUARTERROUND(x, 0, 4, 8, 12) + QUARTERROUND(x, 1, 5, 9, 13) + QUARTERROUND(x, 2, 6, 10, 14) + QUARTERROUND(x, 3, 7, 11, 15) + QUARTERROUND(x, 0, 5, 10, 15) + QUARTERROUND(x, 1, 6, 11, 12) + QUARTERROUND(x, 2, 7, 8, 13) + QUARTERROUND(x, 3, 4, 9, 14) + } + + for (i = 0; i < 16; ++i) { + x[i] = x[i] + input[i]; + } + for (i = 0; i < 16; ++i) { + U32TO8_LITTLE(output + 4 * i, x[i]); + } +} + +static const unsigned char sigma[16] = "expand 32-byte k"; + +void +ChaCha20XOR(unsigned char *out, const unsigned char *in, unsigned int inLen, + const unsigned char key[32], const unsigned char nonce[12], + uint32_t counter) +{ + unsigned char block[64]; + PRUint32 input[16]; + unsigned int i; + + input[4] = U8TO32_LITTLE(key + 0); + input[5] = U8TO32_LITTLE(key + 4); + input[6] = U8TO32_LITTLE(key + 8); + input[7] = U8TO32_LITTLE(key + 12); + + input[8] = U8TO32_LITTLE(key + 16); + input[9] = 
U8TO32_LITTLE(key + 20); + input[10] = U8TO32_LITTLE(key + 24); + input[11] = U8TO32_LITTLE(key + 28); + + input[0] = U8TO32_LITTLE(sigma + 0); + input[1] = U8TO32_LITTLE(sigma + 4); + input[2] = U8TO32_LITTLE(sigma + 8); + input[3] = U8TO32_LITTLE(sigma + 12); + + input[12] = counter; + input[13] = U8TO32_LITTLE(nonce + 0); + input[14] = U8TO32_LITTLE(nonce + 4); + input[15] = U8TO32_LITTLE(nonce + 8); + + while (inLen >= 64) { + ChaChaCore(block, input, 20); + for (i = 0; i < 64; i++) { + out[i] = in[i] ^ block[i]; + } + + input[12]++; + inLen -= 64; + in += 64; + out += 64; + } + + if (inLen > 0) { + ChaChaCore(block, input, 20); + for (i = 0; i < inLen; i++) { + out[i] = in[i] ^ block[i]; + } + } +} diff --git a/security/nss/lib/freebl/chacha20.h b/security/nss/lib/freebl/chacha20.h new file mode 100644 index 000000000..7e396fa8c --- /dev/null +++ b/security/nss/lib/freebl/chacha20.h @@ -0,0 +1,26 @@ +/* + * chacha20.h - header file for ChaCha20 implementation. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef FREEBL_CHACHA20_H_ +#define FREEBL_CHACHA20_H_ + +#if defined(_MSC_VER) && _MSC_VER < 1600 +#include "prtypes.h" +typedef PRUint32 uint32_t; +typedef PRUint64 uint64_t; +#else +#include +#endif + +/* ChaCha20XOR encrypts |inLen| bytes from |in| with the given key and + * nonce and writes the result to |out|, which may be equal to |in|. The + * initial block counter is specified by |counter|. 
*/ +extern void ChaCha20XOR(unsigned char *out, const unsigned char *in, + unsigned int inLen, const unsigned char key[32], + const unsigned char nonce[12], uint32_t counter); + +#endif /* FREEBL_CHACHA20_H_ */ diff --git a/security/nss/lib/freebl/chacha20_vec.c b/security/nss/lib/freebl/chacha20_vec.c new file mode 100644 index 000000000..12f94d897 --- /dev/null +++ b/security/nss/lib/freebl/chacha20_vec.c @@ -0,0 +1,327 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* This implementation is by Ted Krovetz and was submitted to SUPERCOP and + * marked as public domain. It was been altered to allow for non-aligned inputs + * and to allow the block counter to be passed in specifically. */ + +#include + +#include "chacha20.h" +#include "blapii.h" + +#ifndef CHACHA_RNDS +#define CHACHA_RNDS 20 /* 8 (high speed), 20 (conservative), 12 (middle) */ +#endif + +/* Architecture-neutral way to specify 16-byte vector of ints */ +typedef unsigned vec __attribute__((vector_size(16))); + +/* This implementation is designed for Neon, SSE and AltiVec machines. The + * following specify how to do certain vector operations efficiently on + * each architecture, using intrinsics. + * This implementation supports parallel processing of multiple blocks, + * including potentially using general-purpose registers. 
+ */ +#if __ARM_NEON__ +#include +#define GPR_TOO 1 +#define VBPI 2 +#define ONE (vec) vsetq_lane_u32(1, vdupq_n_u32(0), 0) +#define LOAD(m) (vec)(*((vec *)(m))) +#define STORE(m, r) (*((vec *)(m))) = (r) +#define ROTV1(x) (vec) vextq_u32((uint32x4_t)x, (uint32x4_t)x, 1) +#define ROTV2(x) (vec) vextq_u32((uint32x4_t)x, (uint32x4_t)x, 2) +#define ROTV3(x) (vec) vextq_u32((uint32x4_t)x, (uint32x4_t)x, 3) +#define ROTW16(x) (vec) vrev32q_u16((uint16x8_t)x) +#if __clang__ +#define ROTW7(x) (x << ((vec){ 7, 7, 7, 7 })) ^ (x >> ((vec){ 25, 25, 25, 25 })) +#define ROTW8(x) (x << ((vec){ 8, 8, 8, 8 })) ^ (x >> ((vec){ 24, 24, 24, 24 })) +#define ROTW12(x) (x << ((vec){ 12, 12, 12, 12 })) ^ (x >> ((vec){ 20, 20, 20, 20 })) +#else +#define ROTW7(x) (vec) vsriq_n_u32(vshlq_n_u32((uint32x4_t)x, 7), (uint32x4_t)x, 25) +#define ROTW8(x) (vec) vsriq_n_u32(vshlq_n_u32((uint32x4_t)x, 8), (uint32x4_t)x, 24) +#define ROTW12(x) (vec) vsriq_n_u32(vshlq_n_u32((uint32x4_t)x, 12), (uint32x4_t)x, 20) +#endif +#elif __SSE2__ +#include +#define GPR_TOO 0 +#if __clang__ +#define VBPI 4 +#else +#define VBPI 3 +#endif +#define ONE (vec) _mm_set_epi32(0, 0, 0, 1) +#define LOAD(m) (vec) _mm_loadu_si128((__m128i *)(m)) +#define STORE(m, r) _mm_storeu_si128((__m128i *)(m), (__m128i)(r)) +#define ROTV1(x) (vec) _mm_shuffle_epi32((__m128i)x, _MM_SHUFFLE(0, 3, 2, 1)) +#define ROTV2(x) (vec) _mm_shuffle_epi32((__m128i)x, _MM_SHUFFLE(1, 0, 3, 2)) +#define ROTV3(x) (vec) _mm_shuffle_epi32((__m128i)x, _MM_SHUFFLE(2, 1, 0, 3)) +#define ROTW7(x) (vec)(_mm_slli_epi32((__m128i)x, 7) ^ _mm_srli_epi32((__m128i)x, 25)) +#define ROTW12(x) (vec)(_mm_slli_epi32((__m128i)x, 12) ^ _mm_srli_epi32((__m128i)x, 20)) +#if __SSSE3__ +#include +#define ROTW8(x) (vec) _mm_shuffle_epi8((__m128i)x, _mm_set_epi8(14, 13, 12, 15, 10, 9, 8, 11, 6, 5, 4, 7, 2, 1, 0, 3)) +#define ROTW16(x) (vec) _mm_shuffle_epi8((__m128i)x, _mm_set_epi8(13, 12, 15, 14, 9, 8, 11, 10, 5, 4, 7, 6, 1, 0, 3, 2)) +#else +#define ROTW8(x) 
(vec)(_mm_slli_epi32((__m128i)x, 8) ^ _mm_srli_epi32((__m128i)x, 24)) +#define ROTW16(x) (vec)(_mm_slli_epi32((__m128i)x, 16) ^ _mm_srli_epi32((__m128i)x, 16)) +#endif +#else +#error-- Implementation supports only machines with neon or SSE2 +#endif + +#ifndef REVV_BE +#define REVV_BE(x) (x) +#endif + +#ifndef REVW_BE +#define REVW_BE(x) (x) +#endif + +#define BPI (VBPI + GPR_TOO) /* Blocks computed per loop iteration */ + +#define DQROUND_VECTORS(a, b, c, d) \ + a += b; \ + d ^= a; \ + d = ROTW16(d); \ + c += d; \ + b ^= c; \ + b = ROTW12(b); \ + a += b; \ + d ^= a; \ + d = ROTW8(d); \ + c += d; \ + b ^= c; \ + b = ROTW7(b); \ + b = ROTV1(b); \ + c = ROTV2(c); \ + d = ROTV3(d); \ + a += b; \ + d ^= a; \ + d = ROTW16(d); \ + c += d; \ + b ^= c; \ + b = ROTW12(b); \ + a += b; \ + d ^= a; \ + d = ROTW8(d); \ + c += d; \ + b ^= c; \ + b = ROTW7(b); \ + b = ROTV3(b); \ + c = ROTV2(c); \ + d = ROTV1(d); + +#define QROUND_WORDS(a, b, c, d) \ + a = a + b; \ + d ^= a; \ + d = d << 16 | d >> 16; \ + c = c + d; \ + b ^= c; \ + b = b << 12 | b >> 20; \ + a = a + b; \ + d ^= a; \ + d = d << 8 | d >> 24; \ + c = c + d; \ + b ^= c; \ + b = b << 7 | b >> 25; + +#define WRITE_XOR(in, op, d, v0, v1, v2, v3) \ + STORE(op + d + 0, LOAD(in + d + 0) ^ REVV_BE(v0)); \ + STORE(op + d + 4, LOAD(in + d + 4) ^ REVV_BE(v1)); \ + STORE(op + d + 8, LOAD(in + d + 8) ^ REVV_BE(v2)); \ + STORE(op + d + 12, LOAD(in + d + 12) ^ REVV_BE(v3)); + +void NO_SANITIZE_ALIGNMENT +ChaCha20XOR(unsigned char *out, const unsigned char *in, unsigned int inlen, + const unsigned char key[32], const unsigned char nonce[12], + uint32_t counter) +{ + unsigned iters, i, *op = (unsigned *)out, *ip = (unsigned *)in, *kp; +#if defined(__ARM_NEON__) + unsigned *np; +#endif + vec s0, s1, s2, s3; +#if !defined(__ARM_NEON__) && !defined(__SSE2__) + __attribute__((aligned(16))) unsigned key[8], nonce[4]; +#endif + __attribute__((aligned(16))) unsigned chacha_const[] = + { 0x61707865, 0x3320646E, 0x79622D32, 0x6B206574 }; +#if 
defined(__ARM_NEON__) || defined(__SSE2__) + kp = (unsigned *)key; +#else + ((vec *)key)[0] = REVV_BE(((vec *)key)[0]); + ((vec *)key)[1] = REVV_BE(((vec *)key)[1]); + ((unsigned *)nonce)[0] = REVW_BE(((unsigned *)nonce)[0]); + ((unsigned *)nonce)[1] = REVW_BE(((unsigned *)nonce)[1]); + ((unsigned *)nonce)[2] = REVW_BE(((unsigned *)nonce)[2]); + ((unsigned *)nonce)[3] = REVW_BE(((unsigned *)nonce)[3]); + kp = (unsigned *)key; + np = (unsigned *)nonce; +#endif +#if defined(__ARM_NEON__) + np = (unsigned *)nonce; +#endif + s0 = LOAD(chacha_const); + s1 = LOAD(&((vec *)kp)[0]); + s2 = LOAD(&((vec *)kp)[1]); + s3 = (vec){ + counter, + ((uint32_t *)nonce)[0], + ((uint32_t *)nonce)[1], + ((uint32_t *)nonce)[2] + }; + + for (iters = 0; iters < inlen / (BPI * 64); iters++) { +#if GPR_TOO + register unsigned x0, x1, x2, x3, x4, x5, x6, x7, x8, + x9, x10, x11, x12, x13, x14, x15; +#endif +#if VBPI > 2 + vec v8, v9, v10, v11; +#endif +#if VBPI > 3 + vec v12, v13, v14, v15; +#endif + + vec v0, v1, v2, v3, v4, v5, v6, v7; + v4 = v0 = s0; + v5 = v1 = s1; + v6 = v2 = s2; + v3 = s3; + v7 = v3 + ONE; +#if VBPI > 2 + v8 = v4; + v9 = v5; + v10 = v6; + v11 = v7 + ONE; +#endif +#if VBPI > 3 + v12 = v8; + v13 = v9; + v14 = v10; + v15 = v11 + ONE; +#endif +#if GPR_TOO + x0 = chacha_const[0]; + x1 = chacha_const[1]; + x2 = chacha_const[2]; + x3 = chacha_const[3]; + x4 = kp[0]; + x5 = kp[1]; + x6 = kp[2]; + x7 = kp[3]; + x8 = kp[4]; + x9 = kp[5]; + x10 = kp[6]; + x11 = kp[7]; + x12 = counter + BPI * iters + (BPI - 1); + x13 = np[0]; + x14 = np[1]; + x15 = np[2]; +#endif + for (i = CHACHA_RNDS / 2; i; i--) { + DQROUND_VECTORS(v0, v1, v2, v3) + DQROUND_VECTORS(v4, v5, v6, v7) +#if VBPI > 2 + DQROUND_VECTORS(v8, v9, v10, v11) +#endif +#if VBPI > 3 + DQROUND_VECTORS(v12, v13, v14, v15) +#endif +#if GPR_TOO + QROUND_WORDS(x0, x4, x8, x12) + QROUND_WORDS(x1, x5, x9, x13) + QROUND_WORDS(x2, x6, x10, x14) + QROUND_WORDS(x3, x7, x11, x15) + QROUND_WORDS(x0, x5, x10, x15) + QROUND_WORDS(x1, x6, x11, 
x12) + QROUND_WORDS(x2, x7, x8, x13) + QROUND_WORDS(x3, x4, x9, x14) +#endif + } + + WRITE_XOR(ip, op, 0, v0 + s0, v1 + s1, v2 + s2, v3 + s3) + s3 += ONE; + WRITE_XOR(ip, op, 16, v4 + s0, v5 + s1, v6 + s2, v7 + s3) + s3 += ONE; +#if VBPI > 2 + WRITE_XOR(ip, op, 32, v8 + s0, v9 + s1, v10 + s2, v11 + s3) + s3 += ONE; +#endif +#if VBPI > 3 + WRITE_XOR(ip, op, 48, v12 + s0, v13 + s1, v14 + s2, v15 + s3) + s3 += ONE; +#endif + ip += VBPI * 16; + op += VBPI * 16; +#if GPR_TOO + op[0] = REVW_BE(REVW_BE(ip[0]) ^ (x0 + chacha_const[0])); + op[1] = REVW_BE(REVW_BE(ip[1]) ^ (x1 + chacha_const[1])); + op[2] = REVW_BE(REVW_BE(ip[2]) ^ (x2 + chacha_const[2])); + op[3] = REVW_BE(REVW_BE(ip[3]) ^ (x3 + chacha_const[3])); + op[4] = REVW_BE(REVW_BE(ip[4]) ^ (x4 + kp[0])); + op[5] = REVW_BE(REVW_BE(ip[5]) ^ (x5 + kp[1])); + op[6] = REVW_BE(REVW_BE(ip[6]) ^ (x6 + kp[2])); + op[7] = REVW_BE(REVW_BE(ip[7]) ^ (x7 + kp[3])); + op[8] = REVW_BE(REVW_BE(ip[8]) ^ (x8 + kp[4])); + op[9] = REVW_BE(REVW_BE(ip[9]) ^ (x9 + kp[5])); + op[10] = REVW_BE(REVW_BE(ip[10]) ^ (x10 + kp[6])); + op[11] = REVW_BE(REVW_BE(ip[11]) ^ (x11 + kp[7])); + op[12] = REVW_BE(REVW_BE(ip[12]) ^ (x12 + counter + BPI * iters + (BPI - 1))); + op[13] = REVW_BE(REVW_BE(ip[13]) ^ (x13 + np[0])); + op[14] = REVW_BE(REVW_BE(ip[14]) ^ (x14 + np[1])); + op[15] = REVW_BE(REVW_BE(ip[15]) ^ (x15 + np[2])); + s3 += ONE; + ip += 16; + op += 16; +#endif + } + + for (iters = inlen % (BPI * 64) / 64; iters != 0; iters--) { + vec v0 = s0, v1 = s1, v2 = s2, v3 = s3; + for (i = CHACHA_RNDS / 2; i; i--) { + DQROUND_VECTORS(v0, v1, v2, v3); + } + WRITE_XOR(ip, op, 0, v0 + s0, v1 + s1, v2 + s2, v3 + s3) + s3 += ONE; + ip += 16; + op += 16; + } + + inlen = inlen % 64; + if (inlen) { + __attribute__((aligned(16))) vec buf[4]; + vec v0, v1, v2, v3; + v0 = s0; + v1 = s1; + v2 = s2; + v3 = s3; + for (i = CHACHA_RNDS / 2; i; i--) { + DQROUND_VECTORS(v0, v1, v2, v3); + } + + if (inlen >= 16) { + STORE(op + 0, LOAD(ip + 0) ^ REVV_BE(v0 + s0)); + if 
(inlen >= 32) { + STORE(op + 4, LOAD(ip + 4) ^ REVV_BE(v1 + s1)); + if (inlen >= 48) { + STORE(op + 8, LOAD(ip + 8) ^ REVV_BE(v2 + s2)); + buf[3] = REVV_BE(v3 + s3); + } else { + buf[2] = REVV_BE(v2 + s2); + } + } else { + buf[1] = REVV_BE(v1 + s1); + } + } else { + buf[0] = REVV_BE(v0 + s0); + } + + for (i = inlen & ~15; i < inlen; i++) { + ((char *)op)[i] = ((char *)ip)[i] ^ ((char *)buf)[i]; + } + } +} diff --git a/security/nss/lib/freebl/chacha20poly1305.c b/security/nss/lib/freebl/chacha20poly1305.c new file mode 100644 index 000000000..cd265e1ff --- /dev/null +++ b/security/nss/lib/freebl/chacha20poly1305.c @@ -0,0 +1,198 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include +#include + +#include "seccomon.h" +#include "secerr.h" +#include "blapit.h" + +#ifndef NSS_DISABLE_CHACHAPOLY +#include "poly1305.h" +#include "chacha20.h" +#include "chacha20poly1305.h" +#endif + +/* Poly1305Do writes the Poly1305 authenticator of the given additional data + * and ciphertext to |out|. 
*/ +#ifndef NSS_DISABLE_CHACHAPOLY +static void +Poly1305Do(unsigned char *out, const unsigned char *ad, unsigned int adLen, + const unsigned char *ciphertext, unsigned int ciphertextLen, + const unsigned char key[32]) +{ + poly1305_state state; + unsigned int j; + unsigned char lengthBytes[8]; + static const unsigned char zeros[15]; + unsigned int i; + + Poly1305Init(&state, key); + Poly1305Update(&state, ad, adLen); + if (adLen % 16 > 0) { + Poly1305Update(&state, zeros, 16 - adLen % 16); + } + Poly1305Update(&state, ciphertext, ciphertextLen); + if (ciphertextLen % 16 > 0) { + Poly1305Update(&state, zeros, 16 - ciphertextLen % 16); + } + j = adLen; + for (i = 0; i < sizeof(lengthBytes); i++) { + lengthBytes[i] = j; + j >>= 8; + } + Poly1305Update(&state, lengthBytes, sizeof(lengthBytes)); + j = ciphertextLen; + for (i = 0; i < sizeof(lengthBytes); i++) { + lengthBytes[i] = j; + j >>= 8; + } + Poly1305Update(&state, lengthBytes, sizeof(lengthBytes)); + Poly1305Finish(&state, out); +} +#endif + +SECStatus +ChaCha20Poly1305_InitContext(ChaCha20Poly1305Context *ctx, + const unsigned char *key, unsigned int keyLen, + unsigned int tagLen) +{ +#ifdef NSS_DISABLE_CHACHAPOLY + return SECFailure; +#else + if (keyLen != 32) { + PORT_SetError(SEC_ERROR_BAD_KEY); + return SECFailure; + } + if (tagLen == 0 || tagLen > 16) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + + PORT_Memcpy(ctx->key, key, sizeof(ctx->key)); + ctx->tagLen = tagLen; + + return SECSuccess; +#endif +} + +ChaCha20Poly1305Context * +ChaCha20Poly1305_CreateContext(const unsigned char *key, unsigned int keyLen, + unsigned int tagLen) +{ +#ifdef NSS_DISABLE_CHACHAPOLY + return NULL; +#else + ChaCha20Poly1305Context *ctx; + + ctx = PORT_New(ChaCha20Poly1305Context); + if (ctx == NULL) { + return NULL; + } + + if (ChaCha20Poly1305_InitContext(ctx, key, keyLen, tagLen) != SECSuccess) { + PORT_Free(ctx); + ctx = NULL; + } + + return ctx; +#endif +} + +void 
+ChaCha20Poly1305_DestroyContext(ChaCha20Poly1305Context *ctx, PRBool freeit) +{ +#ifndef NSS_DISABLE_CHACHAPOLY + /* Wipe the key material held in ctx and, if freeit is set, release it. + * A NULL ctx has nothing to wipe or free. */ + if (ctx == NULL) { + return; + } + PORT_Memset(ctx, 0, sizeof(*ctx)); + if (freeit) { + PORT_Free(ctx); + } +#endif +} + +SECStatus +ChaCha20Poly1305_Seal(const ChaCha20Poly1305Context *ctx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen, + const unsigned char *nonce, unsigned int nonceLen, + const unsigned char *ad, unsigned int adLen) +{ +#ifdef NSS_DISABLE_CHACHAPOLY + return SECFailure; +#else + unsigned char block[64]; + unsigned char tag[16]; + + if (nonceLen != 12) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + /* inputLen + tagLen is computed in unsigned int. Refuse lengths whose + * sum wraps: otherwise the maxOutputLen check below could pass and the + * keystream XOR would write inputLen bytes past the end of output. */ + if (inputLen + ctx->tagLen < inputLen) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + *outputLen = inputLen + ctx->tagLen; + if (maxOutputLen < *outputLen) { + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + + PORT_Memset(block, 0, sizeof(block)); + // Generate a block of keystream. The first 32 bytes will be the poly1305 + // key. The remainder of the block is discarded.
+ ChaCha20XOR(block, block, sizeof(block), ctx->key, nonce, 0); + ChaCha20XOR(output, input, inputLen, ctx->key, nonce, 1); + + Poly1305Do(tag, ad, adLen, output, inputLen, block); + PORT_Memcpy(output + inputLen, tag, ctx->tagLen); + + return SECSuccess; +#endif +} + +SECStatus +ChaCha20Poly1305_Open(const ChaCha20Poly1305Context *ctx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen, + const unsigned char *nonce, unsigned int nonceLen, + const unsigned char *ad, unsigned int adLen) +{ +#ifdef NSS_DISABLE_CHACHAPOLY + return SECFailure; +#else + unsigned char block[64]; + unsigned char tag[16]; + unsigned int ciphertextLen; + + if (nonceLen != 12) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + if (inputLen < ctx->tagLen) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + ciphertextLen = inputLen - ctx->tagLen; + *outputLen = ciphertextLen; + if (maxOutputLen < *outputLen) { + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + + PORT_Memset(block, 0, sizeof(block)); + // Generate a block of keystream. The first 32 bytes will be the poly1305 + // key. The remainder of the block is discarded. + ChaCha20XOR(block, block, sizeof(block), ctx->key, nonce, 0); + Poly1305Do(tag, ad, adLen, input, ciphertextLen, block); + if (NSS_SecureMemcmp(tag, &input[ciphertextLen], ctx->tagLen) != 0) { + PORT_SetError(SEC_ERROR_BAD_DATA); + return SECFailure; + } + + ChaCha20XOR(output, input, ciphertextLen, ctx->key, nonce, 1); + + return SECSuccess; +#endif +} diff --git a/security/nss/lib/freebl/chacha20poly1305.h b/security/nss/lib/freebl/chacha20poly1305.h new file mode 100644 index 000000000..c77632aa1 --- /dev/null +++ b/security/nss/lib/freebl/chacha20poly1305.h @@ -0,0 +1,15 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef _CHACHA20_POLY1305_H_ +#define _CHACHA20_POLY1305_H_ 1 + +/* ChaCha20Poly1305ContextStr saves the key and tag length for a + * ChaCha20+Poly1305 AEAD operation. */ +struct ChaCha20Poly1305ContextStr { + unsigned char key[32]; + unsigned char tagLen; +}; + +#endif /* _CHACHA20_POLY1305_H_ */ diff --git a/security/nss/lib/freebl/config.mk b/security/nss/lib/freebl/config.mk new file mode 100644 index 000000000..918a66363 --- /dev/null +++ b/security/nss/lib/freebl/config.mk @@ -0,0 +1,97 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +# only do this in the outermost freebl build. +ifndef FREEBL_CHILD_BUILD + +# We're going to change this build so that it builds libfreebl.a with +# just loader.c. Then we have to build this directory twice again to +# build the two DSOs. +# To build libfreebl.a with just loader.c, we must now override many +# of the make variables setup by the prior inclusion of CORECONF's config.mk + +CSRCS = loader.c +SIMPLE_OBJS = $(CSRCS:.c=$(OBJ_SUFFIX)) +OBJS = $(addprefix $(OBJDIR)/$(PROG_PREFIX), $(SIMPLE_OBJS)) +ALL_TRASH := $(TARGETS) $(OBJS) $(OBJDIR) LOGS TAGS $(GARBAGE) \ + $(NOSUCHFILE) so_locations + +# this is not a recursive child make. We make a static lib. (archive) + +# Override the values defined in coreconf's ruleset.mk. +# +# - (1) LIBRARY: a static (archival) library +# - (2) SHARED_LIBRARY: a shared (dynamic link) library +# - (3) IMPORT_LIBRARY: an import library, used only on Windows +# - (4) PROGRAM: an executable binary +# +# override these variables to prevent building a DSO/DLL. + TARGETS = $(LIBRARY) + SHARED_LIBRARY = + IMPORT_LIBRARY = + PROGRAM = + +else + +# This is a recursive child make. We build the shared lib. 
+ +TARGETS = $(SHARED_LIBRARY) +LIBRARY = +IMPORT_LIBRARY = +PROGRAM = + +ifeq ($(OS_TARGET), SunOS) +OS_LIBS += -lkstat +endif + +ifeq (,$(filter-out WIN%,$(OS_TARGET))) + +# don't want the 32 in the shared library name +SHARED_LIBRARY = $(OBJDIR)/$(DLL_PREFIX)$(LIBRARY_NAME)$(LIBRARY_VERSION).$(DLL_SUFFIX) + +RES = $(OBJDIR)/$(LIBRARY_NAME).res +RESNAME = freebl.rc + +ifdef NS_USE_GCC +OS_LIBS += -ladvapi32 +else +OS_LIBS += advapi32.lib +endif + +ifdef NS_USE_GCC +EXTRA_SHARED_LIBS += \ + -L$(DIST)/lib \ + -L$(NSSUTIL_LIB_DIR) \ + -lnssutil3 \ + -L$(NSPR_LIB_DIR) \ + -lnspr4 \ + $(NULL) +else # ! NS_USE_GCC +EXTRA_SHARED_LIBS += \ + $(DIST)/lib/nssutil3.lib \ + $(NSPR_LIB_DIR)/$(NSPR31_LIB_PREFIX)nspr4.lib \ + $(NULL) +endif # NS_USE_GCC + +else + +ifeq ($(FREEBL_NO_DEPEND),1) +#drop pthreads as well +OS_PTHREAD= +else +EXTRA_SHARED_LIBS += \ + -L$(DIST)/lib \ + -L$(NSSUTIL_LIB_DIR) \ + -lnssutil3 \ + -L$(NSPR_LIB_DIR) \ + -lnspr4 \ + $(NULL) +endif +endif + +ifeq ($(OS_ARCH), Darwin) +EXTRA_SHARED_LIBS += -dylib_file @executable_path/libplc4.dylib:$(DIST)/lib/libplc4.dylib -dylib_file @executable_path/libplds4.dylib:$(DIST)/lib/libplds4.dylib +endif + +endif diff --git a/security/nss/lib/freebl/ctr.c b/security/nss/lib/freebl/ctr.c new file mode 100644 index 000000000..d5715a505 --- /dev/null +++ b/security/nss/lib/freebl/ctr.c @@ -0,0 +1,246 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif +#include "prtypes.h" +#include "blapit.h" +#include "blapii.h" +#include "ctr.h" +#include "pkcs11t.h" +#include "secerr.h" + +#ifdef USE_HW_AES +#include "intel-aes.h" +#include "rijndael.h" +#endif + +SECStatus +CTR_InitContext(CTRContext *ctr, void *context, freeblCipherFunc cipher, + const unsigned char *param, unsigned int blocksize) +{ + const CK_AES_CTR_PARAMS *ctrParams = (const CK_AES_CTR_PARAMS *)param; + + if (ctrParams->ulCounterBits == 0 || + ctrParams->ulCounterBits > blocksize * PR_BITS_PER_BYTE) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + /* Invariant: 0 < ctr->bufPtr <= blocksize */ + ctr->checkWrap = PR_FALSE; + ctr->bufPtr = blocksize; /* no unused data in the buffer */ + ctr->cipher = cipher; + ctr->context = context; + ctr->counterBits = ctrParams->ulCounterBits; + if (blocksize > sizeof(ctr->counter) || + blocksize > sizeof(ctrParams->cb)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + PORT_Memcpy(ctr->counter, ctrParams->cb, blocksize); + if (ctr->counterBits < 64) { + PORT_Memcpy(ctr->counterFirst, ctr->counter, blocksize); + ctr->checkWrap = PR_TRUE; + } + return SECSuccess; +} + +CTRContext * +CTR_CreateContext(void *context, freeblCipherFunc cipher, + const unsigned char *param, unsigned int blocksize) +{ + CTRContext *ctr; + SECStatus rv; + + /* first fill in the Counter context */ + ctr = PORT_ZNew(CTRContext); + if (ctr == NULL) { + return NULL; + } + rv = CTR_InitContext(ctr, context, cipher, param, blocksize); + if (rv != SECSuccess) { + CTR_DestroyContext(ctr, PR_TRUE); + ctr = NULL; + } + return ctr; +} + +void +CTR_DestroyContext(CTRContext *ctr, PRBool freeit) +{ + PORT_Memset(ctr, 0, sizeof(CTRContext)); + if (freeit) { + PORT_Free(ctr); + } +} + +/* + * Used by counter mode. Increment the counter block. 
/*
 * Used by counter mode. Increment the counter block. Not all bits in the
 * counter block are part of the counter, counterBits tells how many bits
 * are part of the counter. The counter block is blocksize long. It's a
 * big endian value.
 *
 * The counter occupies the least significant counterBits bits of the block.
 * Whole bytes are incremented from the last byte backwards until one does
 * not wrap to zero. If counterBits is not a multiple of 8, the remaining low
 * bits of the next byte are incremented modulo 2^(counterBits % 8) and the
 * high bits of that byte, which are not part of the counter, are preserved.
 *
 * XXX Does not handle counter rollover: the counter field silently wraps to
 * zero.
 */
static void
ctr_GetNextCtr(unsigned char *counter, unsigned int counterBits,
               unsigned int blocksize)
{
    unsigned char *counterPtr = counter + blocksize - 1;
    unsigned char mask, original;

    PORT_Assert(counterBits <= blocksize * PR_BITS_PER_BYTE);
    while (counterBits >= PR_BITS_PER_BYTE) {
        if (++(*(counterPtr--))) {
            /* no carry out of this byte, done */
            return;
        }
        counterBits -= PR_BITS_PER_BYTE;
    }
    if (counterBits == 0) {
        return;
    }
    /* Increment the final partial byte. Work from the original byte value:
     * incrementing in place would let a carry out of the counter field leak
     * into the non-counter high bits before they are masked back in. */
    mask = (unsigned char)((1u << counterBits) - 1);
    original = *counterPtr;
    *counterPtr = (unsigned char)((original & ~mask) | ((original + 1) & mask));
}

/*
 * Store x[i] ^ y[i] into target[i] for the first |count| bytes.
 */
static void
ctr_xor(unsigned char *target, const unsigned char *x,
        const unsigned char *y, unsigned int count)
{
    unsigned int i;
    for (i = 0; i < count; i++) {
        *target++ = *x++ ^ *y++;
    }
}
ctr->counterFirst, blocksize) == 0) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + } + if (rv != SECSuccess) { + return SECFailure; + } + ctr_xor(outbuf, inbuf, ctr->buffer, blocksize); + outbuf += blocksize; + inbuf += blocksize; + *outlen += blocksize; + inlen -= blocksize; + } + if (inlen == 0) { + return SECSuccess; + } + rv = (*ctr->cipher)(ctr->context, ctr->buffer, &tmp, blocksize, + ctr->counter, blocksize, blocksize); + ctr_GetNextCtr(ctr->counter, ctr->counterBits, blocksize); + if (ctr->checkWrap) { + if (PORT_Memcmp(ctr->counter, ctr->counterFirst, blocksize) == 0) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + } + if (rv != SECSuccess) { + return SECFailure; + } + ctr_xor(outbuf, inbuf, ctr->buffer, inlen); + ctr->bufPtr = inlen; + *outlen += inlen; + return SECSuccess; +} + +#if defined(USE_HW_AES) && defined(_MSC_VER) +SECStatus +CTR_Update_HW_AES(CTRContext *ctr, unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + unsigned int blocksize) +{ + unsigned int fullblocks; + unsigned int tmp; + SECStatus rv; + + if (maxout < inlen) { + *outlen = inlen; + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + *outlen = 0; + if (ctr->bufPtr != blocksize) { + unsigned int needed = PR_MIN(blocksize - ctr->bufPtr, inlen); + ctr_xor(outbuf, inbuf, ctr->buffer + ctr->bufPtr, needed); + ctr->bufPtr += needed; + outbuf += needed; + inbuf += needed; + *outlen += needed; + inlen -= needed; + if (inlen == 0) { + return SECSuccess; + } + PORT_Assert(ctr->bufPtr == blocksize); + } + + intel_aes_ctr_worker(((AESContext *)(ctr->context))->Nr)( + ctr, outbuf, outlen, maxout, inbuf, inlen, blocksize); + /* XXX intel_aes_ctr_worker should set *outlen. 
#if defined(USE_HW_AES) && defined(_MSC_VER)
/*
 * Variant of CTR_Update that hands all whole blocks to the hardware AES
 * counter-mode worker selected by the AES context's round count.
 *
 * Keystream buffered by a previous call is used first; a trailing partial
 * block is handled through ctr->cipher, leaving the unused keystream in
 * ctr->buffer.
 *
 * *outlen counts the bytes written. Fails with SEC_ERROR_OUTPUT_LEN (and
 * *outlen set to inlen) when maxout < inlen, and with SECFailure when the
 * worker or the block cipher reports failure.
 *
 * NOTE(review): unlike CTR_Update, no counter wrap check (ctr->checkWrap) is
 * performed on this path -- confirm whether that is intended.
 */
SECStatus
CTR_Update_HW_AES(CTRContext *ctr, unsigned char *outbuf,
                  unsigned int *outlen, unsigned int maxout,
                  const unsigned char *inbuf, unsigned int inlen,
                  unsigned int blocksize)
{
    unsigned int fullblocks;
    unsigned int produced;
    unsigned int tmp;
    SECStatus rv;

    if (maxout < inlen) {
        *outlen = inlen;
        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
        return SECFailure;
    }
    *outlen = 0;
    if (ctr->bufPtr != blocksize) {
        unsigned int needed = PR_MIN(blocksize - ctr->bufPtr, inlen);
        ctr_xor(outbuf, inbuf, ctr->buffer + ctr->bufPtr, needed);
        ctr->bufPtr += needed;
        outbuf += needed;
        inbuf += needed;
        *outlen += needed;
        inlen -= needed;
        if (inlen == 0) {
            return SECSuccess;
        }
        PORT_Assert(ctr->bufPtr == blocksize);
    }

    if (inlen >= blocksize) {
        /* Remember how much was already written. *outlen is non-zero here
         * whenever buffered keystream was consumed above, and the worker is
         * not relied upon to maintain it. */
        produced = *outlen;
        rv = intel_aes_ctr_worker(((AESContext *)(ctr->context))->Nr)(
            ctr, outbuf, outlen, maxout, inbuf, inlen, blocksize);
        if (rv != SECSuccess) {
            return SECFailure;
        }
        /* XXX intel_aes_ctr_worker should set *outlen. */
        fullblocks = (inlen / blocksize) * blocksize;
        *outlen = produced + fullblocks;
        outbuf += fullblocks;
        inbuf += fullblocks;
        inlen -= fullblocks;
    }

    if (inlen == 0) {
        return SECSuccess;
    }
    /* Trailing partial block: generate one keystream block and keep what is
     * left of it for the next call. */
    rv = (*ctr->cipher)(ctr->context, ctr->buffer, &tmp, blocksize,
                        ctr->counter, blocksize, blocksize);
    ctr_GetNextCtr(ctr->counter, ctr->counterBits, blocksize);
    if (rv != SECSuccess) {
        return SECFailure;
    }
    ctr_xor(outbuf, inbuf, ctr->buffer, inlen);
    ctr->bufPtr = inlen;
    *outlen += inlen;
    return SECSuccess;
}
#endif
+ */ +CTRContext *CTR_CreateContext(void *context, freeblCipherFunc cipher, + const unsigned char *param, unsigned int blocksize); + +void CTR_DestroyContext(CTRContext *ctr, PRBool freeit); + +SECStatus CTR_Update(CTRContext *ctr, unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + unsigned int blocksize); + +#ifdef USE_HW_AES +SECStatus CTR_Update_HW_AES(CTRContext *ctr, unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + unsigned int blocksize); +#endif + +#endif diff --git a/security/nss/lib/freebl/cts.c b/security/nss/lib/freebl/cts.c new file mode 100644 index 000000000..99ccebb60 --- /dev/null +++ b/security/nss/lib/freebl/cts.c @@ -0,0 +1,307 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif +#include "blapit.h" +#include "blapii.h" +#include "cts.h" +#include "secerr.h" + +struct CTSContextStr { + freeblCipherFunc cipher; + void *context; + /* iv stores the last ciphertext block of the previous message. + * Only used by decrypt. */ + unsigned char iv[MAX_BLOCK_SIZE]; +}; + +CTSContext * +CTS_CreateContext(void *context, freeblCipherFunc cipher, + const unsigned char *iv, unsigned int blocksize) +{ + CTSContext *cts; + + if (blocksize > MAX_BLOCK_SIZE) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return NULL; + } + cts = PORT_ZNew(CTSContext); + if (cts == NULL) { + return NULL; + } + PORT_Memcpy(cts->iv, iv, blocksize); + cts->cipher = cipher; + cts->context = context; + return cts; +} + +void +CTS_DestroyContext(CTSContext *cts, PRBool freeit) +{ + if (freeit) { + PORT_Free(cts); + } +} + +/* + * See addemdum to NIST SP 800-38A + * Generically handle cipher text stealing. 
Basically this is doing CBC + * operations except someone can pass us a partial block. + * + * Output Order: + * CS-1: C1||C2||C3..Cn-1(could be partial)||Cn (NIST) + * CS-2: pad == 0 C1||C2||C3...Cn-1(is full)||Cn (Schneier) + * CS-2: pad != 0 C1||C2||C3...Cn||Cn-1(is partial)(Schneier) + * CS-3: C1||C2||C3...Cn||Cn-1(could be partial) (Kerberos) + * + * The characteristics of these three options: + * - NIST & Schneier (CS-1 & CS-2) are identical to CBC if there are no + * partial blocks on input. + * - Scheier and Kerberos (CS-2 and CS-3) have no embedded partial blocks, + * which make decoding easier. + * - NIST & Kerberos (CS-1 and CS-3) have consistent block order independent + * of padding. + * + * PKCS #11 did not specify which version to implement, but points to the NIST + * spec, so this code implements CTS-CS-1 from NIST. + * + * To convert the returned buffer to: + * CS-2 (Schneier): do + * unsigned char tmp[MAX_BLOCK_SIZE]; + * pad = *outlen % blocksize; + * if (pad) { + * memcpy(tmp, outbuf+*outlen-blocksize, blocksize); + * memcpy(outbuf+*outlen-pad,outbuf+*outlen-blocksize-pad, pad); + * memcpy(outbuf+*outlen-blocksize-pad, tmp, blocksize); + * } + * CS-3 (Kerberos): do + * unsigned char tmp[MAX_BLOCK_SIZE]; + * pad = *outlen % blocksize; + * if (pad == 0) { + * pad = blocksize; + * } + * memcpy(tmp, outbuf+*outlen-blocksize, blocksize); + * memcpy(outbuf+*outlen-pad,outbuf+*outlen-blocksize-pad, pad); + * memcpy(outbuf+*outlen-blocksize-pad, tmp, blocksize); + */ +SECStatus +CTS_EncryptUpdate(CTSContext *cts, unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + unsigned int blocksize) +{ + unsigned char lastBlock[MAX_BLOCK_SIZE]; + unsigned int tmp; + int fullblocks; + int written; + unsigned char *saveout = outbuf; + SECStatus rv; + + if (inlen < blocksize) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + + if (maxout < inlen) { + *outlen = inlen; + 
PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + fullblocks = (inlen / blocksize) * blocksize; + rv = (*cts->cipher)(cts->context, outbuf, outlen, maxout, inbuf, + fullblocks, blocksize); + if (rv != SECSuccess) { + return SECFailure; + } + *outlen = fullblocks; /* AES low level doesn't set outlen */ + inbuf += fullblocks; + inlen -= fullblocks; + if (inlen == 0) { + return SECSuccess; + } + written = *outlen - (blocksize - inlen); + outbuf += written; + maxout -= written; + + /* + * here's the CTS magic, we pad our final block with zeros, + * then do a CBC encrypt. CBC will xor our plain text with + * the previous block (Cn-1), capturing part of that block (Cn-1**) as it + * xors with the zero pad. We then write this full block, overwritting + * (Cn-1**) in our buffer. This allows us to have input data == output + * data since Cn contains enough information to reconver Cn-1** when + * we decrypt (at the cost of some complexity as you can see in decrypt + * below */ + PORT_Memcpy(lastBlock, inbuf, inlen); + PORT_Memset(lastBlock + inlen, 0, blocksize - inlen); + rv = (*cts->cipher)(cts->context, outbuf, &tmp, maxout, lastBlock, + blocksize, blocksize); + PORT_Memset(lastBlock, 0, blocksize); + if (rv == SECSuccess) { + *outlen = written + blocksize; + } else { + PORT_Memset(saveout, 0, written + blocksize); + } + return rv; +} + +#define XOR_BLOCK(x, y, count) \ + for (i = 0; i < count; i++) \ + x[i] = x[i] ^ y[i] + +/* + * See addemdum to NIST SP 800-38A + * Decrypt, Expect CS-1: input. See the comment on the encrypt side + * to understand what CS-2 and CS-3 mean. + * + * To convert the input buffer to CS-1 from ... 
/*
 * See addendum to NIST SP 800-38A
 * Decrypt, Expect CS-1: input. See the comment on the encrypt side
 * to understand what CS-2 and CS-3 mean.
 *
 * To convert the input buffer to CS-1 from ...
 *   CS-2 (Schneier): do
 *       unsigned char tmp[MAX_BLOCK_SIZE];
 *       pad = inlen % blocksize;
 *       if (pad) {
 *          memcpy(tmp, inbuf+inlen-blocksize-pad, blocksize);
 *          memcpy(inbuf+inlen-blocksize-pad,inbuf+inlen-pad, pad);
 *          memcpy(inbuf+inlen-blocksize, tmp, blocksize);
 *       }
 *   CS-3 (Kerberos): do
 *       unsigned char tmp[MAX_BLOCK_SIZE];
 *       pad = inlen % blocksize;
 *       if (pad == 0) {
 *           pad = blocksize;
 *       }
 *       memcpy(tmp, inbuf+inlen-blocksize-pad, blocksize);
 *       memcpy(inbuf+inlen-blocksize-pad,inbuf+inlen-pad, pad);
 *       memcpy(inbuf+inlen-blocksize, tmp, blocksize);
 *
 * Returns SECFailure with SEC_ERROR_INPUT_LEN when inlen is shorter than one
 * block, and with SEC_ERROR_OUTPUT_LEN (after storing inlen in *outlen) when
 * maxout < inlen. When the input has a partial block and inbuf != outbuf,
 * the input is first copied into outbuf and rearranged there. If the second
 * cipher call fails, the output written so far is zeroed.
 */
SECStatus
CTS_DecryptUpdate(CTSContext *cts, unsigned char *outbuf,
                  unsigned int *outlen, unsigned int maxout,
                  const unsigned char *inbuf, unsigned int inlen,
                  unsigned int blocksize)
{
    unsigned char *Pn;
    unsigned char Cn_2[MAX_BLOCK_SIZE]; /* block Cn-2 */
    unsigned char Cn_1[MAX_BLOCK_SIZE]; /* block Cn-1 */
    unsigned char Cn[MAX_BLOCK_SIZE];   /* block Cn */
    unsigned char lastBlock[MAX_BLOCK_SIZE];
    const unsigned char *tmp;
    unsigned char *saveout = outbuf;
    unsigned int tmpLen;
    unsigned int fullblocks, pad;
    unsigned int i; /* loop index used by XOR_BLOCK */
    SECStatus rv;

    if (inlen < blocksize) {
        PORT_SetError(SEC_ERROR_INPUT_LEN);
        return SECFailure;
    }

    if (maxout < inlen) {
        *outlen = inlen;
        PORT_SetError(SEC_ERROR_OUTPUT_LEN);
        return SECFailure;
    }

    fullblocks = (inlen / blocksize) * blocksize;

    /* even though we expect the input to be CS-1, CS-2 is easier to parse,
     * so convert to CS-2 immediately. NOTE: this is the same code as in
     * the comment for encrypt. NOTE2: since we can't modify inbuf unless
     * inbuf and outbuf overlap, just copy inbuf to outbuf and modify it there
     */
    pad = inlen - fullblocks;
    if (pad != 0) {
        if (inbuf != outbuf) {
            memcpy(outbuf, inbuf, inlen);
            /* keep the names so we logically know how we are using the
             * buffers */
            inbuf = outbuf;
        }
        memcpy(lastBlock, inbuf + inlen - blocksize, blocksize);
        /* we know inbuf == outbuf now, inbuf is declared const and can't
         * be the target, so use outbuf for the target here */
        memcpy(outbuf + inlen - pad, inbuf + inlen - blocksize - pad, pad);
        memcpy(outbuf + inlen - blocksize - pad, lastBlock, blocksize);
    }
    /* save the previous to last block so we can undo the misordered
     * chaining */
    /* with fewer than two full blocks the "previous" block is the saved iv */
    tmp = (fullblocks < blocksize * 2) ? cts->iv : inbuf + fullblocks - blocksize * 2;
    PORT_Memcpy(Cn_2, tmp, blocksize);
    PORT_Memcpy(Cn, inbuf + fullblocks - blocksize, blocksize);
    rv = (*cts->cipher)(cts->context, outbuf, outlen, maxout, inbuf,
                        fullblocks, blocksize);
    if (rv != SECSuccess) {
        return SECFailure;
    }
    *outlen = fullblocks; /* AES low level doesn't set outlen */
    inbuf += fullblocks;
    inlen -= fullblocks;
    if (inlen == 0) {
        return SECSuccess;
    }
    outbuf += fullblocks;

    /* recover the stolen text */
    PORT_Memset(lastBlock, 0, blocksize);
    PORT_Memcpy(lastBlock, inbuf, inlen);
    PORT_Memcpy(Cn_1, inbuf, inlen);
    Pn = outbuf - blocksize;
    /* inbuf points to Cn-1* in the input buffer */
    /* NOTE: below there are 2 sections marked "make up for the out of order
     * cbc decryption". You may ask, what is going on here.
     *   Short answer: CBC automatically xors the plain text with the previous
     * encrypted block. We are decrypting the last 2 blocks out of order, so
     * we have to 'back out' the decrypt xor and 'add back' the encrypt xor.
     *   Long answer: When we encrypted, we encrypted as follows:
     *       Pn-2, Pn-1, (Pn || 0), but on decryption we can't
     *  decrypt Cn-1 until we decrypt Cn because part of Cn-1 is stored in
     *  Cn (see below).  So above we decrypted all the full blocks:
     *       Cn-2, Cn,
     *  to get:
     *       Pn-2, Pn,  Except that Pn is not yet correct. On encrypt, we
     *  xor'd Pn || 0 with Cn-1, but on decrypt we xor'd it with Cn-2
     *  To recover Pn, we xor the block with Cn-1* || 0 (in last block) and
     *  Cn-2 to get Pn || Cn-1**. Pn can then be written to the output buffer
     *  and we can now reunite Cn-1. With the full Cn-1 we can decrypt it,
     *  but now decrypt is going to xor the decrypted data with Cn instead of
     *  Cn-2. xoring Cn and Cn-2 restores the original Pn-1 and we can now
     *  write that out to the buffer */

    /* make up for the out of order CBC decryption */
    XOR_BLOCK(lastBlock, Cn_2, blocksize);
    XOR_BLOCK(lastBlock, Pn, blocksize);
    /* last buf now has Pn || Cn-1**, copy out Pn */
    PORT_Memcpy(outbuf, lastBlock, inlen);
    *outlen += inlen;
    /* copy Cn-1* into last buf to recover Cn-1 */
    PORT_Memcpy(lastBlock, Cn_1, inlen);
    /* note: because Cn and Cn-1 were out of order, our pointer to Pn also
     * points to where Pn-1 needs to reside. From here on out read Pn in
     * the code as really Pn-1. */
    rv = (*cts->cipher)(cts->context, Pn, &tmpLen, blocksize, lastBlock,
                        blocksize, blocksize);
    if (rv != SECSuccess) {
        PORT_Memset(lastBlock, 0, blocksize);
        PORT_Memset(saveout, 0, *outlen);
        return SECFailure;
    }
    /* make up for the out of order CBC decryption */
    XOR_BLOCK(Pn, Cn_2, blocksize);
    XOR_BLOCK(Pn, Cn, blocksize);
    /* reset iv to Cn  */
    PORT_Memcpy(cts->iv, Cn, blocksize);
    /* This makes Cn the last block for the next decrypt operation, which
     * matches the encrypt. We don't care about the contents of last block,
     * only the side effect of setting the internal IV */
    (void)(*cts->cipher)(cts->context, lastBlock, &tmpLen, blocksize, Cn,
                         blocksize, blocksize);
    /* clear last block. At this point last block contains Pn xor Cn_1 xor
     * Cn_2, both of which an attacker would know, so we need to clear this
     * buffer out */
    PORT_Memset(lastBlock, 0, blocksize);
    /* Cn, Cn_1, and Cn_2 have encrypted data, so no need to clear them */
    return SECSuccess;
}
+ */ +CTSContext *CTS_CreateContext(void *context, freeblCipherFunc cipher, + const unsigned char *iv, unsigned int blocksize); + +void CTS_DestroyContext(CTSContext *cts, PRBool freeit); + +SECStatus CTS_EncryptUpdate(CTSContext *cts, unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + unsigned int blocksize); +SECStatus CTS_DecryptUpdate(CTSContext *cts, unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + unsigned int blocksize); + +#endif diff --git a/security/nss/lib/freebl/des.c b/security/nss/lib/freebl/des.c new file mode 100644 index 000000000..fd433bbb2 --- /dev/null +++ b/security/nss/lib/freebl/des.c @@ -0,0 +1,676 @@ +/* + * des.c + * + * core source file for DES-150 library + * Make key schedule from DES key. + * Encrypt/Decrypt one 8-byte block. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "des.h" +#include "blapii.h" +#include <stddef.h> /* for ptrdiff_t */ +/* #define USE_INDEXING 1 */ + +/* + * The tables below are the 8 sbox functions, with the 6-bit input permutation + * and the 32-bit output permutation pre-computed. + * They are shifted circularly to the left 3 bits, which removes 2 shifts + * and an or from each round by reducing the number of sboxes whose + * indices cross word boundaries from 2 to 1.
+ */ + +static const HALF SP[8][64] = { + /* Box S1 */ + { 0x04041000, 0x00000000, 0x00040000, 0x04041010, + 0x04040010, 0x00041010, 0x00000010, 0x00040000, + 0x00001000, 0x04041000, 0x04041010, 0x00001000, + 0x04001010, 0x04040010, 0x04000000, 0x00000010, + 0x00001010, 0x04001000, 0x04001000, 0x00041000, + 0x00041000, 0x04040000, 0x04040000, 0x04001010, + 0x00040010, 0x04000010, 0x04000010, 0x00040010, + 0x00000000, 0x00001010, 0x00041010, 0x04000000, + 0x00040000, 0x04041010, 0x00000010, 0x04040000, + 0x04041000, 0x04000000, 0x04000000, 0x00001000, + 0x04040010, 0x00040000, 0x00041000, 0x04000010, + 0x00001000, 0x00000010, 0x04001010, 0x00041010, + 0x04041010, 0x00040010, 0x04040000, 0x04001010, + 0x04000010, 0x00001010, 0x00041010, 0x04041000, + 0x00001010, 0x04001000, 0x04001000, 0x00000000, + 0x00040010, 0x00041000, 0x00000000, 0x04040010 }, + /* Box S2 */ + { 0x00420082, 0x00020002, 0x00020000, 0x00420080, + 0x00400000, 0x00000080, 0x00400082, 0x00020082, + 0x00000082, 0x00420082, 0x00420002, 0x00000002, + 0x00020002, 0x00400000, 0x00000080, 0x00400082, + 0x00420000, 0x00400080, 0x00020082, 0x00000000, + 0x00000002, 0x00020000, 0x00420080, 0x00400002, + 0x00400080, 0x00000082, 0x00000000, 0x00420000, + 0x00020080, 0x00420002, 0x00400002, 0x00020080, + 0x00000000, 0x00420080, 0x00400082, 0x00400000, + 0x00020082, 0x00400002, 0x00420002, 0x00020000, + 0x00400002, 0x00020002, 0x00000080, 0x00420082, + 0x00420080, 0x00000080, 0x00020000, 0x00000002, + 0x00020080, 0x00420002, 0x00400000, 0x00000082, + 0x00400080, 0x00020082, 0x00000082, 0x00400080, + 0x00420000, 0x00000000, 0x00020002, 0x00020080, + 0x00000002, 0x00400082, 0x00420082, 0x00420000 }, + /* Box S3 */ + { 0x00000820, 0x20080800, 0x00000000, 0x20080020, + 0x20000800, 0x00000000, 0x00080820, 0x20000800, + 0x00080020, 0x20000020, 0x20000020, 0x00080000, + 0x20080820, 0x00080020, 0x20080000, 0x00000820, + 0x20000000, 0x00000020, 0x20080800, 0x00000800, + 0x00080800, 0x20080000, 0x20080020, 0x00080820, + 
0x20000820, 0x00080800, 0x00080000, 0x20000820, + 0x00000020, 0x20080820, 0x00000800, 0x20000000, + 0x20080800, 0x20000000, 0x00080020, 0x00000820, + 0x00080000, 0x20080800, 0x20000800, 0x00000000, + 0x00000800, 0x00080020, 0x20080820, 0x20000800, + 0x20000020, 0x00000800, 0x00000000, 0x20080020, + 0x20000820, 0x00080000, 0x20000000, 0x20080820, + 0x00000020, 0x00080820, 0x00080800, 0x20000020, + 0x20080000, 0x20000820, 0x00000820, 0x20080000, + 0x00080820, 0x00000020, 0x20080020, 0x00080800 }, + /* Box S4 */ + { 0x02008004, 0x00008204, 0x00008204, 0x00000200, + 0x02008200, 0x02000204, 0x02000004, 0x00008004, + 0x00000000, 0x02008000, 0x02008000, 0x02008204, + 0x00000204, 0x00000000, 0x02000200, 0x02000004, + 0x00000004, 0x00008000, 0x02000000, 0x02008004, + 0x00000200, 0x02000000, 0x00008004, 0x00008200, + 0x02000204, 0x00000004, 0x00008200, 0x02000200, + 0x00008000, 0x02008200, 0x02008204, 0x00000204, + 0x02000200, 0x02000004, 0x02008000, 0x02008204, + 0x00000204, 0x00000000, 0x00000000, 0x02008000, + 0x00008200, 0x02000200, 0x02000204, 0x00000004, + 0x02008004, 0x00008204, 0x00008204, 0x00000200, + 0x02008204, 0x00000204, 0x00000004, 0x00008000, + 0x02000004, 0x00008004, 0x02008200, 0x02000204, + 0x00008004, 0x00008200, 0x02000000, 0x02008004, + 0x00000200, 0x02000000, 0x00008000, 0x02008200 }, + /* Box S5 */ + { 0x00000400, 0x08200400, 0x08200000, 0x08000401, + 0x00200000, 0x00000400, 0x00000001, 0x08200000, + 0x00200401, 0x00200000, 0x08000400, 0x00200401, + 0x08000401, 0x08200001, 0x00200400, 0x00000001, + 0x08000000, 0x00200001, 0x00200001, 0x00000000, + 0x00000401, 0x08200401, 0x08200401, 0x08000400, + 0x08200001, 0x00000401, 0x00000000, 0x08000001, + 0x08200400, 0x08000000, 0x08000001, 0x00200400, + 0x00200000, 0x08000401, 0x00000400, 0x08000000, + 0x00000001, 0x08200000, 0x08000401, 0x00200401, + 0x08000400, 0x00000001, 0x08200001, 0x08200400, + 0x00200401, 0x00000400, 0x08000000, 0x08200001, + 0x08200401, 0x00200400, 0x08000001, 0x08200401, + 0x08200000, 
0x00000000, 0x00200001, 0x08000001, + 0x00200400, 0x08000400, 0x00000401, 0x00200000, + 0x00000000, 0x00200001, 0x08200400, 0x00000401 }, + /* Box S6 */ + { 0x80000040, 0x81000000, 0x00010000, 0x81010040, + 0x81000000, 0x00000040, 0x81010040, 0x01000000, + 0x80010000, 0x01010040, 0x01000000, 0x80000040, + 0x01000040, 0x80010000, 0x80000000, 0x00010040, + 0x00000000, 0x01000040, 0x80010040, 0x00010000, + 0x01010000, 0x80010040, 0x00000040, 0x81000040, + 0x81000040, 0x00000000, 0x01010040, 0x81010000, + 0x00010040, 0x01010000, 0x81010000, 0x80000000, + 0x80010000, 0x00000040, 0x81000040, 0x01010000, + 0x81010040, 0x01000000, 0x00010040, 0x80000040, + 0x01000000, 0x80010000, 0x80000000, 0x00010040, + 0x80000040, 0x81010040, 0x01010000, 0x81000000, + 0x01010040, 0x81010000, 0x00000000, 0x81000040, + 0x00000040, 0x00010000, 0x81000000, 0x01010040, + 0x00010000, 0x01000040, 0x80010040, 0x00000000, + 0x81010000, 0x80000000, 0x01000040, 0x80010040 }, + /* Box S7 */ + { 0x00800000, 0x10800008, 0x10002008, 0x00000000, + 0x00002000, 0x10002008, 0x00802008, 0x10802000, + 0x10802008, 0x00800000, 0x00000000, 0x10000008, + 0x00000008, 0x10000000, 0x10800008, 0x00002008, + 0x10002000, 0x00802008, 0x00800008, 0x10002000, + 0x10000008, 0x10800000, 0x10802000, 0x00800008, + 0x10800000, 0x00002000, 0x00002008, 0x10802008, + 0x00802000, 0x00000008, 0x10000000, 0x00802000, + 0x10000000, 0x00802000, 0x00800000, 0x10002008, + 0x10002008, 0x10800008, 0x10800008, 0x00000008, + 0x00800008, 0x10000000, 0x10002000, 0x00800000, + 0x10802000, 0x00002008, 0x00802008, 0x10802000, + 0x00002008, 0x10000008, 0x10802008, 0x10800000, + 0x00802000, 0x00000000, 0x00000008, 0x10802008, + 0x00000000, 0x00802008, 0x10800000, 0x00002000, + 0x10000008, 0x10002000, 0x00002000, 0x00800008 }, + /* Box S8 */ + { 0x40004100, 0x00004000, 0x00100000, 0x40104100, + 0x40000000, 0x40004100, 0x00000100, 0x40000000, + 0x00100100, 0x40100000, 0x40104100, 0x00104000, + 0x40104000, 0x00104100, 0x00004000, 0x00000100, + 
0x40100000, 0x40000100, 0x40004000, 0x00004100, + 0x00104000, 0x00100100, 0x40100100, 0x40104000, + 0x00004100, 0x00000000, 0x00000000, 0x40100100, + 0x40000100, 0x40004000, 0x00104100, 0x00100000, + 0x00104100, 0x00100000, 0x40104000, 0x00004000, + 0x00000100, 0x40100100, 0x00004000, 0x00104100, + 0x40004000, 0x00000100, 0x40000100, 0x40100000, + 0x40100100, 0x40000000, 0x00100000, 0x40004100, + 0x00000000, 0x40104100, 0x00100100, 0x40000100, + 0x40100000, 0x40004000, 0x40004100, 0x00000000, + 0x40104100, 0x00104000, 0x00104000, 0x00004100, + 0x00004100, 0x00100100, 0x40000000, 0x40104000 } +}; + +static const HALF PC2[8][64] = { + /* table 0 */ + { 0x00000000, 0x00001000, 0x04000000, 0x04001000, + 0x00100000, 0x00101000, 0x04100000, 0x04101000, + 0x00008000, 0x00009000, 0x04008000, 0x04009000, + 0x00108000, 0x00109000, 0x04108000, 0x04109000, + 0x00000004, 0x00001004, 0x04000004, 0x04001004, + 0x00100004, 0x00101004, 0x04100004, 0x04101004, + 0x00008004, 0x00009004, 0x04008004, 0x04009004, + 0x00108004, 0x00109004, 0x04108004, 0x04109004, + 0x08000000, 0x08001000, 0x0c000000, 0x0c001000, + 0x08100000, 0x08101000, 0x0c100000, 0x0c101000, + 0x08008000, 0x08009000, 0x0c008000, 0x0c009000, + 0x08108000, 0x08109000, 0x0c108000, 0x0c109000, + 0x08000004, 0x08001004, 0x0c000004, 0x0c001004, + 0x08100004, 0x08101004, 0x0c100004, 0x0c101004, + 0x08008004, 0x08009004, 0x0c008004, 0x0c009004, + 0x08108004, 0x08109004, 0x0c108004, 0x0c109004 }, + /* table 1 */ + { 0x00000000, 0x00002000, 0x80000000, 0x80002000, + 0x00000008, 0x00002008, 0x80000008, 0x80002008, + 0x00200000, 0x00202000, 0x80200000, 0x80202000, + 0x00200008, 0x00202008, 0x80200008, 0x80202008, + 0x20000000, 0x20002000, 0xa0000000, 0xa0002000, + 0x20000008, 0x20002008, 0xa0000008, 0xa0002008, + 0x20200000, 0x20202000, 0xa0200000, 0xa0202000, + 0x20200008, 0x20202008, 0xa0200008, 0xa0202008, + 0x00000400, 0x00002400, 0x80000400, 0x80002400, + 0x00000408, 0x00002408, 0x80000408, 0x80002408, + 0x00200400, 
0x00202400, 0x80200400, 0x80202400, + 0x00200408, 0x00202408, 0x80200408, 0x80202408, + 0x20000400, 0x20002400, 0xa0000400, 0xa0002400, + 0x20000408, 0x20002408, 0xa0000408, 0xa0002408, + 0x20200400, 0x20202400, 0xa0200400, 0xa0202400, + 0x20200408, 0x20202408, 0xa0200408, 0xa0202408 }, + /* table 2 */ + { 0x00000000, 0x00004000, 0x00000020, 0x00004020, + 0x00080000, 0x00084000, 0x00080020, 0x00084020, + 0x00000800, 0x00004800, 0x00000820, 0x00004820, + 0x00080800, 0x00084800, 0x00080820, 0x00084820, + 0x00000010, 0x00004010, 0x00000030, 0x00004030, + 0x00080010, 0x00084010, 0x00080030, 0x00084030, + 0x00000810, 0x00004810, 0x00000830, 0x00004830, + 0x00080810, 0x00084810, 0x00080830, 0x00084830, + 0x00400000, 0x00404000, 0x00400020, 0x00404020, + 0x00480000, 0x00484000, 0x00480020, 0x00484020, + 0x00400800, 0x00404800, 0x00400820, 0x00404820, + 0x00480800, 0x00484800, 0x00480820, 0x00484820, + 0x00400010, 0x00404010, 0x00400030, 0x00404030, + 0x00480010, 0x00484010, 0x00480030, 0x00484030, + 0x00400810, 0x00404810, 0x00400830, 0x00404830, + 0x00480810, 0x00484810, 0x00480830, 0x00484830 }, + /* table 3 */ + { 0x00000000, 0x40000000, 0x00000080, 0x40000080, + 0x00040000, 0x40040000, 0x00040080, 0x40040080, + 0x00000040, 0x40000040, 0x000000c0, 0x400000c0, + 0x00040040, 0x40040040, 0x000400c0, 0x400400c0, + 0x10000000, 0x50000000, 0x10000080, 0x50000080, + 0x10040000, 0x50040000, 0x10040080, 0x50040080, + 0x10000040, 0x50000040, 0x100000c0, 0x500000c0, + 0x10040040, 0x50040040, 0x100400c0, 0x500400c0, + 0x00800000, 0x40800000, 0x00800080, 0x40800080, + 0x00840000, 0x40840000, 0x00840080, 0x40840080, + 0x00800040, 0x40800040, 0x008000c0, 0x408000c0, + 0x00840040, 0x40840040, 0x008400c0, 0x408400c0, + 0x10800000, 0x50800000, 0x10800080, 0x50800080, + 0x10840000, 0x50840000, 0x10840080, 0x50840080, + 0x10800040, 0x50800040, 0x108000c0, 0x508000c0, + 0x10840040, 0x50840040, 0x108400c0, 0x508400c0 }, + /* table 4 */ + { 0x00000000, 0x00000008, 0x08000000, 0x08000008, + 
0x00040000, 0x00040008, 0x08040000, 0x08040008, + 0x00002000, 0x00002008, 0x08002000, 0x08002008, + 0x00042000, 0x00042008, 0x08042000, 0x08042008, + 0x80000000, 0x80000008, 0x88000000, 0x88000008, + 0x80040000, 0x80040008, 0x88040000, 0x88040008, + 0x80002000, 0x80002008, 0x88002000, 0x88002008, + 0x80042000, 0x80042008, 0x88042000, 0x88042008, + 0x00080000, 0x00080008, 0x08080000, 0x08080008, + 0x000c0000, 0x000c0008, 0x080c0000, 0x080c0008, + 0x00082000, 0x00082008, 0x08082000, 0x08082008, + 0x000c2000, 0x000c2008, 0x080c2000, 0x080c2008, + 0x80080000, 0x80080008, 0x88080000, 0x88080008, + 0x800c0000, 0x800c0008, 0x880c0000, 0x880c0008, + 0x80082000, 0x80082008, 0x88082000, 0x88082008, + 0x800c2000, 0x800c2008, 0x880c2000, 0x880c2008 }, + /* table 5 */ + { 0x00000000, 0x00400000, 0x00008000, 0x00408000, + 0x40000000, 0x40400000, 0x40008000, 0x40408000, + 0x00000020, 0x00400020, 0x00008020, 0x00408020, + 0x40000020, 0x40400020, 0x40008020, 0x40408020, + 0x00001000, 0x00401000, 0x00009000, 0x00409000, + 0x40001000, 0x40401000, 0x40009000, 0x40409000, + 0x00001020, 0x00401020, 0x00009020, 0x00409020, + 0x40001020, 0x40401020, 0x40009020, 0x40409020, + 0x00100000, 0x00500000, 0x00108000, 0x00508000, + 0x40100000, 0x40500000, 0x40108000, 0x40508000, + 0x00100020, 0x00500020, 0x00108020, 0x00508020, + 0x40100020, 0x40500020, 0x40108020, 0x40508020, + 0x00101000, 0x00501000, 0x00109000, 0x00509000, + 0x40101000, 0x40501000, 0x40109000, 0x40509000, + 0x00101020, 0x00501020, 0x00109020, 0x00509020, + 0x40101020, 0x40501020, 0x40109020, 0x40509020 }, + /* table 6 */ + { 0x00000000, 0x00000040, 0x04000000, 0x04000040, + 0x00000800, 0x00000840, 0x04000800, 0x04000840, + 0x00800000, 0x00800040, 0x04800000, 0x04800040, + 0x00800800, 0x00800840, 0x04800800, 0x04800840, + 0x10000000, 0x10000040, 0x14000000, 0x14000040, + 0x10000800, 0x10000840, 0x14000800, 0x14000840, + 0x10800000, 0x10800040, 0x14800000, 0x14800040, + 0x10800800, 0x10800840, 0x14800800, 0x14800840, + 
/*
 * The PC-1 Permutation
 * If we number the bits of the 8 bytes of key input like this (in octal):
 *     00 01 02 03 04 05 06 07
 *     10 11 12 13 14 15 16 17
 *     20 21 22 23 24 25 26 27
 *     30 31 32 33 34 35 36 37
 *     40 41 42 43 44 45 46 47
 *     50 51 52 53 54 55 56 57
 *     60 61 62 63 64 65 66 67
 *     70 71 72 73 74 75 76 77
 * then after the PC-1 permutation,
 * C0 is
 *     70 60 50 40 30 20 10 00
 *     71 61 51 41 31 21 11 01
 *     72 62 52 42 32 22 12 02
 *     73 63 53 43
 * D0 is
 *     76 66 56 46 36 26 16 06
 *     75 65 55 45 35 25 15 05
 *     74 64 54 44 34 24 14 04
 *                 33 23 13 03
 * and these parity bits have been discarded:
 *     77 67 57 47 37 27 17 07
 *
 * We achieve this by flipping the input matrix about the diagonal from 70-07,
 * getting left =
 *     77 67 57 47 37 27 17 07     (these are the parity bits)
 *     76 66 56 46 36 26 16 06
 *     75 65 55 45 35 25 15 05
 *     74 64 54 44 34 24 14 04
 * right =
 *     73 63 53 43 33 23 13 03
 *     72 62 52 42 32 22 12 02
 *     71 61 51 41 31 21 11 01
 *     70 60 50 40 30 20 10 00
 * then byte swap right, a la htonl() on a little endian machine.
 * right =
 *     70 60 50 40 30 20 10 00
 *     71 61 51 41 31 21 11 01
 *     72 62 52 42 32 22 12 02
 *     73 63 53 43 33 23 13 03
 * then
 *     c0 = right >> 4;
 *     d0 = ((left & 0x00ffffff) << 4) | (right & 0xf);
 */
LEFT_SHIFT_2(reg) (((reg << 2) | (reg >> 26)) & 0x0FFFFFFF) + +/* + * setup key schedules from key + */ + +void +DES_MakeSchedule(HALF *ks, const BYTE *key, DESDirection direction) +{ + register HALF left, right; + register HALF c0, d0; + register HALF temp; + int delta; + unsigned int ls; + +#if defined(HAVE_UNALIGNED_ACCESS) + left = HALFPTR(key)[0]; + right = HALFPTR(key)[1]; +#if defined(IS_LITTLE_ENDIAN) + BYTESWAP(left, temp); + BYTESWAP(right, temp); +#endif +#else + if (((ptrdiff_t)key & 0x03) == 0) { + left = HALFPTR(key)[0]; + right = HALFPTR(key)[1]; +#if defined(IS_LITTLE_ENDIAN) + BYTESWAP(left, temp); + BYTESWAP(right, temp); +#endif + } else { + left = ((HALF)key[0] << 24) | ((HALF)key[1] << 16) | + ((HALF)key[2] << 8) | key[3]; + right = ((HALF)key[4] << 24) | ((HALF)key[5] << 16) | + ((HALF)key[6] << 8) | key[7]; + } +#endif + + PC1(left, right, c0, d0, temp); + + if (direction == DES_ENCRYPT) { + delta = 2 * (int)sizeof(HALF); + } else { + ks += 30; + delta = (-2) * (int)sizeof(HALF); + } + + for (ls = 0x8103; ls; ls >>= 1) { + if (ls & 1) { + c0 = LEFT_SHIFT_1(c0); + d0 = LEFT_SHIFT_1(d0); + } else { + c0 = LEFT_SHIFT_2(c0); + d0 = LEFT_SHIFT_2(d0); + } + +#ifdef USE_INDEXING +#define PC2LOOKUP(b, c) PC2[b][c] + + left = PC2LOOKUP(0, ((c0 >> 22) & 0x3F)); + left |= PC2LOOKUP(1, ((c0 >> 13) & 0x3F)); + left |= PC2LOOKUP(2, ((c0 >> 4) & 0x38) | (c0 & 0x7)); + left |= PC2LOOKUP(3, ((c0 >> 18) & 0xC) | ((c0 >> 11) & 0x3) | (c0 & 0x30)); + + right = PC2LOOKUP(4, ((d0 >> 22) & 0x3F)); + right |= PC2LOOKUP(5, ((d0 >> 15) & 0x30) | ((d0 >> 14) & 0xf)); + right |= PC2LOOKUP(6, ((d0 >> 7) & 0x3F)); + right |= PC2LOOKUP(7, ((d0 >> 1) & 0x3C) | (d0 & 0x3)); +#else +#define PC2LOOKUP(b, c) *(HALF *)((BYTE *)&PC2[b][0] + (c)) + + left = PC2LOOKUP(0, ((c0 >> 20) & 0xFC)); + left |= PC2LOOKUP(1, ((c0 >> 11) & 0xFC)); + left |= PC2LOOKUP(2, ((c0 >> 2) & 0xE0) | ((c0 << 2) & 0x1C)); + left |= PC2LOOKUP(3, ((c0 >> 16) & 0x30) | ((c0 >> 9) & 0xC) | ((c0 << 2) & 
0xC0)); + + right = PC2LOOKUP(4, ((d0 >> 20) & 0xFC)); + right |= PC2LOOKUP(5, ((d0 >> 13) & 0xC0) | ((d0 >> 12) & 0x3C)); + right |= PC2LOOKUP(6, ((d0 >> 5) & 0xFC)); + right |= PC2LOOKUP(7, ((d0 << 1) & 0xF0) | ((d0 << 2) & 0x0C)); +#endif + /* left contains key bits for S1 S3 S2 S4 */ + /* right contains key bits for S6 S8 S5 S7 */ + temp = (left << 16) /* S2 S4 XX XX */ + | (right >> 16); /* XX XX S6 S8 */ + ks[0] = temp; + + temp = (left & 0xffff0000) /* S1 S3 XX XX */ + | (right & 0x0000ffff); /* XX XX S5 S7 */ + ks[1] = temp; + + ks = (HALF *)((BYTE *)ks + delta); + } +} + +/* + * The DES Initial Permutation + * if we number the bits of the 8 bytes of input like this (in octal): + * 00 01 02 03 04 05 06 07 + * 10 11 12 13 14 15 16 17 + * 20 21 22 23 24 25 26 27 + * 30 31 32 33 34 35 36 37 + * 40 41 42 43 44 45 46 47 + * 50 51 52 53 54 55 56 57 + * 60 61 62 63 64 65 66 67 + * 70 71 72 73 74 75 76 77 + * then after the initial permutation, they will be in this order. + * 71 61 51 41 31 21 11 01 + * 73 63 53 43 33 23 13 03 + * 75 65 55 45 35 25 15 05 + * 77 67 57 47 37 27 17 07 + * 70 60 50 40 30 20 10 00 + * 72 62 52 42 32 22 12 02 + * 74 64 54 44 34 24 14 04 + * 76 66 56 46 36 26 16 06 + * + * One way to do this is in two steps: + * 1. Flip this matrix about the diagonal from 70-07 as done for PC1. + * 2. Rearrange the bytes (rows in the matrix above) with the following code. + * + * #define swapHiLo(word, temp) \ + * temp = (word ^ (word >> 24)) & 0x000000ff; \ + * word ^= temp | (temp << 24); + * + * right ^= temp = ((left << 8) ^ right) & 0xff00ff00; + * left ^= temp >> 8; + * swapHiLo(left, temp); + * swapHiLo(right,temp); + * + * However, the two steps can be combined, so that the rows are rearranged + * while the matrix is being flipped, reducing the number of bit exchange + * operations from 8 ot 5. 
+ * + * Initial Permutation */ +#define IP(left, right, temp) \ + right ^= temp = ((left >> 4) ^ right) & 0x0f0f0f0f; \ + left ^= temp << 4; \ + right ^= temp = ((left >> 16) ^ right) & 0x0000ffff; \ + left ^= temp << 16; \ + right ^= temp = ((left << 2) ^ right) & 0xcccccccc; \ + left ^= temp >> 2; \ + right ^= temp = ((left << 8) ^ right) & 0xff00ff00; \ + left ^= temp >> 8; \ + right ^= temp = ((left >> 1) ^ right) & 0x55555555; \ + left ^= temp << 1; + +/* The Final (Inverse Initial) permutation is done by reversing the +** steps of the Initital Permutation +*/ + +#define FP(left, right, temp) \ + right ^= temp = ((left >> 1) ^ right) & 0x55555555; \ + left ^= temp << 1; \ + right ^= temp = ((left << 8) ^ right) & 0xff00ff00; \ + left ^= temp >> 8; \ + right ^= temp = ((left << 2) ^ right) & 0xcccccccc; \ + left ^= temp >> 2; \ + right ^= temp = ((left >> 16) ^ right) & 0x0000ffff; \ + left ^= temp << 16; \ + right ^= temp = ((left >> 4) ^ right) & 0x0f0f0f0f; \ + left ^= temp << 4; + +void NO_SANITIZE_ALIGNMENT +DES_Do1Block(HALF *ks, const BYTE *inbuf, BYTE *outbuf) +{ + register HALF left, right; + register HALF temp; + +#if defined(HAVE_UNALIGNED_ACCESS) + left = HALFPTR(inbuf)[0]; + right = HALFPTR(inbuf)[1]; +#if defined(IS_LITTLE_ENDIAN) + BYTESWAP(left, temp); + BYTESWAP(right, temp); +#endif +#else + if (((ptrdiff_t)inbuf & 0x03) == 0) { + left = HALFPTR(inbuf)[0]; + right = HALFPTR(inbuf)[1]; +#if defined(IS_LITTLE_ENDIAN) + BYTESWAP(left, temp); + BYTESWAP(right, temp); +#endif + } else { + left = ((HALF)inbuf[0] << 24) | ((HALF)inbuf[1] << 16) | + ((HALF)inbuf[2] << 8) | inbuf[3]; + right = ((HALF)inbuf[4] << 24) | ((HALF)inbuf[5] << 16) | + ((HALF)inbuf[6] << 8) | inbuf[7]; + } +#endif + + IP(left, right, temp); + + /* shift the values left circularly 3 bits. 
*/ + left = (left << 3) | (left >> 29); + right = (right << 3) | (right >> 29); + +#ifdef USE_INDEXING +#define KSLOOKUP(s, b) SP[s][((temp >> (b + 2)) & 0x3f)] +#else +#define KSLOOKUP(s, b) *(HALF *)((BYTE *)&SP[s][0] + ((temp >> b) & 0xFC)) +#endif +#define ROUND(out, in, r) \ + temp = in ^ ks[2 * r]; \ + out ^= KSLOOKUP(1, 24); \ + out ^= KSLOOKUP(3, 16); \ + out ^= KSLOOKUP(5, 8); \ + out ^= KSLOOKUP(7, 0); \ + temp = ((in >> 4) | (in << 28)) ^ ks[2 * r + 1]; \ + out ^= KSLOOKUP(0, 24); \ + out ^= KSLOOKUP(2, 16); \ + out ^= KSLOOKUP(4, 8); \ + out ^= KSLOOKUP(6, 0); + + /* Do the 16 Feistel rounds */ + ROUND(left, right, 0) + ROUND(right, left, 1) + ROUND(left, right, 2) + ROUND(right, left, 3) + ROUND(left, right, 4) + ROUND(right, left, 5) + ROUND(left, right, 6) + ROUND(right, left, 7) + ROUND(left, right, 8) + ROUND(right, left, 9) + ROUND(left, right, 10) + ROUND(right, left, 11) + ROUND(left, right, 12) + ROUND(right, left, 13) + ROUND(left, right, 14) + ROUND(right, left, 15) + + /* now shift circularly right 3 bits to undo the shifting done + ** above. switch left and right here. 
+ */ + temp = (left >> 3) | (left << 29); + left = (right >> 3) | (right << 29); + right = temp; + + FP(left, right, temp); + +#if defined(HAVE_UNALIGNED_ACCESS) +#if defined(IS_LITTLE_ENDIAN) + BYTESWAP(left, temp); + BYTESWAP(right, temp); +#endif + HALFPTR(outbuf) + [0] = left; + HALFPTR(outbuf) + [1] = right; +#else + if (((ptrdiff_t)outbuf & 0x03) == 0) { +#if defined(IS_LITTLE_ENDIAN) + BYTESWAP(left, temp); + BYTESWAP(right, temp); +#endif + HALFPTR(outbuf) + [0] = left; + HALFPTR(outbuf) + [1] = right; + } else { + outbuf[0] = (BYTE)(left >> 24); + outbuf[1] = (BYTE)(left >> 16); + outbuf[2] = (BYTE)(left >> 8); + outbuf[3] = (BYTE)(left); + + outbuf[4] = (BYTE)(right >> 24); + outbuf[5] = (BYTE)(right >> 16); + outbuf[6] = (BYTE)(right >> 8); + outbuf[7] = (BYTE)(right); + } +#endif +} + +/* Ackowledgements: +** Two ideas used in this implementation were shown to me by Dennis Ferguson +** in 1990. He credits them to Richard Outerbridge and Dan Hoey. They were: +** 1. The method of computing the Initial and Final permutations. +** 2. Circularly rotating the SP tables and the initial values of left and +** right to reduce the number of shifts required during the 16 rounds. +*/ diff --git a/security/nss/lib/freebl/des.h b/security/nss/lib/freebl/des.h new file mode 100644 index 000000000..70a17e510 --- /dev/null +++ b/security/nss/lib/freebl/des.h @@ -0,0 +1,43 @@ +/* + * des.h + * + * header file for DES-150 library + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef _DES_H_ +#define _DES_H_ 1 + +#include "blapi.h" + +typedef unsigned char BYTE; +typedef unsigned int HALF; + +#define HALFPTR(x) ((HALF *)(x)) +#define SHORTPTR(x) ((unsigned short *)(x)) +#define BYTEPTR(x) ((BYTE *)(x)) + +typedef enum { + DES_ENCRYPT = 0x5555, + DES_DECRYPT = 0xAAAA +} DESDirection; + +typedef void DESFunc(struct DESContextStr *cx, BYTE *out, const BYTE *in, + unsigned int len); + +struct DESContextStr { + /* key schedule, 16 internal keys, each with 8 6-bit parts */ + HALF ks0[32]; + HALF ks1[32]; + HALF ks2[32]; + HALF iv[2]; + DESDirection direction; + DESFunc *worker; +}; + +void DES_MakeSchedule(HALF *ks, const BYTE *key, DESDirection direction); +void DES_Do1Block(HALF *ks, const BYTE *inbuf, BYTE *outbuf); + +#endif diff --git a/security/nss/lib/freebl/desblapi.c b/security/nss/lib/freebl/desblapi.c new file mode 100644 index 000000000..c03ab27cc --- /dev/null +++ b/security/nss/lib/freebl/desblapi.c @@ -0,0 +1,256 @@ +/* + * desblapi.c + * + * core source file for DES-150 library + * Implement DES Modes of Operation and Triple-DES. + * Adapt DES-150 to blapi API. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "des.h" +#include "blapii.h" +#include +#include "secerr.h" + +#if defined(NSS_X86_OR_X64) +/* Intel X86 CPUs do unaligned loads and stores without complaint. 
/*
 * Copy one 8-byte DES block between buffers of any type and alignment.
 *
 * memcpy is used on every platform: it is a single expression, so the macro
 * is safe inside an unbraced if/else, and it avoids reading or writing byte
 * buffers through a HALF pointer. The third argument is unused and is kept
 * only so existing call sites continue to compile.
 */
#define COPY8B(to, from, ptr) memcpy((to), (from), 8)

/* Load 8 bytes from a byte buffer into a HALF[2]. */
#define COPY8BTOHALF(to, from) COPY8B(to, from, from)
/* Store a HALF[2] into an 8-byte buffer. */
#define COPY8BFROMHALF(to, from) COPY8B(to, from, to)
*)cx->iv); + DES_Do1Block(cx->ks2, (BYTE *)cx->iv, (BYTE *)cx->iv); + COPY8BFROMHALF(out, cx->iv); + out += 8; + } +} + +static void NO_SANITIZE_ALIGNMENT +DES_EDE3CBCDe(DESContext *cx, BYTE *out, const BYTE *in, unsigned int len) +{ + const BYTE *bufend; + HALF oldciphertext[2]; + HALF plaintext[2]; + + for (bufend = in + len; in != bufend;) { + oldciphertext[0] = cx->iv[0]; + oldciphertext[1] = cx->iv[1]; + COPY8BTOHALF(cx->iv, in); + in += 8; + DES_Do1Block(cx->ks0, (BYTE *)cx->iv, (BYTE *)plaintext); + DES_Do1Block(cx->ks1, (BYTE *)plaintext, (BYTE *)plaintext); + DES_Do1Block(cx->ks2, (BYTE *)plaintext, (BYTE *)plaintext); + plaintext[0] ^= oldciphertext[0]; + plaintext[1] ^= oldciphertext[1]; + COPY8BFROMHALF(out, plaintext); + out += 8; + } +} + +DESContext * +DES_AllocateContext(void) +{ + return PORT_ZNew(DESContext); +} + +SECStatus +DES_InitContext(DESContext *cx, const unsigned char *key, unsigned int keylen, + const unsigned char *iv, int mode, unsigned int encrypt, + unsigned int unused) +{ + DESDirection opposite; + if (!cx) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + cx->direction = encrypt ? DES_ENCRYPT : DES_DECRYPT; + opposite = encrypt ? DES_DECRYPT : DES_ENCRYPT; + switch (mode) { + case NSS_DES: /* DES ECB */ + DES_MakeSchedule(cx->ks0, key, cx->direction); + cx->worker = &DES_ECB; + break; + + case NSS_DES_EDE3: /* DES EDE ECB */ + cx->worker = &DES_EDE3_ECB; + if (encrypt) { + DES_MakeSchedule(cx->ks0, key, cx->direction); + DES_MakeSchedule(cx->ks1, key + 8, opposite); + DES_MakeSchedule(cx->ks2, key + 16, cx->direction); + } else { + DES_MakeSchedule(cx->ks2, key, cx->direction); + DES_MakeSchedule(cx->ks1, key + 8, opposite); + DES_MakeSchedule(cx->ks0, key + 16, cx->direction); + } + break; + + case NSS_DES_CBC: /* DES CBC */ + COPY8BTOHALF(cx->iv, iv); + cx->worker = encrypt ? 
&DES_CBCEn : &DES_CBCDe; + DES_MakeSchedule(cx->ks0, key, cx->direction); + break; + + case NSS_DES_EDE3_CBC: /* DES EDE CBC */ + COPY8BTOHALF(cx->iv, iv); + if (encrypt) { + cx->worker = &DES_EDE3CBCEn; + DES_MakeSchedule(cx->ks0, key, cx->direction); + DES_MakeSchedule(cx->ks1, key + 8, opposite); + DES_MakeSchedule(cx->ks2, key + 16, cx->direction); + } else { + cx->worker = &DES_EDE3CBCDe; + DES_MakeSchedule(cx->ks2, key, cx->direction); + DES_MakeSchedule(cx->ks1, key + 8, opposite); + DES_MakeSchedule(cx->ks0, key + 16, cx->direction); + } + break; + + default: + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + return SECSuccess; +} + +DESContext * +DES_CreateContext(const BYTE *key, const BYTE *iv, int mode, PRBool encrypt) +{ + DESContext *cx = PORT_ZNew(DESContext); + SECStatus rv = DES_InitContext(cx, key, 0, iv, mode, encrypt, 0); + + if (rv != SECSuccess) { + PORT_ZFree(cx, sizeof *cx); + cx = NULL; + } + return cx; +} + +void +DES_DestroyContext(DESContext *cx, PRBool freeit) +{ + if (cx) { + memset(cx, 0, sizeof *cx); + if (freeit) + PORT_Free(cx); + } +} + +SECStatus +DES_Encrypt(DESContext *cx, BYTE *out, unsigned int *outLen, + unsigned int maxOutLen, const BYTE *in, unsigned int inLen) +{ + + if ((inLen % 8) != 0 || maxOutLen < inLen || !cx || + cx->direction != DES_ENCRYPT) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + cx->worker(cx, out, in, inLen); + if (outLen) + *outLen = inLen; + return SECSuccess; +} + +SECStatus +DES_Decrypt(DESContext *cx, BYTE *out, unsigned int *outLen, + unsigned int maxOutLen, const BYTE *in, unsigned int inLen) +{ + + if ((inLen % 8) != 0 || maxOutLen < inLen || !cx || + cx->direction != DES_DECRYPT) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + cx->worker(cx, out, in, inLen); + if (outLen) + *outLen = inLen; + return SECSuccess; +} diff --git a/security/nss/lib/freebl/det_rng.c b/security/nss/lib/freebl/det_rng.c new file mode 100644 index 
000000000..fcbf9b34a --- /dev/null +++ b/security/nss/lib/freebl/det_rng.c @@ -0,0 +1,67 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "blapi.h" +#include "blapit.h" +#include "chacha20.h" +#include "nssilock.h" +#include "seccomon.h" +#include "secerr.h" + +static unsigned long globalNumCalls = 0; + +SECStatus +prng_ResetForFuzzing(PZLock *rng_lock) +{ + /* Check for a valid RNG lock. */ + PORT_Assert(rng_lock != NULL); + if (rng_lock == NULL) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + /* --- LOCKED --- */ + PZ_Lock(rng_lock); + globalNumCalls = 0; + PZ_Unlock(rng_lock); + /* --- UNLOCKED --- */ + + return SECSuccess; +} + +SECStatus +prng_GenerateDeterministicRandomBytes(PZLock *rng_lock, void *dest, size_t len) +{ + static const uint8_t key[32]; + uint8_t nonce[12] = { 0 }; + + /* Check for a valid RNG lock. */ + PORT_Assert(rng_lock != NULL); + if (rng_lock == NULL) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + /* --- LOCKED --- */ + PZ_Lock(rng_lock); + + memcpy(nonce, &globalNumCalls, sizeof(globalNumCalls)); + globalNumCalls++; + + ChaCha20Poly1305Context *cx = + ChaCha20Poly1305_CreateContext(key, sizeof(key), 16); + if (!cx) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + PZ_Unlock(rng_lock); + return SECFailure; + } + + memset(dest, 0, len); + ChaCha20XOR(dest, dest, len, key, nonce, 0); + ChaCha20Poly1305_DestroyContext(cx, PR_TRUE); + + PZ_Unlock(rng_lock); + /* --- UNLOCKED --- */ + return SECSuccess; +} diff --git a/security/nss/lib/freebl/det_rng.h b/security/nss/lib/freebl/det_rng.h new file mode 100644 index 000000000..599d726ca --- /dev/null +++ b/security/nss/lib/freebl/det_rng.h @@ -0,0 +1,12 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef __det_rng_h_ +#define __det_rng_h_ + +SECStatus prng_ResetForFuzzing(PZLock *rng_lock); +SECStatus prng_GenerateDeterministicRandomBytes(PZLock *rng_lock, void *dest, + size_t len); + +#endif /* __det_rng_h_ */ diff --git a/security/nss/lib/freebl/dh.c b/security/nss/lib/freebl/dh.c new file mode 100644 index 000000000..97025c7e2 --- /dev/null +++ b/security/nss/lib/freebl/dh.c @@ -0,0 +1,452 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * Diffie-Hellman parameter generation, key generation, and secret derivation. + * KEA secret generation and verification. + */ +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "prerr.h" +#include "secerr.h" + +#include "blapi.h" +#include "secitem.h" +#include "mpi.h" +#include "mpprime.h" +#include "secmpi.h" + +#define KEA_DERIVED_SECRET_LEN 128 + +/* Lengths are in bytes. */ +static unsigned int +dh_GetSecretKeyLen(unsigned int primeLen) +{ + /* Based on Table 2 in NIST SP 800-57. 
*/ + if (primeLen >= 1920) { /* 15360 bits */ + return 64; /* 512 bits */ + } + if (primeLen >= 960) { /* 7680 bits */ + return 48; /* 384 bits */ + } + if (primeLen >= 384) { /* 3072 bits */ + return 32; /* 256 bits */ + } + if (primeLen >= 256) { /* 2048 bits */ + return 28; /* 224 bits */ + } + return 20; /* 160 bits */ +} + +SECStatus +DH_GenParam(int primeLen, DHParams **params) +{ + PLArenaPool *arena; + DHParams *dhparams; + unsigned char *pb = NULL; + unsigned char *ab = NULL; + unsigned long counter = 0; + mp_int p, q, a, h, psub1, test; + mp_err err = MP_OKAY; + SECStatus rv = SECSuccess; + if (!params || primeLen < 0) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + arena = PORT_NewArena(NSS_FREEBL_DEFAULT_CHUNKSIZE); + if (!arena) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return SECFailure; + } + dhparams = (DHParams *)PORT_ArenaZAlloc(arena, sizeof(DHParams)); + if (!dhparams) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + PORT_FreeArena(arena, PR_TRUE); + return SECFailure; + } + dhparams->arena = arena; + MP_DIGITS(&p) = 0; + MP_DIGITS(&q) = 0; + MP_DIGITS(&a) = 0; + MP_DIGITS(&h) = 0; + MP_DIGITS(&psub1) = 0; + MP_DIGITS(&test) = 0; + CHECK_MPI_OK(mp_init(&p)); + CHECK_MPI_OK(mp_init(&q)); + CHECK_MPI_OK(mp_init(&a)); + CHECK_MPI_OK(mp_init(&h)); + CHECK_MPI_OK(mp_init(&psub1)); + CHECK_MPI_OK(mp_init(&test)); + /* generate prime with MPI, uses Miller-Rabin to generate strong prime. */ + pb = PORT_Alloc(primeLen); + CHECK_SEC_OK(RNG_GenerateGlobalRandomBytes(pb, primeLen)); + pb[0] |= 0x80; /* set high-order bit */ + pb[primeLen - 1] |= 0x01; /* set low-order bit */ + CHECK_MPI_OK(mp_read_unsigned_octets(&p, pb, primeLen)); + CHECK_MPI_OK(mpp_make_prime(&p, primeLen * 8, PR_TRUE, &counter)); + /* construct Sophie-Germain prime q = (p-1)/2. */ + CHECK_MPI_OK(mp_sub_d(&p, 1, &psub1)); + CHECK_MPI_OK(mp_div_2(&psub1, &q)); + /* construct a generator from the prime. 
*/ + ab = PORT_Alloc(primeLen); + /* generate a candidate number a in p's field */ + CHECK_SEC_OK(RNG_GenerateGlobalRandomBytes(ab, primeLen)); + CHECK_MPI_OK(mp_read_unsigned_octets(&a, ab, primeLen)); + /* force a < p (note that quot(a/p) <= 1) */ + if (mp_cmp(&a, &p) > 0) + CHECK_MPI_OK(mp_sub(&a, &p, &a)); + do { + /* check that a is in the range [2..p-1] */ + if (mp_cmp_d(&a, 2) < 0 || mp_cmp(&a, &psub1) >= 0) { + /* a is outside of the allowed range. Set a=3 and keep going. */ + mp_set(&a, 3); + } + /* if a**q mod p != 1 then a is a generator */ + CHECK_MPI_OK(mp_exptmod(&a, &q, &p, &test)); + if (mp_cmp_d(&test, 1) != 0) + break; + /* increment the candidate and try again. */ + CHECK_MPI_OK(mp_add_d(&a, 1, &a)); + } while (PR_TRUE); + MPINT_TO_SECITEM(&p, &dhparams->prime, arena); + MPINT_TO_SECITEM(&a, &dhparams->base, arena); + *params = dhparams; +cleanup: + mp_clear(&p); + mp_clear(&q); + mp_clear(&a); + mp_clear(&h); + mp_clear(&psub1); + mp_clear(&test); + if (pb) + PORT_ZFree(pb, primeLen); + if (ab) + PORT_ZFree(ab, primeLen); + if (err) { + MP_TO_SEC_ERROR(err); + rv = SECFailure; + } + if (rv) + PORT_FreeArena(arena, PR_TRUE); + return rv; +} + +SECStatus +DH_NewKey(DHParams *params, DHPrivateKey **privKey) +{ + PLArenaPool *arena; + DHPrivateKey *key; + mp_int g, xa, p, Ya; + mp_err err = MP_OKAY; + SECStatus rv = SECSuccess; + if (!params || !privKey) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + arena = PORT_NewArena(NSS_FREEBL_DEFAULT_CHUNKSIZE); + if (!arena) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return SECFailure; + } + key = (DHPrivateKey *)PORT_ArenaZAlloc(arena, sizeof(DHPrivateKey)); + if (!key) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + PORT_FreeArena(arena, PR_TRUE); + return SECFailure; + } + key->arena = arena; + MP_DIGITS(&g) = 0; + MP_DIGITS(&xa) = 0; + MP_DIGITS(&p) = 0; + MP_DIGITS(&Ya) = 0; + CHECK_MPI_OK(mp_init(&g)); + CHECK_MPI_OK(mp_init(&xa)); + CHECK_MPI_OK(mp_init(&p)); + 
CHECK_MPI_OK(mp_init(&Ya)); + /* Set private key's p */ + CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->prime, ¶ms->prime)); + SECITEM_TO_MPINT(key->prime, &p); + /* Set private key's g */ + CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->base, ¶ms->base)); + SECITEM_TO_MPINT(key->base, &g); + /* Generate private key xa */ + SECITEM_AllocItem(arena, &key->privateValue, + dh_GetSecretKeyLen(params->prime.len)); + CHECK_SEC_OK(RNG_GenerateGlobalRandomBytes(key->privateValue.data, + key->privateValue.len)); + SECITEM_TO_MPINT(key->privateValue, &xa); + /* xa < p */ + CHECK_MPI_OK(mp_mod(&xa, &p, &xa)); + /* Compute public key Ya = g ** xa mod p */ + CHECK_MPI_OK(mp_exptmod(&g, &xa, &p, &Ya)); + MPINT_TO_SECITEM(&Ya, &key->publicValue, key->arena); + *privKey = key; +cleanup: + mp_clear(&g); + mp_clear(&xa); + mp_clear(&p); + mp_clear(&Ya); + if (err) { + MP_TO_SEC_ERROR(err); + rv = SECFailure; + } + if (rv) { + *privKey = NULL; + PORT_FreeArena(arena, PR_TRUE); + } + return rv; +} + +SECStatus +DH_Derive(SECItem *publicValue, + SECItem *prime, + SECItem *privateValue, + SECItem *derivedSecret, + unsigned int outBytes) +{ + mp_int p, Xa, Yb, ZZ, psub1; + mp_err err = MP_OKAY; + unsigned int len = 0; + unsigned int nb; + unsigned char *secret = NULL; + if (!publicValue || !prime || !privateValue || !derivedSecret) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + memset(derivedSecret, 0, sizeof *derivedSecret); + MP_DIGITS(&p) = 0; + MP_DIGITS(&Xa) = 0; + MP_DIGITS(&Yb) = 0; + MP_DIGITS(&ZZ) = 0; + MP_DIGITS(&psub1) = 0; + CHECK_MPI_OK(mp_init(&p)); + CHECK_MPI_OK(mp_init(&Xa)); + CHECK_MPI_OK(mp_init(&Yb)); + CHECK_MPI_OK(mp_init(&ZZ)); + CHECK_MPI_OK(mp_init(&psub1)); + SECITEM_TO_MPINT(*publicValue, &Yb); + SECITEM_TO_MPINT(*privateValue, &Xa); + SECITEM_TO_MPINT(*prime, &p); + CHECK_MPI_OK(mp_sub_d(&p, 1, &psub1)); + + /* We assume that the modulus, p, is a safe prime. That is, p = 2q+1 where + * q is also a prime. 
Thus the orders of the subgroups are factors of 2q: + * namely 1, 2, q and 2q. + * + * We check that the peer's public value isn't zero (which isn't in the + * group), one (subgroup of order one) or p-1 (subgroup of order 2). We + * also check that the public value is less than p, to avoid being fooled + * by values like p+1 or 2*p-1. + * + * Thus we must be operating in the subgroup of size q or 2q. */ + if (mp_cmp_d(&Yb, 1) <= 0 || + mp_cmp(&Yb, &psub1) >= 0) { + err = MP_BADARG; + goto cleanup; + } + + /* ZZ = (Yb)**Xa mod p */ + CHECK_MPI_OK(mp_exptmod(&Yb, &Xa, &p, &ZZ)); + /* number of bytes in the derived secret */ + len = mp_unsigned_octet_size(&ZZ); + if (len <= 0) { + err = MP_BADARG; + goto cleanup; + } + + /* + * We check to make sure that ZZ is not equal to 1 or -1 mod p. + * This helps guard against small subgroup attacks, since an attacker + * using a subgroup of size N will produce 1 or -1 with probability 1/N. + * When the protocol is executed within a properly large subgroup, the + * probability of this result will be negligibly small. For example, + * with a strong prime of the form 2p+1, the probability will be 1/p. + * + * We return MP_BADARG because this is probably the result of a bad + * public value or a bad prime having been provided. + */ + if (mp_cmp_d(&ZZ, 1) == 0 || + mp_cmp(&ZZ, &psub1) == 0) { + err = MP_BADARG; + goto cleanup; + } + + /* allocate a buffer which can hold the entire derived secret. */ + secret = PORT_Alloc(len); + if (secret == NULL) { + err = MP_MEM; + goto cleanup; + } + /* grab the derived secret */ + err = mp_to_unsigned_octets(&ZZ, secret, len); + if (err >= 0) + err = MP_OKAY; + /* + ** if outBytes is 0 take all of the bytes from the derived secret. + ** if outBytes is not 0 take exactly outBytes from the derived secret, zero + ** pad at the beginning if necessary, and truncate beginning bytes + ** if necessary. 
+ */ + if (outBytes > 0) + nb = outBytes; + else + nb = len; + if (SECITEM_AllocItem(NULL, derivedSecret, nb) == NULL) { + err = MP_MEM; + goto cleanup; + } + if (len < nb) { + unsigned int offset = nb - len; + memset(derivedSecret->data, 0, offset); + memcpy(derivedSecret->data + offset, secret, len); + } else { + memcpy(derivedSecret->data, secret + len - nb, nb); + } +cleanup: + mp_clear(&p); + mp_clear(&Xa); + mp_clear(&Yb); + mp_clear(&ZZ); + mp_clear(&psub1); + if (secret) { + /* free the buffer allocated for the full secret. */ + PORT_ZFree(secret, len); + } + if (err) { + MP_TO_SEC_ERROR(err); + if (derivedSecret->data) + PORT_ZFree(derivedSecret->data, derivedSecret->len); + return SECFailure; + } + return SECSuccess; +} + +SECStatus +KEA_Derive(SECItem *prime, + SECItem *public1, + SECItem *public2, + SECItem *private1, + SECItem *private2, + SECItem *derivedSecret) +{ + mp_int p, Y, R, r, x, t, u, w; + mp_err err; + unsigned char *secret = NULL; + unsigned int len = 0, offset; + if (!prime || !public1 || !public2 || !private1 || !private2 || + !derivedSecret) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + memset(derivedSecret, 0, sizeof *derivedSecret); + MP_DIGITS(&p) = 0; + MP_DIGITS(&Y) = 0; + MP_DIGITS(&R) = 0; + MP_DIGITS(&r) = 0; + MP_DIGITS(&x) = 0; + MP_DIGITS(&t) = 0; + MP_DIGITS(&u) = 0; + MP_DIGITS(&w) = 0; + CHECK_MPI_OK(mp_init(&p)); + CHECK_MPI_OK(mp_init(&Y)); + CHECK_MPI_OK(mp_init(&R)); + CHECK_MPI_OK(mp_init(&r)); + CHECK_MPI_OK(mp_init(&x)); + CHECK_MPI_OK(mp_init(&t)); + CHECK_MPI_OK(mp_init(&u)); + CHECK_MPI_OK(mp_init(&w)); + SECITEM_TO_MPINT(*prime, &p); + SECITEM_TO_MPINT(*public1, &Y); + SECITEM_TO_MPINT(*public2, &R); + SECITEM_TO_MPINT(*private1, &r); + SECITEM_TO_MPINT(*private2, &x); + /* t = DH(Y, r, p) = Y ** r mod p */ + CHECK_MPI_OK(mp_exptmod(&Y, &r, &p, &t)); + /* u = DH(R, x, p) = R ** x mod p */ + CHECK_MPI_OK(mp_exptmod(&R, &x, &p, &u)); + /* w = (t + u) mod p */ + CHECK_MPI_OK(mp_addmod(&t, 
&u, &p, &w)); + /* allocate a buffer for the full derived secret */ + len = mp_unsigned_octet_size(&w); + secret = PORT_Alloc(len); + if (secret == NULL) { + err = MP_MEM; + goto cleanup; + } + /* grab the secret */ + err = mp_to_unsigned_octets(&w, secret, len); + if (err > 0) + err = MP_OKAY; + /* allocate output buffer */ + if (SECITEM_AllocItem(NULL, derivedSecret, KEA_DERIVED_SECRET_LEN) == NULL) { + err = MP_MEM; + goto cleanup; + } + memset(derivedSecret->data, 0, derivedSecret->len); + /* copy in the 128 lsb of the secret */ + if (len >= KEA_DERIVED_SECRET_LEN) { + memcpy(derivedSecret->data, secret + (len - KEA_DERIVED_SECRET_LEN), + KEA_DERIVED_SECRET_LEN); + } else { + offset = KEA_DERIVED_SECRET_LEN - len; + memcpy(derivedSecret->data + offset, secret, len); + } +cleanup: + mp_clear(&p); + mp_clear(&Y); + mp_clear(&R); + mp_clear(&r); + mp_clear(&x); + mp_clear(&t); + mp_clear(&u); + mp_clear(&w); + if (secret) + PORT_ZFree(secret, len); + if (err) { + MP_TO_SEC_ERROR(err); + if (derivedSecret->data) + PORT_ZFree(derivedSecret->data, derivedSecret->len); + return SECFailure; + } + return SECSuccess; +} + +PRBool +KEA_Verify(SECItem *Y, SECItem *prime, SECItem *subPrime) +{ + mp_int p, q, y, r; + mp_err err; + int cmp = 1; /* default is false */ + if (!Y || !prime || !subPrime) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + MP_DIGITS(&p) = 0; + MP_DIGITS(&q) = 0; + MP_DIGITS(&y) = 0; + MP_DIGITS(&r) = 0; + CHECK_MPI_OK(mp_init(&p)); + CHECK_MPI_OK(mp_init(&q)); + CHECK_MPI_OK(mp_init(&y)); + CHECK_MPI_OK(mp_init(&r)); + SECITEM_TO_MPINT(*prime, &p); + SECITEM_TO_MPINT(*subPrime, &q); + SECITEM_TO_MPINT(*Y, &y); + /* compute r = y**q mod p */ + CHECK_MPI_OK(mp_exptmod(&y, &q, &p, &r)); + /* compare to 1 */ + cmp = mp_cmp_d(&r, 1); +cleanup: + mp_clear(&p); + mp_clear(&q); + mp_clear(&y); + mp_clear(&r); + if (err) { + MP_TO_SEC_ERROR(err); + return PR_FALSE; + } + return (cmp == 0) ? 
PR_TRUE : PR_FALSE; +} diff --git a/security/nss/lib/freebl/drbg.c b/security/nss/lib/freebl/drbg.c new file mode 100644 index 000000000..ac0bba6e0 --- /dev/null +++ b/security/nss/lib/freebl/drbg.c @@ -0,0 +1,968 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "prerror.h" +#include "secerr.h" + +#include "prtypes.h" +#include "prinit.h" +#include "blapi.h" +#include "blapii.h" +#include "nssilock.h" +#include "secitem.h" +#include "sha_fast.h" +#include "sha256.h" +#include "secrng.h" /* for RNG_SystemRNG() */ +#include "secmpi.h" + +#ifdef UNSAFE_FUZZER_MODE +#include "det_rng.h" +#endif + +/* PRNG_SEEDLEN defined in NIST SP 800-90 section 10.1 + * for SHA-1, SHA-224, and SHA-256 it's 440 bits. + * for SHA-384 and SHA-512 it's 888 bits */ +#define PRNG_SEEDLEN (440 / PR_BITS_PER_BYTE) +#define PRNG_MAX_ADDITIONAL_BYTES PR_INT64(0x100000000) +/* 2^35 bits or 2^32 bytes */ +#define PRNG_MAX_REQUEST_SIZE 0x10000 /* 2^19 bits or 2^16 bytes */ +#define PRNG_ADDITONAL_DATA_CACHE_SIZE (8 * 1024) /* must be less than \ + * PRNG_MAX_ADDITIONAL_BYTES \ + */ + +/* RESEED_COUNT is how many calls to the prng before we need to reseed + * under normal NIST rules, you must return an error. In the NSS case, we + * self-reseed with RNG_SystemRNG(). Count can be a large number. For code + * simplicity, we specify count with 2 components: RESEED_BYTE (which is + * the same as LOG256(RESEED_COUNT)) and RESEED_VALUE (which is the same as + * RESEED_COUNT / (256 ^ RESEED_BYTE)). Another way to look at this is + * RESEED_COUNT = RESEED_VALUE * (256 ^ RESEED_BYTE). 
For Hash based DRBG + * we use the maximum count value, 2^48, or RESEED_BYTE=6 and RESEED_VALUE=1 + */ +#define RESEED_BYTE 6 +#define RESEED_VALUE 1 + +#define PRNG_RESET_RESEED_COUNT(rng) \ + PORT_Memset((rng)->reseed_counter, 0, sizeof(rng)->reseed_counter); \ + (rng)->reseed_counter[RESEED_BYTE] = 1; + +/* + * The actual values of this enum are specified in SP 800-90, 10.1.1.* + * The spec does not name the types, it only uses bare values + */ +typedef enum { + prngCGenerateType = 0, /* used when creating a new 'C' */ + prngReseedType = 1, /* used in reseeding */ + prngAdditionalDataType = 2, /* used in mixing additional data */ + prngGenerateByteType = 3 /* used when mixing internal state while + * generating bytes */ +} prngVTypes; + +/* + * Global RNG context + */ +struct RNGContextStr { + PZLock *lock; /* Lock to serialize access to global rng */ + /* + * NOTE, a number of steps in the drbg algorithm need to hash + * V_type || V. The code, therefore, depends on the V array following + * immediately after V_type to avoid extra copies. To accomplish this + * in a way that compiliers can't perturb, we declare V_type and V + * as a V_Data array and reference them by macros */ + PRUint8 V_Data[PRNG_SEEDLEN + 1]; /* internal state variables */ +#define V_type V_Data[0] +#define V(rng) (((rng)->V_Data) + 1) +#define VSize(rng) ((sizeof(rng)->V_Data) - 1) + PRUint8 C[PRNG_SEEDLEN]; /* internal state variables */ + PRUint8 lastOutput[SHA256_LENGTH]; /* for continuous rng checking */ + /* If we get calls for the PRNG to return less than the length of our + * hash, we extend the request for a full hash (since we'll be doing + * the full hash anyway). Future requests for random numbers are fulfilled + * from the remainder of the bytes we generated. Requests for bytes longer + * than the hash size are fulfilled directly from the HashGen function + * of the random number generator. 
*/ + PRUint8 reseed_counter[RESEED_BYTE + 1]; /* number of requests since the + * last reseed. Need only be + * big enough to hold the whole + * reseed count */ + PRUint8 data[SHA256_LENGTH]; /* when we request less than a block + * save the rest of the rng output for + * another partial block */ + PRUint8 dataAvail; /* # bytes of output available in our cache, + * [0...SHA256_LENGTH] */ + /* store additional data that has been shovelled off to us by + * RNG_RandomUpdate. */ + PRUint8 additionalDataCache[PRNG_ADDITONAL_DATA_CACHE_SIZE]; + PRUint32 additionalAvail; + PRBool isValid; /* false if RNG reaches an invalid state */ + PRBool isKatTest; /* true if running NIST PRNG KAT tests */ +}; + +typedef struct RNGContextStr RNGContext; +static RNGContext *globalrng = NULL; +static RNGContext theGlobalRng; + +/* + * The next several functions are derived from the NIST SP 800-90 + * spec. In these functions, an attempt was made to use names consistent + * with the names in the spec, even if they differ from normal NSS usage. + */ + +/* + * Hash Derive function defined in NISP SP 800-90 Section 10.4.1. + * This function is used in the Instantiate and Reseed functions. + * + * NOTE: requested_bytes cannot overlap with input_string_1 or input_string_2. + * input_string_1 and input_string_2 are logically concatentated. + * input_string_1 must be supplied. + * if input_string_2 is not supplied, NULL should be passed for this parameter. 
+ */ +static SECStatus +prng_Hash_df(PRUint8 *requested_bytes, unsigned int no_of_bytes_to_return, + const PRUint8 *input_string_1, unsigned int input_string_1_len, + const PRUint8 *input_string_2, unsigned int input_string_2_len) +{ + SHA256Context ctx; + PRUint32 tmp; + PRUint8 counter; + + tmp = SHA_HTONL(no_of_bytes_to_return * 8); + + for (counter = 1; no_of_bytes_to_return > 0; counter++) { + unsigned int hash_return_len; + SHA256_Begin(&ctx); + SHA256_Update(&ctx, &counter, 1); + SHA256_Update(&ctx, (unsigned char *)&tmp, sizeof tmp); + SHA256_Update(&ctx, input_string_1, input_string_1_len); + if (input_string_2) { + SHA256_Update(&ctx, input_string_2, input_string_2_len); + } + SHA256_End(&ctx, requested_bytes, &hash_return_len, + no_of_bytes_to_return); + requested_bytes += hash_return_len; + no_of_bytes_to_return -= hash_return_len; + } + return SECSuccess; +} + +/* + * Hash_DRBG Instantiate NIST SP 800-90 10.1.1.2 + * + * NOTE: bytes & len are entropy || nonce || personalization_string. In + * normal operation, NSS calculates them all together in a single call. + */ +static SECStatus +prng_instantiate(RNGContext *rng, const PRUint8 *bytes, unsigned int len) +{ + if (!rng->isKatTest && len < PRNG_SEEDLEN) { + /* If the seedlen is too small, it's probably because we failed to get + * enough random data. + * This is stricter than NIST SP800-90A requires. Don't enforce it for + * tests. */ + PORT_SetError(SEC_ERROR_NEED_RANDOM); + return SECFailure; + } + prng_Hash_df(V(rng), VSize(rng), bytes, len, NULL, 0); + rng->V_type = prngCGenerateType; + prng_Hash_df(rng->C, sizeof rng->C, rng->V_Data, sizeof rng->V_Data, NULL, 0); + PRNG_RESET_RESEED_COUNT(rng) + return SECSuccess; +} + +/* + * Update the global random number generator with more seeding + * material. Use the Hash_DRBG reseed algorithm from NIST SP-800-90 + * section 10.1.1.3 + * + * If entropy is NULL, it is fetched from the noise generator. 
+ */ +static SECStatus +prng_reseed(RNGContext *rng, const PRUint8 *entropy, unsigned int entropy_len, + const PRUint8 *additional_input, unsigned int additional_input_len) +{ + PRUint8 noiseData[(sizeof rng->V_Data) + PRNG_SEEDLEN]; + PRUint8 *noise = &noiseData[0]; + + /* if entropy wasn't supplied, fetch it. (normal operation case) */ + if (entropy == NULL) { + entropy_len = (unsigned int)RNG_SystemRNG( + &noiseData[sizeof rng->V_Data], PRNG_SEEDLEN); + } else { + /* NOTE: this code is only available for testing, not to applications */ + /* if entropy was too big for the stack variable, get it from malloc */ + if (entropy_len > PRNG_SEEDLEN) { + noise = PORT_Alloc(entropy_len + (sizeof rng->V_Data)); + if (noise == NULL) { + return SECFailure; + } + } + PORT_Memcpy(&noise[sizeof rng->V_Data], entropy, entropy_len); + } + + if (entropy_len < 256 / PR_BITS_PER_BYTE) { + /* noise == &noiseData[0] at this point, so nothing to free */ + PORT_SetError(SEC_ERROR_NEED_RANDOM); + return SECFailure; + } + + rng->V_type = prngReseedType; + PORT_Memcpy(noise, rng->V_Data, sizeof rng->V_Data); + prng_Hash_df(V(rng), VSize(rng), noise, (sizeof rng->V_Data) + entropy_len, + additional_input, additional_input_len); + /* clear potential CSP */ + PORT_Memset(noise, 0, (sizeof rng->V_Data) + entropy_len); + rng->V_type = prngCGenerateType; + prng_Hash_df(rng->C, sizeof rng->C, rng->V_Data, sizeof rng->V_Data, NULL, 0); + PRNG_RESET_RESEED_COUNT(rng) + + if (noise != &noiseData[0]) { + PORT_Free(noise); + } + return SECSuccess; +} + +/* + * SP 800-90 requires we rerun our health tests on reseed + */ +static SECStatus +prng_reseed_test(RNGContext *rng, const PRUint8 *entropy, + unsigned int entropy_len, const PRUint8 *additional_input, + unsigned int additional_input_len) +{ + SECStatus rv; + + /* do health checks in FIPS mode */ + rv = PRNGTEST_RunHealthTests(); + if (rv != SECSuccess) { + /* error set by PRNGTEST_RunHealTests() */ + rng->isValid = PR_FALSE; + return SECFailure; + 
} + return prng_reseed(rng, entropy, entropy_len, + additional_input, additional_input_len); +} + +/* + * build some fast inline functions for adding. + */ +#define PRNG_ADD_CARRY_ONLY(dest, start, carry) \ + { \ + int k1; \ + for (k1 = start; carry && k1 >= 0; k1--) { \ + carry = !(++dest[k1]); \ + } \ + } + +/* + * NOTE: dest must be an array for the following to work. + */ +#define PRNG_ADD_BITS(dest, dest_len, add, len, carry) \ + carry = 0; \ + PORT_Assert((dest_len) >= (len)); \ + { \ + int k1, k2; \ + for (k1 = dest_len - 1, k2 = len - 1; k2 >= 0; --k1, --k2) { \ + carry += dest[k1] + add[k2]; \ + dest[k1] = (PRUint8)carry; \ + carry >>= 8; \ + } \ + } + +#define PRNG_ADD_BITS_AND_CARRY(dest, dest_len, add, len, carry) \ + PRNG_ADD_BITS(dest, dest_len, add, len, carry) \ + PRNG_ADD_CARRY_ONLY(dest, dest_len - len - 1, carry) + +/* + * This function expands the internal state of the prng to fulfill any number + * of bytes we need for this request. We only use this call if we need more + * than can be supplied by a single call to SHA256_HashBuf. + * + * This function is specified in NIST SP 800-90 section 10.1.1.4, Hashgen + */ +static void +prng_Hashgen(RNGContext *rng, PRUint8 *returned_bytes, + unsigned int no_of_returned_bytes) +{ + PRUint8 data[VSize(rng)]; + PRUint8 thisHash[SHA256_LENGTH]; + PRUint8 *lastHash = rng->lastOutput; + + PORT_Memcpy(data, V(rng), VSize(rng)); + while (no_of_returned_bytes) { + SHA256Context ctx; + unsigned int len; + unsigned int carry; + + SHA256_Begin(&ctx); + SHA256_Update(&ctx, data, sizeof data); + SHA256_End(&ctx, thisHash, &len, SHA256_LENGTH); + if (PORT_Memcmp(lastHash, thisHash, len) == 0) { + rng->isValid = PR_FALSE; + break; + } + if (no_of_returned_bytes < SHA256_LENGTH) { + len = no_of_returned_bytes; + } + PORT_Memcpy(returned_bytes, thisHash, len); + lastHash = returned_bytes; + returned_bytes += len; + no_of_returned_bytes -= len; + /* The carry parameter is a bool (increment or not). 
+ * This increments data if no_of_returned_bytes is not zero */ + carry = no_of_returned_bytes; + PRNG_ADD_CARRY_ONLY(data, (sizeof data) - 1, carry); + } + PORT_Memcpy(rng->lastOutput, thisHash, SHA256_LENGTH); + PORT_Memset(data, 0, sizeof data); + PORT_Memset(thisHash, 0, sizeof thisHash); +} + +/* + * Generates new random bytes and advances the internal prng state. + * additional bytes are only used in algorithm testing. + * + * This function is specified in NIST SP 800-90 section 10.1.1.4 + */ +static SECStatus +prng_generateNewBytes(RNGContext *rng, + PRUint8 *returned_bytes, unsigned int no_of_returned_bytes, + const PRUint8 *additional_input, + unsigned int additional_input_len) +{ + PRUint8 H[SHA256_LENGTH]; /* both H and w since they + * aren't used concurrently */ + unsigned int carry; + + if (!rng->isValid) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + /* This code only triggers during tests, normal + * prng operation does not use additional_input */ + if (additional_input) { + SHA256Context ctx; +/* NIST SP 800-90 defines two temporaries in their calculations, + * w and H. 
These temporaries are the same lengths, and used + * at different times, so we use the following macro to collapse + * them to the same variable, but keeping their unique names for + * easy comparison to the spec */ +#define w H + rng->V_type = prngAdditionalDataType; + SHA256_Begin(&ctx); + SHA256_Update(&ctx, rng->V_Data, sizeof rng->V_Data); + SHA256_Update(&ctx, additional_input, additional_input_len); + SHA256_End(&ctx, w, NULL, sizeof w); + PRNG_ADD_BITS_AND_CARRY(V(rng), VSize(rng), w, sizeof w, carry) + PORT_Memset(w, 0, sizeof w); +#undef w + } + + if (no_of_returned_bytes == SHA256_LENGTH) { + /* short_cut to hashbuf and a couple of copies and clears */ + SHA256_HashBuf(returned_bytes, V(rng), VSize(rng)); + /* continuous rng check */ + if (memcmp(rng->lastOutput, returned_bytes, SHA256_LENGTH) == 0) { + rng->isValid = PR_FALSE; + } + PORT_Memcpy(rng->lastOutput, returned_bytes, sizeof rng->lastOutput); + } else { + prng_Hashgen(rng, returned_bytes, no_of_returned_bytes); + } + /* advance our internal state... */ + rng->V_type = prngGenerateByteType; + SHA256_HashBuf(H, rng->V_Data, sizeof rng->V_Data); + PRNG_ADD_BITS_AND_CARRY(V(rng), VSize(rng), H, sizeof H, carry) + PRNG_ADD_BITS(V(rng), VSize(rng), rng->C, sizeof rng->C, carry); + PRNG_ADD_BITS_AND_CARRY(V(rng), VSize(rng), rng->reseed_counter, + sizeof rng->reseed_counter, carry) + carry = 1; + PRNG_ADD_CARRY_ONLY(rng->reseed_counter, (sizeof rng->reseed_counter) - 1, carry); + + /* if the prng failed, don't return any output, signal softoken */ + if (!rng->isValid) { + PORT_Memset(returned_bytes, 0, no_of_returned_bytes); + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + return SECSuccess; +} + +/* Use NSPR to prevent RNG_RNGInit from being called from separate + * threads, creating a race condition. 
+ */ +static const PRCallOnceType pristineCallOnce; +static PRCallOnceType coRNGInit; +static PRStatus +rng_init(void) +{ + PRUint8 bytes[PRNG_SEEDLEN * 2]; /* entropy + nonce */ + unsigned int numBytes; + SECStatus rv = SECSuccess; + + if (globalrng == NULL) { + /* bytes needs to have enough space to hold + * a SHA256 hash value. Blow up at compile time if this isn't true */ + PR_STATIC_ASSERT(sizeof(bytes) >= SHA256_LENGTH); + /* create a new global RNG context */ + globalrng = &theGlobalRng; + PORT_Assert(NULL == globalrng->lock); + /* create a lock for it */ + globalrng->lock = PZ_NewLock(nssILockOther); + if (globalrng->lock == NULL) { + globalrng = NULL; + PORT_SetError(PR_OUT_OF_MEMORY_ERROR); + return PR_FAILURE; + } + + /* Try to get some seed data for the RNG */ + numBytes = (unsigned int)RNG_SystemRNG(bytes, sizeof bytes); + PORT_Assert(numBytes == 0 || numBytes == sizeof bytes); + if (numBytes != 0) { + /* if this is our first call, instantiate, otherwise reseed + * prng_instantiate gets a new clean state, we want to mix + * any previous entropy we may have collected */ + if (V(globalrng)[0] == 0) { + rv = prng_instantiate(globalrng, bytes, numBytes); + } else { + rv = prng_reseed_test(globalrng, bytes, numBytes, NULL, 0); + } + memset(bytes, 0, numBytes); + } else { + PZ_DestroyLock(globalrng->lock); + globalrng->lock = NULL; + globalrng = NULL; + return PR_FAILURE; + } + + if (rv != SECSuccess) { + return PR_FAILURE; + } + /* the RNG is in a valid state */ + globalrng->isValid = PR_TRUE; + globalrng->isKatTest = PR_FALSE; + + /* fetch one random value so that we can populate rng->oldV for our + * continous random number test. 
*/ + prng_generateNewBytes(globalrng, bytes, SHA256_LENGTH, NULL, 0); + + /* Fetch more entropy into the PRNG */ + RNG_SystemInfoForRNG(); + } + return PR_SUCCESS; +} + +/* + * Clean up the global RNG context + */ +static void +prng_freeRNGContext(RNGContext *rng) +{ + PRUint8 inputhash[VSize(rng) + (sizeof rng->C)]; + + /* destroy context lock */ + SKIP_AFTER_FORK(PZ_DestroyLock(globalrng->lock)); + + /* zero global RNG context except for C & V to preserve entropy */ + prng_Hash_df(inputhash, sizeof rng->C, rng->C, sizeof rng->C, NULL, 0); + prng_Hash_df(&inputhash[sizeof rng->C], VSize(rng), V(rng), VSize(rng), + NULL, 0); + memset(rng, 0, sizeof *rng); + memcpy(rng->C, inputhash, sizeof rng->C); + memcpy(V(rng), &inputhash[sizeof rng->C], VSize(rng)); + + memset(inputhash, 0, sizeof inputhash); +} + +/* + * Public functions + */ + +/* + * Initialize the global RNG context and give it some seed input taken + * from the system. This function is thread-safe and will only allow + * the global context to be initialized once. The seed input is likely + * small, so it is imperative that RNG_RandomUpdate() be called with + * additional seed data before the generator is used. A good way to + * provide the generator with additional entropy is to call + * RNG_SystemInfoForRNG(). Note that C_Initialize() does exactly that. + */ +SECStatus +RNG_RNGInit(void) +{ + /* Allow only one call to initialize the context */ + PR_CallOnce(&coRNGInit, rng_init); + /* Make sure there is a context */ + return (globalrng != NULL) ? SECSuccess : SECFailure; +} + +/* +** Update the global random number generator with more seeding +** material. +*/ +SECStatus +RNG_RandomUpdate(const void *data, size_t bytes) +{ + SECStatus rv; + + /* Make sure our assumption that size_t is unsigned is true */ + PR_STATIC_ASSERT(((size_t)-1) > (size_t)1); + +#if defined(NS_PTR_GT_32) || (defined(NSS_USE_64) && !defined(NS_PTR_LE_32)) + /* + * NIST 800-90 requires us to verify our inputs. 
This value can + * come from the application, so we need to make sure it's within the + * spec. The spec says it must be less than 2^32 bytes (2^35 bits). + * This can only happen if size_t is greater than 32 bits (i.e. on + * most 64 bit platforms). The 90% case (perhaps 100% case), size_t + * is less than or equal to 32 bits if the platform is not 64 bits, and + * greater than 32 bits if it is a 64 bit platform. The corner + * cases are handled with explicit defines NS_PTR_GT_32 and NS_PTR_LE_32. + * + * In general, neither NS_PTR_GT_32 nor NS_PTR_LE_32 will need to be + * defined. If you trip over the next two size ASSERTS at compile time, + * you will need to define them for your platform. + * + * if 'sizeof(size_t) > 4' is triggered it means that we were expecting + * sizeof(size_t) to be greater than 4, but it wasn't. Setting + * NS_PTR_LE_32 will correct that mistake. + * + * if 'sizeof(size_t) <= 4' is triggered, it means that we were expecting + * sizeof(size_t) to be less than or equal to 4, but it wasn't. Setting + * NS_PTR_GT_32 will correct that mistake. + */ + + PR_STATIC_ASSERT(sizeof(size_t) > 4); + + if (bytes > (size_t)PRNG_MAX_ADDITIONAL_BYTES) { + bytes = PRNG_MAX_ADDITIONAL_BYTES; + } +#else + PR_STATIC_ASSERT(sizeof(size_t) <= 4); +#endif + + PZ_Lock(globalrng->lock); + /* if we're passed more than our additionalDataCache, simply + * call reseed with that data */ + if (bytes > sizeof(globalrng->additionalDataCache)) { + rv = prng_reseed_test(globalrng, NULL, 0, data, (unsigned int)bytes); + /* if we aren't going to fill or overflow the buffer, just cache it */ + } else if (bytes < ((sizeof globalrng->additionalDataCache) - globalrng->additionalAvail)) { + PORT_Memcpy(globalrng->additionalDataCache + globalrng->additionalAvail, + data, bytes); + globalrng->additionalAvail += (PRUint32)bytes; + rv = SECSuccess; + } else { + /* we are going to fill or overflow the buffer. 
In this case we will + * fill the entropy buffer, reseed with it, start a new buffer with the + * remainder. We know the remainder will fit in the buffer because + * we already handled the case where bytes > the size of the buffer. + */ + size_t bufRemain = (sizeof globalrng->additionalDataCache) - globalrng->additionalAvail; + /* fill the rest of the buffer */ + if (bufRemain) { + PORT_Memcpy(globalrng->additionalDataCache + globalrng->additionalAvail, + data, bufRemain); + data = ((unsigned char *)data) + bufRemain; + bytes -= bufRemain; + } + /* reseed from buffer */ + rv = prng_reseed_test(globalrng, NULL, 0, + globalrng->additionalDataCache, + sizeof globalrng->additionalDataCache); + + /* copy the rest into the cache */ + PORT_Memcpy(globalrng->additionalDataCache, data, bytes); + globalrng->additionalAvail = (PRUint32)bytes; + } + + PZ_Unlock(globalrng->lock); + return rv; +} + +/* +** Generate some random bytes, using the global random number generator +** object. +*/ +static SECStatus +prng_GenerateGlobalRandomBytes(RNGContext *rng, + void *dest, size_t len) +{ + SECStatus rv = SECSuccess; + PRUint8 *output = dest; + /* check for a valid global RNG context */ + PORT_Assert(rng != NULL); + if (rng == NULL) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + /* FIPS limits the amount of entropy available in a single request */ + if (len > PRNG_MAX_REQUEST_SIZE) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + /* --- LOCKED --- */ + PZ_Lock(rng->lock); + /* Check the amount of seed data in the generator. If not enough, + * don't produce any data. + */ + if (rng->reseed_counter[0] >= RESEED_VALUE) { + rv = prng_reseed_test(rng, NULL, 0, NULL, 0); + PZ_Unlock(rng->lock); + if (rv != SECSuccess) { + return rv; + } + RNG_SystemInfoForRNG(); + PZ_Lock(rng->lock); + } + /* + * see if we have enough bytes to fulfill the request. 
+ */ + if (len <= rng->dataAvail) { + memcpy(output, rng->data + ((sizeof rng->data) - rng->dataAvail), len); + memset(rng->data + ((sizeof rng->data) - rng->dataAvail), 0, len); + rng->dataAvail -= len; + rv = SECSuccess; + /* if we are asking for a small number of bytes, cache the rest of + * the bytes */ + } else if (len < sizeof rng->data) { + rv = prng_generateNewBytes(rng, rng->data, sizeof rng->data, + rng->additionalAvail ? rng->additionalDataCache : NULL, + rng->additionalAvail); + rng->additionalAvail = 0; + if (rv == SECSuccess) { + memcpy(output, rng->data, len); + memset(rng->data, 0, len); + rng->dataAvail = (sizeof rng->data) - len; + } + /* we are asking for lots of bytes, just ask the generator to pass them */ + } else { + rv = prng_generateNewBytes(rng, output, len, + rng->additionalAvail ? rng->additionalDataCache : NULL, + rng->additionalAvail); + rng->additionalAvail = 0; + } + PZ_Unlock(rng->lock); + /* --- UNLOCKED --- */ + return rv; +} + +/* +** Generate some random bytes, using the global random number generator +** object. +*/ +SECStatus +RNG_GenerateGlobalRandomBytes(void *dest, size_t len) +{ +#ifdef UNSAFE_FUZZER_MODE + return prng_GenerateDeterministicRandomBytes(globalrng->lock, dest, len); +#else + return prng_GenerateGlobalRandomBytes(globalrng, dest, len); +#endif +} + +SECStatus +RNG_ResetForFuzzing(void) +{ +#ifdef UNSAFE_FUZZER_MODE + return prng_ResetForFuzzing(globalrng->lock); +#else + return SECFailure; +#endif +} + +void +RNG_RNGShutdown(void) +{ + /* check for a valid global RNG context */ + PORT_Assert(globalrng != NULL); + if (globalrng == NULL) { + /* Should set a "not initialized" error code. */ + PORT_SetError(SEC_ERROR_NO_MEMORY); + return; + } + /* clear */ + prng_freeRNGContext(globalrng); + globalrng = NULL; + /* reset the callonce struct to allow a new call to RNG_RNGInit() */ + coRNGInit = pristineCallOnce; +} + +/* + * Test case interface. 
used by fips testing and power on self test + */ +/* make sure the test context is separate from the global context, This + * allows us to test the internal random number generator without losing + * entropy we may have previously collected. */ +RNGContext testContext; + +SECStatus +PRNGTEST_Instantiate_Kat(const PRUint8 *entropy, unsigned int entropy_len, + const PRUint8 *nonce, unsigned int nonce_len, + const PRUint8 *personal_string, unsigned int ps_len) +{ + testContext.isKatTest = PR_TRUE; + return PRNGTEST_Instantiate(entropy, entropy_len, + nonce, nonce_len, + personal_string, ps_len); +} + +/* + * Test vector API. Use NIST SP 800-90 general interface so one of the + * other NIST SP 800-90 algorithms may be used in the future. + */ +SECStatus +PRNGTEST_Instantiate(const PRUint8 *entropy, unsigned int entropy_len, + const PRUint8 *nonce, unsigned int nonce_len, + const PRUint8 *personal_string, unsigned int ps_len) +{ + int bytes_len = entropy_len + nonce_len + ps_len; + PRUint8 *bytes = NULL; + SECStatus rv; + + if (entropy_len < 256 / PR_BITS_PER_BYTE) { + PORT_SetError(SEC_ERROR_NEED_RANDOM); + return SECFailure; + } + + bytes = PORT_Alloc(bytes_len); + if (bytes == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return SECFailure; + } + /* concatenate the various inputs, internally NSS only instantiates with + * a single long string */ + PORT_Memcpy(bytes, entropy, entropy_len); + if (nonce) { + PORT_Memcpy(&bytes[entropy_len], nonce, nonce_len); + } else { + PORT_Assert(nonce_len == 0); + } + if (personal_string) { + PORT_Memcpy(&bytes[entropy_len + nonce_len], personal_string, ps_len); + } else { + PORT_Assert(ps_len == 0); + } + rv = prng_instantiate(&testContext, bytes, bytes_len); + PORT_ZFree(bytes, bytes_len); + if (rv == SECFailure) { + return SECFailure; + } + testContext.isValid = PR_TRUE; + return SECSuccess; +} + +SECStatus +PRNGTEST_Reseed(const PRUint8 *entropy, unsigned int entropy_len, + const PRUint8 *additional, unsigned int 
additional_len) +{ + if (!testContext.isValid) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + /* This magic input tells us to set the reseed count to it's max count, + * so we can simulate PRNGTEST_Generate reaching max reseed count */ + if ((entropy == NULL) && (entropy_len == 0) && + (additional == NULL) && (additional_len == 0)) { + testContext.reseed_counter[0] = RESEED_VALUE; + return SECSuccess; + } + return prng_reseed(&testContext, entropy, entropy_len, additional, + additional_len); +} + +SECStatus +PRNGTEST_Generate(PRUint8 *bytes, unsigned int bytes_len, + const PRUint8 *additional, unsigned int additional_len) +{ + SECStatus rv; + if (!testContext.isValid) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + /* replicate reseed test from prng_GenerateGlobalRandomBytes */ + if (testContext.reseed_counter[0] >= RESEED_VALUE) { + rv = prng_reseed(&testContext, NULL, 0, NULL, 0); + if (rv != SECSuccess) { + return rv; + } + } + return prng_generateNewBytes(&testContext, bytes, bytes_len, + additional, additional_len); +} + +SECStatus +PRNGTEST_Uninstantiate() +{ + if (!testContext.isValid) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + PORT_Memset(&testContext, 0, sizeof testContext); + return SECSuccess; +} + +SECStatus +PRNGTEST_RunHealthTests() +{ + static const PRUint8 entropy[] = { + 0x8e, 0x9c, 0x0d, 0x25, 0x75, 0x22, 0x04, 0xf9, + 0xc5, 0x79, 0x10, 0x8b, 0x23, 0x79, 0x37, 0x14, + 0x9f, 0x2c, 0xc7, 0x0b, 0x39, 0xf8, 0xee, 0xef, + 0x95, 0x0c, 0x97, 0x59, 0xfc, 0x0a, 0x85, 0x41, + 0x76, 0x9d, 0x6d, 0x67, 0x00, 0x4e, 0x19, 0x12, + 0x02, 0x16, 0x53, 0xea, 0xf2, 0x73, 0xd7, 0xd6, + 0x7f, 0x7e, 0xc8, 0xae, 0x9c, 0x09, 0x99, 0x7d, + 0xbb, 0x9e, 0x48, 0x7f, 0xbb, 0x96, 0x46, 0xb3, + 0x03, 0x75, 0xf8, 0xc8, 0x69, 0x45, 0x3f, 0x97, + 0x5e, 0x2e, 0x48, 0xe1, 0x5d, 0x58, 0x97, 0x4c + }; + static const PRUint8 rng_known_result[] = { + 0x16, 0xe1, 0x8c, 0x57, 0x21, 0xd8, 0xf1, 0x7e, + 0x5a, 
0xa0, 0x16, 0x0b, 0x7e, 0xa6, 0x25, 0xb4, + 0x24, 0x19, 0xdb, 0x54, 0xfa, 0x35, 0x13, 0x66, + 0xbb, 0xaa, 0x2a, 0x1b, 0x22, 0x33, 0x2e, 0x4a, + 0x14, 0x07, 0x9d, 0x52, 0xfc, 0x73, 0x61, 0x48, + 0xac, 0xc1, 0x22, 0xfc, 0xa4, 0xfc, 0xac, 0xa4, + 0xdb, 0xda, 0x5b, 0x27, 0x33, 0xc4, 0xb3 + }; + static const PRUint8 reseed_entropy[] = { + 0xc6, 0x0b, 0x0a, 0x30, 0x67, 0x07, 0xf4, 0xe2, + 0x24, 0xa7, 0x51, 0x6f, 0x5f, 0x85, 0x3e, 0x5d, + 0x67, 0x97, 0xb8, 0x3b, 0x30, 0x9c, 0x7a, 0xb1, + 0x52, 0xc6, 0x1b, 0xc9, 0x46, 0xa8, 0x62, 0x79 + }; + static const PRUint8 additional_input[] = { + 0x86, 0x82, 0x28, 0x98, 0xe7, 0xcb, 0x01, 0x14, + 0xae, 0x87, 0x4b, 0x1d, 0x99, 0x1b, 0xc7, 0x41, + 0x33, 0xff, 0x33, 0x66, 0x40, 0x95, 0x54, 0xc6, + 0x67, 0x4d, 0x40, 0x2a, 0x1f, 0xf9, 0xeb, 0x65 + }; + static const PRUint8 rng_reseed_result[] = { + 0x02, 0x0c, 0xc6, 0x17, 0x86, 0x49, 0xba, 0xc4, + 0x7b, 0x71, 0x35, 0x05, 0xf0, 0xdb, 0x4a, 0xc2, + 0x2c, 0x38, 0xc1, 0xa4, 0x42, 0xe5, 0x46, 0x4a, + 0x7d, 0xf0, 0xbe, 0x47, 0x88, 0xb8, 0x0e, 0xc6, + 0x25, 0x2b, 0x1d, 0x13, 0xef, 0xa6, 0x87, 0x96, + 0xa3, 0x7d, 0x5b, 0x80, 0xc2, 0x38, 0x76, 0x61, + 0xc7, 0x80, 0x5d, 0x0f, 0x05, 0x76, 0x85 + }; + static const PRUint8 rng_no_reseed_result[] = { + 0xc4, 0x40, 0x41, 0x8c, 0xbf, 0x2f, 0x70, 0x23, + 0x88, 0xf2, 0x7b, 0x30, 0xc3, 0xca, 0x1e, 0xf3, + 0xef, 0x53, 0x81, 0x5d, 0x30, 0xed, 0x4c, 0xf1, + 0xff, 0x89, 0xa5, 0xee, 0x92, 0xf8, 0xc0, 0x0f, + 0x88, 0x53, 0xdf, 0xb6, 0x76, 0xf0, 0xaa, 0xd3, + 0x2e, 0x1d, 0x64, 0x37, 0x3e, 0xe8, 0x4a, 0x02, + 0xff, 0x0a, 0x7f, 0xe5, 0xe9, 0x2b, 0x6d + }; + + SECStatus rng_status = SECSuccess; + PR_STATIC_ASSERT(sizeof(rng_known_result) >= sizeof(rng_reseed_result)); + PRUint8 result[sizeof(rng_known_result)]; + + /********************************************/ + /* First test instantiate error path. */ + /* In this case we supply enough entropy, */ + /* but not enough seed. This will trigger */ + /* the code that checks for a entropy */ + /* source failure. 
*/ + /********************************************/ + rng_status = PRNGTEST_Instantiate(entropy, 256 / PR_BITS_PER_BYTE, + NULL, 0, NULL, 0); + if (rng_status == SECSuccess) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + if (PORT_GetError() != SEC_ERROR_NEED_RANDOM) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + /* we failed with the proper error code, we can continue */ + + /********************************************/ + /* Generate random bytes with a known seed. */ + /********************************************/ + rng_status = PRNGTEST_Instantiate(entropy, sizeof entropy, + NULL, 0, NULL, 0); + if (rng_status != SECSuccess) { + /* Error set by PRNGTEST_Instantiate */ + return SECFailure; + } + rng_status = PRNGTEST_Generate(result, sizeof rng_known_result, NULL, 0); + if ((rng_status != SECSuccess) || + (PORT_Memcmp(result, rng_known_result, + sizeof rng_known_result) != 0)) { + PRNGTEST_Uninstantiate(); + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + rng_status = PRNGTEST_Reseed(reseed_entropy, sizeof reseed_entropy, + additional_input, sizeof additional_input); + if (rng_status != SECSuccess) { + /* Error set by PRNG_Reseed */ + PRNGTEST_Uninstantiate(); + return SECFailure; + } + rng_status = PRNGTEST_Generate(result, sizeof rng_reseed_result, NULL, 0); + if ((rng_status != SECSuccess) || + (PORT_Memcmp(result, rng_reseed_result, + sizeof rng_reseed_result) != 0)) { + PRNGTEST_Uninstantiate(); + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + /* This magic forces the reseed count to it's max count, so we can see if + * PRNGTEST_Generate will actually when it reaches it's count */ + rng_status = PRNGTEST_Reseed(NULL, 0, NULL, 0); + if (rng_status != SECSuccess) { + PRNGTEST_Uninstantiate(); + /* Error set by PRNG_Reseed */ + return SECFailure; + } + /* This generate should now reseed */ + rng_status = PRNGTEST_Generate(result, sizeof rng_reseed_result, NULL, 0); + 
if ((rng_status != SECSuccess) || + /* NOTE we fail if the result is equal to the no_reseed_result. + * no_reseed_result is the value we would have gotten if we didn't + * do an automatic reseed in PRNGTEST_Generate */ + (PORT_Memcmp(result, rng_no_reseed_result, + sizeof rng_no_reseed_result) == 0)) { + PRNGTEST_Uninstantiate(); + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + /* make sure reseed fails when we don't supply enough entropy */ + rng_status = PRNGTEST_Reseed(reseed_entropy, 4, NULL, 0); + if (rng_status == SECSuccess) { + PRNGTEST_Uninstantiate(); + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + if (PORT_GetError() != SEC_ERROR_NEED_RANDOM) { + PRNGTEST_Uninstantiate(); + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + rng_status = PRNGTEST_Uninstantiate(); + if (rng_status != SECSuccess) { + /* Error set by PRNG_Uninstantiate */ + return rng_status; + } + /* make sure uninstantiate fails if the contest is not initiated (also tests + * if the context was cleared in the previous Uninstantiate) */ + rng_status = PRNGTEST_Uninstantiate(); + if (rng_status == SECSuccess) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + if (PORT_GetError() != SEC_ERROR_LIBRARY_FAILURE) { + return rng_status; + } + + return SECSuccess; +} diff --git a/security/nss/lib/freebl/dsa.c b/security/nss/lib/freebl/dsa.c new file mode 100644 index 000000000..9324d306b --- /dev/null +++ b/security/nss/lib/freebl/dsa.c @@ -0,0 +1,647 @@ +/* + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "prerror.h" +#include "secerr.h" + +#include "prtypes.h" +#include "prinit.h" +#include "blapi.h" +#include "nssilock.h" +#include "secitem.h" +#include "blapi.h" +#include "mpi.h" +#include "secmpi.h" +#include "pqg.h" + +/* XXX to be replaced by define in blapit.h */ +#define NSS_FREEBL_DSA_DEFAULT_CHUNKSIZE 2048 + +/* + * FIPS 186-2 requires result from random output to be reduced mod q when + * generating random numbers for DSA. + * + * Input: w, 2*qLen bytes + * q, qLen bytes + * Output: xj, qLen bytes + */ +static SECStatus +fips186Change_ReduceModQForDSA(const PRUint8 *w, const PRUint8 *q, + unsigned int qLen, PRUint8 *xj) +{ + mp_int W, Q, Xj; + mp_err err; + SECStatus rv = SECSuccess; + + /* Initialize MPI integers. */ + MP_DIGITS(&W) = 0; + MP_DIGITS(&Q) = 0; + MP_DIGITS(&Xj) = 0; + CHECK_MPI_OK(mp_init(&W)); + CHECK_MPI_OK(mp_init(&Q)); + CHECK_MPI_OK(mp_init(&Xj)); + /* + * Convert input arguments into MPI integers. + */ + CHECK_MPI_OK(mp_read_unsigned_octets(&W, w, 2 * qLen)); + CHECK_MPI_OK(mp_read_unsigned_octets(&Q, q, qLen)); + + /* + * Algorithm 1 of FIPS 186-2 Change Notice 1, Step 3.3 + * + * xj = (w0 || w1) mod q + */ + CHECK_MPI_OK(mp_mod(&W, &Q, &Xj)); + CHECK_MPI_OK(mp_to_fixlen_octets(&Xj, xj, qLen)); +cleanup: + mp_clear(&W); + mp_clear(&Q); + mp_clear(&Xj); + if (err) { + MP_TO_SEC_ERROR(err); + rv = SECFailure; + } + return rv; +} + +/* + * FIPS 186-2 requires result from random output to be reduced mod q when + * generating random numbers for DSA. + */ +SECStatus +FIPS186Change_ReduceModQForDSA(const unsigned char *w, + const unsigned char *q, + unsigned char *xj) +{ + return fips186Change_ReduceModQForDSA(w, q, DSA1_SUBPRIME_LEN, xj); +} + +/* + * The core of Algorithm 1 of FIPS 186-2 Change Notice 1. + * + * We no longer support FIPS 186-2 RNG. This function was exported + * for power-up self tests and FIPS tests. 
Keep this stub, which fails, + * to prevent crashes, but also to signal to test code that FIPS 186-2 + * RNG is no longer supported. + */ +SECStatus +FIPS186Change_GenerateX(PRUint8 *XKEY, const PRUint8 *XSEEDj, + PRUint8 *x_j) +{ + PORT_SetError(PR_NOT_IMPLEMENTED_ERROR); + return SECFailure; +} + +/* + * Specialized RNG for DSA + * + * As per Algorithm 1 of FIPS 186-2 Change Notice 1, in step 3.3 the value + * Xj should be reduced mod q, a 160-bit prime number. Since this parameter + * is only meaningful in the context of DSA, the above RNG functions + * were implemented without it. They are re-implemented below for use + * with DSA. + */ + +/* +** Generate some random bytes, using the global random number generator +** object. In DSA mode, so there is a q. +*/ +static SECStatus +dsa_GenerateGlobalRandomBytes(const SECItem *qItem, PRUint8 *dest, + unsigned int *destLen, unsigned int maxDestLen) +{ + SECStatus rv; + SECItem w; + const PRUint8 *q = qItem->data; + unsigned int qLen = qItem->len; + + if (*q == 0) { + ++q; + --qLen; + } + if (maxDestLen < qLen) { + /* This condition can occur when DSA_SignDigest is passed a group + with a subprime that is larger than DSA_MAX_SUBPRIME_LEN. */ + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + w.data = NULL; /* otherwise SECITEM_AllocItem asserts */ + if (!SECITEM_AllocItem(NULL, &w, 2 * qLen)) { + return SECFailure; + } + *destLen = qLen; + + rv = RNG_GenerateGlobalRandomBytes(w.data, w.len); + if (rv == SECSuccess) { + rv = fips186Change_ReduceModQForDSA(w.data, q, qLen, dest); + } + + SECITEM_FreeItem(&w, PR_FALSE); + return rv; +} + +static void +translate_mpi_error(mp_err err) +{ + MP_TO_SEC_ERROR(err); +} + +static SECStatus +dsa_NewKeyExtended(const PQGParams *params, const SECItem *seed, + DSAPrivateKey **privKey) +{ + mp_int p, g; + mp_int x, y; + mp_err err; + PLArenaPool *arena; + DSAPrivateKey *key; + /* Check args. 
*/ + if (!params || !privKey || !seed || !seed->data) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + /* Initialize an arena for the DSA key. */ + arena = PORT_NewArena(NSS_FREEBL_DSA_DEFAULT_CHUNKSIZE); + if (!arena) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return SECFailure; + } + key = (DSAPrivateKey *)PORT_ArenaZAlloc(arena, sizeof(DSAPrivateKey)); + if (!key) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + PORT_FreeArena(arena, PR_TRUE); + return SECFailure; + } + key->params.arena = arena; + /* Initialize MPI integers. */ + MP_DIGITS(&p) = 0; + MP_DIGITS(&g) = 0; + MP_DIGITS(&x) = 0; + MP_DIGITS(&y) = 0; + CHECK_MPI_OK(mp_init(&p)); + CHECK_MPI_OK(mp_init(&g)); + CHECK_MPI_OK(mp_init(&x)); + CHECK_MPI_OK(mp_init(&y)); + /* Copy over the PQG params */ + CHECK_MPI_OK(SECITEM_CopyItem(arena, &key->params.prime, + ¶ms->prime)); + CHECK_MPI_OK(SECITEM_CopyItem(arena, &key->params.subPrime, + ¶ms->subPrime)); + CHECK_MPI_OK(SECITEM_CopyItem(arena, &key->params.base, ¶ms->base)); + /* Convert stored p, g, and received x into MPI integers. 
*/ + SECITEM_TO_MPINT(params->prime, &p); + SECITEM_TO_MPINT(params->base, &g); + OCTETS_TO_MPINT(seed->data, &x, seed->len); + /* Store x in private key */ + SECITEM_AllocItem(arena, &key->privateValue, seed->len); + PORT_Memcpy(key->privateValue.data, seed->data, seed->len); + /* Compute public key y = g**x mod p */ + CHECK_MPI_OK(mp_exptmod(&g, &x, &p, &y)); + /* Store y in public key */ + MPINT_TO_SECITEM(&y, &key->publicValue, arena); + *privKey = key; + key = NULL; +cleanup: + mp_clear(&p); + mp_clear(&g); + mp_clear(&x); + mp_clear(&y); + if (key) + PORT_FreeArena(key->params.arena, PR_TRUE); + if (err) { + translate_mpi_error(err); + return SECFailure; + } + return SECSuccess; +} + +SECStatus +DSA_NewRandom(PLArenaPool *arena, const SECItem *q, SECItem *seed) +{ + int retries = 10; + unsigned int i; + PRBool good; + + if (q == NULL || q->data == NULL || q->len == 0 || + (q->data[0] == 0 && q->len == 1)) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + if (!SECITEM_AllocItem(arena, seed, q->len)) { + return SECFailure; + } + + do { + /* Generate seed bytes for x according to FIPS 186-1 appendix 3 */ + if (dsa_GenerateGlobalRandomBytes(q, seed->data, &seed->len, + seed->len)) { + goto loser; + } + /* Disallow values of 0 and 1 for x. */ + good = PR_FALSE; + for (i = 0; i < seed->len - 1; i++) { + if (seed->data[i] != 0) { + good = PR_TRUE; + break; + } + } + if (!good && seed->data[i] > 1) { + good = PR_TRUE; + } + } while (!good && --retries > 0); + + if (!good) { + PORT_SetError(SEC_ERROR_NEED_RANDOM); + loser: + if (arena != NULL) { + SECITEM_FreeItem(seed, PR_FALSE); + } + return SECFailure; + } + + return SECSuccess; +} + +/* +** Generate and return a new DSA public and private key pair, +** both of which are encoded into a single DSAPrivateKey struct. +** "params" is a pointer to the PQG parameters for the domain +** Uses a random seed. 
+*/ +SECStatus +DSA_NewKey(const PQGParams *params, DSAPrivateKey **privKey) +{ + SECItem seed; + SECStatus rv; + + rv = PQG_Check(params); + if (rv != SECSuccess) { + return rv; + } + seed.data = NULL; + + rv = DSA_NewRandom(NULL, ¶ms->subPrime, &seed); + if (rv == SECSuccess) { + if (seed.len != PQG_GetLength(¶ms->subPrime)) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + rv = SECFailure; + } else { + rv = dsa_NewKeyExtended(params, &seed, privKey); + } + } + SECITEM_FreeItem(&seed, PR_FALSE); + return rv; +} + +/* For FIPS compliance testing. Seed must be exactly the size of subPrime */ +SECStatus +DSA_NewKeyFromSeed(const PQGParams *params, + const unsigned char *seed, + DSAPrivateKey **privKey) +{ + SECItem seedItem; + seedItem.data = (unsigned char *)seed; + seedItem.len = PQG_GetLength(¶ms->subPrime); + return dsa_NewKeyExtended(params, &seedItem, privKey); +} + +static SECStatus +dsa_SignDigest(DSAPrivateKey *key, SECItem *signature, const SECItem *digest, + const unsigned char *kb) +{ + mp_int p, q, g; /* PQG parameters */ + mp_int x, k; /* private key & pseudo-random integer */ + mp_int r, s; /* tuple (r, s) is signature) */ + mp_int t; /* holding tmp values */ + mp_err err = MP_OKAY; + SECStatus rv = SECSuccess; + unsigned int dsa_subprime_len, dsa_signature_len, offset; + SECItem localDigest; + unsigned char localDigestData[DSA_MAX_SUBPRIME_LEN]; + SECItem t2 = { siBuffer, NULL, 0 }; + + /* FIPS-compliance dictates that digest is a SHA hash. */ + /* Check args. 
*/ + if (!key || !signature || !digest) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + dsa_subprime_len = PQG_GetLength(&key->params.subPrime); + dsa_signature_len = dsa_subprime_len * 2; + if ((signature->len < dsa_signature_len) || + (digest->len > HASH_LENGTH_MAX) || + (digest->len < SHA1_LENGTH)) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + /* DSA accepts digests not equal to dsa_subprime_len, if the + * digests are greater, then they are truncated to the size of + * dsa_subprime_len, using the left most bits. If they are less + * then they are padded on the left.*/ + PORT_Memset(localDigestData, 0, dsa_subprime_len); + offset = (digest->len < dsa_subprime_len) ? (dsa_subprime_len - digest->len) : 0; + PORT_Memcpy(localDigestData + offset, digest->data, + dsa_subprime_len - offset); + localDigest.data = localDigestData; + localDigest.len = dsa_subprime_len; + + /* Initialize MPI integers. */ + MP_DIGITS(&p) = 0; + MP_DIGITS(&q) = 0; + MP_DIGITS(&g) = 0; + MP_DIGITS(&x) = 0; + MP_DIGITS(&k) = 0; + MP_DIGITS(&r) = 0; + MP_DIGITS(&s) = 0; + MP_DIGITS(&t) = 0; + CHECK_MPI_OK(mp_init(&p)); + CHECK_MPI_OK(mp_init(&q)); + CHECK_MPI_OK(mp_init(&g)); + CHECK_MPI_OK(mp_init(&x)); + CHECK_MPI_OK(mp_init(&k)); + CHECK_MPI_OK(mp_init(&r)); + CHECK_MPI_OK(mp_init(&s)); + CHECK_MPI_OK(mp_init(&t)); + /* + ** Convert stored PQG and private key into MPI integers. 
+ */ + SECITEM_TO_MPINT(key->params.prime, &p); + SECITEM_TO_MPINT(key->params.subPrime, &q); + SECITEM_TO_MPINT(key->params.base, &g); + SECITEM_TO_MPINT(key->privateValue, &x); + OCTETS_TO_MPINT(kb, &k, dsa_subprime_len); + /* + ** FIPS 186-1, Section 5, Step 1 + ** + ** r = (g**k mod p) mod q + */ + CHECK_MPI_OK(mp_exptmod(&g, &k, &p, &r)); /* r = g**k mod p */ + CHECK_MPI_OK(mp_mod(&r, &q, &r)); /* r = r mod q */ + /* + ** FIPS 186-1, Section 5, Step 2 + ** + ** s = (k**-1 * (HASH(M) + x*r)) mod q + */ + if (DSA_NewRandom(NULL, &key->params.subPrime, &t2) != SECSuccess) { + PORT_SetError(SEC_ERROR_NEED_RANDOM); + rv = SECFailure; + goto cleanup; + } + SECITEM_TO_MPINT(t2, &t); /* t <-$ Zq */ + CHECK_MPI_OK(mp_mulmod(&k, &t, &q, &k)); /* k = k * t mod q */ + CHECK_MPI_OK(mp_invmod(&k, &q, &k)); /* k = k**-1 mod q */ + CHECK_MPI_OK(mp_mulmod(&k, &t, &q, &k)); /* k = k * t mod q */ + SECITEM_TO_MPINT(localDigest, &s); /* s = HASH(M) */ + CHECK_MPI_OK(mp_mulmod(&x, &r, &q, &x)); /* x = x * r mod q */ + CHECK_MPI_OK(mp_addmod(&s, &x, &q, &s)); /* s = s + x mod q */ + CHECK_MPI_OK(mp_mulmod(&s, &k, &q, &s)); /* s = s * k mod q */ + /* + ** verify r != 0 and s != 0 + ** mentioned as optional in FIPS 186-1. 
+ */ + if (mp_cmp_z(&r) == 0 || mp_cmp_z(&s) == 0) { + PORT_SetError(SEC_ERROR_NEED_RANDOM); + rv = SECFailure; + goto cleanup; + } + /* + ** Step 4 + ** + ** Signature is tuple (r, s) + */ + err = mp_to_fixlen_octets(&r, signature->data, dsa_subprime_len); + if (err < 0) + goto cleanup; + err = mp_to_fixlen_octets(&s, signature->data + dsa_subprime_len, + dsa_subprime_len); + if (err < 0) + goto cleanup; + err = MP_OKAY; + signature->len = dsa_signature_len; +cleanup: + PORT_Memset(localDigestData, 0, DSA_MAX_SUBPRIME_LEN); + mp_clear(&p); + mp_clear(&q); + mp_clear(&g); + mp_clear(&x); + mp_clear(&k); + mp_clear(&r); + mp_clear(&s); + mp_clear(&t); + SECITEM_FreeItem(&t2, PR_FALSE); + if (err) { + translate_mpi_error(err); + rv = SECFailure; + } + return rv; +} + +/* signature is caller-supplied buffer of at least 40 bytes. +** On input, signature->len == size of buffer to hold signature. +** digest->len == size of digest. +** On output, signature->len == size of signature in buffer. +** Uses a random seed. +*/ +SECStatus +DSA_SignDigest(DSAPrivateKey *key, SECItem *signature, const SECItem *digest) +{ + SECStatus rv; + int retries = 10; + unsigned char kSeed[DSA_MAX_SUBPRIME_LEN]; + unsigned int kSeedLen = 0; + unsigned int i; + unsigned int dsa_subprime_len = PQG_GetLength(&key->params.subPrime); + PRBool good; + + PORT_SetError(0); + do { + rv = dsa_GenerateGlobalRandomBytes(&key->params.subPrime, + kSeed, &kSeedLen, sizeof kSeed); + if (rv != SECSuccess) + break; + if (kSeedLen != dsa_subprime_len) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + rv = SECFailure; + break; + } + /* Disallow a value of 0 for k. 
*/ + good = PR_FALSE; + for (i = 0; i < kSeedLen; i++) { + if (kSeed[i] != 0) { + good = PR_TRUE; + break; + } + } + if (!good) { + PORT_SetError(SEC_ERROR_NEED_RANDOM); + rv = SECFailure; + continue; + } + rv = dsa_SignDigest(key, signature, digest, kSeed); + } while (rv != SECSuccess && PORT_GetError() == SEC_ERROR_NEED_RANDOM && + --retries > 0); + return rv; +} + +/* For FIPS compliance testing. Seed must be exactly 20 bytes. */ +SECStatus +DSA_SignDigestWithSeed(DSAPrivateKey *key, + SECItem *signature, + const SECItem *digest, + const unsigned char *seed) +{ + SECStatus rv; + rv = dsa_SignDigest(key, signature, digest, seed); + return rv; +} + +/* signature is caller-supplied buffer of at least 20 bytes. +** On input, signature->len == size of buffer to hold signature. +** digest->len == size of digest. +*/ +SECStatus +DSA_VerifyDigest(DSAPublicKey *key, const SECItem *signature, + const SECItem *digest) +{ + /* FIPS-compliance dictates that digest is a SHA hash. */ + mp_int p, q, g; /* PQG parameters */ + mp_int r_, s_; /* tuple (r', s') is received signature) */ + mp_int u1, u2, v, w; /* intermediate values used in verification */ + mp_int y; /* public key */ + mp_err err; + unsigned int dsa_subprime_len, dsa_signature_len, offset; + SECItem localDigest; + unsigned char localDigestData[DSA_MAX_SUBPRIME_LEN]; + SECStatus verified = SECFailure; + + /* Check args. */ + if (!key || !signature || !digest) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + dsa_subprime_len = PQG_GetLength(&key->params.subPrime); + dsa_signature_len = dsa_subprime_len * 2; + if ((signature->len != dsa_signature_len) || + (digest->len > HASH_LENGTH_MAX) || + (digest->len < SHA1_LENGTH)) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + /* DSA accepts digests not equal to dsa_subprime_len, if the + * digests are greater, than they are truncated to the size of + * dsa_subprime_len, using the left most bits. 
If they are less + * then they are padded on the left.*/ + PORT_Memset(localDigestData, 0, dsa_subprime_len); + offset = (digest->len < dsa_subprime_len) ? (dsa_subprime_len - digest->len) : 0; + PORT_Memcpy(localDigestData + offset, digest->data, + dsa_subprime_len - offset); + localDigest.data = localDigestData; + localDigest.len = dsa_subprime_len; + + /* Initialize MPI integers. */ + MP_DIGITS(&p) = 0; + MP_DIGITS(&q) = 0; + MP_DIGITS(&g) = 0; + MP_DIGITS(&y) = 0; + MP_DIGITS(&r_) = 0; + MP_DIGITS(&s_) = 0; + MP_DIGITS(&u1) = 0; + MP_DIGITS(&u2) = 0; + MP_DIGITS(&v) = 0; + MP_DIGITS(&w) = 0; + CHECK_MPI_OK(mp_init(&p)); + CHECK_MPI_OK(mp_init(&q)); + CHECK_MPI_OK(mp_init(&g)); + CHECK_MPI_OK(mp_init(&y)); + CHECK_MPI_OK(mp_init(&r_)); + CHECK_MPI_OK(mp_init(&s_)); + CHECK_MPI_OK(mp_init(&u1)); + CHECK_MPI_OK(mp_init(&u2)); + CHECK_MPI_OK(mp_init(&v)); + CHECK_MPI_OK(mp_init(&w)); + /* + ** Convert stored PQG and public key into MPI integers. + */ + SECITEM_TO_MPINT(key->params.prime, &p); + SECITEM_TO_MPINT(key->params.subPrime, &q); + SECITEM_TO_MPINT(key->params.base, &g); + SECITEM_TO_MPINT(key->publicValue, &y); + /* + ** Convert received signature (r', s') into MPI integers. + */ + OCTETS_TO_MPINT(signature->data, &r_, dsa_subprime_len); + OCTETS_TO_MPINT(signature->data + dsa_subprime_len, &s_, dsa_subprime_len); + /* + ** Verify that 0 < r' < q and 0 < s' < q + */ + if (mp_cmp_z(&r_) <= 0 || mp_cmp_z(&s_) <= 0 || + mp_cmp(&r_, &q) >= 0 || mp_cmp(&s_, &q) >= 0) { + /* err is zero here. 
*/ + PORT_SetError(SEC_ERROR_BAD_SIGNATURE); + goto cleanup; /* will return verified == SECFailure */ + } + /* + ** FIPS 186-1, Section 6, Step 1 + ** + ** w = (s')**-1 mod q + */ + CHECK_MPI_OK(mp_invmod(&s_, &q, &w)); /* w = (s')**-1 mod q */ + /* + ** FIPS 186-1, Section 6, Step 2 + ** + ** u1 = ((Hash(M')) * w) mod q + */ + SECITEM_TO_MPINT(localDigest, &u1); /* u1 = HASH(M') */ + CHECK_MPI_OK(mp_mulmod(&u1, &w, &q, &u1)); /* u1 = u1 * w mod q */ + /* + ** FIPS 186-1, Section 6, Step 3 + ** + ** u2 = ((r') * w) mod q + */ + CHECK_MPI_OK(mp_mulmod(&r_, &w, &q, &u2)); + /* + ** FIPS 186-1, Section 6, Step 4 + ** + ** v = ((g**u1 * y**u2) mod p) mod q + */ + CHECK_MPI_OK(mp_exptmod(&g, &u1, &p, &g)); /* g = g**u1 mod p */ + CHECK_MPI_OK(mp_exptmod(&y, &u2, &p, &y)); /* y = y**u2 mod p */ + CHECK_MPI_OK(mp_mulmod(&g, &y, &p, &v)); /* v = g * y mod p */ + CHECK_MPI_OK(mp_mod(&v, &q, &v)); /* v = v mod q */ + /* + ** Verification: v == r' + */ + if (mp_cmp(&v, &r_)) { + PORT_SetError(SEC_ERROR_BAD_SIGNATURE); + verified = SECFailure; /* Signature failed to verify. */ + } else { + verified = SECSuccess; /* Signature verified. */ + } +cleanup: + mp_clear(&p); + mp_clear(&q); + mp_clear(&g); + mp_clear(&y); + mp_clear(&r_); + mp_clear(&s_); + mp_clear(&u1); + mp_clear(&u2); + mp_clear(&v); + mp_clear(&w); + if (err) { + translate_mpi_error(err); + } + return verified; +} diff --git a/security/nss/lib/freebl/ec.c b/security/nss/lib/freebl/ec.c new file mode 100644 index 000000000..12bfeed41 --- /dev/null +++ b/security/nss/lib/freebl/ec.c @@ -0,0 +1,1159 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "blapi.h" +#include "prerr.h" +#include "secerr.h" +#include "secmpi.h" +#include "secitem.h" +#include "mplogic.h" +#include "ec.h" +#include "ecl.h" + +#ifndef NSS_DISABLE_ECC + +static const ECMethod kMethods[] = { + { ECCurve25519, + ec_Curve25519_pt_mul, + ec_Curve25519_pt_validate } +}; + +static const ECMethod * +ec_get_method_from_name(ECCurveName name) +{ + int i; + for (i = 0; i < sizeof(kMethods) / sizeof(kMethods[0]); ++i) { + if (kMethods[i].name == name) { + return &kMethods[i]; + } + } + return NULL; +} + +/* + * Returns true if pointP is the point at infinity, false otherwise + */ +PRBool +ec_point_at_infinity(SECItem *pointP) +{ + unsigned int i; + + for (i = 1; i < pointP->len; i++) { + if (pointP->data[i] != 0x00) + return PR_FALSE; + } + + return PR_TRUE; +} + +/* + * Computes scalar point multiplication pointQ = k1 * G + k2 * pointP for + * the curve whose parameters are encoded in params with base point G. 
+ */ +SECStatus +ec_points_mul(const ECParams *params, const mp_int *k1, const mp_int *k2, + const SECItem *pointP, SECItem *pointQ) +{ + mp_int Px, Py, Qx, Qy; + mp_int Gx, Gy, order, irreducible, a, b; + ECGroup *group = NULL; + SECStatus rv = SECFailure; + mp_err err = MP_OKAY; + int len; + +#if EC_DEBUG + int i; + char mpstr[256]; + + printf("ec_points_mul: params [len=%d]:", params->DEREncoding.len); + for (i = 0; i < params->DEREncoding.len; i++) + printf("%02x:", params->DEREncoding.data[i]); + printf("\n"); + + if (k1 != NULL) { + mp_tohex((mp_int *)k1, mpstr); + printf("ec_points_mul: scalar k1: %s\n", mpstr); + mp_todecimal((mp_int *)k1, mpstr); + printf("ec_points_mul: scalar k1: %s (dec)\n", mpstr); + } + + if (k2 != NULL) { + mp_tohex((mp_int *)k2, mpstr); + printf("ec_points_mul: scalar k2: %s\n", mpstr); + mp_todecimal((mp_int *)k2, mpstr); + printf("ec_points_mul: scalar k2: %s (dec)\n", mpstr); + } + + if (pointP != NULL) { + printf("ec_points_mul: pointP [len=%d]:", pointP->len); + for (i = 0; i < pointP->len; i++) + printf("%02x:", pointP->data[i]); + printf("\n"); + } +#endif + + /* NOTE: We only support uncompressed points for now */ + len = (params->fieldID.size + 7) >> 3; + if (pointP != NULL) { + if ((pointP->data[0] != EC_POINT_FORM_UNCOMPRESSED) || + (pointP->len != (2 * len + 1))) { + PORT_SetError(SEC_ERROR_UNSUPPORTED_EC_POINT_FORM); + return SECFailure; + }; + } + + MP_DIGITS(&Px) = 0; + MP_DIGITS(&Py) = 0; + MP_DIGITS(&Qx) = 0; + MP_DIGITS(&Qy) = 0; + MP_DIGITS(&Gx) = 0; + MP_DIGITS(&Gy) = 0; + MP_DIGITS(&order) = 0; + MP_DIGITS(&irreducible) = 0; + MP_DIGITS(&a) = 0; + MP_DIGITS(&b) = 0; + CHECK_MPI_OK(mp_init(&Px)); + CHECK_MPI_OK(mp_init(&Py)); + CHECK_MPI_OK(mp_init(&Qx)); + CHECK_MPI_OK(mp_init(&Qy)); + CHECK_MPI_OK(mp_init(&Gx)); + CHECK_MPI_OK(mp_init(&Gy)); + CHECK_MPI_OK(mp_init(&order)); + CHECK_MPI_OK(mp_init(&irreducible)); + CHECK_MPI_OK(mp_init(&a)); + CHECK_MPI_OK(mp_init(&b)); + + if ((k2 != NULL) && (pointP != NULL)) 
{ + /* Initialize Px and Py */ + CHECK_MPI_OK(mp_read_unsigned_octets(&Px, pointP->data + 1, (mp_size)len)); + CHECK_MPI_OK(mp_read_unsigned_octets(&Py, pointP->data + 1 + len, (mp_size)len)); + } + + /* construct from named params, if possible */ + if (params->name != ECCurve_noName) { + group = ECGroup_fromName(params->name); + } + + if (group == NULL) + goto cleanup; + + if ((k2 != NULL) && (pointP != NULL)) { + CHECK_MPI_OK(ECPoints_mul(group, k1, k2, &Px, &Py, &Qx, &Qy)); + } else { + CHECK_MPI_OK(ECPoints_mul(group, k1, NULL, NULL, NULL, &Qx, &Qy)); + } + + /* Construct the SECItem representation of point Q */ + pointQ->data[0] = EC_POINT_FORM_UNCOMPRESSED; + CHECK_MPI_OK(mp_to_fixlen_octets(&Qx, pointQ->data + 1, + (mp_size)len)); + CHECK_MPI_OK(mp_to_fixlen_octets(&Qy, pointQ->data + 1 + len, + (mp_size)len)); + + rv = SECSuccess; + +#if EC_DEBUG + printf("ec_points_mul: pointQ [len=%d]:", pointQ->len); + for (i = 0; i < pointQ->len; i++) + printf("%02x:", pointQ->data[i]); + printf("\n"); +#endif + +cleanup: + ECGroup_free(group); + mp_clear(&Px); + mp_clear(&Py); + mp_clear(&Qx); + mp_clear(&Qy); + mp_clear(&Gx); + mp_clear(&Gy); + mp_clear(&order); + mp_clear(&irreducible); + mp_clear(&a); + mp_clear(&b); + if (err) { + MP_TO_SEC_ERROR(err); + rv = SECFailure; + } + + return rv; +} +#endif /* NSS_DISABLE_ECC */ + +/* Generates a new EC key pair. The private key is a supplied + * value and the public key is the result of performing a scalar + * point multiplication of that value with the curve's base point. 
+ */ +SECStatus +ec_NewKey(ECParams *ecParams, ECPrivateKey **privKey, + const unsigned char *privKeyBytes, int privKeyLen) +{ + SECStatus rv = SECFailure; +#ifndef NSS_DISABLE_ECC + PLArenaPool *arena; + ECPrivateKey *key; + mp_int k; + mp_err err = MP_OKAY; + int len; + +#if EC_DEBUG + printf("ec_NewKey called\n"); +#endif + MP_DIGITS(&k) = 0; + + if (!ecParams || !privKey || !privKeyBytes || (privKeyLen < 0) || + !ecParams->name) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + /* Initialize an arena for the EC key. */ + if (!(arena = PORT_NewArena(NSS_FREEBL_DEFAULT_CHUNKSIZE))) + return SECFailure; + + key = (ECPrivateKey *)PORT_ArenaZAlloc(arena, sizeof(ECPrivateKey)); + if (!key) { + PORT_FreeArena(arena, PR_TRUE); + return SECFailure; + } + + /* Set the version number (SEC 1 section C.4 says it should be 1) */ + SECITEM_AllocItem(arena, &key->version, 1); + key->version.data[0] = 1; + + /* Copy all of the fields from the ECParams argument to the + * ECParams structure within the private key. 
+ */ + key->ecParams.arena = arena; + key->ecParams.type = ecParams->type; + key->ecParams.fieldID.size = ecParams->fieldID.size; + key->ecParams.fieldID.type = ecParams->fieldID.type; + if (ecParams->fieldID.type == ec_field_GFp || + ecParams->fieldID.type == ec_field_plain) { + CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->ecParams.fieldID.u.prime, + &ecParams->fieldID.u.prime)); + } else { + CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->ecParams.fieldID.u.poly, + &ecParams->fieldID.u.poly)); + } + key->ecParams.fieldID.k1 = ecParams->fieldID.k1; + key->ecParams.fieldID.k2 = ecParams->fieldID.k2; + key->ecParams.fieldID.k3 = ecParams->fieldID.k3; + CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->ecParams.curve.a, + &ecParams->curve.a)); + CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->ecParams.curve.b, + &ecParams->curve.b)); + CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->ecParams.curve.seed, + &ecParams->curve.seed)); + CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->ecParams.base, + &ecParams->base)); + CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->ecParams.order, + &ecParams->order)); + key->ecParams.cofactor = ecParams->cofactor; + CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->ecParams.DEREncoding, + &ecParams->DEREncoding)); + key->ecParams.name = ecParams->name; + CHECK_SEC_OK(SECITEM_CopyItem(arena, &key->ecParams.curveOID, + &ecParams->curveOID)); + + SECITEM_AllocItem(arena, &key->publicValue, EC_GetPointSize(ecParams)); + len = ecParams->order.len; + SECITEM_AllocItem(arena, &key->privateValue, len); + + /* Copy private key */ + if (privKeyLen >= len) { + memcpy(key->privateValue.data, privKeyBytes, len); + } else { + memset(key->privateValue.data, 0, (len - privKeyLen)); + memcpy(key->privateValue.data + (len - privKeyLen), privKeyBytes, privKeyLen); + } + + /* Compute corresponding public key */ + + /* Use curve specific code for point multiplication */ + if (ecParams->fieldID.type == ec_field_plain) { + const ECMethod *method = ec_get_method_from_name(ecParams->name); + if 
(method == NULL || method->mul == NULL) { + /* unknown curve */ + rv = SECFailure; + goto cleanup; + } + rv = method->mul(&key->publicValue, &key->privateValue, NULL); + goto done; + } + + CHECK_MPI_OK(mp_init(&k)); + CHECK_MPI_OK(mp_read_unsigned_octets(&k, key->privateValue.data, + (mp_size)len)); + + rv = ec_points_mul(ecParams, &k, NULL, NULL, &(key->publicValue)); + if (rv != SECSuccess) { + goto cleanup; + } + +done: + *privKey = key; + +cleanup: + mp_clear(&k); + if (rv) { + PORT_FreeArena(arena, PR_TRUE); + } + +#if EC_DEBUG + printf("ec_NewKey returning %s\n", + (rv == SECSuccess) ? "success" : "failure"); +#endif +#else + PORT_SetError(SEC_ERROR_UNSUPPORTED_KEYALG); +#endif /* NSS_DISABLE_ECC */ + + return rv; +} + +/* Generates a new EC key pair. The private key is a supplied + * random value (in seed) and the public key is the result of + * performing a scalar point multiplication of that value with + * the curve's base point. + */ +SECStatus +EC_NewKeyFromSeed(ECParams *ecParams, ECPrivateKey **privKey, + const unsigned char *seed, int seedlen) +{ + SECStatus rv = SECFailure; +#ifndef NSS_DISABLE_ECC + rv = ec_NewKey(ecParams, privKey, seed, seedlen); +#else + PORT_SetError(SEC_ERROR_UNSUPPORTED_KEYALG); +#endif /* NSS_DISABLE_ECC */ + return rv; +} + +#ifndef NSS_DISABLE_ECC +/* Generate a random private key using the algorithm A.4.1 of ANSI X9.62, + * modified a la FIPS 186-2 Change Notice 1 to eliminate the bias in the + * random number generator. + * + * Parameters + * - order: a buffer that holds the curve's group order + * - len: the length in octets of the order buffer + * + * Return Value + * Returns a buffer of len octets that holds the private key. The caller + * is responsible for freeing the buffer with PORT_ZFree. 
+ */ +static unsigned char * +ec_GenerateRandomPrivateKey(const unsigned char *order, int len) +{ + SECStatus rv = SECSuccess; + mp_err err; + unsigned char *privKeyBytes = NULL; + mp_int privKeyVal, order_1, one; + + MP_DIGITS(&privKeyVal) = 0; + MP_DIGITS(&order_1) = 0; + MP_DIGITS(&one) = 0; + CHECK_MPI_OK(mp_init(&privKeyVal)); + CHECK_MPI_OK(mp_init(&order_1)); + CHECK_MPI_OK(mp_init(&one)); + + /* Generates 2*len random bytes using the global random bit generator + * (which implements Algorithm 1 of FIPS 186-2 Change Notice 1) then + * reduces modulo the group order. + */ + if ((privKeyBytes = PORT_Alloc(2 * len)) == NULL) + goto cleanup; + CHECK_SEC_OK(RNG_GenerateGlobalRandomBytes(privKeyBytes, 2 * len)); + CHECK_MPI_OK(mp_read_unsigned_octets(&privKeyVal, privKeyBytes, 2 * len)); + CHECK_MPI_OK(mp_read_unsigned_octets(&order_1, order, len)); + CHECK_MPI_OK(mp_set_int(&one, 1)); + CHECK_MPI_OK(mp_sub(&order_1, &one, &order_1)); + CHECK_MPI_OK(mp_mod(&privKeyVal, &order_1, &privKeyVal)); + CHECK_MPI_OK(mp_add(&privKeyVal, &one, &privKeyVal)); + CHECK_MPI_OK(mp_to_fixlen_octets(&privKeyVal, privKeyBytes, len)); + memset(privKeyBytes + len, 0, len); +cleanup: + mp_clear(&privKeyVal); + mp_clear(&order_1); + mp_clear(&one); + if (err < MP_OKAY) { + MP_TO_SEC_ERROR(err); + rv = SECFailure; + } + if (rv != SECSuccess && privKeyBytes) { + PORT_ZFree(privKeyBytes, 2 * len); + privKeyBytes = NULL; + } + return privKeyBytes; +} +#endif /* NSS_DISABLE_ECC */ + +/* Generates a new EC key pair. The private key is a random value and + * the public key is the result of performing a scalar point multiplication + * of that value with the curve's base point. 
+ */ +SECStatus +EC_NewKey(ECParams *ecParams, ECPrivateKey **privKey) +{ + SECStatus rv = SECFailure; +#ifndef NSS_DISABLE_ECC + int len; + unsigned char *privKeyBytes = NULL; + + if (!ecParams) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + len = ecParams->order.len; + privKeyBytes = ec_GenerateRandomPrivateKey(ecParams->order.data, len); + if (privKeyBytes == NULL) + goto cleanup; + /* generate public key */ + CHECK_SEC_OK(ec_NewKey(ecParams, privKey, privKeyBytes, len)); + +cleanup: + if (privKeyBytes) { + PORT_ZFree(privKeyBytes, len); + } +#if EC_DEBUG + printf("EC_NewKey returning %s\n", + (rv == SECSuccess) ? "success" : "failure"); +#endif +#else + PORT_SetError(SEC_ERROR_UNSUPPORTED_KEYALG); +#endif /* NSS_DISABLE_ECC */ + + return rv; +} + +/* Validates an EC public key as described in Section 5.2.2 of + * X9.62. The ECDH primitive when used without the cofactor does + * not address small subgroup attacks, which may occur when the + * public key is not valid. These attacks can be prevented by + * validating the public key before using ECDH. + */ +SECStatus +EC_ValidatePublicKey(ECParams *ecParams, SECItem *publicValue) +{ +#ifndef NSS_DISABLE_ECC + mp_int Px, Py; + ECGroup *group = NULL; + SECStatus rv = SECFailure; + mp_err err = MP_OKAY; + int len; + + if (!ecParams || !publicValue || !ecParams->name) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + /* Uses curve specific code for point validation. 
*/ + if (ecParams->fieldID.type == ec_field_plain) { + const ECMethod *method = ec_get_method_from_name(ecParams->name); + if (method == NULL || method->validate == NULL) { + /* unknown curve */ + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + return method->validate(publicValue); + } + + /* NOTE: We only support uncompressed points for now */ + len = (ecParams->fieldID.size + 7) >> 3; + if (publicValue->data[0] != EC_POINT_FORM_UNCOMPRESSED) { + PORT_SetError(SEC_ERROR_UNSUPPORTED_EC_POINT_FORM); + return SECFailure; + } else if (publicValue->len != (2 * len + 1)) { + PORT_SetError(SEC_ERROR_BAD_KEY); + return SECFailure; + } + + MP_DIGITS(&Px) = 0; + MP_DIGITS(&Py) = 0; + CHECK_MPI_OK(mp_init(&Px)); + CHECK_MPI_OK(mp_init(&Py)); + + /* Initialize Px and Py */ + CHECK_MPI_OK(mp_read_unsigned_octets(&Px, publicValue->data + 1, (mp_size)len)); + CHECK_MPI_OK(mp_read_unsigned_octets(&Py, publicValue->data + 1 + len, (mp_size)len)); + + /* construct from named params */ + group = ECGroup_fromName(ecParams->name); + if (group == NULL) { + /* + * ECGroup_fromName fails if ecParams->name is not a valid + * ECCurveName value, or if we run out of memory, or perhaps + * for other reasons. Unfortunately if ecParams->name is a + * valid ECCurveName value, we don't know what the right error + * code should be because ECGroup_fromName doesn't return an + * error code to the caller. Set err to MP_UNDEF because + * that's what ECGroup_fromName uses internally. 
+ */ + if ((ecParams->name <= ECCurve_noName) || + (ecParams->name >= ECCurve_pastLastCurve)) { + err = MP_BADARG; + } else { + err = MP_UNDEF; + } + goto cleanup; + } + + /* validate public point */ + if ((err = ECPoint_validate(group, &Px, &Py)) < MP_YES) { + if (err == MP_NO) { + PORT_SetError(SEC_ERROR_BAD_KEY); + rv = SECFailure; + err = MP_OKAY; /* don't change the error code */ + } + goto cleanup; + } + + rv = SECSuccess; + +cleanup: + ECGroup_free(group); + mp_clear(&Px); + mp_clear(&Py); + if (err) { + MP_TO_SEC_ERROR(err); + rv = SECFailure; + } + return rv; +#else + PORT_SetError(SEC_ERROR_UNSUPPORTED_KEYALG); + return SECFailure; +#endif /* NSS_DISABLE_ECC */ +} + +/* +** Performs an ECDH key derivation by computing the scalar point +** multiplication of privateValue and publicValue (with or without the +** cofactor) and returns the x-coordinate of the resulting elliptic +** curve point in derived secret. If successful, derivedSecret->data +** is set to the address of the newly allocated buffer containing the +** derived secret, and derivedSecret->len is the size of the secret +** produced. It is the caller's responsibility to free the allocated +** buffer containing the derived secret. 
+*/ +SECStatus +ECDH_Derive(SECItem *publicValue, + ECParams *ecParams, + SECItem *privateValue, + PRBool withCofactor, + SECItem *derivedSecret) +{ + SECStatus rv = SECFailure; +#ifndef NSS_DISABLE_ECC + unsigned int len = 0; + SECItem pointQ = { siBuffer, NULL, 0 }; + mp_int k; /* to hold the private value */ + mp_int cofactor; + mp_err err = MP_OKAY; +#if EC_DEBUG + int i; +#endif + + if (!publicValue || !ecParams || !privateValue || !derivedSecret || + !ecParams->name) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + /* Perform curve specific multiplication using ECMethod */ + if (ecParams->fieldID.type == ec_field_plain) { + const ECMethod *method; + memset(derivedSecret, 0, sizeof(*derivedSecret)); + derivedSecret = SECITEM_AllocItem(NULL, derivedSecret, EC_GetPointSize(ecParams)); + if (derivedSecret == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return SECFailure; + } + method = ec_get_method_from_name(ecParams->name); + if (method == NULL || method->validate == NULL || + method->mul == NULL) { + PORT_SetError(SEC_ERROR_UNSUPPORTED_ELLIPTIC_CURVE); + return SECFailure; + } + if (method->validate(publicValue) != SECSuccess) { + PORT_SetError(SEC_ERROR_BAD_KEY); + return SECFailure; + } + return method->mul(derivedSecret, privateValue, publicValue); + } + + /* + * We fail if the public value is the point at infinity, since + * this produces predictable results. 
+ */ + if (ec_point_at_infinity(publicValue)) { + PORT_SetError(SEC_ERROR_BAD_KEY); + return SECFailure; + } + + MP_DIGITS(&k) = 0; + memset(derivedSecret, 0, sizeof *derivedSecret); + len = (ecParams->fieldID.size + 7) >> 3; + pointQ.len = EC_GetPointSize(ecParams); + if ((pointQ.data = PORT_Alloc(pointQ.len)) == NULL) + goto cleanup; + + CHECK_MPI_OK(mp_init(&k)); + CHECK_MPI_OK(mp_read_unsigned_octets(&k, privateValue->data, + (mp_size)privateValue->len)); + + if (withCofactor && (ecParams->cofactor != 1)) { + /* multiply k with the cofactor */ + MP_DIGITS(&cofactor) = 0; + CHECK_MPI_OK(mp_init(&cofactor)); + mp_set(&cofactor, ecParams->cofactor); + CHECK_MPI_OK(mp_mul(&k, &cofactor, &k)); + } + + /* Multiply our private key and peer's public point */ + if (ec_points_mul(ecParams, NULL, &k, publicValue, &pointQ) != SECSuccess) { + goto cleanup; + } + if (ec_point_at_infinity(&pointQ)) { + PORT_SetError(SEC_ERROR_BAD_KEY); /* XXX better error code? */ + goto cleanup; + } + + /* Allocate memory for the derived secret and copy + * the x co-ordinate of pointQ into it. + */ + SECITEM_AllocItem(NULL, derivedSecret, len); + memcpy(derivedSecret->data, pointQ.data + 1, len); + + rv = SECSuccess; + +#if EC_DEBUG + printf("derived_secret:\n"); + for (i = 0; i < derivedSecret->len; i++) + printf("%02x:", derivedSecret->data[i]); + printf("\n"); +#endif + +cleanup: + mp_clear(&k); + + if (err) { + MP_TO_SEC_ERROR(err); + } + + if (pointQ.data) { + PORT_ZFree(pointQ.data, pointQ.len); + } +#else + PORT_SetError(SEC_ERROR_UNSUPPORTED_KEYALG); +#endif /* NSS_DISABLE_ECC */ + + return rv; +} + +/* Computes the ECDSA signature (a concatenation of two values r and s) + * on the digest using the given key and the random value kb (used in + * computing s). 
+ */ +SECStatus +ECDSA_SignDigestWithSeed(ECPrivateKey *key, SECItem *signature, + const SECItem *digest, const unsigned char *kb, const int kblen) +{ + SECStatus rv = SECFailure; +#ifndef NSS_DISABLE_ECC + mp_int x1; + mp_int d, k; /* private key, random integer */ + mp_int r, s; /* tuple (r, s) is the signature */ + mp_int t; /* holding tmp values */ + mp_int n; + mp_err err = MP_OKAY; + ECParams *ecParams = NULL; + SECItem kGpoint = { siBuffer, NULL, 0 }; + int flen = 0; /* length in bytes of the field size */ + unsigned olen; /* length in bytes of the base point order */ + unsigned obits; /* length in bits of the base point order */ + unsigned char *t2 = NULL; + +#if EC_DEBUG + char mpstr[256]; +#endif + + /* Initialize MPI integers. */ + /* must happen before the first potential call to cleanup */ + MP_DIGITS(&x1) = 0; + MP_DIGITS(&d) = 0; + MP_DIGITS(&k) = 0; + MP_DIGITS(&r) = 0; + MP_DIGITS(&s) = 0; + MP_DIGITS(&n) = 0; + MP_DIGITS(&t) = 0; + + /* Check args */ + if (!key || !signature || !digest || !kb || (kblen < 0)) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + goto cleanup; + } + + ecParams = &(key->ecParams); + flen = (ecParams->fieldID.size + 7) >> 3; + olen = ecParams->order.len; + if (signature->data == NULL) { + /* a call to get the signature length only */ + goto finish; + } + if (signature->len < 2 * olen) { + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + goto cleanup; + } + + CHECK_MPI_OK(mp_init(&x1)); + CHECK_MPI_OK(mp_init(&d)); + CHECK_MPI_OK(mp_init(&k)); + CHECK_MPI_OK(mp_init(&r)); + CHECK_MPI_OK(mp_init(&s)); + CHECK_MPI_OK(mp_init(&n)); + CHECK_MPI_OK(mp_init(&t)); + + SECITEM_TO_MPINT(ecParams->order, &n); + SECITEM_TO_MPINT(key->privateValue, &d); + + CHECK_MPI_OK(mp_read_unsigned_octets(&k, kb, kblen)); + /* Make sure k is in the interval [1, n-1] */ + if ((mp_cmp_z(&k) <= 0) || (mp_cmp(&k, &n) >= 0)) { +#if EC_DEBUG + printf("k is outside [1, n-1]\n"); + mp_tohex(&k, mpstr); + printf("k : %s \n", mpstr); + mp_tohex(&n, mpstr); + printf("n : 
%s \n", mpstr); +#endif + PORT_SetError(SEC_ERROR_NEED_RANDOM); + goto cleanup; + } + + /* + ** We do not want timing information to leak the length of k, + ** so we compute k*G using an equivalent scalar of fixed + ** bit-length. + ** Fix based on patch for ECDSA timing attack in the paper + ** by Billy Bob Brumley and Nicola Tuveri at + ** http://eprint.iacr.org/2011/232 + ** + ** How do we convert k to a value of a fixed bit-length? + ** k starts off as an integer satisfying 0 <= k < n. Hence, + ** n <= k+n < 2n, which means k+n has either the same number + ** of bits as n or one more bit than n. If k+n has the same + ** number of bits as n, the second addition ensures that the + ** final value has exactly one more bit than n. Thus, we + ** always end up with a value that exactly one more bit than n. + */ + CHECK_MPI_OK(mp_add(&k, &n, &k)); + if (mpl_significant_bits(&k) <= mpl_significant_bits(&n)) { + CHECK_MPI_OK(mp_add(&k, &n, &k)); + } + + /* + ** ANSI X9.62, Section 5.3.2, Step 2 + ** + ** Compute kG + */ + kGpoint.len = EC_GetPointSize(ecParams); + kGpoint.data = PORT_Alloc(kGpoint.len); + if ((kGpoint.data == NULL) || + (ec_points_mul(ecParams, &k, NULL, NULL, &kGpoint) != SECSuccess)) + goto cleanup; + + /* + ** ANSI X9.62, Section 5.3.3, Step 1 + ** + ** Extract the x co-ordinate of kG into x1 + */ + CHECK_MPI_OK(mp_read_unsigned_octets(&x1, kGpoint.data + 1, + (mp_size)flen)); + + /* + ** ANSI X9.62, Section 5.3.3, Step 2 + ** + ** r = x1 mod n NOTE: n is the order of the curve + */ + CHECK_MPI_OK(mp_mod(&x1, &n, &r)); + + /* + ** ANSI X9.62, Section 5.3.3, Step 3 + ** + ** verify r != 0 + */ + if (mp_cmp_z(&r) == 0) { + PORT_SetError(SEC_ERROR_NEED_RANDOM); + goto cleanup; + } + + /* + ** ANSI X9.62, Section 5.3.3, Step 4 + ** + ** s = (k**-1 * (HASH(M) + d*r)) mod n + */ + SECITEM_TO_MPINT(*digest, &s); /* s = HASH(M) */ + + /* In the definition of EC signing, digests are truncated + * to the length of n in bits. 
+ * (see SEC 1 "Elliptic Curve Digit Signature Algorithm" section 4.1.*/ + CHECK_MPI_OK((obits = mpl_significant_bits(&n))); + if (digest->len * 8 > obits) { + mpl_rsh(&s, &s, digest->len * 8 - obits); + } + +#if EC_DEBUG + mp_todecimal(&n, mpstr); + printf("n : %s (dec)\n", mpstr); + mp_todecimal(&d, mpstr); + printf("d : %s (dec)\n", mpstr); + mp_tohex(&x1, mpstr); + printf("x1: %s\n", mpstr); + mp_todecimal(&s, mpstr); + printf("digest: %s (decimal)\n", mpstr); + mp_todecimal(&r, mpstr); + printf("r : %s (dec)\n", mpstr); + mp_tohex(&r, mpstr); + printf("r : %s\n", mpstr); +#endif + + if ((t2 = PORT_Alloc(2 * ecParams->order.len)) == NULL) { + rv = SECFailure; + goto cleanup; + } + if (RNG_GenerateGlobalRandomBytes(t2, 2 * ecParams->order.len) != SECSuccess) { + PORT_SetError(SEC_ERROR_NEED_RANDOM); + rv = SECFailure; + goto cleanup; + } + CHECK_MPI_OK(mp_read_unsigned_octets(&t, t2, 2 * ecParams->order.len)); /* t <-$ Zn */ + CHECK_MPI_OK(mp_mulmod(&k, &t, &n, &k)); /* k = k * t mod n */ + CHECK_MPI_OK(mp_invmod(&k, &n, &k)); /* k = k**-1 mod n */ + CHECK_MPI_OK(mp_mulmod(&k, &t, &n, &k)); /* k = k * t mod n */ + CHECK_MPI_OK(mp_mulmod(&d, &r, &n, &d)); /* d = d * r mod n */ + CHECK_MPI_OK(mp_addmod(&s, &d, &n, &s)); /* s = s + d mod n */ + CHECK_MPI_OK(mp_mulmod(&s, &k, &n, &s)); /* s = s * k mod n */ + +#if EC_DEBUG + mp_todecimal(&s, mpstr); + printf("s : %s (dec)\n", mpstr); + mp_tohex(&s, mpstr); + printf("s : %s\n", mpstr); +#endif + + /* + ** ANSI X9.62, Section 5.3.3, Step 5 + ** + ** verify s != 0 + */ + if (mp_cmp_z(&s) == 0) { + PORT_SetError(SEC_ERROR_NEED_RANDOM); + goto cleanup; + } + + /* + ** + ** Signature is tuple (r, s) + */ + CHECK_MPI_OK(mp_to_fixlen_octets(&r, signature->data, olen)); + CHECK_MPI_OK(mp_to_fixlen_octets(&s, signature->data + olen, olen)); +finish: + signature->len = 2 * olen; + + rv = SECSuccess; + err = MP_OKAY; +cleanup: + mp_clear(&x1); + mp_clear(&d); + mp_clear(&k); + mp_clear(&r); + mp_clear(&s); + mp_clear(&n); + 
mp_clear(&t); + + if (t2) { + PORT_Free(t2); + } + + if (kGpoint.data) { + PORT_ZFree(kGpoint.data, kGpoint.len); + } + + if (err) { + MP_TO_SEC_ERROR(err); + rv = SECFailure; + } + +#if EC_DEBUG + printf("ECDSA signing with seed %s\n", + (rv == SECSuccess) ? "succeeded" : "failed"); +#endif +#else + PORT_SetError(SEC_ERROR_UNSUPPORTED_KEYALG); +#endif /* NSS_DISABLE_ECC */ + + return rv; +} + +/* +** Computes the ECDSA signature on the digest using the given key +** and a random seed. +*/ +SECStatus +ECDSA_SignDigest(ECPrivateKey *key, SECItem *signature, const SECItem *digest) +{ + SECStatus rv = SECFailure; +#ifndef NSS_DISABLE_ECC + int len; + unsigned char *kBytes = NULL; + + if (!key) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + /* Generate random value k */ + len = key->ecParams.order.len; + kBytes = ec_GenerateRandomPrivateKey(key->ecParams.order.data, len); + if (kBytes == NULL) + goto cleanup; + + /* Generate ECDSA signature with the specified k value */ + rv = ECDSA_SignDigestWithSeed(key, signature, digest, kBytes, len); + +cleanup: + if (kBytes) { + PORT_ZFree(kBytes, len); + } + +#if EC_DEBUG + printf("ECDSA signing %s\n", + (rv == SECSuccess) ? "succeeded" : "failed"); +#endif +#else + PORT_SetError(SEC_ERROR_UNSUPPORTED_KEYALG); +#endif /* NSS_DISABLE_ECC */ + + return rv; +} + +/* +** Checks the signature on the given digest using the key provided. +** +** The key argument must represent a valid EC public key (a point on +** the relevant curve). If it is not a valid point, then the behavior +** of this function is undefined. In cases where a public key might +** not be valid, use EC_ValidatePublicKey to check. 
+*/
+SECStatus
+ECDSA_VerifyDigest(ECPublicKey *key, const SECItem *signature,
+                   const SECItem *digest)
+{
+    SECStatus rv = SECFailure;
+#ifndef NSS_DISABLE_ECC
+    mp_int r_, s_;       /* tuple (r', s') is received signature) */
+    mp_int c, u1, u2, v; /* intermediate values used in verification */
+    mp_int x1;
+    mp_int n;
+    mp_err err = MP_OKAY;
+    ECParams *ecParams = NULL;
+    SECItem pointC = { siBuffer, NULL, 0 };
+    int slen;       /* length in bytes of a half signature (r or s) */
+    int flen;       /* length in bytes of the field size */
+    unsigned olen;  /* length in bytes of the base point order */
+    unsigned obits; /* length in bits  of the base point order */
+
+#if EC_DEBUG
+    char mpstr[256];
+    printf("ECDSA verification called\n");
+#endif
+
+    /* Initialize MPI integers. */
+    /* must happen before the first potential call to cleanup */
+    MP_DIGITS(&r_) = 0;
+    MP_DIGITS(&s_) = 0;
+    MP_DIGITS(&c) = 0;
+    MP_DIGITS(&u1) = 0;
+    MP_DIGITS(&u2) = 0;
+    MP_DIGITS(&x1) = 0;
+    MP_DIGITS(&v) = 0;
+    MP_DIGITS(&n) = 0;
+
+    /* Check args */
+    if (!key || !signature || !digest) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        goto cleanup;
+    }
+
+    ecParams = &(key->ecParams);
+    flen = (ecParams->fieldID.size + 7) >> 3;
+    olen = ecParams->order.len;
+    /* The signature is r || s with both halves the same length, each at
+     * most as long as the group order.
+     */
+    if (signature->len == 0 || signature->len % 2 != 0 ||
+        signature->len > 2 * olen) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        goto cleanup;
+    }
+    slen = signature->len / 2;
+
+    /* Output buffer for the point u1*G + u2*Q computed below. */
+    SECITEM_AllocItem(NULL, &pointC, EC_GetPointSize(ecParams));
+    if (pointC.data == NULL)
+        goto cleanup;
+
+    CHECK_MPI_OK(mp_init(&r_));
+    CHECK_MPI_OK(mp_init(&s_));
+    CHECK_MPI_OK(mp_init(&c));
+    CHECK_MPI_OK(mp_init(&u1));
+    CHECK_MPI_OK(mp_init(&u2));
+    CHECK_MPI_OK(mp_init(&x1));
+    CHECK_MPI_OK(mp_init(&v));
+    CHECK_MPI_OK(mp_init(&n));
+
+    /*
+    ** Convert received signature (r', s') into MPI integers.
+    */
+    CHECK_MPI_OK(mp_read_unsigned_octets(&r_, signature->data, slen));
+    CHECK_MPI_OK(mp_read_unsigned_octets(&s_, signature->data + slen, slen));
+
+    /*
+    ** ANSI X9.62, Section 5.4.2, Steps 1 and 2
+    **
+    ** Verify that 0 < r' < n and 0 < s' < n
+    */
+    SECITEM_TO_MPINT(ecParams->order, &n);
+    if (mp_cmp_z(&r_) <= 0 || mp_cmp_z(&s_) <= 0 ||
+        mp_cmp(&r_, &n) >= 0 || mp_cmp(&s_, &n) >= 0) {
+        PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+        goto cleanup; /* will return rv == SECFailure */
+    }
+
+    /*
+    ** ANSI X9.62, Section 5.4.2, Step 3
+    **
+    ** c = (s')**-1 mod n
+    */
+    CHECK_MPI_OK(mp_invmod(&s_, &n, &c)); /* c = (s')**-1 mod n */
+
+    /*
+    ** ANSI X9.62, Section 5.4.2, Step 4
+    **
+    ** u1 = ((HASH(M')) * c) mod n
+    */
+    SECITEM_TO_MPINT(*digest, &u1); /* u1 = HASH(M)     */
+
+    /* In the definition of EC signing, digests are truncated
+     * to the length of n in bits.
+     * (see SEC 1 "Elliptic Curve Digit Signature Algorithm" section 4.1.*/
+    CHECK_MPI_OK((obits = mpl_significant_bits(&n)));
+    if (digest->len * 8 > obits) { /* u1 = HASH(M')     */
+        mpl_rsh(&u1, &u1, digest->len * 8 - obits);
+    }
+
+#if EC_DEBUG
+    mp_todecimal(&r_, mpstr);
+    printf("r_: %s (dec)\n", mpstr);
+    mp_todecimal(&s_, mpstr);
+    printf("s_: %s (dec)\n", mpstr);
+    mp_todecimal(&c, mpstr);
+    printf("c : %s (dec)\n", mpstr);
+    mp_todecimal(&u1, mpstr);
+    printf("digest: %s (dec)\n", mpstr);
+#endif
+
+    CHECK_MPI_OK(mp_mulmod(&u1, &c, &n, &u1)); /* u1 = u1 * c mod n */
+
+    /*
+    ** ANSI X9.62, Section 5.4.2, Step 4
+    **
+    ** u2 = ((r') * c) mod n
+    */
+    CHECK_MPI_OK(mp_mulmod(&r_, &c, &n, &u2));
+
+    /*
+    ** ANSI X9.62, Section 5.4.3, Step 1
+    **
+    ** Compute u1*G + u2*Q
+    ** Here, A = u1.G     B = u2.Q    and   C = A + B
+    ** If the result, C, is the point at infinity, reject the signature
+    */
+    if (ec_points_mul(ecParams, &u1, &u2, &key->publicValue, &pointC) != SECSuccess) {
+        rv = SECFailure;
+        goto cleanup;
+    }
+    if (ec_point_at_infinity(&pointC)) {
+        PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+        rv = SECFailure;
+        goto cleanup;
+    }
+
+    /* pointC.data[0] is the point-form octet; the x co-ordinate follows. */
+    CHECK_MPI_OK(mp_read_unsigned_octets(&x1, pointC.data + 1, flen));
+
+    /*
+    ** ANSI X9.62, Section 5.4.4, Step 2
+    **
+    ** v = x1 mod n
+    */
+    CHECK_MPI_OK(mp_mod(&x1, &n, &v));
+
+#if EC_DEBUG
+    mp_todecimal(&r_, mpstr);
+    printf("r_: %s (dec)\n", mpstr);
+    mp_todecimal(&v, mpstr);
+    printf("v : %s (dec)\n", mpstr);
+#endif
+
+    /*
+    ** ANSI X9.62, Section 5.4.4, Step 3
+    **
+    ** Verification:  v == r'
+    */
+    if (mp_cmp(&v, &r_)) {
+        PORT_SetError(SEC_ERROR_BAD_SIGNATURE);
+        rv = SECFailure; /* Signature failed to verify. */
+    } else {
+        rv = SECSuccess; /* Signature verified. */
+    }
+
+#if EC_DEBUG
+    mp_todecimal(&u1, mpstr);
+    printf("u1: %s (dec)\n", mpstr);
+    mp_todecimal(&u2, mpstr);
+    printf("u2: %s (dec)\n", mpstr);
+    mp_tohex(&x1, mpstr);
+    printf("x1: %s\n", mpstr);
+    mp_todecimal(&v, mpstr);
+    printf("v : %s (dec)\n", mpstr);
+#endif
+
+cleanup:
+    mp_clear(&r_);
+    mp_clear(&s_);
+    mp_clear(&c);
+    mp_clear(&u1);
+    mp_clear(&u2);
+    mp_clear(&x1);
+    mp_clear(&v);
+    mp_clear(&n);
+
+    if (pointC.data)
+        SECITEM_ZfreeItem(&pointC, PR_FALSE);
+    /* Any MPI error left in err overrides rv and the error code. */
+    if (err) {
+        MP_TO_SEC_ERROR(err);
+        rv = SECFailure;
+    }
+
+#if EC_DEBUG
+    printf("ECDSA verification %s\n",
+           (rv == SECSuccess) ? "succeeded" : "failed");
+#endif
+#else
+    PORT_SetError(SEC_ERROR_UNSUPPORTED_KEYALG);
+#endif /* NSS_DISABLE_ECC */
+
+    return rv;
+}
diff --git a/security/nss/lib/freebl/ec.h b/security/nss/lib/freebl/ec.h
new file mode 100644
index 000000000..bb65e82cd
--- /dev/null
+++ b/security/nss/lib/freebl/ec.h
@@ -0,0 +1,21 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
+
+#ifndef __ec_h_
+#define __ec_h_
+
+/* Set to non-zero to compile in the printf tracing guarded by
+ * `#if EC_DEBUG` in the EC code. */
+#define EC_DEBUG 0
+
+/* Accepted total lengths, in octets, of an encoded curve OID.
+ * EC_FillParams rejects any other length and treats the first two
+ * octets as a header in front of the OID body. */
+#define ANSI_X962_CURVE_OID_TOTAL_LEN 10
+#define SECG_CURVE_OID_TOTAL_LEN 7
+#define PKIX_NEWCURVES_OID_TOTAL_LEN 11
+
+/* Curve-specific operations, looked up by curve name and used instead of
+ * the generic code for ec_field_plain curves.
+ *   mul:      writes scalar * point into result. Key generation passes
+ *             NULL for point -- presumably meaning the curve's base
+ *             point; confirm against the implementations.
+ *   validate: checks a public point; returns SECSuccess when acceptable.
+ */
+struct ECMethodStr {
+    ECCurveName name;
+    SECStatus (*mul)(SECItem *result, SECItem *scalar, SECItem *point);
+    SECStatus (*validate)(const SECItem *point);
+};
+typedef struct ECMethodStr ECMethod;
+
+#endif /* __ec_h_ */
diff --git a/security/nss/lib/freebl/ecdecode.c b/security/nss/lib/freebl/ecdecode.c
new file mode 100644
index 000000000..e1f1eb8a5
--- /dev/null
+++ b/security/nss/lib/freebl/ecdecode.c
@@ -0,0 +1,311 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef NSS_DISABLE_ECC
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "blapi.h"
+#include "secoid.h"
+#include "secitem.h"
+#include "secerr.h"
+#include "ec.h"
+#include "ecl-curve.h"
+
+/* Jump to the enclosing function's `cleanup` label when func yields NULL.
+ * Expands to a bare `if`, so do not follow it with an `else`. */
+#define CHECK_OK(func) \
+    if (func == NULL)  \
+        goto cleanup
+/* Store func's status in the local `rv` and jump to `cleanup` unless it
+ * is SECSuccess. Requires `rv` and a `cleanup` label in scope. */
+#define CHECK_SEC_OK(func)         \
+    if (SECSuccess != (rv = func)) \
+        goto cleanup
+
+/*
+ * Initializes a SECItem from a hexadecimal string
+ *
+ * Warning: This function ignores leading 00's, so any leading 00's
+ * in the hexadecimal string must be optional.
+ */ +static SECItem * +hexString2SECItem(PLArenaPool *arena, SECItem *item, const char *str) +{ + int i = 0; + int byteval = 0; + int tmp = PORT_Strlen(str); + + PORT_Assert(arena); + PORT_Assert(item); + + if ((tmp % 2) != 0) + return NULL; + + /* skip leading 00's unless the hex string is "00" */ + while ((tmp > 2) && (str[0] == '0') && (str[1] == '0')) { + str += 2; + tmp -= 2; + } + + item->data = (unsigned char *)PORT_ArenaAlloc(arena, tmp / 2); + if (item->data == NULL) + return NULL; + item->len = tmp / 2; + + while (str[i]) { + if ((str[i] >= '0') && (str[i] <= '9')) + tmp = str[i] - '0'; + else if ((str[i] >= 'a') && (str[i] <= 'f')) + tmp = str[i] - 'a' + 10; + else if ((str[i] >= 'A') && (str[i] <= 'F')) + tmp = str[i] - 'A' + 10; + else + return NULL; + + byteval = byteval * 16 + tmp; + if ((i % 2) != 0) { + item->data[i / 2] = byteval; + byteval = 0; + } + i++; + } + + return item; +} + +/* Copy all of the fields from srcParams into dstParams + */ +SECStatus +EC_CopyParams(PLArenaPool *arena, ECParams *dstParams, + const ECParams *srcParams) +{ + SECStatus rv = SECFailure; + + dstParams->arena = arena; + dstParams->type = srcParams->type; + dstParams->fieldID.size = srcParams->fieldID.size; + dstParams->fieldID.type = srcParams->fieldID.type; + if (srcParams->fieldID.type == ec_field_GFp || + srcParams->fieldID.type == ec_field_plain) { + CHECK_SEC_OK(SECITEM_CopyItem(arena, &dstParams->fieldID.u.prime, + &srcParams->fieldID.u.prime)); + } else { + CHECK_SEC_OK(SECITEM_CopyItem(arena, &dstParams->fieldID.u.poly, + &srcParams->fieldID.u.poly)); + } + dstParams->fieldID.k1 = srcParams->fieldID.k1; + dstParams->fieldID.k2 = srcParams->fieldID.k2; + dstParams->fieldID.k3 = srcParams->fieldID.k3; + CHECK_SEC_OK(SECITEM_CopyItem(arena, &dstParams->curve.a, + &srcParams->curve.a)); + CHECK_SEC_OK(SECITEM_CopyItem(arena, &dstParams->curve.b, + &srcParams->curve.b)); + CHECK_SEC_OK(SECITEM_CopyItem(arena, &dstParams->curve.seed, + &srcParams->curve.seed)); + 
CHECK_SEC_OK(SECITEM_CopyItem(arena, &dstParams->base, + &srcParams->base)); + CHECK_SEC_OK(SECITEM_CopyItem(arena, &dstParams->order, + &srcParams->order)); + CHECK_SEC_OK(SECITEM_CopyItem(arena, &dstParams->DEREncoding, + &srcParams->DEREncoding)); + dstParams->name = srcParams->name; + CHECK_SEC_OK(SECITEM_CopyItem(arena, &dstParams->curveOID, + &srcParams->curveOID)); + dstParams->cofactor = srcParams->cofactor; + + return SECSuccess; + +cleanup: + return SECFailure; +} + +static SECStatus +gf_populate_params(ECCurveName name, ECFieldType field_type, ECParams *params) +{ + SECStatus rv = SECFailure; + const ECCurveParams *curveParams; + /* 2 ['0'+'4'] + MAX_ECKEY_LEN * 2 [x,y] * 2 [hex string] + 1 ['\0'] */ + char genenc[3 + 2 * 2 * MAX_ECKEY_LEN]; + + if ((name < ECCurve_noName) || (name > ECCurve_pastLastCurve)) + goto cleanup; + params->name = name; + curveParams = ecCurve_map[params->name]; + CHECK_OK(curveParams); + params->fieldID.size = curveParams->size; + params->fieldID.type = field_type; + if (field_type == ec_field_GFp || + field_type == ec_field_plain) { + CHECK_OK(hexString2SECItem(params->arena, ¶ms->fieldID.u.prime, + curveParams->irr)); + } else { + CHECK_OK(hexString2SECItem(params->arena, ¶ms->fieldID.u.poly, + curveParams->irr)); + } + CHECK_OK(hexString2SECItem(params->arena, ¶ms->curve.a, + curveParams->curvea)); + CHECK_OK(hexString2SECItem(params->arena, ¶ms->curve.b, + curveParams->curveb)); + genenc[0] = '0'; + genenc[1] = '4'; + genenc[2] = '\0'; + strcat(genenc, curveParams->genx); + strcat(genenc, curveParams->geny); + CHECK_OK(hexString2SECItem(params->arena, ¶ms->base, genenc)); + CHECK_OK(hexString2SECItem(params->arena, ¶ms->order, + curveParams->order)); + params->cofactor = curveParams->cofactor; + + rv = SECSuccess; + +cleanup: + return rv; +} + +SECStatus +EC_FillParams(PLArenaPool *arena, const SECItem *encodedParams, + ECParams *params) +{ + SECStatus rv = SECFailure; + SECOidTag tag; + SECItem oid = { siBuffer, NULL, 0 }; 
+ +#if EC_DEBUG + int i; + + printf("Encoded params in EC_DecodeParams: "); + for (i = 0; i < encodedParams->len; i++) { + printf("%02x:", encodedParams->data[i]); + } + printf("\n"); +#endif + + if ((encodedParams->len != ANSI_X962_CURVE_OID_TOTAL_LEN) && + (encodedParams->len != SECG_CURVE_OID_TOTAL_LEN) && + (encodedParams->len != PKIX_NEWCURVES_OID_TOTAL_LEN)) { + PORT_SetError(SEC_ERROR_UNSUPPORTED_ELLIPTIC_CURVE); + return SECFailure; + }; + + oid.len = encodedParams->len - 2; + oid.data = encodedParams->data + 2; + if ((encodedParams->data[0] != SEC_ASN1_OBJECT_ID) || + ((tag = SECOID_FindOIDTag(&oid)) == SEC_OID_UNKNOWN)) { + PORT_SetError(SEC_ERROR_UNSUPPORTED_ELLIPTIC_CURVE); + return SECFailure; + } + + params->arena = arena; + params->cofactor = 0; + params->type = ec_params_named; + params->name = ECCurve_noName; + + /* Fill out curveOID */ + params->curveOID.len = oid.len; + params->curveOID.data = (unsigned char *)PORT_ArenaAlloc(arena, oid.len); + if (params->curveOID.data == NULL) + goto cleanup; + memcpy(params->curveOID.data, oid.data, oid.len); + +#if EC_DEBUG + printf("Curve: %s\n", SECOID_FindOIDTagDescription(tag)); +#endif + + switch (tag) { + case SEC_OID_ANSIX962_EC_PRIME256V1: + /* Populate params for prime256v1 aka secp256r1 + * (the NIST P-256 curve) + */ + CHECK_SEC_OK(gf_populate_params(ECCurve_X9_62_PRIME_256V1, ec_field_GFp, + params)); + break; + + case SEC_OID_SECG_EC_SECP384R1: + /* Populate params for secp384r1 + * (the NIST P-384 curve) + */ + CHECK_SEC_OK(gf_populate_params(ECCurve_SECG_PRIME_384R1, ec_field_GFp, + params)); + break; + + case SEC_OID_SECG_EC_SECP521R1: + /* Populate params for secp521r1 + * (the NIST P-521 curve) + */ + CHECK_SEC_OK(gf_populate_params(ECCurve_SECG_PRIME_521R1, ec_field_GFp, + params)); + break; + + case SEC_OID_CURVE25519: + /* Populate params for Curve25519 */ + CHECK_SEC_OK(gf_populate_params(ECCurve25519, ec_field_plain, params)); + break; + + default: + break; + }; + +cleanup: + if 
(!params->cofactor) { + PORT_SetError(SEC_ERROR_UNSUPPORTED_ELLIPTIC_CURVE); +#if EC_DEBUG + printf("Unrecognized curve, returning NULL params\n"); +#endif + } + + return rv; +} + +SECStatus +EC_DecodeParams(const SECItem *encodedParams, ECParams **ecparams) +{ + PLArenaPool *arena; + ECParams *params; + SECStatus rv = SECFailure; + + /* Initialize an arena for the ECParams structure */ + if (!(arena = PORT_NewArena(NSS_FREEBL_DEFAULT_CHUNKSIZE))) + return SECFailure; + + params = (ECParams *)PORT_ArenaZAlloc(arena, sizeof(ECParams)); + if (!params) { + PORT_FreeArena(arena, PR_TRUE); + return SECFailure; + } + + /* Copy the encoded params */ + SECITEM_AllocItem(arena, &(params->DEREncoding), + encodedParams->len); + memcpy(params->DEREncoding.data, encodedParams->data, encodedParams->len); + + /* Fill out the rest of the ECParams structure based on + * the encoded params + */ + rv = EC_FillParams(arena, encodedParams, params); + if (rv == SECFailure) { + PORT_FreeArena(arena, PR_TRUE); + return SECFailure; + } else { + *ecparams = params; + ; + return SECSuccess; + } +} + +int +EC_GetPointSize(const ECParams *params) +{ + ECCurveName name = params->name; + const ECCurveParams *curveParams; + + if ((name < ECCurve_noName) || (name > ECCurve_pastLastCurve) || + ((curveParams = ecCurve_map[name]) == NULL)) { + /* unknown curve, calculate point size from params. assume standard curves with 2 points + * and a point compression indicator byte */ + int sizeInBytes = (params->fieldID.size + 7) / 8; + return sizeInBytes * 2 + 1; + } + return curveParams->pointSize; +} + +#endif /* NSS_DISABLE_ECC */ diff --git a/security/nss/lib/freebl/ecl/README b/security/nss/lib/freebl/ecl/README new file mode 100644 index 000000000..04a8b3b01 --- /dev/null +++ b/security/nss/lib/freebl/ecl/README @@ -0,0 +1,267 @@ +This Source Code Form is subject to the terms of the Mozilla Public +License, v. 2.0. 
If a copy of the MPL was not distributed with this +file, You can obtain one at http://mozilla.org/MPL/2.0/. + +The ECL exposes routines for constructing and converting curve +parameters for internal use. + + +HEADER FILES +============ + +ecl-exp.h - Exports data structures and curve names. For use by code +that does not have access to mp_ints. + +ecl-curve.h - Provides hex encodings (in the form of ECCurveParams +structs) of standardized elliptic curve domain parameters and mappings +from ECCurveName to ECCurveParams. For use by code that does not have +access to mp_ints. + +ecl.h - Interface to constructors for curve parameters and group object, +and point multiplication operations. Used by higher level algorithms +(like ECDH and ECDSA) to actually perform elliptic curve cryptography. + +ecl-priv.h - Data structures and functions for internal use within the +library. + +ecp.h - Internal header file that contains all functions for point +arithmetic over prime fields. + +DATA STRUCTURES AND TYPES +========================= + +ECCurveName (from ecl-exp.h) - Opaque name for standardized elliptic +curve domain parameters. + +ECCurveParams (from ecl-exp.h) - Provides hexadecimal encoding +of elliptic curve domain parameters. Can be generated by a user +and passed to ECGroup_fromHex or can be generated from a name by +EC_GetNamedCurveParams. ecl-curve.h contains ECCurveParams structs for +the standardized curves defined by ECCurveName. + +ECGroup (from ecl.h and ecl-priv.h) - Opaque data structure that +represents a group of elliptic curve points for a particular set of +elliptic curve domain parameters. Contains all domain parameters (curve +a and b, field, base point) as well as pointers to the functions that +should be used for point arithmetic and the underlying field GFMethod. +Generated by either ECGroup_fromHex or ECGroup_fromName. + +GFMethod (from ecl-priv.h) - Represents a field underlying a set of +elliptic curve domain parameters.
Contains the irreducible that defines +the field (either the prime or the binary polynomial) as well as +pointers to the functions that should be used for field arithmetic. + +ARITHMETIC FUNCTIONS +==================== + +Higher-level algorithms (like ECDH and ECDSA) should call ECPoint_mul +or ECPoints_mul (from ecl.h) to do point arithmetic. These functions +will choose which underlying algorithms to use, based on the ECGroup +structure. + +Point Multiplication +-------------------- + +ecl_mult.c provides the ECPoints_mul and ECPoint_mul wrappers. +It also provides two implementations for the pts_mul operation - +ec_pts_mul_basic (which computes kP, lQ, and then adds kP + lQ) and +ec_pts_mul_simul_w2 (which does a simultaneous point multiplication +using a table with window size 2*2). + +ec_naf.c provides an implementation of an algorithm to calculate a +non-adjacent form of a scalar, minimizing the number of point +additions that need to be done in a point multiplication. + +Point Arithmetic over Prime Fields +---------------------------------- + +ecp_aff.c provides point arithmetic using affine coordinates. + +ecp_jac.c provides point arithmetic using Jacobian projective +coordinates and mixed Jacobian-affine coordinates. (Jacobian projective +coordinates represent a point (x, y) as (X, Y, Z), where x=X/Z^2, +y=Y/Z^3). + +ecp_jm.c provides point arithmetic using Modified Jacobian +coordinates and mixed Modified_Jacobian-affine coordinates. +(Modified Jacobian coordinates represent a point (x, y) +as (X, Y, Z, a*Z^4), where x=X/Z^2, y=Y/Z^3, and a is +the linear coefficient in the curve defining equation). + +ecp_192.c and ecp_224.c provide optimized field arithmetic. + +Point Arithmetic over Binary Polynomial Fields +---------------------------------------------- + +ec2_aff.c provides point arithmetic using affine coordinates. + +ec2_proj.c provides point arithmetic using projective coordinates. 
+(Projective coordinates represent a point (x, y) as (X, Y, Z), where +x=X/Z, y=Y/Z^2). + +ec2_mont.c provides point multiplication using Montgomery projective +coordinates. + +ec2_163.c, ec2_193.c, and ec2_233.c provide optimized field arithmetic. + +Field Arithmetic +---------------- + +ecl_gf.c provides constructors for field objects (GFMethod) with the +functions GFMethod_cons*. It also provides wrappers around the basic +field operations. + +Prime Field Arithmetic +---------------------- + +The mpi library provides the basic prime field arithmetic. + +ecp_mont.c provides wrappers around the Montgomery multiplication +functions from the mpi library and adds encoding and decoding functions. +It also provides the function to construct a GFMethod object using +Montgomery multiplication. + +ecp_192.c and ecp_224.c provide optimized modular reduction for the +fields defined by nistp192 and nistp224 primes. + +ecl_gf.c provides wrappers around the basic field operations. + +Binary Polynomial Field Arithmetic +---------------------------------- + +../mpi/mp_gf2m.c provides basic binary polynomial field arithmetic, +including addition, multiplication, squaring, mod, and division, as well +as conversion of polynomial representations between bitstring and int[]. + +ec2_163.c, ec2_193.c, and ec2_233.c provide optimized field mod, mul, +and sqr operations. + +ecl_gf.c provides wrappers around the basic field operations. + +Field Encoding +-------------- + +By default, field elements are encoded in their basic form. It is +possible to use an alternative encoding, however. For example, it is +possible to use the Montgomery representation of prime field elements and +take advantage of the fast modular multiplication that Montgomery +representation provides. The process of converting from basic form to +Montgomery representation is called field encoding, and the opposite +process would be field decoding.
All internal point operations assume +that the operands are field encoded as appropriate. By rewiring the +underlying field arithmetic to perform operations on these encoded +values, the same overlying point arithmetic operations can be used +regardless of field representation. + +ALGORITHM WIRING +================ + +The EC library allows point and field arithmetic algorithms to be +substituted ("wired-in") on a fine-grained basis. This allows for +generic algorithms and algorithms that are optimized for a particular +curve, field, or architecture, to coexist and to be automatically +selected at runtime. + +Wiring Mechanism +---------------- + +The ECGroup and GFMethod structures contain pointers to the point and +field arithmetic functions, respectively, that are to be used in +operations. + +The selection of algorithms to use is handled in the function +ecgroup_fromNameAndHex in ecl.c. + +Default Wiring +-------------- + +Curves over prime fields by default use Montgomery field arithmetic, +point multiplication using 5-bit window non-adjacent-form with +Modified Jacobian coordinates, and 2*2-bit simultaneous point +multiplication using Jacobian coordinates. +(Wiring in function ECGroup_consGFp_mont in ecl.c.) + +Curves over prime fields that have optimized modular reduction (i.e., +secp160r1, nistp192, and nistp224) do not use Montgomery field +arithmetic. Instead, they use basic field arithmetic with their +optimized reduction (as in ecp_192.c and ecp_224.c). They +use the same point multiplication and simultaneous point multiplication +algorithms as other curves over prime fields. + +Curves over binary polynomial fields by default use generic field +arithmetic with Montgomery point multiplication and basic kP + lQ +computation (multiply, multiply, and add). (Wiring in function +ECGroup_cons_GF2m in ecl.c.) + +Curves over binary polynomial fields that have optimized field +arithmetic (i.e., any 163-, 193-, or 233-bit field) use their optimized +field arithmetic.
They use the same point multiplication and +simultaneous point multiplication algorithms as other curves over binary +fields. + +Example +------- + +We provide an example for plugging in an optimized implementation for +the Koblitz curve nistk163. + +Suppose the file ec2_k163.c contains the optimized implementation. In +particular it contains a point multiplication function: + + mp_err ec_GF2m_nistk163_pt_mul(const mp_int *n, const mp_int *px, + const mp_int *py, mp_int *rx, mp_int *ry, const ECGroup *group); + +Since only a pt_mul function is provided, the generic pt_add function +will be used. + +There are two options for handling the optimized field arithmetic used +by the ..._pt_mul function. Say the optimized field arithmetic includes +the following functions: + + mp_err ec_GF2m_nistk163_add(const mp_int *a, const mp_int *b, + mp_int *r, const GFMethod *meth); + mp_err ec_GF2m_nistk163_mul(const mp_int *a, const mp_int *b, + mp_int *r, const GFMethod *meth); + mp_err ec_GF2m_nistk163_sqr(const mp_int *a, const mp_int *b, + mp_int *r, const GFMethod *meth); + mp_err ec_GF2m_nistk163_div(const mp_int *a, const mp_int *b, + mp_int *r, const GFMethod *meth); + +First, the optimized field arithmetic could simply be called directly +by the ..._pt_mul function. This would be accomplished by changing +the ecgroup_fromNameAndHex function in ecl.c to include the following +statements: + + if (name == ECCurve_NIST_K163) { + group = ECGroup_consGF2m(&irr, NULL, &curvea, &curveb, &genx, + &geny, &order, params->cofactor); + if (group == NULL) { res = MP_UNDEF; goto CLEANUP; } + MP_CHECKOK( ec_group_set_nistk163(group) ); + } + +and including in ec2_k163.c the following function: + + mp_err ec_group_set_nistk163(ECGroup *group) { + group->point_mul = &ec_GF2m_nistk163_pt_mul; + return MP_OKAY; + } + +As a result, ec_GF2m_pt_add and similar functions would use the +basic binary polynomial field arithmetic ec_GF2m_add, ec_GF2m_mul, +ec_GF2m_sqr, and ec_GF2m_div. 
/*
 * Field element: 32 limbs, least significant first, radix 2^8. Limbs are
 * stored in 32-bit words so intermediate values may exceed 8 bits.
 */
typedef uint32_t elem[32];

/*
 * Add two field elements.
 * out = a + b
 *
 * Limbs 0..30 are carried down to 8 bits; limb 31 receives the final
 * carry plus both top limbs and is left unmasked.
 */
static void
add(elem out, const elem a, const elem b)
{
    uint32_t carry = 0;
    uint32_t i;

    for (i = 0; i < 31; ++i) {
        carry += a[i] + b[i];
        out[i] = carry & 0xFF;
        carry >>= 8;
    }
    out[31] = carry + a[31] + b[31];
}

/*
 * Subtract two field elements.
 * out = a - b
 *
 * The starting value 218 together with the 0xFF00 added per limb sums to
 * 2^256 - 38 = 2 * (2^255 - 19), which keeps every limb non-negative
 * without changing the value modulo the prime.
 */
static void
sub(elem out, const elem a, const elem b)
{
    uint32_t acc = 218;
    uint32_t i;

    for (i = 0; i < 31; ++i) {
        acc += a[i] + 0xFF00 - b[i];
        out[i] = acc & 0xFF;
        acc >>= 8;
    }
    out[31] = acc + a[31] - b[31];
}

/*
 * "Squeeze" an element after multiplication (and square).
 *
 * Propagates carries through the limbs, cuts the value at bit 255 and
 * folds the excess back into the bottom multiplied by 19.
 */
static void
squeeze(elem a)
{
    uint32_t acc = 0;
    uint32_t i;

    /* First pass: plain carry propagation into the top limb. */
    for (i = 0; i < 31; ++i) {
        acc += a[i];
        a[i] = acc & 0xFF;
        acc >>= 8;
    }
    acc += a[31];
    a[31] = acc & 0x7F;

    /* Everything at or above bit 255 wraps around as a multiple of 19. */
    acc = 19 * (acc >> 7);
    for (i = 0; i < 31; ++i) {
        acc += a[i];
        a[i] = acc & 0xFF;
        acc >>= 8;
    }
    a[31] += acc;
}

/* 2^255 + 19, i.e. -(2^255 - 19) modulo 2^256. */
static const elem minusp = { 19, 0, 0, 0, 0, 0, 0, 0,
                             0, 0, 0, 0, 0, 0, 0, 0,
                             0, 0, 0, 0, 0, 0, 0, 0,
                             0, 0, 0, 0, 0, 0, 0, 128 };

/*
 * Reduce a by 2^255-19.
 *
 * Adds minusp and then, without branching, restores the original limbs
 * when bit 7 of the resulting top limb is set.
 */
static void
reduce(elem a)
{
    elem saved;
    uint32_t restore;
    uint32_t i;

    for (i = 0; i < 32; ++i) {
        saved[i] = a[i];
    }
    add(a, a, minusp);

    /* All ones when bit 7 of the top limb is set, zero otherwise. */
    restore = 0 - ((a[31] >> 7) & 1);
    for (i = 0; i < 32; ++i) {
        a[i] ^= restore & (saved[i] ^ a[i]);
    }
}

/*
 * Multiplication and squeeze
 * out = a * b
 *
 * Schoolbook product: terms that land at or above limb 32 are folded back
 * with a factor of 38. out is written while a and b are still being read,
 * so it must not overlap either input.
 */
static void
mult(elem out, const elem a, const elem b)
{
    uint32_t i;

    for (i = 0; i < 32; ++i) {
        uint32_t sum = 0;
        uint32_t j;

        for (j = 0; j <= i; ++j) {
            sum += a[j] * b[i - j];
        }
        for (j = i + 1; j < 32; ++j) {
            sum += 38 * a[j] * b[i + 32 - j];
        }
        out[i] = sum;
    }
    squeeze(out);
}

/*
 * Multiplication
 * out = 121665 * a
 */
static void
mult121665(elem out, const elem a)
{
    uint32_t acc = 0;
    uint32_t i;

    for (i = 0; i < 31; ++i) {
        acc += 121665 * a[i];
        out[i] = acc & 0xFF;
        acc >>= 8;
    }
    acc += 121665 * a[31];
    out[31] = acc & 0x7F;

    /* Fold the bits at or above bit 255 back in as a multiple of 19. */
    acc = 19 * (acc >> 7);
    for (i = 0; i < 31; ++i) {
        acc += out[i];
        out[i] = acc & 0xFF;
        acc >>= 8;
    }
    out[31] += acc;
}
+ * out = a * a + */ +static void +square(elem out, const elem a) +{ + uint32_t i; + uint32_t j; + uint32_t u; + + for (i = 0; i < 32; ++i) { + u = 0; + for (j = 0; j < i - j; ++j) { + u += a[j] * a[i - j]; + } + for (j = i + 1; j < i + 32 - j; ++j) { + u += 38 * a[j] * a[i + 32 - j]; + } + u *= 2; + if ((i & 1) == 0) { + u += a[i / 2] * a[i / 2]; + u += 38 * a[i / 2 + 16] * a[i / 2 + 16]; + } + out[i] = u; + } + squeeze(out); +} + +/* + * Constant time swap between r and s depending on b + */ +static void +cswap(uint32_t p[64], uint32_t q[64], uint32_t b) +{ + uint32_t j; + uint32_t swap = 1 + ~b; + + for (j = 0; j < 64; ++j) { + const uint32_t t = swap & (p[j] ^ q[j]); + p[j] ^= t; + q[j] ^= t; + } +} + +/* + * Montgomery ladder + */ +static void +monty(elem x_2_out, elem z_2_out, + const elem point, const elem scalar) +{ + uint32_t x_3[64] = { 0 }; + uint32_t x_2[64] = { 0 }; + uint32_t a0[64]; + uint32_t a1[64]; + uint32_t b0[64]; + uint32_t b1[64]; + uint32_t c1[64]; + uint32_t r[32]; + uint32_t s[32]; + uint32_t t[32]; + uint32_t u[32]; + uint32_t swap = 0; + uint32_t k_t = 0; + int j; + + for (j = 0; j < 32; ++j) { + x_3[j] = point[j]; + } + x_3[32] = 1; + x_2[0] = 1; + + for (j = 254; j >= 0; --j) { + k_t = (scalar[j >> 3] >> (j & 7)) & 1; + swap ^= k_t; + cswap(x_2, x_3, swap); + swap = k_t; + add(a0, x_2, x_2 + 32); + sub(a0 + 32, x_2, x_2 + 32); + add(a1, x_3, x_3 + 32); + sub(a1 + 32, x_3, x_3 + 32); + square(b0, a0); + square(b0 + 32, a0 + 32); + mult(b1, a1, a0 + 32); + mult(b1 + 32, a1 + 32, a0); + add(c1, b1, b1 + 32); + sub(c1 + 32, b1, b1 + 32); + square(r, c1 + 32); + sub(s, b0, b0 + 32); + mult121665(t, s); + add(u, t, b0); + mult(x_2, b0, b0 + 32); + mult(x_2 + 32, s, u); + square(x_3, c1); + mult(x_3 + 32, r, point); + } + + cswap(x_2, x_3, swap); + for (j = 0; j < 32; ++j) { + x_2_out[j] = x_2[j]; + } + for (j = 0; j < 32; ++j) { + z_2_out[j] = x_2[j + 32]; + } +} + +static void +recip(elem out, const elem z) +{ + elem z2; + elem z9; + elem 
z11; + elem z2_5_0; + elem z2_10_0; + elem z2_20_0; + elem z2_50_0; + elem z2_100_0; + elem t0; + elem t1; + int i; + + /* 2 */ square(z2, z); + /* 4 */ square(t1, z2); + /* 8 */ square(t0, t1); + /* 9 */ mult(z9, t0, z); + /* 11 */ mult(z11, z9, z2); + /* 22 */ square(t0, z11); + /* 2^5 - 2^0 = 31 */ mult(z2_5_0, t0, z9); + + /* 2^6 - 2^1 */ square(t0, z2_5_0); + /* 2^7 - 2^2 */ square(t1, t0); + /* 2^8 - 2^3 */ square(t0, t1); + /* 2^9 - 2^4 */ square(t1, t0); + /* 2^10 - 2^5 */ square(t0, t1); + /* 2^10 - 2^0 */ mult(z2_10_0, t0, z2_5_0); + + /* 2^11 - 2^1 */ square(t0, z2_10_0); + /* 2^12 - 2^2 */ square(t1, t0); + /* 2^20 - 2^10 */ + for (i = 2; i < 10; i += 2) { + square(t0, t1); + square(t1, t0); + } + /* 2^20 - 2^0 */ mult(z2_20_0, t1, z2_10_0); + + /* 2^21 - 2^1 */ square(t0, z2_20_0); + /* 2^22 - 2^2 */ square(t1, t0); + /* 2^40 - 2^20 */ + for (i = 2; i < 20; i += 2) { + square(t0, t1); + square(t1, t0); + } + /* 2^40 - 2^0 */ mult(t0, t1, z2_20_0); + + /* 2^41 - 2^1 */ square(t1, t0); + /* 2^42 - 2^2 */ square(t0, t1); + /* 2^50 - 2^10 */ + for (i = 2; i < 10; i += 2) { + square(t1, t0); + square(t0, t1); + } + /* 2^50 - 2^0 */ mult(z2_50_0, t0, z2_10_0); + + /* 2^51 - 2^1 */ square(t0, z2_50_0); + /* 2^52 - 2^2 */ square(t1, t0); + /* 2^100 - 2^50 */ + for (i = 2; i < 50; i += 2) { + square(t0, t1); + square(t1, t0); + } + /* 2^100 - 2^0 */ mult(z2_100_0, t1, z2_50_0); + + /* 2^101 - 2^1 */ square(t1, z2_100_0); + /* 2^102 - 2^2 */ square(t0, t1); + /* 2^200 - 2^100 */ + for (i = 2; i < 100; i += 2) { + square(t1, t0); + square(t0, t1); + } + /* 2^200 - 2^0 */ mult(t1, t0, z2_100_0); + + /* 2^201 - 2^1 */ square(t0, t1); + /* 2^202 - 2^2 */ square(t1, t0); + /* 2^250 - 2^50 */ + for (i = 2; i < 50; i += 2) { + square(t0, t1); + square(t1, t0); + } + /* 2^250 - 2^0 */ mult(t0, t1, z2_50_0); + + /* 2^251 - 2^1 */ square(t1, t0); + /* 2^252 - 2^2 */ square(t0, t1); + /* 2^253 - 2^3 */ square(t1, t0); + /* 2^254 - 2^4 */ square(t0, t1); + /* 2^255 - 2^5 */ 
square(t1, t0); + /* 2^255 - 21 */ mult(out, t1, z11); +} + +/* + * Computes q = Curve25519(p, s) + */ +SECStatus +ec_Curve25519_mul(PRUint8 *q, const PRUint8 *s, const PRUint8 *p) +{ + elem point = { 0 }; + elem x_2 = { 0 }; + elem z_2 = { 0 }; + elem X = { 0 }; + elem scalar = { 0 }; + uint32_t i; + + /* read and mask scalar */ + for (i = 0; i < 32; ++i) { + scalar[i] = s[i]; + } + scalar[0] &= 0xF8; + scalar[31] &= 0x7F; + scalar[31] |= 64; + + /* read and mask point */ + for (i = 0; i < 32; ++i) { + point[i] = p[i]; + } + point[31] &= 0x7F; + + monty(x_2, z_2, point, scalar); + recip(z_2, z_2); + mult(X, x_2, z_2); + reduce(X); + for (i = 0; i < 32; ++i) { + q[i] = X[i]; + } + return 0; +} diff --git a/security/nss/lib/freebl/ecl/curve25519_64.c b/security/nss/lib/freebl/ecl/curve25519_64.c new file mode 100644 index 000000000..89327ad1c --- /dev/null +++ b/security/nss/lib/freebl/ecl/curve25519_64.c @@ -0,0 +1,514 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * Derived from public domain C code by Adan Langley and Daniel J. Bernstein + */ + +#include "uint128.h" + +#include "ecl-priv.h" +#include "mpi.h" + +#include +#include +#include + +typedef uint8_t u8; +typedef uint64_t felem; + +/* Sum two numbers: output += in */ +static void +fsum(felem *output, const felem *in) +{ + unsigned i; + for (i = 0; i < 5; ++i) { + output[i] += in[i]; + } +} + +/* Find the difference of two numbers: output = in - output + * (note the order of the arguments!) 
+ */ +static void +fdifference_backwards(felem *ioutput, const felem *iin) +{ + static const int64_t twotothe51 = ((int64_t)1l << 51); + const int64_t *in = (const int64_t *)iin; + int64_t *out = (int64_t *)ioutput; + + out[0] = in[0] - out[0]; + out[1] = in[1] - out[1]; + out[2] = in[2] - out[2]; + out[3] = in[3] - out[3]; + out[4] = in[4] - out[4]; + + // An arithmetic shift right of 63 places turns a positive number to 0 and a + // negative number to all 1's. This gives us a bitmask that lets us avoid + // side-channel prone branches. + int64_t t; + +#define NEGCHAIN(a, b) \ + t = out[a] >> 63; \ + out[a] += twotothe51 & t; \ + out[b] -= 1 & t; + +#define NEGCHAIN19(a, b) \ + t = out[a] >> 63; \ + out[a] += twotothe51 & t; \ + out[b] -= 19 & t; + + NEGCHAIN(0, 1); + NEGCHAIN(1, 2); + NEGCHAIN(2, 3); + NEGCHAIN(3, 4); + NEGCHAIN19(4, 0); + NEGCHAIN(0, 1); + NEGCHAIN(1, 2); + NEGCHAIN(2, 3); + NEGCHAIN(3, 4); +} + +/* Multiply a number by a scalar: output = in * scalar */ +static void +fscalar_product(felem *output, const felem *in, + const felem scalar) +{ + uint128_t tmp, tmp2; + + tmp = mul6464(in[0], scalar); + output[0] = mask51(tmp); + + tmp2 = mul6464(in[1], scalar); + tmp = add128(tmp2, rshift128(tmp, 51)); + output[1] = mask51(tmp); + + tmp2 = mul6464(in[2], scalar); + tmp = add128(tmp2, rshift128(tmp, 51)); + output[2] = mask51(tmp); + + tmp2 = mul6464(in[3], scalar); + tmp = add128(tmp2, rshift128(tmp, 51)); + output[3] = mask51(tmp); + + tmp2 = mul6464(in[4], scalar); + tmp = add128(tmp2, rshift128(tmp, 51)); + output[4] = mask51(tmp); + + output[0] += mask_lower(rshift128(tmp, 51)) * 19; +} + +/* Multiply two numbers: output = in2 * in + * + * output must be distinct to both inputs. The inputs are reduced coefficient + * form, the output is not. 
+ */ +static void +fmul(felem *output, const felem *in2, const felem *in) +{ + uint128_t t0, t1, t2, t3, t4, t5, t6, t7, t8; + + t0 = mul6464(in[0], in2[0]); + t1 = add128(mul6464(in[1], in2[0]), mul6464(in[0], in2[1])); + t2 = add128(add128(mul6464(in[0], in2[2]), + mul6464(in[2], in2[0])), + mul6464(in[1], in2[1])); + t3 = add128(add128(add128(mul6464(in[0], in2[3]), + mul6464(in[3], in2[0])), + mul6464(in[1], in2[2])), + mul6464(in[2], in2[1])); + t4 = add128(add128(add128(add128(mul6464(in[0], in2[4]), + mul6464(in[4], in2[0])), + mul6464(in[3], in2[1])), + mul6464(in[1], in2[3])), + mul6464(in[2], in2[2])); + t5 = add128(add128(add128(mul6464(in[4], in2[1]), + mul6464(in[1], in2[4])), + mul6464(in[2], in2[3])), + mul6464(in[3], in2[2])); + t6 = add128(add128(mul6464(in[4], in2[2]), + mul6464(in[2], in2[4])), + mul6464(in[3], in2[3])); + t7 = add128(mul6464(in[3], in2[4]), mul6464(in[4], in2[3])); + t8 = mul6464(in[4], in2[4]); + + t0 = add128(t0, mul12819(t5)); + t1 = add128(t1, mul12819(t6)); + t2 = add128(t2, mul12819(t7)); + t3 = add128(t3, mul12819(t8)); + + t1 = add128(t1, rshift128(t0, 51)); + t0 = mask51full(t0); + t2 = add128(t2, rshift128(t1, 51)); + t1 = mask51full(t1); + t3 = add128(t3, rshift128(t2, 51)); + t4 = add128(t4, rshift128(t3, 51)); + t0 = add128(t0, mul12819(rshift128(t4, 51))); + t1 = add128(t1, rshift128(t0, 51)); + t2 = mask51full(t2); + t2 = add128(t2, rshift128(t1, 51)); + + output[0] = mask51(t0); + output[1] = mask51(t1); + output[2] = mask_lower(t2); + output[3] = mask51(t3); + output[4] = mask51(t4); +} + +static void +fsquare(felem *output, const felem *in) +{ + uint128_t t0, t1, t2, t3, t4, t5, t6, t7, t8; + + t0 = mul6464(in[0], in[0]); + t1 = lshift128(mul6464(in[0], in[1]), 1); + t2 = add128(lshift128(mul6464(in[0], in[2]), 1), + mul6464(in[1], in[1])); + t3 = add128(lshift128(mul6464(in[0], in[3]), 1), + lshift128(mul6464(in[1], in[2]), 1)); + t4 = add128(add128(lshift128(mul6464(in[0], in[4]), 1), + 
lshift128(mul6464(in[3], in[1]), 1)), + mul6464(in[2], in[2])); + t5 = add128(lshift128(mul6464(in[4], in[1]), 1), + lshift128(mul6464(in[2], in[3]), 1)); + t6 = add128(lshift128(mul6464(in[4], in[2]), 1), + mul6464(in[3], in[3])); + t7 = lshift128(mul6464(in[3], in[4]), 1); + t8 = mul6464(in[4], in[4]); + + t0 = add128(t0, mul12819(t5)); + t1 = add128(t1, mul12819(t6)); + t2 = add128(t2, mul12819(t7)); + t3 = add128(t3, mul12819(t8)); + + t1 = add128(t1, rshift128(t0, 51)); + t0 = mask51full(t0); + t2 = add128(t2, rshift128(t1, 51)); + t1 = mask51full(t1); + t3 = add128(t3, rshift128(t2, 51)); + t4 = add128(t4, rshift128(t3, 51)); + t0 = add128(t0, mul12819(rshift128(t4, 51))); + t1 = add128(t1, rshift128(t0, 51)); + + output[0] = mask51(t0); + output[1] = mask_lower(t1); + output[2] = mask51(t2); + output[3] = mask51(t3); + output[4] = mask51(t4); +} + +/* Take a 32-byte number and expand it into polynomial form */ +static void NO_SANITIZE_ALIGNMENT +fexpand(felem *output, const u8 *in) +{ + output[0] = *((const uint64_t *)(in)) & MASK51; + output[1] = (*((const uint64_t *)(in + 6)) >> 3) & MASK51; + output[2] = (*((const uint64_t *)(in + 12)) >> 6) & MASK51; + output[3] = (*((const uint64_t *)(in + 19)) >> 1) & MASK51; + output[4] = (*((const uint64_t *)(in + 25)) >> 4) & MASK51; +} + +/* Take a fully reduced polynomial form number and contract it into a + * 32-byte array + */ +static void +fcontract(u8 *output, const felem *input) +{ + uint128_t t0 = init128x(input[0]); + uint128_t t1 = init128x(input[1]); + uint128_t t2 = init128x(input[2]); + uint128_t t3 = init128x(input[3]); + uint128_t t4 = init128x(input[4]); + uint128_t tmp = init128x(19); + + t1 = add128(t1, rshift128(t0, 51)); + t0 = mask51full(t0); + t2 = add128(t2, rshift128(t1, 51)); + t1 = mask51full(t1); + t3 = add128(t3, rshift128(t2, 51)); + t2 = mask51full(t2); + t4 = add128(t4, rshift128(t3, 51)); + t3 = mask51full(t3); + t0 = add128(t0, mul12819(rshift128(t4, 51))); + t4 = mask51full(t4); + + 
t1 = add128(t1, rshift128(t0, 51)); + t0 = mask51full(t0); + t2 = add128(t2, rshift128(t1, 51)); + t1 = mask51full(t1); + t3 = add128(t3, rshift128(t2, 51)); + t2 = mask51full(t2); + t4 = add128(t4, rshift128(t3, 51)); + t3 = mask51full(t3); + t0 = add128(t0, mul12819(rshift128(t4, 51))); + t4 = mask51full(t4); + + /* now t is between 0 and 2^255-1, properly carried. */ + /* case 1: between 0 and 2^255-20. case 2: between 2^255-19 and 2^255-1. */ + + t0 = add128(t0, tmp); + + t1 = add128(t1, rshift128(t0, 51)); + t0 = mask51full(t0); + t2 = add128(t2, rshift128(t1, 51)); + t1 = mask51full(t1); + t3 = add128(t3, rshift128(t2, 51)); + t2 = mask51full(t2); + t4 = add128(t4, rshift128(t3, 51)); + t3 = mask51full(t3); + t0 = add128(t0, mul12819(rshift128(t4, 51))); + t4 = mask51full(t4); + + /* now between 19 and 2^255-1 in both cases, and offset by 19. */ + + t0 = add128(t0, init128x(0x8000000000000 - 19)); + tmp = init128x(0x8000000000000 - 1); + t1 = add128(t1, tmp); + t2 = add128(t2, tmp); + t3 = add128(t3, tmp); + t4 = add128(t4, tmp); + + /* now between 2^255 and 2^256-20, and offset by 2^255. 
*/ + + t1 = add128(t1, rshift128(t0, 51)); + t0 = mask51full(t0); + t2 = add128(t2, rshift128(t1, 51)); + t1 = mask51full(t1); + t3 = add128(t3, rshift128(t2, 51)); + t2 = mask51full(t2); + t4 = add128(t4, rshift128(t3, 51)); + t3 = mask51full(t3); + t4 = mask51full(t4); + + *((uint64_t *)(output)) = mask_lower(t0) | mask_lower(t1) << 51; + *((uint64_t *)(output + 8)) = (mask_lower(t1) >> 13) | (mask_lower(t2) << 38); + *((uint64_t *)(output + 16)) = (mask_lower(t2) >> 26) | (mask_lower(t3) << 25); + *((uint64_t *)(output + 24)) = (mask_lower(t3) >> 39) | (mask_lower(t4) << 12); +} + +/* Input: Q, Q', Q-Q' + * Output: 2Q, Q+Q' + * + * x2 z3: long form + * x3 z3: long form + * x z: short form, destroyed + * xprime zprime: short form, destroyed + * qmqp: short form, preserved + */ +static void +fmonty(felem *x2, felem *z2, /* output 2Q */ + felem *x3, felem *z3, /* output Q + Q' */ + felem *x, felem *z, /* input Q */ + felem *xprime, felem *zprime, /* input Q' */ + const felem *qmqp /* input Q - Q' */) +{ + felem origx[5], origxprime[5], zzz[5], xx[5], zz[5], xxprime[5], zzprime[5], + zzzprime[5]; + + memcpy(origx, x, 5 * sizeof(felem)); + fsum(x, z); + fdifference_backwards(z, origx); // does x - z + + memcpy(origxprime, xprime, sizeof(felem) * 5); + fsum(xprime, zprime); + fdifference_backwards(zprime, origxprime); + fmul(xxprime, xprime, z); + fmul(zzprime, x, zprime); + memcpy(origxprime, xxprime, sizeof(felem) * 5); + fsum(xxprime, zzprime); + fdifference_backwards(zzprime, origxprime); + fsquare(x3, xxprime); + fsquare(zzzprime, zzprime); + fmul(z3, zzzprime, qmqp); + + fsquare(xx, x); + fsquare(zz, z); + fmul(x2, xx, zz); + fdifference_backwards(zz, xx); // does zz = xx - zz + fscalar_product(zzz, zz, 121665); + fsum(zzz, xx); + fmul(z2, zz, zzz); +} + +// ----------------------------------------------------------------------------- +// Maybe swap the contents of two felem arrays (@a and @b), each @len elements +// long. Perform the swap iff @swap is non-zero. 
+// +// This function performs the swap without leaking any side-channel +// information. +// ----------------------------------------------------------------------------- +static void +swap_conditional(felem *a, felem *b, unsigned len, felem iswap) +{ + unsigned i; + const felem swap = 1 + ~iswap; + + for (i = 0; i < len; ++i) { + const felem x = swap & (a[i] ^ b[i]); + a[i] ^= x; + b[i] ^= x; + } +} + +/* Calculates nQ where Q is the x-coordinate of a point on the curve + * + * resultx/resultz: the x coordinate of the resulting curve point (short form) + * n: a 32-byte number + * q: a point of the curve (short form) + */ +static void +cmult(felem *resultx, felem *resultz, const u8 *n, const felem *q) +{ + felem a[5] = { 0 }, b[5] = { 1 }, c[5] = { 1 }, d[5] = { 0 }; + felem *nqpqx = a, *nqpqz = b, *nqx = c, *nqz = d, *t; + felem e[5] = { 0 }, f[5] = { 1 }, g[5] = { 0 }, h[5] = { 1 }; + felem *nqpqx2 = e, *nqpqz2 = f, *nqx2 = g, *nqz2 = h; + + unsigned i, j; + + memcpy(nqpqx, q, sizeof(felem) * 5); + + for (i = 0; i < 32; ++i) { + u8 byte = n[31 - i]; + for (j = 0; j < 8; ++j) { + const felem bit = byte >> 7; + + swap_conditional(nqx, nqpqx, 5, bit); + swap_conditional(nqz, nqpqz, 5, bit); + fmonty(nqx2, nqz2, nqpqx2, nqpqz2, nqx, nqz, nqpqx, nqpqz, q); + swap_conditional(nqx2, nqpqx2, 5, bit); + swap_conditional(nqz2, nqpqz2, 5, bit); + + t = nqx; + nqx = nqx2; + nqx2 = t; + t = nqz; + nqz = nqz2; + nqz2 = t; + t = nqpqx; + nqpqx = nqpqx2; + nqpqx2 = t; + t = nqpqz; + nqpqz = nqpqz2; + nqpqz2 = t; + + byte <<= 1; + } + } + + memcpy(resultx, nqx, sizeof(felem) * 5); + memcpy(resultz, nqz, sizeof(felem) * 5); +} + +// ----------------------------------------------------------------------------- +// Shamelessly copied from djb's code +// ----------------------------------------------------------------------------- +static void +crecip(felem *out, const felem *z) +{ + felem z2[5]; + felem z9[5]; + felem z11[5]; + felem z2_5_0[5]; + felem z2_10_0[5]; + felem 
z2_20_0[5]; + felem z2_50_0[5]; + felem z2_100_0[5]; + felem t0[5]; + felem t1[5]; + int i; + + /* 2 */ fsquare(z2, z); + /* 4 */ fsquare(t1, z2); + /* 8 */ fsquare(t0, t1); + /* 9 */ fmul(z9, t0, z); + /* 11 */ fmul(z11, z9, z2); + /* 22 */ fsquare(t0, z11); + /* 2^5 - 2^0 = 31 */ fmul(z2_5_0, t0, z9); + + /* 2^6 - 2^1 */ fsquare(t0, z2_5_0); + /* 2^7 - 2^2 */ fsquare(t1, t0); + /* 2^8 - 2^3 */ fsquare(t0, t1); + /* 2^9 - 2^4 */ fsquare(t1, t0); + /* 2^10 - 2^5 */ fsquare(t0, t1); + /* 2^10 - 2^0 */ fmul(z2_10_0, t0, z2_5_0); + + /* 2^11 - 2^1 */ fsquare(t0, z2_10_0); + /* 2^12 - 2^2 */ fsquare(t1, t0); + /* 2^20 - 2^10 */ for (i = 2; i < 10; i += 2) { + fsquare(t0, t1); + fsquare(t1, t0); + } + /* 2^20 - 2^0 */ fmul(z2_20_0, t1, z2_10_0); + + /* 2^21 - 2^1 */ fsquare(t0, z2_20_0); + /* 2^22 - 2^2 */ fsquare(t1, t0); + /* 2^40 - 2^20 */ for (i = 2; i < 20; i += 2) { + fsquare(t0, t1); + fsquare(t1, t0); + } + /* 2^40 - 2^0 */ fmul(t0, t1, z2_20_0); + + /* 2^41 - 2^1 */ fsquare(t1, t0); + /* 2^42 - 2^2 */ fsquare(t0, t1); + /* 2^50 - 2^10 */ for (i = 2; i < 10; i += 2) { + fsquare(t1, t0); + fsquare(t0, t1); + } + /* 2^50 - 2^0 */ fmul(z2_50_0, t0, z2_10_0); + + /* 2^51 - 2^1 */ fsquare(t0, z2_50_0); + /* 2^52 - 2^2 */ fsquare(t1, t0); + /* 2^100 - 2^50 */ for (i = 2; i < 50; i += 2) { + fsquare(t0, t1); + fsquare(t1, t0); + } + /* 2^100 - 2^0 */ fmul(z2_100_0, t1, z2_50_0); + + /* 2^101 - 2^1 */ fsquare(t1, z2_100_0); + /* 2^102 - 2^2 */ fsquare(t0, t1); + /* 2^200 - 2^100 */ for (i = 2; i < 100; i += 2) { + fsquare(t1, t0); + fsquare(t0, t1); + } + /* 2^200 - 2^0 */ fmul(t1, t0, z2_100_0); + + /* 2^201 - 2^1 */ fsquare(t0, t1); + /* 2^202 - 2^2 */ fsquare(t1, t0); + /* 2^250 - 2^50 */ for (i = 2; i < 50; i += 2) { + fsquare(t0, t1); + fsquare(t1, t0); + } + /* 2^250 - 2^0 */ fmul(t0, t1, z2_50_0); + + /* 2^251 - 2^1 */ fsquare(t1, t0); + /* 2^252 - 2^2 */ fsquare(t0, t1); + /* 2^253 - 2^3 */ fsquare(t1, t0); + /* 2^254 - 2^4 */ fsquare(t0, t1); + /* 2^255 - 2^5 
*/ fsquare(t1, t0); + /* 2^255 - 21 */ fmul(out, t1, z11); +} + +SECStatus +ec_Curve25519_mul(uint8_t *mypublic, const uint8_t *secret, + const uint8_t *basepoint) +{ + felem bp[5], x[5], z[5], zmone[5]; + uint8_t e[32]; + int i; + + for (i = 0; i < 32; ++i) { + e[i] = secret[i]; + } + e[0] &= 248; + e[31] &= 127; + e[31] |= 64; + fexpand(bp, basepoint); + cmult(x, z, e, bp); + crecip(zmone, z); + fmul(z, x, zmone); + fcontract(mypublic, z); + + return 0; +} diff --git a/security/nss/lib/freebl/ecl/ec_naf.c b/security/nss/lib/freebl/ecl/ec_naf.c new file mode 100644 index 000000000..cad08cb27 --- /dev/null +++ b/security/nss/lib/freebl/ecl/ec_naf.c @@ -0,0 +1,68 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "ecl-priv.h" + +/* Returns 2^e as an integer. This is meant to be used for small powers of + * two. */ +int +ec_twoTo(int e) +{ + int a = 1; + int i; + + for (i = 0; i < e; i++) { + a *= 2; + } + return a; +} + +/* Computes the windowed non-adjacent-form (NAF) of a scalar. Out should + * be an array of signed char's to output to, bitsize should be the number + * of bits of out, in is the original scalar, and w is the window size. + * NAF is discussed in the paper: D. Hankerson, J. Hernandez and A. + * Menezes, "Software implementation of elliptic curve cryptography over + * binary fields", Proc. CHES 2000. */ +mp_err +ec_compute_wNAF(signed char *out, int bitsize, const mp_int *in, int w) +{ + mp_int k; + mp_err res = MP_OKAY; + int i, twowm1, mask; + + twowm1 = ec_twoTo(w - 1); + mask = 2 * twowm1 - 1; + + MP_DIGITS(&k) = 0; + MP_CHECKOK(mp_init_copy(&k, in)); + + i = 0; + /* Compute wNAF form */ + while (mp_cmp_z(&k) > 0) { + if (mp_isodd(&k)) { + out[i] = MP_DIGIT(&k, 0) & mask; + if (out[i] >= twowm1) + out[i] -= 2 * twowm1; + + /* Subtract off out[i]. 
Note mp_sub_d only works with + * unsigned digits */ + if (out[i] >= 0) { + MP_CHECKOK(mp_sub_d(&k, out[i], &k)); + } else { + MP_CHECKOK(mp_add_d(&k, -(out[i]), &k)); + } + } else { + out[i] = 0; + } + MP_CHECKOK(mp_div_2(&k, &k)); + i++; + } + /* Zero out the remaining elements of the out array. */ + for (; i < bitsize + 1; i++) { + out[i] = 0; + } +CLEANUP: + mp_clear(&k); + return res; +} diff --git a/security/nss/lib/freebl/ecl/ecl-curve.h b/security/nss/lib/freebl/ecl/ecl-curve.h new file mode 100644 index 000000000..df061396c --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecl-curve.h @@ -0,0 +1,123 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "ecl-exp.h" +#include + +#ifndef __ecl_curve_h_ +#define __ecl_curve_h_ + +/* copied from certt.h */ +#define KU_DIGITAL_SIGNATURE (0x80) /* bit 0 */ +#define KU_KEY_AGREEMENT (0x08) /* bit 4 */ + +static const ECCurveParams ecCurve_NIST_P256 = { + "NIST-P256", ECField_GFp, 256, + "FFFFFFFF00000001000000000000000000000000FFFFFFFFFFFFFFFFFFFFFFFF", + "FFFFFFFF00000001000000000000000000000000FFFFFFFFFFFFFFFFFFFFFFFC", + "5AC635D8AA3A93E7B3EBBD55769886BC651D06B0CC53B0F63BCE3C3E27D2604B", + "6B17D1F2E12C4247F8BCE6E563A440F277037D812DEB33A0F4A13945D898C296", + "4FE342E2FE1A7F9B8EE7EB4A7C0F9E162BCE33576B315ECECBB6406837BF51F5", + "FFFFFFFF00000000FFFFFFFFFFFFFFFFBCE6FAADA7179E84F3B9CAC2FC632551", + 1, 128, 65, KU_DIGITAL_SIGNATURE | KU_KEY_AGREEMENT +}; + +static const ECCurveParams ecCurve_NIST_P384 = { + "NIST-P384", ECField_GFp, 384, + "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFF0000000000000000FFFFFFFF", + "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFFFF0000000000000000FFFFFFFC", + "B3312FA7E23EE7E4988E056BE3F82D19181D9C6EFE8141120314088F5013875AC656398D8A2ED19D2A85C8EDD3EC2AEF", + 
"AA87CA22BE8B05378EB1C71EF320AD746E1D3B628BA79B9859F741E082542A385502F25DBF55296C3A545E3872760AB7", + "3617DE4A96262C6F5D9E98BF9292DC29F8F41DBD289A147CE9DA3113B5F0B8C00A60B1CE1D7E819D7A431D7C90EA0E5F", + "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFC7634D81F4372DDF581A0DB248B0A77AECEC196ACCC52973", + 1, 192, 97, KU_DIGITAL_SIGNATURE | KU_KEY_AGREEMENT +}; + +static const ECCurveParams ecCurve_NIST_P521 = { + "NIST-P521", ECField_GFp, 521, + "01FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF", + "01FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFC", + "0051953EB9618E1C9A1F929A21A0B68540EEA2DA725B99B315F3B8B489918EF109E156193951EC7E937B1652C0BD3BB1BF073573DF883D2C34F1EF451FD46B503F00", + "00C6858E06B70404E9CD9E3ECB662395B4429C648139053FB521F828AF606B4D3DBAA14B5E77EFE75928FE1DC127A2FFA8DE3348B3C1856A429BF97E7E31C2E5BD66", + "011839296A789A3BC0045C8A5FB42C7D1BD998F54449579B446817AFBD17273E662C97EE72995EF42640C550B9013FAD0761353C7086A272C24088BE94769FD16650", + "01FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFA51868783BF2F966B7FCC0148F709A5D03BB5C9B8899C47AEBB6FB71E91386409", + 1, 256, 133, KU_DIGITAL_SIGNATURE | KU_KEY_AGREEMENT +}; + +static const ECCurveParams ecCurve25519 = { + "Curve25519", ECField_GFp, 255, + "7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffed", + "076D06", + "00", + "0900000000000000000000000000000000000000000000000000000000000000", + "20AE19A1B8A086B4E01EDD2C7748D14C923D4D7E6D7C61B229E9C5A27ECED3D9", + "1000000000000000000000000000000014def9dea2f79cd65812631a5cf5d3ed", + 8, 128, 32, KU_KEY_AGREEMENT +}; + +/* mapping between ECCurveName enum and pointers to ECCurveParams */ +static const ECCurveParams *ecCurve_map[] = { + NULL, /* ECCurve_noName */ + NULL, /* ECCurve_NIST_P192 */ + NULL, /* ECCurve_NIST_P224 */ + &ecCurve_NIST_P256, /* 
ECCurve_NIST_P256 */ + &ecCurve_NIST_P384, /* ECCurve_NIST_P384 */ + &ecCurve_NIST_P521, /* ECCurve_NIST_P521 */ + NULL, /* ECCurve_NIST_K163 */ + NULL, /* ECCurve_NIST_B163 */ + NULL, /* ECCurve_NIST_K233 */ + NULL, /* ECCurve_NIST_B233 */ + NULL, /* ECCurve_NIST_K283 */ + NULL, /* ECCurve_NIST_B283 */ + NULL, /* ECCurve_NIST_K409 */ + NULL, /* ECCurve_NIST_B409 */ + NULL, /* ECCurve_NIST_K571 */ + NULL, /* ECCurve_NIST_B571 */ + NULL, /* ECCurve_X9_62_PRIME_192V2 */ + NULL, /* ECCurve_X9_62_PRIME_192V3 */ + NULL, /* ECCurve_X9_62_PRIME_239V1 */ + NULL, /* ECCurve_X9_62_PRIME_239V2 */ + NULL, /* ECCurve_X9_62_PRIME_239V3 */ + NULL, /* ECCurve_X9_62_CHAR2_PNB163V1 */ + NULL, /* ECCurve_X9_62_CHAR2_PNB163V2 */ + NULL, /* ECCurve_X9_62_CHAR2_PNB163V3 */ + NULL, /* ECCurve_X9_62_CHAR2_PNB176V1 */ + NULL, /* ECCurve_X9_62_CHAR2_TNB191V1 */ + NULL, /* ECCurve_X9_62_CHAR2_TNB191V2 */ + NULL, /* ECCurve_X9_62_CHAR2_TNB191V3 */ + NULL, /* ECCurve_X9_62_CHAR2_PNB208W1 */ + NULL, /* ECCurve_X9_62_CHAR2_TNB239V1 */ + NULL, /* ECCurve_X9_62_CHAR2_TNB239V2 */ + NULL, /* ECCurve_X9_62_CHAR2_TNB239V3 */ + NULL, /* ECCurve_X9_62_CHAR2_PNB272W1 */ + NULL, /* ECCurve_X9_62_CHAR2_PNB304W1 */ + NULL, /* ECCurve_X9_62_CHAR2_TNB359V1 */ + NULL, /* ECCurve_X9_62_CHAR2_PNB368W1 */ + NULL, /* ECCurve_X9_62_CHAR2_TNB431R1 */ + NULL, /* ECCurve_SECG_PRIME_112R1 */ + NULL, /* ECCurve_SECG_PRIME_112R2 */ + NULL, /* ECCurve_SECG_PRIME_128R1 */ + NULL, /* ECCurve_SECG_PRIME_128R2 */ + NULL, /* ECCurve_SECG_PRIME_160K1 */ + NULL, /* ECCurve_SECG_PRIME_160R1 */ + NULL, /* ECCurve_SECG_PRIME_160R2 */ + NULL, /* ECCurve_SECG_PRIME_192K1 */ + NULL, /* ECCurve_SECG_PRIME_224K1 */ + NULL, /* ECCurve_SECG_PRIME_256K1 */ + NULL, /* ECCurve_SECG_CHAR2_113R1 */ + NULL, /* ECCurve_SECG_CHAR2_113R2 */ + NULL, /* ECCurve_SECG_CHAR2_131R1 */ + NULL, /* ECCurve_SECG_CHAR2_131R2 */ + NULL, /* ECCurve_SECG_CHAR2_163R1 */ + NULL, /* ECCurve_SECG_CHAR2_193R1 */ + NULL, /* ECCurve_SECG_CHAR2_193R2 */ + NULL, /* 
ECCurve_SECG_CHAR2_239K1 */
+    NULL, /* ECCurve_WTLS_1 */
+    NULL, /* ECCurve_WTLS_8 */
+    NULL, /* ECCurve_WTLS_9 */
+    &ecCurve25519, /* ECCurve25519 */
+    NULL /* ECCurve_pastLastCurve */
+};
+
+#endif
diff --git a/security/nss/lib/freebl/ecl/ecl-exp.h b/security/nss/lib/freebl/ecl/ecl-exp.h
new file mode 100644
index 000000000..44adb8a1c
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecl-exp.h
@@ -0,0 +1,167 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef __ecl_exp_h_
+#define __ecl_exp_h_
+
+/* Curve field type: prime field (GFp) or binary polynomial field (GF2m).
+ * ecgroup_fromNameAndHex in ecl.c only builds groups for ECField_GFp. */
+typedef enum {
+    ECField_GFp,
+    ECField_GF2m
+} ECField;
+
+/* Hexadecimal encoding of curve parameters.  Every char * member may be
+ * NULL; ECCurveParams_dup and EC_FreeCurveParams both allow for that. */
+struct ECCurveParamsStr {
+    char *text;         /* curve name, e.g. "NIST-P256"; copied into ECGroup.text */
+    ECField field;      /* kind of underlying field */
+    unsigned int size;  /* 256/384/521/255 in ecl-curve.h; presumably field size in bits */
+    char *irr;          /* hex: irreducible defining the field (the prime p for GFp) */
+    char *curvea;       /* hex: curve parameter a */
+    char *curveb;       /* hex: curve parameter b */
+    char *genx;         /* hex: x coordinate of the base point */
+    char *geny;         /* hex: y coordinate of the base point */
+    char *order;        /* hex: order of the base point */
+    int cofactor;       /* cofactor of the base point */
+    int security;       /* 128/192/256 in ecl-curve.h; presumably security strength in bits -- confirm */
+    int pointSize;      /* 65/97/133/32 in ecl-curve.h; presumably encoded point length in bytes -- confirm */
+    unsigned int usage; /* KU_* flags from ecl-curve.h */
+};
+typedef struct ECCurveParamsStr ECCurveParams;
+
+/* Named curve parameters.  The enumerator value is used directly as the
+ * index into ecCurve_map[] (ecl-curve.h), so the two lists must stay in the
+ * same order and ECCurve_pastLastCurve must remain the last entry. */
+typedef enum {
+
+    ECCurve_noName = 0,
+
+    /* NIST prime curves */
+    ECCurve_NIST_P192, /* not supported */
+    ECCurve_NIST_P224, /* not supported */
+    ECCurve_NIST_P256,
+    ECCurve_NIST_P384,
+    ECCurve_NIST_P521,
+
+    /* NIST binary curves */
+    ECCurve_NIST_K163, /* not supported */
+    ECCurve_NIST_B163, /* not supported */
+    ECCurve_NIST_K233, /* not supported */
+    ECCurve_NIST_B233, /* not supported */
+    ECCurve_NIST_K283, /* not supported */
+    ECCurve_NIST_B283, /* not supported */
+    ECCurve_NIST_K409, /* not supported */
+    ECCurve_NIST_B409, /* not supported */
+    ECCurve_NIST_K571, /* not supported */
+    ECCurve_NIST_B571, /* not supported */
+
+    /* ANSI X9.62 prime curves */
+    /* ECCurve_X9_62_PRIME_192V1 == ECCurve_NIST_P192 */
+    ECCurve_X9_62_PRIME_192V2, /* not supported */
+    ECCurve_X9_62_PRIME_192V3, /* not supported */
+    ECCurve_X9_62_PRIME_239V1, /* not
supported */ + ECCurve_X9_62_PRIME_239V2, /* not supported */ + ECCurve_X9_62_PRIME_239V3, /* not supported */ + /* ECCurve_X9_62_PRIME_256V1 == ECCurve_NIST_P256 */ + + /* ANSI X9.62 binary curves */ + ECCurve_X9_62_CHAR2_PNB163V1, /* not supported */ + ECCurve_X9_62_CHAR2_PNB163V2, /* not supported */ + ECCurve_X9_62_CHAR2_PNB163V3, /* not supported */ + ECCurve_X9_62_CHAR2_PNB176V1, /* not supported */ + ECCurve_X9_62_CHAR2_TNB191V1, /* not supported */ + ECCurve_X9_62_CHAR2_TNB191V2, /* not supported */ + ECCurve_X9_62_CHAR2_TNB191V3, /* not supported */ + ECCurve_X9_62_CHAR2_PNB208W1, /* not supported */ + ECCurve_X9_62_CHAR2_TNB239V1, /* not supported */ + ECCurve_X9_62_CHAR2_TNB239V2, /* not supported */ + ECCurve_X9_62_CHAR2_TNB239V3, /* not supported */ + ECCurve_X9_62_CHAR2_PNB272W1, /* not supported */ + ECCurve_X9_62_CHAR2_PNB304W1, /* not supported */ + ECCurve_X9_62_CHAR2_TNB359V1, /* not supported */ + ECCurve_X9_62_CHAR2_PNB368W1, /* not supported */ + ECCurve_X9_62_CHAR2_TNB431R1, /* not supported */ + + /* SEC2 prime curves */ + ECCurve_SECG_PRIME_112R1, /* not supported */ + ECCurve_SECG_PRIME_112R2, /* not supported */ + ECCurve_SECG_PRIME_128R1, /* not supported */ + ECCurve_SECG_PRIME_128R2, /* not supported */ + ECCurve_SECG_PRIME_160K1, /* not supported */ + ECCurve_SECG_PRIME_160R1, /* not supported */ + ECCurve_SECG_PRIME_160R2, /* not supported */ + ECCurve_SECG_PRIME_192K1, /* not supported */ + /* ECCurve_SECG_PRIME_192R1 == ECCurve_NIST_P192 */ + ECCurve_SECG_PRIME_224K1, /* not supported */ + /* ECCurve_SECG_PRIME_224R1 == ECCurve_NIST_P224 */ + ECCurve_SECG_PRIME_256K1, /* not supported */ + /* ECCurve_SECG_PRIME_256R1 == ECCurve_NIST_P256 */ + /* ECCurve_SECG_PRIME_384R1 == ECCurve_NIST_P384 */ + /* ECCurve_SECG_PRIME_521R1 == ECCurve_NIST_P521 */ + + /* SEC2 binary curves */ + ECCurve_SECG_CHAR2_113R1, /* not supported */ + ECCurve_SECG_CHAR2_113R2, /* not supported */ + ECCurve_SECG_CHAR2_131R1, /* not supported */ + 
ECCurve_SECG_CHAR2_131R2, /* not supported */ + /* ECCurve_SECG_CHAR2_163K1 == ECCurve_NIST_K163 */ + ECCurve_SECG_CHAR2_163R1, /* not supported */ + /* ECCurve_SECG_CHAR2_163R2 == ECCurve_NIST_B163 */ + ECCurve_SECG_CHAR2_193R1, /* not supported */ + ECCurve_SECG_CHAR2_193R2, /* not supported */ + /* ECCurve_SECG_CHAR2_233K1 == ECCurve_NIST_K233 */ + /* ECCurve_SECG_CHAR2_233R1 == ECCurve_NIST_B233 */ + ECCurve_SECG_CHAR2_239K1, /* not supported */ + /* ECCurve_SECG_CHAR2_283K1 == ECCurve_NIST_K283 */ + /* ECCurve_SECG_CHAR2_283R1 == ECCurve_NIST_B283 */ + /* ECCurve_SECG_CHAR2_409K1 == ECCurve_NIST_K409 */ + /* ECCurve_SECG_CHAR2_409R1 == ECCurve_NIST_B409 */ + /* ECCurve_SECG_CHAR2_571K1 == ECCurve_NIST_K571 */ + /* ECCurve_SECG_CHAR2_571R1 == ECCurve_NIST_B571 */ + + /* WTLS curves */ + ECCurve_WTLS_1, /* not supported */ + /* there is no WTLS 2 curve */ + /* ECCurve_WTLS_3 == ECCurve_NIST_K163 */ + /* ECCurve_WTLS_4 == ECCurve_SECG_CHAR2_113R1 */ + /* ECCurve_WTLS_5 == ECCurve_X9_62_CHAR2_PNB163V1 */ + /* ECCurve_WTLS_6 == ECCurve_SECG_PRIME_112R1 */ + /* ECCurve_WTLS_7 == ECCurve_SECG_PRIME_160R1 */ + ECCurve_WTLS_8, /* not supported */ + ECCurve_WTLS_9, /* not supported */ + /* ECCurve_WTLS_10 == ECCurve_NIST_K233 */ + /* ECCurve_WTLS_11 == ECCurve_NIST_B233 */ + /* ECCurve_WTLS_12 == ECCurve_NIST_P224 */ + + ECCurve25519, + + ECCurve_pastLastCurve +} ECCurveName; + +/* Aliased named curves */ + +#define ECCurve_X9_62_PRIME_192V1 ECCurve_NIST_P192 /* not supported */ +#define ECCurve_X9_62_PRIME_256V1 ECCurve_NIST_P256 +#define ECCurve_SECG_PRIME_192R1 ECCurve_NIST_P192 /* not supported */ +#define ECCurve_SECG_PRIME_224R1 ECCurve_NIST_P224 /* not supported */ +#define ECCurve_SECG_PRIME_256R1 ECCurve_NIST_P256 +#define ECCurve_SECG_PRIME_384R1 ECCurve_NIST_P384 +#define ECCurve_SECG_PRIME_521R1 ECCurve_NIST_P521 +#define ECCurve_SECG_CHAR2_163K1 ECCurve_NIST_K163 /* not supported */ +#define ECCurve_SECG_CHAR2_163R2 ECCurve_NIST_B163 /* not supported */ 
+#define ECCurve_SECG_CHAR2_233K1 ECCurve_NIST_K233 /* not supported */ +#define ECCurve_SECG_CHAR2_233R1 ECCurve_NIST_B233 /* not supported */ +#define ECCurve_SECG_CHAR2_283K1 ECCurve_NIST_K283 /* not supported */ +#define ECCurve_SECG_CHAR2_283R1 ECCurve_NIST_B283 /* not supported */ +#define ECCurve_SECG_CHAR2_409K1 ECCurve_NIST_K409 /* not supported */ +#define ECCurve_SECG_CHAR2_409R1 ECCurve_NIST_B409 /* not supported */ +#define ECCurve_SECG_CHAR2_571K1 ECCurve_NIST_K571 /* not supported */ +#define ECCurve_SECG_CHAR2_571R1 ECCurve_NIST_B571 /* not supported */ +#define ECCurve_WTLS_3 ECCurve_NIST_K163 /* not supported */ +#define ECCurve_WTLS_4 ECCurve_SECG_CHAR2_113R1 /* not supported */ +#define ECCurve_WTLS_5 ECCurve_X9_62_CHAR2_PNB163V1 /* not supported */ +#define ECCurve_WTLS_6 ECCurve_SECG_PRIME_112R1 /* not supported */ +#define ECCurve_WTLS_7 ECCurve_SECG_PRIME_160R1 /* not supported */ +#define ECCurve_WTLS_10 ECCurve_NIST_K233 /* not supported */ +#define ECCurve_WTLS_11 ECCurve_NIST_B233 /* not supported */ +#define ECCurve_WTLS_12 ECCurve_NIST_P224 /* not supported */ + +#endif /* __ecl_exp_h_ */ diff --git a/security/nss/lib/freebl/ecl/ecl-priv.h b/security/nss/lib/freebl/ecl/ecl-priv.h new file mode 100644 index 000000000..f43f19327 --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecl-priv.h @@ -0,0 +1,257 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef __ecl_priv_h_ +#define __ecl_priv_h_ + +#include "ecl.h" +#include "mpi.h" +#include "mplogic.h" +#include "../blapii.h" + +/* MAX_FIELD_SIZE_DIGITS is the maximum size of field element supported */ +/* the following needs to go away... 
*/ +#if defined(MP_USE_LONG_LONG_DIGIT) || defined(MP_USE_LONG_DIGIT) +#define ECL_SIXTY_FOUR_BIT +#else +#define ECL_THIRTY_TWO_BIT +#endif + +#define ECL_CURVE_DIGITS(curve_size_in_bits) \ + (((curve_size_in_bits) + (sizeof(mp_digit) * 8 - 1)) / (sizeof(mp_digit) * 8)) +#define ECL_BITS (sizeof(mp_digit) * 8) +#define ECL_MAX_FIELD_SIZE_DIGITS (80 / sizeof(mp_digit)) + +/* Gets the i'th bit in the binary representation of a. If i >= length(a), + * then return 0. (The above behaviour differs from mpl_get_bit, which + * causes an error if i >= length(a).) */ +#define MP_GET_BIT(a, i) \ + ((i) >= mpl_significant_bits((a))) ? 0 : mpl_get_bit((a), (i)) + +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD) +#define MP_ADD_CARRY(a1, a2, s, carry) \ + { \ + mp_word w; \ + w = ((mp_word)carry) + (a1) + (a2); \ + s = ACCUM(w); \ + carry = CARRYOUT(w); \ + } + +#define MP_SUB_BORROW(a1, a2, s, borrow) \ + { \ + mp_word w; \ + w = ((mp_word)(a1)) - (a2)-borrow; \ + s = ACCUM(w); \ + borrow = (w >> MP_DIGIT_BIT) & 1; \ + } + +#else +/* NOTE, + * carry and borrow are both read and written. + * a1 or a2 and s could be the same variable. + * don't trash those outputs until their respective inputs have + * been read. */ +#define MP_ADD_CARRY(a1, a2, s, carry) \ + { \ + mp_digit tmp, sum; \ + tmp = (a1); \ + sum = tmp + (a2); \ + tmp = (sum < tmp); /* detect overflow */ \ + s = sum += carry; \ + carry = tmp + (sum < carry); \ + } + +#define MP_SUB_BORROW(a1, a2, s, borrow) \ + { \ + mp_digit tmp; \ + tmp = (a1); \ + s = tmp - (a2); \ + tmp = (s > tmp); /* detect borrow */ \ + if (borrow && !s--) \ + tmp++; \ + borrow = tmp; \ + } +#endif + +struct GFMethodStr; +typedef struct GFMethodStr GFMethod; +struct GFMethodStr { + /* Indicates whether the structure was constructed from dynamic memory + * or statically created. */ + int constructed; + /* Irreducible that defines the field. For prime fields, this is the + * prime p. 
For binary polynomial fields, this is the bitstring + * representation of the irreducible polynomial. */ + mp_int irr; + /* For prime fields, the value irr_arr[0] is the number of bits in the + * field. For binary polynomial fields, the irreducible polynomial + * f(t) is represented as an array of unsigned int[], where f(t) is + * of the form: f(t) = t^p[0] + t^p[1] + ... + t^p[4] where m = p[0] + * > p[1] > ... > p[4] = 0. */ + unsigned int irr_arr[5]; + /* Field arithmetic methods. All methods (except field_enc and + * field_dec) are assumed to take field-encoded parameters and return + * field-encoded values. All methods (except field_enc and field_dec) + * are required to be implemented. */ + mp_err (*field_add)(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); + mp_err (*field_neg)(const mp_int *a, mp_int *r, const GFMethod *meth); + mp_err (*field_sub)(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); + mp_err (*field_mod)(const mp_int *a, mp_int *r, const GFMethod *meth); + mp_err (*field_mul)(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); + mp_err (*field_sqr)(const mp_int *a, mp_int *r, const GFMethod *meth); + mp_err (*field_div)(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); + mp_err (*field_enc)(const mp_int *a, mp_int *r, const GFMethod *meth); + mp_err (*field_dec)(const mp_int *a, mp_int *r, const GFMethod *meth); + /* Extra storage for implementation-specific data. Any memory + * allocated to these extra fields will be cleared by extra_free. */ + void *extra1; + void *extra2; + void (*extra_free)(GFMethod *meth); +}; + +/* Construct generic GFMethods. */ +GFMethod *GFMethod_consGFp(const mp_int *irr); +GFMethod *GFMethod_consGFp_mont(const mp_int *irr); + +/* Free the memory allocated (if any) to a GFMethod object. 
*/ +void GFMethod_free(GFMethod *meth); + +struct ECGroupStr { + /* Indicates whether the structure was constructed from dynamic memory + * or statically created. */ + int constructed; + /* Field definition and arithmetic. */ + GFMethod *meth; + /* Textual representation of curve name, if any. */ + char *text; + /* Curve parameters, field-encoded. */ + mp_int curvea, curveb; + /* x and y coordinates of the base point, field-encoded. */ + mp_int genx, geny; + /* Order and cofactor of the base point. */ + mp_int order; + int cofactor; + /* Point arithmetic methods. All methods are assumed to take + * field-encoded parameters and return field-encoded values. All + * methods (except base_point_mul and points_mul) are required to be + * implemented. */ + mp_err (*point_add)(const mp_int *px, const mp_int *py, + const mp_int *qx, const mp_int *qy, mp_int *rx, + mp_int *ry, const ECGroup *group); + mp_err (*point_sub)(const mp_int *px, const mp_int *py, + const mp_int *qx, const mp_int *qy, mp_int *rx, + mp_int *ry, const ECGroup *group); + mp_err (*point_dbl)(const mp_int *px, const mp_int *py, mp_int *rx, + mp_int *ry, const ECGroup *group); + mp_err (*point_mul)(const mp_int *n, const mp_int *px, + const mp_int *py, mp_int *rx, mp_int *ry, + const ECGroup *group); + mp_err (*base_point_mul)(const mp_int *n, mp_int *rx, mp_int *ry, + const ECGroup *group); + mp_err (*points_mul)(const mp_int *k1, const mp_int *k2, + const mp_int *px, const mp_int *py, mp_int *rx, + mp_int *ry, const ECGroup *group); + mp_err (*validate_point)(const mp_int *px, const mp_int *py, const ECGroup *group); + /* Extra storage for implementation-specific data. Any memory + * allocated to these extra fields will be cleared by extra_free. */ + void *extra1; + void *extra2; + void (*extra_free)(ECGroup *group); +}; + +/* Wrapper functions for generic prime field arithmetic. 
*/ +mp_err ec_GFp_add(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); +mp_err ec_GFp_neg(const mp_int *a, mp_int *r, const GFMethod *meth); +mp_err ec_GFp_sub(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); + +/* fixed length in-line adds. Count is in words */ +mp_err ec_GFp_add_3(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); +mp_err ec_GFp_add_4(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); +mp_err ec_GFp_add_5(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); +mp_err ec_GFp_add_6(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); +mp_err ec_GFp_sub_3(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); +mp_err ec_GFp_sub_4(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); +mp_err ec_GFp_sub_5(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); +mp_err ec_GFp_sub_6(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); + +mp_err ec_GFp_mod(const mp_int *a, mp_int *r, const GFMethod *meth); +mp_err ec_GFp_mul(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); +mp_err ec_GFp_sqr(const mp_int *a, mp_int *r, const GFMethod *meth); +mp_err ec_GFp_div(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); +/* Wrapper functions for generic binary polynomial field arithmetic. */ +mp_err ec_GF2m_add(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); +mp_err ec_GF2m_neg(const mp_int *a, mp_int *r, const GFMethod *meth); +mp_err ec_GF2m_mod(const mp_int *a, mp_int *r, const GFMethod *meth); +mp_err ec_GF2m_mul(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); +mp_err ec_GF2m_sqr(const mp_int *a, mp_int *r, const GFMethod *meth); +mp_err ec_GF2m_div(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); + +/* Montgomery prime field arithmetic. 
*/ +mp_err ec_GFp_mul_mont(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); +mp_err ec_GFp_sqr_mont(const mp_int *a, mp_int *r, const GFMethod *meth); +mp_err ec_GFp_div_mont(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth); +mp_err ec_GFp_enc_mont(const mp_int *a, mp_int *r, const GFMethod *meth); +mp_err ec_GFp_dec_mont(const mp_int *a, mp_int *r, const GFMethod *meth); +void ec_GFp_extra_free_mont(GFMethod *meth); + +/* point multiplication */ +mp_err ec_pts_mul_basic(const mp_int *k1, const mp_int *k2, + const mp_int *px, const mp_int *py, mp_int *rx, + mp_int *ry, const ECGroup *group); +mp_err ec_pts_mul_simul_w2(const mp_int *k1, const mp_int *k2, + const mp_int *px, const mp_int *py, mp_int *rx, + mp_int *ry, const ECGroup *group); + +/* Computes the windowed non-adjacent-form (NAF) of a scalar. Out should + * be an array of signed char's to output to, bitsize should be the number + * of bits of out, in is the original scalar, and w is the window size. + * NAF is discussed in the paper: D. Hankerson, J. Hernandez and A. + * Menezes, "Software implementation of elliptic curve cryptography over + * binary fields", Proc. CHES 2000. 
*/ +mp_err ec_compute_wNAF(signed char *out, int bitsize, const mp_int *in, + int w); + +/* Optimized field arithmetic */ +mp_err ec_group_set_gfp192(ECGroup *group, ECCurveName); +mp_err ec_group_set_gfp224(ECGroup *group, ECCurveName); +mp_err ec_group_set_gfp256(ECGroup *group, ECCurveName); +mp_err ec_group_set_gfp384(ECGroup *group, ECCurveName); +mp_err ec_group_set_gfp521(ECGroup *group, ECCurveName); +mp_err ec_group_set_gf2m163(ECGroup *group, ECCurveName name); +mp_err ec_group_set_gf2m193(ECGroup *group, ECCurveName name); +mp_err ec_group_set_gf2m233(ECGroup *group, ECCurveName name); + +/* Optimized point multiplication */ +mp_err ec_group_set_gfp256_32(ECGroup *group, ECCurveName name); + +/* Optimized floating-point arithmetic */ +#ifdef ECL_USE_FP +mp_err ec_group_set_secp160r1_fp(ECGroup *group); +mp_err ec_group_set_nistp192_fp(ECGroup *group); +mp_err ec_group_set_nistp224_fp(ECGroup *group); +#endif + +SECStatus ec_Curve25519_mul(PRUint8 *q, const PRUint8 *s, const PRUint8 *p); +#endif /* __ecl_priv_h_ */ diff --git a/security/nss/lib/freebl/ecl/ecl.c b/security/nss/lib/freebl/ecl/ecl.c new file mode 100644 index 000000000..3540af781 --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecl.c @@ -0,0 +1,301 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mpi.h" +#include "mplogic.h" +#include "ecl.h" +#include "ecl-priv.h" +#include "ecp.h" +#include +#include + +/* Allocate memory for a new ECGroup object. 
*/ +ECGroup * +ECGroup_new() +{ + mp_err res = MP_OKAY; + ECGroup *group; + group = (ECGroup *)malloc(sizeof(ECGroup)); + if (group == NULL) + return NULL; + group->constructed = MP_YES; + group->meth = NULL; + group->text = NULL; + MP_DIGITS(&group->curvea) = 0; + MP_DIGITS(&group->curveb) = 0; + MP_DIGITS(&group->genx) = 0; + MP_DIGITS(&group->geny) = 0; + MP_DIGITS(&group->order) = 0; + group->base_point_mul = NULL; + group->points_mul = NULL; + group->validate_point = NULL; + group->extra1 = NULL; + group->extra2 = NULL; + group->extra_free = NULL; + MP_CHECKOK(mp_init(&group->curvea)); + MP_CHECKOK(mp_init(&group->curveb)); + MP_CHECKOK(mp_init(&group->genx)); + MP_CHECKOK(mp_init(&group->geny)); + MP_CHECKOK(mp_init(&group->order)); + +CLEANUP: + if (res != MP_OKAY) { + ECGroup_free(group); + return NULL; + } + return group; +} + +/* Construct a generic ECGroup for elliptic curves over prime fields. */ +ECGroup * +ECGroup_consGFp(const mp_int *irr, const mp_int *curvea, + const mp_int *curveb, const mp_int *genx, + const mp_int *geny, const mp_int *order, int cofactor) +{ + mp_err res = MP_OKAY; + ECGroup *group = NULL; + + group = ECGroup_new(); + if (group == NULL) + return NULL; + + group->meth = GFMethod_consGFp(irr); + if (group->meth == NULL) { + res = MP_MEM; + goto CLEANUP; + } + MP_CHECKOK(mp_copy(curvea, &group->curvea)); + MP_CHECKOK(mp_copy(curveb, &group->curveb)); + MP_CHECKOK(mp_copy(genx, &group->genx)); + MP_CHECKOK(mp_copy(geny, &group->geny)); + MP_CHECKOK(mp_copy(order, &group->order)); + group->cofactor = cofactor; + group->point_add = &ec_GFp_pt_add_aff; + group->point_sub = &ec_GFp_pt_sub_aff; + group->point_dbl = &ec_GFp_pt_dbl_aff; + group->point_mul = &ec_GFp_pt_mul_jm_wNAF; + group->base_point_mul = NULL; + group->points_mul = &ec_GFp_pts_mul_jac; + group->validate_point = &ec_GFp_validate_point; + +CLEANUP: + if (res != MP_OKAY) { + ECGroup_free(group); + return NULL; + } + return group; +} + +/* Construct a generic ECGroup for 
elliptic curves over prime fields with
+ * field arithmetic implemented in Montgomery coordinates. */
+/* Same contract as ECGroup_consGFp, except that the field method comes from
+ * GFMethod_consGFp_mont and curvea, curveb, genx and geny are stored after
+ * passing through that method's field_enc; order is copied unchanged. */
+ECGroup *
+ECGroup_consGFp_mont(const mp_int *irr, const mp_int *curvea,
+                     const mp_int *curveb, const mp_int *genx,
+                     const mp_int *geny, const mp_int *order, int cofactor)
+{
+    ECGroup *group = ECGroup_new();
+    GFMethod *mont;
+    mp_err res = MP_OKAY;
+
+    if (group == NULL) {
+        return NULL;
+    }
+
+    /* Store the method in the group right away so that ECGroup_free
+     * releases it on every failure path below. */
+    mont = GFMethod_consGFp_mont(irr);
+    group->meth = mont;
+    if (mont == NULL) {
+        res = MP_MEM;
+        goto CLEANUP;
+    }
+
+    /* Field elements are kept field-encoded ... */
+    MP_CHECKOK(mont->field_enc(curvea, &group->curvea, mont));
+    MP_CHECKOK(mont->field_enc(curveb, &group->curveb, mont));
+    MP_CHECKOK(mont->field_enc(genx, &group->genx, mont));
+    MP_CHECKOK(mont->field_enc(geny, &group->geny, mont));
+    /* ... while the group order is copied as is. */
+    MP_CHECKOK(mp_copy(order, &group->order));
+    group->cofactor = cofactor;
+
+    group->point_add = &ec_GFp_pt_add_aff;
+    group->point_sub = &ec_GFp_pt_sub_aff;
+    group->point_dbl = &ec_GFp_pt_dbl_aff;
+    group->point_mul = &ec_GFp_pt_mul_jm_wNAF;
+    group->base_point_mul = NULL;
+    group->points_mul = &ec_GFp_pts_mul_jac;
+    group->validate_point = &ec_GFp_validate_point;
+
+CLEANUP:
+    if (res == MP_OKAY) {
+        return group;
+    }
+    ECGroup_free(group);
+    return NULL;
+}
+
+/* Construct ECGroup from hex parameters and name, if any. Called by
+ * ECGroup_fromHex and ECGroup_fromName.
*/ +ECGroup * +ecgroup_fromNameAndHex(const ECCurveName name, + const ECCurveParams *params) +{ + mp_int irr, curvea, curveb, genx, geny, order; + int bits; + ECGroup *group = NULL; + mp_err res = MP_OKAY; + + /* initialize values */ + MP_DIGITS(&irr) = 0; + MP_DIGITS(&curvea) = 0; + MP_DIGITS(&curveb) = 0; + MP_DIGITS(&genx) = 0; + MP_DIGITS(&geny) = 0; + MP_DIGITS(&order) = 0; + MP_CHECKOK(mp_init(&irr)); + MP_CHECKOK(mp_init(&curvea)); + MP_CHECKOK(mp_init(&curveb)); + MP_CHECKOK(mp_init(&genx)); + MP_CHECKOK(mp_init(&geny)); + MP_CHECKOK(mp_init(&order)); + MP_CHECKOK(mp_read_radix(&irr, params->irr, 16)); + MP_CHECKOK(mp_read_radix(&curvea, params->curvea, 16)); + MP_CHECKOK(mp_read_radix(&curveb, params->curveb, 16)); + MP_CHECKOK(mp_read_radix(&genx, params->genx, 16)); + MP_CHECKOK(mp_read_radix(&geny, params->geny, 16)); + MP_CHECKOK(mp_read_radix(&order, params->order, 16)); + + /* determine number of bits */ + bits = mpl_significant_bits(&irr) - 1; + if (bits < MP_OKAY) { + res = bits; + goto CLEANUP; + } + + /* determine which optimizations (if any) to use */ + if (params->field == ECField_GFp) { + switch (name) { + case ECCurve_SECG_PRIME_256R1: + group = + ECGroup_consGFp(&irr, &curvea, &curveb, &genx, &geny, + &order, params->cofactor); + if (group == NULL) { + res = MP_UNDEF; + goto CLEANUP; + } + MP_CHECKOK(ec_group_set_gfp256(group, name)); + MP_CHECKOK(ec_group_set_gfp256_32(group, name)); + break; + case ECCurve_SECG_PRIME_521R1: + group = + ECGroup_consGFp(&irr, &curvea, &curveb, &genx, &geny, + &order, params->cofactor); + if (group == NULL) { + res = MP_UNDEF; + goto CLEANUP; + } + MP_CHECKOK(ec_group_set_gfp521(group, name)); + break; + default: + /* use generic arithmetic */ + group = + ECGroup_consGFp_mont(&irr, &curvea, &curveb, &genx, &geny, + &order, params->cofactor); + if (group == NULL) { + res = MP_UNDEF; + goto CLEANUP; + } + } + } else { + res = MP_UNDEF; + goto CLEANUP; + } + + /* set name, if any */ + if ((group != NULL) && 
(params->text != NULL)) { + group->text = strdup(params->text); + if (group->text == NULL) { + res = MP_MEM; + } + } + +CLEANUP: + mp_clear(&irr); + mp_clear(&curvea); + mp_clear(&curveb); + mp_clear(&genx); + mp_clear(&geny); + mp_clear(&order); + if (res != MP_OKAY) { + ECGroup_free(group); + return NULL; + } + return group; +} + +/* Construct ECGroup from hexadecimal representations of parameters. */ +ECGroup * +ECGroup_fromHex(const ECCurveParams *params) +{ + return ecgroup_fromNameAndHex(ECCurve_noName, params); +} + +/* Construct ECGroup from named parameters. */ +ECGroup * +ECGroup_fromName(const ECCurveName name) +{ + ECGroup *group = NULL; + ECCurveParams *params = NULL; + mp_err res = MP_OKAY; + + params = EC_GetNamedCurveParams(name); + if (params == NULL) { + res = MP_UNDEF; + goto CLEANUP; + } + + /* construct actual group */ + group = ecgroup_fromNameAndHex(name, params); + if (group == NULL) { + res = MP_UNDEF; + goto CLEANUP; + } + +CLEANUP: + EC_FreeCurveParams(params); + if (res != MP_OKAY) { + ECGroup_free(group); + return NULL; + } + return group; +} + +/* Validates an EC public key as described in Section 5.2.2 of X9.62. */ +mp_err +ECPoint_validate(const ECGroup *group, const mp_int *px, const mp_int *py) +{ + /* 1: Verify that publicValue is not the point at infinity */ + /* 2: Verify that the coordinates of publicValue are elements + * of the field. + */ + /* 3: Verify that publicValue is on the curve. */ + /* 4: Verify that the order of the curve times the publicValue + * is the point at infinity. + */ + return group->validate_point(px, py, group); +} + +/* Free the memory allocated (if any) to an ECGroup object. 
*/ +void +ECGroup_free(ECGroup *group) +{ + if (group == NULL) + return; + GFMethod_free(group->meth); + if (group->constructed == MP_NO) + return; + mp_clear(&group->curvea); + mp_clear(&group->curveb); + mp_clear(&group->genx); + mp_clear(&group->geny); + mp_clear(&group->order); + if (group->text != NULL) + free(group->text); + if (group->extra_free != NULL) + group->extra_free(group); + free(group); +} diff --git a/security/nss/lib/freebl/ecl/ecl.h b/security/nss/lib/freebl/ecl/ecl.h new file mode 100644 index 000000000..ddcbb1f3a --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecl.h @@ -0,0 +1,60 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* Although this is not an exported header file, code which uses elliptic + * curve point operations will need to include it. */ + +#ifndef __ecl_h_ +#define __ecl_h_ + +#include "blapi.h" +#include "ecl-exp.h" +#include "mpi.h" + +struct ECGroupStr; +typedef struct ECGroupStr ECGroup; + +/* Construct ECGroup from hexadecimal representations of parameters. */ +ECGroup *ECGroup_fromHex(const ECCurveParams *params); + +/* Construct ECGroup from named parameters. */ +ECGroup *ECGroup_fromName(const ECCurveName name); + +/* Free an allocated ECGroup. */ +void ECGroup_free(ECGroup *group); + +/* Construct ECCurveParams from an ECCurveName */ +ECCurveParams *EC_GetNamedCurveParams(const ECCurveName name); + +/* Duplicates an ECCurveParams */ +ECCurveParams *ECCurveParams_dup(const ECCurveParams *params); + +/* Free an allocated ECCurveParams */ +void EC_FreeCurveParams(ECCurveParams *params); + +/* Elliptic curve scalar-point multiplication. Computes Q(x, y) = k * P(x, + * y). If x, y = NULL, then P is assumed to be the generator (base point) + * of the group of points on the elliptic curve. Input and output values + * are assumed to be NOT field-encoded. 
*/ +mp_err ECPoint_mul(const ECGroup *group, const mp_int *k, const mp_int *px, + const mp_int *py, mp_int *qx, mp_int *qy); + +/* Elliptic curve scalar-point multiplication. Computes Q(x, y) = k1 * G + + * k2 * P(x, y), where G is the generator (base point) of the group of + * points on the elliptic curve. Input and output values are assumed to + * be NOT field-encoded. */ +mp_err ECPoints_mul(const ECGroup *group, const mp_int *k1, + const mp_int *k2, const mp_int *px, const mp_int *py, + mp_int *qx, mp_int *qy); + +/* Validates an EC public key as described in Section 5.2.2 of X9.62. + * Returns MP_YES if the public key is valid, MP_NO if the public key + * is invalid, or an error code if the validation could not be + * performed. */ +mp_err ECPoint_validate(const ECGroup *group, const mp_int *px, const mp_int *py); + +SECStatus ec_Curve25519_pt_mul(SECItem *X, SECItem *k, SECItem *P); +SECStatus ec_Curve25519_pt_validate(const SECItem *px); + +#endif /* __ecl_h_ */ diff --git a/security/nss/lib/freebl/ecl/ecl_curve.c b/security/nss/lib/freebl/ecl/ecl_curve.c new file mode 100644 index 000000000..cf090cfc3 --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecl_curve.c @@ -0,0 +1,93 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "ecl.h" +#include "ecl-curve.h" +#include "ecl-priv.h" +#include +#include + +#define CHECK(func) \ + if ((func) == NULL) { \ + res = 0; \ + goto CLEANUP; \ + } + +/* Duplicates an ECCurveParams */ +ECCurveParams * +ECCurveParams_dup(const ECCurveParams *params) +{ + int res = 1; + ECCurveParams *ret = NULL; + + CHECK(ret = (ECCurveParams *)calloc(1, sizeof(ECCurveParams))); + if (params->text != NULL) { + CHECK(ret->text = strdup(params->text)); + } + ret->field = params->field; + ret->size = params->size; + if (params->irr != NULL) { + CHECK(ret->irr = strdup(params->irr)); + } + if (params->curvea != NULL) { + CHECK(ret->curvea = strdup(params->curvea)); + } + if (params->curveb != NULL) { + CHECK(ret->curveb = strdup(params->curveb)); + } + if (params->genx != NULL) { + CHECK(ret->genx = strdup(params->genx)); + } + if (params->geny != NULL) { + CHECK(ret->geny = strdup(params->geny)); + } + if (params->order != NULL) { + CHECK(ret->order = strdup(params->order)); + } + ret->cofactor = params->cofactor; + +CLEANUP: + if (res != 1) { + EC_FreeCurveParams(ret); + return NULL; + } + return ret; +} + +#undef CHECK + +/* Construct ECCurveParams from an ECCurveName */ +ECCurveParams * +EC_GetNamedCurveParams(const ECCurveName name) +{ + if ((name <= ECCurve_noName) || (ECCurve_pastLastCurve <= name) || + (ecCurve_map[name] == NULL)) { + return NULL; + } else { + return ECCurveParams_dup(ecCurve_map[name]); + } +} + +/* Free the memory allocated (if any) to an ECCurveParams object. 
*/ +void +EC_FreeCurveParams(ECCurveParams *params) +{ + if (params == NULL) + return; + if (params->text != NULL) + free(params->text); + if (params->irr != NULL) + free(params->irr); + if (params->curvea != NULL) + free(params->curvea); + if (params->curveb != NULL) + free(params->curveb); + if (params->genx != NULL) + free(params->genx); + if (params->geny != NULL) + free(params->geny); + if (params->order != NULL) + free(params->order); + free(params); +} diff --git a/security/nss/lib/freebl/ecl/ecl_gf.c b/security/nss/lib/freebl/ecl/ecl_gf.c new file mode 100644 index 000000000..81b007705 --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecl_gf.c @@ -0,0 +1,958 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mpi.h" +#include "mp_gf2m.h" +#include "ecl-priv.h" +#include "mpi-priv.h" +#include + +/* Allocate memory for a new GFMethod object. */ +GFMethod * +GFMethod_new() +{ + mp_err res = MP_OKAY; + GFMethod *meth; + meth = (GFMethod *)malloc(sizeof(GFMethod)); + if (meth == NULL) + return NULL; + meth->constructed = MP_YES; + MP_DIGITS(&meth->irr) = 0; + meth->extra_free = NULL; + MP_CHECKOK(mp_init(&meth->irr)); + +CLEANUP: + if (res != MP_OKAY) { + GFMethod_free(meth); + return NULL; + } + return meth; +} + +/* Construct a generic GFMethod for arithmetic over prime fields with + * irreducible irr. 
*/
+GFMethod *
+GFMethod_consGFp(const mp_int *irr)
+{
+    mp_err res = MP_OKAY;
+    GFMethod *meth = GFMethod_new();
+    int idx;
+
+    if (meth == NULL) {
+        return NULL;
+    }
+
+    /* Record the modulus and its bit length; the other irr_arr slots are
+     * cleared. */
+    MP_CHECKOK(mp_copy(irr, &meth->irr));
+    meth->irr_arr[0] = mpl_significant_bits(irr);
+    for (idx = 1; idx <= 4; idx++) {
+        meth->irr_arr[idx] = 0;
+    }
+
+    /* Start from the generic add/sub and switch to the unrolled variants
+     * when the modulus occupies exactly 3, 4, 5 or 6 digits. */
+    meth->field_add = &ec_GFp_add;
+    meth->field_sub = &ec_GFp_sub;
+    switch (MP_USED(&meth->irr)) {
+        /* maybe we need 1 and 2 words here as well?*/
+        case 3:
+            meth->field_add = &ec_GFp_add_3;
+            meth->field_sub = &ec_GFp_sub_3;
+            break;
+        case 4:
+            meth->field_add = &ec_GFp_add_4;
+            meth->field_sub = &ec_GFp_sub_4;
+            break;
+        case 5:
+            meth->field_add = &ec_GFp_add_5;
+            meth->field_sub = &ec_GFp_sub_5;
+            break;
+        case 6:
+            meth->field_add = &ec_GFp_add_6;
+            meth->field_sub = &ec_GFp_sub_6;
+            break;
+        default:
+            break;
+    }
+
+    /* Remaining operations are the generic prime-field wrappers; there is
+     * no field encoding and no extra state. */
+    meth->field_mul = &ec_GFp_mul;
+    meth->field_sqr = &ec_GFp_sqr;
+    meth->field_div = &ec_GFp_div;
+    meth->field_neg = &ec_GFp_neg;
+    meth->field_mod = &ec_GFp_mod;
+    meth->field_enc = NULL;
+    meth->field_dec = NULL;
+    meth->extra_free = NULL;
+    meth->extra1 = NULL;
+    meth->extra2 = NULL;
+
+CLEANUP:
+    if (res == MP_OKAY) {
+        return meth;
+    }
+    GFMethod_free(meth);
+    return NULL;
+}
+
+/* Release a GFMethod. NULL and objects flagged as not constructed are
+ * left untouched. */
+void
+GFMethod_free(GFMethod *meth)
+{
+    if ((meth == NULL) || (meth->constructed == MP_NO)) {
+        return;
+    }
+    mp_clear(&meth->irr);
+    if (meth->extra_free != NULL) {
+        meth->extra_free(meth);
+    }
+    free(meth);
+}
+
+/* Wrapper functions for generic prime field arithmetic. */
+
+/* Add two field elements.
Assumes that 0 <= a, b < meth->irr */ +mp_err +ec_GFp_add(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth) +{ + /* PRE: 0 <= a, b < p = meth->irr POST: 0 <= r < p, r = a + b (mod p) */ + mp_err res; + + if ((res = mp_add(a, b, r)) != MP_OKAY) { + return res; + } + if (mp_cmp(r, &meth->irr) >= 0) { + return mp_sub(r, &meth->irr, r); + } + return res; +} + +/* Negates a field element. Assumes that 0 <= a < meth->irr */ +mp_err +ec_GFp_neg(const mp_int *a, mp_int *r, const GFMethod *meth) +{ + /* PRE: 0 <= a < p = meth->irr POST: 0 <= r < p, r = -a (mod p) */ + + if (mp_cmp_z(a) == 0) { + mp_zero(r); + return MP_OKAY; + } + return mp_sub(&meth->irr, a, r); +} + +/* Subtracts two field elements. Assumes that 0 <= a, b < meth->irr */ +mp_err +ec_GFp_sub(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth) +{ + mp_err res = MP_OKAY; + + /* PRE: 0 <= a, b < p = meth->irr POST: 0 <= r < p, r = a - b (mod p) */ + res = mp_sub(a, b, r); + if (res == MP_RANGE) { + MP_CHECKOK(mp_sub(b, a, r)); + if (mp_cmp_z(r) < 0) { + MP_CHECKOK(mp_add(r, &meth->irr, r)); + } + MP_CHECKOK(ec_GFp_neg(r, r, meth)); + } + if (mp_cmp_z(r) < 0) { + MP_CHECKOK(mp_add(r, &meth->irr, r)); + } +CLEANUP: + return res; +} +/* + * Inline adds for small curve lengths. 
+ */
+/* 3 words: r = a + b (mod meth->irr) for a 3-digit modulus, using an unrolled carry chain */
+mp_err
+ec_GFp_add_3(const mp_int *a, const mp_int *b, mp_int *r,
+             const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    mp_digit a0 = 0, a1 = 0, a2 = 0;
+    mp_digit r0 = 0, r1 = 0, r2 = 0;
+    mp_digit carry;
+
+    switch (MP_USED(a)) { /* load the used digits of a; cases fall through on purpose, unused high digits stay 0 */
+        case 3:
+            a2 = MP_DIGIT(a, 2);
+        case 2:
+            a1 = MP_DIGIT(a, 1);
+        case 1:
+            a0 = MP_DIGIT(a, 0);
+    }
+    switch (MP_USED(b)) { /* b is loaded straight into the result digits; intentional fall-through */
+        case 3:
+            r2 = MP_DIGIT(b, 2);
+        case 2:
+            r1 = MP_DIGIT(b, 1);
+        case 1:
+            r0 = MP_DIGIT(b, 0);
+    }
+
+#ifndef MPI_AMD64_ADD
+    carry = 0;
+    MP_ADD_CARRY(a0, r0, r0, carry);
+    MP_ADD_CARRY(a1, r1, r1, carry);
+    MP_ADD_CARRY(a2, r2, r2, carry);
+#else
+    __asm__( /* x86-64 add/adc chain; the final adcq captures the carry-out in operand 3 */
+        "xorq %3,%3 \n\t"
+        "addq %4,%0 \n\t"
+        "adcq %5,%1 \n\t"
+        "adcq %6,%2 \n\t"
+        "adcq $0,%3 \n\t"
+        : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(carry)
+        : "r"(a0), "r"(a1), "r"(a2),
+          "0"(r0), "1"(r1), "2"(r2)
+        : "%cc");
+#endif
+
+    MP_CHECKOK(s_mp_pad(r, 3)); /* presumably ensures r holds at least 3 digits - confirm against mpi */
+    MP_DIGIT(r, 2) = r2;
+    MP_DIGIT(r, 1) = r1;
+    MP_DIGIT(r, 0) = r0;
+    MP_SIGN(r) = MP_ZPOS;
+    MP_USED(r) = 3;
+
+    /* Do quick 'subtract' if we've gone over
+     * (add the 2's complement of the curve field) */
+    a2 = MP_DIGIT(&meth->irr, 2);
+    if (carry || r2 > a2 ||
+        ((r2 == a2) && mp_cmp(r, &meth->irr) != MP_LT)) { /* full compare only when the top digits tie */
+        a1 = MP_DIGIT(&meth->irr, 1);
+        a0 = MP_DIGIT(&meth->irr, 0);
+#ifndef MPI_AMD64_ADD
+        carry = 0;
+        MP_SUB_BORROW(r0, a0, r0, carry);
+        MP_SUB_BORROW(r1, a1, r1, carry);
+        MP_SUB_BORROW(r2, a2, r2, carry);
+#else
+        __asm__(
+            "subq %3,%0 \n\t"
+            "sbbq %4,%1 \n\t"
+            "sbbq %5,%2 \n\t"
+            : "=r"(r0), "=r"(r1), "=r"(r2)
+            : "r"(a0), "r"(a1), "r"(a2),
+              "0"(r0), "1"(r1), "2"(r2)
+            : "%cc");
+#endif
+        MP_DIGIT(r, 2) = r2;
+        MP_DIGIT(r, 1) = r1;
+        MP_DIGIT(r, 0) = r0;
+    }
+
+    s_mp_clamp(r); /* presumably trims leading zero digits from r - confirm against mpi */
+
+CLEANUP:
+    return res;
+}
+
+/* 4 words: same scheme as ec_GFp_add_3 for a 4-digit modulus */
+mp_err
+ec_GFp_add_4(const mp_int *a, const mp_int *b, mp_int *r,
+             const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    mp_digit a0 = 0, a1 = 0, a2 = 0, a3 = 0;
+    mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0;
+    mp_digit carry;
+
+    switch (MP_USED(a)) { /* intentional fall-through: load only the used digits */
+        case 4:
+            a3 = MP_DIGIT(a, 3);
+        case 3:
+            a2 = MP_DIGIT(a, 2);
+        case 2:
+            a1 = MP_DIGIT(a, 1);
+        case 1:
+            a0 = MP_DIGIT(a, 0);
+    }
+    switch (MP_USED(b)) { /* intentional fall-through */
+        case 4:
+            r3 = MP_DIGIT(b, 3);
+        case 3:
+            r2 = MP_DIGIT(b, 2);
+        case 2:
+            r1 = MP_DIGIT(b, 1);
+        case 1:
+            r0 = MP_DIGIT(b, 0);
+    }
+
+#ifndef MPI_AMD64_ADD
+    carry = 0;
+    MP_ADD_CARRY(a0, r0, r0, carry);
+    MP_ADD_CARRY(a1, r1, r1, carry);
+    MP_ADD_CARRY(a2, r2, r2, carry);
+    MP_ADD_CARRY(a3, r3, r3, carry);
+#else
+    __asm__( /* x86-64 add/adc chain; carry-out ends up in operand 4 */
+        "xorq %4,%4 \n\t"
+        "addq %5,%0 \n\t"
+        "adcq %6,%1 \n\t"
+        "adcq %7,%2 \n\t"
+        "adcq %8,%3 \n\t"
+        "adcq $0,%4 \n\t"
+        : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(carry)
+        : "r"(a0), "r"(a1), "r"(a2), "r"(a3),
+          "0"(r0), "1"(r1), "2"(r2), "3"(r3)
+        : "%cc");
+#endif
+
+    MP_CHECKOK(s_mp_pad(r, 4));
+    MP_DIGIT(r, 3) = r3;
+    MP_DIGIT(r, 2) = r2;
+    MP_DIGIT(r, 1) = r1;
+    MP_DIGIT(r, 0) = r0;
+    MP_SIGN(r) = MP_ZPOS;
+    MP_USED(r) = 4;
+
+    /* Do quick 'subtract' if we've gone over
+     * (add the 2's complement of the curve field) */
+    a3 = MP_DIGIT(&meth->irr, 3);
+    if (carry || r3 > a3 ||
+        ((r3 == a3) && mp_cmp(r, &meth->irr) != MP_LT)) {
+        a2 = MP_DIGIT(&meth->irr, 2);
+        a1 = MP_DIGIT(&meth->irr, 1);
+        a0 = MP_DIGIT(&meth->irr, 0);
+#ifndef MPI_AMD64_ADD
+        carry = 0;
+        MP_SUB_BORROW(r0, a0, r0, carry);
+        MP_SUB_BORROW(r1, a1, r1, carry);
+        MP_SUB_BORROW(r2, a2, r2, carry);
+        MP_SUB_BORROW(r3, a3, r3, carry);
+#else
+        __asm__(
+            "subq %4,%0 \n\t"
+            "sbbq %5,%1 \n\t"
+            "sbbq %6,%2 \n\t"
+            "sbbq %7,%3 \n\t"
+            : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3)
+            : "r"(a0), "r"(a1), "r"(a2), "r"(a3),
+              "0"(r0), "1"(r1), "2"(r2), "3"(r3)
+            : "%cc");
+#endif
+        MP_DIGIT(r, 3) = r3;
+        MP_DIGIT(r, 2) = r2;
+        MP_DIGIT(r, 1) = r1;
+        MP_DIGIT(r, 0) = r0;
+    }
+
+    s_mp_clamp(r);
+
+CLEANUP:
+    return res;
+}
+
+/* 5 words: same scheme for a 5-digit modulus; only the portable macro path exists */
+mp_err
+ec_GFp_add_5(const mp_int *a, const mp_int *b, mp_int *r,
+             const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    mp_digit a0 = 0, a1 = 0, a2 = 0, a3 = 0, a4 = 0;
+    mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0;
+    mp_digit carry;
+
+    switch (MP_USED(a)) { /* intentional fall-through */
+        case 5:
+            a4 = MP_DIGIT(a, 4);
+        case 4:
+            a3 = MP_DIGIT(a, 3);
+        case 3:
+            a2 = MP_DIGIT(a, 2);
+        case 2:
+            a1 = MP_DIGIT(a, 1);
+        case 1:
+            a0 = MP_DIGIT(a, 0);
+    }
+    switch (MP_USED(b)) { /* intentional fall-through */
+        case 5:
+            r4 = MP_DIGIT(b, 4);
+        case 4:
+            r3 = MP_DIGIT(b, 3);
+        case 3:
+            r2 = MP_DIGIT(b, 2);
+        case 2:
+            r1 = MP_DIGIT(b, 1);
+        case 1:
+            r0 = MP_DIGIT(b, 0);
+    }
+
+    carry = 0;
+    MP_ADD_CARRY(a0, r0, r0, carry);
+    MP_ADD_CARRY(a1, r1, r1, carry);
+    MP_ADD_CARRY(a2, r2, r2, carry);
+    MP_ADD_CARRY(a3, r3, r3, carry);
+    MP_ADD_CARRY(a4, r4, r4, carry);
+
+    MP_CHECKOK(s_mp_pad(r, 5));
+    MP_DIGIT(r, 4) = r4;
+    MP_DIGIT(r, 3) = r3;
+    MP_DIGIT(r, 2) = r2;
+    MP_DIGIT(r, 1) = r1;
+    MP_DIGIT(r, 0) = r0;
+    MP_SIGN(r) = MP_ZPOS;
+    MP_USED(r) = 5;
+
+    /* Do quick 'subtract' if we've gone over
+     * (add the 2's complement of the curve field) */
+    a4 = MP_DIGIT(&meth->irr, 4);
+    if (carry || r4 > a4 ||
+        ((r4 == a4) && mp_cmp(r, &meth->irr) != MP_LT)) {
+        a3 = MP_DIGIT(&meth->irr, 3);
+        a2 = MP_DIGIT(&meth->irr, 2);
+        a1 = MP_DIGIT(&meth->irr, 1);
+        a0 = MP_DIGIT(&meth->irr, 0);
+        carry = 0;
+        MP_SUB_BORROW(r0, a0, r0, carry);
+        MP_SUB_BORROW(r1, a1, r1, carry);
+        MP_SUB_BORROW(r2, a2, r2, carry);
+        MP_SUB_BORROW(r3, a3, r3, carry);
+        MP_SUB_BORROW(r4, a4, r4, carry);
+        MP_DIGIT(r, 4) = r4;
+        MP_DIGIT(r, 3) = r3;
+        MP_DIGIT(r, 2) = r2;
+        MP_DIGIT(r, 1) = r1;
+        MP_DIGIT(r, 0) = r0;
+    }
+
+    s_mp_clamp(r);
+
+CLEANUP:
+    return res;
+}
+
+/* 6 words: same scheme for a 6-digit modulus; only the portable macro path exists */
+mp_err
+ec_GFp_add_6(const mp_int *a, const mp_int *b, mp_int *r,
+             const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    mp_digit a0 = 0, a1 = 0, a2 = 0, a3 = 0, a4 = 0, a5 = 0;
+    mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0, r5 = 0;
+    mp_digit carry;
+
+    switch (MP_USED(a)) { /* intentional fall-through */
+        case 6:
+            a5 = MP_DIGIT(a, 5);
+        case 5:
+            a4 = MP_DIGIT(a, 4);
+        case 4:
+            a3 = MP_DIGIT(a, 3);
+        case 3:
+            a2 = MP_DIGIT(a, 2);
+        case 2:
+            a1 = MP_DIGIT(a, 1);
+        case 1:
+            a0 = MP_DIGIT(a, 0);
+    }
+    switch (MP_USED(b)) { /* intentional fall-through */
+        case 6:
+            r5 = MP_DIGIT(b, 5);
+        case 5:
+            r4 = MP_DIGIT(b, 4);
+        case 4:
+            r3 = MP_DIGIT(b, 3);
+        case 3:
+            r2 = MP_DIGIT(b, 2);
+        case 2:
+            r1 = MP_DIGIT(b, 1);
+        case 1:
+            r0 = MP_DIGIT(b, 0);
+    }
+
+    carry = 0;
+    MP_ADD_CARRY(a0, r0, r0, carry);
+    MP_ADD_CARRY(a1, r1, r1, carry);
+    MP_ADD_CARRY(a2, r2, r2, carry);
+    MP_ADD_CARRY(a3, r3, r3, carry);
+    MP_ADD_CARRY(a4, r4, r4, carry);
+    MP_ADD_CARRY(a5, r5, r5, carry);
+
+    MP_CHECKOK(s_mp_pad(r, 6));
+    MP_DIGIT(r, 5) = r5;
+    MP_DIGIT(r, 4) = r4;
+    MP_DIGIT(r, 3) = r3;
+    MP_DIGIT(r, 2) = r2;
+    MP_DIGIT(r, 1) = r1;
+    MP_DIGIT(r, 0) = r0;
+    MP_SIGN(r) = MP_ZPOS;
+    MP_USED(r) = 6;
+
+    /* Do quick 'subtract' if we've gone over
+     * (add the 2's complement of the curve field) */
+    a5 = MP_DIGIT(&meth->irr, 5);
+    if (carry || r5 > a5 ||
+        ((r5 == a5) && mp_cmp(r, &meth->irr) != MP_LT)) {
+        a4 = MP_DIGIT(&meth->irr, 4);
+        a3 = MP_DIGIT(&meth->irr, 3);
+        a2 = MP_DIGIT(&meth->irr, 2);
+        a1 = MP_DIGIT(&meth->irr, 1);
+        a0 = MP_DIGIT(&meth->irr, 0);
+        carry = 0;
+        MP_SUB_BORROW(r0, a0, r0, carry);
+        MP_SUB_BORROW(r1, a1, r1, carry);
+        MP_SUB_BORROW(r2, a2, r2, carry);
+        MP_SUB_BORROW(r3, a3, r3, carry);
+        MP_SUB_BORROW(r4, a4, r4, carry);
+        MP_SUB_BORROW(r5, a5, r5, carry);
+        MP_DIGIT(r, 5) = r5;
+        MP_DIGIT(r, 4) = r4;
+        MP_DIGIT(r, 3) = r3;
+        MP_DIGIT(r, 2) = r2;
+        MP_DIGIT(r, 1) = r1;
+        MP_DIGIT(r, 0) = r0;
+    }
+
+    s_mp_clamp(r);
+
+CLEANUP:
+    return res;
+}
+
+/*
+ * The following subtraction functions do in-line subtractions based
+ * on our curve size.
+ *
+ * ...
3 words
+ */
+mp_err
+ec_GFp_sub_3(const mp_int *a, const mp_int *b, mp_int *r,
+             const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    mp_digit b0 = 0, b1 = 0, b2 = 0;
+    mp_digit r0 = 0, r1 = 0, r2 = 0;
+    mp_digit borrow;
+
+    switch (MP_USED(a)) { /* a is loaded straight into the result digits; cases fall through on purpose */
+        case 3:
+            r2 = MP_DIGIT(a, 2);
+        case 2:
+            r1 = MP_DIGIT(a, 1);
+        case 1:
+            r0 = MP_DIGIT(a, 0);
+    }
+    switch (MP_USED(b)) { /* intentional fall-through; unused high digits stay 0 */
+        case 3:
+            b2 = MP_DIGIT(b, 2);
+        case 2:
+            b1 = MP_DIGIT(b, 1);
+        case 1:
+            b0 = MP_DIGIT(b, 0);
+    }
+
+#ifndef MPI_AMD64_ADD
+    borrow = 0;
+    MP_SUB_BORROW(r0, b0, r0, borrow);
+    MP_SUB_BORROW(r1, b1, r1, borrow);
+    MP_SUB_BORROW(r2, b2, r2, borrow);
+#else
+    __asm__( /* x86-64 sub/sbb chain; the final adcq captures the borrow-out in operand 3 */
+        "xorq %3,%3 \n\t"
+        "subq %4,%0 \n\t"
+        "sbbq %5,%1 \n\t"
+        "sbbq %6,%2 \n\t"
+        "adcq $0,%3 \n\t"
+        : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(borrow)
+        : "r"(b0), "r"(b1), "r"(b2),
+          "0"(r0), "1"(r1), "2"(r2)
+        : "%cc");
+#endif
+
+    /* Do quick 'add' if we've gone under 0
+     * (subtract the 2's complement of the curve field) */
+    if (borrow) {
+        b2 = MP_DIGIT(&meth->irr, 2);
+        b1 = MP_DIGIT(&meth->irr, 1);
+        b0 = MP_DIGIT(&meth->irr, 0);
+#ifndef MPI_AMD64_ADD
+        borrow = 0;
+        MP_ADD_CARRY(b0, r0, r0, borrow);
+        MP_ADD_CARRY(b1, r1, r1, borrow);
+        MP_ADD_CARRY(b2, r2, r2, borrow);
+#else
+        __asm__(
+            "addq %3,%0 \n\t"
+            "adcq %4,%1 \n\t"
+            "adcq %5,%2 \n\t"
+            : "=r"(r0), "=r"(r1), "=r"(r2)
+            : "r"(b0), "r"(b1), "r"(b2),
+              "0"(r0), "1"(r1), "2"(r2)
+            : "%cc");
+#endif
+    }
+
+#ifdef MPI_AMD64_ADD
+    /* compiler fakeout? NOTE(review): purpose unclear; looks like a workaround tied to the asm path - confirm before touching */
+    if ((r2 == b0) && (r1 == b0) && (r0 == b0)) {
+        MP_CHECKOK(s_mp_pad(r, 4));
+    }
+#endif
+    MP_CHECKOK(s_mp_pad(r, 3)); /* presumably ensures r holds at least 3 digits - confirm against mpi */
+    MP_DIGIT(r, 2) = r2;
+    MP_DIGIT(r, 1) = r1;
+    MP_DIGIT(r, 0) = r0;
+    MP_SIGN(r) = MP_ZPOS;
+    MP_USED(r) = 3;
+    s_mp_clamp(r); /* presumably trims leading zero digits from r - confirm against mpi */
+
+CLEANUP:
+    return res;
+}
+
+/* 4 words: same scheme as ec_GFp_sub_3 for a 4-digit modulus */
+mp_err
+ec_GFp_sub_4(const mp_int *a, const mp_int *b, mp_int *r,
+             const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    mp_digit b0 = 0, b1 = 0, b2 = 0, b3 = 0;
+    mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0;
+    mp_digit borrow;
+
+    switch (MP_USED(a)) { /* intentional fall-through */
+        case 4:
+            r3 = MP_DIGIT(a, 3);
+        case 3:
+            r2 = MP_DIGIT(a, 2);
+        case 2:
+            r1 = MP_DIGIT(a, 1);
+        case 1:
+            r0 = MP_DIGIT(a, 0);
+    }
+    switch (MP_USED(b)) { /* intentional fall-through */
+        case 4:
+            b3 = MP_DIGIT(b, 3);
+        case 3:
+            b2 = MP_DIGIT(b, 2);
+        case 2:
+            b1 = MP_DIGIT(b, 1);
+        case 1:
+            b0 = MP_DIGIT(b, 0);
+    }
+
+#ifndef MPI_AMD64_ADD
+    borrow = 0;
+    MP_SUB_BORROW(r0, b0, r0, borrow);
+    MP_SUB_BORROW(r1, b1, r1, borrow);
+    MP_SUB_BORROW(r2, b2, r2, borrow);
+    MP_SUB_BORROW(r3, b3, r3, borrow);
+#else
+    __asm__( /* x86-64 sub/sbb chain; borrow-out ends up in operand 4 */
+        "xorq %4,%4 \n\t"
+        "subq %5,%0 \n\t"
+        "sbbq %6,%1 \n\t"
+        "sbbq %7,%2 \n\t"
+        "sbbq %8,%3 \n\t"
+        "adcq $0,%4 \n\t"
+        : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3), "=r"(borrow)
+        : "r"(b0), "r"(b1), "r"(b2), "r"(b3),
+          "0"(r0), "1"(r1), "2"(r2), "3"(r3)
+        : "%cc");
+#endif
+
+    /* Do quick 'add' if we've gone under 0
+     * (subtract the 2's complement of the curve field) */
+    if (borrow) {
+        b3 = MP_DIGIT(&meth->irr, 3);
+        b2 = MP_DIGIT(&meth->irr, 2);
+        b1 = MP_DIGIT(&meth->irr, 1);
+        b0 = MP_DIGIT(&meth->irr, 0);
+#ifndef MPI_AMD64_ADD
+        borrow = 0;
+        MP_ADD_CARRY(b0, r0, r0, borrow);
+        MP_ADD_CARRY(b1, r1, r1, borrow);
+        MP_ADD_CARRY(b2, r2, r2, borrow);
+        MP_ADD_CARRY(b3, r3, r3, borrow);
+#else
+        __asm__(
+            "addq %4,%0 \n\t"
+            "adcq %5,%1 \n\t"
+            "adcq %6,%2 \n\t"
+            "adcq %7,%3 \n\t"
+            : "=r"(r0), "=r"(r1), "=r"(r2), "=r"(r3)
+            : "r"(b0), "r"(b1), "r"(b2), "r"(b3),
+              "0"(r0), "1"(r1), "2"(r2), "3"(r3)
+            : "%cc");
+#endif
+    }
+#ifdef MPI_AMD64_ADD
+    /* compiler fakeout? NOTE(review): unlike the 3-word variant this test skips r2 - confirm whether that is intended */
+    if ((r3 == b0) && (r1 == b0) && (r0 == b0)) {
+        MP_CHECKOK(s_mp_pad(r, 4));
+    }
+#endif
+    MP_CHECKOK(s_mp_pad(r, 4));
+    MP_DIGIT(r, 3) = r3;
+    MP_DIGIT(r, 2) = r2;
+    MP_DIGIT(r, 1) = r1;
+    MP_DIGIT(r, 0) = r0;
+    MP_SIGN(r) = MP_ZPOS;
+    MP_USED(r) = 4;
+    s_mp_clamp(r);
+
+CLEANUP:
+    return res;
+}
+
+/* 5 words: same scheme for a 5-digit modulus; only the portable macro path exists */
+mp_err
+ec_GFp_sub_5(const mp_int *a, const mp_int *b, mp_int *r,
+             const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    mp_digit b0 = 0, b1 = 0, b2 = 0, b3 = 0, b4 = 0;
+    mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0;
+    mp_digit borrow;
+
+    switch (MP_USED(a)) { /* intentional fall-through */
+        case 5:
+            r4 = MP_DIGIT(a, 4);
+        case 4:
+            r3 = MP_DIGIT(a, 3);
+        case 3:
+            r2 = MP_DIGIT(a, 2);
+        case 2:
+            r1 = MP_DIGIT(a, 1);
+        case 1:
+            r0 = MP_DIGIT(a, 0);
+    }
+    switch (MP_USED(b)) { /* intentional fall-through */
+        case 5:
+            b4 = MP_DIGIT(b, 4);
+        case 4:
+            b3 = MP_DIGIT(b, 3);
+        case 3:
+            b2 = MP_DIGIT(b, 2);
+        case 2:
+            b1 = MP_DIGIT(b, 1);
+        case 1:
+            b0 = MP_DIGIT(b, 0);
+    }
+
+    borrow = 0;
+    MP_SUB_BORROW(r0, b0, r0, borrow);
+    MP_SUB_BORROW(r1, b1, r1, borrow);
+    MP_SUB_BORROW(r2, b2, r2, borrow);
+    MP_SUB_BORROW(r3, b3, r3, borrow);
+    MP_SUB_BORROW(r4, b4, r4, borrow);
+
+    /* Do quick 'add' if we've gone under 0
+     * (subtract the 2's complement of the curve field) */
+    if (borrow) {
+        b4 = MP_DIGIT(&meth->irr, 4);
+        b3 = MP_DIGIT(&meth->irr, 3);
+        b2 = MP_DIGIT(&meth->irr, 2);
+        b1 = MP_DIGIT(&meth->irr, 1);
+        b0 = MP_DIGIT(&meth->irr, 0);
+        borrow = 0;
+        MP_ADD_CARRY(b0, r0, r0, borrow);
+        MP_ADD_CARRY(b1, r1, r1, borrow);
+        MP_ADD_CARRY(b2, r2, r2, borrow);
+        MP_ADD_CARRY(b3, r3, r3, borrow);
+        MP_ADD_CARRY(b4, r4, r4, borrow);
+    }
+    MP_CHECKOK(s_mp_pad(r, 5));
+    MP_DIGIT(r, 4) = r4;
+    MP_DIGIT(r, 3) = r3;
+    MP_DIGIT(r, 2) = r2;
+    MP_DIGIT(r, 1) = r1;
+    MP_DIGIT(r, 0) = r0;
+    MP_SIGN(r) = MP_ZPOS;
+    MP_USED(r) = 5;
+    s_mp_clamp(r);
+
+CLEANUP:
+    return res;
+}
+
+/* 6 words: same scheme for a 6-digit modulus; only the portable macro path exists */
+mp_err
+ec_GFp_sub_6(const mp_int *a, const mp_int *b, mp_int *r,
+             const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    mp_digit b0 = 0, b1 = 0, b2 = 0, b3 = 0, b4 = 0, b5 = 0;
+    mp_digit r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0, r5 = 0;
+    mp_digit borrow;
+
+    switch (MP_USED(a)) { /* intentional fall-through */
+        case 6:
+            r5 = MP_DIGIT(a, 5);
+        case 5:
+            r4 = MP_DIGIT(a, 4);
+        case 4:
+            r3 = MP_DIGIT(a, 3);
+        case 3:
+            r2 = MP_DIGIT(a, 2);
+        case 2:
+            r1 = MP_DIGIT(a, 1);
+        case 1:
+            r0 = MP_DIGIT(a, 0);
+    }
+    switch (MP_USED(b)) { /* intentional fall-through */
+        case 6:
+            b5 = MP_DIGIT(b, 5);
+        case 5:
+            b4 = MP_DIGIT(b, 4);
+        case 4:
+            b3 = MP_DIGIT(b, 3);
+        case 3:
+            b2 = MP_DIGIT(b, 2);
+        case 2:
+            b1 = MP_DIGIT(b, 1);
+        case 1:
+            b0 = MP_DIGIT(b, 0);
+    }
+
+    borrow = 0;
+    MP_SUB_BORROW(r0, b0, r0, borrow);
+    MP_SUB_BORROW(r1, b1, r1, borrow);
+    MP_SUB_BORROW(r2, b2, r2, borrow);
+    MP_SUB_BORROW(r3, b3, r3, borrow);
+    MP_SUB_BORROW(r4, b4, r4, borrow);
+    MP_SUB_BORROW(r5, b5, r5, borrow);
+
+    /* Do quick 'add' if we've gone under 0
+     * (subtract the 2's complement of the curve field) */
+    if (borrow) {
+        b5 = MP_DIGIT(&meth->irr, 5);
+        b4 = MP_DIGIT(&meth->irr, 4);
+        b3 = MP_DIGIT(&meth->irr, 3);
+        b2 = MP_DIGIT(&meth->irr, 2);
+        b1 = MP_DIGIT(&meth->irr, 1);
+        b0 = MP_DIGIT(&meth->irr, 0);
+        borrow = 0;
+        MP_ADD_CARRY(b0, r0, r0, borrow);
+        MP_ADD_CARRY(b1, r1, r1, borrow);
+        MP_ADD_CARRY(b2, r2, r2, borrow);
+        MP_ADD_CARRY(b3, r3, r3, borrow);
+        MP_ADD_CARRY(b4, r4, r4, borrow);
+        MP_ADD_CARRY(b5, r5, r5, borrow);
+    }
+
+    MP_CHECKOK(s_mp_pad(r, 6));
+    MP_DIGIT(r, 5) = r5;
+    MP_DIGIT(r, 4) = r4;
+    MP_DIGIT(r, 3) = r3;
+    MP_DIGIT(r, 2) = r2;
+    MP_DIGIT(r, 1) = r1;
+    MP_DIGIT(r, 0) = r0;
+    MP_SIGN(r) = MP_ZPOS;
+    MP_USED(r) = 6;
+    s_mp_clamp(r);
+
+CLEANUP:
+    return res;
+}
+
+/* Reduces an integer to a field element: r = a mod meth->irr. */
+mp_err
+ec_GFp_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+    return mp_mod(a, &meth->irr, r);
+}
+
+/* Multiplies two field elements.
*/ +mp_err +ec_GFp_mul(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth) +{ + return mp_mulmod(a, b, &meth->irr, r); +} + +/* Squares a field element. */ +mp_err +ec_GFp_sqr(const mp_int *a, mp_int *r, const GFMethod *meth) +{ + return mp_sqrmod(a, &meth->irr, r); +} + +/* Divides two field elements. If a is NULL, then returns the inverse of + * b. */ +mp_err +ec_GFp_div(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth) +{ + mp_err res = MP_OKAY; + mp_int t; + + /* If a is NULL, then return the inverse of b, otherwise return a/b. */ + if (a == NULL) { + return mp_invmod(b, &meth->irr, r); + } else { + /* MPI doesn't support divmod, so we implement it using invmod and + * mulmod. */ + MP_CHECKOK(mp_init(&t)); + MP_CHECKOK(mp_invmod(b, &meth->irr, &t)); + MP_CHECKOK(mp_mulmod(a, &t, &meth->irr, r)); + CLEANUP: + mp_clear(&t); + return res; + } +} + +/* Wrapper functions for generic binary polynomial field arithmetic. */ + +/* Adds two field elements. */ +mp_err +ec_GF2m_add(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth) +{ + return mp_badd(a, b, r); +} + +/* Negates a field element. Note that for binary polynomial fields, the + * negation of a field element is the field element itself. */ +mp_err +ec_GF2m_neg(const mp_int *a, mp_int *r, const GFMethod *meth) +{ + if (a == r) { + return MP_OKAY; + } else { + return mp_copy(a, r); + } +} + +/* Reduces a binary polynomial to a field element. */ +mp_err +ec_GF2m_mod(const mp_int *a, mp_int *r, const GFMethod *meth) +{ + return mp_bmod(a, meth->irr_arr, r); +} + +/* Multiplies two field elements. */ +mp_err +ec_GF2m_mul(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth) +{ + return mp_bmulmod(a, b, meth->irr_arr, r); +} + +/* Squares a field element. */ +mp_err +ec_GF2m_sqr(const mp_int *a, mp_int *r, const GFMethod *meth) +{ + return mp_bsqrmod(a, meth->irr_arr, r); +} + +/* Divides two field elements. 
If a is NULL, then returns the inverse of + * b. */ +mp_err +ec_GF2m_div(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth) +{ + mp_err res = MP_OKAY; + mp_int t; + + /* If a is NULL, then return the inverse of b, otherwise return a/b. */ + if (a == NULL) { + /* The GF(2^m) portion of MPI doesn't support invmod, so we + * compute 1/b. */ + MP_CHECKOK(mp_init(&t)); + MP_CHECKOK(mp_set_int(&t, 1)); + MP_CHECKOK(mp_bdivmod(&t, b, &meth->irr, meth->irr_arr, r)); + CLEANUP: + mp_clear(&t); + return res; + } else { + return mp_bdivmod(a, b, &meth->irr, meth->irr_arr, r); + } +} diff --git a/security/nss/lib/freebl/ecl/ecl_mult.c b/security/nss/lib/freebl/ecl/ecl_mult.c new file mode 100644 index 000000000..ffbcbf1d9 --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecl_mult.c @@ -0,0 +1,305 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mpi.h" +#include "mplogic.h" +#include "ecl.h" +#include "ecl-priv.h" +#include + +/* Elliptic curve scalar-point multiplication. Computes R(x, y) = k * P(x, + * y). If x, y = NULL, then P is assumed to be the generator (base point) + * of the group of points on the elliptic curve. Input and output values + * are assumed to be NOT field-encoded. 
*/
+mp_err
+ECPoint_mul(const ECGroup *group, const mp_int *k, const mp_int *px,
+            const mp_int *py, mp_int *rx, mp_int *ry)
+{
+    mp_err res = MP_OKAY;
+    mp_int kt; /* scalar actually used: either k reduced mod the order, or a shallow alias of k */
+
+    ARGCHK((k != NULL) && (group != NULL), MP_BADARG);
+    MP_DIGITS(&kt) = 0;
+
+    /* want scalar to be less than or equal to group order */
+    if (mp_cmp(k, &group->order) > 0) {
+        MP_CHECKOK(mp_init(&kt));
+        MP_CHECKOK(mp_mod(k, &group->order, &kt));
+    } else {
+        MP_SIGN(&kt) = MP_ZPOS; /* no reduction needed: kt borrows k's digit buffer instead of copying it */
+        MP_USED(&kt) = MP_USED(k);
+        MP_ALLOC(&kt) = MP_ALLOC(k);
+        MP_DIGITS(&kt) = MP_DIGITS(k);
+    }
+
+    if ((px == NULL) || (py == NULL)) { /* no point supplied: multiply the generator */
+        if (group->base_point_mul) { /* prefer the dedicated base-point routine when the group provides one */
+            MP_CHECKOK(group->base_point_mul(&kt, rx, ry, group));
+        } else {
+            MP_CHECKOK(group->point_mul(&kt, &group->genx, &group->geny, rx, ry,
+                                        group));
+        }
+    } else {
+        if (group->meth->field_enc) { /* encode P into rx, ry first, then multiply in place */
+            MP_CHECKOK(group->meth->field_enc(px, rx, group->meth));
+            MP_CHECKOK(group->meth->field_enc(py, ry, group->meth));
+            MP_CHECKOK(group->point_mul(&kt, rx, ry, rx, ry, group));
+        } else {
+            MP_CHECKOK(group->point_mul(&kt, px, py, rx, ry, group));
+        }
+    }
+    if (group->meth->field_dec) { /* convert the result back out of the field encoding */
+        MP_CHECKOK(group->meth->field_dec(rx, rx, group->meth));
+        MP_CHECKOK(group->meth->field_dec(ry, ry, group->meth));
+    }
+
+CLEANUP:
+    if (MP_DIGITS(&kt) != MP_DIGITS(k)) { /* clear kt only when it owns its digits, never when it aliases k */
+        mp_clear(&kt);
+    }
+    return res;
+}
+
+/* Elliptic curve scalar-point multiplication. Computes R(x, y) = k1 * G +
+ * k2 * P(x, y), where G is the generator (base point) of the group of
+ * points on the elliptic curve. Allows k1 = NULL or { k2, P } = NULL.
+ * Input and output values are assumed to be NOT field-encoded.
*/
+mp_err
+ec_pts_mul_basic(const mp_int *k1, const mp_int *k2, const mp_int *px,
+                 const mp_int *py, mp_int *rx, mp_int *ry,
+                 const ECGroup *group)
+{
+    mp_err res = MP_OKAY;
+    mp_int sx, sy; /* holds k1 * G while k2 * P is computed into rx, ry */
+    const GFMethod *meth;
+
+    ARGCHK(group != NULL, MP_BADARG);
+    ARGCHK(!((k1 == NULL) && ((k2 == NULL) || (px == NULL) || (py == NULL))), MP_BADARG);
+
+    /* With only one term present the sum degenerates to a single
+     * multiplication, so hand it to ECPoint_mul. */
+    if (k1 == NULL) {
+        return ECPoint_mul(group, k2, px, py, rx, ry);
+    }
+    if ((k2 == NULL) || (px == NULL) || (py == NULL)) {
+        return ECPoint_mul(group, k1, NULL, NULL, rx, ry);
+    }
+
+    meth = group->meth;
+
+    /* Zero the digit pointers first so CLEANUP can clear both temporaries
+     * regardless of where a failure occurs. */
+    MP_DIGITS(&sx) = 0;
+    MP_DIGITS(&sy) = 0;
+    MP_CHECKOK(mp_init(&sx));
+    MP_CHECKOK(mp_init(&sy));
+
+    /* S = k1 * G, R = k2 * P */
+    MP_CHECKOK(ECPoint_mul(group, k1, NULL, NULL, &sx, &sy));
+    MP_CHECKOK(ECPoint_mul(group, k2, px, py, rx, ry));
+
+    /* ECPoint_mul returns decoded values; point_add is fed encoded ones */
+    if (meth->field_enc != NULL) {
+        MP_CHECKOK(meth->field_enc(&sx, &sx, meth));
+        MP_CHECKOK(meth->field_enc(&sy, &sy, meth));
+        MP_CHECKOK(meth->field_enc(rx, rx, meth));
+        MP_CHECKOK(meth->field_enc(ry, ry, meth));
+    }
+
+    /* R = S + R */
+    MP_CHECKOK(group->point_add(&sx, &sy, rx, ry, rx, ry, group));
+
+    if (meth->field_dec != NULL) {
+        MP_CHECKOK(meth->field_dec(rx, rx, meth));
+        MP_CHECKOK(meth->field_dec(ry, ry, meth));
+    }
+
+CLEANUP:
+    mp_clear(&sx);
+    mp_clear(&sy);
+    return res;
+}
+
+/* Elliptic curve scalar-point multiplication. Computes R(x, y) = k1 * G +
+ * k2 * P(x, y), where G is the generator (base point) of the group of
+ * points on the elliptic curve. Allows k1 = NULL or { k2, P } = NULL.
+ * Input and output values are assumed to be NOT field-encoded. Uses
+ * algorithm 15 (simultaneous multiple point multiplication) from Brown,
+ * Hankerson, Lopez, Menezes. Software Implementation of the NIST
+ * Elliptic Curves over Prime Fields.
*/
+mp_err
+ec_pts_mul_simul_w2(const mp_int *k1, const mp_int *k2, const mp_int *px,
+                    const mp_int *py, mp_int *rx, mp_int *ry,
+                    const ECGroup *group)
+{
+    mp_err res = MP_OKAY;
+    mp_int precomp[4][4][2]; /* precomp[i][j] holds the (x, y) of i * A + j * B, where A = precomp[1][0] and B = precomp[0][1] */
+    const mp_int *a, *b;
+    unsigned int i, j;
+    int ai, bi, d;
+
+    ARGCHK(group != NULL, MP_BADARG);
+    ARGCHK(!((k1 == NULL) && ((k2 == NULL) || (px == NULL) || (py == NULL))), MP_BADARG);
+
+    /* if some arguments are not defined use ECPoint_mul */
+    if (k1 == NULL) {
+        return ECPoint_mul(group, k2, px, py, rx, ry);
+    } else if ((k2 == NULL) || (px == NULL) || (py == NULL)) {
+        return ECPoint_mul(group, k1, NULL, NULL, rx, ry);
+    }
+
+    /* initialize precomputation table (digit pointers are zeroed first so CLEANUP can clear every entry) */
+    for (i = 0; i < 4; i++) {
+        for (j = 0; j < 4; j++) {
+            MP_DIGITS(&precomp[i][j][0]) = 0;
+            MP_DIGITS(&precomp[i][j][1]) = 0;
+        }
+    }
+    for (i = 0; i < 4; i++) {
+        for (j = 0; j < 4; j++) {
+            MP_CHECKOK(mp_init_size(&precomp[i][j][0],
+                                    ECL_MAX_FIELD_SIZE_DIGITS));
+            MP_CHECKOK(mp_init_size(&precomp[i][j][1],
+                                    ECL_MAX_FIELD_SIZE_DIGITS));
+        }
+    }
+
+    /* fill precomputation table */
+    /* assign {k1, k2} = {a, b} such that len(a) >= len(b) */
+    if (mpl_significant_bits(k1) < mpl_significant_bits(k2)) {
+        a = k2; /* a pairs with P in precomp[1][0]; b pairs with G in precomp[0][1] */
+        b = k1;
+        if (group->meth->field_enc) {
+            MP_CHECKOK(group->meth->field_enc(px, &precomp[1][0][0], group->meth));
+            MP_CHECKOK(group->meth->field_enc(py, &precomp[1][0][1], group->meth));
+        } else {
+            MP_CHECKOK(mp_copy(px, &precomp[1][0][0]));
+            MP_CHECKOK(mp_copy(py, &precomp[1][0][1]));
+        }
+        MP_CHECKOK(mp_copy(&group->genx, &precomp[0][1][0])); /* generator is copied without field_enc */
+        MP_CHECKOK(mp_copy(&group->geny, &precomp[0][1][1]));
+    } else {
+        a = k1; /* a pairs with G in precomp[1][0]; b pairs with P in precomp[0][1] */
+        b = k2;
+        MP_CHECKOK(mp_copy(&group->genx, &precomp[1][0][0]));
+        MP_CHECKOK(mp_copy(&group->geny, &precomp[1][0][1]));
+        if (group->meth->field_enc) {
+            MP_CHECKOK(group->meth->field_enc(px, &precomp[0][1][0], group->meth));
+            MP_CHECKOK(group->meth->field_enc(py, &precomp[0][1][1], group->meth));
+        } else {
+            MP_CHECKOK(mp_copy(px, &precomp[0][1][0]));
+            MP_CHECKOK(mp_copy(py, &precomp[0][1][1]));
+        }
+    }
+    /* precompute [*][0][*]: 0, A, 2A, 3A */
+    mp_zero(&precomp[0][0][0]);
+    mp_zero(&precomp[0][0][1]);
+    MP_CHECKOK(group->point_dbl(&precomp[1][0][0], &precomp[1][0][1],
+                                &precomp[2][0][0], &precomp[2][0][1], group));
+    MP_CHECKOK(group->point_add(&precomp[1][0][0], &precomp[1][0][1],
+                                &precomp[2][0][0], &precomp[2][0][1],
+                                &precomp[3][0][0], &precomp[3][0][1], group));
+    /* precompute [*][1][*]: i * A + B */
+    for (i = 1; i < 4; i++) {
+        MP_CHECKOK(group->point_add(&precomp[0][1][0], &precomp[0][1][1],
+                                    &precomp[i][0][0], &precomp[i][0][1],
+                                    &precomp[i][1][0], &precomp[i][1][1], group));
+    }
+    /* precompute [*][2][*]: i * A + 2B */
+    MP_CHECKOK(group->point_dbl(&precomp[0][1][0], &precomp[0][1][1],
+                                &precomp[0][2][0], &precomp[0][2][1], group));
+    for (i = 1; i < 4; i++) {
+        MP_CHECKOK(group->point_add(&precomp[0][2][0], &precomp[0][2][1],
+                                    &precomp[i][0][0], &precomp[i][0][1],
+                                    &precomp[i][2][0], &precomp[i][2][1], group));
+    }
+    /* precompute [*][3][*]: i * A + 3B */
+    MP_CHECKOK(group->point_add(&precomp[0][1][0], &precomp[0][1][1],
+                                &precomp[0][2][0], &precomp[0][2][1],
+                                &precomp[0][3][0], &precomp[0][3][1], group));
+    for (i = 1; i < 4; i++) {
+        MP_CHECKOK(group->point_add(&precomp[0][3][0], &precomp[0][3][1],
+                                    &precomp[i][0][0], &precomp[i][0][1],
+                                    &precomp[i][3][0], &precomp[i][3][1], group));
+    }
+
+    d = (mpl_significant_bits(a) + 1) / 2; /* number of 2-bit windows needed to cover the longer scalar */
+
+    /* R = inf */
+    mp_zero(rx);
+    mp_zero(ry);
+
+    for (i = d; i-- > 0;) { /* walk the windows from most to least significant */
+        ai = MP_GET_BIT(a, 2 * i + 1);
+        ai <<= 1;
+        ai |= MP_GET_BIT(a, 2 * i);
+        bi = MP_GET_BIT(b, 2 * i + 1);
+        bi <<= 1;
+        bi |= MP_GET_BIT(b, 2 * i);
+        /* R = 2^2 * R */
+        MP_CHECKOK(group->point_dbl(rx, ry, rx, ry, group));
+        MP_CHECKOK(group->point_dbl(rx, ry, rx, ry, group));
+        /* R = R + (ai * A + bi * B) */
+        MP_CHECKOK(group->point_add(rx, ry, &precomp[ai][bi][0],
+                                    &precomp[ai][bi][1], rx, ry, group));
+    }
+
+    if (group->meth->field_dec) {
+        MP_CHECKOK(group->meth->field_dec(rx, rx, group->meth));
+        MP_CHECKOK(group->meth->field_dec(ry, ry, group->meth));
+    }
+
+CLEANUP:
+    for (i = 0; i < 4; i++) {
+        for (j = 0; j < 4; j++) {
+            mp_clear(&precomp[i][j][0]);
+            mp_clear(&precomp[i][j][1]);
+        }
+    }
+    return res;
+}
+
+/* Elliptic curve scalar-point multiplication. Computes R(x, y) = k1 * G +
+ * k2 * P(x, y), where G is the generator (base point) of the group of
+ * points on the elliptic curve. Allows k1 = NULL or { k2, P } = NULL.
+ * Input and output values are assumed to be NOT field-encoded. */
+mp_err
+ECPoints_mul(const ECGroup *group, const mp_int *k1, const mp_int *k2,
+             const mp_int *px, const mp_int *py, mp_int *rx, mp_int *ry)
+{
+    mp_err res = MP_OKAY;
+    mp_int k1t, k2t; /* reduced copies, initialized only when a scalar needs reduction */
+    const mp_int *k1p, *k2p; /* scalars actually passed on: the reduced copy or the caller's value (possibly NULL) */
+
+    MP_DIGITS(&k1t) = 0;
+    MP_DIGITS(&k2t) = 0;
+
+    ARGCHK(group != NULL, MP_BADARG);
+
+    /* reduce any scalar that is >= the group order, so each ends up strictly less than it */
+    if (k1 != NULL) {
+        if (mp_cmp(k1, &group->order) >= 0) {
+            MP_CHECKOK(mp_init(&k1t));
+            MP_CHECKOK(mp_mod(k1, &group->order, &k1t));
+            k1p = &k1t;
+        } else {
+            k1p = k1;
+        }
+    } else {
+        k1p = k1;
+    }
+    if (k2 != NULL) {
+        if (mp_cmp(k2, &group->order) >= 0) {
+            MP_CHECKOK(mp_init(&k2t));
+            MP_CHECKOK(mp_mod(k2, &group->order, &k2t));
+            k2p = &k2t;
+        } else {
+            k2p = k2;
+        }
+    } else {
+        k2p = k2;
+    }
+
+    /* if points_mul is defined, then use it */
+    if (group->points_mul) {
+        res = group->points_mul(k1p, k2p, px, py, rx, ry, group);
+    } else {
+        res = ec_pts_mul_simul_w2(k1p, k2p, px, py, rx, ry, group);
+    }
+
+CLEANUP:
+    mp_clear(&k1t);
+    mp_clear(&k2t);
+    return res;
+}
diff --git a/security/nss/lib/freebl/ecl/ecp.h b/security/nss/lib/freebl/ecl/ecp.h
new file mode 100644
index 000000000..7e54e4e07
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecp.h
@@ -0,0 +1,106 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/

#ifndef __ecp_h_
#define __ecp_h_

#include "ecl-priv.h"

/* Prototypes for point arithmetic on curves over GF(p).  Functions suffixed
 * _aff take points as (x, y) pairs; functions suffixed _jac take an extra z
 * coordinate.  All of them report status through an mp_err return value. */

/* Checks if point P(px, py) is at infinity. Uses affine coordinates. */
mp_err ec_GFp_pt_is_inf_aff(const mp_int *px, const mp_int *py);

/* Sets P(px, py) to be the point at infinity. Uses affine coordinates. */
mp_err ec_GFp_pt_set_inf_aff(mp_int *px, mp_int *py);

/* Computes R = P + Q where R is (rx, ry), P is (px, py) and Q is (qx,
 * qy). Uses affine coordinates. */
mp_err ec_GFp_pt_add_aff(const mp_int *px, const mp_int *py,
                         const mp_int *qx, const mp_int *qy, mp_int *rx,
                         mp_int *ry, const ECGroup *group);

/* Computes R = P - Q where R is (rx, ry), P is (px, py) and Q is (qx,
 * qy). Uses affine coordinates. */
mp_err ec_GFp_pt_sub_aff(const mp_int *px, const mp_int *py,
                         const mp_int *qx, const mp_int *qy, mp_int *rx,
                         mp_int *ry, const ECGroup *group);

/* Computes R = 2P where R is (rx, ry) and P is (px, py). Uses affine
 * coordinates. */
mp_err ec_GFp_pt_dbl_aff(const mp_int *px, const mp_int *py, mp_int *rx,
                         mp_int *ry, const ECGroup *group);

/* Validates a point (px, py) on the GFp curve described by group. */
mp_err ec_GFp_validate_point(const mp_int *px, const mp_int *py, const ECGroup *group);

#ifdef ECL_ENABLE_GFP_PT_MUL_AFF
/* Computes R = nP where R is (rx, ry) and P is (px, py). The curve is
 * supplied through group (the prototype has no separate a, b or p
 * parameters). Uses affine coordinates. Only declared when
 * ECL_ENABLE_GFP_PT_MUL_AFF is defined. */
mp_err ec_GFp_pt_mul_aff(const mp_int *n, const mp_int *px,
                         const mp_int *py, mp_int *rx, mp_int *ry,
                         const ECGroup *group);
#endif

/* Converts a point P(px, py) from affine coordinates to Jacobian
 * projective coordinates R(rx, ry, rz). */
mp_err ec_GFp_pt_aff2jac(const mp_int *px, const mp_int *py, mp_int *rx,
                         mp_int *ry, mp_int *rz, const ECGroup *group);

/* Converts a point P(px, py, pz) from Jacobian projective coordinates to
 * affine coordinates R(rx, ry). */
mp_err ec_GFp_pt_jac2aff(const mp_int *px, const mp_int *py,
                         const mp_int *pz, mp_int *rx, mp_int *ry,
                         const ECGroup *group);

/* Checks if point P(px, py, pz) is at infinity. Uses Jacobian
 * coordinates. */
mp_err ec_GFp_pt_is_inf_jac(const mp_int *px, const mp_int *py,
                            const mp_int *pz);

/* Sets P(px, py, pz) to be the point at infinity. Uses Jacobian
 * coordinates. */
mp_err ec_GFp_pt_set_inf_jac(mp_int *px, mp_int *py, mp_int *pz);

/* Computes R = P + Q where R is (rx, ry, rz), P is (px, py, pz) and Q is
 * (qx, qy). P and R carry a z coordinate; Q does not (presumably Q is
 * affine, as the _jac_aff suffix suggests). */
mp_err ec_GFp_pt_add_jac_aff(const mp_int *px, const mp_int *py,
                             const mp_int *pz, const mp_int *qx,
                             const mp_int *qy, mp_int *rx, mp_int *ry,
                             mp_int *rz, const ECGroup *group);

/* Computes R = 2P where R is (rx, ry, rz) and P is (px, py, pz). Uses
 * Jacobian coordinates. */
mp_err ec_GFp_pt_dbl_jac(const mp_int *px, const mp_int *py,
                         const mp_int *pz, mp_int *rx, mp_int *ry,
                         mp_int *rz, const ECGroup *group);

#ifdef ECL_ENABLE_GFP_PT_MUL_JAC
/* Computes R = nP where R is (rx, ry) and P is (px, py). The curve is
 * supplied through group (the prototype has no separate a, b or p
 * parameters). Uses Jacobian coordinates. Only declared when
 * ECL_ENABLE_GFP_PT_MUL_JAC is defined. */
mp_err ec_GFp_pt_mul_jac(const mp_int *n, const mp_int *px,
                         const mp_int *py, mp_int *rx, mp_int *ry,
                         const ECGroup *group);
#endif

/* Computes R(x, y) = k1 * G + k2 * P(x, y), where G is the generator
 * (base point) of the group of points on the elliptic curve. Allows k1 =
 * NULL or { k2, P } = NULL. Implemented using mixed Jacobian-affine
 * coordinates. Input and output values are assumed to be NOT
 * field-encoded and are in affine form. */
mp_err
ec_GFp_pts_mul_jac(const mp_int *k1, const mp_int *k2, const mp_int *px,
                   const mp_int *py, mp_int *rx, mp_int *ry,
                   const ECGroup *group);

/* Computes R = nP where R is (rx, ry) and P is the base point. Elliptic
 * curve points P and R can be identical. Uses mixed Modified-Jacobian
 * co-ordinates for doubling and Chudnovsky Jacobian coordinates for
 * additions. Assumes input is already field-encoded using field_enc, and
 * returns output that is still field-encoded. Uses 5-bit window NAF
 * method (algorithm 11) for scalar-point multiplication from Brown,
 * Hankerson, Lopez, Menezes. Software Implementation of the NIST Elliptic
 * Curves Over Prime Fields.
 * NOTE(review): the prototype takes P explicitly as (px, py); confirm
 * whether "P is the base point" is a requirement or just the usual caller. */
mp_err
ec_GFp_pt_mul_jm_wNAF(const mp_int *n, const mp_int *px, const mp_int *py,
                      mp_int *rx, mp_int *ry, const ECGroup *group);

#endif /* __ecp_h_ */
diff --git a/security/nss/lib/freebl/ecl/ecp_25519.c b/security/nss/lib/freebl/ecl/ecp_25519.c new file mode 100644 index 000000000..a8d41520e --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecp_25519.c @@ -0,0 +1,120 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* curve 25519 https://www.rfc-editor.org/rfc/rfc7748.txt */ + +#ifdef FREEBL_NO_DEPEND +#include "../stubs.h" +#endif + +#include "ecl-priv.h" +#include "ecp.h" +#include "mpi.h" +#include "mplogic.h" +#include "mpi-priv.h" +#include "secmpi.h" +#include "secitem.h" +#include "secport.h" +#include +#include + +/* + * point validation is not necessary in general. But this checks a point (px) + * against some known bad values.
+ */ +SECStatus +ec_Curve25519_pt_validate(const SECItem *px) +{ + PRUint8 *p; + int i; + PRUint8 forbiddenValues[12][32] = { + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0xe0, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae, + 0x16, 0x56, 0xe3, 0xfa, 0xf1, 0x9f, 0xc4, 0x6a, + 0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32, 0xb1, 0xfd, + 0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8, 0x00 }, + { 0x5f, 0x9c, 0x95, 0xbc, 0xa3, 0x50, 0x8c, 0x24, + 0xb1, 0xd0, 0xb1, 0x55, 0x9c, 0x83, 0xef, 0x5b, + 0x04, 0x44, 0x5c, 0xc4, 0x58, 0x1c, 0x8e, 0x86, + 0xd8, 0x22, 0x4e, 0xdd, 0xd0, 0x9f, 0x11, 0x57 }, + { 0xec, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + { 0xed, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + { 0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }, + { 0xcd, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae, + 0x16, 0x56, 0xe3, 0xfa, 0xf1, 0x9f, 0xc4, 0x6a, + 0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32, 0xb1, 0xfd, + 0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8, 0x80 }, + { 0x4c, 0x9c, 0x95, 0xbc, 0xa3, 0x50, 0x8c, 0x24, + 0xb1, 0xd0, 0xb1, 0x55, 0x9c, 0x83, 0xef, 0x5b, + 0x04, 0x44, 0x5c, 0xc4, 0x58, 0x1c, 0x8e, 0x86, + 0xd8, 0x22, 0x4e, 0xdd, 0xd0, 0x9f, 0x11, 0xd7 }, + { 0xd9, 0xff, 0xff, 0xff, 0xff, 
0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + { 0xda, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + { 0xdb, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, + }; + + /* The point must not be longer than 32 (it can be smaller). */ + if (px->len <= 32) { + p = px->data; + } else { + return SECFailure; + } + + for (i = 0; i < PR_ARRAY_SIZE(forbiddenValues); ++i) { + if (NSS_SecureMemcmp(p, forbiddenValues[i], px->len) == 0) { + return SECFailure; + } + } + + return SECSuccess; +} + +/* + * Scalar multiplication for Curve25519. + * If P == NULL, the base point is used. + * Returns X = k*P + */ +SECStatus +ec_Curve25519_pt_mul(SECItem *X, SECItem *k, SECItem *P) +{ + PRUint8 *px; + PRUint8 basePoint[32] = { 9 }; + + if (!P) { + px = basePoint; + } else { + PORT_Assert(P->len == 32); + if (P->len != 32) { + return SECFailure; + } + px = P->data; + } + + return ec_Curve25519_mul(X->data, k->data, px); +} diff --git a/security/nss/lib/freebl/ecl/ecp_256.c b/security/nss/lib/freebl/ecl/ecp_256.c new file mode 100644 index 000000000..ad4e630c1 --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecp_256.c @@ -0,0 +1,401 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "ecp.h" +#include "mpi.h" +#include "mplogic.h" +#include "mpi-priv.h" + +/* Fast modular reduction for p256 = 2^256 - 2^224 + 2^192+ 2^96 - 1. a can be r. + * Uses algorithm 2.29 from Hankerson, Menezes, Vanstone. 
Guide to + * Elliptic Curve Cryptography. */

/* ec_GFp_nistp256_mod
 *
 *   a    - value to reduce; may be the same object as r.
 *   r    - receives the result.
 *   meth - field method; only meth->irr is read here (by the generic mp_mod
 *          fallback and by the final mp_sub).
 *
 * Three cases, selected by the bit length of a:
 *   - fewer than 256 bits: a is returned unchanged (copied when a != r);
 *   - more than 512 bits:  the generic mp_mod is used;
 *   - otherwise:           the digits above the low 256 bits are folded into
 *                          the low digits by a fixed sequence of additions
 *                          and subtractions whose carries/borrows accumulate
 *                          in a signed extra word, followed by at most one
 *                          subtraction of meth->irr.
 *
 * The body exists twice: once for 32-bit digits (ECL_THIRTY_TWO_BIT, eight
 * result digits r0..r7 plus r8) and once for 64-bit digits (four result
 * digits r0..r3 plus r4).
 *
 * Returns MP_OKAY, or the error from mp_copy / mp_mod / s_mp_pad / mp_sub
 * (MP_CHECKOK presumably jumps to CLEANUP with res set on failure). */
static mp_err
ec_GFp_nistp256_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
{
    mp_err res = MP_OKAY;
    mp_size a_used = MP_USED(a);
    int a_bits = mpl_significant_bits(a);
    mp_digit carry;

#ifdef ECL_THIRTY_TWO_BIT
    /* High digits default to 0 so that absent digits contribute nothing. */
    mp_digit a8 = 0, a9 = 0, a10 = 0, a11 = 0, a12 = 0, a13 = 0, a14 = 0, a15 = 0;
    mp_digit r0, r1, r2, r3, r4, r5, r6, r7;
    int r8; /* must be a signed value ! */
#else
    mp_digit a4 = 0, a5 = 0, a6 = 0, a7 = 0;
    mp_digit a4h, a4l, a5h, a5l, a6h, a6l, a7h, a7l;
    mp_digit r0, r1, r2, r3;
    int r4; /* must be a signed value ! */
#endif
    /* Inputs shorter than 256 bits are passed through untouched. */
    if (a_bits < 256) {
        if (a == r)
            return MP_OKAY;
        return mp_copy(a, r);
    }
    /* for polynomials larger than twice the field size
     * use regular reduction */
    if (a_bits > 512) {
        MP_CHECKOK(mp_mod(a, &meth->irr, r));
    } else {

#ifdef ECL_THIRTY_TWO_BIT
        /* Load only the high digits that exist; every case deliberately
         * falls through to the ones below it. */
        switch (a_used) {
            case 16:
                a15 = MP_DIGIT(a, 15);
            /* fallthrough */
            case 15:
                a14 = MP_DIGIT(a, 14);
            /* fallthrough */
            case 14:
                a13 = MP_DIGIT(a, 13);
            /* fallthrough */
            case 13:
                a12 = MP_DIGIT(a, 12);
            /* fallthrough */
            case 12:
                a11 = MP_DIGIT(a, 11);
            /* fallthrough */
            case 11:
                a10 = MP_DIGIT(a, 10);
            /* fallthrough */
            case 10:
                a9 = MP_DIGIT(a, 9);
            /* fallthrough */
            case 9:
                a8 = MP_DIGIT(a, 8);
        }

        r0 = MP_DIGIT(a, 0);
        r1 = MP_DIGIT(a, 1);
        r2 = MP_DIGIT(a, 2);
        r3 = MP_DIGIT(a, 3);
        r4 = MP_DIGIT(a, 4);
        r5 = MP_DIGIT(a, 5);
        r6 = MP_DIGIT(a, 6);
        r7 = MP_DIGIT(a, 7);

        /* r8 collects the net carry out of r7: +1 per carry, -1 per borrow. */

        /* sum 1 */
        carry = 0;
        MP_ADD_CARRY(r3, a11, r3, carry);
        MP_ADD_CARRY(r4, a12, r4, carry);
        MP_ADD_CARRY(r5, a13, r5, carry);
        MP_ADD_CARRY(r6, a14, r6, carry);
        MP_ADD_CARRY(r7, a15, r7, carry);
        r8 = carry;
        /* sum 1 is added twice */
        carry = 0;
        MP_ADD_CARRY(r3, a11, r3, carry);
        MP_ADD_CARRY(r4, a12, r4, carry);
        MP_ADD_CARRY(r5, a13, r5, carry);
        MP_ADD_CARRY(r6, a14, r6, carry);
        MP_ADD_CARRY(r7, a15, r7, carry);
        r8 += carry;
        carry = 0;
        /* sum 2 */
        MP_ADD_CARRY(r3, a12, r3, carry);
        MP_ADD_CARRY(r4, a13, r4, carry);
        MP_ADD_CARRY(r5, a14, r5, carry);
        MP_ADD_CARRY(r6, a15, r6, carry);
        MP_ADD_CARRY(r7, 0, r7, carry);
        r8 += carry;
        carry = 0;
        /* combine last bottom of sum 3 with second sum 2 */
        MP_ADD_CARRY(r0, a8, r0, carry);
        MP_ADD_CARRY(r1, a9, r1, carry);
        MP_ADD_CARRY(r2, a10, r2, carry);
        MP_ADD_CARRY(r3, a12, r3, carry);
        MP_ADD_CARRY(r4, a13, r4, carry);
        MP_ADD_CARRY(r5, a14, r5, carry);
        MP_ADD_CARRY(r6, a15, r6, carry);
        MP_ADD_CARRY(r7, a15, r7, carry); /* from sum 3 */
        r8 += carry;
        carry = 0;
        /* sum 3 (rest of it)*/
        MP_ADD_CARRY(r6, a14, r6, carry);
        MP_ADD_CARRY(r7, 0, r7, carry);
        r8 += carry;
        carry = 0;
        /* sum 4 (rest of it)*/
        MP_ADD_CARRY(r0, a9, r0, carry);
        MP_ADD_CARRY(r1, a10, r1, carry);
        MP_ADD_CARRY(r2, a11, r2, carry);
        MP_ADD_CARRY(r3, a13, r3, carry);
        MP_ADD_CARRY(r4, a14, r4, carry);
        MP_ADD_CARRY(r5, a15, r5, carry);
        MP_ADD_CARRY(r6, a13, r6, carry);
        MP_ADD_CARRY(r7, a8, r7, carry);
        r8 += carry;
        carry = 0;
        /* diff 5 */
        MP_SUB_BORROW(r0, a11, r0, carry);
        MP_SUB_BORROW(r1, a12, r1, carry);
        MP_SUB_BORROW(r2, a13, r2, carry);
        MP_SUB_BORROW(r3, 0, r3, carry);
        MP_SUB_BORROW(r4, 0, r4, carry);
        MP_SUB_BORROW(r5, 0, r5, carry);
        MP_SUB_BORROW(r6, a8, r6, carry);
        MP_SUB_BORROW(r7, a10, r7, carry);
        r8 -= carry;
        carry = 0;
        /* diff 6 */
        MP_SUB_BORROW(r0, a12, r0, carry);
        MP_SUB_BORROW(r1, a13, r1, carry);
        MP_SUB_BORROW(r2, a14, r2, carry);
        MP_SUB_BORROW(r3, a15, r3, carry);
        MP_SUB_BORROW(r4, 0, r4, carry);
        MP_SUB_BORROW(r5, 0, r5, carry);
        MP_SUB_BORROW(r6, a9, r6, carry);
        MP_SUB_BORROW(r7, a11, r7, carry);
        r8 -= carry;
        carry = 0;
        /* diff 7 */
        MP_SUB_BORROW(r0, a13, r0, carry);
        MP_SUB_BORROW(r1, a14, r1, carry);
        MP_SUB_BORROW(r2, a15, r2, carry);
        MP_SUB_BORROW(r3, a8, r3, carry);
        MP_SUB_BORROW(r4, a9, r4, carry);
        MP_SUB_BORROW(r5, a10, r5, carry);
        MP_SUB_BORROW(r6, 0, r6, carry);
        MP_SUB_BORROW(r7, a12, r7, carry);
        r8 -= carry;
        carry = 0;
        /* diff 8 */
        MP_SUB_BORROW(r0, a14, r0, carry);
        MP_SUB_BORROW(r1, a15, r1, carry);
        MP_SUB_BORROW(r2, 0, r2, carry);
        MP_SUB_BORROW(r3, a9, r3, carry);
        MP_SUB_BORROW(r4, a10, r4, carry);
        MP_SUB_BORROW(r5, a11, r5, carry);
        MP_SUB_BORROW(r6, 0, r6, carry);
        MP_SUB_BORROW(r7, a13, r7, carry);
        r8 -= carry;

        /* reduce the overflows */
        /* Fold a positive r8 back into r0..r7; repeat while that fold itself
         * carries out of r7. */
        while (r8 > 0) {
            mp_digit r8_d = r8;
            carry = 0;
            MP_ADD_CARRY(r0, r8_d, r0, carry);
            MP_ADD_CARRY(r1, 0, r1, carry);
            MP_ADD_CARRY(r2, 0, r2, carry);
            MP_ADD_CARRY(r3, 0 - r8_d, r3, carry);
            MP_ADD_CARRY(r4, MP_DIGIT_MAX, r4, carry);
            MP_ADD_CARRY(r5, MP_DIGIT_MAX, r5, carry);
            MP_ADD_CARRY(r6, 0 - (r8_d + 1), r6, carry);
            MP_ADD_CARRY(r7, (r8_d - 1), r7, carry);
            r8 = carry;
        }

        /* reduce the underflows */
        /* Mirror image of the loop above for a negative r8. */
        while (r8 < 0) {
            mp_digit r8_d = -r8;
            carry = 0;
            MP_SUB_BORROW(r0, r8_d, r0, carry);
            MP_SUB_BORROW(r1, 0, r1, carry);
            MP_SUB_BORROW(r2, 0, r2, carry);
            MP_SUB_BORROW(r3, 0 - r8_d, r3, carry);
            MP_SUB_BORROW(r4, MP_DIGIT_MAX, r4, carry);
            MP_SUB_BORROW(r5, MP_DIGIT_MAX, r5, carry);
            MP_SUB_BORROW(r6, 0 - (r8_d + 1), r6, carry);
            MP_SUB_BORROW(r7, (r8_d - 1), r7, carry);
            r8 = 0 - carry;
        }
        /* When a == r the digit buffer already holds at least 8 digits. */
        if (a != r) {
            MP_CHECKOK(s_mp_pad(r, 8));
        }
        MP_SIGN(r) = MP_ZPOS;
        MP_USED(r) = 8;

        MP_DIGIT(r, 7) = r7;
        MP_DIGIT(r, 6) = r6;
        MP_DIGIT(r, 5) = r5;
        MP_DIGIT(r, 4) = r4;
        MP_DIGIT(r, 3) = r3;
        MP_DIGIT(r, 2) = r2;
        MP_DIGIT(r, 1) = r1;
        MP_DIGIT(r, 0) = r0;

        /* final reduction if necessary */
        /* Digit-wise test for r >= p256, whose 32-bit digits from most to
         * least significant are MAX, 1, 0, 0, 0, MAX, MAX, MAX. */
        if ((r7 == MP_DIGIT_MAX) &&
            ((r6 > 1) || ((r6 == 1) &&
                          (r5 || r4 || r3 ||
                           ((r2 == MP_DIGIT_MAX) && (r1 == MP_DIGIT_MAX) && (r0 == MP_DIGIT_MAX)))))) {
            MP_CHECKOK(mp_sub(r, &meth->irr, r));
        }

        s_mp_clamp(r);
#else
        /* Load only the high digits that exist; every case deliberately
         * falls through to the ones below it. */
        switch (a_used) {
            case 8:
                a7 = MP_DIGIT(a, 7);
            /* fallthrough */
            case 7:
                a6 = MP_DIGIT(a, 6);
            /* fallthrough */
            case 6:
                a5 = MP_DIGIT(a, 5);
            /* fallthrough */
            case 5:
                a4 = MP_DIGIT(a, 4);
        }
        /* aNl is the low 32 bits of aN moved to the high half; aNh is the
         * high 32 bits of aN moved to the low half. */
        a7l = a7 << 32;
        a7h = a7 >> 32;
        a6l = a6 << 32;
        a6h = a6 >> 32;
        a5l = a5 << 32;
        a5h = a5 >> 32;
        a4l = a4 << 32;
        a4h = a4 >> 32;
        r3 = MP_DIGIT(a, 3);
        r2 = MP_DIGIT(a, 2);
        r1 = MP_DIGIT(a, 1);
        r0 = MP_DIGIT(a, 0);

        /* r4 collects the net carry out of r3: +1 per carry, -1 per borrow. */

        /* sum 1 */
        carry = 0;
        MP_ADD_CARRY(r1, a5h << 32, r1, carry);
        MP_ADD_CARRY(r2, a6, r2, carry);
        MP_ADD_CARRY(r3, a7, r3, carry);
        r4 = carry;
        /* sum 1 is added twice */
        carry = 0;
        MP_ADD_CARRY(r1, a5h << 32, r1, carry);
        MP_ADD_CARRY(r2, a6, r2, carry);
        MP_ADD_CARRY(r3, a7, r3, carry);
        r4 += carry;
        /* sum 2 */
        carry = 0;
        MP_ADD_CARRY(r1, a6l, r1, carry);
        MP_ADD_CARRY(r2, a6h | a7l, r2, carry);
        MP_ADD_CARRY(r3, a7h, r3, carry);
        r4 += carry;
        /* sum 2 is added twice */
        carry = 0;
        MP_ADD_CARRY(r1, a6l, r1, carry);
        MP_ADD_CARRY(r2, a6h | a7l, r2, carry);
        MP_ADD_CARRY(r3, a7h, r3, carry);
        r4 += carry;

        /* sum 3 */
        carry = 0;
        MP_ADD_CARRY(r0, a4, r0, carry);
        MP_ADD_CARRY(r1, a5l >> 32, r1, carry);
        MP_ADD_CARRY(r2, 0, r2, carry);
        MP_ADD_CARRY(r3, a7, r3, carry);
        r4 += carry;
        /* sum 4 */
        carry = 0;
        MP_ADD_CARRY(r0, a4h | a5l, r0, carry);
        MP_ADD_CARRY(r1, a5h | (a6h << 32), r1, carry);
        MP_ADD_CARRY(r2, a7, r2, carry);
        MP_ADD_CARRY(r3, a6h | a4l, r3, carry);
        r4 += carry;
        /* diff 5 */
        carry = 0;
        MP_SUB_BORROW(r0, a5h | a6l, r0, carry);
        MP_SUB_BORROW(r1, a6h, r1, carry);
        MP_SUB_BORROW(r2, 0, r2, carry);
        MP_SUB_BORROW(r3, (a4l >> 32) | a5l, r3, carry);
        r4 -= carry;
        /* diff 6 */
        carry = 0;
        MP_SUB_BORROW(r0, a6, r0, carry);
        MP_SUB_BORROW(r1, a7, r1, carry);
        MP_SUB_BORROW(r2, 0, r2, carry);
        MP_SUB_BORROW(r3, a4h | (a5h << 32), r3, carry);
        r4 -= carry;
        /* diff 7 */
        carry = 0;
        MP_SUB_BORROW(r0, a6h | a7l, r0, carry);
        MP_SUB_BORROW(r1, a7h | a4l, r1, carry);
        MP_SUB_BORROW(r2, a4h | a5l, r2, carry);
        MP_SUB_BORROW(r3, a6l, r3, carry);
        r4 -= carry;
        /* diff 8 */
        carry = 0;
        MP_SUB_BORROW(r0, a7, r0, carry);
        MP_SUB_BORROW(r1, a4h << 32, r1, carry);
        MP_SUB_BORROW(r2, a5, r2, carry);
        MP_SUB_BORROW(r3, a6h << 32, r3, carry);
        r4 -= carry;

        /* reduce the overflows */
        /* Fold a positive r4 back into r0..r3; repeat while that fold itself
         * carries out of r3. */
        while (r4 > 0) {
            mp_digit r4_long = r4;
            mp_digit r4l = (r4_long << 32);
            carry = 0;
            MP_ADD_CARRY(r0, r4_long, r0, carry);
            MP_ADD_CARRY(r1, 0 - r4l, r1, carry);
            MP_ADD_CARRY(r2, MP_DIGIT_MAX, r2, carry);
            MP_ADD_CARRY(r3, r4l - r4_long - 1, r3, carry);
            r4 = carry;
        }

        /* reduce the underflows */
        /* Mirror image of the loop above for a negative r4. */
        while (r4 < 0) {
            mp_digit r4_long = -r4;
            mp_digit r4l = (r4_long << 32);
            carry = 0;
            MP_SUB_BORROW(r0, r4_long, r0, carry);
            MP_SUB_BORROW(r1, 0 - r4l, r1, carry);
            MP_SUB_BORROW(r2, MP_DIGIT_MAX, r2, carry);
            MP_SUB_BORROW(r3, r4l - r4_long - 1, r3, carry);
            r4 = 0 - carry;
        }

        /* When a == r the digit buffer already holds at least 4 digits. */
        if (a != r) {
            MP_CHECKOK(s_mp_pad(r, 4));
        }
        MP_SIGN(r) = MP_ZPOS;
        MP_USED(r) = 4;

        MP_DIGIT(r, 3) = r3;
        MP_DIGIT(r, 2) = r2;
        MP_DIGIT(r, 1) = r1;
        MP_DIGIT(r, 0) = r0;

        /* final reduction if necessary */
        /* Digit-wise test for r >= p256, whose 64-bit digits from most to
         * least significant are 0xFFFFFFFF00000001, 0, 0x00000000FFFFFFFF,
         * MP_DIGIT_MAX. */
        if ((r3 > 0xFFFFFFFF00000001ULL) ||
            ((r3 == 0xFFFFFFFF00000001ULL) &&
             (r2 || (r1 >> 32) ||
              (r1 == 0xFFFFFFFFULL && r0 == MP_DIGIT_MAX)))) {
            /* very rare, just use mp_sub */
            MP_CHECKOK(mp_sub(r, &meth->irr, r));
        }

        s_mp_clamp(r);
#endif
    }

CLEANUP:
    return res;
}

/* Compute the square of polynomial a, reduce modulo p256. Store the
 * result in r. r could be a. Uses optimized modular reduction for p256.
 *
 * Implemented as mp_sqr followed by ec_GFp_nistp256_mod on r in place;
 * returns the first error either step reports, else MP_OKAY.
 */
static mp_err
ec_GFp_nistp256_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
{
    mp_err res = MP_OKAY;

    MP_CHECKOK(mp_sqr(a, r));
    MP_CHECKOK(ec_GFp_nistp256_mod(r, r, meth));
CLEANUP:
    return res;
}

/* Compute the product of two polynomials a and b, reduce modulo p256.
 * Store the result in r. r could be a or b; a could be b. Uses
 * optimized modular reduction for p256.
 *
 * Implemented as mp_mul followed by ec_GFp_nistp256_mod on r in place;
 * returns the first error either step reports, else MP_OKAY. */
static mp_err
ec_GFp_nistp256_mul(const mp_int *a, const mp_int *b, mp_int *r,
                    const GFMethod *meth)
{
    mp_err res = MP_OKAY;

    MP_CHECKOK(mp_mul(a, b, r));
    MP_CHECKOK(ec_GFp_nistp256_mod(r, r, meth));
CLEANUP:
    return res;
}

/* Wire in fast field arithmetic and precomputation of base point for + * named curves.
*/

/* ec_group_set_gfp256
 *
 *   group - group whose field method table is updated.
 *   name  - curve identifier.
 *
 * When name is ECCurve_NIST_P256 the P-256 specific reduction, multiply and
 * square routines are installed in group->meth; for any other name the
 * group is left untouched.  Always returns MP_OKAY. */
mp_err
ec_group_set_gfp256(ECGroup *group, ECCurveName name)
{
    if (name != ECCurve_NIST_P256) {
        /* Nothing to specialise for other curves. */
        return MP_OKAY;
    }

    group->meth->field_mod = &ec_GFp_nistp256_mod;
    group->meth->field_mul = &ec_GFp_nistp256_mul;
    group->meth->field_sqr = &ec_GFp_nistp256_sqr;
    return MP_OKAY;
}
diff --git a/security/nss/lib/freebl/ecl/ecp_256_32.c b/security/nss/lib/freebl/ecl/ecp_256_32.c new file mode 100644 index 000000000..515f6f731 --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecp_256_32.c @@ -0,0 +1,1535 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* A 32-bit implementation of the NIST P-256 elliptic curve. */ + +#include + +#include "prtypes.h" +#include "mpi.h" +#include "mpi-priv.h" +#include "ecp.h" + +typedef PRUint8 u8; +typedef PRUint32 u32; +typedef PRUint64 u64; + +/* Our field elements are represented as nine, unsigned 32-bit words. Freebl's + * MPI library calls them digits, but here they are called limbs, which is + * GMP's terminology. + * + * The value of an felem (field element) is: + * x[0] + (x[1] * 2**29) + (x[2] * 2**57) + ... + (x[8] * 2**228) + * + * That is, each limb is alternately 29 or 28-bits wide in little-endian + * order. + * + * This means that an felem hits 2**257, rather than 2**256 as we would like. A + * 28, 29, ... pattern would cause us to hit 2**256, but that causes problems + * when multiplying as terms end up one bit short of a limb which would require + * much bit-shifting to correct. + * + * Finally, the values stored in an felem are in Montgomery form. So the value + * |y| is stored as (y*R) mod p, where p is the P-256 prime and R is 2**257. + */ +typedef u32 limb; +#define NLIMBS 9 +typedef limb felem[NLIMBS]; + +static const limb kBottom28Bits = 0xfffffff; +static const limb kBottom29Bits = 0x1fffffff; + +/* kOne is the number 1 as an felem.
It's 2**257 mod p split up into 29 and + * 28-bit words. + */ +static const felem kOne = { + 2, 0, 0, 0xffff800, + 0x1fffffff, 0xfffffff, 0x1fbfffff, 0x1ffffff, + 0 +}; +static const felem kZero = { 0 }; +static const felem kP = { + 0x1fffffff, 0xfffffff, 0x1fffffff, 0x3ff, + 0, 0, 0x200000, 0xf000000, + 0xfffffff +}; +static const felem k2P = { + 0x1ffffffe, 0xfffffff, 0x1fffffff, 0x7ff, + 0, 0, 0x400000, 0xe000000, + 0x1fffffff +}; + +/* kPrecomputed contains precomputed values to aid the calculation of scalar + * multiples of the base point, G. It's actually two, equal length, tables + * concatenated. + * + * The first table contains (x,y) felem pairs for 16 multiples of the base + * point, G. + * + * Index | Index (binary) | Value + * 0 | 0000 | 0G (all zeros, omitted) + * 1 | 0001 | G + * 2 | 0010 | 2**64G + * 3 | 0011 | 2**64G + G + * 4 | 0100 | 2**128G + * 5 | 0101 | 2**128G + G + * 6 | 0110 | 2**128G + 2**64G + * 7 | 0111 | 2**128G + 2**64G + G + * 8 | 1000 | 2**192G + * 9 | 1001 | 2**192G + G + * 10 | 1010 | 2**192G + 2**64G + * 11 | 1011 | 2**192G + 2**64G + G + * 12 | 1100 | 2**192G + 2**128G + * 13 | 1101 | 2**192G + 2**128G + G + * 14 | 1110 | 2**192G + 2**128G + 2**64G + * 15 | 1111 | 2**192G + 2**128G + 2**64G + G + * + * The second table follows the same style, but the terms are 2**32G, + * 2**96G, 2**160G, 2**224G. + * + * This is ~2KB of data. 
+ */ +static const limb kPrecomputed[NLIMBS * 2 * 15 * 2] = { + 0x11522878, 0xe730d41, 0xdb60179, 0x4afe2ff, 0x12883add, 0xcaddd88, 0x119e7edc, 0xd4a6eab, 0x3120bee, + 0x1d2aac15, 0xf25357c, 0x19e45cdd, 0x5c721d0, 0x1992c5a5, 0xa237487, 0x154ba21, 0x14b10bb, 0xae3fe3, + 0xd41a576, 0x922fc51, 0x234994f, 0x60b60d3, 0x164586ae, 0xce95f18, 0x1fe49073, 0x3fa36cc, 0x5ebcd2c, + 0xb402f2f, 0x15c70bf, 0x1561925c, 0x5a26704, 0xda91e90, 0xcdc1c7f, 0x1ea12446, 0xe1ade1e, 0xec91f22, + 0x26f7778, 0x566847e, 0xa0bec9e, 0x234f453, 0x1a31f21a, 0xd85e75c, 0x56c7109, 0xa267a00, 0xb57c050, + 0x98fb57, 0xaa837cc, 0x60c0792, 0xcfa5e19, 0x61bab9e, 0x589e39b, 0xa324c5, 0x7d6dee7, 0x2976e4b, + 0x1fc4124a, 0xa8c244b, 0x1ce86762, 0xcd61c7e, 0x1831c8e0, 0x75774e1, 0x1d96a5a9, 0x843a649, 0xc3ab0fa, + 0x6e2e7d5, 0x7673a2a, 0x178b65e8, 0x4003e9b, 0x1a1f11c2, 0x7816ea, 0xf643e11, 0x58c43df, 0xf423fc2, + 0x19633ffa, 0x891f2b2, 0x123c231c, 0x46add8c, 0x54700dd, 0x59e2b17, 0x172db40f, 0x83e277d, 0xb0dd609, + 0xfd1da12, 0x35c6e52, 0x19ede20c, 0xd19e0c0, 0x97d0f40, 0xb015b19, 0x449e3f5, 0xe10c9e, 0x33ab581, + 0x56a67ab, 0x577734d, 0x1dddc062, 0xc57b10d, 0x149b39d, 0x26a9e7b, 0xc35df9f, 0x48764cd, 0x76dbcca, + 0xca4b366, 0xe9303ab, 0x1a7480e7, 0x57e9e81, 0x1e13eb50, 0xf466cf3, 0x6f16b20, 0x4ba3173, 0xc168c33, + 0x15cb5439, 0x6a38e11, 0x73658bd, 0xb29564f, 0x3f6dc5b, 0x53b97e, 0x1322c4c0, 0x65dd7ff, 0x3a1e4f6, + 0x14e614aa, 0x9246317, 0x1bc83aca, 0xad97eed, 0xd38ce4a, 0xf82b006, 0x341f077, 0xa6add89, 0x4894acd, + 0x9f162d5, 0xf8410ef, 0x1b266a56, 0xd7f223, 0x3e0cb92, 0xe39b672, 0x6a2901a, 0x69a8556, 0x7e7c0, + 0x9b7d8d3, 0x309a80, 0x1ad05f7f, 0xc2fb5dd, 0xcbfd41d, 0x9ceb638, 0x1051825c, 0xda0cf5b, 0x812e881, + 0x6f35669, 0x6a56f2c, 0x1df8d184, 0x345820, 0x1477d477, 0x1645db1, 0xbe80c51, 0xc22be3e, 0xe35e65a, + 0x1aeb7aa0, 0xc375315, 0xf67bc99, 0x7fdd7b9, 0x191fc1be, 0x61235d, 0x2c184e9, 0x1c5a839, 0x47a1e26, + 0xb7cb456, 0x93e225d, 0x14f3c6ed, 0xccc1ac9, 0x17fe37f3, 0x4988989, 0x1a90c502, 0x2f32042, 
0xa17769b, + 0xafd8c7c, 0x8191c6e, 0x1dcdb237, 0x16200c0, 0x107b32a1, 0x66c08db, 0x10d06a02, 0x3fc93, 0x5620023, + 0x16722b27, 0x68b5c59, 0x270fcfc, 0xfad0ecc, 0xe5de1c2, 0xeab466b, 0x2fc513c, 0x407f75c, 0xbaab133, + 0x9705fe9, 0xb88b8e7, 0x734c993, 0x1e1ff8f, 0x19156970, 0xabd0f00, 0x10469ea7, 0x3293ac0, 0xcdc98aa, + 0x1d843fd, 0xe14bfe8, 0x15be825f, 0x8b5212, 0xeb3fb67, 0x81cbd29, 0xbc62f16, 0x2b6fcc7, 0xf5a4e29, + 0x13560b66, 0xc0b6ac2, 0x51ae690, 0xd41e271, 0xf3e9bd4, 0x1d70aab, 0x1029f72, 0x73e1c35, 0xee70fbc, + 0xad81baf, 0x9ecc49a, 0x86c741e, 0xfe6be30, 0x176752e7, 0x23d416, 0x1f83de85, 0x27de188, 0x66f70b8, + 0x181cd51f, 0x96b6e4c, 0x188f2335, 0xa5df759, 0x17a77eb6, 0xfeb0e73, 0x154ae914, 0x2f3ec51, 0x3826b59, + 0xb91f17d, 0x1c72949, 0x1362bf0a, 0xe23fddf, 0xa5614b0, 0xf7d8f, 0x79061, 0x823d9d2, 0x8213f39, + 0x1128ae0b, 0xd095d05, 0xb85c0c2, 0x1ecb2ef, 0x24ddc84, 0xe35e901, 0x18411a4a, 0xf5ddc3d, 0x3786689, + 0x52260e8, 0x5ae3564, 0x542b10d, 0x8d93a45, 0x19952aa4, 0x996cc41, 0x1051a729, 0x4be3499, 0x52b23aa, + 0x109f307e, 0x6f5b6bb, 0x1f84e1e7, 0x77a0cfa, 0x10c4df3f, 0x25a02ea, 0xb048035, 0xe31de66, 0xc6ecaa3, + 0x28ea335, 0x2886024, 0x1372f020, 0xf55d35, 0x15e4684c, 0xf2a9e17, 0x1a4a7529, 0xcb7beb1, 0xb2a78a1, + 0x1ab21f1f, 0x6361ccf, 0x6c9179d, 0xb135627, 0x1267b974, 0x4408bad, 0x1cbff658, 0xe3d6511, 0xc7d76f, + 0x1cc7a69, 0xe7ee31b, 0x54fab4f, 0x2b914f, 0x1ad27a30, 0xcd3579e, 0xc50124c, 0x50daa90, 0xb13f72, + 0xb06aa75, 0x70f5cc6, 0x1649e5aa, 0x84a5312, 0x329043c, 0x41c4011, 0x13d32411, 0xb04a838, 0xd760d2d, + 0x1713b532, 0xbaa0c03, 0x84022ab, 0x6bcf5c1, 0x2f45379, 0x18ae070, 0x18c9e11e, 0x20bca9a, 0x66f496b, + 0x3eef294, 0x67500d2, 0xd7f613c, 0x2dbbeb, 0xb741038, 0xe04133f, 0x1582968d, 0xbe985f7, 0x1acbc1a, + 0x1a6a939f, 0x33e50f6, 0xd665ed4, 0xb4b7bd6, 0x1e5a3799, 0x6b33847, 0x17fa56ff, 0x65ef930, 0x21dc4a, + 0x2b37659, 0x450fe17, 0xb357b65, 0xdf5efac, 0x15397bef, 0x9d35a7f, 0x112ac15f, 0x624e62e, 0xa90ae2f, + 0x107eecd2, 0x1f69bbe, 0x77d6bce, 
0x5741394, 0x13c684fc, 0x950c910, 0x725522b, 0xdc78583, 0x40eeabb, + 0x1fde328a, 0xbd61d96, 0xd28c387, 0x9e77d89, 0x12550c40, 0x759cb7d, 0x367ef34, 0xae2a960, 0x91b8bdc, + 0x93462a9, 0xf469ef, 0xb2e9aef, 0xd2ca771, 0x54e1f42, 0x7aaa49, 0x6316abb, 0x2413c8e, 0x5425bf9, + 0x1bed3e3a, 0xf272274, 0x1f5e7326, 0x6416517, 0xea27072, 0x9cedea7, 0x6e7633, 0x7c91952, 0xd806dce, + 0x8e2a7e1, 0xe421e1a, 0x418c9e1, 0x1dbc890, 0x1b395c36, 0xa1dc175, 0x1dc4ef73, 0x8956f34, 0xe4b5cf2, + 0x1b0d3a18, 0x3194a36, 0x6c2641f, 0xe44124c, 0xa2f4eaa, 0xa8c25ba, 0xf927ed7, 0x627b614, 0x7371cca, + 0xba16694, 0x417bc03, 0x7c0a7e3, 0x9c35c19, 0x1168a205, 0x8b6b00d, 0x10e3edc9, 0x9c19bf2, 0x5882229, + 0x1b2b4162, 0xa5cef1a, 0x1543622b, 0x9bd433e, 0x364e04d, 0x7480792, 0x5c9b5b3, 0xe85ff25, 0x408ef57, + 0x1814cfa4, 0x121b41b, 0xd248a0f, 0x3b05222, 0x39bb16a, 0xc75966d, 0xa038113, 0xa4a1769, 0x11fbc6c, + 0x917e50e, 0xeec3da8, 0x169d6eac, 0x10c1699, 0xa416153, 0xf724912, 0x15cd60b7, 0x4acbad9, 0x5efc5fa, + 0xf150ed7, 0x122b51, 0x1104b40a, 0xcb7f442, 0xfbb28ff, 0x6ac53ca, 0x196142cc, 0x7bf0fa9, 0x957651, + 0x4e0f215, 0xed439f8, 0x3f46bd5, 0x5ace82f, 0x110916b6, 0x6db078, 0xffd7d57, 0xf2ecaac, 0xca86dec, + 0x15d6b2da, 0x965ecc9, 0x1c92b4c2, 0x1f3811, 0x1cb080f5, 0x2d8b804, 0x19d1c12d, 0xf20bd46, 0x1951fa7, + 0xa3656c3, 0x523a425, 0xfcd0692, 0xd44ddc8, 0x131f0f5b, 0xaf80e4a, 0xcd9fc74, 0x99bb618, 0x2db944c, + 0xa673090, 0x1c210e1, 0x178c8d23, 0x1474383, 0x10b8743d, 0x985a55b, 0x2e74779, 0x576138, 0x9587927, + 0x133130fa, 0xbe05516, 0x9f4d619, 0xbb62570, 0x99ec591, 0xd9468fe, 0x1d07782d, 0xfc72e0b, 0x701b298, + 0x1863863b, 0x85954b8, 0x121a0c36, 0x9e7fedf, 0xf64b429, 0x9b9d71e, 0x14e2f5d8, 0xf858d3a, 0x942eea8, + 0xda5b765, 0x6edafff, 0xa9d18cc, 0xc65e4ba, 0x1c747e86, 0xe4ea915, 0x1981d7a1, 0x8395659, 0x52ed4e2, + 0x87d43b7, 0x37ab11b, 0x19d292ce, 0xf8d4692, 0x18c3053f, 0x8863e13, 0x4c146c0, 0x6bdf55a, 0x4e4457d, + 0x16152289, 0xac78ec2, 0x1a59c5a2, 0x2028b97, 0x71c2d01, 0x295851f, 0x404747b, 
0x878558d, 0x7d29aa4, + 0x13d8341f, 0x8daefd7, 0x139c972d, 0x6b7ea75, 0xd4a9dde, 0xff163d8, 0x81d55d7, 0xa5bef68, 0xb7b30d8, + 0xbe73d6f, 0xaa88141, 0xd976c81, 0x7e7a9cc, 0x18beb771, 0xd773cbd, 0x13f51951, 0x9d0c177, 0x1c49a78, +}; + +/* Field element operations: + */ + +/* NON_ZERO_TO_ALL_ONES returns: + * 0xffffffff for 0 < x <= 2**31 + * 0 for x == 0 or x > 2**31. + * + * x must be a u32 or an equivalent type such as limb. + */ +#define NON_ZERO_TO_ALL_ONES(x) ((((u32)(x)-1) >> 31) - 1) + +/* felem_reduce_carry adds a multiple of p in order to cancel |carry|, + * which is a term at 2**257. + * + * On entry: carry < 2**3, inout[0,2,...] < 2**29, inout[1,3,...] < 2**28. + * On exit: inout[0,2,..] < 2**30, inout[1,3,...] < 2**29. + */ +static void +felem_reduce_carry(felem inout, limb carry) +{ + const u32 carry_mask = NON_ZERO_TO_ALL_ONES(carry); + + inout[0] += carry << 1; + inout[3] += 0x10000000 & carry_mask; + /* carry < 2**3 thus (carry << 11) < 2**14 and we added 2**28 in the + * previous line therefore this doesn't underflow. + */ + inout[3] -= carry << 11; + inout[4] += (0x20000000 - 1) & carry_mask; + inout[5] += (0x10000000 - 1) & carry_mask; + inout[6] += (0x20000000 - 1) & carry_mask; + inout[6] -= carry << 22; + /* This may underflow if carry is non-zero but, if so, we'll fix it in the + * next line. + */ + inout[7] -= 1 & carry_mask; + inout[7] += carry << 25; +} + +/* felem_sum sets out = in+in2. + * + * On entry, in[i]+in2[i] must not overflow a 32-bit word. + * On exit: out[0,2,...] < 2**30, out[1,3,...] 
< 2**29 + */ +static void +felem_sum(felem out, const felem in, const felem in2) +{ + limb carry = 0; + unsigned int i; + for (i = 0;; i++) { + out[i] = in[i] + in2[i]; + out[i] += carry; + carry = out[i] >> 29; + out[i] &= kBottom29Bits; + + i++; + if (i == NLIMBS) + break; + + out[i] = in[i] + in2[i]; + out[i] += carry; + carry = out[i] >> 28; + out[i] &= kBottom28Bits; + } + + felem_reduce_carry(out, carry); +} + +#define two31m3 (((limb)1) << 31) - (((limb)1) << 3) +#define two30m2 (((limb)1) << 30) - (((limb)1) << 2) +#define two30p13m2 (((limb)1) << 30) + (((limb)1) << 13) - (((limb)1) << 2) +#define two31m2 (((limb)1) << 31) - (((limb)1) << 2) +#define two31p24m2 (((limb)1) << 31) + (((limb)1) << 24) - (((limb)1) << 2) +#define two30m27m2 (((limb)1) << 30) - (((limb)1) << 27) - (((limb)1) << 2) + +/* zero31 is 0 mod p. + */ +static const felem zero31 = { + two31m3, two30m2, two31m2, two30p13m2, + two31m2, two30m2, two31p24m2, two30m27m2, + two31m2 +}; + +/* felem_diff sets out = in-in2. + * + * On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29 and + * in2[0,2,...] < 2**30, in2[1,3,...] < 2**29. + * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29. + */ +static void +felem_diff(felem out, const felem in, const felem in2) +{ + limb carry = 0; + unsigned int i; + + for (i = 0;; i++) { + out[i] = in[i] - in2[i]; + out[i] += zero31[i]; + out[i] += carry; + carry = out[i] >> 29; + out[i] &= kBottom29Bits; + + i++; + if (i == NLIMBS) + break; + + out[i] = in[i] - in2[i]; + out[i] += zero31[i]; + out[i] += carry; + carry = out[i] >> 28; + out[i] &= kBottom28Bits; + } + + felem_reduce_carry(out, carry); +} + +/* felem_reduce_degree sets out = tmp/R mod p where tmp contains 64-bit words + * with the same 29,28,... bit positions as an felem. + * + * The values in felems are in Montgomery form: x*R mod p where R = 2**257. + * Since we just multiplied two Montgomery values together, the result is + * x*y*R*R mod p. 
We wish to divide by R in order for the result also to be
+ * in Montgomery form.
+ *
+ * On entry: tmp[i] < 2**64
+ * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29
+ */
+static void
+felem_reduce_degree(felem out, u64 tmp[17])
+{
+    /* The following table may be helpful when reading this code:
+     *
+     * Limb number:   0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10...
+     * Width (bits):  29| 28| 29| 28| 29| 28| 29| 28| 29| 28| 29
+     * Start bit:     0 | 29| 57| 86|114|143|171|200|228|257|285
+     *   (odd phase): 0 | 28| 57| 85|114|142|171|199|228|256|285
+     *
+     * The function runs in three stages: the 17 overlapping 64-bit words of
+     * tmp are first split into 18 limb-sized words (tmp2), then tmp2[0..8]
+     * are cleared by adding multiples of p, and finally tmp2[9..17] are
+     * copied down into out with their widths corrected.
+     */
+    limb tmp2[18], carry, x, xMask;
+    unsigned int i;
+
+    /* tmp contains 64-bit words with the same 29,28,29-bit positions as an
+     * felem. So the top of an element of tmp might overlap with another
+     * element two positions down. The following loop eliminates this
+     * overlap.
+     */
+    tmp2[0] = tmp[0] & kBottom29Bits;
+
+    /* In the following we use "(limb) tmp[x]" and "(limb) (tmp[x]>>32)" to try
+     * and hint to the compiler that it can do a single-word shift by selecting
+     * the right register rather than doing a double-word shift and truncating
+     * afterwards.
+     */
+    tmp2[1] = ((limb)tmp[0]) >> 29;
+    tmp2[1] |= (((limb)(tmp[0] >> 32)) << 3) & kBottom28Bits;
+    tmp2[1] += ((limb)tmp[1]) & kBottom28Bits;
+    carry = tmp2[1] >> 28;
+    tmp2[1] &= kBottom28Bits;
+
+    for (i = 2; i < 17; i++) {
+        tmp2[i] = ((limb)(tmp[i - 2] >> 32)) >> 25;
+        tmp2[i] += ((limb)(tmp[i - 1])) >> 28;
+        tmp2[i] += (((limb)(tmp[i - 1] >> 32)) << 4) & kBottom29Bits;
+        tmp2[i] += ((limb)tmp[i]) & kBottom29Bits;
+        tmp2[i] += carry;
+        carry = tmp2[i] >> 29;
+        tmp2[i] &= kBottom29Bits;
+
+        i++;
+        if (i == 17)
+            break;
+        tmp2[i] = ((limb)(tmp[i - 2] >> 32)) >> 25;
+        tmp2[i] += ((limb)(tmp[i - 1])) >> 29;
+        tmp2[i] += (((limb)(tmp[i - 1] >> 32)) << 3) & kBottom28Bits;
+        tmp2[i] += ((limb)tmp[i]) & kBottom28Bits;
+        tmp2[i] += carry;
+        carry = tmp2[i] >> 28;
+        tmp2[i] &= kBottom28Bits;
+    }
+
+    tmp2[17] = ((limb)(tmp[15] >> 32)) >> 25;
+    tmp2[17] += ((limb)(tmp[16])) >> 29;
+    tmp2[17] += (((limb)(tmp[16] >> 32)) << 3);
+    tmp2[17] += carry;
+
+    /* Montgomery elimination of terms:
+     *
+     * Since R is 2**257, we can divide by R with a bitwise shift if we can
+     * ensure that the right-most 257 bits are all zero. We can make that true
+     * by adding multiples of p without affecting the value.
+     *
+     * So we eliminate limbs from right to left. Since the bottom 29 bits of p
+     * are all ones, then by adding tmp2[0]*p to tmp2 we'll make tmp2[0] == 0.
+     * We can do that for 8 further limbs and then right shift to eliminate the
+     * extra factor of R.
+     *
+     * Each pass of the loop below clears one even (29-bit) word and then one
+     * odd (28-bit) word; the "i + 1 == NLIMBS" test ends the last pass after
+     * its even word. xMask comes from NON_ZERO_TO_ALL_ONES (defined earlier
+     * in the file; judging by its name it is all-ones for a non-zero x) and
+     * masks out the constant correction terms when x is zero.
+     */
+    for (i = 0;; i += 2) {
+        tmp2[i + 1] += tmp2[i] >> 29;
+        x = tmp2[i] & kBottom29Bits;
+        xMask = NON_ZERO_TO_ALL_ONES(x);
+        tmp2[i] = 0;
+
+        /* The bounds calculations for this loop are tricky. Each iteration of
+         * the loop eliminates two words by adding values to words to their
+         * right.
+         *
+         * The following table contains the amounts added to each word (as an
+         * offset from the value of i at the top of the loop). The amounts are
+         * accounted for from the first and second half of the loop separately
+         * and are written as, for example, 28 to mean a value <2**28.
+         *
+         * Word:                   3   4   5   6   7   8   9   10
+         * Added in top half:     28  11      29  21  29  28
+         *                                        28  29
+         *                                            29
+         * Added in bottom half:      29  10      28  21  28   28
+         *                                            29
+         *
+         * The value that is currently offset 7 will be offset 5 for the next
+         * iteration and then offset 3 for the iteration after that. Therefore
+         * the total value added will be the values added at 7, 5 and 3.
+         *
+         * The following table accumulates these values. The sums at the bottom
+         * are written as, for example, 29+28, to mean a value < 2**29+2**28.
+         *
+         * Word:                   3   4   5   6   7   8   9  10  11  12  13
+         *                        28  11  10  29  21  29  28  28  28  28  28
+         *                            29  28  11  28  29  28  29  28  29  28
+         *                                    29  28  21  21  29  21  29  21
+         *                                        10  29  28  21  28  21  28
+         *                                        28  29  28  29  28  29  28
+         *                                            11  10  29  10  29  10
+         *                                            29  28  11  28  11
+         *                                                    29      29
+         *                        --------------------------------------------
+         *                                                30+ 31+ 30+ 31+ 30+
+         *                                                28+ 29+ 28+ 29+ 21+
+         *                                                21+ 28+ 21+ 28+ 10
+         *                                                10  21+ 10  21+
+         *                                                    11      11
+         *
+         * So the greatest amount is added to tmp2[10] and tmp2[12]. If
+         * tmp2[10/12] has an initial value of <2**29, then the maximum value
+         * will be < 2**31 + 2**30 + 2**28 + 2**21 + 2**11, which is < 2**32,
+         * as required.
+         */
+        tmp2[i + 3] += (x << 10) & kBottom28Bits;
+        tmp2[i + 4] += (x >> 18);
+
+        tmp2[i + 6] += (x << 21) & kBottom29Bits;
+        tmp2[i + 7] += x >> 8;
+
+        /* At position 200, which is the starting bit position for word 7, we
+         * have a factor of 0xf000000 = 2**28 - 2**24.
+         */
+        tmp2[i + 7] += 0x10000000 & xMask;
+        /* Word 7 is 28 bits wide, so the 2**28 term exactly hits word 8. */
+        tmp2[i + 8] += (x - 1) & xMask;
+        tmp2[i + 7] -= (x << 24) & kBottom28Bits;
+        tmp2[i + 8] -= x >> 4;
+
+        tmp2[i + 8] += 0x20000000 & xMask;
+        tmp2[i + 8] -= x;
+        tmp2[i + 8] += (x << 28) & kBottom29Bits;
+        tmp2[i + 9] += ((x >> 1) - 1) & xMask;
+
+        if (i + 1 == NLIMBS)
+            break;
+        tmp2[i + 2] += tmp2[i + 1] >> 28;
+        x = tmp2[i + 1] & kBottom28Bits;
+        xMask = NON_ZERO_TO_ALL_ONES(x);
+        tmp2[i + 1] = 0;
+
+        tmp2[i + 4] += (x << 11) & kBottom29Bits;
+        tmp2[i + 5] += (x >> 18);
+
+        tmp2[i + 7] += (x << 21) & kBottom28Bits;
+        tmp2[i + 8] += x >> 7;
+
+        /* At position 199, which is the starting bit of the 8th word when
+         * dealing with a context starting on an odd word, we have a factor of
+         * 0x1e000000 = 2**29 - 2**25. Since we have not updated i, the 8th
+         * word from i+1 is i+8.
+         */
+        tmp2[i + 8] += 0x20000000 & xMask;
+        tmp2[i + 9] += (x - 1) & xMask;
+        tmp2[i + 8] -= (x << 25) & kBottom29Bits;
+        tmp2[i + 9] -= x >> 4;
+
+        tmp2[i + 9] += 0x10000000 & xMask;
+        tmp2[i + 9] -= x;
+        tmp2[i + 10] += (x - 1) & xMask;
+    }
+
+    /* We merge the right shift with a carry chain. The words above 2**257 have
+     * widths of 28,29,... which we need to correct when copying them down.
+     * (Word 9 starts at bit 257 but is only 28 bits wide, so each even output
+     * limb borrows the low bit of the following word and each odd output limb
+     * drops that bit again with ">> 1".)
+     */
+    carry = 0;
+    for (i = 0; i < 8; i++) {
+        /* The maximum value of tmp2[i + 9] occurs on the first iteration and
+         * is < 2**30+2**29+2**28. Adding 2**29 (from tmp2[i + 10]) is
+         * therefore safe.
+         */
+        out[i] = tmp2[i + 9];
+        out[i] += carry;
+        out[i] += (tmp2[i + 10] << 28) & kBottom29Bits;
+        carry = out[i] >> 29;
+        out[i] &= kBottom29Bits;
+
+        i++;
+        out[i] = tmp2[i + 9] >> 1;
+        out[i] += carry;
+        carry = out[i] >> 28;
+        out[i] &= kBottom28Bits;
+    }
+
+    out[8] = tmp2[17];
+    out[8] += carry;
+    carry = out[8] >> 29;
+    out[8] &= kBottom29Bits;
+
+    felem_reduce_carry(out, carry);
+}
+
+/* felem_square sets out=in*in.
+ *
+ * On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29.
+ * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
+ */
+static void
+felem_square(felem out, const felem in)
+{
+    u64 tmp[17]; /* tmp[k] accumulates the products in[i]*in[j] with i+j == k.
+                  * A cross term (i != j) occurs twice in a square, hence the
+                  * "<< 1". When i and j are both odd the start bits of the
+                  * two limbs add up to one more than the start bit of limb
+                  * i+j (29 + 29 = 58 against 57 in the felem_reduce_degree
+                  * table), so the product is doubled once more: "<< 1" for
+                  * in[i]*in[i] and "<< 2" for an odd/odd cross term. */
+
+    tmp[0] = ((u64)in[0]) * in[0];
+    tmp[1] = ((u64)in[0]) * (in[1] << 1);
+    tmp[2] = ((u64)in[0]) * (in[2] << 1) +
+             ((u64)in[1]) * (in[1] << 1);
+    tmp[3] = ((u64)in[0]) * (in[3] << 1) +
+             ((u64)in[1]) * (in[2] << 1);
+    tmp[4] = ((u64)in[0]) * (in[4] << 1) +
+             ((u64)in[1]) * (in[3] << 2) +
+             ((u64)in[2]) * in[2];
+    tmp[5] = ((u64)in[0]) * (in[5] << 1) +
+             ((u64)in[1]) * (in[4] << 1) +
+             ((u64)in[2]) * (in[3] << 1);
+    tmp[6] = ((u64)in[0]) * (in[6] << 1) +
+             ((u64)in[1]) * (in[5] << 2) +
+             ((u64)in[2]) * (in[4] << 1) +
+             ((u64)in[3]) * (in[3] << 1);
+    tmp[7] = ((u64)in[0]) * (in[7] << 1) +
+             ((u64)in[1]) * (in[6] << 1) +
+             ((u64)in[2]) * (in[5] << 1) +
+             ((u64)in[3]) * (in[4] << 1);
+    /* tmp[8] has the greatest value of 2**61 + 2**60 + 2**61 + 2**60 + 2**60,
+     * which is < 2**64 as required.
+     */
+    tmp[8] = ((u64)in[0]) * (in[8] << 1) +
+             ((u64)in[1]) * (in[7] << 2) +
+             ((u64)in[2]) * (in[6] << 1) +
+             ((u64)in[3]) * (in[5] << 2) +
+             ((u64)in[4]) * in[4];
+    tmp[9] = ((u64)in[1]) * (in[8] << 1) +
+             ((u64)in[2]) * (in[7] << 1) +
+             ((u64)in[3]) * (in[6] << 1) +
+             ((u64)in[4]) * (in[5] << 1);
+    tmp[10] = ((u64)in[2]) * (in[8] << 1) +
+              ((u64)in[3]) * (in[7] << 2) +
+              ((u64)in[4]) * (in[6] << 1) +
+              ((u64)in[5]) * (in[5] << 1);
+    tmp[11] = ((u64)in[3]) * (in[8] << 1) +
+              ((u64)in[4]) * (in[7] << 1) +
+              ((u64)in[5]) * (in[6] << 1);
+    tmp[12] = ((u64)in[4]) * (in[8] << 1) +
+              ((u64)in[5]) * (in[7] << 2) +
+              ((u64)in[6]) * in[6];
+    tmp[13] = ((u64)in[5]) * (in[8] << 1) +
+              ((u64)in[6]) * (in[7] << 1);
+    tmp[14] = ((u64)in[6]) * (in[8] << 1) +
+              ((u64)in[7]) * (in[7] << 1);
+    tmp[15] = ((u64)in[7]) * (in[8] << 1);
+    tmp[16] = ((u64)in[8]) * in[8];
+
+    felem_reduce_degree(out, tmp); /* out = tmp/R mod p */
+}
+
+/* felem_mul sets out=in*in2.
+ *
+ * On entry: in[0,2,...] < 2**30, in[1,3,...] < 2**29 and
+ *           in2[0,2,...] < 2**30, in2[1,3,...] < 2**29.
+ * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
+ */
+static void
+felem_mul(felem out, const felem in, const felem in2)
+{
+    u64 tmp[17]; /* tmp[k] accumulates in[i]*in2[j] over all i+j == k. A
+                  * product of two odd-numbered limbs is doubled ("<< 1");
+                  * every other product is used as is ("<< 0"). All of tmp is
+                  * computed before out is written, so out may alias in or
+                  * in2 (felem_inv relies on this). */
+
+    tmp[0] = ((u64)in[0]) * in2[0];
+    tmp[1] = ((u64)in[0]) * (in2[1] << 0) +
+             ((u64)in[1]) * (in2[0] << 0);
+    tmp[2] = ((u64)in[0]) * (in2[2] << 0) +
+             ((u64)in[1]) * (in2[1] << 1) +
+             ((u64)in[2]) * (in2[0] << 0);
+    tmp[3] = ((u64)in[0]) * (in2[3] << 0) +
+             ((u64)in[1]) * (in2[2] << 0) +
+             ((u64)in[2]) * (in2[1] << 0) +
+             ((u64)in[3]) * (in2[0] << 0);
+    tmp[4] = ((u64)in[0]) * (in2[4] << 0) +
+             ((u64)in[1]) * (in2[3] << 1) +
+             ((u64)in[2]) * (in2[2] << 0) +
+             ((u64)in[3]) * (in2[1] << 1) +
+             ((u64)in[4]) * (in2[0] << 0);
+    tmp[5] = ((u64)in[0]) * (in2[5] << 0) +
+             ((u64)in[1]) * (in2[4] << 0) +
+             ((u64)in[2]) * (in2[3] << 0) +
+             ((u64)in[3]) * (in2[2] << 0) +
+             ((u64)in[4]) * (in2[1] << 0) +
+             ((u64)in[5]) * (in2[0] << 0);
+    tmp[6] = ((u64)in[0]) * (in2[6] << 0) +
+             ((u64)in[1]) * (in2[5] << 1) +
+             ((u64)in[2]) * (in2[4] << 0) +
+             ((u64)in[3]) * (in2[3] << 1) +
+             ((u64)in[4]) * (in2[2] << 0) +
+             ((u64)in[5]) * (in2[1] << 1) +
+             ((u64)in[6]) * (in2[0] << 0);
+    tmp[7] = ((u64)in[0]) * (in2[7] << 0) +
+             ((u64)in[1]) * (in2[6] << 0) +
+             ((u64)in[2]) * (in2[5] << 0) +
+             ((u64)in[3]) * (in2[4] << 0) +
+             ((u64)in[4]) * (in2[3] << 0) +
+             ((u64)in[5]) * (in2[2] << 0) +
+             ((u64)in[6]) * (in2[1] << 0) +
+             ((u64)in[7]) * (in2[0] << 0);
+    /* tmp[8] has the greatest value but doesn't overflow. See logic in
+     * felem_square.
+     */
+    tmp[8] = ((u64)in[0]) * (in2[8] << 0) +
+             ((u64)in[1]) * (in2[7] << 1) +
+             ((u64)in[2]) * (in2[6] << 0) +
+             ((u64)in[3]) * (in2[5] << 1) +
+             ((u64)in[4]) * (in2[4] << 0) +
+             ((u64)in[5]) * (in2[3] << 1) +
+             ((u64)in[6]) * (in2[2] << 0) +
+             ((u64)in[7]) * (in2[1] << 1) +
+             ((u64)in[8]) * (in2[0] << 0);
+    tmp[9] = ((u64)in[1]) * (in2[8] << 0) +
+             ((u64)in[2]) * (in2[7] << 0) +
+             ((u64)in[3]) * (in2[6] << 0) +
+             ((u64)in[4]) * (in2[5] << 0) +
+             ((u64)in[5]) * (in2[4] << 0) +
+             ((u64)in[6]) * (in2[3] << 0) +
+             ((u64)in[7]) * (in2[2] << 0) +
+             ((u64)in[8]) * (in2[1] << 0);
+    tmp[10] = ((u64)in[2]) * (in2[8] << 0) +
+              ((u64)in[3]) * (in2[7] << 1) +
+              ((u64)in[4]) * (in2[6] << 0) +
+              ((u64)in[5]) * (in2[5] << 1) +
+              ((u64)in[6]) * (in2[4] << 0) +
+              ((u64)in[7]) * (in2[3] << 1) +
+              ((u64)in[8]) * (in2[2] << 0);
+    tmp[11] = ((u64)in[3]) * (in2[8] << 0) +
+              ((u64)in[4]) * (in2[7] << 0) +
+              ((u64)in[5]) * (in2[6] << 0) +
+              ((u64)in[6]) * (in2[5] << 0) +
+              ((u64)in[7]) * (in2[4] << 0) +
+              ((u64)in[8]) * (in2[3] << 0);
+    tmp[12] = ((u64)in[4]) * (in2[8] << 0) +
+              ((u64)in[5]) * (in2[7] << 1) +
+              ((u64)in[6]) * (in2[6] << 0) +
+              ((u64)in[7]) * (in2[5] << 1) +
+              ((u64)in[8]) * (in2[4] << 0);
+    tmp[13] = ((u64)in[5]) * (in2[8] << 0) +
+              ((u64)in[6]) * (in2[7] << 0) +
+              ((u64)in[7]) * (in2[6] << 0) +
+              ((u64)in[8]) * (in2[5] << 0);
+    tmp[14] = ((u64)in[6]) * (in2[8] << 0) +
+              ((u64)in[7]) * (in2[7] << 1) +
+              ((u64)in[8]) * (in2[6] << 0);
+    tmp[15] = ((u64)in[7]) * (in2[8] << 0) +
+              ((u64)in[8]) * (in2[7] << 0);
+    tmp[16] = ((u64)in[8]) * (in2[8] << 0);
+
+    felem_reduce_degree(out, tmp); /* out = tmp/R mod p */
+}
+
+static void
+felem_assign(felem out, const felem in)
+{
+    memcpy(out, in, sizeof(felem)); /* limb-for-limb copy: out = in */
+}
+
+/* felem_inv calculates |out| = |in|^{-1}
+ *
+ * Based on Fermat's Little Theorem:
+ *   a^p = a (mod p)
+ *   a^{p-1} = 1 (mod p)
+ *   a^{p-2} = a^{-1} (mod p)
+ *
+ * The exponent p-2 = 2^256 - 2^224 + 2^192 + 2^96 - 3 is built with a fixed
+ * chain of squarings and multiplications that does not depend on the value
+ * of |in|. The trailing comments give the exponent of |in| that the
+ * destination holds after each step.
+ */
+static void
+felem_inv(felem out, const felem in)
+{
+    felem ftmp, ftmp2;
+    /* each e_I will hold |in|^{2^I - 1}, except e64, which is saved straight
+     * after the 32 squarings and therefore holds |in|^{2^64 - 2^32} */
+    felem e2, e4, e8, e16, e32, e64;
+    unsigned int i;
+
+    felem_square(ftmp, in);     /* 2^1 */
+    felem_mul(ftmp, in, ftmp);  /* 2^2 - 2^0 */
+    felem_assign(e2, ftmp);
+    felem_square(ftmp, ftmp);   /* 2^3 - 2^1 */
+    felem_square(ftmp, ftmp);   /* 2^4 - 2^2 */
+    felem_mul(ftmp, ftmp, e2);  /* 2^4 - 2^0 */
+    felem_assign(e4, ftmp);
+    felem_square(ftmp, ftmp);   /* 2^5 - 2^1 */
+    felem_square(ftmp, ftmp);   /* 2^6 - 2^2 */
+    felem_square(ftmp, ftmp);   /* 2^7 - 2^3 */
+    felem_square(ftmp, ftmp);   /* 2^8 - 2^4 */
+    felem_mul(ftmp, ftmp, e4);  /* 2^8 - 2^0 */
+    felem_assign(e8, ftmp);
+    for (i = 0; i < 8; i++) {
+        felem_square(ftmp, ftmp);
+    }                           /* 2^16 - 2^8 */
+    felem_mul(ftmp, ftmp, e8);  /* 2^16 - 2^0 */
+    felem_assign(e16, ftmp);
+    for (i = 0; i < 16; i++) {
+        felem_square(ftmp, ftmp);
+    }                           /* 2^32 - 2^16 */
+    felem_mul(ftmp, ftmp, e16); /* 2^32 - 2^0 */
+    felem_assign(e32, ftmp);
+    for (i = 0; i < 32; i++) {
+        felem_square(ftmp, ftmp);
+    }                           /* 2^64 - 2^32 */
+    felem_assign(e64, ftmp);
+    felem_mul(ftmp, ftmp, in);  /* 2^64 - 2^32 + 2^0 */
+    for (i = 0; i < 192; i++) {
+        felem_square(ftmp, ftmp);
+    }                           /* 2^256 - 2^224 + 2^192 */
+
+    felem_mul(ftmp2, e64, e32); /* 2^64 - 2^0 */
+    for (i = 0; i < 16; i++) {
+        felem_square(ftmp2, ftmp2);
+    }                             /* 2^80 - 2^16 */
+    felem_mul(ftmp2, ftmp2, e16); /* 2^80 - 2^0 */
+    for (i = 0; i < 8; i++) {
+        felem_square(ftmp2, ftmp2);
+    }                            /* 2^88 - 2^8 */
+    felem_mul(ftmp2, ftmp2, e8); /* 2^88 - 2^0 */
+    for (i = 0; i < 4; i++) {
+        felem_square(ftmp2, ftmp2);
+    }                            /* 2^92 - 2^4 */
+    felem_mul(ftmp2, ftmp2, e4); /* 2^92 - 2^0 */
+    felem_square(ftmp2, ftmp2);  /* 2^93 - 2^1 */
+    felem_square(ftmp2, ftmp2);  /* 2^94 - 2^2 */
+    felem_mul(ftmp2, ftmp2, e2); /* 2^94 - 2^0 */
+    felem_square(ftmp2, ftmp2);  /* 2^95 - 2^1 */
+    felem_square(ftmp2, ftmp2);  /* 2^96 - 2^2 */
+    felem_mul(ftmp2, ftmp2, in); /* 2^96 - 3 */
+
+    felem_mul(out, ftmp2, ftmp); /* 2^256 - 2^224 + 2^192 + 2^96 - 3 */
+}
+
+/* felem_scalar_3 sets out=3*out.
+ *
+ * On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
+ * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
+ */
+static void
+felem_scalar_3(felem out)
+{
+    limb carry = 0;
+    unsigned int i;
+
+    for (i = 0;; i++) {
+        out[i] *= 3;
+        out[i] += carry;
+        carry = out[i] >> 29; /* bits above the 29-bit limb move on */
+        out[i] &= kBottom29Bits;
+
+        i++;
+        if (i == NLIMBS)
+            break;
+
+        out[i] *= 3;
+        out[i] += carry;
+        carry = out[i] >> 28; /* bits above the 28-bit limb move on */
+        out[i] &= kBottom28Bits;
+    }
+
+    felem_reduce_carry(out, carry);
+}
+
+/* felem_scalar_4 sets out=4*out.
+ *
+ * On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
+ * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
+ *
+ * The bits that the shift moves above the limb width are captured in
+ * next_carry before shifting (out[i] >> 27 for a 29-bit limb, out[i] >> 26
+ * for a 28-bit limb). They are passed to the following limb together with
+ * any carry produced by adding the incoming carry.
+ */
+static void
+felem_scalar_4(felem out)
+{
+    limb carry = 0, next_carry;
+    unsigned int i;
+
+    for (i = 0;; i++) {
+        next_carry = out[i] >> 27;
+        out[i] <<= 2;
+        out[i] &= kBottom29Bits;
+        out[i] += carry;
+        carry = next_carry + (out[i] >> 29);
+        out[i] &= kBottom29Bits;
+
+        i++;
+        if (i == NLIMBS)
+            break;
+        next_carry = out[i] >> 26;
+        out[i] <<= 2;
+        out[i] &= kBottom28Bits;
+        out[i] += carry;
+        carry = next_carry + (out[i] >> 28);
+        out[i] &= kBottom28Bits;
+    }
+
+    felem_reduce_carry(out, carry);
+}
+
+/* felem_scalar_8 sets out=8*out.
+ *
+ * On entry: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
+ * On exit: out[0,2,...] < 2**30, out[1,3,...] < 2**29.
+ *
+ * Same scheme as felem_scalar_4 with a shift of three: the bits shifted
+ * above the limb width are saved first (out[i] >> 26 for a 29-bit limb,
+ * out[i] >> 25 for a 28-bit limb) and carried into the following limb.
+ */
+static void
+felem_scalar_8(felem out)
+{
+    limb carry = 0, next_carry;
+    unsigned int i;
+
+    for (i = 0;; i++) {
+        next_carry = out[i] >> 26;
+        out[i] <<= 3;
+        out[i] &= kBottom29Bits;
+        out[i] += carry;
+        carry = next_carry + (out[i] >> 29);
+        out[i] &= kBottom29Bits;
+
+        i++;
+        if (i == NLIMBS)
+            break;
+        next_carry = out[i] >> 25;
+        out[i] <<= 3;
+        out[i] &= kBottom28Bits;
+        out[i] += carry;
+        carry = next_carry + (out[i] >> 28);
+        out[i] &= kBottom28Bits;
+    }
+
+    felem_reduce_carry(out, carry);
+}
+
+/* felem_is_zero_vartime returns 1 iff |in| == 0. It takes a variable amount of
+ * time depending on the value of |in|.
+ */
+static char
+felem_is_zero_vartime(const felem in)
+{
+    limb carry;
+    int i;
+    limb tmp[NLIMBS];
+    felem_assign(tmp, in); /* work on a copy; |in| is left untouched */
+
+    /* First, reduce tmp to a minimal form. Each pass propagates the carries
+     * through all limbs and hands the carry out of the top limb to
+     * felem_reduce_carry; the loop repeats for as long as a pass produced
+     * such a carry.
+     */
+    do {
+        carry = 0;
+        for (i = 0;; i++) {
+            tmp[i] += carry;
+            carry = tmp[i] >> 29;
+            tmp[i] &= kBottom29Bits;
+
+            i++;
+            if (i == NLIMBS)
+                break;
+
+            tmp[i] += carry;
+            carry = tmp[i] >> 28;
+            tmp[i] &= kBottom28Bits;
+        }
+
+        felem_reduce_carry(tmp, carry);
+    } while (carry);
+
+    /* tmp < 2**257, so the only possible zero values are 0, p and 2p.
+     * kZero, kP and k2P are limb arrays defined earlier in the file.
+     */
+    return memcmp(tmp, kZero, sizeof(tmp)) == 0 ||
+           memcmp(tmp, kP, sizeof(tmp)) == 0 ||
+           memcmp(tmp, k2P, sizeof(tmp)) == 0;
+}
+
+/* Group operations:
+ *
+ * Elements of the elliptic curve group are represented in Jacobian
+ * coordinates: (x, y, z). An affine point (x', y') is x'=x/z**2, y'=y/z**3 in
+ * Jacobian form.
+ */
+
+/* point_double sets {x_out,y_out,z_out} = 2*{x,y,z}.
+ *
+ * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#doubling-dbl-2009-l
+ *
+ * Every input is read for the last time before the matching output is
+ * written, so the outputs may alias the inputs; scalar_base_mult and
+ * scalar_mult double {nx,ny,nz} in place.
+ */
+static void
+point_double(felem x_out, felem y_out, felem z_out,
+             const felem x, const felem y, const felem z)
+{
+    felem delta, gamma, alpha, beta, tmp, tmp2;
+
+    felem_square(delta, z);    /* delta = z^2 */
+    felem_square(gamma, y);    /* gamma = y^2 */
+    felem_mul(beta, x, gamma); /* beta = x*gamma */
+
+    felem_sum(tmp, x, delta);
+    felem_diff(tmp2, x, delta);
+    felem_mul(alpha, tmp, tmp2);
+    felem_scalar_3(alpha); /* alpha = 3*(x + delta)*(x - delta) */
+
+    felem_sum(tmp, y, z);
+    felem_square(tmp, tmp);
+    felem_diff(tmp, tmp, gamma);
+    felem_diff(z_out, tmp, delta); /* z_out = (y + z)^2 - gamma - delta */
+
+    felem_scalar_4(beta); /* beta = 4*x*gamma */
+    felem_square(x_out, alpha);
+    felem_diff(x_out, x_out, beta);
+    felem_diff(x_out, x_out, beta); /* x_out = alpha^2 - 2*beta */
+
+    felem_diff(tmp, beta, x_out);
+    felem_mul(tmp, alpha, tmp); /* tmp = alpha*(beta - x_out) */
+    felem_square(tmp2, gamma);
+    felem_scalar_8(tmp2);         /* tmp2 = 8*gamma^2 */
+    felem_diff(y_out, tmp, tmp2); /* y_out = alpha*(beta - x_out) - 8*gamma^2 */
+}
+
+/* point_add_mixed sets {x_out,y_out,z_out} = {x1,y1,z1} + {x2,y2,1}.
+ * (i.e. the second point is affine.)
+ *
+ * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-add-2007-bl
+ *
+ * Note that this function does not handle P+P, infinity+P nor P+infinity
+ * correctly.
+ */
+static void
+point_add_mixed(felem x_out, felem y_out, felem z_out,
+                const felem x1, const felem y1, const felem z1,
+                const felem x2, const felem y2)
+{
+    felem z1z1, z1z1z1, s2, u2, h, i, j, r, rr, v, tmp;
+
+    felem_square(z1z1, z1); /* z1z1 = z1^2 */
+    felem_sum(tmp, z1, z1); /* tmp = 2*z1 */
+
+    felem_mul(u2, x2, z1z1);     /* u2 = x2*z1^2 */
+    felem_mul(z1z1z1, z1, z1z1); /* z1z1z1 = z1^3 */
+    felem_mul(s2, y2, z1z1z1);   /* s2 = y2*z1^3 */
+    felem_diff(h, u2, x1);       /* h = u2 - x1 */
+    felem_sum(i, h, h);
+    felem_square(i, i);    /* i = (2*h)^2 */
+    felem_mul(j, h, i);    /* j = h*i */
+    felem_diff(r, s2, y1);
+    felem_sum(r, r, r);    /* r = 2*(s2 - y1) */
+    felem_mul(v, x1, i);   /* v = x1*i */
+
+    felem_mul(z_out, tmp, h); /* z_out = 2*z1*h */
+    felem_square(rr, r);      /* rr = r^2 */
+    felem_diff(x_out, rr, j);
+    felem_diff(x_out, x_out, v);
+    felem_diff(x_out, x_out, v); /* x_out = r^2 - j - 2*v */
+
+    felem_diff(tmp, v, x_out);
+    felem_mul(y_out, tmp, r); /* y_out = r*(v - x_out) */
+    felem_mul(tmp, y1, j);    /* tmp = y1*j; y1 is still read here, after
+                               * y_out was written, so y_out must not alias
+                               * y1 */
+    felem_diff(y_out, y_out, tmp);
+    felem_diff(y_out, y_out, tmp); /* y_out = r*(v - x_out) - 2*y1*j */
+}
+
+/* point_add sets {x_out,y_out,z_out} = {x1,y1,z1} + {x2,y2,z2}.
+ *
+ * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-add-2007-bl
+ *
+ * Note that this function does not handle P+P, infinity+P nor P+infinity
+ * correctly.
+ */
+static void
+point_add(felem x_out, felem y_out, felem z_out,
+          const felem x1, const felem y1, const felem z1,
+          const felem x2, const felem y2, const felem z2)
+{
+    felem z1z1, z1z1z1, z2z2, z2z2z2, s1, s2, u1, u2, h, i, j, r, rr, v, tmp;
+
+    felem_square(z1z1, z1);  /* z1z1 = z1^2 */
+    felem_square(z2z2, z2);  /* z2z2 = z2^2 */
+    felem_mul(u1, x1, z2z2); /* u1 = x1*z2^2 */
+
+    felem_sum(tmp, z1, z2);
+    felem_square(tmp, tmp);
+    felem_diff(tmp, tmp, z1z1);
+    felem_diff(tmp, tmp, z2z2); /* tmp = (z1 + z2)^2 - z1^2 - z2^2 */
+
+    felem_mul(z2z2z2, z2, z2z2); /* z2z2z2 = z2^3 */
+    felem_mul(s1, y1, z2z2z2);   /* s1 = y1*z2^3 */
+
+    felem_mul(u2, x2, z1z1);     /* u2 = x2*z1^2 */
+    felem_mul(z1z1z1, z1, z1z1); /* z1z1z1 = z1^3 */
+    felem_mul(s2, y2, z1z1z1);   /* s2 = y2*z1^3 */
+    felem_diff(h, u2, u1);       /* h = u2 - u1 */
+    felem_sum(i, h, h);
+    felem_square(i, i);    /* i = (2*h)^2 */
+    felem_mul(j, h, i);    /* j = h*i */
+    felem_diff(r, s2, s1);
+    felem_sum(r, r, r);    /* r = 2*(s2 - s1) */
+    felem_mul(v, u1, i);   /* v = u1*i */
+
+    felem_mul(z_out, tmp, h); /* z_out = tmp*h */
+    felem_square(rr, r);      /* rr = r^2 */
+    felem_diff(x_out, rr, j);
+    felem_diff(x_out, x_out, v);
+    felem_diff(x_out, x_out, v); /* x_out = r^2 - j - 2*v */
+
+    felem_diff(tmp, v, x_out);
+    felem_mul(y_out, tmp, r); /* y_out = r*(v - x_out) */
+    felem_mul(tmp, s1, j);    /* tmp = s1*j */
+    felem_diff(y_out, y_out, tmp);
+    felem_diff(y_out, y_out, tmp); /* y_out = r*(v - x_out) - 2*s1*j */
+}
+
+/* point_add_or_double_vartime sets {x_out,y_out,z_out} = {x1,y1,z1} +
+ *                                                       {x2,y2,z2}.
+ *
+ * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition-add-2007-bl
+ *
+ * This function handles the case where {x1,y1,z1}={x2,y2,z2}.
+ */
+static void
+point_add_or_double_vartime(
+    felem x_out, felem y_out, felem z_out,
+    const felem x1, const felem y1, const felem z1,
+    const felem x2, const felem y2, const felem z2)
+{
+    felem z1z1, z1z1z1, z2z2, z2z2z2, s1, s2, u1, u2, h, i, j, r, rr, v, tmp;
+    char x_equal, y_equal;
+
+    felem_square(z1z1, z1);  /* z1z1 = z1^2 */
+    felem_square(z2z2, z2);  /* z2z2 = z2^2 */
+    felem_mul(u1, x1, z2z2); /* u1 = x1*z2^2 */
+
+    felem_sum(tmp, z1, z2);
+    felem_square(tmp, tmp);
+    felem_diff(tmp, tmp, z1z1);
+    felem_diff(tmp, tmp, z2z2); /* tmp = (z1 + z2)^2 - z1^2 - z2^2 */
+
+    felem_mul(z2z2z2, z2, z2z2); /* z2z2z2 = z2^3 */
+    felem_mul(s1, y1, z2z2z2);   /* s1 = y1*z2^3 */
+
+    felem_mul(u2, x2, z1z1);     /* u2 = x2*z1^2 */
+    felem_mul(z1z1z1, z1, z1z1); /* z1z1z1 = z1^3 */
+    felem_mul(s2, y2, z1z1z1);   /* s2 = y2*z1^3 */
+    felem_diff(h, u2, u1);       /* h = u2 - u1 */
+    x_equal = felem_is_zero_vartime(h); /* same affine x */
+    felem_sum(i, h, h);
+    felem_square(i, i); /* i = (2*h)^2 */
+    felem_mul(j, h, i); /* j = h*i */
+    felem_diff(r, s2, s1);
+    y_equal = felem_is_zero_vartime(r); /* same affine y */
+    if (x_equal && y_equal) {
+        /* The two inputs are the same point. No output has been written
+         * yet, so {x1,y1,z1} is still intact even when it aliases the
+         * outputs, as it does in ec_GFp_nistp256_points_mul_vartime.
+         */
+        point_double(x_out, y_out, z_out, x1, y1, z1);
+        return;
+    }
+    felem_sum(r, r, r);  /* r = 2*(s2 - s1) */
+    felem_mul(v, u1, i); /* v = u1*i */
+
+    felem_mul(z_out, tmp, h); /* z_out = tmp*h */
+    felem_square(rr, r);      /* rr = r^2 */
+    felem_diff(x_out, rr, j);
+    felem_diff(x_out, x_out, v);
+    felem_diff(x_out, x_out, v); /* x_out = r^2 - j - 2*v */
+
+    felem_diff(tmp, v, x_out);
+    felem_mul(y_out, tmp, r); /* y_out = r*(v - x_out) */
+    felem_mul(tmp, s1, j);    /* tmp = s1*j */
+    felem_diff(y_out, y_out, tmp);
+    felem_diff(y_out, y_out, tmp); /* y_out = r*(v - x_out) - 2*s1*j */
+}
+
+/* copy_conditional sets out=in if mask = 0xffffffff in constant time.
+ *
+ * On entry: mask is either 0 or 0xffffffff.
+ *
+ * With mask == 0 the XOR difference is masked away and out is left
+ * unchanged; either way every limb is read and written.
+ */
+static void
+copy_conditional(felem out, const felem in, limb mask)
+{
+    int i;
+
+    for (i = 0; i < NLIMBS; i++) {
+        const limb tmp = mask & (in[i] ^ out[i]); /* in^out, or 0 */
+        out[i] ^= tmp;                            /* becomes in[i], or stays */
+    }
+}
+
+/* select_affine_point sets {out_x,out_y} to the index'th entry of table.
+ * On entry: index < 16, table[0] must be zero.
+ */
+static void
+select_affine_point(felem out_x, felem out_y,
+                    const limb *table, limb index)
+{
+    limb i, j;
+
+    memset(out_x, 0, sizeof(felem));
+    memset(out_y, 0, sizeof(felem));
+
+    for (i = 1; i < 16; i++) { /* entry 0 is implicit (the zeroed outputs):
+                                * |table| is read from its first limb for
+                                * i == 1, i.e. it stores entries 1..15 only */
+        limb mask = i ^ index;
+        mask |= mask >> 2;
+        mask |= mask >> 1;
+        mask &= 1; /* 1 if any of the four low bits of i^index is set */
+        mask--;    /* all-ones iff i == index, otherwise 0 */
+        for (j = 0; j < NLIMBS; j++, table++) {
+            out_x[j] |= *table & mask;
+        }
+        for (j = 0; j < NLIMBS; j++, table++) {
+            out_y[j] |= *table & mask;
+        }
+    }
+}
+
+/* select_jacobian_point sets {out_x,out_y,out_z} to the index'th entry of
+ * table. On entry: index < 16, table[0] must be zero.
+ *
+ * Unlike select_affine_point, the table passed here does contain entry 0; it
+ * is skipped rather than read. All fifteen remaining entries are read and
+ * combined through a mask, so the sequence of memory accesses does not depend
+ * on index.
+ */
+static void
+select_jacobian_point(felem out_x, felem out_y, felem out_z,
+                      const limb *table, limb index)
+{
+    limb i, j;
+
+    memset(out_x, 0, sizeof(felem));
+    memset(out_y, 0, sizeof(felem));
+    memset(out_z, 0, sizeof(felem));
+
+    /* The implicit value at index 0 is all zero. We don't need to perform that
+     * iteration of the loop because we already set out_* to zero.
+     */
+    table += 3 * NLIMBS;
+
+    for (i = 1; i < 16; i++) {
+        limb mask = i ^ index;
+        mask |= mask >> 2;
+        mask |= mask >> 1;
+        mask &= 1;
+        mask--; /* all-ones iff i == index, otherwise 0 */
+        for (j = 0; j < NLIMBS; j++, table++) {
+            out_x[j] |= *table & mask;
+        }
+        for (j = 0; j < NLIMBS; j++, table++) {
+            out_y[j] |= *table & mask;
+        }
+        for (j = 0; j < NLIMBS; j++, table++) {
+            out_z[j] |= *table & mask;
+        }
+    }
+}
+
+/* get_bit returns the bit'th bit of scalar. Bit 0 is the least significant
+ * bit of scalar[0]. */
+static char
+get_bit(const u8 scalar[32], int bit)
+{
+    return ((scalar[bit >> 3]) >> (bit & 7)) & 1;
+}
+
+/* scalar_base_mult sets {nx,ny,nz} = scalar*G where scalar is a little-endian
+ * number. Note that the value of scalar must be less than the order of the
+ * group.
+ */ +static void +scalar_base_mult(felem nx, felem ny, felem nz, const u8 scalar[32]) +{ + int i, j; + limb n_is_infinity_mask = -1, p_is_noninfinite_mask, mask; + u32 table_offset; + + felem px, py; + felem tx, ty, tz; + + memset(nx, 0, sizeof(felem)); + memset(ny, 0, sizeof(felem)); + memset(nz, 0, sizeof(felem)); + + /* The loop adds bits at positions 0, 64, 128 and 192, followed by + * positions 32,96,160 and 224 and does this 32 times. + */ + for (i = 0; i < 32; i++) { + if (i) { + point_double(nx, ny, nz, nx, ny, nz); + } + table_offset = 0; + for (j = 0; j <= 32; j += 32) { + char bit0 = get_bit(scalar, 31 - i + j); + char bit1 = get_bit(scalar, 95 - i + j); + char bit2 = get_bit(scalar, 159 - i + j); + char bit3 = get_bit(scalar, 223 - i + j); + limb index = bit0 | (bit1 << 1) | (bit2 << 2) | (bit3 << 3); + + select_affine_point(px, py, kPrecomputed + table_offset, index); + table_offset += 30 * NLIMBS; + + /* Since scalar is less than the order of the group, we know that + * {nx,ny,nz} != {px,py,1}, unless both are zero, which we handle + * below. + */ + point_add_mixed(tx, ty, tz, nx, ny, nz, px, py); + /* The result of point_add_mixed is incorrect if {nx,ny,nz} is zero + * (a.k.a. the point at infinity). We handle that situation by + * copying the point from the table. + */ + copy_conditional(nx, px, n_is_infinity_mask); + copy_conditional(ny, py, n_is_infinity_mask); + copy_conditional(nz, kOne, n_is_infinity_mask); + + /* Equally, the result is also wrong if the point from the table is + * zero, which happens when the index is zero. We handle that by + * only copying from {tx,ty,tz} to {nx,ny,nz} if index != 0. + */ + p_is_noninfinite_mask = NON_ZERO_TO_ALL_ONES(index); + mask = p_is_noninfinite_mask & ~n_is_infinity_mask; + copy_conditional(nx, tx, mask); + copy_conditional(ny, ty, mask); + copy_conditional(nz, tz, mask); + /* If p was not zero, then n is now non-zero. 
*/ + n_is_infinity_mask &= ~p_is_noninfinite_mask; + } + } +} + +/* point_to_affine converts a Jacobian point to an affine point. If the input + * is the point at infinity then it returns (0, 0) in constant time. + */ +static void +point_to_affine(felem x_out, felem y_out, + const felem nx, const felem ny, const felem nz) +{ + felem z_inv, z_inv_sq; + felem_inv(z_inv, nz); + felem_square(z_inv_sq, z_inv); + felem_mul(x_out, nx, z_inv_sq); + felem_mul(z_inv, z_inv, z_inv_sq); + felem_mul(y_out, ny, z_inv); +} + +/* scalar_mult sets {nx,ny,nz} = scalar*{x,y}. */ +static void +scalar_mult(felem nx, felem ny, felem nz, + const felem x, const felem y, const u8 scalar[32]) +{ + int i; + felem px, py, pz, tx, ty, tz; + felem precomp[16][3]; + limb n_is_infinity_mask, index, p_is_noninfinite_mask, mask; + + /* We precompute 0,1,2,... times {x,y}. */ + memset(precomp, 0, sizeof(felem) * 3); + memcpy(&precomp[1][0], x, sizeof(felem)); + memcpy(&precomp[1][1], y, sizeof(felem)); + memcpy(&precomp[1][2], kOne, sizeof(felem)); + + for (i = 2; i < 16; i += 2) { + point_double(precomp[i][0], precomp[i][1], precomp[i][2], + precomp[i / 2][0], precomp[i / 2][1], precomp[i / 2][2]); + + point_add_mixed(precomp[i + 1][0], precomp[i + 1][1], precomp[i + 1][2], + precomp[i][0], precomp[i][1], precomp[i][2], x, y); + } + + memset(nx, 0, sizeof(felem)); + memset(ny, 0, sizeof(felem)); + memset(nz, 0, sizeof(felem)); + n_is_infinity_mask = -1; + + /* We add in a window of four bits each iteration and do this 64 times. */ + for (i = 0; i < 64; i++) { + if (i) { + point_double(nx, ny, nz, nx, ny, nz); + point_double(nx, ny, nz, nx, ny, nz); + point_double(nx, ny, nz, nx, ny, nz); + point_double(nx, ny, nz, nx, ny, nz); + } + + index = scalar[31 - i / 2]; + if ((i & 1) == 1) { + index &= 15; + } else { + index >>= 4; + } + + /* See the comments in scalar_base_mult about handling infinities. 
*/ + select_jacobian_point(px, py, pz, precomp[0][0], index); + point_add(tx, ty, tz, nx, ny, nz, px, py, pz); + copy_conditional(nx, px, n_is_infinity_mask); + copy_conditional(ny, py, n_is_infinity_mask); + copy_conditional(nz, pz, n_is_infinity_mask); + + p_is_noninfinite_mask = NON_ZERO_TO_ALL_ONES(index); + mask = p_is_noninfinite_mask & ~n_is_infinity_mask; + copy_conditional(nx, tx, mask); + copy_conditional(ny, ty, mask); + copy_conditional(nz, tz, mask); + n_is_infinity_mask &= ~p_is_noninfinite_mask; + } +} + +/* Interface with Freebl: */ + +/* BYTESWAP_MP_DIGIT_TO_LE swaps the bytes of a mp_digit to + * little-endian order. + */ +#ifdef IS_BIG_ENDIAN +#ifdef __APPLE__ +#include +#define BYTESWAP32(x) OSSwapInt32(x) +#define BYTESWAP64(x) OSSwapInt64(x) +#else +#define BYTESWAP32(x) \ + (((x) >> 24) | (((x) >> 8) & 0xff00) | (((x)&0xff00) << 8) | ((x) << 24)) +#define BYTESWAP64(x) \ + (((x) >> 56) | (((x) >> 40) & 0xff00) | \ + (((x) >> 24) & 0xff0000) | (((x) >> 8) & 0xff000000) | \ + (((x)&0xff000000) << 8) | (((x)&0xff0000) << 24) | \ + (((x)&0xff00) << 40) | ((x) << 56)) +#endif + +#ifdef MP_USE_UINT_DIGIT +#define BYTESWAP_MP_DIGIT_TO_LE(x) BYTESWAP32(x) +#else +#define BYTESWAP_MP_DIGIT_TO_LE(x) BYTESWAP64(x) +#endif +#endif /* IS_BIG_ENDIAN */ + +#ifdef MP_USE_UINT_DIGIT +static const mp_digit kRInvDigits[8] = { + 0x80000000, 1, 0xffffffff, 0, + 0x80000001, 0xfffffffe, 1, 0x7fffffff +}; +#else +static const mp_digit kRInvDigits[4] = { + PR_UINT64(0x180000000), 0xffffffff, + PR_UINT64(0xfffffffe80000001), PR_UINT64(0x7fffffff00000001) +}; +#endif +#define MP_DIGITS_IN_256_BITS (32 / sizeof(mp_digit)) +static const mp_int kRInv = { + MP_ZPOS, + MP_DIGITS_IN_256_BITS, + MP_DIGITS_IN_256_BITS, + (mp_digit *)kRInvDigits +}; + +static const limb kTwo28 = 0x10000000; +static const limb kTwo29 = 0x20000000; + +/* to_montgomery sets out = R*in. 
*/ +static mp_err +to_montgomery(felem out, const mp_int *in, const ECGroup *group) +{ + /* There are no MPI functions for bitshift operations and we wish to shift + * in 257 bits left so we move the digits 256-bits left and then multiply + * by two. + */ + mp_int in_shifted; + int i; + mp_err res; + + MP_CHECKOK(mp_init(&in_shifted)); + MP_CHECKOK(s_mp_pad(&in_shifted, MP_USED(in) + MP_DIGITS_IN_256_BITS)); + memcpy(&MP_DIGIT(&in_shifted, MP_DIGITS_IN_256_BITS), + MP_DIGITS(in), + MP_USED(in) * sizeof(mp_digit)); + MP_CHECKOK(mp_mul_2(&in_shifted, &in_shifted)); + MP_CHECKOK(group->meth->field_mod(&in_shifted, &in_shifted, group->meth)); + + for (i = 0;; i++) { + out[i] = MP_DIGIT(&in_shifted, 0) & kBottom29Bits; + MP_CHECKOK(mp_div_d(&in_shifted, kTwo29, &in_shifted, NULL)); + + i++; + if (i == NLIMBS) + break; + out[i] = MP_DIGIT(&in_shifted, 0) & kBottom28Bits; + MP_CHECKOK(mp_div_d(&in_shifted, kTwo28, &in_shifted, NULL)); + } + +CLEANUP: + mp_clear(&in_shifted); + return res; +} + +/* from_montgomery sets out=in/R. */ +static mp_err +from_montgomery(mp_int *out, const felem in, + const ECGroup *group) +{ + mp_int result, tmp; + mp_err res; + int i; + + MP_CHECKOK(mp_init(&result)); + MP_CHECKOK(mp_init(&tmp)); + + MP_CHECKOK(mp_add_d(&tmp, in[NLIMBS - 1], &result)); + for (i = NLIMBS - 2; i >= 0; i--) { + if ((i & 1) == 0) { + MP_CHECKOK(mp_mul_d(&result, kTwo29, &tmp)); + } else { + MP_CHECKOK(mp_mul_d(&result, kTwo28, &tmp)); + } + MP_CHECKOK(mp_add_d(&tmp, in[i], &result)); + } + + MP_CHECKOK(mp_mul(&result, &kRInv, out)); + MP_CHECKOK(group->meth->field_mod(out, out, group->meth)); + +CLEANUP: + mp_clear(&result); + mp_clear(&tmp); + return res; +} + +/* scalar_from_mp_int sets out_scalar=n, where n < the group order. */ +static void +scalar_from_mp_int(u8 out_scalar[32], const mp_int *n) +{ + /* We require that |n| is less than the order of the group and therefore it + * will fit into |out_scalar|. 
However, these is a timing side-channel here + * that we cannot avoid: if |n| is sufficiently small it may be one or more + * words too short and we'll copy less data. + */ + memset(out_scalar, 0, 32); +#ifdef IS_LITTLE_ENDIAN + memcpy(out_scalar, MP_DIGITS(n), MP_USED(n) * sizeof(mp_digit)); +#else + { + mp_size i; + mp_digit swapped[MP_DIGITS_IN_256_BITS]; + for (i = 0; i < MP_USED(n); i++) { + swapped[i] = BYTESWAP_MP_DIGIT_TO_LE(MP_DIGIT(n, i)); + } + memcpy(out_scalar, swapped, MP_USED(n) * sizeof(mp_digit)); + } +#endif +} + +/* ec_GFp_nistp256_base_point_mul sets {out_x,out_y} = nG, where n is < the + * order of the group. + */ +static mp_err +ec_GFp_nistp256_base_point_mul(const mp_int *n, + mp_int *out_x, mp_int *out_y, + const ECGroup *group) +{ + u8 scalar[32]; + felem x, y, z, x_affine, y_affine; + mp_err res; + + /* FIXME(agl): test that n < order. */ + + scalar_from_mp_int(scalar, n); + scalar_base_mult(x, y, z, scalar); + point_to_affine(x_affine, y_affine, x, y, z); + MP_CHECKOK(from_montgomery(out_x, x_affine, group)); + MP_CHECKOK(from_montgomery(out_y, y_affine, group)); + +CLEANUP: + return res; +} + +/* ec_GFp_nistp256_point_mul sets {out_x,out_y} = n*{in_x,in_y}, where n is < + * the order of the group. + */ +static mp_err +ec_GFp_nistp256_point_mul(const mp_int *n, + const mp_int *in_x, const mp_int *in_y, + mp_int *out_x, mp_int *out_y, + const ECGroup *group) +{ + u8 scalar[32]; + felem x, y, z, x_affine, y_affine, px, py; + mp_err res; + + scalar_from_mp_int(scalar, n); + + MP_CHECKOK(to_montgomery(px, in_x, group)); + MP_CHECKOK(to_montgomery(py, in_y, group)); + + scalar_mult(x, y, z, px, py, scalar); + point_to_affine(x_affine, y_affine, x, y, z); + MP_CHECKOK(from_montgomery(out_x, x_affine, group)); + MP_CHECKOK(from_montgomery(out_y, y_affine, group)); + +CLEANUP: + return res; +} + +/* ec_GFp_nistp256_point_mul_vartime sets {out_x,out_y} = n1*G + + * n2*{in_x,in_y}, where n1 and n2 are < the order of the group. 
+ *
+ * As indicated by the name, this function operates in variable time. This
+ * is safe because it's used for signature validation which doesn't deal
+ * with secrets.
+ *
+ * A NULL n2 selects a plain base-point multiplication (n1*G) and a NULL n1
+ * selects a plain point multiplication (n2*{in_x,in_y}); both cases are
+ * delegated to the single-scalar routines. When both scalars are zero the
+ * outputs are set to (0, 0), which this code uses for the point at
+ * infinity. Returns MP_OKAY on success, otherwise the error reported by a
+ * Montgomery conversion.
+ */
+static mp_err
+ec_GFp_nistp256_points_mul_vartime(
+    const mp_int *n1, const mp_int *n2,
+    const mp_int *in_x, const mp_int *in_y,
+    mp_int *out_x, mp_int *out_y,
+    const ECGroup *group)
+{
+    u8 scalar1[32], scalar2[32];
+    felem x1, y1, z1, x2, y2, z2, x_affine, y_affine, px, py;
+    mp_err res = MP_OKAY;
+
+    /* If n2 == NULL, this is just a base-point multiplication. */
+    if (n2 == NULL) {
+        return ec_GFp_nistp256_base_point_mul(n1, out_x, out_y, group);
+    }
+
+    /* If n1 == NULL, this is just an arbitrary-point multiplication. */
+    if (n1 == NULL) {
+        return ec_GFp_nistp256_point_mul(n2, in_x, in_y, out_x, out_y, group);
+    }
+
+    /* If both scalars are zero, then the result is the point at infinity. */
+    if (mp_cmp_z(n1) == 0 && mp_cmp_z(n2) == 0) {
+        mp_zero(out_x);
+        mp_zero(out_y);
+        return res;
+    }
+
+    scalar_from_mp_int(scalar1, n1);
+    scalar_from_mp_int(scalar2, n2);
+
+    MP_CHECKOK(to_montgomery(px, in_x, group));
+    MP_CHECKOK(to_montgomery(py, in_y, group));
+    scalar_base_mult(x1, y1, z1, scalar1);
+    scalar_mult(x2, y2, z2, px, py, scalar2);
+
+    if (mp_cmp_z(n2) == 0) {
+        /* If n2 == 0, then {x2,y2,z2} is zero and the result is just
+         * {x1,y1,z1}. Nothing to do: the sum is already in {x1,y1,z1}. */
+    } else if (mp_cmp_z(n1) == 0) {
+        /* If n1 == 0, then {x1,y1,z1} is zero and the result is just
+         * {x2,y2,z2}. Copy it over so the common tail below can always
+         * read the result from {x1,y1,z1}. */
+        memcpy(x1, x2, sizeof(x2));
+        memcpy(y1, y2, sizeof(y2));
+        memcpy(z1, z2, sizeof(z2));
+    } else {
+        /* This function handles the case where {x1,y1,z1} == {x2,y2,z2}. */
+        point_add_or_double_vartime(x1, y1, z1, x1, y1, z1, x2, y2, z2);
+    }
+
+    point_to_affine(x_affine, y_affine, x1, y1, z1);
+    MP_CHECKOK(from_montgomery(out_x, x_affine, group));
+    MP_CHECKOK(from_montgomery(out_y, y_affine, group));
+
+CLEANUP:
+    return res;
+}
+
+/* Wire in fast point multiplication for named curves.
*/
+mp_err
+ec_group_set_gfp256_32(ECGroup *group, ECCurveName name)
+{
+    if (name == ECCurve_NIST_P256) { /* any other curve name leaves |group| untouched */
+        group->base_point_mul = &ec_GFp_nistp256_base_point_mul;
+        group->point_mul = &ec_GFp_nistp256_point_mul;
+        group->points_mul = &ec_GFp_nistp256_points_mul_vartime;
+    }
+    return MP_OKAY;
+}
diff --git a/security/nss/lib/freebl/ecl/ecp_384.c b/security/nss/lib/freebl/ecl/ecp_384.c
new file mode 100644
index 000000000..702fd976e
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecp_384.c
@@ -0,0 +1,258 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ecp.h"
+#include "mpi.h"
+#include "mplogic.h"
+#include "mpi-priv.h"
+
+/* Fast modular reduction for p384 = 2^384 - 2^128 - 2^96 + 2^32 - 1. a can be r.
+ * Uses algorithm 2.30 from Hankerson, Menezes, Vanstone. Guide to
+ * Elliptic Curve Cryptography.
+ *
+ * Inputs with more than 768 or at most 736 significant bits are reduced
+ * with the generic mp_mod(). Otherwise the digits of a are rearranged into
+ * ten fixed-size terms s[0..9] (viewed as the mp_ints m[0..9]) and r is
+ * computed as m0 + 2*m1 + m2 + m3 + m4 + m5 + m6 - m7 - m8 - m9, with the
+ * last subtraction done modulo meth->irr. All terms are copied out of a
+ * before r is first written, which is what allows a and r to alias.
+ * Returns MP_OKAY, or the first MPI error encountered. */
+static mp_err
+ec_GFp_nistp384_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    int a_bits = mpl_significant_bits(a);
+    int i;
+
+    /* m[0..9] are mp_int views over the stack arrays s[0..9], each exactly
+     * 384 bits wide (12 or 6 digits); they are never passed to mp_clear */
+    mp_int m[10];
+
+#ifdef ECL_THIRTY_TWO_BIT
+    mp_digit s[10][12];
+    for (i = 0; i < 10; i++) {
+        MP_SIGN(&m[i]) = MP_ZPOS;
+        MP_ALLOC(&m[i]) = 12;
+        MP_USED(&m[i]) = 12;
+        MP_DIGITS(&m[i]) = s[i];
+    }
+#else
+    mp_digit s[10][6];
+    for (i = 0; i < 10; i++) {
+        MP_SIGN(&m[i]) = MP_ZPOS;
+        MP_ALLOC(&m[i]) = 6;
+        MP_USED(&m[i]) = 6;
+        MP_DIGITS(&m[i]) = s[i];
+    }
+#endif
+
+#ifdef ECL_THIRTY_TWO_BIT
+    /* for polynomials larger than twice the field size or polynomials
+     * not using all words, use regular reduction. The fast path below
+     * reads digits 0..23 of a unconditionally, hence the lower bound. */
+    if ((a_bits > 768) || (a_bits <= 736)) {
+        MP_CHECKOK(mp_mod(a, &meth->irr, r));
+    } else {
+        for (i = 0; i < 12; i++) {
+            s[0][i] = MP_DIGIT(a, i);
+        }
+        s[1][0] = 0;
+        s[1][1] = 0;
+        s[1][2] = 0;
+        s[1][3] = 0;
+        s[1][4] = MP_DIGIT(a, 21);
+        s[1][5] = MP_DIGIT(a, 22);
+        s[1][6] = MP_DIGIT(a, 23);
+        s[1][7] = 0;
+        s[1][8] = 0;
+        s[1][9] = 0;
+        s[1][10] = 0;
+        s[1][11] = 0;
+        for (i = 0; i < 12; i++) {
+            s[2][i] = MP_DIGIT(a, i + 12);
+        }
+        s[3][0] = MP_DIGIT(a, 21);
+        s[3][1] = MP_DIGIT(a, 22);
+        s[3][2] = MP_DIGIT(a, 23);
+        for (i = 3; i < 12; i++) {
+            s[3][i] = MP_DIGIT(a, i + 9);
+        }
+        s[4][0] = 0;
+        s[4][1] = MP_DIGIT(a, 23);
+        s[4][2] = 0;
+        s[4][3] = MP_DIGIT(a, 20);
+        for (i = 4; i < 12; i++) {
+            s[4][i] = MP_DIGIT(a, i + 8);
+        }
+        s[5][0] = 0;
+        s[5][1] = 0;
+        s[5][2] = 0;
+        s[5][3] = 0;
+        s[5][4] = MP_DIGIT(a, 20);
+        s[5][5] = MP_DIGIT(a, 21);
+        s[5][6] = MP_DIGIT(a, 22);
+        s[5][7] = MP_DIGIT(a, 23);
+        s[5][8] = 0;
+        s[5][9] = 0;
+        s[5][10] = 0;
+        s[5][11] = 0;
+        s[6][0] = MP_DIGIT(a, 20);
+        s[6][1] = 0;
+        s[6][2] = 0;
+        s[6][3] = MP_DIGIT(a, 21);
+        s[6][4] = MP_DIGIT(a, 22);
+        s[6][5] = MP_DIGIT(a, 23);
+        s[6][6] = 0;
+        s[6][7] = 0;
+        s[6][8] = 0;
+        s[6][9] = 0;
+        s[6][10] = 0;
+        s[6][11] = 0;
+        s[7][0] = MP_DIGIT(a, 23);
+        for (i = 1; i < 12; i++) {
+            s[7][i] = MP_DIGIT(a, i + 11);
+        }
+        s[8][0] = 0;
+        s[8][1] = MP_DIGIT(a, 20);
+        s[8][2] = MP_DIGIT(a, 21);
+        s[8][3] = MP_DIGIT(a, 22);
+        s[8][4] = MP_DIGIT(a, 23);
+        s[8][5] = 0;
+        s[8][6] = 0;
+        s[8][7] = 0;
+        s[8][8] = 0;
+        s[8][9] = 0;
+        s[8][10] = 0;
+        s[8][11] = 0;
+        s[9][0] = 0;
+        s[9][1] = 0;
+        s[9][2] = 0;
+        s[9][3] = MP_DIGIT(a, 23);
+        s[9][4] = MP_DIGIT(a, 23);
+        s[9][5] = 0;
+        s[9][6] = 0;
+        s[9][7] = 0;
+        s[9][8] = 0;
+        s[9][9] = 0;
+        s[9][10] = 0;
+        s[9][11] = 0;
+
+        MP_CHECKOK(mp_add(&m[0], &m[1], r));
+        MP_CHECKOK(mp_add(r, &m[1], r)); /* m[1] is added a second time on purpose: the term enters the sum doubled */
+        MP_CHECKOK(mp_add(r, &m[2], r));
+        MP_CHECKOK(mp_add(r, &m[3], r));
+        MP_CHECKOK(mp_add(r, &m[4], r));
+        MP_CHECKOK(mp_add(r, &m[5], r));
+        MP_CHECKOK(mp_add(r, &m[6], r));
+        MP_CHECKOK(mp_sub(r, &m[7], r));
+        MP_CHECKOK(mp_sub(r, &m[8], r));
+        MP_CHECKOK(mp_submod(r, &m[9], &meth->irr, r));
+        s_mp_clamp(r);
+    }
+#else
+    /* for polynomials larger than twice the field size or polynomials
+     * not using all words, use regular reduction. Otherwise build the same
+     * ten terms as the 32-bit branch, with two 32-bit words packed into
+     * each 64-bit digit (hence the 32-bit shifts and masks below). */
+    if ((a_bits > 768) || (a_bits <= 736)) {
+        MP_CHECKOK(mp_mod(a, &meth->irr, r));
+    } else {
+        for (i = 0; i < 6; i++) {
+            s[0][i] = MP_DIGIT(a, i);
+        }
+        s[1][0] = 0;
+        s[1][1] = 0;
+        s[1][2] = (MP_DIGIT(a, 10) >> 32) | (MP_DIGIT(a, 11) << 32);
+        s[1][3] = MP_DIGIT(a, 11) >> 32;
+        s[1][4] = 0;
+        s[1][5] = 0;
+        for (i = 0; i < 6; i++) {
+            s[2][i] = MP_DIGIT(a, i + 6);
+        }
+        s[3][0] = (MP_DIGIT(a, 10) >> 32) | (MP_DIGIT(a, 11) << 32);
+        s[3][1] = (MP_DIGIT(a, 11) >> 32) | (MP_DIGIT(a, 6) << 32);
+        for (i = 2; i < 6; i++) {
+            s[3][i] = (MP_DIGIT(a, i + 4) >> 32) | (MP_DIGIT(a, i + 5) << 32);
+        }
+        s[4][0] = (MP_DIGIT(a, 11) >> 32) << 32;
+        s[4][1] = MP_DIGIT(a, 10) << 32;
+        for (i = 2; i < 6; i++) {
+            s[4][i] = MP_DIGIT(a, i + 4);
+        }
+        s[5][0] = 0;
+        s[5][1] = 0;
+        s[5][2] = MP_DIGIT(a, 10);
+        s[5][3] = MP_DIGIT(a, 11);
+        s[5][4] = 0;
+        s[5][5] = 0;
+        s[6][0] = (MP_DIGIT(a, 10) << 32) >> 32;
+        s[6][1] = (MP_DIGIT(a, 10) >> 32) << 32;
+        s[6][2] = MP_DIGIT(a, 11);
+        s[6][3] = 0;
+        s[6][4] = 0;
+        s[6][5] = 0;
+        s[7][0] = (MP_DIGIT(a, 11) >> 32) | (MP_DIGIT(a, 6) << 32);
+        for (i = 1; i < 6; i++) {
+            s[7][i] = (MP_DIGIT(a, i + 5) >> 32) | (MP_DIGIT(a, i + 6) << 32);
+        }
+        s[8][0] = MP_DIGIT(a, 10) << 32;
+        s[8][1] = (MP_DIGIT(a, 10) >> 32) | (MP_DIGIT(a, 11) << 32);
+        s[8][2] = MP_DIGIT(a, 11) >> 32;
+        s[8][3] = 0;
+        s[8][4] = 0;
+        s[8][5] = 0;
+        s[9][0] = 0;
+        s[9][1] = (MP_DIGIT(a, 11) >> 32) << 32;
+        s[9][2] = MP_DIGIT(a, 11) >> 32;
+        s[9][3] = 0;
+        s[9][4] = 0;
+        s[9][5] = 0;
+
+        MP_CHECKOK(mp_add(&m[0], &m[1], r));
+        MP_CHECKOK(mp_add(r, &m[1], r)); /* m[1] is added a second time on purpose: the term enters the sum doubled */
+        MP_CHECKOK(mp_add(r, &m[2], r));
+        MP_CHECKOK(mp_add(r, &m[3], r));
+        MP_CHECKOK(mp_add(r, &m[4], r));
+        MP_CHECKOK(mp_add(r, &m[5], r));
+        MP_CHECKOK(mp_add(r, &m[6], r));
+        MP_CHECKOK(mp_sub(r, &m[7], r));
+        MP_CHECKOK(mp_sub(r, &m[8], r));
+        MP_CHECKOK(mp_submod(r, &m[9], &meth->irr, r));
+        s_mp_clamp(r);
+    }
+#endif
+
+CLEANUP:
+    return res;
+}
+
+/* Compute the square of
polynomial a, reduce modulo p384. Store the
+ * result in r. r could be a. Uses optimized modular reduction for p384.
+ * Returns MP_OKAY, or the error from mp_sqr() or from the reduction.
+ */
+static mp_err
+ec_GFp_nistp384_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+
+    MP_CHECKOK(mp_sqr(a, r));
+    MP_CHECKOK(ec_GFp_nistp384_mod(r, r, meth)); /* reduce in place */
+CLEANUP:
+    return res;
+}
+
+/* Compute the product of two polynomials a and b, reduce modulo p384.
+ * Store the result in r. r could be a or b; a could be b. Uses
+ * optimized modular reduction for p384.
+ * Returns MP_OKAY, or the error from mp_mul() or from the reduction. */
+static mp_err
+ec_GFp_nistp384_mul(const mp_int *a, const mp_int *b, mp_int *r,
+                    const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+
+    MP_CHECKOK(mp_mul(a, b, r));
+    MP_CHECKOK(ec_GFp_nistp384_mod(r, r, meth)); /* reduce in place */
+CLEANUP:
+    return res;
+}
+
+/* Wire in fast field arithmetic for named curves: for ECCurve_NIST_P384
+ * the field_mod, field_mul and field_sqr methods are replaced by the
+ * routines above; any other curve name leaves the group untouched. No
+ * base-point precomputation is installed here. Always returns MP_OKAY. */
+mp_err
+ec_group_set_gfp384(ECGroup *group, ECCurveName name)
+{
+    if (name == ECCurve_NIST_P384) {
+        group->meth->field_mod = &ec_GFp_nistp384_mod;
+        group->meth->field_mul = &ec_GFp_nistp384_mul;
+        group->meth->field_sqr = &ec_GFp_nistp384_sqr;
+    }
+    return MP_OKAY;
+}
diff --git a/security/nss/lib/freebl/ecl/ecp_521.c b/security/nss/lib/freebl/ecl/ecp_521.c
new file mode 100644
index 000000000..6ca0dbb11
--- /dev/null
+++ b/security/nss/lib/freebl/ecl/ecp_521.c
@@ -0,0 +1,137 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "ecp.h"
+#include "mpi.h"
+#include "mplogic.h"
+#include "mpi-priv.h"
+
+#define ECP521_DIGITS ECL_CURVE_DIGITS(521) /* presumably the mp_digit count needed for 521 bits - confirm against ECL_CURVE_DIGITS */
+
+/* Fast modular reduction for p521 = 2^521 - 1. a can be r. Uses
+ * algorithm 2.31 from Hankerson, Menezes, Vanstone. Guide to
+ * Elliptic Curve Cryptography.
*/
+static mp_err
+ec_GFp_nistp521_mod(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+    int a_bits = mpl_significant_bits(a);
+    unsigned int i;
+
+    /* m1 is an mp_int view over the zero-initialised stack array s1, which
+     * is exactly ECP521_DIGITS digits long; it receives a >> 521 below and
+     * is never passed to mp_clear */
+    mp_int m1;
+
+    mp_digit s1[ECP521_DIGITS] = { 0 };
+
+    MP_SIGN(&m1) = MP_ZPOS;
+    MP_ALLOC(&m1) = ECP521_DIGITS;
+    MP_USED(&m1) = ECP521_DIGITS;
+    MP_DIGITS(&m1) = s1;
+
+    if (a_bits < 521) { /* fewer than 521 bits: a is returned as-is, no reduction is attempted */
+        if (a == r)
+            return MP_OKAY;
+        return mp_copy(a, r);
+    }
+    /* for polynomials larger than twice the field size or polynomials
+     * not using all words, use regular reduction. Otherwise compute
+     * r = (a mod 2^521) + (a >> 521): the top digit keeps its low 9 bits
+     * (mask 0x1FF) and everything above is shifted down by 9 into s1. */
+    if (a_bits > (521 * 2)) {
+        MP_CHECKOK(mp_mod(a, &meth->irr, r));
+    } else {
+#define FIRST_DIGIT (ECP521_DIGITS - 1)
+        for (i = FIRST_DIGIT; i < MP_USED(a) - 1; i++) {
+            s1[i - FIRST_DIGIT] = (MP_DIGIT(a, i) >> 9) | (MP_DIGIT(a, 1 + i) << (MP_DIGIT_BIT - 9));
+        }
+        s1[i - FIRST_DIGIT] = MP_DIGIT(a, i) >> 9; /* last digit of a has no higher neighbour to borrow bits from */
+
+        if (a != r) {
+            MP_CHECKOK(s_mp_pad(r, ECP521_DIGITS));
+            for (i = 0; i < ECP521_DIGITS; i++) {
+                MP_DIGIT(r, i) = MP_DIGIT(a, i);
+            }
+        }
+        MP_USED(r) = ECP521_DIGITS;
+        MP_DIGIT(r, FIRST_DIGIT) &= 0x1FF;
+
+        MP_CHECKOK(s_mp_add(r, &m1));
+        if (MP_DIGIT(r, FIRST_DIGIT) & 0x200) { /* the sum carried into bit 521: fold it back as +1 and drop the bit */
+            MP_CHECKOK(s_mp_add_d(r, 1));
+            MP_DIGIT(r, FIRST_DIGIT) &= 0x1FF;
+        } else if (s_mp_cmp(r, &meth->irr) == 0) { /* sum equals the modulus exactly: the reduced value is zero */
+            mp_zero(r);
+        }
+        s_mp_clamp(r);
+    }
+
+CLEANUP:
+    return res;
+}
+
+/* Compute the square of polynomial a, reduce modulo p521. Store the
+ * result in r. r could be a. Uses optimized modular reduction for p521.
+ * Returns MP_OKAY, or the error from mp_sqr() or from the reduction.
+ */
+static mp_err
+ec_GFp_nistp521_sqr(const mp_int *a, mp_int *r, const GFMethod *meth)
+{
+    mp_err res = MP_OKAY;
+
+    MP_CHECKOK(mp_sqr(a, r));
+    MP_CHECKOK(ec_GFp_nistp521_mod(r, r, meth)); /* reduce in place */
+CLEANUP:
+    return res;
+}
+
+/* Compute the product of two polynomials a and b, reduce modulo p521.
+ * Store the result in r. r could be a or b; a could be b. Uses
+ * optimized modular reduction for p521.
*/ +static mp_err +ec_GFp_nistp521_mul(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth) +{ + mp_err res = MP_OKAY; + + MP_CHECKOK(mp_mul(a, b, r)); + MP_CHECKOK(ec_GFp_nistp521_mod(r, r, meth)); +CLEANUP: + return res; +} + +/* Divides two field elements. If a is NULL, then returns the inverse of + * b. */ +static mp_err +ec_GFp_nistp521_div(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth) +{ + mp_err res = MP_OKAY; + mp_int t; + + /* If a is NULL, then return the inverse of b, otherwise return a/b. */ + if (a == NULL) { + return mp_invmod(b, &meth->irr, r); + } else { + /* MPI doesn't support divmod, so we implement it using invmod and + * mulmod. */ + MP_CHECKOK(mp_init(&t)); + MP_CHECKOK(mp_invmod(b, &meth->irr, &t)); + MP_CHECKOK(mp_mul(a, &t, r)); + MP_CHECKOK(ec_GFp_nistp521_mod(r, r, meth)); + CLEANUP: + mp_clear(&t); + return res; + } +} + +/* Wire in fast field arithmetic and precomputation of base point for + * named curves. */ +mp_err +ec_group_set_gfp521(ECGroup *group, ECCurveName name) +{ + if (name == ECCurve_NIST_P521) { + group->meth->field_mod = &ec_GFp_nistp521_mod; + group->meth->field_mul = &ec_GFp_nistp521_mul; + group->meth->field_sqr = &ec_GFp_nistp521_sqr; + group->meth->field_div = &ec_GFp_nistp521_div; + } + return MP_OKAY; +} diff --git a/security/nss/lib/freebl/ecl/ecp_aff.c b/security/nss/lib/freebl/ecl/ecp_aff.c new file mode 100644 index 000000000..47fb27326 --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecp_aff.c @@ -0,0 +1,308 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "ecp.h" +#include "mplogic.h" +#include + +/* Checks if point P(px, py) is at infinity. Uses affine coordinates. 
*/ +mp_err +ec_GFp_pt_is_inf_aff(const mp_int *px, const mp_int *py) +{ + + if ((mp_cmp_z(px) == 0) && (mp_cmp_z(py) == 0)) { + return MP_YES; + } else { + return MP_NO; + } +} + +/* Sets P(px, py) to be the point at infinity. Uses affine coordinates. */ +mp_err +ec_GFp_pt_set_inf_aff(mp_int *px, mp_int *py) +{ + mp_zero(px); + mp_zero(py); + return MP_OKAY; +} + +/* Computes R = P + Q based on IEEE P1363 A.10.1. Elliptic curve points P, + * Q, and R can all be identical. Uses affine coordinates. Assumes input + * is already field-encoded using field_enc, and returns output that is + * still field-encoded. */ +mp_err +ec_GFp_pt_add_aff(const mp_int *px, const mp_int *py, const mp_int *qx, + const mp_int *qy, mp_int *rx, mp_int *ry, + const ECGroup *group) +{ + mp_err res = MP_OKAY; + mp_int lambda, temp, tempx, tempy; + + MP_DIGITS(&lambda) = 0; + MP_DIGITS(&temp) = 0; + MP_DIGITS(&tempx) = 0; + MP_DIGITS(&tempy) = 0; + MP_CHECKOK(mp_init(&lambda)); + MP_CHECKOK(mp_init(&temp)); + MP_CHECKOK(mp_init(&tempx)); + MP_CHECKOK(mp_init(&tempy)); + /* if P = inf, then R = Q */ + if (ec_GFp_pt_is_inf_aff(px, py) == 0) { + MP_CHECKOK(mp_copy(qx, rx)); + MP_CHECKOK(mp_copy(qy, ry)); + res = MP_OKAY; + goto CLEANUP; + } + /* if Q = inf, then R = P */ + if (ec_GFp_pt_is_inf_aff(qx, qy) == 0) { + MP_CHECKOK(mp_copy(px, rx)); + MP_CHECKOK(mp_copy(py, ry)); + res = MP_OKAY; + goto CLEANUP; + } + /* if px != qx, then lambda = (py-qy) / (px-qx) */ + if (mp_cmp(px, qx) != 0) { + MP_CHECKOK(group->meth->field_sub(py, qy, &tempy, group->meth)); + MP_CHECKOK(group->meth->field_sub(px, qx, &tempx, group->meth)); + MP_CHECKOK(group->meth->field_div(&tempy, &tempx, &lambda, group->meth)); + } else { + /* if py != qy or qy = 0, then R = inf */ + if (((mp_cmp(py, qy) != 0)) || (mp_cmp_z(qy) == 0)) { + mp_zero(rx); + mp_zero(ry); + res = MP_OKAY; + goto CLEANUP; + } + /* lambda = (3qx^2+a) / (2qy) */ + MP_CHECKOK(group->meth->field_sqr(qx, &tempx, group->meth)); + 
MP_CHECKOK(mp_set_int(&temp, 3)); + if (group->meth->field_enc) { + MP_CHECKOK(group->meth->field_enc(&temp, &temp, group->meth)); + } + MP_CHECKOK(group->meth->field_mul(&tempx, &temp, &tempx, group->meth)); + MP_CHECKOK(group->meth->field_add(&tempx, &group->curvea, &tempx, group->meth)); + MP_CHECKOK(mp_set_int(&temp, 2)); + if (group->meth->field_enc) { + MP_CHECKOK(group->meth->field_enc(&temp, &temp, group->meth)); + } + MP_CHECKOK(group->meth->field_mul(qy, &temp, &tempy, group->meth)); + MP_CHECKOK(group->meth->field_div(&tempx, &tempy, &lambda, group->meth)); + } + /* rx = lambda^2 - px - qx */ + MP_CHECKOK(group->meth->field_sqr(&lambda, &tempx, group->meth)); + MP_CHECKOK(group->meth->field_sub(&tempx, px, &tempx, group->meth)); + MP_CHECKOK(group->meth->field_sub(&tempx, qx, &tempx, group->meth)); + /* ry = (x1-x2) * lambda - y1 */ + MP_CHECKOK(group->meth->field_sub(qx, &tempx, &tempy, group->meth)); + MP_CHECKOK(group->meth->field_mul(&tempy, &lambda, &tempy, group->meth)); + MP_CHECKOK(group->meth->field_sub(&tempy, qy, &tempy, group->meth)); + MP_CHECKOK(mp_copy(&tempx, rx)); + MP_CHECKOK(mp_copy(&tempy, ry)); + +CLEANUP: + mp_clear(&lambda); + mp_clear(&temp); + mp_clear(&tempx); + mp_clear(&tempy); + return res; +} + +/* Computes R = P - Q. Elliptic curve points P, Q, and R can all be + * identical. Uses affine coordinates. Assumes input is already + * field-encoded using field_enc, and returns output that is still + * field-encoded. */ +mp_err +ec_GFp_pt_sub_aff(const mp_int *px, const mp_int *py, const mp_int *qx, + const mp_int *qy, mp_int *rx, mp_int *ry, + const ECGroup *group) +{ + mp_err res = MP_OKAY; + mp_int nqy; + + MP_DIGITS(&nqy) = 0; + MP_CHECKOK(mp_init(&nqy)); + /* nqy = -qy */ + MP_CHECKOK(group->meth->field_neg(qy, &nqy, group->meth)); + res = group->point_add(px, py, qx, &nqy, rx, ry, group); +CLEANUP: + mp_clear(&nqy); + return res; +} + +/* Computes R = 2P. Elliptic curve points P and R can be identical. 
Uses + * affine coordinates. Assumes input is already field-encoded using + * field_enc, and returns output that is still field-encoded. */ +mp_err +ec_GFp_pt_dbl_aff(const mp_int *px, const mp_int *py, mp_int *rx, + mp_int *ry, const ECGroup *group) +{ + return ec_GFp_pt_add_aff(px, py, px, py, rx, ry, group); +} + +/* by default, this routine is unused and thus doesn't need to be compiled */ +#ifdef ECL_ENABLE_GFP_PT_MUL_AFF +/* Computes R = nP based on IEEE P1363 A.10.3. Elliptic curve points P and + * R can be identical. Uses affine coordinates. Assumes input is already + * field-encoded using field_enc, and returns output that is still + * field-encoded. */ +mp_err +ec_GFp_pt_mul_aff(const mp_int *n, const mp_int *px, const mp_int *py, + mp_int *rx, mp_int *ry, const ECGroup *group) +{ + mp_err res = MP_OKAY; + mp_int k, k3, qx, qy, sx, sy; + int b1, b3, i, l; + + MP_DIGITS(&k) = 0; + MP_DIGITS(&k3) = 0; + MP_DIGITS(&qx) = 0; + MP_DIGITS(&qy) = 0; + MP_DIGITS(&sx) = 0; + MP_DIGITS(&sy) = 0; + MP_CHECKOK(mp_init(&k)); + MP_CHECKOK(mp_init(&k3)); + MP_CHECKOK(mp_init(&qx)); + MP_CHECKOK(mp_init(&qy)); + MP_CHECKOK(mp_init(&sx)); + MP_CHECKOK(mp_init(&sy)); + + /* if n = 0 then r = inf */ + if (mp_cmp_z(n) == 0) { + mp_zero(rx); + mp_zero(ry); + res = MP_OKAY; + goto CLEANUP; + } + /* Q = P, k = n */ + MP_CHECKOK(mp_copy(px, &qx)); + MP_CHECKOK(mp_copy(py, &qy)); + MP_CHECKOK(mp_copy(n, &k)); + /* if n < 0 then Q = -Q, k = -k */ + if (mp_cmp_z(n) < 0) { + MP_CHECKOK(group->meth->field_neg(&qy, &qy, group->meth)); + MP_CHECKOK(mp_neg(&k, &k)); + } +#ifdef ECL_DEBUG /* basic double and add method */ + l = mpl_significant_bits(&k) - 1; + MP_CHECKOK(mp_copy(&qx, &sx)); + MP_CHECKOK(mp_copy(&qy, &sy)); + for (i = l - 1; i >= 0; i--) { + /* S = 2S */ + MP_CHECKOK(group->point_dbl(&sx, &sy, &sx, &sy, group)); + /* if k_i = 1, then S = S + Q */ + if (mpl_get_bit(&k, i) != 0) { + MP_CHECKOK(group->point_add(&sx, &sy, &qx, &qy, &sx, &sy, group)); + } + } +#else /* double 
and add/subtract method from \ + * standard */ + /* k3 = 3 * k */ + MP_CHECKOK(mp_set_int(&k3, 3)); + MP_CHECKOK(mp_mul(&k, &k3, &k3)); + /* S = Q */ + MP_CHECKOK(mp_copy(&qx, &sx)); + MP_CHECKOK(mp_copy(&qy, &sy)); + /* l = index of high order bit in binary representation of 3*k */ + l = mpl_significant_bits(&k3) - 1; + /* for i = l-1 downto 1 */ + for (i = l - 1; i >= 1; i--) { + /* S = 2S */ + MP_CHECKOK(group->point_dbl(&sx, &sy, &sx, &sy, group)); + b3 = MP_GET_BIT(&k3, i); + b1 = MP_GET_BIT(&k, i); + /* if k3_i = 1 and k_i = 0, then S = S + Q */ + if ((b3 == 1) && (b1 == 0)) { + MP_CHECKOK(group->point_add(&sx, &sy, &qx, &qy, &sx, &sy, group)); + /* if k3_i = 0 and k_i = 1, then S = S - Q */ + } else if ((b3 == 0) && (b1 == 1)) { + MP_CHECKOK(group->point_sub(&sx, &sy, &qx, &qy, &sx, &sy, group)); + } + } +#endif + /* output S */ + MP_CHECKOK(mp_copy(&sx, rx)); + MP_CHECKOK(mp_copy(&sy, ry)); + +CLEANUP: + mp_clear(&k); + mp_clear(&k3); + mp_clear(&qx); + mp_clear(&qy); + mp_clear(&sx); + mp_clear(&sy); + return res; +} +#endif + +/* Validates a point on a GFp curve. */ +mp_err +ec_GFp_validate_point(const mp_int *px, const mp_int *py, const ECGroup *group) +{ + mp_err res = MP_NO; + mp_int accl, accr, tmp, pxt, pyt; + + MP_DIGITS(&accl) = 0; + MP_DIGITS(&accr) = 0; + MP_DIGITS(&tmp) = 0; + MP_DIGITS(&pxt) = 0; + MP_DIGITS(&pyt) = 0; + MP_CHECKOK(mp_init(&accl)); + MP_CHECKOK(mp_init(&accr)); + MP_CHECKOK(mp_init(&tmp)); + MP_CHECKOK(mp_init(&pxt)); + MP_CHECKOK(mp_init(&pyt)); + + /* 1: Verify that publicValue is not the point at infinity */ + if (ec_GFp_pt_is_inf_aff(px, py) == MP_YES) { + res = MP_NO; + goto CLEANUP; + } + /* 2: Verify that the coordinates of publicValue are elements + * of the field. + */ + if ((MP_SIGN(px) == MP_NEG) || (mp_cmp(px, &group->meth->irr) >= 0) || + (MP_SIGN(py) == MP_NEG) || (mp_cmp(py, &group->meth->irr) >= 0)) { + res = MP_NO; + goto CLEANUP; + } + /* 3: Verify that publicValue is on the curve. 
*/ + if (group->meth->field_enc) { + group->meth->field_enc(px, &pxt, group->meth); + group->meth->field_enc(py, &pyt, group->meth); + } else { + MP_CHECKOK(mp_copy(px, &pxt)); + MP_CHECKOK(mp_copy(py, &pyt)); + } + /* left-hand side: y^2 */ + MP_CHECKOK(group->meth->field_sqr(&pyt, &accl, group->meth)); + /* right-hand side: x^3 + a*x + b = (x^2 + a)*x + b by Horner's rule */ + MP_CHECKOK(group->meth->field_sqr(&pxt, &tmp, group->meth)); + MP_CHECKOK(group->meth->field_add(&tmp, &group->curvea, &tmp, group->meth)); + MP_CHECKOK(group->meth->field_mul(&tmp, &pxt, &accr, group->meth)); + MP_CHECKOK(group->meth->field_add(&accr, &group->curveb, &accr, group->meth)); + /* check LHS - RHS == 0 */ + MP_CHECKOK(group->meth->field_sub(&accl, &accr, &accr, group->meth)); + if (mp_cmp_z(&accr) != 0) { + res = MP_NO; + goto CLEANUP; + } + /* 4: Verify that the order of the curve times the publicValue + * is the point at infinity. + */ + MP_CHECKOK(ECPoint_mul(group, &group->order, px, py, &pxt, &pyt)); + if (ec_GFp_pt_is_inf_aff(&pxt, &pyt) != MP_YES) { + res = MP_NO; + goto CLEANUP; + } + + res = MP_YES; + +CLEANUP: + mp_clear(&accl); + mp_clear(&accr); + mp_clear(&tmp); + mp_clear(&pxt); + mp_clear(&pyt); + return res; +} diff --git a/security/nss/lib/freebl/ecl/ecp_jac.c b/security/nss/lib/freebl/ecl/ecp_jac.c new file mode 100644 index 000000000..535e75903 --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecp_jac.c @@ -0,0 +1,513 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "ecp.h" +#include "mplogic.h" +#include +#ifdef ECL_DEBUG +#include +#endif + +/* Converts a point P(px, py) from affine coordinates to Jacobian + * projective coordinates R(rx, ry, rz). Assumes input is already + * field-encoded using field_enc, and returns output that is still + * field-encoded. 
*/
+mp_err
+ec_GFp_pt_aff2jac(const mp_int *px, const mp_int *py, mp_int *rx,
+                  mp_int *ry, mp_int *rz, const ECGroup *group)
+{
+    mp_err res = MP_OKAY;
+
+    if (ec_GFp_pt_is_inf_aff(px, py) == MP_YES) {
+        MP_CHECKOK(ec_GFp_pt_set_inf_jac(rx, ry, rz));
+    } else {
+        MP_CHECKOK(mp_copy(px, rx));
+        MP_CHECKOK(mp_copy(py, ry));
+        MP_CHECKOK(mp_set_int(rz, 1)); /* an affine point (x, y) is the Jacobian point (x, y, 1) */
+        if (group->meth->field_enc) { /* rz must be field-encoded like rx and ry already are */
+            MP_CHECKOK(group->meth->field_enc(rz, rz, group->meth));
+        }
+    }
+CLEANUP:
+    return res;
+}
+
+/* Converts a point P(px, py, pz) from Jacobian projective coordinates to
+ * affine coordinates R(rx, ry). P and R can share x and y coordinates.
+ * Assumes input is already field-encoded using field_enc, and returns
+ * output that is still field-encoded.
+ *
+ * The point at infinity maps to the affine point at infinity. When pz
+ * compares equal to 1 the coordinates are copied unchanged; otherwise a
+ * single field inversion of pz is performed (field_div with a NULL
+ * numerator) and rx = px / pz^2, ry = py / pz^3. Returns MP_OKAY or the
+ * first error encountered. */
+mp_err
+ec_GFp_pt_jac2aff(const mp_int *px, const mp_int *py, const mp_int *pz,
+                  mp_int *rx, mp_int *ry, const ECGroup *group)
+{
+    mp_err res = MP_OKAY;
+    mp_int z1, z2, z3;
+
+    MP_DIGITS(&z1) = 0;
+    MP_DIGITS(&z2) = 0;
+    MP_DIGITS(&z3) = 0;
+    MP_CHECKOK(mp_init(&z1));
+    MP_CHECKOK(mp_init(&z2));
+    MP_CHECKOK(mp_init(&z3));
+
+    /* if point at infinity, then set point at infinity and exit */
+    if (ec_GFp_pt_is_inf_jac(px, py, pz) == MP_YES) {
+        MP_CHECKOK(ec_GFp_pt_set_inf_aff(rx, ry));
+        goto CLEANUP;
+    }
+
+    /* transform (px, py, pz) into (px / pz^2, py / pz^3) */
+    if (mp_cmp_d(pz, 1) == 0) {
+        MP_CHECKOK(mp_copy(px, rx));
+        MP_CHECKOK(mp_copy(py, ry));
+    } else {
+        MP_CHECKOK(group->meth->field_div(NULL, pz, &z1, group->meth)); /* z1 = 1 / pz */
+        MP_CHECKOK(group->meth->field_sqr(&z1, &z2, group->meth)); /* z2 = 1 / pz^2 */
+        MP_CHECKOK(group->meth->field_mul(&z1, &z2, &z3, group->meth)); /* z3 = 1 / pz^3 */
+        MP_CHECKOK(group->meth->field_mul(px, &z2, rx, group->meth));
+        MP_CHECKOK(group->meth->field_mul(py, &z3, ry, group->meth));
+    }
+
+CLEANUP:
+    mp_clear(&z1);
+    mp_clear(&z2);
+    mp_clear(&z3);
+    return res;
+}
+
+/* Checks if point P(px, py, pz) is at infinity. Uses Jacobian
+ * coordinates.
*/
+mp_err
+ec_GFp_pt_is_inf_jac(const mp_int *px, const mp_int *py, const mp_int *pz)
+{
+    return mp_cmp_z(pz); /* only pz is examined; callers compare the result against MP_YES */
+}
+
+/* Sets P(px, py, pz) to be the point at infinity. Uses Jacobian
+ * coordinates. Only pz is zeroed; px and py are left as they are.
+ * Always returns MP_OKAY. */
+mp_err
+ec_GFp_pt_set_inf_jac(mp_int *px, mp_int *py, mp_int *pz)
+{
+    mp_zero(pz);
+    return MP_OKAY;
+}
+
+/* Computes R = P + Q where R is (rx, ry, rz), P is (px, py, pz) and Q is
+ * (qx, qy, 1). Elliptic curve points P, Q, and R can all be identical.
+ * Uses mixed Jacobian-affine coordinates. Assumes input is already
+ * field-encoded using field_enc, and returns output that is still
+ * field-encoded. Uses equation (2) from Brown, Hankerson, Lopez, and
+ * Menezes. Software Implementation of the NIST Elliptic Curves Over Prime
+ * Fields.
+ *
+ * Special cases: if P is at infinity, R is Q converted to Jacobian form;
+ * if Q is at infinity, R is a copy of P; if P == Q the sum is computed by
+ * doubling Q; if P == -Q, R is the point at infinity. Returns MP_OKAY or
+ * the first error encountered. */
+mp_err
+ec_GFp_pt_add_jac_aff(const mp_int *px, const mp_int *py, const mp_int *pz,
+                      const mp_int *qx, const mp_int *qy, mp_int *rx,
+                      mp_int *ry, mp_int *rz, const ECGroup *group)
+{
+    mp_err res = MP_OKAY;
+    mp_int A, B, C, D, C2, C3;
+
+    MP_DIGITS(&A) = 0;
+    MP_DIGITS(&B) = 0;
+    MP_DIGITS(&C) = 0;
+    MP_DIGITS(&D) = 0;
+    MP_DIGITS(&C2) = 0;
+    MP_DIGITS(&C3) = 0;
+    MP_CHECKOK(mp_init(&A));
+    MP_CHECKOK(mp_init(&B));
+    MP_CHECKOK(mp_init(&C));
+    MP_CHECKOK(mp_init(&D));
+    MP_CHECKOK(mp_init(&C2));
+    MP_CHECKOK(mp_init(&C3));
+
+    /* If either P or Q is the point at infinity, then return the other
+     * point */
+    if (ec_GFp_pt_is_inf_jac(px, py, pz) == MP_YES) {
+        MP_CHECKOK(ec_GFp_pt_aff2jac(qx, qy, rx, ry, rz, group));
+        goto CLEANUP;
+    }
+    if (ec_GFp_pt_is_inf_aff(qx, qy) == MP_YES) {
+        MP_CHECKOK(mp_copy(px, rx));
+        MP_CHECKOK(mp_copy(py, ry));
+        MP_CHECKOK(mp_copy(pz, rz));
+        goto CLEANUP;
+    }
+
+    /* A = qx * pz^2, B = qy * pz^3 */
+    MP_CHECKOK(group->meth->field_sqr(pz, &A, group->meth));
+    MP_CHECKOK(group->meth->field_mul(&A, pz, &B, group->meth));
+    MP_CHECKOK(group->meth->field_mul(&A, qx, &A, group->meth));
+    MP_CHECKOK(group->meth->field_mul(&B, qy, &B, group->meth));
+
+    /* C = A - px, D = B - py */
+    MP_CHECKOK(group->meth->field_sub(&A, px, &C, group->meth));
+    MP_CHECKOK(group->meth->field_sub(&B, py, &D, group->meth));
+
+    if (mp_cmp_z(&C) == 0) {
+        /* P == Q or P == -Q */
+        if (mp_cmp_z(&D) == 0) {
+            /* P == Q */
+            /* It is cheaper to double (qx, qy, 1) than (px, py, pz). */
+            MP_DIGIT(&D, 0) = 1; /* Set D to 1. D is zero at this point; presumably a zero mp_int has exactly one used digit, so writing it yields the value 1 - confirm against mpi. */
+            MP_CHECKOK(ec_GFp_pt_dbl_jac(qx, qy, &D, rx, ry, rz, group));
+        } else {
+            /* P == -Q */
+            MP_CHECKOK(ec_GFp_pt_set_inf_jac(rx, ry, rz));
+        }
+        goto CLEANUP;
+    }
+
+    /* C2 = C^2, C3 = C^3 */
+    MP_CHECKOK(group->meth->field_sqr(&C, &C2, group->meth));
+    MP_CHECKOK(group->meth->field_mul(&C, &C2, &C3, group->meth));
+
+    /* rz = pz * C (pz is not read again after this, so rz may alias pz) */
+    MP_CHECKOK(group->meth->field_mul(pz, &C, rz, group->meth));
+
+    /* C = px * C^2 (computed before rx is written, so rx may alias px) */
+    MP_CHECKOK(group->meth->field_mul(px, &C2, &C, group->meth));
+    /* A = D^2 */
+    MP_CHECKOK(group->meth->field_sqr(&D, &A, group->meth));
+
+    /* rx = D^2 - (C^3 + 2 * (px * C^2)) */
+    MP_CHECKOK(group->meth->field_add(&C, &C, rx, group->meth));
+    MP_CHECKOK(group->meth->field_add(&C3, rx, rx, group->meth));
+    MP_CHECKOK(group->meth->field_sub(&A, rx, rx, group->meth));
+
+    /* C3 = py * C^3 (computed before ry is written, so ry may alias py) */
+    MP_CHECKOK(group->meth->field_mul(py, &C3, &C3, group->meth));
+
+    /* ry = D * (px * C^2 - rx) - py * C^3 */
+    MP_CHECKOK(group->meth->field_sub(&C, rx, ry, group->meth));
+    MP_CHECKOK(group->meth->field_mul(&D, ry, ry, group->meth));
+    MP_CHECKOK(group->meth->field_sub(ry, &C3, ry, group->meth));
+
+CLEANUP:
+    mp_clear(&A);
+    mp_clear(&B);
+    mp_clear(&C);
+    mp_clear(&D);
+    mp_clear(&C2);
+    mp_clear(&C3);
+    return res;
+}
+
+/* Computes R = 2P. Elliptic curve points P and R can be identical. Uses
+ * Jacobian coordinates.
+ *
+ * Assumes input is already field-encoded using field_enc, and returns
+ * output that is still field-encoded.
+ *
+ * This routine implements Point Doubling in the Jacobian Projective
+ * space as described in the paper "Efficient elliptic curve exponentiation
+ * using mixed coordinates", by H.
Cohen, A Miyaji, T. Ono.
+ *
+ * The result is the point at infinity when P is at infinity or when py is
+ * zero. The slope numerator M is computed along one of three paths: pz
+ * comparing equal to 1, the curve coefficient a being stored as the
+ * single-digit value -3, or the general case. Returns MP_OKAY or the
+ * first error encountered.
+ */
+mp_err
+ec_GFp_pt_dbl_jac(const mp_int *px, const mp_int *py, const mp_int *pz,
+                  mp_int *rx, mp_int *ry, mp_int *rz, const ECGroup *group)
+{
+    mp_err res = MP_OKAY;
+    mp_int t0, t1, M, S;
+
+    MP_DIGITS(&t0) = 0;
+    MP_DIGITS(&t1) = 0;
+    MP_DIGITS(&M) = 0;
+    MP_DIGITS(&S) = 0;
+    MP_CHECKOK(mp_init(&t0));
+    MP_CHECKOK(mp_init(&t1));
+    MP_CHECKOK(mp_init(&M));
+    MP_CHECKOK(mp_init(&S));
+
+    /* P == inf or P == -P */
+    if (ec_GFp_pt_is_inf_jac(px, py, pz) == MP_YES || mp_cmp_z(py) == 0) {
+        MP_CHECKOK(ec_GFp_pt_set_inf_jac(rx, ry, rz));
+        goto CLEANUP;
+    }
+
+    if (mp_cmp_d(pz, 1) == 0) {
+        /* M = 3 * px^2 + a */
+        MP_CHECKOK(group->meth->field_sqr(px, &t0, group->meth));
+        MP_CHECKOK(group->meth->field_add(&t0, &t0, &M, group->meth));
+        MP_CHECKOK(group->meth->field_add(&t0, &M, &t0, group->meth));
+        MP_CHECKOK(group->meth->field_add(&t0, &group->curvea, &M, group->meth));
+    } else if (MP_SIGN(&group->curvea) == MP_NEG &&
+               MP_USED(&group->curvea) == 1 &&
+               MP_DIGIT(&group->curvea, 0) == 3) {
+        /* M = 3 * (px + pz^2) * (px - pz^2), valid because a == -3 */
+        MP_CHECKOK(group->meth->field_sqr(pz, &M, group->meth));
+        MP_CHECKOK(group->meth->field_add(px, &M, &t0, group->meth));
+        MP_CHECKOK(group->meth->field_sub(px, &M, &t1, group->meth));
+        MP_CHECKOK(group->meth->field_mul(&t0, &t1, &M, group->meth));
+        MP_CHECKOK(group->meth->field_add(&M, &M, &t0, group->meth));
+        MP_CHECKOK(group->meth->field_add(&t0, &M, &M, group->meth));
+    } else {
+        /* M = 3 * (px^2) + a * (pz^4) */
+        MP_CHECKOK(group->meth->field_sqr(px, &t0, group->meth));
+        MP_CHECKOK(group->meth->field_add(&t0, &t0, &M, group->meth));
+        MP_CHECKOK(group->meth->field_add(&t0, &M, &t0, group->meth));
+        MP_CHECKOK(group->meth->field_sqr(pz, &M, group->meth));
+        MP_CHECKOK(group->meth->field_sqr(&M, &M, group->meth));
+        MP_CHECKOK(group->meth->field_mul(&M, &group->curvea, &M, group->meth));
+        MP_CHECKOK(group->meth->field_add(&M, &t0, &M, group->meth));
+    }
+
+    /* rz = 2 * py * pz (pz is not read again afterwards, so rz may alias pz) */
+    /* t0 = 4 * py^2 */
+    if (mp_cmp_d(pz, 1) == 0) {
+        MP_CHECKOK(group->meth->field_add(py, py, rz, group->meth));
+        MP_CHECKOK(group->meth->field_sqr(rz, &t0, group->meth));
+    } else {
+        MP_CHECKOK(group->meth->field_add(py, py, &t0, group->meth));
+        MP_CHECKOK(group->meth->field_mul(&t0, pz, rz, group->meth));
+        MP_CHECKOK(group->meth->field_sqr(&t0, &t0, group->meth));
+    }
+
+    /* S = 4 * px * py^2 = px * (2 * py)^2 */
+    MP_CHECKOK(group->meth->field_mul(px, &t0, &S, group->meth));
+
+    /* rx = M^2 - 2 * S */
+    MP_CHECKOK(group->meth->field_add(&S, &S, &t1, group->meth));
+    MP_CHECKOK(group->meth->field_sqr(&M, rx, group->meth));
+    MP_CHECKOK(group->meth->field_sub(rx, &t1, rx, group->meth));
+
+    /* ry = M * (S - rx) - 8 * py^4. t1 = t0^2 = 16 * py^4 is halved to get
+     * 8 * py^4; when t1 is odd the modulus is added first so that the
+     * division by two is exact. */
+    MP_CHECKOK(group->meth->field_sqr(&t0, &t1, group->meth));
+    if (mp_isodd(&t1)) {
+        MP_CHECKOK(mp_add(&t1, &group->meth->irr, &t1));
+    }
+    MP_CHECKOK(mp_div_2(&t1, &t1));
+    MP_CHECKOK(group->meth->field_sub(&S, rx, &S, group->meth));
+    MP_CHECKOK(group->meth->field_mul(&M, &S, &M, group->meth));
+    MP_CHECKOK(group->meth->field_sub(&M, &t1, ry, group->meth));
+
+CLEANUP:
+    mp_clear(&t0);
+    mp_clear(&t1);
+    mp_clear(&M);
+    mp_clear(&S);
+    return res;
+}
+
+/* by default, this routine is unused and thus doesn't need to be compiled */
+#ifdef ECL_ENABLE_GFP_PT_MUL_JAC
+/* Computes R = nP where R is (rx, ry) and P is (px, py). The parameters
+ * a, b and p are the elliptic curve coefficients and the prime that
+ * determines the field GFp. Elliptic curve points P and R can be
+ * identical. Uses mixed Jacobian-affine coordinates. Assumes input is
+ * already field-encoded using field_enc, and returns output that is still
+ * field-encoded. Uses 4-bit window method.
*/ +mp_err +ec_GFp_pt_mul_jac(const mp_int *n, const mp_int *px, const mp_int *py, + mp_int *rx, mp_int *ry, const ECGroup *group) +{ + mp_err res = MP_OKAY; + mp_int precomp[16][2], rz; + int i, ni, d; + + MP_DIGITS(&rz) = 0; + for (i = 0; i < 16; i++) { + MP_DIGITS(&precomp[i][0]) = 0; + MP_DIGITS(&precomp[i][1]) = 0; + } + + ARGCHK(group != NULL, MP_BADARG); + ARGCHK((n != NULL) && (px != NULL) && (py != NULL), MP_BADARG); + + /* initialize precomputation table */ + for (i = 0; i < 16; i++) { + MP_CHECKOK(mp_init(&precomp[i][0])); + MP_CHECKOK(mp_init(&precomp[i][1])); + } + + /* fill precomputation table */ + mp_zero(&precomp[0][0]); + mp_zero(&precomp[0][1]); + MP_CHECKOK(mp_copy(px, &precomp[1][0])); + MP_CHECKOK(mp_copy(py, &precomp[1][1])); + for (i = 2; i < 16; i++) { + MP_CHECKOK(group->point_add(&precomp[1][0], &precomp[1][1], + &precomp[i - 1][0], &precomp[i - 1][1], + &precomp[i][0], &precomp[i][1], group)); + } + + d = (mpl_significant_bits(n) + 3) / 4; + + /* R = inf */ + MP_CHECKOK(mp_init(&rz)); + MP_CHECKOK(ec_GFp_pt_set_inf_jac(rx, ry, &rz)); + + for (i = d - 1; i >= 0; i--) { + /* compute window ni */ + ni = MP_GET_BIT(n, 4 * i + 3); + ni <<= 1; + ni |= MP_GET_BIT(n, 4 * i + 2); + ni <<= 1; + ni |= MP_GET_BIT(n, 4 * i + 1); + ni <<= 1; + ni |= MP_GET_BIT(n, 4 * i); + /* R = 2^4 * R */ + MP_CHECKOK(ec_GFp_pt_dbl_jac(rx, ry, &rz, rx, ry, &rz, group)); + MP_CHECKOK(ec_GFp_pt_dbl_jac(rx, ry, &rz, rx, ry, &rz, group)); + MP_CHECKOK(ec_GFp_pt_dbl_jac(rx, ry, &rz, rx, ry, &rz, group)); + MP_CHECKOK(ec_GFp_pt_dbl_jac(rx, ry, &rz, rx, ry, &rz, group)); + /* R = R + (ni * P) */ + MP_CHECKOK(ec_GFp_pt_add_jac_aff(rx, ry, &rz, &precomp[ni][0], &precomp[ni][1], rx, ry, + &rz, group)); + } + + /* convert result S to affine coordinates */ + MP_CHECKOK(ec_GFp_pt_jac2aff(rx, ry, &rz, rx, ry, group)); + +CLEANUP: + mp_clear(&rz); + for (i = 0; i < 16; i++) { + mp_clear(&precomp[i][0]); + mp_clear(&precomp[i][1]); + } + return res; +} +#endif + +/* Elliptic curve 
scalar-point multiplication. Computes R(x, y) = k1 * G + + * k2 * P(x, y), where G is the generator (base point) of the group of + * points on the elliptic curve. Allows k1 = NULL or { k2, P } = NULL. + * Uses mixed Jacobian-affine coordinates. Input and output values are + * assumed to be NOT field-encoded. Uses algorithm 15 (simultaneous + * multiple point multiplication) from Brown, Hankerson, Lopez, Menezes. + * Software Implementation of the NIST Elliptic Curves over Prime Fields. */ +mp_err +ec_GFp_pts_mul_jac(const mp_int *k1, const mp_int *k2, const mp_int *px, + const mp_int *py, mp_int *rx, mp_int *ry, + const ECGroup *group) +{ + mp_err res = MP_OKAY; + mp_int precomp[4][4][2]; + mp_int rz; + const mp_int *a, *b; + unsigned int i, j; + int ai, bi, d; + + for (i = 0; i < 4; i++) { + for (j = 0; j < 4; j++) { + MP_DIGITS(&precomp[i][j][0]) = 0; + MP_DIGITS(&precomp[i][j][1]) = 0; + } + } + MP_DIGITS(&rz) = 0; + + ARGCHK(group != NULL, MP_BADARG); + ARGCHK(!((k1 == NULL) && ((k2 == NULL) || (px == NULL) || (py == NULL))), MP_BADARG); + + /* if some arguments are not defined used ECPoint_mul */ + if (k1 == NULL) { + return ECPoint_mul(group, k2, px, py, rx, ry); + } else if ((k2 == NULL) || (px == NULL) || (py == NULL)) { + return ECPoint_mul(group, k1, NULL, NULL, rx, ry); + } + + /* initialize precomputation table */ + for (i = 0; i < 4; i++) { + for (j = 0; j < 4; j++) { + MP_CHECKOK(mp_init(&precomp[i][j][0])); + MP_CHECKOK(mp_init(&precomp[i][j][1])); + } + } + + /* fill precomputation table */ + /* assign {k1, k2} = {a, b} such that len(a) >= len(b) */ + if (mpl_significant_bits(k1) < mpl_significant_bits(k2)) { + a = k2; + b = k1; + if (group->meth->field_enc) { + MP_CHECKOK(group->meth->field_enc(px, &precomp[1][0][0], group->meth)); + MP_CHECKOK(group->meth->field_enc(py, &precomp[1][0][1], group->meth)); + } else { + MP_CHECKOK(mp_copy(px, &precomp[1][0][0])); + MP_CHECKOK(mp_copy(py, &precomp[1][0][1])); + } + MP_CHECKOK(mp_copy(&group->genx, 
&precomp[0][1][0])); + MP_CHECKOK(mp_copy(&group->geny, &precomp[0][1][1])); + } else { + a = k1; + b = k2; + MP_CHECKOK(mp_copy(&group->genx, &precomp[1][0][0])); + MP_CHECKOK(mp_copy(&group->geny, &precomp[1][0][1])); + if (group->meth->field_enc) { + MP_CHECKOK(group->meth->field_enc(px, &precomp[0][1][0], group->meth)); + MP_CHECKOK(group->meth->field_enc(py, &precomp[0][1][1], group->meth)); + } else { + MP_CHECKOK(mp_copy(px, &precomp[0][1][0])); + MP_CHECKOK(mp_copy(py, &precomp[0][1][1])); + } + } + /* precompute [*][0][*] */ + mp_zero(&precomp[0][0][0]); + mp_zero(&precomp[0][0][1]); + MP_CHECKOK(group->point_dbl(&precomp[1][0][0], &precomp[1][0][1], + &precomp[2][0][0], &precomp[2][0][1], group)); + MP_CHECKOK(group->point_add(&precomp[1][0][0], &precomp[1][0][1], + &precomp[2][0][0], &precomp[2][0][1], + &precomp[3][0][0], &precomp[3][0][1], group)); + /* precompute [*][1][*] */ + for (i = 1; i < 4; i++) { + MP_CHECKOK(group->point_add(&precomp[0][1][0], &precomp[0][1][1], + &precomp[i][0][0], &precomp[i][0][1], + &precomp[i][1][0], &precomp[i][1][1], group)); + } + /* precompute [*][2][*] */ + MP_CHECKOK(group->point_dbl(&precomp[0][1][0], &precomp[0][1][1], + &precomp[0][2][0], &precomp[0][2][1], group)); + for (i = 1; i < 4; i++) { + MP_CHECKOK(group->point_add(&precomp[0][2][0], &precomp[0][2][1], + &precomp[i][0][0], &precomp[i][0][1], + &precomp[i][2][0], &precomp[i][2][1], group)); + } + /* precompute [*][3][*] */ + MP_CHECKOK(group->point_add(&precomp[0][1][0], &precomp[0][1][1], + &precomp[0][2][0], &precomp[0][2][1], + &precomp[0][3][0], &precomp[0][3][1], group)); + for (i = 1; i < 4; i++) { + MP_CHECKOK(group->point_add(&precomp[0][3][0], &precomp[0][3][1], + &precomp[i][0][0], &precomp[i][0][1], + &precomp[i][3][0], &precomp[i][3][1], group)); + } + + d = (mpl_significant_bits(a) + 1) / 2; + + /* R = inf */ + MP_CHECKOK(mp_init(&rz)); + MP_CHECKOK(ec_GFp_pt_set_inf_jac(rx, ry, &rz)); + + for (i = d; i-- > 0;) { + ai = MP_GET_BIT(a, 2 * i + 
1); + ai <<= 1; + ai |= MP_GET_BIT(a, 2 * i); + bi = MP_GET_BIT(b, 2 * i + 1); + bi <<= 1; + bi |= MP_GET_BIT(b, 2 * i); + /* R = 2^2 * R */ + MP_CHECKOK(ec_GFp_pt_dbl_jac(rx, ry, &rz, rx, ry, &rz, group)); + MP_CHECKOK(ec_GFp_pt_dbl_jac(rx, ry, &rz, rx, ry, &rz, group)); + /* R = R + (ai * A + bi * B) */ + MP_CHECKOK(ec_GFp_pt_add_jac_aff(rx, ry, &rz, &precomp[ai][bi][0], &precomp[ai][bi][1], + rx, ry, &rz, group)); + } + + MP_CHECKOK(ec_GFp_pt_jac2aff(rx, ry, &rz, rx, ry, group)); + + if (group->meth->field_dec) { + MP_CHECKOK(group->meth->field_dec(rx, rx, group->meth)); + MP_CHECKOK(group->meth->field_dec(ry, ry, group->meth)); + } + +CLEANUP: + mp_clear(&rz); + for (i = 0; i < 4; i++) { + for (j = 0; j < 4; j++) { + mp_clear(&precomp[i][j][0]); + mp_clear(&precomp[i][j][1]); + } + } + return res; +} diff --git a/security/nss/lib/freebl/ecl/ecp_jm.c b/security/nss/lib/freebl/ecl/ecp_jm.c new file mode 100644 index 000000000..a1106cea8 --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecp_jm.c @@ -0,0 +1,283 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "ecp.h" +#include "ecl-priv.h" +#include "mplogic.h" +#include + +#define MAX_SCRATCH 6 + +/* Computes R = 2P. Elliptic curve points P and R can be identical. Uses + * Modified Jacobian coordinates. + * + * Assumes input is already field-encoded using field_enc, and returns + * output that is still field-encoded. 
+ * + */ +static mp_err +ec_GFp_pt_dbl_jm(const mp_int *px, const mp_int *py, const mp_int *pz, + const mp_int *paz4, mp_int *rx, mp_int *ry, mp_int *rz, + mp_int *raz4, mp_int scratch[], const ECGroup *group) +{ + mp_err res = MP_OKAY; + mp_int *t0, *t1, *M, *S; + + t0 = &scratch[0]; + t1 = &scratch[1]; + M = &scratch[2]; + S = &scratch[3]; + +#if MAX_SCRATCH < 4 +#error "Scratch array defined too small " +#endif + + /* Check for point at infinity */ + if (ec_GFp_pt_is_inf_jac(px, py, pz) == MP_YES) { + /* Set r = pt at infinity by setting rz = 0 */ + + MP_CHECKOK(ec_GFp_pt_set_inf_jac(rx, ry, rz)); + goto CLEANUP; + } + + /* M = 3 (px^2) + a*(pz^4) */ + MP_CHECKOK(group->meth->field_sqr(px, t0, group->meth)); + MP_CHECKOK(group->meth->field_add(t0, t0, M, group->meth)); + MP_CHECKOK(group->meth->field_add(t0, M, t0, group->meth)); + MP_CHECKOK(group->meth->field_add(t0, paz4, M, group->meth)); + + /* rz = 2 * py * pz */ + MP_CHECKOK(group->meth->field_mul(py, pz, S, group->meth)); + MP_CHECKOK(group->meth->field_add(S, S, rz, group->meth)); + + /* t0 = 2y^2 , t1 = 8y^4 */ + MP_CHECKOK(group->meth->field_sqr(py, t0, group->meth)); + MP_CHECKOK(group->meth->field_add(t0, t0, t0, group->meth)); + MP_CHECKOK(group->meth->field_sqr(t0, t1, group->meth)); + MP_CHECKOK(group->meth->field_add(t1, t1, t1, group->meth)); + + /* S = 4 * px * py^2 = 2 * px * t0 */ + MP_CHECKOK(group->meth->field_mul(px, t0, S, group->meth)); + MP_CHECKOK(group->meth->field_add(S, S, S, group->meth)); + + /* rx = M^2 - 2S */ + MP_CHECKOK(group->meth->field_sqr(M, rx, group->meth)); + MP_CHECKOK(group->meth->field_sub(rx, S, rx, group->meth)); + MP_CHECKOK(group->meth->field_sub(rx, S, rx, group->meth)); + + /* ry = M * (S - rx) - t1 */ + MP_CHECKOK(group->meth->field_sub(S, rx, S, group->meth)); + MP_CHECKOK(group->meth->field_mul(S, M, ry, group->meth)); + MP_CHECKOK(group->meth->field_sub(ry, t1, ry, group->meth)); + + /* ra*z^4 = 2*t1*(apz4) */ + MP_CHECKOK(group->meth->field_mul(paz4, t1, 
raz4, group->meth)); + MP_CHECKOK(group->meth->field_add(raz4, raz4, raz4, group->meth)); + +CLEANUP: + return res; +} + +/* Computes R = P + Q where R is (rx, ry, rz), P is (px, py, pz) and Q is + * (qx, qy, 1). Elliptic curve points P, Q, and R can all be identical. + * Uses mixed Modified_Jacobian-affine coordinates. Assumes input is + * already field-encoded using field_enc, and returns output that is still + * field-encoded. */ +static mp_err +ec_GFp_pt_add_jm_aff(const mp_int *px, const mp_int *py, const mp_int *pz, + const mp_int *paz4, const mp_int *qx, + const mp_int *qy, mp_int *rx, mp_int *ry, mp_int *rz, + mp_int *raz4, mp_int scratch[], const ECGroup *group) +{ + mp_err res = MP_OKAY; + mp_int *A, *B, *C, *D, *C2, *C3; + + A = &scratch[0]; + B = &scratch[1]; + C = &scratch[2]; + D = &scratch[3]; + C2 = &scratch[4]; + C3 = &scratch[5]; + +#if MAX_SCRATCH < 6 +#error "Scratch array defined too small " +#endif + + /* If either P or Q is the point at infinity, then return the other + * point */ + if (ec_GFp_pt_is_inf_jac(px, py, pz) == MP_YES) { + MP_CHECKOK(ec_GFp_pt_aff2jac(qx, qy, rx, ry, rz, group)); + MP_CHECKOK(group->meth->field_sqr(rz, raz4, group->meth)); + MP_CHECKOK(group->meth->field_sqr(raz4, raz4, group->meth)); + MP_CHECKOK(group->meth->field_mul(raz4, &group->curvea, raz4, group->meth)); + goto CLEANUP; + } + if (ec_GFp_pt_is_inf_aff(qx, qy) == MP_YES) { + MP_CHECKOK(mp_copy(px, rx)); + MP_CHECKOK(mp_copy(py, ry)); + MP_CHECKOK(mp_copy(pz, rz)); + MP_CHECKOK(mp_copy(paz4, raz4)); + goto CLEANUP; + } + + /* A = qx * pz^2, B = qy * pz^3 */ + MP_CHECKOK(group->meth->field_sqr(pz, A, group->meth)); + MP_CHECKOK(group->meth->field_mul(A, pz, B, group->meth)); + MP_CHECKOK(group->meth->field_mul(A, qx, A, group->meth)); + MP_CHECKOK(group->meth->field_mul(B, qy, B, group->meth)); + + /* C = A - px, D = B - py */ + MP_CHECKOK(group->meth->field_sub(A, px, C, group->meth)); + MP_CHECKOK(group->meth->field_sub(B, py, D, group->meth)); + + /* C2 = 
C^2, C3 = C^3 */ + MP_CHECKOK(group->meth->field_sqr(C, C2, group->meth)); + MP_CHECKOK(group->meth->field_mul(C, C2, C3, group->meth)); + + /* rz = pz * C */ + MP_CHECKOK(group->meth->field_mul(pz, C, rz, group->meth)); + + /* C = px * C^2 */ + MP_CHECKOK(group->meth->field_mul(px, C2, C, group->meth)); + /* A = D^2 */ + MP_CHECKOK(group->meth->field_sqr(D, A, group->meth)); + + /* rx = D^2 - (C^3 + 2 * (px * C^2)) */ + MP_CHECKOK(group->meth->field_add(C, C, rx, group->meth)); + MP_CHECKOK(group->meth->field_add(C3, rx, rx, group->meth)); + MP_CHECKOK(group->meth->field_sub(A, rx, rx, group->meth)); + + /* C3 = py * C^3 */ + MP_CHECKOK(group->meth->field_mul(py, C3, C3, group->meth)); + + /* ry = D * (px * C^2 - rx) - py * C^3 */ + MP_CHECKOK(group->meth->field_sub(C, rx, ry, group->meth)); + MP_CHECKOK(group->meth->field_mul(D, ry, ry, group->meth)); + MP_CHECKOK(group->meth->field_sub(ry, C3, ry, group->meth)); + + /* raz4 = a * rz^4 */ + MP_CHECKOK(group->meth->field_sqr(rz, raz4, group->meth)); + MP_CHECKOK(group->meth->field_sqr(raz4, raz4, group->meth)); + MP_CHECKOK(group->meth->field_mul(raz4, &group->curvea, raz4, group->meth)); +CLEANUP: + return res; +} + +/* Computes R = nP where R is (rx, ry) and P is the base point. Elliptic + * curve points P and R can be identical. Uses mixed Modified-Jacobian + * co-ordinates for doubling and Chudnovsky Jacobian coordinates for + * additions. Assumes input is already field-encoded using field_enc, and + * returns output that is still field-encoded. Uses 5-bit window NAF + * method (algorithm 11) for scalar-point multiplication from Brown, + * Hankerson, Lopez, Menezes. Software Implementation of the NIST Elliptic + * Curves Over Prime Fields. 
*/ +mp_err +ec_GFp_pt_mul_jm_wNAF(const mp_int *n, const mp_int *px, const mp_int *py, + mp_int *rx, mp_int *ry, const ECGroup *group) +{ + mp_err res = MP_OKAY; + mp_int precomp[16][2], rz, tpx, tpy; + mp_int raz4; + mp_int scratch[MAX_SCRATCH]; + signed char *naf = NULL; + int i, orderBitSize; + + MP_DIGITS(&rz) = 0; + MP_DIGITS(&raz4) = 0; + MP_DIGITS(&tpx) = 0; + MP_DIGITS(&tpy) = 0; + for (i = 0; i < 16; i++) { + MP_DIGITS(&precomp[i][0]) = 0; + MP_DIGITS(&precomp[i][1]) = 0; + } + for (i = 0; i < MAX_SCRATCH; i++) { + MP_DIGITS(&scratch[i]) = 0; + } + + ARGCHK(group != NULL, MP_BADARG); + ARGCHK((n != NULL) && (px != NULL) && (py != NULL), MP_BADARG); + + /* initialize precomputation table */ + MP_CHECKOK(mp_init(&tpx)); + MP_CHECKOK(mp_init(&tpy)); + ; + MP_CHECKOK(mp_init(&rz)); + MP_CHECKOK(mp_init(&raz4)); + + for (i = 0; i < 16; i++) { + MP_CHECKOK(mp_init(&precomp[i][0])); + MP_CHECKOK(mp_init(&precomp[i][1])); + } + for (i = 0; i < MAX_SCRATCH; i++) { + MP_CHECKOK(mp_init(&scratch[i])); + } + + /* Set out[8] = P */ + MP_CHECKOK(mp_copy(px, &precomp[8][0])); + MP_CHECKOK(mp_copy(py, &precomp[8][1])); + + /* Set (tpx, tpy) = 2P */ + MP_CHECKOK(group->point_dbl(&precomp[8][0], &precomp[8][1], &tpx, &tpy, + group)); + + /* Set 3P, 5P, ..., 15P */ + for (i = 8; i < 15; i++) { + MP_CHECKOK(group->point_add(&precomp[i][0], &precomp[i][1], &tpx, &tpy, + &precomp[i + 1][0], &precomp[i + 1][1], + group)); + } + + /* Set -15P, -13P, ..., -P */ + for (i = 0; i < 8; i++) { + MP_CHECKOK(mp_copy(&precomp[15 - i][0], &precomp[i][0])); + MP_CHECKOK(group->meth->field_neg(&precomp[15 - i][1], &precomp[i][1], + group->meth)); + } + + /* R = inf */ + MP_CHECKOK(ec_GFp_pt_set_inf_jac(rx, ry, &rz)); + + orderBitSize = mpl_significant_bits(&group->order); + + /* Allocate memory for NAF */ + naf = (signed char *)malloc(sizeof(signed char) * (orderBitSize + 1)); + if (naf == NULL) { + res = MP_MEM; + goto CLEANUP; + } + + /* Compute 5NAF */ + ec_compute_wNAF(naf, orderBitSize, 
n, 5); + + /* wNAF method */ + for (i = orderBitSize; i >= 0; i--) { + /* R = 2R */ + ec_GFp_pt_dbl_jm(rx, ry, &rz, &raz4, rx, ry, &rz, + &raz4, scratch, group); + if (naf[i] != 0) { + ec_GFp_pt_add_jm_aff(rx, ry, &rz, &raz4, + &precomp[(naf[i] + 15) / 2][0], + &precomp[(naf[i] + 15) / 2][1], rx, ry, + &rz, &raz4, scratch, group); + } + } + + /* convert result S to affine coordinates */ + MP_CHECKOK(ec_GFp_pt_jac2aff(rx, ry, &rz, rx, ry, group)); + +CLEANUP: + for (i = 0; i < MAX_SCRATCH; i++) { + mp_clear(&scratch[i]); + } + for (i = 0; i < 16; i++) { + mp_clear(&precomp[i][0]); + mp_clear(&precomp[i][1]); + } + mp_clear(&tpx); + mp_clear(&tpy); + mp_clear(&rz); + mp_clear(&raz4); + free(naf); + return res; +} diff --git a/security/nss/lib/freebl/ecl/ecp_mont.c b/security/nss/lib/freebl/ecl/ecp_mont.c new file mode 100644 index 000000000..779685b4d --- /dev/null +++ b/security/nss/lib/freebl/ecl/ecp_mont.c @@ -0,0 +1,154 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* Uses Montgomery reduction for field arithmetic. See mpi/mpmontg.c for + * code implementation. */ + +#include "mpi.h" +#include "mplogic.h" +#include "mpi-priv.h" +#include "ecl-priv.h" +#include "ecp.h" +#include +#include + +/* Construct a generic GFMethod for arithmetic over prime fields with + * irreducible irr. 
*/ +GFMethod * +GFMethod_consGFp_mont(const mp_int *irr) +{ + mp_err res = MP_OKAY; + GFMethod *meth = NULL; + mp_mont_modulus *mmm; + + meth = GFMethod_consGFp(irr); + if (meth == NULL) + return NULL; + + mmm = (mp_mont_modulus *)malloc(sizeof(mp_mont_modulus)); + if (mmm == NULL) { + res = MP_MEM; + goto CLEANUP; + } + + meth->field_mul = &ec_GFp_mul_mont; + meth->field_sqr = &ec_GFp_sqr_mont; + meth->field_div = &ec_GFp_div_mont; + meth->field_enc = &ec_GFp_enc_mont; + meth->field_dec = &ec_GFp_dec_mont; + meth->extra1 = mmm; + meth->extra2 = NULL; + meth->extra_free = &ec_GFp_extra_free_mont; + + mmm->N = meth->irr; + mmm->n0prime = 0 - s_mp_invmod_radix(MP_DIGIT(&meth->irr, 0)); + +CLEANUP: + if (res != MP_OKAY) { + GFMethod_free(meth); + return NULL; + } + return meth; +} + +/* Wrapper functions for generic prime field arithmetic. */ + +/* Field multiplication using Montgomery reduction. */ +mp_err +ec_GFp_mul_mont(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth) +{ + mp_err res = MP_OKAY; + +#ifdef MP_MONT_USE_MP_MUL + /* if MP_MONT_USE_MP_MUL is defined, then the function s_mp_mul_mont + * is not implemented and we have to use mp_mul and s_mp_redc directly + */ + MP_CHECKOK(mp_mul(a, b, r)); + MP_CHECKOK(s_mp_redc(r, (mp_mont_modulus *)meth->extra1)); +#else + mp_int s; + + MP_DIGITS(&s) = 0; + /* s_mp_mul_mont doesn't allow source and destination to be the same */ + if ((a == r) || (b == r)) { + MP_CHECKOK(mp_init(&s)); + MP_CHECKOK(s_mp_mul_mont(a, b, &s, (mp_mont_modulus *)meth->extra1)); + MP_CHECKOK(mp_copy(&s, r)); + mp_clear(&s); + } else { + return s_mp_mul_mont(a, b, r, (mp_mont_modulus *)meth->extra1); + } +#endif +CLEANUP: + return res; +} + +/* Field squaring using Montgomery reduction. */ +mp_err +ec_GFp_sqr_mont(const mp_int *a, mp_int *r, const GFMethod *meth) +{ + return ec_GFp_mul_mont(a, a, r, meth); +} + +/* Field division using Montgomery reduction. 
*/ +mp_err +ec_GFp_div_mont(const mp_int *a, const mp_int *b, mp_int *r, + const GFMethod *meth) +{ + mp_err res = MP_OKAY; + + /* if A=aZ represents a encoded in montgomery coordinates with Z and # + * and \ respectively represent multiplication and division in + * montgomery coordinates, then A\B = (a/b)Z = (A/B)Z and Binv = + * (1/b)Z = (1/B)(Z^2) where B # Binv = Z */ + MP_CHECKOK(ec_GFp_div(a, b, r, meth)); + MP_CHECKOK(ec_GFp_enc_mont(r, r, meth)); + if (a == NULL) { + MP_CHECKOK(ec_GFp_enc_mont(r, r, meth)); + } +CLEANUP: + return res; +} + +/* Encode a field element in Montgomery form. See s_mp_to_mont in + * mpi/mpmontg.c */ +mp_err +ec_GFp_enc_mont(const mp_int *a, mp_int *r, const GFMethod *meth) +{ + mp_mont_modulus *mmm; + mp_err res = MP_OKAY; + + mmm = (mp_mont_modulus *)meth->extra1; + MP_CHECKOK(mp_copy(a, r)); + MP_CHECKOK(s_mp_lshd(r, MP_USED(&mmm->N))); + MP_CHECKOK(mp_mod(r, &mmm->N, r)); +CLEANUP: + return res; +} + +/* Decode a field element from Montgomery form. */ +mp_err +ec_GFp_dec_mont(const mp_int *a, mp_int *r, const GFMethod *meth) +{ + mp_err res = MP_OKAY; + + if (a != r) { + MP_CHECKOK(mp_copy(a, r)); + } + MP_CHECKOK(s_mp_redc(r, (mp_mont_modulus *)meth->extra1)); +CLEANUP: + return res; +} + +/* Free the memory allocated to the extra fields of Montgomery GFMethod + * object. */ +void +ec_GFp_extra_free_mont(GFMethod *meth) +{ + if (meth->extra1 != NULL) { + free(meth->extra1); + meth->extra1 = NULL; + } +} diff --git a/security/nss/lib/freebl/ecl/tests/ec_naft.c b/security/nss/lib/freebl/ecl/tests/ec_naft.c new file mode 100644 index 000000000..61ef15c36 --- /dev/null +++ b/security/nss/lib/freebl/ecl/tests/ec_naft.c @@ -0,0 +1,121 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "mpi.h" +#include "mplogic.h" +#include "ecl.h" +#include "ecp.h" +#include "ecl-priv.h" + +#include +#include +#include +#include +#include + +/* Returns 2^e as an integer. This is meant to be used for small powers of + * two. */ +int ec_twoTo(int e); + +/* Number of bits of scalar to test */ +#define BITSIZE 160 + +/* Time k repetitions of operation op. */ +#define M_TimeOperation(op, k) \ + { \ + double dStart, dNow, dUserTime; \ + struct rusage ru; \ + int i; \ + getrusage(RUSAGE_SELF, &ru); \ + dStart = (double)ru.ru_utime.tv_sec + (double)ru.ru_utime.tv_usec * 0.000001; \ + for (i = 0; i < k; i++) { \ + { \ + op; \ + } \ + }; \ + getrusage(RUSAGE_SELF, &ru); \ + dNow = (double)ru.ru_utime.tv_sec + (double)ru.ru_utime.tv_usec * 0.000001; \ + dUserTime = dNow - dStart; \ + if (dUserTime) \ + printf(" %-45s\n k: %6i, t: %6.2f sec\n", #op, k, dUserTime); \ + } + +/* Tests wNAF computation. Non-adjacent-form is discussed in the paper: D. + * Hankerson, J. Hernandez and A. Menezes, "Software implementation of + * elliptic curve cryptography over binary fields", Proc. CHES 2000. 
*/ + +mp_err +main(void) +{ + signed char naf[BITSIZE + 1]; + ECGroup *group = NULL; + mp_int k; + mp_int *scalar; + int i, count; + int res; + int w = 5; + char s[1000]; + + /* Get a 160 bit scalar to compute wNAF from */ + group = ECGroup_fromName(ECCurve_SECG_PRIME_160R1); + scalar = &group->genx; + + /* Compute wNAF representation of scalar */ + ec_compute_wNAF(naf, BITSIZE, scalar, w); + + /* Verify correctness of representation */ + mp_init(&k); /* init k to 0 */ + + for (i = BITSIZE; i >= 0; i--) { + mp_add(&k, &k, &k); + /* digits in mp_???_d are unsigned */ + if (naf[i] >= 0) { + mp_add_d(&k, naf[i], &k); + } else { + mp_sub_d(&k, -naf[i], &k); + } + } + + if (mp_cmp(&k, scalar) != 0) { + printf("Error: incorrect NAF value.\n"); + MP_CHECKOK(mp_toradix(&k, s, 16)); + printf("NAF value %s\n", s); + MP_CHECKOK(mp_toradix(scalar, s, 16)); + printf("original value %s\n", s); + goto CLEANUP; + } + + /* Verify digits of representation are valid */ + for (i = 0; i <= BITSIZE; i++) { + if (naf[i] % 2 == 0 && naf[i] != 0) { + printf("Error: Even non-zero digit found.\n"); + goto CLEANUP; + } + if (naf[i] < -(ec_twoTo(w - 1)) || naf[i] >= ec_twoTo(w - 1)) { + printf("Error: Magnitude of naf digit too large.\n"); + goto CLEANUP; + } + } + + /* Verify sparsity of representation */ + count = w - 1; + for (i = 0; i <= BITSIZE; i++) { + if (naf[i] != 0) { + if (count < w - 1) { + printf("Error: Sparsity failed.\n"); + goto CLEANUP; + } + count = 0; + } else + count++; + } + + /* Check timing */ + M_TimeOperation(ec_compute_wNAF(naf, BITSIZE, scalar, w), 10000); + + printf("Test passed.\n"); +CLEANUP: + ECGroup_free(group); + return MP_OKAY; +} diff --git a/security/nss/lib/freebl/ecl/tests/ecp_test.c b/security/nss/lib/freebl/ecl/tests/ecp_test.c new file mode 100644 index 000000000..dcec4d747 --- /dev/null +++ b/security/nss/lib/freebl/ecl/tests/ecp_test.c @@ -0,0 +1,409 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mpi.h" +#include "mplogic.h" +#include "mpprime.h" +#include "ecl.h" +#include "ecl-curve.h" +#include "ecp.h" +#include +#include +#include + +#include +#include +#include + +/* Time k repetitions of operation op. */ +#define M_TimeOperation(op, k) \ + { \ + double dStart, dNow, dUserTime; \ + struct rusage ru; \ + int i; \ + getrusage(RUSAGE_SELF, &ru); \ + dStart = (double)ru.ru_utime.tv_sec + (double)ru.ru_utime.tv_usec * 0.000001; \ + for (i = 0; i < k; i++) { \ + { \ + op; \ + } \ + }; \ + getrusage(RUSAGE_SELF, &ru); \ + dNow = (double)ru.ru_utime.tv_sec + (double)ru.ru_utime.tv_usec * 0.000001; \ + dUserTime = dNow - dStart; \ + if (dUserTime) \ + printf(" %-45s k: %6i, t: %6.2f sec\n", #op, k, dUserTime); \ + } + +/* Test curve using generic field arithmetic. */ +#define ECTEST_GENERIC_GFP(name_c, name) \ + printf("Testing %s using generic implementation...\n", name_c); \ + params = EC_GetNamedCurveParams(name); \ + if (params == NULL) { \ + printf(" Error: could not construct params.\n"); \ + res = MP_NO; \ + goto CLEANUP; \ + } \ + ECGroup_free(group); \ + group = ECGroup_fromHex(params); \ + if (group == NULL) { \ + printf(" Error: could not construct group.\n"); \ + res = MP_NO; \ + goto CLEANUP; \ + } \ + MP_CHECKOK(ectest_curve_GFp(group, ectestPrint, ectestTime, 1)); \ + printf("... okay.\n"); + +/* Test curve using specific field arithmetic. */ +#define ECTEST_NAMED_GFP(name_c, name) \ + printf("Testing %s using specific implementation...\n", name_c); \ + ECGroup_free(group); \ + group = ECGroup_fromName(name); \ + if (group == NULL) { \ + printf(" Warning: could not construct group.\n"); \ + printf("... failed; continuing with remaining tests.\n"); \ + } else { \ + MP_CHECKOK(ectest_curve_GFp(group, ectestPrint, ectestTime, 0)); \ + printf("... 
okay.\n"); \ + } + +/* Performs basic tests of elliptic curve cryptography over prime fields. + * If tests fail, then it prints an error message, aborts, and returns an + * error code. Otherwise, returns 0. */ +int +ectest_curve_GFp(ECGroup *group, int ectestPrint, int ectestTime, + int generic) +{ + + mp_int one, order_1, gx, gy, rx, ry, n; + int size; + mp_err res; + char s[1000]; + + /* initialize values */ + MP_CHECKOK(mp_init(&one)); + MP_CHECKOK(mp_init(&order_1)); + MP_CHECKOK(mp_init(&gx)); + MP_CHECKOK(mp_init(&gy)); + MP_CHECKOK(mp_init(&rx)); + MP_CHECKOK(mp_init(&ry)); + MP_CHECKOK(mp_init(&n)); + + MP_CHECKOK(mp_set_int(&one, 1)); + MP_CHECKOK(mp_sub(&group->order, &one, &order_1)); + + /* encode base point */ + if (group->meth->field_dec) { + MP_CHECKOK(group->meth->field_dec(&group->genx, &gx, group->meth)); + MP_CHECKOK(group->meth->field_dec(&group->geny, &gy, group->meth)); + } else { + MP_CHECKOK(mp_copy(&group->genx, &gx)); + MP_CHECKOK(mp_copy(&group->geny, &gy)); + } + if (ectestPrint) { + /* output base point */ + printf(" base point P:\n"); + MP_CHECKOK(mp_toradix(&gx, s, 16)); + printf(" %s\n", s); + MP_CHECKOK(mp_toradix(&gy, s, 16)); + printf(" %s\n", s); + if (group->meth->field_enc) { + printf(" base point P (encoded):\n"); + MP_CHECKOK(mp_toradix(&group->genx, s, 16)); + printf(" %s\n", s); + MP_CHECKOK(mp_toradix(&group->geny, s, 16)); + printf(" %s\n", s); + } + } + +#ifdef ECL_ENABLE_GFP_PT_MUL_AFF + /* multiply base point by order - 1 and check for negative of base + * point */ + MP_CHECKOK(ec_GFp_pt_mul_aff(&order_1, &group->genx, &group->geny, &rx, &ry, group)); + if (ectestPrint) { + printf(" (order-1)*P (affine):\n"); + MP_CHECKOK(mp_toradix(&rx, s, 16)); + printf(" %s\n", s); + MP_CHECKOK(mp_toradix(&ry, s, 16)); + printf(" %s\n", s); + } + MP_CHECKOK(group->meth->field_neg(&ry, &ry, group->meth)); + if ((mp_cmp(&rx, &group->genx) != 0) || (mp_cmp(&ry, &group->geny) != 0)) { + printf(" Error: invalid result (expected (- base 
point)).\n"); + res = MP_NO; + goto CLEANUP; + } +#endif + +#ifdef ECL_ENABLE_GFP_PT_MUL_AFF + /* multiply base point by order - 1 and check for negative of base + * point */ + MP_CHECKOK(ec_GFp_pt_mul_jac(&order_1, &group->genx, &group->geny, &rx, &ry, group)); + if (ectestPrint) { + printf(" (order-1)*P (jacobian):\n"); + MP_CHECKOK(mp_toradix(&rx, s, 16)); + printf(" %s\n", s); + MP_CHECKOK(mp_toradix(&ry, s, 16)); + printf(" %s\n", s); + } + MP_CHECKOK(group->meth->field_neg(&ry, &ry, group->meth)); + if ((mp_cmp(&rx, &group->genx) != 0) || (mp_cmp(&ry, &group->geny) != 0)) { + printf(" Error: invalid result (expected (- base point)).\n"); + res = MP_NO; + goto CLEANUP; + } +#endif + + /* multiply base point by order - 1 and check for negative of base + * point */ + MP_CHECKOK(ECPoint_mul(group, &order_1, NULL, NULL, &rx, &ry)); + if (ectestPrint) { + printf(" (order-1)*P (ECPoint_mul):\n"); + MP_CHECKOK(mp_toradix(&rx, s, 16)); + printf(" %s\n", s); + MP_CHECKOK(mp_toradix(&ry, s, 16)); + printf(" %s\n", s); + } + MP_CHECKOK(mp_submod(&group->meth->irr, &ry, &group->meth->irr, &ry)); + if ((mp_cmp(&rx, &gx) != 0) || (mp_cmp(&ry, &gy) != 0)) { + printf(" Error: invalid result (expected (- base point)).\n"); + res = MP_NO; + goto CLEANUP; + } + + /* multiply base point by order - 1 and check for negative of base + * point */ + MP_CHECKOK(ECPoint_mul(group, &order_1, &gx, &gy, &rx, &ry)); + if (ectestPrint) { + printf(" (order-1)*P (ECPoint_mul):\n"); + MP_CHECKOK(mp_toradix(&rx, s, 16)); + printf(" %s\n", s); + MP_CHECKOK(mp_toradix(&ry, s, 16)); + printf(" %s\n", s); + } + MP_CHECKOK(mp_submod(&group->meth->irr, &ry, &group->meth->irr, &ry)); + if ((mp_cmp(&rx, &gx) != 0) || (mp_cmp(&ry, &gy) != 0)) { + printf(" Error: invalid result (expected (- base point)).\n"); + res = MP_NO; + goto CLEANUP; + } + +#ifdef ECL_ENABLE_GFP_PT_MUL_AFF + /* multiply base point by order and check for point at infinity */ + MP_CHECKOK(ec_GFp_pt_mul_aff(&group->order, &group->genx, 
&group->geny, &rx, &ry, + group)); + if (ectestPrint) { + printf(" (order)*P (affine):\n"); + MP_CHECKOK(mp_toradix(&rx, s, 16)); + printf(" %s\n", s); + MP_CHECKOK(mp_toradix(&ry, s, 16)); + printf(" %s\n", s); + } + if (ec_GFp_pt_is_inf_aff(&rx, &ry) != MP_YES) { + printf(" Error: invalid result (expected point at infinity).\n"); + res = MP_NO; + goto CLEANUP; + } +#endif + +#ifdef ECL_ENABLE_GFP_PT_MUL_JAC + /* multiply base point by order and check for point at infinity */ + MP_CHECKOK(ec_GFp_pt_mul_jac(&group->order, &group->genx, &group->geny, &rx, &ry, + group)); + if (ectestPrint) { + printf(" (order)*P (jacobian):\n"); + MP_CHECKOK(mp_toradix(&rx, s, 16)); + printf(" %s\n", s); + MP_CHECKOK(mp_toradix(&ry, s, 16)); + printf(" %s\n", s); + } + if (ec_GFp_pt_is_inf_aff(&rx, &ry) != MP_YES) { + printf(" Error: invalid result (expected point at infinity).\n"); + res = MP_NO; + goto CLEANUP; + } +#endif + + /* multiply base point by order and check for point at infinity */ + MP_CHECKOK(ECPoint_mul(group, &group->order, NULL, NULL, &rx, &ry)); + if (ectestPrint) { + printf(" (order)*P (ECPoint_mul):\n"); + MP_CHECKOK(mp_toradix(&rx, s, 16)); + printf(" %s\n", s); + MP_CHECKOK(mp_toradix(&ry, s, 16)); + printf(" %s\n", s); + } + if (ec_GFp_pt_is_inf_aff(&rx, &ry) != MP_YES) { + printf(" Error: invalid result (expected point at infinity).\n"); + res = MP_NO; + goto CLEANUP; + } + + /* multiply base point by order and check for point at infinity */ + MP_CHECKOK(ECPoint_mul(group, &group->order, &gx, &gy, &rx, &ry)); + if (ectestPrint) { + printf(" (order)*P (ECPoint_mul):\n"); + MP_CHECKOK(mp_toradix(&rx, s, 16)); + printf(" %s\n", s); + MP_CHECKOK(mp_toradix(&ry, s, 16)); + printf(" %s\n", s); + } + if (ec_GFp_pt_is_inf_aff(&rx, &ry) != MP_YES) { + printf(" Error: invalid result (expected point at infinity).\n"); + res = MP_NO; + goto CLEANUP; + } + + /* check that (order-1)P + (order-1)P + P == (order-1)P */ + MP_CHECKOK(ECPoints_mul(group, &order_1, &order_1, 
&gx, &gy, &rx, &ry)); + MP_CHECKOK(ECPoints_mul(group, &one, &one, &rx, &ry, &rx, &ry)); + if (ectestPrint) { + printf(" (order-1)*P + (order-1)*P + P == (order-1)*P (ECPoints_mul):\n"); + MP_CHECKOK(mp_toradix(&rx, s, 16)); + printf(" %s\n", s); + MP_CHECKOK(mp_toradix(&ry, s, 16)); + printf(" %s\n", s); + } + MP_CHECKOK(mp_submod(&group->meth->irr, &ry, &group->meth->irr, &ry)); + if ((mp_cmp(&rx, &gx) != 0) || (mp_cmp(&ry, &gy) != 0)) { + printf(" Error: invalid result (expected (- base point)).\n"); + res = MP_NO; + goto CLEANUP; + } + + /* test validate_point function */ + if (ECPoint_validate(group, &gx, &gy) != MP_YES) { + printf(" Error: validate point on base point failed.\n"); + res = MP_NO; + goto CLEANUP; + } + MP_CHECKOK(mp_add_d(&gy, 1, &ry)); + if (ECPoint_validate(group, &gx, &ry) != MP_NO) { + printf(" Error: validate point on invalid point passed.\n"); + res = MP_NO; + goto CLEANUP; + } + + if (ectestTime) { + /* compute random scalar */ + size = mpl_significant_bits(&group->meth->irr); + if (size < MP_OKAY) { + goto CLEANUP; + } + MP_CHECKOK(mpp_random_size(&n, (size + ECL_BITS - 1) / ECL_BITS)); + MP_CHECKOK(group->meth->field_mod(&n, &n, group->meth)); + /* timed test */ + if (generic) { +#ifdef ECL_ENABLE_GFP_PT_MUL_AFF + M_TimeOperation(MP_CHECKOK(ec_GFp_pt_mul_aff(&n, &group->genx, &group->geny, &rx, &ry, + group)), + 100); +#endif + M_TimeOperation(MP_CHECKOK(ECPoint_mul(group, &n, NULL, NULL, &rx, &ry)), + 100); + M_TimeOperation(MP_CHECKOK(ECPoints_mul(group, &n, &n, &gx, &gy, &rx, &ry)), 100); + } else { + M_TimeOperation(MP_CHECKOK(ECPoint_mul(group, &n, NULL, NULL, &rx, &ry)), + 100); + M_TimeOperation(MP_CHECKOK(ECPoint_mul(group, &n, &gx, &gy, &rx, &ry)), + 100); + M_TimeOperation(MP_CHECKOK(ECPoints_mul(group, &n, &n, &gx, &gy, &rx, &ry)), 100); + } + } + +CLEANUP: + mp_clear(&one); + mp_clear(&order_1); + mp_clear(&gx); + mp_clear(&gy); + mp_clear(&rx); + mp_clear(&ry); + mp_clear(&n); + if (res != MP_OKAY) { + printf(" Error: 
exiting with error value %i\n", res); + } + return res; +} + +/* Prints help information. */ +void +printUsage() +{ + printf("Usage: ecp_test [--print] [--time]\n"); + printf(" --print Print out results of each point arithmetic test.\n"); + printf(" --time Benchmark point operations and print results.\n"); +} + +/* Performs tests of elliptic curve cryptography over prime fields If + * tests fail, then it prints an error message, aborts, and returns an + * error code. Otherwise, returns 0. */ +int +main(int argv, char **argc) +{ + + int ectestTime = 0; + int ectestPrint = 0; + int i; + ECGroup *group = NULL; + ECCurveParams *params = NULL; + mp_err res; + + /* read command-line arguments */ + for (i = 1; i < argv; i++) { + if ((strcasecmp(argc[i], "time") == 0) || (strcasecmp(argc[i], "-time") == 0) || (strcasecmp(argc[i], "--time") == 0)) { + ectestTime = 1; + } else if ((strcasecmp(argc[i], "print") == 0) || (strcasecmp(argc[i], "-print") == 0) || (strcasecmp(argc[i], "--print") == 0)) { + ectestPrint = 1; + } else { + printUsage(); + return 0; + } + } + + /* generic arithmetic tests */ + ECTEST_GENERIC_GFP("SECP-160R1", ECCurve_SECG_PRIME_160R1); + + /* specific arithmetic tests */ + ECTEST_NAMED_GFP("NIST-P192", ECCurve_NIST_P192); + ECTEST_NAMED_GFP("NIST-P224", ECCurve_NIST_P224); + ECTEST_NAMED_GFP("NIST-P256", ECCurve_NIST_P256); + ECTEST_NAMED_GFP("NIST-P384", ECCurve_NIST_P384); + ECTEST_NAMED_GFP("NIST-P521", ECCurve_NIST_P521); + ECTEST_NAMED_GFP("ANSI X9.62 PRIME192v1", ECCurve_X9_62_PRIME_192V1); + ECTEST_NAMED_GFP("ANSI X9.62 PRIME192v2", ECCurve_X9_62_PRIME_192V2); + ECTEST_NAMED_GFP("ANSI X9.62 PRIME192v3", ECCurve_X9_62_PRIME_192V3); + ECTEST_NAMED_GFP("ANSI X9.62 PRIME239v1", ECCurve_X9_62_PRIME_239V1); + ECTEST_NAMED_GFP("ANSI X9.62 PRIME239v2", ECCurve_X9_62_PRIME_239V2); + ECTEST_NAMED_GFP("ANSI X9.62 PRIME239v3", ECCurve_X9_62_PRIME_239V3); + ECTEST_NAMED_GFP("ANSI X9.62 PRIME256v1", ECCurve_X9_62_PRIME_256V1); + ECTEST_NAMED_GFP("SECP-112R1", 
ECCurve_SECG_PRIME_112R1); + ECTEST_NAMED_GFP("SECP-112R2", ECCurve_SECG_PRIME_112R2); + ECTEST_NAMED_GFP("SECP-128R1", ECCurve_SECG_PRIME_128R1); + ECTEST_NAMED_GFP("SECP-128R2", ECCurve_SECG_PRIME_128R2); + ECTEST_NAMED_GFP("SECP-160K1", ECCurve_SECG_PRIME_160K1); + ECTEST_NAMED_GFP("SECP-160R1", ECCurve_SECG_PRIME_160R1); + ECTEST_NAMED_GFP("SECP-160R2", ECCurve_SECG_PRIME_160R2); + ECTEST_NAMED_GFP("SECP-192K1", ECCurve_SECG_PRIME_192K1); + ECTEST_NAMED_GFP("SECP-192R1", ECCurve_SECG_PRIME_192R1); + ECTEST_NAMED_GFP("SECP-224K1", ECCurve_SECG_PRIME_224K1); + ECTEST_NAMED_GFP("SECP-224R1", ECCurve_SECG_PRIME_224R1); + ECTEST_NAMED_GFP("SECP-256K1", ECCurve_SECG_PRIME_256K1); + ECTEST_NAMED_GFP("SECP-256R1", ECCurve_SECG_PRIME_256R1); + ECTEST_NAMED_GFP("SECP-384R1", ECCurve_SECG_PRIME_384R1); + ECTEST_NAMED_GFP("SECP-521R1", ECCurve_SECG_PRIME_521R1); + ECTEST_NAMED_GFP("WTLS-6 (112)", ECCurve_WTLS_6); + ECTEST_NAMED_GFP("WTLS-7 (160)", ECCurve_WTLS_7); + ECTEST_NAMED_GFP("WTLS-8 (112)", ECCurve_WTLS_8); + ECTEST_NAMED_GFP("WTLS-9 (160)", ECCurve_WTLS_9); + ECTEST_NAMED_GFP("WTLS-12 (224)", ECCurve_WTLS_12); + ECTEST_NAMED_GFP("Curve25519", ECCurve25519); + +CLEANUP: + EC_FreeCurveParams(params); + ECGroup_free(group); + if (res != MP_OKAY) { + printf("Error: exiting with error value %i\n", res); + } + return res; +} diff --git a/security/nss/lib/freebl/ecl/uint128.c b/security/nss/lib/freebl/ecl/uint128.c new file mode 100644 index 000000000..22cbd023c --- /dev/null +++ b/security/nss/lib/freebl/ecl/uint128.c @@ -0,0 +1,87 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "uint128.h" + +/* helper functions */ +uint64_t +mask51(uint128_t x) +{ + return x.lo & MASK51; +} + +uint64_t +mask_lower(uint128_t x) +{ + return x.lo; +} + +uint128_t +mask51full(uint128_t x) +{ + uint128_t ret = { x.lo & MASK51, 0 }; + return ret; +} + +uint128_t +init128x(uint64_t x) +{ + uint128_t ret = { x, 0 }; + return ret; +} + +/* arithmetic */ + +uint128_t +add128(uint128_t a, uint128_t b) +{ + uint128_t ret; + ret.lo = a.lo + b.lo; + ret.hi = a.hi + b.hi + (ret.lo < b.lo); + return ret; +} + +/* out = 19 * a */ +uint128_t +mul12819(uint128_t a) +{ + uint128_t ret = lshift128(a, 4); + ret = add128(ret, a); + ret = add128(ret, a); + ret = add128(ret, a); + return ret; +} + +uint128_t +mul6464(uint64_t a, uint64_t b) +{ + uint128_t ret; + uint64_t t0 = ((uint64_t)(uint32_t)a) * ((uint64_t)(uint32_t)b); + uint64_t t1 = (a >> 32) * ((uint64_t)(uint32_t)b) + (t0 >> 32); + uint64_t t2 = (b >> 32) * ((uint64_t)(uint32_t)a) + ((uint32_t)t1); + ret.lo = (((uint64_t)((uint32_t)t2)) << 32) + ((uint32_t)t0); + ret.hi = (a >> 32) * (b >> 32); + ret.hi += (t2 >> 32) + (t1 >> 32); + return ret; +} + +/* only defined for n < 64 */ +uint128_t +rshift128(uint128_t x, uint8_t n) +{ + uint128_t ret; + ret.lo = (x.lo >> n) + (x.hi << (64 - n)); + ret.hi = x.hi >> n; + return ret; +} + +/* only defined for n < 64 */ +uint128_t +lshift128(uint128_t x, uint8_t n) +{ + uint128_t ret; + ret.hi = (x.hi << n) + (x.lo >> (64 - n)); + ret.lo = x.lo << n; + return ret; +} diff --git a/security/nss/lib/freebl/ecl/uint128.h b/security/nss/lib/freebl/ecl/uint128.h new file mode 100644 index 000000000..a3a71e6e7 --- /dev/null +++ b/security/nss/lib/freebl/ecl/uint128.h @@ -0,0 +1,35 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
#ifndef FREEBL_ECL_UINT128_H_
#define FREEBL_ECL_UINT128_H_

/* uint64_t / uint8_t used by every declaration below. */
#include <stdint.h>

/* Mask selecting the low 51 bits of a 64-bit word. */
#define MASK51 0x7ffffffffffffULL

#ifdef HAVE_INT128_SUPPORT
/*
 * Native 128-bit integers: each operation is a plain expression.  Every
 * expansion is wrapped in parentheses so that it keeps its meaning when
 * it is used as an operand of a larger expression.
 */
typedef unsigned __int128 uint128_t;
#define add128(a, b) ((a) + (b))
#define mul6464(a, b) ((uint128_t)(a) * (uint128_t)(b))
#define mul12819(a) ((uint128_t)(a) * 19)
#define rshift128(x, n) ((x) >> (n))
#define lshift128(x, n) ((x) << (n))
#define mask51(x) ((x) & 0x7ffffffffffff)
#define mask_lower(x) ((uint64_t)(x))
#define mask51full(x) ((x) & 0x7ffffffffffff)
#define init128x(x) (x)
#else /* uint128_t for Windows and 32 bit intel systems */
/* 128-bit value as two 64-bit words; lo is the least significant one. */
struct uint128_t_str {
    uint64_t lo;
    uint64_t hi;
};
typedef struct uint128_t_str uint128_t;
/* a + b */
uint128_t add128(uint128_t a, uint128_t b);
/* full 128-bit product of two 64-bit values */
uint128_t mul6464(uint64_t a, uint64_t b);
/* 19 * a */
uint128_t mul12819(uint128_t a);
/* x >> n; only defined for n < 64 */
uint128_t rshift128(uint128_t x, uint8_t n);
/* x << n; only defined for n < 64 */
uint128_t lshift128(uint128_t x, uint8_t n);
/* low 51 bits of x as a 64-bit value */
uint64_t mask51(uint128_t x);
/* low 64 bits of x */
uint64_t mask_lower(uint128_t x);
/* low 51 bits of x as a 128-bit value */
uint128_t mask51full(uint128_t x);
/* widen a 64-bit value to 128 bits */
uint128_t init128x(uint64_t x);
#endif

#endif /* FREEBL_ECL_UINT128_H_ */
+{ + 'includes': [ + '../../coreconf/config.gypi' + ], + 'targets': [ + { + 'target_name': 'lib_freebl_exports', + 'type': 'none', + 'copies': [ + { + 'files': [ + 'blapit.h', + 'ecl/ecl-exp.h', + 'shsign.h' + ], + 'conditions': [ + [ 'OS=="linux"', { + 'files': [ + 'nsslowhash.h', + ], + }], + ], + 'destination': '<(nss_public_dist_dir)/<(module)' + }, + { + 'files': [ + 'alghmac.h', + 'blapi.h', + 'chacha20poly1305.h', + 'ec.h', + 'ecl/ecl-curve.h', + 'ecl/ecl.h', + 'hmacct.h', + 'secmpi.h', + 'secrng.h' + ], + 'destination': '<(nss_private_dist_dir)/<(module)' + } + ] + } + ], + 'variables': { + 'module': 'nss' + } +} diff --git a/security/nss/lib/freebl/fipsfreebl.c b/security/nss/lib/freebl/fipsfreebl.c new file mode 100644 index 000000000..b3ae6865b --- /dev/null +++ b/security/nss/lib/freebl/fipsfreebl.c @@ -0,0 +1,1715 @@ +/* + * PKCS #11 FIPS Power-Up Self Test. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +/* $Id: fipstest.c,v 1.31 2012/06/28 17:55:06 rrelyea%redhat.com Exp $ */ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "blapi.h" +#include "seccomon.h" /* Required for RSA and DSA. */ +#include "secerr.h" +#include "prtypes.h" + +#ifdef NSS_ENABLE_ECC +#include "ec.h" /* Required for ECDSA */ +#endif + +/* + * different platforms have different ways of calling an initial entry point + * when the dll/.so is loaded. Most platforms support either a posix pragma + * or the GCC attribute. Some platforms support a pre-defined name, and some + * platforms have a link line way of invoking this function.
/* Windows pre-defined entry */
#if defined(XP_WIN) && !defined(NSS_NO_INIT_SUPPORT)
#include <windows.h>

/*
 * DLL entry point.  Runs bl_startup_tests() when the DLL is attached to a
 * process; thread attach/detach and process detach need no work here.
 *
 * Always returns TRUE, so this function never vetoes the load itself.
 */
BOOL WINAPI DllMain(
    HINSTANCE hinstDLL, // handle to DLL module
    DWORD fdwReason,    // reason for calling function
    LPVOID lpReserved)  // reserved
{
    /* Neither the module handle nor the reserved pointer is needed. */
    (void)hinstDLL;
    (void)lpReserved;

    // Perform actions based on the reason for calling.
    switch (fdwReason) {
        case DLL_PROCESS_ATTACH:
            // Initialize once for each new process.
            bl_startup_tests();
            break;

        case DLL_THREAD_ATTACH:  // no thread-specific initialization
        case DLL_THREAD_DETACH:  // no thread-specific cleanup
        case DLL_PROCESS_DETACH: // no process cleanup
        default:
            break;
    }
    return TRUE;
}
#endif
*/ +#define FIPS_AES_BLOCK_SIZE 16 /* 128-bits */ +#define FIPS_AES_ENCRYPT_LENGTH 16 /* 128-bits */ +#define FIPS_AES_DECRYPT_LENGTH 16 /* 128-bits */ +#define FIPS_AES_128_KEY_SIZE 16 /* 128-bits */ +#define FIPS_AES_192_KEY_SIZE 24 /* 192-bits */ +#define FIPS_AES_256_KEY_SIZE 32 /* 256-bits */ + +/* FIPS preprocessor directives for message digests */ +#define FIPS_KNOWN_HASH_MESSAGE_LENGTH 64 /* 512-bits */ + +/* FIPS preprocessor directives for RSA. */ +#define FIPS_RSA_TYPE siBuffer +#define FIPS_RSA_PUBLIC_EXPONENT_LENGTH 3 /* 24-bits */ +#define FIPS_RSA_PRIVATE_VERSION_LENGTH 1 /* 8-bits */ +#define FIPS_RSA_MESSAGE_LENGTH 256 /* 2048-bits */ +#define FIPS_RSA_COEFFICIENT_LENGTH 128 /* 1024-bits */ +#define FIPS_RSA_PRIME0_LENGTH 128 /* 1024-bits */ +#define FIPS_RSA_PRIME1_LENGTH 128 /* 1024-bits */ +#define FIPS_RSA_EXPONENT0_LENGTH 128 /* 1024-bits */ +#define FIPS_RSA_EXPONENT1_LENGTH 128 /* 1024-bits */ +#define FIPS_RSA_PRIVATE_EXPONENT_LENGTH 256 /* 2048-bits */ +#define FIPS_RSA_ENCRYPT_LENGTH 256 /* 2048-bits */ +#define FIPS_RSA_DECRYPT_LENGTH 256 /* 2048-bits */ +#define FIPS_RSA_SIGNATURE_LENGTH 256 /* 2048-bits */ +#define FIPS_RSA_MODULUS_LENGTH 256 /* 2048-bits */ + +/* FIPS preprocessor directives for DSA. */ +#define FIPS_DSA_TYPE siBuffer +#define FIPS_DSA_DIGEST_LENGTH 20 /* 160-bits */ +#define FIPS_DSA_SUBPRIME_LENGTH 20 /* 160-bits */ +#define FIPS_DSA_SIGNATURE_LENGTH 40 /* 320-bits */ +#define FIPS_DSA_PRIME_LENGTH 128 /* 1024-bits */ +#define FIPS_DSA_BASE_LENGTH 128 /* 1024-bits */ + +/* FIPS preprocessor directives for RNG. */ +#define FIPS_RNG_XKEY_LENGTH 32 /* 256-bits */ + +static SECStatus +freebl_fips_DES3_PowerUpSelfTest(void) +{ + /* DES3 Known Key (56-bits). */ + static const PRUint8 des3_known_key[] = { "ANSI Triple-DES Key Data" }; + + /* DES3-CBC Known Initialization Vector (64-bits). */ + static const PRUint8 des3_cbc_known_initialization_vector[] = { "Security" }; + + /* DES3 Known Plaintext (64-bits). 
*/ + static const PRUint8 des3_ecb_known_plaintext[] = { "Netscape" }; + static const PRUint8 des3_cbc_known_plaintext[] = { "Netscape" }; + + /* DES3 Known Ciphertext (64-bits). */ + static const PRUint8 des3_ecb_known_ciphertext[] = { + 0x55, 0x8e, 0xad, 0x3c, 0xee, 0x49, 0x69, 0xbe + }; + static const PRUint8 des3_cbc_known_ciphertext[] = { + 0x43, 0xdc, 0x6a, 0xc1, 0xaf, 0xa6, 0x32, 0xf5 + }; + + /* DES3 variables. */ + PRUint8 des3_computed_ciphertext[FIPS_DES3_ENCRYPT_LENGTH]; + PRUint8 des3_computed_plaintext[FIPS_DES3_DECRYPT_LENGTH]; + DESContext *des3_context; + unsigned int des3_bytes_encrypted; + unsigned int des3_bytes_decrypted; + SECStatus des3_status; + + /*******************************************************/ + /* DES3-ECB Single-Round Known Answer Encryption Test. */ + /*******************************************************/ + + des3_context = DES_CreateContext(des3_known_key, NULL, + NSS_DES_EDE3, PR_TRUE); + + if (des3_context == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return (SECFailure); + } + + des3_status = DES_Encrypt(des3_context, des3_computed_ciphertext, + &des3_bytes_encrypted, FIPS_DES3_ENCRYPT_LENGTH, + des3_ecb_known_plaintext, + FIPS_DES3_DECRYPT_LENGTH); + + DES_DestroyContext(des3_context, PR_TRUE); + + if ((des3_status != SECSuccess) || + (des3_bytes_encrypted != FIPS_DES3_ENCRYPT_LENGTH) || + (PORT_Memcmp(des3_computed_ciphertext, des3_ecb_known_ciphertext, + FIPS_DES3_ENCRYPT_LENGTH) != 0)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + /*******************************************************/ + /* DES3-ECB Single-Round Known Answer Decryption Test. 
*/ + /*******************************************************/ + + des3_context = DES_CreateContext(des3_known_key, NULL, + NSS_DES_EDE3, PR_FALSE); + + if (des3_context == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return (SECFailure); + } + + des3_status = DES_Decrypt(des3_context, des3_computed_plaintext, + &des3_bytes_decrypted, FIPS_DES3_DECRYPT_LENGTH, + des3_ecb_known_ciphertext, + FIPS_DES3_ENCRYPT_LENGTH); + + DES_DestroyContext(des3_context, PR_TRUE); + + if ((des3_status != SECSuccess) || + (des3_bytes_decrypted != FIPS_DES3_DECRYPT_LENGTH) || + (PORT_Memcmp(des3_computed_plaintext, des3_ecb_known_plaintext, + FIPS_DES3_DECRYPT_LENGTH) != 0)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + /*******************************************************/ + /* DES3-CBC Single-Round Known Answer Encryption Test. */ + /*******************************************************/ + + des3_context = DES_CreateContext(des3_known_key, + des3_cbc_known_initialization_vector, + NSS_DES_EDE3_CBC, PR_TRUE); + + if (des3_context == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return (SECFailure); + } + + des3_status = DES_Encrypt(des3_context, des3_computed_ciphertext, + &des3_bytes_encrypted, FIPS_DES3_ENCRYPT_LENGTH, + des3_cbc_known_plaintext, + FIPS_DES3_DECRYPT_LENGTH); + + DES_DestroyContext(des3_context, PR_TRUE); + + if ((des3_status != SECSuccess) || + (des3_bytes_encrypted != FIPS_DES3_ENCRYPT_LENGTH) || + (PORT_Memcmp(des3_computed_ciphertext, des3_cbc_known_ciphertext, + FIPS_DES3_ENCRYPT_LENGTH) != 0)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + /*******************************************************/ + /* DES3-CBC Single-Round Known Answer Decryption Test. 
*/ + /*******************************************************/ + + des3_context = DES_CreateContext(des3_known_key, + des3_cbc_known_initialization_vector, + NSS_DES_EDE3_CBC, PR_FALSE); + + if (des3_context == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return (SECFailure); + } + + des3_status = DES_Decrypt(des3_context, des3_computed_plaintext, + &des3_bytes_decrypted, FIPS_DES3_DECRYPT_LENGTH, + des3_cbc_known_ciphertext, + FIPS_DES3_ENCRYPT_LENGTH); + + DES_DestroyContext(des3_context, PR_TRUE); + + if ((des3_status != SECSuccess) || + (des3_bytes_decrypted != FIPS_DES3_DECRYPT_LENGTH) || + (PORT_Memcmp(des3_computed_plaintext, des3_cbc_known_plaintext, + FIPS_DES3_DECRYPT_LENGTH) != 0)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + return (SECSuccess); +} + +/* AES self-test for 128-bit, 192-bit, or 256-bit key sizes*/ +static SECStatus +freebl_fips_AES_PowerUpSelfTest(int aes_key_size) +{ + /* AES Known Key (up to 256-bits). */ + static const PRUint8 aes_known_key[] = + { "AES-128 RIJNDAELLEADNJIR 821-SEA" }; + + /* AES-CBC Known Initialization Vector (128-bits). */ + static const PRUint8 aes_cbc_known_initialization_vector[] = + { "SecurityytiruceS" }; + + /* AES Known Plaintext (128-bits). (blocksize is 128-bits) */ + static const PRUint8 aes_known_plaintext[] = { "NetscapeepacsteN" }; + + /* AES Known Ciphertext (128-bit key). */ + static const PRUint8 aes_ecb128_known_ciphertext[] = { + 0x3c, 0xa5, 0x96, 0xf3, 0x34, 0x6a, 0x96, 0xc1, + 0x03, 0x88, 0x16, 0x7b, 0x20, 0xbf, 0x35, 0x47 + }; + + static const PRUint8 aes_cbc128_known_ciphertext[] = { + 0xcf, 0x15, 0x1d, 0x4f, 0x96, 0xe4, 0x4f, 0x63, + 0x15, 0x54, 0x14, 0x1d, 0x4e, 0xd8, 0xd5, 0xea + }; + + /* AES Known Ciphertext (192-bit key). 
*/ + static const PRUint8 aes_ecb192_known_ciphertext[] = { + 0xa0, 0x18, 0x62, 0xed, 0x88, 0x19, 0xcb, 0x62, + 0x88, 0x1d, 0x4d, 0xfe, 0x84, 0x02, 0x89, 0x0e + }; + + static const PRUint8 aes_cbc192_known_ciphertext[] = { + 0x83, 0xf7, 0xa4, 0x76, 0xd1, 0x6f, 0x07, 0xbe, + 0x07, 0xbc, 0x43, 0x2f, 0x6d, 0xad, 0x29, 0xe1 + }; + + /* AES Known Ciphertext (256-bit key). */ + static const PRUint8 aes_ecb256_known_ciphertext[] = { + 0xdb, 0xa6, 0x52, 0x01, 0x8a, 0x70, 0xae, 0x66, + 0x3a, 0x99, 0xd8, 0x95, 0x7f, 0xfb, 0x01, 0x67 + }; + + static const PRUint8 aes_cbc256_known_ciphertext[] = { + 0x37, 0xea, 0x07, 0x06, 0x31, 0x1c, 0x59, 0x27, + 0xc5, 0xc5, 0x68, 0x71, 0x6e, 0x34, 0x40, 0x16 + }; + + const PRUint8 *aes_ecb_known_ciphertext = + (aes_key_size == FIPS_AES_128_KEY_SIZE) ? aes_ecb128_known_ciphertext : (aes_key_size == FIPS_AES_192_KEY_SIZE) ? aes_ecb192_known_ciphertext : aes_ecb256_known_ciphertext; + + const PRUint8 *aes_cbc_known_ciphertext = + (aes_key_size == FIPS_AES_128_KEY_SIZE) ? aes_cbc128_known_ciphertext : (aes_key_size == FIPS_AES_192_KEY_SIZE) ? aes_cbc192_known_ciphertext : aes_cbc256_known_ciphertext; + + /* AES variables. 
*/ + PRUint8 aes_computed_ciphertext[FIPS_AES_ENCRYPT_LENGTH]; + PRUint8 aes_computed_plaintext[FIPS_AES_DECRYPT_LENGTH]; + AESContext *aes_context; + unsigned int aes_bytes_encrypted; + unsigned int aes_bytes_decrypted; + SECStatus aes_status; + + /*check if aes_key_size is 128, 192, or 256 bits */ + if ((aes_key_size != FIPS_AES_128_KEY_SIZE) && + (aes_key_size != FIPS_AES_192_KEY_SIZE) && + (aes_key_size != FIPS_AES_256_KEY_SIZE)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + /******************************************************/ + /* AES-ECB Single-Round Known Answer Encryption Test: */ + /******************************************************/ + + aes_context = AES_CreateContext(aes_known_key, NULL, NSS_AES, PR_TRUE, + aes_key_size, FIPS_AES_BLOCK_SIZE); + + if (aes_context == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return (SECFailure); + } + + aes_status = AES_Encrypt(aes_context, aes_computed_ciphertext, + &aes_bytes_encrypted, FIPS_AES_ENCRYPT_LENGTH, + aes_known_plaintext, + FIPS_AES_DECRYPT_LENGTH); + + AES_DestroyContext(aes_context, PR_TRUE); + + if ((aes_status != SECSuccess) || + (aes_bytes_encrypted != FIPS_AES_ENCRYPT_LENGTH) || + (PORT_Memcmp(aes_computed_ciphertext, aes_ecb_known_ciphertext, + FIPS_AES_ENCRYPT_LENGTH) != 0)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + /******************************************************/ + /* AES-ECB Single-Round Known Answer Decryption Test: */ + /******************************************************/ + + aes_context = AES_CreateContext(aes_known_key, NULL, NSS_AES, PR_FALSE, + aes_key_size, FIPS_AES_BLOCK_SIZE); + + if (aes_context == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return (SECFailure); + } + + aes_status = AES_Decrypt(aes_context, aes_computed_plaintext, + &aes_bytes_decrypted, FIPS_AES_DECRYPT_LENGTH, + aes_ecb_known_ciphertext, + FIPS_AES_ENCRYPT_LENGTH); + + AES_DestroyContext(aes_context, PR_TRUE); + + if 
((aes_status != SECSuccess) || + (aes_bytes_decrypted != FIPS_AES_DECRYPT_LENGTH) || + (PORT_Memcmp(aes_computed_plaintext, aes_known_plaintext, + FIPS_AES_DECRYPT_LENGTH) != 0)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + /******************************************************/ + /* AES-CBC Single-Round Known Answer Encryption Test. */ + /******************************************************/ + + aes_context = AES_CreateContext(aes_known_key, + aes_cbc_known_initialization_vector, + NSS_AES_CBC, PR_TRUE, aes_key_size, + FIPS_AES_BLOCK_SIZE); + + if (aes_context == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return (SECFailure); + } + + aes_status = AES_Encrypt(aes_context, aes_computed_ciphertext, + &aes_bytes_encrypted, FIPS_AES_ENCRYPT_LENGTH, + aes_known_plaintext, + FIPS_AES_DECRYPT_LENGTH); + + AES_DestroyContext(aes_context, PR_TRUE); + + if ((aes_status != SECSuccess) || + (aes_bytes_encrypted != FIPS_AES_ENCRYPT_LENGTH) || + (PORT_Memcmp(aes_computed_ciphertext, aes_cbc_known_ciphertext, + FIPS_AES_ENCRYPT_LENGTH) != 0)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + /******************************************************/ + /* AES-CBC Single-Round Known Answer Decryption Test. 
*/ + /******************************************************/ + + aes_context = AES_CreateContext(aes_known_key, + aes_cbc_known_initialization_vector, + NSS_AES_CBC, PR_FALSE, aes_key_size, + FIPS_AES_BLOCK_SIZE); + + if (aes_context == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return (SECFailure); + } + + aes_status = AES_Decrypt(aes_context, aes_computed_plaintext, + &aes_bytes_decrypted, FIPS_AES_DECRYPT_LENGTH, + aes_cbc_known_ciphertext, + FIPS_AES_ENCRYPT_LENGTH); + + AES_DestroyContext(aes_context, PR_TRUE); + + if ((aes_status != SECSuccess) || + (aes_bytes_decrypted != FIPS_AES_DECRYPT_LENGTH) || + (PORT_Memcmp(aes_computed_plaintext, aes_known_plaintext, + FIPS_AES_DECRYPT_LENGTH) != 0)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + return (SECSuccess); +} + +/* Known Hash Message (512-bits). Used for all hashes (incl. SHA-N [N>1]). */ +static const PRUint8 known_hash_message[] = { + "The test message for the MD2, MD5, and SHA-1 hashing algorithms." 
+}; + +/****************************************************/ +/* Single Round HMAC SHA-X test */ +/****************************************************/ +static SECStatus +freebl_fips_HMAC(unsigned char *hmac_computed, + const PRUint8 *secret_key, + unsigned int secret_key_length, + const PRUint8 *message, + unsigned int message_length, + HASH_HashType hashAlg) +{ + SECStatus hmac_status = SECFailure; + HMACContext *cx = NULL; + SECHashObject *hashObj = NULL; + unsigned int bytes_hashed = 0; + + hashObj = (SECHashObject *)HASH_GetRawHashObject(hashAlg); + + if (!hashObj) + return (SECFailure); + + cx = HMAC_Create(hashObj, secret_key, + secret_key_length, + PR_TRUE); /* PR_TRUE for in FIPS mode */ + + if (cx == NULL) + return (SECFailure); + + HMAC_Begin(cx); + HMAC_Update(cx, message, message_length); + hmac_status = HMAC_Finish(cx, hmac_computed, &bytes_hashed, + hashObj->length); + + HMAC_Destroy(cx, PR_TRUE); + + return (hmac_status); +} + +static SECStatus +freebl_fips_HMAC_PowerUpSelfTest(void) +{ + static const PRUint8 HMAC_known_secret_key[] = { + "Firefox and ThunderBird are awesome!" 
+ }; + + static const PRUint8 HMAC_known_secret_key_length = sizeof HMAC_known_secret_key; + + /* known SHA1 hmac (20 bytes) */ + static const PRUint8 known_SHA1_hmac[] = { + 0xd5, 0x85, 0xf6, 0x5b, 0x39, 0xfa, 0xb9, 0x05, + 0x3b, 0x57, 0x1d, 0x61, 0xe7, 0xb8, 0x84, 0x1e, + 0x5d, 0x0e, 0x1e, 0x11 + }; + + /* known SHA224 hmac (28 bytes) */ + static const PRUint8 known_SHA224_hmac[] = { + 0x1c, 0xc3, 0x06, 0x8e, 0xce, 0x37, 0x68, 0xfb, + 0x1a, 0x82, 0x4a, 0xbe, 0x2b, 0x00, 0x51, 0xf8, + 0x9d, 0xb6, 0xe0, 0x90, 0x0d, 0x00, 0xc9, 0x64, + 0x9a, 0xb8, 0x98, 0x4e + }; + + /* known SHA256 hmac (32 bytes) */ + static const PRUint8 known_SHA256_hmac[] = { + 0x05, 0x75, 0x9a, 0x9e, 0x70, 0x5e, 0xe7, 0x44, + 0xe2, 0x46, 0x4b, 0x92, 0x22, 0x14, 0x22, 0xe0, + 0x1b, 0x92, 0x8a, 0x0c, 0xfe, 0xf5, 0x49, 0xe9, + 0xa7, 0x1b, 0x56, 0x7d, 0x1d, 0x29, 0x40, 0x48 + }; + + /* known SHA384 hmac (48 bytes) */ + static const PRUint8 known_SHA384_hmac[] = { + 0xcd, 0x56, 0x14, 0xec, 0x05, 0x53, 0x06, 0x2b, + 0x7e, 0x9c, 0x8a, 0x18, 0x5e, 0xea, 0xf3, 0x91, + 0x33, 0xfb, 0x64, 0xf6, 0xe3, 0x9f, 0x89, 0x0b, + 0xaf, 0xbe, 0x83, 0x4d, 0x3f, 0x3c, 0x43, 0x4d, + 0x4a, 0x0c, 0x56, 0x98, 0xf8, 0xca, 0xb4, 0xaa, + 0x9a, 0xf4, 0x0a, 0xaf, 0x4f, 0x69, 0xca, 0x87 + }; + + /* known SHA512 hmac (64 bytes) */ + static const PRUint8 known_SHA512_hmac[] = { + 0xf6, 0x0e, 0x97, 0x12, 0x00, 0x67, 0x6e, 0xb9, + 0x0c, 0xb2, 0x63, 0xf0, 0x60, 0xac, 0x75, 0x62, + 0x70, 0x95, 0x2a, 0x52, 0x22, 0xee, 0xdd, 0xd2, + 0x71, 0xb1, 0xe8, 0x26, 0x33, 0xd3, 0x13, 0x27, + 0xcb, 0xff, 0x44, 0xef, 0x87, 0x97, 0x16, 0xfb, + 0xd3, 0x0b, 0x48, 0xbe, 0x12, 0x4e, 0xda, 0xb1, + 0x89, 0x90, 0xfb, 0x06, 0x0c, 0xbe, 0xe5, 0xc4, + 0xff, 0x24, 0x37, 0x3d, 0xc7, 0xe4, 0xe4, 0x37 + }; + + SECStatus hmac_status; + PRUint8 hmac_computed[HASH_LENGTH_MAX]; + + /***************************************************/ + /* HMAC SHA-1 Single-Round Known Answer HMAC Test. 
*/ + /***************************************************/ + + hmac_status = freebl_fips_HMAC(hmac_computed, + HMAC_known_secret_key, + HMAC_known_secret_key_length, + known_hash_message, + FIPS_KNOWN_HASH_MESSAGE_LENGTH, + HASH_AlgSHA1); + + if ((hmac_status != SECSuccess) || + (PORT_Memcmp(hmac_computed, known_SHA1_hmac, + SHA1_LENGTH) != 0)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + /***************************************************/ + /* HMAC SHA-224 Single-Round Known Answer Test. */ + /***************************************************/ + + hmac_status = freebl_fips_HMAC(hmac_computed, + HMAC_known_secret_key, + HMAC_known_secret_key_length, + known_hash_message, + FIPS_KNOWN_HASH_MESSAGE_LENGTH, + HASH_AlgSHA224); + + if ((hmac_status != SECSuccess) || + (PORT_Memcmp(hmac_computed, known_SHA224_hmac, + SHA224_LENGTH) != 0)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + /***************************************************/ + /* HMAC SHA-256 Single-Round Known Answer Test. */ + /***************************************************/ + + hmac_status = freebl_fips_HMAC(hmac_computed, + HMAC_known_secret_key, + HMAC_known_secret_key_length, + known_hash_message, + FIPS_KNOWN_HASH_MESSAGE_LENGTH, + HASH_AlgSHA256); + + if ((hmac_status != SECSuccess) || + (PORT_Memcmp(hmac_computed, known_SHA256_hmac, + SHA256_LENGTH) != 0)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + /***************************************************/ + /* HMAC SHA-384 Single-Round Known Answer Test. 
*/ + /***************************************************/ + + hmac_status = freebl_fips_HMAC(hmac_computed, + HMAC_known_secret_key, + HMAC_known_secret_key_length, + known_hash_message, + FIPS_KNOWN_HASH_MESSAGE_LENGTH, + HASH_AlgSHA384); + + if ((hmac_status != SECSuccess) || + (PORT_Memcmp(hmac_computed, known_SHA384_hmac, + SHA384_LENGTH) != 0)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + /***************************************************/ + /* HMAC SHA-512 Single-Round Known Answer Test. */ + /***************************************************/ + + hmac_status = freebl_fips_HMAC(hmac_computed, + HMAC_known_secret_key, + HMAC_known_secret_key_length, + known_hash_message, + FIPS_KNOWN_HASH_MESSAGE_LENGTH, + HASH_AlgSHA512); + + if ((hmac_status != SECSuccess) || + (PORT_Memcmp(hmac_computed, known_SHA512_hmac, + SHA512_LENGTH) != 0)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + return (SECSuccess); +} + +static SECStatus +freebl_fips_SHA_PowerUpSelfTest(void) +{ + /* SHA-1 Known Digest Message (160-bits). */ + static const PRUint8 sha1_known_digest[] = { + 0x0a, 0x6d, 0x07, 0xba, 0x1e, 0xbd, 0x8a, 0x1b, + 0x72, 0xf6, 0xc7, 0x22, 0xf1, 0x27, 0x9f, 0xf0, + 0xe0, 0x68, 0x47, 0x7a + }; + + /* SHA-224 Known Digest Message (224-bits). */ + static const PRUint8 sha224_known_digest[] = { + 0x89, 0x5e, 0x7f, 0xfd, 0x0e, 0xd8, 0x35, 0x6f, + 0x64, 0x6d, 0xf2, 0xde, 0x5e, 0xed, 0xa6, 0x7f, + 0x29, 0xd1, 0x12, 0x73, 0x42, 0x84, 0x95, 0x4f, + 0x8e, 0x08, 0xe5, 0xcb + }; + + /* SHA-256 Known Digest Message (256-bits). */ + static const PRUint8 sha256_known_digest[] = { + 0x38, 0xa9, 0xc1, 0xf0, 0x35, 0xf6, 0x5d, 0x61, + 0x11, 0xd4, 0x0b, 0xdc, 0xce, 0x35, 0x14, 0x8d, + 0xf2, 0xdd, 0xaf, 0xaf, 0xcf, 0xb7, 0x87, 0xe9, + 0x96, 0xa5, 0xd2, 0x83, 0x62, 0x46, 0x56, 0x79 + }; + + /* SHA-384 Known Digest Message (384-bits). 
*/ + static const PRUint8 sha384_known_digest[] = { + 0x11, 0xfe, 0x1c, 0x00, 0x89, 0x48, 0xde, 0xb3, + 0x99, 0xee, 0x1c, 0x18, 0xb4, 0x10, 0xfb, 0xfe, + 0xe3, 0xa8, 0x2c, 0xf3, 0x04, 0xb0, 0x2f, 0xc8, + 0xa3, 0xc4, 0x5e, 0xea, 0x7e, 0x60, 0x48, 0x7b, + 0xce, 0x2c, 0x62, 0xf7, 0xbc, 0xa7, 0xe8, 0xa3, + 0xcf, 0x24, 0xce, 0x9c, 0xe2, 0x8b, 0x09, 0x72 + }; + + /* SHA-512 Known Digest Message (512-bits). */ + static const PRUint8 sha512_known_digest[] = { + 0xc8, 0xb3, 0x27, 0xf9, 0x0b, 0x24, 0xc8, 0xbf, + 0x4c, 0xba, 0x33, 0x54, 0xf2, 0x31, 0xbf, 0xdb, + 0xab, 0xfd, 0xb3, 0x15, 0xd7, 0xfa, 0x48, 0x99, + 0x07, 0x60, 0x0f, 0x57, 0x41, 0x1a, 0xdd, 0x28, + 0x12, 0x55, 0x25, 0xac, 0xba, 0x3a, 0x99, 0x12, + 0x2c, 0x7a, 0x8f, 0x75, 0x3a, 0xe1, 0x06, 0x6f, + 0x30, 0x31, 0xc9, 0x33, 0xc6, 0x1b, 0x90, 0x1a, + 0x6c, 0x98, 0x9a, 0x87, 0xd0, 0xb2, 0xf8, 0x07 + }; + + /* SHA-X variables. */ + PRUint8 sha_computed_digest[HASH_LENGTH_MAX]; + SECStatus sha_status; + + /*************************************************/ + /* SHA-1 Single-Round Known Answer Hashing Test. */ + /*************************************************/ + + sha_status = SHA1_HashBuf(sha_computed_digest, known_hash_message, + FIPS_KNOWN_HASH_MESSAGE_LENGTH); + + if ((sha_status != SECSuccess) || + (PORT_Memcmp(sha_computed_digest, sha1_known_digest, + SHA1_LENGTH) != 0)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + /***************************************************/ + /* SHA-224 Single-Round Known Answer Hashing Test. */ + /***************************************************/ + + sha_status = SHA224_HashBuf(sha_computed_digest, known_hash_message, + FIPS_KNOWN_HASH_MESSAGE_LENGTH); + + if ((sha_status != SECSuccess) || + (PORT_Memcmp(sha_computed_digest, sha224_known_digest, + SHA224_LENGTH) != 0)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + /***************************************************/ + /* SHA-256 Single-Round Known Answer Hashing Test. 
*/ + /***************************************************/ + + sha_status = SHA256_HashBuf(sha_computed_digest, known_hash_message, + FIPS_KNOWN_HASH_MESSAGE_LENGTH); + + if ((sha_status != SECSuccess) || + (PORT_Memcmp(sha_computed_digest, sha256_known_digest, + SHA256_LENGTH) != 0)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + /***************************************************/ + /* SHA-384 Single-Round Known Answer Hashing Test. */ + /***************************************************/ + + sha_status = SHA384_HashBuf(sha_computed_digest, known_hash_message, + FIPS_KNOWN_HASH_MESSAGE_LENGTH); + + if ((sha_status != SECSuccess) || + (PORT_Memcmp(sha_computed_digest, sha384_known_digest, + SHA384_LENGTH) != 0)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + /***************************************************/ + /* SHA-512 Single-Round Known Answer Hashing Test. */ + /***************************************************/ + + sha_status = SHA512_HashBuf(sha_computed_digest, known_hash_message, + FIPS_KNOWN_HASH_MESSAGE_LENGTH); + + if ((sha_status != SECSuccess) || + (PORT_Memcmp(sha_computed_digest, sha512_known_digest, + SHA512_LENGTH) != 0)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return (SECFailure); + } + + return (SECSuccess); +} + +static SECStatus +freebl_fips_RSA_PowerUpSelfTest(void) +{ + /* RSA Known Modulus used in both Public/Private Key Values (2048-bits). 
*/ + static const PRUint8 rsa_modulus[FIPS_RSA_MODULUS_LENGTH] = { + 0xb8, 0x15, 0x00, 0x33, 0xda, 0x0c, 0x9d, 0xa5, + 0x14, 0x8c, 0xde, 0x1f, 0x23, 0x07, 0x54, 0xe2, + 0xc6, 0xb9, 0x51, 0x04, 0xc9, 0x65, 0x24, 0x6e, + 0x0a, 0x46, 0x34, 0x5c, 0x37, 0x86, 0x6b, 0x88, + 0x24, 0x27, 0xac, 0xa5, 0x02, 0x79, 0xfb, 0xed, + 0x75, 0xc5, 0x3f, 0x6e, 0xdf, 0x05, 0x5f, 0x0f, + 0x20, 0x70, 0xa0, 0x5b, 0x85, 0xdb, 0xac, 0xb9, + 0x5f, 0x02, 0xc2, 0x64, 0x1e, 0x84, 0x5b, 0x3e, + 0xad, 0xbf, 0xf6, 0x2e, 0x51, 0xd6, 0xad, 0xf7, + 0xa7, 0x86, 0x75, 0x86, 0xec, 0xa7, 0xe1, 0xf7, + 0x08, 0xbf, 0xdc, 0x56, 0xb1, 0x3b, 0xca, 0xd8, + 0xfc, 0x51, 0xdf, 0x9a, 0x2a, 0x37, 0x06, 0xf2, + 0xd1, 0x6b, 0x9a, 0x5e, 0x2a, 0xe5, 0x20, 0x57, + 0x35, 0x9f, 0x1f, 0x98, 0xcf, 0x40, 0xc7, 0xd6, + 0x98, 0xdb, 0xde, 0xf5, 0x64, 0x53, 0xf7, 0x9d, + 0x45, 0xf3, 0xd6, 0x78, 0xb9, 0xe3, 0xa3, 0x20, + 0xcd, 0x79, 0x43, 0x35, 0xef, 0xd7, 0xfb, 0xb9, + 0x80, 0x88, 0x27, 0x2f, 0x63, 0xa8, 0x67, 0x3d, + 0x4a, 0xfa, 0x06, 0xc6, 0xd2, 0x86, 0x0b, 0xa7, + 0x28, 0xfd, 0xe0, 0x1e, 0x93, 0x4b, 0x17, 0x2e, + 0xb0, 0x11, 0x6f, 0xc6, 0x2b, 0x98, 0x0f, 0x15, + 0xe3, 0x87, 0x16, 0x7a, 0x7c, 0x67, 0x3e, 0x12, + 0x2b, 0xf8, 0xbe, 0x48, 0xc1, 0x97, 0x47, 0xf4, + 0x1f, 0x81, 0x80, 0x12, 0x28, 0xe4, 0x7b, 0x1e, + 0xb7, 0x00, 0xa4, 0xde, 0xaa, 0xfb, 0x0f, 0x77, + 0x84, 0xa3, 0xd6, 0xb2, 0x03, 0x48, 0xdd, 0x53, + 0x8b, 0x46, 0x41, 0x28, 0x52, 0xc4, 0x53, 0xf0, + 0x1c, 0x95, 0xd9, 0x36, 0xe0, 0x0f, 0x26, 0x46, + 0x9c, 0x61, 0x0e, 0x80, 0xca, 0x86, 0xaf, 0x39, + 0x95, 0xe5, 0x60, 0x43, 0x61, 0x3e, 0x2b, 0xb4, + 0xe8, 0xbd, 0x8d, 0x77, 0x62, 0xf5, 0x32, 0x43, + 0x2f, 0x4b, 0x65, 0x82, 0x14, 0xdd, 0x29, 0x5b + }; + + /* RSA Known Public Key Values (24-bits). 
*/ + static const PRUint8 rsa_public_exponent[FIPS_RSA_PUBLIC_EXPONENT_LENGTH] = { 0x01, 0x00, 0x01 }; + /* RSA Known Private Key Values (version is 8-bits), */ + /* (private exponent is 2048-bits), */ + /* (private prime0 is 1024-bits), */ + /* (private prime1 is 1024-bits), */ + /* (private prime exponent0 is 1024-bits), */ + /* (private prime exponent1 is 1024-bits), */ + /* and (private coefficient is 1024-bits). */ + static const PRUint8 rsa_version[] = { 0x00 }; + + static const PRUint8 rsa_private_exponent[FIPS_RSA_PRIVATE_EXPONENT_LENGTH] = { + 0x29, 0x08, 0x05, 0x53, 0x89, 0x76, 0xe6, 0x6c, + 0xb5, 0x77, 0xf0, 0xca, 0xdf, 0xf3, 0xf2, 0x67, + 0xda, 0x03, 0xd4, 0x9b, 0x4c, 0x88, 0xce, 0xe5, + 0xf8, 0x44, 0x4d, 0xc7, 0x80, 0x58, 0xe5, 0xff, + 0x22, 0x8f, 0xf5, 0x5b, 0x92, 0x81, 0xbe, 0x35, + 0xdf, 0xda, 0x67, 0x99, 0x3e, 0xfc, 0xe3, 0x83, + 0x6b, 0xa7, 0xaf, 0x16, 0xb7, 0x6f, 0x8f, 0xc0, + 0x81, 0xfd, 0x0b, 0x77, 0x65, 0x95, 0xfb, 0x00, + 0xad, 0x99, 0xec, 0x35, 0xc6, 0xe8, 0x23, 0x3e, + 0xe0, 0x88, 0x88, 0x09, 0xdb, 0x16, 0x50, 0xb7, + 0xcf, 0xab, 0x74, 0x61, 0x9e, 0x7f, 0xc5, 0x67, + 0x38, 0x56, 0xc7, 0x90, 0x85, 0x78, 0x5e, 0x84, + 0x21, 0x49, 0xea, 0xce, 0xb2, 0xa0, 0xff, 0xe4, + 0x70, 0x7f, 0x57, 0x7b, 0xa8, 0x36, 0xb8, 0x54, + 0x8d, 0x1d, 0xf5, 0x44, 0x9d, 0x68, 0x59, 0xf9, + 0x24, 0x6e, 0x85, 0x8f, 0xc3, 0x5f, 0x8a, 0x2c, + 0x94, 0xb7, 0xbc, 0x0e, 0xa5, 0xef, 0x93, 0x06, + 0x38, 0xcd, 0x07, 0x0c, 0xae, 0xb8, 0x44, 0x1a, + 0xd8, 0xe7, 0xf5, 0x9a, 0x1e, 0x9c, 0x18, 0xc7, + 0x6a, 0xc2, 0x7f, 0x28, 0x01, 0x4f, 0xb4, 0xb8, + 0x90, 0x97, 0x5a, 0x43, 0x38, 0xad, 0xe8, 0x95, + 0x68, 0x83, 0x1a, 0x1b, 0x10, 0x07, 0xe6, 0x02, + 0x52, 0x1f, 0xbf, 0x76, 0x6b, 0x46, 0xd6, 0xfb, + 0xc3, 0xbe, 0xb5, 0xac, 0x52, 0x53, 0x01, 0x1c, + 0xf3, 0xc5, 0xeb, 0x64, 0xf2, 0x1e, 0xc4, 0x38, + 0xe9, 0xaa, 0xd9, 0xc3, 0x72, 0x51, 0xa5, 0x44, + 0x58, 0x69, 0x0b, 0x1b, 0x98, 0x7f, 0xf2, 0x23, + 0xff, 0xeb, 0xf0, 0x75, 0x24, 0xcf, 0xc5, 0x1e, + 0xb8, 0x6a, 0xc5, 0x2f, 0x4f, 0x23, 
0x50, 0x7d, + 0x15, 0x9d, 0x19, 0x7a, 0x0b, 0x82, 0xe0, 0x21, + 0x5b, 0x5f, 0x9d, 0x50, 0x2b, 0x83, 0xe4, 0x48, + 0xcc, 0x39, 0xe5, 0xfb, 0x13, 0x7b, 0x6f, 0x81 + }; + + static const PRUint8 rsa_prime0[FIPS_RSA_PRIME0_LENGTH] = { + 0xe4, 0xbf, 0x21, 0x62, 0x9b, 0xa9, 0x77, 0x40, + 0x8d, 0x2a, 0xce, 0xa1, 0x67, 0x5a, 0x4c, 0x96, + 0x45, 0x98, 0x67, 0xbd, 0x75, 0x22, 0x33, 0x6f, + 0xe6, 0xcb, 0x77, 0xde, 0x9e, 0x97, 0x7d, 0x96, + 0x8c, 0x5e, 0x5d, 0x34, 0xfb, 0x27, 0xfc, 0x6d, + 0x74, 0xdb, 0x9d, 0x2e, 0x6d, 0xf6, 0xea, 0xfc, + 0xce, 0x9e, 0xda, 0xa7, 0x25, 0xa2, 0xf4, 0x58, + 0x6d, 0x0a, 0x3f, 0x01, 0xc2, 0xb4, 0xab, 0x38, + 0xc1, 0x14, 0x85, 0xb6, 0xfa, 0x94, 0xc3, 0x85, + 0xf9, 0x3c, 0x2e, 0x96, 0x56, 0x01, 0xe7, 0xd6, + 0x14, 0x71, 0x4f, 0xfb, 0x4c, 0x85, 0x52, 0xc4, + 0x61, 0x1e, 0xa5, 0x1e, 0x96, 0x13, 0x0d, 0x8f, + 0x66, 0xae, 0xa0, 0xcd, 0x7d, 0x25, 0x66, 0x19, + 0x15, 0xc2, 0xcf, 0xc3, 0x12, 0x3c, 0xe8, 0xa4, + 0x52, 0x4c, 0xcb, 0x28, 0x3c, 0xc4, 0xbf, 0x95, + 0x33, 0xe3, 0x81, 0xea, 0x0c, 0x6c, 0xa2, 0x05 + }; + static const PRUint8 rsa_prime1[FIPS_RSA_PRIME1_LENGTH] = { + 0xce, 0x03, 0x94, 0xf4, 0xa9, 0x2c, 0x1e, 0x06, + 0xe7, 0x40, 0x30, 0x01, 0xf7, 0xbb, 0x68, 0x8c, + 0x27, 0xd2, 0x15, 0xe3, 0x28, 0x49, 0x5b, 0xa8, + 0xc1, 0x9a, 0x42, 0x7e, 0x31, 0xf9, 0x08, 0x34, + 0x81, 0xa2, 0x0f, 0x04, 0x61, 0x34, 0xe3, 0x36, + 0x92, 0xb1, 0x09, 0x2b, 0xe9, 0xef, 0x84, 0x88, + 0xbe, 0x9c, 0x98, 0x60, 0xa6, 0x60, 0x84, 0xe9, + 0x75, 0x6f, 0xcc, 0x81, 0xd1, 0x96, 0xef, 0xdd, + 0x2e, 0xca, 0xc4, 0xf5, 0x42, 0xfb, 0x13, 0x2b, + 0x57, 0xbf, 0x14, 0x5e, 0xc2, 0x7f, 0x77, 0x35, + 0x29, 0xc4, 0xe5, 0xe0, 0xf9, 0x6d, 0x15, 0x4a, + 0x42, 0x56, 0x1c, 0x3e, 0x0c, 0xc5, 0xce, 0x70, + 0x08, 0x63, 0x1e, 0x73, 0xdb, 0x7e, 0x74, 0x05, + 0x32, 0x01, 0xc6, 0x36, 0x32, 0x75, 0x6b, 0xed, + 0x9d, 0xfe, 0x7c, 0x7e, 0xa9, 0x57, 0xb4, 0xe9, + 0x22, 0xe4, 0xe7, 0xfe, 0x36, 0x07, 0x9b, 0xdf + }; + static const PRUint8 rsa_exponent0[FIPS_RSA_EXPONENT0_LENGTH] = { + 0x04, 0x5a, 0x3a, 0xa9, 0x64, 
0xaa, 0xd9, 0xd1, + 0x09, 0x9e, 0x99, 0xe5, 0xea, 0x50, 0x86, 0x8a, + 0x89, 0x72, 0x77, 0xee, 0xdb, 0xee, 0xb5, 0xa9, + 0xd8, 0x6b, 0x60, 0xb1, 0x84, 0xb4, 0xff, 0x37, + 0xc1, 0x1d, 0xfe, 0x8a, 0x06, 0x89, 0x61, 0x3d, + 0x37, 0xef, 0x01, 0xd3, 0xa3, 0x56, 0x02, 0x6c, + 0xa3, 0x05, 0xd4, 0xc5, 0x3f, 0x6b, 0x15, 0x59, + 0x25, 0x61, 0xff, 0x86, 0xea, 0x0c, 0x84, 0x01, + 0x85, 0x72, 0xfd, 0x84, 0x58, 0xca, 0x41, 0xda, + 0x27, 0xbe, 0xe4, 0x68, 0x09, 0xe4, 0xe9, 0x63, + 0x62, 0x6a, 0x31, 0x8a, 0x67, 0x8f, 0x55, 0xde, + 0xd4, 0xb6, 0x3f, 0x90, 0x10, 0x6c, 0xf6, 0x62, + 0x17, 0x23, 0x15, 0x7e, 0x33, 0x76, 0x65, 0xb5, + 0xee, 0x7b, 0x11, 0x76, 0xf5, 0xbe, 0xe0, 0xf2, + 0x57, 0x7a, 0x8c, 0x97, 0x0c, 0x68, 0xf5, 0xf8, + 0x41, 0xcf, 0x7f, 0x66, 0x53, 0xac, 0x31, 0x7d + }; + static const PRUint8 rsa_exponent1[FIPS_RSA_EXPONENT1_LENGTH] = { + 0x93, 0x54, 0x14, 0x6e, 0x73, 0x9d, 0x4d, 0x4b, + 0xfa, 0x8c, 0xf8, 0xc8, 0x2f, 0x76, 0x22, 0xea, + 0x38, 0x80, 0x11, 0x8f, 0x05, 0xfc, 0x90, 0x44, + 0x3b, 0x50, 0x2a, 0x45, 0x3d, 0x4f, 0xaf, 0x02, + 0x7d, 0xc2, 0x7b, 0xa2, 0xd2, 0x31, 0x94, 0x5c, + 0x2e, 0xc3, 0xd4, 0x9f, 0x47, 0x09, 0x37, 0x6a, + 0xe3, 0x85, 0xf1, 0xa3, 0x0c, 0xd8, 0xf1, 0xb4, + 0x53, 0x7b, 0xc4, 0x71, 0x02, 0x86, 0x42, 0xbb, + 0x96, 0xff, 0x03, 0xa3, 0xb2, 0x67, 0x03, 0xea, + 0x77, 0x31, 0xfb, 0x4b, 0x59, 0x24, 0xf7, 0x07, + 0x59, 0xfb, 0xa9, 0xba, 0x1e, 0x26, 0x58, 0x97, + 0x66, 0xa1, 0x56, 0x49, 0x39, 0xb1, 0x2c, 0x55, + 0x0a, 0x6a, 0x78, 0x18, 0xba, 0xdb, 0xcf, 0xf4, + 0xf7, 0x32, 0x35, 0xa2, 0x04, 0xab, 0xdc, 0xa7, + 0x6d, 0xd9, 0xd5, 0x06, 0x6f, 0xec, 0x7d, 0x40, + 0x4c, 0xe8, 0x0e, 0xd0, 0xc9, 0xaa, 0xdf, 0x59 + }; + static const PRUint8 rsa_coefficient[FIPS_RSA_COEFFICIENT_LENGTH] = { + 0x17, 0xd7, 0xf5, 0x0a, 0xf0, 0x68, 0x97, 0x96, + 0xc4, 0x29, 0x18, 0x77, 0x9a, 0x1f, 0xe3, 0xf3, + 0x12, 0x13, 0x0f, 0x7e, 0x7b, 0xb9, 0xc1, 0x91, + 0xf9, 0xc7, 0x08, 0x56, 0x5c, 0xa4, 0xbc, 0x83, + 0x71, 0xf9, 0x78, 0xd9, 0x2b, 0xec, 0xfe, 0x6b, + 0xdc, 0x2f, 0x63, 0xc9, 0xcd, 
0x50, 0x14, 0x5b, + 0xd3, 0x6e, 0x85, 0x4d, 0x0c, 0xa2, 0x0b, 0xa0, + 0x09, 0xb6, 0xca, 0x34, 0x9c, 0xc2, 0xc1, 0x4a, + 0xb0, 0xbc, 0x45, 0x93, 0xa5, 0x7e, 0x99, 0xb5, + 0xbd, 0xe4, 0x69, 0x29, 0x08, 0x28, 0xd2, 0xcd, + 0xab, 0x24, 0x78, 0x48, 0x41, 0x26, 0x0b, 0x37, + 0xa3, 0x43, 0xd1, 0x95, 0x1a, 0xd6, 0xee, 0x22, + 0x1c, 0x00, 0x0b, 0xc2, 0xb7, 0xa4, 0xa3, 0x21, + 0xa9, 0xcd, 0xe4, 0x69, 0xd3, 0x45, 0x02, 0xb1, + 0xb7, 0x3a, 0xbf, 0x51, 0x35, 0x1b, 0x78, 0xc2, + 0xcf, 0x0c, 0x0d, 0x60, 0x09, 0xa9, 0x44, 0x02 + }; + + /* RSA Known Plaintext Message (1024-bits). */ + static const PRUint8 rsa_known_plaintext_msg[FIPS_RSA_MESSAGE_LENGTH] = { + "Known plaintext message utilized" + "for RSA Encryption & Decryption" + "blocks SHA256, SHA384 and " + "SHA512 RSA Signature KAT tests. " + "Known plaintext message utilized" + "for RSA Encryption & Decryption" + "blocks SHA256, SHA384 and " + "SHA512 RSA Signature KAT tests." + }; + + /* RSA Known Ciphertext (2048-bits). */ + static const PRUint8 rsa_known_ciphertext[] = { + 0x04, 0x12, 0x46, 0xe3, 0x6a, 0xee, 0xde, 0xdd, + 0x49, 0xa1, 0xd9, 0x83, 0xf7, 0x35, 0xf9, 0x70, + 0x88, 0x03, 0x2d, 0x01, 0x8b, 0xd1, 0xbf, 0xdb, + 0xe5, 0x1c, 0x85, 0xbe, 0xb5, 0x0b, 0x48, 0x45, + 0x7a, 0xf0, 0xa0, 0xe3, 0xa2, 0xbb, 0x4b, 0xf6, + 0x27, 0xd0, 0x1b, 0x12, 0xe3, 0x77, 0x52, 0x34, + 0x9e, 0x8e, 0x03, 0xd2, 0xf8, 0x79, 0x6e, 0x39, + 0x79, 0x53, 0x3c, 0x44, 0x14, 0x94, 0xbb, 0x8d, + 0xaa, 0x14, 0x44, 0xa0, 0x7b, 0xa5, 0x8c, 0x93, + 0x5f, 0x99, 0xa4, 0xa3, 0x6e, 0x7a, 0x38, 0x40, + 0x78, 0xfa, 0x36, 0x91, 0x5e, 0x9a, 0x9c, 0xba, + 0x1e, 0xd4, 0xf9, 0xda, 0x4b, 0x0f, 0xa8, 0xa3, + 0x1c, 0xf3, 0x3a, 0xd1, 0xa5, 0xb4, 0x51, 0x16, + 0xed, 0x4b, 0xcf, 0xec, 0x93, 0x7b, 0x90, 0x21, + 0xbc, 0x3a, 0xf4, 0x0b, 0xd1, 0x3a, 0x2b, 0xba, + 0xa6, 0x7d, 0x5b, 0x53, 0xd8, 0x64, 0xf9, 0x29, + 0x7b, 0x7f, 0x77, 0x3e, 0x51, 0x4c, 0x9a, 0x94, + 0xd2, 0x4b, 0x4a, 0x8d, 0x61, 0x74, 0x97, 0xae, + 0x53, 0x6a, 0xf4, 0x90, 0xc2, 0x2c, 0x49, 0xe2, + 0xfa, 0xeb, 0x91, 
0xc5, 0xe5, 0x83, 0x13, 0xc9, + 0x44, 0x4b, 0x95, 0x2c, 0x57, 0x70, 0x15, 0x5c, + 0x64, 0x8d, 0x1a, 0xfd, 0x2a, 0xc7, 0xb2, 0x9c, + 0x5c, 0x99, 0xd3, 0x4a, 0xfd, 0xdd, 0xf6, 0x82, + 0x87, 0x8c, 0x5a, 0xc4, 0xa8, 0x0d, 0x2a, 0xef, + 0xc3, 0xa2, 0x7e, 0x8e, 0x67, 0x9f, 0x6f, 0x63, + 0xdb, 0xbb, 0x1d, 0x31, 0xc4, 0xbb, 0xbc, 0x13, + 0x3f, 0x54, 0xc6, 0xf6, 0xc5, 0x28, 0x32, 0xab, + 0x96, 0x42, 0x10, 0x36, 0x40, 0x92, 0xbb, 0x57, + 0x55, 0x38, 0xf5, 0x43, 0x7e, 0x43, 0xc4, 0x65, + 0x47, 0x64, 0xaa, 0x0f, 0x4c, 0xe9, 0x49, 0x16, + 0xec, 0x6a, 0x50, 0xfd, 0x14, 0x49, 0xca, 0xdb, + 0x44, 0x54, 0xca, 0xbe, 0xa3, 0x0e, 0x5f, 0xef + }; + + static const RSAPublicKey bl_public_key = { + NULL, + { FIPS_RSA_TYPE, (unsigned char *)rsa_modulus, + FIPS_RSA_MODULUS_LENGTH }, + { FIPS_RSA_TYPE, (unsigned char *)rsa_public_exponent, + FIPS_RSA_PUBLIC_EXPONENT_LENGTH } + }; + static const RSAPrivateKey bl_private_key = { + NULL, + { FIPS_RSA_TYPE, (unsigned char *)rsa_version, + FIPS_RSA_PRIVATE_VERSION_LENGTH }, + { FIPS_RSA_TYPE, (unsigned char *)rsa_modulus, + FIPS_RSA_MODULUS_LENGTH }, + { FIPS_RSA_TYPE, (unsigned char *)rsa_public_exponent, + FIPS_RSA_PUBLIC_EXPONENT_LENGTH }, + { FIPS_RSA_TYPE, (unsigned char *)rsa_private_exponent, + FIPS_RSA_PRIVATE_EXPONENT_LENGTH }, + { FIPS_RSA_TYPE, (unsigned char *)rsa_prime0, + FIPS_RSA_PRIME0_LENGTH }, + { FIPS_RSA_TYPE, (unsigned char *)rsa_prime1, + FIPS_RSA_PRIME1_LENGTH }, + { FIPS_RSA_TYPE, (unsigned char *)rsa_exponent0, + FIPS_RSA_EXPONENT0_LENGTH }, + { FIPS_RSA_TYPE, (unsigned char *)rsa_exponent1, + FIPS_RSA_EXPONENT1_LENGTH }, + { FIPS_RSA_TYPE, (unsigned char *)rsa_coefficient, + FIPS_RSA_COEFFICIENT_LENGTH } + }; + + /* RSA variables. 
*/
+    SECStatus rsa_status;
+    RSAPublicKey rsa_public_key;
+    RSAPrivateKey rsa_private_key;
+
+    PRUint8 rsa_computed_ciphertext[FIPS_RSA_ENCRYPT_LENGTH];
+    PRUint8 rsa_computed_plaintext[FIPS_RSA_DECRYPT_LENGTH];
+
+    rsa_public_key = bl_public_key; /* work on local copies of the static const key templates */
+    rsa_private_key = bl_private_key;
+
+    /**************************************************/
+    /* RSA Single-Round Known Answer Encryption Test. */
+    /**************************************************/
+
+    /* Perform RSA Public Key Encryption. */
+    rsa_status = RSA_PublicKeyOp(&rsa_public_key,
+                                 rsa_computed_ciphertext,
+                                 rsa_known_plaintext_msg);
+
+    if ((rsa_status != SECSuccess) ||
+        (PORT_Memcmp(rsa_computed_ciphertext, rsa_known_ciphertext,
+                     FIPS_RSA_ENCRYPT_LENGTH) != 0))
+        goto rsa_loser;
+
+    /**************************************************/
+    /* RSA Single-Round Known Answer Decryption Test. */
+    /**************************************************/
+
+    /* Perform RSA Private Key Decryption. */
+    rsa_status = RSA_PrivateKeyOp(&rsa_private_key,
+                                  rsa_computed_plaintext,
+                                  rsa_known_ciphertext);
+
+    if ((rsa_status != SECSuccess) ||
+        (PORT_Memcmp(rsa_computed_plaintext, rsa_known_plaintext_msg,
+                     FIPS_RSA_DECRYPT_LENGTH) != 0))
+        goto rsa_loser;
+
+    return (SECSuccess);
+
+rsa_loser:
+
+    PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+    return (SECFailure);
+}
+
+#ifdef NSS_ENABLE_ECC
+
+static SECStatus
+freebl_fips_ECDSA_Test(ECParams *ecparams,
+                       const PRUint8 *knownSignature,
+                       unsigned int knownSignatureLen)
+{   /*
+     * Known-answer test for ECDSA over the curve described by ecparams.
+     *
+     * Steps, in order:
+     *   1. derive a key pair from the fixed ecdsa_Known_Seed with
+     *      EC_NewKeyFromSeed();
+     *   2. validate the resulting public value with EC_ValidatePublicKey();
+     *   3. sign the SHA-1 digest of msg with ECDSA_SignDigestWithSeed(),
+     *      again feeding ecdsa_Known_Seed, and require the output to match
+     *      knownSignature byte for byte;
+     *   4. check that ECDSA_VerifyDigest() accepts that signature.
+     *
+     * ecparams:          curve parameters handed to EC_NewKeyFromSeed().
+     * knownSignature:    expected signature bytes.
+     * knownSignatureLen: length of knownSignature in bytes.
+     *
+     * Returns SECSuccess when every step passes. On any failure it sets
+     * SEC_ERROR_LIBRARY_FAILURE and returns SECFailure.
+     */
+
+    /* ECDSA Known Seed info for curves nistp256 and nistk283.
+     * It is used twice below: as the key-generation seed and as the seed
+     * passed to ECDSA_SignDigestWithSeed(). */
+    static const PRUint8 ecdsa_Known_Seed[] = {
+        0x6a, 0x9b, 0xf6, 0xf7, 0xce, 0xed, 0x79, 0x11,
+        0xf0, 0xc7, 0xc8, 0x9a, 0xa5, 0xd1, 0x57, 0xb1,
+        0x7b, 0x5a, 0x3b, 0x76, 0x4e, 0x7b, 0x7c, 0xbc,
+        0xf2, 0x76, 0x1c, 0x1c, 0x7f, 0xc5, 0x53, 0x2f
+    };
+
+    static const PRUint8 msg[] = {
+        "Firefox and ThunderBird are awesome!"
+    };
+
+    unsigned char sha1[SHA1_LENGTH]; /* SHA-1 hash (160 bits) */
+    unsigned char sig[2 * MAX_ECKEY_LEN];
+    SECItem signature, digest;
+    ECPrivateKey *ecdsa_private_key = NULL;
+    ECPublicKey ecdsa_public_key;
+    SECStatus ecdsaStatus = SECSuccess;
+
+    /* Generates a new EC key pair. The private key is a supplied
+     * random value (in seed) and the public key is the result of
+     * performing a scalar point multiplication of that value with
+     * the curve's base point.
+     */
+    ecdsaStatus = EC_NewKeyFromSeed(ecparams, &ecdsa_private_key,
+                                    ecdsa_Known_Seed,
+                                    sizeof(ecdsa_Known_Seed));
+    if (ecdsaStatus != SECSuccess) {
+        /* direct return: no key was produced, so the cleanup at "loser"
+         * (which dereferences ecdsa_private_key) must not run */
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return (SECFailure);
+    }
+
+    /* construct public key from private key (struct copies; the public key
+     * does not get storage of its own). */
+    ecdsa_public_key.ecParams = ecdsa_private_key->ecParams;
+    ecdsa_public_key.publicValue = ecdsa_private_key->publicValue;
+
+    /* validate public key value */
+    ecdsaStatus = EC_ValidatePublicKey(&ecdsa_public_key.ecParams,
+                                       &ecdsa_public_key.publicValue);
+    if (ecdsaStatus != SECSuccess) {
+        goto loser;
+    }
+
+    /* validate the public value again, this time read through the private
+     * key structure it was copied from */
+    ecdsaStatus = EC_ValidatePublicKey(&ecdsa_private_key->ecParams,
+                                       &ecdsa_private_key->publicValue);
+    if (ecdsaStatus != SECSuccess) {
+        goto loser;
+    }
+
+    /***************************************************/
+    /* ECDSA Single-Round Known Answer Signature Test. */
+    /***************************************************/
+
+    /* sizeof msg counts the terminating NUL of the string literal, so the
+     * NUL byte is part of the hashed input. */
+    ecdsaStatus = SHA1_HashBuf(sha1, msg, sizeof msg);
+    if (ecdsaStatus != SECSuccess) {
+        goto loser;
+    }
+    digest.type = siBuffer;
+    digest.data = sha1;
+    digest.len = SHA1_LENGTH;
+
+    memset(sig, 0, sizeof sig);
+    signature.type = siBuffer;
+    signature.data = sig;
+    signature.len = sizeof sig; /* capacity on input; compared against knownSignatureLen after signing */
+
+    ecdsaStatus = ECDSA_SignDigestWithSeed(ecdsa_private_key, &signature,
+                                           &digest, ecdsa_Known_Seed, sizeof ecdsa_Known_Seed);
+    if (ecdsaStatus != SECSuccess) {
+        goto loser;
+    }
+
+    if ((signature.len != knownSignatureLen) ||
+        (PORT_Memcmp(signature.data, knownSignature,
+                     knownSignatureLen) != 0)) {
+        ecdsaStatus = SECFailure;
+        goto loser;
+    }
+
+    /******************************************************/
+    /* ECDSA Single-Round Known Answer Verification Test. */
+    /******************************************************/
+
+    /* Perform ECDSA verification process. */
+    ecdsaStatus = ECDSA_VerifyDigest(&ecdsa_public_key, &signature, &digest);
+
+loser:
+    /* free the memory for the private key arena. ecdsa_private_key is
+     * non-NULL here because the only earlier failure path returns directly.
+     * NOTE(review): PR_FALSE presumably means "do not zero on free"; the DSA
+     * self-test in this file passes PR_TRUE. Confirm against PORT_FreeArena. */
+    PORT_FreeArena(ecdsa_private_key->ecParams.arena, PR_FALSE);
+
+    if (ecdsaStatus != SECSuccess) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return (SECFailure);
+    }
+    return (SECSuccess);
+}
+
+static SECStatus
+freebl_fips_ECDSA_PowerUpSelfTest()
+{
+
+    /* ECDSA Known curve nistp256 == ECCCurve_X9_62_PRIME_256V1 params */
+    static const unsigned char p256_prime[] = {
+        0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
+    };
+    static const unsigned char p256_a[] = {
+        0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+        0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFC
+    };
+    static const unsigned char p256_b[] = {
+        0x5A, 0xC6, 0x35, 0xD8, 0xAA, 0x3A, 0x93, 0xE7, 0xB3, 0xEB,
0xBD, 0x55, 0x76, + 0x98, 0x86, 0xBC, 0x65, 0x1D, 0x06, 0xB0, 0xCC, 0x53, 0xB0, 0xF6, 0x3B, 0xCE, + 0x3C, 0x3E, 0x27, 0xD2, 0x60, 0x4B + }; + static const unsigned char p256_base[] = { + 0x04, + 0x6B, 0x17, 0xD1, 0xF2, 0xE1, 0x2C, 0x42, 0x47, 0xF8, 0xBC, 0xE6, 0xE5, 0x63, + 0xA4, 0x40, 0xF2, 0x77, 0x03, 0x7D, 0x81, 0x2D, 0xEB, 0x33, 0xA0, 0xF4, 0xA1, + 0x39, 0x45, 0xD8, 0x98, 0xC2, 0x96, + 0x4F, 0xE3, 0x42, 0xE2, 0xFE, 0x1A, 0x7F, 0x9B, 0x8E, 0xE7, 0xEB, 0x4A, 0x7C, + 0x0F, 0x9E, 0x16, 0x2B, 0xCE, 0x33, 0x57, 0x6B, 0x31, 0x5E, 0xCE, 0xCB, 0xB6, + 0x40, 0x68, 0x37, 0xBF, 0x51, 0xF5 + }; + static const unsigned char p256_order[] = { + 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, 0xFF, 0xBC, 0xE6, 0xFA, 0xAD, 0xA7, 0x17, 0x9E, 0x84, 0xF3, 0xB9, + 0xCA, 0xC2, 0xFC, 0x63, 0x25, 0x51 + }; + static const unsigned char p256_encoding[] = { + 0x06, 0x08, 0x2a, 0x86, 0x48, 0xce, 0x3d, 0x03, 0x01, 0x07 + }; + static const ECParams ecdsa_known_P256_Params = { + NULL, ec_params_named, /* arena, type */ + /* fieldID */ + { 256, ec_field_GFp, /* size and type */ + { { siBuffer, (unsigned char *)p256_prime, sizeof(p256_prime) } }, /* u.prime */ + 0, + 0, + 0 }, + /* curve */ + { /* a = curvea b = curveb */ + /* curve.a */ + { siBuffer, (unsigned char *)p256_a, sizeof(p256_a) }, + /* curve.b */ + { siBuffer, (unsigned char *)p256_b, sizeof(p256_b) }, + /* curve.seed */ + { siBuffer, NULL, 0 } }, + /* base = 04xy*/ + { siBuffer, (unsigned char *)p256_base, sizeof(p256_base) }, + /* order */ + { siBuffer, (unsigned char *)p256_order, sizeof(p256_order) }, + 1, /* cofactor */ + /* DEREncoding */ + { siBuffer, (unsigned char *)p256_encoding, sizeof(p256_encoding) }, + ECCurve_X9_62_PRIME_256V1, + /* curveOID */ + { siBuffer, (unsigned char *)(p256_encoding) + 2, sizeof(p256_encoding) - 2 }, + }; + + static const PRUint8 ecdsa_known_P256_signature[] = { + 0x07, 0xb1, 0xcb, 0x57, 0x20, 0xa7, 0x10, 0xd6, + 0x9d, 0x37, 0x4b, 0x1c, 0xdc, 0x35, 
0x90, 0xff, + 0x1a, 0x2d, 0x98, 0x95, 0x1b, 0x2f, 0xeb, 0x7f, + 0xbb, 0x81, 0xca, 0xc0, 0x69, 0x75, 0xea, 0xc5, + 0x59, 0x6a, 0x62, 0x49, 0x3d, 0x50, 0xc9, 0xe1, + 0x27, 0x3b, 0xff, 0x9b, 0x13, 0x66, 0x67, 0xdd, + 0x7d, 0xd1, 0x0d, 0x2d, 0x7c, 0x44, 0x04, 0x1b, + 0x16, 0x21, 0x12, 0xc5, 0xcb, 0xbd, 0x9e, 0x75 + }; + + ECParams ecparams; + + SECStatus rv; + + /* ECDSA GF(p) prime field curve test */ + ecparams = ecdsa_known_P256_Params; + rv = freebl_fips_ECDSA_Test(&ecparams, + ecdsa_known_P256_signature, + sizeof ecdsa_known_P256_signature); + if (rv != SECSuccess) { + return (SECFailure); + } + + return (SECSuccess); +} + +#endif /* NSS_ENABLE_ECC */ + +static SECStatus +freebl_fips_DSA_PowerUpSelfTest(void) +{ + /* DSA Known P (1024-bits), Q (160-bits), and G (1024-bits) Values. */ + static const PRUint8 dsa_P[] = { + 0x80, 0xb0, 0xd1, 0x9d, 0x6e, 0xa4, 0xf3, 0x28, + 0x9f, 0x24, 0xa9, 0x8a, 0x49, 0xd0, 0x0c, 0x63, + 0xe8, 0x59, 0x04, 0xf9, 0x89, 0x4a, 0x5e, 0xc0, + 0x6d, 0xd2, 0x67, 0x6b, 0x37, 0x81, 0x83, 0x0c, + 0xfe, 0x3a, 0x8a, 0xfd, 0xa0, 0x3b, 0x08, 0x91, + 0x1c, 0xcb, 0xb5, 0x63, 0xb0, 0x1c, 0x70, 0xd0, + 0xae, 0xe1, 0x60, 0x2e, 0x12, 0xeb, 0x54, 0xc7, + 0xcf, 0xc6, 0xcc, 0xae, 0x97, 0x52, 0x32, 0x63, + 0xd3, 0xeb, 0x55, 0xea, 0x2f, 0x4c, 0xd5, 0xd7, + 0x3f, 0xda, 0xec, 0x49, 0x27, 0x0b, 0x14, 0x56, + 0xc5, 0x09, 0xbe, 0x4d, 0x09, 0x15, 0x75, 0x2b, + 0xa3, 0x42, 0x0d, 0x03, 0x71, 0xdf, 0x0f, 0xf4, + 0x0e, 0xe9, 0x0c, 0x46, 0x93, 0x3d, 0x3f, 0xa6, + 0x6c, 0xdb, 0xca, 0xe5, 0xac, 0x96, 0xc8, 0x64, + 0x5c, 0xec, 0x4b, 0x35, 0x65, 0xfc, 0xfb, 0x5a, + 0x1b, 0x04, 0x1b, 0xa1, 0x0e, 0xfd, 0x88, 0x15 + }; + + static const PRUint8 dsa_Q[] = { + 0xad, 0x22, 0x59, 0xdf, 0xe5, 0xec, 0x4c, 0x6e, + 0xf9, 0x43, 0xf0, 0x4b, 0x2d, 0x50, 0x51, 0xc6, + 0x91, 0x99, 0x8b, 0xcf + }; + + static const PRUint8 dsa_G[] = { + 0x78, 0x6e, 0xa9, 0xd8, 0xcd, 0x4a, 0x85, 0xa4, + 0x45, 0xb6, 0x6e, 0x5d, 0x21, 0x50, 0x61, 0xf6, + 0x5f, 0xdf, 0x5c, 0x7a, 0xde, 0x0d, 0x19, 0xd3, + 0xc1, 
0x3b, 0x14, 0xcc, 0x8e, 0xed, 0xdb, 0x17, + 0xb6, 0xca, 0xba, 0x86, 0xa9, 0xea, 0x51, 0x2d, + 0xc1, 0xa9, 0x16, 0xda, 0xf8, 0x7b, 0x59, 0x8a, + 0xdf, 0xcb, 0xa4, 0x67, 0x00, 0x44, 0xea, 0x24, + 0x73, 0xe5, 0xcb, 0x4b, 0xaf, 0x2a, 0x31, 0x25, + 0x22, 0x28, 0x3f, 0x16, 0x10, 0x82, 0xf7, 0xeb, + 0x94, 0x0d, 0xdd, 0x09, 0x22, 0x14, 0x08, 0x79, + 0xba, 0x11, 0x0b, 0xf1, 0xff, 0x2d, 0x67, 0xac, + 0xeb, 0xb6, 0x55, 0x51, 0x69, 0x97, 0xa7, 0x25, + 0x6b, 0x9c, 0xa0, 0x9b, 0xd5, 0x08, 0x9b, 0x27, + 0x42, 0x1c, 0x7a, 0x69, 0x57, 0xe6, 0x2e, 0xed, + 0xa9, 0x5b, 0x25, 0xe8, 0x1f, 0xd2, 0xed, 0x1f, + 0xdf, 0xe7, 0x80, 0x17, 0xba, 0x0d, 0x4d, 0x38 + }; + + /* DSA Known Random Values (known random key block is 160-bits) */ + /* and (known random signature block is 160-bits). */ + static const PRUint8 dsa_known_random_key_block[] = { + "Mozilla Rules World!" + }; + static const PRUint8 dsa_known_random_signature_block[] = { + "Random DSA Signature" + }; + + /* DSA Known Digest (160-bits) */ + static const PRUint8 dsa_known_digest[] = { "DSA Signature Digest" }; + + /* DSA Known Signature (320-bits). */ + static const PRUint8 dsa_known_signature[] = { + 0x25, 0x7c, 0x3a, 0x79, 0x32, 0x45, 0xb7, 0x32, + 0x70, 0xca, 0x62, 0x63, 0x2b, 0xf6, 0x29, 0x2c, + 0x22, 0x2a, 0x03, 0xce, 0x48, 0x15, 0x11, 0x72, + 0x7b, 0x7e, 0xf5, 0x7a, 0xf3, 0x10, 0x3b, 0xde, + 0x34, 0xc1, 0x9e, 0xd7, 0x27, 0x9e, 0x77, 0x38 + }; + + /* DSA variables. */ + DSAPrivateKey *dsa_private_key; + SECStatus dsa_status; + SECItem dsa_signature_item; + SECItem dsa_digest_item; + DSAPublicKey dsa_public_key; + PRUint8 dsa_computed_signature[FIPS_DSA_SIGNATURE_LENGTH]; + static const PQGParams dsa_pqg = { + NULL, + { FIPS_DSA_TYPE, (unsigned char *)dsa_P, FIPS_DSA_PRIME_LENGTH }, + { FIPS_DSA_TYPE, (unsigned char *)dsa_Q, FIPS_DSA_SUBPRIME_LENGTH }, + { FIPS_DSA_TYPE, (unsigned char *)dsa_G, FIPS_DSA_BASE_LENGTH } + }; + + /*******************************************/ + /* Generate a DSA public/private key pair. 
*/
+    /*******************************************/
+
+    /* Generate a DSA public/private key pair. */
+    dsa_status = DSA_NewKeyFromSeed(&dsa_pqg, dsa_known_random_key_block,
+                                    &dsa_private_key);
+
+    if (dsa_status != SECSuccess) {
+        PORT_SetError(SEC_ERROR_NO_MEMORY); /* NOTE(review): every key-generation failure is reported as out-of-memory */
+        return (SECFailure);
+    }
+
+    /* construct public key from private key. */
+    dsa_public_key.params = dsa_private_key->params;
+    dsa_public_key.publicValue = dsa_private_key->publicValue;
+
+    /*************************************************/
+    /* DSA Single-Round Known Answer Signature Test. */
+    /*************************************************/
+
+    dsa_signature_item.data = dsa_computed_signature;
+    dsa_signature_item.len = sizeof dsa_computed_signature;
+
+    dsa_digest_item.data = (unsigned char *)dsa_known_digest;
+    dsa_digest_item.len = SHA1_LENGTH;
+
+    /* Perform DSA signature process. */
+    dsa_status = DSA_SignDigestWithSeed(dsa_private_key,
+                                        &dsa_signature_item,
+                                        &dsa_digest_item,
+                                        dsa_known_random_signature_block);
+
+    /* A wrong length or any byte differing from the known signature is
+     * folded into dsa_status so the shared cleanup below still runs. */
+    if ((dsa_status != SECSuccess) ||
+        (dsa_signature_item.len != FIPS_DSA_SIGNATURE_LENGTH) ||
+        (PORT_Memcmp(dsa_computed_signature, dsa_known_signature,
+                     FIPS_DSA_SIGNATURE_LENGTH) != 0)) {
+        dsa_status = SECFailure;
+    } else {
+
+        /****************************************************/
+        /* DSA Single-Round Known Answer Verification Test. */
+        /****************************************************/
+
+        /* Perform DSA verification process. */
+        dsa_status = DSA_VerifyDigest(&dsa_public_key,
+                                      &dsa_signature_item,
+                                      &dsa_digest_item);
+    }
+
+    PORT_FreeArena(dsa_private_key->params.arena, PR_TRUE);
+    /* Don't free public key, it uses same arena as private key */
+
+    /* Report the combined outcome of the signature and verification steps. */
+    if (dsa_status != SECSuccess) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+
+    return (SECSuccess);
+}
+
+static SECStatus
+freebl_fips_RNG_PowerUpSelfTest(void)
+{   /*
+     * Power-up self-test for the random number generator code.
+     *
+     * First runs PRNGTEST_RunHealthTests(). Then it feeds the fixed inputs
+     * GENX and Q to FIPS186Change_ReduceModQForDSA() and requires the
+     * FIPS_DSA_SUBPRIME_LENGTH-byte result to equal rng_known_DSAX.
+     *
+     * Returns SECSuccess when both checks pass. Otherwise it sets
+     * SEC_ERROR_LIBRARY_FAILURE and returns SECFailure.
+     */
+    static const PRUint8 Q[] = {
+        0x85, 0x89, 0x9c, 0x77, 0xa3, 0x79, 0xff, 0x1a,
+        0x86, 0x6f, 0x2f, 0x3e, 0x2e, 0xf9, 0x8c, 0x9c,
+        0x9d, 0xef, 0xeb, 0xed
+    };
+    static const PRUint8 GENX[] = {
+        0x65, 0x48, 0xe3, 0xca, 0xac, 0x64, 0x2d, 0xf7,
+        0x7b, 0xd3, 0x4e, 0x79, 0xc9, 0x7d, 0xa6, 0xa8,
+        0xa2, 0xc2, 0x1f, 0x8f, 0xe9, 0xb9, 0xd3, 0xa1,
+        0x3f, 0xf7, 0x0c, 0xcd, 0xa6, 0xca, 0xbf, 0xce,
+        0x84, 0x0e, 0xb6, 0xf1, 0x0d, 0xbe, 0xa9, 0xa3
+    };
+    static const PRUint8 rng_known_DSAX[] = {
+        0x7a, 0x86, 0xf1, 0x7f, 0xbd, 0x4e, 0x6e, 0xd9,
+        0x0a, 0x26, 0x21, 0xd0, 0x19, 0xcb, 0x86, 0x73,
+        0x10, 0x1f, 0x60, 0xd7
+    };
+
+    SECStatus rng_status = SECSuccess;
+    PRUint8 DSAX[FIPS_DSA_SUBPRIME_LENGTH];
+
+    /*******************************************/
+    /* Run the SP 800-90 Health tests */
+    /*******************************************/
+    rng_status = PRNGTEST_RunHealthTests();
+    if (rng_status != SECSuccess) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+
+    /*******************************************/
+    /* Generate DSAX for given Q. */
+    /*******************************************/
+
+    rng_status = FIPS186Change_ReduceModQForDSA(GENX, Q, DSAX);
+
+    /* Verify DSAX to perform the RNG integrity check */
+    if ((rng_status != SECSuccess) ||
+        (PORT_Memcmp(DSAX, rng_known_DSAX,
+                     (FIPS_DSA_SUBPRIME_LENGTH)) != 0)) {
+        PORT_SetError(SEC_ERROR_LIBRARY_FAILURE);
+        return SECFailure;
+    }
+
+    return (SECSuccess);
+}
+
+static SECStatus
+freebl_fipsSoftwareIntegrityTest(const char *libname)
+{   /*
+     * Software integrity check: returns SECSuccess when
+     * BLAPI_VerifySelf(libname) reports success and SECFailure otherwise.
+     * No error code is set here.
+     */
+    SECStatus rv = SECSuccess;
+
+    /* make sure that our check file signatures are OK */
+    if (!BLAPI_VerifySelf(libname)) {
+        rv = SECFailure;
+    }
+    return rv;
+}
+
+#define DO_FREEBL 1 /* bit flag: the SHA and RNG self-tests (standalone freebl) */
+#define DO_REST 2 /* bit flag: every other algorithm self-test */
+
+static SECStatus
+freebl_fipsPowerUpSelfTest(unsigned int tests)
+{   /*
+     * Runs the power-up self-tests selected by the bit mask "tests"
+     * (DO_FREEBL, DO_REST or both), in a fixed order, and stops at the
+     * first failure.
+     *
+     * Returns the status of the first failing self-test, or SECSuccess
+     * when every selected self-test passed.
+     */
+    SECStatus rv;
+
+    /*
+     * stand alone freebl. Test hash, and rng
+     */
+    if (tests & DO_FREEBL) {
+
+        /* SHA-X Power-Up SelfTest(s). */
+        rv = freebl_fips_SHA_PowerUpSelfTest();
+
+        if (rv != SECSuccess)
+            return rv;
+
+        /* RNG Power-Up SelfTest(s). */
+        rv = freebl_fips_RNG_PowerUpSelfTest();
+
+        if (rv != SECSuccess)
+            return rv;
+    }
+
+    /*
+     * test the rest of the algorithms not accessed through freebl
+     * standalone */
+    if (tests & DO_REST) {
+
+        /* DES3 Power-Up SelfTest(s). */
+        rv = freebl_fips_DES3_PowerUpSelfTest();
+
+        if (rv != SECSuccess)
+            return rv;
+
+        /* AES Power-Up SelfTest(s) for 128-bit key. */
+        rv = freebl_fips_AES_PowerUpSelfTest(FIPS_AES_128_KEY_SIZE);
+
+        if (rv != SECSuccess)
+            return rv;
+
+        /* AES Power-Up SelfTest(s) for 192-bit key. */
+        rv = freebl_fips_AES_PowerUpSelfTest(FIPS_AES_192_KEY_SIZE);
+
+        if (rv != SECSuccess)
+            return rv;
+
+        /* AES Power-Up SelfTest(s) for 256-bit key. */
+        rv = freebl_fips_AES_PowerUpSelfTest(FIPS_AES_256_KEY_SIZE);
+
+        if (rv != SECSuccess)
+            return rv;
+
+        /* HMAC SHA-X Power-Up SelfTest(s). */
+        rv = freebl_fips_HMAC_PowerUpSelfTest();
+
+        if (rv != SECSuccess)
+            return rv;
+
+        /* NOTE: RSA can only be tested in full freebl.
It requires access to
+         * the locking primitives */
+        /* RSA Power-Up SelfTest(s). */
+        rv = freebl_fips_RSA_PowerUpSelfTest();
+
+        if (rv != SECSuccess)
+            return rv;
+
+        /* DSA Power-Up SelfTest(s). */
+        rv = freebl_fips_DSA_PowerUpSelfTest();
+
+        if (rv != SECSuccess)
+            return rv;
+
+#ifdef NSS_ENABLE_ECC
+        /* ECDSA Power-Up SelfTest(s). */
+        rv = freebl_fips_ECDSA_PowerUpSelfTest();
+
+        if (rv != SECSuccess)
+            return rv;
+#endif
+    }
+    /* Passed Power-Up SelfTest(s). */
+    return (SECSuccess);
+}
+
+/*
+ * state variables. NOTE: freebl has two uses: a standalone use which
+ * provides limited access to the hash functions through the NSSLOWHASH_
+ * interface and a joint use from softoken, using the function pointer
+ * table. The standalone use can operate without nspr or nss-util, while
+ * the joint use requires both to be loaded. Certain functions (like RSA)
+ * need locking from NSPR, for instance.
+ *
+ * At load time, we need to handle the two uses separately. If nspr and
+ * nss-util are loaded, then we can run all the selftests, but if nspr and
+ * nss-util are not loaded, then we can't run all the selftests, and we need
+ * to prevent the softoken function pointer table from operating until the
+ * libraries are loaded and we try to use them.
+ */ +static PRBool self_tests_freebl_ran = PR_FALSE; +static PRBool self_tests_ran = PR_FALSE; +static PRBool self_tests_freebl_success = PR_FALSE; +static PRBool self_tests_success = PR_FALSE; +#if defined(DEBUG) +static PRBool fips_mode_available = PR_FALSE; +#endif + +/* + * accessors for freebl + */ +PRBool +BL_POSTRan(PRBool freebl_only) +{ + SECStatus rv; + /* if the freebl self tests didn't run, there is something wrong with + * our on load tests */ + if (!self_tests_freebl_ran) { + return PR_FALSE; + } + /* if all the self tests have run, we are good */ + if (self_tests_ran) { + return PR_TRUE; + } + /* if we only care about the freebl tests, we are good */ + if (freebl_only) { + return PR_TRUE; + } + /* run the rest of the self tests */ + /* We could get there if freebl was loaded without the rest of the support + * libraries, but now we want to use more than just a standalone freebl. + * This requires the other libraries to be loaded. + * If they are now loaded, Try to run the rest of the selftests, + * otherwise fail (disabling access to these algorithms) */ + self_tests_ran = PR_TRUE; + BL_Init(); /* required by RSA */ + RNG_RNGInit(); /* required by RSA */ + rv = freebl_fipsPowerUpSelfTest(DO_REST); + if (rv == SECSuccess) { + self_tests_success = PR_TRUE; + } + return PR_TRUE; +} + +#include "blname.c" + +/* + * This function is called at dll load time, the code tha makes this + * happen is platform specific on defined above. 
+ */ +static void +bl_startup_tests(void) +{ + const char *libraryName; + PRBool freebl_only = PR_FALSE; + SECStatus rv; + + PORT_Assert(self_tests_freebl_ran == PR_FALSE); + PORT_Assert(self_tests_success == PR_FALSE); + PORT_Assert(fips_mode_available == PR_FALSE); + self_tests_freebl_ran = PR_TRUE; /* we are running the tests */ + self_tests_success = PR_FALSE; /* force it just in case */ + self_tests_freebl_success = PR_FALSE; /* force it just in case */ + +#ifdef FREEBL_NO_DEPEND + rv = FREEBL_InitStubs(); + if (rv != SECSuccess) { + freebl_only = PR_TRUE; + } +#endif + + self_tests_freebl_ran = PR_TRUE; /* we are running the tests */ + + if (!freebl_only) { + self_tests_ran = PR_TRUE; /* we're running all the tests */ + BL_Init(); /* needs to be called before RSA can be used */ + RNG_RNGInit(); + } + + /* always run the post tests */ + rv = freebl_fipsPowerUpSelfTest(freebl_only ? DO_FREEBL : DO_FREEBL | DO_REST); + if (rv != SECSuccess) { + return; + } + + libraryName = getLibName(); + rv = freebl_fipsSoftwareIntegrityTest(libraryName); + if (rv != SECSuccess) { + return; + } + + /* posts are happy, allow the fips module to function now */ + self_tests_freebl_success = PR_TRUE; /* we always test the freebl stuff */ + if (!freebl_only) { + self_tests_success = PR_TRUE; + } +} + +/* + * this is called from the freebl init entry points that controll access to + * all other freebl functions. This prevents freebl from operating if our + * power on selftest failed. + */ +SECStatus +BL_FIPSEntryOK(PRBool freebl_only) +{ +#ifdef NSS_NO_INIT_SUPPORT + /* this should only be set on platforms that can't handle one of the INIT + * schemes. This code allows those platforms to continue to function, + * though they don't meet the strict NIST requirements. 
If NSS_NO_INIT_SUPPORT + * is not set, and init support has not been properly enabled, freebl + * will always fail because of the test below + */ + if (!self_tests_freebl_ran) { + bl_startup_tests(); + } +#endif + /* if the general self tests succeeded, we're done */ + if (self_tests_success) { + return SECSuccess; + } + /* standalone freebl can initialize if the freebl-only self tests passed */ + if (freebl_only && self_tests_freebl_success) { + return SECSuccess; + } + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; +} diff --git a/security/nss/lib/freebl/freebl.def b/security/nss/lib/freebl/freebl.def new file mode 100644 index 000000000..164c843fd --- /dev/null +++ b/security/nss/lib/freebl/freebl.def @@ -0,0 +1,26 @@ +;+# +;+# This Source Code Form is subject to the terms of the Mozilla Public +;+# License, v. 2.0. If a copy of the MPL was not distributed with this +;+# file, You can obtain one at http://mozilla.org/MPL/2.0/. +;+# +;+# OK, this file is meant to support SUN, LINUX, AIX and WINDOWS +;+# 1. For all unix platforms, the string ";-" means "remove this line" +;+# 2. For all unix platforms, the string " DATA " will be removed from any +;+# line on which it occurs. +;+# 3. Lines containing ";+" will have ";+" removed on SUN and LINUX. +;+# On AIX, lines containing ";+" will be removed. +;+# 4. For all unix platforms, the string ";;" will have the ";;" removed. +;+# 5. For all unix platforms, after the above processing has taken place, +;+# all characters after the first ";" on the line will be removed. +;+# And for AIX, the first ";" will also be removed. +;+# This file is passed directly to windows.
Since ';' is a comment, all UNIX +;+# directives are hidden behind ";", ";+", and ";-" +;+ +;+NSSprivate_3.11 { # NSS 3.11 release +;+ global: +LIBRARY freebl3 ;- +EXPORTS ;- +FREEBL_GetVector; +;+ local: +;+ *; +;+}; diff --git a/security/nss/lib/freebl/freebl.gyp b/security/nss/lib/freebl/freebl.gyp new file mode 100644 index 000000000..f5ae232ec --- /dev/null +++ b/security/nss/lib/freebl/freebl.gyp @@ -0,0 +1,408 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +{ + 'includes': [ + '../../coreconf/config.gypi' + ], + 'targets': [ + { + 'target_name': 'intel-gcm-wrap_c_lib', + 'type': 'static_library', + 'sources': [ + 'intel-gcm-wrap.c' + ], + 'dependencies': [ + '<(DEPTH)/exports.gyp:nss_exports' + ], + 'cflags': [ + '-mssse3' + ], + 'cflags_mozilla': [ + '-mssse3' + ] + }, + { + 'target_name': 'freebl', + 'type': 'static_library', + 'sources': [ + 'loader.c' + ], + 'dependencies': [ + '<(DEPTH)/exports.gyp:nss_exports' + ] + }, + { + 'target_name': '<(freebl_name)', + 'type': 'shared_library', + 'sources': [ + 'aeskeywrap.c', + 'alg2268.c', + 'alghmac.c', + 'arcfive.c', + 'arcfour.c', + 'camellia.c', + 'chacha20poly1305.c', + 'ctr.c', + 'cts.c', + 'des.c', + 'desblapi.c', + 'dh.c', + 'drbg.c', + 'dsa.c', + 'ec.c', + 'ecdecode.c', + 'ecl/ec_naf.c', + 'ecl/ecl.c', + 'ecl/ecl_curve.c', + 'ecl/ecl_gf.c', + 'ecl/ecl_mult.c', + 'ecl/ecp_25519.c', + 'ecl/ecp_256.c', + 'ecl/ecp_256_32.c', + 'ecl/ecp_384.c', + 'ecl/ecp_521.c', + 'ecl/ecp_aff.c', + 'ecl/ecp_jac.c', + 'ecl/ecp_jm.c', + 'ecl/ecp_mont.c', + 'fipsfreebl.c', + 'freeblver.c', + 'gcm.c', + 'hmacct.c', + 'jpake.c', + 'ldvector.c', + 'md2.c', + 'md5.c', + 'mpi/mp_gf2m.c', + 'mpi/mpcpucache.c', + 'mpi/mpi.c', + 'mpi/mplogic.c', + 'mpi/mpmontg.c', + 'mpi/mpprime.c', + 'pqg.c', + 'rawhash.c', + 'rijndael.c', + 'rsa.c', + 'rsapkcs.c', + 'seed.c', + 'sha512.c', + 
'sha_fast.c', + 'shvfy.c', + 'sysrand.c', + 'tlsprfalg.c' + ], + 'conditions': [ + [ 'OS=="linux"', { + 'sources': [ + 'nsslowhash.c', + 'stubs.c', + ], + 'conditions': [ + [ 'test_build==1', { + 'dependencies': [ + '<(DEPTH)/lib/util/util.gyp:nssutil3', + ], + }], + [ 'target_arch=="x64"', { + 'sources': [ + 'arcfour-amd64-gas.s', + 'intel-aes.s', + 'intel-gcm.s', + 'mpi/mpi_amd64.c', + 'mpi/mpi_amd64_gas.s', + 'mpi/mp_comba.c', + ], + 'dependencies': [ + 'intel-gcm-wrap_c_lib', + ], + 'conditions': [ + [ 'cc_is_clang==1', { + 'cflags': [ + '-no-integrated-as', + ], + 'cflags_mozilla': [ + '-no-integrated-as', + ], + 'asflags_mozilla': [ + '-no-integrated-as', + ], + }], + ], + }], + [ 'target_arch=="ia32"', { + 'sources': [ + 'mpi/mpi_x86.s', + ], + }], + [ 'target_arch=="arm"', { + 'sources': [ + 'mpi/mpi_arm.c', + ], + }], + ], + }, { + # not Linux + 'conditions': [ + [ 'moz_fold_libs==0', { + 'dependencies': [ + '../util/util.gyp:nssutil3', + ], + }, { + 'libraries': [ + '<(moz_folded_library_name)', + ], + }], + ], + }], + [ 'OS=="win"', { + 'sources': [ + #TODO: building with mingw should not need this. + 'ecl/uint128.c', + #TODO: clang-cl needs -msse3 here + 'intel-gcm-wrap.c', + ], + 'libraries': [ + 'advapi32.lib', + ], + 'conditions': [ + [ 'target_arch=="x64"', { + 'sources': [ + 'arcfour-amd64-masm.asm', + 'mpi/mpi_amd64.c', + 'mpi/mpi_amd64_masm.asm', + 'mpi/mp_comba_amd64_masm.asm', + 'intel-aes-x64-masm.asm', + 'intel-gcm-x64-masm.asm', + ], + }, { + # not x64 + 'sources': [ + 'mpi/mpi_x86_asm.c', + 'intel-aes-x86-masm.asm', + 'intel-gcm-x86-masm.asm', + ], + }], + ], + }], + ['target_arch=="ia32" or target_arch=="x64"', { + 'sources': [ + # All intel architectures get the 64 bit version + 'ecl/curve25519_64.c', + ], + }, { + 'sources': [ + # All non intel architectures get the generic 32 bit implementation (slow!) 
+ 'ecl/curve25519_32.c', + ], + }], + #TODO uint128.c + [ 'disable_chachapoly==0', { + 'conditions': [ + [ 'OS!="win" and target_arch=="x64"', { + 'sources': [ + 'chacha20_vec.c', + 'poly1305-donna-x64-sse2-incremental-source.c', + ], + }, { + # not x64 + 'sources': [ + 'chacha20.c', + 'poly1305.c', + ], + }], + ], + }], + [ 'fuzz==1', { + 'sources': [ + 'det_rng.c', + ], + 'defines': [ + 'UNSAFE_FUZZER_MODE', + ], + }], + [ 'test_build==1', { + 'defines': [ + 'CT_VERIF', + ], + }], + [ 'OS=="mac"', { + 'conditions': [ + [ 'target_arch=="ia32"', { + 'sources': [ + 'mpi/mpi_sse2.s', + ], + 'defines': [ + 'MP_USE_UINT_DIGIT', + 'MP_ASSEMBLY_MULTIPLY', + 'MP_ASSEMBLY_SQUARE', + 'MP_ASSEMBLY_DIV_2DX1D', + ], + }], + ], + }], + ], + 'dependencies': [ + '<(DEPTH)/exports.gyp:nss_exports', + ], + 'variables': { + 'conditions': [ + [ 'OS=="linux"', { + 'mapfile': 'freebl_hash_vector.def', + }, { + 'mapfile': 'freebl.def', + }], + ] + }, + 'ldflags': [ + '-Wl,-Bsymbolic' + ] + }, + ], + 'conditions': [ + [ 'OS=="linux"', { + # stub build + 'targets': [ + { + 'target_name': 'freebl3', + 'type': 'shared_library', + 'defines': [ + 'FREEBL_NO_DEPEND', + ], + 'sources': [ + 'lowhash_vector.c' + ], + 'dependencies': [ + '<(DEPTH)/exports.gyp:nss_exports' + ], + 'variables': { + 'mapfile': 'freebl_hash.def' + } + }, + ], + }], + ], + 'target_defaults': { + 'include_dirs': [ + 'mpi', + 'ecl' + ], + 'defines': [ + 'SHLIB_SUFFIX=\"<(dll_suffix)\"', + 'SHLIB_PREFIX=\"<(dll_prefix)\"', + 'SHLIB_VERSION=\"3\"', + 'SOFTOKEN_SHLIB_VERSION=\"3\"', + 'RIJNDAEL_INCLUDE_TABLES', + 'MP_API_COMPATIBLE' + ], + 'conditions': [ + [ 'OS=="win" and target_arch=="ia32"', { + 'msvs_settings': { + 'VCCLCompilerTool': { + #TODO: -Ox optimize flags + 'PreprocessorDefinitions': [ + 'NSS_X86_OR_X64', + 'NSS_X86', + 'MP_ASSEMBLY_MULTIPLY', + 'MP_ASSEMBLY_SQUARE', + 'MP_ASSEMBLY_DIV_2DX1D', + 'MP_USE_UINT_DIGIT', + 'MP_NO_MP_WORD', + 'USE_HW_AES', + 'INTEL_GCM', + ], + }, + }, + }], + [ 'OS=="win" and 
target_arch=="x64"', { + 'msvs_settings': { + 'VCCLCompilerTool': { + #TODO: -Ox optimize flags + 'PreprocessorDefinitions': [ + 'NSS_USE_64', + 'NSS_X86_OR_X64', + 'NSS_X64', + 'MP_IS_LITTLE_ENDIAN', + 'NSS_BEVAND_ARCFOUR', + 'MPI_AMD64', + 'MP_ASSEMBLY_MULTIPLY', + 'NSS_USE_COMBA', + 'USE_HW_AES', + 'INTEL_GCM', + ], + }, + }, + }], + [ 'OS!="win"', { + 'conditions': [ + [ 'target_arch=="x64"', { + 'defines': [ + 'NSS_USE_64', + 'NSS_X86_OR_X64', + 'NSS_X64', + # The Makefile does version-tests on GCC, but we're not doing that here. + 'HAVE_INT128_SUPPORT', + ], + }, { + 'sources': [ + 'ecl/uint128.c', + ], + }], + [ 'target_arch=="ia32"', { + 'defines': [ + 'NSS_X86_OR_X64', + 'NSS_X86', + ], + }], + ], + }], + [ 'OS=="linux"', { + 'defines': [ + 'FREEBL_LOWHASH', + ], + 'conditions': [ + [ 'test_build==0', { + 'defines': [ + 'FREEBL_NO_DEPEND', + ], + }], + [ 'target_arch=="x64"', { + 'defines': [ + 'MP_IS_LITTLE_ENDIAN', + 'NSS_BEVAND_ARCFOUR', + 'MPI_AMD64', + 'MP_ASSEMBLY_MULTIPLY', + 'NSS_USE_COMBA', + ], + }], + [ 'target_arch=="x64" and use_msan==0', { + 'defines': [ + 'USE_HW_AES', + 'INTEL_GCM', + ], + }], + [ 'target_arch=="ia32"', { + 'defines': [ + 'MP_IS_LITTLE_ENDIAN', + 'MP_ASSEMBLY_MULTIPLY', + 'MP_ASSEMBLY_SQUARE', + 'MP_ASSEMBLY_DIV_2DX1D', + 'MP_USE_UINT_DIGIT', + ], + }], + [ 'target_arch=="arm"', { + 'defines': [ + 'MP_ASSEMBLY_MULTIPLY', + 'MP_ASSEMBLY_SQUARE', + 'MP_USE_UINT_DIGIT', + 'SHA_NO_LONG_LONG', + ], + }], + ], + }], + ], + }, + 'variables': { + 'module': 'nss', + } +} diff --git a/security/nss/lib/freebl/freebl.rc b/security/nss/lib/freebl/freebl.rc new file mode 100644 index 000000000..444ae5d03 --- /dev/null +++ b/security/nss/lib/freebl/freebl.rc @@ -0,0 +1,68 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "softkver.h" +#include <winver.h> + +#define MY_LIBNAME "freebl" +#define MY_FILEDESCRIPTION "NSS freebl Library" + +#define STRINGIZE(x) #x +#define STRINGIZE2(x) STRINGIZE(x) +#define SOFTOKEN_VMAJOR_STR STRINGIZE2(SOFTOKEN_VMAJOR) + +#ifdef _DEBUG +#define MY_DEBUG_STR " (debug)" +#define MY_FILEFLAGS_1 VS_FF_DEBUG +#else +#define MY_DEBUG_STR "" +#define MY_FILEFLAGS_1 0x0L +#endif +#if SOFTOKEN_BETA +#define MY_FILEFLAGS_2 MY_FILEFLAGS_1|VS_FF_PRERELEASE +#else +#define MY_FILEFLAGS_2 MY_FILEFLAGS_1 +#endif + +#ifdef WINNT +#define MY_FILEOS VOS_NT_WINDOWS32 +#else +#define MY_FILEOS VOS__WINDOWS32 +#endif + +#define MY_INTERNAL_NAME MY_LIBNAME SOFTOKEN_VMAJOR_STR + +///////////////////////////////////////////////////////////////////////////// +// +// Version-information resource +// + +VS_VERSION_INFO VERSIONINFO + FILEVERSION SOFTOKEN_VMAJOR,SOFTOKEN_VMINOR,SOFTOKEN_VPATCH,SOFTOKEN_VBUILD + PRODUCTVERSION SOFTOKEN_VMAJOR,SOFTOKEN_VMINOR,SOFTOKEN_VPATCH,SOFTOKEN_VBUILD + FILEFLAGSMASK VS_FFI_FILEFLAGSMASK + FILEFLAGS MY_FILEFLAGS_2 + FILEOS MY_FILEOS + FILETYPE VFT_DLL + FILESUBTYPE 0x0L // not used + +BEGIN + BLOCK "StringFileInfo" + BEGIN + BLOCK "040904B0" // Lang=US English, CharSet=Unicode + BEGIN + VALUE "CompanyName", "Mozilla Foundation\0" + VALUE "FileDescription", MY_FILEDESCRIPTION MY_DEBUG_STR "\0" + VALUE "FileVersion", SOFTOKEN_VERSION "\0" + VALUE "InternalName", MY_INTERNAL_NAME "\0" + VALUE "OriginalFilename", MY_INTERNAL_NAME ".dll\0" + VALUE "ProductName", "Network Security Services\0" + VALUE "ProductVersion", SOFTOKEN_VERSION "\0" + END + END + BLOCK "VarFileInfo" + BEGIN + VALUE "Translation", 0x409, 1200 + END +END diff --git a/security/nss/lib/freebl/freebl_hash.def b/security/nss/lib/freebl/freebl_hash.def new file mode 100644 index 000000000..9fd27367e --- /dev/null +++ b/security/nss/lib/freebl/freebl_hash.def @@ -0,0 +1,39 @@ +;+# +;+# This Source Code Form is subject to the terms of the Mozilla Public +;+# License, v. 2.0.
If a copy of the MPL was not distributed with this +;+# file, You can obtain one at http://mozilla.org/MPL/2.0/. +;+# +;+# OK, this file is meant to support SUN, LINUX, AIX and WINDOWS +;+# 1. For all unix platforms, the string ";-" means "remove this line" +;+# 2. For all unix platforms, the string " DATA " will be removed from any +;+# line on which it occurs. +;+# 3. Lines containing ";+" will have ";+" removed on SUN and LINUX. +;+# On AIX, lines containing ";+" will be removed. +;+# 4. For all unix platforms, the string ";;" will thave the ";;" removed. +;+# 5. For all unix platforms, after the above processing has taken place, +;+# all characters after the first ";" on the line will be removed. +;+# And for AIX, the first ";" will also be removed. +;+# This file is passed directly to windows. Since ';' is a comment, all UNIX +;+# directives are hidden behind ";", ";+", and ";-" +;+ +;+NSSprivate_3.11 { # NSS 3.11 release +;+ global: +LIBRARY freebl3 ;- +EXPORTS ;- +FREEBL_GetVector; +;+ local: +;+ *; +;+}; +;+NSSRAWHASH_3.12.3 { # NSS 3.12.3 release +;+ global: +NSSLOW_Init; +NSSLOW_Shutdown; +NSSLOWHASH_Length; +NSSLOWHASH_Begin; +NSSLOWHASH_Destroy; +NSSLOWHASH_End; +NSSLOWHASH_NewContext; +NSSLOWHASH_Update; +;+ local: +;+ *; +;+}; diff --git a/security/nss/lib/freebl/freebl_hash_vector.def b/security/nss/lib/freebl/freebl_hash_vector.def new file mode 100644 index 000000000..9d7d07d54 --- /dev/null +++ b/security/nss/lib/freebl/freebl_hash_vector.def @@ -0,0 +1,34 @@ +;+# +;+# This Source Code Form is subject to the terms of the Mozilla Public +;+# License, v. 2.0. If a copy of the MPL was not distributed with this +;+# file, You can obtain one at http://mozilla.org/MPL/2.0/. +;+# +;+# OK, this file is meant to support SUN, LINUX, AIX and WINDOWS +;+# 1. For all unix platforms, the string ";-" means "remove this line" +;+# 2. For all unix platforms, the string " DATA " will be removed from any +;+# line on which it occurs. +;+# 3. 
Lines containing ";+" will have ";+" removed on SUN and LINUX. +;+# On AIX, lines containing ";+" will be removed. +;+# 4. For all unix platforms, the string ";;" will thave the ";;" removed. +;+# 5. For all unix platforms, after the above processing has taken place, +;+# all characters after the first ";" on the line will be removed. +;+# And for AIX, the first ";" will also be removed. +;+# This file is passed directly to windows. Since ';' is a comment, all UNIX +;+# directives are hidden behind ";", ";+", and ";-" +;+ +;+NSSprivate_3.11 { # NSS 3.11 release +;+ global: +LIBRARY freebl3 ;- +EXPORTS ;- +FREEBL_GetVector; +;+ local: +;+ *; +;+}; +;+NSSprivate_3.16 { # NSS 3.11 release +;+ global: +LIBRARY freebl3 ;- +EXPORTS ;- +NSSLOW_GetVector; +;+ local: +;+ *; +;+}; diff --git a/security/nss/lib/freebl/freeblver.c b/security/nss/lib/freebl/freeblver.c new file mode 100644 index 000000000..9136f0b0b --- /dev/null +++ b/security/nss/lib/freebl/freeblver.c @@ -0,0 +1,18 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* Library identity and versioning */ + +#include "softkver.h" + +#if defined(DEBUG) +#define _DEBUG_STRING " (debug)" +#else +#define _DEBUG_STRING "" +#endif + +/* + * Version information + */ +const char __nss_freebl_version[] = "Version: NSS " SOFTOKEN_VERSION _DEBUG_STRING; diff --git a/security/nss/lib/freebl/gcm.c b/security/nss/lib/freebl/gcm.c new file mode 100644 index 000000000..22121001b --- /dev/null +++ b/security/nss/lib/freebl/gcm.c @@ -0,0 +1,860 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif +#include "blapii.h" +#include "blapit.h" +#include "gcm.h" +#include "ctr.h" +#include "secerr.h" +#include "prtypes.h" +#include "pkcs11t.h" + +#include <limits.h> + +/************************************************************************** + * First implement the Galois hash function of GCM (gcmHash) * + **************************************************************************/ +#define GCM_HASH_LEN_LEN 8 /* gcm hash defines lengths to be 64 bits */ + +typedef struct gcmHashContextStr gcmHashContext; + +static SECStatus gcmHash_InitContext(gcmHashContext *hash, + const unsigned char *H, + unsigned int blocksize); +static void gcmHash_DestroyContext(gcmHashContext *ghash, PRBool freeit); +static SECStatus gcmHash_Update(gcmHashContext *ghash, + const unsigned char *buf, unsigned int len, + unsigned int blocksize); +static SECStatus gcmHash_Sync(gcmHashContext *ghash, unsigned int blocksize); +static SECStatus gcmHash_Final(gcmHashContext *gcm, unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + unsigned int blocksize); +static SECStatus gcmHash_Reset(gcmHashContext *ghash, + const unsigned char *inbuf, + unsigned int inbufLen, unsigned int blocksize); + +/* compile time defines to select how the GF2 multiply is calculated. + * There are currently 2 algorithms implemented here: MPI and ALGORITHM_1. + * + * MPI uses the GF2m implemented in mpi to support GF2 ECC. + * ALGORITHM_1 is the Algorithm 1 in both NIST SP 800-38D and + * "The Galois/Counter Mode of Operation (GCM)", McGrew & Viega. + */ +#if !defined(GCM_USE_ALGORITHM_1) && !defined(GCM_USE_MPI) +#define GCM_USE_MPI 1 /* MPI is about 5x faster with the \ + * same or less complexity. It's possible to use \ + * tables to speed things up even more */ +#endif + +/* GCM defines the bit string to be LSB first, which is exactly + * opposite everyone else, including hardware. build array + * to reverse everything.
*/ +static const unsigned char gcm_byte_rev[256] = { + 0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, + 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0, + 0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8, + 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8, + 0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4, + 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4, + 0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec, + 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc, + 0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2, + 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2, + 0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea, + 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa, + 0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6, + 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6, + 0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee, + 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe, + 0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1, + 0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1, + 0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9, + 0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9, + 0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5, + 0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5, + 0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed, + 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd, + 0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3, + 0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3, + 0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb, + 0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb, + 0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7, + 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7, + 0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef, + 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff +}; + +#ifdef GCM_TRACE +#include + +#define GCM_TRACE_X(ghash, label) \ + { \ + unsigned char _X[MAX_BLOCK_SIZE]; \ + int i; \ + gcm_getX(ghash, _X, blocksize); \ + printf(label, (ghash)->m); \ + for (i = 0; i < blocksize; i++) \ + printf("%02x", _X[i]); \ + printf("\n"); \ + } +#define GCM_TRACE_BLOCK(label, buf, blocksize) \ + { \ + 
printf(label); \ + for (i = 0; i < blocksize; i++) \ + printf("%02x", buf[i]); \ + printf("\n"); \ + } +#else +#define GCM_TRACE_X(ghash, label) +#define GCM_TRACE_BLOCK(label, buf, blocksize) +#endif + +#ifdef GCM_USE_MPI + +#ifdef GCM_USE_ALGORITHM_1 +#error "Only define one of GCM_USE_MPI, GCM_USE_ALGORITHM_1" +#endif +/* use the MPI functions to calculate Xn = (Xn-1^C_i)*H mod poly */ +#include "mpi.h" +#include "secmpi.h" +#include "mplogic.h" +#include "mp_gf2m.h" + +/* state needed to handle GCM Hash function */ +struct gcmHashContextStr { + mp_int H; + mp_int X; + mp_int C_i; + const unsigned int *poly; + unsigned char buffer[MAX_BLOCK_SIZE]; + unsigned int bufLen; + int m; /* XXX what is m? */ + unsigned char counterBuf[2 * GCM_HASH_LEN_LEN]; + PRUint64 cLen; +}; + +/* f = x^128 + x^7 + x^2 + x + 1 */ +static const unsigned int poly_128[] = { 128, 7, 2, 1, 0 }; + +/* sigh, GCM defines the bit strings exactly backwards from everything else */ +static void +gcm_reverse(unsigned char *target, const unsigned char *src, + unsigned int blocksize) +{ + unsigned int i; + for (i = 0; i < blocksize; i++) { + target[blocksize - i - 1] = gcm_byte_rev[src[i]]; + } +} + +/* Initialize a gcmHashContext */ +static SECStatus +gcmHash_InitContext(gcmHashContext *ghash, const unsigned char *H, + unsigned int blocksize) +{ + mp_err err = MP_OKAY; + unsigned char H_rev[MAX_BLOCK_SIZE]; + + MP_DIGITS(&ghash->H) = 0; + MP_DIGITS(&ghash->X) = 0; + MP_DIGITS(&ghash->C_i) = 0; + CHECK_MPI_OK(mp_init(&ghash->H)); + CHECK_MPI_OK(mp_init(&ghash->X)); + CHECK_MPI_OK(mp_init(&ghash->C_i)); + + mp_zero(&ghash->X); + gcm_reverse(H_rev, H, blocksize); + CHECK_MPI_OK(mp_read_unsigned_octets(&ghash->H, H_rev, blocksize)); + + /* set the irreducible polynomial. Each blocksize has its own polynomial. 
+ * for now only blocksize 16 (=128 bits) is defined */ + switch (blocksize) { + case 16: /* 128 bits */ + ghash->poly = poly_128; + break; + default: + PORT_SetError(SEC_ERROR_INVALID_ARGS); + goto cleanup; + } + ghash->cLen = 0; + ghash->bufLen = 0; + ghash->m = 0; + PORT_Memset(ghash->counterBuf, 0, sizeof(ghash->counterBuf)); + return SECSuccess; +cleanup: + gcmHash_DestroyContext(ghash, PR_FALSE); + return SECFailure; +} + +/* Destroy a HashContext (Note we zero the digits so this function + * is idempotent if called with freeit == PR_FALSE */ +static void +gcmHash_DestroyContext(gcmHashContext *ghash, PRBool freeit) +{ + mp_clear(&ghash->H); + mp_clear(&ghash->X); + mp_clear(&ghash->C_i); + PORT_Memset(ghash, 0, sizeof(gcmHashContext)); + if (freeit) { + PORT_Free(ghash); + } +} + +static SECStatus +gcm_getX(gcmHashContext *ghash, unsigned char *T, unsigned int blocksize) +{ + int len; + mp_err err; + unsigned char tmp_buf[MAX_BLOCK_SIZE]; + unsigned char *X; + + len = mp_unsigned_octet_size(&ghash->X); + if (len <= 0) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + X = tmp_buf; + PORT_Assert((unsigned int)len <= blocksize); + if ((unsigned int)len > blocksize) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + /* zero pad the result */ + if (len != blocksize) { + PORT_Memset(X, 0, blocksize - len); + X += blocksize - len; + } + + err = mp_to_unsigned_octets(&ghash->X, X, len); + if (err < 0) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + gcm_reverse(T, tmp_buf, blocksize); + return SECSuccess; +} + +static SECStatus +gcm_HashMult(gcmHashContext *ghash, const unsigned char *buf, + unsigned int count, unsigned int blocksize) +{ + SECStatus rv = SECFailure; + mp_err err = MP_OKAY; + unsigned char tmp_buf[MAX_BLOCK_SIZE]; + unsigned int i; + + for (i = 0; i < count; i++, buf += blocksize) { + ghash->m++; + gcm_reverse(tmp_buf, buf, blocksize); + 
CHECK_MPI_OK(mp_read_unsigned_octets(&ghash->C_i, tmp_buf, blocksize)); + CHECK_MPI_OK(mp_badd(&ghash->X, &ghash->C_i, &ghash->C_i)); + /* + * Looking to speed up GCM, this the the place to do it. + * There are two areas that can be exploited to speed up this code. + * + * 1) H is a constant in this multiply. We can precompute H * (0 - 255) + * at init time and this becomes an blockize xors of our table lookup. + * + * 2) poly is a constant for each blocksize. We can calculate the + * modulo reduction by a series of adds and shifts. + * + * For now we are after functionality, so we will go ahead and use + * the builtin bmulmod from mpi + */ + CHECK_MPI_OK(mp_bmulmod(&ghash->C_i, &ghash->H, + ghash->poly, &ghash->X)); + GCM_TRACE_X(ghash, "X%d = ") + } + rv = SECSuccess; +cleanup: + PORT_Memset(tmp_buf, 0, sizeof(tmp_buf)); + if (rv != SECSuccess) { + MP_TO_SEC_ERROR(err); + } + return rv; +} + +static void +gcm_zeroX(gcmHashContext *ghash) +{ + mp_zero(&ghash->X); + ghash->m = 0; +} + +#endif + +#ifdef GCM_USE_ALGORITHM_1 +/* use algorithm 1 of McGrew & Viega "The Galois/Counter Mode of Operation" */ + +#define GCM_ARRAY_SIZE (MAX_BLOCK_SIZE / sizeof(unsigned long)) + +struct gcmHashContextStr { + unsigned long H[GCM_ARRAY_SIZE]; + unsigned long X[GCM_ARRAY_SIZE]; + unsigned long R; + unsigned char buffer[MAX_BLOCK_SIZE]; + unsigned int bufLen; + int m; + unsigned char counterBuf[2 * GCM_HASH_LEN_LEN]; + PRUint64 cLen; +}; + +static void +gcm_bytes_to_longs(unsigned long *l, const unsigned char *c, unsigned int len) +{ + int i, j; + int array_size = len / sizeof(unsigned long); + + PORT_Assert(len % sizeof(unsigned long) == 0); + for (i = 0; i < array_size; i++) { + unsigned long tmp = 0; + int byte_offset = i * sizeof(unsigned long); + for (j = sizeof(unsigned long) - 1; j >= 0; j--) { + tmp = (tmp << PR_BITS_PER_BYTE) | gcm_byte_rev[c[byte_offset + j]]; + } + l[i] = tmp; + } +} + +static void +gcm_longs_to_bytes(const unsigned long *l, unsigned char *c, unsigned 
int len) +{ + int i, j; + int array_size = len / sizeof(unsigned long); + + PORT_Assert(len % sizeof(unsigned long) == 0); + for (i = 0; i < array_size; i++) { + unsigned long tmp = l[i]; + int byte_offset = i * sizeof(unsigned long); + for (j = 0; j < sizeof(unsigned long); j++) { + c[byte_offset + j] = gcm_byte_rev[tmp & 0xff]; + tmp = (tmp >> PR_BITS_PER_BYTE); + } + } +} + +/* Initialize a gcmHashContext */ +static SECStatus +gcmHash_InitContext(gcmHashContext *ghash, const unsigned char *H, + unsigned int blocksize) +{ + PORT_Memset(ghash->X, 0, sizeof(ghash->X)); + PORT_Memset(ghash->H, 0, sizeof(ghash->H)); + gcm_bytes_to_longs(ghash->H, H, blocksize); + + /* set the irreducible polynomial. Each blocksize has its own polynommial + * for now only blocksize 16 (=128 bits) is defined */ + switch (blocksize) { + case 16: /* 128 bits */ + ghash->R = (unsigned long)0x87; /* x^7 + x^2 + x +1 */ + break; + default: + PORT_SetError(SEC_ERROR_INVALID_ARGS); + goto cleanup; + } + ghash->cLen = 0; + ghash->bufLen = 0; + ghash->m = 0; + PORT_Memset(ghash->counterBuf, 0, sizeof(ghash->counterBuf)); + return SECSuccess; +cleanup: + return SECFailure; +} + +/* Destroy a HashContext (Note we zero the digits so this function + * is idempotent if called with freeit == PR_FALSE */ +static void +gcmHash_DestroyContext(gcmHashContext *ghash, PRBool freeit) +{ + PORT_Memset(ghash, 0, sizeof(gcmHashContext)); + if (freeit) { + PORT_Free(ghash); + } +} + +static unsigned long +gcm_shift_one(unsigned long *t, unsigned int count) +{ + unsigned long carry = 0; + unsigned long nextcarry = 0; + unsigned int i; + for (i = 0; i < count; i++) { + nextcarry = t[i] >> ((sizeof(unsigned long) * PR_BITS_PER_BYTE) - 1); + t[i] = (t[i] << 1) | carry; + carry = nextcarry; + } + return carry; +} + +static SECStatus +gcm_getX(gcmHashContext *ghash, unsigned char *T, unsigned int blocksize) +{ + gcm_longs_to_bytes(ghash->X, T, blocksize); + return SECSuccess; +} + +#define GCM_XOR(t, s, len) \ + for 
(l = 0; l < len; l++) \ + t[l] ^= s[l] + +static SECStatus +gcm_HashMult(gcmHashContext *ghash, const unsigned char *buf, + unsigned int count, unsigned int blocksize) +{ + unsigned long C_i[GCM_ARRAY_SIZE]; + unsigned int arraysize = blocksize / sizeof(unsigned long); + unsigned int i, j, k, l; + + for (i = 0; i < count; i++, buf += blocksize) { + ghash->m++; + gcm_bytes_to_longs(C_i, buf, blocksize); + GCM_XOR(C_i, ghash->X, arraysize); + /* multiply X = C_i * H */ + PORT_Memset(ghash->X, 0, sizeof(ghash->X)); + for (j = 0; j < arraysize; j++) { + unsigned long H = ghash->H[j]; + for (k = 0; k < sizeof(unsigned long) * PR_BITS_PER_BYTE; k++) { + if (H & 1) { + GCM_XOR(ghash->X, C_i, arraysize); + } + if (gcm_shift_one(C_i, arraysize)) { + C_i[0] = C_i[0] ^ ghash->R; + } + H = H >> 1; + } + } + GCM_TRACE_X(ghash, "X%d = ") + } + PORT_Memset(C_i, 0, sizeof(C_i)); + return SECSuccess; +} + +static void +gcm_zeroX(gcmHashContext *ghash) +{ + PORT_Memset(ghash->X, 0, sizeof(ghash->X)); + ghash->m = 0; +} +#endif + +/* + * implement GCM GHASH using the freebl GHASH function. The gcm_HashMult + * function always takes blocksize lengths of data. gcmHash_Update will + * format the data properly. + */ +static SECStatus +gcmHash_Update(gcmHashContext *ghash, const unsigned char *buf, + unsigned int len, unsigned int blocksize) +{ + unsigned int blocks; + SECStatus rv; + + ghash->cLen += ((PRUint64)len * PR_BITS_PER_BYTE); /* widen first: the bit count must not wrap in unsigned int */ + + /* first deal with the current buffer of data.
Try to fill it out so + * we can hash it */ + if (ghash->bufLen) { + unsigned int needed = PR_MIN(len, blocksize - ghash->bufLen); + if (needed != 0) { + PORT_Memcpy(ghash->buffer + ghash->bufLen, buf, needed); + } + buf += needed; + len -= needed; + ghash->bufLen += needed; + if (len == 0) { + /* didn't add enough to hash the data, nothing more do do */ + return SECSuccess; + } + PORT_Assert(ghash->bufLen == blocksize); + /* hash the buffer and clear it */ + rv = gcm_HashMult(ghash, ghash->buffer, 1, blocksize); + PORT_Memset(ghash->buffer, 0, blocksize); + ghash->bufLen = 0; + if (rv != SECSuccess) { + return SECFailure; + } + } + /* now hash any full blocks remaining in the data stream */ + blocks = len / blocksize; + if (blocks) { + rv = gcm_HashMult(ghash, buf, blocks, blocksize); + if (rv != SECSuccess) { + return SECFailure; + } + buf += blocks * blocksize; + len -= blocks * blocksize; + } + + /* save any remainder in the buffer to be hashed with the next call */ + if (len != 0) { + PORT_Memcpy(ghash->buffer, buf, len); + ghash->bufLen = len; + } + return SECSuccess; +} + +/* + * write out any partial blocks zero padded through the GHASH engine, + * save the lengths for the final completion of the hash + */ +static SECStatus +gcmHash_Sync(gcmHashContext *ghash, unsigned int blocksize) +{ + int i; + SECStatus rv; + + /* copy the previous counter to the upper block */ + PORT_Memcpy(ghash->counterBuf, &ghash->counterBuf[GCM_HASH_LEN_LEN], + GCM_HASH_LEN_LEN); + /* copy the current counter in the lower block */ + for (i = 0; i < GCM_HASH_LEN_LEN; i++) { + ghash->counterBuf[GCM_HASH_LEN_LEN + i] = + (ghash->cLen >> ((GCM_HASH_LEN_LEN - 1 - i) * PR_BITS_PER_BYTE)) & 0xff; + } + ghash->cLen = 0; + + /* now zero fill the buffer and hash the last block */ + if (ghash->bufLen) { + PORT_Memset(ghash->buffer + ghash->bufLen, 0, blocksize - ghash->bufLen); + rv = gcm_HashMult(ghash, ghash->buffer, 1, blocksize); + PORT_Memset(ghash->buffer, 0, blocksize); + ghash->bufLen 
= 0; + if (rv != SECSuccess) { + return SECFailure; + } + } + return SECSuccess; +} + +/* + * This does the final sync, hashes the lengths, then returns + * "T", the hashed output. + */ +static SECStatus +gcmHash_Final(gcmHashContext *ghash, unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + unsigned int blocksize) +{ + unsigned char T[MAX_BLOCK_SIZE]; + SECStatus rv; + + rv = gcmHash_Sync(ghash, blocksize); + if (rv != SECSuccess) { + goto cleanup; + } + + rv = gcm_HashMult(ghash, ghash->counterBuf, (GCM_HASH_LEN_LEN * 2) / blocksize, + blocksize); + if (rv != SECSuccess) { + goto cleanup; + } + + GCM_TRACE_X(ghash, "GHASH(H,A,C) = ") + + rv = gcm_getX(ghash, T, blocksize); + if (rv != SECSuccess) { + goto cleanup; + } + + if (maxout > blocksize) + maxout = blocksize; + PORT_Memcpy(outbuf, T, maxout); + *outlen = maxout; + rv = SECSuccess; + +cleanup: + PORT_Memset(T, 0, sizeof(T)); + return rv; +} + +SECStatus +gcmHash_Reset(gcmHashContext *ghash, const unsigned char *AAD, + unsigned int AADLen, unsigned int blocksize) +{ + SECStatus rv; + + ghash->cLen = 0; + PORT_Memset(ghash->counterBuf, 0, GCM_HASH_LEN_LEN * 2); + ghash->bufLen = 0; + gcm_zeroX(ghash); + + /* now kick things off by hashing the Additional Authenticated Data */ + if (AADLen != 0) { + rv = gcmHash_Update(ghash, AAD, AADLen, blocksize); + if (rv != SECSuccess) { + return SECFailure; + } + rv = gcmHash_Sync(ghash, blocksize); + if (rv != SECSuccess) { + return SECFailure; + } + } + return SECSuccess; +} + +/************************************************************************** + * Now implement the GCM using gcmHash and CTR * + **************************************************************************/ + +/* state to handle the full GCM operation (hash and counter) */ +struct GCMContextStr { + gcmHashContext ghash_context; + CTRContext ctr_context; + unsigned long tagBits; + unsigned char tagKey[MAX_BLOCK_SIZE]; +}; + +GCMContext * +GCM_CreateContext(void *context, 
freeblCipherFunc cipher, + const unsigned char *params, unsigned int blocksize) +{ + GCMContext *gcm = NULL; + gcmHashContext *ghash; + unsigned char H[MAX_BLOCK_SIZE]; + unsigned int tmp; + PRBool freeCtr = PR_FALSE; + PRBool freeHash = PR_FALSE; + const CK_GCM_PARAMS *gcmParams = (const CK_GCM_PARAMS *)params; + CK_AES_CTR_PARAMS ctrParams; + SECStatus rv; + + if (blocksize > MAX_BLOCK_SIZE || blocksize > sizeof(ctrParams.cb)) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return NULL; + } + gcm = PORT_ZNew(GCMContext); + if (gcm == NULL) { + return NULL; + } + /* first fill in the ghash context */ + ghash = &gcm->ghash_context; + PORT_Memset(H, 0, blocksize); + rv = (*cipher)(context, H, &tmp, blocksize, H, blocksize, blocksize); + if (rv != SECSuccess) { + goto loser; + } + rv = gcmHash_InitContext(ghash, H, blocksize); + if (rv != SECSuccess) { + goto loser; + } + freeHash = PR_TRUE; + + /* fill in the Counter context */ + ctrParams.ulCounterBits = 32; + PORT_Memset(ctrParams.cb, 0, sizeof(ctrParams.cb)); + if ((blocksize == 16) && (gcmParams->ulIvLen == 12)) { + PORT_Memcpy(ctrParams.cb, gcmParams->pIv, gcmParams->ulIvLen); + ctrParams.cb[blocksize - 1] = 1; + } else { + rv = gcmHash_Update(ghash, gcmParams->pIv, gcmParams->ulIvLen, + blocksize); + if (rv != SECSuccess) { + goto loser; + } + rv = gcmHash_Final(ghash, ctrParams.cb, &tmp, blocksize, blocksize); + if (rv != SECSuccess) { + goto loser; + } + } + rv = CTR_InitContext(&gcm->ctr_context, context, cipher, + (unsigned char *)&ctrParams, blocksize); + if (rv != SECSuccess) { + goto loser; + } + freeCtr = PR_TRUE; + + /* fill in the gcm structure */ + gcm->tagBits = gcmParams->ulTagBits; /* save for final step */ + /* calculate the final tag key. NOTE: gcm->tagKey is zero to start with. 
+ * if this assumption changes, we would need to explicitly clear it here */ + rv = CTR_Update(&gcm->ctr_context, gcm->tagKey, &tmp, blocksize, + gcm->tagKey, blocksize, blocksize); + if (rv != SECSuccess) { + goto loser; + } + + /* finally mix in the AAD data */ + rv = gcmHash_Reset(ghash, gcmParams->pAAD, gcmParams->ulAADLen, blocksize); + if (rv != SECSuccess) { + goto loser; + } + + return gcm; + +loser: + if (freeCtr) { + CTR_DestroyContext(&gcm->ctr_context, PR_FALSE); + } + if (freeHash) { + gcmHash_DestroyContext(&gcm->ghash_context, PR_FALSE); + } + if (gcm) { + PORT_Free(gcm); + } + return NULL; +} + +void +GCM_DestroyContext(GCMContext *gcm, PRBool freeit) +{ + /* these two are statically allocated and will be freed when we free + * gcm. call their destroy functions to free up any locally + * allocated data (like mp_int's) */ + CTR_DestroyContext(&gcm->ctr_context, PR_FALSE); + gcmHash_DestroyContext(&gcm->ghash_context, PR_FALSE); + PORT_Memset(&gcm->tagBits, 0, sizeof(gcm->tagBits)); + PORT_Memset(gcm->tagKey, 0, sizeof(gcm->tagKey)); + if (freeit) { + PORT_Free(gcm); + } +} + +static SECStatus +gcm_GetTag(GCMContext *gcm, unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + unsigned int blocksize) +{ + unsigned int tagBytes; + unsigned int extra; + unsigned int i; + SECStatus rv; + + tagBytes = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE; + extra = tagBytes * PR_BITS_PER_BYTE - gcm->tagBits; + + if (outbuf == NULL) { + *outlen = tagBytes; + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + + if (maxout < tagBytes) { + *outlen = tagBytes; + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + maxout = tagBytes; + rv = gcmHash_Final(&gcm->ghash_context, outbuf, outlen, maxout, blocksize); + if (rv != SECSuccess) { + return SECFailure; + } + + GCM_TRACE_BLOCK("GHASH=", outbuf, blocksize); + GCM_TRACE_BLOCK("Y0=", gcm->tagKey, blocksize); + for (i = 0; i < *outlen; i++) { + outbuf[i] ^= 
gcm->tagKey[i]; + } + GCM_TRACE_BLOCK("Y0=", gcm->tagKey, blocksize); + GCM_TRACE_BLOCK("T=", outbuf, blocksize); + /* mask off any extra bits we got */ + if (extra) { + outbuf[tagBytes - 1] &= ~((1 << extra) - 1); + } + return SECSuccess; +} + +/* + * See The Galois/Counter Mode of Operation, McGrew and Viega. + * GCM is basically counter mode with a specific initialization and + * built in macing operation. + */ +SECStatus +GCM_EncryptUpdate(GCMContext *gcm, unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + unsigned int blocksize) +{ + SECStatus rv; + unsigned int tagBytes; + unsigned int len; + + tagBytes = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE; + if (UINT_MAX - inlen < tagBytes) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + if (maxout < inlen + tagBytes) { + *outlen = inlen + tagBytes; + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + + rv = CTR_Update(&gcm->ctr_context, outbuf, outlen, maxout, + inbuf, inlen, blocksize); + if (rv != SECSuccess) { + return SECFailure; + } + rv = gcmHash_Update(&gcm->ghash_context, outbuf, *outlen, blocksize); + if (rv != SECSuccess) { + PORT_Memset(outbuf, 0, *outlen); /* clear the output buffer */ + *outlen = 0; + return SECFailure; + } + rv = gcm_GetTag(gcm, outbuf + *outlen, &len, maxout - *outlen, blocksize); + if (rv != SECSuccess) { + PORT_Memset(outbuf, 0, *outlen); /* clear the output buffer */ + *outlen = 0; + return SECFailure; + }; + *outlen += len; + return SECSuccess; +} + +/* + * See The Galois/Counter Mode of Operation, McGrew and Viega. + * GCM is basically counter mode with a specific initialization and + * built in macing operation. NOTE: the only difference between Encrypt + * and Decrypt is when we calculate the mac. 
That is because the mac must
+ * always be calculated on the cipher text, not the plain text, so for
+ * encrypt, we do the CTR update first and for decrypt we do the mac first.
+ */
+SECStatus
+GCM_DecryptUpdate(GCMContext *gcm, unsigned char *outbuf,
+                  unsigned int *outlen, unsigned int maxout,
+                  const unsigned char *inbuf, unsigned int inlen,
+                  unsigned int blocksize)
+{
+    /* The input is laid out as: cipher text || authentication tag. */
+    const unsigned int tagBytes =
+        (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE;
+    unsigned char computedTag[MAX_BLOCK_SIZE];
+    const unsigned char *receivedTag;
+    unsigned int cipherLen;
+    unsigned int computedLen;
+    int mismatch;
+
+    /* the input must at least hold the authentication block */
+    if (inlen < tagBytes) {
+        PORT_SetError(SEC_ERROR_INPUT_LEN);
+        return SECFailure;
+    }
+    cipherLen = inlen - tagBytes;
+    receivedTag = inbuf + cipherLen;
+
+    /* mac the cipher text and derive the tag we expect to see */
+    if (gcmHash_Update(&gcm->ghash_context, inbuf, cipherLen, blocksize) !=
+        SECSuccess) {
+        return SECFailure;
+    }
+    if (gcm_GetTag(gcm, computedTag, &computedLen, blocksize, blocksize) !=
+        SECSuccess) {
+        return SECFailure;
+    }
+
+    /* Don't decrypt if we can't authenticate the encrypted data!
+     * This assumes that if tagBits is not a multiple of 8, the received tag
+     * preserves the masked off missing bits. The computed tag is wiped
+     * whatever the outcome of the comparison. */
+    mismatch = NSS_SecureMemcmp(computedTag, receivedTag, tagBytes);
+    PORT_Memset(computedTag, 0, sizeof(computedTag));
+    if (mismatch != 0) {
+        /* force a CKR_ENCRYPTED_DATA_INVALID error in softoken */
+        PORT_SetError(SEC_ERROR_BAD_DATA);
+        return SECFailure;
+    }
+
+    /* authenticated: finish the decryption */
+    return CTR_Update(&gcm->ctr_context, outbuf, outlen, maxout,
+                      inbuf, cipherLen, blocksize);
+}
diff --git a/security/nss/lib/freebl/gcm.h b/security/nss/lib/freebl/gcm.h
new file mode 100644
index 000000000..1cdba534d
--- /dev/null
+++ b/security/nss/lib/freebl/gcm.h
@@ -0,0 +1,31 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
+
+#ifndef GCM_H
+#define GCM_H 1
+
+#include "blapii.h"
+
+typedef struct GCMContextStr GCMContext;
+
+/*
+ * The context argument is the inner cipher context to use with cipher. The
+ * GCMContext does not own context. context needs to remain valid for as long
+ * as the GCMContext is valid.
+ *
+ * The cipher argument is a block cipher in the ECB encrypt mode.
+ *
+ * The params argument is interpreted as a CK_GCM_PARAMS (IV, AAD and tag
+ * length in bits). Returns a newly allocated context, or NULL on failure
+ * (including a blocksize larger than the implementation supports).
+ */
+GCMContext *GCM_CreateContext(void *context, freeblCipherFunc cipher,
+                              const unsigned char *params, unsigned int blocksize);
+/*
+ * Releases the resources held inside gcm and clears the stored tag key.
+ * The GCMContext structure itself is only freed when freeit is true.
+ */
+void GCM_DestroyContext(GCMContext *gcm, PRBool freeit);
+/*
+ * Encrypts inlen bytes of inbuf into outbuf and appends the authentication
+ * tag, so maxout must be at least inlen plus the tag length in bytes. When
+ * it is not, *outlen receives the required size and the call fails with
+ * SEC_ERROR_OUTPUT_LEN.
+ */
+SECStatus GCM_EncryptUpdate(GCMContext *gcm, unsigned char *outbuf,
+                            unsigned int *outlen, unsigned int maxout,
+                            const unsigned char *inbuf, unsigned int inlen,
+                            unsigned int blocksize);
+/*
+ * Expects inbuf to hold the cipher text followed by the authentication tag.
+ * The tag is verified before anything is decrypted: a mismatch fails with
+ * SEC_ERROR_BAD_DATA, an input shorter than the tag with SEC_ERROR_INPUT_LEN.
+ */
+SECStatus GCM_DecryptUpdate(GCMContext *gcm, unsigned char *outbuf,
+                            unsigned int *outlen, unsigned int maxout,
+                            const unsigned char *inbuf, unsigned int inlen,
+                            unsigned int blocksize);
+
+#endif
diff --git a/security/nss/lib/freebl/genload.c b/security/nss/lib/freebl/genload.c
new file mode 100644
index 000000000..832deb58c
--- /dev/null
+++ b/security/nss/lib/freebl/genload.c
@@ -0,0 +1,167 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * This file is meant to be included by other .c files.
+ * This file takes a "parameter", the scope which includes this
+ * code shall declare this variable:
+ *     const char *NameOfThisSharedLib;
+ *
+ * NameOfThisSharedLib:
+ *     The file name of the shared library that shall be used as the
+ *     "reference library". The loader will attempt to load the requested
+ *     library from the same directory as the reference library.
+ */
+
+#ifdef XP_UNIX
+#include <unistd.h>
+#define BL_MAXSYMLINKS 20
+
+/*
+ * If 'link' is a symbolic link, this function follows the symbolic links
+ * and returns the pathname of the ultimate source of the symbolic links.
+ * If 'link' is not a symbolic link, this function returns NULL.
+ * The caller should call PR_Free to free the string returned by this
+ * function.
+ *
+ * NULL is also returned when memory cannot be allocated. The readlink()
+ * based implementation follows at most BL_MAXSYMLINKS levels of links and
+ * rejects a NULL 'link' with PR_INVALID_ARGUMENT_ERROR.
+ *
+ * NOTE(review): the __GLIBC__ implementation relies on realpath(), which
+ * canonicalizes any existing path, so it presumably returns a non-NULL
+ * result even when 'link' is not a symbolic link -- confirm that callers
+ * tolerate this.
+ */
+static char*
+loader_GetOriginalPathname(const char* link)
+{
+#ifdef __GLIBC__
+    char* tmp = realpath(link, NULL);
+    char* resolved;
+    if (!tmp)
+        return NULL;
+    /* The result is copied into PR_Malloc'ed memory because PR_Free might
+     * not be using free(); the realpath() buffer itself is released with
+     * free() whether or not the copy could be allocated. */
+    resolved = PR_Malloc(strlen(tmp) + 1);
+    if (resolved) {
+        strcpy(resolved, tmp);
+    }
+    free(tmp);
+    return resolved;
+#else
+    char* resolved = NULL;
+    char* input = NULL;
+    PRUint32 iterations = 0;
+    PRInt32 len = 0, retlen = 0;
+    if (!link) {
+        PR_SetError(PR_INVALID_ARGUMENT_ERROR, 0);
+        return NULL;
+    }
+    len = PR_MAX(1024, strlen(link) + 1);
+    resolved = PR_Malloc(len);
+    input = PR_Malloc(len);
+    if (!resolved || !input) {
+        if (resolved) {
+            PR_Free(resolved);
+        }
+        if (input) {
+            PR_Free(input);
+        }
+        return NULL;
+    }
+    strcpy(input, link);
+    /* Each successful readlink() result becomes the next input; the two
+     * buffers simply swap roles on every iteration. */
+    while ((iterations++ < BL_MAXSYMLINKS) &&
+           ((retlen = readlink(input, resolved, len - 1)) > 0)) {
+        char* tmp = input;
+        resolved[retlen] = '\0'; /* NULL termination */
+        input = resolved;
+        resolved = tmp;
+    }
+    PR_Free(resolved);
+    /* The very first readlink() failed: 'link' is not a symbolic link. */
+    if (iterations == 1 && retlen < 0) {
+        PR_Free(input);
+        input = NULL;
+    }
+    return input;
+#endif
+}
+#endif /* XP_UNIX */
+
+/*
+ * Load the library with the file name 'name' residing in the same
+ * directory as the reference library, whose pathname is 'referencePath'.
+ */
+static PRLibrary*
+loader_LoadLibInReferenceDir(const char* referencePath, const char* name)
+{
+    PRLibrary* handle;
+    PRLibSpec spec;
+    char* candidate;
+    size_t dirLen;
+    const char* lastSep = strrchr(referencePath, PR_GetDirectorySeparator());
+
+    /* Without a directory component there is nothing to substitute. */
+    if (!lastSep) {
+        return NULL;
+    }
+
+    /* Keep the directory part (separator included) and append 'name'. */
+    dirLen = 1 + lastSep - referencePath;
+    candidate = (char*)PORT_Alloc(strlen(name) + dirLen + 1);
+    if (!candidate) {
+        return NULL;
+    }
+    memcpy(candidate, referencePath, dirLen);
+    strcpy(candidate + dirLen, name);
+#ifdef DEBUG_LOADER
+    PR_fprintf(PR_STDOUT, "\nAttempting to load fully-qualified %s\n",
+               candidate);
+#endif
+    spec.type = PR_LibSpec_Pathname;
+    spec.value.pathname = candidate;
+    handle = PR_LoadLibraryWithFlags(spec, PR_LD_NOW | PR_LD_LOCAL);
+    PORT_Free(candidate);
+    return handle;
+}
+
+/*
+ * Load 'nameToLoad', looking first in the directory that holds the shared
+ * library containing this function (as reported by
+ * PR_GetLibraryFilePathname), then, on XP_UNIX, in the directory obtained
+ * by resolving that pathname as a symbolic link, and finally by handing the
+ * bare name to PR_LoadLibraryWithFlags. Returns NULL if every attempt fails.
+ */
+
+static PRLibrary*
+loader_LoadLibrary(const char* nameToLoad)
+{
+    PRLibrary* loaded = NULL;
+    PRLibSpec spec;
+    char* selfPath;
+
+    /* Get the pathname for nameOfAlreadyLoadedLib, i.e. /usr/lib/libnss3.so
+     * PR_GetLibraryFilePathname works with either the base library name or a
+     * function pointer, depending on the platform. An exported symbol such
+     * as NSC_GetFunctionList cannot be queried, because on some platforms
+     * symbols in loaded implicit dependencies cannot be found. The address
+     * of this very function does the job instead.
+     */
+    selfPath = PR_GetLibraryFilePathname(NameOfThisSharedLib,
+                                         (PRFuncPtr)&loader_LoadLibrary);
+
+    if (selfPath) {
+        loaded = loader_LoadLibInReferenceDir(selfPath, nameToLoad);
+#ifdef XP_UNIX
+        if (!loaded) {
+            /* selfPath may be a symbolic link: resolve it and retry. */
+            char* realPath = loader_GetOriginalPathname(selfPath);
+            if (realPath) {
+                PR_Free(selfPath);
+                selfPath = realPath;
+                loaded = loader_LoadLibInReferenceDir(selfPath, nameToLoad);
+            }
+        }
+#endif
+        PR_Free(selfPath);
+    }
+    if (!loaded) {
+        /* Last resort: let the loader search for the bare name. */
+#ifdef DEBUG_LOADER
+        PR_fprintf(PR_STDOUT, "\nAttempting to load %s\n", nameToLoad);
+#endif
+        spec.type = PR_LibSpec_Pathname;
+        spec.value.pathname = nameToLoad;
+        loaded = PR_LoadLibraryWithFlags(spec, PR_LD_NOW | PR_LD_LOCAL);
+    }
+#ifdef DEBUG_LOADER
+    if (NULL == loaded) {
+        PR_fprintf(PR_STDOUT, "\nLoading failed : %s.\n", nameToLoad);
+    }
+#endif
+    return loaded;
+}
diff --git a/security/nss/lib/freebl/hmacct.c b/security/nss/lib/freebl/hmacct.c
new file mode 100644
index 000000000..c7815ac05
--- /dev/null
+++ b/security/nss/lib/freebl/hmacct.c
@@ -0,0 +1,335 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "secport.h"
+#include "hasht.h"
+#include "blapit.h"
+#include "hmacct.h"
+#include "secerr.h"
+
+/* MAX_HASH_BIT_COUNT_BYTES is the maximum number of bytes in the hash's length
+ * field. (SHA-384/512 have 128-bit length.) */
+#define MAX_HASH_BIT_COUNT_BYTES 16
+
+/* Some utility functions are needed:
+ *
+ * These macros return the given value with the MSB copied to all the other
+ * bits. They use the fact that an arithmetic shift shifts-in the sign bit.
+ * However, this is not ensured by the C standard so you may need to replace
+ * them with something else on odd CPUs.
+ *
+ * Note: the argument to these macros must be an unsigned int.
+ * */
+#define DUPLICATE_MSB_TO_ALL(x) ((unsigned int)((int)(x) >> (sizeof(int) * 8 - 1)))
+#define DUPLICATE_MSB_TO_ALL_8(x) ((unsigned char)(DUPLICATE_MSB_TO_ALL(x)))
+
+/* constantTimeGE returns 0xff if a>=b and 0x00 otherwise, where a, b <
+ * MAX_UINT/2. */
+static unsigned char
+constantTimeGE(unsigned int a, unsigned int b)
+{
+    a -= b;
+    return DUPLICATE_MSB_TO_ALL(~a);
+}
+
+/* constantTimeEQ8 returns 0xff if a==b and 0x00 otherwise. */
+static unsigned char
+constantTimeEQ8(unsigned char a, unsigned char b)
+{
+    unsigned int c = a ^ b;
+    c--;
+    return DUPLICATE_MSB_TO_ALL_8(c);
+}
+
+/* MAC performs a constant time SSLv3/TLS MAC of |dataLen| bytes of |data|,
+ * where |dataLen| includes both the authenticated bytes and the MAC tag from
+ * the sender. |dataLen| must be >= the length of the MAC tag.
+ *
+ * |dataTotalLen| is >= |dataLen| and also accounts for any padding bytes
+ * that may follow the sender's MAC. (Only a single block of padding may
+ * follow in SSLv3, or up to 255 bytes in TLS.)
+ *
+ * Since the results of decryption are secret information (otherwise a
+ * padding-oracle is created), this function is constant-time with respect to
+ * |dataLen|.
+ *
+ * |header| contains either the 13-byte TLS header (containing the sequence
+ * number, record type etc), or it contains the SSLv3 header with the SSLv3
+ * padding bytes etc.
+ *
+ * The MAC is written to |mdOut| (at most |mdOutMax| bytes) and its length to
+ * |*mdOutLen|. Returns SECFailure if the hash context cannot be created or,
+ * for TLS, if |macSecretLen| exceeds the size of the HMAC pad buffer
+ * (SEC_ERROR_INVALID_ARGS); SECSuccess otherwise. */
+static SECStatus
+MAC(unsigned char *mdOut,
+    unsigned int *mdOutLen,
+    unsigned int mdOutMax,
+    const SECHashObject *hashObj,
+    const unsigned char *macSecret,
+    unsigned int macSecretLen,
+    const unsigned char *header,
+    unsigned int headerLen,
+    const unsigned char *data,
+    unsigned int dataLen,
+    unsigned int dataTotalLen,
+    unsigned char isSSLv3)
+{
+    void *mdState = hashObj->create();
+    const unsigned int mdSize = hashObj->length;
+    const unsigned int mdBlockSize = hashObj->blocklength;
+    /* mdLengthSize is the number of bytes in the length field that terminates
+     * the hash.
+     *
+     * This assumes that hash functions with a 64 byte block size use a 64-bit
+     * length, and otherwise they use a 128-bit length. This is true of {MD5,
+     * SHA*} (which are all of the hash functions specified for use with TLS
+     * today). */
+    const unsigned int mdLengthSize = mdBlockSize == 64 ? 8 : 16;
+
+    const unsigned int sslv3PadLen = hashObj->type == HASH_AlgMD5 ? 48 : 40;
+
+    /* varianceBlocks is the number of blocks of the hash that we have to
+     * calculate in constant time because they could be altered by the
+     * padding value.
+     *
+     * In SSLv3, the padding must be minimal so the end of the plaintext
+     * varies by, at most, 15+20 = 35 bytes. (We conservatively assume that
+     * the MAC size varies from 0..20 bytes.) In case the 9 bytes of hash
+     * termination (0x80 + 64-bit length) don't fit in the final block, we
+     * say that the final two blocks can vary based on the padding.
+     *
+     * TLSv1 has MACs up to 48 bytes long (SHA-384) and the padding is not
+     * required to be minimal. Therefore we say that the final six blocks
+     * can vary based on the padding.
+     *
+     * Later in the function, if the message is short and there obviously
+     * cannot be this many blocks then varianceBlocks can be reduced. */
+    unsigned int varianceBlocks = isSSLv3 ? 2 : 6;
+    /* From now on we're dealing with the MAC, which conceptually has 13
+     * bytes of `header' before the start of the data (TLS) or 71/75 bytes
+     * (SSLv3) */
+    const unsigned int len = dataTotalLen + headerLen;
+    /* maxMACBytes contains the maximum number of bytes in the MAC, including
+     * |header|, assuming that there's no padding. */
+    const unsigned int maxMACBytes = len - mdSize - 1;
+    /* numBlocks is the maximum number of hash blocks. */
+    const unsigned int numBlocks =
+        (maxMACBytes + 1 + mdLengthSize + mdBlockSize - 1) / mdBlockSize;
+    /* macEndOffset is the index just past the end of the data to be
+     * MACed. */
+    const unsigned int macEndOffset = dataLen + headerLen - mdSize;
+    /* c is the index of the 0x80 byte in the final hash block that
+     * contains application data. */
+    const unsigned int c = macEndOffset % mdBlockSize;
+    /* indexA is the hash block number that contains the 0x80 terminating
+     * value. */
+    const unsigned int indexA = macEndOffset / mdBlockSize;
+    /* indexB is the hash block number that contains the 64-bit hash
+     * length, in bits. */
+    const unsigned int indexB = (macEndOffset + mdLengthSize) / mdBlockSize;
+    /* bits is the hash-length in bits. It includes the additional hash
+     * block for the masked HMAC key, or whole of |header| in the case of
+     * SSLv3. */
+    unsigned int bits;
+    /* In order to calculate the MAC in constant time we have to handle
+     * the final blocks specially because the padding value could cause the
+     * end to appear somewhere in the final |varianceBlocks| blocks and we
+     * can't leak where. However, |numStartingBlocks| worth of data can
+     * be hashed right away because no padding value can affect whether
+     * they are plaintext. */
+    unsigned int numStartingBlocks = 0;
+    /* k is the starting byte offset into the conceptual header||data where
+     * we start processing. */
+    unsigned int k = 0;
+    unsigned char lengthBytes[MAX_HASH_BIT_COUNT_BYTES];
+    /* hmacPad is the masked HMAC key. */
+    unsigned char hmacPad[HASH_BLOCK_LENGTH_MAX];
+    unsigned char firstBlock[HASH_BLOCK_LENGTH_MAX];
+    unsigned char macOut[HASH_LENGTH_MAX];
+    unsigned i, j;
+
+    /* The hash context is allocated; nothing below can work without it. */
+    if (mdState == NULL) {
+        return SECFailure;
+    }
+    /* For TLS the secret is copied into |hmacPad|. The assertion further
+     * down disappears in optimized builds, so the bound is also enforced
+     * at run time. (The secret's length is not secret, so this branch does
+     * not affect the constant-time property.) */
+    if (!isSSLv3 && macSecretLen > sizeof(hmacPad)) {
+        hashObj->destroy(mdState, PR_TRUE);
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    /* For SSLv3, if we're going to have any starting blocks then we need
+     * at least two because the header is larger than a single block. */
+    if (numBlocks > varianceBlocks + (isSSLv3 ? 1 : 0)) {
+        numStartingBlocks = numBlocks - varianceBlocks;
+        k = mdBlockSize * numStartingBlocks;
+    }
+
+    bits = 8 * macEndOffset;
+    hashObj->begin(mdState);
+    if (!isSSLv3) {
+        /* Compute the initial HMAC block. For SSLv3, the padding and
+         * secret bytes are included in |header| because they take more
+         * than a single block. */
+        bits += 8 * mdBlockSize;
+        memset(hmacPad, 0, mdBlockSize);
+        PORT_Assert(macSecretLen <= sizeof(hmacPad));
+        memcpy(hmacPad, macSecret, macSecretLen);
+        for (i = 0; i < mdBlockSize; i++)
+            hmacPad[i] ^= 0x36;
+        hashObj->update(mdState, hmacPad, mdBlockSize);
+    }
+
+    j = 0;
+    memset(lengthBytes, 0, sizeof(lengthBytes));
+    if (mdLengthSize == 16) {
+        j = 8;
+    }
+    if (hashObj->type == HASH_AlgMD5) {
+        /* MD5 appends a little-endian length. */
+        for (i = 0; i < 4; i++) {
+            lengthBytes[i + j] = bits >> (8 * i);
+        }
+    } else {
+        /* All other TLS hash functions use a big-endian length. */
+        for (i = 0; i < 4; i++) {
+            lengthBytes[4 + i + j] = bits >> (8 * (3 - i));
+        }
+    }
+
+    if (k > 0) {
+        if (isSSLv3) {
+            /* The SSLv3 header is larger than a single block.
+             * overhang is the number of bytes beyond a single
+             * block that the header consumes: either 7 bytes
+             * (SHA1) or 11 bytes (MD5). */
+            const unsigned int overhang = headerLen - mdBlockSize;
+            hashObj->update(mdState, header, mdBlockSize);
+            memcpy(firstBlock, header + mdBlockSize, overhang);
+            memcpy(firstBlock + overhang, data, mdBlockSize - overhang);
+            hashObj->update(mdState, firstBlock, mdBlockSize);
+            for (i = 1; i < k / mdBlockSize - 1; i++) {
+                hashObj->update(mdState, data + mdBlockSize * i - overhang,
+                                mdBlockSize);
+            }
+        } else {
+            /* k is a multiple of mdBlockSize. */
+            memcpy(firstBlock, header, 13);
+            memcpy(firstBlock + 13, data, mdBlockSize - 13);
+            hashObj->update(mdState, firstBlock, mdBlockSize);
+            for (i = 1; i < k / mdBlockSize; i++) {
+                hashObj->update(mdState, data + mdBlockSize * i - 13,
+                                mdBlockSize);
+            }
+        }
+    }
+
+    memset(macOut, 0, sizeof(macOut));
+
+    /* We now process the final hash blocks. For each block, we construct
+     * it in constant time. If i == indexA then we'll include the 0x80
+     * bytes and zero pad etc. For each block we selectively copy it, in
+     * constant time, to |macOut|. */
+    for (i = numStartingBlocks; i <= numStartingBlocks + varianceBlocks; i++) {
+        unsigned char block[HASH_BLOCK_LENGTH_MAX];
+        unsigned char isBlockA = constantTimeEQ8(i, indexA);
+        unsigned char isBlockB = constantTimeEQ8(i, indexB);
+        for (j = 0; j < mdBlockSize; j++) {
+            unsigned char isPastC = isBlockA & constantTimeGE(j, c);
+            unsigned char isPastCPlus1 = isBlockA & constantTimeGE(j, c + 1);
+            unsigned char b = 0;
+            if (k < headerLen) {
+                b = header[k];
+            } else if (k < dataTotalLen + headerLen) {
+                b = data[k - headerLen];
+            }
+            k++;
+
+            /* If this is the block containing the end of the
+             * application data, and we are at the offset for the
+             * 0x80 value, then overwrite b with 0x80. */
+            b = (b & ~isPastC) | (0x80 & isPastC);
+            /* If this is the block containing the end of the
+             * application data and we're past the 0x80 value then
+             * just write zero. */
+            b = b & ~isPastCPlus1;
+            /* If this is indexB (the final block), but not
+             * indexA (the end of the data), then the 64-bit
+             * length didn't fit into indexA and we're having to
+             * add an extra block of zeros. */
+            b &= ~isBlockB | isBlockA;
+
+            /* The final bytes of one of the blocks contains the length. */
+            if (j >= mdBlockSize - mdLengthSize) {
+                /* If this is indexB, write a length byte. */
+                b = (b & ~isBlockB) |
+                    (isBlockB & lengthBytes[j - (mdBlockSize - mdLengthSize)]);
+            }
+            block[j] = b;
+        }
+
+        hashObj->update(mdState, block, mdBlockSize);
+        hashObj->end_raw(mdState, block, NULL, mdSize);
+        /* If this is indexB, copy the hash value to |macOut|. */
+        for (j = 0; j < mdSize; j++) {
+            macOut[j] |= block[j] & isBlockB;
+        }
+    }
+
+    hashObj->begin(mdState);
+
+    if (isSSLv3) {
+        /* We repurpose |hmacPad| to contain the SSLv3 pad2 block. */
+        for (i = 0; i < sslv3PadLen; i++)
+            hmacPad[i] = 0x5c;
+
+        hashObj->update(mdState, macSecret, macSecretLen);
+        hashObj->update(mdState, hmacPad, sslv3PadLen);
+        hashObj->update(mdState, macOut, mdSize);
+    } else {
+        /* Complete the HMAC in the standard manner. 0x6a is 0x36 ^ 0x5c,
+         * so this turns the inner pad into the outer pad in place. */
+        for (i = 0; i < mdBlockSize; i++)
+            hmacPad[i] ^= 0x6a;
+
+        hashObj->update(mdState, hmacPad, mdBlockSize);
+        hashObj->update(mdState, macOut, mdSize);
+    }
+
+    hashObj->end(mdState, mdOut, mdOutLen, mdOutMax);
+    hashObj->destroy(mdState, PR_TRUE);
+
+    /* Best-effort scrubbing of the key- and plaintext-derived stack data. */
+    PORT_Memset(lengthBytes, 0, sizeof(lengthBytes));
+    PORT_Memset(hmacPad, 0, sizeof(hmacPad));
+    PORT_Memset(firstBlock, 0, sizeof(firstBlock));
+    PORT_Memset(macOut, 0, sizeof(macOut));
+
+    return SECSuccess;
+}
+
+/* HMAC_ConstantTime computes the TLS (HMAC) variant of the constant-time
+ * MAC; see MAC() for the meaning of the arguments. Fails if |hashObj| does
+ * not provide an end_raw method. */
+SECStatus
+HMAC_ConstantTime(
+    unsigned char *result,
+    unsigned int *resultLen,
+    unsigned int maxResultLen,
+    const SECHashObject *hashObj,
+    const unsigned char *secret,
+    unsigned int secretLen,
+    const unsigned char *header,
+    unsigned int headerLen,
+    const unsigned char *body,
+    unsigned int bodyLen,
+    unsigned int bodyTotalLen)
+{
+    if (hashObj->end_raw == NULL) {
+        return SECFailure;
+    }
+    return MAC(result, resultLen, maxResultLen, hashObj, secret, secretLen,
+               header, headerLen, body, bodyLen, bodyTotalLen,
+               0 /* not SSLv3 */);
+}
+
+/* SSLv3_MAC_ConstantTime computes the SSLv3 variant of the constant-time
+ * MAC; see MAC() for the meaning of the arguments. Fails if |hashObj| does
+ * not provide an end_raw method. */
+SECStatus
+SSLv3_MAC_ConstantTime(
+    unsigned char *result,
+    unsigned int *resultLen,
+    unsigned int maxResultLen,
+    const SECHashObject *hashObj,
+    const unsigned char *secret,
+    unsigned int secretLen,
+    const unsigned char *header,
+    unsigned int headerLen,
+    const unsigned char *body,
+    unsigned int bodyLen,
+    unsigned int bodyTotalLen)
+{
+    if (hashObj->end_raw == NULL) {
+        return SECFailure;
+    }
+    return MAC(result, resultLen, maxResultLen, hashObj, secret, secretLen,
+               header, headerLen, body, bodyLen, bodyTotalLen,
+               1 /* SSLv3 */);
+}
diff --git a/security/nss/lib/freebl/hmacct.h b/security/nss/lib/freebl/hmacct.h
new file mode 100644
index 000000000..a773ea89c
--- /dev/null
+++ b/security/nss/lib/freebl/hmacct.h
@@ -0,0 +1,38 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0.
If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _HMACCT_H_
+#define _HMACCT_H_
+
+SEC_BEGIN_PROTOS
+
+/* Constant-time TLS (HMAC) record MAC.
+ *
+ * |body| holds |bodyLen| bytes made of the authenticated data followed by
+ * the sender's MAC tag; |bodyTotalLen| (>= |bodyLen|) also counts any
+ * padding bytes that follow the tag. |header| is the 13-byte TLS header.
+ * The computed MAC is written to |result| (at most |maxResultLen| bytes)
+ * and its length to |*resultLen|. Returns SECFailure when |hashObj| has no
+ * end_raw method. */
+extern SECStatus HMAC_ConstantTime(
+    unsigned char *result,
+    unsigned int *resultLen,
+    unsigned int maxResultLen,
+    const SECHashObject *hashObj,
+    const unsigned char *secret,
+    unsigned int secretLen,
+    const unsigned char *header,
+    unsigned int headerLen,
+    const unsigned char *body,
+    unsigned int bodyLen,
+    unsigned int bodyTotalLen);
+
+/* Constant-time SSLv3 record MAC.
+ *
+ * Same argument layout as HMAC_ConstantTime, except that |header| is the
+ * SSLv3 header including the SSLv3 padding bytes. Returns SECFailure when
+ * |hashObj| has no end_raw method. */
+extern SECStatus SSLv3_MAC_ConstantTime(
+    unsigned char *result,
+    unsigned int *resultLen,
+    unsigned int maxResultLen,
+    const SECHashObject *hashObj,
+    const unsigned char *secret,
+    unsigned int secretLen,
+    const unsigned char *header,
+    unsigned int headerLen,
+    const unsigned char *body,
+    unsigned int bodyLen,
+    unsigned int bodyTotalLen);
+
+SEC_END_PROTOS
+
+#endif
diff --git a/security/nss/lib/freebl/intel-aes-x64-masm.asm b/security/nss/lib/freebl/intel-aes-x64-masm.asm
new file mode 100644
index 000000000..ef5c76ba2
--- /dev/null
+++ b/security/nss/lib/freebl/intel-aes-x64-masm.asm
@@ -0,0 +1,971 @@
+; LICENSE:
+; This submission to NSS is to be made available under the terms of the
+; Mozilla Public License, v. 2.0. You can obtain one at http:
+; //mozilla.org/MPL/2.0/.
+;###############################################################################
+; Copyright(c) 2014, Intel Corp.
+; Developers and authors: +; Shay Gueron and Vlad Krasnov +; Intel Corporation, Israel Development Centre, Haifa, Israel +; Please send feedback directly to crypto.feedback.alias@intel.com + + +.DATA +ALIGN 16 +Lmask dd 0c0f0e0dh,0c0f0e0dh,0c0f0e0dh,0c0f0e0dh +Lmask192 dd 004070605h, 004070605h, 004070605h, 004070605h +Lmask256 dd 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh +Lcon1 dd 1,1,1,1 +Lcon2 dd 1bh,1bh,1bh,1bh + +.CODE + +ctx textequ +output textequ +input textequ +inputLen textequ + + +aes_rnd MACRO i + movdqu xmm8, [i*16 + ctx] + aesenc xmm0, xmm8 + aesenc xmm1, xmm8 + aesenc xmm2, xmm8 + aesenc xmm3, xmm8 + aesenc xmm4, xmm8 + aesenc xmm5, xmm8 + aesenc xmm6, xmm8 + aesenc xmm7, xmm8 + ENDM + +aes_last_rnd MACRO i + movdqu xmm8, [i*16 + ctx] + aesenclast xmm0, xmm8 + aesenclast xmm1, xmm8 + aesenclast xmm2, xmm8 + aesenclast xmm3, xmm8 + aesenclast xmm4, xmm8 + aesenclast xmm5, xmm8 + aesenclast xmm6, xmm8 + aesenclast xmm7, xmm8 + ENDM + +aes_dec_rnd MACRO i + movdqu xmm8, [i*16 + ctx] + aesdec xmm0, xmm8 + aesdec xmm1, xmm8 + aesdec xmm2, xmm8 + aesdec xmm3, xmm8 + aesdec xmm4, xmm8 + aesdec xmm5, xmm8 + aesdec xmm6, xmm8 + aesdec xmm7, xmm8 + ENDM + +aes_dec_last_rnd MACRO i + movdqu xmm8, [i*16 + ctx] + aesdeclast xmm0, xmm8 + aesdeclast xmm1, xmm8 + aesdeclast xmm2, xmm8 + aesdeclast xmm3, xmm8 + aesdeclast xmm4, xmm8 + aesdeclast xmm5, xmm8 + aesdeclast xmm6, xmm8 + aesdeclast xmm7, xmm8 + ENDM + + +gen_aes_ecb_func MACRO enc, rnds + +LOCAL loop8 +LOCAL loop1 +LOCAL bail + + xor inputLen, inputLen + mov input, [rsp + 1*8 + 8*4] + mov inputLen, [rsp + 1*8 + 8*5] + + sub rsp, 3*16 + + movdqu [rsp + 0*16], xmm6 + movdqu [rsp + 1*16], xmm7 + movdqu [rsp + 2*16], xmm8 + + lea ctx, [48+ctx] + +loop8: + cmp inputLen, 8*16 + jb loop1 + + movdqu xmm0, [0*16 + input] + movdqu xmm1, [1*16 + input] + movdqu xmm2, [2*16 + input] + movdqu xmm3, [3*16 + input] + movdqu xmm4, [4*16 + input] + movdqu xmm5, [5*16 + input] + movdqu xmm6, [6*16 + input] + movdqu 
xmm7, [7*16 + input] + + movdqu xmm8, [0*16 + ctx] + pxor xmm0, xmm8 + pxor xmm1, xmm8 + pxor xmm2, xmm8 + pxor xmm3, xmm8 + pxor xmm4, xmm8 + pxor xmm5, xmm8 + pxor xmm6, xmm8 + pxor xmm7, xmm8 + +IF enc eq 1 + rnd textequ + lastrnd textequ + aesinst textequ + aeslastinst textequ +ELSE + rnd textequ + lastrnd textequ + aesinst textequ + aeslastinst textequ +ENDIF + + i = 1 + WHILE i LT rnds + rnd i + i = i+1 + ENDM + lastrnd rnds + + movdqu [0*16 + output], xmm0 + movdqu [1*16 + output], xmm1 + movdqu [2*16 + output], xmm2 + movdqu [3*16 + output], xmm3 + movdqu [4*16 + output], xmm4 + movdqu [5*16 + output], xmm5 + movdqu [6*16 + output], xmm6 + movdqu [7*16 + output], xmm7 + + lea input, [8*16 + input] + lea output, [8*16 + output] + sub inputLen, 8*16 + jmp loop8 + +loop1: + cmp inputLen, 1*16 + jb bail + + movdqu xmm0, [input] + movdqu xmm7, [0*16 + ctx] + pxor xmm0, xmm7 + + i = 1 + WHILE i LT rnds + movdqu xmm7, [i*16 + ctx] + aesinst xmm0, xmm7 + i = i+1 + ENDM + movdqu xmm7, [rnds*16 + ctx] + aeslastinst xmm0, xmm7 + + movdqu [output], xmm0 + + lea input, [1*16 + input] + lea output, [1*16 + output] + sub inputLen, 1*16 + jmp loop1 + +bail: + xor rax, rax + + movdqu xmm6, [rsp + 0*16] + movdqu xmm7, [rsp + 1*16] + movdqu xmm8, [rsp + 2*16] + add rsp, 3*16 + ret +ENDM + +intel_aes_encrypt_ecb_128 PROC +gen_aes_ecb_func 1, 10 +intel_aes_encrypt_ecb_128 ENDP + +intel_aes_encrypt_ecb_192 PROC +gen_aes_ecb_func 1, 12 +intel_aes_encrypt_ecb_192 ENDP + +intel_aes_encrypt_ecb_256 PROC +gen_aes_ecb_func 1, 14 +intel_aes_encrypt_ecb_256 ENDP + +intel_aes_decrypt_ecb_128 PROC +gen_aes_ecb_func 0, 10 +intel_aes_decrypt_ecb_128 ENDP + +intel_aes_decrypt_ecb_192 PROC +gen_aes_ecb_func 0, 12 +intel_aes_decrypt_ecb_192 ENDP + +intel_aes_decrypt_ecb_256 PROC +gen_aes_ecb_func 0, 14 +intel_aes_decrypt_ecb_256 ENDP + + +KEY textequ +KS textequ +ITR textequ + +intel_aes_encrypt_init_128 PROC + + movdqu xmm1, [KEY] + movdqu [KS], xmm1 + movdqa xmm2, xmm1 + + lea ITR, Lcon1 + 
movdqa xmm0, [ITR] + lea ITR, Lmask + movdqa xmm4, [ITR] + + mov ITR, 8 + +Lenc_128_ks_loop: + lea KS, [16 + KS] + dec ITR + + pshufb xmm2, xmm4 + aesenclast xmm2, xmm0 + pslld xmm0, 1 + movdqa xmm3, xmm1 + pslldq xmm3, 4 + pxor xmm1, xmm3 + pslldq xmm3, 4 + pxor xmm1, xmm3 + pslldq xmm3, 4 + pxor xmm1, xmm3 + pxor xmm1, xmm2 + movdqu [KS], xmm1 + movdqa xmm2, xmm1 + + jne Lenc_128_ks_loop + + lea ITR, Lcon2 + movdqa xmm0, [ITR] + + pshufb xmm2, xmm4 + aesenclast xmm2, xmm0 + pslld xmm0, 1 + movdqa xmm3, xmm1 + pslldq xmm3, 4 + pxor xmm1, xmm3 + pslldq xmm3, 4 + pxor xmm1, xmm3 + pslldq xmm3, 4 + pxor xmm1, xmm3 + pxor xmm1, xmm2 + movdqu [16 + KS], xmm1 + movdqa xmm2, xmm1 + + pshufb xmm2, xmm4 + aesenclast xmm2, xmm0 + movdqa xmm3, xmm1 + pslldq xmm3, 4 + pxor xmm1, xmm3 + pslldq xmm3, 4 + pxor xmm1, xmm3 + pslldq xmm3, 4 + pxor xmm1, xmm3 + pxor xmm1, xmm2 + movdqu [32 + KS], xmm1 + movdqa xmm2, xmm1 + + ret +intel_aes_encrypt_init_128 ENDP + + +intel_aes_decrypt_init_128 PROC + + push KS + push KEY + + call intel_aes_encrypt_init_128 + + pop KEY + pop KS + + movdqu xmm0, [0*16 + KS] + movdqu xmm1, [10*16 + KS] + movdqu [10*16 + KS], xmm0 + movdqu [0*16 + KS], xmm1 + + i = 1 + WHILE i LT 5 + movdqu xmm0, [i*16 + KS] + movdqu xmm1, [(10-i)*16 + KS] + + aesimc xmm0, xmm0 + aesimc xmm1, xmm1 + + movdqu [(10-i)*16 + KS], xmm0 + movdqu [i*16 + KS], xmm1 + + i = i+1 + ENDM + + movdqu xmm0, [5*16 + KS] + aesimc xmm0, xmm0 + movdqu [5*16 + KS], xmm0 + ret +intel_aes_decrypt_init_128 ENDP + + +intel_aes_encrypt_init_192 PROC + + sub rsp, 16*2 + movdqu [16*0 + rsp], xmm6 + movdqu [16*1 + rsp], xmm7 + + movdqu xmm1, [KEY] + mov ITR, [16 + KEY] + movd xmm3, ITR + + movdqu [KS], xmm1 + movdqa xmm5, xmm3 + + lea ITR, Lcon1 + movdqu xmm0, [ITR] + lea ITR, Lmask192 + movdqu xmm4, [ITR] + + mov ITR, 4 + +Lenc_192_ks_loop: + movdqa xmm2, xmm3 + pshufb xmm2, xmm4 + aesenclast xmm2, xmm0 + pslld xmm0, 1 + + movdqa xmm6, xmm1 + movdqa xmm7, xmm3 + pslldq xmm6, 4 + pslldq xmm7, 4 + 
pxor xmm1, xmm6 + pxor xmm3, xmm7 + pslldq xmm6, 4 + pxor xmm1, xmm6 + pslldq xmm6, 4 + pxor xmm1, xmm6 + pxor xmm1, xmm2 + pshufd xmm2, xmm1, 0ffh + pxor xmm3, xmm2 + + movdqa xmm6, xmm1 + shufpd xmm5, xmm1, 00h + shufpd xmm6, xmm3, 01h + + movdqu [16 + KS], xmm5 + movdqu [32 + KS], xmm6 + + movdqa xmm2, xmm3 + pshufb xmm2, xmm4 + aesenclast xmm2, xmm0 + pslld xmm0, 1 + + movdqa xmm6, xmm1 + movdqa xmm7, xmm3 + pslldq xmm6, 4 + pslldq xmm7, 4 + pxor xmm1, xmm6 + pxor xmm3, xmm7 + pslldq xmm6, 4 + pxor xmm1, xmm6 + pslldq xmm6, 4 + pxor xmm1, xmm6 + pxor xmm1, xmm2 + pshufd xmm2, xmm1, 0ffh + pxor xmm3, xmm2 + + movdqu [48 + KS], xmm1 + movdqa xmm5, xmm3 + + lea KS, [48 + KS] + + dec ITR + jnz Lenc_192_ks_loop + + movdqu [16 + KS], xmm5 + + movdqu xmm7, [16*1 + rsp] + movdqu xmm6, [16*0 + rsp] + add rsp, 16*2 + ret +intel_aes_encrypt_init_192 ENDP + +intel_aes_decrypt_init_192 PROC + push KS + push KEY + + call intel_aes_encrypt_init_192 + + pop KEY + pop KS + + movdqu xmm0, [0*16 + KS] + movdqu xmm1, [12*16 + KS] + movdqu [12*16 + KS], xmm0 + movdqu [0*16 + KS], xmm1 + + i = 1 + WHILE i LT 6 + movdqu xmm0, [i*16 + KS] + movdqu xmm1, [(12-i)*16 + KS] + + aesimc xmm0, xmm0 + aesimc xmm1, xmm1 + + movdqu [(12-i)*16 + KS], xmm0 + movdqu [i*16 + KS], xmm1 + + i = i+1 + ENDM + + movdqu xmm0, [6*16 + KS] + aesimc xmm0, xmm0 + movdqu [6*16 + KS], xmm0 + ret +intel_aes_decrypt_init_192 ENDP + + +intel_aes_encrypt_init_256 PROC + sub rsp, 16*2 + movdqu [16*0 + rsp], xmm6 + movdqu [16*1 + rsp], xmm7 + + movdqu xmm1, [16*0 + KEY] + movdqu xmm3, [16*1 + KEY] + + movdqu [16*0 + KS], xmm1 + movdqu [16*1 + KS], xmm3 + + lea ITR, Lcon1 + movdqu xmm0, [ITR] + lea ITR, Lmask256 + movdqu xmm5, [ITR] + + pxor xmm6, xmm6 + + mov ITR, 6 + +Lenc_256_ks_loop: + + movdqa xmm2, xmm3 + pshufb xmm2, xmm5 + aesenclast xmm2, xmm0 + pslld xmm0, 1 + movdqa xmm4, xmm1 + pslldq xmm4, 4 + pxor xmm1, xmm4 + pslldq xmm4, 4 + pxor xmm1, xmm4 + pslldq xmm4, 4 + pxor xmm1, xmm4 + pxor xmm1, xmm2 + movdqu 
[16*2 + KS], xmm1 + + pshufd xmm2, xmm1, 0ffh + aesenclast xmm2, xmm6 + movdqa xmm4, xmm3 + pslldq xmm4, 4 + pxor xmm3, xmm4 + pslldq xmm4, 4 + pxor xmm3, xmm4 + pslldq xmm4, 4 + pxor xmm3, xmm4 + pxor xmm3, xmm2 + movdqu [16*3 + KS], xmm3 + + lea KS, [32 + KS] + dec ITR + jnz Lenc_256_ks_loop + + movdqa xmm2, xmm3 + pshufb xmm2, xmm5 + aesenclast xmm2, xmm0 + movdqa xmm4, xmm1 + pslldq xmm4, 4 + pxor xmm1, xmm4 + pslldq xmm4, 4 + pxor xmm1, xmm4 + pslldq xmm4, 4 + pxor xmm1, xmm4 + pxor xmm1, xmm2 + movdqu [16*2 + KS], xmm1 + + movdqu xmm7, [16*1 + rsp] + movdqu xmm6, [16*0 + rsp] + add rsp, 16*2 + ret + +intel_aes_encrypt_init_256 ENDP + + +intel_aes_decrypt_init_256 PROC + push KS + push KEY + + call intel_aes_encrypt_init_256 + + pop KEY + pop KS + + movdqu xmm0, [0*16 + KS] + movdqu xmm1, [14*16 + KS] + movdqu [14*16 + KS], xmm0 + movdqu [0*16 + KS], xmm1 + + i = 1 + WHILE i LT 7 + movdqu xmm0, [i*16 + KS] + movdqu xmm1, [(14-i)*16 + KS] + + aesimc xmm0, xmm0 + aesimc xmm1, xmm1 + + movdqu [(14-i)*16 + KS], xmm0 + movdqu [i*16 + KS], xmm1 + + i = i+1 + ENDM + + movdqu xmm0, [7*16 + KS] + aesimc xmm0, xmm0 + movdqu [7*16 + KS], xmm0 + ret +intel_aes_decrypt_init_256 ENDP + + + +gen_aes_cbc_enc_func MACRO rnds + +LOCAL loop1 +LOCAL bail + + mov input, [rsp + 1*8 + 8*4] + mov inputLen, [rsp + 1*8 + 8*5] + + sub rsp, 3*16 + + movdqu [rsp + 0*16], xmm6 + movdqu [rsp + 1*16], xmm7 + movdqu [rsp + 2*16], xmm8 + + lea ctx, [48+ctx] + + movdqu xmm0, [-32+ctx] + + movdqu xmm2, [0*16 + ctx] + movdqu xmm3, [1*16 + ctx] + movdqu xmm4, [2*16 + ctx] + movdqu xmm5, [3*16 + ctx] + movdqu xmm6, [4*16 + ctx] + movdqu xmm7, [5*16 + ctx] + +loop1: + cmp inputLen, 1*16 + jb bail + + movdqu xmm1, [input] + pxor xmm1, xmm2 + pxor xmm0, xmm1 + + aesenc xmm0, xmm3 + aesenc xmm0, xmm4 + aesenc xmm0, xmm5 + aesenc xmm0, xmm6 + aesenc xmm0, xmm7 + + i = 6 + WHILE i LT rnds + movdqu xmm8, [i*16 + ctx] + aesenc xmm0, xmm8 + i = i+1 + ENDM + movdqu xmm8, [rnds*16 + ctx] + aesenclast xmm0, 
xmm8 + + movdqu [output], xmm0 + + lea input, [1*16 + input] + lea output, [1*16 + output] + sub inputLen, 1*16 + jmp loop1 + +bail: + movdqu [-32+ctx], xmm0 + + xor rax, rax + + movdqu xmm6, [rsp + 0*16] + movdqu xmm7, [rsp + 1*16] + movdqu xmm8, [rsp + 2*16] + add rsp, 3*16 + ret + +ENDM + +gen_aes_cbc_dec_func MACRO rnds + +LOCAL loop8 +LOCAL loop1 +LOCAL dec1 +LOCAL bail + + mov input, [rsp + 1*8 + 8*4] + mov inputLen, [rsp + 1*8 + 8*5] + + sub rsp, 3*16 + + movdqu [rsp + 0*16], xmm6 + movdqu [rsp + 1*16], xmm7 + movdqu [rsp + 2*16], xmm8 + + lea ctx, [48+ctx] + +loop8: + cmp inputLen, 8*16 + jb dec1 + + movdqu xmm0, [0*16 + input] + movdqu xmm1, [1*16 + input] + movdqu xmm2, [2*16 + input] + movdqu xmm3, [3*16 + input] + movdqu xmm4, [4*16 + input] + movdqu xmm5, [5*16 + input] + movdqu xmm6, [6*16 + input] + movdqu xmm7, [7*16 + input] + + movdqu xmm8, [0*16 + ctx] + pxor xmm0, xmm8 + pxor xmm1, xmm8 + pxor xmm2, xmm8 + pxor xmm3, xmm8 + pxor xmm4, xmm8 + pxor xmm5, xmm8 + pxor xmm6, xmm8 + pxor xmm7, xmm8 + + i = 1 + WHILE i LT rnds + aes_dec_rnd i + i = i+1 + ENDM + aes_dec_last_rnd rnds + + movdqu xmm8, [-32 + ctx] + pxor xmm0, xmm8 + movdqu xmm8, [0*16 + input] + pxor xmm1, xmm8 + movdqu xmm8, [1*16 + input] + pxor xmm2, xmm8 + movdqu xmm8, [2*16 + input] + pxor xmm3, xmm8 + movdqu xmm8, [3*16 + input] + pxor xmm4, xmm8 + movdqu xmm8, [4*16 + input] + pxor xmm5, xmm8 + movdqu xmm8, [5*16 + input] + pxor xmm6, xmm8 + movdqu xmm8, [6*16 + input] + pxor xmm7, xmm8 + movdqu xmm8, [7*16 + input] + + movdqu [0*16 + output], xmm0 + movdqu [1*16 + output], xmm1 + movdqu [2*16 + output], xmm2 + movdqu [3*16 + output], xmm3 + movdqu [4*16 + output], xmm4 + movdqu [5*16 + output], xmm5 + movdqu [6*16 + output], xmm6 + movdqu [7*16 + output], xmm7 + movdqu [-32 + ctx], xmm8 + + lea input, [8*16 + input] + lea output, [8*16 + output] + sub inputLen, 8*16 + jmp loop8 +dec1: + + movdqu xmm3, [-32 + ctx] + +loop1: + cmp inputLen, 1*16 + jb bail + + movdqu xmm0, [input] + 
movdqa xmm4, xmm0 + movdqu xmm7, [0*16 + ctx] + pxor xmm0, xmm7 + + i = 1 + WHILE i LT rnds + movdqu xmm7, [i*16 + ctx] + aesdec xmm0, xmm7 + i = i+1 + ENDM + movdqu xmm7, [rnds*16 + ctx] + aesdeclast xmm0, xmm7 + pxor xmm3, xmm0 + + movdqu [output], xmm3 + movdqa xmm3, xmm4 + + lea input, [1*16 + input] + lea output, [1*16 + output] + sub inputLen, 1*16 + jmp loop1 + +bail: + movdqu [-32 + ctx], xmm3 + xor rax, rax + + movdqu xmm6, [rsp + 0*16] + movdqu xmm7, [rsp + 1*16] + movdqu xmm8, [rsp + 2*16] + add rsp, 3*16 + ret +ENDM + +intel_aes_encrypt_cbc_128 PROC +gen_aes_cbc_enc_func 10 +intel_aes_encrypt_cbc_128 ENDP + +intel_aes_encrypt_cbc_192 PROC +gen_aes_cbc_enc_func 12 +intel_aes_encrypt_cbc_192 ENDP + +intel_aes_encrypt_cbc_256 PROC +gen_aes_cbc_enc_func 14 +intel_aes_encrypt_cbc_256 ENDP + +intel_aes_decrypt_cbc_128 PROC +gen_aes_cbc_dec_func 10 +intel_aes_decrypt_cbc_128 ENDP + +intel_aes_decrypt_cbc_192 PROC +gen_aes_cbc_dec_func 12 +intel_aes_decrypt_cbc_192 ENDP + +intel_aes_decrypt_cbc_256 PROC +gen_aes_cbc_dec_func 14 +intel_aes_decrypt_cbc_256 ENDP + + + +ctrCtx textequ +CTR textequ +CTRSave textequ + +gen_aes_ctr_func MACRO rnds + +LOCAL loop8 +LOCAL loop1 +LOCAL enc1 +LOCAL bail + + mov input, [rsp + 8*1 + 4*8] + mov inputLen, [rsp + 8*1 + 5*8] + + mov ctrCtx, ctx + mov ctx, [8+ctrCtx] + lea ctx, [48+ctx] + + sub rsp, 3*16 + movdqu [rsp + 0*16], xmm6 + movdqu [rsp + 1*16], xmm7 + movdqu [rsp + 2*16], xmm8 + + + push rbp + mov rbp, rsp + sub rsp, 8*16 + and rsp, -16 + + + movdqu xmm0, [16+ctrCtx] + mov CTRSave, DWORD PTR [ctrCtx + 16 + 3*4] + bswap CTRSave + movdqu xmm1, [ctx + 0*16] + + pxor xmm0, xmm1 + + movdqa [rsp + 0*16], xmm0 + movdqa [rsp + 1*16], xmm0 + movdqa [rsp + 2*16], xmm0 + movdqa [rsp + 3*16], xmm0 + movdqa [rsp + 4*16], xmm0 + movdqa [rsp + 5*16], xmm0 + movdqa [rsp + 6*16], xmm0 + movdqa [rsp + 7*16], xmm0 + + inc CTRSave + mov CTR, CTRSave + bswap CTR + xor CTR, DWORD PTR [ctx + 3*4] + mov DWORD PTR [rsp + 1*16 + 3*4], CTR + + 
inc CTRSave + mov CTR, CTRSave + bswap CTR + xor CTR, DWORD PTR [ctx + 3*4] + mov DWORD PTR [rsp + 2*16 + 3*4], CTR + + inc CTRSave + mov CTR, CTRSave + bswap CTR + xor CTR, DWORD PTR [ctx + 3*4] + mov DWORD PTR [rsp + 3*16 + 3*4], CTR + + inc CTRSave + mov CTR, CTRSave + bswap CTR + xor CTR, DWORD PTR [ctx + 3*4] + mov DWORD PTR [rsp + 4*16 + 3*4], CTR + + inc CTRSave + mov CTR, CTRSave + bswap CTR + xor CTR, DWORD PTR [ctx + 3*4] + mov DWORD PTR [rsp + 5*16 + 3*4], CTR + + inc CTRSave + mov CTR, CTRSave + bswap CTR + xor CTR, DWORD PTR [ctx + 3*4] + mov DWORD PTR [rsp + 6*16 + 3*4], CTR + + inc CTRSave + mov CTR, CTRSave + bswap CTR + xor CTR, DWORD PTR [ctx + 3*4] + mov DWORD PTR [rsp + 7*16 + 3*4], CTR + + +loop8: + cmp inputLen, 8*16 + jb loop1 + + movdqu xmm0, [0*16 + rsp] + movdqu xmm1, [1*16 + rsp] + movdqu xmm2, [2*16 + rsp] + movdqu xmm3, [3*16 + rsp] + movdqu xmm4, [4*16 + rsp] + movdqu xmm5, [5*16 + rsp] + movdqu xmm6, [6*16 + rsp] + movdqu xmm7, [7*16 + rsp] + + i = 1 + WHILE i LE 8 + aes_rnd i + + inc CTRSave + mov CTR, CTRSave + bswap CTR + xor CTR, DWORD PTR [ctx + 3*4] + mov DWORD PTR [rsp + (i-1)*16 + 3*4], CTR + + i = i+1 + ENDM + WHILE i LT rnds + aes_rnd i + i = i+1 + ENDM + aes_last_rnd rnds + + movdqu xmm8, [0*16 + input] + pxor xmm0, xmm8 + movdqu xmm8, [1*16 + input] + pxor xmm1, xmm8 + movdqu xmm8, [2*16 + input] + pxor xmm2, xmm8 + movdqu xmm8, [3*16 + input] + pxor xmm3, xmm8 + movdqu xmm8, [4*16 + input] + pxor xmm4, xmm8 + movdqu xmm8, [5*16 + input] + pxor xmm5, xmm8 + movdqu xmm8, [6*16 + input] + pxor xmm6, xmm8 + movdqu xmm8, [7*16 + input] + pxor xmm7, xmm8 + + movdqu [0*16 + output], xmm0 + movdqu [1*16 + output], xmm1 + movdqu [2*16 + output], xmm2 + movdqu [3*16 + output], xmm3 + movdqu [4*16 + output], xmm4 + movdqu [5*16 + output], xmm5 + movdqu [6*16 + output], xmm6 + movdqu [7*16 + output], xmm7 + + lea input, [8*16 + input] + lea output, [8*16 + output] + sub inputLen, 8*16 + jmp loop8 + + +loop1: + cmp inputLen, 1*16 + jb 
bail + + movdqu xmm0, [rsp] + add rsp, 16 + + i = 1 + WHILE i LT rnds + movdqu xmm7, [i*16 + ctx] + aesenc xmm0, xmm7 + i = i+1 + ENDM + movdqu xmm7, [rnds*16 + ctx] + aesenclast xmm0, xmm7 + + movdqu xmm7, [input] + pxor xmm0, xmm7 + movdqu [output], xmm0 + + lea input, [1*16 + input] + lea output, [1*16 + output] + sub inputLen, 1*16 + jmp loop1 + +bail: + + movdqu xmm0, [rsp] + movdqu xmm1, [ctx + 0*16] + pxor xmm0, xmm1 + movdqu [16+ctrCtx], xmm0 + + + xor rax, rax + mov rsp, rbp + pop rbp + + movdqu xmm6, [rsp + 0*16] + movdqu xmm7, [rsp + 1*16] + movdqu xmm8, [rsp + 2*16] + add rsp, 3*16 + + ret +ENDM + + +intel_aes_encrypt_ctr_128 PROC +gen_aes_ctr_func 10 +intel_aes_encrypt_ctr_128 ENDP + +intel_aes_encrypt_ctr_192 PROC +gen_aes_ctr_func 12 +intel_aes_encrypt_ctr_192 ENDP + +intel_aes_encrypt_ctr_256 PROC +gen_aes_ctr_func 14 +intel_aes_encrypt_ctr_256 ENDP + + +END diff --git a/security/nss/lib/freebl/intel-aes-x86-masm.asm b/security/nss/lib/freebl/intel-aes-x86-masm.asm new file mode 100644 index 000000000..7d805e766 --- /dev/null +++ b/security/nss/lib/freebl/intel-aes-x86-masm.asm @@ -0,0 +1,949 @@ +; LICENSE: +; This submission to NSS is to be made available under the terms of the +; Mozilla Public License, v. 2.0. You can obtain one at http: +; //mozilla.org/MPL/2.0/. +;############################################################################### +; Copyright(c) 2014, Intel Corp. 
+; Developers and authors: +; Shay Gueron and Vlad Krasnov +; Intel Corporation, Israel Development Centre, Haifa, Israel +; Please send feedback directly to crypto.feedback.alias@intel.com + + +.MODEL FLAT, C +.XMM + +.DATA +ALIGN 16 +Lmask dd 0c0f0e0dh,0c0f0e0dh,0c0f0e0dh,0c0f0e0dh +Lmask192 dd 004070605h, 004070605h, 004070605h, 004070605h +Lmask256 dd 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh, 00c0f0e0dh +Lcon1 dd 1,1,1,1 +Lcon2 dd 1bh,1bh,1bh,1bh + +.CODE + +ctx textequ +output textequ +input textequ +inputLen textequ + + +aes_rnd MACRO i + movdqu xmm7, [i*16 + ctx] + aesenc xmm0, xmm7 + aesenc xmm1, xmm7 + aesenc xmm2, xmm7 + aesenc xmm3, xmm7 + aesenc xmm4, xmm7 + aesenc xmm5, xmm7 + aesenc xmm6, xmm7 + ENDM + +aes_last_rnd MACRO i + movdqu xmm7, [i*16 + ctx] + aesenclast xmm0, xmm7 + aesenclast xmm1, xmm7 + aesenclast xmm2, xmm7 + aesenclast xmm3, xmm7 + aesenclast xmm4, xmm7 + aesenclast xmm5, xmm7 + aesenclast xmm6, xmm7 + ENDM + +aes_dec_rnd MACRO i + movdqu xmm7, [i*16 + ctx] + aesdec xmm0, xmm7 + aesdec xmm1, xmm7 + aesdec xmm2, xmm7 + aesdec xmm3, xmm7 + aesdec xmm4, xmm7 + aesdec xmm5, xmm7 + aesdec xmm6, xmm7 + ENDM + +aes_dec_last_rnd MACRO i + movdqu xmm7, [i*16 + ctx] + aesdeclast xmm0, xmm7 + aesdeclast xmm1, xmm7 + aesdeclast xmm2, xmm7 + aesdeclast xmm3, xmm7 + aesdeclast xmm4, xmm7 + aesdeclast xmm5, xmm7 + aesdeclast xmm6, xmm7 + ENDM + + +gen_aes_ecb_func MACRO enc, rnds + +LOCAL loop7 +LOCAL loop1 +LOCAL bail + + push inputLen + + mov ctx, [esp + 2*4 + 0*4] + mov output, [esp + 2*4 + 1*4] + mov input, [esp + 2*4 + 4*4] + mov inputLen, [esp + 2*4 + 5*4] + + lea ctx, [44+ctx] + +loop7: + cmp inputLen, 7*16 + jb loop1 + + movdqu xmm0, [0*16 + input] + movdqu xmm1, [1*16 + input] + movdqu xmm2, [2*16 + input] + movdqu xmm3, [3*16 + input] + movdqu xmm4, [4*16 + input] + movdqu xmm5, [5*16 + input] + movdqu xmm6, [6*16 + input] + + movdqu xmm7, [0*16 + ctx] + pxor xmm0, xmm7 + pxor xmm1, xmm7 + pxor xmm2, xmm7 + pxor xmm3, xmm7 + pxor xmm4, xmm7 + 
pxor xmm5, xmm7 + pxor xmm6, xmm7 + +IF enc eq 1 + rnd textequ + lastrnd textequ + aesinst textequ + aeslastinst textequ +ELSE + rnd textequ + lastrnd textequ + aesinst textequ + aeslastinst textequ +ENDIF + + i = 1 + WHILE i LT rnds + rnd i + i = i+1 + ENDM + lastrnd rnds + + movdqu [0*16 + output], xmm0 + movdqu [1*16 + output], xmm1 + movdqu [2*16 + output], xmm2 + movdqu [3*16 + output], xmm3 + movdqu [4*16 + output], xmm4 + movdqu [5*16 + output], xmm5 + movdqu [6*16 + output], xmm6 + + lea input, [7*16 + input] + lea output, [7*16 + output] + sub inputLen, 7*16 + jmp loop7 + +loop1: + cmp inputLen, 1*16 + jb bail + + movdqu xmm0, [input] + movdqu xmm7, [0*16 + ctx] + pxor xmm0, xmm7 + + i = 1 + WHILE i LT rnds + movdqu xmm7, [i*16 + ctx] + aesinst xmm0, xmm7 + i = i+1 + ENDM + movdqu xmm7, [rnds*16 + ctx] + aeslastinst xmm0, xmm7 + + movdqu [output], xmm0 + + lea input, [1*16 + input] + lea output, [1*16 + output] + sub inputLen, 1*16 + jmp loop1 + +bail: + xor eax, eax + pop inputLen + ret + +ENDM + +ALIGN 16 +intel_aes_encrypt_ecb_128 PROC +gen_aes_ecb_func 1, 10 +intel_aes_encrypt_ecb_128 ENDP + +ALIGN 16 +intel_aes_encrypt_ecb_192 PROC +gen_aes_ecb_func 1, 12 +intel_aes_encrypt_ecb_192 ENDP + +ALIGN 16 +intel_aes_encrypt_ecb_256 PROC +gen_aes_ecb_func 1, 14 +intel_aes_encrypt_ecb_256 ENDP + +ALIGN 16 +intel_aes_decrypt_ecb_128 PROC +gen_aes_ecb_func 0, 10 +intel_aes_decrypt_ecb_128 ENDP + +ALIGN 16 +intel_aes_decrypt_ecb_192 PROC +gen_aes_ecb_func 0, 12 +intel_aes_decrypt_ecb_192 ENDP + +ALIGN 16 +intel_aes_decrypt_ecb_256 PROC +gen_aes_ecb_func 0, 14 +intel_aes_decrypt_ecb_256 ENDP + + +KEY textequ +KS textequ +ITR textequ + +ALIGN 16 +intel_aes_encrypt_init_128 PROC + + mov KEY, [esp + 1*4 + 0*4] + mov KS, [esp + 1*4 + 1*4] + + + movdqu xmm1, [KEY] + movdqu [KS], xmm1 + movdqa xmm2, xmm1 + + lea ITR, Lcon1 + movdqa xmm0, [ITR] + lea ITR, Lmask + movdqa xmm4, [ITR] + + mov ITR, 8 + +Lenc_128_ks_loop: + lea KS, [16 + KS] + dec ITR + + pshufb xmm2, xmm4 + 
aesenclast xmm2, xmm0 + pslld xmm0, 1 + movdqa xmm3, xmm1 + pslldq xmm3, 4 + pxor xmm1, xmm3 + pslldq xmm3, 4 + pxor xmm1, xmm3 + pslldq xmm3, 4 + pxor xmm1, xmm3 + pxor xmm1, xmm2 + movdqu [KS], xmm1 + movdqa xmm2, xmm1 + + jne Lenc_128_ks_loop + + lea ITR, Lcon2 + movdqa xmm0, [ITR] + + pshufb xmm2, xmm4 + aesenclast xmm2, xmm0 + pslld xmm0, 1 + movdqa xmm3, xmm1 + pslldq xmm3, 4 + pxor xmm1, xmm3 + pslldq xmm3, 4 + pxor xmm1, xmm3 + pslldq xmm3, 4 + pxor xmm1, xmm3 + pxor xmm1, xmm2 + movdqu [16 + KS], xmm1 + movdqa xmm2, xmm1 + + pshufb xmm2, xmm4 + aesenclast xmm2, xmm0 + movdqa xmm3, xmm1 + pslldq xmm3, 4 + pxor xmm1, xmm3 + pslldq xmm3, 4 + pxor xmm1, xmm3 + pslldq xmm3, 4 + pxor xmm1, xmm3 + pxor xmm1, xmm2 + movdqu [32 + KS], xmm1 + movdqa xmm2, xmm1 + + ret +intel_aes_encrypt_init_128 ENDP + + +ALIGN 16 +intel_aes_decrypt_init_128 PROC + + mov KEY, [esp + 1*4 + 0*4] + mov KS, [esp + 1*4 + 1*4] + + push KS + push KEY + + call intel_aes_encrypt_init_128 + + pop KEY + pop KS + + movdqu xmm0, [0*16 + KS] + movdqu xmm1, [10*16 + KS] + movdqu [10*16 + KS], xmm0 + movdqu [0*16 + KS], xmm1 + + i = 1 + WHILE i LT 5 + movdqu xmm0, [i*16 + KS] + movdqu xmm1, [(10-i)*16 + KS] + + aesimc xmm0, xmm0 + aesimc xmm1, xmm1 + + movdqu [(10-i)*16 + KS], xmm0 + movdqu [i*16 + KS], xmm1 + + i = i+1 + ENDM + + movdqu xmm0, [5*16 + KS] + aesimc xmm0, xmm0 + movdqu [5*16 + KS], xmm0 + ret +intel_aes_decrypt_init_128 ENDP + + +ALIGN 16 +intel_aes_encrypt_init_192 PROC + + mov KEY, [esp + 1*4 + 0*4] + mov KS, [esp + 1*4 + 1*4] + + pxor xmm3, xmm3 + movdqu xmm1, [KEY] + pinsrd xmm3, DWORD PTR [16 + KEY], 0 + pinsrd xmm3, DWORD PTR [20 + KEY], 1 + + movdqu [KS], xmm1 + movdqa xmm5, xmm3 + + lea ITR, Lcon1 + movdqu xmm0, [ITR] + lea ITR, Lmask192 + movdqu xmm4, [ITR] + + mov ITR, 4 + +Lenc_192_ks_loop: + movdqa xmm2, xmm3 + pshufb xmm2, xmm4 + aesenclast xmm2, xmm0 + pslld xmm0, 1 + + movdqa xmm6, xmm1 + movdqa xmm7, xmm3 + pslldq xmm6, 4 + pslldq xmm7, 4 + pxor xmm1, xmm6 + pxor 
xmm3, xmm7 + pslldq xmm6, 4 + pxor xmm1, xmm6 + pslldq xmm6, 4 + pxor xmm1, xmm6 + pxor xmm1, xmm2 + pshufd xmm2, xmm1, 0ffh + pxor xmm3, xmm2 + + movdqa xmm6, xmm1 + shufpd xmm5, xmm1, 00h + shufpd xmm6, xmm3, 01h + + movdqu [16 + KS], xmm5 + movdqu [32 + KS], xmm6 + + movdqa xmm2, xmm3 + pshufb xmm2, xmm4 + aesenclast xmm2, xmm0 + pslld xmm0, 1 + + movdqa xmm6, xmm1 + movdqa xmm7, xmm3 + pslldq xmm6, 4 + pslldq xmm7, 4 + pxor xmm1, xmm6 + pxor xmm3, xmm7 + pslldq xmm6, 4 + pxor xmm1, xmm6 + pslldq xmm6, 4 + pxor xmm1, xmm6 + pxor xmm1, xmm2 + pshufd xmm2, xmm1, 0ffh + pxor xmm3, xmm2 + + movdqu [48 + KS], xmm1 + movdqa xmm5, xmm3 + + lea KS, [48 + KS] + + dec ITR + jnz Lenc_192_ks_loop + + movdqu [16 + KS], xmm5 +ret +intel_aes_encrypt_init_192 ENDP + +ALIGN 16 +intel_aes_decrypt_init_192 PROC + mov KEY, [esp + 1*4 + 0*4] + mov KS, [esp + 1*4 + 1*4] + + push KS + push KEY + + call intel_aes_encrypt_init_192 + + pop KEY + pop KS + + movdqu xmm0, [0*16 + KS] + movdqu xmm1, [12*16 + KS] + movdqu [12*16 + KS], xmm0 + movdqu [0*16 + KS], xmm1 + + i = 1 + WHILE i LT 6 + movdqu xmm0, [i*16 + KS] + movdqu xmm1, [(12-i)*16 + KS] + + aesimc xmm0, xmm0 + aesimc xmm1, xmm1 + + movdqu [(12-i)*16 + KS], xmm0 + movdqu [i*16 + KS], xmm1 + + i = i+1 + ENDM + + movdqu xmm0, [6*16 + KS] + aesimc xmm0, xmm0 + movdqu [6*16 + KS], xmm0 + ret +intel_aes_decrypt_init_192 ENDP + +ALIGN 16 +intel_aes_encrypt_init_256 PROC + + mov KEY, [esp + 1*4 + 0*4] + mov KS, [esp + 1*4 + 1*4] + movdqu xmm1, [16*0 + KEY] + movdqu xmm3, [16*1 + KEY] + + movdqu [16*0 + KS], xmm1 + movdqu [16*1 + KS], xmm3 + + lea ITR, Lcon1 + movdqu xmm0, [ITR] + lea ITR, Lmask256 + movdqu xmm5, [ITR] + + pxor xmm6, xmm6 + + mov ITR, 6 + +Lenc_256_ks_loop: + + movdqa xmm2, xmm3 + pshufb xmm2, xmm5 + aesenclast xmm2, xmm0 + pslld xmm0, 1 + movdqa xmm4, xmm1 + pslldq xmm4, 4 + pxor xmm1, xmm4 + pslldq xmm4, 4 + pxor xmm1, xmm4 + pslldq xmm4, 4 + pxor xmm1, xmm4 + pxor xmm1, xmm2 + movdqu [16*2 + KS], xmm1 + + pshufd xmm2, 
xmm1, 0ffh + aesenclast xmm2, xmm6 + movdqa xmm4, xmm3 + pslldq xmm4, 4 + pxor xmm3, xmm4 + pslldq xmm4, 4 + pxor xmm3, xmm4 + pslldq xmm4, 4 + pxor xmm3, xmm4 + pxor xmm3, xmm2 + movdqu [16*3 + KS], xmm3 + + lea KS, [32 + KS] + dec ITR + jnz Lenc_256_ks_loop + + movdqa xmm2, xmm3 + pshufb xmm2, xmm5 + aesenclast xmm2, xmm0 + movdqa xmm4, xmm1 + pslldq xmm4, 4 + pxor xmm1, xmm4 + pslldq xmm4, 4 + pxor xmm1, xmm4 + pslldq xmm4, 4 + pxor xmm1, xmm4 + pxor xmm1, xmm2 + movdqu [16*2 + KS], xmm1 + + ret +intel_aes_encrypt_init_256 ENDP + +ALIGN 16 +intel_aes_decrypt_init_256 PROC + mov KEY, [esp + 1*4 + 0*4] + mov KS, [esp + 1*4 + 1*4] + + push KS + push KEY + + call intel_aes_encrypt_init_256 + + pop KEY + pop KS + + movdqu xmm0, [0*16 + KS] + movdqu xmm1, [14*16 + KS] + movdqu [14*16 + KS], xmm0 + movdqu [0*16 + KS], xmm1 + + i = 1 + WHILE i LT 7 + movdqu xmm0, [i*16 + KS] + movdqu xmm1, [(14-i)*16 + KS] + + aesimc xmm0, xmm0 + aesimc xmm1, xmm1 + + movdqu [(14-i)*16 + KS], xmm0 + movdqu [i*16 + KS], xmm1 + + i = i+1 + ENDM + + movdqu xmm0, [7*16 + KS] + aesimc xmm0, xmm0 + movdqu [7*16 + KS], xmm0 + ret +intel_aes_decrypt_init_256 ENDP + + + +gen_aes_cbc_enc_func MACRO rnds + +LOCAL loop1 +LOCAL bail + + push inputLen + + mov ctx, [esp + 2*4 + 0*4] + mov output, [esp + 2*4 + 1*4] + mov input, [esp + 2*4 + 4*4] + mov inputLen, [esp + 2*4 + 5*4] + + lea ctx, [44+ctx] + + movdqu xmm0, [-32+ctx] + + movdqu xmm2, [0*16 + ctx] + movdqu xmm3, [1*16 + ctx] + movdqu xmm4, [2*16 + ctx] + movdqu xmm5, [3*16 + ctx] + movdqu xmm6, [4*16 + ctx] + +loop1: + cmp inputLen, 1*16 + jb bail + + movdqu xmm1, [input] + pxor xmm1, xmm2 + pxor xmm0, xmm1 + + aesenc xmm0, xmm3 + aesenc xmm0, xmm4 + aesenc xmm0, xmm5 + aesenc xmm0, xmm6 + + i = 5 + WHILE i LT rnds + movdqu xmm7, [i*16 + ctx] + aesenc xmm0, xmm7 + i = i+1 + ENDM + movdqu xmm7, [rnds*16 + ctx] + aesenclast xmm0, xmm7 + + movdqu [output], xmm0 + + lea input, [1*16 + input] + lea output, [1*16 + output] + sub inputLen, 1*16 + jmp 
loop1 + +bail: + movdqu [-32+ctx], xmm0 + + xor eax, eax + pop inputLen + ret + +ENDM + +gen_aes_cbc_dec_func MACRO rnds + +LOCAL loop7 +LOCAL loop1 +LOCAL dec1 +LOCAL bail + + push inputLen + + mov ctx, [esp + 2*4 + 0*4] + mov output, [esp + 2*4 + 1*4] + mov input, [esp + 2*4 + 4*4] + mov inputLen, [esp + 2*4 + 5*4] + + lea ctx, [44+ctx] + +loop7: + cmp inputLen, 7*16 + jb dec1 + + movdqu xmm0, [0*16 + input] + movdqu xmm1, [1*16 + input] + movdqu xmm2, [2*16 + input] + movdqu xmm3, [3*16 + input] + movdqu xmm4, [4*16 + input] + movdqu xmm5, [5*16 + input] + movdqu xmm6, [6*16 + input] + + movdqu xmm7, [0*16 + ctx] + pxor xmm0, xmm7 + pxor xmm1, xmm7 + pxor xmm2, xmm7 + pxor xmm3, xmm7 + pxor xmm4, xmm7 + pxor xmm5, xmm7 + pxor xmm6, xmm7 + + i = 1 + WHILE i LT rnds + aes_dec_rnd i + i = i+1 + ENDM + aes_dec_last_rnd rnds + + movdqu xmm7, [-32 + ctx] + pxor xmm0, xmm7 + movdqu xmm7, [0*16 + input] + pxor xmm1, xmm7 + movdqu xmm7, [1*16 + input] + pxor xmm2, xmm7 + movdqu xmm7, [2*16 + input] + pxor xmm3, xmm7 + movdqu xmm7, [3*16 + input] + pxor xmm4, xmm7 + movdqu xmm7, [4*16 + input] + pxor xmm5, xmm7 + movdqu xmm7, [5*16 + input] + pxor xmm6, xmm7 + movdqu xmm7, [6*16 + input] + + movdqu [0*16 + output], xmm0 + movdqu [1*16 + output], xmm1 + movdqu [2*16 + output], xmm2 + movdqu [3*16 + output], xmm3 + movdqu [4*16 + output], xmm4 + movdqu [5*16 + output], xmm5 + movdqu [6*16 + output], xmm6 + movdqu [-32 + ctx], xmm7 + + lea input, [7*16 + input] + lea output, [7*16 + output] + sub inputLen, 7*16 + jmp loop7 +dec1: + + movdqu xmm3, [-32 + ctx] + +loop1: + cmp inputLen, 1*16 + jb bail + + movdqu xmm0, [input] + movdqa xmm4, xmm0 + movdqu xmm7, [0*16 + ctx] + pxor xmm0, xmm7 + + i = 1 + WHILE i LT rnds + movdqu xmm7, [i*16 + ctx] + aesdec xmm0, xmm7 + i = i+1 + ENDM + movdqu xmm7, [rnds*16 + ctx] + aesdeclast xmm0, xmm7 + pxor xmm3, xmm0 + + movdqu [output], xmm3 + movdqa xmm3, xmm4 + + lea input, [1*16 + input] + lea output, [1*16 + output] + sub inputLen, 1*16 
+ jmp loop1 + +bail: + movdqu [-32 + ctx], xmm3 + xor eax, eax + pop inputLen + ret +ENDM + +ALIGN 16 +intel_aes_encrypt_cbc_128 PROC +gen_aes_cbc_enc_func 10 +intel_aes_encrypt_cbc_128 ENDP + +ALIGN 16 +intel_aes_encrypt_cbc_192 PROC +gen_aes_cbc_enc_func 12 +intel_aes_encrypt_cbc_192 ENDP + +ALIGN 16 +intel_aes_encrypt_cbc_256 PROC +gen_aes_cbc_enc_func 14 +intel_aes_encrypt_cbc_256 ENDP + +ALIGN 16 +intel_aes_decrypt_cbc_128 PROC +gen_aes_cbc_dec_func 10 +intel_aes_decrypt_cbc_128 ENDP + +ALIGN 16 +intel_aes_decrypt_cbc_192 PROC +gen_aes_cbc_dec_func 12 +intel_aes_decrypt_cbc_192 ENDP + +ALIGN 16 +intel_aes_decrypt_cbc_256 PROC +gen_aes_cbc_dec_func 14 +intel_aes_decrypt_cbc_256 ENDP + + + +ctrCtx textequ +CTR textequ + +gen_aes_ctr_func MACRO rnds + +LOCAL loop7 +LOCAL loop1 +LOCAL enc1 +LOCAL bail + + push inputLen + push ctrCtx + push CTR + push ebp + + mov ctrCtx, [esp + 4*5 + 0*4] + mov output, [esp + 4*5 + 1*4] + mov input, [esp + 4*5 + 4*4] + mov inputLen, [esp + 4*5 + 5*4] + + mov ctx, [4+ctrCtx] + lea ctx, [44+ctx] + + mov ebp, esp + sub esp, 7*16 + and esp, -16 + + movdqu xmm0, [8+ctrCtx] + mov ctrCtx, [ctrCtx + 8 + 3*4] + bswap ctrCtx + movdqu xmm1, [ctx + 0*16] + + pxor xmm0, xmm1 + + movdqa [esp + 0*16], xmm0 + movdqa [esp + 1*16], xmm0 + movdqa [esp + 2*16], xmm0 + movdqa [esp + 3*16], xmm0 + movdqa [esp + 4*16], xmm0 + movdqa [esp + 5*16], xmm0 + movdqa [esp + 6*16], xmm0 + + inc ctrCtx + mov CTR, ctrCtx + bswap CTR + xor CTR, [ctx + 3*4] + mov [esp + 1*16 + 3*4], CTR + + inc ctrCtx + mov CTR, ctrCtx + bswap CTR + xor CTR, [ctx + 3*4] + mov [esp + 2*16 + 3*4], CTR + + inc ctrCtx + mov CTR, ctrCtx + bswap CTR + xor CTR, [ctx + 3*4] + mov [esp + 3*16 + 3*4], CTR + + inc ctrCtx + mov CTR, ctrCtx + bswap CTR + xor CTR, [ctx + 3*4] + mov [esp + 4*16 + 3*4], CTR + + inc ctrCtx + mov CTR, ctrCtx + bswap CTR + xor CTR, [ctx + 3*4] + mov [esp + 5*16 + 3*4], CTR + + inc ctrCtx + mov CTR, ctrCtx + bswap CTR + xor CTR, [ctx + 3*4] + mov [esp + 6*16 + 3*4], 
CTR + + +loop7: + cmp inputLen, 7*16 + jb loop1 + + movdqu xmm0, [0*16 + esp] + movdqu xmm1, [1*16 + esp] + movdqu xmm2, [2*16 + esp] + movdqu xmm3, [3*16 + esp] + movdqu xmm4, [4*16 + esp] + movdqu xmm5, [5*16 + esp] + movdqu xmm6, [6*16 + esp] + + i = 1 + WHILE i LE 7 + aes_rnd i + + inc ctrCtx + mov CTR, ctrCtx + bswap CTR + xor CTR, [ctx + 3*4] + mov [esp + (i-1)*16 + 3*4], CTR + + i = i+1 + ENDM + WHILE i LT rnds + aes_rnd i + i = i+1 + ENDM + aes_last_rnd rnds + + movdqu xmm7, [0*16 + input] + pxor xmm0, xmm7 + movdqu xmm7, [1*16 + input] + pxor xmm1, xmm7 + movdqu xmm7, [2*16 + input] + pxor xmm2, xmm7 + movdqu xmm7, [3*16 + input] + pxor xmm3, xmm7 + movdqu xmm7, [4*16 + input] + pxor xmm4, xmm7 + movdqu xmm7, [5*16 + input] + pxor xmm5, xmm7 + movdqu xmm7, [6*16 + input] + pxor xmm6, xmm7 + + movdqu [0*16 + output], xmm0 + movdqu [1*16 + output], xmm1 + movdqu [2*16 + output], xmm2 + movdqu [3*16 + output], xmm3 + movdqu [4*16 + output], xmm4 + movdqu [5*16 + output], xmm5 + movdqu [6*16 + output], xmm6 + + lea input, [7*16 + input] + lea output, [7*16 + output] + sub inputLen, 7*16 + jmp loop7 + + +loop1: + cmp inputLen, 1*16 + jb bail + + movdqu xmm0, [esp] + add esp, 16 + + i = 1 + WHILE i LT rnds + movdqu xmm7, [i*16 + ctx] + aesenc xmm0, xmm7 + i = i+1 + ENDM + movdqu xmm7, [rnds*16 + ctx] + aesenclast xmm0, xmm7 + + movdqu xmm7, [input] + pxor xmm0, xmm7 + movdqu [output], xmm0 + + lea input, [1*16 + input] + lea output, [1*16 + output] + sub inputLen, 1*16 + jmp loop1 + +bail: + + mov ctrCtx, [ebp + 4*5 + 0*4] + movdqu xmm0, [esp] + movdqu xmm1, [ctx + 0*16] + pxor xmm0, xmm1 + movdqu [8+ctrCtx], xmm0 + + + xor eax, eax + mov esp, ebp + pop ebp + pop CTR + pop ctrCtx + pop inputLen + ret +ENDM + + +ALIGN 16 +intel_aes_encrypt_ctr_128 PROC +gen_aes_ctr_func 10 +intel_aes_encrypt_ctr_128 ENDP + +ALIGN 16 +intel_aes_encrypt_ctr_192 PROC +gen_aes_ctr_func 12 +intel_aes_encrypt_ctr_192 ENDP + +ALIGN 16 +intel_aes_encrypt_ctr_256 PROC +gen_aes_ctr_func 14 
+intel_aes_encrypt_ctr_256 ENDP + + +END diff --git a/security/nss/lib/freebl/intel-aes.h b/security/nss/lib/freebl/intel-aes.h new file mode 100644 index 000000000..d5bd2d8ca --- /dev/null +++ b/security/nss/lib/freebl/intel-aes.h @@ -0,0 +1,143 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* Prototypes of the functions defined in the assembler file. */ +void intel_aes_encrypt_init_128(const unsigned char *key, PRUint32 *expanded); +void intel_aes_encrypt_init_192(const unsigned char *key, PRUint32 *expanded); +void intel_aes_encrypt_init_256(const unsigned char *key, PRUint32 *expanded); +void intel_aes_decrypt_init_128(const unsigned char *key, PRUint32 *expanded); +void intel_aes_decrypt_init_192(const unsigned char *key, PRUint32 *expanded); +void intel_aes_decrypt_init_256(const unsigned char *key, PRUint32 *expanded); +SECStatus intel_aes_encrypt_ecb_128(AESContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen, + unsigned int blocksize); +SECStatus intel_aes_decrypt_ecb_128(AESContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen, + unsigned int blocksize); +SECStatus intel_aes_encrypt_cbc_128(AESContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen, + unsigned int blocksize); +SECStatus intel_aes_decrypt_cbc_128(AESContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen, + unsigned int blocksize); +SECStatus intel_aes_encrypt_ctr_128(CTRContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + 
const unsigned char *input, + unsigned int inputLen, + unsigned int blocksize); +SECStatus intel_aes_encrypt_ecb_192(AESContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen, + unsigned int blocksize); +SECStatus intel_aes_decrypt_ecb_192(AESContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen, + unsigned int blocksize); +SECStatus intel_aes_encrypt_cbc_192(AESContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen, + unsigned int blocksize); +SECStatus intel_aes_decrypt_cbc_192(AESContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen, + unsigned int blocksize); +SECStatus intel_aes_encrypt_ctr_192(CTRContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen, + unsigned int blocksize); +SECStatus intel_aes_encrypt_ecb_256(AESContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen, + unsigned int blocksize); +SECStatus intel_aes_decrypt_ecb_256(AESContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen, + unsigned int blocksize); +SECStatus intel_aes_encrypt_cbc_256(AESContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen, + unsigned int blocksize); +SECStatus intel_aes_decrypt_cbc_256(AESContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen, + unsigned int 
blocksize); +SECStatus intel_aes_encrypt_ctr_256(CTRContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen, + unsigned int blocksize); + +#define intel_aes_ecb_worker(encrypt, keysize) \ + ((encrypt) \ + ? ((keysize) == 16 ? intel_aes_encrypt_ecb_128 \ + : (keysize) == 24 ? intel_aes_encrypt_ecb_192 \ + : intel_aes_encrypt_ecb_256) \ + : ((keysize) == 16 ? intel_aes_decrypt_ecb_128 \ + : (keysize) == 24 ? intel_aes_decrypt_ecb_192 \ + : intel_aes_decrypt_ecb_256)) + +#define intel_aes_cbc_worker(encrypt, keysize) \ + ((encrypt) \ + ? ((keysize) == 16 ? intel_aes_encrypt_cbc_128 \ + : (keysize) == 24 ? intel_aes_encrypt_cbc_192 \ + : intel_aes_encrypt_cbc_256) \ + : ((keysize) == 16 ? intel_aes_decrypt_cbc_128 \ + : (keysize) == 24 ? intel_aes_decrypt_cbc_192 \ + : intel_aes_decrypt_cbc_256)) + +#define intel_aes_ctr_worker(nr) \ + ((nr) == 10 ? intel_aes_encrypt_ctr_128 \ + : (nr) == 12 ? intel_aes_encrypt_ctr_192 \ + : intel_aes_encrypt_ctr_256) +/* Run the key-schedule routine chosen by `encrypt` (non-zero: encrypt schedule) and `keysize` in bytes (16, 24; any other value selects the 256-bit routine). Not hygienic: `key` and `cx` are taken from the scope of the call site, and `keysize` may be evaluated twice. */ +#define intel_aes_init(encrypt, keysize) \ + do { \ + if (encrypt) { \ + if ((keysize) == 16) \ + intel_aes_encrypt_init_128(key, cx->expandedKey); \ + else if ((keysize) == 24) \ + intel_aes_encrypt_init_192(key, cx->expandedKey); \ + else \ + intel_aes_encrypt_init_256(key, cx->expandedKey); \ + } else { \ + if ((keysize) == 16) \ + intel_aes_decrypt_init_128(key, cx->expandedKey); \ + else if ((keysize) == 24) \ + intel_aes_decrypt_init_192(key, cx->expandedKey); \ + else \ + intel_aes_decrypt_init_256(key, cx->expandedKey); \ + } \ + } while (0) diff --git a/security/nss/lib/freebl/intel-aes.s b/security/nss/lib/freebl/intel-aes.s new file mode 100644 index 000000000..2dfcfa15b --- /dev/null +++ b/security/nss/lib/freebl/intel-aes.s @@ -0,0 +1,2514 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0.
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + + .text + +#define IV_OFFSET 16 +#define EXPANDED_KEY_OFFSET 48 + +/* + * Warning: the length values used in this module are "unsigned int" + * in C, which is 32-bit. When they're passed in registers, use only + * the low 32 bits, because the top half is unspecified. + * + * This is called from C code, so the contents of those bits can + * depend on the C compiler's optimization decisions. This means that + * mistakes might not be obvious in testing if those bits happen to be + * zero in your build. + * + * Exception: 32-bit lea instructions use a 64-bit address because the + * address size doesn't affect the result, and that form is more + * compactly encoded and preferred by compilers over a 32-bit address. + */ + +/* in %rdi : the key + in %rsi : buffer for expanded key +*/ + .type intel_aes_encrypt_init_128,@function + .globl intel_aes_encrypt_init_128 + .align 16 +intel_aes_encrypt_init_128: + movups (%rdi), %xmm1 + movups %xmm1, (%rsi) + leaq 16(%rsi), %rsi + xorl %eax, %eax + + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x01 /* aeskeygenassist $0x01, %xmm1, %xmm2 */ + call key_expansion128 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x02 /* aeskeygenassist $0x02, %xmm1, %xmm2 */ + call key_expansion128 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x04 /* aeskeygenassist $0x04, %xmm1, %xmm2 */ + call key_expansion128 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x08 /* aeskeygenassist $0x08, %xmm1, %xmm2 */ + call key_expansion128 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x10 /* aeskeygenassist $0x10, %xmm1, %xmm2 */ + call key_expansion128 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x20 /* aeskeygenassist $0x20, %xmm1, %xmm2 */ + call key_expansion128 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x40 /* aeskeygenassist $0x40, %xmm1, %xmm2 */ + call key_expansion128 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x80 /* aeskeygenassist $0x80, %xmm1, %xmm2 */ + call key_expansion128 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x1b /* 
aeskeygenassist $0x1b, %xmm1, %xmm2 */ + call key_expansion128 + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x36 /* aeskeygenassist $0x36, %xmm1, %xmm2 */ + call key_expansion128 + + ret + .size intel_aes_encrypt_init_128, .-intel_aes_encrypt_init_128 + + +/* in %rdi : the key + in %rsi : buffer for expanded key +*/ + .type intel_aes_decrypt_init_128,@function + .globl intel_aes_decrypt_init_128 + .align 16 +intel_aes_decrypt_init_128: + movups (%rdi), %xmm1 + movups %xmm1, (%rsi) + leaq 16(%rsi), %rsi + xorl %eax, %eax + + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x01 /* aeskeygenassist $0x01, %xmm1, %xmm2 */ + call key_expansion128 + .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */ + movups %xmm2, -16(%rsi) + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x02 /* aeskeygenassist $0x02, %xmm1, %xmm2 */ + call key_expansion128 + .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */ + movups %xmm2, -16(%rsi) + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x04 /* aeskeygenassist $0x04, %xmm1, %xmm2 */ + call key_expansion128 + .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */ + movups %xmm2, -16(%rsi) + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x08 /* aeskeygenassist $0x08, %xmm1, %xmm2 */ + call key_expansion128 + .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */ + movups %xmm2, -16(%rsi) + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x10 /* aeskeygenassist $0x10, %xmm1, %xmm2 */ + call key_expansion128 + .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */ + movups %xmm2, -16(%rsi) + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x20 /* aeskeygenassist $0x20, %xmm1, %xmm2 */ + call key_expansion128 + .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */ + movups %xmm2, -16(%rsi) + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x40 /* aeskeygenassist $0x40, %xmm1, %xmm2 */ + call key_expansion128 + .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */ + movups %xmm2, -16(%rsi) + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x80 /* aeskeygenassist $0x80, %xmm1, %xmm2 */ + call key_expansion128 + .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 
*/ + movups %xmm2, -16(%rsi) + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x1b /* aeskeygenassist $0x1b, %xmm1, %xmm2 */ + call key_expansion128 + .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */ + movups %xmm2, -16(%rsi) + .byte 0x66,0x0f,0x3a,0xdf,0xd1,0x36 /* aeskeygenassist $0x36, %xmm1, %xmm2 */ + call key_expansion128 + + ret + .size intel_aes_decrypt_init_128, .-intel_aes_decrypt_init_128 + +/* key_expansion128: derive the next 128-bit round key. in %xmm1: previous round key; in %xmm2: aeskeygenassist output for that key; in %rsi: where to store the new key (advanced by 16 on return); in %eax: zero fill for %xmm3 (both 128-bit init routines clear it before their first call). out %xmm1: the new round key. Clobbers %xmm2 and %xmm3. */ + .type key_expansion128,@function + .align 16 +key_expansion128: + movd %eax, %xmm3 /* low dword = %eax, upper dwords cleared */ + pshufd $0xff, %xmm2, %xmm2 /* broadcast the top dword of %xmm2 */ + shufps $0x10, %xmm1, %xmm3 + pxor %xmm3, %xmm1 + shufps $0x8c, %xmm1, %xmm3 + pxor %xmm2, %xmm1 + pxor %xmm3, %xmm1 + movdqu %xmm1, (%rsi) /* append the new round key to the schedule */ + addq $16, %rsi + ret + .size key_expansion128, .-key_expansion128 + + +/* in %rdi : cx - context + in %rsi : output - pointer to output buffer + in %rdx : outputLen - pointer to variable for length of output + (already filled in by caller) + in %ecx : maxOutputLen - length of output buffer + (already checked by caller) + in %r8 : input - pointer to input buffer + in %r9d : inputLen - length of input buffer + on stack: blocksize - AES blocksize (always 16, unused) +*/ + .type intel_aes_encrypt_ecb_128,@function + .globl intel_aes_encrypt_ecb_128 + .align 16 +intel_aes_encrypt_ecb_128: +// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi + leaq 48(%rdi), %rdi + + movdqu (%rdi), %xmm2 + movdqu 160(%rdi), %xmm12 + xor %eax, %eax +// cmpl $8*16, %r9d + cmpl $128, %r9d + jb 1f +// leal -8*16(%r9), %r11d + leal -128(%r9), %r11d +2: movdqu (%r8, %rax), %xmm3 + movdqu 16(%r8, %rax), %xmm4 + movdqu 32(%r8, %rax), %xmm5 + movdqu 48(%r8, %rax), %xmm6 + movdqu 64(%r8, %rax), %xmm7 + movdqu 80(%r8, %rax), %xmm8 + movdqu 96(%r8, %rax), %xmm9 + movdqu 112(%r8, %rax), %xmm10 + pxor %xmm2, %xmm3 + pxor %xmm2, %xmm4 + pxor %xmm2, %xmm5 + pxor %xmm2, %xmm6 + pxor %xmm2, %xmm7 + pxor %xmm2, %xmm8 + pxor %xmm2, %xmm9 + pxor %xmm2, %xmm10 + +// complete loop unrolling + movdqu 16(%rdi), %xmm1 + movdqu 32(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xdc,0xd9 /*
aesenc %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */ + + movdqu 48(%rdi), %xmm1 + movdqu 64(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */ + .byte 
0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */ + + movdqu 80(%rdi), %xmm1 + movdqu 96(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */ + + movdqu 112(%rdi), %xmm1 + movdqu 128(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */ + .byte 
0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */ + + movdqu 144(%rdi), %xmm1 + .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xdd,0xdc /* aesenclast %xmm12, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xdd,0xe4 /* aesenclast %xmm12, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xdd,0xec /* aesenclast %xmm12, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xdd,0xf4 /* aesenclast %xmm12, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xdd,0xfc /* aesenclast %xmm12, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xdd,0xc4 /* aesenclast %xmm12, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xdd,0xcc /* aesenclast %xmm12, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xdd,0xd4 /* aesenclast %xmm12, %xmm10 */ + + movdqu %xmm3, (%rsi, %rax) + movdqu %xmm4, 16(%rsi, %rax) + movdqu %xmm5, 32(%rsi, %rax) + movdqu %xmm6, 48(%rsi, %rax) + movdqu %xmm7, 64(%rsi, %rax) + movdqu %xmm8, 80(%rsi, %rax) + movdqu %xmm9, 96(%rsi, %rax) + movdqu %xmm10, 112(%rsi, %rax) +// addl $8*16, %eax + addl $128, %eax + cmpl %r11d, %eax + jbe 2b +1: cmpl %eax, %r9d + je 5f + + movdqu 16(%rdi), %xmm3 + movdqu 32(%rdi), %xmm4 + movdqu 48(%rdi), %xmm5 + movdqu 64(%rdi), %xmm6 + movdqu 80(%rdi), %xmm7 + movdqu 96(%rdi), %xmm8 + movdqu 112(%rdi), %xmm9 + movdqu 128(%rdi), %xmm10 + movdqu 144(%rdi), %xmm11 + +4: movdqu (%r8, %rax), %xmm1 + pxor %xmm2, %xmm1 + .byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, 
%xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdd,0xcc /* aesenclast %xmm12, %xmm1 */ + movdqu %xmm1, (%rsi, %rax) + addl $16, %eax + cmpl %eax, %r9d + jne 4b + +5: xor %eax, %eax + ret + .size intel_aes_encrypt_ecb_128, .-intel_aes_encrypt_ecb_128 + + +/* in %rdi : cx - context + in %rsi : output - pointer to output buffer + in %rdx : outputLen - pointer to variable for length of output + (already filled in by caller) + in %ecx : maxOutputLen - length of output buffer + (already checked by caller) + in %r8 : input - pointer to input buffer + in %r9d : inputLen - length of input buffer + on stack: blocksize - AES blocksize (always 16, unused) +*/ + .type intel_aes_decrypt_ecb_128,@function + .globl intel_aes_decrypt_ecb_128 + .align 16 +intel_aes_decrypt_ecb_128: +// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi + leaq 48(%rdi), %rdi + + movdqu (%rdi), %xmm2 + movdqu 160(%rdi), %xmm12 + xorl %eax, %eax +// cmpl $8*16, %r9d + cmpl $128, %r9d + jb 1f +// leal -8*16(%r9), %r11d + leal -128(%r9), %r11d +2: movdqu (%r8, %rax), %xmm3 + movdqu 16(%r8, %rax), %xmm4 + movdqu 32(%r8, %rax), %xmm5 + movdqu 48(%r8, %rax), %xmm6 + movdqu 64(%r8, %rax), %xmm7 + movdqu 80(%r8, %rax), %xmm8 + movdqu 96(%r8, %rax), %xmm9 + movdqu 112(%r8, %rax), %xmm10 + pxor %xmm12, %xmm3 + pxor %xmm12, %xmm4 + pxor %xmm12, %xmm5 + pxor %xmm12, %xmm6 + pxor %xmm12, %xmm7 + pxor %xmm12, %xmm8 + pxor %xmm12, %xmm9 + pxor %xmm12, %xmm10 + +// complete loop unrolling + movdqu 144(%rdi), %xmm1 + movdqu 128(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 
0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 112(%rdi), %xmm1 + movdqu 96(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, 
%xmm10 */ + + movdqu 80(%rdi), %xmm1 + movdqu 64(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 48(%rdi), %xmm1 + movdqu 32(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 
*/ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 16(%rdi), %xmm1 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */ + + movdqu %xmm3, (%rsi, %rax) + movdqu %xmm4, 16(%rsi, %rax) + movdqu %xmm5, 32(%rsi, %rax) + movdqu %xmm6, 48(%rsi, %rax) + movdqu %xmm7, 64(%rsi, %rax) + movdqu %xmm8, 80(%rsi, %rax) + movdqu %xmm9, 96(%rsi, %rax) + movdqu %xmm10, 112(%rsi, %rax) +// addl $8*16, %eax + addl $128, %eax + cmpl %r11d, %eax + jbe 2b +1: cmpl %eax, %r9d + je 5f + + movdqu 16(%rdi), %xmm3 + movdqu 32(%rdi), %xmm4 + movdqu 48(%rdi), %xmm5 + movdqu 64(%rdi), %xmm6 + movdqu 80(%rdi), %xmm7 + movdqu 96(%rdi), %xmm8 + movdqu 112(%rdi), %xmm9 + movdqu 128(%rdi), %xmm10 + movdqu 144(%rdi), %xmm11 + +4: movdqu (%r8, %rax), %xmm1 + pxor %xmm12, %xmm1 + .byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */ + 
.byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm1 */ + movdqu %xmm1, (%rsi, %rax) + addl $16, %eax + cmpl %eax, %r9d + jne 4b + +5: xor %eax, %eax + ret + .size intel_aes_decrypt_ecb_128, .-intel_aes_decrypt_ecb_128 + + +/* in %rdi : cx - context + in %rsi : output - pointer to output buffer + in %rdx : outputLen - pointer to variable for length of output + (already filled in by caller) + in %ecx : maxOutputLen - length of output buffer + (already checked by caller) + in %r8 : input - pointer to input buffer + in %r9d : inputLen - length of input buffer + on stack: blocksize - AES blocksize (always 16, unused) +*/ + .type intel_aes_encrypt_cbc_128,@function + .globl intel_aes_encrypt_cbc_128 + .align 16 +intel_aes_encrypt_cbc_128: + testl %r9d, %r9d + je 2f + +// leaq IV_OFFSET(%rdi), %rdx +// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi + leaq 16(%rdi), %rdx + leaq 48(%rdi), %rdi + + movdqu (%rdx), %xmm0 + movdqu (%rdi), %xmm2 + movdqu 16(%rdi), %xmm3 + movdqu 32(%rdi), %xmm4 + movdqu 48(%rdi), %xmm5 + movdqu 64(%rdi), %xmm6 + movdqu 80(%rdi), %xmm7 + movdqu 96(%rdi), %xmm8 + movdqu 112(%rdi), %xmm9 + movdqu 128(%rdi), %xmm10 + movdqu 144(%rdi), %xmm11 + movdqu 160(%rdi), %xmm12 + + xorl %eax, %eax +1: movdqu (%r8, %rax), %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm2, %xmm1 + .byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8,
%xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdd,0xcc /* aesenclast %xmm12, %xmm1 */ + movdqu %xmm1, (%rsi, %rax) + movdqa %xmm1, %xmm0 + addl $16, %eax + cmpl %eax, %r9d + jne 1b + + movdqu %xmm0, (%rdx) + +2: xor %eax, %eax + ret + .size intel_aes_encrypt_cbc_128, .-intel_aes_encrypt_cbc_128 + + +/* in %rdi : cx - context + in %rsi : output - pointer to output buffer + in %rdx : outputLen - pointer to variable for length of output + (already filled in by caller) + in %ecx : maxOutputLen - length of output buffer + (already checked by caller) + in %r8 : input - pointer to input buffer + in %r9d : inputLen - length of input buffer + on stack: blocksize - AES blocksize (always 16, unused) +*/ + .type intel_aes_decrypt_cbc_128,@function + .globl intel_aes_decrypt_cbc_128 + .align 16 +intel_aes_decrypt_cbc_128: +// leaq IV_OFFSET(%rdi), %rdx +// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi + leaq 16(%rdi), %rdx + leaq 48(%rdi), %rdi + + movdqu (%rdx), %xmm0 /* iv */ + movdqu (%rdi), %xmm2 /* first key block */ + movdqu 160(%rdi), %xmm12 /* last key block */ + xorl %eax, %eax + cmpl $128, %r9d + jb 1f + leal -128(%r9), %r11d +2: movdqu (%r8, %rax), %xmm3 /* 1st data block */ + movdqu 16(%r8, %rax), %xmm4 /* 2nd data block */ + movdqu 32(%r8, %rax), %xmm5 + movdqu 48(%r8, %rax), %xmm6 + movdqu 64(%r8, %rax), %xmm7 + movdqu 80(%r8, %rax), %xmm8 + movdqu 96(%r8, %rax), %xmm9 + movdqu 112(%r8, %rax), %xmm10 + pxor %xmm12, %xmm3 + pxor %xmm12, %xmm4 + pxor %xmm12, %xmm5 + pxor %xmm12, %xmm6 + pxor %xmm12, %xmm7 + pxor %xmm12, %xmm8 + pxor %xmm12, %xmm9 + pxor %xmm12, %xmm10 + +// complete loop unrolling + movdqu 144(%rdi), %xmm1 + movdqu 128(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte
0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 112(%rdi), %xmm1 + movdqu 96(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 80(%rdi), %xmm1 + movdqu 64(%rdi), %xmm11 
+ .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 48(%rdi), %xmm1 + movdqu 32(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, 
%xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 16(%rdi), %xmm1 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */ + + pxor %xmm0, %xmm3 + movdqu (%r8, %rax), %xmm0 + pxor %xmm0, %xmm4 + movdqu 16(%r8, %rax), %xmm0 + pxor %xmm0, %xmm5 + movdqu 32(%r8, %rax), %xmm0 + pxor %xmm0, %xmm6 + movdqu 48(%r8, %rax), %xmm0 + pxor %xmm0, %xmm7 + movdqu 64(%r8, %rax), %xmm0 + pxor %xmm0, %xmm8 + movdqu 80(%r8, %rax), %xmm0 + pxor %xmm0, %xmm9 + movdqu 96(%r8, %rax), %xmm0 + pxor %xmm0, %xmm10 + movdqu 112(%r8, %rax), %xmm0 + movdqu %xmm3, (%rsi, %rax) + movdqu %xmm4, 16(%rsi, %rax) + movdqu %xmm5, 32(%rsi, %rax) + movdqu %xmm6, 48(%rsi, %rax) + movdqu %xmm7, 64(%rsi, %rax) + movdqu %xmm8, 80(%rsi, %rax) + movdqu %xmm9, 96(%rsi, %rax) + movdqu %xmm10, 112(%rsi, %rax) + addl $128, %eax + cmpl %r11d, %eax + jbe 2b +1: cmpl %eax, %r9d + je 5f + + movdqu 16(%rdi), %xmm3 + movdqu 32(%rdi), %xmm4 + movdqu 48(%rdi), %xmm5 + movdqu 64(%rdi), %xmm6 + movdqu 80(%rdi), %xmm7 + movdqu 96(%rdi), %xmm8 + movdqu 
112(%rdi), %xmm9 + movdqu 128(%rdi), %xmm10 + movdqu 144(%rdi), %xmm11 + +4: movdqu (%r8, %rax), %xmm1 + movdqa %xmm1, %xmm13 + pxor %xmm12, %xmm1 + .byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm1 */ + pxor %xmm0, %xmm1 + movdqu %xmm1, (%rsi, %rax) + movdqa %xmm13, %xmm0 + addl $16, %eax + cmpl %eax, %r9d + jne 4b + +5: movdqu %xmm0, (%rdx) + + xor %eax, %eax + ret + .size intel_aes_decrypt_cbc_128, .-intel_aes_decrypt_cbc_128 + +/* in %rdi : the key + in %rsi : buffer for expanded key +*/ + .type intel_aes_encrypt_init_192,@function + .globl intel_aes_encrypt_init_192 + .align 16 +intel_aes_encrypt_init_192: + movdqu (%rdi), %xmm1 + movq 16(%rdi), %xmm3 + movdqu %xmm1, (%rsi) + movq %xmm3, 16(%rsi) + leaq 24(%rsi), %rsi + + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01 /* aeskeygenassist $0x01, %xmm3, %xmm2 */ + call key_expansion192 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02 /* aeskeygenassist $0x02, %xmm3, %xmm2 */ + call key_expansion192 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04 /* aeskeygenassist $0x04, %xmm3, %xmm2 */ + call key_expansion192 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08 /* aeskeygenassist $0x08, %xmm3, %xmm2 */ + call key_expansion192 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10 /* aeskeygenassist $0x10, %xmm3, %xmm2 */ + call key_expansion192 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20 /* aeskeygenassist $0x20, %xmm3, %xmm2 */ + call key_expansion192 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40 /* aeskeygenassist $0x40, %xmm3, %xmm2 */ 
+ call key_expansion192 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x80 /* aeskeygenassist $0x80, %xmm3, %xmm2 */ + call key_expansion192 + + ret + .size intel_aes_encrypt_init_192, .-intel_aes_encrypt_init_192 + + +/* in %rdi : the key + in %rsi : buffer for expanded key +*/ + .type intel_aes_decrypt_init_192,@function + .globl intel_aes_decrypt_init_192 + .align 16 +intel_aes_decrypt_init_192: + movdqu (%rdi), %xmm1 + movq 16(%rdi), %xmm3 + movdqu %xmm1, (%rsi) + movq %xmm3, 16(%rsi) + leaq 24(%rsi), %rsi + + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01 /* aeskeygenassist $0x01, %xmm3, %xmm2 */ + call key_expansion192 + movups -32(%rsi), %xmm2 + movups -16(%rsi), %xmm4 + .byte 0x66,0x0f,0x38,0xdb,0xd2 /* aesimc %xmm2, %xmm2 */ + .byte 0x66,0x0f,0x38,0xdb,0xe4 /* aesimc %xmm4, %xmm4 */ + movups %xmm2, -32(%rsi) + movups %xmm4, -16(%rsi) + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02 /* aeskeygenassist $0x02, %xmm3, %xmm2 */ + call key_expansion192 + .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */ + movups %xmm2, -24(%rsi) + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04 /* aeskeygenassist $0x04, %xmm3, %xmm2 */ + call key_expansion192 + movups -32(%rsi), %xmm2 + movups -16(%rsi), %xmm4 + .byte 0x66,0x0f,0x38,0xdb,0xd2 /* aesimc %xmm2, %xmm2 */ + .byte 0x66,0x0f,0x38,0xdb,0xe4 /* aesimc %xmm4, %xmm4 */ + movups %xmm2, -32(%rsi) + movups %xmm4, -16(%rsi) + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08 /* aeskeygenassist $0x08, %xmm3, %xmm2 */ + call key_expansion192 + .byte 0x66,0x0f,0x38,0xdb,0xd1 /* aesimc %xmm1, %xmm2 */ + movups %xmm2, -24(%rsi) + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10 /* aeskeygenassist $0x10, %xmm3, %xmm2 */ + call key_expansion192 + movups -32(%rsi), %xmm2 + movups -16(%rsi), %xmm4 + .byte 0x66,0x0f,0x38,0xdb,0xd2 /* aesimc %xmm2, %xmm2 */ + .byte 0x66,0x0f,0x38,0xdb,0xe4 /* aesimc %xmm4, %xmm4 */ + movups %xmm2, -32(%rsi) + movups %xmm4, -16(%rsi) + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20 /* aeskeygenassist $0x20, %xmm3, %xmm2 */ + call key_expansion192 + .byte 0x66,0x0f,0x38,0xdb,0xd1 
/* aesimc %xmm1, %xmm2 */ + movups %xmm2, -24(%rsi) + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40 /* aeskeygenassist $0x40, %xmm3, %xmm2 */ + call key_expansion192 + movups -32(%rsi), %xmm2 + movups -16(%rsi), %xmm4 + .byte 0x66,0x0f,0x38,0xdb,0xd2 /* aesimc %xmm2, %xmm2 */ + .byte 0x66,0x0f,0x38,0xdb,0xe4 /* aesimc %xmm4, %xmm4 */ + movups %xmm2, -32(%rsi) + movups %xmm4, -16(%rsi) + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x80 /* aeskeygenassist $0x80, %xmm3, %xmm2 */ + call key_expansion192 + + ret + .size intel_aes_decrypt_init_192, .-intel_aes_decrypt_init_192 + + + .type key_expansion192,@function + .align 16 +key_expansion192: + pshufd $0x55, %xmm2, %xmm2 + xor %eax, %eax + movd %eax, %xmm4 + shufps $0x10, %xmm1, %xmm4 + pxor %xmm4, %xmm1 + shufps $0x8c, %xmm1, %xmm4 + pxor %xmm2, %xmm1 + pxor %xmm4, %xmm1 + movdqu %xmm1, (%rsi) + addq $16, %rsi + + pshufd $0xff, %xmm1, %xmm4 + movd %eax, %xmm5 + shufps $0x00, %xmm3, %xmm5 + shufps $0x08, %xmm3, %xmm5 + pxor %xmm4, %xmm3 + pxor %xmm5, %xmm3 + movq %xmm3, (%rsi) + addq $8, %rsi + ret + .size key_expansion192, .-key_expansion192 + + +/* in %rdi : cx - context + in %rsi : output - pointer to output buffer + in %rdx : outputLen - pointer to variable for length of output + (already filled in by caller) + in %ecx : maxOutputLen - length of output buffer + (already checked by caller) + in %r8 : input - pointer to input buffer + in %r9d : inputLen - length of input buffer + on stack: blocksize - AES blocksize (always 16, unused) +*/ + .type intel_aes_encrypt_ecb_192,@function + .globl intel_aes_encrypt_ecb_192 + .align 16 +intel_aes_encrypt_ecb_192: +// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi + leaq 48(%rdi), %rdi + + movdqu (%rdi), %xmm2 + movdqu 192(%rdi), %xmm14 + xorl %eax, %eax +// cmpl $8*16, %r9d + cmpl $128, %r9d + jb 1f +// leal -8*16(%r9), %r11d + leal -128(%r9), %r11d +2: movdqu (%r8, %rax), %xmm3 + movdqu 16(%r8, %rax), %xmm4 + movdqu 32(%r8, %rax), %xmm5 + movdqu 48(%r8, %rax), %xmm6 + movdqu 64(%r8, %rax), %xmm7 + movdqu 
80(%r8, %rax), %xmm8 + movdqu 96(%r8, %rax), %xmm9 + movdqu 112(%r8, %rax), %xmm10 + pxor %xmm2, %xmm3 + pxor %xmm2, %xmm4 + pxor %xmm2, %xmm5 + pxor %xmm2, %xmm6 + pxor %xmm2, %xmm7 + pxor %xmm2, %xmm8 + pxor %xmm2, %xmm9 + pxor %xmm2, %xmm10 + +// complete loop unrolling + movdqu 16(%rdi), %xmm1 + movdqu 32(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */ + + movdqu 48(%rdi), %xmm1 + movdqu 64(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, 
%xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */ + + movdqu 80(%rdi), %xmm1 + movdqu 96(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */ + + movdqu 112(%rdi), %xmm1 + movdqu 128(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, 
%xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */ + + movdqu 144(%rdi), %xmm1 + movdqu 160(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */ + + movdqu 176(%rdi), %xmm1 + .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ + .byte 
0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xdd,0xde /* aesenclast %xmm14, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xdd,0xe6 /* aesenclast %xmm14, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xdd,0xee /* aesenclast %xmm14, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xdd,0xf6 /* aesenclast %xmm14, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xdd,0xfe /* aesenclast %xmm14, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xdd,0xc6 /* aesenclast %xmm14, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xdd,0xce /* aesenclast %xmm14, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xdd,0xd6 /* aesenclast %xmm14, %xmm10 */ + + movdqu %xmm3, (%rsi, %rax) + movdqu %xmm4, 16(%rsi, %rax) + movdqu %xmm5, 32(%rsi, %rax) + movdqu %xmm6, 48(%rsi, %rax) + movdqu %xmm7, 64(%rsi, %rax) + movdqu %xmm8, 80(%rsi, %rax) + movdqu %xmm9, 96(%rsi, %rax) + movdqu %xmm10, 112(%rsi, %rax) +// addl $8*16, %eax + addl $128, %eax + cmpl %r11d, %eax + jbe 2b +1: cmpl %eax, %r9d + je 5f + + movdqu 16(%rdi), %xmm3 + movdqu 32(%rdi), %xmm4 + movdqu 48(%rdi), %xmm5 + movdqu 64(%rdi), %xmm6 + movdqu 80(%rdi), %xmm7 + movdqu 96(%rdi), %xmm8 + movdqu 112(%rdi), %xmm9 + movdqu 128(%rdi), %xmm10 + movdqu 144(%rdi), %xmm11 + movdqu 160(%rdi), %xmm12 + movdqu 176(%rdi), %xmm13 + +4: movdqu (%r8, %rax), %xmm1 + pxor %xmm2, %xmm1 + .byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xcc /* aesenc %xmm12, %xmm1 */ + .byte
0x66,0x41,0x0f,0x38,0xdc,0xcd /* aesenc %xmm13, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdd,0xce /* aesenclast %xmm14, %xmm1 */ + movdqu %xmm1, (%rsi, %rax) + addl $16, %eax + cmpl %eax, %r9d + jne 4b + +5: xor %eax, %eax + ret + .size intel_aes_encrypt_ecb_192, .-intel_aes_encrypt_ecb_192 + + +/* in %rdi : cx - context + in %rsi : output - pointer to output buffer + in %rdx : outputLen - pointer to variable for length of output + (already filled in by caller) + in %ecx : maxOutputLen - length of output buffer + (already checked by caller) + in %r8 : input - pointer to input buffer + in %r9d : inputLen - length of input buffer + on stack: blocksize - AES blocksize (always 16, unused) +*/ + .type intel_aes_decrypt_ecb_192,@function + .globl intel_aes_decrypt_ecb_192 + .align 16 +intel_aes_decrypt_ecb_192: +// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi + leaq 48(%rdi), %rdi + + movdqu (%rdi), %xmm2 + movdqu 192(%rdi), %xmm14 + xorl %eax, %eax +// cmpl $8*16, %r9d + cmpl $128, %r9d + jb 1f +// leal -8*16(%r9), %r11d + leal -128(%r9), %r11d +2: movdqu (%r8, %rax), %xmm3 + movdqu 16(%r8, %rax), %xmm4 + movdqu 32(%r8, %rax), %xmm5 + movdqu 48(%r8, %rax), %xmm6 + movdqu 64(%r8, %rax), %xmm7 + movdqu 80(%r8, %rax), %xmm8 + movdqu 96(%r8, %rax), %xmm9 + movdqu 112(%r8, %rax), %xmm10 + pxor %xmm14, %xmm3 + pxor %xmm14, %xmm4 + pxor %xmm14, %xmm5 + pxor %xmm14, %xmm6 + pxor %xmm14, %xmm7 + pxor %xmm14, %xmm8 + pxor %xmm14, %xmm9 + pxor %xmm14, %xmm10 + +// complete loop unrolling + movdqu 176(%rdi), %xmm1 + movdqu 160(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 
/* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 144(%rdi), %xmm1 + movdqu 128(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 112(%rdi), %xmm1 + movdqu 96(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 
/* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 80(%rdi), %xmm1 + movdqu 64(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 48(%rdi), %xmm1 + movdqu 32(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 
0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 16(%rdi), %xmm1 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */ + + movdqu %xmm3, (%rsi, %rax) + movdqu %xmm4, 16(%rsi, %rax) + movdqu %xmm5, 32(%rsi, %rax) + movdqu %xmm6, 48(%rsi, %rax) + movdqu %xmm7, 
64(%rsi, %rax) + movdqu %xmm8, 80(%rsi, %rax) + movdqu %xmm9, 96(%rsi, %rax) + movdqu %xmm10, 112(%rsi, %rax) +// addl $8*16, %eax + addl $128, %eax + cmpl %r11d, %eax + jbe 2b +1: cmpl %eax, %r9d + je 5f + + movdqu 16(%rdi), %xmm3 + movdqu 32(%rdi), %xmm4 + movdqu 48(%rdi), %xmm5 + movdqu 64(%rdi), %xmm6 + movdqu 80(%rdi), %xmm7 + movdqu 96(%rdi), %xmm8 + movdqu 112(%rdi), %xmm9 + movdqu 128(%rdi), %xmm10 + movdqu 144(%rdi), %xmm11 + movdqu 160(%rdi), %xmm12 + movdqu 176(%rdi), %xmm13 + +4: movdqu (%r8, %rax), %xmm1 + pxor %xmm14, %xmm1 + .byte 0x66,0x41,0x0f,0x38,0xde,0xcd /* aesdec %xmm13, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xcc /* aesdec %xmm12, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm1 */ + movdqu %xmm1, (%rsi, %rax) + addl $16, %eax + cmpl %eax, %r9d + jne 4b + +5: xor %eax, %eax + ret + .size intel_aes_decrypt_ecb_192, .-intel_aes_decrypt_ecb_192 + + +/* in %rdi : cx - context + in %rsi : output - pointer to output buffer + in %rdx : outputLen - pointer to variable for length of output + (already filled in by caller) + in %ecx : maxOutputLen - length of output buffer + (already checked by caller) + in %r8 : input - pointer to input buffer + in %r9d : inputLen - length of input buffer + on stack: blocksize - AES blocksize (always 16, unused) +*/ + .type intel_aes_encrypt_cbc_192,@function + .globl intel_aes_encrypt_cbc_192 + .align 16 +intel_aes_encrypt_cbc_192: + testl %r9d, %r9d + 
je 2f + +// leaq IV_OFFSET(%rdi), %rdx +// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi + leaq 16(%rdi), %rdx + leaq 48(%rdi), %rdi + + movdqu (%rdx), %xmm0 + movdqu (%rdi), %xmm2 + movdqu 16(%rdi), %xmm3 + movdqu 32(%rdi), %xmm4 + movdqu 48(%rdi), %xmm5 + movdqu 64(%rdi), %xmm6 + movdqu 80(%rdi), %xmm7 + movdqu 96(%rdi), %xmm8 + movdqu 112(%rdi), %xmm9 + movdqu 128(%rdi), %xmm10 + movdqu 144(%rdi), %xmm11 + movdqu 160(%rdi), %xmm12 + movdqu 176(%rdi), %xmm13 + movdqu 192(%rdi), %xmm14 + + xorl %eax, %eax +1: movdqu (%r8, %rax), %xmm1 + pxor %xmm0, %xmm1 + pxor %xmm2, %xmm1 + .byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xcc /* aesenc %xmm12, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xcd /* aesenc %xmm13, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdd,0xce /* aesenclast %xmm14, %xmm1 */ + movdqu %xmm1, (%rsi, %rax) + movdqa %xmm1, %xmm0 + addl $16, %eax + cmpl %eax, %r9d + jne 1b + + movdqu %xmm0, (%rdx) + +2: xor %eax, %eax + ret + .size intel_aes_encrypt_cbc_192, .-intel_aes_encrypt_cbc_192 + + +/* in %rdi : cx - context + in %rsi : output - pointer to output buffer + in %rdx : outputLen - pointer to variable for length of output + (already filled in by caller) + in %ecx : maxOutputLen - length of output buffer + (already checked by caller) + in %r8 : input - pointer to input buffer + in %r9d : inputLen - length of input buffer + on stack: blocksize - AES blocksize (always 16, unused) +*/ + .type intel_aes_decrypt_cbc_192,@function + .globl
intel_aes_decrypt_cbc_192 + .align 16 +intel_aes_decrypt_cbc_192: + leaq 16(%rdi), %rdx + leaq 48(%rdi), %rdi + + movdqu (%rdx), %xmm0 + movdqu (%rdi), %xmm2 + movdqu 192(%rdi), %xmm14 + xorl %eax, %eax + cmpl $128, %r9d + jb 1f + leal -128(%r9), %r11d +2: movdqu (%r8, %rax), %xmm3 + movdqu 16(%r8, %rax), %xmm4 + movdqu 32(%r8, %rax), %xmm5 + movdqu 48(%r8, %rax), %xmm6 + movdqu 64(%r8, %rax), %xmm7 + movdqu 80(%r8, %rax), %xmm8 + movdqu 96(%r8, %rax), %xmm9 + movdqu 112(%r8, %rax), %xmm10 + pxor %xmm14, %xmm3 + pxor %xmm14, %xmm4 + pxor %xmm14, %xmm5 + pxor %xmm14, %xmm6 + pxor %xmm14, %xmm7 + pxor %xmm14, %xmm8 + pxor %xmm14, %xmm9 + pxor %xmm14, %xmm10 + +// complete loop unrolling + movdqu 176(%rdi), %xmm1 + movdqu 160(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 144(%rdi), %xmm1 + movdqu 128(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 
0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 112(%rdi), %xmm1 + movdqu 96(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 80(%rdi), %xmm1 + movdqu 64(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ 
+ .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 48(%rdi), %xmm1 + movdqu 32(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec 
%xmm11, %xmm10 */ + + movdqu 16(%rdi), %xmm1 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */ + + pxor %xmm0, %xmm3 + movdqu (%r8, %rax), %xmm0 + pxor %xmm0, %xmm4 + movdqu 16(%r8, %rax), %xmm0 + pxor %xmm0, %xmm5 + movdqu 32(%r8, %rax), %xmm0 + pxor %xmm0, %xmm6 + movdqu 48(%r8, %rax), %xmm0 + pxor %xmm0, %xmm7 + movdqu 64(%r8, %rax), %xmm0 + pxor %xmm0, %xmm8 + movdqu 80(%r8, %rax), %xmm0 + pxor %xmm0, %xmm9 + movdqu 96(%r8, %rax), %xmm0 + pxor %xmm0, %xmm10 + movdqu 112(%r8, %rax), %xmm0 + movdqu %xmm3, (%rsi, %rax) + movdqu %xmm4, 16(%rsi, %rax) + movdqu %xmm5, 32(%rsi, %rax) + movdqu %xmm6, 48(%rsi, %rax) + movdqu %xmm7, 64(%rsi, %rax) + movdqu %xmm8, 80(%rsi, %rax) + movdqu %xmm9, 96(%rsi, %rax) + movdqu %xmm10, 112(%rsi, %rax) + addl $128, %eax + cmpl %r11d, %eax + jbe 2b +1: cmpl %eax, %r9d + je 5f + + movdqu 16(%rdi), %xmm3 + movdqu 32(%rdi), %xmm4 + movdqu 48(%rdi), %xmm5 + movdqu 64(%rdi), %xmm6 + movdqu 80(%rdi), %xmm7 + movdqu 96(%rdi), %xmm8 + movdqu 112(%rdi), %xmm9 + movdqu 128(%rdi), %xmm10 + movdqu 
144(%rdi), %xmm11 + movdqu 160(%rdi), %xmm12 + movdqu 176(%rdi), %xmm13 + +4: movdqu (%r8, %rax), %xmm1 /* single-block loop: load the next 16-byte input block */ + movdqa %xmm1, %xmm15 /* keep an unmodified copy; it becomes the chaining value below */ + pxor %xmm14, %xmm1 + .byte 0x66,0x41,0x0f,0x38,0xde,0xcd /* aesdec %xmm13, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xcc /* aesdec %xmm12, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm1 */ + pxor %xmm0, %xmm1 /* CBC step: xor the decrypted block with the chaining value held in xmm0 */ + movdqu %xmm1, (%rsi, %rax) + movdqa %xmm15, %xmm0 /* the saved input block is the chaining value for the next block */ + addl $16, %eax + cmpl %eax, %r9d + jne 4b /* loop until the byte offset in eax equals r9d */ + +5: movdqu %xmm0, (%rdx) /* store the final chaining value through rdx */ + + xor %eax, %eax /* eax is 0 at ret */ + ret + .size intel_aes_decrypt_cbc_192, .-intel_aes_decrypt_cbc_192 + +/* in %rdi : the key (32 bytes are read, as two unaligned 16-byte loads) + in %rsi : buffer for expanded key (15 round keys, 240 bytes, are written; rsi itself is advanced) +*/ + .type intel_aes_encrypt_init_256,@function + .globl intel_aes_encrypt_init_256 + .align 16 +intel_aes_encrypt_init_256: /* builds the AES-256 encryption key schedule; eax is 0 at ret */ + movdqu (%rdi), %xmm1 /* xmm1 = low half of the key, stored as round key 0 */ + movdqu 16(%rdi), %xmm3 /* xmm3 = high half of the key, stored as round key 1 */ + movdqu %xmm1, (%rsi) + movdqu %xmm3, 16(%rsi) + leaq 32(%rsi), %rsi /* rsi = where the next round key goes */ + xor %eax, %eax /* eax = 0: key_expansion256 copies it into xmm6 as a zero vector, and nothing changes it before ret */ + + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01 /* aeskeygenassist $0x01, %xmm3, %xmm2 */ + call key_expansion256 /* each call stores two more round keys and advances rsi by 32; the immediate doubles from one step to the next */ + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02 /* aeskeygenassist $0x02, %xmm3, %xmm2 */ + call key_expansion256 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04 /* aeskeygenassist $0x04, %xmm3, %xmm2 */ + call key_expansion256 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08 /* aeskeygenassist $0x08, %xmm3, %xmm2 */ + call key_expansion256 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10 /* aeskeygenassist $0x10, %xmm3, %xmm2 */ + call key_expansion256 + .byte
0x66,0x0f,0x3a,0xdf,0xd3,0x20 /* aeskeygenassist $0x20, %xmm3, %xmm2 */ + call key_expansion256 + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40 /* aeskeygenassist $0x40, %xmm3, %xmm2 */ + pxor %xmm6, %xmm6 /* final round key: same steps as the first half of key_expansion256, with xmm6 zeroed directly */ + pshufd $0xff, %xmm2, %xmm2 + shufps $0x10, %xmm1, %xmm6 + pxor %xmm6, %xmm1 + shufps $0x8c, %xmm1, %xmm6 + pxor %xmm2, %xmm1 + pxor %xmm6, %xmm1 + movdqu %xmm1, (%rsi) /* round key 14; the schedule ends here, so no second half is computed */ + + ret + .size intel_aes_encrypt_init_256, .-intel_aes_encrypt_init_256 + + +/* in %rdi : the key (32 bytes are read, as two unaligned 16-byte loads) + in %rsi : buffer for expanded key (15 round keys, 240 bytes, are written; rsi itself is advanced) +*/ + .type intel_aes_decrypt_init_256,@function + .globl intel_aes_decrypt_init_256 + .align 16 +intel_aes_decrypt_init_256: /* builds the AES-256 decryption key schedule: round keys 1 to 13 are stored after aesimc, keys 0 and 14 as generated; eax is 0 at ret */ + movdqu (%rdi), %xmm1 + movdqu 16(%rdi), %xmm3 + movdqu %xmm1, (%rsi) /* round key 0 is stored unchanged */ + .byte 0x66,0x0f,0x38,0xdb,0xe3 /* aesimc %xmm3, %xmm4 */ + movdqu %xmm4, 16(%rsi) /* round key 1 is stored in aesimc form; xmm3 keeps the plain value for the expansion */ + leaq 32(%rsi), %rsi + xor %eax, %eax /* eax = 0: zero source for xmm6 inside key_expansion256, unchanged until ret */ + + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x01 /* aeskeygenassist $0x01, %xmm3, %xmm2 */ + call key_expansion256 /* stores the next two plain round keys (left in xmm1 and xmm3) and advances rsi by 32 */ + .byte 0x66,0x0f,0x38,0xdb,0xe1 /* aesimc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdb,0xeb /* aesimc %xmm3, %xmm5 */ + movdqu %xmm4, -32(%rsi) /* overwrite the two keys just stored with their aesimc forms */ + movdqu %xmm5, -16(%rsi) + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x02 /* aeskeygenassist $0x02, %xmm3, %xmm2 */ + call key_expansion256 + .byte 0x66,0x0f,0x38,0xdb,0xe1 /* aesimc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdb,0xeb /* aesimc %xmm3, %xmm5 */ + movdqu %xmm4, -32(%rsi) + movdqu %xmm5, -16(%rsi) + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x04 /* aeskeygenassist $0x04, %xmm3, %xmm2 */ + call key_expansion256 + .byte 0x66,0x0f,0x38,0xdb,0xe1 /* aesimc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdb,0xeb /* aesimc %xmm3, %xmm5 */ + movdqu %xmm4, -32(%rsi) + movdqu %xmm5, -16(%rsi) + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x08 /* aeskeygenassist $0x08, %xmm3, %xmm2 */ + call key_expansion256 + .byte 0x66,0x0f,0x38,0xdb,0xe1 /* aesimc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdb,0xeb /* aesimc %xmm3, %xmm5 */ + movdqu %xmm4, -32(%rsi) + movdqu %xmm5, -16(%rsi) + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x10 /* aeskeygenassist $0x10, %xmm3, %xmm2 */
+ call key_expansion256 + .byte 0x66,0x0f,0x38,0xdb,0xe1 /* aesimc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdb,0xeb /* aesimc %xmm3, %xmm5 */ + movdqu %xmm4, -32(%rsi) /* overwrite the two keys key_expansion256 just stored with their aesimc forms */ + movdqu %xmm5, -16(%rsi) + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x20 /* aeskeygenassist $0x20, %xmm3, %xmm2 */ + call key_expansion256 + .byte 0x66,0x0f,0x38,0xdb,0xe1 /* aesimc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdb,0xeb /* aesimc %xmm3, %xmm5 */ + movdqu %xmm4, -32(%rsi) + movdqu %xmm5, -16(%rsi) + .byte 0x66,0x0f,0x3a,0xdf,0xd3,0x40 /* aeskeygenassist $0x40, %xmm3, %xmm2 */ + pxor %xmm6, %xmm6 /* final round key: same steps as the first half of key_expansion256, with xmm6 zeroed directly */ + pshufd $0xff, %xmm2, %xmm2 + shufps $0x10, %xmm1, %xmm6 + pxor %xmm6, %xmm1 + shufps $0x8c, %xmm1, %xmm6 + pxor %xmm2, %xmm1 + pxor %xmm6, %xmm1 + movdqu %xmm1, (%rsi) /* round key 14 is stored as generated, without aesimc */ + + ret + .size intel_aes_decrypt_init_256, .-intel_aes_decrypt_init_256 + /* key_expansion256: helper for the two 256-bit init routines. On entry xmm1 and xmm3 hold the two most recent round keys, xmm2 holds the aeskeygenassist result computed from xmm3, eax is 0 and rsi points at the next free slot. On return xmm1 and xmm3 hold the next two round keys, both have been stored at the old rsi, and rsi has advanced by 32. Overwrites xmm2, xmm4 and xmm6. */ + + .type key_expansion256,@function + .align 16 +key_expansion256: + movd %eax, %xmm6 /* xmm6 = 0, since the callers zero eax */ + pshufd $0xff, %xmm2, %xmm2 /* broadcast dword 3 of the aeskeygenassist result */ + shufps $0x10, %xmm1, %xmm6 /* the two shufps and pxor pairs xor into each dword of xmm1 all of its lower dwords */ + pxor %xmm6, %xmm1 + shufps $0x8c, %xmm1, %xmm6 + pxor %xmm2, %xmm1 /* then mix the broadcast dword into every lane */ + pxor %xmm6, %xmm1 + movdqu %xmm1, (%rsi) /* store the first new round key */ + + addq $16, %rsi + .byte 0x66,0x0f,0x3a,0xdf,0xe1,0x00 /* aeskeygenassist $0, %xmm1, %xmm4 */ + pshufd $0xaa, %xmm4, %xmm4 /* second key uses dword 2 of the result, computed with immediate 0 */ + shufps $0x10, %xmm3, %xmm6 /* xmm6 is reused without clearing: its dword 0 is still zero, which is all this shuffle takes from it */ + pxor %xmm6, %xmm3 + shufps $0x8c, %xmm3, %xmm6 + pxor %xmm4, %xmm3 + pxor %xmm6, %xmm3 + movdqu %xmm3, (%rsi) /* store the second new round key */ + addq $16, %rsi + ret + .size key_expansion256, .-key_expansion256 + + +/* in %rdi : cx - context + in %rsi : output - pointer to output buffer + in %rdx : outputLen - pointer to variable for length of output + (already filled in by caller) + in %ecx : maxOutputLen - length of output buffer + (already checked by caller) + in %r8 : input - pointer to input buffer + in %r9d : inputLen - length of input buffer + on stack: blocksize - AES blocksize (always 16, unused) +*/ + .type intel_aes_encrypt_ecb_256,@function + .globl intel_aes_encrypt_ecb_256 + .align 16 +intel_aes_encrypt_ecb_256: +// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi + leaq 48(%rdi), %rdi /* rdi = cx plus 48, the literal used in place of EXPANDED_KEY_OFFSET; round keys are read relative to it */ + +
movdqu (%rdi), %xmm2 /* xmm2 = round key 0 */ + movdqu 224(%rdi), %xmm15 /* xmm15 = round key 14, used by aesenclast */ + xorl %eax, %eax /* eax = byte offset into both input and output */ +// cmpl $8*16, %r9d + cmpl $128, %r9d + jb 1f /* fewer than 128 bytes: skip the 8-block loop and go to the single-block loop */ +// leal -8*16(%r9), %r11d + leal -128(%r9), %r11d /* r11d = inputLen minus 128, the last offset at which a full 8-block batch still fits */ +2: movdqu (%r8, %rax), %xmm3 /* 8-block loop: load eight input blocks into xmm3 to xmm10 */ + movdqu 16(%r8, %rax), %xmm4 + movdqu 32(%r8, %rax), %xmm5 + movdqu 48(%r8, %rax), %xmm6 + movdqu 64(%r8, %rax), %xmm7 + movdqu 80(%r8, %rax), %xmm8 + movdqu 96(%r8, %rax), %xmm9 + movdqu 112(%r8, %rax), %xmm10 + pxor %xmm2, %xmm3 /* initial whitening: xor round key 0 into all eight blocks */ + pxor %xmm2, %xmm4 + pxor %xmm2, %xmm5 + pxor %xmm2, %xmm6 + pxor %xmm2, %xmm7 + pxor %xmm2, %xmm8 + pxor %xmm2, %xmm9 + pxor %xmm2, %xmm10 + +// complete loop unrolling + movdqu 16(%rdi), %xmm1 /* round keys are fetched two at a time into xmm1 and xmm11, and each is applied to all eight blocks */ + movdqu 32(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */ + + movdqu 48(%rdi), %xmm1 + movdqu 64(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1,
%xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */ + + movdqu 80(%rdi), %xmm1 + movdqu 96(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */ + + movdqu 112(%rdi), %xmm1 + movdqu 128(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdc,0xe9 /* 
aesenc %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */ + + movdqu 144(%rdi), %xmm1 + movdqu 160(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */ + + movdqu 176(%rdi), %xmm1 + movdqu 192(%rdi), %xmm11 + .byte 
0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xdb /* aesenc %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xe3 /* aesenc %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xeb /* aesenc %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xf3 /* aesenc %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xfb /* aesenc %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xc3 /* aesenc %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xdc,0xd3 /* aesenc %xmm11, %xmm10 */ + + movdqu 208(%rdi), %xmm1 + .byte 0x66,0x0f,0x38,0xdc,0xd9 /* aesenc %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdc,0xe1 /* aesenc %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdc,0xe9 /* aesenc %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdc,0xf1 /* aesenc %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdc,0xf9 /* aesenc %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc1 /* aesenc %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdc,0xd1 /* aesenc %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xdd,0xdf /* aesenclast %xmm15, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xdd,0xe7 /* aesenclast %xmm15, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xdd,0xef /* aesenclast %xmm15, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xdd,0xf7 /* aesenclast %xmm15, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xdd,0xff /* aesenclast %xmm15, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xdd,0xc7 /* aesenclast %xmm15, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xdd,0xcf /* aesenclast %xmm15, %xmm9 */ 
+ .byte 0x66,0x45,0x0f,0x38,0xdd,0xd7 /* aesenclast %xmm15, %xmm10 */ + + movdqu %xmm3, (%rsi, %rax) + movdqu %xmm4, 16(%rsi, %rax) + movdqu %xmm5, 32(%rsi, %rax) + movdqu %xmm6, 48(%rsi, %rax) + movdqu %xmm7, 64(%rsi, %rax) + movdqu %xmm8, 80(%rsi, %rax) + movdqu %xmm9, 96(%rsi, %rax) + movdqu %xmm10, 112(%rsi, %rax) +// addl $8*16, %eax + addl $128, %eax + cmpl %r11d, %eax + jbe 2b +1: cmpl %eax, %r9d + je 5f + + movdqu (%rdi), %xmm8 + movdqu 16(%rdi), %xmm2 + movdqu 32(%rdi), %xmm3 + movdqu 48(%rdi), %xmm4 + movdqu 64(%rdi), %xmm5 + movdqu 80(%rdi), %xmm6 + movdqu 96(%rdi), %xmm7 + movdqu 128(%rdi), %xmm9 + movdqu 144(%rdi), %xmm10 + movdqu 160(%rdi), %xmm11 + movdqu 176(%rdi), %xmm12 + movdqu 192(%rdi), %xmm13 + movdqu 208(%rdi), %xmm14 + +4: movdqu (%r8, %rax), %xmm1 + pxor %xmm8, %xmm1 + movdqu 112(%rdi), %xmm8 + .byte 0x66,0x0f,0x38,0xdc,0xca /* aesenc %xmm2, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */ + movdqu (%rdi), %xmm8 + .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc %xmm9, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xcc /* aesenc %xmm12, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xcd /* aesenc %xmm13, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xce /* aesenc %xmm14, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdd,0xcf /* aesenclast %xmm15, %xmm1 */ + movdqu %xmm1, (%rsi, %rax) + addl $16, %eax + cmpl %eax, %r9d + jne 4b + +5: xor %eax, %eax + ret + .size intel_aes_encrypt_ecb_256, .-intel_aes_encrypt_ecb_256 + + +/* in %rdi : cx - context + in %rsi : output - pointer to output buffer + in %rdx : outputLen - pointer 
to variable for length of output + (already filled in by caller) + in %ecx : maxOutputLen - length of output buffer + (already checked by caller) + in %r8 : input - pointer to input buffer + in %r9d : inputLen - length of input buffer + on stack: blocksize - AES blocksize (always 16, unused) +*/ + .type intel_aes_decrypt_ecb_256,@function + .globl intel_aes_decrypt_ecb_256 + .align 16 +intel_aes_decrypt_ecb_256: +// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi + leaq 48(%rdi), %rdi /* rdi = cx plus 48, the literal used in place of EXPANDED_KEY_OFFSET; round keys are read relative to it */ + + movdqu (%rdi), %xmm2 /* xmm2 = round key 0, used last by aesdeclast */ + movdqu 224(%rdi), %xmm15 /* xmm15 = round key 14, xored in first */ + xorl %eax, %eax /* eax = byte offset into both input and output */ +// cmpl $8*16, %r9d + cmpl $128, %r9d + jb 1f /* fewer than 128 bytes: skip the 8-block loop and go to the single-block loop */ +// leal -8*16(%r9), %r11d + leal -128(%r9), %r11d /* r11d = inputLen minus 128, the last offset at which a full 8-block batch still fits */ +2: movdqu (%r8, %rax), %xmm3 /* 8-block loop: load eight input blocks into xmm3 to xmm10 */ + movdqu 16(%r8, %rax), %xmm4 + movdqu 32(%r8, %rax), %xmm5 + movdqu 48(%r8, %rax), %xmm6 + movdqu 64(%r8, %rax), %xmm7 + movdqu 80(%r8, %rax), %xmm8 + movdqu 96(%r8, %rax), %xmm9 + movdqu 112(%r8, %rax), %xmm10 + pxor %xmm15, %xmm3 /* decryption walks the schedule backwards: start by xoring round key 14 into all eight blocks */ + pxor %xmm15, %xmm4 + pxor %xmm15, %xmm5 + pxor %xmm15, %xmm6 + pxor %xmm15, %xmm7 + pxor %xmm15, %xmm8 + pxor %xmm15, %xmm9 + pxor %xmm15, %xmm10 + +// complete loop unrolling + movdqu 208(%rdi), %xmm1 /* round keys are fetched two at a time, from offset 208 downwards, into xmm1 and xmm11 */ + movdqu 192(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /*
aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 176(%rdi), %xmm1 + movdqu 160(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 144(%rdi), %xmm1 + movdqu 128(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* 
aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 112(%rdi), %xmm1 + movdqu 96(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 80(%rdi), %xmm1 + movdqu 64(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* 
aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 48(%rdi), %xmm1 + movdqu 32(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 16(%rdi), %xmm1 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 
0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */ + + movdqu %xmm3, (%rsi, %rax) + movdqu %xmm4, 16(%rsi, %rax) + movdqu %xmm5, 32(%rsi, %rax) + movdqu %xmm6, 48(%rsi, %rax) + movdqu %xmm7, 64(%rsi, %rax) + movdqu %xmm8, 80(%rsi, %rax) + movdqu %xmm9, 96(%rsi, %rax) + movdqu %xmm10, 112(%rsi, %rax) +// addl $8*16, %eax + addl $128, %eax + cmpl %r11d, %eax + jbe 2b +1: cmpl %eax, %r9d + je 5f + + movdqu 16(%rdi), %xmm2 + movdqu 32(%rdi), %xmm3 + movdqu 48(%rdi), %xmm4 + movdqu 64(%rdi), %xmm5 + movdqu 80(%rdi), %xmm6 + movdqu 96(%rdi), %xmm7 + movdqu 112(%rdi), %xmm8 + movdqu 128(%rdi), %xmm9 + movdqu 144(%rdi), %xmm10 + movdqu 160(%rdi), %xmm11 + movdqu 176(%rdi), %xmm12 + movdqu 192(%rdi), %xmm13 + movdqu 208(%rdi), %xmm14 + +4: movdqu (%r8, %rax), %xmm1 + pxor %xmm15, %xmm1 + .byte 0x66,0x41,0x0f,0x38,0xde,0xce /* aesdec %xmm14, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xcd /* aesdec %xmm13, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xcc /* aesdec %xmm12, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */ + movdqu (%rdi), %xmm8 + .byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xcb /* 
aesdec %xmm3, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xca /* aesdec %xmm2, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdf,0xc8 /* aesdeclast %xmm8, %xmm1 */ + movdqu 112(%rdi), %xmm8 /* reload xmm8 from offset 112 so the next iteration finds the same key there again */ + movdqu %xmm1, (%rsi, %rax) + addl $16, %eax + cmpl %eax, %r9d + jne 4b /* loop until the byte offset in eax equals inputLen */ + +5: xor %eax, %eax /* eax is 0 at ret */ + ret + .size intel_aes_decrypt_ecb_256, .-intel_aes_decrypt_ecb_256 + + +/* in %rdi : cx - context + in %rsi : output - pointer to output buffer + in %rdx : outputLen - pointer to variable for length of output + (already filled in by caller) + in %ecx : maxOutputLen - length of output buffer + (already checked by caller) + in %r8 : input - pointer to input buffer + in %r9d : inputLen - length of input buffer + on stack: blocksize - AES blocksize (always 16, unused) +*/ + .type intel_aes_encrypt_cbc_256,@function + .globl intel_aes_encrypt_cbc_256 + .align 16 +intel_aes_encrypt_cbc_256: + testl %r9d, %r9d + je 2f /* empty input: return with eax 0 and leave the stored IV untouched */ + +// leaq IV_OFFSET(%rdi), %rdx +// leaq EXPANDED_KEY_OFFSET(%rdi), %rdx + leaq 16(%rdi), %rdx /* rdx = cx plus 16, the literal used in place of IV_OFFSET; the incoming outputLen pointer is discarded */ + leaq 48(%rdi), %rdi /* rdi = cx plus 48, the literal used in place of EXPANDED_KEY_OFFSET */ + + movdqu (%rdx), %xmm0 /* xmm0 = chaining value, initially the stored IV */ + movdqu (%rdi), %xmm8 /* xmm8 = round key 0; inside the loop it alternates with the key at offset 112 */ + movdqu 16(%rdi), %xmm2 /* round keys 1 to 6 in xmm2 to xmm7 */ + movdqu 32(%rdi), %xmm3 + movdqu 48(%rdi), %xmm4 + movdqu 64(%rdi), %xmm5 + movdqu 80(%rdi), %xmm6 + movdqu 96(%rdi), %xmm7 + movdqu 128(%rdi), %xmm9 /* round keys 8 to 14 in xmm9 to xmm15; key 7 has no register of its own */ + movdqu 144(%rdi), %xmm10 + movdqu 160(%rdi), %xmm11 + movdqu 176(%rdi), %xmm12 + movdqu 192(%rdi), %xmm13 + movdqu 208(%rdi), %xmm14 + movdqu 224(%rdi), %xmm15 + + xorl %eax, %eax /* eax = byte offset into both input and output */ +1: movdqu (%r8, %rax), %xmm1 + pxor %xmm0, %xmm1 /* CBC step: xor the input block with the chaining value */ + pxor %xmm8, %xmm1 /* then xor in round key 0 */ + movdqu 112(%rdi), %xmm8 /* xmm8 now holds round key 7 for the aesenc below */ + .byte 0x66,0x0f,0x38,0xdc,0xca /* aesenc %xmm2, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xcb /* aesenc %xmm3, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xcc /* aesenc %xmm4, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xcd /* aesenc %xmm5, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xce /* aesenc %xmm6, %xmm1 */ + .byte 0x66,0x0f,0x38,0xdc,0xcf /* aesenc %xmm7, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */ + movdqu (%rdi), %xmm8 /* put round key 0 back in xmm8 for the next iteration */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xc9 /* aesenc
%xmm9, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xca /* aesenc %xmm10, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xcc /* aesenc %xmm12, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xcd /* aesenc %xmm13, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdc,0xce /* aesenc %xmm14, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdd,0xcf /* aesenclast %xmm15, %xmm1 */ + movdqu %xmm1, (%rsi, %rax) + movdqa %xmm1, %xmm0 /* the block just produced is the chaining value for the next one */ + addl $16, %eax + cmpl %eax, %r9d + jne 1b /* loop until the byte offset in eax equals inputLen */ + + movdqu %xmm0, (%rdx) /* write the last output block back as the stored IV */ + +2: xor %eax, %eax /* eax is 0 at ret */ + ret + .size intel_aes_encrypt_cbc_256, .-intel_aes_encrypt_cbc_256 + + +/* in %rdi : cx - context + in %rsi : output - pointer to output buffer + in %rdx : outputLen - pointer to variable for length of output + (already filled in by caller) + in %ecx : maxOutputLen - length of output buffer + (already checked by caller) + in %r8 : input - pointer to input buffer + in %r9d : inputLen - length of input buffer + on stack: blocksize - AES blocksize (always 16, unused) +*/ + .type intel_aes_decrypt_cbc_256,@function + .globl intel_aes_decrypt_cbc_256 + .align 16 +intel_aes_decrypt_cbc_256: +// leaq IV_OFFSET(%rdi), %rdx +// leaq EXPANDED_KEY_OFFSET(%rdi), %rdi + leaq 16(%rdi), %rdx /* rdx = cx plus 16, the literal used in place of IV_OFFSET */ + leaq 48(%rdi), %rdi /* rdi = cx plus 48, the literal used in place of EXPANDED_KEY_OFFSET */ + + movdqu (%rdx), %xmm0 /* xmm0 = chaining value, initially the stored IV */ + movdqu (%rdi), %xmm2 /* xmm2 = round key 0 */ + movdqu 224(%rdi), %xmm15 /* xmm15 = round key 14 */ + xorl %eax, %eax +// cmpl $8*16, %r9d + cmpl $128, %r9d + jb 1f /* fewer than 128 bytes: skip the 8-block loop */ +// leal -8*16(%r9), %r11d + leal -128(%r9), %r11d /* r11d = inputLen minus 128 */ +2: movdqu (%r8, %rax), %xmm3 + movdqu 16(%r8, %rax), %xmm4 + movdqu 32(%r8, %rax), %xmm5 + movdqu 48(%r8, %rax), %xmm6 + movdqu 64(%r8, %rax), %xmm7 + movdqu 80(%r8, %rax), %xmm8 + movdqu 96(%r8, %rax), %xmm9 + movdqu 112(%r8, %rax), %xmm10 + pxor %xmm15, %xmm3 + pxor %xmm15, %xmm4 + pxor %xmm15, %xmm5 + pxor %xmm15, %xmm6 + pxor %xmm15, %xmm7 + pxor %xmm15, %xmm8 + pxor %xmm15, %xmm9 + pxor %xmm15, %xmm10 + +// complete loop unrolling + movdqu 208(%rdi), %xmm1 + movdqu 192(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1,
%xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 176(%rdi), %xmm1 + movdqu 160(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* 
aesdec %xmm11, %xmm10 */ + + movdqu 144(%rdi), %xmm1 + movdqu 128(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 112(%rdi), %xmm1 + movdqu 96(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* 
aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 80(%rdi), %xmm1 + movdqu 64(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 48(%rdi), %xmm1 + movdqu 32(%rdi), %xmm11 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xdb /* aesdec %xmm11, %xmm3 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xe3 /* aesdec %xmm11, %xmm4 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xeb /* aesdec %xmm11, %xmm5 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xf3 /* 
aesdec %xmm11, %xmm6 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xfb /* aesdec %xmm11, %xmm7 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xc3 /* aesdec %xmm11, %xmm8 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm9 */ + .byte 0x66,0x45,0x0f,0x38,0xde,0xd3 /* aesdec %xmm11, %xmm10 */ + + movdqu 16(%rdi), %xmm1 + .byte 0x66,0x0f,0x38,0xde,0xd9 /* aesdec %xmm1, %xmm3 */ + .byte 0x66,0x0f,0x38,0xde,0xe1 /* aesdec %xmm1, %xmm4 */ + .byte 0x66,0x0f,0x38,0xde,0xe9 /* aesdec %xmm1, %xmm5 */ + .byte 0x66,0x0f,0x38,0xde,0xf1 /* aesdec %xmm1, %xmm6 */ + .byte 0x66,0x0f,0x38,0xde,0xf9 /* aesdec %xmm1, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc1 /* aesdec %xmm1, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xc9 /* aesdec %xmm1, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xde,0xd1 /* aesdec %xmm1, %xmm10 */ + .byte 0x66,0x0f,0x38,0xdf,0xda /* aesdeclast %xmm2, %xmm3 */ + .byte 0x66,0x0f,0x38,0xdf,0xe2 /* aesdeclast %xmm2, %xmm4 */ + .byte 0x66,0x0f,0x38,0xdf,0xea /* aesdeclast %xmm2, %xmm5 */ + .byte 0x66,0x0f,0x38,0xdf,0xf2 /* aesdeclast %xmm2, %xmm6 */ + .byte 0x66,0x0f,0x38,0xdf,0xfa /* aesdeclast %xmm2, %xmm7 */ + .byte 0x66,0x44,0x0f,0x38,0xdf,0xc2 /* aesdeclast %xmm2, %xmm8 */ + .byte 0x66,0x44,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm9 */ + .byte 0x66,0x44,0x0f,0x38,0xdf,0xd2 /* aesdeclast %xmm2, %xmm10 */ + + pxor %xmm0, %xmm3 + movdqu (%r8, %rax), %xmm0 + pxor %xmm0, %xmm4 + movdqu 16(%r8, %rax), %xmm0 + pxor %xmm0, %xmm5 + movdqu 32(%r8, %rax), %xmm0 + pxor %xmm0, %xmm6 + movdqu 48(%r8, %rax), %xmm0 + pxor %xmm0, %xmm7 + movdqu 64(%r8, %rax), %xmm0 + pxor %xmm0, %xmm8 + movdqu 80(%r8, %rax), %xmm0 + pxor %xmm0, %xmm9 + movdqu 96(%r8, %rax), %xmm0 + pxor %xmm0, %xmm10 + movdqu 112(%r8, %rax), %xmm0 + movdqu %xmm3, (%rsi, %rax) + movdqu %xmm4, 16(%rsi, %rax) + movdqu %xmm5, 32(%rsi, %rax) + movdqu %xmm6, 48(%rsi, %rax) + movdqu %xmm7, 64(%rsi, %rax) + movdqu %xmm8, 80(%rsi, %rax) + movdqu %xmm9, 96(%rsi, %rax) + movdqu %xmm10, 112(%rsi, %rax) +// addl $8*16, %eax + addl 
$128, %eax + cmpl %r11d, %eax + jbe 2b +1: cmpl %eax, %r9d + je 5f + + movdqu 16(%rdi), %xmm2 + movdqu 32(%rdi), %xmm3 + movdqu 48(%rdi), %xmm4 + movdqu 64(%rdi), %xmm5 + movdqu 80(%rdi), %xmm6 + movdqu 96(%rdi), %xmm7 + movdqu 112(%rdi), %xmm8 + movdqu 128(%rdi), %xmm9 + movdqu 144(%rdi), %xmm10 + movdqu 160(%rdi), %xmm11 + movdqu 176(%rdi), %xmm12 + movdqu 192(%rdi), %xmm13 + movdqu 208(%rdi), %xmm14 + +4: movdqu (%r8, %rax), %xmm1 + pxor %xmm15, %xmm1 + .byte 0x66,0x41,0x0f,0x38,0xde,0xce /* aesdec %xmm14, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xcd /* aesdec %xmm13, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xcc /* aesdec %xmm12, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xcb /* aesdec %xmm11, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xca /* aesdec %xmm10, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xc9 /* aesdec %xmm9, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xde,0xc8 /* aesdec %xmm8, %xmm1 */ + movdqu (%rdi), %xmm8 + .byte 0x66,0x0f,0x38,0xde,0xcf /* aesdec %xmm7, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xce /* aesdec %xmm6, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xcd /* aesdec %xmm5, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xcc /* aesdec %xmm4, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */ + .byte 0x66,0x0f,0x38,0xde,0xca /* aesdec %xmm2, %xmm1 */ + .byte 0x66,0x41,0x0f,0x38,0xdf,0xc8 /* aesdeclast %xmm8, %xmm1 */ + movdqu 112(%rdi), %xmm8 + pxor %xmm0, %xmm1 + movdqu (%r8, %rax), %xmm0 /* fetch the IV before we store the block */ + movdqu %xmm1, (%rsi, %rax) /* in case input buf = output buf */ + addl $16, %eax + cmpl %eax, %r9d + jne 4b + +5: movdqu %xmm0, (%rdx) + + xor %eax, %eax + ret + .size intel_aes_decrypt_cbc_256, .-intel_aes_decrypt_cbc_256 diff --git a/security/nss/lib/freebl/intel-gcm-wrap.c b/security/nss/lib/freebl/intel-gcm-wrap.c new file mode 100644 index 000000000..8c5eaf021 --- /dev/null +++ b/security/nss/lib/freebl/intel-gcm-wrap.c @@ -0,0 +1,254 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + 
* License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +/* Copyright(c) 2013, Intel Corp. */ + +/* Wrapper functions for Intel optimized implementation of AES-GCM */ + +#ifdef USE_HW_AES + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "blapii.h" +#include "blapit.h" +#include "gcm.h" +#include "ctr.h" +#include "secerr.h" +#include "prtypes.h" +#include "pkcs11t.h" + +#include + +#include "intel-gcm.h" +#include "rijndael.h" + +#include +#include + +struct intel_AES_GCMContextStr { + unsigned char Htbl[16 * AES_BLOCK_SIZE]; + unsigned char X0[AES_BLOCK_SIZE]; + unsigned char T[AES_BLOCK_SIZE]; + unsigned char CTR[AES_BLOCK_SIZE]; + AESContext *aes_context; + unsigned long tagBits; + unsigned long Alen; + unsigned long Mlen; +}; + +intel_AES_GCMContext * +intel_AES_GCM_CreateContext(void *context, + freeblCipherFunc cipher, + const unsigned char *params, + unsigned int blocksize) +{ + intel_AES_GCMContext *gcm = NULL; + AESContext *aes = (AESContext *)context; + const CK_GCM_PARAMS *gcmParams = (const CK_GCM_PARAMS *)params; + unsigned char buff[AES_BLOCK_SIZE]; /* aux buffer */ + + unsigned long IV_whole_len = gcmParams->ulIvLen & (~0xful); + unsigned int IV_remainder_len = gcmParams->ulIvLen & 0xful; + unsigned long AAD_whole_len = gcmParams->ulAADLen & (~0xful); + unsigned int AAD_remainder_len = gcmParams->ulAADLen & 0xful; + + __m128i BSWAP_MASK = _mm_setr_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + __m128i ONE = _mm_set_epi32(0, 0, 0, 1); + unsigned int j; + SECStatus rv; + + if (blocksize != AES_BLOCK_SIZE) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return NULL; + } + gcm = PORT_ZNew(intel_AES_GCMContext); + + if (gcm == NULL) { + return NULL; + } + + /* initialize context fields */ + gcm->aes_context = aes; + gcm->tagBits = gcmParams->ulTagBits; + gcm->Alen = 0; + gcm->Mlen = 0; + + /* first prepare H and its derivatives for ghash */ + 
intel_aes_gcmINIT(gcm->Htbl, (unsigned char *)aes->expandedKey, aes->Nr); + + /* Initial TAG value is zero */ + _mm_storeu_si128((__m128i *)gcm->T, _mm_setzero_si128()); + _mm_storeu_si128((__m128i *)gcm->X0, _mm_setzero_si128()); + + /* Init the counter */ + if (gcmParams->ulIvLen == 12) { + _mm_storeu_si128((__m128i *)gcm->CTR, + _mm_setr_epi32(((unsigned int *)gcmParams->pIv)[0], + ((unsigned int *)gcmParams->pIv)[1], + ((unsigned int *)gcmParams->pIv)[2], + 0x01000000)); + } else { + /* If IV size is not 96 bits, then the initial counter value is GHASH + * of the IV */ + intel_aes_gcmAAD(gcm->Htbl, gcmParams->pIv, IV_whole_len, gcm->T); + + /* Partial block */ + if (IV_remainder_len) { + PORT_Memset(buff, 0, AES_BLOCK_SIZE); + PORT_Memcpy(buff, gcmParams->pIv + IV_whole_len, IV_remainder_len); + intel_aes_gcmAAD(gcm->Htbl, buff, AES_BLOCK_SIZE, gcm->T); + } + + intel_aes_gcmTAG( + gcm->Htbl, + gcm->T, + gcmParams->ulIvLen, + 0, + gcm->X0, + gcm->CTR); + + /* TAG should be zero again */ + _mm_storeu_si128((__m128i *)gcm->T, _mm_setzero_si128()); + } + + /* Encrypt the initial counter, will be used to encrypt the GHASH value, + * in the end */ + rv = (*cipher)(context, gcm->X0, &j, AES_BLOCK_SIZE, gcm->CTR, + AES_BLOCK_SIZE, AES_BLOCK_SIZE); + if (rv != SECSuccess) { + goto loser; + } + + /* Promote the counter by 1 */ + _mm_storeu_si128((__m128i *)gcm->CTR, _mm_shuffle_epi8(_mm_add_epi32(ONE, _mm_shuffle_epi8(_mm_loadu_si128((__m128i *)gcm->CTR), BSWAP_MASK)), BSWAP_MASK)); + + /* Now hash AAD - it would actually make sense to seperate the context + * creation from the AAD, because that would allow to reuse the H, which + * only changes when the AES key changes, and not every package, like the + * IV and AAD */ + intel_aes_gcmAAD(gcm->Htbl, gcmParams->pAAD, AAD_whole_len, gcm->T); + if (AAD_remainder_len) { + PORT_Memset(buff, 0, AES_BLOCK_SIZE); + PORT_Memcpy(buff, gcmParams->pAAD + AAD_whole_len, AAD_remainder_len); + intel_aes_gcmAAD(gcm->Htbl, buff, 
AES_BLOCK_SIZE, gcm->T); + } + gcm->Alen += gcmParams->ulAADLen; + return gcm; + +loser: + PORT_Free(gcm); + return NULL; +} + +void +intel_AES_GCM_DestroyContext(intel_AES_GCMContext *gcm, PRBool freeit) +{ + if (freeit) { + PORT_Free(gcm); + } +} + +SECStatus +intel_AES_GCM_EncryptUpdate(intel_AES_GCMContext *gcm, + unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + unsigned int blocksize) +{ + unsigned int tagBytes; + unsigned char T[AES_BLOCK_SIZE]; + unsigned int j; + + tagBytes = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE; + if (UINT_MAX - inlen < tagBytes) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + if (maxout < inlen + tagBytes) { + *outlen = inlen + tagBytes; + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + + intel_aes_gcmENC( + inbuf, + outbuf, + gcm, + inlen); + + gcm->Mlen += inlen; + + intel_aes_gcmTAG( + gcm->Htbl, + gcm->T, + gcm->Mlen, + gcm->Alen, + gcm->X0, + T); + + *outlen = inlen + tagBytes; + + for (j = 0; j < tagBytes; j++) { + outbuf[inlen + j] = T[j]; + } + return SECSuccess; +} + +SECStatus +intel_AES_GCM_DecryptUpdate(intel_AES_GCMContext *gcm, + unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + unsigned int blocksize) +{ + unsigned int tagBytes; + unsigned char T[AES_BLOCK_SIZE]; + const unsigned char *intag; + + tagBytes = (gcm->tagBits + (PR_BITS_PER_BYTE - 1)) / PR_BITS_PER_BYTE; + + /* get the authentication block */ + if (inlen < tagBytes) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + + inlen -= tagBytes; + intag = inbuf + inlen; + + if (maxout < inlen) { + *outlen = inlen; + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + + intel_aes_gcmDEC( + inbuf, + outbuf, + gcm, + inlen); + + gcm->Mlen += inlen; + intel_aes_gcmTAG( + gcm->Htbl, + gcm->T, + gcm->Mlen, + gcm->Alen, + gcm->X0, + T); + + if 
(NSS_SecureMemcmp(T, intag, tagBytes) != 0) { + memset(outbuf, 0, inlen); + *outlen = 0; + /* force a CKR_ENCRYPTED_DATA_INVALID error at in softoken */ + PORT_SetError(SEC_ERROR_BAD_DATA); + return SECFailure; + } + *outlen = inlen; + + return SECSuccess; +} + +#endif diff --git a/security/nss/lib/freebl/intel-gcm-x64-masm.asm b/security/nss/lib/freebl/intel-gcm-x64-masm.asm new file mode 100644 index 000000000..8b68b76e5 --- /dev/null +++ b/security/nss/lib/freebl/intel-gcm-x64-masm.asm @@ -0,0 +1,1295 @@ +; LICENSE: +; This submission to NSS is to be made available under the terms of the +; Mozilla Public License, v. 2.0. You can obtain one at http: +; //mozilla.org/MPL/2.0/. +;############################################################################### +; Copyright(c) 2014, Intel Corp. +; Developers and authors: +; Shay Gueron and Vlad Krasnov +; Intel Corporation, Israel Development Centre, Haifa, Israel +; Please send feedback directly to crypto.feedback.alias@intel.com + + +.DATA +ALIGN 16 +Lone dq 1,0 +Ltwo dq 2,0 +Lbswap_mask db 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +Lshuff_mask dq 0f0f0f0f0f0f0f0fh, 0f0f0f0f0f0f0f0fh +Lpoly dq 01h, 0c200000000000000h + +.CODE + + +GFMUL MACRO DST, SRC1, SRC2, TMP1, TMP2, TMP3, TMP4 + vpclmulqdq TMP1, SRC2, SRC1, 0h + vpclmulqdq TMP4, SRC2, SRC1, 011h + + vpshufd TMP2, SRC2, 78 + vpshufd TMP3, SRC1, 78 + vpxor TMP2, TMP2, SRC2 + vpxor TMP3, TMP3, SRC1 + + vpclmulqdq TMP2, TMP2, TMP3, 0h + vpxor TMP2, TMP2, TMP1 + vpxor TMP2, TMP2, TMP4 + + vpslldq TMP3, TMP2, 8 + vpsrldq TMP2, TMP2, 8 + + vpxor TMP1, TMP1, TMP3 + vpxor TMP4, TMP4, TMP2 + + vpclmulqdq TMP2, TMP1, [Lpoly], 010h + vpshufd TMP3, TMP1, 78 + vpxor TMP1, TMP2, TMP3 + + vpclmulqdq TMP2, TMP1, [Lpoly], 010h + vpshufd TMP3, TMP1, 78 + vpxor TMP1, TMP2, TMP3 + + vpxor DST, TMP1, TMP4 + + ENDM + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; +; Generates the final GCM tag +; void intel_aes_gcmTAG(unsigned char Htbl[16*16], +; 
unsigned char *Tp, +; unsigned int Mlen, +; unsigned int Alen, +; unsigned char *X0, +; unsigned char *TAG); +; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +ALIGN 16 +intel_aes_gcmTAG PROC + +Htbl textequ +Tp textequ +Mlen textequ +Alen textequ +X0 textequ +TAG textequ + +T textequ +TMP0 textequ + + mov X0, [rsp + 1*8 + 4*8] + mov TAG, [rsp + 1*8 + 5*8] + + vzeroupper + vmovdqu T, XMMWORD PTR[Tp] + vpxor TMP0, TMP0, TMP0 + + shl Mlen, 3 + shl Alen, 3 + + ;vpinsrq TMP0, TMP0, Mlen, 0 + ;vpinsrq TMP0, TMP0, Alen, 1 + ; workaround the ml64.exe vpinsrq issue + vpinsrd TMP0, TMP0, r8d, 0 + vpinsrd TMP0, TMP0, r9d, 2 + shr Mlen, 32 + shr Alen, 32 + vpinsrd TMP0, TMP0, r8d, 1 + vpinsrd TMP0, TMP0, r9d, 3 + + vpxor T, T, TMP0 + vmovdqu TMP0, XMMWORD PTR[Htbl] + GFMUL T, T, TMP0, xmm2, xmm3, xmm4, xmm5 + + vpshufb T, T, [Lbswap_mask] + vpxor T, T, [X0] + vmovdqu XMMWORD PTR[TAG], T + vzeroupper + + ret + +intel_aes_gcmTAG ENDP + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; +; Generates the H table +; void intel_aes_gcmINIT(unsigned char Htbl[16*16], unsigned char *KS, int NR); +; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +ALIGN 16 +intel_aes_gcmINIT PROC + +Htbl textequ +KS textequ +NR textequ + +T textequ +TMP0 textequ + + vzeroupper + ; AES-ENC(0) + vmovdqu T, XMMWORD PTR[KS] + lea KS, [16 + KS] + dec NR +Lenc_loop: + vaesenc T, T, [KS] + lea KS, [16 + KS] + dec NR + jnz Lenc_loop + + vaesenclast T, T, [KS] + vpshufb T, T, [Lbswap_mask] + + ;Calculate H` = GFMUL(H, 2) + vpsrad xmm3, T, 31 + vpshufd xmm3, xmm3, 0ffh + vpand xmm5, xmm3, [Lpoly] + vpsrld xmm3, T, 31 + vpslld xmm4, T, 1 + vpslldq xmm3, xmm3, 4 + vpxor T, xmm4, xmm3 + vpxor T, T, xmm5 + + vmovdqu TMP0, T + vmovdqu XMMWORD PTR[Htbl + 0*16], T + + vpshufd xmm2, T, 78 + vpxor xmm2, xmm2, T + vmovdqu XMMWORD PTR[Htbl + 8*16 + 0*16], xmm2 + + i = 1 + WHILE i LT 8 + GFMUL T, T, TMP0, xmm2, xmm3, 
xmm4, xmm5 + vmovdqu XMMWORD PTR[Htbl + i*16], T + vpshufd xmm2, T, 78 + vpxor xmm2, xmm2, T + vmovdqu XMMWORD PTR[Htbl + 8*16 + i*16], xmm2 + i = i+1 + ENDM + vzeroupper + ret +intel_aes_gcmINIT ENDP + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; +; Authenticate only +; void intel_aes_gcmAAD(unsigned char Htbl[16*16], unsigned char *AAD, unsigned int Alen, unsigned char *Tp); +; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +ALIGN 16 +intel_aes_gcmAAD PROC + +Htbl textequ +inp textequ +len textequ +Tp textequ +hlp0 textequ + +DATA textequ +T textequ +TMP0 textequ +TMP1 textequ +TMP2 textequ +TMP3 textequ +TMP4 textequ +Xhi textequ + +KARATSUBA_AAD MACRO i + vpclmulqdq TMP3, DATA, [Htbl + i*16], 0h + vpxor TMP0, TMP0, TMP3 + vpclmulqdq TMP3, DATA, [Htbl + i*16], 011h + vpxor TMP1, TMP1, TMP3 + vpshufd TMP3, DATA, 78 + vpxor TMP3, TMP3, DATA + vpclmulqdq TMP3, TMP3, [Htbl + 8*16 + i*16], 0h + vpxor TMP2, TMP2, TMP3 +ENDM + + test len, len + jnz LbeginAAD + ret + +LbeginAAD: + vzeroupper + + sub rsp, 2*16 + vmovdqu XMMWORD PTR[rsp + 0*16], xmm6 + vmovdqu XMMWORD PTR[rsp + 1*16], xmm7 + + vpxor Xhi, Xhi, Xhi + + vmovdqu T, XMMWORD PTR[Tp] + ;we hash 8 block each iteration, if the total amount of blocks is not a multiple of 8, we hash the first n%8 blocks first + mov hlp0, len + and hlp0, 128-1 + jz Lmod_loop + + and len, -128 + sub hlp0, 16 + + ; Prefix block + vmovdqu DATA, XMMWORD PTR[inp] + vpshufb DATA, DATA, [Lbswap_mask] + vpxor DATA, DATA, T + + vpclmulqdq TMP0, DATA, [Htbl + hlp0], 0h + vpclmulqdq TMP1, DATA, [Htbl + hlp0], 011h + vpshufd TMP3, DATA, 78 + vpxor TMP3, TMP3, DATA + vpclmulqdq TMP2, TMP3, [Htbl + 8*16 + hlp0], 0h + + lea inp, [inp+16] + test hlp0, hlp0 + jnz Lpre_loop + jmp Lred1 + + ;hash remaining prefix bocks (up to 7 total prefix blocks) +Lpre_loop: + + sub hlp0, 16 + + vmovdqu DATA, XMMWORD PTR[inp] + vpshufb DATA, DATA, [Lbswap_mask] + + vpclmulqdq TMP3, DATA, 
[Htbl + hlp0], 0h + vpxor TMP0, TMP0, TMP3 + vpclmulqdq TMP3, DATA, [Htbl + hlp0], 011h + vpxor TMP1, TMP1, TMP3 + vpshufd TMP3, DATA, 78 + vpxor TMP3, TMP3, DATA + vpclmulqdq TMP3, TMP3, [Htbl + 8*16 + hlp0], 0h + vpxor TMP2, TMP2, TMP3 + + test hlp0, hlp0 + lea inp, [inp+16] + jnz Lpre_loop + +Lred1: + + vpxor TMP2, TMP2, TMP0 + vpxor TMP2, TMP2, TMP1 + vpsrldq TMP3, TMP2, 8 + vpslldq TMP2, TMP2, 8 + + vpxor Xhi, TMP1, TMP3 + vpxor T, TMP0, TMP2 + + +Lmod_loop: + + sub len, 16*8 + jb Ldone + ; Block #0 + vmovdqu DATA, XMMWORD PTR[inp + 16*7] + vpshufb DATA, DATA, [Lbswap_mask] + + vpclmulqdq TMP0, DATA, [Htbl + 0*16], 0h + vpclmulqdq TMP1, DATA, [Htbl + 0*16], 011h + vpshufd TMP3, DATA, 78 + vpxor TMP3, TMP3, DATA + vpclmulqdq TMP2, TMP3, [Htbl + 8*16 + 0*16], 0h + + ; Block #1 + vmovdqu DATA, XMMWORD PTR[inp + 16*6] + vpshufb DATA, DATA, [Lbswap_mask] + KARATSUBA_AAD 1 + + ; Block #2 + vmovdqu DATA, XMMWORD PTR[inp + 16*5] + vpshufb DATA, DATA, [Lbswap_mask] + + vpclmulqdq TMP4, T, [Lpoly], 010h ;reduction stage 1a + vpalignr T, T, T, 8 + + KARATSUBA_AAD 2 + + vpxor T, T, TMP4 ;reduction stage 1b + + ; Block #3 + vmovdqu DATA, XMMWORD PTR[inp + 16*4] + vpshufb DATA, DATA, [Lbswap_mask] + KARATSUBA_AAD 3 + ; Block #4 + vmovdqu DATA, XMMWORD PTR[inp + 16*3] + vpshufb DATA, DATA, [Lbswap_mask] + + vpclmulqdq TMP4, T, [Lpoly], 010h ;reduction stage 2a + vpalignr T, T, T, 8 + + KARATSUBA_AAD 4 + + vpxor T, T, TMP4 ;reduction stage 2b + ; Block #5 + vmovdqu DATA, XMMWORD PTR[inp + 16*2] + vpshufb DATA, DATA, [Lbswap_mask] + KARATSUBA_AAD 5 + + vpxor T, T, Xhi ;reduction finalize + ; Block #6 + vmovdqu DATA, XMMWORD PTR[inp + 16*1] + vpshufb DATA, DATA, [Lbswap_mask] + KARATSUBA_AAD 6 + ; Block #7 + vmovdqu DATA, XMMWORD PTR[inp + 16*0] + vpshufb DATA, DATA, [Lbswap_mask] + vpxor DATA, DATA, T + KARATSUBA_AAD 7 + ; Aggregated 8 blocks, now karatsuba fixup + vpxor TMP2, TMP2, TMP0 + vpxor TMP2, TMP2, TMP1 + vpsrldq TMP3, TMP2, 8 + vpslldq TMP2, TMP2, 8 + + vpxor Xhi, 
TMP1, TMP3 + vpxor T, TMP0, TMP2 + + lea inp, [inp + 16*8] + jmp Lmod_loop + +Ldone: + vpclmulqdq TMP4, T, [Lpoly], 010h + vpalignr T, T, T, 8 + vpxor T, T, TMP4 + + vpclmulqdq TMP4, T, [Lpoly], 010h + vpalignr T, T, T, 8 + vpxor T, T, TMP4 + + vpxor T, T, Xhi + vmovdqu XMMWORD PTR[Tp], T + vzeroupper + + vmovdqu xmm6, XMMWORD PTR[rsp + 0*16] + vmovdqu xmm7, XMMWORD PTR[rsp + 1*16] + add rsp, 16*2 + + ret + +intel_aes_gcmAAD ENDP + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; +; Encrypt and Authenticate +; void intel_aes_gcmENC(unsigned char* PT, unsigned char* CT, void *Gctx, unsigned int len); +; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +ALIGN 16 +intel_aes_gcmENC PROC + +PT textequ +CT textequ +Htbl textequ +Gctx textequ +len textequ +KS textequ +NR textequ + +aluCTR textequ +aluKSl textequ +aluTMP textequ + +T textequ +TMP0 textequ +TMP1 textequ +TMP2 textequ +TMP3 textequ +TMP4 textequ +TMP5 textequ +CTR0 textequ +CTR1 textequ +CTR2 textequ +CTR3 textequ +CTR4 textequ +CTR5 textequ +CTR6 textequ +CTR7 textequ +BSWAPMASK textequ + +ROUND MACRO i + vmovdqu TMP3, XMMWORD PTR[i*16 + KS] + vaesenc CTR0, CTR0, TMP3 + vaesenc CTR1, CTR1, TMP3 + vaesenc CTR2, CTR2, TMP3 + vaesenc CTR3, CTR3, TMP3 + vaesenc CTR4, CTR4, TMP3 + vaesenc CTR5, CTR5, TMP3 + vaesenc CTR6, CTR6, TMP3 + vaesenc CTR7, CTR7, TMP3 +ENDM +ROUNDMUL MACRO i + vmovdqu TMP3, XMMWORD PTR[i*16 + KS] + + vaesenc CTR0, CTR0, TMP3 + vaesenc CTR1, CTR1, TMP3 + vaesenc CTR2, CTR2, TMP3 + vaesenc CTR3, CTR3, TMP3 + + vpshufd TMP4, TMP5, 78 + vpxor TMP4, TMP4, TMP5 + + vaesenc CTR4, CTR4, TMP3 + vaesenc CTR5, CTR5, TMP3 + vaesenc CTR6, CTR6, TMP3 + vaesenc CTR7, CTR7, TMP3 + + vpclmulqdq TMP3, TMP4, XMMWORD PTR[i*16 + 8*16 + Htbl], 000h + vpxor TMP0, TMP0, TMP3 + vmovdqu TMP4, XMMWORD PTR[i*16 + Htbl] + vpclmulqdq TMP3, TMP5, TMP4, 011h + vpxor TMP1, TMP1, TMP3 + vpclmulqdq TMP3, TMP5, TMP4, 000h + vpxor TMP2, TMP2, TMP3 +ENDM 
+KARATSUBA MACRO i + vpshufd TMP4, TMP5, 78 + vpxor TMP4, TMP4, TMP5 + vpclmulqdq TMP3, TMP4, XMMWORD PTR[i*16 + 8*16 + Htbl], 000h + vpxor TMP0, TMP0, TMP3 + vmovdqu TMP4, XMMWORD PTR[i*16 + Htbl] + vpclmulqdq TMP3, TMP5, TMP4, 011h + vpxor TMP1, TMP1, TMP3 + vpclmulqdq TMP3, TMP5, TMP4, 000h + vpxor TMP2, TMP2, TMP3 +ENDM +NEXTCTR MACRO i + add aluCTR, 1 + mov aluTMP, aluCTR + xor aluTMP, aluKSl + bswap aluTMP + mov [3*4 + 8*16 + i*16 + rsp], aluTMP +ENDM + + + test len, len + jnz LbeginENC + ret + +LbeginENC: + + vzeroupper + push r11 + push r12 + push r13 + push rbp + sub rsp, 10*16 + vmovdqu XMMWORD PTR[rsp + 0*16], xmm6 + vmovdqu XMMWORD PTR[rsp + 1*16], xmm7 + vmovdqu XMMWORD PTR[rsp + 2*16], xmm8 + vmovdqu XMMWORD PTR[rsp + 3*16], xmm9 + vmovdqu XMMWORD PTR[rsp + 4*16], xmm10 + vmovdqu XMMWORD PTR[rsp + 5*16], xmm11 + vmovdqu XMMWORD PTR[rsp + 6*16], xmm12 + vmovdqu XMMWORD PTR[rsp + 7*16], xmm13 + vmovdqu XMMWORD PTR[rsp + 8*16], xmm14 + vmovdqu XMMWORD PTR[rsp + 9*16], xmm15 + + mov rbp, rsp + sub rsp, 16*16 + and rsp, -16 + + vmovdqu T, XMMWORD PTR[16*16 + 1*16 + Gctx] + vmovdqu CTR0, XMMWORD PTR[16*16 + 2*16 + Gctx] + vmovdqu BSWAPMASK, XMMWORD PTR[Lbswap_mask] + mov KS, [16*16 + 3*16 + Gctx] + mov NR, [4 + KS] + lea KS, [48 + KS] + + vpshufb CTR0, CTR0, BSWAPMASK + + mov aluCTR, [16*16 + 2*16 + 3*4 + Gctx] + mov aluKSl, [3*4 + KS] + bswap aluCTR + bswap aluKSl + + vmovdqu TMP0, XMMWORD PTR[0*16 + KS] + vpxor TMP0, TMP0, XMMWORD PTR[16*16 + 2*16 + Gctx] + vmovdqu XMMWORD PTR[8*16 + 0*16 + rsp], TMP0 + + cmp len, 128 + jb LEncDataSingles +; Prepare the "top" counters + vmovdqu XMMWORD PTR[8*16 + 1*16 + rsp], TMP0 + vmovdqu XMMWORD PTR[8*16 + 2*16 + rsp], TMP0 + vmovdqu XMMWORD PTR[8*16 + 3*16 + rsp], TMP0 + vmovdqu XMMWORD PTR[8*16 + 4*16 + rsp], TMP0 + vmovdqu XMMWORD PTR[8*16 + 5*16 + rsp], TMP0 + vmovdqu XMMWORD PTR[8*16 + 6*16 + rsp], TMP0 + vmovdqu XMMWORD PTR[8*16 + 7*16 + rsp], TMP0 + +; Encrypt the initial 8 blocks + sub len, 128 + vpaddd CTR1, 
CTR0, XMMWORD PTR[Lone] + vpaddd CTR2, CTR0, XMMWORD PTR[Ltwo] + vpaddd CTR3, CTR2, XMMWORD PTR[Lone] + vpaddd CTR4, CTR2, XMMWORD PTR[Ltwo] + vpaddd CTR5, CTR4, XMMWORD PTR[Lone] + vpaddd CTR6, CTR4, XMMWORD PTR[Ltwo] + vpaddd CTR7, CTR6, XMMWORD PTR[Lone] + + vpshufb CTR0, CTR0, BSWAPMASK + vpshufb CTR1, CTR1, BSWAPMASK + vpshufb CTR2, CTR2, BSWAPMASK + vpshufb CTR3, CTR3, BSWAPMASK + vpshufb CTR4, CTR4, BSWAPMASK + vpshufb CTR5, CTR5, BSWAPMASK + vpshufb CTR6, CTR6, BSWAPMASK + vpshufb CTR7, CTR7, BSWAPMASK + + vmovdqu TMP3, XMMWORD PTR[0*16 + KS] + vpxor CTR0, CTR0, TMP3 + vpxor CTR1, CTR1, TMP3 + vpxor CTR2, CTR2, TMP3 + vpxor CTR3, CTR3, TMP3 + vpxor CTR4, CTR4, TMP3 + vpxor CTR5, CTR5, TMP3 + vpxor CTR6, CTR6, TMP3 + vpxor CTR7, CTR7, TMP3 + + ROUND 1 + + add aluCTR, 8 + mov aluTMP, aluCTR + xor aluTMP, aluKSl + bswap aluTMP + mov [8*16 + 0*16 + 3*4 + rsp], aluTMP + + ROUND 2 + NEXTCTR 1 + ROUND 3 + NEXTCTR 2 + ROUND 4 + NEXTCTR 3 + ROUND 5 + NEXTCTR 4 + ROUND 6 + NEXTCTR 5 + ROUND 7 + NEXTCTR 6 + ROUND 8 + NEXTCTR 7 + ROUND 9 + vmovdqu TMP5, XMMWORD PTR[10*16 + KS] + cmp NR, 10 + je @f + + ROUND 10 + ROUND 11 + vmovdqu TMP5, XMMWORD PTR[12*16 + KS] + cmp NR, 12 + je @f + + ROUND 12 + ROUND 13 + vmovdqu TMP5, XMMWORD PTR[14*16 + KS] +@@: + vpxor TMP3, TMP5, XMMWORD PTR[0*16 + PT] + vaesenclast CTR0, CTR0, TMP3 + vpxor TMP3, TMP5, XMMWORD PTR[1*16 + PT] + vaesenclast CTR1, CTR1, TMP3 + vpxor TMP3, TMP5, XMMWORD PTR[2*16 + PT] + vaesenclast CTR2, CTR2, TMP3 + vpxor TMP3, TMP5, XMMWORD PTR[3*16 + PT] + vaesenclast CTR3, CTR3, TMP3 + vpxor TMP3, TMP5, XMMWORD PTR[4*16 + PT] + vaesenclast CTR4, CTR4, TMP3 + vpxor TMP3, TMP5, XMMWORD PTR[5*16 + PT] + vaesenclast CTR5, CTR5, TMP3 + vpxor TMP3, TMP5, XMMWORD PTR[6*16 + PT] + vaesenclast CTR6, CTR6, TMP3 + vpxor TMP3, TMP5, XMMWORD PTR[7*16 + PT] + vaesenclast CTR7, CTR7, TMP3 + + vmovdqu XMMWORD PTR[0*16 + CT], CTR0 + vpshufb CTR0, CTR0, BSWAPMASK + vmovdqu XMMWORD PTR[1*16 + CT], CTR1 + vpshufb CTR1, CTR1, 
BSWAPMASK + vmovdqu XMMWORD PTR[2*16 + CT], CTR2 + vpshufb CTR2, CTR2, BSWAPMASK + vmovdqu XMMWORD PTR[3*16 + CT], CTR3 + vpshufb CTR3, CTR3, BSWAPMASK + vmovdqu XMMWORD PTR[4*16 + CT], CTR4 + vpshufb CTR4, CTR4, BSWAPMASK + vmovdqu XMMWORD PTR[5*16 + CT], CTR5 + vpshufb CTR5, CTR5, BSWAPMASK + vmovdqu XMMWORD PTR[6*16 + CT], CTR6 + vpshufb CTR6, CTR6, BSWAPMASK + vmovdqu XMMWORD PTR[7*16 + CT], CTR7 + vpshufb TMP5, CTR7, BSWAPMASK + + vmovdqa XMMWORD PTR[1*16 + rsp], CTR6 + vmovdqa XMMWORD PTR[2*16 + rsp], CTR5 + vmovdqa XMMWORD PTR[3*16 + rsp], CTR4 + vmovdqa XMMWORD PTR[4*16 + rsp], CTR3 + vmovdqa XMMWORD PTR[5*16 + rsp], CTR2 + vmovdqa XMMWORD PTR[6*16 + rsp], CTR1 + vmovdqa XMMWORD PTR[7*16 + rsp], CTR0 + + lea CT, [8*16 + CT] + lea PT, [8*16 + PT] + jmp LEncDataOctets + +LEncDataOctets: + cmp len, 128 + jb LEndEncOctets + sub len, 128 + + vmovdqa CTR0, XMMWORD PTR[8*16 + 0*16 + rsp] + vmovdqa CTR1, XMMWORD PTR[8*16 + 1*16 + rsp] + vmovdqa CTR2, XMMWORD PTR[8*16 + 2*16 + rsp] + vmovdqa CTR3, XMMWORD PTR[8*16 + 3*16 + rsp] + vmovdqa CTR4, XMMWORD PTR[8*16 + 4*16 + rsp] + vmovdqa CTR5, XMMWORD PTR[8*16 + 5*16 + rsp] + vmovdqa CTR6, XMMWORD PTR[8*16 + 6*16 + rsp] + vmovdqa CTR7, XMMWORD PTR[8*16 + 7*16 + rsp] + + vpshufd TMP4, TMP5, 78 + vpxor TMP4, TMP4, TMP5 + vpclmulqdq TMP0, TMP4, XMMWORD PTR[0*16 + 8*16 + Htbl], 000h + vmovdqu TMP4, XMMWORD PTR[0*16 + Htbl] + vpclmulqdq TMP1, TMP5, TMP4, 011h + vpclmulqdq TMP2, TMP5, TMP4, 000h + + vmovdqu TMP5, XMMWORD PTR[1*16 + rsp] + ROUNDMUL 1 + NEXTCTR 0 + vmovdqu TMP5, XMMWORD PTR[2*16 + rsp] + ROUNDMUL 2 + NEXTCTR 1 + vmovdqu TMP5, XMMWORD PTR[3*16 + rsp] + ROUNDMUL 3 + NEXTCTR 2 + vmovdqu TMP5, XMMWORD PTR[4*16 + rsp] + ROUNDMUL 4 + NEXTCTR 3 + vmovdqu TMP5, XMMWORD PTR[5*16 + rsp] + ROUNDMUL 5 + NEXTCTR 4 + vmovdqu TMP5, XMMWORD PTR[6*16 + rsp] + ROUNDMUL 6 + NEXTCTR 5 + vpxor TMP5, T, XMMWORD PTR[7*16 + rsp] + ROUNDMUL 7 + NEXTCTR 6 + + ROUND 8 + NEXTCTR 7 + + vpxor TMP0, TMP0, TMP1 + vpxor TMP0, TMP0, TMP2 + 
vpsrldq TMP3, TMP0, 8 + vpxor TMP4, TMP1, TMP3 + vpslldq TMP3, TMP0, 8 + vpxor T, TMP2, TMP3 + + vpclmulqdq TMP1, T, XMMWORD PTR[Lpoly], 010h + vpalignr T,T,T,8 + vpxor T, T, TMP1 + + ROUND 9 + + vpclmulqdq TMP1, T, XMMWORD PTR[Lpoly], 010h + vpalignr T,T,T,8 + vpxor T, T, TMP1 + + vmovdqu TMP5, XMMWORD PTR[10*16 + KS] + cmp NR, 10 + je @f + + ROUND 10 + ROUND 11 + vmovdqu TMP5, XMMWORD PTR[12*16 + KS] + cmp NR, 12 + je @f + + ROUND 12 + ROUND 13 + vmovdqu TMP5, XMMWORD PTR[14*16 + KS] +@@: + vpxor TMP3, TMP5, XMMWORD PTR[0*16 + PT] + vaesenclast CTR0, CTR0, TMP3 + vpxor TMP3, TMP5, XMMWORD PTR[1*16 + PT] + vaesenclast CTR1, CTR1, TMP3 + vpxor TMP3, TMP5, XMMWORD PTR[2*16 + PT] + vaesenclast CTR2, CTR2, TMP3 + vpxor TMP3, TMP5, XMMWORD PTR[3*16 + PT] + vaesenclast CTR3, CTR3, TMP3 + vpxor TMP3, TMP5, XMMWORD PTR[4*16 + PT] + vaesenclast CTR4, CTR4, TMP3 + vpxor TMP3, TMP5, XMMWORD PTR[5*16 + PT] + vaesenclast CTR5, CTR5, TMP3 + vpxor TMP3, TMP5, XMMWORD PTR[6*16 + PT] + vaesenclast CTR6, CTR6, TMP3 + vpxor TMP3, TMP5, XMMWORD PTR[7*16 + PT] + vaesenclast CTR7, CTR7, TMP3 + + vmovdqu XMMWORD PTR[0*16 + CT], CTR0 + vpshufb CTR0, CTR0, BSWAPMASK + vmovdqu XMMWORD PTR[1*16 + CT], CTR1 + vpshufb CTR1, CTR1, BSWAPMASK + vmovdqu XMMWORD PTR[2*16 + CT], CTR2 + vpshufb CTR2, CTR2, BSWAPMASK + vmovdqu XMMWORD PTR[3*16 + CT], CTR3 + vpshufb CTR3, CTR3, BSWAPMASK + vmovdqu XMMWORD PTR[4*16 + CT], CTR4 + vpshufb CTR4, CTR4, BSWAPMASK + vmovdqu XMMWORD PTR[5*16 + CT], CTR5 + vpshufb CTR5, CTR5, BSWAPMASK + vmovdqu XMMWORD PTR[6*16 + CT], CTR6 + vpshufb CTR6, CTR6, BSWAPMASK + vmovdqu XMMWORD PTR[7*16 + CT], CTR7 + vpshufb TMP5, CTR7, BSWAPMASK + + vmovdqa XMMWORD PTR[1*16 + rsp], CTR6 + vmovdqa XMMWORD PTR[2*16 + rsp], CTR5 + vmovdqa XMMWORD PTR[3*16 + rsp], CTR4 + vmovdqa XMMWORD PTR[4*16 + rsp], CTR3 + vmovdqa XMMWORD PTR[5*16 + rsp], CTR2 + vmovdqa XMMWORD PTR[6*16 + rsp], CTR1 + vmovdqa XMMWORD PTR[7*16 + rsp], CTR0 + + vpxor T, T, TMP4 + + lea CT, [8*16 + CT] + lea PT, 
[8*16 + PT] + jmp LEncDataOctets + +LEndEncOctets: + + vpshufd TMP4, TMP5, 78 + vpxor TMP4, TMP4, TMP5 + vpclmulqdq TMP0, TMP4, XMMWORD PTR[0*16 + 8*16 + Htbl], 000h + vmovdqu TMP4, XMMWORD PTR[0*16 + Htbl] + vpclmulqdq TMP1, TMP5, TMP4, 011h + vpclmulqdq TMP2, TMP5, TMP4, 000h + + vmovdqu TMP5, XMMWORD PTR[1*16 + rsp] + KARATSUBA 1 + vmovdqu TMP5, XMMWORD PTR[2*16 + rsp] + KARATSUBA 2 + vmovdqu TMP5, XMMWORD PTR[3*16 + rsp] + KARATSUBA 3 + vmovdqu TMP5, XMMWORD PTR[4*16 + rsp] + KARATSUBA 4 + vmovdqu TMP5, XMMWORD PTR[5*16 + rsp] + KARATSUBA 5 + vmovdqu TMP5, XMMWORD PTR[6*16 + rsp] + KARATSUBA 6 + vpxor TMP5, T, XMMWORD PTR[7*16 + rsp] + KARATSUBA 7 + + vpxor TMP0, TMP0, TMP1 + vpxor TMP0, TMP0, TMP2 + vpsrldq TMP3, TMP0, 8 + vpxor TMP4, TMP1, TMP3 + vpslldq TMP3, TMP0, 8 + vpxor T, TMP2, TMP3 + + vpclmulqdq TMP1, T, XMMWORD PTR[Lpoly], 010h + vpalignr T,T,T,8 + vpxor T, T, TMP1 + + vpclmulqdq TMP1, T, XMMWORD PTR[Lpoly], 010h + vpalignr T,T,T,8 + vpxor T, T, TMP1 + + vpxor T, T, TMP4 + + sub aluCTR, 7 + +LEncDataSingles: + + cmp len, 16 + jb LEncDataTail + sub len, 16 + + vmovdqa TMP1, XMMWORD PTR[8*16 + 0*16 + rsp] + NEXTCTR 0 + + vaesenc TMP1, TMP1, XMMWORD PTR[1*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[2*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[3*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[4*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[5*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[6*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[7*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[8*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[9*16 + KS] + vmovdqu TMP2, XMMWORD PTR[10*16 + KS] + cmp NR, 10 + je @f + vaesenc TMP1, TMP1, XMMWORD PTR[10*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[11*16 + KS] + vmovdqu TMP2, XMMWORD PTR[12*16 + KS] + cmp NR, 12 + je @f + vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS] + vmovdqu TMP2, XMMWORD PTR[14*16 + KS] +@@: + vaesenclast TMP1, TMP1, TMP2 + vpxor TMP1, TMP1, XMMWORD PTR[PT] + vmovdqu XMMWORD 
PTR[CT], TMP1 + + lea PT, [16+PT] + lea CT, [16+CT] + + vpshufb TMP1, TMP1, BSWAPMASK + vpxor T, T, TMP1 + vmovdqu TMP0, XMMWORD PTR[Htbl] + GFMUL T, T, TMP0, TMP1, TMP2, TMP3, TMP4 + + jmp LEncDataSingles + +LEncDataTail: + + test len, len + jz LEncDataEnd + + vmovdqa TMP1, XMMWORD PTR[8*16 + 0*16 + rsp] + + vaesenc TMP1, TMP1, XMMWORD PTR[1*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[2*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[3*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[4*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[5*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[6*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[7*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[8*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[9*16 + KS] + vmovdqu TMP2, XMMWORD PTR[10*16 + KS] + cmp NR, 10 + je @f + vaesenc TMP1, TMP1, XMMWORD PTR[10*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[11*16 + KS] + vmovdqu TMP2, XMMWORD PTR[12*16 + KS] + cmp NR, 12 + je @f + vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS] + vmovdqu TMP2, XMMWORD PTR[14*16 + KS] +@@: + vaesenclast TMP1, TMP1, TMP2 +; zero a temp location + vpxor TMP2, TMP2, TMP2 + vmovdqa XMMWORD PTR[rsp], TMP2 +; copy as many bytes as needed + xor KS, KS + +@@: + cmp len, KS + je @f + mov al, [PT + KS] + mov [rsp + KS], al + inc KS + jmp @b +@@: + vpxor TMP1, TMP1, XMMWORD PTR[rsp] + vmovdqa XMMWORD PTR[rsp], TMP1 + xor KS, KS +@@: + cmp len, KS + je @f + mov al, [rsp + KS] + mov [CT + KS], al + inc KS + jmp @b +@@: + cmp KS, 16 + je @f + mov BYTE PTR[rsp + KS], 0 + inc KS + jmp @b +@@: +BAIL: + vmovdqa TMP1, XMMWORD PTR[rsp] + vpshufb TMP1, TMP1, BSWAPMASK + vpxor T, T, TMP1 + vmovdqu TMP0, XMMWORD PTR[Htbl] + GFMUL T, T, TMP0, TMP1, TMP2, TMP3, TMP4 + +LEncDataEnd: + + vmovdqu XMMWORD PTR[16*16 + 1*16 + Gctx], T + bswap aluCTR + mov [16*16 + 2*16 + 3*4 + Gctx], aluCTR + + mov rsp, rbp + + vmovdqu xmm6, XMMWORD PTR[rsp + 0*16] + vmovdqu xmm7, XMMWORD PTR[rsp + 1*16] + vmovdqu xmm8, XMMWORD PTR[rsp + 2*16] + vmovdqu 
xmm9, XMMWORD PTR[rsp + 3*16] + vmovdqu xmm10, XMMWORD PTR[rsp + 4*16] + vmovdqu xmm11, XMMWORD PTR[rsp + 5*16] + vmovdqu xmm12, XMMWORD PTR[rsp + 6*16] + vmovdqu xmm13, XMMWORD PTR[rsp + 7*16] + vmovdqu xmm14, XMMWORD PTR[rsp + 8*16] + vmovdqu xmm15, XMMWORD PTR[rsp + 9*16] + + add rsp, 10*16 + pop rbp + pop r13 + pop r12 + pop r11 + + vzeroupper + + ret +intel_aes_gcmENC ENDP + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; +; Decrypt and Authenticate +; void intel_aes_gcmDEC(uint8_t* PT, uint8_t* CT, void *Gctx, unsigned int len); +; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +ALIGN 16 +intel_aes_gcmDEC PROC + +NEXTCTR MACRO i + add aluCTR, 1 + mov aluTMP, aluCTR + xor aluTMP, aluKSl + bswap aluTMP + mov [3*4 + i*16 + rsp], aluTMP +ENDM + +PT textequ +CT textequ + + test len, len + jnz LbeginDEC + ret + +LbeginDEC: + + vzeroupper + push r11 + push r12 + push r13 + push rbp + sub rsp, 10*16 + vmovdqu XMMWORD PTR[rsp + 0*16], xmm6 + vmovdqu XMMWORD PTR[rsp + 1*16], xmm7 + vmovdqu XMMWORD PTR[rsp + 2*16], xmm8 + vmovdqu XMMWORD PTR[rsp + 3*16], xmm9 + vmovdqu XMMWORD PTR[rsp + 4*16], xmm10 + vmovdqu XMMWORD PTR[rsp + 5*16], xmm11 + vmovdqu XMMWORD PTR[rsp + 6*16], xmm12 + vmovdqu XMMWORD PTR[rsp + 7*16], xmm13 + vmovdqu XMMWORD PTR[rsp + 8*16], xmm14 + vmovdqu XMMWORD PTR[rsp + 9*16], xmm15 + + mov rbp, rsp + sub rsp, 8*16 + and rsp, -16 + + vmovdqu T, XMMWORD PTR[16*16 + 1*16 + Gctx] + vmovdqu CTR0, XMMWORD PTR[16*16 + 2*16 + Gctx] + vmovdqu BSWAPMASK, XMMWORD PTR[Lbswap_mask] + mov KS, [16*16 + 3*16 + Gctx] + mov NR, [4 + KS] + lea KS, [48 + KS] + + vpshufb CTR0, CTR0, BSWAPMASK + + mov aluCTR, [16*16 + 2*16 + 3*4 + Gctx] + mov aluKSl, [3*4 + KS] + bswap aluCTR + bswap aluKSl + + vmovdqu TMP0, XMMWORD PTR[0*16 + KS] + vpxor TMP0, TMP0, XMMWORD PTR[16*16 + 2*16 + Gctx] + vmovdqu XMMWORD PTR[0*16 + rsp], TMP0 + + cmp len, 128 + jb LDecDataSingles +; Prepare the "top" counters + vmovdqu 
XMMWORD PTR[1*16 + rsp], TMP0 + vmovdqu XMMWORD PTR[2*16 + rsp], TMP0 + vmovdqu XMMWORD PTR[3*16 + rsp], TMP0 + vmovdqu XMMWORD PTR[4*16 + rsp], TMP0 + vmovdqu XMMWORD PTR[5*16 + rsp], TMP0 + vmovdqu XMMWORD PTR[6*16 + rsp], TMP0 + vmovdqu XMMWORD PTR[7*16 + rsp], TMP0 + + NEXTCTR 1 + NEXTCTR 2 + NEXTCTR 3 + NEXTCTR 4 + NEXTCTR 5 + NEXTCTR 6 + NEXTCTR 7 + +LDecDataOctets: + cmp len, 128 + jb LEndDecOctets + sub len, 128 + + vmovdqa CTR0, XMMWORD PTR[0*16 + rsp] + vmovdqa CTR1, XMMWORD PTR[1*16 + rsp] + vmovdqa CTR2, XMMWORD PTR[2*16 + rsp] + vmovdqa CTR3, XMMWORD PTR[3*16 + rsp] + vmovdqa CTR4, XMMWORD PTR[4*16 + rsp] + vmovdqa CTR5, XMMWORD PTR[5*16 + rsp] + vmovdqa CTR6, XMMWORD PTR[6*16 + rsp] + vmovdqa CTR7, XMMWORD PTR[7*16 + rsp] + + vmovdqu TMP5, XMMWORD PTR[7*16 + CT] + vpshufb TMP5, TMP5, BSWAPMASK + vpshufd TMP4, TMP5, 78 + vpxor TMP4, TMP4, TMP5 + vpclmulqdq TMP0, TMP4, XMMWORD PTR[0*16 + 8*16 + Htbl], 000h + vmovdqu TMP4, XMMWORD PTR[0*16 + Htbl] + vpclmulqdq TMP1, TMP5, TMP4, 011h + vpclmulqdq TMP2, TMP5, TMP4, 000h + + vmovdqu TMP5, XMMWORD PTR[6*16 + CT] + vpshufb TMP5, TMP5, BSWAPMASK + ROUNDMUL 1 + NEXTCTR 0 + vmovdqu TMP5, XMMWORD PTR[5*16 + CT] + vpshufb TMP5, TMP5, BSWAPMASK + ROUNDMUL 2 + NEXTCTR 1 + vmovdqu TMP5, XMMWORD PTR[4*16 + CT] + vpshufb TMP5, TMP5, BSWAPMASK + ROUNDMUL 3 + NEXTCTR 2 + vmovdqu TMP5, XMMWORD PTR[3*16 + CT] + vpshufb TMP5, TMP5, BSWAPMASK + ROUNDMUL 4 + NEXTCTR 3 + vmovdqu TMP5, XMMWORD PTR[2*16 + CT] + vpshufb TMP5, TMP5, BSWAPMASK + ROUNDMUL 5 + NEXTCTR 4 + vmovdqu TMP5, XMMWORD PTR[1*16 + CT] + vpshufb TMP5, TMP5, BSWAPMASK + ROUNDMUL 6 + NEXTCTR 5 + vmovdqu TMP5, XMMWORD PTR[0*16 + CT] + vpshufb TMP5, TMP5, BSWAPMASK + vpxor TMP5, TMP5, T + ROUNDMUL 7 + NEXTCTR 6 + + ROUND 8 + NEXTCTR 7 + + vpxor TMP0, TMP0, TMP1 + vpxor TMP0, TMP0, TMP2 + vpsrldq TMP3, TMP0, 8 + vpxor TMP4, TMP1, TMP3 + vpslldq TMP3, TMP0, 8 + vpxor T, TMP2, TMP3 + + vpclmulqdq TMP1, T, XMMWORD PTR[Lpoly], 010h + vpalignr T,T,T,8 + vpxor T, T, TMP1 
+ + ROUND 9 + + vpclmulqdq TMP1, T, XMMWORD PTR[Lpoly], 010h + vpalignr T,T,T,8 + vpxor T, T, TMP1 + + vmovdqu TMP5, XMMWORD PTR[10*16 + KS] + cmp NR, 10 + je @f + + ROUND 10 + ROUND 11 + vmovdqu TMP5, XMMWORD PTR[12*16 + KS] + cmp NR, 12 + je @f + + ROUND 12 + ROUND 13 + vmovdqu TMP5, XMMWORD PTR[14*16 + KS] +@@: + vpxor TMP3, TMP5, XMMWORD PTR[0*16 + CT] + vaesenclast CTR0, CTR0, TMP3 + vpxor TMP3, TMP5, XMMWORD PTR[1*16 + CT] + vaesenclast CTR1, CTR1, TMP3 + vpxor TMP3, TMP5, XMMWORD PTR[2*16 + CT] + vaesenclast CTR2, CTR2, TMP3 + vpxor TMP3, TMP5, XMMWORD PTR[3*16 + CT] + vaesenclast CTR3, CTR3, TMP3 + vpxor TMP3, TMP5, XMMWORD PTR[4*16 + CT] + vaesenclast CTR4, CTR4, TMP3 + vpxor TMP3, TMP5, XMMWORD PTR[5*16 + CT] + vaesenclast CTR5, CTR5, TMP3 + vpxor TMP3, TMP5, XMMWORD PTR[6*16 + CT] + vaesenclast CTR6, CTR6, TMP3 + vpxor TMP3, TMP5, XMMWORD PTR[7*16 + CT] + vaesenclast CTR7, CTR7, TMP3 + + vmovdqu XMMWORD PTR[0*16 + PT], CTR0 + vmovdqu XMMWORD PTR[1*16 + PT], CTR1 + vmovdqu XMMWORD PTR[2*16 + PT], CTR2 + vmovdqu XMMWORD PTR[3*16 + PT], CTR3 + vmovdqu XMMWORD PTR[4*16 + PT], CTR4 + vmovdqu XMMWORD PTR[5*16 + PT], CTR5 + vmovdqu XMMWORD PTR[6*16 + PT], CTR6 + vmovdqu XMMWORD PTR[7*16 + PT], CTR7 + + vpxor T, T, TMP4 + + lea CT, [8*16 + CT] + lea PT, [8*16 + PT] + jmp LDecDataOctets + +LEndDecOctets: + + sub aluCTR, 7 + +LDecDataSingles: + + cmp len, 16 + jb LDecDataTail + sub len, 16 + + vmovdqa TMP1, XMMWORD PTR[0*16 + rsp] + NEXTCTR 0 + + vaesenc TMP1, TMP1, XMMWORD PTR[1*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[2*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[3*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[4*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[5*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[6*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[7*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[8*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[9*16 + KS] + vmovdqu TMP2, XMMWORD PTR[10*16 + KS] + cmp NR, 10 + je @f + vaesenc TMP1, TMP1, XMMWORD PTR[10*16 + KS] + vaesenc TMP1, 
TMP1, XMMWORD PTR[11*16 + KS] + vmovdqu TMP2, XMMWORD PTR[12*16 + KS] + cmp NR, 12 + je @f + vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS] + vmovdqu TMP2, XMMWORD PTR[14*16 + KS] +@@: + vaesenclast TMP1, TMP1, TMP2 + + vmovdqu TMP2, XMMWORD PTR[CT] + vpxor TMP1, TMP1, TMP2 + vmovdqu XMMWORD PTR[PT], TMP1 + + lea PT, [16+PT] + lea CT, [16+CT] + + vpshufb TMP2, TMP2, BSWAPMASK + vpxor T, T, TMP2 + vmovdqu TMP0, XMMWORD PTR[Htbl] + GFMUL T, T, TMP0, TMP1, TMP2, TMP3, TMP4 + + jmp LDecDataSingles + +LDecDataTail: + + test len, len + jz LDecDataEnd + + vmovdqa TMP1, XMMWORD PTR[0*16 + rsp] + inc aluCTR + vaesenc TMP1, TMP1, XMMWORD PTR[1*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[2*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[3*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[4*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[5*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[6*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[7*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[8*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[9*16 + KS] + vmovdqu TMP2, XMMWORD PTR[10*16 + KS] + cmp NR, 10 + je @f + vaesenc TMP1, TMP1, XMMWORD PTR[10*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[11*16 + KS] + vmovdqu TMP2, XMMWORD PTR[12*16 + KS] + cmp NR, 12 + je @f + vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS] + vmovdqu TMP2, XMMWORD PTR[14*16 + KS] +@@: + vaesenclast TMP1, TMP1, TMP2 +; copy as many bytes as needed + xor KS, KS +@@: + cmp len, KS + je @f + mov al, [CT + KS] + mov [rsp + KS], al + inc KS + jmp @b +@@: + cmp KS, 16 + je @f + mov BYTE PTR[rsp + KS], 0 + inc KS + jmp @b +@@: + vmovdqa TMP2, XMMWORD PTR[rsp] + vpshufb TMP2, TMP2, BSWAPMASK + vpxor T, T, TMP2 + vmovdqu TMP0, XMMWORD PTR[Htbl] + GFMUL T, T, TMP0, TMP5, TMP2, TMP3, TMP4 + + + vpxor TMP1, TMP1, XMMWORD PTR[rsp] + vmovdqa XMMWORD PTR[rsp], TMP1 + xor KS, KS +@@: + cmp len, KS + je @f + mov al, [rsp + KS] + mov [PT + KS], al + inc KS + jmp @b +@@: + +LDecDataEnd: 
+ + vmovdqu XMMWORD PTR[16*16 + 1*16 + Gctx], T + bswap aluCTR + mov [16*16 + 2*16 + 3*4 + Gctx], aluCTR + + mov rsp, rbp + + vmovdqu xmm6, XMMWORD PTR[rsp + 0*16] + vmovdqu xmm7, XMMWORD PTR[rsp + 1*16] + vmovdqu xmm8, XMMWORD PTR[rsp + 2*16] + vmovdqu xmm9, XMMWORD PTR[rsp + 3*16] + vmovdqu xmm10, XMMWORD PTR[rsp + 4*16] + vmovdqu xmm11, XMMWORD PTR[rsp + 5*16] + vmovdqu xmm12, XMMWORD PTR[rsp + 6*16] + vmovdqu xmm13, XMMWORD PTR[rsp + 7*16] + vmovdqu xmm14, XMMWORD PTR[rsp + 8*16] + vmovdqu xmm15, XMMWORD PTR[rsp + 9*16] + + add rsp, 10*16 + pop rbp + pop r13 + pop r12 + pop r11 + + vzeroupper + + ret +ret +intel_aes_gcmDEC ENDP + + +END diff --git a/security/nss/lib/freebl/intel-gcm-x86-masm.asm b/security/nss/lib/freebl/intel-gcm-x86-masm.asm new file mode 100644 index 000000000..6362ad859 --- /dev/null +++ b/security/nss/lib/freebl/intel-gcm-x86-masm.asm @@ -0,0 +1,1209 @@ +; LICENSE: +; This submission to NSS is to be made available under the terms of the +; Mozilla Public License, v. 2.0. You can obtain one at http: +; //mozilla.org/MPL/2.0/. +;############################################################################### +; Copyright(c) 2014, Intel Corp. 
+; Developers and authors: +; Shay Gueron and Vlad Krasnov +; Intel Corporation, Israel Development Centre, Haifa, Israel +; Please send feedback directly to crypto.feedback.alias@intel.com + + +.MODEL FLAT, C +.XMM + +.DATA +ALIGN 16 +Lone dq 1,0 +Ltwo dq 2,0 +Lbswap_mask db 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +Lshuff_mask dq 0f0f0f0f0f0f0f0fh, 0f0f0f0f0f0f0f0fh +Lpoly dq 01h, 0c200000000000000h + +.CODE + + +GFMUL MACRO DST, SRC1, SRC2, TMP1, TMP2, TMP3, TMP4 + vpclmulqdq TMP1, SRC2, SRC1, 0h + vpclmulqdq TMP4, SRC2, SRC1, 011h + + vpshufd TMP2, SRC2, 78 + vpshufd TMP3, SRC1, 78 + vpxor TMP2, TMP2, SRC2 + vpxor TMP3, TMP3, SRC1 + + vpclmulqdq TMP2, TMP2, TMP3, 0h + vpxor TMP2, TMP2, TMP1 + vpxor TMP2, TMP2, TMP4 + + vpslldq TMP3, TMP2, 8 + vpsrldq TMP2, TMP2, 8 + + vpxor TMP1, TMP1, TMP3 + vpxor TMP4, TMP4, TMP2 + + vpclmulqdq TMP2, TMP1, [Lpoly], 010h + vpshufd TMP3, TMP1, 78 + vpxor TMP1, TMP2, TMP3 + + vpclmulqdq TMP2, TMP1, [Lpoly], 010h + vpshufd TMP3, TMP1, 78 + vpxor TMP1, TMP2, TMP3 + + vpxor DST, TMP1, TMP4 + + ENDM + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; +; Generates the final GCM tag +; void intel_aes_gcmTAG(unsigned char Htbl[16*16], +; unsigned char *Tp, +; unsigned int Mlen, +; unsigned int Alen, +; unsigned char* X0, +; unsigned char* TAG); +; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +ALIGN 16 +intel_aes_gcmTAG PROC + +Htbl textequ +Tp textequ +X0 textequ +TAG textequ + +T textequ +TMP0 textequ + + push ebx + + mov Htbl, [esp + 2*4 + 0*4] + mov Tp, [esp + 2*4 + 1*4] + mov X0, [esp + 2*4 + 4*4] + mov TAG, [esp + 2*4 + 5*4] + + vzeroupper + vmovdqu T, XMMWORD PTR[Tp] + + vpxor TMP0, TMP0, TMP0 + vpinsrd TMP0, TMP0, DWORD PTR[esp + 2*4 + 2*4], 0 + vpinsrd TMP0, TMP0, DWORD PTR[esp + 2*4 + 3*4], 2 + vpsllq TMP0, TMP0, 3 + + vpxor T, T, TMP0 + vmovdqu TMP0, XMMWORD PTR[Htbl] + GFMUL T, T, TMP0, xmm2, xmm3, xmm4, xmm5 + + vpshufb T, T, [Lbswap_mask] + vpxor T, T, 
[X0] + vmovdqu XMMWORD PTR[TAG], T + vzeroupper + + pop ebx + + ret + +intel_aes_gcmTAG ENDP + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; +; Generates the H table +; void intel_aes_gcmINIT(unsigned char Htbl[16*16], unsigned char *KS, int NR); +; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +ALIGN 16 +intel_aes_gcmINIT PROC + +Htbl textequ +KS textequ +NR textequ + +T textequ +TMP0 textequ + + mov Htbl, [esp + 4*1 + 0*4] + mov KS, [esp + 4*1 + 1*4] + mov NR, [esp + 4*1 + 2*4] + + vzeroupper + ; AES-ENC(0) + vmovdqu T, XMMWORD PTR[KS] + lea KS, [16 + KS] + dec NR +Lenc_loop: + vaesenc T, T, [KS] + lea KS, [16 + KS] + dec NR + jnz Lenc_loop + + vaesenclast T, T, [KS] + vpshufb T, T, [Lbswap_mask] + + ;Calculate H` = GFMUL(H, 2) + vpsrad xmm3, T, 31 + vpshufd xmm3, xmm3, 0ffh + vpand xmm5, xmm3, [Lpoly] + vpsrld xmm3, T, 31 + vpslld xmm4, T, 1 + vpslldq xmm3, xmm3, 4 + vpxor T, xmm4, xmm3 + vpxor T, T, xmm5 + + vmovdqu TMP0, T + vmovdqu XMMWORD PTR[Htbl + 0*16], T + + vpshufd xmm2, T, 78 + vpxor xmm2, xmm2, T + vmovdqu XMMWORD PTR[Htbl + 8*16 + 0*16], xmm2 + + i = 1 + WHILE i LT 8 + GFMUL T, T, TMP0, xmm2, xmm3, xmm4, xmm5 + vmovdqu XMMWORD PTR[Htbl + i*16], T + vpshufd xmm2, T, 78 + vpxor xmm2, xmm2, T + vmovdqu XMMWORD PTR[Htbl + 8*16 + i*16], xmm2 + i = i+1 + ENDM + vzeroupper + ret +intel_aes_gcmINIT ENDP + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; +; Authenticate only +; void intel_aes_gcmAAD(unsigned char Htbl[16*16], unsigned char *AAD, unsigned int Alen, unsigned char *Tp); +; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +ALIGN 16 +intel_aes_gcmAAD PROC + +Htbl textequ +inp textequ +len textequ +Tp textequ +hlp0 textequ + +DATA textequ +T textequ +TMP0 textequ +TMP1 textequ +TMP2 textequ +TMP3 textequ +TMP4 textequ +Xhi textequ + +KARATSUBA_AAD MACRO i + vpclmulqdq TMP3, DATA, [Htbl + i*16], 0h + vpxor 
TMP0, TMP0, TMP3 + vpclmulqdq TMP3, DATA, [Htbl + i*16], 011h + vpxor TMP1, TMP1, TMP3 + vpshufd TMP3, DATA, 78 + vpxor TMP3, TMP3, DATA + vpclmulqdq TMP3, TMP3, [Htbl + 8*16 + i*16], 0h + vpxor TMP2, TMP2, TMP3 +ENDM + + cmp DWORD PTR[esp + 1*4 + 2*4], 0 ; Alen is the 3rd stack argument: skip the 4-byte return address plus two preceding 4-byte args; return early when there is nothing to hash + jnz LbeginAAD + ret + +LbeginAAD: + push ebx + push esi + + mov Htbl, [esp + 4*3 + 0*4] + mov inp, [esp + 4*3 + 1*4] + mov len, [esp + 4*3 + 2*4] + mov Tp, [esp + 4*3 + 3*4] + + vzeroupper + + vpxor Xhi, Xhi, Xhi + + vmovdqu T, XMMWORD PTR[Tp] + ;we hash 8 blocks each iteration; if the total number of blocks is not a multiple of 8, we hash the first n%8 blocks first + mov hlp0, len + and hlp0, 128-1 + jz Lmod_loop + + and len, -128 + sub hlp0, 16 + + ; Prefix block + vmovdqu DATA, XMMWORD PTR[inp] + vpshufb DATA, DATA, [Lbswap_mask] + vpxor DATA, DATA, T + + vpclmulqdq TMP0, DATA, XMMWORD PTR[Htbl + hlp0], 0h + vpclmulqdq TMP1, DATA, XMMWORD PTR[Htbl + hlp0], 011h + vpshufd TMP3, DATA, 78 + vpxor TMP3, TMP3, DATA + vpclmulqdq TMP2, TMP3, XMMWORD PTR[Htbl + 8*16 + hlp0], 0h + + lea inp, [inp+16] + test hlp0, hlp0 + jnz Lpre_loop + jmp Lred1 + + ;hash remaining prefix blocks (up to 7 total prefix blocks) +Lpre_loop: + + sub hlp0, 16 + + vmovdqu DATA, XMMWORD PTR[inp] + vpshufb DATA, DATA, [Lbswap_mask] + + vpclmulqdq TMP3, DATA, XMMWORD PTR[Htbl + hlp0], 0h + vpxor TMP0, TMP0, TMP3 + vpclmulqdq TMP3, DATA, XMMWORD PTR[Htbl + hlp0], 011h + vpxor TMP1, TMP1, TMP3 + vpshufd TMP3, DATA, 78 + vpxor TMP3, TMP3, DATA + vpclmulqdq TMP3, TMP3, XMMWORD PTR[Htbl + 8*16 + hlp0], 0h + vpxor TMP2, TMP2, TMP3 + + test hlp0, hlp0 + lea inp, [inp+16] + jnz Lpre_loop + +Lred1: + + vpxor TMP2, TMP2, TMP0 + vpxor TMP2, TMP2, TMP1 + vpsrldq TMP3, TMP2, 8 + vpslldq TMP2, TMP2, 8 + + vpxor Xhi, TMP1, TMP3 + vpxor T, TMP0, TMP2 + +Lmod_loop: + + sub len, 16*8 + jb Ldone + ; Block #0 + vmovdqu DATA, XMMWORD PTR[inp + 16*7] + vpshufb DATA, DATA, XMMWORD PTR[Lbswap_mask] + + vpclmulqdq TMP0, DATA, XMMWORD PTR[Htbl + 0*16], 0h + vpclmulqdq
TMP1, DATA, XMMWORD PTR[Htbl + 0*16], 011h + vpshufd TMP3, DATA, 78 + vpxor TMP3, TMP3, DATA + vpclmulqdq TMP2, TMP3, XMMWORD PTR[Htbl + 8*16 + 0*16], 0h + + ; Block #1 + vmovdqu DATA, XMMWORD PTR[inp + 16*6] + vpshufb DATA, DATA, [Lbswap_mask] + KARATSUBA_AAD 1 + + ; Block #2 + vmovdqu DATA, XMMWORD PTR[inp + 16*5] + vpshufb DATA, DATA, [Lbswap_mask] + + vpclmulqdq TMP4, T, [Lpoly], 010h ;reduction stage 1a + vpalignr T, T, T, 8 + + KARATSUBA_AAD 2 + + vpxor T, T, TMP4 ;reduction stage 1b + + ; Block #3 + vmovdqu DATA, XMMWORD PTR[inp + 16*4] + vpshufb DATA, DATA, [Lbswap_mask] + KARATSUBA_AAD 3 + ; Block #4 + vmovdqu DATA, XMMWORD PTR[inp + 16*3] + vpshufb DATA, DATA, [Lbswap_mask] + + vpclmulqdq TMP4, T, [Lpoly], 010h ;reduction stage 2a + vpalignr T, T, T, 8 + + KARATSUBA_AAD 4 + + vpxor T, T, TMP4 ;reduction stage 2b + ; Block #5 + vmovdqu DATA, XMMWORD PTR[inp + 16*2] + vpshufb DATA, DATA, [Lbswap_mask] + KARATSUBA_AAD 5 + + vpxor T, T, Xhi ;reduction finalize + ; Block #6 + vmovdqu DATA, XMMWORD PTR[inp + 16*1] + vpshufb DATA, DATA, [Lbswap_mask] + KARATSUBA_AAD 6 + ; Block #7 + vmovdqu DATA, XMMWORD PTR[inp + 16*0] + vpshufb DATA, DATA, [Lbswap_mask] + vpxor DATA, DATA, T + KARATSUBA_AAD 7 + ; Aggregated 8 blocks, now karatsuba fixup + vpxor TMP2, TMP2, TMP0 + vpxor TMP2, TMP2, TMP1 + vpsrldq TMP3, TMP2, 8 + vpslldq TMP2, TMP2, 8 + + vpxor Xhi, TMP1, TMP3 + vpxor T, TMP0, TMP2 + + lea inp, [inp + 16*8] + jmp Lmod_loop + +Ldone: + vpclmulqdq TMP4, T, [Lpoly], 010h + vpalignr T, T, T, 8 + vpxor T, T, TMP4 + + vpclmulqdq TMP4, T, [Lpoly], 010h + vpalignr T, T, T, 8 + vpxor T, T, TMP4 + + vpxor T, T, Xhi + vmovdqu XMMWORD PTR[Tp], T + vzeroupper + + pop esi + pop ebx + ret + +intel_aes_gcmAAD ENDP + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; +; Encrypt and Authenticate +; void intel_aes_gcmENC(unsigned char* PT, unsigned char* CT, void *Gctx, unsigned int len); +; 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +ALIGN 16 +intel_aes_gcmENC PROC + +PT textequ +CT textequ +Htbl textequ +Gctx textequ +len textequ +KS textequ +NR textequ + +aluCTR textequ +aluTMP textequ + +T textequ +TMP0 textequ +TMP1 textequ +TMP2 textequ +TMP3 textequ +TMP4 textequ +TMP5 textequ + +CTR0 textequ +CTR1 textequ +CTR2 textequ +CTR3 textequ +CTR4 textequ +CTR5 textequ +CTR6 textequ + +ROUND MACRO i + vmovdqu xmm7, XMMWORD PTR[i*16 + KS] + vaesenc CTR0, CTR0, xmm7 + vaesenc CTR1, CTR1, xmm7 + vaesenc CTR2, CTR2, xmm7 + vaesenc CTR3, CTR3, xmm7 + vaesenc CTR4, CTR4, xmm7 + vaesenc CTR5, CTR5, xmm7 + vaesenc CTR6, CTR6, xmm7 +ENDM + +KARATSUBA MACRO i + vpshufd TMP4, TMP5, 78 + vpxor TMP4, TMP4, TMP5 + vpclmulqdq TMP3, TMP4, XMMWORD PTR[i*16 + 8*16 + Htbl], 000h + vpxor TMP0, TMP0, TMP3 + vmovdqu TMP4, XMMWORD PTR[i*16 + Htbl] + vpclmulqdq TMP3, TMP5, TMP4, 011h + vpxor TMP1, TMP1, TMP3 + vpclmulqdq TMP3, TMP5, TMP4, 000h + vpxor TMP2, TMP2, TMP3 +ENDM + +NEXTCTR MACRO i + add aluCTR, 1 + mov aluTMP, aluCTR + bswap aluTMP + xor aluTMP, [3*4 + KS] + mov [3*4 + 8*16 + i*16 + esp], aluTMP +ENDM + + cmp DWORD PTR[1*4 + 3*4 + esp], 0 + jne LbeginENC + ret + +LbeginENC: + + vzeroupper + push ebp + push ebx + push esi + push edi + + mov ebp, esp + sub esp, 16*16 + and esp, -16 + + mov PT, [ebp + 5*4 + 0*4] + mov CT, [ebp + 5*4 + 1*4] + mov Gctx, [ebp + 5*4 + 2*4] + + mov KS, [16*16 + 3*16 + Gctx] + lea KS, [44 + KS] + + mov aluCTR, [16*16 + 2*16 + 3*4 + Gctx] + bswap aluCTR + + + vmovdqu TMP0, XMMWORD PTR[0*16 + KS] + vpxor TMP0, TMP0, XMMWORD PTR[16*16 + 2*16 + Gctx] + vmovdqu XMMWORD PTR[8*16 + 0*16 + esp], TMP0 + + cmp len, 16*7 + jb LEncDataSingles +; Prepare the "top" counters + vmovdqu XMMWORD PTR[8*16 + 1*16 + esp], TMP0 + vmovdqu XMMWORD PTR[8*16 + 2*16 + esp], TMP0 + vmovdqu XMMWORD PTR[8*16 + 3*16 + esp], TMP0 + vmovdqu XMMWORD PTR[8*16 + 4*16 + esp], TMP0 + vmovdqu XMMWORD PTR[8*16 + 5*16 + esp], TMP0 + vmovdqu 
XMMWORD PTR[8*16 + 6*16 + esp], TMP0 + + vmovdqu CTR0, XMMWORD PTR[16*16 + 2*16 + Gctx] + vpshufb CTR0, CTR0, XMMWORD PTR[Lbswap_mask] +; Encrypt the initial 7 blocks + sub len, 16*7 + vpaddd CTR1, CTR0, XMMWORD PTR[Lone] + vpaddd CTR2, CTR0, XMMWORD PTR[Ltwo] + vpaddd CTR3, CTR2, XMMWORD PTR[Lone] + vpaddd CTR4, CTR2, XMMWORD PTR[Ltwo] + vpaddd CTR5, CTR4, XMMWORD PTR[Lone] + vpaddd CTR6, CTR4, XMMWORD PTR[Ltwo] + + vpshufb CTR0, CTR0, XMMWORD PTR[Lbswap_mask] + vpshufb CTR1, CTR1, XMMWORD PTR[Lbswap_mask] + vpshufb CTR2, CTR2, XMMWORD PTR[Lbswap_mask] + vpshufb CTR3, CTR3, XMMWORD PTR[Lbswap_mask] + vpshufb CTR4, CTR4, XMMWORD PTR[Lbswap_mask] + vpshufb CTR5, CTR5, XMMWORD PTR[Lbswap_mask] + vpshufb CTR6, CTR6, XMMWORD PTR[Lbswap_mask] + + vmovdqu xmm7, XMMWORD PTR[0*16 + KS] + vpxor CTR0, CTR0, xmm7 + vpxor CTR1, CTR1, xmm7 + vpxor CTR2, CTR2, xmm7 + vpxor CTR3, CTR3, xmm7 + vpxor CTR4, CTR4, xmm7 + vpxor CTR5, CTR5, xmm7 + vpxor CTR6, CTR6, xmm7 + + ROUND 1 + + add aluCTR, 7 + mov aluTMP, aluCTR + bswap aluTMP + xor aluTMP, [KS + 3*4] + mov [8*16 + 0*16 + 3*4 + esp], aluTMP + + ROUND 2 + NEXTCTR 1 + ROUND 3 + NEXTCTR 2 + ROUND 4 + NEXTCTR 3 + ROUND 5 + NEXTCTR 4 + ROUND 6 + NEXTCTR 5 + ROUND 7 + NEXTCTR 6 + ROUND 8 + ROUND 9 + vmovdqu xmm7, XMMWORD PTR[10*16 + KS] + cmp NR, 10 + je @f + + ROUND 10 + ROUND 11 + vmovdqu xmm7, XMMWORD PTR[12*16 + KS] + cmp NR, 12 + je @f + + ROUND 12 + ROUND 13 + vmovdqu xmm7, XMMWORD PTR[14*16 + KS] +@@: + vaesenclast CTR0, CTR0, xmm7 + vaesenclast CTR1, CTR1, xmm7 + vaesenclast CTR2, CTR2, xmm7 + vaesenclast CTR3, CTR3, xmm7 + vaesenclast CTR4, CTR4, xmm7 + vaesenclast CTR5, CTR5, xmm7 + vaesenclast CTR6, CTR6, xmm7 + + vpxor CTR0, CTR0, XMMWORD PTR[0*16 + PT] + vpxor CTR1, CTR1, XMMWORD PTR[1*16 + PT] + vpxor CTR2, CTR2, XMMWORD PTR[2*16 + PT] + vpxor CTR3, CTR3, XMMWORD PTR[3*16 + PT] + vpxor CTR4, CTR4, XMMWORD PTR[4*16 + PT] + vpxor CTR5, CTR5, XMMWORD PTR[5*16 + PT] + vpxor CTR6, CTR6, XMMWORD PTR[6*16 + PT] + + vmovdqu 
XMMWORD PTR[0*16 + CT], CTR0 + vmovdqu XMMWORD PTR[1*16 + CT], CTR1 + vmovdqu XMMWORD PTR[2*16 + CT], CTR2 + vmovdqu XMMWORD PTR[3*16 + CT], CTR3 + vmovdqu XMMWORD PTR[4*16 + CT], CTR4 + vmovdqu XMMWORD PTR[5*16 + CT], CTR5 + vmovdqu XMMWORD PTR[6*16 + CT], CTR6 + + vpshufb CTR0, CTR0, XMMWORD PTR[Lbswap_mask] + vpshufb CTR1, CTR1, XMMWORD PTR[Lbswap_mask] + vpshufb CTR2, CTR2, XMMWORD PTR[Lbswap_mask] + vpshufb CTR3, CTR3, XMMWORD PTR[Lbswap_mask] + vpshufb CTR4, CTR4, XMMWORD PTR[Lbswap_mask] + vpshufb CTR5, CTR5, XMMWORD PTR[Lbswap_mask] + vpshufb TMP5, CTR6, XMMWORD PTR[Lbswap_mask] + + vmovdqa XMMWORD PTR[1*16 + esp], CTR5 + vmovdqa XMMWORD PTR[2*16 + esp], CTR4 + vmovdqa XMMWORD PTR[3*16 + esp], CTR3 + vmovdqa XMMWORD PTR[4*16 + esp], CTR2 + vmovdqa XMMWORD PTR[5*16 + esp], CTR1 + vmovdqa XMMWORD PTR[6*16 + esp], CTR0 + + lea CT, [7*16 + CT] + lea PT, [7*16 + PT] + jmp LEncData7 + +LEncData7: + cmp len, 16*7 + jb LEndEnc7 + sub len, 16*7 + + vpshufd TMP4, TMP5, 78 + vpxor TMP4, TMP4, TMP5 + vpclmulqdq TMP0, TMP4, XMMWORD PTR[0*16 + 8*16 + Htbl], 000h + vmovdqu TMP4, XMMWORD PTR[0*16 + Htbl] + vpclmulqdq TMP1, TMP5, TMP4, 011h + vpclmulqdq TMP2, TMP5, TMP4, 000h + + vmovdqu TMP5, XMMWORD PTR[1*16 + esp] + KARATSUBA 1 + vmovdqu TMP5, XMMWORD PTR[2*16 + esp] + KARATSUBA 2 + vmovdqu TMP5, XMMWORD PTR[3*16 + esp] + KARATSUBA 3 + vmovdqu TMP5, XMMWORD PTR[4*16 + esp] + KARATSUBA 4 + vmovdqu TMP5, XMMWORD PTR[5*16 + esp] + KARATSUBA 5 + vmovdqu TMP5, XMMWORD PTR[6*16 + esp] + vpxor TMP5, TMP5, T + KARATSUBA 6 + + vpxor TMP0, TMP0, TMP1 + vpxor TMP0, TMP0, TMP2 + vpsrldq TMP3, TMP0, 8 + vpxor TMP4, TMP1, TMP3 + vpslldq TMP3, TMP0, 8 + vpxor TMP5, TMP2, TMP3 + + vpclmulqdq TMP1, TMP5, XMMWORD PTR[Lpoly], 010h + vpalignr TMP5,TMP5,TMP5,8 + vpxor TMP5, TMP5, TMP1 + + vpclmulqdq TMP1, TMP5, XMMWORD PTR[Lpoly], 010h + vpalignr TMP5,TMP5,TMP5,8 + vpxor TMP5, TMP5, TMP1 + + vpxor TMP5, TMP5, TMP4 + vmovdqu T, TMP5 + + vmovdqa CTR0, XMMWORD PTR[8*16 + 0*16 + esp] + vmovdqa 
CTR1, XMMWORD PTR[8*16 + 1*16 + esp] + vmovdqa CTR2, XMMWORD PTR[8*16 + 2*16 + esp] + vmovdqa CTR3, XMMWORD PTR[8*16 + 3*16 + esp] + vmovdqa CTR4, XMMWORD PTR[8*16 + 4*16 + esp] + vmovdqa CTR5, XMMWORD PTR[8*16 + 5*16 + esp] + vmovdqa CTR6, XMMWORD PTR[8*16 + 6*16 + esp] + + ROUND 1 + NEXTCTR 0 + ROUND 2 + NEXTCTR 1 + ROUND 3 + NEXTCTR 2 + ROUND 4 + NEXTCTR 3 + ROUND 5 + NEXTCTR 4 + ROUND 6 + NEXTCTR 5 + ROUND 7 + NEXTCTR 6 + + ROUND 8 + ROUND 9 + + vmovdqu xmm7, XMMWORD PTR[10*16 + KS] + cmp NR, 10 + je @f + + ROUND 10 + ROUND 11 + vmovdqu xmm7, XMMWORD PTR[12*16 + KS] + cmp NR, 12 + je @f + + ROUND 12 + ROUND 13 + vmovdqu xmm7, XMMWORD PTR[14*16 + KS] +@@: + vaesenclast CTR0, CTR0, xmm7 + vaesenclast CTR1, CTR1, xmm7 + vaesenclast CTR2, CTR2, xmm7 + vaesenclast CTR3, CTR3, xmm7 + vaesenclast CTR4, CTR4, xmm7 + vaesenclast CTR5, CTR5, xmm7 + vaesenclast CTR6, CTR6, xmm7 + + vpxor CTR0, CTR0, XMMWORD PTR[0*16 + PT] + vpxor CTR1, CTR1, XMMWORD PTR[1*16 + PT] + vpxor CTR2, CTR2, XMMWORD PTR[2*16 + PT] + vpxor CTR3, CTR3, XMMWORD PTR[3*16 + PT] + vpxor CTR4, CTR4, XMMWORD PTR[4*16 + PT] + vpxor CTR5, CTR5, XMMWORD PTR[5*16 + PT] + vpxor CTR6, CTR6, XMMWORD PTR[6*16 + PT] + + vmovdqu XMMWORD PTR[0*16 + CT], CTR0 + vmovdqu XMMWORD PTR[1*16 + CT], CTR1 + vmovdqu XMMWORD PTR[2*16 + CT], CTR2 + vmovdqu XMMWORD PTR[3*16 + CT], CTR3 + vmovdqu XMMWORD PTR[4*16 + CT], CTR4 + vmovdqu XMMWORD PTR[5*16 + CT], CTR5 + vmovdqu XMMWORD PTR[6*16 + CT], CTR6 + + vpshufb CTR0, CTR0, XMMWORD PTR[Lbswap_mask] + vpshufb CTR1, CTR1, XMMWORD PTR[Lbswap_mask] + vpshufb CTR2, CTR2, XMMWORD PTR[Lbswap_mask] + vpshufb CTR3, CTR3, XMMWORD PTR[Lbswap_mask] + vpshufb CTR4, CTR4, XMMWORD PTR[Lbswap_mask] + vpshufb CTR5, CTR5, XMMWORD PTR[Lbswap_mask] + vpshufb TMP5, CTR6, XMMWORD PTR[Lbswap_mask] + + vmovdqa XMMWORD PTR[1*16 + esp], CTR5 + vmovdqa XMMWORD PTR[2*16 + esp], CTR4 + vmovdqa XMMWORD PTR[3*16 + esp], CTR3 + vmovdqa XMMWORD PTR[4*16 + esp], CTR2 + vmovdqa XMMWORD PTR[5*16 + esp], CTR1 + 
vmovdqa XMMWORD PTR[6*16 + esp], CTR0 + + lea CT, [7*16 + CT] + lea PT, [7*16 + PT] + jmp LEncData7 + +LEndEnc7: + + vpshufd TMP4, TMP5, 78 + vpxor TMP4, TMP4, TMP5 + vpclmulqdq TMP0, TMP4, XMMWORD PTR[0*16 + 8*16 + Htbl], 000h + vmovdqu TMP4, XMMWORD PTR[0*16 + Htbl] + vpclmulqdq TMP1, TMP5, TMP4, 011h + vpclmulqdq TMP2, TMP5, TMP4, 000h + + vmovdqu TMP5, XMMWORD PTR[1*16 + esp] + KARATSUBA 1 + vmovdqu TMP5, XMMWORD PTR[2*16 + esp] + KARATSUBA 2 + vmovdqu TMP5, XMMWORD PTR[3*16 + esp] + KARATSUBA 3 + vmovdqu TMP5, XMMWORD PTR[4*16 + esp] + KARATSUBA 4 + vmovdqu TMP5, XMMWORD PTR[5*16 + esp] + KARATSUBA 5 + vmovdqu TMP5, XMMWORD PTR[6*16 + esp] + vpxor TMP5, TMP5, T + KARATSUBA 6 + + vpxor TMP0, TMP0, TMP1 + vpxor TMP0, TMP0, TMP2 + vpsrldq TMP3, TMP0, 8 + vpxor TMP4, TMP1, TMP3 + vpslldq TMP3, TMP0, 8 + vpxor TMP5, TMP2, TMP3 + + vpclmulqdq TMP1, TMP5, XMMWORD PTR[Lpoly], 010h + vpalignr TMP5,TMP5,TMP5,8 + vpxor TMP5, TMP5, TMP1 + + vpclmulqdq TMP1, TMP5, XMMWORD PTR[Lpoly], 010h + vpalignr TMP5,TMP5,TMP5,8 + vpxor TMP5, TMP5, TMP1 + + vpxor TMP5, TMP5, TMP4 + vmovdqu T, TMP5 + + sub aluCTR, 6 + +LEncDataSingles: + + cmp len, 16 + jb LEncDataTail + sub len, 16 + + vmovdqa TMP1, XMMWORD PTR[8*16 + 0*16 + esp] + NEXTCTR 0 + + vaesenc TMP1, TMP1, XMMWORD PTR[1*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[2*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[3*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[4*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[5*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[6*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[7*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[8*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[9*16 + KS] + vmovdqu TMP2, XMMWORD PTR[10*16 + KS] + cmp NR, 10 + je @f + vaesenc TMP1, TMP1, XMMWORD PTR[10*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[11*16 + KS] + vmovdqu TMP2, XMMWORD PTR[12*16 + KS] + cmp NR, 12 + je @f + vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS] + vmovdqu TMP2, XMMWORD PTR[14*16 + KS] +@@: 
+ vaesenclast TMP1, TMP1, TMP2 + vpxor TMP1, TMP1, XMMWORD PTR[PT] + vmovdqu XMMWORD PTR[CT], TMP1 + + lea PT, [16+PT] + lea CT, [16+CT] + + vpshufb TMP1, TMP1, XMMWORD PTR[Lbswap_mask] + vpxor TMP1, TMP1, T + + vmovdqu TMP0, XMMWORD PTR[Htbl] + GFMUL TMP1, TMP1, TMP0, TMP5, TMP2, TMP3, TMP4 + vmovdqu T, TMP1 + + jmp LEncDataSingles + +LEncDataTail: + + cmp len, 0 + je LEncDataEnd + + vmovdqa TMP1, XMMWORD PTR[8*16 + 0*16 + esp] + + vaesenc TMP1, TMP1, XMMWORD PTR[1*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[2*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[3*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[4*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[5*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[6*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[7*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[8*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[9*16 + KS] + vmovdqu TMP2, XMMWORD PTR[10*16 + KS] + cmp NR, 10 + je @f + vaesenc TMP1, TMP1, XMMWORD PTR[10*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[11*16 + KS] + vmovdqu TMP2, XMMWORD PTR[12*16 + KS] + cmp NR, 12 + je @f + vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS] + vmovdqu TMP2, XMMWORD PTR[14*16 + KS] +@@: + vaesenclast TMP1, TMP1, TMP2 +; zero a temp location + vpxor TMP2, TMP2, TMP2 + vmovdqa XMMWORD PTR[esp], TMP2 +; copy as many bytes as needed + xor KS, KS + mov aluTMP, edx +@@: + cmp len, KS + je @f + mov dl, BYTE PTR[PT + KS] + mov BYTE PTR[esp + KS], dl + inc KS + jmp @b +@@: + vpxor TMP1, TMP1, XMMWORD PTR[esp] + vmovdqa XMMWORD PTR[esp], TMP1 + xor KS, KS +@@: + cmp len, KS + je @f + mov dl, BYTE PTR[esp + KS] + mov BYTE PTR[CT + KS], dl + inc KS + jmp @b +@@: + cmp KS, 16 + je @f + mov BYTE PTR[esp + KS], 0 + inc KS + jmp @b +@@: + mov edx, aluTMP + vmovdqa TMP1, XMMWORD PTR[esp] + vpshufb TMP1, TMP1, XMMWORD PTR[Lbswap_mask] + vpxor TMP1, TMP1, T + + vmovdqu TMP0, XMMWORD PTR[Htbl] + GFMUL TMP1, TMP1, TMP0, TMP5, TMP2, TMP3, TMP4 + vmovdqu T, TMP1 + +LEncDataEnd: + inc aluCTR + bswap 
aluCTR + mov [16*16 + 2*16 + 3*4 + Gctx], aluCTR + + mov esp, ebp + pop edi + pop esi + pop ebx + pop ebp + + + vzeroupper + + ret +intel_aes_gcmENC ENDP + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; +; Decrypt and Authenticate +; void intel_aes_gcmDEC(uint8_t* PT, uint8_t* CT, void *Gctx, unsigned int len); +; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + +NEXTCTR MACRO i + add aluCTR, 1 + mov aluTMP, aluCTR + bswap aluTMP + xor aluTMP, [3*4 + KS] + mov [3*4 + i*16 + esp], aluTMP +ENDM + +intel_aes_gcmDEC PROC + + cmp DWORD PTR[1*4 + 3*4 + esp], 0 + jne LbeginDEC + ret + +LbeginDEC: + + vzeroupper + push ebp + push ebx + push esi + push edi + + mov ebp, esp + sub esp, 8*16 + and esp, -16 + + mov CT, [ebp + 5*4 + 0*4] + mov PT, [ebp + 5*4 + 1*4] + mov Gctx, [ebp + 5*4 + 2*4] + + mov KS, [16*16 + 3*16 + Gctx] + lea KS, [44 + KS] + + mov aluCTR, [16*16 + 2*16 + 3*4 + Gctx] + bswap aluCTR + + + vmovdqu TMP0, XMMWORD PTR[0*16 + KS] + vpxor TMP0, TMP0, XMMWORD PTR[16*16 + 2*16 + Gctx] + vmovdqu XMMWORD PTR[0*16 + esp], TMP0 + + cmp len, 16*7 + jb LDecDataSingles + vmovdqu XMMWORD PTR[1*16 + esp], TMP0 + vmovdqu XMMWORD PTR[2*16 + esp], TMP0 + vmovdqu XMMWORD PTR[3*16 + esp], TMP0 + vmovdqu XMMWORD PTR[4*16 + esp], TMP0 + vmovdqu XMMWORD PTR[5*16 + esp], TMP0 + vmovdqu XMMWORD PTR[6*16 + esp], TMP0 + dec aluCTR + +LDecData7: + cmp len, 16*7 + jb LDecData7End + sub len, 16*7 + + vmovdqu TMP5, XMMWORD PTR[0*16 + CT] + vpshufb TMP5, TMP5, XMMWORD PTR[Lbswap_mask] + vpxor TMP5, TMP5, T + vpshufd TMP4, TMP5, 78 + vpxor TMP4, TMP4, TMP5 + vpclmulqdq TMP0, TMP4, XMMWORD PTR[6*16 + 8*16 + Htbl], 000h + vmovdqu TMP4, XMMWORD PTR[6*16 + Htbl] + vpclmulqdq TMP1, TMP5, TMP4, 011h + vpclmulqdq TMP2, TMP5, TMP4, 000h + + NEXTCTR 0 + vmovdqu TMP5, XMMWORD PTR[1*16 + CT] + vpshufb TMP5, TMP5, XMMWORD PTR[Lbswap_mask] + KARATSUBA 5 + NEXTCTR 1 + vmovdqu TMP5, XMMWORD PTR[2*16 + CT] + vpshufb TMP5, TMP5, 
XMMWORD PTR[Lbswap_mask] + KARATSUBA 4 + NEXTCTR 2 + vmovdqu TMP5, XMMWORD PTR[3*16 + CT] + vpshufb TMP5, TMP5, XMMWORD PTR[Lbswap_mask] + KARATSUBA 3 + NEXTCTR 3 + vmovdqu TMP5, XMMWORD PTR[4*16 + CT] + vpshufb TMP5, TMP5, XMMWORD PTR[Lbswap_mask] + KARATSUBA 2 + NEXTCTR 4 + vmovdqu TMP5, XMMWORD PTR[5*16 + CT] + vpshufb TMP5, TMP5, XMMWORD PTR[Lbswap_mask] + KARATSUBA 1 + NEXTCTR 5 + vmovdqu TMP5, XMMWORD PTR[6*16 + CT] + vpshufb TMP5, TMP5, XMMWORD PTR[Lbswap_mask] + KARATSUBA 0 + NEXTCTR 6 + + vpxor TMP0, TMP0, TMP1 + vpxor TMP0, TMP0, TMP2 + vpsrldq TMP3, TMP0, 8 + vpxor TMP4, TMP1, TMP3 + vpslldq TMP3, TMP0, 8 + vpxor TMP5, TMP2, TMP3 + + vpclmulqdq TMP1, TMP5, XMMWORD PTR[Lpoly], 010h + vpalignr TMP5,TMP5,TMP5,8 + vpxor TMP5, TMP5, TMP1 + + vpclmulqdq TMP1, TMP5, XMMWORD PTR[Lpoly], 010h + vpalignr TMP5,TMP5,TMP5,8 + vpxor TMP5, TMP5, TMP1 + + vpxor TMP5, TMP5, TMP4 + vmovdqu T, TMP5 + + vmovdqa CTR0, XMMWORD PTR[0*16 + esp] + vmovdqa CTR1, XMMWORD PTR[1*16 + esp] + vmovdqa CTR2, XMMWORD PTR[2*16 + esp] + vmovdqa CTR3, XMMWORD PTR[3*16 + esp] + vmovdqa CTR4, XMMWORD PTR[4*16 + esp] + vmovdqa CTR5, XMMWORD PTR[5*16 + esp] + vmovdqa CTR6, XMMWORD PTR[6*16 + esp] + + ROUND 1 + ROUND 2 + ROUND 3 + ROUND 4 + ROUND 5 + ROUND 6 + ROUND 7 + ROUND 8 + ROUND 9 + vmovdqu xmm7, XMMWORD PTR[10*16 + KS] + cmp NR, 10 + je @f + + ROUND 10 + ROUND 11 + vmovdqu xmm7, XMMWORD PTR[12*16 + KS] + cmp NR, 12 + je @f + + ROUND 12 + ROUND 13 + vmovdqu xmm7, XMMWORD PTR[14*16 + KS] +@@: + vaesenclast CTR0, CTR0, xmm7 + vaesenclast CTR1, CTR1, xmm7 + vaesenclast CTR2, CTR2, xmm7 + vaesenclast CTR3, CTR3, xmm7 + vaesenclast CTR4, CTR4, xmm7 + vaesenclast CTR5, CTR5, xmm7 + vaesenclast CTR6, CTR6, xmm7 + + vpxor CTR0, CTR0, XMMWORD PTR[0*16 + CT] + vpxor CTR1, CTR1, XMMWORD PTR[1*16 + CT] + vpxor CTR2, CTR2, XMMWORD PTR[2*16 + CT] + vpxor CTR3, CTR3, XMMWORD PTR[3*16 + CT] + vpxor CTR4, CTR4, XMMWORD PTR[4*16 + CT] + vpxor CTR5, CTR5, XMMWORD PTR[5*16 + CT] + vpxor CTR6, CTR6, XMMWORD 
PTR[6*16 + CT] + + vmovdqu XMMWORD PTR[0*16 + PT], CTR0 + vmovdqu XMMWORD PTR[1*16 + PT], CTR1 + vmovdqu XMMWORD PTR[2*16 + PT], CTR2 + vmovdqu XMMWORD PTR[3*16 + PT], CTR3 + vmovdqu XMMWORD PTR[4*16 + PT], CTR4 + vmovdqu XMMWORD PTR[5*16 + PT], CTR5 + vmovdqu XMMWORD PTR[6*16 + PT], CTR6 + + lea CT, [7*16 + CT] + lea PT, [7*16 + PT] + jmp LDecData7 + +LDecData7End: + + NEXTCTR 0 + +LDecDataSingles: + + cmp len, 16 + jb LDecDataTail + sub len, 16 + + vmovdqu TMP1, XMMWORD PTR[CT] + vpshufb TMP1, TMP1, XMMWORD PTR[Lbswap_mask] + vpxor TMP1, TMP1, T + + vmovdqu TMP0, XMMWORD PTR[Htbl] + GFMUL TMP1, TMP1, TMP0, TMP5, TMP2, TMP3, TMP4 + vmovdqu T, TMP1 + + vmovdqa TMP1, XMMWORD PTR[0*16 + esp] + NEXTCTR 0 + + vaesenc TMP1, TMP1, XMMWORD PTR[1*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[2*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[3*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[4*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[5*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[6*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[7*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[8*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[9*16 + KS] + vmovdqu TMP2, XMMWORD PTR[10*16 + KS] + cmp NR, 10 + je @f + vaesenc TMP1, TMP1, XMMWORD PTR[10*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[11*16 + KS] + vmovdqu TMP2, XMMWORD PTR[12*16 + KS] + cmp NR, 12 + je @f + vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS] + vmovdqu TMP2, XMMWORD PTR[14*16 + KS] +@@: + vaesenclast TMP1, TMP1, TMP2 + vpxor TMP1, TMP1, XMMWORD PTR[CT] + vmovdqu XMMWORD PTR[PT], TMP1 + + lea PT, [16+PT] + lea CT, [16+CT] + jmp LDecDataSingles + +LDecDataTail: + + cmp len, 0 + je LDecDataEnd + + vmovdqa TMP1, XMMWORD PTR[0*16 + esp] + inc aluCTR + vaesenc TMP1, TMP1, XMMWORD PTR[1*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[2*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[3*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[4*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[5*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[6*16 + 
KS] + vaesenc TMP1, TMP1, XMMWORD PTR[7*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[8*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[9*16 + KS] + vmovdqu TMP2, XMMWORD PTR[10*16 + KS] + cmp NR, 10 + je @f + vaesenc TMP1, TMP1, XMMWORD PTR[10*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[11*16 + KS] + vmovdqu TMP2, XMMWORD PTR[12*16 + KS] + cmp NR, 12 + je @f + vaesenc TMP1, TMP1, XMMWORD PTR[12*16 + KS] + vaesenc TMP1, TMP1, XMMWORD PTR[13*16 + KS] + vmovdqu TMP2, XMMWORD PTR[14*16 + KS] +@@: + vaesenclast xmm7, TMP1, TMP2 + +; copy as many bytes as needed + xor KS, KS + mov aluTMP, edx +@@: + cmp len, KS + je @f + mov dl, BYTE PTR[CT + KS] + mov BYTE PTR[esp + KS], dl + inc KS + jmp @b +@@: + cmp KS, 16 + je @f + mov BYTE PTR[esp + KS], 0 + inc KS + jmp @b +@@: + mov edx, aluTMP + vmovdqa TMP1, XMMWORD PTR[esp] + vpshufb TMP1, TMP1, XMMWORD PTR[Lbswap_mask] + vpxor TMP1, TMP1, T + + vmovdqu TMP0, XMMWORD PTR[Htbl] + GFMUL TMP1, TMP1, TMP0, TMP5, TMP2, TMP3, TMP4 + vmovdqu T, TMP1 + + vpxor xmm7, xmm7, XMMWORD PTR[esp] + vmovdqa XMMWORD PTR[esp], xmm7 + xor KS, KS + mov aluTMP, edx +@@: + cmp len, KS + je @f + mov dl, BYTE PTR[esp + KS] + mov BYTE PTR[PT + KS], dl + inc KS + jmp @b +@@: + mov edx, aluTMP + +LDecDataEnd: + + bswap aluCTR + mov [16*16 + 2*16 + 3*4 + Gctx], aluCTR + + mov esp, ebp + pop edi + pop esi + pop ebx + pop ebp + + vzeroupper + + ret +intel_aes_gcmDEC ENDP + + +END diff --git a/security/nss/lib/freebl/intel-gcm.h b/security/nss/lib/freebl/intel-gcm.h new file mode 100644 index 000000000..566e544d8 --- /dev/null +++ b/security/nss/lib/freebl/intel-gcm.h @@ -0,0 +1,83 @@ +/******************************************************************************/ +/* LICENSE: */ +/* This submission to NSS is to be made available under the terms of the */ +/* Mozilla Public License, v. 2.0. You can obtain one at http: */ +/* //mozilla.org/MPL/2.0/. 
*/ +/******************************************************************************/ +/* Copyright(c) 2013, Intel Corp. */ +/******************************************************************************/ +/* Reference: */ +/* [1] Shay Gueron, Michael E. Kounavis: Intel(R) Carry-Less Multiplication */ +/* Instruction and its Usage for Computing the GCM Mode (Rev. 2.01) */ +/* http://software.intel.com/sites/default/files/article/165685/clmul-wp-r*/ +/*ev-2.01-2012-09-21.pdf */ +/* [2] S. Gueron, M. E. Kounavis: Efficient Implementation of the Galois */ +/* Counter Mode Using a Carry-less Multiplier and a Fast Reduction */ +/* Algorithm. Information Processing Letters 110: 549-553 (2010). */ +/* [3] S. Gueron: AES Performance on the 2nd Generation Intel(R) Core(TM) */ +/* Processor Family (to be posted) (2012). */ +/* [4] S. Gueron: Fast GHASH computations for speeding up AES-GCM (to be */ +/* published) (2012). */ + +#ifndef INTEL_GCM_H +#define INTEL_GCM_H 1 + +#include "blapii.h" + +typedef struct intel_AES_GCMContextStr intel_AES_GCMContext; + +intel_AES_GCMContext *intel_AES_GCM_CreateContext(void *context, freeblCipherFunc cipher, + const unsigned char *params, unsigned int blocksize); + +void intel_AES_GCM_DestroyContext(intel_AES_GCMContext *gcm, PRBool freeit); + +SECStatus intel_AES_GCM_EncryptUpdate(intel_AES_GCMContext *gcm, unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + unsigned int blocksize); + +SECStatus intel_AES_GCM_DecryptUpdate(intel_AES_GCMContext *gcm, unsigned char *outbuf, + unsigned int *outlen, unsigned int maxout, + const unsigned char *inbuf, unsigned int inlen, + unsigned int blocksize); + +/* Prototypes of functions in the assembler file for fast AES-GCM, using + Intel AES-NI and CLMUL-NI, as described in [1] + [1] Shay Gueron, Michael E. 
Kounavis: Intel(R) Carry-Less Multiplication + Instruction and its Usage for Computing the GCM Mode */ + +/* Prepares the constants used in the aggregated reduction method */ +void intel_aes_gcmINIT(unsigned char Htbl[16 * 16], + unsigned char *KS, + int NR); + +/* Produces the final GHASH value */ +void intel_aes_gcmTAG(unsigned char Htbl[16 * 16], + unsigned char *Tp, + unsigned long Mlen, + unsigned long Alen, + unsigned char *X0, + unsigned char *TAG); + +/* Hashes the Additional Authenticated Data, should be used before enc/dec. + Operates on whole blocks only. Partial blocks should be padded externally. */ +void intel_aes_gcmAAD(unsigned char Htbl[16 * 16], + unsigned char *AAD, + unsigned long Alen, + unsigned char *Tp); + +/* Encrypts and hashes the Plaintext. + Operates on any length of data, however partial block should only be encrypted + at the last call, otherwise the result will be incorrect. */ +void intel_aes_gcmENC(const unsigned char *PT, + unsigned char *CT, + void *Gctx, + unsigned long len); + +/* Similar to ENC, but decrypts the Ciphertext. */ +void intel_aes_gcmDEC(const unsigned char *CT, + unsigned char *PT, + void *Gctx, + unsigned long len); + +#endif diff --git a/security/nss/lib/freebl/intel-gcm.s b/security/nss/lib/freebl/intel-gcm.s new file mode 100644 index 000000000..1a3106091 --- /dev/null +++ b/security/nss/lib/freebl/intel-gcm.s @@ -0,0 +1,1340 @@ +# LICENSE: +# This submission to NSS is to be made available under the terms of the +# Mozilla Public License, v. 2.0. You can obtain one at http: +# //mozilla.org/MPL/2.0/. +################################################################################ +# Copyright(c) 2012, Intel Corp. 
+ +.align 16 +.Lone: +.quad 1,0 +.Ltwo: +.quad 2,0 +.Lbswap_mask: +.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 +.Lshuff_mask: +.quad 0x0f0f0f0f0f0f0f0f, 0x0f0f0f0f0f0f0f0f +.Lpoly: +.quad 0x1, 0xc200000000000000 + + +################################################################################ +# Generates the final GCM tag +# void intel_aes_gcmTAG(uint8_t Htbl[16*16], uint8_t *Tp, uint64_t Mlen, uint64_t Alen, uint8_t* X0, uint8_t* TAG); +.type intel_aes_gcmTAG,@function +.globl intel_aes_gcmTAG +.align 16 +intel_aes_gcmTAG: + +.set Htbl, %rdi +.set Tp, %rsi +.set Mlen, %rdx +.set Alen, %rcx +.set X0, %r8 +.set TAG, %r9 + +.set T,%xmm0 +.set TMP0,%xmm1 + + vmovdqu (Tp), T + vpshufb .Lbswap_mask(%rip), T, T + vpxor TMP0, TMP0, TMP0 + shl $3, Mlen + shl $3, Alen + vpinsrq $0, Mlen, TMP0, TMP0 + vpinsrq $1, Alen, TMP0, TMP0 + vpxor TMP0, T, T + vmovdqu (Htbl), TMP0 + call GFMUL + vpshufb .Lbswap_mask(%rip), T, T + vpxor (X0), T, T + vmovdqu T, (TAG) + +ret +.size intel_aes_gcmTAG, .-intel_aes_gcmTAG +################################################################################ +# Generates the H table +# void intel_aes_gcmINIT(uint8_t Htbl[16*16], uint8_t *KS, int NR); +.type intel_aes_gcmINIT,@function +.globl intel_aes_gcmINIT +.align 16 +intel_aes_gcmINIT: + +.set Htbl, %rdi +.set KS, %rsi +.set NR, %edx + +.set T,%xmm0 +.set TMP0,%xmm1 + +CALCULATE_POWERS_OF_H: + vmovdqu 16*0(KS), T + vaesenc 16*1(KS), T, T + vaesenc 16*2(KS), T, T + vaesenc 16*3(KS), T, T + vaesenc 16*4(KS), T, T + vaesenc 16*5(KS), T, T + vaesenc 16*6(KS), T, T + vaesenc 16*7(KS), T, T + vaesenc 16*8(KS), T, T + vaesenc 16*9(KS), T, T + vmovdqu 16*10(KS), TMP0 + cmp $10, NR + je .LH0done + vaesenc 16*10(KS), T, T + vaesenc 16*11(KS), T, T + vmovdqu 16*12(KS), TMP0 + cmp $12, NR + je .LH0done + vaesenc 16*12(KS), T, T + vaesenc 16*13(KS), T, T + vmovdqu 16*14(KS), TMP0 + +.LH0done: + vaesenclast TMP0, T, T + + vpshufb .Lbswap_mask(%rip), T, T + + vmovdqu T, TMP0 + # Calculate H` = GFMUL(H, 
2) + vpsrld $7 , T , %xmm3 + vmovdqu .Lshuff_mask(%rip), %xmm4 + vpshufb %xmm4, %xmm3 , %xmm3 + movq $0xff00 , %rax + vmovq %rax, %xmm4 + vpshufb %xmm3, %xmm4 , %xmm4 + vmovdqu .Lpoly(%rip), %xmm5 + vpand %xmm4, %xmm5, %xmm5 + vpsrld $31, T, %xmm3 + vpslld $1, T, %xmm4 + vpslldq $4, %xmm3, %xmm3 + vpxor %xmm3, %xmm4, T #T (xmm0) now holds p(x)<<1 + + #xor the masked .Lpoly value (xmm5) into T + vpxor %xmm5, T , T + vmovdqu T, TMP0 + vmovdqu T, (Htbl) # H * 2 + call GFMUL + vmovdqu T, 16(Htbl) # H^2 * 2 + call GFMUL + vmovdqu T, 32(Htbl) # H^3 * 2 + call GFMUL + vmovdqu T, 48(Htbl) # H^4 * 2 + call GFMUL + vmovdqu T, 64(Htbl) # H^5 * 2 + call GFMUL + vmovdqu T, 80(Htbl) # H^6 * 2 + call GFMUL + vmovdqu T, 96(Htbl) # H^7 * 2 + call GFMUL + vmovdqu T, 112(Htbl) # H^8 * 2 + + # Precalculations for the reduce 4 step + vpshufd $78, (Htbl), %xmm8 + vpshufd $78, 16(Htbl), %xmm9 + vpshufd $78, 32(Htbl), %xmm10 + vpshufd $78, 48(Htbl), %xmm11 + vpshufd $78, 64(Htbl), %xmm12 + vpshufd $78, 80(Htbl), %xmm13 + vpshufd $78, 96(Htbl), %xmm14 + vpshufd $78, 112(Htbl), %xmm15 + + vpxor (Htbl), %xmm8, %xmm8 + vpxor 16(Htbl), %xmm9, %xmm9 + vpxor 32(Htbl), %xmm10, %xmm10 + vpxor 48(Htbl), %xmm11, %xmm11 + vpxor 64(Htbl), %xmm12, %xmm12 + vpxor 80(Htbl), %xmm13, %xmm13 + vpxor 96(Htbl), %xmm14, %xmm14 + vpxor 112(Htbl), %xmm15, %xmm15 + + vmovdqu %xmm8, 128(Htbl) + vmovdqu %xmm9, 144(Htbl) + vmovdqu %xmm10, 160(Htbl) + vmovdqu %xmm11, 176(Htbl) + vmovdqu %xmm12, 192(Htbl) + vmovdqu %xmm13, 208(Htbl) + vmovdqu %xmm14, 224(Htbl) + vmovdqu %xmm15, 240(Htbl) + + ret +.size intel_aes_gcmINIT, .-intel_aes_gcmINIT +################################################################################ +# Authenticate only +# void intel_aes_gcmAAD(uint8_t Htbl[16*16], uint8_t *AAD, uint64_t Alen, uint8_t *Tp); + +.globl intel_aes_gcmAAD +.type intel_aes_gcmAAD,@function +.align 16 +intel_aes_gcmAAD: + +.set DATA, %xmm0 +.set T, %xmm1 +.set BSWAP_MASK, %xmm2 +.set TMP0, %xmm3 +.set TMP1, %xmm4 +.set TMP2, %xmm5 +.set TMP3, 
%xmm6 +.set TMP4, %xmm7 +.set Xhi, %xmm9 + +.set Htbl, %rdi +.set inp, %rsi +.set len, %rdx +.set Tp, %rcx + +.set hlp0, %r11 + +.macro KARATSUBA_AAD i + vpclmulqdq $0x00, 16*\i(Htbl), DATA, TMP3 + vpxor TMP3, TMP0, TMP0 + vpclmulqdq $0x11, 16*\i(Htbl), DATA, TMP3 + vpxor TMP3, TMP1, TMP1 + vpshufd $78, DATA, TMP3 + vpxor DATA, TMP3, TMP3 + vpclmulqdq $0x00, 16*(\i+8)(Htbl), TMP3, TMP3 + vpxor TMP3, TMP2, TMP2 +.endm + + test len, len + jnz .LbeginAAD + ret + +.LbeginAAD: + + push hlp0 + vzeroupper + + vmovdqa .Lbswap_mask(%rip), BSWAP_MASK + + vpxor Xhi, Xhi, Xhi + + vmovdqu (Tp),T + vpshufb BSWAP_MASK,T,T + + # we hash 8 blocks each iteration; if the total number of blocks is not a multiple of 8, we hash the first n%8 blocks first + mov len, hlp0 + and $~-128, hlp0 # hlp0 = len mod 128: byte length of the n%8 prefix blocks + + jz .Lmod_loop + + sub hlp0, len + sub $16, hlp0 + + #hash first prefix block + vmovdqu (inp), DATA + vpshufb BSWAP_MASK, DATA, DATA + vpxor T, DATA, DATA + + vpclmulqdq $0x00, (Htbl, hlp0), DATA, TMP0 + vpclmulqdq $0x11, (Htbl, hlp0), DATA, TMP1 + vpshufd $78, DATA, TMP2 + vpxor DATA, TMP2, TMP2 + vpclmulqdq $0x00, 16*8(Htbl, hlp0), TMP2, TMP2 + + lea 16(inp), inp + test hlp0, hlp0 + jnz .Lpre_loop + jmp .Lred1 + + #hash remaining prefix blocks (up to 7 total prefix blocks) +.align 64 +.Lpre_loop: + + sub $16, hlp0 + + vmovdqu (inp),DATA # next data block + vpshufb BSWAP_MASK,DATA,DATA + + vpclmulqdq $0x00, (Htbl,hlp0), DATA, TMP3 + vpxor TMP3, TMP0, TMP0 + vpclmulqdq $0x11, (Htbl,hlp0), DATA, TMP3 + vpxor TMP3, TMP1, TMP1 + vpshufd $78, DATA, TMP3 + vpxor DATA, TMP3, TMP3 + vpclmulqdq $0x00, 16*8(Htbl,hlp0), TMP3, TMP3 + vpxor TMP3, TMP2, TMP2 + + test hlp0, hlp0 + + lea 16(inp), inp + + jnz .Lpre_loop + +.Lred1: + vpxor TMP0, TMP2, TMP2 + vpxor TMP1, TMP2, TMP2 + vpsrldq $8, TMP2, TMP3 + vpslldq $8, TMP2, TMP2 + + vpxor TMP3, TMP1, Xhi + vpxor TMP2, TMP0, T + +.align 64 +.Lmod_loop: + sub $0x80, len + jb .Ldone + + vmovdqu 16*7(inp),DATA # Ii + vpshufb BSWAP_MASK,DATA,DATA + + vpclmulqdq $0x00, 
(Htbl), DATA, TMP0 + vpclmulqdq $0x11, (Htbl), DATA, TMP1 + vpshufd $78, DATA, TMP2 + vpxor DATA, TMP2, TMP2 + vpclmulqdq $0x00, 16*8(Htbl), TMP2, TMP2 + ######################################################### + vmovdqu 16*6(inp),DATA + vpshufb BSWAP_MASK,DATA,DATA + KARATSUBA_AAD 1 + ######################################################### + vmovdqu 16*5(inp),DATA + vpshufb BSWAP_MASK,DATA,DATA + + vpclmulqdq $0x10, .Lpoly(%rip), T, TMP4 #reduction stage 1a + vpalignr $8, T, T, T + + KARATSUBA_AAD 2 + + vpxor TMP4, T, T #reduction stage 1b + ######################################################### + vmovdqu 16*4(inp),DATA + vpshufb BSWAP_MASK,DATA,DATA + + KARATSUBA_AAD 3 + ######################################################### + vmovdqu 16*3(inp),DATA + vpshufb BSWAP_MASK,DATA,DATA + + vpclmulqdq $0x10, .Lpoly(%rip), T, TMP4 #reduction stage 2a + vpalignr $8, T, T, T + + KARATSUBA_AAD 4 + + vpxor TMP4, T, T #reduction stage 2b + ######################################################### + vmovdqu 16*2(inp),DATA + vpshufb BSWAP_MASK,DATA,DATA + + KARATSUBA_AAD 5 + + vpxor Xhi, T, T #reduction finalize + ######################################################### + vmovdqu 16*1(inp),DATA + vpshufb BSWAP_MASK,DATA,DATA + + KARATSUBA_AAD 6 + ######################################################### + vmovdqu 16*0(inp),DATA + vpshufb BSWAP_MASK,DATA,DATA + vpxor T,DATA,DATA + + KARATSUBA_AAD 7 + ######################################################### + vpxor TMP0, TMP2, TMP2 # karatsuba fixup + vpxor TMP1, TMP2, TMP2 + vpsrldq $8, TMP2, TMP3 + vpslldq $8, TMP2, TMP2 + + vpxor TMP3, TMP1, Xhi + vpxor TMP2, TMP0, T + + lea 16*8(inp), inp + jmp .Lmod_loop + ######################################################### + +.Ldone: + vpclmulqdq $0x10, .Lpoly(%rip), T, TMP3 + vpalignr $8, T, T, T + vpxor TMP3, T, T + + vpclmulqdq $0x10, .Lpoly(%rip), T, TMP3 + vpalignr $8, T, T, T + vpxor TMP3, T, T + + vpxor Xhi, T, T + +.Lsave: + vpshufb BSWAP_MASK,T, T + vmovdqu T,(Tp) 
+ vzeroupper + + pop hlp0 + ret +.size intel_aes_gcmAAD,.-intel_aes_gcmAAD + +################################################################################ +# Encrypt and Authenticate +# void intel_aes_gcmENC(uint8_t* PT, uint8_t* CT, void *Gctx,uint64_t len); +.type intel_aes_gcmENC,@function +.globl intel_aes_gcmENC +.align 16 +intel_aes_gcmENC: + +.set PT,%rdi +.set CT,%rsi +.set Htbl, %rdx +.set len, %rcx +.set KS,%r9 +.set NR,%r10d + +.set Gctx, %rdx + +.set T,%xmm0 +.set TMP0,%xmm1 +.set TMP1,%xmm2 +.set TMP2,%xmm3 +.set TMP3,%xmm4 +.set TMP4,%xmm5 +.set TMP5,%xmm6 +.set CTR0,%xmm7 +.set CTR1,%xmm8 +.set CTR2,%xmm9 +.set CTR3,%xmm10 +.set CTR4,%xmm11 +.set CTR5,%xmm12 +.set CTR6,%xmm13 +.set CTR7,%xmm14 +.set CTR,%xmm15 + +.macro ROUND i + vmovdqu \i*16(KS), TMP3 + vaesenc TMP3, CTR0, CTR0 + vaesenc TMP3, CTR1, CTR1 + vaesenc TMP3, CTR2, CTR2 + vaesenc TMP3, CTR3, CTR3 + vaesenc TMP3, CTR4, CTR4 + vaesenc TMP3, CTR5, CTR5 + vaesenc TMP3, CTR6, CTR6 + vaesenc TMP3, CTR7, CTR7 +.endm + +.macro ROUNDMUL i + + vmovdqu \i*16(%rsp), TMP5 + vmovdqu \i*16(KS), TMP3 + + vaesenc TMP3, CTR0, CTR0 + vaesenc TMP3, CTR1, CTR1 + vaesenc TMP3, CTR2, CTR2 + vaesenc TMP3, CTR3, CTR3 + + vpshufd $78, TMP5, TMP4 + vpxor TMP5, TMP4, TMP4 + + vaesenc TMP3, CTR4, CTR4 + vaesenc TMP3, CTR5, CTR5 + vaesenc TMP3, CTR6, CTR6 + vaesenc TMP3, CTR7, CTR7 + + vpclmulqdq $0x00, 128+\i*16(Htbl), TMP4, TMP3 + vpxor TMP3, TMP0, TMP0 + vmovdqa \i*16(Htbl), TMP4 + vpclmulqdq $0x11, TMP4, TMP5, TMP3 + vpxor TMP3, TMP1, TMP1 + vpclmulqdq $0x00, TMP4, TMP5, TMP3 + vpxor TMP3, TMP2, TMP2 + +.endm + +.macro KARATSUBA i + vmovdqu \i*16(%rsp), TMP5 + + vpclmulqdq $0x11, 16*\i(Htbl), TMP5, TMP3 + vpxor TMP3, TMP1, TMP1 + vpclmulqdq $0x00, 16*\i(Htbl), TMP5, TMP3 + vpxor TMP3, TMP2, TMP2 + vpshufd $78, TMP5, TMP3 + vpxor TMP5, TMP3, TMP5 + vpclmulqdq $0x00, 128+\i*16(Htbl), TMP5, TMP3 + vpxor TMP3, TMP0, TMP0 +.endm + + test len, len + jnz .Lbegin + ret + +.Lbegin: + + vzeroupper + push %rbp + push 
%rbx + + movq %rsp, %rbp + sub $128, %rsp + andq $-16, %rsp + + vmovdqu 288(Gctx), CTR + vmovdqu 272(Gctx), T + mov 304(Gctx), KS + mov 4(KS), NR + lea 48(KS), KS + + vpshufb .Lbswap_mask(%rip), CTR, CTR + vpshufb .Lbswap_mask(%rip), T, T + + cmp $128, len + jb .LDataSingles + +# Encrypt the first eight blocks + sub $128, len + vmovdqa CTR, CTR0 + vpaddd .Lone(%rip), CTR0, CTR1 + vpaddd .Ltwo(%rip), CTR0, CTR2 + vpaddd .Lone(%rip), CTR2, CTR3 + vpaddd .Ltwo(%rip), CTR2, CTR4 + vpaddd .Lone(%rip), CTR4, CTR5 + vpaddd .Ltwo(%rip), CTR4, CTR6 + vpaddd .Lone(%rip), CTR6, CTR7 + vpaddd .Ltwo(%rip), CTR6, CTR + + vpshufb .Lbswap_mask(%rip), CTR0, CTR0 + vpshufb .Lbswap_mask(%rip), CTR1, CTR1 + vpshufb .Lbswap_mask(%rip), CTR2, CTR2 + vpshufb .Lbswap_mask(%rip), CTR3, CTR3 + vpshufb .Lbswap_mask(%rip), CTR4, CTR4 + vpshufb .Lbswap_mask(%rip), CTR5, CTR5 + vpshufb .Lbswap_mask(%rip), CTR6, CTR6 + vpshufb .Lbswap_mask(%rip), CTR7, CTR7 + + vpxor (KS), CTR0, CTR0 + vpxor (KS), CTR1, CTR1 + vpxor (KS), CTR2, CTR2 + vpxor (KS), CTR3, CTR3 + vpxor (KS), CTR4, CTR4 + vpxor (KS), CTR5, CTR5 + vpxor (KS), CTR6, CTR6 + vpxor (KS), CTR7, CTR7 + + ROUND 1 + ROUND 2 + ROUND 3 + ROUND 4 + ROUND 5 + ROUND 6 + ROUND 7 + ROUND 8 + ROUND 9 + + vmovdqu 160(KS), TMP5 + cmp $12, NR + jb .LLast1 + + ROUND 10 + ROUND 11 + + vmovdqu 192(KS), TMP5 + cmp $14, NR + jb .LLast1 + + ROUND 12 + ROUND 13 + + vmovdqu 224(KS), TMP5 + +.LLast1: + + vpxor (PT), TMP5, TMP3 + vaesenclast TMP3, CTR0, CTR0 + vpxor 16(PT), TMP5, TMP3 + vaesenclast TMP3, CTR1, CTR1 + vpxor 32(PT), TMP5, TMP3 + vaesenclast TMP3, CTR2, CTR2 + vpxor 48(PT), TMP5, TMP3 + vaesenclast TMP3, CTR3, CTR3 + vpxor 64(PT), TMP5, TMP3 + vaesenclast TMP3, CTR4, CTR4 + vpxor 80(PT), TMP5, TMP3 + vaesenclast TMP3, CTR5, CTR5 + vpxor 96(PT), TMP5, TMP3 + vaesenclast TMP3, CTR6, CTR6 + vpxor 112(PT), TMP5, TMP3 + vaesenclast TMP3, CTR7, CTR7 + + vmovdqu .Lbswap_mask(%rip), TMP3 + + vmovdqu CTR0, (CT) + vpshufb TMP3, CTR0, CTR0 + vmovdqu CTR1, 
16(CT) + vpshufb TMP3, CTR1, CTR1 + vmovdqu CTR2, 32(CT) + vpshufb TMP3, CTR2, CTR2 + vmovdqu CTR3, 48(CT) + vpshufb TMP3, CTR3, CTR3 + vmovdqu CTR4, 64(CT) + vpshufb TMP3, CTR4, CTR4 + vmovdqu CTR5, 80(CT) + vpshufb TMP3, CTR5, CTR5 + vmovdqu CTR6, 96(CT) + vpshufb TMP3, CTR6, CTR6 + vmovdqu CTR7, 112(CT) + vpshufb TMP3, CTR7, CTR7 + + lea 128(CT), CT + lea 128(PT), PT + jmp .LDataOctets + +# Encrypt 8 blocks each time while hashing previous 8 blocks +.align 64 +.LDataOctets: + cmp $128, len + jb .LEndOctets + sub $128, len + + vmovdqa CTR7, TMP5 + vmovdqa CTR6, 1*16(%rsp) + vmovdqa CTR5, 2*16(%rsp) + vmovdqa CTR4, 3*16(%rsp) + vmovdqa CTR3, 4*16(%rsp) + vmovdqa CTR2, 5*16(%rsp) + vmovdqa CTR1, 6*16(%rsp) + vmovdqa CTR0, 7*16(%rsp) + + vmovdqa CTR, CTR0 + vpaddd .Lone(%rip), CTR0, CTR1 + vpaddd .Ltwo(%rip), CTR0, CTR2 + vpaddd .Lone(%rip), CTR2, CTR3 + vpaddd .Ltwo(%rip), CTR2, CTR4 + vpaddd .Lone(%rip), CTR4, CTR5 + vpaddd .Ltwo(%rip), CTR4, CTR6 + vpaddd .Lone(%rip), CTR6, CTR7 + vpaddd .Ltwo(%rip), CTR6, CTR + + vmovdqu (KS), TMP4 + vpshufb TMP3, CTR0, CTR0 + vpxor TMP4, CTR0, CTR0 + vpshufb TMP3, CTR1, CTR1 + vpxor TMP4, CTR1, CTR1 + vpshufb TMP3, CTR2, CTR2 + vpxor TMP4, CTR2, CTR2 + vpshufb TMP3, CTR3, CTR3 + vpxor TMP4, CTR3, CTR3 + vpshufb TMP3, CTR4, CTR4 + vpxor TMP4, CTR4, CTR4 + vpshufb TMP3, CTR5, CTR5 + vpxor TMP4, CTR5, CTR5 + vpshufb TMP3, CTR6, CTR6 + vpxor TMP4, CTR6, CTR6 + vpshufb TMP3, CTR7, CTR7 + vpxor TMP4, CTR7, CTR7 + + vmovdqu 16*0(Htbl), TMP3 + vpclmulqdq $0x11, TMP3, TMP5, TMP1 + vpclmulqdq $0x00, TMP3, TMP5, TMP2 + vpshufd $78, TMP5, TMP3 + vpxor TMP5, TMP3, TMP5 + vmovdqu 128+0*16(Htbl), TMP3 + vpclmulqdq $0x00, TMP3, TMP5, TMP0 + + ROUNDMUL 1 + + ROUNDMUL 2 + + ROUNDMUL 3 + + ROUNDMUL 4 + + ROUNDMUL 5 + + ROUNDMUL 6 + + vpxor 7*16(%rsp), T, TMP5 + vmovdqu 7*16(KS), TMP3 + + vaesenc TMP3, CTR0, CTR0 + vaesenc TMP3, CTR1, CTR1 + vaesenc TMP3, CTR2, CTR2 + vaesenc TMP3, CTR3, CTR3 + + vpshufd $78, TMP5, TMP4 + vpxor TMP5, TMP4, TMP4 + 
+ vaesenc TMP3, CTR4, CTR4 + vaesenc TMP3, CTR5, CTR5 + vaesenc TMP3, CTR6, CTR6 + vaesenc TMP3, CTR7, CTR7 + + vpclmulqdq $0x11, 7*16(Htbl), TMP5, TMP3 + vpxor TMP3, TMP1, TMP1 + vpclmulqdq $0x00, 7*16(Htbl), TMP5, TMP3 + vpxor TMP3, TMP2, TMP2 + vpclmulqdq $0x00, 128+7*16(Htbl), TMP4, TMP3 + vpxor TMP3, TMP0, TMP0 + + ROUND 8 + vmovdqa .Lpoly(%rip), TMP5 + + vpxor TMP1, TMP0, TMP0 + vpxor TMP2, TMP0, TMP0 + vpsrldq $8, TMP0, TMP3 + vpxor TMP3, TMP1, TMP4 + vpslldq $8, TMP0, TMP3 + vpxor TMP3, TMP2, T + + vpclmulqdq $0x10, TMP5, T, TMP1 + vpalignr $8, T, T, T + vpxor T, TMP1, T + + ROUND 9 + + vpclmulqdq $0x10, TMP5, T, TMP1 + vpalignr $8, T, T, T + vpxor T, TMP1, T + + vmovdqu 160(KS), TMP5 + cmp $10, NR + jbe .LLast2 + + ROUND 10 + ROUND 11 + + vmovdqu 192(KS), TMP5 + cmp $12, NR + jbe .LLast2 + + ROUND 12 + ROUND 13 + + vmovdqu 224(KS), TMP5 + +.LLast2: + + vpxor (PT), TMP5, TMP3 + vaesenclast TMP3, CTR0, CTR0 + vpxor 16(PT), TMP5, TMP3 + vaesenclast TMP3, CTR1, CTR1 + vpxor 32(PT), TMP5, TMP3 + vaesenclast TMP3, CTR2, CTR2 + vpxor 48(PT), TMP5, TMP3 + vaesenclast TMP3, CTR3, CTR3 + vpxor 64(PT), TMP5, TMP3 + vaesenclast TMP3, CTR4, CTR4 + vpxor 80(PT), TMP5, TMP3 + vaesenclast TMP3, CTR5, CTR5 + vpxor 96(PT), TMP5, TMP3 + vaesenclast TMP3, CTR6, CTR6 + vpxor 112(PT), TMP5, TMP3 + vaesenclast TMP3, CTR7, CTR7 + + vmovdqu .Lbswap_mask(%rip), TMP3 + + vmovdqu CTR0, (CT) + vpshufb TMP3, CTR0, CTR0 + vmovdqu CTR1, 16(CT) + vpshufb TMP3, CTR1, CTR1 + vmovdqu CTR2, 32(CT) + vpshufb TMP3, CTR2, CTR2 + vmovdqu CTR3, 48(CT) + vpshufb TMP3, CTR3, CTR3 + vmovdqu CTR4, 64(CT) + vpshufb TMP3, CTR4, CTR4 + vmovdqu CTR5, 80(CT) + vpshufb TMP3, CTR5, CTR5 + vmovdqu CTR6, 96(CT) + vpshufb TMP3, CTR6, CTR6 + vmovdqu CTR7,112(CT) + vpshufb TMP3, CTR7, CTR7 + + vpxor TMP4, T, T + + lea 128(CT), CT + lea 128(PT), PT + jmp .LDataOctets + +.LEndOctets: + + vmovdqa CTR7, TMP5 + vmovdqa CTR6, 1*16(%rsp) + vmovdqa CTR5, 2*16(%rsp) + vmovdqa CTR4, 3*16(%rsp) + vmovdqa CTR3, 4*16(%rsp) + 
vmovdqa CTR2, 5*16(%rsp) + vmovdqa CTR1, 6*16(%rsp) + vmovdqa CTR0, 7*16(%rsp) + + vmovdqu 16*0(Htbl), TMP3 + vpclmulqdq $0x11, TMP3, TMP5, TMP1 + vpclmulqdq $0x00, TMP3, TMP5, TMP2 + vpshufd $78, TMP5, TMP3 + vpxor TMP5, TMP3, TMP5 + vmovdqu 128+0*16(Htbl), TMP3 + vpclmulqdq $0x00, TMP3, TMP5, TMP0 + + KARATSUBA 1 + KARATSUBA 2 + KARATSUBA 3 + KARATSUBA 4 + KARATSUBA 5 + KARATSUBA 6 + + vmovdqu 7*16(%rsp), TMP5 + vpxor T, TMP5, TMP5 + vmovdqu 16*7(Htbl), TMP4 + vpclmulqdq $0x11, TMP4, TMP5, TMP3 + vpxor TMP3, TMP1, TMP1 + vpclmulqdq $0x00, TMP4, TMP5, TMP3 + vpxor TMP3, TMP2, TMP2 + vpshufd $78, TMP5, TMP3 + vpxor TMP5, TMP3, TMP5 + vmovdqu 128+7*16(Htbl), TMP4 + vpclmulqdq $0x00, TMP4, TMP5, TMP3 + vpxor TMP3, TMP0, TMP0 + + vpxor TMP1, TMP0, TMP0 + vpxor TMP2, TMP0, TMP0 + + vpsrldq $8, TMP0, TMP3 + vpxor TMP3, TMP1, TMP4 + vpslldq $8, TMP0, TMP3 + vpxor TMP3, TMP2, T + + vmovdqa .Lpoly(%rip), TMP2 + + vpalignr $8, T, T, TMP1 + vpclmulqdq $0x10, TMP2, T, T + vpxor T, TMP1, T + + vpalignr $8, T, T, TMP1 + vpclmulqdq $0x10, TMP2, T, T + vpxor T, TMP1, T + + vpxor TMP4, T, T + +#Here we encrypt any remaining whole block +.LDataSingles: + + cmp $16, len + jb .LDataTail + sub $16, len + + vpshufb .Lbswap_mask(%rip), CTR, TMP1 + vpaddd .Lone(%rip), CTR, CTR + + vpxor (KS), TMP1, TMP1 + vaesenc 16*1(KS), TMP1, TMP1 + vaesenc 16*2(KS), TMP1, TMP1 + vaesenc 16*3(KS), TMP1, TMP1 + vaesenc 16*4(KS), TMP1, TMP1 + vaesenc 16*5(KS), TMP1, TMP1 + vaesenc 16*6(KS), TMP1, TMP1 + vaesenc 16*7(KS), TMP1, TMP1 + vaesenc 16*8(KS), TMP1, TMP1 + vaesenc 16*9(KS), TMP1, TMP1 + vmovdqu 16*10(KS), TMP2 + cmp $10, NR + je .LLast3 + vaesenc 16*10(KS), TMP1, TMP1 + vaesenc 16*11(KS), TMP1, TMP1 + vmovdqu 16*12(KS), TMP2 + cmp $12, NR + je .LLast3 + vaesenc 16*12(KS), TMP1, TMP1 + vaesenc 16*13(KS), TMP1, TMP1 + vmovdqu 16*14(KS), TMP2 + +.LLast3: + vaesenclast TMP2, TMP1, TMP1 + + vpxor (PT), TMP1, TMP1 + vmovdqu TMP1, (CT) + addq $16, CT + addq $16, PT + + vpshufb .Lbswap_mask(%rip), TMP1, 
TMP1 + vpxor TMP1, T, T + vmovdqu (Htbl), TMP0 + call GFMUL + + jmp .LDataSingles + +#Here we encrypt the final partial block, if there is one +.LDataTail: + + test len, len + jz DATA_END +# First prepare the counter block + vpshufb .Lbswap_mask(%rip), CTR, TMP1 + vpaddd .Lone(%rip), CTR, CTR + + vpxor (KS), TMP1, TMP1 + vaesenc 16*1(KS), TMP1, TMP1 + vaesenc 16*2(KS), TMP1, TMP1 + vaesenc 16*3(KS), TMP1, TMP1 + vaesenc 16*4(KS), TMP1, TMP1 + vaesenc 16*5(KS), TMP1, TMP1 + vaesenc 16*6(KS), TMP1, TMP1 + vaesenc 16*7(KS), TMP1, TMP1 + vaesenc 16*8(KS), TMP1, TMP1 + vaesenc 16*9(KS), TMP1, TMP1 + vmovdqu 16*10(KS), TMP2 + cmp $10, NR + je .LLast4 + vaesenc 16*10(KS), TMP1, TMP1 + vaesenc 16*11(KS), TMP1, TMP1 + vmovdqu 16*12(KS), TMP2 + cmp $12, NR + je .LLast4 + vaesenc 16*12(KS), TMP1, TMP1 + vaesenc 16*13(KS), TMP1, TMP1 + vmovdqu 16*14(KS), TMP2 + +.LLast4: + vaesenclast TMP2, TMP1, TMP1 +#Zero a temp location + vpxor TMP2, TMP2, TMP2 + vmovdqa TMP2, (%rsp) + +# Copy the required bytes only (could probably use rep movsb) + xor KS, KS +.LEncCpy: + cmp KS, len + je .LEncCpyEnd + movb (PT, KS, 1), %r8b + movb %r8b, (%rsp, KS, 1) + inc KS + jmp .LEncCpy +.LEncCpyEnd: +# Xor with the counter block + vpxor (%rsp), TMP1, TMP0 +# Again, store at temp location + vmovdqa TMP0, (%rsp) +# Copy only the required bytes to CT, and zero the rest for the hash + xor KS, KS +.LEncCpy2: + cmp KS, len + je .LEncCpy3 + movb (%rsp, KS, 1), %r8b + movb %r8b, (CT, KS, 1) + inc KS + jmp .LEncCpy2 +.LEncCpy3: + cmp $16, KS + je .LEndCpy3 + movb $0, (%rsp, KS, 1) + inc KS + jmp .LEncCpy3 +.LEndCpy3: + vmovdqa (%rsp), TMP0 + + vpshufb .Lbswap_mask(%rip), TMP0, TMP0 + vpxor TMP0, T, T + vmovdqu (Htbl), TMP0 + call GFMUL + +DATA_END: + + vpshufb .Lbswap_mask(%rip), T, T + vpshufb .Lbswap_mask(%rip), CTR, CTR + vmovdqu T, 272(Gctx) + vmovdqu CTR, 288(Gctx) + + movq %rbp, %rsp + + popq %rbx + popq %rbp + ret + .size intel_aes_gcmENC, .-intel_aes_gcmENC + +######################### +# Decrypt and 
Authenticate +# void intel_aes_gcmDEC(uint8_t* CT, uint8_t* PT, void *Gctx,uint64_t len); +.type intel_aes_gcmDEC,@function +.globl intel_aes_gcmDEC +.align 16 +intel_aes_gcmDEC: +# parameter 1: %rdi # CT, input +# parameter 2: %rsi # PT, output +# parameter 3: %rdx # Gctx +# parameter 4: %rcx # len + +.macro DEC_KARATSUBA i + vmovdqu (7-\i)*16(CT), TMP5 + vpshufb .Lbswap_mask(%rip), TMP5, TMP5 + + vpclmulqdq $0x11, 16*\i(Htbl), TMP5, TMP3 + vpxor TMP3, TMP1, TMP1 + vpclmulqdq $0x00, 16*\i(Htbl), TMP5, TMP3 + vpxor TMP3, TMP2, TMP2 + vpshufd $78, TMP5, TMP3 + vpxor TMP5, TMP3, TMP5 + vpclmulqdq $0x00, 128+\i*16(Htbl), TMP5, TMP3 + vpxor TMP3, TMP0, TMP0 +.endm + +.set PT,%rsi +.set CT,%rdi +.set Htbl, %rdx +.set len, %rcx +.set KS,%r9 +.set NR,%r10d + +.set Gctx, %rdx + +.set T,%xmm0 +.set TMP0,%xmm1 +.set TMP1,%xmm2 +.set TMP2,%xmm3 +.set TMP3,%xmm4 +.set TMP4,%xmm5 +.set TMP5,%xmm6 +.set CTR0,%xmm7 +.set CTR1,%xmm8 +.set CTR2,%xmm9 +.set CTR3,%xmm10 +.set CTR4,%xmm11 +.set CTR5,%xmm12 +.set CTR6,%xmm13 +.set CTR7,%xmm14 +.set CTR,%xmm15 + + test len, len + jnz .LbeginDec + ret + +.LbeginDec: + + pushq %rbp + pushq %rbx + movq %rsp, %rbp + sub $128, %rsp + andq $-16, %rsp + vmovdqu 288(Gctx), CTR + vmovdqu 272(Gctx), T + mov 304(Gctx), KS + mov 4(KS), NR + lea 48(KS), KS + + vpshufb .Lbswap_mask(%rip), CTR, CTR + vpshufb .Lbswap_mask(%rip), T, T + + vmovdqu .Lbswap_mask(%rip), TMP3 + jmp .LDECOctets + +# Decrypt 8 blocks each time while hashing them at the same time +.align 64 +.LDECOctets: + + cmp $128, len + jb .LDECSingles + sub $128, len + + vmovdqa CTR, CTR0 + vpaddd .Lone(%rip), CTR0, CTR1 + vpaddd .Ltwo(%rip), CTR0, CTR2 + vpaddd .Lone(%rip), CTR2, CTR3 + vpaddd .Ltwo(%rip), CTR2, CTR4 + vpaddd .Lone(%rip), CTR4, CTR5 + vpaddd .Ltwo(%rip), CTR4, CTR6 + vpaddd .Lone(%rip), CTR6, CTR7 + vpaddd .Ltwo(%rip), CTR6, CTR + + vpshufb TMP3, CTR0, CTR0 + vpshufb TMP3, CTR1, CTR1 + vpshufb TMP3, CTR2, CTR2 + vpshufb TMP3, CTR3, CTR3 + vpshufb TMP3, CTR4, CTR4 + vpshufb TMP3, 
CTR5, CTR5 + vpshufb TMP3, CTR6, CTR6 + vpshufb TMP3, CTR7, CTR7 + + vmovdqu (KS), TMP3 + vpxor TMP3, CTR0, CTR0 + vpxor TMP3, CTR1, CTR1 + vpxor TMP3, CTR2, CTR2 + vpxor TMP3, CTR3, CTR3 + vpxor TMP3, CTR4, CTR4 + vpxor TMP3, CTR5, CTR5 + vpxor TMP3, CTR6, CTR6 + vpxor TMP3, CTR7, CTR7 + + vmovdqu 7*16(CT), TMP5 + vpshufb .Lbswap_mask(%rip), TMP5, TMP5 + vmovdqu 16*0(Htbl), TMP3 + vpclmulqdq $0x11, TMP3, TMP5, TMP1 + vpclmulqdq $0x00, TMP3, TMP5, TMP2 + vpshufd $78, TMP5, TMP3 + vpxor TMP5, TMP3, TMP5 + vmovdqu 128+0*16(Htbl), TMP3 + vpclmulqdq $0x00, TMP3, TMP5, TMP0 + + ROUND 1 + DEC_KARATSUBA 1 + + ROUND 2 + DEC_KARATSUBA 2 + + ROUND 3 + DEC_KARATSUBA 3 + + ROUND 4 + DEC_KARATSUBA 4 + + ROUND 5 + DEC_KARATSUBA 5 + + ROUND 6 + DEC_KARATSUBA 6 + + ROUND 7 + + vmovdqu 0*16(CT), TMP5 + vpshufb .Lbswap_mask(%rip), TMP5, TMP5 + vpxor T, TMP5, TMP5 + vmovdqu 16*7(Htbl), TMP4 + + vpclmulqdq $0x11, TMP4, TMP5, TMP3 + vpxor TMP3, TMP1, TMP1 + vpclmulqdq $0x00, TMP4, TMP5, TMP3 + vpxor TMP3, TMP2, TMP2 + + vpshufd $78, TMP5, TMP3 + vpxor TMP5, TMP3, TMP5 + vmovdqu 128+7*16(Htbl), TMP4 + + vpclmulqdq $0x00, TMP4, TMP5, TMP3 + vpxor TMP3, TMP0, TMP0 + + ROUND 8 + + vpxor TMP1, TMP0, TMP0 + vpxor TMP2, TMP0, TMP0 + + vpsrldq $8, TMP0, TMP3 + vpxor TMP3, TMP1, TMP4 + vpslldq $8, TMP0, TMP3 + vpxor TMP3, TMP2, T + vmovdqa .Lpoly(%rip), TMP2 + + vpalignr $8, T, T, TMP1 + vpclmulqdq $0x10, TMP2, T, T + vpxor T, TMP1, T + + ROUND 9 + + vpalignr $8, T, T, TMP1 + vpclmulqdq $0x10, TMP2, T, T + vpxor T, TMP1, T + + vmovdqu 160(KS), TMP5 + cmp $10, NR + + jbe .LDECLast1 + + ROUND 10 + ROUND 11 + + vmovdqu 192(KS), TMP5 + cmp $12, NR + + jbe .LDECLast1 + + ROUND 12 + ROUND 13 + + vmovdqu 224(KS), TMP5 + +.LDECLast1: + + vpxor (CT), TMP5, TMP3 + vaesenclast TMP3, CTR0, CTR0 + vpxor 16(CT), TMP5, TMP3 + vaesenclast TMP3, CTR1, CTR1 + vpxor 32(CT), TMP5, TMP3 + vaesenclast TMP3, CTR2, CTR2 + vpxor 48(CT), TMP5, TMP3 + vaesenclast TMP3, CTR3, CTR3 + vpxor 64(CT), TMP5, TMP3 + vaesenclast 
TMP3, CTR4, CTR4 + vpxor 80(CT), TMP5, TMP3 + vaesenclast TMP3, CTR5, CTR5 + vpxor 96(CT), TMP5, TMP3 + vaesenclast TMP3, CTR6, CTR6 + vpxor 112(CT), TMP5, TMP3 + vaesenclast TMP3, CTR7, CTR7 + + vmovdqu .Lbswap_mask(%rip), TMP3 + + vmovdqu CTR0, (PT) + vmovdqu CTR1, 16(PT) + vmovdqu CTR2, 32(PT) + vmovdqu CTR3, 48(PT) + vmovdqu CTR4, 64(PT) + vmovdqu CTR5, 80(PT) + vmovdqu CTR6, 96(PT) + vmovdqu CTR7,112(PT) + + vpxor TMP4, T, T + + lea 128(CT), CT + lea 128(PT), PT + jmp .LDECOctets + +#Here we decrypt and hash any remaining whole block +.LDECSingles: + + cmp $16, len + jb .LDECTail + sub $16, len + + vmovdqu (CT), TMP1 + vpshufb .Lbswap_mask(%rip), TMP1, TMP1 + vpxor TMP1, T, T + vmovdqu (Htbl), TMP0 + call GFMUL + + + vpshufb .Lbswap_mask(%rip), CTR, TMP1 + vpaddd .Lone(%rip), CTR, CTR + + vpxor (KS), TMP1, TMP1 + vaesenc 16*1(KS), TMP1, TMP1 + vaesenc 16*2(KS), TMP1, TMP1 + vaesenc 16*3(KS), TMP1, TMP1 + vaesenc 16*4(KS), TMP1, TMP1 + vaesenc 16*5(KS), TMP1, TMP1 + vaesenc 16*6(KS), TMP1, TMP1 + vaesenc 16*7(KS), TMP1, TMP1 + vaesenc 16*8(KS), TMP1, TMP1 + vaesenc 16*9(KS), TMP1, TMP1 + vmovdqu 16*10(KS), TMP2 + cmp $10, NR + je .LDECLast2 + vaesenc 16*10(KS), TMP1, TMP1 + vaesenc 16*11(KS), TMP1, TMP1 + vmovdqu 16*12(KS), TMP2 + cmp $12, NR + je .LDECLast2 + vaesenc 16*12(KS), TMP1, TMP1 + vaesenc 16*13(KS), TMP1, TMP1 + vmovdqu 16*14(KS), TMP2 +.LDECLast2: + vaesenclast TMP2, TMP1, TMP1 + + vpxor (CT), TMP1, TMP1 + vmovdqu TMP1, (PT) + addq $16, CT + addq $16, PT + jmp .LDECSingles + +#Here we decrypt the final partial block, if there is one +.LDECTail: + test len, len + jz .LDEC_END + + vpshufb .Lbswap_mask(%rip), CTR, TMP1 + vpaddd .Lone(%rip), CTR, CTR + + vpxor (KS), TMP1, TMP1 + vaesenc 16*1(KS), TMP1, TMP1 + vaesenc 16*2(KS), TMP1, TMP1 + vaesenc 16*3(KS), TMP1, TMP1 + vaesenc 16*4(KS), TMP1, TMP1 + vaesenc 16*5(KS), TMP1, TMP1 + vaesenc 16*6(KS), TMP1, TMP1 + vaesenc 16*7(KS), TMP1, TMP1 + vaesenc 16*8(KS), TMP1, TMP1 + vaesenc 16*9(KS), TMP1, TMP1 + 
vmovdqu 16*10(KS), TMP2 + cmp $10, NR + je .LDECLast3 + vaesenc 16*10(KS), TMP1, TMP1 + vaesenc 16*11(KS), TMP1, TMP1 + vmovdqu 16*12(KS), TMP2 + cmp $12, NR + je .LDECLast3 + vaesenc 16*12(KS), TMP1, TMP1 + vaesenc 16*13(KS), TMP1, TMP1 + vmovdqu 16*14(KS), TMP2 + +.LDECLast3: + vaesenclast TMP2, TMP1, TMP1 + + vpxor TMP2, TMP2, TMP2 + vmovdqa TMP2, (%rsp) +# Copy the required bytes only (could probably use rep movsb) + xor KS, KS +.LDecCpy: + cmp KS, len + je .LDecCpy2 + movb (CT, KS, 1), %r8b + movb %r8b, (%rsp, KS, 1) + inc KS + jmp .LDecCpy +.LDecCpy2: + cmp $16, KS + je .LDecCpyEnd + movb $0, (%rsp, KS, 1) + inc KS + jmp .LDecCpy2 +.LDecCpyEnd: +# Xor with the counter block + vmovdqa (%rsp), TMP0 + vpxor TMP0, TMP1, TMP1 +# Again, store at temp location + vmovdqa TMP1, (%rsp) +# Copy only the required bytes to PT, and zero the rest for the hash + xor KS, KS +.LDecCpy3: + cmp KS, len + je .LDecCpyEnd3 + movb (%rsp, KS, 1), %r8b + movb %r8b, (PT, KS, 1) + inc KS + jmp .LDecCpy3 +.LDecCpyEnd3: + vpshufb .Lbswap_mask(%rip), TMP0, TMP0 + vpxor TMP0, T, T + vmovdqu (Htbl), TMP0 + call GFMUL +.LDEC_END: + + vpshufb .Lbswap_mask(%rip), T, T + vpshufb .Lbswap_mask(%rip), CTR, CTR + vmovdqu T, 272(Gctx) + vmovdqu CTR, 288(Gctx) + + movq %rbp, %rsp + + popq %rbx + popq %rbp + ret + .size intel_aes_gcmDEC, .-intel_aes_gcmDEC +######################### +# a = T +# b = TMP0 - remains unchanged +# res = T +# uses also TMP1,TMP2,TMP3,TMP4 +# __m128i GFMUL(__m128i A, __m128i B); +.type GFMUL,@function +.globl GFMUL +GFMUL: + vpclmulqdq $0x00, TMP0, T, TMP1 + vpclmulqdq $0x11, TMP0, T, TMP4 + + vpshufd $78, T, TMP2 + vpshufd $78, TMP0, TMP3 + vpxor T, TMP2, TMP2 + vpxor TMP0, TMP3, TMP3 + + vpclmulqdq $0x00, TMP3, TMP2, TMP2 + vpxor TMP1, TMP2, TMP2 + vpxor TMP4, TMP2, TMP2 + + vpslldq $8, TMP2, TMP3 + vpsrldq $8, TMP2, TMP2 + + vpxor TMP3, TMP1, TMP1 + vpxor TMP2, TMP4, TMP4 + + vpclmulqdq $0x10, .Lpoly(%rip), TMP1, TMP2 + vpshufd $78, TMP1, TMP3 + vpxor TMP3, TMP2, TMP1 + + 
vpclmulqdq $0x10, .Lpoly(%rip), TMP1, TMP2 + vpshufd $78, TMP1, TMP3 + vpxor TMP3, TMP2, TMP1 + + vpxor TMP4, TMP1, T + ret +.size GFMUL, .-GFMUL + diff --git a/security/nss/lib/freebl/jpake.c b/security/nss/lib/freebl/jpake.c new file mode 100644 index 000000000..741c7a876 --- /dev/null +++ b/security/nss/lib/freebl/jpake.c @@ -0,0 +1,495 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "blapi.h" +#include "secerr.h" +#include "secitem.h" +#include "secmpi.h" + +/* Hash an item's length and then its value. Only items smaller than 2^16 bytes + * are allowed. Lengths are hashed in network byte order. This is designed + * to match the OpenSSL J-PAKE implementation. + * Returns MP_BADARG for a longer item (nothing is hashed), else MP_OKAY. */ +static mp_err +hashSECItem(HASHContext *hash, const SECItem *it) +{ + unsigned char length[2]; /* two-byte length prefix, high byte first */ + + if (it->len > 0xffff) /* would not fit in length[] */ + return MP_BADARG; + + length[0] = (unsigned char)(it->len >> 8); /* high byte */ + length[1] = (unsigned char)(it->len); /* low byte */ + hash->hashobj->update(hash->hash_context, length, 2); + hash->hashobj->update(hash->hash_context, it->data, it->len); + return MP_OKAY; +} + +/* Hash all public components of the signature, each prefixed with its + length, and then convert the hash to an mp_int. Hash order: g, gv, gx, signerID. 
*/ +static mp_err +hashPublicParams(HASH_HashType hashType, const SECItem *g, + const SECItem *gv, const SECItem *gx, + const SECItem *signerID, mp_int *h) +{ + mp_err err; + unsigned char hBuf[HASH_LENGTH_MAX]; + SECItem hItem; + HASHContext hash; + + hash.hashobj = HASH_GetRawHashObject(hashType); + if (hash.hashobj == NULL || hash.hashobj->length > sizeof hBuf) { /* no such hash, or digest too big for hBuf */ + return MP_BADARG; + } + hash.hash_context = hash.hashobj->create(); + if (hash.hash_context == NULL) { + return MP_MEM; + } + + hItem.data = hBuf; /* the digest is written into the stack buffer */ + hItem.len = hash.hashobj->length; + + hash.hashobj->begin(hash.hash_context); + CHECK_MPI_OK(hashSECItem(&hash, g)); + CHECK_MPI_OK(hashSECItem(&hash, gv)); + CHECK_MPI_OK(hashSECItem(&hash, gx)); + CHECK_MPI_OK(hashSECItem(&hash, signerID)); + hash.hashobj->end(hash.hash_context, hItem.data, &hItem.len, + sizeof hBuf); + SECITEM_TO_MPINT(hItem, h); + +cleanup: + if (hash.hash_context != NULL) { + hash.hashobj->destroy(hash.hash_context, PR_TRUE); + } + + return err; +} + +/* Generate a Schnorr signature for round 1 or round 2. Pass either gxIn (an existing g^x) or gxOut (to receive g^x), never both; gxOut, gv and r must have NULL data on entry. */ +SECStatus +JPAKE_Sign(PLArenaPool *arena, const PQGParams *pqg, HASH_HashType hashType, + const SECItem *signerID, const SECItem *x, + const SECItem *testRandom, const SECItem *gxIn, SECItem *gxOut, + SECItem *gv, SECItem *r) +{ + SECStatus rv = SECSuccess; + mp_err err; + mp_int p; + mp_int q; + mp_int g; + mp_int X; + mp_int GX; + mp_int V; + mp_int GV; + mp_int h; + mp_int tmp; + mp_int R; + SECItem v; + + if (!arena || + !pqg || !pqg->prime.data || pqg->prime.len == 0 || + !pqg->subPrime.data || pqg->subPrime.len == 0 || + !pqg->base.data || pqg->base.len == 0 || + !signerID || !signerID->data || signerID->len == 0 || + !x || !x->data || x->len == 0 || + (testRandom && (!testRandom->data || testRandom->len == 0)) || + (gxIn == NULL && (!gxOut || gxOut->data != NULL)) || + (gxIn != NULL && (!gxIn->data || gxIn->len == 0 || gxOut != NULL)) || + !gv || gv->data != NULL || + !r || r->data != NULL) { + 
PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + MP_DIGITS(&p) = 0; /* zero the digit pointers first; presumably this keeps mp_clear safe on never-initialised values -- confirm */ + MP_DIGITS(&q) = 0; + MP_DIGITS(&g) = 0; + MP_DIGITS(&X) = 0; + MP_DIGITS(&GX) = 0; + MP_DIGITS(&V) = 0; + MP_DIGITS(&GV) = 0; + MP_DIGITS(&h) = 0; + MP_DIGITS(&tmp) = 0; + MP_DIGITS(&R) = 0; + + CHECK_MPI_OK(mp_init(&p)); + CHECK_MPI_OK(mp_init(&q)); + CHECK_MPI_OK(mp_init(&g)); + CHECK_MPI_OK(mp_init(&X)); + CHECK_MPI_OK(mp_init(&GX)); + CHECK_MPI_OK(mp_init(&V)); + CHECK_MPI_OK(mp_init(&GV)); + CHECK_MPI_OK(mp_init(&h)); + CHECK_MPI_OK(mp_init(&tmp)); + CHECK_MPI_OK(mp_init(&R)); + + SECITEM_TO_MPINT(pqg->prime, &p); + SECITEM_TO_MPINT(pqg->subPrime, &q); + SECITEM_TO_MPINT(pqg->base, &g); + SECITEM_TO_MPINT(*x, &X); + + /* gx = g^x (mod p): computed here into gxOut, or taken from gxIn */ + if (gxIn == NULL) { + CHECK_MPI_OK(mp_exptmod(&g, &X, &p, &GX)); + MPINT_TO_SECITEM(&GX, gxOut, arena); + gxIn = gxOut; + } else { + SECITEM_TO_MPINT(*gxIn, &GX); + } + + /* v: fresh random from DSA_NewRandom for subPrime q, or testRandom when supplied */ + if (testRandom == NULL) { + v.data = NULL; + rv = DSA_NewRandom(arena, &pqg->subPrime, &v); + if (rv != SECSuccess) { + goto cleanup; + } + } else { + v.data = testRandom->data; + v.len = testRandom->len; + } + SECITEM_TO_MPINT(v, &V); + + /* gv = g^v (mod p), random v, 1 <= v < q */ + CHECK_MPI_OK(mp_exptmod(&g, &V, &p, &GV)); + MPINT_TO_SECITEM(&GV, gv, arena); + + /* h = H(g, gv, gx, signerID) */ + CHECK_MPI_OK(hashPublicParams(hashType, &pqg->base, gv, gxIn, signerID, + &h)); + + /* r = v - x*h (mod q) */ + CHECK_MPI_OK(mp_mulmod(&X, &h, &q, &tmp)); + CHECK_MPI_OK(mp_submod(&V, &tmp, &q, &R)); + MPINT_TO_SECITEM(&R, r, arena); + +cleanup: + mp_clear(&p); + mp_clear(&q); + mp_clear(&g); + mp_clear(&X); + mp_clear(&GX); + mp_clear(&V); + mp_clear(&GV); + mp_clear(&h); + mp_clear(&tmp); + mp_clear(&R); + + if (rv == SECSuccess && err != MP_OKAY) { /* an MPI failure that rv does not reflect yet */ + MP_TO_SEC_ERROR(err); + rv = SECFailure; + } + return rv; +} + +/* Verify a Schnorr signature generated by the peer in round 1 or round 2. The proof is hashed with peerID, which must differ from signerID. 
*/ +SECStatus +JPAKE_Verify(PLArenaPool *arena, const PQGParams *pqg, HASH_HashType hashType, + const SECItem *signerID, const SECItem *peerID, + const SECItem *gx, const SECItem *gv, const SECItem *r) +{ + SECStatus rv = SECSuccess; + mp_err err; + mp_int p; + mp_int q; + mp_int g; + mp_int p_minus_1; + mp_int GX; + mp_int h; + mp_int one; + mp_int R; + mp_int gr; + mp_int gxh; + mp_int gr_gxh; + SECItem calculated; + + if (!arena || + !pqg || !pqg->prime.data || pqg->prime.len == 0 || + !pqg->subPrime.data || pqg->subPrime.len == 0 || + !pqg->base.data || pqg->base.len == 0 || + !signerID || !signerID->data || signerID->len == 0 || + !peerID || !peerID->data || peerID->len == 0 || + !gx || !gx->data || gx->len == 0 || + !gv || !gv->data || gv->len == 0 || + !r || !r->data || r->len == 0 || + SECITEM_CompareItem(signerID, peerID) == SECEqual) { /* signer and peer IDs must differ */ + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + MP_DIGITS(&p) = 0; + MP_DIGITS(&q) = 0; + MP_DIGITS(&g) = 0; + MP_DIGITS(&p_minus_1) = 0; + MP_DIGITS(&GX) = 0; + MP_DIGITS(&h) = 0; + MP_DIGITS(&one) = 0; + MP_DIGITS(&R) = 0; + MP_DIGITS(&gr) = 0; + MP_DIGITS(&gxh) = 0; + MP_DIGITS(&gr_gxh) = 0; + calculated.data = NULL; + + CHECK_MPI_OK(mp_init(&p)); + CHECK_MPI_OK(mp_init(&q)); + CHECK_MPI_OK(mp_init(&g)); + CHECK_MPI_OK(mp_init(&p_minus_1)); + CHECK_MPI_OK(mp_init(&GX)); + CHECK_MPI_OK(mp_init(&h)); + CHECK_MPI_OK(mp_init(&one)); + CHECK_MPI_OK(mp_init(&R)); + CHECK_MPI_OK(mp_init(&gr)); + CHECK_MPI_OK(mp_init(&gxh)); + CHECK_MPI_OK(mp_init(&gr_gxh)); + + SECITEM_TO_MPINT(pqg->prime, &p); + SECITEM_TO_MPINT(pqg->subPrime, &q); + SECITEM_TO_MPINT(pqg->base, &g); + SECITEM_TO_MPINT(*gx, &GX); + SECITEM_TO_MPINT(*r, &R); + + CHECK_MPI_OK(mp_sub_d(&p, 1, &p_minus_1)); + CHECK_MPI_OK(mp_exptmod(&GX, &q, &p, &one)); + /* Check g^x is in [1, p-2], R is in [0, q-1], and (g^x)^q mod p == 1 */ + if (!(mp_cmp_z(&GX) > 0 && + mp_cmp(&GX, &p_minus_1) < 0 && + mp_cmp(&R, &q) < 0 && + mp_cmp_d(&one, 1) == 0)) { + 
goto badSig; + } + + CHECK_MPI_OK(hashPublicParams(hashType, &pqg->base, gv, gx, peerID, + &h)); + + /* Calculate g^v = g^r * g^x^h */ + CHECK_MPI_OK(mp_exptmod(&g, &R, &p, &gr)); + CHECK_MPI_OK(mp_exptmod(&GX, &h, &p, &gxh)); + CHECK_MPI_OK(mp_mulmod(&gr, &gxh, &p, &gr_gxh)); + + /* Compare calculated g^v to given g^v */ + MPINT_TO_SECITEM(&gr_gxh, &calculated, arena); + if (calculated.len == gv->len && + NSS_SecureMemcmp(calculated.data, gv->data, calculated.len) == 0) { + rv = SECSuccess; + } else { + badSig: + PORT_SetError(SEC_ERROR_BAD_SIGNATURE); + rv = SECFailure; + } + +cleanup: + mp_clear(&p); + mp_clear(&q); + mp_clear(&g); + mp_clear(&p_minus_1); + mp_clear(&GX); + mp_clear(&h); + mp_clear(&one); + mp_clear(&R); + mp_clear(&gr); + mp_clear(&gxh); + mp_clear(&gr_gxh); + + if (rv == SECSuccess && err != MP_OKAY) { + MP_TO_SEC_ERROR(err); + rv = SECFailure; + } + return rv; +} + +/* Calculate base = gx1*gx3*gx4 (mod p), i.e. g^(x1+x3+x4) (mod p). Returns MP_BADARG when gx3 == gx4. */ +static mp_err +jpake_Round2Base(const SECItem *gx1, const SECItem *gx3, + const SECItem *gx4, const mp_int *p, mp_int *base) +{ + mp_err err; + mp_int GX1; + mp_int GX3; + mp_int GX4; + mp_int tmp; + + MP_DIGITS(&GX1) = 0; + MP_DIGITS(&GX3) = 0; + MP_DIGITS(&GX4) = 0; + MP_DIGITS(&tmp) = 0; + + CHECK_MPI_OK(mp_init(&GX1)); + CHECK_MPI_OK(mp_init(&GX3)); + CHECK_MPI_OK(mp_init(&GX4)); + CHECK_MPI_OK(mp_init(&tmp)); + + SECITEM_TO_MPINT(*gx1, &GX1); + SECITEM_TO_MPINT(*gx3, &GX3); + SECITEM_TO_MPINT(*gx4, &GX4); + + /* In round 2 the peer/attacker sends us g^x3 and g^x4, which the protocol 
requires to be distinct. */ + if (mp_cmp(&GX3, &GX4) == 0) { + err = MP_BADARG; /* fail via cleanup so the mp_ints initialised above are released */ + goto cleanup; + } + + CHECK_MPI_OK(mp_mul(&GX1, &GX3, &tmp)); + CHECK_MPI_OK(mp_mul(&tmp, &GX4, &tmp)); + CHECK_MPI_OK(mp_mod(&tmp, p, base)); + +cleanup: + mp_clear(&GX1); + mp_clear(&GX3); + mp_clear(&GX4); + mp_clear(&tmp); + return err; +} + +SECStatus +JPAKE_Round2(PLArenaPool *arena, + const SECItem *p, const SECItem *q, const SECItem *gx1, + const SECItem *gx3, const SECItem *gx4, SECItem *base, + const SECItem *x2, const SECItem *s, SECItem *x2s) +{ + mp_err err; + mp_int P; + mp_int Q; + mp_int X2; + mp_int S; + mp_int result; + + if (!arena || + !p || !p->data || p->len == 0 || + !q || !q->data || q->len == 0 || + !gx1 || !gx1->data || gx1->len == 0 || + !gx3 || !gx3->data || gx3->len == 0 || + !gx4 || !gx4->data || gx4->len == 0 || + !base || base->data != NULL || + (x2s != NULL && (x2s->data != NULL || + !x2 || !x2->data || x2->len == 0 || + !s || !s->data || s->len == 0))) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + MP_DIGITS(&P) = 0; + MP_DIGITS(&Q) = 0; + MP_DIGITS(&X2) = 0; + MP_DIGITS(&S) = 0; + MP_DIGITS(&result) = 0; + + CHECK_MPI_OK(mp_init(&P)); + CHECK_MPI_OK(mp_init(&Q)); + CHECK_MPI_OK(mp_init(&result)); + + if (x2s != NULL) { /* x2*s (mod q) is optional: only computed when x2s is supplied */ + CHECK_MPI_OK(mp_init(&X2)); + CHECK_MPI_OK(mp_init(&S)); + + SECITEM_TO_MPINT(*q, &Q); + SECITEM_TO_MPINT(*x2, &X2); + + SECITEM_TO_MPINT(*s, &S); + /* S must be in [1, Q-1] */ + if (mp_cmp_z(&S) <= 0 || mp_cmp(&S, &Q) >= 0) { + err = MP_BADARG; + goto cleanup; + } + + CHECK_MPI_OK(mp_mulmod(&X2, &S, &Q, &result)); + MPINT_TO_SECITEM(&result, x2s, arena); + } + + SECITEM_TO_MPINT(*p, &P); + CHECK_MPI_OK(jpake_Round2Base(gx1, gx3, gx4, &P, &result)); + MPINT_TO_SECITEM(&result, base, arena); + +cleanup: + mp_clear(&P); + mp_clear(&Q); + mp_clear(&X2); + mp_clear(&S); + mp_clear(&result); + + if (err != MP_OKAY) { + MP_TO_SEC_ERROR(err); + return SECFailure; + } + return SECSuccess; +} + +SECStatus +JPAKE_Final(PLArenaPool *arena, const SECItem *p, 
const SECItem *q, + const SECItem *x2, const SECItem *gx4, const SECItem *x2s, + const SECItem *B, SECItem *K) +{ + mp_err err; + mp_int P; + mp_int Q; + mp_int tmp; + mp_int exponent; + mp_int divisor; + mp_int base; + + if (!arena || + !p || !p->data || p->len == 0 || + !q || !q->data || q->len == 0 || + !x2 || !x2->data || x2->len == 0 || + !gx4 || !gx4->data || gx4->len == 0 || + !x2s || !x2s->data || x2s->len == 0 || + !B || !B->data || B->len == 0 || + !K || K->data != NULL) { /* K is output-only and must arrive with NULL data */ + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + MP_DIGITS(&P) = 0; + MP_DIGITS(&Q) = 0; + MP_DIGITS(&tmp) = 0; + MP_DIGITS(&exponent) = 0; + MP_DIGITS(&divisor) = 0; + MP_DIGITS(&base) = 0; + + CHECK_MPI_OK(mp_init(&P)); + CHECK_MPI_OK(mp_init(&Q)); + CHECK_MPI_OK(mp_init(&tmp)); + CHECK_MPI_OK(mp_init(&exponent)); + CHECK_MPI_OK(mp_init(&divisor)); + CHECK_MPI_OK(mp_init(&base)); + + /* exponent = -x2s (mod q) */ + SECITEM_TO_MPINT(*q, &Q); + SECITEM_TO_MPINT(*x2s, &tmp); + /* q == 0 (mod q), so q - x2s == -x2s (mod q) */ + CHECK_MPI_OK(mp_sub(&Q, &tmp, &exponent)); + + /* divisor = gx4^-x2s = 1/(gx4^x2s) (mod p) */ + SECITEM_TO_MPINT(*p, &P); + SECITEM_TO_MPINT(*gx4, &tmp); /* tmp is reused as scratch for each converted input */ + CHECK_MPI_OK(mp_exptmod(&tmp, &exponent, &P, &divisor)); + + /* base = B*divisor = B/(gx4^x2s) (mod p) */ + SECITEM_TO_MPINT(*B, &tmp); + CHECK_MPI_OK(mp_mulmod(&divisor, &tmp, &P, &base)); + + /* tmp = base^x2 (mod p) */ + SECITEM_TO_MPINT(*x2, &exponent); + CHECK_MPI_OK(mp_exptmod(&base, &exponent, &P, &tmp)); + + MPINT_TO_SECITEM(&tmp, K, arena); /* K = (B / gx4^x2s)^x2 (mod p) */ + +cleanup: + mp_clear(&P); + mp_clear(&Q); + mp_clear(&tmp); + mp_clear(&exponent); + mp_clear(&divisor); + mp_clear(&base); + + if (err != MP_OKAY) { + MP_TO_SEC_ERROR(err); + return SECFailure; + } + return SECSuccess; +} diff --git a/security/nss/lib/freebl/ldvector.c b/security/nss/lib/freebl/ldvector.c new file mode 100644 index 000000000..2447a0c9f --- /dev/null +++ b/security/nss/lib/freebl/ldvector.c @@ -0,0 +1,353 @@ +/* + * ldvector.c 
- platform dependent DSO containing freebl implementation. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef FREEBL_NO_DEPEND +extern int FREEBL_InitStubs(void); +#endif + +#include "loader.h" +#include "alghmac.h" +#include "hmacct.h" +#include "blapii.h" + +static const struct FREEBLVectorStr vector = + { + + sizeof vector, + FREEBL_VERSION, + + RSA_NewKey, + RSA_PublicKeyOp, + RSA_PrivateKeyOp, + DSA_NewKey, + DSA_SignDigest, + DSA_VerifyDigest, + DSA_NewKeyFromSeed, + DSA_SignDigestWithSeed, + DH_GenParam, + DH_NewKey, + DH_Derive, + KEA_Derive, + KEA_Verify, + RC4_CreateContext, + RC4_DestroyContext, + RC4_Encrypt, + RC4_Decrypt, + RC2_CreateContext, + RC2_DestroyContext, + RC2_Encrypt, + RC2_Decrypt, + RC5_CreateContext, + RC5_DestroyContext, + RC5_Encrypt, + RC5_Decrypt, + DES_CreateContext, + DES_DestroyContext, + DES_Encrypt, + DES_Decrypt, + AES_CreateContext, + AES_DestroyContext, + AES_Encrypt, + AES_Decrypt, + MD5_Hash, + MD5_HashBuf, + MD5_NewContext, + MD5_DestroyContext, + MD5_Begin, + MD5_Update, + MD5_End, + MD5_FlattenSize, + MD5_Flatten, + MD5_Resurrect, + MD5_TraceState, + MD2_Hash, + MD2_NewContext, + MD2_DestroyContext, + MD2_Begin, + MD2_Update, + MD2_End, + MD2_FlattenSize, + MD2_Flatten, + MD2_Resurrect, + SHA1_Hash, + SHA1_HashBuf, + SHA1_NewContext, + SHA1_DestroyContext, + SHA1_Begin, + SHA1_Update, + SHA1_End, + SHA1_TraceState, + SHA1_FlattenSize, + SHA1_Flatten, + SHA1_Resurrect, + RNG_RNGInit, + RNG_RandomUpdate, + RNG_GenerateGlobalRandomBytes, + RNG_RNGShutdown, + PQG_ParamGen, + PQG_ParamGenSeedLen, + PQG_VerifyParams, + + /* End of Version 3.001. */ + + RSA_PrivateKeyOpDoubleChecked, + RSA_PrivateKeyCheck, + BL_Cleanup, + + /* End of Version 3.002. 
*/ + + SHA256_NewContext, + SHA256_DestroyContext, + SHA256_Begin, + SHA256_Update, + SHA256_End, + SHA256_HashBuf, + SHA256_Hash, + SHA256_TraceState, + SHA256_FlattenSize, + SHA256_Flatten, + SHA256_Resurrect, + + SHA512_NewContext, + SHA512_DestroyContext, + SHA512_Begin, + SHA512_Update, + SHA512_End, + SHA512_HashBuf, + SHA512_Hash, + SHA512_TraceState, + SHA512_FlattenSize, + SHA512_Flatten, + SHA512_Resurrect, + + SHA384_NewContext, + SHA384_DestroyContext, + SHA384_Begin, + SHA384_Update, + SHA384_End, + SHA384_HashBuf, + SHA384_Hash, + SHA384_TraceState, + SHA384_FlattenSize, + SHA384_Flatten, + SHA384_Resurrect, + + /* End of Version 3.003. */ + + AESKeyWrap_CreateContext, + AESKeyWrap_DestroyContext, + AESKeyWrap_Encrypt, + AESKeyWrap_Decrypt, + + /* End of Version 3.004. */ + + BLAPI_SHVerify, + BLAPI_VerifySelf, + + /* End of Version 3.005. */ + + EC_NewKey, + EC_NewKeyFromSeed, + EC_ValidatePublicKey, + ECDH_Derive, + ECDSA_SignDigest, + ECDSA_VerifyDigest, + ECDSA_SignDigestWithSeed, + + /* End of Version 3.006. */ + /* End of Version 3.007. */ + + AES_InitContext, + AESKeyWrap_InitContext, + DES_InitContext, + RC2_InitContext, + RC4_InitContext, + + AES_AllocateContext, + AESKeyWrap_AllocateContext, + DES_AllocateContext, + RC2_AllocateContext, + RC4_AllocateContext, + + MD2_Clone, + MD5_Clone, + SHA1_Clone, + SHA256_Clone, + SHA384_Clone, + SHA512_Clone, + + TLS_PRF, + HASH_GetRawHashObject, + + HMAC_Create, + HMAC_Init, + HMAC_Begin, + HMAC_Update, + HMAC_Clone, + HMAC_Finish, + HMAC_Destroy, + + RNG_SystemInfoForRNG, + + /* End of Version 3.008. */ + + FIPS186Change_GenerateX, + FIPS186Change_ReduceModQForDSA, + + /* End of Version 3.009. */ + Camellia_InitContext, + Camellia_AllocateContext, + Camellia_CreateContext, + Camellia_DestroyContext, + Camellia_Encrypt, + Camellia_Decrypt, + + PQG_DestroyParams, + PQG_DestroyVerify, + + /* End of Version 3.010. 
*/ + + SEED_InitContext, + SEED_AllocateContext, + SEED_CreateContext, + SEED_DestroyContext, + SEED_Encrypt, + SEED_Decrypt, + + BL_Init, + BL_SetForkState, + + PRNGTEST_Instantiate, + PRNGTEST_Reseed, + PRNGTEST_Generate, + + PRNGTEST_Uninstantiate, + + /* End of Version 3.011. */ + + RSA_PopulatePrivateKey, + + DSA_NewRandom, + + JPAKE_Sign, + JPAKE_Verify, + JPAKE_Round2, + JPAKE_Final, + + /* End of Version 3.012 */ + + TLS_P_hash, + SHA224_NewContext, + SHA224_DestroyContext, + SHA224_Begin, + SHA224_Update, + SHA224_End, + SHA224_HashBuf, + SHA224_Hash, + SHA224_TraceState, + SHA224_FlattenSize, + SHA224_Flatten, + SHA224_Resurrect, + SHA224_Clone, + BLAPI_SHVerifyFile, + + /* End of Version 3.013 */ + + PQG_ParamGenV2, + PRNGTEST_RunHealthTests, + + /* End of Version 3.014 */ + + HMAC_ConstantTime, + SSLv3_MAC_ConstantTime, + + /* End of Version 3.015 */ + + RSA_SignRaw, + RSA_CheckSignRaw, + RSA_CheckSignRecoverRaw, + RSA_EncryptRaw, + RSA_DecryptRaw, + RSA_EncryptOAEP, + RSA_DecryptOAEP, + RSA_EncryptBlock, + RSA_DecryptBlock, + RSA_SignPSS, + RSA_CheckSignPSS, + RSA_Sign, + RSA_CheckSign, + RSA_CheckSignRecover, + + /* End of Version 3.016 */ + + EC_FillParams, + EC_DecodeParams, + EC_CopyParams, + + /* End of Version 3.017 */ + + ChaCha20Poly1305_InitContext, + ChaCha20Poly1305_CreateContext, + ChaCha20Poly1305_DestroyContext, + ChaCha20Poly1305_Seal, + ChaCha20Poly1305_Open, + + /* End of Version 3.018 */ + + EC_GetPointSize + + /* End of Version 3.019 */ + }; + +const FREEBLVector* +FREEBL_GetVector(void) +{ +#ifdef FREEBL_NO_DEPEND + SECStatus rv; +#endif + +#define NSS_VERSION_VARIABLE __nss_freebl_version +#include "verref.h" + +#ifdef FREEBL_NO_DEPEND + /* this entry point is only valid if nspr and nss-util has been loaded */ + rv = FREEBL_InitStubs(); + if (rv != SECSuccess) { + return NULL; + } +#endif + /* make sure the Full self tests have been run before continuing */ + BL_POSTRan(PR_FALSE); + + return &vector; +} + +#ifdef FREEBL_LOWHASH 
+static const struct NSSLOWVectorStr nssvector = + { + sizeof nssvector, + NSSLOW_VERSION, + FREEBL_GetVector, + NSSLOW_Init, + NSSLOW_Shutdown, + NSSLOW_Reset, + NSSLOWHASH_NewContext, + NSSLOWHASH_Begin, + NSSLOWHASH_Update, + NSSLOWHASH_End, + NSSLOWHASH_Destroy, + NSSLOWHASH_Length + }; + +const NSSLOWVector* +NSSLOW_GetVector(void) +{ + /* POST check and stub init happens in FREEBL_GetVector() and + * NSSLOW_Init() respectively */ + return &nssvector; +} +#endif diff --git a/security/nss/lib/freebl/loader.c b/security/nss/lib/freebl/loader.c new file mode 100644 index 000000000..792171b08 --- /dev/null +++ b/security/nss/lib/freebl/loader.c @@ -0,0 +1,2126 @@ +/* + * loader.c - load platform dependent DSO containing freebl implementation. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "loader.h" +#include "prmem.h" +#include "prerror.h" +#include "prinit.h" +#include "prenv.h" +#include "blname.c" + +#include "prio.h" +#include "prprf.h" +#include <stdio.h> +#include "prsystem.h" + +static const char *NameOfThisSharedLib = + SHLIB_PREFIX "softokn" SOFTOKEN_SHLIB_VERSION "." SHLIB_SUFFIX; + +static PRLibrary *blLib = NULL; /* handle of the freebl DSO; set by freebl_LoadDSO on success */ + +#define LSB(x) ((x)&0xff) /* low byte of a version number */ +#define MSB(x) ((x) >> 8) /* remaining high part of a version number */ + +static const FREEBLVector *vector; /* function table obtained from the DSO's FREEBL_GetVector */ +static const char *libraryName = NULL; + +#include "genload.c" + +/* This function must be run only once. */ +/* determine if hybrid platform, then actually load the DSO. 
*/ +static PRStatus +freebl_LoadDSO(void) +{ + PRLibrary *handle; + const char *name = getLibName(); + + if (!name) { + PR_SetError(PR_LOAD_LIBRARY_ERROR, 0); + return PR_FAILURE; + } + + handle = loader_LoadLibrary(name); + if (handle) { + PRFuncPtr address = PR_FindFunctionSymbol(handle, "FREEBL_GetVector"); + if (address) { + FREEBLGetVectorFn *getVector = (FREEBLGetVectorFn *)address; + const FREEBLVector *dsoVector = getVector(); + if (dsoVector) { + unsigned short dsoVersion = dsoVector->version; + unsigned short myVersion = FREEBL_VERSION; + if (MSB(dsoVersion) == MSB(myVersion) && + LSB(dsoVersion) >= LSB(myVersion) && + dsoVector->length >= sizeof(FREEBLVector)) { + vector = dsoVector; + libraryName = name; + blLib = handle; + return PR_SUCCESS; + } + } + } +#ifdef DEBUG + if (blLib) { + PRStatus status = PR_UnloadLibrary(blLib); + PORT_Assert(PR_SUCCESS == status); + } +#else + if (blLib) + PR_UnloadLibrary(blLib); +#endif + } + return PR_FAILURE; +} + +static const PRCallOnceType pristineCallOnce; +static PRCallOnceType loadFreeBLOnce; + +static PRStatus +freebl_RunLoaderOnce(void) +{ + PRStatus status; + + status = PR_CallOnce(&loadFreeBLOnce, &freebl_LoadDSO); + return status; +} + +SECStatus +BL_Init(void) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_BL_Init)(); +} + +RSAPrivateKey * +RSA_NewKey(int keySizeInBits, SECItem *publicExponent) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_RSA_NewKey)(keySizeInBits, publicExponent); +} + +SECStatus +RSA_PublicKeyOp(RSAPublicKey *key, + unsigned char *output, + const unsigned char *input) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RSA_PublicKeyOp)(key, output, input); +} + +SECStatus +RSA_PrivateKeyOp(RSAPrivateKey *key, + unsigned char *output, + const unsigned char *input) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + 
return (vector->p_RSA_PrivateKeyOp)(key, output, input); +} + +SECStatus +RSA_PrivateKeyOpDoubleChecked(RSAPrivateKey *key, + unsigned char *output, + const unsigned char *input) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RSA_PrivateKeyOpDoubleChecked)(key, output, input); +} + +SECStatus +RSA_PrivateKeyCheck(const RSAPrivateKey *key) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RSA_PrivateKeyCheck)(key); +} + +SECStatus +DSA_NewKey(const PQGParams *params, DSAPrivateKey **privKey) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_DSA_NewKey)(params, privKey); +} + +SECStatus +DSA_SignDigest(DSAPrivateKey *key, SECItem *signature, const SECItem *digest) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_DSA_SignDigest)(key, signature, digest); +} + +SECStatus +DSA_VerifyDigest(DSAPublicKey *key, const SECItem *signature, + const SECItem *digest) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_DSA_VerifyDigest)(key, signature, digest); +} + +SECStatus +DSA_NewKeyFromSeed(const PQGParams *params, const unsigned char *seed, + DSAPrivateKey **privKey) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_DSA_NewKeyFromSeed)(params, seed, privKey); +} + +SECStatus +DSA_SignDigestWithSeed(DSAPrivateKey *key, SECItem *signature, + const SECItem *digest, const unsigned char *seed) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_DSA_SignDigestWithSeed)(key, signature, digest, seed); +} + +SECStatus +DSA_NewRandom(PLArenaPool *arena, const SECItem *q, SECItem *seed) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_DSA_NewRandom)(arena, q, seed); +} + +SECStatus +DH_GenParam(int 
primeLen, DHParams **params) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_DH_GenParam)(primeLen, params); +} + +SECStatus +DH_NewKey(DHParams *params, DHPrivateKey **privKey) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_DH_NewKey)(params, privKey); +} + +SECStatus +DH_Derive(SECItem *publicValue, SECItem *prime, SECItem *privateValue, + SECItem *derivedSecret, unsigned int maxOutBytes) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_DH_Derive)(publicValue, prime, privateValue, + derivedSecret, maxOutBytes); +} + +SECStatus +KEA_Derive(SECItem *prime, SECItem *public1, SECItem *public2, + SECItem *private1, SECItem *private2, SECItem *derivedSecret) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_KEA_Derive)(prime, public1, public2, + private1, private2, derivedSecret); +} + +PRBool +KEA_Verify(SECItem *Y, SECItem *prime, SECItem *subPrime) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return PR_FALSE; + return (vector->p_KEA_Verify)(Y, prime, subPrime); +} + +RC4Context * +RC4_CreateContext(const unsigned char *key, int len) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_RC4_CreateContext)(key, len); +} + +void +RC4_DestroyContext(RC4Context *cx, PRBool freeit) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_RC4_DestroyContext)(cx, freeit); +} + +SECStatus +RC4_Encrypt(RC4Context *cx, unsigned char *output, unsigned int *outputLen, + unsigned int maxOutputLen, const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RC4_Encrypt)(cx, output, outputLen, maxOutputLen, input, + inputLen); +} + +SECStatus +RC4_Decrypt(RC4Context *cx, unsigned char *output, unsigned int 
*outputLen, + unsigned int maxOutputLen, const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RC4_Decrypt)(cx, output, outputLen, maxOutputLen, input, + inputLen); +} + +RC2Context * +RC2_CreateContext(const unsigned char *key, unsigned int len, + const unsigned char *iv, int mode, unsigned effectiveKeyLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_RC2_CreateContext)(key, len, iv, mode, effectiveKeyLen); +} + +void +RC2_DestroyContext(RC2Context *cx, PRBool freeit) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_RC2_DestroyContext)(cx, freeit); +} + +SECStatus +RC2_Encrypt(RC2Context *cx, unsigned char *output, unsigned int *outputLen, + unsigned int maxOutputLen, const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RC2_Encrypt)(cx, output, outputLen, maxOutputLen, input, + inputLen); +} + +SECStatus +RC2_Decrypt(RC2Context *cx, unsigned char *output, unsigned int *outputLen, + unsigned int maxOutputLen, const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RC2_Decrypt)(cx, output, outputLen, maxOutputLen, input, + inputLen); +} + +RC5Context * +RC5_CreateContext(const SECItem *key, unsigned int rounds, + unsigned int wordSize, const unsigned char *iv, int mode) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_RC5_CreateContext)(key, rounds, wordSize, iv, mode); +} + +void +RC5_DestroyContext(RC5Context *cx, PRBool freeit) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_RC5_DestroyContext)(cx, freeit); +} + +SECStatus +RC5_Encrypt(RC5Context *cx, unsigned char *output, unsigned int *outputLen, + unsigned int 
maxOutputLen, const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RC5_Encrypt)(cx, output, outputLen, maxOutputLen, input, + inputLen); +} + +SECStatus +RC5_Decrypt(RC5Context *cx, unsigned char *output, unsigned int *outputLen, + unsigned int maxOutputLen, const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RC5_Decrypt)(cx, output, outputLen, maxOutputLen, input, + inputLen); +} + +DESContext * +DES_CreateContext(const unsigned char *key, const unsigned char *iv, + int mode, PRBool encrypt) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_DES_CreateContext)(key, iv, mode, encrypt); +} + +void +DES_DestroyContext(DESContext *cx, PRBool freeit) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_DES_DestroyContext)(cx, freeit); +} + +SECStatus +DES_Encrypt(DESContext *cx, unsigned char *output, unsigned int *outputLen, + unsigned int maxOutputLen, const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_DES_Encrypt)(cx, output, outputLen, maxOutputLen, input, + inputLen); +} + +SECStatus +DES_Decrypt(DESContext *cx, unsigned char *output, unsigned int *outputLen, + unsigned int maxOutputLen, const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_DES_Decrypt)(cx, output, outputLen, maxOutputLen, input, + inputLen); +} +SEEDContext * +SEED_CreateContext(const unsigned char *key, const unsigned char *iv, + int mode, PRBool encrypt) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_SEED_CreateContext)(key, iv, mode, encrypt); +} + +void +SEED_DestroyContext(SEEDContext 
*cx, PRBool freeit) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SEED_DestroyContext)(cx, freeit); +} + +SECStatus +SEED_Encrypt(SEEDContext *cx, unsigned char *output, unsigned int *outputLen, + unsigned int maxOutputLen, const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_SEED_Encrypt)(cx, output, outputLen, maxOutputLen, input, + inputLen); +} + +SECStatus +SEED_Decrypt(SEEDContext *cx, unsigned char *output, unsigned int *outputLen, + unsigned int maxOutputLen, const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_SEED_Decrypt)(cx, output, outputLen, maxOutputLen, input, + inputLen); +} + +AESContext * +AES_CreateContext(const unsigned char *key, const unsigned char *iv, + int mode, int encrypt, + unsigned int keylen, unsigned int blocklen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_AES_CreateContext)(key, iv, mode, encrypt, keylen, + blocklen); +} + +void +AES_DestroyContext(AESContext *cx, PRBool freeit) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_AES_DestroyContext)(cx, freeit); +} + +SECStatus +AES_Encrypt(AESContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_AES_Encrypt)(cx, output, outputLen, maxOutputLen, + input, inputLen); +} + +SECStatus +AES_Decrypt(AESContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_AES_Decrypt)(cx, output, outputLen, maxOutputLen, + input, 
inputLen); +} + +SECStatus +MD5_Hash(unsigned char *dest, const char *src) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_MD5_Hash)(dest, src); +} + +SECStatus +MD5_HashBuf(unsigned char *dest, const unsigned char *src, PRUint32 src_length) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_MD5_HashBuf)(dest, src, src_length); +} + +MD5Context * +MD5_NewContext(void) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_MD5_NewContext)(); +} + +void +MD5_DestroyContext(MD5Context *cx, PRBool freeit) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_MD5_DestroyContext)(cx, freeit); +} + +void +MD5_Begin(MD5Context *cx) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_MD5_Begin)(cx); +} + +void +MD5_Update(MD5Context *cx, const unsigned char *input, unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_MD5_Update)(cx, input, inputLen); +} + +void +MD5_End(MD5Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_MD5_End)(cx, digest, digestLen, maxDigestLen); +} + +unsigned int +MD5_FlattenSize(MD5Context *cx) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return 0; + return (vector->p_MD5_FlattenSize)(cx); +} + +SECStatus +MD5_Flatten(MD5Context *cx, unsigned char *space) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_MD5_Flatten)(cx, space); +} + +MD5Context * +MD5_Resurrect(unsigned char *space, void *arg) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_MD5_Resurrect)(space, arg); +} + +void +MD5_TraceState(MD5Context *cx) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + 
(vector->p_MD5_TraceState)(cx); +} + +SECStatus +MD2_Hash(unsigned char *dest, const char *src) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_MD2_Hash)(dest, src); +} + +MD2Context * +MD2_NewContext(void) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_MD2_NewContext)(); +} + +void +MD2_DestroyContext(MD2Context *cx, PRBool freeit) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_MD2_DestroyContext)(cx, freeit); +} + +void +MD2_Begin(MD2Context *cx) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_MD2_Begin)(cx); +} + +void +MD2_Update(MD2Context *cx, const unsigned char *input, unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_MD2_Update)(cx, input, inputLen); +} + +void +MD2_End(MD2Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_MD2_End)(cx, digest, digestLen, maxDigestLen); +} + +unsigned int +MD2_FlattenSize(MD2Context *cx) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return 0; + return (vector->p_MD2_FlattenSize)(cx); +} + +SECStatus +MD2_Flatten(MD2Context *cx, unsigned char *space) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_MD2_Flatten)(cx, space); +} + +MD2Context * +MD2_Resurrect(unsigned char *space, void *arg) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_MD2_Resurrect)(space, arg); +} + +SECStatus +SHA1_Hash(unsigned char *dest, const char *src) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_SHA1_Hash)(dest, src); +} + +SECStatus +SHA1_HashBuf(unsigned char *dest, const unsigned char *src, PRUint32 src_length) +{ + if (!vector && PR_SUCCESS != 
freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_SHA1_HashBuf)(dest, src, src_length); +} + +SHA1Context * +SHA1_NewContext(void) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_SHA1_NewContext)(); +} + +void +SHA1_DestroyContext(SHA1Context *cx, PRBool freeit) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA1_DestroyContext)(cx, freeit); +} + +void +SHA1_Begin(SHA1Context *cx) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA1_Begin)(cx); +} + +void +SHA1_Update(SHA1Context *cx, const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA1_Update)(cx, input, inputLen); +} + +void +SHA1_End(SHA1Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA1_End)(cx, digest, digestLen, maxDigestLen); +} + +void +SHA1_TraceState(SHA1Context *cx) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA1_TraceState)(cx); +} + +unsigned int +SHA1_FlattenSize(SHA1Context *cx) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return 0; + return (vector->p_SHA1_FlattenSize)(cx); +} + +SECStatus +SHA1_Flatten(SHA1Context *cx, unsigned char *space) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_SHA1_Flatten)(cx, space); +} + +SHA1Context * +SHA1_Resurrect(unsigned char *space, void *arg) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_SHA1_Resurrect)(space, arg); +} + +SECStatus +RNG_RNGInit(void) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RNG_RNGInit)(); +} + +SECStatus +RNG_RandomUpdate(const void *data, size_t bytes) +{ + if (!vector && PR_SUCCESS != 
freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RNG_RandomUpdate)(data, bytes); +} + +SECStatus +RNG_GenerateGlobalRandomBytes(void *dest, size_t len) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RNG_GenerateGlobalRandomBytes)(dest, len); +} + +void +RNG_RNGShutdown(void) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_RNG_RNGShutdown)(); +} + +SECStatus +PQG_ParamGen(unsigned int j, PQGParams **pParams, PQGVerify **pVfy) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_PQG_ParamGen)(j, pParams, pVfy); +} + +SECStatus +PQG_ParamGenSeedLen(unsigned int j, unsigned int seedBytes, + PQGParams **pParams, PQGVerify **pVfy) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_PQG_ParamGenSeedLen)(j, seedBytes, pParams, pVfy); +} + +SECStatus +PQG_VerifyParams(const PQGParams *params, const PQGVerify *vfy, + SECStatus *result) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_PQG_VerifyParams)(params, vfy, result); +} + +void +PQG_DestroyParams(PQGParams *params) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_PQG_DestroyParams)(params); +} + +void +PQG_DestroyVerify(PQGVerify *vfy) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_PQG_DestroyVerify)(vfy); +} + +void +BL_Cleanup(void) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_BL_Cleanup)(); +} + +void +BL_Unload(void) +{ + /* This function is not thread-safe, but doesn't need to be, because it is + * only called from functions that are also defined as not thread-safe, + * namely C_Finalize in softoken, and the SSL bypass shutdown callback called + * from NSS_Shutdown. 
*/ + char *disableUnload = NULL; + vector = NULL; + disableUnload = PR_GetEnvSecure("NSS_DISABLE_UNLOAD"); + if (blLib && !disableUnload) { +#ifdef DEBUG + PRStatus status = PR_UnloadLibrary(blLib); + PORT_Assert(PR_SUCCESS == status); +#else + PR_UnloadLibrary(blLib); +#endif + } + blLib = NULL; + loadFreeBLOnce = pristineCallOnce; +} + +/* ============== New for 3.003 =============================== */ + +SECStatus +SHA256_Hash(unsigned char *dest, const char *src) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_SHA256_Hash)(dest, src); +} + +SECStatus +SHA256_HashBuf(unsigned char *dest, const unsigned char *src, PRUint32 src_length) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_SHA256_HashBuf)(dest, src, src_length); +} + +SHA256Context * +SHA256_NewContext(void) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_SHA256_NewContext)(); +} + +void +SHA256_DestroyContext(SHA256Context *cx, PRBool freeit) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA256_DestroyContext)(cx, freeit); +} + +void +SHA256_Begin(SHA256Context *cx) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA256_Begin)(cx); +} + +void +SHA256_Update(SHA256Context *cx, const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA256_Update)(cx, input, inputLen); +} + +void +SHA256_End(SHA256Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA256_End)(cx, digest, digestLen, maxDigestLen); +} + +void +SHA256_TraceState(SHA256Context *cx) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA256_TraceState)(cx); +} + +unsigned int +SHA256_FlattenSize(SHA256Context 
*cx) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return 0; + return (vector->p_SHA256_FlattenSize)(cx); +} + +SECStatus +SHA256_Flatten(SHA256Context *cx, unsigned char *space) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_SHA256_Flatten)(cx, space); +} + +SHA256Context * +SHA256_Resurrect(unsigned char *space, void *arg) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_SHA256_Resurrect)(space, arg); +} + +SECStatus +SHA512_Hash(unsigned char *dest, const char *src) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_SHA512_Hash)(dest, src); +} + +SECStatus +SHA512_HashBuf(unsigned char *dest, const unsigned char *src, PRUint32 src_length) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_SHA512_HashBuf)(dest, src, src_length); +} + +SHA512Context * +SHA512_NewContext(void) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_SHA512_NewContext)(); +} + +void +SHA512_DestroyContext(SHA512Context *cx, PRBool freeit) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA512_DestroyContext)(cx, freeit); +} + +void +SHA512_Begin(SHA512Context *cx) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA512_Begin)(cx); +} + +void +SHA512_Update(SHA512Context *cx, const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA512_Update)(cx, input, inputLen); +} + +void +SHA512_End(SHA512Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA512_End)(cx, digest, digestLen, maxDigestLen); +} + +void +SHA512_TraceState(SHA512Context *cx) +{ + if (!vector && PR_SUCCESS != 
freebl_RunLoaderOnce()) + return; + (vector->p_SHA512_TraceState)(cx); +} + +unsigned int +SHA512_FlattenSize(SHA512Context *cx) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return 0; + return (vector->p_SHA512_FlattenSize)(cx); +} + +SECStatus +SHA512_Flatten(SHA512Context *cx, unsigned char *space) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_SHA512_Flatten)(cx, space); +} + +SHA512Context * +SHA512_Resurrect(unsigned char *space, void *arg) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_SHA512_Resurrect)(space, arg); +} + +SECStatus +SHA384_Hash(unsigned char *dest, const char *src) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_SHA384_Hash)(dest, src); +} + +SECStatus +SHA384_HashBuf(unsigned char *dest, const unsigned char *src, PRUint32 src_length) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_SHA384_HashBuf)(dest, src, src_length); +} + +SHA384Context * +SHA384_NewContext(void) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_SHA384_NewContext)(); +} + +void +SHA384_DestroyContext(SHA384Context *cx, PRBool freeit) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA384_DestroyContext)(cx, freeit); +} + +void +SHA384_Begin(SHA384Context *cx) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA384_Begin)(cx); +} + +void +SHA384_Update(SHA384Context *cx, const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA384_Update)(cx, input, inputLen); +} + +void +SHA384_End(SHA384Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA384_End)(cx, 
digest, digestLen, maxDigestLen); +} + +void +SHA384_TraceState(SHA384Context *cx) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA384_TraceState)(cx); +} + +unsigned int +SHA384_FlattenSize(SHA384Context *cx) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return 0; + return (vector->p_SHA384_FlattenSize)(cx); +} + +SECStatus +SHA384_Flatten(SHA384Context *cx, unsigned char *space) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_SHA384_Flatten)(cx, space); +} + +SHA384Context * +SHA384_Resurrect(unsigned char *space, void *arg) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_SHA384_Resurrect)(space, arg); +} + +AESKeyWrapContext * +AESKeyWrap_CreateContext(const unsigned char *key, const unsigned char *iv, + int encrypt, unsigned int keylen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return vector->p_AESKeyWrap_CreateContext(key, iv, encrypt, keylen); +} + +void +AESKeyWrap_DestroyContext(AESKeyWrapContext *cx, PRBool freeit) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + vector->p_AESKeyWrap_DestroyContext(cx, freeit); +} + +SECStatus +AESKeyWrap_Encrypt(AESKeyWrapContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return vector->p_AESKeyWrap_Encrypt(cx, output, outputLen, maxOutputLen, + input, inputLen); +} +SECStatus +AESKeyWrap_Decrypt(AESKeyWrapContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return vector->p_AESKeyWrap_Decrypt(cx, output, outputLen, maxOutputLen, + input, inputLen); +} + +PRBool +BLAPI_SHVerify(const char 
*name, PRFuncPtr addr) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return PR_FALSE; + return vector->p_BLAPI_SHVerify(name, addr); +} + +/* + * The Caller is expected to pass NULL as the name, which will + * trigger the p_BLAPI_VerifySelf() to return 'TRUE'. Pass the real + * name of the shared library we loaded (the static libraryName set + * in freebl_LoadDSO) to p_BLAPI_VerifySelf. + */ +PRBool +BLAPI_VerifySelf(const char *name) +{ + PORT_Assert(!name); + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return PR_FALSE; + return vector->p_BLAPI_VerifySelf(libraryName); +} + +/* ============== New for 3.006 =============================== */ + +SECStatus +EC_NewKey(ECParams *params, ECPrivateKey **privKey) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_EC_NewKey)(params, privKey); +} + +SECStatus +EC_NewKeyFromSeed(ECParams *params, ECPrivateKey **privKey, + const unsigned char *seed, int seedlen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_EC_NewKeyFromSeed)(params, privKey, seed, seedlen); +} + +SECStatus +EC_ValidatePublicKey(ECParams *params, SECItem *publicValue) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_EC_ValidatePublicKey)(params, publicValue); +} + +SECStatus +ECDH_Derive(SECItem *publicValue, ECParams *params, SECItem *privateValue, + PRBool withCofactor, SECItem *derivedSecret) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_ECDH_Derive)(publicValue, params, privateValue, + withCofactor, derivedSecret); +} + +SECStatus +ECDSA_SignDigest(ECPrivateKey *key, SECItem *signature, + const SECItem *digest) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_ECDSA_SignDigest)(key, signature, digest); +} + +SECStatus +ECDSA_VerifyDigest(ECPublicKey *key, const SECItem *signature, + 
const SECItem *digest) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_ECDSA_VerifyDigest)(key, signature, digest); +} + +SECStatus +ECDSA_SignDigestWithSeed(ECPrivateKey *key, SECItem *signature, + const SECItem *digest, const unsigned char *seed, const int seedlen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_ECDSA_SignDigestWithSeed)(key, signature, digest, + seed, seedlen); +} + +/* ============== New for 3.008 =============================== */ + +AESContext * +AES_AllocateContext(void) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_AES_AllocateContext)(); +} + +AESKeyWrapContext * +AESKeyWrap_AllocateContext(void) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_AESKeyWrap_AllocateContext)(); +} + +DESContext * +DES_AllocateContext(void) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_DES_AllocateContext)(); +} + +RC2Context * +RC2_AllocateContext(void) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_RC2_AllocateContext)(); +} + +RC4Context * +RC4_AllocateContext(void) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_RC4_AllocateContext)(); +} + +SECStatus +AES_InitContext(AESContext *cx, const unsigned char *key, + unsigned int keylen, const unsigned char *iv, int mode, + unsigned int encrypt, unsigned int blocklen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_AES_InitContext)(cx, key, keylen, iv, mode, encrypt, + blocklen); +} + +SECStatus +AESKeyWrap_InitContext(AESKeyWrapContext *cx, const unsigned char *key, + unsigned int keylen, const unsigned char *iv, int mode, + unsigned int encrypt, unsigned int blocklen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return 
SECFailure; + return (vector->p_AESKeyWrap_InitContext)(cx, key, keylen, iv, mode, + encrypt, blocklen); +} + +SECStatus +DES_InitContext(DESContext *cx, const unsigned char *key, + unsigned int keylen, const unsigned char *iv, int mode, + unsigned int encrypt, unsigned int xtra) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_DES_InitContext)(cx, key, keylen, iv, mode, encrypt, xtra); +} + +SECStatus +SEED_InitContext(SEEDContext *cx, const unsigned char *key, + unsigned int keylen, const unsigned char *iv, int mode, + unsigned int encrypt, unsigned int xtra) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_SEED_InitContext)(cx, key, keylen, iv, mode, encrypt, xtra); +} + +SECStatus +RC2_InitContext(RC2Context *cx, const unsigned char *key, + unsigned int keylen, const unsigned char *iv, int mode, + unsigned int effectiveKeyLen, unsigned int xtra) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RC2_InitContext)(cx, key, keylen, iv, mode, + effectiveKeyLen, xtra); +} + +SECStatus +RC4_InitContext(RC4Context *cx, const unsigned char *key, + unsigned int keylen, const unsigned char *x1, int x2, + unsigned int x3, unsigned int x4) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RC4_InitContext)(cx, key, keylen, x1, x2, x3, x4); +} + +void +MD2_Clone(MD2Context *dest, MD2Context *src) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_MD2_Clone)(dest, src); +} + +void +MD5_Clone(MD5Context *dest, MD5Context *src) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_MD5_Clone)(dest, src); +} + +void +SHA1_Clone(SHA1Context *dest, SHA1Context *src) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA1_Clone)(dest, src); +} + +void +SHA256_Clone(SHA256Context *dest, SHA256Context 
*src) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA256_Clone)(dest, src); +} + +void +SHA384_Clone(SHA384Context *dest, SHA384Context *src) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA384_Clone)(dest, src); +} + +void +SHA512_Clone(SHA512Context *dest, SHA512Context *src) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA512_Clone)(dest, src); +} + +SECStatus +TLS_PRF(const SECItem *secret, const char *label, + SECItem *seed, SECItem *result, PRBool isFIPS) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_TLS_PRF)(secret, label, seed, result, isFIPS); +} + +const SECHashObject * +HASH_GetRawHashObject(HASH_HashType hashType) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_HASH_GetRawHashObject)(hashType); +} + +void +HMAC_Destroy(HMACContext *cx, PRBool freeit) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_HMAC_Destroy)(cx, freeit); +} + +HMACContext * +HMAC_Create(const SECHashObject *hashObj, const unsigned char *secret, + unsigned int secret_len, PRBool isFIPS) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_HMAC_Create)(hashObj, secret, secret_len, isFIPS); +} + +SECStatus +HMAC_Init(HMACContext *cx, const SECHashObject *hashObj, + const unsigned char *secret, unsigned int secret_len, PRBool isFIPS) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_HMAC_Init)(cx, hashObj, secret, secret_len, isFIPS); +} + +void +HMAC_Begin(HMACContext *cx) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_HMAC_Begin)(cx); +} + +void +HMAC_Update(HMACContext *cx, const unsigned char *data, unsigned int data_len) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_HMAC_Update)(cx, data, 
data_len); +} + +SECStatus +HMAC_Finish(HMACContext *cx, unsigned char *result, unsigned int *result_len, + unsigned int max_result_len) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_HMAC_Finish)(cx, result, result_len, max_result_len); +} + +HMACContext * +HMAC_Clone(HMACContext *cx) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_HMAC_Clone)(cx); +} + +void +RNG_SystemInfoForRNG(void) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_RNG_SystemInfoForRNG)(); +} + +SECStatus +FIPS186Change_GenerateX(unsigned char *XKEY, const unsigned char *XSEEDj, + unsigned char *x_j) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_FIPS186Change_GenerateX)(XKEY, XSEEDj, x_j); +} + +SECStatus +FIPS186Change_ReduceModQForDSA(const unsigned char *w, + const unsigned char *q, + unsigned char *xj) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_FIPS186Change_ReduceModQForDSA)(w, q, xj); +} + +/* === new for Camellia === */ +SECStatus +Camellia_InitContext(CamelliaContext *cx, const unsigned char *key, + unsigned int keylen, const unsigned char *iv, int mode, + unsigned int encrypt, unsigned int unused) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_Camellia_InitContext)(cx, key, keylen, iv, mode, encrypt, + unused); +} + +CamelliaContext * +Camellia_AllocateContext(void) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_Camellia_AllocateContext)(); +} + +CamelliaContext * +Camellia_CreateContext(const unsigned char *key, const unsigned char *iv, + int mode, int encrypt, + unsigned int keylen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_Camellia_CreateContext)(key, iv, mode, encrypt, keylen); +} + +void 
+Camellia_DestroyContext(CamelliaContext *cx, PRBool freeit) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_Camellia_DestroyContext)(cx, freeit); +} + +SECStatus +Camellia_Encrypt(CamelliaContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_Camellia_Encrypt)(cx, output, outputLen, maxOutputLen, + input, inputLen); +} + +SECStatus +Camellia_Decrypt(CamelliaContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_Camellia_Decrypt)(cx, output, outputLen, maxOutputLen, + input, inputLen); +} + +void +BL_SetForkState(PRBool forked) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_BL_SetForkState)(forked); +} + +SECStatus +PRNGTEST_Instantiate(const PRUint8 *entropy, unsigned int entropy_len, + const PRUint8 *nonce, unsigned int nonce_len, + const PRUint8 *personal_string, unsigned int ps_len) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_PRNGTEST_Instantiate)(entropy, entropy_len, + nonce, nonce_len, + personal_string, ps_len); +} + +SECStatus +PRNGTEST_Reseed(const PRUint8 *entropy, unsigned int entropy_len, + const PRUint8 *additional, unsigned int additional_len) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_PRNGTEST_Reseed)(entropy, entropy_len, + additional, additional_len); +} + +SECStatus +PRNGTEST_Generate(PRUint8 *bytes, unsigned int bytes_len, + const PRUint8 *additional, unsigned int additional_len) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_PRNGTEST_Generate)(bytes, 
bytes_len, + additional, additional_len); +} + +SECStatus +PRNGTEST_Uninstantiate() +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_PRNGTEST_Uninstantiate)(); +} + +SECStatus +RSA_PopulatePrivateKey(RSAPrivateKey *key) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RSA_PopulatePrivateKey)(key); +} + +SECStatus +JPAKE_Sign(PLArenaPool *arena, const PQGParams *pqg, HASH_HashType hashType, + const SECItem *signerID, const SECItem *x, + const SECItem *testRandom, const SECItem *gxIn, SECItem *gxOut, + SECItem *gv, SECItem *r) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_JPAKE_Sign)(arena, pqg, hashType, signerID, x, + testRandom, gxIn, gxOut, gv, r); +} + +SECStatus +JPAKE_Verify(PLArenaPool *arena, const PQGParams *pqg, + HASH_HashType hashType, const SECItem *signerID, + const SECItem *peerID, const SECItem *gx, + const SECItem *gv, const SECItem *r) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_JPAKE_Verify)(arena, pqg, hashType, signerID, peerID, + gx, gv, r); +} + +SECStatus +JPAKE_Round2(PLArenaPool *arena, const SECItem *p, const SECItem *q, + const SECItem *gx1, const SECItem *gx3, const SECItem *gx4, + SECItem *base, const SECItem *x2, const SECItem *s, SECItem *x2s) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_JPAKE_Round2)(arena, p, q, gx1, gx3, gx4, base, x2, s, x2s); +} + +SECStatus +JPAKE_Final(PLArenaPool *arena, const SECItem *p, const SECItem *q, + const SECItem *x2, const SECItem *gx4, const SECItem *x2s, + const SECItem *B, SECItem *K) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_JPAKE_Final)(arena, p, q, x2, gx4, x2s, B, K); +} + +SECStatus +TLS_P_hash(HASH_HashType hashAlg, const SECItem *secret, const char *label, + SECItem *seed, 
SECItem *result, PRBool isFIPS) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_TLS_P_hash)(hashAlg, secret, label, seed, result, isFIPS); +} + +SECStatus +SHA224_Hash(unsigned char *dest, const char *src) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_SHA224_Hash)(dest, src); +} + +SECStatus +SHA224_HashBuf(unsigned char *dest, const unsigned char *src, PRUint32 src_length) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_SHA224_HashBuf)(dest, src, src_length); +} + +SHA224Context * +SHA224_NewContext(void) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_SHA224_NewContext)(); +} + +void +SHA224_DestroyContext(SHA224Context *cx, PRBool freeit) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA224_DestroyContext)(cx, freeit); +} + +void +SHA224_Begin(SHA256Context *cx) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA224_Begin)(cx); +} + +void +SHA224_Update(SHA224Context *cx, const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA224_Update)(cx, input, inputLen); +} + +void +SHA224_End(SHA224Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA224_End)(cx, digest, digestLen, maxDigestLen); +} + +void +SHA224_TraceState(SHA224Context *cx) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA224_TraceState)(cx); +} + +unsigned int +SHA224_FlattenSize(SHA224Context *cx) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return 0; + return (vector->p_SHA224_FlattenSize)(cx); +} + +SECStatus +SHA224_Flatten(SHA224Context *cx, unsigned char *space) +{ + if (!vector && 
PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_SHA224_Flatten)(cx, space); +} + +SHA224Context * +SHA224_Resurrect(unsigned char *space, void *arg) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_SHA224_Resurrect)(space, arg); +} + +void +SHA224_Clone(SHA224Context *dest, SHA224Context *src) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_SHA224_Clone)(dest, src); +} + +PRBool +BLAPI_SHVerifyFile(const char *name) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return PR_FALSE; + return vector->p_BLAPI_SHVerifyFile(name); +} + +/* === new for DSA-2 === */ +SECStatus +PQG_ParamGenV2(unsigned int L, unsigned int N, unsigned int seedBytes, + PQGParams **pParams, PQGVerify **pVfy) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_PQG_ParamGenV2)(L, N, seedBytes, pParams, pVfy); +} + +SECStatus +PRNGTEST_RunHealthTests(void) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return vector->p_PRNGTEST_RunHealthTests(); +} + +SECStatus +SSLv3_MAC_ConstantTime( + unsigned char *result, + unsigned int *resultLen, + unsigned int maxResultLen, + const SECHashObject *hashObj, + const unsigned char *secret, + unsigned int secretLen, + const unsigned char *header, + unsigned int headerLen, + const unsigned char *body, + unsigned int bodyLen, + unsigned int bodyTotalLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_SSLv3_MAC_ConstantTime)( + result, resultLen, maxResultLen, + hashObj, + secret, secretLen, + header, headerLen, + body, bodyLen, bodyTotalLen); +} + +SECStatus +HMAC_ConstantTime( + unsigned char *result, + unsigned int *resultLen, + unsigned int maxResultLen, + const SECHashObject *hashObj, + const unsigned char *secret, + unsigned int secretLen, + const unsigned char *header, + unsigned int headerLen, + const 
unsigned char *body, + unsigned int bodyLen, + unsigned int bodyTotalLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_HMAC_ConstantTime)( + result, resultLen, maxResultLen, + hashObj, + secret, secretLen, + header, headerLen, + body, bodyLen, bodyTotalLen); +} + +SECStatus +RSA_SignRaw(RSAPrivateKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RSA_SignRaw)(key, output, outputLen, maxOutputLen, input, + inputLen); +} + +SECStatus +RSA_CheckSignRaw(RSAPublicKey *key, + const unsigned char *sig, + unsigned int sigLen, + const unsigned char *hash, + unsigned int hashLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RSA_CheckSignRaw)(key, sig, sigLen, hash, hashLen); +} + +SECStatus +RSA_CheckSignRecoverRaw(RSAPublicKey *key, + unsigned char *data, + unsigned int *dataLen, + unsigned int maxDataLen, + const unsigned char *sig, + unsigned int sigLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RSA_CheckSignRecoverRaw)(key, data, dataLen, maxDataLen, + sig, sigLen); +} + +SECStatus +RSA_EncryptRaw(RSAPublicKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RSA_EncryptRaw)(key, output, outputLen, maxOutputLen, + input, inputLen); +} + +SECStatus +RSA_DecryptRaw(RSAPrivateKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return 
(vector->p_RSA_DecryptRaw)(key, output, outputLen, maxOutputLen, + input, inputLen); +} + +SECStatus +RSA_EncryptOAEP(RSAPublicKey *key, + HASH_HashType hashAlg, + HASH_HashType maskHashAlg, + const unsigned char *label, + unsigned int labelLen, + const unsigned char *seed, + unsigned int seedLen, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RSA_EncryptOAEP)(key, hashAlg, maskHashAlg, label, + labelLen, seed, seedLen, output, + outputLen, maxOutputLen, input, inputLen); +} + +SECStatus +RSA_DecryptOAEP(RSAPrivateKey *key, + HASH_HashType hashAlg, + HASH_HashType maskHashAlg, + const unsigned char *label, + unsigned int labelLen, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RSA_DecryptOAEP)(key, hashAlg, maskHashAlg, label, + labelLen, output, outputLen, + maxOutputLen, input, inputLen); +} + +SECStatus +RSA_EncryptBlock(RSAPublicKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RSA_EncryptBlock)(key, output, outputLen, maxOutputLen, + input, inputLen); +} + +SECStatus +RSA_DecryptBlock(RSAPrivateKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RSA_DecryptBlock)(key, output, outputLen, maxOutputLen, + input, inputLen); +} + +SECStatus +RSA_SignPSS(RSAPrivateKey *key, + HASH_HashType hashAlg, + HASH_HashType 
maskHashAlg, + const unsigned char *salt, + unsigned int saltLen, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RSA_SignPSS)(key, hashAlg, maskHashAlg, salt, saltLen, + output, outputLen, maxOutputLen, input, + inputLen); +} + +SECStatus +RSA_CheckSignPSS(RSAPublicKey *key, + HASH_HashType hashAlg, + HASH_HashType maskHashAlg, + unsigned int saltLen, + const unsigned char *sig, + unsigned int sigLen, + const unsigned char *hash, + unsigned int hashLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RSA_CheckSignPSS)(key, hashAlg, maskHashAlg, saltLen, + sig, sigLen, hash, hashLen); +} + +SECStatus +RSA_Sign(RSAPrivateKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RSA_Sign)(key, output, outputLen, maxOutputLen, input, + inputLen); +} + +SECStatus +RSA_CheckSign(RSAPublicKey *key, + const unsigned char *sig, + unsigned int sigLen, + const unsigned char *data, + unsigned int dataLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RSA_CheckSign)(key, sig, sigLen, data, dataLen); +} + +SECStatus +RSA_CheckSignRecover(RSAPublicKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *sig, + unsigned int sigLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_RSA_CheckSignRecover)(key, output, outputLen, maxOutputLen, + sig, sigLen); +} + +SECStatus +EC_FillParams(PLArenaPool *arena, + const SECItem *encodedParams, + ECParams *params) +{ + if (!vector && PR_SUCCESS != 
freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_EC_FillParams)(arena, encodedParams, params); +} + +SECStatus +EC_DecodeParams(const SECItem *encodedParams, + ECParams **ecparams) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_EC_DecodeParams)(encodedParams, ecparams); +} + +SECStatus +EC_CopyParams(PLArenaPool *arena, ECParams *dstParams, + const ECParams *srcParams) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_EC_CopyParams)(arena, dstParams, srcParams); +} + +SECStatus +ChaCha20Poly1305_InitContext(ChaCha20Poly1305Context *ctx, + const unsigned char *key, unsigned int keyLen, + unsigned int tagLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_ChaCha20Poly1305_InitContext)(ctx, key, keyLen, tagLen); +} + +ChaCha20Poly1305Context * +ChaCha20Poly1305_CreateContext(const unsigned char *key, unsigned int keyLen, + unsigned int tagLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return NULL; + return (vector->p_ChaCha20Poly1305_CreateContext)(key, keyLen, tagLen); +} + +void +ChaCha20Poly1305_DestroyContext(ChaCha20Poly1305Context *ctx, PRBool freeit) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return; + (vector->p_ChaCha20Poly1305_DestroyContext)(ctx, freeit); +} + +SECStatus +ChaCha20Poly1305_Seal(const ChaCha20Poly1305Context *ctx, + unsigned char *output, unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen, + const unsigned char *nonce, unsigned int nonceLen, + const unsigned char *ad, unsigned int adLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_ChaCha20Poly1305_Seal)( + ctx, output, outputLen, maxOutputLen, input, inputLen, + nonce, nonceLen, ad, adLen); +} + +SECStatus +ChaCha20Poly1305_Open(const ChaCha20Poly1305Context *ctx, + unsigned char 
*output, unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen, + const unsigned char *nonce, unsigned int nonceLen, + const unsigned char *ad, unsigned int adLen) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_ChaCha20Poly1305_Open)( + ctx, output, outputLen, maxOutputLen, input, inputLen, + nonce, nonceLen, ad, adLen); +} + +int +EC_GetPointSize(const ECParams *params) +{ + if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) + return SECFailure; + return (vector->p_EC_GetPointSize)(params); +} diff --git a/security/nss/lib/freebl/loader.h b/security/nss/lib/freebl/loader.h new file mode 100644 index 000000000..ed392cc47 --- /dev/null +++ b/security/nss/lib/freebl/loader.h @@ -0,0 +1,788 @@ +/* + * loader.h - load platform dependent DSO containing freebl implementation. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef _LOADER_H_ +#define _LOADER_H_ 1 + +#include "blapi.h" + +#define FREEBL_VERSION 0x0313 + +struct FREEBLVectorStr { + + unsigned short length; /* of this struct in bytes */ + unsigned short version; /* of this struct. 
*/ + + RSAPrivateKey *(*p_RSA_NewKey)(int keySizeInBits, + SECItem *publicExponent); + + SECStatus (*p_RSA_PublicKeyOp)(RSAPublicKey *key, + unsigned char *output, + const unsigned char *input); + + SECStatus (*p_RSA_PrivateKeyOp)(RSAPrivateKey *key, + unsigned char *output, + const unsigned char *input); + + SECStatus (*p_DSA_NewKey)(const PQGParams *params, + DSAPrivateKey **privKey); + + SECStatus (*p_DSA_SignDigest)(DSAPrivateKey *key, + SECItem *signature, + const SECItem *digest); + + SECStatus (*p_DSA_VerifyDigest)(DSAPublicKey *key, + const SECItem *signature, + const SECItem *digest); + + SECStatus (*p_DSA_NewKeyFromSeed)(const PQGParams *params, + const unsigned char *seed, + DSAPrivateKey **privKey); + + SECStatus (*p_DSA_SignDigestWithSeed)(DSAPrivateKey *key, + SECItem *signature, + const SECItem *digest, + const unsigned char *seed); + + SECStatus (*p_DH_GenParam)(int primeLen, DHParams **params); + + SECStatus (*p_DH_NewKey)(DHParams *params, + DHPrivateKey **privKey); + + SECStatus (*p_DH_Derive)(SECItem *publicValue, + SECItem *prime, + SECItem *privateValue, + SECItem *derivedSecret, + unsigned int maxOutBytes); + + SECStatus (*p_KEA_Derive)(SECItem *prime, + SECItem *public1, + SECItem *public2, + SECItem *private1, + SECItem *private2, + SECItem *derivedSecret); + + PRBool (*p_KEA_Verify)(SECItem *Y, SECItem *prime, SECItem *subPrime); + + RC4Context *(*p_RC4_CreateContext)(const unsigned char *key, int len); + + void (*p_RC4_DestroyContext)(RC4Context *cx, PRBool freeit); + + SECStatus (*p_RC4_Encrypt)(RC4Context *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + + SECStatus (*p_RC4_Decrypt)(RC4Context *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + + RC2Context *(*p_RC2_CreateContext)(const unsigned char *key, + unsigned int len, const unsigned char *iv, + int mode, 
unsigned effectiveKeyLen); + + void (*p_RC2_DestroyContext)(RC2Context *cx, PRBool freeit); + + SECStatus (*p_RC2_Encrypt)(RC2Context *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + + SECStatus (*p_RC2_Decrypt)(RC2Context *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + + RC5Context *(*p_RC5_CreateContext)(const SECItem *key, unsigned int rounds, + unsigned int wordSize, const unsigned char *iv, int mode); + + void (*p_RC5_DestroyContext)(RC5Context *cx, PRBool freeit); + + SECStatus (*p_RC5_Encrypt)(RC5Context *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + + SECStatus (*p_RC5_Decrypt)(RC5Context *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + + DESContext *(*p_DES_CreateContext)(const unsigned char *key, + const unsigned char *iv, + int mode, PRBool encrypt); + + void (*p_DES_DestroyContext)(DESContext *cx, PRBool freeit); + + SECStatus (*p_DES_Encrypt)(DESContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + + SECStatus (*p_DES_Decrypt)(DESContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + + AESContext *(*p_AES_CreateContext)(const unsigned char *key, + const unsigned char *iv, + int mode, int encrypt, unsigned int keylen, + unsigned int blocklen); + + void (*p_AES_DestroyContext)(AESContext *cx, PRBool freeit); + + SECStatus (*p_AES_Encrypt)(AESContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + + SECStatus 
(*p_AES_Decrypt)(AESContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + + SECStatus (*p_MD5_Hash)(unsigned char *dest, const char *src); + + SECStatus (*p_MD5_HashBuf)(unsigned char *dest, const unsigned char *src, + PRUint32 src_length); + + MD5Context *(*p_MD5_NewContext)(void); + + void (*p_MD5_DestroyContext)(MD5Context *cx, PRBool freeit); + + void (*p_MD5_Begin)(MD5Context *cx); + + void (*p_MD5_Update)(MD5Context *cx, + const unsigned char *input, unsigned int inputLen); + + void (*p_MD5_End)(MD5Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen); + + unsigned int (*p_MD5_FlattenSize)(MD5Context *cx); + + SECStatus (*p_MD5_Flatten)(MD5Context *cx, unsigned char *space); + + MD5Context *(*p_MD5_Resurrect)(unsigned char *space, void *arg); + + void (*p_MD5_TraceState)(MD5Context *cx); + + SECStatus (*p_MD2_Hash)(unsigned char *dest, const char *src); + + MD2Context *(*p_MD2_NewContext)(void); + + void (*p_MD2_DestroyContext)(MD2Context *cx, PRBool freeit); + + void (*p_MD2_Begin)(MD2Context *cx); + + void (*p_MD2_Update)(MD2Context *cx, + const unsigned char *input, unsigned int inputLen); + + void (*p_MD2_End)(MD2Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen); + + unsigned int (*p_MD2_FlattenSize)(MD2Context *cx); + + SECStatus (*p_MD2_Flatten)(MD2Context *cx, unsigned char *space); + + MD2Context *(*p_MD2_Resurrect)(unsigned char *space, void *arg); + + SECStatus (*p_SHA1_Hash)(unsigned char *dest, const char *src); + + SECStatus (*p_SHA1_HashBuf)(unsigned char *dest, const unsigned char *src, + PRUint32 src_length); + + SHA1Context *(*p_SHA1_NewContext)(void); + + void (*p_SHA1_DestroyContext)(SHA1Context *cx, PRBool freeit); + + void (*p_SHA1_Begin)(SHA1Context *cx); + + void (*p_SHA1_Update)(SHA1Context *cx, const unsigned char *input, + unsigned int inputLen); + + void 
(*p_SHA1_End)(SHA1Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen); + + void (*p_SHA1_TraceState)(SHA1Context *cx); + + unsigned int (*p_SHA1_FlattenSize)(SHA1Context *cx); + + SECStatus (*p_SHA1_Flatten)(SHA1Context *cx, unsigned char *space); + + SHA1Context *(*p_SHA1_Resurrect)(unsigned char *space, void *arg); + + SECStatus (*p_RNG_RNGInit)(void); + + SECStatus (*p_RNG_RandomUpdate)(const void *data, size_t bytes); + + SECStatus (*p_RNG_GenerateGlobalRandomBytes)(void *dest, size_t len); + + void (*p_RNG_RNGShutdown)(void); + + SECStatus (*p_PQG_ParamGen)(unsigned int j, PQGParams **pParams, + PQGVerify **pVfy); + + SECStatus (*p_PQG_ParamGenSeedLen)(unsigned int j, unsigned int seedBytes, + PQGParams **pParams, PQGVerify **pVfy); + + SECStatus (*p_PQG_VerifyParams)(const PQGParams *params, + const PQGVerify *vfy, SECStatus *result); + + /* Version 3.001 came to here */ + + SECStatus (*p_RSA_PrivateKeyOpDoubleChecked)(RSAPrivateKey *key, + unsigned char *output, + const unsigned char *input); + + SECStatus (*p_RSA_PrivateKeyCheck)(const RSAPrivateKey *key); + + void (*p_BL_Cleanup)(void); + + /* Version 3.002 came to here */ + + SHA256Context *(*p_SHA256_NewContext)(void); + void (*p_SHA256_DestroyContext)(SHA256Context *cx, PRBool freeit); + void (*p_SHA256_Begin)(SHA256Context *cx); + void (*p_SHA256_Update)(SHA256Context *cx, const unsigned char *input, + unsigned int inputLen); + void (*p_SHA256_End)(SHA256Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen); + SECStatus (*p_SHA256_HashBuf)(unsigned char *dest, const unsigned char *src, + PRUint32 src_length); + SECStatus (*p_SHA256_Hash)(unsigned char *dest, const char *src); + void (*p_SHA256_TraceState)(SHA256Context *cx); + unsigned int (*p_SHA256_FlattenSize)(SHA256Context *cx); + SECStatus (*p_SHA256_Flatten)(SHA256Context *cx, unsigned char *space); + SHA256Context *(*p_SHA256_Resurrect)(unsigned char *space, void *arg); + 
+ SHA512Context *(*p_SHA512_NewContext)(void); + void (*p_SHA512_DestroyContext)(SHA512Context *cx, PRBool freeit); + void (*p_SHA512_Begin)(SHA512Context *cx); + void (*p_SHA512_Update)(SHA512Context *cx, const unsigned char *input, + unsigned int inputLen); + void (*p_SHA512_End)(SHA512Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen); + SECStatus (*p_SHA512_HashBuf)(unsigned char *dest, const unsigned char *src, + PRUint32 src_length); + SECStatus (*p_SHA512_Hash)(unsigned char *dest, const char *src); + void (*p_SHA512_TraceState)(SHA512Context *cx); + unsigned int (*p_SHA512_FlattenSize)(SHA512Context *cx); + SECStatus (*p_SHA512_Flatten)(SHA512Context *cx, unsigned char *space); + SHA512Context *(*p_SHA512_Resurrect)(unsigned char *space, void *arg); + + SHA384Context *(*p_SHA384_NewContext)(void); + void (*p_SHA384_DestroyContext)(SHA384Context *cx, PRBool freeit); + void (*p_SHA384_Begin)(SHA384Context *cx); + void (*p_SHA384_Update)(SHA384Context *cx, const unsigned char *input, + unsigned int inputLen); + void (*p_SHA384_End)(SHA384Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen); + SECStatus (*p_SHA384_HashBuf)(unsigned char *dest, const unsigned char *src, + PRUint32 src_length); + SECStatus (*p_SHA384_Hash)(unsigned char *dest, const char *src); + void (*p_SHA384_TraceState)(SHA384Context *cx); + unsigned int (*p_SHA384_FlattenSize)(SHA384Context *cx); + SECStatus (*p_SHA384_Flatten)(SHA384Context *cx, unsigned char *space); + SHA384Context *(*p_SHA384_Resurrect)(unsigned char *space, void *arg); + + /* Version 3.003 came to here */ + + AESKeyWrapContext *(*p_AESKeyWrap_CreateContext)(const unsigned char *key, + const unsigned char *iv, int encrypt, unsigned int keylen); + + void (*p_AESKeyWrap_DestroyContext)(AESKeyWrapContext *cx, PRBool freeit); + + SECStatus (*p_AESKeyWrap_Encrypt)(AESKeyWrapContext *cx, + unsigned char *output, + unsigned int *outputLen, unsigned int 
maxOutputLen, + const unsigned char *input, unsigned int inputLen); + + SECStatus (*p_AESKeyWrap_Decrypt)(AESKeyWrapContext *cx, + unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + + /* Version 3.004 came to here */ + + PRBool (*p_BLAPI_SHVerify)(const char *name, PRFuncPtr addr); + PRBool (*p_BLAPI_VerifySelf)(const char *name); + + /* Version 3.005 came to here */ + + SECStatus (*p_EC_NewKey)(ECParams *params, + ECPrivateKey **privKey); + + SECStatus (*p_EC_NewKeyFromSeed)(ECParams *params, + ECPrivateKey **privKey, + const unsigned char *seed, + int seedlen); + + SECStatus (*p_EC_ValidatePublicKey)(ECParams *params, + SECItem *publicValue); + + SECStatus (*p_ECDH_Derive)(SECItem *publicValue, + ECParams *params, + SECItem *privateValue, + PRBool withCofactor, + SECItem *derivedSecret); + + SECStatus (*p_ECDSA_SignDigest)(ECPrivateKey *key, + SECItem *signature, + const SECItem *digest); + + SECStatus (*p_ECDSA_VerifyDigest)(ECPublicKey *key, + const SECItem *signature, + const SECItem *digest); + + SECStatus (*p_ECDSA_SignDigestWithSeed)(ECPrivateKey *key, + SECItem *signature, + const SECItem *digest, + const unsigned char *seed, + const int seedlen); + + /* Version 3.006 came to here */ + + /* no modification to FREEBLVectorStr itself + * but ECParamStr was modified + */ + + /* Version 3.007 came to here */ + + SECStatus (*p_AES_InitContext)(AESContext *cx, + const unsigned char *key, + unsigned int keylen, + const unsigned char *iv, + int mode, + unsigned int encrypt, + unsigned int blocklen); + SECStatus (*p_AESKeyWrap_InitContext)(AESKeyWrapContext *cx, + const unsigned char *key, + unsigned int keylen, + const unsigned char *iv, + int mode, + unsigned int encrypt, + unsigned int blocklen); + SECStatus (*p_DES_InitContext)(DESContext *cx, + const unsigned char *key, + unsigned int keylen, + const unsigned char *iv, + int mode, + unsigned int encrypt, + unsigned int); + 
SECStatus (*p_RC2_InitContext)(RC2Context *cx, + const unsigned char *key, + unsigned int keylen, + const unsigned char *iv, + int mode, + unsigned int effectiveKeyLen, + unsigned int); + SECStatus (*p_RC4_InitContext)(RC4Context *cx, + const unsigned char *key, + unsigned int keylen, + const unsigned char *, + int, + unsigned int, + unsigned int); + + AESContext *(*p_AES_AllocateContext)(void); + AESKeyWrapContext *(*p_AESKeyWrap_AllocateContext)(void); + DESContext *(*p_DES_AllocateContext)(void); + RC2Context *(*p_RC2_AllocateContext)(void); + RC4Context *(*p_RC4_AllocateContext)(void); + + void (*p_MD2_Clone)(MD2Context *dest, MD2Context *src); + void (*p_MD5_Clone)(MD5Context *dest, MD5Context *src); + void (*p_SHA1_Clone)(SHA1Context *dest, SHA1Context *src); + void (*p_SHA256_Clone)(SHA256Context *dest, SHA256Context *src); + void (*p_SHA384_Clone)(SHA384Context *dest, SHA384Context *src); + void (*p_SHA512_Clone)(SHA512Context *dest, SHA512Context *src); + + SECStatus (*p_TLS_PRF)(const SECItem *secret, const char *label, + SECItem *seed, SECItem *result, PRBool isFIPS); + + const SECHashObject *(*p_HASH_GetRawHashObject)(HASH_HashType hashType); + + HMACContext *(*p_HMAC_Create)(const SECHashObject *hashObj, + const unsigned char *secret, + unsigned int secret_len, PRBool isFIPS); + SECStatus (*p_HMAC_Init)(HMACContext *cx, const SECHashObject *hash_obj, + const unsigned char *secret, + unsigned int secret_len, PRBool isFIPS); + void (*p_HMAC_Begin)(HMACContext *cx); + void (*p_HMAC_Update)(HMACContext *cx, const unsigned char *data, + unsigned int data_len); + HMACContext *(*p_HMAC_Clone)(HMACContext *cx); + SECStatus (*p_HMAC_Finish)(HMACContext *cx, unsigned char *result, + unsigned int *result_len, + unsigned int max_result_len); + void (*p_HMAC_Destroy)(HMACContext *cx, PRBool freeit); + + void (*p_RNG_SystemInfoForRNG)(void); + + /* Version 3.008 came to here */ + + SECStatus (*p_FIPS186Change_GenerateX)(unsigned char *XKEY, + const unsigned char 
*XSEEDj, + unsigned char *x_j); + SECStatus (*p_FIPS186Change_ReduceModQForDSA)(const unsigned char *w, + const unsigned char *q, + unsigned char *xj); + + /* Version 3.009 came to here */ + + SECStatus (*p_Camellia_InitContext)(CamelliaContext *cx, + const unsigned char *key, + unsigned int keylen, + const unsigned char *iv, + int mode, + unsigned int encrypt, + unsigned int unused); + + CamelliaContext *(*p_Camellia_AllocateContext)(void); + CamelliaContext *(*p_Camellia_CreateContext)(const unsigned char *key, + const unsigned char *iv, + int mode, int encrypt, + unsigned int keylen); + void (*p_Camellia_DestroyContext)(CamelliaContext *cx, PRBool freeit); + + SECStatus (*p_Camellia_Encrypt)(CamelliaContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen); + + SECStatus (*p_Camellia_Decrypt)(CamelliaContext *cx, unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen); + + void (*p_PQG_DestroyParams)(PQGParams *params); + + void (*p_PQG_DestroyVerify)(PQGVerify *vfy); + + /* Version 3.010 came to here */ + + SECStatus (*p_SEED_InitContext)(SEEDContext *cx, + const unsigned char *key, + unsigned int keylen, + const unsigned char *iv, + int mode, + unsigned int encrypt, + unsigned int); + + SEEDContext *(*p_SEED_AllocateContext)(void); + + SEEDContext *(*p_SEED_CreateContext)(const unsigned char *key, + const unsigned char *iv, + int mode, PRBool encrypt); + + void (*p_SEED_DestroyContext)(SEEDContext *cx, PRBool freeit); + + SECStatus (*p_SEED_Encrypt)(SEEDContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + + SECStatus (*p_SEED_Decrypt)(SEEDContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen); + + SECStatus 
(*p_BL_Init)(void); + void (*p_BL_SetForkState)(PRBool); + + SECStatus (*p_PRNGTEST_Instantiate)(const PRUint8 *entropy, + unsigned int entropy_len, + const PRUint8 *nonce, + unsigned int nonce_len, + const PRUint8 *personal_string, + unsigned int ps_len); + + SECStatus (*p_PRNGTEST_Reseed)(const PRUint8 *entropy, + unsigned int entropy_len, + const PRUint8 *additional, + unsigned int additional_len); + + SECStatus (*p_PRNGTEST_Generate)(PRUint8 *bytes, + unsigned int bytes_len, + const PRUint8 *additional, + unsigned int additional_len); + + SECStatus (*p_PRNGTEST_Uninstantiate)(void); + /* Version 3.011 came to here */ + + SECStatus (*p_RSA_PopulatePrivateKey)(RSAPrivateKey *key); + + SECStatus (*p_DSA_NewRandom)(PLArenaPool *arena, const SECItem *q, + SECItem *seed); + + SECStatus (*p_JPAKE_Sign)(PLArenaPool *arena, const PQGParams *pqg, + HASH_HashType hashType, const SECItem *signerID, + const SECItem *x, const SECItem *testRandom, + const SECItem *gxIn, SECItem *gxOut, + SECItem *gv, SECItem *r); + + SECStatus (*p_JPAKE_Verify)(PLArenaPool *arena, const PQGParams *pqg, + HASH_HashType hashType, const SECItem *signerID, + const SECItem *peerID, const SECItem *gx, + const SECItem *gv, const SECItem *r); + + SECStatus (*p_JPAKE_Round2)(PLArenaPool *arena, const SECItem *p, + const SECItem *q, const SECItem *gx1, + const SECItem *gx3, const SECItem *gx4, + SECItem *base, const SECItem *x2, + const SECItem *s, SECItem *x2s); + + SECStatus (*p_JPAKE_Final)(PLArenaPool *arena, const SECItem *p, + const SECItem *q, const SECItem *x2, + const SECItem *gx4, const SECItem *x2s, + const SECItem *B, SECItem *K); + + /* Version 3.012 came to here */ + + SECStatus (*p_TLS_P_hash)(HASH_HashType hashAlg, + const SECItem *secret, + const char *label, + SECItem *seed, + SECItem *result, + PRBool isFIPS); + + SHA224Context *(*p_SHA224_NewContext)(void); + void (*p_SHA224_DestroyContext)(SHA224Context *cx, PRBool freeit); + void (*p_SHA224_Begin)(SHA224Context *cx); + void 
(*p_SHA224_Update)(SHA224Context *cx, const unsigned char *input, + unsigned int inputLen); + void (*p_SHA224_End)(SHA224Context *cx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen); + SECStatus (*p_SHA224_HashBuf)(unsigned char *dest, const unsigned char *src, + PRUint32 src_length); + SECStatus (*p_SHA224_Hash)(unsigned char *dest, const char *src); + void (*p_SHA224_TraceState)(SHA224Context *cx); + unsigned int (*p_SHA224_FlattenSize)(SHA224Context *cx); + SECStatus (*p_SHA224_Flatten)(SHA224Context *cx, unsigned char *space); + SHA224Context *(*p_SHA224_Resurrect)(unsigned char *space, void *arg); + void (*p_SHA224_Clone)(SHA224Context *dest, SHA224Context *src); + PRBool (*p_BLAPI_SHVerifyFile)(const char *name); + + /* Version 3.013 came to here */ + + SECStatus (*p_PQG_ParamGenV2)(unsigned int L, unsigned int N, + unsigned int seedBytes, + PQGParams **pParams, PQGVerify **pVfy); + SECStatus (*p_PRNGTEST_RunHealthTests)(void); + + /* Version 3.014 came to here */ + + SECStatus (*p_HMAC_ConstantTime)( + unsigned char *result, + unsigned int *resultLen, + unsigned int maxResultLen, + const SECHashObject *hashObj, + const unsigned char *secret, + unsigned int secretLen, + const unsigned char *header, + unsigned int headerLen, + const unsigned char *body, + unsigned int bodyLen, + unsigned int bodyTotalLen); + + SECStatus (*p_SSLv3_MAC_ConstantTime)( + unsigned char *result, + unsigned int *resultLen, + unsigned int maxResultLen, + const SECHashObject *hashObj, + const unsigned char *secret, + unsigned int secretLen, + const unsigned char *header, + unsigned int headerLen, + const unsigned char *body, + unsigned int bodyLen, + unsigned int bodyTotalLen); + + /* Version 3.015 came to here */ + + SECStatus (*p_RSA_SignRaw)(RSAPrivateKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen); + SECStatus (*p_RSA_CheckSignRaw)(RSAPublicKey *key, + 
const unsigned char *sig, + unsigned int sigLen, + const unsigned char *hash, + unsigned int hashLen); + SECStatus (*p_RSA_CheckSignRecoverRaw)(RSAPublicKey *key, + unsigned char *data, + unsigned int *dataLen, + unsigned int maxDataLen, + const unsigned char *sig, + unsigned int sigLen); + SECStatus (*p_RSA_EncryptRaw)(RSAPublicKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen); + SECStatus (*p_RSA_DecryptRaw)(RSAPrivateKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen); + SECStatus (*p_RSA_EncryptOAEP)(RSAPublicKey *key, + HASH_HashType hashAlg, + HASH_HashType maskHashAlg, + const unsigned char *label, + unsigned int labelLen, + const unsigned char *seed, + unsigned int seedLen, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen); + SECStatus (*p_RSA_DecryptOAEP)(RSAPrivateKey *key, + HASH_HashType hashAlg, + HASH_HashType maskHashAlg, + const unsigned char *label, + unsigned int labelLen, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen); + SECStatus (*p_RSA_EncryptBlock)(RSAPublicKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen); + SECStatus (*p_RSA_DecryptBlock)(RSAPrivateKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen); + SECStatus (*p_RSA_SignPSS)(RSAPrivateKey *key, + HASH_HashType hashAlg, + HASH_HashType maskHashAlg, + const unsigned char *salt, + unsigned int saltLen, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen); + 
SECStatus (*p_RSA_CheckSignPSS)(RSAPublicKey *key, + HASH_HashType hashAlg, + HASH_HashType maskHashAlg, + unsigned int saltLen, + const unsigned char *sig, + unsigned int sigLen, + const unsigned char *hash, + unsigned int hashLen); + SECStatus (*p_RSA_Sign)(RSAPrivateKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen); + SECStatus (*p_RSA_CheckSign)(RSAPublicKey *key, + const unsigned char *sig, + unsigned int sigLen, + const unsigned char *data, + unsigned int dataLen); + SECStatus (*p_RSA_CheckSignRecover)(RSAPublicKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *sig, + unsigned int sigLen); + + /* Version 3.016 came to here */ + + SECStatus (*p_EC_FillParams)(PLArenaPool *arena, + const SECItem *encodedParams, ECParams *params); + SECStatus (*p_EC_DecodeParams)(const SECItem *encodedParams, + ECParams **ecparams); + SECStatus (*p_EC_CopyParams)(PLArenaPool *arena, ECParams *dstParams, + const ECParams *srcParams); + + /* Version 3.017 came to here */ + + SECStatus (*p_ChaCha20Poly1305_InitContext)(ChaCha20Poly1305Context *ctx, + const unsigned char *key, + unsigned int keyLen, + unsigned int tagLen); + + ChaCha20Poly1305Context *(*p_ChaCha20Poly1305_CreateContext)( + const unsigned char *key, unsigned int keyLen, unsigned int tagLen); + + void (*p_ChaCha20Poly1305_DestroyContext)(ChaCha20Poly1305Context *ctx, + PRBool freeit); + + SECStatus (*p_ChaCha20Poly1305_Seal)( + const ChaCha20Poly1305Context *ctx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen, + const unsigned char *nonce, unsigned int nonceLen, + const unsigned char *ad, unsigned int adLen); + + SECStatus (*p_ChaCha20Poly1305_Open)( + const ChaCha20Poly1305Context *ctx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned 
char *input, unsigned int inputLen, + const unsigned char *nonce, unsigned int nonceLen, + const unsigned char *ad, unsigned int adLen); + + /* Version 3.018 came to here */ + + int (*p_EC_GetPointSize)(const ECParams *); + + /* Version 3.019 came to here */ + + /* Add new function pointers at the end of this struct and bump + * FREEBL_VERSION at the beginning of this file. */ +}; + +typedef struct FREEBLVectorStr FREEBLVector; + +#ifdef FREEBL_LOWHASH +#include "nsslowhash.h" + +#define NSSLOW_VERSION 0x0300 + +struct NSSLOWVectorStr { + unsigned short length; /* of this struct in bytes */ + unsigned short version; /* of this struct. */ + const FREEBLVector *(*p_FREEBL_GetVector)(void); + NSSLOWInitContext *(*p_NSSLOW_Init)(void); + void (*p_NSSLOW_Shutdown)(NSSLOWInitContext *context); + void (*p_NSSLOW_Reset)(NSSLOWInitContext *context); + NSSLOWHASHContext *(*p_NSSLOWHASH_NewContext)( + NSSLOWInitContext *initContext, + HASH_HashType hashType); + void (*p_NSSLOWHASH_Begin)(NSSLOWHASHContext *context); + void (*p_NSSLOWHASH_Update)(NSSLOWHASHContext *context, + const unsigned char *buf, + unsigned int len); + void (*p_NSSLOWHASH_End)(NSSLOWHASHContext *context, + unsigned char *buf, + unsigned int *ret, unsigned int len); + void (*p_NSSLOWHASH_Destroy)(NSSLOWHASHContext *context); + unsigned int (*p_NSSLOWHASH_Length)(NSSLOWHASHContext *context); +}; + +typedef struct NSSLOWVectorStr NSSLOWVector; +#endif + +SEC_BEGIN_PROTOS + +#ifdef FREEBL_LOWHASH +typedef const NSSLOWVector *NSSLOWGetVectorFn(void); + +extern NSSLOWGetVectorFn NSSLOW_GetVector; +#endif + +typedef const FREEBLVector *FREEBLGetVectorFn(void); + +extern FREEBLGetVectorFn FREEBL_GetVector; + +SEC_END_PROTOS + +#endif diff --git a/security/nss/lib/freebl/lowhash_vector.c b/security/nss/lib/freebl/lowhash_vector.c new file mode 100644 index 000000000..7690c98da --- /dev/null +++ b/security/nss/lib/freebl/lowhash_vector.c @@ -0,0 +1,217 @@ +/* + * loader.c - load platform dependent DSO containing 
freebl implementation.
+ *
+ * Every public NSSLOW_* / NSSLOWHASH_* entry point defined here is a thin
+ * wrapper: it makes sure the implementation library has been loaded (once)
+ * and then forwards the call through the function table that the library
+ * returns from its "NSSLOW_GetVector" symbol.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#define _GNU_SOURCE 1
+#include "loader.h"
+#include "prmem.h"
+#include "prerror.h"
+#include "prinit.h"
+#include "prenv.h"
+#include "blname.c"
+
+#include "prio.h"
+#include "prprf.h"
+#include <stdio.h>
+#include "prsystem.h"
+#include "nsslowhash.h"
+#include <dlfcn.h> /* dlopen, dlsym, dladdr, dlclose, Dl_info */
+#include "pratom.h"
+
+/* Handle of the loaded implementation library; set by freebl_LoadDSO(). */
+static PRLibrary *blLib;
+
+/* Low and high byte of a 16-bit version number. */
+#define LSB(x) ((x)&0xff)
+#define MSB(x) ((x) >> 8)
+
+/* Function table exported by the loaded library; NULL until a load succeeds. */
+static const NSSLOWVector *vector;
+/* Name of the library that supplied 'vector'. */
+static const char *libraryName = NULL;
+
+/* pretty much only glibc uses this, make sure we don't have any dependencies
+ * on nspr.. */
+#undef PORT_Alloc
+#undef PORT_Free
+#define PORT_Alloc malloc
+#define PR_Malloc malloc
+#define PORT_Free free
+#define PR_Free free
+#define PR_GetDirectorySeparator() '/'
+/* dlopen() replacement for the NSPR loader call.  The first argument must
+ * have a .value.pathname member (presumably the PRLibSpec that genload.c
+ * passes -- TODO confirm against genload.c).  'flags' is intentionally
+ * ignored; the dlopen() flags are fixed.  The macro now uses its own
+ * parameter instead of silently capturing a caller variable that happens to
+ * be called 'libSpec', and the expansion is fully parenthesized. */
+#define PR_LoadLibraryWithFlags(libspec, flags) \
+    ((PRLibrary *)dlopen((libspec).value.pathname, RTLD_NOW | RTLD_LOCAL))
+#define PR_GetLibraryFilePathname(name, addr) \
+    freebl_lowhash_getLibraryFilePath(addr)
+
+/*
+ * Return a heap-allocated copy of the pathname of the shared object that
+ * contains 'addr', or NULL if dladdr() cannot resolve the address (or
+ * strdup() fails).  The caller owns the returned string and releases it with
+ * free() (PR_Free/PORT_Free are mapped to free above).
+ */
+static char *
+freebl_lowhash_getLibraryFilePath(void *addr)
+{
+    Dl_info dli;
+    if (dladdr(addr, &dli) == 0) {
+        return NULL;
+    }
+    return strdup(dli.dli_fname);
+}
+
+/*
+ * The PR_LoadLibraryWithFlags call above defines this variable away, so we
+ * don't need it..
+ */
+#ifdef nodef
+static const char *NameOfThisSharedLib =
+    SHLIB_PREFIX "freebl" SHLIB_VERSION "." SHLIB_SUFFIX;
+#endif
+
+#include "genload.c"
+
+/*
+ * Load the implementation library and fetch its NSSLOW function table.
+ *
+ * This function must be run only once (see freebl_RunLoaderOnce()).
+ * getLibName() and loader_LoadLibrary() are not defined in this file; they
+ * presumably come from the blname.c / genload.c sources included above.
+ *
+ * The table is accepted only if its major version byte equals ours, its
+ * minor version byte is at least ours, and it is at least as large as the
+ * NSSLOWVector this file was compiled against.  On success 'vector',
+ * 'libraryName' and 'blLib' are set and PR_SUCCESS is returned; on any
+ * failure the library handle (if one was obtained) is closed and PR_FAILURE
+ * is returned.
+ */
+static PRStatus
+freebl_LoadDSO(void)
+{
+    PRLibrary *handle;
+    const char *name = getLibName();
+
+    if (!name) {
+        /*PR_SetError(PR_LOAD_LIBRARY_ERROR,0); */
+        return PR_FAILURE;
+    }
+    handle = loader_LoadLibrary(name);
+    if (handle) {
+        void *address = dlsym(handle, "NSSLOW_GetVector");
+        if (address) {
+            NSSLOWGetVectorFn *getVector = (NSSLOWGetVectorFn *)address;
+            const NSSLOWVector *dsoVector = getVector();
+            if (dsoVector) {
+                unsigned short dsoVersion = dsoVector->version;
+                unsigned short myVersion = NSSLOW_VERSION;
+                if (MSB(dsoVersion) == MSB(myVersion) &&
+                    LSB(dsoVersion) >= LSB(myVersion) &&
+                    dsoVector->length >= sizeof(NSSLOWVector)) {
+                    vector = dsoVector;
+                    libraryName = name;
+                    blLib = handle;
+                    return PR_SUCCESS;
+                }
+            }
+        }
+        (void)dlclose(handle);
+    }
+    return PR_FAILURE;
+}
+
+/* One-shot state for freebl_RunLoaderOnce(). */
+static PRCallOnceType loadFreeBLOnce;
+
+/*
+ * Run freebl_LoadDSO() exactly once, no matter how many threads call in,
+ * and return its result to every caller.
+ */
+static PRStatus
+freebl_RunLoaderOnce(void)
+{
+    /* Don't have NSPR, so can't use the real PR_CallOnce, implement a stripped
+     * down version. */
+    if (loadFreeBLOnce.initialized) {
+        /* pair with the barrier issued before 'initialized' is set, so the
+         * 'status' read below is the published one */
+        __sync_synchronize();
+        return loadFreeBLOnce.status;
+    }
+    if (__sync_lock_test_and_set(&loadFreeBLOnce.inProgress, 1) == 0) {
+        /* we won the race: do the load, then publish the result */
+        loadFreeBLOnce.status = freebl_LoadDSO();
+        /* make 'status' (and 'vector') visible before 'initialized' */
+        __sync_synchronize();
+        loadFreeBLOnce.initialized = 1;
+    } else {
+        /* shouldn't have a lot of takers on the else clause, which is good
+         * since we don't have condition variables yet.
+         * 'initialized' only ever gets set (not cleared) so we don't
+         * need the traditional locks. */
+        while (!loadFreeBLOnce.initialized) {
+            sleep(1); /* don't have condition variables, just give up the CPU */
+        }
+        __sync_synchronize();
+    }
+
+    return loadFreeBLOnce.status;
+}
+
+/*
+ * Return the full FREEBL function table of the loaded library, or NULL if
+ * the library could not be loaded.
+ */
+const FREEBLVector *
+FREEBL_GetVector(void)
+{
+    if (!vector && PR_SUCCESS != freebl_RunLoaderOnce()) {
+        return NULL;
+    }
+    if (vector) {
+        return (vector->p_FREEBL_GetVector)();
+    }
+    return NULL;
+}
+
+/*
+ * The wrappers below all follow the same pattern: if the table is not yet
+ * available, try to load it; if that fails, return without doing anything
+ * (NULL for pointer results); otherwise forward to the loaded library.
+ */
+
+NSSLOWInitContext *
+NSSLOW_Init(void)
+{
+    if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+        return NULL;
+    return (vector->p_NSSLOW_Init)();
+}
+
+void
+NSSLOW_Shutdown(NSSLOWInitContext *context)
+{
+    if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+        return;
+    (vector->p_NSSLOW_Shutdown)(context);
+}
+
+void
+NSSLOW_Reset(NSSLOWInitContext *context)
+{
+    if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+        return;
+    (vector->p_NSSLOW_Reset)(context);
+}
+
+NSSLOWHASHContext *
+NSSLOWHASH_NewContext(
+    NSSLOWInitContext *initContext,
+    HASH_HashType hashType)
+{
+    if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+        return NULL;
+    return (vector->p_NSSLOWHASH_NewContext)(initContext, hashType);
+}
+
+void
+NSSLOWHASH_Begin(NSSLOWHASHContext *context)
+{
+    if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+        return;
+    (vector->p_NSSLOWHASH_Begin)(context);
+}
+
+void
+NSSLOWHASH_Update(NSSLOWHASHContext *context,
+                  const unsigned char *buf,
+                  unsigned int len)
+{
+    if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+        return;
+    (vector->p_NSSLOWHASH_Update)(context, buf, len);
+}
+
+void
+NSSLOWHASH_End(NSSLOWHASHContext *context,
+               unsigned char *buf,
+               unsigned int *ret, unsigned int len)
+{
+    if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+        return;
+    (vector->p_NSSLOWHASH_End)(context, buf, ret, len);
+}
+
+void
+NSSLOWHASH_Destroy(NSSLOWHASHContext *context)
+{
+    if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+        return;
+    (vector->p_NSSLOWHASH_Destroy)(context);
+}
+
+unsigned int
+NSSLOWHASH_Length(NSSLOWHASHContext *context)
+{
+    if (!vector && PR_SUCCESS != freebl_RunLoaderOnce())
+        return -1; /* converts to UINT_MAX in the unsigned return type */
+    return (vector->p_NSSLOWHASH_Length)(context);
+}
diff --git a/security/nss/lib/freebl/manifest.mn b/security/nss/lib/freebl/manifest.mn
new file mode 100644
index 000000000..1ef983907
--- /dev/null
+++ b/security/nss/lib/freebl/manifest.mn
@@ -0,0 +1,195 @@
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+# NOTE: any ifdefs in this file must be defined on the gmake command line
+# (if anywhere). They cannot come from Makefile or config.mk
+
+CORE_DEPTH = ../..
+
+MODULE = nss
+
+# copied from Linux.mk. We have a chicken and egg issue here. We need to set
+# Library name before we call the platform code in coreconf, but we need to
+# Pick up the automatic setting of FREEBL_LOWHASH before we can set the
+# Library name... so for now we mimic the code in Linux.mk to get the
+# automatic setting early...
+#
+# On Linux 2.6 or later, build libfreebl3.so with no NSPR and libnssutil3.so
+# dependencies by default. Set FREEBL_NO_DEPEND to 0 in the environment to
+# override this.
+# +# +include $(CORE_DEPTH)/coreconf/arch.mk +ifeq ($(OS_ARCH),Linux) +ifneq ($(OS_TARGET),Android) +ifeq (2.6,$(firstword $(sort 2.6 $(OS_RELEASE)))) +ifndef FREEBL_NO_DEPEND +FREEBL_NO_DEPEND = 1 +FREEBL_LOWHASH = 1 +endif +endif +endif +endif + + +LIBRARY_NAME = freebl +LIBRARY_VERSION = 3 + +ifdef FREEBL_CHILD_BUILD + ifdef USE_ABI32_INT32 + LIBRARY_NAME = freebl_32int + endif + ifdef USE_ABI32_INT64 + LIBRARY_NAME = freebl_32int64 + endif + ifdef USE_ABI32_FPU + LIBRARY_NAME = freebl_32fpu + endif + ifdef USE_ABI64_INT + LIBRARY_NAME = freebl_64int + endif + ifdef USE_ABI64_FPU + LIBRARY_NAME = freebl_64fpu + endif + ifdef FREEBL_LOWHASH + LIBRARY_NAME = freeblpriv + endif + ifdef USE_STUB_BUILD + # for the stub build, reset name to the default (from freeblpriv) + LIBRARY_NAME = freebl + endif +endif + +# if the library name contains _, we prefix the version with _ +ifneq (,$(findstring _,$(LIBRARY_NAME))) + LIBRARY_VERSION := _$(LIBRARY_VERSION) +endif + +MAPFILE = $(OBJDIR)/$(LIBRARY_NAME).def + +SOFTOKEN_LIBRARY_VERSION = 3 + +DEFINES += -DSHLIB_SUFFIX=\"$(DLL_SUFFIX)\" -DSHLIB_PREFIX=\"$(DLL_PREFIX)\" \ + -DSHLIB_VERSION=\"$(LIBRARY_VERSION)\" \ + -DSOFTOKEN_SHLIB_VERSION=\"$(SOFTOKEN_LIBRARY_VERSION)\" + +REQUIRES = + +EXPORTS = \ + blapit.h \ + shsign.h \ + ecl-exp.h \ + $(LOWHASH_EXPORTS) \ + $(NULL) + +PRIVATE_EXPORTS = \ + alghmac.h \ + blapi.h \ + chacha20poly1305.h \ + hmacct.h \ + secmpi.h \ + secrng.h \ + ec.h \ + ecl.h \ + ecl-curve.h \ + $(NULL) + +MPI_HDRS = mpi-config.h mpi.h mpi-priv.h mplogic.h mpprime.h logtab.h mp_gf2m.h +MPI_SRCS = mpprime.c mpmontg.c mplogic.c mpi.c mp_gf2m.c + + +ECL_HDRS = ecl-exp.h ecl.h ecp.h ecl-priv.h +ifndef NSS_DISABLE_ECC +ECL_SRCS = ecl.c ecl_curve.c ecl_mult.c ecl_gf.c \ + ecp_aff.c ecp_jac.c ecp_mont.c \ + ec_naf.c ecp_jm.c ecp_256.c ecp_384.c ecp_521.c \ + ecp_256_32.c ecp_25519.c +else +ECL_SRCS = $(NULL) +endif +SHA_SRCS = sha_fast.c +MPCPU_SRCS = mpcpucache.c + +CSRCS = \ + freeblver.c \ + ldvector.c \ + 
sysrand.c \ + $(SHA_SRCS) \ + md2.c \ + md5.c \ + sha512.c \ + alghmac.c \ + rawhash.c \ + alg2268.c \ + arcfour.c \ + arcfive.c \ + desblapi.c \ + des.c \ + drbg.c \ + chacha20poly1305.c \ + cts.c \ + ctr.c \ + fipsfreebl.c \ + gcm.c \ + hmacct.c \ + rijndael.c \ + aeskeywrap.c \ + camellia.c \ + dh.c \ + ec.c \ + ecdecode.c \ + pqg.c \ + dsa.c \ + rsa.c \ + rsapkcs.c \ + shvfy.c \ + tlsprfalg.c \ + seed.c \ + jpake.c \ + $(MPI_SRCS) \ + $(MPCPU_SRCS) \ + $(ECL_SRCS) \ + $(STUBS_SRCS) \ + $(LOWHASH_SRCS) \ + $(EXTRA_SRCS) \ + $(NULL) + +ALL_CSRCS := $(CSRCS) + +ALL_HDRS = \ + alghmac.h \ + blapi.h \ + blapit.h \ + des.h \ + ec.h \ + loader.h \ + rijndael.h \ + camellia.h \ + secmpi.h \ + sha_fast.h \ + sha256.h \ + shsign.h \ + vis_proto.h \ + seed.h \ + $(NULL) + + +ifdef AES_GEN_TBL +DEFINES += -DRIJNDAEL_GENERATE_TABLES +else +ifdef AES_GEN_TBL_M +DEFINES += -DRIJNDAEL_GENERATE_TABLES_MACRO +else +ifdef AES_GEN_VAL +DEFINES += -DRIJNDAEL_GENERATE_VALUES +else +ifdef AES_GEN_VAL_M +DEFINES += -DRIJNDAEL_GENERATE_VALUES_MACRO +else +DEFINES += -DRIJNDAEL_INCLUDE_TABLES +endif +endif +endif +endif diff --git a/security/nss/lib/freebl/md2.c b/security/nss/lib/freebl/md2.c new file mode 100644 index 000000000..cb3d3d82b --- /dev/null +++ b/security/nss/lib/freebl/md2.c @@ -0,0 +1,269 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "prerr.h"
+#include "secerr.h"
+
+#include "prtypes.h"
+
+#include "blapi.h"
+
+#define MD2_DIGEST_LEN 16
+#define MD2_BUFSIZE 16
+#define MD2_X_SIZE 48  /* The X array, [CV | INPUT | TMP VARS] */
+#define MD2_CV 0       /* index into X for chaining variables */
+#define MD2_INPUT 16   /* index into X for input */
+#define MD2_TMPVARS 32 /* index into X for temporary variables */
+#define MD2_CHECKSUM_SIZE 16
+
+/* State of one MD2 computation. */
+struct MD2ContextStr {
+    /* running checksum; hashed as the final block by MD2_End() */
+    unsigned char checksum[MD2_BUFSIZE];
+    /* work array: chaining variables, current input block, temporaries */
+    unsigned char X[MD2_X_SIZE];
+    /* number of bytes still free in the input block X[MD2_INPUT..];
+     * MD2_BUFSIZE means the block is empty */
+    PRUint8 unusedBuffer;
+};
+
+/* Byte substitution table used by the checksum and the compression rounds. */
+static const PRUint8 MD2S[256] = {
+    0051, 0056, 0103, 0311, 0242, 0330, 0174, 0001,
+    0075, 0066, 0124, 0241, 0354, 0360, 0006, 0023,
+    0142, 0247, 0005, 0363, 0300, 0307, 0163, 0214,
+    0230, 0223, 0053, 0331, 0274, 0114, 0202, 0312,
+    0036, 0233, 0127, 0074, 0375, 0324, 0340, 0026,
+    0147, 0102, 0157, 0030, 0212, 0027, 0345, 0022,
+    0276, 0116, 0304, 0326, 0332, 0236, 0336, 0111,
+    0240, 0373, 0365, 0216, 0273, 0057, 0356, 0172,
+    0251, 0150, 0171, 0221, 0025, 0262, 0007, 0077,
+    0224, 0302, 0020, 0211, 0013, 0042, 0137, 0041,
+    0200, 0177, 0135, 0232, 0132, 0220, 0062, 0047,
+    0065, 0076, 0314, 0347, 0277, 0367, 0227, 0003,
+    0377, 0031, 0060, 0263, 0110, 0245, 0265, 0321,
+    0327, 0136, 0222, 0052, 0254, 0126, 0252, 0306,
+    0117, 0270, 0070, 0322, 0226, 0244, 0175, 0266,
+    0166, 0374, 0153, 0342, 0234, 0164, 0004, 0361,
+    0105, 0235, 0160, 0131, 0144, 0161, 0207, 0040,
+    0206, 0133, 0317, 0145, 0346, 0055, 0250, 0002,
+    0033, 0140, 0045, 0255, 0256, 0260, 0271, 0366,
+    0034, 0106, 0141, 0151, 0064, 0100, 0176, 0017,
+    0125, 0107, 0243, 0043, 0335, 0121, 0257, 0072,
+    0303, 0134, 0371, 0316, 0272, 0305, 0352, 0046,
+    0054, 0123, 0015, 0156, 0205, 0050, 0204, 0011,
+    0323, 0337, 0315, 0364, 0101, 0201, 0115, 0122,
+    0152, 0334, 0067, 0310, 0154, 0301, 0253, 0372,
+    0044, 0341, 0173, 0010, 0014, 0275, 0261, 0112,
+    0170, 0210, 0225, 0213, 0343, 0143, 0350, 0155,
+    0351, 0313, 0325, 0376, 0073, 0000, 0035, 0071,
+    0362, 0357, 0267, 0016, 0146, 0130, 0320, 0344,
+    0246, 0167, 0162, 0370, 0353, 0165, 0113, 0012,
+    0061, 0104, 0120, 0264, 0217, 0355, 0037, 0032,
+    0333, 0231, 0215, 0063, 0237, 0021, 0203, 0024
+};
+
+/*
+ * One-shot MD2 of the NUL-terminated string 'src'.  Writes MD2_DIGEST_LEN
+ * bytes to 'dest'.  Returns SECFailure (with the error code set) if no
+ * context could be allocated, SECSuccess otherwise.
+ */
+SECStatus
+MD2_Hash(unsigned char *dest, const char *src)
+{
+    unsigned int len;
+    MD2Context *cx = MD2_NewContext();
+    if (!cx) {
+        PORT_SetError(PR_OUT_OF_MEMORY_ERROR);
+        return SECFailure;
+    }
+    MD2_Begin(cx);
+    MD2_Update(cx, (const unsigned char *)src, PORT_Strlen(src));
+    MD2_End(cx, dest, &len, MD2_DIGEST_LEN);
+    MD2_DestroyContext(cx, PR_TRUE);
+    return SECSuccess;
+}
+
+/*
+ * Allocate a zero-filled context.  Returns NULL and sets
+ * PR_OUT_OF_MEMORY_ERROR on allocation failure.  Call MD2_Begin() before
+ * feeding data.
+ */
+MD2Context *
+MD2_NewContext(void)
+{
+    MD2Context *cx = (MD2Context *)PORT_ZAlloc(sizeof(MD2Context));
+    if (cx == NULL) {
+        PORT_SetError(PR_OUT_OF_MEMORY_ERROR);
+        return NULL;
+    }
+    return cx;
+}
+
+/* Release 'cx' (via PORT_ZFree, sized to the context) when 'freeit' is set;
+ * otherwise do nothing. */
+void
+MD2_DestroyContext(MD2Context *cx, PRBool freeit)
+{
+    if (freeit)
+        PORT_ZFree(cx, sizeof(*cx));
+}
+
+/* Reset 'cx' to the initial state: everything zero, input block empty. */
+void
+MD2_Begin(MD2Context *cx)
+{
+    memset(cx, 0, sizeof(*cx));
+    cx->unusedBuffer = MD2_BUFSIZE;
+}
+
+/*
+ * Process the 16-byte input block currently stored at X[MD2_INPUT]: fold it
+ * into the running checksum, run the 18 compression rounds over X, and mark
+ * the input block as empty again.
+ */
+static void
+md2_compress(MD2Context *cx)
+{
+    int j;
+    unsigned char P;
+    P = cx->checksum[MD2_CHECKSUM_SIZE - 1];
+/* Compute the running checksum, and set the tmp variables to be
+ * CV[i] XOR input[i]
+ */
+#define CKSUMFN(n)                                        \
+    P = cx->checksum[n] ^ MD2S[cx->X[MD2_INPUT + n] ^ P]; \
+    cx->checksum[n] = P;                                  \
+    cx->X[MD2_TMPVARS + n] = cx->X[n] ^ cx->X[MD2_INPUT + n];
+    CKSUMFN(0);
+    CKSUMFN(1);
+    CKSUMFN(2);
+    CKSUMFN(3);
+    CKSUMFN(4);
+    CKSUMFN(5);
+    CKSUMFN(6);
+    CKSUMFN(7);
+    CKSUMFN(8);
+    CKSUMFN(9);
+    CKSUMFN(10);
+    CKSUMFN(11);
+    CKSUMFN(12);
+    CKSUMFN(13);
+    CKSUMFN(14);
+    CKSUMFN(15);
+/* The compression function. */
+#define COMPRESS(n)         \
+    P = cx->X[n] ^ MD2S[P]; \
+    cx->X[n] = P;
+    P = 0x00;
+    for (j = 0; j < 18; j++) {
+        COMPRESS(0);
+        COMPRESS(1);
+        COMPRESS(2);
+        COMPRESS(3);
+        COMPRESS(4);
+        COMPRESS(5);
+        COMPRESS(6);
+        COMPRESS(7);
+        COMPRESS(8);
+        COMPRESS(9);
+        COMPRESS(10);
+        COMPRESS(11);
+        COMPRESS(12);
+        COMPRESS(13);
+        COMPRESS(14);
+        COMPRESS(15);
+        COMPRESS(16);
+        COMPRESS(17);
+        COMPRESS(18);
+        COMPRESS(19);
+        COMPRESS(20);
+        COMPRESS(21);
+        COMPRESS(22);
+        COMPRESS(23);
+        COMPRESS(24);
+        COMPRESS(25);
+        COMPRESS(26);
+        COMPRESS(27);
+        COMPRESS(28);
+        COMPRESS(29);
+        COMPRESS(30);
+        COMPRESS(31);
+        COMPRESS(32);
+        COMPRESS(33);
+        COMPRESS(34);
+        COMPRESS(35);
+        COMPRESS(36);
+        COMPRESS(37);
+        COMPRESS(38);
+        COMPRESS(39);
+        COMPRESS(40);
+        COMPRESS(41);
+        COMPRESS(42);
+        COMPRESS(43);
+        COMPRESS(44);
+        COMPRESS(45);
+        COMPRESS(46);
+        COMPRESS(47);
+        P = (P + j) % 256;
+    }
+    cx->unusedBuffer = MD2_BUFSIZE;
+}
+
+/*
+ * Feed 'inputLen' bytes from 'input' into the hash.  May be called any
+ * number of times with chunks of any size.
+ *
+ * Previously a partially filled block was compressed as soon as
+ * (unusedBuffer + bytesToConsume) reached MD2_BUFSIZE, which is true for
+ * many inputs that do not actually complete the block, and when the block
+ * was not completed the trailing unusedBuffer assignment discarded the
+ * bytes already buffered.  The fill level is now tracked exactly.
+ */
+void
+MD2_Update(MD2Context *cx, const unsigned char *input, unsigned int inputLen)
+{
+    PRUint32 bytesToConsume;
+
+    /* Fill the remaining input buffer. */
+    if (cx->unusedBuffer != MD2_BUFSIZE) {
+        bytesToConsume = PR_MIN(inputLen, cx->unusedBuffer);
+        memcpy(&cx->X[MD2_INPUT + (MD2_BUFSIZE - cx->unusedBuffer)],
+               input, bytesToConsume);
+        cx->unusedBuffer = (PRUint8)(cx->unusedBuffer - bytesToConsume);
+        inputLen -= bytesToConsume;
+        input += bytesToConsume;
+        if (cx->unusedBuffer != 0) {
+            /* Block still incomplete, so all input has been consumed;
+             * keep what is buffered for the next call. */
+            return;
+        }
+        /* Block is full; md2_compress() marks the buffer empty again. */
+        md2_compress(cx);
+    }
+
+    /* Iterate over 16-byte chunks of the input. */
+    while (inputLen >= MD2_BUFSIZE) {
+        memcpy(&cx->X[MD2_INPUT], input, MD2_BUFSIZE);
+        md2_compress(cx);
+        inputLen -= MD2_BUFSIZE;
+        input += MD2_BUFSIZE;
+    }
+
+    /* Copy any input that remains into the buffer. */
+    if (inputLen)
+        memcpy(&cx->X[MD2_INPUT], input, inputLen);
+    cx->unusedBuffer = MD2_BUFSIZE - inputLen;
+}
+
+/*
+ * Finish the computation: pad the last block, hash the checksum block, and
+ * write MD2_DIGEST_LEN bytes to 'digest'.  'maxDigestLen' must be at least
+ * MD2_BUFSIZE, otherwise SEC_ERROR_INVALID_ARGS is set and nothing is
+ * written.  'digestLen' may be NULL (as in MD5_End()).
+ */
+void
+MD2_End(MD2Context *cx, unsigned char *digest,
+        unsigned int *digestLen, unsigned int maxDigestLen)
+{
+    PRUint8 padStart;
+    if (maxDigestLen < MD2_BUFSIZE) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return;
+    }
+    /* Pad with N bytes of value N, where N is the free space in the block
+     * (a whole block of 16s when the block is empty). */
+    padStart = MD2_BUFSIZE - cx->unusedBuffer;
+    memset(&cx->X[MD2_INPUT + padStart], cx->unusedBuffer,
+           cx->unusedBuffer);
+    md2_compress(cx);
+    /* The checksum is processed as one extra block. */
+    memcpy(&cx->X[MD2_INPUT], cx->checksum, MD2_BUFSIZE);
+    md2_compress(cx);
+    if (digestLen)
+        *digestLen = MD2_DIGEST_LEN;
+    memcpy(digest, &cx->X[MD2_CV], MD2_DIGEST_LEN);
+}
+
+/* Number of bytes MD2_Flatten() writes for 'cx'. */
+unsigned int
+MD2_FlattenSize(MD2Context *cx)
+{
+    return sizeof(*cx);
+}
+
+/* Copy the raw context into 'space', which must hold MD2_FlattenSize()
+ * bytes.  Always returns SECSuccess. */
+SECStatus
+MD2_Flatten(MD2Context *cx, unsigned char *space)
+{
+    memcpy(space, cx, sizeof(*cx));
+    return SECSuccess;
+}
+
+/* Allocate a new context and fill it from bytes produced by MD2_Flatten().
+ * 'arg' is unused.  Returns NULL if allocation fails. */
+MD2Context *
+MD2_Resurrect(unsigned char *space, void *arg)
+{
+    MD2Context *cx = MD2_NewContext();
+    if (cx)
+        memcpy(cx, space, sizeof(*cx));
+    return cx;
+}
+
+/* Copy the complete state of 'src' into the existing context 'dest'. */
+void
+MD2_Clone(MD2Context *dest, MD2Context *src)
+{
+    memcpy(dest, src, sizeof *dest);
+}
diff --git a/security/nss/lib/freebl/md5.c b/security/nss/lib/freebl/md5.c
new file mode 100644
index 000000000..bdd36a61b
--- /dev/null
+++ b/security/nss/lib/freebl/md5.c
@@ -0,0 +1,598 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "prerr.h"
+#include "secerr.h"
+
+#include "prtypes.h"
+#include "prlong.h"
+
+#include "blapi.h"
+#include "blapii.h"
+
+#define MD5_HASH_LEN 16
+#define MD5_BUFFER_SIZE 64
+/* Offset in the last block at which the 8-byte length field starts. */
+#define MD5_END_BUFFER (MD5_BUFFER_SIZE - 8)
+
+/* Initial chaining values, loaded by MD5_Begin(). */
+#define CV0_1 0x67452301
+#define CV0_2 0xefcdab89
+#define CV0_3 0x98badcfe
+#define CV0_4 0x10325476
+
+/* Tn_i: additive constant for step i of round n (the 'ti' argument of the
+ * FF/GG/HH/II step macros below). */
+#define T1_0 0xd76aa478
+#define T1_1 0xe8c7b756
+#define T1_2 0x242070db
+#define T1_3 0xc1bdceee
+#define T1_4 0xf57c0faf
+#define T1_5 0x4787c62a
+#define T1_6 0xa8304613
+#define T1_7 0xfd469501
+#define T1_8 0x698098d8
+#define T1_9 0x8b44f7af
+#define T1_10 0xffff5bb1
+#define T1_11 0x895cd7be
+#define T1_12 0x6b901122
+#define T1_13 0xfd987193
+#define T1_14 0xa679438e
+#define T1_15 0x49b40821
+
+#define T2_0 0xf61e2562
+#define T2_1 0xc040b340
+#define T2_2 0x265e5a51
+#define T2_3 0xe9b6c7aa
+#define T2_4 0xd62f105d
+#define T2_5 0x02441453
+#define T2_6 0xd8a1e681
+#define T2_7 0xe7d3fbc8
+#define T2_8 0x21e1cde6
+#define T2_9 0xc33707d6
+#define T2_10 0xf4d50d87
+#define T2_11 0x455a14ed
+#define T2_12 0xa9e3e905
+#define T2_13 0xfcefa3f8
+#define T2_14 0x676f02d9
+#define T2_15 0x8d2a4c8a
+
+#define T3_0 0xfffa3942
+#define T3_1 0x8771f681
+#define T3_2 0x6d9d6122
+#define T3_3 0xfde5380c
+#define T3_4 0xa4beea44
+#define T3_5 0x4bdecfa9
+#define T3_6 0xf6bb4b60
+#define T3_7 0xbebfbc70
+#define T3_8 0x289b7ec6
+#define T3_9 0xeaa127fa
+#define T3_10 0xd4ef3085
+#define T3_11 0x04881d05
+#define T3_12 0xd9d4d039
+#define T3_13 0xe6db99e5
+#define T3_14 0x1fa27cf8
+#define T3_15 0xc4ac5665
+
+#define T4_0 0xf4292244
+#define T4_1 0x432aff97
+#define T4_2 0xab9423a7
+#define T4_3 0xfc93a039
+#define T4_4 0x655b59c3
+#define T4_5 0x8f0ccc92
+#define T4_6 0xffeff47d
+#define T4_7 0x85845dd1
+#define T4_8 0x6fa87e4f
+#define T4_9 0xfe2ce6e0
+#define T4_10 0xa3014314
+#define T4_11 0x4e0811a1
+#define T4_12 0xf7537e82
+#define T4_13 0xbd3af235
+#define T4_14 0x2ad7d2bb
+#define T4_15 0xeb86d391
+
+/* RnBi: index of the 32-bit message word consumed by step i of round n. */
+#define R1B0 0
+#define R1B1 1
+#define R1B2 2
+#define R1B3 3
+#define R1B4 4
+#define R1B5 5
+#define R1B6 6
+#define R1B7 7
+#define R1B8 8
+#define R1B9 9
+#define R1B10 10
+#define R1B11 11
+#define R1B12 12
+#define R1B13 13
+#define R1B14 14
+#define R1B15 15
+
+#define R2B0 1
+#define R2B1 6
+#define R2B2 11
+#define R2B3 0
+#define R2B4 5
+#define R2B5 10
+#define R2B6 15
+#define R2B7 4
+#define R2B8 9
+#define R2B9 14
+#define R2B10 3
+#define R2B11 8
+#define R2B12 13
+#define R2B13 2
+#define R2B14 7
+#define R2B15 12
+
+#define R3B0 5
+#define R3B1 8
+#define R3B2 11
+#define R3B3 14
+#define R3B4 1
+#define R3B5 4
+#define R3B6 7
+#define R3B7 10
+#define R3B8 13
+#define R3B9 0
+#define R3B10 3
+#define R3B11 6
+#define R3B12 9
+#define R3B13 12
+#define R3B14 15
+#define R3B15 2
+
+#define R4B0 0
+#define R4B1 7
+#define R4B2 14
+#define R4B3 5
+#define R4B4 12
+#define R4B5 3
+#define R4B6 10
+#define R4B7 1
+#define R4B8 8
+#define R4B9 15
+#define R4B10 6
+#define R4B11 13
+#define R4B12 4
+#define R4B13 11
+#define R4B14 2
+#define R4B15 9
+
+/* Sn_k: left-rotation amount for round n; k cycles 0..3 over the steps. */
+#define S1_0 7
+#define S1_1 12
+#define S1_2 17
+#define S1_3 22
+
+#define S2_0 5
+#define S2_1 9
+#define S2_2 14
+#define S2_3 20
+
+#define S3_0 4
+#define S3_1 11
+#define S3_2 16
+#define S3_3 23
+
+#define S4_0 6
+#define S4_1 10
+#define S4_2 15
+#define S4_3 21
+
+/* State of one MD5 computation. */
+struct MD5ContextStr {
+    PRUint32 lsbInput; /* low 32 bits of the count of input bytes */
+    PRUint32 msbInput; /* high 32 bits of the count of input bytes */
+    PRUint32 cv[4];    /* chaining variables; become the digest */
+    union {
+        PRUint8 b[64];  /* pending input block, as bytes */
+        PRUint32 w[16]; /* the same block, as 32-bit words */
+    } u;
+};
+
+#define inBuf u.b
+
+/* One-shot MD5 of the NUL-terminated string 'src'; see MD5_HashBuf(). */
+SECStatus
+MD5_Hash(unsigned char *dest, const char *src)
+{
+    return MD5_HashBuf(dest, (const unsigned char *)src, PORT_Strlen(src));
+}
+
+/*
+ * One-shot MD5 of 'src_length' bytes at 'src'.  Writes MD5_HASH_LEN bytes
+ * to 'dest' using a stack context, which is zeroed afterwards.  Always
+ * returns SECSuccess.
+ */
+SECStatus
+MD5_HashBuf(unsigned char *dest, const unsigned char *src, PRUint32 src_length)
+{
+    unsigned int len;
+    MD5Context cx;
+
+    MD5_Begin(&cx);
+    MD5_Update(&cx, src, src_length);
+    MD5_End(&cx, dest, &len, MD5_HASH_LEN);
+    memset(&cx, 0, sizeof cx);
+    return SECSuccess;
+}
+
+/*
+ * Allocate an uninitialized context.  Returns NULL and sets
+ * PR_OUT_OF_MEMORY_ERROR on allocation failure.  The caller must call
+ * MD5_Begin() before use.
+ */
+MD5Context *
+MD5_NewContext(void)
+{
+    /* no need to ZAlloc, MD5_Begin will init the context */
+    MD5Context *cx = (MD5Context *)PORT_Alloc(sizeof(MD5Context));
+    if (cx == NULL) {
+        PORT_SetError(PR_OUT_OF_MEMORY_ERROR);
+        return NULL;
+    }
+    return cx;
+}
+
+/* Zero the context and, when 'freeit' is set, release it.  'cx' is
+ * dereferenced unconditionally, so it must not be NULL. */
+void
+MD5_DestroyContext(MD5Context *cx, PRBool freeit)
+{
+    memset(cx, 0, sizeof *cx);
+    if (freeit) {
+        PORT_Free(cx);
+    }
+}
+
+/* Reset the byte counter and load the initial chaining values.  The input
+ * buffer is deliberately left as it is. */
+void
+MD5_Begin(MD5Context *cx)
+{
+    cx->lsbInput = 0;
+    cx->msbInput = 0;
+    /*    memset(cx->inBuf, 0, sizeof(cx->inBuf)); */
+    cx->cv[0] = CV0_1;
+    cx->cv[1] = CV0_2;
+    cx->cv[2] = CV0_3;
+    cx->cv[3] = CV0_4;
+}
+
+/* Rotate the 32-bit value i32 left by s bits.  Needs a PRUint32 'tmp' in
+ * the enclosing scope. */
+#define cls(i32, s) (tmp = i32, tmp << s | tmp >> (32 - s))
+
+/* Add 'addend' to the 64-bit counter held in (sumhigh, sumlow), carrying
+ * into sumhigh when sumlow wraps.  Expands to more than one statement, so
+ * it must not be used as the unbraced body of an if/else/loop. */
+#if defined(SOLARIS) || defined(HPUX)
+#define addto64(sumhigh, sumlow, addend) \
+    sumlow += addend;                    \
+    sumhigh += (sumlow < addend);
+#else
+#define addto64(sumhigh, sumlow, addend) \
+    sumlow += addend;                    \
+    if (sumlow < addend)                 \
+        ++sumhigh;
+#endif
+
+/* lendian(): identity on little-endian builds; otherwise reverses the byte
+ * order of a 32-bit value (and needs a PRUint32 'tmp' in scope). */
+#define MASK 0x00ff00ff
+#ifdef IS_LITTLE_ENDIAN
+#define lendian(i32) \
+    (i32)
+#else
+#define lendian(i32) \
+    (tmp = (i32 >> 16) | (i32 << 16), ((tmp & MASK) << 8) | ((tmp >> 8) & MASK))
+#endif
+
+#ifndef IS_LITTLE_ENDIAN
+
+/* Assemble a 32-bit value from four bytes, least significant byte first. */
+#define lebytes(b4) \
+    ((b4)[3] << 24 | (b4)[2] << 16 | (b4)[1] << 8 | (b4)[0])
+
+/* Byte-swap, in place, all 16 words of the context's input block. */
+static void
+md5_prep_state_le(MD5Context *cx)
+{
+    PRUint32 tmp;
+    cx->u.w[0] = lendian(cx->u.w[0]);
+    cx->u.w[1] = lendian(cx->u.w[1]);
+    cx->u.w[2] = lendian(cx->u.w[2]);
+    cx->u.w[3] = lendian(cx->u.w[3]);
+    cx->u.w[4] = lendian(cx->u.w[4]);
+    cx->u.w[5] = lendian(cx->u.w[5]);
+    cx->u.w[6] = lendian(cx->u.w[6]);
+    cx->u.w[7] = lendian(cx->u.w[7]);
+    cx->u.w[8] = lendian(cx->u.w[8]);
+    cx->u.w[9] = lendian(cx->u.w[9]);
+    cx->u.w[10] = lendian(cx->u.w[10]);
+    cx->u.w[11] = lendian(cx->u.w[11]);
+    cx->u.w[12] = lendian(cx->u.w[12]);
+    cx->u.w[13] = lendian(cx->u.w[13]);
+    cx->u.w[14] = lendian(cx->u.w[14]);
+    cx->u.w[15] = lendian(cx->u.w[15]);
+}
+
+/* Load 64 bytes from 'beBuf' into the context's word buffer, reading each
+ * word least significant byte first. */
+static void
+md5_prep_buffer_le(MD5Context *cx, const PRUint8 *beBuf)
+{
+    cx->u.w[0] = lebytes(&beBuf[0]);
+    cx->u.w[1] = lebytes(&beBuf[4]);
+    cx->u.w[2] = lebytes(&beBuf[8]);
+    cx->u.w[3] = lebytes(&beBuf[12]);
+    cx->u.w[4] = lebytes(&beBuf[16]);
+    cx->u.w[5] = lebytes(&beBuf[20]);
+    cx->u.w[6] = lebytes(&beBuf[24]);
+    cx->u.w[7] = lebytes(&beBuf[28]);
+    cx->u.w[8] = lebytes(&beBuf[32]);
+    cx->u.w[9] = lebytes(&beBuf[36]);
+    cx->u.w[10] = lebytes(&beBuf[40]);
+    cx->u.w[11] = lebytes(&beBuf[44]);
+    cx->u.w[12] = lebytes(&beBuf[48]);
+    cx->u.w[13] = lebytes(&beBuf[52]);
+    cx->u.w[14] = lebytes(&beBuf[56]);
+    cx->u.w[15] = lebytes(&beBuf[60]);
+}
+#endif
+
+/* Bitwise mixing functions, one per round. */
+#define F(X, Y, Z) \
+    ((X & Y) | ((~X) & Z))
+
+#define G(X, Y, Z) \
+    ((X & Z) | (Y & (~Z)))
+
+#define H(X, Y, Z) \
+    (X ^ Y ^ Z)
+
+#define I(X, Y, Z) \
+    (Y ^ (X | (~Z)))
+
+/* One step of rounds 1-4: mix, add message word and constant, rotate left
+ * by s, add b; the result replaces a. */
+#define FF(a, b, c, d, bufint, s, ti) \
+    a = b + cls(a + F(b, c, d) + bufint + ti, s)
+
+#define GG(a, b, c, d, bufint, s, ti) \
+    a = b + cls(a + G(b, c, d) + bufint + ti, s)
+
+#define HH(a, b, c, d, bufint, s, ti) \
+    a = b + cls(a + H(b, c, d) + bufint + ti, s)
+
+#define II(a, b, c, d, bufint, s, ti) \
+    a = b + cls(a + I(b, c, d) + bufint + ti, s)
+
+/*
+ * Run the four 16-step rounds over the 16 words at 'wBuf' and add the
+ * result into the chaining variables cx->cv.  'wBuf' may be the context's
+ * own buffer or caller memory; MD5_Update() may pass an unaligned pointer
+ * when HAVE_UNALIGNED_ACCESS is defined.
+ */
+static void NO_SANITIZE_ALIGNMENT
+md5_compress(MD5Context *cx, const PRUint32 *wBuf)
+{
+    PRUint32 a, b, c, d;
+    PRUint32 tmp;
+    a = cx->cv[0];
+    b = cx->cv[1];
+    c = cx->cv[2];
+    d = cx->cv[3];
+    /* Round 1 */
+    FF(a, b, c, d, wBuf[R1B0], S1_0, T1_0);
+    FF(d, a, b, c, wBuf[R1B1], S1_1, T1_1);
+    FF(c, d, a, b, wBuf[R1B2], S1_2, T1_2);
+    FF(b, c, d, a, wBuf[R1B3], S1_3, T1_3);
+    FF(a, b, c, d, wBuf[R1B4], S1_0, T1_4);
+    FF(d, a, b, c, wBuf[R1B5], S1_1, T1_5);
+    FF(c, d, a, b, wBuf[R1B6], S1_2, T1_6);
+    FF(b, c, d, a, wBuf[R1B7], S1_3, T1_7);
+    FF(a, b, c, d, wBuf[R1B8], S1_0, T1_8);
+    FF(d, a, b, c, wBuf[R1B9], S1_1, T1_9);
+    FF(c, d, a, b, wBuf[R1B10], S1_2, T1_10);
+    FF(b, c, d, a, wBuf[R1B11], S1_3, T1_11);
+    FF(a, b, c, d, wBuf[R1B12], S1_0, T1_12);
+    FF(d, a, b, c, wBuf[R1B13], S1_1, T1_13);
+    FF(c, d, a, b, wBuf[R1B14], S1_2, T1_14);
+    FF(b, c, d, a, wBuf[R1B15], S1_3, T1_15);
+    /* Round 2 */
+    GG(a, b, c, d, wBuf[R2B0], S2_0, T2_0);
+    GG(d, a, b, c, wBuf[R2B1], S2_1, T2_1);
+    GG(c, d, a, b, wBuf[R2B2], S2_2, T2_2);
+    GG(b, c, d, a, wBuf[R2B3], S2_3, T2_3);
+    GG(a, b, c, d, wBuf[R2B4], S2_0, T2_4);
+    GG(d, a, b, c, wBuf[R2B5], S2_1, T2_5);
+    GG(c, d, a, b, wBuf[R2B6], S2_2, T2_6);
+    GG(b, c, d, a, wBuf[R2B7], S2_3, T2_7);
+    GG(a, b, c, d, wBuf[R2B8], S2_0, T2_8);
+    GG(d, a, b, c, wBuf[R2B9], S2_1, T2_9);
+    GG(c, d, a, b, wBuf[R2B10], S2_2, T2_10);
+    GG(b, c, d, a, wBuf[R2B11], S2_3, T2_11);
+    GG(a, b, c, d, wBuf[R2B12], S2_0, T2_12);
+    GG(d, a, b, c, wBuf[R2B13], S2_1, T2_13);
+    GG(c, d, a, b, wBuf[R2B14], S2_2, T2_14);
+    GG(b, c, d, a, wBuf[R2B15], S2_3, T2_15);
+    /* Round 3 */
+    HH(a, b, c, d, wBuf[R3B0], S3_0, T3_0);
+    HH(d, a, b, c, wBuf[R3B1], S3_1, T3_1);
+    HH(c, d, a, b, wBuf[R3B2], S3_2, T3_2);
+    HH(b, c, d, a, wBuf[R3B3], S3_3, T3_3);
+    HH(a, b, c, d, wBuf[R3B4], S3_0, T3_4);
+    HH(d, a, b, c, wBuf[R3B5], S3_1, T3_5);
+    HH(c, d, a, b, wBuf[R3B6], S3_2, T3_6);
+    HH(b, c, d, a, wBuf[R3B7], S3_3, T3_7);
+    HH(a, b, c, d, wBuf[R3B8], S3_0, T3_8);
+    HH(d, a, b, c, wBuf[R3B9], S3_1, T3_9);
+    HH(c, d, a, b, wBuf[R3B10], S3_2, T3_10);
+    HH(b, c, d, a, wBuf[R3B11], S3_3, T3_11);
+    HH(a, b, c, d, wBuf[R3B12], S3_0, T3_12);
+    HH(d, a, b, c, wBuf[R3B13], S3_1, T3_13);
+    HH(c, d, a, b, wBuf[R3B14], S3_2, T3_14);
+    HH(b, c, d, a, wBuf[R3B15], S3_3, T3_15);
+    /* Round 4 */
+    II(a, b, c, d, wBuf[R4B0], S4_0, T4_0);
+    II(d, a, b, c, wBuf[R4B1], S4_1, T4_1);
+    II(c, d, a, b, wBuf[R4B2], S4_2, T4_2);
+    II(b, c, d, a, wBuf[R4B3], S4_3, T4_3);
+    II(a, b, c, d, wBuf[R4B4], S4_0, T4_4);
+    II(d, a, b, c, wBuf[R4B5], S4_1, T4_5);
+    II(c, d, a, b, wBuf[R4B6], S4_2, T4_6);
+    II(b, c, d, a, wBuf[R4B7], S4_3, T4_7);
+    II(a, b, c, d, wBuf[R4B8], S4_0, T4_8);
+    II(d, a, b, c, wBuf[R4B9], S4_1, T4_9);
+    II(c, d, a, b, wBuf[R4B10], S4_2, T4_10);
+    II(b, c, d, a, wBuf[R4B11], S4_3, T4_11);
+    II(a, b, c, d, wBuf[R4B12], S4_0, T4_12);
+    II(d, a, b, c, wBuf[R4B13], S4_1, T4_13);
+    II(c, d, a, b, wBuf[R4B14], S4_2, T4_14);
+    II(b, c, d, a, wBuf[R4B15], S4_3, T4_15);
+    cx->cv[0] += a;
+    cx->cv[1] += b;
+    cx->cv[2] += c;
+    cx->cv[3] += d;
+}
+
+/*
+ * Feed 'inputLen' bytes from 'input' into the hash.  May be called any
+ * number of times with chunks of any size; the position within the pending
+ * block is derived from the running byte counter.
+ */
+void
+MD5_Update(MD5Context *cx, const unsigned char *input, unsigned int inputLen)
+{
+    PRUint32 bytesToConsume;
+    /* bytes already pending in the buffer (counter value before this call) */
+    PRUint32 inBufIndex = cx->lsbInput & 63;
+    const PRUint32 *wBuf;
+
+    /* Add the number of input bytes to the 64-bit input counter. */
+    addto64(cx->msbInput, cx->lsbInput, inputLen);
+    if (inBufIndex) {
+        /* There is already data in the buffer.  Fill with input. */
+        bytesToConsume = PR_MIN(inputLen, MD5_BUFFER_SIZE - inBufIndex);
+        memcpy(&cx->inBuf[inBufIndex], input, bytesToConsume);
+        if (inBufIndex + bytesToConsume >= MD5_BUFFER_SIZE) {
+/* The buffer is filled.  Run the compression function. */
+#ifndef IS_LITTLE_ENDIAN
+            md5_prep_state_le(cx);
+#endif
+            md5_compress(cx, cx->u.w);
+        }
+        /* Remaining input. */
+        inputLen -= bytesToConsume;
+        input += bytesToConsume;
+    }
+
+    /* Iterate over 64-byte chunks of the message. */
+    while (inputLen >= MD5_BUFFER_SIZE) {
+#ifdef IS_LITTLE_ENDIAN
+#ifdef HAVE_UNALIGNED_ACCESS
+        /* x86 can handle arithmetic on non-word-aligned buffers */
+        wBuf = (PRUint32 *)input;
+#else
+        if ((ptrdiff_t)input & 0x3) {
+            /* buffer not aligned, copy it to force alignment */
+            memcpy(cx->inBuf, input, MD5_BUFFER_SIZE);
+            wBuf = cx->u.w;
+        } else {
+            /* buffer is aligned */
+            wBuf = (PRUint32 *)input;
+        }
+#endif
+#else
+        md5_prep_buffer_le(cx, input);
+        wBuf = cx->u.w;
+#endif
+        md5_compress(cx, wBuf);
+        inputLen -= MD5_BUFFER_SIZE;
+        input += MD5_BUFFER_SIZE;
+    }
+
+    /* Tail of message (message bytes mod 64). */
+    if (inputLen)
+        memcpy(cx->inBuf, input, inputLen);
+}
+
+/* Padding source: a single 0x80 byte followed by zeros.  MD5_End() feeds a
+ * prefix of this (at most MD5_END_BUFFER + MD5_BUFFER_SIZE - 56 = 64 bytes)
+ * through MD5_Update(). */
+static const unsigned char padbytes[] = {
+    0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+};
+
+/*
+ * Finish the computation and write MD5_HASH_LEN bytes to 'digest'.
+ * 'maxDigestLen' must be at least MD5_HASH_LEN, otherwise
+ * SEC_ERROR_INVALID_ARGS is set and nothing is written.  'digestLen' may be
+ * NULL.  The context is modified (padded, and on big-endian builds its
+ * chaining variables are byte-swapped), so call MD5_Begin() before reuse.
+ */
+void
+MD5_End(MD5Context *cx, unsigned char *digest,
+        unsigned int *digestLen, unsigned int maxDigestLen)
+{
+#ifndef IS_LITTLE_ENDIAN
+    PRUint32 tmp;
+#endif
+    PRUint32 lowInput, highInput;
+    PRUint32 inBufIndex = cx->lsbInput & 63;
+
+    if (maxDigestLen < MD5_HASH_LEN) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return;
+    }
+
+    /* Copy out the length of bits input before padding. */
+    lowInput = cx->lsbInput;
+    highInput = (cx->msbInput << 3) | (lowInput >> 29);
+    lowInput <<= 3;
+
+    /* Pad up to byte 56 of a block, spilling into an extra block when fewer
+     * than 9 bytes are free in the current one. */
+    if (inBufIndex < MD5_END_BUFFER) {
+        MD5_Update(cx, padbytes, MD5_END_BUFFER - inBufIndex);
+    } else {
+        MD5_Update(cx, padbytes,
+                   MD5_END_BUFFER + MD5_BUFFER_SIZE - inBufIndex);
+    }
+
+    /* Store the number of bits input (before padding) in final 64 bits. */
+    cx->u.w[14] = lendian(lowInput);
+    cx->u.w[15] = lendian(highInput);
+
+/* Final call to compress. */
+#ifndef IS_LITTLE_ENDIAN
+    md5_prep_state_le(cx);
+#endif
+    md5_compress(cx, cx->u.w);
+
+    /* Copy the resulting values out of the chain variables into return buf. */
+    if (digestLen)
+        *digestLen = MD5_HASH_LEN;
+#ifndef IS_LITTLE_ENDIAN
+    cx->cv[0] = lendian(cx->cv[0]);
+    cx->cv[1] = lendian(cx->cv[1]);
+    cx->cv[2] = lendian(cx->cv[2]);
+    cx->cv[3] = lendian(cx->cv[3]);
+#endif
+    memcpy(digest, cx->cv, MD5_HASH_LEN);
+}
+
+/*
+ * Write the current chaining variables to 'digest' without padding or
+ * finalizing; the context itself is left unchanged.  Same argument checks
+ * as MD5_End().
+ */
+void
+MD5_EndRaw(MD5Context *cx, unsigned char *digest,
+           unsigned int *digestLen, unsigned int maxDigestLen)
+{
+#ifndef IS_LITTLE_ENDIAN
+    PRUint32 tmp;
+#endif
+    PRUint32 cv[4];
+
+    if (maxDigestLen < MD5_HASH_LEN) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return;
+    }
+
+    /* work on a copy so the context's chaining variables stay intact */
+    memcpy(cv, cx->cv, sizeof(cv));
+#ifndef IS_LITTLE_ENDIAN
+    cv[0] = lendian(cv[0]);
+    cv[1] = lendian(cv[1]);
+    cv[2] = lendian(cv[2]);
+    cv[3] = lendian(cv[3]);
+#endif
+    memcpy(digest, cv, MD5_HASH_LEN);
+    if (digestLen)
+        *digestLen = MD5_HASH_LEN;
+}
+
+/* Number of bytes MD5_Flatten() writes for 'cx'. */
+unsigned int
+MD5_FlattenSize(MD5Context *cx)
+{
+    return sizeof(*cx);
+}
+
+/* Copy the raw context into 'space', which must hold MD5_FlattenSize()
+ * bytes.  Always returns SECSuccess. */
+SECStatus
+MD5_Flatten(MD5Context *cx, unsigned char *space)
+{
+    memcpy(space, cx, sizeof(*cx));
+    return SECSuccess;
+}
+
+/* Allocate a new context and fill it from bytes produced by MD5_Flatten().
+ * 'arg' is unused.  Returns NULL if allocation fails. */
+MD5Context *
+MD5_Resurrect(unsigned char *space, void *arg)
+{
+    MD5Context *cx = MD5_NewContext();
+    if (cx)
+        memcpy(cx, space, sizeof(*cx));
+    return cx;
+}
+
+/* Copy the complete state of 'src' into the existing context 'dest'. */
+void
+MD5_Clone(MD5Context *dest, MD5Context *src)
+{
+    memcpy(dest, src, sizeof *dest);
+}
+
+/* Not implemented: only sets PR_NOT_IMPLEMENTED_ERROR. */
+void
+MD5_TraceState(MD5Context *cx)
+{
+    PORT_SetError(PR_NOT_IMPLEMENTED_ERROR);
+}
diff --git a/security/nss/lib/freebl/mknewpc2.c b/security/nss/lib/freebl/mknewpc2.c
new file mode 100644
index 000000000..6b2968816
--- /dev/null
+++ b/security/nss/lib/freebl/mknewpc2.c
@@ -0,0 +1,208 @@
+/*
+ * mknewpc2.c
+ *
+ * Generate PC-2 tables for DES-150 library
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/ + +typedef unsigned char BYTE; +typedef unsigned int HALF; + +#define DES_ENCRYPT 0 +#define DES_DECRYPT 1 + +/* two 28-bit registers defined in key schedule production process */ +static HALF C0, D0; + +static HALF L0, R0; + +/* key schedule, 16 internal keys, each with 8 6-bit parts */ +static BYTE KS[8][16]; + +/* + * This table takes the 56 bits in C0 and D0 and shows how they are + * permuted into the 8 6-bit parts of the key in the key schedule. + * The bits of C0 are numbered left to right, 1-28. + * The bits of D0 are numbered left to right, 29-56. + * Zeros in this table represent bits that are always zero. + * Note that all the bits in the first 4 rows come from C0, + * and all the bits in the second 4 rows come from D0. + */ +static const BYTE PC2[64] = { + 14, 17, 11, 24, 1, 5, 0, 0, /* S1 */ + 3, 28, 15, 6, 21, 10, 0, 0, /* S2 */ + 23, 19, 12, 4, 26, 8, 0, 0, /* S3 */ + 16, 7, 27, 20, 13, 2, 0, 0, /* S4 */ + + 41, 52, 31, 37, 47, 55, 0, 0, /* S5 */ + 30, 40, 51, 45, 33, 48, 0, 0, /* S6 */ + 44, 49, 39, 56, 34, 53, 0, 0, /* S7 */ + 46, 42, 50, 36, 29, 32, 0, 0 /* S8 */ +}; + +/* This table represents the same info as PC2, except that + * The bits of C0 and D0 are each numbered right to left, 0-27. + * -1 values indicate bits that are always zero. + * As before all the bits in the first 4 rows come from C0, + * and all the bits in the second 4 rows come from D0. + */ +static signed char PC2a[64] = { + /* bits of C0 */ + 14, 11, 17, 4, 27, 23, -1, -1, /* S1 */ + 25, 0, 13, 22, 7, 18, -1, -1, /* S2 */ + 5, 9, 16, 24, 2, 20, -1, -1, /* S3 */ + 12, 21, 1, 8, 15, 26, -1, -1, /* S4 */ + /* bits of D0 */ + 15, 4, 25, 19, 9, 1, -1, -1, /* S5 */ + 26, 16, 5, 11, 23, 8, -1, -1, /* S6 */ + 12, 7, 17, 0, 22, 3, -1, -1, /* S7 */ + 10, 14, 6, 20, 27, 24, -1, -1 /* S8 */ +}; + +/* This table represents the same info as PC2a, except that + * The order of the rows has been changed to increase the efficiency + * with which the key schedule is created. 
+ * Fewer shifts and ANDs are required to make the KS from these. + */ +static const signed char PC2b[64] = { + /* bits of C0 */ + 14, 11, 17, 4, 27, 23, -1, -1, /* S1 */ + 5, 9, 16, 24, 2, 20, -1, -1, /* S3 */ + 25, 0, 13, 22, 7, 18, -1, -1, /* S2 */ + 12, 21, 1, 8, 15, 26, -1, -1, /* S4 */ + /* bits of D0 */ + 26, 16, 5, 11, 23, 8, -1, -1, /* S6 */ + 10, 14, 6, 20, 27, 24, -1, -1, /* S8 */ + 15, 4, 25, 19, 9, 1, -1, -1, /* S5 */ + 12, 7, 17, 0, 22, 3, -1, -1 /* S7 */ +}; + +/* Only 24 of the 28 bits in each of C0 and D0 are used in PC2. + * The used bits of C0 and D0 are each grouped into 4 groups of 6, + * so that the PC2 permutation can be accomplished with 4 lookups + * per half in tables of 64 entries. + * The following table shows how the bits of C0 and D0 are grouped + * into indexes for the respective table lookups. + * Bits are numbered right-to-left, 0-27, as in PC2b. + */ +static BYTE NDX[48] = { + /* Bits of C0 */ + 27, 26, 25, 24, 23, 22, /* C0 table 0 */ + 18, 17, 16, 15, 14, 13, /* C0 table 1 */ + 9, 8, 7, 2, 1, 0, /* C0 table 2 */ + 5, 4, 21, 20, 12, 11, /* C0 table 3 */ + /* bits of D0 */ + 27, 26, 25, 24, 23, 22, /* D0 table 0 */ + 20, 19, 17, 16, 15, 14, /* D0 table 1 */ + 12, 11, 10, 9, 8, 7, /* D0 table 2 */ + 6, 5, 4, 3, 1, 0 /* D0 table 3 */ +}; + +/* Here's the code that does that grouping. 
+ left = PC2LOOKUP(0, 0, ((c0 >> 22) & 0x3F) ); + left |= PC2LOOKUP(0, 1, ((c0 >> 13) & 0x3F) ); + left |= PC2LOOKUP(0, 2, ((c0 >> 4) & 0x38) | (c0 & 0x7) ); + left |= PC2LOOKUP(0, 3, ((c0>>18)&0xC) | ((c0>>11)&0x3) | (c0&0x30)); + + right = PC2LOOKUP(1, 0, ((d0 >> 22) & 0x3F) ); + right |= PC2LOOKUP(1, 1, ((d0 >> 15) & 0x30) | ((d0 >> 14) & 0xf) ); + right |= PC2LOOKUP(1, 2, ((d0 >> 7) & 0x3F) ); + right |= PC2LOOKUP(1, 3, ((d0 >> 1) & 0x3C) | (d0 & 0x3)); +*/ + +void +make_pc2a(void) +{ /* Recompute PC2a from PC2 (bits renumbered right to left, 0-27, within each half) and print it eight entries per row. */ + + int i, j; + + for (i = 0; i < 64; ++i) { + j = PC2[i]; + if (j == 0) + j = -1; + else if (j < 29) + j = 28 - j; /* C0 bit 1..28 becomes 27..0 */ + else + j = 56 - j; /* D0 bit 29..56 becomes 27..0 */ + PC2a[i] = j; + } + for (i = 0; i < 64; i += 8) { + printf("%3d,%3d,%3d,%3d,%3d,%3d,%3d,%3d,\n", + PC2a[i + 0], PC2a[i + 1], PC2a[i + 2], PC2a[i + 3], + PC2a[i + 4], PC2a[i + 5], PC2a[i + 6], PC2a[i + 7]); + } +} + +HALF PC2cd0[64]; + +HALF PC_2H[8][64]; + +void +mktable() +{ /* Build the eight 64-entry PC_2H lookup tables from PC2b and NDX and print them as C initializers. Adds 32 to the first 24 NDX entries in place, so it is only correct to call once. 
*/ + int i; + int table; + const BYTE* ndx = NDX; + HALF mask; + + mask = 0x80000000; /* output bits are handed out from the most significant bit down */ + for (i = 0; i < 32; ++i, mask >>= 1) { + int bit = PC2b[i]; + if (bit < 0) + continue; + PC2cd0[bit + 32] = mask; /* C0 source bits are stored at indexes 32 and up */ + } + + mask = 0x80000000; + for (i = 32; i < 64; ++i, mask >>= 1) { + int bit = PC2b[i]; + if (bit < 0) + continue; + PC2cd0[bit] = mask; /* D0 source bits are stored at indexes 0..27 */ + } + +#if DEBUG + for (i = 0; i < 64; ++i) { + printf("0x%08x,\n", PC2cd0[i]); + } +#endif + for (i = 0; i < 24; ++i) { + NDX[i] += 32; /* because c0 is the upper half */ + } + + for (table = 0; table < 8; ++table) { + HALF bitvals[6]; + for (i = 0; i < 6; ++i) { + bitvals[5 - i] = PC2cd0[*ndx++]; /* NDX lists each group high bit first, so bitvals[0] belongs to the lowest index bit */ + } + for (i = 0; i < 64; ++i) { + int j; + int k = 0; + HALF value = 0; + + for (j = i; j; j >>= 1, ++k) { /* OR together the masks selected by the set bits of index i */ + if (j & 1) { + value |= bitvals[k]; + } + } + PC_2H[table][i] = value; + } + printf("/* table %d */ {\n", table); + for (i = 0; i < 64; i += 4) { + printf(" 0x%08x, 0x%08x, 0x%08x, 0x%08x, \n", + PC_2H[table][i], PC_2H[table][i + 1], + PC_2H[table][i + 2], PC_2H[table][i + 3]); + } + printf(" },\n"); + } +} + +int +main(void) +{ + /* 
make_pc2a(); */ + mktable(); + return 0; +} diff --git a/security/nss/lib/freebl/mksp.c b/security/nss/lib/freebl/mksp.c new file mode 100644 index 000000000..ca83ac8e7 --- /dev/null +++ b/security/nss/lib/freebl/mksp.c @@ -0,0 +1,119 @@ +/* + * mksp.c + * + * Generate SP tables for DES-150 library + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include <stdio.h> + +/* + * sboxes - the tables for the s-box functions + * from FIPS 46, pages 15-16. + */ +unsigned char S[8][64] = { + /* Func S1 = */ + { 14, 0, 4, 15, 13, 7, 1, 4, 2, 14, 15, 2, 11, 13, 8, 1, + 3, 10, 10, 6, 6, 12, 12, 11, 5, 9, 9, 5, 0, 3, 7, 8, + 4, 15, 1, 12, 14, 8, 8, 2, 13, 4, 6, 9, 2, 1, 11, 7, + 15, 5, 12, 11, 9, 3, 7, 14, 3, 10, 10, 0, 5, 6, 0, 13 }, + /* Func S2 = */ + { 15, 3, 1, 13, 8, 4, 14, 7, 6, 15, 11, 2, 3, 8, 4, 14, + 9, 12, 7, 0, 2, 1, 13, 10, 12, 6, 0, 9, 5, 11, 10, 5, + 0, 13, 14, 8, 7, 10, 11, 1, 10, 3, 4, 15, 13, 4, 1, 2, + 5, 11, 8, 6, 12, 7, 6, 12, 9, 0, 3, 5, 2, 14, 15, 9 }, + /* Func S3 = */ + { 10, 13, 0, 7, 9, 0, 14, 9, 6, 3, 3, 4, 15, 6, 5, 10, + 1, 2, 13, 8, 12, 5, 7, 14, 11, 12, 4, 11, 2, 15, 8, 1, + 13, 1, 6, 10, 4, 13, 9, 0, 8, 6, 15, 9, 3, 8, 0, 7, + 11, 4, 1, 15, 2, 14, 12, 3, 5, 11, 10, 5, 14, 2, 7, 12 }, + /* Func S4 = */ + { 7, 13, 13, 8, 14, 11, 3, 5, 0, 6, 6, 15, 9, 0, 10, 3, + 1, 4, 2, 7, 8, 2, 5, 12, 11, 1, 12, 10, 4, 14, 15, 9, + 10, 3, 6, 15, 9, 0, 0, 6, 12, 10, 11, 1, 7, 13, 13, 8, + 15, 9, 1, 4, 3, 5, 14, 11, 5, 12, 2, 7, 8, 2, 4, 14 }, + /* Func S5 = */ + { 2, 14, 12, 11, 4, 2, 1, 12, 7, 4, 10, 7, 11, 13, 6, 1, + 8, 5, 5, 0, 3, 15, 15, 10, 13, 3, 0, 9, 14, 8, 9, 6, + 4, 11, 2, 8, 1, 12, 11, 7, 10, 1, 13, 14, 7, 2, 8, 13, + 15, 6, 9, 15, 12, 0, 5, 9, 6, 10, 3, 4, 0, 5, 14, 3 }, + /* Func S6 = */ + { 12, 10, 1, 15, 10, 4, 15, 2, 9, 7, 2, 12, 6, 9, 8, 5, + 0, 6, 13, 1, 3, 13, 4, 14, 14, 0, 7, 11, 5, 3, 
11, 8, + 9, 4, 14, 3, 15, 2, 5, 12, 2, 9, 8, 5, 12, 15, 3, 10, + 7, 11, 0, 14, 4, 1, 10, 7, 1, 6, 13, 0, 11, 8, 6, 13 }, + /* Func S7 = */ + { 4, 13, 11, 0, 2, 11, 14, 7, 15, 4, 0, 9, 8, 1, 13, 10, + 3, 14, 12, 3, 9, 5, 7, 12, 5, 2, 10, 15, 6, 8, 1, 6, + 1, 6, 4, 11, 11, 13, 13, 8, 12, 1, 3, 4, 7, 10, 14, 7, + 10, 9, 15, 5, 6, 0, 8, 15, 0, 14, 5, 2, 9, 3, 2, 12 }, + /* Func S8 = */ + { 13, 1, 2, 15, 8, 13, 4, 8, 6, 10, 15, 3, 11, 7, 1, 4, + 10, 12, 9, 5, 3, 6, 14, 11, 5, 0, 0, 14, 12, 9, 7, 2, + 7, 2, 11, 1, 4, 14, 1, 7, 9, 4, 12, 10, 14, 8, 2, 13, + 0, 15, 6, 12, 10, 9, 13, 0, 15, 3, 3, 5, 5, 6, 8, 11 } +}; + +/* + * Permutation function for results from s-boxes + * from FIPS 46 pages 12 and 16. + * P = + */ +unsigned char P[32] = { + 16, 7, 20, 21, 29, 12, 28, 17, + 1, 15, 23, 26, 5, 18, 31, 10, + 2, 8, 24, 14, 32, 27, 3, 9, + 19, 13, 30, 6, 22, 11, 4, 25 +}; + +unsigned int Pinv[32]; +unsigned int SP[8][64]; + +void +makePinv(void) +{ /* Fill Pinv so that Pinv[32 - P[i]] is the single-bit mask 0x80000000 shifted right by i. */ + int i; + unsigned int Pi = 0x80000000; + for (i = 0; i < 32; ++i) { + int j = 32 - P[i]; + Pinv[j] = Pi; + Pi >>= 1; + } +} + +void +makeSP(void) +{ /* For every S-box entry, OR together the Pinv masks of its set bits, rotate the result, store it in SP and print SP as C initializers. Requires makePinv to have run first. 
*/ + int box; + for (box = 0; box < 8; ++box) { + int item; + printf("/* box S%d */ {\n", box + 1); + for (item = 0; item < 64; ++item) { + unsigned int s = S[box][item]; + unsigned int val = 0; + unsigned int bitnum = (7 - box) * 4; /* box S1 uses Pinv[28..31], box S8 uses Pinv[0..3] */ + for (; s; s >>= 1, ++bitnum) { + if (s & 1) { + val |= Pinv[bitnum]; + } + } + val = (val << 3) | (val >> 29); /* rotate left by 3 bits */ + SP[box][item] = val; + } + for (item = 0; item < 64; item += 4) { + printf("\t0x%08x, 0x%08x, 0x%08x, 0x%08x,\n", + SP[box][item], SP[box][item + 1], SP[box][item + 2], SP[box][item + 3]); + } + printf(" },\n"); + } +} + +int +main() +{ + makePinv(); + makeSP(); + return 0; +} diff --git a/security/nss/lib/freebl/mpi/Makefile b/security/nss/lib/freebl/mpi/Makefile new file mode 100644 index 000000000..0dee5bed1 --- /dev/null +++ b/security/nss/lib/freebl/mpi/Makefile @@ -0,0 +1,244 @@ +# +# Makefile for MPI library + +# This Source Code Form is 
subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +## Define CC to be the C compiler you wish to use. The GNU cc +## compiler (gcc) should work, at the very least +#CC=cc +#CC=gcc + +## +## Define PERL to point to your local Perl interpreter. It +## should be Perl 5.x, although it's conceivable that Perl 4 +## might work ... I haven't tested it. +## +#PERL=/usr/bin/perl +#PERL=perl + +include target.mk + +CFLAGS+= $(XCFLAGS) + +## +## Define LIBS to include any libraries you need to link against. +## If NO_TABLE is defined, LIBS should include '-lm' or whatever is +## necessary to bring in the math library. Otherwise, it can be +## left alone, unless your system has other peculiar requirements. +## +LIBS=#-lmalloc#-lefence#-lm + +## +## Define RANLIB to be the library header randomizer; you might not +## need this on some systems (just set it to 'echo' on these systems, +## such as IRIX) +## +RANLIB=echo + +## +## This is the version string used for the documentation and +## building the distribution tarball. Don't mess with it unless +## you are releasing a new version +VERS=1.7p6 + +## ---------------------------------------------------------------------- +## You probably don't need to change anything below this line... 
+## + +## +## This is the list of source files that need to be packed into +## the distribution file +SRCS= mpi.c mpprime.c mplogic.c mp_gf2m.c mpmontg.c mpi-test.c primes.c \ + mpcpucache.c tests/ \ + utils/gcd.c utils/invmod.c utils/lap.c \ + utils/ptab.pl utils/sieve.c utils/isprime.c\ + utils/dec2hex.c utils/hex2dec.c utils/bbs_rand.c \ + utils/bbsrand.c utils/prng.c utils/primegen.c \ + utils/basecvt.c utils/makeprime.c\ + utils/fact.c utils/exptmod.c utils/pi.c utils/metime.c \ + utils/mpi.h utils/mpprime.h mulsqr.c \ + make-test-arrays test-arrays.txt all-tests make-logtab \ + types.pl stats timetest multest + +## These are the header files that go into the distribution file +HDRS=mpi.h mpi-config.h utils/mpi.h utils/mpi-config.h mpprime.h mplogic.h mp_gf2m.h \ + mp_gf2m-priv.h utils/bbs_rand.h tests/mpi.h tests/mpprime.h + +## These are the documentation files that go into the distribution file +DOCS=README doc utils/README utils/PRIMES + +## This is the list of tools built by 'make tools' +TOOLS=gcd invmod isprime lap dec2hex hex2dec primegen prng \ + basecvt fact exptmod pi makeprime identest + +LIBOBJS = mpprime.o mpmontg.o mplogic.o mp_gf2m.o mpi.o mpcpucache.o $(AS_OBJS) +LIBHDRS = mpi-config.h mpi-priv.h mpi.h +APPHDRS = mpi-config.h mpi.h mplogic.h mp_gf2m.h mpprime.h + +help: + @ echo "" + @ echo "The following targets can be built with this Makefile:" + @ echo "" + @ echo "libmpi.a - arithmetic and prime testing library" + @ echo "mpi-test - test driver (requires MP_IOFUNC)" + @ echo "tools - command line tools" + @ echo "doc - manual pages for tools" + @ echo "clean - clean up objects and such" + @ echo "distclean - get ready for distribution" + @ echo "dist - distribution tarball" + @ echo "" + +.SUFFIXES: .c .o .i + +.c.i: + $(CC) $(CFLAGS) -E $< > $@ + +#.c.o: $*.h $*.c +# $(CC) $(CFLAGS) -c $< + +#--------------------------------------- + +$(LIBOBJS): $(LIBHDRS) + +logtab.h: make-logtab + $(PERL) make-logtab > logtab.h + +mpi.o: mpi.c logtab.h 
$(LIBHDRS) + +mplogic.o: mplogic.c mpi-priv.h mplogic.h $(LIBHDRS) + +mp_gf2m.o: mp_gf2m.c mpi-priv.h mp_gf2m.h mp_gf2m-priv.h $(LIBHDRS) + +mpmontg.o: mpmontg.c mpi-priv.h mplogic.h mpprime.h $(LIBHDRS) + +mpprime.o: mpprime.c mpi-priv.h mpprime.h mplogic.h primes.c $(LIBHDRS) + +mpcpucache.o: mpcpucache.c $(LIBHDRS) + +mpi_mips.o: mpi_mips.s + $(CC) -o $@ $(ASFLAGS) -c mpi_mips.s + +mpi_sparc.o : montmulf.h + +mpv_sparcv9.s: vis_64.il mpv_sparc.c + $(CC) -o $@ $(SOLARIS_FPU_FLAGS) -S vis_64.il mpv_sparc.c + +mpv_sparcv8.s: vis_32.il mpv_sparc.c + $(CC) -o $@ $(SOLARIS_FPU_FLAGS) -S vis_32.il mpv_sparc.c + +montmulfv8.o montmulfv9.o mpv_sparcv8.o mpv_sparcv9.o : %.o : %.s + $(CC) -o $@ $(SOLARIS_ASM_FLAGS) -c $< + +mpi_arm.o: mpi_arm.c $(LIBHDRS) + +# This rule is used to build the .s sources, which are then hand optimized. +#montmulfv8.s montmulfv9.s : montmulf%.s : montmulf%.il montmulf.c montmulf.h +# $(CC) -o $@ $(SOLARIS_ASM_FLAGS) -S montmulf$*.il montmulf.c + + +libmpi.a: $(LIBOBJS) + ar -cvr libmpi.a $(LIBOBJS) + $(RANLIB) libmpi.a + +lib libs: libmpi.a + +mpi.i: mpi.h + +#--------------------------------------- + +MPTESTOBJS = mptest1.o mptest2.o mptest3.o mptest3a.o mptest4.o mptest4a.o \ + mptest4b.o mptest6.o mptest7.o mptest8.o mptest9.o mptestb.o +MPTESTS = $(MPTESTOBJS:.o=) + +$(MPTESTOBJS): mptest%.o: tests/mptest-%.c $(LIBHDRS) + $(CC) $(CFLAGS) -o $@ -c $< + +$(MPTESTS): mptest%: mptest%.o libmpi.a + $(CC) $(CFLAGS) -o $@ $^ $(LIBS) + +tests: mptest1 mptest2 mptest3 mptest3a mptest4 mptest4a mptest4b mptest6 \ + mptestb bbsrand + +utests: mptest7 mptest8 mptest9 + +#--------------------------------------- + +EXTRAOBJS = bbsrand.o bbs_rand.o prng.o +UTILOBJS = primegen.o metime.o identest.o basecvt.o fact.o exptmod.o pi.o \ + makeprime.o gcd.o invmod.o lap.o isprime.o \ + dec2hex.o hex2dec.o +UTILS = $(UTILOBJS:.o=) + +$(UTILS): % : %.o libmpi.a + $(CC) $(CFLAGS) -o $@ $^ $(LIBS) + +$(UTILOBJS) $(EXTRAOBJS): %.o : utils/%.c $(LIBHDRS) + $(CC) 
$(CFLAGS) -o $@ -c $< + +prng: prng.o bbs_rand.o libmpi.a + $(CC) $(CFLAGS) -o $@ $^ $(LIBS) + +bbsrand: bbsrand.o bbs_rand.o libmpi.a + $(CC) $(CFLAGS) -o $@ $^ $(LIBS) + +utils: $(UTILS) prng bbsrand + +#--------------------------------------- + +test-info.c: test-arrays.txt + $(PERL) make-test-arrays test-arrays.txt > test-info.c + +mpi-test.o: mpi-test.c test-info.c $(LIBHDRS) + $(CC) $(CFLAGS) -o $@ -c $< + +mpi-test: mpi-test.o libmpi.a + $(CC) $(CFLAGS) -o $@ $^ $(LIBS) + +mdxptest.o: mdxptest.c $(LIBHDRS) mpi-priv.h + +mdxptest: mdxptest.o libmpi.a + $(CC) $(CFLAGS) -o $@ $^ $(LIBS) + +mulsqr.o: mulsqr.c logtab.h mpi.h mpi-config.h mpprime.h + $(CC) $(CFLAGS) -DMP_SQUARE=1 -o $@ -c mulsqr.c + +mulsqr: mulsqr.o libmpi.a + $(CC) $(CFLAGS) -o $@ $^ $(LIBS) + +#--------------------------------------- + +alltests: tests utests mpi-test + +tools: $(TOOLS) + +doc: + (cd doc; ./build) + +clean: + rm -f *.o *.a *.i + rm -f core + rm -f *~ .*~ + rm -f utils/*.o + rm -f utils/core + rm -f utils/*~ utils/.*~ + +clobber: clean + rm -f $(TOOLS) $(UTILS) + +distclean: clean + rm -f mptest? mpi-test metime mulsqr karatsuba + rm -f mptest?a mptest?b + rm -f utils/mptest? + rm -f test-info.c logtab.h + rm -f libmpi.a + rm -f $(TOOLS) + +dist: Makefile $(HDRS) $(SRCS) $(DOCS) + tar -cvf mpi-$(VERS).tar Makefile $(HDRS) $(SRCS) $(DOCS) + pgps -ab mpi-$(VERS).tar + chmod +r mpi-$(VERS).tar.asc + gzip -9 mpi-$(VERS).tar + +# END diff --git a/security/nss/lib/freebl/mpi/Makefile.os2 b/security/nss/lib/freebl/mpi/Makefile.os2 new file mode 100644 index 000000000..fa705ee08 --- /dev/null +++ b/security/nss/lib/freebl/mpi/Makefile.os2 @@ -0,0 +1,243 @@ +# +# Makefile.os2 - gmake Makefile for building MPI with VACPP on OS/2 +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +## Define CC to be the C compiler you wish to use. 
The GNU cc +## compiler (gcc) should work, at the very least +#CC=cc +#CC=gcc +CC=icc.exe +AS=alp.exe + +## +## Define PERL to point to your local Perl interpreter. It +## should be Perl 5.x, although it's conceivable that Perl 4 +## might work ... I haven't tested it. +## +#PERL=/usr/bin/perl +#PERL=perl + +## +## Define CFLAGS to contain any local options your compiler +## setup requires. +## +## Conditional compilation options are no longer here; see +## the file 'mpi-config.h' instead. +## +MPICMN = -I. -DMP_API_COMPATIBLE -DMP_IOFUNC -DMP_USE_UINT_DIGIT -DMP_NO_MP_WORD + +#OS/2 +AS_SRCS = mpi_x86.asm +MPICMN += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE -DMP_ASSEMBLY_DIV_2DX1D +#CFLAGS= -Od -Z7 -MD -W3 -nologo -D_X86_ -DXP_PC \ + -DDEBUG -D_DEBUG -UNDEBUG -DWIN32 -D_WINDOWS -DWIN95 $(MPICMN) +#CFLAGS = -O2 -MD -W3 -nologo -D_X86_ -DXP_PC -UDEBUG -U_DEBUG -DNDEBUG \ + -DWIN32 -D_WINDOWS -DWIN95 $(MPICMN) +#CFLAGS = -Od -Z7 -MD -W3 -nologo -D_X86_ -DXP_PC -UDEBUG -U_DEBUG -DNDEBUG \ + -DWIN32 -D_WINDOWS -DWIN95 $(MPICMN) +CFLAGS = /Ti+ -D_X86_ -DXP_PC -UDEBUG -U_DEBUG -DNDEBUG \ + $(MPICMN) +ASFLAGS = + +## +## Define LIBS to include any libraries you need to link against. +## If NO_TABLE is defined, LIBS should include '-lm' or whatever is +## necessary to bring in the math library. Otherwise, it can be +## left alone, unless your system has other peculiar requirements. +## +LIBS=#-lmalloc#-lefence#-lm + +## +## Define RANLIB to be the library header randomizer; you might not +## need this on some systems (just set it to 'echo' on these systems, +## such as IRIX) +## +RANLIB=echo + +## +## This is the version string used for the documentation and +## building the distribution tarball. Don't mess with it unless +## you are releasing a new version +VERS=1.7p6 + +## ---------------------------------------------------------------------- +## You probably don't need to change anything below this line... 
+## + +## +## This is the list of source files that need to be packed into +## the distribution file +SRCS= mpi.c mpprime.c mplogic.c mpmontg.c mpi-test.c primes.c tests/ \ + utils/gcd.c utils/invmod.c utils/lap.c \ + utils/ptab.pl utils/sieve.c utils/isprime.c\ + utils/dec2hex.c utils/hex2dec.c utils/bbs_rand.c \ + utils/bbsrand.c utils/prng.c utils/primegen.c \ + utils/basecvt.c utils/makeprime.c\ + utils/fact.c utils/exptmod.c utils/pi.c utils/metime.c \ + utils/mpi.h utils/mpprime.h mulsqr.c \ + make-test-arrays test-arrays.txt all-tests make-logtab \ + types.pl stats timetest multest + +## These are the header files that go into the distribution file +HDRS=mpi.h mpi-config.h utils/mpi.h utils/mpi-config.h mpprime.h mplogic.h \ + utils/bbs_rand.h tests/mpi.h tests/mpprime.h + +## These are the documentation files that go into the distribution file +DOCS=README doc utils/README utils/PRIMES + +## This is the list of tools built by 'make tools' +TOOLS=gcd.exe invmod.exe isprime.exe lap.exe dec2hex.exe hex2dec.exe \ + primegen.exe prng.exe basecvt.exe fact.exe exptmod.exe pi.exe makeprime.exe + +AS_OBJS = $(AS_SRCS:.asm=.obj) +LIBOBJS = mpprime.obj mpmontg.obj mplogic.obj mpi.obj $(AS_OBJS) +LIBHDRS = mpi-config.h mpi-priv.h mpi.h +APPHDRS = mpi-config.h mpi.h mplogic.h mpprime.h + + +help: + @ echo "" + @ echo "The following targets can be built with this Makefile:" + @ echo "" + @ echo "mpi.lib - arithmetic and prime testing library" + @ echo "mpi-test.exe - test driver (requires MP_IOFUNC)" + @ echo "tools - command line tools" + @ echo "doc - manual pages for tools" + @ echo "clean - clean up objects and such" + @ echo "distclean - get ready for distribution" + @ echo "dist - distribution tarball" + @ echo "" + +.SUFFIXES: .c .obj .i .lib .exe .asm + +.c.i: + $(CC) $(CFLAGS) -E $< > $@ + +.c.obj: + $(CC) $(CFLAGS) -c $< + +.asm.obj: + $(AS) $(ASFLAGS) $< + +.obj.exe: + $(CC) $(CFLAGS) -Fo$@ $< + +#--------------------------------------- + +$(LIBOBJS): 
$(LIBHDRS) + +logtab.h: make-logtab + $(PERL) make-logtab > logtab.h + +mpi.obj: mpi.c logtab.h $(LIBHDRS) + +mplogic.obj: mplogic.c mpi-priv.h mplogic.h $(LIBHDRS) + +mpmontg.obj: mpmontg.c mpi-priv.h mplogic.h mpprime.h $(LIBHDRS) + +mpprime.obj: mpprime.c mpi-priv.h mpprime.h mplogic.h primes.c $(LIBHDRS) + +mpi_mips.obj: mpi_mips.s + $(CC) -Fo$@ $(ASFLAGS) -c mpi_mips.s + +mpi.lib: $(LIBOBJS) + ilib /out:mpi.lib $(LIBOBJS) + $(RANLIB) mpi.lib + +lib libs: mpi.lib + +#--------------------------------------- + +MPTESTOBJS = mptest1.obj mptest2.obj mptest3.obj mptest3a.obj mptest4.obj \ + mptest4a.obj mptest4b.obj mptest6.obj mptest7.obj mptest8.obj mptest9.obj +MPTESTS = $(MPTESTOBJS:.obj=.exe) + +$(MPTESTOBJS): mptest%.obj: tests/mptest-%.c $(LIBHDRS) + $(CC) $(CFLAGS) -Fo$@ -c $< + +$(MPTESTS): mptest%.exe: mptest%.obj mpi.lib $(LIBS) + $(CC) $(CFLAGS) -Fo$@ $^ + +tests: mptest1.exe mptest2.exe mptest3.exe mptest3a.exe mptest4.exe \ + mptest4a.exe mptest4b.exe mptest6.exe bbsrand.exe + +utests: mptest7.exe mptest8.exe mptest9.exe + +#--------------------------------------- + +EXTRAOBJS = bbsrand.obj bbs_rand.obj prng.obj +UTILOBJS = primegen.obj metime.obj identest.obj basecvt.obj fact.obj \ + exptmod.obj pi.obj makeprime.obj karatsuba.obj gcd.obj invmod.obj lap.obj \ + isprime.obj dec2hex.obj hex2dec.obj +UTILS = $(UTILOBJS:.obj=.exe) + +$(UTILS): %.exe : %.obj mpi.lib $(LIBS) + $(CC) $(CFLAGS) -Fo$@ $^ + +$(UTILOBJS) $(EXTRAOBJS): %.obj : utils/%.c $(LIBHDRS) + $(CC) $(CFLAGS) -Fo$@ -c $< + +prng.exe: prng.obj bbs_rand.obj mpi.lib $(LIBS) + $(CC) $(CFLAGS) -Fo$@ $^ + +bbsrand.exe: bbsrand.obj bbs_rand.obj mpi.lib $(LIBS) + $(CC) $(CFLAGS) -Fo$@ $^ + +utils: $(UTILS) prng.exe bbsrand.exe + +#--------------------------------------- + +test-info.c: test-arrays.txt + $(PERL) make-test-arrays test-arrays.txt > test-info.c + +mpi-test.obj: mpi-test.c test-info.c $(LIBHDRS) + $(CC) $(CFLAGS) -Fo$@ -c $< + +mpi-test.exe: mpi-test.obj mpi.lib $(LIBS) + $(CC) $(CFLAGS) 
-Fo$@ $^ + +mdxptest.obj: mdxptest.c $(LIBHDRS) mpi-priv.h + +mdxptest.exe: mdxptest.obj mpi.lib $(LIBS) + $(CC) $(CFLAGS) -Fo$@ $^ + +mulsqr.obj: mulsqr.c logtab.h mpi.h mpi-config.h mpprime.h + $(CC) $(CFLAGS) -DMP_SQUARE=1 -Fo$@ -c mulsqr.c + +mulsqr.exe: mulsqr.obj mpi.lib $(LIBS) + $(CC) $(CFLAGS) -Fo$@ $^ + +#--------------------------------------- + +alltests: tests utests mpi-test.exe + +tools: $(TOOLS) + +doc: + (cd doc; ./build) + +clean: + rm -f *.obj *.lib *.pdb *.ilk + cd utils; rm -f *.obj *.lib *.pdb *.ilk + +distclean: clean + rm -f mptest? mpi-test metime mulsqr karatsuba + rm -f mptest?a mptest?b + rm -f utils/mptest? + rm -f test-info.c logtab.h + rm -f mpi.lib + rm -f $(TOOLS) + +dist: Makefile $(HDRS) $(SRCS) $(DOCS) + tar -cvf mpi-$(VERS).tar Makefile $(HDRS) $(SRCS) $(DOCS) + pgps -ab mpi-$(VERS).tar + chmod +r mpi-$(VERS).tar.asc + gzip -9 mpi-$(VERS).tar + + +print: + @echo LIBOBJS = $(LIBOBJS) +# END diff --git a/security/nss/lib/freebl/mpi/Makefile.win b/security/nss/lib/freebl/mpi/Makefile.win new file mode 100644 index 000000000..cd41dfab8 --- /dev/null +++ b/security/nss/lib/freebl/mpi/Makefile.win @@ -0,0 +1,254 @@ +# +# Makefile.win - gmake Makefile for building MPI with MSVC on NT + +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +## Define CC to be the C compiler you wish to use. The GNU cc +## compiler (gcc) should work, at the very least +#CC=cc +#CC=gcc +CC=cl.exe +ifeq ($(CPU_ARCH),x86_64) +AS=ml64.exe +else +AS=ml.exe +endif + +## +## Define PERL to point to your local Perl interpreter. It +## should be Perl 5.x, although it's conceivable that Perl 4 +## might work ... I haven't tested it. +## +#PERL=/usr/bin/perl +#PERL=perl + +## +## Define CFLAGS to contain any local options your compiler +## setup requires. 
+## +## Conditional compilation options are no longer here; see +## the file 'mpi-config.h' instead. +## +MPICMN = -I. -DMP_API_COMPATIBLE -DMP_IOFUNC + +ifeq ($(CPU_ARCH),x86_64) +AS_SRCS = mpi_x86_64.asm +CFLAGS = -O2 -Z7 -MD -W3 -nologo -DXP_PC -UDEBUG -U_DEBUG -DNDEBUG \ + -DWIN32 -D_WIN64 -D_AMD64_ -D_M_AMD64 -D_WINDOWS -DWIN95 $(MPICMN) +ASFLAGS = -Cp -Sn -Zi -I. +else +#NT +AS_SRCS = mpi_x86.asm +MPICMN += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE -DMP_ASSEMBLY_DIV_2DX1D +#CFLAGS= -Od -Z7 -MD -W3 -nologo -D_X86_ -DXP_PC \ + -DDEBUG -D_DEBUG -UNDEBUG -DWIN32 -D_WINDOWS -DWIN95 $(MPICMN) +#CFLAGS = -O2 -MD -W3 -nologo -D_X86_ -DXP_PC -UDEBUG -U_DEBUG -DNDEBUG \ + -DWIN32 -D_WINDOWS -DWIN95 $(MPICMN) +#CFLAGS = -Od -Z7 -MD -W3 -nologo -D_X86_ -DXP_PC -UDEBUG -U_DEBUG -DNDEBUG \ + -DWIN32 -D_WINDOWS -DWIN95 $(MPICMN) +CFLAGS = -O2 -Z7 -MD -W3 -nologo -D_X86_ -DXP_PC -UDEBUG -U_DEBUG -DNDEBUG \ + -DWIN32 -D_WINDOWS -DWIN95 $(MPICMN) +ASFLAGS = -Cp -Sn -Zi -coff -I. +endif + +## +## Define LIBS to include any libraries you need to link against. +## If NO_TABLE is defined, LIBS should include '-lm' or whatever is +## necessary to bring in the math library. Otherwise, it can be +## left alone, unless your system has other peculiar requirements. +## +LIBS=#-lmalloc#-lefence#-lm + +## +## Define RANLIB to be the library header randomizer; you might not +## need this on some systems (just set it to 'echo' on these systems, +## such as IRIX) +## +RANLIB=echo + +## +## This is the version string used for the documentation and +## building the distribution tarball. Don't mess with it unless +## you are releasing a new version +VERS=1.7p6 + +## ---------------------------------------------------------------------- +## You probably don't need to change anything below this line... 
+## + +## +## This is the list of source files that need to be packed into +## the distribution file +SRCS= mpi.c mpprime.c mplogic.c mpmontg.c mpi-test.c primes.c tests/ \ + utils/gcd.c utils/invmod.c utils/lap.c \ + utils/ptab.pl utils/sieve.c utils/isprime.c\ + utils/dec2hex.c utils/hex2dec.c utils/bbs_rand.c \ + utils/bbsrand.c utils/prng.c utils/primegen.c \ + utils/basecvt.c utils/makeprime.c\ + utils/fact.c utils/exptmod.c utils/pi.c utils/metime.c \ + utils/mpi.h utils/mpprime.h mulsqr.c \ + make-test-arrays test-arrays.txt all-tests make-logtab \ + types.pl stats timetest multest + +## These are the header files that go into the distribution file +HDRS=mpi.h mpi-config.h utils/mpi.h utils/mpi-config.h mpprime.h mplogic.h \ + utils/bbs_rand.h tests/mpi.h tests/mpprime.h + +## These are the documentation files that go into the distribution file +DOCS=README doc utils/README utils/PRIMES + +## This is the list of tools built by 'make tools' +TOOLS=gcd.exe invmod.exe isprime.exe lap.exe dec2hex.exe hex2dec.exe \ + primegen.exe prng.exe basecvt.exe fact.exe exptmod.exe pi.exe makeprime.exe + +AS_OBJS = $(AS_SRCS:.asm=.obj) +LIBOBJS = mpprime.obj mpmontg.obj mplogic.obj mpi.obj $(AS_OBJS) +LIBHDRS = mpi-config.h mpi-priv.h mpi.h +APPHDRS = mpi-config.h mpi.h mplogic.h mpprime.h + + +help: + @ echo "" + @ echo "The following targets can be built with this Makefile:" + @ echo "" + @ echo "mpi.lib - arithmetic and prime testing library" + @ echo "mpi-test - test driver (requires MP_IOFUNC)" + @ echo "tools - command line tools" + @ echo "doc - manual pages for tools" + @ echo "clean - clean up objects and such" + @ echo "distclean - get ready for distribution" + @ echo "dist - distribution tarball" + @ echo "" + +.SUFFIXES: .c .obj .i .lib .exe .asm + +.c.i: + $(CC) $(CFLAGS) -E $< > $@ + +.c.obj: + $(CC) $(CFLAGS) -c $< + +.asm.obj: + $(AS) $(ASFLAGS) -c $< + +.obj.exe: + $(CC) $(CFLAGS) -Fo$@ $< + +#--------------------------------------- + +$(LIBOBJS): 
$(LIBHDRS) + +logtab.h: make-logtab + $(PERL) make-logtab > logtab.h + +mpi.obj: mpi.c logtab.h $(LIBHDRS) + +mplogic.obj: mplogic.c mpi-priv.h mplogic.h $(LIBHDRS) + +mpmontg.obj: mpmontg.c mpi-priv.h mplogic.h mpprime.h $(LIBHDRS) + +mpprime.obj: mpprime.c mpi-priv.h mpprime.h mplogic.h primes.c $(LIBHDRS) + +mpi_mips.obj: mpi_mips.s + $(CC) -Fo$@ $(ASFLAGS) -c mpi_mips.s + +mpi.lib: $(LIBOBJS) + ar -cvr mpi.lib $(LIBOBJS) + $(RANLIB) mpi.lib + +lib libs: mpi.lib + +#--------------------------------------- + +MPTESTOBJS = mptest1.obj mptest2.obj mptest3.obj mptest3a.obj mptest4.obj \ + mptest4a.obj mptest4b.obj mptest6.obj mptest7.obj mptest8.obj mptest9.obj +MPTESTS = $(MPTESTOBJS:.obj=.exe) + +$(MPTESTOBJS): mptest%.obj: tests/mptest-%.c $(LIBHDRS) + $(CC) $(CFLAGS) -Fo$@ -c $< + +$(MPTESTS): mptest%.exe: mptest%.obj mpi.lib $(LIBS) + $(CC) $(CFLAGS) -Fo$@ $^ + +tests: mptest1.exe mptest2.exe mptest3.exe mptest3a.exe mptest4.exe \ + mptest4a.exe mptest4b.exe mptest6.exe bbsrand.exe + +utests: mptest7.exe mptest8.exe mptest9.exe + +#--------------------------------------- + +EXTRAOBJS = bbsrand.obj bbs_rand.obj prng.obj +UTILOBJS = primegen.obj metime.obj identest.obj basecvt.obj fact.obj \ + exptmod.obj pi.obj makeprime.obj karatsuba.obj gcd.obj invmod.obj lap.obj \ + isprime.obj dec2hex.obj hex2dec.obj +UTILS = $(UTILOBJS:.obj=.exe) + +$(UTILS): %.exe : %.obj mpi.lib $(LIBS) + $(CC) $(CFLAGS) -Fo$@ $^ + +$(UTILOBJS) $(EXTRAOBJS): %.obj : utils/%.c $(LIBHDRS) + $(CC) $(CFLAGS) -Fo$@ -c $< + +prng.exe: prng.obj bbs_rand.obj mpi.lib $(LIBS) + $(CC) $(CFLAGS) -Fo$@ $^ + +bbsrand.exe: bbsrand.obj bbs_rand.obj mpi.lib $(LIBS) + $(CC) $(CFLAGS) -Fo$@ $^ + +utils: $(UTILS) prng.exe bbsrand.exe + +#--------------------------------------- + +test-info.c: test-arrays.txt + $(PERL) make-test-arrays test-arrays.txt > test-info.c + +mpi-test.obj: mpi-test.c test-info.c $(LIBHDRS) + $(CC) $(CFLAGS) -Fo$@ -c $< + +mpi-test.exe: mpi-test.obj mpi.lib $(LIBS) + $(CC) $(CFLAGS) 
-Fo$@ $^ + +mdxptest.obj: mdxptest.c $(LIBHDRS) mpi-priv.h + +mdxptest.exe: mdxptest.obj mpi.lib $(LIBS) + $(CC) $(CFLAGS) -Fo$@ $^ + +mulsqr.obj: mulsqr.c logtab.h mpi.h mpi-config.h mpprime.h + $(CC) $(CFLAGS) -DMP_SQUARE=1 -Fo$@ -c mulsqr.c + +mulsqr.exe: mulsqr.obj mpi.lib $(LIBS) + $(CC) $(CFLAGS) -Fo$@ $^ + +#--------------------------------------- + +alltests: tests utests mpi-test.exe + +tools: $(TOOLS) + +doc: + (cd doc; ./build) + +clean: + rm -f *.obj *.lib *.pdb *.ilk + cd utils; rm -f *.obj *.lib *.pdb *.ilk + +distclean: clean + rm -f mptest? mpi-test metime mulsqr karatsuba + rm -f mptest?a mptest?b + rm -f utils/mptest? + rm -f test-info.c logtab.h + rm -f mpi.lib + rm -f $(TOOLS) + +dist: Makefile $(HDRS) $(SRCS) $(DOCS) + tar -cvf mpi-$(VERS).tar Makefile $(HDRS) $(SRCS) $(DOCS) + pgps -ab mpi-$(VERS).tar + chmod +r mpi-$(VERS).tar.asc + gzip -9 mpi-$(VERS).tar + + +print: + @echo LIBOBJS = $(LIBOBJS) +# END diff --git a/security/nss/lib/freebl/mpi/README b/security/nss/lib/freebl/mpi/README new file mode 100644 index 000000000..475549bad --- /dev/null +++ b/security/nss/lib/freebl/mpi/README @@ -0,0 +1,749 @@ +This Source Code Form is subject to the terms of the Mozilla Public +License, v. 2.0. If a copy of the MPL was not distributed with this +file, You can obtain one at http://mozilla.org/MPL/2.0/. + +About the MPI Library +--------------------- + +The files 'mpi.h' and 'mpi.c' define a simple, arbitrary precision +signed integer arithmetic package. The implementation is not the most +efficient possible, but the code is small and should be fairly easily +portable to just about any machine that supports an ANSI C compiler, +as long as it is capable of at least 16-bit arithmetic (but also see +below for more on this). + +This library was written with an eye to cryptographic applications; +thus, some care is taken to make sure that temporary values are not +left lying around in memory when they are no longer in use. 
This adds +some overhead for zeroing buffers before they are released back into +the free pool; however, it gives you the assurance that there is only +one copy of your important values residing in your process's address +space at a time. Obviously, it is difficult to guarantee anything, in +a pre-emptive multitasking environment, but this at least helps you +keep a lid on the more obvious ways your data can get spread around in +memory. + + +Using the Library +----------------- + +To use the MPI library in your program, you must include the header: + +#include "mpi.h" + +This header provides all the type and function declarations you'll +need to use the library. Almost all the names defined by the library +begin with the prefix 'mp_', so it should be easy to keep them from +clashing with your program's namespace (he says, glibly, knowing full +well there are always pathological cases). + +There are a few things you may want to configure about the library. +By default, the MPI library uses an unsigned short for its digit type, +and an unsigned int for its word type. The word type must be big +enough to contain at least two digits, for the primitive arithmetic to +work out. On my machine, a short is 2 bytes and an int is 4 bytes -- +but if you have 64-bit ints, you might want to use a 4-byte digit and +an 8-byte word. I have tested the library using 1-byte digits and +2-byte words, as well. Whatever you choose to do, the things you need +to change are: + +(1) The type definitions for mp_digit and mp_word. + +(2) The macro DIGIT_FMT which tells mp_print() how to display a + single digit. This is just a printf() format string, so you + can adjust it appropriately. + +(3) The macros DIGIT_MAX and MP_WORD_MAX, which specify the + largest value expressible in an mp_digit and an mp_word, + respectively. + +Both the mp_digit and mp_word should be UNSIGNED integer types. The +code relies on having the full positive precision of the type used for +digits and words. 
+ +The remaining type definitions should be left alone, for the most +part. The code in the library does not make any significant +assumptions about the sizes of things, but there is little if any +reason to change the other parameters, so I would recommend you leave +them as you found them. + +The library comes with a Perl script, 'types.pl', which will scan your +current Makefile settings, and attempt to find good definitions for +these types. It relies on a Unix sort of build environment, so it +probably won't work under MacOS or Windows, but it can be convenient +if you're porting to a new flavour of Unix. Just run 'types.pl' at +the command line, and it will spit out its results to the standard +output. + + +Conventions +----------- + +Most functions in the library return a value of type mp_err. This +permits the library to communicate success or various kinds of failure +to the calling program. The return values currently defined are: + + MP_OKAY - okay, operation succeeded, all's well + MP_YES - okay, the answer is yes (same as MP_OKAY) + MP_NO - okay, but answer is no (not MP_OKAY) + MP_MEM - operation ran out of memory + MP_RANGE - input parameter was out of range + MP_BADARG - an invalid input parameter was provided + MP_UNDEF - no output value is defined for this input + +The only function which currently uses MP_UNDEF is mp_invmod(). +Division by zero is undefined, but the division functions will return +MP_RANGE for a zero divisor. MP_BADARG usually means you passed a +bogus mp_int structure to the function. MP_YES and MP_NO are not used +by the library itself; they're defined so you can use them in your own +extensions. + +If you need a readable interpretation of these error codes in your +program, you may also use the mp_strerror() function. This function +takes an mp_err as input, and returns a pointer to a human-readable +string describing the meaning of the error. 
These strings are stored +as constants within the library, so the caller should not attempt to +modify or free the memory associated with these strings. + +The library represents values in signed-magnitude format. Values +strictly less than zero are negative, all others are considered +positive (zero is positive by fiat). You can access the 'sign' member +of the mp_int structure directly, but better is to use the mp_cmp_z() +function, to find out which side of zero the value lies on. + +Most arithmetic functions have a single-digit variant, as well as the +full arbitrary-precision. An mp_digit is an unsigned value between 0 +and DIGIT_MAX inclusive. The radix is available as RADIX. The number +of bits in a given digit is given as DIGIT_BIT. + +Generally, input parameters are given before output parameters. +Unless otherwise specified, any input parameter can be re-used as an +output parameter, without confusing anything. + +The basic numeric type defined by the library is an mp_int. Virtually +all the functions in the library take a pointer to an mp_int as one of +their parameters. An explanation of how to create and use these +structures follows. And so, without further ado... + + +Initialization and Cleanup +-------------------------- + +The basic numeric type defined by the library is an 'mp_int'. +However, it is not sufficient to simply declare a variable of type +mp_int in your program. These variables also need to be initialized +before they can be used, to allocate the internal storage they require +for computation. + +This is done using one of the following functions: + + mp_init(mp_int *mp); + mp_init_copy(mp_int *mp, mp_int *from); + mp_init_size(mp_int *mp, mp_size p); + +Each of these requires a pointer to a structure of type mp_int. The +basic mp_init() simply initializes the mp_int to a default size, and +sets its value to zero. 
If you would like to initialize a copy of an +existing mp_int, use mp_init_copy(), where the 'from' parameter is the +mp_int you'd like to make a copy of. The third function, +mp_init_size(), permits you to specify how many digits of precision +should be preallocated for your mp_int. This can help the library +avoid unnecessary re-allocations later on. + +The default precision used by mp_init() can be retrieved using: + + precision = mp_get_prec(); + +This returns the number of digits that will be allocated. You can +change this value by using: + + mp_set_prec(unsigned int prec); + +Any positive value is acceptable -- if you pass zero, the default +precision will be re-set to the compiled-in library default (this is +specified in the header file 'mpi-config.h', and typically defaults to +8 or 16). + +Just as you must allocate an mp_int before you can use it, you must +clean up the structure when you are done with it. This is performed +using the mp_clear() function. Remember that any mp_int that you +create as a local variable in a function must be mp_clear()'d before +that function exits, or else the memory allocated to that mp_int will +be orphaned and unrecoverable. + +To set an mp_int to a given value, the following functions are given: + + mp_set(mp_int *mp, mp_digit d); + mp_set_int(mp_int *mp, long z); + +The mp_set() function sets the mp_int to a single digit value, while +mp_set_int() sets the mp_int to a signed long integer value. + +To set an mp_int to zero, use: + + mp_zero(mp_int *mp); + + +Copying and Moving +------------------ + +If you have two initialized mp_int's, and you want to copy the value +of one into the other, use: + + mp_copy(from, to) + +This takes care of clearing the old value of 'to', and copies the new +value into it. If 'to' is not yet initialized, use mp_init_copy() +instead (see above). + +Note: The library tries, whenever possible, to avoid allocating +---- new memory. 
Thus, mp_copy() tries first to satisfy the needs + of the copy by re-using the memory already allocated to 'to'. + Only if this proves insufficient will mp_copy() actually + allocate new memory. + + For this reason, if you know a priori that 'to' has enough + available space to hold 'from', you don't need to check the + return value of mp_copy() for memory failure. The USED() + macro tells you how many digits are used by an mp_int, and + the ALLOC() macro tells you how many are allocated. + +If you have two initialized mp_int's, and you want to exchange their +values, use: + + mp_exch(a, b) + +This is better than using mp_copy() with a temporary, since it will +not (ever) touch the memory allocator -- it just swaps the exact +contents of the two structures. The mp_exch() function cannot fail; +if you pass it an invalid structure, it just ignores it, and does +nothing. + + +Basic Arithmetic +---------------- + +Once you have initialized your integers, you can operate on them. The +basic arithmetic functions on full mp_int values are: + +mp_add(a, b, c) - computes c = a + b +mp_sub(a, b, c) - computes c = a - b +mp_mul(a, b, c) - computes c = a * b +mp_sqr(a, b) - computes b = a * a +mp_div(a, b, q, r) - computes q, r such that a = bq + r +mp_div_2d(a, d, q, r) - computes q = a / 2^d, r = a % 2^d +mp_expt(a, b, c) - computes c = a ** b +mp_2expt(a, k) - computes a = 2^k + +The mp_div_2d() function efficiently computes division by powers of +two. Either the q or r parameter may be NULL, in which case that +portion of the computation will be discarded. 
+ +The algorithms used for some of the computations here are described in +the following files which are included with this distribution: + +mul.txt Describes the multiplication algorithm +div.txt Describes the division algorithm +expt.txt Describes the exponentiation algorithm +sqrt.txt Describes the square-root algorithm +square.txt Describes the squaring algorithm + +There are single-digit versions of most of these routines, as well. +In the following prototypes, 'd' is a single mp_digit: + +mp_add_d(a, d, c) - computes c = a + d +mp_sub_d(a, d, c) - computes c = a - d +mp_mul_d(a, d, c) - computes c = a * d +mp_mul_2(a, c) - computes c = a * 2 +mp_div_d(a, d, q, r) - computes q, r such that a = dq + r +mp_div_2(a, c) - computes c = a / 2 +mp_expt_d(a, d, c) - computes c = a ** d + +The mp_mul_2() and mp_div_2() functions take advantage of the internal +representation of an mp_int to do multiplication by two more quickly +than mp_mul_d() would. Other basic functions of an arithmetic variety +include: + +mp_zero(a) - assign 0 to a +mp_neg(a, c) - negate a: c = -a +mp_abs(a, c) - absolute value: c = |a| + + +Comparisons +----------- + +Several comparison functions are provided. Each of these, unless +otherwise specified, returns zero if the comparands are equal, < 0 if +the first is less than the second, and > 0 if the first is greater +than the second: + +mp_cmp_z(a) - compare a <=> 0 +mp_cmp_d(a, d) - compare a <=> d, d is a single digit +mp_cmp(a, b) - compare a <=> b +mp_cmp_mag(a, b) - compare |a| <=> |b| +mp_isodd(a) - return nonzero if odd, zero otherwise +mp_iseven(a) - return nonzero if even, zero otherwise + + +Modular Arithmetic +------------------ + +Modular variations of the basic arithmetic functions are also +supported. These are available if the MP_MODARITH parameter in +mpi-config.h is turned on (it is by default). 
The modular arithmetic +functions are: + +mp_mod(a, m, c) - compute c = a (mod m), 0 <= c < m +mp_mod_d(a, d, c) - compute c = a (mod d), 0 <= c < d (see below) +mp_addmod(a, b, m, c) - compute c = (a + b) mod m +mp_submod(a, b, m, c) - compute c = (a - b) mod m +mp_mulmod(a, b, m, c) - compute c = (a * b) mod m +mp_sqrmod(a, m, c) - compute c = (a * a) mod m +mp_exptmod(a, b, m, c) - compute c = (a ** b) mod m +mp_exptmod_d(a, d, m, c)- compute c = (a ** d) mod m + +The mp_sqr() function squares its input argument. A call to mp_sqr(a, +c) is identical in meaning to mp_mul(a, a, c); however, if the +MP_SQUARE variable is set true in mpi-config.h (see below), then it +will be implemented with a different algorithm, that is supposed to +take advantage of the redundant computation that takes place during +squaring. Unfortunately, some compilers result in worse performance +on this code, so you can change the behaviour at will. There is a +utility program "mulsqr.c" that lets you test which does better on +your system. + +The mp_sqrmod() function is analogous to the mp_sqr() function; it +uses the mp_sqr() function rather than mp_mul(), and then performs the +modular reduction. This probably won't help much unless you are doing +a lot of them. + +See the file 'square.txt' for a synopsis of the algorithm used. + +Note: The mp_mod_d() function computes a modular reduction around +---- a single digit d. The result is a single digit c. + +Because an inverse is defined for a (mod m) if and only if (a, m) = 1 +(that is, if a and m are relatively prime), mp_invmod() may not be +able to compute an inverse for the arguments. In this case, it +returns the value MP_UNDEF, and does not modify c. If an inverse is +defined, however, it returns MP_OKAY, and sets c to the value of the +inverse (mod m). + +See the file 'redux.txt' for a description of the modular reduction +algorithm used by mp_exptmod(). 
+ + +Greatest Common Divisor +----------------------- + +The greatest common divisor of two values can be found using one of the +following functions: + +mp_gcd(a, b, c) - compute c = (a, b) using binary algorithm +mp_lcm(a, b, c) - compute c = [a, b] = ab / (a, b) +mp_xgcd(a, b, g, x, y) - compute g, x, y so that ax + by = g = (a, b) + +Also provided is a function to compute modular inverses, if they +exist: + +mp_invmod(a, m, c) - compute c = a^-1 (mod m), if it exists + +The function mp_xgcd() computes the greatest common divisor, and also +returns values of x and y satisfying Bezout's identity. This is used +by mp_invmod() to find modular inverses. However, if you do not need +these values, you will find that mp_gcd() is MUCH more efficient, +since it doesn't need all the intermediate values that mp_xgcd() +requires in order to compute x and y. + +The mp_gcd() (and mp_xgcd()) functions use the binary (extended) GCD +algorithm due to Josef Stein. + + +Input & Output Functions +------------------------ + +The following basic I/O routines are provided. These are present at +all times: + +mp_read_radix(mp, str, r) - convert a string in radix r to an mp_int +mp_read_raw(mp, s, len) - convert a string of bytes to an mp_int +mp_radix_size(mp, r) - return length of buffer needed by mp_toradix() +mp_raw_size(mp) - return length of buffer needed by mp_toraw() +mp_toradix(mp, str, r) - convert an mp_int to a string of radix r + digits +mp_toraw(mp, str) - convert an mp_int to a string of bytes +mp_tovalue(ch, r) - convert ch to its value when taken as + a radix r digit, or -1 if invalid +mp_strerror(err) - get a string describing mp_err value 'err' + +If you compile the MPI library with MP_IOFUNC defined, you will also +have access to the following additional I/O function: + +mp_print(mp, ofp) - print an mp_int as text to output stream ofp + +Note that mp_radix_size() returns a size in bytes guaranteed to be AT +LEAST big enough for the digits output by mp_toradix(). 
Because it +uses an approximation technique to figure out how many digits will be +needed, it may return a figure which is larger than necessary. Thus, +the caller should not rely on the value to determine how many bytes +will actually be written by mp_toradix(). The string mp_toradix() +creates will be NUL terminated, so the standard C library function +strlen() should be able to ascertain this for you, if you need it. + +The mp_read_radix() and mp_toradix() functions support bases from 2 to +64 inclusive. If you require more general radix conversion facilities +than this, you will need to write them yourself (that's why mp_div_d() +is provided, after all). + +Note: mp_read_radix() will accept as digits either capital or +---- lower-case letters. However, the current implementation of + mp_toradix() only outputs upper-case letters, when writing + bases between 10 and 36. The underlying code supports using + lower-case letters, but the interface stub does not have a + selector for it. You can add one yourself if you think it + is worthwhile -- I do not. Bases from 36 to 64 use lower- + case letters as distinct from upper-case. Bases 63 and + 64 use the characters '+' and '/' as digits. + + Note also that compiling with MP_IOFUNC defined will cause + inclusion of <stdio.h>, so if you are trying to write code + which does not depend on the standard C library, you will + probably want to avoid this option. This is needed because + the mp_print() function takes a standard library FILE * as + one of its parameters, and uses the fprintf() function. + +The mp_toraw() function converts the integer to a sequence of bytes, +in big-endian ordering (most-significant byte first). Assuming your +bytes are 8 bits wide, this corresponds to base 256. The sign is +encoded as a single leading byte, whose value is 0 for zero or +positive values, or 1 for negative values. 
The mp_read_raw() function +reverses this process -- it takes a buffer of bytes, interprets the +first as a sign indicator (0 = zero/positive, nonzero = negative), and +the rest as a sequence of 1-byte digits in big-endian ordering. + +The mp_raw_size() function returns the exact number of bytes required +to store the given integer in "raw" format (as described in the +previous paragraph). Zero is returned in case of error; a valid +integer will require at least three bytes of storage. + +In previous versions of the MPI library, an "external representation +format" was supported. This was removed, however, because I found I +was never using it, it was not as portable as I would have liked, and +I decided it was a waste of space. + + +Other Functions +--------------- + +The files 'mpprime.h' and 'mpprime.c' define some routines which are +useful for divisibility testing and probabilistic primality testing. +The routines defined are: + +mpp_divis(a, b) - is a divisible by b? +mpp_divis_d(a, d) - is a divisible by digit d? +mpp_random(a) - set a to random value at current precision +mpp_random_size(a, prec) - set a to random value at given precision + +Note: The mpp_random() and mpp_random_size() functions use the C +---- library's rand() function to generate random values. It is + up to the caller to seed this generator before it is called. + These functions are not suitable for generating quantities + requiring cryptographic-quality randomness; they are intended + primarily for use in primality testing. + + Note too that the MPI library does not call srand(), so your + application should do this, if you ever want the sequence + to change. + +mpp_divis_vector(a, v, s, w) - is a divisible by any of the s digits + in v? If so, let w be the index of + that digit + +mpp_divis_primes(a, np) - is a divisible by any of the first np + primes? If so, set np to the prime + which divided a. + +mpp_fermat(a, d) - test if w^a = w (mod a). If so, + returns MP_YES, otherwise MP_NO. 
+ +mpp_pprime(a, nt) - perform nt iterations of the Rabin- + Miller probabilistic primality test + on a. Returns MP_YES if all tests + passed, or MP_NO if any test fails. + +The mpp_fermat() function works based on Fermat's little theorem, a +consequence of which is that if p is a prime, and (w, p) = 1, then: + + w^p = w (mod p) + +Put another way, if w^p != w (mod p), then p is not prime. The test +is expensive to compute, but it helps to quickly eliminate an enormous +class of composite numbers prior to Rabin-Miller testing. + +Building the Library +-------------------- + +The MPI library is designed to be as self-contained as possible. You +should be able to compile it with your favourite ANSI C compiler, and +link it into your program directly. If you are on a Unix system using +the GNU C compiler (gcc), the following should work: + +% gcc -ansi -pedantic -Wall -O2 -c mpi.c + +The file 'mpi-config.h' defines several configurable parameters for +the library, which you can adjust to suit your application. At the +time of this writing, the available options are: + +MP_IOFUNC - Define true to include the mp_print() function, + which is moderately useful for debugging. This + implicitly includes <stdio.h>. + +MP_MODARITH - Define true to include the modular arithmetic + functions. If you don't need modular arithmetic + in your application, you can set this to zero to + leave out all the modular routines. + +MP_NUMTH - Define true to include number theoretic functions + such as mp_gcd(), mp_lcm(), and mp_invmod(). + +MP_LOGTAB - If true, the file "logtab.h" is included, which + is basically a static table of base 2 logarithms. + These are used to compute how big the buffers for + radix conversion need to be. If you set this false, + the library includes <math.h> and uses log(). This + typically forces you to link against math libraries. + +MP_MEMSET - If true, use memset() to zero buffers. 
If you run + into weird alignment related bugs, set this to zero + and an explicit loop will be used. + +MP_MEMCPY - If true, use memcpy() to copy buffers. If you run + into weird alignment bugs, set this to zero and an + explicit loop will be used. + +MP_ARGCHK - Set to 0, 1, or 2. This defines how the argument + checking macro, ARGCHK(), gets expanded. If this + is set to zero, ARGCHK() expands to nothing; no + argument checks are performed. If this is 1, the + ARGCHK() macro expands to code that returns MP_BADARG + or similar at runtime. If it is 2, ARGCHK() expands + to an assert() call that aborts the program on a + bad input. + +MP_DEBUG - Turns on debugging output. This is probably not at + all useful unless you are debugging the library. It + tends to spit out a LOT of output. + +MP_DEFPREC - The default precision of a newly-created mp_int, in + digits. The precision can be changed at runtime by + the mp_set_prec() function, but this is its initial + value. + +MP_SQUARE - If this is set to a nonzero value, the mp_sqr() + function will use an alternate algorithm that takes + advantage of the redundant inner product computation + when both multiplicands are identical. Unfortunately, + with some compilers this is actually SLOWER than just + calling mp_mul() with the same argument twice. So + if you set MP_SQUARE to zero, mp_sqr() will be expan- + ded into a call to mp_mul(). This applies to all + the uses of mp_sqr(), including mp_sqrmod() and the + internal calls to s_mp_sqr() inside mpi.c + + The program 'mulsqr' (mulsqr.c) can be used to test + which works best for your configuration. Set up the + CC and CFLAGS variables in the Makefile, then type: + + make mulsqr + + Invoke it with arguments similar to the following: + + mulsqr 25000 1024 + + That is, 25000 products computed on 1024-bit values. + The output will compare the two timings, and recommend + a setting for MP_SQUARE. It is off by default. 
+ +If you would like to use the mp_print() function (see above), be sure +to define MP_IOFUNC in mpi-config.h. Many of the test drivers in the +'tests' subdirectory expect this to be defined (although the test +driver 'mpi-test' doesn't need it) + +The Makefile which comes with the library should take care of building +the library for you, if you have set the CC and CFLAGS variables at +the top of the file appropriately. By default, they are set up to +use the GNU C compiler: + +CC=gcc +CFLAGS=-ansi -pedantic -Wall -O2 + +If all goes well, the library should compile without warnings using +this combination. You should, of course, make whatever adjustments +you find necessary. + +The MPI library distribution comes with several additional programs +which are intended to demonstrate the use of the library, and provide +a framework for testing it. There are a handful of test driver +programs, in the files named 'mptest-X.c', where X is a digit. Also, +there are some simple command-line utilities (in the 'utils' +directory) for manipulating large numbers. These include: + +basecvt.c A radix-conversion program, supporting bases from + 2 to 64 inclusive. + +bbsrand.c A BBS (quadratic residue) pseudo-random number + generator. The file 'bbsrand.c' is just the driver + for the program; the real code lives in the files + 'bbs_rand.h' and 'bbs_rand.c' + +dec2hex.c Converts decimal to hexadecimal + +gcd.c Computes the greatest common divisor of two values. + If invoked as 'xgcd', also computes constants x and + y such that (a, b) = ax + by, in accordance with + Bezout's identity. + +hex2dec.c Converts hexadecimal to decimal + +invmod.c Computes modular inverses + +isprime.c Performs the Rabin-Miller probabilistic primality + test on a number. Values which fail this test are + definitely composite, and those which pass are very + likely to be prime (although there are no guarantees) + +lap.c Computes the order (least annihilating power) of + a value v modulo m. 
Very dumb algorithm. + +primegen.c Generates large (probable) primes. + +prng.c A pseudo-random number generator based on the + BBS generator code in 'bbs_rand.c' + +sieve.c Implements the Sieve of Eratosthenes, using a big + bitmap, to generate a list of prime numbers. + +fact.c Computes the factorial of an arbitrary precision + integer (iterative). + +exptmod.c Computes arbitrary precision modular exponentiation + from the command line (exptmod a b m -> a^b (mod m)) + +Most of these can be built from the Makefile that comes with the +library. Try 'make tools', if your environment supports it. + + +Testing the Library +------------------- + +Automatic test vectors are included, in the form of a program called +'mpi-test'. To build this program and run all the tests, simply +invoke the shell script 'all-tests'. If all the tests pass, you +should see a message: + + All tests passed + +If something went wrong, you'll get: + + One or more tests failed. + +If this happens, scan back through the preceding lines, to see which +test failed. Any failure indicates a bug in the library, which needs +to be fixed before it will give accurate results. If you get any such +thing, please let me know, and I'll try to fix it. Please let me know +what platform and compiler you were using, as well as which test +failed. If a reason for failure was given, please send me that text +as well. + +If you're on a system where the standard Unix build tools don't work, +you can build the 'mpi-test' program manually, and run it by hand. +This is tedious and obnoxious, sorry. + +Further manual testing can be performed by building the manual testing +programs, whose source is found in the 'tests' subdirectory. Each +test is in a source file called 'mptest-X.c'. The Makefile contains a +target to build all of them at once: + + make tests + +Read the comments at the top of each source file to see what the +driver is supposed to test. 
You probably don't need to do this; these +programs were only written to help me as I was developing the library. + +The relevant files are: + +mpi-test.c The source for the test driver + +make-test-arrays A Perl script to generate some of the internal + data structures used by mpi-test.c + +test-arrays.txt The source file for make-test-arrays + +all-tests A Bourne shell script which runs all the + tests in the mpi-test suite + +Running 'make mpi-test' should build the mpi-test program. If you +cannot use make, here is what needs to be done: + +(1) Use 'make-test-arrays' to generate the file 'test-info.c' from + the 'test-arrays.txt' file. Since Perl can be found everywhere, + this should be no trouble. Under Unix, this looks like: + + make-test-arrays test-arrays.txt > test-info.c + +(2) Build the MPI library: + + gcc -ansi -pedantic -Wall -c mpi.c + +(3) Build the mpi-test program: + + gcc -ansi -pedantic -Wall -o mpi-test mpi.o mpi-test.c + +When you've got mpi-test, you can use 'all-tests' to run all the tests +made available by mpi-test. If any of them fail, there should be a +diagnostic indicating what went wrong. These are fairly high-level +diagnostics, and won't really help you debug the problem; they're +simply intended to help you isolate which function caused the problem. +If you encounter a problem of this sort, feel free to e-mail me, and I +will certainly attempt to help you debug it. + +Note: Several of the tests hard-wired into 'mpi-test' operate under +---- the assumption that you are using at least a 16-bit mp_digit + type. If that is not true, several tests might fail, because + of range problems with the maximum digit value. + + If you are using an 8-bit digit, you will also need to + modify the code for mp_read_raw(), which assumes that + multiplication by 256 can be done with mp_mul_d(), a + fact that fails when DIGIT_MAX is 255. 
You can replace + the call with s_mp_lshd(), which will give you the same + effect, and without doing as much work. :) + +Acknowledgements: +---------------- + +The algorithms used in this library were drawn primarily from Volume +2 of Donald Knuth's magnum opus, _The Art of Computer Programming_, +"Semi-Numerical Methods". Barrett's algorithm for modular reduction +came from Menezes, Oorschot, and Vanstone's _Handbook of Applied +Cryptography_, Chapter 14. + +Thanks are due to Tom St. Denis, for finding an obnoxious sign-related +bug in mp_read_raw() that made things break on platforms which use +signed chars. + +About the Author +---------------- + +This software was written by Michael J. Fromberger. You can contact +the author as follows: + +E-mail: + +Postal: 8000 Cummings Hall, Thayer School of Engineering + Dartmouth College, Hanover, New Hampshire, USA + +PGP key: http://linguist.dartmouth.edu/~sting/keys/mjf.html + 9736 188B 5AFA 23D6 D6AA BE0D 5856 4525 289D 9907 + +Last updated: 16-Jan-2000 diff --git a/security/nss/lib/freebl/mpi/all-tests b/security/nss/lib/freebl/mpi/all-tests new file mode 100755 index 000000000..3429a15c0 --- /dev/null +++ b/security/nss/lib/freebl/mpi/all-tests @@ -0,0 +1,83 @@ +#!/bin/sh +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +ECHO=/bin/echo +MAKE=gmake + +$ECHO "\n** Running unit tests for MPI library\n" + +# Build the mpi-test program, which comprises all the unit tests for +# the MPI library... + +$ECHO "Bringing mpi-test up to date ... " +if $MAKE mpi-test ; then + : +else + $ECHO " " + $ECHO "Make failed to build mpi-test." + $ECHO " " + exit 1 +fi + +if [ ! -x mpi-test ] ; then + $ECHO " " + $ECHO "Cannot find 'mpi-test' program, testing cannot continue." + $ECHO " " + exit 1 +fi + +# Get the list of available test suites... 
+tests=`./mpi-test list | awk '{print $1}'` +errs=0 + +# Run each test suite and check the result code of mpi-test +for test in $tests ; do + $ECHO "$test ... \c" + if ./mpi-test $test ; then + $ECHO "passed" + else + $ECHO "FAILED" + errs=1 + fi +done + +# If any tests failed, we'll stop at this point +if [ "$errs" = "0" ] ; then + $ECHO "All unit tests passed" +else + $ECHO "One or more tests failed" + exit 1 +fi + +# Now try to build the 'pi' program, and see if it can compute the +# first 2000 digits of pi correctly +$ECHO "\n** Running other tests\n" + +$ECHO "Bringing 'pi' up to date ... " +if $MAKE pi ; then + : +else + $ECHO "\nMake failed to build pi.\n" + exit 1 +fi + +if [ ! -x pi ] ; then + $ECHO "\nCannot find 'pi' program; testing cannot continue.\n" + exit 1 +fi + +./pi 2000 > /tmp/pi.tmp.$$ +if cmp tests/pi2k.txt /tmp/pi.tmp.$$ ; then + $ECHO "Okay! The pi test passes." +else + $ECHO "Oops! The pi test failed. :(" + exit 1 +fi + +rm -f /tmp/pi.tmp.$$ + +exit 0 + +# Here there be dragons diff --git a/security/nss/lib/freebl/mpi/doc/LICENSE b/security/nss/lib/freebl/mpi/doc/LICENSE new file mode 100644 index 000000000..35cca68ce --- /dev/null +++ b/security/nss/lib/freebl/mpi/doc/LICENSE @@ -0,0 +1,11 @@ +Within this directory, each of the files listed below is licensed under +the terms given in the file LICENSE-MPL, also in this directory. + +basecvt.pod +gcd.pod +invmod.pod +isprime.pod +lap.pod +mpi-test.pod +prime.txt +prng.pod diff --git a/security/nss/lib/freebl/mpi/doc/LICENSE-MPL b/security/nss/lib/freebl/mpi/doc/LICENSE-MPL new file mode 100644 index 000000000..41dc2327f --- /dev/null +++ b/security/nss/lib/freebl/mpi/doc/LICENSE-MPL @@ -0,0 +1,3 @@ +This Source Code Form is subject to the terms of the Mozilla Public +License, v. 2.0. If a copy of the MPL was not distributed with this +file, You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/security/nss/lib/freebl/mpi/doc/basecvt.pod b/security/nss/lib/freebl/mpi/doc/basecvt.pod new file mode 100644 index 000000000..c3d87fbc7 --- /dev/null +++ b/security/nss/lib/freebl/mpi/doc/basecvt.pod @@ -0,0 +1,65 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +=head1 NAME + + basecvt - radix conversion for arbitrary precision integers + +=head1 SYNOPSIS + + basecvt <ibase> <obase> [values] + +=head1 DESCRIPTION + +The B<basecvt> program is a command-line tool for converting integers +of arbitrary precision from one radix to another. The current version +supports radix values from 2 (binary) to 64, inclusive. The first two +command line arguments specify the input and output radix, in base 10. +Any further arguments are taken to be integers notated in the input +radix, and these are converted to the output radix. The output is +written, one integer per line, to standard output. + +When reading integers, only digits considered "valid" for the input +radix are accepted. Processing of an integer terminates when an +invalid input digit is encountered. So, for example, if you set the +input radix to 10 and enter '10ACF', B<basecvt> would assume that you +had entered '10' and ignore the rest of the string. + +If no values are provided, no output is written, but the program +simply terminates with a zero exit status. Error diagnostics are +written to standard error in the event of out-of-range radix +specifications. Regardless of the actual values of the input and +output radix, the radix arguments are taken to be in base 10 (decimal) +notation. + +=head1 DIGITS + +For radices from 2-10, standard ASCII decimal digits 0-9 are used for +both input and output. For radices from 11-36, the ASCII letters A-Z +are also included, following the convention used in hexadecimal.
In +this range, input is accepted in either upper or lower case, although +on output only lower-case letters are used. + +For radices from 37-62, the output includes both upper- and lower-case +ASCII letters, and case matters. In this range, case is distinguished +both for input and for output values. + +For radices 63 and 64, the characters '+' (plus) and '/' (forward +solidus) are also used. These are derived from the MIME base64 +encoding scheme. The overall encoding is not the same as base64, +because the ASCII digits are used for the bottom of the range, and the +letters are shifted upward; however, the output will consist of the +same character set. + +This input and output behaviour is inherited from the MPI library used +by B<basecvt>, and so is not configurable at runtime. + +=head1 SEE ALSO + + dec2hex(1), hex2dec(1) + +=head1 AUTHOR + + Michael J. Fromberger + Thayer School of Engineering, Hanover, New Hampshire, USA diff --git a/security/nss/lib/freebl/mpi/doc/build b/security/nss/lib/freebl/mpi/doc/build new file mode 100755 index 000000000..4d75b1e5a --- /dev/null +++ b/security/nss/lib/freebl/mpi/doc/build @@ -0,0 +1,30 @@ +#!/bin/sh +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +VERS="1.7p6" +SECT="1" +NAME="MPI Tools" + +echo "Building manual pages ..." +case $# in + 0) + files=`ls *.pod` + ;; + *) + files=$* + ;; +esac + +for name in $files +do + echo -n "$name ... " +# sname=`noext $name` + sname=`basename $name .pod` + pod2man --section="$SECT" --center="$NAME" --release="$VERS" $name > $sname.$SECT + echo "(done)" +done + +echo "Finished building."
+ diff --git a/security/nss/lib/freebl/mpi/doc/div.txt b/security/nss/lib/freebl/mpi/doc/div.txt new file mode 100644 index 000000000..c13fb6ef1 --- /dev/null +++ b/security/nss/lib/freebl/mpi/doc/div.txt @@ -0,0 +1,64 @@ +Division + +This describes the division algorithm used by the MPI library. + +Input: a, b; a > b +Compute: Q, R; a = Qb + R + +The input numbers are normalized so that the high-order digit of b is +at least half the radix. This guarantees that we have a reasonable +way to guess at the digits of the quotient (this method was taken from +Knuth, vol. 2, with adaptations). + +To normalize, test the high-order digit of b. If it is less than half +the radix, multiply both a and b by d, where: + + radix - 1 + d = ----------- + bmax + 1 + +...where bmax is the high-order digit of b. Otherwise, set d = 1. + +Given normalized values for a and b, let the notation a[n] denote the +nth digit of a. Let #a be the number of significant figures of a (not +including any leading zeroes). + + Let Q = 0 and R = 0 + Let p = #a - 1 + + while(p >= 0) + do + R = (R * radix) + a[p] + p = p - 1 + while(R < b and p >= 0) + + if(R < b) + break + + q = (R[#R - 1] * radix) + R[#R - 2] + q = q / b[#b - 1] + + T = b * q + + while(T > R) + q = q - 1 + T = T - b + endwhile + + R = R - T + + Q = (Q * radix) + q + + endwhile + +At this point, Q is the quotient, and R is the normalized remainder. +To denormalize R, compute: + + R = (R / d) + +At this point, you are finished. + +------------------------------------------------------------------ + This Source Code Form is subject to the terms of the Mozilla Public + # License, v. 2.0. If a copy of the MPL was not distributed with this + # file, You can obtain one at http://mozilla.org/MPL/2.0/.
diff --git a/security/nss/lib/freebl/mpi/doc/expt.txt b/security/nss/lib/freebl/mpi/doc/expt.txt new file mode 100644 index 000000000..bd9d6f196 --- /dev/null +++ b/security/nss/lib/freebl/mpi/doc/expt.txt @@ -0,0 +1,94 @@ +Exponentiation + +For exponentiation, the MPI library uses a simple and fairly standard +square-and-multiply method. The algorithm is this: + +Input: a, b +Output: a ** b + + s = 1 + + while(b != 0) + if(b is odd) + s = s * a + endif + + b = b / 2 + + a = a * a + endwhile + + return s + +The modular exponentiation is done the same way, except replacing: + + s = s * a + +with + s = (s * a) mod m + +and replacing + + a = a * a + +with + + a = (a * a) mod m + +Here is a sample exponentiation using the MPI library, as compared to +the same problem solved by the Unix 'bc' program on my system: + +Computation of 2,381,283 ** 235 + +'bc' says: + +4385CA4A804D199FBEAD95FAD0796FAD0D0B51FC9C16743C45568C789666985DB719\ +4D90E393522F74C9601262C0514145A49F3B53D00983F95FDFCEA3D0043ECEF6227E\ +6FB59C924C3EE74447B359B5BF12A555D46CB819809EF423F004B55C587D6F0E8A55\ +4988036A42ACEF9F71459F97CEF6E574BD7373657111648626B1FF8EE15F663B2C0E\ +6BBE5082D4CDE8E14F263635AE8F35DB2C280819517BE388B5573B84C5A19C871685\ +FD408A6471F9D6AFAF5129A7548EAE926B40874B340285F44765BF5468CE20A13267\ +CD88CE6BC786ACED36EC7EA50F67FF27622575319068A332C3C0CB23E26FB55E26F4\ +5F732753A52B8E2FB4D4F42D894242613CA912A25486C3DEC9C66E5DB6182F6C1761\ +CF8CD0D255BE64B93836B27D452AE38F950EB98B517D4CF50D48F0165EF0CCCE1F5C\ +49BF18219FDBA0EEDD1A7E8B187B70C2BAED5EC5C6821EF27FAFB1CFF70111C52235\ +5E948B93A015AA1AE152B110BB5658CB14D3E45A48BFE7F082C1182672A455A695CD\ +A1855E8781E625F25B41B516E77F589FA420C3B058861EA138CF7A2C58DB3C7504FD\ +D29554D78237834CC5AE710D403CC4F6973D5012B7E117A8976B14A0B5AFA889BD47\ +92C461F0F96116F00A97AE9E83DC5203680CAF9A18A062566C145650AB86BE4F907F\ +A9F7AB4A700B29E1E5BACCD6DCBFA513E10832815F710807EED2E279081FEC61D619\
+AB270BEB3D3A1787B35A9DD41A8766CF21F3B5C693B3BAB1C2FA14A4ED202BC35743\ +E5CBE2391624D4F8C9BFBBC78D69764E7C6C5B11BF005677BFAD17D9278FFC1F158F\ +1B3683FF7960FA0608103792C4163DC0AF3E06287BB8624F8FE3A0FFBDF82ACECA2F\ +CFFF2E1AC93F3CA264A1B + +MPI says: + +4385CA4A804D199FBEAD95FAD0796FAD0D0B51FC9C16743C45568C789666985DB719\ +4D90E393522F74C9601262C0514145A49F3B53D00983F95FDFCEA3D0043ECEF6227E\ +6FB59C924C3EE74447B359B5BF12A555D46CB819809EF423F004B55C587D6F0E8A55\ +4988036A42ACEF9F71459F97CEF6E574BD7373657111648626B1FF8EE15F663B2C0E\ +6BBE5082D4CDE8E14F263635AE8F35DB2C280819517BE388B5573B84C5A19C871685\ +FD408A6471F9D6AFAF5129A7548EAE926B40874B340285F44765BF5468CE20A13267\ +CD88CE6BC786ACED36EC7EA50F67FF27622575319068A332C3C0CB23E26FB55E26F4\ +5F732753A52B8E2FB4D4F42D894242613CA912A25486C3DEC9C66E5DB6182F6C1761\ +CF8CD0D255BE64B93836B27D452AE38F950EB98B517D4CF50D48F0165EF0CCCE1F5C\ +49BF18219FDBA0EEDD1A7E8B187B70C2BAED5EC5C6821EF27FAFB1CFF70111C52235\ +5E948B93A015AA1AE152B110BB5658CB14D3E45A48BFE7F082C1182672A455A695CD\ +A1855E8781E625F25B41B516E77F589FA420C3B058861EA138CF7A2C58DB3C7504FD\ +D29554D78237834CC5AE710D403CC4F6973D5012B7E117A8976B14A0B5AFA889BD47\ +92C461F0F96116F00A97AE9E83DC5203680CAF9A18A062566C145650AB86BE4F907F\ +A9F7AB4A700B29E1E5BACCD6DCBFA513E10832815F710807EED2E279081FEC61D619\ +AB270BEB3D3A1787B35A9DD41A8766CF21F3B5C693B3BAB1C2FA14A4ED202BC35743\ +E5CBE2391624D4F8C9BFBBC78D69764E7C6C5B11BF005677BFAD17D9278FFC1F158F\ +1B3683FF7960FA0608103792C4163DC0AF3E06287BB8624F8FE3A0FFBDF82ACECA2F\ +CFFF2E1AC93F3CA264A1B + +Diff says: +% diff bc.txt mp.txt +% + +------------------------------------------------------------------ + This Source Code Form is subject to the terms of the Mozilla Public + # License, v. 2.0. If a copy of the MPL was not distributed with this + # file, You can obtain one at http://mozilla.org/MPL/2.0/. 
diff --git a/security/nss/lib/freebl/mpi/doc/gcd.pod b/security/nss/lib/freebl/mpi/doc/gcd.pod new file mode 100644 index 000000000..b5b8fa34f --- /dev/null +++ b/security/nss/lib/freebl/mpi/doc/gcd.pod @@ -0,0 +1,28 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +=head1 NAME + + gcd - compute greatest common divisor of two integers + +=head1 SYNOPSIS + + gcd + +=head1 DESCRIPTION + +The B program computes the greatest common divisor of two +arbitrary-precision integers I and I. The result is written in +standard decimal notation to the standard output. + +If I is zero, B will print an error message and exit. + +=head1 SEE ALSO + +invmod(1), isprime(1), lap(1) + +=head1 AUTHOR + + Michael J. Fromberger + Thayer School of Engineering, Hanover, New Hampshire, USA diff --git a/security/nss/lib/freebl/mpi/doc/invmod.pod b/security/nss/lib/freebl/mpi/doc/invmod.pod new file mode 100644 index 000000000..0194f4488 --- /dev/null +++ b/security/nss/lib/freebl/mpi/doc/invmod.pod @@ -0,0 +1,34 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +=head1 NAME + + invmod - compute modular inverse of an integer + +=head1 SYNOPSIS + + invmod + +=head1 DESCRIPTION + +The B program computes the inverse of I, modulo I, if +that inverse exists. Both I and I are arbitrary-precision +integers in decimal notation. The result is written in standard +decimal notation to the standard output. + +If there is no inverse, the message: + + No inverse + +...will be printed to the standard output (an inverse exists if and +only if the greatest common divisor of I and I is 1). + +=head1 SEE ALSO + +gcd(1), isprime(1), lap(1) + +=head1 AUTHOR + + Michael J. 
Fromberger + Thayer School of Engineering, Hanover, New Hampshire, USA diff --git a/security/nss/lib/freebl/mpi/doc/isprime.pod b/security/nss/lib/freebl/mpi/doc/isprime.pod new file mode 100644 index 000000000..a8ec1f7ee --- /dev/null +++ b/security/nss/lib/freebl/mpi/doc/isprime.pod @@ -0,0 +1,63 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +=head1 NAME + + isprime - probabilistic primality testing + +=head1 SYNOPSIS + + isprime + +=head1 DESCRIPTION + +The B program attempts to determine whether the arbitrary +precision integer I is prime. It first tests I for divisibility +by the first 170 or so small primes, and assuming I is not +divisible by any of these, applies 15 iterations of the Rabin-Miller +probabilistic primality test. + +If the program discovers that the number is composite, it will print: + + Not prime (reason) + +Where I is either: + + divisible by small prime x + +Or: + + failed nth pseudoprime test + +In the first case, I indicates the first small prime factor that +was found. In the second case, I indicates which of the +pseudoprime tests failed (numbered from 1) + +If this happens, the number is definitely not prime. However, if the +number succeeds, this message results: + + Probably prime, 1 in 4^15 chance of false positive + +If this happens, the number is prime with very high probability, but +its primality has not been absolutely proven, only demonstrated to a +very convincing degree. + +The value I can be input in standard decimal notation, or, if it is +prefixed with I, it will be read as hexadecimal. + +=head1 ENVIRONMENT + +You can control how many iterations of Rabin-Miller are performed on +the candidate number by setting the I environment variable +to an integer value before starting up B. This will change +the output slightly if the number passes all the tests. 
+ +=head1 SEE ALSO + +gcd(1), invmod(1), lap(1) + +=head1 AUTHOR + + Michael J. Fromberger + Thayer School of Engineering, Hanover, New Hampshire, USA diff --git a/security/nss/lib/freebl/mpi/doc/lap.pod b/security/nss/lib/freebl/mpi/doc/lap.pod new file mode 100644 index 000000000..47539fbbf --- /dev/null +++ b/security/nss/lib/freebl/mpi/doc/lap.pod @@ -0,0 +1,36 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +=head1 NAME + + lap - compute least annihilating power of a number + +=head1 SYNOPSIS + + lap <a> <m> + +=head1 DESCRIPTION + +The B<lap> program computes the order of I<a> modulo I<m>, for +arbitrary precision integers I<a> and I<m>. The B<order> of I<a> +modulo I<m> is defined as the smallest positive value I<n> for which +I<a> raised to the I<n>th power, modulo I<m>, is equal to 1. The +order may not exist, if I<m> is composite. + +=head1 RESTRICTIONS + +This program is very slow, especially for large moduli. It is +intended as a way to help find primitive elements in a modular field, +but it does not do so in a particularly efficient manner. It was +written simply to help verify that a particular candidate does not +have an obviously short cycle mod I<m>. + +=head1 SEE ALSO + +gcd(1), invmod(1), isprime(1) + +=head1 AUTHOR + + Michael J. Fromberger + Thayer School of Engineering, Hanover, New Hampshire, USA diff --git a/security/nss/lib/freebl/mpi/doc/mpi-test.pod b/security/nss/lib/freebl/mpi/doc/mpi-test.pod new file mode 100644 index 000000000..b05f866e5 --- /dev/null +++ b/security/nss/lib/freebl/mpi/doc/mpi-test.pod @@ -0,0 +1,51 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ +=head1 NAME + + mpi-test - automated test program for MPI library + +=head1 SYNOPSIS + + mpi-test <suite-name> [quiet] + mpi-test list + mpi-test help + +=head1 DESCRIPTION + +The B<mpi-test> program is a general unit test driver for the MPI +library. It is used to verify that the library works as it is +supposed to on your architecture. As with most such things, passing +all the tests in B<mpi-test> does not guarantee the code is correct, +but if any of them fail, there are certainly problems. + +Each major function of the library can be tested individually. For a +list of the test suites understood by B<mpi-test>, run it with the +I<list> command line option: + + mpi-test list + +This will display a list of the available test suites and a brief +synopsis of what each one does. For a brief overview of this +document, run B<mpi-test> I<help>. + +B<mpi-test> exits with a zero status if the selected test succeeds, or +a nonzero status if it fails. If a I<suite-name> which is not +understood by B<mpi-test> is given, a diagnostic is printed to the +standard error, and the program exits with a result code of 2. If a +test fails, the result code will be 1, and a diagnostic is ordinarily +printed to the standard error. However, if the I<quiet> option is +provided, these diagnostics will be suppressed. + +=head1 RESTRICTIONS + +Only a few canned test cases are provided. The solutions have been +verified using the GNU bc(1) program, so bugs there may cause problems +here; however, this is very unlikely, so if a test fails, it is almost +certainly my fault, not bc(1)'s. + +=head1 AUTHOR + + Michael J. Fromberger + Thayer School of Engineering, Hanover, New Hampshire, USA diff --git a/security/nss/lib/freebl/mpi/doc/mul.txt b/security/nss/lib/freebl/mpi/doc/mul.txt new file mode 100644 index 000000000..975f56ddb --- /dev/null +++ b/security/nss/lib/freebl/mpi/doc/mul.txt @@ -0,0 +1,77 @@ +Multiplication + +This describes the multiplication algorithm used by the MPI library. + +This is basically a standard "schoolbook" algorithm.
It is slow -- +O(mn) for m = #a, n = #b -- but easy to implement and verify. +Basically, we run two nested loops, as illustrated here (R is the +radix): + +k = 0 +for j <- 0 to (#b - 1) + for i <- 0 to (#a - 1) + w = (a[i] * b[j]) + k + c[i+j] + c[i+j] = w mod R + k = w div R + endfor + c[i+j] = k; + k = 0; +endfor + +It is necessary that 'w' have room for at least two radix R digits. +The product of any two digits in radix R is at most: + + (R - 1)(R - 1) = R^2 - 2R + 1 + +Since a two-digit radix-R number can hold values up to R^2 - 1, +this ensures that the product will fit into the two-digit register. + +To ensure that two digits is enough for w, we must also show that +there is room for the carry-in from the previous multiplication, and +the current value of the product digit that is being recomputed. +Assuming each of these may be as big as R - 1 (and no larger, +certainly), two digits will be enough if and only if: + + (R^2 - 2R + 1) + 2(R - 1) <= R^2 - 1 + +Simplifying this inequality shows that, indeed, this is the case: + + R^2 - 2R + 1 + 2R - 2 <= R^2 - 1 + + R^2 - 1 <= R^2 - 1 + +This suggests that a good radix would be one more than the largest +value that can be held in half a machine word -- so, for example, as +in this implementation, where we used a radix of 65536 on a machine +with 4-byte words. Another advantage of a radix of this sort is that +binary-level operations are easy on numbers in this representation.
+ +Here's an example multiplication worked out longhand in radix-10, +using the above algorithm: + + a = 999 + b = x 999 + ------------- + p = 998001 + +w = (a[jx] * b[ix]) + kin + c[ix + jx] +c[ix+jx] = w % RADIX +k = w / RADIX + product +ix jx a[jx] b[ix] kin w c[i+j] kout 000000 +0 0 9 9 0 81+0+0 1 8 000001 +0 1 9 9 8 81+8+0 9 8 000091 +0 2 9 9 8 81+8+0 9 8 000991 + 8 0 008991 +1 0 9 9 0 81+0+9 0 9 008901 +1 1 9 9 9 81+9+9 9 9 008901 +1 2 9 9 9 81+9+8 8 9 008901 + 9 0 098901 +2 0 9 9 0 81+0+9 0 9 098001 +2 1 9 9 9 81+9+8 8 9 098001 +2 2 9 9 9 81+9+9 9 9 098001 + +------------------------------------------------------------------ + This Source Code Form is subject to the terms of the Mozilla Public + # License, v. 2.0. If a copy of the MPL was not distributed with this + # file, You can obtain one at http://mozilla.org/MPL/2.0/. diff --git a/security/nss/lib/freebl/mpi/doc/pi.txt b/security/nss/lib/freebl/mpi/doc/pi.txt new file mode 100644 index 000000000..a6ef91137 --- /dev/null +++ b/security/nss/lib/freebl/mpi/doc/pi.txt @@ -0,0 +1,53 @@ +This file describes how pi is computed by the program in 'pi.c' (see +the utils subdirectory). + +Basically, we use Machin's formula, which is what everyone in the +world uses as a simple method for computing approximations to pi. +This works for up to a few thousand digits without too much effort. +Beyond that, though, it gets too slow. + +Machin's formula states: + + pi := 16 * arctan(1/5) - 4 * arctan(1/239) + +We compute this in integer arithmetic by first multiplying everything +through by 10^d, where 'd' is the number of digits of pi we wanted to +compute. It turns out, the last few digits will be wrong, but the +number that are wrong is usually very small (ordinarily only 2-3). +Having done this, we compute the arctan() function using the formula: + + 1 1 1 1 1 + arctan(1/x) := --- - ----- + ----- - ----- + ----- - ...
+ x 3 x^3 5 x^5 7 x^7 9 x^9 + +This is done iteratively by computing the first term manually, and +then iteratively dividing x^2 and k, where k = 3, 5, 7, ... out of the +current figure. This is then added to (or subtracted from) a running +sum, as appropriate. The iteration continues until we overflow our +available precision and the current figure goes to zero under integer +division. At that point, we're finished. + +Actually, we get a couple extra bits of precision out of the fact that +we know we're computing y * arctan(1/x), by setting up the multiplier +as: + + y * 10^d + +... instead of just 10^d. There is also a bit of cleverness in how +the loop is constructed, to avoid special-casing the first term. +Check out the code for arctan() in 'pi.c', if you are interested in +seeing how it is set up. + +Thanks to Jason P. for this algorithm, which I assembled from notes +and programs found on his cool "Pile of Pi Programs" page, at: + + http://www.isr.umd.edu/~jasonp/pipage.html + +Thanks also to Henrik Johansson , from +whose pi program I borrowed the clever idea of pre-multiplying by x in +order to avoid a special case on the loop iteration. + +------------------------------------------------------------------ + This Source Code Form is subject to the terms of the Mozilla Public + # License, v. 2.0. If a copy of the MPL was not distributed with this + # file, You can obtain one at http://mozilla.org/MPL/2.0/. 
diff --git a/security/nss/lib/freebl/mpi/doc/prime.txt b/security/nss/lib/freebl/mpi/doc/prime.txt new file mode 100644 index 000000000..694797d5f --- /dev/null +++ b/security/nss/lib/freebl/mpi/doc/prime.txt @@ -0,0 +1,6542 @@ +2 +3 +5 +7 +11 +13 +17 +19 +23 +29 +31 +37 +41 +43 +47 +53 +59 +61 +67 +71 +73 +79 +83 +89 +97 +101 +103 +107 +109 +113 +127 +131 +137 +139 +149 +151 +157 +163 +167 +173 +179 +181 +191 +193 +197 +199 +211 +223 +227 +229 +233 +239 +241 +251 +257 +263 +269 +271 +277 +281 +283 +293 +307 +311 +313 +317 +331 +337 +347 +349 +353 +359 +367 +373 +379 +383 +389 +397 +401 +409 +419 +421 +431 +433 +439 +443 +449 +457 +461 +463 +467 +479 +487 +491 +499 +503 +509 +521 +523 +541 +547 +557 +563 +569 +571 +577 +587 +593 +599 +601 +607 +613 +617 +619 +631 +641 +643 +647 +653 +659 +661 +673 +677 +683 +691 +701 +709 +719 +727 +733 +739 +743 +751 +757 +761 +769 +773 +787 +797 +809 +811 +821 +823 +827 +829 +839 +853 +857 +859 +863 +877 +881 +883 +887 +907 +911 +919 +929 +937 +941 +947 +953 +967 +971 +977 +983 +991 +997 +1009 +1013 +1019 +1021 +1031 +1033 +1039 +1049 +1051 +1061 +1063 +1069 +1087 +1091 +1093 +1097 +1103 +1109 +1117 +1123 +1129 +1151 +1153 +1163 +1171 +1181 +1187 +1193 +1201 +1213 +1217 +1223 +1229 +1231 +1237 +1249 +1259 +1277 +1279 +1283 +1289 +1291 +1297 +1301 +1303 +1307 +1319 +1321 +1327 +1361 +1367 +1373 +1381 +1399 +1409 +1423 +1427 +1429 +1433 +1439 +1447 +1451 +1453 +1459 +1471 +1481 +1483 +1487 +1489 +1493 +1499 +1511 +1523 +1531 +1543 +1549 +1553 +1559 +1567 +1571 +1579 +1583 +1597 +1601 +1607 +1609 +1613 +1619 +1621 +1627 +1637 +1657 +1663 +1667 +1669 +1693 +1697 +1699 +1709 +1721 +1723 +1733 +1741 +1747 +1753 +1759 +1777 +1783 +1787 +1789 +1801 +1811 +1823 +1831 +1847 +1861 +1867 +1871 +1873 +1877 +1879 +1889 +1901 +1907 +1913 +1931 +1933 +1949 +1951 +1973 +1979 +1987 +1993 +1997 +1999 +2003 +2011 +2017 +2027 +2029 +2039 +2053 +2063 +2069 +2081 +2083 +2087 +2089 +2099 +2111 +2113 +2129 +2131 +2137 +2141 +2143 +2153 +2161 +2179 +2203 
+2207 +2213 +2221 +2237 +2239 +2243 +2251 +2267 +2269 +2273 +2281 +2287 +2293 +2297 +2309 +2311 +2333 +2339 +2341 +2347 +2351 +2357 +2371 +2377 +2381 +2383 +2389 +2393 +2399 +2411 +2417 +2423 +2437 +2441 +2447 +2459 +2467 +2473 +2477 +2503 +2521 +2531 +2539 +2543 +2549 +2551 +2557 +2579 +2591 +2593 +2609 +2617 +2621 +2633 +2647 +2657 +2659 +2663 +2671 +2677 +2683 +2687 +2689 +2693 +2699 +2707 +2711 +2713 +2719 +2729 +2731 +2741 +2749 +2753 +2767 +2777 +2789 +2791 +2797 +2801 +2803 +2819 +2833 +2837 +2843 +2851 +2857 +2861 +2879 +2887 +2897 +2903 +2909 +2917 +2927 +2939 +2953 +2957 +2963 +2969 +2971 +2999 +3001 +3011 +3019 +3023 +3037 +3041 +3049 +3061 +3067 +3079 +3083 +3089 +3109 +3119 +3121 +3137 +3163 +3167 +3169 +3181 +3187 +3191 +3203 +3209 +3217 +3221 +3229 +3251 +3253 +3257 +3259 +3271 +3299 +3301 +3307 +3313 +3319 +3323 +3329 +3331 +3343 +3347 +3359 +3361 +3371 +3373 +3389 +3391 +3407 +3413 +3433 +3449 +3457 +3461 +3463 +3467 +3469 +3491 +3499 +3511 +3517 +3527 +3529 +3533 +3539 +3541 +3547 +3557 +3559 +3571 +3581 +3583 +3593 +3607 +3613 +3617 +3623 +3631 +3637 +3643 +3659 +3671 +3673 +3677 +3691 +3697 +3701 +3709 +3719 +3727 +3733 +3739 +3761 +3767 +3769 +3779 +3793 +3797 +3803 +3821 +3823 +3833 +3847 +3851 +3853 +3863 +3877 +3881 +3889 +3907 +3911 +3917 +3919 +3923 +3929 +3931 +3943 +3947 +3967 +3989 +4001 +4003 +4007 +4013 +4019 +4021 +4027 +4049 +4051 +4057 +4073 +4079 +4091 +4093 +4099 +4111 +4127 +4129 +4133 +4139 +4153 +4157 +4159 +4177 +4201 +4211 +4217 +4219 +4229 +4231 +4241 +4243 +4253 +4259 +4261 +4271 +4273 +4283 +4289 +4297 +4327 +4337 +4339 +4349 +4357 +4363 +4373 +4391 +4397 +4409 +4421 +4423 +4441 +4447 +4451 +4457 +4463 +4481 +4483 +4493 +4507 +4513 +4517 +4519 +4523 +4547 +4549 +4561 +4567 +4583 +4591 +4597 +4603 +4621 +4637 +4639 +4643 +4649 +4651 +4657 +4663 +4673 +4679 +4691 +4703 +4721 +4723 +4729 +4733 +4751 +4759 +4783 +4787 +4789 +4793 +4799 +4801 +4813 +4817 +4831 +4861 +4871 +4877 +4889 +4903 +4909 +4919 +4931 +4933 +4937 +4943 
+4951 +4957 +4967 +4969 +4973 +4987 +4993 +4999 +5003 +5009 +5011 +5021 +5023 +5039 +5051 +5059 +5077 +5081 +5087 +5099 +5101 +5107 +5113 +5119 +5147 +5153 +5167 +5171 +5179 +5189 +5197 +5209 +5227 +5231 +5233 +5237 +5261 +5273 +5279 +5281 +5297 +5303 +5309 +5323 +5333 +5347 +5351 +5381 +5387 +5393 +5399 +5407 +5413 +5417 +5419 +5431 +5437 +5441 +5443 +5449 +5471 +5477 +5479 +5483 +5501 +5503 +5507 +5519 +5521 +5527 +5531 +5557 +5563 +5569 +5573 +5581 +5591 +5623 +5639 +5641 +5647 +5651 +5653 +5657 +5659 +5669 +5683 +5689 +5693 +5701 +5711 +5717 +5737 +5741 +5743 +5749 +5779 +5783 +5791 +5801 +5807 +5813 +5821 +5827 +5839 +5843 +5849 +5851 +5857 +5861 +5867 +5869 +5879 +5881 +5897 +5903 +5923 +5927 +5939 +5953 +5981 +5987 +6007 +6011 +6029 +6037 +6043 +6047 +6053 +6067 +6073 +6079 +6089 +6091 +6101 +6113 +6121 +6131 +6133 +6143 +6151 +6163 +6173 +6197 +6199 +6203 +6211 +6217 +6221 +6229 +6247 +6257 +6263 +6269 +6271 +6277 +6287 +6299 +6301 +6311 +6317 +6323 +6329 +6337 +6343 +6353 +6359 +6361 +6367 +6373 +6379 +6389 +6397 +6421 +6427 +6449 +6451 +6469 +6473 +6481 +6491 +6521 +6529 +6547 +6551 +6553 +6563 +6569 +6571 +6577 +6581 +6599 +6607 +6619 +6637 +6653 +6659 +6661 +6673 +6679 +6689 +6691 +6701 +6703 +6709 +6719 +6733 +6737 +6761 +6763 +6779 +6781 +6791 +6793 +6803 +6823 +6827 +6829 +6833 +6841 +6857 +6863 +6869 +6871 +6883 +6899 +6907 +6911 +6917 +6947 +6949 +6959 +6961 +6967 +6971 +6977 +6983 +6991 +6997 +7001 +7013 +7019 +7027 +7039 +7043 +7057 +7069 +7079 +7103 +7109 +7121 +7127 +7129 +7151 +7159 +7177 +7187 +7193 +7207 +7211 +7213 +7219 +7229 +7237 +7243 +7247 +7253 +7283 +7297 +7307 +7309 +7321 +7331 +7333 +7349 +7351 +7369 +7393 +7411 +7417 +7433 +7451 +7457 +7459 +7477 +7481 +7487 +7489 +7499 +7507 +7517 +7523 +7529 +7537 +7541 +7547 +7549 +7559 +7561 +7573 +7577 +7583 +7589 +7591 +7603 +7607 +7621 +7639 +7643 +7649 +7669 +7673 +7681 +7687 +7691 +7699 +7703 +7717 +7723 +7727 +7741 +7753 +7757 +7759 +7789 +7793 +7817 +7823 +7829 +7841 +7853 +7867 +7873 
+7877 +7879 +7883 +7901 +7907 +7919 +7927 +7933 +7937 +7949 +7951 +7963 +7993 +8009 +8011 +8017 +8039 +8053 +8059 +8069 +8081 +8087 +8089 +8093 +8101 +8111 +8117 +8123 +8147 +8161 +8167 +8171 +8179 +8191 +8209 +8219 +8221 +8231 +8233 +8237 +8243 +8263 +8269 +8273 +8287 +8291 +8293 +8297 +8311 +8317 +8329 +8353 +8363 +8369 +8377 +8387 +8389 +8419 +8423 +8429 +8431 +8443 +8447 +8461 +8467 +8501 +8513 +8521 +8527 +8537 +8539 +8543 +8563 +8573 +8581 +8597 +8599 +8609 +8623 +8627 +8629 +8641 +8647 +8663 +8669 +8677 +8681 +8689 +8693 +8699 +8707 +8713 +8719 +8731 +8737 +8741 +8747 +8753 +8761 +8779 +8783 +8803 +8807 +8819 +8821 +8831 +8837 +8839 +8849 +8861 +8863 +8867 +8887 +8893 +8923 +8929 +8933 +8941 +8951 +8963 +8969 +8971 +8999 +9001 +9007 +9011 +9013 +9029 +9041 +9043 +9049 +9059 +9067 +9091 +9103 +9109 +9127 +9133 +9137 +9151 +9157 +9161 +9173 +9181 +9187 +9199 +9203 +9209 +9221 +9227 +9239 +9241 +9257 +9277 +9281 +9283 +9293 +9311 +9319 +9323 +9337 +9341 +9343 +9349 +9371 +9377 +9391 +9397 +9403 +9413 +9419 +9421 +9431 +9433 +9437 +9439 +9461 +9463 +9467 +9473 +9479 +9491 +9497 +9511 +9521 +9533 +9539 +9547 +9551 +9587 +9601 +9613 +9619 +9623 +9629 +9631 +9643 +9649 +9661 +9677 +9679 +9689 +9697 +9719 +9721 +9733 +9739 +9743 +9749 +9767 +9769 +9781 +9787 +9791 +9803 +9811 +9817 +9829 +9833 +9839 +9851 +9857 +9859 +9871 +9883 +9887 +9901 +9907 +9923 +9929 +9931 +9941 +9949 +9967 +9973 +10007 +10009 +10037 +10039 +10061 +10067 +10069 +10079 +10091 +10093 +10099 +10103 +10111 +10133 +10139 +10141 +10151 +10159 +10163 +10169 +10177 +10181 +10193 +10211 +10223 +10243 +10247 +10253 +10259 +10267 +10271 +10273 +10289 +10301 +10303 +10313 +10321 +10331 +10333 +10337 +10343 +10357 +10369 +10391 +10399 +10427 +10429 +10433 +10453 +10457 +10459 +10463 +10477 +10487 +10499 +10501 +10513 +10529 +10531 +10559 +10567 +10589 +10597 +10601 +10607 +10613 +10627 +10631 +10639 +10651 +10657 +10663 +10667 +10687 +10691 +10709 +10711 +10723 +10729 +10733 +10739 +10753 +10771 +10781 
+10789 +10799 +10831 +10837 +10847 +10853 +10859 +10861 +10867 +10883 +10889 +10891 +10903 +10909 +10937 +10939 +10949 +10957 +10973 +10979 +10987 +10993 +11003 +11027 +11047 +11057 +11059 +11069 +11071 +11083 +11087 +11093 +11113 +11117 +11119 +11131 +11149 +11159 +11161 +11171 +11173 +11177 +11197 +11213 +11239 +11243 +11251 +11257 +11261 +11273 +11279 +11287 +11299 +11311 +11317 +11321 +11329 +11351 +11353 +11369 +11383 +11393 +11399 +11411 +11423 +11437 +11443 +11447 +11467 +11471 +11483 +11489 +11491 +11497 +11503 +11519 +11527 +11549 +11551 +11579 +11587 +11593 +11597 +11617 +11621 +11633 +11657 +11677 +11681 +11689 +11699 +11701 +11717 +11719 +11731 +11743 +11777 +11779 +11783 +11789 +11801 +11807 +11813 +11821 +11827 +11831 +11833 +11839 +11863 +11867 +11887 +11897 +11903 +11909 +11923 +11927 +11933 +11939 +11941 +11953 +11959 +11969 +11971 +11981 +11987 +12007 +12011 +12037 +12041 +12043 +12049 +12071 +12073 +12097 +12101 +12107 +12109 +12113 +12119 +12143 +12149 +12157 +12161 +12163 +12197 +12203 +12211 +12227 +12239 +12241 +12251 +12253 +12263 +12269 +12277 +12281 +12289 +12301 +12323 +12329 +12343 +12347 +12373 +12377 +12379 +12391 +12401 +12409 +12413 +12421 +12433 +12437 +12451 +12457 +12473 +12479 +12487 +12491 +12497 +12503 +12511 +12517 +12527 +12539 +12541 +12547 +12553 +12569 +12577 +12583 +12589 +12601 +12611 +12613 +12619 +12637 +12641 +12647 +12653 +12659 +12671 +12689 +12697 +12703 +12713 +12721 +12739 +12743 +12757 +12763 +12781 +12791 +12799 +12809 +12821 +12823 +12829 +12841 +12853 +12889 +12893 +12899 +12907 +12911 +12917 +12919 +12923 +12941 +12953 +12959 +12967 +12973 +12979 +12983 +13001 +13003 +13007 +13009 +13033 +13037 +13043 +13049 +13063 +13093 +13099 +13103 +13109 +13121 +13127 +13147 +13151 +13159 +13163 +13171 +13177 +13183 +13187 +13217 +13219 +13229 +13241 +13249 +13259 +13267 +13291 +13297 +13309 +13313 +13327 +13331 +13337 +13339 +13367 +13381 +13397 +13399 +13411 +13417 +13421 +13441 +13451 +13457 +13463 +13469 +13477 
+13487 +13499 +13513 +13523 +13537 +13553 +13567 +13577 +13591 +13597 +13613 +13619 +13627 +13633 +13649 +13669 +13679 +13681 +13687 +13691 +13693 +13697 +13709 +13711 +13721 +13723 +13729 +13751 +13757 +13759 +13763 +13781 +13789 +13799 +13807 +13829 +13831 +13841 +13859 +13873 +13877 +13879 +13883 +13901 +13903 +13907 +13913 +13921 +13931 +13933 +13963 +13967 +13997 +13999 +14009 +14011 +14029 +14033 +14051 +14057 +14071 +14081 +14083 +14087 +14107 +14143 +14149 +14153 +14159 +14173 +14177 +14197 +14207 +14221 +14243 +14249 +14251 +14281 +14293 +14303 +14321 +14323 +14327 +14341 +14347 +14369 +14387 +14389 +14401 +14407 +14411 +14419 +14423 +14431 +14437 +14447 +14449 +14461 +14479 +14489 +14503 +14519 +14533 +14537 +14543 +14549 +14551 +14557 +14561 +14563 +14591 +14593 +14621 +14627 +14629 +14633 +14639 +14653 +14657 +14669 +14683 +14699 +14713 +14717 +14723 +14731 +14737 +14741 +14747 +14753 +14759 +14767 +14771 +14779 +14783 +14797 +14813 +14821 +14827 +14831 +14843 +14851 +14867 +14869 +14879 +14887 +14891 +14897 +14923 +14929 +14939 +14947 +14951 +14957 +14969 +14983 +15013 +15017 +15031 +15053 +15061 +15073 +15077 +15083 +15091 +15101 +15107 +15121 +15131 +15137 +15139 +15149 +15161 +15173 +15187 +15193 +15199 +15217 +15227 +15233 +15241 +15259 +15263 +15269 +15271 +15277 +15287 +15289 +15299 +15307 +15313 +15319 +15329 +15331 +15349 +15359 +15361 +15373 +15377 +15383 +15391 +15401 +15413 +15427 +15439 +15443 +15451 +15461 +15467 +15473 +15493 +15497 +15511 +15527 +15541 +15551 +15559 +15569 +15581 +15583 +15601 +15607 +15619 +15629 +15641 +15643 +15647 +15649 +15661 +15667 +15671 +15679 +15683 +15727 +15731 +15733 +15737 +15739 +15749 +15761 +15767 +15773 +15787 +15791 +15797 +15803 +15809 +15817 +15823 +15859 +15877 +15881 +15887 +15889 +15901 +15907 +15913 +15919 +15923 +15937 +15959 +15971 +15973 +15991 +16001 +16007 +16033 +16057 +16061 +16063 +16067 +16069 +16073 +16087 +16091 +16097 +16103 +16111 +16127 +16139 +16141 +16183 +16187 +16189 +16193 
+16217 +16223 +16229 +16231 +16249 +16253 +16267 +16273 +16301 +16319 +16333 +16339 +16349 +16361 +16363 +16369 +16381 +16411 +16417 +16421 +16427 +16433 +16447 +16451 +16453 +16477 +16481 +16487 +16493 +16519 +16529 +16547 +16553 +16561 +16567 +16573 +16603 +16607 +16619 +16631 +16633 +16649 +16651 +16657 +16661 +16673 +16691 +16693 +16699 +16703 +16729 +16741 +16747 +16759 +16763 +16787 +16811 +16823 +16829 +16831 +16843 +16871 +16879 +16883 +16889 +16901 +16903 +16921 +16927 +16931 +16937 +16943 +16963 +16979 +16981 +16987 +16993 +17011 +17021 +17027 +17029 +17033 +17041 +17047 +17053 +17077 +17093 +17099 +17107 +17117 +17123 +17137 +17159 +17167 +17183 +17189 +17191 +17203 +17207 +17209 +17231 +17239 +17257 +17291 +17293 +17299 +17317 +17321 +17327 +17333 +17341 +17351 +17359 +17377 +17383 +17387 +17389 +17393 +17401 +17417 +17419 +17431 +17443 +17449 +17467 +17471 +17477 +17483 +17489 +17491 +17497 +17509 +17519 +17539 +17551 +17569 +17573 +17579 +17581 +17597 +17599 +17609 +17623 +17627 +17657 +17659 +17669 +17681 +17683 +17707 +17713 +17729 +17737 +17747 +17749 +17761 +17783 +17789 +17791 +17807 +17827 +17837 +17839 +17851 +17863 +17881 +17891 +17903 +17909 +17911 +17921 +17923 +17929 +17939 +17957 +17959 +17971 +17977 +17981 +17987 +17989 +18013 +18041 +18043 +18047 +18049 +18059 +18061 +18077 +18089 +18097 +18119 +18121 +18127 +18131 +18133 +18143 +18149 +18169 +18181 +18191 +18199 +18211 +18217 +18223 +18229 +18233 +18251 +18253 +18257 +18269 +18287 +18289 +18301 +18307 +18311 +18313 +18329 +18341 +18353 +18367 +18371 +18379 +18397 +18401 +18413 +18427 +18433 +18439 +18443 +18451 +18457 +18461 +18481 +18493 +18503 +18517 +18521 +18523 +18539 +18541 +18553 +18583 +18587 +18593 +18617 +18637 +18661 +18671 +18679 +18691 +18701 +18713 +18719 +18731 +18743 +18749 +18757 +18773 +18787 +18793 +18797 +18803 +18839 +18859 +18869 +18899 +18911 +18913 +18917 +18919 +18947 +18959 +18973 +18979 +19001 +19009 +19013 +19031 +19037 +19051 +19069 +19073 +19079 +19081 
+19087 +19121 +19139 +19141 +19157 +19163 +19181 +19183 +19207 +19211 +19213 +19219 +19231 +19237 +19249 +19259 +19267 +19273 +19289 +19301 +19309 +19319 +19333 +19373 +19379 +19381 +19387 +19391 +19403 +19417 +19421 +19423 +19427 +19429 +19433 +19441 +19447 +19457 +19463 +19469 +19471 +19477 +19483 +19489 +19501 +19507 +19531 +19541 +19543 +19553 +19559 +19571 +19577 +19583 +19597 +19603 +19609 +19661 +19681 +19687 +19697 +19699 +19709 +19717 +19727 +19739 +19751 +19753 +19759 +19763 +19777 +19793 +19801 +19813 +19819 +19841 +19843 +19853 +19861 +19867 +19889 +19891 +19913 +19919 +19927 +19937 +19949 +19961 +19963 +19973 +19979 +19991 +19993 +19997 +20011 +20021 +20023 +20029 +20047 +20051 +20063 +20071 +20089 +20101 +20107 +20113 +20117 +20123 +20129 +20143 +20147 +20149 +20161 +20173 +20177 +20183 +20201 +20219 +20231 +20233 +20249 +20261 +20269 +20287 +20297 +20323 +20327 +20333 +20341 +20347 +20353 +20357 +20359 +20369 +20389 +20393 +20399 +20407 +20411 +20431 +20441 +20443 +20477 +20479 +20483 +20507 +20509 +20521 +20533 +20543 +20549 +20551 +20563 +20593 +20599 +20611 +20627 +20639 +20641 +20663 +20681 +20693 +20707 +20717 +20719 +20731 +20743 +20747 +20749 +20753 +20759 +20771 +20773 +20789 +20807 +20809 +20849 +20857 +20873 +20879 +20887 +20897 +20899 +20903 +20921 +20929 +20939 +20947 +20959 +20963 +20981 +20983 +21001 +21011 +21013 +21017 +21019 +21023 +21031 +21059 +21061 +21067 +21089 +21101 +21107 +21121 +21139 +21143 +21149 +21157 +21163 +21169 +21179 +21187 +21191 +21193 +21211 +21221 +21227 +21247 +21269 +21277 +21283 +21313 +21317 +21319 +21323 +21341 +21347 +21377 +21379 +21383 +21391 +21397 +21401 +21407 +21419 +21433 +21467 +21481 +21487 +21491 +21493 +21499 +21503 +21517 +21521 +21523 +21529 +21557 +21559 +21563 +21569 +21577 +21587 +21589 +21599 +21601 +21611 +21613 +21617 +21647 +21649 +21661 +21673 +21683 +21701 +21713 +21727 +21737 +21739 +21751 +21757 +21767 +21773 +21787 +21799 +21803 +21817 +21821 +21839 +21841 +21851 +21859 +21863 
+21871 +21881 +21893 +21911 +21929 +21937 +21943 +21961 +21977 +21991 +21997 +22003 +22013 +22027 +22031 +22037 +22039 +22051 +22063 +22067 +22073 +22079 +22091 +22093 +22109 +22111 +22123 +22129 +22133 +22147 +22153 +22157 +22159 +22171 +22189 +22193 +22229 +22247 +22259 +22271 +22273 +22277 +22279 +22283 +22291 +22303 +22307 +22343 +22349 +22367 +22369 +22381 +22391 +22397 +22409 +22433 +22441 +22447 +22453 +22469 +22481 +22483 +22501 +22511 +22531 +22541 +22543 +22549 +22567 +22571 +22573 +22613 +22619 +22621 +22637 +22639 +22643 +22651 +22669 +22679 +22691 +22697 +22699 +22709 +22717 +22721 +22727 +22739 +22741 +22751 +22769 +22777 +22783 +22787 +22807 +22811 +22817 +22853 +22859 +22861 +22871 +22877 +22901 +22907 +22921 +22937 +22943 +22961 +22963 +22973 +22993 +23003 +23011 +23017 +23021 +23027 +23029 +23039 +23041 +23053 +23057 +23059 +23063 +23071 +23081 +23087 +23099 +23117 +23131 +23143 +23159 +23167 +23173 +23189 +23197 +23201 +23203 +23209 +23227 +23251 +23269 +23279 +23291 +23293 +23297 +23311 +23321 +23327 +23333 +23339 +23357 +23369 +23371 +23399 +23417 +23431 +23447 +23459 +23473 +23497 +23509 +23531 +23537 +23539 +23549 +23557 +23561 +23563 +23567 +23581 +23593 +23599 +23603 +23609 +23623 +23627 +23629 +23633 +23663 +23669 +23671 +23677 +23687 +23689 +23719 +23741 +23743 +23747 +23753 +23761 +23767 +23773 +23789 +23801 +23813 +23819 +23827 +23831 +23833 +23857 +23869 +23873 +23879 +23887 +23893 +23899 +23909 +23911 +23917 +23929 +23957 +23971 +23977 +23981 +23993 +24001 +24007 +24019 +24023 +24029 +24043 +24049 +24061 +24071 +24077 +24083 +24091 +24097 +24103 +24107 +24109 +24113 +24121 +24133 +24137 +24151 +24169 +24179 +24181 +24197 +24203 +24223 +24229 +24239 +24247 +24251 +24281 +24317 +24329 +24337 +24359 +24371 +24373 +24379 +24391 +24407 +24413 +24419 +24421 +24439 +24443 +24469 +24473 +24481 +24499 +24509 +24517 +24527 +24533 +24547 +24551 +24571 +24593 +24611 +24623 +24631 +24659 +24671 +24677 +24683 +24691 +24697 +24709 +24733 +24749 
+24763 +24767 +24781 +24793 +24799 +24809 +24821 +24841 +24847 +24851 +24859 +24877 +24889 +24907 +24917 +24919 +24923 +24943 +24953 +24967 +24971 +24977 +24979 +24989 +25013 +25031 +25033 +25037 +25057 +25073 +25087 +25097 +25111 +25117 +25121 +25127 +25147 +25153 +25163 +25169 +25171 +25183 +25189 +25219 +25229 +25237 +25243 +25247 +25253 +25261 +25301 +25303 +25307 +25309 +25321 +25339 +25343 +25349 +25357 +25367 +25373 +25391 +25409 +25411 +25423 +25439 +25447 +25453 +25457 +25463 +25469 +25471 +25523 +25537 +25541 +25561 +25577 +25579 +25583 +25589 +25601 +25603 +25609 +25621 +25633 +25639 +25643 +25657 +25667 +25673 +25679 +25693 +25703 +25717 +25733 +25741 +25747 +25759 +25763 +25771 +25793 +25799 +25801 +25819 +25841 +25847 +25849 +25867 +25873 +25889 +25903 +25913 +25919 +25931 +25933 +25939 +25943 +25951 +25969 +25981 +25997 +25999 +26003 +26017 +26021 +26029 +26041 +26053 +26083 +26099 +26107 +26111 +26113 +26119 +26141 +26153 +26161 +26171 +26177 +26183 +26189 +26203 +26209 +26227 +26237 +26249 +26251 +26261 +26263 +26267 +26293 +26297 +26309 +26317 +26321 +26339 +26347 +26357 +26371 +26387 +26393 +26399 +26407 +26417 +26423 +26431 +26437 +26449 +26459 +26479 +26489 +26497 +26501 +26513 +26539 +26557 +26561 +26573 +26591 +26597 +26627 +26633 +26641 +26647 +26669 +26681 +26683 +26687 +26693 +26699 +26701 +26711 +26713 +26717 +26723 +26729 +26731 +26737 +26759 +26777 +26783 +26801 +26813 +26821 +26833 +26839 +26849 +26861 +26863 +26879 +26881 +26891 +26893 +26903 +26921 +26927 +26947 +26951 +26953 +26959 +26981 +26987 +26993 +27011 +27017 +27031 +27043 +27059 +27061 +27067 +27073 +27077 +27091 +27103 +27107 +27109 +27127 +27143 +27179 +27191 +27197 +27211 +27239 +27241 +27253 +27259 +27271 +27277 +27281 +27283 +27299 +27329 +27337 +27361 +27367 +27397 +27407 +27409 +27427 +27431 +27437 +27449 +27457 +27479 +27481 +27487 +27509 +27527 +27529 +27539 +27541 +27551 +27581 +27583 +27611 +27617 +27631 +27647 +27653 +27673 +27689 +27691 +27697 +27701 +27733 
+27737 +27739 +27743 +27749 +27751 +27763 +27767 +27773 +27779 +27791 +27793 +27799 +27803 +27809 +27817 +27823 +27827 +27847 +27851 +27883 +27893 +27901 +27917 +27919 +27941 +27943 +27947 +27953 +27961 +27967 +27983 +27997 +28001 +28019 +28027 +28031 +28051 +28057 +28069 +28081 +28087 +28097 +28099 +28109 +28111 +28123 +28151 +28163 +28181 +28183 +28201 +28211 +28219 +28229 +28277 +28279 +28283 +28289 +28297 +28307 +28309 +28319 +28349 +28351 +28387 +28393 +28403 +28409 +28411 +28429 +28433 +28439 +28447 +28463 +28477 +28493 +28499 +28513 +28517 +28537 +28541 +28547 +28549 +28559 +28571 +28573 +28579 +28591 +28597 +28603 +28607 +28619 +28621 +28627 +28631 +28643 +28649 +28657 +28661 +28663 +28669 +28687 +28697 +28703 +28711 +28723 +28729 +28751 +28753 +28759 +28771 +28789 +28793 +28807 +28813 +28817 +28837 +28843 +28859 +28867 +28871 +28879 +28901 +28909 +28921 +28927 +28933 +28949 +28961 +28979 +29009 +29017 +29021 +29023 +29027 +29033 +29059 +29063 +29077 +29101 +29123 +29129 +29131 +29137 +29147 +29153 +29167 +29173 +29179 +29191 +29201 +29207 +29209 +29221 +29231 +29243 +29251 +29269 +29287 +29297 +29303 +29311 +29327 +29333 +29339 +29347 +29363 +29383 +29387 +29389 +29399 +29401 +29411 +29423 +29429 +29437 +29443 +29453 +29473 +29483 +29501 +29527 +29531 +29537 +29567 +29569 +29573 +29581 +29587 +29599 +29611 +29629 +29633 +29641 +29663 +29669 +29671 +29683 +29717 +29723 +29741 +29753 +29759 +29761 +29789 +29803 +29819 +29833 +29837 +29851 +29863 +29867 +29873 +29879 +29881 +29917 +29921 +29927 +29947 +29959 +29983 +29989 +30011 +30013 +30029 +30047 +30059 +30071 +30089 +30091 +30097 +30103 +30109 +30113 +30119 +30133 +30137 +30139 +30161 +30169 +30181 +30187 +30197 +30203 +30211 +30223 +30241 +30253 +30259 +30269 +30271 +30293 +30307 +30313 +30319 +30323 +30341 +30347 +30367 +30389 +30391 +30403 +30427 +30431 +30449 +30467 +30469 +30491 +30493 +30497 +30509 +30517 +30529 +30539 +30553 +30557 +30559 +30577 +30593 +30631 +30637 +30643 +30649 +30661 +30671 
+30677 +30689 +30697 +30703 +30707 +30713 +30727 +30757 +30763 +30773 +30781 +30803 +30809 +30817 +30829 +30839 +30841 +30851 +30853 +30859 +30869 +30871 +30881 +30893 +30911 +30931 +30937 +30941 +30949 +30971 +30977 +30983 +31013 +31019 +31033 +31039 +31051 +31063 +31069 +31079 +31081 +31091 +31121 +31123 +31139 +31147 +31151 +31153 +31159 +31177 +31181 +31183 +31189 +31193 +31219 +31223 +31231 +31237 +31247 +31249 +31253 +31259 +31267 +31271 +31277 +31307 +31319 +31321 +31327 +31333 +31337 +31357 +31379 +31387 +31391 +31393 +31397 +31469 +31477 +31481 +31489 +31511 +31513 +31517 +31531 +31541 +31543 +31547 +31567 +31573 +31583 +31601 +31607 +31627 +31643 +31649 +31657 +31663 +31667 +31687 +31699 +31721 +31723 +31727 +31729 +31741 +31751 +31769 +31771 +31793 +31799 +31817 +31847 +31849 +31859 +31873 +31883 +31891 +31907 +31957 +31963 +31973 +31981 +31991 +32003 +32009 +32027 +32029 +32051 +32057 +32059 +32063 +32069 +32077 +32083 +32089 +32099 +32117 +32119 +32141 +32143 +32159 +32173 +32183 +32189 +32191 +32203 +32213 +32233 +32237 +32251 +32257 +32261 +32297 +32299 +32303 +32309 +32321 +32323 +32327 +32341 +32353 +32359 +32363 +32369 +32371 +32377 +32381 +32401 +32411 +32413 +32423 +32429 +32441 +32443 +32467 +32479 +32491 +32497 +32503 +32507 +32531 +32533 +32537 +32561 +32563 +32569 +32573 +32579 +32587 +32603 +32609 +32611 +32621 +32633 +32647 +32653 +32687 +32693 +32707 +32713 +32717 +32719 +32749 +32771 +32779 +32783 +32789 +32797 +32801 +32803 +32831 +32833 +32839 +32843 +32869 +32887 +32909 +32911 +32917 +32933 +32939 +32941 +32957 +32969 +32971 +32983 +32987 +32993 +32999 +33013 +33023 +33029 +33037 +33049 +33053 +33071 +33073 +33083 +33091 +33107 +33113 +33119 +33149 +33151 +33161 +33179 +33181 +33191 +33199 +33203 +33211 +33223 +33247 +33287 +33289 +33301 +33311 +33317 +33329 +33331 +33343 +33347 +33349 +33353 +33359 +33377 +33391 +33403 +33409 +33413 +33427 +33457 +33461 +33469 +33479 +33487 +33493 +33503 +33521 +33529 +33533 +33547 +33563 +33569 
+33577 +33581 +33587 +33589 +33599 +33601 +33613 +33617 +33619 +33623 +33629 +33637 +33641 +33647 +33679 +33703 +33713 +33721 +33739 +33749 +33751 +33757 +33767 +33769 +33773 +33791 +33797 +33809 +33811 +33827 +33829 +33851 +33857 +33863 +33871 +33889 +33893 +33911 +33923 +33931 +33937 +33941 +33961 +33967 +33997 +34019 +34031 +34033 +34039 +34057 +34061 +34123 +34127 +34129 +34141 +34147 +34157 +34159 +34171 +34183 +34211 +34213 +34217 +34231 +34253 +34259 +34261 +34267 +34273 +34283 +34297 +34301 +34303 +34313 +34319 +34327 +34337 +34351 +34361 +34367 +34369 +34381 +34403 +34421 +34429 +34439 +34457 +34469 +34471 +34483 +34487 +34499 +34501 +34511 +34513 +34519 +34537 +34543 +34549 +34583 +34589 +34591 +34603 +34607 +34613 +34631 +34649 +34651 +34667 +34673 +34679 +34687 +34693 +34703 +34721 +34729 +34739 +34747 +34757 +34759 +34763 +34781 +34807 +34819 +34841 +34843 +34847 +34849 +34871 +34877 +34883 +34897 +34913 +34919 +34939 +34949 +34961 +34963 +34981 +35023 +35027 +35051 +35053 +35059 +35069 +35081 +35083 +35089 +35099 +35107 +35111 +35117 +35129 +35141 +35149 +35153 +35159 +35171 +35201 +35221 +35227 +35251 +35257 +35267 +35279 +35281 +35291 +35311 +35317 +35323 +35327 +35339 +35353 +35363 +35381 +35393 +35401 +35407 +35419 +35423 +35437 +35447 +35449 +35461 +35491 +35507 +35509 +35521 +35527 +35531 +35533 +35537 +35543 +35569 +35573 +35591 +35593 +35597 +35603 +35617 +35671 +35677 +35729 +35731 +35747 +35753 +35759 +35771 +35797 +35801 +35803 +35809 +35831 +35837 +35839 +35851 +35863 +35869 +35879 +35897 +35899 +35911 +35923 +35933 +35951 +35963 +35969 +35977 +35983 +35993 +35999 +36007 +36011 +36013 +36017 +36037 +36061 +36067 +36073 +36083 +36097 +36107 +36109 +36131 +36137 +36151 +36161 +36187 +36191 +36209 +36217 +36229 +36241 +36251 +36263 +36269 +36277 +36293 +36299 +36307 +36313 +36319 +36341 +36343 +36353 +36373 +36383 +36389 +36433 +36451 +36457 +36467 +36469 +36473 +36479 +36493 +36497 +36523 +36527 +36529 +36541 +36551 +36559 +36563 +36571 
+36583 +36587 +36599 +36607 +36629 +36637 +36643 +36653 +36671 +36677 +36683 +36691 +36697 +36709 +36713 +36721 +36739 +36749 +36761 +36767 +36779 +36781 +36787 +36791 +36793 +36809 +36821 +36833 +36847 +36857 +36871 +36877 +36887 +36899 +36901 +36913 +36919 +36923 +36929 +36931 +36943 +36947 +36973 +36979 +36997 +37003 +37013 +37019 +37021 +37039 +37049 +37057 +37061 +37087 +37097 +37117 +37123 +37139 +37159 +37171 +37181 +37189 +37199 +37201 +37217 +37223 +37243 +37253 +37273 +37277 +37307 +37309 +37313 +37321 +37337 +37339 +37357 +37361 +37363 +37369 +37379 +37397 +37409 +37423 +37441 +37447 +37463 +37483 +37489 +37493 +37501 +37507 +37511 +37517 +37529 +37537 +37547 +37549 +37561 +37567 +37571 +37573 +37579 +37589 +37591 +37607 +37619 +37633 +37643 +37649 +37657 +37663 +37691 +37693 +37699 +37717 +37747 +37781 +37783 +37799 +37811 +37813 +37831 +37847 +37853 +37861 +37871 +37879 +37889 +37897 +37907 +37951 +37957 +37963 +37967 +37987 +37991 +37993 +37997 +38011 +38039 +38047 +38053 +38069 +38083 +38113 +38119 +38149 +38153 +38167 +38177 +38183 +38189 +38197 +38201 +38219 +38231 +38237 +38239 +38261 +38273 +38281 +38287 +38299 +38303 +38317 +38321 +38327 +38329 +38333 +38351 +38371 +38377 +38393 +38431 +38447 +38449 +38453 +38459 +38461 +38501 +38543 +38557 +38561 +38567 +38569 +38593 +38603 +38609 +38611 +38629 +38639 +38651 +38653 +38669 +38671 +38677 +38693 +38699 +38707 +38711 +38713 +38723 +38729 +38737 +38747 +38749 +38767 +38783 +38791 +38803 +38821 +38833 +38839 +38851 +38861 +38867 +38873 +38891 +38903 +38917 +38921 +38923 +38933 +38953 +38959 +38971 +38977 +38993 +39019 +39023 +39041 +39043 +39047 +39079 +39089 +39097 +39103 +39107 +39113 +39119 +39133 +39139 +39157 +39161 +39163 +39181 +39191 +39199 +39209 +39217 +39227 +39229 +39233 +39239 +39241 +39251 +39293 +39301 +39313 +39317 +39323 +39341 +39343 +39359 +39367 +39371 +39373 +39383 +39397 +39409 +39419 +39439 +39443 +39451 +39461 +39499 +39503 +39509 +39511 +39521 +39541 +39551 +39563 +39569 
+39581 +39607 +39619 +39623 +39631 +39659 +39667 +39671 +39679 +39703 +39709 +39719 +39727 +39733 +39749 +39761 +39769 +39779 +39791 +39799 +39821 +39827 +39829 +39839 +39841 +39847 +39857 +39863 +39869 +39877 +39883 +39887 +39901 +39929 +39937 +39953 +39971 +39979 +39983 +39989 +40009 +40013 +40031 +40037 +40039 +40063 +40087 +40093 +40099 +40111 +40123 +40127 +40129 +40151 +40153 +40163 +40169 +40177 +40189 +40193 +40213 +40231 +40237 +40241 +40253 +40277 +40283 +40289 +40343 +40351 +40357 +40361 +40387 +40423 +40427 +40429 +40433 +40459 +40471 +40483 +40487 +40493 +40499 +40507 +40519 +40529 +40531 +40543 +40559 +40577 +40583 +40591 +40597 +40609 +40627 +40637 +40639 +40693 +40697 +40699 +40709 +40739 +40751 +40759 +40763 +40771 +40787 +40801 +40813 +40819 +40823 +40829 +40841 +40847 +40849 +40853 +40867 +40879 +40883 +40897 +40903 +40927 +40933 +40939 +40949 +40961 +40973 +40993 +41011 +41017 +41023 +41039 +41047 +41051 +41057 +41077 +41081 +41113 +41117 +41131 +41141 +41143 +41149 +41161 +41177 +41179 +41183 +41189 +41201 +41203 +41213 +41221 +41227 +41231 +41233 +41243 +41257 +41263 +41269 +41281 +41299 +41333 +41341 +41351 +41357 +41381 +41387 +41389 +41399 +41411 +41413 +41443 +41453 +41467 +41479 +41491 +41507 +41513 +41519 +41521 +41539 +41543 +41549 +41579 +41593 +41597 +41603 +41609 +41611 +41617 +41621 +41627 +41641 +41647 +41651 +41659 +41669 +41681 +41687 +41719 +41729 +41737 +41759 +41761 +41771 +41777 +41801 +41809 +41813 +41843 +41849 +41851 +41863 +41879 +41887 +41893 +41897 +41903 +41911 +41927 +41941 +41947 +41953 +41957 +41959 +41969 +41981 +41983 +41999 +42013 +42017 +42019 +42023 +42043 +42061 +42071 +42073 +42083 +42089 +42101 +42131 +42139 +42157 +42169 +42179 +42181 +42187 +42193 +42197 +42209 +42221 +42223 +42227 +42239 +42257 +42281 +42283 +42293 +42299 +42307 +42323 +42331 +42337 +42349 +42359 +42373 +42379 +42391 +42397 +42403 +42407 +42409 +42433 +42437 +42443 +42451 +42457 +42461 +42463 +42467 +42473 +42487 +42491 +42499 +42509 
+42533 +42557 +42569 +42571 +42577 +42589 +42611 +42641 +42643 +42649 +42667 +42677 +42683 +42689 +42697 +42701 +42703 +42709 +42719 +42727 +42737 +42743 +42751 +42767 +42773 +42787 +42793 +42797 +42821 +42829 +42839 +42841 +42853 +42859 +42863 +42899 +42901 +42923 +42929 +42937 +42943 +42953 +42961 +42967 +42979 +42989 +43003 +43013 +43019 +43037 +43049 +43051 +43063 +43067 +43093 +43103 +43117 +43133 +43151 +43159 +43177 +43189 +43201 +43207 +43223 +43237 +43261 +43271 +43283 +43291 +43313 +43319 +43321 +43331 +43391 +43397 +43399 +43403 +43411 +43427 +43441 +43451 +43457 +43481 +43487 +43499 +43517 +43541 +43543 +43573 +43577 +43579 +43591 +43597 +43607 +43609 +43613 +43627 +43633 +43649 +43651 +43661 +43669 +43691 +43711 +43717 +43721 +43753 +43759 +43777 +43781 +43783 +43787 +43789 +43793 +43801 +43853 +43867 +43889 +43891 +43913 +43933 +43943 +43951 +43961 +43963 +43969 +43973 +43987 +43991 +43997 +44017 +44021 +44027 +44029 +44041 +44053 +44059 +44071 +44087 +44089 +44101 +44111 +44119 +44123 +44129 +44131 +44159 +44171 +44179 +44189 +44201 +44203 +44207 +44221 +44249 +44257 +44263 +44267 +44269 +44273 +44279 +44281 +44293 +44351 +44357 +44371 +44381 +44383 +44389 +44417 +44449 +44453 +44483 +44491 +44497 +44501 +44507 +44519 +44531 +44533 +44537 +44543 +44549 +44563 +44579 +44587 +44617 +44621 +44623 +44633 +44641 +44647 +44651 +44657 +44683 +44687 +44699 +44701 +44711 +44729 +44741 +44753 +44771 +44773 +44777 +44789 +44797 +44809 +44819 +44839 +44843 +44851 +44867 +44879 +44887 +44893 +44909 +44917 +44927 +44939 +44953 +44959 +44963 +44971 +44983 +44987 +45007 +45013 +45053 +45061 +45077 +45083 +45119 +45121 +45127 +45131 +45137 +45139 +45161 +45179 +45181 +45191 +45197 +45233 +45247 +45259 +45263 +45281 +45289 +45293 +45307 +45317 +45319 +45329 +45337 +45341 +45343 +45361 +45377 +45389 +45403 +45413 +45427 +45433 +45439 +45481 +45491 +45497 +45503 +45523 +45533 +45541 +45553 +45557 +45569 +45587 +45589 +45599 +45613 +45631 +45641 +45659 +45667 +45673 
+45677 +45691 +45697 +45707 +45737 +45751 +45757 +45763 +45767 +45779 +45817 +45821 +45823 +45827 +45833 +45841 +45853 +45863 +45869 +45887 +45893 +45943 +45949 +45953 +45959 +45971 +45979 +45989 +46021 +46027 +46049 +46051 +46061 +46073 +46091 +46093 +46099 +46103 +46133 +46141 +46147 +46153 +46171 +46181 +46183 +46187 +46199 +46219 +46229 +46237 +46261 +46271 +46273 +46279 +46301 +46307 +46309 +46327 +46337 +46349 +46351 +46381 +46399 +46411 +46439 +46441 +46447 +46451 +46457 +46471 +46477 +46489 +46499 +46507 +46511 +46523 +46549 +46559 +46567 +46573 +46589 +46591 +46601 +46619 +46633 +46639 +46643 +46649 +46663 +46679 +46681 +46687 +46691 +46703 +46723 +46727 +46747 +46751 +46757 +46769 +46771 +46807 +46811 +46817 +46819 +46829 +46831 +46853 +46861 +46867 +46877 +46889 +46901 +46919 +46933 +46957 +46993 +46997 +47017 +47041 +47051 +47057 +47059 +47087 +47093 +47111 +47119 +47123 +47129 +47137 +47143 +47147 +47149 +47161 +47189 +47207 +47221 +47237 +47251 +47269 +47279 +47287 +47293 +47297 +47303 +47309 +47317 +47339 +47351 +47353 +47363 +47381 +47387 +47389 +47407 +47417 +47419 +47431 +47441 +47459 +47491 +47497 +47501 +47507 +47513 +47521 +47527 +47533 +47543 +47563 +47569 +47581 +47591 +47599 +47609 +47623 +47629 +47639 +47653 +47657 +47659 +47681 +47699 +47701 +47711 +47713 +47717 +47737 +47741 +47743 +47777 +47779 +47791 +47797 +47807 +47809 +47819 +47837 +47843 +47857 +47869 +47881 +47903 +47911 +47917 +47933 +47939 +47947 +47951 +47963 +47969 +47977 +47981 +48017 +48023 +48029 +48049 +48073 +48079 +48091 +48109 +48119 +48121 +48131 +48157 +48163 +48179 +48187 +48193 +48197 +48221 +48239 +48247 +48259 +48271 +48281 +48299 +48311 +48313 +48337 +48341 +48353 +48371 +48383 +48397 +48407 +48409 +48413 +48437 +48449 +48463 +48473 +48479 +48481 +48487 +48491 +48497 +48523 +48527 +48533 +48539 +48541 +48563 +48571 +48589 +48593 +48611 +48619 +48623 +48647 +48649 +48661 +48673 +48677 +48679 +48731 +48733 +48751 +48757 +48761 +48767 +48779 +48781 +48787 +48799 
+48809 +48817 +48821 +48823 +48847 +48857 +48859 +48869 +48871 +48883 +48889 +48907 +48947 +48953 +48973 +48989 +48991 +49003 +49009 +49019 +49031 +49033 +49037 +49043 +49057 +49069 +49081 +49103 +49109 +49117 +49121 +49123 +49139 +49157 +49169 +49171 +49177 +49193 +49199 +49201 +49207 +49211 +49223 +49253 +49261 +49277 +49279 +49297 +49307 +49331 +49333 +49339 +49363 +49367 +49369 +49391 +49393 +49409 +49411 +49417 +49429 +49433 +49451 +49459 +49463 +49477 +49481 +49499 +49523 +49529 +49531 +49537 +49547 +49549 +49559 +49597 +49603 +49613 +49627 +49633 +49639 +49663 +49667 +49669 +49681 +49697 +49711 +49727 +49739 +49741 +49747 +49757 +49783 +49787 +49789 +49801 +49807 +49811 +49823 +49831 +49843 +49853 +49871 +49877 +49891 +49919 +49921 +49927 +49937 +49939 +49943 +49957 +49991 +49993 +49999 +50021 +50023 +50033 +50047 +50051 +50053 +50069 +50077 +50087 +50093 +50101 +50111 +50119 +50123 +50129 +50131 +50147 +50153 +50159 +50177 +50207 +50221 +50227 +50231 +50261 +50263 +50273 +50287 +50291 +50311 +50321 +50329 +50333 +50341 +50359 +50363 +50377 +50383 +50387 +50411 +50417 +50423 +50441 +50459 +50461 +50497 +50503 +50513 +50527 +50539 +50543 +50549 +50551 +50581 +50587 +50591 +50593 +50599 +50627 +50647 +50651 +50671 +50683 +50707 +50723 +50741 +50753 +50767 +50773 +50777 +50789 +50821 +50833 +50839 +50849 +50857 +50867 +50873 +50891 +50893 +50909 +50923 +50929 +50951 +50957 +50969 +50971 +50989 +50993 +51001 +51031 +51043 +51047 +51059 +51061 +51071 +51109 +51131 +51133 +51137 +51151 +51157 +51169 +51193 +51197 +51199 +51203 +51217 +51229 +51239 +51241 +51257 +51263 +51283 +51287 +51307 +51329 +51341 +51343 +51347 +51349 +51361 +51383 +51407 +51413 +51419 +51421 +51427 +51431 +51437 +51439 +51449 +51461 +51473 +51479 +51481 +51487 +51503 +51511 +51517 +51521 +51539 +51551 +51563 +51577 +51581 +51593 +51599 +51607 +51613 +51631 +51637 +51647 +51659 +51673 +51679 +51683 +51691 +51713 +51719 +51721 +51749 +51767 +51769 +51787 +51797 +51803 +51817 +51827 +51829 
+51839 +51853 +51859 +51869 +51871 +51893 +51899 +51907 +51913 +51929 +51941 +51949 +51971 +51973 +51977 +51991 +52009 +52021 +52027 +52051 +52057 +52067 +52069 +52081 +52103 +52121 +52127 +52147 +52153 +52163 +52177 +52181 +52183 +52189 +52201 +52223 +52237 +52249 +52253 +52259 +52267 +52289 +52291 +52301 +52313 +52321 +52361 +52363 +52369 +52379 +52387 +52391 +52433 +52453 +52457 +52489 +52501 +52511 +52517 +52529 +52541 +52543 +52553 +52561 +52567 +52571 +52579 +52583 +52609 +52627 +52631 +52639 +52667 +52673 +52691 +52697 +52709 +52711 +52721 +52727 +52733 +52747 +52757 +52769 +52783 +52807 +52813 +52817 +52837 +52859 +52861 +52879 +52883 +52889 +52901 +52903 +52919 +52937 +52951 +52957 +52963 +52967 +52973 +52981 +52999 +53003 +53017 +53047 +53051 +53069 +53077 +53087 +53089 +53093 +53101 +53113 +53117 +53129 +53147 +53149 +53161 +53171 +53173 +53189 +53197 +53201 +53231 +53233 +53239 +53267 +53269 +53279 +53281 +53299 +53309 +53323 +53327 +53353 +53359 +53377 +53381 +53401 +53407 +53411 +53419 +53437 +53441 +53453 +53479 +53503 +53507 +53527 +53549 +53551 +53569 +53591 +53593 +53597 +53609 +53611 +53617 +53623 +53629 +53633 +53639 +53653 +53657 +53681 +53693 +53699 +53717 +53719 +53731 +53759 +53773 +53777 +53783 +53791 +53813 +53819 +53831 +53849 +53857 +53861 +53881 +53887 +53891 +53897 +53899 +53917 +53923 +53927 +53939 +53951 +53959 +53987 +53993 +54001 +54011 +54013 +54037 +54049 +54059 +54083 +54091 +54101 +54121 +54133 +54139 +54151 +54163 +54167 +54181 +54193 +54217 +54251 +54269 +54277 +54287 +54293 +54311 +54319 +54323 +54331 +54347 +54361 +54367 +54371 +54377 +54401 +54403 +54409 +54413 +54419 +54421 +54437 +54443 +54449 +54469 +54493 +54497 +54499 +54503 +54517 +54521 +54539 +54541 +54547 +54559 +54563 +54577 +54581 +54583 +54601 +54617 +54623 +54629 +54631 +54647 +54667 +54673 +54679 +54709 +54713 +54721 +54727 +54751 +54767 +54773 +54779 +54787 +54799 +54829 +54833 +54851 +54869 +54877 +54881 +54907 +54917 +54919 +54941 +54949 +54959 +54973 
+54979 +54983 +55001 +55009 +55021 +55049 +55051 +55057 +55061 +55073 +55079 +55103 +55109 +55117 +55127 +55147 +55163 +55171 +55201 +55207 +55213 +55217 +55219 +55229 +55243 +55249 +55259 +55291 +55313 +55331 +55333 +55337 +55339 +55343 +55351 +55373 +55381 +55399 +55411 +55439 +55441 +55457 +55469 +55487 +55501 +55511 +55529 +55541 +55547 +55579 +55589 +55603 +55609 +55619 +55621 +55631 +55633 +55639 +55661 +55663 +55667 +55673 +55681 +55691 +55697 +55711 +55717 +55721 +55733 +55763 +55787 +55793 +55799 +55807 +55813 +55817 +55819 +55823 +55829 +55837 +55843 +55849 +55871 +55889 +55897 +55901 +55903 +55921 +55927 +55931 +55933 +55949 +55967 +55987 +55997 +56003 +56009 +56039 +56041 +56053 +56081 +56087 +56093 +56099 +56101 +56113 +56123 +56131 +56149 +56167 +56171 +56179 +56197 +56207 +56209 +56237 +56239 +56249 +56263 +56267 +56269 +56299 +56311 +56333 +56359 +56369 +56377 +56383 +56393 +56401 +56417 +56431 +56437 +56443 +56453 +56467 +56473 +56477 +56479 +56489 +56501 +56503 +56509 +56519 +56527 +56531 +56533 +56543 +56569 +56591 +56597 +56599 +56611 +56629 +56633 +56659 +56663 +56671 +56681 +56687 +56701 +56711 +56713 +56731 +56737 +56747 +56767 +56773 +56779 +56783 +56807 +56809 +56813 +56821 +56827 +56843 +56857 +56873 +56891 +56893 +56897 +56909 +56911 +56921 +56923 +56929 +56941 +56951 +56957 +56963 +56983 +56989 +56993 +56999 +57037 +57041 +57047 +57059 +57073 +57077 +57089 +57097 +57107 +57119 +57131 +57139 +57143 +57149 +57163 +57173 +57179 +57191 +57193 +57203 +57221 +57223 +57241 +57251 +57259 +57269 +57271 +57283 +57287 +57301 +57329 +57331 +57347 +57349 +57367 +57373 +57383 +57389 +57397 +57413 +57427 +57457 +57467 +57487 +57493 +57503 +57527 +57529 +57557 +57559 +57571 +57587 +57593 +57601 +57637 +57641 +57649 +57653 +57667 +57679 +57689 +57697 +57709 +57713 +57719 +57727 +57731 +57737 +57751 +57773 +57781 +57787 +57791 +57793 +57803 +57809 +57829 +57839 +57847 +57853 +57859 +57881 +57899 +57901 +57917 +57923 +57943 +57947 +57973 +57977 +57991 
+58013 +58027 +58031 +58043 +58049 +58057 +58061 +58067 +58073 +58099 +58109 +58111 +58129 +58147 +58151 +58153 +58169 +58171 +58189 +58193 +58199 +58207 +58211 +58217 +58229 +58231 +58237 +58243 +58271 +58309 +58313 +58321 +58337 +58363 +58367 +58369 +58379 +58391 +58393 +58403 +58411 +58417 +58427 +58439 +58441 +58451 +58453 +58477 +58481 +58511 +58537 +58543 +58549 +58567 +58573 +58579 +58601 +58603 +58613 +58631 +58657 +58661 +58679 +58687 +58693 +58699 +58711 +58727 +58733 +58741 +58757 +58763 +58771 +58787 +58789 +58831 +58889 +58897 +58901 +58907 +58909 +58913 +58921 +58937 +58943 +58963 +58967 +58979 +58991 +58997 +59009 +59011 +59021 +59023 +59029 +59051 +59053 +59063 +59069 +59077 +59083 +59093 +59107 +59113 +59119 +59123 +59141 +59149 +59159 +59167 +59183 +59197 +59207 +59209 +59219 +59221 +59233 +59239 +59243 +59263 +59273 +59281 +59333 +59341 +59351 +59357 +59359 +59369 +59377 +59387 +59393 +59399 +59407 +59417 +59419 +59441 +59443 +59447 +59453 +59467 +59471 +59473 +59497 +59509 +59513 +59539 +59557 +59561 +59567 +59581 +59611 +59617 +59621 +59627 +59629 +59651 +59659 +59663 +59669 +59671 +59693 +59699 +59707 +59723 +59729 +59743 +59747 +59753 +59771 +59779 +59791 +59797 +59809 +59833 +59863 +59879 +59887 +59921 +59929 +59951 +59957 +59971 +59981 +59999 +60013 +60017 +60029 +60037 +60041 +60077 +60083 +60089 +60091 +60101 +60103 +60107 +60127 +60133 +60139 +60149 +60161 +60167 +60169 +60209 +60217 +60223 +60251 +60257 +60259 +60271 +60289 +60293 +60317 +60331 +60337 +60343 +60353 +60373 +60383 +60397 +60413 +60427 +60443 +60449 +60457 +60493 +60497 +60509 +60521 +60527 +60539 +60589 +60601 +60607 +60611 +60617 +60623 +60631 +60637 +60647 +60649 +60659 +60661 +60679 +60689 +60703 +60719 +60727 +60733 +60737 +60757 +60761 +60763 +60773 +60779 +60793 +60811 +60821 +60859 +60869 +60887 +60889 +60899 +60901 +60913 +60917 +60919 +60923 +60937 +60943 +60953 +60961 +61001 +61007 +61027 +61031 +61043 +61051 +61057 +61091 +61099 +61121 +61129 +61141 +61151 
+61153 +61169 +61211 +61223 +61231 +61253 +61261 +61283 +61291 +61297 +61331 +61333 +61339 +61343 +61357 +61363 +61379 +61381 +61403 +61409 +61417 +61441 +61463 +61469 +61471 +61483 +61487 +61493 +61507 +61511 +61519 +61543 +61547 +61553 +61559 +61561 +61583 +61603 +61609 +61613 +61627 +61631 +61637 +61643 +61651 +61657 +61667 +61673 +61681 +61687 +61703 +61717 +61723 +61729 +61751 +61757 +61781 +61813 +61819 +61837 +61843 +61861 +61871 +61879 +61909 +61927 +61933 +61949 +61961 +61967 +61979 +61981 +61987 +61991 +62003 +62011 +62017 +62039 +62047 +62053 +62057 +62071 +62081 +62099 +62119 +62129 +62131 +62137 +62141 +62143 +62171 +62189 +62191 +62201 +62207 +62213 +62219 +62233 +62273 +62297 +62299 +62303 +62311 +62323 +62327 +62347 +62351 +62383 +62401 +62417 +62423 +62459 +62467 +62473 +62477 +62483 +62497 +62501 +62507 +62533 +62539 +62549 +62563 +62581 +62591 +62597 +62603 +62617 +62627 +62633 +62639 +62653 +62659 +62683 +62687 +62701 +62723 +62731 +62743 +62753 +62761 +62773 +62791 +62801 +62819 +62827 +62851 +62861 +62869 +62873 +62897 +62903 +62921 +62927 +62929 +62939 +62969 +62971 +62981 +62983 +62987 +62989 +63029 +63031 +63059 +63067 +63073 +63079 +63097 +63103 +63113 +63127 +63131 +63149 +63179 +63197 +63199 +63211 +63241 +63247 +63277 +63281 +63299 +63311 +63313 +63317 +63331 +63337 +63347 +63353 +63361 +63367 +63377 +63389 +63391 +63397 +63409 +63419 +63421 +63439 +63443 +63463 +63467 +63473 +63487 +63493 +63499 +63521 +63527 +63533 +63541 +63559 +63577 +63587 +63589 +63599 +63601 +63607 +63611 +63617 +63629 +63647 +63649 +63659 +63667 +63671 +63689 +63691 +63697 +63703 +63709 +63719 +63727 +63737 +63743 +63761 +63773 +63781 +63793 +63799 +63803 +63809 +63823 +63839 +63841 +63853 +63857 +63863 +63901 +63907 +63913 +63929 +63949 +63977 +63997 +64007 +64013 +64019 +64033 +64037 +64063 +64067 +64081 +64091 +64109 +64123 +64151 +64153 +64157 +64171 +64187 +64189 +64217 +64223 +64231 +64237 +64271 +64279 +64283 +64301 +64303 +64319 +64327 +64333 +64373 
+64381 +64399 +64403 +64433 +64439 +64451 +64453 +64483 +64489 +64499 +64513 +64553 +64567 +64577 +64579 +64591 +64601 +64609 +64613 +64621 +64627 +64633 +64661 +64663 +64667 +64679 +64693 +64709 +64717 +64747 +64763 +64781 +64783 +64793 +64811 +64817 +64849 +64853 +64871 +64877 +64879 +64891 +64901 +64919 +64921 +64927 +64937 +64951 +64969 +64997 +65003 +65011 +65027 +65029 +65033 +65053 +65063 +65071 +65089 +65099 +65101 +65111 +65119 +65123 +65129 +65141 +65147 +65167 +65171 +65173 +65179 +65183 +65203 +65213 +65239 +65257 +65267 +65269 +65287 +65293 +65309 +65323 +65327 +65353 +65357 +65371 +65381 +65393 +65407 +65413 +65419 +65423 +65437 +65447 +65449 +65479 +65497 +65519 +65521 diff --git a/security/nss/lib/freebl/mpi/doc/prng.pod b/security/nss/lib/freebl/mpi/doc/prng.pod new file mode 100644 index 000000000..6da4d4a9c --- /dev/null +++ b/security/nss/lib/freebl/mpi/doc/prng.pod @@ -0,0 +1,38 @@ +=head1 NAME + + prng - pseudo-random number generator + +=head1 SYNOPSIS + + prng [count] + +=head1 DESCRIPTION + +B<prng> generates 32-bit pseudo-random integers using the +Blum-Blum-Shub (BBS) quadratic residue generator. It is seeded using +the standard C library's rand() function, which is itself seeded from the +system clock and the process ID number. Thus, the values generated +are not particularly useful for cryptographic applications, but they +are in general much better than the typical output of the usual +multiplicative congruency generator used by most runtime libraries. + +You may optionally specify how many random values should be generated +by giving a I<count> argument on the command line. If you do not +specify a count, only one random value will be generated. The results +are output to the standard output in decimal notation, one value per +line. + +=head1 RESTRICTIONS + +As stated above, B<prng> uses the C library's rand() function to seed +the generator, so it is not terribly suitable for cryptographic +applications. 
Also note that each time you run the program, a new +seed is generated, so it is better to run it once with a I<count> +parameter than it is to run it multiple times to generate several +values. + +=head1 AUTHOR + + Michael J. Fromberger + Copyright (C) 1998 Michael J. Fromberger, All Rights Reserved + Thayer School of Engineering, Dartmouth College, Hanover, NH USA diff --git a/security/nss/lib/freebl/mpi/doc/redux.txt b/security/nss/lib/freebl/mpi/doc/redux.txt new file mode 100644 index 000000000..0df0f0390 --- /dev/null +++ b/security/nss/lib/freebl/mpi/doc/redux.txt @@ -0,0 +1,86 @@ +Modular Reduction + +Usually, modular reduction is accomplished by long division, using the +mp_div() or mp_mod() functions. However, when performing modular +exponentiation, you spend a lot of time reducing by the same modulus +again and again. For this purpose, doing a full division for each +multiplication is quite inefficient. + +For this reason, the mp_exptmod() function does not perform modular +reductions in the usual way, but instead takes advantage of an +algorithm due to Barrett, as described by Menezes, van Oorschot and +Vanstone in their book _Handbook of Applied Cryptography_, published +by the CRC Press (see Chapter 14 for details). This method reduces +most of the computation of reduction to efficient shifting and masking +operations, and avoids the multiple-precision division entirely. + +Here is a brief synopsis of Barrett reduction, as it is implemented in +this library. + +Let b denote the radix of the computation (one more than the maximum +value that can be denoted by an mp_digit). Let m be the modulus, and +let k be the number of significant digits of m. Let x be the value to +be reduced modulo m. 
By the Division Theorem, there exist unique +integers Q and R such that: + + x = Qm + R, 0 <= R < m + +Barrett reduction takes advantage of the fact that you can easily +approximate Q to within two, given a value M such that: + + 2k + b + M = floor( ----- ) + m + +Computation of M requires a full-precision division step, so if you +are only doing a single reduction by m, you gain no advantage. +However, when multiple reductions by the same m are required, this +division need only be done once, beforehand. Using this, we can use +the following equation to compute Q', an approximation of Q: + + x + floor( ------ ) M + k-1 + b +Q' = floor( ----------------- ) + k+1 + b + +The divisions by b^(k-1) and b^(k+1) and the floor() functions can be +efficiently implemented with shifts and masks, leaving only a single +multiplication to be performed to get this approximation. It can be +shown that Q - 2 <= Q' <= Q, so in the worst case, we can get out with +two additional subtractions to bring the value into line with the +actual value of Q. + +Once we've got Q', we basically multiply that by m and subtract from +x, yielding: + + x - Q'm = Qm + R - Q'm + +Since we know the constraint on Q', this is one of: + + R + m + R + 2m + R + +Since R < m by the Division Theorem, we can simply subtract off m +until we get a value in the correct range, which will happen with no +more than 2 subtractions: + + v = x - Q'm + + while(v >= m) + v = v - m + endwhile + + +In random performance trials, modular exponentiation using this method +of reduction gave around a 40% speedup over using the division for +reduction. + +------------------------------------------------------------------ + This Source Code Form is subject to the terms of the Mozilla Public + # License, v. 2.0. If a copy of the MPL was not distributed with this + # file, You can obtain one at http://mozilla.org/MPL/2.0/. 
diff --git a/security/nss/lib/freebl/mpi/doc/sqrt.txt b/security/nss/lib/freebl/mpi/doc/sqrt.txt new file mode 100644 index 000000000..4529cbfc4 --- /dev/null +++ b/security/nss/lib/freebl/mpi/doc/sqrt.txt @@ -0,0 +1,50 @@ +Square Root + +A simple iterative algorithm is used to compute the greatest integer +less than or equal to the square root. Essentially, this is Newton's +linear approximation, computed by finding successive values of the +equation: + + x[k]^2 - V +x[k+1] = x[k] - ------------ + 2 x[k] + +...where V is the value for which the square root is being sought. In +essence, what is happening here is that we guess a value for the +square root, then figure out how far off we were by squaring our guess +and subtracting the target. Using this value, we compute a linear +approximation for the error, and adjust the "guess". We keep doing +this until the precision gets low enough that the above equation +yields a quotient of zero. At this point, our last guess is one +greater than the square root we're seeking. + +The initial guess is computed by dividing V by 4, which is a heuristic +I have found to be fairly good on average. This also has the +advantage of being very easy to compute efficiently, even for large +values. + +So, the resulting algorithm works as follows: + + x = V / 4 /* compute initial guess */ + + loop + t = (x * x) - V /* Compute absolute error */ + u = 2 * x /* Adjust by tangent slope */ + t = t / u + + /* Loop is done if error is zero */ + if(t == 0) + break + + /* Adjust guess by error term */ + x = x - t + end + + x = x - 1 + +The result of the computation is the value of x. + +------------------------------------------------------------------ + This Source Code Form is subject to the terms of the Mozilla Public + # License, v. 2.0. If a copy of the MPL was not distributed with this + # file, You can obtain one at http://mozilla.org/MPL/2.0/. 
diff --git a/security/nss/lib/freebl/mpi/doc/square.txt b/security/nss/lib/freebl/mpi/doc/square.txt new file mode 100644 index 000000000..edbb97882 --- /dev/null +++ b/security/nss/lib/freebl/mpi/doc/square.txt @@ -0,0 +1,72 @@ +Squaring Algorithm + +When you are squaring a value, you can take advantage of the fact that +half the multiplications performed by the more general multiplication +algorithm (see 'mul.txt' for a description) are redundant when the +multiplicand equals the multiplier. + +In particular, the modified algorithm is: + +k = 0 +for j <- 0 to (#a - 1) + w = c[2*j] + (a[j] ^ 2); + k = w div R + + for i <- j+1 to (#a - 1) + w = (2 * a[j] * a[i]) + k + c[i+j] + c[i+j] = w mod R + k = w div R + endfor + c[i+j] = k; + k = 0; +endfor + +On the surface, this looks identical to the multiplication algorithm; +however, note the following differences: + + - precomputation of the leading term in the outer loop + + - i runs from j+1 instead of from zero + + - doubling of a[i] * a[j] in the inner product + +Unfortunately, the construction of the inner product is such that we +need more than two digits to represent the inner product, in some +cases. In a C implementation, this means that some gymnastics must be +performed in order to handle overflow, for which C has no direct +abstraction. We do this by observing the following: + +If we have multiplied a[i] and a[j], and the product is more than half +the maximum value expressible in two digits, then doubling this result +will overflow into a third digit. If this occurs, we take note of the +overflow, and double it anyway -- C integer arithmetic ignores +overflow, so the two digits we get back should still be valid, modulo +the overflow. + +Having doubled this value, we now have to add in the remainders and +the digits already computed by earlier steps. If we did not overflow +in the previous step, we might still cause an overflow here. 
That +will happen whenever the maximum value expressible in two digits, less +the amount we have to add, is greater than the result of the previous +step. Thus, the overflow computation is: + + + u = 0 + w = a[i] * a[j] + + if(w > (R - 1)/ 2) + u = 1; + + w = w * 2 + v = c[i + j] + k + + if(u == 0 && (R - 1 - v) < w) + u = 1 + +If there is an overflow, u will be 1, otherwise u will be 0. The rest +of the parameters are the same as they are in the above description. + +------------------------------------------------------------------ + This Source Code Form is subject to the terms of the Mozilla Public + # License, v. 2.0. If a copy of the MPL was not distributed with this + # file, You can obtain one at http://mozilla.org/MPL/2.0/. diff --git a/security/nss/lib/freebl/mpi/doc/timing.txt b/security/nss/lib/freebl/mpi/doc/timing.txt new file mode 100644 index 000000000..58f37c9df --- /dev/null +++ b/security/nss/lib/freebl/mpi/doc/timing.txt @@ -0,0 +1,213 @@ +MPI Library Timing Tests + +Hardware/OS +(A) SGI O2 1 x MIPS R10000 250MHz IRIX 6.5.3 +(B) IBM RS/6000 43P-240 1 x PowerPC 603e 223MHz AIX 4.3 +(C) Dell GX1/L+ 1 x Pentium III 550MHz Linux 2.2.12-20 +(D) PowerBook G3 1 x PowerPC 750 266MHz LinuxPPC 2.2.6-15apmac +(E) PowerBook G3 1 x PowerPC 750 266MHz MacOS 8.5.1 +(F) PowerBook G3 1 x PowerPC 750 400MHz MacOS 9.0.2 + +Compiler +(1) MIPSpro C 7.2.1 -O3 optimizations +(2) GCC 2.95.1 -O3 optimizations +(3) IBM AIX xlc -O3 optimizations (version unknown) +(4) EGCS 2.91.66 -O3 optimizations +(5) Metrowerks CodeWarrior 5.0 C, all optimizations +(6) MIPSpro C 7.30 -O3 optimizations +(7) same as (6), with optimized libmalloc.so + +Timings are given in seconds, computed using the C library's clock() +function. The first column gives the hardware and compiler +configuration used for the test. The second column indicates the +number of tests that were aggregated to get the statistics for that +size. These were compiled using 16 bit digits. 
+ +Source data were generated randomly using a fixed seed, so they should +be internally consistent, but may vary on different systems depending +on the C library. Also, since the resolution of the timer accessed by +clock() varies, there may be some variance in the precision of these +measurements. + +Prime Generation (primegen) + +128 bits: +A1 200 min=0.03, avg=0.19, max=0.72, sum=38.46 +A2 200 min=0.02, avg=0.16, max=0.62, sum=32.55 +B3 200 min=0.01, avg=0.07, max=0.22, sum=13.29 +C4 200 min=0.00, avg=0.03, max=0.20, sum=6.14 +D4 200 min=0.00, avg=0.05, max=0.33, sum=9.70 +A6 200 min=0.01, avg=0.09, max=0.36, sum=17.48 +A7 200 min=0.00, avg=0.05, max=0.24, sum=10.07 + +192 bits: +A1 200 min=0.05, avg=0.45, max=3.13, sum=89.96 +A2 200 min=0.04, avg=0.39, max=2.61, sum=77.55 +B3 200 min=0.02, avg=0.18, max=1.25, sum=36.97 +C4 200 min=0.01, avg=0.09, max=0.33, sum=18.24 +D4 200 min=0.02, avg=0.15, max=0.54, sum=29.63 +A6 200 min=0.02, avg=0.24, max=1.70, sum=47.84 +A7 200 min=0.01, avg=0.15, max=1.05, sum=30.88 + +256 bits: +A1 200 min=0.08, avg=0.92, max=6.13, sum=184.79 +A2 200 min=0.06, avg=0.76, max=5.03, sum=151.11 +B3 200 min=0.04, avg=0.41, max=2.68, sum=82.35 +C4 200 min=0.02, avg=0.19, max=0.69, sum=37.91 +D4 200 min=0.03, avg=0.31, max=1.15, sum=63.00 +A6 200 min=0.04, avg=0.48, max=3.13, sum=95.46 +A7 200 min=0.03, avg=0.37, max=2.36, sum=73.60 + +320 bits: +A1 200 min=0.11, avg=1.59, max=6.14, sum=318.81 +A2 200 min=0.09, avg=1.27, max=4.93, sum=254.03 +B3 200 min=0.07, avg=0.82, max=3.13, sum=163.80 +C4 200 min=0.04, avg=0.44, max=1.91, sum=87.59 +D4 200 min=0.06, avg=0.73, max=3.22, sum=146.73 +A6 200 min=0.07, avg=0.93, max=3.50, sum=185.01 +A7 200 min=0.05, avg=0.76, max=2.94, sum=151.78 + +384 bits: +A1 200 min=0.16, avg=2.69, max=11.41, sum=537.89 +A2 200 min=0.13, avg=2.15, max=9.03, sum=429.14 +B3 200 min=0.11, avg=1.54, max=6.49, sum=307.78 +C4 200 min=0.06, avg=0.81, max=4.84, sum=161.13 +D4 200 min=0.10, avg=1.38, max=8.31, sum=276.81 +A6 
200 min=0.11, avg=1.73, max=7.36, sum=345.55 +A7 200 min=0.09, avg=1.46, max=6.12, sum=292.02 + +448 bits: +A1 200 min=0.23, avg=3.36, max=15.92, sum=672.63 +A2 200 min=0.17, avg=2.61, max=12.25, sum=522.86 +B3 200 min=0.16, avg=2.10, max=9.83, sum=420.86 +C4 200 min=0.09, avg=1.44, max=7.64, sum=288.36 +D4 200 min=0.16, avg=2.50, max=13.29, sum=500.17 +A6 200 min=0.15, avg=2.31, max=10.81, sum=461.58 +A7 200 min=0.14, avg=2.03, max=9.53, sum=405.16 + +512 bits: +A1 200 min=0.30, avg=6.12, max=22.18, sum=1223.35 +A2 200 min=0.25, avg=4.67, max=16.90, sum=933.18 +B3 200 min=0.23, avg=4.13, max=14.94, sum=825.45 +C4 200 min=0.13, avg=2.08, max=9.75, sum=415.22 +D4 200 min=0.24, avg=4.04, max=20.18, sum=808.11 +A6 200 min=0.22, avg=4.47, max=16.19, sum=893.83 +A7 200 min=0.20, avg=4.03, max=14.65, sum=806.02 + +Modular Exponentiation (metime) + +The following results are aggregated from 200 pseudo-randomly +generated tests, based on a fixed seed. + + base, exponent, and modulus size (bits) +P/C 128 192 256 320 384 448 512 640 768 896 1024 +------- ----------------------------------------------------------------- +A1 0.015 0.027 0.047 0.069 0.098 0.133 0.176 0.294 0.458 0.680 1.040 +A2 0.013 0.024 0.037 0.053 0.077 0.102 0.133 0.214 0.326 0.476 0.668 +B3 0.005 0.011 0.021 0.036 0.056 0.084 0.121 0.222 0.370 0.573 0.840 +C4 0.002 0.006 0.011 0.020 0.032 0.048 0.069 0.129 0.223 0.344 0.507 +D4 0.004 0.010 0.019 0.034 0.056 0.085 0.123 0.232 0.390 0.609 0.899 +E5 0.007 0.015 0.031 0.055 0.088 0.133 0.183 0.342 0.574 0.893 1.317 +A6 0.008 0.016 0.038 0.042 0.064 0.093 0.133 0.239 0.393 0.604 0.880 +A7 0.005 0.011 0.020 0.036 0.056 0.083 0.121 0.223 0.374 0.583 0.855 + +Multiplication and Squaring tests, (mulsqr) + +The following results are aggregated from 500000 pseudo-randomly +generated tests, based on a per-run wall-clock seed. Times are given +in seconds, except where indicated in microseconds (us). 
+ +(A1) + +bits multiply square ad percent time/mult time/square +64 9.33 9.15 > 1.9 18.7us 18.3us +128 10.88 10.44 > 4.0 21.8us 20.9us +192 13.30 11.89 > 10.6 26.7us 23.8us +256 14.88 12.64 > 15.1 29.8us 25.3us +320 18.64 15.01 > 19.5 37.3us 30.0us +384 23.11 17.70 > 23.4 46.2us 35.4us +448 28.28 20.88 > 26.2 56.6us 41.8us +512 34.09 24.51 > 28.1 68.2us 49.0us +640 47.86 33.25 > 30.5 95.7us 66.5us +768 64.91 43.54 > 32.9 129.8us 87.1us +896 84.49 55.48 > 34.3 169.0us 111.0us +1024 107.25 69.21 > 35.5 214.5us 138.4us +1536 227.97 141.91 > 37.8 456.0us 283.8us +2048 394.05 242.15 > 38.5 788.1us 484.3us + +(A2) + +bits multiply square ad percent time/mult time/square +64 7.87 7.95 < 1.0 15.7us 15.9us +128 9.40 9.19 > 2.2 18.8us 18.4us +192 11.15 10.59 > 5.0 22.3us 21.2us +256 12.02 11.16 > 7.2 24.0us 22.3us +320 14.62 13.43 > 8.1 29.2us 26.9us +384 17.72 15.80 > 10.8 35.4us 31.6us +448 21.24 18.51 > 12.9 42.5us 37.0us +512 25.36 21.78 > 14.1 50.7us 43.6us +640 34.57 29.00 > 16.1 69.1us 58.0us +768 46.10 37.60 > 18.4 92.2us 75.2us +896 58.94 47.72 > 19.0 117.9us 95.4us +1024 73.76 59.12 > 19.8 147.5us 118.2us +1536 152.00 118.80 > 21.8 304.0us 237.6us +2048 259.41 199.57 > 23.1 518.8us 399.1us + +(B3) + +bits multiply square ad percent time/mult time/square +64 2.60 2.47 > 5.0 5.20us 4.94us +128 4.43 4.06 > 8.4 8.86us 8.12us +192 7.03 6.10 > 13.2 14.1us 12.2us +256 10.44 8.59 > 17.7 20.9us 17.2us +320 14.44 11.64 > 19.4 28.9us 23.3us +384 19.12 15.08 > 21.1 38.2us 30.2us +448 24.55 19.09 > 22.2 49.1us 38.2us +512 31.03 23.53 > 24.2 62.1us 47.1us +640 45.05 33.80 > 25.0 90.1us 67.6us +768 63.02 46.05 > 26.9 126.0us 92.1us +896 83.74 60.29 > 28.0 167.5us 120.6us +1024 106.73 76.65 > 28.2 213.5us 153.3us +1536 228.94 160.98 > 29.7 457.9us 322.0us +2048 398.08 275.93 > 30.7 796.2us 551.9us + +(C4) + +bits multiply square ad percent time/mult time/square +64 1.34 1.28 > 4.5 2.68us 2.56us +128 2.76 2.59 > 6.2 5.52us 5.18us +192 4.52 4.16 > 8.0 9.04us 8.32us +256 6.64 5.99 > 
9.8 13.3us 12.0us +320 9.20 8.13 > 11.6 18.4us 16.3us +384 12.01 10.58 > 11.9 24.0us 21.2us +448 15.24 13.33 > 12.5 30.5us 26.7us +512 19.02 16.46 > 13.5 38.0us 32.9us +640 27.56 23.54 > 14.6 55.1us 47.1us +768 37.89 31.78 > 16.1 75.8us 63.6us +896 49.24 41.42 > 15.9 98.5us 82.8us +1024 62.59 52.18 > 16.6 125.2us 104.3us +1536 131.66 107.72 > 18.2 263.3us 215.4us +2048 226.45 182.95 > 19.2 453.0us 365.9us + +(A7) + +bits multiply square ad percent time/mult time/square +64 1.74 1.71 > 1.7 3.48us 3.42us +128 3.48 2.96 > 14.9 6.96us 5.92us +192 5.74 4.60 > 19.9 11.5us 9.20us +256 8.75 6.61 > 24.5 17.5us 13.2us +320 12.5 8.99 > 28.1 25.0us 18.0us +384 16.9 11.9 > 29.6 33.8us 23.8us +448 22.2 15.2 > 31.7 44.4us 30.4us +512 28.3 19.0 > 32.7 56.6us 38.0us +640 42.4 28.0 > 34.0 84.8us 56.0us +768 59.4 38.5 > 35.2 118.8us 77.0us +896 79.5 51.2 > 35.6 159.0us 102.4us +1024 102.6 65.5 > 36.2 205.2us 131.0us +1536 224.3 140.6 > 37.3 448.6us 281.2us +2048 393.4 244.3 > 37.9 786.8us 488.6us + +------------------------------------------------------------------ + This Source Code Form is subject to the terms of the Mozilla Public + # License, v. 2.0. If a copy of the MPL was not distributed with this + # file, You can obtain one at http://mozilla.org/MPL/2.0/. diff --git a/security/nss/lib/freebl/mpi/hpma512.s b/security/nss/lib/freebl/mpi/hpma512.s new file mode 100644 index 000000000..ae9da630d --- /dev/null +++ b/security/nss/lib/freebl/mpi/hpma512.s @@ -0,0 +1,615 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +/* + * + * This PA-RISC 2.0 function computes the product of two unsigned integers, + * and adds the result to a previously computed integer. The multiplicand + * is a 512-bit (64-byte, eight doubleword) unsigned integer, stored in + * memory in little-double-wordian order. 
The multiplier is an unsigned + * 64-bit integer. The previously computed integer to which the product is + * added is located in the result ("res") area, and is assumed to be a + * 576-bit (72-byte, nine doubleword) unsigned integer, stored in memory + * in little-double-wordian order. This value normally will be the result + * of a previously computed nine doubleword result. It is not necessary + * to pad the multiplicand with an additional 64-bit zero doubleword. + * + * Multiplicand, multiplier, and addend ideally should be aligned at + * 16-byte boundaries for best performance. The code will function + * correctly for alignment at eight-byte boundaries which are not 16-byte + * boundaries, but the execution may be slightly slower due to even/odd + * bank conflicts on PA-RISC 8000 processors. + * + * This function is designed to accept the same calling sequence as Bill + * Ackerman's "maxpy_little" function. The carry from the ninth doubleword + * of the result is written to the tenth word of the result, as is done by + * Bill Ackerman's function. The final carry also is returned as an + * integer, which may be ignored. The function prototype may be either + * of the following: + * + * void multacc512( int l, chunk* m, const chunk* a, chunk* res ); + * or + * int multacc512( int l, chunk* m, const chunk* a, chunk* res ); + * + * where: "l" originally denoted vector lengths. This parameter is + * ignored. This function always assumes a multiplicand length of + * 512 bits (eight doublewords), and addend and result lengths of + * 576 bits (nine doublewords). + * + * "m" is a pointer to the doubleword multiplier, ideally aligned + * on a 16-byte boundary. + * + * "a" is a pointer to the eight-doubleword multiplicand, stored + * in little-double-wordian order, and ideally aligned on a 16-byte + * boundary. + * + * "res" is a pointer to the nine doubleword addend, and to the + * nine-doubleword product computed by this function. 
The result + * also is stored in little-double-wordian order, and ideally is + * aligned on a 16-byte boundary. It is expected that the alignment + * of the "res" area may alternate between even/odd doubleword + * boundaries for successive calls for 512-bit x 512-bit + * multiplications. + * + * The code for this function has been scheduled to use the parallelism + * of the PA-RISC 8000 series microprocessors as well as the author was + * able. Comments and/or suggestions for improvement are welcomed. + * + * The code is "64-bit safe". This means it may be called in either + * the 32ILP context or the 64LP context. All 64-bits of registers are + * saved and restored. + * + * This code is self-contained. It requires no other header files in order + * to compile and to be linkable on a PA-RISC 2.0 machine. Symbolic + * definitions for registers and stack offsets are included within this + * one source file. + * + * This is a leaf routine. As such, minimal use is made of the stack area. + * Of the 192 bytes allocated, 64 bytes are used for saving/restoring eight + * general registers, and 128 bytes are used to move intermediate products + * from the floating-point registers to the general registers. Stack + * protocols assure proper alignment of these areas. 
+ * + */ + + +/* ====================================================================*/ +/* symbolic definitions for PA-RISC registers */ +/* in the MIPS style, avoids lots of case shifts */ +/* assignments (except t4) preserve register number parity */ +/* ====================================================================*/ + +#define zero %r0 /* permanent zero */ +#define t5 %r1 /* temp register, altered by addil */ + +#define rp %r2 /* return pointer */ + +#define s1 %r3 /* callee saves register*/ +#define s0 %r4 /* callee saves register*/ +#define s3 %r5 /* callee saves register*/ +#define s2 %r6 /* callee saves register*/ +#define s5 %r7 /* callee saves register*/ +#define s4 %r8 /* callee saves register*/ +#define s7 %r9 /* callee saves register*/ +#define s6 %r10 /* callee saves register*/ + +#define t1 %r19 /* caller saves register*/ +#define t0 %r20 /* caller saves register*/ +#define t3 %r21 /* caller saves register*/ +#define t2 %r22 /* caller saves register*/ + +#define a3 %r23 /* fourth argument register, high word */ +#define a2 %r24 /* third argument register, low word*/ +#define a1 %r25 /* second argument register, high word*/ +#define a0 %r26 /* first argument register, low word*/ + +#define v0 %r28 /* high order return value*/ +#define v1 %r29 /* low order return value*/ + +#define sp %r30 /* stack pointer*/ +#define t4 %r31 /* temporary register */ + +#define fa0 %fr4 /* first argument register*/ +#define fa1 %fr5 /* second argument register*/ +#define fa2 %fr6 /* third argument register*/ +#define fa3 %fr7 /* fourth argument register*/ + +#define fa0r %fr4R /* first argument register*/ +#define fa1r %fr5R /* second argument register*/ +#define fa2r %fr6R /* third argument register*/ +#define fa3r %fr7R /* fourth argument register*/ + +#define ft0 %fr8 /* caller saves register*/ +#define ft1 %fr9 /* caller saves register*/ +#define ft2 %fr10 /* caller saves register*/ +#define ft3 %fr11 /* caller saves register*/ + +#define ft0r %fr8R /* caller
saves register*/ +#define ft1r %fr9R /* caller saves register*/ +#define ft2r %fr10R /* caller saves register*/ +#define ft3r %fr11R /* caller saves register*/ + +#define ft4 %fr22 /* caller saves register*/ +#define ft5 %fr23 /* caller saves register*/ +#define ft6 %fr24 /* caller saves register*/ +#define ft7 %fr25 /* caller saves register*/ +#define ft8 %fr26 /* caller saves register*/ +#define ft9 %fr27 /* caller saves register*/ +#define ft10 %fr28 /* caller saves register*/ +#define ft11 %fr29 /* caller saves register*/ +#define ft12 %fr30 /* caller saves register*/ +#define ft13 %fr31 /* caller saves register*/ + +#define ft4r %fr22R /* caller saves register*/ +#define ft5r %fr23R /* caller saves register*/ +#define ft6r %fr24R /* caller saves register*/ +#define ft7r %fr25R /* caller saves register*/ +#define ft8r %fr26R /* caller saves register*/ +#define ft9r %fr27R /* caller saves register*/ +#define ft10r %fr28R /* caller saves register*/ +#define ft11r %fr29R /* caller saves register*/ +#define ft12r %fr30R /* caller saves register*/ +#define ft13r %fr31R /* caller saves register*/ + + + +/* ================================================================== */ +/* functional definitions for PA-RISC registers */ +/* ================================================================== */ + +/* general registers */ + +#define T1 a0 /* temp, (length parameter ignored) */ + +#define pM a1 /* -> 64-bit multiplier */ +#define T2 a1 /* temp, (after fetching multiplier) */ + +#define pA a2 /* -> multiplicand vector (8 64-bit words) */ +#define T3 a2 /* temp, (after fetching multiplicand) */ + +#define pR a3 /* -> addend vector (9 64-bit doublewords), + result vector (9 64-bit doublewords + carry word) */ + +#define S0 s0 /* callee saves summand registers */ +#define S1 s1 +#define S2 s2 +#define S3 s3 +#define S4 s4 +#define S5 s5 +#define S6 s6 +#define S7 s7 + +#define S8 v0 /* caller saves summand registers */ +#define S9 v1 +#define S10 t0 +#define S11 t1 +#define S12 t2 
+#define S13 t3 +#define S14 t4 +#define S15 t5 + + + +/* floating-point registers */ + +#define M fa0 /* multiplier double word */ +#define MR fa0r /* low order half of multiplier double word */ +#define ML fa0 /* high order half of multiplier double word */ + +#define A0 fa2 /* multiplicand double word 0 */ +#define A0R fa2r /* low order half of multiplicand double word */ +#define A0L fa2 /* high order half of multiplicand double word */ + +#define A1 fa3 /* multiplicand double word 1 */ +#define A1R fa3r /* low order half of multiplicand double word */ +#define A1L fa3 /* high order half of multiplicand double word */ + +#define A2 ft0 /* multiplicand double word 2 */ +#define A2R ft0r /* low order half of multiplicand double word */ +#define A2L ft0 /* high order half of multiplicand double word */ + +#define A3 ft1 /* multiplicand double word 3 */ +#define A3R ft1r /* low order half of multiplicand double word */ +#define A3L ft1 /* high order half of multiplicand double word */ + +#define A4 ft2 /* multiplicand double word 4 */ +#define A4R ft2r /* low order half of multiplicand double word */ +#define A4L ft2 /* high order half of multiplicand double word */ + +#define A5 ft3 /* multiplicand double word 5 */ +#define A5R ft3r /* low order half of multiplicand double word */ +#define A5L ft3 /* high order half of multiplicand double word */ + +#define A6 ft4 /* multiplicand double word 6 */ +#define A6R ft4r /* low order half of multiplicand double word */ +#define A6L ft4 /* high order half of multiplicand double word */ + +#define A7 ft5 /* multiplicand double word 7 */ +#define A7R ft5r /* low order half of multiplicand double word */ +#define A7L ft5 /* high order half of multiplicand double word */ + +#define P0 ft6 /* partial product temp 0 (xmpyu result) */ +#define P1 ft7 /* partial product temp 1 (xmpyu result) */ +#define P2 ft8 /* partial product temp 2 (xmpyu result) */ +#define P3 ft9 /* partial product temp 3 (xmpyu result) */ +#define P4 ft10 /* partial product temp 4 (xmpyu result) */ +#define P5 ft11 /* partial product temp 5 (xmpyu result) */ +#define P6 ft12 /* partial product temp
6 (xmpyu result) */ +#define P7 ft13 /* partial product temp 7 (xmpyu result) */ + + + + +/* ====================================================================== */ +/* symbolic definitions for HP-UX stack offsets */ +/* symbolic definitions for memory NOPs */ +/* ====================================================================== */ + +#define ST_SZ 192 /* stack area total size */ + +#define SV0 -192(sp) /* general register save area */ +#define SV1 -184(sp) +#define SV2 -176(sp) +#define SV3 -168(sp) +#define SV4 -160(sp) +#define SV5 -152(sp) +#define SV6 -144(sp) +#define SV7 -136(sp) + +#define XF0 -128(sp) /* data transfer area */ +#define XF1 -120(sp) /* for floating-pt to integer regs */ +#define XF2 -112(sp) +#define XF3 -104(sp) +#define XF4 -96(sp) +#define XF5 -88(sp) +#define XF6 -80(sp) +#define XF7 -72(sp) +#define XF8 -64(sp) +#define XF9 -56(sp) +#define XF10 -48(sp) +#define XF11 -40(sp) +#define XF12 -32(sp) +#define XF13 -24(sp) +#define XF14 -16(sp) +#define XF15 -8(sp) + +#define mnop proberi (sp),3,zero /* memory NOP */ + + + + +/* ====================================================================== */ +/* assembler formalities */ +/* ====================================================================== */ + +#ifdef __LP64__ + .level 2.0W +#else + .level 2.0 +#endif + .space $TEXT$ + .subspa $CODE$ + .align 16 + +/* ====================================================================== */ +/* here to compute 64-bit x 512-bit product + 576-bit addend */ +/* ====================================================================== */ + +multacc512 + .PROC + .CALLINFO + .ENTRY + fldd 0(pM),M ; multiplier double word + ldo ST_SZ(sp),sp ; push stack + + fldd 0(pA),A0 ; multiplicand double word 0 + std S1,SV1 ; save s1 + + fldd 16(pA),A2 ; multiplicand double word 2 + std S3,SV3 ; save s3 + + fldd 32(pA),A4 ; multiplicand double word 4 + std S5,SV5 ; save s5 + + fldd 48(pA),A6 ; multiplicand double word 6 + std S7,SV7 ; save s7 + + + std S0,SV0 ; save s0 + fldd 8(pA),A1 ; 
multiplicand double word 1 + xmpyu MR,A0L,P0 ; A0 cross 32-bit word products + xmpyu ML,A0R,P2 + + std S2,SV2 ; save s2 + fldd 24(pA),A3 ; multiplicand double word 3 + xmpyu MR,A2L,P4 ; A2 cross 32-bit word products + xmpyu ML,A2R,P6 + + std S4,SV4 ; save s4 + fldd 40(pA),A5 ; multiplicand double word 5 + + std S6,SV6 ; save s6 + fldd 56(pA),A7 ; multiplicand double word 7 + + + fstd P0,XF0 ; MR * A0L + xmpyu MR,A0R,P0 ; A0 right 32-bit word product + xmpyu MR,A1L,P1 ; A1 cross 32-bit word product + + fstd P2,XF2 ; ML * A0R + xmpyu ML,A0L,P2 ; A0 left 32-bit word product + xmpyu ML,A1R,P3 ; A1 cross 32-bit word product + + fstd P4,XF4 ; MR * A2L + xmpyu MR,A2R,P4 ; A2 right 32-bit word product + xmpyu MR,A3L,P5 ; A3 cross 32-bit word product + + fstd P6,XF6 ; ML * A2R + xmpyu ML,A2L,P6 ; A2 parallel 32-bit word product + xmpyu ML,A3R,P7 ; A3 cross 32-bit word product + + + ldd XF0,S0 ; MR * A0L + fstd P1,XF1 ; MR * A1L + + ldd XF2,S2 ; ML * A0R + fstd P3,XF3 ; ML * A1R + + ldd XF4,S4 ; MR * A2L + fstd P5,XF5 ; MR * A3L + xmpyu MR,A1R,P1 ; A1 parallel 32-bit word products + xmpyu ML,A1L,P3 + + ldd XF6,S6 ; ML * A2R + fstd P7,XF7 ; ML * A3R + xmpyu MR,A3R,P5 ; A3 parallel 32-bit word products + xmpyu ML,A3L,P7 + + + fstd P0,XF0 ; MR * A0R + ldd XF1,S1 ; MR * A1L + nop + add S0,S2,T1 ; A0 cross product sum + + fstd P2,XF2 ; ML * A0L + ldd XF3,S3 ; ML * A1R + add,dc zero,zero,S0 ; A0 cross product sum carry + depd,z T1,31,32,S2 ; A0 cross product sum << 32 + + fstd P4,XF4 ; MR * A2R + ldd XF5,S5 ; MR * A3L + shrpd S0,T1,32,S0 ; A0 carry | cross product sum >> 32 + add S4,S6,T3 ; A2 cross product sum + + fstd P6,XF6 ; ML * A2L + ldd XF7,S7 ; ML * A3R + add,dc zero,zero,S4 ; A2 cross product sum carry + depd,z T3,31,32,S6 ; A2 cross product sum << 32 + + + ldd XF0,S8 ; MR * A0R + fstd P1,XF1 ; MR * A1R + xmpyu MR,A4L,P0 ; A4 cross 32-bit word product + xmpyu MR,A5L,P1 ; A5 cross 32-bit word product + + ldd XF2,S10 ; ML * A0L + fstd P3,XF3 ; ML * A1L + xmpyu ML,A4R,P2 ; 
A4 cross 32-bit word product + xmpyu ML,A5R,P3 ; A5 cross 32-bit word product + + ldd XF4,S12 ; MR * A2R + fstd P5,XF5 ; MR * A3R + xmpyu MR,A6L,P4 ; A6 cross 32-bit word product + xmpyu MR,A7L,P5 ; A7 cross 32-bit word product + + ldd XF6,S14 ; ML * A2L + fstd P7,XF7 ; ML * A3L + xmpyu ML,A6R,P6 ; A6 cross 32-bit word product + xmpyu ML,A7R,P7 ; A7 cross 32-bit word product + + + fstd P0,XF0 ; MR * A4L + ldd XF1,S9 ; MR * A1R + shrpd S4,T3,32,S4 ; A2 carry | cross product sum >> 32 + add S1,S3,T1 ; A1 cross product sum + + fstd P2,XF2 ; ML * A4R + ldd XF3,S11 ; ML * A1L + add,dc zero,zero,S1 ; A1 cross product sum carry + depd,z T1,31,32,S3 ; A1 cross product sum << 32 + + fstd P4,XF4 ; MR * A6L + ldd XF5,S13 ; MR * A3R + shrpd S1,T1,32,S1 ; A1 carry | cross product sum >> 32 + add S5,S7,T3 ; A3 cross product sum + + fstd P6,XF6 ; ML * A6R + ldd XF7,S15 ; ML * A3L + add,dc zero,zero,S5 ; A3 cross product sum carry + depd,z T3,31,32,S7 ; A3 cross product sum << 32 + + + shrpd S5,T3,32,S5 ; A3 carry | cross product sum >> 32 + add S2,S8,S8 ; M * A0 right doubleword, P0 doubleword + + add,dc S0,S10,S10 ; M * A0 left doubleword + add S3,S9,S9 ; M * A1 right doubleword + + add,dc S1,S11,S11 ; M * A1 left doubleword + add S6,S12,S12 ; M * A2 right doubleword + + + ldd 24(pR),S3 ; Addend word 3 + fstd P1,XF1 ; MR * A5L + add,dc S4,S14,S14 ; M * A2 left doubleword + xmpyu MR,A5R,P1 ; A5 right 32-bit word product + + ldd 8(pR),S1 ; Addend word 1 + fstd P3,XF3 ; ML * A5R + add S7,S13,S13 ; M * A3 right doubleword + xmpyu ML,A5L,P3 ; A5 left 32-bit word product + + ldd 0(pR),S7 ; Addend word 0 + fstd P5,XF5 ; MR * A7L + add,dc S5,S15,S15 ; M * A3 left doubleword + xmpyu MR,A7R,P5 ; A7 right 32-bit word product + + ldd 16(pR),S5 ; Addend word 2 + fstd P7,XF7 ; ML * A7R + add S10,S9,S9 ; P1 doubleword + xmpyu ML,A7L,P7 ; A7 left 32-bit word products + + + ldd XF0,S0 ; MR * A4L + fstd P1,XF9 ; MR * A5R + add,dc S11,S12,S12 ; P2 doubleword + xmpyu MR,A4R,P0 ; A4 right 32-bit 
word product + + ldd XF2,S2 ; ML * A4R + fstd P3,XF11 ; ML * A5L + add,dc S14,S13,S13 ; P3 doubleword + xmpyu ML,A4L,P2 ; A4 left 32-bit word product + + ldd XF6,S6 ; ML * A6R + fstd P5,XF13 ; MR * A7R + add,dc zero,S15,T2 ; P4 partial doubleword + xmpyu MR,A6R,P4 ; A6 right 32-bit word product + + ldd XF4,S4 ; MR * A6L + fstd P7,XF15 ; ML * A7L + add S7,S8,S8 ; R0 + P0, new R0 doubleword + xmpyu ML,A6L,P6 ; A6 left 32-bit word product + + + fstd P0,XF0 ; MR * A4R + ldd XF7,S7 ; ML * A7R + add,dc S1,S9,S9 ; c + R1 + P1, new R1 doubleword + + fstd P2,XF2 ; ML * A4L + ldd XF1,S1 ; MR * A5L + add,dc S5,S12,S12 ; c + R2 + P2, new R2 doubleword + + fstd P4,XF4 ; MR * A6R + ldd XF5,S5 ; MR * A7L + add,dc S3,S13,S13 ; c + R3 + P3, new R3 doubleword + + fstd P6,XF6 ; ML * A6L + ldd XF3,S3 ; ML * A5R + add,dc zero,T2,T2 ; c + partial P4 + add S0,S2,T1 ; A4 cross product sum + + + std S8,0(pR) ; save R0 + add,dc zero,zero,S0 ; A4 cross product sum carry + depd,z T1,31,32,S2 ; A4 cross product sum << 32 + + std S9,8(pR) ; save R1 + shrpd S0,T1,32,S0 ; A4 carry | cross product sum >> 32 + add S4,S6,T3 ; A6 cross product sum + + std S12,16(pR) ; save R2 + add,dc zero,zero,S4 ; A6 cross product sum carry + depd,z T3,31,32,S6 ; A6 cross product sum << 32 + + + std S13,24(pR) ; save R3 + shrpd S4,T3,32,S4 ; A6 carry | cross product sum >> 32 + add S1,S3,T1 ; A5 cross product sum + + ldd XF0,S8 ; MR * A4R + add,dc zero,zero,S1 ; A5 cross product sum carry + depd,z T1,31,32,S3 ; A5 cross product sum << 32 + + ldd XF2,S10 ; ML * A4L + ldd XF9,S9 ; MR * A5R + shrpd S1,T1,32,S1 ; A5 carry | cross product sum >> 32 + add S5,S7,T3 ; A7 cross product sum + + ldd XF4,S12 ; MR * A6R + ldd XF11,S11 ; ML * A5L + add,dc zero,zero,S5 ; A7 cross product sum carry + depd,z T3,31,32,S7 ; A7 cross product sum << 32 + + ldd XF6,S14 ; ML * A6L + ldd XF13,S13 ; MR * A7R + shrpd S5,T3,32,S5 ; A7 carry | cross product sum >> 32 + add S2,S8,S8 ; M * A4 right doubleword + + + ldd XF15,S15 ; ML * A7L + 
add,dc S0,S10,S10 ; M * A4 left doubleword + add S3,S9,S9 ; M * A5 right doubleword + + add,dc S1,S11,S11 ; M * A5 left doubleword + add S6,S12,S12 ; M * A6 right doubleword + + ldd 32(pR),S0 ; Addend word 4 + ldd 40(pR),S1 ; Addend word 5 + add,dc S4,S14,S14 ; M * A6 left doubleword + add S7,S13,S13 ; M * A7 right doubleword + + ldd 48(pR),S2 ; Addend word 6 + ldd 56(pR),S3 ; Addend word 7 + add,dc S5,S15,S15 ; M * A7 left doubleword + add S8,T2,S8 ; P4 doubleword + + ldd 64(pR),S4 ; Addend word 8 + ldd SV5,s5 ; restore s5 + add,dc S10,S9,S9 ; P5 doubleword + add,dc S11,S12,S12 ; P6 doubleword + + + ldd SV6,s6 ; restore s6 + ldd SV7,s7 ; restore s7 + add,dc S14,S13,S13 ; P7 doubleword + add,dc zero,S15,S15 ; P8 doubleword + + add S0,S8,S8 ; new R4 doubleword + + ldd SV0,s0 ; restore s0 + std S8,32(pR) ; save R4 + add,dc S1,S9,S9 ; new R5 doubleword + + ldd SV1,s1 ; restore s1 + std S9,40(pR) ; save R5 + add,dc S2,S12,S12 ; new R6 doubleword + + ldd SV2,s2 ; restore s2 + std S12,48(pR) ; save R6 + add,dc S3,S13,S13 ; new R7 doubleword + + ldd SV3,s3 ; restore s3 + std S13,56(pR) ; save R7 + add,dc S4,S15,S15 ; new R8 doubleword + + ldd SV4,s4 ; restore s4 + std S15,64(pR) ; save result[8] + add,dc zero,zero,v0 ; return carry from R8 + + CMPIB,*= 0,v0,$L0 ; if no overflow, exit + LDO 8(pR),pR + +$FINAL1 ; Final carry propagation + LDD 64(pR),v0 + LDO 8(pR),pR + ADDI 1,v0,v0 + CMPIB,*= 0,v0,$FINAL1 ; Keep looping if there is a carry. 
+ STD v0,56(pR) +$L0 + bv zero(rp) ; -> caller + ldo -ST_SZ(sp),sp ; pop stack + +/* ====================================================================== */ +/* end of module */ +/* ====================================================================== */ + + + bve (rp) + .EXIT + nop + .PROCEND + .SPACE $TEXT$ + .SUBSPA $CODE$ + .EXPORT multacc512,ENTRY + + .end diff --git a/security/nss/lib/freebl/mpi/hppa20.s b/security/nss/lib/freebl/mpi/hppa20.s new file mode 100644 index 000000000..c72de8a12 --- /dev/null +++ b/security/nss/lib/freebl/mpi/hppa20.s @@ -0,0 +1,904 @@ +; This Source Code Form is subject to the terms of the Mozilla Public +; License, v. 2.0. If a copy of the MPL was not distributed with this +; file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifdef __LP64__ + .LEVEL 2.0W +#else +; .LEVEL 1.1 +; .ALLOW 2.0N + .LEVEL 2.0 +#endif + .SPACE $TEXT$,SORT=8 + .SUBSPA $CODE$,QUAD=0,ALIGN=4,ACCESS=0x2c,CODE_ONLY,SORT=24 + +; *************************************************************** +; +; maxpy_[little/big] +; +; *************************************************************** + +; There is no default -- you must specify one or the other. +#define LITTLE_WORDIAN 1 + +#ifdef LITTLE_WORDIAN +#define EIGHT 8 +#define SIXTEEN 16 +#define THIRTY_TWO 32 +#define UN_EIGHT -8 +#define UN_SIXTEEN -16 +#define UN_TWENTY_FOUR -24 +#endif + +#ifdef BIG_WORDIAN +#define EIGHT -8 +#define SIXTEEN -16 +#define THIRTY_TWO -32 +#define UN_EIGHT 8 +#define UN_SIXTEEN 16 +#define UN_TWENTY_FOUR 24 +#endif + +; This performs a multiple-precision integer version of "daxpy", +; Using the selected addressing direction. "Little-wordian" means that +; the least significant word of a number is stored at the lowest address. +; "Big-wordian" means that the most significant word is at the lowest +; address. Either way, the incoming address of the vector is that +; of the least significant word. 
That means that, for little-wordian +; addressing, we move the address upward as we propagate carries +; from the least significant word to the most significant. For +; big-wordian we move the address downward. + +; We use the following registers: +; +; r2 return PC, of course +; r26 = arg1 = length +; r25 = arg2 = address of scalar +; r24 = arg3 = multiplicand vector +; r23 = arg4 = result vector +; +; fr9 = scalar loaded once only from r25 + +; The cycle counts shown in the bodies below are simply the result of a +; scheduling by hand. The actual PCX-U hardware does it differently. +; The intention is that the overall speed is the same. + +; The pipeline startup and shutdown code is constructed in the usual way, +; by taking the loop bodies and removing unnecessary instructions. +; We have left the comments describing cycle numbers in the code. +; These are intended for reference when comparing with the main loop, +; and have no particular relationship to actual cycle numbers. + +#ifdef LITTLE_WORDIAN +maxpy_little +#else +maxpy_big +#endif + .PROC + .CALLINFO FRAME=120,ENTRY_GR=4 + .ENTRY + STW,MA %r3,128(%sp) + STW %r4,-124(%sp) + + ADDIB,< -1,%r26,$L0 ; If N = 0, exit immediately. 
+ FLDD 0(%r25),%fr9 ; fr9 = scalar + +; First startup + + FLDD 0(%r24),%fr24 ; Cycle 1 + XMPYU %fr9R,%fr24R,%fr27 ; Cycle 3 + XMPYU %fr9R,%fr24L,%fr25 ; Cycle 4 + XMPYU %fr9L,%fr24L,%fr26 ; Cycle 5 + CMPIB,> 3,%r26,$N_IS_SMALL ; Pick out cases N = 1, 2, or 3 + XMPYU %fr9L,%fr24R,%fr24 ; Cycle 6 + FLDD EIGHT(%r24),%fr28 ; Cycle 8 + XMPYU %fr9L,%fr28R,%fr31 ; Cycle 10 + FSTD %fr24,-96(%sp) + XMPYU %fr9R,%fr28L,%fr30 ; Cycle 11 + FSTD %fr25,-80(%sp) + LDO SIXTEEN(%r24),%r24 ; Cycle 12 + FSTD %fr31,-64(%sp) + XMPYU %fr9R,%fr28R,%fr29 ; Cycle 13 + FSTD %fr27,-48(%sp) + +; Second startup + + XMPYU %fr9L,%fr28L,%fr28 ; Cycle 1 + FSTD %fr30,-56(%sp) + FLDD 0(%r24),%fr24 + + FSTD %fr26,-88(%sp) ; Cycle 2 + + XMPYU %fr9R,%fr24R,%fr27 ; Cycle 3 + FSTD %fr28,-104(%sp) + + XMPYU %fr9R,%fr24L,%fr25 ; Cycle 4 + LDD -96(%sp),%r3 + FSTD %fr29,-72(%sp) + + XMPYU %fr9L,%fr24L,%fr26 ; Cycle 5 + LDD -64(%sp),%r19 + LDD -80(%sp),%r21 + + XMPYU %fr9L,%fr24R,%fr24 ; Cycle 6 + LDD -56(%sp),%r20 + ADD %r21,%r3,%r3 + + ADD,DC %r20,%r19,%r19 ; Cycle 7 + LDD -88(%sp),%r4 + SHRPD %r3,%r0,32,%r21 + LDD -48(%sp),%r1 + + FLDD EIGHT(%r24),%fr28 ; Cycle 8 + LDD -104(%sp),%r31 + ADD,DC %r0,%r0,%r20 + SHRPD %r19,%r3,32,%r3 + + LDD -72(%sp),%r29 ; Cycle 9 + SHRPD %r20,%r19,32,%r20 + ADD %r21,%r1,%r1 + + XMPYU %fr9L,%fr28R,%fr31 ; Cycle 10 + ADD,DC %r3,%r4,%r4 + FSTD %fr24,-96(%sp) + + XMPYU %fr9R,%fr28L,%fr30 ; Cycle 11 + ADD,DC %r0,%r20,%r20 + LDD 0(%r23),%r3 + FSTD %fr25,-80(%sp) + + LDO SIXTEEN(%r24),%r24 ; Cycle 12 + FSTD %fr31,-64(%sp) + + XMPYU %fr9R,%fr28R,%fr29 ; Cycle 13 + ADD %r0,%r0,%r0 ; clear the carry bit + ADDIB,<= -4,%r26,$ENDLOOP ; actually happens in cycle 12 + FSTD %fr27,-48(%sp) +; MFCTL %cr16,%r21 ; for timing +; STD %r21,-112(%sp) + +; Here is the loop. 
+ +$LOOP XMPYU %fr9L,%fr28L,%fr28 ; Cycle 1 + ADD,DC %r29,%r4,%r4 + FSTD %fr30,-56(%sp) + FLDD 0(%r24),%fr24 + + LDO SIXTEEN(%r23),%r23 ; Cycle 2 + ADD,DC %r0,%r20,%r20 + FSTD %fr26,-88(%sp) + + XMPYU %fr9R,%fr24R,%fr27 ; Cycle 3 + ADD %r3,%r1,%r1 + FSTD %fr28,-104(%sp) + LDD UN_EIGHT(%r23),%r21 + + XMPYU %fr9R,%fr24L,%fr25 ; Cycle 4 + ADD,DC %r21,%r4,%r28 + FSTD %fr29,-72(%sp) + LDD -96(%sp),%r3 + + XMPYU %fr9L,%fr24L,%fr26 ; Cycle 5 + ADD,DC %r20,%r31,%r22 + LDD -64(%sp),%r19 + LDD -80(%sp),%r21 + + XMPYU %fr9L,%fr24R,%fr24 ; Cycle 6 + ADD %r21,%r3,%r3 + LDD -56(%sp),%r20 + STD %r1,UN_SIXTEEN(%r23) + + ADD,DC %r20,%r19,%r19 ; Cycle 7 + SHRPD %r3,%r0,32,%r21 + LDD -88(%sp),%r4 + LDD -48(%sp),%r1 + + ADD,DC %r0,%r0,%r20 ; Cycle 8 + SHRPD %r19,%r3,32,%r3 + FLDD EIGHT(%r24),%fr28 + LDD -104(%sp),%r31 + + SHRPD %r20,%r19,32,%r20 ; Cycle 9 + ADD %r21,%r1,%r1 + STD %r28,UN_EIGHT(%r23) + LDD -72(%sp),%r29 + + XMPYU %fr9L,%fr28R,%fr31 ; Cycle 10 + ADD,DC %r3,%r4,%r4 + FSTD %fr24,-96(%sp) + + XMPYU %fr9R,%fr28L,%fr30 ; Cycle 11 + ADD,DC %r0,%r20,%r20 + FSTD %fr25,-80(%sp) + LDD 0(%r23),%r3 + + LDO SIXTEEN(%r24),%r24 ; Cycle 12 + FSTD %fr31,-64(%sp) + + XMPYU %fr9R,%fr28R,%fr29 ; Cycle 13 + ADD %r22,%r1,%r1 + ADDIB,> -2,%r26,$LOOP ; actually happens in cycle 12 + FSTD %fr27,-48(%sp) + +$ENDLOOP + +; Shutdown code, first stage. 
+ +; MFCTL %cr16,%r21 ; for timing +; STD %r21,UN_SIXTEEN(%r23) +; LDD -112(%sp),%r21 +; STD %r21,UN_EIGHT(%r23) + + XMPYU %fr9L,%fr28L,%fr28 ; Cycle 1 + ADD,DC %r29,%r4,%r4 + CMPIB,= 0,%r26,$ONEMORE + FSTD %fr30,-56(%sp) + + LDO SIXTEEN(%r23),%r23 ; Cycle 2 + ADD,DC %r0,%r20,%r20 + FSTD %fr26,-88(%sp) + + ADD %r3,%r1,%r1 ; Cycle 3 + FSTD %fr28,-104(%sp) + LDD UN_EIGHT(%r23),%r21 + + ADD,DC %r21,%r4,%r28 ; Cycle 4 + FSTD %fr29,-72(%sp) + STD %r28,UN_EIGHT(%r23) ; moved up from cycle 9 + LDD -96(%sp),%r3 + + ADD,DC %r20,%r31,%r22 ; Cycle 5 + STD %r1,UN_SIXTEEN(%r23) +$JOIN4 + LDD -64(%sp),%r19 + LDD -80(%sp),%r21 + + ADD %r21,%r3,%r3 ; Cycle 6 + LDD -56(%sp),%r20 + + ADD,DC %r20,%r19,%r19 ; Cycle 7 + SHRPD %r3,%r0,32,%r21 + LDD -88(%sp),%r4 + LDD -48(%sp),%r1 + + ADD,DC %r0,%r0,%r20 ; Cycle 8 + SHRPD %r19,%r3,32,%r3 + LDD -104(%sp),%r31 + + SHRPD %r20,%r19,32,%r20 ; Cycle 9 + ADD %r21,%r1,%r1 + LDD -72(%sp),%r29 + + ADD,DC %r3,%r4,%r4 ; Cycle 10 + + ADD,DC %r0,%r20,%r20 ; Cycle 11 + LDD 0(%r23),%r3 + + ADD %r22,%r1,%r1 ; Cycle 13 + +; Shutdown code, second stage. + + ADD,DC %r29,%r4,%r4 ; Cycle 1 + + LDO SIXTEEN(%r23),%r23 ; Cycle 2 + ADD,DC %r0,%r20,%r20 + + LDD UN_EIGHT(%r23),%r21 ; Cycle 3 + ADD %r3,%r1,%r1 + + ADD,DC %r21,%r4,%r28 ; Cycle 4 + + ADD,DC %r20,%r31,%r22 ; Cycle 5 + + STD %r1,UN_SIXTEEN(%r23); Cycle 6 + + STD %r28,UN_EIGHT(%r23) ; Cycle 9 + + LDD 0(%r23),%r3 ; Cycle 11 + +; Shutdown code, third stage. + + LDO SIXTEEN(%r23),%r23 + ADD %r3,%r22,%r1 +$JOIN1 ADD,DC %r0,%r0,%r21 + CMPIB,*= 0,%r21,$L0 ; if no overflow, exit + STD %r1,UN_SIXTEEN(%r23) + +; Final carry propagation + +$FINAL1 LDO EIGHT(%r23),%r23 + LDD UN_SIXTEEN(%r23),%r21 + ADDI 1,%r21,%r21 + CMPIB,*= 0,%r21,$FINAL1 ; Keep looping if there is a carry. + STD %r21,UN_SIXTEEN(%r23) + B $L0 + NOP + +; Here is the code that handles the difficult cases N=1, N=2, and N=3. +; We do the usual trick -- branch out of the startup code at appropriate +; points, and branch into the shutdown code. 
+ +$N_IS_SMALL + CMPIB,= 0,%r26,$N_IS_ONE + FSTD %fr24,-96(%sp) ; Cycle 10 + FLDD EIGHT(%r24),%fr28 ; Cycle 8 + XMPYU %fr9L,%fr28R,%fr31 ; Cycle 10 + XMPYU %fr9R,%fr28L,%fr30 ; Cycle 11 + FSTD %fr25,-80(%sp) + FSTD %fr31,-64(%sp) ; Cycle 12 + XMPYU %fr9R,%fr28R,%fr29 ; Cycle 13 + FSTD %fr27,-48(%sp) + XMPYU %fr9L,%fr28L,%fr28 ; Cycle 1 + CMPIB,= 2,%r26,$N_IS_THREE + FSTD %fr30,-56(%sp) + +; N = 2 + FSTD %fr26,-88(%sp) ; Cycle 2 + FSTD %fr28,-104(%sp) ; Cycle 3 + LDD -96(%sp),%r3 ; Cycle 4 + FSTD %fr29,-72(%sp) + B $JOIN4 + ADD %r0,%r0,%r22 + +$N_IS_THREE + FLDD SIXTEEN(%r24),%fr24 + FSTD %fr26,-88(%sp) ; Cycle 2 + XMPYU %fr9R,%fr24R,%fr27 ; Cycle 3 + FSTD %fr28,-104(%sp) + XMPYU %fr9R,%fr24L,%fr25 ; Cycle 4 + LDD -96(%sp),%r3 + FSTD %fr29,-72(%sp) + XMPYU %fr9L,%fr24L,%fr26 ; Cycle 5 + LDD -64(%sp),%r19 + LDD -80(%sp),%r21 + B $JOIN3 + ADD %r0,%r0,%r22 + +$N_IS_ONE + FSTD %fr25,-80(%sp) + FSTD %fr27,-48(%sp) + FSTD %fr26,-88(%sp) ; Cycle 2 + B $JOIN5 + ADD %r0,%r0,%r22 + +; We came out of the unrolled loop with wrong parity. Do one more +; single cycle. This is quite tricky, because of the way the +; carry chains and SHRPD chains have been chopped up. 
+ +$ONEMORE + + FLDD 0(%r24),%fr24 + + LDO SIXTEEN(%r23),%r23 ; Cycle 2 + ADD,DC %r0,%r20,%r20 + FSTD %fr26,-88(%sp) + + XMPYU %fr9R,%fr24R,%fr27 ; Cycle 3 + FSTD %fr28,-104(%sp) + LDD UN_EIGHT(%r23),%r21 + ADD %r3,%r1,%r1 + + XMPYU %fr9R,%fr24L,%fr25 ; Cycle 4 + ADD,DC %r21,%r4,%r28 + STD %r28,UN_EIGHT(%r23) ; moved from cycle 9 + LDD -96(%sp),%r3 + FSTD %fr29,-72(%sp) + + XMPYU %fr9L,%fr24L,%fr26 ; Cycle 5 + ADD,DC %r20,%r31,%r22 + LDD -64(%sp),%r19 + LDD -80(%sp),%r21 + + STD %r1,UN_SIXTEEN(%r23); Cycle 6 +$JOIN3 + XMPYU %fr9L,%fr24R,%fr24 + LDD -56(%sp),%r20 + ADD %r21,%r3,%r3 + + ADD,DC %r20,%r19,%r19 ; Cycle 7 + LDD -88(%sp),%r4 + SHRPD %r3,%r0,32,%r21 + LDD -48(%sp),%r1 + + LDD -104(%sp),%r31 ; Cycle 8 + ADD,DC %r0,%r0,%r20 + SHRPD %r19,%r3,32,%r3 + + LDD -72(%sp),%r29 ; Cycle 9 + SHRPD %r20,%r19,32,%r20 + ADD %r21,%r1,%r1 + + ADD,DC %r3,%r4,%r4 ; Cycle 10 + FSTD %fr24,-96(%sp) + + ADD,DC %r0,%r20,%r20 ; Cycle 11 + LDD 0(%r23),%r3 + FSTD %fr25,-80(%sp) + + ADD %r22,%r1,%r1 ; Cycle 13 + FSTD %fr27,-48(%sp) + +; Shutdown code, stage 1-1/2. + + ADD,DC %r29,%r4,%r4 ; Cycle 1 + + LDO SIXTEEN(%r23),%r23 ; Cycle 2 + ADD,DC %r0,%r20,%r20 + FSTD %fr26,-88(%sp) + + LDD UN_EIGHT(%r23),%r21 ; Cycle 3 + ADD %r3,%r1,%r1 + + ADD,DC %r21,%r4,%r28 ; Cycle 4 + STD %r28,UN_EIGHT(%r23) ; moved from cycle 9 + + ADD,DC %r20,%r31,%r22 ; Cycle 5 + STD %r1,UN_SIXTEEN(%r23) +$JOIN5 + LDD -96(%sp),%r3 ; moved from cycle 4 + LDD -80(%sp),%r21 + ADD %r21,%r3,%r3 ; Cycle 6 + ADD,DC %r0,%r0,%r19 ; Cycle 7 + LDD -88(%sp),%r4 + SHRPD %r3,%r0,32,%r21 + LDD -48(%sp),%r1 + SHRPD %r19,%r3,32,%r3 ; Cycle 8 + ADD %r21,%r1,%r1 ; Cycle 9 + ADD,DC %r3,%r4,%r4 ; Cycle 10 + LDD 0(%r23),%r3 ; Cycle 11 + ADD %r22,%r1,%r1 ; Cycle 13 + +; Shutdown code, stage 2-1/2. 
+ + ADD,DC %r0,%r4,%r4 ; Cycle 1 + LDO SIXTEEN(%r23),%r23 ; Cycle 2 + LDD UN_EIGHT(%r23),%r21 ; Cycle 3 + ADD %r3,%r1,%r1 + STD %r1,UN_SIXTEEN(%r23) + ADD,DC %r21,%r4,%r1 + B $JOIN1 + LDO EIGHT(%r23),%r23 + +; exit + +$L0 + LDW -124(%sp),%r4 + BVE (%r2) + .EXIT + LDW,MB -128(%sp),%r3 + + .PROCEND + +; *************************************************************** +; +; add_diag_[little/big] +; +; *************************************************************** + +; The arguments are as follows: +; r2 return PC, of course +; r26 = arg1 = length +; r25 = arg2 = vector to square +; r24 = arg3 = result vector + +#ifdef LITTLE_WORDIAN +add_diag_little +#else +add_diag_big +#endif + .PROC + .CALLINFO FRAME=120,ENTRY_GR=4 + .ENTRY + STW,MA %r3,128(%sp) + STW %r4,-124(%sp) + + ADDIB,< -1,%r26,$Z0 ; If N=0, exit immediately. + NOP + +; Startup code + + FLDD 0(%r25),%fr7 ; Cycle 2 (alternate body) + XMPYU %fr7R,%fr7R,%fr29 ; Cycle 4 + XMPYU %fr7L,%fr7R,%fr27 ; Cycle 5 + XMPYU %fr7L,%fr7L,%fr30 + LDO SIXTEEN(%r25),%r25 ; Cycle 6 + FSTD %fr29,-88(%sp) + FSTD %fr27,-72(%sp) ; Cycle 7 + CMPIB,= 0,%r26,$DIAG_N_IS_ONE ; Cycle 1 (main body) + FSTD %fr30,-96(%sp) + FLDD UN_EIGHT(%r25),%fr7 ; Cycle 2 + LDD -88(%sp),%r22 ; Cycle 3 + LDD -72(%sp),%r31 ; Cycle 4 + XMPYU %fr7R,%fr7R,%fr28 + XMPYU %fr7L,%fr7R,%fr24 ; Cycle 5 + XMPYU %fr7L,%fr7L,%fr31 + LDD -96(%sp),%r20 ; Cycle 6 + FSTD %fr28,-80(%sp) + ADD %r0,%r0,%r0 ; clear the carry bit + ADDIB,<= -2,%r26,$ENDDIAGLOOP ; Cycle 7 + FSTD %fr24,-64(%sp) + +; Here is the loop. It is unrolled twice, modelled after the "alternate body" and then the "main body". 
+ +$DIAGLOOP + SHRPD %r31,%r0,31,%r3 ; Cycle 1 (alternate body) + LDO SIXTEEN(%r25),%r25 + LDD 0(%r24),%r1 + FSTD %fr31,-104(%sp) + SHRPD %r0,%r31,31,%r4 ; Cycle 2 + ADD,DC %r22,%r3,%r3 + FLDD UN_SIXTEEN(%r25),%fr7 + ADD,DC %r0,%r20,%r20 ; Cycle 3 + ADD %r1,%r3,%r3 + XMPYU %fr7R,%fr7R,%fr29 ; Cycle 4 + LDD -80(%sp),%r21 + STD %r3,0(%r24) + XMPYU %fr7L,%fr7R,%fr27 ; Cycle 5 + XMPYU %fr7L,%fr7L,%fr30 + LDD -64(%sp),%r29 + LDD EIGHT(%r24),%r1 + ADD,DC %r4,%r20,%r20 ; Cycle 6 + LDD -104(%sp),%r19 + FSTD %fr29,-88(%sp) + ADD %r20,%r1,%r1 ; Cycle 7 + FSTD %fr27,-72(%sp) + SHRPD %r29,%r0,31,%r4 ; Cycle 1 (main body) + LDO THIRTY_TWO(%r24),%r24 + LDD UN_SIXTEEN(%r24),%r28 + FSTD %fr30,-96(%sp) + SHRPD %r0,%r29,31,%r3 ; Cycle 2 + ADD,DC %r21,%r4,%r4 + FLDD UN_EIGHT(%r25),%fr7 + STD %r1,UN_TWENTY_FOUR(%r24) + ADD,DC %r0,%r19,%r19 ; Cycle 3 + ADD %r28,%r4,%r4 + XMPYU %fr7R,%fr7R,%fr28 ; Cycle 4 + LDD -88(%sp),%r22 + STD %r4,UN_SIXTEEN(%r24) + XMPYU %fr7L,%fr7R,%fr24 ; Cycle 5 + XMPYU %fr7L,%fr7L,%fr31 + LDD -72(%sp),%r31 + LDD UN_EIGHT(%r24),%r28 + ADD,DC %r3,%r19,%r19 ; Cycle 6 + LDD -96(%sp),%r20 + FSTD %fr28,-80(%sp) + ADD %r19,%r28,%r28 ; Cycle 7 + FSTD %fr24,-64(%sp) + ADDIB,> -2,%r26,$DIAGLOOP ; Cycle 8 + STD %r28,UN_EIGHT(%r24) + +$ENDDIAGLOOP + + ADD,DC %r0,%r22,%r22 + CMPIB,= 0,%r26,$ONEMOREDIAG + SHRPD %r31,%r0,31,%r3 + +; Shutdown code, first stage. + + FSTD %fr31,-104(%sp) ; Cycle 1 (alternate body) + LDD 0(%r24),%r28 + SHRPD %r0,%r31,31,%r4 ; Cycle 2 + ADD %r3,%r22,%r3 + ADD,DC %r0,%r20,%r20 ; Cycle 3 + LDD -80(%sp),%r21 + ADD %r3,%r28,%r3 + LDD -64(%sp),%r29 ; Cycle 4 + STD %r3,0(%r24) + LDD EIGHT(%r24),%r1 ; Cycle 5 + LDO SIXTEEN(%r25),%r25 ; Cycle 6 + LDD -104(%sp),%r19 + ADD,DC %r4,%r20,%r20 + ADD %r20,%r1,%r1 ; Cycle 7 + ADD,DC %r0,%r21,%r21 ; Cycle 8 + STD %r1,EIGHT(%r24) + +; Shutdown code, second stage. 
+ + SHRPD %r29,%r0,31,%r4 ; Cycle 1 (main body) + LDO THIRTY_TWO(%r24),%r24 + LDD UN_SIXTEEN(%r24),%r1 + SHRPD %r0,%r29,31,%r3 ; Cycle 2 + ADD %r4,%r21,%r4 + ADD,DC %r0,%r19,%r19 ; Cycle 3 + ADD %r4,%r1,%r4 + STD %r4,UN_SIXTEEN(%r24); Cycle 4 + LDD UN_EIGHT(%r24),%r28 ; Cycle 5 + ADD,DC %r3,%r19,%r19 ; Cycle 6 + ADD %r19,%r28,%r28 ; Cycle 7 + ADD,DC %r0,%r0,%r22 ; Cycle 8 + CMPIB,*= 0,%r22,$Z0 ; if no overflow, exit + STD %r28,UN_EIGHT(%r24) + +; Final carry propagation + +$FDIAG2 + LDO EIGHT(%r24),%r24 + LDD UN_EIGHT(%r24),%r26 + ADDI 1,%r26,%r26 + CMPIB,*= 0,%r26,$FDIAG2 ; Keep looping if there is a carry. + STD %r26,UN_EIGHT(%r24) + + B $Z0 + NOP + +; Here is the code that handles the difficult case N=1. +; We do the usual trick -- branch out of the startup code at appropriate +; points, and branch into the shutdown code. + +$DIAG_N_IS_ONE + + LDD -88(%sp),%r22 + LDD -72(%sp),%r31 + B $JOINDIAG + LDD -96(%sp),%r20 + +; We came out of the unrolled loop with wrong parity. Do one more +; single cycle. This is the "alternate body". It will, of course, +; give us opposite registers from the other case, so we need +; completely different shutdown code. + +$ONEMOREDIAG + FSTD %fr31,-104(%sp) ; Cycle 1 (alternate body) + LDD 0(%r24),%r28 + FLDD 0(%r25),%fr7 ; Cycle 2 + SHRPD %r0,%r31,31,%r4 + ADD %r3,%r22,%r3 + ADD,DC %r0,%r20,%r20 ; Cycle 3 + LDD -80(%sp),%r21 + ADD %r3,%r28,%r3 + LDD -64(%sp),%r29 ; Cycle 4 + STD %r3,0(%r24) + XMPYU %fr7R,%fr7R,%fr29 + LDD EIGHT(%r24),%r1 ; Cycle 5 + XMPYU %fr7L,%fr7R,%fr27 + XMPYU %fr7L,%fr7L,%fr30 + LDD -104(%sp),%r19 ; Cycle 6 + FSTD %fr29,-88(%sp) + ADD,DC %r4,%r20,%r20 + FSTD %fr27,-72(%sp) ; Cycle 7 + ADD %r20,%r1,%r1 + ADD,DC %r0,%r21,%r21 ; Cycle 8 + STD %r1,EIGHT(%r24) + +; Shutdown code, first stage. 
+ + SHRPD %r29,%r0,31,%r4 ; Cycle 1 (main body) + LDO THIRTY_TWO(%r24),%r24 + FSTD %fr30,-96(%sp) + LDD UN_SIXTEEN(%r24),%r1 + SHRPD %r0,%r29,31,%r3 ; Cycle 2 + ADD %r4,%r21,%r4 + ADD,DC %r0,%r19,%r19 ; Cycle 3 + LDD -88(%sp),%r22 + ADD %r4,%r1,%r4 + LDD -72(%sp),%r31 ; Cycle 4 + STD %r4,UN_SIXTEEN(%r24) + LDD UN_EIGHT(%r24),%r28 ; Cycle 5 + LDD -96(%sp),%r20 ; Cycle 6 + ADD,DC %r3,%r19,%r19 + ADD %r19,%r28,%r28 ; Cycle 7 + ADD,DC %r0,%r22,%r22 ; Cycle 8 + STD %r28,UN_EIGHT(%r24) + +; Shutdown code, second stage. + +$JOINDIAG + SHRPD %r31,%r0,31,%r3 ; Cycle 1 (alternate body) + LDD 0(%r24),%r28 + SHRPD %r0,%r31,31,%r4 ; Cycle 2 + ADD %r3,%r22,%r3 + ADD,DC %r0,%r20,%r20 ; Cycle 3 + ADD %r3,%r28,%r3 + STD %r3,0(%r24) ; Cycle 4 + LDD EIGHT(%r24),%r1 ; Cycle 5 + ADD,DC %r4,%r20,%r20 + ADD %r20,%r1,%r1 ; Cycle 7 + ADD,DC %r0,%r0,%r21 ; Cycle 8 + CMPIB,*= 0,%r21,$Z0 ; if no overflow, exit + STD %r1,EIGHT(%r24) + +; Final carry propagation + +$FDIAG1 + LDO EIGHT(%r24),%r24 + LDD EIGHT(%r24),%r26 + ADDI 1,%r26,%r26 + CMPIB,*= 0,%r26,$FDIAG1 ; Keep looping if there is a carry. 
+ STD %r26,EIGHT(%r24) + +$Z0 + LDW -124(%sp),%r4 + BVE (%r2) + .EXIT + LDW,MB -128(%sp),%r3 + .PROCEND +; .ALLOW + + .SPACE $TEXT$ + .SUBSPA $CODE$ +#ifdef LITTLE_WORDIAN +#ifdef __GNUC__ +; GNU-as (as of 2.19) does not support LONG_RETURN + .EXPORT maxpy_little,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR + .EXPORT add_diag_little,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR +#else + .EXPORT maxpy_little,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,LONG_RETURN + .EXPORT add_diag_little,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,LONG_RETURN +#endif +#else + .EXPORT maxpy_big,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR,LONG_RETURN + .EXPORT add_diag_big,ENTRY,PRIV_LEV=3,ARGW0=GR,ARGW1=GR,ARGW2=GR,LONG_RETURN +#endif + .END + + +; How to use "maxpy_PA20_little" and "maxpy_PA20_big" +; +; The routine "maxpy_PA20_little" or "maxpy_PA20_big" +; performs a 64-bit x any-size multiply, and adds the +; result to an area of memory. That is, it performs +; something like +; +; A B C D +; * Z +; __________ +; P Q R S T +; +; and then adds the "PQRST" vector into an area of memory, +; handling all carries. +; +; Digression on nomenclature and endian-ness: +; +; Each of the capital letters in the above represents a 64-bit +; quantity. That is, you could think of the discussion as +; being in terms of radix-16-quintillion arithmetic. The data +; type being manipulated is "unsigned long long int". This +; requires the 64-bit extension of the HP-UX C compiler, +; available at release 10. You need these compiler flags to +; enable these extensions: +; +; -Aa +e +DA2.0 +DS2.0 +; +; (The first specifies ANSI C, the second enables the +; extensions, which are beyond ANSI C, and the third and +; fourth tell the compiler to use whatever features of the +; PA2.0 architecture it wishes, in order to make the code more +; efficient. 
Since the presence of the assembly code will +; make the program unable to run on anything less than PA2.0, +; you might as well gain the performance enhancements in the C +; code as well.) +; +; Questions of "endian-ness" often come up, usually in the +; context of byte ordering in a word. These routines have a +; similar issue, that could be called "wordian-ness". +; Independent of byte ordering (PA is always big-endian), one +; can make two choices when representing extremely large +; numbers as arrays of 64-bit doublewords in memory. +; +; "Little-wordian" layout means that the least significant +; word of a number is stored at the lowest address. +; +; MSW LSW +; | | +; V V +; +; A B C D E +; +; ^ ^ ^ +; | | |____ address 0 +; | | +; | |_______address 8 +; | +; address 32 +; +; "Big-wordian" means that the most significant word is at the +; lowest address. +; +; MSW LSW +; | | +; V V +; +; A B C D E +; +; ^ ^ ^ +; | | |____ address 32 +; | | +; | |_______address 24 +; | +; address 0 +; +; When you compile the file, you must specify one or the other, with +; a switch "-DLITTLE_WORDIAN" or "-DBIG_WORDIAN". +; +; Incidentally, you assemble this file as part of your +; project with the same C compiler as the rest of the program. 
+; My "makefile" for a superprecision arithmetic package has +; the following stuff: +; +; # definitions: +; CC = cc -Aa +e -z +DA2.0 +DS2.0 +w1 +; CFLAGS = +O3 +; LDFLAGS = -L /usr/lib -Wl,-aarchive +; +; # general build rule for ".s" files: +; .s.o: +; $(CC) $(CFLAGS) -c $< -DBIG_WORDIAN +; +; # Now any bind step that calls for pa20.o will assemble pa20.s +; +; End of digression, back to arithmetic: +; +; The way we multiply two huge numbers is, of course, to multiply +; the "ABCD" vector by each of the "WXYZ" doublewords, adding +; the result vectors with increasing offsets, the way we learned +; in school, back before we all used calculators: +; +; A B C D +; * W X Y Z +; __________ +; P Q R S T +; E F G H I +; M N O P Q +; + R S T U V +; _______________ +; F I N A L S U M +; +; So we call maxpy_PA20_big (in my case; my package is +; big-wordian) repeatedly, giving the W, X, Y, and Z arguments +; in turn as the "scalar", and giving the "ABCD" vector each +; time. We direct it to add its result into an area of memory +; that we have cleared at the start. We skew the exact +; location into that area with each call. +; +; The prototype for the function is +; +; extern void maxpy_PA20_big( +; int length, /* Number of doublewords in the multiplicand vector. */ +; const long long int *scalaraddr, /* Address to fetch the scalar. */ +; const long long int *multiplicand, /* The multiplicand vector. */ +; long long int *result); /* Where to accumulate the result. */ +; +; (You should place a copy of this prototype in an include file +; or in your C file.) +; +; Now, IN ALL CASES, the given address for the multiplicand or +; the result is that of the LEAST SIGNIFICANT DOUBLEWORD. +; That word is, of course, the word at which the routine +; starts processing. "maxpy_PA20_little" then increases the +; addresses as it computes. "maxpy_PA20_big" decreases them. +; +; In our example above, "length" would be 4 in each case. +; "multiplicand" would be the "ABCD" vector. 
Specifically, +; the address of the element "D". "scalaraddr" would be the +; address of "W", "X", "Y", or "Z" on the four calls that we +; would make. (The order doesn't matter, of course.) +; "result" would be the appropriate address in the result +; area. When multiplying by "Z", that would be the least +; significant word. When multiplying by "Y", it would be the +; next higher word (8 bytes higher if little-wordian; 8 bytes +; lower if big-wordian), and so on. The size of the result +; area must be the sum of the sizes of the multiplicand +; and multiplier vectors, and must be initialized to zero +; before we start. +; +; Whenever the routine adds its partial product into the result +; vector, it follows carry chains as far as they need to go. +; +; Here is the super-precision multiply routine that I use for +; my package. The package is big-wordian. I have taken out +; handling of exponents (it's a floating point package): +; +; static void mul_PA20( +; int size, +; const long long int *arg1, +; const long long int *arg2, +; long long int *result) +; { +; int i; +; +; for (i=0 ; i<2*size ; i++) result[i] = 0ULL; +; +; for (i=0 ; i) { + chomp; + s/\#.*$//; + next if /^\s*$/; + + ($suite, $func, $desc) = split(/:/, $_); + + $tmp = { "suite" => $suite, + "func" => $func, + "desc" => $desc }; + + push(@item, $tmp); +} +$count = scalar(@item); +$last = pop(@item); + +#------------------------------------------------------------------------ +# Output the table of names +print "/* Table mapping test suite names to index numbers */\n"; +printf("const int %s = %d;\n", $COUNTVAR, $count); +printf("const char *%s[] = {\n", $NAMEVAR); + +foreach $elt (@item) { + printf(" \"%s\",%s/* %s%s */\n", $elt->{"suite"}, + " " x ($NAMELEN - length($elt->{"suite"})), + $elt->{"desc"}, + " " x ($DESCLEN - length($elt->{"desc"}))); +} +printf(" \"%s\" %s/* %s%s */\n", $last->{"suite"}, + " " x ($NAMELEN - length($last->{"suite"})), + $last->{"desc"}, + " " x ($DESCLEN - 
length($last->{"desc"}))); +print "};\n\n"; + +#------------------------------------------------------------------------ +# Output the driver function prototypes +print "/* Test function prototypes */\n"; +foreach $elt (@item, $last) { + printf("int %s(void);\n", $elt->{"func"}); +} +print "\n"; + +#------------------------------------------------------------------------ +# Output the table of functions +print "/* Table mapping index numbers to functions */\n"; +printf("int (*%s[])(void) = {\n ", $FUNCVAR); +$brk = 0; + +foreach $elt (@item) { + print($elt->{"func"}, ", ", + " " x ($FUNCLEN - length($elt->{"func"}))); + $brk = ($brk + 1) & 3; + print "\n " unless($brk); +} +print $last->{"func"}, "\n};\n\n"; + +#------------------------------------------------------------------------ +# Output the table of descriptions +print "/* Table mapping index numbers to descriptions */\n"; +printf("const char *%s[] = {\n", $DESCVAR); + +foreach $elt (@item) { + printf(" \"%s\",\n", $elt->{"desc"}); +} +printf(" \"%s\"\n};\n\n", $last->{"desc"}); + +exit 0; + diff --git a/security/nss/lib/freebl/mpi/mdxptest.c b/security/nss/lib/freebl/mpi/mdxptest.c new file mode 100644 index 000000000..adbcfc3d1 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mdxptest.c @@ -0,0 +1,306 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include +#include +#include +#include +#include "mpi.h" +#include "mpi-priv.h" + +/* #define OLD_WAY 1 */ + +/* This key is the 1024-bit test key used for speed testing of RSA private +** key ops. 
+*/ + +#define CONST const + +static CONST unsigned char default_n[128] = { + 0xc2, 0xae, 0x96, 0x89, 0xaf, 0xce, 0xd0, 0x7b, 0x3b, 0x35, 0xfd, 0x0f, 0xb1, 0xf4, 0x7a, 0xd1, + 0x3c, 0x7d, 0xb5, 0x86, 0xf2, 0x68, 0x36, 0xc9, 0x97, 0xe6, 0x82, 0x94, 0x86, 0xaa, 0x05, 0x39, + 0xec, 0x11, 0x51, 0xcc, 0x5c, 0xa1, 0x59, 0xba, 0x29, 0x18, 0xf3, 0x28, 0xf1, 0x9d, 0xe3, 0xae, + 0x96, 0x5d, 0x6d, 0x87, 0x73, 0xf6, 0xf6, 0x1f, 0xd0, 0x2d, 0xfb, 0x2f, 0x7a, 0x13, 0x7f, 0xc8, + 0x0c, 0x7a, 0xe9, 0x85, 0xfb, 0xce, 0x74, 0x86, 0xf8, 0xef, 0x2f, 0x85, 0x37, 0x73, 0x0f, 0x62, + 0x4e, 0x93, 0x17, 0xb7, 0x7e, 0x84, 0x9a, 0x94, 0x11, 0x05, 0xca, 0x0d, 0x31, 0x4b, 0x2a, 0xc8, + 0xdf, 0xfe, 0xe9, 0x0c, 0x13, 0xc7, 0xf2, 0xad, 0x19, 0x64, 0x28, 0x3c, 0xb5, 0x6a, 0xc8, 0x4b, + 0x79, 0xea, 0x7c, 0xce, 0x75, 0x92, 0x45, 0x3e, 0xa3, 0x9d, 0x64, 0x6f, 0x04, 0x69, 0x19, 0x17 +}; + +static CONST unsigned char default_d[128] = { + 0x13, 0xcb, 0xbc, 0xf2, 0xf3, 0x35, 0x8c, 0x6d, 0x7b, 0x6f, 0xd9, 0xf3, 0xa6, 0x9c, 0xbd, 0x80, + 0x59, 0x2e, 0x4f, 0x2f, 0x11, 0xa7, 0x17, 0x2b, 0x18, 0x8f, 0x0f, 0xe8, 0x1a, 0x69, 0x5f, 0x6e, + 0xac, 0x5a, 0x76, 0x7e, 0xd9, 0x4c, 0x6e, 0xdb, 0x47, 0x22, 0x8a, 0x57, 0x37, 0x7a, 0x5e, 0x94, + 0x7a, 0x25, 0xb5, 0xe5, 0x78, 0x1d, 0x3c, 0x99, 0xaf, 0x89, 0x7d, 0x69, 0x2e, 0x78, 0x9d, 0x1d, + 0x84, 0xc8, 0xc1, 0xd7, 0x1a, 0xb2, 0x6d, 0x2d, 0x8a, 0xd9, 0xab, 0x6b, 0xce, 0xae, 0xb0, 0xa0, + 0x58, 0x55, 0xad, 0x5c, 0x40, 0x8a, 0xd6, 0x96, 0x08, 0x8a, 0xe8, 0x63, 0xe6, 0x3d, 0x6c, 0x20, + 0x49, 0xc7, 0xaf, 0x0f, 0x25, 0x73, 0xd3, 0x69, 0x43, 0x3b, 0xf2, 0x32, 0xf8, 0x3d, 0x5e, 0xee, + 0x7a, 0xca, 0xd6, 0x94, 0x55, 0xe5, 0xbd, 0x25, 0x34, 0x8d, 0x63, 0x40, 0xb5, 0x8a, 0xc3, 0x01 +}; + +#define DEFAULT_ITERS 50 + +typedef clock_t timetype; +#define gettime(x) *(x) = clock() +#define subtime(a, b) a -= b +#define msec(x) ((clock_t)((double)x * 1000.0 / CLOCKS_PER_SEC)) +#define sec(x) (x / CLOCKS_PER_SEC) + +struct TimingContextStr { + timetype start; + timetype end; + timetype 
interval; + + int minutes; + int seconds; + int millisecs; +}; + +typedef struct TimingContextStr TimingContext; + +TimingContext * +CreateTimingContext(void) +{ + return (TimingContext *)malloc(sizeof(TimingContext)); +} + +void +DestroyTimingContext(TimingContext *ctx) +{ + free(ctx); +} + +void +TimingBegin(TimingContext *ctx) +{ + gettime(&ctx->start); +} + +static void +timingUpdate(TimingContext *ctx) +{ + + ctx->millisecs = msec(ctx->interval) % 1000; + ctx->seconds = sec(ctx->interval); + ctx->minutes = ctx->seconds / 60; + ctx->seconds %= 60; +} + +void +TimingEnd(TimingContext *ctx) +{ + gettime(&ctx->end); + ctx->interval = ctx->end; + subtime(ctx->interval, ctx->start); + timingUpdate(ctx); +} + +char * +TimingGenerateString(TimingContext *ctx) +{ + static char sBuf[4096]; + + sprintf(sBuf, "%d minutes, %d.%03d seconds", ctx->minutes, + ctx->seconds, ctx->millisecs); + return sBuf; +} + +static void +dumpBytes(unsigned char *b, int l) +{ + int i; + if (l <= 0) + return; + for (i = 0; i < l; ++i) { + if (i % 16 == 0) + printf("\t"); + printf(" %02x", b[i]); + if (i % 16 == 15) + printf("\n"); + } + if ((i % 16) != 0) + printf("\n"); + printf("\n"); +} + +static mp_err +testNewFuncs(const unsigned char *modulusBytes, int modulus_len) +{ + mp_err mperr = MP_OKAY; + mp_int modulus; + unsigned char buf[512]; + + mperr = mp_init(&modulus); + mperr = mp_read_unsigned_octets(&modulus, modulusBytes, modulus_len); + mperr = mp_to_fixlen_octets(&modulus, buf, modulus_len); + mperr = mp_to_fixlen_octets(&modulus, buf, modulus_len + 1); + mperr = mp_to_fixlen_octets(&modulus, buf, modulus_len + 4); + mperr = mp_to_unsigned_octets(&modulus, buf, modulus_len); + mperr = mp_to_signed_octets(&modulus, buf, modulus_len + 1); + mp_clear(&modulus); + return mperr; +} + +int +testModExp(const unsigned char *modulusBytes, + const unsigned int expo, + const unsigned char *input, + unsigned char *output, + int modulus_len) +{ + mp_err mperr = MP_OKAY; + mp_int modulus; + 
mp_int base; + mp_int exponent; + mp_int result; + + mperr = mp_init(&modulus); + mperr += mp_init(&base); + mperr += mp_init(&exponent); + mperr += mp_init(&result); + /* we initialize all mp_ints unconditionally, even if some fail. + ** This guarantees that the DIGITS pointer is valid (even if null). + ** So, mp_clear will do the right thing below. + */ + if (mperr == MP_OKAY) { + mperr = mp_read_unsigned_octets(&modulus, + modulusBytes + (sizeof default_n - modulus_len), modulus_len); + mperr += mp_read_unsigned_octets(&base, input, modulus_len); + mp_set(&exponent, expo); + if (mperr == MP_OKAY) { +#if OLD_WAY + mperr = s_mp_exptmod(&base, &exponent, &modulus, &result); +#else + mperr = mp_exptmod(&base, &exponent, &modulus, &result); +#endif + if (mperr == MP_OKAY) { + mperr = mp_to_fixlen_octets(&result, output, modulus_len); + } + } + } + mp_clear(&base); + mp_clear(&result); + + mp_clear(&modulus); + mp_clear(&exponent); + + return (int)mperr; +} + +int +doModExp(const unsigned char *modulusBytes, + const unsigned char *exponentBytes, + const unsigned char *input, + unsigned char *output, + int modulus_len) +{ + mp_err mperr = MP_OKAY; + mp_int modulus; + mp_int base; + mp_int exponent; + mp_int result; + + mperr = mp_init(&modulus); + mperr += mp_init(&base); + mperr += mp_init(&exponent); + mperr += mp_init(&result); + /* we initialize all mp_ints unconditionally, even if some fail. + ** This guarantees that the DIGITS pointer is valid (even if null). + ** So, mp_clear will do the right thing below. 
+ */ + if (mperr == MP_OKAY) { + mperr = mp_read_unsigned_octets(&modulus, + modulusBytes + (sizeof default_n - modulus_len), modulus_len); + mperr += mp_read_unsigned_octets(&exponent, exponentBytes, modulus_len); + mperr += mp_read_unsigned_octets(&base, input, modulus_len); + if (mperr == MP_OKAY) { +#if OLD_WAY + mperr = s_mp_exptmod(&base, &exponent, &modulus, &result); +#else + mperr = mp_exptmod(&base, &exponent, &modulus, &result); +#endif + if (mperr == MP_OKAY) { + mperr = mp_to_fixlen_octets(&result, output, modulus_len); + } + } + } + mp_clear(&base); + mp_clear(&result); + + mp_clear(&modulus); + mp_clear(&exponent); + + return (int)mperr; +} + +int +main(int argc, char **argv) +{ + TimingContext *timeCtx; + char *progName; + long iters = DEFAULT_ITERS; + unsigned int modulus_len; + int i; + int rv; + unsigned char buf[1024]; + unsigned char buf2[1024]; + + progName = strrchr(argv[0], '/'); + if (!progName) + progName = strrchr(argv[0], '\\'); + progName = progName ? progName + 1 : argv[0]; + + if (argc >= 2) { + iters = atol(argv[1]); + } + + if (argc >= 3) { + modulus_len = atol(argv[2]); + } else + modulus_len = sizeof default_n; + + /* no library init function !? 
*/ + + memset(buf, 0x41, sizeof buf); + + if (iters < 2) { + testNewFuncs(default_n, modulus_len); + testNewFuncs(default_n + 1, modulus_len - 1); + testNewFuncs(default_n + 2, modulus_len - 2); + testNewFuncs(default_n + 3, modulus_len - 3); + + rv = testModExp(default_n, 0, buf, buf2, modulus_len); + dumpBytes((unsigned char *)buf2, modulus_len); + + rv = testModExp(default_n, 1, buf, buf2, modulus_len); + dumpBytes((unsigned char *)buf2, modulus_len); + + rv = testModExp(default_n, 2, buf, buf2, modulus_len); + dumpBytes((unsigned char *)buf2, modulus_len); + + rv = testModExp(default_n, 3, buf, buf2, modulus_len); + dumpBytes((unsigned char *)buf2, modulus_len); + } + rv = doModExp(default_n, default_d, buf, buf2, modulus_len); + if (rv != 0) { + fprintf(stderr, "Error in modexp operation:\n"); + exit(1); + } + dumpBytes((unsigned char *)buf2, modulus_len); + + timeCtx = CreateTimingContext(); + TimingBegin(timeCtx); + i = iters; + while (i--) { + rv = doModExp(default_n, default_d, buf, buf2, modulus_len); + if (rv != 0) { + fprintf(stderr, "Error in modexp operation\n"); + exit(1); + } + } + TimingEnd(timeCtx); + printf("%ld iterations in %s\n", iters, TimingGenerateString(timeCtx)); + + return 0; +} diff --git a/security/nss/lib/freebl/mpi/montmulf.c b/security/nss/lib/freebl/mpi/montmulf.c new file mode 100644 index 000000000..ce8fbc31d --- /dev/null +++ b/security/nss/lib/freebl/mpi/montmulf.c @@ -0,0 +1,286 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifdef SOLARIS +#define RF_INLINE_MACROS 1 +#endif + +static const double TwoTo16 = 65536.0; +static const double TwoToMinus16 = 1.0 / 65536.0; +static const double Zero = 0.0; +static const double TwoTo32 = 65536.0 * 65536.0; +static const double TwoToMinus32 = 1.0 / (65536.0 * 65536.0); + +#ifdef RF_INLINE_MACROS + +double upper32(double); +double lower32(double, double); +double mod(double, double, double); + +void i16_to_d16_and_d32x4(const double * /*1/(2^16)*/, + const double * /* 2^16*/, + const double * /* 0 */, + double * /*result16*/, + double * /* result32 */, + float * /*source - should be unsigned int* converted to float* */); + +#else +#ifdef MP_USE_FLOOR +#include +#else +#define floor(d) ((double)((unsigned long long)(d))) +#endif + +static double +upper32(double x) +{ + return floor(x * TwoToMinus32); +} + +static double +lower32(double x, double y) +{ + return x - TwoTo32 * floor(x * TwoToMinus32); +} + +static double +mod(double x, double oneoverm, double m) +{ + return x - m * floor(x * oneoverm); +} + +#endif + +static void +cleanup(double *dt, int from, int tlen) +{ + int i; + double tmp, tmp1, x, x1; + + tmp = tmp1 = Zero; + /* original code ** + for(i=2*from;i<2*tlen-2;i++) + { + x=dt[i]; + dt[i]=lower32(x,Zero)+tmp1; + tmp1=tmp; + tmp=upper32(x); + } + dt[tlen-2]+=tmp1; + dt[tlen-1]+=tmp; + **end original code ***/ + /* new code ***/ + for (i = 2 * from; i < 2 * tlen; i += 2) { + x = dt[i]; + x1 = dt[i + 1]; + dt[i] = lower32(x, Zero) + tmp; + dt[i + 1] = lower32(x1, Zero) + tmp1; + tmp = upper32(x); + tmp1 = upper32(x1); + } + /** end new code **/ +} + +void +conv_d16_to_i32(unsigned int *i32, double *d16, long long *tmp, int ilen) +{ + int i; + long long t, t1, a, b, c, d; + + t1 = 0; + a = (long long)d16[0]; + b = (long long)d16[1]; + for (i = 0; i < ilen - 1; i++) { + c = (long long)d16[2 * i + 2]; + t1 += (unsigned int)a; + t = (a >> 32); + d = (long long)d16[2 * i + 3]; + t1 += (b & 0xffff) << 16; + t += (b >> 16) + (t1 >> 32); 
+ i32[i] = (unsigned int)t1; + t1 = t; + a = c; + b = d; + } + t1 += (unsigned int)a; + t = (a >> 32); + t1 += (b & 0xffff) << 16; + i32[i] = (unsigned int)t1; +} + +void +conv_i32_to_d32(double *d32, unsigned int *i32, int len) +{ + int i; + +#pragma pipeloop(0) + for (i = 0; i < len; i++) + d32[i] = (double)(i32[i]); +} + +void +conv_i32_to_d16(double *d16, unsigned int *i32, int len) +{ + int i; + unsigned int a; + +#pragma pipeloop(0) + for (i = 0; i < len; i++) { + a = i32[i]; + d16[2 * i] = (double)(a & 0xffff); + d16[2 * i + 1] = (double)(a >> 16); + } +} + +void +conv_i32_to_d32_and_d16(double *d32, double *d16, + unsigned int *i32, int len) +{ + int i = 0; + unsigned int a; + +#pragma pipeloop(0) +#ifdef RF_INLINE_MACROS + for (; i < len - 3; i += 4) { + i16_to_d16_and_d32x4(&TwoToMinus16, &TwoTo16, &Zero, + &(d16[2 * i]), &(d32[i]), (float *)(&(i32[i]))); + } +#endif + for (; i < len; i++) { + a = i32[i]; + d32[i] = (double)(i32[i]); + d16[2 * i] = (double)(a & 0xffff); + d16[2 * i + 1] = (double)(a >> 16); + } +} + +void +adjust_montf_result(unsigned int *i32, unsigned int *nint, int len) +{ + long long acc; + int i; + + if (i32[len] > 0) + i = -1; + else { + for (i = len - 1; i >= 0; i--) { + if (i32[i] != nint[i]) + break; + } + } + if ((i < 0) || (i32[i] > nint[i])) { + acc = 0; + for (i = 0; i < len; i++) { + acc = acc + (unsigned long long)(i32[i]) - (unsigned long long)(nint[i]); + i32[i] = (unsigned int)acc; + acc = acc >> 32; + } + } +} + +/* +** the lengths of the input arrays should be at least the following: +** result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen] +** all of them should be different from one another +** +*/ +void +mont_mulf_noconv(unsigned int *result, + double *dm1, double *dm2, double *dt, + double *dn, unsigned int *nint, + int nlen, double dn0) +{ + int i, j, jj; + int tmp; + double digit, m2j, nextm2j, a, b; + double *dptmp, *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0; + + pdm1 = &(dm1[0]); + pdm2 = 
&(dm2[0]); + pdn = &(dn[0]); + pdm2[2 * nlen] = Zero; + + if (nlen != 16) { + for (i = 0; i < 4 * nlen + 2; i++) + dt[i] = Zero; + + a = dt[0] = pdm1[0] * pdm2[0]; + digit = mod(lower32(a, Zero) * dn0, TwoToMinus16, TwoTo16); + + pdtj = &(dt[0]); + for (j = jj = 0; j < 2 * nlen; j++, jj++, pdtj++) { + m2j = pdm2[j]; + a = pdtj[0] + pdn[0] * digit; + b = pdtj[1] + pdm1[0] * pdm2[j + 1] + a * TwoToMinus16; + pdtj[1] = b; + +#pragma pipeloop(0) + for (i = 1; i < nlen; i++) { + pdtj[2 * i] += pdm1[i] * m2j + pdn[i] * digit; + } + if ((jj == 30)) { + cleanup(dt, j / 2 + 1, 2 * nlen + 1); + jj = 0; + } + + digit = mod(lower32(b, Zero) * dn0, TwoToMinus16, TwoTo16); + } + } else { + a = dt[0] = pdm1[0] * pdm2[0]; + + dt[65] = dt[64] = dt[63] = dt[62] = dt[61] = dt[60] = + dt[59] = dt[58] = dt[57] = dt[56] = dt[55] = dt[54] = + dt[53] = dt[52] = dt[51] = dt[50] = dt[49] = dt[48] = + dt[47] = dt[46] = dt[45] = dt[44] = dt[43] = dt[42] = + dt[41] = dt[40] = dt[39] = dt[38] = dt[37] = dt[36] = + dt[35] = dt[34] = dt[33] = dt[32] = dt[31] = dt[30] = + dt[29] = dt[28] = dt[27] = dt[26] = dt[25] = dt[24] = + dt[23] = dt[22] = dt[21] = dt[20] = dt[19] = dt[18] = + dt[17] = dt[16] = dt[15] = dt[14] = dt[13] = dt[12] = + dt[11] = dt[10] = dt[9] = dt[8] = dt[7] = dt[6] = + dt[5] = dt[4] = dt[3] = dt[2] = dt[1] = Zero; + + pdn_0 = pdn[0]; + pdm1_0 = pdm1[0]; + + digit = mod(lower32(a, Zero) * dn0, TwoToMinus16, TwoTo16); + pdtj = &(dt[0]); + + for (j = 0; j < 32; j++, pdtj++) { + + m2j = pdm2[j]; + a = pdtj[0] + pdn_0 * digit; + b = pdtj[1] + pdm1_0 * pdm2[j + 1] + a * TwoToMinus16; + pdtj[1] = b; + + /**** this loop will be fully unrolled: + for(i=1;i<16;i++) + { + pdtj[2*i]+=pdm1[i]*m2j+pdn[i]*digit; + } + *************************************/ + pdtj[2] += pdm1[1] * m2j + pdn[1] * digit; + pdtj[4] += pdm1[2] * m2j + pdn[2] * digit; + pdtj[6] += pdm1[3] * m2j + pdn[3] * digit; + pdtj[8] += pdm1[4] * m2j + pdn[4] * digit; + pdtj[10] += pdm1[5] * m2j + pdn[5] * digit; + pdtj[12] += 
pdm1[6] * m2j + pdn[6] * digit; + pdtj[14] += pdm1[7] * m2j + pdn[7] * digit; + pdtj[16] += pdm1[8] * m2j + pdn[8] * digit; + pdtj[18] += pdm1[9] * m2j + pdn[9] * digit; + pdtj[20] += pdm1[10] * m2j + pdn[10] * digit; + pdtj[22] += pdm1[11] * m2j + pdn[11] * digit; + pdtj[24] += pdm1[12] * m2j + pdn[12] * digit; + pdtj[26] += pdm1[13] * m2j + pdn[13] * digit; + pdtj[28] += pdm1[14] * m2j + pdn[14] * digit; + pdtj[30] += pdm1[15] * m2j + pdn[15] * digit; + /* no need for cleenup, cannot overflow */ + digit = mod(lower32(b, Zero) * dn0, TwoToMinus16, TwoTo16); + } + } + + conv_d16_to_i32(result, dt + 2 * nlen, (long long *)dt, nlen + 1); + + adjust_montf_result(result, nint, nlen); +} diff --git a/security/nss/lib/freebl/mpi/montmulf.h b/security/nss/lib/freebl/mpi/montmulf.h new file mode 100644 index 000000000..69bed4acb --- /dev/null +++ b/security/nss/lib/freebl/mpi/montmulf.h @@ -0,0 +1,65 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* The functions that are to be called from outside of the .s file have the + * following interfaces and array size requirements: + */ + +void conv_i32_to_d32(double *d32, unsigned int *i32, int len); + +/* Converts an array of int's to an array of doubles, so that each double + * corresponds to an int. len is the number of items converted. + * Does not allocate the output array. 
+ * The pointers d32 and i32 should point to arrays of size at least len + * (doubles and unsigned ints, respectively) + */ + +void conv_i32_to_d16(double *d16, unsigned int *i32, int len); + +/* Converts an array of int's to an array of doubles so that each element + * of the int array is converted to a pair of doubles, the first one + * corresponding to the lower (least significant) 16 bits of the int and + * the second one corresponding to the upper (most significant) 16 bits of + * the 32-bit int. len is the number of ints converted. + * Does not allocate the output array. + * The pointer d16 should point to an array of doubles of size at least + * 2*len and i32 should point an array of ints of size at least len + */ + +void conv_i32_to_d32_and_d16(double *d32, double *d16, + unsigned int *i32, int len); + +/* Does the above two conversions together, it is much faster than doing + * both of those in succession + */ + +void mont_mulf_noconv(unsigned int *result, + double *dm1, double *dm2, double *dt, + double *dn, unsigned int *nint, + int nlen, double dn0); + +/* Does the Montgomery multiplication of the numbers stored in the arrays + * pointed to by dm1 and dm2, writing the result to the array pointed to by + * result. It uses the array pointed to by dt as a temporary work area. + * nint should point to the modulus in the array-of-integers representation, + * dn should point to its array-of-doubles as obtained as a result of the + * function call conv_i32_to_d32(dn, nint, nlen); + * nlen is the length of the array containing the modulus. + * The representation used for dm1 is the one that is a result of the function + * call conv_i32_to_d32(dm1, m1, nlen), the representation for dm2 is the + * result of the function call conv_i32_to_d16(dm2, m2, nlen). + * Note that m1 and m2 should both be of length nlen, so they should be + * padded with 0's if necessary before the conversion. The result comes in + * this form (int representation, padded with 0's). 
+ * dn0 is the value of the 16 least significant bits of n0'. + * The function does not allocate memory for any of the arrays, so the + * pointers should point to arrays with the following minimal sizes: + * result - nlen+1 + * dm1 - nlen + * dm2 - 2*nlen+1 ( the +1 is necessary for technical reasons ) + * dt - 4*nlen+2 + * dn - nlen + * nint - nlen + * No two arrays should point to overlapping areas of memory. + */ diff --git a/security/nss/lib/freebl/mpi/montmulf.il b/security/nss/lib/freebl/mpi/montmulf.il new file mode 100644 index 000000000..4952d0fb8 --- /dev/null +++ b/security/nss/lib/freebl/mpi/montmulf.il @@ -0,0 +1,108 @@ +! +! This Source Code Form is subject to the terms of the Mozilla Public +! License, v. 2.0. If a copy of the MPL was not distributed with this +! file, You can obtain one at http://mozilla.org/MPL/2.0/. + +! +! double upper32(double /*frs1*/); +! + .inline upper32,8 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f10 + + fdtox %f10,%f10 + fitod %f10,%f0 + .end + +! +! double lower32(double /*frs1*/, double /* Zero */); +! + .inline lower32,8 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f10 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f12 + + fdtox %f10,%f10 + fmovs %f12,%f10 + fxtod %f10,%f0 + .end + +! +! double mod(double /*x*/, double /*1/m*/, double /*m*/); +! + .inline mod,12 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f2 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o4,[%sp+0x48] + ldd [%sp+0x48],%f6 + + fmuld %f2,%f4,%f4 + fdtox %f4,%f4 + fxtod %f4,%f4 + fmuld %f4,%f6,%f4 + fsubd %f2,%f4,%f0 + .end + + +! +! void i16_to_d16_and_d32x4(double * /*1/(2^16)*/, double * /* 2^16*/, +! double * /* 0 */, +! double * /*result16*/, double * /* result32 */ +! float * /*source - should be unsigned int* +! converted to float* */); +! + .inline i16_to_d16_and_d32x4,24 + ldd [%o0],%f2 ! 1/(2^16) + ldd [%o1],%f4 ! 
2^16 + ldd [%o2],%f22 + + fmovd %f22,%f6 + ld [%o5],%f7 + fmovd %f22,%f10 + ld [%o5+4],%f11 + fmovd %f22,%f14 + ld [%o5+8],%f15 + fmovd %f22,%f18 + ld [%o5+12],%f19 + fxtod %f6,%f6 + std %f6,[%o4] + fxtod %f10,%f10 + std %f10,[%o4+8] + fxtod %f14,%f14 + std %f14,[%o4+16] + fxtod %f18,%f18 + std %f18,[%o4+24] + fmuld %f2,%f6,%f8 + fmuld %f2,%f10,%f12 + fmuld %f2,%f14,%f16 + fmuld %f2,%f18,%f20 + fdtox %f8,%f8 + fdtox %f12,%f12 + fdtox %f16,%f16 + fdtox %f20,%f20 + fxtod %f8,%f8 + std %f8,[%o3+8] + fxtod %f12,%f12 + std %f12,[%o3+24] + fxtod %f16,%f16 + std %f16,[%o3+40] + fxtod %f20,%f20 + std %f20,[%o3+56] + fmuld %f8,%f4,%f8 + fmuld %f12,%f4,%f12 + fmuld %f16,%f4,%f16 + fmuld %f20,%f4,%f20 + fsubd %f6,%f8,%f8 + std %f8,[%o3] + fsubd %f10,%f12,%f12 + std %f12,[%o3+16] + fsubd %f14,%f16,%f16 + std %f16,[%o3+32] + fsubd %f18,%f20,%f20 + std %f20,[%o3+48] + .end + + diff --git a/security/nss/lib/freebl/mpi/montmulf.s b/security/nss/lib/freebl/mpi/montmulf.s new file mode 100644 index 000000000..69d2a3c51 --- /dev/null +++ b/security/nss/lib/freebl/mpi/montmulf.s @@ -0,0 +1,1938 @@ +! +! This Source Code Form is subject to the terms of the Mozilla Public +! License, v. 2.0. If a copy of the MPL was not distributed with this +! file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ + .section ".text",#alloc,#execinstr + .file "montmulf.c" + + .section ".data",#alloc,#write + .align 8 +TwoTo16: /* frequency 1.0 confidence 0.0 */ + .word 1089470464 + .word 0 + .type TwoTo16,#object + .size TwoTo16,8 +TwoToMinus16: /* frequency 1.0 confidence 0.0 */ + .word 1055916032 + .word 0 + .type TwoToMinus16,#object + .size TwoToMinus16,8 +Zero: /* frequency 1.0 confidence 0.0 */ + .word 0 + .word 0 + .type Zero,#object + .size Zero,8 +TwoTo32: /* frequency 1.0 confidence 0.0 */ + .word 1106247680 + .word 0 + .type TwoTo32,#object + .size TwoTo32,8 +TwoToMinus32: /* frequency 1.0 confidence 0.0 */ + .word 1039138816 + .word 0 + .type TwoToMinus32,#object + .size TwoToMinus32,8 + + .section ".text",#alloc,#execinstr +/* 000000 0 ( 0 0) */ .align 4 +! +! SUBROUTINE cleanup +! +! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME) + + .global cleanup + cleanup: /* frequency 1.0 confidence 0.0 */ +! FILE montmulf.c + +! 1 !#define RF_INLINE_MACROS +! 3 !static double TwoTo16=65536.0; +! 4 !static double TwoToMinus16=1.0/65536.0; +! 5 !static double Zero=0.0; +! 6 !static double TwoTo32=65536.0*65536.0; +! 7 !static double TwoToMinus32=1.0/(65536.0*65536.0); +! 9 !#ifdef RF_INLINE_MACROS +! 11 !double upper32(double); +! 12 !double lower32(double, double); +! 13 !double mod(double, double, double); +! 15 !#else +! 17 !static double upper32(double x) +! 18 !{ +! 19 ! return floor(x*TwoToMinus32); +! 20 !} +! 22 !static double lower32(double x, double y) +! 23 !{ +! 24 ! return x-TwoTo32*floor(x*TwoToMinus32); +! 25 !} +! 27 !static double mod(double x, double oneoverm, double m) +! 28 !{ +! 29 ! return x-m*floor(x*oneoverm); +! 30 !} +! 32 !#endif +! 35 !void cleanup(double *dt, int from, int tlen) +! 36 !{ +! 37 ! int i; +! 38 ! double tmp,tmp1,x,x1; +! 40 ! tmp=tmp1=Zero; + +/* 000000 40 ( 0 1) */ sethi %hi(Zero),%g2 + +! 41 ! /* original code ** +! 42 ! for(i=2*from;i<2*tlen-2;i++) +! 43 ! { +! 44 ! x=dt[i]; +! 45 ! 
dt[i]=lower32(x,Zero)+tmp1; +! 46 ! tmp1=tmp; +! 47 ! tmp=upper32(x); +! 48 ! } +! 49 ! dt[tlen-2]+=tmp1; +! 50 ! dt[tlen-1]+=tmp; +! 51 ! **end original code ***/ +! 52 ! /* new code ***/ +! 53 ! for(i=2*from;i<2*tlen;i+=2) + +/* 0x0004 53 ( 1 2) */ sll %o2,1,%g3 +/* 0x0008 40 ( 1 4) */ ldd [%g2+%lo(Zero)],%f0 +/* 0x000c ( 1 2) */ add %g2,%lo(Zero),%g2 +/* 0x0010 53 ( 2 3) */ sll %o1,1,%g4 +/* 0x0014 36 ( 3 4) */ sll %o1,4,%g1 +/* 0x0018 40 ( 3 4) */ fmovd %f0,%f4 +/* 0x001c 53 ( 3 4) */ cmp %g4,%g3 +/* 0x0020 ( 3 4) */ bge,pt %icc,.L77000116 ! tprob=0.56 +/* 0x0024 ( 4 5) */ fmovd %f0,%f2 +/* 0x0028 36 ( 4 5) */ add %o0,%g1,%g1 +/* 0x002c ( 4 5) */ sub %g3,1,%g3 + +! 54 ! { +! 55 ! x=dt[i]; + +/* 0x0030 55 ( 5 8) */ ldd [%g1],%f8 + .L900000114: /* frequency 6.4 confidence 0.0 */ +/* 0x0034 ( 0 3) */ fdtox %f8,%f6 + +! 56 ! x1=dt[i+1]; + +/* 0x0038 56 ( 0 3) */ ldd [%g1+8],%f10 + +! 57 ! dt[i]=lower32(x,Zero)+tmp; +! 58 ! dt[i+1]=lower32(x1,Zero)+tmp1; +! 59 ! tmp=upper32(x); +! 60 ! tmp1=upper32(x1); + +/* 0x003c 60 ( 0 1) */ add %g4,2,%g4 +/* 0x0040 ( 1 4) */ fdtox %f8,%f8 +/* 0x0044 ( 1 2) */ cmp %g4,%g3 +/* 0x0048 ( 5 6) */ fmovs %f0,%f6 +/* 0x004c ( 7 10) */ fxtod %f6,%f6 +/* 0x0050 ( 8 11) */ fdtox %f10,%f0 +/* 0x0054 57 (10 13) */ faddd %f6,%f2,%f2 +/* 0x0058 (10 11) */ std %f2,[%g1] +/* 0x005c (12 15) */ ldd [%g2],%f2 +/* 0x0060 (14 15) */ fmovs %f2,%f0 +/* 0x0064 (16 19) */ fxtod %f0,%f6 +/* 0x0068 (17 20) */ fdtox %f10,%f0 +/* 0x006c (18 21) */ fitod %f8,%f2 +/* 0x0070 58 (19 22) */ faddd %f6,%f4,%f4 +/* 0x0074 (19 20) */ std %f4,[%g1+8] +/* 0x0078 60 (19 20) */ add %g1,16,%g1 +/* 0x007c (20 23) */ fitod %f0,%f4 +/* 0x0080 (20 23) */ ldd [%g2],%f0 +/* 0x0084 (20 21) */ ble,a,pt %icc,.L900000114 ! tprob=0.86 +/* 0x0088 (21 24) */ ldd [%g1],%f8 + .L77000116: /* frequency 1.0 confidence 0.0 */ +/* 0x008c ( 0 2) */ retl ! 
Result = +/* 0x0090 ( 1 2) */ nop +/* 0x0094 0 ( 0 0) */ .type cleanup,2 +/* 0x0094 ( 0 0) */ .size cleanup,(.-cleanup) + + .section ".text",#alloc,#execinstr +/* 000000 0 ( 0 0) */ .align 4 +! +! SUBROUTINE conv_d16_to_i32 +! +! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME) + + .global conv_d16_to_i32 + conv_d16_to_i32: /* frequency 1.0 confidence 0.0 */ +/* 000000 ( 0 1) */ save %sp,-136,%sp + +! 61 ! } +! 62 ! /** end new code **/ +! 63 !} +! 66 !void conv_d16_to_i32(unsigned int *i32, double *d16, long long *tmp, int ilen) +! 67 !{ +! 68 !int i; +! 69 !long long t, t1, a, b, c, d; +! 71 ! t1=0; +! 72 ! a=(long long)d16[0]; + +/* 0x0004 72 ( 1 4) */ ldd [%i1],%f0 + +! 73 ! b=(long long)d16[1]; +! 74 ! for(i=0; i>32); +! 79 ! d=(long long)d16[2*i+3]; +! 80 ! t1+=(b&0xffff)<<16; + +/* 0x0070 80 (15 16) */ and %g1,%o1,%o0 + +! 81 ! t+=(b>>16)+(t1>>32); +! 82 ! i32[i]=t1&0xffffffff; +! 83 ! t1=t; +! 84 ! a=c; +! 85 ! b=d; + +/* 0x0074 85 (15 16) */ add %g2,16,%g2 +/* 0x0078 80 (16 17) */ sllx %o0,16,%g3 +/* 0x007c 77 (16 17) */ and %g4,%o3,%o0 +/* 0x0080 76 (17 20) */ fdtox %f0,%f0 +/* 0x0084 (17 18) */ std %f0,[%sp+104] +/* 0x0088 74 (17 18) */ add %o0,%g3,%o4 +/* 0x008c 79 (18 21) */ ldd [%g2+8],%f2 +/* 0x0090 81 (18 19) */ srax %g1,16,%o0 +/* 0x0094 82 (18 19) */ and %o4,%o3,%o7 +/* 0x0098 81 (19 20) */ stx %o0,[%sp+112] +/* 0x009c (19 20) */ srax %o4,32,%o0 +/* 0x00a0 85 (19 20) */ add %g5,4,%o5 +/* 0x00a4 81 (20 21) */ stx %o0,[%sp+120] +/* 0x00a8 78 (20 21) */ srax %g4,32,%o4 +/* 0x00ac 79 (20 23) */ fdtox %f2,%f0 +/* 0x00b0 (21 22) */ std %f0,[%sp+96] +/* 0x00b4 81 (22 24) */ ldx [%sp+112],%o0 +/* 0x00b8 (23 25) */ ldx [%sp+120],%g4 +/* 0x00bc 76 (25 27) */ ldx [%sp+104],%g3 +/* 0x00c0 81 (25 26) */ add %o0,%g4,%g4 +/* 0x00c4 79 (26 28) */ ldx [%sp+96],%g1 +/* 0x00c8 81 (26 27) */ add %o4,%g4,%o4 +/* 0x00cc 82 (27 28) */ st %o7,[%g5] +/* 0x00d0 (27 28) */ or %g0,1,%o7 +/* 0x00d4 84 (27 28) */ or %g0,%g3,%g4 + .L900000209: /* frequency 
64.0 confidence 0.0 */ +/* 0x00d8 76 (17 19) */ ldd [%g2+16],%f0 +/* 0x00dc 85 (17 18) */ add %o7,1,%o7 +/* 0x00e0 (17 18) */ add %o5,4,%o5 +/* 0x00e4 (18 18) */ cmp %o7,%o2 +/* 0x00e8 (18 19) */ add %g2,16,%g2 +/* 0x00ec 76 (19 22) */ fdtox %f0,%f0 +/* 0x00f0 (20 21) */ std %f0,[%sp+104] +/* 0x00f4 79 (21 23) */ ldd [%g2+8],%f0 +/* 0x00f8 (23 26) */ fdtox %f0,%f0 +/* 0x00fc (24 25) */ std %f0,[%sp+96] +/* 0x0100 80 (25 26) */ and %g1,%o1,%g3 +/* 0x0104 (26 27) */ sllx %g3,16,%g3 +/* 0x0108 ( 0 0) */ stx %g3,[%sp+120] +/* 0x010c 77 (26 27) */ and %g4,%o3,%g3 +/* 0x0110 74 ( 0 0) */ stx %o7,[%sp+128] +/* 0x0114 ( 0 0) */ ldx [%sp+120],%o7 +/* 0x0118 (27 27) */ add %g3,%o7,%g3 +/* 0x011c ( 0 0) */ ldx [%sp+128],%o7 +/* 0x0120 81 (28 29) */ srax %g1,16,%g1 +/* 0x0124 74 (28 28) */ add %g3,%o4,%g3 +/* 0x0128 81 (29 30) */ srax %g3,32,%o4 +/* 0x012c ( 0 0) */ stx %o4,[%sp+112] +/* 0x0130 78 (30 31) */ srax %g4,32,%o4 +/* 0x0134 81 ( 0 0) */ ldx [%sp+112],%g4 +/* 0x0138 (30 31) */ add %g1,%g4,%g4 +/* 0x013c 79 (31 33) */ ldx [%sp+96],%g1 +/* 0x0140 81 (31 32) */ add %o4,%g4,%o4 +/* 0x0144 82 (32 33) */ and %g3,%o3,%g3 +/* 0x0148 84 ( 0 0) */ ldx [%sp+104],%g4 +/* 0x014c 85 (33 34) */ ble,pt %icc,.L900000209 ! tprob=0.50 +/* 0x0150 (33 34) */ st %g3,[%o5-4] + .L900000212: /* frequency 8.0 confidence 0.0 */ +/* 0x0154 85 ( 0 1) */ ba .L900000214 ! 
tprob=1.00 +/* 0x0158 ( 0 1) */ sethi %hi(0xfc00),%g2 + .L77000134: /* frequency 0.7 confidence 0.0 */ + .L900000213: /* frequency 6.4 confidence 0.0 */ +/* 0x015c 77 ( 0 1) */ and %g4,%o3,%o0 +/* 0x0160 80 ( 0 1) */ and %g1,%o1,%g3 +/* 0x0164 76 ( 0 3) */ fdtox %f0,%f0 +/* 0x0168 77 ( 1 2) */ add %o4,%o0,%o0 +/* 0x016c 76 ( 1 2) */ std %f0,[%sp+104] +/* 0x0170 85 ( 1 2) */ add %o7,1,%o7 +/* 0x0174 80 ( 2 3) */ sllx %g3,16,%o4 +/* 0x0178 79 ( 2 5) */ ldd [%g2+24],%f2 +/* 0x017c 85 ( 2 3) */ add %g2,16,%g2 +/* 0x0180 80 ( 3 4) */ add %o0,%o4,%o4 +/* 0x0184 81 ( 3 4) */ stx %o7,[%sp+128] +/* 0x0188 ( 4 5) */ srax %g1,16,%o0 +/* 0x018c ( 4 5) */ stx %o0,[%sp+112] +/* 0x0190 82 ( 4 5) */ and %o4,%o3,%g3 +/* 0x0194 81 ( 5 6) */ srax %o4,32,%o0 +/* 0x0198 ( 5 6) */ stx %o0,[%sp+120] +/* 0x019c 79 ( 5 8) */ fdtox %f2,%f0 +/* 0x01a0 ( 6 7) */ std %f0,[%sp+96] +/* 0x01a4 78 ( 6 7) */ srax %g4,32,%o4 +/* 0x01a8 81 ( 7 9) */ ldx [%sp+120],%o7 +/* 0x01ac ( 8 10) */ ldx [%sp+112],%g4 +/* 0x01b0 76 (10 12) */ ldx [%sp+104],%g1 +/* 0x01b4 81 (10 11) */ add %g4,%o7,%g4 +/* 0x01b8 (11 13) */ ldx [%sp+128],%o7 +/* 0x01bc (11 12) */ add %o4,%g4,%o4 +/* 0x01c0 79 (12 14) */ ldx [%sp+96],%o0 +/* 0x01c4 84 (12 13) */ or %g0,%g1,%g4 +/* 0x01c8 82 (13 14) */ st %g3,[%o5] +/* 0x01cc 85 (13 14) */ add %o5,4,%o5 +/* 0x01d0 (13 14) */ cmp %o7,%o2 +/* 0x01d4 (14 15) */ or %g0,%o0,%g1 +/* 0x01d8 (14 15) */ ble,a,pt %icc,.L900000213 ! tprob=0.86 +/* 0x01dc (14 17) */ ldd [%g2+16],%f0 + .L77000127: /* frequency 1.0 confidence 0.0 */ + +! 86 ! } +! 87 ! t1+=a&0xffffffff; +! 88 ! t=(a>>32); +! 89 ! t1+=(b&0xffff)<<16; +! 90 ! 
i32[i]=t1&0xffffffff; + +/* 0x01e0 90 ( 0 1) */ sethi %hi(0xfc00),%g2 + .L900000214: /* frequency 1.0 confidence 0.0 */ +/* 0x01e4 90 ( 0 1) */ or %g0,-1,%g3 +/* 0x01e8 ( 0 1) */ add %g2,1023,%g2 +/* 0x01ec ( 1 2) */ srl %g3,0,%g3 +/* 0x01f0 ( 1 2) */ and %g1,%g2,%g2 +/* 0x01f4 ( 2 3) */ and %g4,%g3,%g4 +/* 0x01f8 ( 3 4) */ sllx %g2,16,%g2 +/* 0x01fc ( 3 4) */ add %o4,%g4,%g4 +/* 0x0200 ( 4 5) */ add %g4,%g2,%g2 +/* 0x0204 ( 5 6) */ sll %o7,2,%g4 +/* 0x0208 ( 5 6) */ and %g2,%g3,%g2 +/* 0x020c ( 6 7) */ st %g2,[%g5+%g4] +/* 0x0210 ( 7 9) */ ret ! Result = +/* 0x0214 ( 9 10) */ restore %g0,%g0,%g0 +/* 0x0218 0 ( 0 0) */ .type conv_d16_to_i32,2 +/* 0x0218 ( 0 0) */ .size conv_d16_to_i32,(.-conv_d16_to_i32) + + .section ".text",#alloc,#execinstr +/* 000000 0 ( 0 0) */ .align 8 +! +! CONSTANT POOL +! + .L_const_seg_900000301: /* frequency 1.0 confidence 0.0 */ +/* 000000 0 ( 0 0) */ .word 1127219200,0 +/* 0x0008 0 ( 0 0) */ .align 4 +! +! SUBROUTINE conv_i32_to_d32 +! +! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME) + + .global conv_i32_to_d32 + conv_i32_to_d32: /* frequency 1.0 confidence 0.0 */ +/* 000000 ( 0 1) */ orcc %g0,%o2,%g1 + +! 92 !} +! 94 !void conv_i32_to_d32(double *d32, unsigned int *i32, int len) +! 95 !{ +! 96 !int i; +! 98 !#pragma pipeloop(0) +! 99 ! for(i=0;i>16); + +/* 0x0018 113 ( 3 4) */ sethi %hi(.L_const_seg_900000401),%o0 +/* 0x001c ( 3 4) */ add %o5,1,%g3 +/* 0x0020 ( 4 5) */ add %g2,1023,%o4 +/* 0x0024 109 ( 4 5) */ or %g0,0,%g1 +/* 0x0028 ( 5 6) */ cmp %g3,3 +/* 0x002c ( 5 6) */ or %g0,%i1,%o7 +/* 0x0030 ( 6 7) */ add %o0,%lo(.L_const_seg_900000401),%o3 +/* 0x0034 ( 6 7) */ or %g0,%i0,%g2 +/* 0x0038 ( 6 7) */ bl,pn %icc,.L77000154 ! 
tprob=0.44 +/* 0x003c ( 7 8) */ add %o7,4,%o0 +/* 0x0040 112 ( 7 10) */ ldd [%o3],%f0 +/* 0x0044 113 ( 7 8) */ or %g0,1,%g1 +/* 0x0048 111 ( 8 11) */ ld [%o0-4],%o1 +/* 0x004c 0 ( 8 9) */ or %g0,%o0,%o7 +/* 0x0050 112 (10 11) */ and %o1,%o4,%o0 + .L900000406: /* frequency 64.0 confidence 0.0 */ +/* 0x0054 112 (22 23) */ st %o0,[%sp+96] +/* 0x0058 113 (22 23) */ add %g1,1,%g1 +/* 0x005c (22 23) */ add %g2,16,%g2 +/* 0x0060 (23 23) */ cmp %g1,%o5 +/* 0x0064 (23 24) */ add %o7,4,%o7 +/* 0x0068 112 (29 31) */ ld [%sp+96],%f3 +/* 0x006c ( 0 0) */ fmovs %f0,%f2 +/* 0x0070 (31 34) */ fsubd %f2,%f0,%f2 +/* 0x0074 113 (32 33) */ srl %o1,16,%o0 +/* 0x0078 112 (32 33) */ std %f2,[%g2-16] +/* 0x007c 113 (33 34) */ st %o0,[%sp+92] +/* 0x0080 (40 42) */ ld [%sp+92],%f3 +/* 0x0084 111 (41 43) */ ld [%o7-4],%o1 +/* 0x0088 113 ( 0 0) */ fmovs %f0,%f2 +/* 0x008c (42 45) */ fsubd %f2,%f0,%f2 +/* 0x0090 112 (43 44) */ and %o1,%o4,%o0 +/* 0x0094 113 (43 44) */ ble,pt %icc,.L900000406 ! tprob=0.50 +/* 0x0098 (43 44) */ std %f2,[%g2-8] + .L900000409: /* frequency 8.0 confidence 0.0 */ +/* 0x009c 112 ( 0 1) */ st %o0,[%sp+96] +/* 0x00a0 ( 0 1) */ fmovs %f0,%f2 +/* 0x00a4 113 ( 0 1) */ add %g2,16,%g2 +/* 0x00a8 ( 1 2) */ srl %o1,16,%o0 +/* 0x00ac 112 ( 4 7) */ ld [%sp+96],%f3 +/* 0x00b0 ( 6 9) */ fsubd %f2,%f0,%f2 +/* 0x00b4 ( 6 7) */ std %f2,[%g2-16] +/* 0x00b8 113 ( 7 8) */ st %o0,[%sp+92] +/* 0x00bc (10 11) */ fmovs %f0,%f2 +/* 0x00c0 (11 14) */ ld [%sp+92],%f3 +/* 0x00c4 (13 16) */ fsubd %f2,%f0,%f0 +/* 0x00c8 (13 14) */ std %f0,[%g2-8] +/* 0x00cc (14 16) */ ret ! 
Result = +/* 0x00d0 (16 17) */ restore %g0,%g0,%g0 + .L77000154: /* frequency 0.7 confidence 0.0 */ +/* 0x00d4 111 ( 0 3) */ ld [%o7],%o0 + .L900000410: /* frequency 6.4 confidence 0.0 */ +/* 0x00d8 112 ( 0 1) */ and %o0,%o4,%o1 +/* 0x00dc ( 0 1) */ st %o1,[%sp+96] +/* 0x00e0 113 ( 0 1) */ add %g1,1,%g1 +/* 0x00e4 112 ( 1 4) */ ldd [%o3],%f0 +/* 0x00e8 113 ( 1 2) */ srl %o0,16,%o0 +/* 0x00ec ( 1 2) */ add %o7,4,%o7 +/* 0x00f0 ( 2 3) */ cmp %g1,%o5 +/* 0x00f4 112 ( 3 4) */ fmovs %f0,%f2 +/* 0x00f8 ( 4 7) */ ld [%sp+96],%f3 +/* 0x00fc ( 6 9) */ fsubd %f2,%f0,%f2 +/* 0x0100 ( 6 7) */ std %f2,[%g2] +/* 0x0104 113 ( 7 8) */ st %o0,[%sp+92] +/* 0x0108 (10 11) */ fmovs %f0,%f2 +/* 0x010c (11 14) */ ld [%sp+92],%f3 +/* 0x0110 (13 16) */ fsubd %f2,%f0,%f0 +/* 0x0114 (13 14) */ std %f0,[%g2+8] +/* 0x0118 (13 14) */ add %g2,16,%g2 +/* 0x011c (13 14) */ ble,a,pt %icc,.L900000410 ! tprob=0.86 +/* 0x0120 (14 17) */ ld [%o7],%o0 + .L77000150: /* frequency 1.0 confidence 0.0 */ +/* 0x0124 ( 0 2) */ ret ! Result = +/* 0x0128 ( 2 3) */ restore %g0,%g0,%g0 +/* 0x012c 0 ( 0 0) */ .type conv_i32_to_d16,2 +/* 0x012c ( 0 0) */ .size conv_i32_to_d16,(.-conv_i32_to_d16) + + .section ".text",#alloc,#execinstr +/* 000000 0 ( 0 0) */ .align 8 +! +! CONSTANT POOL +! + .L_const_seg_900000501: /* frequency 1.0 confidence 0.0 */ +/* 000000 0 ( 0 0) */ .word 1127219200,0 +/* 0x0008 0 ( 0 0) */ .align 4 +! +! SUBROUTINE conv_i32_to_d32_and_d16 +! +! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME) + + .global conv_i32_to_d32_and_d16 + conv_i32_to_d32_and_d16: /* frequency 1.0 confidence 0.0 */ +/* 000000 ( 0 1) */ save %sp,-104,%sp +/* 0x0004 ( 1 2) */ or %g0,%i3,%i4 +/* 0x0008 ( 1 2) */ or %g0,%i2,%g1 + +! 114 ! } +! 115 !} +! 118 !void i16_to_d16_and_d32x4(double * /*1/(2^16)*/, double * /* 2^16*/, +! 119 ! double * /* 0 */, +! 120 ! double * /*result16*/, double * /* result32 */, +! 121 ! float * /*source - should be unsigned int* +! 122 ! converted to float* */); +! 
126 !void conv_i32_to_d32_and_d16(double *d32, double *d16, +! 127 ! unsigned int *i32, int len) +! 128 !{ +! 129 !int i; +! 130 !unsigned int a; +! 132 !#pragma pipeloop(0) +! 133 ! for(i=0;i>16); + +/* 0x0128 143 ( 0 1) */ sethi %hi(.L_const_seg_900000501),%o1 +/* 0x012c 138 ( 1 2) */ sethi %hi(0xfc00),%o0 +/* 0x0130 141 ( 1 4) */ ldd [%o1+%lo(.L_const_seg_900000501)],%f0 +/* 0x0134 138 ( 1 2) */ sub %i4,%o7,%g3 +/* 0x0138 ( 2 3) */ sll %o7,2,%g2 +/* 0x013c ( 2 3) */ add %o0,1023,%o3 +/* 0x0140 ( 3 4) */ sll %o7,3,%g4 +/* 0x0144 ( 3 4) */ cmp %g3,3 +/* 0x0148 ( 4 5) */ add %g1,%g2,%o0 +/* 0x014c ( 4 5) */ add %o1,%lo(.L_const_seg_900000501),%o2 +/* 0x0150 ( 5 6) */ add %i3,%g4,%o4 +/* 0x0154 ( 5 6) */ sub %i4,1,%o1 +/* 0x0158 ( 6 7) */ sll %o7,4,%g5 +/* 0x015c ( 6 7) */ bl,pn %icc,.L77000161 ! tprob=0.44 +/* 0x0160 ( 7 8) */ add %i1,%g5,%o5 +/* 0x0164 141 ( 7 10) */ ld [%g1+%g2],%f3 +/* 0x0168 143 ( 7 8) */ add %o4,8,%o4 +/* 0x016c 140 ( 8 11) */ ld [%g1+%g2],%g1 +/* 0x0170 143 ( 8 9) */ add %o5,16,%o5 +/* 0x0174 ( 8 9) */ add %o7,1,%o7 +/* 0x0178 141 ( 9 10) */ fmovs %f0,%f2 +/* 0x017c 143 ( 9 10) */ add %o0,4,%o0 +/* 0x0180 142 (10 11) */ and %g1,%o3,%g2 +/* 0x0184 141 (11 14) */ fsubd %f2,%f0,%f2 +/* 0x0188 (11 12) */ std %f2,[%o4-8] +/* 0x018c 143 (11 12) */ srl %g1,16,%g1 +/* 0x0190 142 (12 13) */ st %g2,[%sp+96] +/* 0x0194 (15 16) */ fmovs %f0,%f2 +/* 0x0198 (16 19) */ ld [%sp+96],%f3 +/* 0x019c (18 21) */ fsubd %f2,%f0,%f2 +/* 0x01a0 (18 19) */ std %f2,[%o5-16] +/* 0x01a4 143 (19 20) */ st %g1,[%sp+92] +/* 0x01a8 (22 23) */ fmovs %f0,%f2 +/* 0x01ac (23 26) */ ld [%sp+92],%f3 +/* 0x01b0 (25 28) */ fsubd %f2,%f0,%f2 +/* 0x01b4 (25 26) */ std %f2,[%o5-8] + .L900000509: /* frequency 64.0 confidence 0.0 */ +/* 0x01b8 141 (26 28) */ ld [%o0],%f3 +/* 0x01bc 143 (26 27) */ add %o7,2,%o7 +/* 0x01c0 (26 27) */ add %o5,32,%o5 +/* 0x01c4 140 (27 29) */ ld [%o0],%g1 +/* 0x01c8 143 (27 27) */ cmp %o7,%o1 +/* 0x01cc (27 28) */ add %o4,16,%o4 +/* 0x01d0 141 ( 0 0) */ 
fmovs %f0,%f2 +/* 0x01d4 (28 31) */ fsubd %f2,%f0,%f2 +/* 0x01d8 (29 30) */ std %f2,[%o4-16] +/* 0x01dc 142 (29 30) */ and %g1,%o3,%g2 +/* 0x01e0 (30 31) */ st %g2,[%sp+96] +/* 0x01e4 (37 39) */ ld [%sp+96],%f3 +/* 0x01e8 ( 0 0) */ fmovs %f0,%f2 +/* 0x01ec (39 42) */ fsubd %f2,%f0,%f2 +/* 0x01f0 143 (40 41) */ srl %g1,16,%g1 +/* 0x01f4 142 (40 41) */ std %f2,[%o5-32] +/* 0x01f8 143 (41 42) */ st %g1,[%sp+92] +/* 0x01fc (48 50) */ ld [%sp+92],%f3 +/* 0x0200 ( 0 0) */ fmovs %f0,%f2 +/* 0x0204 (50 53) */ fsubd %f2,%f0,%f2 +/* 0x0208 (51 52) */ std %f2,[%o5-24] +/* 0x020c (51 52) */ add %o0,4,%o0 +/* 0x0210 141 (52 54) */ ld [%o0],%f3 +/* 0x0214 140 (53 55) */ ld [%o0],%g1 +/* 0x0218 141 ( 0 0) */ fmovs %f0,%f2 +/* 0x021c (54 57) */ fsubd %f2,%f0,%f2 +/* 0x0220 (55 56) */ std %f2,[%o4-8] +/* 0x0224 142 (55 56) */ and %g1,%o3,%g2 +/* 0x0228 (56 57) */ st %g2,[%sp+96] +/* 0x022c (63 65) */ ld [%sp+96],%f3 +/* 0x0230 ( 0 0) */ fmovs %f0,%f2 +/* 0x0234 (65 68) */ fsubd %f2,%f0,%f2 +/* 0x0238 143 (66 67) */ srl %g1,16,%g1 +/* 0x023c 142 (66 67) */ std %f2,[%o5-16] +/* 0x0240 143 (67 68) */ st %g1,[%sp+92] +/* 0x0244 (74 76) */ ld [%sp+92],%f3 +/* 0x0248 ( 0 0) */ fmovs %f0,%f2 +/* 0x024c (76 79) */ fsubd %f2,%f0,%f2 +/* 0x0250 (77 78) */ std %f2,[%o5-8] +/* 0x0254 (77 78) */ bl,pt %icc,.L900000509 ! tprob=0.50 +/* 0x0258 (77 78) */ add %o0,4,%o0 + .L900000512: /* frequency 8.0 confidence 0.0 */ +/* 0x025c 143 ( 0 1) */ cmp %o7,%i4 +/* 0x0260 ( 0 1) */ bge,pn %icc,.L77000164 ! 
tprob=0.14 +/* 0x0264 ( 0 1) */ nop + .L77000161: /* frequency 0.7 confidence 0.0 */ +/* 0x0268 141 ( 0 3) */ ld [%o0],%f3 + .L900000513: /* frequency 6.4 confidence 0.0 */ +/* 0x026c 141 ( 0 3) */ ldd [%o2],%f0 +/* 0x0270 143 ( 0 1) */ add %o7,1,%o7 +/* 0x0274 140 ( 1 4) */ ld [%o0],%o1 +/* 0x0278 143 ( 1 2) */ add %o0,4,%o0 +/* 0x027c ( 1 2) */ cmp %o7,%i4 +/* 0x0280 141 ( 2 3) */ fmovs %f0,%f2 +/* 0x0284 142 ( 3 4) */ and %o1,%o3,%g1 +/* 0x0288 141 ( 4 7) */ fsubd %f2,%f0,%f2 +/* 0x028c ( 4 5) */ std %f2,[%o4] +/* 0x0290 143 ( 4 5) */ srl %o1,16,%o1 +/* 0x0294 142 ( 5 6) */ st %g1,[%sp+96] +/* 0x0298 143 ( 5 6) */ add %o4,8,%o4 +/* 0x029c 142 ( 8 9) */ fmovs %f0,%f2 +/* 0x02a0 ( 9 12) */ ld [%sp+96],%f3 +/* 0x02a4 (11 14) */ fsubd %f2,%f0,%f2 +/* 0x02a8 (11 12) */ std %f2,[%o5] +/* 0x02ac 143 (12 13) */ st %o1,[%sp+92] +/* 0x02b0 (15 16) */ fmovs %f0,%f2 +/* 0x02b4 (16 19) */ ld [%sp+92],%f3 +/* 0x02b8 (18 21) */ fsubd %f2,%f0,%f0 +/* 0x02bc (18 19) */ std %f0,[%o5+8] +/* 0x02c0 (18 19) */ add %o5,16,%o5 +/* 0x02c4 (18 19) */ bl,a,pt %icc,.L900000513 ! tprob=0.86 +/* 0x02c8 (19 22) */ ld [%o0],%f3 + .L77000164: /* frequency 1.0 confidence 0.0 */ +/* 0x02cc ( 0 2) */ ret ! Result = +/* 0x02d0 ( 2 3) */ restore %g0,%g0,%g0 +/* 0x02d4 0 ( 0 0) */ .type conv_i32_to_d32_and_d16,2 +/* 0x02d4 ( 0 0) */ .size conv_i32_to_d32_and_d16,(.-conv_i32_to_d32_and_d16) + + .section ".text",#alloc,#execinstr +/* 000000 0 ( 0 0) */ .align 4 +! +! SUBROUTINE adjust_montf_result +! +! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME) + + .global adjust_montf_result + adjust_montf_result: /* frequency 1.0 confidence 0.0 */ + +! 144 ! } +! 145 !} +! 148 !void adjust_montf_result(unsigned int *i32, unsigned int *nint, int len) +! 149 !{ +! 150 !long long acc; +! 151 !int i; +! 153 ! 
if(i32[len]>0) i=-1; + +/* 000000 153 ( 0 1) */ sll %o2,2,%g1 +/* 0x0004 ( 0 1) */ or %g0,-1,%g3 +/* 0x0008 ( 1 4) */ ld [%o0+%g1],%g1 +/* 0x000c ( 3 4) */ cmp %g1,0 +/* 0x0010 ( 3 4) */ bleu,pn %icc,.L77000175 ! tprob=0.50 +/* 0x0014 ( 3 4) */ or %g0,%o1,%o3 +/* 0x0018 ( 4 5) */ ba .L900000611 ! tprob=1.00 +/* 0x001c ( 4 5) */ cmp %g3,0 + .L77000175: /* frequency 0.8 confidence 0.0 */ + +! 154 ! else +! 155 ! { +! 156 ! for(i=len-1; i>=0; i++) + +/* 0x0020 156 ( 0 1) */ subcc %o2,1,%g3 +/* 0x0024 ( 0 1) */ bneg,pt %icc,.L900000611 ! tprob=0.60 +/* 0x0028 ( 1 2) */ cmp %g3,0 +/* 0x002c ( 1 2) */ sll %g3,2,%g1 +/* 0x0030 ( 2 3) */ add %o0,%g1,%g2 +/* 0x0034 ( 2 3) */ add %o1,%g1,%g1 + +! 157 ! { +! 158 ! if(i32[i]!=nint[i]) break; + +/* 0x0038 158 ( 3 6) */ ld [%g1],%g5 + .L900000610: /* frequency 5.3 confidence 0.0 */ +/* 0x003c 158 ( 0 3) */ ld [%g2],%o5 +/* 0x0040 ( 0 1) */ add %g1,4,%g1 +/* 0x0044 ( 0 1) */ add %g2,4,%g2 +/* 0x0048 ( 2 3) */ cmp %o5,%g5 +/* 0x004c ( 2 3) */ bne,pn %icc,.L77000182 ! tprob=0.16 +/* 0x0050 ( 2 3) */ nop +/* 0x0054 ( 3 4) */ addcc %g3,1,%g3 +/* 0x0058 ( 3 4) */ bpos,a,pt %icc,.L900000610 ! tprob=0.84 +/* 0x005c ( 3 6) */ ld [%g1],%g5 + .L77000182: /* frequency 1.0 confidence 0.0 */ + +! 159 ! } +! 160 ! } +! 161 ! if((i<0)||(i32[i]>nint[i])) + +/* 0x0060 161 ( 0 1) */ cmp %g3,0 + .L900000611: /* frequency 1.0 confidence 0.0 */ +/* 0x0064 161 ( 0 1) */ bl,pn %icc,.L77000198 ! tprob=0.50 +/* 0x0068 ( 0 1) */ sll %g3,2,%g2 +/* 0x006c ( 1 4) */ ld [%o1+%g2],%g1 +/* 0x0070 ( 2 5) */ ld [%o0+%g2],%g2 +/* 0x0074 ( 4 5) */ cmp %g2,%g1 +/* 0x0078 ( 4 5) */ bleu,pt %icc,.L77000191 ! tprob=0.56 +/* 0x007c ( 4 5) */ nop + .L77000198: /* frequency 0.8 confidence 0.0 */ + +! 162 ! { +! 163 ! acc=0; +! 164 ! 
for(i=0;i>32; + +/* 0x00c4 168 ( 6 7) */ or %g0,2,%o5 +/* 0x00c8 166 ( 7 10) */ ld [%o0+4],%g1 +/* 0x00cc 164 ( 8 9) */ sub %o2,%o1,%o2 +/* 0x00d0 ( 9 10) */ or %g0,%o2,%g5 +/* 0x00d4 167 ( 9 10) */ and %o2,%g3,%o2 +/* 0x00d8 ( 9 10) */ st %o2,[%o0] +/* 0x00dc 168 (10 11) */ srax %g5,32,%g5 + .L900000605: /* frequency 64.0 confidence 0.0 */ +/* 0x00e0 166 (12 20) */ ld [%o3],%o2 +/* 0x00e4 168 (12 13) */ add %o5,1,%o5 +/* 0x00e8 (12 13) */ add %o3,4,%o3 +/* 0x00ec (13 13) */ cmp %o5,%g4 +/* 0x00f0 (13 14) */ add %o4,4,%o4 +/* 0x00f4 164 (14 14) */ sub %g1,%o2,%g1 +/* 0x00f8 (15 15) */ add %g1,%g5,%g5 +/* 0x00fc 167 (16 17) */ and %g5,%g3,%o2 +/* 0x0100 166 (16 24) */ ld [%o4-4],%g1 +/* 0x0104 167 (17 18) */ st %o2,[%o4-8] +/* 0x0108 168 (17 18) */ ble,pt %icc,.L900000605 ! tprob=0.50 +/* 0x010c (17 18) */ srax %g5,32,%g5 + .L900000608: /* frequency 8.0 confidence 0.0 */ +/* 0x0110 166 ( 0 3) */ ld [%o3],%g2 +/* 0x0114 164 ( 2 3) */ sub %g1,%g2,%g1 +/* 0x0118 ( 3 4) */ add %g1,%g5,%g1 +/* 0x011c 167 ( 4 5) */ and %g1,%g3,%g2 +/* 0x0120 ( 5 7) */ retl ! Result = +/* 0x0124 ( 6 7) */ st %g2,[%o4-4] + .L77000199: /* frequency 0.6 confidence 0.0 */ +/* 0x0128 166 ( 0 3) */ ld [%o4],%g1 + .L900000609: /* frequency 5.3 confidence 0.0 */ +/* 0x012c 166 ( 0 3) */ ld [%o3],%g2 +/* 0x0130 ( 0 1) */ add %g5,%g1,%g1 +/* 0x0134 168 ( 0 1) */ add %o5,1,%o5 +/* 0x0138 ( 1 2) */ add %o3,4,%o3 +/* 0x013c ( 1 2) */ cmp %o5,%g4 +/* 0x0140 166 ( 2 3) */ sub %g1,%g2,%g1 +/* 0x0144 167 ( 3 4) */ and %g1,%g3,%g2 +/* 0x0148 ( 3 4) */ st %g2,[%o4] +/* 0x014c 168 ( 3 4) */ add %o4,4,%o4 +/* 0x0150 ( 4 5) */ srax %g1,32,%g5 +/* 0x0154 ( 4 5) */ ble,a,pt %icc,.L900000609 ! tprob=0.84 +/* 0x0158 ( 4 7) */ ld [%o4],%g1 + .L77000191: /* frequency 1.0 confidence 0.0 */ +/* 0x015c ( 0 2) */ retl ! 
Result = +/* 0x0160 ( 1 2) */ nop +/* 0x0164 0 ( 0 0) */ .type adjust_montf_result,2 +/* 0x0164 ( 0 0) */ .size adjust_montf_result,(.-adjust_montf_result) + + .section ".text",#alloc,#execinstr +/* 000000 0 ( 0 0) */ .align 32 +! +! SUBROUTINE mont_mulf_noconv +! +! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME) + + .global mont_mulf_noconv + mont_mulf_noconv: /* frequency 1.0 confidence 0.0 */ +/* 000000 ( 0 1) */ save %sp,-144,%sp +/* 0x0004 ( 1 2) */ st %i0,[%fp+68] + +! 169 ! } +! 170 ! } +! 171 !} +! 175 !void cleanup(double *dt, int from, int tlen); +! 177 !/* +! 178 !** the lengths of the input arrays should be at least the following: +! 179 !** result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen] +! 180 !** all of them should be different from one another +! 181 !** +! 182 !*/ +! 183 !void mont_mulf_noconv(unsigned int *result, +! 184 ! double *dm1, double *dm2, double *dt, +! 185 ! double *dn, unsigned int *nint, +! 186 ! int nlen, double dn0) +! 187 !{ +! 188 ! int i, j, jj; +! 189 ! int tmp; +! 190 ! double digit, m2j, nextm2j, a, b; +! 191 ! double *dptmp, *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0; +! 193 ! pdm1=&(dm1[0]); +! 194 ! pdm2=&(dm2[0]); +! 195 ! pdn=&(dn[0]); +! 196 ! pdm2[2*nlen]=Zero; + +/* 0x0008 196 ( 1 2) */ sethi %hi(Zero),%g2 +/* 0x000c 187 ( 1 2) */ or %g0,%i2,%o1 +/* 0x0010 ( 2 3) */ st %i5,[%fp+88] +/* 0x0014 ( 2 3) */ or %g0,%i3,%o2 +/* 0x0018 196 ( 2 3) */ add %g2,%lo(Zero),%g4 +/* 0x001c ( 3 6) */ ldd [%g2+%lo(Zero)],%f2 +/* 0x0020 187 ( 3 4) */ or %g0,%o2,%g5 +/* 0x0024 196 ( 3 4) */ or %g0,%o1,%i0 +/* 0x0028 187 ( 4 5) */ or %g0,%i4,%i2 + +! 198 ! if (nlen!=16) +! 199 ! { +! 200 ! for(i=0;i<4*nlen+2;i++) dt[i]=Zero; +! 202 ! a=dt[0]=pdm1[0]*pdm2[0]; +! 203 ! digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16); +! 205 ! pdtj=&(dt[0]); +! 206 ! for(j=jj=0;j<2*nlen;j++,jj++,pdtj++) +! 207 ! { +! 208 ! m2j=pdm2[j]; +! 209 ! a=pdtj[0]+pdn[0]*digit; +! 210 ! 
b=pdtj[1]+pdm1[0]*pdm2[j+1]+a*TwoToMinus16; +! 211 ! pdtj[1]=b; +! 213 !#pragma pipeloop(0) +! 214 ! for(i=1;i>32); +! 122 ! d=(long long)d16[2*i+3]; +! 123 ! t1+=(b&0xffff)<<16; +! 124 ! t+=(b>>16)+(t1>>32); +! 125 ! i32[i]=t1&0xffffffff; +! 126 ! t1=t; +! 127 ! a=c; +! 128 ! b=d; + +/* 0x0070 128 */ add %o0,16,%g2 +/* 0x0074 123 */ and %g1,%o1,%o0 +/* 0x0078 */ sllx %o0,16,%g3 +/* 0x007c 120 */ and %g4,%o3,%o0 +/* 0x0080 117 */ add %o0,%g3,%o4 +/* 0x0084 119 */ fdtox %f0,%f0 +/* 0x0088 */ std %f0,[%sp+104] +/* 0x008c 125 */ and %o4,%o3,%g5 +/* 0x0090 122 */ ldd [%g2+8],%f2 +/* 0x0094 128 */ add %o5,4,%o5 +/* 0x0098 124 */ srax %o4,32,%o4 +/* 0x009c */ stx %o4,[%sp+112] +/* 0x00a0 122 */ fdtox %f2,%f0 +/* 0x00a4 */ std %f0,[%sp+96] +/* 0x00a8 124 */ srax %g1,16,%o0 +/* 0x00ac */ ldx [%sp+112],%o7 +/* 0x00b0 121 */ srax %g4,32,%o4 +/* 0x00b4 124 */ add %o0,%o7,%g4 +/* 0x00b8 128 */ or %g0,1,%o7 +/* 0x00bc 119 */ ldx [%sp+104],%g3 +/* 0x00c0 124 */ add %o4,%g4,%o4 +/* 0x00c4 122 */ ldx [%sp+96],%g1 +/* 0x00c8 125 */ st %g5,[%o5-4] +/* 0x00cc 127 */ or %g0,%g3,%g4 + .L900000112: +/* 0x00d0 119 */ ldd [%g2+16],%f0 +/* 0x00d4 128 */ add %o7,1,%o7 +/* 0x00d8 */ add %o5,4,%o5 +/* 0x00dc */ cmp %o7,%o2 +/* 0x00e0 */ add %g2,16,%g2 +/* 0x00e4 119 */ fdtox %f0,%f0 +/* 0x00e8 */ std %f0,[%sp+104] +/* 0x00ec 122 */ ldd [%g2+8],%f0 +/* 0x00f0 */ fdtox %f0,%f0 +/* 0x00f4 */ std %f0,[%sp+96] +/* 0x00f8 123 */ and %g1,%o1,%g3 +/* 0x00fc */ sllx %g3,16,%g5 +/* 0x0100 120 */ and %g4,%o3,%g3 +/* 0x0104 117 */ add %g3,%g5,%g3 +/* 0x0108 124 */ srax %g1,16,%g1 +/* 0x010c 117 */ add %g3,%o4,%g3 +/* 0x0110 124 */ srax %g3,32,%o4 +/* 0x0114 */ stx %o4,[%sp+112] +/* 0x0118 119 */ ldx [%sp+104],%g5 +/* 0x011c 121 */ srax %g4,32,%o4 +/* 0x0120 124 */ ldx [%sp+112],%g4 +/* 0x0124 */ add %g1,%g4,%g4 +/* 0x0128 122 */ ldx [%sp+96],%g1 +/* 0x012c 124 */ add %o4,%g4,%o4 +/* 0x0130 125 */ and %g3,%o3,%g3 +/* 0x0134 127 */ or %g0,%g5,%g4 +/* 0x0138 128 */ ble,pt %icc,.L900000112 +/* 0x013c */ st 
%g3,[%o5-4] + .L900000115: +/* 0x0140 128 */ ba .L900000117 +/* 0x0144 */ sethi %hi(0xfc00),%g2 + .L77000134: +/* 0x0148 119 */ ldd [%g2+16],%f0 + .L900000116: +/* 0x014c 120 */ and %g4,%o3,%o0 +/* 0x0150 123 */ and %g1,%o1,%g3 +/* 0x0154 119 */ fdtox %f0,%f0 +/* 0x0158 120 */ add %o4,%o0,%o0 +/* 0x015c 119 */ std %f0,[%sp+104] +/* 0x0160 128 */ add %o7,1,%o7 +/* 0x0164 123 */ sllx %g3,16,%o4 +/* 0x0168 122 */ ldd [%g2+24],%f2 +/* 0x016c 128 */ add %g2,16,%g2 +/* 0x0170 123 */ add %o0,%o4,%o0 +/* 0x0174 128 */ cmp %o7,%o2 +/* 0x0178 125 */ and %o0,%o3,%g3 +/* 0x017c 122 */ fdtox %f2,%f0 +/* 0x0180 */ std %f0,[%sp+96] +/* 0x0184 124 */ srax %o0,32,%o0 +/* 0x0188 */ stx %o0,[%sp+112] +/* 0x018c 121 */ srax %g4,32,%o4 +/* 0x0190 122 */ ldx [%sp+96],%o0 +/* 0x0194 124 */ srax %g1,16,%g5 +/* 0x0198 */ ldx [%sp+112],%g4 +/* 0x019c 119 */ ldx [%sp+104],%g1 +/* 0x01a0 125 */ st %g3,[%o5] +/* 0x01a4 124 */ add %g5,%g4,%g4 +/* 0x01a8 128 */ add %o5,4,%o5 +/* 0x01ac 124 */ add %o4,%g4,%o4 +/* 0x01b0 127 */ or %g0,%g1,%g4 +/* 0x01b4 128 */ or %g0,%o0,%g1 +/* 0x01b8 */ ble,a,pt %icc,.L900000116 +/* 0x01bc */ ldd [%g2+16],%f0 + .L77000127: + +! 129 ! } +! 130 ! t1+=a&0xffffffff; +! 131 ! t=(a>>32); +! 132 ! t1+=(b&0xffff)<<16; +! 133 ! i32[i]=t1&0xffffffff; + +/* 0x01c0 133 */ sethi %hi(0xfc00),%g2 + .L900000117: +/* 0x01c4 133 */ or %g0,-1,%g3 +/* 0x01c8 */ add %g2,1023,%g2 +/* 0x01cc */ srl %g3,0,%g3 +/* 0x01d0 */ and %g1,%g2,%g2 +/* 0x01d4 */ and %g4,%g3,%g4 +/* 0x01d8 */ sllx %g2,16,%g2 +/* 0x01dc */ add %o4,%g4,%g4 +/* 0x01e0 */ add %g4,%g2,%g2 +/* 0x01e4 */ sll %o7,2,%g4 +/* 0x01e8 */ and %g2,%g3,%g2 +/* 0x01ec */ st %g2,[%i0+%g4] +/* 0x01f0 */ ret ! Result = +/* 0x01f4 */ restore %g0,%g0,%g0 +/* 0x01f8 0 */ .type conv_d16_to_i32,2 +/* 0x01f8 */ .size conv_d16_to_i32,(.-conv_d16_to_i32) + + .section ".text",#alloc,#execinstr +/* 000000 0 */ .align 8 +! +! CONSTANT POOL +! 
+ .L_const_seg_900000201: +/* 000000 0 */ .word 1127219200,0 +/* 0x0008 0 */ .align 4 +/* 0x0008 */ .skip 16 +! +! SUBROUTINE conv_i32_to_d32 +! +! OFFSET SOURCE LINE LABEL INSTRUCTION + + .global conv_i32_to_d32 + conv_i32_to_d32: +/* 000000 */ or %g0,%o7,%g2 + +! 135 !} +! 137 !void conv_i32_to_d32(double *d32, unsigned int *i32, int len) +! 138 !{ +! 139 !int i; +! 141 !#pragma pipeloop(0) +! 142 ! for(i=0;i>16); + +/* 0x0008 156 */ sethi %hi(.L_const_seg_900000301),%g2 + .L900000310: +/* 0x000c */ call .+8 +/* 0x0010 */ sethi /*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000310-.)),%g3 +/* 0x0014 152 */ cmp %o0,0 +/* 0x0018 147 */ add %g3,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000310-.)),%g3 +/* 0x001c 152 */ ble,pt %icc,.L77000150 +/* 0x0020 */ add %g3,%o7,%o2 +/* 0x0024 */ sub %i2,1,%o5 +/* 0x0028 156 */ add %g2,%lo(.L_const_seg_900000301),%o1 +/* 0x002c 152 */ sethi %hi(0xfc00),%o0 +/* 0x0030 */ ld [%o2+%o1],%o3 +/* 0x0034 */ add %o5,1,%g2 +/* 0x0038 */ or %g0,0,%g1 +/* 0x003c */ cmp %g2,3 +/* 0x0040 */ or %g0,%i1,%o7 +/* 0x0044 */ add %o0,1023,%o4 +/* 0x0048 */ or %g0,%i0,%g3 +/* 0x004c */ bl,pn %icc,.L77000154 +/* 0x0050 */ add %o7,4,%o0 +/* 0x0054 155 */ ldd [%o3],%f0 +/* 0x0058 156 */ or %g0,1,%g1 +/* 0x005c 154 */ ld [%o0-4],%o1 +/* 0x0060 0 */ or %g0,%o0,%o7 +/* 0x0064 155 */ and %o1,%o4,%o0 + .L900000306: +/* 0x0068 155 */ st %o0,[%sp+96] +/* 0x006c 156 */ add %g1,1,%g1 +/* 0x0070 */ add %g3,16,%g3 +/* 0x0074 */ cmp %g1,%o5 +/* 0x0078 */ add %o7,4,%o7 +/* 0x007c 155 */ ld [%sp+96],%f3 +/* 0x0080 */ fmovs %f0,%f2 +/* 0x0084 */ fsubd %f2,%f0,%f2 +/* 0x0088 156 */ srl %o1,16,%o0 +/* 0x008c 155 */ std %f2,[%g3-16] +/* 0x0090 156 */ st %o0,[%sp+92] +/* 0x0094 */ ld [%sp+92],%f3 +/* 0x0098 154 */ ld [%o7-4],%o1 +/* 0x009c 156 */ fmovs %f0,%f2 +/* 0x00a0 */ fsubd %f2,%f0,%f2 +/* 0x00a4 155 */ and %o1,%o4,%o0 +/* 0x00a8 156 */ ble,pt %icc,.L900000306 +/* 0x00ac */ std %f2,[%g3-8] + .L900000309: +/* 0x00b0 155 */ st %o0,[%sp+96] +/* 0x00b4 */ fmovs %f0,%f2 +/* 0x00b8 156 
*/ add %g3,16,%g3 +/* 0x00bc */ srl %o1,16,%o0 +/* 0x00c0 155 */ ld [%sp+96],%f3 +/* 0x00c4 */ fsubd %f2,%f0,%f2 +/* 0x00c8 */ std %f2,[%g3-16] +/* 0x00cc 156 */ st %o0,[%sp+92] +/* 0x00d0 */ fmovs %f0,%f2 +/* 0x00d4 */ ld [%sp+92],%f3 +/* 0x00d8 */ fsubd %f2,%f0,%f0 +/* 0x00dc */ std %f0,[%g3-8] +/* 0x00e0 */ ret ! Result = +/* 0x00e4 */ restore %g0,%g0,%g0 + .L77000154: +/* 0x00e8 154 */ ld [%o7],%o0 + .L900000311: +/* 0x00ec 155 */ and %o0,%o4,%o1 +/* 0x00f0 */ st %o1,[%sp+96] +/* 0x00f4 156 */ add %g1,1,%g1 +/* 0x00f8 155 */ ldd [%o3],%f0 +/* 0x00fc 156 */ srl %o0,16,%o0 +/* 0x0100 */ add %o7,4,%o7 +/* 0x0104 */ cmp %g1,%o5 +/* 0x0108 155 */ fmovs %f0,%f2 +/* 0x010c */ ld [%sp+96],%f3 +/* 0x0110 */ fsubd %f2,%f0,%f2 +/* 0x0114 */ std %f2,[%g3] +/* 0x0118 156 */ st %o0,[%sp+92] +/* 0x011c */ fmovs %f0,%f2 +/* 0x0120 */ ld [%sp+92],%f3 +/* 0x0124 */ fsubd %f2,%f0,%f0 +/* 0x0128 */ std %f0,[%g3+8] +/* 0x012c */ add %g3,16,%g3 +/* 0x0130 */ ble,a,pt %icc,.L900000311 +/* 0x0134 */ ld [%o7],%o0 + .L77000150: +/* 0x0138 */ ret ! Result = +/* 0x013c */ restore %g0,%g0,%g0 +/* 0x0140 0 */ .type conv_i32_to_d16,2 +/* 0x0140 */ .size conv_i32_to_d16,(.-conv_i32_to_d16) + + .section ".text",#alloc,#execinstr +/* 000000 0 */ .align 8 +! +! CONSTANT POOL +! + .L_const_seg_900000401: +/* 000000 0 */ .word 1127219200,0 +/* 0x0008 0 */ .align 4 +/* 0x0008 */ .skip 16 +! +! SUBROUTINE conv_i32_to_d32_and_d16 +! +! OFFSET SOURCE LINE LABEL INSTRUCTION + + .global conv_i32_to_d32_and_d16 + conv_i32_to_d32_and_d16: +/* 000000 */ save %sp,-120,%sp + .L900000415: +/* 0x0004 */ call .+8 +/* 0x0008 */ sethi /*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000415-.)),%g4 + +! 157 ! } +! 158 !} +! 161 !void conv_i32_to_d32_and_d16(double *d32, double *d16, +! 162 ! unsigned int *i32, int len) +! 163 !{ +! 164 !int i = 0; +! 165 !unsigned int a; +! 167 !#pragma pipeloop(0) +! 168 !#ifdef RF_INLINE_MACROS +! 169 ! 
for(;i>16); + +/* 0x0158 180 */ sethi %hi(.L_const_seg_900000401),%g2 +/* 0x015c */ add %g2,%lo(.L_const_seg_900000401),%o1 +/* 0x0160 175 */ sethi %hi(0xfc00),%o0 +/* 0x0164 */ ld [%o4+%o1],%o2 +/* 0x0168 */ sll %g5,2,%o3 +/* 0x016c */ sub %i3,%g5,%g3 +/* 0x0170 */ sll %g5,3,%g2 +/* 0x0174 */ add %o0,1023,%o4 +/* 0x0178 178 */ ldd [%o2],%f0 +/* 0x017c */ add %i2,%o3,%o0 +/* 0x0180 175 */ cmp %g3,3 +/* 0x0184 */ add %i4,%g2,%o3 +/* 0x0188 */ sub %i3,1,%o1 +/* 0x018c */ sll %g5,4,%g4 +/* 0x0190 */ bl,pn %icc,.L77000161 +/* 0x0194 */ add %i1,%g4,%o5 +/* 0x0198 178 */ ld [%o0],%f3 +/* 0x019c 180 */ add %o3,8,%o3 +/* 0x01a0 177 */ ld [%o0],%o7 +/* 0x01a4 180 */ add %o5,16,%o5 +/* 0x01a8 */ add %g5,1,%g5 +/* 0x01ac 178 */ fmovs %f0,%f2 +/* 0x01b0 180 */ add %o0,4,%o0 +/* 0x01b4 179 */ and %o7,%o4,%g1 +/* 0x01b8 178 */ fsubd %f2,%f0,%f2 +/* 0x01bc */ std %f2,[%o3-8] +/* 0x01c0 180 */ srl %o7,16,%o7 +/* 0x01c4 179 */ st %g1,[%sp+96] +/* 0x01c8 */ fmovs %f0,%f2 +/* 0x01cc */ ld [%sp+96],%f3 +/* 0x01d0 */ fsubd %f2,%f0,%f2 +/* 0x01d4 */ std %f2,[%o5-16] +/* 0x01d8 180 */ st %o7,[%sp+92] +/* 0x01dc */ fmovs %f0,%f2 +/* 0x01e0 */ ld [%sp+92],%f3 +/* 0x01e4 */ fsubd %f2,%f0,%f2 +/* 0x01e8 */ std %f2,[%o5-8] + .L900000411: +/* 0x01ec 178 */ ld [%o0],%f3 +/* 0x01f0 180 */ add %g5,2,%g5 +/* 0x01f4 */ add %o5,32,%o5 +/* 0x01f8 177 */ ld [%o0],%o7 +/* 0x01fc 180 */ cmp %g5,%o1 +/* 0x0200 */ add %o3,16,%o3 +/* 0x0204 178 */ fmovs %f0,%f2 +/* 0x0208 */ fsubd %f2,%f0,%f2 +/* 0x020c */ std %f2,[%o3-16] +/* 0x0210 179 */ and %o7,%o4,%g1 +/* 0x0214 */ st %g1,[%sp+96] +/* 0x0218 */ ld [%sp+96],%f3 +/* 0x021c */ fmovs %f0,%f2 +/* 0x0220 */ fsubd %f2,%f0,%f2 +/* 0x0224 180 */ srl %o7,16,%o7 +/* 0x0228 179 */ std %f2,[%o5-32] +/* 0x022c 180 */ st %o7,[%sp+92] +/* 0x0230 */ ld [%sp+92],%f3 +/* 0x0234 */ fmovs %f0,%f2 +/* 0x0238 */ fsubd %f2,%f0,%f2 +/* 0x023c */ std %f2,[%o5-24] +/* 0x0240 */ add %o0,4,%o0 +/* 0x0244 178 */ ld [%o0],%f3 +/* 0x0248 177 */ ld [%o0],%o7 +/* 0x024c 178 */ fmovs 
%f0,%f2 +/* 0x0250 */ fsubd %f2,%f0,%f2 +/* 0x0254 */ std %f2,[%o3-8] +/* 0x0258 179 */ and %o7,%o4,%g1 +/* 0x025c */ st %g1,[%sp+96] +/* 0x0260 */ ld [%sp+96],%f3 +/* 0x0264 */ fmovs %f0,%f2 +/* 0x0268 */ fsubd %f2,%f0,%f2 +/* 0x026c 180 */ srl %o7,16,%o7 +/* 0x0270 179 */ std %f2,[%o5-16] +/* 0x0274 180 */ st %o7,[%sp+92] +/* 0x0278 */ ld [%sp+92],%f3 +/* 0x027c */ fmovs %f0,%f2 +/* 0x0280 */ fsubd %f2,%f0,%f2 +/* 0x0284 */ std %f2,[%o5-8] +/* 0x0288 */ bl,pt %icc,.L900000411 +/* 0x028c */ add %o0,4,%o0 + .L900000414: +/* 0x0290 180 */ cmp %g5,%i3 +/* 0x0294 */ bge,pn %icc,.L77000164 +/* 0x0298 */ nop + .L77000161: +/* 0x029c 178 */ ld [%o0],%f3 + .L900000416: +/* 0x02a0 178 */ ldd [%o2],%f0 +/* 0x02a4 180 */ add %g5,1,%g5 +/* 0x02a8 177 */ ld [%o0],%o1 +/* 0x02ac 180 */ add %o0,4,%o0 +/* 0x02b0 */ cmp %g5,%i3 +/* 0x02b4 178 */ fmovs %f0,%f2 +/* 0x02b8 179 */ and %o1,%o4,%o7 +/* 0x02bc 178 */ fsubd %f2,%f0,%f2 +/* 0x02c0 */ std %f2,[%o3] +/* 0x02c4 180 */ srl %o1,16,%o1 +/* 0x02c8 179 */ st %o7,[%sp+96] +/* 0x02cc 180 */ add %o3,8,%o3 +/* 0x02d0 179 */ fmovs %f0,%f2 +/* 0x02d4 */ ld [%sp+96],%f3 +/* 0x02d8 */ fsubd %f2,%f0,%f2 +/* 0x02dc */ std %f2,[%o5] +/* 0x02e0 180 */ st %o1,[%sp+92] +/* 0x02e4 */ fmovs %f0,%f2 +/* 0x02e8 */ ld [%sp+92],%f3 +/* 0x02ec */ fsubd %f2,%f0,%f0 +/* 0x02f0 */ std %f0,[%o5+8] +/* 0x02f4 */ add %o5,16,%o5 +/* 0x02f8 */ bl,a,pt %icc,.L900000416 +/* 0x02fc */ ld [%o0],%f3 + .L77000164: +/* 0x0300 */ ret ! Result = +/* 0x0304 */ restore %g0,%g0,%g0 +/* 0x0308 0 */ .type conv_i32_to_d32_and_d16,2 +/* 0x0308 */ .size conv_i32_to_d32_and_d16,(.-conv_i32_to_d32_and_d16) + + .section ".text",#alloc,#execinstr +/* 000000 0 */ .align 4 +! +! SUBROUTINE adjust_montf_result +! +! OFFSET SOURCE LINE LABEL INSTRUCTION + + .global adjust_montf_result + adjust_montf_result: +/* 000000 */ or %g0,%o2,%g5 + +! 181 ! } +! 182 !} +! 185 !void adjust_montf_result(unsigned int *i32, unsigned int *nint, int len) +! 186 !{ +! 187 !long long acc; +! 
188 !int i; +! 190 ! if(i32[len]>0) i=-1; + +/* 0x0004 190 */ or %g0,-1,%g4 +/* 0x0008 */ sll %o2,2,%g1 +/* 0x000c */ ld [%o0+%g1],%g1 +/* 0x0010 */ cmp %g1,0 +/* 0x0014 */ bleu,pn %icc,.L77000175 +/* 0x0018 */ or %g0,%o1,%o3 +/* 0x001c */ ba .L900000511 +/* 0x0020 */ cmp %g4,0 + .L77000175: + +! 191 ! else +! 192 ! { +! 193 ! for(i=len-1; i>=0; i--) + +/* 0x0024 193 */ sub %o2,1,%g4 +/* 0x0028 */ sll %g4,2,%g1 +/* 0x002c */ cmp %g4,0 +/* 0x0030 */ bl,pt %icc,.L900000511 +/* 0x0034 */ cmp %g4,0 +/* 0x0038 */ add %o1,%g1,%g2 + +! 194 ! { +! 195 ! if(i32[i]!=nint[i]) break; + +/* 0x003c 195 */ ld [%g2],%o5 +/* 0x0040 193 */ add %o0,%g1,%g3 + .L900000510: +/* 0x0044 195 */ ld [%g3],%o2 +/* 0x0048 */ sub %g4,1,%g1 +/* 0x004c */ sub %g2,4,%g2 +/* 0x0050 */ sub %g3,4,%g3 +/* 0x0054 */ cmp %o2,%o5 +/* 0x0058 */ bne,pn %icc,.L77000182 +/* 0x005c */ nop +/* 0x0060 0 */ or %g0,%g1,%g4 +/* 0x0064 195 */ cmp %g1,0 +/* 0x0068 */ bge,a,pt %icc,.L900000510 +/* 0x006c */ ld [%g2],%o5 + .L77000182: + +! 196 ! } +! 197 ! } +! 198 ! if((i<0)||(i32[i]>nint[i])) + +/* 0x0070 198 */ cmp %g4,0 + .L900000511: +/* 0x0074 198 */ bl,pn %icc,.L77000198 +/* 0x0078 */ sll %g4,2,%g2 +/* 0x007c */ ld [%o1+%g2],%g1 +/* 0x0080 */ ld [%o0+%g2],%g2 +/* 0x0084 */ cmp %g2,%g1 +/* 0x0088 */ bleu,pt %icc,.L77000191 +/* 0x008c */ nop + .L77000198: + +! 199 ! { +! 200 ! acc=0; +! 201 ! 
for(i=0;i>32; + +/* 0x00dc 205 */ or %g0,2,%o5 +/* 0x00e0 201 */ sub %o2,%o1,%o2 +/* 0x00e4 */ or %g0,%o2,%g5 +/* 0x00e8 204 */ and %o2,%g3,%o2 +/* 0x00ec */ st %o2,[%o0] +/* 0x00f0 205 */ srax %g5,32,%g5 + .L900000505: +/* 0x00f4 203 */ ld [%o3],%o2 +/* 0x00f8 205 */ add %o5,1,%o5 +/* 0x00fc */ add %o3,4,%o3 +/* 0x0100 */ cmp %o5,%g4 +/* 0x0104 */ add %o4,4,%o4 +/* 0x0108 201 */ sub %g1,%o2,%g1 +/* 0x010c */ add %g1,%g5,%g5 +/* 0x0110 204 */ and %g5,%g3,%o2 +/* 0x0114 203 */ ld [%o4-4],%g1 +/* 0x0118 204 */ st %o2,[%o4-8] +/* 0x011c 205 */ ble,pt %icc,.L900000505 +/* 0x0120 */ srax %g5,32,%g5 + .L900000508: +/* 0x0124 203 */ ld [%o3],%g2 +/* 0x0128 201 */ sub %g1,%g2,%g1 +/* 0x012c */ add %g1,%g5,%g1 +/* 0x0130 204 */ and %g1,%g3,%g2 +/* 0x0134 */ retl ! Result = +/* 0x0138 */ st %g2,[%o4-4] + .L77000199: +/* 0x013c 203 */ ld [%o4],%g1 + .L900000509: +/* 0x0140 203 */ ld [%o3],%g2 +/* 0x0144 */ add %g5,%g1,%g1 +/* 0x0148 205 */ add %o5,1,%o5 +/* 0x014c */ add %o3,4,%o3 +/* 0x0150 */ cmp %o5,%g4 +/* 0x0154 203 */ sub %g1,%g2,%g1 +/* 0x0158 204 */ and %g1,%g3,%g2 +/* 0x015c */ st %g2,[%o4] +/* 0x0160 205 */ add %o4,4,%o4 +/* 0x0164 */ srax %g1,32,%g5 +/* 0x0168 */ ble,a,pt %icc,.L900000509 +/* 0x016c */ ld [%o4],%g1 + .L77000191: +/* 0x0170 */ retl ! Result = +/* 0x0174 */ nop +/* 0x0178 0 */ .type adjust_montf_result,2 +/* 0x0178 */ .size adjust_montf_result,(.-adjust_montf_result) + + .section ".text",#alloc,#execinstr +/* 000000 0 */ .align 4 +/* 000000 */ .skip 16 +! +! SUBROUTINE mont_mulf_noconv +! +! OFFSET SOURCE LINE LABEL INSTRUCTION + + .global mont_mulf_noconv + mont_mulf_noconv: +/* 000000 */ save %sp,-144,%sp + .L900000646: +/* 0x0004 */ call .+8 +/* 0x0008 */ sethi /*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000646-.)),%g5 + +! 206 ! } +! 207 ! } +! 208 !} +! 213 !/* +! 214 !** the lengths of the input arrays should be at least the following: +! 215 !** result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen] +! 
216 !** all of them should be different from one another +! 217 !** +! 218 !*/ +! 219 !void mont_mulf_noconv(unsigned int *result, +! 220 ! double *dm1, double *dm2, double *dt, +! 221 ! double *dn, unsigned int *nint, +! 222 ! int nlen, double dn0) +! 223 !{ +! 224 ! int i, j, jj; +! 225 ! int tmp; +! 226 ! double digit, m2j, nextm2j, a, b; +! 227 ! double *dptmp, *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0; +! 229 ! pdm1=&(dm1[0]); +! 230 ! pdm2=&(dm2[0]); +! 231 ! pdn=&(dn[0]); +! 232 ! pdm2[2*nlen]=Zero; + +/* 0x000c 232 */ ld [%fp+92],%o1 +/* 0x0010 */ sethi %hi(Zero),%g2 +/* 0x0014 223 */ ldd [%fp+96],%f2 +/* 0x0018 */ add %g5,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000646-.)),%g5 +/* 0x001c 232 */ add %g2,%lo(Zero),%g2 +/* 0x0020 223 */ st %i0,[%fp+68] +/* 0x0024 */ add %g5,%o7,%o3 + +! 234 ! if (nlen!=16) +! 235 ! { +! 236 ! for(i=0;i<4*nlen+2;i++) dt[i]=Zero; +! 238 ! a=dt[0]=pdm1[0]*pdm2[0]; +! 239 ! digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16); + +/* 0x0028 239 */ sethi %hi(TwoToMinus16),%g3 +/* 0x002c 232 */ ld [%o3+%g2],%l0 +/* 0x0030 239 */ sethi %hi(TwoTo16),%g4 +/* 0x0034 223 */ or %g0,%i2,%o2 +/* 0x0038 */ fmovd %f2,%f16 +/* 0x003c */ st %i5,[%fp+88] +/* 0x0040 239 */ add %g3,%lo(TwoToMinus16),%g2 +/* 0x0044 223 */ or %g0,%i1,%i2 +/* 0x0048 232 */ ldd [%l0],%f0 +/* 0x004c 239 */ add %g4,%lo(TwoTo16),%g3 +/* 0x0050 223 */ or %g0,%i3,%o0 +/* 0x0054 232 */ sll %o1,4,%g4 +/* 0x0058 239 */ ld [%o3+%g2],%g5 +/* 0x005c 223 */ or %g0,%i3,%i1 +/* 0x0060 239 */ ld [%o3+%g3],%g1 +/* 0x0064 232 */ or %g0,%o1,%i0 +/* 0x0068 */ or %g0,%o2,%i3 +/* 0x006c 234 */ cmp %o1,16 +/* 0x0070 */ be,pn %icc,.L77000279 +/* 0x0074 */ std %f0,[%o2+%g4] +/* 0x0078 236 */ sll %o1,2,%g2 +/* 0x007c */ or %g0,%o0,%o3 +/* 0x0080 232 */ sll %o1,1,%o1 +/* 0x0084 236 */ add %g2,2,%o2 +/* 0x0088 */ cmp %o2,0 +/* 0x008c */ ble,a,pt %icc,.L900000660 +/* 0x0090 */ ldd [%i2],%f0 + +! 241 ! pdtj=&(dt[0]); +! 242 ! for(j=jj=0;j<2*nlen;j++,jj++,pdtj++) +! 243 ! { +! 244 ! m2j=pdm2[j]; +! 
245 ! a=pdtj[0]+pdn[0]*digit; +! 246 ! b=pdtj[1]+pdm1[0]*pdm2[j+1]+a*TwoToMinus16; +! 247 ! pdtj[1]=b; +! 249 !#pragma pipeloop(0) +! 250 ! for(i=1;i>32); +! 122 ! d=(long long)d16[2*i+3]; +! 123 ! t1+=(b&0xffff)<<16; + +/* 0x0068 123 */ and %i2,%o5,%i4 +/* 0x006c */ sllx %i4,16,%o1 +/* 0x0070 117 */ cmp %g2,6 +/* 0x0074 */ bl,pn %icc,.L77000134 +/* 0x0078 */ or %g0,3,%i0 +/* 0x007c 119 */ ldd [%o4+16],%f0 +/* 0x0080 120 */ and %i3,%g3,%o3 + +! 124 ! t+=(b>>16)+(t1>>32); + +/* 0x0084 124 */ srax %i2,16,%i5 +/* 0x0088 117 */ add %o3,%o1,%i4 +/* 0x008c 121 */ srax %i3,32,%i3 +/* 0x0090 119 */ fdtox %f0,%f0 +/* 0x0094 */ std %f0,[%sp+2231] + +! 125 ! i32[i]=t1&0xffffffff; + +/* 0x0098 125 */ and %i4,%g3,%l0 +/* 0x009c 117 */ or %g0,72,%o3 +/* 0x00a0 122 */ ldd [%g4+24],%f0 +/* 0x00a4 117 */ or %g0,64,%o4 +/* 0x00a8 */ or %g0,4,%o1 + +! 126 ! t1=t; +! 127 ! a=c; +! 128 ! b=d; + +/* 0x00ac 128 */ or %g0,5,%i0 +/* 0x00b0 */ or %g0,4,%i1 +/* 0x00b4 119 */ ldx [%sp+2231],%g2 +/* 0x00b8 122 */ fdtox %f0,%f0 +/* 0x00bc 128 */ or %g0,4,%o0 +/* 0x00c0 122 */ std %f0,[%sp+2223] +/* 0x00c4 */ ldd [%g4+40],%f2 +/* 0x00c8 120 */ and %g2,%g3,%i2 +/* 0x00cc 119 */ ldd [%g4+32],%f0 +/* 0x00d0 121 */ srax %g2,32,%g2 +/* 0x00d4 122 */ ldd [%g4+56],%f4 +/* 0x00d8 */ fdtox %f2,%f2 +/* 0x00dc */ ldx [%sp+2223],%g5 +/* 0x00e0 119 */ fdtox %f0,%f0 +/* 0x00e4 125 */ st %l0,[%g1] +/* 0x00e8 124 */ srax %i4,32,%l0 +/* 0x00ec 122 */ fdtox %f4,%f4 +/* 0x00f0 */ std %f2,[%sp+2223] +/* 0x00f4 123 */ and %g5,%o5,%i4 +/* 0x00f8 124 */ add %i5,%l0,%i5 +/* 0x00fc 119 */ std %f0,[%sp+2231] +/* 0x0100 123 */ sllx %i4,16,%i4 +/* 0x0104 124 */ add %i3,%i5,%i3 +/* 0x0108 119 */ ldd [%g4+48],%f2 +/* 0x010c 124 */ srax %g5,16,%g5 +/* 0x0110 117 */ add %i2,%i4,%i2 +/* 0x0114 122 */ ldd [%g4+72],%f0 +/* 0x0118 117 */ add %i2,%i3,%i4 +/* 0x011c 124 */ srax %i4,32,%i5 +/* 0x0120 119 */ fdtox %f2,%f2 +/* 0x0124 125 */ and %i4,%g3,%i4 +/* 0x0128 122 */ ldx [%sp+2223],%i2 +/* 0x012c 124 */ add %g5,%i5,%g5 +/* 
0x0130 119 */ ldx [%sp+2231],%i3 +/* 0x0134 124 */ add %g2,%g5,%g5 +/* 0x0138 119 */ std %f2,[%sp+2231] +/* 0x013c 122 */ std %f4,[%sp+2223] +/* 0x0140 119 */ ldd [%g4+64],%f2 +/* 0x0144 125 */ st %i4,[%g1+4] + .L900000108: +/* 0x0148 122 */ ldx [%sp+2223],%i4 +/* 0x014c 128 */ add %o0,2,%o0 +/* 0x0150 */ add %i0,4,%i0 +/* 0x0154 119 */ ldx [%sp+2231],%l0 +/* 0x0158 117 */ add %o3,16,%o3 +/* 0x015c 123 */ and %i2,%o5,%g2 +/* 0x0160 */ sllx %g2,16,%i5 +/* 0x0164 120 */ and %i3,%g3,%g2 +/* 0x0168 122 */ ldd [%g4+%o3],%f4 +/* 0x016c */ fdtox %f0,%f0 +/* 0x0170 */ std %f0,[%sp+2223] +/* 0x0174 124 */ srax %i2,16,%i2 +/* 0x0178 117 */ add %g2,%i5,%g2 +/* 0x017c 119 */ fdtox %f2,%f0 +/* 0x0180 117 */ add %o4,16,%o4 +/* 0x0184 119 */ std %f0,[%sp+2231] +/* 0x0188 117 */ add %g2,%g5,%g2 +/* 0x018c 119 */ ldd [%g4+%o4],%f2 +/* 0x0190 124 */ srax %g2,32,%i5 +/* 0x0194 128 */ cmp %o0,%o2 +/* 0x0198 121 */ srax %i3,32,%g5 +/* 0x019c 124 */ add %i2,%i5,%i2 +/* 0x01a0 */ add %g5,%i2,%i5 +/* 0x01a4 117 */ add %o1,4,%o1 +/* 0x01a8 125 */ and %g2,%g3,%g2 +/* 0x01ac 127 */ or %g0,%l0,%g5 +/* 0x01b0 125 */ st %g2,[%g1+%o1] +/* 0x01b4 128 */ add %i1,4,%i1 +/* 0x01b8 122 */ ldx [%sp+2223],%i2 +/* 0x01bc 119 */ ldx [%sp+2231],%i3 +/* 0x01c0 117 */ add %o3,16,%o3 +/* 0x01c4 123 */ and %i4,%o5,%g2 +/* 0x01c8 */ sllx %g2,16,%l0 +/* 0x01cc 120 */ and %g5,%g3,%g2 +/* 0x01d0 122 */ ldd [%g4+%o3],%f0 +/* 0x01d4 */ fdtox %f4,%f4 +/* 0x01d8 */ std %f4,[%sp+2223] +/* 0x01dc 124 */ srax %i4,16,%i4 +/* 0x01e0 117 */ add %g2,%l0,%g2 +/* 0x01e4 119 */ fdtox %f2,%f2 +/* 0x01e8 117 */ add %o4,16,%o4 +/* 0x01ec 119 */ std %f2,[%sp+2231] +/* 0x01f0 117 */ add %g2,%i5,%g2 +/* 0x01f4 119 */ ldd [%g4+%o4],%f2 +/* 0x01f8 124 */ srax %g2,32,%i5 +/* 0x01fc 121 */ srax %g5,32,%g5 +/* 0x0200 124 */ add %i4,%i5,%i4 +/* 0x0204 */ add %g5,%i4,%g5 +/* 0x0208 117 */ add %o1,4,%o1 +/* 0x020c 125 */ and %g2,%g3,%g2 +/* 0x0210 128 */ ble,pt %icc,.L900000108 +/* 0x0214 */ st %g2,[%g1+%o1] + .L900000111: +/* 0x0218 122 */ 
ldx [%sp+2223],%o2 +/* 0x021c 123 */ and %i2,%o5,%i4 +/* 0x0220 120 */ and %i3,%g3,%g2 +/* 0x0224 123 */ sllx %i4,16,%i4 +/* 0x0228 119 */ ldx [%sp+2231],%i5 +/* 0x022c 128 */ cmp %o0,%o7 +/* 0x0230 124 */ srax %i2,16,%i2 +/* 0x0234 117 */ add %g2,%i4,%g2 +/* 0x0238 122 */ fdtox %f0,%f4 +/* 0x023c */ std %f4,[%sp+2223] +/* 0x0240 117 */ add %g2,%g5,%g5 +/* 0x0244 123 */ and %o2,%o5,%l0 +/* 0x0248 124 */ srax %g5,32,%l1 +/* 0x024c 120 */ and %i5,%g3,%i4 +/* 0x0250 119 */ fdtox %f2,%f0 +/* 0x0254 121 */ srax %i3,32,%g2 +/* 0x0258 119 */ std %f0,[%sp+2231] +/* 0x025c 124 */ add %i2,%l1,%i2 +/* 0x0260 123 */ sllx %l0,16,%i3 +/* 0x0264 124 */ add %g2,%i2,%i2 +/* 0x0268 */ srax %o2,16,%o2 +/* 0x026c 117 */ add %o1,4,%g2 +/* 0x0270 */ add %i4,%i3,%o1 +/* 0x0274 125 */ and %g5,%g3,%g5 +/* 0x0278 */ st %g5,[%g1+%g2] +/* 0x027c 119 */ ldx [%sp+2231],%i3 +/* 0x0280 117 */ add %o1,%i2,%o1 +/* 0x0284 */ add %g2,4,%g2 +/* 0x0288 124 */ srax %o1,32,%i4 +/* 0x028c 122 */ ldx [%sp+2223],%i2 +/* 0x0290 125 */ and %o1,%g3,%g5 +/* 0x0294 121 */ srax %i5,32,%o1 +/* 0x0298 124 */ add %o2,%i4,%o2 +/* 0x029c 125 */ st %g5,[%g1+%g2] +/* 0x02a0 128 */ bg,pn %icc,.L77000127 +/* 0x02a4 */ add %o1,%o2,%g5 +/* 0x02a8 */ add %i0,6,%i0 +/* 0x02ac */ add %i1,6,%i1 + .L77000134: +/* 0x02b0 119 */ sra %i1,0,%o2 + .L900000112: +/* 0x02b4 119 */ sllx %o2,3,%o3 +/* 0x02b8 120 */ and %i3,%g3,%o1 +/* 0x02bc 119 */ ldd [%g4+%o3],%f0 +/* 0x02c0 122 */ sra %i0,0,%o3 +/* 0x02c4 123 */ and %i2,%o5,%o2 +/* 0x02c8 122 */ sllx %o3,3,%o3 +/* 0x02cc 120 */ add %g5,%o1,%o1 +/* 0x02d0 119 */ fdtox %f0,%f0 +/* 0x02d4 */ std %f0,[%sp+2231] +/* 0x02d8 123 */ sllx %o2,16,%o2 +/* 0x02dc */ add %o1,%o2,%o2 +/* 0x02e0 128 */ add %i1,2,%i1 +/* 0x02e4 122 */ ldd [%g4+%o3],%f0 +/* 0x02e8 124 */ srax %o2,32,%g2 +/* 0x02ec 125 */ and %o2,%g3,%o3 +/* 0x02f0 124 */ srax %i2,16,%o1 +/* 0x02f4 128 */ add %i0,2,%i0 +/* 0x02f8 122 */ fdtox %f0,%f0 +/* 0x02fc */ std %f0,[%sp+2223] +/* 0x0300 125 */ sra %o0,0,%o2 +/* 0x0304 */ sllx 
%o2,2,%o2 +/* 0x0308 124 */ add %o1,%g2,%g5 +/* 0x030c 121 */ srax %i3,32,%g2 +/* 0x0310 128 */ add %o0,1,%o0 +/* 0x0314 124 */ add %g2,%g5,%g5 +/* 0x0318 128 */ cmp %o0,%o7 +/* 0x031c 119 */ ldx [%sp+2231],%o4 +/* 0x0320 122 */ ldx [%sp+2223],%i2 +/* 0x0324 125 */ st %o3,[%g1+%o2] +/* 0x0328 127 */ or %g0,%o4,%i3 +/* 0x032c 128 */ ble,pt %icc,.L900000112 +/* 0x0330 */ sra %i1,0,%o2 + .L77000127: + +! 129 ! } +! 130 ! t1+=a&0xffffffff; +! 131 ! t=(a>>32); +! 132 ! t1+=(b&0xffff)<<16; +! 133 ! i32[i]=t1&0xffffffff; + +/* 0x0334 133 */ sethi %hi(0xfc00),%g2 + .L900000113: +/* 0x0338 133 */ or %g0,-1,%g3 +/* 0x033c */ add %g2,1023,%g2 +/* 0x0340 */ srl %g3,0,%g3 +/* 0x0344 */ and %i2,%g2,%g2 +/* 0x0348 */ and %i3,%g3,%g4 +/* 0x034c */ sllx %g2,16,%g2 +/* 0x0350 */ add %g5,%g4,%g4 +/* 0x0354 */ sra %o0,0,%g5 +/* 0x0358 */ add %g4,%g2,%g4 +/* 0x035c */ sllx %g5,2,%g2 +/* 0x0360 */ and %g4,%g3,%g3 +/* 0x0364 */ st %g3,[%g1+%g2] +/* 0x0368 */ ret ! Result = +/* 0x036c */ restore %g0,%g0,%g0 +/* 0x0370 0 */ .type conv_d16_to_i32,2 +/* 0x0370 */ .size conv_d16_to_i32,(.-conv_d16_to_i32) + + .section ".text",#alloc,#execinstr +/* 000000 0 */ .align 8 +! +! CONSTANT POOL +! + .L_const_seg_900000201: +/* 000000 0 */ .word 1127219200,0 +/* 0x0008 0 */ .align 8 +/* 0x0008 */ .skip 24 +! +! SUBROUTINE conv_i32_to_d32 +! +! OFFSET SOURCE LINE LABEL INSTRUCTION + + .global conv_i32_to_d32 + conv_i32_to_d32: +/* 000000 */ or %g0,%o7,%g3 + +! 135 !} +! 137 !void conv_i32_to_d32(double *d32, unsigned int *i32, int len) +! 138 !{ +! 139 !int i; +! 141 !#pragma pipeloop(0) +! 142 ! 
for(i=0;i>16); + +/* 0x001c 156 */ sethi %hi(.L_const_seg_900000301),%g2 +/* 0x0020 147 */ or %g0,%i2,%o1 +/* 0x0024 152 */ sethi %hi(0xfc00),%g3 +/* 0x0028 156 */ add %g2,%lo(.L_const_seg_900000301),%g2 +/* 0x002c 152 */ or %g0,%o1,%g4 +/* 0x0030 156 */ ldx [%o0+%g2],%o5 +/* 0x0034 152 */ add %g3,1023,%g1 +/* 0x0038 147 */ or %g0,%i1,%o7 +/* 0x003c 152 */ or %g0,0,%i2 +/* 0x0040 */ sub %o1,1,%g5 +/* 0x0044 */ or %g0,0,%g3 +/* 0x0048 */ or %g0,1,%g2 +/* 0x004c 154 */ or %g0,0,%o2 +/* 0x0050 */ cmp %g4,6 +/* 0x0054 152 */ bl,pn %icc,.L77000154 +/* 0x0058 */ ldd [%o5],%f0 +/* 0x005c */ sub %o1,2,%o3 +/* 0x0060 */ or %g0,16,%o2 +/* 0x0064 154 */ ld [%i1],%o4 +/* 0x0068 156 */ or %g0,3,%g2 +/* 0x006c */ or %g0,2,%g3 +/* 0x0070 155 */ fmovs %f0,%f2 +/* 0x0074 156 */ or %g0,4,%i2 +/* 0x0078 155 */ and %o4,%g1,%o0 +/* 0x007c */ st %o0,[%sp+2227] +/* 0x0080 */ fmovs %f0,%f4 +/* 0x0084 156 */ srl %o4,16,%i4 +/* 0x0088 152 */ or %g0,12,%o4 +/* 0x008c */ or %g0,24,%o0 +/* 0x0090 155 */ ld [%sp+2227],%f3 +/* 0x0094 */ fsubd %f2,%f0,%f2 +/* 0x0098 */ std %f2,[%i0] +/* 0x009c 156 */ st %i4,[%sp+2223] +/* 0x00a0 154 */ ld [%o7+4],%o1 +/* 0x00a4 156 */ fmovs %f0,%f2 +/* 0x00a8 155 */ and %o1,%g1,%i1 +/* 0x00ac 156 */ ld [%sp+2223],%f3 +/* 0x00b0 */ srl %o1,16,%o1 +/* 0x00b4 */ fsubd %f2,%f0,%f2 +/* 0x00b8 */ std %f2,[%i0+8] +/* 0x00bc */ st %o1,[%sp+2223] +/* 0x00c0 155 */ st %i1,[%sp+2227] +/* 0x00c4 154 */ ld [%o7+8],%o1 +/* 0x00c8 156 */ fmovs %f0,%f2 +/* 0x00cc 155 */ and %o1,%g1,%g4 +/* 0x00d0 */ ld [%sp+2227],%f5 +/* 0x00d4 156 */ srl %o1,16,%o1 +/* 0x00d8 */ ld [%sp+2223],%f3 +/* 0x00dc */ st %o1,[%sp+2223] +/* 0x00e0 155 */ fsubd %f4,%f0,%f4 +/* 0x00e4 */ st %g4,[%sp+2227] +/* 0x00e8 156 */ fsubd %f2,%f0,%f2 +/* 0x00ec 154 */ ld [%o7+12],%o1 +/* 0x00f0 155 */ std %f4,[%i0+16] +/* 0x00f4 156 */ std %f2,[%i0+24] + .L900000306: +/* 0x00f8 155 */ ld [%sp+2227],%f5 +/* 0x00fc 156 */ add %i2,2,%i2 +/* 0x0100 */ add %g2,4,%g2 +/* 0x0104 */ ld [%sp+2223],%f3 +/* 0x0108 */ cmp 
%i2,%o3 +/* 0x010c */ add %g3,4,%g3 +/* 0x0110 155 */ and %o1,%g1,%g4 +/* 0x0114 156 */ srl %o1,16,%o1 +/* 0x0118 155 */ st %g4,[%sp+2227] +/* 0x011c 156 */ st %o1,[%sp+2223] +/* 0x0120 152 */ add %o4,4,%o1 +/* 0x0124 154 */ ld [%o7+%o1],%o4 +/* 0x0128 156 */ fmovs %f0,%f2 +/* 0x012c 155 */ fmovs %f0,%f4 +/* 0x0130 */ fsubd %f4,%f0,%f4 +/* 0x0134 152 */ add %o2,16,%o2 +/* 0x0138 156 */ fsubd %f2,%f0,%f2 +/* 0x013c 155 */ std %f4,[%i0+%o2] +/* 0x0140 152 */ add %o0,16,%o0 +/* 0x0144 156 */ std %f2,[%i0+%o0] +/* 0x0148 155 */ ld [%sp+2227],%f5 +/* 0x014c 156 */ ld [%sp+2223],%f3 +/* 0x0150 155 */ and %o4,%g1,%g4 +/* 0x0154 156 */ srl %o4,16,%o4 +/* 0x0158 155 */ st %g4,[%sp+2227] +/* 0x015c 156 */ st %o4,[%sp+2223] +/* 0x0160 152 */ add %o1,4,%o4 +/* 0x0164 154 */ ld [%o7+%o4],%o1 +/* 0x0168 156 */ fmovs %f0,%f2 +/* 0x016c 155 */ fmovs %f0,%f4 +/* 0x0170 */ fsubd %f4,%f0,%f4 +/* 0x0174 152 */ add %o2,16,%o2 +/* 0x0178 156 */ fsubd %f2,%f0,%f2 +/* 0x017c 155 */ std %f4,[%i0+%o2] +/* 0x0180 152 */ add %o0,16,%o0 +/* 0x0184 156 */ ble,pt %icc,.L900000306 +/* 0x0188 */ std %f2,[%i0+%o0] + .L900000309: +/* 0x018c 155 */ ld [%sp+2227],%f5 +/* 0x0190 156 */ fmovs %f0,%f2 +/* 0x0194 */ srl %o1,16,%o3 +/* 0x0198 */ ld [%sp+2223],%f3 +/* 0x019c 155 */ and %o1,%g1,%i1 +/* 0x01a0 152 */ add %o2,16,%g4 +/* 0x01a4 155 */ fmovs %f0,%f4 +/* 0x01a8 */ st %i1,[%sp+2227] +/* 0x01ac 152 */ add %o0,16,%o2 +/* 0x01b0 156 */ st %o3,[%sp+2223] +/* 0x01b4 154 */ sra %i2,0,%o3 +/* 0x01b8 152 */ add %g4,16,%o1 +/* 0x01bc 155 */ fsubd %f4,%f0,%f4 +/* 0x01c0 */ std %f4,[%i0+%g4] +/* 0x01c4 152 */ add %o0,32,%o0 +/* 0x01c8 156 */ fsubd %f2,%f0,%f2 +/* 0x01cc */ std %f2,[%i0+%o2] +/* 0x01d0 */ sllx %o3,2,%o2 +/* 0x01d4 155 */ ld [%sp+2227],%f5 +/* 0x01d8 156 */ cmp %i2,%g5 +/* 0x01dc */ add %g2,6,%g2 +/* 0x01e0 */ ld [%sp+2223],%f3 +/* 0x01e4 */ add %g3,6,%g3 +/* 0x01e8 155 */ fmovs %f0,%f4 +/* 0x01ec 156 */ fmovs %f0,%f2 +/* 0x01f0 155 */ fsubd %f4,%f0,%f4 +/* 0x01f4 */ std %f4,[%i0+%o1] +/* 
0x01f8 156 */ fsubd %f2,%f0,%f0 +/* 0x01fc */ bg,pn %icc,.L77000150 +/* 0x0200 */ std %f0,[%i0+%o0] + .L77000154: +/* 0x0204 155 */ ldd [%o5],%f0 + .L900000311: +/* 0x0208 154 */ ld [%o7+%o2],%o0 +/* 0x020c 155 */ sra %g3,0,%o1 +/* 0x0210 */ fmovs %f0,%f2 +/* 0x0214 */ sllx %o1,3,%o2 +/* 0x0218 156 */ add %i2,1,%i2 +/* 0x021c 155 */ and %o0,%g1,%o1 +/* 0x0220 */ st %o1,[%sp+2227] +/* 0x0224 156 */ add %g3,2,%g3 +/* 0x0228 */ srl %o0,16,%o1 +/* 0x022c */ cmp %i2,%g5 +/* 0x0230 */ sra %g2,0,%o0 +/* 0x0234 */ add %g2,2,%g2 +/* 0x0238 */ sllx %o0,3,%o0 +/* 0x023c 155 */ ld [%sp+2227],%f3 +/* 0x0240 154 */ sra %i2,0,%o3 +/* 0x0244 155 */ fsubd %f2,%f0,%f2 +/* 0x0248 */ std %f2,[%i0+%o2] +/* 0x024c */ sllx %o3,2,%o2 +/* 0x0250 156 */ st %o1,[%sp+2223] +/* 0x0254 */ fmovs %f0,%f2 +/* 0x0258 */ ld [%sp+2223],%f3 +/* 0x025c */ fsubd %f2,%f0,%f0 +/* 0x0260 */ std %f0,[%i0+%o0] +/* 0x0264 */ ble,a,pt %icc,.L900000311 +/* 0x0268 */ ldd [%o5],%f0 + .L77000150: +/* 0x026c */ ret ! Result = +/* 0x0270 */ restore %g0,%g0,%g0 +/* 0x0274 0 */ .type conv_i32_to_d16,2 +/* 0x0274 */ .size conv_i32_to_d16,(.-conv_i32_to_d16) + + .section ".text",#alloc,#execinstr +/* 000000 0 */ .align 8 +! +! CONSTANT POOL +! + .L_const_seg_900000401: +/* 000000 0 */ .word 1127219200,0 +/* 0x0008 0 */ .align 8 +/* 0x0008 */ .skip 24 +! +! SUBROUTINE conv_i32_to_d32_and_d16 +! +! OFFSET SOURCE LINE LABEL INSTRUCTION + + .global conv_i32_to_d32_and_d16 + conv_i32_to_d32_and_d16: +/* 000000 */ save %sp,-192,%sp + .L900000415: +/* 0x0004 */ call .+8 +/* 0x0008 */ sethi /*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000415-.)),%g3 + +! 157 ! } +! 158 !} +! 161 !void conv_i32_to_d32_and_d16(double *d32, double *d16, +! 162 ! unsigned int *i32, int len) +! 163 !{ +! 164 !int i = 0; +! 165 !unsigned int a; +! 167 !#pragma pipeloop(0) +! 168 !#ifdef RF_INLINE_MACROS +! 169 ! 
for(;i>16); + +/* 0x0158 180 */ sethi %hi(.L_const_seg_900000401),%g2 +/* 0x015c */ add %g2,%lo(.L_const_seg_900000401),%g2 +/* 0x0160 175 */ sethi %hi(0xfc00),%g3 +/* 0x0164 180 */ ldx [%o3+%g2],%g1 +/* 0x0168 175 */ sll %i2,1,%i4 +/* 0x016c */ sub %g5,%i2,%g4 +/* 0x0170 177 */ sra %i2,0,%o3 +/* 0x0174 175 */ add %g3,1023,%g3 +/* 0x0178 178 */ ldd [%g1],%f2 +/* 0x017c */ sllx %o3,2,%o2 +/* 0x0180 175 */ add %i4,1,%g2 +/* 0x0184 177 */ or %g0,%o3,%o1 +/* 0x0188 */ cmp %g4,6 +/* 0x018c 175 */ bl,pn %icc,.L77000161 +/* 0x0190 */ sra %i2,0,%o3 +/* 0x0194 177 */ or %g0,%o2,%o0 +/* 0x0198 178 */ ld [%i0+%o2],%f5 +/* 0x019c 179 */ fmovs %f2,%f8 +/* 0x01a0 175 */ add %o0,4,%o3 +/* 0x01a4 177 */ ld [%i0+%o0],%o7 +/* 0x01a8 180 */ fmovs %f2,%f6 +/* 0x01ac 178 */ fmovs %f2,%f4 +/* 0x01b0 */ sllx %o1,3,%o2 +/* 0x01b4 175 */ add %o3,4,%o5 +/* 0x01b8 179 */ sra %i4,0,%o0 +/* 0x01bc 175 */ add %o3,8,%o4 +/* 0x01c0 178 */ fsubd %f4,%f2,%f4 +/* 0x01c4 */ std %f4,[%i3+%o2] +/* 0x01c8 179 */ sllx %o0,3,%i5 +/* 0x01cc */ and %o7,%g3,%o0 +/* 0x01d0 */ st %o0,[%sp+2227] +/* 0x01d4 175 */ add %i5,16,%o1 +/* 0x01d8 180 */ srl %o7,16,%g4 +/* 0x01dc */ add %i2,1,%i2 +/* 0x01e0 */ sra %g2,0,%o0 +/* 0x01e4 175 */ add %o2,8,%o2 +/* 0x01e8 179 */ fmovs %f2,%f4 +/* 0x01ec 180 */ sllx %o0,3,%l0 +/* 0x01f0 */ add %i4,3,%g2 +/* 0x01f4 179 */ ld [%sp+2227],%f5 +/* 0x01f8 175 */ add %l0,16,%o0 +/* 0x01fc 180 */ add %i4,2,%i4 +/* 0x0200 175 */ sub %g5,1,%o7 +/* 0x0204 180 */ add %i2,3,%i2 +/* 0x0208 179 */ fsubd %f4,%f2,%f4 +/* 0x020c */ std %f4,[%i1+%i5] +/* 0x0210 180 */ st %g4,[%sp+2223] +/* 0x0214 177 */ ld [%i0+%o3],%i5 +/* 0x0218 180 */ fmovs %f2,%f4 +/* 0x021c */ srl %i5,16,%g4 +/* 0x0220 179 */ and %i5,%g3,%i5 +/* 0x0224 180 */ ld [%sp+2223],%f5 +/* 0x0228 */ fsubd %f4,%f2,%f4 +/* 0x022c */ std %f4,[%i1+%l0] +/* 0x0230 */ st %g4,[%sp+2223] +/* 0x0234 177 */ ld [%i0+%o5],%g4 +/* 0x0238 179 */ st %i5,[%sp+2227] +/* 0x023c 178 */ fmovs %f2,%f4 +/* 0x0240 180 */ srl %g4,16,%i5 +/* 0x0244 179 */ 
and %g4,%g3,%g4 +/* 0x0248 180 */ ld [%sp+2223],%f7 +/* 0x024c */ st %i5,[%sp+2223] +/* 0x0250 178 */ ld [%i0+%o3],%f5 +/* 0x0254 180 */ fsubd %f6,%f2,%f6 +/* 0x0258 177 */ ld [%i0+%o4],%o3 +/* 0x025c 178 */ fsubd %f4,%f2,%f4 +/* 0x0260 179 */ ld [%sp+2227],%f9 +/* 0x0264 180 */ ld [%sp+2223],%f1 +/* 0x0268 179 */ st %g4,[%sp+2227] +/* 0x026c */ fsubd %f8,%f2,%f8 +/* 0x0270 */ std %f8,[%i1+%o1] +/* 0x0274 180 */ std %f6,[%i1+%o0] +/* 0x0278 178 */ std %f4,[%i3+%o2] + .L900000411: +/* 0x027c 179 */ ld [%sp+2227],%f13 +/* 0x0280 180 */ srl %o3,16,%g4 +/* 0x0284 */ add %i2,2,%i2 +/* 0x0288 */ st %g4,[%sp+2223] +/* 0x028c */ cmp %i2,%o7 +/* 0x0290 */ add %g2,4,%g2 +/* 0x0294 178 */ ld [%i0+%o5],%f11 +/* 0x0298 180 */ add %i4,4,%i4 +/* 0x029c 175 */ add %o4,4,%o5 +/* 0x02a0 177 */ ld [%i0+%o5],%g4 +/* 0x02a4 179 */ and %o3,%g3,%o3 +/* 0x02a8 */ st %o3,[%sp+2227] +/* 0x02ac 180 */ fmovs %f2,%f0 +/* 0x02b0 179 */ fmovs %f2,%f12 +/* 0x02b4 180 */ fsubd %f0,%f2,%f8 +/* 0x02b8 179 */ fsubd %f12,%f2,%f4 +/* 0x02bc 175 */ add %o1,16,%o1 +/* 0x02c0 180 */ ld [%sp+2223],%f7 +/* 0x02c4 178 */ fmovs %f2,%f10 +/* 0x02c8 179 */ std %f4,[%i1+%o1] +/* 0x02cc 175 */ add %o0,16,%o0 +/* 0x02d0 178 */ fsubd %f10,%f2,%f4 +/* 0x02d4 175 */ add %o2,8,%o2 +/* 0x02d8 180 */ std %f8,[%i1+%o0] +/* 0x02dc 178 */ std %f4,[%i3+%o2] +/* 0x02e0 179 */ ld [%sp+2227],%f9 +/* 0x02e4 180 */ srl %g4,16,%o3 +/* 0x02e8 */ st %o3,[%sp+2223] +/* 0x02ec 178 */ ld [%i0+%o4],%f5 +/* 0x02f0 175 */ add %o4,8,%o4 +/* 0x02f4 177 */ ld [%i0+%o4],%o3 +/* 0x02f8 179 */ and %g4,%g3,%g4 +/* 0x02fc */ st %g4,[%sp+2227] +/* 0x0300 180 */ fmovs %f2,%f6 +/* 0x0304 179 */ fmovs %f2,%f8 +/* 0x0308 180 */ fsubd %f6,%f2,%f6 +/* 0x030c 179 */ fsubd %f8,%f2,%f8 +/* 0x0310 175 */ add %o1,16,%o1 +/* 0x0314 180 */ ld [%sp+2223],%f1 +/* 0x0318 178 */ fmovs %f2,%f4 +/* 0x031c 179 */ std %f8,[%i1+%o1] +/* 0x0320 175 */ add %o0,16,%o0 +/* 0x0324 178 */ fsubd %f4,%f2,%f4 +/* 0x0328 175 */ add %o2,8,%o2 +/* 0x032c 180 */ std %f6,[%i1+%o0] 
+/* 0x0330 */ bl,pt %icc,.L900000411 +/* 0x0334 */ std %f4,[%i3+%o2] + .L900000414: +/* 0x0338 180 */ srl %o3,16,%o7 +/* 0x033c */ st %o7,[%sp+2223] +/* 0x0340 179 */ fmovs %f2,%f12 +/* 0x0344 178 */ ld [%i0+%o5],%f11 +/* 0x0348 180 */ fmovs %f2,%f0 +/* 0x034c 179 */ and %o3,%g3,%g4 +/* 0x0350 180 */ fmovs %f2,%f6 +/* 0x0354 175 */ add %o1,16,%o3 +/* 0x0358 */ add %o0,16,%o7 +/* 0x035c 178 */ fmovs %f2,%f10 +/* 0x0360 175 */ add %o2,8,%o2 +/* 0x0364 */ add %o1,32,%o5 +/* 0x0368 179 */ ld [%sp+2227],%f13 +/* 0x036c 178 */ fmovs %f2,%f4 +/* 0x0370 175 */ add %o0,32,%o1 +/* 0x0374 180 */ ld [%sp+2223],%f7 +/* 0x0378 175 */ add %o2,8,%o0 +/* 0x037c 180 */ cmp %i2,%g5 +/* 0x0380 179 */ st %g4,[%sp+2227] +/* 0x0384 */ fsubd %f12,%f2,%f8 +/* 0x0388 180 */ add %g2,6,%g2 +/* 0x038c 179 */ std %f8,[%i1+%o3] +/* 0x0390 180 */ fsubd %f0,%f2,%f0 +/* 0x0394 177 */ sra %i2,0,%o3 +/* 0x0398 180 */ std %f0,[%i1+%o7] +/* 0x039c 178 */ fsubd %f10,%f2,%f0 +/* 0x03a0 180 */ add %i4,6,%i4 +/* 0x03a4 178 */ std %f0,[%i3+%o2] +/* 0x03a8 */ sllx %o3,2,%o2 +/* 0x03ac 179 */ ld [%sp+2227],%f9 +/* 0x03b0 178 */ ld [%i0+%o4],%f5 +/* 0x03b4 179 */ fmovs %f2,%f8 +/* 0x03b8 */ fsubd %f8,%f2,%f0 +/* 0x03bc */ std %f0,[%i1+%o5] +/* 0x03c0 180 */ fsubd %f6,%f2,%f0 +/* 0x03c4 */ std %f0,[%i1+%o1] +/* 0x03c8 178 */ fsubd %f4,%f2,%f0 +/* 0x03cc 180 */ bge,pn %icc,.L77000164 +/* 0x03d0 */ std %f0,[%i3+%o0] + .L77000161: +/* 0x03d4 178 */ ldd [%g1],%f2 + .L900000416: +/* 0x03d8 178 */ ld [%i0+%o2],%f5 +/* 0x03dc 179 */ sra %i4,0,%o0 +/* 0x03e0 180 */ add %i2,1,%i2 +/* 0x03e4 177 */ ld [%i0+%o2],%o1 +/* 0x03e8 178 */ sllx %o3,3,%o3 +/* 0x03ec 180 */ add %i4,2,%i4 +/* 0x03f0 178 */ fmovs %f2,%f4 +/* 0x03f4 179 */ sllx %o0,3,%o4 +/* 0x03f8 180 */ cmp %i2,%g5 +/* 0x03fc 179 */ and %o1,%g3,%o0 +/* 0x0400 178 */ fsubd %f4,%f2,%f0 +/* 0x0404 */ std %f0,[%i3+%o3] +/* 0x0408 180 */ srl %o1,16,%o1 +/* 0x040c 179 */ st %o0,[%sp+2227] +/* 0x0410 180 */ sra %g2,0,%o0 +/* 0x0414 */ add %g2,2,%g2 +/* 0x0418 177 */ sra 
%i2,0,%o3 +/* 0x041c 180 */ sllx %o0,3,%o0 +/* 0x0420 179 */ fmovs %f2,%f4 +/* 0x0424 */ sllx %o3,2,%o2 +/* 0x0428 */ ld [%sp+2227],%f5 +/* 0x042c */ fsubd %f4,%f2,%f0 +/* 0x0430 */ std %f0,[%i1+%o4] +/* 0x0434 180 */ st %o1,[%sp+2223] +/* 0x0438 */ fmovs %f2,%f4 +/* 0x043c */ ld [%sp+2223],%f5 +/* 0x0440 */ fsubd %f4,%f2,%f0 +/* 0x0444 */ std %f0,[%i1+%o0] +/* 0x0448 */ bl,a,pt %icc,.L900000416 +/* 0x044c */ ldd [%g1],%f2 + .L77000164: +/* 0x0450 */ ret ! Result = +/* 0x0454 */ restore %g0,%g0,%g0 +/* 0x0458 0 */ .type conv_i32_to_d32_and_d16,2 +/* 0x0458 */ .size conv_i32_to_d32_and_d16,(.-conv_i32_to_d32_and_d16) + + .section ".text",#alloc,#execinstr +/* 000000 0 */ .align 8 +! +! SUBROUTINE adjust_montf_result +! +! OFFSET SOURCE LINE LABEL INSTRUCTION + + .global adjust_montf_result + adjust_montf_result: +/* 000000 */ save %sp,-176,%sp +/* 0x0004 */ or %g0,%i2,%o1 +/* 0x0008 */ or %g0,%i0,%i2 + +! 181 ! } +! 182 !} +! 185 !void adjust_montf_result(unsigned int *i32, unsigned int *nint, int len) +! 186 !{ +! 187 !long long acc; +! 188 !int i; +! 190 ! if(i32[len]>0) i=-1; + +/* 0x000c 190 */ sra %o1,0,%g2 +/* 0x0010 */ or %g0,-1,%o2 +/* 0x0014 */ sllx %g2,2,%g2 +/* 0x0018 */ ld [%i2+%g2],%g2 +/* 0x001c */ cmp %g2,0 +/* 0x0020 */ bleu,pn %icc,.L77000175 +/* 0x0024 */ or %g0,%i1,%i0 +/* 0x0028 */ ba .L900000511 +/* 0x002c */ cmp %o2,0 + .L77000175: + +! 191 ! else +! 192 ! { +! 193 ! for(i=len-1; i>=0; i--) + +/* 0x0030 193 */ sub %o1,1,%o2 +/* 0x0034 */ cmp %o2,0 +/* 0x0038 */ bl,pn %icc,.L77000182 +/* 0x003c */ sra %o2,0,%g2 + .L900000510: + +! 194 ! { +! 195 ! if(i32[i]!=nint[i]) break; + +/* 0x0040 195 */ sllx %g2,2,%g2 +/* 0x0044 */ sub %o2,1,%o0 +/* 0x0048 */ ld [%i1+%g2],%g3 +/* 0x004c */ ld [%i2+%g2],%g2 +/* 0x0050 */ cmp %g2,%g3 +/* 0x0054 */ bne,pn %icc,.L77000182 +/* 0x0058 */ nop +/* 0x005c 0 */ or %g0,%o0,%o2 +/* 0x0060 195 */ cmp %o0,0 +/* 0x0064 */ bge,pt %icc,.L900000510 +/* 0x0068 */ sra %o2,0,%g2 + .L77000182: + +! 196 ! } +! 197 ! } +! 198 ! 
if((i<0)||(i32[i]>nint[i])) + +/* 0x006c 198 */ cmp %o2,0 + .L900000511: +/* 0x0070 198 */ bl,pn %icc,.L77000198 +/* 0x0074 */ sra %o2,0,%g2 +/* 0x0078 */ sllx %g2,2,%g2 +/* 0x007c */ ld [%i1+%g2],%g3 +/* 0x0080 */ ld [%i2+%g2],%g2 +/* 0x0084 */ cmp %g2,%g3 +/* 0x0088 */ bleu,pt %icc,.L77000191 +/* 0x008c */ nop + .L77000198: + +! 199 ! { +! 200 ! acc=0; +! 201 ! for(i=0;i>32; + +/* 0x00c8 205 */ or %g0,5,%i1 +/* 0x00cc 203 */ ld [%i0],%o2 +/* 0x00d0 201 */ or %g0,8,%o5 +/* 0x00d4 */ or %g0,12,%o4 +/* 0x00d8 203 */ ld [%i0+4],%o3 +/* 0x00dc 201 */ or %g0,16,%g1 +/* 0x00e0 203 */ ld [%i2+4],%o0 +/* 0x00e4 201 */ sub %o1,%o2,%o1 +/* 0x00e8 203 */ ld [%i0+8],%i3 +/* 0x00ec 204 */ and %o1,%g2,%g5 +/* 0x00f0 */ st %g5,[%i2] +/* 0x00f4 205 */ srax %o1,32,%g5 +/* 0x00f8 201 */ sub %o0,%o3,%o0 +/* 0x00fc 203 */ ld [%i0+12],%o2 +/* 0x0100 201 */ add %o0,%g5,%o0 +/* 0x0104 204 */ and %o0,%g2,%g5 +/* 0x0108 */ st %g5,[%i2+4] +/* 0x010c 205 */ srax %o0,32,%o0 +/* 0x0110 203 */ ld [%i2+8],%o1 +/* 0x0114 */ ld [%i2+12],%o3 +/* 0x0118 201 */ sub %o1,%i3,%o1 + .L900000505: +/* 0x011c */ add %g1,4,%g3 +/* 0x0120 203 */ ld [%g1+%i2],%g5 +/* 0x0124 201 */ add %o1,%o0,%o0 +/* 0x0128 203 */ ld [%i0+%g1],%i3 +/* 0x012c 201 */ sub %o3,%o2,%o1 +/* 0x0130 204 */ and %o0,%g2,%o2 +/* 0x0134 */ st %o2,[%o5+%i2] +/* 0x0138 205 */ srax %o0,32,%o2 +/* 0x013c */ add %i1,4,%i1 +/* 0x0140 201 */ add %g1,8,%o5 +/* 0x0144 203 */ ld [%g3+%i2],%o0 +/* 0x0148 201 */ add %o1,%o2,%o1 +/* 0x014c 203 */ ld [%i0+%g3],%o3 +/* 0x0150 201 */ sub %g5,%i3,%o2 +/* 0x0154 204 */ and %o1,%g2,%g5 +/* 0x0158 */ st %g5,[%o4+%i2] +/* 0x015c 205 */ srax %o1,32,%g5 +/* 0x0160 */ cmp %i1,%o7 +/* 0x0164 201 */ add %g1,12,%o4 +/* 0x0168 203 */ ld [%o5+%i2],%o1 +/* 0x016c 201 */ add %o2,%g5,%o2 +/* 0x0170 203 */ ld [%i0+%o5],%i3 +/* 0x0174 201 */ sub %o0,%o3,%o0 +/* 0x0178 204 */ and %o2,%g2,%o3 +/* 0x017c */ st %o3,[%g1+%i2] +/* 0x0180 205 */ srax %o2,32,%g5 +/* 0x0184 203 */ ld [%o4+%i2],%o3 +/* 0x0188 201 */ add %g1,16,%g1 
+/* 0x018c */ add %o0,%g5,%o0 +/* 0x0190 203 */ ld [%i0+%o4],%o2 +/* 0x0194 201 */ sub %o1,%i3,%o1 +/* 0x0198 204 */ and %o0,%g2,%g5 +/* 0x019c */ st %g5,[%g3+%i2] +/* 0x01a0 205 */ ble,pt %icc,.L900000505 +/* 0x01a4 */ srax %o0,32,%o0 + .L900000508: +/* 0x01a8 */ add %o1,%o0,%g3 +/* 0x01ac */ sub %o3,%o2,%o1 +/* 0x01b0 203 */ ld [%g1+%i2],%o0 +/* 0x01b4 */ ld [%i0+%g1],%o2 +/* 0x01b8 205 */ srax %g3,32,%o7 +/* 0x01bc 204 */ and %g3,%g2,%o3 +/* 0x01c0 201 */ add %o1,%o7,%o1 +/* 0x01c4 204 */ st %o3,[%o5+%i2] +/* 0x01c8 205 */ cmp %i1,%g4 +/* 0x01cc 201 */ sub %o0,%o2,%o0 +/* 0x01d0 204 */ and %o1,%g2,%o2 +/* 0x01d4 */ st %o2,[%o4+%i2] +/* 0x01d8 205 */ srax %o1,32,%o1 +/* 0x01dc 203 */ sra %i1,0,%o2 +/* 0x01e0 201 */ add %o0,%o1,%o0 +/* 0x01e4 205 */ srax %o0,32,%g5 +/* 0x01e8 204 */ and %o0,%g2,%o1 +/* 0x01ec */ st %o1,[%g1+%i2] +/* 0x01f0 205 */ bg,pn %icc,.L77000191 +/* 0x01f4 */ sllx %o2,2,%o1 + .L77000199: +/* 0x01f8 0 */ or %g0,%o1,%g1 + .L900000509: +/* 0x01fc 203 */ ld [%o1+%i2],%o0 +/* 0x0200 205 */ add %i1,1,%i1 +/* 0x0204 203 */ ld [%i0+%o1],%o1 +/* 0x0208 */ sra %i1,0,%o2 +/* 0x020c 205 */ cmp %i1,%g4 +/* 0x0210 203 */ add %g5,%o0,%o0 +/* 0x0214 */ sub %o0,%o1,%o0 +/* 0x0218 205 */ srax %o0,32,%g5 +/* 0x021c 204 */ and %o0,%g2,%o1 +/* 0x0220 */ st %o1,[%g1+%i2] +/* 0x0224 */ sllx %o2,2,%o1 +/* 0x0228 205 */ ble,pt %icc,.L900000509 +/* 0x022c */ or %g0,%o1,%g1 + .L77000191: +/* 0x0230 */ ret ! Result = +/* 0x0234 */ restore %g0,%g0,%g0 +/* 0x0238 0 */ .type adjust_montf_result,2 +/* 0x0238 */ .size adjust_montf_result,(.-adjust_montf_result) + + .section ".text",#alloc,#execinstr +/* 000000 0 */ .align 8 +/* 000000 */ .skip 24 +! +! SUBROUTINE mont_mulf_noconv +! +! OFFSET SOURCE LINE LABEL INSTRUCTION + + .global mont_mulf_noconv + mont_mulf_noconv: +/* 000000 */ save %sp,-224,%sp + .L900000643: +/* 0x0004 */ call .+8 +/* 0x0008 */ sethi /*X*/%hi(_GLOBAL_OFFSET_TABLE_-(.L900000643-.)),%g5 +/* 0x000c */ ldx [%fp+2223],%l0 + +! 206 ! } +! 207 ! } +! 
208 !} +! 213 !/* +! 214 !** the lengths of the input arrays should be at least the following: +! 215 !** result[nlen+1], dm1[nlen], dm2[2*nlen+1], dt[4*nlen+2], dn[nlen], nint[nlen] +! 216 !** all of them should be different from one another +! 217 !** +! 218 !*/ +! 219 !void mont_mulf_noconv(unsigned int *result, +! 220 ! double *dm1, double *dm2, double *dt, +! 221 ! double *dn, unsigned int *nint, +! 222 ! int nlen, double dn0) +! 223 !{ +! 224 ! int i, j, jj; +! 225 ! int tmp; +! 226 ! double digit, m2j, nextm2j, a, b; +! 227 ! double *dptmp, *pdm1, *pdm2, *pdn, *pdtj, pdn_0, pdm1_0; +! 229 ! pdm1=&(dm1[0]); +! 230 ! pdm2=&(dm2[0]); +! 231 ! pdn=&(dn[0]); +! 232 ! pdm2[2*nlen]=Zero; + +/* 0x0010 232 */ sethi %hi(Zero),%g2 +/* 0x0014 223 */ fmovd %f14,%f30 +/* 0x0018 */ add %g5,/*X*/%lo(_GLOBAL_OFFSET_TABLE_-(.L900000643-.)),%g5 +/* 0x001c 232 */ add %g2,%lo(Zero),%g2 +/* 0x0020 */ sll %l0,1,%o3 +/* 0x0024 223 */ add %g5,%o7,%o4 +/* 0x0028 232 */ sra %o3,0,%g5 +/* 0x002c */ ldx [%o4+%g2],%o7 + +! 234 ! if (nlen!=16) +! 235 ! { +! 236 ! for(i=0;i<4*nlen+2;i++) dt[i]=Zero; +! 238 ! a=dt[0]=pdm1[0]*pdm2[0]; +! 239 ! digit=mod(lower32(a,Zero)*dn0,TwoToMinus16,TwoTo16); + +/* 0x0030 239 */ sethi %hi(TwoToMinus16),%g3 +/* 0x0034 */ sethi %hi(TwoTo16),%g4 +/* 0x0038 */ add %g3,%lo(TwoToMinus16),%g2 +/* 0x003c 232 */ ldd [%o7],%f0 +/* 0x0040 239 */ add %g4,%lo(TwoTo16),%g3 +/* 0x0044 223 */ or %g0,%i4,%o0 +/* 0x0048 232 */ sllx %g5,3,%g4 +/* 0x004c 239 */ ldx [%o4+%g2],%o5 +/* 0x0050 223 */ or %g0,%i5,%l3 +/* 0x0054 */ or %g0,%i0,%l2 +/* 0x0058 239 */ ldx [%o4+%g3],%o4 +/* 0x005c 234 */ cmp %l0,16 +/* 0x0060 232 */ std %f0,[%i2+%g4] +/* 0x0064 234 */ be,pn %icc,.L77000279 +/* 0x0068 */ or %g0,%i3,%l4 +/* 0x006c 236 */ sll %l0,2,%g2 +/* 0x0070 223 */ or %g0,%o0,%i5 +/* 0x0074 236 */ add %g2,2,%o0 +/* 0x0078 223 */ or %g0,%i1,%i4 +/* 0x007c 236 */ cmp %o0,0 +/* 0x0080 223 */ or %g0,%i2,%l1 +/* 0x0084 236 */ ble,a,pt %icc,.L900000657 +/* 0x0088 */ ldd [%i1],%f6 + +! 241 ! 
pdtj=&(dt[0]); +! 242 ! for(j=jj=0;j<2*nlen;j++,jj++,pdtj++) +! 243 ! { +! 244 ! m2j=pdm2[j]; +! 245 ! a=pdtj[0]+pdn[0]*digit; +! 246 ! b=pdtj[1]+pdm1[0]*pdm2[j+1]+a*TwoToMinus16; +! 247 ! pdtj[1]=b; +! 249 !#pragma pipeloop(0) +! 250 ! for(i=1;iused && (a)->dp[(a)->used - 1] == 0) \ + --((a)->used); \ + (a)->sign = (a)->used ? (a)->sign : ZPOS; \ + } + +/* anything you need at the start */ +#define COMBA_START + +/* clear the chaining variables */ +#define COMBA_CLEAR \ + c0 = c1 = c2 = 0; + +/* forward the carry to the next digit */ +#define COMBA_FORWARD \ + do { \ + c0 = c1; \ + c1 = c2; \ + c2 = 0; \ + } while (0); + +/* anything you need at the end */ +#define COMBA_FINI + +/* this should multiply i and j */ +#define MULADD(i, j) \ + __asm__( \ + "movq %6,%%rax \n\t" \ + "mulq %7 \n\t" \ + "addq %%rax,%0 \n\t" \ + "adcq %%rdx,%1 \n\t" \ + "adcq $0,%2 \n\t" \ + : "=r"(c0), "=r"(c1), "=r"(c2) \ + : "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) \ + : "%rax", "%rdx", "cc"); + +/* sqr macros only */ +#define CLEAR_CARRY \ + c0 = c1 = c2 = 0; + +#define COMBA_STORE(x) \ + x = c0; + +#define COMBA_STORE2(x) \ + x = c1; + +#define CARRY_FORWARD \ + do { \ + c0 = c1; \ + c1 = c2; \ + c2 = 0; \ + } while (0); + +#define COMBA_FINI + +#define SQRADD(i, j) \ + __asm__( \ + "movq %6,%%rax \n\t" \ + "mulq %%rax \n\t" \ + "addq %%rax,%0 \n\t" \ + "adcq %%rdx,%1 \n\t" \ + "adcq $0,%2 \n\t" \ + : "=r"(c0), "=r"(c1), "=r"(c2) \ + : "0"(c0), "1"(c1), "2"(c2), "g"(i) \ + : "%rax", "%rdx", "cc"); + +#define SQRADD2(i, j) \ + __asm__( \ + "movq %6,%%rax \n\t" \ + "mulq %7 \n\t" \ + "addq %%rax,%0 \n\t" \ + "adcq %%rdx,%1 \n\t" \ + "adcq $0,%2 \n\t" \ + "addq %%rax,%0 \n\t" \ + "adcq %%rdx,%1 \n\t" \ + "adcq $0,%2 \n\t" \ + : "=r"(c0), "=r"(c1), "=r"(c2) \ + : "0"(c0), "1"(c1), "2"(c2), "g"(i), "g"(j) \ + : "%rax", "%rdx", "cc"); + +#define SQRADDSC(i, j) \ + __asm__( \ + "movq %3,%%rax \n\t" \ + "mulq %4 \n\t" \ + "movq %%rax,%0 \n\t" \ + "movq %%rdx,%1 \n\t" \ + "xorq %2,%2 \n\t" \ + 
: "=r"(sc0), "=r"(sc1), "=r"(sc2) \ + : "g"(i), "g"(j) \ + : "%rax", "%rdx", "cc"); + +#define SQRADDAC(i, j) \ + __asm__( \ + "movq %6,%%rax \n\t" \ + "mulq %7 \n\t" \ + "addq %%rax,%0 \n\t" \ + "adcq %%rdx,%1 \n\t" \ + "adcq $0,%2 \n\t" \ + : "=r"(sc0), "=r"(sc1), "=r"(sc2) \ + : "0"(sc0), "1"(sc1), "2"(sc2), "g"(i), "g"(j) \ + : "%rax", "%rdx", "cc"); + +#define SQRADDDB \ + __asm__( \ + "addq %6,%0 \n\t" \ + "adcq %7,%1 \n\t" \ + "adcq %8,%2 \n\t" \ + "addq %6,%0 \n\t" \ + "adcq %7,%1 \n\t" \ + "adcq %8,%2 \n\t" \ + : "=&r"(c0), "=&r"(c1), "=&r"(c2) \ + : "0"(c0), "1"(c1), "2"(c2), "r"(sc0), "r"(sc1), "r"(sc2) \ + : "cc"); + +void +s_mp_mul_comba_4(const mp_int *A, const mp_int *B, mp_int *C) +{ + mp_digit c0, c1, c2, at[8]; + + memcpy(at, A->dp, 4 * sizeof(mp_digit)); + memcpy(at + 4, B->dp, 4 * sizeof(mp_digit)); + COMBA_START; + + COMBA_CLEAR; + /* 0 */ + MULADD(at[0], at[4]); + COMBA_STORE(C->dp[0]); + /* 1 */ + COMBA_FORWARD; + MULADD(at[0], at[5]); + MULADD(at[1], at[4]); + COMBA_STORE(C->dp[1]); + /* 2 */ + COMBA_FORWARD; + MULADD(at[0], at[6]); + MULADD(at[1], at[5]); + MULADD(at[2], at[4]); + COMBA_STORE(C->dp[2]); + /* 3 */ + COMBA_FORWARD; + MULADD(at[0], at[7]); + MULADD(at[1], at[6]); + MULADD(at[2], at[5]); + MULADD(at[3], at[4]); + COMBA_STORE(C->dp[3]); + /* 4 */ + COMBA_FORWARD; + MULADD(at[1], at[7]); + MULADD(at[2], at[6]); + MULADD(at[3], at[5]); + COMBA_STORE(C->dp[4]); + /* 5 */ + COMBA_FORWARD; + MULADD(at[2], at[7]); + MULADD(at[3], at[6]); + COMBA_STORE(C->dp[5]); + /* 6 */ + COMBA_FORWARD; + MULADD(at[3], at[7]); + COMBA_STORE(C->dp[6]); + COMBA_STORE2(C->dp[7]); + C->used = 8; + C->sign = A->sign ^ B->sign; + mp_clamp(C); + COMBA_FINI; +} + +void +s_mp_mul_comba_8(const mp_int *A, const mp_int *B, mp_int *C) +{ + mp_digit c0, c1, c2, at[16]; + + memcpy(at, A->dp, 8 * sizeof(mp_digit)); + memcpy(at + 8, B->dp, 8 * sizeof(mp_digit)); + COMBA_START; + + COMBA_CLEAR; + /* 0 */ + MULADD(at[0], at[8]); + COMBA_STORE(C->dp[0]); + /* 1 */ + 
COMBA_FORWARD; + MULADD(at[0], at[9]); + MULADD(at[1], at[8]); + COMBA_STORE(C->dp[1]); + /* 2 */ + COMBA_FORWARD; + MULADD(at[0], at[10]); + MULADD(at[1], at[9]); + MULADD(at[2], at[8]); + COMBA_STORE(C->dp[2]); + /* 3 */ + COMBA_FORWARD; + MULADD(at[0], at[11]); + MULADD(at[1], at[10]); + MULADD(at[2], at[9]); + MULADD(at[3], at[8]); + COMBA_STORE(C->dp[3]); + /* 4 */ + COMBA_FORWARD; + MULADD(at[0], at[12]); + MULADD(at[1], at[11]); + MULADD(at[2], at[10]); + MULADD(at[3], at[9]); + MULADD(at[4], at[8]); + COMBA_STORE(C->dp[4]); + /* 5 */ + COMBA_FORWARD; + MULADD(at[0], at[13]); + MULADD(at[1], at[12]); + MULADD(at[2], at[11]); + MULADD(at[3], at[10]); + MULADD(at[4], at[9]); + MULADD(at[5], at[8]); + COMBA_STORE(C->dp[5]); + /* 6 */ + COMBA_FORWARD; + MULADD(at[0], at[14]); + MULADD(at[1], at[13]); + MULADD(at[2], at[12]); + MULADD(at[3], at[11]); + MULADD(at[4], at[10]); + MULADD(at[5], at[9]); + MULADD(at[6], at[8]); + COMBA_STORE(C->dp[6]); + /* 7 */ + COMBA_FORWARD; + MULADD(at[0], at[15]); + MULADD(at[1], at[14]); + MULADD(at[2], at[13]); + MULADD(at[3], at[12]); + MULADD(at[4], at[11]); + MULADD(at[5], at[10]); + MULADD(at[6], at[9]); + MULADD(at[7], at[8]); + COMBA_STORE(C->dp[7]); + /* 8 */ + COMBA_FORWARD; + MULADD(at[1], at[15]); + MULADD(at[2], at[14]); + MULADD(at[3], at[13]); + MULADD(at[4], at[12]); + MULADD(at[5], at[11]); + MULADD(at[6], at[10]); + MULADD(at[7], at[9]); + COMBA_STORE(C->dp[8]); + /* 9 */ + COMBA_FORWARD; + MULADD(at[2], at[15]); + MULADD(at[3], at[14]); + MULADD(at[4], at[13]); + MULADD(at[5], at[12]); + MULADD(at[6], at[11]); + MULADD(at[7], at[10]); + COMBA_STORE(C->dp[9]); + /* 10 */ + COMBA_FORWARD; + MULADD(at[3], at[15]); + MULADD(at[4], at[14]); + MULADD(at[5], at[13]); + MULADD(at[6], at[12]); + MULADD(at[7], at[11]); + COMBA_STORE(C->dp[10]); + /* 11 */ + COMBA_FORWARD; + MULADD(at[4], at[15]); + MULADD(at[5], at[14]); + MULADD(at[6], at[13]); + MULADD(at[7], at[12]); + COMBA_STORE(C->dp[11]); + /* 12 */ + 
COMBA_FORWARD; + MULADD(at[5], at[15]); + MULADD(at[6], at[14]); + MULADD(at[7], at[13]); + COMBA_STORE(C->dp[12]); + /* 13 */ + COMBA_FORWARD; + MULADD(at[6], at[15]); + MULADD(at[7], at[14]); + COMBA_STORE(C->dp[13]); + /* 14 */ + COMBA_FORWARD; + MULADD(at[7], at[15]); + COMBA_STORE(C->dp[14]); + COMBA_STORE2(C->dp[15]); + C->used = 16; + C->sign = A->sign ^ B->sign; + mp_clamp(C); + COMBA_FINI; +} + +void +s_mp_mul_comba_16(const mp_int *A, const mp_int *B, mp_int *C) +{ + mp_digit c0, c1, c2, at[32]; + + memcpy(at, A->dp, 16 * sizeof(mp_digit)); + memcpy(at + 16, B->dp, 16 * sizeof(mp_digit)); + COMBA_START; + + COMBA_CLEAR; + /* 0 */ + MULADD(at[0], at[16]); + COMBA_STORE(C->dp[0]); + /* 1 */ + COMBA_FORWARD; + MULADD(at[0], at[17]); + MULADD(at[1], at[16]); + COMBA_STORE(C->dp[1]); + /* 2 */ + COMBA_FORWARD; + MULADD(at[0], at[18]); + MULADD(at[1], at[17]); + MULADD(at[2], at[16]); + COMBA_STORE(C->dp[2]); + /* 3 */ + COMBA_FORWARD; + MULADD(at[0], at[19]); + MULADD(at[1], at[18]); + MULADD(at[2], at[17]); + MULADD(at[3], at[16]); + COMBA_STORE(C->dp[3]); + /* 4 */ + COMBA_FORWARD; + MULADD(at[0], at[20]); + MULADD(at[1], at[19]); + MULADD(at[2], at[18]); + MULADD(at[3], at[17]); + MULADD(at[4], at[16]); + COMBA_STORE(C->dp[4]); + /* 5 */ + COMBA_FORWARD; + MULADD(at[0], at[21]); + MULADD(at[1], at[20]); + MULADD(at[2], at[19]); + MULADD(at[3], at[18]); + MULADD(at[4], at[17]); + MULADD(at[5], at[16]); + COMBA_STORE(C->dp[5]); + /* 6 */ + COMBA_FORWARD; + MULADD(at[0], at[22]); + MULADD(at[1], at[21]); + MULADD(at[2], at[20]); + MULADD(at[3], at[19]); + MULADD(at[4], at[18]); + MULADD(at[5], at[17]); + MULADD(at[6], at[16]); + COMBA_STORE(C->dp[6]); + /* 7 */ + COMBA_FORWARD; + MULADD(at[0], at[23]); + MULADD(at[1], at[22]); + MULADD(at[2], at[21]); + MULADD(at[3], at[20]); + MULADD(at[4], at[19]); + MULADD(at[5], at[18]); + MULADD(at[6], at[17]); + MULADD(at[7], at[16]); + COMBA_STORE(C->dp[7]); + /* 8 */ + COMBA_FORWARD; + MULADD(at[0], at[24]); + 
MULADD(at[1], at[23]); + MULADD(at[2], at[22]); + MULADD(at[3], at[21]); + MULADD(at[4], at[20]); + MULADD(at[5], at[19]); + MULADD(at[6], at[18]); + MULADD(at[7], at[17]); + MULADD(at[8], at[16]); + COMBA_STORE(C->dp[8]); + /* 9 */ + COMBA_FORWARD; + MULADD(at[0], at[25]); + MULADD(at[1], at[24]); + MULADD(at[2], at[23]); + MULADD(at[3], at[22]); + MULADD(at[4], at[21]); + MULADD(at[5], at[20]); + MULADD(at[6], at[19]); + MULADD(at[7], at[18]); + MULADD(at[8], at[17]); + MULADD(at[9], at[16]); + COMBA_STORE(C->dp[9]); + /* 10 */ + COMBA_FORWARD; + MULADD(at[0], at[26]); + MULADD(at[1], at[25]); + MULADD(at[2], at[24]); + MULADD(at[3], at[23]); + MULADD(at[4], at[22]); + MULADD(at[5], at[21]); + MULADD(at[6], at[20]); + MULADD(at[7], at[19]); + MULADD(at[8], at[18]); + MULADD(at[9], at[17]); + MULADD(at[10], at[16]); + COMBA_STORE(C->dp[10]); + /* 11 */ + COMBA_FORWARD; + MULADD(at[0], at[27]); + MULADD(at[1], at[26]); + MULADD(at[2], at[25]); + MULADD(at[3], at[24]); + MULADD(at[4], at[23]); + MULADD(at[5], at[22]); + MULADD(at[6], at[21]); + MULADD(at[7], at[20]); + MULADD(at[8], at[19]); + MULADD(at[9], at[18]); + MULADD(at[10], at[17]); + MULADD(at[11], at[16]); + COMBA_STORE(C->dp[11]); + /* 12 */ + COMBA_FORWARD; + MULADD(at[0], at[28]); + MULADD(at[1], at[27]); + MULADD(at[2], at[26]); + MULADD(at[3], at[25]); + MULADD(at[4], at[24]); + MULADD(at[5], at[23]); + MULADD(at[6], at[22]); + MULADD(at[7], at[21]); + MULADD(at[8], at[20]); + MULADD(at[9], at[19]); + MULADD(at[10], at[18]); + MULADD(at[11], at[17]); + MULADD(at[12], at[16]); + COMBA_STORE(C->dp[12]); + /* 13 */ + COMBA_FORWARD; + MULADD(at[0], at[29]); + MULADD(at[1], at[28]); + MULADD(at[2], at[27]); + MULADD(at[3], at[26]); + MULADD(at[4], at[25]); + MULADD(at[5], at[24]); + MULADD(at[6], at[23]); + MULADD(at[7], at[22]); + MULADD(at[8], at[21]); + MULADD(at[9], at[20]); + MULADD(at[10], at[19]); + MULADD(at[11], at[18]); + MULADD(at[12], at[17]); + MULADD(at[13], at[16]); + 
COMBA_STORE(C->dp[13]); + /* 14 */ + COMBA_FORWARD; + MULADD(at[0], at[30]); + MULADD(at[1], at[29]); + MULADD(at[2], at[28]); + MULADD(at[3], at[27]); + MULADD(at[4], at[26]); + MULADD(at[5], at[25]); + MULADD(at[6], at[24]); + MULADD(at[7], at[23]); + MULADD(at[8], at[22]); + MULADD(at[9], at[21]); + MULADD(at[10], at[20]); + MULADD(at[11], at[19]); + MULADD(at[12], at[18]); + MULADD(at[13], at[17]); + MULADD(at[14], at[16]); + COMBA_STORE(C->dp[14]); + /* 15 */ + COMBA_FORWARD; + MULADD(at[0], at[31]); + MULADD(at[1], at[30]); + MULADD(at[2], at[29]); + MULADD(at[3], at[28]); + MULADD(at[4], at[27]); + MULADD(at[5], at[26]); + MULADD(at[6], at[25]); + MULADD(at[7], at[24]); + MULADD(at[8], at[23]); + MULADD(at[9], at[22]); + MULADD(at[10], at[21]); + MULADD(at[11], at[20]); + MULADD(at[12], at[19]); + MULADD(at[13], at[18]); + MULADD(at[14], at[17]); + MULADD(at[15], at[16]); + COMBA_STORE(C->dp[15]); + /* 16 */ + COMBA_FORWARD; + MULADD(at[1], at[31]); + MULADD(at[2], at[30]); + MULADD(at[3], at[29]); + MULADD(at[4], at[28]); + MULADD(at[5], at[27]); + MULADD(at[6], at[26]); + MULADD(at[7], at[25]); + MULADD(at[8], at[24]); + MULADD(at[9], at[23]); + MULADD(at[10], at[22]); + MULADD(at[11], at[21]); + MULADD(at[12], at[20]); + MULADD(at[13], at[19]); + MULADD(at[14], at[18]); + MULADD(at[15], at[17]); + COMBA_STORE(C->dp[16]); + /* 17 */ + COMBA_FORWARD; + MULADD(at[2], at[31]); + MULADD(at[3], at[30]); + MULADD(at[4], at[29]); + MULADD(at[5], at[28]); + MULADD(at[6], at[27]); + MULADD(at[7], at[26]); + MULADD(at[8], at[25]); + MULADD(at[9], at[24]); + MULADD(at[10], at[23]); + MULADD(at[11], at[22]); + MULADD(at[12], at[21]); + MULADD(at[13], at[20]); + MULADD(at[14], at[19]); + MULADD(at[15], at[18]); + COMBA_STORE(C->dp[17]); + /* 18 */ + COMBA_FORWARD; + MULADD(at[3], at[31]); + MULADD(at[4], at[30]); + MULADD(at[5], at[29]); + MULADD(at[6], at[28]); + MULADD(at[7], at[27]); + MULADD(at[8], at[26]); + MULADD(at[9], at[25]); + MULADD(at[10], at[24]); + 
MULADD(at[11], at[23]); + MULADD(at[12], at[22]); + MULADD(at[13], at[21]); + MULADD(at[14], at[20]); + MULADD(at[15], at[19]); + COMBA_STORE(C->dp[18]); + /* 19 */ + COMBA_FORWARD; + MULADD(at[4], at[31]); + MULADD(at[5], at[30]); + MULADD(at[6], at[29]); + MULADD(at[7], at[28]); + MULADD(at[8], at[27]); + MULADD(at[9], at[26]); + MULADD(at[10], at[25]); + MULADD(at[11], at[24]); + MULADD(at[12], at[23]); + MULADD(at[13], at[22]); + MULADD(at[14], at[21]); + MULADD(at[15], at[20]); + COMBA_STORE(C->dp[19]); + /* 20 */ + COMBA_FORWARD; + MULADD(at[5], at[31]); + MULADD(at[6], at[30]); + MULADD(at[7], at[29]); + MULADD(at[8], at[28]); + MULADD(at[9], at[27]); + MULADD(at[10], at[26]); + MULADD(at[11], at[25]); + MULADD(at[12], at[24]); + MULADD(at[13], at[23]); + MULADD(at[14], at[22]); + MULADD(at[15], at[21]); + COMBA_STORE(C->dp[20]); + /* 21 */ + COMBA_FORWARD; + MULADD(at[6], at[31]); + MULADD(at[7], at[30]); + MULADD(at[8], at[29]); + MULADD(at[9], at[28]); + MULADD(at[10], at[27]); + MULADD(at[11], at[26]); + MULADD(at[12], at[25]); + MULADD(at[13], at[24]); + MULADD(at[14], at[23]); + MULADD(at[15], at[22]); + COMBA_STORE(C->dp[21]); + /* 22 */ + COMBA_FORWARD; + MULADD(at[7], at[31]); + MULADD(at[8], at[30]); + MULADD(at[9], at[29]); + MULADD(at[10], at[28]); + MULADD(at[11], at[27]); + MULADD(at[12], at[26]); + MULADD(at[13], at[25]); + MULADD(at[14], at[24]); + MULADD(at[15], at[23]); + COMBA_STORE(C->dp[22]); + /* 23 */ + COMBA_FORWARD; + MULADD(at[8], at[31]); + MULADD(at[9], at[30]); + MULADD(at[10], at[29]); + MULADD(at[11], at[28]); + MULADD(at[12], at[27]); + MULADD(at[13], at[26]); + MULADD(at[14], at[25]); + MULADD(at[15], at[24]); + COMBA_STORE(C->dp[23]); + /* 24 */ + COMBA_FORWARD; + MULADD(at[9], at[31]); + MULADD(at[10], at[30]); + MULADD(at[11], at[29]); + MULADD(at[12], at[28]); + MULADD(at[13], at[27]); + MULADD(at[14], at[26]); + MULADD(at[15], at[25]); + COMBA_STORE(C->dp[24]); + /* 25 */ + COMBA_FORWARD; + MULADD(at[10], at[31]); + 
MULADD(at[11], at[30]); + MULADD(at[12], at[29]); + MULADD(at[13], at[28]); + MULADD(at[14], at[27]); + MULADD(at[15], at[26]); + COMBA_STORE(C->dp[25]); + /* 26 */ + COMBA_FORWARD; + MULADD(at[11], at[31]); + MULADD(at[12], at[30]); + MULADD(at[13], at[29]); + MULADD(at[14], at[28]); + MULADD(at[15], at[27]); + COMBA_STORE(C->dp[26]); + /* 27 */ + COMBA_FORWARD; + MULADD(at[12], at[31]); + MULADD(at[13], at[30]); + MULADD(at[14], at[29]); + MULADD(at[15], at[28]); + COMBA_STORE(C->dp[27]); + /* 28 */ + COMBA_FORWARD; + MULADD(at[13], at[31]); + MULADD(at[14], at[30]); + MULADD(at[15], at[29]); + COMBA_STORE(C->dp[28]); + /* 29 */ + COMBA_FORWARD; + MULADD(at[14], at[31]); + MULADD(at[15], at[30]); + COMBA_STORE(C->dp[29]); + /* 30 */ + COMBA_FORWARD; + MULADD(at[15], at[31]); + COMBA_STORE(C->dp[30]); + COMBA_STORE2(C->dp[31]); + C->used = 32; + C->sign = A->sign ^ B->sign; + mp_clamp(C); + COMBA_FINI; +} + /* s_mp_mul_comba_32: fully unrolled column-wise (Comba-style) product of two 32-digit operands. The first 32 digits of A->dp and of B->dp are copied into at[0..31] and at[32..63]. For each result column k = 0..62, every pair at[i], at[32 + j] with i + j == k is passed to MULADD, the column is written with COMBA_STORE(C->dp[k]), and COMBA_FORWARD is issued before the next column. COMBA_STORE2 then writes C->dp[63]; finally C->used is set to 64, C->sign to A->sign ^ B->sign, and mp_clamp(C) is called. Exactly 32 digits are read from each operand whatever A->used and B->used are, and C->dp[0..63] is written with no size check in this function, so the caller has to supply buffers of at least those sizes. NOTE(review): MULADD and the COMBA_* macros are defined outside this chunk; presumably c0/c1/c2 hold the running three-word column sum - confirm against the macro definitions. */ +void +s_mp_mul_comba_32(const mp_int *A, const mp_int *B, mp_int *C) +{ + mp_digit c0, c1, c2, at[64]; /* at[] is a local copy of both operands: A in at[0..31], B in at[32..63] */ + + memcpy(at, A->dp, 32 * sizeof(mp_digit)); + memcpy(at + 32, B->dp, 32 * sizeof(mp_digit)); /* every operand read below goes through at[], never through A->dp or B->dp, so the stores to C->dp cannot change digits that are read later (presumably to let C share storage with A or B - confirm with callers) */ + COMBA_START; + + COMBA_CLEAR; + /* 0 */ + MULADD(at[0], at[32]); + COMBA_STORE(C->dp[0]); + /* 1 */ + COMBA_FORWARD; + MULADD(at[0], at[33]); + MULADD(at[1], at[32]); + COMBA_STORE(C->dp[1]); + /* 2 */ + COMBA_FORWARD; + MULADD(at[0], at[34]); + MULADD(at[1], at[33]); + MULADD(at[2], at[32]); + COMBA_STORE(C->dp[2]); + /* 3 */ + COMBA_FORWARD; + MULADD(at[0], at[35]); + MULADD(at[1], at[34]); + MULADD(at[2], at[33]); + MULADD(at[3], at[32]); + COMBA_STORE(C->dp[3]); + /* 4 */ + COMBA_FORWARD; + MULADD(at[0], at[36]); + MULADD(at[1], at[35]); + MULADD(at[2], at[34]); + MULADD(at[3], at[33]); + MULADD(at[4], at[32]); + COMBA_STORE(C->dp[4]); + /* 5 */ + COMBA_FORWARD; + MULADD(at[0], at[37]); + MULADD(at[1], at[36]); + MULADD(at[2], at[35]); + MULADD(at[3], at[34]); + MULADD(at[4], at[33]); + MULADD(at[5], at[32]); + COMBA_STORE(C->dp[5]); + /* 6 */ + COMBA_FORWARD;
+ MULADD(at[0], at[38]); + MULADD(at[1], at[37]); + MULADD(at[2], at[36]); + MULADD(at[3], at[35]); + MULADD(at[4], at[34]); + MULADD(at[5], at[33]); + MULADD(at[6], at[32]); + COMBA_STORE(C->dp[6]); + /* 7 */ + COMBA_FORWARD; + MULADD(at[0], at[39]); + MULADD(at[1], at[38]); + MULADD(at[2], at[37]); + MULADD(at[3], at[36]); + MULADD(at[4], at[35]); + MULADD(at[5], at[34]); + MULADD(at[6], at[33]); + MULADD(at[7], at[32]); + COMBA_STORE(C->dp[7]); + /* 8 */ + COMBA_FORWARD; + MULADD(at[0], at[40]); + MULADD(at[1], at[39]); + MULADD(at[2], at[38]); + MULADD(at[3], at[37]); + MULADD(at[4], at[36]); + MULADD(at[5], at[35]); + MULADD(at[6], at[34]); + MULADD(at[7], at[33]); + MULADD(at[8], at[32]); + COMBA_STORE(C->dp[8]); + /* 9 */ + COMBA_FORWARD; + MULADD(at[0], at[41]); + MULADD(at[1], at[40]); + MULADD(at[2], at[39]); + MULADD(at[3], at[38]); + MULADD(at[4], at[37]); + MULADD(at[5], at[36]); + MULADD(at[6], at[35]); + MULADD(at[7], at[34]); + MULADD(at[8], at[33]); + MULADD(at[9], at[32]); + COMBA_STORE(C->dp[9]); + /* 10 */ + COMBA_FORWARD; + MULADD(at[0], at[42]); + MULADD(at[1], at[41]); + MULADD(at[2], at[40]); + MULADD(at[3], at[39]); + MULADD(at[4], at[38]); + MULADD(at[5], at[37]); + MULADD(at[6], at[36]); + MULADD(at[7], at[35]); + MULADD(at[8], at[34]); + MULADD(at[9], at[33]); + MULADD(at[10], at[32]); + COMBA_STORE(C->dp[10]); + /* 11 */ + COMBA_FORWARD; + MULADD(at[0], at[43]); + MULADD(at[1], at[42]); + MULADD(at[2], at[41]); + MULADD(at[3], at[40]); + MULADD(at[4], at[39]); + MULADD(at[5], at[38]); + MULADD(at[6], at[37]); + MULADD(at[7], at[36]); + MULADD(at[8], at[35]); + MULADD(at[9], at[34]); + MULADD(at[10], at[33]); + MULADD(at[11], at[32]); + COMBA_STORE(C->dp[11]); + /* 12 */ + COMBA_FORWARD; + MULADD(at[0], at[44]); + MULADD(at[1], at[43]); + MULADD(at[2], at[42]); + MULADD(at[3], at[41]); + MULADD(at[4], at[40]); + MULADD(at[5], at[39]); + MULADD(at[6], at[38]); + MULADD(at[7], at[37]); + MULADD(at[8], at[36]); + MULADD(at[9], at[35]); + 
MULADD(at[10], at[34]); + MULADD(at[11], at[33]); + MULADD(at[12], at[32]); + COMBA_STORE(C->dp[12]); + /* 13 */ + COMBA_FORWARD; + MULADD(at[0], at[45]); + MULADD(at[1], at[44]); + MULADD(at[2], at[43]); + MULADD(at[3], at[42]); + MULADD(at[4], at[41]); + MULADD(at[5], at[40]); + MULADD(at[6], at[39]); + MULADD(at[7], at[38]); + MULADD(at[8], at[37]); + MULADD(at[9], at[36]); + MULADD(at[10], at[35]); + MULADD(at[11], at[34]); + MULADD(at[12], at[33]); + MULADD(at[13], at[32]); + COMBA_STORE(C->dp[13]); + /* 14 */ + COMBA_FORWARD; + MULADD(at[0], at[46]); + MULADD(at[1], at[45]); + MULADD(at[2], at[44]); + MULADD(at[3], at[43]); + MULADD(at[4], at[42]); + MULADD(at[5], at[41]); + MULADD(at[6], at[40]); + MULADD(at[7], at[39]); + MULADD(at[8], at[38]); + MULADD(at[9], at[37]); + MULADD(at[10], at[36]); + MULADD(at[11], at[35]); + MULADD(at[12], at[34]); + MULADD(at[13], at[33]); + MULADD(at[14], at[32]); + COMBA_STORE(C->dp[14]); + /* 15 */ + COMBA_FORWARD; + MULADD(at[0], at[47]); + MULADD(at[1], at[46]); + MULADD(at[2], at[45]); + MULADD(at[3], at[44]); + MULADD(at[4], at[43]); + MULADD(at[5], at[42]); + MULADD(at[6], at[41]); + MULADD(at[7], at[40]); + MULADD(at[8], at[39]); + MULADD(at[9], at[38]); + MULADD(at[10], at[37]); + MULADD(at[11], at[36]); + MULADD(at[12], at[35]); + MULADD(at[13], at[34]); + MULADD(at[14], at[33]); + MULADD(at[15], at[32]); + COMBA_STORE(C->dp[15]); + /* 16 */ + COMBA_FORWARD; + MULADD(at[0], at[48]); + MULADD(at[1], at[47]); + MULADD(at[2], at[46]); + MULADD(at[3], at[45]); + MULADD(at[4], at[44]); + MULADD(at[5], at[43]); + MULADD(at[6], at[42]); + MULADD(at[7], at[41]); + MULADD(at[8], at[40]); + MULADD(at[9], at[39]); + MULADD(at[10], at[38]); + MULADD(at[11], at[37]); + MULADD(at[12], at[36]); + MULADD(at[13], at[35]); + MULADD(at[14], at[34]); + MULADD(at[15], at[33]); + MULADD(at[16], at[32]); + COMBA_STORE(C->dp[16]); + /* 17 */ + COMBA_FORWARD; + MULADD(at[0], at[49]); + MULADD(at[1], at[48]); + MULADD(at[2], at[47]); + 
MULADD(at[3], at[46]); + MULADD(at[4], at[45]); + MULADD(at[5], at[44]); + MULADD(at[6], at[43]); + MULADD(at[7], at[42]); + MULADD(at[8], at[41]); + MULADD(at[9], at[40]); + MULADD(at[10], at[39]); + MULADD(at[11], at[38]); + MULADD(at[12], at[37]); + MULADD(at[13], at[36]); + MULADD(at[14], at[35]); + MULADD(at[15], at[34]); + MULADD(at[16], at[33]); + MULADD(at[17], at[32]); + COMBA_STORE(C->dp[17]); + /* 18 */ + COMBA_FORWARD; + MULADD(at[0], at[50]); + MULADD(at[1], at[49]); + MULADD(at[2], at[48]); + MULADD(at[3], at[47]); + MULADD(at[4], at[46]); + MULADD(at[5], at[45]); + MULADD(at[6], at[44]); + MULADD(at[7], at[43]); + MULADD(at[8], at[42]); + MULADD(at[9], at[41]); + MULADD(at[10], at[40]); + MULADD(at[11], at[39]); + MULADD(at[12], at[38]); + MULADD(at[13], at[37]); + MULADD(at[14], at[36]); + MULADD(at[15], at[35]); + MULADD(at[16], at[34]); + MULADD(at[17], at[33]); + MULADD(at[18], at[32]); + COMBA_STORE(C->dp[18]); + /* 19 */ + COMBA_FORWARD; + MULADD(at[0], at[51]); + MULADD(at[1], at[50]); + MULADD(at[2], at[49]); + MULADD(at[3], at[48]); + MULADD(at[4], at[47]); + MULADD(at[5], at[46]); + MULADD(at[6], at[45]); + MULADD(at[7], at[44]); + MULADD(at[8], at[43]); + MULADD(at[9], at[42]); + MULADD(at[10], at[41]); + MULADD(at[11], at[40]); + MULADD(at[12], at[39]); + MULADD(at[13], at[38]); + MULADD(at[14], at[37]); + MULADD(at[15], at[36]); + MULADD(at[16], at[35]); + MULADD(at[17], at[34]); + MULADD(at[18], at[33]); + MULADD(at[19], at[32]); + COMBA_STORE(C->dp[19]); + /* 20 */ + COMBA_FORWARD; + MULADD(at[0], at[52]); + MULADD(at[1], at[51]); + MULADD(at[2], at[50]); + MULADD(at[3], at[49]); + MULADD(at[4], at[48]); + MULADD(at[5], at[47]); + MULADD(at[6], at[46]); + MULADD(at[7], at[45]); + MULADD(at[8], at[44]); + MULADD(at[9], at[43]); + MULADD(at[10], at[42]); + MULADD(at[11], at[41]); + MULADD(at[12], at[40]); + MULADD(at[13], at[39]); + MULADD(at[14], at[38]); + MULADD(at[15], at[37]); + MULADD(at[16], at[36]); + MULADD(at[17], at[35]); + 
MULADD(at[18], at[34]); + MULADD(at[19], at[33]); + MULADD(at[20], at[32]); + COMBA_STORE(C->dp[20]); + /* 21 */ + COMBA_FORWARD; + MULADD(at[0], at[53]); + MULADD(at[1], at[52]); + MULADD(at[2], at[51]); + MULADD(at[3], at[50]); + MULADD(at[4], at[49]); + MULADD(at[5], at[48]); + MULADD(at[6], at[47]); + MULADD(at[7], at[46]); + MULADD(at[8], at[45]); + MULADD(at[9], at[44]); + MULADD(at[10], at[43]); + MULADD(at[11], at[42]); + MULADD(at[12], at[41]); + MULADD(at[13], at[40]); + MULADD(at[14], at[39]); + MULADD(at[15], at[38]); + MULADD(at[16], at[37]); + MULADD(at[17], at[36]); + MULADD(at[18], at[35]); + MULADD(at[19], at[34]); + MULADD(at[20], at[33]); + MULADD(at[21], at[32]); + COMBA_STORE(C->dp[21]); + /* 22 */ + COMBA_FORWARD; + MULADD(at[0], at[54]); + MULADD(at[1], at[53]); + MULADD(at[2], at[52]); + MULADD(at[3], at[51]); + MULADD(at[4], at[50]); + MULADD(at[5], at[49]); + MULADD(at[6], at[48]); + MULADD(at[7], at[47]); + MULADD(at[8], at[46]); + MULADD(at[9], at[45]); + MULADD(at[10], at[44]); + MULADD(at[11], at[43]); + MULADD(at[12], at[42]); + MULADD(at[13], at[41]); + MULADD(at[14], at[40]); + MULADD(at[15], at[39]); + MULADD(at[16], at[38]); + MULADD(at[17], at[37]); + MULADD(at[18], at[36]); + MULADD(at[19], at[35]); + MULADD(at[20], at[34]); + MULADD(at[21], at[33]); + MULADD(at[22], at[32]); + COMBA_STORE(C->dp[22]); + /* 23 */ + COMBA_FORWARD; + MULADD(at[0], at[55]); + MULADD(at[1], at[54]); + MULADD(at[2], at[53]); + MULADD(at[3], at[52]); + MULADD(at[4], at[51]); + MULADD(at[5], at[50]); + MULADD(at[6], at[49]); + MULADD(at[7], at[48]); + MULADD(at[8], at[47]); + MULADD(at[9], at[46]); + MULADD(at[10], at[45]); + MULADD(at[11], at[44]); + MULADD(at[12], at[43]); + MULADD(at[13], at[42]); + MULADD(at[14], at[41]); + MULADD(at[15], at[40]); + MULADD(at[16], at[39]); + MULADD(at[17], at[38]); + MULADD(at[18], at[37]); + MULADD(at[19], at[36]); + MULADD(at[20], at[35]); + MULADD(at[21], at[34]); + MULADD(at[22], at[33]); + MULADD(at[23], 
at[32]); + COMBA_STORE(C->dp[23]); + /* 24 */ + COMBA_FORWARD; + MULADD(at[0], at[56]); + MULADD(at[1], at[55]); + MULADD(at[2], at[54]); + MULADD(at[3], at[53]); + MULADD(at[4], at[52]); + MULADD(at[5], at[51]); + MULADD(at[6], at[50]); + MULADD(at[7], at[49]); + MULADD(at[8], at[48]); + MULADD(at[9], at[47]); + MULADD(at[10], at[46]); + MULADD(at[11], at[45]); + MULADD(at[12], at[44]); + MULADD(at[13], at[43]); + MULADD(at[14], at[42]); + MULADD(at[15], at[41]); + MULADD(at[16], at[40]); + MULADD(at[17], at[39]); + MULADD(at[18], at[38]); + MULADD(at[19], at[37]); + MULADD(at[20], at[36]); + MULADD(at[21], at[35]); + MULADD(at[22], at[34]); + MULADD(at[23], at[33]); + MULADD(at[24], at[32]); + COMBA_STORE(C->dp[24]); + /* 25 */ + COMBA_FORWARD; + MULADD(at[0], at[57]); + MULADD(at[1], at[56]); + MULADD(at[2], at[55]); + MULADD(at[3], at[54]); + MULADD(at[4], at[53]); + MULADD(at[5], at[52]); + MULADD(at[6], at[51]); + MULADD(at[7], at[50]); + MULADD(at[8], at[49]); + MULADD(at[9], at[48]); + MULADD(at[10], at[47]); + MULADD(at[11], at[46]); + MULADD(at[12], at[45]); + MULADD(at[13], at[44]); + MULADD(at[14], at[43]); + MULADD(at[15], at[42]); + MULADD(at[16], at[41]); + MULADD(at[17], at[40]); + MULADD(at[18], at[39]); + MULADD(at[19], at[38]); + MULADD(at[20], at[37]); + MULADD(at[21], at[36]); + MULADD(at[22], at[35]); + MULADD(at[23], at[34]); + MULADD(at[24], at[33]); + MULADD(at[25], at[32]); + COMBA_STORE(C->dp[25]); + /* 26 */ + COMBA_FORWARD; + MULADD(at[0], at[58]); + MULADD(at[1], at[57]); + MULADD(at[2], at[56]); + MULADD(at[3], at[55]); + MULADD(at[4], at[54]); + MULADD(at[5], at[53]); + MULADD(at[6], at[52]); + MULADD(at[7], at[51]); + MULADD(at[8], at[50]); + MULADD(at[9], at[49]); + MULADD(at[10], at[48]); + MULADD(at[11], at[47]); + MULADD(at[12], at[46]); + MULADD(at[13], at[45]); + MULADD(at[14], at[44]); + MULADD(at[15], at[43]); + MULADD(at[16], at[42]); + MULADD(at[17], at[41]); + MULADD(at[18], at[40]); + MULADD(at[19], at[39]); + 
MULADD(at[20], at[38]); + MULADD(at[21], at[37]); + MULADD(at[22], at[36]); + MULADD(at[23], at[35]); + MULADD(at[24], at[34]); + MULADD(at[25], at[33]); + MULADD(at[26], at[32]); + COMBA_STORE(C->dp[26]); + /* 27 */ + COMBA_FORWARD; + MULADD(at[0], at[59]); + MULADD(at[1], at[58]); + MULADD(at[2], at[57]); + MULADD(at[3], at[56]); + MULADD(at[4], at[55]); + MULADD(at[5], at[54]); + MULADD(at[6], at[53]); + MULADD(at[7], at[52]); + MULADD(at[8], at[51]); + MULADD(at[9], at[50]); + MULADD(at[10], at[49]); + MULADD(at[11], at[48]); + MULADD(at[12], at[47]); + MULADD(at[13], at[46]); + MULADD(at[14], at[45]); + MULADD(at[15], at[44]); + MULADD(at[16], at[43]); + MULADD(at[17], at[42]); + MULADD(at[18], at[41]); + MULADD(at[19], at[40]); + MULADD(at[20], at[39]); + MULADD(at[21], at[38]); + MULADD(at[22], at[37]); + MULADD(at[23], at[36]); + MULADD(at[24], at[35]); + MULADD(at[25], at[34]); + MULADD(at[26], at[33]); + MULADD(at[27], at[32]); + COMBA_STORE(C->dp[27]); + /* 28 */ + COMBA_FORWARD; + MULADD(at[0], at[60]); + MULADD(at[1], at[59]); + MULADD(at[2], at[58]); + MULADD(at[3], at[57]); + MULADD(at[4], at[56]); + MULADD(at[5], at[55]); + MULADD(at[6], at[54]); + MULADD(at[7], at[53]); + MULADD(at[8], at[52]); + MULADD(at[9], at[51]); + MULADD(at[10], at[50]); + MULADD(at[11], at[49]); + MULADD(at[12], at[48]); + MULADD(at[13], at[47]); + MULADD(at[14], at[46]); + MULADD(at[15], at[45]); + MULADD(at[16], at[44]); + MULADD(at[17], at[43]); + MULADD(at[18], at[42]); + MULADD(at[19], at[41]); + MULADD(at[20], at[40]); + MULADD(at[21], at[39]); + MULADD(at[22], at[38]); + MULADD(at[23], at[37]); + MULADD(at[24], at[36]); + MULADD(at[25], at[35]); + MULADD(at[26], at[34]); + MULADD(at[27], at[33]); + MULADD(at[28], at[32]); + COMBA_STORE(C->dp[28]); + /* 29 */ + COMBA_FORWARD; + MULADD(at[0], at[61]); + MULADD(at[1], at[60]); + MULADD(at[2], at[59]); + MULADD(at[3], at[58]); + MULADD(at[4], at[57]); + MULADD(at[5], at[56]); + MULADD(at[6], at[55]); + MULADD(at[7], 
at[54]); + MULADD(at[8], at[53]); + MULADD(at[9], at[52]); + MULADD(at[10], at[51]); + MULADD(at[11], at[50]); + MULADD(at[12], at[49]); + MULADD(at[13], at[48]); + MULADD(at[14], at[47]); + MULADD(at[15], at[46]); + MULADD(at[16], at[45]); + MULADD(at[17], at[44]); + MULADD(at[18], at[43]); + MULADD(at[19], at[42]); + MULADD(at[20], at[41]); + MULADD(at[21], at[40]); + MULADD(at[22], at[39]); + MULADD(at[23], at[38]); + MULADD(at[24], at[37]); + MULADD(at[25], at[36]); + MULADD(at[26], at[35]); + MULADD(at[27], at[34]); + MULADD(at[28], at[33]); + MULADD(at[29], at[32]); + COMBA_STORE(C->dp[29]); + /* 30 */ + COMBA_FORWARD; + MULADD(at[0], at[62]); + MULADD(at[1], at[61]); + MULADD(at[2], at[60]); + MULADD(at[3], at[59]); + MULADD(at[4], at[58]); + MULADD(at[5], at[57]); + MULADD(at[6], at[56]); + MULADD(at[7], at[55]); + MULADD(at[8], at[54]); + MULADD(at[9], at[53]); + MULADD(at[10], at[52]); + MULADD(at[11], at[51]); + MULADD(at[12], at[50]); + MULADD(at[13], at[49]); + MULADD(at[14], at[48]); + MULADD(at[15], at[47]); + MULADD(at[16], at[46]); + MULADD(at[17], at[45]); + MULADD(at[18], at[44]); + MULADD(at[19], at[43]); + MULADD(at[20], at[42]); + MULADD(at[21], at[41]); + MULADD(at[22], at[40]); + MULADD(at[23], at[39]); + MULADD(at[24], at[38]); + MULADD(at[25], at[37]); + MULADD(at[26], at[36]); + MULADD(at[27], at[35]); + MULADD(at[28], at[34]); + MULADD(at[29], at[33]); + MULADD(at[30], at[32]); + COMBA_STORE(C->dp[30]); + /* 31 */ + COMBA_FORWARD; + MULADD(at[0], at[63]); + MULADD(at[1], at[62]); + MULADD(at[2], at[61]); + MULADD(at[3], at[60]); + MULADD(at[4], at[59]); + MULADD(at[5], at[58]); + MULADD(at[6], at[57]); + MULADD(at[7], at[56]); + MULADD(at[8], at[55]); + MULADD(at[9], at[54]); + MULADD(at[10], at[53]); + MULADD(at[11], at[52]); + MULADD(at[12], at[51]); + MULADD(at[13], at[50]); + MULADD(at[14], at[49]); + MULADD(at[15], at[48]); + MULADD(at[16], at[47]); + MULADD(at[17], at[46]); + MULADD(at[18], at[45]); + MULADD(at[19], at[44]); + 
MULADD(at[20], at[43]); + MULADD(at[21], at[42]); + MULADD(at[22], at[41]); + MULADD(at[23], at[40]); + MULADD(at[24], at[39]); + MULADD(at[25], at[38]); + MULADD(at[26], at[37]); + MULADD(at[27], at[36]); + MULADD(at[28], at[35]); + MULADD(at[29], at[34]); + MULADD(at[30], at[33]); + MULADD(at[31], at[32]); + COMBA_STORE(C->dp[31]); + /* 32 */ + COMBA_FORWARD; + MULADD(at[1], at[63]); + MULADD(at[2], at[62]); + MULADD(at[3], at[61]); + MULADD(at[4], at[60]); + MULADD(at[5], at[59]); + MULADD(at[6], at[58]); + MULADD(at[7], at[57]); + MULADD(at[8], at[56]); + MULADD(at[9], at[55]); + MULADD(at[10], at[54]); + MULADD(at[11], at[53]); + MULADD(at[12], at[52]); + MULADD(at[13], at[51]); + MULADD(at[14], at[50]); + MULADD(at[15], at[49]); + MULADD(at[16], at[48]); + MULADD(at[17], at[47]); + MULADD(at[18], at[46]); + MULADD(at[19], at[45]); + MULADD(at[20], at[44]); + MULADD(at[21], at[43]); + MULADD(at[22], at[42]); + MULADD(at[23], at[41]); + MULADD(at[24], at[40]); + MULADD(at[25], at[39]); + MULADD(at[26], at[38]); + MULADD(at[27], at[37]); + MULADD(at[28], at[36]); + MULADD(at[29], at[35]); + MULADD(at[30], at[34]); + MULADD(at[31], at[33]); + COMBA_STORE(C->dp[32]); + /* 33 */ + COMBA_FORWARD; + MULADD(at[2], at[63]); + MULADD(at[3], at[62]); + MULADD(at[4], at[61]); + MULADD(at[5], at[60]); + MULADD(at[6], at[59]); + MULADD(at[7], at[58]); + MULADD(at[8], at[57]); + MULADD(at[9], at[56]); + MULADD(at[10], at[55]); + MULADD(at[11], at[54]); + MULADD(at[12], at[53]); + MULADD(at[13], at[52]); + MULADD(at[14], at[51]); + MULADD(at[15], at[50]); + MULADD(at[16], at[49]); + MULADD(at[17], at[48]); + MULADD(at[18], at[47]); + MULADD(at[19], at[46]); + MULADD(at[20], at[45]); + MULADD(at[21], at[44]); + MULADD(at[22], at[43]); + MULADD(at[23], at[42]); + MULADD(at[24], at[41]); + MULADD(at[25], at[40]); + MULADD(at[26], at[39]); + MULADD(at[27], at[38]); + MULADD(at[28], at[37]); + MULADD(at[29], at[36]); + MULADD(at[30], at[35]); + MULADD(at[31], at[34]); + 
COMBA_STORE(C->dp[33]); + /* 34 */ + COMBA_FORWARD; + MULADD(at[3], at[63]); + MULADD(at[4], at[62]); + MULADD(at[5], at[61]); + MULADD(at[6], at[60]); + MULADD(at[7], at[59]); + MULADD(at[8], at[58]); + MULADD(at[9], at[57]); + MULADD(at[10], at[56]); + MULADD(at[11], at[55]); + MULADD(at[12], at[54]); + MULADD(at[13], at[53]); + MULADD(at[14], at[52]); + MULADD(at[15], at[51]); + MULADD(at[16], at[50]); + MULADD(at[17], at[49]); + MULADD(at[18], at[48]); + MULADD(at[19], at[47]); + MULADD(at[20], at[46]); + MULADD(at[21], at[45]); + MULADD(at[22], at[44]); + MULADD(at[23], at[43]); + MULADD(at[24], at[42]); + MULADD(at[25], at[41]); + MULADD(at[26], at[40]); + MULADD(at[27], at[39]); + MULADD(at[28], at[38]); + MULADD(at[29], at[37]); + MULADD(at[30], at[36]); + MULADD(at[31], at[35]); + COMBA_STORE(C->dp[34]); + /* 35 */ + COMBA_FORWARD; + MULADD(at[4], at[63]); + MULADD(at[5], at[62]); + MULADD(at[6], at[61]); + MULADD(at[7], at[60]); + MULADD(at[8], at[59]); + MULADD(at[9], at[58]); + MULADD(at[10], at[57]); + MULADD(at[11], at[56]); + MULADD(at[12], at[55]); + MULADD(at[13], at[54]); + MULADD(at[14], at[53]); + MULADD(at[15], at[52]); + MULADD(at[16], at[51]); + MULADD(at[17], at[50]); + MULADD(at[18], at[49]); + MULADD(at[19], at[48]); + MULADD(at[20], at[47]); + MULADD(at[21], at[46]); + MULADD(at[22], at[45]); + MULADD(at[23], at[44]); + MULADD(at[24], at[43]); + MULADD(at[25], at[42]); + MULADD(at[26], at[41]); + MULADD(at[27], at[40]); + MULADD(at[28], at[39]); + MULADD(at[29], at[38]); + MULADD(at[30], at[37]); + MULADD(at[31], at[36]); + COMBA_STORE(C->dp[35]); + /* 36 */ + COMBA_FORWARD; + MULADD(at[5], at[63]); + MULADD(at[6], at[62]); + MULADD(at[7], at[61]); + MULADD(at[8], at[60]); + MULADD(at[9], at[59]); + MULADD(at[10], at[58]); + MULADD(at[11], at[57]); + MULADD(at[12], at[56]); + MULADD(at[13], at[55]); + MULADD(at[14], at[54]); + MULADD(at[15], at[53]); + MULADD(at[16], at[52]); + MULADD(at[17], at[51]); + MULADD(at[18], at[50]); + 
MULADD(at[19], at[49]); + MULADD(at[20], at[48]); + MULADD(at[21], at[47]); + MULADD(at[22], at[46]); + MULADD(at[23], at[45]); + MULADD(at[24], at[44]); + MULADD(at[25], at[43]); + MULADD(at[26], at[42]); + MULADD(at[27], at[41]); + MULADD(at[28], at[40]); + MULADD(at[29], at[39]); + MULADD(at[30], at[38]); + MULADD(at[31], at[37]); + COMBA_STORE(C->dp[36]); + /* 37 */ + COMBA_FORWARD; + MULADD(at[6], at[63]); + MULADD(at[7], at[62]); + MULADD(at[8], at[61]); + MULADD(at[9], at[60]); + MULADD(at[10], at[59]); + MULADD(at[11], at[58]); + MULADD(at[12], at[57]); + MULADD(at[13], at[56]); + MULADD(at[14], at[55]); + MULADD(at[15], at[54]); + MULADD(at[16], at[53]); + MULADD(at[17], at[52]); + MULADD(at[18], at[51]); + MULADD(at[19], at[50]); + MULADD(at[20], at[49]); + MULADD(at[21], at[48]); + MULADD(at[22], at[47]); + MULADD(at[23], at[46]); + MULADD(at[24], at[45]); + MULADD(at[25], at[44]); + MULADD(at[26], at[43]); + MULADD(at[27], at[42]); + MULADD(at[28], at[41]); + MULADD(at[29], at[40]); + MULADD(at[30], at[39]); + MULADD(at[31], at[38]); + COMBA_STORE(C->dp[37]); + /* 38 */ + COMBA_FORWARD; + MULADD(at[7], at[63]); + MULADD(at[8], at[62]); + MULADD(at[9], at[61]); + MULADD(at[10], at[60]); + MULADD(at[11], at[59]); + MULADD(at[12], at[58]); + MULADD(at[13], at[57]); + MULADD(at[14], at[56]); + MULADD(at[15], at[55]); + MULADD(at[16], at[54]); + MULADD(at[17], at[53]); + MULADD(at[18], at[52]); + MULADD(at[19], at[51]); + MULADD(at[20], at[50]); + MULADD(at[21], at[49]); + MULADD(at[22], at[48]); + MULADD(at[23], at[47]); + MULADD(at[24], at[46]); + MULADD(at[25], at[45]); + MULADD(at[26], at[44]); + MULADD(at[27], at[43]); + MULADD(at[28], at[42]); + MULADD(at[29], at[41]); + MULADD(at[30], at[40]); + MULADD(at[31], at[39]); + COMBA_STORE(C->dp[38]); + /* 39 */ + COMBA_FORWARD; + MULADD(at[8], at[63]); + MULADD(at[9], at[62]); + MULADD(at[10], at[61]); + MULADD(at[11], at[60]); + MULADD(at[12], at[59]); + MULADD(at[13], at[58]); + MULADD(at[14], at[57]); + 
MULADD(at[15], at[56]); + MULADD(at[16], at[55]); + MULADD(at[17], at[54]); + MULADD(at[18], at[53]); + MULADD(at[19], at[52]); + MULADD(at[20], at[51]); + MULADD(at[21], at[50]); + MULADD(at[22], at[49]); + MULADD(at[23], at[48]); + MULADD(at[24], at[47]); + MULADD(at[25], at[46]); + MULADD(at[26], at[45]); + MULADD(at[27], at[44]); + MULADD(at[28], at[43]); + MULADD(at[29], at[42]); + MULADD(at[30], at[41]); + MULADD(at[31], at[40]); + COMBA_STORE(C->dp[39]); + /* 40 */ + COMBA_FORWARD; + MULADD(at[9], at[63]); + MULADD(at[10], at[62]); + MULADD(at[11], at[61]); + MULADD(at[12], at[60]); + MULADD(at[13], at[59]); + MULADD(at[14], at[58]); + MULADD(at[15], at[57]); + MULADD(at[16], at[56]); + MULADD(at[17], at[55]); + MULADD(at[18], at[54]); + MULADD(at[19], at[53]); + MULADD(at[20], at[52]); + MULADD(at[21], at[51]); + MULADD(at[22], at[50]); + MULADD(at[23], at[49]); + MULADD(at[24], at[48]); + MULADD(at[25], at[47]); + MULADD(at[26], at[46]); + MULADD(at[27], at[45]); + MULADD(at[28], at[44]); + MULADD(at[29], at[43]); + MULADD(at[30], at[42]); + MULADD(at[31], at[41]); + COMBA_STORE(C->dp[40]); + /* 41 */ + COMBA_FORWARD; + MULADD(at[10], at[63]); + MULADD(at[11], at[62]); + MULADD(at[12], at[61]); + MULADD(at[13], at[60]); + MULADD(at[14], at[59]); + MULADD(at[15], at[58]); + MULADD(at[16], at[57]); + MULADD(at[17], at[56]); + MULADD(at[18], at[55]); + MULADD(at[19], at[54]); + MULADD(at[20], at[53]); + MULADD(at[21], at[52]); + MULADD(at[22], at[51]); + MULADD(at[23], at[50]); + MULADD(at[24], at[49]); + MULADD(at[25], at[48]); + MULADD(at[26], at[47]); + MULADD(at[27], at[46]); + MULADD(at[28], at[45]); + MULADD(at[29], at[44]); + MULADD(at[30], at[43]); + MULADD(at[31], at[42]); + COMBA_STORE(C->dp[41]); + /* 42 */ + COMBA_FORWARD; + MULADD(at[11], at[63]); + MULADD(at[12], at[62]); + MULADD(at[13], at[61]); + MULADD(at[14], at[60]); + MULADD(at[15], at[59]); + MULADD(at[16], at[58]); + MULADD(at[17], at[57]); + MULADD(at[18], at[56]); + MULADD(at[19], 
at[55]); + MULADD(at[20], at[54]); + MULADD(at[21], at[53]); + MULADD(at[22], at[52]); + MULADD(at[23], at[51]); + MULADD(at[24], at[50]); + MULADD(at[25], at[49]); + MULADD(at[26], at[48]); + MULADD(at[27], at[47]); + MULADD(at[28], at[46]); + MULADD(at[29], at[45]); + MULADD(at[30], at[44]); + MULADD(at[31], at[43]); + COMBA_STORE(C->dp[42]); + /* 43 */ + COMBA_FORWARD; + MULADD(at[12], at[63]); + MULADD(at[13], at[62]); + MULADD(at[14], at[61]); + MULADD(at[15], at[60]); + MULADD(at[16], at[59]); + MULADD(at[17], at[58]); + MULADD(at[18], at[57]); + MULADD(at[19], at[56]); + MULADD(at[20], at[55]); + MULADD(at[21], at[54]); + MULADD(at[22], at[53]); + MULADD(at[23], at[52]); + MULADD(at[24], at[51]); + MULADD(at[25], at[50]); + MULADD(at[26], at[49]); + MULADD(at[27], at[48]); + MULADD(at[28], at[47]); + MULADD(at[29], at[46]); + MULADD(at[30], at[45]); + MULADD(at[31], at[44]); + COMBA_STORE(C->dp[43]); + /* 44 */ + COMBA_FORWARD; + MULADD(at[13], at[63]); + MULADD(at[14], at[62]); + MULADD(at[15], at[61]); + MULADD(at[16], at[60]); + MULADD(at[17], at[59]); + MULADD(at[18], at[58]); + MULADD(at[19], at[57]); + MULADD(at[20], at[56]); + MULADD(at[21], at[55]); + MULADD(at[22], at[54]); + MULADD(at[23], at[53]); + MULADD(at[24], at[52]); + MULADD(at[25], at[51]); + MULADD(at[26], at[50]); + MULADD(at[27], at[49]); + MULADD(at[28], at[48]); + MULADD(at[29], at[47]); + MULADD(at[30], at[46]); + MULADD(at[31], at[45]); + COMBA_STORE(C->dp[44]); + /* 45 */ + COMBA_FORWARD; + MULADD(at[14], at[63]); + MULADD(at[15], at[62]); + MULADD(at[16], at[61]); + MULADD(at[17], at[60]); + MULADD(at[18], at[59]); + MULADD(at[19], at[58]); + MULADD(at[20], at[57]); + MULADD(at[21], at[56]); + MULADD(at[22], at[55]); + MULADD(at[23], at[54]); + MULADD(at[24], at[53]); + MULADD(at[25], at[52]); + MULADD(at[26], at[51]); + MULADD(at[27], at[50]); + MULADD(at[28], at[49]); + MULADD(at[29], at[48]); + MULADD(at[30], at[47]); + MULADD(at[31], at[46]); + COMBA_STORE(C->dp[45]); + /* 46 
*/ + COMBA_FORWARD; + MULADD(at[15], at[63]); + MULADD(at[16], at[62]); + MULADD(at[17], at[61]); + MULADD(at[18], at[60]); + MULADD(at[19], at[59]); + MULADD(at[20], at[58]); + MULADD(at[21], at[57]); + MULADD(at[22], at[56]); + MULADD(at[23], at[55]); + MULADD(at[24], at[54]); + MULADD(at[25], at[53]); + MULADD(at[26], at[52]); + MULADD(at[27], at[51]); + MULADD(at[28], at[50]); + MULADD(at[29], at[49]); + MULADD(at[30], at[48]); + MULADD(at[31], at[47]); + COMBA_STORE(C->dp[46]); + /* 47 */ + COMBA_FORWARD; + MULADD(at[16], at[63]); + MULADD(at[17], at[62]); + MULADD(at[18], at[61]); + MULADD(at[19], at[60]); + MULADD(at[20], at[59]); + MULADD(at[21], at[58]); + MULADD(at[22], at[57]); + MULADD(at[23], at[56]); + MULADD(at[24], at[55]); + MULADD(at[25], at[54]); + MULADD(at[26], at[53]); + MULADD(at[27], at[52]); + MULADD(at[28], at[51]); + MULADD(at[29], at[50]); + MULADD(at[30], at[49]); + MULADD(at[31], at[48]); + COMBA_STORE(C->dp[47]); + /* 48 */ + COMBA_FORWARD; + MULADD(at[17], at[63]); + MULADD(at[18], at[62]); + MULADD(at[19], at[61]); + MULADD(at[20], at[60]); + MULADD(at[21], at[59]); + MULADD(at[22], at[58]); + MULADD(at[23], at[57]); + MULADD(at[24], at[56]); + MULADD(at[25], at[55]); + MULADD(at[26], at[54]); + MULADD(at[27], at[53]); + MULADD(at[28], at[52]); + MULADD(at[29], at[51]); + MULADD(at[30], at[50]); + MULADD(at[31], at[49]); + COMBA_STORE(C->dp[48]); + /* 49 */ + COMBA_FORWARD; + MULADD(at[18], at[63]); + MULADD(at[19], at[62]); + MULADD(at[20], at[61]); + MULADD(at[21], at[60]); + MULADD(at[22], at[59]); + MULADD(at[23], at[58]); + MULADD(at[24], at[57]); + MULADD(at[25], at[56]); + MULADD(at[26], at[55]); + MULADD(at[27], at[54]); + MULADD(at[28], at[53]); + MULADD(at[29], at[52]); + MULADD(at[30], at[51]); + MULADD(at[31], at[50]); + COMBA_STORE(C->dp[49]); + /* 50 */ + COMBA_FORWARD; + MULADD(at[19], at[63]); + MULADD(at[20], at[62]); + MULADD(at[21], at[61]); + MULADD(at[22], at[60]); + MULADD(at[23], at[59]); + MULADD(at[24], 
at[58]); + MULADD(at[25], at[57]); + MULADD(at[26], at[56]); + MULADD(at[27], at[55]); + MULADD(at[28], at[54]); + MULADD(at[29], at[53]); + MULADD(at[30], at[52]); + MULADD(at[31], at[51]); + COMBA_STORE(C->dp[50]); + /* 51 */ + COMBA_FORWARD; + MULADD(at[20], at[63]); + MULADD(at[21], at[62]); + MULADD(at[22], at[61]); + MULADD(at[23], at[60]); + MULADD(at[24], at[59]); + MULADD(at[25], at[58]); + MULADD(at[26], at[57]); + MULADD(at[27], at[56]); + MULADD(at[28], at[55]); + MULADD(at[29], at[54]); + MULADD(at[30], at[53]); + MULADD(at[31], at[52]); + COMBA_STORE(C->dp[51]); + /* 52 */ + COMBA_FORWARD; + MULADD(at[21], at[63]); + MULADD(at[22], at[62]); + MULADD(at[23], at[61]); + MULADD(at[24], at[60]); + MULADD(at[25], at[59]); + MULADD(at[26], at[58]); + MULADD(at[27], at[57]); + MULADD(at[28], at[56]); + MULADD(at[29], at[55]); + MULADD(at[30], at[54]); + MULADD(at[31], at[53]); + COMBA_STORE(C->dp[52]); + /* 53 */ + COMBA_FORWARD; + MULADD(at[22], at[63]); + MULADD(at[23], at[62]); + MULADD(at[24], at[61]); + MULADD(at[25], at[60]); + MULADD(at[26], at[59]); + MULADD(at[27], at[58]); + MULADD(at[28], at[57]); + MULADD(at[29], at[56]); + MULADD(at[30], at[55]); + MULADD(at[31], at[54]); + COMBA_STORE(C->dp[53]); + /* 54 */ + COMBA_FORWARD; + MULADD(at[23], at[63]); + MULADD(at[24], at[62]); + MULADD(at[25], at[61]); + MULADD(at[26], at[60]); + MULADD(at[27], at[59]); + MULADD(at[28], at[58]); + MULADD(at[29], at[57]); + MULADD(at[30], at[56]); + MULADD(at[31], at[55]); + COMBA_STORE(C->dp[54]); + /* 55 */ + COMBA_FORWARD; + MULADD(at[24], at[63]); + MULADD(at[25], at[62]); + MULADD(at[26], at[61]); + MULADD(at[27], at[60]); + MULADD(at[28], at[59]); + MULADD(at[29], at[58]); + MULADD(at[30], at[57]); + MULADD(at[31], at[56]); + COMBA_STORE(C->dp[55]); + /* 56 */ + COMBA_FORWARD; + MULADD(at[25], at[63]); + MULADD(at[26], at[62]); + MULADD(at[27], at[61]); + MULADD(at[28], at[60]); + MULADD(at[29], at[59]); + MULADD(at[30], at[58]); + MULADD(at[31], at[57]); + 
COMBA_STORE(C->dp[56]); + /* 57 */ + COMBA_FORWARD; + MULADD(at[26], at[63]); + MULADD(at[27], at[62]); + MULADD(at[28], at[61]); + MULADD(at[29], at[60]); + MULADD(at[30], at[59]); + MULADD(at[31], at[58]); + COMBA_STORE(C->dp[57]); + /* 58 */ + COMBA_FORWARD; + MULADD(at[27], at[63]); + MULADD(at[28], at[62]); + MULADD(at[29], at[61]); + MULADD(at[30], at[60]); + MULADD(at[31], at[59]); + COMBA_STORE(C->dp[58]); + /* 59 */ + COMBA_FORWARD; + MULADD(at[28], at[63]); + MULADD(at[29], at[62]); + MULADD(at[30], at[61]); + MULADD(at[31], at[60]); + COMBA_STORE(C->dp[59]); + /* 60 */ + COMBA_FORWARD; + MULADD(at[29], at[63]); + MULADD(at[30], at[62]); + MULADD(at[31], at[61]); + COMBA_STORE(C->dp[60]); + /* 61 */ + COMBA_FORWARD; + MULADD(at[30], at[63]); + MULADD(at[31], at[62]); + COMBA_STORE(C->dp[61]); + /* 62 */ + COMBA_FORWARD; + MULADD(at[31], at[63]); + COMBA_STORE(C->dp[62]); + COMBA_STORE2(C->dp[63]); + C->used = 64; + C->sign = A->sign ^ B->sign; + mp_clamp(C); + COMBA_FINI; +} + /* s_mp_sqr_comba_4: fully unrolled column-wise square of the 4 digits a[0..3] read from A->dp. Each diagonal term a[i]*a[i] is passed to SQRADD and each off-diagonal pair (i < j) appears exactly once, through SQRADD2. Columns 0..6 are stored into the local buffer b with COMBA_STORE and b[7] with COMBA_STORE2; afterwards B->used is set to 8, B->sign to ZPOS, the 8 digits are copied to B->dp and mp_clamp(B) is called. Four digits are read from A->dp and eight are written to B->dp with no size check in this function. NOTE(review): the macros are defined outside this chunk; SQRADD2 presumably adds the product twice - confirm against the macro definitions. */ +void +s_mp_sqr_comba_4(const mp_int *A, mp_int *B) +{ + mp_digit *a, b[8], c0, c1, c2; + + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 */ + SQRADD(a[0], a[0]); + COMBA_STORE(b[0]); + + /* output 1 */ + CARRY_FORWARD; + SQRADD2(a[0], a[1]); + COMBA_STORE(b[1]); + + /* output 2 */ + CARRY_FORWARD; + SQRADD2(a[0], a[2]); + SQRADD(a[1], a[1]); + COMBA_STORE(b[2]); + + /* output 3 */ + CARRY_FORWARD; + SQRADD2(a[0], a[3]); + SQRADD2(a[1], a[2]); + COMBA_STORE(b[3]); + + /* output 4 */ + CARRY_FORWARD; + SQRADD2(a[1], a[3]); + SQRADD(a[2], a[2]); + COMBA_STORE(b[4]); + + /* output 5 */ + CARRY_FORWARD; + SQRADD2(a[2], a[3]); + COMBA_STORE(b[5]); + + /* output 6 */ + CARRY_FORWARD; + SQRADD(a[3], a[3]); + COMBA_STORE(b[6]); + COMBA_STORE2(b[7]); + COMBA_FINI; + /* B->dp is written only from here on, after every read of a[] (presumably so that B may be the same mp_int as A - confirm with callers) */ + B->used = 8; + B->sign = ZPOS; + memcpy(B->dp, b, 8 * sizeof(mp_digit)); + mp_clamp(B); +} + /* s_mp_sqr_comba_8: fully unrolled column-wise square of the 8 digits a[0..7] read from A->dp. The 16 result digits are built in the local buffer b and then copied to B->dp, with B->used set to 16 and B->sign to ZPOS, followed by mp_clamp(B). Columns with one or two cross products use SQRADD2; columns 5..9, which have three or more, use the SQRADDSC / SQRADDAC / SQRADDDB sequence instead; diagonal terms always go through SQRADD. NOTE(review): sc0/sc1/sc2 are never named directly in this function, so they are presumably the accumulator used by those three macros, which are defined outside this chunk - confirm. */ +void +s_mp_sqr_comba_8(const mp_int *A, mp_int *B) +{ + mp_digit *a, b[16], c0, c1, c2, sc0, sc1,
sc2; + + /* + * Unrolled squaring of an 8-digit input: a[0]..a[7] are read from + * A->dp and output digits 0..15 are built in the local b[] one column + * at a time. b[] is copied to B->dp only at the end, after which + * B->used = 16, B->sign = ZPOS and mp_clamp(B) is called. + * Columns with three or more distinct cross products are accumulated + * with SQRADDSC/SQRADDAC into sc0..sc2 and finished with SQRADDDB; + * shorter columns use SQRADD2 per product. Diagonal terms use SQRADD. + * NOTE(review): the macros are defined outside this function - their + * exact carry behaviour should be confirmed there. + * NOTE(review): presumably the caller guarantees 8 input digits and + * room for 16 digits in B->dp - nothing here checks it. + */ + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 */ + SQRADD(a[0], a[0]); + COMBA_STORE(b[0]); + + /* output 1 */ + CARRY_FORWARD; + SQRADD2(a[0], a[1]); + COMBA_STORE(b[1]); + + /* output 2 */ + CARRY_FORWARD; + SQRADD2(a[0], a[2]); + SQRADD(a[1], a[1]); + COMBA_STORE(b[2]); + + /* output 3 */ + CARRY_FORWARD; + SQRADD2(a[0], a[3]); + SQRADD2(a[1], a[2]); + COMBA_STORE(b[3]); + + /* output 4 */ + CARRY_FORWARD; + SQRADD2(a[0], a[4]); + SQRADD2(a[1], a[3]); + SQRADD(a[2], a[2]); + COMBA_STORE(b[4]); + + /* output 5 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[5]); + SQRADDAC(a[1], a[4]); + SQRADDAC(a[2], a[3]); + SQRADDDB; + COMBA_STORE(b[5]); + + /* output 6 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[6]); + SQRADDAC(a[1], a[5]); + SQRADDAC(a[2], a[4]); + SQRADDDB; + SQRADD(a[3], a[3]); + COMBA_STORE(b[6]); + + /* output 7 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[7]); + SQRADDAC(a[1], a[6]); + SQRADDAC(a[2], a[5]); + SQRADDAC(a[3], a[4]); + SQRADDDB; + COMBA_STORE(b[7]); + + /* output 8 */ + CARRY_FORWARD; + SQRADDSC(a[1], a[7]); + SQRADDAC(a[2], a[6]); + SQRADDAC(a[3], a[5]); + SQRADDDB; + SQRADD(a[4], a[4]); + COMBA_STORE(b[8]); + + /* output 9 */ + CARRY_FORWARD; + SQRADDSC(a[2], a[7]); + SQRADDAC(a[3], a[6]); + SQRADDAC(a[4], a[5]); + SQRADDDB; + COMBA_STORE(b[9]); + + /* output 10 */ + CARRY_FORWARD; + SQRADD2(a[3], a[7]); + SQRADD2(a[4], a[6]); + SQRADD(a[5], a[5]); + COMBA_STORE(b[10]); + + /* output 11 */ + CARRY_FORWARD; + SQRADD2(a[4], a[7]); + SQRADD2(a[5], a[6]); + COMBA_STORE(b[11]); + + /* output 12 */ + CARRY_FORWARD; + SQRADD2(a[5], a[7]); + SQRADD(a[6], a[6]); + COMBA_STORE(b[12]); + + /* output 13 */ + CARRY_FORWARD; + SQRADD2(a[6], a[7]); + COMBA_STORE(b[13]); + + /* output 14 */ + CARRY_FORWARD; + SQRADD(a[7], a[7]); + COMBA_STORE(b[14]); + COMBA_STORE2(b[15]); + COMBA_FINI; + + B->used = 16; + B->sign = ZPOS; + memcpy(B->dp, b, 16 * sizeof(mp_digit)); + mp_clamp(B); +} + +void +s_mp_sqr_comba_16(const mp_int *A,
mp_int *B) +{ + /* + * Unrolled squaring of a 16-digit input: a[0]..a[15] are read from + * A->dp and output digits 0..31 are built in the local b[] one column + * at a time. b[] is copied to B->dp only at the end of the function, + * after which B->used = 32, B->sign = ZPOS and mp_clamp(B) is called. + * Columns with three or more distinct cross products are accumulated + * with SQRADDSC/SQRADDAC into sc0..sc2 and finished with SQRADDDB; + * shorter columns use SQRADD2 per product. Diagonal terms use SQRADD. + * NOTE(review): the macros are defined outside this function - their + * exact carry behaviour should be confirmed there. + * NOTE(review): presumably the caller guarantees 16 input digits and + * room for 32 digits in B->dp - nothing here checks it. + */ + mp_digit *a, b[32], c0, c1, c2, sc0, sc1, sc2; + + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 */ + SQRADD(a[0], a[0]); + COMBA_STORE(b[0]); + + /* output 1 */ + CARRY_FORWARD; + SQRADD2(a[0], a[1]); + COMBA_STORE(b[1]); + + /* output 2 */ + CARRY_FORWARD; + SQRADD2(a[0], a[2]); + SQRADD(a[1], a[1]); + COMBA_STORE(b[2]); + + /* output 3 */ + CARRY_FORWARD; + SQRADD2(a[0], a[3]); + SQRADD2(a[1], a[2]); + COMBA_STORE(b[3]); + + /* output 4 */ + CARRY_FORWARD; + SQRADD2(a[0], a[4]); + SQRADD2(a[1], a[3]); + SQRADD(a[2], a[2]); + COMBA_STORE(b[4]); + + /* output 5 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[5]); + SQRADDAC(a[1], a[4]); + SQRADDAC(a[2], a[3]); + SQRADDDB; + COMBA_STORE(b[5]); + + /* output 6 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[6]); + SQRADDAC(a[1], a[5]); + SQRADDAC(a[2], a[4]); + SQRADDDB; + SQRADD(a[3], a[3]); + COMBA_STORE(b[6]); + + /* output 7 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[7]); + SQRADDAC(a[1], a[6]); + SQRADDAC(a[2], a[5]); + SQRADDAC(a[3], a[4]); + SQRADDDB; + COMBA_STORE(b[7]); + + /* output 8 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[8]); + SQRADDAC(a[1], a[7]); + SQRADDAC(a[2], a[6]); + SQRADDAC(a[3], a[5]); + SQRADDDB; + SQRADD(a[4], a[4]); + COMBA_STORE(b[8]); + + /* output 9 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[9]); + SQRADDAC(a[1], a[8]); + SQRADDAC(a[2], a[7]); + SQRADDAC(a[3], a[6]); + SQRADDAC(a[4], a[5]); + SQRADDDB; + COMBA_STORE(b[9]); + + /* output 10 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[10]); + SQRADDAC(a[1], a[9]); + SQRADDAC(a[2], a[8]); + SQRADDAC(a[3], a[7]); + SQRADDAC(a[4], a[6]); + SQRADDDB; + SQRADD(a[5], a[5]); + COMBA_STORE(b[10]); + + /* output 11 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[11]); + SQRADDAC(a[1], a[10]); + SQRADDAC(a[2], a[9]); + SQRADDAC(a[3], a[8]); + SQRADDAC(a[4], a[7]); + SQRADDAC(a[5], a[6]); + SQRADDDB; + COMBA_STORE(b[11]); + + /* output 12 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[12]); + SQRADDAC(a[1], a[11]); + SQRADDAC(a[2], a[10]); +
SQRADDAC(a[3], a[9]); + SQRADDAC(a[4], a[8]); + SQRADDAC(a[5], a[7]); + SQRADDDB; + SQRADD(a[6], a[6]); + COMBA_STORE(b[12]); + + /* output 13 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[13]); + SQRADDAC(a[1], a[12]); + SQRADDAC(a[2], a[11]); + SQRADDAC(a[3], a[10]); + SQRADDAC(a[4], a[9]); + SQRADDAC(a[5], a[8]); + SQRADDAC(a[6], a[7]); + SQRADDDB; + COMBA_STORE(b[13]); + + /* output 14 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[14]); + SQRADDAC(a[1], a[13]); + SQRADDAC(a[2], a[12]); + SQRADDAC(a[3], a[11]); + SQRADDAC(a[4], a[10]); + SQRADDAC(a[5], a[9]); + SQRADDAC(a[6], a[8]); + SQRADDDB; + SQRADD(a[7], a[7]); + COMBA_STORE(b[14]); + + /* output 15 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[15]); + SQRADDAC(a[1], a[14]); + SQRADDAC(a[2], a[13]); + SQRADDAC(a[3], a[12]); + SQRADDAC(a[4], a[11]); + SQRADDAC(a[5], a[10]); + SQRADDAC(a[6], a[9]); + SQRADDAC(a[7], a[8]); + SQRADDDB; + COMBA_STORE(b[15]); + + /* output 16 */ + CARRY_FORWARD; + SQRADDSC(a[1], a[15]); + SQRADDAC(a[2], a[14]); + SQRADDAC(a[3], a[13]); + SQRADDAC(a[4], a[12]); + SQRADDAC(a[5], a[11]); + SQRADDAC(a[6], a[10]); + SQRADDAC(a[7], a[9]); + SQRADDDB; + SQRADD(a[8], a[8]); + COMBA_STORE(b[16]); + + /* output 17 */ + CARRY_FORWARD; + SQRADDSC(a[2], a[15]); + SQRADDAC(a[3], a[14]); + SQRADDAC(a[4], a[13]); + SQRADDAC(a[5], a[12]); + SQRADDAC(a[6], a[11]); + SQRADDAC(a[7], a[10]); + SQRADDAC(a[8], a[9]); + SQRADDDB; + COMBA_STORE(b[17]); + + /* output 18 */ + CARRY_FORWARD; + SQRADDSC(a[3], a[15]); + SQRADDAC(a[4], a[14]); + SQRADDAC(a[5], a[13]); + SQRADDAC(a[6], a[12]); + SQRADDAC(a[7], a[11]); + SQRADDAC(a[8], a[10]); + SQRADDDB; + SQRADD(a[9], a[9]); + COMBA_STORE(b[18]); + + /* output 19 */ + CARRY_FORWARD; + SQRADDSC(a[4], a[15]); + SQRADDAC(a[5], a[14]); + SQRADDAC(a[6], a[13]); + SQRADDAC(a[7], a[12]); + SQRADDAC(a[8], a[11]); + SQRADDAC(a[9], a[10]); + SQRADDDB; + COMBA_STORE(b[19]); + + /* output 20 */ + CARRY_FORWARD; + SQRADDSC(a[5], a[15]); + SQRADDAC(a[6], a[14]); + SQRADDAC(a[7], a[13]); + 
SQRADDAC(a[8], a[12]); + SQRADDAC(a[9], a[11]); + SQRADDDB; + SQRADD(a[10], a[10]); + COMBA_STORE(b[20]); + + /* output 21 */ + CARRY_FORWARD; + SQRADDSC(a[6], a[15]); + SQRADDAC(a[7], a[14]); + SQRADDAC(a[8], a[13]); + SQRADDAC(a[9], a[12]); + SQRADDAC(a[10], a[11]); + SQRADDDB; + COMBA_STORE(b[21]); + + /* output 22 */ + CARRY_FORWARD; + SQRADDSC(a[7], a[15]); + SQRADDAC(a[8], a[14]); + SQRADDAC(a[9], a[13]); + SQRADDAC(a[10], a[12]); + SQRADDDB; + SQRADD(a[11], a[11]); + COMBA_STORE(b[22]); + + /* output 23 */ + CARRY_FORWARD; + SQRADDSC(a[8], a[15]); + SQRADDAC(a[9], a[14]); + SQRADDAC(a[10], a[13]); + SQRADDAC(a[11], a[12]); + SQRADDDB; + COMBA_STORE(b[23]); + + /* output 24 */ + CARRY_FORWARD; + SQRADDSC(a[9], a[15]); + SQRADDAC(a[10], a[14]); + SQRADDAC(a[11], a[13]); + SQRADDDB; + SQRADD(a[12], a[12]); + COMBA_STORE(b[24]); + + /* output 25 */ + CARRY_FORWARD; + SQRADDSC(a[10], a[15]); + SQRADDAC(a[11], a[14]); + SQRADDAC(a[12], a[13]); + SQRADDDB; + COMBA_STORE(b[25]); + + /* output 26 */ + CARRY_FORWARD; + SQRADD2(a[11], a[15]); + SQRADD2(a[12], a[14]); + SQRADD(a[13], a[13]); + COMBA_STORE(b[26]); + + /* output 27 */ + CARRY_FORWARD; + SQRADD2(a[12], a[15]); + SQRADD2(a[13], a[14]); + COMBA_STORE(b[27]); + + /* output 28 */ + CARRY_FORWARD; + SQRADD2(a[13], a[15]); + SQRADD(a[14], a[14]); + COMBA_STORE(b[28]); + + /* output 29 */ + CARRY_FORWARD; + SQRADD2(a[14], a[15]); + COMBA_STORE(b[29]); + + /* output 30 */ + CARRY_FORWARD; + SQRADD(a[15], a[15]); + COMBA_STORE(b[30]); + COMBA_STORE2(b[31]); + COMBA_FINI; + + B->used = 32; + B->sign = ZPOS; + memcpy(B->dp, b, 32 * sizeof(mp_digit)); + mp_clamp(B); +} + +/* + * s_mp_sqr_comba_32: fully unrolled squaring of a 32-digit input. + * Reads a[0]..a[31] from A->dp and builds output digits 0..63 in the + * local array b[] one column at a time. b[] is copied to B->dp only at + * the end, after which B->used = 64, B->sign = ZPOS and mp_clamp(B) is + * called. Columns with three or more distinct cross products are + * accumulated with SQRADDSC/SQRADDAC into sc0..sc2 and finished with + * SQRADDDB; shorter columns use SQRADD2; diagonal terms use SQRADD. + * NOTE(review): the macros are defined outside this function - their + * exact carry behaviour should be confirmed there. + * NOTE(review): presumably the caller guarantees 32 input digits and + * room for 64 digits in B->dp - nothing here checks it. + */ +void +s_mp_sqr_comba_32(const mp_int *A, mp_int *B) +{ + mp_digit *a, b[64], c0, c1, c2, sc0, sc1, sc2; + + a = A->dp; + COMBA_START; + + /* clear carries */ + CLEAR_CARRY; + + /* output 0 */ + SQRADD(a[0], a[0]); + COMBA_STORE(b[0]); + + /* output 1 */ + CARRY_FORWARD; + SQRADD2(a[0], a[1]); + COMBA_STORE(b[1]); + + /* output 2 */ + CARRY_FORWARD; +
SQRADD2(a[0], a[2]); + SQRADD(a[1], a[1]); + COMBA_STORE(b[2]); + + /* output 3 */ + CARRY_FORWARD; + SQRADD2(a[0], a[3]); + SQRADD2(a[1], a[2]); + COMBA_STORE(b[3]); + + /* output 4 */ + CARRY_FORWARD; + SQRADD2(a[0], a[4]); + SQRADD2(a[1], a[3]); + SQRADD(a[2], a[2]); + COMBA_STORE(b[4]); + + /* output 5 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[5]); + SQRADDAC(a[1], a[4]); + SQRADDAC(a[2], a[3]); + SQRADDDB; + COMBA_STORE(b[5]); + + /* output 6 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[6]); + SQRADDAC(a[1], a[5]); + SQRADDAC(a[2], a[4]); + SQRADDDB; + SQRADD(a[3], a[3]); + COMBA_STORE(b[6]); + + /* output 7 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[7]); + SQRADDAC(a[1], a[6]); + SQRADDAC(a[2], a[5]); + SQRADDAC(a[3], a[4]); + SQRADDDB; + COMBA_STORE(b[7]); + + /* output 8 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[8]); + SQRADDAC(a[1], a[7]); + SQRADDAC(a[2], a[6]); + SQRADDAC(a[3], a[5]); + SQRADDDB; + SQRADD(a[4], a[4]); + COMBA_STORE(b[8]); + + /* output 9 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[9]); + SQRADDAC(a[1], a[8]); + SQRADDAC(a[2], a[7]); + SQRADDAC(a[3], a[6]); + SQRADDAC(a[4], a[5]); + SQRADDDB; + COMBA_STORE(b[9]); + + /* output 10 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[10]); + SQRADDAC(a[1], a[9]); + SQRADDAC(a[2], a[8]); + SQRADDAC(a[3], a[7]); + SQRADDAC(a[4], a[6]); + SQRADDDB; + SQRADD(a[5], a[5]); + COMBA_STORE(b[10]); + + /* output 11 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[11]); + SQRADDAC(a[1], a[10]); + SQRADDAC(a[2], a[9]); + SQRADDAC(a[3], a[8]); + SQRADDAC(a[4], a[7]); + SQRADDAC(a[5], a[6]); + SQRADDDB; + COMBA_STORE(b[11]); + + /* output 12 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[12]); + SQRADDAC(a[1], a[11]); + SQRADDAC(a[2], a[10]); + SQRADDAC(a[3], a[9]); + SQRADDAC(a[4], a[8]); + SQRADDAC(a[5], a[7]); + SQRADDDB; + SQRADD(a[6], a[6]); + COMBA_STORE(b[12]); + + /* output 13 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[13]); + SQRADDAC(a[1], a[12]); + SQRADDAC(a[2], a[11]); + SQRADDAC(a[3], a[10]); + SQRADDAC(a[4], a[9]); + SQRADDAC(a[5], a[8]); + 
SQRADDAC(a[6], a[7]); + SQRADDDB; + COMBA_STORE(b[13]); + + /* output 14 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[14]); + SQRADDAC(a[1], a[13]); + SQRADDAC(a[2], a[12]); + SQRADDAC(a[3], a[11]); + SQRADDAC(a[4], a[10]); + SQRADDAC(a[5], a[9]); + SQRADDAC(a[6], a[8]); + SQRADDDB; + SQRADD(a[7], a[7]); + COMBA_STORE(b[14]); + + /* output 15 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[15]); + SQRADDAC(a[1], a[14]); + SQRADDAC(a[2], a[13]); + SQRADDAC(a[3], a[12]); + SQRADDAC(a[4], a[11]); + SQRADDAC(a[5], a[10]); + SQRADDAC(a[6], a[9]); + SQRADDAC(a[7], a[8]); + SQRADDDB; + COMBA_STORE(b[15]); + + /* output 16 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[16]); + SQRADDAC(a[1], a[15]); + SQRADDAC(a[2], a[14]); + SQRADDAC(a[3], a[13]); + SQRADDAC(a[4], a[12]); + SQRADDAC(a[5], a[11]); + SQRADDAC(a[6], a[10]); + SQRADDAC(a[7], a[9]); + SQRADDDB; + SQRADD(a[8], a[8]); + COMBA_STORE(b[16]); + + /* output 17 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[17]); + SQRADDAC(a[1], a[16]); + SQRADDAC(a[2], a[15]); + SQRADDAC(a[3], a[14]); + SQRADDAC(a[4], a[13]); + SQRADDAC(a[5], a[12]); + SQRADDAC(a[6], a[11]); + SQRADDAC(a[7], a[10]); + SQRADDAC(a[8], a[9]); + SQRADDDB; + COMBA_STORE(b[17]); + + /* output 18 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[18]); + SQRADDAC(a[1], a[17]); + SQRADDAC(a[2], a[16]); + SQRADDAC(a[3], a[15]); + SQRADDAC(a[4], a[14]); + SQRADDAC(a[5], a[13]); + SQRADDAC(a[6], a[12]); + SQRADDAC(a[7], a[11]); + SQRADDAC(a[8], a[10]); + SQRADDDB; + SQRADD(a[9], a[9]); + COMBA_STORE(b[18]); + + /* output 19 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[19]); + SQRADDAC(a[1], a[18]); + SQRADDAC(a[2], a[17]); + SQRADDAC(a[3], a[16]); + SQRADDAC(a[4], a[15]); + SQRADDAC(a[5], a[14]); + SQRADDAC(a[6], a[13]); + SQRADDAC(a[7], a[12]); + SQRADDAC(a[8], a[11]); + SQRADDAC(a[9], a[10]); + SQRADDDB; + COMBA_STORE(b[19]); + + /* output 20 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[20]); + SQRADDAC(a[1], a[19]); + SQRADDAC(a[2], a[18]); + SQRADDAC(a[3], a[17]); + SQRADDAC(a[4], a[16]); + SQRADDAC(a[5], 
a[15]); + SQRADDAC(a[6], a[14]); + SQRADDAC(a[7], a[13]); + SQRADDAC(a[8], a[12]); + SQRADDAC(a[9], a[11]); + SQRADDDB; + SQRADD(a[10], a[10]); + COMBA_STORE(b[20]); + + /* output 21 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[21]); + SQRADDAC(a[1], a[20]); + SQRADDAC(a[2], a[19]); + SQRADDAC(a[3], a[18]); + SQRADDAC(a[4], a[17]); + SQRADDAC(a[5], a[16]); + SQRADDAC(a[6], a[15]); + SQRADDAC(a[7], a[14]); + SQRADDAC(a[8], a[13]); + SQRADDAC(a[9], a[12]); + SQRADDAC(a[10], a[11]); + SQRADDDB; + COMBA_STORE(b[21]); + + /* output 22 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[22]); + SQRADDAC(a[1], a[21]); + SQRADDAC(a[2], a[20]); + SQRADDAC(a[3], a[19]); + SQRADDAC(a[4], a[18]); + SQRADDAC(a[5], a[17]); + SQRADDAC(a[6], a[16]); + SQRADDAC(a[7], a[15]); + SQRADDAC(a[8], a[14]); + SQRADDAC(a[9], a[13]); + SQRADDAC(a[10], a[12]); + SQRADDDB; + SQRADD(a[11], a[11]); + COMBA_STORE(b[22]); + + /* output 23 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[23]); + SQRADDAC(a[1], a[22]); + SQRADDAC(a[2], a[21]); + SQRADDAC(a[3], a[20]); + SQRADDAC(a[4], a[19]); + SQRADDAC(a[5], a[18]); + SQRADDAC(a[6], a[17]); + SQRADDAC(a[7], a[16]); + SQRADDAC(a[8], a[15]); + SQRADDAC(a[9], a[14]); + SQRADDAC(a[10], a[13]); + SQRADDAC(a[11], a[12]); + SQRADDDB; + COMBA_STORE(b[23]); + + /* output 24 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[24]); + SQRADDAC(a[1], a[23]); + SQRADDAC(a[2], a[22]); + SQRADDAC(a[3], a[21]); + SQRADDAC(a[4], a[20]); + SQRADDAC(a[5], a[19]); + SQRADDAC(a[6], a[18]); + SQRADDAC(a[7], a[17]); + SQRADDAC(a[8], a[16]); + SQRADDAC(a[9], a[15]); + SQRADDAC(a[10], a[14]); + SQRADDAC(a[11], a[13]); + SQRADDDB; + SQRADD(a[12], a[12]); + COMBA_STORE(b[24]); + + /* output 25 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[25]); + SQRADDAC(a[1], a[24]); + SQRADDAC(a[2], a[23]); + SQRADDAC(a[3], a[22]); + SQRADDAC(a[4], a[21]); + SQRADDAC(a[5], a[20]); + SQRADDAC(a[6], a[19]); + SQRADDAC(a[7], a[18]); + SQRADDAC(a[8], a[17]); + SQRADDAC(a[9], a[16]); + SQRADDAC(a[10], a[15]); + SQRADDAC(a[11], a[14]); + 
SQRADDAC(a[12], a[13]); + SQRADDDB; + COMBA_STORE(b[25]); + + /* output 26 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[26]); + SQRADDAC(a[1], a[25]); + SQRADDAC(a[2], a[24]); + SQRADDAC(a[3], a[23]); + SQRADDAC(a[4], a[22]); + SQRADDAC(a[5], a[21]); + SQRADDAC(a[6], a[20]); + SQRADDAC(a[7], a[19]); + SQRADDAC(a[8], a[18]); + SQRADDAC(a[9], a[17]); + SQRADDAC(a[10], a[16]); + SQRADDAC(a[11], a[15]); + SQRADDAC(a[12], a[14]); + SQRADDDB; + SQRADD(a[13], a[13]); + COMBA_STORE(b[26]); + + /* output 27 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[27]); + SQRADDAC(a[1], a[26]); + SQRADDAC(a[2], a[25]); + SQRADDAC(a[3], a[24]); + SQRADDAC(a[4], a[23]); + SQRADDAC(a[5], a[22]); + SQRADDAC(a[6], a[21]); + SQRADDAC(a[7], a[20]); + SQRADDAC(a[8], a[19]); + SQRADDAC(a[9], a[18]); + SQRADDAC(a[10], a[17]); + SQRADDAC(a[11], a[16]); + SQRADDAC(a[12], a[15]); + SQRADDAC(a[13], a[14]); + SQRADDDB; + COMBA_STORE(b[27]); + + /* output 28 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[28]); + SQRADDAC(a[1], a[27]); + SQRADDAC(a[2], a[26]); + SQRADDAC(a[3], a[25]); + SQRADDAC(a[4], a[24]); + SQRADDAC(a[5], a[23]); + SQRADDAC(a[6], a[22]); + SQRADDAC(a[7], a[21]); + SQRADDAC(a[8], a[20]); + SQRADDAC(a[9], a[19]); + SQRADDAC(a[10], a[18]); + SQRADDAC(a[11], a[17]); + SQRADDAC(a[12], a[16]); + SQRADDAC(a[13], a[15]); + SQRADDDB; + SQRADD(a[14], a[14]); + COMBA_STORE(b[28]); + + /* output 29 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[29]); + SQRADDAC(a[1], a[28]); + SQRADDAC(a[2], a[27]); + SQRADDAC(a[3], a[26]); + SQRADDAC(a[4], a[25]); + SQRADDAC(a[5], a[24]); + SQRADDAC(a[6], a[23]); + SQRADDAC(a[7], a[22]); + SQRADDAC(a[8], a[21]); + SQRADDAC(a[9], a[20]); + SQRADDAC(a[10], a[19]); + SQRADDAC(a[11], a[18]); + SQRADDAC(a[12], a[17]); + SQRADDAC(a[13], a[16]); + SQRADDAC(a[14], a[15]); + SQRADDDB; + COMBA_STORE(b[29]); + + /* output 30 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[30]); + SQRADDAC(a[1], a[29]); + SQRADDAC(a[2], a[28]); + SQRADDAC(a[3], a[27]); + SQRADDAC(a[4], a[26]); + SQRADDAC(a[5], a[25]); + 
SQRADDAC(a[6], a[24]); + SQRADDAC(a[7], a[23]); + SQRADDAC(a[8], a[22]); + SQRADDAC(a[9], a[21]); + SQRADDAC(a[10], a[20]); + SQRADDAC(a[11], a[19]); + SQRADDAC(a[12], a[18]); + SQRADDAC(a[13], a[17]); + SQRADDAC(a[14], a[16]); + SQRADDDB; + SQRADD(a[15], a[15]); + COMBA_STORE(b[30]); + + /* output 31 */ + CARRY_FORWARD; + SQRADDSC(a[0], a[31]); + SQRADDAC(a[1], a[30]); + SQRADDAC(a[2], a[29]); + SQRADDAC(a[3], a[28]); + SQRADDAC(a[4], a[27]); + SQRADDAC(a[5], a[26]); + SQRADDAC(a[6], a[25]); + SQRADDAC(a[7], a[24]); + SQRADDAC(a[8], a[23]); + SQRADDAC(a[9], a[22]); + SQRADDAC(a[10], a[21]); + SQRADDAC(a[11], a[20]); + SQRADDAC(a[12], a[19]); + SQRADDAC(a[13], a[18]); + SQRADDAC(a[14], a[17]); + SQRADDAC(a[15], a[16]); + SQRADDDB; + COMBA_STORE(b[31]); + + /* output 32 */ + CARRY_FORWARD; + SQRADDSC(a[1], a[31]); + SQRADDAC(a[2], a[30]); + SQRADDAC(a[3], a[29]); + SQRADDAC(a[4], a[28]); + SQRADDAC(a[5], a[27]); + SQRADDAC(a[6], a[26]); + SQRADDAC(a[7], a[25]); + SQRADDAC(a[8], a[24]); + SQRADDAC(a[9], a[23]); + SQRADDAC(a[10], a[22]); + SQRADDAC(a[11], a[21]); + SQRADDAC(a[12], a[20]); + SQRADDAC(a[13], a[19]); + SQRADDAC(a[14], a[18]); + SQRADDAC(a[15], a[17]); + SQRADDDB; + SQRADD(a[16], a[16]); + COMBA_STORE(b[32]); + + /* output 33 */ + CARRY_FORWARD; + SQRADDSC(a[2], a[31]); + SQRADDAC(a[3], a[30]); + SQRADDAC(a[4], a[29]); + SQRADDAC(a[5], a[28]); + SQRADDAC(a[6], a[27]); + SQRADDAC(a[7], a[26]); + SQRADDAC(a[8], a[25]); + SQRADDAC(a[9], a[24]); + SQRADDAC(a[10], a[23]); + SQRADDAC(a[11], a[22]); + SQRADDAC(a[12], a[21]); + SQRADDAC(a[13], a[20]); + SQRADDAC(a[14], a[19]); + SQRADDAC(a[15], a[18]); + SQRADDAC(a[16], a[17]); + SQRADDDB; + COMBA_STORE(b[33]); + + /* output 34 */ + CARRY_FORWARD; + SQRADDSC(a[3], a[31]); + SQRADDAC(a[4], a[30]); + SQRADDAC(a[5], a[29]); + SQRADDAC(a[6], a[28]); + SQRADDAC(a[7], a[27]); + SQRADDAC(a[8], a[26]); + SQRADDAC(a[9], a[25]); + SQRADDAC(a[10], a[24]); + SQRADDAC(a[11], a[23]); + SQRADDAC(a[12], a[22]); + SQRADDAC(a[13], 
a[21]); + SQRADDAC(a[14], a[20]); + SQRADDAC(a[15], a[19]); + SQRADDAC(a[16], a[18]); + SQRADDDB; + SQRADD(a[17], a[17]); + COMBA_STORE(b[34]); + + /* output 35 */ + CARRY_FORWARD; + SQRADDSC(a[4], a[31]); + SQRADDAC(a[5], a[30]); + SQRADDAC(a[6], a[29]); + SQRADDAC(a[7], a[28]); + SQRADDAC(a[8], a[27]); + SQRADDAC(a[9], a[26]); + SQRADDAC(a[10], a[25]); + SQRADDAC(a[11], a[24]); + SQRADDAC(a[12], a[23]); + SQRADDAC(a[13], a[22]); + SQRADDAC(a[14], a[21]); + SQRADDAC(a[15], a[20]); + SQRADDAC(a[16], a[19]); + SQRADDAC(a[17], a[18]); + SQRADDDB; + COMBA_STORE(b[35]); + + /* output 36 */ + CARRY_FORWARD; + SQRADDSC(a[5], a[31]); + SQRADDAC(a[6], a[30]); + SQRADDAC(a[7], a[29]); + SQRADDAC(a[8], a[28]); + SQRADDAC(a[9], a[27]); + SQRADDAC(a[10], a[26]); + SQRADDAC(a[11], a[25]); + SQRADDAC(a[12], a[24]); + SQRADDAC(a[13], a[23]); + SQRADDAC(a[14], a[22]); + SQRADDAC(a[15], a[21]); + SQRADDAC(a[16], a[20]); + SQRADDAC(a[17], a[19]); + SQRADDDB; + SQRADD(a[18], a[18]); + COMBA_STORE(b[36]); + + /* output 37 */ + CARRY_FORWARD; + SQRADDSC(a[6], a[31]); + SQRADDAC(a[7], a[30]); + SQRADDAC(a[8], a[29]); + SQRADDAC(a[9], a[28]); + SQRADDAC(a[10], a[27]); + SQRADDAC(a[11], a[26]); + SQRADDAC(a[12], a[25]); + SQRADDAC(a[13], a[24]); + SQRADDAC(a[14], a[23]); + SQRADDAC(a[15], a[22]); + SQRADDAC(a[16], a[21]); + SQRADDAC(a[17], a[20]); + SQRADDAC(a[18], a[19]); + SQRADDDB; + COMBA_STORE(b[37]); + + /* output 38 */ + CARRY_FORWARD; + SQRADDSC(a[7], a[31]); + SQRADDAC(a[8], a[30]); + SQRADDAC(a[9], a[29]); + SQRADDAC(a[10], a[28]); + SQRADDAC(a[11], a[27]); + SQRADDAC(a[12], a[26]); + SQRADDAC(a[13], a[25]); + SQRADDAC(a[14], a[24]); + SQRADDAC(a[15], a[23]); + SQRADDAC(a[16], a[22]); + SQRADDAC(a[17], a[21]); + SQRADDAC(a[18], a[20]); + SQRADDDB; + SQRADD(a[19], a[19]); + COMBA_STORE(b[38]); + + /* output 39 */ + CARRY_FORWARD; + SQRADDSC(a[8], a[31]); + SQRADDAC(a[9], a[30]); + SQRADDAC(a[10], a[29]); + SQRADDAC(a[11], a[28]); + SQRADDAC(a[12], a[27]); + SQRADDAC(a[13], 
a[26]); + SQRADDAC(a[14], a[25]); + SQRADDAC(a[15], a[24]); + SQRADDAC(a[16], a[23]); + SQRADDAC(a[17], a[22]); + SQRADDAC(a[18], a[21]); + SQRADDAC(a[19], a[20]); + SQRADDDB; + COMBA_STORE(b[39]); + + /* output 40 */ + CARRY_FORWARD; + SQRADDSC(a[9], a[31]); + SQRADDAC(a[10], a[30]); + SQRADDAC(a[11], a[29]); + SQRADDAC(a[12], a[28]); + SQRADDAC(a[13], a[27]); + SQRADDAC(a[14], a[26]); + SQRADDAC(a[15], a[25]); + SQRADDAC(a[16], a[24]); + SQRADDAC(a[17], a[23]); + SQRADDAC(a[18], a[22]); + SQRADDAC(a[19], a[21]); + SQRADDDB; + SQRADD(a[20], a[20]); + COMBA_STORE(b[40]); + + /* output 41 */ + CARRY_FORWARD; + SQRADDSC(a[10], a[31]); + SQRADDAC(a[11], a[30]); + SQRADDAC(a[12], a[29]); + SQRADDAC(a[13], a[28]); + SQRADDAC(a[14], a[27]); + SQRADDAC(a[15], a[26]); + SQRADDAC(a[16], a[25]); + SQRADDAC(a[17], a[24]); + SQRADDAC(a[18], a[23]); + SQRADDAC(a[19], a[22]); + SQRADDAC(a[20], a[21]); + SQRADDDB; + COMBA_STORE(b[41]); + + /* output 42 */ + CARRY_FORWARD; + SQRADDSC(a[11], a[31]); + SQRADDAC(a[12], a[30]); + SQRADDAC(a[13], a[29]); + SQRADDAC(a[14], a[28]); + SQRADDAC(a[15], a[27]); + SQRADDAC(a[16], a[26]); + SQRADDAC(a[17], a[25]); + SQRADDAC(a[18], a[24]); + SQRADDAC(a[19], a[23]); + SQRADDAC(a[20], a[22]); + SQRADDDB; + SQRADD(a[21], a[21]); + COMBA_STORE(b[42]); + + /* output 43 */ + CARRY_FORWARD; + SQRADDSC(a[12], a[31]); + SQRADDAC(a[13], a[30]); + SQRADDAC(a[14], a[29]); + SQRADDAC(a[15], a[28]); + SQRADDAC(a[16], a[27]); + SQRADDAC(a[17], a[26]); + SQRADDAC(a[18], a[25]); + SQRADDAC(a[19], a[24]); + SQRADDAC(a[20], a[23]); + SQRADDAC(a[21], a[22]); + SQRADDDB; + COMBA_STORE(b[43]); + + /* output 44 */ + CARRY_FORWARD; + SQRADDSC(a[13], a[31]); + SQRADDAC(a[14], a[30]); + SQRADDAC(a[15], a[29]); + SQRADDAC(a[16], a[28]); + SQRADDAC(a[17], a[27]); + SQRADDAC(a[18], a[26]); + SQRADDAC(a[19], a[25]); + SQRADDAC(a[20], a[24]); + SQRADDAC(a[21], a[23]); + SQRADDDB; + SQRADD(a[22], a[22]); + COMBA_STORE(b[44]); + + /* output 45 */ + CARRY_FORWARD; + 
SQRADDSC(a[14], a[31]); + SQRADDAC(a[15], a[30]); + SQRADDAC(a[16], a[29]); + SQRADDAC(a[17], a[28]); + SQRADDAC(a[18], a[27]); + SQRADDAC(a[19], a[26]); + SQRADDAC(a[20], a[25]); + SQRADDAC(a[21], a[24]); + SQRADDAC(a[22], a[23]); + SQRADDDB; + COMBA_STORE(b[45]); + + /* output 46 */ + CARRY_FORWARD; + SQRADDSC(a[15], a[31]); + SQRADDAC(a[16], a[30]); + SQRADDAC(a[17], a[29]); + SQRADDAC(a[18], a[28]); + SQRADDAC(a[19], a[27]); + SQRADDAC(a[20], a[26]); + SQRADDAC(a[21], a[25]); + SQRADDAC(a[22], a[24]); + SQRADDDB; + SQRADD(a[23], a[23]); + COMBA_STORE(b[46]); + + /* output 47 */ + CARRY_FORWARD; + SQRADDSC(a[16], a[31]); + SQRADDAC(a[17], a[30]); + SQRADDAC(a[18], a[29]); + SQRADDAC(a[19], a[28]); + SQRADDAC(a[20], a[27]); + SQRADDAC(a[21], a[26]); + SQRADDAC(a[22], a[25]); + SQRADDAC(a[23], a[24]); + SQRADDDB; + COMBA_STORE(b[47]); + + /* output 48 */ + CARRY_FORWARD; + SQRADDSC(a[17], a[31]); + SQRADDAC(a[18], a[30]); + SQRADDAC(a[19], a[29]); + SQRADDAC(a[20], a[28]); + SQRADDAC(a[21], a[27]); + SQRADDAC(a[22], a[26]); + SQRADDAC(a[23], a[25]); + SQRADDDB; + SQRADD(a[24], a[24]); + COMBA_STORE(b[48]); + + /* output 49 */ + CARRY_FORWARD; + SQRADDSC(a[18], a[31]); + SQRADDAC(a[19], a[30]); + SQRADDAC(a[20], a[29]); + SQRADDAC(a[21], a[28]); + SQRADDAC(a[22], a[27]); + SQRADDAC(a[23], a[26]); + SQRADDAC(a[24], a[25]); + SQRADDDB; + COMBA_STORE(b[49]); + + /* output 50 */ + CARRY_FORWARD; + SQRADDSC(a[19], a[31]); + SQRADDAC(a[20], a[30]); + SQRADDAC(a[21], a[29]); + SQRADDAC(a[22], a[28]); + SQRADDAC(a[23], a[27]); + SQRADDAC(a[24], a[26]); + SQRADDDB; + SQRADD(a[25], a[25]); + COMBA_STORE(b[50]); + + /* output 51 */ + CARRY_FORWARD; + SQRADDSC(a[20], a[31]); + SQRADDAC(a[21], a[30]); + SQRADDAC(a[22], a[29]); + SQRADDAC(a[23], a[28]); + SQRADDAC(a[24], a[27]); + SQRADDAC(a[25], a[26]); + SQRADDDB; + COMBA_STORE(b[51]); + + /* output 52 */ + CARRY_FORWARD; + SQRADDSC(a[21], a[31]); + SQRADDAC(a[22], a[30]); + SQRADDAC(a[23], a[29]); + SQRADDAC(a[24], a[28]); + 
SQRADDAC(a[25], a[27]); + SQRADDDB; + SQRADD(a[26], a[26]); + COMBA_STORE(b[52]); + + /* output 53 */ + CARRY_FORWARD; + SQRADDSC(a[22], a[31]); + SQRADDAC(a[23], a[30]); + SQRADDAC(a[24], a[29]); + SQRADDAC(a[25], a[28]); + SQRADDAC(a[26], a[27]); + SQRADDDB; + COMBA_STORE(b[53]); + + /* output 54 */ + CARRY_FORWARD; + SQRADDSC(a[23], a[31]); + SQRADDAC(a[24], a[30]); + SQRADDAC(a[25], a[29]); + SQRADDAC(a[26], a[28]); + SQRADDDB; + SQRADD(a[27], a[27]); + COMBA_STORE(b[54]); + + /* output 55 */ + CARRY_FORWARD; + SQRADDSC(a[24], a[31]); + SQRADDAC(a[25], a[30]); + SQRADDAC(a[26], a[29]); + SQRADDAC(a[27], a[28]); + SQRADDDB; + COMBA_STORE(b[55]); + + /* output 56 */ + CARRY_FORWARD; + SQRADDSC(a[25], a[31]); + SQRADDAC(a[26], a[30]); + SQRADDAC(a[27], a[29]); + SQRADDDB; + SQRADD(a[28], a[28]); + COMBA_STORE(b[56]); + + /* output 57 */ + CARRY_FORWARD; + SQRADDSC(a[26], a[31]); + SQRADDAC(a[27], a[30]); + SQRADDAC(a[28], a[29]); + SQRADDDB; + COMBA_STORE(b[57]); + + /* output 58 */ + CARRY_FORWARD; + SQRADD2(a[27], a[31]); + SQRADD2(a[28], a[30]); + SQRADD(a[29], a[29]); + COMBA_STORE(b[58]); + + /* output 59 */ + CARRY_FORWARD; + SQRADD2(a[28], a[31]); + SQRADD2(a[29], a[30]); + COMBA_STORE(b[59]); + + /* output 60 */ + CARRY_FORWARD; + SQRADD2(a[29], a[31]); + SQRADD(a[30], a[30]); + COMBA_STORE(b[60]); + + /* output 61 */ + CARRY_FORWARD; + SQRADD2(a[30], a[31]); + COMBA_STORE(b[61]); + + /* output 62 */ + CARRY_FORWARD; + SQRADD(a[31], a[31]); + COMBA_STORE(b[62]); + COMBA_STORE2(b[63]); + COMBA_FINI; + + B->used = 64; + B->sign = ZPOS; + memcpy(B->dp, b, 64 * sizeof(mp_digit)); + mp_clamp(B); +} diff --git a/security/nss/lib/freebl/mpi/mp_comba_amd64_masm.asm b/security/nss/lib/freebl/mpi/mp_comba_amd64_masm.asm new file mode 100644 index 000000000..cb432583f --- /dev/null +++ b/security/nss/lib/freebl/mpi/mp_comba_amd64_masm.asm @@ -0,0 +1,13066 @@ +; This Source Code Form is subject to the terms of the Mozilla Public +; License, v. 2.0. 
If a copy of the MPL was not distributed with this +; file, You can obtain one at http://mozilla.org/MPL/2.0/. + +;/* TomsFastMath, a fast ISO C bignum library. +; * +; * This project is meant to fill in where LibTomMath +; * falls short. That is speed ;-) +; * +; * This project is public domain and free for all purposes. +; * +; * Tom St Denis, tomstdenis@iahu.ca +; */ + +;/* +; * The source file from which this assembly was derived +; * comes from TFM v0.03, which has the above license. +; * This source was derived from mp_comba_amd64.sun.s and converted to +; * MASM syntax. +; */ + +.CODE + +externdef memcpy:PROC + +public s_mp_mul_comba_4 +public s_mp_mul_comba_8 +public s_mp_mul_comba_16 +public s_mp_mul_comba_32 +public s_mp_sqr_comba_8 +public s_mp_sqr_comba_16 +public s_mp_sqr_comba_32 + + +; void s_mp_mul_comba_4(const mp_int *A, const mp_int *B, mp_int *C) +; +; Unrolled 4x4-digit multiply producing 8 result digits in C. +; The three arguments are taken from rcx, rdx and r8 and moved to rdi, rsi +; and rbx; rdi, rsi, r12, rbp and rbx are saved on entry and restored on exit. +; The four digits of A and of B are first copied into a 64-byte stack area +; (A at rsp+0..24, B at rsp+32..56); C's digit array is written only after +; that copy. Each column is accumulated with mul/add/adc into a rotating +; three-register carry chain and stored to C's digit array as a qword. +; Structure fields are addressed by fixed offsets: dword at +0 (xor of the +; two inputs' values is stored there, or 0 if the result has no digits), +; dword at +8 (digit count, initialised to 8 then reduced), qword at +16 +; (pointer to the digit array). +; NOTE(review): these offsets presumably mirror the C mp_int layout +; (sign / used / dp) - confirm against the structure definition. + + ALIGN 16 +s_mp_mul_comba_4 PROC + + push rdi + push rsi + + mov rdi, rcx + mov rsi, rdx + mov rdx, r8 + + push r12 + push rbp + push rbx + sub rsp, 64 + mov r9, qword ptr [16+rdi] + mov rbx, rdx + mov rdx, qword ptr [16+rsi] + mov rax, qword ptr [r9] + mov qword ptr [-64+64+rsp], rax + mov r8, qword ptr [8+r9] + mov qword ptr [-56+64+rsp], r8 + mov rbp, qword ptr [16+r9] + mov qword ptr [-48+64+rsp], rbp + mov r12, qword ptr [24+r9] + mov qword ptr [-40+64+rsp], r12 + mov rcx, qword ptr [rdx] + mov qword ptr [-32+64+rsp], rcx + mov r10, qword ptr [8+rdx] + mov qword ptr [-24+64+rsp], r10 + mov r11, qword ptr [16+rdx] + xor r10d, r10d + mov r8, r10 + mov r9, r10 + mov rbp, r10 + mov qword ptr [-16+64+rsp], r11 + mov r11, qword ptr [16+rbx] + mov rax, qword ptr [24+rdx] + mov qword ptr [-8+64+rsp], rax + mov rax, qword ptr [-64+64+rsp] + mul qword ptr [-32+64+rsp] + add r8, rax + adc r9, rdx + adc rbp, 0 + mov qword ptr [r11], r8 + mov r8, rbp + mov rbp, r10 + mov rax, qword ptr [-64+64+rsp] + mul qword ptr [-24+64+rsp] + add r9, rax + adc r8, rdx + adc rbp, 0 + mov r12, rbp + mov rax, qword ptr [-56+64+rsp]
+ mul qword ptr [-32+64+rsp] + add r9, rax + adc r8, rdx + adc r12, 0 + mov qword ptr [8+r11], r9 + mov r9, r12 + mov r12, r10 + mov rax, qword ptr [-64+64+rsp] + mul qword ptr [-16+64+rsp] + add r8, rax + adc r9, rdx + adc r12, 0 + mov rcx, r12 + mov rax, qword ptr [-56+64+rsp] + mul qword ptr [-24+64+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [-48+64+rsp] + mul qword ptr [-32+64+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [16+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-64+64+rsp] + mul qword ptr [-8+64+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-56+64+rsp] + mul qword ptr [-16+64+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-48+64+rsp] + mul qword ptr [-24+64+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rbp, r9 + mov r12, r8 + mov rax, qword ptr [-40+64+rsp] + mul qword ptr [-32+64+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [24+r11], rcx + mov r9, r12 + mov r8, rbp + mov rcx, r10 + mov rax, qword ptr [-56+64+rsp] + mul qword ptr [-8+64+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-48+64+rsp] + mul qword ptr [-16+64+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [-40+64+rsp] + mul qword ptr [-24+64+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [32+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-48+64+rsp] + mul qword ptr [-8+64+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov r12, r8 + mov rbp, r9 + mov rax, qword ptr [-40+64+rsp] + mul qword ptr [-16+64+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [40+r11], rcx + mov r8, rbp + mov rcx, r12 + mov rax, qword ptr [-40+64+rsp] + mul qword ptr [-8+64+rsp] + add r8, rax + adc rcx, rdx + adc r10, 0 + mov qword ptr [48+r11], r8 + mov esi, dword ptr [rsi] + xor esi, dword ptr [rdi] + test rcx, rcx + mov qword
ptr [56+r11], rcx + mov dword ptr [8+rbx], 8 + jne L9 +; Top digit is zero: drop leading zero digits, one per iteration. + ALIGN 16 +L18: + mov edx, dword ptr [8+rbx] + lea edi, dword ptr [-1+rdx] + test edi, edi + mov dword ptr [8+rbx], edi + je L9 + lea r10d, dword ptr [-2+rdx] +; Digits are 64 bits wide, so the whole qword must be tested; a dword +; compare would treat a digit with a zero low half as zero. + cmp qword ptr [r11+r10*8], 0 + je L18 +L9: + mov edx, dword ptr [8+rbx] + xor r11d, r11d + test edx, edx + cmovne r11d, esi + mov dword ptr [rbx], r11d + add rsp, 64 + pop rbx + pop rbp + pop r12 + + pop rsi + pop rdi + + ret + +s_mp_mul_comba_4 ENDP + + +; void s_mp_mul_comba_8(const mp_int *A, const mp_int *B, mp_int *C) + + ALIGN 16 +s_mp_mul_comba_8 PROC + + push rdi + push rsi + + mov rdi, rcx + mov rsi, rdx + mov rdx, r8 + + push r12 + push rbp + push rbx + mov rbx, rdx + sub rsp, 8+128 + mov rdx, qword ptr [16+rdi] + mov r8, qword ptr [rdx] + mov qword ptr [-120+128+rsp], r8 + mov rbp, qword ptr [8+rdx] + mov qword ptr [-112+128+rsp], rbp + mov r9, qword ptr [16+rdx] + mov qword ptr [-104+128+rsp], r9 + mov r12, qword ptr [24+rdx] + mov qword ptr [-96+128+rsp], r12 + mov rcx, qword ptr [32+rdx] + mov qword ptr [-88+128+rsp], rcx + mov r10, qword ptr [40+rdx] + mov qword ptr [-80+128+rsp], r10 + mov r11, qword ptr [48+rdx] + mov qword ptr [-72+128+rsp], r11 + mov rax, qword ptr [56+rdx] + mov rdx, qword ptr [16+rsi] + mov qword ptr [-64+128+rsp], rax + mov r8, qword ptr [rdx] + mov qword ptr [-56+128+rsp], r8 + mov rbp, qword ptr [8+rdx] + mov qword ptr [-48+128+rsp], rbp + mov r9, qword ptr [16+rdx] + mov qword ptr [-40+128+rsp], r9 + mov r12, qword ptr [24+rdx] + mov qword ptr [-32+128+rsp], r12 + mov rcx, qword ptr [32+rdx] + mov qword ptr [-24+128+rsp], rcx + mov r10, qword ptr [40+rdx] + mov qword ptr [-16+128+rsp], r10 + mov r11, qword ptr [48+rdx] + xor r10d, r10d + mov r8, r10 + mov r9, r10 + mov rbp, r10 + mov qword ptr [-8+128+rsp], r11 + mov r11, qword ptr [16+rbx] + mov rax, qword ptr [56+rdx] + mov qword ptr [128+rsp], rax + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [-56+128+rsp] + add r8, rax + adc r9, rdx + adc rbp, 0 + mov
qword ptr [r11], r8 + mov r8, rbp + mov rbp, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [-48+128+rsp] + add r9, rax + adc r8, rdx + adc rbp, 0 + mov r12, rbp + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [-56+128+rsp] + add r9, rax + adc r8, rdx + adc r12, 0 + mov qword ptr [8+r11], r9 + mov r9, r12 + mov r12, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [-40+128+rsp] + add r8, rax + adc r9, rdx + adc r12, 0 + mov rcx, r12 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [-48+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [-56+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [16+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [-32+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [-40+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [-48+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rbp, r9 + mov r12, r8 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [-56+128+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [24+r11], rcx + mov r9, r12 + mov r8, rbp + mov rcx, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [-24+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [-32+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [-40+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [-48+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [-56+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [32+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr 
[-16+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [-24+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [-32+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [-40+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [-48+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rbp, r9 + mov r12, r8 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [-56+128+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [40+r11], rcx + mov r9, r12 + mov r8, rbp + mov rcx, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [-8+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [-16+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [-24+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [-32+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [-40+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [-48+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [-56+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [48+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [-8+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [-16+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [-24+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-88+128+rsp] + mul 
qword ptr [-32+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [-40+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [-48+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rbp, r9 + mov r12, r8 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [-56+128+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [56+r11], rcx + mov r9, r12 + mov r8, rbp + mov rcx, r10 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [-8+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [-16+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [-24+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [-32+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [-40+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [-48+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [64+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [-8+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [-16+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [-24+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [-32+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rbp, r9 + mov r12, r8 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [-40+128+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov 
qword ptr [72+r11], rcx + mov r9, r12 + mov r8, rbp + mov rcx, r10 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [-8+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [-16+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [-24+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [-32+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [80+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [-8+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [-16+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rbp, r9 + mov r12, r8 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [-24+128+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [88+r11], rcx + mov r9, r12 + mov r8, rbp + mov rcx, r10 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [-8+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [-16+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [96+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov r12, r8 + mov rbp, r9 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [-8+128+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [104+r11], rcx + mov r8, rbp + mov rcx, r12 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [128+rsp] + add r8, 
rax + adc rcx, rdx + adc r10, 0 + mov qword ptr [112+r11], r8 + mov esi, dword ptr [rsi] + xor esi, dword ptr [rdi] + test rcx, rcx + mov qword ptr [120+r11], rcx + mov dword ptr [8+rbx], 16 + jne L35 + ALIGN 16 +L43: + mov edx, dword ptr [8+rbx] + lea edi, dword ptr [-1+rdx] + test edi, edi + mov dword ptr [8+rbx], edi + je L35 + lea eax, dword ptr [-2+rdx] + cmp dword ptr [r11+rax*8], 0 + je L43 +L35: + mov r11d, dword ptr [8+rbx] + xor edx, edx + test r11d, r11d + cmovne edx, esi + mov dword ptr [rbx], edx + add rsp, 8+128 + pop rbx + pop rbp + pop r12 + + pop rsi + pop rdi + + ret + +s_mp_mul_comba_8 ENDP + + +; void s_mp_mul_comba_16(const mp_int *A, const mp_int *B, mp_int *C); + + ALIGN 16 +s_mp_mul_comba_16 PROC + + push rdi + push rsi + + mov rdi, rcx + mov rsi, rdx + mov rdx, r8 + + push r12 + push rbp + push rbx + mov rbx, rdx + sub rsp, 136+128 + mov rax, qword ptr [16+rdi] + mov r8, qword ptr [rax] + mov qword ptr [-120+128+rsp], r8 + mov rbp, qword ptr [8+rax] + mov qword ptr [-112+128+rsp], rbp + mov r9, qword ptr [16+rax] + mov qword ptr [-104+128+rsp], r9 + mov r12, qword ptr [24+rax] + mov qword ptr [-96+128+rsp], r12 + mov rcx, qword ptr [32+rax] + mov qword ptr [-88+128+rsp], rcx + mov r10, qword ptr [40+rax] + mov qword ptr [-80+128+rsp], r10 + mov rdx, qword ptr [48+rax] + mov qword ptr [-72+128+rsp], rdx + mov r11, qword ptr [56+rax] + mov qword ptr [-64+128+rsp], r11 + mov r8, qword ptr [64+rax] + mov qword ptr [-56+128+rsp], r8 + mov rbp, qword ptr [72+rax] + mov qword ptr [-48+128+rsp], rbp + mov r9, qword ptr [80+rax] + mov qword ptr [-40+128+rsp], r9 + mov r12, qword ptr [88+rax] + mov qword ptr [-32+128+rsp], r12 + mov rcx, qword ptr [96+rax] + mov qword ptr [-24+128+rsp], rcx + mov r10, qword ptr [104+rax] + mov qword ptr [-16+128+rsp], r10 + mov rdx, qword ptr [112+rax] + mov qword ptr [-8+128+rsp], rdx + mov r11, qword ptr [120+rax] + mov qword ptr [128+rsp], r11 + mov r11, qword ptr [16+rsi] + mov r8, qword ptr [r11] + mov qword ptr 
[8+128+rsp], r8 + mov rbp, qword ptr [8+r11] + mov qword ptr [16+128+rsp], rbp + mov r9, qword ptr [16+r11] + mov qword ptr [24+128+rsp], r9 + mov r12, qword ptr [24+r11] + mov qword ptr [32+128+rsp], r12 + mov rcx, qword ptr [32+r11] + mov qword ptr [40+128+rsp], rcx + mov r10, qword ptr [40+r11] + mov qword ptr [48+128+rsp], r10 + mov rdx, qword ptr [48+r11] + mov qword ptr [56+128+rsp], rdx + mov rax, qword ptr [56+r11] + mov qword ptr [64+128+rsp], rax + mov r8, qword ptr [64+r11] + mov qword ptr [72+128+rsp], r8 + mov rbp, qword ptr [72+r11] + mov qword ptr [80+128+rsp], rbp + mov r9, qword ptr [80+r11] + mov qword ptr [88+128+rsp], r9 + mov r12, qword ptr [88+r11] + mov qword ptr [96+128+rsp], r12 + mov rcx, qword ptr [96+r11] + mov qword ptr [104+128+rsp], rcx + mov r10, qword ptr [104+r11] + mov qword ptr [112+128+rsp], r10 + mov rdx, qword ptr [112+r11] + xor r10d, r10d + mov r8, r10 + mov r9, r10 + mov rbp, r10 + mov qword ptr [120+128+rsp], rdx + mov rax, qword ptr [120+r11] + mov qword ptr [128+128+rsp], rax + mov r11, qword ptr [16+rbx] + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [8+128+rsp] + add r8, rax + adc r9, rdx + adc rbp, 0 + mov qword ptr [r11], r8 + mov r8, rbp + mov rbp, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [16+128+rsp] + add r9, rax + adc r8, rdx + adc rbp, 0 + mov r12, rbp + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [8+128+rsp] + add r9, rax + adc r8, rdx + adc r12, 0 + mov qword ptr [8+r11], r9 + mov r9, r12 + mov r12, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [24+128+rsp] + add r8, rax + adc r9, rdx + adc r12, 0 + mov rcx, r12 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [16+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [8+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [16+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr 
[32+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [24+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [16+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rbp, r9 + mov r12, r8 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [8+128+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [24+r11], rcx + mov r9, r12 + mov r8, rbp + mov rcx, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [40+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [32+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [24+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [16+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [8+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [32+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [48+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [40+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [32+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [24+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [16+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rbp, r9 + mov r12, r8 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [8+128+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [40+r11], rcx + mov r9, r12 + mov r8, rbp + mov rcx, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [56+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr 
[48+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [40+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [32+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [24+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [16+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [8+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [48+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [64+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [56+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [48+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [40+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [32+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [24+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [16+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rbp, r9 + mov r12, r8 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [8+128+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [56+r11], rcx + mov r9, r12 + mov r8, rbp + mov rcx, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [72+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [64+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [56+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr 
[48+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [40+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [32+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [24+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [16+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [-56+128+rsp] + mul qword ptr [8+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [64+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [80+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [72+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [64+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [56+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [48+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [40+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [32+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [24+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-56+128+rsp] + mul qword ptr [16+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rbp, r9 + mov r12, r8 + mov rax, qword ptr [-48+128+rsp] + mul qword ptr [8+128+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [72+r11], rcx + mov r9, r12 + mov r8, rbp + mov rcx, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [88+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr 
[80+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [72+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [64+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [56+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [48+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [40+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [32+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-56+128+rsp] + mul qword ptr [24+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-48+128+rsp] + mul qword ptr [16+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [-40+128+rsp] + mul qword ptr [8+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [80+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [96+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [88+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [80+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [72+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [64+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [56+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [48+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [40+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-56+128+rsp] + mul qword ptr 
[32+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-48+128+rsp] + mul qword ptr [24+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-40+128+rsp] + mul qword ptr [16+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rbp, r9 + mov r12, r8 + mov rax, qword ptr [-32+128+rsp] + mul qword ptr [8+128+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [88+r11], rcx + mov r9, r12 + mov r8, rbp + mov rcx, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [104+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [96+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [88+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [80+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [72+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [64+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [56+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [48+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-56+128+rsp] + mul qword ptr [40+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-48+128+rsp] + mul qword ptr [32+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-40+128+rsp] + mul qword ptr [24+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-32+128+rsp] + mul qword ptr [16+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [-24+128+rsp] + mul qword ptr [8+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [96+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr 
[112+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [104+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [96+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [88+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [80+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [72+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [64+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [56+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-56+128+rsp] + mul qword ptr [48+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-48+128+rsp] + mul qword ptr [40+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-40+128+rsp] + mul qword ptr [32+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-32+128+rsp] + mul qword ptr [24+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-24+128+rsp] + mul qword ptr [16+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rbp, r9 + mov r12, r8 + mov rax, qword ptr [-16+128+rsp] + mul qword ptr [8+128+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [104+r11], rcx + mov r9, r12 + mov r8, rbp + mov rcx, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [120+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [112+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [104+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [96+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword 
ptr [88+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [80+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [72+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [64+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-56+128+rsp] + mul qword ptr [56+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-48+128+rsp] + mul qword ptr [48+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-40+128+rsp] + mul qword ptr [40+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-32+128+rsp] + mul qword ptr [32+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-24+128+rsp] + mul qword ptr [24+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-16+128+rsp] + mul qword ptr [16+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [-8+128+rsp] + mul qword ptr [8+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [112+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-120+128+rsp] + mul qword ptr [128+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [120+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [112+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [104+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [96+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [88+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [80+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-64+128+rsp] + mul qword 
ptr [72+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-56+128+rsp] + mul qword ptr [64+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-48+128+rsp] + mul qword ptr [56+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-40+128+rsp] + mul qword ptr [48+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-32+128+rsp] + mul qword ptr [40+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-24+128+rsp] + mul qword ptr [32+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-16+128+rsp] + mul qword ptr [24+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-8+128+rsp] + mul qword ptr [16+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rbp, r9 + mov r12, r8 + mov rax, qword ptr [128+rsp] + mul qword ptr [8+128+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [120+r11], rcx + mov r9, r12 + mov r8, rbp + mov rcx, r10 + mov rax, qword ptr [-112+128+rsp] + mul qword ptr [128+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [120+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [112+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [104+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [96+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [88+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [80+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-56+128+rsp] + mul qword ptr [72+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-48+128+rsp] + mul qword ptr [64+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-40+128+rsp] + mul qword ptr 
[56+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-32+128+rsp] + mul qword ptr [48+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-24+128+rsp] + mul qword ptr [40+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-16+128+rsp] + mul qword ptr [32+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-8+128+rsp] + mul qword ptr [24+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [128+rsp] + mul qword ptr [16+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [128+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-104+128+rsp] + mul qword ptr [128+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [120+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [112+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [104+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [96+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [88+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-56+128+rsp] + mul qword ptr [80+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-48+128+rsp] + mul qword ptr [72+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-40+128+rsp] + mul qword ptr [64+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-32+128+rsp] + mul qword ptr [56+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-24+128+rsp] + mul qword ptr [48+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-16+128+rsp] + mul qword ptr [40+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-8+128+rsp] + mul qword ptr 
[32+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rbp, r9 + mov r12, r8 + mov rax, qword ptr [128+rsp] + mul qword ptr [24+128+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [136+r11], rcx + mov r9, r12 + mov r8, rbp + mov rcx, r10 + mov rax, qword ptr [-96+128+rsp] + mul qword ptr [128+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [120+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [112+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [104+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [96+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-56+128+rsp] + mul qword ptr [88+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-48+128+rsp] + mul qword ptr [80+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-40+128+rsp] + mul qword ptr [72+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-32+128+rsp] + mul qword ptr [64+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-24+128+rsp] + mul qword ptr [56+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-16+128+rsp] + mul qword ptr [48+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-8+128+rsp] + mul qword ptr [40+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [128+rsp] + mul qword ptr [32+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [144+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-88+128+rsp] + mul qword ptr [128+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [120+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr 
[112+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [104+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-56+128+rsp] + mul qword ptr [96+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-48+128+rsp] + mul qword ptr [88+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-40+128+rsp] + mul qword ptr [80+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-32+128+rsp] + mul qword ptr [72+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-24+128+rsp] + mul qword ptr [64+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-16+128+rsp] + mul qword ptr [56+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-8+128+rsp] + mul qword ptr [48+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rbp, r9 + mov r12, r8 + mov rax, qword ptr [128+rsp] + mul qword ptr [40+128+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [152+r11], rcx + mov r9, r12 + mov r8, rbp + mov rcx, r10 + mov rax, qword ptr [-80+128+rsp] + mul qword ptr [128+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [120+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [112+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-56+128+rsp] + mul qword ptr [104+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-48+128+rsp] + mul qword ptr [96+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-40+128+rsp] + mul qword ptr [88+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-32+128+rsp] + mul qword ptr [80+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-24+128+rsp] + mul qword ptr [72+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-16+128+rsp] + mul qword ptr 
[64+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-8+128+rsp] + mul qword ptr [56+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [128+rsp] + mul qword ptr [48+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [160+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-72+128+rsp] + mul qword ptr [128+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [120+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-56+128+rsp] + mul qword ptr [112+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-48+128+rsp] + mul qword ptr [104+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-40+128+rsp] + mul qword ptr [96+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-32+128+rsp] + mul qword ptr [88+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-24+128+rsp] + mul qword ptr [80+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-16+128+rsp] + mul qword ptr [72+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-8+128+rsp] + mul qword ptr [64+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rbp, r9 + mov r12, r8 + mov rax, qword ptr [128+rsp] + mul qword ptr [56+128+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [168+r11], rcx + mov r9, r12 + mov r8, rbp + mov rcx, r10 + mov rax, qword ptr [-64+128+rsp] + mul qword ptr [128+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-56+128+rsp] + mul qword ptr [120+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-48+128+rsp] + mul qword ptr [112+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-40+128+rsp] + mul qword ptr [104+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-32+128+rsp] + mul qword ptr 
[96+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-24+128+rsp] + mul qword ptr [88+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-16+128+rsp] + mul qword ptr [80+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-8+128+rsp] + mul qword ptr [72+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [128+rsp] + mul qword ptr [64+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [176+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-56+128+rsp] + mul qword ptr [128+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-48+128+rsp] + mul qword ptr [120+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-40+128+rsp] + mul qword ptr [112+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-32+128+rsp] + mul qword ptr [104+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-24+128+rsp] + mul qword ptr [96+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-16+128+rsp] + mul qword ptr [88+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-8+128+rsp] + mul qword ptr [80+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rbp, r9 + mov r12, r8 + mov rax, qword ptr [128+rsp] + mul qword ptr [72+128+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [184+r11], rcx + mov r9, r12 + mov r8, rbp + mov rcx, r10 + mov rax, qword ptr [-48+128+rsp] + mul qword ptr [128+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-40+128+rsp] + mul qword ptr [120+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-32+128+rsp] + mul qword ptr [112+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-24+128+rsp] + mul qword ptr [104+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-16+128+rsp] + mul qword ptr 
[96+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-8+128+rsp] + mul qword ptr [88+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [128+rsp] + mul qword ptr [80+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [192+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-40+128+rsp] + mul qword ptr [128+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-32+128+rsp] + mul qword ptr [120+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-24+128+rsp] + mul qword ptr [112+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-16+128+rsp] + mul qword ptr [104+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-8+128+rsp] + mul qword ptr [96+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rbp, r9 + mov r12, r8 + mov rax, qword ptr [128+rsp] + mul qword ptr [88+128+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [200+r11], rcx + mov r9, r12 + mov r8, rbp + mov rcx, r10 + mov rax, qword ptr [-32+128+rsp] + mul qword ptr [128+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-24+128+rsp] + mul qword ptr [120+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-16+128+rsp] + mul qword ptr [112+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-8+128+rsp] + mul qword ptr [104+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [128+rsp] + mul qword ptr [96+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [208+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-24+128+rsp] + mul qword ptr [128+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-16+128+rsp] + mul qword ptr [120+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov rax, qword ptr [-8+128+rsp] + mul qword ptr [112+128+rsp] + 
add rcx, rax + adc r9, rdx + adc r8, 0 + mov rbp, r9 + mov r12, r8 + mov rax, qword ptr [128+rsp] + mul qword ptr [104+128+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [216+r11], rcx + mov r9, r12 + mov r8, rbp + mov rcx, r10 + mov rax, qword ptr [-16+128+rsp] + mul qword ptr [128+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rax, qword ptr [-8+128+rsp] + mul qword ptr [120+128+rsp] + add r8, rax + adc r9, rdx + adc rcx, 0 + mov rbp, r9 + mov r12, rcx + mov rax, qword ptr [128+rsp] + mul qword ptr [112+128+rsp] + add r8, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [224+r11], r8 + mov r9, r12 + mov rcx, rbp + mov r8, r10 + mov rax, qword ptr [-8+128+rsp] + mul qword ptr [128+128+rsp] + add rcx, rax + adc r9, rdx + adc r8, 0 + mov r12, r8 + mov rbp, r9 + mov rax, qword ptr [128+rsp] + mul qword ptr [120+128+rsp] + add rcx, rax + adc rbp, rdx + adc r12, 0 + mov qword ptr [232+r11], rcx + mov r8, rbp + mov rcx, r12 + mov rax, qword ptr [128+rsp] + mul qword ptr [128+128+rsp] + add r8, rax + adc rcx, rdx + adc r10, 0 + mov qword ptr [240+r11], r8 + mov esi, dword ptr [rsi] + xor esi, dword ptr [rdi] + test rcx, rcx + mov qword ptr [248+r11], rcx + mov dword ptr [8+rbx], 32 + jne L76 + ALIGN 16 +L84: + mov edx, dword ptr [8+rbx] + lea edi, dword ptr [-1+rdx] + test edi, edi + mov dword ptr [8+rbx], edi + je L76 + lea eax, dword ptr [-2+rdx] + cmp dword ptr [r11+rax*8], 0 + je L84 +L76: + mov edx, dword ptr [8+rbx] + xor r11d, r11d + test edx, edx + cmovne r11d, esi + mov dword ptr [rbx], r11d + add rsp, 136+128 + pop rbx + pop rbp + pop r12 + + pop rsi + pop rdi + + ret + +s_mp_mul_comba_16 ENDP + +; void s_mp_mul_comba_32(const mp_int *A, const mp_int *B, mp_int *C) + + + ALIGN 16 +s_mp_mul_comba_32 PROC ; a "FRAME" function + + push rdi + push rsi + + mov rdi, rcx + mov rsi, rdx + mov rdx, r8 + + push rbp + mov rbp, rsp + push r13 + mov r13, rdx +; mov edx, 256 + mov r8d, 256 + push r12 + mov r12, rsi + push rbx + mov rbx, rdi + sub 
rsp, 520+32 ; +32 for "home" storage +; mov rsi, qword ptr [16+rdi] +; lea rdi, qword ptr [-544+rbp] + mov rdx, qword ptr [16+rdi] + lea rcx, qword ptr [-544+rbp] + call memcpy +; mov rsi, qword ptr [16+r12] +; lea rdi, qword ptr [-288+rbp] +; mov edx, 256 + mov rdx, qword ptr [16+r12] + lea rcx, qword ptr [-288+rbp] + mov r8d, 256 + call memcpy + mov r9, qword ptr [16+r13] + xor r8d, r8d + mov rsi, r8 + mov rdi, r8 + mov r10, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-288+rbp] + add rsi, rax + adc rdi, rdx + adc r10, 0 + mov qword ptr [r9], rsi + mov rsi, r10 + mov r10, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-280+rbp] + add rdi, rax + adc rsi, rdx + adc r10, 0 + mov r11, r10 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-288+rbp] + add rdi, rax + adc rsi, rdx + adc r11, 0 + mov qword ptr [8+r9], rdi + mov rdi, r11 + mov r11, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc r11, 0 + mov rcx, r11 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-280+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-528+rbp] + mul qword ptr [-288+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [16+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-280+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-520+rbp] + mul qword ptr [-288+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [24+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc rdi, rdx + adc 
rcx, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-280+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-512+rbp] + mul qword ptr [-288+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [32+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-280+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-504+rbp] + mul qword ptr [-288+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [40+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-280+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-496+rbp] + mul qword ptr [-288+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [48+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr 
[-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-280+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-488+rbp] + mul qword ptr [-288+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [56+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-224+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-280+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-480+rbp] + mul qword ptr [-288+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [64+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov 
rax, qword ptr [-536+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-280+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-472+rbp] + mul qword ptr [-288+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [72+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-224+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-280+rbp] + add 
rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-464+rbp] + mul qword ptr [-288+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [80+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-280+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-456+rbp] + mul qword ptr [-288+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [88+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-192+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-200+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr 
[-512+rbp] + mul qword ptr [-224+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-280+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-448+rbp] + mul qword ptr [-288+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [96+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-184+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-192+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc 
rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-280+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-440+rbp] + mul qword ptr [-288+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [104+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-176+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-184+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-192+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-200+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-224+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-280+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov 
rax, qword ptr [-432+rbp] + mul qword ptr [-288+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [112+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-168+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-176+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-184+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-192+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-280+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-424+rbp] + mul qword ptr [-288+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [120+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-160+rbp] + add rsi, rax + adc rdi, rdx + 
adc rcx, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-168+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-176+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-184+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-192+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-200+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-224+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-280+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-416+rbp] + mul qword ptr [-288+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [128+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-152+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-160+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr 
[-168+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-176+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-184+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-192+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-280+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-408+rbp] + mul qword ptr [-288+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [136+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-144+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-152+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-160+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov 
rax, qword ptr [-520+rbp] + mul qword ptr [-168+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-176+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-184+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-192+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-200+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-224+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-280+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-400+rbp] + mul qword ptr [-288+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [144+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-136+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-144+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-152+rbp] + add 
rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-160+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-168+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-176+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-184+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-192+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-280+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-392+rbp] + mul qword ptr [-288+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [152+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-128+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr 
[-536+rbp] + mul qword ptr [-136+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-144+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-152+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-160+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-168+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-176+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-184+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-192+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-200+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-224+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-280+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-384+rbp] + mul qword ptr 
[-288+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [160+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-128+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-136+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-144+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-152+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-160+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-168+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-176+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-184+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-192+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-400+rbp] + 
mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-280+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-376+rbp] + mul qword ptr [-288+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [168+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-112+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-120+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-128+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-136+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-144+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-152+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-160+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-168+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-176+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-184+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-192+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-200+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-224+rbp] + add rsi, rax + adc rdi, rdx + 
adc rcx, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-280+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-368+rbp] + mul qword ptr [-288+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [176+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-112+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-128+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-136+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-144+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-152+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-160+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-168+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-176+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr 
[-184+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-192+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-280+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-360+rbp] + mul qword ptr [-288+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [184+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-96+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-104+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-112+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-120+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-128+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov 
rax, qword ptr [-504+rbp] + mul qword ptr [-136+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-144+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-152+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-160+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-168+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-176+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-184+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-192+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-200+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-224+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-280+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-352+rbp] + mul 
qword ptr [-288+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [192+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-88+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-96+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-112+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-128+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-136+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-144+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-152+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-160+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-168+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-176+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-184+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-192+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr 
[-400+rbp] + mul qword ptr [-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-280+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-344+rbp] + mul qword ptr [-288+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [200+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-80+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-88+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-96+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-104+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-112+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-120+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-128+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-136+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-144+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-152+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-160+rbp] + add rsi, rax + adc 
rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-168+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-176+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-184+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-192+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-200+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-224+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-280+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-336+rbp] + mul qword ptr [-288+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [208+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-80+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-528+rbp] + mul 
qword ptr [-88+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-96+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-112+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-128+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-136+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-144+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-152+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-160+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-168+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-176+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-184+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-192+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 
+ mov rax, qword ptr [-368+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-280+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-328+rbp] + mul qword ptr [-288+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [216+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-64+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-72+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-80+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-88+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-96+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-104+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-112+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-120+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-128+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-136+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-144+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-152+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-160+rbp] + add 
rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-168+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-176+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-184+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-192+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-200+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-224+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-280+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-320+rbp] + mul qword ptr [-288+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [224+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-64+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr 
[-528+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-80+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-88+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-96+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-112+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-128+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-136+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-144+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-152+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-160+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-168+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-176+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-184+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-192+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc rdi, 
rdx + adc rsi, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-280+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-312+rbp] + mul qword ptr [-288+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [232+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-48+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-56+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-64+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-72+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-80+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-88+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-96+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-104+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-112+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-120+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr 
[-128+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-136+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-144+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-152+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-160+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-168+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-176+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-184+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-192+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-200+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-224+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-280+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov 
r10, rdi + mov r11, rcx + mov rax, qword ptr [-304+rbp] + mul qword ptr [-288+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [240+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-544+rbp] + mul qword ptr [-40+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-48+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-64+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-80+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-88+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-96+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-112+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-128+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-136+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-144+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-152+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-160+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-168+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-176+rbp] + add rcx, rax 
+ adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-184+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-192+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-280+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-288+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [248+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-536+rbp] + mul qword ptr [-40+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-48+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-56+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-64+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-504+rbp] + 
mul qword ptr [-72+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-80+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-88+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-96+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-104+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-112+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-120+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-128+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-136+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-144+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-152+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-160+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-168+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-176+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-184+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-192+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-200+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-224+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 
0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-272+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-296+rbp] + mul qword ptr [-280+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [256+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-528+rbp] + mul qword ptr [-40+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-520+rbp] + mul qword ptr [-48+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-64+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-80+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-88+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-96+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-112+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-128+rbp] + add 
rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-136+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-144+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-152+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-160+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-168+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-176+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-184+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-192+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-264+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-272+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [264+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr 
[-520+rbp] + mul qword ptr [-40+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-48+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-56+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-64+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-72+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-80+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-88+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-96+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-104+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-112+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-120+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-128+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-136+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-144+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-152+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-160+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-168+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-176+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-184+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-192+rbp] + add rsi, rax + adc rdi, rdx + 
adc rcx, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-200+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-224+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-256+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-296+rbp] + mul qword ptr [-264+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [272+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-512+rbp] + mul qword ptr [-40+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-504+rbp] + mul qword ptr [-48+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-64+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-80+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-88+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-96+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr 
[-112+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-128+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-136+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-144+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-152+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-160+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-168+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-176+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-184+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-192+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-248+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-256+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [280+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov 
rax, qword ptr [-504+rbp] + mul qword ptr [-40+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-48+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-56+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-64+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-72+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-80+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-88+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-96+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-104+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-112+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-120+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-128+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-136+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-144+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-152+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-160+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-168+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-176+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-184+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-192+rbp] + add rsi, rax + 
adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-200+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-224+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-240+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-296+rbp] + mul qword ptr [-248+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [288+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-496+rbp] + mul qword ptr [-40+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-48+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-64+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-80+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-88+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-96+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-112+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-408+rbp] + mul 
qword ptr [-128+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-136+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-144+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-152+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-160+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-168+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-176+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-184+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-192+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-232+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-240+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [296+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-488+rbp] + mul qword ptr [-40+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-48+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-56+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-64+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 
+ mov rax, qword ptr [-456+rbp] + mul qword ptr [-72+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-80+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-88+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-96+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-104+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-112+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-120+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-128+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-136+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-144+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-152+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-160+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-168+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-176+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-184+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-192+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-200+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-224+rbp] + add 
rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-296+rbp] + mul qword ptr [-232+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [304+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-480+rbp] + mul qword ptr [-40+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-48+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-64+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-80+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-88+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-96+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-112+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-128+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-136+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-144+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-152+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-160+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-168+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr 
[-344+rbp] + mul qword ptr [-176+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-184+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-192+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-216+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-224+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [312+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-472+rbp] + mul qword ptr [-40+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-48+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-56+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-64+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-72+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-80+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-88+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-96+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-104+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-112+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-120+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-128+rbp] + add rsi, rax + adc rdi, rdx 
+ adc rcx, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-136+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-144+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-152+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-160+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-168+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-176+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-184+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-192+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-200+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-208+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-296+rbp] + mul qword ptr [-216+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [320+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-464+rbp] + mul qword ptr [-40+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-48+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-64+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-80+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-88+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr 
[-96+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-112+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-128+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-136+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-144+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-152+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-160+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-168+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-176+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-184+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-192+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-200+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-208+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [328+r9], rcx + mov rdi, r11 + mov r11, r10 + mov r10, r8 + mov rax, qword ptr [-456+rbp] + mul qword ptr [-40+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-48+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-56+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-64+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, 
qword ptr [-424+rbp] + mul qword ptr [-72+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-80+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-88+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-96+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-104+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-112+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-120+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-128+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-136+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-144+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-152+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-160+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-168+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-176+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-184+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-192+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov rax, qword ptr [-296+rbp] + mul qword ptr [-200+rbp] + add r11, rax + adc rdi, rdx + adc r10, 0 + mov qword ptr [336+r9], r11 + mov rsi, r10 + mov r10, r8 + mov rax, qword ptr [-448+rbp] + mul qword ptr [-40+rbp] + add rdi, rax + adc rsi, rdx + adc r10, 0 + mov rcx, r10 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-48+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + 
mov rax, qword ptr [-432+rbp] + mul qword ptr [-56+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-64+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-72+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-80+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-88+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-96+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-104+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-112+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-120+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-128+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-136+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-144+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-152+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-160+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-168+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-176+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-184+rbp] + add rdi, rax + adc rsi, rdx + adc rcx, 0 + mov r11, rsi + mov r10, rcx + mov rax, qword ptr [-296+rbp] + mul qword ptr [-192+rbp] + add rdi, rax + adc r11, rdx + adc r10, 0 + mov qword ptr [344+r9], rdi + mov rcx, r11 + mov rdi, r10 + mov r11, r8 + mov rax, qword ptr [-440+rbp] + mul qword ptr [-40+rbp] + add 
rcx, rax + adc rdi, rdx + adc r11, 0 + mov rsi, r11 + mov rax, qword ptr [-432+rbp] + mul qword ptr [-48+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-64+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-80+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-88+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-96+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-112+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-128+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-136+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-144+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-152+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-160+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-168+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-176+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-184+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [352+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, 
qword ptr [-432+rbp] + mul qword ptr [-40+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-48+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-56+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-64+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-72+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-80+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-88+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-96+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-104+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-112+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-120+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-128+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-136+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-144+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-152+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-160+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-168+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-296+rbp] + mul qword ptr [-176+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [360+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-424+rbp] + mul qword ptr [-40+rbp] + add rcx, rax + adc 
rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-48+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-64+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-80+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-88+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-96+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-112+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-128+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-136+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-144+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-152+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-160+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-168+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [368+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-416+rbp] + mul qword ptr [-40+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-48+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-400+rbp] + mul qword 
ptr [-56+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-64+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-72+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-80+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-88+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-96+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-104+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-112+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-120+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-128+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-136+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-144+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-152+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-296+rbp] + mul qword ptr [-160+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [376+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-408+rbp] + mul qword ptr [-40+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-48+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-64+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, 
qword ptr [-368+rbp] + mul qword ptr [-80+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-88+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-96+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-112+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-128+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-136+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-144+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-152+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [384+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-400+rbp] + mul qword ptr [-40+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-48+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-56+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-64+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-72+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-80+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-88+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-96+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-104+rbp] + add rsi, rax + adc 
rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-112+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-120+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-128+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-136+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-296+rbp] + mul qword ptr [-144+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [392+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-392+rbp] + mul qword ptr [-40+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-48+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-64+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-80+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-88+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-96+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-112+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-128+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-136+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr 
[400+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-384+rbp] + mul qword ptr [-40+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-48+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-56+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-64+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-72+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-80+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-88+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-96+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-104+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-112+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-120+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-296+rbp] + mul qword ptr [-128+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [408+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-376+rbp] + mul qword ptr [-40+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-48+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-64+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-80+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, 
qword ptr [-328+rbp] + mul qword ptr [-88+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-96+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-112+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-120+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [416+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-368+rbp] + mul qword ptr [-40+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-48+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-56+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-64+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-72+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-80+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-88+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-96+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-104+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-296+rbp] + mul qword ptr [-112+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [424+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-360+rbp] + mul qword ptr [-40+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-48+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc 
rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-64+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-80+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-88+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-96+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-104+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [432+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-352+rbp] + mul qword ptr [-40+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-48+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-56+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-64+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-72+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-80+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-88+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-296+rbp] + mul qword ptr [-96+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [440+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-344+rbp] + mul qword ptr [-40+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-48+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr 
[-64+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-80+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-88+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [448+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-336+rbp] + mul qword ptr [-40+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-48+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-56+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-64+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-72+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-296+rbp] + mul qword ptr [-80+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [456+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-328+rbp] + mul qword ptr [-40+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-48+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-64+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-72+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [464+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-320+rbp] + mul qword ptr [-40+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-48+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov rax, qword 
ptr [-304+rbp] + mul qword ptr [-56+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r10, rdi + mov r11, rcx + mov rax, qword ptr [-296+rbp] + mul qword ptr [-64+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [472+r9], rsi + mov rdi, r11 + mov rcx, r10 + mov rsi, r8 + mov rax, qword ptr [-312+rbp] + mul qword ptr [-40+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-48+rbp] + add rcx, rax + adc rdi, rdx + adc rsi, 0 + mov r10, rdi + mov r11, rsi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-56+rbp] + add rcx, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [480+r9], rcx + mov rdi, r11 + mov rsi, r10 + mov rcx, r8 + mov rax, qword ptr [-304+rbp] + mul qword ptr [-40+rbp] + add rsi, rax + adc rdi, rdx + adc rcx, 0 + mov r11, rcx + mov r10, rdi + mov rax, qword ptr [-296+rbp] + mul qword ptr [-48+rbp] + add rsi, rax + adc r10, rdx + adc r11, 0 + mov qword ptr [488+r9], rsi + mov rcx, r10 + mov rsi, r11 + mov rax, qword ptr [-296+rbp] + mul qword ptr [-40+rbp] + add rcx, rax + adc rsi, rdx + adc r8, 0 + mov qword ptr [496+r9], rcx + mov ecx, dword ptr [r12] + xor ecx, dword ptr [rbx] + test rsi, rsi + mov qword ptr [504+r9], rsi + mov dword ptr [8+r13], 64 + jne L149 + ALIGN 16 +L157: + mov edx, dword ptr [8+r13] + lea ebx, dword ptr [-1+rdx] + test ebx, ebx + mov dword ptr [8+r13], ebx + je L149 + lea r12d, dword ptr [-2+rdx] + cmp dword ptr [r9+r12*8], 0 + je L157 +L149: + mov r9d, dword ptr [8+r13] + xor edx, edx + test r9d, r9d + cmovne edx, ecx + mov dword ptr [r13], edx + add rsp, 520+32 ; +32 for "home" storage + pop rbx + pop r12 + pop r13 + pop rbp + pop rsi + pop rdi + + ret + +s_mp_mul_comba_32 ENDP + + +; void s_mp_sqr_comba_4(const mp_int *A, mp_int *B); +; Squares the four 64-bit digits reached through the pointer at offset 16 of A. +; A arrives in rcx and B in rdx; they are moved to rdi and rsi, and B is then kept in r11. +; The eight result digits are built column by column in an 80-byte stack area and then copied out through the pointer at offset 16 of B. +; Offset 8 of B is set to 8 and offset 0 of B to 0, after which zero digits are trimmed from the top; if no digit remains, offset 0 is written as 0. +; NOTE(review): offsets 0, 8 and 16 are presumably the sign, used and dp fields of mp_int; confirm against the struct definition. B is assumed to have room for 8 digits; nothing here checks it. + + ALIGN 16 +s_mp_sqr_comba_4 PROC + + push rdi + push rsi + + mov rdi, rcx + mov rsi, rdx + + push rbp + push rbx + sub rsp, 80 + mov r11, rsi + xor esi, esi + mov r10, rsi + mov rbp, rsi + mov r8, rsi + mov rbx, rsi + mov rcx, 
qword ptr [16+rdi] + mov rdi, rsi + mov rax, qword ptr [rcx] + mul rax + add r10, rax + adc rbx, rdx + adc rdi, 0 + mov qword ptr [-72+80+rsp], r10 + mov rax, qword ptr [rcx] + mul qword ptr [8+rcx] + add rbx, rax + adc rdi, rdx + adc rbp, 0 + add rbx, rax + adc rdi, rdx + adc rbp, 0 + mov qword ptr [-64+80+rsp], rbx + mov rax, qword ptr [rcx] + mul qword ptr [16+rcx] + add rdi, rax + adc rbp, rdx + adc r8, 0 + add rdi, rax + adc rbp, rdx + adc r8, 0 + mov rbx, rbp + mov rbp, r8 + mov rax, qword ptr [8+rcx] + mul rax + add rdi, rax + adc rbx, rdx + adc rbp, 0 + mov qword ptr [-56+80+rsp], rdi + mov r9, rbp + mov r8, rbx + mov rdi, rsi + mov rax, qword ptr [rcx] + mul qword ptr [24+rcx] + add r8, rax + adc r9, rdx + adc rdi, 0 + add r8, rax + adc r9, rdx + adc rdi, 0 + mov rbx, r9 + mov rbp, rdi + mov rax, qword ptr [8+rcx] + mul qword ptr [16+rcx] + add r8, rax + adc rbx, rdx + adc rbp, 0 + add r8, rax + adc rbx, rdx + adc rbp, 0 + mov qword ptr [-48+80+rsp], r8 + mov r9, rbp + mov rdi, rbx + mov r8, rsi + mov dword ptr [8+r11], 8 + mov dword ptr [r11], 0 + mov rax, qword ptr [8+rcx] + mul qword ptr [24+rcx] + add rdi, rax + adc r9, rdx + adc r8, 0 + add rdi, rax + adc r9, rdx + adc r8, 0 + mov rbx, r9 + mov rbp, r8 + mov rax, qword ptr [16+rcx] + mul rax + add rdi, rax + adc rbx, rdx + adc rbp, 0 + mov rax, rbp + mov qword ptr [-40+80+rsp], rdi + mov rbp, rbx + mov rdi, rax + mov rbx, rsi + mov rax, qword ptr [16+rcx] + mul qword ptr [24+rcx] + add rbp, rax + adc rdi, rdx + adc rbx, 0 + add rbp, rax + adc rdi, rdx + adc rbx, 0 + mov qword ptr [-32+80+rsp], rbp + mov r9, rbx + mov rax, qword ptr [24+rcx] + mul rax + add rdi, rax + adc r9, rdx + adc rsi, 0 + mov rdx, qword ptr [16+r11] + mov qword ptr [-24+80+rsp], rdi + mov qword ptr [-16+80+rsp], r9 + mov qword ptr [rdx], r10 + mov r8, qword ptr [-64+80+rsp] + mov qword ptr [8+rdx], r8 + mov rbp, qword ptr [-56+80+rsp] + mov qword ptr [16+rdx], rbp + mov rdi, qword ptr [-48+80+rsp] + mov qword ptr [24+rdx], rdi + 
mov rsi, qword ptr [-40+80+rsp] + mov qword ptr [32+rdx], rsi + mov rbx, qword ptr [-32+80+rsp] + mov qword ptr [40+rdx], rbx + mov rcx, qword ptr [-24+80+rsp] + mov qword ptr [48+rdx], rcx + mov rax, qword ptr [-16+80+rsp] + mov qword ptr [56+rdx], rax + mov edx, dword ptr [8+r11] + test edx, edx + je L168 + lea ecx, dword ptr [-1+rdx] + mov rsi, qword ptr [16+r11] + mov r10d, ecx +; Result digits are written above as qwords, so the zero tests in this trim loop compare the whole 64-bit digit; a dword compare would discard a top digit whose low 32 bits happen to be zero. + cmp qword ptr [rsi+r10*8], 0 + jne L166 + mov edx, ecx + ALIGN 16 +L167: + test edx, edx + mov ecx, edx + je L171 + dec edx + mov eax, edx + cmp qword ptr [rsi+rax*8], 0 + je L167 + mov dword ptr [8+r11], ecx + mov edx, ecx +L166: + test edx, edx + je L168 + mov eax, dword ptr [r11] + jmp L169 + +L171: + mov dword ptr [8+r11], edx +L168: + xor eax, eax +L169: + add rsp, 80 + pop rbx + pop rbp + mov dword ptr [r11], eax + + pop rsi + pop rdi + + ret + +s_mp_sqr_comba_4 ENDP + + +; void s_mp_sqr_comba_8(const mp_int *A, mp_int *B); +; Squares the eight 64-bit digits reached through the pointer at offset 16 of A. +; A arrives in rcx and B in rdx; they are moved to rdi and rsi, and B is then kept in rbp. +; The sixteen result digits are built in a 136-byte stack area and copied out through the pointer at offset 16 of B; offset 8 of B is set to 16 and offset 0 to 0 before zero digits are trimmed from the top. +; NOTE(review): offsets 0, 8 and 16 are presumably the sign, used and dp fields of mp_int; confirm against the struct definition. + + ALIGN 16 +s_mp_sqr_comba_8 PROC + + push rdi + push rsi + + mov rdi, rcx + mov rsi, rdx + mov rdx, r8 + mov rcx, r9 + + push r14 + xor r9d, r9d + mov r14, r9 + mov r10, r9 + push r13 + mov r13, r9 + push r12 + mov r12, r9 + push rbp + mov rbp, rsi + mov rsi, r9 + push rbx + mov rbx, r9 + sub rsp, 8+128 + mov rcx, qword ptr [16+rdi] + mov rax, qword ptr [rcx] + mul rax + add r14, rax + adc rbx, rdx + adc r12, 0 + mov qword ptr [-120+128+rsp], r14 + mov rax, qword ptr [rcx] + mul qword ptr [8+rcx] + add rbx, rax + adc r12, rdx + adc r10, 0 + add rbx, rax + adc r12, rdx + adc r10, 0 + mov qword ptr [-112+128+rsp], rbx + mov rax, qword ptr [rcx] + mul qword ptr [16+rcx] + add r12, rax + adc r10, rdx + adc r13, 0 + add r12, rax + adc r10, rdx + adc r13, 0 + mov rbx, r10 + mov r10, r13 + mov r13, r9 + mov rax, qword ptr [8+rcx] + mul rax + add r12, rax + adc rbx, rdx + adc r10, 0 + mov qword ptr [-104+128+rsp], r12 + mov rdi, r10 + mov r11, rbx + mov rax, qword ptr [rcx] + mul qword ptr [24+rcx] + add r11, rax + adc rdi, rdx + adc rsi, 0 + add r11, rax + adc 
rdi, rdx + adc rsi, 0 + mov rbx, rdi + mov r10, rsi + mov rdi, r9 + mov rax, qword ptr [8+rcx] + mul qword ptr [16+rcx] + add r11, rax + adc rbx, rdx + adc r10, 0 + add r11, rax + adc rbx, rdx + adc r10, 0 + mov rsi, r9 + mov qword ptr [-96+128+rsp], r11 + mov r8, r10 + mov r12, rbx + mov r11, r9 + mov rax, qword ptr [rcx] + mul qword ptr [32+rcx] + add r12, rax + adc r8, rdx + adc r13, 0 + add r12, rax + adc r8, rdx + adc r13, 0 + mov rax, qword ptr [8+rcx] + mul qword ptr [24+rcx] + add r12, rax + adc r8, rdx + adc r13, 0 + add r12, rax + adc r8, rdx + adc r13, 0 + mov rbx, r8 + mov r10, r13 + mov r8, r9 + mov rax, qword ptr [16+rcx] + mul rax + add r12, rax + adc rbx, rdx + adc r10, 0 + mov qword ptr [-88+128+rsp], r12 + mov rax, qword ptr [rcx] + mul qword ptr [40+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [32+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [24+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + add rbx, r8 + adc r10, rdi + adc r11, rsi + add rbx, r8 + adc r10, rdi + adc r11, rsi + mov qword ptr [-80+128+rsp], rbx + mov rax, qword ptr [rcx] + mul qword ptr [48+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [40+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [32+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, r9 + mov r13, rdi + mov r12, rsi + add r10, r8 + adc r11, r13 + adc rax, r12 + add r10, r8 + adc r11, r13 + adc rax, r12 + mov rdx, rax + mov rbx, r11 + mov rdi, r13 + mov r11, rdx + mov rsi, r12 + mov rax, qword ptr [24+rcx] + mul rax + add r10, rax + adc rbx, rdx + adc r11, 0 + mov qword ptr [-72+128+rsp], r10 + mov r10, r11 + mov rax, qword ptr [rcx] + mul qword ptr [56+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [48+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + 
mul qword ptr [40+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [32+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, r9 + add rbx, r8 + adc r10, rdi + adc rax, rsi + add rbx, r8 + adc r10, rdi + adc rax, rsi + mov qword ptr [-64+128+rsp], rbx + mov r11, rax + mov rbx, r9 + mov rax, qword ptr [8+rcx] + mul qword ptr [56+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [16+rcx] + mul qword ptr [48+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [40+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov r13, rdi + mov r12, rsi + add r10, r8 + adc r11, r13 + adc rbx, r12 + add r10, r8 + adc r11, r13 + adc rbx, r12 + mov rsi, rbx + mov rdi, r13 + mov rbx, r11 + mov r13, r12 + mov r11, rsi + mov rax, qword ptr [32+rcx] + mul rax + add r10, rax + adc rbx, rdx + adc r11, 0 + mov qword ptr [-56+128+rsp], r10 + mov r10, r9 + mov rax, qword ptr [16+rcx] + mul qword ptr [56+rcx] + mov r8, rax + mov rdi, rdx + xor r13, r13 + mov rax, qword ptr [24+rcx] + mul qword ptr [48+rcx] + add r8, rax + adc rdi, rdx + adc r13, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [40+rcx] + add r8, rax + adc rdi, rdx + adc r13, 0 + mov r12, rdi + mov rax, r13 + add rbx, r8 + adc r11, r12 + adc r10, rax + add rbx, r8 + adc r11, r12 + adc r10, rax + mov qword ptr [-48+128+rsp], rbx + mov r12, r11 + mov rsi, r10 + mov rbx, r9 + mov r11, r9 + mov rax, qword ptr [24+rcx] + mul qword ptr [56+rcx] + add r12, rax + adc rsi, rdx + adc rbx, 0 + add r12, rax + adc rsi, rdx + adc rbx, 0 + mov r13, rbx + mov rax, qword ptr [32+rcx] + mul qword ptr [48+rcx] + add r12, rax + adc rsi, rdx + adc r13, 0 + add r12, rax + adc rsi, rdx + adc r13, 0 + mov r10, rsi + mov rbx, r13 + mov r13, r9 + mov rax, qword ptr [40+rcx] + mul rax + add r12, rax + adc r10, rdx + adc rbx, 0 + mov qword ptr [-40+128+rsp], r12 + mov r8, rbx + mov rdi, r10 + mov rax, qword ptr [32+rcx] + mul qword ptr [56+rcx] + add 
rdi, rax + adc r8, rdx + adc r11, 0 + add rdi, rax + adc r8, rdx + adc r11, 0 + mov r10, r8 + mov rbx, r11 + mov rax, qword ptr [40+rcx] + mul qword ptr [48+rcx] + add rdi, rax + adc r10, rdx + adc rbx, 0 + add rdi, rax + adc r10, rdx + adc rbx, 0 + mov qword ptr [-32+128+rsp], rdi + mov rsi, rbx + mov r12, r10 + mov rax, qword ptr [40+rcx] + mul qword ptr [56+rcx] + add r12, rax + adc rsi, rdx + adc r13, 0 + add r12, rax + adc rsi, rdx + adc r13, 0 + mov r10, rsi + mov rbx, r13 + mov rax, qword ptr [48+rcx] + mul rax + add r12, rax + adc r10, rdx + adc rbx, 0 + mov qword ptr [-24+128+rsp], r12 + mov rdi, r10 + mov rsi, rbx + mov r10, r9 + mov dword ptr [8+rbp], 16 + mov dword ptr [rbp], 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [56+rcx] + add rdi, rax + adc rsi, rdx + adc r10, 0 + add rdi, rax + adc rsi, rdx + adc r10, 0 + mov qword ptr [-16+128+rsp], rdi + mov r8, r10 + mov rax, qword ptr [56+rcx] + mul rax + add rsi, rax + adc r8, rdx + adc r9, 0 + mov rax, qword ptr [16+rbp] + mov qword ptr [-8+128+rsp], rsi + mov qword ptr [128+rsp], r8 + mov qword ptr [rax], r14 + mov rbx, qword ptr [-112+128+rsp] + mov qword ptr [8+rax], rbx + mov rcx, qword ptr [-104+128+rsp] + mov qword ptr [16+rax], rcx + mov rdx, qword ptr [-96+128+rsp] + mov qword ptr [24+rax], rdx + mov r14, qword ptr [-88+128+rsp] + mov qword ptr [32+rax], r14 + mov r13, qword ptr [-80+128+rsp] + mov qword ptr [40+rax], r13 + mov r12, qword ptr [-72+128+rsp] + mov qword ptr [48+rax], r12 + mov r11, qword ptr [-64+128+rsp] + mov qword ptr [56+rax], r11 + mov r10, qword ptr [-56+128+rsp] + mov qword ptr [64+rax], r10 + mov r9, qword ptr [-48+128+rsp] + mov qword ptr [72+rax], r9 + mov r8, qword ptr [-40+128+rsp] + mov qword ptr [80+rax], r8 + mov rdi, qword ptr [-32+128+rsp] + mov qword ptr [88+rax], rdi + mov rsi, qword ptr [-24+128+rsp] + mov qword ptr [96+rax], rsi + mov rbx, qword ptr [-16+128+rsp] + mov qword ptr [104+rax], rbx + mov rcx, qword ptr [-8+128+rsp] + mov qword ptr [112+rax], rcx 
+ mov rdx, qword ptr [128+rsp] + mov qword ptr [120+rax], rdx +; Trim zero digits from the top of the 16-digit result: the count at offset 8 of B is reduced while the highest remaining digit is zero, and offset 0 of B is written as 0 if nothing remains. + mov edx, dword ptr [8+rbp] + test edx, edx + je L192 + lea ecx, dword ptr [-1+rdx] + mov rsi, qword ptr [16+rbp] + mov r14d, ecx +; Result digits are written above as qwords, so the zero tests in this trim loop compare the whole 64-bit digit; a dword compare would discard a top digit whose low 32 bits happen to be zero. + cmp qword ptr [rsi+r14*8], 0 + jne L190 + mov edx, ecx + ALIGN 16 +L191: + test edx, edx + mov ecx, edx + je L195 + dec edx + mov r9d, edx + cmp qword ptr [rsi+r9*8], 0 + je L191 + mov dword ptr [8+rbp], ecx + mov edx, ecx +L190: + test edx, edx + je L192 + mov eax, dword ptr [rbp] + jmp L193 + +L195: + mov dword ptr [8+rbp], edx +L192: + xor eax, eax +L193: + mov dword ptr [rbp], eax + add rsp, 8+128 + pop rbx + pop rbp + pop r12 + pop r13 + pop r14 + + pop rsi + pop rdi + + ret + +s_mp_sqr_comba_8 ENDP + + +; void s_mp_sqr_comba_16(const mp_int *A, mp_int *B) + + ALIGN 16 +s_mp_sqr_comba_16 PROC ; A "FRAME" function + + push rdi + push rsi + + mov rdi, rcx + mov rsi, rdx + + push rbp + xor r9d, r9d + mov r8, r9 + mov r11, r9 + mov rbp, rsp + push r14 + mov r14, rsi + mov rsi, r9 + push r13 + mov r13, r9 + push r12 + mov r12, r9 + push rbx + mov rbx, r9 + sub rsp, 256+32 ; +32 for "home" storage + mov rcx, qword ptr [16+rdi] + mov rax, qword ptr [rcx] + mul rax + add r8, rax + adc rbx, rdx + adc rsi, 0 + mov qword ptr [-288+rbp], r8 + mov rax, qword ptr [rcx] + mul qword ptr [8+rcx] + add rbx, rax + adc rsi, rdx + adc r12, 0 + add rbx, rax + adc rsi, rdx + adc r12, 0 + mov qword ptr [-280+rbp], rbx + mov rax, qword ptr [rcx] + mul qword ptr [16+rcx] + add rsi, rax + adc r12, rdx + adc r13, 0 + add rsi, rax + adc r12, rdx + adc r13, 0 + mov rbx, r12 + mov r10, r13 + mov rax, qword ptr [8+rcx] + mul rax + add rsi, rax + adc rbx, rdx + adc r10, 0 + mov qword ptr [-272+rbp], rsi + mov rdi, r10 + mov rsi, r9 + mov r10, rbx + mov rax, qword ptr [rcx] + mul qword ptr [24+rcx] + add r10, rax + adc rdi, rdx + adc r11, 0 + add r10, rax + adc rdi, rdx + adc r11, 0 + mov r12, rdi + mov rbx, r11 + mov rdi, r9 + mov rax, qword ptr [8+rcx] + mul qword ptr [16+rcx] + add r10, rax + adc 
r12, rdx + adc rbx, 0 + add r10, rax + adc r12, rdx + adc rbx, 0 + mov r11, r9 + mov qword ptr [-264+rbp], r10 + mov r8, rbx + mov r13, r12 + mov r12, r9 + mov rax, qword ptr [rcx] + mul qword ptr [32+rcx] + add r13, rax + adc r8, rdx + adc r12, 0 + add r13, rax + adc r8, rdx + adc r12, 0 + mov rax, qword ptr [8+rcx] + mul qword ptr [24+rcx] + add r13, rax + adc r8, rdx + adc r12, 0 + add r13, rax + adc r8, rdx + adc r12, 0 + mov rbx, r8 + mov r10, r12 + mov r8, r9 + mov rax, qword ptr [16+rcx] + mul rax + add r13, rax + adc rbx, rdx + adc r10, 0 + mov qword ptr [-256+rbp], r13 + mov rax, qword ptr [rcx] + mul qword ptr [40+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [32+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [24+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + add rbx, r8 + adc r10, rdi + adc r11, rsi + add rbx, r8 + adc r10, rdi + adc r11, rsi + mov qword ptr [-248+rbp], rbx + mov rax, qword ptr [rcx] + mul qword ptr [48+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [40+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [32+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, r9 + mov r13, rdi + mov r12, rsi + add r10, r8 + adc r11, r13 + adc rax, r12 + add r10, r8 + adc r11, r13 + adc rax, r12 + mov rdx, rax + mov rbx, r11 + mov rdi, r13 + mov r11, rdx + mov rsi, r12 + mov rax, qword ptr [24+rcx] + mul rax + add r10, rax + adc rbx, rdx + adc r11, 0 + mov qword ptr [-240+rbp], r10 + mov r10, r11 + mov rax, qword ptr [rcx] + mul qword ptr [56+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [48+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [40+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [32+rcx] + add r8, rax + adc rdi, rdx + adc 
rsi, 0 + mov rdx, r9 + add rbx, r8 + adc r10, rdi + adc rdx, rsi + add rbx, r8 + adc r10, rdi + adc rdx, rsi + mov r11, rdx + mov qword ptr [-232+rbp], rbx + mov rbx, r9 + mov rax, qword ptr [rcx] + mul qword ptr [64+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [56+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [48+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [40+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov r13, rdi + mov r12, rsi + add r10, r8 + adc r11, r13 + adc rbx, r12 + add r10, r8 + adc r11, r13 + adc rbx, r12 + mov rax, qword ptr [32+rcx] + mul rax + add r10, rax + adc r11, rdx + adc rbx, 0 + mov rdi, r13 + mov qword ptr [-224+rbp], r10 + mov rsi, r12 + mov r10, rbx + mov r12, r9 + mov rax, qword ptr [rcx] + mul qword ptr [72+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [64+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [56+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [48+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [40+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + add r11, r8 + adc r10, rdi + adc r12, rsi + add r11, r8 + adc r10, rdi + adc r12, rsi + mov qword ptr [-216+rbp], r11 + mov rbx, r12 + mov rax, qword ptr [rcx] + mul qword ptr [80+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [72+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [64+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [56+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [48+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, r9 + mov r13, rdi + mov r12, rsi + add 
r10, r8 + adc rbx, r13 + adc rax, r12 + add r10, r8 + adc rbx, r13 + adc rax, r12 + mov rdx, rax + mov r11, rbx + mov rdi, r13 + mov rbx, rdx + mov rsi, r12 + mov rax, qword ptr [40+rcx] + mul rax + add r10, rax + adc r11, rdx + adc rbx, 0 + mov qword ptr [-208+rbp], r10 + mov r10, rbx + mov rax, qword ptr [rcx] + mul qword ptr [88+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [80+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [72+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [64+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [56+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [48+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rdx, r9 + add r11, r8 + adc r10, rdi + adc rdx, rsi + add r11, r8 + adc r10, rdi + adc rdx, rsi + mov r13, rdx + mov qword ptr [-200+rbp], r11 + mov r12, r13 + mov rax, qword ptr [rcx] + mul qword ptr [96+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [80+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [72+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [64+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [56+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, r9 + mov rdx, rdi + mov r11, rsi + add r10, r8 + adc r12, rdx + adc rax, r11 + add r10, r8 + adc r12, rdx + adc rax, r11 + mov rbx, rdx + mov r13, rax + mov rsi, r11 + mov rax, qword ptr [48+rcx] + mul rax + add r10, rax + adc r12, rdx + adc r13, 0 + mov rdi, rbx + mov qword ptr [-192+rbp], r10 + mov r10, r13 + mov rax, qword ptr [rcx] + mul qword ptr [104+rcx] + mov r8, rax 
+ mov rdi, rdx + xor rsi, rsi + mov r13, r9 + mov rax, qword ptr [8+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [80+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [72+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [64+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [56+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + add r12, r8 + adc r10, rdi + adc r13, rsi + add r12, r8 + adc r10, rdi + adc r13, rsi + mov qword ptr [-184+rbp], r12 + mov r12, r13 + mov rax, qword ptr [rcx] + mul qword ptr [112+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [80+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [72+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [64+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, r9 + mov rbx, rdi + mov rdx, rsi + add r10, r8 + adc r12, rbx + adc rax, rdx + add r10, r8 + adc r12, rbx + adc rax, rdx + mov r11, rdx + mov r13, rax + mov rdi, rbx + mov rax, qword ptr [56+rcx] + mul rax + add r10, rax + adc r12, rdx + adc r13, 0 + mov rsi, r11 + mov qword ptr [-176+rbp], r10 + mov r10, r13 + mov rax, qword ptr [rcx] + mul qword ptr [120+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov r13, r9 + mov rax, qword ptr [8+rcx] + mul qword ptr [112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] 
+ mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [80+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [72+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [64+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + add r12, r8 + adc r10, rdi + adc r13, rsi + add r12, r8 + adc r10, rdi + adc r13, rsi + mov qword ptr [-168+rbp], r12 + mov r12, r13 + mov rax, qword ptr [8+rcx] + mul qword ptr [120+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [16+rcx] + mul qword ptr [112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [80+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [72+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, r9 + mov rbx, rdi + mov rdx, rsi + add r10, r8 + adc r12, rbx + adc rax, rdx + add r10, r8 + adc r12, rbx + adc rax, rdx + mov r11, rdx + mov r13, rax + mov rdi, rbx + mov rax, qword ptr [64+rcx] + mul rax + add r10, rax + adc r12, rdx + adc r13, 0 + mov rsi, r11 + mov qword ptr [-160+rbp], r10 + mov r11, r9 + mov rax, qword ptr [16+rcx] + mul qword ptr [120+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov r10, r13 + mov rbx, r9 + mov rax, qword ptr [24+rcx] + mul qword ptr [112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, 
rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [80+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [72+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + add r12, r8 + adc r10, rdi + adc r11, rsi + add r12, r8 + adc r10, rdi + adc r11, rsi + mov qword ptr [-152+rbp], r12 + mov rax, qword ptr [24+rcx] + mul qword ptr [120+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [32+rcx] + mul qword ptr [112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [80+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov r13, rdi + mov r12, rsi + add r10, r8 + adc r11, r13 + adc rbx, r12 + add r10, r8 + adc r11, r13 + adc rbx, r12 + mov rdx, rbx + mov rdi, r13 + mov rbx, r11 + mov rsi, r12 + mov r11, rdx + mov r12, r9 + mov rax, qword ptr [72+rcx] + mul rax + add r10, rax + adc rbx, rdx + adc r11, 0 + mov qword ptr [-144+rbp], r10 + mov r10, r11 + mov rax, qword ptr [32+rcx] + mul qword ptr [120+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [40+rcx] + mul qword ptr [112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [80+rcx] + add 
r8, rax + adc rdi, rdx + adc rsi, 0 + add rbx, r8 + adc r10, rdi + adc r12, rsi + add rbx, r8 + adc r10, rdi + adc r12, rsi + mov qword ptr [-136+rbp], rbx + mov r11, r12 + mov rax, qword ptr [40+rcx] + mul qword ptr [120+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [48+rcx] + mul qword ptr [112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, r9 + mov r13, rdi + mov r12, rsi + add r10, r8 + adc r11, r13 + adc rax, r12 + add r10, r8 + adc r11, r13 + adc rax, r12 + mov rdx, rax + mov rbx, r11 + mov rdi, r13 + mov r11, rdx + mov rsi, r12 + mov rax, qword ptr [80+rcx] + mul rax + add r10, rax + adc rbx, rdx + adc r11, 0 + mov qword ptr [-128+rbp], r10 + mov r10, r11 + mov rax, qword ptr [48+rcx] + mul qword ptr [120+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [56+rcx] + mul qword ptr [112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rdx, r9 + add rbx, r8 + adc r10, rdi + adc rdx, rsi + add rbx, r8 + adc r10, rdi + adc rdx, rsi + mov qword ptr [-120+rbp], rbx + mov r11, rdx + mov rbx, r9 + mov rax, qword ptr [56+rcx] + mul qword ptr [120+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [64+rcx] + mul qword ptr [112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr 
[96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov r13, rdi + mov r12, rsi + add r10, r8 + adc r11, r13 + adc rbx, r12 + add r10, r8 + adc r11, r13 + adc rbx, r12 + mov rdx, rbx + mov rdi, r13 + mov rbx, r11 + mov rsi, r12 + mov r11, rdx + mov r12, r9 + mov rax, qword ptr [88+rcx] + mul rax + add r10, rax + adc rbx, rdx + adc r11, 0 + mov qword ptr [-112+rbp], r10 + mov r10, r11 + mov rax, qword ptr [64+rcx] + mul qword ptr [120+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [72+rcx] + mul qword ptr [112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + add rbx, r8 + adc r10, rdi + adc r12, rsi + add rbx, r8 + adc r10, rdi + adc r12, rsi + mov qword ptr [-104+rbp], rbx + mov r11, r12 + mov rax, qword ptr [72+rcx] + mul qword ptr [120+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [80+rcx] + mul qword ptr [112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, r9 + mov r13, rdi + mov r12, rsi + add r10, r8 + adc r11, r13 + adc rax, r12 + add r10, r8 + adc r11, r13 + adc rax, r12 + mov rdx, rax + mov rbx, r11 + mov rdi, r13 + mov r11, rdx + mov rsi, r12 + mov rax, qword ptr [96+rcx] + mul rax + add r10, rax + adc rbx, rdx + adc r11, 0 + mov qword ptr [-96+rbp], r10 + mov r10, r9 + mov rax, qword ptr [80+rcx] + mul qword ptr [120+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [88+rcx] + mul qword ptr [112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov r12, rdi + mov rax, rsi + mov rsi, r9 + add rbx, r8 + adc r11, r12 + adc r10, rax + add rbx, r8 + adc r11, r12 + adc r10, rax + mov r12, r9 + mov 
qword ptr [-88+rbp], rbx + mov r13, r11 + mov r11, r10 + mov rax, qword ptr [88+rcx] + mul qword ptr [120+rcx] + add r13, rax + adc r11, rdx + adc r12, 0 + add r13, rax + adc r11, rdx + adc r12, 0 + mov rdi, r12 + mov rax, qword ptr [96+rcx] + mul qword ptr [112+rcx] + add r13, rax + adc r11, rdx + adc rdi, 0 + add r13, rax + adc r11, rdx + adc rdi, 0 + mov rbx, r11 + mov r10, rdi + mov r11, r9 + mov rax, qword ptr [104+rcx] + mul rax + add r13, rax + adc rbx, rdx + adc r10, 0 + mov qword ptr [-80+rbp], r13 + mov r8, r10 + mov r10, rbx + mov rax, qword ptr [96+rcx] + mul qword ptr [120+rcx] + add r10, rax + adc r8, rdx + adc rsi, 0 + add r10, rax + adc r8, rdx + adc rsi, 0 + mov r12, r8 + mov rbx, rsi + mov rax, qword ptr [104+rcx] + mul qword ptr [112+rcx] + add r10, rax + adc r12, rdx + adc rbx, 0 + add r10, rax + adc r12, rdx + adc rbx, 0 + mov qword ptr [-72+rbp], r10 + mov r13, rbx + mov rbx, r12 + mov rax, qword ptr [104+rcx] + mul qword ptr [120+rcx] + add rbx, rax + adc r13, rdx + adc r11, 0 + add rbx, rax + adc r13, rdx + adc r11, 0 + mov r12, r11 + mov r10, r13 + mov rax, qword ptr [112+rcx] + mul rax + add rbx, rax + adc r10, rdx + adc r12, 0 + mov qword ptr [-64+rbp], rbx + mov rdi, r10 + mov rbx, r9 + mov rsi, r12 + mov rax, qword ptr [112+rcx] + mul qword ptr [120+rcx] + add rdi, rax + adc rsi, rdx + adc rbx, 0 + add rdi, rax + adc rsi, rdx + adc rbx, 0 + mov qword ptr [-56+rbp], rdi + mov r8, rbx + mov rax, qword ptr [120+rcx] + mul rax + add rsi, rax + adc r8, rdx + adc r9, 0 + mov qword ptr [-48+rbp], rsi + mov qword ptr [-40+rbp], r8 + mov dword ptr [8+r14], 32 + mov dword ptr [r14], 0 +; mov rdi, qword ptr [16+r14] +; lea rsi, qword ptr [-288+rbp] +; mov edx, 256 + mov rcx, qword ptr [16+r14] + lea rdx, qword ptr [-288+rbp] + mov r8d, 256 + call memcpy + mov edx, dword ptr [8+r14] + test edx, edx + je L232 + lea ecx, dword ptr [-1+rdx] + mov rsi, qword ptr [16+r14] + mov r9d, ecx + cmp dword ptr [rsi+r9*8], 0 + jne L230 + mov edx, ecx + ALIGN 16 
+L231: + test edx, edx + mov ecx, edx + je L235 + dec edx + mov eax, edx + cmp dword ptr [rsi+rax*8], 0 + je L231 + mov dword ptr [8+r14], ecx + mov edx, ecx +L230: + test edx, edx + je L232 + mov eax, dword ptr [r14] + jmp L233 + +L235: + mov dword ptr [8+r14], edx +L232: + xor eax, eax +L233: + mov dword ptr [r14], eax + add rsp, 256+32 ; +32 for "home" storage + pop rbx + pop r12 + pop r13 + pop r14 + pop rbp + pop rsi + pop rdi + + ret + +s_mp_sqr_comba_16 ENDP + + +; void s_mp_sqr_comba_32(const mp_int *A, mp_int *B); + + ALIGN 16 +s_mp_sqr_comba_32 PROC ; A "FRAME" function + + push rdi + push rsi + + mov rdi, rcx + mov rsi, rdx + + push rbp + xor r10d, r10d + mov r8, r10 + mov r11, r10 + mov rbp, rsp + push r14 + mov r14, rsi + mov rsi, r10 + push r13 + mov r13, r10 + push r12 + mov r12, r10 + push rbx + mov rbx, r10 + sub rsp, 512+32 ; +32 for "home" storage + mov rcx, qword ptr [16+rdi] + mov rax, qword ptr [rcx] + mul rax + add r8, rax + adc rbx, rdx + adc rsi, 0 + mov qword ptr [-544+rbp], r8 + mov rax, qword ptr [rcx] + mul qword ptr [8+rcx] + add rbx, rax + adc rsi, rdx + adc r12, 0 + add rbx, rax + adc rsi, rdx + adc r12, 0 + mov qword ptr [-536+rbp], rbx + mov rax, qword ptr [rcx] + mul qword ptr [16+rcx] + add rsi, rax + adc r12, rdx + adc r13, 0 + add rsi, rax + adc r12, rdx + adc r13, 0 + mov rbx, r12 + mov r9, r13 + mov rax, qword ptr [8+rcx] + mul rax + add rsi, rax + adc rbx, rdx + adc r9, 0 + mov qword ptr [-528+rbp], rsi + mov rdi, r9 + mov rsi, r10 + mov r9, rbx + mov rax, qword ptr [rcx] + mul qword ptr [24+rcx] + add r9, rax + adc rdi, rdx + adc r11, 0 + add r9, rax + adc rdi, rdx + adc r11, 0 + mov r12, rdi + mov r13, r11 + mov rdi, r10 + mov rax, qword ptr [8+rcx] + mul qword ptr [16+rcx] + add r9, rax + adc r12, rdx + adc r13, 0 + add r9, rax + adc r12, rdx + adc r13, 0 + mov r11, r10 + mov qword ptr [-520+rbp], r9 + mov r8, r13 + mov r13, r12 + mov r12, r10 + mov rax, qword ptr [rcx] + mul qword ptr [32+rcx] + add r13, rax + adc r8, 
rdx + adc r12, 0 + add r13, rax + adc r8, rdx + adc r12, 0 + mov rax, qword ptr [8+rcx] + mul qword ptr [24+rcx] + add r13, rax + adc r8, rdx + adc r12, 0 + add r13, rax + adc r8, rdx + adc r12, 0 + mov rbx, r8 + mov r9, r12 + mov r8, r10 + mov rax, qword ptr [16+rcx] + mul rax + add r13, rax + adc rbx, rdx + adc r9, 0 + mov qword ptr [-512+rbp], r13 + mov rax, qword ptr [rcx] + mul qword ptr [40+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [32+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [24+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + add rbx, r8 + adc r9, rdi + adc r11, rsi + add rbx, r8 + adc r9, rdi + adc r11, rsi + mov qword ptr [-504+rbp], rbx + mov rax, qword ptr [rcx] + mul qword ptr [48+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [40+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [32+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, r10 + mov r13, rdi + mov r12, rsi + add r9, r8 + adc r11, r13 + adc rax, r12 + add r9, r8 + adc r11, r13 + adc rax, r12 + mov rdx, rax + mov rbx, r11 + mov rdi, r13 + mov r11, rdx + mov rsi, r12 + mov rax, qword ptr [24+rcx] + mul rax + add r9, rax + adc rbx, rdx + adc r11, 0 + mov qword ptr [-496+rbp], r9 + mov r9, r11 + mov rax, qword ptr [rcx] + mul qword ptr [56+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [48+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [40+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [32+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rdx, r10 + add rbx, r8 + adc r9, rdi + adc rdx, rsi + add rbx, r8 + adc r9, rdi + adc rdx, rsi + mov r11, rdx + mov qword ptr [-488+rbp], rbx + mov rbx, r10 + mov rax, qword ptr [rcx] + mul qword ptr [64+rcx] + mov r8, rax + 
mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [56+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [48+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [40+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov r13, rdi + mov r12, rsi + add r9, r8 + adc r11, r13 + adc rbx, r12 + add r9, r8 + adc r11, r13 + adc rbx, r12 + mov rax, qword ptr [32+rcx] + mul rax + add r9, rax + adc r11, rdx + adc rbx, 0 + mov rdi, r13 + mov qword ptr [-480+rbp], r9 + mov rsi, r12 + mov r9, rbx + mov r12, r10 + mov rax, qword ptr [rcx] + mul qword ptr [72+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [64+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [56+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [48+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [40+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + add r11, r8 + adc r9, rdi + adc r12, rsi + add r11, r8 + adc r9, rdi + adc r12, rsi + mov qword ptr [-472+rbp], r11 + mov rbx, r12 + mov rax, qword ptr [rcx] + mul qword ptr [80+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [72+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [64+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [56+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [48+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, r10 + mov r13, rdi + mov r12, rsi + add r9, r8 + adc rbx, r13 + adc rax, r12 + add r9, r8 + adc rbx, r13 + adc rax, r12 + mov rdx, rax + mov r11, rbx + mov rdi, r13 + mov rbx, rdx + mov rsi, r12 + mov rax, qword ptr [40+rcx] + mul rax + add r9, rax + adc r11, rdx + adc rbx, 0 + mov 
qword ptr [-464+rbp], r9 + mov r9, rbx + mov rax, qword ptr [rcx] + mul qword ptr [88+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [80+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [72+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [64+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [56+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [48+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rdx, r10 + add r11, r8 + adc r9, rdi + adc rdx, rsi + add r11, r8 + adc r9, rdi + adc rdx, rsi + mov r13, rdx + mov qword ptr [-456+rbp], r11 + mov r12, r13 + mov r13, r10 + mov rax, qword ptr [rcx] + mul qword ptr [96+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [80+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [72+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [64+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [56+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, rdi + mov r11, rsi + add r9, r8 + adc r12, rax + adc r13, r11 + add r9, r8 + adc r12, rax + adc r13, r11 + mov rbx, rax + mov rsi, r11 + mov rax, qword ptr [48+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov rdi, rbx + mov qword ptr [-448+rbp], r9 + mov r9, r13 + mov rax, qword ptr [rcx] + mul qword ptr [104+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov r13, r10 + mov rax, qword ptr [8+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] 
+ mul qword ptr [80+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [72+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [64+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [56+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + add r12, r8 + adc r9, rdi + adc r13, rsi + add r12, r8 + adc r9, rdi + adc r13, rsi + mov qword ptr [-440+rbp], r12 + mov r12, r10 + mov rax, qword ptr [rcx] + mul qword ptr [112+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [80+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [72+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [64+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rdx, r13 + mov rbx, rdi + mov r13, rsi + add r9, r8 + adc rdx, rbx + adc r12, r13 + add r9, r8 + adc rdx, rbx + adc r12, r13 + mov rax, r12 + mov r11, r13 + mov r12, rdx + mov r13, rax + mov rdi, rbx + mov rsi, r11 + mov rax, qword ptr [56+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov qword ptr [-432+rbp], r9 + mov r9, r13 + mov r13, r10 + mov rax, qword ptr [rcx] + mul qword ptr [120+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [8+rcx] + mul qword ptr [112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc 
rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [80+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [72+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [64+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, r8 + mov rdx, rdi + mov rbx, rsi + add r12, rax + adc r9, rdx + adc r13, rbx + add r12, rax + adc r9, rdx + adc r13, rbx + mov qword ptr [-424+rbp], r12 + mov r8, rdx + mov rsi, rax + mov rdi, rbx + mov r12, r13 + mov r13, r10 + mov rax, qword ptr [rcx] + mul qword ptr [128+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [8+rcx] + mul qword ptr [120+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [112+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [104+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [96+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [88+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [80+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [72+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, rsi + mov rbx, r8 + mov rdx, rdi + add r9, rax + adc r12, rbx + adc r13, rdx + add r9, rax + adc r12, rbx + adc r13, rdx + mov r11, rdx + mov r8, rax + mov rdi, rbx + mov rax, qword ptr [64+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov rsi, r11 + mov qword ptr [-416+rbp], r9 + mov r9, r13 + mov rax, qword ptr [rcx] + mul qword ptr [136+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov r13, r10 + mov rax, qword ptr [8+rcx] + mul qword ptr [128+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [120+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr 
[112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [80+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [72+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rbx, r8 + mov rax, rdi + mov rdx, rsi + add r12, rbx + adc r9, rax + adc r13, rdx + add r12, rbx + adc r9, rax + adc r13, rdx + mov qword ptr [-408+rbp], r12 + mov rdi, rdx + mov r8, rax + mov rsi, rbx + mov r12, r13 + mov r13, r10 + mov rax, qword ptr [rcx] + mul qword ptr [144+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [8+rcx] + mul qword ptr [136+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [128+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [120+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [112+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [104+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [96+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [88+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [80+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, rsi + mov rbx, r8 + mov rdx, rdi + add r9, rax + adc r12, rbx + adc r13, rdx + add r9, rax + adc r12, rbx + adc r13, rdx + mov r11, rdx + mov r8, rax + mov rdi, rbx + mov rax, qword ptr [72+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov rsi, r11 + mov qword ptr [-400+rbp], r9 + mov r9, r13 + mov rax, qword ptr [rcx] + 
mul qword ptr [152+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov r13, r10 + mov rax, qword ptr [8+rcx] + mul qword ptr [144+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [136+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [128+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [120+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [80+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rbx, r8 + mov rax, rdi + mov rdx, rsi + add r12, rbx + adc r9, rax + adc r13, rdx + add r12, rbx + adc r9, rax + adc r13, rdx + mov qword ptr [-392+rbp], r12 + mov rdi, rdx + mov r8, rax + mov rsi, rbx + mov r12, r13 + mov r13, r10 + mov rax, qword ptr [rcx] + mul qword ptr [160+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [8+rcx] + mul qword ptr [152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [144+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [136+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [128+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [120+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [112+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [104+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword 
ptr [64+rcx] + mul qword ptr [96+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [88+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, rsi + mov rbx, r8 + mov rdx, rdi + add r9, rax + adc r12, rbx + adc r13, rdx + add r9, rax + adc r12, rbx + adc r13, rdx + mov r11, rdx + mov r8, rax + mov rdi, rbx + mov rax, qword ptr [80+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov rsi, r11 + mov qword ptr [-384+rbp], r9 + mov r9, r13 + mov rax, qword ptr [rcx] + mul qword ptr [168+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov r13, r10 + mov rax, qword ptr [8+rcx] + mul qword ptr [160+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [152+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [144+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [136+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [128+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [120+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [88+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rbx, r8 + mov rax, rdi + mov rdx, rsi + add r12, rbx + adc r9, rax + adc r13, rdx + add r12, rbx + adc r9, rax + adc r13, rdx + mov qword ptr [-376+rbp], r12 + mov rdi, rdx + mov r8, rax + mov rsi, rbx + mov r12, r13 + mov r13, r10 + mov rax, qword ptr [rcx] + mul qword ptr [176+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [8+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, 
rdx + adc rdi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [144+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [136+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [128+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [120+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [112+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [104+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [96+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, rsi + mov rbx, r8 + mov rdx, rdi + add r9, rax + adc r12, rbx + adc r13, rdx + add r9, rax + adc r12, rbx + adc r13, rdx + mov r11, rdx + mov r8, rax + mov rdi, rbx + mov rax, qword ptr [88+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov rsi, r11 + mov qword ptr [-368+rbp], r9 + mov r9, r13 + mov rax, qword ptr [rcx] + mul qword ptr [184+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov r13, r10 + mov rax, qword ptr [8+rcx] + mul qword ptr [176+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [168+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [160+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [152+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [144+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [136+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [128+rcx] + add r8, rax + adc rdi, rdx 
+ adc rsi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [120+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [112+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [104+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [96+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rbx, r8 + mov rax, rdi + mov rdx, rsi + add r12, rbx + adc r9, rax + adc r13, rdx + add r12, rbx + adc r9, rax + adc r13, rdx + mov rdi, rdx + mov qword ptr [-360+rbp], r12 + mov r8, rax + mov rsi, rbx + mov r12, r13 + mov r13, r10 + mov rax, qword ptr [rcx] + mul qword ptr [192+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [8+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [144+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [136+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [128+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [120+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [112+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [104+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rbx, r8 + mov rax, rdi + add r9, rsi + adc r12, rbx + adc r13, rax + add r9, rsi + adc r12, rbx + adc r13, rax + mov r11, rax + mov r8, rbx + mov rax, qword ptr [96+rcx] + mul 
rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov rdi, r11 + mov qword ptr [-352+rbp], r9 + mov r9, r13 + mov rax, qword ptr [rcx] + mul qword ptr [200+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov r13, r10 + mov rax, qword ptr [8+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [144+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [136+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [128+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [120+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [112+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [104+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + add r12, rsi + adc r9, r8 + adc r13, rdi + add r12, rsi + adc r9, r8 + adc r13, rdi + mov qword ptr [-344+rbp], r12 + mov r12, r10 + mov rax, qword ptr [rcx] + mul qword ptr [208+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [8+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, 
rdx + adc rdi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [144+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [136+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [128+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [120+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [112+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rdx, r13 + mov rbx, r8 + mov r13, rdi + add r9, rsi + adc rdx, rbx + adc r12, r13 + add r9, rsi + adc rdx, rbx + adc r12, r13 + mov rax, r12 + mov r11, r13 + mov r12, rdx + mov r13, rax + mov r8, rbx + mov rdi, r11 + mov rax, qword ptr [104+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov qword ptr [-336+rbp], r9 + mov r9, r13 + mov r13, r10 + mov rax, qword ptr [rcx] + mul qword ptr [216+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [8+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr 
[152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [144+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [136+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [128+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [120+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [104+rcx] + mul qword ptr [112+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + add r12, rsi + adc r9, r8 + adc r13, rdi + add r12, rsi + adc r9, r8 + adc r13, rdi + mov qword ptr [-328+rbp], r12 + mov rax, qword ptr [rcx] + mul qword ptr [224+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [8+rcx] + mul qword ptr [216+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [144+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [136+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [128+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [104+rcx] + mul qword ptr [120+rcx] + add 
rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, r13 + mov rdx, r10 + mov rbx, r8 + mov r12, rdi + add r9, rsi + adc rax, rbx + adc rdx, r12 + add r9, rsi + adc rax, rbx + adc rdx, r12 + mov rdi, rdx + mov r11, r12 + mov r8, rbx + mov r12, rax + mov r13, rdi + mov rdi, r11 + mov rax, qword ptr [112+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov qword ptr [-320+rbp], r9 + mov rbx, r13 + mov r9, r10 + mov rax, qword ptr [rcx] + mul qword ptr [232+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [8+rcx] + mul qword ptr [224+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [216+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [144+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [136+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [104+rcx] + mul qword ptr [128+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [112+rcx] + mul qword ptr [120+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + add r12, rsi + adc rbx, r8 + adc r9, rdi + add r12, rsi + adc rbx, r8 + adc r9, rdi + mov qword ptr [-312+rbp], 
r12 + mov r13, r9 + mov rax, qword ptr [rcx] + mul qword ptr [240+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [8+rcx] + mul qword ptr [232+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [224+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [216+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [144+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [104+rcx] + mul qword ptr [136+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [112+rcx] + mul qword ptr [128+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, r10 + mov r11, r8 + mov rdx, rdi + add rbx, rsi + adc r13, r11 + adc rax, rdx + add rbx, rsi + adc r13, r11 + adc rax, rdx + mov r9, rdx + mov rdx, rax + mov r12, r13 + mov r8, r11 + mov r13, rdx + mov rdi, r9 + mov rax, qword ptr [120+rcx] + mul rax + add rbx, rax + adc r12, rdx + adc r13, 0 + mov qword ptr [-304+rbp], rbx + mov rbx, r13 + mov r13, r10 + mov rax, qword ptr [rcx] + mul qword ptr [248+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [8+rcx] + mul qword ptr 
[240+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [16+rcx] + mul qword ptr [232+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [224+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [216+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [104+rcx] + mul qword ptr [144+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [112+rcx] + mul qword ptr [136+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [120+rcx] + mul qword ptr [128+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + add r12, rsi + adc rbx, r8 + adc r13, rdi + add r12, rsi + adc rbx, r8 + adc r13, rdi + mov qword ptr [-296+rbp], r12 + mov r12, r13 + mov r13, r10 + mov rax, qword ptr [8+rcx] + mul qword ptr [248+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [16+rcx] + mul qword ptr [240+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [24+rcx] + mul qword ptr [232+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [224+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [40+rcx] 
+ mul qword ptr [216+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [104+rcx] + mul qword ptr [152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [112+rcx] + mul qword ptr [144+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [120+rcx] + mul qword ptr [136+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov r11, r8 + mov rax, rdi + add rbx, rsi + adc r12, r11 + adc r13, rax + add rbx, rsi + adc r12, r11 + adc r13, rax + mov r9, rax + mov r8, r11 + mov rax, qword ptr [128+rcx] + mul rax + add rbx, rax + adc r12, rdx + adc r13, 0 + mov rdi, r9 + mov qword ptr [-288+rbp], rbx + mov r9, r13 + mov rax, qword ptr [16+rcx] + mul qword ptr [248+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov r13, r10 + mov rax, qword ptr [24+rcx] + mul qword ptr [240+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [32+rcx] + mul qword ptr [232+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [224+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [216+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr 
[200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [104+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [112+rcx] + mul qword ptr [152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [120+rcx] + mul qword ptr [144+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [128+rcx] + mul qword ptr [136+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + add r12, rsi + adc r9, r8 + adc r13, rdi + add r12, rsi + adc r9, r8 + adc r13, rdi + mov qword ptr [-280+rbp], r12 + mov r12, r10 + mov rax, qword ptr [24+rcx] + mul qword ptr [248+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [32+rcx] + mul qword ptr [240+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [40+rcx] + mul qword ptr [232+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [224+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [216+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [104+rcx] + mul qword 
ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [112+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [120+rcx] + mul qword ptr [152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [128+rcx] + mul qword ptr [144+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rdx, r13 + mov rbx, r8 + mov r13, rdi + add r9, rsi + adc rdx, rbx + adc r12, r13 + add r9, rsi + adc rdx, rbx + adc r12, r13 + mov rax, r12 + mov r11, r13 + mov r12, rdx + mov r13, rax + mov r8, rbx + mov rdi, r11 + mov rax, qword ptr [136+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov qword ptr [-272+rbp], r9 + mov r9, r13 + mov r13, r10 + mov rax, qword ptr [32+rcx] + mul qword ptr [248+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [40+rcx] + mul qword ptr [240+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [48+rcx] + mul qword ptr [232+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [224+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [216+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [104+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [112+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [120+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [128+rcx] + mul qword ptr [152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + 
mov rax, qword ptr [136+rcx] + mul qword ptr [144+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + add r12, rsi + adc r9, r8 + adc r13, rdi + add r12, rsi + adc r9, r8 + adc r13, rdi + mov qword ptr [-264+rbp], r12 + mov rax, qword ptr [40+rcx] + mul qword ptr [248+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [48+rcx] + mul qword ptr [240+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [56+rcx] + mul qword ptr [232+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [224+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [216+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [104+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [112+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [120+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [128+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [136+rcx] + mul qword ptr [152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, r13 + mov rdx, r10 + mov rbx, r8 + mov r12, rdi + add r9, rsi + adc rax, rbx + adc rdx, r12 + add r9, rsi + adc rax, rbx + adc rdx, r12 + mov rdi, rdx + mov r11, r12 + mov r8, rbx + mov r12, rax + mov r13, rdi + mov rdi, r11 + mov rax, qword ptr [144+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov r11, r10 + mov qword ptr [-256+rbp], r9 + mov r9, r13 + mov rax, qword ptr [48+rcx] + mul qword ptr [248+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr 
[56+rcx] + mul qword ptr [240+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [64+rcx] + mul qword ptr [232+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [224+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [216+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [104+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [112+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [120+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [128+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [136+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [144+rcx] + mul qword ptr [152+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + add r12, rsi + adc r9, r8 + adc r11, rdi + add r12, rsi + adc r9, r8 + adc r11, rdi + mov qword ptr [-248+rbp], r12 + mov r13, r11 + mov rax, qword ptr [56+rcx] + mul qword ptr [248+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [64+rcx] + mul qword ptr [240+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [72+rcx] + mul qword ptr [232+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [224+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [216+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [104+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword 
ptr [112+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [120+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [128+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [136+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [144+rcx] + mul qword ptr [160+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, r10 + mov rdx, rsi + mov rbx, r8 + mov r12, rdi + add r9, rdx + adc r13, rbx + adc rax, r12 + add r9, rdx + adc r13, rbx + adc rax, r12 + mov r11, r12 + mov r8, rdx + mov rdx, rax + mov r12, r13 + mov rdi, rbx + mov r13, rdx + mov rsi, r11 + mov rax, qword ptr [152+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov qword ptr [-240+rbp], r9 + mov r9, r13 + mov r13, r10 + mov rax, qword ptr [64+rcx] + mul qword ptr [248+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [72+rcx] + mul qword ptr [240+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [80+rcx] + mul qword ptr [232+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [224+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [216+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [104+rcx] + mul qword ptr [208+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [112+rcx] + mul qword ptr [200+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [120+rcx] + mul qword ptr [192+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [128+rcx] + mul qword ptr [184+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [136+rcx] + mul qword ptr [176+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [144+rcx] + mul qword ptr [168+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [152+rcx] + mul 
qword ptr [160+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, r8 + mov rdx, rdi + mov rbx, rsi + add r12, rax + adc r9, rdx + adc r13, rbx + add r12, rax + adc r9, rdx + adc r13, rbx + mov qword ptr [-232+rbp], r12 + mov r8, rdx + mov rsi, rax + mov rdi, rbx + mov r12, r13 + mov r13, r10 + mov rax, qword ptr [72+rcx] + mul qword ptr [248+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [80+rcx] + mul qword ptr [240+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [88+rcx] + mul qword ptr [232+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [224+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [104+rcx] + mul qword ptr [216+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [112+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [120+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [128+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [136+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [144+rcx] + mul qword ptr [176+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [152+rcx] + mul qword ptr [168+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, rsi + mov rbx, r8 + mov rdx, rdi + add r9, rax + adc r12, rbx + adc r13, rdx + add r9, rax + adc r12, rbx + adc r13, rdx + mov r11, rdx + mov r8, rax + mov rdi, rbx + mov rax, qword ptr [160+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov rsi, r11 + mov qword ptr [-224+rbp], r9 + mov r9, r13 + mov rax, qword ptr [80+rcx] + mul qword ptr [248+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov r13, r10 + mov rax, qword ptr [88+rcx] + mul qword ptr [240+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [96+rcx] + mul qword ptr [232+rcx] + add r8, rax + adc rdi, 
rdx + adc rsi, 0 + mov rax, qword ptr [104+rcx] + mul qword ptr [224+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [112+rcx] + mul qword ptr [216+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [120+rcx] + mul qword ptr [208+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [128+rcx] + mul qword ptr [200+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [136+rcx] + mul qword ptr [192+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [144+rcx] + mul qword ptr [184+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [152+rcx] + mul qword ptr [176+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [160+rcx] + mul qword ptr [168+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rbx, r8 + mov rax, rdi + mov rdx, rsi + add r12, rbx + adc r9, rax + adc r13, rdx + add r12, rbx + adc r9, rax + adc r13, rdx + mov qword ptr [-216+rbp], r12 + mov rdi, rdx + mov r8, rax + mov rsi, rbx + mov r12, r13 + mov r13, r10 + mov rax, qword ptr [88+rcx] + mul qword ptr [248+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [96+rcx] + mul qword ptr [240+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [104+rcx] + mul qword ptr [232+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [112+rcx] + mul qword ptr [224+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [120+rcx] + mul qword ptr [216+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [128+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [136+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [144+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [152+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [160+rcx] + mul qword ptr [176+rcx] + 
add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, rsi + mov rbx, r8 + mov rdx, rdi + add r9, rax + adc r12, rbx + adc r13, rdx + add r9, rax + adc r12, rbx + adc r13, rdx + mov r11, rdx + mov r8, rax + mov rdi, rbx + mov rax, qword ptr [168+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov rsi, r11 + mov qword ptr [-208+rbp], r9 + mov r9, r13 + mov rax, qword ptr [96+rcx] + mul qword ptr [248+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov r13, r10 + mov rax, qword ptr [104+rcx] + mul qword ptr [240+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [112+rcx] + mul qword ptr [232+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [120+rcx] + mul qword ptr [224+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [128+rcx] + mul qword ptr [216+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [136+rcx] + mul qword ptr [208+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [144+rcx] + mul qword ptr [200+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [152+rcx] + mul qword ptr [192+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [160+rcx] + mul qword ptr [184+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [168+rcx] + mul qword ptr [176+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rbx, r8 + mov rax, rdi + mov rdx, rsi + add r12, rbx + adc r9, rax + adc r13, rdx + add r12, rbx + adc r9, rax + adc r13, rdx + mov qword ptr [-200+rbp], r12 + mov rdi, rdx + mov r8, rax + mov rsi, rbx + mov r12, r13 + mov r13, r10 + mov rax, qword ptr [104+rcx] + mul qword ptr [248+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [112+rcx] + mul qword ptr [240+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [120+rcx] + mul qword ptr [232+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [128+rcx] + mul qword ptr [224+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 
0 + mov rax, qword ptr [136+rcx] + mul qword ptr [216+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [144+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [152+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [160+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [168+rcx] + mul qword ptr [184+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, rsi + mov rbx, r8 + mov rdx, rdi + add r9, rax + adc r12, rbx + adc r13, rdx + add r9, rax + adc r12, rbx + adc r13, rdx + mov r11, rdx + mov r8, rax + mov rdi, rbx + mov rax, qword ptr [176+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov rsi, r11 + mov qword ptr [-192+rbp], r9 + mov r9, r13 + mov rax, qword ptr [112+rcx] + mul qword ptr [248+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov r13, r10 + mov rax, qword ptr [120+rcx] + mul qword ptr [240+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [128+rcx] + mul qword ptr [232+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [136+rcx] + mul qword ptr [224+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [144+rcx] + mul qword ptr [216+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [152+rcx] + mul qword ptr [208+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [160+rcx] + mul qword ptr [200+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [168+rcx] + mul qword ptr [192+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [176+rcx] + mul qword ptr [184+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rbx, r8 + mov rax, rdi + mov rdx, rsi + add r12, rbx + adc r9, rax + adc r13, rdx + add r12, rbx + adc r9, rax + adc r13, rdx + mov qword ptr [-184+rbp], r12 + mov rdi, rdx + mov r8, rax + mov rsi, rbx + mov r12, r13 + mov r13, r10 + mov rax, qword ptr [120+rcx] + mul 
qword ptr [248+rcx] + mov rsi, rax + mov r8, rdx + xor rdi, rdi + mov rax, qword ptr [128+rcx] + mul qword ptr [240+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [136+rcx] + mul qword ptr [232+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [144+rcx] + mul qword ptr [224+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [152+rcx] + mul qword ptr [216+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [160+rcx] + mul qword ptr [208+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [168+rcx] + mul qword ptr [200+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, qword ptr [176+rcx] + mul qword ptr [192+rcx] + add rsi, rax + adc r8, rdx + adc rdi, 0 + mov rax, rsi + mov rbx, r8 + mov rdx, rdi + add r9, rax + adc r12, rbx + adc r13, rdx + add r9, rax + adc r12, rbx + adc r13, rdx + mov r11, rdx + mov r8, rax + mov rdi, rbx + mov rax, qword ptr [184+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov rsi, r11 + mov qword ptr [-176+rbp], r9 + mov r9, r13 + mov rax, qword ptr [128+rcx] + mul qword ptr [248+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov r13, r10 + mov rax, qword ptr [136+rcx] + mul qword ptr [240+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [144+rcx] + mul qword ptr [232+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [152+rcx] + mul qword ptr [224+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [160+rcx] + mul qword ptr [216+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [168+rcx] + mul qword ptr [208+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [176+rcx] + mul qword ptr [200+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [184+rcx] + mul qword ptr [192+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + add r12, r8 + adc r9, rdi + adc r13, rsi + add r12, r8 + adc r9, rdi + adc r13, rsi + mov qword ptr [-168+rbp], 
r12 + mov r12, r13 + mov r13, r10 + mov rax, qword ptr [136+rcx] + mul qword ptr [248+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [144+rcx] + mul qword ptr [240+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [152+rcx] + mul qword ptr [232+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [160+rcx] + mul qword ptr [224+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [168+rcx] + mul qword ptr [216+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [176+rcx] + mul qword ptr [208+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [184+rcx] + mul qword ptr [200+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rbx, rdi + mov rax, rsi + add r9, r8 + adc r12, rbx + adc r13, rax + add r9, r8 + adc r12, rbx + adc r13, rax + mov r11, rax + mov rdi, rbx + mov rbx, r10 + mov rax, qword ptr [192+rcx] + mul rax + add r9, rax + adc r12, rdx + adc r13, 0 + mov rsi, r11 + mov qword ptr [-160+rbp], r9 + mov r9, r13 + mov rax, qword ptr [144+rcx] + mul qword ptr [248+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [152+rcx] + mul qword ptr [240+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [160+rcx] + mul qword ptr [232+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [168+rcx] + mul qword ptr [224+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [176+rcx] + mul qword ptr [216+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [184+rcx] + mul qword ptr [208+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [192+rcx] + mul qword ptr [200+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + add r12, r8 + adc r9, rdi + adc rbx, rsi + add r12, r8 + adc r9, rdi + adc rbx, rsi + mov qword ptr [-152+rbp], r12 + mov rax, qword ptr [152+rcx] + mul qword ptr [248+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [160+rcx] + mul qword ptr 
[240+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [168+rcx] + mul qword ptr [232+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [176+rcx] + mul qword ptr [224+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [184+rcx] + mul qword ptr [216+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [192+rcx] + mul qword ptr [208+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rdx, r10 + mov r13, rdi + mov r12, rsi + add r9, r8 + adc rbx, r13 + adc rdx, r12 + add r9, r8 + adc rbx, r13 + adc rdx, r12 + mov rax, rdx + mov rdi, r13 + mov rsi, r12 + mov r11, rax + mov r12, r10 + mov rax, qword ptr [200+rcx] + mul rax + add r9, rax + adc rbx, rdx + adc r11, 0 + mov qword ptr [-144+rbp], r9 + mov r9, r11 + mov rax, qword ptr [160+rcx] + mul qword ptr [248+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [168+rcx] + mul qword ptr [240+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [176+rcx] + mul qword ptr [232+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [184+rcx] + mul qword ptr [224+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [192+rcx] + mul qword ptr [216+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [200+rcx] + mul qword ptr [208+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + add rbx, r8 + adc r9, rdi + adc r12, rsi + add rbx, r8 + adc r9, rdi + adc r12, rsi + mov qword ptr [-136+rbp], rbx + mov r11, r12 + mov rax, qword ptr [168+rcx] + mul qword ptr [248+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [176+rcx] + mul qword ptr [240+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [184+rcx] + mul qword ptr [232+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [192+rcx] + mul qword ptr [224+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [200+rcx] + mul qword ptr [216+rcx] + add r8, rax + adc rdi, rdx + 
adc rsi, 0 + mov rax, r10 + mov r13, rdi + mov r12, rsi + add r9, r8 + adc r11, r13 + adc rax, r12 + add r9, r8 + adc r11, r13 + adc rax, r12 + mov rdx, rax + mov rbx, r11 + mov rdi, r13 + mov r11, rdx + mov rsi, r12 + mov rax, qword ptr [208+rcx] + mul rax + add r9, rax + adc rbx, rdx + adc r11, 0 + mov qword ptr [-128+rbp], r9 + mov r9, r11 + mov rax, qword ptr [176+rcx] + mul qword ptr [248+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [184+rcx] + mul qword ptr [240+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [192+rcx] + mul qword ptr [232+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [200+rcx] + mul qword ptr [224+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [208+rcx] + mul qword ptr [216+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rdx, r10 + add rbx, r8 + adc r9, rdi + adc rdx, rsi + add rbx, r8 + adc r9, rdi + adc rdx, rsi + mov qword ptr [-120+rbp], rbx + mov r11, rdx + mov rbx, r10 + mov rax, qword ptr [184+rcx] + mul qword ptr [248+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [192+rcx] + mul qword ptr [240+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [200+rcx] + mul qword ptr [232+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [208+rcx] + mul qword ptr [224+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov r13, rdi + mov r12, rsi + add r9, r8 + adc r11, r13 + adc rbx, r12 + add r9, r8 + adc r11, r13 + adc rbx, r12 + mov rdx, rbx + mov rdi, r13 + mov rbx, r11 + mov rsi, r12 + mov r11, rdx + mov r12, r10 + mov rax, qword ptr [216+rcx] + mul rax + add r9, rax + adc rbx, rdx + adc r11, 0 + mov qword ptr [-112+rbp], r9 + mov r9, r11 + mov rax, qword ptr [192+rcx] + mul qword ptr [248+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [200+rcx] + mul qword ptr [240+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [208+rcx] + mul qword ptr [232+rcx] + add 
r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [216+rcx] + mul qword ptr [224+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + add rbx, r8 + adc r9, rdi + adc r12, rsi + add rbx, r8 + adc r9, rdi + adc r12, rsi + mov qword ptr [-104+rbp], rbx + mov r11, r12 + mov rax, qword ptr [200+rcx] + mul qword ptr [248+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [208+rcx] + mul qword ptr [240+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [216+rcx] + mul qword ptr [232+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, r10 + mov r13, rdi + mov r12, rsi + add r9, r8 + adc r11, r13 + adc rax, r12 + add r9, r8 + adc r11, r13 + adc rax, r12 + mov rdx, rax + mov rbx, r11 + mov rdi, r13 + mov r11, rdx + mov rsi, r12 + mov r12, r10 + mov rax, qword ptr [224+rcx] + mul rax + add r9, rax + adc rbx, rdx + adc r11, 0 + mov qword ptr [-96+rbp], r9 + mov r9, r10 + mov rax, qword ptr [208+rcx] + mul qword ptr [248+rcx] + mov r8, rax + mov rdi, rdx + xor rsi, rsi + mov rax, qword ptr [216+rcx] + mul qword ptr [240+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov rax, qword ptr [224+rcx] + mul qword ptr [232+rcx] + add r8, rax + adc rdi, rdx + adc rsi, 0 + mov r13, rdi + mov rax, rsi + add rbx, r8 + adc r11, r13 + adc r9, rax + add rbx, r8 + adc r11, r13 + adc r9, rax + mov qword ptr [-88+rbp], rbx + mov rsi, r11 + mov r8, r9 + mov rax, qword ptr [216+rcx] + mul qword ptr [248+rcx] + add rsi, rax + adc r8, rdx + adc r12, 0 + add rsi, rax + adc r8, rdx + adc r12, 0 + mov r11, r12 + mov rax, qword ptr [224+rcx] + mul qword ptr [240+rcx] + add rsi, rax + adc r8, rdx + adc r11, 0 + add rsi, rax + adc r8, rdx + adc r11, 0 + mov r13, r8 + mov rbx, r11 + mov rax, qword ptr [232+rcx] + mul rax + add rsi, rax + adc r13, rdx + adc rbx, 0 + mov qword ptr [-80+rbp], rsi + mov r12, rbx + mov rdi, r13 + mov r13, r10 + mov rax, qword ptr [224+rcx] + mul qword ptr [248+rcx] + add rdi, rax + adc r12, rdx + adc r13, 0 + add rdi, rax + adc 
r12, rdx + adc r13, 0 + mov r9, r12 + mov r12, r13 + mov rax, qword ptr [232+rcx] + mul qword ptr [240+rcx] + add rdi, rax + adc r9, rdx + adc r12, 0 + add rdi, rax + adc r9, rdx + adc r12, 0 + mov qword ptr [-72+rbp], rdi + mov r11, r9 + mov rbx, r12 + mov r9, r10 + mov rax, qword ptr [232+rcx] + mul qword ptr [248+rcx] + add r11, rax + adc rbx, rdx + adc r9, 0 + add r11, rax + adc rbx, rdx + adc r9, 0 + mov r13, rbx + mov rbx, r9 + mov r9, r10 + mov rax, qword ptr [240+rcx] + mul rax + add r11, rax + adc r13, rdx + adc rbx, 0 + mov qword ptr [-64+rbp], r11 + mov rdi, r13 + mov rsi, rbx + mov rax, qword ptr [240+rcx] + mul qword ptr [248+rcx] + add rdi, rax + adc rsi, rdx + adc r9, 0 + add rdi, rax + adc rsi, rdx + adc r9, 0 + mov qword ptr [-56+rbp], rdi + mov r8, r9 + mov rax, qword ptr [248+rcx] + mul rax + add rsi, rax + adc r8, rdx + adc r10, 0 + mov qword ptr [-48+rbp], rsi + mov qword ptr [-40+rbp], r8 + mov dword ptr [8+r14], 64 + mov dword ptr [r14], 0 +; mov rdi, qword ptr [16+r14] +; lea rsi, qword ptr [-544+rbp] +; mov edx, 512 + mov rcx, qword ptr [16+r14] + lea rdx, qword ptr [-544+rbp] + mov r8d, 512 + call memcpy + mov edx, dword ptr [8+r14] + test edx, edx + je L304 + lea ecx, dword ptr [-1+rdx] + mov rsi, qword ptr [16+r14] + mov r10d, ecx + cmp dword ptr [rsi+r10*8], 0 + jne L302 + mov edx, ecx + ALIGN 16 +L303: + test edx, edx + mov ecx, edx + je L307 + dec edx + mov eax, edx + cmp dword ptr [rsi+rax*8], 0 + je L303 + mov dword ptr [8+r14], ecx + mov edx, ecx +L302: + test edx, edx + je L304 + mov eax, dword ptr [r14] + jmp L305 + +L307: + mov dword ptr [8+r14], edx +L304: + xor eax, eax +L305: + mov dword ptr [r14], eax + add rsp, 512+32 ; +32 for "home" storage + pop rbx + pop r12 + pop r13 + pop r14 + pop rbp + + pop rsi + pop rdi + + ret + +s_mp_sqr_comba_32 ENDP + +END diff --git a/security/nss/lib/freebl/mpi/mp_comba_amd64_sun.s b/security/nss/lib/freebl/mpi/mp_comba_amd64_sun.s new file mode 100644 index 000000000..a5181df33 --- 
/dev/null
+++ b/security/nss/lib/freebl/mpi/mp_comba_amd64_sun.s
@@ -0,0 +1,16097 @@
+//* TomsFastMath, a fast ISO C bignum library.
+/ *
+/ * This project is meant to fill in where LibTomMath
+/ * falls short. That is speed ;-)
+/ *
+/ * This project is public domain and free for all purposes.
+/ *
+/ * Tom St Denis, tomstdenis@iahu.ca
+/ */
+
+//*
+/ * The source file from which this assembly was derived
+/ * comes from TFM v0.03, which has the above license.
+/ * This source was compiled with an unnamed compiler at
+/ * the highest optimization level. Afterwards, the
+/ * trailing .section was removed because it causes errors
+/ * in the Studio 10 compiler on AMD 64.
+/ */
+
+	.file "mp_comba.c"
+	.text
+/ s_mp_mul_comba_4: straight-line multiply of two 4-digit operands.
+/ In: %rdi and %rsi are the two input objects, %rdx is the output object.
+/ For each object the code reads a pointer at offset 16 and indexes it as
+/ an array of 64-bit digits. Below, a[i] is digit i of the %rdi operand
+/ and b[j] is digit j of the %rsi operand.
+/ Out: 8 digits written through the output pointer, a digit count written
+/ to the dword at offset 8 of the output, and the dword at offset 0 of the
+/ output set from the xor of the inputs' offset-0 dwords.
+/ NOTE(review): offsets 0, 8 and 16 presumably are the sign, used and
+/ digit-pointer fields of an mp_int; confirm against the C declaration.
+/ Method: output digit k is the sum of all products a[i]*b[j] with
+/ i+j == k, added into a three-register accumulator by addq/adcq/adcq.
+	.align 16
+.globl s_mp_mul_comba_4
+	.type s_mp_mul_comba_4, @function
+s_mp_mul_comba_4:
+.LFB2:
+/ %r12, %rbp and %rbx are used as scratch below and popped before ret.
+	pushq %r12
+.LCFI0:
+	pushq %rbp
+.LCFI1:
+	pushq %rbx
+.LCFI2:
+/ Copy a[0..3] to -64..-40(%rsp) and b[0..3] to -32..-8(%rsp).
+/ %rsp is not lowered first, so these slots sit below the stack pointer
+/ (presumably relying on the System V AMD64 red zone; confirm).
+/ %rbx keeps the output object because mulq overwrites %rdx.
+	movq 16(%rdi), %r9
+	movq %rdx, %rbx
+	movq 16(%rsi), %rdx
+	movq (%r9), %rax
+	movq %rax, -64(%rsp)
+	movq 8(%r9), %r8
+	movq %r8, -56(%rsp)
+	movq 16(%r9), %rbp
+	movq %rbp, -48(%rsp)
+	movq 24(%r9), %r12
+	movq %r12, -40(%rsp)
+	movq (%rdx), %rcx
+	movq %rcx, -32(%rsp)
+	movq 8(%rdx), %r10
+	movq %r10, -24(%rsp)
+	movq 16(%rdx), %r11
+/ %r10 = 0; it is the zero source used to clear accumulator registers.
+	xorl %r10d, %r10d
+	movq %r10, %r8
+	movq %r10, %r9
+	movq %r10, %rbp
+	movq %r11, -16(%rsp)
+/ %r11 = digit array of the output object.
+	movq 16(%rbx), %r11
+	movq 24(%rdx), %rax
+	movq %rax, -8(%rsp)
+/ Column 0: a[0]*b[0].
+/APP
+	movq -64(%rsp),%rax
+	mulq -32(%rsp)
+	addq %rax,%r8
+	adcq %rdx,%r9
+	adcq $0,%rbp
+
+/NO_APP
+	movq %r8, (%r11)
+	movq %rbp, %r8
+	movq %r10, %rbp
+/ Column 1: a[0]*b[1], a[1]*b[0].
+/APP
+	movq -64(%rsp),%rax
+	mulq -24(%rsp)
+	addq %rax,%r9
+	adcq %rdx,%r8
+	adcq $0,%rbp
+
+/NO_APP
+	movq %rbp, %r12
+/APP
+	movq -56(%rsp),%rax
+	mulq -32(%rsp)
+	addq %rax,%r9
+	adcq %rdx,%r8
+	adcq $0,%r12
+
+/NO_APP
+	movq %r9, 8(%r11)
+	movq %r12, %r9
+	movq %r10, %r12
+/ Column 2: a[0]*b[2], a[1]*b[1], a[2]*b[0].
+/APP
+	movq -64(%rsp),%rax
+	mulq -16(%rsp)
+	addq %rax,%r8
+	adcq %rdx,%r9
+	adcq $0,%r12
+
+/NO_APP
+	movq %r12, %rcx
+/APP
+	movq -56(%rsp),%rax
+	mulq -24(%rsp)
+	addq %rax,%r8
+	adcq %rdx,%r9
+	adcq $0,%rcx
+
+/NO_APP
+	movq %r9, %rbp
+	movq %rcx, %r12
+/APP
+	movq -48(%rsp),%rax
+	mulq -32(%rsp)
+	addq %rax,%r8
+	adcq %rdx,%rbp
+	adcq $0,%r12
+
+/NO_APP
+	movq %r8, 16(%r11)
+	movq %r12, %r9
+	movq %rbp, %rcx
+	movq %r10, %r8
+/ Column 3: a[0]*b[3], a[1]*b[2], a[2]*b[1], a[3]*b[0].
+/APP
+	movq -64(%rsp),%rax
+	mulq -8(%rsp)
+	addq %rax,%rcx
+	adcq %rdx,%r9
+	adcq $0,%r8
+
+	movq -56(%rsp),%rax
+	mulq -16(%rsp)
+	addq %rax,%rcx
+	adcq %rdx,%r9
+	adcq $0,%r8
+
+	movq -48(%rsp),%rax
+	mulq -24(%rsp)
+	addq %rax,%rcx
+	adcq %rdx,%r9
+	adcq $0,%r8
+
+/NO_APP
+	movq %r9, %rbp
+	movq %r8, %r12
+/APP
+	movq -40(%rsp),%rax
+	mulq -32(%rsp)
+	addq %rax,%rcx
+	adcq %rdx,%rbp
+	adcq $0,%r12
+
+/NO_APP
+	movq %rcx, 24(%r11)
+	movq %r12, %r9
+	movq %rbp, %r8
+	movq %r10, %rcx
+/ Column 4: a[1]*b[3], a[2]*b[2], a[3]*b[1].
+/APP
+	movq -56(%rsp),%rax
+	mulq -8(%rsp)
+	addq %rax,%r8
+	adcq %rdx,%r9
+	adcq $0,%rcx
+
+	movq -48(%rsp),%rax
+	mulq -16(%rsp)
+	addq %rax,%r8
+	adcq %rdx,%r9
+	adcq $0,%rcx
+
+/NO_APP
+	movq %r9, %rbp
+	movq %rcx, %r12
+/APP
+	movq -40(%rsp),%rax
+	mulq -24(%rsp)
+	addq %rax,%r8
+	adcq %rdx,%rbp
+	adcq $0,%r12
+
+/NO_APP
+	movq %r8, 32(%r11)
+	movq %r12, %r9
+	movq %rbp, %rcx
+	movq %r10, %r8
+/ Column 5: a[2]*b[3], a[3]*b[2].
+/APP
+	movq -48(%rsp),%rax
+	mulq -8(%rsp)
+	addq %rax,%rcx
+	adcq %rdx,%r9
+	adcq $0,%r8
+
+/NO_APP
+	movq %r8, %r12
+	movq %r9, %rbp
+/APP
+	movq -40(%rsp),%rax
+	mulq -16(%rsp)
+	addq %rax,%rcx
+	adcq %rdx,%rbp
+	adcq $0,%r12
+
+/NO_APP
+	movq %rcx, 40(%r11)
+	movq %rbp, %r8
+	movq %r12, %rcx
+/ Column 6: a[3]*b[3]; the carry left in %rcx becomes digit 7.
+/APP
+	movq -40(%rsp),%rax
+	mulq -8(%rsp)
+	addq %rax,%r8
+	adcq %rdx,%rcx
+	adcq $0,%r10
+
+/NO_APP
+	movq %r8, 48(%r11)
+/ %esi = xor of the dwords at offset 0 of the two inputs.
+	movl (%rsi), %esi
+	xorl (%rdi), %esi
+/ The count starts at 8. testq sets the flags on the top digit; the two
+/ stores that follow do not change them, so jne skips the trimming loop
+/ when the top digit is nonzero.
+	testq %rcx, %rcx
+	movq %rcx, 56(%r11)
+	movl $8, 8(%rbx)
+	jne .L9
+	.align 16
+/ Trimming loop: the current top digit is zero, so lower the count by
+/ one; stop when the count reaches zero or the new top digit is nonzero.
+.L18:
+	movl 8(%rbx), %edx
+	leal -1(%rdx), %edi
+	testl %edi, %edi
+	movl %edi, 8(%rbx)
+	je .L9
+	leal -2(%rdx), %r10d
+	cmpq $0, (%r11,%r10,8)
+	je .L18
+/ Offset-0 dword of the output = %esi when the count is nonzero, else 0.
+.L9:
+	movl 8(%rbx), %edx
+	xorl %r11d, %r11d
+	testl %edx, %edx
+	cmovne %esi, %r11d
+	movl %r11d, (%rbx)
+/ Restore the registers pushed on entry.
+	popq %rbx
+	popq %rbp
+	popq %r12
+	ret
+.LFE2:
+	.size s_mp_mul_comba_4, .-s_mp_mul_comba_4
+/ s_mp_mul_comba_8: same scheme as the 4-digit routine for two 8-digit
+/ operands. %rdi and %rsi are the inputs, %rdx is the output; 16 digits
+/ are written and the digit count starts at 16 before trimming.
+/ NOTE(review): offsets 0, 8 and 16 of each object presumably are the
+/ sign, used and digit-pointer fields of an mp_int; confirm.
+	.align 16
+.globl s_mp_mul_comba_8
+	.type s_mp_mul_comba_8, @function
+s_mp_mul_comba_8:
+.LFB3:
+	pushq %r12
+.LCFI3:
+	pushq %rbp
+.LCFI4:
+	pushq %rbx
+.LCFI5:
+	movq %rdx, %rbx
+/ Reserve 8 bytes so that b[7] can live at (%rsp); the other 15 digit
+/ copies use negative offsets from %rsp.
+	subq $8, %rsp
+.LCFI6:
+/ Copy a[0..7] to -120..-64(%rsp) and b[0..7] to -56..0(%rsp).
+	movq 16(%rdi), %rdx
+	movq (%rdx), %r8
+	movq %r8, -120(%rsp)
+	movq 8(%rdx), %rbp
+	movq %rbp, -112(%rsp)
+	movq 16(%rdx), %r9
+	movq %r9, -104(%rsp)
+	movq 24(%rdx), %r12
+	movq %r12, -96(%rsp)
+	movq 32(%rdx), %rcx
+	movq %rcx, -88(%rsp)
+	movq 40(%rdx), %r10
+	movq %r10, -80(%rsp)
+	movq 48(%rdx), %r11
+	movq %r11, -72(%rsp)
+	movq 56(%rdx), %rax
+	movq 16(%rsi), %rdx
+	movq %rax, -64(%rsp)
+	movq (%rdx), %r8
+	movq %r8, -56(%rsp)
+	movq 8(%rdx), %rbp
+	movq %rbp, -48(%rsp)
+	movq 16(%rdx), %r9
+	movq %r9, -40(%rsp)
+	movq 24(%rdx), %r12
+	movq %r12, -32(%rsp)
+	movq 32(%rdx), %rcx
+	movq %rcx, -24(%rsp)
+	movq 40(%rdx), %r10
+	movq %r10, -16(%rsp)
+	movq 48(%rdx), %r11
+/ %r10 = 0; it is the zero source used to clear accumulator registers.
+	xorl %r10d, %r10d
+	movq %r10, %r8
+	movq %r10, %r9
+	movq %r10, %rbp
+	movq %r11, -8(%rsp)
+/ %r11 = digit array of the output object.
+	movq 16(%rbx), %r11
+	movq 56(%rdx), %rax
+	movq %rax, (%rsp)
+/ Column 0: a[0]*b[0].
+/APP
+	movq -120(%rsp),%rax
+	mulq -56(%rsp)
+	addq %rax,%r8
+	adcq %rdx,%r9
+	adcq $0,%rbp
+
+/NO_APP
+	movq %r8, (%r11)
+	movq %rbp, %r8
+	movq %r10, %rbp
+/ Column 1: a[0]*b[1], a[1]*b[0].
+/APP
+	movq -120(%rsp),%rax
+	mulq -48(%rsp)
+	addq %rax,%r9
+	adcq %rdx,%r8
+	adcq $0,%rbp
+
+/NO_APP
+	movq %rbp, %r12
+/APP
+	movq -112(%rsp),%rax
+	mulq -56(%rsp)
+	addq %rax,%r9
+	adcq %rdx,%r8
+	adcq $0,%r12
+
+/NO_APP
+	movq %r9, 8(%r11)
+	movq %r12, %r9
+	movq %r10, %r12
+/ Column 2: a[0]*b[2], a[1]*b[1], a[2]*b[0].
+/APP
+	movq -120(%rsp),%rax
+	mulq -40(%rsp)
+	addq %rax,%r8
+	adcq %rdx,%r9
+	adcq $0,%r12
+
+/NO_APP
+	movq %r12, %rcx
+/APP
+	movq -112(%rsp),%rax
+	mulq -48(%rsp)
+	addq %rax,%r8
+	adcq %rdx,%r9
+	adcq $0,%rcx
+
+/NO_APP
+	movq %r9, %rbp
+	movq %rcx, %r12
+/APP
+	movq -104(%rsp),%rax
+	mulq -56(%rsp)
+	addq %rax,%r8
+	adcq %rdx,%rbp
+	adcq $0,%r12
+
+/NO_APP
+	movq %r8, 16(%r11)
+	movq %r12, %r9
+	movq %rbp, %rcx
+	movq %r10, %r8
+/APP
+/ s_mp_mul_comba_8 (continued). Operand copies made by the prologue:
+/ a[i] at (8*i - 120)(%rsp) and b[j] at (8*j - 56)(%rsp), where a is the
+/ %rdi operand and b is the %rsi operand. %r11 is the output digit array
+/ and %r10 holds zero. Output digit k is the sum of the products
+/ a[i]*b[j] with i+j == k.
+/ Column 3: a[0]*b[3] through a[3]*b[0].
+	movq -120(%rsp),%rax
+	mulq -32(%rsp)
+	addq %rax,%rcx
+	adcq %rdx,%r9
+	adcq $0,%r8
+
+	movq -112(%rsp),%rax
+	mulq -40(%rsp)
+	addq %rax,%rcx
+	adcq %rdx,%r9
+	adcq $0,%r8
+
+	movq -104(%rsp),%rax
+	mulq -48(%rsp)
+	addq %rax,%rcx
+	adcq %rdx,%r9
+	adcq $0,%r8
+
+/NO_APP
+	movq %r9, %rbp
+	movq %r8, %r12
+/APP
+	movq -96(%rsp),%rax
+	mulq -56(%rsp)
+	addq %rax,%rcx
+	adcq %rdx,%rbp
+	adcq $0,%r12
+
+/NO_APP
+	movq %rcx, 24(%r11)
+	movq %r12, %r9
+	movq %rbp, %r8
+	movq %r10, %rcx
+/ Column 4: a[0]*b[4] through a[4]*b[0].
+/APP
+	movq -120(%rsp),%rax
+	mulq -24(%rsp)
+	addq %rax,%r8
+	adcq %rdx,%r9
+	adcq $0,%rcx
+
+	movq -112(%rsp),%rax
+	mulq -32(%rsp)
+	addq %rax,%r8
+	adcq %rdx,%r9
+	adcq $0,%rcx
+
+	movq -104(%rsp),%rax
+	mulq -40(%rsp)
+	addq %rax,%r8
+	adcq %rdx,%r9
+	adcq $0,%rcx
+
+	movq -96(%rsp),%rax
+	mulq -48(%rsp)
+	addq %rax,%r8
+	adcq %rdx,%r9
+	adcq $0,%rcx
+
+/NO_APP
+	movq %r9, %rbp
+	movq %rcx, %r12
+/APP
+	movq -88(%rsp),%rax
+	mulq -56(%rsp)
+	addq %rax,%r8
+	adcq %rdx,%rbp
+	adcq $0,%r12
+
+/NO_APP
+	movq %r8, 32(%r11)
+	movq %r12, %r9
+	movq %rbp, %rcx
+	movq %r10, %r8
+/ Column 5: a[0]*b[5] through a[5]*b[0].
+/APP
+	movq -120(%rsp),%rax
+	mulq -16(%rsp)
+	addq %rax,%rcx
+	adcq %rdx,%r9
+	adcq $0,%r8
+
+	movq -112(%rsp),%rax
+	mulq -24(%rsp)
+	addq %rax,%rcx
+	adcq %rdx,%r9
+	adcq $0,%r8
+
+	movq -104(%rsp),%rax
+	mulq -32(%rsp)
+	addq %rax,%rcx
+	adcq %rdx,%r9
+	adcq $0,%r8
+
+	movq -96(%rsp),%rax
+	mulq -40(%rsp)
+	addq %rax,%rcx
+	adcq %rdx,%r9
+	adcq $0,%r8
+
+	movq -88(%rsp),%rax
+	mulq -48(%rsp)
+	addq %rax,%rcx
+	adcq %rdx,%r9
+	adcq $0,%r8
+
+/NO_APP
+	movq %r9, %rbp
+	movq %r8, %r12
+/APP
+	movq -80(%rsp),%rax
+	mulq -56(%rsp)
+	addq %rax,%rcx
+	adcq %rdx,%rbp
+	adcq $0,%r12
+
+/NO_APP
+	movq %rcx, 40(%r11)
+	movq %r12, %r9
+	movq %rbp, %r8
+	movq %r10, %rcx
+/ Column 6: a[0]*b[6] through a[6]*b[0].
+/APP
+	movq -120(%rsp),%rax
+	mulq -8(%rsp)
+	addq %rax,%r8
+	adcq %rdx,%r9
+	adcq $0,%rcx
+
+	movq -112(%rsp),%rax
+	mulq -16(%rsp)
+	addq %rax,%r8
+	adcq %rdx,%r9
+	adcq $0,%rcx
+
+	movq -104(%rsp),%rax
+	mulq -24(%rsp)
+	addq %rax,%r8
+	adcq %rdx,%r9
+	adcq $0,%rcx
+
+	movq -96(%rsp),%rax
+	mulq -32(%rsp)
+	addq %rax,%r8
+	adcq %rdx,%r9
+	adcq $0,%rcx
+
+	movq -88(%rsp),%rax
+	mulq -40(%rsp)
+	addq %rax,%r8
+	adcq %rdx,%r9
+	adcq $0,%rcx
+
+	movq -80(%rsp),%rax
+	mulq -48(%rsp)
+	addq %rax,%r8
+	adcq %rdx,%r9
+	adcq $0,%rcx
+
+/NO_APP
+	movq %r9, %rbp
+	movq %rcx, %r12
+/APP
+	movq -72(%rsp),%rax
+	mulq -56(%rsp)
+	addq %rax,%r8
+	adcq %rdx,%rbp
+	adcq $0,%r12
+
+/NO_APP
+	movq %r8, 48(%r11)
+	movq %r12, %r9
+	movq %rbp, %rcx
+	movq %r10, %r8
+/ Column 7: a[0]*b[7] through a[7]*b[0].
+/APP
+	movq -120(%rsp),%rax
+	mulq (%rsp)
+	addq %rax,%rcx
+	adcq %rdx,%r9
+	adcq $0,%r8
+
+	movq -112(%rsp),%rax
+	mulq -8(%rsp)
+	addq %rax,%rcx
+	adcq %rdx,%r9
+	adcq $0,%r8
+
+	movq -104(%rsp),%rax
+	mulq -16(%rsp)
+	addq %rax,%rcx
+	adcq %rdx,%r9
+	adcq $0,%r8
+
+	movq -96(%rsp),%rax
+	mulq -24(%rsp)
+	addq %rax,%rcx
+	adcq %rdx,%r9
+	adcq $0,%r8
+
+	movq -88(%rsp),%rax
+	mulq -32(%rsp)
+	addq %rax,%rcx
+	adcq %rdx,%r9
+	adcq $0,%r8
+
+	movq -80(%rsp),%rax
+	mulq -40(%rsp)
+	addq %rax,%rcx
+	adcq %rdx,%r9
+	adcq $0,%r8
+
+	movq -72(%rsp),%rax
+	mulq -48(%rsp)
+	addq %rax,%rcx
+	adcq %rdx,%r9
+	adcq $0,%r8
+
+/NO_APP
+	movq %r9, %rbp
+	movq %r8, %r12
+/APP
+	movq -64(%rsp),%rax
+	mulq -56(%rsp)
+	addq %rax,%rcx
+	adcq %rdx,%rbp
+	adcq $0,%r12
+
+/NO_APP
+	movq %rcx, 56(%r11)
+	movq %r12, %r9
+	movq %rbp, %r8
+	movq %r10, %rcx
+/ Column 8: a[1]*b[7] through a[7]*b[1].
+/APP
+	movq -112(%rsp),%rax
+	mulq (%rsp)
+	addq %rax,%r8
+	adcq %rdx,%r9
+	adcq $0,%rcx
+
+	movq -104(%rsp),%rax
+	mulq -8(%rsp)
+	addq %rax,%r8
+	adcq %rdx,%r9
+	adcq $0,%rcx
+
+	movq -96(%rsp),%rax
+	mulq -16(%rsp)
+	addq %rax,%r8
+	adcq %rdx,%r9
+	adcq $0,%rcx
+
+	movq -88(%rsp),%rax
+	mulq -24(%rsp)
+	addq %rax,%r8
+	adcq %rdx,%r9
+	adcq $0,%rcx
+
+	movq -80(%rsp),%rax
+	mulq -32(%rsp)
+	addq %rax,%r8
+	adcq %rdx,%r9
+	adcq $0,%rcx
+
+	movq -72(%rsp),%rax
+	mulq -40(%rsp)
+	addq %rax,%r8
+	adcq %rdx,%r9
+	adcq $0,%rcx
+
+/NO_APP
+	movq %r9, %rbp
+	movq %rcx, %r12
+/APP
+	movq -64(%rsp),%rax
+	mulq -48(%rsp)
+	addq %rax,%r8
+	adcq %rdx,%rbp
+	adcq $0,%r12
+
+/NO_APP
+	movq %r8, 64(%r11)
+	movq %r12, %r9
+	movq %rbp, %rcx
+	movq %r10, %r8
+/ Column 9: a[2]*b[7] through a[7]*b[2].
+/APP
+	movq -104(%rsp),%rax
+	mulq (%rsp)
+	addq %rax,%rcx
+	adcq %rdx,%r9
+	adcq $0,%r8
+
+	movq -96(%rsp),%rax
+	mulq -8(%rsp)
+	addq %rax,%rcx
+	adcq %rdx,%r9
+	adcq $0,%r8
+
+	movq -88(%rsp),%rax
+	mulq -16(%rsp)
+	addq %rax,%rcx
+	adcq %rdx,%r9
+	adcq $0,%r8
+
+	movq -80(%rsp),%rax
+	mulq -24(%rsp)
+	addq %rax,%rcx
+	adcq %rdx,%r9
+	adcq $0,%r8
+
+	movq -72(%rsp),%rax
+	mulq -32(%rsp)
+	addq %rax,%rcx
+	adcq %rdx,%r9
+	adcq $0,%r8
+
+/NO_APP
+	movq %r9, %rbp
+	movq %r8, %r12
+/APP
+	movq -64(%rsp),%rax
+	mulq -40(%rsp)
+	addq %rax,%rcx
+	adcq %rdx,%rbp
+	adcq $0,%r12
+
+/NO_APP
+	movq %rcx, 72(%r11)
+	movq %r12, %r9
+	movq %rbp, %r8
+	movq %r10, %rcx
+/ Column 10: a[3]*b[7] through a[7]*b[3].
+/APP
+	movq -96(%rsp),%rax
+	mulq (%rsp)
+	addq %rax,%r8
+	adcq %rdx,%r9
+	adcq $0,%rcx
+
+	movq -88(%rsp),%rax
+	mulq -8(%rsp)
+	addq %rax,%r8
+	adcq %rdx,%r9
+	adcq $0,%rcx
+
+	movq -80(%rsp),%rax
+	mulq -16(%rsp)
+	addq %rax,%r8
+	adcq %rdx,%r9
+	adcq $0,%rcx
+
+	movq -72(%rsp),%rax
+	mulq -24(%rsp)
+	addq %rax,%r8
+	adcq %rdx,%r9
+	adcq $0,%rcx
+
+/NO_APP
+	movq %r9, %rbp
+	movq %rcx, %r12
+/APP
+	movq -64(%rsp),%rax
+	mulq -32(%rsp)
+	addq %rax,%r8
+	adcq %rdx,%rbp
+	adcq $0,%r12
+
+/NO_APP
+	movq %r8, 80(%r11)
+	movq %r12, %r9
+	movq %rbp, %rcx
+	movq %r10, %r8
+/ Column 11: a[4]*b[7] through a[7]*b[4].
+/APP
+	movq -88(%rsp),%rax
+	mulq (%rsp)
+	addq %rax,%rcx
+	adcq %rdx,%r9
+	adcq $0,%r8
+
+	movq -80(%rsp),%rax
+	mulq -8(%rsp)
+	addq %rax,%rcx
+	adcq %rdx,%r9
+	adcq $0,%r8
+
+	movq -72(%rsp),%rax
+	mulq -16(%rsp)
+	addq %rax,%rcx
+	adcq %rdx,%r9
+	adcq $0,%r8
+
+/NO_APP
+	movq %r9, %rbp
+	movq %r8, %r12
+/APP
+	movq -64(%rsp),%rax
+	mulq -24(%rsp)
+	addq %rax,%rcx
+	adcq %rdx,%rbp
+	adcq $0,%r12
+
+/NO_APP
+	movq %rcx, 88(%r11)
+	movq %r12, %r9
+	movq %rbp, %r8
+	movq %r10, %rcx
+/ Column 12: a[5]*b[7], a[6]*b[6], a[7]*b[5].
+/APP
+	movq -80(%rsp),%rax
+	mulq (%rsp)
+	addq %rax,%r8
+	adcq %rdx,%r9
+	adcq $0,%rcx
+
+	movq -72(%rsp),%rax
+	mulq -8(%rsp)
+	addq %rax,%r8
+	adcq %rdx,%r9
+	adcq $0,%rcx
+
+/NO_APP
+	movq %r9, %rbp
+	movq %rcx, %r12
+/APP
+	movq -64(%rsp),%rax
+	mulq -16(%rsp)
+	addq %rax,%r8
+	adcq %rdx,%rbp
+	adcq $0,%r12
+
+/NO_APP
+	movq %r8, 96(%r11)
+	movq %r12, %r9
+	movq %rbp, %rcx
+	movq %r10, %r8
+/ Column 13: a[6]*b[7], a[7]*b[6].
+/APP
+	movq -72(%rsp),%rax
+	mulq (%rsp)
+	addq %rax,%rcx
+	adcq %rdx,%r9
+	adcq $0,%r8
+
+/NO_APP
+	movq %r8, %r12
+	movq %r9, %rbp
+/APP
+	movq -64(%rsp),%rax
+	mulq -8(%rsp)
+	addq %rax,%rcx
+	adcq %rdx,%rbp
+	adcq $0,%r12
+
+/NO_APP
+	movq %rcx, 104(%r11)
+	movq %rbp, %r8
+	movq %r12, %rcx
+/ Column 14: a[7]*b[7]; the carry left in %rcx becomes digit 15.
+/APP
+	movq -64(%rsp),%rax
+	mulq (%rsp)
+	addq %rax,%r8
+	adcq %rdx,%rcx
+	adcq $0,%r10
+
+/NO_APP
+	movq %r8, 112(%r11)
+/ %esi = xor of the dwords at offset 0 of the two inputs.
+	movl (%rsi), %esi
+	xorl (%rdi), %esi
+/ The count starts at 16. testq sets the flags on the top digit; the two
+/ stores that follow do not change them, so jne skips the trimming loop
+/ when the top digit is nonzero.
+	testq %rcx, %rcx
+	movq %rcx, 120(%r11)
+	movl $16, 8(%rbx)
+	jne .L35
+	.align 16
+/ Trimming loop: the current top digit is zero, so lower the count by
+/ one; stop when the count reaches zero or the new top digit is nonzero.
+.L43:
+	movl 8(%rbx), %edx
+	leal -1(%rdx), %edi
+	testl %edi, %edi
+	movl %edi, 8(%rbx)
+	je .L35
+	leal -2(%rdx), %eax
+	cmpq $0, (%r11,%rax,8)
+	je .L43
+/ Offset-0 dword of the output = %esi when the count is nonzero, else 0.
+.L35:
+	movl 8(%rbx), %r11d
+	xorl %edx, %edx
+	testl %r11d, %r11d
+	cmovne %esi, %edx
+	movl %edx, (%rbx)
+/ Release the 8 reserved bytes and restore the registers pushed on entry.
+	addq $8, %rsp
+	popq %rbx
+	popq %rbp
+	popq %r12
+	ret
+.LFE3:
+	.size s_mp_mul_comba_8, .-s_mp_mul_comba_8
+	.align 16
+.globl s_mp_mul_comba_16
+	.type s_mp_mul_comba_16, @function
+s_mp_mul_comba_16:
+.LFB4:
+	pushq %r12
+.LCFI7:
+	pushq %rbp
+.LCFI8:
+	pushq %rbx
+.LCFI9:
+	movq %rdx, %rbx
+	subq $136, %rsp
+.LCFI10:
+	movq 16(%rdi), %rax
+	movq (%rax), %r8
+	movq %r8, -120(%rsp)
+	movq 8(%rax), %rbp
+	movq %rbp, -112(%rsp)
+	movq 16(%rax), %r9
+	movq %r9, -104(%rsp)
+	movq 24(%rax), %r12
+	movq %r12, -96(%rsp)
+	movq 32(%rax), %rcx
+	movq %rcx, -88(%rsp)
+	movq 40(%rax), %r10
+	movq %r10, -80(%rsp)
+	movq 48(%rax), %rdx
+	movq %rdx, -72(%rsp)
+	movq 56(%rax), %r11
+	movq %r11, -64(%rsp)
+	movq 64(%rax), %r8
+	movq %r8, -56(%rsp)
+	movq 72(%rax), %rbp
+	movq %rbp, -48(%rsp)
+	movq 80(%rax), %r9
+	movq %r9, -40(%rsp)
+	movq 88(%rax), %r12
+	movq %r12, -32(%rsp)
+	movq 96(%rax), %rcx
+	movq %rcx, -24(%rsp)
+	movq 104(%rax), %r10
+	movq
%r10, -16(%rsp) + movq 112(%rax), %rdx + movq %rdx, -8(%rsp) + movq 120(%rax), %r11 + movq %r11, (%rsp) + movq 16(%rsi), %r11 + movq (%r11), %r8 + movq %r8, 8(%rsp) + movq 8(%r11), %rbp + movq %rbp, 16(%rsp) + movq 16(%r11), %r9 + movq %r9, 24(%rsp) + movq 24(%r11), %r12 + movq %r12, 32(%rsp) + movq 32(%r11), %rcx + movq %rcx, 40(%rsp) + movq 40(%r11), %r10 + movq %r10, 48(%rsp) + movq 48(%r11), %rdx + movq %rdx, 56(%rsp) + movq 56(%r11), %rax + movq %rax, 64(%rsp) + movq 64(%r11), %r8 + movq %r8, 72(%rsp) + movq 72(%r11), %rbp + movq %rbp, 80(%rsp) + movq 80(%r11), %r9 + movq %r9, 88(%rsp) + movq 88(%r11), %r12 + movq %r12, 96(%rsp) + movq 96(%r11), %rcx + movq %rcx, 104(%rsp) + movq 104(%r11), %r10 + movq %r10, 112(%rsp) + movq 112(%r11), %rdx + xorl %r10d, %r10d + movq %r10, %r8 + movq %r10, %r9 + movq %r10, %rbp + movq %rdx, 120(%rsp) + movq 120(%r11), %rax + movq %rax, 128(%rsp) + movq 16(%rbx), %r11 +/APP + movq -120(%rsp),%rax + mulq 8(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rbp + +/NO_APP + movq %r8, (%r11) + movq %rbp, %r8 + movq %r10, %rbp +/APP + movq -120(%rsp),%rax + mulq 16(%rsp) + addq %rax,%r9 + adcq %rdx,%r8 + adcq $0,%rbp + +/NO_APP + movq %rbp, %r12 +/APP + movq -112(%rsp),%rax + mulq 8(%rsp) + addq %rax,%r9 + adcq %rdx,%r8 + adcq $0,%r12 + +/NO_APP + movq %r9, 8(%r11) + movq %r12, %r9 + movq %r10, %r12 +/APP + movq -120(%rsp),%rax + mulq 24(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%r12 + +/NO_APP + movq %r12, %rcx +/APP + movq -112(%rsp),%rax + mulq 16(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq -104(%rsp),%rax + mulq 8(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 16(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -120(%rsp),%rax + mulq 32(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -112(%rsp),%rax + mulq 24(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -104(%rsp),%rax + mulq 
16(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq -96(%rsp),%rax + mulq 8(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 24(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -120(%rsp),%rax + mulq 40(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -112(%rsp),%rax + mulq 32(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -104(%rsp),%rax + mulq 24(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -96(%rsp),%rax + mulq 16(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq -88(%rsp),%rax + mulq 8(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 32(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -120(%rsp),%rax + mulq 48(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -112(%rsp),%rax + mulq 40(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -104(%rsp),%rax + mulq 32(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -96(%rsp),%rax + mulq 24(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -88(%rsp),%rax + mulq 16(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq -80(%rsp),%rax + mulq 8(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 40(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -120(%rsp),%rax + mulq 56(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -112(%rsp),%rax + mulq 48(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -104(%rsp),%rax + mulq 40(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -96(%rsp),%rax + mulq 32(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -88(%rsp),%rax + mulq 24(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -80(%rsp),%rax + mulq 16(%rsp) 
+ addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq -72(%rsp),%rax + mulq 8(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 48(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -120(%rsp),%rax + mulq 64(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -112(%rsp),%rax + mulq 56(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -104(%rsp),%rax + mulq 48(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -96(%rsp),%rax + mulq 40(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -88(%rsp),%rax + mulq 32(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -80(%rsp),%rax + mulq 24(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -72(%rsp),%rax + mulq 16(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq -64(%rsp),%rax + mulq 8(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 56(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -120(%rsp),%rax + mulq 72(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -112(%rsp),%rax + mulq 64(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -104(%rsp),%rax + mulq 56(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -96(%rsp),%rax + mulq 48(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -88(%rsp),%rax + mulq 40(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -80(%rsp),%rax + mulq 32(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -72(%rsp),%rax + mulq 24(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -64(%rsp),%rax + mulq 16(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq -56(%rsp),%rax + mulq 8(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 64(%r11) + movq %r12, %r9 + 
movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -120(%rsp),%rax + mulq 80(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -112(%rsp),%rax + mulq 72(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -104(%rsp),%rax + mulq 64(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -96(%rsp),%rax + mulq 56(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -88(%rsp),%rax + mulq 48(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -80(%rsp),%rax + mulq 40(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -72(%rsp),%rax + mulq 32(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -64(%rsp),%rax + mulq 24(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -56(%rsp),%rax + mulq 16(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq -48(%rsp),%rax + mulq 8(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 72(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -120(%rsp),%rax + mulq 88(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -112(%rsp),%rax + mulq 80(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -104(%rsp),%rax + mulq 72(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -96(%rsp),%rax + mulq 64(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -88(%rsp),%rax + mulq 56(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -80(%rsp),%rax + mulq 48(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -72(%rsp),%rax + mulq 40(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -64(%rsp),%rax + mulq 32(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -56(%rsp),%rax + mulq 24(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -48(%rsp),%rax + mulq 16(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq -40(%rsp),%rax + 
mulq 8(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 80(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -120(%rsp),%rax + mulq 96(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -112(%rsp),%rax + mulq 88(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -104(%rsp),%rax + mulq 80(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -96(%rsp),%rax + mulq 72(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -88(%rsp),%rax + mulq 64(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -80(%rsp),%rax + mulq 56(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -72(%rsp),%rax + mulq 48(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -64(%rsp),%rax + mulq 40(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -56(%rsp),%rax + mulq 32(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -48(%rsp),%rax + mulq 24(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -40(%rsp),%rax + mulq 16(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq -32(%rsp),%rax + mulq 8(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 88(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -120(%rsp),%rax + mulq 104(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -112(%rsp),%rax + mulq 96(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -104(%rsp),%rax + mulq 88(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -96(%rsp),%rax + mulq 80(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -88(%rsp),%rax + mulq 72(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -80(%rsp),%rax + mulq 64(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -72(%rsp),%rax + mulq 56(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -64(%rsp),%rax + mulq 48(%rsp) + addq 
%rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -56(%rsp),%rax + mulq 40(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -48(%rsp),%rax + mulq 32(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -40(%rsp),%rax + mulq 24(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -32(%rsp),%rax + mulq 16(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq -24(%rsp),%rax + mulq 8(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 96(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -120(%rsp),%rax + mulq 112(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -112(%rsp),%rax + mulq 104(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -104(%rsp),%rax + mulq 96(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -96(%rsp),%rax + mulq 88(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -88(%rsp),%rax + mulq 80(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -80(%rsp),%rax + mulq 72(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -72(%rsp),%rax + mulq 64(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -64(%rsp),%rax + mulq 56(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -56(%rsp),%rax + mulq 48(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -48(%rsp),%rax + mulq 40(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -40(%rsp),%rax + mulq 32(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -32(%rsp),%rax + mulq 24(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -24(%rsp),%rax + mulq 16(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq -16(%rsp),%rax + mulq 8(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 104(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq 
-120(%rsp),%rax + mulq 120(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -112(%rsp),%rax + mulq 112(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -104(%rsp),%rax + mulq 104(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -96(%rsp),%rax + mulq 96(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -88(%rsp),%rax + mulq 88(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -80(%rsp),%rax + mulq 80(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -72(%rsp),%rax + mulq 72(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -64(%rsp),%rax + mulq 64(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -56(%rsp),%rax + mulq 56(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -48(%rsp),%rax + mulq 48(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -40(%rsp),%rax + mulq 40(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -32(%rsp),%rax + mulq 32(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -24(%rsp),%rax + mulq 24(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -16(%rsp),%rax + mulq 16(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq -8(%rsp),%rax + mulq 8(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 112(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -120(%rsp),%rax + mulq 128(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -112(%rsp),%rax + mulq 120(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -104(%rsp),%rax + mulq 112(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -96(%rsp),%rax + mulq 104(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -88(%rsp),%rax + mulq 96(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -80(%rsp),%rax + mulq 88(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -72(%rsp),%rax + mulq 
80(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -64(%rsp),%rax + mulq 72(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -56(%rsp),%rax + mulq 64(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -48(%rsp),%rax + mulq 56(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -40(%rsp),%rax + mulq 48(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -32(%rsp),%rax + mulq 40(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -24(%rsp),%rax + mulq 32(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -16(%rsp),%rax + mulq 24(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -8(%rsp),%rax + mulq 16(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq (%rsp),%rax + mulq 8(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 120(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -112(%rsp),%rax + mulq 128(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -104(%rsp),%rax + mulq 120(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -96(%rsp),%rax + mulq 112(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -88(%rsp),%rax + mulq 104(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -80(%rsp),%rax + mulq 96(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -72(%rsp),%rax + mulq 88(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -64(%rsp),%rax + mulq 80(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -56(%rsp),%rax + mulq 72(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -48(%rsp),%rax + mulq 64(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -40(%rsp),%rax + mulq 56(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -32(%rsp),%rax + mulq 48(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -24(%rsp),%rax + mulq 40(%rsp) + addq %rax,%r8 + 
adcq %rdx,%r9 + adcq $0,%rcx + + movq -16(%rsp),%rax + mulq 32(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -8(%rsp),%rax + mulq 24(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq (%rsp),%rax + mulq 16(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 128(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -104(%rsp),%rax + mulq 128(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -96(%rsp),%rax + mulq 120(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -88(%rsp),%rax + mulq 112(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -80(%rsp),%rax + mulq 104(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -72(%rsp),%rax + mulq 96(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -64(%rsp),%rax + mulq 88(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -56(%rsp),%rax + mulq 80(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -48(%rsp),%rax + mulq 72(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -40(%rsp),%rax + mulq 64(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -32(%rsp),%rax + mulq 56(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -24(%rsp),%rax + mulq 48(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -16(%rsp),%rax + mulq 40(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -8(%rsp),%rax + mulq 32(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq (%rsp),%rax + mulq 24(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 136(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -96(%rsp),%rax + mulq 128(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -88(%rsp),%rax + mulq 120(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -80(%rsp),%rax + mulq 
112(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -72(%rsp),%rax + mulq 104(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -64(%rsp),%rax + mulq 96(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -56(%rsp),%rax + mulq 88(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -48(%rsp),%rax + mulq 80(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -40(%rsp),%rax + mulq 72(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -32(%rsp),%rax + mulq 64(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -24(%rsp),%rax + mulq 56(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -16(%rsp),%rax + mulq 48(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -8(%rsp),%rax + mulq 40(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq (%rsp),%rax + mulq 32(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 144(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -88(%rsp),%rax + mulq 128(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -80(%rsp),%rax + mulq 120(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -72(%rsp),%rax + mulq 112(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -64(%rsp),%rax + mulq 104(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -56(%rsp),%rax + mulq 96(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -48(%rsp),%rax + mulq 88(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -40(%rsp),%rax + mulq 80(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -32(%rsp),%rax + mulq 72(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -24(%rsp),%rax + mulq 64(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -16(%rsp),%rax + mulq 56(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -8(%rsp),%rax + mulq 48(%rsp) + addq %rax,%rcx + 
adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq (%rsp),%rax + mulq 40(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 152(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -80(%rsp),%rax + mulq 128(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -72(%rsp),%rax + mulq 120(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -64(%rsp),%rax + mulq 112(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -56(%rsp),%rax + mulq 104(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -48(%rsp),%rax + mulq 96(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -40(%rsp),%rax + mulq 88(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -32(%rsp),%rax + mulq 80(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -24(%rsp),%rax + mulq 72(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -16(%rsp),%rax + mulq 64(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -8(%rsp),%rax + mulq 56(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq (%rsp),%rax + mulq 48(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 160(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -72(%rsp),%rax + mulq 128(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -64(%rsp),%rax + mulq 120(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -56(%rsp),%rax + mulq 112(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -48(%rsp),%rax + mulq 104(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -40(%rsp),%rax + mulq 96(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -32(%rsp),%rax + mulq 88(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -24(%rsp),%rax + mulq 80(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -16(%rsp),%rax + mulq 
72(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -8(%rsp),%rax + mulq 64(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq (%rsp),%rax + mulq 56(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 168(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -64(%rsp),%rax + mulq 128(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -56(%rsp),%rax + mulq 120(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -48(%rsp),%rax + mulq 112(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -40(%rsp),%rax + mulq 104(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -32(%rsp),%rax + mulq 96(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -24(%rsp),%rax + mulq 88(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -16(%rsp),%rax + mulq 80(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -8(%rsp),%rax + mulq 72(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq (%rsp),%rax + mulq 64(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 176(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -56(%rsp),%rax + mulq 128(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -48(%rsp),%rax + mulq 120(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -40(%rsp),%rax + mulq 112(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -32(%rsp),%rax + mulq 104(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -24(%rsp),%rax + mulq 96(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -16(%rsp),%rax + mulq 88(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -8(%rsp),%rax + mulq 80(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq (%rsp),%rax + mulq 72(%rsp) + addq 
%rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 184(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -48(%rsp),%rax + mulq 128(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -40(%rsp),%rax + mulq 120(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -32(%rsp),%rax + mulq 112(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -24(%rsp),%rax + mulq 104(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -16(%rsp),%rax + mulq 96(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -8(%rsp),%rax + mulq 88(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq (%rsp),%rax + mulq 80(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 192(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -40(%rsp),%rax + mulq 128(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -32(%rsp),%rax + mulq 120(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -24(%rsp),%rax + mulq 112(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -16(%rsp),%rax + mulq 104(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -8(%rsp),%rax + mulq 96(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq (%rsp),%rax + mulq 88(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 200(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -32(%rsp),%rax + mulq 128(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -24(%rsp),%rax + mulq 120(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -16(%rsp),%rax + mulq 112(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -8(%rsp),%rax + mulq 104(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq (%rsp),%rax + mulq 96(%rsp) + addq %rax,%r8 + 
adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 208(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -24(%rsp),%rax + mulq 128(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -16(%rsp),%rax + mulq 120(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + + movq -8(%rsp),%rax + mulq 112(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbp + movq %r8, %r12 +/APP + movq (%rsp),%rax + mulq 104(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 216(%r11) + movq %r12, %r9 + movq %rbp, %r8 + movq %r10, %rcx +/APP + movq -16(%rsp),%rax + mulq 128(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + + movq -8(%rsp),%rax + mulq 120(%rsp) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rcx + +/NO_APP + movq %r9, %rbp + movq %rcx, %r12 +/APP + movq (%rsp),%rax + mulq 112(%rsp) + addq %rax,%r8 + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %r8, 224(%r11) + movq %r12, %r9 + movq %rbp, %rcx + movq %r10, %r8 +/APP + movq -8(%rsp),%rax + mulq 128(%rsp) + addq %rax,%rcx + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r8, %r12 + movq %r9, %rbp +/APP + movq (%rsp),%rax + mulq 120(%rsp) + addq %rax,%rcx + adcq %rdx,%rbp + adcq $0,%r12 + +/NO_APP + movq %rcx, 232(%r11) + movq %rbp, %r8 + movq %r12, %rcx +/APP + movq (%rsp),%rax + mulq 128(%rsp) + addq %rax,%r8 + adcq %rdx,%rcx + adcq $0,%r10 + +/NO_APP + movq %r8, 240(%r11) + movl (%rsi), %esi + xorl (%rdi), %esi + testq %rcx, %rcx + movq %rcx, 248(%r11) + movl $32, 8(%rbx) + jne .L76 + .align 16 +.L84: + movl 8(%rbx), %edx + leal -1(%rdx), %edi + testl %edi, %edi + movl %edi, 8(%rbx) + je .L76 + leal -2(%rdx), %eax + cmpq $0, (%r11,%rax,8) + je .L84 +.L76: + movl 8(%rbx), %edx + xorl %r11d, %r11d + testl %edx, %edx + cmovne %esi, %r11d + movl %r11d, (%rbx) + addq $136, %rsp + popq %rbx + popq %rbp + popq %r12 + ret +.LFE4: + .size s_mp_mul_comba_16, .-s_mp_mul_comba_16 + .align 16 +.globl s_mp_mul_comba_32 + .type s_mp_mul_comba_32, 
@function +s_mp_mul_comba_32: +.LFB5: + pushq %rbp +.LCFI11: + movq %rsp, %rbp +.LCFI12: + pushq %r13 +.LCFI13: + movq %rdx, %r13 + movl $256, %edx + pushq %r12 +.LCFI14: + movq %rsi, %r12 + pushq %rbx +.LCFI15: + movq %rdi, %rbx + subq $520, %rsp +.LCFI16: + movq 16(%rdi), %rsi + leaq -544(%rbp), %rdi + call memcpy@PLT + movq 16(%r12), %rsi + leaq -288(%rbp), %rdi + movl $256, %edx + call memcpy@PLT + movq 16(%r13), %r9 + xorl %r8d, %r8d + movq %r8, %rsi + movq %r8, %rdi + movq %r8, %r10 +/APP + movq -544(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%r10 + +/NO_APP + movq %rsi, (%r9) + movq %r10, %rsi + movq %r8, %r10 +/APP + movq -544(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%r10 + +/NO_APP + movq %r10, %r11 +/APP + movq -536(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%r11 + +/NO_APP + movq %rdi, 8(%r9) + movq %r11, %rdi + movq %r8, %r11 +/APP + movq -544(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%r11 + +/NO_APP + movq %r11, %rcx +/APP + movq -536(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -528(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 16(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -544(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -536(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -528(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -520(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 24(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -544(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + 
movq -536(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -528(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -520(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -512(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 32(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -544(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -536(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -528(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -520(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -512(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -504(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 40(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -544(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -536(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -528(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -520(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -512(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -504(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -496(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 48(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq 
-544(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -536(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -528(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -520(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -512(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -504(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -496(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -488(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 56(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -544(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -536(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -528(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -520(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -512(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -504(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -496(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -488(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -480(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 64(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -544(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -536(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq 
%rdx,%rdi + adcq $0,%rsi + + movq -528(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -520(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -512(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -504(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -496(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -488(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -480(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -472(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 72(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -544(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -536(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -528(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -520(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -512(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -504(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -496(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -488(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -480(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -472(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -464(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 80(%r9) + 
movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -544(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -536(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -528(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -520(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -512(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -504(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -496(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -488(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -480(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -472(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -464(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -456(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 88(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -544(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -536(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -528(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -520(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -512(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -504(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -496(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -488(%rbp),%rax + mulq -248(%rbp) + addq 
%rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -480(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -472(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -464(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -456(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -448(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 96(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -544(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -536(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -528(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -520(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -512(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -504(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -496(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -488(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -480(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -472(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -464(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -456(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -448(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -440(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq 
%rcx, 104(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -544(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -536(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -528(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -520(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -512(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -504(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -496(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -488(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -480(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -472(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -464(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -456(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -448(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -440(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -432(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 112(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -544(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -536(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -528(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -520(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -512(%rbp),%rax + mulq 
-200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -504(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -496(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -488(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -480(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -472(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -464(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -456(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -448(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -440(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -432(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -424(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 120(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -544(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -536(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -528(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -520(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -512(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -504(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -496(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -488(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -480(%rbp),%rax + mulq -224(%rbp) + addq 
%rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -472(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -464(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -456(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -448(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -440(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -432(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -424(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -416(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 128(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -544(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -536(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -528(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -520(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -512(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -504(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -496(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -488(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -480(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -472(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -464(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -456(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq 
%rdx,%rdi + adcq $0,%rsi + + movq -448(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -440(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -432(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -424(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -416(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -408(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 136(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -544(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -536(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -528(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -520(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -512(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -504(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -496(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -488(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -480(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -472(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -464(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -456(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -448(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -440(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq 
$0,%rcx + + movq -432(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -424(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -416(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -408(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -400(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 144(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -544(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -536(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -528(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -520(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -512(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -504(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -496(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -488(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -480(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -472(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -464(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -456(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -448(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -440(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -432(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq 
-424(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -416(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -408(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -400(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -392(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 152(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -544(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -536(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -528(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -520(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -512(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -504(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -496(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -488(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -480(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -472(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -464(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -456(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -448(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -440(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -432(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -424(%rbp),%rax + 
mulq -248(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -416(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -408(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -400(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -392(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -384(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 160(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -544(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -536(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -528(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -520(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -512(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -504(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -496(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -488(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -480(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -472(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -464(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -456(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -448(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -440(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -432(%rbp),%rax + mulq -232(%rbp) + 
addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -424(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -416(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -408(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -400(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -392(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -384(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -376(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 168(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -544(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -536(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -528(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -520(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -512(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -504(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -496(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -488(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -480(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -472(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -464(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -456(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -448(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + 
adcq %rdx,%rdi + adcq $0,%rcx + + movq -440(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -432(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -424(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -416(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -408(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -400(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -392(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -384(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -376(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -368(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 176(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -544(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -536(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -528(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -520(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -512(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -504(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -496(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -488(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -480(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -472(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + 
adcq $0,%rsi + + movq -464(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -456(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -448(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -440(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -432(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -424(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -416(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -408(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -400(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -392(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -384(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -376(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -368(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -360(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 184(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -544(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -536(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -528(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -520(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -512(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -504(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + 
movq -496(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -488(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -480(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -472(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -464(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -456(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -448(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -440(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -432(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -424(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -416(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -408(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -400(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -392(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -384(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -376(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -368(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -360(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -352(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 192(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -544(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq 
-536(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -528(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -520(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -512(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -504(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -496(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -488(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -480(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -472(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -464(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -456(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -448(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -440(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -432(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -424(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -416(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -408(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -400(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -392(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -384(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -376(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -368(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi 
+ adcq $0,%rsi + + movq -360(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -352(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -344(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 200(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -544(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -536(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -528(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -520(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -512(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -504(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -496(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -488(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -480(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -472(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -464(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -456(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -448(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -440(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -432(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -424(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -416(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + 
movq -408(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -400(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -392(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -384(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -376(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -368(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -360(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -352(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -344(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -336(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 208(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -544(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -536(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -528(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -520(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -512(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -504(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -496(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -488(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -480(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -472(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -464(%rbp),%rax + 
mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -456(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -448(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -440(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -432(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -424(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -416(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -408(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -400(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -392(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -384(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -376(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -368(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -360(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -352(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -344(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -336(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -328(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 216(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -544(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -536(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -528(%rbp),%rax + mulq -80(%rbp) + 
addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -520(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -512(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -504(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -496(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -488(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -480(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -472(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -464(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -456(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -448(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -440(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -432(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -424(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -416(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -408(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -400(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -392(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -384(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -376(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -368(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -360(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq 
-352(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -344(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -336(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -328(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -320(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 224(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -544(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -536(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -528(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -520(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -512(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -504(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -496(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -488(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -480(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -472(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -464(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -456(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -448(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -440(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -432(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -424(%rbp),%rax + mulq 
-176(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -416(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -408(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -400(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -392(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -384(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -376(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -368(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -360(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -352(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -344(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -336(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -328(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -320(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -312(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 232(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -544(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -536(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -528(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -520(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -512(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -504(%rbp),%rax + mulq -88(%rbp) + addq 
%rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -496(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -488(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -480(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -472(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -464(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -456(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -448(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -440(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -432(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -424(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -416(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -408(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -400(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -392(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -384(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -376(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -368(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -360(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -352(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -344(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -336(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -328(%rbp),%rax + 
mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -320(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -312(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -304(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 240(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -544(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -536(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -528(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -520(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -512(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -504(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -496(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -488(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -480(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -472(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -464(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -456(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -448(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -440(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -432(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -424(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -416(%rbp),%rax + mulq -168(%rbp) + addq 
%rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -408(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -400(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -392(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -384(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -376(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -368(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -360(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -352(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -344(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -336(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -328(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -320(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -312(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -304(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -296(%rbp),%rax + mulq -288(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 248(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -536(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -528(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -520(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -512(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -504(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rsi + adcq 
%rdx,%rdi + adcq $0,%rcx + + movq -496(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -488(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -480(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -472(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -464(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -456(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -448(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -440(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -432(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -424(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -416(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -408(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -400(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -392(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -384(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -376(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -368(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -360(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -352(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -344(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -336(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -328(%rbp),%rax + mulq -248(%rbp) + 
addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -320(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -312(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -304(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -296(%rbp),%rax + mulq -280(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 256(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -528(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -520(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -512(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -504(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -496(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -488(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -480(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -472(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -464(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -456(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -448(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -440(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -432(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -424(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -416(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -408(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rcx + adcq 
%rdx,%rdi + adcq $0,%rsi + + movq -400(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -392(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -384(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -376(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -368(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -360(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -352(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -344(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -336(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -328(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -320(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -312(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -304(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -296(%rbp),%rax + mulq -272(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 264(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -520(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -512(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -504(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -496(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -488(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -480(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx 
+ + movq -472(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -464(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -456(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -448(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -440(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -432(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -424(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -416(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -408(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -400(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -392(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -384(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -376(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -368(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -360(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -352(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -344(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -336(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -328(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -320(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -312(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -304(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rsi + adcq 
%rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -296(%rbp),%rax + mulq -264(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 272(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -512(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -504(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -496(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -488(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -480(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -472(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -464(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -456(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -448(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -440(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -432(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -424(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -416(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -408(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -400(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -392(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -384(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -376(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -368(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + 
+ movq -360(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -352(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -344(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -336(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -328(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -320(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -312(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -304(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -296(%rbp),%rax + mulq -256(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 280(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -504(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -496(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -488(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -480(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -472(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -464(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -456(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -448(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -440(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -432(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -424(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -416(%rbp),%rax + 
mulq -128(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -408(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -400(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -392(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -384(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -376(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -368(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -360(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -352(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -344(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -336(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -328(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -320(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -312(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -304(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -296(%rbp),%rax + mulq -248(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 288(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -496(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -488(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -480(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -472(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -464(%rbp),%rax + mulq -72(%rbp) + addq 
%rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -456(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -448(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -440(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -432(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -424(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -416(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -408(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -400(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -392(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -384(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -376(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -368(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -360(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -352(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -344(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -336(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -328(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -320(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -312(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -304(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -296(%rbp),%rax + mulq -240(%rbp) + addq %rax,%rcx + adcq 
%rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 296(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -488(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -480(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -472(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -464(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -456(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -448(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -440(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -432(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -424(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -416(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -408(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -400(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -392(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -384(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -376(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -368(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -360(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -352(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -344(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -336(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -328(%rbp),%rax + mulq -200(%rbp) + addq 
%rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -320(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -312(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -304(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -296(%rbp),%rax + mulq -232(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 304(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -480(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -472(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -464(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -456(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -448(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -440(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -432(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -424(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -416(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -408(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -400(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -392(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -384(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -376(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -368(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -360(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi 
+ adcq $0,%rsi + + movq -352(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -344(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -336(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -328(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -320(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -312(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -304(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -296(%rbp),%rax + mulq -224(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 312(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -472(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -464(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -456(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -448(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -440(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -432(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -424(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -416(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -408(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -400(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -392(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -384(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq 
-376(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -368(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -360(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -352(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -344(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -336(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -328(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -320(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -312(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -304(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -296(%rbp),%rax + mulq -216(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 320(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -464(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -456(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -448(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -440(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -432(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -424(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -416(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -408(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -400(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -392(%rbp),%rax + mulq 
-112(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -384(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -376(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -368(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -360(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -352(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -344(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -336(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -328(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -320(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -312(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -304(%rbp),%rax + mulq -200(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -296(%rbp),%rax + mulq -208(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 328(%r9) + movq %r11, %rdi + movq %r10, %r11 + movq %r8, %r10 +/APP + movq -456(%rbp),%rax + mulq -40(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -448(%rbp),%rax + mulq -48(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -440(%rbp),%rax + mulq -56(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -432(%rbp),%rax + mulq -64(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -424(%rbp),%rax + mulq -72(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -416(%rbp),%rax + mulq -80(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -408(%rbp),%rax + mulq -88(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -400(%rbp),%rax + mulq -96(%rbp) + addq 
%rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -392(%rbp),%rax + mulq -104(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -384(%rbp),%rax + mulq -112(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -376(%rbp),%rax + mulq -120(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -368(%rbp),%rax + mulq -128(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -360(%rbp),%rax + mulq -136(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -352(%rbp),%rax + mulq -144(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -344(%rbp),%rax + mulq -152(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -336(%rbp),%rax + mulq -160(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -328(%rbp),%rax + mulq -168(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -320(%rbp),%rax + mulq -176(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -312(%rbp),%rax + mulq -184(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -304(%rbp),%rax + mulq -192(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + + movq -296(%rbp),%rax + mulq -200(%rbp) + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%r10 + +/NO_APP + movq %r11, 336(%r9) + movq %r10, %rsi + movq %r8, %r10 +/APP + movq -448(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%r10 + +/NO_APP + movq %r10, %rcx +/APP + movq -440(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -432(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -424(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -416(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -408(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -400(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq 
-392(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -384(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -376(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -368(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -360(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -352(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -344(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -336(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -328(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -320(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -312(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + + movq -304(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rcx + +/NO_APP + movq %rsi, %r11 + movq %rcx, %r10 +/APP + movq -296(%rbp),%rax + mulq -192(%rbp) + addq %rax,%rdi + adcq %rdx,%r11 + adcq $0,%r10 + +/NO_APP + movq %rdi, 344(%r9) + movq %r11, %rcx + movq %r10, %rdi + movq %r8, %r11 +/APP + movq -440(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%r11 + +/NO_APP + movq %r11, %rsi +/APP + movq -432(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -424(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -416(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -408(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -400(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -392(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + 
+ movq -384(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -376(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -368(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -360(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -352(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -344(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -336(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -328(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -320(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -312(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -304(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -296(%rbp),%rax + mulq -184(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 352(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -432(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -424(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -416(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -408(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -400(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -392(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -384(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -376(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -368(%rbp),%rax + 
mulq -104(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -360(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -352(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -344(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -336(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -328(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -320(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -312(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -304(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -296(%rbp),%rax + mulq -176(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 360(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -424(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -416(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -408(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -400(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -392(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -384(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -376(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -368(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -360(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -352(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -344(%rbp),%rax + mulq -120(%rbp) + addq 
%rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -336(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -328(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -320(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -312(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -304(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -296(%rbp),%rax + mulq -168(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 368(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -416(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -408(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -400(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -392(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -384(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -376(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -368(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -360(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -352(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -344(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -336(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -328(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -320(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -312(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi 
+ adcq $0,%rcx + + movq -304(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -296(%rbp),%rax + mulq -160(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 376(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -408(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -400(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -392(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -384(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -376(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -368(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -360(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -352(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -344(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -336(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -328(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -320(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -312(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -304(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -296(%rbp),%rax + mulq -152(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 384(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -400(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -392(%rbp),%rax + mulq -48(%rbp) + addq 
%rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -384(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -376(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -368(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -360(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -352(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -344(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -336(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -328(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -320(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -312(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -304(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -296(%rbp),%rax + mulq -144(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 392(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -392(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -384(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -376(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -368(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -360(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -352(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -344(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -336(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + 
adcq $0,%rsi + + movq -328(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -320(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -312(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -304(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -296(%rbp),%rax + mulq -136(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 400(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -384(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -376(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -368(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -360(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -352(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -344(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -336(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -328(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -320(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -312(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -304(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -296(%rbp),%rax + mulq -128(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 408(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -376(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -368(%rbp),%rax + mulq -48(%rbp) + addq 
%rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -360(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -352(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -344(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -336(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -328(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -320(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -312(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -304(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -296(%rbp),%rax + mulq -120(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 416(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -368(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -360(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -352(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -344(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -336(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -328(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -320(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -312(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -304(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -296(%rbp),%rax + mulq -112(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 
424(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -360(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -352(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -344(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -336(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -328(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -320(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -312(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -304(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -296(%rbp),%rax + mulq -104(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 432(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -352(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -344(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -336(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -328(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -320(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -312(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -304(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -296(%rbp),%rax + mulq -96(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 440(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -344(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq 
-336(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -328(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -320(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -312(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -304(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -296(%rbp),%rax + mulq -88(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 448(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -336(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -328(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -320(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -312(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -304(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -296(%rbp),%rax + mulq -80(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 456(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -328(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -320(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -312(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -304(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -296(%rbp),%rax + mulq -72(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 464(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -320(%rbp),%rax + mulq 
-40(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -312(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + + movq -304(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rdi, %r10 + movq %rcx, %r11 +/APP + movq -296(%rbp),%rax + mulq -64(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 472(%r9) + movq %r11, %rdi + movq %r10, %rcx + movq %r8, %rsi +/APP + movq -312(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + + movq -304(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rcx + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r10 + movq %rsi, %r11 +/APP + movq -296(%rbp),%rax + mulq -56(%rbp) + addq %rax,%rcx + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rcx, 480(%r9) + movq %r11, %rdi + movq %r10, %rsi + movq %r8, %rcx +/APP + movq -304(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rsi + adcq %rdx,%rdi + adcq $0,%rcx + +/NO_APP + movq %rcx, %r11 + movq %rdi, %r10 +/APP + movq -296(%rbp),%rax + mulq -48(%rbp) + addq %rax,%rsi + adcq %rdx,%r10 + adcq $0,%r11 + +/NO_APP + movq %rsi, 488(%r9) + movq %r10, %rcx + movq %r11, %rsi +/APP + movq -296(%rbp),%rax + mulq -40(%rbp) + addq %rax,%rcx + adcq %rdx,%rsi + adcq $0,%r8 + +/NO_APP + movq %rcx, 496(%r9) + movl (%r12), %ecx + xorl (%rbx), %ecx + testq %rsi, %rsi + movq %rsi, 504(%r9) + movl $64, 8(%r13) + jne .L149 + .align 16 +.L157: + movl 8(%r13), %edx + leal -1(%rdx), %ebx + testl %ebx, %ebx + movl %ebx, 8(%r13) + je .L149 + leal -2(%rdx), %r12d + cmpq $0, (%r9,%r12,8) + je .L157 +.L149: + movl 8(%r13), %r9d + xorl %edx, %edx + testl %r9d, %r9d + cmovne %ecx, %edx + movl %edx, (%r13) + addq $520, %rsp + popq %rbx + popq %r12 + popq %r13 + leave + ret +.LFE5: + .size s_mp_mul_comba_32, .-s_mp_mul_comba_32 + .align 16 /* s_mp_sqr_comba_4: inputs arrive in %rdi (input operand) and %rsi (output operand), both pointers to a record. Reads four 64-bit words a0..a3 through the pointer at 16(%rdi) and builds their 8-word square one column at a time. */ +.globl s_mp_sqr_comba_4 /* Output record: 0 is stored at offset 0, a word count at offset 8 (set to 8, later lowered while the top result word is zero), result words go through the pointer at offset 16. */ + .type s_mp_sqr_comba_4, @function /* NOTE(review): offsets 0, 8, 16 presumably are the mp_int sign, used, dp fields - confirm against mpi.h. */ +s_mp_sqr_comba_4: /* Column results are staged at -72(%rsp)..-16(%rsp) with no stack adjustment and no calls; presumably relies on the SysV AMD64 red zone - confirm. */ +.LFB6: + pushq %rbp +.LCFI17: + movq %rsi, %r11 /* %r11 = output operand for the rest of the function */ + xorl %esi, %esi /* %rsi = 0; it is copied into the accumulator registers below */ + movq
%rsi, %r10 + movq %rsi, %rbp + movq %rsi, %r8 + pushq %rbx +.LCFI18: + movq %rsi, %rbx + movq 16(%rdi), %rcx /* %rcx = pointer loaded from 16(%rdi); a0..a3 are the words at 0, 8, 16, 24 from it */ + movq %rsi, %rdi +/APP + movq (%rcx),%rax + mulq %rax + addq %rax,%r10 + adcq %rdx,%rbx + adcq $0,%rdi + +/NO_APP + movq %r10, -72(%rsp) /* column 0: a0*a0 */ +/APP + movq (%rcx),%rax + mulq 8(%rcx) + addq %rax,%rbx + adcq %rdx,%rdi + adcq $0,%rbp + addq %rax,%rbx + adcq %rdx,%rdi + adcq $0,%rbp + +/NO_APP + movq %rbx, -64(%rsp) /* column 1: a0*a1 added twice on top of the carry from column 0 */ +/APP + movq (%rcx),%rax + mulq 16(%rcx) + addq %rax,%rdi + adcq %rdx,%rbp + adcq $0,%r8 + addq %rax,%rdi + adcq %rdx,%rbp + adcq $0,%r8 + +/NO_APP + movq %rbp, %rbx + movq %r8, %rbp +/APP + movq 8(%rcx),%rax + mulq %rax + addq %rax,%rdi + adcq %rdx,%rbx + adcq $0,%rbp + +/NO_APP + movq %rdi, -56(%rsp) /* column 2: a0*a2 twice, a1*a1 once */ + movq %rbp, %r9 + movq %rbx, %r8 + movq %rsi, %rdi +/APP + movq (%rcx),%rax + mulq 24(%rcx) + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rdi + addq %rax,%r8 + adcq %rdx,%r9 + adcq $0,%rdi + +/NO_APP + movq %r9, %rbx + movq %rdi, %rbp +/APP + movq 8(%rcx),%rax + mulq 16(%rcx) + addq %rax,%r8 + adcq %rdx,%rbx + adcq $0,%rbp + addq %rax,%r8 + adcq %rdx,%rbx + adcq $0,%rbp + +/NO_APP + movq %r8, -48(%rsp) /* column 3: a0*a3 twice, a1*a2 twice */ + movq %rbp, %r9 + movq %rbx, %rdi + movq %rsi, %r8 + movl $8, 8(%r11) + movl $0, (%r11) /* output record: word count at offset 8 set to 8, offset 0 set to 0 (done before the remaining columns) */ +/APP + movq 8(%rcx),%rax + mulq 24(%rcx) + addq %rax,%rdi + adcq %rdx,%r9 + adcq $0,%r8 + addq %rax,%rdi + adcq %rdx,%r9 + adcq $0,%r8 + +/NO_APP + movq %r9, %rbx + movq %r8, %rbp +/APP + movq 16(%rcx),%rax + mulq %rax + addq %rax,%rdi + adcq %rdx,%rbx + adcq $0,%rbp + +/NO_APP + movq %rbp, %rax + movq %rdi, -40(%rsp) /* column 4: a1*a3 twice, a2*a2 once */ + movq %rbx, %rbp + movq %rax, %rdi + movq %rsi, %rbx +/APP + movq 16(%rcx),%rax + mulq 24(%rcx) + addq %rax,%rbp + adcq %rdx,%rdi + adcq $0,%rbx + addq %rax,%rbp + adcq %rdx,%rdi + adcq $0,%rbx + +/NO_APP + movq %rbp, -32(%rsp) /* column 5: a2*a3 twice */ + movq %rbx, %r9 +/APP + movq 24(%rcx),%rax + mulq %rax + addq %rax,%rdi + adcq %rdx,%r9 + adcq $0,%rsi + +/NO_APP + movq 16(%r11), %rdx /* %rdx = output word array (pointer at offset 16 of the output operand) */ + movq %rdi, -24(%rsp) + movq %r9, -16(%rsp) /* columns 6 and 7: a3*a3 on top of the carries; %r9 holds the top word */ + movq %r10, (%rdx) /* result word 0 comes straight from %r10; the following words are reloaded from the scratch area */ + movq -64(%rsp), %r8 + movq
%r8, 8(%rdx) + movq -56(%rsp), %rbp + movq %rbp, 16(%rdx) + movq -48(%rsp), %rdi + movq %rdi, 24(%rdx) + movq -40(%rsp), %rsi + movq %rsi, 32(%rdx) + movq -32(%rsp), %rbx + movq %rbx, 40(%rdx) + movq -24(%rsp), %rcx + movq %rcx, 48(%rdx) + movq -16(%rsp), %rax + movq %rax, 56(%rdx) /* (s_mp_sqr_comba_4, continued) result words 1..7 copied from the scratch area to the output array */ + movl 8(%r11), %edx /* trim: lower the word count at offset 8 while the top counted word is zero */ + testl %edx, %edx + je .L168 + leal -1(%rdx), %ecx + movq 16(%r11), %rsi + mov %ecx, %r10d + cmpq $0, (%rsi,%r10,8) + jne .L166 + movl %ecx, %edx + .align 16 +.L167: + testl %edx, %edx + movl %edx, %ecx + je .L171 + decl %edx + mov %edx, %eax + cmpq $0, (%rsi,%rax,8) + je .L167 + movl %ecx, 8(%r11) + movl %ecx, %edx +.L166: + testl %edx, %edx + je .L168 + popq %rbx + popq %rbp + movl (%r11), %eax + movl %eax, (%r11) /* count is nonzero: offset 0 is rewritten with the value it already holds */ + ret +.L171: + movl %edx, 8(%r11) + .align 16 +.L168: + popq %rbx + popq %rbp + xorl %eax, %eax + movl %eax, (%r11) /* count is zero: store 0 at offset 0 */ + ret +.LFE6: + .size s_mp_sqr_comba_4, .-s_mp_sqr_comba_4 + .align 16 /* s_mp_sqr_comba_8: inputs arrive in %rdi (input operand) and %rsi (output operand), both pointers to a record. Reads eight 64-bit words a0..a7 through the pointer at 16(%rdi) and builds their 16-word square one column at a time. */ +.globl s_mp_sqr_comba_8 /* Output record: 0 is stored at offset 0, a word count at offset 8 (set to 16, later lowered while the top result word is zero), result words go through the pointer at offset 16. */ + .type s_mp_sqr_comba_8, @function /* NOTE(review): offsets 0, 8, 16 presumably are the mp_int sign, used, dp fields - confirm against mpi.h. */ +s_mp_sqr_comba_8: /* Saves %r14, %r13, %r12, %rbp, %rbx. Column results are staged at -120(%rsp)..(%rsp) after subq $8; the part below %rsp presumably relies on the SysV AMD64 red zone - confirm. */ +.LFB7: + pushq %r14 +.LCFI19: + xorl %r9d, %r9d /* %r9 = 0; it is copied into the accumulator registers below */ + movq %r9, %r14 + movq %r9, %r10 + pushq %r13 +.LCFI20: + movq %r9, %r13 + pushq %r12 +.LCFI21: + movq %r9, %r12 + pushq %rbp +.LCFI22: + movq %rsi, %rbp /* %rbp = output operand for the rest of the function */ + movq %r9, %rsi + pushq %rbx +.LCFI23: + movq %r9, %rbx + subq $8, %rsp +.LCFI24: + movq 16(%rdi), %rcx /* %rcx = pointer loaded from 16(%rdi); a0..a7 are the words at 0, 8, ..., 56 from it */ +/APP + movq (%rcx),%rax + mulq %rax + addq %rax,%r14 + adcq %rdx,%rbx + adcq $0,%r12 + +/NO_APP + movq %r14, -120(%rsp) /* column 0: a0*a0 */ +/APP + movq (%rcx),%rax + mulq 8(%rcx) + addq %rax,%rbx + adcq %rdx,%r12 + adcq $0,%r10 + addq %rax,%rbx + adcq %rdx,%r12 + adcq $0,%r10 + +/NO_APP + movq %rbx, -112(%rsp) /* column 1: a0*a1 added twice on top of the carry from column 0 */ +/APP + movq (%rcx),%rax + mulq 16(%rcx) + addq %rax,%r12 + adcq %rdx,%r10 + adcq $0,%r13 + addq %rax,%r12 + adcq %rdx,%r10 + adcq $0,%r13 + +/NO_APP + movq %r10, %rbx + movq %r13, %r10 + movq %r9, %r13 +/APP + movq 8(%rcx),%rax + mulq %rax + addq %rax,%r12 + adcq %rdx,%rbx + adcq $0,%r10 + +/NO_APP + movq %r12, -104(%rsp) /* column 2: a0*a2 twice, a1*a1 once */ + movq %r10, %rdi + movq %rbx, %r11 +/APP + movq (%rcx),%rax + mulq 24(%rcx) +
addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%rsi + addq %rax,%r11 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %rbx + movq %rsi, %r10 + movq %r9, %rdi +/APP + movq 8(%rcx),%rax + mulq 16(%rcx) + addq %rax,%r11 + adcq %rdx,%rbx + adcq $0,%r10 + addq %rax,%r11 + adcq %rdx,%rbx + adcq $0,%r10 + +/NO_APP + movq %r9, %rsi + movq %r11, -96(%rsp) /* column 3: a0*a3 twice, a1*a2 twice */ + movq %r10, %r8 + movq %rbx, %r12 + movq %r9, %r11 +/APP + movq (%rcx),%rax + mulq 32(%rcx) + addq %rax,%r12 + adcq %rdx,%r8 + adcq $0,%r13 + addq %rax,%r12 + adcq %rdx,%r8 + adcq $0,%r13 + + movq 8(%rcx),%rax + mulq 24(%rcx) + addq %rax,%r12 + adcq %rdx,%r8 + adcq $0,%r13 + addq %rax,%r12 + adcq %rdx,%r8 + adcq $0,%r13 + +/NO_APP + movq %r8, %rbx + movq %r13, %r10 + movq %r9, %r8 +/APP + movq 16(%rcx),%rax + mulq %rax + addq %rax,%r12 + adcq %rdx,%rbx + adcq $0,%r10 + +/NO_APP + movq %r12, -88(%rsp) /* column 4: a0*a4 twice, a1*a3 twice, a2*a2 once */ +/APP + movq (%rcx),%rax + mulq 40(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 32(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 24(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + addq %r8,%rbx + adcq %rdi,%r10 + adcq %rsi,%r11 + addq %r8,%rbx + adcq %rdi,%r10 + adcq %rsi,%r11 + +/NO_APP + movq %rbx, -80(%rsp) /* column 5: a0*a5, a1*a4, a2*a3 are first summed in %r8, %rdi, %rsi; that three-word sum is then added twice */ +/APP + movq (%rcx),%rax + mulq 48(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 40(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 32(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r9, %rax + movq %rdi, %r13 + movq %rsi, %r12 +/APP + addq %r8,%r10 + adcq %r13,%r11 + adcq %r12,%rax + addq %r8,%r10 + adcq %r13,%r11 + adcq %r12,%rax + +/NO_APP + movq %rax, %rdx + movq %r11, %rbx + movq %r13, %rdi + movq %rdx, %r11 + movq %r12, %rsi +/APP + movq 24(%rcx),%rax + mulq %rax + addq %rax,%r10 + adcq %rdx,%rbx + adcq $0,%r11 + +/NO_APP + movq %r10, -72(%rsp) /* column 6: the sum of a0*a6, a1*a5, a2*a4 added twice, a3*a3 once */ + movq %r11, %r10 +/APP + movq (%rcx),%rax + mulq 56(%rcx) + movq %rax,%r8 + movq
%rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 48(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 40(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 32(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r9, %rax +/APP + addq %r8,%rbx + adcq %rdi,%r10 + adcq %rsi,%rax + addq %r8,%rbx + adcq %rdi,%r10 + adcq %rsi,%rax + +/NO_APP + movq %rbx, -64(%rsp) /* column 7: the sum of a0*a7, a1*a6, a2*a5, a3*a4 added twice */ + movq %rax, %r11 + movq %r9, %rbx +/APP + movq 8(%rcx),%rax + mulq 56(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 16(%rcx),%rax + mulq 48(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 40(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r13 + movq %rsi, %r12 +/APP + addq %r8,%r10 + adcq %r13,%r11 + adcq %r12,%rbx + addq %r8,%r10 + adcq %r13,%r11 + adcq %r12,%rbx + +/NO_APP + movq %rbx, %rsi + movq %r13, %rdi + movq %r11, %rbx + movq %r12, %r13 + movq %rsi, %r11 +/APP + movq 32(%rcx),%rax + mulq %rax + addq %rax,%r10 + adcq %rdx,%rbx + adcq $0,%r11 + +/NO_APP + movq %r10, -56(%rsp) /* column 8: the sum of a1*a7, a2*a6, a3*a5 added twice, a4*a4 once */ + movq %r9, %r10 +/APP + movq 16(%rcx),%rax + mulq 56(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %r13,%r13 + + movq 24(%rcx),%rax + mulq 48(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%r13 + + movq 32(%rcx),%rax + mulq 40(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%r13 + +/NO_APP + movq %rdi, %r12 + movq %r13, %rax +/APP + addq %r8,%rbx + adcq %r12,%r11 + adcq %rax,%r10 + addq %r8,%rbx + adcq %r12,%r11 + adcq %rax,%r10 + +/NO_APP + movq %rbx, -48(%rsp) /* column 9: the sum of a2*a7, a3*a6, a4*a5 added twice */ + movq %r11, %r12 + movq %r10, %rsi + movq %r9, %rbx + movq %r9, %r11 +/APP + movq 24(%rcx),%rax + mulq 56(%rcx) + addq %rax,%r12 + adcq %rdx,%rsi + adcq $0,%rbx + addq %rax,%r12 + adcq %rdx,%rsi + adcq $0,%rbx + +/NO_APP + movq %rbx, %r13 +/APP + movq 32(%rcx),%rax + mulq 48(%rcx) + addq %rax,%r12 + adcq %rdx,%rsi + adcq $0,%r13 + addq %rax,%r12 + adcq %rdx,%rsi + adcq $0,%r13 + +/NO_APP +
movq %rsi, %r10 + movq %r13, %rbx + movq %r9, %r13 +/APP + movq 40(%rcx),%rax + mulq %rax + addq %rax,%r12 + adcq %rdx,%r10 + adcq $0,%rbx + +/NO_APP + movq %r12, -40(%rsp) /* column 10: a3*a7 twice, a4*a6 twice, a5*a5 once */ + movq %rbx, %r8 + movq %r10, %rdi +/APP + movq 32(%rcx),%rax + mulq 56(%rcx) + addq %rax,%rdi + adcq %rdx,%r8 + adcq $0,%r11 + addq %rax,%rdi + adcq %rdx,%r8 + adcq $0,%r11 + +/NO_APP + movq %r8, %r10 + movq %r11, %rbx +/APP + movq 40(%rcx),%rax + mulq 48(%rcx) + addq %rax,%rdi + adcq %rdx,%r10 + adcq $0,%rbx + addq %rax,%rdi + adcq %rdx,%r10 + adcq $0,%rbx + +/NO_APP + movq %rdi, -32(%rsp) /* column 11: a4*a7 twice, a5*a6 twice */ + movq %rbx, %rsi + movq %r10, %r12 +/APP + movq 40(%rcx),%rax + mulq 56(%rcx) + addq %rax,%r12 + adcq %rdx,%rsi + adcq $0,%r13 + addq %rax,%r12 + adcq %rdx,%rsi + adcq $0,%r13 + +/NO_APP + movq %rsi, %r10 + movq %r13, %rbx +/APP + movq 48(%rcx),%rax + mulq %rax + addq %rax,%r12 + adcq %rdx,%r10 + adcq $0,%rbx + +/NO_APP + movq %r12, -24(%rsp) /* column 12: a5*a7 twice, a6*a6 once */ + movq %r10, %rdi + movq %rbx, %rsi + movq %r9, %r10 + movl $16, 8(%rbp) + movl $0, (%rbp) /* output record: word count at offset 8 set to 16, offset 0 set to 0 (done before the last columns) */ +/APP + movq 48(%rcx),%rax + mulq 56(%rcx) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%r10 + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%r10 + +/NO_APP + movq %rdi, -16(%rsp) /* column 13: a6*a7 twice */ + movq %r10, %r8 +/APP + movq 56(%rcx),%rax + mulq %rax + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%r9 + +/NO_APP + movq 16(%rbp), %rax /* %rax = output word array (pointer at offset 16 of the output operand) */ + movq %rsi, -8(%rsp) + movq %r8, (%rsp) /* columns 14 and 15: a7*a7 on top of the carries; %r8 holds the top word */ + movq %r14, (%rax) /* result word 0 comes straight from %r14; the following words are reloaded from the scratch area */ + movq -112(%rsp), %rbx + movq %rbx, 8(%rax) + movq -104(%rsp), %rcx + movq %rcx, 16(%rax) + movq -96(%rsp), %rdx + movq %rdx, 24(%rax) + movq -88(%rsp), %r14 + movq %r14, 32(%rax) + movq -80(%rsp), %r13 + movq %r13, 40(%rax) + movq -72(%rsp), %r12 + movq %r12, 48(%rax) + movq -64(%rsp), %r11 + movq %r11, 56(%rax) + movq -56(%rsp), %r10 + movq %r10, 64(%rax) + movq -48(%rsp), %r9 + movq %r9, 72(%rax) + movq -40(%rsp), %r8 + movq %r8, 80(%rax) + movq -32(%rsp), %rdi + movq %rdi, 88(%rax) + movq -24(%rsp), %rsi + movq %rsi, 96(%rax) + movq -16(%rsp), %rbx + movq %rbx, 104(%rax) + movq -8(%rsp), %rcx + movq %rcx,
112(%rax) + movq (%rsp), %rdx + movq %rdx, 120(%rax) + movl 8(%rbp), %edx /* trim: lower the word count at offset 8 while the top counted word is zero */ + testl %edx, %edx + je .L192 + leal -1(%rdx), %ecx + movq 16(%rbp), %rsi + mov %ecx, %r14d + cmpq $0, (%rsi,%r14,8) + jne .L190 + movl %ecx, %edx + .align 16 +.L191: + testl %edx, %edx + movl %edx, %ecx + je .L195 + decl %edx + mov %edx, %r9d + cmpq $0, (%rsi,%r9,8) + je .L191 + movl %ecx, 8(%rbp) + movl %ecx, %edx +.L190: + testl %edx, %edx + je .L192 + movl (%rbp), %eax + movl %eax, (%rbp) /* count is nonzero: offset 0 is rewritten with the value it already holds */ + addq $8, %rsp + popq %rbx + popq %rbp + popq %r12 + popq %r13 + popq %r14 + ret +.L195: + movl %edx, 8(%rbp) + .align 16 +.L192: + xorl %eax, %eax + movl %eax, (%rbp) /* count is zero: store 0 at offset 0 */ + addq $8, %rsp + popq %rbx + popq %rbp + popq %r12 + popq %r13 + popq %r14 + ret +.LFE7: + .size s_mp_sqr_comba_8, .-s_mp_sqr_comba_8 + .align 16 +.globl s_mp_sqr_comba_16 + .type s_mp_sqr_comba_16, @function +s_mp_sqr_comba_16: +.LFB8: + pushq %rbp +.LCFI25: + xorl %r9d, %r9d + movq %r9, %r8 + movq %r9, %r11 + movq %rsp, %rbp +.LCFI26: + pushq %r14 +.LCFI27: + movq %rsi, %r14 + movq %r9, %rsi + pushq %r13 +.LCFI28: + movq %r9, %r13 + pushq %r12 +.LCFI29: + movq %r9, %r12 + pushq %rbx +.LCFI30: + movq %r9, %rbx + subq $256, %rsp +.LCFI31: + movq 16(%rdi), %rcx +/APP + movq (%rcx),%rax + mulq %rax + addq %rax,%r8 + adcq %rdx,%rbx + adcq $0,%rsi + +/NO_APP + movq %r8, -288(%rbp) +/APP + movq (%rcx),%rax + mulq 8(%rcx) + addq %rax,%rbx + adcq %rdx,%rsi + adcq $0,%r12 + addq %rax,%rbx + adcq %rdx,%rsi + adcq $0,%r12 + +/NO_APP + movq %rbx, -280(%rbp) +/APP + movq (%rcx),%rax + mulq 16(%rcx) + addq %rax,%rsi + adcq %rdx,%r12 + adcq $0,%r13 + addq %rax,%rsi + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r12, %rbx + movq %r13, %r10 +/APP + movq 8(%rcx),%rax + mulq %rax + addq %rax,%rsi + adcq %rdx,%rbx + adcq $0,%r10 + +/NO_APP + movq %rsi, -272(%rbp) + movq %r10, %rdi + movq %r9, %rsi + movq %rbx, %r10 +/APP + movq (%rcx),%rax + mulq 24(%rcx) + addq %rax,%r10 + adcq %rdx,%rdi + adcq $0,%r11
+ +/NO_APP + movq %rdi, %r12 + movq %r11, %rbx + movq %r9, %rdi +/APP + movq 8(%rcx),%rax + mulq 16(%rcx) + addq %rax,%r10 + adcq %rdx,%r12 + adcq $0,%rbx + addq %rax,%r10 + adcq %rdx,%r12 + adcq $0,%rbx + +/NO_APP + movq %r9, %r11 + movq %r10, -264(%rbp) + movq %rbx, %r8 + movq %r12, %r13 + movq %r9, %r12 +/APP + movq (%rcx),%rax + mulq 32(%rcx) + addq %rax,%r13 + adcq %rdx,%r8 + adcq $0,%r12 + addq %rax,%r13 + adcq %rdx,%r8 + adcq $0,%r12 + + movq 8(%rcx),%rax + mulq 24(%rcx) + addq %rax,%r13 + adcq %rdx,%r8 + adcq $0,%r12 + addq %rax,%r13 + adcq %rdx,%r8 + adcq $0,%r12 + +/NO_APP + movq %r8, %rbx + movq %r12, %r10 + movq %r9, %r8 +/APP + movq 16(%rcx),%rax + mulq %rax + addq %rax,%r13 + adcq %rdx,%rbx + adcq $0,%r10 + +/NO_APP + movq %r13, -256(%rbp) +/APP + movq (%rcx),%rax + mulq 40(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 32(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 24(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + addq %r8,%rbx + adcq %rdi,%r10 + adcq %rsi,%r11 + addq %r8,%rbx + adcq %rdi,%r10 + adcq %rsi,%r11 + +/NO_APP + movq %rbx, -248(%rbp) +/APP + movq (%rcx),%rax + mulq 48(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 40(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 32(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r9, %rax + movq %rdi, %r13 + movq %rsi, %r12 +/APP + addq %r8,%r10 + adcq %r13,%r11 + adcq %r12,%rax + addq %r8,%r10 + adcq %r13,%r11 + adcq %r12,%rax + +/NO_APP + movq %rax, %rdx + movq %r11, %rbx + movq %r13, %rdi + movq %rdx, %r11 + movq %r12, %rsi +/APP + movq 24(%rcx),%rax + mulq %rax + addq %rax,%r10 + adcq %rdx,%rbx + adcq $0,%r11 + +/NO_APP + movq %r10, -240(%rbp) + movq %r11, %r10 +/APP + movq (%rcx),%rax + mulq 56(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 48(%rcx) + addq %rax,%r8 + adcq 
%rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 40(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 32(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r9, %rdx +/APP + addq %r8,%rbx + adcq %rdi,%r10 + adcq %rsi,%rdx + addq %r8,%rbx + adcq %rdi,%r10 + adcq %rsi,%rdx + +/NO_APP + movq %rdx, %r11 + movq %rbx, -232(%rbp) + movq %r9, %rbx +/APP + movq (%rcx),%rax + mulq 64(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 56(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 48(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 40(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r13 + movq %rsi, %r12 +/APP + addq %r8,%r10 + adcq %r13,%r11 + adcq %r12,%rbx + addq %r8,%r10 + adcq %r13,%r11 + adcq %r12,%rbx + + movq 32(%rcx),%rax + mulq %rax + addq %rax,%r10 + adcq %rdx,%r11 + adcq $0,%rbx + +/NO_APP + movq %r13, %rdi + movq %r10, -224(%rbp) + movq %r12, %rsi + movq %rbx, %r10 + movq %r9, %r12 +/APP + movq (%rcx),%rax + mulq 72(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 64(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 56(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 48(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 40(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + addq %r8,%r11 + adcq %rdi,%r10 + adcq %rsi,%r12 + addq %r8,%r11 + adcq %rdi,%r10 + adcq %rsi,%r12 + +/NO_APP + movq %r11, -216(%rbp) + movq %r12, %rbx +/APP + movq (%rcx),%rax + mulq 80(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 72(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 64(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 56(%rcx) + addq 
%rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 48(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r9, %rax + movq %rdi, %r13 + movq %rsi, %r12 +/APP + addq %r8,%r10 + adcq %r13,%rbx + adcq %r12,%rax + addq %r8,%r10 + adcq %r13,%rbx + adcq %r12,%rax + +/NO_APP + movq %rax, %rdx + movq %rbx, %r11 + movq %r13, %rdi + movq %rdx, %rbx + movq %r12, %rsi +/APP + movq 40(%rcx),%rax + mulq %rax + addq %rax,%r10 + adcq %rdx,%r11 + adcq $0,%rbx + +/NO_APP + movq %r10, -208(%rbp) + movq %rbx, %r10 +/APP + movq (%rcx),%rax + mulq 88(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 72(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 64(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 56(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 48(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r9, %rdx +/APP + addq %r8,%r11 + adcq %rdi,%r10 + adcq %rsi,%rdx + addq %r8,%r11 + adcq %rdi,%r10 + adcq %rsi,%rdx + +/NO_APP + movq %rdx, %r13 + movq %r11, -200(%rbp) + movq %r13, %r12 +/APP + movq (%rcx),%rax + mulq 96(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 72(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 64(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 56(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r9, %rax + movq %rdi, %rdx + movq %rsi, %r11 +/APP + addq %r8,%r10 + adcq %rdx,%r12 + adcq %r11,%rax + addq %r8,%r10 + adcq %rdx,%r12 + adcq %r11,%rax + +/NO_APP + movq %rdx, %rbx + 
movq %rax, %r13 + movq %r11, %rsi +/APP + movq 48(%rcx),%rax + mulq %rax + addq %rax,%r10 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %rbx, %rdi + movq %r10, -192(%rbp) + movq %r13, %r10 +/APP + movq (%rcx),%rax + mulq 104(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + +/NO_APP + movq %r9, %r13 +/APP + movq 8(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 72(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 64(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 48(%rcx),%rax + mulq 56(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + addq %r8,%r12 + adcq %rdi,%r10 + adcq %rsi,%r13 + addq %r8,%r12 + adcq %rdi,%r10 + adcq %rsi,%r13 + +/NO_APP + movq %r12, -184(%rbp) + movq %r13, %r12 +/APP + movq (%rcx),%rax + mulq 112(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 72(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 48(%rcx),%rax + mulq 64(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r9, %rax + movq %rdi, %rbx + movq %rsi, %rdx +/APP + addq %r8,%r10 + adcq %rbx,%r12 + adcq %rdx,%rax + addq %r8,%r10 + adcq %rbx,%r12 + adcq %rdx,%rax + +/NO_APP + movq %rdx, %r11 + movq %rax, %r13 + movq %rbx, %rdi +/APP + movq 56(%rcx),%rax + mulq %rax + addq %rax,%r10 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r11, %rsi + movq %r10, -176(%rbp) + movq %r13, 
%r10 +/APP + movq (%rcx),%rax + mulq 120(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + +/NO_APP + movq %r9, %r13 +/APP + movq 8(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 48(%rcx),%rax + mulq 72(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 56(%rcx),%rax + mulq 64(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + addq %r8,%r12 + adcq %rdi,%r10 + adcq %rsi,%r13 + addq %r8,%r12 + adcq %rdi,%r10 + adcq %rsi,%r13 + +/NO_APP + movq %r12, -168(%rbp) + movq %r13, %r12 +/APP + movq 8(%rcx),%rax + mulq 120(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 16(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 48(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 56(%rcx),%rax + mulq 72(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r9, %rax + movq %rdi, %rbx + movq %rsi, %rdx +/APP + addq %r8,%r10 + adcq %rbx,%r12 + adcq %rdx,%rax + addq %r8,%r10 + adcq %rbx,%r12 + adcq %rdx,%rax + +/NO_APP + movq %rdx, %r11 + movq %rax, %r13 + movq %rbx, %rdi +/APP + movq 64(%rcx),%rax + mulq %rax + addq %rax,%r10 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r11, %rsi + movq %r10, -160(%rbp) + movq %r9, %r11 +/APP + movq 16(%rcx),%rax + mulq 120(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + 
+/NO_APP + movq %r13, %r10 + movq %r9, %rbx +/APP + movq 24(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 48(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 56(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 64(%rcx),%rax + mulq 72(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + addq %r8,%r12 + adcq %rdi,%r10 + adcq %rsi,%r11 + addq %r8,%r12 + adcq %rdi,%r10 + adcq %rsi,%r11 + +/NO_APP + movq %r12, -152(%rbp) +/APP + movq 24(%rcx),%rax + mulq 120(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 32(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 48(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 56(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 64(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r13 + movq %rsi, %r12 +/APP + addq %r8,%r10 + adcq %r13,%r11 + adcq %r12,%rbx + addq %r8,%r10 + adcq %r13,%r11 + adcq %r12,%rbx + +/NO_APP + movq %rbx, %rdx + movq %r13, %rdi + movq %r11, %rbx + movq %r12, %rsi + movq %rdx, %r11 + movq %r9, %r12 +/APP + movq 72(%rcx),%rax + mulq %rax + addq %rax,%r10 + adcq %rdx,%rbx + adcq $0,%r11 + +/NO_APP + movq %r10, -144(%rbp) + movq %r11, %r10 +/APP + movq 32(%rcx),%rax + mulq 120(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 40(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 48(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 56(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + 
adcq $0,%rsi + + movq 64(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 72(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + addq %r8,%rbx + adcq %rdi,%r10 + adcq %rsi,%r12 + addq %r8,%rbx + adcq %rdi,%r10 + adcq %rsi,%r12 + +/NO_APP + movq %rbx, -136(%rbp) + movq %r12, %r11 +/APP + movq 40(%rcx),%rax + mulq 120(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 48(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 56(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 64(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 72(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r9, %rax + movq %rdi, %r13 + movq %rsi, %r12 +/APP + addq %r8,%r10 + adcq %r13,%r11 + adcq %r12,%rax + addq %r8,%r10 + adcq %r13,%r11 + adcq %r12,%rax + +/NO_APP + movq %rax, %rdx + movq %r11, %rbx + movq %r13, %rdi + movq %rdx, %r11 + movq %r12, %rsi +/APP + movq 80(%rcx),%rax + mulq %rax + addq %rax,%r10 + adcq %rdx,%rbx + adcq $0,%r11 + +/NO_APP + movq %r10, -128(%rbp) + movq %r11, %r10 +/APP + movq 48(%rcx),%rax + mulq 120(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 56(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 64(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 72(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 80(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r9, %rdx +/APP + addq %r8,%rbx + adcq %rdi,%r10 + adcq %rsi,%rdx + addq %r8,%rbx + adcq %rdi,%r10 + adcq %rsi,%rdx + +/NO_APP + movq %rbx, -120(%rbp) + movq %rdx, %r11 + movq %r9, %rbx +/APP + movq 56(%rcx),%rax + mulq 120(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 64(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + 
adcq %rdx,%rdi + adcq $0,%rsi + + movq 72(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 80(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r13 + movq %rsi, %r12 +/APP + addq %r8,%r10 + adcq %r13,%r11 + adcq %r12,%rbx + addq %r8,%r10 + adcq %r13,%r11 + adcq %r12,%rbx + +/NO_APP + movq %rbx, %rdx + movq %r13, %rdi + movq %r11, %rbx + movq %r12, %rsi + movq %rdx, %r11 + movq %r9, %r12 +/APP + movq 88(%rcx),%rax + mulq %rax + addq %rax,%r10 + adcq %rdx,%rbx + adcq $0,%r11 + +/NO_APP + movq %r10, -112(%rbp) + movq %r11, %r10 +/APP + movq 64(%rcx),%rax + mulq 120(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 72(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 80(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 88(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + addq %r8,%rbx + adcq %rdi,%r10 + adcq %rsi,%r12 + addq %r8,%rbx + adcq %rdi,%r10 + adcq %rsi,%r12 + +/NO_APP + movq %rbx, -104(%rbp) + movq %r12, %r11 +/APP + movq 72(%rcx),%rax + mulq 120(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 80(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 88(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r9, %rax + movq %rdi, %r13 + movq %rsi, %r12 +/APP + addq %r8,%r10 + adcq %r13,%r11 + adcq %r12,%rax + addq %r8,%r10 + adcq %r13,%r11 + adcq %r12,%rax + +/NO_APP + movq %rax, %rdx + movq %r11, %rbx + movq %r13, %rdi + movq %rdx, %r11 + movq %r12, %rsi +/APP + movq 96(%rcx),%rax + mulq %rax + addq %rax,%r10 + adcq %rdx,%rbx + adcq $0,%r11 + +/NO_APP + movq %r10, -96(%rbp) + movq %r9, %r10 +/APP + movq 80(%rcx),%rax + mulq 120(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 88(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 
96(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r12 + movq %rsi, %rax + movq %r9, %rsi +/APP + addq %r8,%rbx + adcq %r12,%r11 + adcq %rax,%r10 + addq %r8,%rbx + adcq %r12,%r11 + adcq %rax,%r10 + +/NO_APP + movq %r9, %r12 + movq %rbx, -88(%rbp) + movq %r11, %r13 + movq %r10, %r11 +/APP + movq 88(%rcx),%rax + mulq 120(%rcx) + addq %rax,%r13 + adcq %rdx,%r11 + adcq $0,%r12 + addq %rax,%r13 + adcq %rdx,%r11 + adcq $0,%r12 + +/NO_APP + movq %r12, %rdi +/APP + movq 96(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r13 + adcq %rdx,%r11 + adcq $0,%rdi + addq %rax,%r13 + adcq %rdx,%r11 + adcq $0,%rdi + +/NO_APP + movq %r11, %rbx + movq %rdi, %r10 + movq %r9, %r11 +/APP + movq 104(%rcx),%rax + mulq %rax + addq %rax,%r13 + adcq %rdx,%rbx + adcq $0,%r10 + +/NO_APP + movq %r13, -80(%rbp) + movq %r10, %r8 + movq %rbx, %r10 +/APP + movq 96(%rcx),%rax + mulq 120(%rcx) + addq %rax,%r10 + adcq %rdx,%r8 + adcq $0,%rsi + addq %rax,%r10 + adcq %rdx,%r8 + adcq $0,%rsi + +/NO_APP + movq %r8, %r12 + movq %rsi, %rbx +/APP + movq 104(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r10 + adcq %rdx,%r12 + adcq $0,%rbx + addq %rax,%r10 + adcq %rdx,%r12 + adcq $0,%rbx + +/NO_APP + movq %r10, -72(%rbp) + movq %rbx, %r13 + movq %r12, %rbx +/APP + movq 104(%rcx),%rax + mulq 120(%rcx) + addq %rax,%rbx + adcq %rdx,%r13 + adcq $0,%r11 + addq %rax,%rbx + adcq %rdx,%r13 + adcq $0,%r11 + +/NO_APP + movq %r11, %r12 + movq %r13, %r10 +/APP + movq 112(%rcx),%rax + mulq %rax + addq %rax,%rbx + adcq %rdx,%r10 + adcq $0,%r12 + +/NO_APP + movq %rbx, -64(%rbp) + movq %r10, %rdi + movq %r9, %rbx + movq %r12, %rsi +/APP + movq 112(%rcx),%rax + mulq 120(%rcx) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rbx + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%rbx + +/NO_APP + movq %rdi, -56(%rbp) + movq %rbx, %r8 +/APP + movq 120(%rcx),%rax + mulq %rax + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%r9 + +/NO_APP + movq %rsi, -48(%rbp) + movq 16(%r14), %rdi + leaq -288(%rbp), %rsi + 
movl $256, %edx + movq %r8, -40(%rbp) + movl $32, 8(%r14) + movl $0, (%r14) + call memcpy@PLT + movl 8(%r14), %edx + testl %edx, %edx + je .L232 + leal -1(%rdx), %ecx + movq 16(%r14), %rsi + mov %ecx, %r9d + cmpq $0, (%rsi,%r9,8) + jne .L230 + movl %ecx, %edx + .align 16 +.L231: + testl %edx, %edx + movl %edx, %ecx + je .L235 + decl %edx + mov %edx, %eax + cmpq $0, (%rsi,%rax,8) + je .L231 + movl %ecx, 8(%r14) + movl %ecx, %edx +.L230: + testl %edx, %edx + je .L232 + movl (%r14), %eax + movl %eax, (%r14) + addq $256, %rsp + popq %rbx + popq %r12 + popq %r13 + popq %r14 + leave + ret +.L235: + movl %edx, 8(%r14) + .align 16 +.L232: + xorl %eax, %eax + movl %eax, (%r14) + addq $256, %rsp + popq %rbx + popq %r12 + popq %r13 + popq %r14 + leave + ret +.LFE8: + .size s_mp_sqr_comba_16, .-s_mp_sqr_comba_16 + .align 16 +.globl s_mp_sqr_comba_32 + .type s_mp_sqr_comba_32, @function +s_mp_sqr_comba_32: +.LFB9: + pushq %rbp +.LCFI32: + xorl %r10d, %r10d + movq %r10, %r8 + movq %r10, %r11 + movq %rsp, %rbp +.LCFI33: + pushq %r14 +.LCFI34: + movq %rsi, %r14 + movq %r10, %rsi + pushq %r13 +.LCFI35: + movq %r10, %r13 + pushq %r12 +.LCFI36: + movq %r10, %r12 + pushq %rbx +.LCFI37: + movq %r10, %rbx + subq $512, %rsp +.LCFI38: + movq 16(%rdi), %rcx +/APP + movq (%rcx),%rax + mulq %rax + addq %rax,%r8 + adcq %rdx,%rbx + adcq $0,%rsi + +/NO_APP + movq %r8, -544(%rbp) +/APP + movq (%rcx),%rax + mulq 8(%rcx) + addq %rax,%rbx + adcq %rdx,%rsi + adcq $0,%r12 + addq %rax,%rbx + adcq %rdx,%rsi + adcq $0,%r12 + +/NO_APP + movq %rbx, -536(%rbp) +/APP + movq (%rcx),%rax + mulq 16(%rcx) + addq %rax,%rsi + adcq %rdx,%r12 + adcq $0,%r13 + addq %rax,%rsi + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r12, %rbx + movq %r13, %r9 +/APP + movq 8(%rcx),%rax + mulq %rax + addq %rax,%rsi + adcq %rdx,%rbx + adcq $0,%r9 + +/NO_APP + movq %rsi, -528(%rbp) + movq %r9, %rdi + movq %r10, %rsi + movq %rbx, %r9 +/APP + movq (%rcx),%rax + mulq 24(%rcx) + addq %rax,%r9 + adcq %rdx,%rdi + adcq $0,%r11 + addq 
%rax,%r9 + adcq %rdx,%rdi + adcq $0,%r11 + +/NO_APP + movq %rdi, %r12 + movq %r11, %r13 + movq %r10, %rdi +/APP + movq 8(%rcx),%rax + mulq 16(%rcx) + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r10, %r11 + movq %r9, -520(%rbp) + movq %r13, %r8 + movq %r12, %r13 + movq %r10, %r12 +/APP + movq (%rcx),%rax + mulq 32(%rcx) + addq %rax,%r13 + adcq %rdx,%r8 + adcq $0,%r12 + addq %rax,%r13 + adcq %rdx,%r8 + adcq $0,%r12 + + movq 8(%rcx),%rax + mulq 24(%rcx) + addq %rax,%r13 + adcq %rdx,%r8 + adcq $0,%r12 + addq %rax,%r13 + adcq %rdx,%r8 + adcq $0,%r12 + +/NO_APP + movq %r8, %rbx + movq %r12, %r9 + movq %r10, %r8 +/APP + movq 16(%rcx),%rax + mulq %rax + addq %rax,%r13 + adcq %rdx,%rbx + adcq $0,%r9 + +/NO_APP + movq %r13, -512(%rbp) +/APP + movq (%rcx),%rax + mulq 40(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 32(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 24(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + addq %r8,%rbx + adcq %rdi,%r9 + adcq %rsi,%r11 + addq %r8,%rbx + adcq %rdi,%r9 + adcq %rsi,%r11 + +/NO_APP + movq %rbx, -504(%rbp) +/APP + movq (%rcx),%rax + mulq 48(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 40(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 32(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r10, %rax + movq %rdi, %r13 + movq %rsi, %r12 +/APP + addq %r8,%r9 + adcq %r13,%r11 + adcq %r12,%rax + addq %r8,%r9 + adcq %r13,%r11 + adcq %r12,%rax + +/NO_APP + movq %rax, %rdx + movq %r11, %rbx + movq %r13, %rdi + movq %rdx, %r11 + movq %r12, %rsi +/APP + movq 24(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%rbx + adcq $0,%r11 + +/NO_APP + movq %r9, -496(%rbp) + movq %r11, %r9 +/APP + movq (%rcx),%rax + mulq 56(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 
48(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 40(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 32(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r10, %rdx +/APP + addq %r8,%rbx + adcq %rdi,%r9 + adcq %rsi,%rdx + addq %r8,%rbx + adcq %rdi,%r9 + adcq %rsi,%rdx + +/NO_APP + movq %rdx, %r11 + movq %rbx, -488(%rbp) + movq %r10, %rbx +/APP + movq (%rcx),%rax + mulq 64(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 56(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 48(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 40(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r13 + movq %rsi, %r12 +/APP + addq %r8,%r9 + adcq %r13,%r11 + adcq %r12,%rbx + addq %r8,%r9 + adcq %r13,%r11 + adcq %r12,%rbx + + movq 32(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r11 + adcq $0,%rbx + +/NO_APP + movq %r13, %rdi + movq %r9, -480(%rbp) + movq %r12, %rsi + movq %rbx, %r9 + movq %r10, %r12 +/APP + movq (%rcx),%rax + mulq 72(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 64(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 56(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 48(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 40(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + addq %r8,%r11 + adcq %rdi,%r9 + adcq %rsi,%r12 + addq %r8,%r11 + adcq %rdi,%r9 + adcq %rsi,%r12 + +/NO_APP + movq %r11, -472(%rbp) + movq %r12, %rbx +/APP + movq (%rcx),%rax + mulq 80(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 72(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 64(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 
24(%rcx),%rax + mulq 56(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 48(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r10, %rax + movq %rdi, %r13 + movq %rsi, %r12 +/APP + addq %r8,%r9 + adcq %r13,%rbx + adcq %r12,%rax + addq %r8,%r9 + adcq %r13,%rbx + adcq %r12,%rax + +/NO_APP + movq %rax, %rdx + movq %rbx, %r11 + movq %r13, %rdi + movq %rdx, %rbx + movq %r12, %rsi +/APP + movq 40(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r11 + adcq $0,%rbx + +/NO_APP + movq %r9, -464(%rbp) + movq %rbx, %r9 +/APP + movq (%rcx),%rax + mulq 88(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 72(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 64(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 56(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 48(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r10, %rdx +/APP + addq %r8,%r11 + adcq %rdi,%r9 + adcq %rsi,%rdx + addq %r8,%r11 + adcq %rdi,%r9 + adcq %rsi,%rdx + +/NO_APP + movq %rdx, %r13 + movq %r11, -456(%rbp) + movq %r13, %r12 + movq %r10, %r13 +/APP + movq (%rcx),%rax + mulq 96(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 72(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 64(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 56(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %rax + movq %rsi, %r11 +/APP + addq %r8,%r9 + adcq %rax,%r12 + adcq %r11,%r13 + addq %r8,%r9 + adcq %rax,%r12 + adcq %r11,%r13 + 
+/NO_APP + movq %rax, %rbx + movq %r11, %rsi +/APP + movq 48(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %rbx, %rdi + movq %r9, -448(%rbp) + movq %r13, %r9 +/APP + movq (%rcx),%rax + mulq 104(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + +/NO_APP + movq %r10, %r13 +/APP + movq 8(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 72(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 64(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 48(%rcx),%rax + mulq 56(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + addq %r8,%r12 + adcq %rdi,%r9 + adcq %rsi,%r13 + addq %r8,%r12 + adcq %rdi,%r9 + adcq %rsi,%r13 + +/NO_APP + movq %r12, -440(%rbp) + movq %r10, %r12 +/APP + movq (%rcx),%rax + mulq 112(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 72(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 48(%rcx),%rax + mulq 64(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r13, %rdx + movq %rdi, %rbx + movq %rsi, %r13 +/APP + addq %r8,%r9 + adcq %rbx,%rdx + adcq %r13,%r12 + addq %r8,%r9 + adcq %rbx,%rdx + adcq %r13,%r12 + +/NO_APP + movq %r12, %rax + movq %r13, %r11 + movq %rdx, %r12 + movq %rax, %r13 + movq %rbx, %rdi + movq %r11, %rsi +/APP + movq 56(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP 
+ movq %r9, -432(%rbp) + movq %r13, %r9 + movq %r10, %r13 +/APP + movq (%rcx),%rax + mulq 120(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 8(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 48(%rcx),%rax + mulq 72(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 56(%rcx),%rax + mulq 64(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r8, %rax + movq %rdi, %rdx + movq %rsi, %rbx +/APP + addq %rax,%r12 + adcq %rdx,%r9 + adcq %rbx,%r13 + addq %rax,%r12 + adcq %rdx,%r9 + adcq %rbx,%r13 + +/NO_APP + movq %r12, -424(%rbp) + movq %rdx, %r8 + movq %rax, %rsi + movq %rbx, %rdi + movq %r13, %r12 + movq %r10, %r13 +/APP + movq (%rcx),%rax + mulq 128(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 8(%rcx),%rax + mulq 120(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 16(%rcx),%rax + mulq 112(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 24(%rcx),%rax + mulq 104(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 32(%rcx),%rax + mulq 96(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 40(%rcx),%rax + mulq 88(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 80(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 72(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %rsi, %rax + movq %r8, %rbx + movq %rdi, %rdx +/APP + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + +/NO_APP + movq %rdx, %r11 + movq %rax, %r8 + movq %rbx, %rdi +/APP + 
movq 64(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r11, %rsi + movq %r9, -416(%rbp) + movq %r13, %r9 +/APP + movq (%rcx),%rax + mulq 136(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + +/NO_APP + movq %r10, %r13 +/APP + movq 8(%rcx),%rax + mulq 128(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 120(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 48(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 56(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 64(%rcx),%rax + mulq 72(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r8, %rbx + movq %rdi, %rax + movq %rsi, %rdx +/APP + addq %rbx,%r12 + adcq %rax,%r9 + adcq %rdx,%r13 + addq %rbx,%r12 + adcq %rax,%r9 + adcq %rdx,%r13 + +/NO_APP + movq %r12, -408(%rbp) + movq %rdx, %rdi + movq %rax, %r8 + movq %rbx, %rsi + movq %r13, %r12 + movq %r10, %r13 +/APP + movq (%rcx),%rax + mulq 144(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 8(%rcx),%rax + mulq 136(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 16(%rcx),%rax + mulq 128(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 24(%rcx),%rax + mulq 120(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 32(%rcx),%rax + mulq 112(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 40(%rcx),%rax + mulq 104(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 96(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 88(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + 
mulq 80(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %rsi, %rax + movq %r8, %rbx + movq %rdi, %rdx +/APP + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + +/NO_APP + movq %rdx, %r11 + movq %rax, %r8 + movq %rbx, %rdi +/APP + movq 72(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r11, %rsi + movq %r9, -400(%rbp) + movq %r13, %r9 +/APP + movq (%rcx),%rax + mulq 152(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + +/NO_APP + movq %r10, %r13 +/APP + movq 8(%rcx),%rax + mulq 144(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 136(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 128(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 120(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 48(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 56(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 64(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 72(%rcx),%rax + mulq 80(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r8, %rbx + movq %rdi, %rax + movq %rsi, %rdx +/APP + addq %rbx,%r12 + adcq %rax,%r9 + adcq %rdx,%r13 + addq %rbx,%r12 + adcq %rax,%r9 + adcq %rdx,%r13 + +/NO_APP + movq %r12, -392(%rbp) + movq %rdx, %rdi + movq %rax, %r8 + movq %rbx, %rsi + movq %r13, %r12 + movq %r10, %r13 +/APP + movq (%rcx),%rax + mulq 160(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 8(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 16(%rcx),%rax + mulq 144(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 24(%rcx),%rax + mulq 136(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 
+ adcq $0,%rdi + + movq 32(%rcx),%rax + mulq 128(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 40(%rcx),%rax + mulq 120(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 112(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 104(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 96(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 88(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %rsi, %rax + movq %r8, %rbx + movq %rdi, %rdx +/APP + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + +/NO_APP + movq %rdx, %r11 + movq %rax, %r8 + movq %rbx, %rdi +/APP + movq 80(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r11, %rsi + movq %r9, -384(%rbp) + movq %r13, %r9 +/APP + movq (%rcx),%rax + mulq 168(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + +/NO_APP + movq %r10, %r13 +/APP + movq 8(%rcx),%rax + mulq 160(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 152(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 144(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 32(%rcx),%rax + mulq 136(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 128(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 48(%rcx),%rax + mulq 120(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 56(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 64(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 72(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 80(%rcx),%rax + mulq 88(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r8, %rbx + movq %rdi, %rax + movq %rsi, %rdx 
+/APP + addq %rbx,%r12 + adcq %rax,%r9 + adcq %rdx,%r13 + addq %rbx,%r12 + adcq %rax,%r9 + adcq %rdx,%r13 + +/NO_APP + movq %r12, -376(%rbp) + movq %rdx, %rdi + movq %rax, %r8 + movq %rbx, %rsi + movq %r13, %r12 + movq %r10, %r13 +/APP + movq (%rcx),%rax + mulq 176(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 8(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 16(%rcx),%rax + mulq 160(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 24(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 32(%rcx),%rax + mulq 144(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 40(%rcx),%rax + mulq 136(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 128(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 120(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 112(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 104(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 96(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %rsi, %rax + movq %r8, %rbx + movq %rdi, %rdx +/APP + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + +/NO_APP + movq %rdx, %r11 + movq %rax, %r8 + movq %rbx, %rdi +/APP + movq 88(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r11, %rsi + movq %r9, -368(%rbp) + movq %r13, %r9 +/APP + movq (%rcx),%rax + mulq 184(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + +/NO_APP + movq %r10, %r13 +/APP + movq 8(%rcx),%rax + mulq 176(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 16(%rcx),%rax + mulq 168(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 24(%rcx),%rax + mulq 160(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 
32(%rcx),%rax + mulq 152(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 40(%rcx),%rax + mulq 144(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 48(%rcx),%rax + mulq 136(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 56(%rcx),%rax + mulq 128(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 64(%rcx),%rax + mulq 120(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 72(%rcx),%rax + mulq 112(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 80(%rcx),%rax + mulq 104(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 88(%rcx),%rax + mulq 96(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r8, %rbx + movq %rdi, %rax + movq %rsi, %rdx +/APP + addq %rbx,%r12 + adcq %rax,%r9 + adcq %rdx,%r13 + addq %rbx,%r12 + adcq %rax,%r9 + adcq %rdx,%r13 + +/NO_APP + movq %rdx, %rdi + movq %r12, -360(%rbp) + movq %rax, %r8 + movq %rbx, %rsi + movq %r13, %r12 + movq %r10, %r13 +/APP + movq (%rcx),%rax + mulq 192(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 8(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 16(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 24(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 32(%rcx),%rax + mulq 160(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 40(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 144(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 136(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 128(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 120(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 112(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 104(%rcx) 
+ addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %r8, %rbx + movq %rdi, %rax +/APP + addq %rsi,%r9 + adcq %rbx,%r12 + adcq %rax,%r13 + addq %rsi,%r9 + adcq %rbx,%r12 + adcq %rax,%r13 + +/NO_APP + movq %rax, %r11 + movq %rbx, %r8 +/APP + movq 96(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r11, %rdi + movq %r9, -352(%rbp) + movq %r13, %r9 +/APP + movq (%rcx),%rax + mulq 200(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + +/NO_APP + movq %r10, %r13 +/APP + movq 8(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 16(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 24(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 32(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 40(%rcx),%rax + mulq 160(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 144(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 136(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 128(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 120(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 112(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 96(%rcx),%rax + mulq 104(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + addq %rsi,%r12 + adcq %r8,%r9 + adcq %rdi,%r13 + addq %rsi,%r12 + adcq %r8,%r9 + adcq %rdi,%r13 + +/NO_APP + movq %r12, -344(%rbp) + movq %r10, %r12 +/APP + movq (%rcx),%rax + mulq 208(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 8(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 16(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq 
$0,%rdi + + movq 24(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 32(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 40(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 160(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 144(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 136(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 128(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 120(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 96(%rcx),%rax + mulq 112(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %r13, %rdx + movq %r8, %rbx + movq %rdi, %r13 +/APP + addq %rsi,%r9 + adcq %rbx,%rdx + adcq %r13,%r12 + addq %rsi,%r9 + adcq %rbx,%rdx + adcq %r13,%r12 + +/NO_APP + movq %r12, %rax + movq %r13, %r11 + movq %rdx, %r12 + movq %rax, %r13 + movq %rbx, %r8 + movq %r11, %rdi +/APP + movq 104(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r9, -336(%rbp) + movq %r13, %r9 + movq %r10, %r13 +/APP + movq (%rcx),%rax + mulq 216(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 8(%rcx),%rax + mulq 208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 16(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 24(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 32(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 40(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 160(%rcx) + addq 
%rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 144(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 136(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 128(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 96(%rcx),%rax + mulq 120(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 104(%rcx),%rax + mulq 112(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + addq %rsi,%r12 + adcq %r8,%r9 + adcq %rdi,%r13 + addq %rsi,%r12 + adcq %r8,%r9 + adcq %rdi,%r13 + +/NO_APP + movq %r12, -328(%rbp) +/APP + movq (%rcx),%rax + mulq 224(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 8(%rcx),%rax + mulq 216(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 16(%rcx),%rax + mulq 208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 24(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 32(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 40(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 160(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 144(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 136(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 96(%rcx),%rax + mulq 128(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 104(%rcx),%rax + mulq 120(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %r13, %rax + movq %r10, %rdx + movq %r8, 
%rbx + movq %rdi, %r12 +/APP + addq %rsi,%r9 + adcq %rbx,%rax + adcq %r12,%rdx + addq %rsi,%r9 + adcq %rbx,%rax + adcq %r12,%rdx + +/NO_APP + movq %rdx, %rdi + movq %r12, %r11 + movq %rbx, %r8 + movq %rax, %r12 + movq %rdi, %r13 + movq %r11, %rdi +/APP + movq 112(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r9, -320(%rbp) + movq %r13, %rbx + movq %r10, %r9 +/APP + movq (%rcx),%rax + mulq 232(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 8(%rcx),%rax + mulq 224(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 16(%rcx),%rax + mulq 216(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 24(%rcx),%rax + mulq 208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 32(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 40(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 160(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 144(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 96(%rcx),%rax + mulq 136(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 104(%rcx),%rax + mulq 128(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 112(%rcx),%rax + mulq 120(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + addq %rsi,%r12 + adcq %r8,%rbx + adcq %rdi,%r9 + addq %rsi,%r12 + adcq %r8,%rbx + adcq %rdi,%r9 + +/NO_APP + movq %r12, -312(%rbp) + movq %r9, %r13 +/APP + movq (%rcx),%rax + mulq 240(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 8(%rcx),%rax + 
mulq 232(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 16(%rcx),%rax + mulq 224(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 24(%rcx),%rax + mulq 216(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 32(%rcx),%rax + mulq 208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 40(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 160(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 96(%rcx),%rax + mulq 144(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 104(%rcx),%rax + mulq 136(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 112(%rcx),%rax + mulq 128(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %r10, %rax + movq %r8, %r11 + movq %rdi, %rdx +/APP + addq %rsi,%rbx + adcq %r11,%r13 + adcq %rdx,%rax + addq %rsi,%rbx + adcq %r11,%r13 + adcq %rdx,%rax + +/NO_APP + movq %rdx, %r9 + movq %rax, %rdx + movq %r13, %r12 + movq %r11, %r8 + movq %rdx, %r13 + movq %r9, %rdi +/APP + movq 120(%rcx),%rax + mulq %rax + addq %rax,%rbx + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %rbx, -304(%rbp) + movq %r13, %rbx + movq %r10, %r13 +/APP + movq (%rcx),%rax + mulq 248(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 8(%rcx),%rax + mulq 240(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 16(%rcx),%rax + mulq 232(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 24(%rcx),%rax + mulq 224(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + 
adcq $0,%rdi + + movq 32(%rcx),%rax + mulq 216(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 40(%rcx),%rax + mulq 208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 160(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 96(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 104(%rcx),%rax + mulq 144(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 112(%rcx),%rax + mulq 136(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 120(%rcx),%rax + mulq 128(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + addq %rsi,%r12 + adcq %r8,%rbx + adcq %rdi,%r13 + addq %rsi,%r12 + adcq %r8,%rbx + adcq %rdi,%r13 + +/NO_APP + movq %r12, -296(%rbp) + movq %r13, %r12 + movq %r10, %r13 +/APP + movq 8(%rcx),%rax + mulq 248(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 16(%rcx),%rax + mulq 240(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 24(%rcx),%rax + mulq 232(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 32(%rcx),%rax + mulq 224(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 40(%rcx),%rax + mulq 216(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 184(%rcx) + 
addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 96(%rcx),%rax + mulq 160(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 104(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 112(%rcx),%rax + mulq 144(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 120(%rcx),%rax + mulq 136(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %r8, %r11 + movq %rdi, %rax +/APP + addq %rsi,%rbx + adcq %r11,%r12 + adcq %rax,%r13 + addq %rsi,%rbx + adcq %r11,%r12 + adcq %rax,%r13 + +/NO_APP + movq %rax, %r9 + movq %r11, %r8 +/APP + movq 128(%rcx),%rax + mulq %rax + addq %rax,%rbx + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r9, %rdi + movq %rbx, -288(%rbp) + movq %r13, %r9 +/APP + movq 16(%rcx),%rax + mulq 248(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + +/NO_APP + movq %r10, %r13 +/APP + movq 24(%rcx),%rax + mulq 240(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 32(%rcx),%rax + mulq 232(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 40(%rcx),%rax + mulq 224(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 216(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 96(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 104(%rcx),%rax + mulq 160(%rcx) + addq %rax,%rsi 
+ adcq %rdx,%r8 + adcq $0,%rdi + + movq 112(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 120(%rcx),%rax + mulq 144(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 128(%rcx),%rax + mulq 136(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + addq %rsi,%r12 + adcq %r8,%r9 + adcq %rdi,%r13 + addq %rsi,%r12 + adcq %r8,%r9 + adcq %rdi,%r13 + +/NO_APP + movq %r12, -280(%rbp) + movq %r10, %r12 +/APP + movq 24(%rcx),%rax + mulq 248(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 32(%rcx),%rax + mulq 240(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 40(%rcx),%rax + mulq 232(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 224(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 216(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 96(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 104(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 112(%rcx),%rax + mulq 160(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 120(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 128(%rcx),%rax + mulq 144(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %r13, %rdx + movq %r8, %rbx + movq %rdi, %r13 +/APP + addq %rsi,%r9 + adcq %rbx,%rdx + adcq %r13,%r12 + addq %rsi,%r9 + adcq %rbx,%rdx + adcq %r13,%r12 + +/NO_APP + movq %r12, %rax + movq %r13, %r11 + movq %rdx, %r12 + movq %rax, %r13 + movq %rbx, %r8 + movq %r11, %rdi +/APP + movq 
136(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r9, -272(%rbp) + movq %r13, %r9 + movq %r10, %r13 +/APP + movq 32(%rcx),%rax + mulq 248(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 40(%rcx),%rax + mulq 240(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 48(%rcx),%rax + mulq 232(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 224(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 216(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 96(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 104(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 112(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 120(%rcx),%rax + mulq 160(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 128(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 136(%rcx),%rax + mulq 144(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + addq %rsi,%r12 + adcq %r8,%r9 + adcq %rdi,%r13 + addq %rsi,%r12 + adcq %r8,%r9 + adcq %rdi,%r13 + +/NO_APP + movq %r12, -264(%rbp) +/APP + movq 40(%rcx),%rax + mulq 248(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 48(%rcx),%rax + mulq 240(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 56(%rcx),%rax + mulq 232(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 224(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 216(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 
208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 96(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 104(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 112(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 120(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 128(%rcx),%rax + mulq 160(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 136(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %r13, %rax + movq %r10, %rdx + movq %r8, %rbx + movq %rdi, %r12 +/APP + addq %rsi,%r9 + adcq %rbx,%rax + adcq %r12,%rdx + addq %rsi,%r9 + adcq %rbx,%rax + adcq %r12,%rdx + +/NO_APP + movq %rdx, %rdi + movq %r12, %r11 + movq %rbx, %r8 + movq %rax, %r12 + movq %rdi, %r13 + movq %r11, %rdi +/APP + movq 144(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r10, %r11 + movq %r9, -256(%rbp) + movq %r13, %r9 +/APP + movq 48(%rcx),%rax + mulq 248(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 56(%rcx),%rax + mulq 240(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 64(%rcx),%rax + mulq 232(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 224(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 216(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 96(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 104(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 112(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 120(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi 
+ adcq %rdx,%r8 + adcq $0,%rdi + + movq 128(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 136(%rcx),%rax + mulq 160(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 144(%rcx),%rax + mulq 152(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + addq %rsi,%r12 + adcq %r8,%r9 + adcq %rdi,%r11 + addq %rsi,%r12 + adcq %r8,%r9 + adcq %rdi,%r11 + +/NO_APP + movq %r12, -248(%rbp) + movq %r11, %r13 +/APP + movq 56(%rcx),%rax + mulq 248(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 64(%rcx),%rax + mulq 240(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 72(%rcx),%rax + mulq 232(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 80(%rcx),%rax + mulq 224(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 216(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 96(%rcx),%rax + mulq 208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 104(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 112(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 120(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 128(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 136(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 144(%rcx),%rax + mulq 160(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %r10, %rax + movq %rsi, %rdx + movq %r8, %rbx + movq %rdi, %r12 +/APP + addq %rdx,%r9 + adcq %rbx,%r13 + adcq %r12,%rax + addq %rdx,%r9 + adcq %rbx,%r13 + adcq %r12,%rax + +/NO_APP + movq %r12, %r11 + movq %rdx, %r8 + movq %rax, %rdx + movq %r13, %r12 + movq %rbx, %rdi + movq %rdx, %r13 + movq %r11, %rsi +/APP + movq 152(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r9, -240(%rbp) + movq %r13, %r9 + movq %r10, %r13 
+/APP + movq 64(%rcx),%rax + mulq 248(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 72(%rcx),%rax + mulq 240(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 80(%rcx),%rax + mulq 232(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 88(%rcx),%rax + mulq 224(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 96(%rcx),%rax + mulq 216(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 104(%rcx),%rax + mulq 208(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 112(%rcx),%rax + mulq 200(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 120(%rcx),%rax + mulq 192(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 128(%rcx),%rax + mulq 184(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 136(%rcx),%rax + mulq 176(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 144(%rcx),%rax + mulq 168(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 152(%rcx),%rax + mulq 160(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r8, %rax + movq %rdi, %rdx + movq %rsi, %rbx +/APP + addq %rax,%r12 + adcq %rdx,%r9 + adcq %rbx,%r13 + addq %rax,%r12 + adcq %rdx,%r9 + adcq %rbx,%r13 + +/NO_APP + movq %r12, -232(%rbp) + movq %rdx, %r8 + movq %rax, %rsi + movq %rbx, %rdi + movq %r13, %r12 + movq %r10, %r13 +/APP + movq 72(%rcx),%rax + mulq 248(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 80(%rcx),%rax + mulq 240(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 88(%rcx),%rax + mulq 232(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 96(%rcx),%rax + mulq 224(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 104(%rcx),%rax + mulq 216(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 112(%rcx),%rax + mulq 208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 120(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 
128(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 136(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 144(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 152(%rcx),%rax + mulq 168(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %rsi, %rax + movq %r8, %rbx + movq %rdi, %rdx +/APP + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + +/NO_APP + movq %rdx, %r11 + movq %rax, %r8 + movq %rbx, %rdi +/APP + movq 160(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r11, %rsi + movq %r9, -224(%rbp) + movq %r13, %r9 +/APP + movq 80(%rcx),%rax + mulq 248(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + +/NO_APP + movq %r10, %r13 +/APP + movq 88(%rcx),%rax + mulq 240(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 96(%rcx),%rax + mulq 232(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 104(%rcx),%rax + mulq 224(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 112(%rcx),%rax + mulq 216(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 120(%rcx),%rax + mulq 208(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 128(%rcx),%rax + mulq 200(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 136(%rcx),%rax + mulq 192(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 144(%rcx),%rax + mulq 184(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 152(%rcx),%rax + mulq 176(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 160(%rcx),%rax + mulq 168(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r8, %rbx + movq %rdi, %rax + movq %rsi, %rdx +/APP + addq %rbx,%r12 + adcq %rax,%r9 + adcq %rdx,%r13 + addq %rbx,%r12 + adcq %rax,%r9 + adcq %rdx,%r13 + +/NO_APP + movq %r12, -216(%rbp) + movq %rdx, %rdi + movq %rax, %r8 + movq 
%rbx, %rsi + movq %r13, %r12 + movq %r10, %r13 +/APP + movq 88(%rcx),%rax + mulq 248(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 96(%rcx),%rax + mulq 240(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 104(%rcx),%rax + mulq 232(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 112(%rcx),%rax + mulq 224(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 120(%rcx),%rax + mulq 216(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 128(%rcx),%rax + mulq 208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 136(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 144(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 152(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 160(%rcx),%rax + mulq 176(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %rsi, %rax + movq %r8, %rbx + movq %rdi, %rdx +/APP + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + +/NO_APP + movq %rdx, %r11 + movq %rax, %r8 + movq %rbx, %rdi +/APP + movq 168(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r11, %rsi + movq %r9, -208(%rbp) + movq %r13, %r9 +/APP + movq 96(%rcx),%rax + mulq 248(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + +/NO_APP + movq %r10, %r13 +/APP + movq 104(%rcx),%rax + mulq 240(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 112(%rcx),%rax + mulq 232(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 120(%rcx),%rax + mulq 224(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 128(%rcx),%rax + mulq 216(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 136(%rcx),%rax + mulq 208(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 144(%rcx),%rax + mulq 200(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + 
movq 152(%rcx),%rax + mulq 192(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 160(%rcx),%rax + mulq 184(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 168(%rcx),%rax + mulq 176(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r8, %rbx + movq %rdi, %rax + movq %rsi, %rdx +/APP + addq %rbx,%r12 + adcq %rax,%r9 + adcq %rdx,%r13 + addq %rbx,%r12 + adcq %rax,%r9 + adcq %rdx,%r13 + +/NO_APP + movq %r12, -200(%rbp) + movq %rdx, %rdi + movq %rax, %r8 + movq %rbx, %rsi + movq %r13, %r12 + movq %r10, %r13 +/APP + movq 104(%rcx),%rax + mulq 248(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 112(%rcx),%rax + mulq 240(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 120(%rcx),%rax + mulq 232(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 128(%rcx),%rax + mulq 224(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 136(%rcx),%rax + mulq 216(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 144(%rcx),%rax + mulq 208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 152(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 160(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 168(%rcx),%rax + mulq 184(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %rsi, %rax + movq %r8, %rbx + movq %rdi, %rdx +/APP + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + +/NO_APP + movq %rdx, %r11 + movq %rax, %r8 + movq %rbx, %rdi +/APP + movq 176(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r11, %rsi + movq %r9, -192(%rbp) + movq %r13, %r9 +/APP + movq 112(%rcx),%rax + mulq 248(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + +/NO_APP + movq %r10, %r13 +/APP + movq 120(%rcx),%rax + mulq 240(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 128(%rcx),%rax + 
mulq 232(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 136(%rcx),%rax + mulq 224(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 144(%rcx),%rax + mulq 216(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 152(%rcx),%rax + mulq 208(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 160(%rcx),%rax + mulq 200(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 168(%rcx),%rax + mulq 192(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 176(%rcx),%rax + mulq 184(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r8, %rbx + movq %rdi, %rax + movq %rsi, %rdx +/APP + addq %rbx,%r12 + adcq %rax,%r9 + adcq %rdx,%r13 + addq %rbx,%r12 + adcq %rax,%r9 + adcq %rdx,%r13 + +/NO_APP + movq %r12, -184(%rbp) + movq %rdx, %rdi + movq %rax, %r8 + movq %rbx, %rsi + movq %r13, %r12 + movq %r10, %r13 +/APP + movq 120(%rcx),%rax + mulq 248(%rcx) + movq %rax,%rsi + movq %rdx,%r8 + xorq %rdi,%rdi + + movq 128(%rcx),%rax + mulq 240(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 136(%rcx),%rax + mulq 232(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 144(%rcx),%rax + mulq 224(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 152(%rcx),%rax + mulq 216(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 160(%rcx),%rax + mulq 208(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 168(%rcx),%rax + mulq 200(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + + movq 176(%rcx),%rax + mulq 192(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%rdi + +/NO_APP + movq %rsi, %rax + movq %r8, %rbx + movq %rdi, %rdx +/APP + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + addq %rax,%r9 + adcq %rbx,%r12 + adcq %rdx,%r13 + +/NO_APP + movq %rdx, %r11 + movq %rax, %r8 + movq %rbx, %rdi +/APP + movq 184(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r11, %rsi + movq %r9, -176(%rbp) + movq %r13, %r9 
+/APP + movq 128(%rcx),%rax + mulq 248(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + +/NO_APP + movq %r10, %r13 +/APP + movq 136(%rcx),%rax + mulq 240(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 144(%rcx),%rax + mulq 232(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 152(%rcx),%rax + mulq 224(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 160(%rcx),%rax + mulq 216(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 168(%rcx),%rax + mulq 208(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 176(%rcx),%rax + mulq 200(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 184(%rcx),%rax + mulq 192(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + addq %r8,%r12 + adcq %rdi,%r9 + adcq %rsi,%r13 + addq %r8,%r12 + adcq %rdi,%r9 + adcq %rsi,%r13 + +/NO_APP + movq %r12, -168(%rbp) + movq %r13, %r12 + movq %r10, %r13 +/APP + movq 136(%rcx),%rax + mulq 248(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 144(%rcx),%rax + mulq 240(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 152(%rcx),%rax + mulq 232(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 160(%rcx),%rax + mulq 224(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 168(%rcx),%rax + mulq 216(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 176(%rcx),%rax + mulq 208(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 184(%rcx),%rax + mulq 200(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %rbx + movq %rsi, %rax +/APP + addq %r8,%r9 + adcq %rbx,%r12 + adcq %rax,%r13 + addq %r8,%r9 + adcq %rbx,%r12 + adcq %rax,%r13 + +/NO_APP + movq %rax, %r11 + movq %rbx, %rdi + movq %r10, %rbx +/APP + movq 192(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r11, %rsi + movq %r9, -160(%rbp) + movq %r13, %r9 +/APP + movq 144(%rcx),%rax + mulq 248(%rcx) + movq %rax,%r8 + movq 
%rdx,%rdi + xorq %rsi,%rsi + + movq 152(%rcx),%rax + mulq 240(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 160(%rcx),%rax + mulq 232(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 168(%rcx),%rax + mulq 224(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 176(%rcx),%rax + mulq 216(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 184(%rcx),%rax + mulq 208(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 192(%rcx),%rax + mulq 200(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + addq %r8,%r12 + adcq %rdi,%r9 + adcq %rsi,%rbx + addq %r8,%r12 + adcq %rdi,%r9 + adcq %rsi,%rbx + +/NO_APP + movq %r12, -152(%rbp) +/APP + movq 152(%rcx),%rax + mulq 248(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 160(%rcx),%rax + mulq 240(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 168(%rcx),%rax + mulq 232(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 176(%rcx),%rax + mulq 224(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 184(%rcx),%rax + mulq 216(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 192(%rcx),%rax + mulq 208(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r10, %rdx + movq %rdi, %r13 + movq %rsi, %r12 +/APP + addq %r8,%r9 + adcq %r13,%rbx + adcq %r12,%rdx + addq %r8,%r9 + adcq %r13,%rbx + adcq %r12,%rdx + +/NO_APP + movq %rdx, %rax + movq %r13, %rdi + movq %r12, %rsi + movq %rax, %r11 + movq %r10, %r12 +/APP + movq 200(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%rbx + adcq $0,%r11 + +/NO_APP + movq %r9, -144(%rbp) + movq %r11, %r9 +/APP + movq 160(%rcx),%rax + mulq 248(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 168(%rcx),%rax + mulq 240(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 176(%rcx),%rax + mulq 232(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 184(%rcx),%rax + mulq 224(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq 
$0,%rsi + + movq 192(%rcx),%rax + mulq 216(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 200(%rcx),%rax + mulq 208(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + addq %r8,%rbx + adcq %rdi,%r9 + adcq %rsi,%r12 + addq %r8,%rbx + adcq %rdi,%r9 + adcq %rsi,%r12 + +/NO_APP + movq %rbx, -136(%rbp) + movq %r12, %r11 +/APP + movq 168(%rcx),%rax + mulq 248(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 176(%rcx),%rax + mulq 240(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 184(%rcx),%rax + mulq 232(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 192(%rcx),%rax + mulq 224(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 200(%rcx),%rax + mulq 216(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r10, %rax + movq %rdi, %r13 + movq %rsi, %r12 +/APP + addq %r8,%r9 + adcq %r13,%r11 + adcq %r12,%rax + addq %r8,%r9 + adcq %r13,%r11 + adcq %r12,%rax + +/NO_APP + movq %rax, %rdx + movq %r11, %rbx + movq %r13, %rdi + movq %rdx, %r11 + movq %r12, %rsi +/APP + movq 208(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%rbx + adcq $0,%r11 + +/NO_APP + movq %r9, -128(%rbp) + movq %r11, %r9 +/APP + movq 176(%rcx),%rax + mulq 248(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 184(%rcx),%rax + mulq 240(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 192(%rcx),%rax + mulq 232(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 200(%rcx),%rax + mulq 224(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 208(%rcx),%rax + mulq 216(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r10, %rdx +/APP + addq %r8,%rbx + adcq %rdi,%r9 + adcq %rsi,%rdx + addq %r8,%rbx + adcq %rdi,%r9 + adcq %rsi,%rdx + +/NO_APP + movq %rbx, -120(%rbp) + movq %rdx, %r11 + movq %r10, %rbx +/APP + movq 184(%rcx),%rax + mulq 248(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 192(%rcx),%rax + mulq 240(%rcx) + addq 
%rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 200(%rcx),%rax + mulq 232(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 208(%rcx),%rax + mulq 224(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r13 + movq %rsi, %r12 +/APP + addq %r8,%r9 + adcq %r13,%r11 + adcq %r12,%rbx + addq %r8,%r9 + adcq %r13,%r11 + adcq %r12,%rbx + +/NO_APP + movq %rbx, %rdx + movq %r13, %rdi + movq %r11, %rbx + movq %r12, %rsi + movq %rdx, %r11 + movq %r10, %r12 +/APP + movq 216(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%rbx + adcq $0,%r11 + +/NO_APP + movq %r9, -112(%rbp) + movq %r11, %r9 +/APP + movq 192(%rcx),%rax + mulq 248(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 200(%rcx),%rax + mulq 240(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 208(%rcx),%rax + mulq 232(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 216(%rcx),%rax + mulq 224(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + addq %r8,%rbx + adcq %rdi,%r9 + adcq %rsi,%r12 + addq %r8,%rbx + adcq %rdi,%r9 + adcq %rsi,%r12 + +/NO_APP + movq %rbx, -104(%rbp) + movq %r12, %r11 +/APP + movq 200(%rcx),%rax + mulq 248(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 208(%rcx),%rax + mulq 240(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + + movq 216(%rcx),%rax + mulq 232(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %r10, %rax + movq %rdi, %r13 + movq %rsi, %r12 +/APP + addq %r8,%r9 + adcq %r13,%r11 + adcq %r12,%rax + addq %r8,%r9 + adcq %r13,%r11 + adcq %r12,%rax + +/NO_APP + movq %rax, %rdx + movq %r11, %rbx + movq %r13, %rdi + movq %rdx, %r11 + movq %r12, %rsi + movq %r10, %r12 +/APP + movq 224(%rcx),%rax + mulq %rax + addq %rax,%r9 + adcq %rdx,%rbx + adcq $0,%r11 + +/NO_APP + movq %r9, -96(%rbp) + movq %r10, %r9 +/APP + movq 208(%rcx),%rax + mulq 248(%rcx) + movq %rax,%r8 + movq %rdx,%rdi + xorq %rsi,%rsi + + movq 216(%rcx),%rax + mulq 240(%rcx) + addq %rax,%r8 + 
adcq %rdx,%rdi + adcq $0,%rsi + + movq 224(%rcx),%rax + mulq 232(%rcx) + addq %rax,%r8 + adcq %rdx,%rdi + adcq $0,%rsi + +/NO_APP + movq %rdi, %r13 + movq %rsi, %rax +/APP + addq %r8,%rbx + adcq %r13,%r11 + adcq %rax,%r9 + addq %r8,%rbx + adcq %r13,%r11 + adcq %rax,%r9 + +/NO_APP + movq %rbx, -88(%rbp) + movq %r11, %rsi + movq %r9, %r8 +/APP + movq 216(%rcx),%rax + mulq 248(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%r12 + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%r12 + +/NO_APP + movq %r12, %r11 +/APP + movq 224(%rcx),%rax + mulq 240(%rcx) + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%r11 + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%r11 + +/NO_APP + movq %r8, %r13 + movq %r11, %rbx +/APP + movq 232(%rcx),%rax + mulq %rax + addq %rax,%rsi + adcq %rdx,%r13 + adcq $0,%rbx + +/NO_APP + movq %rsi, -80(%rbp) + movq %rbx, %r12 + movq %r13, %rdi + movq %r10, %r13 +/APP + movq 224(%rcx),%rax + mulq 248(%rcx) + addq %rax,%rdi + adcq %rdx,%r12 + adcq $0,%r13 + addq %rax,%rdi + adcq %rdx,%r12 + adcq $0,%r13 + +/NO_APP + movq %r12, %r9 + movq %r13, %r12 +/APP + movq 232(%rcx),%rax + mulq 240(%rcx) + addq %rax,%rdi + adcq %rdx,%r9 + adcq $0,%r12 + addq %rax,%rdi + adcq %rdx,%r9 + adcq $0,%r12 + +/NO_APP + movq %rdi, -72(%rbp) + movq %r9, %r11 + movq %r12, %rbx + movq %r10, %r9 +/APP + movq 232(%rcx),%rax + mulq 248(%rcx) + addq %rax,%r11 + adcq %rdx,%rbx + adcq $0,%r9 + addq %rax,%r11 + adcq %rdx,%rbx + adcq $0,%r9 + +/NO_APP + movq %rbx, %r13 + movq %r9, %rbx + movq %r10, %r9 +/APP + movq 240(%rcx),%rax + mulq %rax + addq %rax,%r11 + adcq %rdx,%r13 + adcq $0,%rbx + +/NO_APP + movq %r11, -64(%rbp) + movq %r13, %rdi + movq %rbx, %rsi +/APP + movq 240(%rcx),%rax + mulq 248(%rcx) + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%r9 + addq %rax,%rdi + adcq %rdx,%rsi + adcq $0,%r9 + +/NO_APP + movq %rdi, -56(%rbp) + movq %r9, %r8 +/APP + movq 248(%rcx),%rax + mulq %rax + addq %rax,%rsi + adcq %rdx,%r8 + adcq $0,%r10 + +/NO_APP + movq %rsi, -48(%rbp) + movq 16(%r14), %rdi + leaq 
-544(%rbp), %rsi + movl $512, %edx + movq %r8, -40(%rbp) + movl $64, 8(%r14) + movl $0, (%r14) + call memcpy@PLT + movl 8(%r14), %edx + testl %edx, %edx + je .L304 + leal -1(%rdx), %ecx + movq 16(%r14), %rsi + mov %ecx, %r10d + cmpq $0, (%rsi,%r10,8) + jne .L302 + movl %ecx, %edx + .align 16 +.L303: + testl %edx, %edx + movl %edx, %ecx + je .L307 + decl %edx + mov %edx, %eax + cmpq $0, (%rsi,%rax,8) + je .L303 + movl %ecx, 8(%r14) + movl %ecx, %edx +.L302: + testl %edx, %edx + je .L304 + movl (%r14), %eax + movl %eax, (%r14) + addq $512, %rsp + popq %rbx + popq %r12 + popq %r13 + popq %r14 + leave + ret +.L307: + movl %edx, 8(%r14) + .align 16 +.L304: + xorl %eax, %eax + movl %eax, (%r14) + addq $512, %rsp + popq %rbx + popq %r12 + popq %r13 + popq %r14 + leave + ret +.LFE9: + .size s_mp_sqr_comba_32, .-s_mp_sqr_comba_32 diff --git a/security/nss/lib/freebl/mpi/mp_gf2m-priv.h b/security/nss/lib/freebl/mpi/mp_gf2m-priv.h new file mode 100644 index 000000000..5be4da4bf --- /dev/null +++ b/security/nss/lib/freebl/mpi/mp_gf2m-priv.h @@ -0,0 +1,73 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef _MP_GF2M_PRIV_H_ +#define _MP_GF2M_PRIV_H_ + +#include "mpi-priv.h" + +extern const mp_digit mp_gf2m_sqr_tb[16]; + +#if defined(MP_USE_UINT_DIGIT) +#define MP_DIGIT_BITS 32 +/* enable fast divide and mod operations on MP_DIGIT_BITS */ +#define MP_DIGIT_BITS_LOG_2 5 +#define MP_DIGIT_BITS_MASK 0x1f +#else +#define MP_DIGIT_BITS 64 +/* enable fast divide and mod operations on MP_DIGIT_BITS */ +#define MP_DIGIT_BITS_LOG_2 6 +#define MP_DIGIT_BITS_MASK 0x3f +#endif + +/* Platform-specific macros for fast binary polynomial squaring. 
*/ +#if MP_DIGIT_BITS == 32 +#define gf2m_SQR1(w) \ + mp_gf2m_sqr_tb[(w) >> 28 & 0xF] << 24 | mp_gf2m_sqr_tb[(w) >> 24 & 0xF] << 16 | \ + mp_gf2m_sqr_tb[(w) >> 20 & 0xF] << 8 | mp_gf2m_sqr_tb[(w) >> 16 & 0xF] +#define gf2m_SQR0(w) \ + mp_gf2m_sqr_tb[(w) >> 12 & 0xF] << 24 | mp_gf2m_sqr_tb[(w) >> 8 & 0xF] << 16 | \ + mp_gf2m_sqr_tb[(w) >> 4 & 0xF] << 8 | mp_gf2m_sqr_tb[(w)&0xF] +#else +#define gf2m_SQR1(w) \ + mp_gf2m_sqr_tb[(w) >> 60 & 0xF] << 56 | mp_gf2m_sqr_tb[(w) >> 56 & 0xF] << 48 | \ + mp_gf2m_sqr_tb[(w) >> 52 & 0xF] << 40 | mp_gf2m_sqr_tb[(w) >> 48 & 0xF] << 32 | \ + mp_gf2m_sqr_tb[(w) >> 44 & 0xF] << 24 | mp_gf2m_sqr_tb[(w) >> 40 & 0xF] << 16 | \ + mp_gf2m_sqr_tb[(w) >> 36 & 0xF] << 8 | mp_gf2m_sqr_tb[(w) >> 32 & 0xF] +#define gf2m_SQR0(w) \ + mp_gf2m_sqr_tb[(w) >> 28 & 0xF] << 56 | mp_gf2m_sqr_tb[(w) >> 24 & 0xF] << 48 | \ + mp_gf2m_sqr_tb[(w) >> 20 & 0xF] << 40 | mp_gf2m_sqr_tb[(w) >> 16 & 0xF] << 32 | \ + mp_gf2m_sqr_tb[(w) >> 12 & 0xF] << 24 | mp_gf2m_sqr_tb[(w) >> 8 & 0xF] << 16 | \ + mp_gf2m_sqr_tb[(w) >> 4 & 0xF] << 8 | mp_gf2m_sqr_tb[(w)&0xF] +#endif + +/* Multiply two binary polynomials mp_digits a, b. + * Result is a polynomial with degree < 2 * MP_DIGIT_BITS - 1. + * Output in two mp_digits rh, rl. + */ +void s_bmul_1x1(mp_digit *rh, mp_digit *rl, const mp_digit a, const mp_digit b); + +/* Compute xor-multiply of two binary polynomials (a1, a0) x (b1, b0) + * result is a binary polynomial in 4 mp_digits r[4]. + * The caller MUST ensure that r has the right amount of space allocated. + */ +void s_bmul_2x2(mp_digit *r, const mp_digit a1, const mp_digit a0, const mp_digit b1, + const mp_digit b0); + +/* Compute xor-multiply of two binary polynomials (a2, a1, a0) x (b2, b1, b0) + * result is a binary polynomial in 6 mp_digits r[6]. + * The caller MUST ensure that r has the right amount of space allocated. 
+ */ +void s_bmul_3x3(mp_digit *r, const mp_digit a2, const mp_digit a1, const mp_digit a0, + const mp_digit b2, const mp_digit b1, const mp_digit b0); + +/* Compute xor-multiply of two binary polynomials (a3, a2, a1, a0) x (b3, b2, b1, b0) + * result is a binary polynomial in 8 mp_digits r[8]. + * The caller MUST ensure that r has the right amount of space allocated. + */ +void s_bmul_4x4(mp_digit *r, const mp_digit a3, const mp_digit a2, const mp_digit a1, + const mp_digit a0, const mp_digit b3, const mp_digit b2, const mp_digit b1, + const mp_digit b0); + +#endif /* _MP_GF2M_PRIV_H_ */ diff --git a/security/nss/lib/freebl/mpi/mp_gf2m.c b/security/nss/lib/freebl/mpi/mp_gf2m.c new file mode 100644 index 000000000..5a096adde --- /dev/null +++ b/security/nss/lib/freebl/mpi/mp_gf2m.c @@ -0,0 +1,678 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mp_gf2m.h" +#include "mp_gf2m-priv.h" +#include "mplogic.h" +#include "mpi-priv.h" + +const mp_digit mp_gf2m_sqr_tb[16] = + { + 0, 1, 4, 5, 16, 17, 20, 21, + 64, 65, 68, 69, 80, 81, 84, 85 + }; + +/* Multiply two binary polynomials mp_digits a, b. + * Result is a polynomial with degree < 2 * MP_DIGIT_BITS - 1. + * Output in two mp_digits rh, rl. 
 *
 * Both variants below use the same fixed-window scheme:
 *   1. strip the top bits of a so that the shifted copies a2, a4 (, a8)
 *      cannot overflow a digit;
 *   2. build tab[i] = (stripped a) times the small polynomial i;
 *   3. walk b one window at a time, xor-ing tab[window] << shift into the
 *      low word l and the bits shifted out of it into the high word h;
 *   4. finally add back the contribution of the stripped top bits of a.
 */
#if MP_DIGIT_BITS == 32
void
s_bmul_1x1(mp_digit *rh, mp_digit *rl, const mp_digit a, const mp_digit b)
{
    register mp_digit h, l, s;
    mp_digit tab[8], top2b = a >> 30; /* top two bits of a, handled at the end */
    register mp_digit a1, a2, a4;

    /* a1 keeps the low 30 bits of a; a2 = a1 * t, a4 = a1 * t^2 */
    a1 = a & (0x3FFFFFFF);
    a2 = a1 << 1;
    a4 = a2 << 1;

    /* tab[i] is the xor of the a-multiples selected by the 3 bits of i */
    tab[0] = 0;
    tab[1] = a1;
    tab[2] = a2;
    tab[3] = a1 ^ a2;
    tab[4] = a4;
    tab[5] = a1 ^ a4;
    tab[6] = a2 ^ a4;
    tab[7] = a1 ^ a2 ^ a4;

    /* consume b in 3-bit windows, lowest window first; the last window
     * (bits 30..31) is only two bits wide */
    s = tab[b & 0x7];
    l = s;
    s = tab[b >> 3 & 0x7];
    l ^= s << 3;
    h = s >> 29;
    s = tab[b >> 6 & 0x7];
    l ^= s << 6;
    h ^= s >> 26;
    s = tab[b >> 9 & 0x7];
    l ^= s << 9;
    h ^= s >> 23;
    s = tab[b >> 12 & 0x7];
    l ^= s << 12;
    h ^= s >> 20;
    s = tab[b >> 15 & 0x7];
    l ^= s << 15;
    h ^= s >> 17;
    s = tab[b >> 18 & 0x7];
    l ^= s << 18;
    h ^= s >> 14;
    s = tab[b >> 21 & 0x7];
    l ^= s << 21;
    h ^= s >> 11;
    s = tab[b >> 24 & 0x7];
    l ^= s << 24;
    h ^= s >> 8;
    s = tab[b >> 27 & 0x7];
    l ^= s << 27;
    h ^= s >> 5;
    s = tab[b >> 30];
    l ^= s << 30;
    h ^= s >> 2;

    /* compensate for the top two bits of a */

    if (top2b & 01) { /* bit 30 of a: add b * t^30 */
        l ^= b << 30;
        h ^= b >> 2;
    }
    if (top2b & 02) { /* bit 31 of a: add b * t^31 */
        l ^= b << 31;
        h ^= b >> 1;
    }

    *rh = h;
    *rl = l;
}
#else
void
s_bmul_1x1(mp_digit *rh, mp_digit *rl, const mp_digit a, const mp_digit b)
{
    register mp_digit h, l, s;
    mp_digit tab[16], top3b = a >> 61; /* top three bits of a, handled at the end */
    register mp_digit a1, a2, a4, a8;

    /* a1 keeps the low 61 bits of a; a2, a4, a8 are a1 * t, t^2, t^3 */
    a1 = a & (0x1FFFFFFFFFFFFFFFULL);
    a2 = a1 << 1;
    a4 = a2 << 1;
    a8 = a4 << 1;
    /* tab[i] is the xor of the a-multiples selected by the 4 bits of i */
    tab[0] = 0;
    tab[1] = a1;
    tab[2] = a2;
    tab[3] = a1 ^ a2;
    tab[4] = a4;
    tab[5] = a1 ^ a4;
    tab[6] = a2 ^ a4;
    tab[7] = a1 ^ a2 ^ a4;
    tab[8] = a8;
    tab[9] = a1 ^ a8;
    tab[10] = a2 ^ a8;
    tab[11] = a1 ^ a2 ^ a8;
    tab[12] = a4 ^ a8;
    tab[13] = a1 ^ a4 ^ a8;
    tab[14] = a2 ^ a4 ^ a8;
    tab[15] = a1 ^ a2 ^ a4 ^ a8;

    /* consume b in 4-bit windows, lowest window first */
    s = tab[b & 0xF];
    l = s;
    s = tab[b >> 4 & 0xF];
    l ^= s << 4;
    h = s >> 60;
    s = tab[b >> 8 & 0xF];
    l ^= s << 8;
    h ^= s >> 56;
    s = tab[b >> 12 & 0xF];
    l ^= s << 12;
    h ^= s >> 52;
    s = tab[b >> 16 & 0xF];
    l ^= s << 16;
    h ^= s >> 48;
    s = tab[b >> 20 & 0xF];
    l ^= s << 20;
    h ^= s >> 44;
    s = tab[b >> 24 & 0xF];
    l ^= s << 24;
    h ^= s >> 40;
    s = tab[b >> 28 & 0xF];
    l ^= s << 28;
    h ^= s >> 36;
    s = tab[b >> 32 & 0xF];
    l ^= s << 32;
    h ^= s >> 32;
    s = tab[b >> 36 & 0xF];
    l ^= s << 36;
    h ^= s >> 28;
    s = tab[b >> 40 & 0xF];
    l ^= s << 40;
    h ^= s >> 24;
    s = tab[b >> 44 & 0xF];
    l ^= s << 44;
    h ^= s >> 20;
    s = tab[b >> 48 & 0xF];
    l ^= s << 48;
    h ^= s >> 16;
    s = tab[b >> 52 & 0xF];
    l ^= s << 52;
    h ^= s >> 12;
    s = tab[b >> 56 & 0xF];
    l ^= s << 56;
    h ^= s >> 8;
    s = tab[b >> 60];
    l ^= s << 60;
    h ^= s >> 4;

    /* compensate for the top three bits of a */

    if (top3b & 01) { /* bit 61 of a: add b * t^61 */
        l ^= b << 61;
        h ^= b >> 3;
    }
    if (top3b & 02) { /* bit 62 of a: add b * t^62 */
        l ^= b << 62;
        h ^= b >> 2;
    }
    if (top3b & 04) { /* bit 63 of a: add b * t^63 */
        l ^= b << 63;
        h ^= b >> 1;
    }

    *rh = h;
    *rl = l;
}
#endif

/* Compute xor-multiply of two binary polynomials  (a1, a0) x (b1, b0)
 * result is a binary polynomial in 4 mp_digits r[4].
 * The caller MUST ensure that r has the right amount of space allocated.
 *
 * Uses three 1x1 multiplies (high*high, low*low, and the product of the
 * xor-ed halves) instead of four, then folds the middle term into
 * r[1] and r[2].
 */
void
s_bmul_2x2(mp_digit *r, const mp_digit a1, const mp_digit a0, const mp_digit b1,
           const mp_digit b0)
{
    mp_digit m1, m0;
    /* r[3] = h1, r[2] = h0; r[1] = l1; r[0] = l0 */
    s_bmul_1x1(r + 3, r + 2, a1, b1);       /* (h1, h0) = a1 * b1 */
    s_bmul_1x1(r + 1, r, a0, b0);           /* (l1, l0) = a0 * b0 */
    s_bmul_1x1(&m1, &m0, a0 ^ a1, b0 ^ b1); /* (m1, m0) = (a0 + a1) * (b0 + b1) */
    /* Correction on m1 ^= l1 ^ h1; m0 ^= l0 ^ h0; */
    /* Order matters: the second statement reads the r[2] just updated. */
    r[2] ^= m1 ^ r[1] ^ r[3];            /* h0 ^= m1 ^ l1 ^ h1; */
    r[1] = r[3] ^ r[2] ^ r[0] ^ m1 ^ m0; /* l1 ^= l0 ^ h0 ^ m0; */
}

/* Compute xor-multiply of two binary polynomials  (a2, a1, a0) x (b2, b1, b0)
 * result is a binary polynomial in 6 mp_digits r[6].
 * The caller MUST ensure that r has the right amount of space allocated.
+ */ +void +s_bmul_3x3(mp_digit *r, const mp_digit a2, const mp_digit a1, const mp_digit a0, + const mp_digit b2, const mp_digit b1, const mp_digit b0) +{ + mp_digit zm[4]; + + s_bmul_1x1(r + 5, r + 4, a2, b2); /* fill top 2 words */ + s_bmul_2x2(zm, a1, a2 ^ a0, b1, b2 ^ b0); /* fill middle 4 words */ + s_bmul_2x2(r, a1, a0, b1, b0); /* fill bottom 4 words */ + + zm[3] ^= r[3]; + zm[2] ^= r[2]; + zm[1] ^= r[1] ^ r[5]; + zm[0] ^= r[0] ^ r[4]; + + r[5] ^= zm[3]; + r[4] ^= zm[2]; + r[3] ^= zm[1]; + r[2] ^= zm[0]; +} + +/* Compute xor-multiply of two binary polynomials (a3, a2, a1, a0) x (b3, b2, b1, b0) + * result is a binary polynomial in 8 mp_digits r[8]. + * The caller MUST ensure that r has the right amount of space allocated. + */ +void +s_bmul_4x4(mp_digit *r, const mp_digit a3, const mp_digit a2, const mp_digit a1, + const mp_digit a0, const mp_digit b3, const mp_digit b2, const mp_digit b1, + const mp_digit b0) +{ + mp_digit zm[4]; + + s_bmul_2x2(r + 4, a3, a2, b3, b2); /* fill top 4 words */ + s_bmul_2x2(zm, a3 ^ a1, a2 ^ a0, b3 ^ b1, b2 ^ b0); /* fill middle 4 words */ + s_bmul_2x2(r, a1, a0, b1, b0); /* fill bottom 4 words */ + + zm[3] ^= r[3] ^ r[7]; + zm[2] ^= r[2] ^ r[6]; + zm[1] ^= r[1] ^ r[5]; + zm[0] ^= r[0] ^ r[4]; + + r[5] ^= zm[3]; + r[4] ^= zm[2]; + r[3] ^= zm[1]; + r[2] ^= zm[0]; +} + +/* Compute addition of two binary polynomials a and b, + * store result in c; c could be a or b, a and b could be equal; + * c is the bitwise XOR of a and b. + */ +mp_err +mp_badd(const mp_int *a, const mp_int *b, mp_int *c) +{ + mp_digit *pa, *pb, *pc; + mp_size ix; + mp_size used_pa, used_pb; + mp_err res = MP_OKAY; + + /* Add all digits up to the precision of b. 
If b had more + * precision than a initially, swap a, b first + */ + if (MP_USED(a) >= MP_USED(b)) { + pa = MP_DIGITS(a); + pb = MP_DIGITS(b); + used_pa = MP_USED(a); + used_pb = MP_USED(b); + } else { + pa = MP_DIGITS(b); + pb = MP_DIGITS(a); + used_pa = MP_USED(b); + used_pb = MP_USED(a); + } + + /* Make sure c has enough precision for the output value */ + MP_CHECKOK(s_mp_pad(c, used_pa)); + + /* Do word-by-word xor */ + pc = MP_DIGITS(c); + for (ix = 0; ix < used_pb; ix++) { + (*pc++) = (*pa++) ^ (*pb++); + } + + /* Finish the rest of digits until we're actually done */ + for (; ix < used_pa; ++ix) { + *pc++ = *pa++; + } + + MP_USED(c) = used_pa; + MP_SIGN(c) = ZPOS; + s_mp_clamp(c); + +CLEANUP: + return res; +} + +#define s_mp_div2(a) MP_CHECKOK(mpl_rsh((a), (a), 1)); + +/* Compute binary polynomial multiply d = a * b */ +static void +s_bmul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *d) +{ + mp_digit a_i, a0b0, a1b1, carry = 0; + while (a_len--) { + a_i = *a++; + s_bmul_1x1(&a1b1, &a0b0, a_i, b); + *d++ = a0b0 ^ carry; + carry = a1b1; + } + *d = carry; +} + +/* Compute binary polynomial xor multiply accumulate d ^= a * b */ +static void +s_bmul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *d) +{ + mp_digit a_i, a0b0, a1b1, carry = 0; + while (a_len--) { + a_i = *a++; + s_bmul_1x1(&a1b1, &a0b0, a_i, b); + *d++ ^= a0b0 ^ carry; + carry = a1b1; + } + *d ^= carry; +} + +/* Compute binary polynomial xor multiply c = a * b. + * All parameters may be identical. 
+ */ +mp_err +mp_bmul(const mp_int *a, const mp_int *b, mp_int *c) +{ + mp_digit *pb, b_i; + mp_int tmp; + mp_size ib, a_used, b_used; + mp_err res = MP_OKAY; + + MP_DIGITS(&tmp) = 0; + + ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG); + + if (a == c) { + MP_CHECKOK(mp_init_copy(&tmp, a)); + if (a == b) + b = &tmp; + a = &tmp; + } else if (b == c) { + MP_CHECKOK(mp_init_copy(&tmp, b)); + b = &tmp; + } + + if (MP_USED(a) < MP_USED(b)) { + const mp_int *xch = b; /* switch a and b if b longer */ + b = a; + a = xch; + } + + MP_USED(c) = 1; + MP_DIGIT(c, 0) = 0; + MP_CHECKOK(s_mp_pad(c, USED(a) + USED(b))); + + pb = MP_DIGITS(b); + s_bmul_d(MP_DIGITS(a), MP_USED(a), *pb++, MP_DIGITS(c)); + + /* Outer loop: Digits of b */ + a_used = MP_USED(a); + b_used = MP_USED(b); + MP_USED(c) = a_used + b_used; + for (ib = 1; ib < b_used; ib++) { + b_i = *pb++; + + /* Inner product: Digits of a */ + if (b_i) + s_bmul_d_add(MP_DIGITS(a), a_used, b_i, MP_DIGITS(c) + ib); + else + MP_DIGIT(c, ib + a_used) = b_i; + } + + s_mp_clamp(c); + + SIGN(c) = ZPOS; + +CLEANUP: + mp_clear(&tmp); + return res; +} + +/* Compute modular reduction of a and store result in r. + * r could be a. + * For modular arithmetic, the irreducible polynomial f(t) is represented + * as an array of int[], where f(t) is of the form: + * f(t) = t^p[0] + t^p[1] + ... + t^p[k] + * where m = p[0] > p[1] > ... > p[k] = 0. 
+ */ +mp_err +mp_bmod(const mp_int *a, const unsigned int p[], mp_int *r) +{ + int j, k; + int n, dN, d0, d1; + mp_digit zz, *z, tmp; + mp_size used; + mp_err res = MP_OKAY; + + /* The algorithm does the reduction in place in r, + * if a != r, copy a into r first so reduction can be done in r + */ + if (a != r) { + MP_CHECKOK(mp_copy(a, r)); + } + z = MP_DIGITS(r); + + /* start reduction */ + /*dN = p[0] / MP_DIGIT_BITS; */ + dN = p[0] >> MP_DIGIT_BITS_LOG_2; + used = MP_USED(r); + + for (j = used - 1; j > dN;) { + + zz = z[j]; + if (zz == 0) { + j--; + continue; + } + z[j] = 0; + + for (k = 1; p[k] > 0; k++) { + /* reducing component t^p[k] */ + n = p[0] - p[k]; + /*d0 = n % MP_DIGIT_BITS; */ + d0 = n & MP_DIGIT_BITS_MASK; + d1 = MP_DIGIT_BITS - d0; + /*n /= MP_DIGIT_BITS; */ + n >>= MP_DIGIT_BITS_LOG_2; + z[j - n] ^= (zz >> d0); + if (d0) + z[j - n - 1] ^= (zz << d1); + } + + /* reducing component t^0 */ + n = dN; + /*d0 = p[0] % MP_DIGIT_BITS;*/ + d0 = p[0] & MP_DIGIT_BITS_MASK; + d1 = MP_DIGIT_BITS - d0; + z[j - n] ^= (zz >> d0); + if (d0) + z[j - n - 1] ^= (zz << d1); + } + + /* final round of reduction */ + while (j == dN) { + + /* d0 = p[0] % MP_DIGIT_BITS; */ + d0 = p[0] & MP_DIGIT_BITS_MASK; + zz = z[dN] >> d0; + if (zz == 0) + break; + d1 = MP_DIGIT_BITS - d0; + + /* clear up the top d1 bits */ + if (d0) { + z[dN] = (z[dN] << d1) >> d1; + } else { + z[dN] = 0; + } + *z ^= zz; /* reduction t^0 component */ + + for (k = 1; p[k] > 0; k++) { + /* reducing component t^p[k]*/ + /* n = p[k] / MP_DIGIT_BITS; */ + n = p[k] >> MP_DIGIT_BITS_LOG_2; + /* d0 = p[k] % MP_DIGIT_BITS; */ + d0 = p[k] & MP_DIGIT_BITS_MASK; + d1 = MP_DIGIT_BITS - d0; + z[n] ^= (zz << d0); + tmp = zz >> d1; + if (d0 && tmp) + z[n + 1] ^= tmp; + } + } + + s_mp_clamp(r); +CLEANUP: + return res; +} + +/* Compute the product of two polynomials a and b, reduce modulo p, + * Store the result in r. r could be a or b; a could be b. 
+ */ +mp_err +mp_bmulmod(const mp_int *a, const mp_int *b, const unsigned int p[], mp_int *r) +{ + mp_err res; + + if (a == b) + return mp_bsqrmod(a, p, r); + if ((res = mp_bmul(a, b, r)) != MP_OKAY) + return res; + return mp_bmod(r, p, r); +} + +/* Compute binary polynomial squaring c = a*a mod p . + * Parameter r and a can be identical. + */ + +mp_err +mp_bsqrmod(const mp_int *a, const unsigned int p[], mp_int *r) +{ + mp_digit *pa, *pr, a_i; + mp_int tmp; + mp_size ia, a_used; + mp_err res; + + ARGCHK(a != NULL && r != NULL, MP_BADARG); + MP_DIGITS(&tmp) = 0; + + if (a == r) { + MP_CHECKOK(mp_init_copy(&tmp, a)); + a = &tmp; + } + + MP_USED(r) = 1; + MP_DIGIT(r, 0) = 0; + MP_CHECKOK(s_mp_pad(r, 2 * USED(a))); + + pa = MP_DIGITS(a); + pr = MP_DIGITS(r); + a_used = MP_USED(a); + MP_USED(r) = 2 * a_used; + + for (ia = 0; ia < a_used; ia++) { + a_i = *pa++; + *pr++ = gf2m_SQR0(a_i); + *pr++ = gf2m_SQR1(a_i); + } + + MP_CHECKOK(mp_bmod(r, p, r)); + s_mp_clamp(r); + SIGN(r) = ZPOS; + +CLEANUP: + mp_clear(&tmp); + return res; +} + +/* Compute binary polynomial y/x mod p, y divided by x, reduce modulo p. + * Store the result in r. r could be x or y, and x could equal y. + * Uses algorithm Modular_Division_GF(2^m) from + * Chang-Shantz, S. "From Euclid's GCD to Montgomery Multiplication to + * the Great Divide". 
+ */ +int +mp_bdivmod(const mp_int *y, const mp_int *x, const mp_int *pp, + const unsigned int p[], mp_int *r) +{ + mp_int aa, bb, uu; + mp_int *a, *b, *u, *v; + mp_err res = MP_OKAY; + + MP_DIGITS(&aa) = 0; + MP_DIGITS(&bb) = 0; + MP_DIGITS(&uu) = 0; + + MP_CHECKOK(mp_init_copy(&aa, x)); + MP_CHECKOK(mp_init_copy(&uu, y)); + MP_CHECKOK(mp_init_copy(&bb, pp)); + MP_CHECKOK(s_mp_pad(r, USED(pp))); + MP_USED(r) = 1; + MP_DIGIT(r, 0) = 0; + + a = &aa; + b = &bb; + u = &uu; + v = r; + /* reduce x and y mod p */ + MP_CHECKOK(mp_bmod(a, p, a)); + MP_CHECKOK(mp_bmod(u, p, u)); + + while (!mp_isodd(a)) { + s_mp_div2(a); + if (mp_isodd(u)) { + MP_CHECKOK(mp_badd(u, pp, u)); + } + s_mp_div2(u); + } + + do { + if (mp_cmp_mag(b, a) > 0) { + MP_CHECKOK(mp_badd(b, a, b)); + MP_CHECKOK(mp_badd(v, u, v)); + do { + s_mp_div2(b); + if (mp_isodd(v)) { + MP_CHECKOK(mp_badd(v, pp, v)); + } + s_mp_div2(v); + } while (!mp_isodd(b)); + } else if ((MP_DIGIT(a, 0) == 1) && (MP_USED(a) == 1)) + break; + else { + MP_CHECKOK(mp_badd(a, b, a)); + MP_CHECKOK(mp_badd(u, v, u)); + do { + s_mp_div2(a); + if (mp_isodd(u)) { + MP_CHECKOK(mp_badd(u, pp, u)); + } + s_mp_div2(u); + } while (!mp_isodd(a)); + } + } while (1); + + MP_CHECKOK(mp_copy(u, r)); + +CLEANUP: + mp_clear(&aa); + mp_clear(&bb); + mp_clear(&uu); + return res; +} + +/* Convert the bit-string representation of a polynomial a into an array + * of integers corresponding to the bits with non-zero coefficient. + * Up to max elements of the array will be filled. Return value is total + * number of coefficients that would be extracted if array was large enough. 
+ */ +int +mp_bpoly2arr(const mp_int *a, unsigned int p[], int max) +{ + int i, j, k; + mp_digit top_bit, mask; + + top_bit = 1; + top_bit <<= MP_DIGIT_BIT - 1; + + for (k = 0; k < max; k++) + p[k] = 0; + k = 0; + + for (i = MP_USED(a) - 1; i >= 0; i--) { + mask = top_bit; + for (j = MP_DIGIT_BIT - 1; j >= 0; j--) { + if (MP_DIGITS(a)[i] & mask) { + if (k < max) + p[k] = MP_DIGIT_BIT * i + j; + k++; + } + mask >>= 1; + } + } + + return k; +} + +/* Convert the coefficient array representation of a polynomial to a + * bit-string. The array must be terminated by 0. + */ +mp_err +mp_barr2poly(const unsigned int p[], mp_int *a) +{ + + mp_err res = MP_OKAY; + int i; + + mp_zero(a); + for (i = 0; p[i] > 0; i++) { + MP_CHECKOK(mpl_set_bit(a, p[i], 1)); + } + MP_CHECKOK(mpl_set_bit(a, 0, 1)); + +CLEANUP: + return res; +} diff --git a/security/nss/lib/freebl/mpi/mp_gf2m.h b/security/nss/lib/freebl/mpi/mp_gf2m.h new file mode 100644 index 000000000..ed2c85493 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mp_gf2m.h @@ -0,0 +1,28 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef _MP_GF2M_H_ +#define _MP_GF2M_H_ + +#include "mpi.h" + +mp_err mp_badd(const mp_int *a, const mp_int *b, mp_int *c); +mp_err mp_bmul(const mp_int *a, const mp_int *b, mp_int *c); + +/* For modular arithmetic, the irreducible polynomial f(t) is represented + * as an array of int[], where f(t) is of the form: + * f(t) = t^p[0] + t^p[1] + ... + t^p[k] + * where m = p[0] > p[1] > ... > p[k] = 0. 
+ */ +mp_err mp_bmod(const mp_int *a, const unsigned int p[], mp_int *r); +mp_err mp_bmulmod(const mp_int *a, const mp_int *b, const unsigned int p[], + mp_int *r); +mp_err mp_bsqrmod(const mp_int *a, const unsigned int p[], mp_int *r); +mp_err mp_bdivmod(const mp_int *y, const mp_int *x, const mp_int *pp, + const unsigned int p[], mp_int *r); + +int mp_bpoly2arr(const mp_int *a, unsigned int p[], int max); +mp_err mp_barr2poly(const unsigned int p[], mp_int *a); + +#endif /* _MP_GF2M_H_ */ diff --git a/security/nss/lib/freebl/mpi/mpcpucache.c b/security/nss/lib/freebl/mpi/mpcpucache.c new file mode 100644 index 000000000..6fed35239 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpcpucache.c @@ -0,0 +1,808 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mpi.h" +#include "prtypes.h" + +/* + * This file implements a single function: s_mpi_getProcessorLineSize(); + * s_mpi_getProcessorLineSize() returns the size in bytes of the cache line + * if a cache exists, or zero if there is no cache. If more than one + * cache line exists, it should return the smallest line size (which is + * usually the L1 cache). + * + * mp_modexp uses this information to make sure that private key information + * isn't being leaked through the cache. + * + * Currently the file returns good data for most modern x86 processors, and + * reasonable data on 64-bit ppc processors. All other processors are assumed + * to have a cache line size of 32 bytes unless modified by target.mk. 
+ * + */ + +#if defined(i386) || defined(__i386) || defined(__X86__) || defined(_M_IX86) || defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64) +/* X86 processors have special instructions that tell us about the cache */ +#include "string.h" + +#if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64) +#define AMD_64 1 +#endif + +/* Generic CPUID function: executes cpuid with eax = op and stores the + * resulting eax, ebx, ecx and edx through the four pointers. */ +#if defined(AMD_64) + +#if defined(__GNUC__) + +void +freebl_cpuid(unsigned long op, unsigned long *eax, + unsigned long *ebx, unsigned long *ecx, + unsigned long *edx) +{ + __asm__("cpuid\n\t" + : "=a"(*eax), + "=b"(*ebx), + "=c"(*ecx), + "=d"(*edx) + : "0"(op)); +} + +#elif defined(_MSC_VER) + +#include <intrin.h> /* declares the __cpuid intrinsic used below */ + +void +freebl_cpuid(unsigned long op, unsigned long *eax, + unsigned long *ebx, unsigned long *ecx, + unsigned long *edx) +{ + int intrinsic_out[4]; /* filled by __cpuid as eax, ebx, ecx, edx */ + + __cpuid(intrinsic_out, op); + *eax = intrinsic_out[0]; + *ebx = intrinsic_out[1]; + *ecx = intrinsic_out[2]; + *edx = intrinsic_out[3]; +} + +#endif + +#else /* !defined(AMD_64) */ + +/* x86 */ + +#if defined(__GNUC__) +void +freebl_cpuid(unsigned long op, unsigned long *eax, + unsigned long *ebx, unsigned long *ecx, + unsigned long *edx) +{ + /* Some older processors don't fill the ecx register with cpuid, so clobber it + * before calling cpuid, so that there's no risk of picking random bits that + * erroneously indicate that absent CPU features are present. + * Also, GCC isn't smart enough to save the ebx PIC register on its own + * in this case, so do it by hand. Use edi to store ebx and pass the + * value returned in ebx from cpuid through edi. 
*/ + __asm__("xor %%ecx, %%ecx\n\t" + "mov %%ebx,%%edi\n\t" + "cpuid\n\t" + "xchgl %%ebx,%%edi\n\t" + : "=a"(*eax), + "=D"(*ebx), + "=c"(*ecx), + "=d"(*edx) + : "0"(op)); +} + +/* + * try flipping a processor flag to determine CPU type + */ +static unsigned long +changeFlag(unsigned long flag) +{ + unsigned long changedFlags, originalFlags; + __asm__("pushfl\n\t" /* get the flags */ + "popl %0\n\t" + "movl %0,%1\n\t" /* save the original flags */ + "xorl %2,%0\n\t" /* flip the bit */ + "pushl %0\n\t" /* set the flags */ + "popfl\n\t" + "pushfl\n\t" /* get the flags again (for return) */ + "popl %0\n\t" + "pushl %1\n\t" /* restore the original flags */ + "popfl\n\t" + : "=r"(changedFlags), + "=r"(originalFlags), + "=r"(flag) + : "2"(flag)); + return changedFlags ^ originalFlags; +} + +#elif defined(_MSC_VER) + +/* + * windows versions of the above assembler + */ +#define wcpuid __asm __emit 0fh __asm __emit 0a2h +void +freebl_cpuid(unsigned long op, unsigned long *Reax, + unsigned long *Rebx, unsigned long *Recx, unsigned long *Redx) +{ + unsigned long Leax, Lebx, Lecx, Ledx; + __asm { + pushad + xor ecx,ecx + mov eax,op + wcpuid + mov Leax,eax + mov Lebx,ebx + mov Lecx,ecx + mov Ledx,edx + popad + } + *Reax = Leax; + *Rebx = Lebx; + *Recx = Lecx; + *Redx = Ledx; +} + +static unsigned long +changeFlag(unsigned long flag) +{ + unsigned long changedFlags, originalFlags; + __asm { + push eax + push ebx + pushfd /* get the flags */ + pop eax + push eax /* save the flags on the stack */ + mov originalFlags,eax /* save the original flags */ + mov ebx,flag + xor eax,ebx /* flip the bit */ + push eax /* set the flags */ + popfd + pushfd /* get the flags again (for return) */ + pop eax + popfd /* restore the original flags */ + mov changedFlags,eax + pop ebx + pop eax + } + return changedFlags ^ originalFlags; +} +#endif + +#endif + +#if !defined(AMD_64) +#define AC_FLAG 0x40000 +#define ID_FLAG 0x200000 + +/* 386 processors can't flip the AC_FLAG, intel AP Note AP-485 */ 
+static int +is386() +{ + return changeFlag(AC_FLAG) == 0; +} + +/* 486 processors can't flip the ID_FLAG, intel AP Note AP-485 */ +static int +is486() +{ + return changeFlag(ID_FLAG) == 0; +} +#endif + +/* + * table for Intel Cache. + * See Intel Application Note AP-485 for more information + */ + +typedef unsigned char CacheTypeEntry; + +typedef enum { + Cache_NONE = 0, + Cache_UNKNOWN = 1, + Cache_TLB = 2, + Cache_TLBi = 3, + Cache_TLBd = 4, + Cache_Trace = 5, + Cache_L1 = 6, + Cache_L1i = 7, + Cache_L1d = 8, + Cache_L2 = 9, + Cache_L2i = 10, + Cache_L2d = 11, + Cache_L3 = 12, + Cache_L3i = 13, + Cache_L3d = 14 +} CacheType; + +struct _cache { + CacheTypeEntry type; + unsigned char lineSize; +}; +static const struct _cache CacheMap[256] = { + /* 00 */ { Cache_NONE, 0 }, + /* 01 */ { Cache_TLBi, 0 }, + /* 02 */ { Cache_TLBi, 0 }, + /* 03 */ { Cache_TLBd, 0 }, + /* 04 */ { + Cache_TLBd, + }, + /* 05 */ { Cache_UNKNOWN, 0 }, + /* 06 */ { Cache_L1i, 32 }, + /* 07 */ { Cache_UNKNOWN, 0 }, + /* 08 */ { Cache_L1i, 32 }, + /* 09 */ { Cache_UNKNOWN, 0 }, + /* 0a */ { Cache_L1d, 32 }, + /* 0b */ { Cache_UNKNOWN, 0 }, + /* 0c */ { Cache_L1d, 32 }, + /* 0d */ { Cache_UNKNOWN, 0 }, + /* 0e */ { Cache_UNKNOWN, 0 }, + /* 0f */ { Cache_UNKNOWN, 0 }, + /* 10 */ { Cache_UNKNOWN, 0 }, + /* 11 */ { Cache_UNKNOWN, 0 }, + /* 12 */ { Cache_UNKNOWN, 0 }, + /* 13 */ { Cache_UNKNOWN, 0 }, + /* 14 */ { Cache_UNKNOWN, 0 }, + /* 15 */ { Cache_UNKNOWN, 0 }, + /* 16 */ { Cache_UNKNOWN, 0 }, + /* 17 */ { Cache_UNKNOWN, 0 }, + /* 18 */ { Cache_UNKNOWN, 0 }, + /* 19 */ { Cache_UNKNOWN, 0 }, + /* 1a */ { Cache_UNKNOWN, 0 }, + /* 1b */ { Cache_UNKNOWN, 0 }, + /* 1c */ { Cache_UNKNOWN, 0 }, + /* 1d */ { Cache_UNKNOWN, 0 }, + /* 1e */ { Cache_UNKNOWN, 0 }, + /* 1f */ { Cache_UNKNOWN, 0 }, + /* 20 */ { Cache_UNKNOWN, 0 }, + /* 21 */ { Cache_UNKNOWN, 0 }, + /* 22 */ { Cache_L3, 64 }, + /* 23 */ { Cache_L3, 64 }, + /* 24 */ { Cache_UNKNOWN, 0 }, + /* 25 */ { Cache_L3, 64 }, + /* 26 */ { Cache_UNKNOWN, 
0 }, + /* 27 */ { Cache_UNKNOWN, 0 }, + /* 28 */ { Cache_UNKNOWN, 0 }, + /* 29 */ { Cache_L3, 64 }, + /* 2a */ { Cache_UNKNOWN, 0 }, + /* 2b */ { Cache_UNKNOWN, 0 }, + /* 2c */ { Cache_L1d, 64 }, + /* 2d */ { Cache_UNKNOWN, 0 }, + /* 2e */ { Cache_UNKNOWN, 0 }, + /* 2f */ { Cache_UNKNOWN, 0 }, + /* 30 */ { Cache_L1i, 64 }, + /* 31 */ { Cache_UNKNOWN, 0 }, + /* 32 */ { Cache_UNKNOWN, 0 }, + /* 33 */ { Cache_UNKNOWN, 0 }, + /* 34 */ { Cache_UNKNOWN, 0 }, + /* 35 */ { Cache_UNKNOWN, 0 }, + /* 36 */ { Cache_UNKNOWN, 0 }, + /* 37 */ { Cache_UNKNOWN, 0 }, + /* 38 */ { Cache_UNKNOWN, 0 }, + /* 39 */ { Cache_L2, 64 }, + /* 3a */ { Cache_UNKNOWN, 0 }, + /* 3b */ { Cache_L2, 64 }, + /* 3c */ { Cache_L2, 64 }, + /* 3d */ { Cache_UNKNOWN, 0 }, + /* 3e */ { Cache_UNKNOWN, 0 }, + /* 3f */ { Cache_UNKNOWN, 0 }, + /* 40 */ { Cache_L2, 0 }, + /* 41 */ { Cache_L2, 32 }, + /* 42 */ { Cache_L2, 32 }, + /* 43 */ { Cache_L2, 32 }, + /* 44 */ { Cache_L2, 32 }, + /* 45 */ { Cache_L2, 32 }, + /* 46 */ { Cache_UNKNOWN, 0 }, + /* 47 */ { Cache_UNKNOWN, 0 }, + /* 48 */ { Cache_UNKNOWN, 0 }, + /* 49 */ { Cache_UNKNOWN, 0 }, + /* 4a */ { Cache_UNKNOWN, 0 }, + /* 4b */ { Cache_UNKNOWN, 0 }, + /* 4c */ { Cache_UNKNOWN, 0 }, + /* 4d */ { Cache_UNKNOWN, 0 }, + /* 4e */ { Cache_UNKNOWN, 0 }, + /* 4f */ { Cache_UNKNOWN, 0 }, + /* 50 */ { Cache_TLBi, 0 }, + /* 51 */ { Cache_TLBi, 0 }, + /* 52 */ { Cache_TLBi, 0 }, + /* 53 */ { Cache_UNKNOWN, 0 }, + /* 54 */ { Cache_UNKNOWN, 0 }, + /* 55 */ { Cache_UNKNOWN, 0 }, + /* 56 */ { Cache_UNKNOWN, 0 }, + /* 57 */ { Cache_UNKNOWN, 0 }, + /* 58 */ { Cache_UNKNOWN, 0 }, + /* 59 */ { Cache_UNKNOWN, 0 }, + /* 5a */ { Cache_UNKNOWN, 0 }, + /* 5b */ { Cache_TLBd, 0 }, + /* 5c */ { Cache_TLBd, 0 }, + /* 5d */ { Cache_TLBd, 0 }, + /* 5e */ { Cache_UNKNOWN, 0 }, + /* 5f */ { Cache_UNKNOWN, 0 }, + /* 60 */ { Cache_UNKNOWN, 0 }, + /* 61 */ { Cache_UNKNOWN, 0 }, + /* 62 */ { Cache_UNKNOWN, 0 }, + /* 63 */ { Cache_UNKNOWN, 0 }, + /* 64 */ { Cache_UNKNOWN, 0 }, + /* 65 */ { 
Cache_UNKNOWN, 0 }, + /* 66 */ { Cache_L1d, 64 }, + /* 67 */ { Cache_L1d, 64 }, + /* 68 */ { Cache_L1d, 64 }, + /* 69 */ { Cache_UNKNOWN, 0 }, + /* 6a */ { Cache_UNKNOWN, 0 }, + /* 6b */ { Cache_UNKNOWN, 0 }, + /* 6c */ { Cache_UNKNOWN, 0 }, + /* 6d */ { Cache_UNKNOWN, 0 }, + /* 6e */ { Cache_UNKNOWN, 0 }, + /* 6f */ { Cache_UNKNOWN, 0 }, + /* 70 */ { Cache_Trace, 1 }, + /* 71 */ { Cache_Trace, 1 }, + /* 72 */ { Cache_Trace, 1 }, + /* 73 */ { Cache_UNKNOWN, 0 }, + /* 74 */ { Cache_UNKNOWN, 0 }, + /* 75 */ { Cache_UNKNOWN, 0 }, + /* 76 */ { Cache_UNKNOWN, 0 }, + /* 77 */ { Cache_UNKNOWN, 0 }, + /* 78 */ { Cache_UNKNOWN, 0 }, + /* 79 */ { Cache_L2, 64 }, + /* 7a */ { Cache_L2, 64 }, + /* 7b */ { Cache_L2, 64 }, + /* 7c */ { Cache_L2, 64 }, + /* 7d */ { Cache_UNKNOWN, 0 }, + /* 7e */ { Cache_UNKNOWN, 0 }, + /* 7f */ { Cache_UNKNOWN, 0 }, + /* 80 */ { Cache_UNKNOWN, 0 }, + /* 81 */ { Cache_UNKNOWN, 0 }, + /* 82 */ { Cache_L2, 32 }, + /* 83 */ { Cache_L2, 32 }, + /* 84 */ { Cache_L2, 32 }, + /* 85 */ { Cache_L2, 32 }, + /* 86 */ { Cache_L2, 64 }, + /* 87 */ { Cache_L2, 64 }, + /* 88 */ { Cache_UNKNOWN, 0 }, + /* 89 */ { Cache_UNKNOWN, 0 }, + /* 8a */ { Cache_UNKNOWN, 0 }, + /* 8b */ { Cache_UNKNOWN, 0 }, + /* 8c */ { Cache_UNKNOWN, 0 }, + /* 8d */ { Cache_UNKNOWN, 0 }, + /* 8e */ { Cache_UNKNOWN, 0 }, + /* 8f */ { Cache_UNKNOWN, 0 }, + /* 90 */ { Cache_UNKNOWN, 0 }, + /* 91 */ { Cache_UNKNOWN, 0 }, + /* 92 */ { Cache_UNKNOWN, 0 }, + /* 93 */ { Cache_UNKNOWN, 0 }, + /* 94 */ { Cache_UNKNOWN, 0 }, + /* 95 */ { Cache_UNKNOWN, 0 }, + /* 96 */ { Cache_UNKNOWN, 0 }, + /* 97 */ { Cache_UNKNOWN, 0 }, + /* 98 */ { Cache_UNKNOWN, 0 }, + /* 99 */ { Cache_UNKNOWN, 0 }, + /* 9a */ { Cache_UNKNOWN, 0 }, + /* 9b */ { Cache_UNKNOWN, 0 }, + /* 9c */ { Cache_UNKNOWN, 0 }, + /* 9d */ { Cache_UNKNOWN, 0 }, + /* 9e */ { Cache_UNKNOWN, 0 }, + /* 9f */ { Cache_UNKNOWN, 0 }, + /* a0 */ { Cache_UNKNOWN, 0 }, + /* a1 */ { Cache_UNKNOWN, 0 }, + /* a2 */ { Cache_UNKNOWN, 0 }, + /* a3 */ { 
Cache_UNKNOWN, 0 }, + /* a4 */ { Cache_UNKNOWN, 0 }, + /* a5 */ { Cache_UNKNOWN, 0 }, + /* a6 */ { Cache_UNKNOWN, 0 }, + /* a7 */ { Cache_UNKNOWN, 0 }, + /* a8 */ { Cache_UNKNOWN, 0 }, + /* a9 */ { Cache_UNKNOWN, 0 }, + /* aa */ { Cache_UNKNOWN, 0 }, + /* ab */ { Cache_UNKNOWN, 0 }, + /* ac */ { Cache_UNKNOWN, 0 }, + /* ad */ { Cache_UNKNOWN, 0 }, + /* ae */ { Cache_UNKNOWN, 0 }, + /* af */ { Cache_UNKNOWN, 0 }, + /* b0 */ { Cache_TLBi, 0 }, + /* b1 */ { Cache_UNKNOWN, 0 }, + /* b2 */ { Cache_UNKNOWN, 0 }, + /* b3 */ { Cache_TLBd, 0 }, + /* b4 */ { Cache_UNKNOWN, 0 }, + /* b5 */ { Cache_UNKNOWN, 0 }, + /* b6 */ { Cache_UNKNOWN, 0 }, + /* b7 */ { Cache_UNKNOWN, 0 }, + /* b8 */ { Cache_UNKNOWN, 0 }, + /* b9 */ { Cache_UNKNOWN, 0 }, + /* ba */ { Cache_UNKNOWN, 0 }, + /* bb */ { Cache_UNKNOWN, 0 }, + /* bc */ { Cache_UNKNOWN, 0 }, + /* bd */ { Cache_UNKNOWN, 0 }, + /* be */ { Cache_UNKNOWN, 0 }, + /* bf */ { Cache_UNKNOWN, 0 }, + /* c0 */ { Cache_UNKNOWN, 0 }, + /* c1 */ { Cache_UNKNOWN, 0 }, + /* c2 */ { Cache_UNKNOWN, 0 }, + /* c3 */ { Cache_UNKNOWN, 0 }, + /* c4 */ { Cache_UNKNOWN, 0 }, + /* c5 */ { Cache_UNKNOWN, 0 }, + /* c6 */ { Cache_UNKNOWN, 0 }, + /* c7 */ { Cache_UNKNOWN, 0 }, + /* c8 */ { Cache_UNKNOWN, 0 }, + /* c9 */ { Cache_UNKNOWN, 0 }, + /* ca */ { Cache_UNKNOWN, 0 }, + /* cb */ { Cache_UNKNOWN, 0 }, + /* cc */ { Cache_UNKNOWN, 0 }, + /* cd */ { Cache_UNKNOWN, 0 }, + /* ce */ { Cache_UNKNOWN, 0 }, + /* cf */ { Cache_UNKNOWN, 0 }, + /* d0 */ { Cache_UNKNOWN, 0 }, + /* d1 */ { Cache_UNKNOWN, 0 }, + /* d2 */ { Cache_UNKNOWN, 0 }, + /* d3 */ { Cache_UNKNOWN, 0 }, + /* d4 */ { Cache_UNKNOWN, 0 }, + /* d5 */ { Cache_UNKNOWN, 0 }, + /* d6 */ { Cache_UNKNOWN, 0 }, + /* d7 */ { Cache_UNKNOWN, 0 }, + /* d8 */ { Cache_UNKNOWN, 0 }, + /* d9 */ { Cache_UNKNOWN, 0 }, + /* da */ { Cache_UNKNOWN, 0 }, + /* db */ { Cache_UNKNOWN, 0 }, + /* dc */ { Cache_UNKNOWN, 0 }, + /* dd */ { Cache_UNKNOWN, 0 }, + /* de */ { Cache_UNKNOWN, 0 }, + /* df */ { Cache_UNKNOWN, 0 }, + /* 
e0 */ { Cache_UNKNOWN, 0 }, + /* e1 */ { Cache_UNKNOWN, 0 }, + /* e2 */ { Cache_UNKNOWN, 0 }, + /* e3 */ { Cache_UNKNOWN, 0 }, + /* e4 */ { Cache_UNKNOWN, 0 }, + /* e5 */ { Cache_UNKNOWN, 0 }, + /* e6 */ { Cache_UNKNOWN, 0 }, + /* e7 */ { Cache_UNKNOWN, 0 }, + /* e8 */ { Cache_UNKNOWN, 0 }, + /* e9 */ { Cache_UNKNOWN, 0 }, + /* ea */ { Cache_UNKNOWN, 0 }, + /* eb */ { Cache_UNKNOWN, 0 }, + /* ec */ { Cache_UNKNOWN, 0 }, + /* ed */ { Cache_UNKNOWN, 0 }, + /* ee */ { Cache_UNKNOWN, 0 }, + /* ef */ { Cache_UNKNOWN, 0 }, + /* f0 */ { Cache_UNKNOWN, 0 }, + /* f1 */ { Cache_UNKNOWN, 0 }, + /* f2 */ { Cache_UNKNOWN, 0 }, + /* f3 */ { Cache_UNKNOWN, 0 }, + /* f4 */ { Cache_UNKNOWN, 0 }, + /* f5 */ { Cache_UNKNOWN, 0 }, + /* f6 */ { Cache_UNKNOWN, 0 }, + /* f7 */ { Cache_UNKNOWN, 0 }, + /* f8 */ { Cache_UNKNOWN, 0 }, + /* f9 */ { Cache_UNKNOWN, 0 }, + /* fa */ { Cache_UNKNOWN, 0 }, + /* fb */ { Cache_UNKNOWN, 0 }, + /* fc */ { Cache_UNKNOWN, 0 }, + /* fd */ { Cache_UNKNOWN, 0 }, + /* fe */ { Cache_UNKNOWN, 0 }, + /* ff */ { Cache_UNKNOWN, 0 } +}; + +/* + * use the above table to determine the CacheEntryLineSize. + */ +static void +getIntelCacheEntryLineSize(unsigned long val, int *level, + unsigned long *lineSize) +{ + CacheType type; + + type = CacheMap[val].type; + /* only interested in data caches */ + /* NOTE val = 0x40 is a special value that means no L2 or L3 cache. + * this data check has the side effect of rejecting that entry. If + * that wasn't the case, we could have to reject it explicitly */ + if (CacheMap[val].lineSize == 0) { + return; + } + /* look at the caches, skip types we aren't interested in. 
+ * if we already have a value for a lower level cache, skip the + * current entry */ + if ((type == Cache_L1) || (type == Cache_L1d)) { + *level = 1; + *lineSize = CacheMap[val].lineSize; + } else if ((*level >= 2) && ((type == Cache_L2) || (type == Cache_L2d))) { + *level = 2; + *lineSize = CacheMap[val].lineSize; + } else if ((*level >= 3) && ((type == Cache_L3) || (type == Cache_L3d))) { + *level = 3; + *lineSize = CacheMap[val].lineSize; + } + return; +} + +static void +getIntelRegisterCacheLineSize(unsigned long val, + int *level, unsigned long *lineSize) +{ /* hand each of the four descriptor bytes in val, highest byte first, to getIntelCacheEntryLineSize */ + getIntelCacheEntryLineSize(val >> 24 & 0xff, level, lineSize); + getIntelCacheEntryLineSize(val >> 16 & 0xff, level, lineSize); + getIntelCacheEntryLineSize(val >> 8 & 0xff, level, lineSize); + getIntelCacheEntryLineSize(val & 0xff, level, lineSize); +} + +/* + * returns '0' if no recognized cache is found, or if the cache + * information is not supported by this processor (cpuidLevel < 2) + */ +static unsigned long +getIntelCacheLineSize(int cpuidLevel) +{ + int level = 4; /* higher than any real cache level, so the first L1/L2/L3 hit is accepted */ + unsigned long lineSize = 0; + unsigned long eax, ebx, ecx, edx; + int repeat, count; + + if (cpuidLevel < 2) { + return 0; + } + + /* command '2' of the cpuid is intel's cache info call. Each byte of the + * 4 registers contain a potential descriptor for the cache. The CacheMap + * table maps the cache entry with the processor cache. Register 'al' + * contains the number of times cpuid '2' needs to be called in order to + * find all the cache descriptors. Only registers with the high bit set + * to 'zero' have valid descriptors. This code loops through all the + * required calls to cpuid '2' and passes any valid descriptors it finds + * to the getIntelRegisterCacheLineSize code, which breaks the registers + * down into their component descriptors. In the end the lineSize of the + * lowest level data cache is returned. 
*/ + freebl_cpuid(2, &eax, &ebx, &ecx, &edx); + repeat = eax & 0xf; + for (count = 0; count < repeat; count++) { + if ((eax & 0x80000000) == 0) { + getIntelRegisterCacheLineSize(eax & 0xffffff00, &level, &lineSize); + } + if ((ebx & 0x80000000) == 0) { + getIntelRegisterCacheLineSize(ebx, &level, &lineSize); + } + if ((ecx & 0x80000000) == 0) { + getIntelRegisterCacheLineSize(ecx, &level, &lineSize); + } + if ((edx & 0x80000000) == 0) { + getIntelRegisterCacheLineSize(edx, &level, &lineSize); + } + if (count + 1 != repeat) { + freebl_cpuid(2, &eax, &ebx, &ecx, &edx); + } + } + return lineSize; +} + +/* + * returns '0' if the cache info is not supported by this processor. + * This is based on the AMD extended cache commands for cpuid. + * (see "AMD Processor Recognition Application Note" Publication 20734). + * Some other processors use the identical scheme. + * (see "Processor Recognition, Transmeta Corporation"). + * The cpuidLevel argument is ignored: it is overwritten with the extended + * cpuid level before being tested. + */ +static unsigned long +getOtherCacheLineSize(unsigned long cpuidLevel) +{ + unsigned long lineSize = 0; + unsigned long eax, ebx, ecx, edx; + + /* get the Extended CPUID level */ + freebl_cpuid(0x80000000, &eax, &ebx, &ecx, &edx); + cpuidLevel = eax; + + if (cpuidLevel >= 0x80000005) { + freebl_cpuid(0x80000005, &eax, &ebx, &ecx, &edx); + lineSize = ecx & 0xff; /* line Size, L1 Data Cache */ + } + return lineSize; +} + +static const char *const manMap[] = { /* each #define is the index of the string that follows it; keep them in step */ +#define INTEL 0 + "GenuineIntel", +#define AMD 1 + "AuthenticAMD", +#define CYRIX 2 + "CyrixInstead", +#define CENTAUR 3 + "CentaurHauls", +#define NEXGEN 4 + "NexGenDriven", +#define TRANSMETA 5 + "GenuineTMx86", +#define RISE 6 + "RiseRiseRise", +#define UMC 7 + "UMC UMC UMC ", +#define SIS 8 + "Sis Sis Sis ", +#define NATIONAL 9 + "Geode by NSC", +}; + +static const int n_manufacturers = sizeof(manMap) / sizeof(manMap[0]); + +#define MAN_UNKNOWN 10 /* one past the last manMap index, so it can never match a real entry */ + +#if !defined(AMD_64) +#define SSE2_FLAG (1 << 26) +unsigned long +s_mpi_is_sse2() +{ + unsigned long eax, ebx, ecx, edx; + + if (is386() || 
is486()) { + return 0; + } + freebl_cpuid(0, &eax, &ebx, &ecx, &edx); + + /* has no SSE2 extensions */ + if (eax == 0) { + return 0; + } + + freebl_cpuid(1, &eax, &ebx, &ecx, &edx); + return (edx & SSE2_FLAG) == SSE2_FLAG; +} +#endif + +unsigned long +s_mpi_getProcessorLineSize() +{ + /* x86 / x86-64: returns the cache line size in bytes as reported through + * cpuid; 0 on a 386, and 32 whenever cpuid gives no usable answer. */ + unsigned long eax, ebx, ecx, edx; + PRUint32 cpuid[3]; + unsigned long cpuidLevel; + unsigned long cacheLineSize = 0; + int manufacturer = MAN_UNKNOWN; + int i; + char string[13]; + +#if !defined(AMD_64) + if (is386()) { + return 0; /* 386 had no cache */ + } + if (is486()) { + return 32; /* really? need more info */ + } +#endif + + /* Pentium, cpuid command is available */ + freebl_cpuid(0, &eax, &ebx, &ecx, &edx); + cpuidLevel = eax; + /* string holds the CPU's manufacturer ID string - a twelve + * character ASCII string stored in ebx, edx, ecx, and + * the 32-bit extended feature flags are in edx, ecx. + * The words must be copied in exactly that ebx, edx, ecx order: + * copying ebx, ecx, edx yields "GenuntelineI" rather than + * "GenuineIntel", which matches no manMap entry. + */ + cpuid[0] = ebx; + cpuid[1] = edx; + cpuid[2] = ecx; + memcpy(string, cpuid, sizeof(cpuid)); + string[12] = 0; + + manufacturer = MAN_UNKNOWN; + for (i = 0; i < n_manufacturers; i++) { + if (strcmp(manMap[i], string) == 0) { + manufacturer = i; + } + } + + /* Intel reports cache descriptors via cpuid 2; everyone else is probed + * through the extended 0x80000005 call */ + if (manufacturer == INTEL) { + cacheLineSize = getIntelCacheLineSize(cpuidLevel); + } else { + cacheLineSize = getOtherCacheLineSize(cpuidLevel); + } + /* doesn't support cache info based on cpuid. This means + * an old pentium class processor, which have cache lines of + * 32. If we learn differently, we can use a switch based on + * the Manufacturer id */ + if (cacheLineSize == 0) { + cacheLineSize = 32; + } + return cacheLineSize; +} +#define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1 +#endif + +#if defined(__ppc64__) +/* + * Sigh, The PPC has some really nice features to help us determine cache + * size, since it had lots of direct control functions to do so. The POWER + * processor even has an instruction to do this, but it was dropped in + * PowerPC. Unfortunately most of them are not available in user mode. 
+ * + * The dcbz function would be a great way to determine cache line size except + * 1) it only works on write-back memory (it throws an exception otherwise), + * and 2) because so many mac programs 'knew' the processor cache size was + * 32 bytes, they used this instruction as a fast 'zero 32 bytes'. Now the new + * G5 processor has 128 byte cache, but dcbz only clears 32 bytes to keep + * these programs happy. dcbzl works if 64 bit instructions are supported. + * If you know 64 bit instructions are supported, and that stack is + * write-back, you can use this code. + */ +#include "memory.h" + +/* clear the cache line that contains 'array' */ +static inline void +dcbzl(char *array) +{ + register char *a asm("r2") = array; + __asm__ __volatile__("dcbzl %0,r0" + : "=r"(a) + : "0"(a)); +} + +#define PPC_DO_ALIGN(x, y) ((char *)((((long long)(x)) + ((y)-1)) & ~((y)-1))) /* round x up to a multiple of y; the mask assumes y is a power of two */ + +#define PPC_MAX_LINE_SIZE 256 +unsigned long +s_mpi_getProcessorLineSize() +{ + char testArray[2 * PPC_MAX_LINE_SIZE + 1]; /* twice the maximum, so an aligned PPC_MAX_LINE_SIZE window always fits inside */ + char *test; + int i; + + /* align the array on a maximum line size boundary, so we + * know we are starting to clear from the first address */ + test = PPC_DO_ALIGN(testArray, PPC_MAX_LINE_SIZE); + /* set all the values to 1's */ + memset(test, 0xff, PPC_MAX_LINE_SIZE); + /* clear one cache block starting at 'test' */ + dcbzl(test); + + /* find the size of the cleared area, that's our block size: + * try candidate sizes from the largest down, halving each time, and + * return the first one whose last byte was zeroed */ + for (i = PPC_MAX_LINE_SIZE; i != 0; i = i / 2) { + if (test[i - 1] == 0) { + return i; + } + } + return 0; +} + +#define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1 +#endif + +/* + * put other processor and platform specific cache code here + * return the smallest cache line size in bytes on the processor + * (usually the L1 cache). If the OS has a call, this would be + * a great place to put it. + * + * If there is no cache, return 0; + * + * define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED so the generic functions + * below aren't compiled. 
+ * + */ + +/* target.mk can define MPI_CACHE_LINE_SIZE if it's common for the family or + * OS */ +#if defined(MPI_CACHE_LINE_SIZE) && !defined(MPI_GET_PROCESSOR_LINE_SIZE_DEFINED) + +unsigned long +s_mpi_getProcessorLineSize() +{ + return MPI_CACHE_LINE_SIZE; +} +#define MPI_GET_PROCESSOR_LINE_SIZE_DEFINED 1 +#endif + +/* If no way to get the processor cache line size has been defined, assume + * it's 32 bytes (most common value, does not significantly impact performance) + */ +#ifndef MPI_GET_PROCESSOR_LINE_SIZE_DEFINED +unsigned long +s_mpi_getProcessorLineSize() +{ + return 32; +} +#endif + +#ifdef TEST_IT /* stand-alone smoke test: prints the detected line size */ +#include <stdio.h> + +int +main(void) +{ + /* s_mpi_getProcessorLineSize() returns unsigned long, hence %lu */ + printf("line size = %lu\n", s_mpi_getProcessorLineSize()); + return 0; +} +#endif diff --git a/security/nss/lib/freebl/mpi/mpcpucache_amd64.s b/security/nss/lib/freebl/mpi/mpcpucache_amd64.s new file mode 100644 index 000000000..d493b4762 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpcpucache_amd64.s @@ -0,0 +1,861 @@ +/ This Source Code Form is subject to the terms of the Mozilla Public +/ License, v. 2.0. If a copy of the MPL was not distributed with this +/ file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ + .file "mpcpucache.c" +/ .section .rodata.str1.1,"aMS",@progbits,1 + .section .rodata +.LC0: + .string "GenuineIntel" +.LC1: + .string "AuthenticAMD" +.LC2: + .string "CyrixInstead" +.LC3: + .string "CentaurHauls" +.LC4: + .string "NexGenDriven" +.LC5: + .string "GenuineTMx86" +.LC6: + .string "RiseRiseRise" +.LC7: + .string "UMC UMC UMC " +.LC8: + .string "Sis Sis Sis " +.LC9: + .string "Geode by NSC" + .section .data.rel.ro.local,"aw",@progbits + .align 32 + .type manMap, @object + .size manMap, 80 +manMap: + .quad .LC0 + .quad .LC1 + .quad .LC2 + .quad .LC3 + .quad .LC4 + .quad .LC5 + .quad .LC6 + .quad .LC7 + .quad .LC8 + .quad .LC9 + .section .rodata + .align 32 + .type CacheMap, @object + .size CacheMap, 512 +CacheMap: + .byte 0 + .byte 0 + .byte 3 + .byte 0 + .byte 3 + .byte 0 + .byte 4 + .byte 0 + .byte 4 + .zero 1 + .byte 1 + .byte 0 + .byte 7 + .byte 32 + .byte 1 + .byte 0 + .byte 7 + .byte 32 + .byte 1 + .byte 0 + .byte 8 + .byte 32 + .byte 1 + .byte 0 + .byte 8 + .byte 32 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 12 + .byte 64 + .byte 12 + .byte 64 + .byte 1 + .byte 0 + .byte 12 + .byte 64 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 12 + .byte 64 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 8 + .byte 64 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 7 + .byte 64 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 9 + .byte 64 + .byte 1 + .byte 0 + .byte 9 + .byte 64 + .byte 9 + .byte 64 + .byte 1 + .byte 0 + 
.byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 9 + .byte 0 + .byte 9 + .byte 32 + .byte 9 + .byte 32 + .byte 9 + .byte 32 + .byte 9 + .byte 32 + .byte 9 + .byte 32 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 3 + .byte 0 + .byte 3 + .byte 0 + .byte 3 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 4 + .byte 0 + .byte 4 + .byte 0 + .byte 4 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 8 + .byte 64 + .byte 8 + .byte 64 + .byte 8 + .byte 64 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 5 + .byte 1 + .byte 5 + .byte 1 + .byte 5 + .byte 1 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 9 + .byte 64 + .byte 9 + .byte 64 + .byte 9 + .byte 64 + .byte 9 + .byte 64 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 9 + .byte 32 + .byte 9 + .byte 32 + .byte 9 + .byte 32 + .byte 9 + .byte 32 + .byte 9 + .byte 64 + .byte 9 + .byte 64 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + 
.byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 3 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 4 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .text + .align 16 +.globl freebl_cpuid + .type freebl_cpuid, @function +freebl_cpuid: +.LFB2: + 
movq %rdx, %r10 + pushq %rbx +.LCFI0: + movq %rcx, %r11 + movq %rdi, %rax +/APP + cpuid + +/NO_APP + movq %rax, (%rsi) + movq %rbx, (%r10) + popq %rbx + movq %rcx, (%r11) + movq %rdx, (%r8) + ret +.LFE2: + .size freebl_cpuid, .-freebl_cpuid + .align 16 + .type getIntelCacheEntryLineSize, @function +getIntelCacheEntryLineSize: +.LFB3: + leaq CacheMap(%rip), %r9 + movq %rdx, %r10 + movzbl 1(%r9,%rdi,2), %ecx + movzbl (%r9,%rdi,2), %r8d + testb %cl, %cl + je .L2 + cmpl $6, %r8d + sete %dl + cmpl $8, %r8d + sete %al + orl %edx, %eax + testb $1, %al + je .L4 + movl $1, (%rsi) +.L9: + movzbl %cl, %eax + movq %rax, (%r10) + ret + .align 16 +.L4: + movl (%rsi), %r11d + cmpl $1, %r11d + jg .L11 +.L6: + cmpl $2, %r11d + jle .L2 + cmpl $12, %r8d + sete %dl + cmpl $14, %r8d + sete %al + orl %edx, %eax + testb $1, %al + je .L2 + movzbq 1(%r9,%rdi,2), %rax + movl $3, (%rsi) + movq %rax, (%r10) + .align 16 +.L2: + rep ; ret + .align 16 +.L11: + cmpl $9, %r8d + sete %dl + cmpl $11, %r8d + sete %al + orl %edx, %eax + testb $1, %al + je .L6 + movl $2, (%rsi) + jmp .L9 +.LFE3: + .size getIntelCacheEntryLineSize, .-getIntelCacheEntryLineSize + .align 16 + .type getIntelRegisterCacheLineSize, @function +getIntelRegisterCacheLineSize: +.LFB4: + pushq %rbp +.LCFI1: + movq %rsp, %rbp +.LCFI2: + movq %rbx, -24(%rbp) +.LCFI3: + movq %rdi, %rbx + shrq $24, %rdi + movq %r12, -16(%rbp) +.LCFI4: + movq %r13, -8(%rbp) +.LCFI5: + andl $255, %edi + subq $24, %rsp +.LCFI6: + movq %rsi, %r13 + movq %rdx, %r12 + call getIntelCacheEntryLineSize + movq %rbx, %rdi + movq %r12, %rdx + movq %r13, %rsi + shrq $16, %rdi + andl $255, %edi + call getIntelCacheEntryLineSize + movq %rbx, %rdi + movq %r12, %rdx + movq %r13, %rsi + shrq $8, %rdi + andl $255, %ebx + andl $255, %edi + call getIntelCacheEntryLineSize + movq %r12, %rdx + movq %r13, %rsi + movq %rbx, %rdi + movq 8(%rsp), %r12 + movq (%rsp), %rbx + movq 16(%rsp), %r13 + leave + jmp getIntelCacheEntryLineSize +.LFE4: + .size 
getIntelRegisterCacheLineSize, .-getIntelRegisterCacheLineSize + .align 16 +.globl s_mpi_getProcessorLineSize + .type s_mpi_getProcessorLineSize, @function +s_mpi_getProcessorLineSize: +.LFB7: + pushq %rbp +.LCFI7: + xorl %edi, %edi + movq %rsp, %rbp +.LCFI8: + pushq %r15 +.LCFI9: + leaq -136(%rbp), %r8 + leaq -144(%rbp), %rcx + leaq -152(%rbp), %rdx + pushq %r14 +.LCFI10: + leaq -160(%rbp), %rsi + leaq -128(%rbp), %r14 + pushq %r13 +.LCFI11: + leaq manMap(%rip), %r13 + pushq %r12 +.LCFI12: + movl $9, %r12d + pushq %rbx +.LCFI13: + xorl %ebx, %ebx + subq $200, %rsp +.LCFI14: + call freebl_cpuid + movq -152(%rbp), %rax + movq -160(%rbp), %r15 + movb $0, -116(%rbp) + movl %eax, -128(%rbp) + movq -136(%rbp), %rax + movl %eax, -124(%rbp) + movq -144(%rbp), %rax + movl %eax, -120(%rbp) + .align 16 +.L18: + movslq %ebx,%rax + movq %r14, %rsi + movq (%r13,%rax,8), %rdi + call strcmp@PLT + testl %eax, %eax + cmove %ebx, %r12d + incl %ebx + cmpl $9, %ebx + jle .L18 + testl %r12d, %r12d + jne .L19 + xorl %eax, %eax + decl %r15d + movl $4, -204(%rbp) + movq $0, -200(%rbp) + jle .L21 + leaq -168(%rbp), %r8 + leaq -176(%rbp), %rcx + leaq -184(%rbp), %rdx + leaq -192(%rbp), %rsi + movl $2, %edi + xorl %ebx, %ebx + call freebl_cpuid + movq -192(%rbp), %rdi + movl %edi, %r12d + andl $15, %r12d + cmpl %r12d, %ebx + jl .L30 + jmp .L38 + .align 16 +.L25: + movq -184(%rbp), %rdi + testl $2147483648, %edi + je .L40 +.L26: + movq -176(%rbp), %rdi + testl $2147483648, %edi + je .L41 +.L27: + movq -168(%rbp), %rdi + testl $2147483648, %edi + je .L42 +.L28: + incl %ebx + cmpl %r12d, %ebx + je .L24 + leaq -168(%rbp), %r8 + leaq -176(%rbp), %rcx + leaq -184(%rbp), %rdx + leaq -192(%rbp), %rsi + movl $2, %edi + call freebl_cpuid +.L24: + cmpl %r12d, %ebx + jge .L38 + movq -192(%rbp), %rdi +.L30: + testl $2147483648, %edi + jne .L25 + leaq -200(%rbp), %rdx + leaq -204(%rbp), %rsi + andl $4294967040, %edi + call getIntelRegisterCacheLineSize + movq -184(%rbp), %rdi + testl $2147483648, %edi + 
jne .L26 +.L40: + leaq -200(%rbp), %rdx + leaq -204(%rbp), %rsi + call getIntelRegisterCacheLineSize + movq -176(%rbp), %rdi + testl $2147483648, %edi + jne .L27 +.L41: + leaq -200(%rbp), %rdx + leaq -204(%rbp), %rsi + call getIntelRegisterCacheLineSize + movq -168(%rbp), %rdi + testl $2147483648, %edi + jne .L28 +.L42: + leaq -200(%rbp), %rdx + leaq -204(%rbp), %rsi + call getIntelRegisterCacheLineSize + jmp .L28 +.L38: + movq -200(%rbp), %rax +.L21: + movq %rax, %rdx + movl $32, %eax + testq %rdx, %rdx + cmoveq %rax, %rdx + addq $200, %rsp + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + leave + movq %rdx, %rax + ret +.L19: + leaq -216(%rbp), %r8 + leaq -224(%rbp), %rcx + leaq -232(%rbp), %rdx + leaq -240(%rbp), %rsi + movl $2147483648, %edi + xorl %ebx, %ebx + call freebl_cpuid + movl $2147483652, %eax + cmpq %rax, -240(%rbp) + ja .L43 +.L32: + movq %rbx, %rdx + movl $32, %eax + testq %rdx, %rdx + cmoveq %rax, %rdx + addq $200, %rsp + popq %rbx + popq %r12 + popq %r13 + popq %r14 + popq %r15 + leave + movq %rdx, %rax + ret +.L43: + leaq -216(%rbp), %r8 + leaq -224(%rbp), %rcx + leaq -232(%rbp), %rdx + leaq -240(%rbp), %rsi + movl $2147483653, %edi + call freebl_cpuid + movzbq -224(%rbp), %rbx + jmp .L32 +.LFE7: + .size s_mpi_getProcessorLineSize, .-s_mpi_getProcessorLineSize diff --git a/security/nss/lib/freebl/mpi/mpcpucache_x86.s b/security/nss/lib/freebl/mpi/mpcpucache_x86.s new file mode 100644 index 000000000..af17ebcb4 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpcpucache_x86.s @@ -0,0 +1,902 @@ +/ This Source Code Form is subject to the terms of the Mozilla Public +/ License, v. 2.0. If a copy of the MPL was not distributed with this +/ file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ + .file "mpcpucache.c" +/ .section .rodata.str1.1,"aMS",@progbits,1 + .section .rodata +.LC0: + .string "GenuineIntel" +.LC1: + .string "AuthenticAMD" +.LC2: + .string "CyrixInstead" +.LC3: + .string "CentaurHauls" +.LC4: + .string "NexGenDriven" +.LC5: + .string "GenuineTMx86" +.LC6: + .string "RiseRiseRise" +.LC7: + .string "UMC UMC UMC " +.LC8: + .string "Sis Sis Sis " +.LC9: + .string "Geode by NSC" + .section .data.rel.ro.local,"aw",@progbits + .align 32 + .type manMap, @object + .size manMap, 40 +manMap: + .long .LC0 + .long .LC1 + .long .LC2 + .long .LC3 + .long .LC4 + .long .LC5 + .long .LC6 + .long .LC7 + .long .LC8 + .long .LC9 + .section .rodata + .align 32 + .type CacheMap, @object + .size CacheMap, 512 +CacheMap: + .byte 0 + .byte 0 + .byte 3 + .byte 0 + .byte 3 + .byte 0 + .byte 4 + .byte 0 + .byte 4 + .zero 1 + .byte 1 + .byte 0 + .byte 7 + .byte 32 + .byte 1 + .byte 0 + .byte 7 + .byte 32 + .byte 1 + .byte 0 + .byte 8 + .byte 32 + .byte 1 + .byte 0 + .byte 8 + .byte 32 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 12 + .byte 64 + .byte 12 + .byte 64 + .byte 1 + .byte 0 + .byte 12 + .byte 64 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 12 + .byte 64 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 8 + .byte 64 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 7 + .byte 64 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 9 + .byte 64 + .byte 1 + .byte 0 + .byte 9 + .byte 64 + .byte 9 + .byte 64 + .byte 1 + .byte 0 + 
.byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 9 + .byte 0 + .byte 9 + .byte 32 + .byte 9 + .byte 32 + .byte 9 + .byte 32 + .byte 9 + .byte 32 + .byte 9 + .byte 32 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 3 + .byte 0 + .byte 3 + .byte 0 + .byte 3 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 4 + .byte 0 + .byte 4 + .byte 0 + .byte 4 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 8 + .byte 64 + .byte 8 + .byte 64 + .byte 8 + .byte 64 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 5 + .byte 1 + .byte 5 + .byte 1 + .byte 5 + .byte 1 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 9 + .byte 64 + .byte 9 + .byte 64 + .byte 9 + .byte 64 + .byte 9 + .byte 64 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 9 + .byte 32 + .byte 9 + .byte 32 + .byte 9 + .byte 32 + .byte 9 + .byte 32 + .byte 9 + .byte 64 + .byte 9 + .byte 64 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + 
.byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 3 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 4 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .byte 1 + .byte 0 + .text + .align 4 +.globl freebl_cpuid + .type freebl_cpuid, @function +freebl_cpuid: + pushl %ebp + 
pushl %edi + pushl %esi + subl $8, %esp + movl %edx, %ebp +/APP + pushl %ebx + xorl %ecx, %ecx + cpuid + mov %ebx,%esi + popl %ebx + +/NO_APP + movl %eax, (%ebp) + movl 24(%esp), %eax + movl %esi, (%eax) + movl 28(%esp), %eax + movl %ecx, (%eax) + movl 32(%esp), %eax + movl %edx, (%eax) + addl $8, %esp + popl %esi + popl %edi + popl %ebp + ret + .size freebl_cpuid, .-freebl_cpuid + .align 4 + .type changeFlag, @function +changeFlag: +/APP + pushfl + popl %edx + movl %edx,%ecx + xorl %eax,%edx + pushl %edx + popfl + pushfl + popl %edx + pushl %ecx + popfl + +/NO_APP + xorl %ecx, %edx + movl %edx, %eax + ret + .size changeFlag, .-changeFlag + .align 4 + .type getIntelCacheEntryLineSize, @function +getIntelCacheEntryLineSize: + pushl %edi + pushl %esi + pushl %ebx + call .L17 +.L17: + popl %ebx + addl $_GLOBAL_OFFSET_TABLE_+[.-.L17], %ebx + movzbl CacheMap@GOTOFF(%ebx,%eax,2), %ecx + movb 1+CacheMap@GOTOFF(%ebx,%eax,2), %al + testb %al, %al + movl 16(%esp), %edi + je .L3 + cmpl $6, %ecx + je .L6 + cmpl $8, %ecx + je .L6 + movl (%edx), %esi + cmpl $1, %esi + jg .L15 +.L8: + cmpl $2, %esi + jle .L3 + cmpl $12, %ecx + je .L12 + cmpl $14, %ecx + je .L12 + .align 4 +.L3: + popl %ebx + popl %esi + popl %edi + ret + .align 4 +.L6: + movzbl %al, %eax + movl $1, (%edx) + movl %eax, (%edi) +.L16: + popl %ebx + popl %esi + popl %edi + ret + .align 4 +.L15: + cmpl $9, %ecx + je .L9 + cmpl $11, %ecx + jne .L8 +.L9: + movzbl %al, %eax + movl $2, (%edx) + movl %eax, (%edi) + jmp .L16 +.L12: + movzbl %al, %eax + movl $3, (%edx) + movl %eax, (%edi) + jmp .L16 + .size getIntelCacheEntryLineSize, .-getIntelCacheEntryLineSize + .align 4 + .type getIntelRegisterCacheLineSize, @function +getIntelRegisterCacheLineSize: + pushl %ebp + movl %esp, %ebp + pushl %edi + pushl %esi + pushl %ecx + movl 8(%ebp), %edi + movl %eax, %esi + movl %edx, -12(%ebp) + shrl $24, %eax + pushl %edi + call getIntelCacheEntryLineSize + movl %esi, %eax + pushl %edi + shrl $16, %eax + movl -12(%ebp), %edx + andl 
$255, %eax + call getIntelCacheEntryLineSize + pushl %edi + movl %esi, %edx + movzbl %dh, %eax + movl -12(%ebp), %edx + call getIntelCacheEntryLineSize + andl $255, %esi + movl %edi, 8(%ebp) + movl -12(%ebp), %edx + addl $12, %esp + leal -8(%ebp), %esp + movl %esi, %eax + popl %esi + popl %edi + leave + jmp getIntelCacheEntryLineSize + .size getIntelRegisterCacheLineSize, .-getIntelRegisterCacheLineSize + .align 4 +.globl s_mpi_getProcessorLineSize + .type s_mpi_getProcessorLineSize, @function +s_mpi_getProcessorLineSize: + pushl %ebp + movl %esp, %ebp + pushl %edi + pushl %esi + pushl %ebx + subl $188, %esp + call .L52 +.L52: + popl %ebx + addl $_GLOBAL_OFFSET_TABLE_+[.-.L52], %ebx + movl $9, -168(%ebp) + movl $262144, %eax + call changeFlag + xorl %edx, %edx + testl %eax, %eax + jne .L50 +.L19: + leal -12(%ebp), %esp + popl %ebx + popl %esi + movl %edx, %eax + popl %edi + leave + ret + .align 4 +.L50: + movl $2097152, %eax + call changeFlag + testl %eax, %eax + movl $32, %edx + je .L19 + leal -108(%ebp), %eax + pushl %eax + leal -112(%ebp), %eax + pushl %eax + leal -116(%ebp), %eax + pushl %eax + leal -120(%ebp), %edx + xorl %eax, %eax + call freebl_cpuid + movl -120(%ebp), %eax + movl %eax, -164(%ebp) + movl -116(%ebp), %eax + movl %eax, -104(%ebp) + movl -108(%ebp), %eax + movl %eax, -100(%ebp) + movl -112(%ebp), %eax + movl %eax, -96(%ebp) + movb $0, -92(%ebp) + xorl %esi, %esi + addl $12, %esp + leal -104(%ebp), %edi + .align 4 +.L28: + subl $8, %esp + pushl %edi + pushl manMap@GOTOFF(%ebx,%esi,4) + call strcmp@PLT + addl $16, %esp + testl %eax, %eax + jne .L26 + movl %esi, -168(%ebp) +.L26: + incl %esi + cmpl $9, %esi + jle .L28 + movl -168(%ebp), %eax + testl %eax, %eax + jne .L29 + xorl %eax, %eax + cmpl $1, -164(%ebp) + movl $4, -144(%ebp) + movl $0, -140(%ebp) + jle .L41 + leal -124(%ebp), %edx + movl %edx, -188(%ebp) + leal -128(%ebp), %eax + pushl %edx + movl %eax, -184(%ebp) + leal -132(%ebp), %edx + pushl %eax + movl %edx, -180(%ebp) + movl $2, %eax 
+ pushl %edx + leal -136(%ebp), %edx + call freebl_cpuid + movl -136(%ebp), %eax + movl %eax, %edi + andl $15, %edi + xorl %esi, %esi + addl $12, %esp + leal -140(%ebp), %edx + cmpl %edi, %esi + movl %edx, -176(%ebp) + jl .L40 + jmp .L48 + .align 4 +.L49: + movl -136(%ebp), %eax +.L40: + testl %eax, %eax + js .L35 + xorb %al, %al + pushl -176(%ebp) + leal -144(%ebp), %edx + call getIntelRegisterCacheLineSize + popl %eax +.L35: + movl -132(%ebp), %eax + testl %eax, %eax + js .L36 + pushl -176(%ebp) + leal -144(%ebp), %edx + call getIntelRegisterCacheLineSize + popl %eax +.L36: + movl -128(%ebp), %eax + testl %eax, %eax + js .L37 + pushl -176(%ebp) + leal -144(%ebp), %edx + call getIntelRegisterCacheLineSize + popl %eax +.L37: + movl -124(%ebp), %eax + testl %eax, %eax + js .L38 + pushl -176(%ebp) + leal -144(%ebp), %edx + call getIntelRegisterCacheLineSize + popl %eax +.L38: + incl %esi + cmpl %edi, %esi + je .L34 + pushl -188(%ebp) + pushl -184(%ebp) + pushl -180(%ebp) + leal -136(%ebp), %edx + movl $2, %eax + call freebl_cpuid + addl $12, %esp +.L34: + cmpl %edi, %esi + jl .L49 +.L48: + movl -140(%ebp), %eax +.L41: + testl %eax, %eax + jne .L44 + movb $32, %al +.L44: + leal -12(%ebp), %esp + popl %ebx + popl %esi + movl %eax, %edx + movl %edx, %eax + popl %edi + leave + ret +.L29: + leal -148(%ebp), %eax + movl %eax, -192(%ebp) + movl $0, -172(%ebp) + leal -152(%ebp), %edi + pushl %eax + pushl %edi + leal -156(%ebp), %esi + pushl %esi + leal -160(%ebp), %edx + movl $-2147483648, %eax + call freebl_cpuid + addl $12, %esp + cmpl $-2147483644, -160(%ebp) + ja .L51 +.L42: + movl -172(%ebp), %eax + jmp .L41 +.L51: + pushl -192(%ebp) + pushl %edi + pushl %esi + leal -160(%ebp), %edx + movl $-2147483643, %eax + call freebl_cpuid + movzbl -152(%ebp), %edx + addl $12, %esp + movl %edx, -172(%ebp) + jmp .L42 + .size s_mpi_getProcessorLineSize, .-s_mpi_getProcessorLineSize diff --git a/security/nss/lib/freebl/mpi/mpi-config.h b/security/nss/lib/freebl/mpi/mpi-config.h new 
file mode 100644 index 000000000..f365592a4 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpi-config.h @@ -0,0 +1,68 @@ +/* Default configuration for MPI library + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef MPI_CONFIG_H_ +#define MPI_CONFIG_H_ + +/* + For boolean options, + 0 = no + 1 = yes + + Other options are documented individually. + + */ + +#ifndef MP_IOFUNC +#define MP_IOFUNC 0 /* include mp_print() ? */ +#endif + +#ifndef MP_MODARITH +#define MP_MODARITH 1 /* include modular arithmetic ? */ +#endif + +#ifndef MP_NUMTH +#define MP_NUMTH 1 /* include number theoretic functions? */ +#endif + +#ifndef MP_LOGTAB +#define MP_LOGTAB 1 /* use table of logs instead of log()? */ +#endif + +#ifndef MP_MEMSET +#define MP_MEMSET 1 /* use memset() to zero buffers? */ +#endif + +#ifndef MP_MEMCPY +#define MP_MEMCPY 1 /* use memcpy() to copy buffers? */ +#endif + +#ifndef MP_ARGCHK +/* + 0 = no parameter checks + 1 = runtime checks, continue execution and return an error to caller + 2 = assertions; dump core on parameter errors + */ +#ifdef DEBUG +#define MP_ARGCHK 2 /* how to check input arguments */ +#else +#define MP_ARGCHK 1 /* how to check input arguments */ +#endif +#endif + +#ifndef MP_DEBUG +#define MP_DEBUG 0 /* print diagnostic output? */ +#endif + +#ifndef MP_DEFPREC +#define MP_DEFPREC 64 /* default precision, in digits */ +#endif + +#ifndef MP_SQUARE +#define MP_SQUARE 1 /* use separate squaring code? 
*/ +#endif + +#endif /* ifndef MPI_CONFIG_H_ */ diff --git a/security/nss/lib/freebl/mpi/mpi-priv.h b/security/nss/lib/freebl/mpi/mpi-priv.h new file mode 100644 index 000000000..b34452c48 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpi-priv.h @@ -0,0 +1,243 @@ +/* + * mpi-priv.h - Private header file for MPI + * Arbitrary precision integer arithmetic library + * + * NOTE WELL: the content of this header file is NOT part of the "public" + * API for the MPI library, and may change at any time. + * Application programs that use libmpi should NOT include this header file. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +#ifndef _MPI_PRIV_H_ +#define _MPI_PRIV_H_ 1 + +#include "mpi.h" +#include <stdlib.h> +#include <string.h> +#include <ctype.h> /* NOTE(review): the three system header names above were missing from the directives; restored from memory of upstream NSS mpi-priv.h -- confirm */ + +#if MP_DEBUG +#include <stdio.h> /* fprintf(), fputc() and stderr, used by DIAG() */ + +#define DIAG(T, V) \ + { \ + fprintf(stderr, T); \ + mp_print(V, stderr); \ + fputc('\n', stderr); \ + } +#else +#define DIAG(T, V) +#endif + +/* If we aren't using a wired-in logarithm table, we need to include + the math library to get the log() function + */ + +/* {{{ s_logv_2[] - log table for 2 in various bases */ + +#if MP_LOGTAB +/* + A table of the logs of 2 for various bases (the 0 and 1 entries of + this table are meaningless and should not be referenced). + + This table is used to compute output lengths for the mp_toradix() + function. Since a number n in radix r takes up about log_r(n) + digits, we estimate the output size by taking the least integer + greater than log_r(n), where: + + log_r(n) = log_2(n) * log_r(2) + + This table, therefore, is a table of log_r(2) for 2 <= r <= 36, + which are the output bases supported. + */ + +extern const float s_logv_2[]; +#define LOG_V_2(R) s_logv_2[(R)] + +#else + +/* + If MP_LOGTAB is not defined, use the math library to compute the + logarithms on the fly. Otherwise, use the table.
+ Pick which works best for your system. + */ + +#include <math.h> /* log(), used by the LOG_V_2 fallback below */ +#define LOG_V_2(R) (log(2.0) / log(R)) + +#endif /* if MP_LOGTAB */ + +/* }}} */ + +/* {{{ Digit arithmetic macros */ + +/* + When adding and multiplying digits, the results can be larger than + can be contained in an mp_digit. Thus, an mp_word is used. These + macros mask off the upper and lower digits of the mp_word (the + mp_word may be more than 2 mp_digits wide, but we only concern + ourselves with the low-order 2 mp_digits) + */ + +#define CARRYOUT(W) (mp_digit)((W) >> DIGIT_BIT) +#define ACCUM(W) (mp_digit)(W) + +#define MP_MIN(a, b) (((a) < (b)) ? (a) : (b)) +#define MP_MAX(a, b) (((a) > (b)) ? (a) : (b)) +#define MP_HOWMANY(a, b) (((a) + (b)-1) / (b)) +#define MP_ROUNDUP(a, b) (MP_HOWMANY(a, b) * (b)) + +/* }}} */ + +/* {{{ Comparison constants */ + +#define MP_LT -1 +#define MP_EQ 0 +#define MP_GT 1 + +/* }}} */ + +/* {{{ private function declarations */ + +void s_mp_setz(mp_digit *dp, mp_size count); /* zero digits */ +void s_mp_copy(const mp_digit *sp, mp_digit *dp, mp_size count); /* copy */ +void *s_mp_alloc(size_t nb, size_t ni); /* general allocator */ +void s_mp_free(void *ptr); /* general free function */ + +mp_err s_mp_grow(mp_int *mp, mp_size min); /* increase allocated size */ +mp_err s_mp_pad(mp_int *mp, mp_size min); /* left pad with zeroes */ + +void s_mp_clamp(mp_int *mp); /* clip leading zeroes */ + +void s_mp_exch(mp_int *a, mp_int *b); /* swap a and b in place */ + +mp_err s_mp_lshd(mp_int *mp, mp_size p); /* left-shift by p digits */ +void s_mp_rshd(mp_int *mp, mp_size p); /* right-shift by p digits */ +mp_err s_mp_mul_2d(mp_int *mp, mp_digit d); /* multiply by 2^d in place */ +void s_mp_div_2d(mp_int *mp, mp_digit d); /* divide by 2^d in place */ +void s_mp_mod_2d(mp_int *mp, mp_digit d); /* modulo 2^d in place */ +void s_mp_div_2(mp_int *mp); /* divide by 2 in place */ +mp_err s_mp_mul_2(mp_int *mp); /* multiply by 2 in place */ +mp_err s_mp_norm(mp_int *a, mp_int *b, mp_digit
*pd); +/* normalize for division */ +mp_err s_mp_add_d(mp_int *mp, mp_digit d); /* unsigned digit addition */ +mp_err s_mp_sub_d(mp_int *mp, mp_digit d); /* unsigned digit subtract */ +mp_err s_mp_mul_d(mp_int *mp, mp_digit d); /* unsigned digit multiply */ +mp_err s_mp_div_d(mp_int *mp, mp_digit d, mp_digit *r); +/* unsigned digit divide */ +mp_err s_mp_reduce(mp_int *x, const mp_int *m, const mp_int *mu); +/* Barrett reduction */ +mp_err s_mp_add(mp_int *a, const mp_int *b); /* magnitude addition */ +mp_err s_mp_add_3arg(const mp_int *a, const mp_int *b, mp_int *c); +mp_err s_mp_sub(mp_int *a, const mp_int *b); /* magnitude subtract */ +mp_err s_mp_sub_3arg(const mp_int *a, const mp_int *b, mp_int *c); +mp_err s_mp_add_offset(mp_int *a, mp_int *b, mp_size offset); +/* a += b * RADIX^offset */ +mp_err s_mp_mul(mp_int *a, const mp_int *b); /* magnitude multiply */ +#if MP_SQUARE +mp_err s_mp_sqr(mp_int *a); /* magnitude square */ +#else +#define s_mp_sqr(a) s_mp_mul(a, a) +#endif +mp_err s_mp_div(mp_int *rem, mp_int *div, mp_int *quot); /* magnitude div */ +mp_err s_mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c); +mp_err s_mp_2expt(mp_int *a, mp_digit k); /* a = 2^k */ +int s_mp_cmp(const mp_int *a, const mp_int *b); /* magnitude comparison */ +int s_mp_cmp_d(const mp_int *a, mp_digit d); /* magnitude digit compare */ +int s_mp_ispow2(const mp_int *v); /* is v a power of 2? */ +int s_mp_ispow2d(mp_digit d); /* is d a power of 2? 
*/ + +int s_mp_tovalue(char ch, int r); /* convert ch to value */ +char s_mp_todigit(mp_digit val, int r, int low); /* convert val to digit */ +int s_mp_outlen(int bits, int r); /* output length in bytes */ +mp_digit s_mp_invmod_radix(mp_digit P); /* returns (P ** -1) mod RADIX */ +mp_err s_mp_invmod_odd_m(const mp_int *a, const mp_int *m, mp_int *c); +mp_err s_mp_invmod_2d(const mp_int *a, mp_size k, mp_int *c); +mp_err s_mp_invmod_even_m(const mp_int *a, const mp_int *m, mp_int *c); + +#ifdef NSS_USE_COMBA + +#define IS_POWER_OF_2(a) ((a) && !((a) & ((a)-1))) + +void s_mp_mul_comba_4(const mp_int *A, const mp_int *B, mp_int *C); +void s_mp_mul_comba_8(const mp_int *A, const mp_int *B, mp_int *C); +void s_mp_mul_comba_16(const mp_int *A, const mp_int *B, mp_int *C); +void s_mp_mul_comba_32(const mp_int *A, const mp_int *B, mp_int *C); + +void s_mp_sqr_comba_4(const mp_int *A, mp_int *B); +void s_mp_sqr_comba_8(const mp_int *A, mp_int *B); +void s_mp_sqr_comba_16(const mp_int *A, mp_int *B); +void s_mp_sqr_comba_32(const mp_int *A, mp_int *B); + +#endif /* end NSS_USE_COMBA */ + +/* ------ mpv functions, operate on arrays of digits, not on mp_int's ------ */ +#if defined(__OS2__) && defined(__IBMC__) +#define MPI_ASM_DECL __cdecl +#else +#define MPI_ASM_DECL +#endif + +#ifdef MPI_AMD64 + +mp_digit MPI_ASM_DECL s_mpv_mul_set_vec64(mp_digit *, mp_digit *, mp_size, mp_digit); +mp_digit MPI_ASM_DECL s_mpv_mul_add_vec64(mp_digit *, const mp_digit *, mp_size, mp_digit); + +/* c = a * b */ +#define s_mpv_mul_d(a, a_len, b, c) \ + ((mp_digit *)c)[a_len] = s_mpv_mul_set_vec64(c, a, a_len, b) + +/* c += a * b */ +#define s_mpv_mul_d_add(a, a_len, b, c) \ + ((mp_digit *)c)[a_len] = s_mpv_mul_add_vec64(c, a, a_len, b) + +#else + +void MPI_ASM_DECL s_mpv_mul_d(const mp_digit *a, mp_size a_len, + mp_digit b, mp_digit *c); +void MPI_ASM_DECL s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, + mp_digit b, mp_digit *c); + +#endif + +void MPI_ASM_DECL s_mpv_mul_d_add_prop(const 
mp_digit *a, + mp_size a_len, mp_digit b, + mp_digit *c); +void MPI_ASM_DECL s_mpv_sqr_add_prop(const mp_digit *a, + mp_size a_len, + mp_digit *sqrs); + +mp_err MPI_ASM_DECL s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, + mp_digit divisor, mp_digit *quot, mp_digit *rem); + +/* c += a * b * (MP_RADIX ** offset); */ +/* Callers of this macro should be aware that the return type might vary; + * it should be treated as a void function. */ +#define s_mp_mul_d_add_offset(a, b, c, off) \ + s_mpv_mul_d_add_prop(MP_DIGITS(a), MP_USED(a), b, MP_DIGITS(c) + off) + +typedef struct { + mp_int N; /* modulus N */ + mp_digit n0prime; /* n0' = - (n0 ** -1) mod MP_RADIX */ +} mp_mont_modulus; + +mp_err s_mp_mul_mont(const mp_int *a, const mp_int *b, mp_int *c, + mp_mont_modulus *mmm); +mp_err s_mp_redc(mp_int *T, mp_mont_modulus *mmm); + +/* + * s_mpi_getProcessorLineSize() returns the size in bytes of the cache line + * if a cache exists, or zero if there is no cache. If more than one + * cache line exists, it should return the smallest line size (which is + * usually the L1 cache). + * + * mp_modexp uses this information to make sure that private key information + * isn't being leaked through the cache. + * + * see mpcpucache.c for the implementation. + */ +unsigned long s_mpi_getProcessorLineSize(); + +/* }}} */ +#endif diff --git a/security/nss/lib/freebl/mpi/mpi.c b/security/nss/lib/freebl/mpi/mpi.c new file mode 100644 index 000000000..f6f75439c --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpi.c @@ -0,0 +1,4839 @@ +/* + * mpi.c + * + * Arbitrary precision integer arithmetic library + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "mpi-priv.h" +#if defined(OSF1) +#include <c_asm.h> /* NOTE(review): header name was missing from this directive; <c_asm.h> restored from memory of upstream NSS -- confirm */ +#endif + +#if defined(__arm__) && \ + ((defined(__thumb__) && !defined(__thumb2__)) || defined(__ARM_ARCH_3__)) +/* The inlined assembler version doesn't work on 16-bit Thumb or ARM v3 */ +#undef MP_ASSEMBLY_MULTIPLY +#undef MP_ASSEMBLY_SQUARE +#endif + +#if MP_LOGTAB +/* + A table of the logs of 2 for various bases (the 0 and 1 entries of + this table are meaningless and should not be referenced). + + This table is used to compute output lengths for the mp_toradix() + function. Since a number n in radix r takes up about log_r(n) + digits, we estimate the output size by taking the least integer + greater than log_r(n), where: + + log_r(n) = log_2(n) * log_r(2) + + This table, therefore, is a table of log_r(2) for 2 <= r <= 36, + which are the output bases supported. + */ +#include "logtab.h" +#endif + +#ifdef CT_VERIF +#include <valgrind/memcheck.h> /* VALGRIND_MAKE_MEM_UNDEFINED / VALGRIND_MAKE_MEM_DEFINED, used by mp_taint() / mp_untaint() */ +#endif + +/* {{{ Constant strings */ + +/* Constant strings returned by mp_strerror() */ +static const char *mp_err_string[] = { + "unknown result code", /* say what?
*/ + "boolean true", /* MP_OKAY, MP_YES */ + "boolean false", /* MP_NO */ + "out of memory", /* MP_MEM */ + "argument out of range", /* MP_RANGE */ + "invalid input parameter", /* MP_BADARG */ + "result is undefined" /* MP_UNDEF */ +}; + +/* Value to digit maps for radix conversion */ + +/* s_dmap_1 - standard digits and letters */ +static const char *s_dmap_1 = + "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/"; + +/* }}} */ + +/* {{{ Default precision manipulation */ + +/* Default precision for newly created mp_int's */ +static mp_size s_mp_defprec = MP_DEFPREC; + +mp_size +mp_get_prec(void) +{ + return s_mp_defprec; + +} /* end mp_get_prec() */ + +void +mp_set_prec(mp_size prec) +{ + if (prec == 0) + s_mp_defprec = MP_DEFPREC; + else + s_mp_defprec = prec; + +} /* end mp_set_prec() */ + +/* }}} */ + +#ifdef CT_VERIF +void +mp_taint(mp_int *mp) +{ + size_t i; + for (i = 0; i < mp->used; ++i) { + VALGRIND_MAKE_MEM_UNDEFINED(&(mp->dp[i]), sizeof(mp_digit)); + } +} + +void +mp_untaint(mp_int *mp) +{ + size_t i; + for (i = 0; i < mp->used; ++i) { + VALGRIND_MAKE_MEM_DEFINED(&(mp->dp[i]), sizeof(mp_digit)); + } +} +#endif + +/*------------------------------------------------------------------------*/ +/* {{{ mp_init(mp) */ + +/* + mp_init(mp) + + Initialize a new zero-valued mp_int. Returns MP_OKAY if successful, + MP_MEM if memory could not be allocated for the structure. + */ + +mp_err +mp_init(mp_int *mp) +{ + return mp_init_size(mp, s_mp_defprec); + +} /* end mp_init() */ + +/* }}} */ + +/* {{{ mp_init_size(mp, prec) */ + +/* + mp_init_size(mp, prec) + + Initialize a new zero-valued mp_int with at least the given + precision; returns MP_OKAY if successful, or MP_MEM if memory could + not be allocated for the structure. 
+ */ + +mp_err +mp_init_size(mp_int *mp, mp_size prec) +{ + ARGCHK(mp != NULL && prec > 0, MP_BADARG); + + prec = MP_ROUNDUP(prec, s_mp_defprec); + if ((DIGITS(mp) = s_mp_alloc(prec, sizeof(mp_digit))) == NULL) + return MP_MEM; + + SIGN(mp) = ZPOS; + USED(mp) = 1; + ALLOC(mp) = prec; + + return MP_OKAY; + +} /* end mp_init_size() */ + +/* }}} */ + +/* {{{ mp_init_copy(mp, from) */ + +/* + mp_init_copy(mp, from) + + Initialize mp as an exact copy of from. Returns MP_OKAY if + successful, MP_MEM if memory could not be allocated for the new + structure. + */ + +mp_err +mp_init_copy(mp_int *mp, const mp_int *from) +{ + ARGCHK(mp != NULL && from != NULL, MP_BADARG); + + if (mp == from) + return MP_OKAY; + + if ((DIGITS(mp) = s_mp_alloc(ALLOC(from), sizeof(mp_digit))) == NULL) + return MP_MEM; + + s_mp_copy(DIGITS(from), DIGITS(mp), USED(from)); + USED(mp) = USED(from); + ALLOC(mp) = ALLOC(from); + SIGN(mp) = SIGN(from); + + return MP_OKAY; + +} /* end mp_init_copy() */ + +/* }}} */ + +/* {{{ mp_copy(from, to) */ + +/* + mp_copy(from, to) + + Copies the mp_int 'from' to the mp_int 'to'. It is presumed that + 'to' has already been initialized (if not, use mp_init_copy() + instead). If 'from' and 'to' are identical, nothing happens. 
+ */ + +mp_err +mp_copy(const mp_int *from, mp_int *to) +{ + ARGCHK(from != NULL && to != NULL, MP_BADARG); + + if (from == to) + return MP_OKAY; + + { /* copy */ + mp_digit *tmp; + + /* + If the allocated buffer in 'to' already has enough space to hold + all the used digits of 'from', we'll re-use it to avoid hitting + the memory allocator more than necessary; otherwise, we'd have + to grow anyway, so we just allocate a hunk and make the copy as + usual + */ + if (ALLOC(to) >= USED(from)) { + s_mp_setz(DIGITS(to) + USED(from), ALLOC(to) - USED(from)); /* zero the tail beyond the digits about to be copied */ + s_mp_copy(DIGITS(from), DIGITS(to), USED(from)); + + } else { + if ((tmp = s_mp_alloc(ALLOC(from), sizeof(mp_digit))) == NULL) + return MP_MEM; /* 'to' is left untouched on allocation failure */ + + s_mp_copy(DIGITS(from), tmp, USED(from)); + + if (DIGITS(to) != NULL) { + s_mp_setz(DIGITS(to), ALLOC(to)); /* zero the old digits before releasing them */ + s_mp_free(DIGITS(to)); + } + + DIGITS(to) = tmp; + ALLOC(to) = ALLOC(from); + } + + /* Copy the used-digit count and sign from the original */ + USED(to) = USED(from); + SIGN(to) = SIGN(from); + } /* end copy */ + + return MP_OKAY; + +} /* end mp_copy() */ + +/* }}} */ + +/* {{{ mp_exch(mp1, mp2) */ + +/* + mp_exch(mp1, mp2) + + Exchange mp1 and mp2 without allocating any intermediate memory + (well, unless you count the stack space needed for this call and the + locals it creates...). This cannot fail. + */ + +void +mp_exch(mp_int *mp1, mp_int *mp2) +{ +#if MP_ARGCHK == 2 + assert(mp1 != NULL && mp2 != NULL); +#else + if (mp1 == NULL || mp2 == NULL) + return; /* silently ignore NULL arguments; there is no error return */ +#endif + + s_mp_exch(mp1, mp2); + +} /* end mp_exch() */ + +/* }}} */ + +/* {{{ mp_clear(mp) */ + +/* + mp_clear(mp) + + Release the storage used by an mp_int, and void its fields so that + if someone calls mp_clear() again for the same int later, we won't + get tollchocked (i.e. free the digits twice).
+ */ + +void +mp_clear(mp_int *mp) +{ + if (mp == NULL) + return; + + if (DIGITS(mp) != NULL) { + s_mp_setz(DIGITS(mp), ALLOC(mp)); + s_mp_free(DIGITS(mp)); + DIGITS(mp) = NULL; + } + + USED(mp) = 0; + ALLOC(mp) = 0; + +} /* end mp_clear() */ + +/* }}} */ + +/* {{{ mp_zero(mp) */ + +/* + mp_zero(mp) + + Set mp to zero. Does not change the allocated size of the structure, + and therefore cannot fail (except on a bad argument, which we ignore) + */ +void +mp_zero(mp_int *mp) +{ + if (mp == NULL) + return; + + s_mp_setz(DIGITS(mp), ALLOC(mp)); + USED(mp) = 1; + SIGN(mp) = ZPOS; + +} /* end mp_zero() */ + +/* }}} */ + +/* {{{ mp_set(mp, d) */ + +void +mp_set(mp_int *mp, mp_digit d) +{ + if (mp == NULL) + return; + + mp_zero(mp); + DIGIT(mp, 0) = d; + +} /* end mp_set() */ + +/* }}} */ + +/* {{{ mp_set_int(mp, z) */ + +mp_err +mp_set_int(mp_int *mp, long z) +{ + int ix; + unsigned long v = labs(z); + mp_err res; + + ARGCHK(mp != NULL, MP_BADARG); + + mp_zero(mp); + if (z == 0) + return MP_OKAY; /* shortcut for zero */ + + if (sizeof v <= sizeof(mp_digit)) { + DIGIT(mp, 0) = v; + } else { + for (ix = sizeof(long) - 1; ix >= 0; ix--) { + if ((res = s_mp_mul_d(mp, (UCHAR_MAX + 1))) != MP_OKAY) + return res; + + res = s_mp_add_d(mp, (mp_digit)((v >> (ix * CHAR_BIT)) & UCHAR_MAX)); + if (res != MP_OKAY) + return res; + } + } + if (z < 0) + SIGN(mp) = NEG; + + return MP_OKAY; + +} /* end mp_set_int() */ + +/* }}} */ + +/* {{{ mp_set_ulong(mp, z) */ + +mp_err +mp_set_ulong(mp_int *mp, unsigned long z) +{ + int ix; + mp_err res; + + ARGCHK(mp != NULL, MP_BADARG); + + mp_zero(mp); + if (z == 0) + return MP_OKAY; /* shortcut for zero */ + + if (sizeof z <= sizeof(mp_digit)) { + DIGIT(mp, 0) = z; + } else { + for (ix = sizeof(long) - 1; ix >= 0; ix--) { + if ((res = s_mp_mul_d(mp, (UCHAR_MAX + 1))) != MP_OKAY) + return res; + + res = s_mp_add_d(mp, (mp_digit)((z >> (ix * CHAR_BIT)) & UCHAR_MAX)); + if (res != MP_OKAY) + return res; + } + } + return MP_OKAY; +} /* end 
mp_set_ulong() */ + +/* }}} */ + +/*------------------------------------------------------------------------*/ +/* {{{ Digit arithmetic */ + +/* {{{ mp_add_d(a, d, b) */ + +/* + mp_add_d(a, d, b) + + Compute the sum b = a + d, for a single digit d. Respects the sign of + its primary addend (single digits are unsigned anyway). + */ + +mp_err +mp_add_d(const mp_int *a, mp_digit d, mp_int *b) +{ + mp_int tmp; + mp_err res; + + ARGCHK(a != NULL && b != NULL, MP_BADARG); + + if ((res = mp_init_copy(&tmp, a)) != MP_OKAY) + return res; + + if (SIGN(&tmp) == ZPOS) { + if ((res = s_mp_add_d(&tmp, d)) != MP_OKAY) + goto CLEANUP; + } else if (s_mp_cmp_d(&tmp, d) >= 0) { + if ((res = s_mp_sub_d(&tmp, d)) != MP_OKAY) + goto CLEANUP; + } else { + mp_neg(&tmp, &tmp); + + DIGIT(&tmp, 0) = d - DIGIT(&tmp, 0); + } + + if (s_mp_cmp_d(&tmp, 0) == 0) + SIGN(&tmp) = ZPOS; + + s_mp_exch(&tmp, b); + +CLEANUP: + mp_clear(&tmp); + return res; + +} /* end mp_add_d() */ + +/* }}} */ + +/* {{{ mp_sub_d(a, d, b) */ + +/* + mp_sub_d(a, d, b) + + Compute the difference b = a - d, for a single digit d. Respects the + sign of its subtrahend (single digits are unsigned anyway). + */ + +mp_err +mp_sub_d(const mp_int *a, mp_digit d, mp_int *b) +{ + mp_int tmp; + mp_err res; + + ARGCHK(a != NULL && b != NULL, MP_BADARG); + + if ((res = mp_init_copy(&tmp, a)) != MP_OKAY) + return res; + + if (SIGN(&tmp) == NEG) { + if ((res = s_mp_add_d(&tmp, d)) != MP_OKAY) + goto CLEANUP; + } else if (s_mp_cmp_d(&tmp, d) >= 0) { + if ((res = s_mp_sub_d(&tmp, d)) != MP_OKAY) + goto CLEANUP; + } else { + mp_neg(&tmp, &tmp); + + DIGIT(&tmp, 0) = d - DIGIT(&tmp, 0); + SIGN(&tmp) = NEG; + } + + if (s_mp_cmp_d(&tmp, 0) == 0) + SIGN(&tmp) = ZPOS; + + s_mp_exch(&tmp, b); + +CLEANUP: + mp_clear(&tmp); + return res; + +} /* end mp_sub_d() */ + +/* }}} */ + +/* {{{ mp_mul_d(a, d, b) */ + +/* + mp_mul_d(a, d, b) + + Compute the product b = a * d, for a single digit d. 
Respects the sign + of its multiplicand (single digits are unsigned anyway) + */ + +mp_err +mp_mul_d(const mp_int *a, mp_digit d, mp_int *b) +{ + mp_err res; + + ARGCHK(a != NULL && b != NULL, MP_BADARG); + + if (d == 0) { + mp_zero(b); + return MP_OKAY; + } + + if ((res = mp_copy(a, b)) != MP_OKAY) + return res; + + res = s_mp_mul_d(b, d); + + return res; + +} /* end mp_mul_d() */ + +/* }}} */ + +/* {{{ mp_mul_2(a, c) */ + +mp_err +mp_mul_2(const mp_int *a, mp_int *c) +{ + mp_err res; + + ARGCHK(a != NULL && c != NULL, MP_BADARG); + + if ((res = mp_copy(a, c)) != MP_OKAY) + return res; + + return s_mp_mul_2(c); + +} /* end mp_mul_2() */ + +/* }}} */ + +/* {{{ mp_div_d(a, d, q, r) */ + +/* + mp_div_d(a, d, q, r) + + Compute the quotient q = a / d and remainder r = a mod d, for a + single digit d. Respects the sign of its divisor (single digits are + unsigned anyway). + */ + +mp_err +mp_div_d(const mp_int *a, mp_digit d, mp_int *q, mp_digit *r) +{ + mp_err res; + mp_int qp; + mp_digit rem = 0; + int pow; + + ARGCHK(a != NULL, MP_BADARG); + + if (d == 0) + return MP_RANGE; + + /* Shortcut for powers of two ... */ + if ((pow = s_mp_ispow2d(d)) >= 0) { + mp_digit mask; + + mask = ((mp_digit)1 << pow) - 1; + rem = DIGIT(a, 0) & mask; + + if (q) { + if ((res = mp_copy(a, q)) != MP_OKAY) { + return res; + } + s_mp_div_2d(q, pow); + } + + if (r) + *r = rem; + + return MP_OKAY; + } + + if ((res = mp_init_copy(&qp, a)) != MP_OKAY) + return res; + + res = s_mp_div_d(&qp, d, &rem); + + if (s_mp_cmp_d(&qp, 0) == 0) + SIGN(q) = ZPOS; + + if (r) { + *r = rem; + } + + if (q) + s_mp_exch(&qp, q); + + mp_clear(&qp); + return res; + +} /* end mp_div_d() */ + +/* }}} */ + +/* {{{ mp_div_2(a, c) */ + +/* + mp_div_2(a, c) + + Compute c = a / 2, disregarding the remainder. 
+ */ + +mp_err +mp_div_2(const mp_int *a, mp_int *c) +{ + mp_err res; + + ARGCHK(a != NULL && c != NULL, MP_BADARG); + + if ((res = mp_copy(a, c)) != MP_OKAY) + return res; + + s_mp_div_2(c); + + return MP_OKAY; + +} /* end mp_div_2() */ + +/* }}} */ + +/* {{{ mp_expt_d(a, d, b) */ + +mp_err +mp_expt_d(const mp_int *a, mp_digit d, mp_int *c) +{ + mp_int s, x; + mp_err res; + + ARGCHK(a != NULL && c != NULL, MP_BADARG); + + if ((res = mp_init(&s)) != MP_OKAY) + return res; + if ((res = mp_init_copy(&x, a)) != MP_OKAY) + goto X; + + DIGIT(&s, 0) = 1; + + while (d != 0) { + if (d & 1) { + if ((res = s_mp_mul(&s, &x)) != MP_OKAY) + goto CLEANUP; + } + + d /= 2; + + if ((res = s_mp_sqr(&x)) != MP_OKAY) + goto CLEANUP; + } + + s_mp_exch(&s, c); + +CLEANUP: + mp_clear(&x); +X: + mp_clear(&s); + + return res; + +} /* end mp_expt_d() */ + +/* }}} */ + +/* }}} */ + +/*------------------------------------------------------------------------*/ +/* {{{ Full arithmetic */ + +/* {{{ mp_abs(a, b) */ + +/* + mp_abs(a, b) + + Compute b = |a|. 'a' and 'b' may be identical. + */ + +mp_err +mp_abs(const mp_int *a, mp_int *b) +{ + mp_err res; + + ARGCHK(a != NULL && b != NULL, MP_BADARG); + + if ((res = mp_copy(a, b)) != MP_OKAY) + return res; + + SIGN(b) = ZPOS; + + return MP_OKAY; + +} /* end mp_abs() */ + +/* }}} */ + +/* {{{ mp_neg(a, b) */ + +/* + mp_neg(a, b) + + Compute b = -a. 'a' and 'b' may be identical. + */ + +mp_err +mp_neg(const mp_int *a, mp_int *b) +{ + mp_err res; + + ARGCHK(a != NULL && b != NULL, MP_BADARG); + + if ((res = mp_copy(a, b)) != MP_OKAY) + return res; + + if (s_mp_cmp_d(b, 0) == MP_EQ) + SIGN(b) = ZPOS; + else + SIGN(b) = (SIGN(b) == NEG) ? ZPOS : NEG; + + return MP_OKAY; + +} /* end mp_neg() */ + +/* }}} */ + +/* {{{ mp_add(a, b, c) */ + +/* + mp_add(a, b, c) + + Compute c = a + b. All parameters may be identical. 
+ */ + +mp_err +mp_add(const mp_int *a, const mp_int *b, mp_int *c) +{ + mp_err res; + + ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG); + + if (SIGN(a) == SIGN(b)) { /* same sign: add values, keep sign */ + MP_CHECKOK(s_mp_add_3arg(a, b, c)); + } else if (s_mp_cmp(a, b) >= 0) { /* different sign: |a| >= |b| */ + MP_CHECKOK(s_mp_sub_3arg(a, b, c)); + } else { /* different sign: |a| < |b| */ + MP_CHECKOK(s_mp_sub_3arg(b, a, c)); + } + + if (s_mp_cmp_d(c, 0) == MP_EQ) + SIGN(c) = ZPOS; + +CLEANUP: + return res; + +} /* end mp_add() */ + +/* }}} */ + +/* {{{ mp_sub(a, b, c) */ + +/* + mp_sub(a, b, c) + + Compute c = a - b. All parameters may be identical. + */ + +mp_err +mp_sub(const mp_int *a, const mp_int *b, mp_int *c) +{ + mp_err res; + int magDiff; + + ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG); + + if (a == b) { + mp_zero(c); + return MP_OKAY; + } + + if (MP_SIGN(a) != MP_SIGN(b)) { + MP_CHECKOK(s_mp_add_3arg(a, b, c)); + } else if (!(magDiff = s_mp_cmp(a, b))) { + mp_zero(c); + res = MP_OKAY; + } else if (magDiff > 0) { + MP_CHECKOK(s_mp_sub_3arg(a, b, c)); + } else { + MP_CHECKOK(s_mp_sub_3arg(b, a, c)); + MP_SIGN(c) = !MP_SIGN(a); + } + + if (s_mp_cmp_d(c, 0) == MP_EQ) + MP_SIGN(c) = MP_ZPOS; + +CLEANUP: + return res; + +} /* end mp_sub() */ + +/* }}} */ + +/* {{{ mp_mul(a, b, c) */ + +/* + mp_mul(a, b, c) + + Compute c = a * b. All parameters may be identical. 
+ */ +mp_err +mp_mul(const mp_int *a, const mp_int *b, mp_int *c) +{ + mp_digit *pb; + mp_int tmp; + mp_err res; + mp_size ib; + mp_size useda, usedb; + + ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG); + + if (a == c) { + if ((res = mp_init_copy(&tmp, a)) != MP_OKAY) + return res; + if (a == b) + b = &tmp; + a = &tmp; + } else if (b == c) { + if ((res = mp_init_copy(&tmp, b)) != MP_OKAY) + return res; + b = &tmp; + } else { + MP_DIGITS(&tmp) = 0; + } + + if (MP_USED(a) < MP_USED(b)) { + const mp_int *xch = b; /* switch a and b, to do fewer outer loops */ + b = a; + a = xch; + } + + MP_USED(c) = 1; + MP_DIGIT(c, 0) = 0; + if ((res = s_mp_pad(c, USED(a) + USED(b))) != MP_OKAY) + goto CLEANUP; + +#ifdef NSS_USE_COMBA + if ((MP_USED(a) == MP_USED(b)) && IS_POWER_OF_2(MP_USED(b))) { + if (MP_USED(a) == 4) { + s_mp_mul_comba_4(a, b, c); + goto CLEANUP; + } + if (MP_USED(a) == 8) { + s_mp_mul_comba_8(a, b, c); + goto CLEANUP; + } + if (MP_USED(a) == 16) { + s_mp_mul_comba_16(a, b, c); + goto CLEANUP; + } + if (MP_USED(a) == 32) { + s_mp_mul_comba_32(a, b, c); + goto CLEANUP; + } + } +#endif + + pb = MP_DIGITS(b); + s_mpv_mul_d(MP_DIGITS(a), MP_USED(a), *pb++, MP_DIGITS(c)); + + /* Outer loop: Digits of b */ + useda = MP_USED(a); + usedb = MP_USED(b); + for (ib = 1; ib < usedb; ib++) { + mp_digit b_i = *pb++; + + /* Inner product: Digits of a */ + if (b_i) + s_mpv_mul_d_add(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib); + else + MP_DIGIT(c, ib + useda) = b_i; + } + + s_mp_clamp(c); + + if (SIGN(a) == SIGN(b) || s_mp_cmp_d(c, 0) == MP_EQ) + SIGN(c) = ZPOS; + else + SIGN(c) = NEG; + +CLEANUP: + mp_clear(&tmp); + return res; +} /* end mp_mul() */ + +/* }}} */ + +/* {{{ mp_sqr(a, sqr) */ + +#if MP_SQUARE +/* + Computes the square of a. This can be done more + efficiently than a general multiplication, because many of the + computation steps are redundant when squaring. 
The inner product + step is a bit more complicated, but we save a fair number of + iterations of the multiplication loop. + */ + +/* sqr = a^2; Caller provides both a and tmp; */ +mp_err +mp_sqr(const mp_int *a, mp_int *sqr) +{ + mp_digit *pa; + mp_digit d; + mp_err res; + mp_size ix; + mp_int tmp; + int count; + + ARGCHK(a != NULL && sqr != NULL, MP_BADARG); + + if (a == sqr) { + if ((res = mp_init_copy(&tmp, a)) != MP_OKAY) + return res; + a = &tmp; + } else { + DIGITS(&tmp) = 0; + res = MP_OKAY; + } + + ix = 2 * MP_USED(a); + if (ix > MP_ALLOC(sqr)) { + MP_USED(sqr) = 1; + MP_CHECKOK(s_mp_grow(sqr, ix)); + } + MP_USED(sqr) = ix; + MP_DIGIT(sqr, 0) = 0; + +#ifdef NSS_USE_COMBA + if (IS_POWER_OF_2(MP_USED(a))) { + if (MP_USED(a) == 4) { + s_mp_sqr_comba_4(a, sqr); + goto CLEANUP; + } + if (MP_USED(a) == 8) { + s_mp_sqr_comba_8(a, sqr); + goto CLEANUP; + } + if (MP_USED(a) == 16) { + s_mp_sqr_comba_16(a, sqr); + goto CLEANUP; + } + if (MP_USED(a) == 32) { + s_mp_sqr_comba_32(a, sqr); + goto CLEANUP; + } + } +#endif + + pa = MP_DIGITS(a); + count = MP_USED(a) - 1; + if (count > 0) { + d = *pa++; + s_mpv_mul_d(pa, count, d, MP_DIGITS(sqr) + 1); + for (ix = 3; --count > 0; ix += 2) { + d = *pa++; + s_mpv_mul_d_add(pa, count, d, MP_DIGITS(sqr) + ix); + } /* for(ix ...) */ + MP_DIGIT(sqr, MP_USED(sqr) - 1) = 0; /* above loop stopped short of this. */ + + /* now sqr *= 2 */ + s_mp_mul_2(sqr); + } else { + MP_DIGIT(sqr, 1) = 0; + } + + /* now add the squares of the digits of a to sqr. */ + s_mpv_sqr_add_prop(MP_DIGITS(a), MP_USED(a), MP_DIGITS(sqr)); + + SIGN(sqr) = ZPOS; + s_mp_clamp(sqr); + +CLEANUP: + mp_clear(&tmp); + return res; + +} /* end mp_sqr() */ +#endif + +/* }}} */ + +/* {{{ mp_div(a, b, q, r) */ + +/* + mp_div(a, b, q, r) + + Compute q = a / b and r = a mod b. Input parameters may be re-used + as output parameters. 
If q or r is NULL, that portion of the + computation will be discarded (although it will still be computed) + */ +mp_err +mp_div(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r) +{ + mp_err res; + mp_int *pQ, *pR; + mp_int qtmp, rtmp, btmp; + int cmp; + mp_sign signA; + mp_sign signB; + + ARGCHK(a != NULL && b != NULL, MP_BADARG); + + signA = MP_SIGN(a); + signB = MP_SIGN(b); + + if (mp_cmp_z(b) == MP_EQ) + return MP_RANGE; + + DIGITS(&qtmp) = 0; + DIGITS(&rtmp) = 0; + DIGITS(&btmp) = 0; + + /* Set up some temporaries... */ + if (!r || r == a || r == b) { + MP_CHECKOK(mp_init_copy(&rtmp, a)); + pR = &rtmp; + } else { + MP_CHECKOK(mp_copy(a, r)); + pR = r; + } + + if (!q || q == a || q == b) { + MP_CHECKOK(mp_init_size(&qtmp, MP_USED(a))); + pQ = &qtmp; + } else { + MP_CHECKOK(s_mp_pad(q, MP_USED(a))); + pQ = q; + mp_zero(pQ); + } + + /* + If |a| <= |b|, we can compute the solution without division; + otherwise, we actually do the work required. + */ + if ((cmp = s_mp_cmp(a, b)) <= 0) { + if (cmp) { + /* r was set to a above. */ + mp_zero(pQ); + } else { + mp_set(pQ, 1); + mp_zero(pR); + } + } else { + MP_CHECKOK(mp_init_copy(&btmp, b)); + MP_CHECKOK(s_mp_div(pR, &btmp, pQ)); + } + + /* Compute the signs for the output */ + MP_SIGN(pR) = signA; /* Sr = Sa */ + /* Sq = ZPOS if Sa == Sb */ /* Sq = NEG if Sa != Sb */ + MP_SIGN(pQ) = (signA == signB) ? 
ZPOS : NEG; + + if (s_mp_cmp_d(pQ, 0) == MP_EQ) + SIGN(pQ) = ZPOS; + if (s_mp_cmp_d(pR, 0) == MP_EQ) + SIGN(pR) = ZPOS; + + /* Copy output, if it is needed */ + if (q && q != pQ) + s_mp_exch(pQ, q); + + if (r && r != pR) + s_mp_exch(pR, r); + +CLEANUP: + mp_clear(&btmp); + mp_clear(&rtmp); + mp_clear(&qtmp); + + return res; + +} /* end mp_div() */ + +/* }}} */ + +/* {{{ mp_div_2d(a, d, q, r) */ + +mp_err +mp_div_2d(const mp_int *a, mp_digit d, mp_int *q, mp_int *r) +{ + mp_err res; + + ARGCHK(a != NULL, MP_BADARG); + + if (q) { + if ((res = mp_copy(a, q)) != MP_OKAY) + return res; + } + if (r) { + if ((res = mp_copy(a, r)) != MP_OKAY) + return res; + } + if (q) { + s_mp_div_2d(q, d); + } + if (r) { + s_mp_mod_2d(r, d); + } + + return MP_OKAY; + +} /* end mp_div_2d() */ + +/* }}} */ + +/* {{{ mp_expt(a, b, c) */ + +/* + mp_expt(a, b, c) + + Compute c = a ** b, that is, raise a to the b power. Uses a + standard iterative square-and-multiply technique. + */ + +mp_err +mp_expt(mp_int *a, mp_int *b, mp_int *c) +{ + mp_int s, x; + mp_err res; + mp_digit d; + unsigned int dig, bit; + + ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG); + + if (mp_cmp_z(b) < 0) + return MP_RANGE; + + if ((res = mp_init(&s)) != MP_OKAY) + return res; + + mp_set(&s, 1); + + if ((res = mp_init_copy(&x, a)) != MP_OKAY) + goto X; + + /* Loop over low-order digits in ascending order */ + for (dig = 0; dig < (USED(b) - 1); dig++) { + d = DIGIT(b, dig); + + /* Loop over bits of each non-maximal digit */ + for (bit = 0; bit < DIGIT_BIT; bit++) { + if (d & 1) { + if ((res = s_mp_mul(&s, &x)) != MP_OKAY) + goto CLEANUP; + } + + d >>= 1; + + if ((res = s_mp_sqr(&x)) != MP_OKAY) + goto CLEANUP; + } + } + + /* Consider now the last digit... 
*/ + d = DIGIT(b, dig); + + while (d) { + if (d & 1) { + if ((res = s_mp_mul(&s, &x)) != MP_OKAY) + goto CLEANUP; + } + + d >>= 1; + + if ((res = s_mp_sqr(&x)) != MP_OKAY) + goto CLEANUP; + } + + if (mp_iseven(b)) + SIGN(&s) = SIGN(a); + + res = mp_copy(&s, c); + +CLEANUP: + mp_clear(&x); +X: + mp_clear(&s); + + return res; + +} /* end mp_expt() */ + +/* }}} */ + +/* {{{ mp_2expt(a, k) */ + +/* Compute a = 2^k */ + +mp_err +mp_2expt(mp_int *a, mp_digit k) +{ + ARGCHK(a != NULL, MP_BADARG); + + return s_mp_2expt(a, k); + +} /* end mp_2expt() */ + +/* }}} */ + +/* {{{ mp_mod(a, m, c) */ + +/* + mp_mod(a, m, c) + + Compute c = a (mod m). Result will always be 0 <= c < m. + */ + +mp_err +mp_mod(const mp_int *a, const mp_int *m, mp_int *c) +{ + mp_err res; + int mag; + + ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG); + + if (SIGN(m) == NEG) + return MP_RANGE; + + /* + If |a| > m, we need to divide to get the remainder and take the + absolute value. + + If |a| < m, we don't need to do any division, just copy and adjust + the sign (if a is negative). + + If |a| == m, we can simply set the result to zero. + + This order is intended to minimize the average path length of the + comparison chain on common workloads -- the most frequent cases are + that |a| != m, so we do those first. + */ + if ((mag = s_mp_cmp(a, m)) > 0) { + if ((res = mp_div(a, m, NULL, c)) != MP_OKAY) + return res; + + if (SIGN(c) == NEG) { + if ((res = mp_add(c, m, c)) != MP_OKAY) + return res; + } + + } else if (mag < 0) { + if ((res = mp_copy(a, c)) != MP_OKAY) + return res; + + if (mp_cmp_z(a) < 0) { + if ((res = mp_add(c, m, c)) != MP_OKAY) + return res; + } + + } else { + mp_zero(c); + } + + return MP_OKAY; + +} /* end mp_mod() */ + +/* }}} */ + +/* {{{ mp_mod_d(a, d, c) */ + +/* + mp_mod_d(a, d, c) + + Compute c = a (mod d). 
Result will always be 0 <= c < d
+
+  Returns MP_RANGE (reported by mp_div_d) when d is zero.
+ */
+mp_err
+mp_mod_d(const mp_int *a, mp_digit d, mp_digit *c)
+{
+    mp_err res;
+    mp_digit rem;
+
+    ARGCHK(a != NULL && c != NULL, MP_BADARG);
+
+    if (s_mp_cmp_d(a, d) >= 0) {
+        /* |a| >= d: reduce the magnitude by division.  The equality case
+           must come through here too, otherwise |a| == d would be
+           returned as d instead of 0. */
+        if ((res = mp_div_d(a, d, NULL, &rem)) != MP_OKAY)
+            return res;
+
+    } else {
+        /* |a| < d: the low digit already is |a| mod d */
+        rem = DIGIT(a, 0);
+    }
+
+    /* rem is the residue of |a|; for a negative a the residue in
+       [0, d) is d - rem, unless rem is already zero. */
+    if (SIGN(a) == NEG && rem != 0)
+        rem = d - rem;
+
+    if (c)
+        *c = rem;
+
+    return MP_OKAY;
+
+} /* end mp_mod_d() */
+
+/* }}} */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/* {{{ Modular arithmetic */
+
+#if MP_MODARITH
+/* {{{ mp_addmod(a, b, m, c) */
+
+/*
+  mp_addmod(a, b, m, c)
+
+  Compute c = (a + b) mod m
+ */
+
+mp_err
+mp_addmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
+{
+    mp_err res;
+
+    ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
+
+    if ((res = mp_add(a, b, c)) != MP_OKAY)
+        return res;
+    if ((res = mp_mod(c, m, c)) != MP_OKAY)
+        return res;
+
+    return MP_OKAY;
+}
+
+/* }}} */
+
+/* {{{ mp_submod(a, b, m, c) */
+
+/*
+  mp_submod(a, b, m, c)
+
+  Compute c = (a - b) mod m
+ */
+
+mp_err
+mp_submod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
+{
+    mp_err res;
+
+    ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
+
+    if ((res = mp_sub(a, b, c)) != MP_OKAY)
+        return res;
+    if ((res = mp_mod(c, m, c)) != MP_OKAY)
+        return res;
+
+    return MP_OKAY;
+}
+
+/* }}} */
+
+/* {{{ mp_mulmod(a, b, m, c) */
+
+/*
+  mp_mulmod(a, b, m, c)
+
+  Compute c = (a * b) mod m
+ */
+
+mp_err
+mp_mulmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c)
+{
+    mp_err res;
+
+    ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
+
+    if ((res = mp_mul(a, b, c)) != MP_OKAY)
+        return res;
+    if ((res = mp_mod(c, m, c)) != MP_OKAY)
+        return res;
+
+    return MP_OKAY;
+}
+
+/* }}} */
+
+/* {{{ mp_sqrmod(a, m, c) */
+
+#if MP_SQUARE
+mp_err
+mp_sqrmod(const mp_int *a, const mp_int *m, mp_int *c)
+{
+    mp_err res;
+
+
ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG); + + if ((res = mp_sqr(a, c)) != MP_OKAY) + return res; + if ((res = mp_mod(c, m, c)) != MP_OKAY) + return res; + + return MP_OKAY; + +} /* end mp_sqrmod() */ +#endif + +/* }}} */ + +/* {{{ s_mp_exptmod(a, b, m, c) */ + +/* + s_mp_exptmod(a, b, m, c) + + Compute c = (a ** b) mod m. Uses a standard square-and-multiply + method with modular reductions at each step. (This is basically the + same code as mp_expt(), except for the addition of the reductions) + + The modular reductions are done using Barrett's algorithm (see + s_mp_reduce() below for details) + */ + +mp_err +s_mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c) +{ + mp_int s, x, mu; + mp_err res; + mp_digit d; + unsigned int dig, bit; + + ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG); + + if (mp_cmp_z(b) < 0 || mp_cmp_z(m) <= 0) + return MP_RANGE; + + if ((res = mp_init(&s)) != MP_OKAY) + return res; + if ((res = mp_init_copy(&x, a)) != MP_OKAY || + (res = mp_mod(&x, m, &x)) != MP_OKAY) + goto X; + if ((res = mp_init(&mu)) != MP_OKAY) + goto MU; + + mp_set(&s, 1); + + /* mu = b^2k / m */ + if ((res = s_mp_add_d(&mu, 1)) != MP_OKAY) + goto CLEANUP; + if ((res = s_mp_lshd(&mu, 2 * USED(m))) != MP_OKAY) + goto CLEANUP; + if ((res = mp_div(&mu, m, &mu, NULL)) != MP_OKAY) + goto CLEANUP; + + /* Loop over digits of b in ascending order, except highest order */ + for (dig = 0; dig < (USED(b) - 1); dig++) { + d = DIGIT(b, dig); + + /* Loop over the bits of the lower-order digits */ + for (bit = 0; bit < DIGIT_BIT; bit++) { + if (d & 1) { + if ((res = s_mp_mul(&s, &x)) != MP_OKAY) + goto CLEANUP; + if ((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY) + goto CLEANUP; + } + + d >>= 1; + + if ((res = s_mp_sqr(&x)) != MP_OKAY) + goto CLEANUP; + if ((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY) + goto CLEANUP; + } + } + + /* Now do the last digit... 
*/ + d = DIGIT(b, dig); + + while (d) { + if (d & 1) { + if ((res = s_mp_mul(&s, &x)) != MP_OKAY) + goto CLEANUP; + if ((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY) + goto CLEANUP; + } + + d >>= 1; + + if ((res = s_mp_sqr(&x)) != MP_OKAY) + goto CLEANUP; + if ((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY) + goto CLEANUP; + } + + s_mp_exch(&s, c); + +CLEANUP: + mp_clear(&mu); +MU: + mp_clear(&x); +X: + mp_clear(&s); + + return res; + +} /* end s_mp_exptmod() */ + +/* }}} */ + +/* {{{ mp_exptmod_d(a, d, m, c) */ + +mp_err +mp_exptmod_d(const mp_int *a, mp_digit d, const mp_int *m, mp_int *c) +{ + mp_int s, x; + mp_err res; + + ARGCHK(a != NULL && c != NULL, MP_BADARG); + + if ((res = mp_init(&s)) != MP_OKAY) + return res; + if ((res = mp_init_copy(&x, a)) != MP_OKAY) + goto X; + + mp_set(&s, 1); + + while (d != 0) { + if (d & 1) { + if ((res = s_mp_mul(&s, &x)) != MP_OKAY || + (res = mp_mod(&s, m, &s)) != MP_OKAY) + goto CLEANUP; + } + + d /= 2; + + if ((res = s_mp_sqr(&x)) != MP_OKAY || + (res = mp_mod(&x, m, &x)) != MP_OKAY) + goto CLEANUP; + } + + s_mp_exch(&s, c); + +CLEANUP: + mp_clear(&x); +X: + mp_clear(&s); + + return res; + +} /* end mp_exptmod_d() */ + +/* }}} */ +#endif /* if MP_MODARITH */ + +/* }}} */ + +/*------------------------------------------------------------------------*/ +/* {{{ Comparison functions */ + +/* {{{ mp_cmp_z(a) */ + +/* + mp_cmp_z(a) + + Compare a <=> 0. Returns <0 if a<0, 0 if a=0, >0 if a>0. + */ + +int +mp_cmp_z(const mp_int *a) +{ + if (SIGN(a) == NEG) + return MP_LT; + else if (USED(a) == 1 && DIGIT(a, 0) == 0) + return MP_EQ; + else + return MP_GT; + +} /* end mp_cmp_z() */ + +/* }}} */ + +/* {{{ mp_cmp_d(a, d) */ + +/* + mp_cmp_d(a, d) + + Compare a <=> d. 
Returns <0 if a<d, 0 if a=d, >0 if a>d
+ */
+
+int
+mp_cmp_d(const mp_int *a, mp_digit d)
+{
+    ARGCHK(a != NULL, MP_EQ);
+
+    if (SIGN(a) == NEG) /* d is an unsigned digit, so a negative a is below it */
+        return MP_LT;
+
+    return s_mp_cmp_d(a, d);
+
+} /* end mp_cmp_d() */
+
+/* }}} */
+
+/* {{{ mp_cmp(a, b) */
+
+int
+mp_cmp(const mp_int *a, const mp_int *b)
+{
+    ARGCHK(a != NULL && b != NULL, MP_EQ);
+
+    if (SIGN(a) == SIGN(b)) {
+        int mag;
+
+        if ((mag = s_mp_cmp(a, b)) == MP_EQ)
+            return MP_EQ;
+
+        if (SIGN(a) == ZPOS)
+            return mag;
+        else
+            return -mag; /* same non-ZPOS sign: reverse the s_mp_cmp result */
+
+    } else if (SIGN(a) == ZPOS) { /* signs differ: the ZPOS operand is the greater */
+        return MP_GT;
+    } else {
+        return MP_LT;
+    }
+
+} /* end mp_cmp() */
+
+/* }}} */
+
+/* {{{ mp_cmp_mag(a, b) */
+
+/*
+  mp_cmp_mag(a, b)
+
+  Compares |a| <=> |b|, and returns an appropriate comparison result
+ */
+
+int
+mp_cmp_mag(const mp_int *a, const mp_int *b)
+{
+    ARGCHK(a != NULL && b != NULL, MP_EQ);
+
+    return s_mp_cmp(a, b);
+
+} /* end mp_cmp_mag() */
+
+/* }}} */
+
+/* {{{ mp_isodd(a) */
+
+/*
+  mp_isodd(a)
+
+  Returns a true (non-zero) value if a is odd, false (zero) otherwise.
+ */
+int
+mp_isodd(const mp_int *a)
+{
+    ARGCHK(a != NULL, 0);
+
+    return (int)(DIGIT(a, 0) & 1); /* parity is the low bit of digit 0 */
+
+} /* end mp_isodd() */
+
+/* }}} */
+
+/* {{{ mp_iseven(a) */
+
+int
+mp_iseven(const mp_int *a)
+{
+    return !mp_isodd(a);
+
+} /* end mp_iseven() */
+
+/* }}} */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/* {{{ Number theoretic functions */
+
+#if MP_NUMTH
+/* {{{ mp_gcd(a, b, c) */
+
+/*
+  Like the old mp_gcd() function, except computes the GCD using the
+  binary algorithm due to Josef Stein in 1961 (via Knuth).
+ */
+mp_err
+mp_gcd(mp_int *a, mp_int *b, mp_int *c)
+{
+    mp_err res;
+    mp_int u, v, t;
+    mp_size k = 0;
+
+    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+    /* gcd(0, 0) is rejected; gcd(0, x) is |x|.  Use mp_abs() so these
+       shortcuts return a non-negative value like the main path below. */
+    if (mp_cmp_z(a) == MP_EQ && mp_cmp_z(b) == MP_EQ)
+        return MP_RANGE;
+    if (mp_cmp_z(a) == MP_EQ) {
+        return mp_abs(b, c);
+    } else if (mp_cmp_z(b) == MP_EQ) {
+        return mp_abs(a, c);
+    }
+
+    if ((res = mp_init(&t)) != MP_OKAY)
+        return res;
+    if ((res = mp_init_copy(&u, a)) != MP_OKAY)
+        goto U;
+    if ((res = mp_init_copy(&v, b)) != MP_OKAY)
+        goto V;
+
+    SIGN(&u) = ZPOS;
+    SIGN(&v) = ZPOS;
+
+    /* Divide out common factors of 2 until at least 1 of u, v is odd */
+    while (mp_iseven(&u) && mp_iseven(&v)) {
+        s_mp_div_2(&u);
+        s_mp_div_2(&v);
+        ++k;
+    }
+
+    /* Initialize t */
+    if (mp_isodd(&u)) {
+        if ((res = mp_copy(&v, &t)) != MP_OKAY)
+            goto CLEANUP;
+
+        /* t = -v */
+        if (SIGN(&v) == ZPOS)
+            SIGN(&t) = NEG;
+        else
+            SIGN(&t) = ZPOS;
+
+    } else {
+        if ((res = mp_copy(&u, &t)) != MP_OKAY)
+            goto CLEANUP;
+    }
+
+    for (;;) {
+        while (mp_iseven(&t)) {
+            s_mp_div_2(&t);
+        }
+
+        if (mp_cmp_z(&t) == MP_GT) {
+            if ((res = mp_copy(&t, &u)) != MP_OKAY)
+                goto CLEANUP;
+
+        } else {
+            if ((res = mp_copy(&t, &v)) != MP_OKAY)
+                goto CLEANUP;
+
+            /* v = -t */
+            if (SIGN(&t) == ZPOS)
+                SIGN(&v) = NEG;
+            else
+                SIGN(&v) = ZPOS;
+        }
+
+        if ((res = mp_sub(&u, &v, &t)) != MP_OKAY)
+            goto CLEANUP;
+
+        if (s_mp_cmp_d(&t, 0) == MP_EQ)
+            break;
+    }
+
+    /* v = 2^k; this can fail, so do not ignore the result */
+    if ((res = s_mp_2expt(&v, k)) != MP_OKAY)
+        goto CLEANUP;
+    res = mp_mul(&u, &v, c); /* c = u * v */
+
+CLEANUP:
+    mp_clear(&v);
+V:
+    mp_clear(&u);
+U:
+    mp_clear(&t);
+
+    return res;
+
+} /* end mp_gcd() */
+
+/* }}} */
+
+/* {{{ mp_lcm(a, b, c) */
+
+/* We compute the least common multiple using the rule:
+
+   ab = [a, b](a, b)
+
+   ... by computing the product, and dividing out the gcd.
+ */ + +mp_err +mp_lcm(mp_int *a, mp_int *b, mp_int *c) +{ + mp_int gcd, prod; + mp_err res; + + ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG); + + /* Set up temporaries */ + if ((res = mp_init(&gcd)) != MP_OKAY) + return res; + if ((res = mp_init(&prod)) != MP_OKAY) + goto GCD; + + if ((res = mp_mul(a, b, &prod)) != MP_OKAY) + goto CLEANUP; + if ((res = mp_gcd(a, b, &gcd)) != MP_OKAY) + goto CLEANUP; + + res = mp_div(&prod, &gcd, c, NULL); + +CLEANUP: + mp_clear(&prod); +GCD: + mp_clear(&gcd); + + return res; + +} /* end mp_lcm() */ + +/* }}} */ + +/* {{{ mp_xgcd(a, b, g, x, y) */ + +/* + mp_xgcd(a, b, g, x, y) + + Compute g = (a, b) and values x and y satisfying Bezout's identity + (that is, ax + by = g). This uses the binary extended GCD algorithm + based on the Stein algorithm used for mp_gcd() + See algorithm 14.61 in Handbook of Applied Cryptogrpahy. + */ + +mp_err +mp_xgcd(const mp_int *a, const mp_int *b, mp_int *g, mp_int *x, mp_int *y) +{ + mp_int gx, xc, yc, u, v, A, B, C, D; + mp_int *clean[9]; + mp_err res; + int last = -1; + + if (mp_cmp_z(b) == 0) + return MP_RANGE; + + /* Initialize all these variables we need */ + MP_CHECKOK(mp_init(&u)); + clean[++last] = &u; + MP_CHECKOK(mp_init(&v)); + clean[++last] = &v; + MP_CHECKOK(mp_init(&gx)); + clean[++last] = &gx; + MP_CHECKOK(mp_init(&A)); + clean[++last] = &A; + MP_CHECKOK(mp_init(&B)); + clean[++last] = &B; + MP_CHECKOK(mp_init(&C)); + clean[++last] = &C; + MP_CHECKOK(mp_init(&D)); + clean[++last] = &D; + MP_CHECKOK(mp_init_copy(&xc, a)); + clean[++last] = &xc; + mp_abs(&xc, &xc); + MP_CHECKOK(mp_init_copy(&yc, b)); + clean[++last] = &yc; + mp_abs(&yc, &yc); + + mp_set(&gx, 1); + + /* Divide by two until at least one of them is odd */ + while (mp_iseven(&xc) && mp_iseven(&yc)) { + mp_size nx = mp_trailing_zeros(&xc); + mp_size ny = mp_trailing_zeros(&yc); + mp_size n = MP_MIN(nx, ny); + s_mp_div_2d(&xc, n); + s_mp_div_2d(&yc, n); + MP_CHECKOK(s_mp_mul_2d(&gx, n)); + } + + 
MP_CHECKOK(mp_copy(&xc, &u)); + MP_CHECKOK(mp_copy(&yc, &v)); + mp_set(&A, 1); + mp_set(&D, 1); + + /* Loop through binary GCD algorithm */ + do { + while (mp_iseven(&u)) { + s_mp_div_2(&u); + + if (mp_iseven(&A) && mp_iseven(&B)) { + s_mp_div_2(&A); + s_mp_div_2(&B); + } else { + MP_CHECKOK(mp_add(&A, &yc, &A)); + s_mp_div_2(&A); + MP_CHECKOK(mp_sub(&B, &xc, &B)); + s_mp_div_2(&B); + } + } + + while (mp_iseven(&v)) { + s_mp_div_2(&v); + + if (mp_iseven(&C) && mp_iseven(&D)) { + s_mp_div_2(&C); + s_mp_div_2(&D); + } else { + MP_CHECKOK(mp_add(&C, &yc, &C)); + s_mp_div_2(&C); + MP_CHECKOK(mp_sub(&D, &xc, &D)); + s_mp_div_2(&D); + } + } + + if (mp_cmp(&u, &v) >= 0) { + MP_CHECKOK(mp_sub(&u, &v, &u)); + MP_CHECKOK(mp_sub(&A, &C, &A)); + MP_CHECKOK(mp_sub(&B, &D, &B)); + } else { + MP_CHECKOK(mp_sub(&v, &u, &v)); + MP_CHECKOK(mp_sub(&C, &A, &C)); + MP_CHECKOK(mp_sub(&D, &B, &D)); + } + } while (mp_cmp_z(&u) != 0); + + /* copy results to output */ + if (x) + MP_CHECKOK(mp_copy(&C, x)); + + if (y) + MP_CHECKOK(mp_copy(&D, y)); + + if (g) + MP_CHECKOK(mp_mul(&gx, &v, g)); + +CLEANUP: + while (last >= 0) + mp_clear(clean[last--]); + + return res; + +} /* end mp_xgcd() */ + +/* }}} */ + +mp_size +mp_trailing_zeros(const mp_int *mp) +{ + mp_digit d; + mp_size n = 0; + unsigned int ix; + + if (!mp || !MP_DIGITS(mp) || !mp_cmp_z(mp)) + return n; + + for (ix = 0; !(d = MP_DIGIT(mp, ix)) && (ix < MP_USED(mp)); ++ix) + n += MP_DIGIT_BIT; + if (!d) + return 0; /* shouldn't happen, but ... */ +#if !defined(MP_USE_UINT_DIGIT) + if (!(d & 0xffffffffU)) { + d >>= 32; + n += 32; + } +#endif + if (!(d & 0xffffU)) { + d >>= 16; + n += 16; + } + if (!(d & 0xffU)) { + d >>= 8; + n += 8; + } + if (!(d & 0xfU)) { + d >>= 4; + n += 4; + } + if (!(d & 0x3U)) { + d >>= 2; + n += 2; + } + if (!(d & 0x1U)) { + d >>= 1; + n += 1; + } +#if MP_ARGCHK == 2 + assert(0 != (d & 1)); +#endif + return n; +} + +/* Given a and prime p, computes c and k such that a*c == 2**k (mod p). 
+** Returns k (positive) or error (negative). +** This technique from the paper "Fast Modular Reciprocals" (unpublished) +** by Richard Schroeppel (a.k.a. Captain Nemo). +*/ +mp_err +s_mp_almost_inverse(const mp_int *a, const mp_int *p, mp_int *c) +{ + mp_err res; + mp_err k = 0; + mp_int d, f, g; + + ARGCHK(a && p && c, MP_BADARG); + + MP_DIGITS(&d) = 0; + MP_DIGITS(&f) = 0; + MP_DIGITS(&g) = 0; + MP_CHECKOK(mp_init(&d)); + MP_CHECKOK(mp_init_copy(&f, a)); /* f = a */ + MP_CHECKOK(mp_init_copy(&g, p)); /* g = p */ + + mp_set(c, 1); + mp_zero(&d); + + if (mp_cmp_z(&f) == 0) { + res = MP_UNDEF; + } else + for (;;) { + int diff_sign; + while (mp_iseven(&f)) { + mp_size n = mp_trailing_zeros(&f); + if (!n) { + res = MP_UNDEF; + goto CLEANUP; + } + s_mp_div_2d(&f, n); + MP_CHECKOK(s_mp_mul_2d(&d, n)); + k += n; + } + if (mp_cmp_d(&f, 1) == MP_EQ) { /* f == 1 */ + res = k; + break; + } + diff_sign = mp_cmp(&f, &g); + if (diff_sign < 0) { /* f < g */ + s_mp_exch(&f, &g); + s_mp_exch(c, &d); + } else if (diff_sign == 0) { /* f == g */ + res = MP_UNDEF; /* a and p are not relatively prime */ + break; + } + if ((MP_DIGIT(&f, 0) % 4) == (MP_DIGIT(&g, 0) % 4)) { + MP_CHECKOK(mp_sub(&f, &g, &f)); /* f = f - g */ + MP_CHECKOK(mp_sub(c, &d, c)); /* c = c - d */ + } else { + MP_CHECKOK(mp_add(&f, &g, &f)); /* f = f + g */ + MP_CHECKOK(mp_add(c, &d, c)); /* c = c + d */ + } + } + if (res >= 0) { + while (MP_SIGN(c) != MP_ZPOS) { + MP_CHECKOK(mp_add(c, p, c)); + } + res = k; + } + +CLEANUP: + mp_clear(&d); + mp_clear(&f); + mp_clear(&g); + return res; +} + +/* Compute T = (P ** -1) mod MP_RADIX. Also works for 16-bit mp_digits. +** This technique from the paper "Fast Modular Reciprocals" (unpublished) +** by Richard Schroeppel (a.k.a. Captain Nemo). 
+*/ +mp_digit +s_mp_invmod_radix(mp_digit P) +{ + mp_digit T = P; + T *= 2 - (P * T); + T *= 2 - (P * T); + T *= 2 - (P * T); + T *= 2 - (P * T); +#if !defined(MP_USE_UINT_DIGIT) + T *= 2 - (P * T); + T *= 2 - (P * T); +#endif + return T; +} + +/* Given c, k, and prime p, where a*c == 2**k (mod p), +** Compute x = (a ** -1) mod p. This is similar to Montgomery reduction. +** This technique from the paper "Fast Modular Reciprocals" (unpublished) +** by Richard Schroeppel (a.k.a. Captain Nemo). +*/ +mp_err +s_mp_fixup_reciprocal(const mp_int *c, const mp_int *p, int k, mp_int *x) +{ + int k_orig = k; + mp_digit r; + mp_size ix; + mp_err res; + + if (mp_cmp_z(c) < 0) { /* c < 0 */ + MP_CHECKOK(mp_add(c, p, x)); /* x = c + p */ + } else { + MP_CHECKOK(mp_copy(c, x)); /* x = c */ + } + + /* make sure x is large enough */ + ix = MP_HOWMANY(k, MP_DIGIT_BIT) + MP_USED(p) + 1; + ix = MP_MAX(ix, MP_USED(x)); + MP_CHECKOK(s_mp_pad(x, ix)); + + r = 0 - s_mp_invmod_radix(MP_DIGIT(p, 0)); + + for (ix = 0; k > 0; ix++) { + int j = MP_MIN(k, MP_DIGIT_BIT); + mp_digit v = r * MP_DIGIT(x, ix); + if (j < MP_DIGIT_BIT) { + v &= ((mp_digit)1 << j) - 1; /* v = v mod (2 ** j) */ + } + s_mp_mul_d_add_offset(p, v, x, ix); /* x += p * v * (RADIX ** ix) */ + k -= j; + } + s_mp_clamp(x); + s_mp_div_2d(x, k_orig); + res = MP_OKAY; + +CLEANUP: + return res; +} + +/* compute mod inverse using Schroeppel's method, only if m is odd */ +mp_err +s_mp_invmod_odd_m(const mp_int *a, const mp_int *m, mp_int *c) +{ + int k; + mp_err res; + mp_int x; + + ARGCHK(a && m && c, MP_BADARG); + + if (mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0) + return MP_RANGE; + if (mp_iseven(m)) + return MP_UNDEF; + + MP_DIGITS(&x) = 0; + + if (a == c) { + if ((res = mp_init_copy(&x, a)) != MP_OKAY) + return res; + if (a == m) + m = &x; + a = &x; + } else if (m == c) { + if ((res = mp_init_copy(&x, m)) != MP_OKAY) + return res; + m = &x; + } else { + MP_DIGITS(&x) = 0; + } + + MP_CHECKOK(s_mp_almost_inverse(a, m, c)); + k = res; + 
MP_CHECKOK(s_mp_fixup_reciprocal(c, m, k, c)); +CLEANUP: + mp_clear(&x); + return res; +} + +/* Known good algorithm for computing modular inverse. But slow. */ +mp_err +mp_invmod_xgcd(const mp_int *a, const mp_int *m, mp_int *c) +{ + mp_int g, x; + mp_err res; + + ARGCHK(a && m && c, MP_BADARG); + + if (mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0) + return MP_RANGE; + + MP_DIGITS(&g) = 0; + MP_DIGITS(&x) = 0; + MP_CHECKOK(mp_init(&x)); + MP_CHECKOK(mp_init(&g)); + + MP_CHECKOK(mp_xgcd(a, m, &g, &x, NULL)); + + if (mp_cmp_d(&g, 1) != MP_EQ) { + res = MP_UNDEF; + goto CLEANUP; + } + + res = mp_mod(&x, m, c); + SIGN(c) = SIGN(a); + +CLEANUP: + mp_clear(&x); + mp_clear(&g); + + return res; +} + +/* modular inverse where modulus is 2**k. */ +/* c = a**-1 mod 2**k */ +mp_err +s_mp_invmod_2d(const mp_int *a, mp_size k, mp_int *c) +{ + mp_err res; + mp_size ix = k + 4; + mp_int t0, t1, val, tmp, two2k; + + static const mp_digit d2 = 2; + static const mp_int two = { MP_ZPOS, 1, 1, (mp_digit *)&d2 }; + + if (mp_iseven(a)) + return MP_UNDEF; + if (k <= MP_DIGIT_BIT) { + mp_digit i = s_mp_invmod_radix(MP_DIGIT(a, 0)); + if (k < MP_DIGIT_BIT) + i &= ((mp_digit)1 << k) - (mp_digit)1; + mp_set(c, i); + return MP_OKAY; + } + MP_DIGITS(&t0) = 0; + MP_DIGITS(&t1) = 0; + MP_DIGITS(&val) = 0; + MP_DIGITS(&tmp) = 0; + MP_DIGITS(&two2k) = 0; + MP_CHECKOK(mp_init_copy(&val, a)); + s_mp_mod_2d(&val, k); + MP_CHECKOK(mp_init_copy(&t0, &val)); + MP_CHECKOK(mp_init_copy(&t1, &t0)); + MP_CHECKOK(mp_init(&tmp)); + MP_CHECKOK(mp_init(&two2k)); + MP_CHECKOK(s_mp_2expt(&two2k, k)); + do { + MP_CHECKOK(mp_mul(&val, &t1, &tmp)); + MP_CHECKOK(mp_sub(&two, &tmp, &tmp)); + MP_CHECKOK(mp_mul(&t1, &tmp, &t1)); + s_mp_mod_2d(&t1, k); + while (MP_SIGN(&t1) != MP_ZPOS) { + MP_CHECKOK(mp_add(&t1, &two2k, &t1)); + } + if (mp_cmp(&t1, &t0) == MP_EQ) + break; + MP_CHECKOK(mp_copy(&t1, &t0)); + } while (--ix > 0); + if (!ix) { + res = MP_UNDEF; + } else { + mp_exch(c, &t1); + } + +CLEANUP: + mp_clear(&t0); + 
mp_clear(&t1); + mp_clear(&val); + mp_clear(&tmp); + mp_clear(&two2k); + return res; +} + +mp_err +s_mp_invmod_even_m(const mp_int *a, const mp_int *m, mp_int *c) +{ + mp_err res; + mp_size k; + mp_int oddFactor, evenFactor; /* factors of the modulus */ + mp_int oddPart, evenPart; /* parts to combine via CRT. */ + mp_int C2, tmp1, tmp2; + + /*static const mp_digit d1 = 1; */ + /*static const mp_int one = { MP_ZPOS, 1, 1, (mp_digit *)&d1 }; */ + + if ((res = s_mp_ispow2(m)) >= 0) { + k = res; + return s_mp_invmod_2d(a, k, c); + } + MP_DIGITS(&oddFactor) = 0; + MP_DIGITS(&evenFactor) = 0; + MP_DIGITS(&oddPart) = 0; + MP_DIGITS(&evenPart) = 0; + MP_DIGITS(&C2) = 0; + MP_DIGITS(&tmp1) = 0; + MP_DIGITS(&tmp2) = 0; + + MP_CHECKOK(mp_init_copy(&oddFactor, m)); /* oddFactor = m */ + MP_CHECKOK(mp_init(&evenFactor)); + MP_CHECKOK(mp_init(&oddPart)); + MP_CHECKOK(mp_init(&evenPart)); + MP_CHECKOK(mp_init(&C2)); + MP_CHECKOK(mp_init(&tmp1)); + MP_CHECKOK(mp_init(&tmp2)); + + k = mp_trailing_zeros(m); + s_mp_div_2d(&oddFactor, k); + MP_CHECKOK(s_mp_2expt(&evenFactor, k)); + + /* compute a**-1 mod oddFactor. */ + MP_CHECKOK(s_mp_invmod_odd_m(a, &oddFactor, &oddPart)); + /* compute a**-1 mod evenFactor, where evenFactor == 2**k. */ + MP_CHECKOK(s_mp_invmod_2d(a, k, &evenPart)); + + /* Use Chinese Remainer theorem to compute a**-1 mod m. */ + /* let m1 = oddFactor, v1 = oddPart, + * let m2 = evenFactor, v2 = evenPart. + */ + + /* Compute C2 = m1**-1 mod m2. */ + MP_CHECKOK(s_mp_invmod_2d(&oddFactor, k, &C2)); + + /* compute u = (v2 - v1)*C2 mod m2 */ + MP_CHECKOK(mp_sub(&evenPart, &oddPart, &tmp1)); + MP_CHECKOK(mp_mul(&tmp1, &C2, &tmp2)); + s_mp_mod_2d(&tmp2, k); + while (MP_SIGN(&tmp2) != MP_ZPOS) { + MP_CHECKOK(mp_add(&tmp2, &evenFactor, &tmp2)); + } + + /* compute answer = v1 + u*m1 */ + MP_CHECKOK(mp_mul(&tmp2, &oddFactor, c)); + MP_CHECKOK(mp_add(&oddPart, c, c)); + /* not sure this is necessary, but it's low cost if not. 
*/ + MP_CHECKOK(mp_mod(c, m, c)); + +CLEANUP: + mp_clear(&oddFactor); + mp_clear(&evenFactor); + mp_clear(&oddPart); + mp_clear(&evenPart); + mp_clear(&C2); + mp_clear(&tmp1); + mp_clear(&tmp2); + return res; +} + +/* {{{ mp_invmod(a, m, c) */ + +/* + mp_invmod(a, m, c) + + Compute c = a^-1 (mod m), if there is an inverse for a (mod m). + This is equivalent to the question of whether (a, m) = 1. If not, + MP_UNDEF is returned, and there is no inverse. + */ + +mp_err +mp_invmod(const mp_int *a, const mp_int *m, mp_int *c) +{ + + ARGCHK(a && m && c, MP_BADARG); + + if (mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0) + return MP_RANGE; + + if (mp_isodd(m)) { + return s_mp_invmod_odd_m(a, m, c); + } + if (mp_iseven(a)) + return MP_UNDEF; /* not invertable */ + + return s_mp_invmod_even_m(a, m, c); + +} /* end mp_invmod() */ + +/* }}} */ +#endif /* if MP_NUMTH */ + +/* }}} */ + +/*------------------------------------------------------------------------*/ +/* {{{ mp_print(mp, ofp) */ + +#if MP_IOFUNC +/* + mp_print(mp, ofp) + + Print a textual representation of the given mp_int on the output + stream 'ofp'. Output is generated using the internal radix. + */ + +void +mp_print(mp_int *mp, FILE *ofp) +{ + int ix; + + if (mp == NULL || ofp == NULL) + return; + + fputc((SIGN(mp) == NEG) ? 
'-' : '+', ofp); + + for (ix = USED(mp) - 1; ix >= 0; ix--) { + fprintf(ofp, DIGIT_FMT, DIGIT(mp, ix)); + } + +} /* end mp_print() */ + +#endif /* if MP_IOFUNC */ + +/* }}} */ + +/*------------------------------------------------------------------------*/ +/* {{{ More I/O Functions */ + +/* {{{ mp_read_raw(mp, str, len) */ + +/* + mp_read_raw(mp, str, len) + + Read in a raw value (base 256) into the given mp_int + */ + +mp_err +mp_read_raw(mp_int *mp, char *str, int len) +{ + int ix; + mp_err res; + unsigned char *ustr = (unsigned char *)str; + + ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG); + + mp_zero(mp); + + /* Get sign from first byte */ + if (ustr[0]) + SIGN(mp) = NEG; + else + SIGN(mp) = ZPOS; + + /* Read the rest of the digits */ + for (ix = 1; ix < len; ix++) { + if ((res = mp_mul_d(mp, 256, mp)) != MP_OKAY) + return res; + if ((res = mp_add_d(mp, ustr[ix], mp)) != MP_OKAY) + return res; + } + + return MP_OKAY; + +} /* end mp_read_raw() */ + +/* }}} */ + +/* {{{ mp_raw_size(mp) */ + +int +mp_raw_size(mp_int *mp) +{ + ARGCHK(mp != NULL, 0); + + return (USED(mp) * sizeof(mp_digit)) + 1; + +} /* end mp_raw_size() */ + +/* }}} */ + +/* {{{ mp_toraw(mp, str) */ + +mp_err +mp_toraw(mp_int *mp, char *str) +{ + int ix, jx, pos = 1; + + ARGCHK(mp != NULL && str != NULL, MP_BADARG); + + str[0] = (char)SIGN(mp); + + /* Iterate over each digit... */ + for (ix = USED(mp) - 1; ix >= 0; ix--) { + mp_digit d = DIGIT(mp, ix); + + /* Unpack digit bytes, high order first */ + for (jx = sizeof(mp_digit) - 1; jx >= 0; jx--) { + str[pos++] = (char)(d >> (jx * CHAR_BIT)); + } + } + + return MP_OKAY; + +} /* end mp_toraw() */ + +/* }}} */ + +/* {{{ mp_read_radix(mp, str, radix) */ + +/* + mp_read_radix(mp, str, radix) + + Read an integer from the given string, and set mp to the resulting + value. The input is presumed to be in base 10. Leading non-digit + characters are ignored, and the function reads until a non-digit + character or the end of the string. 
+ */ + +mp_err +mp_read_radix(mp_int *mp, const char *str, int radix) +{ + int ix = 0, val = 0; + mp_err res; + mp_sign sig = ZPOS; + + ARGCHK(mp != NULL && str != NULL && radix >= 2 && radix <= MAX_RADIX, + MP_BADARG); + + mp_zero(mp); + + /* Skip leading non-digit characters until a digit or '-' or '+' */ + while (str[ix] && + (s_mp_tovalue(str[ix], radix) < 0) && + str[ix] != '-' && + str[ix] != '+') { + ++ix; + } + + if (str[ix] == '-') { + sig = NEG; + ++ix; + } else if (str[ix] == '+') { + sig = ZPOS; /* this is the default anyway... */ + ++ix; + } + + while ((val = s_mp_tovalue(str[ix], radix)) >= 0) { + if ((res = s_mp_mul_d(mp, radix)) != MP_OKAY) + return res; + if ((res = s_mp_add_d(mp, val)) != MP_OKAY) + return res; + ++ix; + } + + if (s_mp_cmp_d(mp, 0) == MP_EQ) + SIGN(mp) = ZPOS; + else + SIGN(mp) = sig; + + return MP_OKAY; + +} /* end mp_read_radix() */ + +mp_err +mp_read_variable_radix(mp_int *a, const char *str, int default_radix) +{ + int radix = default_radix; + int cx; + mp_sign sig = ZPOS; + mp_err res; + + /* Skip leading non-digit characters until a digit or '-' or '+' */ + while ((cx = *str) != 0 && + (s_mp_tovalue(cx, radix) < 0) && + cx != '-' && + cx != '+') { + ++str; + } + + if (cx == '-') { + sig = NEG; + ++str; + } else if (cx == '+') { + sig = ZPOS; /* this is the default anyway... */ + ++str; + } + + if (str[0] == '0') { + if ((str[1] | 0x20) == 'x') { + radix = 16; + str += 2; + } else { + radix = 8; + str++; + } + } + res = mp_read_radix(a, str, radix); + if (res == MP_OKAY) { + MP_SIGN(a) = (s_mp_cmp_d(a, 0) == MP_EQ) ? 
ZPOS : sig; + } + return res; +} + +/* }}} */ + +/* {{{ mp_radix_size(mp, radix) */ + +int +mp_radix_size(mp_int *mp, int radix) +{ + int bits; + + if (!mp || radix < 2 || radix > MAX_RADIX) + return 0; + + bits = USED(mp) * DIGIT_BIT - 1; + + return s_mp_outlen(bits, radix); + +} /* end mp_radix_size() */ + +/* }}} */ + +/* {{{ mp_toradix(mp, str, radix) */ + +mp_err +mp_toradix(mp_int *mp, char *str, int radix) +{ + int ix, pos = 0; + + ARGCHK(mp != NULL && str != NULL, MP_BADARG); + ARGCHK(radix > 1 && radix <= MAX_RADIX, MP_RANGE); + + if (mp_cmp_z(mp) == MP_EQ) { + str[0] = '0'; + str[1] = '\0'; + } else { + mp_err res; + mp_int tmp; + mp_sign sgn; + mp_digit rem, rdx = (mp_digit)radix; + char ch; + + if ((res = mp_init_copy(&tmp, mp)) != MP_OKAY) + return res; + + /* Save sign for later, and take absolute value */ + sgn = SIGN(&tmp); + SIGN(&tmp) = ZPOS; + + /* Generate output digits in reverse order */ + while (mp_cmp_z(&tmp) != 0) { + if ((res = mp_div_d(&tmp, rdx, &tmp, &rem)) != MP_OKAY) { + mp_clear(&tmp); + return res; + } + + /* Generate digits, use capital letters */ + ch = s_mp_todigit(rem, radix, 0); + + str[pos++] = ch; + } + + /* Add - sign if original value was negative */ + if (sgn == NEG) + str[pos++] = '-'; + + /* Add trailing NUL to end the string */ + str[pos--] = '\0'; + + /* Reverse the digits and sign indicator */ + ix = 0; + while (ix < pos) { + char tmp = str[ix]; + + str[ix] = str[pos]; + str[pos] = tmp; + ++ix; + --pos; + } + + mp_clear(&tmp); + } + + return MP_OKAY; + +} /* end mp_toradix() */ + +/* }}} */ + +/* {{{ mp_tovalue(ch, r) */ + +int +mp_tovalue(char ch, int r) +{ + return s_mp_tovalue(ch, r); + +} /* end mp_tovalue() */ + +/* }}} */ + +/* }}} */ + +/* {{{ mp_strerror(ec) */ + +/* + mp_strerror(ec) + + Return a string describing the meaning of error code 'ec'. The + string returned is allocated in static memory, so the caller should + not attempt to modify or free the memory associated with this + string. 
+ */
+const char *
+mp_strerror(mp_err ec)
+{
+    int aec = (ec < 0) ? -ec : ec;
+
+    /* Code values are negative, so the senses of these comparisons
+       are accurate.  Anything outside [MP_LAST_CODE, MP_OKAY] yields
+       entry 0 of mp_err_string; a code inside that range yields the
+       entry at index |ec| + 1. */
+    if (ec < MP_LAST_CODE || ec > MP_OKAY) {
+        return mp_err_string[0]; /* unknown error code */
+    } else {
+        return mp_err_string[aec + 1];
+    }
+
+} /* end mp_strerror() */
+
+/* }}} */
+
+/*========================================================================*/
+/*------------------------------------------------------------------------*/
+/* Static function definitions (internal use only) */
+
+/* {{{ Memory management */
+
+/* {{{ s_mp_grow(mp, min) */
+
+/* Make sure there are at least 'min' digits allocated to mp.
+
+   Does nothing when ALLOC(mp) is already >= min.  Otherwise a new
+   buffer is obtained from s_mp_alloc(), the USED(mp) digits are copied
+   into it, and the old buffer is zeroed before it is freed.
+
+   Returns MP_MEM if the allocation fails (mp is not modified in that
+   case), MP_OKAY otherwise. */
+mp_err
+s_mp_grow(mp_int *mp, mp_size min)
+{
+    if (min > ALLOC(mp)) {
+        mp_digit *tmp;
+
+        /* Set min to next nearest default precision block size */
+        min = MP_ROUNDUP(min, s_mp_defprec);
+
+        if ((tmp = s_mp_alloc(min, sizeof(mp_digit))) == NULL)
+            return MP_MEM;
+
+        s_mp_copy(DIGITS(mp), tmp, USED(mp));
+
+        s_mp_setz(DIGITS(mp), ALLOC(mp)); /* wipe the old digits before releasing them */
+        s_mp_free(DIGITS(mp));
+        DIGITS(mp) = tmp;
+        ALLOC(mp) = min;
+    }
+
+    return MP_OKAY;
+
+} /* end s_mp_grow() */
+
+/* }}} */
+
+/* {{{ s_mp_pad(mp, min) */
+
+/* Make sure the used size of mp is at least 'min', growing if needed.
+
+   USED(mp) is only ever raised, never lowered.  Returns MP_OKAY, or
+   the error from s_mp_grow() when more storage is needed and cannot
+   be obtained. */
+mp_err
+s_mp_pad(mp_int *mp, mp_size min)
+{
+    if (min > USED(mp)) {
+        mp_err res;
+
+        /* Make sure there is room to increase precision */
+        if (min > ALLOC(mp)) {
+            if ((res = s_mp_grow(mp, min)) != MP_OKAY)
+                return res;
+        } else {
+            s_mp_setz(DIGITS(mp) + USED(mp), min - USED(mp));
+        }
+
+        /* Increase precision; should already be 0-filled: the branch
+           above zeroes the newly exposed digits itself, while a grown
+           buffer comes from s_mp_alloc(), which calls calloc() */
+        USED(mp) = min;
+    }
+
+    return MP_OKAY;
+
+} /* end s_mp_pad() */
+
+/* }}} */
+
+/* {{{ s_mp_setz(dp, count) */
+
+/* Set 'count' digits pointed to by dp to be zeroes.  A plain loop is
+   used when MP_MEMSET == 0, memset() otherwise. */
+void
+s_mp_setz(mp_digit *dp, mp_size count)
+{
+#if MP_MEMSET == 0
+    int ix;
+
+    for (ix = 0; ix < count; ix++)
+        dp[ix] = 0;
+#else
+    memset(dp, 0, count * sizeof(mp_digit));
+#endif
+
+} /* end
s_mp_setz() */
+
+/* }}} */
+
+/* {{{ s_mp_copy(sp, dp, count) */
+
+/* Copy 'count' digits from sp to dp.  A digit-by-digit loop is used
+   when MP_MEMCPY == 0, memcpy() otherwise; with memcpy() the source
+   and destination ranges must not overlap. */
+void
+s_mp_copy(const mp_digit *sp, mp_digit *dp, mp_size count)
+{
+#if MP_MEMCPY == 0
+    int ix;
+
+    for (ix = 0; ix < count; ix++)
+        dp[ix] = sp[ix];
+#else
+    memcpy(dp, sp, count * sizeof(mp_digit));
+#endif
+} /* end s_mp_copy() */
+
+/* }}} */
+
+/* {{{ s_mp_alloc(nb, ni) */
+
+/* Allocate zero-filled storage for nb records of ni bytes each, and
+   return a pointer to that (NULL if calloc() fails).
+
+   NOTE(review): the arguments are handed to calloc() in the order
+   (nb, ni), i.e. (count, size), and s_mp_grow() passes the digit count
+   first.  An earlier wording of this comment, "ni records of nb bytes
+   each", had the two roles swapped; the total size is the same either
+   way. */
+void *
+s_mp_alloc(size_t nb, size_t ni)
+{
+    return calloc(nb, ni);
+
+} /* end s_mp_alloc() */
+
+/* }}} */
+
+/* {{{ s_mp_free(ptr) */
+
+/* Free the memory pointed to by ptr; a NULL ptr is ignored */
+void
+s_mp_free(void *ptr)
+{
+    if (ptr) {
+        free(ptr);
+    }
+} /* end s_mp_free() */
+
+/* }}} */
+
+/* {{{ s_mp_clamp(mp) */
+
+/* Remove leading zeroes from the given value: USED(mp) is lowered past
+   any zero high-order digits, but never below 1, so a zero value keeps
+   a single digit.  Only the used count is modified. */
+void
+s_mp_clamp(mp_int *mp)
+{
+    mp_size used = MP_USED(mp);
+    while (used > 1 && DIGIT(mp, used - 1) == 0)
+        --used;
+    MP_USED(mp) = used;
+} /* end s_mp_clamp() */
+
+/* }}} */
+
+/* {{{ s_mp_exch(a, b) */
+
+/* Exchange the data for a and b; (b, a) = (a, b).  The two mp_int
+   structures are swapped by plain structure assignment. */
+void
+s_mp_exch(mp_int *a, mp_int *b)
+{
+    mp_int tmp;
+
+    tmp = *a;
+    *a = *b;
+    *b = tmp;
+
+} /* end s_mp_exch() */
+
+/* }}} */
+
+/* }}} */
+
+/* {{{ Arithmetic helpers */
+
+/* {{{ s_mp_lshd(mp, p) */
+
+/*
+  Shift mp leftward by p digits, growing if needed, and zero-filling
+  the in-shifted digits at the right end.
This is a convenient
+  alternative to multiplication by powers of the radix
+ */
+
+mp_err
+s_mp_lshd(mp_int *mp, mp_size p)
+{
+    mp_err res;
+    unsigned int ix;
+
+    if (p == 0)
+        return MP_OKAY;
+
+    if (MP_USED(mp) == 1 && MP_DIGIT(mp, 0) == 0) /* value is zero: shifting changes nothing */
+        return MP_OKAY;
+
+    if ((res = s_mp_pad(mp, USED(mp) + p)) != MP_OKAY)
+        return res;
+
+    /* Shift all the significant figures over as needed.  USED(mp) now
+       includes the p padding digits, so USED(mp) - p is the old digit
+       count; copying runs from the top down so that no digit is
+       overwritten before it has been moved. */
+    for (ix = USED(mp) - p; ix-- > 0;) {
+        DIGIT(mp, ix + p) = DIGIT(mp, ix);
+    }
+
+    /* Fill the bottom digits with zeroes */
+    for (ix = 0; (mp_size)ix < p; ix++)
+        DIGIT(mp, ix) = 0;
+
+    return MP_OKAY;
+
+} /* end s_mp_lshd() */
+
+/* }}} */
+
+/* {{{ s_mp_mul_2d(mp, d) */
+
+/*
+  Multiply the integer by 2^d, where d is a number of bits. This
+  amounts to a bitwise shift of the value.
+
+  The shift is split into dshift whole digits (done by s_mp_lshd) and
+  bshift remaining bits (done by the loop at the end).  Returns
+  MP_OKAY, or the error from s_mp_pad() / s_mp_lshd() if storage
+  cannot be obtained; ARGCHK guards against a NULL mp with MP_BADARG.
+ */
+mp_err
+s_mp_mul_2d(mp_int *mp, mp_digit d)
+{
+    mp_err res;
+    mp_digit dshift, bshift;
+    mp_digit mask;
+
+    ARGCHK(mp != NULL, MP_BADARG);
+
+    dshift = d / MP_DIGIT_BIT;
+    bshift = d % MP_DIGIT_BIT;
+    /* bits to be shifted out of the top word; if any of them is set,
+       one extra digit is reserved for them in the pad call below */
+    if (bshift) {
+        mask = (mp_digit)~0 << (MP_DIGIT_BIT - bshift);
+        mask &= MP_DIGIT(mp, MP_USED(mp) - 1);
+    } else {
+        mask = 0;
+    }
+
+    if (MP_OKAY != (res = s_mp_pad(mp, MP_USED(mp) + dshift + (mask != 0))))
+        return res;
+
+    if (dshift && MP_OKAY != (res = s_mp_lshd(mp, dshift)))
+        return res;
+
+    if (bshift) {
+        mp_digit *pa = MP_DIGITS(mp);
+        mp_digit *alim = pa + MP_USED(mp);
+        mp_digit prev = 0; /* bits carried out of the digit below */
+
+        for (pa += dshift; pa < alim;) {
+            mp_digit x = *pa;
+            *pa++ = (x << bshift) | prev;
+            prev = x >> (DIGIT_BIT - bshift);
+        }
+    }
+
+    s_mp_clamp(mp); /* drop any padding digits that stayed zero */
+    return MP_OKAY;
+} /* end s_mp_mul_2d() */
+
+/* {{{ s_mp_rshd(mp, p) */
+
+/*
+  Shift mp rightward by p digits. Maintains the invariant that
+  digits above the precision are all zero. Digits shifted off the
+  end are lost. Cannot fail.
+ */
+
+void
+s_mp_rshd(mp_int *mp, mp_size p)
+{
+    mp_size ix;
+    mp_digit *src, *dst;
+
+    if (p == 0)
+        return;
+
+    /* Shortcut when all digits are to be shifted off: the result is
+       zero, so the whole allocation is cleared, one digit is marked
+       used and the sign is reset to positive */
+    if (p >= USED(mp)) {
+        s_mp_setz(DIGITS(mp), ALLOC(mp));
+        USED(mp) = 1;
+        SIGN(mp) = ZPOS;
+        return;
+    }
+
+    /* Shift all the significant figures over as needed */
+    dst = MP_DIGITS(mp);
+    src = dst + p;
+    for (ix = USED(mp) - p; ix > 0; ix--)
+        *dst++ = *src++;
+
+    MP_USED(mp) -= p;
+    /* Fill the top digits with zeroes (dst now points at the first
+       of the p vacated digits) */
+    while (p-- > 0)
+        *dst++ = 0;
+
+} /* end s_mp_rshd() */
+
+/* }}} */
+
+/* {{{ s_mp_div_2(mp) */
+
+/* Divide by two -- take advantage of radix properties to do it fast.
+   This simply calls s_mp_div_2d() with a shift count of one bit. */
+void
+s_mp_div_2(mp_int *mp)
+{
+    s_mp_div_2d(mp, 1);
+
+} /* end s_mp_div_2() */
+
+/* }}} */
+
+/* {{{ s_mp_mul_2(mp) */
+
+mp_err
+s_mp_mul_2(mp_int *mp) /* multiply mp by two in place (one-bit left shift) */
+{
+    mp_digit *pd;
+    unsigned int ix, used;
+    mp_digit kin = 0; /* bit carried in from the digit below */
+
+    /* Shift digits leftward by 1 bit */
+    used = MP_USED(mp);
+    pd = MP_DIGITS(mp);
+    for (ix = 0; ix < used; ix++) {
+        mp_digit d = *pd;
+        *pd++ = (d << 1) | kin;
+        kin = (d >> (DIGIT_BIT - 1));
+    }
+
+    /* Deal with rollover from last digit: the bit shifted out of the
+       top becomes a new most-significant digit.  The buffer is grown
+       first if it is already full, and the s_mp_grow() error is
+       returned if that fails. */
+    if (kin) {
+        if (ix >= ALLOC(mp)) {
+            mp_err res;
+            if ((res = s_mp_grow(mp, ALLOC(mp) + 1)) != MP_OKAY)
+                return res;
+        }
+
+        DIGIT(mp, ix) = kin;
+        USED(mp) += 1;
+    }
+
+    return MP_OKAY;
+
+} /* end s_mp_mul_2() */
+
+/* }}} */
+
+/* {{{ s_mp_mod_2d(mp, d) */
+
+/*
+  Remainder the integer by 2^d, where d is a number of bits.
This
+  amounts to a bitwise AND of the value, and does not require the full
+  division code
+ */
+void
+s_mp_mod_2d(mp_int *mp, mp_digit d)
+{
+    mp_size ndig = (d / DIGIT_BIT), nbit = (d % DIGIT_BIT);
+    mp_size ix;
+    mp_digit dmask;
+
+    if (ndig >= USED(mp)) /* bit d lies above every used digit: mp is already < 2^d */
+        return;
+
+    /* Flush all the bits above 2^d in its digit */
+    dmask = ((mp_digit)1 << nbit) - 1;
+    DIGIT(mp, ndig) &= dmask;
+
+    /* Flush all digits above the one with 2^d in it */
+    for (ix = ndig + 1; ix < USED(mp); ix++)
+        DIGIT(mp, ix) = 0;
+
+    s_mp_clamp(mp);
+
+} /* end s_mp_mod_2d() */
+
+/* }}} */
+
+/* {{{ s_mp_div_2d(mp, d) */
+
+/*
+  Divide the integer by 2^d, where d is a number of bits. This
+  amounts to a bitwise shift of the value, and does not require the
+  full division code (used in Barrett reduction, see below)
+
+  Whole digits are dropped first with s_mp_rshd(); the remaining
+  d % DIGIT_BIT bits are then shifted out digit by digit, from the
+  most significant digit downward.
+ */
+void
+s_mp_div_2d(mp_int *mp, mp_digit d)
+{
+    int ix;
+    mp_digit save, next, mask;
+
+    s_mp_rshd(mp, d / DIGIT_BIT);
+    d %= DIGIT_BIT;
+    if (d) {
+        mask = ((mp_digit)1 << d) - 1;
+        save = 0; /* low d bits of the digit above, moved into the top of this one */
+        for (ix = USED(mp) - 1; ix >= 0; ix--) {
+            next = DIGIT(mp, ix) & mask;
+            DIGIT(mp, ix) = (DIGIT(mp, ix) >> d) | (save << (DIGIT_BIT - d));
+            save = next;
+        }
+    }
+    s_mp_clamp(mp);
+
+} /* end s_mp_div_2d() */
+
+/* }}} */
+
+/* {{{ s_mp_norm(a, b, *d) */
+
+/*
+  s_mp_norm(a, b, *d)
+
+  Normalize a and b for division, where b is the divisor. In order
+  that we might make good guesses for quotient digits, we want the
+  leading digit of b to be at least half the radix, which we
+  accomplish by multiplying a and b by a power of 2. The exponent
+  (shift count) is placed in *pd, so that the remainder can be shifted
+  back at the end of the division process.
+ */ + +mp_err +s_mp_norm(mp_int *a, mp_int *b, mp_digit *pd) +{ + mp_digit d; + mp_digit mask; + mp_digit b_msd; + mp_err res = MP_OKAY; + + d = 0; + mask = DIGIT_MAX & ~(DIGIT_MAX >> 1); /* mask is msb of digit */ + b_msd = DIGIT(b, USED(b) - 1); + while (!(b_msd & mask)) { + b_msd <<= 1; + ++d; + } + + if (d) { + MP_CHECKOK(s_mp_mul_2d(a, d)); + MP_CHECKOK(s_mp_mul_2d(b, d)); + } + + *pd = d; +CLEANUP: + return res; + +} /* end s_mp_norm() */ + +/* }}} */ + +/* }}} */ + +/* {{{ Primitive digit arithmetic */ + +/* {{{ s_mp_add_d(mp, d) */ + +/* Add d to |mp| in place */ +mp_err s_mp_add_d(mp_int *mp, mp_digit d) /* unsigned digit addition */ +{ +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD) + mp_word w, k = 0; + mp_size ix = 1; + + w = (mp_word)DIGIT(mp, 0) + d; + DIGIT(mp, 0) = ACCUM(w); + k = CARRYOUT(w); + + while (ix < USED(mp) && k) { + w = (mp_word)DIGIT(mp, ix) + k; + DIGIT(mp, ix) = ACCUM(w); + k = CARRYOUT(w); + ++ix; + } + + if (k != 0) { + mp_err res; + + if ((res = s_mp_pad(mp, USED(mp) + 1)) != MP_OKAY) + return res; + + DIGIT(mp, ix) = (mp_digit)k; + } + + return MP_OKAY; +#else + mp_digit *pmp = MP_DIGITS(mp); + mp_digit sum, mp_i, carry = 0; + mp_err res = MP_OKAY; + int used = (int)MP_USED(mp); + + mp_i = *pmp; + *pmp++ = sum = d + mp_i; + carry = (sum < d); + while (carry && --used > 0) { + mp_i = *pmp; + *pmp++ = sum = carry + mp_i; + carry = !sum; + } + if (carry && !used) { + /* mp is growing */ + used = MP_USED(mp); + MP_CHECKOK(s_mp_pad(mp, used + 1)); + MP_DIGIT(mp, used) = carry; + } +CLEANUP: + return res; +#endif +} /* end s_mp_add_d() */ + +/* }}} */ + +/* {{{ s_mp_sub_d(mp, d) */ + +/* Subtract d from |mp| in place, assumes |mp| > d */ +mp_err s_mp_sub_d(mp_int *mp, mp_digit d) /* unsigned digit subtract */ +{ +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD) + mp_word w, b = 0; + mp_size ix = 1; + + /* Compute initial subtraction */ + w = (RADIX + (mp_word)DIGIT(mp, 0)) - d; + b = CARRYOUT(w) ? 
0 : 1; + DIGIT(mp, 0) = ACCUM(w); + + /* Propagate borrows leftward */ + while (b && ix < USED(mp)) { + w = (RADIX + (mp_word)DIGIT(mp, ix)) - b; + b = CARRYOUT(w) ? 0 : 1; + DIGIT(mp, ix) = ACCUM(w); + ++ix; + } + + /* Remove leading zeroes */ + s_mp_clamp(mp); + + /* If we have a borrow out, it's a violation of the input invariant */ + if (b) + return MP_RANGE; + else + return MP_OKAY; +#else + mp_digit *pmp = MP_DIGITS(mp); + mp_digit mp_i, diff, borrow; + mp_size used = MP_USED(mp); + + mp_i = *pmp; + *pmp++ = diff = mp_i - d; + borrow = (diff > mp_i); + while (borrow && --used) { + mp_i = *pmp; + *pmp++ = diff = mp_i - borrow; + borrow = (diff > mp_i); + } + s_mp_clamp(mp); + return (borrow && !used) ? MP_RANGE : MP_OKAY; +#endif +} /* end s_mp_sub_d() */ + +/* }}} */ + +/* {{{ s_mp_mul_d(a, d) */ + +/* Compute a = a * d, single digit multiplication */ +mp_err +s_mp_mul_d(mp_int *a, mp_digit d) +{ + mp_err res; + mp_size used; + int pow; + + if (!d) { + mp_zero(a); + return MP_OKAY; + } + if (d == 1) + return MP_OKAY; + if (0 <= (pow = s_mp_ispow2d(d))) { + return s_mp_mul_2d(a, (mp_digit)pow); + } + + used = MP_USED(a); + MP_CHECKOK(s_mp_pad(a, used + 1)); + + s_mpv_mul_d(MP_DIGITS(a), used, d, MP_DIGITS(a)); + + s_mp_clamp(a); + +CLEANUP: + return res; + +} /* end s_mp_mul_d() */ + +/* }}} */ + +/* {{{ s_mp_div_d(mp, d, r) */ + +/* + s_mp_div_d(mp, d, r) + + Compute the quotient mp = mp / d and remainder r = mp mod d, for a + single digit d. If r is null, the remainder will be discarded. + */ + +mp_err +s_mp_div_d(mp_int *mp, mp_digit d, mp_digit *r) +{ +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD) + mp_word w = 0, q; +#else + mp_digit w = 0, q; +#endif + int ix; + mp_err res; + mp_int quot; + mp_int rem; + + if (d == 0) + return MP_RANGE; + if (d == 1) { + if (r) + *r = 0; + return MP_OKAY; + } + /* could check for power of 2 here, but mp_div_d does that. 
*/ + if (MP_USED(mp) == 1) { + mp_digit n = MP_DIGIT(mp, 0); + mp_digit rem; + + q = n / d; + rem = n % d; + MP_DIGIT(mp, 0) = q; + if (r) + *r = rem; + return MP_OKAY; + } + + MP_DIGITS(&rem) = 0; + MP_DIGITS(") = 0; + /* Make room for the quotient */ + MP_CHECKOK(mp_init_size(", USED(mp))); + +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD) + for (ix = USED(mp) - 1; ix >= 0; ix--) { + w = (w << DIGIT_BIT) | DIGIT(mp, ix); + + if (w >= d) { + q = w / d; + w = w % d; + } else { + q = 0; + } + + s_mp_lshd(", 1); + DIGIT(", 0) = (mp_digit)q; + } +#else + { + mp_digit p; +#if !defined(MP_ASSEMBLY_DIV_2DX1D) + mp_digit norm; +#endif + + MP_CHECKOK(mp_init_copy(&rem, mp)); + +#if !defined(MP_ASSEMBLY_DIV_2DX1D) + MP_DIGIT(", 0) = d; + MP_CHECKOK(s_mp_norm(&rem, ", &norm)); + if (norm) + d <<= norm; + MP_DIGIT(", 0) = 0; +#endif + + p = 0; + for (ix = USED(&rem) - 1; ix >= 0; ix--) { + w = DIGIT(&rem, ix); + + if (p) { + MP_CHECKOK(s_mpv_div_2dx1d(p, w, d, &q, &w)); + } else if (w >= d) { + q = w / d; + w = w % d; + } else { + q = 0; + } + + MP_CHECKOK(s_mp_lshd(", 1)); + DIGIT(", 0) = q; + p = w; + } +#if !defined(MP_ASSEMBLY_DIV_2DX1D) + if (norm) + w >>= norm; +#endif + } +#endif + + /* Deliver the remainder, if desired */ + if (r) { + *r = (mp_digit)w; + } + + s_mp_clamp("); + mp_exch(", mp); +CLEANUP: + mp_clear("); + mp_clear(&rem); + + return res; +} /* end s_mp_div_d() */ + +/* }}} */ + +/* }}} */ + +/* {{{ Primitive full arithmetic */ + +/* {{{ s_mp_add(a, b) */ + +/* Compute a = |a| + |b| */ +mp_err s_mp_add(mp_int *a, const mp_int *b) /* magnitude addition */ +{ +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD) + mp_word w = 0; +#else + mp_digit d, sum, carry = 0; +#endif + mp_digit *pa, *pb; + mp_size ix; + mp_size used; + mp_err res; + + /* Make sure a has enough precision for the output value */ + if ((USED(b) > USED(a)) && (res = s_mp_pad(a, USED(b))) != MP_OKAY) + return res; + + /* + Add up all digits up to the precision of b. 
If b had initially + the same precision as a, or greater, we took care of it by the + padding step above, so there is no problem. If b had initially + less precision, we'll have to make sure the carry out is duly + propagated upward among the higher-order digits of the sum. + */ + pa = MP_DIGITS(a); + pb = MP_DIGITS(b); + used = MP_USED(b); + for (ix = 0; ix < used; ix++) { +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD) + w = w + *pa + *pb++; + *pa++ = ACCUM(w); + w = CARRYOUT(w); +#else + d = *pa; + sum = d + *pb++; + d = (sum < d); /* detect overflow */ + *pa++ = sum += carry; + carry = d + (sum < carry); /* detect overflow */ +#endif + } + + /* If we run out of 'b' digits before we're actually done, make + sure the carries get propagated upward... + */ + used = MP_USED(a); +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD) + while (w && ix < used) { + w = w + *pa; + *pa++ = ACCUM(w); + w = CARRYOUT(w); + ++ix; + } +#else + while (carry && ix < used) { + sum = carry + *pa; + *pa++ = sum; + carry = !sum; + ++ix; + } +#endif + +/* If there's an overall carry out, increase precision and include + it. We could have done this initially, but why touch the memory + allocator unless we're sure we have to? 
+ */ +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD) + if (w) { + if ((res = s_mp_pad(a, used + 1)) != MP_OKAY) + return res; + + DIGIT(a, ix) = (mp_digit)w; + } +#else + if (carry) { + if ((res = s_mp_pad(a, used + 1)) != MP_OKAY) + return res; + + DIGIT(a, used) = carry; + } +#endif + + return MP_OKAY; +} /* end s_mp_add() */ + +/* }}} */ + +/* Compute c = |a| + |b| */ /* magnitude addition */ +mp_err +s_mp_add_3arg(const mp_int *a, const mp_int *b, mp_int *c) +{ + mp_digit *pa, *pb, *pc; +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD) + mp_word w = 0; +#else + mp_digit sum, carry = 0, d; +#endif + mp_size ix; + mp_size used; + mp_err res; + + MP_SIGN(c) = MP_SIGN(a); + if (MP_USED(a) < MP_USED(b)) { + const mp_int *xch = a; + a = b; + b = xch; + } + + /* Make sure a has enough precision for the output value */ + if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a)))) + return res; + + /* + Add up all digits up to the precision of b. If b had initially + the same precision as a, or greater, we took care of it by the + exchange step above, so there is no problem. If b had initially + less precision, we'll have to make sure the carry out is duly + propagated upward among the higher-order digits of the sum. + */ + pa = MP_DIGITS(a); + pb = MP_DIGITS(b); + pc = MP_DIGITS(c); + used = MP_USED(b); + for (ix = 0; ix < used; ix++) { +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD) + w = w + *pa++ + *pb++; + *pc++ = ACCUM(w); + w = CARRYOUT(w); +#else + d = *pa++; + sum = d + *pb++; + d = (sum < d); /* detect overflow */ + *pc++ = sum += carry; + carry = d + (sum < carry); /* detect overflow */ +#endif + } + + /* If we run out of 'b' digits before we're actually done, make + sure the carries get propagated upward... 
+ */ + for (used = MP_USED(a); ix < used; ++ix) { +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD) + w = w + *pa++; + *pc++ = ACCUM(w); + w = CARRYOUT(w); +#else + *pc++ = sum = carry + *pa++; + carry = (sum < carry); +#endif + } + +/* If there's an overall carry out, increase precision and include + it. We could have done this initially, but why touch the memory + allocator unless we're sure we have to? + */ +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD) + if (w) { + if ((res = s_mp_pad(c, used + 1)) != MP_OKAY) + return res; + + DIGIT(c, used) = (mp_digit)w; + ++used; + } +#else + if (carry) { + if ((res = s_mp_pad(c, used + 1)) != MP_OKAY) + return res; + + DIGIT(c, used) = carry; + ++used; + } +#endif + MP_USED(c) = used; + return MP_OKAY; +} +/* {{{ s_mp_add_offset(a, b, offset) */ + +/* Compute a = |a| + ( |b| * (RADIX ** offset) ) */ +mp_err +s_mp_add_offset(mp_int *a, mp_int *b, mp_size offset) +{ +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD) + mp_word w, k = 0; +#else + mp_digit d, sum, carry = 0; +#endif + mp_size ib; + mp_size ia; + mp_size lim; + mp_err res; + + /* Make sure a has enough precision for the output value */ + lim = MP_USED(b) + offset; + if ((lim > USED(a)) && (res = s_mp_pad(a, lim)) != MP_OKAY) + return res; + + /* + Add up all digits up to the precision of b. If b had initially + the same precision as a, or greater, we took care of it by the + padding step above, so there is no problem. If b had initially + less precision, we'll have to make sure the carry out is duly + propagated upward among the higher-order digits of the sum. 
+ */ + lim = USED(b); + for (ib = 0, ia = offset; ib < lim; ib++, ia++) { +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD) + w = (mp_word)DIGIT(a, ia) + DIGIT(b, ib) + k; + DIGIT(a, ia) = ACCUM(w); + k = CARRYOUT(w); +#else + d = MP_DIGIT(a, ia); + sum = d + MP_DIGIT(b, ib); + d = (sum < d); + MP_DIGIT(a, ia) = sum += carry; + carry = d + (sum < carry); +#endif + } + +/* If we run out of 'b' digits before we're actually done, make + sure the carries get propagated upward... + */ +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD) + for (lim = MP_USED(a); k && (ia < lim); ++ia) { + w = (mp_word)DIGIT(a, ia) + k; + DIGIT(a, ia) = ACCUM(w); + k = CARRYOUT(w); + } +#else + for (lim = MP_USED(a); carry && (ia < lim); ++ia) { + d = MP_DIGIT(a, ia); + MP_DIGIT(a, ia) = sum = d + carry; + carry = (sum < d); + } +#endif + +/* If there's an overall carry out, increase precision and include + it. We could have done this initially, but why touch the memory + allocator unless we're sure we have to? + */ +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_ADD_WORD) + if (k) { + if ((res = s_mp_pad(a, USED(a) + 1)) != MP_OKAY) + return res; + + DIGIT(a, ia) = (mp_digit)k; + } +#else + if (carry) { + if ((res = s_mp_pad(a, lim + 1)) != MP_OKAY) + return res; + + DIGIT(a, lim) = carry; + } +#endif + s_mp_clamp(a); + + return MP_OKAY; + +} /* end s_mp_add_offset() */ + +/* }}} */ + +/* {{{ s_mp_sub(a, b) */ + +/* Compute a = |a| - |b| in place, assumes |a| >= |b|. Returns MP_RANGE when a borrow is left over after the top digit of a (i.e. |b| > |a|), MP_OKAY otherwise. */ +mp_err s_mp_sub(mp_int *a, const mp_int *b) /* magnitude subtract */ +{ + mp_digit *pa, *pb, *limit; +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD) + mp_sword w = 0; +#else + mp_digit d, diff, borrow = 0; +#endif + + /* + Subtract and propagate borrow. Up to the precision of b, this + accounts for the digits of b; after that, we just make sure the + carries get to the right place. This saves having to pad b out to + the precision of a just to make the loops work right...
+ */ + pa = MP_DIGITS(a); + pb = MP_DIGITS(b); + limit = pb + MP_USED(b); + while (pb < limit) { +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD) + w = w + *pa - *pb++; + *pa++ = ACCUM(w); + w >>= MP_DIGIT_BIT; /* w is signed: what remains after the shift is the borrow carried into the next digit */ +#else + d = *pa; + diff = d - *pb++; + d = (diff > d); /* detect borrow */ + if (borrow && --diff == MP_DIGIT_MAX) + ++d; /* apply the incoming borrow; wrapping to MP_DIGIT_MAX means this digit borrows as well */ + *pa++ = diff; + borrow = d; +#endif + } + limit = MP_DIGITS(a) + MP_USED(a); +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD) + while (w && pa < limit) { + w = w + *pa; + *pa++ = ACCUM(w); + w >>= MP_DIGIT_BIT; + } +#else + while (borrow && pa < limit) { + d = *pa; + *pa++ = diff = d - borrow; + borrow = (diff > d); + } +#endif + + /* Clobber any leading zeroes we created */ + s_mp_clamp(a); + +/* + If there was a borrow out, then |b| > |a| in violation + of our input invariant. We've already done the work, + but we'll at least complain about it... + */ +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD) + return w ? MP_RANGE : MP_OKAY; +#else + return borrow ? MP_RANGE : MP_OKAY; +#endif +} /* end s_mp_sub() */ + +/* }}} */ + +/* Compute c = |a| - |b|, assumes |a| >= |b|. c is given the sign of a and is padded to MP_USED(a) digits before the subtraction; returns MP_RANGE on a final borrow, like s_mp_sub(). */ /* magnitude subtract */ +mp_err +s_mp_sub_3arg(const mp_int *a, const mp_int *b, mp_int *c) +{ + mp_digit *pa, *pb, *pc; +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD) + mp_sword w = 0; +#else + mp_digit d, diff, borrow = 0; +#endif + int ix, limit; + mp_err res; + + MP_SIGN(c) = MP_SIGN(a); + + /* Make sure c has enough precision for the output value */ + if (MP_OKAY != (res = s_mp_pad(c, MP_USED(a)))) + return res; + + /* + Subtract and propagate borrow. Up to the precision of b, this + accounts for the digits of b; after that, we just make sure the + carries get to the right place. This saves having to pad b out to + the precision of a just to make the loops work right...
+ */ + pa = MP_DIGITS(a); + pb = MP_DIGITS(b); + pc = MP_DIGITS(c); + limit = MP_USED(b); + for (ix = 0; ix < limit; ++ix) { +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD) + w = w + *pa++ - *pb++; + *pc++ = ACCUM(w); + w >>= MP_DIGIT_BIT; +#else + d = *pa++; + diff = d - *pb++; + d = (diff > d); + if (borrow && --diff == MP_DIGIT_MAX) + ++d; + *pc++ = diff; + borrow = d; +#endif + } + for (limit = MP_USED(a); ix < limit; ++ix) { +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD) + w = w + *pa++; + *pc++ = ACCUM(w); + w >>= MP_DIGIT_BIT; +#else + d = *pa++; + *pc++ = diff = d - borrow; + borrow = (diff > d); +#endif + } + + /* Clobber any leading zeroes we created */ + MP_USED(c) = ix; + s_mp_clamp(c); + +/* + If there was a borrow out, then |b| > |a| in violation + of our input invariant. We've already done the work, + but we'll at least complain about it... + */ +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_SUB_WORD) + return w ? MP_RANGE : MP_OKAY; +#else + return borrow ? MP_RANGE : MP_OKAY; +#endif +} +/* {{{ s_mp_mul(a, b) */ + +/* Compute a = |a| * |b|; implemented by delegating to mp_mul(a, b, a) and returning its result */ +mp_err +s_mp_mul(mp_int *a, const mp_int *b) +{ + return mp_mul(a, b, a); +} /* end s_mp_mul() */ + +/* }}} */ + +#if defined(MP_USE_UINT_DIGIT) && defined(MP_USE_LONG_LONG_MULTIPLY) +/* This trick works on Sparc V8 CPUs with the Workshop compilers.
*/ +#define MP_MUL_DxD(a, b, Phi, Plo) \ + { \ + unsigned long long product = (unsigned long long)a * b; \ + Plo = (mp_digit)product; \ + Phi = (mp_digit)(product >> MP_DIGIT_BIT); \ + } +#elif defined(OSF1) +#define MP_MUL_DxD(a, b, Phi, Plo) \ + { \ + Plo = asm("mulq %a0, %a1, %v0", a, b); \ + Phi = asm("umulh %a0, %a1, %v0", a, b); \ + } +#else /* portable fallback: assemble the double-width product (Phi:Plo) from four half-digit products */ +#define MP_MUL_DxD(a, b, Phi, Plo) \ + { \ + mp_digit a0b1, a1b0; \ + Plo = (a & MP_HALF_DIGIT_MAX) * (b & MP_HALF_DIGIT_MAX); \ + Phi = (a >> MP_HALF_DIGIT_BIT) * (b >> MP_HALF_DIGIT_BIT); \ + a0b1 = (a & MP_HALF_DIGIT_MAX) * (b >> MP_HALF_DIGIT_BIT); \ + a1b0 = (a >> MP_HALF_DIGIT_BIT) * (b & MP_HALF_DIGIT_MAX); \ + a1b0 += a0b1; \ + Phi += a1b0 >> MP_HALF_DIGIT_BIT; \ + if (a1b0 < a0b1) \ + Phi += MP_HALF_RADIX; \ + a1b0 <<= MP_HALF_DIGIT_BIT; \ + Plo += a1b0; \ + if (Plo < a1b0) \ + ++Phi; \ + } +#endif + +#if !defined(MP_ASSEMBLY_MULTIPLY) +/* c = a * b: multiply the a_len digits of a by the single digit b; writes a_len + 1 digits to c, the last one being the final carry */ +void +s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c) +{ +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD) + mp_digit d = 0; + + /* Inner product: Digits of a */ + while (a_len--) { + mp_word w = ((mp_word)b * *a++) + d; + *c++ = ACCUM(w); + d = CARRYOUT(w); + } + *c = d; +#else + mp_digit carry = 0; + while (a_len--) { + mp_digit a_i = *a++; + mp_digit a0b0, a1b1; + + MP_MUL_DxD(a_i, b, a1b1, a0b0); + + a0b0 += carry; + if (a0b0 < carry) + ++a1b1; + *c++ = a0b0; + carry = a1b1; + } + *c = carry; +#endif +} + +/* c += a * b over the low a_len digits of c; the final carry is stored into (not added to) c[a_len] */ +void +s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, + mp_digit *c) +{ +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD) + mp_digit d = 0; + + /* Inner product: Digits of a */ + while (a_len--) { + mp_word w = ((mp_word)b * *a++) + *c + d; + *c++ = ACCUM(w); + d = CARRYOUT(w); + } + *c = d; +#else + mp_digit carry = 0; + while (a_len--) { + mp_digit a_i = *a++; + mp_digit a0b0, a1b1; + + MP_MUL_DxD(a_i, b, a1b1, a0b0); + + a0b0 += carry; + if (a0b0 < carry) + ++a1b1; + a0b0 += a_i = *c; +
if (a0b0 < a_i) + ++a1b1; + *c++ = a0b0; + carry = a1b1; + } + *c = carry; +#endif +} + +/* Presently, this is only used by the Montgomery arithmetic code. */ +/* c += a * b over the low a_len digits, then the carry keeps being added into higher digits of c until it is absorbed; the propagation loop has no bound check of its own */ +void +s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c) +{ +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD) + mp_digit d = 0; + + /* Inner product: Digits of a */ + while (a_len--) { + mp_word w = ((mp_word)b * *a++) + *c + d; + *c++ = ACCUM(w); + d = CARRYOUT(w); + } + + while (d) { + mp_word w = (mp_word)*c + d; + *c++ = ACCUM(w); + d = CARRYOUT(w); + } +#else + mp_digit carry = 0; + while (a_len--) { + mp_digit a_i = *a++; + mp_digit a0b0, a1b1; + + MP_MUL_DxD(a_i, b, a1b1, a0b0); + + a0b0 += carry; + if (a0b0 < carry) + ++a1b1; + + a0b0 += a_i = *c; + if (a0b0 < a_i) + ++a1b1; + + *c++ = a0b0; + carry = a1b1; + } + while (carry) { + mp_digit c_i = *c; + carry += c_i; + *c++ = carry; + carry = carry < c_i; + } +#endif +} +#endif + +#if defined(MP_USE_UINT_DIGIT) && defined(MP_USE_LONG_LONG_MULTIPLY) +/* This trick works on Sparc V8 CPUs with the Workshop compilers. */ +#define MP_SQR_D(a, Phi, Plo) \ + { \ + unsigned long long square = (unsigned long long)a * a; \ + Plo = (mp_digit)square; \ + Phi = (mp_digit)(square >> MP_DIGIT_BIT); \ + } +#elif defined(OSF1) +#define MP_SQR_D(a, Phi, Plo) \ + { \ + Plo = asm("mulq %a0, %a0, %v0", a); \ + Phi = asm("umulh %a0, %a0, %v0", a); \ + } +#else /* portable fallback: the cross product Pmid is counted twice by shifting it one extra bit */ +#define MP_SQR_D(a, Phi, Plo) \ + { \ + mp_digit Pmid; \ + Plo = (a & MP_HALF_DIGIT_MAX) * (a & MP_HALF_DIGIT_MAX); \ + Phi = (a >> MP_HALF_DIGIT_BIT) * (a >> MP_HALF_DIGIT_BIT); \ + Pmid = (a & MP_HALF_DIGIT_MAX) * (a >> MP_HALF_DIGIT_BIT); \ + Phi += Pmid >> (MP_HALF_DIGIT_BIT - 1); \ + Pmid <<= (MP_HALF_DIGIT_BIT + 1); \ + Plo += Pmid; \ + if (Plo < Pmid) \ + ++Phi; \ + } +#endif + +#if !defined(MP_ASSEMBLY_SQUARE) +/* Add the squares of the digits of pa into ps: pa[i]^2 is added at ps[2*i] and ps[2*i + 1], and any final carry is propagated upward through ps.
*/ +void +s_mpv_sqr_add_prop(const mp_digit *pa, mp_size a_len, mp_digit *ps) +{ +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_MUL_WORD) + mp_word w; + mp_digit d; + mp_size ix; + + w = 0; +#define ADD_SQUARE(n) \ + d = pa[n]; \ + w += (d * (mp_word)d) + ps[2 * n]; \ + ps[2 * n] = ACCUM(w); \ + w = (w >> DIGIT_BIT) + ps[2 * n + 1]; \ + ps[2 * n + 1] = ACCUM(w); \ + w = (w >> DIGIT_BIT) + + for (ix = a_len; ix >= 4; ix -= 4) { + ADD_SQUARE(0); + ADD_SQUARE(1); + ADD_SQUARE(2); + ADD_SQUARE(3); + pa += 4; + ps += 8; + } + if (ix) { /* 1-3 leftover digits: step past them first, then address them with negative offsets */ + ps += 2 * ix; + pa += ix; + switch (ix) { + case 3: + ADD_SQUARE(-3); /* FALLTHRU */ + case 2: + ADD_SQUARE(-2); /* FALLTHRU */ + case 1: + ADD_SQUARE(-1); /* FALLTHRU */ + case 0: + break; + } + } + while (w) { + w += *ps; + *ps++ = ACCUM(w); + w = (w >> DIGIT_BIT); + } +#else + mp_digit carry = 0; + while (a_len--) { + mp_digit a_i = *pa++; + mp_digit a0a0, a1a1; + + MP_SQR_D(a_i, a1a1, a0a0); + + /* here a1a1 and a0a0 constitute a_i ** 2 */ + a0a0 += carry; + if (a0a0 < carry) + ++a1a1; + + /* now add to ps */ + a0a0 += a_i = *ps; + if (a0a0 < a_i) + ++a1a1; + *ps++ = a0a0; + a1a1 += a_i = *ps; + carry = (a1a1 < a_i); + *ps++ = a1a1; + } + while (carry) { + mp_digit s_i = *ps; + carry += s_i; + *ps++ = carry; + carry = carry < s_i; + } +#endif +} +#endif + +#if (defined(MP_NO_MP_WORD) || defined(MP_NO_DIV_WORD)) && !defined(MP_ASSEMBLY_DIV_2DX1D) +/* +** Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized +** so its high bit is 1. This code is from NSPR.
+*/ +mp_err +s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor, + mp_digit *qp, mp_digit *rp) +{ + mp_digit d1, d0, q1, q0; + mp_digit r1, r0, m; + + d1 = divisor >> MP_HALF_DIGIT_BIT; + d0 = divisor & MP_HALF_DIGIT_MAX; + r1 = Nhi % d1; + q1 = Nhi / d1; + m = q1 * d0; + r1 = (r1 << MP_HALF_DIGIT_BIT) | (Nlo >> MP_HALF_DIGIT_BIT); + if (r1 < m) { + q1--, r1 += divisor; + if (r1 >= divisor && r1 < m) { + q1--, r1 += divisor; + } + } + r1 -= m; + r0 = r1 % d1; + q0 = r1 / d1; + m = q0 * d0; + r0 = (r0 << MP_HALF_DIGIT_BIT) | (Nlo & MP_HALF_DIGIT_MAX); + if (r0 < m) { + q0--, r0 += divisor; + if (r0 >= divisor && r0 < m) { + q0--, r0 += divisor; + } + } + if (qp) + *qp = (q1 << MP_HALF_DIGIT_BIT) | q0; + if (rp) + *rp = r0 - m; + return MP_OKAY; +} +#endif + +#if MP_SQUARE +/* {{{ s_mp_sqr(a) */ + +mp_err +s_mp_sqr(mp_int *a) +{ + mp_err res; + mp_int tmp; + + if ((res = mp_init_size(&tmp, 2 * USED(a))) != MP_OKAY) + return res; + res = mp_sqr(a, &tmp); + if (res == MP_OKAY) { + s_mp_exch(&tmp, a); + } + mp_clear(&tmp); + return res; +} + +/* }}} */ +#endif + +/* {{{ s_mp_div(a, b) */ + +/* + s_mp_div(rem, div, quot) + + Compute quot = rem / div and rem = rem mod div on magnitudes: the + signs of rem and div are forced to ZPOS. On entry rem holds the + dividend and quot must be zero. div is normalized in place by + s_mp_norm() and is not denormalized here, only rem is. + Returns MP_RANGE if div is zero or if a quotient digit cannot be + corrected within the back-off budget.
+ */ + +mp_err s_mp_div(mp_int *rem, /* i: dividend, o: remainder */ + mp_int *div, /* i: divisor */ + mp_int *quot) /* i: 0; o: quotient */ +{ + mp_int part, t; +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD) + mp_word q_msd; +#else + mp_digit q_msd; +#endif + mp_err res; + mp_digit d; + mp_digit div_msd; + int ix; + + if (mp_cmp_z(div) == 0) + return MP_RANGE; + + DIGITS(&t) = 0; + /* Shortcut if divisor is power of two */ + if ((ix = s_mp_ispow2(div)) >= 0) { + MP_CHECKOK(mp_copy(rem, quot)); + s_mp_div_2d(quot, (mp_digit)ix); + s_mp_mod_2d(rem, (mp_digit)ix); + + return MP_OKAY; + } + + MP_SIGN(rem) = ZPOS; + MP_SIGN(div) = ZPOS; + MP_SIGN(&part) = ZPOS; + + /* A working temporary for division */ + MP_CHECKOK(mp_init_size(&t, MP_ALLOC(rem))); + + /* Normalize to optimize guessing */ + MP_CHECKOK(s_mp_norm(rem, div, &d)); + + /* Perform the division itself...woo! */ + MP_USED(quot) = MP_ALLOC(quot); + + /* Find a partial substring of rem which is at least div */ + /* If we didn't find one, we're finished dividing */ + while (MP_USED(rem) > MP_USED(div) || s_mp_cmp(rem, div) >= 0) { + int i; + int unusedRem; + int partExtended = 0; /* set to true if we need to extend part */ + + unusedRem = MP_USED(rem) - MP_USED(div); + MP_DIGITS(&part) = MP_DIGITS(rem) + unusedRem; + MP_ALLOC(&part) = MP_ALLOC(rem) - unusedRem; + MP_USED(&part) = MP_USED(div); + + /* We have now truncated the part of the remainder to the same length as + * the divisor. If part is smaller than div, extend part by one digit. */ + if (s_mp_cmp(&part, div) < 0) { + --unusedRem; +#if MP_ARGCHK == 2 + assert(unusedRem >= 0); +#endif + --MP_DIGITS(&part); + ++MP_USED(&part); + ++MP_ALLOC(&part); + partExtended = 1; + } + + /* Compute a guess for the next quotient digit */ + q_msd = MP_DIGIT(&part, MP_USED(&part) - 1); + div_msd = MP_DIGIT(div, MP_USED(div) - 1); + if (!partExtended) { + /* In this case, q_msd /= div_msd is always 1.
First, since div_msd is + * normalized to have the high bit set, 2*div_msd > MP_DIGIT_MAX. Since + * we didn't extend part, q_msd >= div_msd. Therefore we know that + * div_msd <= q_msd <= MP_DIGIT_MAX < 2*div_msd. Dividing by div_msd we + * get 1 <= q_msd/div_msd < 2. So q_msd /= div_msd must be 1. */ + q_msd = 1; + } else { +#if !defined(MP_NO_MP_WORD) && !defined(MP_NO_DIV_WORD) + q_msd = (q_msd << MP_DIGIT_BIT) | MP_DIGIT(&part, MP_USED(&part) - 2); + q_msd /= div_msd; + if (q_msd == RADIX) + --q_msd; +#else + if (q_msd == div_msd) { + q_msd = MP_DIGIT_MAX; + } else { + mp_digit r; + MP_CHECKOK(s_mpv_div_2dx1d(q_msd, MP_DIGIT(&part, MP_USED(&part) - 2), + div_msd, &q_msd, &r)); + } +#endif + } +#if MP_ARGCHK == 2 + assert(q_msd > 0); /* This case should never occur any more. */ +#endif + if (q_msd <= 0) + break; + + /* See what that multiplies out to */ + MP_CHECKOK(mp_copy(div, &t)); + MP_CHECKOK(s_mp_mul_d(&t, (mp_digit)q_msd)); + + /* + If it's too big, back it off. We should not have to do this + more than once, or, in rare cases, twice. Knuth describes a + method by which this could be reduced to a maximum of once, but + I didn't implement that here. + * When using s_mpv_div_2dx1d, we may have to do this 3 times. + */ + for (i = 4; s_mp_cmp(&t, &part) > 0 && i > 0; --i) { + --q_msd; + MP_CHECKOK(s_mp_sub(&t, div)); /* t -= div */ + } + /* The loop counter stops at zero and never goes negative, so test the + comparison itself to detect an exhausted back-off budget. */ + if (s_mp_cmp(&t, &part) > 0) { + res = MP_RANGE; + goto CLEANUP; + } + + /* At this point, q_msd should be the right next digit */ + MP_CHECKOK(s_mp_sub(&part, &t)); /* part -= t */ + s_mp_clamp(rem); + + /* + Include the digit in the quotient.
We allocated enough memory + for any quotient we could ever possibly get, so we should not + have to check for failures here + */ + MP_DIGIT(quot, unusedRem) = (mp_digit)q_msd; + } + + /* Denormalize remainder */ + if (d) { + s_mp_div_2d(rem, d); + } + + s_mp_clamp(quot); + +CLEANUP: + mp_clear(&t); + + return res; + +} /* end s_mp_div() */ + +/* }}} */ + +/* {{{ s_mp_2expt(a, k) */ + +mp_err +s_mp_2expt(mp_int *a, mp_digit k) +{ + mp_err res; + mp_size dig, bit; + + dig = k / DIGIT_BIT; + bit = k % DIGIT_BIT; + + mp_zero(a); + if ((res = s_mp_pad(a, dig + 1)) != MP_OKAY) + return res; + + DIGIT(a, dig) |= ((mp_digit)1 << bit); + + return MP_OKAY; + +} /* end s_mp_2expt() */ + +/* }}} */ + +/* {{{ s_mp_reduce(x, m, mu) */ + +/* + Compute Barrett reduction, x (mod m), given a precomputed value for + mu = b^2k / m, where b = RADIX and k = #digits(m). This should be + faster than straight division, when many reductions by the same + value of m are required (such as in modular exponentiation). This + can nearly halve the time required to do modular exponentiation, + as compared to using the full integer divide to reduce. + + This algorithm was derived from the _Handbook of Applied + Cryptography_ by Menezes, Oorschot and VanStone, Ch. 14, + pp. 603-604.
+ */ + +mp_err +s_mp_reduce(mp_int *x, const mp_int *m, const mp_int *mu) +{ + mp_int q; + mp_err res; + + if ((res = mp_init_copy(&q, x)) != MP_OKAY) + return res; + + s_mp_rshd(&q, USED(m) - 1); /* q1 = x / b^(k-1) */ + /* s_mp_mul() can fail (it returns mp_mul()'s result), so check it instead of reducing with a stale q */ + if ((res = s_mp_mul(&q, mu)) != MP_OKAY) /* q2 = q1 * mu */ + goto CLEANUP; + s_mp_rshd(&q, USED(m) + 1); /* q3 = q2 / b^(k+1) */ + + /* x = x mod b^(k+1), quick (no division) */ + s_mp_mod_2d(x, DIGIT_BIT * (USED(m) + 1)); + + /* q = q * m mod b^(k+1), quick (no division) */ + if ((res = s_mp_mul(&q, m)) != MP_OKAY) + goto CLEANUP; + s_mp_mod_2d(&q, DIGIT_BIT * (USED(m) + 1)); + + /* x = x - q */ + if ((res = mp_sub(x, &q, x)) != MP_OKAY) + goto CLEANUP; + + /* If x < 0, add b^(k+1) to it */ + if (mp_cmp_z(x) < 0) { + mp_set(&q, 1); + if ((res = s_mp_lshd(&q, USED(m) + 1)) != MP_OKAY) + goto CLEANUP; + if ((res = mp_add(x, &q, x)) != MP_OKAY) + goto CLEANUP; + } + + /* Back off if it's too big */ + while (mp_cmp(x, m) >= 0) { + if ((res = s_mp_sub(x, m)) != MP_OKAY) + break; + } + +CLEANUP: + mp_clear(&q); + + return res; + +} /* end s_mp_reduce() */ + +/* }}} */ + +/* }}} */ + +/* {{{ Primitive comparisons */ + +/* {{{ s_mp_cmp(a, b) */ + +/* Compare |a| <=> |b|, return 0 if equal, <0 if a<b, >0 if a>b */ +int +s_mp_cmp(const mp_int *a, const mp_int *b) +{ + mp_size used_a = MP_USED(a); + { + mp_size used_b = MP_USED(b); + + if (used_a > used_b) + goto IS_GT; + if (used_a < used_b) + goto IS_LT; + } + { + mp_digit *pa, *pb; + mp_digit da = 0, db = 0; + +#define CMP_AB(n) \ + if ((da = pa[n]) != (db = pb[n])) \ + goto done + + pa = MP_DIGITS(a) + used_a; + pb = MP_DIGITS(b) + used_a; + while (used_a >= 4) { + pa -= 4; + pb -= 4; + used_a -= 4; + CMP_AB(3); + CMP_AB(2); + CMP_AB(1); + CMP_AB(0); + } + while (used_a-- > 0 && ((da = *--pa) == (db = *--pb))) + /* do nothing */; + done: + if (da > db) + goto IS_GT; + if (da < db) + goto IS_LT; + } + return MP_EQ; +IS_LT: + return MP_LT; +IS_GT: + return MP_GT; +} /* end s_mp_cmp() */ + +/* }}} */ + +/* {{{ s_mp_cmp_d(a, d) */ + +/* Compare |a| <=> d, return 0 if equal, <0 if a<d, >0 if
a>d */ +int +s_mp_cmp_d(const mp_int *a, mp_digit d) +{ + if (USED(a) > 1) /* more than one digit in use: treated as larger than any single digit (assumes a is clamped) */ + return MP_GT; + + if (DIGIT(a, 0) < d) + return MP_LT; + else if (DIGIT(a, 0) > d) + return MP_GT; + else + return MP_EQ; + +} /* end s_mp_cmp_d() */ + +/* }}} */ + +/* {{{ s_mp_ispow2(v) */ + +/* + Returns -1 if the value is not a power of two; otherwise, it returns + k such that v = 2^k, i.e. lg(v). Only the digits are examined; the + sign of v is not read. + */ +int +s_mp_ispow2(const mp_int *v) +{ + mp_digit d; + int extra = 0, ix; + + ix = MP_USED(v) - 1; + d = MP_DIGIT(v, ix); /* most significant digit of v */ + + extra = s_mp_ispow2d(d); + if (extra < 0 || ix == 0) /* top digit is not a power of two, or it is the only digit */ + return extra; + + while (--ix >= 0) { /* every lower digit must be zero; each one adds MP_DIGIT_BIT to the exponent */ + if (DIGIT(v, ix) != 0) + return -1; /* not a power of two */ + extra += MP_DIGIT_BIT; + } + + return extra; + +} /* end s_mp_ispow2() */ + +/* }}} */ + +/* {{{ s_mp_ispow2d(d) */ + +int +s_mp_ispow2d(mp_digit d) +{ + if ((d != 0) && ((d & (d - 1)) == 0)) { /* d is a power of 2: exactly one bit is set, and the masks below binary-search its position */ + int pow = 0; +#if defined(MP_USE_UINT_DIGIT) + if (d & 0xffff0000U) + pow += 16; + if (d & 0xff00ff00U) + pow += 8; + if (d & 0xf0f0f0f0U) + pow += 4; + if (d & 0xccccccccU) + pow += 2; + if (d & 0xaaaaaaaaU) + pow += 1; +#elif defined(MP_USE_LONG_LONG_DIGIT) + if (d & 0xffffffff00000000ULL) + pow += 32; + if (d & 0xffff0000ffff0000ULL) + pow += 16; + if (d & 0xff00ff00ff00ff00ULL) + pow += 8; + if (d & 0xf0f0f0f0f0f0f0f0ULL) + pow += 4; + if (d & 0xccccccccccccccccULL) + pow += 2; + if (d & 0xaaaaaaaaaaaaaaaaULL) + pow += 1; +#elif defined(MP_USE_LONG_DIGIT) + if (d & 0xffffffff00000000UL) + pow += 32; + if (d & 0xffff0000ffff0000UL) + pow += 16; + if (d & 0xff00ff00ff00ff00UL) + pow += 8; + if (d & 0xf0f0f0f0f0f0f0f0UL) + pow += 4; + if (d & 0xccccccccccccccccUL) + pow += 2; + if (d & 0xaaaaaaaaaaaaaaaaUL) + pow += 1; +#else +#error "unknown type for mp_digit" +#endif + return pow; + } + return -1; /* d is zero or has more than one bit set */ + +} /* end s_mp_ispow2d() */ + +/* }}} */ + +/* }}} */ + +/* {{{ Primitive I/O helpers */ + +/* {{{ s_mp_tovalue(ch, r) */ + +/* + Convert the given character to
its digit value, in the given radix. + If the given character is not understood in the given radix, -1 is + returned. Otherwise the digit's numeric value is returned. + For radix <= 36 letters are case-insensitive; above that, upper case + maps to 10..35, lower case to 36..61, '+' to 62 and '/' to 63. + + The results will be odd if you use a radix < 2 or > 64, you are + expected to know what you're up to. + */ +int +s_mp_tovalue(char ch, int r) +{ + int val, xch; + + /* The <ctype.h> functions are only defined for values representable + as unsigned char (or EOF); a negative plain char must not reach them. */ + xch = (unsigned char)ch; + if (r <= 36) + xch = toupper(xch); + + if (isdigit(xch)) + val = xch - '0'; + else if (isupper(xch)) + val = xch - 'A' + 10; + else if (islower(xch)) + val = xch - 'a' + 36; + else if (xch == '+') + val = 62; + else if (xch == '/') + val = 63; + else + return -1; + + if (val < 0 || val >= r) + return -1; + + return val; + +} /* end s_mp_tovalue() */ + +/* }}} */ + +/* {{{ s_mp_todigit(val, r, low) */ + +/* + Convert val to a radix-r digit, if possible. If val is out of range + for r, returns zero. Otherwise, returns an ASCII character denoting + the value in the given radix. + + The results may be odd if you use a radix < 2 or > 64, you are + expected to know what you're doing. + */ + +char +s_mp_todigit(mp_digit val, int r, int low) +{ + char ch; + + if (val >= r) + return 0; + + ch = s_dmap_1[val]; + + if (r <= 36 && low) + ch = tolower(ch); + + return ch; + +} /* end s_mp_todigit() */ + +/* }}} */ + +/* {{{ s_mp_outlen(bits, radix) */ + +/* + Return an estimate for how long a string is needed to hold a radix + r representation of a number with 'bits' significant bits, plus an + extra for a zero terminator (assuming C style strings here) + */ +int +s_mp_outlen(int bits, int r) +{ + return (int)((double)bits * LOG_V_2(r) + 1.5) + 1; + +} /* end s_mp_outlen() */ + +/* }}} */ + +/* }}} */ + +/* {{{ mp_read_unsigned_octets(mp, str, len) */ +/* mp_read_unsigned_octets(mp, str, len) + Read in a raw value (base 256) into the given mp_int + No sign bit, number is positive. Leading zeros ignored.
+ */ + +mp_err +mp_read_unsigned_octets(mp_int *mp, const unsigned char *str, mp_size len) +{ + int count; + mp_err res; + mp_digit d; + + ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG); + + mp_zero(mp); + + count = len % sizeof(mp_digit); + if (count) { /* leading partial digit: the first len % sizeof(mp_digit) octets */ + for (d = 0; count-- > 0; --len) { + d = (d << 8) | *str++; + } + MP_DIGIT(mp, 0) = d; + } + + /* Read the rest of the digits, one full mp_digit (sizeof(mp_digit) octets) at a time */ + for (; len > 0; len -= sizeof(mp_digit)) { + for (d = 0, count = sizeof(mp_digit); count > 0; --count) { + d = (d << 8) | *str++; + } + if (MP_EQ == mp_cmp_z(mp)) { + if (!d) + continue; /* value is still zero: skip an all-zero leading digit */ + } else { + if ((res = s_mp_lshd(mp, 1)) != MP_OKAY) + return res; + } + MP_DIGIT(mp, 0) = d; + } + return MP_OKAY; +} /* end mp_read_unsigned_octets() */ +/* }}} */ + +/* {{{ mp_unsigned_octet_size(mp) */ +unsigned int /* octets needed for the magnitude of mp without leading zero octets; 1 when the value is zero */ +mp_unsigned_octet_size(const mp_int *mp) +{ + unsigned int bytes; + int ix; + mp_digit d = 0; + + ARGCHK(mp != NULL, MP_BADARG); + ARGCHK(MP_ZPOS == SIGN(mp), MP_BADARG); + + bytes = (USED(mp) * sizeof(mp_digit)); + + /* subtract leading zeros. */ + /* Iterate over each digit... */ + for (ix = USED(mp) - 1; ix >= 0; ix--) { + d = DIGIT(mp, ix); + if (d) + break; + bytes -= sizeof(d); + } + if (!bytes) + return 1; + + /* Have MSD, check digit bytes, high order first */ + for (ix = sizeof(mp_digit) - 1; ix >= 0; ix--) { + unsigned char x = (unsigned char)(d >> (ix * CHAR_BIT)); + if (x) + break; + --bytes; + } + return bytes; +} /* end mp_unsigned_octet_size() */ +/* }}} */ + +/* {{{ mp_to_unsigned_octets(mp, str) */ +/* output a buffer of big endian octets no longer than specified. On success the return value is the number of octets written to str, not MP_OKAY. */ +mp_err +mp_to_unsigned_octets(const mp_int *mp, unsigned char *str, mp_size maxlen) +{ + int ix, pos = 0; + unsigned int bytes; + + ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG); + + bytes = mp_unsigned_octet_size(mp); + ARGCHK(bytes <= maxlen, MP_BADARG); + + /* Iterate over each digit...
*/ + for (ix = USED(mp) - 1; ix >= 0; ix--) { + mp_digit d = DIGIT(mp, ix); + int jx; + + /* Unpack digit bytes, high order first */ + for (jx = sizeof(mp_digit) - 1; jx >= 0; jx--) { + unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT)); + if (!pos && !x) /* suppress leading zeros */ + continue; + str[pos++] = x; + } + } + if (!pos) + str[pos++] = 0; + return pos; +} /* end mp_to_unsigned_octets() */ +/* }}} */ + +/* {{{ mp_to_signed_octets(mp, str) */ +/* output a buffer of big endian octets no longer than specified. A zero octet is prepended when the first octet has its top bit set; on success the return value is the number of octets written to str. */ +mp_err +mp_to_signed_octets(const mp_int *mp, unsigned char *str, mp_size maxlen) +{ + int ix, pos = 0; + unsigned int bytes; + + ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG); + + bytes = mp_unsigned_octet_size(mp); + ARGCHK(bytes <= maxlen, MP_BADARG); + + /* Iterate over each digit... */ + for (ix = USED(mp) - 1; ix >= 0; ix--) { + mp_digit d = DIGIT(mp, ix); + int jx; + + /* Unpack digit bytes, high order first */ + for (jx = sizeof(mp_digit) - 1; jx >= 0; jx--) { + unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT)); + if (!pos) { + if (!x) /* suppress leading zeros */ + continue; + if (x & 0x80) { /* add one leading zero to make output positive. */ + ARGCHK(bytes + 1 <= maxlen, MP_BADARG); + if (bytes + 1 > maxlen) /* repeated explicitly because ARGCHK can expand to nothing */ + return MP_BADARG; + str[pos++] = 0; + } + } + str[pos++] = x; + } + } + if (!pos) + str[pos++] = 0; + return pos; +} /* end mp_to_signed_octets() */ +/* }}} */ + +/* {{{ mp_to_fixlen_octets(mp, str) */ +/* output a buffer of big endian octets exactly as long as requested, left-padded with zero octets; returns MP_OKAY on success. */ +mp_err +mp_to_fixlen_octets(const mp_int *mp, unsigned char *str, mp_size length) +{ + int ix, pos = 0; + unsigned int bytes; + + ARGCHK(mp != NULL && str != NULL && !SIGN(mp), MP_BADARG); + + bytes = mp_unsigned_octet_size(mp); + ARGCHK(bytes <= length, MP_BADARG); + + /* place any needed leading zeros */ + for (; length > bytes; --length) { + *str++ = 0; + } + + /* Iterate over each digit...
*/ + for (ix = USED(mp) - 1; ix >= 0; ix--) { + mp_digit d = DIGIT(mp, ix); + int jx; + + /* Unpack digit bytes, high order first */ + for (jx = sizeof(mp_digit) - 1; jx >= 0; jx--) { + unsigned char x = (unsigned char)(d >> (jx * CHAR_BIT)); + if (!pos && !x) /* suppress leading zeros */ + continue; + str[pos++] = x; + } + } + if (!pos) + str[pos++] = 0; + return MP_OKAY; +} /* end mp_to_fixlen_octets() */ +/* }}} */ + +/*------------------------------------------------------------------------*/ +/* HERE THERE BE DRAGONS */ diff --git a/security/nss/lib/freebl/mpi/mpi.h b/security/nss/lib/freebl/mpi/mpi.h new file mode 100644 index 000000000..64ffe75d5 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpi.h @@ -0,0 +1,313 @@ +/* + * mpi.h + * + * Arbitrary precision integer arithmetic library + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/ + +#ifndef _H_MPI_ +#define _H_MPI_ + +#include "mpi-config.h" + +#include "seccomon.h" +SEC_BEGIN_PROTOS + +#if MP_DEBUG +#undef MP_IOFUNC +#define MP_IOFUNC 1 +#endif + +#if MP_IOFUNC +#include +#include +#endif + +#include + +#if defined(BSDI) +#undef ULLONG_MAX +#endif + +#include + +#define MP_NEG 1 +#define MP_ZPOS 0 + +#define MP_OKAY 0 /* no error, all is well */ +#define MP_YES 0 /* yes (boolean result) */ +#define MP_NO -1 /* no (boolean result) */ +#define MP_MEM -2 /* out of memory */ +#define MP_RANGE -3 /* argument out of range */ +#define MP_BADARG -4 /* invalid parameter */ +#define MP_UNDEF -5 /* answer is undefined */ +#define MP_LAST_CODE MP_UNDEF + +typedef unsigned int mp_sign; +typedef unsigned int mp_size; +typedef int mp_err; + +#define MP_32BIT_MAX 4294967295U + +#if !defined(ULONG_MAX) +#error "ULONG_MAX not defined" +#elif !defined(UINT_MAX) +#error "UINT_MAX not defined" +#elif !defined(USHRT_MAX) +#error "USHRT_MAX not defined" +#endif + +#if defined(ULLONG_MAX) /* C99, Solaris */ +#define MP_ULONG_LONG_MAX ULLONG_MAX +/* MP_ULONG_LONG_MAX was defined to be ULLONG_MAX */ +#elif defined(ULONG_LONG_MAX) /* HPUX */ +#define MP_ULONG_LONG_MAX ULONG_LONG_MAX +#elif defined(ULONGLONG_MAX) /* IRIX, AIX */ +#define MP_ULONG_LONG_MAX ULONGLONG_MAX +#endif + +/* We only use unsigned long for mp_digit iff long is more than 32 bits. 
*/ +#if !defined(MP_USE_UINT_DIGIT) && ULONG_MAX > MP_32BIT_MAX +typedef unsigned long mp_digit; +#define MP_DIGIT_MAX ULONG_MAX +#define MP_DIGIT_FMT "%016lX" /* printf() format for 1 digit */ +#define MP_HALF_DIGIT_MAX UINT_MAX +#undef MP_NO_MP_WORD +#define MP_NO_MP_WORD 1 +#undef MP_USE_LONG_DIGIT +#define MP_USE_LONG_DIGIT 1 +#undef MP_USE_LONG_LONG_DIGIT + +#elif !defined(MP_USE_UINT_DIGIT) && defined(MP_ULONG_LONG_MAX) +typedef unsigned long long mp_digit; +#define MP_DIGIT_MAX MP_ULONG_LONG_MAX +#define MP_DIGIT_FMT "%016llX" /* printf() format for 1 digit */ +#define MP_HALF_DIGIT_MAX UINT_MAX +#undef MP_NO_MP_WORD +#define MP_NO_MP_WORD 1 +#undef MP_USE_LONG_LONG_DIGIT +#define MP_USE_LONG_LONG_DIGIT 1 +#undef MP_USE_LONG_DIGIT + +#else +typedef unsigned int mp_digit; +#define MP_DIGIT_MAX UINT_MAX +#define MP_DIGIT_FMT "%08X" /* printf() format for 1 digit */ +#define MP_HALF_DIGIT_MAX USHRT_MAX +#undef MP_USE_UINT_DIGIT +#define MP_USE_UINT_DIGIT 1 +#undef MP_USE_LONG_LONG_DIGIT +#undef MP_USE_LONG_DIGIT +#endif + +#if !defined(MP_NO_MP_WORD) +#if defined(MP_USE_UINT_DIGIT) && \ + (defined(MP_ULONG_LONG_MAX) || (ULONG_MAX > UINT_MAX)) + +#if (ULONG_MAX > UINT_MAX) +typedef unsigned long mp_word; +typedef long mp_sword; +#define MP_WORD_MAX ULONG_MAX + +#else +typedef unsigned long long mp_word; +typedef long long mp_sword; +#define MP_WORD_MAX MP_ULONG_LONG_MAX +#endif + +#else +#define MP_NO_MP_WORD 1 +#endif +#endif /* !defined(MP_NO_MP_WORD) */ + +#if !defined(MP_WORD_MAX) && defined(MP_DEFINE_SMALL_WORD) +typedef unsigned int mp_word; +typedef int mp_sword; +#define MP_WORD_MAX UINT_MAX +#endif + +#define MP_DIGIT_BIT (CHAR_BIT * sizeof(mp_digit)) +#define MP_WORD_BIT (CHAR_BIT * sizeof(mp_word)) +#define MP_RADIX (1 + (mp_word)MP_DIGIT_MAX) + +#define MP_HALF_DIGIT_BIT (MP_DIGIT_BIT / 2) +#define MP_HALF_RADIX (1 + (mp_digit)MP_HALF_DIGIT_MAX) +/* MP_HALF_RADIX really ought to be called MP_SQRT_RADIX, but it's named +** MP_HALF_RADIX because it's 
the radix for MP_HALF_DIGITs, and it's +** consistent with the other _HALF_ names. +*/ + +/* Macros for accessing the mp_int internals */ +#define MP_SIGN(MP) ((MP)->sign) +#define MP_USED(MP) ((MP)->used) +#define MP_ALLOC(MP) ((MP)->alloc) +#define MP_DIGITS(MP) ((MP)->dp) +#define MP_DIGIT(MP, N) (MP)->dp[(N)] + +/* This defines the maximum I/O base (minimum is 2) */ +#define MP_MAX_RADIX 64 + +typedef struct { + mp_sign sign; /* sign of this quantity */ + mp_size alloc; /* how many digits allocated */ + mp_size used; /* how many digits used */ + mp_digit *dp; /* the digits themselves */ +} mp_int; + +/* Default precision */ +mp_size mp_get_prec(void); +void mp_set_prec(mp_size prec); + +/* Memory management */ +mp_err mp_init(mp_int *mp); +mp_err mp_init_size(mp_int *mp, mp_size prec); +mp_err mp_init_copy(mp_int *mp, const mp_int *from); +mp_err mp_copy(const mp_int *from, mp_int *to); +void mp_exch(mp_int *mp1, mp_int *mp2); +void mp_clear(mp_int *mp); +void mp_zero(mp_int *mp); +void mp_set(mp_int *mp, mp_digit d); +mp_err mp_set_int(mp_int *mp, long z); +#define mp_set_long(mp, z) mp_set_int(mp, z) +mp_err mp_set_ulong(mp_int *mp, unsigned long z); + +/* Single digit arithmetic */ +mp_err mp_add_d(const mp_int *a, mp_digit d, mp_int *b); +mp_err mp_sub_d(const mp_int *a, mp_digit d, mp_int *b); +mp_err mp_mul_d(const mp_int *a, mp_digit d, mp_int *b); +mp_err mp_mul_2(const mp_int *a, mp_int *c); +mp_err mp_div_d(const mp_int *a, mp_digit d, mp_int *q, mp_digit *r); +mp_err mp_div_2(const mp_int *a, mp_int *c); +mp_err mp_expt_d(const mp_int *a, mp_digit d, mp_int *c); + +/* Sign manipulations */ +mp_err mp_abs(const mp_int *a, mp_int *b); +mp_err mp_neg(const mp_int *a, mp_int *b); + +/* Full arithmetic */ +mp_err mp_add(const mp_int *a, const mp_int *b, mp_int *c); +mp_err mp_sub(const mp_int *a, const mp_int *b, mp_int *c); +mp_err mp_mul(const mp_int *a, const mp_int *b, mp_int *c); +#if MP_SQUARE +mp_err mp_sqr(const mp_int *a, mp_int *b); +#else
+#define mp_sqr(a, b) mp_mul(a, a, b) +#endif +mp_err mp_div(const mp_int *a, const mp_int *b, mp_int *q, mp_int *r); +mp_err mp_div_2d(const mp_int *a, mp_digit d, mp_int *q, mp_int *r); +mp_err mp_expt(mp_int *a, mp_int *b, mp_int *c); +mp_err mp_2expt(mp_int *a, mp_digit k); + +/* Modular arithmetic */ +#if MP_MODARITH +mp_err mp_mod(const mp_int *a, const mp_int *m, mp_int *c); +mp_err mp_mod_d(const mp_int *a, mp_digit d, mp_digit *c); +mp_err mp_addmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c); +mp_err mp_submod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c); +mp_err mp_mulmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c); +#if MP_SQUARE +mp_err mp_sqrmod(const mp_int *a, const mp_int *m, mp_int *c); +#else +#define mp_sqrmod(a, m, c) mp_mulmod(a, a, m, c) +#endif +mp_err mp_exptmod(const mp_int *a, const mp_int *b, const mp_int *m, mp_int *c); +mp_err mp_exptmod_d(const mp_int *a, mp_digit d, const mp_int *m, mp_int *c); +#endif /* MP_MODARITH */ + +/* Comparisons */ +int mp_cmp_z(const mp_int *a); +int mp_cmp_d(const mp_int *a, mp_digit d); +int mp_cmp(const mp_int *a, const mp_int *b); +int mp_cmp_mag(const mp_int *a, const mp_int *b); +int mp_isodd(const mp_int *a); +int mp_iseven(const mp_int *a); + +/* Number theoretic */ +#if MP_NUMTH +mp_err mp_gcd(mp_int *a, mp_int *b, mp_int *c); +mp_err mp_lcm(mp_int *a, mp_int *b, mp_int *c); +mp_err mp_xgcd(const mp_int *a, const mp_int *b, mp_int *g, mp_int *x, mp_int *y); +mp_err mp_invmod(const mp_int *a, const mp_int *m, mp_int *c); +mp_err mp_invmod_xgcd(const mp_int *a, const mp_int *m, mp_int *c); +#endif /* end MP_NUMTH */ + +/* Input and output */ +#if MP_IOFUNC +void mp_print(mp_int *mp, FILE *ofp); +#endif /* end MP_IOFUNC */ + +/* Base conversion */ +mp_err mp_read_raw(mp_int *mp, char *str, int len); +int mp_raw_size(mp_int *mp); +mp_err mp_toraw(mp_int *mp, char *str); +mp_err mp_read_radix(mp_int *mp, const char *str, int radix); +mp_err
mp_read_variable_radix(mp_int *a, const char *str, int default_radix); +int mp_radix_size(mp_int *mp, int radix); +mp_err mp_toradix(mp_int *mp, char *str, int radix); +int mp_tovalue(char ch, int r); + +#define mp_tobinary(M, S) mp_toradix((M), (S), 2) +#define mp_tooctal(M, S) mp_toradix((M), (S), 8) +#define mp_todecimal(M, S) mp_toradix((M), (S), 10) +#define mp_tohex(M, S) mp_toradix((M), (S), 16) + +/* Error strings */ +const char *mp_strerror(mp_err ec); + +/* Octet string conversion functions */ +mp_err mp_read_unsigned_octets(mp_int *mp, const unsigned char *str, mp_size len); +unsigned int mp_unsigned_octet_size(const mp_int *mp); +mp_err mp_to_unsigned_octets(const mp_int *mp, unsigned char *str, mp_size maxlen); +mp_err mp_to_signed_octets(const mp_int *mp, unsigned char *str, mp_size maxlen); +mp_err mp_to_fixlen_octets(const mp_int *mp, unsigned char *str, mp_size len); + +/* Miscellaneous */ +mp_size mp_trailing_zeros(const mp_int *mp); +void freebl_cpuid(unsigned long op, unsigned long *eax, + unsigned long *ebx, unsigned long *ecx, + unsigned long *edx); + +#define MP_CHECKOK(x) /* stores x in the caller's local res and jumps to the caller's CLEANUP label on a negative code */ \ + if (MP_OKAY > (res = (x))) \ + goto CLEANUP +#define MP_CHECKERR(x) /* same expansion as MP_CHECKOK */ \ + if (MP_OKAY > (res = (x))) \ + goto CLEANUP + +#define NEG MP_NEG +#define ZPOS MP_ZPOS +#define DIGIT_MAX MP_DIGIT_MAX +#define DIGIT_BIT MP_DIGIT_BIT +#define DIGIT_FMT MP_DIGIT_FMT +#define RADIX MP_RADIX +#define MAX_RADIX MP_MAX_RADIX +#define SIGN(MP) MP_SIGN(MP) +#define USED(MP) MP_USED(MP) +#define ALLOC(MP) MP_ALLOC(MP) +#define DIGITS(MP) MP_DIGITS(MP) +#define DIGIT(MP, N) MP_DIGIT(MP, N) + +/* ARGCHK(X, Y): MP_ARGCHK == 1 returns Y from the calling function when X is false, MP_ARGCHK == 2 asserts X, any other value compiles the check away. */ +#if MP_ARGCHK == 1 +#define ARGCHK(X, Y) \ + { \ + if (!(X)) { \ + return (Y); \ + } \ + } +#elif MP_ARGCHK == 2 +#include <assert.h> +#define ARGCHK(X, Y) assert(X) +#else +#define ARGCHK(X, Y) /* */ +#endif + +#ifdef CT_VERIF +void mp_taint(mp_int *mp); +void mp_untaint(mp_int *mp); +#endif + +SEC_END_PROTOS + +#endif /* end _H_MPI_ */ diff --git a/security/nss/lib/freebl/mpi/mpi_amd64.c
b/security/nss/lib/freebl/mpi/mpi_amd64.c new file mode 100644 index 000000000..9e538bb6a --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpi_amd64.c @@ -0,0 +1,32 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef MPI_AMD64 +#error This file only works on AMD64 platforms. +#endif + +#include + +/* + * MPI glue + * + */ + +/* Presently, this is only used by the Montgomery arithmetic code. */ +/* c += a * b */ +void MPI_ASM_DECL +s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, + mp_digit b, mp_digit *c) +{ + mp_digit w; + mp_digit d; + + d = s_mpv_mul_add_vec64(c, a, a_len, b); + c += a_len; + while (d) { + w = c[0] + d; + d = (w < c[0] || w < d); + *c++ = w; + } +} diff --git a/security/nss/lib/freebl/mpi/mpi_amd64_gas.s b/security/nss/lib/freebl/mpi/mpi_amd64_gas.s new file mode 100644 index 000000000..ad6e2b9d7 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpi_amd64_gas.s @@ -0,0 +1,389 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +# ------------------------------------------------------------------------ +# +# Implementation of s_mpv_mul_set_vec which exploits +# the 64X64->128 bit unsigned multiply instruction. +# +# ------------------------------------------------------------------------ + +# r = a * digit, r and a are vectors of length len +# returns the carry digit +# r and a are 64 bit aligned. 
+# +# uint64_t +# s_mpv_mul_set_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit) +# + +.text; .align 16; .globl s_mpv_mul_set_vec64; .type s_mpv_mul_set_vec64, @function; s_mpv_mul_set_vec64: + + xorq %r9, %r9 # cy = 0, so that .L17 returns 0 if len == 0 + testl %edx, %edx # len is a 32-bit int; the upper half of %rdx is unspecified + jz .L17 + + movl %edx, %r8d # Use r8 for len (zero-extended); %rdx is used by mul + # cy (%r9) was already cleared above, before the len == 0 exit + +.L15: + cmpq $8, %r8 # 8 - len + jb .L16 + movq 0(%rsi), %rax # rax = a[0] + movq 8(%rsi), %r11 # prefetch a[1] + mulq %rcx # p = a[0] * digit + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 0(%rdi) # r[0] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + movq %r11, %rax + movq 16(%rsi), %r11 # prefetch a[2] + mulq %rcx # p = a[1] * digit + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 8(%rdi) # r[1] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + movq %r11, %rax + movq 24(%rsi), %r11 # prefetch a[3] + mulq %rcx # p = a[2] * digit + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 16(%rdi) # r[2] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + movq %r11, %rax + movq 32(%rsi), %r11 # prefetch a[4] + mulq %rcx # p = a[3] * digit + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 24(%rdi) # r[3] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + movq %r11, %rax + movq 40(%rsi), %r11 # prefetch a[5] + mulq %rcx # p = a[4] * digit + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 32(%rdi) # r[4] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + movq %r11, %rax + movq 48(%rsi), %r11 # prefetch a[6] + mulq %rcx # p = a[5] * digit + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 40(%rdi) # r[5] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + movq %r11, %rax + movq 56(%rsi), %r11 # prefetch a[7] + mulq %rcx # p = a[6] * digit + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 48(%rdi) # r[6] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + movq %r11, %rax + mulq %rcx # p = a[7] * digit + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 56(%rdi) # r[7] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + addq $64, %rsi + addq $64,
%rdi + subq $8, %r8 + + jz .L17 + jmp .L15 + +.L16: + movq 0(%rsi), %rax + mulq %rcx # p = a[0] * digit + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 0(%rdi) # r[0] = lo(p) + movq %rdx, %r9 # cy = hi(p) + decq %r8 + jz .L17 + + movq 8(%rsi), %rax + mulq %rcx # p = a[1] * digit + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 8(%rdi) # r[1] = lo(p) + movq %rdx, %r9 # cy = hi(p) + decq %r8 + jz .L17 + + movq 16(%rsi), %rax + mulq %rcx # p = a[2] * digit + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 16(%rdi) # r[2] = lo(p) + movq %rdx, %r9 # cy = hi(p) + decq %r8 + jz .L17 + + movq 24(%rsi), %rax + mulq %rcx # p = a[3] * digit + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 24(%rdi) # r[3] = lo(p) + movq %rdx, %r9 # cy = hi(p) + decq %r8 + jz .L17 + + movq 32(%rsi), %rax + mulq %rcx # p = a[4] * digit + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 32(%rdi) # r[4] = lo(p) + movq %rdx, %r9 # cy = hi(p) + decq %r8 + jz .L17 + + movq 40(%rsi), %rax + mulq %rcx # p = a[5] * digit + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 40(%rdi) # r[5] = lo(p) + movq %rdx, %r9 # cy = hi(p) + decq %r8 + jz .L17 + + movq 48(%rsi), %rax + mulq %rcx # p = a[6] * digit + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 48(%rdi) # r[6] = lo(p) + movq %rdx, %r9 # cy = hi(p) + decq %r8 + jz .L17 + + +.L17: + movq %r9, %rax + ret + +.size s_mpv_mul_set_vec64, .-s_mpv_mul_set_vec64 + +# ------------------------------------------------------------------------ +# +# Implementation of s_mpv_mul_add_vec which exploits +# the 64X64->128 bit unsigned multiply instruction. +# +# ------------------------------------------------------------------------ + +# r += a * digit, r and a are vectors of length len +# returns the carry digit +# r and a are 64 bit aligned.
+# +# uint64_t +# s_mpv_mul_add_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit) +# + +.text; .align 16; .globl s_mpv_mul_add_vec64; .type s_mpv_mul_add_vec64, @function; s_mpv_mul_add_vec64: + + xorq %r9, %r9 # cy = 0, so that .L27 returns 0 if len == 0 + testl %edx, %edx # len is a 32-bit int; the upper half of %rdx is unspecified + jz .L27 + + movl %edx, %r8d # Use r8 for len (zero-extended); %rdx is used by mul + # cy (%r9) was already cleared above, before the len == 0 exit + +.L25: + cmpq $8, %r8 # 8 - len + jb .L26 + movq 0(%rsi), %rax # rax = a[0] + movq 0(%rdi), %r10 # r10 = r[0] + movq 8(%rsi), %r11 # prefetch a[1] + mulq %rcx # p = a[0] * digit + addq %r10, %rax + adcq $0, %rdx # p += r[0] + movq 8(%rdi), %r10 # prefetch r[1] + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 0(%rdi) # r[0] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + movq %r11, %rax + movq 16(%rsi), %r11 # prefetch a[2] + mulq %rcx # p = a[1] * digit + addq %r10, %rax + adcq $0, %rdx # p += r[1] + movq 16(%rdi), %r10 # prefetch r[2] + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 8(%rdi) # r[1] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + movq %r11, %rax + movq 24(%rsi), %r11 # prefetch a[3] + mulq %rcx # p = a[2] * digit + addq %r10, %rax + adcq $0, %rdx # p += r[2] + movq 24(%rdi), %r10 # prefetch r[3] + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 16(%rdi) # r[2] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + movq %r11, %rax + movq 32(%rsi), %r11 # prefetch a[4] + mulq %rcx # p = a[3] * digit + addq %r10, %rax + adcq $0, %rdx # p += r[3] + movq 32(%rdi), %r10 # prefetch r[4] + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 24(%rdi) # r[3] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + movq %r11, %rax + movq 40(%rsi), %r11 # prefetch a[5] + mulq %rcx # p = a[4] * digit + addq %r10, %rax + adcq $0, %rdx # p += r[4] + movq 40(%rdi), %r10 # prefetch r[5] + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 32(%rdi) # r[4] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + movq %r11, %rax + movq 48(%rsi), %r11 # prefetch a[6] + mulq %rcx # p = a[5] * digit + addq %r10, %rax + adcq $0, %rdx # p += r[5] +
movq 48(%rdi), %r10 # prefetch r[6] + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 40(%rdi) # r[5] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + movq %r11, %rax + movq 56(%rsi), %r11 # prefetch a[7] + mulq %rcx # p = a[6] * digit + addq %r10, %rax + adcq $0, %rdx # p += r[6] + movq 56(%rdi), %r10 # prefetch r[7] + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 48(%rdi) # r[6] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + movq %r11, %rax + mulq %rcx # p = a[7] * digit + addq %r10, %rax + adcq $0, %rdx # p += r[7] + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 56(%rdi) # r[7] = lo(p) + movq %rdx, %r9 # cy = hi(p) + + addq $64, %rsi + addq $64, %rdi + subq $8, %r8 + + jz .L27 + jmp .L25 + +.L26: + movq 0(%rsi), %rax + movq 0(%rdi), %r10 + mulq %rcx # p = a[0] * digit + addq %r10, %rax + adcq $0, %rdx # p += r[0] + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 0(%rdi) # r[0] = lo(p) + movq %rdx, %r9 # cy = hi(p) + decq %r8 + jz .L27 + + movq 8(%rsi), %rax + movq 8(%rdi), %r10 + mulq %rcx # p = a[1] * digit + addq %r10, %rax + adcq $0, %rdx # p += r[1] + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 8(%rdi) # r[1] = lo(p) + movq %rdx, %r9 # cy = hi(p) + decq %r8 + jz .L27 + + movq 16(%rsi), %rax + movq 16(%rdi), %r10 + mulq %rcx # p = a[2] * digit + addq %r10, %rax + adcq $0, %rdx # p += r[2] + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 16(%rdi) # r[2] = lo(p) + movq %rdx, %r9 # cy = hi(p) + decq %r8 + jz .L27 + + movq 24(%rsi), %rax + movq 24(%rdi), %r10 + mulq %rcx # p = a[3] * digit + addq %r10, %rax + adcq $0, %rdx # p += r[3] + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 24(%rdi) # r[3] = lo(p) + movq %rdx, %r9 # cy = hi(p) + decq %r8 + jz .L27 + + movq 32(%rsi), %rax + movq 32(%rdi), %r10 + mulq %rcx # p = a[4] * digit + addq %r10, %rax + adcq $0, %rdx # p += r[4] + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 32(%rdi) # r[4] = lo(p) + movq %rdx, %r9 # cy = hi(p) + decq %r8 + jz .L27 + + movq 40(%rsi),
%rax + movq 40(%rdi), %r10 + mulq %rcx # p = a[5] * digit + addq %r10, %rax + adcq $0, %rdx # p += r[5] + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 40(%rdi) # r[5] = lo(p) + movq %rdx, %r9 # cy = hi(p) + decq %r8 + jz .L27 + + movq 48(%rsi), %rax + movq 48(%rdi), %r10 + mulq %rcx # p = a[6] * digit + addq %r10, %rax + adcq $0, %rdx # p += r[6] + addq %r9, %rax + adcq $0, %rdx # p += cy + movq %rax, 48(%rdi) # r[6] = lo(p) + movq %rdx, %r9 # cy = hi(p) + decq %r8 + jz .L27 + + +.L27: + movq %r9, %rax + ret + +.size s_mpv_mul_add_vec64, .-s_mpv_mul_add_vec64 + +# Magic indicating no need for an executable stack +.section .note.GNU-stack, "", @progbits +.previous diff --git a/security/nss/lib/freebl/mpi/mpi_amd64_masm.asm b/security/nss/lib/freebl/mpi/mpi_amd64_masm.asm new file mode 100644 index 000000000..2120c18f9 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpi_amd64_masm.asm @@ -0,0 +1,388 @@ +; This Source Code Form is subject to the terms of the Mozilla Public +; License, v. 2.0. If a copy of the MPL was not distributed with this +; file, You can obtain one at http://mozilla.org/MPL/2.0/. + +; +; This code is converted from mpi_amd64_gas.s for MASM for x64. +; + +; ------------------------------------------------------------------------ +; +; Implementation of s_mpv_mul_set_vec which exploits +; the 64X64->128 bit unsigned multiply instruction. +; +; ------------------------------------------------------------------------ + +; r = a * digit, r and a are vectors of length len +; returns the carry digit +; r and a are 64 bit aligned. +; +; uint64_t +; s_mpv_mul_set_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit) +; + +.CODE + +s_mpv_mul_set_vec64 PROC + + ; compatibility shim for parameter registers + ; + ; The GAS and MASM builds receive their parameters in different registers; move them to where the GAS-derived body expects them.
+ + push rdi + push rsi + + mov rdi, rcx + mov rsi, rdx + mov edx, r8d + mov rcx, r9 + + xor r9, r9 ; cy = 0, so that L17 returns 0 if len == 0 + test rdx, rdx + jz L17 + mov r8, rdx + ; cy (r9) was already cleared above, before the len == 0 exit + +L15: + cmp r8, 8 + jb L16 + mov rax, [rsi] + mov r11, [8+rsi] + mul rcx + add rax, r9 + adc rdx, 0 + mov [0+rdi], rax + mov r9, rdx + mov rax,r11 + mov r11, [16+rsi] + mul rcx + add rax,r9 + adc rdx,0 + mov [8+rdi],rax + mov r9,rdx + mov rax,r11 + mov r11, [24+rsi] + mul rcx + add rax,r9 + adc rdx,0 + mov [16+rdi],rax + mov r9,rdx + mov rax,r11 + mov r11, [32+rsi] + mul rcx + add rax,r9 + adc rdx,0 + mov [24+rdi],rax + mov r9,rdx + mov rax,r11 + mov r11, [40+rsi] + mul rcx + add rax,r9 + adc rdx,0 + mov [32+rdi],rax + mov r9,rdx + mov rax,r11 + mov r11, [48+rsi] + mul rcx + add rax,r9 + adc rdx,0 + mov [40+rdi],rax + mov r9,rdx + mov rax,r11 + mov r11, [56+rsi] + mul rcx + add rax,r9 + adc rdx,0 + mov [48+rdi],rax + mov r9,rdx + mov rax,r11 + mul rcx + add rax,r9 + adc rdx,0 + mov [56+rdi],rax + mov r9,rdx + add rsi, 64 + add rdi, 64 + sub r8, 8 + jz L17 + jmp L15 + +L16: + mov rax, [0+rsi] + mul rcx + add rax, r9 + adc rdx,0 + mov [0+rdi],rax + mov r9,rdx + dec r8 + jz L17 + mov rax, [8+rsi] + mul rcx + add rax,r9 + adc rdx,0 + mov [8+rdi], rax + mov r9, rdx + dec r8 + jz L17 + mov rax, [16+rsi] + mul rcx + add rax, r9 + adc rdx, 0 + mov [16+rdi],rax + mov r9,rdx + dec r8 + jz L17 + mov rax, [24+rsi] + mul rcx + add rax, r9 + adc rdx, 0 + mov [24+rdi], rax + mov r9, rdx + dec r8 + jz L17 + mov rax, [32+rsi] + mul rcx + add rax, r9 + adc rdx, 0 + mov [32+rdi],rax + mov r9, rdx + dec r8 + jz L17 + mov rax, [40+rsi] + mul rcx + add rax, r9 + adc rdx, 0 + mov [40+rdi], rax + mov r9, rdx + dec r8 + jz L17 + mov rax, [48+rsi] + mul rcx + add rax, r9 + adc rdx, 0 + mov [48+rdi], rax + mov r9, rdx + dec r8 + jz L17 + +L17: + mov rax, r9 + pop rsi + pop rdi + ret + +s_mpv_mul_set_vec64 ENDP + + +;------------------------------------------------------------------------ +; +; Implementation of s_mpv_mul_add_vec which
exploits +; the 64X64->128 bit unsigned multiply instruction. +; +;------------------------------------------------------------------------ + +; r += a * digit, r and a are vectors of length len +; returns the carry digit +; r and a are 64 bit aligned. +; +; uint64_t +; s_mpv_mul_add_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit) +; + +s_mpv_mul_add_vec64 PROC + + ; compatibility shim for parameter registers + ; + ; The GAS and MASM builds receive their parameters in different registers; move them to where the GAS-derived body expects them. + + push rdi + push rsi + + mov rdi, rcx + mov rsi, rdx + mov edx, r8d + mov rcx, r9 + + xor r9, r9 ; cy = 0, so that L27 returns 0 if len == 0 + test rdx, rdx + jz L27 + mov r8, rdx + ; cy (r9) was already cleared above, before the len == 0 exit + +L25: + cmp r8, 8 + jb L26 + mov rax, [0+rsi] + mov r10, [0+rdi] + mov r11, [8+rsi] + mul rcx + add rax,r10 + adc rdx,0 + mov r10, [8+rdi] + add rax,r9 + adc rdx,0 + mov [0+rdi],rax + mov r9,rdx + mov rax,r11 + mov r11, [16+rsi] + mul rcx + add rax,r10 + adc rdx,0 + mov r10, [16+rdi] + add rax,r9 + adc rdx,0 + mov [8+rdi],rax + mov r9,rdx + mov rax,r11 + mov r11, [24+rsi] + mul rcx + add rax,r10 + adc rdx,0 + mov r10, [24+rdi] + add rax,r9 + adc rdx,0 + mov [16+rdi],rax + mov r9,rdx + mov rax,r11 + mov r11, [32+rsi] + mul rcx + add rax,r10 + adc rdx,0 + mov r10, [32+rdi] + add rax,r9 + adc rdx,0 + mov [24+rdi],rax + mov r9,rdx + mov rax,r11 + mov r11, [40+rsi] + mul rcx + add rax,r10 + adc rdx,0 + mov r10, [40+rdi] + add rax,r9 + adc rdx,0 + mov [32+rdi],rax + mov r9,rdx + mov rax,r11 + mov r11, [48+rsi] + mul rcx + add rax,r10 + adc rdx,0 + mov r10, [48+rdi] + add rax,r9 + adc rdx,0 + mov [40+rdi],rax + mov r9,rdx + mov rax,r11 + mov r11, [56+rsi] + mul rcx + add rax,r10 + adc rdx,0 + mov r10, [56+rdi] + add rax,r9 + adc rdx,0 + mov [48+rdi],rax + mov r9,rdx + mov rax,r11 + mul rcx + add rax,r10 + adc rdx,0 + add rax,r9 + adc rdx,0 + mov [56+rdi],rax + mov r9,rdx + add rsi,64 + add rdi,64 + sub r8, 8 + jz L27 + jmp L25 + +L26: + mov rax, [0+rsi] + mov r10, [0+rdi] + mul rcx + add rax,r10 + adc rdx,0 + add rax,r9 + adc rdx,0
+ mov [0+rdi],rax + mov r9,rdx + dec r8 + jz L27 + mov rax, [8+rsi] + mov r10, [8+rdi] + mul rcx + add rax,r10 + adc rdx,0 + add rax,r9 + adc rdx,0 + mov [8+rdi],rax + mov r9,rdx + dec r8 + jz L27 + mov rax, [16+rsi] + mov r10, [16+rdi] + mul rcx + add rax,r10 + adc rdx,0 + add rax,r9 + adc rdx,0 + mov [16+rdi],rax + mov r9,rdx + dec r8 + jz L27 + mov rax, [24+rsi] + mov r10, [24+rdi] + mul rcx + add rax,r10 + adc rdx,0 + add rax,r9 + adc rdx,0 + mov [24+rdi],rax + mov r9,rdx + dec r8 + jz L27 + mov rax, [32+rsi] + mov r10, [32+rdi] + mul rcx + add rax,r10 + adc rdx,0 + add rax,r9 + adc rdx,0 + mov [32+rdi],rax + mov r9,rdx + dec r8 + jz L27 + mov rax, [40+rsi] + mov r10, [40+rdi] + mul rcx + add rax,r10 + adc rdx,0 + add rax,r9 + adc rdx,0 + mov [40+rdi],rax + mov r9,rdx + dec r8 + jz L27 + mov rax, [48+rsi] + mov r10, [48+rdi] + mul rcx + add rax,r10 + adc rdx,0 + add rax, r9 + adc rdx, 0 + mov [48+rdi], rax + mov r9, rdx + dec r8 + jz L27 + +L27: + mov rax, r9 + + pop rsi + pop rdi + ret + +s_mpv_mul_add_vec64 ENDP + +END diff --git a/security/nss/lib/freebl/mpi/mpi_amd64_sun.s b/security/nss/lib/freebl/mpi/mpi_amd64_sun.s new file mode 100644 index 000000000..ddd5c40fd --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpi_amd64_sun.s @@ -0,0 +1,385 @@ +/ This Source Code Form is subject to the terms of the Mozilla Public +/ License, v. 2.0. If a copy of the MPL was not distributed with this +/ file, You can obtain one at http://mozilla.org/MPL/2.0/. + + +/ ------------------------------------------------------------------------ +/ +/ Implementation of s_mpv_mul_set_vec which exploits +/ the 64X64->128 bit unsigned multiply instruction. +/ +/ ------------------------------------------------------------------------ + +/ r = a * digit, r and a are vectors of length len +/ returns the carry digit +/ r and a are 64 bit aligned.
+/ +/ uint64_t +/ s_mpv_mul_set_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit) +/ + +.text; .align 16; .globl s_mpv_mul_set_vec64; .type s_mpv_mul_set_vec64, @function; s_mpv_mul_set_vec64: + + xorq %r9, %r9 / cy = 0, so that .L17 returns 0 if len == 0 + testl %edx, %edx / len is a 32-bit int; the upper half of %rdx is unspecified + jz .L17 + + movl %edx, %r8d / Use r8 for len (zero-extended); %rdx is used by mul + / cy (%r9) was already cleared above, before the len == 0 exit + +.L15: + cmpq $8, %r8 / 8 - len + jb .L16 + movq 0(%rsi), %rax / rax = a[0] + movq 8(%rsi), %r11 / prefetch a[1] + mulq %rcx / p = a[0] * digit + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 0(%rdi) / r[0] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + movq %r11, %rax + movq 16(%rsi), %r11 / prefetch a[2] + mulq %rcx / p = a[1] * digit + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 8(%rdi) / r[1] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + movq %r11, %rax + movq 24(%rsi), %r11 / prefetch a[3] + mulq %rcx / p = a[2] * digit + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 16(%rdi) / r[2] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + movq %r11, %rax + movq 32(%rsi), %r11 / prefetch a[4] + mulq %rcx / p = a[3] * digit + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 24(%rdi) / r[3] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + movq %r11, %rax + movq 40(%rsi), %r11 / prefetch a[5] + mulq %rcx / p = a[4] * digit + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 32(%rdi) / r[4] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + movq %r11, %rax + movq 48(%rsi), %r11 / prefetch a[6] + mulq %rcx / p = a[5] * digit + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 40(%rdi) / r[5] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + movq %r11, %rax + movq 56(%rsi), %r11 / prefetch a[7] + mulq %rcx / p = a[6] * digit + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 48(%rdi) / r[6] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + movq %r11, %rax + mulq %rcx / p = a[7] * digit + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 56(%rdi) / r[7] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + addq $64, %rsi + addq $64,
%rdi + subq $8, %r8 + + jz .L17 + jmp .L15 + +.L16: + movq 0(%rsi), %rax + mulq %rcx / p = a[0] * digit + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 0(%rdi) / r[0] = lo(p) + movq %rdx, %r9 / cy = hi(p) + decq %r8 + jz .L17 + + movq 8(%rsi), %rax + mulq %rcx / p = a[1] * digit + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 8(%rdi) / r[1] = lo(p) + movq %rdx, %r9 / cy = hi(p) + decq %r8 + jz .L17 + + movq 16(%rsi), %rax + mulq %rcx / p = a[2] * digit + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 16(%rdi) / r[2] = lo(p) + movq %rdx, %r9 / cy = hi(p) + decq %r8 + jz .L17 + + movq 24(%rsi), %rax + mulq %rcx / p = a[3] * digit + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 24(%rdi) / r[3] = lo(p) + movq %rdx, %r9 / cy = hi(p) + decq %r8 + jz .L17 + + movq 32(%rsi), %rax + mulq %rcx / p = a[4] * digit + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 32(%rdi) / r[4] = lo(p) + movq %rdx, %r9 / cy = hi(p) + decq %r8 + jz .L17 + + movq 40(%rsi), %rax + mulq %rcx / p = a[5] * digit + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 40(%rdi) / r[5] = lo(p) + movq %rdx, %r9 / cy = hi(p) + decq %r8 + jz .L17 + + movq 48(%rsi), %rax + mulq %rcx / p = a[6] * digit + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 48(%rdi) / r[6] = lo(p) + movq %rdx, %r9 / cy = hi(p) + decq %r8 + jz .L17 + + +.L17: + movq %r9, %rax + ret + +.size s_mpv_mul_set_vec64, .-s_mpv_mul_set_vec64 + +/ ------------------------------------------------------------------------ +/ +/ Implementation of s_mpv_mul_add_vec which exploits +/ the 64X64->128 bit unsigned multiply instruction. +/ +/ ------------------------------------------------------------------------ + +/ r += a * digit, r and a are vectors of length len +/ returns the carry digit +/ r and a are 64 bit aligned.
+/ +/ uint64_t +/ s_mpv_mul_add_vec64(uint64_t *r, uint64_t *a, int len, uint64_t digit) +/ + +.text; .align 16; .globl s_mpv_mul_add_vec64; .type s_mpv_mul_add_vec64, @function; s_mpv_mul_add_vec64: + + xorq %r9, %r9 / cy = 0, so that .L27 returns 0 if len == 0 + testl %edx, %edx / len is a 32-bit int; the upper half of %rdx is unspecified + jz .L27 + + movl %edx, %r8d / Use r8 for len (zero-extended); %rdx is used by mul + / cy (%r9) was already cleared above, before the len == 0 exit + +.L25: + cmpq $8, %r8 / 8 - len + jb .L26 + movq 0(%rsi), %rax / rax = a[0] + movq 0(%rdi), %r10 / r10 = r[0] + movq 8(%rsi), %r11 / prefetch a[1] + mulq %rcx / p = a[0] * digit + addq %r10, %rax + adcq $0, %rdx / p += r[0] + movq 8(%rdi), %r10 / prefetch r[1] + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 0(%rdi) / r[0] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + movq %r11, %rax + movq 16(%rsi), %r11 / prefetch a[2] + mulq %rcx / p = a[1] * digit + addq %r10, %rax + adcq $0, %rdx / p += r[1] + movq 16(%rdi), %r10 / prefetch r[2] + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 8(%rdi) / r[1] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + movq %r11, %rax + movq 24(%rsi), %r11 / prefetch a[3] + mulq %rcx / p = a[2] * digit + addq %r10, %rax + adcq $0, %rdx / p += r[2] + movq 24(%rdi), %r10 / prefetch r[3] + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 16(%rdi) / r[2] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + movq %r11, %rax + movq 32(%rsi), %r11 / prefetch a[4] + mulq %rcx / p = a[3] * digit + addq %r10, %rax + adcq $0, %rdx / p += r[3] + movq 32(%rdi), %r10 / prefetch r[4] + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 24(%rdi) / r[3] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + movq %r11, %rax + movq 40(%rsi), %r11 / prefetch a[5] + mulq %rcx / p = a[4] * digit + addq %r10, %rax + adcq $0, %rdx / p += r[4] + movq 40(%rdi), %r10 / prefetch r[5] + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 32(%rdi) / r[4] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + movq %r11, %rax + movq 48(%rsi), %r11 / prefetch a[6] + mulq %rcx / p = a[5] * digit + addq %r10, %rax + adcq $0, %rdx / p += r[5] +
movq 48(%rdi), %r10 / prefetch r[6] + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 40(%rdi) / r[5] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + movq %r11, %rax + movq 56(%rsi), %r11 / prefetch a[7] + mulq %rcx / p = a[6] * digit + addq %r10, %rax + adcq $0, %rdx / p += r[6] + movq 56(%rdi), %r10 / prefetch r[7] + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 48(%rdi) / r[6] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + movq %r11, %rax + mulq %rcx / p = a[7] * digit + addq %r10, %rax + adcq $0, %rdx / p += r[7] + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 56(%rdi) / r[7] = lo(p) + movq %rdx, %r9 / cy = hi(p) + + addq $64, %rsi + addq $64, %rdi + subq $8, %r8 + + jz .L27 + jmp .L25 + +.L26: + movq 0(%rsi), %rax + movq 0(%rdi), %r10 + mulq %rcx / p = a[0] * digit + addq %r10, %rax + adcq $0, %rdx / p += r[0] + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 0(%rdi) / r[0] = lo(p) + movq %rdx, %r9 / cy = hi(p) + decq %r8 + jz .L27 + + movq 8(%rsi), %rax + movq 8(%rdi), %r10 + mulq %rcx / p = a[1] * digit + addq %r10, %rax + adcq $0, %rdx / p += r[1] + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 8(%rdi) / r[1] = lo(p) + movq %rdx, %r9 / cy = hi(p) + decq %r8 + jz .L27 + + movq 16(%rsi), %rax + movq 16(%rdi), %r10 + mulq %rcx / p = a[2] * digit + addq %r10, %rax + adcq $0, %rdx / p += r[2] + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 16(%rdi) / r[2] = lo(p) + movq %rdx, %r9 / cy = hi(p) + decq %r8 + jz .L27 + + movq 24(%rsi), %rax + movq 24(%rdi), %r10 + mulq %rcx / p = a[3] * digit + addq %r10, %rax + adcq $0, %rdx / p += r[3] + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 24(%rdi) / r[3] = lo(p) + movq %rdx, %r9 / cy = hi(p) + decq %r8 + jz .L27 + + movq 32(%rsi), %rax + movq 32(%rdi), %r10 + mulq %rcx / p = a[4] * digit + addq %r10, %rax + adcq $0, %rdx / p += r[4] + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 32(%rdi) / r[4] = lo(p) + movq %rdx, %r9 / cy = hi(p) + decq %r8 + jz .L27 + + movq 40(%rsi),
%rax + movq 40(%rdi), %r10 + mulq %rcx / p = a[5] * digit + addq %r10, %rax + adcq $0, %rdx / p += r[5] + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 40(%rdi) / r[5] = lo(p) + movq %rdx, %r9 / cy = hi(p) + decq %r8 + jz .L27 + + movq 48(%rsi), %rax + movq 48(%rdi), %r10 + mulq %rcx / p = a[6] * digit + addq %r10, %rax + adcq $0, %rdx / p += r[6] + addq %r9, %rax + adcq $0, %rdx / p += cy + movq %rax, 48(%rdi) / r[6] = lo(p) + movq %rdx, %r9 / cy = hi(p) + decq %r8 + jz .L27 + + +.L27: + movq %r9, %rax + ret + +.size s_mpv_mul_add_vec64, .-s_mpv_mul_add_vec64 diff --git a/security/nss/lib/freebl/mpi/mpi_arm.c b/security/nss/lib/freebl/mpi/mpi_arm.c new file mode 100644 index 000000000..b5139f28d --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpi_arm.c @@ -0,0 +1,175 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* This inlined version is for 32-bit ARM platform only */ + +#if !defined(__arm__) +#error "This is for ARM only" +#endif + +/* The inlined assembler version does not work with 16-bit Thumb */ +#if (!defined(__thumb__) || defined(__thumb2__)) && !defined(__ARM_ARCH_3__) + +#include "mpi-priv.h" + +#ifdef MP_ASSEMBLY_MULTIPLY +void +s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c) +{ + __asm__ __volatile__( + "mov r5, #0\n" +#ifdef __thumb2__ + "cbz %1, 2f\n" +#else + "cmp %1, r5\n" /* r5 is 0 now */ + "beq 2f\n" +#endif + + "1:\n" + "mov r4, #0\n" + "ldr r6, [%0], #4\n" + "umlal r5, r4, r6, %2\n" + "str r5, [%3], #4\n" + "mov r5, r4\n" + + "subs %1, #1\n" + "bne 1b\n" + + "2:\n" + "str r5, [%3]\n" + : "+r"(a), "+r"(a_len), "+r"(b), "+r"(c) /* the loop advances a and c and counts a_len down; b is listed here only to keep its %2 number */ + : /* no input-only operands: GCC requires modified operands to be outputs */ + : "memory", "cc", "%r4", "%r5", "%r6"); +} + +void +s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c) +{ + __asm__ __volatile__( + "mov r5, #0\n" +#ifdef __thumb2__ + "cbz %1, 2f\n" +#else + "cmp %1, r5\n" /*
r5 is 0 now */ + "beq 2f\n" +#endif + + "1:\n" + "mov r4, #0\n" + "ldr r6, [%3]\n" + "adds r5, r6\n" + "adc r4, r4, #0\n" + + "ldr r6, [%0], #4\n" + "umlal r5, r4, r6, %2\n" + "str r5, [%3], #4\n" + "mov r5, r4\n" + + "subs %1, #1\n" + "bne 1b\n" + + "2:\n" + "str r5, [%3]\n" + : "+r"(a), "+r"(a_len), "+r"(b), "+r"(c) /* the loop advances a and c and counts a_len down; b is listed here only to keep its %2 number */ + : /* no input-only operands: GCC requires modified operands to be outputs */ + : "memory", "cc", "%r4", "%r5", "%r6"); +} + +void +s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c) +{ + if (!a_len) + return; + + __asm__ __volatile__( + "mov r5, #0\n" + + "1:\n" + "mov r4, #0\n" + "ldr r6, [%3]\n" + "adds r5, r6\n" + "adc r4, r4, #0\n" + "ldr r6, [%0], #4\n" + "umlal r5, r4, r6, %2\n" + "str r5, [%3], #4\n" + "mov r5, r4\n" + + "subs %1, #1\n" + "bne 1b\n" + +#ifdef __thumb2__ + "cbz r4, 3f\n" +#else + "cmp r4, #0\n" + "beq 3f\n" +#endif + + "2:\n" + "mov r4, #0\n" + "ldr r6, [%3]\n" + "adds r5, r6\n" + "adc r4, r4, #0\n" + "str r5, [%3], #4\n" + "movs r5, r4\n" + "bne 2b\n" + + "3:\n" + : "+r"(a), "+r"(a_len), "+r"(b), "+r"(c) /* the loops advance a and c and count a_len down; b is listed here only to keep its %2 number */ + : /* no input-only operands: GCC requires modified operands to be outputs */ + : "memory", "cc", "%r4", "%r5", "%r6"); +} +#endif + +#ifdef MP_ASSEMBLY_SQUARE +void +s_mpv_sqr_add_prop(const mp_digit *pa, mp_size a_len, mp_digit *ps) +{ + if (!a_len) + return; + + __asm__ __volatile__( + "mov r3, #0\n" + + "1:\n" + "mov r4, #0\n" + "ldr r6, [%0], #4\n" + "ldr r5, [%2]\n" + "adds r3, r5\n" + "adc r4, r4, #0\n" + "umlal r3, r4, r6, r6\n" /* w = r3:r4 */ + "str r3, [%2], #4\n" + + "ldr r5, [%2]\n" + "adds r3, r4, r5\n" + "mov r4, #0\n" + "adc r4, r4, #0\n" + "str r3, [%2], #4\n" + "mov r3, r4\n" + + "subs %1, #1\n" + "bne 1b\n" + +#ifdef __thumb2__ + "cbz r3, 3f\n" +#else + "cmp r3, #0\n" + "beq 3f\n" +#endif + + "2:\n" + "mov r4, #0\n" + "ldr r5, [%2]\n" + "adds r3, r5\n" + "adc r4, r4, #0\n" + "str r3, [%2], #4\n" + "movs r3, r4\n" + "bne 2b\n" + + "3:" + : "+r"(pa), "+r"(a_len), "+r"(ps) /* the loops advance pa and ps and count a_len down */ + : /* no input-only operands: GCC requires modified operands to be outputs */ + : "memory", "cc", "%r3", "%r4", "%r5", "%r6"); +} +#endif +#endif diff --git a/security/nss/lib/freebl/mpi/mpi_hp.c b/security/nss/lib/freebl/mpi/mpi_hp.c new
file mode 100644 index 000000000..0cea7685d --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpi_hp.c @@ -0,0 +1,81 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* This file contains routines that perform vector multiplication. */ + +#include "mpi-priv.h" +#include <stddef.h> /* size_t */ + +#include <stdlib.h> /* malloc, free */ +/* #include */ +#include <string.h> /* memcpy, memset */ + +extern void multacc512( + int length, /* doublewords in multiplicand vector. */ + const mp_digit *scalaraddr, /* Address of scalar. */ + const mp_digit *multiplicand, /* The multiplicand vector. */ + mp_digit *result); /* Where to accumulate the result. */ + +extern void maxpy_little( + int length, /* doublewords in multiplicand vector. */ + const mp_digit *scalaraddr, /* Address of scalar. */ + const mp_digit *multiplicand, /* The multiplicand vector. */ + mp_digit *result); /* Where to accumulate the result. */ + +extern void add_diag_little( + int length, /* doublewords in input vector. */ + const mp_digit *root, /* The vector to square. */ + mp_digit *result); /* Where to accumulate the result. */ + +void +s_mpv_sqr_add_prop(const mp_digit *pa, mp_size a_len, mp_digit *ps) +{ + add_diag_little(a_len, pa, ps); +} + +#define MAX_STACK_DIGITS 258 +#define MULTACC512_LEN (512 / MP_DIGIT_BIT) +#define HP_MPY_ADD_FN (a_len == MULTACC512_LEN ? multacc512 : maxpy_little) + +/* c = a * b */ +void +s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c) +{ + mp_digit x[MAX_STACK_DIGITS]; + mp_digit *px = x; + size_t xSize = 0; + + if (a == c) { /* in-place multiply: copy a aside, because c is zeroed below */ + if (a_len > MAX_STACK_DIGITS) { + xSize = sizeof(mp_digit) * (a_len + 2); + px = malloc(xSize); + if (!px) + return; /* NOTE(review): allocation failure leaves c untouched and is not reported */ + } + memcpy(px, a, a_len * sizeof(*a)); + a = px; + } + s_mp_setz(c, a_len + 1); + HP_MPY_ADD_FN(a_len, &b, a, c); + if (px != x && px) { + memset(px, 0, xSize); /* wipe the heap copy before releasing it */ + free(px); + } +} + +/* c += a * b, where a is a_len words long.
*/ +void +s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c) +{ + c[a_len] = 0; /* so carry propagation stops here. */ + HP_MPY_ADD_FN(a_len, &b, a, c); +} + +/* c += a * b, where a is a_len words long. */ +void +s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, + mp_digit *c) +{ + HP_MPY_ADD_FN(a_len, &b, a, c); +} diff --git a/security/nss/lib/freebl/mpi/mpi_i86pc.s b/security/nss/lib/freebl/mpi/mpi_i86pc.s new file mode 100644 index 000000000..f80039659 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpi_i86pc.s @@ -0,0 +1,313 @@ +/ +/ This Source Code Form is subject to the terms of the Mozilla Public +/ License, v. 2.0. If a copy of the MPL was not distributed with this +/ file, You can obtain one at http://mozilla.org/MPL/2.0/. + +.text + + / ebp - 36: caller's esi + / ebp - 32: caller's edi + / ebp - 28: + / ebp - 24: + / ebp - 20: + / ebp - 16: + / ebp - 12: + / ebp - 8: + / ebp - 4: + / ebp + 0: caller's ebp + / ebp + 4: return address + / ebp + 8: a argument + / ebp + 12: a_len argument + / ebp + 16: b argument + / ebp + 20: c argument + / registers: + / eax: + / ebx: carry + / ecx: a_len + / edx: + / esi: a ptr + / edi: c ptr +.globl s_mpv_mul_d +.type s_mpv_mul_d,@function +s_mpv_mul_d: + push %ebp + mov %esp,%ebp + sub $28,%esp + push %edi + push %esi + push %ebx + movl $0,%ebx / carry = 0 + mov 12(%ebp),%ecx / ecx = a_len + mov 20(%ebp),%edi + cmp $0,%ecx + je L2 / jmp if a_len == 0 + mov 8(%ebp),%esi / esi = a + cld +L1: + lodsl / eax = [ds:esi]; esi += 4 + mov 16(%ebp),%edx / edx = b + mull %edx / edx:eax = Phi:Plo = a_i * b + + add %ebx,%eax / add carry (%ebx) to edx:eax + adc $0,%edx + mov %edx,%ebx / high half of product becomes next carry + + stosl / [es:edi] = ax; edi += 4; + dec %ecx / --a_len + jnz L1 / jmp if a_len != 0 +L2: + mov %ebx,0(%edi) / *c = carry + pop %ebx + pop %esi + pop %edi + leave + ret + nop + + / ebp - 36: caller's esi + / ebp - 32: caller's edi + / ebp - 28: + / ebp - 24: + / ebp - 20:
+ / ebp - 16: + / ebp - 12: + / ebp - 8: + / ebp - 4: + / ebp + 0: caller's ebp + / ebp + 4: return address + / ebp + 8: a argument + / ebp + 12: a_len argument + / ebp + 16: b argument + / ebp + 20: c argument + / registers: + / eax: + / ebx: carry + / ecx: a_len + / edx: + / esi: a ptr + / edi: c ptr +.globl s_mpv_mul_d_add +.type s_mpv_mul_d_add,@function +s_mpv_mul_d_add: + push %ebp + mov %esp,%ebp + sub $28,%esp + push %edi + push %esi + push %ebx + movl $0,%ebx / carry = 0 + mov 12(%ebp),%ecx / ecx = a_len + mov 20(%ebp),%edi + cmp $0,%ecx + je L4 / jmp if a_len == 0 + mov 8(%ebp),%esi / esi = a + cld +L3: + lodsl / eax = [ds:esi]; esi += 4 + mov 16(%ebp),%edx / edx = b + mull %edx / edx:eax = Phi:Plo = a_i * b + + add %ebx,%eax / add carry (%ebx) to edx:eax + adc $0,%edx + mov 0(%edi),%ebx / add in current word from *c + add %ebx,%eax + adc $0,%edx + mov %edx,%ebx / high half of product becomes next carry + + stosl / [es:edi] = ax; edi += 4; + dec %ecx / --a_len + jnz L3 / jmp if a_len != 0 +L4: + mov %ebx,0(%edi) / *c = carry + pop %ebx + pop %esi + pop %edi + leave + ret + nop + + / ebp - 36: caller's esi + / ebp - 32: caller's edi + / ebp - 28: + / ebp - 24: + / ebp - 20: + / ebp - 16: + / ebp - 12: + / ebp - 8: + / ebp - 4: + / ebp + 0: caller's ebp + / ebp + 4: return address + / ebp + 8: a argument + / ebp + 12: a_len argument + / ebp + 16: b argument + / ebp + 20: c argument + / registers: + / eax: + / ebx: carry + / ecx: a_len + / edx: + / esi: a ptr + / edi: c ptr +.globl s_mpv_mul_d_add_prop +.type s_mpv_mul_d_add_prop,@function +s_mpv_mul_d_add_prop: + push %ebp + mov %esp,%ebp + sub $28,%esp + push %edi + push %esi + push %ebx + movl $0,%ebx / carry = 0 + mov 12(%ebp),%ecx / ecx = a_len + mov 20(%ebp),%edi + cmp $0,%ecx + je L6 / jmp if a_len == 0 + cld + mov 8(%ebp),%esi / esi = a +L5: + lodsl / eax = [ds:esi]; esi += 4 + mov 16(%ebp),%edx / edx = b + mull %edx / edx:eax = Phi:Plo = a_i * b + + add %ebx,%eax / add carry (%ebx) to edx:eax + adc 
$0,%edx + mov 0(%edi),%ebx / add in current word from *c + add %ebx,%eax + adc $0,%edx + mov %edx,%ebx / high half of product becomes next carry + + stosl / [es:edi] = ax; edi += 4; + dec %ecx / --a_len + jnz L5 / jmp if a_len != 0 +L6: + cmp $0,%ebx / is carry zero? + jz L8 + mov 0(%edi),%eax / add in current word from *c + add %ebx,%eax + stosl / [es:edi] = ax; edi += 4; + jnc L8 +L7: + mov 0(%edi),%eax / add in current word from *c + adc $0,%eax + stosl / [es:edi] = ax; edi += 4; + jc L7 +L8: + pop %ebx + pop %esi + pop %edi + leave + ret + nop + + / ebp - 20: caller's esi + / ebp - 16: caller's edi + / ebp - 12: + / ebp - 8: carry + / ebp - 4: a_len local + / ebp + 0: caller's ebp + / ebp + 4: return address + / ebp + 8: pa argument + / ebp + 12: a_len argument + / ebp + 16: ps argument + / ebp + 20: + / registers: + / eax: + / ebx: carry + / ecx: a_len + / edx: + / esi: a ptr + / edi: c ptr + +.globl s_mpv_sqr_add_prop +.type s_mpv_sqr_add_prop,@function +s_mpv_sqr_add_prop: + push %ebp + mov %esp,%ebp + sub $12,%esp + push %edi + push %esi + push %ebx + movl $0,%ebx / carry = 0 + mov 12(%ebp),%ecx / a_len + mov 16(%ebp),%edi / edi = ps + cmp $0,%ecx + je L11 / jump if a_len == 0 + cld + mov 8(%ebp),%esi / esi = pa +L10: + lodsl / %eax = [ds:si]; si += 4; + mull %eax + + add %ebx,%eax / add "carry" + adc $0,%edx + mov 0(%edi),%ebx + add %ebx,%eax / add low word from result + mov 4(%edi),%ebx + stosl / [es:di] = %eax; di += 4; + adc %ebx,%edx / add high word from result + movl $0,%ebx + mov %edx,%eax + adc $0,%ebx + stosl / [es:di] = %eax; di += 4; + dec %ecx / --a_len + jnz L10 / jmp if a_len != 0 +L11: + cmp $0,%ebx / is carry zero? 
+ jz L14 + mov 0(%edi),%eax / add in current word from *c + add %ebx,%eax + stosl / [es:edi] = ax; edi += 4; + jnc L14 +L12: + mov 0(%edi),%eax / add in current word from *c + adc $0,%eax + stosl / [es:edi] = ax; edi += 4; + jc L12 +L14: + pop %ebx + pop %esi + pop %edi + leave + ret + nop + + / + / Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized + / so its high bit is 1. This code is from NSPR. + / + / mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor, + / mp_digit *qp, mp_digit *rp) + + / esp + 0: Caller's ebx + / esp + 4: return address + / esp + 8: Nhi argument + / esp + 12: Nlo argument + / esp + 16: divisor argument + / esp + 20: qp argument + / esp + 24: rp argument + / registers: + / eax: + / ebx: carry + / ecx: a_len + / edx: + / esi: a ptr + / edi: c ptr + / + +.globl s_mpv_div_2dx1d +.type s_mpv_div_2dx1d,@function +s_mpv_div_2dx1d: + push %ebx + mov 8(%esp),%edx + mov 12(%esp),%eax + mov 16(%esp),%ebx + div %ebx + mov 20(%esp),%ebx + mov %eax,0(%ebx) + mov 24(%esp),%ebx + mov %edx,0(%ebx) + xor %eax,%eax / return zero + pop %ebx + ret + nop + diff --git a/security/nss/lib/freebl/mpi/mpi_mips.s b/security/nss/lib/freebl/mpi/mpi_mips.s new file mode 100644 index 000000000..455792bbb --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpi_mips.s @@ -0,0 +1,472 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ +#include + .set noreorder + .set noat + + .section .text, 1, 0x00000006, 4, 4 +.text: + .section .text + + .ent s_mpv_mul_d_add + .globl s_mpv_mul_d_add + +s_mpv_mul_d_add: + #/* c += a * b */ + #void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, + # mp_digit *c) + #{ + # mp_digit a0, a1; regs a4, a5 + # mp_digit c0, c1; regs a6, a7 + # mp_digit cy = 0; reg t2 + # mp_word w0, w1; regs t0, t1 + # + # if (a_len) { + beq a1,zero,.L.1 + move t2,zero # cy = 0 + dsll32 a2,a2,0 # "b" is sometimes negative (?!?!) + dsrl32 a2,a2,0 # This clears the upper 32 bits. + # a0 = a[0]; + lwu a4,0(a0) + # w0 = ((mp_word)b * a0); + dmultu a2,a4 + # if (--a_len) { + addiu a1,a1,-1 + beq a1,zero,.L.2 + # while (a_len >= 2) { + sltiu t3,a1,2 + bne t3,zero,.L.3 + # a1 = a[1]; + lwu a5,4(a0) +.L.4: + # a_len -= 2; + addiu a1,a1,-2 + # c0 = c[0]; + lwu a6,0(a3) + # w0 += cy; + mflo t0 + daddu t0,t0,t2 + # w0 += c0; + daddu t0,t0,a6 + # w1 = (mp_word)b * a1; + dmultu a2,a5 # + # cy = CARRYOUT(w0); + dsrl32 t2,t0,0 + # c[0] = ACCUM(w0); + sw t0,0(a3) + # a0 = a[2]; + lwu a4,8(a0) + # a += 2; + addiu a0,a0,8 + # c1 = c[1]; + lwu a7,4(a3) + # w1 += cy; + mflo t1 + daddu t1,t1,t2 + # w1 += c1; + daddu t1,t1,a7 + # w0 = (mp_word)b * a0; + dmultu a2,a4 # + # cy = CARRYOUT(w1); + dsrl32 t2,t1,0 + # c[1] = ACCUM(w1); + sw t1,4(a3) + # c += 2; + addiu a3,a3,8 + sltiu t3,a1,2 + beq t3,zero,.L.4 + # a1 = a[1]; + lwu a5,4(a0) + # } +.L.3: + # c0 = c[0]; + lwu a6,0(a3) + # w0 += cy; + # if (a_len) { + mflo t0 + beq a1,zero,.L.5 + daddu t0,t0,t2 + # w1 = (mp_word)b * a1; + dmultu a2,a5 + # w0 += c0; + daddu t0,t0,a6 # + # cy = CARRYOUT(w0); + dsrl32 t2,t0,0 + # c[0] = ACCUM(w0); + sw t0,0(a3) + # c1 = c[1]; + lwu a7,4(a3) + # w1 += cy; + mflo t1 + daddu t1,t1,t2 + # w1 += c1; + daddu t1,t1,a7 + # c[1] = ACCUM(w1); + sw t1,4(a3) + # cy = CARRYOUT(w1); + dsrl32 t2,t1,0 + # c += 1; + b .L.6 + addiu a3,a3,4 + # } else { +.L.5: + # w0 += c0; + daddu t0,t0,a6 + # c[0] = ACCUM(w0); + sw 
t0,0(a3) + # cy = CARRYOUT(w0); + b .L.6 + dsrl32 t2,t0,0 + # } + # } else { +.L.2: + # c0 = c[0]; + lwu a6,0(a3) + # w0 += c0; + mflo t0 + daddu t0,t0,a6 + # c[0] = ACCUM(w0); + sw t0,0(a3) + # cy = CARRYOUT(w0); + dsrl32 t2,t0,0 + # } +.L.6: + # c[1] = cy; + jr ra + sw t2,4(a3) + # } +.L.1: + jr ra + nop + #} + # + .end s_mpv_mul_d_add + + .ent s_mpv_mul_d_add_prop + .globl s_mpv_mul_d_add_prop + +s_mpv_mul_d_add_prop: + #/* c += a * b */ + #void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, + # mp_digit *c) + #{ + # mp_digit a0, a1; regs a4, a5 + # mp_digit c0, c1; regs a6, a7 + # mp_digit cy = 0; reg t2 + # mp_word w0, w1; regs t0, t1 + # + # if (a_len) { + beq a1,zero,.M.1 + move t2,zero # cy = 0 + dsll32 a2,a2,0 # "b" is sometimes negative (?!?!) + dsrl32 a2,a2,0 # This clears the upper 32 bits. + # a0 = a[0]; + lwu a4,0(a0) + # w0 = ((mp_word)b * a0); + dmultu a2,a4 + # if (--a_len) { + addiu a1,a1,-1 + beq a1,zero,.M.2 + # while (a_len >= 2) { + sltiu t3,a1,2 + bne t3,zero,.M.3 + # a1 = a[1]; + lwu a5,4(a0) +.M.4: + # a_len -= 2; + addiu a1,a1,-2 + # c0 = c[0]; + lwu a6,0(a3) + # w0 += cy; + mflo t0 + daddu t0,t0,t2 + # w0 += c0; + daddu t0,t0,a6 + # w1 = (mp_word)b * a1; + dmultu a2,a5 # + # cy = CARRYOUT(w0); + dsrl32 t2,t0,0 + # c[0] = ACCUM(w0); + sw t0,0(a3) + # a0 = a[2]; + lwu a4,8(a0) + # a += 2; + addiu a0,a0,8 + # c1 = c[1]; + lwu a7,4(a3) + # w1 += cy; + mflo t1 + daddu t1,t1,t2 + # w1 += c1; + daddu t1,t1,a7 + # w0 = (mp_word)b * a0; + dmultu a2,a4 # + # cy = CARRYOUT(w1); + dsrl32 t2,t1,0 + # c[1] = ACCUM(w1); + sw t1,4(a3) + # c += 2; + addiu a3,a3,8 + sltiu t3,a1,2 + beq t3,zero,.M.4 + # a1 = a[1]; + lwu a5,4(a0) + # } +.M.3: + # c0 = c[0]; + lwu a6,0(a3) + # w0 += cy; + # if (a_len) { + mflo t0 + beq a1,zero,.M.5 + daddu t0,t0,t2 + # w1 = (mp_word)b * a1; + dmultu a2,a5 + # w0 += c0; + daddu t0,t0,a6 # + # cy = CARRYOUT(w0); + dsrl32 t2,t0,0 + # c[0] = ACCUM(w0); + sw t0,0(a3) + # c1 = c[1]; + lwu a7,4(a3) + # w1 += cy; 
+ mflo t1 + daddu t1,t1,t2 + # w1 += c1; + daddu t1,t1,a7 + # c[1] = ACCUM(w1); + sw t1,4(a3) + # cy = CARRYOUT(w1); + dsrl32 t2,t1,0 + # c += 1; + b .M.6 + addiu a3,a3,8 + # } else { +.M.5: + # w0 += c0; + daddu t0,t0,a6 + # c[0] = ACCUM(w0); + sw t0,0(a3) + # cy = CARRYOUT(w0); + dsrl32 t2,t0,0 + b .M.6 + addiu a3,a3,4 + # } + # } else { +.M.2: + # c0 = c[0]; + lwu a6,0(a3) + # w0 += c0; + mflo t0 + daddu t0,t0,a6 + # c[0] = ACCUM(w0); + sw t0,0(a3) + # cy = CARRYOUT(w0); + dsrl32 t2,t0,0 + addiu a3,a3,4 + # } +.M.6: + + # while (cy) { + beq t2,zero,.M.1 + nop +.M.7: + # mp_word w = (mp_word)*c + cy; + lwu a6,0(a3) + daddu t2,t2,a6 + # *c++ = ACCUM(w); + sw t2,0(a3) + # cy = CARRYOUT(w); + dsrl32 t2,t2,0 + bne t2,zero,.M.7 + addiu a3,a3,4 + + # } +.M.1: + jr ra + nop + #} + # + .end s_mpv_mul_d_add_prop + + .ent s_mpv_mul_d + .globl s_mpv_mul_d + +s_mpv_mul_d: + #/* c = a * b */ + #void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, + # mp_digit *c) + #{ + # mp_digit a0, a1; regs a4, a5 + # mp_digit cy = 0; reg t2 + # mp_word w0, w1; regs t0, t1 + # + # if (a_len) { + beq a1,zero,.N.1 + move t2,zero # cy = 0 + dsll32 a2,a2,0 # "b" is sometimes negative (?!?!) + dsrl32 a2,a2,0 # This clears the upper 32 bits. 
+ # a0 = a[0]; + lwu a4,0(a0) + # w0 = ((mp_word)b * a0); + dmultu a2,a4 + # if (--a_len) { + addiu a1,a1,-1 + beq a1,zero,.N.2 + # while (a_len >= 2) { + sltiu t3,a1,2 + bne t3,zero,.N.3 + # a1 = a[1]; + lwu a5,4(a0) +.N.4: + # a_len -= 2; + addiu a1,a1,-2 + # w0 += cy; + mflo t0 + daddu t0,t0,t2 + # cy = CARRYOUT(w0); + dsrl32 t2,t0,0 + # w1 = (mp_word)b * a1; + dmultu a2,a5 + # c[0] = ACCUM(w0); + sw t0,0(a3) + # a0 = a[2]; + lwu a4,8(a0) + # a += 2; + addiu a0,a0,8 + # w1 += cy; + mflo t1 + daddu t1,t1,t2 + # cy = CARRYOUT(w1); + dsrl32 t2,t1,0 + # w0 = (mp_word)b * a0; + dmultu a2,a4 + # c[1] = ACCUM(w1); + sw t1,4(a3) + # c += 2; + addiu a3,a3,8 + sltiu t3,a1,2 + beq t3,zero,.N.4 + # a1 = a[1]; + lwu a5,4(a0) + # } +.N.3: + # w0 += cy; + # if (a_len) { + mflo t0 + beq a1,zero,.N.5 + daddu t0,t0,t2 + # w1 = (mp_word)b * a1; + dmultu a2,a5 # + # cy = CARRYOUT(w0); + dsrl32 t2,t0,0 + # c[0] = ACCUM(w0); + sw t0,0(a3) + # w1 += cy; + mflo t1 + daddu t1,t1,t2 + # c[1] = ACCUM(w1); + sw t1,4(a3) + # cy = CARRYOUT(w1); + dsrl32 t2,t1,0 + # c += 1; + b .N.6 + addiu a3,a3,4 + # } else { +.N.5: + # c[0] = ACCUM(w0); + sw t0,0(a3) + # cy = CARRYOUT(w0); + b .N.6 + dsrl32 t2,t0,0 + # } + # } else { +.N.2: + mflo t0 + # c[0] = ACCUM(w0); + sw t0,0(a3) + # cy = CARRYOUT(w0); + dsrl32 t2,t0,0 + # } +.N.6: + # c[1] = cy; + jr ra + sw t2,4(a3) + # } +.N.1: + jr ra + nop + #} + # + .end s_mpv_mul_d + + + .ent s_mpv_sqr_add_prop + .globl s_mpv_sqr_add_prop + #void s_mpv_sqr_add_prop(const mp_digit *a, mp_size a_len, mp_digit *sqrs); + # registers + # a0 *a + # a1 a_len + # a2 *sqr + # a3 digit from *a, a_i + # a4 square of digit from a + # a5,a6 next 2 digits in sqr + # a7,t0 carry +s_mpv_sqr_add_prop: + move a7,zero + move t0,zero + lwu a3,0(a0) + addiu a1,a1,-1 # --a_len + dmultu a3,a3 + beq a1,zero,.P.3 # jump if we've already done the only sqr + addiu a0,a0,4 # ++a +.P.2: + lwu a5,0(a2) + lwu a6,4(a2) + addiu a2,a2,8 # sqrs += 2; + dsll32 a6,a6,0 + daddu a5,a5,a6 + lwu 
a3,0(a0) + addiu a0,a0,4 # ++a + mflo a4 + daddu a6,a5,a4 + sltu a7,a6,a5 # a7 = a6 < a5 detect overflow + dmultu a3,a3 + daddu a4,a6,t0 + sltu t0,a4,a6 + add t0,t0,a7 + sw a4,-8(a2) + addiu a1,a1,-1 # --a_len + dsrl32 a4,a4,0 + bne a1,zero,.P.2 # loop if a_len > 0 + sw a4,-4(a2) +.P.3: + lwu a5,0(a2) + lwu a6,4(a2) + addiu a2,a2,8 # sqrs += 2; + dsll32 a6,a6,0 + daddu a5,a5,a6 + mflo a4 + daddu a6,a5,a4 + sltu a7,a6,a5 # a7 = a6 < a5 detect overflow + daddu a4,a6,t0 + sltu t0,a4,a6 + add t0,t0,a7 + sw a4,-8(a2) + beq t0,zero,.P.9 # jump if no carry + dsrl32 a4,a4,0 +.P.8: + sw a4,-4(a2) + /* propagate final carry */ + lwu a5,0(a2) + daddu a6,a5,t0 + sltu t0,a6,a5 + bne t0,zero,.P.8 # loop if carry persists + addiu a2,a2,4 # sqrs++ +.P.9: + jr ra + sw a4,-4(a2) + + .end s_mpv_sqr_add_prop diff --git a/security/nss/lib/freebl/mpi/mpi_sparc.c b/security/nss/lib/freebl/mpi/mpi_sparc.c new file mode 100644 index 000000000..1e88357af --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpi_sparc.c @@ -0,0 +1,226 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* Multiplication performance enhancements for sparc v8+vis CPUs. */ + +#include "mpi-priv.h" +#include +#include +#include + +/* In the functions below, */ +/* vector y must be 8-byte aligned, and n must be even */ +/* returns carry out of high order word of result */ +/* maximum n is 256 */ + +/* vector x += vector y * scaler a; where y is of length n words. */ +extern mp_digit mul_add_inp(mp_digit *x, const mp_digit *y, int n, mp_digit a); + +/* vector z = vector x + vector y * scaler a; where y is of length n words. */ +extern mp_digit mul_add(mp_digit *z, const mp_digit *x, const mp_digit *y, + int n, mp_digit a); + +/* v8 versions of these functions run on any Sparc v8 CPU. */ + +/* This trick works on Sparc V8 CPUs with the Workshop compilers. 
*/ +#define MP_MUL_DxD(a, b, Phi, Plo) \ + { \ + unsigned long long product = (unsigned long long)a * b; \ + Plo = (mp_digit)product; \ + Phi = (mp_digit)(product >> MP_DIGIT_BIT); \ + } + +/* c = a * b */ +static void +v8_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c) +{ +#if !defined(MP_NO_MP_WORD) + mp_digit d = 0; + + /* Inner product: Digits of a */ + while (a_len--) { + mp_word w = ((mp_word)b * *a++) + d; + *c++ = ACCUM(w); + d = CARRYOUT(w); + } + *c = d; +#else + mp_digit carry = 0; + while (a_len--) { + mp_digit a_i = *a++; + mp_digit a0b0, a1b1; + + MP_MUL_DxD(a_i, b, a1b1, a0b0); + + a0b0 += carry; + if (a0b0 < carry) + ++a1b1; + *c++ = a0b0; + carry = a1b1; + } + *c = carry; +#endif +} + +/* c += a * b */ +static void +v8_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c) +{ +#if !defined(MP_NO_MP_WORD) + mp_digit d = 0; + + /* Inner product: Digits of a */ + while (a_len--) { + mp_word w = ((mp_word)b * *a++) + *c + d; + *c++ = ACCUM(w); + d = CARRYOUT(w); + } + *c = d; +#else + mp_digit carry = 0; + while (a_len--) { + mp_digit a_i = *a++; + mp_digit a0b0, a1b1; + + MP_MUL_DxD(a_i, b, a1b1, a0b0); + + a0b0 += carry; + if (a0b0 < carry) + ++a1b1; + a0b0 += a_i = *c; + if (a0b0 < a_i) + ++a1b1; + *c++ = a0b0; + carry = a1b1; + } + *c = carry; +#endif +} + +/* Presently, this is only used by the Montgomery arithmetic code. 
*/
+/* c += a * b, then keep adding the leftover carry into higher digits of c until it is zero. */
+static void
+v8_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+#if !defined(MP_NO_MP_WORD)
+    mp_digit d = 0; /* carry digit handed from one iteration to the next */
+
+    /* Inner product: Digits of a */
+    while (a_len--) {
+        mp_word w = ((mp_word)b * *a++) + *c + d; /* product + old digit of c + carry */
+        *c++ = ACCUM(w);    /* presumably the low digit of w (macro defined elsewhere) */
+        d = CARRYOUT(w);    /* presumably the high digit of w (macro defined elsewhere) */
+    }
+
+    while (d) { /* no bound check here: the caller must supply enough digits above a_len */
+        mp_word w = (mp_word)*c + d;
+        *c++ = ACCUM(w);
+        d = CARRYOUT(w);
+    }
+#else
+    mp_digit carry = 0;
+    while (a_len--) {
+        mp_digit a_i = *a++;
+        mp_digit a0b0, a1b1; /* low and high digits of a_i * b */
+
+        MP_MUL_DxD(a_i, b, a1b1, a0b0);
+
+        a0b0 += carry;
+        if (a0b0 < carry) /* sum wrapped (mp_digit assumed unsigned): bump the high digit */
+            ++a1b1;
+
+        a0b0 += a_i = *c; /* a_i is reused to hold the old *c for the wrap test below */
+        if (a0b0 < a_i)
+            ++a1b1;
+
+        *c++ = a0b0;
+        carry = a1b1;
+    }
+    while (carry) { /* no bound check here: the caller must supply enough digits above a_len */
+        mp_digit c_i = *c;
+        carry += c_i;
+        *c++ = carry;
+        carry = carry < c_i; /* 1 if the addition wrapped, otherwise 0 */
+    }
+#endif
+}
+
+/* These functions run only on v8plus+vis or v9+vis CPUs. */
+
+/* c = a * b. Up to 256 digits go through mul_add_inp(); longer inputs use v8_mpv_mul_d(). */
+void
+s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+    mp_digit d;
+    mp_digit x[258]; /* scratch copy of a: up to 256 digits, one zero pad, one digit of alignment slack */
+    if (a_len <= 256) {
+        if (a == c || ((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) { /* aliases c, misaligned, or odd length */
+            mp_digit *px;
+            px = (((ptrdiff_t)x & 0x7) != 0) ? x + 1 : x; /* 8-byte aligned start inside x; assumes 4-byte mp_digit - TODO confirm */
+            memcpy(px, a, a_len * sizeof(*a));
+            a = px;
+            if (a_len & 1) {
+                px[a_len] = 0; /* zero digit after the copy so an even number of digits is readable */
+            }
+        }
+        s_mp_setz(c, a_len + 1); /* presumably zeroes a_len + 1 digits of c so the += below acts as = */
+        d = mul_add_inp(c, a, a_len, b); /* NOTE(review): a_len may be odd here though the declaration comment says n must be even - confirm */
+        c[a_len] = d; /* carry out becomes the top digit */
+    } else {
+        v8_mpv_mul_d(a, a_len, b, c);
+    }
+}
+
+/* c += a * b, where a is a_len words long; c[a_len] is overwritten with the carry out. */
+void
+s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{
+    mp_digit d;
+    mp_digit x[258]; /* scratch copy of a: up to 256 digits, one zero pad, one digit of alignment slack */
+    if (a_len <= 256) {
+        if (((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) { /* misaligned or odd length: work from a copy */
+            mp_digit *px;
+            px = (((ptrdiff_t)x & 0x7) != 0) ? x + 1 : x; /* 8-byte aligned start inside x; assumes 4-byte mp_digit - TODO confirm */
+            memcpy(px, a, a_len * sizeof(*a));
+            a = px;
+            if (a_len & 1) {
+                px[a_len] = 0; /* zero digit after the copy so an even number of digits is readable */
+            }
+        }
+        d = mul_add_inp(c, a, a_len, b); /* c += a * b in place; d is the carry out per the declaration comment */
+        c[a_len] = d; /* stored, not added */
+    } else {
+        v8_mpv_mul_d_add(a, a_len, b, c);
+    }
+}
+
+/* c += a * b, where a is a_len words long.
*/
+void
+s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
+{ /* c += a * b; a non-zero carry out of the first a_len digits is rippled into c[a_len] and above */
+    mp_digit d;
+    mp_digit x[258]; /* scratch copy of a: up to 256 digits, one zero pad, one digit of alignment slack */
+    if (a_len <= 256) {
+        if (((ptrdiff_t)a & 0x7) != 0 || (a_len & 1) != 0) { /* misaligned or odd length: work from a copy */
+            mp_digit *px;
+            px = (((ptrdiff_t)x & 0x7) != 0) ? x + 1 : x; /* 8-byte aligned start inside x; assumes 4-byte mp_digit - TODO confirm */
+            memcpy(px, a, a_len * sizeof(*a));
+            a = px;
+            if (a_len & 1) {
+                px[a_len] = 0; /* zero digit after the copy so an even number of digits is readable */
+            }
+        }
+        d = mul_add_inp(c, a, a_len, b); /* c += a * b in place; d is the carry out per the declaration comment */
+        if (d) {
+            c += a_len; /* first digit above the product */
+            do { /* no bound check here: the caller must supply enough digits above a_len */
+                mp_digit sum = d + *c;
+                *c++ = sum;
+                d = sum < d; /* 1 if the addition wrapped (mp_digit assumed unsigned), otherwise 0 */
+            } while (d);
+        }
+    } else {
+        v8_mpv_mul_d_add_prop(a, a_len, b, c);
+    }
+}
diff --git a/security/nss/lib/freebl/mpi/mpi_sse2.s b/security/nss/lib/freebl/mpi/mpi_sse2.s
new file mode 100644
index 000000000..16a47019c
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_sse2.s
@@ -0,0 +1,294 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#ifdef DARWIN
+#define s_mpv_mul_d _s_mpv_mul_d
+#define s_mpv_mul_d_add _s_mpv_mul_d_add
+#define s_mpv_mul_d_add_prop _s_mpv_mul_d_add_prop
+#define s_mpv_sqr_add_prop _s_mpv_sqr_add_prop
+#define s_mpv_div_2dx1d _s_mpv_div_2dx1d
+#define TYPE_FUNCTION(x)
+#else
+#define TYPE_FUNCTION(x) .type x, @function
+#endif
+
+.text
+
+ # ebp - 8: caller's esi
+ # ebp - 4: caller's edi
+ # ebp + 0: caller's ebp
+ # ebp + 4: return address
+ # ebp + 8: a argument
+ # ebp + 12: a_len argument
+ # ebp + 16: b argument
+ # ebp + 20: c argument
+ # registers:
+ # ebx:
+ # ecx: a_len
+ # esi: a ptr
+ # edi: c ptr
+.globl s_mpv_mul_d
+.private_extern s_mpv_mul_d
+TYPE_FUNCTION(s_mpv_mul_d)
+s_mpv_mul_d:
+ push %ebp
+ mov %esp, %ebp
+ push %edi
+ push %esi
+ psubq %mm2, %mm2 # carry = 0
+ mov 12(%ebp), %ecx # ecx = a_len
+ movd 16(%ebp), %mm1 # mm1 = b
+ mov 20(%ebp), %edi
+ cmp $0, %ecx
+ je 2f # jmp if a_len == 0
+ mov 8(%ebp), %esi # esi = a
+ cld
+1:
+ movd 0(%esi), %mm0 # mm0 = *a++
+ add $4, %esi
+ pmuludq %mm1,
%mm0 # mm0 = b * *a++ + paddq %mm0, %mm2 # add the carry + movd %mm2, 0(%edi) # store the 32bit result + add $4, %edi + psrlq $32, %mm2 # save the carry + dec %ecx # --a_len + jnz 1b # jmp if a_len != 0 +2: + movd %mm2, 0(%edi) # *c = carry + emms + pop %esi + pop %edi + leave + ret + nop + + # ebp - 8: caller's esi + # ebp - 4: caller's edi + # ebp + 0: caller's ebp + # ebp + 4: return address + # ebp + 8: a argument + # ebp + 12: a_len argument + # ebp + 16: b argument + # ebp + 20: c argument + # registers: + # ebx: + # ecx: a_len + # esi: a ptr + # edi: c ptr +.globl s_mpv_mul_d_add +.private_extern s_mpv_mul_d_add +TYPE_FUNCTION(s_mpv_mul_d_add) +s_mpv_mul_d_add: + push %ebp + mov %esp, %ebp + push %edi + push %esi + psubq %mm2, %mm2 # carry = 0 + mov 12(%ebp), %ecx # ecx = a_len + movd 16(%ebp), %mm1 # mm1 = b + mov 20(%ebp), %edi + cmp $0, %ecx + je 2f # jmp if a_len == 0 + mov 8(%ebp), %esi # esi = a + cld +1: + movd 0(%esi), %mm0 # mm0 = *a++ + add $4, %esi + pmuludq %mm1, %mm0 # mm0 = b * *a++ + paddq %mm0, %mm2 # add the carry + movd 0(%edi), %mm0 + paddq %mm0, %mm2 # add the carry + movd %mm2, 0(%edi) # store the 32bit result + add $4, %edi + psrlq $32, %mm2 # save the carry + dec %ecx # --a_len + jnz 1b # jmp if a_len != 0 +2: + movd %mm2, 0(%edi) # *c = carry + emms + pop %esi + pop %edi + leave + ret + nop + + # ebp - 12: caller's ebx + # ebp - 8: caller's esi + # ebp - 4: caller's edi + # ebp + 0: caller's ebp + # ebp + 4: return address + # ebp + 8: a argument + # ebp + 12: a_len argument + # ebp + 16: b argument + # ebp + 20: c argument + # registers: + # eax: + # ebx: carry + # ecx: a_len + # esi: a ptr + # edi: c ptr +.globl s_mpv_mul_d_add_prop +.private_extern s_mpv_mul_d_add_prop +TYPE_FUNCTION(s_mpv_mul_d_add_prop) +s_mpv_mul_d_add_prop: + push %ebp + mov %esp, %ebp + push %edi + push %esi + push %ebx + psubq %mm2, %mm2 # carry = 0 + mov 12(%ebp), %ecx # ecx = a_len + movd 16(%ebp), %mm1 # mm1 = b + mov 20(%ebp), %edi + cmp $0, %ecx + je 2f 
# jmp if a_len == 0 + mov 8(%ebp), %esi # esi = a + cld +1: + movd 0(%esi), %mm0 # mm0 = *a++ + movd 0(%edi), %mm3 # fetch the sum + add $4, %esi + pmuludq %mm1, %mm0 # mm0 = b * *a++ + paddq %mm0, %mm2 # add the carry + paddq %mm3, %mm2 # add *c++ + movd %mm2, 0(%edi) # store the 32bit result + add $4, %edi + psrlq $32, %mm2 # save the carry + dec %ecx # --a_len + jnz 1b # jmp if a_len != 0 +2: + movd %mm2, %ebx + cmp $0, %ebx # is carry zero? + jz 4f + mov 0(%edi), %eax + add %ebx, %eax + stosl + jnc 4f +3: + mov 0(%edi), %eax # add in current word from *c + adc $0, %eax + stosl # [es:edi] = ax; edi += 4; + jc 3b +4: + emms + pop %ebx + pop %esi + pop %edi + leave + ret + nop + + # ebp - 12: caller's ebx + # ebp - 8: caller's esi + # ebp - 4: caller's edi + # ebp + 0: caller's ebp + # ebp + 4: return address + # ebp + 8: pa argument + # ebp + 12: a_len argument + # ebp + 16: ps argument + # registers: + # eax: + # ebx: carry + # ecx: a_len + # esi: a ptr + # edi: c ptr +.globl s_mpv_sqr_add_prop +.private_extern s_mpv_sqr_add_prop +TYPE_FUNCTION(s_mpv_sqr_add_prop) +s_mpv_sqr_add_prop: + push %ebp + mov %esp, %ebp + push %edi + push %esi + push %ebx + psubq %mm2, %mm2 # carry = 0 + mov 12(%ebp), %ecx # ecx = a_len + mov 16(%ebp), %edi + cmp $0, %ecx + je 2f # jmp if a_len == 0 + mov 8(%ebp), %esi # esi = a + cld +1: + movd 0(%esi), %mm0 # mm0 = *a + movd 0(%edi), %mm3 # fetch the sum + add $4, %esi + pmuludq %mm0, %mm0 # mm0 = sqr(a) + paddq %mm0, %mm2 # add the carry + paddq %mm3, %mm2 # add the low word + movd 4(%edi), %mm3 + movd %mm2, 0(%edi) # store the 32bit result + psrlq $32, %mm2 + paddq %mm3, %mm2 # add the high word + movd %mm2, 4(%edi) # store the 32bit result + psrlq $32, %mm2 # save the carry. + add $8, %edi + dec %ecx # --a_len + jnz 1b # jmp if a_len != 0 +2: + movd %mm2, %ebx + cmp $0, %ebx # is carry zero? 
+ jz 4f + mov 0(%edi), %eax + add %ebx, %eax + stosl + jnc 4f +3: + mov 0(%edi), %eax # add in current word from *c + adc $0, %eax + stosl # [es:edi] = ax; edi += 4; + jc 3b +4: + emms + pop %ebx + pop %esi + pop %edi + leave + ret + nop + + # + # Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized + # so its high bit is 1. This code is from NSPR. + # + # mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor, + # mp_digit *qp, mp_digit *rp) + + # esp + 0: Caller's ebx + # esp + 4: return address + # esp + 8: Nhi argument + # esp + 12: Nlo argument + # esp + 16: divisor argument + # esp + 20: qp argument + # esp + 24: rp argument + # registers: + # eax: + # ebx: carry + # ecx: a_len + # edx: + # esi: a ptr + # edi: c ptr + # +.globl s_mpv_div_2dx1d +.private_extern s_mpv_div_2dx1d +TYPE_FUNCTION(s_mpv_div_2dx1d) +s_mpv_div_2dx1d: + push %ebx + mov 8(%esp), %edx + mov 12(%esp), %eax + mov 16(%esp), %ebx + div %ebx + mov 20(%esp), %ebx + mov %eax, 0(%ebx) + mov 24(%esp), %ebx + mov %edx, 0(%ebx) + xor %eax, %eax # return zero + pop %ebx + ret + nop + +#ifndef DARWIN + # Magic indicating no need for an executable stack +.section .note.GNU-stack, "", @progbits +.previous +#endif diff --git a/security/nss/lib/freebl/mpi/mpi_x86.s b/security/nss/lib/freebl/mpi/mpi_x86.s new file mode 100644 index 000000000..8f7e2130c --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpi_x86.s @@ -0,0 +1,541 @@ +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +.data +.align 4 + # + # -1 means to call s_mpi_is_sse to determine if we support sse + # instructions. 
+ # 0 means to use x86 instructions + # 1 means to use sse2 instructions +.type is_sse,@object +.size is_sse,4 +is_sse: .long -1 + +# +# sigh, handle the difference between -fPIC and not PIC +# default to pic, since this file seems to be exclusively +# linux right now (solaris uses mpi_i86pc.s and windows uses +# mpi_x86_asm.c) +# +.ifndef NO_PIC +.macro GET var,reg + movl \var@GOTOFF(%ebx),\reg +.endm +.macro PUT reg,var + movl \reg,\var@GOTOFF(%ebx) +.endm +.else +.macro GET var,reg + movl \var,\reg +.endm +.macro PUT reg,var + movl \reg,\var +.endm +.endif + +.text + + + # ebp - 36: caller's esi + # ebp - 32: caller's edi + # ebp - 28: + # ebp - 24: + # ebp - 20: + # ebp - 16: + # ebp - 12: + # ebp - 8: + # ebp - 4: + # ebp + 0: caller's ebp + # ebp + 4: return address + # ebp + 8: a argument + # ebp + 12: a_len argument + # ebp + 16: b argument + # ebp + 20: c argument + # registers: + # eax: + # ebx: carry + # ecx: a_len + # edx: + # esi: a ptr + # edi: c ptr +.globl s_mpv_mul_d +.type s_mpv_mul_d,@function +s_mpv_mul_d: + GET is_sse,%eax + cmp $0,%eax + je s_mpv_mul_d_x86 + jg s_mpv_mul_d_sse2 + call s_mpi_is_sse2 + PUT %eax,is_sse + cmp $0,%eax + jg s_mpv_mul_d_sse2 +s_mpv_mul_d_x86: + push %ebp + mov %esp,%ebp + sub $28,%esp + push %edi + push %esi + push %ebx + movl $0,%ebx # carry = 0 + mov 12(%ebp),%ecx # ecx = a_len + mov 20(%ebp),%edi + cmp $0,%ecx + je 2f # jmp if a_len == 0 + mov 8(%ebp),%esi # esi = a + cld +1: + lodsl # eax = [ds:esi]; esi += 4 + mov 16(%ebp),%edx # edx = b + mull %edx # edx:eax = Phi:Plo = a_i * b + + add %ebx,%eax # add carry (%ebx) to edx:eax + adc $0,%edx + mov %edx,%ebx # high half of product becomes next carry + + stosl # [es:edi] = ax; edi += 4; + dec %ecx # --a_len + jnz 1b # jmp if a_len != 0 +2: + mov %ebx,0(%edi) # *c = carry + pop %ebx + pop %esi + pop %edi + leave + ret + nop +s_mpv_mul_d_sse2: + push %ebp + mov %esp,%ebp + push %edi + push %esi + psubq %mm2,%mm2 # carry = 0 + mov 12(%ebp),%ecx # ecx = a_len + movd 
16(%ebp),%mm1 # mm1 = b + mov 20(%ebp),%edi + cmp $0,%ecx + je 6f # jmp if a_len == 0 + mov 8(%ebp),%esi # esi = a + cld +5: + movd 0(%esi),%mm0 # mm0 = *a++ + add $4,%esi + pmuludq %mm1,%mm0 # mm0 = b * *a++ + paddq %mm0,%mm2 # add the carry + movd %mm2,0(%edi) # store the 32bit result + add $4,%edi + psrlq $32, %mm2 # save the carry + dec %ecx # --a_len + jnz 5b # jmp if a_len != 0 +6: + movd %mm2,0(%edi) # *c = carry + emms + pop %esi + pop %edi + leave + ret + nop + + # ebp - 36: caller's esi + # ebp - 32: caller's edi + # ebp - 28: + # ebp - 24: + # ebp - 20: + # ebp - 16: + # ebp - 12: + # ebp - 8: + # ebp - 4: + # ebp + 0: caller's ebp + # ebp + 4: return address + # ebp + 8: a argument + # ebp + 12: a_len argument + # ebp + 16: b argument + # ebp + 20: c argument + # registers: + # eax: + # ebx: carry + # ecx: a_len + # edx: + # esi: a ptr + # edi: c ptr +.globl s_mpv_mul_d_add +.type s_mpv_mul_d_add,@function +s_mpv_mul_d_add: + GET is_sse,%eax + cmp $0,%eax + je s_mpv_mul_d_add_x86 + jg s_mpv_mul_d_add_sse2 + call s_mpi_is_sse2 + PUT %eax,is_sse + cmp $0,%eax + jg s_mpv_mul_d_add_sse2 +s_mpv_mul_d_add_x86: + push %ebp + mov %esp,%ebp + sub $28,%esp + push %edi + push %esi + push %ebx + movl $0,%ebx # carry = 0 + mov 12(%ebp),%ecx # ecx = a_len + mov 20(%ebp),%edi + cmp $0,%ecx + je 11f # jmp if a_len == 0 + mov 8(%ebp),%esi # esi = a + cld +10: + lodsl # eax = [ds:esi]; esi += 4 + mov 16(%ebp),%edx # edx = b + mull %edx # edx:eax = Phi:Plo = a_i * b + + add %ebx,%eax # add carry (%ebx) to edx:eax + adc $0,%edx + mov 0(%edi),%ebx # add in current word from *c + add %ebx,%eax + adc $0,%edx + mov %edx,%ebx # high half of product becomes next carry + + stosl # [es:edi] = ax; edi += 4; + dec %ecx # --a_len + jnz 10b # jmp if a_len != 0 +11: + mov %ebx,0(%edi) # *c = carry + pop %ebx + pop %esi + pop %edi + leave + ret + nop +s_mpv_mul_d_add_sse2: + push %ebp + mov %esp,%ebp + push %edi + push %esi + psubq %mm2,%mm2 # carry = 0 + mov 12(%ebp),%ecx # ecx = a_len 
+ movd 16(%ebp),%mm1 # mm1 = b + mov 20(%ebp),%edi + cmp $0,%ecx + je 16f # jmp if a_len == 0 + mov 8(%ebp),%esi # esi = a + cld +15: + movd 0(%esi),%mm0 # mm0 = *a++ + add $4,%esi + pmuludq %mm1,%mm0 # mm0 = b * *a++ + paddq %mm0,%mm2 # add the carry + movd 0(%edi),%mm0 + paddq %mm0,%mm2 # add the carry + movd %mm2,0(%edi) # store the 32bit result + add $4,%edi + psrlq $32, %mm2 # save the carry + dec %ecx # --a_len + jnz 15b # jmp if a_len != 0 +16: + movd %mm2,0(%edi) # *c = carry + emms + pop %esi + pop %edi + leave + ret + nop + + # ebp - 8: caller's esi + # ebp - 4: caller's edi + # ebp + 0: caller's ebp + # ebp + 4: return address + # ebp + 8: a argument + # ebp + 12: a_len argument + # ebp + 16: b argument + # ebp + 20: c argument + # registers: + # eax: + # ebx: carry + # ecx: a_len + # edx: + # esi: a ptr + # edi: c ptr +.globl s_mpv_mul_d_add_prop +.type s_mpv_mul_d_add_prop,@function +s_mpv_mul_d_add_prop: + GET is_sse,%eax + cmp $0,%eax + je s_mpv_mul_d_add_prop_x86 + jg s_mpv_mul_d_add_prop_sse2 + call s_mpi_is_sse2 + PUT %eax,is_sse + cmp $0,%eax + jg s_mpv_mul_d_add_prop_sse2 +s_mpv_mul_d_add_prop_x86: + push %ebp + mov %esp,%ebp + sub $28,%esp + push %edi + push %esi + push %ebx + movl $0,%ebx # carry = 0 + mov 12(%ebp),%ecx # ecx = a_len + mov 20(%ebp),%edi + cmp $0,%ecx + je 21f # jmp if a_len == 0 + cld + mov 8(%ebp),%esi # esi = a +20: + lodsl # eax = [ds:esi]; esi += 4 + mov 16(%ebp),%edx # edx = b + mull %edx # edx:eax = Phi:Plo = a_i * b + + add %ebx,%eax # add carry (%ebx) to edx:eax + adc $0,%edx + mov 0(%edi),%ebx # add in current word from *c + add %ebx,%eax + adc $0,%edx + mov %edx,%ebx # high half of product becomes next carry + + stosl # [es:edi] = ax; edi += 4; + dec %ecx # --a_len + jnz 20b # jmp if a_len != 0 +21: + cmp $0,%ebx # is carry zero? 
+ jz 23f + mov 0(%edi),%eax # add in current word from *c + add %ebx,%eax + stosl # [es:edi] = ax; edi += 4; + jnc 23f +22: + mov 0(%edi),%eax # add in current word from *c + adc $0,%eax + stosl # [es:edi] = ax; edi += 4; + jc 22b +23: + pop %ebx + pop %esi + pop %edi + leave + ret + nop +s_mpv_mul_d_add_prop_sse2: + push %ebp + mov %esp,%ebp + push %edi + push %esi + push %ebx + psubq %mm2,%mm2 # carry = 0 + mov 12(%ebp),%ecx # ecx = a_len + movd 16(%ebp),%mm1 # mm1 = b + mov 20(%ebp),%edi + cmp $0,%ecx + je 26f # jmp if a_len == 0 + mov 8(%ebp),%esi # esi = a + cld +25: + movd 0(%esi),%mm0 # mm0 = *a++ + movd 0(%edi),%mm3 # fetch the sum + add $4,%esi + pmuludq %mm1,%mm0 # mm0 = b * *a++ + paddq %mm0,%mm2 # add the carry + paddq %mm3,%mm2 # add *c++ + movd %mm2,0(%edi) # store the 32bit result + add $4,%edi + psrlq $32, %mm2 # save the carry + dec %ecx # --a_len + jnz 25b # jmp if a_len != 0 +26: + movd %mm2,%ebx + cmp $0,%ebx # is carry zero? + jz 28f + mov 0(%edi),%eax + add %ebx, %eax + stosl + jnc 28f +27: + mov 0(%edi),%eax # add in current word from *c + adc $0,%eax + stosl # [es:edi] = ax; edi += 4; + jc 27b +28: + emms + pop %ebx + pop %esi + pop %edi + leave + ret + nop + + + # ebp - 20: caller's esi + # ebp - 16: caller's edi + # ebp - 12: + # ebp - 8: carry + # ebp - 4: a_len local + # ebp + 0: caller's ebp + # ebp + 4: return address + # ebp + 8: pa argument + # ebp + 12: a_len argument + # ebp + 16: ps argument + # ebp + 20: + # registers: + # eax: + # ebx: carry + # ecx: a_len + # edx: + # esi: a ptr + # edi: c ptr + +.globl s_mpv_sqr_add_prop +.type s_mpv_sqr_add_prop,@function +s_mpv_sqr_add_prop: + GET is_sse,%eax + cmp $0,%eax + je s_mpv_sqr_add_prop_x86 + jg s_mpv_sqr_add_prop_sse2 + call s_mpi_is_sse2 + PUT %eax,is_sse + cmp $0,%eax + jg s_mpv_sqr_add_prop_sse2 +s_mpv_sqr_add_prop_x86: + push %ebp + mov %esp,%ebp + sub $12,%esp + push %edi + push %esi + push %ebx + movl $0,%ebx # carry = 0 + mov 12(%ebp),%ecx # a_len + mov 16(%ebp),%edi # edi 
= ps + cmp $0,%ecx + je 31f # jump if a_len == 0 + cld + mov 8(%ebp),%esi # esi = pa +30: + lodsl # %eax = [ds:si]; si += 4; + mull %eax + + add %ebx,%eax # add "carry" + adc $0,%edx + mov 0(%edi),%ebx + add %ebx,%eax # add low word from result + mov 4(%edi),%ebx + stosl # [es:di] = %eax; di += 4; + adc %ebx,%edx # add high word from result + movl $0,%ebx + mov %edx,%eax + adc $0,%ebx + stosl # [es:di] = %eax; di += 4; + dec %ecx # --a_len + jnz 30b # jmp if a_len != 0 +31: + cmp $0,%ebx # is carry zero? + jz 34f + mov 0(%edi),%eax # add in current word from *c + add %ebx,%eax + stosl # [es:edi] = ax; edi += 4; + jnc 34f +32: + mov 0(%edi),%eax # add in current word from *c + adc $0,%eax + stosl # [es:edi] = ax; edi += 4; + jc 32b +34: + pop %ebx + pop %esi + pop %edi + leave + ret + nop +s_mpv_sqr_add_prop_sse2: + push %ebp + mov %esp,%ebp + push %edi + push %esi + push %ebx + psubq %mm2,%mm2 # carry = 0 + mov 12(%ebp),%ecx # ecx = a_len + mov 16(%ebp),%edi + cmp $0,%ecx + je 36f # jmp if a_len == 0 + mov 8(%ebp),%esi # esi = a + cld +35: + movd 0(%esi),%mm0 # mm0 = *a + movd 0(%edi),%mm3 # fetch the sum + add $4,%esi + pmuludq %mm0,%mm0 # mm0 = sqr(a) + paddq %mm0,%mm2 # add the carry + paddq %mm3,%mm2 # add the low word + movd 4(%edi),%mm3 + movd %mm2,0(%edi) # store the 32bit result + psrlq $32, %mm2 + paddq %mm3,%mm2 # add the high word + movd %mm2,4(%edi) # store the 32bit result + psrlq $32, %mm2 # save the carry. + add $8,%edi + dec %ecx # --a_len + jnz 35b # jmp if a_len != 0 +36: + movd %mm2,%ebx + cmp $0,%ebx # is carry zero? + jz 38f + mov 0(%edi),%eax + add %ebx, %eax + stosl + jnc 38f +37: + mov 0(%edi),%eax # add in current word from *c + adc $0,%eax + stosl # [es:edi] = ax; edi += 4; + jc 37b +38: + emms + pop %ebx + pop %esi + pop %edi + leave + ret + nop + + # + # Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized + # so its high bit is 1. This code is from NSPR. 
+    #
+    # mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
+    #                        mp_digit *qp, mp_digit *rp)
+    #
+    # Divides the 64-bit value Nhi:Nlo by divisor, stores the quotient in
+    # *qp and the remainder in *rp, and returns 0.
+    # NOTE(review): the x86 div instruction raises a divide error when the
+    # quotient does not fit in 32 bits (Nhi >= divisor); presumably callers
+    # guarantee Nhi < divisor -- confirm.
+
+    # Stack layout after the "push %ebx" below:
+    # esp +  0:  Caller's ebx
+    # esp +  4:  return address
+    # esp +  8:  Nhi     argument
+    # esp + 12:  Nlo     argument
+    # esp + 16:  divisor argument
+    # esp + 20:  qp      argument
+    # esp + 24:  rp      argument
+    # registers:
+    #  eax:  Nlo going into div, quotient coming out; zeroed for the return
+    #  ebx:  divisor, then the qp and rp pointers (saved and restored here)
+    #  ecx:  not used
+    #  edx:  Nhi going into div, remainder coming out
+    #  esi:  not used
+    #  edi:  not used
+    #
+
+.globl s_mpv_div_2dx1d
+.type s_mpv_div_2dx1d,@function
+s_mpv_div_2dx1d:
+    push %ebx
+    mov 8(%esp),%edx            # edx = Nhi
+    mov 12(%esp),%eax           # eax = Nlo
+    mov 16(%esp),%ebx           # ebx = divisor
+    div %ebx                    # eax = quotient, edx = remainder
+    mov 20(%esp),%ebx
+    mov %eax,0(%ebx)            # *qp = quotient
+    mov 24(%esp),%ebx
+    mov %edx,0(%ebx)            # *rp = remainder
+    xor %eax,%eax               # return zero
+    pop %ebx
+    ret
+    nop
+
+    # Magic indicating no need for an executable stack
+.section .note.GNU-stack, "", @progbits
+.previous
diff --git a/security/nss/lib/freebl/mpi/mpi_x86_asm.c b/security/nss/lib/freebl/mpi/mpi_x86_asm.c
new file mode 100644
index 000000000..4faeef30c
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_x86_asm.c
@@ -0,0 +1,531 @@
+/*
+ *  mpi_x86_asm.c - MSVC inline assembly implementation of s_mpv_ functions.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/ + +#include "mpi-priv.h" + +static int is_sse = -1; +extern unsigned long s_mpi_is_sse2(); + +/* + * ebp - 36: caller's esi + * ebp - 32: caller's edi + * ebp - 28: + * ebp - 24: + * ebp - 20: + * ebp - 16: + * ebp - 12: + * ebp - 8: + * ebp - 4: + * ebp + 0: caller's ebp + * ebp + 4: return address + * ebp + 8: a argument + * ebp + 12: a_len argument + * ebp + 16: b argument + * ebp + 20: c argument + * registers: + * eax: + * ebx: carry + * ecx: a_len + * edx: + * esi: a ptr + * edi: c ptr + */ +__declspec(naked) void s_mpv_mul_d(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c) +{ + __asm { + mov eax, is_sse + cmp eax, 0 + je s_mpv_mul_d_x86 + jg s_mpv_mul_d_sse2 + call s_mpi_is_sse2 + mov is_sse, eax + cmp eax, 0 + jg s_mpv_mul_d_sse2 +s_mpv_mul_d_x86: + push ebp + mov ebp,esp + sub esp,28 + push edi + push esi + push ebx + mov ebx,0 ; carry = 0 + mov ecx,[ebp+12] ; ecx = a_len + mov edi,[ebp+20] + cmp ecx,0 + je L_2 ; jmp if a_len == 0 + mov esi,[ebp+8] ; esi = a + cld +L_1: + lodsd ; eax = [ds:esi]; esi += 4 + mov edx,[ebp+16] ; edx = b + mul edx ; edx:eax = Phi:Plo = a_i * b + + add eax,ebx ; add carry (ebx) to edx:eax + adc edx,0 + mov ebx,edx ; high half of product becomes next carry + + stosd ; [es:edi] = ax; edi += 4; + dec ecx ; --a_len + jnz L_1 ; jmp if a_len != 0 +L_2: + mov [edi],ebx ; *c = carry + pop ebx + pop esi + pop edi + leave + ret + nop +s_mpv_mul_d_sse2: + push ebp + mov ebp, esp + push edi + push esi + psubq mm2, mm2 ; carry = 0 + mov ecx, [ebp+12] ; ecx = a_len + movd mm1, [ebp+16] ; mm1 = b + mov edi, [ebp+20] + cmp ecx, 0 + je L_6 ; jmp if a_len == 0 + mov esi, [ebp+8] ; esi = a + cld +L_5: + movd mm0, [esi] ; mm0 = *a++ + add esi, 4 + pmuludq mm0, mm1 ; mm0 = b * *a++ + paddq mm2, mm0 ; add the carry + movd [edi], mm2 ; store the 32bit result + add edi, 4 + psrlq mm2, 32 ; save the carry + dec ecx ; --a_len + jnz L_5 ; jmp if a_len != 0 +L_6: + movd [edi], mm2 ; *c = carry + emms + pop esi + pop edi + leave + ret + nop + } 
+} + +/* + * ebp - 36: caller's esi + * ebp - 32: caller's edi + * ebp - 28: + * ebp - 24: + * ebp - 20: + * ebp - 16: + * ebp - 12: + * ebp - 8: + * ebp - 4: + * ebp + 0: caller's ebp + * ebp + 4: return address + * ebp + 8: a argument + * ebp + 12: a_len argument + * ebp + 16: b argument + * ebp + 20: c argument + * registers: + * eax: + * ebx: carry + * ecx: a_len + * edx: + * esi: a ptr + * edi: c ptr + */ +__declspec(naked) void s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c) +{ + __asm { + mov eax, is_sse + cmp eax, 0 + je s_mpv_mul_d_add_x86 + jg s_mpv_mul_d_add_sse2 + call s_mpi_is_sse2 + mov is_sse, eax + cmp eax, 0 + jg s_mpv_mul_d_add_sse2 +s_mpv_mul_d_add_x86: + push ebp + mov ebp,esp + sub esp,28 + push edi + push esi + push ebx + mov ebx,0 ; carry = 0 + mov ecx,[ebp+12] ; ecx = a_len + mov edi,[ebp+20] + cmp ecx,0 + je L_11 ; jmp if a_len == 0 + mov esi,[ebp+8] ; esi = a + cld +L_10: + lodsd ; eax = [ds:esi]; esi += 4 + mov edx,[ebp+16] ; edx = b + mul edx ; edx:eax = Phi:Plo = a_i * b + + add eax,ebx ; add carry (ebx) to edx:eax + adc edx,0 + mov ebx,[edi] ; add in current word from *c + add eax,ebx + adc edx,0 + mov ebx,edx ; high half of product becomes next carry + + stosd ; [es:edi] = ax; edi += 4; + dec ecx ; --a_len + jnz L_10 ; jmp if a_len != 0 +L_11: + mov [edi],ebx ; *c = carry + pop ebx + pop esi + pop edi + leave + ret + nop +s_mpv_mul_d_add_sse2: + push ebp + mov ebp, esp + push edi + push esi + psubq mm2, mm2 ; carry = 0 + mov ecx, [ebp+12] ; ecx = a_len + movd mm1, [ebp+16] ; mm1 = b + mov edi, [ebp+20] + cmp ecx, 0 + je L_16 ; jmp if a_len == 0 + mov esi, [ebp+8] ; esi = a + cld +L_15: + movd mm0, [esi] ; mm0 = *a++ + add esi, 4 + pmuludq mm0, mm1 ; mm0 = b * *a++ + paddq mm2, mm0 ; add the carry + movd mm0, [edi] + paddq mm2, mm0 ; add the carry + movd [edi], mm2 ; store the 32bit result + add edi, 4 + psrlq mm2, 32 ; save the carry + dec ecx ; --a_len + jnz L_15 ; jmp if a_len != 0 +L_16: + movd [edi], mm2 
; *c = carry + emms + pop esi + pop edi + leave + ret + nop + } +} + +/* + * ebp - 36: caller's esi + * ebp - 32: caller's edi + * ebp - 28: + * ebp - 24: + * ebp - 20: + * ebp - 16: + * ebp - 12: + * ebp - 8: + * ebp - 4: + * ebp + 0: caller's ebp + * ebp + 4: return address + * ebp + 8: a argument + * ebp + 12: a_len argument + * ebp + 16: b argument + * ebp + 20: c argument + * registers: + * eax: + * ebx: carry + * ecx: a_len + * edx: + * esi: a ptr + * edi: c ptr + */ +__declspec(naked) void s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c) +{ + __asm { + mov eax, is_sse + cmp eax, 0 + je s_mpv_mul_d_add_prop_x86 + jg s_mpv_mul_d_add_prop_sse2 + call s_mpi_is_sse2 + mov is_sse, eax + cmp eax, 0 + jg s_mpv_mul_d_add_prop_sse2 +s_mpv_mul_d_add_prop_x86: + push ebp + mov ebp,esp + sub esp,28 + push edi + push esi + push ebx + mov ebx,0 ; carry = 0 + mov ecx,[ebp+12] ; ecx = a_len + mov edi,[ebp+20] + cmp ecx,0 + je L_21 ; jmp if a_len == 0 + cld + mov esi,[ebp+8] ; esi = a +L_20: + lodsd ; eax = [ds:esi]; esi += 4 + mov edx,[ebp+16] ; edx = b + mul edx ; edx:eax = Phi:Plo = a_i * b + + add eax,ebx ; add carry (ebx) to edx:eax + adc edx,0 + mov ebx,[edi] ; add in current word from *c + add eax,ebx + adc edx,0 + mov ebx,edx ; high half of product becomes next carry + + stosd ; [es:edi] = ax; edi += 4; + dec ecx ; --a_len + jnz L_20 ; jmp if a_len != 0 +L_21: + cmp ebx,0 ; is carry zero? 
+ jz L_23 + mov eax,[edi] ; add in current word from *c + add eax,ebx + stosd ; [es:edi] = ax; edi += 4; + jnc L_23 +L_22: + mov eax,[edi] ; add in current word from *c + adc eax,0 + stosd ; [es:edi] = ax; edi += 4; + jc L_22 +L_23: + pop ebx + pop esi + pop edi + leave + ret + nop +s_mpv_mul_d_add_prop_sse2: + push ebp + mov ebp, esp + push edi + push esi + push ebx + psubq mm2, mm2 ; carry = 0 + mov ecx, [ebp+12] ; ecx = a_len + movd mm1, [ebp+16] ; mm1 = b + mov edi, [ebp+20] + cmp ecx, 0 + je L_26 ; jmp if a_len == 0 + mov esi, [ebp+8] ; esi = a + cld +L_25: + movd mm0, [esi] ; mm0 = *a++ + movd mm3, [edi] ; fetch the sum + add esi, 4 + pmuludq mm0, mm1 ; mm0 = b * *a++ + paddq mm2, mm0 ; add the carry + paddq mm2, mm3 ; add *c++ + movd [edi], mm2 ; store the 32bit result + add edi, 4 + psrlq mm2, 32 ; save the carry + dec ecx ; --a_len + jnz L_25 ; jmp if a_len != 0 +L_26: + movd ebx, mm2 + cmp ebx, 0 ; is carry zero? + jz L_28 + mov eax, [edi] + add eax, ebx + stosd + jnc L_28 +L_27: + mov eax, [edi] ; add in current word from *c + adc eax, 0 + stosd ; [es:edi] = ax; edi += 4; + jc L_27 +L_28: + emms + pop ebx + pop esi + pop edi + leave + ret + nop + } +} + +/* + * ebp - 20: caller's esi + * ebp - 16: caller's edi + * ebp - 12: + * ebp - 8: carry + * ebp - 4: a_len local + * ebp + 0: caller's ebp + * ebp + 4: return address + * ebp + 8: pa argument + * ebp + 12: a_len argument + * ebp + 16: ps argument + * ebp + 20: + * registers: + * eax: + * ebx: carry + * ecx: a_len + * edx: + * esi: a ptr + * edi: c ptr + */ +__declspec(naked) void s_mpv_sqr_add_prop(const mp_digit *a, mp_size a_len, mp_digit *sqrs) +{ + __asm { + mov eax, is_sse + cmp eax, 0 + je s_mpv_sqr_add_prop_x86 + jg s_mpv_sqr_add_prop_sse2 + call s_mpi_is_sse2 + mov is_sse, eax + cmp eax, 0 + jg s_mpv_sqr_add_prop_sse2 +s_mpv_sqr_add_prop_x86: + push ebp + mov ebp,esp + sub esp,12 + push edi + push esi + push ebx + mov ebx,0 ; carry = 0 + mov ecx,[ebp+12] ; a_len + mov edi,[ebp+16] ; edi = ps + 
cmp ecx,0 + je L_31 ; jump if a_len == 0 + cld + mov esi,[ebp+8] ; esi = pa +L_30: + lodsd ; eax = [ds:si]; si += 4; + mul eax + + add eax,ebx ; add "carry" + adc edx,0 + mov ebx,[edi] + add eax,ebx ; add low word from result + mov ebx,[edi+4] + stosd ; [es:di] = eax; di += 4; + adc edx,ebx ; add high word from result + mov ebx,0 + mov eax,edx + adc ebx,0 + stosd ; [es:di] = eax; di += 4; + dec ecx ; --a_len + jnz L_30 ; jmp if a_len != 0 +L_31: + cmp ebx,0 ; is carry zero? + jz L_34 + mov eax,[edi] ; add in current word from *c + add eax,ebx + stosd ; [es:edi] = ax; edi += 4; + jnc L_34 +L_32: + mov eax,[edi] ; add in current word from *c + adc eax,0 + stosd ; [es:edi] = ax; edi += 4; + jc L_32 +L_34: + pop ebx + pop esi + pop edi + leave + ret + nop +s_mpv_sqr_add_prop_sse2: + push ebp + mov ebp, esp + push edi + push esi + push ebx + psubq mm2, mm2 ; carry = 0 + mov ecx, [ebp+12] ; ecx = a_len + mov edi, [ebp+16] + cmp ecx, 0 + je L_36 ; jmp if a_len == 0 + mov esi, [ebp+8] ; esi = a + cld +L_35: + movd mm0, [esi] ; mm0 = *a + movd mm3, [edi] ; fetch the sum + add esi, 4 + pmuludq mm0, mm0 ; mm0 = sqr(a) + paddq mm2, mm0 ; add the carry + paddq mm2, mm3 ; add the low word + movd mm3, [edi+4] + movd [edi], mm2 ; store the 32bit result + psrlq mm2, 32 + paddq mm2, mm3 ; add the high word + movd [edi+4], mm2 ; store the 32bit result + psrlq mm2, 32 ; save the carry. + add edi, 8 + dec ecx ; --a_len + jnz L_35 ; jmp if a_len != 0 +L_36: + movd ebx, mm2 + cmp ebx, 0 ; is carry zero? + jz L_38 + mov eax, [edi] + add eax, ebx + stosd + jnc L_38 +L_37: + mov eax, [edi] ; add in current word from *c + adc eax, 0 + stosd ; [es:edi] = ax; edi += 4; + jc L_37 +L_38: + emms + pop ebx + pop esi + pop edi + leave + ret + nop + } +} + +/* + * Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized + * so its high bit is 1. This code is from NSPR. 
+ *
+ * Dump of assembler code for function s_mpv_div_2dx1d:
+ *
+ * Stores the quotient of Nhi:Nlo / divisor in *qp and the remainder in
+ * *rp, then returns 0 in eax.
+ * NOTE(review): the x86 div instruction raises a divide error when the
+ * quotient does not fit in 32 bits (Nhi >= divisor); presumably callers
+ * guarantee Nhi < divisor -- confirm.
+ *
+ * Stack layout after the "push ebx" below:
+ *  esp +  0:   Caller's ebx
+ *  esp +  4:   return address
+ *  esp +  8:   Nhi     argument
+ *  esp + 12:   Nlo     argument
+ *  esp + 16:   divisor argument
+ *  esp + 20:   qp      argument
+ *  esp + 24:   rp      argument
+ * registers:
+ *  eax:  Nlo going into div, quotient coming out; zeroed for the return
+ *  ebx:  divisor, then the qp and rp pointers (saved and restored here)
+ *  ecx:  not used
+ *  edx:  Nhi going into div, remainder coming out
+ *  esi:  not used
+ *  edi:  not used
+ */
+__declspec(naked) mp_err
+    s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
+                    mp_digit *qp, mp_digit *rp)
+{
+    __asm {
+        push   ebx
+        mov    edx,[esp+8]
+        mov    eax,[esp+12]
+        mov    ebx,[esp+16]
+        div    ebx
+        mov    ebx,[esp+20]
+        mov    [ebx],eax
+        mov    ebx,[esp+24]
+        mov    [ebx],edx
+        xor    eax,eax ; return zero
+        pop    ebx
+        ret
+        nop
+    }
+}
diff --git a/security/nss/lib/freebl/mpi/mpi_x86_os2.s b/security/nss/lib/freebl/mpi/mpi_x86_os2.s
new file mode 100644
index 000000000..b903e2564
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpi_x86_os2.s
@@ -0,0 +1,538 @@
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+.data
+.align 4
+    #
+    # -1 means to call _s_mpi_is_sse2 to determine if we support sse
+    #    instructions.
+ # 0 means to use x86 instructions + # 1 means to use sse2 instructions +.type is_sse,@object +.size is_sse,4 +is_sse: .long -1 + +# +# sigh, handle the difference between -fPIC and not PIC +# default to pic, since this file seems to be exclusively +# linux right now (solaris uses mpi_i86pc.s and windows uses +# mpi_x86_asm.c) +# +#.ifndef NO_PIC +#.macro GET var,reg +# movl \var@GOTOFF(%ebx),\reg +#.endm +#.macro PUT reg,var +# movl \reg,\var@GOTOFF(%ebx) +#.endm +#.else +.macro GET var,reg + movl \var,\reg +.endm +.macro PUT reg,var + movl \reg,\var +.endm +#.endif + +.text + + + # ebp - 36: caller's esi + # ebp - 32: caller's edi + # ebp - 28: + # ebp - 24: + # ebp - 20: + # ebp - 16: + # ebp - 12: + # ebp - 8: + # ebp - 4: + # ebp + 0: caller's ebp + # ebp + 4: return address + # ebp + 8: a argument + # ebp + 12: a_len argument + # ebp + 16: b argument + # ebp + 20: c argument + # registers: + # eax: + # ebx: carry + # ecx: a_len + # edx: + # esi: a ptr + # edi: c ptr +.globl _s_mpv_mul_d +.type _s_mpv_mul_d,@function +_s_mpv_mul_d: + GET is_sse,%eax + cmp $0,%eax + je _s_mpv_mul_d_x86 + jg _s_mpv_mul_d_sse2 + call _s_mpi_is_sse2 + PUT %eax,is_sse + cmp $0,%eax + jg _s_mpv_mul_d_sse2 +_s_mpv_mul_d_x86: + push %ebp + mov %esp,%ebp + sub $28,%esp + push %edi + push %esi + push %ebx + movl $0,%ebx # carry = 0 + mov 12(%ebp),%ecx # ecx = a_len + mov 20(%ebp),%edi + cmp $0,%ecx + je 2f # jmp if a_len == 0 + mov 8(%ebp),%esi # esi = a + cld +1: + lodsl # eax = [ds:esi]; esi += 4 + mov 16(%ebp),%edx # edx = b + mull %edx # edx:eax = Phi:Plo = a_i * b + + add %ebx,%eax # add carry (%ebx) to edx:eax + adc $0,%edx + mov %edx,%ebx # high half of product becomes next carry + + stosl # [es:edi] = ax; edi += 4; + dec %ecx # --a_len + jnz 1b # jmp if a_len != 0 +2: + mov %ebx,0(%edi) # *c = carry + pop %ebx + pop %esi + pop %edi + leave + ret + nop +_s_mpv_mul_d_sse2: + push %ebp + mov %esp,%ebp + push %edi + push %esi + psubq %mm2,%mm2 # carry = 0 + mov 12(%ebp),%ecx # ecx 
= a_len + movd 16(%ebp),%mm1 # mm1 = b + mov 20(%ebp),%edi + cmp $0,%ecx + je 6f # jmp if a_len == 0 + mov 8(%ebp),%esi # esi = a + cld +5: + movd 0(%esi),%mm0 # mm0 = *a++ + add $4,%esi + pmuludq %mm1,%mm0 # mm0 = b * *a++ + paddq %mm0,%mm2 # add the carry + movd %mm2,0(%edi) # store the 32bit result + add $4,%edi + psrlq $32, %mm2 # save the carry + dec %ecx # --a_len + jnz 5b # jmp if a_len != 0 +6: + movd %mm2,0(%edi) # *c = carry + emms + pop %esi + pop %edi + leave + ret + nop + + # ebp - 36: caller's esi + # ebp - 32: caller's edi + # ebp - 28: + # ebp - 24: + # ebp - 20: + # ebp - 16: + # ebp - 12: + # ebp - 8: + # ebp - 4: + # ebp + 0: caller's ebp + # ebp + 4: return address + # ebp + 8: a argument + # ebp + 12: a_len argument + # ebp + 16: b argument + # ebp + 20: c argument + # registers: + # eax: + # ebx: carry + # ecx: a_len + # edx: + # esi: a ptr + # edi: c ptr +.globl _s_mpv_mul_d_add +.type _s_mpv_mul_d_add,@function +_s_mpv_mul_d_add: + GET is_sse,%eax + cmp $0,%eax + je _s_mpv_mul_d_add_x86 + jg _s_mpv_mul_d_add_sse2 + call _s_mpi_is_sse2 + PUT %eax,is_sse + cmp $0,%eax + jg _s_mpv_mul_d_add_sse2 +_s_mpv_mul_d_add_x86: + push %ebp + mov %esp,%ebp + sub $28,%esp + push %edi + push %esi + push %ebx + movl $0,%ebx # carry = 0 + mov 12(%ebp),%ecx # ecx = a_len + mov 20(%ebp),%edi + cmp $0,%ecx + je 11f # jmp if a_len == 0 + mov 8(%ebp),%esi # esi = a + cld +10: + lodsl # eax = [ds:esi]; esi += 4 + mov 16(%ebp),%edx # edx = b + mull %edx # edx:eax = Phi:Plo = a_i * b + + add %ebx,%eax # add carry (%ebx) to edx:eax + adc $0,%edx + mov 0(%edi),%ebx # add in current word from *c + add %ebx,%eax + adc $0,%edx + mov %edx,%ebx # high half of product becomes next carry + + stosl # [es:edi] = ax; edi += 4; + dec %ecx # --a_len + jnz 10b # jmp if a_len != 0 +11: + mov %ebx,0(%edi) # *c = carry + pop %ebx + pop %esi + pop %edi + leave + ret + nop +_s_mpv_mul_d_add_sse2: + push %ebp + mov %esp,%ebp + push %edi + push %esi + psubq %mm2,%mm2 # carry = 0 + mov 
12(%ebp),%ecx # ecx = a_len + movd 16(%ebp),%mm1 # mm1 = b + mov 20(%ebp),%edi + cmp $0,%ecx + je 16f # jmp if a_len == 0 + mov 8(%ebp),%esi # esi = a + cld +15: + movd 0(%esi),%mm0 # mm0 = *a++ + add $4,%esi + pmuludq %mm1,%mm0 # mm0 = b * *a++ + paddq %mm0,%mm2 # add the carry + movd 0(%edi),%mm0 + paddq %mm0,%mm2 # add the carry + movd %mm2,0(%edi) # store the 32bit result + add $4,%edi + psrlq $32, %mm2 # save the carry + dec %ecx # --a_len + jnz 15b # jmp if a_len != 0 +16: + movd %mm2,0(%edi) # *c = carry + emms + pop %esi + pop %edi + leave + ret + nop + + # ebp - 8: caller's esi + # ebp - 4: caller's edi + # ebp + 0: caller's ebp + # ebp + 4: return address + # ebp + 8: a argument + # ebp + 12: a_len argument + # ebp + 16: b argument + # ebp + 20: c argument + # registers: + # eax: + # ebx: carry + # ecx: a_len + # edx: + # esi: a ptr + # edi: c ptr +.globl _s_mpv_mul_d_add_prop +.type _s_mpv_mul_d_add_prop,@function +_s_mpv_mul_d_add_prop: + GET is_sse,%eax + cmp $0,%eax + je _s_mpv_mul_d_add_prop_x86 + jg _s_mpv_mul_d_add_prop_sse2 + call _s_mpi_is_sse2 + PUT %eax,is_sse + cmp $0,%eax + jg _s_mpv_mul_d_add_prop_sse2 +_s_mpv_mul_d_add_prop_x86: + push %ebp + mov %esp,%ebp + sub $28,%esp + push %edi + push %esi + push %ebx + movl $0,%ebx # carry = 0 + mov 12(%ebp),%ecx # ecx = a_len + mov 20(%ebp),%edi + cmp $0,%ecx + je 21f # jmp if a_len == 0 + cld + mov 8(%ebp),%esi # esi = a +20: + lodsl # eax = [ds:esi]; esi += 4 + mov 16(%ebp),%edx # edx = b + mull %edx # edx:eax = Phi:Plo = a_i * b + + add %ebx,%eax # add carry (%ebx) to edx:eax + adc $0,%edx + mov 0(%edi),%ebx # add in current word from *c + add %ebx,%eax + adc $0,%edx + mov %edx,%ebx # high half of product becomes next carry + + stosl # [es:edi] = ax; edi += 4; + dec %ecx # --a_len + jnz 20b # jmp if a_len != 0 +21: + cmp $0,%ebx # is carry zero? 
+ jz 23f + mov 0(%edi),%eax # add in current word from *c + add %ebx,%eax + stosl # [es:edi] = ax; edi += 4; + jnc 23f +22: + mov 0(%edi),%eax # add in current word from *c + adc $0,%eax + stosl # [es:edi] = ax; edi += 4; + jc 22b +23: + pop %ebx + pop %esi + pop %edi + leave + ret + nop +_s_mpv_mul_d_add_prop_sse2: + push %ebp + mov %esp,%ebp + push %edi + push %esi + push %ebx + psubq %mm2,%mm2 # carry = 0 + mov 12(%ebp),%ecx # ecx = a_len + movd 16(%ebp),%mm1 # mm1 = b + mov 20(%ebp),%edi + cmp $0,%ecx + je 26f # jmp if a_len == 0 + mov 8(%ebp),%esi # esi = a + cld +25: + movd 0(%esi),%mm0 # mm0 = *a++ + movd 0(%edi),%mm3 # fetch the sum + add $4,%esi + pmuludq %mm1,%mm0 # mm0 = b * *a++ + paddq %mm0,%mm2 # add the carry + paddq %mm3,%mm2 # add *c++ + movd %mm2,0(%edi) # store the 32bit result + add $4,%edi + psrlq $32, %mm2 # save the carry + dec %ecx # --a_len + jnz 25b # jmp if a_len != 0 +26: + movd %mm2,%ebx + cmp $0,%ebx # is carry zero? + jz 28f + mov 0(%edi),%eax + add %ebx, %eax + stosl + jnc 28f +27: + mov 0(%edi),%eax # add in current word from *c + adc $0,%eax + stosl # [es:edi] = ax; edi += 4; + jc 27b +28: + emms + pop %ebx + pop %esi + pop %edi + leave + ret + nop + + + # ebp - 20: caller's esi + # ebp - 16: caller's edi + # ebp - 12: + # ebp - 8: carry + # ebp - 4: a_len local + # ebp + 0: caller's ebp + # ebp + 4: return address + # ebp + 8: pa argument + # ebp + 12: a_len argument + # ebp + 16: ps argument + # ebp + 20: + # registers: + # eax: + # ebx: carry + # ecx: a_len + # edx: + # esi: a ptr + # edi: c ptr + +.globl _s_mpv_sqr_add_prop +.type _s_mpv_sqr_add_prop,@function +_s_mpv_sqr_add_prop: + GET is_sse,%eax + cmp $0,%eax + je _s_mpv_sqr_add_prop_x86 + jg _s_mpv_sqr_add_prop_sse2 + call _s_mpi_is_sse2 + PUT %eax,is_sse + cmp $0,%eax + jg _s_mpv_sqr_add_prop_sse2 +_s_mpv_sqr_add_prop_x86: + push %ebp + mov %esp,%ebp + sub $12,%esp + push %edi + push %esi + push %ebx + movl $0,%ebx # carry = 0 + mov 12(%ebp),%ecx # a_len + mov 
16(%ebp),%edi # edi = ps + cmp $0,%ecx + je 31f # jump if a_len == 0 + cld + mov 8(%ebp),%esi # esi = pa +30: + lodsl # %eax = [ds:si]; si += 4; + mull %eax + + add %ebx,%eax # add "carry" + adc $0,%edx + mov 0(%edi),%ebx + add %ebx,%eax # add low word from result + mov 4(%edi),%ebx + stosl # [es:di] = %eax; di += 4; + adc %ebx,%edx # add high word from result + movl $0,%ebx + mov %edx,%eax + adc $0,%ebx + stosl # [es:di] = %eax; di += 4; + dec %ecx # --a_len + jnz 30b # jmp if a_len != 0 +31: + cmp $0,%ebx # is carry zero? + jz 34f + mov 0(%edi),%eax # add in current word from *c + add %ebx,%eax + stosl # [es:edi] = ax; edi += 4; + jnc 34f +32: + mov 0(%edi),%eax # add in current word from *c + adc $0,%eax + stosl # [es:edi] = ax; edi += 4; + jc 32b +34: + pop %ebx + pop %esi + pop %edi + leave + ret + nop +_s_mpv_sqr_add_prop_sse2: + push %ebp + mov %esp,%ebp + push %edi + push %esi + push %ebx + psubq %mm2,%mm2 # carry = 0 + mov 12(%ebp),%ecx # ecx = a_len + mov 16(%ebp),%edi + cmp $0,%ecx + je 36f # jmp if a_len == 0 + mov 8(%ebp),%esi # esi = a + cld +35: + movd 0(%esi),%mm0 # mm0 = *a + movd 0(%edi),%mm3 # fetch the sum + add $4,%esi + pmuludq %mm0,%mm0 # mm0 = sqr(a) + paddq %mm0,%mm2 # add the carry + paddq %mm3,%mm2 # add the low word + movd 4(%edi),%mm3 + movd %mm2,0(%edi) # store the 32bit result + psrlq $32, %mm2 + paddq %mm3,%mm2 # add the high word + movd %mm2,4(%edi) # store the 32bit result + psrlq $32, %mm2 # save the carry. + add $8,%edi + dec %ecx # --a_len + jnz 35b # jmp if a_len != 0 +36: + movd %mm2,%ebx + cmp $0,%ebx # is carry zero? + jz 38f + mov 0(%edi),%eax + add %ebx, %eax + stosl + jnc 38f +37: + mov 0(%edi),%eax # add in current word from *c + adc $0,%eax + stosl # [es:edi] = ax; edi += 4; + jc 37b +38: + emms + pop %ebx + pop %esi + pop %edi + leave + ret + nop + + # + # Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized + # so its high bit is 1. This code is from NSPR. 
+    #
+    # mp_err _s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
+    #                         mp_digit *qp, mp_digit *rp)
+    #
+    # Divides the 64-bit value Nhi:Nlo by divisor, stores the quotient in
+    # *qp and the remainder in *rp, and returns 0.
+    # NOTE(review): the x86 div instruction raises a divide error when the
+    # quotient does not fit in 32 bits (Nhi >= divisor); presumably callers
+    # guarantee Nhi < divisor -- confirm.
+
+    # Stack layout after the "push %ebx" below:
+    # esp +  0:  Caller's ebx
+    # esp +  4:  return address
+    # esp +  8:  Nhi     argument
+    # esp + 12:  Nlo     argument
+    # esp + 16:  divisor argument
+    # esp + 20:  qp      argument
+    # esp + 24:  rp      argument
+    # registers:
+    #  eax:  Nlo going into div, quotient coming out; zeroed for the return
+    #  ebx:  divisor, then the qp and rp pointers (saved and restored here)
+    #  ecx:  not used
+    #  edx:  Nhi going into div, remainder coming out
+    #  esi:  not used
+    #  edi:  not used
+    #
+
+.globl _s_mpv_div_2dx1d
+.type _s_mpv_div_2dx1d,@function
+_s_mpv_div_2dx1d:
+    push %ebx
+    mov 8(%esp),%edx            # edx = Nhi
+    mov 12(%esp),%eax           # eax = Nlo
+    mov 16(%esp),%ebx           # ebx = divisor
+    div %ebx                    # eax = quotient, edx = remainder
+    mov 20(%esp),%ebx
+    mov %eax,0(%ebx)            # *qp = quotient
+    mov 24(%esp),%ebx
+    mov %edx,0(%ebx)            # *rp = remainder
+    xor %eax,%eax               # return zero
+    pop %ebx
+    ret
+    nop
+
diff --git a/security/nss/lib/freebl/mpi/mplogic.c b/security/nss/lib/freebl/mpi/mplogic.c
new file mode 100644
index 000000000..89fd03ae8
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mplogic.c
@@ -0,0 +1,443 @@
+/*
+ *  mplogic.c
+ *
+ *  Bitwise logical operations on MPI values
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
+
+#include "mpi-priv.h"
+#include "mplogic.h"
+
+/* {{{ Lookup table for population count */
+
+/* bitc[v] is the number of 1 bits in the byte value v (0..255). */
+static const unsigned char bitc[] = {
+    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
+    4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
+};
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/*
+  mpl_not(a, b)    - compute b = ~a
+  mpl_and(a, b, c) - compute c = a & b
+  mpl_or(a, b, c)  - compute c = a | b
+  mpl_xor(a, b, c) - compute c = a ^ b
+
+  All four work digit by digit on the magnitude only.  Each returns
+  MP_BADARG for a NULL argument (when argument checking is enabled),
+  the error from mp_copy()/s_mp_pad() if the result cannot be set up,
+  and MP_OKAY otherwise.  In the binary operations the result c may be
+  the same object as a or b.
+ */
+
+/* {{{ mpl_not(a, b) */
+
+/*
+  Copies a into b and complements every used digit of b.  Only the
+  USED(a) digits take part, so the result is the complement within that
+  width, with high zero digits then clamped away.
+ */
+mp_err
+mpl_not(mp_int *a, mp_int *b)
+{
+    mp_err res;
+    unsigned int ix;
+
+    ARGCHK(a != NULL && b != NULL, MP_BADARG);
+
+    if ((res = mp_copy(a, b)) != MP_OKAY)
+        return res;
+
+    /* This relies on the fact that the digit type is unsigned */
+    for (ix = 0; ix < USED(b); ix++)
+        DIGIT(b, ix) = ~DIGIT(b, ix);
+
+    s_mp_clamp(b);
+
+    return MP_OKAY;
+
+} /* end mpl_not() */
+
+/* }}} */
+
+/* {{{ mpl_and(a, b, c) */
+
+/*
+  c = a & b.  The result is only as wide as the shorter operand, because
+  the longer one is ANDed with implicit zero digits above that.
+ */
+mp_err
+mpl_and(mp_int *a, mp_int *b, mp_int *c)
+{
+    mp_int *which, *other;
+    mp_err res;
+    unsigned int ix;
+
+    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+    /* which = shorter operand, other = longer (or equal) operand */
+    if (USED(a) <= USED(b)) {
+        which = a;
+        other = b;
+    } else {
+        which = b;
+        other = a;
+    }
+
+    if (c == other) {
+        /* The result lives in the longer operand.  Copying `which` over it
+           would destroy `other` before it is read, so AND in place and
+           then cut c down to the width of the shorter operand. */
+        for (ix = 0; ix < USED(which); ix++)
+            DIGIT(c, ix) &= DIGIT(which, ix);
+        for (; ix < USED(c); ix++)
+            DIGIT(c, ix) = 0;
+        USED(c) = USED(which);
+        MP_SIGN(c) = MP_SIGN(which); /* same sign the copy path produces */
+    } else {
+        if ((res = mp_copy(which, c)) != MP_OKAY)
+            return res;
+
+        for (ix = 0; ix < USED(which); ix++)
+            DIGIT(c, ix) &= DIGIT(other, ix);
+    }
+
+    s_mp_clamp(c);
+
+    return MP_OKAY;
+
+} /* end mpl_and() */
+
+/* }}} */
+
+/* {{{ mpl_or(a, b, c) */
+
+/*
+  c = a | b.  The result is as wide as the longer operand.
+ */
+mp_err
+mpl_or(mp_int *a, mp_int *b, mp_int *c)
+{
+    mp_int *which, *other;
+    mp_err res;
+    unsigned int ix;
+
+    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+    /* which = longer (or equal) operand, other = shorter operand */
+    if (USED(a) >= USED(b)) {
+        which = a;
+        other = b;
+    } else {
+        which = b;
+        other = a;
+    }
+
+    if (c == other) {
+        /* The result lives in the shorter operand: widen it (new digits
+           are expected to be zero, as mpl_set_bit also relies on) and
+           fold the longer operand into it. */
+        if ((res = s_mp_pad(c, USED(which))) != MP_OKAY)
+            return res;
+
+        for (ix = 0; ix < USED(which); ix++)
+            DIGIT(c, ix) |= DIGIT(which, ix);
+        MP_SIGN(c) = MP_SIGN(which); /* same sign the copy path produces */
+    } else {
+        if ((res = mp_copy(which, c)) != MP_OKAY)
+            return res;
+
+        /* Only the digits `other` really has; its higher digits are
+           implicit zeros and leave c unchanged. */
+        for (ix = 0; ix < USED(other); ix++)
+            DIGIT(c, ix) |= DIGIT(other, ix);
+    }
+
+    /* Harmless for clamped inputs; keeps c normalized otherwise. */
+    s_mp_clamp(c);
+
+    return MP_OKAY;
+
+} /* end mpl_or() */
+
+/* }}} */
+
+/* {{{ mpl_xor(a, b, c) */
+
+/*
+  c = a ^ b.  The result is at most as wide as the longer operand; equal
+  high digits cancel and are clamped away.
+ */
+mp_err
+mpl_xor(mp_int *a, mp_int *b, mp_int *c)
+{
+    mp_int *which, *other;
+    mp_err res;
+    unsigned int ix;
+
+    ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
+
+    /* which = longer (or equal) operand, other = shorter operand */
+    if (USED(a) >= USED(b)) {
+        which = a;
+        other = b;
+    } else {
+        which = b;
+        other = a;
+    }
+
+    if (c == other) {
+        /* Same in-place strategy as mpl_or(): widen, then fold in. */
+        if ((res = s_mp_pad(c, USED(which))) != MP_OKAY)
+            return res;
+
+        for (ix = 0; ix < USED(which); ix++)
+            DIGIT(c, ix) ^= DIGIT(which, ix);
+        MP_SIGN(c) = MP_SIGN(which); /* same sign the copy path produces */
+    } else {
+        if ((res = mp_copy(which, c)) != MP_OKAY)
+            return res;
+
+        /* Stop at the shorter operand's length; see mpl_or(). */
+        for (ix = 0; ix < USED(other); ix++)
+            DIGIT(c, ix) ^= DIGIT(other, ix);
+    }
+
+    s_mp_clamp(c);
+
+    return MP_OKAY;
+
+} /* end mpl_xor() */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/*
+  mpl_rsh(a, b, d)     - b = a >> d
+  mpl_lsh(a, b, d)     - b = a << d
+
+  d is a bit count.  Both copy a into b first and shift b in place.
+ */
+
+/* {{{ mpl_rsh(a, b, d) */
+
+mp_err
+mpl_rsh(const mp_int *a, mp_int *b, mp_digit d)
+{
+    mp_err res;
+
+    ARGCHK(a != NULL && b != NULL, MP_BADARG);
+
+    if ((res = mp_copy(a, b)) != MP_OKAY)
+        return res;
+
+    s_mp_div_2d(b, d);
+
+    return MP_OKAY;
+
+} /* end mpl_rsh() */
+
+/* }}} */
+
+/* {{{ mpl_lsh(a, b, d) */
+
+mp_err
+mpl_lsh(const mp_int *a, mp_int *b, mp_digit d)
+{
+    mp_err res;
+
+    ARGCHK(a != NULL && b != NULL, MP_BADARG);
+
+    if ((res = mp_copy(a, b)) != MP_OKAY)
+        return res;
+
+    /* May fail if b has to grow; pass that result straight through. */
+    return s_mp_mul_2d(b, d);
+
+} /* end mpl_lsh() */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/*
+  mpl_num_set(a, num)
+
+  Count the number of set bits in the binary
representation of a.
+  Returns MP_OKAY and sets 'num' to be the number of such bits, if
+  possible.  If num is NULL, the result is thrown away, but it is
+  not considered an error.
+
+  mpl_num_clear() does basically the same thing for clear bits.
+ */
+
+/* {{{ mpl_num_set(a, num) */
+
+/*
+  Adds up the population count of every byte of every used digit of a,
+  looking each byte up in bitc[].
+ */
+mp_err
+mpl_num_set(mp_int *a, int *num)
+{
+    mp_size idx;
+    int total = 0;
+
+    ARGCHK(a != NULL, MP_BADARG);
+
+    for (idx = 0; idx < USED(a); ++idx) {
+        mp_digit word = DIGIT(a, idx);
+        unsigned int left = sizeof(mp_digit);
+
+        /* peel off one byte per pass, least significant first */
+        while (left-- > 0) {
+            total += bitc[(unsigned char)word];
+            word >>= CHAR_BIT;
+        }
+    }
+
+    if (num != NULL)
+        *num = total;
+
+    return MP_OKAY;
+
+} /* end mpl_num_set() */
+
+/* }}} */
+
+/* {{{ mpl_num_clear(a, num) */
+
+/*
+  Same walk as mpl_num_set(), but each byte is inverted before the
+  lookup, so the zero bits are counted.  Only the used digits are
+  examined, which includes the leading zero bits of the top digit.
+ */
+mp_err
+mpl_num_clear(mp_int *a, int *num)
+{
+    mp_size idx;
+    int total = 0;
+
+    ARGCHK(a != NULL, MP_BADARG);
+
+    for (idx = 0; idx < USED(a); ++idx) {
+        mp_digit word = DIGIT(a, idx);
+        unsigned int left = sizeof(mp_digit);
+
+        /* peel off one byte per pass, least significant first */
+        while (left-- > 0) {
+            total += bitc[(unsigned char)word ^ UCHAR_MAX];
+            word >>= CHAR_BIT;
+        }
+    }
+
+    if (num != NULL)
+        *num = total;
+
+    return MP_OKAY;
+
+} /* end mpl_num_clear() */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/*
+  mpl_parity(a)
+
+  Determines the bitwise parity of the value given.  Returns MP_EVEN
+  if an even number of bits are set, MP_ODD if an odd number are
+  set.
+ */
+
+/* {{{ mpl_parity(a) */
+
+mp_err
+mpl_parity(mp_int *a)
+{
+    unsigned int ix;
+    int par = 0;
+    mp_digit cur;
+
+    ARGCHK(a != NULL, MP_BADARG);
+
+    for (ix = 0; ix < USED(a); ix++) {
+        int shft = (sizeof(mp_digit) * CHAR_BIT) / 2;
+
+        cur = DIGIT(a, ix);
+
+        /* Fold the digit onto itself, halving the width each time, until
+           bit 0 holds the XOR of all of its bits */
+        while (shft != 0) {
+            cur ^= (cur >> shft);
+            shft >>= 1;
+        }
+
+        /* XOR with running parity so far */
+        par ^= (int)(cur & 1);
+    }
+
+    return par ? MP_ODD : MP_EVEN;
+
+} /* end mpl_parity() */
+
+/* }}} */
+
+/*
+  mpl_set_bit
+
+  Sets bit number bitNum of a to 1 if value is nonzero, or clears it if
+  value is zero.  Bit 0 is the least significant bit.
+  Returns MP_OKAY or some error code.
+  Grows a if needed so that it contains the addressed digit (this also
+  happens when the bit is being cleared); a is clamped again afterwards.
+ */
+mp_err
+mpl_set_bit(mp_int *a, mp_size bitNum, mp_size value)
+{
+    mp_size ix;
+    mp_err rv;
+    mp_digit mask;
+
+    ARGCHK(a != NULL, MP_BADARG);
+
+    ix = bitNum / MP_DIGIT_BIT;
+    if (ix + 1 > MP_USED(a)) {
+        rv = s_mp_pad(a, ix + 1);
+        if (rv != MP_OKAY)
+            return rv;
+    }
+
+    bitNum = bitNum % MP_DIGIT_BIT;
+    mask = (mp_digit)1 << bitNum;
+    if (value)
+        MP_DIGIT(a, ix) |= mask;
+    else
+        MP_DIGIT(a, ix) &= ~mask;
+    s_mp_clamp(a);
+    return MP_OKAY;
+}
+
+/*
+  mpl_get_bit
+
+  returns 0 or 1 or some (negative) error code.
+  MP_RANGE is returned when bitNum lies beyond the used digits of a.
+ */
+mp_err
+mpl_get_bit(const mp_int *a, mp_size bitNum)
+{
+    mp_size bit, ix;
+    mp_err rv;
+
+    ARGCHK(a != NULL, MP_BADARG);
+
+    ix = bitNum / MP_DIGIT_BIT;
+    /* Written as "ix < used" rather than "ix <= used - 1": the latter
+       wraps around when MP_USED(a) is 0 and would let every index pass. */
+    ARGCHK(ix < MP_USED(a), MP_RANGE);
+
+    bit = bitNum % MP_DIGIT_BIT;
+    rv = (mp_err)(MP_DIGIT(a, ix) >> bit) & 1;
+    return rv;
+}
+
+/*
+  mpl_get_bits
+  - Extracts numBits bits from a, where the least significant extracted bit
+    is bit lsbNum.  Returns a negative value if error occurs.
+  - Because sign bit is used to indicate error, maximum number of bits to
+    be returned is the lesser of (a) the number of bits in an mp_digit, or
+    (b) one less than the number of bits in an mp_err.
+  - lsbNum + numbits can be greater than the number of significant bits in
+    integer a, as long as bit lsbNum is in the high order digit of a.
+ */ +mp_err +mpl_get_bits(const mp_int *a, mp_size lsbNum, mp_size numBits) +{ + mp_size rshift = (lsbNum % MP_DIGIT_BIT); + mp_size lsWndx = (lsbNum / MP_DIGIT_BIT); + mp_digit *digit = MP_DIGITS(a) + lsWndx; + mp_digit mask = ((1 << numBits) - 1); + + ARGCHK(numBits < CHAR_BIT * sizeof mask, MP_BADARG); + ARGCHK(MP_HOWMANY(lsbNum, MP_DIGIT_BIT) <= MP_USED(a), MP_RANGE); + + if ((numBits + lsbNum % MP_DIGIT_BIT <= MP_DIGIT_BIT) || + (lsWndx + 1 >= MP_USED(a))) { + mask &= (digit[0] >> rshift); + } else { + mask &= ((digit[0] >> rshift) | (digit[1] << (MP_DIGIT_BIT - rshift))); + } + return (mp_err)mask; +} + +/* + mpl_significant_bits + returns number of significnant bits in abs(a). + returns 1 if value is zero. + */ +mp_size +mpl_significant_bits(const mp_int *a) +{ + mp_size bits = 0; + int ix; + + ARGCHK(a != NULL, MP_BADARG); + + for (ix = MP_USED(a); ix > 0;) { + mp_digit d; + d = MP_DIGIT(a, --ix); + if (d) { + while (d) { + ++bits; + d >>= 1; + } + break; + } + } + bits += ix * MP_DIGIT_BIT; + if (!bits) + bits = 1; + return bits; +} + +/*------------------------------------------------------------------------*/ +/* HERE THERE BE DRAGONS */ diff --git a/security/nss/lib/freebl/mpi/mplogic.h b/security/nss/lib/freebl/mpi/mplogic.h new file mode 100644 index 000000000..a4a6b7735 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mplogic.h @@ -0,0 +1,52 @@ +/* + * mplogic.h + * + * Bitwise logical operations on MPI values + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef _H_MPLOGIC_ +#define _H_MPLOGIC_ + +#include "mpi.h" + +/* + The logical operations treat an mp_int as if it were a bit vector, + without regard to its sign (an mp_int is represented in a signed + magnitude format). Values are treated as if they had an infinite + string of zeros left of the most-significant bit. 
+ */ + +/* Parity results */ + +#define MP_EVEN MP_YES +#define MP_ODD MP_NO + +/* Bitwise functions */ + +mp_err mpl_not(mp_int *a, mp_int *b); /* one's complement */ +mp_err mpl_and(mp_int *a, mp_int *b, mp_int *c); /* bitwise AND */ +mp_err mpl_or(mp_int *a, mp_int *b, mp_int *c); /* bitwise OR */ +mp_err mpl_xor(mp_int *a, mp_int *b, mp_int *c); /* bitwise XOR */ + +/* Shift functions */ + +mp_err mpl_rsh(const mp_int *a, mp_int *b, mp_digit d); /* right shift */ +mp_err mpl_lsh(const mp_int *a, mp_int *b, mp_digit d); /* left shift */ + +/* Bit count and parity */ + +mp_err mpl_num_set(mp_int *a, int *num); /* count set bits */ +mp_err mpl_num_clear(mp_int *a, int *num); /* count clear bits */ +mp_err mpl_parity(mp_int *a); /* determine parity */ + +/* Get & Set the value of a bit */ + +mp_err mpl_set_bit(mp_int *a, mp_size bitNum, mp_size value); +mp_err mpl_get_bit(const mp_int *a, mp_size bitNum); +mp_err mpl_get_bits(const mp_int *a, mp_size lsbNum, mp_size numBits); +mp_size mpl_significant_bits(const mp_int *a); + +#endif /* end _H_MPLOGIC_ */ diff --git a/security/nss/lib/freebl/mpi/mpmontg.c b/security/nss/lib/freebl/mpi/mpmontg.c new file mode 100644 index 000000000..06fd41b3a --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpmontg.c @@ -0,0 +1,1141 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* This file implements modular exponentiation using Montgomery's + * method for modular reduction. This file implements the method + * described as "Improvement 2" in the paper "A Cryptographic Library for + * the Motorola DSP56000" by Stephen R. Dusse' and Burton S. Kaliski Jr. + * published in "Advances in Cryptology: Proceedings of EUROCRYPT '90" + * "Lecture Notes in Computer Science" volume 473, 1991, pg 230-244, + * published by Springer Verlag.
+ */ + +#define MP_USING_CACHE_SAFE_MOD_EXP 1 +#include +#include "mpi-priv.h" +#include "mplogic.h" +#include "mpprime.h" +#ifdef MP_USING_MONT_MULF +#include "montmulf.h" +#endif +#include /* ptrdiff_t */ +#include + +#define STATIC + +#define MAX_ODD_INTS 32 /* 2 ** (WINDOW_BITS - 1) */ + +/*! computes T = REDC(T), 2^b == R + \param T < RN +*/ +mp_err +s_mp_redc(mp_int *T, mp_mont_modulus *mmm) +{ + mp_err res; + mp_size i; + + i = (MP_USED(&mmm->N) << 1) + 1; + MP_CHECKOK(s_mp_pad(T, i)); + for (i = 0; i < MP_USED(&mmm->N); ++i) { + mp_digit m_i = MP_DIGIT(T, i) * mmm->n0prime; + /* T += N * m_i * (MP_RADIX ** i); */ + s_mp_mul_d_add_offset(&mmm->N, m_i, T, i); + } + s_mp_clamp(T); + + /* T /= R */ + s_mp_rshd(T, MP_USED(&mmm->N)); + + if ((res = s_mp_cmp(T, &mmm->N)) >= 0) { + /* T = T - N */ + MP_CHECKOK(s_mp_sub(T, &mmm->N)); +#ifdef DEBUG + if ((res = mp_cmp(T, &mmm->N)) >= 0) { + res = MP_UNDEF; + goto CLEANUP; + } +#endif + } + res = MP_OKAY; +CLEANUP: + return res; +} + +#if !defined(MP_MONT_USE_MP_MUL) + +/*! c <- REDC( a * b ) mod N + \param a < N i.e. "reduced" + \param b < N i.e. 
"reduced" + \param mmm modulus N and n0' of N +*/ +mp_err +s_mp_mul_mont(const mp_int *a, const mp_int *b, mp_int *c, + mp_mont_modulus *mmm) +{ + mp_digit *pb; + mp_digit m_i; + mp_err res; + mp_size ib; /* "index b": index of current digit of B */ + mp_size useda, usedb; + + ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG); + + if (MP_USED(a) < MP_USED(b)) { + const mp_int *xch = b; /* switch a and b, to do fewer outer loops */ + b = a; + a = xch; + } + + MP_USED(c) = 1; + MP_DIGIT(c, 0) = 0; + ib = (MP_USED(&mmm->N) << 1) + 1; + if ((res = s_mp_pad(c, ib)) != MP_OKAY) + goto CLEANUP; + + useda = MP_USED(a); + pb = MP_DIGITS(b); + s_mpv_mul_d(MP_DIGITS(a), useda, *pb++, MP_DIGITS(c)); + s_mp_setz(MP_DIGITS(c) + useda + 1, ib - (useda + 1)); + m_i = MP_DIGIT(c, 0) * mmm->n0prime; + s_mp_mul_d_add_offset(&mmm->N, m_i, c, 0); + + /* Outer loop: Digits of b */ + usedb = MP_USED(b); + for (ib = 1; ib < usedb; ib++) { + mp_digit b_i = *pb++; + + /* Inner product: Digits of a */ + if (b_i) + s_mpv_mul_d_add_prop(MP_DIGITS(a), useda, b_i, MP_DIGITS(c) + ib); + m_i = MP_DIGIT(c, ib) * mmm->n0prime; + s_mp_mul_d_add_offset(&mmm->N, m_i, c, ib); + } + if (usedb < MP_USED(&mmm->N)) { + for (usedb = MP_USED(&mmm->N); ib < usedb; ++ib) { + m_i = MP_DIGIT(c, ib) * mmm->n0prime; + s_mp_mul_d_add_offset(&mmm->N, m_i, c, ib); + } + } + s_mp_clamp(c); + s_mp_rshd(c, MP_USED(&mmm->N)); /* c /= R */ + if (s_mp_cmp(c, &mmm->N) >= 0) { + MP_CHECKOK(s_mp_sub(c, &mmm->N)); + } + res = MP_OKAY; + +CLEANUP: + return res; +} +#endif + +STATIC +mp_err +s_mp_to_mont(const mp_int *x, mp_mont_modulus *mmm, mp_int *xMont) +{ + mp_err res; + + /* xMont = x * R mod N where N is modulus */ + MP_CHECKOK(mp_copy(x, xMont)); + MP_CHECKOK(s_mp_lshd(xMont, MP_USED(&mmm->N))); /* xMont = x << b */ + MP_CHECKOK(mp_div(xMont, &mmm->N, 0, xMont)); /* mod N */ +CLEANUP: + return res; +} + +#ifdef MP_USING_MONT_MULF + +/* the floating point multiply is already cache safe, + * don't turn on cache safe 
unless we specifically + * force it */ +#ifndef MP_FORCE_CACHE_SAFE +#undef MP_USING_CACHE_SAFE_MOD_EXP +#endif + +unsigned int mp_using_mont_mulf = 1; + +/* computes montgomery square of the integer in mResult */ +#define SQR \ + conv_i32_to_d32_and_d16(dm1, d16Tmp, mResult, nLen); \ + mont_mulf_noconv(mResult, dm1, d16Tmp, \ + dTmp, dn, MP_DIGITS(modulus), nLen, dn0) + +/* computes montgomery product of x and the integer in mResult */ +#define MUL(x) \ + conv_i32_to_d32(dm1, mResult, nLen); \ + mont_mulf_noconv(mResult, dm1, oddPowers[x], \ + dTmp, dn, MP_DIGITS(modulus), nLen, dn0) + +/* Do modular exponentiation using floating point multiply code. */ +mp_err +mp_exptmod_f(const mp_int *montBase, + const mp_int *exponent, + const mp_int *modulus, + mp_int *result, + mp_mont_modulus *mmm, + int nLen, + mp_size bits_in_exponent, + mp_size window_bits, + mp_size odd_ints) +{ + mp_digit *mResult; + double *dBuf = 0, *dm1, *dn, *dSqr, *d16Tmp, *dTmp; + double dn0; + mp_size i; + mp_err res; + int expOff; + int dSize = 0, oddPowSize, dTmpSize; + mp_int accum1; + double *oddPowers[MAX_ODD_INTS]; + + /* function for computing n0prime only works if n0 is odd */ + + MP_DIGITS(&accum1) = 0; + + for (i = 0; i < MAX_ODD_INTS; ++i) + oddPowers[i] = 0; + + MP_CHECKOK(mp_init_size(&accum1, 3 * nLen + 2)); + + mp_set(&accum1, 1); + MP_CHECKOK(s_mp_to_mont(&accum1, mmm, &accum1)); + MP_CHECKOK(s_mp_pad(&accum1, nLen)); + + oddPowSize = 2 * nLen + 1; + dTmpSize = 2 * oddPowSize; + dSize = sizeof(double) * (nLen * 4 + 1 + + ((odd_ints + 1) * oddPowSize) + dTmpSize); + dBuf = (double *)malloc(dSize); + dm1 = dBuf; /* array of d32 */ + dn = dBuf + nLen; /* array of d32 */ + dSqr = dn + nLen; /* array of d32 */ + d16Tmp = dSqr + nLen; /* array of d16 */ + dTmp = d16Tmp + oddPowSize; + + for (i = 0; i < odd_ints; ++i) { + oddPowers[i] = dTmp; + dTmp += oddPowSize; + } + mResult = (mp_digit *)(dTmp + dTmpSize); /* size is nLen + 1 */ + + /* Make dn and dn0 */ + conv_i32_to_d32(dn, 
MP_DIGITS(modulus), nLen); + dn0 = (double)(mmm->n0prime & 0xffff); + + /* Make dSqr */ + conv_i32_to_d32_and_d16(dm1, oddPowers[0], MP_DIGITS(montBase), nLen); + mont_mulf_noconv(mResult, dm1, oddPowers[0], + dTmp, dn, MP_DIGITS(modulus), nLen, dn0); + conv_i32_to_d32(dSqr, mResult, nLen); + + for (i = 1; i < odd_ints; ++i) { + mont_mulf_noconv(mResult, dSqr, oddPowers[i - 1], + dTmp, dn, MP_DIGITS(modulus), nLen, dn0); + conv_i32_to_d16(oddPowers[i], mResult, nLen); + } + + s_mp_copy(MP_DIGITS(&accum1), mResult, nLen); /* from, to, len */ + + for (expOff = bits_in_exponent - window_bits; expOff >= 0; expOff -= window_bits) { + mp_size smallExp; + MP_CHECKOK(mpl_get_bits(exponent, expOff, window_bits)); + smallExp = (mp_size)res; + + if (window_bits == 1) { + if (!smallExp) { + SQR; + } else if (smallExp & 1) { + SQR; + MUL(0); + } else { + abort(); + } + } else if (window_bits == 4) { + if (!smallExp) { + SQR; + SQR; + SQR; + SQR; + } else if (smallExp & 1) { + SQR; + SQR; + SQR; + SQR; + MUL(smallExp / 2); + } else if (smallExp & 2) { + SQR; + SQR; + SQR; + MUL(smallExp / 4); + SQR; + } else if (smallExp & 4) { + SQR; + SQR; + MUL(smallExp / 8); + SQR; + SQR; + } else if (smallExp & 8) { + SQR; + MUL(smallExp / 16); + SQR; + SQR; + SQR; + } else { + abort(); + } + } else if (window_bits == 5) { + if (!smallExp) { + SQR; + SQR; + SQR; + SQR; + SQR; + } else if (smallExp & 1) { + SQR; + SQR; + SQR; + SQR; + SQR; + MUL(smallExp / 2); + } else if (smallExp & 2) { + SQR; + SQR; + SQR; + SQR; + MUL(smallExp / 4); + SQR; + } else if (smallExp & 4) { + SQR; + SQR; + SQR; + MUL(smallExp / 8); + SQR; + SQR; + } else if (smallExp & 8) { + SQR; + SQR; + MUL(smallExp / 16); + SQR; + SQR; + SQR; + } else if (smallExp & 0x10) { + SQR; + MUL(smallExp / 32); + SQR; + SQR; + SQR; + SQR; + } else { + abort(); + } + } else if (window_bits == 6) { + if (!smallExp) { + SQR; + SQR; + SQR; + SQR; + SQR; + SQR; + } else if (smallExp & 1) { + SQR; + SQR; + SQR; + SQR; + SQR; + SQR; + 
MUL(smallExp / 2); + } else if (smallExp & 2) { + SQR; + SQR; + SQR; + SQR; + SQR; + MUL(smallExp / 4); + SQR; + } else if (smallExp & 4) { + SQR; + SQR; + SQR; + SQR; + MUL(smallExp / 8); + SQR; + SQR; + } else if (smallExp & 8) { + SQR; + SQR; + SQR; + MUL(smallExp / 16); + SQR; + SQR; + SQR; + } else if (smallExp & 0x10) { + SQR; + SQR; + MUL(smallExp / 32); + SQR; + SQR; + SQR; + SQR; + } else if (smallExp & 0x20) { + SQR; + MUL(smallExp / 64); + SQR; + SQR; + SQR; + SQR; + SQR; + } else { + abort(); + } + } else { + abort(); + } + } + + s_mp_copy(mResult, MP_DIGITS(&accum1), nLen); /* from, to, len */ + + res = s_mp_redc(&accum1, mmm); + mp_exch(&accum1, result); + +CLEANUP: + mp_clear(&accum1); + if (dBuf) { + if (dSize) + memset(dBuf, 0, dSize); + free(dBuf); + } + + return res; +} +#undef SQR +#undef MUL +#endif + +#define SQR(a, b) \ + MP_CHECKOK(mp_sqr(a, b)); \ + MP_CHECKOK(s_mp_redc(b, mmm)) + +#if defined(MP_MONT_USE_MP_MUL) +#define MUL(x, a, b) \ + MP_CHECKOK(mp_mul(a, oddPowers + (x), b)); \ + MP_CHECKOK(s_mp_redc(b, mmm)) +#else +#define MUL(x, a, b) \ + MP_CHECKOK(s_mp_mul_mont(a, oddPowers + (x), b, mmm)) +#endif + +#define SWAPPA \ + ptmp = pa1; \ + pa1 = pa2; \ + pa2 = ptmp + +/* Do modular exponentiation using integer multiply code. 
*/ +mp_err +mp_exptmod_i(const mp_int *montBase, + const mp_int *exponent, + const mp_int *modulus, + mp_int *result, + mp_mont_modulus *mmm, + int nLen, + mp_size bits_in_exponent, + mp_size window_bits, + mp_size odd_ints) +{ + mp_int *pa1, *pa2, *ptmp; + mp_size i; + mp_err res; + int expOff; + mp_int accum1, accum2, power2, oddPowers[MAX_ODD_INTS]; + + /* power2 = base ** 2; oddPowers[i] = base ** (2*i + 1); */ + /* oddPowers[i] = base ** (2*i + 1); */ + + MP_DIGITS(&accum1) = 0; + MP_DIGITS(&accum2) = 0; + MP_DIGITS(&power2) = 0; + for (i = 0; i < MAX_ODD_INTS; ++i) { + MP_DIGITS(oddPowers + i) = 0; + } + + MP_CHECKOK(mp_init_size(&accum1, 3 * nLen + 2)); + MP_CHECKOK(mp_init_size(&accum2, 3 * nLen + 2)); + + MP_CHECKOK(mp_init_copy(&oddPowers[0], montBase)); + + MP_CHECKOK(mp_init_size(&power2, nLen + 2 * MP_USED(montBase) + 2)); + MP_CHECKOK(mp_sqr(montBase, &power2)); /* power2 = montBase ** 2 */ + MP_CHECKOK(s_mp_redc(&power2, mmm)); + + for (i = 1; i < odd_ints; ++i) { + MP_CHECKOK(mp_init_size(oddPowers + i, nLen + 2 * MP_USED(&power2) + 2)); + MP_CHECKOK(mp_mul(oddPowers + (i - 1), &power2, oddPowers + i)); + MP_CHECKOK(s_mp_redc(oddPowers + i, mmm)); + } + + /* set accumulator to montgomery residue of 1 */ + mp_set(&accum1, 1); + MP_CHECKOK(s_mp_to_mont(&accum1, mmm, &accum1)); + pa1 = &accum1; + pa2 = &accum2; + + for (expOff = bits_in_exponent - window_bits; expOff >= 0; expOff -= window_bits) { + mp_size smallExp; + MP_CHECKOK(mpl_get_bits(exponent, expOff, window_bits)); + smallExp = (mp_size)res; + + if (window_bits == 1) { + if (!smallExp) { + SQR(pa1, pa2); + SWAPPA; + } else if (smallExp & 1) { + SQR(pa1, pa2); + MUL(0, pa2, pa1); + } else { + abort(); + } + } else if (window_bits == 4) { + if (!smallExp) { + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + } else if (smallExp & 1) { + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + MUL(smallExp / 2, pa1, pa2); + SWAPPA; + } else if (smallExp & 2) { + 
SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + MUL(smallExp / 4, pa2, pa1); + SQR(pa1, pa2); + SWAPPA; + } else if (smallExp & 4) { + SQR(pa1, pa2); + SQR(pa2, pa1); + MUL(smallExp / 8, pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SWAPPA; + } else if (smallExp & 8) { + SQR(pa1, pa2); + MUL(smallExp / 16, pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SWAPPA; + } else { + abort(); + } + } else if (window_bits == 5) { + if (!smallExp) { + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SWAPPA; + } else if (smallExp & 1) { + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + MUL(smallExp / 2, pa2, pa1); + } else if (smallExp & 2) { + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + MUL(smallExp / 4, pa1, pa2); + SQR(pa2, pa1); + } else if (smallExp & 4) { + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + MUL(smallExp / 8, pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + } else if (smallExp & 8) { + SQR(pa1, pa2); + SQR(pa2, pa1); + MUL(smallExp / 16, pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + } else if (smallExp & 0x10) { + SQR(pa1, pa2); + MUL(smallExp / 32, pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + } else { + abort(); + } + } else if (window_bits == 6) { + if (!smallExp) { + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + } else if (smallExp & 1) { + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + MUL(smallExp / 2, pa1, pa2); + SWAPPA; + } else if (smallExp & 2) { + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + MUL(smallExp / 4, pa2, pa1); + SQR(pa1, pa2); + SWAPPA; + } else if (smallExp & 4) { + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + MUL(smallExp / 8, pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SWAPPA; + } else if 
(smallExp & 8) { + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + MUL(smallExp / 16, pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SWAPPA; + } else if (smallExp & 0x10) { + SQR(pa1, pa2); + SQR(pa2, pa1); + MUL(smallExp / 32, pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SWAPPA; + } else if (smallExp & 0x20) { + SQR(pa1, pa2); + MUL(smallExp / 64, pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SWAPPA; + } else { + abort(); + } + } else { + abort(); + } + } + + res = s_mp_redc(pa1, mmm); + mp_exch(pa1, result); + +CLEANUP: + mp_clear(&accum1); + mp_clear(&accum2); + mp_clear(&power2); + for (i = 0; i < odd_ints; ++i) { + mp_clear(oddPowers + i); + } + return res; +} +#undef SQR +#undef MUL + +#ifdef MP_USING_CACHE_SAFE_MOD_EXP +unsigned int mp_using_cache_safe_exp = 1; +#endif + +mp_err +mp_set_safe_modexp(int value) +{ +#ifdef MP_USING_CACHE_SAFE_MOD_EXP + mp_using_cache_safe_exp = value; + return MP_OKAY; +#else + if (value == 0) { + return MP_OKAY; + } + return MP_BADARG; +#endif +} + +#ifdef MP_USING_CACHE_SAFE_MOD_EXP +#define WEAVE_WORD_SIZE 4 + +/* + * mpi_to_weave takes an array of bignums, a matrix in which each bignum + * occupies all the columns of a row, and transposes it into a matrix in + * which each bignum occupies a column of every row. The first row of the + * input matrix becomes the first column of the output matrix. The n'th + * row of input becomes the n'th column of output. The input data is said + * to be "interleaved" or "woven" into the output matrix. + * + * The array of bignums is left in this woven form. Each time a single + * bignum value is needed, it is recreated by fetching the n'th column, + * forming a single row which is the new bignum. + * + * The purpose of this interleaving is make it impossible to determine which + * of the bignums is being used in any one operation by examining the pattern + * of cache misses. 
+ * + * The weaving function does not transpose the entire input matrix in one call. + * It transposes 4 rows of mp_ints into their respective columns of output. + * + * This implementation treats each mp_int bignum as an array of mp_digits, + * It stores those bytes as a column of mp_digits in the output matrix. It + * doesn't care if the machine uses big-endian or little-endian byte ordering + * within mp_digits. + * + * "bignums" is an array of mp_ints. + * It points to four rows, four mp_ints, a subset of a larger array of mp_ints. + * + * "weaved" is the weaved output matrix. + * The first byte of bignums[0] is stored in weaved[0]. + * + * "nBignums" is the total number of bignums in the array of which "bignums" + * is a part. + * + * "nDigits" is the size in mp_digits of each mp_int in the "bignums" array. + * mp_ints that use less than nDigits digits are logically padded with zeros + * while being stored in the weaved array. + */ +mp_err mpi_to_weave(const mp_int *bignums, + mp_digit *weaved, + mp_size nDigits, /* in each mp_int of input */ + mp_size nBignums) /* in the entire source array */ +{ + mp_size i; + mp_digit *endDest = weaved + (nDigits * nBignums); + + for (i = 0; i < WEAVE_WORD_SIZE; i++) { + mp_size used = MP_USED(&bignums[i]); + mp_digit *pSrc = MP_DIGITS(&bignums[i]); + mp_digit *endSrc = pSrc + used; + mp_digit *pDest = weaved + i; + + ARGCHK(MP_SIGN(&bignums[i]) == MP_ZPOS, MP_BADARG); + ARGCHK(used <= nDigits, MP_BADARG); + + for (; pSrc < endSrc; pSrc++) { + *pDest = *pSrc; + pDest += nBignums; + } + while (pDest < endDest) { + *pDest = 0; + pDest += nBignums; + } + } + + return MP_OKAY; +} + +/* + * These functions return 0xffffffff if the output is true, and 0 otherwise. + */ +#define CONST_TIME_MSB(x) (0L - ((x) >> (8 * sizeof(x) - 1))) +#define CONST_TIME_EQ_Z(x) CONST_TIME_MSB(~(x) & ((x)-1)) +#define CONST_TIME_EQ(a, b) CONST_TIME_EQ_Z((a) ^ (b)) + +/* Reverse the operation above for one mp_int. 
+ * Reconstruct one mp_int from its column in the weaved array. + * Every read accesses every element of the weaved array, in order to + * avoid timing attacks based on patterns of memory accesses. + */ +mp_err weave_to_mpi(mp_int *a, /* out, result */ + const mp_digit *weaved, /* in, byte matrix */ + mp_size index, /* which column to read */ + mp_size nDigits, /* number of mp_digits in each bignum */ + mp_size nBignums) /* width of the matrix */ +{ + /* these are indices, but need to be the same size as mp_digit + * because of the CONST_TIME operations */ + mp_digit i, j; + mp_digit d; + mp_digit *pDest = MP_DIGITS(a); + + MP_SIGN(a) = MP_ZPOS; + MP_USED(a) = nDigits; + + assert(weaved != NULL); + + /* Fetch the proper column in constant time, indexing over the whole array */ + for (i = 0; i < nDigits; ++i) { + d = 0; + for (j = 0; j < nBignums; ++j) { + d |= weaved[i * nBignums + j] & CONST_TIME_EQ(j, index); + } + pDest[i] = d; + } + + s_mp_clamp(a); + return MP_OKAY; +} + +#define SQR(a, b) \ + MP_CHECKOK(mp_sqr(a, b)); \ + MP_CHECKOK(s_mp_redc(b, mmm)) + +#if defined(MP_MONT_USE_MP_MUL) +#define MUL_NOWEAVE(x, a, b) \ + MP_CHECKOK(mp_mul(a, x, b)); \ + MP_CHECKOK(s_mp_redc(b, mmm)) +#else +#define MUL_NOWEAVE(x, a, b) \ + MP_CHECKOK(s_mp_mul_mont(a, x, b, mmm)) +#endif + +#define MUL(x, a, b) \ + MP_CHECKOK(weave_to_mpi(&tmp, powers, (x), nLen, num_powers)); \ + MUL_NOWEAVE(&tmp, a, b) + +#define SWAPPA \ + ptmp = pa1; \ + pa1 = pa2; \ + pa2 = ptmp +#define MP_ALIGN(x, y) ((((ptrdiff_t)(x)) + ((y)-1)) & (((ptrdiff_t)0) - (y))) + +/* Do modular exponentiation using integer multiply code. 
*/ +mp_err +mp_exptmod_safe_i(const mp_int *montBase, + const mp_int *exponent, + const mp_int *modulus, + mp_int *result, + mp_mont_modulus *mmm, + int nLen, + mp_size bits_in_exponent, + mp_size window_bits, + mp_size num_powers) +{ + mp_int *pa1, *pa2, *ptmp; + mp_size i; + mp_size first_window; + mp_err res; + int expOff; + mp_int accum1, accum2, accum[WEAVE_WORD_SIZE]; + mp_int tmp; + mp_digit *powersArray = NULL; + mp_digit *powers = NULL; + + MP_DIGITS(&accum1) = 0; + MP_DIGITS(&accum2) = 0; + MP_DIGITS(&accum[0]) = 0; + MP_DIGITS(&accum[1]) = 0; + MP_DIGITS(&accum[2]) = 0; + MP_DIGITS(&accum[3]) = 0; + MP_DIGITS(&tmp) = 0; + + /* grab the first window value. This allows us to preload accumulator1 + * and save a conversion, some squares and a multiple*/ + MP_CHECKOK(mpl_get_bits(exponent, + bits_in_exponent - window_bits, window_bits)); + first_window = (mp_size)res; + + MP_CHECKOK(mp_init_size(&accum1, 3 * nLen + 2)); + MP_CHECKOK(mp_init_size(&accum2, 3 * nLen + 2)); + + /* build the first WEAVE_WORD powers inline */ + /* if WEAVE_WORD_SIZE is not 4, this code will have to change */ + if (num_powers > 2) { + MP_CHECKOK(mp_init_size(&accum[0], 3 * nLen + 2)); + MP_CHECKOK(mp_init_size(&accum[1], 3 * nLen + 2)); + MP_CHECKOK(mp_init_size(&accum[2], 3 * nLen + 2)); + MP_CHECKOK(mp_init_size(&accum[3], 3 * nLen + 2)); + mp_set(&accum[0], 1); + MP_CHECKOK(s_mp_to_mont(&accum[0], mmm, &accum[0])); + MP_CHECKOK(mp_copy(montBase, &accum[1])); + SQR(montBase, &accum[2]); + MUL_NOWEAVE(montBase, &accum[2], &accum[3]); + powersArray = (mp_digit *)malloc(num_powers * (nLen * sizeof(mp_digit) + 1)); + if (!powersArray) { + res = MP_MEM; + goto CLEANUP; + } + /* powers[i] = base ** (i); */ + powers = (mp_digit *)MP_ALIGN(powersArray, num_powers); + MP_CHECKOK(mpi_to_weave(accum, powers, nLen, num_powers)); + if (first_window < 4) { + MP_CHECKOK(mp_copy(&accum[first_window], &accum1)); + first_window = num_powers; + } + } else { + if (first_window == 0) { + 
mp_set(&accum1, 1); + MP_CHECKOK(s_mp_to_mont(&accum1, mmm, &accum1)); + } else { + /* assert first_window == 1? */ + MP_CHECKOK(mp_copy(montBase, &accum1)); + } + } + + /* + * calculate all the powers in the powers array. + * this adds 2**(k-1)-2 square operations over just calculating the + * odd powers where k is the window size in the two other mp_modexpt + * implementations in this file. We will get some of that + * back by not needing the first 'k' squares and one multiply for the + * first window. + * Given the value of 4 for WEAVE_WORD_SIZE, this loop will only execute if + * num_powers > 2, in which case powers will have been allocated. + */ + for (i = WEAVE_WORD_SIZE; i < num_powers; i++) { + int acc_index = i & (WEAVE_WORD_SIZE - 1); /* i % WEAVE_WORD_SIZE */ + if (i & 1) { + MUL_NOWEAVE(montBase, &accum[acc_index - 1], &accum[acc_index]); + /* we've filled the array do our 'per array' processing */ + if (acc_index == (WEAVE_WORD_SIZE - 1)) { + MP_CHECKOK(mpi_to_weave(accum, powers + i - (WEAVE_WORD_SIZE - 1), + nLen, num_powers)); + + if (first_window <= i) { + MP_CHECKOK(mp_copy(&accum[first_window & (WEAVE_WORD_SIZE - 1)], + &accum1)); + first_window = num_powers; + } + } + } else { + /* up to 8 we can find 2^i-1 in the accum array, but at 8 we our source + * and target are the same so we need to copy.. 
After that, the + * value is overwritten, so we need to fetch it from the stored + * weave array */ + if (i > 2 * WEAVE_WORD_SIZE) { + MP_CHECKOK(weave_to_mpi(&accum2, powers, i / 2, nLen, num_powers)); + SQR(&accum2, &accum[acc_index]); + } else { + int half_power_index = (i / 2) & (WEAVE_WORD_SIZE - 1); + if (half_power_index == acc_index) { + /* copy is cheaper than weave_to_mpi */ + MP_CHECKOK(mp_copy(&accum[half_power_index], &accum2)); + SQR(&accum2, &accum[acc_index]); + } else { + SQR(&accum[half_power_index], &accum[acc_index]); + } + } + } + } +/* if the accum1 isn't set, Then there is something wrong with our logic + * above and is an internal programming error. + */ +#if MP_ARGCHK == 2 + assert(MP_USED(&accum1) != 0); +#endif + + /* set accumulator to montgomery residue of 1 */ + pa1 = &accum1; + pa2 = &accum2; + + /* tmp is not used if window_bits == 1. */ + if (window_bits != 1) { + MP_CHECKOK(mp_init_size(&tmp, 3 * nLen + 2)); + } + + for (expOff = bits_in_exponent - window_bits * 2; expOff >= 0; expOff -= window_bits) { + mp_size smallExp; + MP_CHECKOK(mpl_get_bits(exponent, expOff, window_bits)); + smallExp = (mp_size)res; + + /* handle unroll the loops */ + switch (window_bits) { + case 1: + if (!smallExp) { + SQR(pa1, pa2); + SWAPPA; + } else if (smallExp & 1) { + SQR(pa1, pa2); + MUL_NOWEAVE(montBase, pa2, pa1); + } else { + abort(); + } + break; + case 6: + SQR(pa1, pa2); + SQR(pa2, pa1); + /* fall through */ + case 4: + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + MUL(smallExp, pa1, pa2); + SWAPPA; + break; + case 5: + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + SQR(pa2, pa1); + SQR(pa1, pa2); + MUL(smallExp, pa2, pa1); + break; + default: + abort(); /* could do a loop? 
*/ + } + } + + res = s_mp_redc(pa1, mmm); + mp_exch(pa1, result); + +CLEANUP: + mp_clear(&accum1); + mp_clear(&accum2); + mp_clear(&accum[0]); + mp_clear(&accum[1]); + mp_clear(&accum[2]); + mp_clear(&accum[3]); + mp_clear(&tmp); + /* PORT_Memset(powers,0,num_powers*nLen*sizeof(mp_digit)); */ + free(powersArray); + return res; +} +#undef SQR +#undef MUL +#endif + +mp_err +mp_exptmod(const mp_int *inBase, const mp_int *exponent, + const mp_int *modulus, mp_int *result) +{ + const mp_int *base; + mp_size bits_in_exponent, i, window_bits, odd_ints; + mp_err res; + int nLen; + mp_int montBase, goodBase; + mp_mont_modulus mmm; +#ifdef MP_USING_CACHE_SAFE_MOD_EXP + static unsigned int max_window_bits; +#endif + + /* function for computing n0prime only works if n0 is odd */ + if (!mp_isodd(modulus)) + return s_mp_exptmod(inBase, exponent, modulus, result); + + MP_DIGITS(&montBase) = 0; + MP_DIGITS(&goodBase) = 0; + + if (mp_cmp(inBase, modulus) < 0) { + base = inBase; + } else { + MP_CHECKOK(mp_init(&goodBase)); + base = &goodBase; + MP_CHECKOK(mp_mod(inBase, modulus, &goodBase)); + } + + nLen = MP_USED(modulus); + MP_CHECKOK(mp_init_size(&montBase, 2 * nLen + 2)); + + mmm.N = *modulus; /* a copy of the mp_int struct */ + + /* compute n0', given n0, n0' = -(n0 ** -1) mod MP_RADIX + ** where n0 = least significant mp_digit of N, the modulus. 
+ */ + mmm.n0prime = 0 - s_mp_invmod_radix(MP_DIGIT(modulus, 0)); + + MP_CHECKOK(s_mp_to_mont(base, &mmm, &montBase)); + + bits_in_exponent = mpl_significant_bits(exponent); +#ifdef MP_USING_CACHE_SAFE_MOD_EXP + if (mp_using_cache_safe_exp) { + if (bits_in_exponent > 780) + window_bits = 6; + else if (bits_in_exponent > 256) + window_bits = 5; + else if (bits_in_exponent > 20) + window_bits = 4; + /* RSA public key exponents are typically under 20 bits (common values + * are: 3, 17, 65537) and a 4-bit window is inefficient + */ + else + window_bits = 1; + } else +#endif + if (bits_in_exponent > 480) + window_bits = 6; + else if (bits_in_exponent > 160) + window_bits = 5; + else if (bits_in_exponent > 20) + window_bits = 4; + /* RSA public key exponents are typically under 20 bits (common values + * are: 3, 17, 65537) and a 4-bit window is inefficient + */ + else + window_bits = 1; + +#ifdef MP_USING_CACHE_SAFE_MOD_EXP + /* + * clamp the window size based on + * the cache line size. + */ + if (!max_window_bits) { + unsigned long cache_size = s_mpi_getProcessorLineSize(); + /* processor has no cache, use 'fast' code always */ + if (cache_size == 0) { + mp_using_cache_safe_exp = 0; + } + if ((cache_size == 0) || (cache_size >= 64)) { + max_window_bits = 6; + } else if (cache_size >= 32) { + max_window_bits = 5; + } else if (cache_size >= 16) { + max_window_bits = 4; + } else + max_window_bits = 1; /* should this be an assert? 
*/ + } + + /* clamp the window size down before we caclulate bits_in_exponent */ + if (mp_using_cache_safe_exp) { + if (window_bits > max_window_bits) { + window_bits = max_window_bits; + } + } +#endif + + odd_ints = 1 << (window_bits - 1); + i = bits_in_exponent % window_bits; + if (i != 0) { + bits_in_exponent += window_bits - i; + } + +#ifdef MP_USING_MONT_MULF + if (mp_using_mont_mulf) { + MP_CHECKOK(s_mp_pad(&montBase, nLen)); + res = mp_exptmod_f(&montBase, exponent, modulus, result, &mmm, nLen, + bits_in_exponent, window_bits, odd_ints); + } else +#endif +#ifdef MP_USING_CACHE_SAFE_MOD_EXP + if (mp_using_cache_safe_exp) { + res = mp_exptmod_safe_i(&montBase, exponent, modulus, result, &mmm, nLen, + bits_in_exponent, window_bits, 1 << window_bits); + } else +#endif + res = mp_exptmod_i(&montBase, exponent, modulus, result, &mmm, nLen, + bits_in_exponent, window_bits, odd_ints); + +CLEANUP: + mp_clear(&montBase); + mp_clear(&goodBase); + /* Don't mp_clear mmm.N because it is merely a copy of modulus. + ** Just zap it. + */ + memset(&mmm, 0, sizeof mmm); + return res; +} diff --git a/security/nss/lib/freebl/mpi/mpprime.c b/security/nss/lib/freebl/mpi/mpprime.c new file mode 100644 index 000000000..58287192e --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpprime.c @@ -0,0 +1,599 @@ +/* + * mpprime.c + * + * Utilities for finding and working with prime and pseudo-prime + * integers + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "mpi-priv.h" +#include "mpprime.h" +#include "mplogic.h" +#include +#include + +#define SMALL_TABLE 0 /* determines size of hard-wired prime table */ + +#define RANDOM() rand() + +#include "primes.c" /* pull in the prime digit table */ + +/* + Test if any of a given vector of digits divides a. 
If not, MP_NO + is returned; otherwise, MP_YES is returned and 'which' is set to + the index of the integer in the vector which divided a. + */ +mp_err s_mpp_divp(mp_int *a, const mp_digit *vec, int size, int *which); + +/* {{{ mpp_divis(a, b) */ + +/* + mpp_divis(a, b) + + Returns MP_YES if a is divisible by b, or MP_NO if it is not. + */ + +mp_err +mpp_divis(mp_int *a, mp_int *b) +{ + mp_err res; + mp_int rem; + + if ((res = mp_init(&rem)) != MP_OKAY) + return res; + + if ((res = mp_mod(a, b, &rem)) != MP_OKAY) + goto CLEANUP; + + if (mp_cmp_z(&rem) == 0) + res = MP_YES; + else + res = MP_NO; + +CLEANUP: + mp_clear(&rem); + return res; + +} /* end mpp_divis() */ + +/* }}} */ + +/* {{{ mpp_divis_d(a, d) */ + +/* + mpp_divis_d(a, d) + + Return MP_YES if a is divisible by d, or MP_NO if it is not. + */ + +mp_err +mpp_divis_d(mp_int *a, mp_digit d) +{ + mp_err res; + mp_digit rem; + + ARGCHK(a != NULL, MP_BADARG); + + if (d == 0) + return MP_NO; + + if ((res = mp_mod_d(a, d, &rem)) != MP_OKAY) + return res; + + if (rem == 0) + return MP_YES; + else + return MP_NO; + +} /* end mpp_divis_d() */ + +/* }}} */ + +/* {{{ mpp_random(a) */ + +/* + mpp_random(a) + + Assigns a random value to a. This value is generated using the + standard C library's rand() function, so it should not be used for + cryptographic purposes, but it should be fine for primality testing, + since all we really care about there is good statistical properties. + + As many digits as a currently has are filled with random digits. 
+ */
+
+mp_err
+mpp_random(mp_int *a)
+
+{
+    mp_digit next = 0;
+    unsigned int ix, jx;
+
+    ARGCHK(a != NULL, MP_BADARG);
+
+    for (ix = 0; ix < USED(a); ix++) {
+        /* Build one digit a byte at a time from the low byte of each
+           RANDOM() result.  'next' is not reset between digits, but
+           sizeof(mp_digit) shifts of CHAR_BIT push every old bit out. */
+        for (jx = 0; jx < sizeof(mp_digit); jx++) {
+            next = (next << CHAR_BIT) | (RANDOM() & UCHAR_MAX);
+        }
+        DIGIT(a, ix) = next;
+    }
+
+    return MP_OKAY;
+
+} /* end mpp_random() */
+
+/* }}} */
+
+/* {{{ mpp_random_size(a, prec) */
+
+/*
+  mpp_random_size(a, prec)
+
+  Pads a to prec digits with s_mp_pad() and then fills it with
+  mpp_random().  prec must be greater than zero.  Returns the
+  s_mp_pad() error if padding fails, otherwise the result of
+  mpp_random().
+ */
+
+mp_err
+mpp_random_size(mp_int *a, mp_size prec)
+{
+    mp_err res;
+
+    ARGCHK(a != NULL && prec > 0, MP_BADARG);
+
+    if ((res = s_mp_pad(a, prec)) != MP_OKAY)
+        return res;
+
+    return mpp_random(a);
+
+} /* end mpp_random_size() */
+
+/* }}} */
+
+/* {{{ mpp_divis_vector(a, vec, size, which) */
+
+/*
+  mpp_divis_vector(a, vec, size, which)
+
+  Determines if a is divisible by any of the 'size' digits in vec.
+  Returns MP_YES and sets 'which' to the index of the offending digit,
+  if it is; returns MP_NO if it is not.  This is the argument-checked
+  public entry point; the scan itself is done by s_mpp_divp().
+ */
+
+mp_err
+mpp_divis_vector(mp_int *a, const mp_digit *vec, int size, int *which)
+{
+    ARGCHK(a != NULL && vec != NULL && size > 0, MP_BADARG);
+
+    return s_mpp_divp(a, vec, size, which);
+
+} /* end mpp_divis_vector() */
+
+/* }}} */
+
+/* {{{ mpp_divis_primes(a, np) */
+
+/*
+  mpp_divis_primes(a, np)
+
+  Test whether a is divisible by any of the first '*np' primes of
+  prime_tab.  A request for more primes than the table holds is
+  clamped to prime_tab_size.  If a is divisible, returns MP_YES and
+  sets *np to the value of the digit that did it.  If not, returns
+  MP_NO and leaves *np untouched.  Any other value is an error
+  propagated from mpp_divis_vector().
+ */
+mp_err
+mpp_divis_primes(mp_int *a, mp_digit *np)
+{
+    int size;
+    int which = 0;
+    mp_err res;
+
+    ARGCHK(a != NULL && np != NULL, MP_BADARG);
+
+    /* Clamp while the count is still an mp_digit.  Narrowing *np to
+       int first could truncate it, or turn it negative, when mp_digit
+       is wider than int, so that an over-large request would test the
+       wrong number of primes instead of the whole table. */
+    if (*np > (mp_digit)prime_tab_size)
+        size = prime_tab_size;
+    else
+        size = (int)*np;
+
+    res = mpp_divis_vector(a, prime_tab, size, &which);
+    if (res == MP_YES)
+        *np = prime_tab[which];
+
+    return res;
+
+} /* end mpp_divis_primes() */
+
+/* }}} */
+
+/* {{{ mpp_fermat(a, w) */
+
+/*
+  Using w as a witness, try pseudo-primality testing based on Fermat's
+  little theorem.  If a is prime, and (w, a) = 1, then w^a == w (mod
+  a).
So, we compute z = w^a (mod a) and compare z to w; if they are
+  equal, the test passes and we return MP_YES. Otherwise, we return
+  MP_NO.
+ */
+mp_err
+mpp_fermat(mp_int *a, mp_digit w)
+{
+    mp_int base, test; /* base holds the witness w; test receives base^a mod a */
+    mp_err res;
+
+    if ((res = mp_init(&base)) != MP_OKAY)
+        return res; /* nothing has been initialized yet, so nothing to release */
+
+    mp_set(&base, w);
+
+    if ((res = mp_init(&test)) != MP_OKAY)
+        goto TEST; /* only base is live: enter cleanup below the mp_clear(&test) */
+
+    /* Compute test = base^a (mod a) */
+    if ((res = mp_exptmod(&base, a, a, &test)) != MP_OKAY)
+        goto CLEANUP; /* return the mp_exptmod() error after releasing both temporaries */
+
+    if (mp_cmp(&base, &test) == 0)
+        res = MP_YES;
+    else
+        res = MP_NO;
+
+CLEANUP:
+    mp_clear(&test);
+TEST: /* despite the name, this label is the "release base only" cleanup entry */
+    mp_clear(&base);
+
+    return res;
+
+} /* end mpp_fermat() */
+
+/* }}} */
+
+/*
+   Perform the fermat test on each of the primes in a list until
+   a) one of them shows a is not prime, or
+   b) the list is exhausted.
+   Each entry of 'primes' is handed to mpp_fermat() as the witness, in
+   order, and the loop stops at the first result that is not MP_YES.
+   Returns: MP_YES if it passes tests (this is also the result when
+            nPrimes is 0, because no test is run).
+            MP_NO if fermat test reveals it is composite
+            Some MP error code if some other error occurs.
+ */
+mp_err
+mpp_fermat_list(mp_int *a, const mp_digit *primes, mp_size nPrimes)
+{
+    mp_err rv = MP_YES;
+
+    while (nPrimes-- > 0 && rv == MP_YES) {
+        rv = mpp_fermat(a, *primes++);
+    }
+    return rv;
+}
+
+/* {{{ mpp_pprime(a, nt) */
+
+/*
+  mpp_pprime(a, nt)
+
+  Performs nt iteration of the Miller-Rabin probabilistic primality
+  test on a. Returns MP_YES if the tests pass, MP_NO if one fails.
+  If MP_NO is returned, the number is definitely composite. If MP_YES
+  is returned, it is probably prime (but that is not guaranteed).
+
+  Witnesses are drawn with mpp_random(), i.e. from rand(), and reduced
+  modulo a; a draw that lands on 0 or 1 is discarded and does not
+  count towards nt.
+
+  MP_NO is also returned, before any round is run, when a - 1 has no
+  trailing zero bits.
+
+  NOTE(review): MP_CHECKOK is defined outside this file. The
+  "previous line set res" remark in the squaring loop indicates that it
+  stores the callee's result in res; it presumably jumps to CLEANUP on
+  failure - confirm against its definition. If so, any other mp_err
+  returned here is the error of the failing MPI call.
+
+  NOTE(review): when nt <= 0 the round loop never runs and res keeps
+  whatever the last MP_CHECKOK stored, so the result presumably reads
+  as a pass with zero rounds - confirm whether callers can pass nt <= 0.
+ */
+
+mp_err
+mpp_pprime(mp_int *a, int nt)
+{
+    mp_err res;
+    mp_int x, amo, m, z; /* "amo" = "a minus one" */
+    int iter;
+    unsigned int jx;
+    mp_size b;
+
+    ARGCHK(a != NULL, MP_BADARG);
+
+    MP_DIGITS(&x) = 0; /* NOTE(review): zeroing the four digit pointers presumably
+                          lets CLEANUP call mp_clear() on temporaries that were
+                          never initialized - confirm against mp_clear() */
+    MP_DIGITS(&amo) = 0;
+    MP_DIGITS(&m) = 0;
+    MP_DIGITS(&z) = 0;
+
+    /* Initialize temporaries... */
+    MP_CHECKOK(mp_init(&amo));
+    /* Compute amo = a - 1 for what follows... */
+    MP_CHECKOK(mp_sub_d(a, 1, &amo));
+
+    b = mp_trailing_zeros(&amo); /* going by the helper's name: number of trailing zero bits of a - 1 */
+    if (!b) { /* a was even ? */
+        res = MP_NO;
+        goto CLEANUP;
+    }
+
+    MP_CHECKOK(mp_init_size(&x, MP_USED(a)));
+    MP_CHECKOK(mp_init(&z));
+    MP_CHECKOK(mp_init(&m));
+    MP_CHECKOK(mp_div_2d(&amo, b, &m, 0)); /* presumably m = amo / 2^b, so that a - 1 = 2^b * m; the last argument is passed as 0 */
+
+    /* Do the test nt times... */
+    for (iter = 0; iter < nt; iter++) {
+
+        /* Choose a random value for 1 < x < a */
+        MP_CHECKOK(s_mp_pad(&x, USED(a))); /* re-pad every round: the mp_mod() below may leave x with fewer used digits */
+        mpp_random(&x); /* result ignored; mpp_random() in this file only fails its non-NULL argument check */
+        MP_CHECKOK(mp_mod(&x, a, &x));
+        if (mp_cmp_d(&x, 1) <= 0) {
+            iter--; /* don't count this iteration */
+            continue; /* choose a new x */
+        }
+
+        /* Compute z = (x ** m) mod a */
+        MP_CHECKOK(mp_exptmod(&x, &m, a, &z));
+
+        if (mp_cmp_d(&z, 1) == 0 || mp_cmp(&z, &amo) == 0) {
+            res = MP_YES;
+            continue;
+        }
+
+        res = MP_NO; /* just in case the following for loop never executes. */
+        for (jx = 1; jx < b; jx++) {
+            /* z = z^2 (mod a) */
+            MP_CHECKOK(mp_sqrmod(&z, a, &z));
+            res = MP_NO; /* previous line set res to MP_YES */
+
+            if (mp_cmp_d(&z, 1) == 0) {
+                break;
+            }
+            if (mp_cmp(&z, &amo) == 0) {
+                res = MP_YES;
+                break;
+            }
+        } /* end testing loop */
+
+        /* If the test passes, we will continue iterating, but a failed
+           test means the candidate is definitely NOT prime, so we will
+           immediately break out of this loop
+         */
+        if (res == MP_NO)
+            break;
+
+    } /* end iterations loop */
+
+CLEANUP:
+    mp_clear(&m);
+    mp_clear(&z);
+    mp_clear(&x);
+    mp_clear(&amo);
+    return res;
+
+} /* end mpp_pprime() */
+
+/* }}} */
+
+/* Produce table of composites from list of primes and trial value.
+** trial must be odd. List of primes must not include 2.
+** sieve should have dimension >= MAXPRIME/2, where MAXPRIME is largest
+** prime in list of primes. After this function is finished,
+** if sieve[i] is non-zero, then (trial + 2*i) is composite.
+** Each prime used in the sieve costs one division of trial, and eliminates
+** one or more values from the search space. (3 eliminates 1/3 of the values
+** alone!) Each value left in the search space costs 1 or more modular
+** exponentiations. So, these divisions are a bargain!
+*/
+mp_err
+mpp_sieve(mp_int *trial, const mp_digit *primes, mp_size nPrimes,
+          unsigned char *sieve, mp_size nSieve)
+{
+    mp_err res;
+    mp_digit rem;
+    mp_size ix;
+    unsigned long offset;
+
+    memset(sieve, 0, nSieve);
+
+    for (ix = 0; ix < nPrimes; ix++) {
+        mp_digit prime = primes[ix];
+        mp_size i;
+        if ((res = mp_mod_d(trial, prime, &rem)) != MP_OKAY)
+            return res;
+
+        /* offset is the smallest k >= 0 with (trial + k) divisible by
+           this prime. */
+        if (rem == 0) {
+            offset = 0;
+        } else {
+            offset = prime - rem;
+        }
+
+        /* Walk every multiple of the prime inside the window.  The
+           sieve only has slots for trial + 2*j, so odd distances are
+           skipped and an even distance i maps to slot i / 2. */
+        for (i = offset; i < nSieve * 2; i += prime) {
+            if (i % 2 == 0) {
+                sieve[i / 2] = 1;
+            }
+        }
+    }
+
+    return MP_OKAY;
+}
+
+/* Number of sieve slots (one byte each).  Parenthesized so the macro
+   stays a single operand wherever it is expanded. */
+#define SIEVE_SIZE (32 * 1024)
+
+/*
+  mpp_make_prime(start, nBits, strong, nTries)
+
+  Searches upward from 'start' for a probable prime of nBits bits.
+  start is first forced odd, its top requested bit is set and any
+  higher bits are cleared.  The candidates start + 2*i, for i below
+  SIEVE_SIZE, are then sieved against prime_tab (skipping its first
+  entry) and the survivors are put through a base-2 Fermat test
+  followed by num_tests rounds of mpp_pprime().
+
+  If 'strong' is non-zero the search is for p of nBits - 1 bits such
+  that q = 2p + 1 also passes trial division, Fermat and mpp_pprime();
+  on success q is the value stored in start.
+
+  Returns MP_YES with the prime in start, MP_NO if no candidate in the
+  sieve window qualified, MP_MEM if the sieve cannot be allocated, or
+  the error of a failing MPI call.  If nTries is not NULL, the loop
+  index reached is added to *nTries, except on the early argument and
+  allocation exits.
+
+  NOTE(review): ARGCHK and MP_CHECKOK are defined outside this file;
+  MP_CHECKOK is assumed to store the result in res and jump to CLEANUP
+  on failure - confirm against their definitions.
+ */
+mp_err
+mpp_make_prime(mp_int *start, mp_size nBits, mp_size strong,
+               unsigned long *nTries)
+{
+    mp_digit np;
+    mp_err res;
+    unsigned int i = 0;
+    mp_int trial;
+    mp_int q;
+    mp_size num_tests;
+    unsigned char *sieve;
+
+    ARGCHK(start != 0, MP_BADARG);
+    ARGCHK(nBits > 16, MP_RANGE);
+
+    /* Allocation failure is a runtime condition, not a caller error,
+       so it is tested explicitly instead of through the argument-check
+       macro, whose behaviour depends on how the library is built. */
+    sieve = malloc(SIEVE_SIZE);
+    if (sieve == NULL)
+        return MP_MEM;
+
+    MP_DIGITS(&trial) = 0;
+    MP_DIGITS(&q) = 0;
+    MP_CHECKOK(mp_init(&trial));
+    MP_CHECKOK(mp_init(&q));
+    /* values originally taken from table 4.4,
+     * Handbook of Applied Cryptography, augmented by FIPS-186
+     * requirements, Table C.2 and C.3 */
+    if (nBits >= 2000) {
+        num_tests = 3;
+    } else if (nBits >= 1536) {
+        num_tests = 4;
+    } else if (nBits >= 1024) {
+        num_tests = 5;
+    } else if (nBits >= 550) {
+        num_tests = 6;
+    } else if (nBits >= 450) {
+        num_tests = 7;
+    } else if (nBits >= 400) {
+        num_tests = 8;
+    } else if (nBits >= 350) {
+        num_tests = 9;
+    } else if (nBits >= 300) {
+        num_tests = 10;
+    } else if (nBits >= 250) {
+        num_tests = 20;
+    } else if (nBits >= 200) {
+        num_tests = 41;
+    } else if (nBits >= 100) {
+        num_tests = 38; /* funny anomaly in the FIPS tables, for aux primes, the
+                         * required more iterations for larger aux primes */
+    } else
+        num_tests = 50;
+
+    /* For a strong prime, p gets one bit less so that q = 2p + 1 ends
+       up with the requested size. */
+    if (strong)
+        --nBits;
+    MP_CHECKOK(mpl_set_bit(start, nBits - 1, 1));
+    MP_CHECKOK(mpl_set_bit(start, 0, 1));
+    for (i = mpl_significant_bits(start) - 1; i >= nBits; --i) {
+        MP_CHECKOK(mpl_set_bit(start, i, 0));
+    }
+    /* start sieving with prime value of 3. */
+    MP_CHECKOK(mpp_sieve(start, prime_tab + 1, prime_tab_size - 1,
+                         sieve, SIEVE_SIZE));
+
+#ifdef DEBUG_SIEVE
+    res = 0;
+    for (i = 0; i < SIEVE_SIZE; ++i) {
+        if (!sieve[i])
+            ++res;
+    }
+    fprintf(stderr, "sieve found %d potential primes.\n", res);
+#define FPUTC(x, y) fputc(x, y)
+#else
+#define FPUTC(x, y)
+#endif
+
+    res = MP_NO;
+    for (i = 0; i < SIEVE_SIZE; ++i) {
+        if (sieve[i]) /* this number is composite */
+            continue;
+        MP_CHECKOK(mp_add_d(start, 2 * i, &trial));
+        FPUTC('.', stderr);
+        /* run a Fermat test */
+        res = mpp_fermat(&trial, 2);
+        if (res != MP_OKAY) {
+            if (res == MP_NO)
+                continue; /* was composite */
+            goto CLEANUP;
+        }
+
+        FPUTC('+', stderr);
+        /* If that passed, run some Miller-Rabin tests */
+        res = mpp_pprime(&trial, num_tests);
+        if (res != MP_OKAY) {
+            if (res == MP_NO)
+                continue; /* was composite */
+            goto CLEANUP;
+        }
+        FPUTC('!', stderr);
+
+        if (!strong)
+            break; /* success !! */
+
+        /* At this point, we have strong evidence that our candidate
+           is itself prime.  If we want a strong prime, we need now
+           to test q = 2p + 1 for primality...
+
+           q stays initialized for the whole search: each pass simply
+           overwrites it, and CLEANUP releases it exactly once.
+         */
+        MP_CHECKOK(mp_mul_2(&trial, &q));
+        MP_CHECKOK(mp_add_d(&q, 1, &q));
+
+        /* Test q for small prime divisors ... */
+        np = prime_tab_size;
+        res = mpp_divis_primes(&q, &np);
+        if (res == MP_YES) { /* is composite */
+            /* Here MP_YES means "q has a small factor".  Turn it back
+               into "no prime yet", otherwise running off the end of
+               the sieve right after this candidate would be reported
+               as success with an unverified value. */
+            res = MP_NO;
+            continue;
+        }
+        if (res != MP_NO)
+            goto CLEANUP;
+
+        /* And test with Fermat, as with its parent ... */
+        res = mpp_fermat(&q, 2);
+        if (res != MP_YES) {
+            if (res == MP_NO)
+                continue; /* was composite */
+            goto CLEANUP;
+        }
+
+        /* And test with Miller-Rabin, as with its parent ... */
+        res = mpp_pprime(&q, num_tests);
+        if (res != MP_YES) {
+            if (res == MP_NO)
+                continue; /* was composite */
+            goto CLEANUP;
+        }
+
+        /* If it passed, we've got a winner: hand q over as the result */
+        mp_exch(&q, &trial);
+        break;
+
+    } /* end of loop through sieved values */
+    if (res == MP_YES)
+        mp_exch(&trial, start);
+CLEANUP:
+    mp_clear(&trial);
+    mp_clear(&q);
+    if (nTries)
+        *nTries += i;
+    /* sieve is never NULL here; wipe it before handing it back */
+    memset(sieve, 0, SIEVE_SIZE);
+    free(sieve);
+    return res;
+}
+
+/*========================================================================*/
+/*------------------------------------------------------------------------*/
+/* Static functions visible only to the library internally */
+
+/* {{{ s_mpp_divp(a, vec, size, which) */
+
+/*
+  Test for divisibility by members of a vector of digits.  Returns
+  MP_NO if a is not divisible by any of them; returns MP_YES and sets
+  'which' to the index of the offender, if it is.  Will stop on the
+  first digit against which a is divisible.  'which' may be NULL when
+  the caller does not need the index.  An error from mp_mod_d() is
+  returned as is.
+ */
+
+mp_err
+s_mpp_divp(mp_int *a, const mp_digit *vec, int size, int *which)
+{
+    mp_err res;
+    mp_digit rem;
+
+    int ix;
+
+    for (ix = 0; ix < size; ix++) {
+        if ((res = mp_mod_d(a, vec[ix], &rem)) != MP_OKAY)
+            return res;
+
+        if (rem == 0) {
+            if (which)
+                *which = ix;
+            return MP_YES;
+        }
+    }
+
+    return MP_NO;
+
+} /* end s_mpp_divp() */
+
+/* }}} */
+
+/*------------------------------------------------------------------------*/
+/* HERE THERE BE DRAGONS */
diff --git a/security/nss/lib/freebl/mpi/mpprime.h b/security/nss/lib/freebl/mpi/mpprime.h
new file mode 100644
index 000000000..c47c61836
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpprime.h
@@ -0,0 +1,38 @@
+/*
+ * mpprime.h
+ *
+ * Utilities for finding and working with prime and pseudo-prime
+ * integers
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0.
If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef _H_MP_PRIME_
+#define _H_MP_PRIME_
+
+#include "mpi.h"
+
+extern const int prime_tab_size; /* number of primes available */
+extern const mp_digit prime_tab[]; /* the prime table itself; only declared here */
+
+/* Tests for divisibility
+ *
+ * Both return MP_YES when the first argument is divisible by the
+ * second, MP_NO when it is not, and another mp_err value when the
+ * underlying MPI call fails.  mpp_divis_d() answers MP_NO for d == 0.
+ */
+mp_err mpp_divis(mp_int *a, mp_int *b);
+mp_err mpp_divis_d(mp_int *a, mp_digit d);
+
+/* Random selection
+ *
+ * mpp_random() overwrites the digits a currently uses with values
+ * taken from the C library rand(), so the result is not suitable for
+ * cryptographic purposes.  mpp_random_size() first pads a to prec
+ * digits (prec > 0) and then does the same.
+ */
+mp_err mpp_random(mp_int *a);
+mp_err mpp_random_size(mp_int *a, mp_size prec);
+
+/* Pseudo-primality testing
+ *
+ * mpp_divis_vector: MP_YES and *which = index of the first digit of
+ *                   vec[0..size-1] that divides a, else MP_NO.
+ * mpp_divis_primes: same test against the first *np table primes; on
+ *                   MP_YES, *np receives the dividing prime.
+ * mpp_fermat:       Fermat test of a with witness w.
+ * mpp_fermat_list:  Fermat test with each listed witness until one
+ *                   does not return MP_YES.
+ * mpp_pprime:       nt rounds of Miller-Rabin; MP_NO means composite.
+ * mpp_sieve:        marks sieve[i] non-zero when trial + 2*i has a
+ *                   factor among the listed primes.
+ * mpp_make_prime:   searches upward from start for a probable prime
+ *                   of nBits bits (a strong prime if strong != 0) and
+ *                   stores it in start.
+ */
+mp_err mpp_divis_vector(mp_int *a, const mp_digit *vec, int size, int *which);
+mp_err mpp_divis_primes(mp_int *a, mp_digit *np);
+mp_err mpp_fermat(mp_int *a, mp_digit w);
+mp_err mpp_fermat_list(mp_int *a, const mp_digit *primes, mp_size nPrimes);
+mp_err mpp_pprime(mp_int *a, int nt);
+mp_err mpp_sieve(mp_int *trial, const mp_digit *primes, mp_size nPrimes,
+                 unsigned char *sieve, mp_size nSieve);
+mp_err mpp_make_prime(mp_int *start, mp_size nBits, mp_size strong,
+                      unsigned long *nTries);
+
+#endif /* end _H_MP_PRIME_ */
diff --git a/security/nss/lib/freebl/mpi/mpv_sparc.c b/security/nss/lib/freebl/mpi/mpv_sparc.c
new file mode 100644
index 000000000..423311b65
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpv_sparc.c
@@ -0,0 +1,221 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
+
+#include "vis_proto.h"
+
+/***************************************************************
+ * Scalar aliases.  The names carry the intended widths: 32-bit
+ * signed/unsigned, 64-bit signed/unsigned (long under __sparcv9,
+ * long long otherwise) and a 64-bit double.
+ ***************************************************************/
+
+typedef int t_s32;
+typedef unsigned int t_u32;
+#if defined(__sparcv9)
+typedef long t_s64;
+typedef unsigned long t_u64;
+#else
+typedef long long t_s64;
+typedef unsigned long long t_u64;
+#endif
+typedef double t_d64;
+
+/***************************************************************
+ * d64_2_i32 overlays one t_d64 with two t_s32 words, so that the two
+ * 32-bit halves of a value produced as a t_d64 can be read back as
+ * signed integers (i0 first, i1 second in memory).
+ ***************************************************************/
+
+typedef union {
+    t_d64 d64;
+    struct {
+        t_s32 i0;
+        t_s32 i1;
+    } i32s;
+} d64_2_i32;
+
+/***************************************************************
+ * BUFF_SIZE sizes the scratch product buffer of the general-n paths
+ * of mul_add().
+ * A_BITS is the split point for the multiplier a: values below
+ * 1 << A_BITS use one product per word, larger values are split into
+ * their low A_BITS bits (A_MASK) and the remaining high bits.
+ * NOTE(review): 19 presumably keeps each 32-bit by 19-bit product
+ * exactly representable in a double - confirm.
+ ***************************************************************/
+
+#define BUFF_SIZE 256
+
+#define A_BITS 19
+#define A_MASK ((1 << A_BITS) - 1)
+
+/***************************************************************
+ * Bit pattern with the top bit of each 32-bit half set.  DEF_VARS
+ * reinterprets it as a t_d64 and passes it to vis_fxnor().
+ ***************************************************************/
+
+static t_u64 mask_cnst[] = {
+    0x8000000080000000ull
+};
+
+/***************************************************************
+ * DEF_VARS(N) declares the locals shared by every path of mul_add();
+ * it expects the parameters y and a to be in scope:
+ *   py   - y viewed as an array of t_d64, i.e. two t_u32 words per
+ *          element.  NOTE(review): presumably requires y to be 8-byte
+ *          aligned - confirm.
+ *   mask - mask_cnst as a t_d64 bit pattern
+ *   ca   - 2^31 - 1 as a double
+ *   da   - the multiplier as a double
+ *   buff - N scratch products, s - running sum
+ *   dy   - union used to pull the two words out of a vis_fxnor result
+ ***************************************************************/
+
+#define DEF_VARS(N)                     \
+    t_d64 *py = (t_d64 *)y;             \
+    t_d64 mask = *((t_d64 *)mask_cnst); \
+    t_d64 ca = (1u << 31) - 1;          \
+    t_d64 da = (t_d64)a;                \
+    t_s64 buff[N], s;                   \
+    d64_2_i32 dy
+
+/***************************************************************
+ * MUL_U32_S64_2(i): products for the word pair py[i], stored in
+ * buff[2i] and buff[2i + 1] as (ca - word) * da.
+ * MUL_U32_S64_2_D(i): same for a split multiplier; each word yields a
+ * low product (with da) and a high product (with db), stored in
+ * buff[4i .. 4i + 3].  It needs d0, d1 and db declared by the caller.
+ *
+ * NOTE(review): vis_fxnor comes from vis_proto.h.  Assuming it is a
+ * bitwise XNOR of its operands, XNOR with the sign-bit mask turns an
+ * unsigned word w into the signed value 2^31 - 1 - w, so ca minus
+ * that value is w again as a non-negative double - confirm.
+ ***************************************************************/
+
+#define MUL_U32_S64_2(i)                           \
+    dy.d64 = vis_fxnor(mask, py[i]);               \
+    buff[2 * (i)] = (ca - (t_d64)dy.i32s.i0) * da; \
+    buff[2 * (i) + 1] = (ca - (t_d64)dy.i32s.i1) * da
+
+#define MUL_U32_S64_2_D(i)                \
+    dy.d64 = vis_fxnor(mask, py[i]);      \
+    d0 = ca - (t_d64)dy.i32s.i0;          \
+    d1 = ca - (t_d64)dy.i32s.i1;          \
+    buff[4 * (i)] = (t_s64)(d0 * da);     \
+    buff[4 * (i) + 1] = (t_s64)(d0 * db); \
+    buff[4 * (i) + 2] = (t_s64)(d1 * da); \
+    buff[4 * (i) + 3] = (t_s64)(d1 * db)
+
+/***************************************************************
+ * ADD_S64_U32(i): s = product i + x[i] + carry; the low 32 bits go to
+ * z[i] and s >> 32 becomes the next carry (c).
+ * ADD_S64_U32_D(i): same for the split multiplier; the high product
+ * is shifted left by A_BITS and added to the low product first, and
+ * the carry (uc) is taken from s viewed as unsigned.
+ ***************************************************************/
+
+#define ADD_S64_U32(i)      \
+    s = buff[i] + x[i] + c; \
+    z[i] = s;               \
+    c = (s >> 32)
+
+#define ADD_S64_U32_D(i)                                                      \
+    s = buff[2 * (i)] + (((t_s64)(buff[2 * (i) + 1])) << A_BITS) + x[i] + uc; \
+    z[i] = s;                                                                 \
+    uc = ((t_u64)s >> 32)
+
+/***************************************************************
+ * Four consecutive word pairs (eight words) starting at pair index i.
+ ***************************************************************/
+
+#define MUL_U32_S64_8(i)  \
+    MUL_U32_S64_2(i);     \
+    MUL_U32_S64_2(i + 1); \
+    MUL_U32_S64_2(i + 2); \
+    MUL_U32_S64_2(i + 3)
+
+#define MUL_U32_S64_D_8(i)  \
+    MUL_U32_S64_2_D(i);     \
+    MUL_U32_S64_2_D(i + 1); \
+    MUL_U32_S64_2_D(i + 2); \
+    MUL_U32_S64_2_D(i + 3)
+
+/***************************************************************
+ * Eight consecutive accumulate steps starting at word index i.
+ ***************************************************************/
+
+#define ADD_S64_U32_8(i) \
+    ADD_S64_U32(i);      \
+    ADD_S64_U32(i + 1);  \
+    ADD_S64_U32(i + 2);  \
+    ADD_S64_U32(i + 3);  \
+    ADD_S64_U32(i + 4);  \
+    ADD_S64_U32(i + 5);  \
+    ADD_S64_U32(i + 6);  \
+    ADD_S64_U32(i + 7)
+
+#define ADD_S64_U32_D_8(i) \
+    ADD_S64_U32_D(i);      \
+    ADD_S64_U32_D(i + 1);  \
+    ADD_S64_U32_D(i + 2);  \
+    ADD_S64_U32_D(i + 3);  \
+    ADD_S64_U32_D(i + 4);  \
+    ADD_S64_U32_D(i + 5);  \
+    ADD_S64_U32_D(i + 6);  \
+    ADD_S64_U32_D(i + 7)
+
+/***************************************************************
+ * mul_add(z, x, y, n, a)
+ *
+ * For i in [0, n): adds the product derived from y[i] and a to x[i]
+ * plus the running carry, stores the low 32 bits in z[i] and returns
+ * the final carry.  With the vis_fxnor assumption noted at the MUL
+ * macros this is z = x + y * a over n 32-bit words.
+ *
+ * Two strategies, selected by the size of a:
+ *   a <  1 << A_BITS : one double product per word
+ *   a >= 1 << A_BITS : a is split into low and high parts, giving two
+ *                      products per word that are recombined with a
+ *                      shift by A_BITS
+ * Each strategy has unrolled forms for n == 8 and n == 16 and a loop
+ * form for any other n.
+ *
+ * NOTE(review): the loop forms use BUFF_SIZE (or 2 * BUFF_SIZE)
+ * scratch entries and do not check n against that size; callers are
+ * presumably expected to keep n within BUFF_SIZE - confirm.
+ * NOTE(review): the multiply loops process (n + 1) / 2 word pairs, so
+ * for odd n they read one word beyond y[n - 1].
+ ***************************************************************/
+
+t_u32
+mul_add(t_u32 *z, t_u32 *x, t_u32 *y, int n, t_u32 a)
+{
+    if (a < (1 << A_BITS)) { /* small multiplier: single-product path */
+
+        if (n == 8) {
+            DEF_VARS(8);
+            t_s32 c = 0;
+
+            MUL_U32_S64_8(0);
+            ADD_S64_U32_8(0);
+
+            return c;
+
+        } else if (n == 16) {
+            DEF_VARS(16);
+            t_s32 c = 0;
+
+            MUL_U32_S64_8(0);
+            MUL_U32_S64_8(4); /* pair index 4 = words 8..15 */
+            ADD_S64_U32_8(0);
+            ADD_S64_U32_8(8);
+
+            return c;
+
+        } else {
+            DEF_VARS(BUFF_SIZE);
+            t_s32 i, c = 0;
+
+#pragma pipeloop(0)
+            for (i = 0; i < (n + 1) / 2; i++) {
+                MUL_U32_S64_2(i);
+            }
+
+#pragma pipeloop(0)
+            for (i = 0; i < n; i++) {
+                ADD_S64_U32(i);
+            }
+
+            return c;
+        }
+    } else { /* large multiplier: split a into low A_BITS bits and the rest */
+
+        if (n == 8) {
+            DEF_VARS(2 * 8); /* two scratch products per word */
+            t_d64 d0, d1, db;
+            t_u32 uc = 0;
+
+            da = (t_d64)(a & A_MASK);
+            db = (t_d64)(a >> A_BITS);
+
+            MUL_U32_S64_D_8(0);
+            ADD_S64_U32_D_8(0);
+
+            return uc;
+
+        } else if (n == 16) {
+            DEF_VARS(2 * 16);
+            t_d64 d0, d1, db;
+            t_u32 uc = 0;
+
+            da = (t_d64)(a & A_MASK);
+            db = (t_d64)(a >> A_BITS);
+
+            MUL_U32_S64_D_8(0);
+            MUL_U32_S64_D_8(4);
+            ADD_S64_U32_D_8(0);
+            ADD_S64_U32_D_8(8);
+
+            return uc;
+
+        } else {
+            DEF_VARS(2 * BUFF_SIZE);
+            t_d64 d0, d1, db;
+            t_u32 i, uc = 0; /* i is unsigned here and is compared against the int n below */
+
+            da = (t_d64)(a & A_MASK);
+            db = (t_d64)(a >> A_BITS);
+
+#pragma pipeloop(0)
+            for (i = 0; i < (n + 1) / 2; i++) {
+                MUL_U32_S64_2_D(i);
+            }
+
+#pragma pipeloop(0)
+            for (i = 0; i < n; i++) {
+                ADD_S64_U32_D(i);
+            }
+
+            return uc;
+        }
+    }
+}
+
+/***************************************************************
+ * mul_add_inp(x, y, n, a): in-place form of mul_add(); x is both the
+ * addend and the destination.  Returns mul_add()'s carry.
+ ***************************************************************/
+
+t_u32
+mul_add_inp(t_u32 *x, t_u32 *y, int n, t_u32 a)
+{
+    return mul_add(x, x, y, n, a);
+}
+
+/***************************************************************/
diff --git a/security/nss/lib/freebl/mpi/mpv_sparcv8.s b/security/nss/lib/freebl/mpi/mpv_sparcv8.s
new file mode 100644
index 000000000..66122a1d9
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/mpv_sparcv8.s
@@ -0,0 +1,1607 @@
+! Inner multiply loop functions for hybrid 32/64-bit Sparc v8plus CPUs.
+! This Source Code Form is subject to the terms of the Mozilla Public
+! License, v. 2.0. If a copy of the MPL was not distributed with this
+! file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+ .section ".text",#alloc,#execinstr
+/* 000000 3 ( 0 0) */ .file "mpv_sparc.c"
+/* 000000 14 ( 0 0) */ .align 8
+!
+! SUBROUTINE .L_const_seg_900000106
+!
+! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME)
+
+ .L_const_seg_900000106: /* frequency 1.0 confidence 0.0 */
+/* 000000 19 ( 0 0) */ .word 1127219200,0
+/* 0x0008 20 ( 0 0) */ .word 1105199103,-4194304
+/* 0x0010 21 ( 0 0) */ .align 16
+/* 0x0010 27 ( 0 0) */ .global mul_add
+
+!
+! ENTRY mul_add
+!
+
+ .global mul_add
+ mul_add: /* frequency 1.0 confidence 0.0 */
+/* 0x0010 29 ( 0 1) */ sethi %hi(0x1800),%g1
+/* 0x0014 30 ( 0 1) */ sethi %hi(mask_cnst),%g2
+/* 0x0018 31 ( 1 2) */ xor %g1,-984,%g1
+/* 0x001c 32 ( 1 2) */ add %g2,%lo(mask_cnst),%g2
+/* 0x0020 33 ( 2 4) */ save %sp,%g1,%sp
+
+!
+! ENTRY .L900000154
+!
+
+ .L900000154: /* frequency 1.0 confidence 0.0 */
+/* 0x0024 35 ( 0 2) */ call (.+0x8) ! params = !
Result = +/* 0x0028 ( 1 2) */ sethi %hi((_GLOBAL_OFFSET_TABLE_-(.L900000154-.))),%g5 +/* 0x002c 177 ( 2 3) */ sethi %hi(.L_const_seg_900000106),%g3 +/* 0x0030 178 ( 2 3) */ add %g5,%lo((_GLOBAL_OFFSET_TABLE_-(.L900000154-.))),%g5 +/* 0x0034 179 ( 3 4) */ or %g0,%i4,%o1 +/* 0x0038 180 ( 3 4) */ st %o1,[%fp+84] +/* 0x003c 181 ( 3 4) */ add %g5,%o7,%o3 +/* 0x0040 182 ( 4 5) */ add %g3,%lo(.L_const_seg_900000106),%g3 +/* 0x0044 183 ( 4 6) */ ld [%o3+%g2],%g2 +/* 0x0048 184 ( 4 5) */ or %g0,%i3,%o2 +/* 0x004c 185 ( 5 6) */ sethi %hi(0x80000),%g4 +/* 0x0050 186 ( 5 7) */ ld [%o3+%g3],%o0 +/* 0x0054 187 ( 5 6) */ or %g0,%i2,%g5 +/* 0x0058 188 ( 6 7) */ or %g0,%o2,%o3 +/* 0x005c 189 ( 6 10) */ ldd [%g2],%f0 +/* 0x0060 190 ( 6 7) */ subcc %o1,%g4,%g0 +/* 0x0064 191 ( 6 7) */ bcc,pn %icc,.L77000048 ! tprob=0.50 +/* 0x0068 ( 7 8) */ subcc %o2,8,%g0 +/* 0x006c 193 ( 7 8) */ bne,pn %icc,.L77000037 ! tprob=0.50 +/* 0x0070 ( 8 12) */ ldd [%o0],%f8 +/* 0x0074 195 ( 9 13) */ ldd [%g5],%f4 +/* 0x0078 196 (10 14) */ ldd [%g5+8],%f6 +/* 0x007c 197 (11 15) */ ldd [%g5+16],%f10 +/* 0x0080 198 (11 14) */ fmovs %f8,%f12 +/* 0x0084 199 (12 16) */ fxnor %f0,%f4,%f4 +/* 0x0088 200 (12 14) */ ld [%fp+84],%f13 +/* 0x008c 201 (13 17) */ ldd [%o0+8],%f14 +/* 0x0090 202 (13 17) */ fxnor %f0,%f6,%f6 +/* 0x0094 203 (14 18) */ ldd [%g5+24],%f16 +/* 0x0098 204 (14 18) */ fxnor %f0,%f10,%f10 +/* 0x009c 208 (15 17) */ ld [%i1],%g2 +/* 0x00a0 209 (15 20) */ fsubd %f12,%f8,%f8 +/* 0x00a4 210 (16 21) */ fitod %f4,%f18 +/* 0x00a8 211 (16 18) */ ld [%i1+4],%g3 +/* 0x00ac 212 (17 22) */ fitod %f5,%f4 +/* 0x00b0 213 (17 19) */ ld [%i1+8],%g4 +/* 0x00b4 214 (18 23) */ fitod %f6,%f20 +/* 0x00b8 215 (18 20) */ ld [%i1+12],%g5 +/* 0x00bc 216 (19 21) */ ld [%i1+16],%o0 +/* 0x00c0 217 (19 24) */ fitod %f7,%f6 +/* 0x00c4 218 (20 22) */ ld [%i1+20],%o1 +/* 0x00c8 219 (20 24) */ fxnor %f0,%f16,%f16 +/* 0x00cc 220 (21 26) */ fsubd %f14,%f18,%f12 +/* 0x00d0 221 (21 23) */ ld [%i1+24],%o2 +/* 0x00d4 222 (22 27) */ fsubd 
%f14,%f4,%f4 +/* 0x00d8 223 (22 24) */ ld [%i1+28],%o3 +/* 0x00dc 224 (23 28) */ fitod %f10,%f18 +/* 0x00e0 225 (24 29) */ fsubd %f14,%f20,%f20 +/* 0x00e4 226 (25 30) */ fitod %f11,%f10 +/* 0x00e8 227 (26 31) */ fsubd %f14,%f6,%f6 +/* 0x00ec 228 (26 31) */ fmuld %f12,%f8,%f12 +/* 0x00f0 229 (27 32) */ fitod %f16,%f22 +/* 0x00f4 230 (27 32) */ fmuld %f4,%f8,%f4 +/* 0x00f8 231 (28 33) */ fsubd %f14,%f18,%f18 +/* 0x00fc 232 (29 34) */ fitod %f17,%f16 +/* 0x0100 233 (29 34) */ fmuld %f20,%f8,%f20 +/* 0x0104 234 (30 35) */ fsubd %f14,%f10,%f10 +/* 0x0108 235 (31 36) */ fdtox %f12,%f12 +/* 0x010c 236 (31 32) */ std %f12,[%sp+152] +/* 0x0110 237 (31 36) */ fmuld %f6,%f8,%f6 +/* 0x0114 238 (32 37) */ fdtox %f4,%f4 +/* 0x0118 239 (32 33) */ std %f4,[%sp+144] +/* 0x011c 240 (33 38) */ fsubd %f14,%f22,%f4 +/* 0x0120 241 (33 38) */ fmuld %f18,%f8,%f12 +/* 0x0124 242 (34 39) */ fdtox %f20,%f18 +/* 0x0128 243 (34 35) */ std %f18,[%sp+136] +/* 0x012c 244 (35 37) */ ldx [%sp+152],%o4 +/* 0x0130 245 (35 40) */ fsubd %f14,%f16,%f14 +/* 0x0134 246 (35 40) */ fmuld %f10,%f8,%f10 +/* 0x0138 247 (36 41) */ fdtox %f6,%f6 +/* 0x013c 248 (36 37) */ std %f6,[%sp+128] +/* 0x0140 249 (37 39) */ ldx [%sp+144],%o5 +/* 0x0144 250 (37 38) */ add %o4,%g2,%o4 +/* 0x0148 251 (38 39) */ st %o4,[%i0] +/* 0x014c 252 (38 39) */ srax %o4,32,%g2 +/* 0x0150 253 (38 43) */ fdtox %f12,%f6 +/* 0x0154 254 (38 43) */ fmuld %f4,%f8,%f4 +/* 0x0158 255 (39 40) */ std %f6,[%sp+120] +/* 0x015c 256 (39 40) */ add %o5,%g3,%g3 +/* 0x0160 257 (40 42) */ ldx [%sp+136],%o7 +/* 0x0164 258 (40 41) */ add %g3,%g2,%g2 +/* 0x0168 259 (40 45) */ fmuld %f14,%f8,%f6 +/* 0x016c 260 (40 45) */ fdtox %f10,%f8 +/* 0x0170 261 (41 42) */ std %f8,[%sp+112] +/* 0x0174 262 (41 42) */ srax %g2,32,%o5 +/* 0x0178 263 (42 44) */ ldx [%sp+128],%g3 +/* 0x017c 264 (42 43) */ add %o7,%g4,%g4 +/* 0x0180 265 (43 44) */ st %g2,[%i0+4] +/* 0x0184 266 (43 44) */ add %g4,%o5,%g4 +/* 0x0188 267 (43 48) */ fdtox %f4,%f4 +/* 0x018c 268 (44 46) */ ldx 
[%sp+120],%o5 +/* 0x0190 269 (44 45) */ add %g3,%g5,%g3 +/* 0x0194 270 (44 45) */ srax %g4,32,%g5 +/* 0x0198 271 (45 46) */ std %f4,[%sp+104] +/* 0x019c 272 (45 46) */ add %g3,%g5,%g3 +/* 0x01a0 273 (45 50) */ fdtox %f6,%f4 +/* 0x01a4 274 (46 47) */ std %f4,[%sp+96] +/* 0x01a8 275 (46 47) */ add %o5,%o0,%o0 +/* 0x01ac 276 (46 47) */ srax %g3,32,%o5 +/* 0x01b0 277 (47 49) */ ldx [%sp+112],%g5 +/* 0x01b4 278 (47 48) */ add %o0,%o5,%o0 +/* 0x01b8 279 (48 49) */ st %g4,[%i0+8] +/* 0x01bc 280 (49 51) */ ldx [%sp+104],%o5 +/* 0x01c0 281 (49 50) */ add %g5,%o1,%o1 +/* 0x01c4 282 (49 50) */ srax %o0,32,%g5 +/* 0x01c8 283 (50 51) */ st %o0,[%i0+16] +/* 0x01cc 284 (50 51) */ add %o1,%g5,%o1 +/* 0x01d0 285 (51 53) */ ldx [%sp+96],%g5 +/* 0x01d4 286 (51 52) */ add %o5,%o2,%o2 +/* 0x01d8 287 (51 52) */ srax %o1,32,%o5 +/* 0x01dc 288 (52 53) */ st %o1,[%i0+20] +/* 0x01e0 289 (52 53) */ add %o2,%o5,%o2 +/* 0x01e4 290 (53 54) */ st %o2,[%i0+24] +/* 0x01e8 291 (53 54) */ srax %o2,32,%g4 +/* 0x01ec 292 (53 54) */ add %g5,%o3,%g2 +/* 0x01f0 293 (54 55) */ st %g3,[%i0+12] +/* 0x01f4 294 (54 55) */ add %g2,%g4,%g2 +/* 0x01f8 295 (55 56) */ st %g2,[%i0+28] +/* 0x01fc 299 (55 56) */ srax %g2,32,%o7 +/* 0x0200 300 (56 57) */ or %g0,%o7,%i0 +/* 0x0204 (57 64) */ ret ! Result = %o1 %o0 %f0 %f1 +/* 0x0208 (59 61) */ restore %g0,%g0,%g0 + +! +! ENTRY .L77000037 +! + + .L77000037: /* frequency 1.0 confidence 0.0 */ +/* 0x020c 307 ( 0 1) */ subcc %o2,16,%g0 +/* 0x0210 308 ( 0 1) */ bne,pn %icc,.L77000076 ! 
tprob=0.50 +/* 0x0214 ( 1 5) */ ldd [%o0],%f8 +/* 0x0218 310 ( 2 6) */ ldd [%g5],%f4 +/* 0x021c 311 ( 3 7) */ ldd [%g5+8],%f6 +/* 0x0220 317 ( 4 8) */ ldd [%o0+8],%f14 +/* 0x0224 318 ( 4 7) */ fmovs %f8,%f12 +/* 0x0228 319 ( 5 7) */ ld [%fp+84],%f13 +/* 0x022c 320 ( 5 9) */ fxnor %f0,%f4,%f4 +/* 0x0230 321 ( 6 10) */ ldd [%g5+16],%f10 +/* 0x0234 322 ( 6 10) */ fxnor %f0,%f6,%f6 +/* 0x0238 323 ( 7 11) */ ldd [%g5+24],%f16 +/* 0x023c 324 ( 8 12) */ ldd [%g5+32],%f20 +/* 0x0240 325 ( 8 13) */ fsubd %f12,%f8,%f8 +/* 0x0244 331 ( 9 11) */ ld [%i1+40],%o7 +/* 0x0248 332 ( 9 14) */ fitod %f4,%f18 +/* 0x024c 333 (10 14) */ ldd [%g5+40],%f22 +/* 0x0250 334 (10 15) */ fitod %f5,%f4 +/* 0x0254 335 (11 12) */ stx %o7,[%sp+96] +/* 0x0258 336 (11 16) */ fitod %f6,%f24 +/* 0x025c 337 (12 14) */ ld [%i1+44],%o7 +/* 0x0260 338 (12 16) */ fxnor %f0,%f10,%f10 +/* 0x0264 339 (13 17) */ ldd [%g5+48],%f26 +/* 0x0268 340 (13 18) */ fitod %f7,%f6 +/* 0x026c 341 (14 15) */ stx %o7,[%sp+104] +/* 0x0270 342 (14 19) */ fsubd %f14,%f18,%f18 +/* 0x0274 343 (15 17) */ ld [%i1+48],%o7 +/* 0x0278 344 (15 20) */ fsubd %f14,%f4,%f4 +/* 0x027c 345 (16 18) */ ld [%i1+36],%o5 +/* 0x0280 346 (16 21) */ fitod %f10,%f28 +/* 0x0284 347 (17 18) */ stx %o7,[%sp+112] +/* 0x0288 348 (17 21) */ fxnor %f0,%f16,%f16 +/* 0x028c 349 (18 20) */ ld [%i1],%g2 +/* 0x0290 350 (18 23) */ fsubd %f14,%f24,%f24 +/* 0x0294 351 (19 20) */ stx %o5,[%sp+120] +/* 0x0298 352 (19 24) */ fitod %f11,%f10 +/* 0x029c 353 (19 24) */ fmuld %f18,%f8,%f18 +/* 0x02a0 354 (20 22) */ ld [%i1+52],%o5 +/* 0x02a4 355 (20 25) */ fsubd %f14,%f6,%f6 +/* 0x02a8 356 (20 25) */ fmuld %f4,%f8,%f4 +/* 0x02ac 357 (21 26) */ fitod %f16,%f30 +/* 0x02b0 358 (22 26) */ fxnor %f0,%f20,%f20 +/* 0x02b4 359 (22 24) */ ld [%i1+4],%g3 +/* 0x02b8 360 (23 27) */ ldd [%g5+56],%f2 +/* 0x02bc 361 (23 28) */ fsubd %f14,%f28,%f28 +/* 0x02c0 362 (23 28) */ fmuld %f24,%f8,%f24 +/* 0x02c4 363 (24 25) */ stx %o5,[%sp+128] +/* 0x02c8 364 (24 29) */ fdtox %f18,%f18 +/* 0x02cc 
365 (25 26) */ std %f18,[%sp+272] +/* 0x02d0 366 (25 30) */ fitod %f17,%f16 +/* 0x02d4 367 (25 30) */ fmuld %f6,%f8,%f6 +/* 0x02d8 368 (26 31) */ fsubd %f14,%f10,%f10 +/* 0x02dc 369 (27 32) */ fitod %f20,%f18 +/* 0x02e0 370 (28 33) */ fdtox %f4,%f4 +/* 0x02e4 371 (28 29) */ std %f4,[%sp+264] +/* 0x02e8 372 (28 33) */ fmuld %f28,%f8,%f28 +/* 0x02ec 373 (29 31) */ ld [%i1+8],%g4 +/* 0x02f0 374 (29 34) */ fsubd %f14,%f30,%f4 +/* 0x02f4 375 (30 34) */ fxnor %f0,%f22,%f22 +/* 0x02f8 376 (30 32) */ ld [%i1+12],%g5 +/* 0x02fc 377 (31 33) */ ld [%i1+16],%o0 +/* 0x0300 378 (31 36) */ fitod %f21,%f20 +/* 0x0304 379 (31 36) */ fmuld %f10,%f8,%f10 +/* 0x0308 380 (32 34) */ ld [%i1+20],%o1 +/* 0x030c 381 (32 37) */ fdtox %f24,%f24 +/* 0x0310 382 (33 34) */ std %f24,[%sp+256] +/* 0x0314 383 (33 38) */ fsubd %f14,%f16,%f16 +/* 0x0318 384 (34 36) */ ldx [%sp+272],%o7 +/* 0x031c 385 (34 39) */ fdtox %f6,%f6 +/* 0x0320 386 (34 39) */ fmuld %f4,%f8,%f4 +/* 0x0324 387 (35 36) */ std %f6,[%sp+248] +/* 0x0328 388 (35 40) */ fitod %f22,%f24 +/* 0x032c 389 (36 38) */ ld [%i1+32],%o4 +/* 0x0330 390 (36 41) */ fsubd %f14,%f18,%f6 +/* 0x0334 391 (36 37) */ add %o7,%g2,%g2 +/* 0x0338 392 (37 39) */ ldx [%sp+264],%o7 +/* 0x033c 393 (37 41) */ fxnor %f0,%f26,%f26 +/* 0x0340 394 (37 38) */ srax %g2,32,%o5 +/* 0x0344 395 (38 39) */ st %g2,[%i0] +/* 0x0348 396 (38 43) */ fitod %f23,%f18 +/* 0x034c 397 (38 43) */ fmuld %f16,%f8,%f16 +/* 0x0350 398 (39 41) */ ldx [%sp+248],%g2 +/* 0x0354 399 (39 44) */ fdtox %f28,%f22 +/* 0x0358 400 (39 40) */ add %o7,%g3,%g3 +/* 0x035c 401 (40 42) */ ldx [%sp+256],%o7 +/* 0x0360 402 (40 45) */ fsubd %f14,%f20,%f20 +/* 0x0364 403 (40 41) */ add %g3,%o5,%g3 +/* 0x0368 404 (41 42) */ std %f22,[%sp+240] +/* 0x036c 405 (41 46) */ fitod %f26,%f22 +/* 0x0370 406 (41 42) */ srax %g3,32,%o5 +/* 0x0374 407 (41 42) */ add %g2,%g5,%g2 +/* 0x0378 408 (42 43) */ st %g3,[%i0+4] +/* 0x037c 409 (42 47) */ fdtox %f10,%f10 +/* 0x0380 410 (42 43) */ add %o7,%g4,%g4 +/* 0x0384 411 (42 
47) */ fmuld %f6,%f8,%f6 +/* 0x0388 412 (43 44) */ std %f10,[%sp+232] +/* 0x038c 413 (43 47) */ fxnor %f0,%f2,%f12 +/* 0x0390 414 (43 44) */ add %g4,%o5,%g4 +/* 0x0394 415 (44 45) */ st %g4,[%i0+8] +/* 0x0398 416 (44 45) */ srax %g4,32,%o5 +/* 0x039c 417 (44 49) */ fsubd %f14,%f24,%f10 +/* 0x03a0 418 (45 47) */ ldx [%sp+240],%o7 +/* 0x03a4 419 (45 50) */ fdtox %f4,%f4 +/* 0x03a8 420 (45 46) */ add %g2,%o5,%g2 +/* 0x03ac 421 (45 50) */ fmuld %f20,%f8,%f20 +/* 0x03b0 422 (46 47) */ std %f4,[%sp+224] +/* 0x03b4 423 (46 47) */ srax %g2,32,%g5 +/* 0x03b8 424 (46 51) */ fsubd %f14,%f18,%f4 +/* 0x03bc 425 (47 48) */ st %g2,[%i0+12] +/* 0x03c0 426 (47 52) */ fitod %f27,%f24 +/* 0x03c4 427 (47 48) */ add %o7,%o0,%g3 +/* 0x03c8 428 (48 50) */ ldx [%sp+232],%o5 +/* 0x03cc 429 (48 53) */ fdtox %f16,%f16 +/* 0x03d0 430 (48 49) */ add %g3,%g5,%g2 +/* 0x03d4 431 (49 50) */ std %f16,[%sp+216] +/* 0x03d8 432 (49 50) */ srax %g2,32,%g4 +/* 0x03dc 433 (49 54) */ fitod %f12,%f18 +/* 0x03e0 434 (49 54) */ fmuld %f10,%f8,%f10 +/* 0x03e4 435 (50 51) */ st %g2,[%i0+16] +/* 0x03e8 436 (50 55) */ fsubd %f14,%f22,%f16 +/* 0x03ec 437 (50 51) */ add %o5,%o1,%g2 +/* 0x03f0 438 (51 53) */ ld [%i1+24],%o2 +/* 0x03f4 439 (51 56) */ fitod %f13,%f12 +/* 0x03f8 440 (51 52) */ add %g2,%g4,%g2 +/* 0x03fc 441 (51 56) */ fmuld %f4,%f8,%f22 +/* 0x0400 442 (52 54) */ ldx [%sp+224],%g3 +/* 0x0404 443 (52 53) */ srax %g2,32,%g4 +/* 0x0408 444 (52 57) */ fdtox %f6,%f6 +/* 0x040c 445 (53 54) */ std %f6,[%sp+208] +/* 0x0410 446 (53 58) */ fdtox %f20,%f6 +/* 0x0414 447 (54 55) */ stx %o4,[%sp+136] +/* 0x0418 448 (54 59) */ fsubd %f14,%f24,%f4 +/* 0x041c 449 (55 56) */ std %f6,[%sp+200] +/* 0x0420 450 (55 60) */ fsubd %f14,%f18,%f6 +/* 0x0424 451 (55 60) */ fmuld %f16,%f8,%f16 +/* 0x0428 452 (56 57) */ st %g2,[%i0+20] +/* 0x042c 453 (56 57) */ add %g3,%o2,%g2 +/* 0x0430 454 (56 61) */ fdtox %f10,%f10 +/* 0x0434 455 (57 59) */ ld [%i1+28],%o3 +/* 0x0438 456 (57 58) */ add %g2,%g4,%g2 +/* 0x043c 457 (58 60) */ ldx 
[%sp+216],%g5 +/* 0x0440 458 (58 59) */ srax %g2,32,%g4 +/* 0x0444 459 (59 60) */ std %f10,[%sp+192] +/* 0x0448 460 (59 64) */ fsubd %f14,%f12,%f10 +/* 0x044c 461 (59 64) */ fmuld %f4,%f8,%f4 +/* 0x0450 462 (60 61) */ st %g2,[%i0+24] +/* 0x0454 463 (60 61) */ add %g5,%o3,%g2 +/* 0x0458 464 (60 65) */ fdtox %f22,%f12 +/* 0x045c 465 (60 65) */ fmuld %f6,%f8,%f6 +/* 0x0460 466 (61 63) */ ldx [%sp+136],%o0 +/* 0x0464 467 (61 62) */ add %g2,%g4,%g2 +/* 0x0468 468 (62 64) */ ldx [%sp+208],%g3 +/* 0x046c 469 (62 63) */ srax %g2,32,%g4 +/* 0x0470 470 (63 65) */ ldx [%sp+120],%o1 +/* 0x0474 471 (64 66) */ ldx [%sp+200],%g5 +/* 0x0478 472 (64 65) */ add %g3,%o0,%g3 +/* 0x047c 473 (64 69) */ fdtox %f4,%f4 +/* 0x0480 474 (64 69) */ fmuld %f10,%f8,%f8 +/* 0x0484 475 (65 66) */ std %f12,[%sp+184] +/* 0x0488 476 (65 66) */ add %g3,%g4,%g3 +/* 0x048c 477 (65 70) */ fdtox %f16,%f12 +/* 0x0490 478 (66 67) */ std %f12,[%sp+176] +/* 0x0494 479 (66 67) */ srax %g3,32,%o0 +/* 0x0498 480 (66 67) */ add %g5,%o1,%g5 +/* 0x049c 481 (67 69) */ ldx [%sp+192],%o2 +/* 0x04a0 482 (67 68) */ add %g5,%o0,%g5 +/* 0x04a4 483 (68 70) */ ldx [%sp+96],%g4 +/* 0x04a8 484 (68 69) */ srax %g5,32,%o1 +/* 0x04ac 485 (69 71) */ ld [%i1+56],%o4 +/* 0x04b0 486 (70 72) */ ldx [%sp+104],%o0 +/* 0x04b4 487 (70 71) */ add %o2,%g4,%g4 +/* 0x04b8 488 (71 72) */ std %f4,[%sp+168] +/* 0x04bc 489 (71 72) */ add %g4,%o1,%g4 +/* 0x04c0 490 (71 76) */ fdtox %f6,%f4 +/* 0x04c4 491 (72 74) */ ldx [%sp+184],%o3 +/* 0x04c8 492 (72 73) */ srax %g4,32,%o2 +/* 0x04cc 493 (73 75) */ ldx [%sp+112],%o1 +/* 0x04d0 494 (74 75) */ std %f4,[%sp+160] +/* 0x04d4 495 (74 75) */ add %o3,%o0,%o0 +/* 0x04d8 496 (74 79) */ fdtox %f8,%f4 +/* 0x04dc 497 (75 77) */ ldx [%sp+176],%o5 +/* 0x04e0 498 (75 76) */ add %o0,%o2,%o0 +/* 0x04e4 499 (76 77) */ stx %o4,[%sp+144] +/* 0x04e8 500 (77 78) */ st %g2,[%i0+28] +/* 0x04ec 501 (77 78) */ add %o5,%o1,%g2 +/* 0x04f0 502 (77 78) */ srax %o0,32,%o1 +/* 0x04f4 503 (78 79) */ std %f4,[%sp+152] +/* 0x04f8 
504 (78 79) */ add %g2,%o1,%o1 +/* 0x04fc 505 (79 81) */ ldx [%sp+168],%o7 +/* 0x0500 506 (79 80) */ srax %o1,32,%o3 +/* 0x0504 507 (80 82) */ ldx [%sp+128],%o2 +/* 0x0508 508 (81 83) */ ld [%i1+60],%o4 +/* 0x050c 509 (82 83) */ add %o7,%o2,%o2 +/* 0x0510 510 (83 84) */ add %o2,%o3,%o2 +/* 0x0514 511 (83 85) */ ldx [%sp+144],%o5 +/* 0x0518 512 (84 86) */ ldx [%sp+160],%g2 +/* 0x051c 513 (85 87) */ ldx [%sp+152],%o3 +/* 0x0520 514 (86 87) */ st %g3,[%i0+32] +/* 0x0524 515 (86 87) */ add %g2,%o5,%g2 +/* 0x0528 516 (86 87) */ srax %o2,32,%o5 +/* 0x052c 517 (87 88) */ st %g5,[%i0+36] +/* 0x0530 518 (87 88) */ add %g2,%o5,%g2 +/* 0x0534 519 (87 88) */ add %o3,%o4,%g3 +/* 0x0538 520 (88 89) */ st %o0,[%i0+44] +/* 0x053c 521 (88 89) */ srax %g2,32,%g5 +/* 0x0540 522 (89 90) */ st %o1,[%i0+48] +/* 0x0544 523 (89 90) */ add %g3,%g5,%g3 +/* 0x0548 524 (90 91) */ st %o2,[%i0+52] +/* 0x054c 528 (90 91) */ srax %g3,32,%o7 +/* 0x0550 529 (91 92) */ st %g4,[%i0+40] +/* 0x0554 530 (92 93) */ st %g2,[%i0+56] +/* 0x0558 531 (93 94) */ st %g3,[%i0+60] +/* 0x055c 532 (93 94) */ or %g0,%o7,%i0 +/* 0x0560 (94 101) */ ret ! Result = %o1 %o0 %f0 %f1 +/* 0x0564 (96 98) */ restore %g0,%g0,%g0 + +! +! ENTRY .L77000076 +! 
+ + .L77000076: /* frequency 1.0 confidence 0.0 */ +/* 0x0568 540 ( 0 4) */ ldd [%o0],%f6 +/* 0x056c 546 ( 0 1) */ add %o2,1,%g2 +/* 0x0570 547 ( 0 3) */ fmovd %f0,%f14 +/* 0x0574 548 ( 0 1) */ or %g0,0,%o7 +/* 0x0578 549 ( 1 3) */ ld [%fp+84],%f9 +/* 0x057c 550 ( 1 2) */ srl %g2,31,%g3 +/* 0x0580 551 ( 1 2) */ add %fp,-2264,%o5 +/* 0x0584 552 ( 2 3) */ add %g2,%g3,%g2 +/* 0x0588 553 ( 2 6) */ ldd [%o0+8],%f18 +/* 0x058c 554 ( 2 3) */ add %fp,-2256,%o4 +/* 0x0590 555 ( 3 6) */ fmovs %f6,%f8 +/* 0x0594 556 ( 3 4) */ sra %g2,1,%o1 +/* 0x0598 557 ( 3 4) */ or %g0,0,%g2 +/* 0x059c 558 ( 4 5) */ subcc %o1,0,%g0 +/* 0x05a0 559 ( 4 5) */ sub %o1,1,%o2 +/* 0x05a4 563 ( 5 6) */ add %g5,32,%o0 +/* 0x05a8 564 ( 6 11) */ fsubd %f8,%f6,%f16 +/* 0x05ac 565 ( 6 7) */ ble,pt %icc,.L900000161 ! tprob=0.50 +/* 0x05b0 ( 6 7) */ subcc %o3,0,%g0 +/* 0x05b4 567 ( 7 8) */ subcc %o1,7,%g0 +/* 0x05b8 568 ( 7 8) */ bl,pn %icc,.L77000077 ! tprob=0.50 +/* 0x05bc ( 7 8) */ sub %o1,2,%o1 +/* 0x05c0 570 ( 8 12) */ ldd [%g5],%f2 +/* 0x05c4 571 ( 9 13) */ ldd [%g5+8],%f4 +/* 0x05c8 572 ( 9 10) */ or %g0,5,%g2 +/* 0x05cc 573 (10 14) */ ldd [%g5+16],%f0 +/* 0x05d0 574 (11 15) */ fxnor %f14,%f2,%f2 +/* 0x05d4 575 (11 15) */ ldd [%g5+24],%f12 +/* 0x05d8 576 (12 16) */ fxnor %f14,%f4,%f6 +/* 0x05dc 577 (12 16) */ ldd [%g5+32],%f10 +/* 0x05e0 578 (13 17) */ fxnor %f14,%f0,%f8 +/* 0x05e4 579 (15 20) */ fitod %f3,%f0 +/* 0x05e8 580 (16 21) */ fitod %f2,%f4 +/* 0x05ec 581 (17 22) */ fitod %f7,%f2 +/* 0x05f0 582 (18 23) */ fitod %f6,%f6 +/* 0x05f4 583 (20 25) */ fsubd %f18,%f0,%f0 +/* 0x05f8 584 (21 26) */ fsubd %f18,%f4,%f4 + +! +! ENTRY .L900000149 +! 
+ + .L900000149: /* frequency 1.0 confidence 0.0 */ +/* 0x05fc 586 ( 0 4) */ fxnor %f14,%f12,%f22 +/* 0x0600 587 ( 0 5) */ fmuld %f4,%f16,%f4 +/* 0x0604 588 ( 0 1) */ add %g2,2,%g2 +/* 0x0608 589 ( 0 1) */ add %o4,32,%o4 +/* 0x060c 590 ( 1 6) */ fitod %f9,%f24 +/* 0x0610 591 ( 1 6) */ fmuld %f0,%f16,%f20 +/* 0x0614 592 ( 1 2) */ add %o0,8,%o0 +/* 0x0618 593 ( 1 2) */ subcc %g2,%o1,%g0 +/* 0x061c 594 ( 2 6) */ ldd [%o0],%f12 +/* 0x0620 595 ( 2 7) */ fsubd %f18,%f2,%f0 +/* 0x0624 596 ( 2 3) */ add %o5,32,%o5 +/* 0x0628 597 ( 3 8) */ fsubd %f18,%f6,%f2 +/* 0x062c 598 ( 5 10) */ fdtox %f4,%f4 +/* 0x0630 599 ( 6 11) */ fdtox %f20,%f6 +/* 0x0634 600 ( 6 7) */ std %f4,[%o5-32] +/* 0x0638 601 ( 7 12) */ fitod %f8,%f4 +/* 0x063c 602 ( 7 8) */ std %f6,[%o4-32] +/* 0x0640 603 ( 8 12) */ fxnor %f14,%f10,%f8 +/* 0x0644 604 ( 8 13) */ fmuld %f2,%f16,%f6 +/* 0x0648 605 ( 9 14) */ fitod %f23,%f2 +/* 0x064c 606 ( 9 14) */ fmuld %f0,%f16,%f20 +/* 0x0650 607 ( 9 10) */ add %o0,8,%o0 +/* 0x0654 608 (10 14) */ ldd [%o0],%f10 +/* 0x0658 609 (10 15) */ fsubd %f18,%f24,%f0 +/* 0x065c 610 (12 17) */ fsubd %f18,%f4,%f4 +/* 0x0660 611 (13 18) */ fdtox %f6,%f6 +/* 0x0664 612 (14 19) */ fdtox %f20,%f20 +/* 0x0668 613 (14 15) */ std %f6,[%o5-16] +/* 0x066c 614 (15 20) */ fitod %f22,%f6 +/* 0x0670 615 (15 16) */ ble,pt %icc,.L900000149 ! tprob=0.50 +/* 0x0674 (15 16) */ std %f20,[%o4-16] + +! +! ENTRY .L900000152 +! 
+ + .L900000152: /* frequency 1.0 confidence 0.0 */ +/* 0x0678 618 ( 0 4) */ fxnor %f14,%f12,%f12 +/* 0x067c 619 ( 0 5) */ fmuld %f0,%f16,%f22 +/* 0x0680 620 ( 0 1) */ add %o5,80,%o5 +/* 0x0684 621 ( 0 1) */ add %o4,80,%o4 +/* 0x0688 622 ( 1 5) */ fxnor %f14,%f10,%f0 +/* 0x068c 623 ( 1 6) */ fmuld %f4,%f16,%f24 +/* 0x0690 624 ( 1 2) */ subcc %g2,%o2,%g0 +/* 0x0694 625 ( 1 2) */ add %o0,8,%g5 +/* 0x0698 626 ( 2 7) */ fitod %f8,%f20 +/* 0x069c 627 ( 3 8) */ fitod %f9,%f8 +/* 0x06a0 628 ( 4 9) */ fsubd %f18,%f6,%f6 +/* 0x06a4 629 ( 5 10) */ fitod %f12,%f26 +/* 0x06a8 630 ( 6 11) */ fitod %f13,%f4 +/* 0x06ac 631 ( 7 12) */ fsubd %f18,%f2,%f12 +/* 0x06b0 632 ( 8 13) */ fitod %f0,%f2 +/* 0x06b4 633 ( 9 14) */ fitod %f1,%f0 +/* 0x06b8 634 (10 15) */ fsubd %f18,%f20,%f10 +/* 0x06bc 635 (10 15) */ fmuld %f6,%f16,%f20 +/* 0x06c0 636 (11 16) */ fsubd %f18,%f8,%f8 +/* 0x06c4 637 (12 17) */ fsubd %f18,%f26,%f6 +/* 0x06c8 638 (12 17) */ fmuld %f12,%f16,%f12 +/* 0x06cc 639 (13 18) */ fsubd %f18,%f4,%f4 +/* 0x06d0 640 (14 19) */ fsubd %f18,%f2,%f2 +/* 0x06d4 641 (15 20) */ fsubd %f18,%f0,%f0 +/* 0x06d8 642 (15 20) */ fmuld %f10,%f16,%f10 +/* 0x06dc 643 (16 21) */ fdtox %f24,%f24 +/* 0x06e0 644 (16 17) */ std %f24,[%o5-80] +/* 0x06e4 645 (16 21) */ fmuld %f8,%f16,%f8 +/* 0x06e8 646 (17 22) */ fdtox %f22,%f22 +/* 0x06ec 647 (17 18) */ std %f22,[%o4-80] +/* 0x06f0 648 (17 22) */ fmuld %f6,%f16,%f6 +/* 0x06f4 649 (18 23) */ fdtox %f20,%f20 +/* 0x06f8 650 (18 19) */ std %f20,[%o5-64] +/* 0x06fc 651 (18 23) */ fmuld %f4,%f16,%f4 +/* 0x0700 652 (19 24) */ fdtox %f12,%f12 +/* 0x0704 653 (19 20) */ std %f12,[%o4-64] +/* 0x0708 654 (19 24) */ fmuld %f2,%f16,%f2 +/* 0x070c 655 (20 25) */ fdtox %f10,%f10 +/* 0x0710 656 (20 21) */ std %f10,[%o5-48] +/* 0x0714 657 (20 25) */ fmuld %f0,%f16,%f0 +/* 0x0718 658 (21 26) */ fdtox %f8,%f8 +/* 0x071c 659 (21 22) */ std %f8,[%o4-48] +/* 0x0720 660 (22 27) */ fdtox %f6,%f6 +/* 0x0724 661 (22 23) */ std %f6,[%o5-32] +/* 0x0728 662 (23 28) */ fdtox 
%f4,%f4 +/* 0x072c 663 (23 24) */ std %f4,[%o4-32] +/* 0x0730 664 (24 29) */ fdtox %f2,%f2 +/* 0x0734 665 (24 25) */ std %f2,[%o5-16] +/* 0x0738 666 (25 30) */ fdtox %f0,%f0 +/* 0x073c 667 (25 26) */ bg,pn %icc,.L77000043 ! tprob=0.50 +/* 0x0740 (25 26) */ std %f0,[%o4-16] + +! +! ENTRY .L77000077 +! + + .L77000077: /* frequency 1.0 confidence 0.0 */ +/* 0x0744 670 ( 0 4) */ ldd [%g5],%f0 + +! +! ENTRY .L900000160 +! + + .L900000160: /* frequency 1.0 confidence 0.0 */ +/* 0x0748 672 ( 0 4) */ fxnor %f14,%f0,%f0 +/* 0x074c 673 ( 0 1) */ add %g2,1,%g2 +/* 0x0750 674 ( 0 1) */ add %g5,8,%g5 +/* 0x0754 675 ( 1 2) */ subcc %g2,%o2,%g0 +/* 0x0758 676 ( 4 9) */ fitod %f0,%f2 +/* 0x075c 677 ( 5 10) */ fitod %f1,%f0 +/* 0x0760 678 ( 9 14) */ fsubd %f18,%f2,%f2 +/* 0x0764 679 (10 15) */ fsubd %f18,%f0,%f0 +/* 0x0768 680 (14 19) */ fmuld %f2,%f16,%f2 +/* 0x076c 681 (15 20) */ fmuld %f0,%f16,%f0 +/* 0x0770 682 (19 24) */ fdtox %f2,%f2 +/* 0x0774 683 (19 20) */ std %f2,[%o5] +/* 0x0778 684 (19 20) */ add %o5,16,%o5 +/* 0x077c 685 (20 25) */ fdtox %f0,%f0 +/* 0x0780 686 (20 21) */ std %f0,[%o4] +/* 0x0784 687 (20 21) */ add %o4,16,%o4 +/* 0x0788 688 (20 21) */ ble,a,pt %icc,.L900000160 ! tprob=0.50 +/* 0x078c (23 27) */ ldd [%g5],%f0 + +! +! ENTRY .L77000043 +! + + .L77000043: /* frequency 1.0 confidence 0.0 */ +/* 0x0790 696 ( 0 1) */ subcc %o3,0,%g0 + +! +! ENTRY .L900000161 +! + + .L900000161: /* frequency 1.0 confidence 0.0 */ +/* 0x0794 698 ( 0 1) */ ble,a,pt %icc,.L900000159 ! tprob=0.50 +/* 0x0798 ( 0 1) */ or %g0,%o7,%i0 +/* 0x079c 703 ( 0 2) */ ldx [%fp-2256],%o2 +/* 0x07a0 704 ( 0 1) */ or %g0,%i1,%g3 +/* 0x07a4 705 ( 1 2) */ sub %o3,1,%o5 +/* 0x07a8 706 ( 1 2) */ or %g0,0,%g4 +/* 0x07ac 707 ( 2 3) */ add %fp,-2264,%g5 +/* 0x07b0 708 ( 2 3) */ or %g0,%i0,%g2 +/* 0x07b4 709 ( 3 4) */ subcc %o3,6,%g0 +/* 0x07b8 710 ( 3 4) */ sub %o5,2,%o4 +/* 0x07bc 711 ( 3 4) */ bl,pn %icc,.L77000078 ! 
tprob=0.50 +/* 0x07c0 ( 3 5) */ ldx [%fp-2264],%o0 +/* 0x07c4 713 ( 4 6) */ ld [%g3],%o1 +/* 0x07c8 714 ( 4 5) */ add %g2,4,%g2 +/* 0x07cc 715 ( 4 5) */ or %g0,3,%g4 +/* 0x07d0 716 ( 5 7) */ ld [%g3+4],%o3 +/* 0x07d4 717 ( 5 6) */ add %g3,8,%g3 +/* 0x07d8 718 ( 5 6) */ add %fp,-2240,%g5 +/* 0x07dc 719 ( 6 7) */ add %o0,%o1,%o0 +/* 0x07e0 720 ( 6 8) */ ldx [%fp-2248],%o1 +/* 0x07e4 721 ( 7 8) */ st %o0,[%g2-4] +/* 0x07e8 722 ( 7 8) */ srax %o0,32,%o0 + +! +! ENTRY .L900000145 +! + + .L900000145: /* frequency 1.0 confidence 0.0 */ +/* 0x07ec 724 ( 0 2) */ ld [%g3],%o7 +/* 0x07f0 725 ( 0 1) */ add %o2,%o3,%o2 +/* 0x07f4 726 ( 0 1) */ sra %o0,0,%o3 +/* 0x07f8 727 ( 1 3) */ ldx [%g5],%o0 +/* 0x07fc 728 ( 1 2) */ add %o2,%o3,%o2 +/* 0x0800 729 ( 1 2) */ add %g4,3,%g4 +/* 0x0804 730 ( 2 3) */ st %o2,[%g2] +/* 0x0808 731 ( 2 3) */ srax %o2,32,%o3 +/* 0x080c 732 ( 2 3) */ subcc %g4,%o4,%g0 +/* 0x0810 733 ( 3 5) */ ld [%g3+4],%o2 +/* 0x0814 734 ( 4 5) */ stx %o2,[%sp+96] +/* 0x0818 735 ( 4 5) */ add %o1,%o7,%o1 +/* 0x081c 736 ( 5 7) */ ldx [%g5+8],%o2 +/* 0x0820 737 ( 5 6) */ add %o1,%o3,%o1 +/* 0x0824 738 ( 5 6) */ add %g2,12,%g2 +/* 0x0828 739 ( 6 7) */ st %o1,[%g2-8] +/* 0x082c 740 ( 6 7) */ srax %o1,32,%o7 +/* 0x0830 741 ( 6 7) */ add %g3,12,%g3 +/* 0x0834 742 ( 7 9) */ ld [%g3-4],%o3 +/* 0x0838 743 ( 8 10) */ ldx [%sp+96],%o1 +/* 0x083c 744 (10 11) */ add %o0,%o1,%o0 +/* 0x0840 745 (10 12) */ ldx [%g5+16],%o1 +/* 0x0844 746 (11 12) */ add %o0,%o7,%o0 +/* 0x0848 747 (11 12) */ add %g5,24,%g5 +/* 0x084c 748 (11 12) */ st %o0,[%g2-4] +/* 0x0850 749 (11 12) */ ble,pt %icc,.L900000145 ! tprob=0.50 +/* 0x0854 (12 13) */ srax %o0,32,%o0 + +! +! ENTRY .L900000148 +! 
+ + .L900000148: /* frequency 1.0 confidence 0.0 */ +/* 0x0858 752 ( 0 1) */ add %o2,%o3,%o2 +/* 0x085c 753 ( 0 1) */ sra %o0,0,%o3 +/* 0x0860 754 ( 0 2) */ ld [%g3],%o0 +/* 0x0864 755 ( 1 2) */ add %o2,%o3,%o3 +/* 0x0868 756 ( 1 2) */ add %g2,8,%g2 +/* 0x086c 757 ( 2 3) */ srax %o3,32,%o2 +/* 0x0870 758 ( 2 3) */ st %o3,[%g2-8] +/* 0x0874 759 ( 2 3) */ add %o1,%o0,%o0 +/* 0x0878 760 ( 3 4) */ add %o0,%o2,%o0 +/* 0x087c 761 ( 3 4) */ st %o0,[%g2-4] +/* 0x0880 762 ( 3 4) */ subcc %g4,%o5,%g0 +/* 0x0884 763 ( 3 4) */ bg,pn %icc,.L77000061 ! tprob=0.50 +/* 0x0888 ( 4 5) */ srax %o0,32,%o7 +/* 0x088c 765 ( 4 5) */ add %g3,4,%g3 + +! +! ENTRY .L77000078 +! + + .L77000078: /* frequency 1.0 confidence 0.0 */ +/* 0x0890 767 ( 0 2) */ ld [%g3],%o2 + +! +! ENTRY .L900000158 +! + + .L900000158: /* frequency 1.0 confidence 0.0 */ +/* 0x0894 769 ( 0 2) */ ldx [%g5],%o0 +/* 0x0898 770 ( 0 1) */ sra %o7,0,%o1 +/* 0x089c 771 ( 0 1) */ add %g4,1,%g4 +/* 0x08a0 772 ( 1 2) */ add %g3,4,%g3 +/* 0x08a4 773 ( 1 2) */ add %g5,8,%g5 +/* 0x08a8 774 ( 2 3) */ add %o0,%o2,%o0 +/* 0x08ac 775 ( 2 3) */ subcc %g4,%o5,%g0 +/* 0x08b0 776 ( 3 4) */ add %o0,%o1,%o0 +/* 0x08b4 777 ( 3 4) */ st %o0,[%g2] +/* 0x08b8 778 ( 3 4) */ add %g2,4,%g2 +/* 0x08bc 779 ( 4 5) */ srax %o0,32,%o7 +/* 0x08c0 780 ( 4 5) */ ble,a,pt %icc,.L900000158 ! tprob=0.50 +/* 0x08c4 ( 4 6) */ ld [%g3],%o2 + +! +! ENTRY .L77000047 +! + + .L77000047: /* frequency 1.0 confidence 0.0 */ +/* 0x08c8 783 ( 0 1) */ or %g0,%o7,%i0 +/* 0x08cc ( 1 8) */ ret ! Result = %o1 %o0 %f0 %f1 +/* 0x08d0 ( 3 5) */ restore %g0,%g0,%g0 + +! +! ENTRY .L77000048 +! + + .L77000048: /* frequency 1.0 confidence 0.0 */ +/* 0x08d4 794 ( 0 1) */ bne,pn %icc,.L77000050 ! 
tprob=0.50 +/* 0x08d8 ( 0 1) */ sethi %hi(0xfff80000),%g2 +/* 0x08dc 796 ( 0 4) */ ldd [%g5],%f4 +/* 0x08e0 804 ( 0 1) */ srl %o1,19,%g3 +/* 0x08e4 805 ( 1 2) */ st %g3,[%sp+240] +/* 0x08e8 806 ( 1 2) */ andn %o1,%g2,%g2 +/* 0x08ec 807 ( 2 6) */ ldd [%o0],%f8 +/* 0x08f0 808 ( 3 4) */ st %g2,[%sp+244] +/* 0x08f4 809 ( 3 7) */ fxnor %f0,%f4,%f4 +/* 0x08f8 810 ( 4 8) */ ldd [%g5+8],%f6 +/* 0x08fc 814 ( 5 9) */ ldd [%o0+8],%f18 +/* 0x0900 815 ( 5 8) */ fmovs %f8,%f12 +/* 0x0904 816 ( 6 10) */ ldd [%g5+16],%f10 +/* 0x0908 817 ( 6 9) */ fmovs %f8,%f16 +/* 0x090c 818 ( 7 11) */ ldd [%g5+24],%f20 +/* 0x0910 819 ( 7 12) */ fitod %f4,%f14 +/* 0x0914 823 ( 8 10) */ ld [%i1],%g2 +/* 0x0918 824 ( 8 13) */ fitod %f5,%f4 +/* 0x091c 825 ( 9 11) */ ld [%sp+240],%f13 +/* 0x0920 826 ( 9 13) */ fxnor %f0,%f6,%f6 +/* 0x0924 827 (10 12) */ ld [%sp+244],%f17 +/* 0x0928 828 (10 14) */ fxnor %f0,%f10,%f10 +/* 0x092c 829 (11 13) */ ld [%i1+28],%o3 +/* 0x0930 830 (11 15) */ fxnor %f0,%f20,%f20 +/* 0x0934 831 (12 14) */ ld [%i1+4],%g3 +/* 0x0938 832 (12 17) */ fsubd %f12,%f8,%f12 +/* 0x093c 833 (13 14) */ stx %o3,[%sp+96] +/* 0x0940 834 (13 18) */ fsubd %f18,%f14,%f14 +/* 0x0944 835 (14 16) */ ld [%i1+8],%g4 +/* 0x0948 836 (14 19) */ fsubd %f16,%f8,%f8 +/* 0x094c 837 (15 17) */ ld [%i1+12],%g5 +/* 0x0950 838 (15 20) */ fsubd %f18,%f4,%f4 +/* 0x0954 839 (16 18) */ ld [%i1+16],%o0 +/* 0x0958 840 (16 21) */ fitod %f6,%f22 +/* 0x095c 841 (17 19) */ ld [%i1+20],%o1 +/* 0x0960 842 (17 22) */ fitod %f7,%f6 +/* 0x0964 843 (18 20) */ ld [%i1+24],%o2 +/* 0x0968 844 (18 23) */ fitod %f10,%f16 +/* 0x096c 845 (18 23) */ fmuld %f14,%f12,%f24 +/* 0x0970 846 (19 24) */ fitod %f20,%f28 +/* 0x0974 847 (19 24) */ fmuld %f14,%f8,%f14 +/* 0x0978 848 (20 25) */ fitod %f11,%f10 +/* 0x097c 849 (20 25) */ fmuld %f4,%f12,%f26 +/* 0x0980 850 (21 26) */ fsubd %f18,%f22,%f22 +/* 0x0984 851 (21 26) */ fmuld %f4,%f8,%f4 +/* 0x0988 852 (22 27) */ fsubd %f18,%f6,%f6 +/* 0x098c 853 (23 28) */ fdtox %f24,%f24 +/* 0x0990 854 
(23 24) */ std %f24,[%sp+224] +/* 0x0994 855 (24 29) */ fdtox %f14,%f14 +/* 0x0998 856 (24 25) */ std %f14,[%sp+232] +/* 0x099c 857 (25 30) */ fdtox %f26,%f14 +/* 0x09a0 858 (25 26) */ std %f14,[%sp+208] +/* 0x09a4 859 (26 28) */ ldx [%sp+224],%o4 +/* 0x09a8 860 (26 31) */ fitod %f21,%f20 +/* 0x09ac 861 (26 31) */ fmuld %f22,%f12,%f30 +/* 0x09b0 862 (27 29) */ ldx [%sp+232],%o5 +/* 0x09b4 863 (27 32) */ fsubd %f18,%f16,%f16 +/* 0x09b8 864 (27 32) */ fmuld %f22,%f8,%f22 +/* 0x09bc 865 (28 29) */ sllx %o4,19,%o4 +/* 0x09c0 866 (28 33) */ fdtox %f4,%f4 +/* 0x09c4 867 (28 29) */ std %f4,[%sp+216] +/* 0x09c8 868 (28 33) */ fmuld %f6,%f12,%f24 +/* 0x09cc 869 (29 34) */ fsubd %f18,%f28,%f26 +/* 0x09d0 870 (29 30) */ add %o5,%o4,%o4 +/* 0x09d4 871 (29 34) */ fmuld %f6,%f8,%f6 +/* 0x09d8 872 (30 35) */ fsubd %f18,%f10,%f10 +/* 0x09dc 873 (30 31) */ add %o4,%g2,%g2 +/* 0x09e0 874 (30 31) */ st %g2,[%i0] +/* 0x09e4 875 (31 33) */ ldx [%sp+208],%o7 +/* 0x09e8 876 (31 32) */ srlx %g2,32,%o5 +/* 0x09ec 877 (31 36) */ fsubd %f18,%f20,%f18 +/* 0x09f0 878 (32 37) */ fdtox %f30,%f28 +/* 0x09f4 879 (32 33) */ std %f28,[%sp+192] +/* 0x09f8 880 (32 37) */ fmuld %f16,%f12,%f14 +/* 0x09fc 881 (33 34) */ sllx %o7,19,%o4 +/* 0x0a00 882 (33 35) */ ldx [%sp+216],%o7 +/* 0x0a04 883 (33 38) */ fdtox %f22,%f20 +/* 0x0a08 884 (33 38) */ fmuld %f16,%f8,%f16 +/* 0x0a0c 885 (34 35) */ std %f20,[%sp+200] +/* 0x0a10 886 (34 39) */ fdtox %f24,%f20 +/* 0x0a14 887 (34 39) */ fmuld %f26,%f12,%f22 +/* 0x0a18 888 (35 36) */ std %f20,[%sp+176] +/* 0x0a1c 889 (35 36) */ add %o7,%o4,%o4 +/* 0x0a20 890 (35 40) */ fdtox %f6,%f6 +/* 0x0a24 891 (35 40) */ fmuld %f10,%f12,%f4 +/* 0x0a28 892 (36 38) */ ldx [%sp+192],%o3 +/* 0x0a2c 893 (36 37) */ add %o4,%g3,%g3 +/* 0x0a30 894 (36 41) */ fmuld %f10,%f8,%f10 +/* 0x0a34 895 (37 38) */ std %f6,[%sp+184] +/* 0x0a38 896 (37 38) */ add %g3,%o5,%g3 +/* 0x0a3c 897 (37 42) */ fdtox %f14,%f6 +/* 0x0a40 898 (37 42) */ fmuld %f26,%f8,%f20 +/* 0x0a44 899 (38 40) */ ldx 
[%sp+200],%o4 +/* 0x0a48 900 (38 39) */ sllx %o3,19,%o3 +/* 0x0a4c 901 (38 39) */ srlx %g3,32,%o5 +/* 0x0a50 902 (38 43) */ fdtox %f16,%f14 +/* 0x0a54 903 (39 40) */ std %f6,[%sp+160] +/* 0x0a58 904 (39 44) */ fmuld %f18,%f12,%f12 +/* 0x0a5c 905 (40 42) */ ldx [%sp+176],%o7 +/* 0x0a60 906 (40 41) */ add %o4,%o3,%o3 +/* 0x0a64 907 (40 45) */ fdtox %f4,%f16 +/* 0x0a68 908 (40 45) */ fmuld %f18,%f8,%f18 +/* 0x0a6c 909 (41 42) */ std %f14,[%sp+168] +/* 0x0a70 910 (41 42) */ add %o3,%g4,%g4 +/* 0x0a74 911 (41 46) */ fdtox %f10,%f4 +/* 0x0a78 912 (42 44) */ ldx [%sp+184],%o3 +/* 0x0a7c 913 (42 43) */ sllx %o7,19,%o4 +/* 0x0a80 914 (42 43) */ add %g4,%o5,%g4 +/* 0x0a84 915 (42 47) */ fdtox %f22,%f14 +/* 0x0a88 916 (43 44) */ std %f16,[%sp+144] +/* 0x0a8c 917 (43 44) */ srlx %g4,32,%o5 +/* 0x0a90 918 (43 48) */ fdtox %f20,%f6 +/* 0x0a94 919 (44 46) */ ldx [%sp+160],%o7 +/* 0x0a98 920 (44 45) */ add %o3,%o4,%o3 +/* 0x0a9c 921 (44 49) */ fdtox %f12,%f16 +/* 0x0aa0 922 (45 46) */ std %f4,[%sp+152] +/* 0x0aa4 923 (45 46) */ add %o3,%g5,%g5 +/* 0x0aa8 924 (45 50) */ fdtox %f18,%f8 +/* 0x0aac 925 (46 48) */ ldx [%sp+168],%o3 +/* 0x0ab0 926 (46 47) */ sllx %o7,19,%o4 +/* 0x0ab4 927 (46 47) */ add %g5,%o5,%g5 +/* 0x0ab8 928 (47 48) */ std %f14,[%sp+128] +/* 0x0abc 929 (47 48) */ srlx %g5,32,%o5 +/* 0x0ac0 930 (48 49) */ std %f6,[%sp+136] +/* 0x0ac4 931 (48 49) */ add %o3,%o4,%o3 +/* 0x0ac8 932 (49 50) */ std %f16,[%sp+112] +/* 0x0acc 933 (49 50) */ add %o3,%o0,%o0 +/* 0x0ad0 934 (50 52) */ ldx [%sp+144],%o7 +/* 0x0ad4 935 (50 51) */ add %o0,%o5,%o0 +/* 0x0ad8 936 (51 53) */ ldx [%sp+152],%o3 +/* 0x0adc 937 (52 53) */ std %f8,[%sp+120] +/* 0x0ae0 938 (52 53) */ sllx %o7,19,%o4 +/* 0x0ae4 939 (52 53) */ srlx %o0,32,%o7 +/* 0x0ae8 940 (53 54) */ stx %o0,[%sp+104] +/* 0x0aec 941 (53 54) */ add %o3,%o4,%o3 +/* 0x0af0 942 (54 56) */ ldx [%sp+128],%o5 +/* 0x0af4 943 (54 55) */ add %o3,%o1,%o1 +/* 0x0af8 944 (55 57) */ ldx [%sp+136],%o0 +/* 0x0afc 945 (55 56) */ add %o1,%o7,%o1 +/* 0x0b00 
946 (56 57) */ st %g3,[%i0+4] +/* 0x0b04 947 (56 57) */ sllx %o5,19,%o3 +/* 0x0b08 948 (57 59) */ ldx [%sp+112],%o4 +/* 0x0b0c 949 (57 58) */ add %o0,%o3,%o3 +/* 0x0b10 950 (58 60) */ ldx [%sp+120],%o0 +/* 0x0b14 951 (58 59) */ add %o3,%o2,%o2 +/* 0x0b18 952 (58 59) */ srlx %o1,32,%o3 +/* 0x0b1c 953 (59 60) */ st %o1,[%i0+20] +/* 0x0b20 954 (59 60) */ sllx %o4,19,%g2 +/* 0x0b24 955 (59 60) */ add %o2,%o3,%o2 +/* 0x0b28 956 (60 62) */ ldx [%sp+96],%o4 +/* 0x0b2c 957 (60 61) */ srlx %o2,32,%g3 +/* 0x0b30 958 (60 61) */ add %o0,%g2,%g2 +/* 0x0b34 959 (61 63) */ ldx [%sp+104],%o0 +/* 0x0b38 960 (62 63) */ st %o2,[%i0+24] +/* 0x0b3c 961 (62 63) */ add %g2,%o4,%g2 +/* 0x0b40 962 (63 64) */ st %o0,[%i0+16] +/* 0x0b44 963 (63 64) */ add %g2,%g3,%g2 +/* 0x0b48 964 (64 65) */ st %g4,[%i0+8] +/* 0x0b4c 968 (64 65) */ srlx %g2,32,%o7 +/* 0x0b50 969 (65 66) */ st %g5,[%i0+12] +/* 0x0b54 970 (66 67) */ st %g2,[%i0+28] +/* 0x0b58 971 (66 67) */ or %g0,%o7,%i0 +/* 0x0b5c (67 74) */ ret ! Result = %o1 %o0 %f0 %f1 +/* 0x0b60 (69 71) */ restore %g0,%g0,%g0 + +! +! ENTRY .L77000050 +! + + .L77000050: /* frequency 1.0 confidence 0.0 */ +/* 0x0b64 978 ( 0 1) */ subcc %o2,16,%g0 +/* 0x0b68 979 ( 0 1) */ bne,pn %icc,.L77000073 ! 
tprob=0.50 +/* 0x0b6c ( 0 1) */ sethi %hi(0xfff80000),%g2 +/* 0x0b70 981 ( 1 5) */ ldd [%g5],%f4 +/* 0x0b74 982 ( 2 6) */ ldd [%g5+8],%f6 +/* 0x0b78 989 ( 2 3) */ andn %o1,%g2,%g2 +/* 0x0b7c 993 ( 2 3) */ srl %o1,19,%g3 +/* 0x0b80 994 ( 3 7) */ ldd [%g5+16],%f8 +/* 0x0b84 995 ( 4 8) */ fxnor %f0,%f4,%f4 +/* 0x0b88 996 ( 4 5) */ st %g2,[%sp+356] +/* 0x0b8c 997 ( 5 9) */ ldd [%o0],%f20 +/* 0x0b90 998 ( 5 9) */ fxnor %f0,%f6,%f6 +/* 0x0b94 999 ( 6 7) */ st %g3,[%sp+352] +/* 0x0b98 1000 ( 6 10) */ fxnor %f0,%f8,%f8 +/* 0x0b9c 1005 ( 7 11) */ ldd [%o0+8],%f30 +/* 0x0ba0 1006 ( 8 13) */ fitod %f4,%f22 +/* 0x0ba4 1007 ( 8 12) */ ldd [%g5+24],%f10 +/* 0x0ba8 1008 ( 9 12) */ fmovs %f20,%f24 +/* 0x0bac 1009 ( 9 13) */ ldd [%g5+32],%f12 +/* 0x0bb0 1010 (10 15) */ fitod %f5,%f4 +/* 0x0bb4 1011 (10 14) */ ldd [%g5+40],%f14 +/* 0x0bb8 1012 (11 14) */ fmovs %f20,%f26 +/* 0x0bbc 1013 (11 15) */ ldd [%g5+48],%f16 +/* 0x0bc0 1014 (12 14) */ ld [%sp+356],%f25 +/* 0x0bc4 1015 (12 17) */ fitod %f6,%f28 +/* 0x0bc8 1016 (13 15) */ ld [%sp+352],%f27 +/* 0x0bcc 1017 (13 18) */ fitod %f8,%f32 +/* 0x0bd0 1018 (14 19) */ fsubd %f30,%f22,%f22 +/* 0x0bd4 1019 (14 18) */ ldd [%g5+56],%f18 +/* 0x0bd8 1020 (15 20) */ fsubd %f24,%f20,%f24 +/* 0x0bdc 1021 (16 21) */ fsubd %f26,%f20,%f20 +/* 0x0be0 1022 (17 22) */ fsubd %f30,%f4,%f4 +/* 0x0be4 1023 (18 23) */ fsubd %f30,%f28,%f26 +/* 0x0be8 1024 (19 24) */ fitod %f7,%f6 +/* 0x0bec 1025 (20 25) */ fsubd %f30,%f32,%f28 +/* 0x0bf0 1026 (20 25) */ fmuld %f22,%f24,%f32 +/* 0x0bf4 1027 (21 26) */ fmuld %f22,%f20,%f22 +/* 0x0bf8 1028 (21 25) */ fxnor %f0,%f10,%f10 +/* 0x0bfc 1029 (22 27) */ fmuld %f4,%f24,%f44 +/* 0x0c00 1030 (22 27) */ fitod %f9,%f8 +/* 0x0c04 1031 (23 28) */ fmuld %f4,%f20,%f4 +/* 0x0c08 1032 (23 27) */ fxnor %f0,%f12,%f12 +/* 0x0c0c 1033 (24 29) */ fsubd %f30,%f6,%f6 +/* 0x0c10 1034 (24 29) */ fmuld %f26,%f24,%f46 +/* 0x0c14 1035 (25 30) */ fitod %f10,%f34 +/* 0x0c18 1036 (26 31) */ fdtox %f22,%f22 +/* 0x0c1c 1037 (26 27) */ std 
%f22,[%sp+336] +/* 0x0c20 1038 (27 32) */ fmuld %f26,%f20,%f22 +/* 0x0c24 1039 (27 32) */ fdtox %f44,%f26 +/* 0x0c28 1040 (27 28) */ std %f26,[%sp+328] +/* 0x0c2c 1041 (28 33) */ fdtox %f4,%f4 +/* 0x0c30 1042 (28 29) */ std %f4,[%sp+320] +/* 0x0c34 1043 (29 34) */ fmuld %f6,%f24,%f26 +/* 0x0c38 1044 (29 34) */ fsubd %f30,%f8,%f8 +/* 0x0c3c 1045 (30 35) */ fdtox %f46,%f4 +/* 0x0c40 1046 (30 31) */ std %f4,[%sp+312] +/* 0x0c44 1047 (31 36) */ fmuld %f28,%f24,%f4 +/* 0x0c48 1048 (31 36) */ fdtox %f32,%f32 +/* 0x0c4c 1049 (31 32) */ std %f32,[%sp+344] +/* 0x0c50 1050 (32 37) */ fitod %f11,%f10 +/* 0x0c54 1051 (32 37) */ fmuld %f6,%f20,%f32 +/* 0x0c58 1052 (33 38) */ fsubd %f30,%f34,%f34 +/* 0x0c5c 1053 (34 39) */ fdtox %f22,%f6 +/* 0x0c60 1054 (34 35) */ std %f6,[%sp+304] +/* 0x0c64 1058 (35 40) */ fitod %f12,%f36 +/* 0x0c68 1059 (35 40) */ fmuld %f28,%f20,%f6 +/* 0x0c6c 1060 (36 41) */ fdtox %f26,%f22 +/* 0x0c70 1061 (36 37) */ std %f22,[%sp+296] +/* 0x0c74 1062 (37 42) */ fmuld %f8,%f24,%f22 +/* 0x0c78 1063 (37 42) */ fdtox %f4,%f4 +/* 0x0c7c 1064 (37 38) */ std %f4,[%sp+280] +/* 0x0c80 1065 (38 43) */ fmuld %f8,%f20,%f8 +/* 0x0c84 1066 (38 43) */ fsubd %f30,%f10,%f10 +/* 0x0c88 1067 (39 44) */ fmuld %f34,%f24,%f4 +/* 0x0c8c 1068 (39 44) */ fitod %f13,%f12 +/* 0x0c90 1069 (40 45) */ fsubd %f30,%f36,%f36 +/* 0x0c94 1070 (41 46) */ fdtox %f6,%f6 +/* 0x0c98 1071 (41 42) */ std %f6,[%sp+272] +/* 0x0c9c 1072 (42 46) */ fxnor %f0,%f14,%f14 +/* 0x0ca0 1073 (42 47) */ fmuld %f34,%f20,%f6 +/* 0x0ca4 1074 (43 48) */ fdtox %f22,%f22 +/* 0x0ca8 1075 (43 44) */ std %f22,[%sp+264] +/* 0x0cac 1076 (44 49) */ fdtox %f8,%f8 +/* 0x0cb0 1077 (44 45) */ std %f8,[%sp+256] +/* 0x0cb4 1078 (44 49) */ fmuld %f10,%f24,%f22 +/* 0x0cb8 1079 (45 50) */ fdtox %f4,%f4 +/* 0x0cbc 1080 (45 46) */ std %f4,[%sp+248] +/* 0x0cc0 1081 (45 50) */ fmuld %f10,%f20,%f8 +/* 0x0cc4 1082 (46 51) */ fsubd %f30,%f12,%f4 +/* 0x0cc8 1083 (46 51) */ fmuld %f36,%f24,%f10 +/* 0x0ccc 1084 (47 52) */ fitod %f14,%f38 
+/* 0x0cd0 1085 (48 53) */ fdtox %f6,%f6 +/* 0x0cd4 1086 (48 49) */ std %f6,[%sp+240] +/* 0x0cd8 1087 (49 54) */ fdtox %f22,%f12 +/* 0x0cdc 1088 (49 50) */ std %f12,[%sp+232] +/* 0x0ce0 1089 (49 54) */ fmuld %f36,%f20,%f6 +/* 0x0ce4 1090 (50 55) */ fdtox %f8,%f8 +/* 0x0ce8 1091 (50 51) */ std %f8,[%sp+224] +/* 0x0cec 1092 (51 56) */ fdtox %f10,%f22 +/* 0x0cf0 1093 (51 52) */ std %f22,[%sp+216] +/* 0x0cf4 1094 (51 56) */ fmuld %f4,%f24,%f8 +/* 0x0cf8 1095 (52 57) */ fitod %f15,%f14 +/* 0x0cfc 1096 (52 57) */ fmuld %f4,%f20,%f4 +/* 0x0d00 1097 (53 58) */ fsubd %f30,%f38,%f22 +/* 0x0d04 1098 (54 58) */ fxnor %f0,%f16,%f16 +/* 0x0d08 1099 (55 60) */ fdtox %f6,%f6 +/* 0x0d0c 1100 (55 56) */ std %f6,[%sp+208] +/* 0x0d10 1101 (56 61) */ fdtox %f8,%f6 +/* 0x0d14 1102 (56 57) */ std %f6,[%sp+200] +/* 0x0d18 1103 (57 62) */ fsubd %f30,%f14,%f10 +/* 0x0d1c 1104 (58 63) */ fitod %f16,%f40 +/* 0x0d20 1105 (58 63) */ fmuld %f22,%f24,%f6 +/* 0x0d24 1106 (59 64) */ fdtox %f4,%f4 +/* 0x0d28 1107 (59 60) */ std %f4,[%sp+192] +/* 0x0d2c 1108 (60 65) */ fitod %f17,%f16 +/* 0x0d30 1109 (60 65) */ fmuld %f22,%f20,%f4 +/* 0x0d34 1110 (61 65) */ fxnor %f0,%f18,%f18 +/* 0x0d38 1111 (62 67) */ fdtox %f32,%f32 +/* 0x0d3c 1112 (62 63) */ std %f32,[%sp+288] +/* 0x0d40 1113 (62 67) */ fmuld %f10,%f24,%f8 +/* 0x0d44 1114 (63 68) */ fdtox %f6,%f6 +/* 0x0d48 1115 (63 64) */ std %f6,[%sp+184] +/* 0x0d4c 1116 (63 68) */ fmuld %f10,%f20,%f22 +/* 0x0d50 1117 (64 69) */ fsubd %f30,%f40,%f6 +/* 0x0d54 1118 (65 70) */ fdtox %f4,%f4 +/* 0x0d58 1119 (65 66) */ std %f4,[%sp+176] +/* 0x0d5c 1120 (66 71) */ fsubd %f30,%f16,%f10 +/* 0x0d60 1121 (67 72) */ fdtox %f8,%f4 +/* 0x0d64 1122 (67 68) */ std %f4,[%sp+168] +/* 0x0d68 1123 (68 73) */ fdtox %f22,%f4 +/* 0x0d6c 1124 (68 69) */ std %f4,[%sp+160] +/* 0x0d70 1125 (69 74) */ fitod %f18,%f42 +/* 0x0d74 1126 (69 74) */ fmuld %f6,%f24,%f4 +/* 0x0d78 1127 (70 75) */ fmuld %f6,%f20,%f22 +/* 0x0d7c 1128 (71 76) */ fmuld %f10,%f24,%f6 +/* 0x0d80 1129 (72 77) */ fmuld 
%f10,%f20,%f8 +/* 0x0d84 1130 (74 79) */ fdtox %f4,%f4 +/* 0x0d88 1131 (74 75) */ std %f4,[%sp+152] +/* 0x0d8c 1132 (75 80) */ fsubd %f30,%f42,%f4 +/* 0x0d90 1133 (76 81) */ fdtox %f6,%f6 +/* 0x0d94 1134 (76 77) */ std %f6,[%sp+136] +/* 0x0d98 1135 (77 82) */ fdtox %f22,%f22 +/* 0x0d9c 1136 (77 78) */ std %f22,[%sp+144] +/* 0x0da0 1137 (78 83) */ fdtox %f8,%f22 +/* 0x0da4 1138 (78 79) */ std %f22,[%sp+128] +/* 0x0da8 1139 (79 84) */ fitod %f19,%f22 +/* 0x0dac 1140 (80 85) */ fmuld %f4,%f24,%f6 +/* 0x0db0 1141 (81 86) */ fmuld %f4,%f20,%f4 +/* 0x0db4 1142 (84 89) */ fsubd %f30,%f22,%f22 +/* 0x0db8 1143 (85 90) */ fdtox %f6,%f6 +/* 0x0dbc 1144 (85 86) */ std %f6,[%sp+120] +/* 0x0dc0 1145 (86 91) */ fdtox %f4,%f4 +/* 0x0dc4 1146 (86 87) */ std %f4,[%sp+112] +/* 0x0dc8 1150 (87 89) */ ldx [%sp+336],%g2 +/* 0x0dcc 1151 (88 90) */ ldx [%sp+344],%g3 +/* 0x0dd0 1152 (89 91) */ ld [%i1],%g4 +/* 0x0dd4 1153 (89 90) */ sllx %g2,19,%g2 +/* 0x0dd8 1154 (89 94) */ fmuld %f22,%f20,%f4 +/* 0x0ddc 1155 (90 92) */ ldx [%sp+328],%g5 +/* 0x0de0 1156 (90 91) */ add %g3,%g2,%g2 +/* 0x0de4 1157 (90 95) */ fmuld %f22,%f24,%f6 +/* 0x0de8 1158 (91 93) */ ldx [%sp+320],%g3 +/* 0x0dec 1159 (91 92) */ add %g2,%g4,%g4 +/* 0x0df0 1160 (92 94) */ ldx [%sp+304],%o0 +/* 0x0df4 1161 (93 94) */ st %g4,[%i0] +/* 0x0df8 1162 (93 94) */ sllx %g3,19,%g2 +/* 0x0dfc 1163 (93 94) */ srlx %g4,32,%g4 +/* 0x0e00 1164 (94 96) */ ld [%i1+4],%g3 +/* 0x0e04 1165 (94 95) */ add %g5,%g2,%g2 +/* 0x0e08 1166 (94 99) */ fdtox %f4,%f4 +/* 0x0e0c 1167 (95 97) */ ldx [%sp+312],%g5 +/* 0x0e10 1168 (95 100) */ fdtox %f6,%f6 +/* 0x0e14 1169 (96 98) */ ldx [%sp+288],%o1 +/* 0x0e18 1170 (96 97) */ add %g2,%g3,%g2 +/* 0x0e1c 1171 (96 97) */ sllx %o0,19,%g3 +/* 0x0e20 1172 (97 99) */ ldx [%sp+272],%o2 +/* 0x0e24 1173 (97 98) */ add %g2,%g4,%g2 +/* 0x0e28 1174 (97 98) */ add %g5,%g3,%g3 +/* 0x0e2c 1175 (98 100) */ ld [%i1+8],%g4 +/* 0x0e30 1176 (98 99) */ srlx %g2,32,%o0 +/* 0x0e34 1177 (99 101) */ ldx [%sp+296],%g5 +/* 0x0e38 
1178 (100 101) */ st %g2,[%i0+4] +/* 0x0e3c 1179 (100 101) */ sllx %o2,19,%g2 +/* 0x0e40 1180 (100 101) */ add %g3,%g4,%g3 +/* 0x0e44 1181 (101 103) */ ldx [%sp+256],%o2 +/* 0x0e48 1182 (101 102) */ sllx %o1,19,%g4 +/* 0x0e4c 1183 (101 102) */ add %g3,%o0,%g3 +/* 0x0e50 1184 (102 104) */ ld [%i1+12],%o0 +/* 0x0e54 1185 (102 103) */ srlx %g3,32,%o1 +/* 0x0e58 1186 (102 103) */ add %g5,%g4,%g4 +/* 0x0e5c 1187 (103 105) */ ldx [%sp+280],%g5 +/* 0x0e60 1188 (104 105) */ st %g3,[%i0+8] +/* 0x0e64 1189 (104 105) */ sllx %o2,19,%g3 +/* 0x0e68 1190 (104 105) */ add %g4,%o0,%g4 +/* 0x0e6c 1191 (105 107) */ ld [%i1+16],%o0 +/* 0x0e70 1192 (105 106) */ add %g5,%g2,%g2 +/* 0x0e74 1193 (105 106) */ add %g4,%o1,%g4 +/* 0x0e78 1194 (106 108) */ ldx [%sp+264],%g5 +/* 0x0e7c 1195 (106 107) */ srlx %g4,32,%o1 +/* 0x0e80 1196 (107 109) */ ldx [%sp+240],%o2 +/* 0x0e84 1197 (107 108) */ add %g2,%o0,%g2 +/* 0x0e88 1198 (108 110) */ ld [%i1+20],%o0 +/* 0x0e8c 1199 (108 109) */ add %g5,%g3,%g3 +/* 0x0e90 1200 (108 109) */ add %g2,%o1,%g2 +/* 0x0e94 1201 (109 111) */ ldx [%sp+248],%g5 +/* 0x0e98 1202 (109 110) */ srlx %g2,32,%o1 +/* 0x0e9c 1203 (110 111) */ st %g4,[%i0+12] +/* 0x0ea0 1204 (110 111) */ sllx %o2,19,%g4 +/* 0x0ea4 1205 (110 111) */ add %g3,%o0,%g3 +/* 0x0ea8 1206 (111 113) */ ld [%i1+24],%o0 +/* 0x0eac 1207 (111 112) */ add %g5,%g4,%g4 +/* 0x0eb0 1208 (111 112) */ add %g3,%o1,%g3 +/* 0x0eb4 1209 (112 114) */ ldx [%sp+224],%o2 +/* 0x0eb8 1210 (112 113) */ srlx %g3,32,%o1 +/* 0x0ebc 1211 (113 115) */ ldx [%sp+232],%g5 +/* 0x0ec0 1212 (113 114) */ add %g4,%o0,%g4 +/* 0x0ec4 1213 (114 115) */ st %g2,[%i0+16] +/* 0x0ec8 1214 (114 115) */ sllx %o2,19,%g2 +/* 0x0ecc 1215 (114 115) */ add %g4,%o1,%g4 +/* 0x0ed0 1216 (115 117) */ ld [%i1+28],%o0 +/* 0x0ed4 1217 (115 116) */ srlx %g4,32,%o1 +/* 0x0ed8 1218 (115 116) */ add %g5,%g2,%g2 +/* 0x0edc 1222 (116 118) */ ldx [%sp+208],%o2 +/* 0x0ee0 1223 (117 119) */ ldx [%sp+216],%g5 +/* 0x0ee4 1224 (117 118) */ add %g2,%o0,%g2 +/* 0x0ee8 
1225 (118 119) */ st %g3,[%i0+20] +/* 0x0eec 1226 (118 119) */ sllx %o2,19,%g3 +/* 0x0ef0 1227 (118 119) */ add %g2,%o1,%g2 +/* 0x0ef4 1228 (119 121) */ ld [%i1+32],%o0 +/* 0x0ef8 1229 (119 120) */ srlx %g2,32,%o1 +/* 0x0efc 1230 (119 120) */ add %g5,%g3,%g3 +/* 0x0f00 1231 (120 122) */ ldx [%sp+192],%o2 +/* 0x0f04 1232 (121 123) */ ldx [%sp+200],%g5 +/* 0x0f08 1233 (121 122) */ add %g3,%o0,%g3 +/* 0x0f0c 1234 (122 123) */ st %g4,[%i0+24] +/* 0x0f10 1235 (122 123) */ sllx %o2,19,%g4 +/* 0x0f14 1236 (122 123) */ add %g3,%o1,%g3 +/* 0x0f18 1237 (123 125) */ ld [%i1+36],%o0 +/* 0x0f1c 1238 (123 124) */ srlx %g3,32,%o1 +/* 0x0f20 1239 (123 124) */ add %g5,%g4,%g4 +/* 0x0f24 1240 (124 126) */ ldx [%sp+176],%o2 +/* 0x0f28 1241 (125 127) */ ldx [%sp+184],%g5 +/* 0x0f2c 1242 (125 126) */ add %g4,%o0,%g4 +/* 0x0f30 1243 (126 127) */ st %g2,[%i0+28] +/* 0x0f34 1244 (126 127) */ sllx %o2,19,%g2 +/* 0x0f38 1245 (126 127) */ add %g4,%o1,%g4 +/* 0x0f3c 1246 (127 129) */ ld [%i1+40],%o0 +/* 0x0f40 1247 (127 128) */ srlx %g4,32,%o1 +/* 0x0f44 1248 (127 128) */ add %g5,%g2,%g2 +/* 0x0f48 1249 (128 130) */ ldx [%sp+160],%o2 +/* 0x0f4c 1250 (129 131) */ ldx [%sp+168],%g5 +/* 0x0f50 1251 (129 130) */ add %g2,%o0,%g2 +/* 0x0f54 1252 (130 131) */ st %g3,[%i0+32] +/* 0x0f58 1253 (130 131) */ sllx %o2,19,%g3 +/* 0x0f5c 1254 (130 131) */ add %g2,%o1,%g2 +/* 0x0f60 1255 (131 133) */ ld [%i1+44],%o0 +/* 0x0f64 1256 (131 132) */ srlx %g2,32,%o1 +/* 0x0f68 1257 (131 132) */ add %g5,%g3,%g3 +/* 0x0f6c 1258 (132 134) */ ldx [%sp+144],%o2 +/* 0x0f70 1259 (133 135) */ ldx [%sp+152],%g5 +/* 0x0f74 1260 (133 134) */ add %g3,%o0,%g3 +/* 0x0f78 1261 (134 135) */ st %g4,[%i0+36] +/* 0x0f7c 1262 (134 135) */ sllx %o2,19,%g4 +/* 0x0f80 1263 (134 135) */ add %g3,%o1,%g3 +/* 0x0f84 1264 (135 137) */ ld [%i1+48],%o0 +/* 0x0f88 1265 (135 136) */ srlx %g3,32,%o1 +/* 0x0f8c 1266 (135 136) */ add %g5,%g4,%g4 +/* 0x0f90 1267 (136 138) */ ldx [%sp+128],%o2 +/* 0x0f94 1268 (137 139) */ ldx [%sp+136],%g5 +/* 0x0f98 
1269 (137 138) */ add %g4,%o0,%g4 +/* 0x0f9c 1270 (138 139) */ std %f4,[%sp+96] +/* 0x0fa0 1271 (138 139) */ add %g4,%o1,%g4 +/* 0x0fa4 1272 (139 140) */ st %g2,[%i0+40] +/* 0x0fa8 1273 (139 140) */ sllx %o2,19,%g2 +/* 0x0fac 1274 (139 140) */ srlx %g4,32,%o1 +/* 0x0fb0 1275 (140 142) */ ld [%i1+52],%o0 +/* 0x0fb4 1276 (140 141) */ add %g5,%g2,%g2 +/* 0x0fb8 1277 (141 142) */ std %f6,[%sp+104] +/* 0x0fbc 1278 (142 144) */ ldx [%sp+120],%g5 +/* 0x0fc0 1279 (142 143) */ add %g2,%o0,%g2 +/* 0x0fc4 1280 (143 144) */ st %g3,[%i0+44] +/* 0x0fc8 1281 (143 144) */ add %g2,%o1,%g2 +/* 0x0fcc 1282 (144 146) */ ldx [%sp+112],%o2 +/* 0x0fd0 1283 (144 145) */ srlx %g2,32,%o1 +/* 0x0fd4 1284 (145 147) */ ld [%i1+56],%o0 +/* 0x0fd8 1285 (146 147) */ st %g4,[%i0+48] +/* 0x0fdc 1286 (146 147) */ sllx %o2,19,%g3 +/* 0x0fe0 1287 (147 149) */ ldx [%sp+96],%o2 +/* 0x0fe4 1288 (147 148) */ add %g5,%g3,%g3 +/* 0x0fe8 1289 (148 150) */ ldx [%sp+104],%g5 +/* 0x0fec 1290 (148 149) */ add %g3,%o0,%g3 +/* 0x0ff0 1291 (149 151) */ ld [%i1+60],%o0 +/* 0x0ff4 1292 (149 150) */ sllx %o2,19,%g4 +/* 0x0ff8 1293 (149 150) */ add %g3,%o1,%g3 +/* 0x0ffc 1294 (150 151) */ st %g2,[%i0+52] +/* 0x1000 1295 (150 151) */ srlx %g3,32,%o1 +/* 0x1004 1296 (150 151) */ add %g5,%g4,%g4 +/* 0x1008 1297 (151 152) */ st %g3,[%i0+56] +/* 0x100c 1298 (151 152) */ add %g4,%o0,%g2 +/* 0x1010 1299 (152 153) */ add %g2,%o1,%g2 +/* 0x1014 1300 (152 153) */ st %g2,[%i0+60] +/* 0x1018 1304 (153 154) */ srlx %g2,32,%o7 + +! +! ENTRY .L77000061 +! + + .L77000061: /* frequency 1.0 confidence 0.0 */ +/* 0x119c 1437 ( 0 1) */ or %g0,%o7,%i0 + +! +! ENTRY .L900000159 +! + + .L900000159: /* frequency 1.0 confidence 0.0 */ +/* 0x11a0 ( 0 7) */ ret ! Result = %o1 %o0 %f0 %f1 +/* 0x11a4 ( 2 4) */ restore %g0,%g0,%g0 + +! +! ENTRY .L77000073 +! + + .L77000073: /* frequency 1.0 confidence 0.0 */ + or %g0, %i4, %o2 + or %g0, %o0, %o1 + or %g0, %i3, %o0 + +! +! ENTRY .L77000052 +! 
+ + .L77000052: /* frequency 1.0 confidence 0.0 */ +/* 0x1028 1318 ( 0 1) */ andn %o2,%g2,%g2 +/* 0x102c 1319 ( 0 1) */ st %g2,[%sp+96] +/* 0x1030 1325 ( 0 1) */ add %o0,1,%g3 +/* 0x1034 1326 ( 0 1) */ fmovd %f0,%f14 +/* 0x1038 1327 ( 1 2) */ srl %o2,19,%g2 +/* 0x103c 1328 ( 1 2) */ st %g2,[%sp+92] +/* 0x1040 1329 ( 1 2) */ or %g0,0,%o5 +/* 0x1044 1330 ( 2 3) */ srl %g3,31,%g2 +/* 0x1048 1331 ( 2 5) */ ldd [%o1],%f6 +/* 0x104c 1335 ( 2 3) */ sethi %hi(0x1800),%g1 +/* 0x1050 1336 ( 3 4) */ add %g3,%g2,%g2 +/* 0x1054 1337 ( 3 4) */ xor %g1,-304,%g1 +/* 0x1058 1338 ( 3 6) */ ldd [%o1+8],%f20 +/* 0x105c 1339 ( 4 5) */ sra %g2,1,%o3 +/* 0x1060 1340 ( 4 5) */ fmovs %f6,%f8 +/* 0x1064 1341 ( 4 5) */ add %g1,%fp,%g3 +/* 0x1068 1342 ( 5 6) */ fmovs %f6,%f10 +/* 0x106c 1343 ( 5 7) */ ld [%sp+96],%f9 +/* 0x1070 1344 ( 5 6) */ subcc %o3,0,%g0 +/* 0x1074 1345 ( 6 8) */ ld [%sp+92],%f11 +/* 0x1078 1346 ( 6 7) */ sethi %hi(0x1800),%g1 +/* 0x107c 1347 ( 6 7) */ or %g0,%i2,%o1 +/* 0x1080 1348 ( 7 10) */ fsubd %f8,%f6,%f18 +/* 0x1084 1349 ( 7 8) */ xor %g1,-296,%g1 +/* 0x1088 1350 ( 7 8) */ or %g0,0,%g4 +/* 0x108c 1351 ( 8 11) */ fsubd %f10,%f6,%f16 +/* 0x1090 1352 ( 8 9) */ bleu,pt %icc,.L990000162 ! tprob=0.50 +/* 0x1094 ( 8 9) */ subcc %o0,0,%g0 +/* 0x1098 1354 ( 9 10) */ add %g1,%fp,%g2 +/* 0x109c 1355 ( 9 10) */ sethi %hi(0x1800),%g1 +/* 0x10a0 1356 (10 11) */ xor %g1,-288,%g1 +/* 0x10a4 1357 (10 11) */ subcc %o3,7,%g0 +/* 0x10a8 1358 (11 12) */ add %g1,%fp,%o7 +/* 0x10ac 1359 (11 12) */ sethi %hi(0x1800),%g1 +/* 0x10b0 1360 (12 13) */ xor %g1,-280,%g1 +/* 0x10b4 1361 (13 14) */ add %g1,%fp,%o4 +/* 0x10b8 1362 (13 14) */ bl,pn %icc,.L77000054 ! 
tprob=0.50 +/* 0x10bc (13 14) */ sub %o3,2,%o2 +/* 0x10c0 1364 (14 17) */ ldd [%o1],%f2 +/* 0x10c4 1365 (14 15) */ add %o1,16,%g5 +/* 0x10c8 1366 (14 15) */ or %g0,4,%g4 +/* 0x10cc 1367 (15 18) */ ldd [%o1+8],%f0 +/* 0x10d0 1368 (15 16) */ add %o1,8,%o1 +/* 0x10d4 1369 (16 18) */ fxnor %f14,%f2,%f6 +/* 0x10d8 1370 (16 19) */ ldd [%g5],%f4 +/* 0x10dc 1371 (16 17) */ add %o1,16,%o1 +/* 0x10e0 1372 (17 19) */ fxnor %f14,%f0,%f12 +/* 0x10e4 1373 (17 20) */ ldd [%o1],%f0 +/* 0x10e8 1374 (17 18) */ add %o1,8,%o1 +/* 0x10ec 1375 (18 21) */ fitod %f7,%f2 +/* 0x10f0 1376 (19 22) */ fitod %f6,%f6 +/* 0x10f4 1377 (20 22) */ fxnor %f14,%f4,%f10 +/* 0x10f8 1378 (21 24) */ fsubd %f20,%f2,%f2 +/* 0x10fc 1379 (22 24) */ fxnor %f14,%f0,%f8 +/* 0x1100 1380 (23 26) */ fitod %f13,%f4 +/* 0x1104 1381 (24 27) */ fsubd %f20,%f6,%f6 +/* 0x1108 1382 (24 27) */ fmuld %f2,%f16,%f0 + +! +! ENTRY .L990000154 +! + + .L990000154: /* frequency 1.0 confidence 0.0 */ +/* 0x110c 1384 ( 0 3) */ ldd [%o1],%f24 +/* 0x1110 1385 ( 0 1) */ add %g4,3,%g4 +/* 0x1114 1386 ( 0 1) */ add %o4,96,%o4 +/* 0x1118 1387 ( 1 4) */ fitod %f11,%f22 +/* 0x111c 1388 ( 2 5) */ fsubd %f20,%f4,%f26 +/* 0x1120 1389 ( 2 3) */ subcc %g4,%o2,%g0 +/* 0x1124 1390 ( 2 3) */ add %o7,96,%o7 +/* 0x1128 1391 ( 2 5) */ fmuld %f6,%f18,%f28 +/* 0x112c 1392 ( 3 6) */ fmuld %f6,%f16,%f6 +/* 0x1130 1393 ( 3 4) */ add %g2,96,%g2 +/* 0x1134 1394 ( 3 4) */ add %g3,96,%g3 +/* 0x1138 1395 ( 4 7) */ fdtox %f0,%f0 +/* 0x113c 1396 ( 5 8) */ fitod %f12,%f4 +/* 0x1140 1397 ( 5 8) */ fmuld %f2,%f18,%f2 +/* 0x1144 1398 ( 6 9) */ fdtox %f28,%f12 +/* 0x1148 1399 ( 7 10) */ fdtox %f6,%f6 +/* 0x114c 1400 ( 7 8) */ std %f12,[%g3-96] +/* 0x1150 1401 ( 8 9) */ std %f6,[%g2-96] +/* 0x1154 1402 ( 8 11) */ fdtox %f2,%f2 +/* 0x1158 1403 ( 9 12) */ fsubd %f20,%f4,%f6 +/* 0x115c 1404 ( 9 10) */ std %f2,[%o7-96] +/* 0x1160 1405 ( 9 10) */ add %o1,8,%o1 +/* 0x1164 1406 (10 12) */ fxnor %f14,%f24,%f12 +/* 0x1168 1407 (10 13) */ fmuld %f26,%f16,%f4 +/* 0x116c 1408 (10 
11) */ std %f0,[%o4-96] +/* 0x1170 1409 (11 14) */ ldd [%o1],%f0 +/* 0x1174 1410 (11 14) */ fitod %f9,%f2 +/* 0x1178 1411 (12 15) */ fsubd %f20,%f22,%f28 +/* 0x117c 1412 (12 15) */ fmuld %f6,%f18,%f24 +/* 0x1180 1413 (13 16) */ fmuld %f6,%f16,%f22 +/* 0x1184 1414 (13 16) */ fdtox %f4,%f4 +/* 0x1188 1415 (14 17) */ fitod %f10,%f6 +/* 0x118c 1416 (14 17) */ fmuld %f26,%f18,%f10 +/* 0x1190 1417 (15 18) */ fdtox %f24,%f24 +/* 0x1194 1418 (16 19) */ fdtox %f22,%f22 +/* 0x1198 1419 (16 17) */ std %f24,[%g3-64] +/* 0x119c 1420 (17 18) */ std %f22,[%g2-64] +/* 0x11a0 1421 (17 20) */ fdtox %f10,%f10 +/* 0x11a4 1422 (18 21) */ fsubd %f20,%f6,%f6 +/* 0x11a8 1423 (18 19) */ std %f10,[%o7-64] +/* 0x11ac 1424 (18 19) */ add %o1,8,%o1 +/* 0x11b0 1425 (19 21) */ fxnor %f14,%f0,%f10 +/* 0x11b4 1426 (19 22) */ fmuld %f28,%f16,%f0 +/* 0x11b8 1427 (19 20) */ std %f4,[%o4-64] +/* 0x11bc 1428 (20 23) */ ldd [%o1],%f22 +/* 0x11c0 1429 (20 23) */ fitod %f13,%f4 +/* 0x11c4 1430 (21 24) */ fsubd %f20,%f2,%f2 +/* 0x11c8 1431 (21 24) */ fmuld %f6,%f18,%f26 +/* 0x11cc 1432 (22 25) */ fmuld %f6,%f16,%f24 +/* 0x11d0 1433 (22 25) */ fdtox %f0,%f0 +/* 0x11d4 1434 (23 26) */ fitod %f8,%f6 +/* 0x11d8 1435 (23 26) */ fmuld %f28,%f18,%f8 +/* 0x11dc 1436 (24 27) */ fdtox %f26,%f26 +/* 0x11e0 1437 (25 28) */ fdtox %f24,%f24 +/* 0x11e4 1438 (25 26) */ std %f26,[%g3-32] +/* 0x11e8 1439 (26 27) */ std %f24,[%g2-32] +/* 0x11ec 1440 (26 29) */ fdtox %f8,%f8 +/* 0x11f0 1441 (27 30) */ fsubd %f20,%f6,%f6 +/* 0x11f4 1442 (27 28) */ std %f8,[%o7-32] +/* 0x11f8 1443 (27 28) */ add %o1,8,%o1 +/* 0x11fc 1444 (28 30) */ fxnor %f14,%f22,%f8 +/* 0x1200 1445 (28 29) */ std %f0,[%o4-32] +/* 0x1204 1446 (28 29) */ bcs,pt %icc,.L990000154 ! tprob=0.50 +/* 0x1208 (28 31) */ fmuld %f2,%f16,%f0 + +! +! ENTRY .L990000157 +! 
+ + .L990000157: /* frequency 1.0 confidence 0.0 */ +/* 0x120c 1449 ( 0 3) */ fitod %f12,%f28 +/* 0x1210 1450 ( 0 3) */ fmuld %f6,%f18,%f24 +/* 0x1214 1451 ( 0 1) */ add %g3,128,%g3 +/* 0x1218 1452 ( 1 4) */ fitod %f10,%f12 +/* 0x121c 1453 ( 1 4) */ fmuld %f6,%f16,%f26 +/* 0x1220 1454 ( 1 2) */ add %g2,128,%g2 +/* 0x1224 1455 ( 2 5) */ fsubd %f20,%f4,%f4 +/* 0x1228 1456 ( 2 5) */ fmuld %f2,%f18,%f22 +/* 0x122c 1457 ( 2 3) */ add %o7,128,%o7 +/* 0x1230 1458 ( 3 6) */ fdtox %f24,%f6 +/* 0x1234 1459 ( 3 4) */ std %f6,[%g3-128] +/* 0x1238 1460 ( 3 4) */ add %o4,128,%o4 +/* 0x123c 1461 ( 4 7) */ fsubd %f20,%f28,%f2 +/* 0x1240 1462 ( 4 5) */ subcc %g4,%o3,%g0 +/* 0x1244 1463 ( 5 8) */ fitod %f11,%f6 +/* 0x1248 1464 ( 5 8) */ fmuld %f4,%f18,%f24 +/* 0x124c 1465 ( 6 9) */ fdtox %f26,%f10 +/* 0x1250 1466 ( 6 7) */ std %f10,[%g2-128] +/* 0x1254 1467 ( 7 10) */ fdtox %f22,%f10 +/* 0x1258 1468 ( 7 8) */ std %f10,[%o7-128] +/* 0x125c 1469 ( 7 10) */ fmuld %f2,%f18,%f26 +/* 0x1260 1470 ( 8 11) */ fsubd %f20,%f12,%f10 +/* 0x1264 1471 ( 8 11) */ fmuld %f2,%f16,%f2 +/* 0x1268 1472 ( 9 12) */ fsubd %f20,%f6,%f22 +/* 0x126c 1473 ( 9 12) */ fmuld %f4,%f16,%f12 +/* 0x1270 1474 (10 13) */ fdtox %f0,%f0 +/* 0x1274 1475 (10 11) */ std %f0,[%o4-128] +/* 0x1278 1476 (11 14) */ fitod %f8,%f4 +/* 0x127c 1477 (11 14) */ fmuld %f10,%f18,%f6 +/* 0x1280 1478 (12 15) */ fdtox %f26,%f0 +/* 0x1284 1479 (12 13) */ std %f0,[%g3-96] +/* 0x1288 1480 (12 15) */ fmuld %f10,%f16,%f10 +/* 0x128c 1481 (13 16) */ fdtox %f2,%f2 +/* 0x1290 1482 (13 14) */ std %f2,[%g2-96] +/* 0x1294 1483 (14 17) */ fitod %f9,%f0 +/* 0x1298 1484 (14 17) */ fmuld %f22,%f18,%f2 +/* 0x129c 1485 (15 18) */ fdtox %f24,%f8 +/* 0x12a0 1486 (15 16) */ std %f8,[%o7-96] +/* 0x12a4 1487 (16 19) */ fsubd %f20,%f4,%f4 +/* 0x12a8 1488 (16 19) */ fmuld %f22,%f16,%f8 +/* 0x12ac 1489 (17 20) */ fdtox %f12,%f12 +/* 0x12b0 1490 (17 18) */ std %f12,[%o4-96] +/* 0x12b4 1491 (18 21) */ fsubd %f20,%f0,%f0 +/* 0x12b8 1492 (19 22) */ fdtox %f6,%f6 +/* 
0x12bc 1493 (19 20) */ std %f6,[%g3-64] +/* 0x12c0 1494 (20 23) */ fdtox %f10,%f10 +/* 0x12c4 1495 (20 21) */ std %f10,[%g2-64] +/* 0x12c8 1496 (20 23) */ fmuld %f4,%f18,%f6 +/* 0x12cc 1497 (21 24) */ fdtox %f2,%f2 +/* 0x12d0 1498 (21 22) */ std %f2,[%o7-64] +/* 0x12d4 1499 (21 24) */ fmuld %f4,%f16,%f4 +/* 0x12d8 1500 (22 25) */ fmuld %f0,%f18,%f2 +/* 0x12dc 1501 (22 25) */ fdtox %f8,%f8 +/* 0x12e0 1502 (22 23) */ std %f8,[%o4-64] +/* 0x12e4 1503 (23 26) */ fdtox %f6,%f6 +/* 0x12e8 1504 (23 24) */ std %f6,[%g3-32] +/* 0x12ec 1505 (23 26) */ fmuld %f0,%f16,%f0 +/* 0x12f0 1506 (24 27) */ fdtox %f4,%f4 +/* 0x12f4 1507 (24 25) */ std %f4,[%g2-32] +/* 0x12f8 1508 (25 28) */ fdtox %f2,%f2 +/* 0x12fc 1509 (25 26) */ std %f2,[%o7-32] +/* 0x1300 1510 (26 29) */ fdtox %f0,%f0 +/* 0x1304 1511 (26 27) */ bcc,pn %icc,.L77000056 ! tprob=0.50 +/* 0x1308 (26 27) */ std %f0,[%o4-32] + +! +! ENTRY .L77000054 +! + + .L77000054: /* frequency 1.0 confidence 0.0 */ +/* 0x130c 1514 ( 0 3) */ ldd [%o1],%f0 + +! +! ENTRY .L990000161 +! 
+ + .L990000161: /* frequency 1.0 confidence 0.0 */ +/* 0x1310 1516 ( 0 2) */ fxnor %f14,%f0,%f0 +/* 0x1314 1517 ( 0 1) */ add %g4,1,%g4 +/* 0x1318 1518 ( 0 1) */ add %o1,8,%o1 +/* 0x131c 1519 ( 1 2) */ subcc %g4,%o3,%g0 +/* 0x1320 1520 ( 2 5) */ fitod %f0,%f2 +/* 0x1324 1521 ( 3 6) */ fitod %f1,%f0 +/* 0x1328 1522 ( 5 8) */ fsubd %f20,%f2,%f2 +/* 0x132c 1523 ( 6 9) */ fsubd %f20,%f0,%f0 +/* 0x1330 1524 ( 8 11) */ fmuld %f2,%f18,%f6 +/* 0x1334 1525 ( 9 12) */ fmuld %f2,%f16,%f4 +/* 0x1338 1526 (10 13) */ fmuld %f0,%f18,%f2 +/* 0x133c 1527 (11 14) */ fdtox %f6,%f6 +/* 0x1340 1528 (11 12) */ std %f6,[%g3] +/* 0x1344 1529 (11 14) */ fmuld %f0,%f16,%f0 +/* 0x1348 1530 (12 15) */ fdtox %f4,%f4 +/* 0x134c 1531 (12 13) */ std %f4,[%g2] +/* 0x1350 1532 (12 13) */ add %g2,32,%g2 +/* 0x1354 1533 (13 16) */ fdtox %f2,%f2 +/* 0x1358 1534 (13 14) */ std %f2,[%o7] +/* 0x135c 1535 (13 14) */ add %o7,32,%o7 +/* 0x1360 1536 (14 17) */ fdtox %f0,%f0 +/* 0x1364 1537 (14 15) */ std %f0,[%o4] +/* 0x1368 1538 (14 15) */ add %o4,32,%o4 +/* 0x136c 1539 (15 16) */ add %g3,32,%g3 +/* 0x1370 1540 (15 16) */ bcs,a,pt %icc,.L990000161 ! tprob=0.50 +/* 0x1374 (16 19) */ ldd [%o1],%f0 + +! +! ENTRY .L77000056 +! + + .L77000056: /* frequency 1.0 confidence 0.0 */ +/* 0x1378 1548 ( 0 1) */ subcc %o0,0,%g0 + +! +! ENTRY .L990000162 +! + + .L990000162: /* frequency 1.0 confidence 0.0 */ +/* 0x137c 1550 ( 0 1) */ bleu,pt %icc,.L77770061 ! tprob=0.50 +/* 0x1380 ( 0 1) */ nop +/* 0x1384 1555 ( 0 1) */ sethi %hi(0x1800),%g1 +/* 0x1388 1556 ( 1 2) */ xor %g1,-304,%g1 +/* 0x138c 1557 ( 1 2) */ or %g0,%i1,%g4 +/* 0x1390 1558 ( 2 3) */ add %g1,%fp,%g5 +/* 0x1394 1559 ( 2 3) */ sethi %hi(0x1800),%g1 +/* 0x1398 1560 ( 3 4) */ xor %g1,-296,%g1 +/* 0x139c 1561 ( 3 4) */ or %g0,%o0,%o7 +/* 0x13a0 1562 ( 4 5) */ add %g1,%fp,%g2 +/* 0x13a4 1563 ( 4 5) */ or %g0,0,%i2 +/* 0x13a8 1564 ( 5 6) */ or %g0,%i0,%g3 +/* 0x13ac 1565 ( 5 6) */ subcc %o0,6,%g0 +/* 0x13b0 1566 ( 5 6) */ bl,pn %icc,.L77000058 ! 
tprob=0.50 +/* 0x13b4 ( 6 7) */ sethi %hi(0x1800),%g1 +/* 0x13b8 1568 ( 6 8) */ ld [%g4],%o2 +/* 0x13bc 1569 ( 6 7) */ add %g3,4,%g3 +/* 0x13c0 1570 ( 7 8) */ xor %g1,-264,%g1 +/* 0x13c4 1571 ( 7 8) */ sub %o7,3,%o4 +/* 0x13c8 1572 ( 8 9) */ add %g1,%fp,%g2 +/* 0x13cc 1573 ( 8 9) */ sethi %hi(0x1800),%g1 +/* 0x13d0 1574 ( 9 10) */ xor %g1,-272,%g1 +/* 0x13d4 1575 ( 9 10) */ or %g0,2,%i2 +/* 0x13d8 1576 (10 11) */ add %g1,%fp,%g5 +/* 0x13dc 1577 (10 11) */ sethi %hi(0x1800),%g1 +/* 0x13e0 1578 (11 12) */ xor %g1,-296,%g1 +/* 0x13e4 1579 (12 13) */ add %g1,%fp,%g1 +/* 0x13e8 1580 (13 15) */ ldx [%g1],%o1 +/* 0x13ec 1581 (14 16) */ ldx [%g1-8],%o0 +/* 0x13f0 1582 (15 16) */ sllx %o1,19,%o1 +/* 0x13f4 1583 (15 17) */ ldx [%g1+16],%o3 +/* 0x13f8 1584 (16 17) */ add %o0,%o1,%o0 +/* 0x13fc 1585 (16 18) */ ld [%g4+4],%o1 +/* 0x1400 1586 (16 17) */ add %g4,8,%g4 +/* 0x1404 1587 (17 18) */ sllx %o3,19,%o3 +/* 0x1408 1588 (17 18) */ add %o0,%o2,%o0 +/* 0x140c 1589 (17 19) */ ldx [%g1+8],%o2 +/* 0x1410 1590 (18 19) */ st %o0,[%g3-4] +/* 0x1414 1591 (18 19) */ srlx %o0,32,%o0 + +! +! ENTRY .L990000142 +! 
+ + .L990000142: /* frequency 1.0 confidence 0.0 */ +/* 0x1418 1593 ( 0 1) */ add %o2,%o3,%o2 +/* 0x141c 1594 ( 0 1) */ add %i2,4,%i2 +/* 0x1420 1595 ( 0 2) */ ld [%g4],%o3 +/* 0x1424 1596 ( 1 2) */ srl %o0,0,%o5 +/* 0x1428 1597 ( 1 2) */ add %o2,%o1,%o1 +/* 0x142c 1598 ( 1 3) */ ldx [%g2],%o0 +/* 0x1430 1599 ( 3 4) */ sllx %o0,19,%o2 +/* 0x1434 1600 ( 3 5) */ ldx [%g5],%o0 +/* 0x1438 1601 ( 3 4) */ add %o1,%o5,%o1 +/* 0x143c 1602 ( 4 5) */ st %o1,[%g3] +/* 0x1440 1603 ( 4 5) */ srlx %o1,32,%o5 +/* 0x1444 1604 ( 4 5) */ subcc %i2,%o4,%g0 +/* 0x1448 1605 ( 5 7) */ ldx [%g2+16],%o1 +/* 0x144c 1606 ( 5 6) */ add %o0,%o2,%o0 +/* 0x1450 1607 ( 5 6) */ add %g3,16,%g3 +/* 0x1454 1608 ( 6 8) */ ld [%g4+4],%o2 +/* 0x1458 1609 ( 6 7) */ add %o0,%o3,%o0 +/* 0x145c 1610 ( 7 8) */ sllx %o1,19,%o3 +/* 0x1460 1611 ( 7 9) */ ldx [%g5+16],%o1 +/* 0x1464 1612 ( 7 8) */ add %o0,%o5,%o0 +/* 0x1468 1613 ( 8 9) */ st %o0,[%g3-12] +/* 0x146c 1614 ( 8 9) */ srlx %o0,32,%o5 +/* 0x1470 1615 ( 8 9) */ add %g4,16,%g4 +/* 0x1474 1616 ( 9 11) */ ldx [%g2+32],%o0 +/* 0x1478 1617 ( 9 10) */ add %o1,%o3,%o1 +/* 0x147c 1618 ( 9 10) */ add %g2,64,%g2 +/* 0x1480 1619 (10 12) */ ld [%g4-8],%o3 +/* 0x1484 1620 (10 11) */ add %o1,%o2,%o2 +/* 0x1488 1621 (11 12) */ sllx %o0,19,%o1 +/* 0x148c 1622 (11 13) */ ldx [%g5+32],%o0 +/* 0x1490 1623 (11 12) */ add %o2,%o5,%o2 +/* 0x1494 1624 (12 13) */ st %o2,[%g3-8] +/* 0x1498 1625 (12 13) */ srlx %o2,32,%o5 +/* 0x149c 1626 (12 13) */ add %g5,64,%g5 +/* 0x14a0 1627 (13 15) */ ldx [%g2-16],%o2 +/* 0x14a4 1628 (13 14) */ add %o0,%o1,%o0 +/* 0x14a8 1629 (14 16) */ ld [%g4-4],%o1 +/* 0x14ac 1630 (14 15) */ add %o0,%o3,%o0 +/* 0x14b0 1631 (15 16) */ sllx %o2,19,%o3 +/* 0x14b4 1632 (15 17) */ ldx [%g5-16],%o2 +/* 0x14b8 1633 (15 16) */ add %o0,%o5,%o0 +/* 0x14bc 1634 (16 17) */ st %o0,[%g3-4] +/* 0x14c0 1635 (16 17) */ bcs,pt %icc,.L990000142 ! tprob=0.50 +/* 0x14c4 (16 17) */ srlx %o0,32,%o0 + +! +! ENTRY .L990000145 +! 
+ + .L990000145: /* frequency 1.0 confidence 0.0 */ +/* 0x14c8 1638 ( 0 1) */ add %o2,%o3,%o3 +/* 0x14cc 1639 ( 0 1) */ add %g3,4,%g3 +/* 0x14d0 1640 ( 1 2) */ srl %o0,0,%o2 +/* 0x14d4 1641 ( 1 2) */ add %o3,%o1,%o0 +/* 0x14d8 1642 ( 2 3) */ add %o0,%o2,%o0 +/* 0x14dc 1643 ( 2 3) */ st %o0,[%g3-4] +/* 0x14e0 1644 ( 2 3) */ subcc %i2,%o7,%g0 +/* 0x14e4 1645 ( 2 3) */ bcc,pn %icc,.L77770061 ! tprob=0.50 +/* 0x14e8 ( 3 4) */ srlx %o0,32,%o5 + +! +! ENTRY .L77000058 +! + + .L77000058: /* frequency 1.0 confidence 0.0 */ +/* 0x14ec 1648 ( 0 2) */ ldx [%g2],%o2 + +! +! ENTRY .L990000160 +! + + .L990000160: /* frequency 1.0 confidence 0.0 */ +/* 0x14f0 1650 ( 0 1) */ sllx %o2,19,%o3 +/* 0x14f4 1651 ( 0 2) */ ldx [%g5],%o0 +/* 0x14f8 1652 ( 0 1) */ add %i2,1,%i2 +/* 0x14fc 1653 ( 1 2) */ srl %o5,0,%o1 +/* 0x1500 1654 ( 1 3) */ ld [%g4],%o2 +/* 0x1504 1655 ( 1 2) */ add %g2,16,%g2 +/* 0x1508 1656 ( 2 3) */ add %o0,%o3,%o0 +/* 0x150c 1657 ( 2 3) */ add %g5,16,%g5 +/* 0x1510 1658 ( 3 4) */ add %o0,%o2,%o0 +/* 0x1514 1659 ( 3 4) */ add %g4,4,%g4 +/* 0x1518 1660 ( 4 5) */ add %o0,%o1,%o0 +/* 0x151c 1661 ( 4 5) */ st %o0,[%g3] +/* 0x1520 1662 ( 4 5) */ subcc %i2,%o7,%g0 +/* 0x1524 1663 ( 5 6) */ srlx %o0,32,%o5 +/* 0x1528 1664 ( 5 6) */ add %g3,4,%g3 +/* 0x152c 1665 ( 5 6) */ bcs,a,pt %icc,.L990000160 ! tprob=0.50 +/* 0x1530 ( 6 8) */ ldx [%g2],%o2 + +! +! ENTRY .L77770061 +! + + .L77770061: /* frequency 1.0 confidence 0.0 */ +/* 0x1534 ( 0 2) */ ret ! Result = %o1 %o0 %f0 %f1 +/* 0x1538 ( 2 3) */ restore %g0,%o5,%o0 + +/* 0x11a8 1441 ( 0 0) */ .type mul_add,2 +/* 0x11a8 1442 ( 0 0) */ .size mul_add,(.-mul_add) +/* 0x11a8 1445 ( 0 0) */ .align 16 +/* 0x11b0 1451 ( 0 0) */ .global mul_add_inp + +! +! ENTRY mul_add_inp +! 
+ + .global mul_add_inp + mul_add_inp: /* frequency 1.0 confidence 0.0 -- wrapper around mul_add: keeps %o0, copies it into %o1, moves the incoming %o1,%o2,%o3 to %o2,%o3,%o4, then tail-calls mul_add */ +/* 0x11b0 1453 ( 0 1) -- %g1 = incoming %o2 */ or %g0,%o2,%g1 +/* 0x11b4 1454 ( 0 1) -- %o4 = incoming %o3 */ or %g0,%o3,%o4 +/* 0x11b8 1455 ( 1 2) -- %g3 = incoming %o0 */ or %g0,%o0,%g3 +/* 0x11bc 1456 ( 1 2) -- %g2 = incoming %o1 */ or %g0,%o1,%g2 +/* 0x11c0 1466 ( 2 3) -- %o3 = incoming %o2 */ or %g0,%g1,%o3 +/* 0x11c4 1467 ( 2 3) -- %o1 = incoming %o0 (%o0 itself is left untouched) */ or %g0,%g3,%o1 +/* 0x11c8 1468 ( 3 4) -- %o2 = incoming %o1 */ or %g0,%g2,%o2 +/* 0x11cc 1469 ( 3 4) -- save %o7 in %g1 across the call */ or %g0,%o7,%g1 +/* 0x11d0 1470 ( 4 6) -- call writes its own address into %o7 */ call mul_add ! params = ! Result = +/* 0x11d4 ( 5 6) -- delay slot: put the saved %o7 back, so mul_add returns straight to this routine's caller */ or %g0,%g1,%o7 +/* 0x11d8 1472 ( 0 0) */ .type mul_add_inp,2 +/* 0x11d8 1473 ( 0 0) */ .size mul_add_inp,(.-mul_add_inp) + + .section ".data",#alloc,#write +/* 0x11d8 6 ( 0 0) */ .align 8 + +! +! ENTRY mask_cnst +! + + mask_cnst: /* frequency 1.0 confidence 0.0 -- 8 bytes of data: two 32-bit words, each 0x80000000 (only bit 31 set) */ +/* 0x11d8 8 ( 0 0) */ .word -2147483648 +/* 0x11dc 9 ( 0 0) */ .word -2147483648 +/* 0x11e0 10 ( 0 0) */ .type mask_cnst,#object +/* 0x11e0 11 ( 0 0) */ .size mask_cnst,8 + diff --git a/security/nss/lib/freebl/mpi/mpv_sparcv9.s b/security/nss/lib/freebl/mpi/mpv_sparcv9.s new file mode 100644 index 000000000..e2fbe0bd0 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpv_sparcv9.s @@ -0,0 +1,1645 @@ +! +! This Source Code Form is subject to the terms of the Mozilla Public +! License, v. 2.0. If a copy of the MPL was not distributed with this +! file, You can obtain one at http://mozilla.org/MPL/2.0/. + + .section ".text",#alloc,#execinstr +/* 000000 0 ( 0 0) */ .register %g2,#scratch +/* 000000 ( 0 0) */ .register %g3,#scratch +/* 000000 3 ( 0 0) */ .file "mpv_sparc.c" +/* 000000 15 ( 0 0) */ .align 8 +! +! SUBROUTINE .L_const_seg_900000101 +! +! OFFSET SOURCE LINE LABEL INSTRUCTION (ISSUE TIME) (COMPLETION TIME) + + .L_const_seg_900000101: /* frequency 1.0 confidence 0.0 -- two 8-byte constants; mul_add below fetches them with ldd at offsets 0 and 8 */ +/* 000000 20 ( 0 0) -- 0x43300000,0x00000000: as a double with the first word as the high half this is 2^52 */ .word 1127219200,0 +/* 0x0008 21 ( 0 0) -- 0x41DFFFFF,0xFFC00000: as a double with the first word as the high half this is 2147483647.0, i.e. 2^31 - 1 */ .word 1105199103,-4194304 +/* 0x0010 22 ( 0 0) */ .align 8 +/* 0x0010 28 ( 0 0) */ .global mul_add + +! +! ENTRY mul_add +! 
+ + .global mul_add + mul_add: /* frequency 1.0 confidence 0.0 */ +/* 0x0010 30 ( 0 1) */ sethi %hi(0x1c00),%g1 +/* 0x0014 31 ( 0 1) */ sethi %hi(mask_cnst),%g2 +/* 0x0018 32 ( 1 2) */ xor %g1,-48,%g1 +/* 0x001c 33 ( 1 2) */ add %g2,%lo(mask_cnst),%g2 +/* 0x0020 34 ( 2 3) */ save %sp,%g1,%sp + +! +! ENTRY .L900000149 +! + + .L900000149: /* frequency 1.0 confidence 0.0 */ +/* 0x0024 36 ( 0 2) */ call (.+0x8) ! params = ! Result = +/* 0x0028 ( 1 2) */ sethi %hi((_GLOBAL_OFFSET_TABLE_-(.L900000149-.))),%g5 +/* 0x002c 178 ( 2 3) */ sethi %hi(.L_const_seg_900000101),%g3 +/* 0x0030 179 ( 2 3) */ add %g5,%lo((_GLOBAL_OFFSET_TABLE_-(.L900000149-.))),%g5 +/* 0x0034 180 ( 3 4) */ add %g3,%lo(.L_const_seg_900000101),%g3 +/* 0x0038 181 ( 3 4) */ add %g5,%o7,%o1 +/* 0x003c 182 ( 4 5) */ sethi %hi(0x80000),%g4 +/* 0x0040 183 ( 4 6) */ ldx [%o1+%g2],%g2 +/* 0x0044 184 ( 4 5) */ or %g0,%i2,%o2 +/* 0x0048 185 ( 5 6) */ subcc %i4,%g4,%g0 +/* 0x004c 186 ( 5 7) */ ldx [%o1+%g3],%o0 +/* 0x0050 187 ( 6 7) */ or %g0,%i0,%o7 +/* 0x0054 188 ( 6 7) */ or %g0,%i1,%o5 +/* 0x0058 189 ( 6 9) */ ldd [%g2],%f0 +/* 0x005c 190 ( 6 7) */ bcc,pn %icc,.L77000048 ! tprob=0.50 +/* 0x0060 ( 7 8) */ subcc %i3,8,%g0 +/* 0x0064 192 ( 7 8) */ bne,pn %icc,.L900000158 ! 
tprob=0.50 +/* 0x0068 ( 8 9) */ subcc %i3,16,%g0 +/* 0x006c 194 ( 9 12) */ ldd [%o2],%f4 +/* 0x0070 195 (10 11) */ st %i4,[%sp+2287] +/* 0x0074 196 (11 14) */ ldd [%o0],%f8 +/* 0x0078 197 (11 13) */ fxnor %f0,%f4,%f4 +/* 0x007c 198 (12 15) */ ldd [%o2+8],%f10 +/* 0x0080 199 (13 16) */ fitod %f4,%f12 +/* 0x0084 200 (13 16) */ ldd [%o0+8],%f14 +/* 0x0088 201 (14 17) */ ld [%sp+2287],%f7 +/* 0x008c 202 (14 17) */ fitod %f5,%f4 +/* 0x0090 203 (15 17) */ fxnor %f0,%f10,%f10 +/* 0x0094 204 (15 18) */ ldd [%o2+16],%f16 +/* 0x0098 205 (16 19) */ ldd [%o2+24],%f18 +/* 0x009c 206 (17 20) */ fsubd %f14,%f4,%f4 +/* 0x00a0 210 (17 20) */ ld [%i1],%g2 +/* 0x00a4 211 (18 20) */ fxnor %f0,%f16,%f16 +/* 0x00a8 212 (18 21) */ ld [%i1+4],%g3 +/* 0x00ac 213 (19 22) */ ld [%i1+8],%g4 +/* 0x00b0 214 (20 23) */ fitod %f16,%f20 +/* 0x00b4 215 (20 23) */ ld [%i1+16],%o0 +/* 0x00b8 216 (21 24) */ ld [%i1+12],%g5 +/* 0x00bc 217 (22 25) */ ld [%i1+20],%o1 +/* 0x00c0 218 (23 26) */ ld [%i1+24],%o2 +/* 0x00c4 219 (24 25) */ fmovs %f8,%f6 +/* 0x00c8 220 (24 27) */ ld [%i1+28],%o3 +/* 0x00cc 221 (26 29) */ fsubd %f6,%f8,%f6 +/* 0x00d0 222 (27 30) */ fsubd %f14,%f12,%f8 +/* 0x00d4 223 (28 31) */ fitod %f10,%f12 +/* 0x00d8 224 (29 32) */ fmuld %f4,%f6,%f4 +/* 0x00dc 225 (29 32) */ fitod %f11,%f10 +/* 0x00e0 226 (30 33) */ fmuld %f8,%f6,%f8 +/* 0x00e4 227 (31 34) */ fsubd %f14,%f12,%f12 +/* 0x00e8 228 (32 35) */ fdtox %f4,%f4 +/* 0x00ec 229 (32 33) */ std %f4,[%sp+2271] +/* 0x00f0 230 (33 36) */ fdtox %f8,%f8 +/* 0x00f4 231 (33 34) */ std %f8,[%sp+2279] +/* 0x00f8 232 (34 37) */ fmuld %f12,%f6,%f12 +/* 0x00fc 233 (34 37) */ fsubd %f14,%f10,%f10 +/* 0x0100 234 (35 38) */ fsubd %f14,%f20,%f4 +/* 0x0104 235 (36 39) */ fitod %f17,%f8 +/* 0x0108 236 (37 39) */ fxnor %f0,%f18,%f16 +/* 0x010c 237 (37 39) */ ldx [%sp+2279],%o4 +/* 0x0110 238 (37 40) */ fmuld %f10,%f6,%f10 +/* 0x0114 239 (38 41) */ fdtox %f12,%f12 +/* 0x0118 240 (38 39) */ std %f12,[%sp+2263] +/* 0x011c 241 (38 41) */ fmuld %f4,%f6,%f4 +/* 
0x0120 242 (39 42) */ fitod %f16,%f18 +/* 0x0124 243 (39 40) */ add %o4,%g2,%g2 +/* 0x0128 244 (39 40) */ st %g2,[%i0] +/* 0x012c 245 (40 42) */ ldx [%sp+2271],%o4 +/* 0x0130 246 (40 43) */ fsubd %f14,%f8,%f8 +/* 0x0134 247 (40 41) */ srax %g2,32,%o5 +/* 0x0138 248 (41 44) */ fdtox %f10,%f10 +/* 0x013c 249 (41 42) */ std %f10,[%sp+2255] +/* 0x0140 250 (42 45) */ fdtox %f4,%f4 +/* 0x0144 251 (42 43) */ std %f4,[%sp+2247] +/* 0x0148 252 (42 43) */ add %o4,%g3,%o4 +/* 0x014c 253 (43 46) */ fitod %f17,%f12 +/* 0x0150 254 (43 45) */ ldx [%sp+2263],%g2 +/* 0x0154 255 (43 44) */ add %o4,%o5,%g3 +/* 0x0158 256 (43 46) */ fmuld %f8,%f6,%f8 +/* 0x015c 257 (44 47) */ fsubd %f14,%f18,%f10 +/* 0x0160 258 (44 45) */ st %g3,[%i0+4] +/* 0x0164 259 (44 45) */ srax %g3,32,%g3 +/* 0x0168 260 (45 46) */ add %g2,%g4,%g4 +/* 0x016c 261 (45 47) */ ldx [%sp+2255],%g2 +/* 0x0170 262 (46 49) */ fsubd %f14,%f12,%f4 +/* 0x0174 263 (46 47) */ add %g4,%g3,%g3 +/* 0x0178 264 (46 48) */ ldx [%sp+2247],%g4 +/* 0x017c 265 (47 50) */ fmuld %f10,%f6,%f10 +/* 0x0180 266 (47 50) */ fdtox %f8,%f8 +/* 0x0184 267 (47 48) */ std %f8,[%sp+2239] +/* 0x0188 268 (48 49) */ add %g4,%o0,%g4 +/* 0x018c 269 (48 49) */ add %g2,%g5,%g2 +/* 0x0190 270 (48 49) */ st %g3,[%i0+8] +/* 0x0194 271 (49 52) */ fmuld %f4,%f6,%f4 +/* 0x0198 272 (49 50) */ srax %g3,32,%o0 +/* 0x019c 273 (49 51) */ ldx [%sp+2239],%g5 +/* 0x01a0 274 (50 53) */ fdtox %f10,%f6 +/* 0x01a4 275 (50 51) */ std %f6,[%sp+2231] +/* 0x01a8 276 (50 51) */ add %g2,%o0,%g2 +/* 0x01ac 277 (51 52) */ srax %g2,32,%g3 +/* 0x01b0 278 (51 52) */ add %g5,%o1,%o1 +/* 0x01b4 279 (51 52) */ st %g2,[%i0+12] +/* 0x01b8 280 (52 55) */ fdtox %f4,%f4 +/* 0x01bc 281 (52 53) */ std %f4,[%sp+2223] +/* 0x01c0 282 (52 53) */ add %g4,%g3,%g3 +/* 0x01c4 283 (53 54) */ srax %g3,32,%g4 +/* 0x01c8 284 (53 54) */ st %g3,[%i0+16] +/* 0x01cc 285 (54 56) */ ldx [%sp+2231],%o0 +/* 0x01d0 286 (54 55) */ add %o1,%g4,%g4 +/* 0x01d4 287 (55 56) */ srax %g4,32,%g2 +/* 0x01d8 288 (55 57) */ ldx 
[%sp+2223],%g5 +/* 0x01dc 289 (56 57) */ add %o0,%o2,%o2 +/* 0x01e0 290 (56 57) */ st %g4,[%i0+20] +/* 0x01e4 291 (57 58) */ add %o2,%g2,%g2 +/* 0x01e8 292 (57 58) */ add %g5,%o3,%g5 +/* 0x01ec 293 (57 58) */ st %g2,[%i0+24] +/* 0x01f0 294 (58 59) */ srax %g2,32,%g3 +/* 0x01f4 295 (59 60) */ add %g5,%g3,%g2 +/* 0x01f8 296 (59 60) */ st %g2,[%i0+28] +/* 0x01fc 300 (60 61) */ srax %g2,32,%o3 +/* 0x0200 301 (61 62) */ srl %o3,0,%i0 +/* 0x0204 (62 64) */ ret ! Result = %o1 %o0 %f0 %f1 +/* 0x0208 (64 65) */ restore %g0,%g0,%g0 + +! +! ENTRY .L900000158 +! + + .L900000158: /* frequency 1.0 confidence 0.0 */ +/* 0x020c 308 ( 0 1) */ bne,a,pn %icc,.L900000157 ! tprob=0.50 +/* 0x0210 ( 0 1) */ st %i4,[%sp+2223] +/* 0x0214 315 ( 1 4) */ ldd [%o2],%f4 +/* 0x0218 316 ( 2 3) */ st %i4,[%sp+2351] +/* 0x021c 317 ( 3 6) */ ldd [%o0],%f8 +/* 0x0220 318 ( 3 5) */ fxnor %f0,%f4,%f4 +/* 0x0224 319 ( 4 7) */ ldd [%o2+8],%f10 +/* 0x0228 320 ( 5 8) */ ldd [%o0+8],%f14 +/* 0x022c 321 ( 5 8) */ fitod %f4,%f12 +/* 0x0230 322 ( 6 9) */ ld [%sp+2351],%f7 +/* 0x0234 323 ( 6 8) */ fxnor %f0,%f10,%f10 +/* 0x0238 324 ( 7 10) */ ldd [%o2+16],%f16 +/* 0x023c 325 ( 7 10) */ fitod %f5,%f4 +/* 0x0240 326 ( 8 11) */ ldd [%o2+24],%f18 +/* 0x0244 330 ( 9 12) */ ldd [%o2+32],%f20 +/* 0x0248 331 ( 9 11) */ fxnor %f0,%f16,%f16 +/* 0x024c 335 (10 13) */ ld [%i1],%g2 +/* 0x0250 336 (10 13) */ fsubd %f14,%f4,%f4 +/* 0x0254 337 (11 14) */ ldd [%o2+40],%f22 +/* 0x0258 338 (11 14) */ fitod %f16,%f28 +/* 0x025c 339 (12 15) */ ld [%i1+4],%g3 +/* 0x0260 340 (13 16) */ ld [%i1+8],%g4 +/* 0x0264 341 (13 15) */ fxnor %f0,%f22,%f22 +/* 0x0268 342 (14 17) */ ld [%i1+12],%g5 +/* 0x026c 343 (15 18) */ ld [%i1+16],%o0 +/* 0x0270 344 (16 19) */ ldd [%o2+48],%f24 +/* 0x0274 345 (17 20) */ ld [%i1+20],%o1 +/* 0x0278 346 (17 18) */ fmovs %f8,%f6 +/* 0x027c 347 (18 21) */ ldd [%o2+56],%f26 +/* 0x0280 348 (19 22) */ ld [%i1+24],%o2 +/* 0x0284 349 (19 22) */ fsubd %f6,%f8,%f6 +/* 0x0288 350 (20 23) */ ld [%i1+28],%o3 +/* 0x028c 
351 (20 23) */ fsubd %f14,%f12,%f8 +/* 0x0290 355 (21 24) */ ld [%i1+32],%o4 +/* 0x0294 356 (21 24) */ fitod %f10,%f12 +/* 0x0298 357 (22 25) */ ld [%i1+36],%o7 +/* 0x029c 358 (22 25) */ fitod %f11,%f10 +/* 0x02a0 359 (22 25) */ fmuld %f4,%f6,%f4 +/* 0x02a4 360 (23 26) */ ld [%i1+40],%l1 +/* 0x02a8 361 (23 26) */ fmuld %f8,%f6,%f8 +/* 0x02ac 362 (24 27) */ ld [%i1+56],%l5 +/* 0x02b0 363 (24 27) */ fsubd %f14,%f12,%f12 +/* 0x02b4 364 (25 28) */ fsubd %f14,%f10,%f10 +/* 0x02b8 365 (26 29) */ fdtox %f8,%f8 +/* 0x02bc 366 (26 27) */ std %f8,[%sp+2343] +/* 0x02c0 367 (27 30) */ fitod %f17,%f8 +/* 0x02c4 368 (27 30) */ fmuld %f12,%f6,%f12 +/* 0x02c8 369 (28 31) */ fdtox %f4,%f4 +/* 0x02cc 370 (28 29) */ std %f4,[%sp+2335] +/* 0x02d0 371 (28 31) */ fmuld %f10,%f6,%f10 +/* 0x02d4 372 (29 31) */ fxnor %f0,%f18,%f16 +/* 0x02d8 373 (30 33) */ fdtox %f12,%f12 +/* 0x02dc 374 (30 31) */ std %f12,[%sp+2327] +/* 0x02e0 375 (31 33) */ ldx [%sp+2343],%o5 +/* 0x02e4 376 (31 34) */ fsubd %f14,%f8,%f8 +/* 0x02e8 377 (32 35) */ fsubd %f14,%f28,%f4 +/* 0x02ec 378 (33 36) */ fitod %f17,%f12 +/* 0x02f0 379 (33 34) */ add %o5,%g2,%g2 +/* 0x02f4 380 (33 34) */ st %g2,[%i0] +/* 0x02f8 381 (34 36) */ ldx [%sp+2335],%o5 +/* 0x02fc 382 (34 37) */ fitod %f16,%f18 +/* 0x0300 383 (34 35) */ srax %g2,32,%l0 +/* 0x0304 384 (35 37) */ fxnor %f0,%f20,%f16 +/* 0x0308 385 (35 38) */ fmuld %f8,%f6,%f20 +/* 0x030c 386 (36 39) */ fdtox %f10,%f10 +/* 0x0310 387 (36 37) */ std %f10,[%sp+2319] +/* 0x0314 388 (36 37) */ add %o5,%g3,%g3 +/* 0x0318 389 (36 39) */ fmuld %f4,%f6,%f4 +/* 0x031c 390 (37 40) */ fitod %f16,%f8 +/* 0x0320 391 (37 38) */ add %g3,%l0,%g3 +/* 0x0324 392 (37 38) */ st %g3,[%i0+4] +/* 0x0328 393 (38 40) */ ldx [%sp+2327],%o5 +/* 0x032c 394 (38 41) */ fsubd %f14,%f18,%f18 +/* 0x0330 395 (38 39) */ srax %g3,32,%l3 +/* 0x0334 396 (39 41) */ ldx [%sp+2319],%l2 +/* 0x0338 397 (39 42) */ fdtox %f4,%f4 +/* 0x033c 398 (40 41) */ std %f4,[%sp+2311] +/* 0x0340 399 (40 43) */ fdtox %f20,%f20 +/* 0x0344 
400 (40 41) */ add %o5,%g4,%g4 +/* 0x0348 401 (41 42) */ std %f20,[%sp+2303] +/* 0x034c 402 (41 44) */ fsubd %f14,%f12,%f4 +/* 0x0350 403 (41 42) */ add %g4,%l3,%g4 +/* 0x0354 404 (41 44) */ fmuld %f18,%f6,%f18 +/* 0x0358 405 (42 43) */ st %g4,[%i0+8] +/* 0x035c 406 (42 45) */ fitod %f17,%f16 +/* 0x0360 407 (42 43) */ srax %g4,32,%l4 +/* 0x0364 408 (43 46) */ ld [%i1+44],%l0 +/* 0x0368 409 (43 46) */ fsubd %f14,%f8,%f20 +/* 0x036c 410 (43 44) */ add %l2,%g5,%l2 +/* 0x0370 411 (44 46) */ ldx [%sp+2311],%g5 +/* 0x0374 412 (44 47) */ fitod %f22,%f8 +/* 0x0378 413 (44 45) */ add %l2,%l4,%l2 +/* 0x037c 414 (44 47) */ fmuld %f4,%f6,%f4 +/* 0x0380 415 (45 46) */ st %l2,[%i0+12] +/* 0x0384 416 (45 48) */ fsubd %f14,%f16,%f10 +/* 0x0388 417 (46 49) */ ld [%i1+52],%l3 +/* 0x038c 418 (46 49) */ fdtox %f18,%f18 +/* 0x0390 419 (46 47) */ add %g5,%o0,%l4 +/* 0x0394 420 (46 49) */ fmuld %f20,%f6,%f12 +/* 0x0398 421 (47 48) */ std %f18,[%sp+2295] +/* 0x039c 422 (47 48) */ srax %l2,32,%o0 +/* 0x03a0 423 (47 50) */ fitod %f23,%f16 +/* 0x03a4 424 (48 51) */ ld [%i1+48],%o5 +/* 0x03a8 425 (48 51) */ fsubd %f14,%f8,%f8 +/* 0x03ac 426 (48 49) */ add %l4,%o0,%l4 +/* 0x03b0 427 (49 50) */ st %l4,[%i0+16] +/* 0x03b4 428 (49 50) */ srax %l4,32,%o0 +/* 0x03b8 429 (49 51) */ fxnor %f0,%f24,%f18 +/* 0x03bc 430 (50 52) */ ldx [%sp+2303],%g5 +/* 0x03c0 431 (50 53) */ fdtox %f4,%f4 +/* 0x03c4 432 (51 52) */ std %f4,[%sp+2287] +/* 0x03c8 433 (51 54) */ fdtox %f12,%f12 +/* 0x03cc 434 (51 54) */ fmuld %f10,%f6,%f4 +/* 0x03d0 435 (52 53) */ std %f12,[%sp+2279] +/* 0x03d4 436 (52 55) */ fsubd %f14,%f16,%f12 +/* 0x03d8 437 (52 53) */ add %g5,%o1,%g2 +/* 0x03dc 438 (52 55) */ fmuld %f8,%f6,%f8 +/* 0x03e0 439 (53 55) */ ldx [%sp+2295],%g5 +/* 0x03e4 440 (53 56) */ fitod %f18,%f10 +/* 0x03e8 441 (53 54) */ add %g2,%o0,%g2 +/* 0x03ec 442 (54 55) */ st %g2,[%i0+20] +/* 0x03f0 443 (54 57) */ fitod %f19,%f16 +/* 0x03f4 444 (54 55) */ srax %g2,32,%o0 +/* 0x03f8 445 (55 58) */ fdtox %f8,%f8 +/* 0x03fc 446 (55 
56) */ std %f8,[%sp+2263] +/* 0x0400 447 (55 56) */ add %g5,%o2,%g3 +/* 0x0404 448 (56 58) */ ldx [%sp+2287],%g5 +/* 0x0408 449 (56 59) */ fsubd %f14,%f10,%f10 +/* 0x040c 450 (56 57) */ add %g3,%o0,%g3 +/* 0x0410 451 (57 58) */ st %g3,[%i0+24] +/* 0x0414 452 (57 60) */ fsubd %f14,%f16,%f8 +/* 0x0418 453 (57 58) */ srax %g3,32,%o0 +/* 0x041c 454 (58 61) */ fdtox %f4,%f4 +/* 0x0420 455 (58 59) */ std %f4,[%sp+2271] +/* 0x0424 456 (58 59) */ add %g5,%o3,%g4 +/* 0x0428 457 (59 61) */ fxnor %f0,%f26,%f18 +/* 0x042c 458 (59 62) */ fmuld %f12,%f6,%f4 +/* 0x0430 459 (59 60) */ add %g4,%o0,%g4 +/* 0x0434 460 (60 61) */ st %g4,[%i0+28] +/* 0x0438 461 (60 63) */ fmuld %f10,%f6,%f10 +/* 0x043c 462 (60 61) */ srax %g4,32,%o0 +/* 0x0440 463 (61 63) */ ldx [%sp+2279],%g5 +/* 0x0444 464 (61 64) */ fitod %f18,%f12 +/* 0x0448 465 (61 64) */ fmuld %f8,%f6,%f8 +/* 0x044c 466 (62 65) */ fdtox %f4,%f4 +/* 0x0450 467 (62 63) */ std %f4,[%sp+2255] +/* 0x0454 468 (63 64) */ add %g5,%o4,%l2 +/* 0x0458 469 (63 65) */ ldx [%sp+2271],%g5 +/* 0x045c 470 (63 66) */ fdtox %f10,%f16 +/* 0x0460 471 (64 67) */ fsubd %f14,%f12,%f4 +/* 0x0464 472 (64 65) */ std %f16,[%sp+2247] +/* 0x0468 473 (64 65) */ add %l2,%o0,%l2 +/* 0x046c 474 (65 68) */ fdtox %f8,%f8 +/* 0x0470 475 (65 66) */ std %f8,[%sp+2239] +/* 0x0474 476 (65 66) */ add %g5,%o7,%l4 +/* 0x0478 477 (66 69) */ fitod %f19,%f10 +/* 0x047c 478 (66 68) */ ldx [%sp+2263],%g5 +/* 0x0480 479 (66 67) */ srax %l2,32,%o0 +/* 0x0484 480 (67 68) */ add %l4,%o0,%l4 +/* 0x0488 481 (67 70) */ fmuld %f4,%f6,%f4 +/* 0x048c 482 (67 69) */ ldx [%sp+2255],%o0 +/* 0x0490 483 (68 69) */ srax %l4,32,%o1 +/* 0x0494 484 (68 69) */ add %g5,%l1,%l1 +/* 0x0498 485 (68 69) */ st %l2,[%i0+32] +/* 0x049c 486 (69 72) */ fsubd %f14,%f10,%f8 +/* 0x04a0 487 (69 71) */ ldx [%sp+2239],%o3 +/* 0x04a4 488 (69 70) */ add %l1,%o1,%o1 +/* 0x04a8 489 (70 72) */ ldx [%sp+2247],%g5 +/* 0x04ac 490 (70 71) */ srax %o1,32,%o2 +/* 0x04b0 491 (70 71) */ add %o0,%l0,%o0 +/* 0x04b4 492 (71 74) 
*/ fdtox %f4,%f4 +/* 0x04b8 493 (71 72) */ std %f4,[%sp+2231] +/* 0x04bc 494 (71 72) */ add %o0,%o2,%o2 +/* 0x04c0 495 (72 73) */ add %o3,%l3,%l3 +/* 0x04c4 496 (72 75) */ fmuld %f8,%f6,%f4 +/* 0x04c8 497 (72 73) */ add %g5,%o5,%g5 +/* 0x04cc 498 (73 74) */ srax %o2,32,%o3 +/* 0x04d0 499 (73 74) */ st %l4,[%i0+36] +/* 0x04d4 500 (74 75) */ add %g5,%o3,%g2 +/* 0x04d8 501 (74 76) */ ldx [%sp+2231],%o0 +/* 0x04dc 502 (75 76) */ srax %g2,32,%g3 +/* 0x04e0 503 (75 78) */ fdtox %f4,%f4 +/* 0x04e4 504 (75 76) */ std %f4,[%sp+2223] +/* 0x04e8 505 (76 77) */ st %o1,[%i0+40] +/* 0x04ec 506 (76 77) */ add %l3,%g3,%g3 +/* 0x04f0 507 (76 77) */ add %o0,%l5,%g5 +/* 0x04f4 508 (77 78) */ st %o2,[%i0+44] +/* 0x04f8 509 (77 78) */ srax %g3,32,%g4 +/* 0x04fc 510 (78 79) */ st %g2,[%i0+48] +/* 0x0500 511 (78 79) */ add %g5,%g4,%g4 +/* 0x0504 512 (79 80) */ st %g3,[%i0+52] +/* 0x0508 513 (79 80) */ srax %g4,32,%g5 +/* 0x050c 514 (80 83) */ ld [%i1+60],%g3 +/* 0x0510 515 (81 83) */ ldx [%sp+2223],%g2 +/* 0x0514 516 (82 83) */ st %g4,[%i0+56] +/* 0x0518 517 (83 84) */ add %g2,%g3,%g2 +/* 0x051c 518 (84 85) */ add %g2,%g5,%g2 +/* 0x0520 519 (84 85) */ st %g2,[%i0+60] +/* 0x0524 523 (85 86) */ srax %g2,32,%o3 +/* 0x0528 524 (86 87) */ srl %o3,0,%i0 +/* 0x052c (87 89) */ ret ! Result = %o1 %o0 %f0 %f1 +/* 0x0530 (89 90) */ restore %g0,%g0,%g0 + +! +! ENTRY .L900000157 +! 
+ + .L900000157: /* frequency 1.0 confidence 0.0 */ +/* 0x0534 532 ( 0 1) */ fmovd %f0,%f14 +/* 0x0538 533 ( 0 3) */ ldd [%o0],%f8 +/* 0x053c 539 ( 0 1) */ add %i3,1,%g2 +/* 0x0540 540 ( 1 4) */ ld [%sp+2223],%f7 +/* 0x0544 541 ( 1 2) */ srl %g2,31,%g3 +/* 0x0548 545 ( 1 2) */ add %fp,-217,%g4 +/* 0x054c 546 ( 2 3) */ add %g2,%g3,%g2 +/* 0x0550 547 ( 2 3) */ or %g0,0,%g5 +/* 0x0554 548 ( 2 5) */ ldd [%o0+8],%f18 +/* 0x0558 549 ( 3 4) */ fmovs %f8,%f6 +/* 0x055c 550 ( 3 4) */ sra %g2,1,%o1 +/* 0x0560 551 ( 3 4) */ or %g0,0,%o0 +/* 0x0564 552 ( 4 5) */ subcc %o1,0,%g0 +/* 0x0568 553 ( 5 6) */ or %g0,%o1,%o3 +/* 0x056c 554 ( 5 8) */ fsubd %f6,%f8,%f16 +/* 0x0570 555 ( 5 6) */ ble,pt %icc,.L900000156 ! tprob=0.50 +/* 0x0574 ( 6 7) */ subcc %i3,0,%g0 +/* 0x0578 557 ( 6 7) */ sub %o1,1,%g2 +/* 0x057c 558 ( 7 8) */ or %g0,0,%i0 +/* 0x0580 559 ( 7 8) */ or %g0,1,%g3 +/* 0x0584 560 ( 8 9) */ subcc %o3,10,%g0 +/* 0x0588 561 ( 8 9) */ bl,pn %icc,.L77000077 ! tprob=0.50 +/* 0x058c ( 9 10) */ or %g0,0,%o1 +/* 0x0590 563 ( 9 12) */ ldd [%i2+8],%f0 +/* 0x0594 564 ( 9 10) */ sub %o3,3,%o3 +/* 0x0598 565 (10 13) */ ldd [%i2],%f2 +/* 0x059c 566 (10 11) */ or %g0,7,%o0 +/* 0x05a0 567 (10 11) */ or %g0,2,%i0 +/* 0x05a4 568 (11 13) */ fxnor %f14,%f0,%f8 +/* 0x05a8 569 (11 14) */ ldd [%i2+16],%f4 +/* 0x05ac 570 (11 12) */ or %g0,16,%o2 +/* 0x05b0 571 (12 14) */ fxnor %f14,%f2,%f2 +/* 0x05b4 572 (12 15) */ ldd [%i2+24],%f6 +/* 0x05b8 573 (12 13) */ or %g0,48,%o4 +/* 0x05bc 574 (13 16) */ fitod %f8,%f12 +/* 0x05c0 575 (13 14) */ or %g0,24,%o1 +/* 0x05c4 576 (13 14) */ or %g0,3,%g3 +/* 0x05c8 577 (14 17) */ fitod %f2,%f0 +/* 0x05cc 578 (15 18) */ fitod %f3,%f20 +/* 0x05d0 579 (15 18) */ ldd [%i2+32],%f2 +/* 0x05d4 580 (16 19) */ fitod %f9,%f10 +/* 0x05d8 581 (16 19) */ ldd [%i2+40],%f8 +/* 0x05dc 582 (17 20) */ fsubd %f18,%f0,%f0 +/* 0x05e0 583 (18 21) */ fsubd %f18,%f20,%f22 +/* 0x05e4 584 (19 22) */ fsubd %f18,%f12,%f20 +/* 0x05e8 585 (19 22) */ ldd [%i2+48],%f12 +/* 0x05ec 586 (20 23) 
*/ fsubd %f18,%f10,%f10 +/* 0x05f0 587 (20 23) */ fmuld %f0,%f16,%f0 +/* 0x05f4 588 (21 23) */ fxnor %f14,%f4,%f4 +/* 0x05f8 589 (21 24) */ fmuld %f22,%f16,%f22 +/* 0x05fc 590 (22 24) */ fxnor %f14,%f6,%f6 +/* 0x0600 591 (22 25) */ fmuld %f20,%f16,%f20 +/* 0x0604 592 (23 26) */ fdtox %f0,%f0 +/* 0x0608 593 (23 24) */ std %f0,[%fp-217] +/* 0x060c 594 (23 26) */ fmuld %f10,%f16,%f10 +/* 0x0610 595 (24 27) */ fdtox %f22,%f22 +/* 0x0614 596 (24 25) */ std %f22,[%fp-209] +/* 0x0618 597 (25 28) */ fitod %f5,%f0 +/* 0x061c 598 (26 29) */ fdtox %f10,%f10 +/* 0x0620 599 (27 30) */ fdtox %f20,%f20 +/* 0x0624 600 (27 28) */ std %f20,[%fp-201] +/* 0x0628 601 (28 31) */ fitod %f4,%f4 +/* 0x062c 602 (28 29) */ std %f10,[%fp-193] +/* 0x0630 603 (29 31) */ fxnor %f14,%f2,%f10 +/* 0x0634 604 (30 33) */ fitod %f7,%f2 +/* 0x0638 605 (31 34) */ fsubd %f18,%f0,%f0 +/* 0x063c 606 (32 35) */ fsubd %f18,%f4,%f4 +/* 0x0640 607 (33 35) */ fxnor %f14,%f8,%f8 + +! +! ENTRY .L900000144 +! + + .L900000144: /* frequency 1.0 confidence 0.0 */ +/* 0x0644 609 ( 0 3) */ fitod %f11,%f22 +/* 0x0648 610 ( 0 1) */ add %o0,3,%o0 +/* 0x064c 611 ( 0 1) */ add %g3,6,%g3 +/* 0x0650 612 ( 0 3) */ fmuld %f0,%f16,%f0 +/* 0x0654 613 ( 1 4) */ fmuld %f4,%f16,%f24 +/* 0x0658 614 ( 1 2) */ subcc %o0,%o3,%g0 +/* 0x065c 615 ( 1 2) */ add %i0,6,%i0 +/* 0x0660 616 ( 1 4) */ fsubd %f18,%f2,%f2 +/* 0x0664 617 ( 2 5) */ fitod %f6,%f4 +/* 0x0668 618 ( 3 6) */ fdtox %f0,%f0 +/* 0x066c 619 ( 3 4) */ add %o4,8,%i1 +/* 0x0670 620 ( 4 7) */ ldd [%i2+%i1],%f20 +/* 0x0674 621 ( 4 7) */ fdtox %f24,%f6 +/* 0x0678 622 ( 4 5) */ add %o2,16,%o4 +/* 0x067c 623 ( 5 8) */ fsubd %f18,%f4,%f4 +/* 0x0680 624 ( 5 6) */ std %f6,[%o4+%g4] +/* 0x0684 625 ( 5 6) */ add %o1,16,%o2 +/* 0x0688 626 ( 6 8) */ fxnor %f14,%f12,%f6 +/* 0x068c 627 ( 6 7) */ std %f0,[%o2+%g4] +/* 0x0690 628 ( 7 10) */ fitod %f9,%f0 +/* 0x0694 629 ( 7 10) */ fmuld %f2,%f16,%f2 +/* 0x0698 630 ( 8 11) */ fmuld %f4,%f16,%f24 +/* 0x069c 631 ( 8 11) */ fsubd %f18,%f22,%f12 +/* 
0x06a0 632 ( 9 12) */ fitod %f10,%f4 +/* 0x06a4 633 (10 13) */ fdtox %f2,%f2 +/* 0x06a8 634 (10 11) */ add %i1,8,%o1 +/* 0x06ac 635 (11 14) */ ldd [%i2+%o1],%f22 +/* 0x06b0 636 (11 14) */ fdtox %f24,%f10 +/* 0x06b4 637 (11 12) */ add %o4,16,%i4 +/* 0x06b8 638 (12 15) */ fsubd %f18,%f4,%f4 +/* 0x06bc 639 (12 13) */ std %f10,[%i4+%g4] +/* 0x06c0 640 (12 13) */ add %o2,16,%i1 +/* 0x06c4 641 (13 15) */ fxnor %f14,%f20,%f10 +/* 0x06c8 642 (13 14) */ std %f2,[%i1+%g4] +/* 0x06cc 643 (14 17) */ fitod %f7,%f2 +/* 0x06d0 644 (14 17) */ fmuld %f12,%f16,%f12 +/* 0x06d4 645 (15 18) */ fmuld %f4,%f16,%f24 +/* 0x06d8 646 (15 18) */ fsubd %f18,%f0,%f0 +/* 0x06dc 647 (16 19) */ fitod %f8,%f4 +/* 0x06e0 648 (17 20) */ fdtox %f12,%f20 +/* 0x06e4 649 (17 18) */ add %o1,8,%o4 +/* 0x06e8 650 (18 21) */ ldd [%i2+%o4],%f12 +/* 0x06ec 651 (18 21) */ fdtox %f24,%f8 +/* 0x06f0 652 (18 19) */ add %i4,16,%o2 +/* 0x06f4 653 (19 22) */ fsubd %f18,%f4,%f4 +/* 0x06f8 654 (19 20) */ std %f8,[%o2+%g4] +/* 0x06fc 655 (19 20) */ add %i1,16,%o1 +/* 0x0700 656 (20 22) */ fxnor %f14,%f22,%f8 +/* 0x0704 657 (20 21) */ ble,pt %icc,.L900000144 ! tprob=0.50 +/* 0x0708 (20 21) */ std %f20,[%o1+%g4] + +! +! ENTRY .L900000147 +! 
+ + .L900000147: /* frequency 1.0 confidence 0.0 */ +/* 0x070c 660 ( 0 3) */ fitod %f6,%f6 +/* 0x0710 661 ( 0 3) */ fmuld %f4,%f16,%f24 +/* 0x0714 662 ( 0 1) */ add %i4,32,%l4 +/* 0x0718 663 ( 1 4) */ fsubd %f18,%f2,%f2 +/* 0x071c 664 ( 1 4) */ fmuld %f0,%f16,%f22 +/* 0x0720 665 ( 1 2) */ add %i1,32,%l3 +/* 0x0724 666 ( 2 5) */ fitod %f10,%f28 +/* 0x0728 667 ( 2 3) */ sra %o0,0,%o2 +/* 0x072c 668 ( 2 3) */ add %i4,48,%l2 +/* 0x0730 669 ( 3 6) */ fsubd %f18,%f6,%f4 +/* 0x0734 670 ( 3 4) */ add %i1,48,%l1 +/* 0x0738 671 ( 3 4) */ add %i4,64,%l0 +/* 0x073c 672 ( 4 7) */ fitod %f11,%f26 +/* 0x0740 673 ( 4 5) */ sllx %o2,3,%o1 +/* 0x0744 674 ( 4 5) */ add %i1,64,%i5 +/* 0x0748 675 ( 5 8) */ fitod %f8,%f6 +/* 0x074c 676 ( 5 6) */ add %i4,80,%i4 +/* 0x0750 677 ( 5 6) */ add %i1,80,%i1 +/* 0x0754 678 ( 6 8) */ fxnor %f14,%f12,%f0 +/* 0x0758 679 ( 6 9) */ fmuld %f4,%f16,%f20 +/* 0x075c 680 ( 6 7) */ add %i4,16,%o4 +/* 0x0760 681 ( 7 10) */ fitod %f9,%f4 +/* 0x0764 682 ( 7 10) */ fmuld %f2,%f16,%f12 +/* 0x0768 683 ( 7 8) */ add %i1,16,%o3 +/* 0x076c 684 ( 8 11) */ fsubd %f18,%f28,%f10 +/* 0x0770 685 ( 8 9) */ subcc %o0,%g2,%g0 +/* 0x0774 686 ( 8 9) */ add %g3,12,%g3 +/* 0x0778 687 ( 9 12) */ fitod %f0,%f2 +/* 0x077c 688 (10 13) */ fsubd %f18,%f26,%f8 +/* 0x0780 689 (11 14) */ fitod %f1,%f0 +/* 0x0784 690 (11 14) */ fmuld %f10,%f16,%f10 +/* 0x0788 691 (12 15) */ fdtox %f24,%f24 +/* 0x078c 692 (12 13) */ std %f24,[%l4+%g4] +/* 0x0790 693 (12 13) */ add %i0,12,%i0 +/* 0x0794 694 (13 16) */ fsubd %f18,%f6,%f6 +/* 0x0798 695 (13 16) */ fmuld %f8,%f16,%f8 +/* 0x079c 696 (14 17) */ fdtox %f22,%f22 +/* 0x07a0 697 (14 15) */ std %f22,[%l3+%g4] +/* 0x07a4 698 (15 18) */ fsubd %f18,%f4,%f4 +/* 0x07a8 699 (16 19) */ fdtox %f20,%f20 +/* 0x07ac 700 (16 17) */ std %f20,[%l2+%g4] +/* 0x07b0 701 (16 19) */ fmuld %f6,%f16,%f6 +/* 0x07b4 702 (17 20) */ fsubd %f18,%f2,%f2 +/* 0x07b8 703 (18 21) */ fsubd %f18,%f0,%f0 +/* 0x07bc 704 (18 21) */ fmuld %f4,%f16,%f4 +/* 0x07c0 705 (19 22) */ fdtox 
%f12,%f12 +/* 0x07c4 706 (19 20) */ std %f12,[%l1+%g4] +/* 0x07c8 707 (20 23) */ fdtox %f10,%f10 +/* 0x07cc 708 (20 21) */ std %f10,[%l0+%g4] +/* 0x07d0 709 (20 23) */ fmuld %f2,%f16,%f2 +/* 0x07d4 710 (21 24) */ fdtox %f8,%f8 +/* 0x07d8 711 (21 22) */ std %f8,[%i5+%g4] +/* 0x07dc 712 (21 24) */ fmuld %f0,%f16,%f0 +/* 0x07e0 713 (22 25) */ fdtox %f6,%f6 +/* 0x07e4 714 (22 23) */ std %f6,[%i4+%g4] +/* 0x07e8 715 (23 26) */ fdtox %f4,%f4 +/* 0x07ec 716 (23 24) */ std %f4,[%i1+%g4] +/* 0x07f0 717 (24 27) */ fdtox %f2,%f2 +/* 0x07f4 718 (24 25) */ std %f2,[%o4+%g4] +/* 0x07f8 719 (25 28) */ fdtox %f0,%f0 +/* 0x07fc 720 (25 26) */ bg,pn %icc,.L77000043 ! tprob=0.50 +/* 0x0800 (25 26) */ std %f0,[%o3+%g4] + +! +! ENTRY .L77000077 +! + + .L77000077: /* frequency 1.0 confidence 0.0 */ +/* 0x0804 723 ( 0 3) */ ldd [%i2+%o1],%f0 + +! +! ENTRY .L900000155 +! + + .L900000155: /* frequency 1.0 confidence 0.0 */ +/* 0x0808 725 ( 0 2) */ fxnor %f14,%f0,%f0 +/* 0x080c 726 ( 0 1) */ sra %i0,0,%o1 +/* 0x0810 727 ( 0 1) */ add %o0,1,%o0 +/* 0x0814 728 ( 1 2) */ sllx %o1,3,%i4 +/* 0x0818 729 ( 1 2) */ add %i0,2,%i0 +/* 0x081c 730 ( 2 5) */ fitod %f0,%f2 +/* 0x0820 731 ( 2 3) */ sra %g3,0,%o1 +/* 0x0824 732 ( 2 3) */ add %g3,2,%g3 +/* 0x0828 733 ( 3 6) */ fitod %f1,%f0 +/* 0x082c 734 ( 3 4) */ sllx %o1,3,%i1 +/* 0x0830 735 ( 3 4) */ subcc %o0,%g2,%g0 +/* 0x0834 736 ( 4 5) */ sra %o0,0,%o2 +/* 0x0838 737 ( 5 8) */ fsubd %f18,%f2,%f2 +/* 0x083c 738 ( 5 6) */ sllx %o2,3,%o1 +/* 0x0840 739 ( 6 9) */ fsubd %f18,%f0,%f0 +/* 0x0844 740 ( 8 11) */ fmuld %f2,%f16,%f2 +/* 0x0848 741 ( 9 12) */ fmuld %f0,%f16,%f0 +/* 0x084c 742 (11 14) */ fdtox %f2,%f2 +/* 0x0850 743 (11 12) */ std %f2,[%i4+%g4] +/* 0x0854 744 (12 15) */ fdtox %f0,%f0 +/* 0x0858 745 (12 13) */ std %f0,[%i1+%g4] +/* 0x085c 746 (12 13) */ ble,a,pt %icc,.L900000155 ! tprob=0.50 +/* 0x0860 (14 17) */ ldd [%i2+%o1],%f0 + +! +! ENTRY .L77000043 +! 
+ + .L77000043: /* frequency 1.0 confidence 0.0 */ +/* 0x0864 754 ( 0 1) */ subcc %i3,0,%g0 + +! +! ENTRY .L900000156 +! + + .L900000156: /* frequency 1.0 confidence 0.0 */ +/* 0x0868 756 ( 0 1) */ ble,a,pt %icc,.L77000061 ! tprob=0.50 +/* 0x086c ( 0 1) */ or %g0,%g5,%o3 +/* 0x0870 761 ( 0 2) */ ldx [%fp-209],%i1 +/* 0x0874 762 ( 1 2) */ sub %i3,1,%g3 +/* 0x0878 763 ( 1 2) */ or %g0,0,%i0 +/* 0x087c 764 ( 2 3) */ subcc %i3,5,%g0 +/* 0x0880 765 ( 2 3) */ bl,pn %icc,.L77000078 ! tprob=0.50 +/* 0x0884 ( 2 4) */ ldx [%fp-217],%i2 +/* 0x0888 767 ( 3 6) */ ld [%o5],%i3 +/* 0x088c 768 ( 3 4) */ or %g0,8,%g2 +/* 0x0890 769 ( 3 4) */ or %g0,16,%o4 +/* 0x0894 770 ( 4 5) */ sub %g3,1,%o3 +/* 0x0898 771 ( 4 5) */ or %g0,3,%i0 +/* 0x089c 772 ( 5 6) */ add %i2,%i3,%o1 +/* 0x08a0 773 ( 5 8) */ ld [%o5+4],%i2 +/* 0x08a4 774 ( 6 7) */ st %o1,[%o7] +/* 0x08a8 775 ( 6 7) */ srax %o1,32,%o1 +/* 0x08ac 776 ( 7 9) */ ldx [%fp-201],%o2 +/* 0x08b0 777 ( 7 8) */ add %i1,%i2,%o0 +/* 0x08b4 778 ( 7 8) */ or %g0,%o1,%i1 +/* 0x08b8 779 ( 8 11) */ ld [%o5+8],%o1 +/* 0x08bc 780 ( 8 9) */ add %o0,%i1,%o0 +/* 0x08c0 781 ( 9 10) */ st %o0,[%o7+4] +/* 0x08c4 782 ( 9 10) */ srax %o0,32,%o0 + +! +! ENTRY .L900000140 +! 
+ + .L900000140: /* frequency 1.0 confidence 0.0 */ +/* 0x08c8 784 ( 0 1) */ add %g2,4,%i1 +/* 0x08cc 785 ( 0 1) */ add %o4,8,%o4 +/* 0x08d0 786 ( 1 3) */ ldx [%o4+%g4],%i2 +/* 0x08d4 787 ( 1 2) */ sra %o0,0,%g5 +/* 0x08d8 788 ( 1 2) */ add %o2,%o1,%o1 +/* 0x08dc 789 ( 2 5) */ ld [%o5+%i1],%o0 +/* 0x08e0 790 ( 2 3) */ add %o1,%g5,%o1 +/* 0x08e4 791 ( 2 3) */ add %i0,2,%i0 +/* 0x08e8 792 ( 3 4) */ st %o1,[%o7+%g2] +/* 0x08ec 793 ( 3 4) */ srax %o1,32,%g5 +/* 0x08f0 794 ( 3 4) */ subcc %i0,%o3,%g0 +/* 0x08f4 795 ( 4 5) */ add %g2,8,%g2 +/* 0x08f8 796 ( 4 5) */ add %o4,8,%o4 +/* 0x08fc 797 ( 5 7) */ ldx [%o4+%g4],%o2 +/* 0x0900 798 ( 5 6) */ add %i2,%o0,%o0 +/* 0x0904 799 ( 6 9) */ ld [%o5+%g2],%o1 +/* 0x0908 800 ( 6 7) */ add %o0,%g5,%o0 +/* 0x090c 801 ( 7 8) */ st %o0,[%o7+%i1] +/* 0x0910 802 ( 7 8) */ ble,pt %icc,.L900000140 ! tprob=0.50 +/* 0x0914 ( 7 8) */ srax %o0,32,%o0 + +! +! ENTRY .L900000143 +! + + .L900000143: /* frequency 1.0 confidence 0.0 */ +/* 0x0918 805 ( 0 1) */ sra %o0,0,%o3 +/* 0x091c 806 ( 0 1) */ add %o2,%o1,%o0 +/* 0x0920 807 ( 1 2) */ add %o0,%o3,%o0 +/* 0x0924 808 ( 1 2) */ st %o0,[%o7+%g2] +/* 0x0928 809 ( 1 2) */ subcc %i0,%g3,%g0 +/* 0x092c 810 ( 2 3) */ srax %o0,32,%g5 +/* 0x0930 811 ( 2 3) */ bg,a,pn %icc,.L77000061 ! tprob=0.50 +/* 0x0934 ( 3 4) */ or %g0,%g5,%o3 + +! +! ENTRY .L77000078 +! + + .L77000078: /* frequency 1.0 confidence 0.0 */ +/* 0x0938 814 ( 0 1) */ sra %i0,0,%o0 + +! +! ENTRY .L900000154 +! 
+ + .L900000154: /* frequency 1.0 confidence 0.0 */ +/* 0x093c 816 ( 0 1) */ sllx %o0,2,%g2 +/* 0x0940 817 ( 0 1) */ add %i0,1,%i0 +/* 0x0944 818 ( 1 2) */ sllx %o0,3,%o4 +/* 0x0948 819 ( 1 4) */ ld [%o5+%g2],%o2 +/* 0x094c 820 ( 1 2) */ subcc %i0,%g3,%g0 +/* 0x0950 821 ( 2 4) */ ldx [%o4+%g4],%o0 +/* 0x0954 822 ( 2 3) */ sra %g5,0,%o1 +/* 0x0958 823 ( 4 5) */ add %o0,%o2,%o0 +/* 0x095c 824 ( 5 6) */ add %o0,%o1,%o0 +/* 0x0960 825 ( 5 6) */ st %o0,[%o7+%g2] +/* 0x0964 826 ( 6 7) */ srax %o0,32,%g5 +/* 0x0968 827 ( 6 7) */ ble,pt %icc,.L900000154 ! tprob=0.50 +/* 0x096c ( 7 8) */ sra %i0,0,%o0 + +! +! ENTRY .L77000047 +! + + .L77000047: /* frequency 1.0 confidence 0.0 */ +/* 0x0970 834 ( 0 1) */ or %g0,%g5,%o3 + +! +! ENTRY .L77000061 +! + + .L77000061: /* frequency 1.0 confidence 0.0 */ + +/* 0x0974 835 ( 1 2) */ srl %o3,0,%i0 +/* 0x0978 ( 2 4) */ ret ! Result = %o1 %o0 %f0 %f1 +/* 0x097c ( 4 5) */ restore %g0,%g0,%g0 + +! +! ENTRY .L77000048 +! + + .L77000048: /* frequency 1.0 confidence 0.0 */ +/* 0x0980 844 ( 0 1) */ bne,pn %icc,.L77000050 ! 
tprob=0.50 +/* 0x0984 ( 0 1) */ sethi %hi(0xfff80000),%g2 +/* 0x0988 854 ( 0 3) */ ldd [%o2],%f4 +/* 0x098c 855 ( 1 4) */ ldd [%o0],%f6 +/* 0x0990 856 ( 1 2) */ srl %i4,19,%g3 +/* 0x0994 857 ( 1 2) */ andn %i4,%g2,%g2 +/* 0x0998 858 ( 2 3) */ st %g3,[%sp+2351] +/* 0x099c 859 ( 2 4) */ fxnor %f0,%f4,%f4 +/* 0x09a0 860 ( 3 4) */ st %g2,[%sp+2355] +/* 0x09a4 861 ( 4 7) */ ldd [%o2+8],%f12 +/* 0x09a8 862 ( 4 7) */ fitod %f4,%f10 +/* 0x09ac 863 ( 5 8) */ ldd [%o0+8],%f16 +/* 0x09b0 864 ( 5 8) */ fitod %f5,%f4 +/* 0x09b4 865 ( 6 9) */ ldd [%o2+16],%f18 +/* 0x09b8 866 ( 6 8) */ fxnor %f0,%f12,%f12 +/* 0x09bc 867 ( 7 10) */ ld [%sp+2351],%f9 +/* 0x09c0 868 ( 7 10) */ fsubd %f16,%f10,%f10 +/* 0x09c4 869 ( 8 11) */ ld [%sp+2355],%f15 +/* 0x09c8 870 ( 8 11) */ fitod %f12,%f22 +/* 0x09cc 871 ( 9 12) */ ldd [%o2+24],%f20 +/* 0x09d0 872 ( 9 12) */ fitod %f13,%f12 +/* 0x09d4 876 (10 13) */ ld [%i1],%g2 +/* 0x09d8 877 (10 13) */ fsubd %f16,%f4,%f4 +/* 0x09dc 878 (11 14) */ ld [%i1+4],%g3 +/* 0x09e0 879 (11 14) */ fsubd %f16,%f22,%f22 +/* 0x09e4 880 (12 15) */ ld [%i1+8],%g4 +/* 0x09e8 881 (12 14) */ fxnor %f0,%f18,%f18 +/* 0x09ec 882 (13 16) */ ld [%i1+12],%g5 +/* 0x09f0 883 (13 16) */ fsubd %f16,%f12,%f12 +/* 0x09f4 884 (14 17) */ ld [%i1+16],%o0 +/* 0x09f8 885 (14 17) */ fitod %f18,%f26 +/* 0x09fc 886 (15 18) */ ld [%i1+20],%o1 +/* 0x0a00 887 (15 17) */ fxnor %f0,%f20,%f20 +/* 0x0a04 888 (16 19) */ ld [%i1+24],%o2 +/* 0x0a08 889 (17 20) */ ld [%i1+28],%o3 +/* 0x0a0c 890 (19 20) */ fmovs %f6,%f8 +/* 0x0a10 891 (20 21) */ fmovs %f6,%f14 +/* 0x0a14 892 (22 25) */ fsubd %f8,%f6,%f8 +/* 0x0a18 893 (23 26) */ fsubd %f14,%f6,%f6 +/* 0x0a1c 894 (25 28) */ fmuld %f10,%f8,%f14 +/* 0x0a20 895 (26 29) */ fmuld %f10,%f6,%f10 +/* 0x0a24 896 (27 30) */ fmuld %f4,%f8,%f24 +/* 0x0a28 897 (28 31) */ fdtox %f14,%f14 +/* 0x0a2c 898 (28 29) */ std %f14,[%sp+2335] +/* 0x0a30 899 (28 31) */ fmuld %f22,%f8,%f28 +/* 0x0a34 900 (29 32) */ fitod %f19,%f14 +/* 0x0a38 901 (29 32) */ fmuld %f22,%f6,%f18 +/* 
0x0a3c 902 (30 33) */ fdtox %f10,%f10 +/* 0x0a40 903 (30 31) */ std %f10,[%sp+2343] +/* 0x0a44 904 (30 33) */ fmuld %f4,%f6,%f4 +/* 0x0a48 905 (31 34) */ fmuld %f12,%f8,%f22 +/* 0x0a4c 906 (32 35) */ fdtox %f18,%f18 +/* 0x0a50 907 (32 33) */ std %f18,[%sp+2311] +/* 0x0a54 908 (32 35) */ fmuld %f12,%f6,%f10 +/* 0x0a58 909 (33 35) */ ldx [%sp+2335],%o4 +/* 0x0a5c 910 (33 36) */ fdtox %f24,%f12 +/* 0x0a60 911 (34 35) */ std %f12,[%sp+2319] +/* 0x0a64 912 (34 37) */ fsubd %f16,%f26,%f12 +/* 0x0a68 913 (35 37) */ ldx [%sp+2343],%o5 +/* 0x0a6c 914 (35 36) */ sllx %o4,19,%o4 +/* 0x0a70 915 (35 38) */ fdtox %f4,%f4 +/* 0x0a74 916 (36 37) */ std %f4,[%sp+2327] +/* 0x0a78 917 (36 39) */ fdtox %f28,%f24 +/* 0x0a7c 918 (37 38) */ std %f24,[%sp+2303] +/* 0x0a80 919 (37 40) */ fitod %f20,%f4 +/* 0x0a84 920 (37 38) */ add %o5,%o4,%o4 +/* 0x0a88 921 (37 40) */ fmuld %f12,%f8,%f24 +/* 0x0a8c 922 (38 40) */ ldx [%sp+2319],%o7 +/* 0x0a90 923 (38 41) */ fsubd %f16,%f14,%f14 +/* 0x0a94 924 (38 39) */ add %o4,%g2,%o4 +/* 0x0a98 925 (38 41) */ fmuld %f12,%f6,%f12 +/* 0x0a9c 926 (39 41) */ ldx [%sp+2327],%o5 +/* 0x0aa0 927 (39 42) */ fitod %f21,%f18 +/* 0x0aa4 928 (40 41) */ st %o4,[%i0] +/* 0x0aa8 929 (40 41) */ sllx %o7,19,%o7 +/* 0x0aac 930 (40 43) */ fdtox %f22,%f20 +/* 0x0ab0 931 (41 42) */ std %f20,[%sp+2287] +/* 0x0ab4 932 (41 44) */ fdtox %f10,%f10 +/* 0x0ab8 933 (41 42) */ add %o5,%o7,%o5 +/* 0x0abc 934 (41 44) */ fmuld %f14,%f8,%f20 +/* 0x0ac0 935 (42 43) */ std %f10,[%sp+2295] +/* 0x0ac4 936 (42 43) */ srlx %o4,32,%o7 +/* 0x0ac8 937 (42 45) */ fsubd %f16,%f4,%f4 +/* 0x0acc 938 (42 45) */ fmuld %f14,%f6,%f14 +/* 0x0ad0 939 (43 45) */ ldx [%sp+2311],%g2 +/* 0x0ad4 940 (43 46) */ fdtox %f24,%f10 +/* 0x0ad8 941 (43 44) */ add %o5,%g3,%g3 +/* 0x0adc 942 (44 45) */ std %f10,[%sp+2271] +/* 0x0ae0 943 (44 45) */ add %g3,%o7,%g3 +/* 0x0ae4 944 (44 47) */ fdtox %f12,%f12 +/* 0x0ae8 945 (45 47) */ ldx [%sp+2303],%l0 +/* 0x0aec 946 (45 48) */ fsubd %f16,%f18,%f10 +/* 0x0af0 947 (45 48) */ 
fmuld %f4,%f8,%f16 +/* 0x0af4 948 (46 47) */ std %f12,[%sp+2279] +/* 0x0af8 949 (46 49) */ fdtox %f20,%f12 +/* 0x0afc 950 (46 49) */ fmuld %f4,%f6,%f4 +/* 0x0b00 951 (47 48) */ std %f12,[%sp+2255] +/* 0x0b04 952 (47 48) */ sllx %l0,19,%l0 +/* 0x0b08 953 (47 50) */ fdtox %f14,%f12 +/* 0x0b0c 954 (48 50) */ ldx [%sp+2287],%o5 +/* 0x0b10 955 (48 49) */ add %g2,%l0,%g2 +/* 0x0b14 956 (48 51) */ fmuld %f10,%f8,%f8 +/* 0x0b18 957 (49 51) */ ldx [%sp+2295],%l1 +/* 0x0b1c 958 (49 50) */ srlx %g3,32,%l0 +/* 0x0b20 959 (49 50) */ add %g2,%g4,%g4 +/* 0x0b24 960 (49 52) */ fmuld %f10,%f6,%f6 +/* 0x0b28 961 (50 51) */ std %f12,[%sp+2263] +/* 0x0b2c 962 (50 51) */ sllx %o5,19,%g2 +/* 0x0b30 963 (50 51) */ add %g4,%l0,%g4 +/* 0x0b34 964 (51 53) */ ldx [%sp+2279],%l0 +/* 0x0b38 965 (51 52) */ srlx %g4,32,%o5 +/* 0x0b3c 966 (51 52) */ add %l1,%g2,%g2 +/* 0x0b40 967 (52 53) */ st %g3,[%i0+4] +/* 0x0b44 968 (52 53) */ add %g2,%g5,%g2 +/* 0x0b48 969 (52 55) */ fdtox %f16,%f10 +/* 0x0b4c 970 (53 55) */ ldx [%sp+2271],%o7 +/* 0x0b50 971 (53 54) */ add %g2,%o5,%g2 +/* 0x0b54 972 (53 56) */ fdtox %f4,%f4 +/* 0x0b58 973 (54 55) */ std %f10,[%sp+2239] +/* 0x0b5c 974 (55 56) */ sllx %o7,19,%o7 +/* 0x0b60 975 (55 56) */ std %f4,[%sp+2247] +/* 0x0b64 976 (55 58) */ fdtox %f8,%f4 +/* 0x0b68 977 (56 57) */ add %l0,%o7,%o7 +/* 0x0b6c 978 (56 58) */ ldx [%sp+2263],%o5 +/* 0x0b70 979 (57 58) */ add %o7,%o0,%o0 +/* 0x0b74 980 (57 58) */ std %f4,[%sp+2223] +/* 0x0b78 981 (57 60) */ fdtox %f6,%f4 +/* 0x0b7c 982 (58 60) */ ldx [%sp+2255],%g5 +/* 0x0b80 983 (58 59) */ srlx %g2,32,%o7 +/* 0x0b84 984 (59 60) */ std %f4,[%sp+2231] +/* 0x0b88 985 (59 60) */ add %o0,%o7,%o0 +/* 0x0b8c 986 (60 61) */ sllx %g5,19,%g5 +/* 0x0b90 987 (60 62) */ ldx [%sp+2247],%l1 +/* 0x0b94 988 (61 62) */ add %o5,%g5,%g5 +/* 0x0b98 989 (61 62) */ st %g2,[%i0+12] +/* 0x0b9c 990 (62 64) */ ldx [%sp+2239],%l0 +/* 0x0ba0 991 (62 63) */ srlx %o0,32,%o4 +/* 0x0ba4 992 (62 63) */ add %g5,%o1,%o1 +/* 0x0ba8 993 (63 64) */ add 
%o1,%o4,%o1 +/* 0x0bac 994 (63 65) */ ldx [%sp+2223],%o7 +/* 0x0bb0 995 (64 65) */ sllx %l0,19,%g3 +/* 0x0bb4 996 (64 66) */ ldx [%sp+2231],%o5 +/* 0x0bb8 997 (65 66) */ add %l1,%g3,%o4 +/* 0x0bbc 998 (65 66) */ st %o0,[%i0+16] +/* 0x0bc0 999 (66 67) */ add %o4,%o2,%o2 +/* 0x0bc4 1000 (66 67) */ st %o1,[%i0+20] +/* 0x0bc8 1001 (67 68) */ srlx %o1,32,%o4 +/* 0x0bcc 1002 (67 68) */ st %g4,[%i0+8] +/* 0x0bd0 1003 (68 69) */ sllx %o7,19,%g2 +/* 0x0bd4 1004 (68 69) */ add %o2,%o4,%o4 +/* 0x0bd8 1005 (68 69) */ st %o4,[%i0+24] +/* 0x0bdc 1006 (69 70) */ add %o5,%g2,%g2 +/* 0x0be0 1007 (70 71) */ srlx %o4,32,%g3 +/* 0x0be4 1008 (70 71) */ add %g2,%o3,%g2 +/* 0x0be8 1009 (71 72) */ add %g2,%g3,%g2 +/* 0x0bec 1010 (71 72) */ st %g2,[%i0+28] +/* 0x0bf0 1014 (72 73) */ srlx %g2,32,%o3 +/* 0x0bf4 1015 (73 74) */ srl %o3,0,%i0 +/* 0x0bf8 (74 76) */ ret ! Result = %o1 %o0 %f0 %f1 +/* 0x0bfc (76 77) */ restore %g0,%g0,%g0 + +! +! ENTRY .L77000050 +! + + .L77000050: /* frequency 1.0 confidence 0.0 */ +/* 0x0c00 1022 ( 0 1) */ subcc %i3,16,%g0 +/* 0x0c04 1023 ( 0 1) */ bne,pn %icc,.L77000073 ! 
tprob=0.50 +/* 0x0c08 ( 0 1) */ sethi %hi(0xfff80000),%g2 +/* 0x0c0c 1034 ( 1 4) */ ldd [%o2],%f4 +/* 0x0c10 1035 ( 1 2) */ andn %i4,%g2,%g2 +/* 0x0c14 1036 ( 2 3) */ st %g2,[%sp+2483] +/* 0x0c18 1037 ( 2 3) */ srl %i4,19,%g2 +/* 0x0c1c 1038 ( 3 4) */ st %g2,[%sp+2479] +/* 0x0c20 1039 ( 3 5) */ fxnor %f0,%f4,%f4 +/* 0x0c24 1040 ( 4 7) */ ldd [%o0],%f8 +/* 0x0c28 1041 ( 5 8) */ fitod %f4,%f10 +/* 0x0c2c 1042 ( 5 8) */ ldd [%o0+8],%f16 +/* 0x0c30 1043 ( 6 9) */ ldd [%o2+8],%f14 +/* 0x0c34 1044 ( 6 9) */ fitod %f5,%f4 +/* 0x0c38 1045 ( 7 10) */ ld [%sp+2483],%f13 +/* 0x0c3c 1046 ( 8 11) */ ld [%sp+2479],%f7 +/* 0x0c40 1047 ( 8 11) */ fsubd %f16,%f10,%f10 +/* 0x0c44 1048 ( 9 11) */ fxnor %f0,%f14,%f14 +/* 0x0c48 1049 (10 13) */ fsubd %f16,%f4,%f4 +/* 0x0c4c 1050 (14 15) */ fmovs %f8,%f12 +/* 0x0c50 1051 (15 16) */ fmovs %f8,%f6 +/* 0x0c54 1052 (17 20) */ fsubd %f12,%f8,%f12 +/* 0x0c58 1053 (18 21) */ fsubd %f6,%f8,%f6 +/* 0x0c5c 1054 (19 22) */ fitod %f14,%f8 +/* 0x0c60 1055 (20 23) */ fmuld %f10,%f12,%f18 +/* 0x0c64 1056 (20 23) */ fitod %f15,%f14 +/* 0x0c68 1057 (21 24) */ fmuld %f10,%f6,%f10 +/* 0x0c6c 1058 (22 25) */ fsubd %f16,%f8,%f8 +/* 0x0c70 1059 (22 25) */ fmuld %f4,%f12,%f20 +/* 0x0c74 1060 (23 26) */ fmuld %f4,%f6,%f4 +/* 0x0c78 1061 (23 26) */ fsubd %f16,%f14,%f14 +/* 0x0c7c 1062 (24 27) */ fdtox %f10,%f10 +/* 0x0c80 1063 (24 25) */ std %f10,[%sp+2463] +/* 0x0c84 1064 (25 28) */ fmuld %f8,%f12,%f10 +/* 0x0c88 1065 (25 28) */ fdtox %f18,%f18 +/* 0x0c8c 1066 (25 26) */ std %f18,[%sp+2471] +/* 0x0c90 1067 (26 29) */ fmuld %f8,%f6,%f8 +/* 0x0c94 1068 (26 29) */ fdtox %f4,%f4 +/* 0x0c98 1069 (26 27) */ std %f4,[%sp+2447] +/* 0x0c9c 1070 (27 30) */ fmuld %f14,%f12,%f4 +/* 0x0ca0 1071 (27 30) */ fdtox %f20,%f18 +/* 0x0ca4 1072 (27 28) */ std %f18,[%sp+2455] +/* 0x0ca8 1073 (28 31) */ fdtox %f10,%f10 +/* 0x0cac 1074 (28 29) */ std %f10,[%sp+2439] +/* 0x0cb0 1075 (28 31) */ fmuld %f14,%f6,%f14 +/* 0x0cb4 1076 (29 32) */ fdtox %f8,%f8 +/* 0x0cb8 1077 (29 30) */ std 
%f8,[%sp+2431] +/* 0x0cbc 1078 (30 33) */ ldd [%o2+16],%f10 +/* 0x0cc0 1079 (30 33) */ fdtox %f4,%f4 +/* 0x0cc4 1080 (31 34) */ ldd [%o2+24],%f8 +/* 0x0cc8 1081 (31 34) */ fdtox %f14,%f14 +/* 0x0ccc 1082 (32 33) */ std %f4,[%sp+2423] +/* 0x0cd0 1083 (32 34) */ fxnor %f0,%f10,%f10 +/* 0x0cd4 1084 (33 35) */ fxnor %f0,%f8,%f4 +/* 0x0cd8 1085 (33 34) */ std %f14,[%sp+2415] +/* 0x0cdc 1086 (34 37) */ fitod %f10,%f8 +/* 0x0ce0 1087 (35 38) */ fitod %f11,%f10 +/* 0x0ce4 1088 (36 39) */ fitod %f4,%f14 +/* 0x0ce8 1089 (37 40) */ fsubd %f16,%f8,%f8 +/* 0x0cec 1090 (38 41) */ fsubd %f16,%f10,%f10 +/* 0x0cf0 1091 (39 42) */ fsubd %f16,%f14,%f14 +/* 0x0cf4 1092 (40 43) */ fmuld %f8,%f12,%f18 +/* 0x0cf8 1093 (40 43) */ fitod %f5,%f4 +/* 0x0cfc 1094 (41 44) */ fmuld %f8,%f6,%f8 +/* 0x0d00 1095 (42 45) */ fmuld %f10,%f12,%f20 +/* 0x0d04 1096 (43 46) */ fmuld %f10,%f6,%f10 +/* 0x0d08 1097 (43 46) */ fsubd %f16,%f4,%f4 +/* 0x0d0c 1098 (44 47) */ fdtox %f8,%f8 +/* 0x0d10 1099 (44 45) */ std %f8,[%sp+2399] +/* 0x0d14 1100 (45 48) */ fmuld %f14,%f12,%f8 +/* 0x0d18 1101 (45 48) */ fdtox %f18,%f18 +/* 0x0d1c 1102 (45 46) */ std %f18,[%sp+2407] +/* 0x0d20 1103 (46 49) */ fdtox %f10,%f10 +/* 0x0d24 1104 (46 47) */ std %f10,[%sp+2383] +/* 0x0d28 1105 (46 49) */ fmuld %f14,%f6,%f14 +/* 0x0d2c 1106 (47 50) */ fmuld %f4,%f12,%f10 +/* 0x0d30 1107 (47 50) */ fdtox %f20,%f18 +/* 0x0d34 1108 (47 48) */ std %f18,[%sp+2391] +/* 0x0d38 1109 (48 51) */ fdtox %f8,%f8 +/* 0x0d3c 1110 (48 49) */ std %f8,[%sp+2375] +/* 0x0d40 1111 (48 51) */ fmuld %f4,%f6,%f4 +/* 0x0d44 1112 (49 52) */ fdtox %f14,%f14 +/* 0x0d48 1113 (49 50) */ std %f14,[%sp+2367] +/* 0x0d4c 1117 (50 53) */ ldd [%o2+32],%f8 +/* 0x0d50 1118 (50 53) */ fdtox %f10,%f10 +/* 0x0d54 1119 (51 54) */ fdtox %f4,%f4 +/* 0x0d58 1120 (51 52) */ std %f4,[%sp+2351] +/* 0x0d5c 1121 (52 54) */ fxnor %f0,%f8,%f8 +/* 0x0d60 1122 (52 55) */ ldd [%o2+40],%f14 +/* 0x0d64 1123 (53 54) */ std %f10,[%sp+2359] +/* 0x0d68 1124 (54 57) */ fitod %f8,%f4 +/* 0x0d6c 
1125 (55 57) */ fxnor %f0,%f14,%f10 +/* 0x0d70 1126 (56 59) */ fitod %f9,%f8 +/* 0x0d74 1127 (57 60) */ fsubd %f16,%f4,%f4 +/* 0x0d78 1128 (58 61) */ fitod %f10,%f14 +/* 0x0d7c 1129 (59 62) */ fsubd %f16,%f8,%f8 +/* 0x0d80 1130 (60 63) */ fmuld %f4,%f12,%f18 +/* 0x0d84 1131 (60 63) */ fitod %f11,%f10 +/* 0x0d88 1132 (61 64) */ fmuld %f4,%f6,%f4 +/* 0x0d8c 1133 (61 64) */ fsubd %f16,%f14,%f14 +/* 0x0d90 1134 (62 65) */ fmuld %f8,%f12,%f20 +/* 0x0d94 1135 (63 66) */ fmuld %f8,%f6,%f8 +/* 0x0d98 1136 (63 66) */ fsubd %f16,%f10,%f10 +/* 0x0d9c 1137 (64 67) */ fdtox %f4,%f4 +/* 0x0da0 1138 (64 65) */ std %f4,[%sp+2335] +/* 0x0da4 1139 (65 68) */ fmuld %f14,%f12,%f4 +/* 0x0da8 1140 (65 68) */ fdtox %f18,%f18 +/* 0x0dac 1141 (65 66) */ std %f18,[%sp+2343] +/* 0x0db0 1142 (66 69) */ fdtox %f8,%f8 +/* 0x0db4 1143 (66 67) */ std %f8,[%sp+2319] +/* 0x0db8 1144 (66 69) */ fmuld %f14,%f6,%f14 +/* 0x0dbc 1145 (67 70) */ fmuld %f10,%f12,%f8 +/* 0x0dc0 1146 (67 70) */ fdtox %f20,%f18 +/* 0x0dc4 1147 (67 68) */ std %f18,[%sp+2327] +/* 0x0dc8 1148 (68 71) */ fdtox %f4,%f4 +/* 0x0dcc 1149 (68 69) */ std %f4,[%sp+2311] +/* 0x0dd0 1150 (68 71) */ fmuld %f10,%f6,%f10 +/* 0x0dd4 1151 (69 72) */ fdtox %f14,%f14 +/* 0x0dd8 1152 (69 70) */ std %f14,[%sp+2303] +/* 0x0ddc 1153 (70 73) */ ldd [%o2+48],%f4 +/* 0x0de0 1154 (70 73) */ fdtox %f8,%f8 +/* 0x0de4 1155 (71 74) */ fdtox %f10,%f10 +/* 0x0de8 1156 (71 72) */ std %f10,[%sp+2287] +/* 0x0dec 1157 (72 74) */ fxnor %f0,%f4,%f4 +/* 0x0df0 1158 (72 75) */ ldd [%o2+56],%f14 +/* 0x0df4 1159 (73 74) */ std %f8,[%sp+2295] +/* 0x0df8 1160 (74 77) */ fitod %f4,%f10 +/* 0x0dfc 1161 (75 78) */ fitod %f5,%f4 +/* 0x0e00 1162 (76 78) */ fxnor %f0,%f14,%f8 +/* 0x0e04 1163 (77 80) */ fsubd %f16,%f10,%f10 +/* 0x0e08 1164 (78 81) */ fsubd %f16,%f4,%f4 +/* 0x0e0c 1165 (79 82) */ fitod %f8,%f14 +/* 0x0e10 1166 (80 83) */ fmuld %f10,%f12,%f18 +/* 0x0e14 1167 (80 83) */ fitod %f9,%f8 +/* 0x0e18 1168 (81 84) */ fmuld %f10,%f6,%f10 +/* 0x0e1c 1169 (82 85) */ fmuld 
%f4,%f12,%f20 +/* 0x0e20 1170 (82 85) */ fsubd %f16,%f14,%f14 +/* 0x0e24 1171 (83 86) */ fdtox %f18,%f18 +/* 0x0e28 1172 (83 84) */ std %f18,[%sp+2279] +/* 0x0e2c 1173 (83 86) */ fmuld %f4,%f6,%f4 +/* 0x0e30 1174 (84 87) */ fdtox %f10,%f10 +/* 0x0e34 1175 (84 85) */ std %f10,[%sp+2271] +/* 0x0e38 1176 (85 88) */ fdtox %f20,%f10 +/* 0x0e3c 1177 (85 86) */ std %f10,[%sp+2263] +/* 0x0e40 1178 (86 89) */ fdtox %f4,%f4 +/* 0x0e44 1179 (86 87) */ std %f4,[%sp+2255] +/* 0x0e48 1180 (86 89) */ fmuld %f14,%f12,%f10 +/* 0x0e4c 1181 (87 90) */ fmuld %f14,%f6,%f4 +/* 0x0e50 1182 (89 92) */ fdtox %f10,%f10 +/* 0x0e54 1183 (89 90) */ std %f10,[%sp+2247] +/* 0x0e58 1184 (90 93) */ fdtox %f4,%f4 +/* 0x0e5c 1185 (90 91) */ std %f4,[%sp+2239] +/* 0x0e60 1189 (91 93) */ ldx [%sp+2463],%g2 +/* 0x0e64 1190 (91 94) */ fsubd %f16,%f8,%f4 +/* 0x0e68 1191 (92 94) */ ldx [%sp+2471],%g3 +/* 0x0e6c 1192 (93 96) */ ld [%i1],%g4 +/* 0x0e70 1193 (93 94) */ sllx %g2,19,%g2 +/* 0x0e74 1194 (94 96) */ ldx [%sp+2455],%g5 +/* 0x0e78 1195 (94 95) */ add %g3,%g2,%g2 +/* 0x0e7c 1196 (94 97) */ fmuld %f4,%f6,%f6 +/* 0x0e80 1197 (95 97) */ ldx [%sp+2447],%g3 +/* 0x0e84 1198 (95 96) */ add %g2,%g4,%g4 +/* 0x0e88 1199 (95 98) */ fmuld %f4,%f12,%f4 +/* 0x0e8c 1200 (96 97) */ st %g4,[%i0] +/* 0x0e90 1201 (96 97) */ srlx %g4,32,%g4 +/* 0x0e94 1202 (97 100) */ ld [%i1+8],%o0 +/* 0x0e98 1203 (97 98) */ sllx %g3,19,%g2 +/* 0x0e9c 1204 (97 100) */ fdtox %f6,%f6 +/* 0x0ea0 1205 (98 101) */ ld [%i1+4],%g3 +/* 0x0ea4 1206 (98 99) */ add %g5,%g2,%g2 +/* 0x0ea8 1207 (98 101) */ fdtox %f4,%f4 +/* 0x0eac 1208 (99 101) */ ldx [%sp+2439],%g5 +/* 0x0eb0 1209 (100 103) */ ld [%i1+12],%o1 +/* 0x0eb4 1210 (100 101) */ add %g2,%g3,%g2 +/* 0x0eb8 1211 (101 103) */ ldx [%sp+2431],%g3 +/* 0x0ebc 1212 (101 102) */ add %g2,%g4,%g4 +/* 0x0ec0 1213 (102 103) */ st %g4,[%i0+4] +/* 0x0ec4 1214 (103 104) */ std %f6,[%sp+2223] +/* 0x0ec8 1215 (103 104) */ sllx %g3,19,%g2 +/* 0x0ecc 1216 (104 106) */ ldx [%sp+2423],%g3 +/* 0x0ed0 1217 (104 
105) */ add %g5,%g2,%g2 +/* 0x0ed4 1218 (105 107) */ ldx [%sp+2415],%g5 +/* 0x0ed8 1219 (105 106) */ add %g2,%o0,%g2 +/* 0x0edc 1220 (106 107) */ std %f4,[%sp+2231] +/* 0x0ee0 1221 (106 107) */ srlx %g4,32,%o0 +/* 0x0ee4 1222 (107 109) */ ldx [%sp+2407],%g4 +/* 0x0ee8 1223 (107 108) */ sllx %g5,19,%g5 +/* 0x0eec 1224 (107 108) */ add %g2,%o0,%g2 +/* 0x0ef0 1225 (108 109) */ st %g2,[%i0+8] +/* 0x0ef4 1226 (108 109) */ srlx %g2,32,%o0 +/* 0x0ef8 1227 (108 109) */ add %g3,%g5,%g3 +/* 0x0efc 1228 (109 111) */ ldx [%sp+2399],%g5 +/* 0x0f00 1229 (109 110) */ add %g3,%o1,%g3 +/* 0x0f04 1230 (110 113) */ ld [%i1+16],%o1 +/* 0x0f08 1231 (110 111) */ add %g3,%o0,%g3 +/* 0x0f0c 1232 (111 112) */ st %g3,[%i0+12] +/* 0x0f10 1233 (111 112) */ sllx %g5,19,%g5 +/* 0x0f14 1234 (112 113) */ srlx %g3,32,%o0 +/* 0x0f18 1235 (112 113) */ add %g4,%g5,%g2 +/* 0x0f1c 1236 (112 114) */ ldx [%sp+2383],%g5 +/* 0x0f20 1237 (113 115) */ ldx [%sp+2391],%g4 +/* 0x0f24 1238 (113 114) */ add %g2,%o1,%g2 +/* 0x0f28 1239 (114 117) */ ld [%i1+20],%o1 +/* 0x0f2c 1240 (114 115) */ sllx %g5,19,%g5 +/* 0x0f30 1241 (114 115) */ add %g2,%o0,%g2 +/* 0x0f34 1242 (115 116) */ st %g2,[%i0+16] +/* 0x0f38 1243 (115 116) */ srlx %g2,32,%o0 +/* 0x0f3c 1244 (115 116) */ add %g4,%g5,%g3 +/* 0x0f40 1245 (116 118) */ ldx [%sp+2367],%g5 +/* 0x0f44 1246 (116 117) */ add %g3,%o1,%g3 +/* 0x0f48 1247 (117 119) */ ldx [%sp+2375],%g4 +/* 0x0f4c 1248 (117 118) */ add %g3,%o0,%g3 +/* 0x0f50 1249 (118 121) */ ld [%i1+24],%o1 +/* 0x0f54 1250 (118 119) */ sllx %g5,19,%g5 +/* 0x0f58 1251 (119 120) */ st %g3,[%i0+20] +/* 0x0f5c 1252 (119 120) */ add %g4,%g5,%g2 +/* 0x0f60 1253 (120 122) */ ldx [%sp+2351],%g5 +/* 0x0f64 1254 (120 121) */ srlx %g3,32,%o0 +/* 0x0f68 1255 (120 121) */ add %g2,%o1,%g2 +/* 0x0f6c 1256 (121 123) */ ldx [%sp+2359],%g4 +/* 0x0f70 1257 (121 122) */ add %g2,%o0,%g2 +/* 0x0f74 1258 (122 125) */ ld [%i1+28],%o1 +/* 0x0f78 1259 (122 123) */ sllx %g5,19,%g5 +/* 0x0f7c 1260 (123 124) */ st %g2,[%i0+24] +/* 0x0f80 
1261 (123 124) */ add %g4,%g5,%g3 +/* 0x0f84 1265 (124 126) */ ldx [%sp+2335],%g5 +/* 0x0f88 1266 (124 125) */ srlx %g2,32,%o0 +/* 0x0f8c 1267 (124 125) */ add %g3,%o1,%g3 +/* 0x0f90 1268 (125 127) */ ldx [%sp+2343],%g4 +/* 0x0f94 1269 (125 126) */ add %g3,%o0,%g3 +/* 0x0f98 1270 (126 127) */ sllx %g5,19,%g5 +/* 0x0f9c 1271 (126 129) */ ld [%i1+32],%o1 +/* 0x0fa0 1272 (127 128) */ add %g4,%g5,%g2 +/* 0x0fa4 1273 (127 129) */ ldx [%sp+2319],%g5 +/* 0x0fa8 1274 (128 130) */ ldx [%sp+2327],%g4 +/* 0x0fac 1275 (128 129) */ srlx %g3,32,%o0 +/* 0x0fb0 1276 (128 129) */ add %g2,%o1,%g2 +/* 0x0fb4 1277 (129 130) */ st %g3,[%i0+28] +/* 0x0fb8 1278 (129 130) */ sllx %g5,19,%g5 +/* 0x0fbc 1279 (129 130) */ add %g2,%o0,%g2 +/* 0x0fc0 1280 (130 133) */ ld [%i1+36],%o1 +/* 0x0fc4 1281 (130 131) */ add %g4,%g5,%g3 +/* 0x0fc8 1282 (131 133) */ ldx [%sp+2303],%g5 +/* 0x0fcc 1283 (131 132) */ srlx %g2,32,%o0 +/* 0x0fd0 1284 (132 134) */ ldx [%sp+2311],%g4 +/* 0x0fd4 1285 (132 133) */ add %g3,%o1,%g3 +/* 0x0fd8 1286 (133 134) */ sllx %g5,19,%g5 +/* 0x0fdc 1287 (133 134) */ st %g2,[%i0+32] +/* 0x0fe0 1288 (133 134) */ add %g3,%o0,%g3 +/* 0x0fe4 1289 (134 135) */ add %g4,%g5,%g2 +/* 0x0fe8 1290 (134 136) */ ldx [%sp+2287],%g5 +/* 0x0fec 1291 (135 137) */ ldx [%sp+2295],%g4 +/* 0x0ff0 1292 (135 136) */ srlx %g3,32,%o0 +/* 0x0ff4 1293 (136 139) */ ld [%i1+40],%o1 +/* 0x0ff8 1294 (136 137) */ sllx %g5,19,%g5 +/* 0x0ffc 1295 (137 138) */ st %g3,[%i0+36] +/* 0x1000 1296 (137 138) */ add %g4,%g5,%g3 +/* 0x1004 1297 (138 140) */ ldx [%sp+2271],%g5 +/* 0x1008 1298 (138 139) */ add %g2,%o1,%g2 +/* 0x100c 1299 (139 141) */ ldx [%sp+2279],%g4 +/* 0x1010 1300 (139 140) */ add %g2,%o0,%g2 +/* 0x1014 1301 (140 143) */ ld [%i1+44],%o1 +/* 0x1018 1302 (140 141) */ sllx %g5,19,%g5 +/* 0x101c 1303 (141 142) */ st %g2,[%i0+40] +/* 0x1020 1304 (141 142) */ srlx %g2,32,%o0 +/* 0x1024 1305 (141 142) */ add %g4,%g5,%g2 +/* 0x1028 1306 (142 144) */ ldx [%sp+2255],%g5 +/* 0x102c 1307 (142 143) */ add 
%g3,%o1,%g3 +/* 0x1030 1308 (143 145) */ ldx [%sp+2263],%g4 +/* 0x1034 1309 (143 144) */ add %g3,%o0,%g3 +/* 0x1038 1310 (144 147) */ ld [%i1+48],%o1 +/* 0x103c 1311 (144 145) */ sllx %g5,19,%g5 +/* 0x1040 1312 (145 146) */ srlx %g3,32,%o0 +/* 0x1044 1313 (145 146) */ st %g3,[%i0+44] +/* 0x1048 1314 (145 146) */ add %g4,%g5,%g3 +/* 0x104c 1315 (146 148) */ ldx [%sp+2239],%g5 +/* 0x1050 1316 (146 147) */ add %g2,%o1,%g2 +/* 0x1054 1317 (147 150) */ ld [%i1+52],%o1 +/* 0x1058 1318 (147 148) */ add %g2,%o0,%g2 +/* 0x105c 1319 (148 150) */ ldx [%sp+2247],%g4 +/* 0x1060 1320 (148 149) */ sllx %g5,19,%g5 +/* 0x1064 1321 (149 150) */ srlx %g2,32,%o0 +/* 0x1068 1322 (149 150) */ st %g2,[%i0+48] +/* 0x106c 1323 (149 150) */ add %g3,%o1,%g3 +/* 0x1070 1324 (150 153) */ ld [%i1+56],%o1 +/* 0x1074 1325 (150 151) */ add %g4,%g5,%g2 +/* 0x1078 1326 (150 151) */ add %g3,%o0,%g3 +/* 0x107c 1327 (151 153) */ ldx [%sp+2223],%g5 +/* 0x1080 1328 (151 152) */ srlx %g3,32,%o0 +/* 0x1084 1329 (152 154) */ ldx [%sp+2231],%g4 +/* 0x1088 1330 (152 153) */ add %g2,%o1,%g2 +/* 0x108c 1331 (153 154) */ sllx %g5,19,%g5 +/* 0x1090 1332 (153 156) */ ld [%i1+60],%o1 +/* 0x1094 1333 (153 154) */ add %g2,%o0,%g2 +/* 0x1098 1334 (154 155) */ st %g3,[%i0+52] +/* 0x109c 1335 (154 155) */ add %g4,%g5,%g3 +/* 0x10a0 1336 (155 156) */ st %g2,[%i0+56] +/* 0x10a4 1337 (155 156) */ srlx %g2,32,%g2 +/* 0x10a8 1338 (155 156) */ add %g3,%o1,%g3 +/* 0x10ac 1339 (156 157) */ add %g3,%g2,%g2 +/* 0x10b0 1340 (156 157) */ st %g2,[%i0+60] +/* 0x10b4 1344 (157 158) */ srlx %g2,32,%o3 +/* 0x10b8 1345 (158 159) */ srl %o3,0,%i0 +/* 0x10bc (159 161) */ ret ! Result = %o1 %o0 %f0 %f1 +/* 0x10c0 (161 162) */ restore %g0,%g0,%g0 + +! +! ENTRY .L77000073 +! + + .L77000073: /* frequency 1.0 confidence 0.0 */ + + + or %g0, %i4, %o2 + or %g0, %o0, %o1 + or %g0, %i3, %o0 + +! +! ENTRY .L77000052 +! 
+ + .L77000052: /* frequency 1.0 confidence 0.0 */ +/* 0x1028 1318 ( 0 1) */ andn %o2,%g2,%g2 +/* 0x102c 1319 ( 0 1) */ st %g2,[%sp+2227] +/* 0x1030 1325 ( 0 1) */ add %o0,1,%g3 +/* 0x1034 1326 ( 0 1) */ fmovd %f0,%f14 +/* 0x1038 1327 ( 1 2) */ srl %o2,19,%g2 +/* 0x103c 1328 ( 1 2) */ st %g2,[%sp+2223] +/* 0x1040 1329 ( 1 2) */ or %g0,0,%o5 +/* 0x1044 1330 ( 2 3) */ srl %g3,31,%g2 +/* 0x1048 1331 ( 2 5) */ ldd [%o1],%f6 +/* 0x104c 1335 ( 2 3) */ sethi %hi(0x1000),%g1 +/* 0x1050 1336 ( 3 4) */ add %g3,%g2,%g2 +/* 0x1054 1337 ( 3 4) */ xor %g1,-625,%g1 +/* 0x1058 1338 ( 3 6) */ ldd [%o1+8],%f20 +/* 0x105c 1339 ( 4 5) */ sra %g2,1,%o3 +/* 0x1060 1340 ( 4 5) */ fmovs %f6,%f8 +/* 0x1064 1341 ( 4 5) */ add %g1,%fp,%g3 +/* 0x1068 1342 ( 5 6) */ fmovs %f6,%f10 +/* 0x106c 1343 ( 5 7) */ ld [%sp+2227],%f9 +/* 0x1070 1344 ( 5 6) */ subcc %o3,0,%g0 +/* 0x1074 1345 ( 6 8) */ ld [%sp+2223],%f11 +/* 0x1078 1346 ( 6 7) */ sethi %hi(0x1000),%g1 +/* 0x107c 1347 ( 6 7) */ or %g0,%i2,%o1 +/* 0x1080 1348 ( 7 10) */ fsubd %f8,%f6,%f18 +/* 0x1084 1349 ( 7 8) */ xor %g1,-617,%g1 +/* 0x1088 1350 ( 7 8) */ or %g0,0,%g4 +/* 0x108c 1351 ( 8 11) */ fsubd %f10,%f6,%f16 +/* 0x1090 1352 ( 8 9) */ bleu,pt %icc,.L990000162 ! tprob=0.50 +/* 0x1094 ( 8 9) */ subcc %o0,0,%g0 +/* 0x1098 1354 ( 9 10) */ add %g1,%fp,%g2 +/* 0x109c 1355 ( 9 10) */ sethi %hi(0x1000),%g1 +/* 0x10a0 1356 (10 11) */ xor %g1,-609,%g1 +/* 0x10a4 1357 (10 11) */ subcc %o3,7,%g0 +/* 0x10a8 1358 (11 12) */ add %g1,%fp,%o7 +/* 0x10ac 1359 (11 12) */ sethi %hi(0x1000),%g1 +/* 0x10b0 1360 (12 13) */ xor %g1,-601,%g1 +/* 0x10b4 1361 (13 14) */ add %g1,%fp,%o4 +/* 0x10b8 1362 (13 14) */ bl,pn %icc,.L77000054 ! 
tprob=0.50 +/* 0x10bc (13 14) */ sub %o3,2,%o2 +/* 0x10c0 1364 (14 17) */ ldd [%o1],%f2 +/* 0x10c4 1365 (14 15) */ add %o1,16,%g5 +/* 0x10c8 1366 (14 15) */ or %g0,4,%g4 +/* 0x10cc 1367 (15 18) */ ldd [%o1+8],%f0 +/* 0x10d0 1368 (15 16) */ add %o1,8,%o1 +/* 0x10d4 1369 (16 18) */ fxnor %f14,%f2,%f6 +/* 0x10d8 1370 (16 19) */ ldd [%g5],%f4 +/* 0x10dc 1371 (16 17) */ add %o1,16,%o1 +/* 0x10e0 1372 (17 19) */ fxnor %f14,%f0,%f12 +/* 0x10e4 1373 (17 20) */ ldd [%o1],%f0 +/* 0x10e8 1374 (17 18) */ add %o1,8,%o1 +/* 0x10ec 1375 (18 21) */ fitod %f7,%f2 +/* 0x10f0 1376 (19 22) */ fitod %f6,%f6 +/* 0x10f4 1377 (20 22) */ fxnor %f14,%f4,%f10 +/* 0x10f8 1378 (21 24) */ fsubd %f20,%f2,%f2 +/* 0x10fc 1379 (22 24) */ fxnor %f14,%f0,%f8 +/* 0x1100 1380 (23 26) */ fitod %f13,%f4 +/* 0x1104 1381 (24 27) */ fsubd %f20,%f6,%f6 +/* 0x1108 1382 (24 27) */ fmuld %f2,%f16,%f0 + +! +! ENTRY .L990000154 +! + + .L990000154: /* frequency 1.0 confidence 0.0 */ +/* 0x110c 1384 ( 0 3) */ ldd [%o1],%f24 +/* 0x1110 1385 ( 0 1) */ add %g4,3,%g4 +/* 0x1114 1386 ( 0 1) */ add %o4,96,%o4 +/* 0x1118 1387 ( 1 4) */ fitod %f11,%f22 +/* 0x111c 1388 ( 2 5) */ fsubd %f20,%f4,%f26 +/* 0x1120 1389 ( 2 3) */ subcc %g4,%o2,%g0 +/* 0x1124 1390 ( 2 3) */ add %o7,96,%o7 +/* 0x1128 1391 ( 2 5) */ fmuld %f6,%f18,%f28 +/* 0x112c 1392 ( 3 6) */ fmuld %f6,%f16,%f6 +/* 0x1130 1393 ( 3 4) */ add %g2,96,%g2 +/* 0x1134 1394 ( 3 4) */ add %g3,96,%g3 +/* 0x1138 1395 ( 4 7) */ fdtox %f0,%f0 +/* 0x113c 1396 ( 5 8) */ fitod %f12,%f4 +/* 0x1140 1397 ( 5 8) */ fmuld %f2,%f18,%f2 +/* 0x1144 1398 ( 6 9) */ fdtox %f28,%f12 +/* 0x1148 1399 ( 7 10) */ fdtox %f6,%f6 +/* 0x114c 1400 ( 7 8) */ std %f12,[%g3-96] +/* 0x1150 1401 ( 8 9) */ std %f6,[%g2-96] +/* 0x1154 1402 ( 8 11) */ fdtox %f2,%f2 +/* 0x1158 1403 ( 9 12) */ fsubd %f20,%f4,%f6 +/* 0x115c 1404 ( 9 10) */ std %f2,[%o7-96] +/* 0x1160 1405 ( 9 10) */ add %o1,8,%o1 +/* 0x1164 1406 (10 12) */ fxnor %f14,%f24,%f12 +/* 0x1168 1407 (10 13) */ fmuld %f26,%f16,%f4 +/* 0x116c 1408 (10 
11) */ std %f0,[%o4-96] +/* 0x1170 1409 (11 14) */ ldd [%o1],%f0 +/* 0x1174 1410 (11 14) */ fitod %f9,%f2 +/* 0x1178 1411 (12 15) */ fsubd %f20,%f22,%f28 +/* 0x117c 1412 (12 15) */ fmuld %f6,%f18,%f24 +/* 0x1180 1413 (13 16) */ fmuld %f6,%f16,%f22 +/* 0x1184 1414 (13 16) */ fdtox %f4,%f4 +/* 0x1188 1415 (14 17) */ fitod %f10,%f6 +/* 0x118c 1416 (14 17) */ fmuld %f26,%f18,%f10 +/* 0x1190 1417 (15 18) */ fdtox %f24,%f24 +/* 0x1194 1418 (16 19) */ fdtox %f22,%f22 +/* 0x1198 1419 (16 17) */ std %f24,[%g3-64] +/* 0x119c 1420 (17 18) */ std %f22,[%g2-64] +/* 0x11a0 1421 (17 20) */ fdtox %f10,%f10 +/* 0x11a4 1422 (18 21) */ fsubd %f20,%f6,%f6 +/* 0x11a8 1423 (18 19) */ std %f10,[%o7-64] +/* 0x11ac 1424 (18 19) */ add %o1,8,%o1 +/* 0x11b0 1425 (19 21) */ fxnor %f14,%f0,%f10 +/* 0x11b4 1426 (19 22) */ fmuld %f28,%f16,%f0 +/* 0x11b8 1427 (19 20) */ std %f4,[%o4-64] +/* 0x11bc 1428 (20 23) */ ldd [%o1],%f22 +/* 0x11c0 1429 (20 23) */ fitod %f13,%f4 +/* 0x11c4 1430 (21 24) */ fsubd %f20,%f2,%f2 +/* 0x11c8 1431 (21 24) */ fmuld %f6,%f18,%f26 +/* 0x11cc 1432 (22 25) */ fmuld %f6,%f16,%f24 +/* 0x11d0 1433 (22 25) */ fdtox %f0,%f0 +/* 0x11d4 1434 (23 26) */ fitod %f8,%f6 +/* 0x11d8 1435 (23 26) */ fmuld %f28,%f18,%f8 +/* 0x11dc 1436 (24 27) */ fdtox %f26,%f26 +/* 0x11e0 1437 (25 28) */ fdtox %f24,%f24 +/* 0x11e4 1438 (25 26) */ std %f26,[%g3-32] +/* 0x11e8 1439 (26 27) */ std %f24,[%g2-32] +/* 0x11ec 1440 (26 29) */ fdtox %f8,%f8 +/* 0x11f0 1441 (27 30) */ fsubd %f20,%f6,%f6 +/* 0x11f4 1442 (27 28) */ std %f8,[%o7-32] +/* 0x11f8 1443 (27 28) */ add %o1,8,%o1 +/* 0x11fc 1444 (28 30) */ fxnor %f14,%f22,%f8 +/* 0x1200 1445 (28 29) */ std %f0,[%o4-32] +/* 0x1204 1446 (28 29) */ bcs,pt %icc,.L990000154 ! tprob=0.50 +/* 0x1208 (28 31) */ fmuld %f2,%f16,%f0 + +! +! ENTRY .L990000157 +! 
+ + .L990000157: /* frequency 1.0 confidence 0.0 */ +/* 0x120c 1449 ( 0 3) */ fitod %f12,%f28 +/* 0x1210 1450 ( 0 3) */ fmuld %f6,%f18,%f24 +/* 0x1214 1451 ( 0 1) */ add %g3,128,%g3 +/* 0x1218 1452 ( 1 4) */ fitod %f10,%f12 +/* 0x121c 1453 ( 1 4) */ fmuld %f6,%f16,%f26 +/* 0x1220 1454 ( 1 2) */ add %g2,128,%g2 +/* 0x1224 1455 ( 2 5) */ fsubd %f20,%f4,%f4 +/* 0x1228 1456 ( 2 5) */ fmuld %f2,%f18,%f22 +/* 0x122c 1457 ( 2 3) */ add %o7,128,%o7 +/* 0x1230 1458 ( 3 6) */ fdtox %f24,%f6 +/* 0x1234 1459 ( 3 4) */ std %f6,[%g3-128] +/* 0x1238 1460 ( 3 4) */ add %o4,128,%o4 +/* 0x123c 1461 ( 4 7) */ fsubd %f20,%f28,%f2 +/* 0x1240 1462 ( 4 5) */ subcc %g4,%o3,%g0 +/* 0x1244 1463 ( 5 8) */ fitod %f11,%f6 +/* 0x1248 1464 ( 5 8) */ fmuld %f4,%f18,%f24 +/* 0x124c 1465 ( 6 9) */ fdtox %f26,%f10 +/* 0x1250 1466 ( 6 7) */ std %f10,[%g2-128] +/* 0x1254 1467 ( 7 10) */ fdtox %f22,%f10 +/* 0x1258 1468 ( 7 8) */ std %f10,[%o7-128] +/* 0x125c 1469 ( 7 10) */ fmuld %f2,%f18,%f26 +/* 0x1260 1470 ( 8 11) */ fsubd %f20,%f12,%f10 +/* 0x1264 1471 ( 8 11) */ fmuld %f2,%f16,%f2 +/* 0x1268 1472 ( 9 12) */ fsubd %f20,%f6,%f22 +/* 0x126c 1473 ( 9 12) */ fmuld %f4,%f16,%f12 +/* 0x1270 1474 (10 13) */ fdtox %f0,%f0 +/* 0x1274 1475 (10 11) */ std %f0,[%o4-128] +/* 0x1278 1476 (11 14) */ fitod %f8,%f4 +/* 0x127c 1477 (11 14) */ fmuld %f10,%f18,%f6 +/* 0x1280 1478 (12 15) */ fdtox %f26,%f0 +/* 0x1284 1479 (12 13) */ std %f0,[%g3-96] +/* 0x1288 1480 (12 15) */ fmuld %f10,%f16,%f10 +/* 0x128c 1481 (13 16) */ fdtox %f2,%f2 +/* 0x1290 1482 (13 14) */ std %f2,[%g2-96] +/* 0x1294 1483 (14 17) */ fitod %f9,%f0 +/* 0x1298 1484 (14 17) */ fmuld %f22,%f18,%f2 +/* 0x129c 1485 (15 18) */ fdtox %f24,%f8 +/* 0x12a0 1486 (15 16) */ std %f8,[%o7-96] +/* 0x12a4 1487 (16 19) */ fsubd %f20,%f4,%f4 +/* 0x12a8 1488 (16 19) */ fmuld %f22,%f16,%f8 +/* 0x12ac 1489 (17 20) */ fdtox %f12,%f12 +/* 0x12b0 1490 (17 18) */ std %f12,[%o4-96] +/* 0x12b4 1491 (18 21) */ fsubd %f20,%f0,%f0 +/* 0x12b8 1492 (19 22) */ fdtox %f6,%f6 +/* 
0x12bc 1493 (19 20) */ std %f6,[%g3-64] +/* 0x12c0 1494 (20 23) */ fdtox %f10,%f10 +/* 0x12c4 1495 (20 21) */ std %f10,[%g2-64] +/* 0x12c8 1496 (20 23) */ fmuld %f4,%f18,%f6 +/* 0x12cc 1497 (21 24) */ fdtox %f2,%f2 +/* 0x12d0 1498 (21 22) */ std %f2,[%o7-64] +/* 0x12d4 1499 (21 24) */ fmuld %f4,%f16,%f4 +/* 0x12d8 1500 (22 25) */ fmuld %f0,%f18,%f2 +/* 0x12dc 1501 (22 25) */ fdtox %f8,%f8 +/* 0x12e0 1502 (22 23) */ std %f8,[%o4-64] +/* 0x12e4 1503 (23 26) */ fdtox %f6,%f6 +/* 0x12e8 1504 (23 24) */ std %f6,[%g3-32] +/* 0x12ec 1505 (23 26) */ fmuld %f0,%f16,%f0 +/* 0x12f0 1506 (24 27) */ fdtox %f4,%f4 +/* 0x12f4 1507 (24 25) */ std %f4,[%g2-32] +/* 0x12f8 1508 (25 28) */ fdtox %f2,%f2 +/* 0x12fc 1509 (25 26) */ std %f2,[%o7-32] +/* 0x1300 1510 (26 29) */ fdtox %f0,%f0 +/* 0x1304 1511 (26 27) */ bcc,pn %icc,.L77000056 ! tprob=0.50 +/* 0x1308 (26 27) */ std %f0,[%o4-32] + +! +! ENTRY .L77000054 +! + + .L77000054: /* frequency 1.0 confidence 0.0 */ +/* 0x130c 1514 ( 0 3) */ ldd [%o1],%f0 + +! +! ENTRY .L990000161 +! 
+ + .L990000161: /* frequency 1.0 confidence 0.0 */ +/* 0x1310 1516 ( 0 2) */ fxnor %f14,%f0,%f0 +/* 0x1314 1517 ( 0 1) */ add %g4,1,%g4 +/* 0x1318 1518 ( 0 1) */ add %o1,8,%o1 +/* 0x131c 1519 ( 1 2) */ subcc %g4,%o3,%g0 +/* 0x1320 1520 ( 2 5) */ fitod %f0,%f2 +/* 0x1324 1521 ( 3 6) */ fitod %f1,%f0 +/* 0x1328 1522 ( 5 8) */ fsubd %f20,%f2,%f2 +/* 0x132c 1523 ( 6 9) */ fsubd %f20,%f0,%f0 +/* 0x1330 1524 ( 8 11) */ fmuld %f2,%f18,%f6 +/* 0x1334 1525 ( 9 12) */ fmuld %f2,%f16,%f4 +/* 0x1338 1526 (10 13) */ fmuld %f0,%f18,%f2 +/* 0x133c 1527 (11 14) */ fdtox %f6,%f6 +/* 0x1340 1528 (11 12) */ std %f6,[%g3] +/* 0x1344 1529 (11 14) */ fmuld %f0,%f16,%f0 +/* 0x1348 1530 (12 15) */ fdtox %f4,%f4 +/* 0x134c 1531 (12 13) */ std %f4,[%g2] +/* 0x1350 1532 (12 13) */ add %g2,32,%g2 +/* 0x1354 1533 (13 16) */ fdtox %f2,%f2 +/* 0x1358 1534 (13 14) */ std %f2,[%o7] +/* 0x135c 1535 (13 14) */ add %o7,32,%o7 +/* 0x1360 1536 (14 17) */ fdtox %f0,%f0 +/* 0x1364 1537 (14 15) */ std %f0,[%o4] +/* 0x1368 1538 (14 15) */ add %o4,32,%o4 +/* 0x136c 1539 (15 16) */ add %g3,32,%g3 +/* 0x1370 1540 (15 16) */ bcs,a,pt %icc,.L990000161 ! tprob=0.50 +/* 0x1374 (16 19) */ ldd [%o1],%f0 + +! +! ENTRY .L77000056 +! + + .L77000056: /* frequency 1.0 confidence 0.0 */ +/* 0x1378 1548 ( 0 1) */ subcc %o0,0,%g0 + +! +! ENTRY .L990000162 +! + + .L990000162: /* frequency 1.0 confidence 0.0 */ +/* 0x137c 1550 ( 0 1) */ bleu,pt %icc,.L77770061 ! tprob=0.50 +/* 0x1380 ( 0 1) */ nop +/* 0x1384 1555 ( 0 1) */ sethi %hi(0x1000),%g1 +/* 0x1388 1556 ( 1 2) */ xor %g1,-625,%g1 +/* 0x138c 1557 ( 1 2) */ or %g0,%i1,%g4 +/* 0x1390 1558 ( 2 3) */ add %g1,%fp,%g5 +/* 0x1394 1559 ( 2 3) */ sethi %hi(0x1000),%g1 +/* 0x1398 1560 ( 3 4) */ xor %g1,-617,%g1 +/* 0x139c 1561 ( 3 4) */ or %g0,%o0,%o7 +/* 0x13a0 1562 ( 4 5) */ add %g1,%fp,%g2 +/* 0x13a4 1563 ( 4 5) */ or %g0,0,%i2 +/* 0x13a8 1564 ( 5 6) */ or %g0,%i0,%g3 +/* 0x13ac 1565 ( 5 6) */ subcc %o0,6,%g0 +/* 0x13b0 1566 ( 5 6) */ bl,pn %icc,.L77000058 ! 
tprob=0.50 +/* 0x13b4 ( 6 7) */ sethi %hi(0x1000),%g1 +/* 0x13b8 1568 ( 6 8) */ ld [%g4],%o2 +/* 0x13bc 1569 ( 6 7) */ add %g3,4,%g3 +/* 0x13c0 1570 ( 7 8) */ xor %g1,-585,%g1 +/* 0x13c4 1571 ( 7 8) */ sub %o7,3,%o4 +/* 0x13c8 1572 ( 8 9) */ add %g1,%fp,%g2 +/* 0x13cc 1573 ( 8 9) */ sethi %hi(0x1000),%g1 +/* 0x13d0 1574 ( 9 10) */ xor %g1,-593,%g1 +/* 0x13d4 1575 ( 9 10) */ or %g0,2,%i2 +/* 0x13d8 1576 (10 11) */ add %g1,%fp,%g5 +/* 0x13dc 1577 (10 11) */ sethi %hi(0x1000),%g1 +/* 0x13e0 1578 (11 12) */ xor %g1,-617,%g1 +/* 0x13e4 1579 (12 13) */ add %g1,%fp,%g1 +/* 0x13e8 1580 (13 15) */ ldx [%g1],%o1 +/* 0x13ec 1581 (14 16) */ ldx [%g1-8],%o0 +/* 0x13f0 1582 (15 16) */ sllx %o1,19,%o1 +/* 0x13f4 1583 (15 17) */ ldx [%g1+16],%o3 +/* 0x13f8 1584 (16 17) */ add %o0,%o1,%o0 +/* 0x13fc 1585 (16 18) */ ld [%g4+4],%o1 +/* 0x1400 1586 (16 17) */ add %g4,8,%g4 +/* 0x1404 1587 (17 18) */ sllx %o3,19,%o3 +/* 0x1408 1588 (17 18) */ add %o0,%o2,%o0 +/* 0x140c 1589 (17 19) */ ldx [%g1+8],%o2 +/* 0x1410 1590 (18 19) */ st %o0,[%g3-4] +/* 0x1414 1591 (18 19) */ srlx %o0,32,%o0 + +! +! ENTRY .L990000142 +! 
+ + .L990000142: /* frequency 1.0 confidence 0.0 */ +/* 0x1418 1593 ( 0 1) */ add %o2,%o3,%o2 +/* 0x141c 1594 ( 0 1) */ add %i2,4,%i2 +/* 0x1420 1595 ( 0 2) */ ld [%g4],%o3 +/* 0x1424 1596 ( 1 2) */ srl %o0,0,%o5 +/* 0x1428 1597 ( 1 2) */ add %o2,%o1,%o1 +/* 0x142c 1598 ( 1 3) */ ldx [%g2],%o0 +/* 0x1430 1599 ( 3 4) */ sllx %o0,19,%o2 +/* 0x1434 1600 ( 3 5) */ ldx [%g5],%o0 +/* 0x1438 1601 ( 3 4) */ add %o1,%o5,%o1 +/* 0x143c 1602 ( 4 5) */ st %o1,[%g3] +/* 0x1440 1603 ( 4 5) */ srlx %o1,32,%o5 +/* 0x1444 1604 ( 4 5) */ subcc %i2,%o4,%g0 +/* 0x1448 1605 ( 5 7) */ ldx [%g2+16],%o1 +/* 0x144c 1606 ( 5 6) */ add %o0,%o2,%o0 +/* 0x1450 1607 ( 5 6) */ add %g3,16,%g3 +/* 0x1454 1608 ( 6 8) */ ld [%g4+4],%o2 +/* 0x1458 1609 ( 6 7) */ add %o0,%o3,%o0 +/* 0x145c 1610 ( 7 8) */ sllx %o1,19,%o3 +/* 0x1460 1611 ( 7 9) */ ldx [%g5+16],%o1 +/* 0x1464 1612 ( 7 8) */ add %o0,%o5,%o0 +/* 0x1468 1613 ( 8 9) */ st %o0,[%g3-12] +/* 0x146c 1614 ( 8 9) */ srlx %o0,32,%o5 +/* 0x1470 1615 ( 8 9) */ add %g4,16,%g4 +/* 0x1474 1616 ( 9 11) */ ldx [%g2+32],%o0 +/* 0x1478 1617 ( 9 10) */ add %o1,%o3,%o1 +/* 0x147c 1618 ( 9 10) */ add %g2,64,%g2 +/* 0x1480 1619 (10 12) */ ld [%g4-8],%o3 +/* 0x1484 1620 (10 11) */ add %o1,%o2,%o2 +/* 0x1488 1621 (11 12) */ sllx %o0,19,%o1 +/* 0x148c 1622 (11 13) */ ldx [%g5+32],%o0 +/* 0x1490 1623 (11 12) */ add %o2,%o5,%o2 +/* 0x1494 1624 (12 13) */ st %o2,[%g3-8] +/* 0x1498 1625 (12 13) */ srlx %o2,32,%o5 +/* 0x149c 1626 (12 13) */ add %g5,64,%g5 +/* 0x14a0 1627 (13 15) */ ldx [%g2-16],%o2 +/* 0x14a4 1628 (13 14) */ add %o0,%o1,%o0 +/* 0x14a8 1629 (14 16) */ ld [%g4-4],%o1 +/* 0x14ac 1630 (14 15) */ add %o0,%o3,%o0 +/* 0x14b0 1631 (15 16) */ sllx %o2,19,%o3 +/* 0x14b4 1632 (15 17) */ ldx [%g5-16],%o2 +/* 0x14b8 1633 (15 16) */ add %o0,%o5,%o0 +/* 0x14bc 1634 (16 17) */ st %o0,[%g3-4] +/* 0x14c0 1635 (16 17) */ bcs,pt %icc,.L990000142 ! tprob=0.50 +/* 0x14c4 (16 17) */ srlx %o0,32,%o0 + +! +! ENTRY .L990000145 +! 
+ + .L990000145: /* frequency 1.0 confidence 0.0 */ +/* 0x14c8 1638 ( 0 1) */ add %o2,%o3,%o3 +/* 0x14cc 1639 ( 0 1) */ add %g3,4,%g3 +/* 0x14d0 1640 ( 1 2) */ srl %o0,0,%o2 +/* 0x14d4 1641 ( 1 2) */ add %o3,%o1,%o0 +/* 0x14d8 1642 ( 2 3) */ add %o0,%o2,%o0 +/* 0x14dc 1643 ( 2 3) */ st %o0,[%g3-4] +/* 0x14e0 1644 ( 2 3) */ subcc %i2,%o7,%g0 +/* 0x14e4 1645 ( 2 3) */ bcc,pn %icc,.L77770061 ! tprob=0.50 +/* 0x14e8 ( 3 4) */ srlx %o0,32,%o5 + +! +! ENTRY .L77000058 +! + + .L77000058: /* frequency 1.0 confidence 0.0 */ +/* 0x14ec 1648 ( 0 2) */ ldx [%g2],%o2 + +! +! ENTRY .L990000160 +! + + .L990000160: /* frequency 1.0 confidence 0.0 */ +/* 0x14f0 1650 ( 0 1) */ sllx %o2,19,%o3 +/* 0x14f4 1651 ( 0 2) */ ldx [%g5],%o0 +/* 0x14f8 1652 ( 0 1) */ add %i2,1,%i2 +/* 0x14fc 1653 ( 1 2) */ srl %o5,0,%o1 +/* 0x1500 1654 ( 1 3) */ ld [%g4],%o2 +/* 0x1504 1655 ( 1 2) */ add %g2,16,%g2 +/* 0x1508 1656 ( 2 3) */ add %o0,%o3,%o0 +/* 0x150c 1657 ( 2 3) */ add %g5,16,%g5 +/* 0x1510 1658 ( 3 4) */ add %o0,%o2,%o0 +/* 0x1514 1659 ( 3 4) */ add %g4,4,%g4 +/* 0x1518 1660 ( 4 5) */ add %o0,%o1,%o0 +/* 0x151c 1661 ( 4 5) */ st %o0,[%g3] +/* 0x1520 1662 ( 4 5) */ subcc %i2,%o7,%g0 +/* 0x1524 1663 ( 5 6) */ srlx %o0,32,%o5 +/* 0x1528 1664 ( 5 6) */ add %g3,4,%g3 +/* 0x152c 1665 ( 5 6) */ bcs,a,pt %icc,.L990000160 ! tprob=0.50 +/* 0x1530 ( 6 8) */ ldx [%g2],%o2 + +! +! ENTRY .L77770061 +! + + .L77770061: /* frequency 1.0 confidence 0.0 */ +/* 0x1534 ( 0 2) */ ret ! Result = %o1 %o0 %f0 %f1 +/* 0x1538 ( 2 3) */ restore %g0,%o5,%o0 + + +/* 0x124c 1476 ( 0 0) */ .type mul_add,2 +/* 0x124c 1477 ( 0 0) */ .size mul_add,(.-mul_add) +/* 0x124c 1480 ( 0 0) */ .align 8 +/* 0x1250 1486 ( 0 0) */ .global mul_add_inp + +! +! ENTRY mul_add_inp +! 
+ + .global mul_add_inp + mul_add_inp: /* frequency 1.0 confidence 0.0 */ +/* mul_add_inp: thin wrapper around mul_add. It passes its first argument (%i0) as BOTH the first and the second outgoing argument, forwards %i1, %i2 and %i3 as the third to fifth arguments, and returns the low 32 bits of whatever mul_add returns. */ +/* 0x1250 1488 ( 0 1) */ save %sp,-176,%sp /* new register window with a 176-byte stack frame */ +/* 0x1254 1500 ( 1 2) */ sra %i2,0,%o3 /* 4th outgoing arg: %i2 sign-extended from 32 bits */ +/* 0x1258 1501 ( 1 2) */ or %g0,%i1,%o2 /* 3rd outgoing arg: %i1 unchanged */ +/* 0x125c 1502 ( 2 3) */ or %g0,%i0,%o0 /* 1st outgoing arg: %i0 */ +/* 0x1260 1503 ( 2 3) */ or %g0,%i0,%o1 /* 2nd outgoing arg: %i0 again (same value as the 1st) */ +/* 0x1264 1504 ( 3 5) */ call mul_add ! params = ! Result = +/* 0x1268 ( 4 5) */ srl %i3,0,%o4 /* delay slot of the call: 5th outgoing arg = %i3 zero-extended from 32 bits */ +/* 0x126c 1506 ( 5 6) */ srl %o0,0,%i0 /* return value: low 32 bits of mul_add's result, zero-extended */ +/* 0x1270 ( 6 8) */ ret ! Result = %o1 %o0 %f0 %f1 +/* 0x1274 ( 8 9) */ restore %g0,%g0,%g0 /* delay slot of ret: release the register window */ +/* 0x1278 1509 ( 0 0) */ .type mul_add_inp,2 +/* 0x1278 1510 ( 0 0) */ .size mul_add_inp,(.-mul_add_inp) + + .section ".data",#alloc,#write +/* 0x1278 6 ( 0 0) */ .align 8 + +! +! ENTRY mask_cnst +! + + mask_cnst: /* frequency 1.0 confidence 0.0 */ +/* mask_cnst: one 8-byte constant; the decimal value below is the 64-bit pattern 0x8000000080000000 (top bit of each 32-bit half set). Its users are outside this part of the file. */ +/* 0x1278 8 ( 0 0) */ .xword -9223372034707292160 +/* 0x1280 9 ( 0 0) */ .type mask_cnst,#object +/* 0x1280 10 ( 0 0) */ .size mask_cnst,8 + diff --git a/security/nss/lib/freebl/mpi/mpvalpha.c b/security/nss/lib/freebl/mpi/mpvalpha.c new file mode 100644 index 000000000..94e86eedb --- /dev/null +++ b/security/nss/lib/freebl/mpi/mpvalpha.c @@ -0,0 +1,183 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include "mpi-priv.h" +#include + +#define MP_MUL_DxD(a, b, Phi, Plo) \ + { \ + Plo = asm("mulq %a0, %a1, %v0", a, b); \ + Phi = asm("umulh %a0, %a1, %v0", a, b); \ + } + +/* This is empty for the loop in s_mpv_mul_d */ +#define CARRY_ADD + +#define ONE_MUL \ + a_i = *a++; \ + MP_MUL_DxD(a_i, b, a1b1, a0b0); \ + a0b0 += carry; \ + if (a0b0 < carry) \ + ++a1b1; \ + CARRY_ADD \ + *c++ = a0b0; \ + carry = a1b1; + +#define FOUR_MUL \ + ONE_MUL \ + ONE_MUL \ + ONE_MUL \ + ONE_MUL + +#define SIXTEEN_MUL \ + FOUR_MUL \ + FOUR_MUL \ + FOUR_MUL \ + FOUR_MUL + +#define THIRTYTWO_MUL \ + SIXTEEN_MUL \ + SIXTEEN_MUL + +#define ONETWENTYEIGHT_MUL \ + THIRTYTWO_MUL \ + THIRTYTWO_MUL \ + THIRTYTWO_MUL \ + THIRTYTWO_MUL + +#define EXPAND_256(CALL) \ + mp_digit carry = 0; \ + mp_digit a_i; \ + mp_digit a0b0, a1b1; \ + if (a_len & 255) { \ + if (a_len & 1) { \ + ONE_MUL \ + } \ + if (a_len & 2) { \ + ONE_MUL \ + ONE_MUL \ + } \ + if (a_len & 4) { \ + FOUR_MUL \ + } \ + if (a_len & 8) { \ + FOUR_MUL \ + FOUR_MUL \ + } \ + if (a_len & 16) { \ + SIXTEEN_MUL \ + } \ + if (a_len & 32) { \ + THIRTYTWO_MUL \ + } \ + if (a_len & 64) { \ + THIRTYTWO_MUL \ + THIRTYTWO_MUL \ + } \ + if (a_len & 128) { \ + ONETWENTYEIGHT_MUL \ + } \ + a_len = a_len & (-256); \ + } \ + if (a_len >= 256) { \ + carry = CALL(a, a_len, b, c, carry); \ + c += a_len; \ + } + +#define FUNC_NAME(NAME) \ + mp_digit NAME(const mp_digit *a, \ + mp_size a_len, \ + mp_digit b, mp_digit *c, \ + mp_digit carry) + +#define DECLARE_MUL_256(FNAME) \ + FUNC_NAME(FNAME) \ + { \ + mp_digit a_i; \ + mp_digit a0b0, a1b1; \ + while (a_len) { \ + ONETWENTYEIGHT_MUL \ + ONETWENTYEIGHT_MUL \ + a_len -= 256; \ + } \ + return carry; \ + } + +/* Expanding the loop in s_mpv_mul_d appeared to slow down the + (admittedly) small number of tests (i.e., timetest) used to + measure performance, so this define disables that optimization. 
*/ +#define DO_NOT_EXPAND 1 + +/* Need forward declaration so it can be instantiated after + the routine that uses it; this helps locality somewhat */ +#if !defined(DO_NOT_EXPAND) +FUNC_NAME(s_mpv_mul_d_MUL256); +#endif + +/* c = a * b */ +void +s_mpv_mul_d(const mp_digit *a, mp_size a_len, + mp_digit b, mp_digit *c) +{ +#if defined(DO_NOT_EXPAND) + mp_digit carry = 0; + while (a_len--) { + mp_digit a_i = *a++; + mp_digit a0b0, a1b1; + + MP_MUL_DxD(a_i, b, a1b1, a0b0); + + a0b0 += carry; + if (a0b0 < carry) + ++a1b1; + *c++ = a0b0; + carry = a1b1; + } +#else + EXPAND_256(s_mpv_mul_d_MUL256) +#endif + *c = carry; +} + +#if !defined(DO_NOT_EXPAND) +DECLARE_MUL_256(s_mpv_mul_d_MUL256) +#endif + +#undef CARRY_ADD +/* This is redefined for the loop in s_mpv_mul_d_add */ +#define CARRY_ADD \ + a0b0 += a_i = *c; \ + if (a0b0 < a_i) \ + ++a1b1; + +/* Need forward declaration so it can be instantiated between the + two routines that use it; this helps locality somewhat */ +FUNC_NAME(s_mpv_mul_d_add_MUL256); + +/* c += a * b */ +void +s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, + mp_digit b, mp_digit *c) +{ + EXPAND_256(s_mpv_mul_d_add_MUL256) + *c = carry; +} + +/* Instantiate multiply 256 routine here */ +DECLARE_MUL_256(s_mpv_mul_d_add_MUL256) + +/* Presently, this is only used by the Montgomery arithmetic code. */ +/* c += a * b */ +void +s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, + mp_digit b, mp_digit *c) +{ + EXPAND_256(s_mpv_mul_d_add_MUL256) + while (carry) { + mp_digit c_i = *c; + carry += c_i; + *c++ = carry; + carry = carry < c_i; + } +} diff --git a/security/nss/lib/freebl/mpi/mulsqr.c b/security/nss/lib/freebl/mpi/mulsqr.c new file mode 100644 index 000000000..461d40ab3 --- /dev/null +++ b/security/nss/lib/freebl/mpi/mulsqr.c @@ -0,0 +1,84 @@ +/* + * Test whether to include squaring code given the current settings + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include +#include +#include +#include + +#define MP_SQUARE 1 /* make sure squaring code is included */ + +#include "mpi.h" +#include "mpprime.h" + +int +main(int argc, char *argv[]) +{ + int ntests, prec, ix; + unsigned int seed; + clock_t start, stop; + double multime, sqrtime; + mp_int a, c; + + seed = (unsigned int)time(NULL); + + if (argc < 3) { + fprintf(stderr, "Usage: %s \n", argv[0]); + return 1; + } + + if ((ntests = abs(atoi(argv[1]))) == 0) { + fprintf(stderr, "%s: must request at least 1 test.\n", argv[0]); + return 1; + } + if ((prec = abs(atoi(argv[2]))) < CHAR_BIT) { + fprintf(stderr, "%s: must request at least %d bits.\n", argv[0], + CHAR_BIT); + return 1; + } + + prec = (prec + (DIGIT_BIT - 1)) / DIGIT_BIT; + + mp_init_size(&a, prec); + mp_init_size(&c, 2 * prec); + + /* Test multiplication by self */ + srand(seed); + start = clock(); + for (ix = 0; ix < ntests; ix++) { + mpp_random_size(&a, prec); + mp_mul(&a, &a, &c); + } + stop = clock(); + + multime = (double)(stop - start) / CLOCKS_PER_SEC; + + /* Test squaring */ + srand(seed); + start = clock(); + for (ix = 0; ix < ntests; ix++) { + mpp_random_size(&a, prec); + mp_sqr(&a, &c); + } + stop = clock(); + + sqrtime = (double)(stop - start) / CLOCKS_PER_SEC; + + printf("Multiply: %.4f\n", multime); + printf("Square: %.4f\n", sqrtime); + if (multime < sqrtime) { + printf("Speedup: %.1f%%\n", 100.0 * (1.0 - multime / sqrtime)); + printf("Prefer: multiply\n"); + } else { + printf("Speedup: %.1f%%\n", 100.0 * (1.0 - sqrtime / multime)); + printf("Prefer: square\n"); + } + + mp_clear(&a); + mp_clear(&c); + return 0; +} diff --git a/security/nss/lib/freebl/mpi/multest b/security/nss/lib/freebl/mpi/multest new file mode 100755 index 000000000..24752e019 --- /dev/null +++ b/security/nss/lib/freebl/mpi/multest @@ -0,0 +1,76 @@ +#!/bin/sh +# +# multest +# +# Run multiply and square timing 
tests, to compute a chart for the +# current processor and compiler combination. + +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +ECHO=/bin/echo +MAKE=gmake + +$ECHO "\n** Running multiply and square timing tests\n" + +$ECHO "Bringing 'mulsqr' up to date ... " +if $MAKE mulsqr ; then + : +else + $ECHO "\nMake failed to build mulsqr.\n" + exit 1 +fi + +if [ ! -x ./mulsqr ] ; then + $ECHO "\nCannot find 'mulsqr' program, testing cannot continue.\n" + exit 1 +fi + +sizes='64 128 192 256 320 384 448 512 640 768 896 1024 1536 2048' +ntests=500000 + +$ECHO "Running timing tests, please wait ... " + +trap 'echo "oop!";rm -f tt*.tmp;exit 0' INT HUP + +touch tt$$.tmp +$ECHO $ntests tests >> tt$$.tmp +for size in $sizes ; do + $ECHO "$size bits ... \c" + set -A res `./mulsqr $ntests $size|head -3|tr -d '%'|awk '{print $2}'` + $ECHO $size"\t"${res[0]}"\t"${res[1]}"\t"${res[2]} >> tt$$.tmp + $ECHO "(done)" +done +mv tt$$.tmp mulsqr-results.txt +rm -f tt$$.tmp + +$ECHO "\n** Running Karatsuba-Ofman multiplication tests\n" + +$ECHO "Brining 'karatsuba' up to date ... " +if $MAKE karatsuba ; then + : +else + $ECHO "\nMake failed to build karatsuba.\n" + exit 1 +fi + +if [ ! -x ./karatsuba ] ; then + $ECHO "\nCannot find 'karatsuba' program, testing cannot continue.\n" + exit 1 +fi + +ntests=100000 + +trap 'echo "oop!";rm -f tt*.tmp;exit 0' INT HUP + +touch tt$$.tmp +for size in $sizes ; do + $ECHO "$size bits ... 
" + ./karatsuba $ntests $size >> tt$$.tmp + tail -2 tt$$.tmp +done +mv tt$$.tmp karatsuba-results.txt +rm -f tt$$.tmp + +exit 0 diff --git a/security/nss/lib/freebl/mpi/primes.c b/security/nss/lib/freebl/mpi/primes.c new file mode 100644 index 000000000..c8bd93ff9 --- /dev/null +++ b/security/nss/lib/freebl/mpi/primes.c @@ -0,0 +1,841 @@ +/* + * These tables of primes wwere generated using the 'sieve' program + * (sieve.c) and converted to this format with 'ptab.pl'. + * + * The 'small' table is just the first 128 primes. The 'large' table + * is a table of all the prime values that will fit into a single + * mp_digit (given the current size of an mp_digit, which is two bytes). + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#if SMALL_TABLE +#define MP_PRIME_TAB_SIZE 128 +#else +#define MP_PRIME_TAB_SIZE 6542 +#endif + +const int prime_tab_size = MP_PRIME_TAB_SIZE; +const mp_digit prime_tab[] = { + 0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013, + 0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035, + 0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059, + 0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083, + 0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD, + 0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF, + 0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107, + 0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137, + 0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167, + 0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199, + 0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9, + 0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7, + 0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239, + 0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 
0x0265, + 0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293, + 0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF, +#if !SMALL_TABLE + 0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301, + 0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B, + 0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371, + 0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD, + 0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5, + 0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419, + 0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449, + 0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B, + 0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7, + 0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503, + 0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529, + 0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F, + 0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3, + 0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7, + 0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623, + 0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653, + 0x0655, 0x065B, 0x0665, 0x0679, 0x067F, 0x0683, 0x0685, 0x069D, + 0x06A1, 0x06A3, 0x06AD, 0x06B9, 0x06BB, 0x06C5, 0x06CD, 0x06D3, + 0x06D9, 0x06DF, 0x06F1, 0x06F7, 0x06FB, 0x06FD, 0x0709, 0x0713, + 0x071F, 0x0727, 0x0737, 0x0745, 0x074B, 0x074F, 0x0751, 0x0755, + 0x0757, 0x0761, 0x076D, 0x0773, 0x0779, 0x078B, 0x078D, 0x079D, + 0x079F, 0x07B5, 0x07BB, 0x07C3, 0x07C9, 0x07CD, 0x07CF, 0x07D3, + 0x07DB, 0x07E1, 0x07EB, 0x07ED, 0x07F7, 0x0805, 0x080F, 0x0815, + 0x0821, 0x0823, 0x0827, 0x0829, 0x0833, 0x083F, 0x0841, 0x0851, + 0x0853, 0x0859, 0x085D, 0x085F, 0x0869, 0x0871, 0x0883, 0x089B, + 0x089F, 0x08A5, 0x08AD, 0x08BD, 0x08BF, 0x08C3, 0x08CB, 0x08DB, + 0x08DD, 0x08E1, 0x08E9, 0x08EF, 0x08F5, 0x08F9, 0x0905, 0x0907, + 0x091D, 0x0923, 0x0925, 0x092B, 0x092F, 0x0935, 0x0943, 
0x0949, + 0x094D, 0x094F, 0x0955, 0x0959, 0x095F, 0x096B, 0x0971, 0x0977, + 0x0985, 0x0989, 0x098F, 0x099B, 0x09A3, 0x09A9, 0x09AD, 0x09C7, + 0x09D9, 0x09E3, 0x09EB, 0x09EF, 0x09F5, 0x09F7, 0x09FD, 0x0A13, + 0x0A1F, 0x0A21, 0x0A31, 0x0A39, 0x0A3D, 0x0A49, 0x0A57, 0x0A61, + 0x0A63, 0x0A67, 0x0A6F, 0x0A75, 0x0A7B, 0x0A7F, 0x0A81, 0x0A85, + 0x0A8B, 0x0A93, 0x0A97, 0x0A99, 0x0A9F, 0x0AA9, 0x0AAB, 0x0AB5, + 0x0ABD, 0x0AC1, 0x0ACF, 0x0AD9, 0x0AE5, 0x0AE7, 0x0AED, 0x0AF1, + 0x0AF3, 0x0B03, 0x0B11, 0x0B15, 0x0B1B, 0x0B23, 0x0B29, 0x0B2D, + 0x0B3F, 0x0B47, 0x0B51, 0x0B57, 0x0B5D, 0x0B65, 0x0B6F, 0x0B7B, + 0x0B89, 0x0B8D, 0x0B93, 0x0B99, 0x0B9B, 0x0BB7, 0x0BB9, 0x0BC3, + 0x0BCB, 0x0BCF, 0x0BDD, 0x0BE1, 0x0BE9, 0x0BF5, 0x0BFB, 0x0C07, + 0x0C0B, 0x0C11, 0x0C25, 0x0C2F, 0x0C31, 0x0C41, 0x0C5B, 0x0C5F, + 0x0C61, 0x0C6D, 0x0C73, 0x0C77, 0x0C83, 0x0C89, 0x0C91, 0x0C95, + 0x0C9D, 0x0CB3, 0x0CB5, 0x0CB9, 0x0CBB, 0x0CC7, 0x0CE3, 0x0CE5, + 0x0CEB, 0x0CF1, 0x0CF7, 0x0CFB, 0x0D01, 0x0D03, 0x0D0F, 0x0D13, + 0x0D1F, 0x0D21, 0x0D2B, 0x0D2D, 0x0D3D, 0x0D3F, 0x0D4F, 0x0D55, + 0x0D69, 0x0D79, 0x0D81, 0x0D85, 0x0D87, 0x0D8B, 0x0D8D, 0x0DA3, + 0x0DAB, 0x0DB7, 0x0DBD, 0x0DC7, 0x0DC9, 0x0DCD, 0x0DD3, 0x0DD5, + 0x0DDB, 0x0DE5, 0x0DE7, 0x0DF3, 0x0DFD, 0x0DFF, 0x0E09, 0x0E17, + 0x0E1D, 0x0E21, 0x0E27, 0x0E2F, 0x0E35, 0x0E3B, 0x0E4B, 0x0E57, + 0x0E59, 0x0E5D, 0x0E6B, 0x0E71, 0x0E75, 0x0E7D, 0x0E87, 0x0E8F, + 0x0E95, 0x0E9B, 0x0EB1, 0x0EB7, 0x0EB9, 0x0EC3, 0x0ED1, 0x0ED5, + 0x0EDB, 0x0EED, 0x0EEF, 0x0EF9, 0x0F07, 0x0F0B, 0x0F0D, 0x0F17, + 0x0F25, 0x0F29, 0x0F31, 0x0F43, 0x0F47, 0x0F4D, 0x0F4F, 0x0F53, + 0x0F59, 0x0F5B, 0x0F67, 0x0F6B, 0x0F7F, 0x0F95, 0x0FA1, 0x0FA3, + 0x0FA7, 0x0FAD, 0x0FB3, 0x0FB5, 0x0FBB, 0x0FD1, 0x0FD3, 0x0FD9, + 0x0FE9, 0x0FEF, 0x0FFB, 0x0FFD, 0x1003, 0x100F, 0x101F, 0x1021, + 0x1025, 0x102B, 0x1039, 0x103D, 0x103F, 0x1051, 0x1069, 0x1073, + 0x1079, 0x107B, 0x1085, 0x1087, 0x1091, 0x1093, 0x109D, 0x10A3, + 0x10A5, 0x10AF, 0x10B1, 0x10BB, 0x10C1, 0x10C9, 0x10E7, 0x10F1, + 0x10F3, 
0x10FD, 0x1105, 0x110B, 0x1115, 0x1127, 0x112D, 0x1139, + 0x1145, 0x1147, 0x1159, 0x115F, 0x1163, 0x1169, 0x116F, 0x1181, + 0x1183, 0x118D, 0x119B, 0x11A1, 0x11A5, 0x11A7, 0x11AB, 0x11C3, + 0x11C5, 0x11D1, 0x11D7, 0x11E7, 0x11EF, 0x11F5, 0x11FB, 0x120D, + 0x121D, 0x121F, 0x1223, 0x1229, 0x122B, 0x1231, 0x1237, 0x1241, + 0x1247, 0x1253, 0x125F, 0x1271, 0x1273, 0x1279, 0x127D, 0x128F, + 0x1297, 0x12AF, 0x12B3, 0x12B5, 0x12B9, 0x12BF, 0x12C1, 0x12CD, + 0x12D1, 0x12DF, 0x12FD, 0x1307, 0x130D, 0x1319, 0x1327, 0x132D, + 0x1337, 0x1343, 0x1345, 0x1349, 0x134F, 0x1357, 0x135D, 0x1367, + 0x1369, 0x136D, 0x137B, 0x1381, 0x1387, 0x138B, 0x1391, 0x1393, + 0x139D, 0x139F, 0x13AF, 0x13BB, 0x13C3, 0x13D5, 0x13D9, 0x13DF, + 0x13EB, 0x13ED, 0x13F3, 0x13F9, 0x13FF, 0x141B, 0x1421, 0x142F, + 0x1433, 0x143B, 0x1445, 0x144D, 0x1459, 0x146B, 0x146F, 0x1471, + 0x1475, 0x148D, 0x1499, 0x149F, 0x14A1, 0x14B1, 0x14B7, 0x14BD, + 0x14CB, 0x14D5, 0x14E3, 0x14E7, 0x1505, 0x150B, 0x1511, 0x1517, + 0x151F, 0x1525, 0x1529, 0x152B, 0x1537, 0x153D, 0x1541, 0x1543, + 0x1549, 0x155F, 0x1565, 0x1567, 0x156B, 0x157D, 0x157F, 0x1583, + 0x158F, 0x1591, 0x1597, 0x159B, 0x15B5, 0x15BB, 0x15C1, 0x15C5, + 0x15CD, 0x15D7, 0x15F7, 0x1607, 0x1609, 0x160F, 0x1613, 0x1615, + 0x1619, 0x161B, 0x1625, 0x1633, 0x1639, 0x163D, 0x1645, 0x164F, + 0x1655, 0x1669, 0x166D, 0x166F, 0x1675, 0x1693, 0x1697, 0x169F, + 0x16A9, 0x16AF, 0x16B5, 0x16BD, 0x16C3, 0x16CF, 0x16D3, 0x16D9, + 0x16DB, 0x16E1, 0x16E5, 0x16EB, 0x16ED, 0x16F7, 0x16F9, 0x1709, + 0x170F, 0x1723, 0x1727, 0x1733, 0x1741, 0x175D, 0x1763, 0x1777, + 0x177B, 0x178D, 0x1795, 0x179B, 0x179F, 0x17A5, 0x17B3, 0x17B9, + 0x17BF, 0x17C9, 0x17CB, 0x17D5, 0x17E1, 0x17E9, 0x17F3, 0x17F5, + 0x17FF, 0x1807, 0x1813, 0x181D, 0x1835, 0x1837, 0x183B, 0x1843, + 0x1849, 0x184D, 0x1855, 0x1867, 0x1871, 0x1877, 0x187D, 0x187F, + 0x1885, 0x188F, 0x189B, 0x189D, 0x18A7, 0x18AD, 0x18B3, 0x18B9, + 0x18C1, 0x18C7, 0x18D1, 0x18D7, 0x18D9, 0x18DF, 0x18E5, 0x18EB, + 0x18F5, 0x18FD, 0x1915, 
0x191B, 0x1931, 0x1933, 0x1945, 0x1949, + 0x1951, 0x195B, 0x1979, 0x1981, 0x1993, 0x1997, 0x1999, 0x19A3, + 0x19A9, 0x19AB, 0x19B1, 0x19B5, 0x19C7, 0x19CF, 0x19DB, 0x19ED, + 0x19FD, 0x1A03, 0x1A05, 0x1A11, 0x1A17, 0x1A21, 0x1A23, 0x1A2D, + 0x1A2F, 0x1A35, 0x1A3F, 0x1A4D, 0x1A51, 0x1A69, 0x1A6B, 0x1A7B, + 0x1A7D, 0x1A87, 0x1A89, 0x1A93, 0x1AA7, 0x1AAB, 0x1AAD, 0x1AB1, + 0x1AB9, 0x1AC9, 0x1ACF, 0x1AD5, 0x1AD7, 0x1AE3, 0x1AF3, 0x1AFB, + 0x1AFF, 0x1B05, 0x1B23, 0x1B25, 0x1B2F, 0x1B31, 0x1B37, 0x1B3B, + 0x1B41, 0x1B47, 0x1B4F, 0x1B55, 0x1B59, 0x1B65, 0x1B6B, 0x1B73, + 0x1B7F, 0x1B83, 0x1B91, 0x1B9D, 0x1BA7, 0x1BBF, 0x1BC5, 0x1BD1, + 0x1BD7, 0x1BD9, 0x1BEF, 0x1BF7, 0x1C09, 0x1C13, 0x1C19, 0x1C27, + 0x1C2B, 0x1C2D, 0x1C33, 0x1C3D, 0x1C45, 0x1C4B, 0x1C4F, 0x1C55, + 0x1C73, 0x1C81, 0x1C8B, 0x1C8D, 0x1C99, 0x1CA3, 0x1CA5, 0x1CB5, + 0x1CB7, 0x1CC9, 0x1CE1, 0x1CF3, 0x1CF9, 0x1D09, 0x1D1B, 0x1D21, + 0x1D23, 0x1D35, 0x1D39, 0x1D3F, 0x1D41, 0x1D4B, 0x1D53, 0x1D5D, + 0x1D63, 0x1D69, 0x1D71, 0x1D75, 0x1D7B, 0x1D7D, 0x1D87, 0x1D89, + 0x1D95, 0x1D99, 0x1D9F, 0x1DA5, 0x1DA7, 0x1DB3, 0x1DB7, 0x1DC5, + 0x1DD7, 0x1DDB, 0x1DE1, 0x1DF5, 0x1DF9, 0x1E01, 0x1E07, 0x1E0B, + 0x1E13, 0x1E17, 0x1E25, 0x1E2B, 0x1E2F, 0x1E3D, 0x1E49, 0x1E4D, + 0x1E4F, 0x1E6D, 0x1E71, 0x1E89, 0x1E8F, 0x1E95, 0x1EA1, 0x1EAD, + 0x1EBB, 0x1EC1, 0x1EC5, 0x1EC7, 0x1ECB, 0x1EDD, 0x1EE3, 0x1EEF, + 0x1EF7, 0x1EFD, 0x1F01, 0x1F0D, 0x1F0F, 0x1F1B, 0x1F39, 0x1F49, + 0x1F4B, 0x1F51, 0x1F67, 0x1F75, 0x1F7B, 0x1F85, 0x1F91, 0x1F97, + 0x1F99, 0x1F9D, 0x1FA5, 0x1FAF, 0x1FB5, 0x1FBB, 0x1FD3, 0x1FE1, + 0x1FE7, 0x1FEB, 0x1FF3, 0x1FFF, 0x2011, 0x201B, 0x201D, 0x2027, + 0x2029, 0x202D, 0x2033, 0x2047, 0x204D, 0x2051, 0x205F, 0x2063, + 0x2065, 0x2069, 0x2077, 0x207D, 0x2089, 0x20A1, 0x20AB, 0x20B1, + 0x20B9, 0x20C3, 0x20C5, 0x20E3, 0x20E7, 0x20ED, 0x20EF, 0x20FB, + 0x20FF, 0x210D, 0x2113, 0x2135, 0x2141, 0x2149, 0x214F, 0x2159, + 0x215B, 0x215F, 0x2173, 0x217D, 0x2185, 0x2195, 0x2197, 0x21A1, + 0x21AF, 0x21B3, 0x21B5, 0x21C1, 0x21C7, 
0x21D7, 0x21DD, 0x21E5, + 0x21E9, 0x21F1, 0x21F5, 0x21FB, 0x2203, 0x2209, 0x220F, 0x221B, + 0x2221, 0x2225, 0x222B, 0x2231, 0x2239, 0x224B, 0x224F, 0x2263, + 0x2267, 0x2273, 0x2275, 0x227F, 0x2285, 0x2287, 0x2291, 0x229D, + 0x229F, 0x22A3, 0x22B7, 0x22BD, 0x22DB, 0x22E1, 0x22E5, 0x22ED, + 0x22F7, 0x2303, 0x2309, 0x230B, 0x2327, 0x2329, 0x232F, 0x2333, + 0x2335, 0x2345, 0x2351, 0x2353, 0x2359, 0x2363, 0x236B, 0x2383, + 0x238F, 0x2395, 0x23A7, 0x23AD, 0x23B1, 0x23BF, 0x23C5, 0x23C9, + 0x23D5, 0x23DD, 0x23E3, 0x23EF, 0x23F3, 0x23F9, 0x2405, 0x240B, + 0x2417, 0x2419, 0x2429, 0x243D, 0x2441, 0x2443, 0x244D, 0x245F, + 0x2467, 0x246B, 0x2479, 0x247D, 0x247F, 0x2485, 0x249B, 0x24A1, + 0x24AF, 0x24B5, 0x24BB, 0x24C5, 0x24CB, 0x24CD, 0x24D7, 0x24D9, + 0x24DD, 0x24DF, 0x24F5, 0x24F7, 0x24FB, 0x2501, 0x2507, 0x2513, + 0x2519, 0x2527, 0x2531, 0x253D, 0x2543, 0x254B, 0x254F, 0x2573, + 0x2581, 0x258D, 0x2593, 0x2597, 0x259D, 0x259F, 0x25AB, 0x25B1, + 0x25BD, 0x25CD, 0x25CF, 0x25D9, 0x25E1, 0x25F7, 0x25F9, 0x2605, + 0x260B, 0x260F, 0x2615, 0x2627, 0x2629, 0x2635, 0x263B, 0x263F, + 0x264B, 0x2653, 0x2659, 0x2665, 0x2669, 0x266F, 0x267B, 0x2681, + 0x2683, 0x268F, 0x269B, 0x269F, 0x26AD, 0x26B3, 0x26C3, 0x26C9, + 0x26CB, 0x26D5, 0x26DD, 0x26EF, 0x26F5, 0x2717, 0x2719, 0x2735, + 0x2737, 0x274D, 0x2753, 0x2755, 0x275F, 0x276B, 0x276D, 0x2773, + 0x2777, 0x277F, 0x2795, 0x279B, 0x279D, 0x27A7, 0x27AF, 0x27B3, + 0x27B9, 0x27C1, 0x27C5, 0x27D1, 0x27E3, 0x27EF, 0x2803, 0x2807, + 0x280D, 0x2813, 0x281B, 0x281F, 0x2821, 0x2831, 0x283D, 0x283F, + 0x2849, 0x2851, 0x285B, 0x285D, 0x2861, 0x2867, 0x2875, 0x2881, + 0x2897, 0x289F, 0x28BB, 0x28BD, 0x28C1, 0x28D5, 0x28D9, 0x28DB, + 0x28DF, 0x28ED, 0x28F7, 0x2903, 0x2905, 0x2911, 0x2921, 0x2923, + 0x293F, 0x2947, 0x295D, 0x2965, 0x2969, 0x296F, 0x2975, 0x2983, + 0x2987, 0x298F, 0x299B, 0x29A1, 0x29A7, 0x29AB, 0x29BF, 0x29C3, + 0x29D5, 0x29D7, 0x29E3, 0x29E9, 0x29ED, 0x29F3, 0x2A01, 0x2A13, + 0x2A1D, 0x2A25, 0x2A2F, 0x2A4F, 0x2A55, 0x2A5F, 0x2A65, 
0x2A6B, + 0x2A6D, 0x2A73, 0x2A83, 0x2A89, 0x2A8B, 0x2A97, 0x2A9D, 0x2AB9, + 0x2ABB, 0x2AC5, 0x2ACD, 0x2ADD, 0x2AE3, 0x2AEB, 0x2AF1, 0x2AFB, + 0x2B13, 0x2B27, 0x2B31, 0x2B33, 0x2B3D, 0x2B3F, 0x2B4B, 0x2B4F, + 0x2B55, 0x2B69, 0x2B6D, 0x2B6F, 0x2B7B, 0x2B8D, 0x2B97, 0x2B99, + 0x2BA3, 0x2BA5, 0x2BA9, 0x2BBD, 0x2BCD, 0x2BE7, 0x2BEB, 0x2BF3, + 0x2BF9, 0x2BFD, 0x2C09, 0x2C0F, 0x2C17, 0x2C23, 0x2C2F, 0x2C35, + 0x2C39, 0x2C41, 0x2C57, 0x2C59, 0x2C69, 0x2C77, 0x2C81, 0x2C87, + 0x2C93, 0x2C9F, 0x2CAD, 0x2CB3, 0x2CB7, 0x2CCB, 0x2CCF, 0x2CDB, + 0x2CE1, 0x2CE3, 0x2CE9, 0x2CEF, 0x2CFF, 0x2D07, 0x2D1D, 0x2D1F, + 0x2D3B, 0x2D43, 0x2D49, 0x2D4D, 0x2D61, 0x2D65, 0x2D71, 0x2D89, + 0x2D9D, 0x2DA1, 0x2DA9, 0x2DB3, 0x2DB5, 0x2DC5, 0x2DC7, 0x2DD3, + 0x2DDF, 0x2E01, 0x2E03, 0x2E07, 0x2E0D, 0x2E19, 0x2E1F, 0x2E25, + 0x2E2D, 0x2E33, 0x2E37, 0x2E39, 0x2E3F, 0x2E57, 0x2E5B, 0x2E6F, + 0x2E79, 0x2E7F, 0x2E85, 0x2E93, 0x2E97, 0x2E9D, 0x2EA3, 0x2EA5, + 0x2EB1, 0x2EB7, 0x2EC1, 0x2EC3, 0x2ECD, 0x2ED3, 0x2EE7, 0x2EEB, + 0x2F05, 0x2F09, 0x2F0B, 0x2F11, 0x2F27, 0x2F29, 0x2F41, 0x2F45, + 0x2F4B, 0x2F4D, 0x2F51, 0x2F57, 0x2F6F, 0x2F75, 0x2F7D, 0x2F81, + 0x2F83, 0x2FA5, 0x2FAB, 0x2FB3, 0x2FC3, 0x2FCF, 0x2FD1, 0x2FDB, + 0x2FDD, 0x2FE7, 0x2FED, 0x2FF5, 0x2FF9, 0x3001, 0x300D, 0x3023, + 0x3029, 0x3037, 0x303B, 0x3055, 0x3059, 0x305B, 0x3067, 0x3071, + 0x3079, 0x307D, 0x3085, 0x3091, 0x3095, 0x30A3, 0x30A9, 0x30B9, + 0x30BF, 0x30C7, 0x30CB, 0x30D1, 0x30D7, 0x30DF, 0x30E5, 0x30EF, + 0x30FB, 0x30FD, 0x3103, 0x3109, 0x3119, 0x3121, 0x3127, 0x312D, + 0x3139, 0x3143, 0x3145, 0x314B, 0x315D, 0x3161, 0x3167, 0x316D, + 0x3173, 0x317F, 0x3191, 0x3199, 0x319F, 0x31A9, 0x31B1, 0x31C3, + 0x31C7, 0x31D5, 0x31DB, 0x31ED, 0x31F7, 0x31FF, 0x3209, 0x3215, + 0x3217, 0x321D, 0x3229, 0x3235, 0x3259, 0x325D, 0x3263, 0x326B, + 0x326F, 0x3275, 0x3277, 0x327B, 0x328D, 0x3299, 0x329F, 0x32A7, + 0x32AD, 0x32B3, 0x32B7, 0x32C9, 0x32CB, 0x32CF, 0x32D1, 0x32E9, + 0x32ED, 0x32F3, 0x32F9, 0x3307, 0x3325, 0x332B, 0x332F, 0x3335, + 0x3341, 
0x3347, 0x335B, 0x335F, 0x3367, 0x336B, 0x3373, 0x3379, + 0x337F, 0x3383, 0x33A1, 0x33A3, 0x33AD, 0x33B9, 0x33C1, 0x33CB, + 0x33D3, 0x33EB, 0x33F1, 0x33FD, 0x3401, 0x340F, 0x3413, 0x3419, + 0x341B, 0x3437, 0x3445, 0x3455, 0x3457, 0x3463, 0x3469, 0x346D, + 0x3481, 0x348B, 0x3491, 0x3497, 0x349D, 0x34A5, 0x34AF, 0x34BB, + 0x34C9, 0x34D3, 0x34E1, 0x34F1, 0x34FF, 0x3509, 0x3517, 0x351D, + 0x352D, 0x3533, 0x353B, 0x3541, 0x3551, 0x3565, 0x356F, 0x3571, + 0x3577, 0x357B, 0x357D, 0x3581, 0x358D, 0x358F, 0x3599, 0x359B, + 0x35A1, 0x35B7, 0x35BD, 0x35BF, 0x35C3, 0x35D5, 0x35DD, 0x35E7, + 0x35EF, 0x3605, 0x3607, 0x3611, 0x3623, 0x3631, 0x3635, 0x3637, + 0x363B, 0x364D, 0x364F, 0x3653, 0x3659, 0x3661, 0x366B, 0x366D, + 0x368B, 0x368F, 0x36AD, 0x36AF, 0x36B9, 0x36BB, 0x36CD, 0x36D1, + 0x36E3, 0x36E9, 0x36F7, 0x3701, 0x3703, 0x3707, 0x371B, 0x373F, + 0x3745, 0x3749, 0x374F, 0x375D, 0x3761, 0x3775, 0x377F, 0x378D, + 0x37A3, 0x37A9, 0x37AB, 0x37C9, 0x37D5, 0x37DF, 0x37F1, 0x37F3, + 0x37F7, 0x3805, 0x380B, 0x3821, 0x3833, 0x3835, 0x3841, 0x3847, + 0x384B, 0x3853, 0x3857, 0x385F, 0x3865, 0x386F, 0x3871, 0x387D, + 0x388F, 0x3899, 0x38A7, 0x38B7, 0x38C5, 0x38C9, 0x38CF, 0x38D5, + 0x38D7, 0x38DD, 0x38E1, 0x38E3, 0x38FF, 0x3901, 0x391D, 0x3923, + 0x3925, 0x3929, 0x392F, 0x393D, 0x3941, 0x394D, 0x395B, 0x396B, + 0x3979, 0x397D, 0x3983, 0x398B, 0x3991, 0x3995, 0x399B, 0x39A1, + 0x39A7, 0x39AF, 0x39B3, 0x39BB, 0x39BF, 0x39CD, 0x39DD, 0x39E5, + 0x39EB, 0x39EF, 0x39FB, 0x3A03, 0x3A13, 0x3A15, 0x3A1F, 0x3A27, + 0x3A2B, 0x3A31, 0x3A4B, 0x3A51, 0x3A5B, 0x3A63, 0x3A67, 0x3A6D, + 0x3A79, 0x3A87, 0x3AA5, 0x3AA9, 0x3AB7, 0x3ACD, 0x3AD5, 0x3AE1, + 0x3AE5, 0x3AEB, 0x3AF3, 0x3AFD, 0x3B03, 0x3B11, 0x3B1B, 0x3B21, + 0x3B23, 0x3B2D, 0x3B39, 0x3B45, 0x3B53, 0x3B59, 0x3B5F, 0x3B71, + 0x3B7B, 0x3B81, 0x3B89, 0x3B9B, 0x3B9F, 0x3BA5, 0x3BA7, 0x3BAD, + 0x3BB7, 0x3BB9, 0x3BC3, 0x3BCB, 0x3BD1, 0x3BD7, 0x3BE1, 0x3BE3, + 0x3BF5, 0x3BFF, 0x3C01, 0x3C0D, 0x3C11, 0x3C17, 0x3C1F, 0x3C29, + 0x3C35, 0x3C43, 0x3C4F, 
0x3C53, 0x3C5B, 0x3C65, 0x3C6B, 0x3C71, + 0x3C85, 0x3C89, 0x3C97, 0x3CA7, 0x3CB5, 0x3CBF, 0x3CC7, 0x3CD1, + 0x3CDD, 0x3CDF, 0x3CF1, 0x3CF7, 0x3D03, 0x3D0D, 0x3D19, 0x3D1B, + 0x3D1F, 0x3D21, 0x3D2D, 0x3D33, 0x3D37, 0x3D3F, 0x3D43, 0x3D6F, + 0x3D73, 0x3D75, 0x3D79, 0x3D7B, 0x3D85, 0x3D91, 0x3D97, 0x3D9D, + 0x3DAB, 0x3DAF, 0x3DB5, 0x3DBB, 0x3DC1, 0x3DC9, 0x3DCF, 0x3DF3, + 0x3E05, 0x3E09, 0x3E0F, 0x3E11, 0x3E1D, 0x3E23, 0x3E29, 0x3E2F, + 0x3E33, 0x3E41, 0x3E57, 0x3E63, 0x3E65, 0x3E77, 0x3E81, 0x3E87, + 0x3EA1, 0x3EB9, 0x3EBD, 0x3EBF, 0x3EC3, 0x3EC5, 0x3EC9, 0x3ED7, + 0x3EDB, 0x3EE1, 0x3EE7, 0x3EEF, 0x3EFF, 0x3F0B, 0x3F0D, 0x3F37, + 0x3F3B, 0x3F3D, 0x3F41, 0x3F59, 0x3F5F, 0x3F65, 0x3F67, 0x3F79, + 0x3F7D, 0x3F8B, 0x3F91, 0x3FAD, 0x3FBF, 0x3FCD, 0x3FD3, 0x3FDD, + 0x3FE9, 0x3FEB, 0x3FF1, 0x3FFD, 0x401B, 0x4021, 0x4025, 0x402B, + 0x4031, 0x403F, 0x4043, 0x4045, 0x405D, 0x4061, 0x4067, 0x406D, + 0x4087, 0x4091, 0x40A3, 0x40A9, 0x40B1, 0x40B7, 0x40BD, 0x40DB, + 0x40DF, 0x40EB, 0x40F7, 0x40F9, 0x4109, 0x410B, 0x4111, 0x4115, + 0x4121, 0x4133, 0x4135, 0x413B, 0x413F, 0x4159, 0x4165, 0x416B, + 0x4177, 0x417B, 0x4193, 0x41AB, 0x41B7, 0x41BD, 0x41BF, 0x41CB, + 0x41E7, 0x41EF, 0x41F3, 0x41F9, 0x4205, 0x4207, 0x4219, 0x421F, + 0x4223, 0x4229, 0x422F, 0x4243, 0x4253, 0x4255, 0x425B, 0x4261, + 0x4273, 0x427D, 0x4283, 0x4285, 0x4289, 0x4291, 0x4297, 0x429D, + 0x42B5, 0x42C5, 0x42CB, 0x42D3, 0x42DD, 0x42E3, 0x42F1, 0x4307, + 0x430F, 0x431F, 0x4325, 0x4327, 0x4333, 0x4337, 0x4339, 0x434F, + 0x4357, 0x4369, 0x438B, 0x438D, 0x4393, 0x43A5, 0x43A9, 0x43AF, + 0x43B5, 0x43BD, 0x43C7, 0x43CF, 0x43E1, 0x43E7, 0x43EB, 0x43ED, + 0x43F1, 0x43F9, 0x4409, 0x440B, 0x4417, 0x4423, 0x4429, 0x443B, + 0x443F, 0x4445, 0x444B, 0x4451, 0x4453, 0x4459, 0x4465, 0x446F, + 0x4483, 0x448F, 0x44A1, 0x44A5, 0x44AB, 0x44AD, 0x44BD, 0x44BF, + 0x44C9, 0x44D7, 0x44DB, 0x44F9, 0x44FB, 0x4505, 0x4511, 0x4513, + 0x452B, 0x4531, 0x4541, 0x4549, 0x4553, 0x4555, 0x4561, 0x4577, + 0x457D, 0x457F, 0x458F, 0x45A3, 0x45AD, 
0x45AF, 0x45BB, 0x45C7, + 0x45D9, 0x45E3, 0x45EF, 0x45F5, 0x45F7, 0x4601, 0x4603, 0x4609, + 0x4613, 0x4625, 0x4627, 0x4633, 0x4639, 0x463D, 0x4643, 0x4645, + 0x465D, 0x4679, 0x467B, 0x467F, 0x4681, 0x468B, 0x468D, 0x469D, + 0x46A9, 0x46B1, 0x46C7, 0x46C9, 0x46CF, 0x46D3, 0x46D5, 0x46DF, + 0x46E5, 0x46F9, 0x4705, 0x470F, 0x4717, 0x4723, 0x4729, 0x472F, + 0x4735, 0x4739, 0x474B, 0x474D, 0x4751, 0x475D, 0x476F, 0x4771, + 0x477D, 0x4783, 0x4787, 0x4789, 0x4799, 0x47A5, 0x47B1, 0x47BF, + 0x47C3, 0x47CB, 0x47DD, 0x47E1, 0x47ED, 0x47FB, 0x4801, 0x4807, + 0x480B, 0x4813, 0x4819, 0x481D, 0x4831, 0x483D, 0x4847, 0x4855, + 0x4859, 0x485B, 0x486B, 0x486D, 0x4879, 0x4897, 0x489B, 0x48A1, + 0x48B9, 0x48CD, 0x48E5, 0x48EF, 0x48F7, 0x4903, 0x490D, 0x4919, + 0x491F, 0x492B, 0x4937, 0x493D, 0x4945, 0x4955, 0x4963, 0x4969, + 0x496D, 0x4973, 0x4997, 0x49AB, 0x49B5, 0x49D3, 0x49DF, 0x49E1, + 0x49E5, 0x49E7, 0x4A03, 0x4A0F, 0x4A1D, 0x4A23, 0x4A39, 0x4A41, + 0x4A45, 0x4A57, 0x4A5D, 0x4A6B, 0x4A7D, 0x4A81, 0x4A87, 0x4A89, + 0x4A8F, 0x4AB1, 0x4AC3, 0x4AC5, 0x4AD5, 0x4ADB, 0x4AED, 0x4AEF, + 0x4B07, 0x4B0B, 0x4B0D, 0x4B13, 0x4B1F, 0x4B25, 0x4B31, 0x4B3B, + 0x4B43, 0x4B49, 0x4B59, 0x4B65, 0x4B6D, 0x4B77, 0x4B85, 0x4BAD, + 0x4BB3, 0x4BB5, 0x4BBB, 0x4BBF, 0x4BCB, 0x4BD9, 0x4BDD, 0x4BDF, + 0x4BE3, 0x4BE5, 0x4BE9, 0x4BF1, 0x4BF7, 0x4C01, 0x4C07, 0x4C0D, + 0x4C0F, 0x4C15, 0x4C1B, 0x4C21, 0x4C2D, 0x4C33, 0x4C4B, 0x4C55, + 0x4C57, 0x4C61, 0x4C67, 0x4C73, 0x4C79, 0x4C7F, 0x4C8D, 0x4C93, + 0x4C99, 0x4CCD, 0x4CE1, 0x4CE7, 0x4CF1, 0x4CF3, 0x4CFD, 0x4D05, + 0x4D0F, 0x4D1B, 0x4D27, 0x4D29, 0x4D2F, 0x4D33, 0x4D41, 0x4D51, + 0x4D59, 0x4D65, 0x4D6B, 0x4D81, 0x4D83, 0x4D8D, 0x4D95, 0x4D9B, + 0x4DB1, 0x4DB3, 0x4DC9, 0x4DCF, 0x4DD7, 0x4DE1, 0x4DED, 0x4DF9, + 0x4DFB, 0x4E05, 0x4E0B, 0x4E17, 0x4E19, 0x4E1D, 0x4E2B, 0x4E35, + 0x4E37, 0x4E3D, 0x4E4F, 0x4E53, 0x4E5F, 0x4E67, 0x4E79, 0x4E85, + 0x4E8B, 0x4E91, 0x4E95, 0x4E9B, 0x4EA1, 0x4EAF, 0x4EB3, 0x4EB5, + 0x4EC1, 0x4ECD, 0x4ED1, 0x4ED7, 0x4EE9, 0x4EFB, 0x4F07, 
0x4F09, + 0x4F19, 0x4F25, 0x4F2D, 0x4F3F, 0x4F49, 0x4F63, 0x4F67, 0x4F6D, + 0x4F75, 0x4F7B, 0x4F81, 0x4F85, 0x4F87, 0x4F91, 0x4FA5, 0x4FA9, + 0x4FAF, 0x4FB7, 0x4FBB, 0x4FCF, 0x4FD9, 0x4FDB, 0x4FFD, 0x4FFF, + 0x5003, 0x501B, 0x501D, 0x5029, 0x5035, 0x503F, 0x5045, 0x5047, + 0x5053, 0x5071, 0x5077, 0x5083, 0x5093, 0x509F, 0x50A1, 0x50B7, + 0x50C9, 0x50D5, 0x50E3, 0x50ED, 0x50EF, 0x50FB, 0x5107, 0x510B, + 0x510D, 0x5111, 0x5117, 0x5123, 0x5125, 0x5135, 0x5147, 0x5149, + 0x5171, 0x5179, 0x5189, 0x518F, 0x5197, 0x51A1, 0x51A3, 0x51A7, + 0x51B9, 0x51C1, 0x51CB, 0x51D3, 0x51DF, 0x51E3, 0x51F5, 0x51F7, + 0x5209, 0x5213, 0x5215, 0x5219, 0x521B, 0x521F, 0x5227, 0x5243, + 0x5245, 0x524B, 0x5261, 0x526D, 0x5273, 0x5281, 0x5293, 0x5297, + 0x529D, 0x52A5, 0x52AB, 0x52B1, 0x52BB, 0x52C3, 0x52C7, 0x52C9, + 0x52DB, 0x52E5, 0x52EB, 0x52FF, 0x5315, 0x531D, 0x5323, 0x5341, + 0x5345, 0x5347, 0x534B, 0x535D, 0x5363, 0x5381, 0x5383, 0x5387, + 0x538F, 0x5395, 0x5399, 0x539F, 0x53AB, 0x53B9, 0x53DB, 0x53E9, + 0x53EF, 0x53F3, 0x53F5, 0x53FB, 0x53FF, 0x540D, 0x5411, 0x5413, + 0x5419, 0x5435, 0x5437, 0x543B, 0x5441, 0x5449, 0x5453, 0x5455, + 0x545F, 0x5461, 0x546B, 0x546D, 0x5471, 0x548F, 0x5491, 0x549D, + 0x54A9, 0x54B3, 0x54C5, 0x54D1, 0x54DF, 0x54E9, 0x54EB, 0x54F7, + 0x54FD, 0x5507, 0x550D, 0x551B, 0x5527, 0x552B, 0x5539, 0x553D, + 0x554F, 0x5551, 0x555B, 0x5563, 0x5567, 0x556F, 0x5579, 0x5585, + 0x5597, 0x55A9, 0x55B1, 0x55B7, 0x55C9, 0x55D9, 0x55E7, 0x55ED, + 0x55F3, 0x55FD, 0x560B, 0x560F, 0x5615, 0x5617, 0x5623, 0x562F, + 0x5633, 0x5639, 0x563F, 0x564B, 0x564D, 0x565D, 0x565F, 0x566B, + 0x5671, 0x5675, 0x5683, 0x5689, 0x568D, 0x568F, 0x569B, 0x56AD, + 0x56B1, 0x56D5, 0x56E7, 0x56F3, 0x56FF, 0x5701, 0x5705, 0x5707, + 0x570B, 0x5713, 0x571F, 0x5723, 0x5747, 0x574D, 0x575F, 0x5761, + 0x576D, 0x5777, 0x577D, 0x5789, 0x57A1, 0x57A9, 0x57AF, 0x57B5, + 0x57C5, 0x57D1, 0x57D3, 0x57E5, 0x57EF, 0x5803, 0x580D, 0x580F, + 0x5815, 0x5827, 0x582B, 0x582D, 0x5855, 0x585B, 0x585D, 0x586D, + 0x586F, 
0x5873, 0x587B, 0x588D, 0x5897, 0x58A3, 0x58A9, 0x58AB, + 0x58B5, 0x58BD, 0x58C1, 0x58C7, 0x58D3, 0x58D5, 0x58DF, 0x58F1, + 0x58F9, 0x58FF, 0x5903, 0x5917, 0x591B, 0x5921, 0x5945, 0x594B, + 0x594D, 0x5957, 0x595D, 0x5975, 0x597B, 0x5989, 0x5999, 0x599F, + 0x59B1, 0x59B3, 0x59BD, 0x59D1, 0x59DB, 0x59E3, 0x59E9, 0x59ED, + 0x59F3, 0x59F5, 0x59FF, 0x5A01, 0x5A0D, 0x5A11, 0x5A13, 0x5A17, + 0x5A1F, 0x5A29, 0x5A2F, 0x5A3B, 0x5A4D, 0x5A5B, 0x5A67, 0x5A77, + 0x5A7F, 0x5A85, 0x5A95, 0x5A9D, 0x5AA1, 0x5AA3, 0x5AA9, 0x5ABB, + 0x5AD3, 0x5AE5, 0x5AEF, 0x5AFB, 0x5AFD, 0x5B01, 0x5B0F, 0x5B19, + 0x5B1F, 0x5B25, 0x5B2B, 0x5B3D, 0x5B49, 0x5B4B, 0x5B67, 0x5B79, + 0x5B87, 0x5B97, 0x5BA3, 0x5BB1, 0x5BC9, 0x5BD5, 0x5BEB, 0x5BF1, + 0x5BF3, 0x5BFD, 0x5C05, 0x5C09, 0x5C0B, 0x5C0F, 0x5C1D, 0x5C29, + 0x5C2F, 0x5C33, 0x5C39, 0x5C47, 0x5C4B, 0x5C4D, 0x5C51, 0x5C6F, + 0x5C75, 0x5C77, 0x5C7D, 0x5C87, 0x5C89, 0x5CA7, 0x5CBD, 0x5CBF, + 0x5CC3, 0x5CC9, 0x5CD1, 0x5CD7, 0x5CDD, 0x5CED, 0x5CF9, 0x5D05, + 0x5D0B, 0x5D13, 0x5D17, 0x5D19, 0x5D31, 0x5D3D, 0x5D41, 0x5D47, + 0x5D4F, 0x5D55, 0x5D5B, 0x5D65, 0x5D67, 0x5D6D, 0x5D79, 0x5D95, + 0x5DA3, 0x5DA9, 0x5DAD, 0x5DB9, 0x5DC1, 0x5DC7, 0x5DD3, 0x5DD7, + 0x5DDD, 0x5DEB, 0x5DF1, 0x5DFD, 0x5E07, 0x5E0D, 0x5E13, 0x5E1B, + 0x5E21, 0x5E27, 0x5E2B, 0x5E2D, 0x5E31, 0x5E39, 0x5E45, 0x5E49, + 0x5E57, 0x5E69, 0x5E73, 0x5E75, 0x5E85, 0x5E8B, 0x5E9F, 0x5EA5, + 0x5EAF, 0x5EB7, 0x5EBB, 0x5ED9, 0x5EFD, 0x5F09, 0x5F11, 0x5F27, + 0x5F33, 0x5F35, 0x5F3B, 0x5F47, 0x5F57, 0x5F5D, 0x5F63, 0x5F65, + 0x5F77, 0x5F7B, 0x5F95, 0x5F99, 0x5FA1, 0x5FB3, 0x5FBD, 0x5FC5, + 0x5FCF, 0x5FD5, 0x5FE3, 0x5FE7, 0x5FFB, 0x6011, 0x6023, 0x602F, + 0x6037, 0x6053, 0x605F, 0x6065, 0x606B, 0x6073, 0x6079, 0x6085, + 0x609D, 0x60AD, 0x60BB, 0x60BF, 0x60CD, 0x60D9, 0x60DF, 0x60E9, + 0x60F5, 0x6109, 0x610F, 0x6113, 0x611B, 0x612D, 0x6139, 0x614B, + 0x6155, 0x6157, 0x615B, 0x616F, 0x6179, 0x6187, 0x618B, 0x6191, + 0x6193, 0x619D, 0x61B5, 0x61C7, 0x61C9, 0x61CD, 0x61E1, 0x61F1, + 0x61FF, 0x6209, 0x6217, 
0x621D, 0x6221, 0x6227, 0x623B, 0x6241, + 0x624B, 0x6251, 0x6253, 0x625F, 0x6265, 0x6283, 0x628D, 0x6295, + 0x629B, 0x629F, 0x62A5, 0x62AD, 0x62D5, 0x62D7, 0x62DB, 0x62DD, + 0x62E9, 0x62FB, 0x62FF, 0x6305, 0x630D, 0x6317, 0x631D, 0x632F, + 0x6341, 0x6343, 0x634F, 0x635F, 0x6367, 0x636D, 0x6371, 0x6377, + 0x637D, 0x637F, 0x63B3, 0x63C1, 0x63C5, 0x63D9, 0x63E9, 0x63EB, + 0x63EF, 0x63F5, 0x6401, 0x6403, 0x6409, 0x6415, 0x6421, 0x6427, + 0x642B, 0x6439, 0x6443, 0x6449, 0x644F, 0x645D, 0x6467, 0x6475, + 0x6485, 0x648D, 0x6493, 0x649F, 0x64A3, 0x64AB, 0x64C1, 0x64C7, + 0x64C9, 0x64DB, 0x64F1, 0x64F7, 0x64F9, 0x650B, 0x6511, 0x6521, + 0x652F, 0x6539, 0x653F, 0x654B, 0x654D, 0x6553, 0x6557, 0x655F, + 0x6571, 0x657D, 0x658D, 0x658F, 0x6593, 0x65A1, 0x65A5, 0x65AD, + 0x65B9, 0x65C5, 0x65E3, 0x65F3, 0x65FB, 0x65FF, 0x6601, 0x6607, + 0x661D, 0x6629, 0x6631, 0x663B, 0x6641, 0x6647, 0x664D, 0x665B, + 0x6661, 0x6673, 0x667D, 0x6689, 0x668B, 0x6695, 0x6697, 0x669B, + 0x66B5, 0x66B9, 0x66C5, 0x66CD, 0x66D1, 0x66E3, 0x66EB, 0x66F5, + 0x6703, 0x6713, 0x6719, 0x671F, 0x6727, 0x6731, 0x6737, 0x673F, + 0x6745, 0x6751, 0x675B, 0x676F, 0x6779, 0x6781, 0x6785, 0x6791, + 0x67AB, 0x67BD, 0x67C1, 0x67CD, 0x67DF, 0x67E5, 0x6803, 0x6809, + 0x6811, 0x6817, 0x682D, 0x6839, 0x683B, 0x683F, 0x6845, 0x684B, + 0x684D, 0x6857, 0x6859, 0x685D, 0x6863, 0x6869, 0x686B, 0x6871, + 0x6887, 0x6899, 0x689F, 0x68B1, 0x68BD, 0x68C5, 0x68D1, 0x68D7, + 0x68E1, 0x68ED, 0x68EF, 0x68FF, 0x6901, 0x690B, 0x690D, 0x6917, + 0x6929, 0x692F, 0x6943, 0x6947, 0x6949, 0x694F, 0x6965, 0x696B, + 0x6971, 0x6983, 0x6989, 0x6997, 0x69A3, 0x69B3, 0x69B5, 0x69BB, + 0x69C1, 0x69C5, 0x69D3, 0x69DF, 0x69E3, 0x69E5, 0x69F7, 0x6A07, + 0x6A2B, 0x6A37, 0x6A3D, 0x6A4B, 0x6A67, 0x6A69, 0x6A75, 0x6A7B, + 0x6A87, 0x6A8D, 0x6A91, 0x6A93, 0x6AA3, 0x6AC1, 0x6AC9, 0x6AE1, + 0x6AE7, 0x6B05, 0x6B0F, 0x6B11, 0x6B23, 0x6B27, 0x6B2D, 0x6B39, + 0x6B41, 0x6B57, 0x6B59, 0x6B5F, 0x6B75, 0x6B87, 0x6B89, 0x6B93, + 0x6B95, 0x6B9F, 0x6BBD, 0x6BBF, 0x6BDB, 
0x6BE1, 0x6BEF, 0x6BFF, + 0x6C05, 0x6C19, 0x6C29, 0x6C2B, 0x6C31, 0x6C35, 0x6C55, 0x6C59, + 0x6C5B, 0x6C5F, 0x6C65, 0x6C67, 0x6C73, 0x6C77, 0x6C7D, 0x6C83, + 0x6C8F, 0x6C91, 0x6C97, 0x6C9B, 0x6CA1, 0x6CA9, 0x6CAF, 0x6CB3, + 0x6CC7, 0x6CCB, 0x6CEB, 0x6CF5, 0x6CFD, 0x6D0D, 0x6D0F, 0x6D25, + 0x6D27, 0x6D2B, 0x6D31, 0x6D39, 0x6D3F, 0x6D4F, 0x6D5D, 0x6D61, + 0x6D73, 0x6D7B, 0x6D7F, 0x6D93, 0x6D99, 0x6DA5, 0x6DB1, 0x6DB7, + 0x6DC1, 0x6DC3, 0x6DCD, 0x6DCF, 0x6DDB, 0x6DF7, 0x6E03, 0x6E15, + 0x6E17, 0x6E29, 0x6E33, 0x6E3B, 0x6E45, 0x6E75, 0x6E77, 0x6E7B, + 0x6E81, 0x6E89, 0x6E93, 0x6E95, 0x6E9F, 0x6EBD, 0x6EBF, 0x6EE3, + 0x6EE9, 0x6EF3, 0x6EF9, 0x6EFB, 0x6F0D, 0x6F11, 0x6F17, 0x6F1F, + 0x6F2F, 0x6F3D, 0x6F4D, 0x6F53, 0x6F61, 0x6F65, 0x6F79, 0x6F7D, + 0x6F83, 0x6F85, 0x6F8F, 0x6F9B, 0x6F9D, 0x6FA3, 0x6FAF, 0x6FB5, + 0x6FBB, 0x6FBF, 0x6FCB, 0x6FCD, 0x6FD3, 0x6FD7, 0x6FE3, 0x6FE9, + 0x6FF1, 0x6FF5, 0x6FF7, 0x6FFD, 0x700F, 0x7019, 0x701F, 0x7027, + 0x7033, 0x7039, 0x704F, 0x7051, 0x7057, 0x7063, 0x7075, 0x7079, + 0x7087, 0x708D, 0x7091, 0x70A5, 0x70AB, 0x70BB, 0x70C3, 0x70C7, + 0x70CF, 0x70E5, 0x70ED, 0x70F9, 0x70FF, 0x7105, 0x7115, 0x7121, + 0x7133, 0x7151, 0x7159, 0x715D, 0x715F, 0x7163, 0x7169, 0x7183, + 0x7187, 0x7195, 0x71AD, 0x71C3, 0x71C9, 0x71CB, 0x71D1, 0x71DB, + 0x71E1, 0x71EF, 0x71F5, 0x71FB, 0x7207, 0x7211, 0x7217, 0x7219, + 0x7225, 0x722F, 0x723B, 0x7243, 0x7255, 0x7267, 0x7271, 0x7277, + 0x727F, 0x728F, 0x7295, 0x729B, 0x72A3, 0x72B3, 0x72C7, 0x72CB, + 0x72CD, 0x72D7, 0x72D9, 0x72E3, 0x72EF, 0x72F5, 0x72FD, 0x7303, + 0x730D, 0x7321, 0x732B, 0x733D, 0x7357, 0x735B, 0x7361, 0x737F, + 0x7381, 0x7385, 0x738D, 0x7393, 0x739F, 0x73AB, 0x73BD, 0x73C1, + 0x73C9, 0x73DF, 0x73E5, 0x73E7, 0x73F3, 0x7415, 0x741B, 0x742D, + 0x7439, 0x743F, 0x7441, 0x745D, 0x746B, 0x747B, 0x7489, 0x748D, + 0x749B, 0x74A7, 0x74AB, 0x74B1, 0x74B7, 0x74B9, 0x74DD, 0x74E1, + 0x74E7, 0x74FB, 0x7507, 0x751F, 0x7525, 0x753B, 0x753D, 0x754D, + 0x755F, 0x756B, 0x7577, 0x7589, 0x758B, 0x7591, 0x7597, 
0x759D, + 0x75A1, 0x75A7, 0x75B5, 0x75B9, 0x75BB, 0x75D1, 0x75D9, 0x75E5, + 0x75EB, 0x75F5, 0x75FB, 0x7603, 0x760F, 0x7621, 0x762D, 0x7633, + 0x763D, 0x763F, 0x7655, 0x7663, 0x7669, 0x766F, 0x7673, 0x7685, + 0x768B, 0x769F, 0x76B5, 0x76B7, 0x76C3, 0x76DB, 0x76DF, 0x76F1, + 0x7703, 0x7705, 0x771B, 0x771D, 0x7721, 0x772D, 0x7735, 0x7741, + 0x774B, 0x7759, 0x775D, 0x775F, 0x7771, 0x7781, 0x77A7, 0x77AD, + 0x77B3, 0x77B9, 0x77C5, 0x77CF, 0x77D5, 0x77E1, 0x77E9, 0x77EF, + 0x77F3, 0x77F9, 0x7807, 0x7825, 0x782B, 0x7835, 0x783D, 0x7853, + 0x7859, 0x7861, 0x786D, 0x7877, 0x7879, 0x7883, 0x7885, 0x788B, + 0x7895, 0x7897, 0x78A1, 0x78AD, 0x78BF, 0x78D3, 0x78D9, 0x78DD, + 0x78E5, 0x78FB, 0x7901, 0x7907, 0x7925, 0x792B, 0x7939, 0x793F, + 0x794B, 0x7957, 0x795D, 0x7967, 0x7969, 0x7973, 0x7991, 0x7993, + 0x79A3, 0x79AB, 0x79AF, 0x79B1, 0x79B7, 0x79C9, 0x79CD, 0x79CF, + 0x79D5, 0x79D9, 0x79F3, 0x79F7, 0x79FF, 0x7A05, 0x7A0F, 0x7A11, + 0x7A15, 0x7A1B, 0x7A23, 0x7A27, 0x7A2D, 0x7A4B, 0x7A57, 0x7A59, + 0x7A5F, 0x7A65, 0x7A69, 0x7A7D, 0x7A93, 0x7A9B, 0x7A9F, 0x7AA1, + 0x7AA5, 0x7AED, 0x7AF5, 0x7AF9, 0x7B01, 0x7B17, 0x7B19, 0x7B1D, + 0x7B2B, 0x7B35, 0x7B37, 0x7B3B, 0x7B4F, 0x7B55, 0x7B5F, 0x7B71, + 0x7B77, 0x7B8B, 0x7B9B, 0x7BA1, 0x7BA9, 0x7BAF, 0x7BB3, 0x7BC7, + 0x7BD3, 0x7BE9, 0x7BEB, 0x7BEF, 0x7BF1, 0x7BFD, 0x7C07, 0x7C19, + 0x7C1B, 0x7C31, 0x7C37, 0x7C49, 0x7C67, 0x7C69, 0x7C73, 0x7C81, + 0x7C8B, 0x7C93, 0x7CA3, 0x7CD5, 0x7CDB, 0x7CE5, 0x7CED, 0x7CF7, + 0x7D03, 0x7D09, 0x7D1B, 0x7D1D, 0x7D33, 0x7D39, 0x7D3B, 0x7D3F, + 0x7D45, 0x7D4D, 0x7D53, 0x7D59, 0x7D63, 0x7D75, 0x7D77, 0x7D8D, + 0x7D8F, 0x7D9F, 0x7DAD, 0x7DB7, 0x7DBD, 0x7DBF, 0x7DCB, 0x7DD5, + 0x7DE9, 0x7DED, 0x7DFB, 0x7E01, 0x7E05, 0x7E29, 0x7E2B, 0x7E2F, + 0x7E35, 0x7E41, 0x7E43, 0x7E47, 0x7E55, 0x7E61, 0x7E67, 0x7E6B, + 0x7E71, 0x7E73, 0x7E79, 0x7E7D, 0x7E91, 0x7E9B, 0x7E9D, 0x7EA7, + 0x7EAD, 0x7EB9, 0x7EBB, 0x7ED3, 0x7EDF, 0x7EEB, 0x7EF1, 0x7EF7, + 0x7EFB, 0x7F13, 0x7F15, 0x7F19, 0x7F31, 0x7F33, 0x7F39, 0x7F3D, + 0x7F43, 
0x7F4B, 0x7F5B, 0x7F61, 0x7F63, 0x7F6D, 0x7F79, 0x7F87, + 0x7F8D, 0x7FAF, 0x7FB5, 0x7FC3, 0x7FC9, 0x7FCD, 0x7FCF, 0x7FED, + 0x8003, 0x800B, 0x800F, 0x8015, 0x801D, 0x8021, 0x8023, 0x803F, + 0x8041, 0x8047, 0x804B, 0x8065, 0x8077, 0x808D, 0x808F, 0x8095, + 0x80A5, 0x80AB, 0x80AD, 0x80BD, 0x80C9, 0x80CB, 0x80D7, 0x80DB, + 0x80E1, 0x80E7, 0x80F5, 0x80FF, 0x8105, 0x810D, 0x8119, 0x811D, + 0x812F, 0x8131, 0x813B, 0x8143, 0x8153, 0x8159, 0x815F, 0x817D, + 0x817F, 0x8189, 0x819B, 0x819D, 0x81A7, 0x81AF, 0x81B3, 0x81BB, + 0x81C7, 0x81DF, 0x8207, 0x8209, 0x8215, 0x821F, 0x8225, 0x8231, + 0x8233, 0x823F, 0x8243, 0x8245, 0x8249, 0x824F, 0x8261, 0x826F, + 0x827B, 0x8281, 0x8285, 0x8293, 0x82B1, 0x82B5, 0x82BD, 0x82C7, + 0x82CF, 0x82D5, 0x82DF, 0x82F1, 0x82F9, 0x82FD, 0x830B, 0x831B, + 0x8321, 0x8329, 0x832D, 0x8333, 0x8335, 0x833F, 0x8341, 0x834D, + 0x8351, 0x8353, 0x8357, 0x835D, 0x8365, 0x8369, 0x836F, 0x838F, + 0x83A7, 0x83B1, 0x83B9, 0x83CB, 0x83D5, 0x83D7, 0x83DD, 0x83E7, + 0x83E9, 0x83ED, 0x83FF, 0x8405, 0x8411, 0x8413, 0x8423, 0x8425, + 0x843B, 0x8441, 0x8447, 0x844F, 0x8461, 0x8465, 0x8477, 0x8483, + 0x848B, 0x8491, 0x8495, 0x84A9, 0x84AF, 0x84CD, 0x84E3, 0x84EF, + 0x84F1, 0x84F7, 0x8509, 0x850D, 0x854B, 0x854F, 0x8551, 0x855D, + 0x8563, 0x856D, 0x856F, 0x857B, 0x8587, 0x85A3, 0x85A5, 0x85A9, + 0x85B7, 0x85CD, 0x85D3, 0x85D5, 0x85DB, 0x85E1, 0x85EB, 0x85F9, + 0x85FD, 0x85FF, 0x8609, 0x860F, 0x8617, 0x8621, 0x862F, 0x8639, + 0x863F, 0x8641, 0x864D, 0x8663, 0x8675, 0x867D, 0x8687, 0x8699, + 0x86A5, 0x86A7, 0x86B3, 0x86B7, 0x86C3, 0x86C5, 0x86CF, 0x86D1, + 0x86D7, 0x86E9, 0x86EF, 0x86F5, 0x8717, 0x871D, 0x871F, 0x872B, + 0x872F, 0x8735, 0x8747, 0x8759, 0x875B, 0x876B, 0x8771, 0x8777, + 0x877F, 0x8785, 0x878F, 0x87A1, 0x87A9, 0x87B3, 0x87BB, 0x87C5, + 0x87C7, 0x87CB, 0x87DD, 0x87F7, 0x8803, 0x8819, 0x881B, 0x881F, + 0x8821, 0x8837, 0x883D, 0x8843, 0x8851, 0x8861, 0x8867, 0x887B, + 0x8885, 0x8891, 0x8893, 0x88A5, 0x88CF, 0x88D3, 0x88EB, 0x88ED, + 0x88F3, 0x88FD, 0x8909, 
0x890B, 0x8911, 0x891B, 0x8923, 0x8927, + 0x892D, 0x8939, 0x8945, 0x894D, 0x8951, 0x8957, 0x8963, 0x8981, + 0x8995, 0x899B, 0x89B3, 0x89B9, 0x89C3, 0x89CF, 0x89D1, 0x89DB, + 0x89EF, 0x89F5, 0x89FB, 0x89FF, 0x8A0B, 0x8A19, 0x8A23, 0x8A35, + 0x8A41, 0x8A49, 0x8A4F, 0x8A5B, 0x8A5F, 0x8A6D, 0x8A77, 0x8A79, + 0x8A85, 0x8AA3, 0x8AB3, 0x8AB5, 0x8AC1, 0x8AC7, 0x8ACB, 0x8ACD, + 0x8AD1, 0x8AD7, 0x8AF1, 0x8AF5, 0x8B07, 0x8B09, 0x8B0D, 0x8B13, + 0x8B21, 0x8B57, 0x8B5D, 0x8B91, 0x8B93, 0x8BA3, 0x8BA9, 0x8BAF, + 0x8BBB, 0x8BD5, 0x8BD9, 0x8BDB, 0x8BE1, 0x8BF7, 0x8BFD, 0x8BFF, + 0x8C0B, 0x8C17, 0x8C1D, 0x8C27, 0x8C39, 0x8C3B, 0x8C47, 0x8C53, + 0x8C5D, 0x8C6F, 0x8C7B, 0x8C81, 0x8C89, 0x8C8F, 0x8C99, 0x8C9F, + 0x8CA7, 0x8CAB, 0x8CAD, 0x8CB1, 0x8CC5, 0x8CDD, 0x8CE3, 0x8CE9, + 0x8CF3, 0x8D01, 0x8D0B, 0x8D0D, 0x8D23, 0x8D29, 0x8D37, 0x8D41, + 0x8D5B, 0x8D5F, 0x8D71, 0x8D79, 0x8D85, 0x8D91, 0x8D9B, 0x8DA7, + 0x8DAD, 0x8DB5, 0x8DC5, 0x8DCB, 0x8DD3, 0x8DD9, 0x8DDF, 0x8DF5, + 0x8DF7, 0x8E01, 0x8E15, 0x8E1F, 0x8E25, 0x8E51, 0x8E63, 0x8E69, + 0x8E73, 0x8E75, 0x8E79, 0x8E7F, 0x8E8D, 0x8E91, 0x8EAB, 0x8EAF, + 0x8EB1, 0x8EBD, 0x8EC7, 0x8ECF, 0x8ED3, 0x8EDB, 0x8EE7, 0x8EEB, + 0x8EF7, 0x8EFF, 0x8F15, 0x8F1D, 0x8F23, 0x8F2D, 0x8F3F, 0x8F45, + 0x8F4B, 0x8F53, 0x8F59, 0x8F65, 0x8F69, 0x8F71, 0x8F83, 0x8F8D, + 0x8F99, 0x8F9F, 0x8FAB, 0x8FAD, 0x8FB3, 0x8FB7, 0x8FB9, 0x8FC9, + 0x8FD5, 0x8FE1, 0x8FEF, 0x8FF9, 0x9007, 0x900D, 0x9017, 0x9023, + 0x9025, 0x9031, 0x9037, 0x903B, 0x9041, 0x9043, 0x904F, 0x9053, + 0x906D, 0x9073, 0x9085, 0x908B, 0x9095, 0x909B, 0x909D, 0x90AF, + 0x90B9, 0x90C1, 0x90C5, 0x90DF, 0x90E9, 0x90FD, 0x9103, 0x9113, + 0x9127, 0x9133, 0x913D, 0x9145, 0x914F, 0x9151, 0x9161, 0x9167, + 0x917B, 0x9185, 0x9199, 0x919D, 0x91BB, 0x91BD, 0x91C1, 0x91C9, + 0x91D9, 0x91DB, 0x91ED, 0x91F1, 0x91F3, 0x91F9, 0x9203, 0x9215, + 0x9221, 0x922F, 0x9241, 0x9247, 0x9257, 0x926B, 0x9271, 0x9275, + 0x927D, 0x9283, 0x9287, 0x928D, 0x9299, 0x92A1, 0x92AB, 0x92AD, + 0x92B9, 0x92BF, 0x92C3, 0x92C5, 0x92CB, 
0x92D5, 0x92D7, 0x92E7, + 0x92F3, 0x9301, 0x930B, 0x9311, 0x9319, 0x931F, 0x933B, 0x933D, + 0x9343, 0x9355, 0x9373, 0x9395, 0x9397, 0x93A7, 0x93B3, 0x93B5, + 0x93C7, 0x93D7, 0x93DD, 0x93E5, 0x93EF, 0x93F7, 0x9401, 0x9409, + 0x9413, 0x943F, 0x9445, 0x944B, 0x944F, 0x9463, 0x9467, 0x9469, + 0x946D, 0x947B, 0x9497, 0x949F, 0x94A5, 0x94B5, 0x94C3, 0x94E1, + 0x94E7, 0x9505, 0x9509, 0x9517, 0x9521, 0x9527, 0x952D, 0x9535, + 0x9539, 0x954B, 0x9557, 0x955D, 0x955F, 0x9575, 0x9581, 0x9589, + 0x958F, 0x959B, 0x959F, 0x95AD, 0x95B1, 0x95B7, 0x95B9, 0x95BD, + 0x95CF, 0x95E3, 0x95E9, 0x95F9, 0x961F, 0x962F, 0x9631, 0x9635, + 0x963B, 0x963D, 0x9665, 0x968F, 0x969D, 0x96A1, 0x96A7, 0x96A9, + 0x96C1, 0x96CB, 0x96D1, 0x96D3, 0x96E5, 0x96EF, 0x96FB, 0x96FD, + 0x970D, 0x970F, 0x9715, 0x9725, 0x972B, 0x9733, 0x9737, 0x9739, + 0x9743, 0x9749, 0x9751, 0x975B, 0x975D, 0x976F, 0x977F, 0x9787, + 0x9793, 0x97A5, 0x97B1, 0x97B7, 0x97C3, 0x97CD, 0x97D3, 0x97D9, + 0x97EB, 0x97F7, 0x9805, 0x9809, 0x980B, 0x9815, 0x9829, 0x982F, + 0x983B, 0x9841, 0x9851, 0x986B, 0x986F, 0x9881, 0x9883, 0x9887, + 0x98A7, 0x98B1, 0x98B9, 0x98BF, 0x98C3, 0x98C9, 0x98CF, 0x98DD, + 0x98E3, 0x98F5, 0x98F9, 0x98FB, 0x990D, 0x9917, 0x991F, 0x9929, + 0x9931, 0x993B, 0x993D, 0x9941, 0x9947, 0x9949, 0x9953, 0x997D, + 0x9985, 0x9991, 0x9995, 0x999B, 0x99AD, 0x99AF, 0x99BF, 0x99C7, + 0x99CB, 0x99CD, 0x99D7, 0x99E5, 0x99F1, 0x99FB, 0x9A0F, 0x9A13, + 0x9A1B, 0x9A25, 0x9A4B, 0x9A4F, 0x9A55, 0x9A57, 0x9A61, 0x9A75, + 0x9A7F, 0x9A8B, 0x9A91, 0x9A9D, 0x9AB7, 0x9AC3, 0x9AC7, 0x9ACF, + 0x9AEB, 0x9AF3, 0x9AF7, 0x9AFF, 0x9B17, 0x9B1D, 0x9B27, 0x9B2F, + 0x9B35, 0x9B45, 0x9B51, 0x9B59, 0x9B63, 0x9B6F, 0x9B77, 0x9B8D, + 0x9B93, 0x9B95, 0x9B9F, 0x9BA1, 0x9BA7, 0x9BB1, 0x9BB7, 0x9BBD, + 0x9BC5, 0x9BCB, 0x9BCF, 0x9BDD, 0x9BF9, 0x9C01, 0x9C11, 0x9C23, + 0x9C2B, 0x9C2F, 0x9C35, 0x9C49, 0x9C4D, 0x9C5F, 0x9C65, 0x9C67, + 0x9C7F, 0x9C97, 0x9C9D, 0x9CA3, 0x9CAF, 0x9CBB, 0x9CBF, 0x9CC1, + 0x9CD7, 0x9CD9, 0x9CE3, 0x9CE9, 0x9CF1, 0x9CFD, 0x9D01, 
0x9D15, + 0x9D27, 0x9D2D, 0x9D31, 0x9D3D, 0x9D55, 0x9D5B, 0x9D61, 0x9D97, + 0x9D9F, 0x9DA5, 0x9DA9, 0x9DC3, 0x9DE7, 0x9DEB, 0x9DED, 0x9DF1, + 0x9E0B, 0x9E17, 0x9E23, 0x9E27, 0x9E2D, 0x9E33, 0x9E3B, 0x9E47, + 0x9E51, 0x9E53, 0x9E5F, 0x9E6F, 0x9E81, 0x9E87, 0x9E8F, 0x9E95, + 0x9EA1, 0x9EB3, 0x9EBD, 0x9EBF, 0x9EF5, 0x9EF9, 0x9EFB, 0x9F05, + 0x9F23, 0x9F2F, 0x9F37, 0x9F3B, 0x9F43, 0x9F53, 0x9F61, 0x9F6D, + 0x9F73, 0x9F77, 0x9F7D, 0x9F89, 0x9F8F, 0x9F91, 0x9F95, 0x9FA3, + 0x9FAF, 0x9FB3, 0x9FC1, 0x9FC7, 0x9FDF, 0x9FE5, 0x9FEB, 0x9FF5, + 0xA001, 0xA00D, 0xA021, 0xA033, 0xA039, 0xA03F, 0xA04F, 0xA057, + 0xA05B, 0xA061, 0xA075, 0xA079, 0xA099, 0xA09D, 0xA0AB, 0xA0B5, + 0xA0B7, 0xA0BD, 0xA0C9, 0xA0D9, 0xA0DB, 0xA0DF, 0xA0E5, 0xA0F1, + 0xA0F3, 0xA0FD, 0xA105, 0xA10B, 0xA10F, 0xA111, 0xA11B, 0xA129, + 0xA12F, 0xA135, 0xA141, 0xA153, 0xA175, 0xA17D, 0xA187, 0xA18D, + 0xA1A5, 0xA1AB, 0xA1AD, 0xA1B7, 0xA1C3, 0xA1C5, 0xA1E3, 0xA1ED, + 0xA1FB, 0xA207, 0xA213, 0xA223, 0xA229, 0xA22F, 0xA231, 0xA243, + 0xA247, 0xA24D, 0xA26B, 0xA279, 0xA27D, 0xA283, 0xA289, 0xA28B, + 0xA291, 0xA295, 0xA29B, 0xA2A9, 0xA2AF, 0xA2B3, 0xA2BB, 0xA2C5, + 0xA2D1, 0xA2D7, 0xA2F7, 0xA301, 0xA309, 0xA31F, 0xA321, 0xA32B, + 0xA331, 0xA349, 0xA351, 0xA355, 0xA373, 0xA379, 0xA37B, 0xA387, + 0xA397, 0xA39F, 0xA3A5, 0xA3A9, 0xA3AF, 0xA3B7, 0xA3C7, 0xA3D5, + 0xA3DB, 0xA3E1, 0xA3E5, 0xA3E7, 0xA3F1, 0xA3FD, 0xA3FF, 0xA40F, + 0xA41D, 0xA421, 0xA423, 0xA427, 0xA43B, 0xA44D, 0xA457, 0xA459, + 0xA463, 0xA469, 0xA475, 0xA493, 0xA49B, 0xA4AD, 0xA4B9, 0xA4C3, + 0xA4C5, 0xA4CB, 0xA4D1, 0xA4D5, 0xA4E1, 0xA4ED, 0xA4EF, 0xA4F3, + 0xA4FF, 0xA511, 0xA529, 0xA52B, 0xA535, 0xA53B, 0xA543, 0xA553, + 0xA55B, 0xA561, 0xA56D, 0xA577, 0xA585, 0xA58B, 0xA597, 0xA59D, + 0xA5A3, 0xA5A7, 0xA5A9, 0xA5C1, 0xA5C5, 0xA5CB, 0xA5D3, 0xA5D9, + 0xA5DD, 0xA5DF, 0xA5E3, 0xA5E9, 0xA5F7, 0xA5FB, 0xA603, 0xA60D, + 0xA625, 0xA63D, 0xA649, 0xA64B, 0xA651, 0xA65D, 0xA673, 0xA691, + 0xA693, 0xA699, 0xA6AB, 0xA6B5, 0xA6BB, 0xA6C1, 0xA6C9, 0xA6CD, + 0xA6CF, 
0xA6D5, 0xA6DF, 0xA6E7, 0xA6F1, 0xA6F7, 0xA6FF, 0xA70F, + 0xA715, 0xA723, 0xA729, 0xA72D, 0xA745, 0xA74D, 0xA757, 0xA759, + 0xA765, 0xA76B, 0xA76F, 0xA793, 0xA795, 0xA7AB, 0xA7B1, 0xA7B9, + 0xA7BF, 0xA7C9, 0xA7D1, 0xA7D7, 0xA7E3, 0xA7ED, 0xA7FB, 0xA805, + 0xA80B, 0xA81D, 0xA829, 0xA82B, 0xA837, 0xA83B, 0xA855, 0xA85F, + 0xA86D, 0xA87D, 0xA88F, 0xA897, 0xA8A9, 0xA8B5, 0xA8C1, 0xA8C7, + 0xA8D7, 0xA8E5, 0xA8FD, 0xA907, 0xA913, 0xA91B, 0xA931, 0xA937, + 0xA939, 0xA943, 0xA97F, 0xA985, 0xA987, 0xA98B, 0xA993, 0xA9A3, + 0xA9B1, 0xA9BB, 0xA9C1, 0xA9D9, 0xA9DF, 0xA9EB, 0xA9FD, 0xAA15, + 0xAA17, 0xAA35, 0xAA39, 0xAA3B, 0xAA47, 0xAA4D, 0xAA57, 0xAA59, + 0xAA5D, 0xAA6B, 0xAA71, 0xAA81, 0xAA83, 0xAA8D, 0xAA95, 0xAAAB, + 0xAABF, 0xAAC5, 0xAAC9, 0xAAE9, 0xAAEF, 0xAB01, 0xAB05, 0xAB07, + 0xAB0B, 0xAB0D, 0xAB11, 0xAB19, 0xAB4D, 0xAB5B, 0xAB71, 0xAB73, + 0xAB89, 0xAB9D, 0xABA7, 0xABAF, 0xABB9, 0xABBB, 0xABC1, 0xABC5, + 0xABD3, 0xABD7, 0xABDD, 0xABF1, 0xABF5, 0xABFB, 0xABFD, 0xAC09, + 0xAC15, 0xAC1B, 0xAC27, 0xAC37, 0xAC39, 0xAC45, 0xAC4F, 0xAC57, + 0xAC5B, 0xAC61, 0xAC63, 0xAC7F, 0xAC8B, 0xAC93, 0xAC9D, 0xACA9, + 0xACAB, 0xACAF, 0xACBD, 0xACD9, 0xACE1, 0xACE7, 0xACEB, 0xACED, + 0xACF1, 0xACF7, 0xACF9, 0xAD05, 0xAD3F, 0xAD45, 0xAD53, 0xAD5D, + 0xAD5F, 0xAD65, 0xAD81, 0xADA1, 0xADA5, 0xADC3, 0xADCB, 0xADD1, + 0xADD5, 0xADDB, 0xADE7, 0xADF3, 0xADF5, 0xADF9, 0xADFF, 0xAE05, + 0xAE13, 0xAE23, 0xAE2B, 0xAE49, 0xAE4D, 0xAE4F, 0xAE59, 0xAE61, + 0xAE67, 0xAE6B, 0xAE71, 0xAE8B, 0xAE8F, 0xAE9B, 0xAE9D, 0xAEA7, + 0xAEB9, 0xAEC5, 0xAED1, 0xAEE3, 0xAEE5, 0xAEE9, 0xAEF5, 0xAEFD, + 0xAF09, 0xAF13, 0xAF27, 0xAF2B, 0xAF33, 0xAF43, 0xAF4F, 0xAF57, + 0xAF5D, 0xAF6D, 0xAF75, 0xAF7F, 0xAF8B, 0xAF99, 0xAF9F, 0xAFA3, + 0xAFAB, 0xAFB7, 0xAFBB, 0xAFCF, 0xAFD5, 0xAFFD, 0xB005, 0xB015, + 0xB01B, 0xB03F, 0xB041, 0xB047, 0xB04B, 0xB051, 0xB053, 0xB069, + 0xB07B, 0xB07D, 0xB087, 0xB08D, 0xB0B1, 0xB0BF, 0xB0CB, 0xB0CF, + 0xB0E1, 0xB0E9, 0xB0ED, 0xB0FB, 0xB105, 0xB107, 0xB111, 0xB119, + 0xB11D, 0xB11F, 0xB131, 
0xB141, 0xB14D, 0xB15B, 0xB165, 0xB173, + 0xB179, 0xB17F, 0xB1A9, 0xB1B3, 0xB1B9, 0xB1BF, 0xB1D3, 0xB1DD, + 0xB1E5, 0xB1F1, 0xB1F5, 0xB201, 0xB213, 0xB215, 0xB21F, 0xB22D, + 0xB23F, 0xB249, 0xB25B, 0xB263, 0xB269, 0xB26D, 0xB27B, 0xB281, + 0xB28B, 0xB2A9, 0xB2B7, 0xB2BD, 0xB2C3, 0xB2C7, 0xB2D3, 0xB2F9, + 0xB2FD, 0xB2FF, 0xB303, 0xB309, 0xB311, 0xB31D, 0xB327, 0xB32D, + 0xB33F, 0xB345, 0xB377, 0xB37D, 0xB381, 0xB387, 0xB393, 0xB39B, + 0xB3A5, 0xB3C5, 0xB3CB, 0xB3E1, 0xB3E3, 0xB3ED, 0xB3F9, 0xB40B, + 0xB40D, 0xB413, 0xB417, 0xB435, 0xB43D, 0xB443, 0xB449, 0xB45B, + 0xB465, 0xB467, 0xB46B, 0xB477, 0xB48B, 0xB495, 0xB49D, 0xB4B5, + 0xB4BF, 0xB4C1, 0xB4C7, 0xB4DD, 0xB4E3, 0xB4E5, 0xB4F7, 0xB501, + 0xB50D, 0xB50F, 0xB52D, 0xB53F, 0xB54B, 0xB567, 0xB569, 0xB56F, + 0xB573, 0xB579, 0xB587, 0xB58D, 0xB599, 0xB5A3, 0xB5AB, 0xB5AF, + 0xB5BB, 0xB5D5, 0xB5DF, 0xB5E7, 0xB5ED, 0xB5FD, 0xB5FF, 0xB609, + 0xB61B, 0xB629, 0xB62F, 0xB633, 0xB639, 0xB647, 0xB657, 0xB659, + 0xB65F, 0xB663, 0xB66F, 0xB683, 0xB687, 0xB69B, 0xB69F, 0xB6A5, + 0xB6B1, 0xB6B3, 0xB6D7, 0xB6DB, 0xB6E1, 0xB6E3, 0xB6ED, 0xB6EF, + 0xB705, 0xB70D, 0xB713, 0xB71D, 0xB729, 0xB735, 0xB747, 0xB755, + 0xB76D, 0xB791, 0xB795, 0xB7A9, 0xB7C1, 0xB7CB, 0xB7D1, 0xB7D3, + 0xB7EF, 0xB7F5, 0xB807, 0xB80F, 0xB813, 0xB819, 0xB821, 0xB827, + 0xB82B, 0xB82D, 0xB839, 0xB855, 0xB867, 0xB875, 0xB885, 0xB893, + 0xB8A5, 0xB8AF, 0xB8B7, 0xB8BD, 0xB8C1, 0xB8C7, 0xB8CD, 0xB8D5, + 0xB8EB, 0xB8F7, 0xB8F9, 0xB903, 0xB915, 0xB91B, 0xB91D, 0xB92F, + 0xB939, 0xB93B, 0xB947, 0xB951, 0xB963, 0xB983, 0xB989, 0xB98D, + 0xB993, 0xB999, 0xB9A1, 0xB9A7, 0xB9AD, 0xB9B7, 0xB9CB, 0xB9D1, + 0xB9DD, 0xB9E7, 0xB9EF, 0xB9F9, 0xBA07, 0xBA0D, 0xBA17, 0xBA25, + 0xBA29, 0xBA2B, 0xBA41, 0xBA53, 0xBA55, 0xBA5F, 0xBA61, 0xBA65, + 0xBA79, 0xBA7D, 0xBA7F, 0xBAA1, 0xBAA3, 0xBAAF, 0xBAB5, 0xBABF, + 0xBAC1, 0xBACB, 0xBADD, 0xBAE3, 0xBAF1, 0xBAFD, 0xBB09, 0xBB1F, + 0xBB27, 0xBB2D, 0xBB3D, 0xBB43, 0xBB4B, 0xBB4F, 0xBB5B, 0xBB61, + 0xBB69, 0xBB6D, 0xBB91, 0xBB97, 0xBB9D, 
0xBBB1, 0xBBC9, 0xBBCF, + 0xBBDB, 0xBBED, 0xBBF7, 0xBBF9, 0xBC03, 0xBC1D, 0xBC23, 0xBC33, + 0xBC3B, 0xBC41, 0xBC45, 0xBC5D, 0xBC6F, 0xBC77, 0xBC83, 0xBC8F, + 0xBC99, 0xBCAB, 0xBCB7, 0xBCB9, 0xBCD1, 0xBCD5, 0xBCE1, 0xBCF3, + 0xBCFF, 0xBD0D, 0xBD17, 0xBD19, 0xBD1D, 0xBD35, 0xBD41, 0xBD4F, + 0xBD59, 0xBD5F, 0xBD61, 0xBD67, 0xBD6B, 0xBD71, 0xBD8B, 0xBD8F, + 0xBD95, 0xBD9B, 0xBD9D, 0xBDB3, 0xBDBB, 0xBDCD, 0xBDD1, 0xBDE3, + 0xBDEB, 0xBDEF, 0xBE07, 0xBE09, 0xBE15, 0xBE21, 0xBE25, 0xBE27, + 0xBE5B, 0xBE5D, 0xBE6F, 0xBE75, 0xBE79, 0xBE7F, 0xBE8B, 0xBE8D, + 0xBE93, 0xBE9F, 0xBEA9, 0xBEB1, 0xBEB5, 0xBEB7, 0xBECF, 0xBED9, + 0xBEDB, 0xBEE5, 0xBEE7, 0xBEF3, 0xBEF9, 0xBF0B, 0xBF33, 0xBF39, + 0xBF4D, 0xBF5D, 0xBF5F, 0xBF6B, 0xBF71, 0xBF7B, 0xBF87, 0xBF89, + 0xBF8D, 0xBF93, 0xBFA1, 0xBFAD, 0xBFB9, 0xBFCF, 0xBFD5, 0xBFDD, + 0xBFE1, 0xBFE3, 0xBFF3, 0xC005, 0xC011, 0xC013, 0xC019, 0xC029, + 0xC02F, 0xC031, 0xC037, 0xC03B, 0xC047, 0xC065, 0xC06D, 0xC07D, + 0xC07F, 0xC091, 0xC09B, 0xC0B3, 0xC0B5, 0xC0BB, 0xC0D3, 0xC0D7, + 0xC0D9, 0xC0EF, 0xC0F1, 0xC101, 0xC103, 0xC109, 0xC115, 0xC119, + 0xC12B, 0xC133, 0xC137, 0xC145, 0xC149, 0xC15B, 0xC173, 0xC179, + 0xC17B, 0xC181, 0xC18B, 0xC18D, 0xC197, 0xC1BD, 0xC1C3, 0xC1CD, + 0xC1DB, 0xC1E1, 0xC1E7, 0xC1FF, 0xC203, 0xC205, 0xC211, 0xC221, + 0xC22F, 0xC23F, 0xC24B, 0xC24D, 0xC253, 0xC25D, 0xC277, 0xC27B, + 0xC27D, 0xC289, 0xC28F, 0xC293, 0xC29F, 0xC2A7, 0xC2B3, 0xC2BD, + 0xC2CF, 0xC2D5, 0xC2E3, 0xC2FF, 0xC301, 0xC307, 0xC311, 0xC313, + 0xC317, 0xC325, 0xC347, 0xC349, 0xC34F, 0xC365, 0xC367, 0xC371, + 0xC37F, 0xC383, 0xC385, 0xC395, 0xC39D, 0xC3A7, 0xC3AD, 0xC3B5, + 0xC3BF, 0xC3C7, 0xC3CB, 0xC3D1, 0xC3D3, 0xC3E3, 0xC3E9, 0xC3EF, + 0xC401, 0xC41F, 0xC42D, 0xC433, 0xC437, 0xC455, 0xC457, 0xC461, + 0xC46F, 0xC473, 0xC487, 0xC491, 0xC499, 0xC49D, 0xC4A5, 0xC4B7, + 0xC4BB, 0xC4C9, 0xC4CF, 0xC4D3, 0xC4EB, 0xC4F1, 0xC4F7, 0xC509, + 0xC51B, 0xC51D, 0xC541, 0xC547, 0xC551, 0xC55F, 0xC56B, 0xC56F, + 0xC575, 0xC577, 0xC595, 0xC59B, 0xC59F, 0xC5A1, 0xC5A7, 
0xC5C3, + 0xC5D7, 0xC5DB, 0xC5EF, 0xC5FB, 0xC613, 0xC623, 0xC635, 0xC641, + 0xC64F, 0xC655, 0xC659, 0xC665, 0xC685, 0xC691, 0xC697, 0xC6A1, + 0xC6A9, 0xC6B3, 0xC6B9, 0xC6CB, 0xC6CD, 0xC6DD, 0xC6EB, 0xC6F1, + 0xC707, 0xC70D, 0xC719, 0xC71B, 0xC72D, 0xC731, 0xC739, 0xC757, + 0xC763, 0xC767, 0xC773, 0xC775, 0xC77F, 0xC7A5, 0xC7BB, 0xC7BD, + 0xC7C1, 0xC7CF, 0xC7D5, 0xC7E1, 0xC7F9, 0xC7FD, 0xC7FF, 0xC803, + 0xC811, 0xC81D, 0xC827, 0xC829, 0xC839, 0xC83F, 0xC853, 0xC857, + 0xC86B, 0xC881, 0xC88D, 0xC88F, 0xC893, 0xC895, 0xC8A1, 0xC8B7, + 0xC8CF, 0xC8D5, 0xC8DB, 0xC8DD, 0xC8E3, 0xC8E7, 0xC8ED, 0xC8EF, + 0xC8F9, 0xC905, 0xC911, 0xC917, 0xC919, 0xC91F, 0xC92F, 0xC937, + 0xC93D, 0xC941, 0xC953, 0xC95F, 0xC96B, 0xC979, 0xC97D, 0xC989, + 0xC98F, 0xC997, 0xC99D, 0xC9AF, 0xC9B5, 0xC9BF, 0xC9CB, 0xC9D9, + 0xC9DF, 0xC9E3, 0xC9EB, 0xCA01, 0xCA07, 0xCA09, 0xCA25, 0xCA37, + 0xCA39, 0xCA4B, 0xCA55, 0xCA5B, 0xCA69, 0xCA73, 0xCA75, 0xCA7F, + 0xCA8D, 0xCA93, 0xCA9D, 0xCA9F, 0xCAB5, 0xCABB, 0xCAC3, 0xCAC9, + 0xCAD9, 0xCAE5, 0xCAED, 0xCB03, 0xCB05, 0xCB09, 0xCB17, 0xCB29, + 0xCB35, 0xCB3B, 0xCB53, 0xCB59, 0xCB63, 0xCB65, 0xCB71, 0xCB87, + 0xCB99, 0xCB9F, 0xCBB3, 0xCBB9, 0xCBC3, 0xCBD1, 0xCBD5, 0xCBD7, + 0xCBDD, 0xCBE9, 0xCBFF, 0xCC0D, 0xCC19, 0xCC1D, 0xCC23, 0xCC2B, + 0xCC41, 0xCC43, 0xCC4D, 0xCC59, 0xCC61, 0xCC89, 0xCC8B, 0xCC91, + 0xCC9B, 0xCCA3, 0xCCA7, 0xCCD1, 0xCCE5, 0xCCE9, 0xCD09, 0xCD15, + 0xCD1F, 0xCD25, 0xCD31, 0xCD3D, 0xCD3F, 0xCD49, 0xCD51, 0xCD57, + 0xCD5B, 0xCD63, 0xCD67, 0xCD81, 0xCD93, 0xCD97, 0xCD9F, 0xCDBB, + 0xCDC1, 0xCDD3, 0xCDD9, 0xCDE5, 0xCDE7, 0xCDF1, 0xCDF7, 0xCDFD, + 0xCE0B, 0xCE15, 0xCE21, 0xCE2F, 0xCE47, 0xCE4D, 0xCE51, 0xCE65, + 0xCE7B, 0xCE7D, 0xCE8F, 0xCE93, 0xCE99, 0xCEA5, 0xCEA7, 0xCEB7, + 0xCEC9, 0xCED7, 0xCEDD, 0xCEE3, 0xCEE7, 0xCEED, 0xCEF5, 0xCF07, + 0xCF0B, 0xCF19, 0xCF37, 0xCF3B, 0xCF4D, 0xCF55, 0xCF5F, 0xCF61, + 0xCF65, 0xCF6D, 0xCF79, 0xCF7D, 0xCF89, 0xCF9B, 0xCF9D, 0xCFA9, + 0xCFB3, 0xCFB5, 0xCFC5, 0xCFCD, 0xCFD1, 0xCFEF, 0xCFF1, 0xCFF7, + 0xD013, 
0xD015, 0xD01F, 0xD021, 0xD033, 0xD03D, 0xD04B, 0xD04F, + 0xD069, 0xD06F, 0xD081, 0xD085, 0xD099, 0xD09F, 0xD0A3, 0xD0AB, + 0xD0BD, 0xD0C1, 0xD0CD, 0xD0E7, 0xD0FF, 0xD103, 0xD117, 0xD12D, + 0xD12F, 0xD141, 0xD157, 0xD159, 0xD15D, 0xD169, 0xD16B, 0xD171, + 0xD177, 0xD17D, 0xD181, 0xD187, 0xD195, 0xD199, 0xD1B1, 0xD1BD, + 0xD1C3, 0xD1D5, 0xD1D7, 0xD1E3, 0xD1FF, 0xD20D, 0xD211, 0xD217, + 0xD21F, 0xD235, 0xD23B, 0xD247, 0xD259, 0xD261, 0xD265, 0xD279, + 0xD27F, 0xD283, 0xD289, 0xD28B, 0xD29D, 0xD2A3, 0xD2A7, 0xD2B3, + 0xD2BF, 0xD2C7, 0xD2E3, 0xD2E9, 0xD2F1, 0xD2FB, 0xD2FD, 0xD315, + 0xD321, 0xD32B, 0xD343, 0xD34B, 0xD355, 0xD369, 0xD375, 0xD37B, + 0xD387, 0xD393, 0xD397, 0xD3A5, 0xD3B1, 0xD3C9, 0xD3EB, 0xD3FD, + 0xD405, 0xD40F, 0xD415, 0xD427, 0xD42F, 0xD433, 0xD43B, 0xD44B, + 0xD459, 0xD45F, 0xD463, 0xD469, 0xD481, 0xD483, 0xD489, 0xD48D, + 0xD493, 0xD495, 0xD4A5, 0xD4AB, 0xD4B1, 0xD4C5, 0xD4DD, 0xD4E1, + 0xD4E3, 0xD4E7, 0xD4F5, 0xD4F9, 0xD50B, 0xD50D, 0xD513, 0xD51F, + 0xD523, 0xD531, 0xD535, 0xD537, 0xD549, 0xD559, 0xD55F, 0xD565, + 0xD567, 0xD577, 0xD58B, 0xD591, 0xD597, 0xD5B5, 0xD5B9, 0xD5C1, + 0xD5C7, 0xD5DF, 0xD5EF, 0xD5F5, 0xD5FB, 0xD603, 0xD60F, 0xD62D, + 0xD631, 0xD643, 0xD655, 0xD65D, 0xD661, 0xD67B, 0xD685, 0xD687, + 0xD69D, 0xD6A5, 0xD6AF, 0xD6BD, 0xD6C3, 0xD6C7, 0xD6D9, 0xD6E1, + 0xD6ED, 0xD709, 0xD70B, 0xD711, 0xD715, 0xD721, 0xD727, 0xD73F, + 0xD745, 0xD74D, 0xD757, 0xD76B, 0xD77B, 0xD783, 0xD7A1, 0xD7A7, + 0xD7AD, 0xD7B1, 0xD7B3, 0xD7BD, 0xD7CB, 0xD7D1, 0xD7DB, 0xD7FB, + 0xD811, 0xD823, 0xD825, 0xD829, 0xD82B, 0xD82F, 0xD837, 0xD84D, + 0xD855, 0xD867, 0xD873, 0xD88F, 0xD891, 0xD8A1, 0xD8AD, 0xD8BF, + 0xD8CD, 0xD8D7, 0xD8E9, 0xD8F5, 0xD8FB, 0xD91B, 0xD925, 0xD933, + 0xD939, 0xD943, 0xD945, 0xD94F, 0xD951, 0xD957, 0xD96D, 0xD96F, + 0xD973, 0xD979, 0xD981, 0xD98B, 0xD991, 0xD99F, 0xD9A5, 0xD9A9, + 0xD9B5, 0xD9D3, 0xD9EB, 0xD9F1, 0xD9F7, 0xD9FF, 0xDA05, 0xDA09, + 0xDA0B, 0xDA0F, 0xDA15, 0xDA1D, 0xDA23, 0xDA29, 0xDA3F, 0xDA51, + 0xDA59, 0xDA5D, 0xDA5F, 
0xDA71, 0xDA77, 0xDA7B, 0xDA7D, 0xDA8D, + 0xDA9F, 0xDAB3, 0xDABD, 0xDAC3, 0xDAC9, 0xDAE7, 0xDAE9, 0xDAF5, + 0xDB11, 0xDB17, 0xDB1D, 0xDB23, 0xDB25, 0xDB31, 0xDB3B, 0xDB43, + 0xDB55, 0xDB67, 0xDB6B, 0xDB73, 0xDB85, 0xDB8F, 0xDB91, 0xDBAD, + 0xDBAF, 0xDBB9, 0xDBC7, 0xDBCB, 0xDBCD, 0xDBEB, 0xDBF7, 0xDC0D, + 0xDC27, 0xDC31, 0xDC39, 0xDC3F, 0xDC49, 0xDC51, 0xDC61, 0xDC6F, + 0xDC75, 0xDC7B, 0xDC85, 0xDC93, 0xDC99, 0xDC9D, 0xDC9F, 0xDCA9, + 0xDCB5, 0xDCB7, 0xDCBD, 0xDCC7, 0xDCCF, 0xDCD3, 0xDCD5, 0xDCDF, + 0xDCF9, 0xDD0F, 0xDD15, 0xDD17, 0xDD23, 0xDD35, 0xDD39, 0xDD53, + 0xDD57, 0xDD5F, 0xDD69, 0xDD6F, 0xDD7D, 0xDD87, 0xDD89, 0xDD9B, + 0xDDA1, 0xDDAB, 0xDDBF, 0xDDC5, 0xDDCB, 0xDDCF, 0xDDE7, 0xDDE9, + 0xDDED, 0xDDF5, 0xDDFB, 0xDE0B, 0xDE19, 0xDE29, 0xDE3B, 0xDE3D, + 0xDE41, 0xDE4D, 0xDE4F, 0xDE59, 0xDE5B, 0xDE61, 0xDE6D, 0xDE77, + 0xDE7D, 0xDE83, 0xDE97, 0xDE9D, 0xDEA1, 0xDEA7, 0xDECD, 0xDED1, + 0xDED7, 0xDEE3, 0xDEF1, 0xDEF5, 0xDF01, 0xDF09, 0xDF13, 0xDF1F, + 0xDF2B, 0xDF33, 0xDF37, 0xDF3D, 0xDF4B, 0xDF55, 0xDF5B, 0xDF67, + 0xDF69, 0xDF73, 0xDF85, 0xDF87, 0xDF99, 0xDFA3, 0xDFAB, 0xDFB5, + 0xDFB7, 0xDFC3, 0xDFC7, 0xDFD5, 0xDFF1, 0xDFF3, 0xE003, 0xE005, + 0xE017, 0xE01D, 0xE027, 0xE02D, 0xE035, 0xE045, 0xE053, 0xE071, + 0xE07B, 0xE08F, 0xE095, 0xE09F, 0xE0B7, 0xE0B9, 0xE0D5, 0xE0D7, + 0xE0E3, 0xE0F3, 0xE0F9, 0xE101, 0xE125, 0xE129, 0xE131, 0xE135, + 0xE143, 0xE14F, 0xE159, 0xE161, 0xE16D, 0xE171, 0xE177, 0xE17F, + 0xE183, 0xE189, 0xE197, 0xE1AD, 0xE1B5, 0xE1BB, 0xE1BF, 0xE1C1, + 0xE1CB, 0xE1D1, 0xE1E5, 0xE1EF, 0xE1F7, 0xE1FD, 0xE203, 0xE219, + 0xE22B, 0xE22D, 0xE23D, 0xE243, 0xE257, 0xE25B, 0xE275, 0xE279, + 0xE287, 0xE29D, 0xE2AB, 0xE2AF, 0xE2BB, 0xE2C1, 0xE2C9, 0xE2CD, + 0xE2D3, 0xE2D9, 0xE2F3, 0xE2FD, 0xE2FF, 0xE311, 0xE323, 0xE327, + 0xE329, 0xE339, 0xE33B, 0xE34D, 0xE351, 0xE357, 0xE35F, 0xE363, + 0xE369, 0xE375, 0xE377, 0xE37D, 0xE383, 0xE39F, 0xE3C5, 0xE3C9, + 0xE3D1, 0xE3E1, 0xE3FB, 0xE3FF, 0xE401, 0xE40B, 0xE417, 0xE419, + 0xE423, 0xE42B, 0xE431, 0xE43B, 0xE447, 
0xE449, 0xE453, 0xE455, + 0xE46D, 0xE471, 0xE48F, 0xE4A9, 0xE4AF, 0xE4B5, 0xE4C7, 0xE4CD, + 0xE4D3, 0xE4E9, 0xE4EB, 0xE4F5, 0xE507, 0xE521, 0xE525, 0xE537, + 0xE53F, 0xE545, 0xE54B, 0xE557, 0xE567, 0xE56D, 0xE575, 0xE585, + 0xE58B, 0xE593, 0xE5A3, 0xE5A5, 0xE5CF, 0xE609, 0xE611, 0xE615, + 0xE61B, 0xE61D, 0xE621, 0xE629, 0xE639, 0xE63F, 0xE653, 0xE657, + 0xE663, 0xE66F, 0xE675, 0xE681, 0xE683, 0xE68D, 0xE68F, 0xE695, + 0xE6AB, 0xE6AD, 0xE6B7, 0xE6BD, 0xE6C5, 0xE6CB, 0xE6D5, 0xE6E3, + 0xE6E9, 0xE6EF, 0xE6F3, 0xE705, 0xE70D, 0xE717, 0xE71F, 0xE72F, + 0xE73D, 0xE747, 0xE749, 0xE753, 0xE755, 0xE761, 0xE767, 0xE76B, + 0xE77F, 0xE789, 0xE791, 0xE7C5, 0xE7CD, 0xE7D7, 0xE7DD, 0xE7DF, + 0xE7E9, 0xE7F1, 0xE7FB, 0xE801, 0xE807, 0xE80F, 0xE819, 0xE81B, + 0xE831, 0xE833, 0xE837, 0xE83D, 0xE84B, 0xE84F, 0xE851, 0xE869, + 0xE875, 0xE879, 0xE893, 0xE8A5, 0xE8A9, 0xE8AF, 0xE8BD, 0xE8DB, + 0xE8E1, 0xE8E5, 0xE8EB, 0xE8ED, 0xE903, 0xE90B, 0xE90F, 0xE915, + 0xE917, 0xE92D, 0xE933, 0xE93B, 0xE94B, 0xE951, 0xE95F, 0xE963, + 0xE969, 0xE97B, 0xE983, 0xE98F, 0xE995, 0xE9A1, 0xE9B9, 0xE9D7, + 0xE9E7, 0xE9EF, 0xEA11, 0xEA19, 0xEA2F, 0xEA35, 0xEA43, 0xEA4D, + 0xEA5F, 0xEA6D, 0xEA71, 0xEA7D, 0xEA85, 0xEA89, 0xEAAD, 0xEAB3, + 0xEAB9, 0xEABB, 0xEAC5, 0xEAC7, 0xEACB, 0xEADF, 0xEAE5, 0xEAEB, + 0xEAF5, 0xEB01, 0xEB07, 0xEB09, 0xEB31, 0xEB39, 0xEB3F, 0xEB5B, + 0xEB61, 0xEB63, 0xEB6F, 0xEB81, 0xEB85, 0xEB9D, 0xEBAB, 0xEBB1, + 0xEBB7, 0xEBC1, 0xEBD5, 0xEBDF, 0xEBED, 0xEBFD, 0xEC0B, 0xEC1B, + 0xEC21, 0xEC29, 0xEC4D, 0xEC51, 0xEC5D, 0xEC69, 0xEC6F, 0xEC7B, + 0xECAD, 0xECB9, 0xECBF, 0xECC3, 0xECC9, 0xECCF, 0xECD7, 0xECDD, + 0xECE7, 0xECE9, 0xECF3, 0xECF5, 0xED07, 0xED11, 0xED1F, 0xED2F, + 0xED37, 0xED3D, 0xED41, 0xED55, 0xED59, 0xED5B, 0xED65, 0xED6B, + 0xED79, 0xED8B, 0xED95, 0xEDBB, 0xEDC5, 0xEDD7, 0xEDD9, 0xEDE3, + 0xEDE5, 0xEDF1, 0xEDF5, 0xEDF7, 0xEDFB, 0xEE09, 0xEE0F, 0xEE19, + 0xEE21, 0xEE49, 0xEE4F, 0xEE63, 0xEE67, 0xEE73, 0xEE7B, 0xEE81, + 0xEEA3, 0xEEAB, 0xEEC1, 0xEEC9, 0xEED5, 0xEEDF, 0xEEE1, 
0xEEF1, + 0xEF1B, 0xEF27, 0xEF2F, 0xEF45, 0xEF4D, 0xEF63, 0xEF6B, 0xEF71, + 0xEF93, 0xEF95, 0xEF9B, 0xEF9F, 0xEFAD, 0xEFB3, 0xEFC3, 0xEFC5, + 0xEFDB, 0xEFE1, 0xEFE9, 0xF001, 0xF017, 0xF01D, 0xF01F, 0xF02B, + 0xF02F, 0xF035, 0xF043, 0xF047, 0xF04F, 0xF067, 0xF06B, 0xF071, + 0xF077, 0xF079, 0xF08F, 0xF0A3, 0xF0A9, 0xF0AD, 0xF0BB, 0xF0BF, + 0xF0C5, 0xF0CB, 0xF0D3, 0xF0D9, 0xF0E3, 0xF0E9, 0xF0F1, 0xF0F7, + 0xF107, 0xF115, 0xF11B, 0xF121, 0xF137, 0xF13D, 0xF155, 0xF175, + 0xF17B, 0xF18D, 0xF193, 0xF1A5, 0xF1AF, 0xF1B7, 0xF1D5, 0xF1E7, + 0xF1ED, 0xF1FD, 0xF209, 0xF20F, 0xF21B, 0xF21D, 0xF223, 0xF227, + 0xF233, 0xF23B, 0xF241, 0xF257, 0xF25F, 0xF265, 0xF269, 0xF277, + 0xF281, 0xF293, 0xF2A7, 0xF2B1, 0xF2B3, 0xF2B9, 0xF2BD, 0xF2BF, + 0xF2DB, 0xF2ED, 0xF2EF, 0xF2F9, 0xF2FF, 0xF305, 0xF30B, 0xF319, + 0xF341, 0xF359, 0xF35B, 0xF35F, 0xF367, 0xF373, 0xF377, 0xF38B, + 0xF38F, 0xF3AF, 0xF3C1, 0xF3D1, 0xF3D7, 0xF3FB, 0xF403, 0xF409, + 0xF40D, 0xF413, 0xF421, 0xF425, 0xF42B, 0xF445, 0xF44B, 0xF455, + 0xF463, 0xF475, 0xF47F, 0xF485, 0xF48B, 0xF499, 0xF4A3, 0xF4A9, + 0xF4AF, 0xF4BD, 0xF4C3, 0xF4DB, 0xF4DF, 0xF4ED, 0xF503, 0xF50B, + 0xF517, 0xF521, 0xF529, 0xF535, 0xF547, 0xF551, 0xF563, 0xF56B, + 0xF583, 0xF58D, 0xF595, 0xF599, 0xF5B1, 0xF5B7, 0xF5C9, 0xF5CF, + 0xF5D1, 0xF5DB, 0xF5F9, 0xF5FB, 0xF605, 0xF607, 0xF60B, 0xF60D, + 0xF635, 0xF637, 0xF653, 0xF65B, 0xF661, 0xF667, 0xF679, 0xF67F, + 0xF689, 0xF697, 0xF69B, 0xF6AD, 0xF6CB, 0xF6DD, 0xF6DF, 0xF6EB, + 0xF709, 0xF70F, 0xF72D, 0xF731, 0xF743, 0xF74F, 0xF751, 0xF755, + 0xF763, 0xF769, 0xF773, 0xF779, 0xF781, 0xF787, 0xF791, 0xF79D, + 0xF79F, 0xF7A5, 0xF7B1, 0xF7BB, 0xF7BD, 0xF7CF, 0xF7D3, 0xF7E7, + 0xF7EB, 0xF7F1, 0xF7FF, 0xF805, 0xF80B, 0xF821, 0xF827, 0xF82D, + 0xF835, 0xF847, 0xF859, 0xF863, 0xF865, 0xF86F, 0xF871, 0xF877, + 0xF87B, 0xF881, 0xF88D, 0xF89F, 0xF8A1, 0xF8AB, 0xF8B3, 0xF8B7, + 0xF8C9, 0xF8CB, 0xF8D1, 0xF8D7, 0xF8DD, 0xF8E7, 0xF8EF, 0xF8F9, + 0xF8FF, 0xF911, 0xF91D, 0xF925, 0xF931, 0xF937, 0xF93B, 0xF941, + 0xF94F, 
0xF95F, 0xF961, 0xF96D, 0xF971, 0xF977, 0xF99D, 0xF9A3, + 0xF9A9, 0xF9B9, 0xF9CD, 0xF9E9, 0xF9FD, 0xFA07, 0xFA0D, 0xFA13, + 0xFA21, 0xFA25, 0xFA3F, 0xFA43, 0xFA51, 0xFA5B, 0xFA6D, 0xFA7B, + 0xFA97, 0xFA99, 0xFA9D, 0xFAAB, 0xFABB, 0xFABD, 0xFAD9, 0xFADF, + 0xFAE7, 0xFAED, 0xFB0F, 0xFB17, 0xFB1B, 0xFB2D, 0xFB2F, 0xFB3F, + 0xFB47, 0xFB4D, 0xFB75, 0xFB7D, 0xFB8F, 0xFB93, 0xFBB1, 0xFBB7, + 0xFBC3, 0xFBC5, 0xFBE3, 0xFBE9, 0xFBF3, 0xFC01, 0xFC29, 0xFC37, + 0xFC41, 0xFC43, 0xFC4F, 0xFC59, 0xFC61, 0xFC65, 0xFC6D, 0xFC73, + 0xFC79, 0xFC95, 0xFC97, 0xFC9B, 0xFCA7, 0xFCB5, 0xFCC5, 0xFCCD, + 0xFCEB, 0xFCFB, 0xFD0D, 0xFD0F, 0xFD19, 0xFD2B, 0xFD31, 0xFD51, + 0xFD55, 0xFD67, 0xFD6D, 0xFD6F, 0xFD7B, 0xFD85, 0xFD97, 0xFD99, + 0xFD9F, 0xFDA9, 0xFDB7, 0xFDC9, 0xFDE5, 0xFDEB, 0xFDF3, 0xFE03, + 0xFE05, 0xFE09, 0xFE1D, 0xFE27, 0xFE2F, 0xFE41, 0xFE4B, 0xFE4D, + 0xFE57, 0xFE5F, 0xFE63, 0xFE69, 0xFE75, 0xFE7B, 0xFE8F, 0xFE93, + 0xFE95, 0xFE9B, 0xFE9F, 0xFEB3, 0xFEBD, 0xFED7, 0xFEE9, 0xFEF3, + 0xFEF5, 0xFF07, 0xFF0D, 0xFF1D, 0xFF2B, 0xFF2F, 0xFF49, 0xFF4D, + 0xFF5B, 0xFF65, 0xFF71, 0xFF7F, 0xFF85, 0xFF8B, 0xFF8F, 0xFF9D, + 0xFFA7, 0xFFA9, 0xFFC7, 0xFFD9, 0xFFEF, 0xFFF1, +#endif +}; diff --git a/security/nss/lib/freebl/mpi/stats b/security/nss/lib/freebl/mpi/stats new file mode 100755 index 000000000..a5deb94c0 --- /dev/null +++ b/security/nss/lib/freebl/mpi/stats @@ -0,0 +1,39 @@ +#!/usr/bin/perl + +# +# Treat each line as a sequence of comma and/or space delimited +# floating point numbers, and compute basic statistics on them. +# These are written to standard output + +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +$min = 1.7976931348623157E+308; +$max = 2.2250738585072014E-308; +$sum = $num = 0; + +while(<>) { + chomp; + + @nums = split(/[\s,]+/, $_); + next if($#nums < 0); + + $num += scalar @nums; + foreach (@nums) { + $min = $_ if($_ < $min); + $max = $_ if($_ > $max); + $sum += $_; + } +} + +if($num) { + $avg = $sum / $num; +} else { + $min = $max = 0; +} + +printf "%d\tmin=%.2f, avg=%.2f, max=%.2f, sum=%.2f\n", + $num, $min, $avg, $max, $sum; + +# end diff --git a/security/nss/lib/freebl/mpi/target.mk b/security/nss/lib/freebl/mpi/target.mk new file mode 100644 index 000000000..dd74564b1 --- /dev/null +++ b/security/nss/lib/freebl/mpi/target.mk @@ -0,0 +1,233 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +## +## Define CFLAGS to contain any local options your compiler +## setup requires. +## +## Conditional compilation options are no longer here; see +## the file 'mpi-config.h' instead. +## +MPICMN = -I. 
-DMP_API_COMPATIBLE -DMP_IOFUNC +CFLAGS= -O $(MPICMN) +#CFLAGS=-ansi -fullwarn -woff 1521 -O3 $(MPICMN) +#CFLAGS=-ansi -pedantic -Wall -O3 $(MPICMN) +#CFLAGS=-ansi -pedantic -Wall -g -O2 -DMP_DEBUG=1 $(MPICMN) + +ifeq ($(TARGET),mipsIRIX) +#IRIX +#MPICMN += -DMP_MONT_USE_MP_MUL +MPICMN += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE +MPICMN += -DMP_USE_UINT_DIGIT +#MPICMN += -DMP_NO_MP_WORD +AS_OBJS = mpi_mips.o +#ASFLAGS = -O -OPT:Olimit=4000 -dollar -fullwarn -xansi -n32 -mips3 -exceptions +ASFLAGS = -O -OPT:Olimit=4000 -dollar -fullwarn -xansi -n32 -mips3 +#CFLAGS=-ansi -n32 -O3 -fullwarn -woff 1429 -D_SGI_SOURCE $(MPICMN) +CFLAGS=-ansi -n32 -O2 -fullwarn -woff 1429 -D_SGI_SOURCE $(MPICMN) +#CFLAGS=-ansi -n32 -g -fullwarn -woff 1429 -D_SGI_SOURCE $(MPICMN) +#CFLAGS=-ansi -64 -O2 -fullwarn -woff 1429 -D_SGI_SOURCE -DMP_NO_MP_WORD \ + $(MPICMN) +endif + +ifeq ($(TARGET),alphaOSF1) +#Alpha/OSF1 +MPICMN += -DMP_ASSEMBLY_MULTIPLY +AS_OBJS+= mpvalpha.o +#CFLAGS= -O -Olimit 4000 -ieee_with_inexact -std1 -DOSF1 -D_REENTRANT $(MPICMN) +CFLAGS= -O -Olimit 4000 -ieee_with_inexact -std1 -DOSF1 -D_REENTRANT \ + -DMP_NO_MP_WORD $(MPICMN) +endif + +ifeq ($(TARGET),v9SOLARIS) +#Solaris 64 +SOLARIS_FPU_FLAGS = -fast -xO5 -xrestrict=%all -xchip=ultra -xarch=v9a -KPIC -mt +#SOLARIS_FPU_FLAGS = -fast -xO5 -xrestrict=%all -xdepend -xchip=ultra -xarch=v9a -KPIC -mt +SOLARIS_ASM_FLAGS = -xchip=ultra -xarch=v9a -KPIC -mt +AS_OBJS += montmulfv9.o +AS_OBJS += mpi_sparc.o mpv_sparcv9.o +MPICMN += -DMP_USE_UINT_DIGIT +#MPICMN += -DMP_NO_MP_WORD +MPICMN += -DMP_ASSEMBLY_MULTIPLY +MPICMN += -DMP_USING_MONT_MULF +CFLAGS= -O -KPIC -DSVR4 -DSYSV -D__svr4 -D__svr4__ -DSOLARIS -D_REENTRANT \ + -DSOLARIS2_8 -xarch=v9 -DXP_UNIX $(MPICMN) +#CFLAGS= -g -KPIC -DSVR4 -DSYSV -D__svr4 -D__svr4__ -DSOLARIS -D_REENTRANT \ + -DSOLARIS2_8 -xarch=v9 -DXP_UNIX $(MPICMN) +endif + +ifeq ($(TARGET),v8plusSOLARIS) +#Solaris 32 +SOLARIS_FPU_FLAGS = -fast -xO5 -xrestrict=%all -xdepend -xchip=ultra -xarch=v8plusa 
-KPIC -mt +SOLARIS_ASM_FLAGS = -xchip=ultra -xarch=v8plusa -KPIC -mt +AS_OBJS += montmulfv8.o +AS_OBJS += mpi_sparc.o mpv_sparcv8.o +#AS_OBJS = montmulf.o +MPICMN += -DMP_ASSEMBLY_MULTIPLY +MPICMN += -DMP_USING_MONT_MULF +MPICMN += -DMP_USE_UINT_DIGIT +MPICMN += -DMP_NO_MP_WORD +CFLAGS=-O -KPIC -DSVR4 -DSYSV -D__svr4 -D__svr4__ -DSOLARIS -D_REENTRANT \ + -DSOLARIS2_6 -xarch=v8plus -DXP_UNIX $(MPICMN) +endif + +ifeq ($(TARGET),v8SOLARIS) +#Solaris 32 +#SOLARIS_FPU_FLAGS = -fast -xO5 -xrestrict=%all -xdepend -xchip=ultra -xarch=v8 -KPIC -mt +#SOLARIS_ASM_FLAGS = -xchip=ultra -xarch=v8plusa -KPIC -mt +#AS_OBJS = montmulfv8.o mpi_sparc.o mpv_sparcv8.o +#AS_OBJS = montmulf.o +#MPICMN += -DMP_USING_MONT_MULF +#MPICMN += -DMP_ASSEMBLY_MULTIPLY +MPICMN += -DMP_USE_LONG_LONG_MULTIPLY -DMP_USE_UINT_DIGIT +MPICMN += -DMP_NO_MP_WORD +CFLAGS=-O -KPIC -DSVR4 -DSYSV -D__svr4 -D__svr4__ -DSOLARIS -D_REENTRANT \ + -DSOLARIS2_6 -xarch=v8 -DXP_UNIX $(MPICMN) +endif + +ifeq ($(TARGET),ia64HPUX) +#HPUX 32 on ia64 -- 64 bit digits SCREAM. +# This one is for DD32 which is the 32-bit ABI with 64-bit registers. 
+CFLAGS= +O3 -DHPUX10 -D_POSIX_C_SOURCE=199506L -Aa +Z -DHPUX -Dhppa \ + -D_HPUX_SOURCE -Aa +e -z +p +DD32 -DHPUX11 -DXP_UNIX -Wl,+k $(MPICMN) +#CFLAGS= -O -DHPUX10 -D_POSIX_C_SOURCE=199506L -Aa +Z -DHPUX -Dhppa \ + -D_HPUX_SOURCE -Aa +e -z +p +DD32 -DHPUX11 -DXP_UNIX -Wl,+k $(MPICMN) +#CFLAGS= -g -DHPUX10 -D_POSIX_C_SOURCE=199506L -Ae +Z -DHPUX -Dhppa \ + -D_HPUX_SOURCE -Aa +e -z +p +DD32 -DHPUX11 -DXP_UNIX -Wl,+k $(MPICMN) +endif + +ifeq ($(TARGET),ia64HPUX64) +#HPUX 64 on ia64 +# This one is for DD64 which is the 64-bit ABI +CFLAGS= +O3 -DHPUX10 -D_POSIX_C_SOURCE=199506L -Aa +Z -DHPUX -Dhppa \ + -D_HPUX_SOURCE -Aa +e -z +p +DD64 -DHPUX11 -DXP_UNIX -Wl,+k $(MPICMN) +#CFLAGS= -g -DHPUX10 -D_POSIX_C_SOURCE=199506L -Ae +Z -DHPUX -Dhppa \ + -D_HPUX_SOURCE -Aa +e -z +p +DD64 -DHPUX11 -DXP_UNIX -Wl,+k $(MPICMN) +endif + +ifeq ($(TARGET),PA2.0WHPUX) +#HPUX64 (HP PA 2.0 Wide) using MAXPY and 64-bit digits +MPICMN += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE +AS_OBJS = mpi_hp.o hpma512.o hppa20.o +CFLAGS= -O -DHPUX10 -D_POSIX_C_SOURCE=199506L -Ae +Z -DHPUX -Dhppa \ + -D_HPUX_SOURCE -Aa +e -z +DA2.0W +DS2.0 +O3 +DChpux -DHPUX11 -DXP_UNIX \ + $(MPICMN) +#CFLAGS= -g -DHPUX10 -D_POSIX_C_SOURCE=199506L -Ae +Z -DHPUX -Dhppa \ + -D_HPUX_SOURCE -Aa +e -z +DA2.0W +DS2.0 +DChpux -DHPUX11 -DXP_UNIX \ + $(MPICMN) +AS = $(CC) $(CFLAGS) -c +endif + +ifeq ($(TARGET),PA2.0NHPUX) +#HPUX32 (HP PA 2.0 Narrow) hybrid model, using 32-bit digits +# This one is for DA2.0 (N) which is the 32-bit ABI with 64-bit registers. 
+MPICMN += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE +AS_OBJS = mpi_hp.o hpma512.o hppa20.o +CFLAGS= +O3 -DHPUX10 -D_POSIX_C_SOURCE=199506L -Ae +Z -DHPUX -Dhppa \ + -D_HPUX_SOURCE -Aa +e -z +DA2.0 +DS2.0 +DChpux -DHPUX11 -DXP_UNIX \ + -Wl,+k $(MPICMN) +#CFLAGS= -g -DHPUX10 -D_POSIX_C_SOURCE=199506L -Ae +Z -DHPUX -Dhppa \ + -D_HPUX_SOURCE -Aa +e -z +DA2.0 +DS2.0 +DChpux -DHPUX11 -DXP_UNIX \ + -Wl,+k $(MPICMN) +AS = $(CC) $(CFLAGS) -c +endif + +ifeq ($(TARGET),PA1.1HPUX) +#HPUX32 (HP PA 1.1) Pure 32 bit +MPICMN += -DMP_USE_UINT_DIGIT -DMP_NO_MP_WORD +#MPICMN += -DMP_USE_LONG_LONG_MULTIPLY +CFLAGS= -O -DHPUX10 -D_POSIX_C_SOURCE=199506L -Ae +Z -DHPUX -Dhppa \ + -D_HPUX_SOURCE +DAportable +DS1.1 -DHPUX11 -DXP_UNIX $(MPICMN) +##CFLAGS= -g -DHPUX10 -D_POSIX_C_SOURCE=199506L -Ae +Z -DHPUX -Dhppa \ +# -D_HPUX_SOURCE +DAportable +DS1.1 -DHPUX11 -DXP_UNIX $(MPICMN) +endif + +ifeq ($(TARGET),32AIX) +# +CC = xlC_r +MPICMN += -DMP_USE_UINT_DIGIT +MPICMN += -DMP_NO_DIV_WORD +#MPICMN += -DMP_NO_MUL_WORD +MPICMN += -DMP_NO_ADD_WORD +MPICMN += -DMP_NO_SUB_WORD +#MPICMN += -DMP_NO_MP_WORD +#MPICMN += -DMP_USE_LONG_LONG_MULTIPLY +CFLAGS = -O -DAIX -DSYSV -qarch=com -DAIX4_3 -DXP_UNIX -UDEBUG -DNDEBUG $(MPICMN) +#CFLAGS = -g -DAIX -DSYSV -qarch=com -DAIX4_3 -DXP_UNIX -UDEBUG -DNDEBUG $(MPICMN) +#CFLAGS += -pg +endif + +ifeq ($(TARGET),64AIX) +# +CC = xlC_r +MPICMN += -DMP_USE_UINT_DIGIT +CFLAGS = -O -O2 -DAIX -DSYSV -qarch=com -DAIX_64BIT -DAIX4_3 -DXP_UNIX -UDEBUG -DNDEBUG $(MPICMN) +OBJECT_MODE=64 +export OBJECT_MODE +endif + +ifeq ($(TARGET),x86LINUX) +#Linux +AS_OBJS = mpi_x86.o +MPICMN += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE -DMP_ASSEMBLY_DIV_2DX1D +MPICMN += -DMP_MONT_USE_MP_MUL -DMP_IS_LITTLE_ENDIAN +CFLAGS= -O2 -fPIC -DLINUX1_2 -Di386 -D_XOPEN_SOURCE -DLINUX2_1 -ansi -Wall \ + -pipe -DLINUX -Dlinux -D_POSIX_SOURCE -D_BSD_SOURCE -DHAVE_STRERROR \ + -DXP_UNIX -UDEBUG -DNDEBUG -D_REENTRANT $(MPICMN) +#CFLAGS= -g -fPIC -DLINUX1_2 -Di386 -D_XOPEN_SOURCE -DLINUX2_1 -ansi 
-Wall \ + -pipe -DLINUX -Dlinux -D_POSIX_SOURCE -D_BSD_SOURCE -DHAVE_STRERROR \ + -DXP_UNIX -DDEBUG -UNDEBUG -D_REENTRANT $(MPICMN) +#CFLAGS= -g -fPIC -DLINUX1_2 -Di386 -D_XOPEN_SOURCE -DLINUX2_1 -ansi -Wall \ + -pipe -DLINUX -Dlinux -D_POSIX_SOURCE -D_BSD_SOURCE -DHAVE_STRERROR \ + -DXP_UNIX -UDEBUG -DNDEBUG -D_REENTRANT $(MPICMN) +endif + +ifeq ($(TARGET),armLINUX) +MPICMN += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE +MPICMN += -DMP_USE_UINT_DIGIT +AS_OBJS += mpi_arm.o +endif + +ifeq ($(TARGET),AMD64SOLARIS) +ASFLAGS += -xarch=generic64 +AS_OBJS = mpi_amd64.o mpi_amd64_sun.o +MP_CONFIG = -DMP_ASSEMBLY_MULTIPLY -DMPI_AMD64 +MP_CONFIG += -DMP_IS_LITTLE_ENDIAN +CFLAGS = -xarch=generic64 -xO4 -I. -DMP_API_COMPATIBLE -DMP_IOFUNC $(MP_CONFIG) +MPICMN += $(MP_CONFIG) + +mpi_amd64_asm.o: mpi_amd64_sun.s + $(AS) -xarch=generic64 -P -D_ASM mpi_amd64_sun.s +endif + +ifeq ($(TARGET),WIN32) +ifeq ($(CPU_ARCH),x86_64) +AS_OBJS = mpi_amd64.obj mpi_amd64_masm.obj mp_comba_amd64_masm.asm +CFLAGS = -Od -Z7 -MDd -W3 -nologo -DDEBUG -D_DEBUG -UNDEBUG -DDEBUG_$(USER) +CFLAGS += -DWIN32 -DWIN64 -D_WINDOWS -D_AMD_64_ -D_M_AMD64 -DWIN95 -DXP_PC +CFLAGS += $(MPICMN) + +$(AS_OBJS): %.obj : %.asm + ml64 -Cp -Sn -Zi -coff -nologo -c $< + +$(LIBOBJS): %.obj : %.c + cl $(CFLAGS) -Fo$@ -c $< +else +AS_OBJS = mpi_x86.obj +MPICMN += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE -DMP_ASSEMBLY_DIV_2DX1D +MPICMN += -DMP_USE_UINT_DIGIT -DMP_NO_MP_WORD -DMP_API_COMPATIBLE +MPICMN += -DMP_MONT_USE_MP_MUL +MPICMN += -DMP_CHAR_STORE_SLOW -DMP_IS_LITTLE_ENDIAN +CFLAGS = -Od -Z7 -MDd -W3 -nologo -DDEBUG -D_DEBUG -UNDEBUG -DDEBUG_$(USER) +CFLAGS += -DWIN32 -D_WINDOWS -D_X86_ -DWIN95 -DXP_PC +CFLAGS += $(MPICMN) + +$(AS_OBJS): %.obj : %.asm + ml -Cp -Sn -Zi -coff -nologo -c $< + +$(LIBOBJS): %.obj : %.c + cl $(CFLAGS) -Fo$@ -c $< + +endif +endif diff --git a/security/nss/lib/freebl/mpi/test-arrays.txt b/security/nss/lib/freebl/mpi/test-arrays.txt new file mode 100644 index 000000000..6c8908c1a --- /dev/null 
+++ b/security/nss/lib/freebl/mpi/test-arrays.txt @@ -0,0 +1,55 @@ +# +# Test suite table for MPI library +# +# Format of entries: +# suite-name:function-name:description +# +# suite-name The name used to identify this test in mpi-test +# function-name The function called to perform this test in mpi-test.c +# description A brief description of what the suite tests + +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +list:test_list:print out a list of the available test suites +copy:test_copy:test assignment of mp-int structures +exchange:test_exch:test exchange of mp-int structures +zero:test_zero:test zeroing of an mp-int +set:test_set:test setting an mp-int to a small constant +absolute-value:test_abs:test the absolute value function +negate:test_neg:test the arithmetic negation function +add-digit:test_add_d:test digit addition +add:test_add:test full addition +subtract-digit:test_sub_d:test digit subtraction +subtract:test_sub:test full subtraction +multiply-digit:test_mul_d:test digit multiplication +multiply:test_mul:test full multiplication +square:test_sqr:test full squaring function +divide-digit:test_div_d:test digit division +divide-2:test_div_2:test division by two +divide-2d:test_div_2d:test division & remainder by 2^d +divide:test_div:test full division +expt-digit:test_expt_d:test digit exponentiation +expt:test_expt:test full exponentiation +expt-2:test_2expt:test power-of-two exponentiation +modulo-digit:test_mod_d:test digit modular reduction +modulo:test_mod:test full modular reduction +mod-add:test_addmod:test modular addition +mod-subtract:test_submod:test modular subtraction +mod-multiply:test_mulmod:test modular multiplication +mod-square:test_sqrmod:test modular squaring function +mod-expt:test_exptmod:test full modular exponentiation +mod-expt-digit:test_exptmod_d:test digit modular 
exponentiation +mod-inverse:test_invmod:test modular inverse function +compare-digit:test_cmp_d:test digit comparison function +compare-zero:test_cmp_z:test zero comparison function +compare:test_cmp:test general signed comparison +compare-magnitude:test_cmp_mag:test general magnitude comparison +parity:test_parity:test parity comparison functions +gcd:test_gcd:test greatest common divisor functions +lcm:test_lcm:test least common multiple function +conversion:test_convert:test general radix conversion facilities +binary:test_raw:test raw output format +pprime:test_pprime:test probabilistic primality tester +fermat:test_fermat:test Fermat pseudoprimality tester diff --git a/security/nss/lib/freebl/mpi/tests/LICENSE b/security/nss/lib/freebl/mpi/tests/LICENSE new file mode 100644 index 000000000..c2c5d0190 --- /dev/null +++ b/security/nss/lib/freebl/mpi/tests/LICENSE @@ -0,0 +1,6 @@ +Within this directory, each of the files listed below is licensed under +the terms given in the file LICENSE-MPL, also in this directory. + +pi1k.txt +pi2k.txt +pi5k.txt diff --git a/security/nss/lib/freebl/mpi/tests/LICENSE-MPL b/security/nss/lib/freebl/mpi/tests/LICENSE-MPL new file mode 100644 index 000000000..41dc2327f --- /dev/null +++ b/security/nss/lib/freebl/mpi/tests/LICENSE-MPL @@ -0,0 +1,3 @@ +This Source Code Form is subject to the terms of the Mozilla Public +License, v. 2.0. If a copy of the MPL was not distributed with this +file, You can obtain one at http://mozilla.org/MPL/2.0/. diff --git a/security/nss/lib/freebl/mpi/tests/mptest-1.c b/security/nss/lib/freebl/mpi/tests/mptest-1.c new file mode 100644 index 000000000..449134668 --- /dev/null +++ b/security/nss/lib/freebl/mpi/tests/mptest-1.c @@ -0,0 +1,43 @@ +/* + * Simple test driver for MPI library + * + * Test 1: Simple input test (drives single-digit multiply and add, + * as well as I/O routines) + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. 
If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include +#include +#include +#include +#include + +#ifdef MAC_CW_SIOUX +#include +#endif + +#include "mpi.h" + +int +main(int argc, char *argv[]) +{ + int ix; + mp_int mp; + +#ifdef MAC_CW_SIOUX + argc = ccommand(&argv); +#endif + + mp_init(&mp); + + for (ix = 1; ix < argc; ix++) { + mp_read_radix(&mp, argv[ix], 10); + mp_print(&mp, stdout); + fputc('\n', stdout); + } + + mp_clear(&mp); + return 0; +} diff --git a/security/nss/lib/freebl/mpi/tests/mptest-2.c b/security/nss/lib/freebl/mpi/tests/mptest-2.c new file mode 100644 index 000000000..1505e6afd --- /dev/null +++ b/security/nss/lib/freebl/mpi/tests/mptest-2.c @@ -0,0 +1,62 @@ +/* + * Simple test driver for MPI library + * + * Test 2: Basic addition and subtraction test + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include +#include +#include +#include +#include + +#include "mpi.h" + +int +main(int argc, char *argv[]) +{ + mp_int a, b, c; + + if (argc < 3) { + fprintf(stderr, "Usage: %s \n", argv[0]); + return 1; + } + + printf("Test 2: Basic addition and subtraction\n\n"); + + mp_init(&a); + mp_init(&b); + + mp_read_radix(&a, argv[1], 10); + mp_read_radix(&b, argv[2], 10); + printf("a = "); + mp_print(&a, stdout); + fputc('\n', stdout); + printf("b = "); + mp_print(&b, stdout); + fputc('\n', stdout); + + mp_init(&c); + printf("c = a + b\n"); + + mp_add(&a, &b, &c); + printf("c = "); + mp_print(&c, stdout); + fputc('\n', stdout); + + printf("c = a - b\n"); + + mp_sub(&a, &b, &c); + printf("c = "); + mp_print(&c, stdout); + fputc('\n', stdout); + + mp_clear(&c); + mp_clear(&b); + mp_clear(&a); + + return 0; +} diff --git a/security/nss/lib/freebl/mpi/tests/mptest-3.c b/security/nss/lib/freebl/mpi/tests/mptest-3.c new file mode 100644 index 000000000..86fb24654 --- /dev/null +++ b/security/nss/lib/freebl/mpi/tests/mptest-3.c @@ -0,0 +1,105 @@ +/* + * Simple test driver for MPI library + * + * Test 3: Multiplication, division, and exponentiation test + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include +#include +#include +#include +#include + +#include + +#include "mpi.h" + +#define EXPT 0 /* define nonzero to get exponentiate test */ + +int +main(int argc, char *argv[]) +{ + int ix; + mp_int a, b, c, d; + mp_digit r; + mp_err res; + + if (argc < 3) { + fprintf(stderr, "Usage: %s \n", argv[0]); + return 1; + } + + printf("Test 3: Multiplication and division\n\n"); + srand(time(NULL)); + + mp_init(&a); + mp_init(&b); + + mp_read_variable_radix(&a, argv[1], 10); + mp_read_variable_radix(&b, argv[2], 10); + printf("a = "); + mp_print(&a, stdout); + fputc('\n', stdout); + printf("b = "); + mp_print(&b, stdout); + fputc('\n', stdout); + + mp_init(&c); + printf("\nc = a * b\n"); + + mp_mul(&a, &b, &c); + printf("c = "); + mp_print(&c, stdout); + fputc('\n', stdout); + + printf("\nc = b * 32523\n"); + + mp_mul_d(&b, 32523, &c); + printf("c = "); + mp_print(&c, stdout); + fputc('\n', stdout); + + mp_init(&d); + printf("\nc = a / b, d = a mod b\n"); + + mp_div(&a, &b, &c, &d); + printf("c = "); + mp_print(&c, stdout); + fputc('\n', stdout); + printf("d = "); + mp_print(&d, stdout); + fputc('\n', stdout); + + ix = rand() % 256; + printf("\nc = a / %d, r = a mod %d\n", ix, ix); + mp_div_d(&a, (mp_digit)ix, &c, &r); + printf("c = "); + mp_print(&c, stdout); + fputc('\n', stdout); + printf("r = %04X\n", r); + +#if EXPT + printf("\nc = a ** b\n"); + mp_expt(&a, &b, &c); + printf("c = "); + mp_print(&c, stdout); + fputc('\n', stdout); +#endif + + ix = rand() % 256; + printf("\nc = 2^%d\n", ix); + mp_2expt(&c, ix); + printf("c = "); + mp_print(&c, stdout); + fputc('\n', stdout); + + mp_clear(&d); + mp_clear(&c); + mp_clear(&b); + mp_clear(&a); + + return 0; +} diff --git a/security/nss/lib/freebl/mpi/tests/mptest-3a.c b/security/nss/lib/freebl/mpi/tests/mptest-3a.c new file mode 100644 index 000000000..c6cea7046 --- /dev/null +++ b/security/nss/lib/freebl/mpi/tests/mptest-3a.c @@ -0,0 +1,123 @@ +/* + * Simple test driver for MPI library + * + * Test 3a: 
Multiplication vs. squaring timing test + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include +#include +#include +#include +#include + +#include + +#include "mpi.h" +#include "mpprime.h" + +int +main(int argc, char *argv[]) +{ + int ix, num, prec = 8; + double d1, d2; + clock_t start, finish; + time_t seed; + mp_int a, c, d; + + seed = time(NULL); + + if (argc < 2) { + fprintf(stderr, "Usage: %s []\n", argv[0]); + return 1; + } + + if ((num = atoi(argv[1])) < 0) + num = -num; + + if (!num) { + fprintf(stderr, "%s: must perform at least 1 test\n", argv[0]); + return 1; + } + + if (argc > 2) { + if ((prec = atoi(argv[2])) <= 0) + prec = 8; + else + prec = (prec + (DIGIT_BIT - 1)) / DIGIT_BIT; + } + + printf("Test 3a: Multiplication vs squaring timing test\n" + "Precision: %d digits (%u bits)\n" + "# of tests: %d\n\n", + prec, prec * DIGIT_BIT, num); + + mp_init_size(&a, prec); + + mp_init(&c); + mp_init(&d); + + printf("Verifying accuracy ... \n"); + srand((unsigned int)seed); + for (ix = 0; ix < num; ix++) { + mpp_random_size(&a, prec); + mp_mul(&a, &a, &c); + mp_sqr(&a, &d); + + if (mp_cmp(&c, &d) != 0) { + printf("Error! Results not accurate:\n"); + printf("a = "); + mp_print(&a, stdout); + fputc('\n', stdout); + printf("c = "); + mp_print(&c, stdout); + fputc('\n', stdout); + printf("d = "); + mp_print(&d, stdout); + fputc('\n', stdout); + mp_sub(&c, &d, &d); + printf("dif "); + mp_print(&d, stdout); + fputc('\n', stdout); + mp_clear(&c); + mp_clear(&d); + mp_clear(&a); + return 1; + } + } + printf("Accuracy is confirmed for the %d test samples\n", num); + mp_clear(&d); + + printf("Testing squaring ... 
\n"); + srand((unsigned int)seed); + start = clock(); + for (ix = 0; ix < num; ix++) { + mpp_random_size(&a, prec); + mp_sqr(&a, &c); + } + finish = clock(); + + d2 = (double)(finish - start) / CLOCKS_PER_SEC; + + printf("Testing multiplication ... \n"); + srand((unsigned int)seed); + start = clock(); + for (ix = 0; ix < num; ix++) { + mpp_random(&a); + mp_mul(&a, &a, &c); + } + finish = clock(); + + d1 = (double)(finish - start) / CLOCKS_PER_SEC; + + printf("Multiplication time: %.3f sec (%.3f each)\n", d1, d1 / num); + printf("Squaring time: %.3f sec (%.3f each)\n", d2, d2 / num); + printf("Improvement: %.2f%%\n", (1.0 - (d2 / d1)) * 100.0); + + mp_clear(&c); + mp_clear(&a); + + return 0; +} diff --git a/security/nss/lib/freebl/mpi/tests/mptest-4.c b/security/nss/lib/freebl/mpi/tests/mptest-4.c new file mode 100644 index 000000000..0f326ac2c --- /dev/null +++ b/security/nss/lib/freebl/mpi/tests/mptest-4.c @@ -0,0 +1,111 @@ +/* + * Simple test driver for MPI library + * + * Test 4: Modular arithmetic tests + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/
+
+/* NOTE(review): the system header names were blank in this copy of the
+ * patch; the names below are inferred -- confirm against upstream. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+
+#include "mpi.h"
+
+/*
+ * Exercise the modular arithmetic entry points on three decimal inputs.
+ *
+ * argv[1], argv[2], argv[3] are read as base-10 values a, b and m.
+ * Each result is printed to stdout; nothing is verified automatically.
+ *
+ * Returns 0 after running all operations, 1 if too few arguments are given.
+ */
+int
+main(int argc, char *argv[])
+{
+    int ix;
+    mp_int a, b, c, m;
+    mp_digit r;
+
+    if (argc < 4) {
+        fprintf(stderr, "Usage: %s <a> <b> <m>\n", argv[0]);
+        return 1;
+    }
+
+    printf("Test 4: Modular arithmetic\n\n");
+
+    mp_init(&a);
+    mp_init(&b);
+    mp_init(&m);
+
+    mp_read_radix(&a, argv[1], 10);
+    mp_read_radix(&b, argv[2], 10);
+    mp_read_radix(&m, argv[3], 10);
+    printf("a = ");
+    mp_print(&a, stdout);
+    fputc('\n', stdout);
+    printf("b = ");
+    mp_print(&b, stdout);
+    fputc('\n', stdout);
+    printf("m = ");
+    mp_print(&m, stdout);
+    fputc('\n', stdout);
+
+    mp_init(&c);
+    printf("\nc = a (mod m)\n");
+
+    mp_mod(&a, &m, &c);
+    printf("c = ");
+    mp_print(&c, stdout);
+    fputc('\n', stdout);
+
+    printf("\nc = b (mod m)\n");
+
+    mp_mod(&b, &m, &c);
+    printf("c = ");
+    mp_print(&c, stdout);
+    fputc('\n', stdout);
+
+    printf("\nc = b (mod 1853)\n");
+
+    mp_mod_d(&b, 1853, &r);
+    /* mp_digit's width is set in mpi.h (not visible here); widen explicitly
+     * so the argument always matches the conversion. */
+    printf("c = %04lX\n", (unsigned long)r);
+
+    printf("\nc = (a + b) mod m\n");
+
+    mp_addmod(&a, &b, &m, &c);
+    printf("c = ");
+    mp_print(&c, stdout);
+    fputc('\n', stdout);
+
+    printf("\nc = (a - b) mod m\n");
+
+    mp_submod(&a, &b, &m, &c);
+    printf("c = ");
+    mp_print(&c, stdout);
+    fputc('\n', stdout);
+
+    printf("\nc = (a * b) mod m\n");
+
+    mp_mulmod(&a, &b, &m, &c);
+    printf("c = ");
+    mp_print(&c, stdout);
+    fputc('\n', stdout);
+
+    printf("\nc = (a ** b) mod m\n");
+
+    mp_exptmod(&a, &b, &m, &c);
+    printf("c = ");
+    mp_print(&c, stdout);
+    fputc('\n', stdout);
+
+    /* The output operand aliases the input here on purpose. */
+    printf("\nIn-place modular squaring test:\n");
+    for (ix = 0; ix < 5; ix++) {
+        printf("a = (a * a) mod m a = ");
+        mp_sqrmod(&a, &m, &a);
+        mp_print(&a, stdout);
+        fputc('\n', stdout);
+    }
+
+    mp_clear(&c);
+    mp_clear(&m);
+    mp_clear(&b);
+    mp_clear(&a);
+
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/tests/mptest-4a.c b/security/nss/lib/freebl/mpi/tests/mptest-4a.c
new file mode 100644
index 000000000..0c8e18872
--- /dev/null
+++
b/security/nss/lib/freebl/mpi/tests/mptest-4a.c
@@ -0,0 +1,109 @@
+/*
+ * mptest4a - modular exponentiation speed test
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* NOTE(review): the system header names were blank in this copy of the
+ * patch; the names below are inferred -- confirm against upstream. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <time.h>
+
+#include <sys/time.h>
+
+#include "mpi.h"
+#include "mpprime.h"
+
+/* A point in time, split into whole seconds and microseconds. */
+typedef struct {
+    unsigned int sec;
+    unsigned int usec;
+} instant_t;
+
+/*
+ * Return the current time as reported by gettimeofday().
+ * If gettimeofday() fails, both fields of the result are 0.
+ */
+instant_t
+now(void)
+{
+    struct timeval clk;
+    instant_t res;
+
+    res.sec = res.usec = 0;
+
+    if (gettimeofday(&clk, NULL) != 0)
+        return res;
+
+    res.sec = clk.tv_sec;
+    res.usec = clk.tv_usec;
+
+    return res;
+}
+
+extern mp_err s_mp_pad();
+
+/*
+ * Time repeated mp_exptmod() calls on random operands.
+ *
+ * argv[1]  number of exponentiations (a negative value is negated)
+ * argv[2]  optional precision in digits; non-positive values fall back to 8
+ *
+ * Returns 0 on success, 1 on bad arguments.
+ */
+int
+main(int argc, char *argv[])
+{
+    int ix, num, prec = 8;
+    unsigned int d;
+    instant_t start, finish;
+    time_t seed;
+    mp_int a, m, c;
+
+    seed = time(NULL);
+
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s <num-tests> [<precision-in-digits>]\n",
+                argv[0]);
+        return 1;
+    }
+
+    if ((num = atoi(argv[1])) < 0)
+        num = -num;
+
+    if (!num) {
+        fprintf(stderr, "%s: must perform at least 1 test\n", argv[0]);
+        return 1;
+    }
+
+    if (argc > 2) {
+        if ((prec = atoi(argv[2])) <= 0)
+            prec = 8;
+    }
+
+    /* DIGIT_BIT comes from mpi.h and its type is not visible here, so the
+     * product is cast to match the %d conversion. */
+    printf("Test 4a: Modular exponentiation timing test\n"
+           "Precision: %d digits (%d bits)\n"
+           "# of tests: %d\n\n",
+           prec, (int)(prec * DIGIT_BIT), num);
+
+    mp_init_size(&a, prec);
+    mp_init_size(&m, prec);
+    mp_init_size(&c, prec);
+    s_mp_pad(&a, prec);
+    s_mp_pad(&m, prec);
+    s_mp_pad(&c, prec);
+
+    printf("Testing modular exponentiation ... \n");
+    srand((unsigned int)seed);
+
+    start = now();
+    for (ix = 0; ix < num; ix++) {
+        mpp_random(&a);
+        mpp_random(&c);
+        mpp_random(&m);
+        mp_exptmod(&a, &c, &m, &c); /* c is both exponent and result */
+    }
+    finish = now();
+
+    /* Elapsed microseconds; unsigned arithmetic keeps the usec borrow
+     * correct when finish.usec < start.usec. */
+    d = (finish.sec - start.sec) * 1000000;
+    d -= start.usec;
+    d += finish.usec;
+
+    printf("Total time elapsed: %u usec\n", d);
+    /* Convert to double before dividing so the seconds figure keeps its
+     * fractional microseconds. */
+    printf("Time per exponentiation: %u usec (%.3f sec)\n",
+           (d / num), ((double)d / num) / 1000000.0);
+
+    mp_clear(&c);
+    mp_clear(&a);
+    mp_clear(&m);
+
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/tests/mptest-4b.c b/security/nss/lib/freebl/mpi/tests/mptest-4b.c
new file mode 100644
index 000000000..1bb2f911f
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/tests/mptest-4b.c
@@ -0,0 +1,107 @@
+/*
+ * mptest-4b.c
+ *
+ * Test speed of a large modular exponentiation of a primitive element
+ * modulo a prime.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
+
+/* NOTE(review): the system header names were blank in this copy of the
+ * patch; the names below are inferred -- confirm against upstream. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <time.h>
+
+#include <sys/time.h>
+
+#include "mpi.h"
+#include "mpprime.h"
+
+/* Modulus and base for the timed exponentiation, both read as hexadecimal
+ * by main(). */
+char *g_prime =
+    "34BD53C07350E817CCD49721020F1754527959C421C1533244769D4CF060A8B1C3DA"
+    "25094BE723FB1E2369B55FEEBBE0FAC16425161BF82684062B5EC5D7D47D1B23C117"
+    "0FA19745E44A55E148314E582EB813AC9EE5126295E2E380CACC2F6D206B293E5ED9"
+    "23B54EE961A8C69CD625CE4EC38B70C649D7F014432AEF3A1C93";
+char *g_gen = "5";
+
+/* A point in time, split into whole seconds and microseconds. */
+typedef struct {
+    unsigned int sec;
+    unsigned int usec;
+} instant_t;
+
+/*
+ * Return the current time as reported by gettimeofday().
+ * If gettimeofday() fails, both fields of the result are 0.
+ */
+instant_t
+now(void)
+{
+    struct timeval clk;
+    instant_t res;
+
+    res.sec = res.usec = 0;
+
+    if (gettimeofday(&clk, NULL) != 0)
+        return res;
+
+    res.sec = clk.tv_sec;
+    res.usec = clk.tv_usec;
+
+    return res;
+}
+
+extern mp_err s_mp_pad();
+
+/*
+ * Time g_gen ** expt mod g_prime for random exponents.
+ *
+ * argv[1]  number of exponentiations; a negative value is negated and
+ *          zero is bumped to one.
+ *
+ * Returns 0 on success, 1 if the argument is missing.
+ */
+int
+main(int argc, char *argv[])
+{
+    instant_t start, finish;
+    mp_int prime, gen, expt, res;
+    unsigned int diff;
+    int ix, num;
+
+    srand((unsigned int)time(NULL));
+
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s <num-tests>\n", argv[0]);
+        return 1;
+    }
+
+    if ((num = atoi(argv[1])) < 0)
+        num = -num;
+
+    if (num == 0)
+        ++num;
+
+    mp_init(&prime);
+    mp_init(&gen);
+    mp_init(&res);
+    mp_read_radix(&prime, g_prime, 16);
+    mp_read_radix(&gen, g_gen, 16);
+
+    /* The exponent gets one digit fewer than the modulus. */
+    mp_init_size(&expt, USED(&prime) - 1);
+    s_mp_pad(&expt, USED(&prime) - 1);
+
+    printf("Testing %d modular exponentiations ... \n", num);
+
+    start = now();
+    for (ix = 0; ix < num; ix++) {
+        mpp_random(&expt);
+        mp_exptmod(&gen, &expt, &prime, &res);
+    }
+    finish = now();
+
+    /* Elapsed microseconds; unsigned arithmetic keeps the usec borrow
+     * correct when finish.usec < start.usec. */
+    diff = (finish.sec - start.sec) * 1000000;
+    diff += finish.usec;
+    diff -= start.usec;
+
+    printf("%d operations took %u usec (%.3f sec)\n",
+           num, diff, (double)diff / 1000000.0);
+    printf("That is %.3f sec per operation.\n",
+           ((double)diff / 1000000.0) / num);
+
+    mp_clear(&expt);
+    mp_clear(&res);
+    mp_clear(&gen);
+    mp_clear(&prime);
+
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/tests/mptest-5.c b/security/nss/lib/freebl/mpi/tests/mptest-5.c
new file mode 100644
index 000000000..dff3ed470
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/tests/mptest-5.c
@@ -0,0 +1,85 @@
+/*
+ * Simple test driver for MPI library
+ *
+ * Test 5: Other number theoretic functions
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
+
+/* NOTE(review): the system header names were blank in this copy of the
+ * patch; the names below are inferred -- confirm against upstream. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+
+#include "mpi.h"
+
+/*
+ * Print gcd, extended gcd and modular inverse for two decimal inputs.
+ *
+ * argv[1], argv[2] are read as base-10 values a and b.
+ * Results are printed to stdout; nothing is verified automatically.
+ *
+ * Returns 0 after running all operations, 1 if too few arguments are given.
+ */
+int
+main(int argc, char *argv[])
+{
+    mp_int a, b, c, x, y;
+
+    if (argc < 3) {
+        fprintf(stderr, "Usage: %s <a> <b>\n", argv[0]);
+        return 1;
+    }
+
+    printf("Test 5: Number theoretic functions\n\n");
+
+    mp_init(&a);
+    mp_init(&b);
+
+    mp_read_radix(&a, argv[1], 10);
+    mp_read_radix(&b, argv[2], 10);
+
+    printf("a = ");
+    mp_print(&a, stdout);
+    fputc('\n', stdout);
+    printf("b = ");
+    mp_print(&b, stdout);
+    fputc('\n', stdout);
+
+    mp_init(&c);
+    printf("\nc = (a, b)\n");
+
+    mp_gcd(&a, &b, &c);
+    printf("Euclid: c = ");
+    mp_print(&c, stdout);
+    fputc('\n', stdout);
+    /*
+      mp_bgcd(&a, &b, &c);
+      printf("Binary: c = "); mp_print(&c, stdout); fputc('\n', stdout);
+    */
+    mp_init(&x);
+    mp_init(&y);
+    printf("\nc = (a, b) = ax + by\n");
+
+    mp_xgcd(&a, &b, &c, &x, &y);
+    printf("c = ");
+    mp_print(&c, stdout);
+    fputc('\n', stdout);
+    printf("x = ");
+    mp_print(&x, stdout);
+    fputc('\n', stdout);
+    printf("y = ");
+    mp_print(&y, stdout);
+    fputc('\n', stdout);
+
+    /* MP_UNDEF is the only result treated as "no inverse"; any other
+     * return value falls through to printing c. */
+    printf("\nc = a^-1 (mod b)\n");
+    if (mp_invmod(&a, &b, &c) == MP_UNDEF) {
+        printf("a has no inverse mod b\n");
+    } else {
+        printf("c = ");
+        mp_print(&c, stdout);
+        fputc('\n', stdout);
+    }
+
+    mp_clear(&y);
+    mp_clear(&x);
+    mp_clear(&c);
+    mp_clear(&b);
+    mp_clear(&a);
+
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/tests/mptest-5a.c b/security/nss/lib/freebl/mpi/tests/mptest-5a.c
new file mode 100644
index 000000000..c410a6a84
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/tests/mptest-5a.c
@@ -0,0 +1,147 @@
+/*
+ * Simple test driver for MPI library
+ *
+ * Test 5a: Greatest common divisor speed test, binary vs. Euclid
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
+
+/* NOTE(review): the system header names were blank in this copy of the
+ * patch; the names below are inferred -- confirm against upstream. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+#include <time.h>
+
+#include <sys/time.h>
+
+#include "mpi.h"
+#include "mpprime.h"
+
+/* A point in time, split into whole seconds and microseconds. */
+typedef struct {
+    unsigned int sec;
+    unsigned int usec;
+} instant_t;
+
+/*
+ * Return the current time as reported by gettimeofday().
+ * If gettimeofday() fails, both fields of the result are 0.
+ */
+instant_t
+now(void)
+{
+    struct timeval clk;
+    instant_t res;
+
+    res.sec = res.usec = 0;
+
+    if (gettimeofday(&clk, NULL) != 0)
+        return res;
+
+    res.sec = clk.tv_sec;
+    res.usec = clk.tv_usec;
+
+    return res;
+}
+
+#define PRECISION 16
+
+/*
+ * Check mp_bgcd() against mp_gcd() on random inputs, then time both.
+ *
+ * argv[1]  number of test iterations (a negative value is negated)
+ *
+ * Returns 0 on success, 1 on bad arguments or when the two results differ.
+ */
+int
+main(int argc, char *argv[])
+{
+    int ix, num, prec = PRECISION;
+    mp_int a, b, c, d;
+    instant_t start, finish;
+    time_t seed;
+    unsigned int d1, d2;
+
+    seed = time(NULL);
+
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s <num-tests>\n", argv[0]);
+        return 1;
+    }
+
+    if ((num = atoi(argv[1])) < 0)
+        num = -num;
+
+    /* With zero iterations there is nothing to verify or time. */
+    if (!num) {
+        fprintf(stderr, "%s: must perform at least 1 test\n", argv[0]);
+        return 1;
+    }
+
+    printf("Test 5a: Euclid vs. Binary, a GCD speed test\n\n"
+           "Number of tests: %d\n"
+           "Precision: %d digits\n\n",
+           num, prec);
+
+    mp_init_size(&a, prec);
+    mp_init_size(&b, prec);
+    mp_init(&c);
+    mp_init(&d);
+
+    /* Pass 1: both algorithms must agree on every sample. */
+    printf("Verifying accuracy ... \n");
+    srand((unsigned int)seed);
+    for (ix = 0; ix < num; ix++) {
+        mpp_random_size(&a, prec);
+        mpp_random_size(&b, prec);
+
+        mp_gcd(&a, &b, &c);
+        mp_bgcd(&a, &b, &d);
+
+        if (mp_cmp(&c, &d) != 0) {
+            printf("Error! Results not accurate:\n");
+            printf("a = ");
+            mp_print(&a, stdout);
+            fputc('\n', stdout);
+            printf("b = ");
+            mp_print(&b, stdout);
+            fputc('\n', stdout);
+            printf("c = ");
+            mp_print(&c, stdout);
+            fputc('\n', stdout);
+            printf("d = ");
+            mp_print(&d, stdout);
+            fputc('\n', stdout);
+
+            mp_clear(&a);
+            mp_clear(&b);
+            mp_clear(&c);
+            mp_clear(&d);
+            return 1;
+        }
+    }
+    mp_clear(&d);
+    printf("Accuracy confirmed for the %d test samples\n", num);
+
+    /* Pass 2 and 3: the generator is re-seeded before each timed loop so
+     * both loops draw the same rand() sequence. */
+    printf("Testing Euclid ... \n");
+    srand((unsigned int)seed);
+    start = now();
+    for (ix = 0; ix < num; ix++) {
+        mpp_random_size(&a, prec);
+        mpp_random_size(&b, prec);
+        mp_gcd(&a, &b, &c);
+    }
+    finish = now();
+
+    d1 = (finish.sec - start.sec) * 1000000;
+    d1 -= start.usec;
+    d1 += finish.usec;
+
+    printf("Testing binary ... \n");
+    srand((unsigned int)seed);
+    start = now();
+    for (ix = 0; ix < num; ix++) {
+        mpp_random_size(&a, prec);
+        mpp_random_size(&b, prec);
+        mp_bgcd(&a, &b, &c);
+    }
+    finish = now();
+
+    d2 = (finish.sec - start.sec) * 1000000;
+    d2 -= start.usec;
+    d2 += finish.usec;
+
+    printf("Euclidean algorithm time: %u usec\n", d1);
+    printf("Binary algorithm time: %u usec\n", d2);
+    /* A very short run can measure 0 usec; avoid dividing by zero. */
+    if (d1 != 0)
+        printf("Improvement: %.2f%%\n",
+               (1.0 - ((double)d2 / (double)d1)) * 100.0);
+    else
+        printf("Improvement: n/a (Euclidean time too small to measure)\n");
+
+    mp_clear(&c);
+    mp_clear(&b);
+    mp_clear(&a);
+
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/tests/mptest-6.c b/security/nss/lib/freebl/mpi/tests/mptest-6.c
new file mode 100644
index 000000000..4febf39c5
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/tests/mptest-6.c
@@ -0,0 +1,78 @@
+/*
+ * Simple test driver for MPI library
+ *
+ * Test 6: Output functions
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
+
+/* NOTE(review): the system header names were blank in this copy of the
+ * patch; the names below are inferred -- confirm against upstream. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+
+#include "mpi.h"
+
+/*
+ * Hex-dump len bytes of buf to ofp, 16 bytes per output line.
+ * A final newline is written only when the last line is partial.
+ */
+void
+print_buf(FILE *ofp, char *buf, int len)
+{
+    int ix, brk = 0;
+
+    for (ix = 0; ix < len; ix++) {
+        /* Go through unsigned char so bytes >= 0x80 print as two hex
+         * digits even where plain char is signed. */
+        fprintf(ofp, "%02X ", (unsigned int)(unsigned char)buf[ix]);
+
+        brk = (brk + 1) & 0xF;
+        if (!brk)
+            fputc('\n', ofp);
+    }
+
+    if (brk)
+        fputc('\n', ofp);
+}
+
+/*
+ * Print one decimal input in every radix from 2 to MAX_RADIX, then as raw
+ * bytes.
+ *
+ * argv[1] is read as a base-10 value.
+ *
+ * Returns 0 on success, 1 on a missing argument or allocation failure.
+ */
+int
+main(int argc, char *argv[])
+{
+    int ix, size;
+    mp_int a;
+    char *buf;
+
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s <a>\n", argv[0]);
+        return 1;
+    }
+
+    printf("Test 6: Output functions\n\n");
+
+    mp_init(&a);
+
+    mp_read_radix(&a, argv[1], 10);
+
+    printf("\nConverting to a string:\n");
+
+    printf("Rx Size Representation\n");
+    for (ix = 2; ix <= MAX_RADIX; ix++) {
+        size = mp_radix_size(&a, ix);
+
+        buf = calloc(size, sizeof(char));
+        if (buf == NULL) {
+            fprintf(stderr, "%s: out of memory\n", argv[0]);
+            mp_clear(&a);
+            return 1;
+        }
+        mp_toradix(&a, buf, ix);
+        printf("%2d: %3d: %s\n", ix, size, buf);
+        free(buf);
+    }
+
+    printf("\nRaw output:\n");
+    size = mp_raw_size(&a);
+    buf = calloc(size, sizeof(char));
+    if (buf == NULL) {
+        fprintf(stderr, "%s: out of memory\n", argv[0]);
+        mp_clear(&a);
+        return 1;
+    }
+
+    printf("Size: %d bytes\n", size);
+
+    mp_toraw(&a, buf);
+    print_buf(stdout, buf, size);
+    free(buf);
+
+    mp_clear(&a);
+
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/tests/mptest-7.c b/security/nss/lib/freebl/mpi/tests/mptest-7.c
new file mode 100644
index 000000000..1e83fbf96
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/tests/mptest-7.c
@@ -0,0 +1,85 @@
+/*
+ * Simple test driver for MPI library
+ *
+ * Test 7: Random and divisibility tests
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
+
+/* NOTE(review): the system header names were blank in this copy of the
+ * patch; the names below are inferred -- confirm against upstream. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+#include <time.h>
+
+#define MP_IOFUNC 1
+#include "mpi.h"
+
+#include "mpprime.h"
+
+/*
+ * Report divisibility between two decimal inputs, print three random
+ * values, and test the first input against small primes.
+ *
+ * argv[1], argv[2] are read as base-10 values a and b.
+ *
+ * Returns 0 after running all checks, 1 if too few arguments are given.
+ */
+int
+main(int argc, char *argv[])
+{
+    mp_digit num;
+    mp_int a, b;
+
+    srand((unsigned int)time(NULL));
+
+    if (argc < 3) {
+        fprintf(stderr, "Usage: %s <a> <b>\n", argv[0]);
+        return 1;
+    }
+
+    printf("Test 7: Random & divisibility tests\n\n");
+
+    mp_init(&a);
+    mp_init(&b);
+
+    mp_read_radix(&a, argv[1], 10);
+    mp_read_radix(&b, argv[2], 10);
+
+    printf("a = ");
+    mp_print(&a, stdout);
+    fputc('\n', stdout);
+    printf("b = ");
+    mp_print(&b, stdout);
+    fputc('\n', stdout);
+
+    if (mpp_divis(&a, &b) == MP_YES)
+        printf("a is divisible by b\n");
+    else
+        printf("a is not divisible by b\n");
+
+    if (mpp_divis(&b, &a) == MP_YES)
+        printf("b is divisible by a\n");
+    else
+        printf("b is not divisible by a\n");
+
+    /* b is overwritten with random values from here on. */
+    printf("\nb = mpp_random()\n");
+    mpp_random(&b);
+    printf("b = ");
+    mp_print(&b, stdout);
+    fputc('\n', stdout);
+    mpp_random(&b);
+    printf("b = ");
+    mp_print(&b, stdout);
+    fputc('\n', stdout);
+    mpp_random(&b);
+    printf("b = ");
+    mp_print(&b, stdout);
+    fputc('\n', stdout);
+
+    printf("\nTesting a for divisibility by first 170 primes\n");
+    num = 170;
+    if (mpp_divis_primes(&a, &num) == MP_YES)
+        printf("It is divisible by at least one of them\n");
+    else
+        printf("It is not divisible by any of them\n");
+
+    mp_clear(&b);
+    mp_clear(&a);
+
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/tests/mptest-8.c b/security/nss/lib/freebl/mpi/tests/mptest-8.c
new file mode 100644
index 000000000..a9d3afff9
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/tests/mptest-8.c
@@ -0,0 +1,68 @@
+/*
+ * Simple test driver for MPI library
+ *
+ * Test 8: Probabilistic primality tester
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
+
+/* NOTE(review): the system header names were blank in this copy of the
+ * patch; the names below are inferred -- confirm against upstream. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+#include <time.h>
+
+#define MP_IOFUNC 1
+#include "mpi.h"
+
+#include "mpprime.h"
+
+/*
+ * Run a small-prime divisibility check followed by ten rounds of
+ * mpp_pprime() on one decimal input, and print the verdict.
+ *
+ * argv[1] is read as a base-10 value.
+ *
+ * Returns 0 whether or not the value looks prime, 1 if the argument is
+ * missing.
+ */
+int
+main(int argc, char *argv[])
+{
+    int ix;
+    mp_digit num;
+    mp_int a;
+
+    srand((unsigned int)time(NULL));
+
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s <a>\n", argv[0]);
+        return 1;
+    }
+
+    printf("Test 8: Probabilistic primality testing\n\n");
+
+    mp_init(&a);
+
+    mp_read_radix(&a, argv[1], 10);
+
+    printf("a = ");
+    mp_print(&a, stdout);
+    fputc('\n', stdout);
+
+    printf("\nChecking for divisibility by small primes ... \n");
+    num = 170;
+    if (mpp_divis_primes(&a, &num) == MP_YES) {
+        printf("it is not prime\n");
+        goto CLEANUP;
+    }
+    printf("Passed that test (not divisible by any small primes).\n");
+
+    /* Only an explicit MP_NO stops the loop; any other result counts as a
+     * pass for that iteration. */
+    for (ix = 0; ix < 10; ix++) {
+        printf("\nPerforming Rabin-Miller test, iteration %d\n", ix + 1);
+
+        if (mpp_pprime(&a, 5) == MP_NO) {
+            printf("it is not prime\n");
+            goto CLEANUP;
+        }
+    }
+    printf("All tests passed; a is probably prime\n");
+
+CLEANUP:
+    mp_clear(&a);
+
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/tests/mptest-9.c b/security/nss/lib/freebl/mpi/tests/mptest-9.c
new file mode 100644
index 000000000..133264e89
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/tests/mptest-9.c
@@ -0,0 +1,109 @@
+/*
+ * mptest-9.c
+ *
+ * Test logical functions
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
+
+/* NOTE(review): the system header names were blank in this copy of the
+ * patch; the names below are inferred -- confirm against upstream. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+#include <time.h>
+
+#include "mpi.h"
+#include "mplogic.h"
+
+/*
+ * Print the results of the mpl_* bitwise operations on two inputs.
+ *
+ * argv[1], argv[2] are read as base-16 values a and b (unlike the other
+ * drivers in this directory, which read base 10).
+ *
+ * Returns 0 after running all operations, 1 if too few arguments are given.
+ */
+int
+main(int argc, char *argv[])
+{
+    mp_int a, b, c;
+    int pco;
+    mp_err res;
+
+    printf("Test 9: Logical functions\n\n");
+
+    if (argc < 3) {
+        fprintf(stderr, "Usage: %s <a> <b>\n", argv[0]);
+        return 1;
+    }
+
+    mp_init(&a);
+    mp_init(&b);
+    mp_init(&c);
+    mp_read_radix(&a, argv[1], 16);
+    mp_read_radix(&b, argv[2], 16);
+
+    printf("a = ");
+    mp_print(&a, stdout);
+    fputc('\n', stdout);
+    printf("b = ");
+    mp_print(&b, stdout);
+    fputc('\n', stdout);
+
+    mpl_not(&a, &c);
+    printf("~a = ");
+    mp_print(&c, stdout);
+    fputc('\n', stdout);
+
+    mpl_and(&a, &b, &c);
+    printf("a & b = ");
+    mp_print(&c, stdout);
+    fputc('\n', stdout);
+
+    mpl_or(&a, &b, &c);
+    printf("a | b = ");
+    mp_print(&c, stdout);
+    fputc('\n', stdout);
+
+    mpl_xor(&a, &b, &c);
+    printf("a ^ b = ");
+    mp_print(&c, stdout);
+    fputc('\n', stdout);
+
+    /* Right shifts by 1, 5 and 16 bits. */
+    mpl_rsh(&a, &c, 1);
+    printf("a >> 1 = ");
+    mp_print(&c, stdout);
+    fputc('\n', stdout);
+    mpl_rsh(&a, &c, 5);
+    printf("a >> 5 = ");
+    mp_print(&c, stdout);
+    fputc('\n', stdout);
+    mpl_rsh(&a, &c, 16);
+    printf("a >> 16 = ");
+    mp_print(&c, stdout);
+    fputc('\n', stdout);
+
+    /* Left shifts by the same amounts. */
+    mpl_lsh(&a, &c, 1);
+    printf("a << 1 = ");
+    mp_print(&c, stdout);
+    fputc('\n', stdout);
+    mpl_lsh(&a, &c, 5);
+    printf("a << 5 = ");
+    mp_print(&c, stdout);
+    fputc('\n', stdout);
+    mpl_lsh(&a, &c, 16);
+    printf("a << 16 = ");
+    mp_print(&c, stdout);
+    fputc('\n', stdout);
+
+    mpl_num_set(&a, &pco);
+    printf("population(a) = %d\n", pco);
+    mpl_num_set(&b, &pco);
+    printf("population(b) = %d\n", pco);
+
+    res = mpl_parity(&a);
+    if (res == MP_EVEN)
+        printf("a has even parity\n");
+    else
+        printf("a has odd parity\n");
+
+    mp_clear(&c);
+    mp_clear(&b);
+    mp_clear(&a);
+
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/tests/mptest-b.c b/security/nss/lib/freebl/mpi/tests/mptest-b.c
new file mode 100644
index 000000000..07f30eaf8
--- /dev/null
+++
b/security/nss/lib/freebl/mpi/tests/mptest-b.c
@@ -0,0 +1,230 @@
+/*
+ * Simple test driver for MPI library
+ *
+ * Test GF2m: Binary Polynomial Arithmetic
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/* NOTE(review): the system header names were blank in this copy of the
+ * patch; the names below are inferred -- confirm against upstream. */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <limits.h>
+
+#include "mp_gf2m.h"
+
+/*
+ * Self-checking test of the binary polynomial routines (mp_b*), using
+ * fixed operands and the irreducible polynomial described by p[].
+ *
+ * Takes no command-line arguments.
+ *
+ * Returns 0 when every check passes and -1 as soon as one fails.  All
+ * mp_int values are released on both paths.
+ */
+int
+main(int argc, char *argv[])
+{
+    int ix;
+    int rv = -1; /* pessimistic until every check has passed */
+    mp_int pp, a, b, x, y, order;
+    mp_int c, d, e;
+    /* NOTE(review): MP_CHECKOK is defined by the MPI headers (not visible
+     * here); presumably it stores the call's result in `res` and jumps to
+     * CLEANUP on failure -- confirm. */
+    mp_err res;
+    unsigned int p[] = { 163, 7, 6, 3, 0 };
+    unsigned int ptemp[10];
+
+    printf("Test b: Binary Polynomial Arithmetic\n\n");
+
+    mp_init(&pp);
+    mp_init(&a);
+    mp_init(&b);
+    mp_init(&x);
+    mp_init(&y);
+    mp_init(&order);
+
+    mp_read_radix(&pp, "0800000000000000000000000000000000000000C9", 16);
+    mp_read_radix(&a, "1", 16);
+    mp_read_radix(&b, "020A601907B8C953CA1481EB10512F78744A3205FD", 16);
+    mp_read_radix(&x, "03F0EBA16286A2D57EA0991168D4994637E8343E36", 16);
+    mp_read_radix(&y, "00D51FBC6C71A0094FA2CDD545B11C5C0C797324F1", 16);
+    mp_read_radix(&order, "040000000000000000000292FE77E70C12A4234C33", 16);
+    printf("pp = ");
+    mp_print(&pp, stdout);
+    fputc('\n', stdout);
+    printf("a = ");
+    mp_print(&a, stdout);
+    fputc('\n', stdout);
+    printf("b = ");
+    mp_print(&b, stdout);
+    fputc('\n', stdout);
+    printf("x = ");
+    mp_print(&x, stdout);
+    fputc('\n', stdout);
+    printf("y = ");
+    mp_print(&y, stdout);
+    fputc('\n', stdout);
+    printf("order = ");
+    mp_print(&order, stdout);
+    fputc('\n', stdout);
+
+    /* Every jump to CLEANUP happens after this point, so all nine mp_int
+     * values are initialized by the time they are cleared. */
+    mp_init(&c);
+    mp_init(&d);
+    mp_init(&e);
+
+    /* Test polynomial conversion */
+    ix = mp_bpoly2arr(&pp, ptemp, 10);
+    if (
+        (ix != 5) ||
+        (ptemp[0] != p[0]) ||
+        (ptemp[1] != p[1]) ||
+        (ptemp[2] != p[2]) ||
+        (ptemp[3] != p[3]) ||
+        (ptemp[4] != p[4])) {
+        printf("Polynomial to array conversion not correct\n");
+        goto CLEANUP;
+    }
+
+    printf("Polynomial conversion test #1 successful.\n");
+    MP_CHECKOK(mp_barr2poly(p, &c));
+    if (mp_cmp(&pp, &c) != 0) {
+        printf("Array to polynomial conversion not correct\n");
+        goto CLEANUP;
+    }
+    printf("Polynomial conversion test #2 successful.\n");
+
+    /* Test addition */
+    MP_CHECKOK(mp_badd(&a, &a, &c));
+    if (mp_cmp_z(&c) != 0) {
+        printf("a+a should equal zero\n");
+        goto CLEANUP;
+    }
+    printf("Addition test #1 successful.\n");
+    MP_CHECKOK(mp_badd(&a, &b, &c));
+    MP_CHECKOK(mp_badd(&b, &c, &c));
+    if (mp_cmp(&c, &a) != 0) {
+        printf("c = (a + b) + b should equal a\n");
+        printf("a = ");
+        mp_print(&a, stdout);
+        fputc('\n', stdout);
+        printf("c = ");
+        mp_print(&c, stdout);
+        fputc('\n', stdout);
+        goto CLEANUP;
+    }
+    printf("Addition test #2 successful.\n");
+
+    /* Test multiplication */
+    mp_set(&c, 2);
+    MP_CHECKOK(mp_bmul(&b, &c, &c));
+    MP_CHECKOK(mp_badd(&b, &c, &c));
+    mp_set(&d, 3);
+    MP_CHECKOK(mp_bmul(&b, &d, &d));
+    if (mp_cmp(&c, &d) != 0) {
+        printf("c = (2 * b) + b should equal c = 3 * b\n");
+        printf("c = ");
+        mp_print(&c, stdout);
+        fputc('\n', stdout);
+        printf("d = ");
+        mp_print(&d, stdout);
+        fputc('\n', stdout);
+        goto CLEANUP;
+    }
+    printf("Multiplication test #1 successful.\n");
+
+    /* Test modular reduction */
+    MP_CHECKOK(mp_bmod(&b, p, &c));
+    if (mp_cmp(&b, &c) != 0) {
+        printf("c = b mod p should equal b\n");
+        printf("b = ");
+        mp_print(&b, stdout);
+        fputc('\n', stdout);
+        printf("c = ");
+        mp_print(&c, stdout);
+        fputc('\n', stdout);
+        goto CLEANUP;
+    }
+    printf("Modular reduction test #1 successful.\n");
+    MP_CHECKOK(mp_badd(&b, &pp, &c));
+    MP_CHECKOK(mp_bmod(&c, p, &c));
+    if (mp_cmp(&b, &c) != 0) {
+        printf("c = (b + p) mod p should equal b\n");
+        printf("b = ");
+        mp_print(&b, stdout);
+        fputc('\n', stdout);
+        printf("c = ");
+        mp_print(&c, stdout);
+        fputc('\n', stdout);
+        goto CLEANUP;
+    }
+    printf("Modular reduction test #2 successful.\n");
+    MP_CHECKOK(mp_bmul(&b, &pp, &c));
+    MP_CHECKOK(mp_bmod(&c, p, &c));
+    if (mp_cmp_z(&c) != 0) {
+        printf("c = (b * p) mod p should equal 0\n");
+        printf("c = ");
+        mp_print(&c, stdout);
+        fputc('\n', stdout);
+        goto CLEANUP;
+    }
+    printf("Modular reduction test #3 successful.\n");
+
+    /* Test modular multiplication */
+    MP_CHECKOK(mp_bmulmod(&b, &pp, p, &c));
+    if (mp_cmp_z(&c) != 0) {
+        printf("c = (b * p) mod p should equal 0\n");
+        printf("c = ");
+        mp_print(&c, stdout);
+        fputc('\n', stdout);
+        goto CLEANUP;
+    }
+    printf("Modular multiplication test #1 successful.\n");
+    mp_set(&c, 1);
+    MP_CHECKOK(mp_badd(&pp, &c, &c));
+    MP_CHECKOK(mp_bmulmod(&b, &c, p, &c));
+    if (mp_cmp(&b, &c) != 0) {
+        printf("c = (b * (p + 1)) mod p should equal b\n");
+        printf("b = ");
+        mp_print(&b, stdout);
+        fputc('\n', stdout);
+        printf("c = ");
+        mp_print(&c, stdout);
+        fputc('\n', stdout);
+        goto CLEANUP;
+    }
+    printf("Modular multiplication test #2 successful.\n");
+
+    /* Test modular squaring */
+    MP_CHECKOK(mp_copy(&b, &c));
+    MP_CHECKOK(mp_bmulmod(&b, &c, p, &c));
+    MP_CHECKOK(mp_bsqrmod(&b, p, &d));
+    if (mp_cmp(&c, &d) != 0) {
+        printf("c = (b * b) mod p should equal d = b^2 mod p\n");
+        printf("c = ");
+        mp_print(&c, stdout);
+        fputc('\n', stdout);
+        printf("d = ");
+        mp_print(&d, stdout);
+        fputc('\n', stdout);
+        goto CLEANUP;
+    }
+    printf("Modular squaring test #1 successful.\n");
+
+    /* Test modular division */
+    MP_CHECKOK(mp_bdivmod(&b, &x, &pp, p, &c));
+    MP_CHECKOK(mp_bmulmod(&c, &x, p, &c));
+    if (mp_cmp(&b, &c) != 0) {
+        printf("c = (b / x) * x mod p should equal b\n");
+        printf("b = ");
+        mp_print(&b, stdout);
+        fputc('\n', stdout);
+        printf("c = ");
+        mp_print(&c, stdout);
+        fputc('\n', stdout);
+        goto CLEANUP;
+    }
+    printf("Modular division test #1 successful.\n");
+
+    /* Reached only when no check failed. */
+    rv = 0;
+
+CLEANUP:
+
+    mp_clear(&e);
+    mp_clear(&d);
+    mp_clear(&c);
+    mp_clear(&order);
+    mp_clear(&y);
+    mp_clear(&x);
+    mp_clear(&b);
+    mp_clear(&a);
+    mp_clear(&pp);
+
+    return rv;
+}
diff --git a/security/nss/lib/freebl/mpi/tests/pi1k.txt b/security/nss/lib/freebl/mpi/tests/pi1k.txt
new file mode 100644
index 000000000..5ff6209ff
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/tests/pi1k.txt
@@ -0,0 +1 @@
+31415926535897932384626433832795028841971693993751058209749445923078164062862089986280348253421170679821480865132823066470938446095505822317253594081284811174502841027019385211055596446229489549303819644288109756659334461284756482337867831652712019091456485669234603486104543266482133936072602491412737245870066063155881748815209209628292540917153643678925903600113305305488204665213841469519415116094330572703657595919530921861173819326117931051185480744623799627495673518857527248912279381830119491298336733624406566430860213949463952247371907021798609437027705392171762931767523846748184676694051320005681271452635608277857713427577896091736371787214684409012249534301465495853710507922796892589235420199561121290219608640344181598136297747713099605187072113499999983729780499510597317328160963185950244594553469083026425223082533446850352619311881710100031378387528865875332083814206171776691473035982534904287554687311595628638823537875937519577818577805321712268066130019278766111959092164201989 diff --git a/security/nss/lib/freebl/mpi/tests/pi2k.txt b/security/nss/lib/freebl/mpi/tests/pi2k.txt new file mode 100644 index 000000000..9ce82acd1 --- /dev/null +++ b/security/nss/lib/freebl/mpi/tests/pi2k.txt @@ -0,0 +1 @@ 
+3141592653589793238462643383279502884197169399375105820974944592307816406286208998628034825342117067982148086513282306647093844609550582231725359408128481117450284102701938521105559644622948954930381964428810975665933446128475648233786783165271201909145648566923460348610454326648213393607260249141273724587006606315588174881520920962829254091715364367892590360011330530548820466521384146951941511609433057270365759591953092186117381932611793105118548074462379962749567351885752724891227938183011949129833673362440656643086021394946395224737190702179860943702770539217176293176752384674818467669405132000568127145263560827785771342757789609173637178721468440901224953430146549585371050792279689258923542019956112129021960864034418159813629774771309960518707211349999998372978049951059731732816096318595024459455346908302642522308253344685035261931188171010003137838752886587533208381420617177669147303598253490428755468731159562863882353787593751957781857780532171226806613001927876611195909216420198938095257201065485863278865936153381827968230301952035301852968995773622599413891249721775283479131515574857242454150695950829533116861727855889075098381754637464939319255060400927701671139009848824012858361603563707660104710181942955596198946767837449448255379774726847104047534646208046684259069491293313677028989152104752162056966024058038150193511253382430035587640247496473263914199272604269922796782354781636009341721641219924586315030286182974555706749838505494588586926995690927210797509302955321165344987202755960236480665499119881834797753566369807426542527862551818417574672890977772793800081647060016145249192173217214772350141441973568548161361157352552133475741849468438523323907394143334547762416862518983569485562099219222184272550254256887671790494601653466804988627232791786085784383827967976681454100953883786360950680064225125205117392984896084128488626945604241965285022210661186306744278622039194945047123713786960956364371917287467764657573962413890865832645995813390478027590
10 diff --git a/security/nss/lib/freebl/mpi/tests/pi5k.txt b/security/nss/lib/freebl/mpi/tests/pi5k.txt new file mode 100644 index 000000000..901fac2ea --- /dev/null +++ b/security/nss/lib/freebl/mpi/tests/pi5k.txt @@ -0,0 +1 @@ +314159265358979323846264338327950288419716939937510582097494459230781640628620899862803482534211706798214808651328230664709384460955058223172535940812848111745028410270193852110555964462294895493038196442881097566593344612847564823378678316527120190914564856692346034861045432664821339360726024914127372458700660631558817488152092096282925409171536436789259036001133053054882046652138414695194151160943305727036575959195309218611738193261179310511854807446237996274956735188575272489122793818301194912983367336244065664308602139494639522473719070217986094370277053921717629317675238467481846766940513200056812714526356082778577134275778960917363717872146844090122495343014654958537105079227968925892354201995611212902196086403441815981362977477130996051870721134999999837297804995105973173281609631859502445945534690830264252230825334468503526193118817101000313783875288658753320838142061717766914730359825349042875546873115956286388235378759375195778185778053217122680661300192787661119590921642019893809525720106548586327886593615338182796823030195203530185296899577362259941389124972177528347913151557485724245415069595082953311686172785588907509838175463746493931925506040092770167113900984882401285836160356370766010471018194295559619894676783744944825537977472684710404753464620804668425906949129331367702898915210475216205696602405803815019351125338243003558764024749647326391419927260426992279678235478163600934172164121992458631503028618297455570674983850549458858692699569092721079750930295532116534498720275596023648066549911988183479775356636980742654252786255181841757467289097777279380008164706001614524919217321721477235014144197356854816136115735255213347574184946843852332390739414333454776241686251898356948556209921922218427255025425688767179049460165
34668049886272327917860857843838279679766814541009538837863609506800642251252051173929848960841284886269456042419652850222106611863067442786220391949450471237137869609563643719172874677646575739624138908658326459958133904780275900994657640789512694683983525957098258226205224894077267194782684826014769909026401363944374553050682034962524517493996514314298091906592509372216964615157098583874105978859597729754989301617539284681382686838689427741559918559252459539594310499725246808459872736446958486538367362226260991246080512438843904512441365497627807977156914359977001296160894416948685558484063534220722258284886481584560285060168427394522674676788952521385225499546667278239864565961163548862305774564980355936345681743241125150760694794510965960940252288797108931456691368672287489405601015033086179286809208747609178249385890097149096759852613655497818931297848216829989487226588048575640142704775551323796414515237462343645428584447952658678210511413547357395231134271661021359695362314429524849371871101457654035902799344037420073105785390621983874478084784896833214457138687519435064302184531910484810053706146806749192781911979399520614196634287544406437451237181921799983910159195618146751426912397489409071864942319615679452080951465502252316038819301420937621378559566389377870830390697920773467221825625996615014215030680384477345492026054146659252014974428507325186660021324340881907104863317346496514539057962685610055081066587969981635747363840525714591028970641401109712062804390397595156771577004203378699360072305587631763594218731251471205329281918261861258673215791984148488291644706095752706957220917567116722910981690915280173506712748583222871835209353965725121083579151369882091444210067510334671103141267111369908658516398315019701651511685171437657618351556508849099898599823873455283316355076479185358932261854896321329330898570642046752590709154814165498594616371802709819943099244889575712828905923233260972997120844335732654893823911932597463667305836041428138830320
3824903758985243744170291327656180937734440307074692112019130203303801976211011004492932151608424448596376698389522868478312355265821314495768572624334418930396864262434107732269780280731891544110104468232527162010526522721116603966655730925471105578537634668206531098965269186205647693125705863566201855810072936065987648611791045334885034611365768675324944166803962657978771855608455296541266540853061434443185867697514566140680070023787765913440171274947042056223053899456131407112700040785473326993908145466464588079727082668306343285878569830523580893306575740679545716377525420211495576158140025012622859413021647155097925923099079654737612551765675135751782966645477917450112996148903046399471329621073404375189573596145890193897131117904297828564750320319869151402870808599048010941214722131794764777262241425485454033215718530614228813758504306332175182979866223717215916077166925474873898665494945011465406284336639379003976926567214638530673609657120918076383271664162748888007869256029022847210403172118608204190004229661711963779213375751149595015660496318629472654736425230817703675159067350235072835405670403867435136222247715891504953098444893330963408780769325993978054193414473774418426312986080998886874132604721 diff --git a/security/nss/lib/freebl/mpi/timetest b/security/nss/lib/freebl/mpi/timetest new file mode 100755 index 000000000..c6f07bb30 --- /dev/null +++ b/security/nss/lib/freebl/mpi/timetest @@ -0,0 +1,99 @@ +#!/bin/sh + +# Simple timing test for the MPI library. Basically, we use prime +# generation as a timing test, since it exercises most of the pathways +# of the library fairly heavily. The 'primegen' tool outputs a line +# summarizing timing results. We gather these and process them for +# statistical information, which is collected into a file. + +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +# Avoid using built-in shell echoes +ECHO=/bin/echo +MAKE=gmake +PERL=perl + +# Use a fixed seed so timings will be more consistent +# This one is the 11th-18th decimal digits of 'e' +#export SEED=45904523 +SEED=45904523; export SEED + +#------------------------------------------------------------------------ + +$ECHO "\n** Running timing tests for MPI library\n" + +$ECHO "Bringing 'metime' up to date ... " +if $MAKE metime ; then + : +else + $ECHO "\nMake failed to build metime.\n" + exit 1 +fi + +if [ ! -x ./metime ] ; then + $ECHO "\nCannot find 'metime' program, testing cannot continue.\n" + exit 1 +fi + +#------------------------------------------------------------------------ + +$ECHO "Bringing 'primegen' up to date ... " +if $MAKE primegen ; then + : +else + $ECHO "\nMake failed to build primegen.\n" + exit 1 +fi + +if [ ! -x ./primegen ] ; then + $ECHO "\nCannot find 'primegen' program, testing cannot continue.\n" + exit 1 +fi + +#------------------------------------------------------------------------ + +rm -f timing-results.txt +touch timing-results.txt + +sizes="256 512 1024 2048" +ntests=10 + +trap 'echo "oop!";rm -f tt*.tmp timing-results.txt;exit 0' INT HUP + +$ECHO "\n-- Modular exponentiation\n" +$ECHO "Modular exponentiation:" >> timing-results.txt + +$ECHO "Running $ntests modular exponentiations per test:" +for size in $sizes ; do + $ECHO "- Gathering statistics for $size bits ... " + secs=`./metime $ntests $size | tail -1 | awk '{print $2}'` + $ECHO "$size: " $secs " seconds per op" >> timing-results.txt + tail -1 timing-results.txt +done + +$ECHO ""; + +sizes="256 512 1024" +ntests=1 + +$ECHO "\n-- Prime generation\n" +$ECHO "Prime generation:" >> timing-results.txt + +$ECHO "Generating $ntests prime values per test:" +for size in $sizes ; do + $ECHO "- Gathering statistics for $size bits ... 
" + ./primegen $size $ntests | grep ticks | awk '{print $7}' | tr -d '(' > tt$$.tmp + $ECHO "$size:" >> timing-results.txt + $PERL stats tt$$.tmp >> timing-results.txt + tail -1 timing-results.txt + rm -f tt$$.tmp +done + +$ECHO "" + +trap 'rm -f tt*.tmp timing-results.txt' INT HUP + +exit 0 + diff --git a/security/nss/lib/freebl/mpi/types.pl b/security/nss/lib/freebl/mpi/types.pl new file mode 100755 index 000000000..c5f38afa5 --- /dev/null +++ b/security/nss/lib/freebl/mpi/types.pl @@ -0,0 +1,127 @@ +#!/usr/bin/perl + +# +# types.pl - find recommended type definitions for digits and words +# +# This script scans the Makefile for the C compiler and compilation +# flags currently in use, and using this combination, attempts to +# compile a simple test program that outputs the sizes of the various +# unsigned integer types, in bytes. Armed with these, it finds all +# the "viable" type combinations for mp_digit and mp_word, where +# viability is defined by the requirement that mp_word be at least two +# times the precision of mp_digit. +# +# Of these, the one with the largest digit size is chosen, and +# appropriate typedef statements are written to standard output. + +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +@_=split(/\//,$0);chomp($prog=pop(@_)); + +# The array of integer types to be considered... 
+@TYPES = ( + "unsigned char", + "unsigned short", + "unsigned int", + "unsigned long" +); + +# Macro names for the maximum unsigned value of each type +%TMAX = ( + "unsigned char" => "UCHAR_MAX", + "unsigned short" => "USHRT_MAX", + "unsigned int" => "UINT_MAX", + "unsigned long" => "ULONG_MAX" +); + +# Read the Makefile to find out which C compiler to use +open(MFP, ") { + chomp; + if(/^CC=(.*)$/) { + $cc = $1; + last if $cflags; + } elsif(/^CFLAGS=(.*)$/) { + $cflags = $1; + last if $cc; + } +} +close(MFP); + +# If we couldn't find that, use 'cc' by default +$cc = "cc" unless $cc; + +printf STDERR "Using '%s' as the C compiler.\n", $cc; + +print STDERR "Determining type sizes ... \n"; +open(OFP, ">tc$$.c") or die "$prog: tc$$.c: $!\n"; +print OFP "#include \n\nint main(void)\n{\n"; +foreach $type (@TYPES) { + printf OFP "\tprintf(\"%%d\\n\", (int)sizeof(%s));\n", $type; +} +print OFP "\n\treturn 0;\n}\n"; +close(OFP); + +system("$cc $cflags -o tc$$ tc$$.c"); + +die "$prog: unable to build test program\n" unless(-x "tc$$"); + +open(IFP, "./tc$$|") or die "$prog: can't execute test program\n"; +$ix = 0; +while() { + chomp; + $size{$TYPES[$ix++]} = $_; +} +close(IFP); + +unlink("tc$$"); +unlink("tc$$.c"); + +print STDERR "Selecting viable combinations ... \n"; +while(($type, $size) = each(%size)) { + push(@ts, [ $size, $type ]); +} + +# Sort them ascending by size +@ts = sort { $a->[0] <=> $b->[0] } @ts; + +# Try all possible combinations, finding pairs in which the word size +# is twice the digit size. 
The number of possible pairs is too small +# to bother doing this more efficiently than by brute force +for($ix = 0; $ix <= $#ts; $ix++) { + $w = $ts[$ix]; + + for($jx = 0; $jx <= $#ts; $jx++) { + $d = $ts[$jx]; + + if($w->[0] == 2 * $d->[0]) { + push(@valid, [ $d, $w ]); + } + } +} + +# Sort descending by digit size +@valid = sort { $b->[0]->[0] <=> $a->[0]->[0] } @valid; + +# Select the maximum as the recommended combination +$rec = shift(@valid); + +printf("typedef %-18s mp_sign;\n", "char"); +printf("typedef %-18s mp_digit; /* %d byte type */\n", + $rec->[0]->[1], $rec->[0]->[0]); +printf("typedef %-18s mp_word; /* %d byte type */\n", + $rec->[1]->[1], $rec->[1]->[0]); +printf("typedef %-18s mp_size;\n", "unsigned int"); +printf("typedef %-18s mp_err;\n\n", "int"); + +printf("#define %-18s (CHAR_BIT*sizeof(mp_digit))\n", "DIGIT_BIT"); +printf("#define %-18s %s\n", "DIGIT_MAX", $TMAX{$rec->[0]->[1]}); +printf("#define %-18s (CHAR_BIT*sizeof(mp_word))\n", "MP_WORD_BIT"); +printf("#define %-18s %s\n\n", "MP_WORD_MAX", $TMAX{$rec->[1]->[1]}); +printf("#define %-18s (DIGIT_MAX+1)\n\n", "RADIX"); + +printf("#define %-18s \"%%0%dX\"\n", "DIGIT_FMT", (2 * $rec->[0]->[0])); + +exit 0; diff --git a/security/nss/lib/freebl/mpi/utils/LICENSE b/security/nss/lib/freebl/mpi/utils/LICENSE new file mode 100644 index 000000000..5f96df7ab --- /dev/null +++ b/security/nss/lib/freebl/mpi/utils/LICENSE @@ -0,0 +1,4 @@ +Within this directory, each of the file listed below is licensed under +the terms given in the file LICENSE-MPL, also in this directory. + +PRIMES diff --git a/security/nss/lib/freebl/mpi/utils/LICENSE-MPL b/security/nss/lib/freebl/mpi/utils/LICENSE-MPL new file mode 100644 index 000000000..41dc2327f --- /dev/null +++ b/security/nss/lib/freebl/mpi/utils/LICENSE-MPL @@ -0,0 +1,3 @@ +This Source Code Form is subject to the terms of the Mozilla Public +License, v. 2.0. 
If a copy of the MPL was not distributed with this +file, You can obtain one at http://mozilla.org/MPL/2.0/. diff --git a/security/nss/lib/freebl/mpi/utils/PRIMES b/security/nss/lib/freebl/mpi/utils/PRIMES new file mode 100644 index 000000000..ed65703ff --- /dev/null +++ b/security/nss/lib/freebl/mpi/utils/PRIMES @@ -0,0 +1,41 @@ +Probable primes (sorted by number of significant bits) + + 128: 81386202757205669562183851789305348631 + + 128: 180241813863264101444573802809858694397 + + 128: 245274683055224433281596312431122059021 + + 128: 187522309397665259809392608791686659539 + + 256: 83252422946206411852330647237287722547866360773229941071371588246436\ + 513990159 + + 256: 79132571131322331023736933767063051273085304521895229780914612117520\ + 058517909 + + 256: 72081815425552909748220041100909735706208853818662000557743644603407\ + 965465527 + + 256: 87504602391905701494845474079163412737334477797316409702279059573654\ + 274811271 + + 512: 12233064210800062190450937494718705259777386009095453001870729392786\ + 63450255179083524798507997690270500580265258111668148238355016411719\ + 9168737693316468563 + + 512: 12003639081420725322369909586347545220275253633035565716386136197501\ + 88208318984400479275215620499883521216480724155582768193682335576385\ + 2069481074929084063 + +1024: 16467877625718912296741904171202513097057724053648819680815842057593\ + 20371835940722471475475803725455063836431454757000451907612224427007\ + 63984592414360595161051906727075047683803534852982766542661204179549\ + 77327573530800542562611753617736693359790119074768292178493884576587\ + 0230450429880021317876149636714743053 + +1024: 16602953991090311275234291158294516471009930684624948451178742895360\ + 86073703307475884280944414508444679430090561246728195735962931545473\ + 40743240318558456247740186704660778277799687988031119436541068736925\ + 20563780233711166724859277827382391527748470939542560819625727876091\ + 5372193745283891895989104479029844957 diff --git 
a/security/nss/lib/freebl/mpi/utils/README b/security/nss/lib/freebl/mpi/utils/README new file mode 100644 index 000000000..61c8e2efa --- /dev/null +++ b/security/nss/lib/freebl/mpi/utils/README @@ -0,0 +1,206 @@ +This Source Code Form is subject to the terms of the Mozilla Public +License, v. 2.0. If a copy of the MPL was not distributed with this +file, You can obtain one at http://mozilla.org/MPL/2.0/. + +Additional MPI utilities +------------------------ + +The files 'mpprime.h' and 'mpprime.c' define some useful extensions to +the MPI library for dealing with prime numbers (in particular, testing +for divisibility, and the Rabin-Miller probabilistic primality test). + +The files 'mplogic.h' and 'mplogic.c' define extensions to the MPI +library for doing bitwise logical operations and shifting. + +This document assumes you have read the help file for the MPI library +and understand its conventions. + +Divisibility (mpprime.h) +------------ + +To test a number for divisibility by another number: + +mpp_divis(a, b) - test if b|a +mpp_divis_d(a, d) - test if d|a + +Each of these functions returns MP_YES if its initial argument is +divisible by its second, or MP_NO if it is not. Other errors may be +returned as appropriate (such as MP_RANGE if you try to test for +divisibility by zero). + +Randomness (mpprime.h) +---------- + +To generate random data: + +mpp_random(a) - fill a with random data +mpp_random_size(a, p) - fill a with p digits of random data + +The mpp_random_size() function increases the precision of a to at +least p, then fills all those digits randomly. The mpp_random() +function fills a to its current precision (as determined by the number +of significant digits, USED(a)). + +Note that these functions simply use the C library's rand() function +to fill a with random digits up to its precision. This should be +adequate for primality testing, but should not be used for +cryptographic applications where truly random values are required for +security.
+ +You should call srand() in your driver program in order to seed the +random generator; this function doesn't call it. + +Primality Testing (mpprime.h) +----------------- + +mpp_divis_vector(a, v, s, w) - is a divisible by any of the s values + in v, and if so, w = which. +mpp_divis_primes(a, np) - is a divisible by any of the first np primes? +mpp_fermat(a, w) - is a pseudoprime with respect to witness w? +mpp_pprime(a, nt) - run nt iterations of Rabin-Miller on a. + +The mpp_divis_vector() function tests a for divisibility by each +member of an array of digits. The array is v, the size of that array +is s. Returns MP_YES if a is divisible, and stores the index of the +offending digit in w. Returns MP_NO if a is not divisible by any of +the digits in the array. + +A small table of primes is compiled into the library (typically the +first 128 primes, although you can change this by editing the file +'primes.c' before you build). The global variable prime_tab_size +contains the number of primes in the table, and the values themselves +are in the array prime_tab[], which is an array of mp_digit. + +The mpp_divis_primes() function is basically just a wrapper around +mpp_divis_vector() that uses prime_tab[] as the test vector. The np +parameter is a pointer to an mp_digit -- on input, it should specify +the number of primes to be tested against. If a is divisible by any +of the primes, MP_YES is returned and np is given the prime value that +divided a (you can use this if you're factoring, for example). +Otherwise, MP_NO is returned and np is untouched. + +The function mpp_fermat() performs Fermat's test, using w as a +witness. This test basically relies on the fact that if a is prime, +and w is relatively prime to a, then: + + w^a = w (mod a) + +That is, + + w^(a - 1) = 1 (mod a) + +The function returns MP_YES if the test passes, MP_NO if it fails. If +w is relatively prime to a, and the test fails, a is definitely +composite. 
If w is relatively prime to a and the test passes, then a +is either prime, or w is a false witness (the probability of this +happening depends on the choice of w and of a ... consult a number +theory textbook for more information about this). + +Note: If (w, a) != 1, the output of this test is meaningless. +---- + +The function mpp_pprime() performs the Rabin-Miller probabilistic +primality test for nt rounds. If all the tests pass, MP_YES is +returned, and a is probably prime. The probability that an answer of +MP_YES is incorrect is no greater than 1 in 4^nt, and in fact is +usually much less than that (this is a pessimistic estimate). If any +test fails, MP_NO is returned, and a is definitely composite. + +Bruce Schneier recommends at least 5 iterations of this test for most +cryptographic applications; Knuth suggests that 25 are reasonable. +Run it as many times as you feel are necessary. + +See the programs 'makeprime.c' and 'isprime.c' for reasonable examples +of how to use these functions for primality testing. + + +Bitwise Logic (mplogic.c) +------------- + +The four commonest logical operations are implemented as: + +mpl_not(a, b) - Compute bitwise (one's) complement, b = ~a + +mpl_and(a, b, c) - Compute bitwise AND, c = a & b + +mpl_or(a, b, c) - Compute bitwise OR, c = a | b + +mpl_xor(a, b, c) - Compute bitwise XOR, c = a ^ b + +Left and right shifts are available as well. These take a number to +shift, a destination, and a shift amount. The shift amount must be a +digit value between 0 and DIGIT_BIT inclusive; if it is not, MP_RANGE +will be returned and the shift will not happen. + +mpl_rsh(a, b, d) - Compute logical right shift, b = a >> d + +mpl_lsh(a, b, d) - Compute logical left shift, b = a << d + +Since these are logical shifts, they fill with zeroes (the library +uses a signed magnitude representation, so there are no sign bits to +extend anyway). 
+ + +Command-line Utilities +---------------------- + +A handful of interesting command-line utilities are provided. These +are: + +lap.c - Find the order of a mod m. Usage is 'lap '. + This uses a dumb algorithm, so don't use it for + a really big modulus. + +invmod.c - Find the inverse of a mod m, if it exists. Usage + is 'invmod ' + +sieve.c - A simple bitmap-based implementation of the Sieve + of Eratosthenes. Used to generate the table of + primes in primes.c. Usage is 'sieve ' + +prng.c - Uses the routines in bbs_rand.{h,c} to generate + one or more 32-bit pseudo-random integers. This + is mainly an example, not intended for use in a + cryptographic application (the system time is + the only source of entropy used) + +dec2hex.c - Convert decimal to hexadecimal + +hex2dec.c - Convert hexadecimal to decimal + +basecvt.c - General radix conversion tool (supports 2-64) + +isprime.c - Probabilistically test an integer for primality + using the Rabin-Miller pseudoprime test combined + with division by small primes. + +primegen.c - Generate primes at random. + +exptmod.c - Perform modular exponentiation + +ptab.pl - A Perl script to munge the output of the sieve + program into a compilable C structure. + + +Other Files +----------- + +PRIMES - Some randomly generated numbers which are prime with + extremely high probability. + +README - You're reading me already. + + +About the Author +---------------- + +This software was written by Michael J. Fromberger. 
You can contact +the author as follows: + +E-mail: + +Postal: 8000 Cummings Hall, Thayer School of Engineering + Dartmouth College, Hanover, New Hampshire, USA + +PGP key: http://linguist.dartmouth.edu/~sting/keys/mjf.html + 9736 188B 5AFA 23D6 D6AA BE0D 5856 4525 289D 9907 diff --git a/security/nss/lib/freebl/mpi/utils/basecvt.c b/security/nss/lib/freebl/mpi/utils/basecvt.c new file mode 100644 index 000000000..0e9915406 --- /dev/null +++ b/security/nss/lib/freebl/mpi/utils/basecvt.c @@ -0,0 +1,68 @@ +/* + * basecvt.c + * + * Convert integer values specified on the command line from one input + * base to another. Accepts input and output bases between 2 and 64 + * inclusive (see MINBASE and MAXBASE below). + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include +#include +#include + +#include "mpi.h" + +#define IBASE 10 /* input radix used when no ibase argument is given */ +#define OBASE 16 /* output radix used when no obase argument is given */ +#define USAGE "Usage: %s ibase obase [value]\n" /* not referenced by main() below */ +#define MAXBASE 64 +#define MINBASE 2 + +int +main(int argc, char *argv[]) +{ + int ix, ibase = IBASE, obase = OBASE; + mp_int val; + + ix = 1; /* index of the next unread command-line argument */ + if (ix < argc) { + ibase = atoi(argv[ix++]); + + if (ibase < MINBASE || ibase > MAXBASE) { + fprintf(stderr, "%s: input radix must be between %d and %d inclusive\n", + argv[0], MINBASE, MAXBASE); + return 1; + } + } + if (ix < argc) { + obase = atoi(argv[ix++]); + + if (obase < MINBASE || obase > MAXBASE) { + fprintf(stderr, "%s: output radix must be between %d and %d inclusive\n", + argv[0], MINBASE, MAXBASE); + return 1; + } + } + + mp_init(&val); + while (ix < argc) { /* every remaining argument is a value to convert */ + char *out; + int outlen; + + mp_read_radix(&val, argv[ix++], ibase); /* NOTE(review): return value is not checked */ + + outlen = mp_radix_size(&val, obase); + out = calloc(outlen, sizeof(char)); /* NOTE(review): allocation result is not checked before use */ + mp_toradix(&val, out, obase); + + printf("%s\n", out); + free(out); + } + + mp_clear(&val); + + return 0; +} diff --git a/security/nss/lib/freebl/mpi/utils/bbs_rand.c
b/security/nss/lib/freebl/mpi/utils/bbs_rand.c new file mode 100644 index 000000000..fed2fe2e6 --- /dev/null +++ b/security/nss/lib/freebl/mpi/utils/bbs_rand.c @@ -0,0 +1,65 @@ +/* + * Blum, Blum & Shub PRNG using the MPI library + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "bbs_rand.h" + +#define SEED 1 /* bbs_init bit: bbs_state has been mp_init()ed */ +#define MODULUS 2 /* bbs_init bit: the modulus has been parsed */ + +/* This modulus is the product of two randomly generated 512-bit + prime integers, each of which is congruent to 3 (mod 4). */ +static char bbs_modulus[] = /* an array, not a pointer: bbs_seed_size takes its sizeof */ + "75A2A6E1D27393B86562B9CE7279A8403CB4258A637DAB5233465373E37837383EDC" + "332282B8575927BC4172CE8C147B4894050EE9D2BDEED355C121037270CA2570D127" + "7D2390CD1002263326635CC6B259148DE3A1A03201980A925E395E646A5E9164B0EC" + "28559EBA58C87447245ADD0651EDA507056A1129E3A3E16E903D64B437"; + +static int bbs_init = 0; /* flag set when library is initialized */ +static mp_int bbs_state; /* the current state of the generator */ + +/* Suggested size of random seed data: half the number of hex digits in the modulus */ +int bbs_seed_size = (int)((sizeof(bbs_modulus) - 1) / 2); + +void +bbs_srand(unsigned char *data, int len) +{ /* load len raw bytes from data as the new generator state */ + if ((bbs_init & SEED) == 0) { + mp_init(&bbs_state); + bbs_init |= SEED; + } + + mp_read_raw(&bbs_state, (char *)data, len); + +} /* end bbs_srand() */ + +unsigned int +bbs_rand(void) +{ /* NOTE: call bbs_srand() first; bbs_state is only mp_init()ed there */ + static mp_int modulus; + unsigned int result = 0, ix; + + if ((bbs_init & MODULUS) == 0) { /* parse the hex modulus once, on first use */ + mp_init(&modulus); + mp_read_radix(&modulus, bbs_modulus, 16); + bbs_init |= MODULUS; + } + + for (ix = 0; ix < sizeof(unsigned int); ix++) { /* one squaring per output byte */ + mp_digit d; + + mp_sqrmod(&bbs_state, &modulus, &bbs_state); + d = DIGIT(&bbs_state, 0); + + result = (result << CHAR_BIT) | (d & UCHAR_MAX); /* keep the low byte of the state */ + } + + return result; + +} /* end bbs_rand() */ + +/*------------------------------------------------------------------------*/ +/* HERE THERE BE DRAGONS */ diff --git
a/security/nss/lib/freebl/mpi/utils/bbs_rand.h b/security/nss/lib/freebl/mpi/utils/bbs_rand.h new file mode 100644 index 000000000..d12269bf9 --- /dev/null +++ b/security/nss/lib/freebl/mpi/utils/bbs_rand.h @@ -0,0 +1,24 @@ +/* + * bbs_rand.h + * + * Blum, Blum & Shub PRNG using the MPI library + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef _H_BBSRAND_ +#define _H_BBSRAND_ + +#include +#include "mpi.h" + +#define BBS_RAND_MAX UINT_MAX + +/* Suggested length of seed data */ +extern int bbs_seed_size; + +void bbs_srand(unsigned char *data, int len); +unsigned int bbs_rand(void); + +#endif /* end _H_BBSRAND_ */ diff --git a/security/nss/lib/freebl/mpi/utils/bbsrand.c b/security/nss/lib/freebl/mpi/utils/bbsrand.c new file mode 100644 index 000000000..d9151e005 --- /dev/null +++ b/security/nss/lib/freebl/mpi/utils/bbsrand.c @@ -0,0 +1,35 @@ +/* + * bbsrand.c + * + * Test driver for routines in bbs_rand.h + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include +#include +#include +#include +#include + +#include "bbs_rand.h" + +#define NUM_TESTS 100 + +int +main(void) +{ + unsigned int seed, result, ix; + + seed = time(NULL); + bbs_srand((unsigned char *)&seed, sizeof(seed)); + + for (ix = 0; ix < NUM_TESTS; ix++) { + result = bbs_rand(); + + printf("Test %3u: %08X\n", ix + 1, result); + } + + return 0; +} diff --git a/security/nss/lib/freebl/mpi/utils/dec2hex.c b/security/nss/lib/freebl/mpi/utils/dec2hex.c new file mode 100644 index 000000000..ef3a52095 --- /dev/null +++ b/security/nss/lib/freebl/mpi/utils/dec2hex.c @@ -0,0 +1,40 @@ +/* + * dec2hex.c + * + * Convert decimal integers into hexadecimal + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include +#include +#include + +#include "mpi.h" + +int +main(int argc, char *argv[]) +{ + mp_int a; + char *buf; + int len; + + if (argc < 2) { + fprintf(stderr, "Usage: %s \n", argv[0]); + return 1; + } + + mp_init(&a); + mp_read_radix(&a, argv[1], 10); + len = mp_radix_size(&a, 16); + buf = malloc(len); + mp_toradix(&a, buf, 16); + + printf("%s\n", buf); + + free(buf); + mp_clear(&a); + + return 0; +} diff --git a/security/nss/lib/freebl/mpi/utils/exptmod.c b/security/nss/lib/freebl/mpi/utils/exptmod.c new file mode 100644 index 000000000..3ac9078f4 --- /dev/null +++ b/security/nss/lib/freebl/mpi/utils/exptmod.c @@ -0,0 +1,55 @@ +/* + * exptmod.c + * + * Command line tool to perform modular exponentiation on arbitrary + * precision integers. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include +#include +#include + +#include "mpi.h" + +int +main(int argc, char *argv[]) +{ + mp_int a, b, m; + mp_err res; + char *str; + int len, rval = 0; + + if (argc < 3) { + fprintf(stderr, "Usage: %s \n", argv[0]); + return 1; + } + + mp_init(&a); + mp_init(&b); + mp_init(&m); + mp_read_radix(&a, argv[1], 10); + mp_read_radix(&b, argv[2], 10); + mp_read_radix(&m, argv[3], 10); + + if ((res = mp_exptmod(&a, &b, &m, &a)) != MP_OKAY) { + fprintf(stderr, "%s: error: %s\n", argv[0], mp_strerror(res)); + rval = 1; + } else { + len = mp_radix_size(&a, 10); + str = calloc(len, sizeof(char)); + mp_toradix(&a, str, 10); + + printf("%s\n", str); + + free(str); + } + + mp_clear(&a); + mp_clear(&b); + mp_clear(&m); + + return rval; +} diff --git a/security/nss/lib/freebl/mpi/utils/fact.c b/security/nss/lib/freebl/mpi/utils/fact.c new file mode 100644 index 000000000..da8e61a32 --- /dev/null +++ b/security/nss/lib/freebl/mpi/utils/fact.c @@ -0,0 +1,84 @@ +/* + * fact.c + * + * Compute factorial of input integer + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include +#include +#include + +#include "mpi.h" + +mp_err mp_fact(mp_int *a, mp_int *b); + +int +main(int argc, char *argv[]) +{ + mp_int a; + mp_err res; + + if (argc < 2) { + fprintf(stderr, "Usage: %s \n", argv[0]); + return 1; + } + + mp_init(&a); + mp_read_radix(&a, argv[1], 10); + + if ((res = mp_fact(&a, &a)) != MP_OKAY) { + fprintf(stderr, "%s: error: %s\n", argv[0], + mp_strerror(res)); + mp_clear(&a); + return 1; + } + + { + char *buf; + int len; + + len = mp_radix_size(&a, 10); + buf = malloc(len); + mp_todecimal(&a, buf); + + puts(buf); + + free(buf); + } + + mp_clear(&a); + return 0; +} + +mp_err +mp_fact(mp_int *a, mp_int *b) +{ + mp_int ix, s; + mp_err res = MP_OKAY; + + if (mp_cmp_z(a) < 0) + return MP_UNDEF; + + mp_init(&s); + mp_add_d(&s, 1, &s); /* s = 1 */ + mp_init(&ix); + mp_add_d(&ix, 1, &ix); /* ix = 1 */ + + for (/* */; mp_cmp(&ix, a) <= 0; mp_add_d(&ix, 1, &ix)) { + if ((res = mp_mul(&s, &ix, &s)) != MP_OKAY) + break; + } + + mp_clear(&ix); + + /* Copy out results if we got them */ + if (res == MP_OKAY) + mp_copy(&s, b); + + mp_clear(&s); + + return res; +} diff --git a/security/nss/lib/freebl/mpi/utils/gcd.c b/security/nss/lib/freebl/mpi/utils/gcd.c new file mode 100644 index 000000000..9f11a250b --- /dev/null +++ b/security/nss/lib/freebl/mpi/utils/gcd.c @@ -0,0 +1,95 @@ +/* + * gcd.c + * + * Greatest common divisor + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include +#include +#include + +#include "mpi.h" + +char *g_prog = NULL; /* argv[0], used to prefix messages and to pick the xgcd mode */ + +void print_mp_int(mp_int *mp, FILE *ofp); + +int +main(int argc, char *argv[]) +{ + mp_int a, b, x, y; + mp_err res; + int ext = 0; /* non-zero when the extended (xgcd) form is requested */ + + g_prog = argv[0]; + + if (argc < 3) { + fprintf(stderr, "Usage: %s \n", g_prog); + return 1; + } + + mp_init(&a); + mp_read_radix(&a, argv[1], 10); + mp_init(&b); + mp_read_radix(&b, argv[2], 10); + + /* If we were called 'xgcd', compute x, y so that g = ax + by */ + if (strcmp(g_prog, "xgcd") == 0) { /* exact match on argv[0]; a path-qualified name does not select this mode */ + ext = 1; + mp_init(&x); + mp_init(&y); + } + + if (ext) { + if ((res = mp_xgcd(&a, &b, &a, &x, &y)) != MP_OKAY) { + fprintf(stderr, "%s: error: %s\n", g_prog, mp_strerror(res)); + mp_clear(&a); + mp_clear(&b); + mp_clear(&x); + mp_clear(&y); + return 1; + } + } else { + if ((res = mp_gcd(&a, &b, &a)) != MP_OKAY) { + fprintf(stderr, "%s: error: %s\n", g_prog, + mp_strerror(res)); + mp_clear(&a); + mp_clear(&b); + return 1; + } + } + + print_mp_int(&a, stdout); /* a now holds the result of the call above */ + if (ext) { + fputs("x = ", stdout); + print_mp_int(&x, stdout); + fputs("y = ", stdout); + print_mp_int(&y, stdout); + } + + mp_clear(&a); + mp_clear(&b); + + if (ext) { + mp_clear(&x); + mp_clear(&y); + } + + return 0; +} + +void +print_mp_int(mp_int *mp, FILE *ofp) +{ /* write mp to ofp in decimal, followed by a newline */ + char *buf; + int len; + + len = mp_radix_size(mp, 10); + buf = calloc(len, sizeof(char)); /* NOTE(review): allocation result is not checked before use */ + mp_todecimal(mp, buf); + fprintf(ofp, "%s\n", buf); + free(buf); +} diff --git a/security/nss/lib/freebl/mpi/utils/hex2dec.c b/security/nss/lib/freebl/mpi/utils/hex2dec.c new file mode 100644 index 000000000..9b21d22e0 --- /dev/null +++ b/security/nss/lib/freebl/mpi/utils/hex2dec.c @@ -0,0 +1,40 @@ +/* + * hex2dec.c + * + * Convert hexadecimal integers into decimal + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/ + +#include +#include +#include + +#include "mpi.h" + +int +main(int argc, char *argv[]) +{ + mp_int a; + char *buf; + int len; + + if (argc < 2) { + fprintf(stderr, "Usage: %s \n", argv[0]); + return 1; + } + + mp_init(&a); + mp_read_radix(&a, argv[1], 16); + len = mp_radix_size(&a, 10); + buf = malloc(len); + mp_toradix(&a, buf, 10); + + printf("%s\n", buf); + + free(buf); + mp_clear(&a); + + return 0; +} diff --git a/security/nss/lib/freebl/mpi/utils/identest.c b/security/nss/lib/freebl/mpi/utils/identest.c new file mode 100644 index 000000000..321d2c2b0 --- /dev/null +++ b/security/nss/lib/freebl/mpi/utils/identest.c @@ -0,0 +1,84 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include +#include +#include "mpi.h" +#include "mpprime.h" +#include +#include + +#define MAX_PREC (4096 / MP_DIGIT_BIT) + +mp_err +identity_test(void) +{ + mp_size preca, precb; + mp_err res; + mp_int a, b; + mp_int t1, t2, t3, t4, t5; + + preca = (rand() % MAX_PREC) + 1; + precb = (rand() % MAX_PREC) + 1; + + MP_DIGITS(&a) = 0; + MP_DIGITS(&b) = 0; + MP_DIGITS(&t1) = 0; + MP_DIGITS(&t2) = 0; + MP_DIGITS(&t3) = 0; + MP_DIGITS(&t4) = 0; + MP_DIGITS(&t5) = 0; + + MP_CHECKOK(mp_init(&a)); + MP_CHECKOK(mp_init(&b)); + MP_CHECKOK(mp_init(&t1)); + MP_CHECKOK(mp_init(&t2)); + MP_CHECKOK(mp_init(&t3)); + MP_CHECKOK(mp_init(&t4)); + MP_CHECKOK(mp_init(&t5)); + + MP_CHECKOK(mpp_random_size(&a, preca)); + MP_CHECKOK(mpp_random_size(&b, precb)); + + if (mp_cmp(&a, &b) < 0) + mp_exch(&a, &b); + + MP_CHECKOK(mp_mod(&a, &b, &t1)); /* t1 = a%b */ + MP_CHECKOK(mp_div(&a, &b, &t2, NULL)); /* t2 = a/b */ + MP_CHECKOK(mp_mul(&b, &t2, &t3)); /* t3 = (a/b)*b */ + MP_CHECKOK(mp_add(&t1, &t3, &t4)); /* t4 = a%b + (a/b)*b */ + MP_CHECKOK(mp_sub(&t4, &a, &t5)); /* t5 = a%b + (a/b)*b - a */ + if (mp_cmp_z(&t5) != 0) { + res = MP_UNDEF; + goto 
CLEANUP; + } + +CLEANUP: + mp_clear(&t5); + mp_clear(&t4); + mp_clear(&t3); + mp_clear(&t2); + mp_clear(&t1); + mp_clear(&b); + mp_clear(&a); + return res; +} + +int +main(void) +{ + unsigned int seed = (unsigned int)time(NULL); + unsigned long count = 0; + mp_err res; + + srand(seed); + + while (MP_OKAY == (res = identity_test())) { + if ((++count % 100) == 0) + fputc('.', stderr); + } + + fprintf(stderr, "\ntest failed, err %d\n", res); + return res; +} diff --git a/security/nss/lib/freebl/mpi/utils/invmod.c b/security/nss/lib/freebl/mpi/utils/invmod.c new file mode 100644 index 000000000..9b4b04d3f --- /dev/null +++ b/security/nss/lib/freebl/mpi/utils/invmod.c @@ -0,0 +1,61 @@ +/* + * invmod.c + * + * Compute modular inverses + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include +#include + +#include "mpi.h" + +int +main(int argc, char *argv[]) +{ + mp_int a, m; + mp_err res; + char *buf; + int len, out = 0; + + if (argc < 3) { + fprintf(stderr, "Usage: %s \n", argv[0]); + return 1; + } + + mp_init(&a); + mp_init(&m); + mp_read_radix(&a, argv[1], 10); + mp_read_radix(&m, argv[2], 10); + + if (mp_cmp(&a, &m) > 0) + mp_mod(&a, &m, &a); + + switch ((res = mp_invmod(&a, &m, &a))) { + case MP_OKAY: + len = mp_radix_size(&a, 10); + buf = malloc(len); + + mp_toradix(&a, buf, 10); + printf("%s\n", buf); + free(buf); + break; + + case MP_UNDEF: + printf("No inverse\n"); + out = 1; + break; + + default: + printf("error: %s (%d)\n", mp_strerror(res), res); + out = 2; + break; + } + + mp_clear(&a); + mp_clear(&m); + + return out; +} diff --git a/security/nss/lib/freebl/mpi/utils/isprime.c b/security/nss/lib/freebl/mpi/utils/isprime.c new file mode 100644 index 000000000..d2d86957e --- /dev/null +++ b/security/nss/lib/freebl/mpi/utils/isprime.c @@ -0,0 +1,89 @@ +/* + * isprime.c + * + * Probabilistic 
primality tester command-line tool
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "mpi.h"
+#include "mpprime.h"
+
+#define RM_TESTS 15  /* how many iterations of Rabin-Miller? */
+#define MINIMUM 1024 /* don't bother us with a < this */
+
+int g_tests = RM_TESTS;
+char *g_prog = NULL;
+
+/*
+ * Usage: isprime <a>
+ *
+ * <a> is read as decimal, or as hexadecimal when prefixed with "0x".
+ * The RM_TESTS environment variable, when set to a positive integer,
+ * overrides the number of Rabin-Miller rounds.
+ *
+ * Exit status: 0 if <a> is probably prime, 2 if it is composite,
+ * 1 on a usage error, a too-small value, or a library error.
+ */
+int
+main(int argc, char *argv[])
+{
+    mp_int a;
+    mp_digit np = prime_tab_size; /* from mpprime.h */
+    mp_err rv;
+    int res = 0;
+
+    g_prog = argv[0];
+
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s <a>, where <a> is a decimal integer\n"
+                        "Use '0x' prefix for a hexadecimal value\n",
+                g_prog);
+        return 1;
+    }
+
+    /* Read number of tests from environment, if present */
+    {
+        char *tmp;
+
+        if ((tmp = PR_GetEnvSecure("RM_TESTS")) != NULL) {
+            if ((g_tests = atoi(tmp)) <= 0)
+                g_tests = RM_TESTS;
+        }
+    }
+
+    if ((rv = mp_init(&a)) != MP_OKAY) {
+        fprintf(stderr, "%s: error: %s\n", g_prog, mp_strerror(rv));
+        return 1;
+    }
+
+    if (argv[1][0] == '0' && argv[1][1] == 'x')
+        rv = mp_read_radix(&a, argv[1] + 2, 16);
+    else
+        rv = mp_read_radix(&a, argv[1], 10);
+    if (rv != MP_OKAY)
+        goto FAILED;
+
+    if (mp_cmp_d(&a, MINIMUM) <= 0) {
+        fprintf(stderr, "%s: please use a value greater than %d\n",
+                g_prog, MINIMUM);
+        res = 1;
+        goto CLEANUP;
+    }
+
+    /* Test for divisibility by small primes */
+    rv = mpp_divis_primes(&a, &np);
+    if (rv == MP_YES) {
+        /* np is an mp_digit, not an int: convert explicitly for printf */
+        printf("Not prime (divisible by small prime %lu)\n",
+               (unsigned long)np);
+        res = 2;
+        goto CLEANUP;
+    } else if (rv != MP_NO) {
+        goto FAILED;
+    }
+
+    /* Test with Fermat's test, using 2 as a witness */
+    rv = mpp_fermat(&a, 2);
+    if (rv == MP_NO) {
+        printf("Not prime (failed Fermat test)\n");
+        res = 2;
+        goto CLEANUP;
+    } else if (rv != MP_YES) {
+        goto FAILED;
+    }
+
+    /* Test with Rabin-Miller probabilistic test */
+    rv = mpp_pprime(&a, g_tests);
+    if (rv == MP_NO) {
+        printf("Not prime (failed pseudoprime test)\n");
+        res = 2;
+        goto CLEANUP;
+    } else if (rv != MP_YES) {
+        /* a library error must not be reported as "probably prime" */
+        goto FAILED;
+    }
+
+    printf("Probably prime, 1 in 4^%d chance of false positive\n", g_tests);
+    goto CLEANUP;
+
+FAILED:
+    fprintf(stderr, "%s: error: %s\n", g_prog, mp_strerror(rv));
+    res = 1;
+
+CLEANUP:
+    mp_clear(&a);
+
+    return res;
+}
diff --git a/security/nss/lib/freebl/mpi/utils/lap.c
b/security/nss/lib/freebl/mpi/utils/lap.c
new file mode 100644
index 000000000..501e4531d
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/lap.c
@@ -0,0 +1,90 @@
+/*
+ * lap.c
+ *
+ * Find least annihilating power of a mod m
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "mpi.h"
+
+void sig_catch(int ign);
+
+/* Written by the signal handler and polled by main(), hence the type. */
+volatile sig_atomic_t g_quit = 0;
+
+/* Print prefix followed by the decimal form of mp and a newline to fp. */
+static mp_err
+print_decimal(FILE *fp, const char *prefix, mp_int *mp)
+{
+    mp_err res;
+    char *buf = malloc(mp_radix_size(mp, 10));
+
+    if (buf == NULL)
+        return MP_MEM;
+
+    if ((res = mp_toradix(mp, buf, 10)) == MP_OKAY)
+        fprintf(fp, "%s%s\n", prefix, buf);
+
+    free(buf);
+    return res;
+}
+
+/*
+ * Usage: lap <a> <m>
+ *
+ * Multiplies k (initially a) by a modulo m, counting steps in p, until
+ * k equals 1 or p reaches m.  Prints p when k reaches 1, otherwise
+ * "No annihilating power.".  SIGINT, SIGHUP and SIGTERM stop the
+ * search and report the current value of p on stderr.
+ *
+ * Exit status: 0 on completion, 1 on usage error or interruption,
+ * 2 on a library error.
+ */
+int
+main(int argc, char *argv[])
+{
+    mp_int a, m, p, k;
+    mp_err res = MP_OKAY;
+    int out = 0;
+
+    if (argc < 3) {
+        fprintf(stderr, "Usage: %s <a> <m>\n", argv[0]);
+        return 1;
+    }
+
+    /* Mark everything as unallocated so CLEANUP may clear unconditionally */
+    MP_DIGITS(&a) = 0;
+    MP_DIGITS(&m) = 0;
+    MP_DIGITS(&p) = 0;
+    MP_DIGITS(&k) = 0;
+
+    MP_CHECKOK(mp_init(&a));
+    MP_CHECKOK(mp_init(&m));
+    MP_CHECKOK(mp_init(&p));
+    MP_CHECKOK(mp_add_d(&p, 1, &p));
+
+    MP_CHECKOK(mp_read_radix(&a, argv[1], 10));
+    MP_CHECKOK(mp_read_radix(&m, argv[2], 10));
+
+    MP_CHECKOK(mp_init_copy(&k, &a));
+
+    signal(SIGINT, sig_catch);
+#ifndef __OS2__
+    signal(SIGHUP, sig_catch);
+#endif
+    signal(SIGTERM, sig_catch);
+
+    while (mp_cmp(&p, &m) < 0) {
+        if (g_quit) {
+            MP_CHECKOK(print_decimal(stderr, "Terminated at: ", &p));
+            out = 1;
+            goto CLEANUP;
+        }
+        if (mp_cmp_d(&k, 1) == 0) {
+            MP_CHECKOK(print_decimal(stdout, "", &p));
+            break;
+        }
+
+        MP_CHECKOK(mp_mulmod(&k, &a, &m, &k));
+        MP_CHECKOK(mp_add_d(&p, 1, &p));
+    }
+
+    if (mp_cmp(&p, &m) >= 0)
+        printf("No annihilating power.\n");
+
+CLEANUP:
+    if (res < MP_OKAY) {
+        fprintf(stderr, "%s: error: %s\n", argv[0], mp_strerror(res));
+        out = 2;
+    }
+
+    mp_clear(&k);
+    mp_clear(&p);
+    mp_clear(&m);
+    mp_clear(&a);
+    return out;
+}
+
+/* Signal handler: only records that termination was requested. */
+void
+sig_catch(int ign)
+{
+    (void)ign;
+    g_quit = 1;
+}
diff --git a/security/nss/lib/freebl/mpi/utils/makeprime.c b/security/nss/lib/freebl/mpi/utils/makeprime.c
new file mode 100644
index 000000000..401b7532b
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/makeprime.c
@@ -0,0 +1,116 @@
+/*
+ * makeprime.c
+ *
+ * A simple prime generator function (and test driver).
Prints out the + * first prime it finds greater than or equal to the starting value. + * + * Usage: makeprime + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include +#include +#include + +/* These two must be included for make_prime() to work */ + +#include "mpi.h" +#include "mpprime.h" + +/* + make_prime(p, nr) + + Find the smallest prime integer greater than or equal to p, where + primality is verified by 'nr' iterations of the Rabin-Miller + probabilistic primality test. The caller is responsible for + generating the initial value of p. + + Returns MP_OKAY if a prime has been generated, otherwise the error + code indicates some other problem. The value of p is clobbered; the + caller should keep a copy if the value is needed. + */ +mp_err make_prime(mp_int *p, int nr); + +/* The main() is not required -- it's just a test driver */ +int +main(int argc, char *argv[]) +{ + mp_int start; + mp_err res; + + if (argc < 2) { + fprintf(stderr, "Usage: %s \n", argv[0]); + return 1; + } + + mp_init(&start); + if (argv[1][0] == '0' && tolower(argv[1][1]) == 'x') { + mp_read_radix(&start, argv[1] + 2, 16); + } else { + mp_read_radix(&start, argv[1], 10); + } + mp_abs(&start, &start); + + if ((res = make_prime(&start, 5)) != MP_OKAY) { + fprintf(stderr, "%s: error: %s\n", argv[0], mp_strerror(res)); + mp_clear(&start); + + return 1; + + } else { + char *buf = malloc(mp_radix_size(&start, 10)); + + mp_todecimal(&start, buf); + printf("%s\n", buf); + free(buf); + + mp_clear(&start); + + return 0; + } + +} /* end main() */ + +/*------------------------------------------------------------------------*/ + +mp_err +make_prime(mp_int *p, int nr) +{ + mp_err res; + + if (mp_iseven(p)) { + mp_add_d(p, 1, p); + } + + do { + mp_digit which = prime_tab_size; + + /* First test for divisibility by a few small primes */ + if 
((res = mpp_divis_primes(p, &which)) == MP_YES) + continue; + else if (res != MP_NO) + goto CLEANUP; + + /* If that passes, try one iteration of Fermat's test */ + if ((res = mpp_fermat(p, 2)) == MP_NO) + continue; + else if (res != MP_YES) + goto CLEANUP; + + /* If that passes, run Rabin-Miller as often as requested */ + if ((res = mpp_pprime(p, nr)) == MP_YES) + break; + else if (res != MP_NO) + goto CLEANUP; + + } while ((res = mp_add_d(p, 2, p)) == MP_OKAY); + +CLEANUP: + return res; + +} /* end make_prime() */ + +/*------------------------------------------------------------------------*/ +/* HERE THERE BE DRAGONS */ diff --git a/security/nss/lib/freebl/mpi/utils/metime.c b/security/nss/lib/freebl/mpi/utils/metime.c new file mode 100644 index 000000000..122875ee0 --- /dev/null +++ b/security/nss/lib/freebl/mpi/utils/metime.c @@ -0,0 +1,102 @@ +/* + * metime.c + * + * Modular exponentiation timing test + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#include +#include +#include +#include +#include + +#include "mpi.h" +#include "mpprime.h" + +double clk_to_sec(clock_t start, clock_t stop); + +int +main(int argc, char *argv[]) +{ + int ix, num, prec = 8; + unsigned int seed; + clock_t start, stop; + double sec; + + mp_int a, m, c; + + if (PR_GetEnvSecure("SEED") != NULL) + seed = abs(atoi(PR_GetEnvSecure("SEED"))); + else + seed = (unsigned int)time(NULL); + + if (argc < 2) { + fprintf(stderr, "Usage: %s []\n", argv[0]); + return 1; + } + + if ((num = atoi(argv[1])) < 0) + num = -num; + + if (!num) { + fprintf(stderr, "%s: must perform at least 1 test\n", argv[0]); + return 1; + } + + if (argc > 2) { + if ((prec = atoi(argv[2])) <= 0) + prec = 8; + else + prec = (prec + (DIGIT_BIT - 1)) / DIGIT_BIT; + } + + printf("Modular exponentiation timing test\n" + "Precision: %d digits (%d bits)\n" + "# of tests: %d\n\n", + prec, prec * DIGIT_BIT, num); + + mp_init_size(&a, prec); + mp_init_size(&m, prec); + mp_init_size(&c, prec); + + srand(seed); + + start = clock(); + for (ix = 0; ix < num; ix++) { + + mpp_random_size(&a, prec); + mpp_random_size(&c, prec); + mpp_random_size(&m, prec); + /* set msb and lsb of m */ + DIGIT(&m, 0) |= 1; + DIGIT(&m, USED(&m) - 1) |= (mp_digit)1 << (DIGIT_BIT - 1); + if (mp_cmp(&a, &m) > 0) + mp_sub(&a, &m, &a); + + mp_exptmod(&a, &c, &m, &c); + } + stop = clock(); + + sec = clk_to_sec(start, stop); + + printf("Total: %.3f seconds\n", sec); + printf("Individual: %.3f seconds\n", sec / num); + + mp_clear(&c); + mp_clear(&a); + mp_clear(&m); + + return 0; +} + +double +clk_to_sec(clock_t start, clock_t stop) +{ + return (double)(stop - start) / CLOCKS_PER_SEC; +} + +/*------------------------------------------------------------------------*/ +/* HERE THERE BE DRAGONS */ diff --git a/security/nss/lib/freebl/mpi/utils/pi.c b/security/nss/lib/freebl/mpi/utils/pi.c new file mode 100644 index 000000000..7e3109786 --- /dev/null +++ b/security/nss/lib/freebl/mpi/utils/pi.c @@ -0,0 +1,171 @@ 
+/* + * pi.c + * + * Compute pi to an arbitrary number of digits. Uses Machin's formula, + * like everyone else on the planet: + * + * pi = 16 * arctan(1/5) - 4 * arctan(1/239) + * + * This is pretty effective for up to a few thousand digits, but it + * gets pretty slow after that. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include +#include +#include +#include +#include + +#include "mpi.h" + +mp_err arctan(mp_digit mul, mp_digit x, mp_digit prec, mp_int *sum); + +int +main(int argc, char *argv[]) +{ + mp_err res; + mp_digit ndigits; + mp_int sum1, sum2; + clock_t start, stop; + int out = 0; + + /* Make the user specify precision on the command line */ + if (argc < 2) { + fprintf(stderr, "Usage: %s \n", argv[0]); + return 1; + } + + if ((ndigits = abs(atoi(argv[1]))) == 0) { + fprintf(stderr, "%s: you must request at least 1 digit\n", argv[0]); + return 1; + } + + start = clock(); + mp_init(&sum1); + mp_init(&sum2); + + /* sum1 = 16 * arctan(1/5) */ + if ((res = arctan(16, 5, ndigits, &sum1)) != MP_OKAY) { + fprintf(stderr, "%s: arctan: %s\n", argv[0], mp_strerror(res)); + out = 1; + goto CLEANUP; + } + + /* sum2 = 4 * arctan(1/239) */ + if ((res = arctan(4, 239, ndigits, &sum2)) != MP_OKAY) { + fprintf(stderr, "%s: arctan: %s\n", argv[0], mp_strerror(res)); + out = 1; + goto CLEANUP; + } + + /* pi = sum1 - sum2 */ + if ((res = mp_sub(&sum1, &sum2, &sum1)) != MP_OKAY) { + fprintf(stderr, "%s: mp_sub: %s\n", argv[0], mp_strerror(res)); + out = 1; + goto CLEANUP; + } + stop = clock(); + + /* Write the output in decimal */ + { + char *buf = malloc(mp_radix_size(&sum1, 10)); + + if (buf == NULL) { + fprintf(stderr, "%s: out of memory\n", argv[0]); + out = 1; + goto CLEANUP; + } + mp_todecimal(&sum1, buf); + printf("%s\n", buf); + free(buf); + } + + fprintf(stderr, "Computation took %.2f sec.\n", + 
(double)(stop - start) / CLOCKS_PER_SEC); + +CLEANUP: + mp_clear(&sum1); + mp_clear(&sum2); + + return out; +} + +/* Compute sum := mul * arctan(1/x), to 'prec' digits of precision */ +mp_err +arctan(mp_digit mul, mp_digit x, mp_digit prec, mp_int *sum) +{ + mp_int t, v; + mp_digit q = 1, rd; + mp_err res; + int sign = 1; + + prec += 3; /* push inaccuracies off the end */ + + mp_init(&t); + mp_set(&t, 10); + mp_init(&v); + if ((res = mp_expt_d(&t, prec, &t)) != MP_OKAY || /* get 10^prec */ + (res = mp_mul_d(&t, mul, &t)) != MP_OKAY || /* ... times mul */ + (res = mp_mul_d(&t, x, &t)) != MP_OKAY) /* ... times x */ + goto CLEANUP; + + /* + The extra multiplication by x in the above takes care of what + would otherwise have to be a special case for 1 / x^1 during the + first loop iteration. A little sneaky, but effective. + + We compute arctan(1/x) by the formula: + + 1 1 1 1 + - - ----- + ----- - ----- + ... + x 3 x^3 5 x^5 7 x^7 + + We multiply through by 'mul' beforehand, which gives us a couple + more iterations and more precision + */ + + x *= x; /* works as long as x < sqrt(RADIX), which it is here */ + + mp_zero(sum); + + do { + if ((res = mp_div_d(&t, x, &t, &rd)) != MP_OKAY) + goto CLEANUP; + + if (sign < 0 && rd != 0) + mp_add_d(&t, 1, &t); + + if ((res = mp_div_d(&t, q, &v, &rd)) != MP_OKAY) + goto CLEANUP; + + if (sign < 0 && rd != 0) + mp_add_d(&v, 1, &v); + + if (sign > 0) + res = mp_add(sum, &v, sum); + else + res = mp_sub(sum, &v, sum); + + if (res != MP_OKAY) + goto CLEANUP; + + sign *= -1; + q += 2; + + } while (mp_cmp_z(&t) != 0); + + /* Chop off inaccurate low-order digits */ + mp_div_d(sum, 1000, sum, NULL); + +CLEANUP: + mp_clear(&v); + mp_clear(&t); + + return res; +} + +/*------------------------------------------------------------------------*/ +/* HERE THERE BE DRAGONS */ diff --git a/security/nss/lib/freebl/mpi/utils/primegen.c b/security/nss/lib/freebl/mpi/utils/primegen.c new file mode 100644 index 000000000..f62a56a4e --- /dev/null +++ 
b/security/nss/lib/freebl/mpi/utils/primegen.c
@@ -0,0 +1,159 @@
+/*
+ * primegen.c
+ *
+ * Generates random integers which are prime with a high degree of
+ * probability using the Miller-Rabin probabilistic primality testing
+ * algorithm.
+ *
+ * Usage:
+ *    primegen <bits> [<num> [strong]]
+ *
+ *    <bits>   - number of significant bits each prime should have
+ *    <num>    - number of primes to generate
+ *    strong   - generate strong (Sophie Germain) primes
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <time.h>
+
+#include "mpi.h"
+#include "mplogic.h"
+#include "mpprime.h"
+
+#define NUM_TESTS 5 /* Number of Rabin-Miller iterations to test with */
+
+#ifdef DEBUG
+#define FPUTC(x, y) fputc(x, y)
+#else
+#define FPUTC(x, y)
+#endif
+
+/*
+ * Test driver: builds a random odd candidate of the requested bit length
+ * and hands it to mpp_make_prime() until a probable prime is found, once
+ * per requested prime.  The SEED environment variable, if set, seeds
+ * rand().  Exit status is 0 on success and 1 on any error.
+ */
+int
+main(int argc, char *argv[])
+{
+    unsigned char *raw;
+    char *out;
+    char *seed;
+    unsigned long nTries;
+    int rawlen, bits, outlen, ngen, ix, jx;
+    int g_strong = 0;
+    int status = 0;
+    mp_int testval;
+    mp_err res;
+    clock_t start, end;
+
+    /* We'll just use the C library's rand() for now, although this
+       won't be good enough for cryptographic purposes */
+    if ((seed = PR_GetEnvSecure("SEED")) == NULL) {
+        srand((unsigned int)time(NULL));
+    } else {
+        srand((unsigned int)atoi(seed));
+    }
+
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s <bits> [<count> [strong]]\n", argv[0]);
+        return 1;
+    }
+
+    if ((bits = abs(atoi(argv[1]))) < CHAR_BIT) {
+        fprintf(stderr, "%s: please request at least %d bits.\n",
+                argv[0], CHAR_BIT);
+        return 1;
+    }
+
+    /* If optional third argument is given, use that as the number of
+       primes to generate; otherwise generate one prime only.
+     */
+    if (argc < 3) {
+        ngen = 1;
+    } else {
+        ngen = abs(atoi(argv[2]));
+    }
+
+    /* If fourth argument is given, and is the word "strong", we'll
+       generate strong (Sophie Germain) primes.
+     */
+    if (argc > 3 && strcmp(argv[3], "strong") == 0)
+        g_strong = 1;
+
+    /* testval - candidate being tested; nTries - number tried so far */
+    if ((res = mp_init(&testval)) != MP_OKAY) {
+        fprintf(stderr, "%s: error: %s\n", argv[0], mp_strerror(res));
+        return 1;
+    }
+
+    if (g_strong) {
+        printf("Requested %d strong prime value(s) of %d bits.\n",
+               ngen, bits);
+    } else {
+        printf("Requested %d prime value(s) of %d bits.\n", ngen, bits);
+    }
+
+    /* One leading sign byte plus enough bytes to hold 'bits' bits */
+    rawlen = (bits / CHAR_BIT) + ((bits % CHAR_BIT) ? 1 : 0) + 1;
+
+    if ((raw = calloc(rawlen, sizeof(unsigned char))) == NULL) {
+        fprintf(stderr, "%s: out of memory, sorry.\n", argv[0]);
+        mp_clear(&testval);
+        return 1;
+    }
+
+    /* This loop is one for each prime we need to generate */
+    for (jx = 0; jx < ngen; jx++) {
+
+        raw[0] = 0; /* sign is positive */
+
+        /* Pack the initializer with random bytes.  The product is taken
+           in unsigned arithmetic so that it cannot overflow a signed int. */
+        for (ix = 1; ix < rawlen; ix++)
+            raw[ix] = ((unsigned int)rand() * (unsigned int)rand()) & UCHAR_MAX;
+
+        raw[1] |= 0x80;       /* set high-order bit of test value */
+        raw[rawlen - 1] |= 1; /* set low-order bit of test value */
+
+        /* Make an mp_int out of the initializer */
+        if ((res = mp_read_raw(&testval, (char *)raw, rawlen)) != MP_OKAY)
+            break;
+
+        /* Initialize candidate counter */
+        nTries = 0;
+
+        start = clock(); /* time generation for this prime */
+        do {
+            res = mpp_make_prime(&testval, bits, g_strong, &nTries);
+            if (res != MP_NO)
+                break;
+            /* This code works whether digits are 16 or 32 bits */
+            if ((res = mp_add_d(&testval, 32 * 1024, &testval)) != MP_OKAY ||
+                (res = mp_add_d(&testval, 32 * 1024, &testval)) != MP_OKAY)
+                break;
+            FPUTC(',', stderr);
+        } while (1);
+        end = clock();
+
+        if (res != MP_YES) {
+            break;
+        }
+        FPUTC('\n', stderr);
+        puts("The following value is probably prime:");
+        /* The decimal size also covers the shorter hexadecimal form */
+        outlen = mp_radix_size(&testval, 10);
+        if ((out = calloc(outlen, sizeof(unsigned char))) == NULL) {
+            res = MP_MEM;
+            break;
+        }
+        mp_toradix(&testval, (char *)out, 10);
+        printf("10: %s\n", out);
+        mp_toradix(&testval, (char *)out, 16);
+        printf("16: %s\n\n", out);
+        free(out);
+
+        printf("Number of candidates tried: %lu\n", nTries);
+        /* clock_t need not be long: convert explicitly for %ld */
+        printf("This computation took %ld clock ticks (%.2f seconds)\n",
+               (long)(end - start), ((double)(end - start) / CLOCKS_PER_SEC));
+
+        FPUTC('\n', stderr);
+    } /* end of loop to generate all requested primes */
+
+    if (res != MP_OKAY) {
+        fprintf(stderr, "%s: error: %s\n", argv[0], mp_strerror(res));
+        status = 1;
+    }
+
+    free(raw);
+    mp_clear(&testval);
+
+    return status;
+}
diff --git a/security/nss/lib/freebl/mpi/utils/prng.c b/security/nss/lib/freebl/mpi/utils/prng.c
new file mode 100644
index 000000000..38748d18e
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/prng.c
@@ -0,0 +1,57 @@
+/*
+ * prng.c
+ *
+ * Command-line pseudo-random number generator
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <time.h>
+
+#ifdef __OS2__
+#include <types.h>
+#include <process.h>
+#else
+#include <unistd.h>
+#endif
+
+#include "bbs_rand.h"
+
+/*
+ * Usage: prng [<count>]
+ *
+ * Seeds the BBS generator from rand() and prints <count> values, one
+ * per line.  A missing or non-positive count means one value.
+ */
+int
+main(int argc, char *argv[])
+{
+    unsigned char *seed;
+    unsigned int ix, num = 1;
+    pid_t pid;
+
+    if (argc > 1) {
+        int req = atoi(argv[1]);
+
+        /* Test the sign before converting: a negative request must not
+           wrap around to a huge unsigned count. */
+        num = (req > 0) ? (unsigned int)req : 1;
+    }
+
+    pid = getpid();
+    srand((unsigned int)time(NULL) * (unsigned int)pid);
+
+    /* Not a perfect seed, but not bad */
+    seed = malloc(bbs_seed_size);
+    if (seed == NULL) {
+        fprintf(stderr, "%s: out of memory\n", argv[0]);
+        return 1;
+    }
+    for (ix = 0; ix < bbs_seed_size; ix++) {
+        /* modulus is UCHAR_MAX + 1 so that every byte value can occur */
+        seed[ix] = rand() % (UCHAR_MAX + 1);
+    }
+
+    bbs_srand(seed, bbs_seed_size);
+    memset(seed, 0, bbs_seed_size);
+    free(seed);
+
+    while (num-- > 0) {
+        ix = bbs_rand();
+
+        printf("%u\n", ix);
+    }
+
+    return 0;
+}
diff --git a/security/nss/lib/freebl/mpi/utils/ptab.pl b/security/nss/lib/freebl/mpi/utils/ptab.pl
new file mode 100755
index 000000000..ef2e565be
--- /dev/null
+++ b/security/nss/lib/freebl/mpi/utils/ptab.pl
@@ -0,0 +1,26 @@
+#!/usr/bin/perl
+
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ +while(<>) { + chomp; + push(@primes, $_); +} + +printf("mp_size prime_tab_size = %d;\n", ($#primes + 1)); +print "mp_digit prime_tab[] = {\n"; + +print "\t"; +$last = pop(@primes); +foreach $prime (sort {$a<=>$b} @primes) { + printf("0x%04X, ", $prime); + $brk = ($brk + 1) % 8; + print "\n\t" if(!$brk); +} +printf("0x%04X", $last); +print "\n" if($brk); +print "};\n\n"; + +exit 0; diff --git a/security/nss/lib/freebl/mpi/utils/sieve.c b/security/nss/lib/freebl/mpi/utils/sieve.c new file mode 100644 index 000000000..57768af9e --- /dev/null +++ b/security/nss/lib/freebl/mpi/utils/sieve.c @@ -0,0 +1,243 @@ +/* + * sieve.c + * + * Finds prime numbers using the Sieve of Eratosthenes + * + * This implementation uses a bitmap to represent all odd integers in a + * given range. We iterate over this bitmap, crossing off the + * multiples of each prime we find. At the end, all the remaining set + * bits correspond to prime integers. + * + * Here, we make two passes -- once we have generated a sieve-ful of + * primes, we copy them out, reset the sieve using the highest + * generated prime from the first pass as a base. Then we cross out + * all the multiples of all the primes we found the first time through, + * and re-sieve. In this way, we get double use of the memory we + * allocated for the sieve the first time though. Since we also + * implicitly ignore multiples of 2, this amounts to 4 times the + * values. + * + * This could (and probably will) be generalized to re-use the sieve a + * few more times. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/
+
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+typedef unsigned char byte;
+
+/*
+ * Bitmap over the odd integers base, base + 2, base + 4, ...
+ * Bit B stands for the value base + 2 * B; a set bit means "not yet
+ * crossed off".
+ */
+typedef struct {
+    int size;   /* bytes allocated for bits               */
+    byte *bits; /* the bitmap itself                      */
+    long base;  /* value represented by bit 0 (kept odd)  */
+    int next;   /* index of the next bit to sieve with    */
+    int nbits;  /* number of usable bits (size * CHAR_BIT) */
+} sieve;
+
+void sieve_init(sieve *sp, long base, int nbits);
+void sieve_grow(sieve *sp, int nbits);
+long sieve_next(sieve *sp);
+void sieve_reset(sieve *sp, long base);
+void sieve_cross(sieve *sp, long val);
+void sieve_clear(sieve *sp);
+
+#define S_ISSET(S, B) (((S)->bits[(B) / CHAR_BIT] >> ((B) % CHAR_BIT)) & 1)
+#define S_SET(S, B) ((S)->bits[(B) / CHAR_BIT] |= (1 << ((B) % CHAR_BIT)))
+#define S_CLR(S, B) ((S)->bits[(B) / CHAR_BIT] &= ~(1 << ((B) % CHAR_BIT)))
+#define S_VAL(S, B) ((S)->base + (2 * (B)))
+#define S_BIT(S, V) (((V) - ((S)->base)) / 2)
+
+/*
+ * Usage: sieve <width>
+ *
+ * Sieves <width> bit positions starting at 3 and prints the primes
+ * found, then re-bases the sieve at the largest of them, crosses off
+ * the multiples of every prime found so far, sieves again and prints
+ * the second batch.
+ */
+int
+main(int argc, char *argv[])
+{
+    sieve s;
+    long pr, *p;
+    long last;
+    int c, ix, cur = 0;
+
+    if (argc < 2) {
+        fprintf(stderr, "Usage: %s <width>\n", argv[0]);
+        return 1;
+    }
+
+    c = atoi(argv[1]);
+    if (c < 0)
+        c = -c;
+
+    fprintf(stderr, "%s: sieving to %d positions\n", argv[0], c);
+
+    sieve_init(&s, 3, c);
+
+    c = 0;
+    while ((pr = sieve_next(&s)) > 0) {
+        ++c;
+    }
+
+    /* An empty sieve yields no primes, so there is no base for pass two */
+    if (c == 0) {
+        fprintf(stderr, "%s: nothing to sieve\n", argv[0]);
+        sieve_clear(&s);
+        return 1;
+    }
+
+    p = calloc(c, sizeof(long));
+    if (!p) {
+        fprintf(stderr, "%s: out of memory after first half\n", argv[0]);
+        sieve_clear(&s);
+        exit(1);
+    }
+
+    fprintf(stderr, "%s: half done ... \n", argv[0]);
+
+    for (ix = 0; ix < s.nbits; ix++) {
+        if (S_ISSET(&s, ix)) {
+            p[cur] = S_VAL(&s, ix);
+            printf("%ld\n", p[cur]);
+            ++cur;
+        }
+    }
+
+    /* Keep the largest prime: it is still needed after p is released */
+    last = p[cur - 1];
+
+    sieve_reset(&s, last);
+    fprintf(stderr, "%s: crossing off %d found primes ... \n", argv[0], cur);
+    for (ix = 0; ix < cur; ix++) {
+        sieve_cross(&s, p[ix]);
+        if (!(ix % 1000))
+            fputc('.', stderr);
+    }
+    fputc('\n', stderr);
+
+    free(p);
+
+    fprintf(stderr, "%s: sieving again from %ld ... \n", argv[0], last);
+    c = 0;
+    while ((pr = sieve_next(&s)) > 0) {
+        ++c;
+    }
+
+    fprintf(stderr, "%s: done!\n", argv[0]);
+    for (ix = 0; ix < s.nbits; ix++) {
+        if (S_ISSET(&s, ix)) {
+            printf("%ld\n", S_VAL(&s, ix));
+        }
+    }
+
+    sieve_clear(&s);
+
+    return 0;
+}
+
+/*
+ * Set up sp to cover at least nbits odd values starting at base (made
+ * odd if it is even), with every bit set.  If the bitmap cannot be
+ * allocated, an error is printed and the sieve is left empty.
+ */
+void
+sieve_init(sieve *sp, long base, int nbits)
+{
+    sp->size = (nbits / CHAR_BIT);
+
+    if (nbits % CHAR_BIT)
+        ++sp->size;
+
+    if (!(base & 1))
+        ++base;
+    sp->base = base;
+    sp->next = 0;
+
+    sp->bits = NULL;
+    if (sp->size > 0) {
+        sp->bits = malloc(sp->size);
+        if (sp->bits == NULL) {
+            fprintf(stderr, "Error: out of memory in sieve_init\n");
+            sp->size = 0;
+        } else {
+            memset(sp->bits, UCHAR_MAX, sp->size);
+        }
+    }
+
+    sp->nbits = sp->size * CHAR_BIT;
+}
+
+/*
+ * Enlarge sp so it holds at least nbits bits.  Existing bits are kept
+ * and the new ones are set.  Does nothing if sp is already big enough;
+ * on allocation failure an error is printed and sp is left unchanged.
+ */
+void
+sieve_grow(sieve *sp, int nbits)
+{
+    int ns = (nbits / CHAR_BIT);
+
+    if (nbits % CHAR_BIT)
+        ++ns;
+
+    if (ns > sp->size) {
+        byte *tmp;
+        int ix;
+
+        tmp = calloc(ns, sizeof(byte));
+        if (tmp == NULL) {
+            fprintf(stderr, "Error: out of memory in sieve_grow\n");
+            return;
+        }
+
+        if (sp->size > 0)
+            memcpy(tmp, sp->bits, sp->size);
+        for (ix = sp->size; ix < ns; ix++) {
+            tmp[ix] = UCHAR_MAX;
+        }
+
+        free(sp->bits);
+        sp->bits = tmp;
+        sp->size = ns;
+
+        sp->nbits = sp->size * CHAR_BIT;
+    }
+}
+
+/*
+ * Return the value at the current position, cross off its multiples,
+ * and advance to the next bit that is still set.  Returns -1 once the
+ * position has run off the end of the bitmap.
+ */
+long
+sieve_next(sieve *sp)
+{
+    long out;
+    int ix = 0;
+    long val;
+
+    /* valid bit indices are 0 .. nbits - 1 */
+    if (sp->next >= sp->nbits)
+        return -1;
+
+    out = S_VAL(sp, sp->next);
+#ifdef DEBUG
+    fprintf(stderr, "Sieving %ld\n", out);
+#endif
+
+    /* Sieve out all multiples of the current prime */
+    val = out;
+    while (ix < sp->nbits) {
+        val += out;
+        ix = S_BIT(sp, val);
+        /* even multiples have no bit of their own */
+        if ((val & 1) && ix < sp->nbits) { /* && S_ISSET(sp, ix)) { */
+            S_CLR(sp, ix);
+#ifdef DEBUG
+            fprintf(stderr, "Crossing out %ld (bit %d)\n", val, ix);
+#endif
+        }
+    }
+
+    /* Scan ahead to the next prime */
+    ++sp->next;
+    while (sp->next < sp->nbits && !S_ISSET(sp, sp->next))
+        ++sp->next;
+
+    return out;
+}
+
+/*
+ * Clear the bit of every odd multiple of val that falls inside sp,
+ * starting with the first multiple that is not below the sieve base.
+ */
+void
+sieve_cross(sieve *sp, long val)
+{
+    int ix = 0;
+    long cur = val;
+
+    /* a non-positive step would never reach the base */
+    if (val <= 0)
+        return;
+
+    while (cur < sp->base)
+        cur += val;
+
+    ix = S_BIT(sp, cur);
+    while (ix < sp->nbits) {
+        if (cur & 1)
+            S_CLR(sp, ix);
+        cur += val;
+        ix = S_BIT(sp, cur);
+    }
+}
+
+void
+sieve_reset(sieve *sp, long base) +{ + memset(sp->bits, UCHAR_MAX, sp->size); + sp->base = base; + sp->next = 0; +} + +void +sieve_clear(sieve *sp) +{ + if (sp->bits) + free(sp->bits); + + sp->bits = NULL; +} diff --git a/security/nss/lib/freebl/mpi/vis_32.il b/security/nss/lib/freebl/mpi/vis_32.il new file mode 100644 index 000000000..d2e8024ac --- /dev/null +++ b/security/nss/lib/freebl/mpi/vis_32.il @@ -0,0 +1,1291 @@ +! +! This Source Code Form is subject to the terms of the Mozilla Public +! License, v. 2.0. If a copy of the MPL was not distributed with this +! file, You can obtain one at http://mozilla.org/MPL/2.0/. + +! The interface to the VIS instructions as declared below (and in the VIS +! User's Manual) will not change, but the macro implementation might change +! in the future. + +!-------------------------------------------------------------------- +! Pure edge handling instructions +! +! int vis_edge8(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge8,8 + edge8 %o0,%o1,%o0 + .end +! +! int vis_edge8l(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge8l,8 + edge8l %o0,%o1,%o0 + .end +! +! int vis_edge16(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge16,8 + edge16 %o0,%o1,%o0 + .end +! +! int vis_edge16l(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge16l,8 + edge16l %o0,%o1,%o0 + .end +! +! int vis_edge32(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge32,8 + edge32 %o0,%o1,%o0 + .end +! +! int vis_edge32l(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge32l,8 + edge32l %o0,%o1,%o0 + .end + +!-------------------------------------------------------------------- +! Edge handling instructions with negative return values if cc set +! +! int vis_edge8cc(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge8cc,8 + edge8 %o0,%o1,%o0 + mov 0,%o1 + movgu %icc,-1024,%o1 + or %o1,%o0,%o0 + .end +! +! int vis_edge8lcc(void */*frs1*/, void */*frs2*/); +! 
+ .inline vis_edge8lcc,8 + edge8l %o0,%o1,%o0 + mov 0,%o1 + movgu %icc,-1024,%o1 + or %o1,%o0,%o0 + .end +! +! int vis_edge16cc(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge16cc,8 + edge16 %o0,%o1,%o0 + mov 0,%o1 + movgu %icc,-1024,%o1 + or %o1,%o0,%o0 + .end +! +! int vis_edge16lcc(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge16lcc,8 + edge16l %o0,%o1,%o0 + mov 0,%o1 + movgu %icc,-1024,%o1 + or %o1,%o0,%o0 + .end +! +! int vis_edge32cc(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge32cc,8 + edge32 %o0,%o1,%o0 + mov 0,%o1 + movgu %icc,-1024,%o1 + or %o1,%o0,%o0 + .end +! +! int vis_edge32lcc(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge32lcc,8 + edge32l %o0,%o1,%o0 + mov 0,%o1 + movgu %icc,-1024,%o1 + or %o1,%o0,%o0 + .end + +!-------------------------------------------------------------------- +! Alignment instructions +! +! void *vis_alignaddr(void */*rs1*/, int /*rs2*/); +! + .inline vis_alignaddr,8 + alignaddr %o0,%o1,%o0 + .end +! +! void *vis_alignaddrl(void */*rs1*/, int /*rs2*/); +! + .inline vis_alignaddrl,8 + alignaddrl %o0,%o1,%o0 + .end +! +! double vis_faligndata(double /*frs1*/, double /*frs2*/); +! + .inline vis_faligndata,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + faligndata %f4,%f10,%f0 + .end + +!-------------------------------------------------------------------- +! Partitioned comparison instructions +! +! int vis_fcmple16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmple16,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fcmple16 %f4,%f10,%o0 + .end +! +! int vis_fcmpne16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmpne16,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fcmpne16 %f4,%f10,%o0 + .end +! +! int vis_fcmple32(double /*frs1*/, double /*frs2*/); +! 
+ .inline vis_fcmple32,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fcmple32 %f4,%f10,%o0 + .end +! +! int vis_fcmpne32(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmpne32,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fcmpne32 %f4,%f10,%o0 + .end +! +! int vis_fcmpgt16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmpgt16,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fcmpgt16 %f4,%f10,%o0 + .end +! +! int vis_fcmpeq16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmpeq16,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fcmpeq16 %f4,%f10,%o0 + .end +! +! int vis_fcmpgt32(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmpgt32,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fcmpgt32 %f4,%f10,%o0 + .end +! +! int vis_fcmpeq32(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmpeq32,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fcmpeq32 %f4,%f10,%o0 + .end + +!-------------------------------------------------------------------- +! Partitioned arithmetic +! +! double vis_fmul8x16(float /*frs1*/, double /*frs2*/); +! + .inline vis_fmul8x16,12 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f4 + st %o1,[%sp+0x48] + st %o2,[%sp+0x4c] + ldd [%sp+0x48],%f10 + fmul8x16 %f4,%f10,%f0 + .end +! +! double vis_fmul8x16_dummy(float /*frs1*/, int /*dummy*/, double /*frs2*/); +! + .inline vis_fmul8x16_dummy,16 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fmul8x16 %f4,%f10,%f0 + .end +! +! double vis_fmul8x16au(float /*frs1*/, float /*frs2*/); +! + .inline vis_fmul8x16au,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fmul8x16au %f4,%f10,%f0 + .end +! +! double vis_fmul8x16al(float /*frs1*/, float /*frs2*/); +! 
+ .inline vis_fmul8x16al,8 + st %o0,[%sp+0x44] + ld [%sp+0x44],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fmul8x16al %f4,%f10,%f0 + .end +! +! double vis_fmul8sux16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fmul8sux16,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fmul8sux16 %f4,%f10,%f0 + .end +! +! double vis_fmul8ulx16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fmul8ulx16,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fmul8ulx16 %f4,%f10,%f0 + .end +! +! double vis_fmuld8sux16(float /*frs1*/, float /*frs2*/); +! + .inline vis_fmuld8sux16,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fmuld8sux16 %f4,%f10,%f0 + .end +! +! double vis_fmuld8ulx16(float /*frs1*/, float /*frs2*/); +! + .inline vis_fmuld8ulx16,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fmuld8ulx16 %f4,%f10,%f0 + .end +! +! double vis_fpadd16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fpadd16,16 + std %o0,[%sp+0x40] + ldd [%sp+0x40],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fpadd16 %f4,%f10,%f0 + .end +! +! float vis_fpadd16s(float /*frs1*/, float /*frs2*/); +! + .inline vis_fpadd16s,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fpadd16s %f4,%f10,%f0 + .end +! +! double vis_fpadd32(double /*frs1*/, double /*frs2*/); +! + .inline vis_fpadd32,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fpadd32 %f4,%f10,%f0 + .end +! +! float vis_fpadd32s(float /*frs1*/, float /*frs2*/); +! + .inline vis_fpadd32s,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fpadd32s %f4,%f10,%f0 + .end +! +! double vis_fpsub16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fpsub16,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fpsub16 %f4,%f10,%f0 + .end +! +! 
float vis_fpsub16s(float /*frs1*/, float /*frs2*/); +! + .inline vis_fpsub16s,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fpsub16s %f4,%f10,%f0 + .end +! +! double vis_fpsub32(double /*frs1*/, double /*frs2*/); +! + .inline vis_fpsub32,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fpsub32 %f4,%f10,%f0 + .end +! +! float vis_fpsub32s(float /*frs1*/, float /*frs2*/); +! + .inline vis_fpsub32s,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fpsub32s %f4,%f10,%f0 + .end + +!-------------------------------------------------------------------- +! Pixel packing +! +! float vis_fpack16(double /*frs2*/); +! + .inline vis_fpack16,8 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + fpack16 %f4,%f0 + .end + +! +! double vis_fpack16_pair(double /*frs2*/, double /*frs2*/); +! + .inline vis_fpack16_pair,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fpack16 %f4,%f0 + fpack16 %f10,%f1 /* the two packed words are written to %f0 and %f1, which together hold the double result */ + .end +! +! void vis_st2_fpack16(double, double, double *) +! + .inline vis_st2_fpack16,20 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fpack16 %f4,%f0 + fpack16 %f10,%f1 + st %f0,[%o4+0] /* the destination pointer is the third argument and is taken from %o4, after the two doubles in %o0:%o1 and %o2:%o3 */ + st %f1,[%o4+4] + .end +! +! void vis_std_fpack16(double, double, double *) +! + .inline vis_std_fpack16,20 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fpack16 %f4,%f0 + fpack16 %f10,%f1 + std %f0,[%o4] /* same as vis_st2_fpack16 but both packed words are written with one doubleword store */ + .end +! +! void vis_st2_fpackfix(double, double, double *) +! + .inline vis_st2_fpackfix,20 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fpackfix %f4,%f0 + fpackfix %f10,%f1 + st %f0,[%o4+0] + st %f1,[%o4+4] + .end +! +! double vis_fpack16_to_hi(double /*frs1*/, double /*frs2*/); +! + .inline vis_fpack16_to_hi,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f0 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f4 + fpack16 %f4,%f0 /* frs1 was loaded into %f0:%f1; only the high word %f0 is replaced by the packed frs2 */ + .end + +!
double vis_fpack16_to_lo(double /*frs1*/, double /*frs2*/); +! + .inline vis_fpack16_to_lo,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f0 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f4 + fpack16 %f4,%f3 + fmovs %f3,%f1 /* without this, optimizer goes wrong */ + .end + +! +! double vis_fpack32(double /*frs1*/, double /*frs2*/); +! + .inline vis_fpack32,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fpack32 %f4,%f10,%f0 + .end +! +! float vis_fpackfix(double /*frs2*/); +! + .inline vis_fpackfix,8 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + fpackfix %f4,%f0 + .end +! +! double vis_fpackfix_pair(double /*frs2*/, double /*frs2*/); +! + .inline vis_fpackfix_pair,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f6 + fpackfix %f4,%f0 + fpackfix %f6,%f1 /* the two packed words are written to %f0 and %f1, which together hold the double result */ + .end + +!-------------------------------------------------------------------- +! Motion estimation +! +! double vis_pdist(double /*frs1*/, double /*frs2*/, double /*frd*/); +! + .inline vis_pdist,24 + std %o4,[%sp+0x48] + ldd [%sp+0x48],%f0 /* the third argument (frd, taken from %o4:%o5) is preloaded into %f0, the destination register of the pdist below */ + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + pdist %f4,%f10,%f0 + .end + +!-------------------------------------------------------------------- +! Channel merging +! +! double vis_fpmerge(float /*frs1*/, float /*frs2*/); +! + .inline vis_fpmerge,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fpmerge %f4,%f10,%f0 + .end + +!-------------------------------------------------------------------- +! Pixel expansion +! +! double vis_fexpand(float /*frs2*/); +! + .inline vis_fexpand,4 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + fexpand %f4,%f0 + .end + +! double vis_fexpand_hi(double /*frs2*/); +! + .inline vis_fexpand_hi,8 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + fexpand %f4,%f0 /* fexpand is given %f4, the first word of the %f4:%f5 pair that was just loaded */ + .end + +! double vis_fexpand_lo(double /*frs2*/); +!
+ .inline vis_fexpand_lo,8 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + fmovs %f5, %f2 + fexpand %f2,%f0 + .end + +!-------------------------------------------------------------------- +! Bitwise logical operations /* the templates in this section stage each register operand through [%sp+0x48] into %f4 (and %f10 for a second operand) and write the result to %f0 */ +! +! double vis_fnor(double /*frs1*/, double /*frs2*/); +! + .inline vis_fnor,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fnor %f4,%f10,%f0 + .end +! +! float vis_fnors(float /*frs1*/, float /*frs2*/); +! + .inline vis_fnors,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fnors %f4,%f10,%f0 + .end +! +! double vis_fandnot(double /*frs1*/, double /*frs2*/); /* implemented with fandnot1; presumably the variant that complements frs1 before the AND - confirm against the VIS manual */ +! + .inline vis_fandnot,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fandnot1 %f4,%f10,%f0 + .end +! +! float vis_fandnots(float /*frs1*/, float /*frs2*/); +! + .inline vis_fandnots,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fandnot1s %f4,%f10,%f0 + .end +! +! double vis_fnot(double /*frs1*/); +! + .inline vis_fnot,8 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + fnot1 %f4,%f0 + .end +! +! float vis_fnots(float /*frs1*/); +! + .inline vis_fnots,4 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + fnot1s %f4,%f0 + .end +! +! double vis_fxor(double /*frs1*/, double /*frs2*/); +! + .inline vis_fxor,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fxor %f4,%f10,%f0 + .end +! +! float vis_fxors(float /*frs1*/, float /*frs2*/); +! + .inline vis_fxors,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fxors %f4,%f10,%f0 + .end +! +! double vis_fnand(double /*frs1*/, double /*frs2*/); +! + .inline vis_fnand,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fnand %f4,%f10,%f0 + .end +! +! float vis_fnands(float /*frs1*/, float /*frs2*/); +!
+ .inline vis_fnands,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fnands %f4,%f10,%f0 + .end +! +! double vis_fand(double /*frs1*/, double /*frs2*/); +! + .inline vis_fand,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fand %f4,%f10,%f0 + .end +! +! float vis_fands(float /*frs1*/, float /*frs2*/); +! + .inline vis_fands,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fands %f4,%f10,%f0 + .end +! +! double vis_fxnor(double /*frs1*/, double /*frs2*/); +! + .inline vis_fxnor,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fxnor %f4,%f10,%f0 + .end +! +! float vis_fxnors(float /*frs1*/, float /*frs2*/); +! + .inline vis_fxnors,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fxnors %f4,%f10,%f0 + .end +! +! double vis_fsrc(double /*frs1*/); /* frs1 is staged in %f4 and fsrc1 writes %f0 from it; presumably a plain register copy - confirm against the VIS manual */ +! + .inline vis_fsrc,8 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + fsrc1 %f4,%f0 + .end +! +! float vis_fsrcs(float /*frs1*/); +! + .inline vis_fsrcs,4 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + fsrc1s %f4,%f0 + .end +! +! double vis_fornot(double /*frs1*/, double /*frs2*/); /* implemented with fornot1; presumably the variant that complements frs1 before the OR - confirm against the VIS manual */ +! + .inline vis_fornot,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + fornot1 %f4,%f10,%f0 + .end +! +! float vis_fornots(float /*frs1*/, float /*frs2*/); +! + .inline vis_fornots,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fornot1s %f4,%f10,%f0 + .end +! +! double vis_for(double /*frs1*/, double /*frs2*/); +! + .inline vis_for,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + std %o2,[%sp+0x48] + ldd [%sp+0x48],%f10 + for %f4,%f10,%f0 + .end +! +! float vis_fors(float /*frs1*/, float /*frs2*/); +! + .inline vis_fors,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + st %o1,[%sp+0x48] + ld [%sp+0x48],%f10 + fors %f4,%f10,%f0 + .end +! +! double vis_fzero(/* void */) +!
+ .inline vis_fzero,0 + fzero %f0 + .end +! +! float vis_fzeros(/* void */) /* like the other constant generators here, this takes no arguments (size field 0) and only writes %f0 */ +! + .inline vis_fzeros,0 + fzeros %f0 + .end +! +! double vis_fone(/* void */) +! + .inline vis_fone,0 + fone %f0 + .end +! +! float vis_fones(/* void */) +! + .inline vis_fones,0 + fones %f0 + .end + +!-------------------------------------------------------------------- +! Partial store instructions +! +! vis_stdfa_ASI_PST8P(double frd, void *rs1, int rmask) /* in these 32-bit templates frd is taken from %o0:%o1 and staged into %f4, the store address rs1 is in %o2 and rmask is in %o3 */ +! + .inline vis_stdfa_ASI_PST8P,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]%o3,0xc0 ! ASI_PST8_P + .end +! +! vis_stdfa_ASI_PST8PL(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST8PL,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]%o3,0xc8 ! ASI_PST8_PL + .end +! +! vis_stdfa_ASI_PST8P_int_pair(void *rs1, void *rs2, void *rs3, int rmask); /* the 64-bit store data is assembled in %f4:%f5 from one word read through rs1 and one through rs2; rs3 in %o2 is the store address */ +! + .inline vis_stdfa_ASI_PST8P_int_pair,16 + ld [%o0],%f4 + ld [%o1],%f5 + stda %f4,[%o2]%o3,0xc0 ! ASI_PST8_P + .end +! +! vis_stdfa_ASI_PST8S(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST8S,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]%o3,0xc1 ! ASI_PST8_S + .end +! +! vis_stdfa_ASI_PST16P(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST16P,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]%o3,0xc2 ! ASI_PST16_P + .end +! +! vis_stdfa_ASI_PST16S(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST16S,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]%o3,0xc3 ! ASI_PST16_S + .end +! +! vis_stdfa_ASI_PST32P(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST32P,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]%o3,0xc4 ! ASI_PST32_P + .end +! +! vis_stdfa_ASI_PST32S(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST32S,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]%o3,0xc5 ! ASI_PST32_S + .end + +!-------------------------------------------------------------------- +! Short store instructions +! +!
vis_stdfa_ASI_FL8P(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL8P,12 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]0xd0 ! ASI_FL8_P + .end +! +! vis_stdfa_ASI_FL8P_index(double frd, void *rs1, long index) /* index is taken from %o3 and added to rs1 in %o2 to form the store address */ +! + .inline vis_stdfa_ASI_FL8P_index,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2+%o3]0xd0 ! ASI_FL8_P + .end +! +! vis_stdfa_ASI_FL8S(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL8S,12 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]0xd1 ! ASI_FL8_S + .end +! +! vis_stdfa_ASI_FL16P(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL16P,12 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]0xd2 ! ASI_FL16_P + .end +! +! vis_stdfa_ASI_FL16P_index(double frd, void *rs1, long index) +! + .inline vis_stdfa_ASI_FL16P_index,16 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2+%o3]0xd2 ! ASI_FL16_P + .end +! +! vis_stdfa_ASI_FL16S(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL16S,12 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]0xd3 ! ASI_FL16_S + .end +! +! vis_stdfa_ASI_FL8PL(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL8PL,12 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]0xd8 ! ASI_FL8_PL + .end +! +! vis_stdfa_ASI_FL8SL(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL8SL,12 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]0xd9 ! ASI_FL8_SL + .end +! +! vis_stdfa_ASI_FL16PL(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL16PL,12 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]0xda ! ASI_FL16_PL + .end +! +! vis_stdfa_ASI_FL16SL(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL16SL,12 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]0xdb ! ASI_FL16_SL + .end + +!-------------------------------------------------------------------- +! Short load instructions +! +! double vis_lddfa_ASI_FL8P(void *rs1) /* the short load targets scratch %f4 and the value is then copied to the result register %f0 */ +! + .inline vis_lddfa_ASI_FL8P,4 + ldda [%o0]0xd0,%f4 ! ASI_FL8_P + fmovd %f4,%f0 ! Compiler can clean this up + .end +! +!
double vis_lddfa_ASI_FL8P_index(void *rs1, long index) +! + .inline vis_lddfa_ASI_FL8P_index,8 + ldda [%o0+%o1]0xd0,%f4 + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL8P_hi(void *rs1, unsigned int index) +! + .inline vis_lddfa_ASI_FL8P_hi,8 + sra %o1,16,%o1 /* arithmetic shift right by 16: the upper half of index, sign-extended, becomes the offset added to rs1 */ + ldda [%o0+%o1]0xd0,%f4 + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL8P_lo(void *rs1, unsigned int index) +! + .inline vis_lddfa_ASI_FL8P_lo,8 + sll %o1,16,%o1 + sra %o1,16,%o1 /* shift left then arithmetic shift right by 16: the lower half of index, sign-extended, becomes the offset added to rs1 */ + ldda [%o0+%o1]0xd0,%f4 + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL8S(void *rs1) +! + .inline vis_lddfa_ASI_FL8S,4 + ldda [%o0]0xd1,%f4 ! ASI_FL8_S + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL16P(void *rs1) +! + .inline vis_lddfa_ASI_FL16P,4 + ldda [%o0]0xd2,%f4 ! ASI_FL16_P + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL16P_index(void *rs1, long index) +! + .inline vis_lddfa_ASI_FL16P_index,8 + ldda [%o0+%o1]0xd2,%f4 ! ASI_FL16_P + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL16S(void *rs1) +! + .inline vis_lddfa_ASI_FL16S,4 + ldda [%o0]0xd3,%f4 ! ASI_FL16_S + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL8PL(void *rs1) +! + .inline vis_lddfa_ASI_FL8PL,4 + ldda [%o0]0xd8,%f4 ! ASI_FL8_PL + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL8PL_index(void *rs1, long index) +! + .inline vis_lddfa_ASI_FL8PL_index,8 + ldda [%o0+%o1]0xd8,%f4 ! ASI_FL8_PL + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL8SL(void *rs1) +! + .inline vis_lddfa_ASI_FL8SL,4 + ldda [%o0]0xd9,%f4 ! ASI_FL8_SL + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL16PL(void *rs1) +! + .inline vis_lddfa_ASI_FL16PL,4 + ldda [%o0]0xda,%f4 ! ASI_FL16_PL + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL16PL_index(void *rs1, long index) +! + .inline vis_lddfa_ASI_FL16PL_index,8 + ldda [%o0+%o1]0xda,%f4 ! ASI_FL16_PL + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL16SL(void *rs1) +! + .inline vis_lddfa_ASI_FL16SL,4 + ldda [%o0]0xdb,%f4 !
ASI_FL16_SL + fmovd %f4,%f0 + .end + +!-------------------------------------------------------------------- +! Graphics status register +! +! unsigned int vis_read_gsr(void) /* the GSR value is read into %o0 */ +! + .inline vis_read_gsr,0 + rd %gsr,%o0 + .end +! +! void vis_write_gsr(unsigned int /* GSR */) +! + .inline vis_write_gsr,4 + wr %g0,%o0,%gsr + .end + +!-------------------------------------------------------------------- +! Voxel texture mapping +! +! unsigned long vis_array8(unsigned long long /*rs1 */, int /*rs2*/) /* 32-bit build: the 64-bit rs1 is split across %o0 (shifted into the upper 32 bits) and %o1 (lower 32 bits), and rs2 is taken from %o2 */ +! + .inline vis_array8,12 + sllx %o0,32,%o0 + srl %o1,0,%o1 ! clear the most significant 32 bits of %o1 + or %o0,%o1,%o3 ! join %o0 and %o1 into %o3 + array8 %o3,%o2,%o0 + .end +! +! unsigned long vis_array16(unsigned long long /*rs1*/, int /*rs2*/) +! + .inline vis_array16,12 + sllx %o0,32,%o0 + srl %o1,0,%o1 ! clear the most significant 32 bits of %o1 + or %o0,%o1,%o3 ! join %o0 and %o1 into %o3 + array16 %o3,%o2,%o0 + .end +! +! unsigned long vis_array32(unsigned long long /*rs1*/, int /*rs2*/) +! + .inline vis_array32,12 + sllx %o0,32,%o0 + srl %o1,0,%o1 ! clear the most significant 32 bits of %o1 + or %o0,%o1,%o3 ! join %o0 and %o1 into %o3 + array32 %o3,%o2,%o0 + .end + +!-------------------------------------------------------------------- +! Register aliasing and type casts +! +! float vis_read_hi(double /* frs1 */); +! + .inline vis_read_hi,8 + std %o0,[%sp+0x48] ! store double frs1 + ldd [%sp+0x48],%f0 ! %f0:%f1 = double frs1; return %f0; + .end +! +! float vis_read_lo(double /* frs1 */); +! + .inline vis_read_lo,8 + std %o0,[%sp+0x48] ! store double frs1 + ldd [%sp+0x48],%f0 ! %f0:%f1 = double frs1; + fmovs %f1,%f0 ! %f0 = low word (frs1); return %f0; + .end +! +! double vis_write_hi(double /* frs1 */, float /* frs2 */); +! + .inline vis_write_hi,12 + std %o0,[%sp+0x48] ! store double frs1; + ldd [%sp+0x48],%f0 ! %f0:%f1 = double frs1; + st %o2,[%sp+0x44] ! store float frs2; + ld [%sp+0x44],%f2 ! %f2 = float frs2; + fmovs %f2,%f0 !
%f0 = float frs2; return %f0:f1; + .end +! +! double vis_write_lo(double /* frs1 */, float /* frs2 */); +! + .inline vis_write_lo,12 + std %o0,[%sp+0x48] ! store double frs1; + ldd [%sp+0x48],%f0 ! %f0:%f1 = double frs1; + st %o2,[%sp+0x44] ! store float frs2; + ld [%sp+0x44],%f2 ! %f2 = float frs2; + fmovs %f2,%f1 ! %f1 = float frs2; return %f0:f1; + .end +! +! double vis_freg_pair(float /* frs1 */, float /* frs2 */); /* frs1 is loaded into %f0 and frs2 into %f1, so the pair reads back as one double in %f0:%f1 */ +! + .inline vis_freg_pair,8 + st %o0,[%sp+0x48] ! store float frs1 + ld [%sp+0x48],%f0 + st %o1,[%sp+0x48] ! store float frs2 + ld [%sp+0x48],%f1 + .end +! +! float vis_to_float(unsigned int /*value*/); +! + .inline vis_to_float,4 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f0 + .end +! +! double vis_to_double(unsigned int /*value1*/, unsigned int /*value2*/); /* the %o0:%o1 pair is stored as one doubleword and reloaded into %f0:%f1, so value1 ends up in %f0 and value2 in %f1 */ +! + .inline vis_to_double,8 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f0 + .end +! +! double vis_to_double_dup(unsigned int /*value*/); +! + .inline vis_to_double_dup,4 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f1 + fmovs %f1,%f0 ! duplicate value + .end +! +! double vis_ll_to_double(unsigned long long /*value*/); +! + .inline vis_ll_to_double,8 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f0 + .end + +!-------------------------------------------------------------------- +! Address space identifier (ASI) register +! +! unsigned int vis_read_asi(void) +! + .inline vis_read_asi,0 + rd %asi,%o0 + .end +! +! void vis_write_asi(unsigned int /* ASI */) +! + .inline vis_write_asi,4 + wr %g0,%o0,%asi + .end + +!-------------------------------------------------------------------- +! Load/store from/into alternate space +! +! float vis_ldfa_ASI_REG(void *rs1) /* the _REG variants address memory through the %asi register, while the _P and _PL variants encode the ASI number in the instruction */ +! + .inline vis_ldfa_ASI_REG,4 + lda [%o0+0]%asi,%f4 + fmovs %f4,%f0 ! Compiler can clean this up + .end +! +! float vis_ldfa_ASI_P(void *rs1) +! + .inline vis_ldfa_ASI_P,4 + lda [%o0]0x80,%f4 ! ASI_P + fmovs %f4,%f0 ! Compiler can clean this up + .end +! +! float vis_ldfa_ASI_PL(void *rs1) +! + .inline vis_ldfa_ASI_PL,4 + lda [%o0]0x88,%f4 ! ASI_PL + fmovs %f4,%f0 !
Compiler can clean this up + .end +! +! double vis_lddfa_ASI_REG(void *rs1) +! + .inline vis_lddfa_ASI_REG,4 + ldda [%o0+0]%asi,%f4 + fmovd %f4,%f0 ! Compiler can clean this up + .end +! +! double vis_lddfa_ASI_P(void *rs1) +! + .inline vis_lddfa_ASI_P,4 + ldda [%o0]0x80,%f4 ! ASI_P + fmovd %f4,%f0 ! Compiler can clean this up + .end +! +! double vis_lddfa_ASI_PL(void *rs1) +! + .inline vis_lddfa_ASI_PL,4 + ldda [%o0]0x88,%f4 ! ASI_PL + fmovd %f4,%f0 ! Compiler can clean this up + .end +! +! vis_stfa_ASI_REG(float frs, void *rs1) /* frs is taken from %o0 and staged into %f4; the target address rs1 is taken from %o1 */ +! + .inline vis_stfa_ASI_REG,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + sta %f4,[%o1+0]%asi + .end +! +! vis_stfa_ASI_P(float frs, void *rs1) +! + .inline vis_stfa_ASI_P,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + sta %f4,[%o1]0x80 ! ASI_P + .end +! +! vis_stfa_ASI_PL(float frs, void *rs1) +! + .inline vis_stfa_ASI_PL,8 + st %o0,[%sp+0x48] + ld [%sp+0x48],%f4 + sta %f4,[%o1]0x88 ! ASI_PL + .end +! +! vis_stdfa_ASI_REG(double frd, void *rs1) /* frd is taken from %o0:%o1 and staged into %f4; the target address rs1 is therefore taken from %o2 */ +! + .inline vis_stdfa_ASI_REG,12 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2+0]%asi + .end +! +! vis_stdfa_ASI_P(double frd, void *rs1) +! + .inline vis_stdfa_ASI_P,12 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]0x80 ! ASI_P + .end +! +! vis_stdfa_ASI_PL(double frd, void *rs1) +! + .inline vis_stdfa_ASI_PL,12 + std %o0,[%sp+0x48] + ldd [%sp+0x48],%f4 + stda %f4,[%o2]0x88 ! ASI_PL + .end +! +! unsigned short vis_lduha_ASI_REG(void *rs1) /* the halfword is loaded into %o0, the same register that supplied the address */ +! + .inline vis_lduha_ASI_REG,4 + lduha [%o0+0]%asi,%o0 + .end +! +! unsigned short vis_lduha_ASI_P(void *rs1) +! + .inline vis_lduha_ASI_P,4 + lduha [%o0]0x80,%o0 ! ASI_P + .end +! +! unsigned short vis_lduha_ASI_PL(void *rs1) +! + .inline vis_lduha_ASI_PL,4 + lduha [%o0]0x88,%o0 ! ASI_PL + .end +! +! unsigned short vis_lduha_ASI_P_index(void *rs1, long index) +! + .inline vis_lduha_ASI_P_index,8 + lduha [%o0+%o1]0x80,%o0 ! ASI_P + .end +! +! unsigned short vis_lduha_ASI_PL_index(void *rs1, long index) +!
+ .inline vis_lduha_ASI_PL_index,8 + lduha [%o0+%o1]0x88,%o0 ! ASI_PL + .end + +!-------------------------------------------------------------------- +! Prefetch +! +! void vis_prefetch_read(void * /*address*/); +! + .inline vis_prefetch_read,4 + prefetch [%o0+0],0 /* prefetch function code 0; presumably the "several reads" variant - confirm against the SPARC V9 manual */ + .end +! +! void vis_prefetch_write(void * /*address*/); +! + .inline vis_prefetch_write,4 + prefetch [%o0+0],2 /* prefetch function code 2; presumably the "several writes" variant - confirm against the SPARC V9 manual */ + .end diff --git a/security/nss/lib/freebl/mpi/vis_64.il b/security/nss/lib/freebl/mpi/vis_64.il new file mode 100644 index 000000000..cbe2b5aa2 --- /dev/null +++ b/security/nss/lib/freebl/mpi/vis_64.il @@ -0,0 +1,997 @@ +! +! This Source Code Form is subject to the terms of the Mozilla Public +! License, v. 2.0. If a copy of the MPL was not distributed with this +! file, You can obtain one at http://mozilla.org/MPL/2.0/. + +! This file is to be used in place of vis.il in 64-bit builds. /* the templates below operate directly on argument registers (%o for integers and pointers, %f for floating point) with no staging through the stack */ + +!-------------------------------------------------------------------- +! Pure edge handling instructions +! +! int vis_edge8(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge8,16 + edge8 %o0,%o1,%o0 + .end +! +! int vis_edge8l(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge8l,16 + edge8l %o0,%o1,%o0 + .end +! +! int vis_edge16(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge16,16 + edge16 %o0,%o1,%o0 + .end +! +! int vis_edge16l(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge16l,16 + edge16l %o0,%o1,%o0 + .end +! +! int vis_edge32(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge32,16 + edge32 %o0,%o1,%o0 + .end +! +! int vis_edge32l(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge32l,16 + edge32l %o0,%o1,%o0 + .end + +!-------------------------------------------------------------------- +! Edge handling instructions with negative return values if cc set +! +! int vis_edge8cc(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge8cc,16 + edge8 %o0,%o1,%o0 + mov 0,%o1 + movgu %xcc,-1024,%o1 /* %o1 stays 0 unless the gu condition holds on %xcc, in which case it becomes -1024; the or below merges it into the edge mask */ + or %o1,%o0,%o0 + .end +! +! int vis_edge8lcc(void */*frs1*/, void */*frs2*/); +!
+ .inline vis_edge8lcc,16 + edge8l %o0,%o1,%o0 + mov 0,%o1 + movgu %xcc,-1024,%o1 + or %o1,%o0,%o0 + .end +! +! int vis_edge16cc(void */*frs1*/, void */*frs2*/); /* same shape as the other cc variants: edge mask in %o0, then -1024 is ORed in when the gu condition holds on %xcc */ +! + .inline vis_edge16cc,16 + edge16 %o0,%o1,%o0 + mov 0,%o1 + movgu %xcc,-1024,%o1 + or %o1,%o0,%o0 + .end +! +! int vis_edge16lcc(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge16lcc,16 + edge16l %o0,%o1,%o0 + mov 0,%o1 + movgu %xcc,-1024,%o1 + or %o1,%o0,%o0 + .end +! +! int vis_edge32cc(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge32cc,16 + edge32 %o0,%o1,%o0 + mov 0,%o1 + movgu %xcc,-1024,%o1 + or %o1,%o0,%o0 + .end +! +! int vis_edge32lcc(void */*frs1*/, void */*frs2*/); +! + .inline vis_edge32lcc,16 + edge32l %o0,%o1,%o0 + mov 0,%o1 + movgu %xcc,-1024,%o1 + or %o1,%o0,%o0 + .end + +!-------------------------------------------------------------------- +! Alignment instructions +! +! void *vis_alignaddr(void */*rs1*/, int /*rs2*/); +! + .inline vis_alignaddr,12 + alignaddr %o0,%o1,%o0 + .end +! +! void *vis_alignaddrl(void */*rs1*/, int /*rs2*/); +! + .inline vis_alignaddrl,12 + alignaddrl %o0,%o1,%o0 + .end +! +! double vis_faligndata(double /*frs1*/, double /*frs2*/); /* the two doubles are read from %f0 and %f2 and the result is written to %f0 */ +! + .inline vis_faligndata,16 + faligndata %f0,%f2,%f0 + .end + +!-------------------------------------------------------------------- +! Partitioned comparison instructions +! +! int vis_fcmple16(double /*frs1*/, double /*frs2*/); /* operands are read from %f0 and %f2; the compare result is written to the integer register %o0 */ +! + .inline vis_fcmple16,16 + fcmple16 %f0,%f2,%o0 + .end +! +! int vis_fcmpne16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmpne16,16 + fcmpne16 %f0,%f2,%o0 + .end +! +! int vis_fcmple32(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmple32,16 + fcmple32 %f0,%f2,%o0 + .end +! +! int vis_fcmpne32(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmpne32,16 + fcmpne32 %f0,%f2,%o0 + .end +! +! int vis_fcmpgt16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmpgt16,16 + fcmpgt16 %f0,%f2,%o0 + .end +! +! int vis_fcmpeq16(double /*frs1*/, double /*frs2*/); +!
+ .inline vis_fcmpeq16,16 + fcmpeq16 %f0,%f2,%o0 + .end +! +! int vis_fcmpgt32(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmpgt32,16 + fcmpgt32 %f0,%f2,%o0 + .end +! +! int vis_fcmpeq32(double /*frs1*/, double /*frs2*/); +! + .inline vis_fcmpeq32,16 + fcmpeq32 %f0,%f2,%o0 + .end + +!-------------------------------------------------------------------- +! Partitioned arithmetic +! +! double vis_fmul8x16(float /*frs1*/, double /*frs2*/); /* the float first argument is read from %f1 and the double second argument from %f2 */ +! + .inline vis_fmul8x16,12 + fmul8x16 %f1,%f2,%f0 + .end +! +! double vis_fmul8x16_dummy(float /*frs1*/, int /*dummy*/, double /*frs2*/); /* with the dummy int occupying the second argument position, frs2 is read from %f4 */ +! + .inline vis_fmul8x16_dummy,16 + fmul8x16 %f1,%f4,%f0 + .end +! +! double vis_fmul8x16au(float /*frs1*/, float /*frs2*/); /* two float arguments are read from the odd-numbered registers %f1 and %f3 */ +! + .inline vis_fmul8x16au,8 + fmul8x16au %f1,%f3,%f0 + .end +! +! double vis_fmul8x16al(float /*frs1*/, float /*frs2*/); +! + .inline vis_fmul8x16al,8 + fmul8x16al %f1,%f3,%f0 + .end +! +! double vis_fmul8sux16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fmul8sux16,16 + fmul8sux16 %f0,%f2,%f0 + .end +! +! double vis_fmul8ulx16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fmul8ulx16,16 + fmul8ulx16 %f0,%f2,%f0 + .end +! +! double vis_fmuld8sux16(float /*frs1*/, float /*frs2*/); +! + .inline vis_fmuld8sux16,8 + fmuld8sux16 %f1,%f3,%f0 + .end +! +! double vis_fmuld8ulx16(float /*frs1*/, float /*frs2*/); +! + .inline vis_fmuld8ulx16,8 + fmuld8ulx16 %f1,%f3,%f0 + .end +! +! double vis_fpadd16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fpadd16,16 + fpadd16 %f0,%f2,%f0 + .end +! +! float vis_fpadd16s(float /*frs1*/, float /*frs2*/); +! + .inline vis_fpadd16s,8 + fpadd16s %f1,%f3,%f0 + .end +! +! double vis_fpadd32(double /*frs1*/, double /*frs2*/); +! + .inline vis_fpadd32,16 + fpadd32 %f0,%f2,%f0 + .end +! +! float vis_fpadd32s(float /*frs1*/, float /*frs2*/); +! + .inline vis_fpadd32s,8 + fpadd32s %f1,%f3,%f0 + .end +! +! double vis_fpsub16(double /*frs1*/, double /*frs2*/); +! + .inline vis_fpsub16,16 + fpsub16 %f0,%f2,%f0 + .end +! +!
float vis_fpsub16s(float /*frs1*/, float /*frs2*/); +! + .inline vis_fpsub16s,8 + fpsub16s %f1,%f3,%f0 + .end +! +! double vis_fpsub32(double /*frs1*/, double /*frs2*/); +! + .inline vis_fpsub32,16 + fpsub32 %f0,%f2,%f0 + .end +! +! float vis_fpsub32s(float /*frs1*/, float /*frs2*/); +! + .inline vis_fpsub32s,8 + fpsub32s %f1,%f3,%f0 + .end + +!-------------------------------------------------------------------- +! Pixel packing +! +! float vis_fpack16(double /*frs2*/); +! + .inline vis_fpack16,8 + fpack16 %f0,%f0 + .end +! +! double vis_fpack16_pair(double /*frs2*/, double /*frs2*/); +! + .inline vis_fpack16_pair,16 + fpack16 %f0,%f0 + fpack16 %f2,%f1 /* the two packed words are written to %f0 and %f1, which together hold the double result */ + .end +! +! void vis_st2_fpack16(double, double, double *) /* the two doubles are read from %f0 and %f2; the destination pointer is the third argument and is taken from %o2 */ +! + .inline vis_st2_fpack16,24 + fpack16 %f0,%f0 + fpack16 %f2,%f1 + st %f0,[%o2+0] + st %f1,[%o2+4] + .end +! +! void vis_std_fpack16(double, double, double *) +! + .inline vis_std_fpack16,24 + fpack16 %f0,%f0 + fpack16 %f2,%f1 + std %f0,[%o2] + .end +! +! void vis_st2_fpackfix(double, double, double *) +! + .inline vis_st2_fpackfix,24 + fpackfix %f0,%f0 + fpackfix %f2,%f1 + st %f0,[%o2+0] + st %f1,[%o2+4] + .end +! +! double vis_fpack16_to_hi(double /*frs1*/, double /*frs2*/); +! + .inline vis_fpack16_to_hi,16 + fpack16 %f2,%f0 /* frs1 is already in %f0:%f1; only the high word %f0 is overwritten with the packed frs2 */ + .end + +! double vis_fpack16_to_lo(double /*frs1*/, double /*frs2*/); +! + .inline vis_fpack16_to_lo,16 + fpack16 %f2,%f3 + fmovs %f3,%f1 /* without this, optimizer goes wrong */ + .end + +! +! double vis_fpack32(double /*frs1*/, double /*frs2*/); +! + .inline vis_fpack32,16 + fpack32 %f0,%f2,%f0 + .end +! +! float vis_fpackfix(double /*frs2*/); +! + .inline vis_fpackfix,8 + fpackfix %f0,%f0 + .end +! +! double vis_fpackfix_pair(double /*frs2*/, double /*frs2*/); +! + .inline vis_fpackfix_pair,16 + fpackfix %f0,%f0 + fpackfix %f2,%f1 + .end + +!-------------------------------------------------------------------- +! Motion estimation +! +! double vis_pxldist64(double accum /*frd*/, double pxls1 /*frs1*/, +!
double pxls2 /*frs2*/); +! + .inline vis_pxldist64,24 + pdist %f2,%f4,%f0 /* accum is the first argument and is read from %f0, which is also the destination register of pdist; pxls1 and pxls2 are read from %f2 and %f4 */ + .end + +!-------------------------------------------------------------------- +! Channel merging +! +! double vis_fpmerge(float /*frs1*/, float /*frs2*/); +! + .inline vis_fpmerge,8 + fpmerge %f1,%f3,%f0 + .end + +!-------------------------------------------------------------------- +! Pixel expansion +! +! double vis_fexpand(float /*frs2*/); +! + .inline vis_fexpand,4 + fexpand %f1,%f0 + .end + +! double vis_fexpand_hi(double /*frs2*/); +! + .inline vis_fexpand_hi,8 + fexpand %f0,%f0 /* expands %f0, the first word of the double argument held in %f0:%f1 */ + .end + +! double vis_fexpand_lo(double /*frs2*/); +! + .inline vis_fexpand_lo,8 + fexpand %f1,%f0 /* expands %f1, the second word of the double argument held in %f0:%f1 */ + .end + +!-------------------------------------------------------------------- +! Bitwise logical operations /* in this section double operands are read from %f0 and %f2, float operands from %f1 and %f3, and the result is written to %f0 */ +! +! double vis_fnor(double /*frs1*/, double /*frs2*/); +! + .inline vis_fnor,16 + fnor %f0,%f2,%f0 + .end +! +! float vis_fnors(float /*frs1*/, float /*frs2*/); +! + .inline vis_fnors,8 + fnors %f1,%f3,%f0 + .end +! +! double vis_fandnot(double /*frs1*/, double /*frs2*/); +! + .inline vis_fandnot,16 + fandnot1 %f0,%f2,%f0 + .end +! +! float vis_fandnots(float /*frs1*/, float /*frs2*/); +! + .inline vis_fandnots,8 + fandnot1s %f1,%f3,%f0 + .end +! +! double vis_fnot(double /*frs1*/); +! + .inline vis_fnot,8 + fnot1 %f0,%f0 + .end +! +! float vis_fnots(float /*frs1*/); +! + .inline vis_fnots,4 + fnot1s %f1,%f0 + .end +! +! double vis_fxor(double /*frs1*/, double /*frs2*/); +! + .inline vis_fxor,16 + fxor %f0,%f2,%f0 + .end +! +! float vis_fxors(float /*frs1*/, float /*frs2*/); +! + .inline vis_fxors,8 + fxors %f1,%f3,%f0 + .end +! +! double vis_fnand(double /*frs1*/, double /*frs2*/); +! + .inline vis_fnand,16 + fnand %f0,%f2,%f0 + .end +! +! float vis_fnands(float /*frs1*/, float /*frs2*/); +! + .inline vis_fnands,8 + fnands %f1,%f3,%f0 + .end +! +! double vis_fand(double /*frs1*/, double /*frs2*/); +! + .inline vis_fand,16 + fand %f0,%f2,%f0 + .end +! +!
float vis_fands(float /*frs1*/, float /*frs2*/); +! + .inline vis_fands,8 + fands %f1,%f3,%f0 + .end +! +! double vis_fxnor(double /*frs1*/, double /*frs2*/); +! + .inline vis_fxnor,16 + fxnor %f0,%f2,%f0 + .end +! +! float vis_fxnors(float /*frs1*/, float /*frs2*/); +! + .inline vis_fxnors,8 + fxnors %f1,%f3,%f0 + .end +! +! double vis_fsrc(double /*frs1*/); +! + .inline vis_fsrc,8 + fsrc1 %f0,%f0 + .end +! +! float vis_fsrcs(float /*frs1*/); +! + .inline vis_fsrcs,4 + fsrc1s %f1,%f0 + .end +! +! double vis_fornot(double /*frs1*/, double /*frs2*/); +! + .inline vis_fornot,16 + fornot1 %f0,%f2,%f0 + .end +! +! float vis_fornots(float /*frs1*/, float /*frs2*/); +! + .inline vis_fornots,8 + fornot1s %f1,%f3,%f0 + .end +! +! double vis_for(double /*frs1*/, double /*frs2*/); +! + .inline vis_for,16 + for %f0,%f2,%f0 + .end +! +! float vis_fors(float /*frs1*/, float /*frs2*/); +! + .inline vis_fors,8 + fors %f1,%f3,%f0 + .end +! +! double vis_fzero(/* void */) +! + .inline vis_fzero,0 + fzero %f0 + .end +! +! float vis_fzeros(/* void */) +! + .inline vis_fzeros,0 + fzeros %f0 + .end +! +! double vis_fone(/* void */) +! + .inline vis_fone,0 + fone %f0 + .end +! +! float vis_fones(/* void */) +! + .inline vis_fones,0 + fones %f0 + .end + +!-------------------------------------------------------------------- +! Partial store instructions +! +! vis_stdfa_ASI_PST8P(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST8P,20 + stda %f0,[%o1]%o2,0xc0 ! ASI_PST8_P + .end +! +! vis_stdfa_ASI_PST8PL(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST8PL,20 + stda %f0,[%o1]%o2,0xc8 ! ASI_PST8_PL + .end +! +! vis_stdfa_ASI_PST8P_int_pair(void *rs1, void *rs2, void *rs3, int rmask); +! + .inline vis_stdfa_ASI_PST8P_int_pair,28 + ld [%o0],%f4 + ld [%o1],%f5 + stda %f4,[%o2]%o3,0xc0 ! ASI_PST8_P + .end +! +! vis_stdfa_ASI_PST8S(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST8S,20 + stda %f0,[%o1]%o2,0xc1 ! ASI_PST8_S + .end +! +! 
vis_stdfa_ASI_PST16P(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST16P,20 + stda %f0,[%o1]%o2,0xc2 ! ASI_PST16_P + .end +! +! vis_stdfa_ASI_PST16S(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST16S,20 + stda %f0,[%o1]%o2,0xc3 ! ASI_PST16_S + .end +! +! vis_stdfa_ASI_PST32P(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST32P,20 + stda %f0,[%o1]%o2,0xc4 ! ASI_PST32_P + .end +! +! vis_stdfa_ASI_PST32S(double frd, void *rs1, int rmask) +! + .inline vis_stdfa_ASI_PST32S,20 + stda %f0,[%o1]%o2,0xc5 ! ASI_PST32_S + .end + +!-------------------------------------------------------------------- +! Short store instructions +! +! vis_stdfa_ASI_FL8P(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL8P,16 + stda %f0,[%o1]0xd0 ! ASI_FL8_P + .end +! +! vis_stdfa_ASI_FL8P_index(double frd, void *rs1, long index) +! + .inline vis_stdfa_ASI_FL8P_index,24 + stda %f0,[%o1+%o2]0xd0 ! ASI_FL8_P + .end +! +! vis_stdfa_ASI_FL8S(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL8S,16 + stda %f0,[%o1]0xd1 ! ASI_FL8_S + .end +! +! vis_stdfa_ASI_FL16P(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL16P,16 + stda %f0,[%o1]0xd2 ! ASI_FL16_P + .end +! +! vis_stdfa_ASI_FL16P_index(double frd, void *rs1, long index) +! + .inline vis_stdfa_ASI_FL16P_index,24 + stda %f0,[%o1+%o2]0xd2 ! ASI_FL16_P + .end +! +! vis_stdfa_ASI_FL16S(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL16S,16 + stda %f0,[%o1]0xd3 ! ASI_FL16_S + .end +! +! vis_stdfa_ASI_FL8PL(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL8PL,16 + stda %f0,[%o1]0xd8 ! ASI_FL8_PL + .end +! +! vis_stdfa_ASI_FL8SL(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL8SL,16 + stda %f0,[%o1]0xd9 ! ASI_FL8_SL + .end +! +! vis_stdfa_ASI_FL16PL(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL16PL,16 + stda %f0,[%o1]0xda ! ASI_FL16_PL + .end +! +! vis_stdfa_ASI_FL16SL(double frd, void *rs1) +! + .inline vis_stdfa_ASI_FL16SL,16 + stda %f0,[%o1]0xdb ! 
ASI_FL16_SL + .end + +!-------------------------------------------------------------------- +! Short load instructions +! +! double vis_lddfa_ASI_FL8P(void *rs1) +! + .inline vis_lddfa_ASI_FL8P,8 + ldda [%o0]0xd0,%f4 ! ASI_FL8_P + fmovd %f4,%f0 ! Compiler can clean this up + .end +! +! double vis_lddfa_ASI_FL8P_index(void *rs1, long index) +! + .inline vis_lddfa_ASI_FL8P_index,16 + ldda [%o0+%o1]0xd0,%f4 + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL8P_hi(void *rs1, unsigned int index) +! + .inline vis_lddfa_ASI_FL8P_hi,12 + sra %o1,16,%o1 + ldda [%o0+%o1]0xd0,%f4 + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL8P_lo(void *rs1, unsigned int index) +! + .inline vis_lddfa_ASI_FL8P_lo,12 + sll %o1,16,%o1 + sra %o1,16,%o1 + ldda [%o0+%o1]0xd0,%f4 + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL8S(void *rs1) +! + .inline vis_lddfa_ASI_FL8S,8 + ldda [%o0]0xd1,%f4 ! ASI_FL8_S + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL16P(void *rs1) +! + .inline vis_lddfa_ASI_FL16P,8 + ldda [%o0]0xd2,%f4 ! ASI_FL16_P + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL16P_index(void *rs1, long index) +! + .inline vis_lddfa_ASI_FL16P_index,16 + ldda [%o0+%o1]0xd2,%f4 ! ASI_FL16_P + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL16S(void *rs1) +! + .inline vis_lddfa_ASI_FL16S,8 + ldda [%o0]0xd3,%f4 ! ASI_FL16_S + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL8PL(void *rs1) +! + .inline vis_lddfa_ASI_FL8PL,8 + ldda [%o0]0xd8,%f4 ! ASI_FL8_PL + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL8PL_index(void *rs1, long index) +! + .inline vis_lddfa_ASI_FL8PL_index,16 + ldda [%o0+%o1]0xd8,%f4 ! ASI_FL8_PL + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL8SL(void *rs1) +! + .inline vis_lddfa_ASI_FL8SL,8 + ldda [%o0]0xd9,%f4 ! ASI_FL8_SL + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL16PL(void *rs1) +! + .inline vis_lddfa_ASI_FL16PL,8 + ldda [%o0]0xda,%f4 ! ASI_FL16_PL + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL16PL_index(void *rs1, long index) +! 
+ .inline vis_lddfa_ASI_FL16PL_index,16 + ldda [%o0+%o1]0xda,%f4 ! ASI_FL16_PL + fmovd %f4,%f0 + .end +! +! double vis_lddfa_ASI_FL16SL(void *rs1) +! + .inline vis_lddfa_ASI_FL16SL,8 + ldda [%o0]0xdb,%f4 ! ASI_FL16_SL + fmovd %f4,%f0 + .end + +!-------------------------------------------------------------------- +! Graphics status register +! +! unsigned int vis_read_gsr(void) +! + .inline vis_read_gsr,0 + rd %gsr,%o0 + .end +! +! void vis_write_gsr(unsigned int /* GSR */) +! + .inline vis_write_gsr,4 + wr %g0,%o0,%gsr + .end + +!-------------------------------------------------------------------- +! Voxel texture mapping +! +! unsigned long vis_array8(unsigned long long /*rs1 */, int /*rs2*/) +! + .inline vis_array8,12 + array8 %o0,%o1,%o0 + .end +! +! unsigned long vis_array16(unsigned long long /*rs1*/, int /*rs2*/) +! + .inline vis_array16,12 + array16 %o0,%o1,%o0 + .end +! +! unsigned long vis_array32(unsigned long long /*rs1*/, int /*rs2*/) +! + .inline vis_array32,12 + array32 %o0,%o1,%o0 + .end + +!-------------------------------------------------------------------- +! Register aliasing and type casts +! +! float vis_read_hi(double /* frs1 */); +! + .inline vis_read_hi,8 + fmovs %f0,%f0 + .end +! +! float vis_read_lo(double /* frs1 */); +! + .inline vis_read_lo,8 + fmovs %f1,%f0 ! %f0 = low word (frs1); return %f0; + .end +! +! double vis_write_hi(double /* frs1 */, float /* frs2 */); +! + .inline vis_write_hi,12 + fmovs %f3,%f0 ! %f3 = float frs2; return %f0:f1; + .end +! +! double vis_write_lo(double /* frs1 */, float /* frs2 */); +! + .inline vis_write_lo,12 + fmovs %f3,%f1 ! %f3 = float frs2; return %f0:f1; + .end +! +! double vis_freg_pair(float /* frs1 */, float /* frs2 */); +! + .inline vis_freg_pair,8 + fmovs %f1,%f0 ! %f1 = float frs1; put in hi; + fmovs %f3,%f1 ! %f3 = float frs2; put in lo; return %f0:f1; + .end +! +! float vis_to_float(unsigned int /*value*/); +! + .inline vis_to_float,4 + st %o0,[%sp+2183] + ld [%sp+2183],%f0 + .end +! +! 
double vis_to_double(unsigned int /*value1*/, unsigned int /*value2*/); +! + .inline vis_to_double,8 + st %o0,[%sp+2183] + ld [%sp+2183],%f0 + st %o1,[%sp+2183] + ld [%sp+2183],%f1 + .end +! +! double vis_to_double_dup(unsigned int /*value*/); +! + .inline vis_to_double_dup,4 + st %o0,[%sp+2183] + ld [%sp+2183],%f1 + fmovs %f1,%f0 ! duplicate value + .end +! +! double vis_ll_to_double(unsigned long long /*value*/); +! + .inline vis_ll_to_double,8 + stx %o0,[%sp+2183] + ldd [%sp+2183],%f0 + .end + +!-------------------------------------------------------------------- +! Address space identifier (ASI) register +! +! unsigned int vis_read_asi(void) +! + .inline vis_read_asi,0 + rd %asi,%o0 + .end +! +! void vis_write_asi(unsigned int /* ASI */) +! + .inline vis_write_asi,4 + wr %g0,%o0,%asi + .end + +!-------------------------------------------------------------------- +! Load/store from/into alternate space +! +! float vis_ldfa_ASI_REG(void *rs1) +! + .inline vis_ldfa_ASI_REG,8 + lda [%o0+0]%asi,%f4 + fmovs %f4,%f0 ! Compiler can clean this up + .end +! +! float vis_ldfa_ASI_P(void *rs1) +! + .inline vis_ldfa_ASI_P,8 + lda [%o0]0x80,%f4 ! ASI_P + fmovs %f4,%f0 ! Compiler can clean this up + .end +! +! float vis_ldfa_ASI_PL(void *rs1) +! + .inline vis_ldfa_ASI_PL,8 + lda [%o0]0x88,%f4 ! ASI_PL + fmovs %f4,%f0 ! Compiler can clean this up + .end +! +! double vis_lddfa_ASI_REG(void *rs1) +! + .inline vis_lddfa_ASI_REG,8 + ldda [%o0+0]%asi,%f4 + fmovd %f4,%f0 ! Compiler can clean this up + .end +! +! double vis_lddfa_ASI_P(void *rs1) +! + .inline vis_lddfa_ASI_P,8 + ldda [%o0]0x80,%f4 ! ASI_P + fmovd %f4,%f0 ! Compiler can clean this up + .end +! +! double vis_lddfa_ASI_PL(void *rs1) +! + .inline vis_lddfa_ASI_PL,8 + ldda [%o0]0x88,%f4 ! ASI_PL + fmovd %f4,%f0 ! Compiler can clean this up + .end +! +! vis_stfa_ASI_REG(float frs, void *rs1) +! + .inline vis_stfa_ASI_REG,12 + sta %f1,[%o1+0]%asi + .end +! +! vis_stfa_ASI_P(float frs, void *rs1) +! 
+ .inline vis_stfa_ASI_P,12 + sta %f1,[%o1]0x80 ! ASI_P + .end +! +! vis_stfa_ASI_PL(float frs, void *rs1) +! + .inline vis_stfa_ASI_PL,12 + sta %f1,[%o1]0x88 ! ASI_PL + .end +! +! vis_stdfa_ASI_REG(double frd, void *rs1) +! + .inline vis_stdfa_ASI_REG,16 + stda %f0,[%o1+0]%asi + .end +! +! vis_stdfa_ASI_P(double frd, void *rs1) +! + .inline vis_stdfa_ASI_P,16 + stda %f0,[%o1]0x80 ! ASI_P + .end +! +! vis_stdfa_ASI_PL(double frd, void *rs1) +! + .inline vis_stdfa_ASI_PL,16 + stda %f0,[%o1]0x88 ! ASI_PL + .end +! +! unsigned short vis_lduha_ASI_REG(void *rs1) +! + .inline vis_lduha_ASI_REG,8 + lduha [%o0+0]%asi,%o0 + .end +! +! unsigned short vis_lduha_ASI_P(void *rs1) +! + .inline vis_lduha_ASI_P,8 + lduha [%o0]0x80,%o0 ! ASI_P + .end +! +! unsigned short vis_lduha_ASI_PL(void *rs1) +! + .inline vis_lduha_ASI_PL,8 + lduha [%o0]0x88,%o0 ! ASI_PL + .end +! +! unsigned short vis_lduha_ASI_P_index(void *rs1, long index) +! + .inline vis_lduha_ASI_P_index,16 + lduha [%o0+%o1]0x80,%o0 ! ASI_P + .end +! +! unsigned short vis_lduha_ASI_PL_index(void *rs1, long index) +! + .inline vis_lduha_ASI_PL_index,16 + lduha [%o0+%o1]0x88,%o0 ! ASI_PL + .end + +!-------------------------------------------------------------------- +! Prefetch +! +! void vis_prefetch_read(void * /*address*/); +! + .inline vis_prefetch_read,8 + prefetch [%o0+0],0 + .end +! +! void vis_prefetch_write(void * /*address*/); +! + .inline vis_prefetch_write,8 + prefetch [%o0+0],2 + .end diff --git a/security/nss/lib/freebl/mpi/vis_proto.h b/security/nss/lib/freebl/mpi/vis_proto.h new file mode 100644 index 000000000..275de59df --- /dev/null +++ b/security/nss/lib/freebl/mpi/vis_proto.h @@ -0,0 +1,234 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
/*
 * Prototypes for the inline templates in vis.il
 *
 * Every function declared here is expected to be supplied by an inline
 * template rather than by a C definition, so parameter names are given only
 * as comments naming the instruction operand each argument feeds.
 */

#ifndef VIS_PROTO_H
#define VIS_PROTO_H

#pragma ident "@(#)vis_proto.h 1.3 97/03/30 SMI"

#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */

/* Pure edge handling instructions */
int vis_edge8(void * /*frs1*/, void * /*frs2*/);
int vis_edge8l(void * /*frs1*/, void * /*frs2*/);
int vis_edge16(void * /*frs1*/, void * /*frs2*/);
int vis_edge16l(void * /*frs1*/, void * /*frs2*/);
int vis_edge32(void * /*frs1*/, void * /*frs2*/);
int vis_edge32l(void * /*frs1*/, void * /*frs2*/);

/* Edge handling instructions with negative return values if cc set. */
int vis_edge8cc(void * /*frs1*/, void * /*frs2*/);
int vis_edge8lcc(void * /*frs1*/, void * /*frs2*/);
int vis_edge16cc(void * /*frs1*/, void * /*frs2*/);
int vis_edge16lcc(void * /*frs1*/, void * /*frs2*/);
int vis_edge32cc(void * /*frs1*/, void * /*frs2*/);
int vis_edge32lcc(void * /*frs1*/, void * /*frs2*/);

/* Alignment instructions. */
void *vis_alignaddr(void * /*rs1*/, int /*rs2*/);
void *vis_alignaddrl(void * /*rs1*/, int /*rs2*/);
double vis_faligndata(double /*frs1*/, double /*frs2*/);

/* Partitioned comparison instructions. */
int vis_fcmple16(double /*frs1*/, double /*frs2*/);
int vis_fcmpne16(double /*frs1*/, double /*frs2*/);
int vis_fcmple32(double /*frs1*/, double /*frs2*/);
int vis_fcmpne32(double /*frs1*/, double /*frs2*/);
int vis_fcmpgt16(double /*frs1*/, double /*frs2*/);
int vis_fcmpeq16(double /*frs1*/, double /*frs2*/);
int vis_fcmpgt32(double /*frs1*/, double /*frs2*/);
int vis_fcmpeq32(double /*frs1*/, double /*frs2*/);

/* Partitioned multiplication. */
/* The direct prototype is compiled out; vis_fmul8x16 is provided further
 * down as a macro that forwards to vis_fmul8x16_dummy with a 0 dummy. */
#if 0
double vis_fmul8x16(float /*frs1*/, double /*frs2*/);
#endif
double vis_fmul8x16_dummy(float /*frs1*/, int /*dummy*/, double /*frs2*/);
double vis_fmul8x16au(float /*frs1*/, float /*frs2*/);
double vis_fmul8x16al(float /*frs1*/, float /*frs2*/);
double vis_fmul8sux16(double /*frs1*/, double /*frs2*/);
double vis_fmul8ulx16(double /*frs1*/, double /*frs2*/);
double vis_fmuld8ulx16(float /*frs1*/, float /*frs2*/);
double vis_fmuld8sux16(float /*frs1*/, float /*frs2*/);

/* Partitioned addition & subtraction. */
double vis_fpadd16(double /*frs1*/, double /*frs2*/);
float vis_fpadd16s(float /*frs1*/, float /*frs2*/);
double vis_fpadd32(double /*frs1*/, double /*frs2*/);
float vis_fpadd32s(float /*frs1*/, float /*frs2*/);
double vis_fpsub16(double /*frs1*/, double /*frs2*/);
float vis_fpsub16s(float /*frs1*/, float /*frs2*/);
double vis_fpsub32(double /*frs1*/, double /*frs2*/);
float vis_fpsub32s(float /*frs1*/, float /*frs2*/);

/* Pixel packing & clamping. */
float vis_fpack16(double /*frs2*/);
double vis_fpack32(double /*frs1*/, double /*frs2*/);
float vis_fpackfix(double /*frs2*/);

/* Combined pack ops. */
double vis_fpack16_pair(double /*frs2*/, double /*frs2*/);
double vis_fpackfix_pair(double /*frs2*/, double /*frs2*/);
void vis_st2_fpack16(double, double, double *);
void vis_std_fpack16(double, double, double *);
void vis_st2_fpackfix(double, double, double *);

double vis_fpack16_to_hi(double /*frs1*/, double /*frs2*/);
double vis_fpack16_to_lo(double /*frs1*/, double /*frs2*/);

/* Motion estimation. */
/* NOTE(review): the pdist template visible in vis.il is named vis_pxldist64
 * and takes the accumulator first; confirm that a template named vis_pdist
 * with this argument order exists before relying on this prototype. */
double vis_pdist(double /*frs1*/, double /*frs2*/, double /*frd*/);

/* Channel merging. */
double vis_fpmerge(float /*frs1*/, float /*frs2*/);

/* Pixel expansion. */
double vis_fexpand(float /*frs2*/);
double vis_fexpand_hi(double /*frs2*/);
double vis_fexpand_lo(double /*frs2*/);

/* Bitwise logical operators. */
double vis_fnor(double /*frs1*/, double /*frs2*/);
float vis_fnors(float /*frs1*/, float /*frs2*/);
double vis_fandnot(double /*frs1*/, double /*frs2*/);
float vis_fandnots(float /*frs1*/, float /*frs2*/);
double vis_fnot(double /*frs1*/);
float vis_fnots(float /*frs1*/);
double vis_fxor(double /*frs1*/, double /*frs2*/);
float vis_fxors(float /*frs1*/, float /*frs2*/);
double vis_fnand(double /*frs1*/, double /*frs2*/);
float vis_fnands(float /*frs1*/, float /*frs2*/);
double vis_fand(double /*frs1*/, double /*frs2*/);
float vis_fands(float /*frs1*/, float /*frs2*/);
double vis_fxnor(double /*frs1*/, double /*frs2*/);
float vis_fxnors(float /*frs1*/, float /*frs2*/);
double vis_fsrc(double /*frs1*/);
float vis_fsrcs(float /*frs1*/);
double vis_fornot(double /*frs1*/, double /*frs2*/);
float vis_fornots(float /*frs1*/, float /*frs2*/);
double vis_for(double /*frs1*/, double /*frs2*/);
float vis_fors(float /*frs1*/, float /*frs2*/);
double vis_fzero(void);
float vis_fzeros(void);
double vis_fone(void);
float vis_fones(void);

/* Partial stores. */
void vis_stdfa_ASI_PST8P(double /*frd*/, void * /*rs1*/, int /*rmask*/);
void vis_stdfa_ASI_PST8PL(double /*frd*/, void * /*rs1*/, int /*rmask*/);
void vis_stdfa_ASI_PST8P_int_pair(void * /*rs1*/, void * /*rs2*/,
                                  void * /*rs3*/, int /*rmask*/);
void vis_stdfa_ASI_PST8S(double /*frd*/, void * /*rs1*/, int /*rmask*/);
void vis_stdfa_ASI_PST16P(double /*frd*/, void * /*rs1*/, int /*rmask*/);
void vis_stdfa_ASI_PST16S(double /*frd*/, void * /*rs1*/, int /*rmask*/);
void vis_stdfa_ASI_PST32P(double /*frd*/, void * /*rs1*/, int /*rmask*/);
void vis_stdfa_ASI_PST32S(double /*frd*/, void * /*rs1*/, int /*rmask*/);

/* Byte & short stores. */
void vis_stdfa_ASI_FL8P(double /*frd*/, void * /*rs1*/);
void vis_stdfa_ASI_FL8P_index(double /*frd*/, void * /*rs1*/, long /*index*/);
void vis_stdfa_ASI_FL8S(double /*frd*/, void * /*rs1*/);
void vis_stdfa_ASI_FL16P(double /*frd*/, void * /*rs1*/);
void vis_stdfa_ASI_FL16P_index(double /*frd*/, void * /*rs1*/, long /*index*/);
void vis_stdfa_ASI_FL16S(double /*frd*/, void * /*rs1*/);
void vis_stdfa_ASI_FL8PL(double /*frd*/, void * /*rs1*/);
void vis_stdfa_ASI_FL8SL(double /*frd*/, void * /*rs1*/);
void vis_stdfa_ASI_FL16PL(double /*frd*/, void * /*rs1*/);
void vis_stdfa_ASI_FL16SL(double /*frd*/, void * /*rs1*/);

/* Byte & short loads. */
double vis_lddfa_ASI_FL8P(void * /*rs1*/);
double vis_lddfa_ASI_FL8P_index(void * /*rs1*/, long /*index*/);
double vis_lddfa_ASI_FL8P_hi(void * /*rs1*/, unsigned int /*index*/);
double vis_lddfa_ASI_FL8P_lo(void * /*rs1*/, unsigned int /*index*/);
double vis_lddfa_ASI_FL8S(void * /*rs1*/);
double vis_lddfa_ASI_FL16P(void * /*rs1*/);
double vis_lddfa_ASI_FL16P_index(void * /*rs1*/, long /*index*/);
double vis_lddfa_ASI_FL16S(void * /*rs1*/);
double vis_lddfa_ASI_FL8PL(void * /*rs1*/);
double vis_lddfa_ASI_FL8SL(void * /*rs1*/);
double vis_lddfa_ASI_FL16PL(void * /*rs1*/);
double vis_lddfa_ASI_FL16SL(void * /*rs1*/);

/* Direct write to GSR, read from GSR */
void vis_write_gsr(unsigned int /*GSR*/);
unsigned int vis_read_gsr(void);

/* Voxel texture mapping. */
/* Declared only when the compiler supports long long. */
#if !defined(_NO_LONGLONG)
unsigned long vis_array8(unsigned long long /*rs1*/, int /*rs2*/);
unsigned long vis_array16(unsigned long long /*rs1*/, int /*rs2*/);
unsigned long vis_array32(unsigned long long /*rs1*/, int /*rs2*/);
#endif /* !defined(_NO_LONGLONG) */

/* Register aliasing and type casts. */
float vis_read_hi(double /*frs1*/);
float vis_read_lo(double /*frs1*/);
double vis_write_hi(double /*frs1*/, float /*frs2*/);
double vis_write_lo(double /*frs1*/, float /*frs2*/);
double vis_freg_pair(float /*frs1*/, float /*frs2*/);
float vis_to_float(unsigned int /*value*/);
double vis_to_double(unsigned int /*value1*/, unsigned int /*value2*/);
double vis_to_double_dup(unsigned int /*value*/);
#if !defined(_NO_LONGLONG)
double vis_ll_to_double(unsigned long long /*value*/);
#endif /* !defined(_NO_LONGLONG) */

/* Miscellany (no inlines) */
void vis_error(char * /*fmt*/, int /*a0*/);
void vis_sim_init(void);

/* For better performance */
/* Forwards to the three-argument template, passing 0 for the dummy. */
#define vis_fmul8x16(farg, darg) vis_fmul8x16_dummy((farg), 0, (darg))

/* Nicknames for explicit ASI loads and stores. */
#define vis_st_u8 vis_stdfa_ASI_FL8P
#define vis_st_u8_i vis_stdfa_ASI_FL8P_index
#define vis_st_u8_le vis_stdfa_ASI_FL8PL
#define vis_st_u16 vis_stdfa_ASI_FL16P
#define vis_st_u16_i vis_stdfa_ASI_FL16P_index
#define vis_st_u16_le vis_stdfa_ASI_FL16PL

#define vis_ld_u8 vis_lddfa_ASI_FL8P
#define vis_ld_u8_i vis_lddfa_ASI_FL8P_index
#define vis_ld_u8_le vis_lddfa_ASI_FL8PL
#define vis_ld_u16 vis_lddfa_ASI_FL16P
#define vis_ld_u16_i vis_lddfa_ASI_FL16P_index
#define vis_ld_u16_le vis_lddfa_ASI_FL16PL

#define vis_pst_8 vis_stdfa_ASI_PST8P
#define vis_pst_16 vis_stdfa_ASI_PST16P
#define vis_pst_32 vis_stdfa_ASI_PST32P

#define vis_st_u8s vis_stdfa_ASI_FL8S
#define vis_st_u8s_le vis_stdfa_ASI_FL8SL
#define vis_st_u16s vis_stdfa_ASI_FL16S
#define vis_st_u16s_le vis_stdfa_ASI_FL16SL

#define vis_ld_u8s vis_lddfa_ASI_FL8S
#define vis_ld_u8s_le vis_lddfa_ASI_FL8SL
#define vis_ld_u16s vis_lddfa_ASI_FL16S
#define vis_ld_u16s_le vis_lddfa_ASI_FL16SL

#define vis_pst_8s vis_stdfa_ASI_PST8S
#define vis_pst_16s vis_stdfa_ASI_PST16S
#define vis_pst_32s vis_stdfa_ASI_PST32S

/* "<" and ">=" may be implemented in terms of ">" and "<=". */
/* Each macro swaps its operands: a < b is b > a, and a >= b is b <= a. */
#define vis_fcmplt16(a, b) vis_fcmpgt16((b), (a))
#define vis_fcmplt32(a, b) vis_fcmpgt32((b), (a))
#define vis_fcmpge16(a, b) vis_fcmple16((b), (a))
#define vis_fcmpge32(a, b) vis_fcmple32((b), (a))

#ifdef __cplusplus
} // End of extern "C"
#endif /* __cplusplus */

#endif /* VIS_PROTO_H */
*/ +#define vis_fcmplt16(a, b) vis_fcmpgt16((b), (a)) +#define vis_fcmplt32(a, b) vis_fcmpgt32((b), (a)) +#define vis_fcmpge16(a, b) vis_fcmple16((b), (a)) +#define vis_fcmpge32(a, b) vis_fcmple32((b), (a)) + +#ifdef __cplusplus +} // End of extern "C" +#endif /* __cplusplus */ + +#endif /* VIS_PROTO_H */ diff --git a/security/nss/lib/freebl/nsslowhash.c b/security/nss/lib/freebl/nsslowhash.c new file mode 100644 index 000000000..5ed039689 --- /dev/null +++ b/security/nss/lib/freebl/nsslowhash.c @@ -0,0 +1,150 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif +#include "prtypes.h" +#include "secerr.h" +#include "blapi.h" +#include "hasht.h" +#include "plhash.h" +#include "nsslowhash.h" +#include "blapii.h" + +struct NSSLOWInitContextStr { + int count; +}; + +struct NSSLOWHASHContextStr { + const SECHashObject *hashObj; + void *hashCtxt; +}; + +static int +nsslow_GetFIPSEnabled(void) +{ +#ifdef LINUX + FILE *f; + char d; + size_t size; + + f = fopen("/proc/sys/crypto/fips_enabled", "r"); + if (!f) + return 0; + + size = fread(&d, 1, 1, f); + fclose(f); + if (size != 1) + return 0; + if (d != '1') + return 0; +#endif + return 1; +} + +static NSSLOWInitContext dummyContext = { 0 }; +static PRBool post_failed = PR_TRUE; + +NSSLOWInitContext * +NSSLOW_Init(void) +{ +#ifdef FREEBL_NO_DEPEND + (void)FREEBL_InitStubs(); +#endif + + /* make sure the FIPS product is installed if we are trying to + * go into FIPS mode */ + if (nsslow_GetFIPSEnabled()) { + if (BL_FIPSEntryOK(PR_TRUE) != SECSuccess) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + post_failed = PR_TRUE; + return NULL; + } + } + post_failed = PR_FALSE; + + return &dummyContext; +} + +void +NSSLOW_Shutdown(NSSLOWInitContext *context) +{ + PORT_Assert(context == &dummyContext); + return; +} + +void 
+NSSLOW_Reset(NSSLOWInitContext *context) +{ + PORT_Assert(context == &dummyContext); + return; +} + +NSSLOWHASHContext * +NSSLOWHASH_NewContext(NSSLOWInitContext *initContext, + HASH_HashType hashType) +{ + NSSLOWHASHContext *context; + + if (post_failed) { + PORT_SetError(SEC_ERROR_PKCS11_DEVICE_ERROR); + return NULL; + } + + if (initContext != &dummyContext) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return (NULL); + } + + context = PORT_ZNew(NSSLOWHASHContext); + if (!context) { + return NULL; + } + context->hashObj = HASH_GetRawHashObject(hashType); + if (!context->hashObj) { + PORT_Free(context); + return NULL; + } + context->hashCtxt = context->hashObj->create(); + if (!context->hashCtxt) { + PORT_Free(context); + return NULL; + } + + return context; +} + +void +NSSLOWHASH_Begin(NSSLOWHASHContext *context) +{ + return context->hashObj->begin(context->hashCtxt); +} + +void +NSSLOWHASH_Update(NSSLOWHASHContext *context, const unsigned char *buf, + unsigned int len) +{ + return context->hashObj->update(context->hashCtxt, buf, len); +} + +void +NSSLOWHASH_End(NSSLOWHASHContext *context, unsigned char *buf, + unsigned int *ret, unsigned int len) +{ + return context->hashObj->end(context->hashCtxt, buf, ret, len); +} + +void +NSSLOWHASH_Destroy(NSSLOWHASHContext *context) +{ + context->hashObj->destroy(context->hashCtxt, PR_TRUE); + PORT_Free(context); +} + +unsigned int +NSSLOWHASH_Length(NSSLOWHASHContext *context) +{ + return context->hashObj->length; +} diff --git a/security/nss/lib/freebl/nsslowhash.h b/security/nss/lib/freebl/nsslowhash.h new file mode 100644 index 000000000..d8f058715 --- /dev/null +++ b/security/nss/lib/freebl/nsslowhash.h @@ -0,0 +1,33 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * Provide FIPS validated hashing for applications that only need hashing. 
/*
 * Provide FIPS validated hashing for applications that only need hashing.
 * NOTE: mac'ing requires keys and will not work in this interface.
 * Also NOTE: this only works with Hashing. Only the FIPS interface is enabled.
 */

#ifndef _NSSLOWHASH_H_
#define _NSSLOWHASH_H_

/*
 * This header includes nothing itself, so HASH_HashType must already be
 * declared when it is included. nsslowhash.c includes "hasht.h" first;
 * presumably that is where the type comes from (confirm before relying on it).
 */

/* Opaque handles; their layouts are private to nsslowhash.c. */
typedef struct NSSLOWInitContextStr NSSLOWInitContext;
typedef struct NSSLOWHASHContextStr NSSLOWHASHContext;

/*
 * Initialize the interface. Returns a shared static context, or NULL with
 * SEC_ERROR_LIBRARY_FAILURE set when FIPS mode is detected and the FIPS
 * entry check fails.
 */
NSSLOWInitContext *NSSLOW_Init(void);
/* Counterparts to NSSLOW_Init(); both only assert that context is the one
 * NSSLOW_Init() returned. */
void NSSLOW_Shutdown(NSSLOWInitContext *context);
void NSSLOW_Reset(NSSLOWInitContext *context);
/*
 * Create a hash context for hashType. Returns NULL when the interface is not
 * successfully initialized (SEC_ERROR_PKCS11_DEVICE_ERROR), when initContext
 * did not come from NSSLOW_Init() (SEC_ERROR_INVALID_ARGS), or when
 * allocation or hash lookup fails. Release with NSSLOWHASH_Destroy().
 */
NSSLOWHASHContext *NSSLOWHASH_NewContext(
    NSSLOWInitContext *initContext,
    HASH_HashType hashType);
/* Begin, feed and finish a hash; each call forwards to the matching hook of
 * the hash object selected in NSSLOWHASH_NewContext(). */
void NSSLOWHASH_Begin(NSSLOWHASHContext *context);
void NSSLOWHASH_Update(NSSLOWHASHContext *context,
                       const unsigned char *buf,
                       unsigned int len);
void NSSLOWHASH_End(NSSLOWHASHContext *context,
                    unsigned char *buf,
                    unsigned int *ret, unsigned int len);
/* Destroy the underlying hash state and free the context itself. */
void NSSLOWHASH_Destroy(NSSLOWHASHContext *context);
/* Return the length field of the selected hash object. */
unsigned int NSSLOWHASH_Length(NSSLOWHASHContext *context);

#endif
*/ + +#define INCL_DOS +#define INCL_DOSERRORS +#include +#include "secrng.h" +#include "prerror.h" +#include +#include +#include +#include + +static BOOL +clockTickTime(unsigned long *phigh, unsigned long *plow) +{ + APIRET rc = NO_ERROR; + QWORD qword = { 0, 0 }; + + rc = DosTmrQueryTime(&qword); + if (rc != NO_ERROR) + return FALSE; + + *phigh = qword.ulHi; + *plow = qword.ulLo; + + return TRUE; +} + +size_t +RNG_GetNoise(void *buf, size_t maxbuf) +{ + unsigned long high = 0; + unsigned long low = 0; + clock_t val = 0; + int n = 0; + int nBytes = 0; + time_t sTime; + + if (maxbuf <= 0) + return 0; + + clockTickTime(&high, &low); + + /* get the maximally changing bits first */ + nBytes = sizeof(low) > maxbuf ? maxbuf : sizeof(low); + memcpy(buf, &low, nBytes); + n += nBytes; + maxbuf -= nBytes; + + if (maxbuf <= 0) + return n; + + nBytes = sizeof(high) > maxbuf ? maxbuf : sizeof(high); + memcpy(((char *)buf) + n, &high, nBytes); + n += nBytes; + maxbuf -= nBytes; + + if (maxbuf <= 0) + return n; + + /* get the number of milliseconds that have elapsed since application started */ + val = clock(); + + nBytes = sizeof(val) > maxbuf ? maxbuf : sizeof(val); + memcpy(((char *)buf) + n, &val, nBytes); + n += nBytes; + maxbuf -= nBytes; + + if (maxbuf <= 0) + return n; + + /* get the time in seconds since midnight Jan 1, 1970 */ + time(&sTime); + nBytes = sizeof(sTime) > maxbuf ? 
maxbuf : sizeof(sTime); + memcpy(((char *)buf) + n, &sTime, nBytes); + n += nBytes; + + return n; +} + +static BOOL +EnumSystemFiles(void (*func)(const char *)) +{ + APIRET rc; + ULONG sysInfo = 0; + char bootLetter[2]; + char sysDir[_MAX_PATH] = ""; + char filename[_MAX_PATH]; + HDIR hdir = HDIR_CREATE; + ULONG numFiles = 1; + FILEFINDBUF3 fileBuf = { 0 }; + ULONG buflen = sizeof(FILEFINDBUF3); + + if (DosQuerySysInfo(QSV_BOOT_DRIVE, QSV_BOOT_DRIVE, (PVOID)&sysInfo, + sizeof(ULONG)) == NO_ERROR) { + bootLetter[0] = sysInfo + 'A' - 1; + bootLetter[1] = '\0'; + strcpy(sysDir, bootLetter); + strcpy(sysDir + 1, ":\\OS2\\"); + + strcpy(filename, sysDir); + strcat(filename, "*.*"); + } + + rc = DosFindFirst(filename, &hdir, FILE_NORMAL, &fileBuf, buflen, + &numFiles, FIL_STANDARD); + if (rc == NO_ERROR) { + do { + // pass the full pathname to the callback + sprintf(filename, "%s%s", sysDir, fileBuf.achName); + (*func)(filename); + + numFiles = 1; + rc = DosFindNext(hdir, &fileBuf, buflen, &numFiles); + if (rc != NO_ERROR && rc != ERROR_NO_MORE_FILES) + printf("DosFindNext errod code = %d\n", rc); + } while (rc == NO_ERROR); + + rc = DosFindClose(hdir); + if (rc != NO_ERROR) + printf("DosFindClose error code = %d", rc); + } else + printf("DosFindFirst error code = %d", rc); + + return TRUE; +} + +static int dwNumFiles, dwReadEvery, dwFileToRead = 0; + +static void +CountFiles(const char *file) +{ + dwNumFiles++; +} + +static void +ReadFiles(const char *file) +{ + if ((dwNumFiles % dwReadEvery) == 0) + RNG_FileForRNG(file); + + dwNumFiles++; +} + +static void +ReadSingleFile(const char *filename) +{ + unsigned char buffer[1024]; + FILE *file; + + file = fopen((char *)filename, "rb"); + if (file != NULL) { + while (fread(buffer, 1, sizeof(buffer), file) > 0) + ; + fclose(file); + } +} + +static void +ReadOneFile(const char *file) +{ + if (dwNumFiles == dwFileToRead) { + ReadSingleFile(file); + } + + dwNumFiles++; +} + +static void +ReadSystemFiles(void) +{ + // first count 
the number of files + dwNumFiles = 0; + if (!EnumSystemFiles(CountFiles)) + return; + + RNG_RandomUpdate(&dwNumFiles, sizeof(dwNumFiles)); + + // now read 10 files + if (dwNumFiles == 0) + return; + + dwReadEvery = dwNumFiles / 10; + if (dwReadEvery == 0) + dwReadEvery = 1; // less than 10 files + + dwNumFiles = 0; + EnumSystemFiles(ReadFiles); +} + +void +RNG_SystemInfoForRNG(void) +{ + unsigned long *plong = 0; + PTIB ptib; + PPIB ppib; + APIRET rc = NO_ERROR; + DATETIME dt; + COUNTRYCODE cc = { 0 }; + COUNTRYINFO ci = { 0 }; + unsigned long actual = 0; + char path[_MAX_PATH] = ""; + char fullpath[_MAX_PATH] = ""; + unsigned long pathlength = sizeof(path); + FSALLOCATE fsallocate; + FILESTATUS3 fstatus; + unsigned long defaultdrive = 0; + unsigned long logicaldrives = 0; + unsigned long sysInfo[QSV_MAX] = { 0 }; + char buffer[20]; + int nBytes = 0; + + nBytes = RNG_GetNoise(buffer, sizeof(buffer)); + RNG_RandomUpdate(buffer, nBytes); + + /* allocate memory and use address and memory */ + plong = (unsigned long *)malloc(sizeof(*plong)); + RNG_RandomUpdate(&plong, sizeof(plong)); + RNG_RandomUpdate(plong, sizeof(*plong)); + free(plong); + + /* process info */ + rc = DosGetInfoBlocks(&ptib, &ppib); + if (rc == NO_ERROR) { + RNG_RandomUpdate(ptib, sizeof(*ptib)); + RNG_RandomUpdate(ppib, sizeof(*ppib)); + } + + /* time */ + rc = DosGetDateTime(&dt); + if (rc == NO_ERROR) { + RNG_RandomUpdate(&dt, sizeof(dt)); + } + + /* country */ + rc = DosQueryCtryInfo(sizeof(ci), &cc, &ci, &actual); + if (rc == NO_ERROR) { + RNG_RandomUpdate(&cc, sizeof(cc)); + RNG_RandomUpdate(&ci, sizeof(ci)); + RNG_RandomUpdate(&actual, sizeof(actual)); + } + + /* current directory */ + rc = DosQueryCurrentDir(0, path, &pathlength); + strcat(fullpath, "\\"); + strcat(fullpath, path); + if (rc == NO_ERROR) { + RNG_RandomUpdate(fullpath, strlen(fullpath)); + // path info + rc = DosQueryPathInfo(fullpath, FIL_STANDARD, &fstatus, sizeof(fstatus)); + if (rc == NO_ERROR) { + 
RNG_RandomUpdate(&fstatus, sizeof(fstatus)); + } + } + + /* file system info */ + rc = DosQueryFSInfo(0, FSIL_ALLOC, &fsallocate, sizeof(fsallocate)); + if (rc == NO_ERROR) { + RNG_RandomUpdate(&fsallocate, sizeof(fsallocate)); + } + + /* drive info */ + rc = DosQueryCurrentDisk(&defaultdrive, &logicaldrives); + if (rc == NO_ERROR) { + RNG_RandomUpdate(&defaultdrive, sizeof(defaultdrive)); + RNG_RandomUpdate(&logicaldrives, sizeof(logicaldrives)); + } + + /* system info */ + rc = DosQuerySysInfo(1L, QSV_MAX, (PVOID)&sysInfo, sizeof(ULONG) * QSV_MAX); + if (rc == NO_ERROR) { + RNG_RandomUpdate(&sysInfo, sizeof(sysInfo)); + } + + // now let's do some files + ReadSystemFiles(); + + /* more noise */ + nBytes = RNG_GetNoise(buffer, sizeof(buffer)); + RNG_RandomUpdate(buffer, nBytes); +} + +void +RNG_FileForRNG(const char *filename) +{ + struct stat stat_buf; + unsigned char buffer[1024]; + FILE *file = 0; + int nBytes = 0; + static int totalFileBytes = 0; + + if (stat((char *)filename, &stat_buf) < 0) + return; + + RNG_RandomUpdate((unsigned char *)&stat_buf, sizeof(stat_buf)); + + file = fopen((char *)filename, "r"); + if (file != NULL) { + for (;;) { + size_t bytes = fread(buffer, 1, sizeof(buffer), file); + + if (bytes == 0) + break; + + RNG_RandomUpdate(buffer, bytes); + totalFileBytes += bytes; + if (totalFileBytes > 250000) + break; + } + fclose(file); + } + + nBytes = RNG_GetNoise(buffer, 20); + RNG_RandomUpdate(buffer, nBytes); +} + +static void +rng_systemJitter(void) +{ + dwNumFiles = 0; + EnumSystemFiles(ReadOneFile); + dwFileToRead++; + if (dwFileToRead >= dwNumFiles) { + dwFileToRead = 0; + } +} + +size_t +RNG_SystemRNG(void *dest, size_t maxLen) +{ + return rng_systemFromNoise(dest, maxLen); +} diff --git a/security/nss/lib/freebl/poly1305-donna-x64-sse2-incremental-source.c b/security/nss/lib/freebl/poly1305-donna-x64-sse2-incremental-source.c new file mode 100644 index 000000000..3c803c167 --- /dev/null +++ 
b/security/nss/lib/freebl/poly1305-donna-x64-sse2-incremental-source.c @@ -0,0 +1,881 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* This implementation of poly1305 is by Andrew Moon + * (https://github.com/floodyberry/poly1305-donna) and released as public + * domain. It implements SIMD vectorization based on the algorithm described in + * http://cr.yp.to/papers.html#neoncrypto. Unrolled to 2 powers, i.e. 64 byte + * block size. */ + +#include +#include + +#include "poly1305.h" +#include "blapii.h" + +#define ALIGN(x) __attribute__((aligned(x))) +#define INLINE inline +#define U8TO64_LE(m) (*(uint64_t *)(m)) +#define U8TO32_LE(m) (*(uint32_t *)(m)) +#define U64TO8_LE(m, v) (*(uint64_t *)(m)) = v + +typedef __m128i xmmi; +typedef unsigned __int128 uint128_t; + +static const uint32_t ALIGN(16) poly1305_x64_sse2_message_mask[4] = { (1 << 26) - 1, 0, (1 << 26) - 1, 0 }; +static const uint32_t ALIGN(16) poly1305_x64_sse2_5[4] = { 5, 0, 5, 0 }; +static const uint32_t ALIGN(16) poly1305_x64_sse2_1shl128[4] = { (1 << 24), 0, (1 << 24), 0 }; + +static uint128_t INLINE +add128(uint128_t a, uint128_t b) +{ + return a + b; +} + +static uint128_t INLINE +add128_64(uint128_t a, uint64_t b) +{ + return a + b; +} + +static uint128_t INLINE +mul64x64_128(uint64_t a, uint64_t b) +{ + return (uint128_t)a * b; +} + +static uint64_t INLINE +lo128(uint128_t a) +{ + return (uint64_t)a; +} + +static uint64_t INLINE +shr128(uint128_t v, const int shift) +{ + return (uint64_t)(v >> shift); +} + +static uint64_t INLINE +shr128_pair(uint64_t hi, uint64_t lo, const int shift) +{ + return (uint64_t)((((uint128_t)hi << 64) | lo) >> shift); +} + +typedef struct poly1305_power_t { + union { + xmmi v; + uint64_t u[2]; + uint32_t d[4]; + } R20, R21, R22, R23, R24, S21, S22, S23, S24; +} poly1305_power; + +typedef struct 
poly1305_state_internal_t { + poly1305_power P[2]; /* 288 bytes, top 32 bit halves unused = 144 bytes of free storage */ + union { + xmmi H[5]; /* 80 bytes */ + uint64_t HH[10]; + }; + /* uint64_t r0,r1,r2; [24 bytes] */ + /* uint64_t pad0,pad1; [16 bytes] */ + uint64_t started; /* 8 bytes */ + uint64_t leftover; /* 8 bytes */ + uint8_t buffer[64]; /* 64 bytes */ +} poly1305_state_internal; /* 448 bytes total + 63 bytes for alignment = 511 bytes raw */ + +static poly1305_state_internal INLINE + * + poly1305_aligned_state(poly1305_state *state) +{ + return (poly1305_state_internal *)(((uint64_t)state + 63) & ~63); +} + +/* copy 0-63 bytes */ +static void INLINE NO_SANITIZE_ALIGNMENT +poly1305_block_copy(uint8_t *dst, const uint8_t *src, size_t bytes) +{ + size_t offset = src - dst; + if (bytes & 32) { + _mm_storeu_si128((xmmi *)(dst + 0), _mm_loadu_si128((xmmi *)(dst + offset + 0))); + _mm_storeu_si128((xmmi *)(dst + 16), _mm_loadu_si128((xmmi *)(dst + offset + 16))); + dst += 32; + } + if (bytes & 16) { + _mm_storeu_si128((xmmi *)dst, _mm_loadu_si128((xmmi *)(dst + offset))); + dst += 16; + } + if (bytes & 8) { + *(uint64_t *)dst = *(uint64_t *)(dst + offset); + dst += 8; + } + if (bytes & 4) { + *(uint32_t *)dst = *(uint32_t *)(dst + offset); + dst += 4; + } + if (bytes & 2) { + *(uint16_t *)dst = *(uint16_t *)(dst + offset); + dst += 2; + } + if (bytes & 1) { + *(uint8_t *)dst = *(uint8_t *)(dst + offset); + } +} + +/* zero 0-15 bytes */ +static void INLINE +poly1305_block_zero(uint8_t *dst, size_t bytes) +{ + if (bytes & 8) { + *(uint64_t *)dst = 0; + dst += 8; + } + if (bytes & 4) { + *(uint32_t *)dst = 0; + dst += 4; + } + if (bytes & 2) { + *(uint16_t *)dst = 0; + dst += 2; + } + if (bytes & 1) { + *(uint8_t *)dst = 0; + } +} + +static size_t INLINE +poly1305_min(size_t a, size_t b) +{ + return (a < b) ? 
a : b; +} + +void +Poly1305Init(poly1305_state *state, const unsigned char key[32]) +{ + poly1305_state_internal *st = poly1305_aligned_state(state); + poly1305_power *p; + uint64_t r0, r1, r2; + uint64_t t0, t1; + + /* clamp key */ + t0 = U8TO64_LE(key + 0); + t1 = U8TO64_LE(key + 8); + r0 = t0 & 0xffc0fffffff; + t0 >>= 44; + t0 |= t1 << 20; + r1 = t0 & 0xfffffc0ffff; + t1 >>= 24; + r2 = t1 & 0x00ffffffc0f; + + /* store r in un-used space of st->P[1] */ + p = &st->P[1]; + p->R20.d[1] = (uint32_t)(r0); + p->R20.d[3] = (uint32_t)(r0 >> 32); + p->R21.d[1] = (uint32_t)(r1); + p->R21.d[3] = (uint32_t)(r1 >> 32); + p->R22.d[1] = (uint32_t)(r2); + p->R22.d[3] = (uint32_t)(r2 >> 32); + + /* store pad */ + p->R23.d[1] = U8TO32_LE(key + 16); + p->R23.d[3] = U8TO32_LE(key + 20); + p->R24.d[1] = U8TO32_LE(key + 24); + p->R24.d[3] = U8TO32_LE(key + 28); + + /* H = 0 */ + st->H[0] = _mm_setzero_si128(); + st->H[1] = _mm_setzero_si128(); + st->H[2] = _mm_setzero_si128(); + st->H[3] = _mm_setzero_si128(); + st->H[4] = _mm_setzero_si128(); + + st->started = 0; + st->leftover = 0; +} + +static void +poly1305_first_block(poly1305_state_internal *st, const uint8_t *m) +{ + const xmmi MMASK = _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask); + const xmmi FIVE = _mm_load_si128((xmmi *)poly1305_x64_sse2_5); + const xmmi HIBIT = _mm_load_si128((xmmi *)poly1305_x64_sse2_1shl128); + xmmi T5, T6; + poly1305_power *p; + uint128_t d[3]; + uint64_t r0, r1, r2; + uint64_t r20, r21, r22, s22; + uint64_t pad0, pad1; + uint64_t c; + uint64_t i; + + /* pull out stored info */ + p = &st->P[1]; + + r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1]; + r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1]; + r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1]; + pad0 = ((uint64_t)p->R23.d[3] << 32) | (uint64_t)p->R23.d[1]; + pad1 = ((uint64_t)p->R24.d[3] << 32) | (uint64_t)p->R24.d[1]; + + /* compute powers r^2,r^4 */ + r20 = r0; + r21 = r1; + r22 = r2; + for (i = 0; i < 2; 
i++) { + s22 = r22 * (5 << 2); + + d[0] = add128(mul64x64_128(r20, r20), mul64x64_128(r21 * 2, s22)); + d[1] = add128(mul64x64_128(r22, s22), mul64x64_128(r20 * 2, r21)); + d[2] = add128(mul64x64_128(r21, r21), mul64x64_128(r22 * 2, r20)); + + r20 = lo128(d[0]) & 0xfffffffffff; + c = shr128(d[0], 44); + d[1] = add128_64(d[1], c); + r21 = lo128(d[1]) & 0xfffffffffff; + c = shr128(d[1], 44); + d[2] = add128_64(d[2], c); + r22 = lo128(d[2]) & 0x3ffffffffff; + c = shr128(d[2], 42); + r20 += c * 5; + c = (r20 >> 44); + r20 = r20 & 0xfffffffffff; + r21 += c; + + p->R20.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)(r20)&0x3ffffff), _MM_SHUFFLE(1, 0, 1, 0)); + p->R21.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r20 >> 26) | (r21 << 18)) & 0x3ffffff), _MM_SHUFFLE(1, 0, 1, 0)); + p->R22.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r21 >> 8)) & 0x3ffffff), _MM_SHUFFLE(1, 0, 1, 0)); + p->R23.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r21 >> 34) | (r22 << 10)) & 0x3ffffff), _MM_SHUFFLE(1, 0, 1, 0)); + p->R24.v = _mm_shuffle_epi32(_mm_cvtsi32_si128((uint32_t)((r22 >> 16))), _MM_SHUFFLE(1, 0, 1, 0)); + p->S21.v = _mm_mul_epu32(p->R21.v, FIVE); + p->S22.v = _mm_mul_epu32(p->R22.v, FIVE); + p->S23.v = _mm_mul_epu32(p->R23.v, FIVE); + p->S24.v = _mm_mul_epu32(p->R24.v, FIVE); + p--; + } + + /* put saved info back */ + p = &st->P[1]; + p->R20.d[1] = (uint32_t)(r0); + p->R20.d[3] = (uint32_t)(r0 >> 32); + p->R21.d[1] = (uint32_t)(r1); + p->R21.d[3] = (uint32_t)(r1 >> 32); + p->R22.d[1] = (uint32_t)(r2); + p->R22.d[3] = (uint32_t)(r2 >> 32); + p->R23.d[1] = (uint32_t)(pad0); + p->R23.d[3] = (uint32_t)(pad0 >> 32); + p->R24.d[1] = (uint32_t)(pad1); + p->R24.d[3] = (uint32_t)(pad1 >> 32); + + /* H = [Mx,My] */ + T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)), _mm_loadl_epi64((xmmi *)(m + 16))); + T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)), _mm_loadl_epi64((xmmi *)(m + 24))); + st->H[0] = _mm_and_si128(MMASK, T5); + st->H[1] = 
_mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); + T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12)); + st->H[2] = _mm_and_si128(MMASK, T5); + st->H[3] = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); + st->H[4] = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT); +} + +static void +poly1305_blocks(poly1305_state_internal *st, const uint8_t *m, size_t bytes) +{ + const xmmi MMASK = _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask); + const xmmi FIVE = _mm_load_si128((xmmi *)poly1305_x64_sse2_5); + const xmmi HIBIT = _mm_load_si128((xmmi *)poly1305_x64_sse2_1shl128); + + poly1305_power *p; + xmmi H0, H1, H2, H3, H4; + xmmi T0, T1, T2, T3, T4, T5, T6; + xmmi M0, M1, M2, M3, M4; + xmmi C1, C2; + + H0 = st->H[0]; + H1 = st->H[1]; + H2 = st->H[2]; + H3 = st->H[3]; + H4 = st->H[4]; + + while (bytes >= 64) { + /* H *= [r^4,r^4] */ + p = &st->P[0]; + T0 = _mm_mul_epu32(H0, p->R20.v); + T1 = _mm_mul_epu32(H0, p->R21.v); + T2 = _mm_mul_epu32(H0, p->R22.v); + T3 = _mm_mul_epu32(H0, p->R23.v); + T4 = _mm_mul_epu32(H0, p->R24.v); + T5 = _mm_mul_epu32(H1, p->S24.v); + T6 = _mm_mul_epu32(H1, p->R20.v); + T0 = _mm_add_epi64(T0, T5); + T1 = _mm_add_epi64(T1, T6); + T5 = _mm_mul_epu32(H2, p->S23.v); + T6 = _mm_mul_epu32(H2, p->S24.v); + T0 = _mm_add_epi64(T0, T5); + T1 = _mm_add_epi64(T1, T6); + T5 = _mm_mul_epu32(H3, p->S22.v); + T6 = _mm_mul_epu32(H3, p->S23.v); + T0 = _mm_add_epi64(T0, T5); + T1 = _mm_add_epi64(T1, T6); + T5 = _mm_mul_epu32(H4, p->S21.v); + T6 = _mm_mul_epu32(H4, p->S22.v); + T0 = _mm_add_epi64(T0, T5); + T1 = _mm_add_epi64(T1, T6); + T5 = _mm_mul_epu32(H1, p->R21.v); + T6 = _mm_mul_epu32(H1, p->R22.v); + T2 = _mm_add_epi64(T2, T5); + T3 = _mm_add_epi64(T3, T6); + T5 = _mm_mul_epu32(H2, p->R20.v); + T6 = _mm_mul_epu32(H2, p->R21.v); + T2 = _mm_add_epi64(T2, T5); + T3 = _mm_add_epi64(T3, T6); + T5 = _mm_mul_epu32(H3, p->S24.v); + T6 = _mm_mul_epu32(H3, p->R20.v); + T2 = _mm_add_epi64(T2, T5); + T3 = _mm_add_epi64(T3, T6); + T5 = _mm_mul_epu32(H4, 
p->S23.v); + T6 = _mm_mul_epu32(H4, p->S24.v); + T2 = _mm_add_epi64(T2, T5); + T3 = _mm_add_epi64(T3, T6); + T5 = _mm_mul_epu32(H1, p->R23.v); + T4 = _mm_add_epi64(T4, T5); + T5 = _mm_mul_epu32(H2, p->R22.v); + T4 = _mm_add_epi64(T4, T5); + T5 = _mm_mul_epu32(H3, p->R21.v); + T4 = _mm_add_epi64(T4, T5); + T5 = _mm_mul_epu32(H4, p->R20.v); + T4 = _mm_add_epi64(T4, T5); + + /* H += [Mx,My]*[r^2,r^2] */ + T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)), _mm_loadl_epi64((xmmi *)(m + 16))); + T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)), _mm_loadl_epi64((xmmi *)(m + 24))); + M0 = _mm_and_si128(MMASK, T5); + M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); + T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12)); + M2 = _mm_and_si128(MMASK, T5); + M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); + M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT); + + p = &st->P[1]; + T5 = _mm_mul_epu32(M0, p->R20.v); + T6 = _mm_mul_epu32(M0, p->R21.v); + T0 = _mm_add_epi64(T0, T5); + T1 = _mm_add_epi64(T1, T6); + T5 = _mm_mul_epu32(M1, p->S24.v); + T6 = _mm_mul_epu32(M1, p->R20.v); + T0 = _mm_add_epi64(T0, T5); + T1 = _mm_add_epi64(T1, T6); + T5 = _mm_mul_epu32(M2, p->S23.v); + T6 = _mm_mul_epu32(M2, p->S24.v); + T0 = _mm_add_epi64(T0, T5); + T1 = _mm_add_epi64(T1, T6); + T5 = _mm_mul_epu32(M3, p->S22.v); + T6 = _mm_mul_epu32(M3, p->S23.v); + T0 = _mm_add_epi64(T0, T5); + T1 = _mm_add_epi64(T1, T6); + T5 = _mm_mul_epu32(M4, p->S21.v); + T6 = _mm_mul_epu32(M4, p->S22.v); + T0 = _mm_add_epi64(T0, T5); + T1 = _mm_add_epi64(T1, T6); + T5 = _mm_mul_epu32(M0, p->R22.v); + T6 = _mm_mul_epu32(M0, p->R23.v); + T2 = _mm_add_epi64(T2, T5); + T3 = _mm_add_epi64(T3, T6); + T5 = _mm_mul_epu32(M1, p->R21.v); + T6 = _mm_mul_epu32(M1, p->R22.v); + T2 = _mm_add_epi64(T2, T5); + T3 = _mm_add_epi64(T3, T6); + T5 = _mm_mul_epu32(M2, p->R20.v); + T6 = _mm_mul_epu32(M2, p->R21.v); + T2 = _mm_add_epi64(T2, T5); + T3 = _mm_add_epi64(T3, T6); + T5 = _mm_mul_epu32(M3, p->S24.v); 
+ T6 = _mm_mul_epu32(M3, p->R20.v); + T2 = _mm_add_epi64(T2, T5); + T3 = _mm_add_epi64(T3, T6); + T5 = _mm_mul_epu32(M4, p->S23.v); + T6 = _mm_mul_epu32(M4, p->S24.v); + T2 = _mm_add_epi64(T2, T5); + T3 = _mm_add_epi64(T3, T6); + T5 = _mm_mul_epu32(M0, p->R24.v); + T4 = _mm_add_epi64(T4, T5); + T5 = _mm_mul_epu32(M1, p->R23.v); + T4 = _mm_add_epi64(T4, T5); + T5 = _mm_mul_epu32(M2, p->R22.v); + T4 = _mm_add_epi64(T4, T5); + T5 = _mm_mul_epu32(M3, p->R21.v); + T4 = _mm_add_epi64(T4, T5); + T5 = _mm_mul_epu32(M4, p->R20.v); + T4 = _mm_add_epi64(T4, T5); + + /* H += [Mx,My] */ + T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 32)), _mm_loadl_epi64((xmmi *)(m + 48))); + T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 40)), _mm_loadl_epi64((xmmi *)(m + 56))); + M0 = _mm_and_si128(MMASK, T5); + M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); + T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12)); + M2 = _mm_and_si128(MMASK, T5); + M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); + M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT); + + T0 = _mm_add_epi64(T0, M0); + T1 = _mm_add_epi64(T1, M1); + T2 = _mm_add_epi64(T2, M2); + T3 = _mm_add_epi64(T3, M3); + T4 = _mm_add_epi64(T4, M4); + + /* reduce */ + C1 = _mm_srli_epi64(T0, 26); + C2 = _mm_srli_epi64(T3, 26); + T0 = _mm_and_si128(T0, MMASK); + T3 = _mm_and_si128(T3, MMASK); + T1 = _mm_add_epi64(T1, C1); + T4 = _mm_add_epi64(T4, C2); + C1 = _mm_srli_epi64(T1, 26); + C2 = _mm_srli_epi64(T4, 26); + T1 = _mm_and_si128(T1, MMASK); + T4 = _mm_and_si128(T4, MMASK); + T2 = _mm_add_epi64(T2, C1); + T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE)); + C1 = _mm_srli_epi64(T2, 26); + C2 = _mm_srli_epi64(T0, 26); + T2 = _mm_and_si128(T2, MMASK); + T0 = _mm_and_si128(T0, MMASK); + T3 = _mm_add_epi64(T3, C1); + T1 = _mm_add_epi64(T1, C2); + C1 = _mm_srli_epi64(T3, 26); + T3 = _mm_and_si128(T3, MMASK); + T4 = _mm_add_epi64(T4, C1); + + /* H = (H*[r^4,r^4] + [Mx,My]*[r^2,r^2] + [Mx,My]) */ + H0 = T0; + H1 = T1; + 
H2 = T2; + H3 = T3; + H4 = T4; + + m += 64; + bytes -= 64; + } + + st->H[0] = H0; + st->H[1] = H1; + st->H[2] = H2; + st->H[3] = H3; + st->H[4] = H4; +} + +static size_t +poly1305_combine(poly1305_state_internal *st, const uint8_t *m, size_t bytes) +{ + const xmmi MMASK = _mm_load_si128((xmmi *)poly1305_x64_sse2_message_mask); + const xmmi HIBIT = _mm_load_si128((xmmi *)poly1305_x64_sse2_1shl128); + const xmmi FIVE = _mm_load_si128((xmmi *)poly1305_x64_sse2_5); + + poly1305_power *p; + xmmi H0, H1, H2, H3, H4; + xmmi M0, M1, M2, M3, M4; + xmmi T0, T1, T2, T3, T4, T5, T6; + xmmi C1, C2; + + uint64_t r0, r1, r2; + uint64_t t0, t1, t2, t3, t4; + uint64_t c; + size_t consumed = 0; + + H0 = st->H[0]; + H1 = st->H[1]; + H2 = st->H[2]; + H3 = st->H[3]; + H4 = st->H[4]; + + /* p = [r^2,r^2] */ + p = &st->P[1]; + + if (bytes >= 32) { + /* H *= [r^2,r^2] */ + T0 = _mm_mul_epu32(H0, p->R20.v); + T1 = _mm_mul_epu32(H0, p->R21.v); + T2 = _mm_mul_epu32(H0, p->R22.v); + T3 = _mm_mul_epu32(H0, p->R23.v); + T4 = _mm_mul_epu32(H0, p->R24.v); + T5 = _mm_mul_epu32(H1, p->S24.v); + T6 = _mm_mul_epu32(H1, p->R20.v); + T0 = _mm_add_epi64(T0, T5); + T1 = _mm_add_epi64(T1, T6); + T5 = _mm_mul_epu32(H2, p->S23.v); + T6 = _mm_mul_epu32(H2, p->S24.v); + T0 = _mm_add_epi64(T0, T5); + T1 = _mm_add_epi64(T1, T6); + T5 = _mm_mul_epu32(H3, p->S22.v); + T6 = _mm_mul_epu32(H3, p->S23.v); + T0 = _mm_add_epi64(T0, T5); + T1 = _mm_add_epi64(T1, T6); + T5 = _mm_mul_epu32(H4, p->S21.v); + T6 = _mm_mul_epu32(H4, p->S22.v); + T0 = _mm_add_epi64(T0, T5); + T1 = _mm_add_epi64(T1, T6); + T5 = _mm_mul_epu32(H1, p->R21.v); + T6 = _mm_mul_epu32(H1, p->R22.v); + T2 = _mm_add_epi64(T2, T5); + T3 = _mm_add_epi64(T3, T6); + T5 = _mm_mul_epu32(H2, p->R20.v); + T6 = _mm_mul_epu32(H2, p->R21.v); + T2 = _mm_add_epi64(T2, T5); + T3 = _mm_add_epi64(T3, T6); + T5 = _mm_mul_epu32(H3, p->S24.v); + T6 = _mm_mul_epu32(H3, p->R20.v); + T2 = _mm_add_epi64(T2, T5); + T3 = _mm_add_epi64(T3, T6); + T5 = _mm_mul_epu32(H4, p->S23.v); 
+ T6 = _mm_mul_epu32(H4, p->S24.v); + T2 = _mm_add_epi64(T2, T5); + T3 = _mm_add_epi64(T3, T6); + T5 = _mm_mul_epu32(H1, p->R23.v); + T4 = _mm_add_epi64(T4, T5); + T5 = _mm_mul_epu32(H2, p->R22.v); + T4 = _mm_add_epi64(T4, T5); + T5 = _mm_mul_epu32(H3, p->R21.v); + T4 = _mm_add_epi64(T4, T5); + T5 = _mm_mul_epu32(H4, p->R20.v); + T4 = _mm_add_epi64(T4, T5); + + /* H += [Mx,My] */ + T5 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 0)), _mm_loadl_epi64((xmmi *)(m + 16))); + T6 = _mm_unpacklo_epi64(_mm_loadl_epi64((xmmi *)(m + 8)), _mm_loadl_epi64((xmmi *)(m + 24))); + M0 = _mm_and_si128(MMASK, T5); + M1 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); + T5 = _mm_or_si128(_mm_srli_epi64(T5, 52), _mm_slli_epi64(T6, 12)); + M2 = _mm_and_si128(MMASK, T5); + M3 = _mm_and_si128(MMASK, _mm_srli_epi64(T5, 26)); + M4 = _mm_or_si128(_mm_srli_epi64(T6, 40), HIBIT); + + T0 = _mm_add_epi64(T0, M0); + T1 = _mm_add_epi64(T1, M1); + T2 = _mm_add_epi64(T2, M2); + T3 = _mm_add_epi64(T3, M3); + T4 = _mm_add_epi64(T4, M4); + + /* reduce */ + C1 = _mm_srli_epi64(T0, 26); + C2 = _mm_srli_epi64(T3, 26); + T0 = _mm_and_si128(T0, MMASK); + T3 = _mm_and_si128(T3, MMASK); + T1 = _mm_add_epi64(T1, C1); + T4 = _mm_add_epi64(T4, C2); + C1 = _mm_srli_epi64(T1, 26); + C2 = _mm_srli_epi64(T4, 26); + T1 = _mm_and_si128(T1, MMASK); + T4 = _mm_and_si128(T4, MMASK); + T2 = _mm_add_epi64(T2, C1); + T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE)); + C1 = _mm_srli_epi64(T2, 26); + C2 = _mm_srli_epi64(T0, 26); + T2 = _mm_and_si128(T2, MMASK); + T0 = _mm_and_si128(T0, MMASK); + T3 = _mm_add_epi64(T3, C1); + T1 = _mm_add_epi64(T1, C2); + C1 = _mm_srli_epi64(T3, 26); + T3 = _mm_and_si128(T3, MMASK); + T4 = _mm_add_epi64(T4, C1); + + /* H = (H*[r^2,r^2] + [Mx,My]) */ + H0 = T0; + H1 = T1; + H2 = T2; + H3 = T3; + H4 = T4; + + consumed = 32; + } + + /* finalize, H *= [r^2,r] */ + r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1]; + r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1]; + r2 = 
((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1]; + + p->R20.d[2] = (uint32_t)(r0)&0x3ffffff; + p->R21.d[2] = (uint32_t)((r0 >> 26) | (r1 << 18)) & 0x3ffffff; + p->R22.d[2] = (uint32_t)((r1 >> 8)) & 0x3ffffff; + p->R23.d[2] = (uint32_t)((r1 >> 34) | (r2 << 10)) & 0x3ffffff; + p->R24.d[2] = (uint32_t)((r2 >> 16)); + p->S21.d[2] = p->R21.d[2] * 5; + p->S22.d[2] = p->R22.d[2] * 5; + p->S23.d[2] = p->R23.d[2] * 5; + p->S24.d[2] = p->R24.d[2] * 5; + + /* H *= [r^2,r] */ + T0 = _mm_mul_epu32(H0, p->R20.v); + T1 = _mm_mul_epu32(H0, p->R21.v); + T2 = _mm_mul_epu32(H0, p->R22.v); + T3 = _mm_mul_epu32(H0, p->R23.v); + T4 = _mm_mul_epu32(H0, p->R24.v); + T5 = _mm_mul_epu32(H1, p->S24.v); + T6 = _mm_mul_epu32(H1, p->R20.v); + T0 = _mm_add_epi64(T0, T5); + T1 = _mm_add_epi64(T1, T6); + T5 = _mm_mul_epu32(H2, p->S23.v); + T6 = _mm_mul_epu32(H2, p->S24.v); + T0 = _mm_add_epi64(T0, T5); + T1 = _mm_add_epi64(T1, T6); + T5 = _mm_mul_epu32(H3, p->S22.v); + T6 = _mm_mul_epu32(H3, p->S23.v); + T0 = _mm_add_epi64(T0, T5); + T1 = _mm_add_epi64(T1, T6); + T5 = _mm_mul_epu32(H4, p->S21.v); + T6 = _mm_mul_epu32(H4, p->S22.v); + T0 = _mm_add_epi64(T0, T5); + T1 = _mm_add_epi64(T1, T6); + T5 = _mm_mul_epu32(H1, p->R21.v); + T6 = _mm_mul_epu32(H1, p->R22.v); + T2 = _mm_add_epi64(T2, T5); + T3 = _mm_add_epi64(T3, T6); + T5 = _mm_mul_epu32(H2, p->R20.v); + T6 = _mm_mul_epu32(H2, p->R21.v); + T2 = _mm_add_epi64(T2, T5); + T3 = _mm_add_epi64(T3, T6); + T5 = _mm_mul_epu32(H3, p->S24.v); + T6 = _mm_mul_epu32(H3, p->R20.v); + T2 = _mm_add_epi64(T2, T5); + T3 = _mm_add_epi64(T3, T6); + T5 = _mm_mul_epu32(H4, p->S23.v); + T6 = _mm_mul_epu32(H4, p->S24.v); + T2 = _mm_add_epi64(T2, T5); + T3 = _mm_add_epi64(T3, T6); + T5 = _mm_mul_epu32(H1, p->R23.v); + T4 = _mm_add_epi64(T4, T5); + T5 = _mm_mul_epu32(H2, p->R22.v); + T4 = _mm_add_epi64(T4, T5); + T5 = _mm_mul_epu32(H3, p->R21.v); + T4 = _mm_add_epi64(T4, T5); + T5 = _mm_mul_epu32(H4, p->R20.v); + T4 = _mm_add_epi64(T4, T5); + + C1 = 
_mm_srli_epi64(T0, 26); + C2 = _mm_srli_epi64(T3, 26); + T0 = _mm_and_si128(T0, MMASK); + T3 = _mm_and_si128(T3, MMASK); + T1 = _mm_add_epi64(T1, C1); + T4 = _mm_add_epi64(T4, C2); + C1 = _mm_srli_epi64(T1, 26); + C2 = _mm_srli_epi64(T4, 26); + T1 = _mm_and_si128(T1, MMASK); + T4 = _mm_and_si128(T4, MMASK); + T2 = _mm_add_epi64(T2, C1); + T0 = _mm_add_epi64(T0, _mm_mul_epu32(C2, FIVE)); + C1 = _mm_srli_epi64(T2, 26); + C2 = _mm_srli_epi64(T0, 26); + T2 = _mm_and_si128(T2, MMASK); + T0 = _mm_and_si128(T0, MMASK); + T3 = _mm_add_epi64(T3, C1); + T1 = _mm_add_epi64(T1, C2); + C1 = _mm_srli_epi64(T3, 26); + T3 = _mm_and_si128(T3, MMASK); + T4 = _mm_add_epi64(T4, C1); + + /* H = H[0]+H[1] */ + H0 = _mm_add_epi64(T0, _mm_srli_si128(T0, 8)); + H1 = _mm_add_epi64(T1, _mm_srli_si128(T1, 8)); + H2 = _mm_add_epi64(T2, _mm_srli_si128(T2, 8)); + H3 = _mm_add_epi64(T3, _mm_srli_si128(T3, 8)); + H4 = _mm_add_epi64(T4, _mm_srli_si128(T4, 8)); + + t0 = _mm_cvtsi128_si32(H0); + c = (t0 >> 26); + t0 &= 0x3ffffff; + t1 = _mm_cvtsi128_si32(H1) + c; + c = (t1 >> 26); + t1 &= 0x3ffffff; + t2 = _mm_cvtsi128_si32(H2) + c; + c = (t2 >> 26); + t2 &= 0x3ffffff; + t3 = _mm_cvtsi128_si32(H3) + c; + c = (t3 >> 26); + t3 &= 0x3ffffff; + t4 = _mm_cvtsi128_si32(H4) + c; + c = (t4 >> 26); + t4 &= 0x3ffffff; + t0 = t0 + (c * 5); + c = (t0 >> 26); + t0 &= 0x3ffffff; + t1 = t1 + c; + + st->HH[0] = ((t0) | (t1 << 26)) & 0xfffffffffffull; + st->HH[1] = ((t1 >> 18) | (t2 << 8) | (t3 << 34)) & 0xfffffffffffull; + st->HH[2] = ((t3 >> 10) | (t4 << 16)) & 0x3ffffffffffull; + + return consumed; +} + +void +Poly1305Update(poly1305_state *state, const unsigned char *m, size_t bytes) +{ + poly1305_state_internal *st = poly1305_aligned_state(state); + size_t want; + + /* need at least 32 initial bytes to start the accelerated branch */ + if (!st->started) { + if ((st->leftover == 0) && (bytes > 32)) { + poly1305_first_block(st, m); + m += 32; + bytes -= 32; + } else { + want = poly1305_min(32 - st->leftover, 
bytes); + poly1305_block_copy(st->buffer + st->leftover, m, want); + bytes -= want; + m += want; + st->leftover += want; + if ((st->leftover < 32) || (bytes == 0)) + return; + poly1305_first_block(st, st->buffer); + st->leftover = 0; + } + st->started = 1; + } + + /* handle leftover */ + if (st->leftover) { + want = poly1305_min(64 - st->leftover, bytes); + poly1305_block_copy(st->buffer + st->leftover, m, want); + bytes -= want; + m += want; + st->leftover += want; + if (st->leftover < 64) + return; + poly1305_blocks(st, st->buffer, 64); + st->leftover = 0; + } + + /* process 64 byte blocks */ + if (bytes >= 64) { + want = (bytes & ~63); + poly1305_blocks(st, m, want); + m += want; + bytes -= want; + } + + if (bytes) { + poly1305_block_copy(st->buffer + st->leftover, m, bytes); + st->leftover += bytes; + } +} + +void +Poly1305Finish(poly1305_state *state, unsigned char mac[16]) +{ + poly1305_state_internal *st = poly1305_aligned_state(state); + size_t leftover = st->leftover; + uint8_t *m = st->buffer; + uint128_t d[3]; + uint64_t h0, h1, h2; + uint64_t t0, t1; + uint64_t g0, g1, g2, c, nc; + uint64_t r0, r1, r2, s1, s2; + poly1305_power *p; + + if (st->started) { + size_t consumed = poly1305_combine(st, m, leftover); + leftover -= consumed; + m += consumed; + } + + /* st->HH will either be 0 or have the combined result */ + h0 = st->HH[0]; + h1 = st->HH[1]; + h2 = st->HH[2]; + + p = &st->P[1]; + r0 = ((uint64_t)p->R20.d[3] << 32) | (uint64_t)p->R20.d[1]; + r1 = ((uint64_t)p->R21.d[3] << 32) | (uint64_t)p->R21.d[1]; + r2 = ((uint64_t)p->R22.d[3] << 32) | (uint64_t)p->R22.d[1]; + s1 = r1 * (5 << 2); + s2 = r2 * (5 << 2); + + if (leftover < 16) + goto poly1305_donna_atmost15bytes; + +poly1305_donna_atleast16bytes: + t0 = U8TO64_LE(m + 0); + t1 = U8TO64_LE(m + 8); + h0 += t0 & 0xfffffffffff; + t0 = shr128_pair(t1, t0, 44); + h1 += t0 & 0xfffffffffff; + h2 += (t1 >> 24) | ((uint64_t)1 << 40); + +poly1305_donna_mul: + d[0] = add128(add128(mul64x64_128(h0, r0), 
mul64x64_128(h1, s2)), mul64x64_128(h2, s1)); + d[1] = add128(add128(mul64x64_128(h0, r1), mul64x64_128(h1, r0)), mul64x64_128(h2, s2)); + d[2] = add128(add128(mul64x64_128(h0, r2), mul64x64_128(h1, r1)), mul64x64_128(h2, r0)); + h0 = lo128(d[0]) & 0xfffffffffff; + c = shr128(d[0], 44); + d[1] = add128_64(d[1], c); + h1 = lo128(d[1]) & 0xfffffffffff; + c = shr128(d[1], 44); + d[2] = add128_64(d[2], c); + h2 = lo128(d[2]) & 0x3ffffffffff; + c = shr128(d[2], 42); + h0 += c * 5; + + m += 16; + leftover -= 16; + if (leftover >= 16) + goto poly1305_donna_atleast16bytes; + +/* final bytes */ +poly1305_donna_atmost15bytes: + if (!leftover) + goto poly1305_donna_finish; + + m[leftover++] = 1; + poly1305_block_zero(m + leftover, 16 - leftover); + leftover = 16; + + t0 = U8TO64_LE(m + 0); + t1 = U8TO64_LE(m + 8); + h0 += t0 & 0xfffffffffff; + t0 = shr128_pair(t1, t0, 44); + h1 += t0 & 0xfffffffffff; + h2 += (t1 >> 24); + + goto poly1305_donna_mul; + +poly1305_donna_finish: + c = (h0 >> 44); + h0 &= 0xfffffffffff; + h1 += c; + c = (h1 >> 44); + h1 &= 0xfffffffffff; + h2 += c; + c = (h2 >> 42); + h2 &= 0x3ffffffffff; + h0 += c * 5; + + g0 = h0 + 5; + c = (g0 >> 44); + g0 &= 0xfffffffffff; + g1 = h1 + c; + c = (g1 >> 44); + g1 &= 0xfffffffffff; + g2 = h2 + c - ((uint64_t)1 << 42); + + c = (g2 >> 63) - 1; + nc = ~c; + h0 = (h0 & nc) | (g0 & c); + h1 = (h1 & nc) | (g1 & c); + h2 = (h2 & nc) | (g2 & c); + + /* pad */ + t0 = ((uint64_t)p->R23.d[3] << 32) | (uint64_t)p->R23.d[1]; + t1 = ((uint64_t)p->R24.d[3] << 32) | (uint64_t)p->R24.d[1]; + h0 += (t0 & 0xfffffffffff); + c = (h0 >> 44); + h0 &= 0xfffffffffff; + t0 = shr128_pair(t1, t0, 44); + h1 += (t0 & 0xfffffffffff) + c; + c = (h1 >> 44); + h1 &= 0xfffffffffff; + t1 = (t1 >> 24); + h2 += (t1) + c; + + U64TO8_LE(mac + 0, ((h0) | (h1 << 44))); + U64TO8_LE(mac + 8, ((h1 >> 20) | (h2 << 24))); +} diff --git a/security/nss/lib/freebl/poly1305.c b/security/nss/lib/freebl/poly1305.c new file mode 100644 index 000000000..eb3e3cd55 --- 
/dev/null +++ b/security/nss/lib/freebl/poly1305.c @@ -0,0 +1,314 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* This implementation of poly1305 is by Andrew Moon + * (https://github.com/floodyberry/poly1305-donna) and released as public + * domain. */ + +#include + +#include "poly1305.h" + +#if defined(_MSC_VER) && _MSC_VER < 1600 +#include "prtypes.h" +typedef PRUint32 uint32_t; +typedef PRUint64 uint64_t; +#else +#include +#endif + +#if defined(NSS_X86) || defined(NSS_X64) +/* We can assume little-endian. */ +static uint32_t +U8TO32_LE(const unsigned char *m) +{ + uint32_t r; + memcpy(&r, m, sizeof(r)); + return r; +} + +static void +U32TO8_LE(unsigned char *m, uint32_t v) +{ + memcpy(m, &v, sizeof(v)); +} +#else +static uint32_t +U8TO32_LE(const unsigned char *m) +{ + return (uint32_t)m[0] | + (uint32_t)m[1] << 8 | + (uint32_t)m[2] << 16 | + (uint32_t)m[3] << 24; +} + +static void +U32TO8_LE(unsigned char *m, uint32_t v) +{ + m[0] = v; + m[1] = v >> 8; + m[2] = v >> 16; + m[3] = v >> 24; +} +#endif + +static uint64_t +mul32x32_64(uint32_t a, uint32_t b) +{ + return (uint64_t)a * b; +} + +struct poly1305_state_st { + uint32_t r0, r1, r2, r3, r4; + uint32_t s1, s2, s3, s4; + uint32_t h0, h1, h2, h3, h4; + unsigned char buf[16]; + unsigned int buf_used; + unsigned char key[16]; +}; + +/* update updates |state| given some amount of input data. This function may + * only be called with a |len| that is not a multiple of 16 at the end of the + * data. Otherwise the input must be buffered into 16 byte blocks. 
*/ +static void +update(struct poly1305_state_st *state, const unsigned char *in, + size_t len) +{ + uint32_t t0, t1, t2, t3; + uint64_t t[5]; + uint32_t b; + uint64_t c; + size_t j; + unsigned char mp[16]; + + if (len < 16) + goto poly1305_donna_atmost15bytes; + +poly1305_donna_16bytes: + t0 = U8TO32_LE(in); + t1 = U8TO32_LE(in + 4); + t2 = U8TO32_LE(in + 8); + t3 = U8TO32_LE(in + 12); + + in += 16; + len -= 16; + + state->h0 += t0 & 0x3ffffff; + state->h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff; + state->h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff; + state->h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff; + state->h4 += (t3 >> 8) | (1 << 24); + +poly1305_donna_mul: + t[0] = mul32x32_64(state->h0, state->r0) + + mul32x32_64(state->h1, state->s4) + + mul32x32_64(state->h2, state->s3) + + mul32x32_64(state->h3, state->s2) + + mul32x32_64(state->h4, state->s1); + t[1] = mul32x32_64(state->h0, state->r1) + + mul32x32_64(state->h1, state->r0) + + mul32x32_64(state->h2, state->s4) + + mul32x32_64(state->h3, state->s3) + + mul32x32_64(state->h4, state->s2); + t[2] = mul32x32_64(state->h0, state->r2) + + mul32x32_64(state->h1, state->r1) + + mul32x32_64(state->h2, state->r0) + + mul32x32_64(state->h3, state->s4) + + mul32x32_64(state->h4, state->s3); + t[3] = mul32x32_64(state->h0, state->r3) + + mul32x32_64(state->h1, state->r2) + + mul32x32_64(state->h2, state->r1) + + mul32x32_64(state->h3, state->r0) + + mul32x32_64(state->h4, state->s4); + t[4] = mul32x32_64(state->h0, state->r4) + + mul32x32_64(state->h1, state->r3) + + mul32x32_64(state->h2, state->r2) + + mul32x32_64(state->h3, state->r1) + + mul32x32_64(state->h4, state->r0); + + state->h0 = (uint32_t)t[0] & 0x3ffffff; + c = (t[0] >> 26); + t[1] += c; + state->h1 = (uint32_t)t[1] & 0x3ffffff; + b = (uint32_t)(t[1] >> 26); + t[2] += b; + state->h2 = (uint32_t)t[2] & 0x3ffffff; + b = (uint32_t)(t[2] >> 26); + t[3] += b; + state->h3 = (uint32_t)t[3] & 0x3ffffff; + b = (uint32_t)(t[3] >> 
26); + t[4] += b; + state->h4 = (uint32_t)t[4] & 0x3ffffff; + b = (uint32_t)(t[4] >> 26); + state->h0 += b * 5; + + if (len >= 16) + goto poly1305_donna_16bytes; + +/* final bytes */ +poly1305_donna_atmost15bytes: + if (!len) + return; + + for (j = 0; j < len; j++) + mp[j] = in[j]; + mp[j++] = 1; + for (; j < 16; j++) + mp[j] = 0; + len = 0; + + t0 = U8TO32_LE(mp + 0); + t1 = U8TO32_LE(mp + 4); + t2 = U8TO32_LE(mp + 8); + t3 = U8TO32_LE(mp + 12); + + state->h0 += t0 & 0x3ffffff; + state->h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff; + state->h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff; + state->h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff; + state->h4 += (t3 >> 8); + + goto poly1305_donna_mul; +} + +void +Poly1305Init(poly1305_state *statep, const unsigned char key[32]) +{ + struct poly1305_state_st *state = (struct poly1305_state_st *)statep; + uint32_t t0, t1, t2, t3; + + t0 = U8TO32_LE(key + 0); + t1 = U8TO32_LE(key + 4); + t2 = U8TO32_LE(key + 8); + t3 = U8TO32_LE(key + 12); + + /* precompute multipliers */ + state->r0 = t0 & 0x3ffffff; + t0 >>= 26; + t0 |= t1 << 6; + state->r1 = t0 & 0x3ffff03; + t1 >>= 20; + t1 |= t2 << 12; + state->r2 = t1 & 0x3ffc0ff; + t2 >>= 14; + t2 |= t3 << 18; + state->r3 = t2 & 0x3f03fff; + t3 >>= 8; + state->r4 = t3 & 0x00fffff; + + state->s1 = state->r1 * 5; + state->s2 = state->r2 * 5; + state->s3 = state->r3 * 5; + state->s4 = state->r4 * 5; + + /* init state */ + state->h0 = 0; + state->h1 = 0; + state->h2 = 0; + state->h3 = 0; + state->h4 = 0; + + state->buf_used = 0; + memcpy(state->key, key + 16, sizeof(state->key)); +} + +void +Poly1305Update(poly1305_state *statep, const unsigned char *in, + size_t in_len) +{ + unsigned int i; + struct poly1305_state_st *state = (struct poly1305_state_st *)statep; + + if (state->buf_used) { + unsigned int todo = 16 - state->buf_used; + if (todo > in_len) + todo = in_len; + for (i = 0; i < todo; i++) + state->buf[state->buf_used + i] = in[i]; + state->buf_used += 
todo; + in_len -= todo; + in += todo; + + if (state->buf_used == 16) { + update(state, state->buf, 16); + state->buf_used = 0; + } + } + + if (in_len >= 16) { + size_t todo = in_len & ~0xf; + update(state, in, todo); + in += todo; + in_len &= 0xf; + } + + if (in_len) { + for (i = 0; i < in_len; i++) + state->buf[i] = in[i]; + state->buf_used = in_len; + } +} + +void +Poly1305Finish(poly1305_state *statep, unsigned char mac[16]) +{ + struct poly1305_state_st *state = (struct poly1305_state_st *)statep; + uint64_t f0, f1, f2, f3; + uint32_t g0, g1, g2, g3, g4; + uint32_t b, nb; + + if (state->buf_used) + update(state, state->buf, state->buf_used); + + b = state->h0 >> 26; + state->h0 = state->h0 & 0x3ffffff; + state->h1 += b; + b = state->h1 >> 26; + state->h1 = state->h1 & 0x3ffffff; + state->h2 += b; + b = state->h2 >> 26; + state->h2 = state->h2 & 0x3ffffff; + state->h3 += b; + b = state->h3 >> 26; + state->h3 = state->h3 & 0x3ffffff; + state->h4 += b; + b = state->h4 >> 26; + state->h4 = state->h4 & 0x3ffffff; + state->h0 += b * 5; + + g0 = state->h0 + 5; + b = g0 >> 26; + g0 &= 0x3ffffff; + g1 = state->h1 + b; + b = g1 >> 26; + g1 &= 0x3ffffff; + g2 = state->h2 + b; + b = g2 >> 26; + g2 &= 0x3ffffff; + g3 = state->h3 + b; + b = g3 >> 26; + g3 &= 0x3ffffff; + g4 = state->h4 + b - (1 << 26); + + b = (g4 >> 31) - 1; + nb = ~b; + state->h0 = (state->h0 & nb) | (g0 & b); + state->h1 = (state->h1 & nb) | (g1 & b); + state->h2 = (state->h2 & nb) | (g2 & b); + state->h3 = (state->h3 & nb) | (g3 & b); + state->h4 = (state->h4 & nb) | (g4 & b); + + f0 = ((state->h0) | (state->h1 << 26)) + (uint64_t)U8TO32_LE(&state->key[0]); + f1 = ((state->h1 >> 6) | (state->h2 << 20)) + (uint64_t)U8TO32_LE(&state->key[4]); + f2 = ((state->h2 >> 12) | (state->h3 << 14)) + (uint64_t)U8TO32_LE(&state->key[8]); + f3 = ((state->h3 >> 18) | (state->h4 << 8)) + (uint64_t)U8TO32_LE(&state->key[12]); + + U32TO8_LE(&mac[0], (uint32_t)f0); + f1 += (f0 >> 32); + U32TO8_LE(&mac[4], (uint32_t)f1); + 
f2 += (f1 >> 32); + U32TO8_LE(&mac[8], (uint32_t)f2); + f3 += (f2 >> 32); + U32TO8_LE(&mac[12], (uint32_t)f3); +} diff --git a/security/nss/lib/freebl/poly1305.h b/security/nss/lib/freebl/poly1305.h new file mode 100644 index 000000000..0a463483f --- /dev/null +++ b/security/nss/lib/freebl/poly1305.h @@ -0,0 +1,28 @@ +/* + * poly1305.h - header file for Poly1305 implementation. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef FREEBL_POLY1305_H_ +#define FREEBL_POLY1305_H_ + +typedef unsigned char poly1305_state[512]; + +/* Poly1305Init sets up |state| so that it can be used to calculate an + * authentication tag with the one-time key |key|. Note that |key| is a + * one-time key and therefore there is no `reset' method because that would + * enable several messages to be authenticated with the same key. */ +extern void Poly1305Init(poly1305_state* state, const unsigned char key[32]); + +/* Poly1305Update processes |in_len| bytes from |in|. It can be called zero or + * more times after poly1305_init. */ +extern void Poly1305Update(poly1305_state* state, const unsigned char* in, + size_t inLen); + +/* Poly1305Finish completes the poly1305 calculation and writes a 16 byte + * authentication tag to |mac|. */ +extern void Poly1305Finish(poly1305_state* state, unsigned char mac[16]); + +#endif /* FREEBL_POLY1305_H_ */ diff --git a/security/nss/lib/freebl/pqg.c b/security/nss/lib/freebl/pqg.c new file mode 100644 index 000000000..2f24afd24 --- /dev/null +++ b/security/nss/lib/freebl/pqg.c @@ -0,0 +1,1878 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * PQG parameter generation/verification. Based on FIPS 186-3. 
+ */ +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "prerr.h" +#include "secerr.h" + +#include "prtypes.h" +#include "blapi.h" +#include "secitem.h" +#include "mpi.h" +#include "mpprime.h" +#include "mplogic.h" +#include "secmpi.h" + +#define MAX_ITERATIONS 1000 /* Maximum number of iterations of primegen */ + +typedef enum { + FIPS186_1_TYPE, /* Probablistic */ + FIPS186_3_TYPE, /* Probablistic */ + FIPS186_3_ST_TYPE /* Shawe-Taylor provable */ +} pqgGenType; + +/* + * These test iterations are quite a bit larger than we previously had. + * This is because FIPS 186-3 is worried about the primes in PQG generation. + * It may be possible to purposefully construct composites which more + * iterations of Miller-Rabin than the for your normal randomly selected + * numbers.There are 3 ways to counter this: 1) use one of the cool provably + * prime algorithms (which would require a lot more work than DSA-2 deservers. + * 2) add a Lucas primality test (which requires coding a Lucas primality test, + * or 3) use a larger M-R test count. I chose the latter. It increases the time + * that it takes to prove the selected prime, but it shouldn't increase the + * overall time to run the algorithm (non-primes should still faile M-R + * realively quickly). If you want to get that last bit of performance, + * implement Lucas and adjust these two functions. See FIPS 186-3 Appendix C + * and F for more information. + */ +static int +prime_testcount_p(int L, int N) +{ + switch (L) { + case 1024: + return 40; + case 2048: + return 56; + case 3072: + return 64; + default: + break; + } + return 50; /* L = 512-960 */ +} + +/* The q numbers are different if you run M-R followd by Lucas. 
I created + * a separate function so if someone wanted to add the Lucas check, they + * could do so fairly easily */ +static int +prime_testcount_q(int L, int N) +{ + return prime_testcount_p(L, N); +} + +/* + * generic function to make sure our input matches DSA2 requirements + * this gives us one place to go if we need to bump the requirements in the + * future. + */ +static SECStatus +pqg_validate_dsa2(unsigned int L, unsigned int N) +{ + + switch (L) { + case 1024: + if (N != DSA1_Q_BITS) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + break; + case 2048: + if ((N != 224) && (N != 256)) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + break; + case 3072: + if (N != 256) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + break; + default: + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + return SECSuccess; +} + +static unsigned int +pqg_get_default_N(unsigned int L) +{ + unsigned int N = 0; + switch (L) { + case 1024: + N = DSA1_Q_BITS; + break; + case 2048: + N = 224; + break; + case 3072: + N = 256; + break; + default: + PORT_SetError(SEC_ERROR_INVALID_ARGS); + break; /* N already set to zero */ + } + return N; +} + +/* + * Select the lowest hash algorithm usable + */ +static HASH_HashType +getFirstHash(unsigned int L, unsigned int N) +{ + if (N < 224) { + return HASH_AlgSHA1; + } + if (N < 256) { + return HASH_AlgSHA224; + } + if (N < 384) { + return HASH_AlgSHA256; + } + if (N < 512) { + return HASH_AlgSHA384; + } + return HASH_AlgSHA512; +} + +/* + * find the next usable hash algorthim + */ +static HASH_HashType +getNextHash(HASH_HashType hashtype) +{ + switch (hashtype) { + case HASH_AlgSHA1: + hashtype = HASH_AlgSHA224; + break; + case HASH_AlgSHA224: + hashtype = HASH_AlgSHA256; + break; + case HASH_AlgSHA256: + hashtype = HASH_AlgSHA384; + break; + case HASH_AlgSHA384: + hashtype = HASH_AlgSHA512; + break; + case HASH_AlgSHA512: + default: + hashtype = HASH_AlgTOTAL; + 
break; + } + return hashtype; +} + +static unsigned int +HASH_ResultLen(HASH_HashType type) +{ + const SECHashObject *hash_obj = HASH_GetRawHashObject(type); + PORT_Assert(hash_obj != NULL); + if (hash_obj == NULL) { + /* type is always a valid HashType. Thus a null hash_obj must be a bug */ + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return 0; + } + PORT_Assert(hash_obj->length != 0); + return hash_obj->length; +} + +static SECStatus +HASH_HashBuf(HASH_HashType type, unsigned char *dest, + const unsigned char *src, PRUint32 src_len) +{ + const SECHashObject *hash_obj = HASH_GetRawHashObject(type); + void *hashcx = NULL; + unsigned int dummy; + + if (hash_obj == NULL) { + return SECFailure; + } + + hashcx = hash_obj->create(); + if (hashcx == NULL) { + return SECFailure; + } + hash_obj->begin(hashcx); + hash_obj->update(hashcx, src, src_len); + hash_obj->end(hashcx, dest, &dummy, hash_obj->length); + hash_obj->destroy(hashcx, PR_TRUE); + return SECSuccess; +} + +unsigned int +PQG_GetLength(const SECItem *obj) +{ + unsigned int len = obj->len; + + if (obj->data == NULL) { + return 0; + } + if (len > 1 && obj->data[0] == 0) { + len--; + } + return len; +} + +SECStatus +PQG_Check(const PQGParams *params) +{ + unsigned int L, N; + SECStatus rv = SECSuccess; + + if (params == NULL) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + L = PQG_GetLength(¶ms->prime) * PR_BITS_PER_BYTE; + N = PQG_GetLength(¶ms->subPrime) * PR_BITS_PER_BYTE; + + if (L < 1024) { + int j; + + /* handle DSA1 pqg parameters with less thatn 1024 bits*/ + if (N != DSA1_Q_BITS) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + j = PQG_PBITS_TO_INDEX(L); + if (j < 0) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + rv = SECFailure; + } + } else { + /* handle DSA2 parameters (includes DSA1, 1024 bits) */ + rv = pqg_validate_dsa2(L, N); + } + return rv; +} + +HASH_HashType +PQG_GetHashType(const PQGParams *params) +{ + unsigned int L, N; + + if (params == NULL) { + 
PORT_SetError(SEC_ERROR_INVALID_ARGS); + return HASH_AlgNULL; + } + + L = PQG_GetLength(¶ms->prime) * PR_BITS_PER_BYTE; + N = PQG_GetLength(¶ms->subPrime) * PR_BITS_PER_BYTE; + return getFirstHash(L, N); +} + +/* Get a seed for generating P and Q. If in testing mode, copy in the +** seed from FIPS 186-1 appendix 5. Otherwise, obtain bytes from the +** global random number generator. +*/ +static SECStatus +getPQseed(SECItem *seed, PLArenaPool *arena) +{ + SECStatus rv; + + if (!seed->data) { + seed->data = (unsigned char *)PORT_ArenaZAlloc(arena, seed->len); + } + if (!seed->data) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return SECFailure; + } + rv = RNG_GenerateGlobalRandomBytes(seed->data, seed->len); + /* + * NIST CMVP disallows a sequence of 20 bytes with the most + * significant byte equal to 0. Perhaps they interpret + * "a sequence of at least 160 bits" as "a number >= 2^159". + * So we always set the most significant bit to 1. (bug 334533) + */ + seed->data[0] |= 0x80; + return rv; +} + +/* Generate a candidate h value. If in testing mode, use the h value +** specified in FIPS 186-1 appendix 5, h = 2. Otherwise, obtain bytes +** from the global random number generator. 
+*/ +static SECStatus +generate_h_candidate(SECItem *hit, mp_int *H) +{ + SECStatus rv = SECSuccess; + mp_err err = MP_OKAY; +#ifdef FIPS_186_1_A5_TEST + memset(hit->data, 0, hit->len); + hit->data[hit->len - 1] = 0x02; +#else + rv = RNG_GenerateGlobalRandomBytes(hit->data, hit->len); +#endif + if (rv) + return SECFailure; + err = mp_read_unsigned_octets(H, hit->data, hit->len); + if (err) { + MP_TO_SEC_ERROR(err); + return SECFailure; + } + return SECSuccess; +} + +static SECStatus +addToSeed(const SECItem *seed, + unsigned long addend, + int seedlen, /* g in 186-1 */ + SECItem *seedout) +{ + mp_int s, sum, modulus, tmp; + mp_err err = MP_OKAY; + SECStatus rv = SECSuccess; + MP_DIGITS(&s) = 0; + MP_DIGITS(&sum) = 0; + MP_DIGITS(&modulus) = 0; + MP_DIGITS(&tmp) = 0; + CHECK_MPI_OK(mp_init(&s)); + CHECK_MPI_OK(mp_init(&sum)); + CHECK_MPI_OK(mp_init(&modulus)); + SECITEM_TO_MPINT(*seed, &s); /* s = seed */ + /* seed += addend */ + if (addend < MP_DIGIT_MAX) { + CHECK_MPI_OK(mp_add_d(&s, (mp_digit)addend, &s)); + } else { + CHECK_MPI_OK(mp_init(&tmp)); + CHECK_MPI_OK(mp_set_ulong(&tmp, addend)); + CHECK_MPI_OK(mp_add(&s, &tmp, &s)); + } + /*sum = s mod 2**seedlen */ + CHECK_MPI_OK(mp_div_2d(&s, (mp_digit)seedlen, NULL, &sum)); + if (seedout->data != NULL) { + SECITEM_ZfreeItem(seedout, PR_FALSE); + } + MPINT_TO_SECITEM(&sum, seedout, NULL); +cleanup: + mp_clear(&s); + mp_clear(&sum); + mp_clear(&modulus); + mp_clear(&tmp); + if (err) { + MP_TO_SEC_ERROR(err); + return SECFailure; + } + return rv; +} + +/* Compute Hash[(SEED + addend) mod 2**g] +** Result is placed in shaOutBuf. +** This computation is used in steps 2 and 7 of FIPS 186 Appendix 2.2 and +** step 11.2 of FIPS 186-3 Appendix A.1.1.2 . 
+*/ +static SECStatus +addToSeedThenHash(HASH_HashType hashtype, + const SECItem *seed, + unsigned long addend, + int seedlen, /* g in 186-1 */ + unsigned char *hashOutBuf) +{ + SECItem str = { 0, 0, 0 }; + SECStatus rv; + rv = addToSeed(seed, addend, seedlen, &str); + if (rv != SECSuccess) { + return rv; + } + rv = HASH_HashBuf(hashtype, hashOutBuf, str.data, str.len); /* hash result */ + if (str.data) + SECITEM_ZfreeItem(&str, PR_FALSE); + return rv; +} + +/* +** Perform steps 2 and 3 of FIPS 186-1, appendix 2.2. +** Generate Q from seed. +*/ +static SECStatus +makeQfromSeed( + unsigned int g, /* input. Length of seed in bits. */ + const SECItem *seed, /* input. */ + mp_int *Q) /* output. */ +{ + unsigned char sha1[SHA1_LENGTH]; + unsigned char sha2[SHA1_LENGTH]; + unsigned char U[SHA1_LENGTH]; + SECStatus rv = SECSuccess; + mp_err err = MP_OKAY; + int i; + /* ****************************************************************** + ** Step 2. + ** "Compute U = SHA[SEED] XOR SHA[(SEED+1) mod 2**g]." + **/ + CHECK_SEC_OK(SHA1_HashBuf(sha1, seed->data, seed->len)); + CHECK_SEC_OK(addToSeedThenHash(HASH_AlgSHA1, seed, 1, g, sha2)); + for (i = 0; i < SHA1_LENGTH; ++i) + U[i] = sha1[i] ^ sha2[i]; + /* ****************************************************************** + ** Step 3. + ** "Form Q from U by setting the most signficant bit (the 2**159 bit) + ** and the least signficant bit to 1. In terms of boolean operations, + ** Q = U OR 2**159 OR 1. Note that 2**159 < Q < 2**160." + */ + U[0] |= 0x80; /* U is MSB first */ + U[SHA1_LENGTH - 1] |= 0x01; + err = mp_read_unsigned_octets(Q, U, SHA1_LENGTH); +cleanup: + memset(U, 0, SHA1_LENGTH); + memset(sha1, 0, SHA1_LENGTH); + memset(sha2, 0, SHA1_LENGTH); + if (err) { + MP_TO_SEC_ERROR(err); + return SECFailure; + } + return rv; +} + +/* +** Perform steps 6 and 7 of FIPS 186-3, appendix A.1.1.2. +** Generate Q from seed. 
+*/ +static SECStatus +makeQ2fromSeed( + HASH_HashType hashtype, /* selected Hashing algorithm */ + unsigned int N, /* input. Length of q in bits. */ + const SECItem *seed, /* input. */ + mp_int *Q) /* output. */ +{ + unsigned char U[HASH_LENGTH_MAX]; + SECStatus rv = SECSuccess; + mp_err err = MP_OKAY; + int N_bytes = N / PR_BITS_PER_BYTE; /* length of N in bytes rather than bits */ + int hashLen = HASH_ResultLen(hashtype); + int offset = 0; + + /* ****************************************************************** + ** Step 6. + ** "Compute U = hash[SEED] mod 2**N-1]." + **/ + CHECK_SEC_OK(HASH_HashBuf(hashtype, U, seed->data, seed->len)); + /* mod 2**N . Step 7 will explicitly set the top bit to 1, so no need + * to handle mod 2**N-1 */ + if (hashLen > N_bytes) { + offset = hashLen - N_bytes; + } + /* ****************************************************************** + ** Step 7. + ** computed_q = 2**(N-1) + U + 1 - (U mod 2) + ** + ** This is the same as: + ** computed_q = 2**(N-1) | U | 1; + */ + U[offset] |= 0x80; /* U is MSB first */ + U[hashLen - 1] |= 0x01; + err = mp_read_unsigned_octets(Q, &U[offset], N_bytes); +cleanup: + memset(U, 0, HASH_LENGTH_MAX); + if (err) { + MP_TO_SEC_ERROR(err); + return SECFailure; + } + return rv; +} + +/* +** Perform steps from FIPS 186-3, Appendix A.1.2.1 and Appendix C.6 +** +** This generates a provable prime from two smaller prime. The resulting +** prime p will have q0 as a multiple of p-1. q0 can be 1. +** +** This implments steps 4 thorough 22 of FIPS 186-3 A.1.2.1 and +** steps 16 through 34 of FIPS 186-2 C.6 +*/ +#define MAX_ST_SEED_BITS (HASH_LENGTH_MAX * PR_BITS_PER_BYTE) +static SECStatus +makePrimefromPrimesShaweTaylor( + HASH_HashType hashtype, /* selected Hashing algorithm */ + unsigned int length, /* input. Length of prime in bits. */ + mp_int *c0, /* seed prime */ + mp_int *q, /* sub prime, can be 1 */ + mp_int *prime, /* output. */ + SECItem *prime_seed, /* input/output. 
*/ + unsigned int *prime_gen_counter) /* input/output. */ +{ + mp_int c; + mp_int c0_2; + mp_int t; + mp_int a; + mp_int z; + mp_int two_length_minus_1; + SECStatus rv = SECFailure; + int hashlen = HASH_ResultLen(hashtype); + int outlen = hashlen * PR_BITS_PER_BYTE; + int offset; + unsigned char bit, mask; + /* x needs to hold roundup(L/outlen)*outlen. + * This can be no larger than L+outlen-1, So we set it's size to + * our max L + max outlen and know we are safe */ + unsigned char x[DSA_MAX_P_BITS / 8 + HASH_LENGTH_MAX]; + mp_err err = MP_OKAY; + int i; + int iterations; + int old_counter; + + MP_DIGITS(&c) = 0; + MP_DIGITS(&c0_2) = 0; + MP_DIGITS(&t) = 0; + MP_DIGITS(&a) = 0; + MP_DIGITS(&z) = 0; + MP_DIGITS(&two_length_minus_1) = 0; + CHECK_MPI_OK(mp_init(&c)); + CHECK_MPI_OK(mp_init(&c0_2)); + CHECK_MPI_OK(mp_init(&t)); + CHECK_MPI_OK(mp_init(&a)); + CHECK_MPI_OK(mp_init(&z)); + CHECK_MPI_OK(mp_init(&two_length_minus_1)); + + /* + ** There is a slight mapping of variable names depending on which + ** FIPS 186 steps are being carried out. The mapping is as follows: + ** variable A.1.2.1 C.6 + ** c0 p0 c0 + ** q q 1 + ** c p c + ** c0_2 2*p0*q 2*c0 + ** length L length + ** prime_seed pseed prime_seed + ** prime_gen_counter pgen_counter prime_gen_counter + ** + ** Also note: or iterations variable is actually iterations+1, since + ** iterations+1 works better in C. + */ + + /* Step 4/16 iterations = ceiling(length/outlen)-1 */ + iterations = (length + outlen - 1) / outlen; /* NOTE: iterations +1 */ + /* Step 5/17 old_counter = prime_gen_counter */ + old_counter = *prime_gen_counter; + /* + ** Comment: Generate a pseudorandom integer x in the interval + ** [2**(lenght-1), 2**length]. 
+ ** + ** Step 6/18 x = 0 + */ + PORT_Memset(x, 0, sizeof(x)); + /* + ** Step 7/19 for i = 0 to iterations do + ** x = x + (HASH(prime_seed + i) * 2^(i*outlen)) + */ + for (i = 0; i < iterations; i++) { + /* is bigger than prime_seed should get to */ + CHECK_SEC_OK(addToSeedThenHash(hashtype, prime_seed, i, + MAX_ST_SEED_BITS, &x[(iterations - i - 1) * hashlen])); + } + /* Step 8/20 prime_seed = prime_seed + iterations + 1 */ + CHECK_SEC_OK(addToSeed(prime_seed, iterations, MAX_ST_SEED_BITS, + prime_seed)); + /* + ** Step 9/21 x = 2 ** (length-1) + x mod 2 ** (length-1) + ** + ** This step mathematically sets the high bit and clears out + ** all the other bits higher than length. 'x' is stored + ** in the x array, MSB first. The above formula gives us an 'x' + ** which is length bytes long and has the high bit set. We also know + ** that length <= iterations*outlen since + ** iterations=ceiling(length/outlen). First we find the offset in + ** bytes into the array where the high bit is. + */ + offset = (outlen * iterations - length) / PR_BITS_PER_BYTE; + /* now we want to set the 'high bit', since length may not be a + * multiple of 8,*/ + bit = 1 << ((length - 1) & 0x7); /* select the proper bit in the byte */ + /* we need to zero out the rest of the bits in the byte above */ + mask = (bit - 1); + /* now we set it */ + x[offset] = (mask & x[offset]) | bit; + /* + ** Comment: Generate a candidate prime c in the interval + ** [2**(lenght-1), 2**length]. 
+ ** + ** Step 10 t = ceiling(x/(2q(p0))) + ** Step 22 t = ceiling(x/(2(c0))) + */ + CHECK_MPI_OK(mp_read_unsigned_octets(&t, &x[offset], + hashlen * iterations - offset)); /* t = x */ + CHECK_MPI_OK(mp_mul(c0, q, &c0_2)); /* c0_2 is now c0*q */ + CHECK_MPI_OK(mp_add(&c0_2, &c0_2, &c0_2)); /* c0_2 is now 2*q*c0 */ + CHECK_MPI_OK(mp_add(&t, &c0_2, &t)); /* t = x+2*q*c0 */ + CHECK_MPI_OK(mp_sub_d(&t, (mp_digit)1, &t)); /* t = x+2*q*c0 -1 */ + /* t = floor((x+2qc0-1)/2qc0) = ceil(x/2qc0) */ + CHECK_MPI_OK(mp_div(&t, &c0_2, &t, NULL)); + /* + ** step 11: if (2tqp0 +1 > 2**length), then t = ceiling(2**(length-1)/2qp0) + ** step 12: t = 2tqp0 +1. + ** + ** step 23: if (2tc0 +1 > 2**length), then t = ceiling(2**(length-1)/2c0) + ** step 24: t = 2tc0 +1. + */ + CHECK_MPI_OK(mp_2expt(&two_length_minus_1, length - 1)); +step_23: + CHECK_MPI_OK(mp_mul(&t, &c0_2, &c)); /* c = t*2qc0 */ + CHECK_MPI_OK(mp_add_d(&c, (mp_digit)1, &c)); /* c= 2tqc0 + 1*/ + if (mpl_significant_bits(&c) > length) { /* if c > 2**length */ + CHECK_MPI_OK(mp_sub_d(&c0_2, (mp_digit)1, &t)); /* t = 2qc0-1 */ + /* t = 2**(length-1) + 2qc0 -1 */ + CHECK_MPI_OK(mp_add(&two_length_minus_1, &t, &t)); + /* t = floor((2**(length-1)+2qc0 -1)/2qco) + * = ceil(2**(lenght-2)/2qc0) */ + CHECK_MPI_OK(mp_div(&t, &c0_2, &t, NULL)); + CHECK_MPI_OK(mp_mul(&t, &c0_2, &c)); + CHECK_MPI_OK(mp_add_d(&c, (mp_digit)1, &c)); /* c= 2tqc0 + 1*/ + } + /* Step 13/25 prime_gen_counter = prime_gen_counter + 1*/ + (*prime_gen_counter)++; + /* + ** Comment: Test the candidate prime c for primality; first pick an + ** integer a between 2 and c-2. + ** + ** Step 14/26 a=0 + */ + PORT_Memset(x, 0, sizeof(x)); /* use x for a */ + /* + ** Step 15/27 for i = 0 to iterations do + ** a = a + (HASH(prime_seed + i) * 2^(i*outlen)) + ** + ** NOTE: we reuse the x array for 'a' initially. 
+ */ + for (i = 0; i < iterations; i++) { + /* MAX_ST_SEED_BITS is bigger than prime_seed should get to */ + CHECK_SEC_OK(addToSeedThenHash(hashtype, prime_seed, i, + MAX_ST_SEED_BITS, &x[(iterations - i - 1) * hashlen])); + } + /* Step 16/28 prime_seed = prime_seed + iterations + 1 */ + CHECK_SEC_OK(addToSeed(prime_seed, iterations, MAX_ST_SEED_BITS, + prime_seed)); + /* Step 17/29 a = 2 + (a mod (c-3)). */ + CHECK_MPI_OK(mp_read_unsigned_octets(&a, x, iterations * hashlen)); + CHECK_MPI_OK(mp_sub_d(&c, (mp_digit)3, &z)); /* z = c -3 */ + CHECK_MPI_OK(mp_mod(&a, &z, &a)); /* a = a mod c -3 */ + CHECK_MPI_OK(mp_add_d(&a, (mp_digit)2, &a)); /* a = 2 + a mod c -3 */ + /* + ** Step 18 z = a**(2tq) mod p. + ** Step 30 z = a**(2t) mod c. + */ + CHECK_MPI_OK(mp_mul(&t, q, &z)); /* z = tq */ + CHECK_MPI_OK(mp_add(&z, &z, &z)); /* z = 2tq */ + CHECK_MPI_OK(mp_exptmod(&a, &z, &c, &z)); /* z = a**(2tq) mod c */ + /* + ** Step 19 if (( 1 == GCD(z-1,p)) and ( 1 == z**p0 mod p )), then + ** Step 31 if (( 1 == GCD(z-1,c)) and ( 1 == z**c0 mod c )), then + */ + CHECK_MPI_OK(mp_sub_d(&z, (mp_digit)1, &a)); + CHECK_MPI_OK(mp_gcd(&a, &c, &a)); + if (mp_cmp_d(&a, (mp_digit)1) == 0) { + CHECK_MPI_OK(mp_exptmod(&z, c0, &c, &a)); + if (mp_cmp_d(&a, (mp_digit)1) == 0) { + /* Step 31.1 prime = c */ + CHECK_MPI_OK(mp_copy(&c, prime)); + /* + ** Step 31.2 return Success, prime, prime_seed, + ** prime_gen_counter + */ + rv = SECSuccess; + goto cleanup; + } + } + /* + ** Step 20/32 If (prime_gen_counter > 4 * length + old_counter then + ** return (FAILURE, 0, 0, 0). 
+ ** NOTE: the test is reversed, so we fall through on failure to the + ** cleanup routine + */ + if (*prime_gen_counter < (4 * length + old_counter)) { + /* Step 21/33 t = t + 1 */ + CHECK_MPI_OK(mp_add_d(&t, (mp_digit)1, &t)); + /* Step 22/34 Go to step 23/11 */ + goto step_23; + } + + /* if (prime_gencont > (4*length + old_counter), fall through to failure */ + rv = SECFailure; /* really is already set, but paranoia is good */ + +cleanup: + mp_clear(&c); + mp_clear(&c0_2); + mp_clear(&t); + mp_clear(&a); + mp_clear(&z); + mp_clear(&two_length_minus_1); + PORT_Memset(x, 0, sizeof(x)); + if (err) { + MP_TO_SEC_ERROR(err); + rv = SECFailure; + } + if (rv == SECFailure) { + mp_zero(prime); + if (prime_seed->data) { + SECITEM_FreeItem(prime_seed, PR_FALSE); + } + *prime_gen_counter = 0; + } + return rv; +} + +/* +** Perform steps from FIPS 186-3, Appendix C.6 +** +** This generates a provable prime from a seed +*/ +static SECStatus +makePrimefromSeedShaweTaylor( + HASH_HashType hashtype, /* selected Hashing algorithm */ + unsigned int length, /* input. Length of prime in bits. */ + const SECItem *input_seed, /* input. */ + mp_int *prime, /* output. */ + SECItem *prime_seed, /* output. */ + unsigned int *prime_gen_counter) /* output. */ +{ + mp_int c; + mp_int c0; + mp_int one; + SECStatus rv = SECFailure; + int hashlen = HASH_ResultLen(hashtype); + int outlen = hashlen * PR_BITS_PER_BYTE; + int offset; + unsigned char bit, mask; + unsigned char x[HASH_LENGTH_MAX * 2]; + mp_digit dummy; + mp_err err = MP_OKAY; + int i; + + MP_DIGITS(&c) = 0; + MP_DIGITS(&c0) = 0; + MP_DIGITS(&one) = 0; + CHECK_MPI_OK(mp_init(&c)); + CHECK_MPI_OK(mp_init(&c0)); + CHECK_MPI_OK(mp_init(&one)); + + /* Step 1. if length < 2 then return (FAILURE, 0, 0, 0) */ + if (length < 2) { + rv = SECFailure; + goto cleanup; + } + /* Step 2. 
if length >= 33 then goto step 14 */ + if (length >= 33) { + mp_zero(&one); + CHECK_MPI_OK(mp_add_d(&one, (mp_digit)1, &one)); + + /* Step 14 (status, c0, prime_seed, prime_gen_counter) = + ** (ST_Random_Prime((ceil(length/2)+1, input_seed) + */ + rv = makePrimefromSeedShaweTaylor(hashtype, (length + 1) / 2 + 1, + input_seed, &c0, prime_seed, prime_gen_counter); + /* Step 15 if FAILURE is returned, return (FAILURE, 0, 0, 0). */ + if (rv != SECSuccess) { + goto cleanup; + } + /* Steps 16-34 */ + rv = makePrimefromPrimesShaweTaylor(hashtype, length, &c0, &one, + prime, prime_seed, prime_gen_counter); + goto cleanup; /* we're done, one way or the other */ + } + /* Step 3 prime_seed = input_seed */ + CHECK_SEC_OK(SECITEM_CopyItem(NULL, prime_seed, input_seed)); + /* Step 4 prime_gen_count = 0 */ + *prime_gen_counter = 0; + +step_5: + /* Step 5 c = Hash(prime_seed) xor Hash(prime_seed+1). */ + CHECK_SEC_OK(HASH_HashBuf(hashtype, x, prime_seed->data, prime_seed->len)); + CHECK_SEC_OK(addToSeedThenHash(hashtype, prime_seed, 1, + MAX_ST_SEED_BITS, &x[hashlen])); + for (i = 0; i < hashlen; i++) { + x[i] = x[i] ^ x[i + hashlen]; + } + /* Step 6 c = 2**length-1 + c mod 2**length-1 */ + /* This step mathematically sets the high bit and clears out + ** all the other bits higher than length. Right now c is stored + ** in the x array, MSB first. The above formula gives us a c which + ** is length bytes long and has the high bit set. We also know that + ** length < outlen since the smallest outlen is 160 bits and the largest + ** length at this point is 32 bits. So first we find the offset in bytes + ** into the array where the high bit is. + */ + offset = (outlen - length) / PR_BITS_PER_BYTE; + /* now we want to set the 'high bit'. 
We have to calculate this since + * length may not be a multiple of 8.*/ + bit = 1 << ((length - 1) & 0x7); /* select the proper bit in the byte */ + /* we need to zero out the rest of the bits in the byte above */ + mask = (bit - 1); + /* now we set it */ + x[offset] = (mask & x[offset]) | bit; + /* Step 7 c = c*floor(c/2) + 1 */ + /* set the low bit. much easier to find (the end of the array) */ + x[hashlen - 1] |= 1; + /* now that we've set our bits, we can create our candidate "c" */ + CHECK_MPI_OK(mp_read_unsigned_octets(&c, &x[offset], hashlen - offset)); + /* Step 8 prime_gen_counter = prime_gen_counter + 1 */ + (*prime_gen_counter)++; + /* Step 9 prime_seed = prime_seed + 2 */ + CHECK_SEC_OK(addToSeed(prime_seed, 2, MAX_ST_SEED_BITS, prime_seed)); + /* Step 10 Perform deterministic primality test on c. For example, since + ** c is small, it's primality can be tested by trial division, See + ** See Appendic C.7. + ** + ** We in fact test with trial division. mpi has a built int trial divider + ** that divides all divisors up to 2^16. + */ + if (prime_tab[prime_tab_size - 1] < 0xFFF1) { + /* we aren't testing all the primes between 0 and 2^16, we really + * can't use this construction. Just fail. 
*/ + rv = SECFailure; + goto cleanup; + } + dummy = prime_tab_size; + err = mpp_divis_primes(&c, &dummy); + /* Step 11 if c is prime then */ + if (err == MP_NO) { + /* Step 11.1 prime = c */ + CHECK_MPI_OK(mp_copy(&c, prime)); + /* Step 11.2 return SUCCESS prime, prime_seed, prime_gen_counter */ + err = MP_OKAY; + rv = SECSuccess; + goto cleanup; + } else if (err != MP_YES) { + goto cleanup; /* function failed, bail out */ + } else { + /* reset mp_err */ + err = MP_OKAY; + } + /* + ** Step 12 if (prime_gen_counter > (4*len)) + ** then return (FAILURE, 0, 0, 0)) + ** Step 13 goto step 5 + */ + if (*prime_gen_counter <= (4 * length)) { + goto step_5; + } + /* if (prime_gencont > 4*length), fall through to failure */ + rv = SECFailure; /* really is already set, but paranoia is good */ + +cleanup: + mp_clear(&c); + mp_clear(&c0); + mp_clear(&one); + PORT_Memset(x, 0, sizeof(x)); + if (err) { + MP_TO_SEC_ERROR(err); + rv = SECFailure; + } + if (rv == SECFailure) { + mp_zero(prime); + if (prime_seed->data) { + SECITEM_FreeItem(prime_seed, PR_FALSE); + } + *prime_gen_counter = 0; + } + return rv; +} + +/* + * Find a Q and algorithm from Seed. + */ +static SECStatus +findQfromSeed( + unsigned int L, /* input. Length of p in bits. */ + unsigned int N, /* input. Length of q in bits. */ + unsigned int g, /* input. Length of seed in bits. */ + const SECItem *seed, /* input. */ + mp_int *Q, /* input. */ + mp_int *Q_, /* output. */ + unsigned int *qseed_len, /* output */ + HASH_HashType *hashtypePtr, /* output. Hash uses */ + pqgGenType *typePtr) /* output. 
Generation Type used */ +{ + HASH_HashType hashtype; + SECItem firstseed = { 0, 0, 0 }; + SECItem qseed = { 0, 0, 0 }; + SECStatus rv; + + *qseed_len = 0; /* only set if FIPS186_3_ST_TYPE */ + + /* handle legacy small DSA first can only be FIPS186_1_TYPE */ + if (L < 1024) { + rv = makeQfromSeed(g, seed, Q_); + if ((rv == SECSuccess) && (mp_cmp(Q, Q_) == 0)) { + *hashtypePtr = HASH_AlgSHA1; + *typePtr = FIPS186_1_TYPE; + return SECSuccess; + } + return SECFailure; + } + /* 1024 could use FIPS186_1 or FIPS186_3 algorithms, we need to try + * them both */ + if (L == 1024) { + rv = makeQfromSeed(g, seed, Q_); + if (rv == SECSuccess) { + if (mp_cmp(Q, Q_) == 0) { + *hashtypePtr = HASH_AlgSHA1; + *typePtr = FIPS186_1_TYPE; + return SECSuccess; + } + } + /* fall through for FIPS186_3 types */ + } + /* at this point we know we aren't using FIPS186_1, start trying FIPS186_3 + * with appropriate hash types */ + for (hashtype = getFirstHash(L, N); hashtype != HASH_AlgTOTAL; + hashtype = getNextHash(hashtype)) { + rv = makeQ2fromSeed(hashtype, N, seed, Q_); + if (rv != SECSuccess) { + continue; + } + if (mp_cmp(Q, Q_) == 0) { + *hashtypePtr = hashtype; + *typePtr = FIPS186_3_TYPE; + return SECSuccess; + } + } + /* + * OK finally try FIPS186_3 Shawe-Taylor + */ + firstseed = *seed; + firstseed.len = seed->len / 3; + for (hashtype = getFirstHash(L, N); hashtype != HASH_AlgTOTAL; + hashtype = getNextHash(hashtype)) { + unsigned int count; + + rv = makePrimefromSeedShaweTaylor(hashtype, N, &firstseed, Q_, + &qseed, &count); + if (rv != SECSuccess) { + continue; + } + if (mp_cmp(Q, Q_) == 0) { + /* check qseed as well... */ + int offset = seed->len - qseed.len; + if ((offset < 0) || + (PORT_Memcmp(&seed->data[offset], qseed.data, qseed.len) != 0)) { + /* we found q, but the seeds don't match. This isn't an + * accident, someone has been tweeking with the seeds, just + * fail a this point. 
*/ + SECITEM_FreeItem(&qseed, PR_FALSE); + return SECFailure; + } + *qseed_len = qseed.len; + *hashtypePtr = hashtype; + *typePtr = FIPS186_3_ST_TYPE; + SECITEM_FreeItem(&qseed, PR_FALSE); + return SECSuccess; + } + SECITEM_FreeItem(&qseed, PR_FALSE); + } + /* no hash algorithms found which match seed to Q, fail */ + return SECFailure; +} + +/* +** Perform steps 7, 8 and 9 of FIPS 186, appendix 2.2. +** which are the same as steps 11.1-11.5 of FIPS 186-2, App A.1.1.2 +** Generate P from Q, seed, L, and offset. +*/ +static SECStatus +makePfromQandSeed( + HASH_HashType hashtype, /* selected Hashing algorithm */ + unsigned int L, /* Length of P in bits. Per FIPS 186. */ + unsigned int N, /* Length of Q in bits. Per FIPS 186. */ + unsigned int offset, /* Per FIPS 186, App 2.2. & 186-3 App A.1.1.2 */ + unsigned int seedlen, /* input. Length of seed in bits. (g in 186-1)*/ + const SECItem *seed, /* input. */ + const mp_int *Q, /* input. */ + mp_int *P) /* output. */ +{ + unsigned int j; /* Per FIPS 186-3 App. A.1.1.2 (k in 186-1)*/ + unsigned int n; /* Per FIPS 186, appendix 2.2. */ + mp_digit b; /* Per FIPS 186, appendix 2.2. */ + unsigned int outlen; /* Per FIPS 186-3 App. A.1.1.2 */ + unsigned int hashlen; /* outlen in bytes */ + unsigned char V_j[HASH_LENGTH_MAX]; + mp_int W, X, c, twoQ, V_n, tmp; + mp_err err = MP_OKAY; + SECStatus rv = SECSuccess; + /* Initialize bignums */ + MP_DIGITS(&W) = 0; + MP_DIGITS(&X) = 0; + MP_DIGITS(&c) = 0; + MP_DIGITS(&twoQ) = 0; + MP_DIGITS(&V_n) = 0; + MP_DIGITS(&tmp) = 0; + CHECK_MPI_OK(mp_init(&W)); + CHECK_MPI_OK(mp_init(&X)); + CHECK_MPI_OK(mp_init(&c)); + CHECK_MPI_OK(mp_init(&twoQ)); + CHECK_MPI_OK(mp_init(&tmp)); + CHECK_MPI_OK(mp_init(&V_n)); + + hashlen = HASH_ResultLen(hashtype); + outlen = hashlen * PR_BITS_PER_BYTE; + + /* L - 1 = n*outlen + b */ + n = (L - 1) / outlen; + b = (L - 1) % outlen; + + /* ****************************************************************** + ** Step 11.1 (Step 7 in 186-1) + ** "for j = 0 ... 
n let + ** V_j = SHA[(SEED + offset + j) mod 2**seedlen]." + ** + ** Step 11.2 (Step 8 in 186-1) + ** "W = V_0 + (V_1 * 2**outlen) + ... + (V_n-1 * 2**((n-1)*outlen)) + ** + ((V_n mod 2**b) * 2**(n*outlen)) + */ + for (j = 0; j < n; ++j) { /* Do the first n terms of V_j */ + /* Do step 11.1 for iteration j. + ** V_j = HASH[(seed + offset + j) mod 2**g] + */ + CHECK_SEC_OK(addToSeedThenHash(hashtype, seed, offset + j, seedlen, V_j)); + /* Do step 11.2 for iteration j. + ** W += V_j * 2**(j*outlen) + */ + OCTETS_TO_MPINT(V_j, &tmp, hashlen); /* get bignum V_j */ + CHECK_MPI_OK(mpl_lsh(&tmp, &tmp, j * outlen)); /* tmp=V_j << j*outlen */ + CHECK_MPI_OK(mp_add(&W, &tmp, &W)); /* W += tmp */ + } + /* Step 11.2, continued. + ** [W += ((V_n mod 2**b) * 2**(n*outlen))] + */ + CHECK_SEC_OK(addToSeedThenHash(hashtype, seed, offset + n, seedlen, V_j)); + OCTETS_TO_MPINT(V_j, &V_n, hashlen); /* get bignum V_n */ + CHECK_MPI_OK(mp_div_2d(&V_n, b, NULL, &tmp)); /* tmp = V_n mod 2**b */ + CHECK_MPI_OK(mpl_lsh(&tmp, &tmp, n * outlen)); /* tmp = tmp << n*outlen */ + CHECK_MPI_OK(mp_add(&W, &tmp, &W)); /* W += tmp */ + /* Step 11.3, (Step 8 in 186-1) + ** "X = W + 2**(L-1). + ** Note that 0 <= W < 2**(L-1) and hence 2**(L-1) <= X < 2**L." + */ + CHECK_MPI_OK(mpl_set_bit(&X, (mp_size)(L - 1), 1)); /* X = 2**(L-1) */ + CHECK_MPI_OK(mp_add(&X, &W, &X)); /* X += W */ + /************************************************************* + ** Step 11.4. (Step 9 in 186-1) + ** "c = X mod 2q" + */ + CHECK_MPI_OK(mp_mul_2(Q, &twoQ)); /* 2q */ + CHECK_MPI_OK(mp_mod(&X, &twoQ, &c)); /* c = X mod 2q */ + /************************************************************* + ** Step 11.5. (Step 9 in 186-1) + ** "p = X - (c - 1). + ** Note that p is congruent to 1 mod 2q." 
+ */ + CHECK_MPI_OK(mp_sub_d(&c, 1, &c)); /* c -= 1 */ + CHECK_MPI_OK(mp_sub(&X, &c, P)); /* P = X - c */ +cleanup: + mp_clear(&W); + mp_clear(&X); + mp_clear(&c); + mp_clear(&twoQ); + mp_clear(&V_n); + mp_clear(&tmp); + if (err) { + MP_TO_SEC_ERROR(err); + return SECFailure; + } + return rv; +} + +/* +** Generate G from h, P, and Q. +*/ +static SECStatus +makeGfromH(const mp_int *P, /* input. */ + const mp_int *Q, /* input. */ + mp_int *H, /* input and output. */ + mp_int *G, /* output. */ + PRBool *passed) +{ + mp_int exp, pm1; + mp_err err = MP_OKAY; + SECStatus rv = SECSuccess; + *passed = PR_FALSE; + MP_DIGITS(&exp) = 0; + MP_DIGITS(&pm1) = 0; + CHECK_MPI_OK(mp_init(&exp)); + CHECK_MPI_OK(mp_init(&pm1)); + CHECK_MPI_OK(mp_sub_d(P, 1, &pm1)); /* P - 1 */ + if (mp_cmp(H, &pm1) >= 0) /* H >= P-1 */ + CHECK_MPI_OK(mp_sub(H, &pm1, H)); /* H = H mod (P-1) */ + /* Let b = 2**n (smallest power of 2 greater than P). + ** Since P-1 >= b/2, and H < b, quotient(H/(P-1)) = 0 or 1 + ** so the above operation safely computes H mod (P-1) + */ + /* Check for H = to 0 or 1. Regen H if so. (Regen means return error). */ + if (mp_cmp_d(H, 1) <= 0) { + rv = SECFailure; + goto cleanup; + } + /* Compute G, according to the equation G = (H ** ((P-1)/Q)) mod P */ + CHECK_MPI_OK(mp_div(&pm1, Q, &exp, NULL)); /* exp = (P-1)/Q */ + CHECK_MPI_OK(mp_exptmod(H, &exp, P, G)); /* G = H ** exp mod P */ + /* Check for G == 0 or G == 1, return error if so. */ + if (mp_cmp_d(G, 1) <= 0) { + rv = SECFailure; + goto cleanup; + } + *passed = PR_TRUE; +cleanup: + mp_clear(&exp); + mp_clear(&pm1); + if (err) { + MP_TO_SEC_ERROR(err); + rv = SECFailure; + } + return rv; +} + +/* +** Generate G from seed, index, P, and Q. +*/ +static SECStatus +makeGfromIndex(HASH_HashType hashtype, + const mp_int *P, /* input. */ + const mp_int *Q, /* input. */ + const SECItem *seed, /* input. */ + unsigned char index, /* input. 
*/ + mp_int *G) /* input/output */ +{ + mp_int e, pm1, W; + unsigned int count; + unsigned char data[HASH_LENGTH_MAX]; + unsigned int len; + mp_err err = MP_OKAY; + SECStatus rv = SECSuccess; + const SECHashObject *hashobj = NULL; + void *hashcx = NULL; + + MP_DIGITS(&e) = 0; + MP_DIGITS(&pm1) = 0; + MP_DIGITS(&W) = 0; + CHECK_MPI_OK(mp_init(&e)); + CHECK_MPI_OK(mp_init(&pm1)); + CHECK_MPI_OK(mp_init(&W)); + + /* initialize our hash stuff */ + hashobj = HASH_GetRawHashObject(hashtype); + if (hashobj == NULL) { + /* shouldn't happen */ + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + rv = SECFailure; + goto cleanup; + } + hashcx = hashobj->create(); + if (hashcx == NULL) { + rv = SECFailure; + goto cleanup; + } + + CHECK_MPI_OK(mp_sub_d(P, 1, &pm1)); /* P - 1 */ + /* Step 3 e = (p-1)/q */ + CHECK_MPI_OK(mp_div(&pm1, Q, &e, NULL)); /* e = (P-1)/Q */ +/* Steps 4, 5, and 6 */ +/* count is a 16 bit value in the spec. We actually represent count + * as more than 16 bits so we can easily detect the 16 bit overflow */ +#define MAX_COUNT 0x10000 + for (count = 1; count < MAX_COUNT; count++) { + /* step 7 + * U = domain_param_seed || "ggen" || index || count + * step 8 + * W = HASH(U) + */ + hashobj->begin(hashcx); + hashobj->update(hashcx, seed->data, seed->len); + hashobj->update(hashcx, (unsigned char *)"ggen", 4); + hashobj->update(hashcx, &index, 1); + data[0] = (count >> 8) & 0xff; + data[1] = count & 0xff; + hashobj->update(hashcx, data, 2); + hashobj->end(hashcx, data, &len, sizeof(data)); + OCTETS_TO_MPINT(data, &W, len); + /* step 9. g = W**e mod p */ + CHECK_MPI_OK(mp_exptmod(&W, &e, P, G)); + /* step 10. if (g < 2) then goto step 5 */ + /* NOTE: this weird construct is to keep the flow according to the spec. 
+ * the continue puts us back to step 5 of the for loop */ + if (mp_cmp_d(G, 2) < 0) { + continue; + } + break; /* step 11 follows step 10 if the test condition is false */ + } + if (count >= MAX_COUNT) { + rv = SECFailure; /* last part of step 6 */ + } +/* step 11. + * return valid G */ +cleanup: + PORT_Memset(data, 0, sizeof(data)); + if (hashcx) { + hashobj->destroy(hashcx, PR_TRUE); + } + mp_clear(&e); + mp_clear(&pm1); + mp_clear(&W); + if (err) { + MP_TO_SEC_ERROR(err); + rv = SECFailure; + } + return rv; +} + +/* This code uses labels and gotos, so that it can follow the numbered +** steps in the algorithms from FIPS 186-3 appendix A.1.1.2 very closely, +** and so that the correctness of this code can be easily verified. +** So, please forgive the ugly c code. +**/ +static SECStatus +pqg_ParamGen(unsigned int L, unsigned int N, pqgGenType type, + unsigned int seedBytes, PQGParams **pParams, PQGVerify **pVfy) +{ + unsigned int n; /* Per FIPS 186, app 2.2. 186-3 app A.1.1.2 */ + unsigned int seedlen; /* Per FIPS 186-3 app A.1.1.2 (was 'g' 186-1)*/ + unsigned int counter; /* Per FIPS 186, app 2.2. 186-3 app A.1.1.2 */ + unsigned int offset; /* Per FIPS 186, app 2.2. 186-3 app A.1.1.2 */ + unsigned int outlen; /* Per FIPS 186-3, appendix A.1.1.2. */ + unsigned int maxCount; + HASH_HashType hashtype; + SECItem *seed; /* Per FIPS 186, app 2.2. 186-3 app A.1.1.2 */ + PLArenaPool *arena = NULL; + PQGParams *params = NULL; + PQGVerify *verify = NULL; + PRBool passed; + SECItem hit = { 0, 0, 0 }; + SECItem firstseed = { 0, 0, 0 }; + SECItem qseed = { 0, 0, 0 }; + SECItem pseed = { 0, 0, 0 }; + mp_int P, Q, G, H, l, p0; + mp_err err = MP_OKAY; + SECStatus rv = SECFailure; + int iterations = 0; + + /* Step 1. L and N already checked by caller*/ + /* Step 2. 
if (seedlen < N) return INVALID; */ + if (seedBytes < N / PR_BITS_PER_BYTE || !pParams || !pVfy) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + /* Initialize bignums */ + MP_DIGITS(&P) = 0; + MP_DIGITS(&Q) = 0; + MP_DIGITS(&G) = 0; + MP_DIGITS(&H) = 0; + MP_DIGITS(&l) = 0; + MP_DIGITS(&p0) = 0; + CHECK_MPI_OK(mp_init(&P)); + CHECK_MPI_OK(mp_init(&Q)); + CHECK_MPI_OK(mp_init(&G)); + CHECK_MPI_OK(mp_init(&H)); + CHECK_MPI_OK(mp_init(&l)); + CHECK_MPI_OK(mp_init(&p0)); + + /* parameters have been passed in, only generate G */ + if (*pParams != NULL) { + /* we only support G index generation if generating separate from PQ */ + if ((*pVfy == NULL) || (type == FIPS186_1_TYPE) || + ((*pVfy)->h.len != 1) || ((*pVfy)->h.data == NULL) || + ((*pVfy)->seed.data == NULL) || ((*pVfy)->seed.len == 0)) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + params = *pParams; + verify = *pVfy; + + /* fill in P Q, */ + SECITEM_TO_MPINT((*pParams)->prime, &P); + SECITEM_TO_MPINT((*pParams)->subPrime, &Q); + hashtype = getFirstHash(L, N); + CHECK_SEC_OK(makeGfromIndex(hashtype, &P, &Q, &(*pVfy)->seed, + (*pVfy)->h.data[0], &G)); + MPINT_TO_SECITEM(&G, &(*pParams)->base, (*pParams)->arena); + goto cleanup; + } + /* Initialize an arena for the params. */ + arena = PORT_NewArena(NSS_FREEBL_DEFAULT_CHUNKSIZE); + if (!arena) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return SECFailure; + } + params = (PQGParams *)PORT_ArenaZAlloc(arena, sizeof(PQGParams)); + if (!params) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + PORT_FreeArena(arena, PR_TRUE); + return SECFailure; + } + params->arena = arena; + /* Initialize an arena for the verify. 
*/ + arena = PORT_NewArena(NSS_FREEBL_DEFAULT_CHUNKSIZE); + if (!arena) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + PORT_FreeArena(params->arena, PR_TRUE); + return SECFailure; + } + verify = (PQGVerify *)PORT_ArenaZAlloc(arena, sizeof(PQGVerify)); + if (!verify) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + PORT_FreeArena(arena, PR_TRUE); + PORT_FreeArena(params->arena, PR_TRUE); + return SECFailure; + } + verify->arena = arena; + seed = &verify->seed; + arena = NULL; + + /* Select Hash and Compute lengths. */ + /* getFirstHash gives us the smallest acceptable hash for this key + * strength */ + hashtype = getFirstHash(L, N); + outlen = HASH_ResultLen(hashtype) * PR_BITS_PER_BYTE; + + /* Step 3: n = Ceil(L/outlen)-1; (same as n = Floor((L-1)/outlen)) */ + n = (L - 1) / outlen; + /* Step 4: (skipped since we don't use b): b = L -1 - (n*outlen); */ + seedlen = seedBytes * PR_BITS_PER_BYTE; /* bits in seed */ +step_5: + /* ****************************************************************** + ** Step 5. (Step 1 in 186-1) + ** "Choose an abitrary sequence of at least N bits and call it SEED. + ** Let g be the length of SEED in bits." + */ + if (++iterations > MAX_ITERATIONS) { /* give up after a while */ + PORT_SetError(SEC_ERROR_NEED_RANDOM); + goto cleanup; + } + seed->len = seedBytes; + CHECK_SEC_OK(getPQseed(seed, verify->arena)); + /* ****************************************************************** + ** Step 6. (Step 2 in 186-1) + ** + ** "Compute U = SHA[SEED] XOR SHA[(SEED+1) mod 2**g]. (186-1)" + ** "Compute U = HASH[SEED] 2**(N-1). (186-3)" + ** + ** Step 7. (Step 3 in 186-1) + ** "Form Q from U by setting the most signficant bit (the 2**159 bit) + ** and the least signficant bit to 1. In terms of boolean operations, + ** Q = U OR 2**159 OR 1. Note that 2**159 < Q < 2**160. 
(186-1)" + ** + ** "q = 2**(N-1) + U + 1 - (U mod 2) (186-3) + ** + ** Note: Both formulations are the same for U < 2**(N-1) and N=160 + ** + ** If using Shawe-Taylor, We do the entire A.1.2.1.2 setps in the block + ** FIPS186_3_ST_TYPE. + */ + if (type == FIPS186_1_TYPE) { + CHECK_SEC_OK(makeQfromSeed(seedlen, seed, &Q)); + } else if (type == FIPS186_3_TYPE) { + CHECK_SEC_OK(makeQ2fromSeed(hashtype, N, seed, &Q)); + } else { + /* FIPS186_3_ST_TYPE */ + unsigned int qgen_counter, pgen_counter; + + /* Step 1 (L,N) already checked for acceptability */ + + firstseed = *seed; + qgen_counter = 0; + /* Step 2. Use N and firstseed to generate random prime q + * using Apendix C.6 */ + CHECK_SEC_OK(makePrimefromSeedShaweTaylor(hashtype, N, &firstseed, &Q, + &qseed, &qgen_counter)); + /* Step 3. Use floor(L/2+1) and qseed to generate random prime p0 + * using Appendix C.6 */ + pgen_counter = 0; + CHECK_SEC_OK(makePrimefromSeedShaweTaylor(hashtype, (L + 1) / 2 + 1, + &qseed, &p0, &pseed, &pgen_counter)); + /* Steps 4-22 FIPS 186-3 appendix A.1.2.1.2 */ + CHECK_SEC_OK(makePrimefromPrimesShaweTaylor(hashtype, L, + &p0, &Q, &P, &pseed, &pgen_counter)); + + /* combine all the seeds */ + seed->len = firstseed.len + qseed.len + pseed.len; + seed->data = PORT_ArenaZAlloc(verify->arena, seed->len); + if (seed->data == NULL) { + goto cleanup; + } + PORT_Memcpy(seed->data, firstseed.data, firstseed.len); + PORT_Memcpy(seed->data + firstseed.len, pseed.data, pseed.len); + PORT_Memcpy(seed->data + firstseed.len + pseed.len, qseed.data, qseed.len); + counter = 0; /* (qgen_counter << 16) | pgen_counter; */ + + /* we've generated both P and Q now, skip to generating G */ + goto generate_G; + } + /* ****************************************************************** + ** Step 8. (Step 4 in 186-1) + ** "Use a robust primality testing algorithm to test whether q is prime." 
+ ** + ** Appendix 2.1 states that a Rabin test with at least 50 iterations + ** "will give an acceptable probability of error." + */ + /*CHECK_SEC_OK( prm_RabinTest(&Q, &passed) );*/ + err = mpp_pprime(&Q, prime_testcount_q(L, N)); + passed = (err == MP_YES) ? SECSuccess : SECFailure; + /* ****************************************************************** + ** Step 9. (Step 5 in 186-1) "If q is not prime, goto step 5 (1 in 186-1)." + */ + if (passed != SECSuccess) + goto step_5; + /* ****************************************************************** + ** Step 10. + ** offset = 1; + **( Step 6b 186-1)"Let counter = 0 and offset = 2." + */ + offset = (type == FIPS186_1_TYPE) ? 2 : 1; + /* + ** Step 11. (Step 6a,13a,14 in 186-1) + ** For counter - 0 to (4L-1) do + ** + */ + maxCount = L >= 1024 ? (4 * L - 1) : 4095; + for (counter = 0; counter <= maxCount; counter++) { + /* ****************************************************************** + ** Step 11.1 (Step 7 in 186-1) + ** "for j = 0 ... n let + ** V_j = HASH[(SEED + offset + j) mod 2**seedlen]." + ** + ** Step 11.2 (Step 8 in 186-1) + ** "W = V_0 + V_1*2**outlen+...+ V_n-1 * 2**((n-1)*outlen) + + ** ((Vn* mod 2**b)*2**(n*outlen))" + ** Step 11.3 (Step 8 in 186-1) + ** "X = W + 2**(L-1) + ** Note that 0 <= W < 2**(L-1) and hence 2**(L-1) <= X < 2**L." + ** + ** Step 11.4 (Step 9 in 186-1). + ** "c = X mod 2q" + ** + ** Step 11.5 (Step 9 in 186-1). + ** " p = X - (c - 1). + ** Note that p is congruent to 1 mod 2q." + */ + CHECK_SEC_OK(makePfromQandSeed(hashtype, L, N, offset, seedlen, + seed, &Q, &P)); + /************************************************************* + ** Step 11.6. (Step 10 in 186-1) + ** "if p < 2**(L-1), then goto step 11.9. 
(step 13 in 186-1)" + */ + CHECK_MPI_OK(mpl_set_bit(&l, (mp_size)(L - 1), 1)); /* l = 2**(L-1) */ + if (mp_cmp(&P, &l) < 0) + goto step_11_9; + /************************************************************ + ** Step 11.7 (step 11 in 186-1) + ** "Perform a robust primality test on p." + */ + /*CHECK_SEC_OK( prm_RabinTest(&P, &passed) );*/ + err = mpp_pprime(&P, prime_testcount_p(L, N)); + passed = (err == MP_YES) ? SECSuccess : SECFailure; + /* ****************************************************************** + ** Step 11.8. "If p is determined to be primed return VALID + ** values of p, q, seed and counter." + */ + if (passed == SECSuccess) + break; + step_11_9: + /* ****************************************************************** + ** Step 11.9. "offset = offset + n + 1." + */ + offset += n + 1; + } + /* ****************************************************************** + ** Step 12. "goto step 5." + ** + ** NOTE: if counter <= maxCount, then we exited the loop at Step 11.8 + ** and now need to return p,q, seed, and counter. + */ + if (counter > maxCount) + goto step_5; + +generate_G: + /* ****************************************************************** + ** returning p, q, seed and counter + */ + if (type == FIPS186_1_TYPE) { + /* Generate g, This is called the "Unverifiable Generation of g + * in FIPA186-3 Appedix A.2.1. 
For compatibility we maintain + * this version of the code */ + SECITEM_AllocItem(NULL, &hit, L / 8); /* h is no longer than p */ + if (!hit.data) { + /* rv was presumably left at SECSuccess by the last CHECK_SEC_OK; + * report the allocation failure instead of a stale success */ + rv = SECFailure; + goto cleanup; + } + do { + /* loop: generate h until 1 < h < p-1 and h**((p-1)/q) mod p > 1 */ + CHECK_SEC_OK(generate_h_candidate(&hit, &H)); + CHECK_SEC_OK(makeGfromH(&P, &Q, &H, &G, &passed)); + } while (passed != PR_TRUE); + MPINT_TO_SECITEM(&H, &verify->h, verify->arena); + } else { + unsigned char index = 1; /* default to 1 */ + verify->h.data = (unsigned char *)PORT_ArenaZAlloc(verify->arena, 1); + if (verify->h.data == NULL) { + rv = SECFailure; /* do not return a stale SECSuccess */ + goto cleanup; + } + verify->h.len = 1; + verify->h.data[0] = index; + /* Generate g, using the FIPS 186-3 Appendix A.2.3 */ + CHECK_SEC_OK(makeGfromIndex(hashtype, &P, &Q, seed, index, &G)); + } + /* All generation is done. Now, save the PQG params. */ + MPINT_TO_SECITEM(&P, &params->prime, params->arena); + MPINT_TO_SECITEM(&Q, &params->subPrime, params->arena); + MPINT_TO_SECITEM(&G, &params->base, params->arena); + verify->counter = counter; + *pParams = params; + *pVfy = verify; +cleanup: + if (pseed.data) { + PORT_Free(pseed.data); + } + if (qseed.data) { + PORT_Free(qseed.data); + } + mp_clear(&P); + mp_clear(&Q); + mp_clear(&G); + mp_clear(&H); + mp_clear(&l); + mp_clear(&p0); + if (err) { + MP_TO_SEC_ERROR(err); + rv = SECFailure; + } + if (rv) { + if (params) { + PORT_FreeArena(params->arena, PR_TRUE); + } + if (verify) { + PORT_FreeArena(verify->arena, PR_TRUE); + } + } + if (hit.data) { + SECITEM_FreeItem(&hit, PR_FALSE); + } + return rv; +} + +SECStatus +PQG_ParamGen(unsigned int j, PQGParams **pParams, PQGVerify **pVfy) +{ + unsigned int L; /* Length of P in bits. Per FIPS 186.
*/ + unsigned int seedBytes; + + if (j > 8 || !pParams || !pVfy) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + L = 512 + (j * 64); /* bits in P */ + seedBytes = L / 8; + return pqg_ParamGen(L, DSA1_Q_BITS, FIPS186_1_TYPE, seedBytes, + pParams, pVfy); +} + +SECStatus +PQG_ParamGenSeedLen(unsigned int j, unsigned int seedBytes, + PQGParams **pParams, PQGVerify **pVfy) +{ + unsigned int L; /* Length of P in bits. Per FIPS 186. */ + + if (j > 8 || !pParams || !pVfy) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + L = 512 + (j * 64); /* bits in P */ + return pqg_ParamGen(L, DSA1_Q_BITS, FIPS186_1_TYPE, seedBytes, + pParams, pVfy); +} + +SECStatus +PQG_ParamGenV2(unsigned int L, unsigned int N, unsigned int seedBytes, + PQGParams **pParams, PQGVerify **pVfy) +{ + if (N == 0) { + N = pqg_get_default_N(L); + } + if (seedBytes == 0) { + /* seedBytes == L/8 for probable primes, N/8 for Shawe-Taylor Primes */ + seedBytes = N / 8; + } + if (pqg_validate_dsa2(L, N) != SECSuccess) { + /* error code already set */ + return SECFailure; + } + return pqg_ParamGen(L, N, FIPS186_3_ST_TYPE, seedBytes, pParams, pVfy); +} + +/* + * verify can use vfy structures returned from either FIPS186-1 or + * FIPS186-2, and can handle differences in selected Hash functions to + * generate the parameters. 
+ */ +SECStatus +PQG_VerifyParams(const PQGParams *params, + const PQGVerify *vfy, SECStatus *result) +{ + SECStatus rv = SECSuccess; + unsigned int g, n, L, N, offset, outlen; + mp_int p0, P, Q, G, P_, Q_, G_, r, h; + mp_err err = MP_OKAY; + int j; + unsigned int counter_max = 0; /* handle legacy L < 1024 */ + unsigned int qseed_len; + SECItem pseed_ = { 0, 0, 0 }; + HASH_HashType hashtype; + pqgGenType type; + +#define CHECKPARAM(cond) \ + if (!(cond)) { \ + *result = SECFailure; \ + goto cleanup; \ + } + if (!params || !vfy || !result) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + /* always need at least p, q, and seed for any meaningful check */ + if ((params->prime.len == 0) || (params->subPrime.len == 0) || + (vfy->seed.len == 0)) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + /* we want to either check PQ or G or both. If we don't have G, make + * sure we have count so we can check P. */ + if ((params->base.len == 0) && (vfy->counter == -1)) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + MP_DIGITS(&p0) = 0; + MP_DIGITS(&P) = 0; + MP_DIGITS(&Q) = 0; + MP_DIGITS(&G) = 0; + MP_DIGITS(&P_) = 0; + MP_DIGITS(&Q_) = 0; + MP_DIGITS(&G_) = 0; + MP_DIGITS(&r) = 0; + MP_DIGITS(&h) = 0; + CHECK_MPI_OK(mp_init(&p0)); + CHECK_MPI_OK(mp_init(&P)); + CHECK_MPI_OK(mp_init(&Q)); + CHECK_MPI_OK(mp_init(&G)); + CHECK_MPI_OK(mp_init(&P_)); + CHECK_MPI_OK(mp_init(&Q_)); + CHECK_MPI_OK(mp_init(&G_)); + CHECK_MPI_OK(mp_init(&r)); + CHECK_MPI_OK(mp_init(&h)); + *result = SECSuccess; + SECITEM_TO_MPINT(params->prime, &P); + SECITEM_TO_MPINT(params->subPrime, &Q); + /* if G isn't specified, just check P and Q */ + if (params->base.len != 0) { + SECITEM_TO_MPINT(params->base, &G); + } + /* 1. 
Check (L,N) pair */ + N = mpl_significant_bits(&Q); + L = mpl_significant_bits(&P); + if (L < 1024) { + /* handle DSA1 pqg parameters with less thatn 1024 bits*/ + CHECKPARAM(N == DSA1_Q_BITS); + j = PQG_PBITS_TO_INDEX(L); + CHECKPARAM(j >= 0 && j <= 8); + counter_max = 4096; + } else { + /* handle DSA2 parameters (includes DSA1, 1024 bits) */ + CHECKPARAM(pqg_validate_dsa2(L, N) == SECSuccess); + counter_max = 4 * L; + } + /* 3. G < P */ + if (params->base.len != 0) { + CHECKPARAM(mp_cmp(&G, &P) < 0); + } + /* 4. P % Q == 1 */ + CHECK_MPI_OK(mp_mod(&P, &Q, &r)); + CHECKPARAM(mp_cmp_d(&r, 1) == 0); + /* 5. Q is prime */ + CHECKPARAM(mpp_pprime(&Q, prime_testcount_q(L, N)) == MP_YES); + /* 6. P is prime */ + CHECKPARAM(mpp_pprime(&P, prime_testcount_p(L, N)) == MP_YES); + /* Steps 7-12 are done only if the optional PQGVerify is supplied. */ + /* continue processing P */ + /* 7. counter < 4*L */ + CHECKPARAM((vfy->counter == -1) || (vfy->counter < counter_max)); + /* 8. g >= N and g < 2*L (g is length of seed in bits) */ + g = vfy->seed.len * 8; + CHECKPARAM(g >= N && g < counter_max / 2); + /* 9. Q generated from SEED matches Q in PQGParams. */ + /* This function checks all possible hash and generation types to + * find a Q_ which matches Q. */ + CHECKPARAM(findQfromSeed(L, N, g, &vfy->seed, &Q, &Q_, &qseed_len, + &hashtype, &type) == SECSuccess); + CHECKPARAM(mp_cmp(&Q, &Q_) == 0); + if (type == FIPS186_3_ST_TYPE) { + SECItem qseed = { 0, 0, 0 }; + SECItem pseed = { 0, 0, 0 }; + unsigned int first_seed_len; + unsigned int pgen_counter = 0; + + /* extract pseed and qseed from domain_parameter_seed, which is + * first_seed || pseed || qseed. qseed is first_seed + small_integer + * pseed is qseed + small_integer. This means most of the time + * first_seed.len == qseed.len == pseed.len. Rarely qseed.len and/or + * pseed.len will be one greater than first_seed.len, so we can + * depend on the fact that + * first_seed.len = floor(domain_parameter_seed.len/3). 
+ * findQfromSeed returned qseed.len, so we can calculate pseed.len as + * pseed.len = domain_parameter_seed.len - first_seed.len - qseed.len + * this is probably over kill, since 99.999% of the time they will all + * be equal. + * + * With the lengths, we can now find the offsets; + * first_seed.data = domain_parameter_seed.data + 0 + * pseed.data = domain_parameter_seed.data + first_seed.len + * qseed.data = domain_parameter_seed.data + * + domain_paramter_seed.len - qseed.len + * + */ + first_seed_len = vfy->seed.len / 3; + CHECKPARAM(qseed_len < vfy->seed.len); + CHECKPARAM(first_seed_len * 8 > N - 1); + CHECKPARAM(first_seed_len + qseed_len < vfy->seed.len); + qseed.len = qseed_len; + qseed.data = vfy->seed.data + vfy->seed.len - qseed.len; + pseed.len = vfy->seed.len - (first_seed_len + qseed_len); + pseed.data = vfy->seed.data + first_seed_len; + + /* + * now complete FIPS 186-3 A.1.2.1.2. Step 1 was completed + * above in our initial checks, Step 2 was completed by + * findQfromSeed */ + + /* Step 3 (status, c0, prime_seed, prime_gen_counter) = + ** (ST_Random_Prime((ceil(length/2)+1, input_seed) + */ + CHECK_SEC_OK(makePrimefromSeedShaweTaylor(hashtype, (L + 1) / 2 + 1, + &qseed, &p0, &pseed_, &pgen_counter)); + /* Steps 4-22 FIPS 186-3 appendix A.1.2.1.2 */ + CHECK_SEC_OK(makePrimefromPrimesShaweTaylor(hashtype, L, + &p0, &Q_, &P_, &pseed_, &pgen_counter)); + CHECKPARAM(mp_cmp(&P, &P_) == 0); + /* make sure pseed wasn't tampered with (since it is part of + * calculating G) */ + CHECKPARAM(SECITEM_CompareItem(&pseed, &pseed_) == SECEqual); + } else if (vfy->counter == -1) { + /* If counter is set to -1, we are really only verifying G, skip + * the remainder of the checks for P */ + CHECKPARAM(type != FIPS186_1_TYPE); /* we only do this for DSA2 */ + } else { + /* 10. P generated from (L, counter, g, SEED, Q) matches P + * in PQGParams. 
*/ + outlen = HASH_ResultLen(hashtype) * PR_BITS_PER_BYTE; + n = (L - 1) / outlen; + offset = vfy->counter * (n + 1) + ((type == FIPS186_1_TYPE) ? 2 : 1); + CHECK_SEC_OK(makePfromQandSeed(hashtype, L, N, offset, g, &vfy->seed, + &Q, &P_)); + CHECKPARAM(mp_cmp(&P, &P_) == 0); + } + + /* now check G, skip if don't have a g */ + if (params->base.len == 0) + goto cleanup; + + /* first Always check that G is OK FIPS186-3 A.2.2 & A.2.4*/ + /* 1. 2 < G < P-1 */ + /* P is prime, p-1 == zero 1st bit */ + CHECK_MPI_OK(mpl_set_bit(&P, 0, 0)); + CHECKPARAM(mp_cmp_d(&G, 2) > 0 && mp_cmp(&G, &P) < 0); + CHECK_MPI_OK(mpl_set_bit(&P, 0, 1)); /* set it back */ + /* 2. verify g**q mod p == 1 */ + CHECK_MPI_OK(mp_exptmod(&G, &Q, &P, &h)); /* h = G ** Q mod P */ + CHECKPARAM(mp_cmp_d(&h, 1) == 0); + + /* no h, the above is the best we can do */ + if (vfy->h.len == 0) { + if (type != FIPS186_1_TYPE) { + *result = SECWouldBlock; + } + goto cleanup; + } + + /* + * If h is one byte and FIPS186-3 was used to generate Q (we've verified + * Q was generated from seed already), then we assume that FIPS 186-3 + * appendix A.2.3 was used to generate G. Otherwise we assume A.2.1 was + * used to generate G. + */ + if ((vfy->h.len == 1) && (type != FIPS186_1_TYPE)) { + /* A.2.3 */ + CHECK_SEC_OK(makeGfromIndex(hashtype, &P, &Q, &vfy->seed, + vfy->h.data[0], &G_)); + CHECKPARAM(mp_cmp(&G, &G_) == 0); + } else { + int passed; + /* A.2.1 */ + SECITEM_TO_MPINT(vfy->h, &h); + /* 11. 1 < h < P-1 */ + /* P is prime, p-1 == zero 1st bit */ + CHECK_MPI_OK(mpl_set_bit(&P, 0, 0)); + /* range-check h itself (G was already range-checked above); + * P temporarily holds P-1 here */ + CHECKPARAM(mp_cmp_d(&h, 1) > 0 && mp_cmp(&h, &P) < 0); + CHECK_MPI_OK(mpl_set_bit(&P, 0, 1)); /* set it back */ + /* 12. G generated from h matches G in PQGParams.
*/ + CHECK_SEC_OK(makeGfromH(&P, &Q, &h, &G_, &passed)); + CHECKPARAM(passed && mp_cmp(&G, &G_) == 0); + } +cleanup: + mp_clear(&p0); + mp_clear(&P); + mp_clear(&Q); + mp_clear(&G); + mp_clear(&P_); + mp_clear(&Q_); + mp_clear(&G_); + mp_clear(&r); + mp_clear(&h); + if (pseed_.data) { + SECITEM_FreeItem(&pseed_, PR_FALSE); + } + if (err) { + MP_TO_SEC_ERROR(err); + rv = SECFailure; + } + return rv; +} + +/************************************************************************** + * Free the PQGParams struct and the things it points to. * + **************************************************************************/ +void +PQG_DestroyParams(PQGParams *params) +{ + if (params == NULL) + return; + if (params->arena != NULL) { + PORT_FreeArena(params->arena, PR_FALSE); /* don't zero it */ + } else { + SECITEM_FreeItem(&params->prime, PR_FALSE); /* don't free prime */ + SECITEM_FreeItem(&params->subPrime, PR_FALSE); /* don't free subPrime */ + SECITEM_FreeItem(&params->base, PR_FALSE); /* don't free base */ + PORT_Free(params); + } +} + +/************************************************************************** + * Free the PQGVerify struct and the things it points to. * + **************************************************************************/ + +void +PQG_DestroyVerify(PQGVerify *vfy) +{ + if (vfy == NULL) + return; + if (vfy->arena != NULL) { + PORT_FreeArena(vfy->arena, PR_FALSE); /* don't zero it */ + } else { + SECITEM_FreeItem(&vfy->seed, PR_FALSE); /* don't free seed */ + SECITEM_FreeItem(&vfy->h, PR_FALSE); /* don't free h */ + PORT_Free(vfy); + } +} diff --git a/security/nss/lib/freebl/pqg.h b/security/nss/lib/freebl/pqg.h new file mode 100644 index 000000000..c4eecd590 --- /dev/null +++ b/security/nss/lib/freebl/pqg.h @@ -0,0 +1,25 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/ + +/* + * pqg.h + * + * header file for pqg functions exported just to freebl + */ + +#ifndef _PQG_H_ +#define _PQG_H_ 1 + +/* PQG_GetLength returns the number of significant bytes in the SECItem object + * (that is, the length of the object minus any leading zeros). Any SECItem + * may be used, though this function is usually used for P, Q, or G values */ +unsigned int PQG_GetLength(const SECItem *obj); +/* Check to see that the PQG parameters match a NIST defined DSA size, + * returns SECFailure and sets SEC_ERROR_INVALID_ARGS if they don't. + * See blapi.h for legal DSA PQG sizes. */ +SECStatus PQG_Check(const PQGParams *params); +/* Return the preferred hash algorithm for the given PQGParams. */ +HASH_HashType PQG_GetHashType(const PQGParams *params); + +#endif /* _PQG_H_ */ diff --git a/security/nss/lib/freebl/rawhash.c b/security/nss/lib/freebl/rawhash.c new file mode 100644 index 000000000..551727b89 --- /dev/null +++ b/security/nss/lib/freebl/rawhash.c @@ -0,0 +1,154 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "nspr.h" +#include "hasht.h" +#include "blapi.h" /* below the line */ +#include "secerr.h" + +static void * +null_hash_new_context(void) +{ + return NULL; +} + +static void * +null_hash_clone_context(void *v) +{ + PORT_Assert(v == NULL); + return NULL; +} + +static void +null_hash_begin(void *v) +{ +} + +static void +null_hash_update(void *v, const unsigned char *input, unsigned int length) +{ +} + +static void +null_hash_end(void *v, unsigned char *output, unsigned int *outLen, + unsigned int maxOut) +{ + *outLen = 0; +} + +static void +null_hash_destroy_context(void *v, PRBool b) +{ + PORT_Assert(v == NULL); +} + +const SECHashObject SECRawHashObjects[] = { + { 0, + (void *(*)(void))null_hash_new_context, + (void *(*)(void *))null_hash_clone_context, + (void (*)(void *, PRBool))null_hash_destroy_context, + (void (*)(void *))null_hash_begin, + (void (*)(void *, const unsigned char *, unsigned int))null_hash_update, + (void (*)(void *, unsigned char *, unsigned int *, + unsigned int))null_hash_end, + 0, + HASH_AlgNULL, + (void (*)(void *, unsigned char *, unsigned int *, + unsigned int))null_hash_end }, + { + MD2_LENGTH, + (void *(*)(void))MD2_NewContext, + (void *(*)(void *))null_hash_clone_context, + (void (*)(void *, PRBool))MD2_DestroyContext, + (void (*)(void *))MD2_Begin, + (void (*)(void *, const unsigned char *, unsigned int))MD2_Update, + (void (*)(void *, unsigned char *, unsigned int *, unsigned int))MD2_End, + MD2_BLOCK_LENGTH, + HASH_AlgMD2, + NULL /* end_raw */ + }, + { MD5_LENGTH, + (void *(*)(void))MD5_NewContext, + (void *(*)(void *))null_hash_clone_context, + (void (*)(void *, PRBool))MD5_DestroyContext, + (void (*)(void *))MD5_Begin, + (void (*)(void *, const unsigned char *, unsigned int))MD5_Update, + (void (*)(void *, unsigned char *, unsigned int *, unsigned int))MD5_End, + MD5_BLOCK_LENGTH, + HASH_AlgMD5, + (void (*)(void *, unsigned char *, unsigned int *, unsigned 
int))MD5_EndRaw }, + { SHA1_LENGTH, + (void *(*)(void))SHA1_NewContext, + (void *(*)(void *))null_hash_clone_context, + (void (*)(void *, PRBool))SHA1_DestroyContext, + (void (*)(void *))SHA1_Begin, + (void (*)(void *, const unsigned char *, unsigned int))SHA1_Update, + (void (*)(void *, unsigned char *, unsigned int *, unsigned int))SHA1_End, + SHA1_BLOCK_LENGTH, + HASH_AlgSHA1, + (void (*)(void *, unsigned char *, unsigned int *, unsigned int)) + SHA1_EndRaw }, + { SHA256_LENGTH, + (void *(*)(void))SHA256_NewContext, + (void *(*)(void *))null_hash_clone_context, + (void (*)(void *, PRBool))SHA256_DestroyContext, + (void (*)(void *))SHA256_Begin, + (void (*)(void *, const unsigned char *, unsigned int))SHA256_Update, + (void (*)(void *, unsigned char *, unsigned int *, + unsigned int))SHA256_End, + SHA256_BLOCK_LENGTH, + HASH_AlgSHA256, + (void (*)(void *, unsigned char *, unsigned int *, + unsigned int))SHA256_EndRaw }, + { SHA384_LENGTH, + (void *(*)(void))SHA384_NewContext, + (void *(*)(void *))null_hash_clone_context, + (void (*)(void *, PRBool))SHA384_DestroyContext, + (void (*)(void *))SHA384_Begin, + (void (*)(void *, const unsigned char *, unsigned int))SHA384_Update, + (void (*)(void *, unsigned char *, unsigned int *, + unsigned int))SHA384_End, + SHA384_BLOCK_LENGTH, + HASH_AlgSHA384, + (void (*)(void *, unsigned char *, unsigned int *, + unsigned int))SHA384_EndRaw }, + { SHA512_LENGTH, + (void *(*)(void))SHA512_NewContext, + (void *(*)(void *))null_hash_clone_context, + (void (*)(void *, PRBool))SHA512_DestroyContext, + (void (*)(void *))SHA512_Begin, + (void (*)(void *, const unsigned char *, unsigned int))SHA512_Update, + (void (*)(void *, unsigned char *, unsigned int *, + unsigned int))SHA512_End, + SHA512_BLOCK_LENGTH, + HASH_AlgSHA512, + (void (*)(void *, unsigned char *, unsigned int *, + unsigned int))SHA512_EndRaw }, + { SHA224_LENGTH, + (void *(*)(void))SHA224_NewContext, + (void *(*)(void *))null_hash_clone_context, + (void (*)(void *, 
PRBool))SHA224_DestroyContext, + (void (*)(void *))SHA224_Begin, + (void (*)(void *, const unsigned char *, unsigned int))SHA224_Update, + (void (*)(void *, unsigned char *, unsigned int *, + unsigned int))SHA224_End, + SHA224_BLOCK_LENGTH, + HASH_AlgSHA224, + (void (*)(void *, unsigned char *, unsigned int *, + unsigned int))SHA224_EndRaw }, +}; + +const SECHashObject * +HASH_GetRawHashObject(HASH_HashType hashType) +{ + if (hashType <= HASH_AlgNULL || hashType >= HASH_AlgTOTAL) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return NULL; + } + return &SECRawHashObjects[hashType]; +} diff --git a/security/nss/lib/freebl/ret_cr16.s b/security/nss/lib/freebl/ret_cr16.s new file mode 100644 index 000000000..1f53fc900 --- /dev/null +++ b/security/nss/lib/freebl/ret_cr16.s @@ -0,0 +1,27 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef __LP64__ + .LEVEL 2.0W +#else + .LEVEL 1.1 +#endif + + .CODE ; equivalent to the following two lines +; .SPACE $TEXT$,SORT=8 +; .SUBSPA $CODE$,QUAD=0,ALIGN=4,ACCESS=0x2c,CODE_ONLY,SORT=24 + +ret_cr16 + .PROC + .CALLINFO FRAME=0, NO_CALLS + .EXPORT ret_cr16,ENTRY + .ENTRY +; BV %r0(%rp) + BV 0(%rp) + MFCTL %cr16,%ret0 + BV %r0(%rp) + .EXIT + NOP + .PROCEND + .END diff --git a/security/nss/lib/freebl/rijndael.c b/security/nss/lib/freebl/rijndael.c new file mode 100644 index 000000000..4bb182693 --- /dev/null +++ b/security/nss/lib/freebl/rijndael.c @@ -0,0 +1,1375 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "prinit.h" +#include "prenv.h" +#include "prerr.h" +#include "secerr.h" + +#include "prtypes.h" +#include "blapi.h" +#include "rijndael.h" + +#include "cts.h" +#include "ctr.h" +#include "gcm.h" + +#ifdef USE_HW_AES +#include "intel-aes.h" +#endif + +#include "mpi.h" + +#ifdef USE_HW_AES +/* CPU feature flags: 0 = not probed yet, 1 = available, -1 = unavailable or + * disabled. They are filled in lazily by aes_InitContext. + */ +static int has_intel_aes = 0; +static PRBool use_hw_aes = PR_FALSE; + +#ifdef INTEL_GCM +#include "intel-gcm.h" +static int has_intel_avx = 0; +static int has_intel_clmul = 0; +static PRBool use_hw_gcm = PR_FALSE; +#if defined(_MSC_VER) && !defined(_M_IX86) +#include <immintrin.h> /* for _xgetbv() */ +#endif +#endif +#endif /* USE_HW_AES */ + +/* + * There are currently five ways to build this code, varying in performance + * and code size. + * + * RIJNDAEL_INCLUDE_TABLES Include all tables from rijndael32.tab + * RIJNDAEL_GENERATE_TABLES Generate tables on first + * encryption/decryption, then store them; + * use the function gfm + * RIJNDAEL_GENERATE_TABLES_MACRO Same as above, but use macros to do + * the generation + * RIJNDAEL_GENERATE_VALUES Do not store tables, generate the table + * values "on-the-fly", using gfm + * RIJNDAEL_GENERATE_VALUES_MACRO Same as above, but use macros + * + * The default is RIJNDAEL_INCLUDE_TABLES. 
+ */ + +/* + * When building RIJNDAEL_INCLUDE_TABLES, includes S**-1, Rcon, T[0..4], + * T**-1[0..4], IMXC[0..4] + * When building anything else, includes S, S**-1, Rcon + */ +#include "rijndael32.tab" + +#if defined(RIJNDAEL_INCLUDE_TABLES) +/* + * RIJNDAEL_INCLUDE_TABLES + */ +#define T0(i) _T0[i] +#define T1(i) _T1[i] +#define T2(i) _T2[i] +#define T3(i) _T3[i] +#define TInv0(i) _TInv0[i] +#define TInv1(i) _TInv1[i] +#define TInv2(i) _TInv2[i] +#define TInv3(i) _TInv3[i] +#define IMXC0(b) _IMXC0[b] +#define IMXC1(b) _IMXC1[b] +#define IMXC2(b) _IMXC2[b] +#define IMXC3(b) _IMXC3[b] +/* The S-box can be recovered from the T-tables */ +#ifdef IS_LITTLE_ENDIAN +#define SBOX(b) ((PRUint8)_T3[b]) +#else +#define SBOX(b) ((PRUint8)_T1[b]) +#endif +#define SINV(b) (_SInv[b]) + +#else /* not RIJNDAEL_INCLUDE_TABLES */ + +/* + * Code for generating T-table values. + */ + +#ifdef IS_LITTLE_ENDIAN +#define WORD4(b0, b1, b2, b3) \ + ((((PRUint32)b3) << 24) | \ + (((PRUint32)b2) << 16) | \ + (((PRUint32)b1) << 8) | \ + ((PRUint32)b0)) +#else +#define WORD4(b0, b1, b2, b3) \ + ((((PRUint32)b0) << 24) | \ + (((PRUint32)b1) << 16) | \ + (((PRUint32)b2) << 8) | \ + ((PRUint32)b3)) +#endif + +/* + * Define the S and S**-1 tables (both have been stored) + */ +#define SBOX(b) (_S[b]) +#define SINV(b) (_SInv[b]) + +/* + * The function xtime, used for Galois field multiplication + */ +#define XTIME(a) \ + ((a & 0x80) ? 
((a << 1) ^ 0x1b) : (a << 1)) + +/* Choose GFM method (macros or function) */ +#if defined(RIJNDAEL_GENERATE_TABLES_MACRO) || \ + defined(RIJNDAEL_GENERATE_VALUES_MACRO) + +/* + * Galois field GF(2**8) multipliers, in macro form + */ +#define GFM01(a) \ + (a) /* a * 01 = a, the identity */ +#define GFM02(a) \ + (XTIME(a) & 0xff) /* a * 02 = xtime(a) */ +#define GFM04(a) \ + (GFM02(GFM02(a))) /* a * 04 = xtime**2(a) */ +#define GFM08(a) \ + (GFM02(GFM04(a))) /* a * 08 = xtime**3(a) */ +#define GFM03(a) \ + (GFM01(a) ^ GFM02(a)) /* a * 03 = a * (01 + 02) */ +#define GFM09(a) \ + (GFM01(a) ^ GFM08(a)) /* a * 09 = a * (01 + 08) */ +#define GFM0B(a) \ + (GFM01(a) ^ GFM02(a) ^ GFM08(a)) /* a * 0B = a * (01 + 02 + 08) */ +#define GFM0D(a) \ + (GFM01(a) ^ GFM04(a) ^ GFM08(a)) /* a * 0D = a * (01 + 04 + 08) */ +#define GFM0E(a) \ + (GFM02(a) ^ GFM04(a) ^ GFM08(a)) /* a * 0E = a * (02 + 04 + 08) */ + +#else /* RIJNDAEL_GENERATE_TABLES or RIJNDAEL_GENERATE_VALUES */ + +/* GF_MULTIPLY + * + * multiply two bytes represented in GF(2**8), mod (x**4 + 1) + */ +PRUint8 +gfm(PRUint8 a, PRUint8 b) +{ + PRUint8 res = 0; + while (b > 0) { + res = (b & 0x01) ? 
res ^ a : res; + a = XTIME(a); + b >>= 1; + } + return res; +} + +#define GFM01(a) \ + (a) /* a * 01 = a, the identity */ +#define GFM02(a) \ + (XTIME(a) & 0xff) /* a * 02 = xtime(a) */ +#define GFM03(a) \ + (gfm(a, 0x03)) /* a * 03 */ +#define GFM09(a) \ + (gfm(a, 0x09)) /* a * 09 */ +#define GFM0B(a) \ + (gfm(a, 0x0B)) /* a * 0B */ +#define GFM0D(a) \ + (gfm(a, 0x0D)) /* a * 0D */ +#define GFM0E(a) \ + (gfm(a, 0x0E)) /* a * 0E */ + +#endif /* choosing GFM function */ + +/* + * The T-tables + */ +#define G_T0(i) \ + (WORD4(GFM02(SBOX(i)), GFM01(SBOX(i)), GFM01(SBOX(i)), GFM03(SBOX(i)))) +#define G_T1(i) \ + (WORD4(GFM03(SBOX(i)), GFM02(SBOX(i)), GFM01(SBOX(i)), GFM01(SBOX(i)))) +#define G_T2(i) \ + (WORD4(GFM01(SBOX(i)), GFM03(SBOX(i)), GFM02(SBOX(i)), GFM01(SBOX(i)))) +#define G_T3(i) \ + (WORD4(GFM01(SBOX(i)), GFM01(SBOX(i)), GFM03(SBOX(i)), GFM02(SBOX(i)))) + +/* + * The inverse T-tables + */ +#define G_TInv0(i) \ + (WORD4(GFM0E(SINV(i)), GFM09(SINV(i)), GFM0D(SINV(i)), GFM0B(SINV(i)))) +#define G_TInv1(i) \ + (WORD4(GFM0B(SINV(i)), GFM0E(SINV(i)), GFM09(SINV(i)), GFM0D(SINV(i)))) +#define G_TInv2(i) \ + (WORD4(GFM0D(SINV(i)), GFM0B(SINV(i)), GFM0E(SINV(i)), GFM09(SINV(i)))) +#define G_TInv3(i) \ + (WORD4(GFM09(SINV(i)), GFM0D(SINV(i)), GFM0B(SINV(i)), GFM0E(SINV(i)))) + +/* + * The inverse mix column tables + */ +#define G_IMXC0(i) \ + (WORD4(GFM0E(i), GFM09(i), GFM0D(i), GFM0B(i))) +#define G_IMXC1(i) \ + (WORD4(GFM0B(i), GFM0E(i), GFM09(i), GFM0D(i))) +#define G_IMXC2(i) \ + (WORD4(GFM0D(i), GFM0B(i), GFM0E(i), GFM09(i))) +#define G_IMXC3(i) \ + (WORD4(GFM09(i), GFM0D(i), GFM0B(i), GFM0E(i))) + +/* Now choose the T-table indexing method */ +#if defined(RIJNDAEL_GENERATE_VALUES) +/* generate values for the tables with a function*/ +static PRUint32 +gen_TInvXi(PRUint8 tx, PRUint8 i) +{ + PRUint8 si01, si02, si03, si04, si08, si09, si0B, si0D, si0E; + si01 = SINV(i); + si02 = XTIME(si01); + si04 = XTIME(si02); + si08 = XTIME(si04); + si03 = si02 ^ si01; + si09 
= si08 ^ si01; + si0B = si08 ^ si03; + si0D = si09 ^ si04; + si0E = si08 ^ si04 ^ si02; + switch (tx) { + case 0: + return WORD4(si0E, si09, si0D, si0B); + case 1: + return WORD4(si0B, si0E, si09, si0D); + case 2: + return WORD4(si0D, si0B, si0E, si09); + case 3: + return WORD4(si09, si0D, si0B, si0E); + } + return -1; +} +#define T0(i) G_T0(i) +#define T1(i) G_T1(i) +#define T2(i) G_T2(i) +#define T3(i) G_T3(i) +#define TInv0(i) gen_TInvXi(0, i) +#define TInv1(i) gen_TInvXi(1, i) +#define TInv2(i) gen_TInvXi(2, i) +#define TInv3(i) gen_TInvXi(3, i) +#define IMXC0(b) G_IMXC0(b) +#define IMXC1(b) G_IMXC1(b) +#define IMXC2(b) G_IMXC2(b) +#define IMXC3(b) G_IMXC3(b) +#elif defined(RIJNDAEL_GENERATE_VALUES_MACRO) +/* generate values for the tables with macros */ +#define T0(i) G_T0(i) +#define T1(i) G_T1(i) +#define T2(i) G_T2(i) +#define T3(i) G_T3(i) +#define TInv0(i) G_TInv0(i) +#define TInv1(i) G_TInv1(i) +#define TInv2(i) G_TInv2(i) +#define TInv3(i) G_TInv3(i) +#define IMXC0(b) G_IMXC0(b) +#define IMXC1(b) G_IMXC1(b) +#define IMXC2(b) G_IMXC2(b) +#define IMXC3(b) G_IMXC3(b) +#else /* RIJNDAEL_GENERATE_TABLES or RIJNDAEL_GENERATE_TABLES_MACRO */ +/* Generate T and T**-1 table values and store, then index */ +/* The inverse mix column tables are still generated */ +#define T0(i) rijndaelTables->T0[i] +#define T1(i) rijndaelTables->T1[i] +#define T2(i) rijndaelTables->T2[i] +#define T3(i) rijndaelTables->T3[i] +#define TInv0(i) rijndaelTables->TInv0[i] +#define TInv1(i) rijndaelTables->TInv1[i] +#define TInv2(i) rijndaelTables->TInv2[i] +#define TInv3(i) rijndaelTables->TInv3[i] +#define IMXC0(b) G_IMXC0(b) +#define IMXC1(b) G_IMXC1(b) +#define IMXC2(b) G_IMXC2(b) +#define IMXC3(b) G_IMXC3(b) +#endif /* choose T-table indexing method */ + +#endif /* not RIJNDAEL_INCLUDE_TABLES */ + +#if defined(RIJNDAEL_GENERATE_TABLES) || \ + defined(RIJNDAEL_GENERATE_TABLES_MACRO) + +/* Code to generate and store the tables */ + +struct rijndael_tables_str { + PRUint32 T0[256]; + 
PRUint32 T1[256]; + PRUint32 T2[256]; + PRUint32 T3[256]; + PRUint32 TInv0[256]; + PRUint32 TInv1[256]; + PRUint32 TInv2[256]; + PRUint32 TInv3[256]; +}; + +static struct rijndael_tables_str *rijndaelTables = NULL; +static PRCallOnceType coRTInit = { 0, 0, 0 }; +static PRStatus +init_rijndael_tables(void) +{ + PRUint32 i; + PRUint8 si01, si02, si03, si04, si08, si09, si0B, si0D, si0E; + struct rijndael_tables_str *rts; + rts = (struct rijndael_tables_str *) + PORT_Alloc(sizeof(struct rijndael_tables_str)); + if (!rts) + return PR_FAILURE; + for (i = 0; i < 256; i++) { + /* The forward values */ + si01 = SBOX(i); + si02 = XTIME(si01); + si03 = si02 ^ si01; + rts->T0[i] = WORD4(si02, si01, si01, si03); + rts->T1[i] = WORD4(si03, si02, si01, si01); + rts->T2[i] = WORD4(si01, si03, si02, si01); + rts->T3[i] = WORD4(si01, si01, si03, si02); + /* The inverse values */ + si01 = SINV(i); + si02 = XTIME(si01); + si04 = XTIME(si02); + si08 = XTIME(si04); + si03 = si02 ^ si01; + si09 = si08 ^ si01; + si0B = si08 ^ si03; + si0D = si09 ^ si04; + si0E = si08 ^ si04 ^ si02; + rts->TInv0[i] = WORD4(si0E, si09, si0D, si0B); + rts->TInv1[i] = WORD4(si0B, si0E, si09, si0D); + rts->TInv2[i] = WORD4(si0D, si0B, si0E, si09); + rts->TInv3[i] = WORD4(si09, si0D, si0B, si0E); + } + /* wait until all the values are in to set */ + rijndaelTables = rts; + return PR_SUCCESS; +} + +#endif /* code to generate tables */ + +/************************************************************************** + * + * Stuff related to the Rijndael key schedule + * + *************************************************************************/ + +#define SUBBYTE(w) \ + ((((PRUint32)SBOX((w >> 24) & 0xff)) << 24) | \ + (((PRUint32)SBOX((w >> 16) & 0xff)) << 16) | \ + (((PRUint32)SBOX((w >> 8) & 0xff)) << 8) | \ + (((PRUint32)SBOX((w)&0xff)))) + +#ifdef IS_LITTLE_ENDIAN +#define ROTBYTE(b) \ + ((b >> 8) | (b << 24)) +#else +#define ROTBYTE(b) \ + ((b << 8) | (b >> 24)) +#endif + +/* rijndael_key_expansion7 + * + * 
Generate the expanded key from the key input by the user. + * XXX + * Nk == 7 (224 key bits) is a weird case. Since Nk > 6, an added SubByte + * transformation is done periodically. The period is every 4 bytes, and + * since 7%4 != 0 this happens at different times for each key word (unlike + * Nk == 8 where it happens twice in every key word, in the same positions). + * For now, I'm implementing this case "dumbly", w/o any unrolling. + */ +static SECStatus +rijndael_key_expansion7(AESContext *cx, const unsigned char *key, unsigned int Nk) +{ + PRUint32 *const schedule = cx->expandedKey; + unsigned int idx; + /* Seed the schedule with the Nk raw key words. */ + memcpy(schedule, key, Nk * 4); + /* Every later word is the word Nk positions back XORed with a (possibly + * transformed) copy of its immediate predecessor. + */ + for (idx = Nk; idx < cx->Nb * (cx->Nr + 1); ++idx) { + PRUint32 prev = schedule[idx - 1]; + switch (idx % Nk) { + case 0: + /* start of a key-length group: rotate, substitute, add Rcon */ + prev = SUBBYTE(ROTBYTE(prev)) ^ Rcon[idx / Nk - 1]; + break; + case 4: + /* extra SubByte step used for the long key sizes */ + prev = SUBBYTE(prev); + break; + default: + break; + } + schedule[idx] = schedule[idx - Nk] ^ prev; + } + return SECSuccess; +} + +/* rijndael_key_expansion + * + * Generate the expanded key from the key input by the user. 
+ */ +static SECStatus +rijndael_key_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk) +{ /* Fills cx->expandedKey with Nb * (Nr + 1) words, starting from the Nk + * words of key; Nk == 7 is delegated to rijndael_key_expansion7. */ + unsigned int i; + PRUint32 *W; + PRUint32 *pW; + PRUint32 tmp; + unsigned int round_key_words = cx->Nb * (cx->Nr + 1); /* words in the whole schedule */ + if (Nk == 7) + return rijndael_key_expansion7(cx, key, Nk); + W = cx->expandedKey; + /* The first Nk words contain the input cipher key */ + memcpy(W, key, Nk * 4); + i = Nk; + pW = W + i - 1; /* pW trails i: "tmp = *pW++" reads W[i-1] and leaves pW at W[i] */ + /* Loop over all sets of Nk words, except the last */ + while (i < round_key_words - Nk) { /* each pass emits Nk words: four unrolled, the rest in the switch */ + tmp = *pW++; + tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1]; + *pW = W[i++ - Nk] ^ tmp; + tmp = *pW++; + *pW = W[i++ - Nk] ^ tmp; + tmp = *pW++; + *pW = W[i++ - Nk] ^ tmp; + tmp = *pW++; + *pW = W[i++ - Nk] ^ tmp; + if (Nk == 4) + continue; + switch (Nk) { /* the entry case selects how many extra words (Nk - 4) are produced */ + case 8: + tmp = *pW++; + tmp = SUBBYTE(tmp); + *pW = W[i++ - Nk] ^ tmp; /* fallthrough */ + case 7: /* not reached from this function: Nk == 7 returned above */ + tmp = *pW++; + *pW = W[i++ - Nk] ^ tmp; /* fallthrough */ + case 6: + tmp = *pW++; + *pW = W[i++ - Nk] ^ tmp; /* fallthrough */ + case 5: + tmp = *pW++; + *pW = W[i++ - Nk] ^ tmp; + } + } + /* Generate the last word */ + tmp = *pW++; + tmp = SUBBYTE(ROTBYTE(tmp)) ^ Rcon[i / Nk - 1]; + *pW = W[i++ - Nk] ^ tmp; + /* There may be overflow here, if Nk % (Nb * (Nr + 1)) > 0. However, + * since the above loop generated all but the last Nk key words, there + * is no more need for the SubByte transformation. + */ + if (Nk < 8) { + for (; i < round_key_words; ++i) { + tmp = *pW++; + *pW = W[i - Nk] ^ tmp; + } + } else { + /* except in the case when Nk == 8. Then one more SubByte may have + * to be performed, at i % Nk == 4. + */ + for (; i < round_key_words; ++i) { + tmp = *pW++; + if (i % Nk == 4) + tmp = SUBBYTE(tmp); + *pW = W[i - Nk] ^ tmp; + } + } + return SECSuccess; /* the only other result is whatever rijndael_key_expansion7 returned */ +} + +/* rijndael_invkey_expansion + * + * Generate the expanded key for the inverse cipher from the key input by + * the user. 
+ */ +static SECStatus +rijndael_invkey_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk) +{ + unsigned int round; + unsigned int col; + unsigned int colsPerRound; + PRUint32 *word; + PRUint8 *bytes; + int Nb = cx->Nb; + /* The decryption schedule starts out as the ordinary forward schedule. */ + if (rijndael_key_expansion(cx, key, Nk) != SECSuccess) + return SECFailure; + /* Four columns are always transformed per round key; block widths of + * five to eight words transform Nb columns instead. + */ + colsPerRound = (Nb >= 5 && Nb <= 8) ? (unsigned int)Nb : 4; + /* Apply InvMixColumn to every round key except the first and the last. + * Each key word is one column of the key matrix, and each column is + * multiplied by the InvMixColumn matrix. + * [ 0E 0B 0D 09 ] [ b0 ] + * [ 09 0E 0B 0D ] * [ b1 ] + * [ 0D 09 0E 0B ] [ b2 ] + * [ 0B 0D 09 0E ] [ b3 ] + */ + word = cx->expandedKey + cx->Nb; + for (round = 1; round < cx->Nr; ++round) { + for (col = 0; col < colsPerRound; ++col, ++word) { + bytes = (PRUint8 *)word; + *word = IMXC0(bytes[0]) ^ IMXC1(bytes[1]) ^ IMXC2(bytes[2]) ^ IMXC3(bytes[3]); + } + } + return SECSuccess; +} +/************************************************************************** + * + * Stuff related to Rijndael encryption/decryption, optimized for + * a 128-bit blocksize. 
+ * + *************************************************************************/ + +#ifdef IS_LITTLE_ENDIAN +#define BYTE0WORD(w) ((w)&0x000000ff) +#define BYTE1WORD(w) ((w)&0x0000ff00) +#define BYTE2WORD(w) ((w)&0x00ff0000) +#define BYTE3WORD(w) ((w)&0xff000000) +#else +#define BYTE0WORD(w) ((w)&0xff000000) +#define BYTE1WORD(w) ((w)&0x00ff0000) +#define BYTE2WORD(w) ((w)&0x0000ff00) +#define BYTE3WORD(w) ((w)&0x000000ff) +#endif + +typedef union { + PRUint32 w[4]; + PRUint8 b[16]; +} rijndael_state; + +#define COLUMN_0(state) state.w[0] +#define COLUMN_1(state) state.w[1] +#define COLUMN_2(state) state.w[2] +#define COLUMN_3(state) state.w[3] + +#define STATE_BYTE(i) state.b[i] + +static SECStatus NO_SANITIZE_ALIGNMENT +rijndael_encryptBlock128(AESContext *cx, + unsigned char *output, + const unsigned char *input) +{ + unsigned int r; + PRUint32 *roundkeyw; + rijndael_state state; + PRUint32 C0, C1, C2, C3; +#if defined(NSS_X86_OR_X64) +#define pIn input +#define pOut output +#else + unsigned char *pIn, *pOut; + PRUint32 inBuf[4], outBuf[4]; + + if ((ptrdiff_t)input & 0x3) { + memcpy(inBuf, input, sizeof inBuf); + pIn = (unsigned char *)inBuf; + } else { + pIn = (unsigned char *)input; + } + if ((ptrdiff_t)output & 0x3) { + pOut = (unsigned char *)outBuf; + } else { + pOut = (unsigned char *)output; + } +#endif + roundkeyw = cx->expandedKey; + /* Step 1: Add Round Key 0 to initial state */ + COLUMN_0(state) = *((PRUint32 *)(pIn)) ^ *roundkeyw++; + COLUMN_1(state) = *((PRUint32 *)(pIn + 4)) ^ *roundkeyw++; + COLUMN_2(state) = *((PRUint32 *)(pIn + 8)) ^ *roundkeyw++; + COLUMN_3(state) = *((PRUint32 *)(pIn + 12)) ^ *roundkeyw++; + /* Step 2: Loop over rounds [1..NR-1] */ + for (r = 1; r < cx->Nr; ++r) { + /* Do ShiftRow, ByteSub, and MixColumn all at once */ + C0 = T0(STATE_BYTE(0)) ^ + T1(STATE_BYTE(5)) ^ + T2(STATE_BYTE(10)) ^ + T3(STATE_BYTE(15)); + C1 = T0(STATE_BYTE(4)) ^ + T1(STATE_BYTE(9)) ^ + T2(STATE_BYTE(14)) ^ + T3(STATE_BYTE(3)); + C2 = T0(STATE_BYTE(8)) 
^ + T1(STATE_BYTE(13)) ^ + T2(STATE_BYTE(2)) ^ + T3(STATE_BYTE(7)); + C3 = T0(STATE_BYTE(12)) ^ + T1(STATE_BYTE(1)) ^ + T2(STATE_BYTE(6)) ^ + T3(STATE_BYTE(11)); + /* Round key addition */ + COLUMN_0(state) = C0 ^ *roundkeyw++; + COLUMN_1(state) = C1 ^ *roundkeyw++; + COLUMN_2(state) = C2 ^ *roundkeyw++; + COLUMN_3(state) = C3 ^ *roundkeyw++; + } + /* Step 3: Do the last round */ + /* Final round does not employ MixColumn */ + C0 = ((BYTE0WORD(T2(STATE_BYTE(0)))) | + (BYTE1WORD(T3(STATE_BYTE(5)))) | + (BYTE2WORD(T0(STATE_BYTE(10)))) | + (BYTE3WORD(T1(STATE_BYTE(15))))) ^ + *roundkeyw++; + C1 = ((BYTE0WORD(T2(STATE_BYTE(4)))) | + (BYTE1WORD(T3(STATE_BYTE(9)))) | + (BYTE2WORD(T0(STATE_BYTE(14)))) | + (BYTE3WORD(T1(STATE_BYTE(3))))) ^ + *roundkeyw++; + C2 = ((BYTE0WORD(T2(STATE_BYTE(8)))) | + (BYTE1WORD(T3(STATE_BYTE(13)))) | + (BYTE2WORD(T0(STATE_BYTE(2)))) | + (BYTE3WORD(T1(STATE_BYTE(7))))) ^ + *roundkeyw++; + C3 = ((BYTE0WORD(T2(STATE_BYTE(12)))) | + (BYTE1WORD(T3(STATE_BYTE(1)))) | + (BYTE2WORD(T0(STATE_BYTE(6)))) | + (BYTE3WORD(T1(STATE_BYTE(11))))) ^ + *roundkeyw++; + *((PRUint32 *)pOut) = C0; + *((PRUint32 *)(pOut + 4)) = C1; + *((PRUint32 *)(pOut + 8)) = C2; + *((PRUint32 *)(pOut + 12)) = C3; +#if defined(NSS_X86_OR_X64) +#undef pIn +#undef pOut +#else + if ((ptrdiff_t)output & 0x3) { + memcpy(output, outBuf, sizeof outBuf); + } +#endif + return SECSuccess; +} + +static SECStatus NO_SANITIZE_ALIGNMENT +rijndael_decryptBlock128(AESContext *cx, + unsigned char *output, + const unsigned char *input) +{ + int r; + PRUint32 *roundkeyw; + rijndael_state state; + PRUint32 C0, C1, C2, C3; +#if defined(NSS_X86_OR_X64) +#define pIn input +#define pOut output +#else + unsigned char *pIn, *pOut; + PRUint32 inBuf[4], outBuf[4]; + + if ((ptrdiff_t)input & 0x3) { + memcpy(inBuf, input, sizeof inBuf); + pIn = (unsigned char *)inBuf; + } else { + pIn = (unsigned char *)input; + } + if ((ptrdiff_t)output & 0x3) { + pOut = (unsigned char *)outBuf; + } else { + pOut = (unsigned 
char *)output; + } +#endif + roundkeyw = cx->expandedKey + cx->Nb * cx->Nr + 3; + /* reverse the final key addition */ + COLUMN_3(state) = *((PRUint32 *)(pIn + 12)) ^ *roundkeyw--; + COLUMN_2(state) = *((PRUint32 *)(pIn + 8)) ^ *roundkeyw--; + COLUMN_1(state) = *((PRUint32 *)(pIn + 4)) ^ *roundkeyw--; + COLUMN_0(state) = *((PRUint32 *)(pIn)) ^ *roundkeyw--; + /* Loop over rounds in reverse [NR..1] */ + for (r = cx->Nr; r > 1; --r) { + /* Invert the (InvByteSub*InvMixColumn)(InvShiftRow(state)) */ + C0 = TInv0(STATE_BYTE(0)) ^ + TInv1(STATE_BYTE(13)) ^ + TInv2(STATE_BYTE(10)) ^ + TInv3(STATE_BYTE(7)); + C1 = TInv0(STATE_BYTE(4)) ^ + TInv1(STATE_BYTE(1)) ^ + TInv2(STATE_BYTE(14)) ^ + TInv3(STATE_BYTE(11)); + C2 = TInv0(STATE_BYTE(8)) ^ + TInv1(STATE_BYTE(5)) ^ + TInv2(STATE_BYTE(2)) ^ + TInv3(STATE_BYTE(15)); + C3 = TInv0(STATE_BYTE(12)) ^ + TInv1(STATE_BYTE(9)) ^ + TInv2(STATE_BYTE(6)) ^ + TInv3(STATE_BYTE(3)); + /* Invert the key addition step */ + COLUMN_3(state) = C3 ^ *roundkeyw--; + COLUMN_2(state) = C2 ^ *roundkeyw--; + COLUMN_1(state) = C1 ^ *roundkeyw--; + COLUMN_0(state) = C0 ^ *roundkeyw--; + } + /* inverse sub */ + pOut[0] = SINV(STATE_BYTE(0)); + pOut[1] = SINV(STATE_BYTE(13)); + pOut[2] = SINV(STATE_BYTE(10)); + pOut[3] = SINV(STATE_BYTE(7)); + pOut[4] = SINV(STATE_BYTE(4)); + pOut[5] = SINV(STATE_BYTE(1)); + pOut[6] = SINV(STATE_BYTE(14)); + pOut[7] = SINV(STATE_BYTE(11)); + pOut[8] = SINV(STATE_BYTE(8)); + pOut[9] = SINV(STATE_BYTE(5)); + pOut[10] = SINV(STATE_BYTE(2)); + pOut[11] = SINV(STATE_BYTE(15)); + pOut[12] = SINV(STATE_BYTE(12)); + pOut[13] = SINV(STATE_BYTE(9)); + pOut[14] = SINV(STATE_BYTE(6)); + pOut[15] = SINV(STATE_BYTE(3)); + /* final key addition */ + *((PRUint32 *)(pOut + 12)) ^= *roundkeyw--; + *((PRUint32 *)(pOut + 8)) ^= *roundkeyw--; + *((PRUint32 *)(pOut + 4)) ^= *roundkeyw--; + *((PRUint32 *)pOut) ^= *roundkeyw--; +#if defined(NSS_X86_OR_X64) +#undef pIn +#undef pOut +#else + if ((ptrdiff_t)output & 0x3) { + memcpy(output, 
outBuf, sizeof outBuf); + } +#endif + return SECSuccess; +} + +/************************************************************************** + * + * Stuff related to general Rijndael encryption/decryption, for blocksizes + * greater than 128 bits. + * + * XXX This code is currently untested! So far, AES specs have only been + * released for 128 bit blocksizes. This will be tested, but for now + * only the code above has been tested using known values. + * + *************************************************************************/ + +#define COLUMN(array, j) *((PRUint32 *)(array + j)) + +SECStatus +rijndael_encryptBlock(AESContext *cx, + unsigned char *output, + const unsigned char *input) +{ + return SECFailure; +#ifdef rijndael_large_blocks_fixed + unsigned int j, r, Nb; + unsigned int c2 = 0, c3 = 0; + PRUint32 *roundkeyw; + PRUint8 clone[RIJNDAEL_MAX_STATE_SIZE]; + Nb = cx->Nb; + roundkeyw = cx->expandedKey; + /* Step 1: Add Round Key 0 to initial state */ + for (j = 0; j < 4 * Nb; j += 4) { + COLUMN(clone, j) = COLUMN(input, j) ^ *roundkeyw++; + } + /* Step 2: Loop over rounds [1..NR-1] */ + for (r = 1; r < cx->Nr; ++r) { + for (j = 0; j < Nb; ++j) { + COLUMN(output, j) = T0(STATE_BYTE(4 * j)) ^ + T1(STATE_BYTE(4 * ((j + 1) % Nb) + 1)) ^ + T2(STATE_BYTE(4 * ((j + c2) % Nb) + 2)) ^ + T3(STATE_BYTE(4 * ((j + c3) % Nb) + 3)); + } + for (j = 0; j < 4 * Nb; j += 4) { + COLUMN(clone, j) = COLUMN(output, j) ^ *roundkeyw++; + } + } + /* Step 3: Do the last round */ + /* Final round does not employ MixColumn */ + for (j = 0; j < Nb; ++j) { + COLUMN(output, j) = ((BYTE0WORD(T2(STATE_BYTE(4 * j)))) | + (BYTE1WORD(T3(STATE_BYTE(4 * (j + 1) % Nb) + 1))) | + (BYTE2WORD(T0(STATE_BYTE(4 * (j + c2) % Nb) + 2))) | + (BYTE3WORD(T1(STATE_BYTE(4 * (j + c3) % Nb) + 3)))) ^ + *roundkeyw++; + } + return SECSuccess; +#endif +} + +SECStatus +rijndael_decryptBlock(AESContext *cx, + unsigned char *output, + const unsigned char *input) +{ + return SECFailure; +#ifdef 
rijndael_large_blocks_fixed + int j, r, Nb; + int c2 = 0, c3 = 0; + PRUint32 *roundkeyw; + PRUint8 clone[RIJNDAEL_MAX_STATE_SIZE]; + Nb = cx->Nb; + roundkeyw = cx->expandedKey + cx->Nb * cx->Nr + 3; + /* reverse key addition */ + for (j = 4 * Nb; j >= 0; j -= 4) { + COLUMN(clone, j) = COLUMN(input, j) ^ *roundkeyw--; + } + /* Loop over rounds in reverse [NR..1] */ + for (r = cx->Nr; r > 1; --r) { + /* Invert the (InvByteSub*InvMixColumn)(InvShiftRow(state)) */ + for (j = 0; j < Nb; ++j) { + COLUMN(output, 4 * j) = TInv0(STATE_BYTE(4 * j)) ^ + TInv1(STATE_BYTE(4 * (j + Nb - 1) % Nb) + 1) ^ + TInv2(STATE_BYTE(4 * (j + Nb - c2) % Nb) + 2) ^ + TInv3(STATE_BYTE(4 * (j + Nb - c3) % Nb) + 3); + } + /* Invert the key addition step */ + for (j = 4 * Nb; j >= 0; j -= 4) { + COLUMN(clone, j) = COLUMN(output, j) ^ *roundkeyw--; + } + } + /* inverse sub */ + for (j = 0; j < 4 * Nb; ++j) { + output[j] = SINV(clone[j]); + } + /* final key addition */ + for (j = 4 * Nb; j >= 0; j -= 4) { + COLUMN(output, j) ^= *roundkeyw--; + } + return SECSuccess; +#endif +} + +/************************************************************************** + * + * Rijndael modes of operation (ECB and CBC) + * + *************************************************************************/ + +static SECStatus +rijndael_encryptECB(AESContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen, + unsigned int blocksize) +{ + SECStatus rv; + AESBlockFunc *encryptor; + + encryptor = (blocksize == RIJNDAEL_MIN_BLOCKSIZE) + ? 
&rijndael_encryptBlock128 + : &rijndael_encryptBlock; + while (inputLen > 0) { + rv = (*encryptor)(cx, output, input); + if (rv != SECSuccess) + return rv; + output += blocksize; + input += blocksize; + inputLen -= blocksize; + } + return SECSuccess; +} + +static SECStatus +rijndael_encryptCBC(AESContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen, + unsigned int blocksize) +{ + unsigned int j; + SECStatus rv; + AESBlockFunc *encryptor; + unsigned char *lastblock; + unsigned char inblock[RIJNDAEL_MAX_STATE_SIZE * 8]; + + if (!inputLen) + return SECSuccess; + lastblock = cx->iv; + encryptor = (blocksize == RIJNDAEL_MIN_BLOCKSIZE) + ? &rijndael_encryptBlock128 + : &rijndael_encryptBlock; + while (inputLen > 0) { + /* XOR with the last block (IV if first block) */ + for (j = 0; j < blocksize; ++j) + inblock[j] = input[j] ^ lastblock[j]; + /* encrypt */ + rv = (*encryptor)(cx, output, inblock); + if (rv != SECSuccess) + return rv; + /* move to the next block */ + lastblock = output; + output += blocksize; + input += blocksize; + inputLen -= blocksize; + } + memcpy(cx->iv, lastblock, blocksize); + return SECSuccess; +} + +static SECStatus +rijndael_decryptECB(AESContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen, + unsigned int blocksize) +{ + SECStatus rv; + AESBlockFunc *decryptor; + + decryptor = (blocksize == RIJNDAEL_MIN_BLOCKSIZE) + ? 
&rijndael_decryptBlock128 + : &rijndael_decryptBlock; + while (inputLen > 0) { + rv = (*decryptor)(cx, output, input); + if (rv != SECSuccess) + return rv; + output += blocksize; + input += blocksize; + inputLen -= blocksize; + } + return SECSuccess; +} + +static SECStatus +rijndael_decryptCBC(AESContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen, + unsigned int blocksize) +{ + SECStatus rv; + AESBlockFunc *decryptor; + const unsigned char *in; + unsigned char *out; + unsigned int j; + unsigned char newIV[RIJNDAEL_MAX_BLOCKSIZE]; + + if (!inputLen) + return SECSuccess; + PORT_Assert(output - input >= 0 || input - output >= (int)inputLen); + decryptor = (blocksize == RIJNDAEL_MIN_BLOCKSIZE) + ? &rijndael_decryptBlock128 + : &rijndael_decryptBlock; + in = input + (inputLen - blocksize); + memcpy(newIV, in, blocksize); + out = output + (inputLen - blocksize); + while (inputLen > blocksize) { + rv = (*decryptor)(cx, out, in); + if (rv != SECSuccess) + return rv; + for (j = 0; j < blocksize; ++j) + out[j] ^= in[(int)(j - blocksize)]; + out -= blocksize; + in -= blocksize; + inputLen -= blocksize; + } + if (in == input) { + rv = (*decryptor)(cx, out, in); + if (rv != SECSuccess) + return rv; + for (j = 0; j < blocksize; ++j) + out[j] ^= cx->iv[j]; + } + memcpy(cx->iv, newIV, blocksize); + return SECSuccess; +} + +/************************************************************************ + * + * BLAPI Interface functions + * + * The following functions implement the encryption routines defined in + * BLAPI for the AES cipher, Rijndael. + * + ***********************************************************************/ + +AESContext * +AES_AllocateContext(void) +{ + return PORT_ZNew(AESContext); +} + +#ifdef INTEL_GCM +/* + * Adapted from the example code in "How to detect New Instruction support in + * the 4th generation Intel Core processor family" by Max Locktyukhin. 
+ * + * XGETBV: + * Reads an extended control register (XCR) specified by ECX into EDX:EAX. + */ +static PRBool +check_xcr0_ymm() +{ + PRUint32 xcr0; +#if defined(_MSC_VER) +#if defined(_M_IX86) + __asm { + mov ecx, 0 + xgetbv + mov xcr0, eax + } +#else + xcr0 = (PRUint32)_xgetbv(0); /* Requires VS2010 SP1 or later. */ +#endif +#else + __asm__("xgetbv" + : "=a"(xcr0) + : "c"(0) + : "%edx"); +#endif + /* Check if xmm and ymm state are enabled in XCR0. */ + return (xcr0 & 6) == 6; +} +#endif + +/* +** Initialize a new AES context suitable for AES encryption/decryption in +** the ECB or CBC mode. +** "mode" the mode of operation, which must be NSS_AES or NSS_AES_CBC +*/ +static SECStatus +aes_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize, + const unsigned char *iv, int mode, unsigned int encrypt, + unsigned int blocksize) +{ + unsigned int Nk; + /* According to Rijndael AES Proposal, section 12.1, block and key + * lengths between 128 and 256 bits are supported, as long as the + * length in bytes is divisible by 4. + */ + if (key == NULL || + keysize < RIJNDAEL_MIN_BLOCKSIZE || + keysize > RIJNDAEL_MAX_BLOCKSIZE || + keysize % 4 != 0 || + blocksize < RIJNDAEL_MIN_BLOCKSIZE || + blocksize > RIJNDAEL_MAX_BLOCKSIZE || + blocksize % 4 != 0) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + if (mode != NSS_AES && mode != NSS_AES_CBC) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + if (mode == NSS_AES_CBC && iv == NULL) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + if (!cx) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } +#ifdef USE_HW_AES + if (has_intel_aes == 0) { + unsigned long eax, ebx, ecx, edx; + char *disable_hw_aes = PR_GetEnvSecure("NSS_DISABLE_HW_AES"); + + if (disable_hw_aes == NULL) { + freebl_cpuid(1, &eax, &ebx, &ecx, &edx); + has_intel_aes = (ecx & (1 << 25)) != 0 ? 1 : -1; +#ifdef INTEL_GCM + has_intel_clmul = (ecx & (1 << 1)) != 0 ? 
1 : -1; + if ((ecx & (1 << 27)) != 0 && (ecx & (1 << 28)) != 0 && + check_xcr0_ymm()) { + has_intel_avx = 1; + } else { + has_intel_avx = -1; + } +#endif + } else { + has_intel_aes = -1; +#ifdef INTEL_GCM + has_intel_avx = -1; + has_intel_clmul = -1; +#endif + } + } + use_hw_aes = (PRBool)(has_intel_aes > 0 && (keysize % 8) == 0 && blocksize == 16); +#ifdef INTEL_GCM + use_hw_gcm = (PRBool)(use_hw_aes && has_intel_avx > 0 && has_intel_clmul > 0); +#endif +#endif /* USE_HW_AES */ + /* Nb = (block size in bits) / 32 */ + cx->Nb = blocksize / 4; + /* Nk = (key size in bits) / 32 */ + Nk = keysize / 4; + /* Obtain number of rounds from "table" */ + cx->Nr = RIJNDAEL_NUM_ROUNDS(Nk, cx->Nb); + /* copy in the iv, if neccessary */ + if (mode == NSS_AES_CBC) { + memcpy(cx->iv, iv, blocksize); +#ifdef USE_HW_AES + if (use_hw_aes) { + cx->worker = (freeblCipherFunc) + intel_aes_cbc_worker(encrypt, keysize); + } else +#endif + { + cx->worker = (freeblCipherFunc)(encrypt + ? &rijndael_encryptCBC + : &rijndael_decryptCBC); + } + } else { +#ifdef USE_HW_AES + if (use_hw_aes) { + cx->worker = (freeblCipherFunc) + intel_aes_ecb_worker(encrypt, keysize); + } else +#endif + { + cx->worker = (freeblCipherFunc)(encrypt + ? 
&rijndael_encryptECB + : &rijndael_decryptECB); + } + } + PORT_Assert((cx->Nb * (cx->Nr + 1)) <= RIJNDAEL_MAX_EXP_KEY_SIZE); + if ((cx->Nb * (cx->Nr + 1)) > RIJNDAEL_MAX_EXP_KEY_SIZE) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + goto cleanup; + } +#ifdef USE_HW_AES + if (use_hw_aes) { + intel_aes_init(encrypt, keysize); + } else +#endif + { + +#if defined(RIJNDAEL_GENERATE_TABLES) || \ + defined(RIJNDAEL_GENERATE_TABLES_MACRO) + /* Build the T-tables once, on first use. */ + if (rijndaelTables == NULL) { + if (PR_CallOnce(&coRTInit, init_rijndael_tables) != PR_SUCCESS) { + goto cleanup; + } + } +#endif + /* Generate expanded key */ + if (encrypt) { + if (rijndael_key_expansion(cx, key, Nk) != SECSuccess) + goto cleanup; + } else { + if (rijndael_invkey_expansion(cx, key, Nk) != SECSuccess) + goto cleanup; + } + } + cx->worker_cx = cx; + cx->destroy = NULL; + cx->isBlock = PR_TRUE; + return SECSuccess; +cleanup: + return SECFailure; +} + +/* + * AES_InitContext + * + * Initialize an already-allocated context for the requested mode. CTS is + * layered on a CBC base context; GCM and CTR are layered on an ECB base + * context that always encrypts. Returns SECFailure, after tearing down any + * partial state, when the base initialization fails or the mode-specific + * context cannot be created. + */ +SECStatus +AES_InitContext(AESContext *cx, const unsigned char *key, unsigned int keysize, + const unsigned char *iv, int mode, unsigned int encrypt, + unsigned int blocksize) +{ + int basemode = mode; + PRBool baseencrypt = encrypt; + SECStatus rv; + + /* cx is dereferenced below, before aes_InitContext gets to validate it */ + if (cx == NULL) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + switch (mode) { + case NSS_AES_CTS: + basemode = NSS_AES_CBC; + break; + case NSS_AES_GCM: + case NSS_AES_CTR: + basemode = NSS_AES; + baseencrypt = PR_TRUE; + break; + default: + /* other modes keep the caller's mode and direction */ + break; + } + /* make sure enough is initialized so we can safely call Destroy */ + cx->worker_cx = NULL; + cx->destroy = NULL; + rv = aes_InitContext(cx, key, keysize, iv, basemode, + baseencrypt, blocksize); + if (rv != SECSuccess) { + AES_DestroyContext(cx, PR_FALSE); + return rv; + } + cx->mode = mode; + + /* finally, set up any mode specific contexts */ + switch (mode) { + case NSS_AES_CTS: + cx->worker_cx = CTS_CreateContext(cx, cx->worker, iv, blocksize); + cx->worker = (freeblCipherFunc)(encrypt ? 
CTS_EncryptUpdate : CTS_DecryptUpdate); + cx->destroy = (freeblDestroyFunc)CTS_DestroyContext; + cx->isBlock = PR_FALSE; + break; + case NSS_AES_GCM: +#ifdef INTEL_GCM + if (use_hw_gcm) { + cx->worker_cx = intel_AES_GCM_CreateContext(cx, cx->worker, iv, blocksize); + cx->worker = (freeblCipherFunc)(encrypt ? intel_AES_GCM_EncryptUpdate : intel_AES_GCM_DecryptUpdate); + cx->destroy = (freeblDestroyFunc)intel_AES_GCM_DestroyContext; + cx->isBlock = PR_FALSE; + } else +#endif + { + cx->worker_cx = GCM_CreateContext(cx, cx->worker, iv, blocksize); + cx->worker = (freeblCipherFunc)(encrypt ? GCM_EncryptUpdate : GCM_DecryptUpdate); + cx->destroy = (freeblDestroyFunc)GCM_DestroyContext; + cx->isBlock = PR_FALSE; + } + break; + case NSS_AES_CTR: + cx->worker_cx = CTR_CreateContext(cx, cx->worker, iv, blocksize); +#if defined(USE_HW_AES) && defined(_MSC_VER) + if (use_hw_aes) { + cx->worker = (freeblCipherFunc)CTR_Update_HW_AES; + } else +#endif + { + cx->worker = (freeblCipherFunc)CTR_Update; + } + cx->destroy = (freeblDestroyFunc)CTR_DestroyContext; + cx->isBlock = PR_FALSE; + break; + default: + /* everything has already been set up by aes_InitContext, just + * return */ + return SECSuccess; + } + /* check to see if we succeeded in getting the worker context */ + if (cx->worker_cx == NULL) { + /* no, just destroy the existing context */ + cx->destroy = NULL; /* paranoia, though you can see a dozen lines */ + /* below that this isn't necessary */ + AES_DestroyContext(cx, PR_FALSE); + return SECFailure; + } + return SECSuccess; +} + +/* AES_CreateContext + * + * create a new context for Rijndael operations + */ +AESContext * +AES_CreateContext(const unsigned char *key, const unsigned char *iv, + int mode, int encrypt, + unsigned int keysize, unsigned int blocksize) +{ + AESContext *cx = AES_AllocateContext(); + if (cx) { + SECStatus rv = AES_InitContext(cx, key, keysize, iv, mode, encrypt, + blocksize); + if (rv != SECSuccess) { + AES_DestroyContext(cx, PR_TRUE); + cx = 
NULL; + } + } + return cx; +} + +/* + * AES_DestroyContext + * + * Zero an AES cipher context. If freeit is true, also free the pointer + * to the context. + * + * Any mode-specific worker context installed by AES_InitContext is destroyed + * first through cx->destroy. The context itself (expanded key, IV and all + * other members) is then cleared, so no key material is left behind in + * memory that is about to be freed or reused. cx must not be NULL. + */ +void +AES_DestroyContext(AESContext *cx, PRBool freeit) +{ + if (cx->worker_cx && cx->destroy) { + (*cx->destroy)(cx->worker_cx, PR_TRUE); + cx->worker_cx = NULL; + cx->destroy = NULL; + } + /* Scrub the key schedule and IV. This also leaves worker_cx and destroy + * NULL, so a second AES_DestroyContext call on the same context (as + * AES_CreateContext does after a failed AES_InitContext) stays safe. */ + memset(cx, 0, sizeof(*cx)); + if (freeit) + PORT_Free(cx); +} + +/* + * AES_Encrypt + * + * Encrypt an arbitrary-length buffer. The output buffer must already be + * allocated to at least inputLen. + * + * Fails with SEC_ERROR_INVALID_ARGS for a NULL cx or output (or a NULL input + * with a non-zero inputLen), with SEC_ERROR_INPUT_LEN when a block mode is + * given a length that is not a multiple of the block size, and with + * SEC_ERROR_OUTPUT_LEN when maxOutputLen < inputLen (or, where unsigned int + * is wider than 32 bits, when a GCM input exceeds MP_32BIT_MAX bytes). + * Otherwise returns whatever the mode's worker function returns. 
+ */ +SECStatus +AES_Encrypt(AESContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + int blocksize; + /* Check args */ + if (cx == NULL || output == NULL || (input == NULL && inputLen != 0)) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + blocksize = 4 * cx->Nb; /* Nb is kept in 32-bit words */ + if (cx->isBlock && (inputLen % blocksize != 0)) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + if (maxOutputLen < inputLen) { + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + *outputLen = inputLen; +#if UINT_MAX > MP_32BIT_MAX + /* + * we can guarantee that GCM won't overflow if we limit the input to + * 2^36 bytes. For simplicity, we are limiting it to 2^32 for now. + * + * We do it here to cover both hardware and software GCM operations. + */ + { + PR_STATIC_ASSERT(sizeof(unsigned int) > 4); + } + if ((cx->mode == NSS_AES_GCM) && (inputLen > MP_32BIT_MAX)) { + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } +#else + /* unsigned int cannot exceed 32 bits here, so no such check is needed */ + { + PR_STATIC_ASSERT(sizeof(unsigned int) <= 4); + } +#endif + + return (*cx->worker)(cx->worker_cx, output, outputLen, maxOutputLen, + input, inputLen, blocksize); +} + +/* + * AES_Decrypt + * + * Decrypt an arbitrary-length buffer. The output buffer must already be + * allocated to at least inputLen. 
+ */ +SECStatus +AES_Decrypt(AESContext *cx, unsigned char *output, + unsigned int *outputLen, unsigned int maxOutputLen, + const unsigned char *input, unsigned int inputLen) +{ + int blocksize; /* cipher block size in bytes */ + /* Check args: input may only be NULL when inputLen is 0 */ + if (cx == NULL || output == NULL || (input == NULL && inputLen != 0)) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + blocksize = 4 * cx->Nb; /* Nb is kept in 32-bit words */ + if (cx->isBlock && (inputLen % blocksize != 0)) { /* block modes accept whole blocks only */ + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + if (maxOutputLen < inputLen) { + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + *outputLen = inputLen; /* preset; outputLen is also handed to the worker below */ + return (*cx->worker)(cx->worker_cx, output, outputLen, maxOutputLen, + input, inputLen, blocksize); /* worker and worker_cx were installed when the context was initialized */ +} diff --git a/security/nss/lib/freebl/rijndael.h b/security/nss/lib/freebl/rijndael.h new file mode 100644 index 000000000..0e14ec2fc --- /dev/null +++ b/security/nss/lib/freebl/rijndael.h @@ -0,0 +1,67 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ */ + +#ifndef _RIJNDAEL_H_ +#define _RIJNDAEL_H_ 1 + +#include "blapii.h" + +#define RIJNDAEL_MIN_BLOCKSIZE 16 /* bytes */ +#define RIJNDAEL_MAX_BLOCKSIZE 32 /* bytes */ + +typedef SECStatus AESBlockFunc(AESContext *cx, + unsigned char *output, + const unsigned char *input); + +/* RIJNDAEL_NUM_ROUNDS + * + * Number of rounds per execution + * Nk - key size in 32-bit words (key size in bytes / 4) + * Nb - block size in 32-bit words (block size in bytes / 4) + */ +#define RIJNDAEL_NUM_ROUNDS(Nk, Nb) \ + (PR_MAX(Nk, Nb) + 6) + +/* RIJNDAEL_MAX_STATE_SIZE + * + * Maximum number of bytes in the state (spec includes up to 256-bit block + * size) + */ +#define RIJNDAEL_MAX_STATE_SIZE 32 + +/* + * This magic number is (Nb_max * (Nr_max + 1)) + * where Nb_max is the maximum block size in 32-bit words, + * Nr_max is the maximum number of rounds, which is Nb_max + 6 + */ +#define RIJNDAEL_MAX_EXP_KEY_SIZE (8 * 15) + +/* AESContextStr + * + * Values which maintain the state for Rijndael encryption/decryption. + * + * iv - initialization vector for CBC mode + * Nb - the block size in 32-bit words (the caller's block size in + * bytes / 4) + * Nr - the number of rounds, computed with RIJNDAEL_NUM_ROUNDS + * expandedKey - the round keys in 4-byte words; Nb * (Nr + 1) words are + * used, at most RIJNDAEL_MAX_EXP_KEY_SIZE + * worker - the encryption/decryption function to use with worker_cx + * destroy - if not NULL, the destroy function to use with worker_cx + * worker_cx - the context for worker and destroy + * isBlock - is the mode of operation a block cipher or a stream cipher? + * mode - the mode requested from AES_InitContext (e.g. NSS_AES_GCM) + */ +struct AESContextStr { + unsigned int Nb; + unsigned int Nr; + freeblCipherFunc worker; + /* NOTE: The offsets of iv and expandedKey are hardcoded in intel-aes.s. + * Don't add new members before them without updating intel-aes.s. 
+ */ + unsigned char iv[RIJNDAEL_MAX_BLOCKSIZE]; + PRUint32 expandedKey[RIJNDAEL_MAX_EXP_KEY_SIZE]; + freeblDestroyFunc destroy; + void *worker_cx; + PRBool isBlock; + int mode; +}; + +#endif /* _RIJNDAEL_H_ */ diff --git a/security/nss/lib/freebl/rijndael32.tab b/security/nss/lib/freebl/rijndael32.tab new file mode 100644 index 000000000..59be7c2c0 --- /dev/null +++ b/security/nss/lib/freebl/rijndael32.tab @@ -0,0 +1,1219 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef RIJNDAEL_INCLUDE_TABLES +static const PRUint8 _S[256] = /* Rijndael forward S-box, indexed by input byte */ +{ + 99, 124, 119, 123, 242, 107, 111, 197, 48, 1, 103, 43, 254, 215, 171, 118, +202, 130, 201, 125, 250, 89, 71, 240, 173, 212, 162, 175, 156, 164, 114, 192, +183, 253, 147, 38, 54, 63, 247, 204, 52, 165, 229, 241, 113, 216, 49, 21, + 4, 199, 35, 195, 24, 150, 5, 154, 7, 18, 128, 226, 235, 39, 178, 117, + 9, 131, 44, 26, 27, 110, 90, 160, 82, 59, 214, 179, 41, 227, 47, 132, + 83, 209, 0, 237, 32, 252, 177, 91, 106, 203, 190, 57, 74, 76, 88, 207, +208, 239, 170, 251, 67, 77, 51, 133, 69, 249, 2, 127, 80, 60, 159, 168, + 81, 163, 64, 143, 146, 157, 56, 245, 188, 182, 218, 33, 16, 255, 243, 210, +205, 12, 19, 236, 95, 151, 68, 23, 196, 167, 126, 61, 100, 93, 25, 115, + 96, 129, 79, 220, 34, 42, 144, 136, 70, 238, 184, 20, 222, 94, 11, 219, +224, 50, 58, 10, 73, 6, 36, 92, 194, 211, 172, 98, 145, 149, 228, 121, +231, 200, 55, 109, 141, 213, 78, 169, 108, 86, 244, 234, 101, 122, 174, 8, +186, 120, 37, 46, 28, 166, 180, 198, 232, 221, 116, 31, 75, 189, 139, 138, +112, 62, 181, 102, 72, 3, 246, 14, 97, 53, 87, 185, 134, 193, 29, 158, +225, 248, 152, 17, 105, 217, 142, 148, 155, 30, 135, 233, 206, 85, 40, 223, +140, 161, 137, 13, 191, 230, 66, 104, 65, 153, 45, 15, 176, 84, 187, 22 +}; +#endif /* not RIJNDAEL_INCLUDE_TABLES */ + +static const 
PRUint8 _SInv[256] = /* inverse of _S: _SInv[_S[x]] == x */ +{ + 82, 9, 106, 
213, 48, 54, 165, 56, 191, 64, 163, 158, 129, 243, 215, 251, +124, 227, 57, 130, 155, 47, 255, 135, 52, 142, 67, 68, 196, 222, 233, 203, + 84, 123, 148, 50, 166, 194, 35, 61, 238, 76, 149, 11, 66, 250, 195, 78, + 8, 46, 161, 102, 40, 217, 36, 178, 118, 91, 162, 73, 109, 139, 209, 37, +114, 248, 246, 100, 134, 104, 152, 22, 212, 164, 92, 204, 93, 101, 182, 146, +108, 112, 72, 80, 253, 237, 185, 218, 94, 21, 70, 87, 167, 141, 157, 132, +144, 216, 171, 0, 140, 188, 211, 10, 247, 228, 88, 5, 184, 179, 69, 6, +208, 44, 30, 143, 202, 63, 15, 2, 193, 175, 189, 3, 1, 19, 138, 107, + 58, 145, 17, 65, 79, 103, 220, 234, 151, 242, 207, 206, 240, 180, 230, 115, +150, 172, 116, 34, 231, 173, 53, 133, 226, 249, 55, 232, 28, 117, 223, 110, + 71, 241, 26, 113, 29, 41, 197, 137, 111, 183, 98, 14, 170, 24, 190, 27, +252, 86, 62, 75, 198, 210, 121, 32, 154, 219, 192, 254, 120, 205, 90, 244, + 31, 221, 168, 51, 136, 7, 199, 49, 177, 18, 16, 89, 39, 128, 236, 95, + 96, 81, 127, 169, 25, 181, 74, 13, 45, 229, 122, 159, 147, 201, 156, 239, +160, 224, 59, 77, 174, 42, 245, 176, 200, 235, 187, 60, 131, 83, 153, 97, + 23, 43, 4, 126, 186, 119, 214, 38, 225, 105, 20, 99, 85, 33, 12, 125 +}; + +#ifdef RIJNDAEL_INCLUDE_TABLES +#ifdef IS_LITTLE_ENDIAN +static const PRUint32 _T0[256] = +{ +0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6, 0x0df2f2ff, 0xbd6b6bd6, +0xb16f6fde, 0x54c5c591, 0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56, +0x19fefee7, 0x62d7d7b5, 0xe6abab4d, 0x9a7676ec, 0x45caca8f, 0x9d82821f, +0x40c9c989, 0x877d7dfa, 0x15fafaef, 0xeb5959b2, 0xc947478e, 0x0bf0f0fb, +0xecadad41, 0x67d4d4b3, 0xfda2a25f, 0xeaafaf45, 0xbf9c9c23, 0xf7a4a453, +0x967272e4, 0x5bc0c09b, 0xc2b7b775, 0x1cfdfde1, 0xae93933d, 0x6a26264c, +0x5a36366c, 0x413f3f7e, 0x02f7f7f5, 0x4fcccc83, 0x5c343468, 0xf4a5a551, +0x34e5e5d1, 0x08f1f1f9, 0x937171e2, 0x73d8d8ab, 0x53313162, 0x3f15152a, +0x0c040408, 0x52c7c795, 0x65232346, 0x5ec3c39d, 0x28181830, 0xa1969637, +0x0f05050a, 0xb59a9a2f, 0x0907070e, 0x36121224, 0x9b80801b, 
0x3de2e2df, +0x26ebebcd, 0x6927274e, 0xcdb2b27f, 0x9f7575ea, 0x1b090912, 0x9e83831d, +0x742c2c58, 0x2e1a1a34, 0x2d1b1b36, 0xb26e6edc, 0xee5a5ab4, 0xfba0a05b, +0xf65252a4, 0x4d3b3b76, 0x61d6d6b7, 0xceb3b37d, 0x7b292952, 0x3ee3e3dd, +0x712f2f5e, 0x97848413, 0xf55353a6, 0x68d1d1b9, 0x00000000, 0x2cededc1, +0x60202040, 0x1ffcfce3, 0xc8b1b179, 0xed5b5bb6, 0xbe6a6ad4, 0x46cbcb8d, +0xd9bebe67, 0x4b393972, 0xde4a4a94, 0xd44c4c98, 0xe85858b0, 0x4acfcf85, +0x6bd0d0bb, 0x2aefefc5, 0xe5aaaa4f, 0x16fbfbed, 0xc5434386, 0xd74d4d9a, +0x55333366, 0x94858511, 0xcf45458a, 0x10f9f9e9, 0x06020204, 0x817f7ffe, +0xf05050a0, 0x443c3c78, 0xba9f9f25, 0xe3a8a84b, 0xf35151a2, 0xfea3a35d, +0xc0404080, 0x8a8f8f05, 0xad92923f, 0xbc9d9d21, 0x48383870, 0x04f5f5f1, +0xdfbcbc63, 0xc1b6b677, 0x75dadaaf, 0x63212142, 0x30101020, 0x1affffe5, +0x0ef3f3fd, 0x6dd2d2bf, 0x4ccdcd81, 0x140c0c18, 0x35131326, 0x2fececc3, +0xe15f5fbe, 0xa2979735, 0xcc444488, 0x3917172e, 0x57c4c493, 0xf2a7a755, +0x827e7efc, 0x473d3d7a, 0xac6464c8, 0xe75d5dba, 0x2b191932, 0x957373e6, +0xa06060c0, 0x98818119, 0xd14f4f9e, 0x7fdcdca3, 0x66222244, 0x7e2a2a54, +0xab90903b, 0x8388880b, 0xca46468c, 0x29eeeec7, 0xd3b8b86b, 0x3c141428, +0x79dedea7, 0xe25e5ebc, 0x1d0b0b16, 0x76dbdbad, 0x3be0e0db, 0x56323264, +0x4e3a3a74, 0x1e0a0a14, 0xdb494992, 0x0a06060c, 0x6c242448, 0xe45c5cb8, +0x5dc2c29f, 0x6ed3d3bd, 0xefacac43, 0xa66262c4, 0xa8919139, 0xa4959531, +0x37e4e4d3, 0x8b7979f2, 0x32e7e7d5, 0x43c8c88b, 0x5937376e, 0xb76d6dda, +0x8c8d8d01, 0x64d5d5b1, 0xd24e4e9c, 0xe0a9a949, 0xb46c6cd8, 0xfa5656ac, +0x07f4f4f3, 0x25eaeacf, 0xaf6565ca, 0x8e7a7af4, 0xe9aeae47, 0x18080810, +0xd5baba6f, 0x887878f0, 0x6f25254a, 0x722e2e5c, 0x241c1c38, 0xf1a6a657, +0xc7b4b473, 0x51c6c697, 0x23e8e8cb, 0x7cdddda1, 0x9c7474e8, 0x211f1f3e, +0xdd4b4b96, 0xdcbdbd61, 0x868b8b0d, 0x858a8a0f, 0x907070e0, 0x423e3e7c, +0xc4b5b571, 0xaa6666cc, 0xd8484890, 0x05030306, 0x01f6f6f7, 0x120e0e1c, +0xa36161c2, 0x5f35356a, 0xf95757ae, 0xd0b9b969, 0x91868617, 0x58c1c199, +0x271d1d3a, 
0xb99e9e27, 0x38e1e1d9, 0x13f8f8eb, 0xb398982b, 0x33111122, +0xbb6969d2, 0x70d9d9a9, 0x898e8e07, 0xa7949433, 0xb69b9b2d, 0x221e1e3c, +0x92878715, 0x20e9e9c9, 0x49cece87, 0xff5555aa, 0x78282850, 0x7adfdfa5, +0x8f8c8c03, 0xf8a1a159, 0x80898909, 0x170d0d1a, 0xdabfbf65, 0x31e6e6d7, +0xc6424284, 0xb86868d0, 0xc3414182, 0xb0999929, 0x772d2d5a, 0x110f0f1e, +0xcbb0b07b, 0xfc5454a8, 0xd6bbbb6d, 0x3a16162c +}; +#else +static const PRUint32 _T0[256] = +{ +0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d, 0xfff2f20d, 0xd66b6bbd, +0xde6f6fb1, 0x91c5c554, 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d, +0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a, 0x8fcaca45, 0x1f82829d, +0x89c9c940, 0xfa7d7d87, 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b, +0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea, 0x239c9cbf, 0x53a4a4f7, +0xe4727296, 0x9bc0c05b, 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a, +0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f, 0x6834345c, 0x51a5a5f4, +0xd1e5e534, 0xf9f1f108, 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f, +0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e, 0x30181828, 0x379696a1, +0x0a05050f, 0x2f9a9ab5, 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d, +0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f, 0x1209091b, 0x1d83839e, +0x582c2c74, 0x341a1a2e, 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb, +0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce, 0x5229297b, 0xdde3e33e, +0x5e2f2f71, 0x13848497, 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c, +0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed, 0xd46a6abe, 0x8dcbcb46, +0x67bebed9, 0x7239394b, 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a, +0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16, 0x864343c5, 0x9a4d4dd7, +0x66333355, 0x11858594, 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81, +0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3, 0xa25151f3, 0x5da3a3fe, +0x804040c0, 0x058f8f8a, 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504, +0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163, 0x20101030, 0xe5ffff1a, +0xfdf3f30e, 
0xbfd2d26d, 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f, +0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739, 0x93c4c457, 0x55a7a7f2, +0xfc7e7e82, 0x7a3d3d47, 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395, +0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f, 0x44222266, 0x542a2a7e, +0x3b9090ab, 0x0b888883, 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c, +0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76, 0xdbe0e03b, 0x64323256, +0x743a3a4e, 0x140a0a1e, 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4, +0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6, 0x399191a8, 0x319595a4, +0xd3e4e437, 0xf279798b, 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7, +0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0, 0xd86c6cb4, 0xac5656fa, +0xf3f4f407, 0xcfeaea25, 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818, +0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72, 0x381c1c24, 0x57a6a6f1, +0x73b4b4c7, 0x97c6c651, 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21, +0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85, 0xe0707090, 0x7c3e3e42, +0x71b5b5c4, 0xcc6666aa, 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12, +0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0, 0x17868691, 0x99c1c158, +0x3a1d1d27, 0x279e9eb9, 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133, +0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7, 0x2d9b9bb6, 0x3c1e1e22, +0x15878792, 0xc9e9e920, 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a, +0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17, 0x65bfbfda, 0xd7e6e631, +0x844242c6, 0xd06868b8, 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11, +0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a +}; +#endif + +#ifdef IS_LITTLE_ENDIAN +static const PRUint32 _T1[256] = +{ +0x6363c6a5, 0x7c7cf884, 0x7777ee99, 0x7b7bf68d, 0xf2f2ff0d, 0x6b6bd6bd, +0x6f6fdeb1, 0xc5c59154, 0x30306050, 0x01010203, 0x6767cea9, 0x2b2b567d, +0xfefee719, 0xd7d7b562, 0xabab4de6, 0x7676ec9a, 0xcaca8f45, 0x82821f9d, +0xc9c98940, 0x7d7dfa87, 0xfafaef15, 0x5959b2eb, 0x47478ec9, 0xf0f0fb0b, +0xadad41ec, 0xd4d4b367, 0xa2a25ffd, 0xafaf45ea, 0x9c9c23bf, 
0xa4a453f7, +0x7272e496, 0xc0c09b5b, 0xb7b775c2, 0xfdfde11c, 0x93933dae, 0x26264c6a, +0x36366c5a, 0x3f3f7e41, 0xf7f7f502, 0xcccc834f, 0x3434685c, 0xa5a551f4, +0xe5e5d134, 0xf1f1f908, 0x7171e293, 0xd8d8ab73, 0x31316253, 0x15152a3f, +0x0404080c, 0xc7c79552, 0x23234665, 0xc3c39d5e, 0x18183028, 0x969637a1, +0x05050a0f, 0x9a9a2fb5, 0x07070e09, 0x12122436, 0x80801b9b, 0xe2e2df3d, +0xebebcd26, 0x27274e69, 0xb2b27fcd, 0x7575ea9f, 0x0909121b, 0x83831d9e, +0x2c2c5874, 0x1a1a342e, 0x1b1b362d, 0x6e6edcb2, 0x5a5ab4ee, 0xa0a05bfb, +0x5252a4f6, 0x3b3b764d, 0xd6d6b761, 0xb3b37dce, 0x2929527b, 0xe3e3dd3e, +0x2f2f5e71, 0x84841397, 0x5353a6f5, 0xd1d1b968, 0x00000000, 0xededc12c, +0x20204060, 0xfcfce31f, 0xb1b179c8, 0x5b5bb6ed, 0x6a6ad4be, 0xcbcb8d46, +0xbebe67d9, 0x3939724b, 0x4a4a94de, 0x4c4c98d4, 0x5858b0e8, 0xcfcf854a, +0xd0d0bb6b, 0xefefc52a, 0xaaaa4fe5, 0xfbfbed16, 0x434386c5, 0x4d4d9ad7, +0x33336655, 0x85851194, 0x45458acf, 0xf9f9e910, 0x02020406, 0x7f7ffe81, +0x5050a0f0, 0x3c3c7844, 0x9f9f25ba, 0xa8a84be3, 0x5151a2f3, 0xa3a35dfe, +0x404080c0, 0x8f8f058a, 0x92923fad, 0x9d9d21bc, 0x38387048, 0xf5f5f104, +0xbcbc63df, 0xb6b677c1, 0xdadaaf75, 0x21214263, 0x10102030, 0xffffe51a, +0xf3f3fd0e, 0xd2d2bf6d, 0xcdcd814c, 0x0c0c1814, 0x13132635, 0xececc32f, +0x5f5fbee1, 0x979735a2, 0x444488cc, 0x17172e39, 0xc4c49357, 0xa7a755f2, +0x7e7efc82, 0x3d3d7a47, 0x6464c8ac, 0x5d5dbae7, 0x1919322b, 0x7373e695, +0x6060c0a0, 0x81811998, 0x4f4f9ed1, 0xdcdca37f, 0x22224466, 0x2a2a547e, +0x90903bab, 0x88880b83, 0x46468cca, 0xeeeec729, 0xb8b86bd3, 0x1414283c, +0xdedea779, 0x5e5ebce2, 0x0b0b161d, 0xdbdbad76, 0xe0e0db3b, 0x32326456, +0x3a3a744e, 0x0a0a141e, 0x494992db, 0x06060c0a, 0x2424486c, 0x5c5cb8e4, +0xc2c29f5d, 0xd3d3bd6e, 0xacac43ef, 0x6262c4a6, 0x919139a8, 0x959531a4, +0xe4e4d337, 0x7979f28b, 0xe7e7d532, 0xc8c88b43, 0x37376e59, 0x6d6ddab7, +0x8d8d018c, 0xd5d5b164, 0x4e4e9cd2, 0xa9a949e0, 0x6c6cd8b4, 0x5656acfa, +0xf4f4f307, 0xeaeacf25, 0x6565caaf, 0x7a7af48e, 0xaeae47e9, 0x08081018, +0xbaba6fd5, 
0x7878f088, 0x25254a6f, 0x2e2e5c72, 0x1c1c3824, 0xa6a657f1, +0xb4b473c7, 0xc6c69751, 0xe8e8cb23, 0xdddda17c, 0x7474e89c, 0x1f1f3e21, +0x4b4b96dd, 0xbdbd61dc, 0x8b8b0d86, 0x8a8a0f85, 0x7070e090, 0x3e3e7c42, +0xb5b571c4, 0x6666ccaa, 0x484890d8, 0x03030605, 0xf6f6f701, 0x0e0e1c12, +0x6161c2a3, 0x35356a5f, 0x5757aef9, 0xb9b969d0, 0x86861791, 0xc1c19958, +0x1d1d3a27, 0x9e9e27b9, 0xe1e1d938, 0xf8f8eb13, 0x98982bb3, 0x11112233, +0x6969d2bb, 0xd9d9a970, 0x8e8e0789, 0x949433a7, 0x9b9b2db6, 0x1e1e3c22, +0x87871592, 0xe9e9c920, 0xcece8749, 0x5555aaff, 0x28285078, 0xdfdfa57a, +0x8c8c038f, 0xa1a159f8, 0x89890980, 0x0d0d1a17, 0xbfbf65da, 0xe6e6d731, +0x424284c6, 0x6868d0b8, 0x414182c3, 0x999929b0, 0x2d2d5a77, 0x0f0f1e11, +0xb0b07bcb, 0x5454a8fc, 0xbbbb6dd6, 0x16162c3a +}; +#else +static const PRUint32 _T1[256] = +{ +0xa5c66363, 0x84f87c7c, 0x99ee7777, 0x8df67b7b, 0x0dfff2f2, 0xbdd66b6b, +0xb1de6f6f, 0x5491c5c5, 0x50603030, 0x03020101, 0xa9ce6767, 0x7d562b2b, +0x19e7fefe, 0x62b5d7d7, 0xe64dabab, 0x9aec7676, 0x458fcaca, 0x9d1f8282, +0x4089c9c9, 0x87fa7d7d, 0x15effafa, 0xebb25959, 0xc98e4747, 0x0bfbf0f0, +0xec41adad, 0x67b3d4d4, 0xfd5fa2a2, 0xea45afaf, 0xbf239c9c, 0xf753a4a4, +0x96e47272, 0x5b9bc0c0, 0xc275b7b7, 0x1ce1fdfd, 0xae3d9393, 0x6a4c2626, +0x5a6c3636, 0x417e3f3f, 0x02f5f7f7, 0x4f83cccc, 0x5c683434, 0xf451a5a5, +0x34d1e5e5, 0x08f9f1f1, 0x93e27171, 0x73abd8d8, 0x53623131, 0x3f2a1515, +0x0c080404, 0x5295c7c7, 0x65462323, 0x5e9dc3c3, 0x28301818, 0xa1379696, +0x0f0a0505, 0xb52f9a9a, 0x090e0707, 0x36241212, 0x9b1b8080, 0x3ddfe2e2, +0x26cdebeb, 0x694e2727, 0xcd7fb2b2, 0x9fea7575, 0x1b120909, 0x9e1d8383, +0x74582c2c, 0x2e341a1a, 0x2d361b1b, 0xb2dc6e6e, 0xeeb45a5a, 0xfb5ba0a0, +0xf6a45252, 0x4d763b3b, 0x61b7d6d6, 0xce7db3b3, 0x7b522929, 0x3edde3e3, +0x715e2f2f, 0x97138484, 0xf5a65353, 0x68b9d1d1, 0x00000000, 0x2cc1eded, +0x60402020, 0x1fe3fcfc, 0xc879b1b1, 0xedb65b5b, 0xbed46a6a, 0x468dcbcb, +0xd967bebe, 0x4b723939, 0xde944a4a, 0xd4984c4c, 0xe8b05858, 0x4a85cfcf, +0x6bbbd0d0, 
0x2ac5efef, 0xe54faaaa, 0x16edfbfb, 0xc5864343, 0xd79a4d4d, +0x55663333, 0x94118585, 0xcf8a4545, 0x10e9f9f9, 0x06040202, 0x81fe7f7f, +0xf0a05050, 0x44783c3c, 0xba259f9f, 0xe34ba8a8, 0xf3a25151, 0xfe5da3a3, +0xc0804040, 0x8a058f8f, 0xad3f9292, 0xbc219d9d, 0x48703838, 0x04f1f5f5, +0xdf63bcbc, 0xc177b6b6, 0x75afdada, 0x63422121, 0x30201010, 0x1ae5ffff, +0x0efdf3f3, 0x6dbfd2d2, 0x4c81cdcd, 0x14180c0c, 0x35261313, 0x2fc3ecec, +0xe1be5f5f, 0xa2359797, 0xcc884444, 0x392e1717, 0x5793c4c4, 0xf255a7a7, +0x82fc7e7e, 0x477a3d3d, 0xacc86464, 0xe7ba5d5d, 0x2b321919, 0x95e67373, +0xa0c06060, 0x98198181, 0xd19e4f4f, 0x7fa3dcdc, 0x66442222, 0x7e542a2a, +0xab3b9090, 0x830b8888, 0xca8c4646, 0x29c7eeee, 0xd36bb8b8, 0x3c281414, +0x79a7dede, 0xe2bc5e5e, 0x1d160b0b, 0x76addbdb, 0x3bdbe0e0, 0x56643232, +0x4e743a3a, 0x1e140a0a, 0xdb924949, 0x0a0c0606, 0x6c482424, 0xe4b85c5c, +0x5d9fc2c2, 0x6ebdd3d3, 0xef43acac, 0xa6c46262, 0xa8399191, 0xa4319595, +0x37d3e4e4, 0x8bf27979, 0x32d5e7e7, 0x438bc8c8, 0x596e3737, 0xb7da6d6d, +0x8c018d8d, 0x64b1d5d5, 0xd29c4e4e, 0xe049a9a9, 0xb4d86c6c, 0xfaac5656, +0x07f3f4f4, 0x25cfeaea, 0xafca6565, 0x8ef47a7a, 0xe947aeae, 0x18100808, +0xd56fbaba, 0x88f07878, 0x6f4a2525, 0x725c2e2e, 0x24381c1c, 0xf157a6a6, +0xc773b4b4, 0x5197c6c6, 0x23cbe8e8, 0x7ca1dddd, 0x9ce87474, 0x213e1f1f, +0xdd964b4b, 0xdc61bdbd, 0x860d8b8b, 0x850f8a8a, 0x90e07070, 0x427c3e3e, +0xc471b5b5, 0xaacc6666, 0xd8904848, 0x05060303, 0x01f7f6f6, 0x121c0e0e, +0xa3c26161, 0x5f6a3535, 0xf9ae5757, 0xd069b9b9, 0x91178686, 0x5899c1c1, +0x273a1d1d, 0xb9279e9e, 0x38d9e1e1, 0x13ebf8f8, 0xb32b9898, 0x33221111, +0xbbd26969, 0x70a9d9d9, 0x89078e8e, 0xa7339494, 0xb62d9b9b, 0x223c1e1e, +0x92158787, 0x20c9e9e9, 0x4987cece, 0xffaa5555, 0x78502828, 0x7aa5dfdf, +0x8f038c8c, 0xf859a1a1, 0x80098989, 0x171a0d0d, 0xda65bfbf, 0x31d7e6e6, +0xc6844242, 0xb8d06868, 0xc3824141, 0xb0299999, 0x775a2d2d, 0x111e0f0f, +0xcb7bb0b0, 0xfca85454, 0xd66dbbbb, 0x3a2c1616 +}; +#endif + +#ifdef IS_LITTLE_ENDIAN +static const PRUint32 
_T2[256] = +{ +0x63c6a563, 0x7cf8847c, 0x77ee9977, 0x7bf68d7b, 0xf2ff0df2, 0x6bd6bd6b, +0x6fdeb16f, 0xc59154c5, 0x30605030, 0x01020301, 0x67cea967, 0x2b567d2b, +0xfee719fe, 0xd7b562d7, 0xab4de6ab, 0x76ec9a76, 0xca8f45ca, 0x821f9d82, +0xc98940c9, 0x7dfa877d, 0xfaef15fa, 0x59b2eb59, 0x478ec947, 0xf0fb0bf0, +0xad41ecad, 0xd4b367d4, 0xa25ffda2, 0xaf45eaaf, 0x9c23bf9c, 0xa453f7a4, +0x72e49672, 0xc09b5bc0, 0xb775c2b7, 0xfde11cfd, 0x933dae93, 0x264c6a26, +0x366c5a36, 0x3f7e413f, 0xf7f502f7, 0xcc834fcc, 0x34685c34, 0xa551f4a5, +0xe5d134e5, 0xf1f908f1, 0x71e29371, 0xd8ab73d8, 0x31625331, 0x152a3f15, +0x04080c04, 0xc79552c7, 0x23466523, 0xc39d5ec3, 0x18302818, 0x9637a196, +0x050a0f05, 0x9a2fb59a, 0x070e0907, 0x12243612, 0x801b9b80, 0xe2df3de2, +0xebcd26eb, 0x274e6927, 0xb27fcdb2, 0x75ea9f75, 0x09121b09, 0x831d9e83, +0x2c58742c, 0x1a342e1a, 0x1b362d1b, 0x6edcb26e, 0x5ab4ee5a, 0xa05bfba0, +0x52a4f652, 0x3b764d3b, 0xd6b761d6, 0xb37dceb3, 0x29527b29, 0xe3dd3ee3, +0x2f5e712f, 0x84139784, 0x53a6f553, 0xd1b968d1, 0x00000000, 0xedc12ced, +0x20406020, 0xfce31ffc, 0xb179c8b1, 0x5bb6ed5b, 0x6ad4be6a, 0xcb8d46cb, +0xbe67d9be, 0x39724b39, 0x4a94de4a, 0x4c98d44c, 0x58b0e858, 0xcf854acf, +0xd0bb6bd0, 0xefc52aef, 0xaa4fe5aa, 0xfbed16fb, 0x4386c543, 0x4d9ad74d, +0x33665533, 0x85119485, 0x458acf45, 0xf9e910f9, 0x02040602, 0x7ffe817f, +0x50a0f050, 0x3c78443c, 0x9f25ba9f, 0xa84be3a8, 0x51a2f351, 0xa35dfea3, +0x4080c040, 0x8f058a8f, 0x923fad92, 0x9d21bc9d, 0x38704838, 0xf5f104f5, +0xbc63dfbc, 0xb677c1b6, 0xdaaf75da, 0x21426321, 0x10203010, 0xffe51aff, +0xf3fd0ef3, 0xd2bf6dd2, 0xcd814ccd, 0x0c18140c, 0x13263513, 0xecc32fec, +0x5fbee15f, 0x9735a297, 0x4488cc44, 0x172e3917, 0xc49357c4, 0xa755f2a7, +0x7efc827e, 0x3d7a473d, 0x64c8ac64, 0x5dbae75d, 0x19322b19, 0x73e69573, +0x60c0a060, 0x81199881, 0x4f9ed14f, 0xdca37fdc, 0x22446622, 0x2a547e2a, +0x903bab90, 0x880b8388, 0x468cca46, 0xeec729ee, 0xb86bd3b8, 0x14283c14, +0xdea779de, 0x5ebce25e, 0x0b161d0b, 0xdbad76db, 0xe0db3be0, 0x32645632, +0x3a744e3a, 
0x0a141e0a, 0x4992db49, 0x060c0a06, 0x24486c24, 0x5cb8e45c, +0xc29f5dc2, 0xd3bd6ed3, 0xac43efac, 0x62c4a662, 0x9139a891, 0x9531a495, +0xe4d337e4, 0x79f28b79, 0xe7d532e7, 0xc88b43c8, 0x376e5937, 0x6ddab76d, +0x8d018c8d, 0xd5b164d5, 0x4e9cd24e, 0xa949e0a9, 0x6cd8b46c, 0x56acfa56, +0xf4f307f4, 0xeacf25ea, 0x65caaf65, 0x7af48e7a, 0xae47e9ae, 0x08101808, +0xba6fd5ba, 0x78f08878, 0x254a6f25, 0x2e5c722e, 0x1c38241c, 0xa657f1a6, +0xb473c7b4, 0xc69751c6, 0xe8cb23e8, 0xdda17cdd, 0x74e89c74, 0x1f3e211f, +0x4b96dd4b, 0xbd61dcbd, 0x8b0d868b, 0x8a0f858a, 0x70e09070, 0x3e7c423e, +0xb571c4b5, 0x66ccaa66, 0x4890d848, 0x03060503, 0xf6f701f6, 0x0e1c120e, +0x61c2a361, 0x356a5f35, 0x57aef957, 0xb969d0b9, 0x86179186, 0xc19958c1, +0x1d3a271d, 0x9e27b99e, 0xe1d938e1, 0xf8eb13f8, 0x982bb398, 0x11223311, +0x69d2bb69, 0xd9a970d9, 0x8e07898e, 0x9433a794, 0x9b2db69b, 0x1e3c221e, +0x87159287, 0xe9c920e9, 0xce8749ce, 0x55aaff55, 0x28507828, 0xdfa57adf, +0x8c038f8c, 0xa159f8a1, 0x89098089, 0x0d1a170d, 0xbf65dabf, 0xe6d731e6, +0x4284c642, 0x68d0b868, 0x4182c341, 0x9929b099, 0x2d5a772d, 0x0f1e110f, +0xb07bcbb0, 0x54a8fc54, 0xbb6dd6bb, 0x162c3a16 +}; +#else +static const PRUint32 _T2[256] = +{ +0x63a5c663, 0x7c84f87c, 0x7799ee77, 0x7b8df67b, 0xf20dfff2, 0x6bbdd66b, +0x6fb1de6f, 0xc55491c5, 0x30506030, 0x01030201, 0x67a9ce67, 0x2b7d562b, +0xfe19e7fe, 0xd762b5d7, 0xabe64dab, 0x769aec76, 0xca458fca, 0x829d1f82, +0xc94089c9, 0x7d87fa7d, 0xfa15effa, 0x59ebb259, 0x47c98e47, 0xf00bfbf0, +0xadec41ad, 0xd467b3d4, 0xa2fd5fa2, 0xafea45af, 0x9cbf239c, 0xa4f753a4, +0x7296e472, 0xc05b9bc0, 0xb7c275b7, 0xfd1ce1fd, 0x93ae3d93, 0x266a4c26, +0x365a6c36, 0x3f417e3f, 0xf702f5f7, 0xcc4f83cc, 0x345c6834, 0xa5f451a5, +0xe534d1e5, 0xf108f9f1, 0x7193e271, 0xd873abd8, 0x31536231, 0x153f2a15, +0x040c0804, 0xc75295c7, 0x23654623, 0xc35e9dc3, 0x18283018, 0x96a13796, +0x050f0a05, 0x9ab52f9a, 0x07090e07, 0x12362412, 0x809b1b80, 0xe23ddfe2, +0xeb26cdeb, 0x27694e27, 0xb2cd7fb2, 0x759fea75, 0x091b1209, 0x839e1d83, +0x2c74582c, 
0x1a2e341a, 0x1b2d361b, 0x6eb2dc6e, 0x5aeeb45a, 0xa0fb5ba0, +0x52f6a452, 0x3b4d763b, 0xd661b7d6, 0xb3ce7db3, 0x297b5229, 0xe33edde3, +0x2f715e2f, 0x84971384, 0x53f5a653, 0xd168b9d1, 0x00000000, 0xed2cc1ed, +0x20604020, 0xfc1fe3fc, 0xb1c879b1, 0x5bedb65b, 0x6abed46a, 0xcb468dcb, +0xbed967be, 0x394b7239, 0x4ade944a, 0x4cd4984c, 0x58e8b058, 0xcf4a85cf, +0xd06bbbd0, 0xef2ac5ef, 0xaae54faa, 0xfb16edfb, 0x43c58643, 0x4dd79a4d, +0x33556633, 0x85941185, 0x45cf8a45, 0xf910e9f9, 0x02060402, 0x7f81fe7f, +0x50f0a050, 0x3c44783c, 0x9fba259f, 0xa8e34ba8, 0x51f3a251, 0xa3fe5da3, +0x40c08040, 0x8f8a058f, 0x92ad3f92, 0x9dbc219d, 0x38487038, 0xf504f1f5, +0xbcdf63bc, 0xb6c177b6, 0xda75afda, 0x21634221, 0x10302010, 0xff1ae5ff, +0xf30efdf3, 0xd26dbfd2, 0xcd4c81cd, 0x0c14180c, 0x13352613, 0xec2fc3ec, +0x5fe1be5f, 0x97a23597, 0x44cc8844, 0x17392e17, 0xc45793c4, 0xa7f255a7, +0x7e82fc7e, 0x3d477a3d, 0x64acc864, 0x5de7ba5d, 0x192b3219, 0x7395e673, +0x60a0c060, 0x81981981, 0x4fd19e4f, 0xdc7fa3dc, 0x22664422, 0x2a7e542a, +0x90ab3b90, 0x88830b88, 0x46ca8c46, 0xee29c7ee, 0xb8d36bb8, 0x143c2814, +0xde79a7de, 0x5ee2bc5e, 0x0b1d160b, 0xdb76addb, 0xe03bdbe0, 0x32566432, +0x3a4e743a, 0x0a1e140a, 0x49db9249, 0x060a0c06, 0x246c4824, 0x5ce4b85c, +0xc25d9fc2, 0xd36ebdd3, 0xacef43ac, 0x62a6c462, 0x91a83991, 0x95a43195, +0xe437d3e4, 0x798bf279, 0xe732d5e7, 0xc8438bc8, 0x37596e37, 0x6db7da6d, +0x8d8c018d, 0xd564b1d5, 0x4ed29c4e, 0xa9e049a9, 0x6cb4d86c, 0x56faac56, +0xf407f3f4, 0xea25cfea, 0x65afca65, 0x7a8ef47a, 0xaee947ae, 0x08181008, +0xbad56fba, 0x7888f078, 0x256f4a25, 0x2e725c2e, 0x1c24381c, 0xa6f157a6, +0xb4c773b4, 0xc65197c6, 0xe823cbe8, 0xdd7ca1dd, 0x749ce874, 0x1f213e1f, +0x4bdd964b, 0xbddc61bd, 0x8b860d8b, 0x8a850f8a, 0x7090e070, 0x3e427c3e, +0xb5c471b5, 0x66aacc66, 0x48d89048, 0x03050603, 0xf601f7f6, 0x0e121c0e, +0x61a3c261, 0x355f6a35, 0x57f9ae57, 0xb9d069b9, 0x86911786, 0xc15899c1, +0x1d273a1d, 0x9eb9279e, 0xe138d9e1, 0xf813ebf8, 0x98b32b98, 0x11332211, +0x69bbd269, 0xd970a9d9, 0x8e89078e, 
0x94a73394, 0x9bb62d9b, 0x1e223c1e, +0x87921587, 0xe920c9e9, 0xce4987ce, 0x55ffaa55, 0x28785028, 0xdf7aa5df, +0x8c8f038c, 0xa1f859a1, 0x89800989, 0x0d171a0d, 0xbfda65bf, 0xe631d7e6, +0x42c68442, 0x68b8d068, 0x41c38241, 0x99b02999, 0x2d775a2d, 0x0f111e0f, +0xb0cb7bb0, 0x54fca854, 0xbbd66dbb, 0x163a2c16 +}; +#endif + +#ifdef IS_LITTLE_ENDIAN +static const PRUint32 _T3[256] = +{ +0xc6a56363, 0xf8847c7c, 0xee997777, 0xf68d7b7b, 0xff0df2f2, 0xd6bd6b6b, +0xdeb16f6f, 0x9154c5c5, 0x60503030, 0x02030101, 0xcea96767, 0x567d2b2b, +0xe719fefe, 0xb562d7d7, 0x4de6abab, 0xec9a7676, 0x8f45caca, 0x1f9d8282, +0x8940c9c9, 0xfa877d7d, 0xef15fafa, 0xb2eb5959, 0x8ec94747, 0xfb0bf0f0, +0x41ecadad, 0xb367d4d4, 0x5ffda2a2, 0x45eaafaf, 0x23bf9c9c, 0x53f7a4a4, +0xe4967272, 0x9b5bc0c0, 0x75c2b7b7, 0xe11cfdfd, 0x3dae9393, 0x4c6a2626, +0x6c5a3636, 0x7e413f3f, 0xf502f7f7, 0x834fcccc, 0x685c3434, 0x51f4a5a5, +0xd134e5e5, 0xf908f1f1, 0xe2937171, 0xab73d8d8, 0x62533131, 0x2a3f1515, +0x080c0404, 0x9552c7c7, 0x46652323, 0x9d5ec3c3, 0x30281818, 0x37a19696, +0x0a0f0505, 0x2fb59a9a, 0x0e090707, 0x24361212, 0x1b9b8080, 0xdf3de2e2, +0xcd26ebeb, 0x4e692727, 0x7fcdb2b2, 0xea9f7575, 0x121b0909, 0x1d9e8383, +0x58742c2c, 0x342e1a1a, 0x362d1b1b, 0xdcb26e6e, 0xb4ee5a5a, 0x5bfba0a0, +0xa4f65252, 0x764d3b3b, 0xb761d6d6, 0x7dceb3b3, 0x527b2929, 0xdd3ee3e3, +0x5e712f2f, 0x13978484, 0xa6f55353, 0xb968d1d1, 0x00000000, 0xc12ceded, +0x40602020, 0xe31ffcfc, 0x79c8b1b1, 0xb6ed5b5b, 0xd4be6a6a, 0x8d46cbcb, +0x67d9bebe, 0x724b3939, 0x94de4a4a, 0x98d44c4c, 0xb0e85858, 0x854acfcf, +0xbb6bd0d0, 0xc52aefef, 0x4fe5aaaa, 0xed16fbfb, 0x86c54343, 0x9ad74d4d, +0x66553333, 0x11948585, 0x8acf4545, 0xe910f9f9, 0x04060202, 0xfe817f7f, +0xa0f05050, 0x78443c3c, 0x25ba9f9f, 0x4be3a8a8, 0xa2f35151, 0x5dfea3a3, +0x80c04040, 0x058a8f8f, 0x3fad9292, 0x21bc9d9d, 0x70483838, 0xf104f5f5, +0x63dfbcbc, 0x77c1b6b6, 0xaf75dada, 0x42632121, 0x20301010, 0xe51affff, +0xfd0ef3f3, 0xbf6dd2d2, 0x814ccdcd, 0x18140c0c, 0x26351313, 0xc32fecec, +0xbee15f5f, 
0x35a29797, 0x88cc4444, 0x2e391717, 0x9357c4c4, 0x55f2a7a7, +0xfc827e7e, 0x7a473d3d, 0xc8ac6464, 0xbae75d5d, 0x322b1919, 0xe6957373, +0xc0a06060, 0x19988181, 0x9ed14f4f, 0xa37fdcdc, 0x44662222, 0x547e2a2a, +0x3bab9090, 0x0b838888, 0x8cca4646, 0xc729eeee, 0x6bd3b8b8, 0x283c1414, +0xa779dede, 0xbce25e5e, 0x161d0b0b, 0xad76dbdb, 0xdb3be0e0, 0x64563232, +0x744e3a3a, 0x141e0a0a, 0x92db4949, 0x0c0a0606, 0x486c2424, 0xb8e45c5c, +0x9f5dc2c2, 0xbd6ed3d3, 0x43efacac, 0xc4a66262, 0x39a89191, 0x31a49595, +0xd337e4e4, 0xf28b7979, 0xd532e7e7, 0x8b43c8c8, 0x6e593737, 0xdab76d6d, +0x018c8d8d, 0xb164d5d5, 0x9cd24e4e, 0x49e0a9a9, 0xd8b46c6c, 0xacfa5656, +0xf307f4f4, 0xcf25eaea, 0xcaaf6565, 0xf48e7a7a, 0x47e9aeae, 0x10180808, +0x6fd5baba, 0xf0887878, 0x4a6f2525, 0x5c722e2e, 0x38241c1c, 0x57f1a6a6, +0x73c7b4b4, 0x9751c6c6, 0xcb23e8e8, 0xa17cdddd, 0xe89c7474, 0x3e211f1f, +0x96dd4b4b, 0x61dcbdbd, 0x0d868b8b, 0x0f858a8a, 0xe0907070, 0x7c423e3e, +0x71c4b5b5, 0xccaa6666, 0x90d84848, 0x06050303, 0xf701f6f6, 0x1c120e0e, +0xc2a36161, 0x6a5f3535, 0xaef95757, 0x69d0b9b9, 0x17918686, 0x9958c1c1, +0x3a271d1d, 0x27b99e9e, 0xd938e1e1, 0xeb13f8f8, 0x2bb39898, 0x22331111, +0xd2bb6969, 0xa970d9d9, 0x07898e8e, 0x33a79494, 0x2db69b9b, 0x3c221e1e, +0x15928787, 0xc920e9e9, 0x8749cece, 0xaaff5555, 0x50782828, 0xa57adfdf, +0x038f8c8c, 0x59f8a1a1, 0x09808989, 0x1a170d0d, 0x65dabfbf, 0xd731e6e6, +0x84c64242, 0xd0b86868, 0x82c34141, 0x29b09999, 0x5a772d2d, 0x1e110f0f, +0x7bcbb0b0, 0xa8fc5454, 0x6dd6bbbb, 0x2c3a1616 +}; +#else +static const PRUint32 _T3[256] = +{ +0x6363a5c6, 0x7c7c84f8, 0x777799ee, 0x7b7b8df6, 0xf2f20dff, 0x6b6bbdd6, +0x6f6fb1de, 0xc5c55491, 0x30305060, 0x01010302, 0x6767a9ce, 0x2b2b7d56, +0xfefe19e7, 0xd7d762b5, 0xababe64d, 0x76769aec, 0xcaca458f, 0x82829d1f, +0xc9c94089, 0x7d7d87fa, 0xfafa15ef, 0x5959ebb2, 0x4747c98e, 0xf0f00bfb, +0xadadec41, 0xd4d467b3, 0xa2a2fd5f, 0xafafea45, 0x9c9cbf23, 0xa4a4f753, +0x727296e4, 0xc0c05b9b, 0xb7b7c275, 0xfdfd1ce1, 0x9393ae3d, 0x26266a4c, +0x36365a6c, 
0x3f3f417e, 0xf7f702f5, 0xcccc4f83, 0x34345c68, 0xa5a5f451, +0xe5e534d1, 0xf1f108f9, 0x717193e2, 0xd8d873ab, 0x31315362, 0x15153f2a, +0x04040c08, 0xc7c75295, 0x23236546, 0xc3c35e9d, 0x18182830, 0x9696a137, +0x05050f0a, 0x9a9ab52f, 0x0707090e, 0x12123624, 0x80809b1b, 0xe2e23ddf, +0xebeb26cd, 0x2727694e, 0xb2b2cd7f, 0x75759fea, 0x09091b12, 0x83839e1d, +0x2c2c7458, 0x1a1a2e34, 0x1b1b2d36, 0x6e6eb2dc, 0x5a5aeeb4, 0xa0a0fb5b, +0x5252f6a4, 0x3b3b4d76, 0xd6d661b7, 0xb3b3ce7d, 0x29297b52, 0xe3e33edd, +0x2f2f715e, 0x84849713, 0x5353f5a6, 0xd1d168b9, 0x00000000, 0xeded2cc1, +0x20206040, 0xfcfc1fe3, 0xb1b1c879, 0x5b5bedb6, 0x6a6abed4, 0xcbcb468d, +0xbebed967, 0x39394b72, 0x4a4ade94, 0x4c4cd498, 0x5858e8b0, 0xcfcf4a85, +0xd0d06bbb, 0xefef2ac5, 0xaaaae54f, 0xfbfb16ed, 0x4343c586, 0x4d4dd79a, +0x33335566, 0x85859411, 0x4545cf8a, 0xf9f910e9, 0x02020604, 0x7f7f81fe, +0x5050f0a0, 0x3c3c4478, 0x9f9fba25, 0xa8a8e34b, 0x5151f3a2, 0xa3a3fe5d, +0x4040c080, 0x8f8f8a05, 0x9292ad3f, 0x9d9dbc21, 0x38384870, 0xf5f504f1, +0xbcbcdf63, 0xb6b6c177, 0xdada75af, 0x21216342, 0x10103020, 0xffff1ae5, +0xf3f30efd, 0xd2d26dbf, 0xcdcd4c81, 0x0c0c1418, 0x13133526, 0xecec2fc3, +0x5f5fe1be, 0x9797a235, 0x4444cc88, 0x1717392e, 0xc4c45793, 0xa7a7f255, +0x7e7e82fc, 0x3d3d477a, 0x6464acc8, 0x5d5de7ba, 0x19192b32, 0x737395e6, +0x6060a0c0, 0x81819819, 0x4f4fd19e, 0xdcdc7fa3, 0x22226644, 0x2a2a7e54, +0x9090ab3b, 0x8888830b, 0x4646ca8c, 0xeeee29c7, 0xb8b8d36b, 0x14143c28, +0xdede79a7, 0x5e5ee2bc, 0x0b0b1d16, 0xdbdb76ad, 0xe0e03bdb, 0x32325664, +0x3a3a4e74, 0x0a0a1e14, 0x4949db92, 0x06060a0c, 0x24246c48, 0x5c5ce4b8, +0xc2c25d9f, 0xd3d36ebd, 0xacacef43, 0x6262a6c4, 0x9191a839, 0x9595a431, +0xe4e437d3, 0x79798bf2, 0xe7e732d5, 0xc8c8438b, 0x3737596e, 0x6d6db7da, +0x8d8d8c01, 0xd5d564b1, 0x4e4ed29c, 0xa9a9e049, 0x6c6cb4d8, 0x5656faac, +0xf4f407f3, 0xeaea25cf, 0x6565afca, 0x7a7a8ef4, 0xaeaee947, 0x08081810, +0xbabad56f, 0x787888f0, 0x25256f4a, 0x2e2e725c, 0x1c1c2438, 0xa6a6f157, +0xb4b4c773, 0xc6c65197, 0xe8e823cb, 
0xdddd7ca1, 0x74749ce8, 0x1f1f213e, +0x4b4bdd96, 0xbdbddc61, 0x8b8b860d, 0x8a8a850f, 0x707090e0, 0x3e3e427c, +0xb5b5c471, 0x6666aacc, 0x4848d890, 0x03030506, 0xf6f601f7, 0x0e0e121c, +0x6161a3c2, 0x35355f6a, 0x5757f9ae, 0xb9b9d069, 0x86869117, 0xc1c15899, +0x1d1d273a, 0x9e9eb927, 0xe1e138d9, 0xf8f813eb, 0x9898b32b, 0x11113322, +0x6969bbd2, 0xd9d970a9, 0x8e8e8907, 0x9494a733, 0x9b9bb62d, 0x1e1e223c, +0x87879215, 0xe9e920c9, 0xcece4987, 0x5555ffaa, 0x28287850, 0xdfdf7aa5, +0x8c8c8f03, 0xa1a1f859, 0x89898009, 0x0d0d171a, 0xbfbfda65, 0xe6e631d7, +0x4242c684, 0x6868b8d0, 0x4141c382, 0x9999b029, 0x2d2d775a, 0x0f0f111e, +0xb0b0cb7b, 0x5454fca8, 0xbbbbd66d, 0x16163a2c +}; +#endif + +#ifdef IS_LITTLE_ENDIAN +static const PRUint32 _TInv0[256] = +{ +0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a, 0xcb6bab3b, 0xf1459d1f, +0xab58faac, 0x9303e34b, 0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5, +0xfcd7e54f, 0xd7cb2ac5, 0x80443526, 0x8fa362b5, 0x495ab1de, 0x671bba25, +0x980eea45, 0xe1c0fe5d, 0x02752fc3, 0x12f04c81, 0xa397468d, 0xc6f9d36b, +0xe75f8f03, 0x959c9215, 0xeb7a6dbf, 0xda595295, 0x2d83bed4, 0xd3217458, +0x2969e049, 0x44c8c98e, 0x6a89c275, 0x78798ef4, 0x6b3e5899, 0xdd71b927, +0xb64fe1be, 0x17ad88f0, 0x66ac20c9, 0xb43ace7d, 0x184adf63, 0x82311ae5, +0x60335197, 0x457f5362, 0xe07764b1, 0x84ae6bbb, 0x1ca081fe, 0x942b08f9, +0x58684870, 0x19fd458f, 0x876cde94, 0xb7f87b52, 0x23d373ab, 0xe2024b72, +0x578f1fe3, 0x2aab5566, 0x0728ebb2, 0x03c2b52f, 0x9a7bc586, 0xa50837d3, +0xf2872830, 0xb2a5bf23, 0xba6a0302, 0x5c8216ed, 0x2b1ccf8a, 0x92b479a7, +0xf0f207f3, 0xa1e2694e, 0xcdf4da65, 0xd5be0506, 0x1f6234d1, 0x8afea6c4, +0x9d532e34, 0xa055f3a2, 0x32e18a05, 0x75ebf6a4, 0x39ec830b, 0xaaef6040, +0x069f715e, 0x51106ebd, 0xf98a213e, 0x3d06dd96, 0xae053edd, 0x46bde64d, +0xb58d5491, 0x055dc471, 0x6fd40604, 0xff155060, 0x24fb9819, 0x97e9bdd6, +0xcc434089, 0x779ed967, 0xbd42e8b0, 0x888b8907, 0x385b19e7, 0xdbeec879, +0x470a7ca1, 0xe90f427c, 0xc91e84f8, 0x00000000, 0x83868009, 0x48ed2b32, 
+0xac70111e, 0x4e725a6c, 0xfbff0efd, 0x5638850f, 0x1ed5ae3d, 0x27392d36, +0x64d90f0a, 0x21a65c68, 0xd1545b9b, 0x3a2e3624, 0xb1670a0c, 0x0fe75793, +0xd296eeb4, 0x9e919b1b, 0x4fc5c080, 0xa220dc61, 0x694b775a, 0x161a121c, +0x0aba93e2, 0xe52aa0c0, 0x43e0223c, 0x1d171b12, 0x0b0d090e, 0xadc78bf2, +0xb9a8b62d, 0xc8a91e14, 0x8519f157, 0x4c0775af, 0xbbdd99ee, 0xfd607fa3, +0x9f2601f7, 0xbcf5725c, 0xc53b6644, 0x347efb5b, 0x7629438b, 0xdcc623cb, +0x68fcedb6, 0x63f1e4b8, 0xcadc31d7, 0x10856342, 0x40229713, 0x2011c684, +0x7d244a85, 0xf83dbbd2, 0x1132f9ae, 0x6da129c7, 0x4b2f9e1d, 0xf330b2dc, +0xec52860d, 0xd0e3c177, 0x6c16b32b, 0x99b970a9, 0xfa489411, 0x2264e947, +0xc48cfca8, 0x1a3ff0a0, 0xd82c7d56, 0xef903322, 0xc74e4987, 0xc1d138d9, +0xfea2ca8c, 0x360bd498, 0xcf81f5a6, 0x28de7aa5, 0x268eb7da, 0xa4bfad3f, +0xe49d3a2c, 0x0d927850, 0x9bcc5f6a, 0x62467e54, 0xc2138df6, 0xe8b8d890, +0x5ef7392e, 0xf5afc382, 0xbe805d9f, 0x7c93d069, 0xa92dd56f, 0xb31225cf, +0x3b99acc8, 0xa77d1810, 0x6e639ce8, 0x7bbb3bdb, 0x097826cd, 0xf418596e, +0x01b79aec, 0xa89a4f83, 0x656e95e6, 0x7ee6ffaa, 0x08cfbc21, 0xe6e815ef, +0xd99be7ba, 0xce366f4a, 0xd4099fea, 0xd67cb029, 0xafb2a431, 0x31233f2a, +0x3094a5c6, 0xc066a235, 0x37bc4e74, 0xa6ca82fc, 0xb0d090e0, 0x15d8a733, +0x4a9804f1, 0xf7daec41, 0x0e50cd7f, 0x2ff69117, 0x8dd64d76, 0x4db0ef43, +0x544daacc, 0xdf0496e4, 0xe3b5d19e, 0x1b886a4c, 0xb81f2cc1, 0x7f516546, +0x04ea5e9d, 0x5d358c01, 0x737487fa, 0x2e410bfb, 0x5a1d67b3, 0x52d2db92, +0x335610e9, 0x1347d66d, 0x8c61d79a, 0x7a0ca137, 0x8e14f859, 0x893c13eb, +0xee27a9ce, 0x35c961b7, 0xede51ce1, 0x3cb1477a, 0x59dfd29c, 0x3f73f255, +0x79ce1418, 0xbf37c773, 0xeacdf753, 0x5baafd5f, 0x146f3ddf, 0x86db4478, +0x81f3afca, 0x3ec468b9, 0x2c342438, 0x5f40a3c2, 0x72c31d16, 0x0c25e2bc, +0x8b493c28, 0x41950dff, 0x7101a839, 0xdeb30c08, 0x9ce4b4d8, 0x90c15664, +0x6184cb7b, 0x70b632d5, 0x745c6c48, 0x4257b8d0 +}; +#else +static const PRUint32 _TInv0[256] = +{ +0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96, 0x3bab6bcb, 0x1f9d45f1, 
+0xacfa58ab, 0x4be30393, 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25, +0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f, 0xdeb15a49, 0x25ba1b67, +0x45ea0e98, 0x5dfec0e1, 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6, +0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da, 0xd4be832d, 0x587421d3, +0x49e06929, 0x8ec9c844, 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd, +0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4, 0x63df4a18, 0xe51a3182, +0x97513360, 0x62537f45, 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94, +0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7, 0xab73d323, 0x724b02e2, +0xe31f8f57, 0x6655ab2a, 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5, +0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c, 0x8acf1c2b, 0xa779b492, +0xf307f2f0, 0x4e69e2a1, 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a, +0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75, 0x0b83ec39, 0x4060efaa, +0x5e719f06, 0xbd6e1051, 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46, +0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff, 0x1998fb24, 0xd6bde997, +0x894043cc, 0x67d99e77, 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb, +0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000, 0x09808683, 0x322bed48, +0x1e1170ac, 0x6c5a724e, 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927, +0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a, 0x0c0a67b1, 0x9357e70f, +0xb4ee96d2, 0x1b9b919e, 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16, +0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d, 0x0e090d0b, 0xf28bc7ad, +0x2db6a8b9, 0x141ea9c8, 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd, +0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34, 0x8b432976, 0xcb23c6dc, +0xb6edfc68, 0xb8e4f163, 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120, +0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d, 0x1d9e2f4b, 0xdcb230f3, +0x0d8652ec, 0x77c1e3d0, 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422, +0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef, 0x87494ec7, 0xd938d1c1, +0x8ccaa2fe, 0x98d40b36, 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4, +0x2c3a9de4, 0x5078920d, 
0x6a5fcc9b, 0x547e4662, 0xf68d13c2, 0x90d8b8e8, +0x2e39f75e, 0x82c3aff5, 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3, +0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b, 0xcd267809, 0x6e5918f4, +0xec9ab701, 0x834f9aa8, 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6, +0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6, 0x31a4b2af, 0x2a3f2331, +0xc6a59430, 0x35a266c0, 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815, +0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f, 0x764dd68d, 0x43efb04d, +0xccaa4d54, 0xe49604df, 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f, +0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e, 0xb3671d5a, 0x92dbd252, +0xe9105633, 0x6dd64713, 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89, +0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c, 0x9cd2df59, 0x55f2733f, +0x1814ce79, 0x73c737bf, 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86, +0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f, 0x161dc372, 0xbce2250c, +0x283c498b, 0xff0d9541, 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190, +0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742 +}; +#endif + +#ifdef IS_LITTLE_ENDIAN +static const PRUint32 _TInv1[256] = +{ +0xa7f45150, 0x65417e53, 0xa4171ac3, 0x5e273a96, 0x6bab3bcb, 0x459d1ff1, +0x58faacab, 0x03e34b93, 0xfa302055, 0x6d76adf6, 0x76cc8891, 0x4c02f525, +0xd7e54ffc, 0xcb2ac5d7, 0x44352680, 0xa362b58f, 0x5ab1de49, 0x1bba2567, +0x0eea4598, 0xc0fe5de1, 0x752fc302, 0xf04c8112, 0x97468da3, 0xf9d36bc6, +0x5f8f03e7, 0x9c921595, 0x7a6dbfeb, 0x595295da, 0x83bed42d, 0x217458d3, +0x69e04929, 0xc8c98e44, 0x89c2756a, 0x798ef478, 0x3e58996b, 0x71b927dd, +0x4fe1beb6, 0xad88f017, 0xac20c966, 0x3ace7db4, 0x4adf6318, 0x311ae582, +0x33519760, 0x7f536245, 0x7764b1e0, 0xae6bbb84, 0xa081fe1c, 0x2b08f994, +0x68487058, 0xfd458f19, 0x6cde9487, 0xf87b52b7, 0xd373ab23, 0x024b72e2, +0x8f1fe357, 0xab55662a, 0x28ebb207, 0xc2b52f03, 0x7bc5869a, 0x0837d3a5, +0x872830f2, 0xa5bf23b2, 0x6a0302ba, 0x8216ed5c, 0x1ccf8a2b, 0xb479a792, +0xf207f3f0, 0xe2694ea1, 0xf4da65cd, 0xbe0506d5, 0x6234d11f, 0xfea6c48a, 
+0x532e349d, 0x55f3a2a0, 0xe18a0532, 0xebf6a475, 0xec830b39, 0xef6040aa, +0x9f715e06, 0x106ebd51, 0x8a213ef9, 0x06dd963d, 0x053eddae, 0xbde64d46, +0x8d5491b5, 0x5dc47105, 0xd406046f, 0x155060ff, 0xfb981924, 0xe9bdd697, +0x434089cc, 0x9ed96777, 0x42e8b0bd, 0x8b890788, 0x5b19e738, 0xeec879db, +0x0a7ca147, 0x0f427ce9, 0x1e84f8c9, 0x00000000, 0x86800983, 0xed2b3248, +0x70111eac, 0x725a6c4e, 0xff0efdfb, 0x38850f56, 0xd5ae3d1e, 0x392d3627, +0xd90f0a64, 0xa65c6821, 0x545b9bd1, 0x2e36243a, 0x670a0cb1, 0xe757930f, +0x96eeb4d2, 0x919b1b9e, 0xc5c0804f, 0x20dc61a2, 0x4b775a69, 0x1a121c16, +0xba93e20a, 0x2aa0c0e5, 0xe0223c43, 0x171b121d, 0x0d090e0b, 0xc78bf2ad, +0xa8b62db9, 0xa91e14c8, 0x19f15785, 0x0775af4c, 0xdd99eebb, 0x607fa3fd, +0x2601f79f, 0xf5725cbc, 0x3b6644c5, 0x7efb5b34, 0x29438b76, 0xc623cbdc, +0xfcedb668, 0xf1e4b863, 0xdc31d7ca, 0x85634210, 0x22971340, 0x11c68420, +0x244a857d, 0x3dbbd2f8, 0x32f9ae11, 0xa129c76d, 0x2f9e1d4b, 0x30b2dcf3, +0x52860dec, 0xe3c177d0, 0x16b32b6c, 0xb970a999, 0x489411fa, 0x64e94722, +0x8cfca8c4, 0x3ff0a01a, 0x2c7d56d8, 0x903322ef, 0x4e4987c7, 0xd138d9c1, +0xa2ca8cfe, 0x0bd49836, 0x81f5a6cf, 0xde7aa528, 0x8eb7da26, 0xbfad3fa4, +0x9d3a2ce4, 0x9278500d, 0xcc5f6a9b, 0x467e5462, 0x138df6c2, 0xb8d890e8, +0xf7392e5e, 0xafc382f5, 0x805d9fbe, 0x93d0697c, 0x2dd56fa9, 0x1225cfb3, +0x99acc83b, 0x7d1810a7, 0x639ce86e, 0xbb3bdb7b, 0x7826cd09, 0x18596ef4, +0xb79aec01, 0x9a4f83a8, 0x6e95e665, 0xe6ffaa7e, 0xcfbc2108, 0xe815efe6, +0x9be7bad9, 0x366f4ace, 0x099fead4, 0x7cb029d6, 0xb2a431af, 0x233f2a31, +0x94a5c630, 0x66a235c0, 0xbc4e7437, 0xca82fca6, 0xd090e0b0, 0xd8a73315, +0x9804f14a, 0xdaec41f7, 0x50cd7f0e, 0xf691172f, 0xd64d768d, 0xb0ef434d, +0x4daacc54, 0x0496e4df, 0xb5d19ee3, 0x886a4c1b, 0x1f2cc1b8, 0x5165467f, +0xea5e9d04, 0x358c015d, 0x7487fa73, 0x410bfb2e, 0x1d67b35a, 0xd2db9252, +0x5610e933, 0x47d66d13, 0x61d79a8c, 0x0ca1377a, 0x14f8598e, 0x3c13eb89, +0x27a9ceee, 0xc961b735, 0xe51ce1ed, 0xb1477a3c, 0xdfd29c59, 0x73f2553f, +0xce141879, 0x37c773bf, 
0xcdf753ea, 0xaafd5f5b, 0x6f3ddf14, 0xdb447886, +0xf3afca81, 0xc468b93e, 0x3424382c, 0x40a3c25f, 0xc31d1672, 0x25e2bc0c, +0x493c288b, 0x950dff41, 0x01a83971, 0xb30c08de, 0xe4b4d89c, 0xc1566490, +0x84cb7b61, 0xb632d570, 0x5c6c4874, 0x57b8d042 +}; +#else +static const PRUint32 _TInv1[256] = +{ +0x5051f4a7, 0x537e4165, 0xc31a17a4, 0x963a275e, 0xcb3bab6b, 0xf11f9d45, +0xabacfa58, 0x934be303, 0x552030fa, 0xf6ad766d, 0x9188cc76, 0x25f5024c, +0xfc4fe5d7, 0xd7c52acb, 0x80263544, 0x8fb562a3, 0x49deb15a, 0x6725ba1b, +0x9845ea0e, 0xe15dfec0, 0x02c32f75, 0x12814cf0, 0xa38d4697, 0xc66bd3f9, +0xe7038f5f, 0x9515929c, 0xebbf6d7a, 0xda955259, 0x2dd4be83, 0xd3587421, +0x2949e069, 0x448ec9c8, 0x6a75c289, 0x78f48e79, 0x6b99583e, 0xdd27b971, +0xb6bee14f, 0x17f088ad, 0x66c920ac, 0xb47dce3a, 0x1863df4a, 0x82e51a31, +0x60975133, 0x4562537f, 0xe0b16477, 0x84bb6bae, 0x1cfe81a0, 0x94f9082b, +0x58704868, 0x198f45fd, 0x8794de6c, 0xb7527bf8, 0x23ab73d3, 0xe2724b02, +0x57e31f8f, 0x2a6655ab, 0x07b2eb28, 0x032fb5c2, 0x9a86c57b, 0xa5d33708, +0xf2302887, 0xb223bfa5, 0xba02036a, 0x5ced1682, 0x2b8acf1c, 0x92a779b4, +0xf0f307f2, 0xa14e69e2, 0xcd65daf4, 0xd50605be, 0x1fd13462, 0x8ac4a6fe, +0x9d342e53, 0xa0a2f355, 0x32058ae1, 0x75a4f6eb, 0x390b83ec, 0xaa4060ef, +0x065e719f, 0x51bd6e10, 0xf93e218a, 0x3d96dd06, 0xaedd3e05, 0x464de6bd, +0xb591548d, 0x0571c45d, 0x6f0406d4, 0xff605015, 0x241998fb, 0x97d6bde9, +0xcc894043, 0x7767d99e, 0xbdb0e842, 0x8807898b, 0x38e7195b, 0xdb79c8ee, +0x47a17c0a, 0xe97c420f, 0xc9f8841e, 0x00000000, 0x83098086, 0x48322bed, +0xac1e1170, 0x4e6c5a72, 0xfbfd0eff, 0x560f8538, 0x1e3daed5, 0x27362d39, +0x640a0fd9, 0x21685ca6, 0xd19b5b54, 0x3a24362e, 0xb10c0a67, 0x0f9357e7, +0xd2b4ee96, 0x9e1b9b91, 0x4f80c0c5, 0xa261dc20, 0x695a774b, 0x161c121a, +0x0ae293ba, 0xe5c0a02a, 0x433c22e0, 0x1d121b17, 0x0b0e090d, 0xadf28bc7, +0xb92db6a8, 0xc8141ea9, 0x8557f119, 0x4caf7507, 0xbbee99dd, 0xfda37f60, +0x9ff70126, 0xbc5c72f5, 0xc544663b, 0x345bfb7e, 0x768b4329, 0xdccb23c6, +0x68b6edfc, 0x63b8e4f1, 
0xcad731dc, 0x10426385, 0x40139722, 0x2084c611, +0x7d854a24, 0xf8d2bb3d, 0x11aef932, 0x6dc729a1, 0x4b1d9e2f, 0xf3dcb230, +0xec0d8652, 0xd077c1e3, 0x6c2bb316, 0x99a970b9, 0xfa119448, 0x2247e964, +0xc4a8fc8c, 0x1aa0f03f, 0xd8567d2c, 0xef223390, 0xc787494e, 0xc1d938d1, +0xfe8ccaa2, 0x3698d40b, 0xcfa6f581, 0x28a57ade, 0x26dab78e, 0xa43fadbf, +0xe42c3a9d, 0x0d507892, 0x9b6a5fcc, 0x62547e46, 0xc2f68d13, 0xe890d8b8, +0x5e2e39f7, 0xf582c3af, 0xbe9f5d80, 0x7c69d093, 0xa96fd52d, 0xb3cf2512, +0x3bc8ac99, 0xa710187d, 0x6ee89c63, 0x7bdb3bbb, 0x09cd2678, 0xf46e5918, +0x01ec9ab7, 0xa8834f9a, 0x65e6956e, 0x7eaaffe6, 0x0821bccf, 0xe6ef15e8, +0xd9bae79b, 0xce4a6f36, 0xd4ea9f09, 0xd629b07c, 0xaf31a4b2, 0x312a3f23, +0x30c6a594, 0xc035a266, 0x37744ebc, 0xa6fc82ca, 0xb0e090d0, 0x1533a7d8, +0x4af10498, 0xf741ecda, 0x0e7fcd50, 0x2f1791f6, 0x8d764dd6, 0x4d43efb0, +0x54ccaa4d, 0xdfe49604, 0xe39ed1b5, 0x1b4c6a88, 0xb8c12c1f, 0x7f466551, +0x049d5eea, 0x5d018c35, 0x73fa8774, 0x2efb0b41, 0x5ab3671d, 0x5292dbd2, +0x33e91056, 0x136dd647, 0x8c9ad761, 0x7a37a10c, 0x8e59f814, 0x89eb133c, +0xeecea927, 0x35b761c9, 0xede11ce5, 0x3c7a47b1, 0x599cd2df, 0x3f55f273, +0x791814ce, 0xbf73c737, 0xea53f7cd, 0x5b5ffdaa, 0x14df3d6f, 0x867844db, +0x81caaff3, 0x3eb968c4, 0x2c382434, 0x5fc2a340, 0x72161dc3, 0x0cbce225, +0x8b283c49, 0x41ff0d95, 0x7139a801, 0xde080cb3, 0x9cd8b4e4, 0x906456c1, +0x617bcb84, 0x70d532b6, 0x74486c5c, 0x42d0b857 +}; +#endif + +#ifdef IS_LITTLE_ENDIAN +static const PRUint32 _TInv2[256] = +{ +0xf45150a7, 0x417e5365, 0x171ac3a4, 0x273a965e, 0xab3bcb6b, 0x9d1ff145, +0xfaacab58, 0xe34b9303, 0x302055fa, 0x76adf66d, 0xcc889176, 0x02f5254c, +0xe54ffcd7, 0x2ac5d7cb, 0x35268044, 0x62b58fa3, 0xb1de495a, 0xba25671b, +0xea45980e, 0xfe5de1c0, 0x2fc30275, 0x4c8112f0, 0x468da397, 0xd36bc6f9, +0x8f03e75f, 0x9215959c, 0x6dbfeb7a, 0x5295da59, 0xbed42d83, 0x7458d321, +0xe0492969, 0xc98e44c8, 0xc2756a89, 0x8ef47879, 0x58996b3e, 0xb927dd71, +0xe1beb64f, 0x88f017ad, 0x20c966ac, 0xce7db43a, 0xdf63184a, 0x1ae58231, 
+0x51976033, 0x5362457f, 0x64b1e077, 0x6bbb84ae, 0x81fe1ca0, 0x08f9942b, +0x48705868, 0x458f19fd, 0xde94876c, 0x7b52b7f8, 0x73ab23d3, 0x4b72e202, +0x1fe3578f, 0x55662aab, 0xebb20728, 0xb52f03c2, 0xc5869a7b, 0x37d3a508, +0x2830f287, 0xbf23b2a5, 0x0302ba6a, 0x16ed5c82, 0xcf8a2b1c, 0x79a792b4, +0x07f3f0f2, 0x694ea1e2, 0xda65cdf4, 0x0506d5be, 0x34d11f62, 0xa6c48afe, +0x2e349d53, 0xf3a2a055, 0x8a0532e1, 0xf6a475eb, 0x830b39ec, 0x6040aaef, +0x715e069f, 0x6ebd5110, 0x213ef98a, 0xdd963d06, 0x3eddae05, 0xe64d46bd, +0x5491b58d, 0xc471055d, 0x06046fd4, 0x5060ff15, 0x981924fb, 0xbdd697e9, +0x4089cc43, 0xd967779e, 0xe8b0bd42, 0x8907888b, 0x19e7385b, 0xc879dbee, +0x7ca1470a, 0x427ce90f, 0x84f8c91e, 0x00000000, 0x80098386, 0x2b3248ed, +0x111eac70, 0x5a6c4e72, 0x0efdfbff, 0x850f5638, 0xae3d1ed5, 0x2d362739, +0x0f0a64d9, 0x5c6821a6, 0x5b9bd154, 0x36243a2e, 0x0a0cb167, 0x57930fe7, +0xeeb4d296, 0x9b1b9e91, 0xc0804fc5, 0xdc61a220, 0x775a694b, 0x121c161a, +0x93e20aba, 0xa0c0e52a, 0x223c43e0, 0x1b121d17, 0x090e0b0d, 0x8bf2adc7, +0xb62db9a8, 0x1e14c8a9, 0xf1578519, 0x75af4c07, 0x99eebbdd, 0x7fa3fd60, +0x01f79f26, 0x725cbcf5, 0x6644c53b, 0xfb5b347e, 0x438b7629, 0x23cbdcc6, +0xedb668fc, 0xe4b863f1, 0x31d7cadc, 0x63421085, 0x97134022, 0xc6842011, +0x4a857d24, 0xbbd2f83d, 0xf9ae1132, 0x29c76da1, 0x9e1d4b2f, 0xb2dcf330, +0x860dec52, 0xc177d0e3, 0xb32b6c16, 0x70a999b9, 0x9411fa48, 0xe9472264, +0xfca8c48c, 0xf0a01a3f, 0x7d56d82c, 0x3322ef90, 0x4987c74e, 0x38d9c1d1, +0xca8cfea2, 0xd498360b, 0xf5a6cf81, 0x7aa528de, 0xb7da268e, 0xad3fa4bf, +0x3a2ce49d, 0x78500d92, 0x5f6a9bcc, 0x7e546246, 0x8df6c213, 0xd890e8b8, +0x392e5ef7, 0xc382f5af, 0x5d9fbe80, 0xd0697c93, 0xd56fa92d, 0x25cfb312, +0xacc83b99, 0x1810a77d, 0x9ce86e63, 0x3bdb7bbb, 0x26cd0978, 0x596ef418, +0x9aec01b7, 0x4f83a89a, 0x95e6656e, 0xffaa7ee6, 0xbc2108cf, 0x15efe6e8, +0xe7bad99b, 0x6f4ace36, 0x9fead409, 0xb029d67c, 0xa431afb2, 0x3f2a3123, +0xa5c63094, 0xa235c066, 0x4e7437bc, 0x82fca6ca, 0x90e0b0d0, 0xa73315d8, +0x04f14a98, 0xec41f7da, 
0xcd7f0e50, 0x91172ff6, 0x4d768dd6, 0xef434db0, +0xaacc544d, 0x96e4df04, 0xd19ee3b5, 0x6a4c1b88, 0x2cc1b81f, 0x65467f51, +0x5e9d04ea, 0x8c015d35, 0x87fa7374, 0x0bfb2e41, 0x67b35a1d, 0xdb9252d2, +0x10e93356, 0xd66d1347, 0xd79a8c61, 0xa1377a0c, 0xf8598e14, 0x13eb893c, +0xa9ceee27, 0x61b735c9, 0x1ce1ede5, 0x477a3cb1, 0xd29c59df, 0xf2553f73, +0x141879ce, 0xc773bf37, 0xf753eacd, 0xfd5f5baa, 0x3ddf146f, 0x447886db, +0xafca81f3, 0x68b93ec4, 0x24382c34, 0xa3c25f40, 0x1d1672c3, 0xe2bc0c25, +0x3c288b49, 0x0dff4195, 0xa8397101, 0x0c08deb3, 0xb4d89ce4, 0x566490c1, +0xcb7b6184, 0x32d570b6, 0x6c48745c, 0xb8d04257 +}; +#else +static const PRUint32 _TInv2[256] = +{ +0xa75051f4, 0x65537e41, 0xa4c31a17, 0x5e963a27, 0x6bcb3bab, 0x45f11f9d, +0x58abacfa, 0x03934be3, 0xfa552030, 0x6df6ad76, 0x769188cc, 0x4c25f502, +0xd7fc4fe5, 0xcbd7c52a, 0x44802635, 0xa38fb562, 0x5a49deb1, 0x1b6725ba, +0x0e9845ea, 0xc0e15dfe, 0x7502c32f, 0xf012814c, 0x97a38d46, 0xf9c66bd3, +0x5fe7038f, 0x9c951592, 0x7aebbf6d, 0x59da9552, 0x832dd4be, 0x21d35874, +0x692949e0, 0xc8448ec9, 0x896a75c2, 0x7978f48e, 0x3e6b9958, 0x71dd27b9, +0x4fb6bee1, 0xad17f088, 0xac66c920, 0x3ab47dce, 0x4a1863df, 0x3182e51a, +0x33609751, 0x7f456253, 0x77e0b164, 0xae84bb6b, 0xa01cfe81, 0x2b94f908, +0x68587048, 0xfd198f45, 0x6c8794de, 0xf8b7527b, 0xd323ab73, 0x02e2724b, +0x8f57e31f, 0xab2a6655, 0x2807b2eb, 0xc2032fb5, 0x7b9a86c5, 0x08a5d337, +0x87f23028, 0xa5b223bf, 0x6aba0203, 0x825ced16, 0x1c2b8acf, 0xb492a779, +0xf2f0f307, 0xe2a14e69, 0xf4cd65da, 0xbed50605, 0x621fd134, 0xfe8ac4a6, +0x539d342e, 0x55a0a2f3, 0xe132058a, 0xeb75a4f6, 0xec390b83, 0xefaa4060, +0x9f065e71, 0x1051bd6e, 0x8af93e21, 0x063d96dd, 0x05aedd3e, 0xbd464de6, +0x8db59154, 0x5d0571c4, 0xd46f0406, 0x15ff6050, 0xfb241998, 0xe997d6bd, +0x43cc8940, 0x9e7767d9, 0x42bdb0e8, 0x8b880789, 0x5b38e719, 0xeedb79c8, +0x0a47a17c, 0x0fe97c42, 0x1ec9f884, 0x00000000, 0x86830980, 0xed48322b, +0x70ac1e11, 0x724e6c5a, 0xfffbfd0e, 0x38560f85, 0xd51e3dae, 0x3927362d, +0xd9640a0f, 0xa621685c, 
0x54d19b5b, 0x2e3a2436, 0x67b10c0a, 0xe70f9357, +0x96d2b4ee, 0x919e1b9b, 0xc54f80c0, 0x20a261dc, 0x4b695a77, 0x1a161c12, +0xba0ae293, 0x2ae5c0a0, 0xe0433c22, 0x171d121b, 0x0d0b0e09, 0xc7adf28b, +0xa8b92db6, 0xa9c8141e, 0x198557f1, 0x074caf75, 0xddbbee99, 0x60fda37f, +0x269ff701, 0xf5bc5c72, 0x3bc54466, 0x7e345bfb, 0x29768b43, 0xc6dccb23, +0xfc68b6ed, 0xf163b8e4, 0xdccad731, 0x85104263, 0x22401397, 0x112084c6, +0x247d854a, 0x3df8d2bb, 0x3211aef9, 0xa16dc729, 0x2f4b1d9e, 0x30f3dcb2, +0x52ec0d86, 0xe3d077c1, 0x166c2bb3, 0xb999a970, 0x48fa1194, 0x642247e9, +0x8cc4a8fc, 0x3f1aa0f0, 0x2cd8567d, 0x90ef2233, 0x4ec78749, 0xd1c1d938, +0xa2fe8cca, 0x0b3698d4, 0x81cfa6f5, 0xde28a57a, 0x8e26dab7, 0xbfa43fad, +0x9de42c3a, 0x920d5078, 0xcc9b6a5f, 0x4662547e, 0x13c2f68d, 0xb8e890d8, +0xf75e2e39, 0xaff582c3, 0x80be9f5d, 0x937c69d0, 0x2da96fd5, 0x12b3cf25, +0x993bc8ac, 0x7da71018, 0x636ee89c, 0xbb7bdb3b, 0x7809cd26, 0x18f46e59, +0xb701ec9a, 0x9aa8834f, 0x6e65e695, 0xe67eaaff, 0xcf0821bc, 0xe8e6ef15, +0x9bd9bae7, 0x36ce4a6f, 0x09d4ea9f, 0x7cd629b0, 0xb2af31a4, 0x23312a3f, +0x9430c6a5, 0x66c035a2, 0xbc37744e, 0xcaa6fc82, 0xd0b0e090, 0xd81533a7, +0x984af104, 0xdaf741ec, 0x500e7fcd, 0xf62f1791, 0xd68d764d, 0xb04d43ef, +0x4d54ccaa, 0x04dfe496, 0xb5e39ed1, 0x881b4c6a, 0x1fb8c12c, 0x517f4665, +0xea049d5e, 0x355d018c, 0x7473fa87, 0x412efb0b, 0x1d5ab367, 0xd25292db, +0x5633e910, 0x47136dd6, 0x618c9ad7, 0x0c7a37a1, 0x148e59f8, 0x3c89eb13, +0x27eecea9, 0xc935b761, 0xe5ede11c, 0xb13c7a47, 0xdf599cd2, 0x733f55f2, +0xce791814, 0x37bf73c7, 0xcdea53f7, 0xaa5b5ffd, 0x6f14df3d, 0xdb867844, +0xf381caaf, 0xc43eb968, 0x342c3824, 0x405fc2a3, 0xc372161d, 0x250cbce2, +0x498b283c, 0x9541ff0d, 0x017139a8, 0xb3de080c, 0xe49cd8b4, 0xc1906456, +0x84617bcb, 0xb670d532, 0x5c74486c, 0x5742d0b8 +}; +#endif + +#ifdef IS_LITTLE_ENDIAN +static const PRUint32 _TInv3[256] = +{ +0x5150a7f4, 0x7e536541, 0x1ac3a417, 0x3a965e27, 0x3bcb6bab, 0x1ff1459d, +0xacab58fa, 0x4b9303e3, 0x2055fa30, 0xadf66d76, 0x889176cc, 0xf5254c02, 
+0x4ffcd7e5, 0xc5d7cb2a, 0x26804435, 0xb58fa362, 0xde495ab1, 0x25671bba, +0x45980eea, 0x5de1c0fe, 0xc302752f, 0x8112f04c, 0x8da39746, 0x6bc6f9d3, +0x03e75f8f, 0x15959c92, 0xbfeb7a6d, 0x95da5952, 0xd42d83be, 0x58d32174, +0x492969e0, 0x8e44c8c9, 0x756a89c2, 0xf478798e, 0x996b3e58, 0x27dd71b9, +0xbeb64fe1, 0xf017ad88, 0xc966ac20, 0x7db43ace, 0x63184adf, 0xe582311a, +0x97603351, 0x62457f53, 0xb1e07764, 0xbb84ae6b, 0xfe1ca081, 0xf9942b08, +0x70586848, 0x8f19fd45, 0x94876cde, 0x52b7f87b, 0xab23d373, 0x72e2024b, +0xe3578f1f, 0x662aab55, 0xb20728eb, 0x2f03c2b5, 0x869a7bc5, 0xd3a50837, +0x30f28728, 0x23b2a5bf, 0x02ba6a03, 0xed5c8216, 0x8a2b1ccf, 0xa792b479, +0xf3f0f207, 0x4ea1e269, 0x65cdf4da, 0x06d5be05, 0xd11f6234, 0xc48afea6, +0x349d532e, 0xa2a055f3, 0x0532e18a, 0xa475ebf6, 0x0b39ec83, 0x40aaef60, +0x5e069f71, 0xbd51106e, 0x3ef98a21, 0x963d06dd, 0xddae053e, 0x4d46bde6, +0x91b58d54, 0x71055dc4, 0x046fd406, 0x60ff1550, 0x1924fb98, 0xd697e9bd, +0x89cc4340, 0x67779ed9, 0xb0bd42e8, 0x07888b89, 0xe7385b19, 0x79dbeec8, +0xa1470a7c, 0x7ce90f42, 0xf8c91e84, 0x00000000, 0x09838680, 0x3248ed2b, +0x1eac7011, 0x6c4e725a, 0xfdfbff0e, 0x0f563885, 0x3d1ed5ae, 0x3627392d, +0x0a64d90f, 0x6821a65c, 0x9bd1545b, 0x243a2e36, 0x0cb1670a, 0x930fe757, +0xb4d296ee, 0x1b9e919b, 0x804fc5c0, 0x61a220dc, 0x5a694b77, 0x1c161a12, +0xe20aba93, 0xc0e52aa0, 0x3c43e022, 0x121d171b, 0x0e0b0d09, 0xf2adc78b, +0x2db9a8b6, 0x14c8a91e, 0x578519f1, 0xaf4c0775, 0xeebbdd99, 0xa3fd607f, +0xf79f2601, 0x5cbcf572, 0x44c53b66, 0x5b347efb, 0x8b762943, 0xcbdcc623, +0xb668fced, 0xb863f1e4, 0xd7cadc31, 0x42108563, 0x13402297, 0x842011c6, +0x857d244a, 0xd2f83dbb, 0xae1132f9, 0xc76da129, 0x1d4b2f9e, 0xdcf330b2, +0x0dec5286, 0x77d0e3c1, 0x2b6c16b3, 0xa999b970, 0x11fa4894, 0x472264e9, +0xa8c48cfc, 0xa01a3ff0, 0x56d82c7d, 0x22ef9033, 0x87c74e49, 0xd9c1d138, +0x8cfea2ca, 0x98360bd4, 0xa6cf81f5, 0xa528de7a, 0xda268eb7, 0x3fa4bfad, +0x2ce49d3a, 0x500d9278, 0x6a9bcc5f, 0x5462467e, 0xf6c2138d, 0x90e8b8d8, +0x2e5ef739, 0x82f5afc3, 
0x9fbe805d, 0x697c93d0, 0x6fa92dd5, 0xcfb31225, +0xc83b99ac, 0x10a77d18, 0xe86e639c, 0xdb7bbb3b, 0xcd097826, 0x6ef41859, +0xec01b79a, 0x83a89a4f, 0xe6656e95, 0xaa7ee6ff, 0x2108cfbc, 0xefe6e815, +0xbad99be7, 0x4ace366f, 0xead4099f, 0x29d67cb0, 0x31afb2a4, 0x2a31233f, +0xc63094a5, 0x35c066a2, 0x7437bc4e, 0xfca6ca82, 0xe0b0d090, 0x3315d8a7, +0xf14a9804, 0x41f7daec, 0x7f0e50cd, 0x172ff691, 0x768dd64d, 0x434db0ef, +0xcc544daa, 0xe4df0496, 0x9ee3b5d1, 0x4c1b886a, 0xc1b81f2c, 0x467f5165, +0x9d04ea5e, 0x015d358c, 0xfa737487, 0xfb2e410b, 0xb35a1d67, 0x9252d2db, +0xe9335610, 0x6d1347d6, 0x9a8c61d7, 0x377a0ca1, 0x598e14f8, 0xeb893c13, +0xceee27a9, 0xb735c961, 0xe1ede51c, 0x7a3cb147, 0x9c59dfd2, 0x553f73f2, +0x1879ce14, 0x73bf37c7, 0x53eacdf7, 0x5f5baafd, 0xdf146f3d, 0x7886db44, +0xca81f3af, 0xb93ec468, 0x382c3424, 0xc25f40a3, 0x1672c31d, 0xbc0c25e2, +0x288b493c, 0xff41950d, 0x397101a8, 0x08deb30c, 0xd89ce4b4, 0x6490c156, +0x7b6184cb, 0xd570b632, 0x48745c6c, 0xd04257b8 +}; +#else +static const PRUint32 _TInv3[256] = +{ +0xf4a75051, 0x4165537e, 0x17a4c31a, 0x275e963a, 0xab6bcb3b, 0x9d45f11f, +0xfa58abac, 0xe303934b, 0x30fa5520, 0x766df6ad, 0xcc769188, 0x024c25f5, +0xe5d7fc4f, 0x2acbd7c5, 0x35448026, 0x62a38fb5, 0xb15a49de, 0xba1b6725, +0xea0e9845, 0xfec0e15d, 0x2f7502c3, 0x4cf01281, 0x4697a38d, 0xd3f9c66b, +0x8f5fe703, 0x929c9515, 0x6d7aebbf, 0x5259da95, 0xbe832dd4, 0x7421d358, +0xe0692949, 0xc9c8448e, 0xc2896a75, 0x8e7978f4, 0x583e6b99, 0xb971dd27, +0xe14fb6be, 0x88ad17f0, 0x20ac66c9, 0xce3ab47d, 0xdf4a1863, 0x1a3182e5, +0x51336097, 0x537f4562, 0x6477e0b1, 0x6bae84bb, 0x81a01cfe, 0x082b94f9, +0x48685870, 0x45fd198f, 0xde6c8794, 0x7bf8b752, 0x73d323ab, 0x4b02e272, +0x1f8f57e3, 0x55ab2a66, 0xeb2807b2, 0xb5c2032f, 0xc57b9a86, 0x3708a5d3, +0x2887f230, 0xbfa5b223, 0x036aba02, 0x16825ced, 0xcf1c2b8a, 0x79b492a7, +0x07f2f0f3, 0x69e2a14e, 0xdaf4cd65, 0x05bed506, 0x34621fd1, 0xa6fe8ac4, +0x2e539d34, 0xf355a0a2, 0x8ae13205, 0xf6eb75a4, 0x83ec390b, 0x60efaa40, +0x719f065e, 0x6e1051bd, 
0x218af93e, 0xdd063d96, 0x3e05aedd, 0xe6bd464d, +0x548db591, 0xc45d0571, 0x06d46f04, 0x5015ff60, 0x98fb2419, 0xbde997d6, +0x4043cc89, 0xd99e7767, 0xe842bdb0, 0x898b8807, 0x195b38e7, 0xc8eedb79, +0x7c0a47a1, 0x420fe97c, 0x841ec9f8, 0x00000000, 0x80868309, 0x2bed4832, +0x1170ac1e, 0x5a724e6c, 0x0efffbfd, 0x8538560f, 0xaed51e3d, 0x2d392736, +0x0fd9640a, 0x5ca62168, 0x5b54d19b, 0x362e3a24, 0x0a67b10c, 0x57e70f93, +0xee96d2b4, 0x9b919e1b, 0xc0c54f80, 0xdc20a261, 0x774b695a, 0x121a161c, +0x93ba0ae2, 0xa02ae5c0, 0x22e0433c, 0x1b171d12, 0x090d0b0e, 0x8bc7adf2, +0xb6a8b92d, 0x1ea9c814, 0xf1198557, 0x75074caf, 0x99ddbbee, 0x7f60fda3, +0x01269ff7, 0x72f5bc5c, 0x663bc544, 0xfb7e345b, 0x4329768b, 0x23c6dccb, +0xedfc68b6, 0xe4f163b8, 0x31dccad7, 0x63851042, 0x97224013, 0xc6112084, +0x4a247d85, 0xbb3df8d2, 0xf93211ae, 0x29a16dc7, 0x9e2f4b1d, 0xb230f3dc, +0x8652ec0d, 0xc1e3d077, 0xb3166c2b, 0x70b999a9, 0x9448fa11, 0xe9642247, +0xfc8cc4a8, 0xf03f1aa0, 0x7d2cd856, 0x3390ef22, 0x494ec787, 0x38d1c1d9, +0xcaa2fe8c, 0xd40b3698, 0xf581cfa6, 0x7ade28a5, 0xb78e26da, 0xadbfa43f, +0x3a9de42c, 0x78920d50, 0x5fcc9b6a, 0x7e466254, 0x8d13c2f6, 0xd8b8e890, +0x39f75e2e, 0xc3aff582, 0x5d80be9f, 0xd0937c69, 0xd52da96f, 0x2512b3cf, +0xac993bc8, 0x187da710, 0x9c636ee8, 0x3bbb7bdb, 0x267809cd, 0x5918f46e, +0x9ab701ec, 0x4f9aa883, 0x956e65e6, 0xffe67eaa, 0xbccf0821, 0x15e8e6ef, +0xe79bd9ba, 0x6f36ce4a, 0x9f09d4ea, 0xb07cd629, 0xa4b2af31, 0x3f23312a, +0xa59430c6, 0xa266c035, 0x4ebc3774, 0x82caa6fc, 0x90d0b0e0, 0xa7d81533, +0x04984af1, 0xecdaf741, 0xcd500e7f, 0x91f62f17, 0x4dd68d76, 0xefb04d43, +0xaa4d54cc, 0x9604dfe4, 0xd1b5e39e, 0x6a881b4c, 0x2c1fb8c1, 0x65517f46, +0x5eea049d, 0x8c355d01, 0x877473fa, 0x0b412efb, 0x671d5ab3, 0xdbd25292, +0x105633e9, 0xd647136d, 0xd7618c9a, 0xa10c7a37, 0xf8148e59, 0x133c89eb, +0xa927eece, 0x61c935b7, 0x1ce5ede1, 0x47b13c7a, 0xd2df599c, 0xf2733f55, +0x14ce7918, 0xc737bf73, 0xf7cdea53, 0xfdaa5b5f, 0x3d6f14df, 0x44db8678, +0xaff381ca, 0x68c43eb9, 0x24342c38, 0xa3405fc2, 
0x1dc37216, 0xe2250cbc, +0x3c498b28, 0x0d9541ff, 0xa8017139, 0x0cb3de08, 0xb4e49cd8, 0x56c19064, +0xcb84617b, 0x32b670d5, 0x6c5c7448, 0xb85742d0 +}; +#endif + +#ifdef IS_LITTLE_ENDIAN +static const PRUint32 _IMXC0[256] = +{ +0x00000000, 0x0b0d090e, 0x161a121c, 0x1d171b12, 0x2c342438, 0x27392d36, +0x3a2e3624, 0x31233f2a, 0x58684870, 0x5365417e, 0x4e725a6c, 0x457f5362, +0x745c6c48, 0x7f516546, 0x62467e54, 0x694b775a, 0xb0d090e0, 0xbbdd99ee, +0xa6ca82fc, 0xadc78bf2, 0x9ce4b4d8, 0x97e9bdd6, 0x8afea6c4, 0x81f3afca, +0xe8b8d890, 0xe3b5d19e, 0xfea2ca8c, 0xf5afc382, 0xc48cfca8, 0xcf81f5a6, +0xd296eeb4, 0xd99be7ba, 0x7bbb3bdb, 0x70b632d5, 0x6da129c7, 0x66ac20c9, +0x578f1fe3, 0x5c8216ed, 0x41950dff, 0x4a9804f1, 0x23d373ab, 0x28de7aa5, +0x35c961b7, 0x3ec468b9, 0x0fe75793, 0x04ea5e9d, 0x19fd458f, 0x12f04c81, +0xcb6bab3b, 0xc066a235, 0xdd71b927, 0xd67cb029, 0xe75f8f03, 0xec52860d, +0xf1459d1f, 0xfa489411, 0x9303e34b, 0x980eea45, 0x8519f157, 0x8e14f859, +0xbf37c773, 0xb43ace7d, 0xa92dd56f, 0xa220dc61, 0xf66d76ad, 0xfd607fa3, +0xe07764b1, 0xeb7a6dbf, 0xda595295, 0xd1545b9b, 0xcc434089, 0xc74e4987, +0xae053edd, 0xa50837d3, 0xb81f2cc1, 0xb31225cf, 0x82311ae5, 0x893c13eb, +0x942b08f9, 0x9f2601f7, 0x46bde64d, 0x4db0ef43, 0x50a7f451, 0x5baafd5f, +0x6a89c275, 0x6184cb7b, 0x7c93d069, 0x779ed967, 0x1ed5ae3d, 0x15d8a733, +0x08cfbc21, 0x03c2b52f, 0x32e18a05, 0x39ec830b, 0x24fb9819, 0x2ff69117, +0x8dd64d76, 0x86db4478, 0x9bcc5f6a, 0x90c15664, 0xa1e2694e, 0xaaef6040, +0xb7f87b52, 0xbcf5725c, 0xd5be0506, 0xdeb30c08, 0xc3a4171a, 0xc8a91e14, +0xf98a213e, 0xf2872830, 0xef903322, 0xe49d3a2c, 0x3d06dd96, 0x360bd498, +0x2b1ccf8a, 0x2011c684, 0x1132f9ae, 0x1a3ff0a0, 0x0728ebb2, 0x0c25e2bc, +0x656e95e6, 0x6e639ce8, 0x737487fa, 0x78798ef4, 0x495ab1de, 0x4257b8d0, +0x5f40a3c2, 0x544daacc, 0xf7daec41, 0xfcd7e54f, 0xe1c0fe5d, 0xeacdf753, +0xdbeec879, 0xd0e3c177, 0xcdf4da65, 0xc6f9d36b, 0xafb2a431, 0xa4bfad3f, +0xb9a8b62d, 0xb2a5bf23, 0x83868009, 0x888b8907, 0x959c9215, 0x9e919b1b, +0x470a7ca1, 
0x4c0775af, 0x51106ebd, 0x5a1d67b3, 0x6b3e5899, 0x60335197, +0x7d244a85, 0x7629438b, 0x1f6234d1, 0x146f3ddf, 0x097826cd, 0x02752fc3, +0x335610e9, 0x385b19e7, 0x254c02f5, 0x2e410bfb, 0x8c61d79a, 0x876cde94, +0x9a7bc586, 0x9176cc88, 0xa055f3a2, 0xab58faac, 0xb64fe1be, 0xbd42e8b0, +0xd4099fea, 0xdf0496e4, 0xc2138df6, 0xc91e84f8, 0xf83dbbd2, 0xf330b2dc, +0xee27a9ce, 0xe52aa0c0, 0x3cb1477a, 0x37bc4e74, 0x2aab5566, 0x21a65c68, +0x10856342, 0x1b886a4c, 0x069f715e, 0x0d927850, 0x64d90f0a, 0x6fd40604, +0x72c31d16, 0x79ce1418, 0x48ed2b32, 0x43e0223c, 0x5ef7392e, 0x55fa3020, +0x01b79aec, 0x0aba93e2, 0x17ad88f0, 0x1ca081fe, 0x2d83bed4, 0x268eb7da, +0x3b99acc8, 0x3094a5c6, 0x59dfd29c, 0x52d2db92, 0x4fc5c080, 0x44c8c98e, +0x75ebf6a4, 0x7ee6ffaa, 0x63f1e4b8, 0x68fcedb6, 0xb1670a0c, 0xba6a0302, +0xa77d1810, 0xac70111e, 0x9d532e34, 0x965e273a, 0x8b493c28, 0x80443526, +0xe90f427c, 0xe2024b72, 0xff155060, 0xf418596e, 0xc53b6644, 0xce366f4a, +0xd3217458, 0xd82c7d56, 0x7a0ca137, 0x7101a839, 0x6c16b32b, 0x671bba25, +0x5638850f, 0x5d358c01, 0x40229713, 0x4b2f9e1d, 0x2264e947, 0x2969e049, +0x347efb5b, 0x3f73f255, 0x0e50cd7f, 0x055dc471, 0x184adf63, 0x1347d66d, +0xcadc31d7, 0xc1d138d9, 0xdcc623cb, 0xd7cb2ac5, 0xe6e815ef, 0xede51ce1, +0xf0f207f3, 0xfbff0efd, 0x92b479a7, 0x99b970a9, 0x84ae6bbb, 0x8fa362b5, +0xbe805d9f, 0xb58d5491, 0xa89a4f83, 0xa397468d +}; +#else +static const PRUint32 _IMXC0[256] = +{ +0x00000000, 0x0e090d0b, 0x1c121a16, 0x121b171d, 0x3824342c, 0x362d3927, +0x24362e3a, 0x2a3f2331, 0x70486858, 0x7e416553, 0x6c5a724e, 0x62537f45, +0x486c5c74, 0x4665517f, 0x547e4662, 0x5a774b69, 0xe090d0b0, 0xee99ddbb, +0xfc82caa6, 0xf28bc7ad, 0xd8b4e49c, 0xd6bde997, 0xc4a6fe8a, 0xcaaff381, +0x90d8b8e8, 0x9ed1b5e3, 0x8ccaa2fe, 0x82c3aff5, 0xa8fc8cc4, 0xa6f581cf, +0xb4ee96d2, 0xbae79bd9, 0xdb3bbb7b, 0xd532b670, 0xc729a16d, 0xc920ac66, +0xe31f8f57, 0xed16825c, 0xff0d9541, 0xf104984a, 0xab73d323, 0xa57ade28, +0xb761c935, 0xb968c43e, 0x9357e70f, 0x9d5eea04, 0x8f45fd19, 0x814cf012, +0x3bab6bcb, 
0x35a266c0, 0x27b971dd, 0x29b07cd6, 0x038f5fe7, 0x0d8652ec, +0x1f9d45f1, 0x119448fa, 0x4be30393, 0x45ea0e98, 0x57f11985, 0x59f8148e, +0x73c737bf, 0x7dce3ab4, 0x6fd52da9, 0x61dc20a2, 0xad766df6, 0xa37f60fd, +0xb16477e0, 0xbf6d7aeb, 0x955259da, 0x9b5b54d1, 0x894043cc, 0x87494ec7, +0xdd3e05ae, 0xd33708a5, 0xc12c1fb8, 0xcf2512b3, 0xe51a3182, 0xeb133c89, +0xf9082b94, 0xf701269f, 0x4de6bd46, 0x43efb04d, 0x51f4a750, 0x5ffdaa5b, +0x75c2896a, 0x7bcb8461, 0x69d0937c, 0x67d99e77, 0x3daed51e, 0x33a7d815, +0x21bccf08, 0x2fb5c203, 0x058ae132, 0x0b83ec39, 0x1998fb24, 0x1791f62f, +0x764dd68d, 0x7844db86, 0x6a5fcc9b, 0x6456c190, 0x4e69e2a1, 0x4060efaa, +0x527bf8b7, 0x5c72f5bc, 0x0605bed5, 0x080cb3de, 0x1a17a4c3, 0x141ea9c8, +0x3e218af9, 0x302887f2, 0x223390ef, 0x2c3a9de4, 0x96dd063d, 0x98d40b36, +0x8acf1c2b, 0x84c61120, 0xaef93211, 0xa0f03f1a, 0xb2eb2807, 0xbce2250c, +0xe6956e65, 0xe89c636e, 0xfa877473, 0xf48e7978, 0xdeb15a49, 0xd0b85742, +0xc2a3405f, 0xccaa4d54, 0x41ecdaf7, 0x4fe5d7fc, 0x5dfec0e1, 0x53f7cdea, +0x79c8eedb, 0x77c1e3d0, 0x65daf4cd, 0x6bd3f9c6, 0x31a4b2af, 0x3fadbfa4, +0x2db6a8b9, 0x23bfa5b2, 0x09808683, 0x07898b88, 0x15929c95, 0x1b9b919e, +0xa17c0a47, 0xaf75074c, 0xbd6e1051, 0xb3671d5a, 0x99583e6b, 0x97513360, +0x854a247d, 0x8b432976, 0xd134621f, 0xdf3d6f14, 0xcd267809, 0xc32f7502, +0xe9105633, 0xe7195b38, 0xf5024c25, 0xfb0b412e, 0x9ad7618c, 0x94de6c87, +0x86c57b9a, 0x88cc7691, 0xa2f355a0, 0xacfa58ab, 0xbee14fb6, 0xb0e842bd, +0xea9f09d4, 0xe49604df, 0xf68d13c2, 0xf8841ec9, 0xd2bb3df8, 0xdcb230f3, +0xcea927ee, 0xc0a02ae5, 0x7a47b13c, 0x744ebc37, 0x6655ab2a, 0x685ca621, +0x42638510, 0x4c6a881b, 0x5e719f06, 0x5078920d, 0x0a0fd964, 0x0406d46f, +0x161dc372, 0x1814ce79, 0x322bed48, 0x3c22e043, 0x2e39f75e, 0x2030fa55, +0xec9ab701, 0xe293ba0a, 0xf088ad17, 0xfe81a01c, 0xd4be832d, 0xdab78e26, +0xc8ac993b, 0xc6a59430, 0x9cd2df59, 0x92dbd252, 0x80c0c54f, 0x8ec9c844, +0xa4f6eb75, 0xaaffe67e, 0xb8e4f163, 0xb6edfc68, 0x0c0a67b1, 0x02036aba, +0x10187da7, 0x1e1170ac, 0x342e539d, 
0x3a275e96, 0x283c498b, 0x26354480, +0x7c420fe9, 0x724b02e2, 0x605015ff, 0x6e5918f4, 0x44663bc5, 0x4a6f36ce, +0x587421d3, 0x567d2cd8, 0x37a10c7a, 0x39a80171, 0x2bb3166c, 0x25ba1b67, +0x0f853856, 0x018c355d, 0x13972240, 0x1d9e2f4b, 0x47e96422, 0x49e06929, +0x5bfb7e34, 0x55f2733f, 0x7fcd500e, 0x71c45d05, 0x63df4a18, 0x6dd64713, +0xd731dcca, 0xd938d1c1, 0xcb23c6dc, 0xc52acbd7, 0xef15e8e6, 0xe11ce5ed, +0xf307f2f0, 0xfd0efffb, 0xa779b492, 0xa970b999, 0xbb6bae84, 0xb562a38f, +0x9f5d80be, 0x91548db5, 0x834f9aa8, 0x8d4697a3 +}; +#endif + +#ifdef IS_LITTLE_ENDIAN +static const PRUint32 _IMXC1[256] = +{ +0x00000000, 0x0d090e0b, 0x1a121c16, 0x171b121d, 0x3424382c, 0x392d3627, +0x2e36243a, 0x233f2a31, 0x68487058, 0x65417e53, 0x725a6c4e, 0x7f536245, +0x5c6c4874, 0x5165467f, 0x467e5462, 0x4b775a69, 0xd090e0b0, 0xdd99eebb, +0xca82fca6, 0xc78bf2ad, 0xe4b4d89c, 0xe9bdd697, 0xfea6c48a, 0xf3afca81, +0xb8d890e8, 0xb5d19ee3, 0xa2ca8cfe, 0xafc382f5, 0x8cfca8c4, 0x81f5a6cf, +0x96eeb4d2, 0x9be7bad9, 0xbb3bdb7b, 0xb632d570, 0xa129c76d, 0xac20c966, +0x8f1fe357, 0x8216ed5c, 0x950dff41, 0x9804f14a, 0xd373ab23, 0xde7aa528, +0xc961b735, 0xc468b93e, 0xe757930f, 0xea5e9d04, 0xfd458f19, 0xf04c8112, +0x6bab3bcb, 0x66a235c0, 0x71b927dd, 0x7cb029d6, 0x5f8f03e7, 0x52860dec, +0x459d1ff1, 0x489411fa, 0x03e34b93, 0x0eea4598, 0x19f15785, 0x14f8598e, +0x37c773bf, 0x3ace7db4, 0x2dd56fa9, 0x20dc61a2, 0x6d76adf6, 0x607fa3fd, +0x7764b1e0, 0x7a6dbfeb, 0x595295da, 0x545b9bd1, 0x434089cc, 0x4e4987c7, +0x053eddae, 0x0837d3a5, 0x1f2cc1b8, 0x1225cfb3, 0x311ae582, 0x3c13eb89, +0x2b08f994, 0x2601f79f, 0xbde64d46, 0xb0ef434d, 0xa7f45150, 0xaafd5f5b, +0x89c2756a, 0x84cb7b61, 0x93d0697c, 0x9ed96777, 0xd5ae3d1e, 0xd8a73315, +0xcfbc2108, 0xc2b52f03, 0xe18a0532, 0xec830b39, 0xfb981924, 0xf691172f, +0xd64d768d, 0xdb447886, 0xcc5f6a9b, 0xc1566490, 0xe2694ea1, 0xef6040aa, +0xf87b52b7, 0xf5725cbc, 0xbe0506d5, 0xb30c08de, 0xa4171ac3, 0xa91e14c8, +0x8a213ef9, 0x872830f2, 0x903322ef, 0x9d3a2ce4, 0x06dd963d, 0x0bd49836, 
+0x1ccf8a2b, 0x11c68420, 0x32f9ae11, 0x3ff0a01a, 0x28ebb207, 0x25e2bc0c, +0x6e95e665, 0x639ce86e, 0x7487fa73, 0x798ef478, 0x5ab1de49, 0x57b8d042, +0x40a3c25f, 0x4daacc54, 0xdaec41f7, 0xd7e54ffc, 0xc0fe5de1, 0xcdf753ea, +0xeec879db, 0xe3c177d0, 0xf4da65cd, 0xf9d36bc6, 0xb2a431af, 0xbfad3fa4, +0xa8b62db9, 0xa5bf23b2, 0x86800983, 0x8b890788, 0x9c921595, 0x919b1b9e, +0x0a7ca147, 0x0775af4c, 0x106ebd51, 0x1d67b35a, 0x3e58996b, 0x33519760, +0x244a857d, 0x29438b76, 0x6234d11f, 0x6f3ddf14, 0x7826cd09, 0x752fc302, +0x5610e933, 0x5b19e738, 0x4c02f525, 0x410bfb2e, 0x61d79a8c, 0x6cde9487, +0x7bc5869a, 0x76cc8891, 0x55f3a2a0, 0x58faacab, 0x4fe1beb6, 0x42e8b0bd, +0x099fead4, 0x0496e4df, 0x138df6c2, 0x1e84f8c9, 0x3dbbd2f8, 0x30b2dcf3, +0x27a9ceee, 0x2aa0c0e5, 0xb1477a3c, 0xbc4e7437, 0xab55662a, 0xa65c6821, +0x85634210, 0x886a4c1b, 0x9f715e06, 0x9278500d, 0xd90f0a64, 0xd406046f, +0xc31d1672, 0xce141879, 0xed2b3248, 0xe0223c43, 0xf7392e5e, 0xfa302055, +0xb79aec01, 0xba93e20a, 0xad88f017, 0xa081fe1c, 0x83bed42d, 0x8eb7da26, +0x99acc83b, 0x94a5c630, 0xdfd29c59, 0xd2db9252, 0xc5c0804f, 0xc8c98e44, +0xebf6a475, 0xe6ffaa7e, 0xf1e4b863, 0xfcedb668, 0x670a0cb1, 0x6a0302ba, +0x7d1810a7, 0x70111eac, 0x532e349d, 0x5e273a96, 0x493c288b, 0x44352680, +0x0f427ce9, 0x024b72e2, 0x155060ff, 0x18596ef4, 0x3b6644c5, 0x366f4ace, +0x217458d3, 0x2c7d56d8, 0x0ca1377a, 0x01a83971, 0x16b32b6c, 0x1bba2567, +0x38850f56, 0x358c015d, 0x22971340, 0x2f9e1d4b, 0x64e94722, 0x69e04929, +0x7efb5b34, 0x73f2553f, 0x50cd7f0e, 0x5dc47105, 0x4adf6318, 0x47d66d13, +0xdc31d7ca, 0xd138d9c1, 0xc623cbdc, 0xcb2ac5d7, 0xe815efe6, 0xe51ce1ed, +0xf207f3f0, 0xff0efdfb, 0xb479a792, 0xb970a999, 0xae6bbb84, 0xa362b58f, +0x805d9fbe, 0x8d5491b5, 0x9a4f83a8, 0x97468da3 +}; +#else +static const PRUint32 _IMXC1[256] = +{ +0x00000000, 0x0b0e090d, 0x161c121a, 0x1d121b17, 0x2c382434, 0x27362d39, +0x3a24362e, 0x312a3f23, 0x58704868, 0x537e4165, 0x4e6c5a72, 0x4562537f, +0x74486c5c, 0x7f466551, 0x62547e46, 0x695a774b, 0xb0e090d0, 0xbbee99dd, 
+0xa6fc82ca, 0xadf28bc7, 0x9cd8b4e4, 0x97d6bde9, 0x8ac4a6fe, 0x81caaff3, +0xe890d8b8, 0xe39ed1b5, 0xfe8ccaa2, 0xf582c3af, 0xc4a8fc8c, 0xcfa6f581, +0xd2b4ee96, 0xd9bae79b, 0x7bdb3bbb, 0x70d532b6, 0x6dc729a1, 0x66c920ac, +0x57e31f8f, 0x5ced1682, 0x41ff0d95, 0x4af10498, 0x23ab73d3, 0x28a57ade, +0x35b761c9, 0x3eb968c4, 0x0f9357e7, 0x049d5eea, 0x198f45fd, 0x12814cf0, +0xcb3bab6b, 0xc035a266, 0xdd27b971, 0xd629b07c, 0xe7038f5f, 0xec0d8652, +0xf11f9d45, 0xfa119448, 0x934be303, 0x9845ea0e, 0x8557f119, 0x8e59f814, +0xbf73c737, 0xb47dce3a, 0xa96fd52d, 0xa261dc20, 0xf6ad766d, 0xfda37f60, +0xe0b16477, 0xebbf6d7a, 0xda955259, 0xd19b5b54, 0xcc894043, 0xc787494e, +0xaedd3e05, 0xa5d33708, 0xb8c12c1f, 0xb3cf2512, 0x82e51a31, 0x89eb133c, +0x94f9082b, 0x9ff70126, 0x464de6bd, 0x4d43efb0, 0x5051f4a7, 0x5b5ffdaa, +0x6a75c289, 0x617bcb84, 0x7c69d093, 0x7767d99e, 0x1e3daed5, 0x1533a7d8, +0x0821bccf, 0x032fb5c2, 0x32058ae1, 0x390b83ec, 0x241998fb, 0x2f1791f6, +0x8d764dd6, 0x867844db, 0x9b6a5fcc, 0x906456c1, 0xa14e69e2, 0xaa4060ef, +0xb7527bf8, 0xbc5c72f5, 0xd50605be, 0xde080cb3, 0xc31a17a4, 0xc8141ea9, +0xf93e218a, 0xf2302887, 0xef223390, 0xe42c3a9d, 0x3d96dd06, 0x3698d40b, +0x2b8acf1c, 0x2084c611, 0x11aef932, 0x1aa0f03f, 0x07b2eb28, 0x0cbce225, +0x65e6956e, 0x6ee89c63, 0x73fa8774, 0x78f48e79, 0x49deb15a, 0x42d0b857, +0x5fc2a340, 0x54ccaa4d, 0xf741ecda, 0xfc4fe5d7, 0xe15dfec0, 0xea53f7cd, +0xdb79c8ee, 0xd077c1e3, 0xcd65daf4, 0xc66bd3f9, 0xaf31a4b2, 0xa43fadbf, +0xb92db6a8, 0xb223bfa5, 0x83098086, 0x8807898b, 0x9515929c, 0x9e1b9b91, +0x47a17c0a, 0x4caf7507, 0x51bd6e10, 0x5ab3671d, 0x6b99583e, 0x60975133, +0x7d854a24, 0x768b4329, 0x1fd13462, 0x14df3d6f, 0x09cd2678, 0x02c32f75, +0x33e91056, 0x38e7195b, 0x25f5024c, 0x2efb0b41, 0x8c9ad761, 0x8794de6c, +0x9a86c57b, 0x9188cc76, 0xa0a2f355, 0xabacfa58, 0xb6bee14f, 0xbdb0e842, +0xd4ea9f09, 0xdfe49604, 0xc2f68d13, 0xc9f8841e, 0xf8d2bb3d, 0xf3dcb230, +0xeecea927, 0xe5c0a02a, 0x3c7a47b1, 0x37744ebc, 0x2a6655ab, 0x21685ca6, +0x10426385, 0x1b4c6a88, 
0x065e719f, 0x0d507892, 0x640a0fd9, 0x6f0406d4, +0x72161dc3, 0x791814ce, 0x48322bed, 0x433c22e0, 0x5e2e39f7, 0x552030fa, +0x01ec9ab7, 0x0ae293ba, 0x17f088ad, 0x1cfe81a0, 0x2dd4be83, 0x26dab78e, +0x3bc8ac99, 0x30c6a594, 0x599cd2df, 0x5292dbd2, 0x4f80c0c5, 0x448ec9c8, +0x75a4f6eb, 0x7eaaffe6, 0x63b8e4f1, 0x68b6edfc, 0xb10c0a67, 0xba02036a, +0xa710187d, 0xac1e1170, 0x9d342e53, 0x963a275e, 0x8b283c49, 0x80263544, +0xe97c420f, 0xe2724b02, 0xff605015, 0xf46e5918, 0xc544663b, 0xce4a6f36, +0xd3587421, 0xd8567d2c, 0x7a37a10c, 0x7139a801, 0x6c2bb316, 0x6725ba1b, +0x560f8538, 0x5d018c35, 0x40139722, 0x4b1d9e2f, 0x2247e964, 0x2949e069, +0x345bfb7e, 0x3f55f273, 0x0e7fcd50, 0x0571c45d, 0x1863df4a, 0x136dd647, +0xcad731dc, 0xc1d938d1, 0xdccb23c6, 0xd7c52acb, 0xe6ef15e8, 0xede11ce5, +0xf0f307f2, 0xfbfd0eff, 0x92a779b4, 0x99a970b9, 0x84bb6bae, 0x8fb562a3, +0xbe9f5d80, 0xb591548d, 0xa8834f9a, 0xa38d4697 +}; +#endif + +#ifdef IS_LITTLE_ENDIAN +static const PRUint32 _IMXC2[256] = +{ +0x00000000, 0x090e0b0d, 0x121c161a, 0x1b121d17, 0x24382c34, 0x2d362739, +0x36243a2e, 0x3f2a3123, 0x48705868, 0x417e5365, 0x5a6c4e72, 0x5362457f, +0x6c48745c, 0x65467f51, 0x7e546246, 0x775a694b, 0x90e0b0d0, 0x99eebbdd, +0x82fca6ca, 0x8bf2adc7, 0xb4d89ce4, 0xbdd697e9, 0xa6c48afe, 0xafca81f3, +0xd890e8b8, 0xd19ee3b5, 0xca8cfea2, 0xc382f5af, 0xfca8c48c, 0xf5a6cf81, +0xeeb4d296, 0xe7bad99b, 0x3bdb7bbb, 0x32d570b6, 0x29c76da1, 0x20c966ac, +0x1fe3578f, 0x16ed5c82, 0x0dff4195, 0x04f14a98, 0x73ab23d3, 0x7aa528de, +0x61b735c9, 0x68b93ec4, 0x57930fe7, 0x5e9d04ea, 0x458f19fd, 0x4c8112f0, +0xab3bcb6b, 0xa235c066, 0xb927dd71, 0xb029d67c, 0x8f03e75f, 0x860dec52, +0x9d1ff145, 0x9411fa48, 0xe34b9303, 0xea45980e, 0xf1578519, 0xf8598e14, +0xc773bf37, 0xce7db43a, 0xd56fa92d, 0xdc61a220, 0x76adf66d, 0x7fa3fd60, +0x64b1e077, 0x6dbfeb7a, 0x5295da59, 0x5b9bd154, 0x4089cc43, 0x4987c74e, +0x3eddae05, 0x37d3a508, 0x2cc1b81f, 0x25cfb312, 0x1ae58231, 0x13eb893c, +0x08f9942b, 0x01f79f26, 0xe64d46bd, 0xef434db0, 0xf45150a7, 0xfd5f5baa, 
+0xc2756a89, 0xcb7b6184, 0xd0697c93, 0xd967779e, 0xae3d1ed5, 0xa73315d8, +0xbc2108cf, 0xb52f03c2, 0x8a0532e1, 0x830b39ec, 0x981924fb, 0x91172ff6, +0x4d768dd6, 0x447886db, 0x5f6a9bcc, 0x566490c1, 0x694ea1e2, 0x6040aaef, +0x7b52b7f8, 0x725cbcf5, 0x0506d5be, 0x0c08deb3, 0x171ac3a4, 0x1e14c8a9, +0x213ef98a, 0x2830f287, 0x3322ef90, 0x3a2ce49d, 0xdd963d06, 0xd498360b, +0xcf8a2b1c, 0xc6842011, 0xf9ae1132, 0xf0a01a3f, 0xebb20728, 0xe2bc0c25, +0x95e6656e, 0x9ce86e63, 0x87fa7374, 0x8ef47879, 0xb1de495a, 0xb8d04257, +0xa3c25f40, 0xaacc544d, 0xec41f7da, 0xe54ffcd7, 0xfe5de1c0, 0xf753eacd, +0xc879dbee, 0xc177d0e3, 0xda65cdf4, 0xd36bc6f9, 0xa431afb2, 0xad3fa4bf, +0xb62db9a8, 0xbf23b2a5, 0x80098386, 0x8907888b, 0x9215959c, 0x9b1b9e91, +0x7ca1470a, 0x75af4c07, 0x6ebd5110, 0x67b35a1d, 0x58996b3e, 0x51976033, +0x4a857d24, 0x438b7629, 0x34d11f62, 0x3ddf146f, 0x26cd0978, 0x2fc30275, +0x10e93356, 0x19e7385b, 0x02f5254c, 0x0bfb2e41, 0xd79a8c61, 0xde94876c, +0xc5869a7b, 0xcc889176, 0xf3a2a055, 0xfaacab58, 0xe1beb64f, 0xe8b0bd42, +0x9fead409, 0x96e4df04, 0x8df6c213, 0x84f8c91e, 0xbbd2f83d, 0xb2dcf330, +0xa9ceee27, 0xa0c0e52a, 0x477a3cb1, 0x4e7437bc, 0x55662aab, 0x5c6821a6, +0x63421085, 0x6a4c1b88, 0x715e069f, 0x78500d92, 0x0f0a64d9, 0x06046fd4, +0x1d1672c3, 0x141879ce, 0x2b3248ed, 0x223c43e0, 0x392e5ef7, 0x302055fa, +0x9aec01b7, 0x93e20aba, 0x88f017ad, 0x81fe1ca0, 0xbed42d83, 0xb7da268e, +0xacc83b99, 0xa5c63094, 0xd29c59df, 0xdb9252d2, 0xc0804fc5, 0xc98e44c8, +0xf6a475eb, 0xffaa7ee6, 0xe4b863f1, 0xedb668fc, 0x0a0cb167, 0x0302ba6a, +0x1810a77d, 0x111eac70, 0x2e349d53, 0x273a965e, 0x3c288b49, 0x35268044, +0x427ce90f, 0x4b72e202, 0x5060ff15, 0x596ef418, 0x6644c53b, 0x6f4ace36, +0x7458d321, 0x7d56d82c, 0xa1377a0c, 0xa8397101, 0xb32b6c16, 0xba25671b, +0x850f5638, 0x8c015d35, 0x97134022, 0x9e1d4b2f, 0xe9472264, 0xe0492969, +0xfb5b347e, 0xf2553f73, 0xcd7f0e50, 0xc471055d, 0xdf63184a, 0xd66d1347, +0x31d7cadc, 0x38d9c1d1, 0x23cbdcc6, 0x2ac5d7cb, 0x15efe6e8, 0x1ce1ede5, +0x07f3f0f2, 0x0efdfbff, 
0x79a792b4, 0x70a999b9, 0x6bbb84ae, 0x62b58fa3, +0x5d9fbe80, 0x5491b58d, 0x4f83a89a, 0x468da397 +}; +#else +static const PRUint32 _IMXC2[256] = +{ +0x00000000, 0x0d0b0e09, 0x1a161c12, 0x171d121b, 0x342c3824, 0x3927362d, +0x2e3a2436, 0x23312a3f, 0x68587048, 0x65537e41, 0x724e6c5a, 0x7f456253, +0x5c74486c, 0x517f4665, 0x4662547e, 0x4b695a77, 0xd0b0e090, 0xddbbee99, +0xcaa6fc82, 0xc7adf28b, 0xe49cd8b4, 0xe997d6bd, 0xfe8ac4a6, 0xf381caaf, +0xb8e890d8, 0xb5e39ed1, 0xa2fe8cca, 0xaff582c3, 0x8cc4a8fc, 0x81cfa6f5, +0x96d2b4ee, 0x9bd9bae7, 0xbb7bdb3b, 0xb670d532, 0xa16dc729, 0xac66c920, +0x8f57e31f, 0x825ced16, 0x9541ff0d, 0x984af104, 0xd323ab73, 0xde28a57a, +0xc935b761, 0xc43eb968, 0xe70f9357, 0xea049d5e, 0xfd198f45, 0xf012814c, +0x6bcb3bab, 0x66c035a2, 0x71dd27b9, 0x7cd629b0, 0x5fe7038f, 0x52ec0d86, +0x45f11f9d, 0x48fa1194, 0x03934be3, 0x0e9845ea, 0x198557f1, 0x148e59f8, +0x37bf73c7, 0x3ab47dce, 0x2da96fd5, 0x20a261dc, 0x6df6ad76, 0x60fda37f, +0x77e0b164, 0x7aebbf6d, 0x59da9552, 0x54d19b5b, 0x43cc8940, 0x4ec78749, +0x05aedd3e, 0x08a5d337, 0x1fb8c12c, 0x12b3cf25, 0x3182e51a, 0x3c89eb13, +0x2b94f908, 0x269ff701, 0xbd464de6, 0xb04d43ef, 0xa75051f4, 0xaa5b5ffd, +0x896a75c2, 0x84617bcb, 0x937c69d0, 0x9e7767d9, 0xd51e3dae, 0xd81533a7, +0xcf0821bc, 0xc2032fb5, 0xe132058a, 0xec390b83, 0xfb241998, 0xf62f1791, +0xd68d764d, 0xdb867844, 0xcc9b6a5f, 0xc1906456, 0xe2a14e69, 0xefaa4060, +0xf8b7527b, 0xf5bc5c72, 0xbed50605, 0xb3de080c, 0xa4c31a17, 0xa9c8141e, +0x8af93e21, 0x87f23028, 0x90ef2233, 0x9de42c3a, 0x063d96dd, 0x0b3698d4, +0x1c2b8acf, 0x112084c6, 0x3211aef9, 0x3f1aa0f0, 0x2807b2eb, 0x250cbce2, +0x6e65e695, 0x636ee89c, 0x7473fa87, 0x7978f48e, 0x5a49deb1, 0x5742d0b8, +0x405fc2a3, 0x4d54ccaa, 0xdaf741ec, 0xd7fc4fe5, 0xc0e15dfe, 0xcdea53f7, +0xeedb79c8, 0xe3d077c1, 0xf4cd65da, 0xf9c66bd3, 0xb2af31a4, 0xbfa43fad, +0xa8b92db6, 0xa5b223bf, 0x86830980, 0x8b880789, 0x9c951592, 0x919e1b9b, +0x0a47a17c, 0x074caf75, 0x1051bd6e, 0x1d5ab367, 0x3e6b9958, 0x33609751, +0x247d854a, 0x29768b43, 
0x621fd134, 0x6f14df3d, 0x7809cd26, 0x7502c32f, +0x5633e910, 0x5b38e719, 0x4c25f502, 0x412efb0b, 0x618c9ad7, 0x6c8794de, +0x7b9a86c5, 0x769188cc, 0x55a0a2f3, 0x58abacfa, 0x4fb6bee1, 0x42bdb0e8, +0x09d4ea9f, 0x04dfe496, 0x13c2f68d, 0x1ec9f884, 0x3df8d2bb, 0x30f3dcb2, +0x27eecea9, 0x2ae5c0a0, 0xb13c7a47, 0xbc37744e, 0xab2a6655, 0xa621685c, +0x85104263, 0x881b4c6a, 0x9f065e71, 0x920d5078, 0xd9640a0f, 0xd46f0406, +0xc372161d, 0xce791814, 0xed48322b, 0xe0433c22, 0xf75e2e39, 0xfa552030, +0xb701ec9a, 0xba0ae293, 0xad17f088, 0xa01cfe81, 0x832dd4be, 0x8e26dab7, +0x993bc8ac, 0x9430c6a5, 0xdf599cd2, 0xd25292db, 0xc54f80c0, 0xc8448ec9, +0xeb75a4f6, 0xe67eaaff, 0xf163b8e4, 0xfc68b6ed, 0x67b10c0a, 0x6aba0203, +0x7da71018, 0x70ac1e11, 0x539d342e, 0x5e963a27, 0x498b283c, 0x44802635, +0x0fe97c42, 0x02e2724b, 0x15ff6050, 0x18f46e59, 0x3bc54466, 0x36ce4a6f, +0x21d35874, 0x2cd8567d, 0x0c7a37a1, 0x017139a8, 0x166c2bb3, 0x1b6725ba, +0x38560f85, 0x355d018c, 0x22401397, 0x2f4b1d9e, 0x642247e9, 0x692949e0, +0x7e345bfb, 0x733f55f2, 0x500e7fcd, 0x5d0571c4, 0x4a1863df, 0x47136dd6, +0xdccad731, 0xd1c1d938, 0xc6dccb23, 0xcbd7c52a, 0xe8e6ef15, 0xe5ede11c, +0xf2f0f307, 0xfffbfd0e, 0xb492a779, 0xb999a970, 0xae84bb6b, 0xa38fb562, +0x80be9f5d, 0x8db59154, 0x9aa8834f, 0x97a38d46 +}; +#endif + +#ifdef IS_LITTLE_ENDIAN +static const PRUint32 _IMXC3[256] = +{ +0x00000000, 0x0e0b0d09, 0x1c161a12, 0x121d171b, 0x382c3424, 0x3627392d, +0x243a2e36, 0x2a31233f, 0x70586848, 0x7e536541, 0x6c4e725a, 0x62457f53, +0x48745c6c, 0x467f5165, 0x5462467e, 0x5a694b77, 0xe0b0d090, 0xeebbdd99, +0xfca6ca82, 0xf2adc78b, 0xd89ce4b4, 0xd697e9bd, 0xc48afea6, 0xca81f3af, +0x90e8b8d8, 0x9ee3b5d1, 0x8cfea2ca, 0x82f5afc3, 0xa8c48cfc, 0xa6cf81f5, +0xb4d296ee, 0xbad99be7, 0xdb7bbb3b, 0xd570b632, 0xc76da129, 0xc966ac20, +0xe3578f1f, 0xed5c8216, 0xff41950d, 0xf14a9804, 0xab23d373, 0xa528de7a, +0xb735c961, 0xb93ec468, 0x930fe757, 0x9d04ea5e, 0x8f19fd45, 0x8112f04c, +0x3bcb6bab, 0x35c066a2, 0x27dd71b9, 0x29d67cb0, 0x03e75f8f, 0x0dec5286, 
+0x1ff1459d, 0x11fa4894, 0x4b9303e3, 0x45980eea, 0x578519f1, 0x598e14f8, +0x73bf37c7, 0x7db43ace, 0x6fa92dd5, 0x61a220dc, 0xadf66d76, 0xa3fd607f, +0xb1e07764, 0xbfeb7a6d, 0x95da5952, 0x9bd1545b, 0x89cc4340, 0x87c74e49, +0xddae053e, 0xd3a50837, 0xc1b81f2c, 0xcfb31225, 0xe582311a, 0xeb893c13, +0xf9942b08, 0xf79f2601, 0x4d46bde6, 0x434db0ef, 0x5150a7f4, 0x5f5baafd, +0x756a89c2, 0x7b6184cb, 0x697c93d0, 0x67779ed9, 0x3d1ed5ae, 0x3315d8a7, +0x2108cfbc, 0x2f03c2b5, 0x0532e18a, 0x0b39ec83, 0x1924fb98, 0x172ff691, +0x768dd64d, 0x7886db44, 0x6a9bcc5f, 0x6490c156, 0x4ea1e269, 0x40aaef60, +0x52b7f87b, 0x5cbcf572, 0x06d5be05, 0x08deb30c, 0x1ac3a417, 0x14c8a91e, +0x3ef98a21, 0x30f28728, 0x22ef9033, 0x2ce49d3a, 0x963d06dd, 0x98360bd4, +0x8a2b1ccf, 0x842011c6, 0xae1132f9, 0xa01a3ff0, 0xb20728eb, 0xbc0c25e2, +0xe6656e95, 0xe86e639c, 0xfa737487, 0xf478798e, 0xde495ab1, 0xd04257b8, +0xc25f40a3, 0xcc544daa, 0x41f7daec, 0x4ffcd7e5, 0x5de1c0fe, 0x53eacdf7, +0x79dbeec8, 0x77d0e3c1, 0x65cdf4da, 0x6bc6f9d3, 0x31afb2a4, 0x3fa4bfad, +0x2db9a8b6, 0x23b2a5bf, 0x09838680, 0x07888b89, 0x15959c92, 0x1b9e919b, +0xa1470a7c, 0xaf4c0775, 0xbd51106e, 0xb35a1d67, 0x996b3e58, 0x97603351, +0x857d244a, 0x8b762943, 0xd11f6234, 0xdf146f3d, 0xcd097826, 0xc302752f, +0xe9335610, 0xe7385b19, 0xf5254c02, 0xfb2e410b, 0x9a8c61d7, 0x94876cde, +0x869a7bc5, 0x889176cc, 0xa2a055f3, 0xacab58fa, 0xbeb64fe1, 0xb0bd42e8, +0xead4099f, 0xe4df0496, 0xf6c2138d, 0xf8c91e84, 0xd2f83dbb, 0xdcf330b2, +0xceee27a9, 0xc0e52aa0, 0x7a3cb147, 0x7437bc4e, 0x662aab55, 0x6821a65c, +0x42108563, 0x4c1b886a, 0x5e069f71, 0x500d9278, 0x0a64d90f, 0x046fd406, +0x1672c31d, 0x1879ce14, 0x3248ed2b, 0x3c43e022, 0x2e5ef739, 0x2055fa30, +0xec01b79a, 0xe20aba93, 0xf017ad88, 0xfe1ca081, 0xd42d83be, 0xda268eb7, +0xc83b99ac, 0xc63094a5, 0x9c59dfd2, 0x9252d2db, 0x804fc5c0, 0x8e44c8c9, +0xa475ebf6, 0xaa7ee6ff, 0xb863f1e4, 0xb668fced, 0x0cb1670a, 0x02ba6a03, +0x10a77d18, 0x1eac7011, 0x349d532e, 0x3a965e27, 0x288b493c, 0x26804435, +0x7ce90f42, 0x72e2024b, 
0x60ff1550, 0x6ef41859, 0x44c53b66, 0x4ace366f, +0x58d32174, 0x56d82c7d, 0x377a0ca1, 0x397101a8, 0x2b6c16b3, 0x25671bba, +0x0f563885, 0x015d358c, 0x13402297, 0x1d4b2f9e, 0x472264e9, 0x492969e0, +0x5b347efb, 0x553f73f2, 0x7f0e50cd, 0x71055dc4, 0x63184adf, 0x6d1347d6, +0xd7cadc31, 0xd9c1d138, 0xcbdcc623, 0xc5d7cb2a, 0xefe6e815, 0xe1ede51c, +0xf3f0f207, 0xfdfbff0e, 0xa792b479, 0xa999b970, 0xbb84ae6b, 0xb58fa362, +0x9fbe805d, 0x91b58d54, 0x83a89a4f, 0x8da39746 +}; +#else +static const PRUint32 _IMXC3[256] = +{ +0x00000000, 0x090d0b0e, 0x121a161c, 0x1b171d12, 0x24342c38, 0x2d392736, +0x362e3a24, 0x3f23312a, 0x48685870, 0x4165537e, 0x5a724e6c, 0x537f4562, +0x6c5c7448, 0x65517f46, 0x7e466254, 0x774b695a, 0x90d0b0e0, 0x99ddbbee, +0x82caa6fc, 0x8bc7adf2, 0xb4e49cd8, 0xbde997d6, 0xa6fe8ac4, 0xaff381ca, +0xd8b8e890, 0xd1b5e39e, 0xcaa2fe8c, 0xc3aff582, 0xfc8cc4a8, 0xf581cfa6, +0xee96d2b4, 0xe79bd9ba, 0x3bbb7bdb, 0x32b670d5, 0x29a16dc7, 0x20ac66c9, +0x1f8f57e3, 0x16825ced, 0x0d9541ff, 0x04984af1, 0x73d323ab, 0x7ade28a5, +0x61c935b7, 0x68c43eb9, 0x57e70f93, 0x5eea049d, 0x45fd198f, 0x4cf01281, +0xab6bcb3b, 0xa266c035, 0xb971dd27, 0xb07cd629, 0x8f5fe703, 0x8652ec0d, +0x9d45f11f, 0x9448fa11, 0xe303934b, 0xea0e9845, 0xf1198557, 0xf8148e59, +0xc737bf73, 0xce3ab47d, 0xd52da96f, 0xdc20a261, 0x766df6ad, 0x7f60fda3, +0x6477e0b1, 0x6d7aebbf, 0x5259da95, 0x5b54d19b, 0x4043cc89, 0x494ec787, +0x3e05aedd, 0x3708a5d3, 0x2c1fb8c1, 0x2512b3cf, 0x1a3182e5, 0x133c89eb, +0x082b94f9, 0x01269ff7, 0xe6bd464d, 0xefb04d43, 0xf4a75051, 0xfdaa5b5f, +0xc2896a75, 0xcb84617b, 0xd0937c69, 0xd99e7767, 0xaed51e3d, 0xa7d81533, +0xbccf0821, 0xb5c2032f, 0x8ae13205, 0x83ec390b, 0x98fb2419, 0x91f62f17, +0x4dd68d76, 0x44db8678, 0x5fcc9b6a, 0x56c19064, 0x69e2a14e, 0x60efaa40, +0x7bf8b752, 0x72f5bc5c, 0x05bed506, 0x0cb3de08, 0x17a4c31a, 0x1ea9c814, +0x218af93e, 0x2887f230, 0x3390ef22, 0x3a9de42c, 0xdd063d96, 0xd40b3698, +0xcf1c2b8a, 0xc6112084, 0xf93211ae, 0xf03f1aa0, 0xeb2807b2, 0xe2250cbc, +0x956e65e6, 0x9c636ee8, 
0x877473fa, 0x8e7978f4, 0xb15a49de, 0xb85742d0, +0xa3405fc2, 0xaa4d54cc, 0xecdaf741, 0xe5d7fc4f, 0xfec0e15d, 0xf7cdea53, +0xc8eedb79, 0xc1e3d077, 0xdaf4cd65, 0xd3f9c66b, 0xa4b2af31, 0xadbfa43f, +0xb6a8b92d, 0xbfa5b223, 0x80868309, 0x898b8807, 0x929c9515, 0x9b919e1b, +0x7c0a47a1, 0x75074caf, 0x6e1051bd, 0x671d5ab3, 0x583e6b99, 0x51336097, +0x4a247d85, 0x4329768b, 0x34621fd1, 0x3d6f14df, 0x267809cd, 0x2f7502c3, +0x105633e9, 0x195b38e7, 0x024c25f5, 0x0b412efb, 0xd7618c9a, 0xde6c8794, +0xc57b9a86, 0xcc769188, 0xf355a0a2, 0xfa58abac, 0xe14fb6be, 0xe842bdb0, +0x9f09d4ea, 0x9604dfe4, 0x8d13c2f6, 0x841ec9f8, 0xbb3df8d2, 0xb230f3dc, +0xa927eece, 0xa02ae5c0, 0x47b13c7a, 0x4ebc3774, 0x55ab2a66, 0x5ca62168, +0x63851042, 0x6a881b4c, 0x719f065e, 0x78920d50, 0x0fd9640a, 0x06d46f04, +0x1dc37216, 0x14ce7918, 0x2bed4832, 0x22e0433c, 0x39f75e2e, 0x30fa5520, +0x9ab701ec, 0x93ba0ae2, 0x88ad17f0, 0x81a01cfe, 0xbe832dd4, 0xb78e26da, +0xac993bc8, 0xa59430c6, 0xd2df599c, 0xdbd25292, 0xc0c54f80, 0xc9c8448e, +0xf6eb75a4, 0xffe67eaa, 0xe4f163b8, 0xedfc68b6, 0x0a67b10c, 0x036aba02, +0x187da710, 0x1170ac1e, 0x2e539d34, 0x275e963a, 0x3c498b28, 0x35448026, +0x420fe97c, 0x4b02e272, 0x5015ff60, 0x5918f46e, 0x663bc544, 0x6f36ce4a, +0x7421d358, 0x7d2cd856, 0xa10c7a37, 0xa8017139, 0xb3166c2b, 0xba1b6725, +0x8538560f, 0x8c355d01, 0x97224013, 0x9e2f4b1d, 0xe9642247, 0xe0692949, +0xfb7e345b, 0xf2733f55, 0xcd500e7f, 0xc45d0571, 0xdf4a1863, 0xd647136d, +0x31dccad7, 0x38d1c1d9, 0x23c6dccb, 0x2acbd7c5, 0x15e8e6ef, 0x1ce5ede1, +0x07f2f0f3, 0x0efffbfd, 0x79b492a7, 0x70b999a9, 0x6bae84bb, 0x62a38fb5, +0x5d80be9f, 0x548db591, 0x4f9aa883, 0x4697a38d +}; +#endif + +#endif /* RIJNDAEL_INCLUDE_TABLES */ + +#ifdef IS_LITTLE_ENDIAN +static const PRUint32 Rcon[30] = { +0x00000001, 0x00000002, 0x00000004, 0x00000008, 0x00000010, 0x00000020, +0x00000040, 0x00000080, 0x0000001b, 0x00000036, 0x0000006c, 0x000000d8, +0x000000ab, 0x0000004d, 0x0000009a, 0x0000002f, 0x0000005e, 0x000000bc, +0x00000063, 0x000000c6, 0x00000097, 
0x00000035, 0x0000006a, 0x000000d4, +0x000000b3, 0x0000007d, 0x000000fa, 0x000000ef, 0x000000c5, 0x00000091 +}; +#else +static const PRUint32 Rcon[30] = { +0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000, 0x20000000, +0x40000000, 0x80000000, 0x1b000000, 0x36000000, 0x6c000000, 0xd8000000, +0xab000000, 0x4d000000, 0x9a000000, 0x2f000000, 0x5e000000, 0xbc000000, +0x63000000, 0xc6000000, 0x97000000, 0x35000000, 0x6a000000, 0xd4000000, +0xb3000000, 0x7d000000, 0xfa000000, 0xef000000, 0xc5000000, 0x91000000 +}; +#endif + diff --git a/security/nss/lib/freebl/rijndael_tables.c b/security/nss/lib/freebl/rijndael_tables.c new file mode 100644 index 000000000..78dd85a96 --- /dev/null +++ b/security/nss/lib/freebl/rijndael_tables.c @@ -0,0 +1,215 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "stdio.h" +#include "prtypes.h" +#include "blapi.h" + +/* + * what follows is code thrown together to generate the myriad of tables + * used by Rijndael, the AES cipher. 
+ */ + +#define WORD_LE(b0, b1, b2, b3) \ + (((b3) << 24) | ((b2) << 16) | ((b1) << 8) | b0) + +#define WORD_BE(b0, b1, b2, b3) \ + (((b0) << 24) | ((b1) << 16) | ((b2) << 8) | b3) + +static const PRUint8 __S[256] = + { + 99, 124, 119, 123, 242, 107, 111, 197, 48, 1, 103, 43, 254, 215, 171, 118, + 202, 130, 201, 125, 250, 89, 71, 240, 173, 212, 162, 175, 156, 164, 114, 192, + 183, 253, 147, 38, 54, 63, 247, 204, 52, 165, 229, 241, 113, 216, 49, 21, + 4, 199, 35, 195, 24, 150, 5, 154, 7, 18, 128, 226, 235, 39, 178, 117, + 9, 131, 44, 26, 27, 110, 90, 160, 82, 59, 214, 179, 41, 227, 47, 132, + 83, 209, 0, 237, 32, 252, 177, 91, 106, 203, 190, 57, 74, 76, 88, 207, + 208, 239, 170, 251, 67, 77, 51, 133, 69, 249, 2, 127, 80, 60, 159, 168, + 81, 163, 64, 143, 146, 157, 56, 245, 188, 182, 218, 33, 16, 255, 243, 210, + 205, 12, 19, 236, 95, 151, 68, 23, 196, 167, 126, 61, 100, 93, 25, 115, + 96, 129, 79, 220, 34, 42, 144, 136, 70, 238, 184, 20, 222, 94, 11, 219, + 224, 50, 58, 10, 73, 6, 36, 92, 194, 211, 172, 98, 145, 149, 228, 121, + 231, 200, 55, 109, 141, 213, 78, 169, 108, 86, 244, 234, 101, 122, 174, 8, + 186, 120, 37, 46, 28, 166, 180, 198, 232, 221, 116, 31, 75, 189, 139, 138, + 112, 62, 181, 102, 72, 3, 246, 14, 97, 53, 87, 185, 134, 193, 29, 158, + 225, 248, 152, 17, 105, 217, 142, 148, 155, 30, 135, 233, 206, 85, 40, 223, + 140, 161, 137, 13, 191, 230, 66, 104, 65, 153, 45, 15, 176, 84, 187, 22, + }; + +static const PRUint8 __SInv[256] = + { + 82, 9, 106, 213, 48, 54, 165, 56, 191, 64, 163, 158, 129, 243, 215, 251, + 124, 227, 57, 130, 155, 47, 255, 135, 52, 142, 67, 68, 196, 222, 233, 203, + 84, 123, 148, 50, 166, 194, 35, 61, 238, 76, 149, 11, 66, 250, 195, 78, + 8, 46, 161, 102, 40, 217, 36, 178, 118, 91, 162, 73, 109, 139, 209, 37, + 114, 248, 246, 100, 134, 104, 152, 22, 212, 164, 92, 204, 93, 101, 182, 146, + 108, 112, 72, 80, 253, 237, 185, 218, 94, 21, 70, 87, 167, 141, 157, 132, + 144, 216, 171, 0, 140, 188, 211, 10, 247, 228, 88, 5, 184, 179, 69, 6, + 
208, 44, 30, 143, 202, 63, 15, 2, 193, 175, 189, 3, 1, 19, 138, 107, + 58, 145, 17, 65, 79, 103, 220, 234, 151, 242, 207, 206, 240, 180, 230, 115, + 150, 172, 116, 34, 231, 173, 53, 133, 226, 249, 55, 232, 28, 117, 223, 110, + 71, 241, 26, 113, 29, 41, 197, 137, 111, 183, 98, 14, 170, 24, 190, 27, + 252, 86, 62, 75, 198, 210, 121, 32, 154, 219, 192, 254, 120, 205, 90, 244, + 31, 221, 168, 51, 136, 7, 199, 49, 177, 18, 16, 89, 39, 128, 236, 95, + 96, 81, 127, 169, 25, 181, 74, 13, 45, 229, 122, 159, 147, 201, 156, 239, + 160, 224, 59, 77, 174, 42, 245, 176, 200, 235, 187, 60, 131, 83, 153, 97, + 23, 43, 4, 126, 186, 119, 214, 38, 225, 105, 20, 99, 85, 33, 12, 125 + }; + +/* GF_MULTIPLY + * + * multiply two bytes represented in GF(2**8), mod (x**8 + x**4 + x**3 + x + 1) + */ +PRUint8 +gf_multiply(PRUint8 a, PRUint8 b) +{ + PRUint8 res = 0; + while (b > 0) { + res = (b & 0x01) ? res ^ a : res; + a = (a & 0x80) ? ((a << 1) ^ 0x1b) : (a << 1); + b >>= 1; + } + return res; +} + +void +make_T_Table(char *table, const PRUint8 Sx[256], FILE *file, + unsigned char m0, unsigned char m1, + unsigned char m2, unsigned char m3) +{ + PRUint32 Ti; + int i; + fprintf(file, "#ifdef IS_LITTLE_ENDIAN\n"); + fprintf(file, "static const PRUint32 _T%s[256] = \n{\n", table); + for (i = 0; i < 256; i++) { + Ti = WORD_LE(gf_multiply(Sx[i], m0), + gf_multiply(Sx[i], m1), + gf_multiply(Sx[i], m2), + gf_multiply(Sx[i], m3)); + if (Ti == 0) + fprintf(file, "0x00000000%c%c", (i == 255) ? ' ' : ',', + (i % 6 == 5) ? '\n' : ' '); + else + fprintf(file, "%#.8x%c%c", Ti, (i == 255) ? ' ' : ',', + (i % 6 == 5) ? '\n' : ' '); + } + fprintf(file, "\n};\n"); + fprintf(file, "#else\n"); + fprintf(file, "static const PRUint32 _T%s[256] = \n{\n", table); + for (i = 0; i < 256; i++) { + Ti = WORD_BE(gf_multiply(Sx[i], m0), + gf_multiply(Sx[i], m1), + gf_multiply(Sx[i], m2), + gf_multiply(Sx[i], m3)); + if (Ti == 0) + fprintf(file, "0x00000000%c%c", (i == 255) ? ' ' : ',', + (i % 6 == 5) ? 
'\n' : ' '); + else + fprintf(file, "%#.8x%c%c", Ti, (i == 255) ? ' ' : ',', + (i % 6 == 5) ? '\n' : ' '); + } + fprintf(file, "\n};\n"); + fprintf(file, "#endif\n\n"); +} + +void +make_InvMixCol_Table(int num, FILE *file, PRUint8 m0, PRUint8 m1, PRUint8 m2, PRUint8 m3) +{ + PRUint16 i; + PRUint8 b0, b1, b2, b3; + fprintf(file, "#ifdef IS_LITTLE_ENDIAN\n"); + fprintf(file, "static const PRUint32 _IMXC%d[256] = \n{\n", num); + for (i = 0; i < 256; i++) { + b0 = gf_multiply(i, m0); + b1 = gf_multiply(i, m1); + b2 = gf_multiply(i, m2); + b3 = gf_multiply(i, m3); + fprintf(file, "0x%.2x%.2x%.2x%.2x%c%c", b3, b2, b1, b0, (i == 255) ? ' ' : ',', (i % 6 == 5) ? '\n' : ' '); + } + fprintf(file, "\n};\n"); + fprintf(file, "#else\n"); + fprintf(file, "static const PRUint32 _IMXC%d[256] = \n{\n", num); + for (i = 0; i < 256; i++) { + b0 = gf_multiply(i, m0); + b1 = gf_multiply(i, m1); + b2 = gf_multiply(i, m2); + b3 = gf_multiply(i, m3); + fprintf(file, "0x%.2x%.2x%.2x%.2x%c%c", b0, b1, b2, b3, (i == 255) ? ' ' : ',', (i % 6 == 5) ? '\n' : ' '); + } + fprintf(file, "\n};\n"); + fprintf(file, "#endif\n\n"); +} + +int +main() +{ + int i, j; + PRUint8 cur, last; + PRUint32 tmp; + FILE *optfile; + optfile = fopen("rijndael32.tab", "w"); + /* output S, if there are no T tables */ + fprintf(optfile, "#ifndef RIJNDAEL_INCLUDE_TABLES\n"); + fprintf(optfile, "static const PRUint8 _S[256] = \n{\n"); + for (i = 0; i < 256; i++) { + fprintf(optfile, "%3d%c%c", __S[i], (i == 255) ? ' ' : ',', + (i % 16 == 15) ? '\n' : ' '); + } + fprintf(optfile, "};\n#endif /* not RIJNDAEL_INCLUDE_TABLES */\n\n"); + /* output S**-1 */ + fprintf(optfile, "static const PRUint8 _SInv[256] = \n{\n"); + for (i = 0; i < 256; i++) { + fprintf(optfile, "%3d%c%c", __SInv[i], (i == 255) ? ' ' : ',', + (i % 16 == 15) ? 
'\n' : ' '); + } + fprintf(optfile, "};\n\n"); + fprintf(optfile, "#ifdef RIJNDAEL_INCLUDE_TABLES\n"); + /* The 32-bit word tables for optimized implementation */ + /* T0 = [ S[a] * 02, S[a], S[a], S[a] * 03 ] */ + make_T_Table("0", __S, optfile, 0x02, 0x01, 0x01, 0x03); + /* T1 = [ S[a] * 03, S[a] * 02, S[a], S[a] ] */ + make_T_Table("1", __S, optfile, 0x03, 0x02, 0x01, 0x01); + /* T2 = [ S[a], S[a] * 03, S[a] * 02, S[a] ] */ + make_T_Table("2", __S, optfile, 0x01, 0x03, 0x02, 0x01); + /* T3 = [ S[a], S[a], S[a] * 03, S[a] * 02 ] */ + make_T_Table("3", __S, optfile, 0x01, 0x01, 0x03, 0x02); + /* TInv0 = [ Si[a] * 0E, Si[a] * 09, Si[a] * 0D, Si[a] * 0B ] */ + make_T_Table("Inv0", __SInv, optfile, 0x0e, 0x09, 0x0d, 0x0b); + /* TInv1 = [ Si[a] * 0B, Si[a] * 0E, Si[a] * 09, Si[a] * 0D ] */ + make_T_Table("Inv1", __SInv, optfile, 0x0b, 0x0e, 0x09, 0x0d); + /* TInv2 = [ Si[a] * 0D, Si[a] * 0B, Si[a] * 0E, Si[a] * 09 ] */ + make_T_Table("Inv2", __SInv, optfile, 0x0d, 0x0b, 0x0e, 0x09); + /* TInv3 = [ Si[a] * 09, Si[a] * 0D, Si[a] * 0B, Si[a] * 0E ] */ + make_T_Table("Inv3", __SInv, optfile, 0x09, 0x0d, 0x0b, 0x0e); + /* byte multiply tables for inverse key expansion (mimics InvMixColumn) */ + make_InvMixCol_Table(0, optfile, 0x0e, 0x09, 0x0d, 0x0b); + make_InvMixCol_Table(1, optfile, 0x0b, 0x0E, 0x09, 0x0d); + make_InvMixCol_Table(2, optfile, 0x0d, 0x0b, 0x0e, 0x09); + make_InvMixCol_Table(3, optfile, 0x09, 0x0d, 0x0b, 0x0e); + fprintf(optfile, "#endif /* RIJNDAEL_INCLUDE_TABLES */\n\n"); + /* round constants for key expansion */ + fprintf(optfile, "#ifdef IS_LITTLE_ENDIAN\n"); + fprintf(optfile, "static const PRUint32 Rcon[30] = {\n"); + cur = 0x01; + for (i = 0; i < 30; i++) { + fprintf(optfile, "%#.8x%c%c", WORD_LE(cur, 0, 0, 0), + (i == 29) ? ' ' : ',', (i % 6 == 5) ? 
'\n' : ' '); + last = cur; + cur = gf_multiply(last, 0x02); + } + fprintf(optfile, "};\n"); + fprintf(optfile, "#else\n"); + fprintf(optfile, "static const PRUint32 Rcon[30] = {\n"); + cur = 0x01; + for (i = 0; i < 30; i++) { + fprintf(optfile, "%#.8x%c%c", WORD_BE(cur, 0, 0, 0), + (i == 29) ? ' ' : ',', (i % 6 == 5) ? '\n' : ' '); + last = cur; + cur = gf_multiply(last, 0x02); + } + fprintf(optfile, "};\n"); + fprintf(optfile, "#endif\n\n"); + fclose(optfile); + return 0; +} diff --git a/security/nss/lib/freebl/rsa.c b/security/nss/lib/freebl/rsa.c new file mode 100644 index 000000000..ff8c40ed9 --- /dev/null +++ b/security/nss/lib/freebl/rsa.c @@ -0,0 +1,1625 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * RSA key generation, public key op, private key op. + */ +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "secerr.h" + +#include "prclist.h" +#include "nssilock.h" +#include "prinit.h" +#include "blapi.h" +#include "mpi.h" +#include "mpprime.h" +#include "mplogic.h" +#include "secmpi.h" +#include "secitem.h" +#include "blapii.h" + +/* +** Number of times to attempt to generate a prime (p or q) from a random +** seed (the seed changes for each iteration). +*/ +#define MAX_PRIME_GEN_ATTEMPTS 10 +/* +** Number of times to attempt to generate a key. The primes p and q change +** for each attempt. 
+*/ +#define MAX_KEY_GEN_ATTEMPTS 10 + +/* Blinding Parameters max cache size */ +#define RSA_BLINDING_PARAMS_MAX_CACHE_SIZE 20 + +/* exponent should not be greater than modulus */ +#define BAD_RSA_KEY_SIZE(modLen, expLen) \ + ((expLen) > (modLen) || (modLen) > RSA_MAX_MODULUS_BITS / 8 || \ + (expLen) > RSA_MAX_EXPONENT_BITS / 8) + +struct blindingParamsStr; +typedef struct blindingParamsStr blindingParams; + +struct blindingParamsStr { + blindingParams *next; + mp_int f, g; /* blinding parameter */ + int counter; /* number of remaining uses of (f, g) */ +}; + +/* +** RSABlindingParamsStr +** +** For discussion of Paul Kocher's timing attack against an RSA private key +** operation, see http://www.cryptography.com/timingattack/paper.html. The +** countermeasure to this attack, known as blinding, is also discussed in +** the Handbook of Applied Cryptography, 11.118-11.119. +*/ +struct RSABlindingParamsStr { + /* Blinding-specific parameters */ + PRCList link; /* link to list of structs */ + SECItem modulus; /* list element "key" */ + blindingParams *free, *bp; /* Blinding parameters queue */ + blindingParams array[RSA_BLINDING_PARAMS_MAX_CACHE_SIZE]; +}; +typedef struct RSABlindingParamsStr RSABlindingParams; + +/* +** RSABlindingParamsListStr +** +** List of key-specific blinding params. The arena holds the volatile pool +** of memory for each entry and the list itself. The lock is for list +** operations, in this case insertions and iterations, as well as control +** of the counter for each set of blinding parameters. +*/ +struct RSABlindingParamsListStr { + PZLock *lock; /* Lock for the list */ + PRCondVar *cVar; /* Condition Variable */ + int waitCount; /* Number of threads waiting on cVar */ + PRCList head; /* Pointer to the list */ +}; + +/* +** The master blinding params list. +*/ +static struct RSABlindingParamsListStr blindingParamsList = { 0 }; + +/* Number of times to reuse (f, g). 
Suggested by Paul Kocher */ +#define RSA_BLINDING_PARAMS_MAX_REUSE 50 + +/* Global, allows optional use of blinding. On by default. */ +/* Cannot be changed at the moment, due to thread-safety issues. */ +static PRBool nssRSAUseBlinding = PR_TRUE; + +static SECStatus +rsa_build_from_primes(const mp_int *p, const mp_int *q, + mp_int *e, PRBool needPublicExponent, + mp_int *d, PRBool needPrivateExponent, + RSAPrivateKey *key, unsigned int keySizeInBits) +{ + mp_int n, phi; + mp_int psub1, qsub1, tmp; + mp_err err = MP_OKAY; + SECStatus rv = SECSuccess; + MP_DIGITS(&n) = 0; + MP_DIGITS(&phi) = 0; + MP_DIGITS(&psub1) = 0; + MP_DIGITS(&qsub1) = 0; + MP_DIGITS(&tmp) = 0; + CHECK_MPI_OK(mp_init(&n)); + CHECK_MPI_OK(mp_init(&phi)); + CHECK_MPI_OK(mp_init(&psub1)); + CHECK_MPI_OK(mp_init(&qsub1)); + CHECK_MPI_OK(mp_init(&tmp)); + /* p and q must be distinct. */ + if (mp_cmp(p, q) == 0) { + PORT_SetError(SEC_ERROR_NEED_RANDOM); + rv = SECFailure; + goto cleanup; + } + /* 1. Compute n = p*q */ + CHECK_MPI_OK(mp_mul(p, q, &n)); + /* verify that the modulus has the desired number of bits */ + if ((unsigned)mpl_significant_bits(&n) != keySizeInBits) { + PORT_SetError(SEC_ERROR_NEED_RANDOM); + rv = SECFailure; + goto cleanup; + } + + /* at least one exponent must be given */ + PORT_Assert(!(needPublicExponent && needPrivateExponent)); + + /* 2. Compute phi = (p-1)*(q-1) */ + CHECK_MPI_OK(mp_sub_d(p, 1, &psub1)); + CHECK_MPI_OK(mp_sub_d(q, 1, &qsub1)); + if (needPublicExponent || needPrivateExponent) { + CHECK_MPI_OK(mp_lcm(&psub1, &qsub1, &phi)); + /* 3. 
Compute d = e**-1 mod(phi) */ + /* or e = d**-1 mod(phi) as necessary */ + if (needPublicExponent) { + err = mp_invmod(d, &phi, e); + } else { + err = mp_invmod(e, &phi, d); + } + } else { + err = MP_OKAY; + } + /* Verify that phi(n) and e have no common divisors */ + if (err != MP_OKAY) { + if (err == MP_UNDEF) { + PORT_SetError(SEC_ERROR_NEED_RANDOM); + err = MP_OKAY; /* to keep PORT_SetError from being called again */ + rv = SECFailure; + } + goto cleanup; + } + + /* 4. Compute exponent1 = d mod (p-1) */ + CHECK_MPI_OK(mp_mod(d, &psub1, &tmp)); + MPINT_TO_SECITEM(&tmp, &key->exponent1, key->arena); + /* 5. Compute exponent2 = d mod (q-1) */ + CHECK_MPI_OK(mp_mod(d, &qsub1, &tmp)); + MPINT_TO_SECITEM(&tmp, &key->exponent2, key->arena); + /* 6. Compute coefficient = q**-1 mod p */ + CHECK_MPI_OK(mp_invmod(q, p, &tmp)); + MPINT_TO_SECITEM(&tmp, &key->coefficient, key->arena); + + /* copy our calculated results, overwrite what is there */ + key->modulus.data = NULL; + MPINT_TO_SECITEM(&n, &key->modulus, key->arena); + key->privateExponent.data = NULL; + MPINT_TO_SECITEM(d, &key->privateExponent, key->arena); + key->publicExponent.data = NULL; + MPINT_TO_SECITEM(e, &key->publicExponent, key->arena); + key->prime1.data = NULL; + MPINT_TO_SECITEM(p, &key->prime1, key->arena); + key->prime2.data = NULL; + MPINT_TO_SECITEM(q, &key->prime2, key->arena); +cleanup: + mp_clear(&n); + mp_clear(&phi); + mp_clear(&psub1); + mp_clear(&qsub1); + mp_clear(&tmp); + if (err) { + MP_TO_SEC_ERROR(err); + rv = SECFailure; + } + return rv; +} + +/* +** Fill "prime" with a random probable prime of "primeLen" bytes. Each of +** up to MAX_PRIME_GEN_ATTEMPTS tries draws fresh random bytes, forces the +** two high-order bits and the low-order bit to 1, and hands the candidate +** to mpp_make_prime(). Returns SECSuccess only when a prime was produced; +** the scratch buffer is zeroized before it is freed. +*/ +static SECStatus +generate_prime(mp_int *prime, int primeLen) +{ + mp_err err = MP_OKAY; + SECStatus rv = SECSuccess; + unsigned long counter = 0; + int piter; + unsigned char *pb = NULL; + pb = PORT_Alloc(primeLen); + if (!pb) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + /* err is still MP_OKAY here, so the failure must be recorded in rv */ + rv = SECFailure; + goto cleanup; + } + for (piter = 0; piter < MAX_PRIME_GEN_ATTEMPTS; piter++) { + CHECK_SEC_OK(RNG_GenerateGlobalRandomBytes(pb, primeLen)); + pb[0] |= 0xC0; /* set two high-order
bits */ + pb[primeLen - 1] |= 0x01; /* set low-order bit */ + CHECK_MPI_OK(mp_read_unsigned_octets(prime, pb, primeLen)); + err = mpp_make_prime(prime, primeLen * 8, PR_FALSE, &counter); + if (err != MP_NO) + goto cleanup; + /* keep going while err == MP_NO */ + } +cleanup: + if (pb) + PORT_ZFree(pb, primeLen); + if (err) { + MP_TO_SEC_ERROR(err); + rv = SECFailure; + } + return rv; +} + +/* + * make sure the key components meet fips186 requirements. + */ +static PRBool +rsa_fips186_verify(mp_int *p, mp_int *q, mp_int *d, int keySizeInBits) +{ + mp_int pq_diff; + mp_err err = MP_OKAY; + PRBool ret = PR_FALSE; + + if (keySizeInBits < 250) { + /* not a valid FIPS length, no point in our other tests */ + /* if you are here, and in FIPS mode, you are outside the security + * policy */ + return PR_TRUE; + } + + /* p & q are already known to be greater then sqrt(2)*2^(keySize/2-1) */ + /* we also know that gcd(p-1,e) = 1 and gcd(q-1,e) = 1 because the + * mp_invmod() function will fail. */ + /* now check p-q > 2^(keysize/2-100) */ + MP_DIGITS(&pq_diff) = 0; + CHECK_MPI_OK(mp_init(&pq_diff)); + /* NSS always has p > q, so we know pq_diff is positive */ + CHECK_MPI_OK(mp_sub(p, q, &pq_diff)); + if ((unsigned)mpl_significant_bits(&pq_diff) < (keySizeInBits / 2 - 100)) { + goto cleanup; + } + /* now verify d is large enough*/ + if ((unsigned)mpl_significant_bits(d) < (keySizeInBits / 2)) { + goto cleanup; + } + ret = PR_TRUE; + +cleanup: + mp_clear(&pq_diff); + return ret; +} + +/* +** Generate and return a new RSA public and private key. +** Both keys are encoded in a single RSAPrivateKey structure. +** "cx" is the random number generator context +** "keySizeInBits" is the size of the key to be generated, in bits. +** 512, 1024, etc. +** "publicExponent" when not NULL is a pointer to some data that +** represents the public exponent to use. The data is a byte +** encoded integer, in "big endian" order. 
+*/ +RSAPrivateKey * +RSA_NewKey(int keySizeInBits, SECItem *publicExponent) +{ + unsigned int primeLen; + mp_int p, q, e, d; + int kiter; + int max_attempts; + mp_err err = MP_OKAY; + SECStatus rv = SECSuccess; + int prerr = 0; + RSAPrivateKey *key = NULL; + PLArenaPool *arena = NULL; + /* Require key size to be a multiple of 16 bits. */ + if (!publicExponent || keySizeInBits % 16 != 0 || + BAD_RSA_KEY_SIZE((unsigned int)keySizeInBits / 8, publicExponent->len)) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return NULL; + } + /* 1. Allocate arena & key */ + arena = PORT_NewArena(NSS_FREEBL_DEFAULT_CHUNKSIZE); + if (!arena) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return NULL; + } + key = PORT_ArenaZNew(arena, RSAPrivateKey); + if (!key) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + PORT_FreeArena(arena, PR_TRUE); + return NULL; + } + key->arena = arena; + /* length of primes p and q (in bytes) */ + primeLen = keySizeInBits / (2 * PR_BITS_PER_BYTE); + MP_DIGITS(&p) = 0; + MP_DIGITS(&q) = 0; + MP_DIGITS(&e) = 0; + MP_DIGITS(&d) = 0; + CHECK_MPI_OK(mp_init(&p)); + CHECK_MPI_OK(mp_init(&q)); + CHECK_MPI_OK(mp_init(&e)); + CHECK_MPI_OK(mp_init(&d)); + /* 2. Set the version number (PKCS1 v1.5 says it should be zero) */ + if (!SECITEM_AllocItem(arena, &key->version, 1)) { + /* without this check the store below would dereference NULL */ + PORT_SetError(SEC_ERROR_NO_MEMORY); + rv = SECFailure; + goto cleanup; + } + key->version.data[0] = 0; + /* 3. Set the public exponent */ + SECITEM_TO_MPINT(*publicExponent, &e); + kiter = 0; + max_attempts = 5 * (keySizeInBits / 2); /* FIPS 186-4 B.3.3 steps 4.7 and 5.8 */ + do { + prerr = 0; + PORT_SetError(0); + CHECK_SEC_OK(generate_prime(&p, primeLen)); + CHECK_SEC_OK(generate_prime(&q, primeLen)); + /* Assure p > q */ + /* NOTE: PKCS #1 does not require p > q, and NSS doesn't use any + * implementation optimization that requires p > q. We can remove + * this code in the future.
+ */ + if (mp_cmp(&p, &q) < 0) + mp_exch(&p, &q); + /* Attempt to use these primes to generate a key */ + rv = rsa_build_from_primes(&p, &q, + &e, PR_FALSE, /* needPublicExponent=false */ + &d, PR_TRUE, /* needPrivateExponent=true */ + key, keySizeInBits); + if (rv == SECSuccess) { + if (rsa_fips186_verify(&p, &q, &d, keySizeInBits)) { + break; + } + prerr = SEC_ERROR_NEED_RANDOM; /* retry with different values */ + } else { + prerr = PORT_GetError(); + } + kiter++; + /* loop until have primes */ + } while (prerr == SEC_ERROR_NEED_RANDOM && kiter < max_attempts); + if (prerr) { + /* Every attempt failed. rv can still be SECSuccess when the last + * build worked but rsa_fips186_verify() rejected it, so force the + * failure here; otherwise that rejected key would be returned. */ + PORT_SetError(prerr); + rv = SECFailure; + } +cleanup: + mp_clear(&p); + mp_clear(&q); + mp_clear(&e); + mp_clear(&d); + if (err) { + MP_TO_SEC_ERROR(err); + rv = SECFailure; + } + if (rv && arena) { + PORT_FreeArena(arena, PR_TRUE); + key = NULL; + } + return key; +} + +mp_err +rsa_is_prime(mp_int *p) +{ + int res; + + /* run a Fermat test */ + res = mpp_fermat(p, 2); + if (res != MP_OKAY) { + return res; + } + + /* If that passed, run some Miller-Rabin tests */ + res = mpp_pprime(p, 2); + return res; +} + +/* + * Factorize a RSA modulus n into p and q by using the exponents e and d. + * + * In: e, d, n + * Out: p, q + * + * See Handbook of Applied Cryptography, 8.2.2(i). + * + * The algorithm is probabilistic, it is run 64 times and each run has a 50% + * chance of succeeding with a runtime of O(log(e*d)). + * + * The returned p might be smaller than q.
+ */ +static mp_err +rsa_factorize_n_from_exponents(mp_int *e, mp_int *d, mp_int *p, mp_int *q, + mp_int *n) +{ + /* lambda is the private modulus: e*d = 1 mod lambda */ + /* so: e*d - 1 = k*lambda = t*2^s where t is odd */ + mp_int klambda; + mp_int t, onetwentyeight; + unsigned long s = 0; + unsigned long i; + + /* cand = a^(t * 2^i) mod n, next_cand = a^(t * 2^(i+1)) mod n */ + mp_int a; + mp_int cand; + mp_int next_cand; + + mp_int n_minus_one; + mp_err err = MP_OKAY; + + MP_DIGITS(&klambda) = 0; + MP_DIGITS(&t) = 0; + MP_DIGITS(&a) = 0; + MP_DIGITS(&cand) = 0; + MP_DIGITS(&n_minus_one) = 0; + MP_DIGITS(&next_cand) = 0; + MP_DIGITS(&onetwentyeight) = 0; + CHECK_MPI_OK(mp_init(&klambda)); + CHECK_MPI_OK(mp_init(&t)); + CHECK_MPI_OK(mp_init(&a)); + CHECK_MPI_OK(mp_init(&cand)); + CHECK_MPI_OK(mp_init(&n_minus_one)); + CHECK_MPI_OK(mp_init(&next_cand)); + CHECK_MPI_OK(mp_init(&onetwentyeight)); + + mp_set_int(&onetwentyeight, 128); + + /* calculate k*lambda = e*d - 1 */ + CHECK_MPI_OK(mp_mul(e, d, &klambda)); + CHECK_MPI_OK(mp_sub_d(&klambda, 1, &klambda)); + + /* factorize klambda into t*2^s */ + CHECK_MPI_OK(mp_copy(&klambda, &t)); + while (mpp_divis_d(&t, 2) == MP_YES) { + CHECK_MPI_OK(mp_div_2(&t, &t)); + s += 1; + } + + /* precompute n_minus_one = n - 1 */ + CHECK_MPI_OK(mp_copy(n, &n_minus_one)); + CHECK_MPI_OK(mp_sub_d(&n_minus_one, 1, &n_minus_one)); + + /* pick random bases a, each one has a 50% leading to a factorization */ + CHECK_MPI_OK(mp_set_int(&a, 2)); + /* The following is equivalent to for (a=2, a <= 128, a+=2) */ + while (mp_cmp(&a, &onetwentyeight) <= 0) { + /* compute the base cand = a^(t * 2^0) [i = 0] */ + CHECK_MPI_OK(mp_exptmod(&a, &t, n, &cand)); + + for (i = 0; i < s; i++) { + /* condition 1: skip the base if we hit a trivial factor of n */ + if (mp_cmp(&cand, &n_minus_one) == 0 || mp_cmp_d(&cand, 1) == 0) { + break; + } + + /* increase i in a^(t * 2^i) by squaring the number */ + CHECK_MPI_OK(mp_exptmod_d(&cand, 2, n, &next_cand)); + + 
/* condition 2: a^(t * 2^(i+1)) = 1 mod n */ + if (mp_cmp_d(&next_cand, 1) == 0) { + /* conditions verified, gcd(a^(t * 2^i) - 1, n) is a factor */ + CHECK_MPI_OK(mp_sub_d(&cand, 1, &cand)); + CHECK_MPI_OK(mp_gcd(&cand, n, p)); + if (mp_cmp_d(p, 1) == 0) { + CHECK_MPI_OK(mp_add_d(&cand, 1, &cand)); + break; + } + CHECK_MPI_OK(mp_div(n, p, q, NULL)); + goto cleanup; + } + CHECK_MPI_OK(mp_copy(&next_cand, &cand)); + } + + CHECK_MPI_OK(mp_add_d(&a, 2, &a)); + } + + /* if we reach here it's likely (2^64 - 1 / 2^64) that d is wrong */ + err = MP_RANGE; + +cleanup: + mp_clear(&klambda); + mp_clear(&t); + mp_clear(&a); + mp_clear(&cand); + mp_clear(&n_minus_one); + mp_clear(&next_cand); + mp_clear(&onetwentyeight); + return err; +} + +/* + * Try to find the two primes based on 2 exponents plus a prime. + * + * In: e, d and p. + * Out: p,q. + * + * Step 1, Since d = e**-1 mod phi, we know that d*e == 1 mod phi, or + * d*e = 1+k*phi, or d*e-1 = k*phi. since d is less than phi and e is + * usually less than d, then k must be an integer between e-1 and 1 + * (probably on the order of e). + * Step 1a, We can divide k*phi by prime-1 and get k*(q-1). This will reduce + * the size of our division through the rest of the loop. + * Step 2, Loop through the values k=e-1 to 1 looking for k. k should be on + * the order or e, and e is typically small. This may take a while for + * a large random e. We are looking for a k that divides kphi + * evenly. Once we find a k that divides kphi evenly, we assume it + * is the true k. It's possible this k is not the 'true' k but has + * swapped factors of p-1 and/or q-1. Because of this, we + * tentatively continue Steps 3-6 inside this loop, and may return looking + * for another k on failure. + * Step 3, Calculate our tentative phi=kphi/k. Note: real phi is (p-1)*(q-1). + * Step 4a, kphi is k*(q-1), so phi is our tenative q-1. q = phi+1. + * If k is correct, q should be the right length and prime. 
+ * Step 4b, It's possible q-1 and k could have swapped factors. We now have a + * possible solution that meets our criteria. It may not be the only + * solution, however, so we keep looking. If we find more than one, + * we will fail since we cannot determine which is the correct + * solution, and returning the wrong modulus will compromise both + * moduli. If no other solution is found, we return the unique solution. + * + * This will return p & q. q may be larger than p in the case that p was given + * and it was the smaller prime. + */ +static mp_err +rsa_get_prime_from_exponents(mp_int *e, mp_int *d, mp_int *p, mp_int *q, + mp_int *n, unsigned int keySizeInBits) +{ + mp_int kphi; /* k*phi */ + mp_int k; /* current guess at 'k' */ + mp_int phi; /* (p-1)(q-1) */ + mp_int r; /* remainder */ + mp_int tmp; /* p-1 if p is given */ + mp_err err = MP_OKAY; + unsigned int order_k; + + MP_DIGITS(&kphi) = 0; + MP_DIGITS(&phi) = 0; + MP_DIGITS(&k) = 0; + MP_DIGITS(&r) = 0; + MP_DIGITS(&tmp) = 0; + CHECK_MPI_OK(mp_init(&kphi)); + CHECK_MPI_OK(mp_init(&phi)); + CHECK_MPI_OK(mp_init(&k)); + CHECK_MPI_OK(mp_init(&r)); + CHECK_MPI_OK(mp_init(&tmp)); + + /* our algorithm looks for a factor k whose maximum size is dependent + * on the size of our smallest exponent, which had better be the public + * exponent (if it's the private, the key is vulnerable to a brute force + * attack). + * + * since our factor search is linear, we need to limit the maximum + * size of the public key. this should not be a problem normally, since + * public keys are usually small. + * + * if we want to handle larger public key sizes, we should have + * a version which tries to 'completely' factor k*phi (where completely + * means 'factor into primes, or composites with which are products of + * large primes). Once we have all the factors, we can sort them out and + * try different combinations to form our phi. The risk is if (p-1)/2, + * (q-1)/2, and k are all large primes. 
In any case if the public key + * is small (order of 20 some bits), then a linear search for k is + * manageable. + */ + if (mpl_significant_bits(e) > 23) { + err = MP_RANGE; + goto cleanup; + } + + /* calculate k*phi = e*d - 1 */ + CHECK_MPI_OK(mp_mul(e, d, &kphi)); + CHECK_MPI_OK(mp_sub_d(&kphi, 1, &kphi)); + + /* kphi is (e*d)-1, which is the same as k*(p-1)(q-1) + * d < (p-1)(q-1), therefor k must be less than e-1 + * We can narrow down k even more, though. Since p and q are odd and both + * have their high bit set, then we know that phi must be on order of + * keySizeBits. + */ + order_k = (unsigned)mpl_significant_bits(&kphi) - keySizeInBits; + + /* for (k=kinit; order(k) >= order_k; k--) { */ + /* k=kinit: k can't be bigger than kphi/2^(keySizeInBits -1) */ + CHECK_MPI_OK(mp_2expt(&k, keySizeInBits - 1)); + CHECK_MPI_OK(mp_div(&kphi, &k, &k, NULL)); + if (mp_cmp(&k, e) >= 0) { + /* also can't be bigger then e-1 */ + CHECK_MPI_OK(mp_sub_d(e, 1, &k)); + } + + /* calculate our temp value */ + /* This saves recalculating this value when the k guess is wrong, which + * is reasonably frequent. */ + /* tmp = p-1 (used to calculate q-1= phi/tmp) */ + CHECK_MPI_OK(mp_sub_d(p, 1, &tmp)); + CHECK_MPI_OK(mp_div(&kphi, &tmp, &kphi, &r)); + if (mp_cmp_z(&r) != 0) { + /* p-1 doesn't divide kphi, some parameter wasn't correct */ + err = MP_RANGE; + goto cleanup; + } + mp_zero(q); + /* kphi is now k*(q-1) */ + + /* rest of the for loop */ + for (; (err == MP_OKAY) && (mpl_significant_bits(&k) >= order_k); + err = mp_sub_d(&k, 1, &k)) { + CHECK_MPI_OK(err); + /* looking for k as a factor of kphi */ + CHECK_MPI_OK(mp_div(&kphi, &k, &phi, &r)); + if (mp_cmp_z(&r) != 0) { + /* not a factor, try the next one */ + continue; + } + /* we have a possible phi, see if it works */ + if ((unsigned)mpl_significant_bits(&phi) != keySizeInBits / 2) { + /* phi is not the right size */ + continue; + } + /* phi should be divisible by 2, since + * q is odd and phi=(q-1). 
*/ + if (mpp_divis_d(&phi, 2) == MP_NO) { + /* phi is odd, so phi+1 would be even and cannot be the prime q */ + continue; + } + /* we now have a candidate for the second prime */ + CHECK_MPI_OK(mp_add_d(&phi, 1, &tmp)); + + /* check to make sure it is prime */ + err = rsa_is_prime(&tmp); + if (err != MP_OKAY) { + if (err == MP_NO) { + /* No, then we still have the wrong phi */ + continue; + } + goto cleanup; + } + /* + * It is possible that we have the wrong phi if + * k_guess*(q_guess-1) = k*(q-1) (k and q-1 have swapped factors). + * Since our q_guess is prime, however, we have found a valid + * rsa key because: + * q is the correct order of magnitude. + * phi = (p-1)(q-1) where p and q are both primes. + * e*d mod phi = 1. + * There is no way to know from the info given if this is the + * original key. We never want to return the wrong key because if + * two moduli with the same factor are known, then Euclid's gcd + * algorithm can be used to find that factor. Even though the + * caller didn't pass the original modulus, it doesn't mean the + * modulus wasn't known or isn't available somewhere. So to be safe + * if we can't be sure we have the right q, we don't return any. + * + * So to make sure, we continue looking for other valid q's. If none + * are found, then we can safely return this one, otherwise we just + * fail */ + if (mp_cmp_z(q) != 0) { + /* this is the second valid q, don't return either, + * just fail */ + err = MP_RANGE; + break; + } + /* we only have one q so far, save it and if no others are found, + * it's safe to return it */ + CHECK_MPI_OK(mp_copy(&tmp, q)); + continue; + } + if ((unsigned)mpl_significant_bits(&k) < order_k) { + if (mp_cmp_z(q) == 0) { + /* If we get here, the search ran out of k values without + * ever saving a q: something was wrong with the parameters + * we were given */ + err = MP_RANGE; + } + } +cleanup: + mp_clear(&kphi); + mp_clear(&phi); + mp_clear(&k); + mp_clear(&r); + mp_clear(&tmp); + return err; +} + +/* + * take a private key with only a few elements and fill out the missing pieces.
+ * + * All the entries will be overwritten with data allocated out of the arena. + * If no arena is supplied, one will be created. + * + * The following fields must be supplied in order for this function + * to succeed: + * one of either publicExponent or privateExponent + * two more of the following 5 parameters. + * modulus (n) + * prime1 (p) + * prime2 (q) + * publicExponent (e) + * privateExponent (d) + * + * NOTE: if only the publicExponent, privateExponent, and one prime are given, + * then there may be more than one RSA key that matches that combination. + * + * All parameters will be replaced in the key structure with new parameters + * allocated out of the arena. There is no attempt to free the old structures. + * Prime1 will always be greater than prime2 (even if the caller supplies the + * smaller prime as prime1 or the larger prime as prime2). The parameters are + * not overwritten on failure. + * + * How it works: + * We can generate all the parameters from one of the exponents, plus the + * two primes. (rsa_build_from_primes) + * If we are given one of the exponents and both primes, we are done. + * If we are given one of the exponents, the modulus and one prime, we + * calculate the second prime by dividing the modulus by the given + * prime, giving us an exponent and 2 primes. + * If we are given 2 exponents and one of the primes we calculate + * k*phi = d*e-1, where k is an integer less than d which + * divides d*e-1. We find factor k so we can isolate phi. + * phi = (p-1)(q-1) + * We can use phi to find the other prime as follows: + * q = (phi/(p-1)) + 1. We now have 2 primes and an exponent. + * (NOTE: if more than one prime meets this condition, the operation + * will fail. See comments elsewhere in this file about this).
+ * (rsa_get_prime_from_exponents) + * If we are given 2 exponents and the modulus we factor the modulus to + * get the 2 missing primes (rsa_factorize_n_from_exponents) + * + */ +SECStatus +RSA_PopulatePrivateKey(RSAPrivateKey *key) +{ + PLArenaPool *arena = NULL; + PRBool needPublicExponent = PR_TRUE; + PRBool needPrivateExponent = PR_TRUE; + PRBool hasModulus = PR_FALSE; + unsigned int keySizeInBits = 0; + int prime_count = 0; + /* standard RSA nominclature */ + mp_int p, q, e, d, n; + /* remainder */ + mp_int r; + mp_err err = 0; + SECStatus rv = SECFailure; + + MP_DIGITS(&p) = 0; + MP_DIGITS(&q) = 0; + MP_DIGITS(&e) = 0; + MP_DIGITS(&d) = 0; + MP_DIGITS(&n) = 0; + MP_DIGITS(&r) = 0; + CHECK_MPI_OK(mp_init(&p)); + CHECK_MPI_OK(mp_init(&q)); + CHECK_MPI_OK(mp_init(&e)); + CHECK_MPI_OK(mp_init(&d)); + CHECK_MPI_OK(mp_init(&n)); + CHECK_MPI_OK(mp_init(&r)); + + /* if the key didn't already have an arena, create one. */ + if (key->arena == NULL) { + arena = PORT_NewArena(NSS_FREEBL_DEFAULT_CHUNKSIZE); + if (!arena) { + goto cleanup; + } + key->arena = arena; + } + + /* load up the known exponents */ + if (key->publicExponent.data) { + SECITEM_TO_MPINT(key->publicExponent, &e); + needPublicExponent = PR_FALSE; + } + if (key->privateExponent.data) { + SECITEM_TO_MPINT(key->privateExponent, &d); + needPrivateExponent = PR_FALSE; + } + if (needPrivateExponent && needPublicExponent) { + /* Not enough information, we need at least one exponent */ + err = MP_BADARG; + goto cleanup; + } + + /* load up the known primes. If only one prime is given, it will be + * assigned 'p'. Once we have both primes, well make sure p is the larger. + * The value prime_count tells us howe many we have acquired. 
+ */ + if (key->prime1.data) { + int primeLen = key->prime1.len; + if (key->prime1.data[0] == 0) { + primeLen--; + } + keySizeInBits = primeLen * 2 * PR_BITS_PER_BYTE; + SECITEM_TO_MPINT(key->prime1, &p); + prime_count++; + } + if (key->prime2.data) { + int primeLen = key->prime2.len; + if (key->prime2.data[0] == 0) { + primeLen--; + } + keySizeInBits = primeLen * 2 * PR_BITS_PER_BYTE; + SECITEM_TO_MPINT(key->prime2, prime_count ? &q : &p); + prime_count++; + } + /* load up the modulus */ + if (key->modulus.data) { + int modLen = key->modulus.len; + if (key->modulus.data[0] == 0) { + modLen--; + } + keySizeInBits = modLen * PR_BITS_PER_BYTE; + SECITEM_TO_MPINT(key->modulus, &n); + hasModulus = PR_TRUE; + } + /* if we have the modulus and one prime, calculate the second. */ + if ((prime_count == 1) && (hasModulus)) { + if (mp_div(&n, &p, &q, &r) != MP_OKAY || mp_cmp_z(&r) != 0) { + /* p is not a factor or n, fail */ + err = MP_BADARG; + goto cleanup; + } + prime_count++; + } + + /* If we didn't have enough primes try to calculate the primes from + * the exponents */ + if (prime_count < 2) { + /* if we don't have at least 2 primes at this point, then we need both + * exponents and one prime or a modulus*/ + if (!needPublicExponent && !needPrivateExponent && + (prime_count > 0)) { + CHECK_MPI_OK(rsa_get_prime_from_exponents(&e, &d, &p, &q, &n, + keySizeInBits)); + } else if (!needPublicExponent && !needPrivateExponent && hasModulus) { + CHECK_MPI_OK(rsa_factorize_n_from_exponents(&e, &d, &p, &q, &n)); + } else { + /* not enough given parameters to get both primes */ + err = MP_BADARG; + goto cleanup; + } + } + + /* Assure p > q */ + /* NOTE: PKCS #1 does not require p > q, and NSS doesn't use any + * implementation optimization that requires p > q. We can remove + * this code in the future. 
+ */ + if (mp_cmp(&p, &q) < 0) + mp_exch(&p, &q); + + /* we now have our 2 primes and at least one exponent, we can fill + * in the key */ + rv = rsa_build_from_primes(&p, &q, + &e, needPublicExponent, + &d, needPrivateExponent, + key, keySizeInBits); +cleanup: + mp_clear(&p); + mp_clear(&q); + mp_clear(&e); + mp_clear(&d); + mp_clear(&n); + mp_clear(&r); + if (err) { + MP_TO_SEC_ERROR(err); + rv = SECFailure; + } + if (rv && arena) { + PORT_FreeArena(arena, PR_TRUE); + key->arena = NULL; + } + return rv; +} + +static unsigned int +rsa_modulusLen(SECItem *modulus) +{ + unsigned char byteZero = modulus->data[0]; + unsigned int modLen = modulus->len - !byteZero; + return modLen; +} + +/* +** Perform a raw public-key operation +** Length of input and output buffers are equal to key's modulus len. +*/ +SECStatus +RSA_PublicKeyOp(RSAPublicKey *key, + unsigned char *output, + const unsigned char *input) +{ + unsigned int modLen, expLen, offset; + mp_int n, e, m, c; + mp_err err = MP_OKAY; + SECStatus rv = SECSuccess; + if (!key || !output || !input) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + MP_DIGITS(&n) = 0; + MP_DIGITS(&e) = 0; + MP_DIGITS(&m) = 0; + MP_DIGITS(&c) = 0; + CHECK_MPI_OK(mp_init(&n)); + CHECK_MPI_OK(mp_init(&e)); + CHECK_MPI_OK(mp_init(&m)); + CHECK_MPI_OK(mp_init(&c)); + modLen = rsa_modulusLen(&key->modulus); + expLen = rsa_modulusLen(&key->publicExponent); + /* 1. Obtain public key (n, e) */ + if (BAD_RSA_KEY_SIZE(modLen, expLen)) { + PORT_SetError(SEC_ERROR_INVALID_KEY); + rv = SECFailure; + goto cleanup; + } + SECITEM_TO_MPINT(key->modulus, &n); + SECITEM_TO_MPINT(key->publicExponent, &e); + if (e.used > n.used) { + /* exponent should not be greater than modulus */ + PORT_SetError(SEC_ERROR_INVALID_KEY); + rv = SECFailure; + goto cleanup; + } + /* 2. check input out of range (needs to be in range [0..n-1]) */ + offset = (key->modulus.data[0] == 0) ? 
1 : 0; /* may be leading 0 */ + if (memcmp(input, key->modulus.data + offset, modLen) >= 0) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + rv = SECFailure; + goto cleanup; + } + /* 2 bis. Represent message as integer in range [0..n-1] */ + CHECK_MPI_OK(mp_read_unsigned_octets(&m, input, modLen)); +/* 3. Compute c = m**e mod n */ +#ifdef USE_MPI_EXPT_D + /* XXX see which is faster */ + if (MP_USED(&e) == 1) { + CHECK_MPI_OK(mp_exptmod_d(&m, MP_DIGIT(&e, 0), &n, &c)); + } else +#endif + CHECK_MPI_OK(mp_exptmod(&m, &e, &n, &c)); + /* 4. result c is ciphertext */ + err = mp_to_fixlen_octets(&c, output, modLen); + if (err >= 0) + err = MP_OKAY; +cleanup: + mp_clear(&n); + mp_clear(&e); + mp_clear(&m); + mp_clear(&c); + if (err) { + MP_TO_SEC_ERROR(err); + rv = SECFailure; + } + return rv; +} + +/* +** RSA Private key operation (no CRT). +*/ +static SECStatus +rsa_PrivateKeyOpNoCRT(RSAPrivateKey *key, mp_int *m, mp_int *c, mp_int *n, + unsigned int modLen) +{ + mp_int d; + mp_err err = MP_OKAY; + SECStatus rv = SECSuccess; + MP_DIGITS(&d) = 0; + CHECK_MPI_OK(mp_init(&d)); + SECITEM_TO_MPINT(key->privateExponent, &d); + /* 1. m = c**d mod n */ + CHECK_MPI_OK(mp_exptmod(c, &d, n, m)); +cleanup: + mp_clear(&d); + if (err) { + MP_TO_SEC_ERROR(err); + rv = SECFailure; + } + return rv; +} + +/* +** RSA Private key operation using CRT. 
+*/ +static SECStatus +rsa_PrivateKeyOpCRTNoCheck(RSAPrivateKey *key, mp_int *m, mp_int *c) +{ + mp_int p, q, d_p, d_q, qInv; + mp_int m1, m2, h, ctmp; + mp_err err = MP_OKAY; + SECStatus rv = SECSuccess; + MP_DIGITS(&p) = 0; + MP_DIGITS(&q) = 0; + MP_DIGITS(&d_p) = 0; + MP_DIGITS(&d_q) = 0; + MP_DIGITS(&qInv) = 0; + MP_DIGITS(&m1) = 0; + MP_DIGITS(&m2) = 0; + MP_DIGITS(&h) = 0; + MP_DIGITS(&ctmp) = 0; + CHECK_MPI_OK(mp_init(&p)); + CHECK_MPI_OK(mp_init(&q)); + CHECK_MPI_OK(mp_init(&d_p)); + CHECK_MPI_OK(mp_init(&d_q)); + CHECK_MPI_OK(mp_init(&qInv)); + CHECK_MPI_OK(mp_init(&m1)); + CHECK_MPI_OK(mp_init(&m2)); + CHECK_MPI_OK(mp_init(&h)); + CHECK_MPI_OK(mp_init(&ctmp)); + /* copy private key parameters into mp integers */ + SECITEM_TO_MPINT(key->prime1, &p); /* p */ + SECITEM_TO_MPINT(key->prime2, &q); /* q */ + SECITEM_TO_MPINT(key->exponent1, &d_p); /* d_p = d mod (p-1) */ + SECITEM_TO_MPINT(key->exponent2, &d_q); /* d_q = d mod (q-1) */ + SECITEM_TO_MPINT(key->coefficient, &qInv); /* qInv = q**-1 mod p */ + /* 1. m1 = c**d_p mod p */ + CHECK_MPI_OK(mp_mod(c, &p, &ctmp)); + CHECK_MPI_OK(mp_exptmod(&ctmp, &d_p, &p, &m1)); + /* 2. m2 = c**d_q mod q */ + CHECK_MPI_OK(mp_mod(c, &q, &ctmp)); + CHECK_MPI_OK(mp_exptmod(&ctmp, &d_q, &q, &m2)); + /* 3. h = (m1 - m2) * qInv mod p */ + CHECK_MPI_OK(mp_submod(&m1, &m2, &p, &h)); + CHECK_MPI_OK(mp_mulmod(&h, &qInv, &p, &h)); + /* 4. 
m = m2 + h * q */ + CHECK_MPI_OK(mp_mul(&h, &q, m)); + CHECK_MPI_OK(mp_add(m, &m2, m)); +cleanup: + mp_clear(&p); + mp_clear(&q); + mp_clear(&d_p); + mp_clear(&d_q); + mp_clear(&qInv); + mp_clear(&m1); + mp_clear(&m2); + mp_clear(&h); + mp_clear(&ctmp); + if (err) { + MP_TO_SEC_ERROR(err); + rv = SECFailure; + } + return rv; +} + +/* +** An attack against RSA CRT was described by Boneh, DeMillo, and Lipton in: +** "On the Importance of Eliminating Errors in Cryptographic Computations", +** http://theory.stanford.edu/~dabo/papers/faults.ps.gz +** +** As a defense against the attack, carry out the private key operation, +** followed up with a public key operation to invert the result. +** Verify that result against the input. +*/ +static SECStatus +rsa_PrivateKeyOpCRTCheckedPubKey(RSAPrivateKey *key, mp_int *m, mp_int *c) +{ + mp_int n, e, v; + mp_err err = MP_OKAY; + SECStatus rv = SECSuccess; + MP_DIGITS(&n) = 0; + MP_DIGITS(&e) = 0; + MP_DIGITS(&v) = 0; + CHECK_MPI_OK(mp_init(&n)); + CHECK_MPI_OK(mp_init(&e)); + CHECK_MPI_OK(mp_init(&v)); + CHECK_SEC_OK(rsa_PrivateKeyOpCRTNoCheck(key, m, c)); + SECITEM_TO_MPINT(key->modulus, &n); + SECITEM_TO_MPINT(key->publicExponent, &e); + /* Perform a public key operation v = m ** e mod n */ + CHECK_MPI_OK(mp_exptmod(m, &e, &n, &v)); + if (mp_cmp(&v, c) != 0) { + rv = SECFailure; + } +cleanup: + mp_clear(&n); + mp_clear(&e); + mp_clear(&v); + if (err) { + MP_TO_SEC_ERROR(err); + rv = SECFailure; + } + return rv; +} + +static PRCallOnceType coBPInit = { 0, 0, 0 }; +/* +** Create the lock and condition variable of the master blinding params +** list and reset it to an empty list. Returns PR_FAILURE, leaving +** blindingParamsList.lock NULL, when either object cannot be created. +*/ +static PRStatus +init_blinding_params_list(void) +{ + blindingParamsList.lock = PZ_NewLock(nssILockOther); + if (!blindingParamsList.lock) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return PR_FAILURE; + } + blindingParamsList.cVar = PR_NewCondVar(blindingParamsList.lock); + if (!blindingParamsList.cVar) { + /* get_blinding_params() treats lock == NULL as "init failed"; + * release the lock so a half-built list is never used */ + PZ_DestroyLock(blindingParamsList.lock); + blindingParamsList.lock = NULL; + PORT_SetError(SEC_ERROR_NO_MEMORY); + return PR_FAILURE; + } + blindingParamsList.waitCount = 0; + PR_INIT_CLIST(&blindingParamsList.head); + return PR_SUCCESS; +}
+ +/* +** Compute a fresh blinding pair for modulus "n": draw modLen random bytes, +** reduce them to k mod n, then set f = k**e mod n and g = k**-1 mod n, +** where e is key->publicExponent. Returns SECFailure when the allocation, +** the random generator or any MPI step fails; f and g are only meaningful +** on SECSuccess. The random buffer is zeroized before it is freed. +*/ +static SECStatus +generate_blinding_params(RSAPrivateKey *key, mp_int *f, mp_int *g, mp_int *n, + unsigned int modLen) +{ + SECStatus rv = SECSuccess; + mp_int e, k; + mp_err err = MP_OKAY; + unsigned char *kb = NULL; + + MP_DIGITS(&e) = 0; + MP_DIGITS(&k) = 0; + CHECK_MPI_OK(mp_init(&e)); + CHECK_MPI_OK(mp_init(&k)); + SECITEM_TO_MPINT(key->publicExponent, &e); + /* generate random k < n */ + kb = PORT_Alloc(modLen); + if (!kb) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + /* err is MP_OKAY here; without this the caller would cache and use + * an (f, g) pair that was never computed */ + rv = SECFailure; + goto cleanup; + } + CHECK_SEC_OK(RNG_GenerateGlobalRandomBytes(kb, modLen)); + CHECK_MPI_OK(mp_read_unsigned_octets(&k, kb, modLen)); + /* k < n */ + CHECK_MPI_OK(mp_mod(&k, n, &k)); + /* f = k**e mod n */ + CHECK_MPI_OK(mp_exptmod(&k, &e, n, f)); + /* g = k**-1 mod n */ + CHECK_MPI_OK(mp_invmod(&k, n, g)); +cleanup: + if (kb) + PORT_ZFree(kb, modLen); + mp_clear(&k); + mp_clear(&e); + if (err) { + MP_TO_SEC_ERROR(err); + rv = SECFailure; + } + return rv; +} + +static SECStatus +init_blinding_params(RSABlindingParams *rsabp, RSAPrivateKey *key, + mp_int *n, unsigned int modLen) +{ + blindingParams *bp = rsabp->array; + int i = 0; + + /* Initialize the list pointer for the element */ + PR_INIT_CLIST(&rsabp->link); + for (i = 0; i < RSA_BLINDING_PARAMS_MAX_CACHE_SIZE; ++i, ++bp) { + bp->next = bp + 1; + MP_DIGITS(&bp->f) = 0; + MP_DIGITS(&bp->g) = 0; + bp->counter = 0; + } + /* The last bp->next value was initialized with out + * of rsabp->array pointer and must be set to NULL + */ + rsabp->array[RSA_BLINDING_PARAMS_MAX_CACHE_SIZE - 1].next = NULL; + + bp = rsabp->array; + rsabp->bp = NULL; + rsabp->free = bp; + + /* List elements are keyed using the modulus */ + return SECITEM_CopyItem(NULL, &rsabp->modulus, &key->modulus); +} + +static SECStatus +get_blinding_params(RSAPrivateKey *key, mp_int *n, unsigned int modLen, + mp_int *f, mp_int *g) +{ + RSABlindingParams *rsabp = NULL; + blindingParams *bpUnlinked = NULL; + blindingParams *bp; + PRCList *el; + SECStatus rv = SECSuccess; + mp_err err =
MP_OKAY; + int cmp = -1; + PRBool holdingLock = PR_FALSE; + + do { + if (blindingParamsList.lock == NULL) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + /* Acquire the list lock */ + PZ_Lock(blindingParamsList.lock); + holdingLock = PR_TRUE; + + /* Walk the list looking for the private key */ + for (el = PR_NEXT_LINK(&blindingParamsList.head); + el != &blindingParamsList.head; + el = PR_NEXT_LINK(el)) { + rsabp = (RSABlindingParams *)el; + cmp = SECITEM_CompareItem(&rsabp->modulus, &key->modulus); + if (cmp >= 0) { + /* The key is found or not in the list. */ + break; + } + } + + if (cmp) { + /* At this point, the key is not in the list. el should point to + ** the list element before which this key should be inserted. + */ + rsabp = PORT_ZNew(RSABlindingParams); + if (!rsabp) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + goto cleanup; + } + + rv = init_blinding_params(rsabp, key, n, modLen); + if (rv != SECSuccess) { + PORT_ZFree(rsabp, sizeof(RSABlindingParams)); + goto cleanup; + } + + /* Insert the new element into the list + ** If inserting in the middle of the list, el points to the link + ** to insert before. Otherwise, the link needs to be appended to + ** the end of the list, which is the same as inserting before the + ** head (since el would have looped back to the head). + */ + PR_INSERT_BEFORE(&rsabp->link, el); + } + + /* We've found (or created) the RSAblindingParams struct for this key. + * Now, search its list of ready blinding params for a usable one. + */ + while (0 != (bp = rsabp->bp)) { + if (--(bp->counter) > 0) { + /* Found a match and there are still remaining uses left */ + /* Return the parameters */ + CHECK_MPI_OK(mp_copy(&bp->f, f)); + CHECK_MPI_OK(mp_copy(&bp->g, g)); + + PZ_Unlock(blindingParamsList.lock); + return SECSuccess; + } + /* exhausted this one, give its values to caller, and + * then retire it. 
+ */ + mp_exch(&bp->f, f); + mp_exch(&bp->g, g); + mp_clear(&bp->f); + mp_clear(&bp->g); + bp->counter = 0; + /* Move to free list */ + rsabp->bp = bp->next; + bp->next = rsabp->free; + rsabp->free = bp; + /* In case there're threads waiting for new blinding + * value - notify 1 thread the value is ready + */ + if (blindingParamsList.waitCount > 0) { + PR_NotifyCondVar(blindingParamsList.cVar); + blindingParamsList.waitCount--; + } + PZ_Unlock(blindingParamsList.lock); + return SECSuccess; + } + /* We did not find a usable set of blinding params. Can we make one? */ + /* Find a free bp struct. */ + if ((bp = rsabp->free) != NULL) { + /* unlink this bp */ + rsabp->free = bp->next; + bp->next = NULL; + bpUnlinked = bp; /* In case we fail */ + + PZ_Unlock(blindingParamsList.lock); + holdingLock = PR_FALSE; + /* generate blinding parameter values for the current thread */ + CHECK_SEC_OK(generate_blinding_params(key, f, g, n, modLen)); + + /* put the blinding parameter values into cache */ + CHECK_MPI_OK(mp_init(&bp->f)); + CHECK_MPI_OK(mp_init(&bp->g)); + CHECK_MPI_OK(mp_copy(f, &bp->f)); + CHECK_MPI_OK(mp_copy(g, &bp->g)); + + /* Put this at head of queue of usable params. */ + PZ_Lock(blindingParamsList.lock); + holdingLock = PR_TRUE; + (void)holdingLock; + /* initialize RSABlindingParamsStr */ + bp->counter = RSA_BLINDING_PARAMS_MAX_REUSE; + bp->next = rsabp->bp; + rsabp->bp = bp; + bpUnlinked = NULL; + /* In case there're threads waiting for new blinding value + * just notify them the value is ready + */ + if (blindingParamsList.waitCount > 0) { + PR_NotifyAllCondVar(blindingParamsList.cVar); + blindingParamsList.waitCount = 0; + } + PZ_Unlock(blindingParamsList.lock); + return SECSuccess; + } + /* Here, there are no usable blinding parameters available, + * and no free bp blocks, presumably because they're all + * actively having parameters generated for them. + * So, we need to wait here and not eat up CPU until some + * change happens. 
+ */ + blindingParamsList.waitCount++; + PR_WaitCondVar(blindingParamsList.cVar, PR_INTERVAL_NO_TIMEOUT); + PZ_Unlock(blindingParamsList.lock); + holdingLock = PR_FALSE; + (void)holdingLock; + } while (1); + +cleanup: + /* It is possible to reach this after the lock is already released. */ + if (bpUnlinked) { + if (!holdingLock) { + PZ_Lock(blindingParamsList.lock); + holdingLock = PR_TRUE; + } + bp = bpUnlinked; + mp_clear(&bp->f); + mp_clear(&bp->g); + bp->counter = 0; + /* Must put the unlinked bp back on the free list */ + bp->next = rsabp->free; + rsabp->free = bp; + } + if (holdingLock) { + PZ_Unlock(blindingParamsList.lock); + } + if (err) { + MP_TO_SEC_ERROR(err); + } + return SECFailure; +} + +/* +** Perform a raw private-key operation +** Length of input and output buffers are equal to key's modulus len. +*/ +static SECStatus +rsa_PrivateKeyOp(RSAPrivateKey *key, + unsigned char *output, + const unsigned char *input, + PRBool check) +{ + unsigned int modLen; + unsigned int offset; + SECStatus rv = SECSuccess; + mp_err err; + mp_int n, c, m; + mp_int f, g; + if (!key || !output || !input) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + /* check input out of range (needs to be in range [0..n-1]) */ + modLen = rsa_modulusLen(&key->modulus); + offset = (key->modulus.data[0] == 0) ? 
1 : 0; /* may be leading 0 */ + if (memcmp(input, key->modulus.data + offset, modLen) >= 0) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + MP_DIGITS(&n) = 0; + MP_DIGITS(&c) = 0; + MP_DIGITS(&m) = 0; + MP_DIGITS(&f) = 0; + MP_DIGITS(&g) = 0; + CHECK_MPI_OK(mp_init(&n)); + CHECK_MPI_OK(mp_init(&c)); + CHECK_MPI_OK(mp_init(&m)); + CHECK_MPI_OK(mp_init(&f)); + CHECK_MPI_OK(mp_init(&g)); + SECITEM_TO_MPINT(key->modulus, &n); + OCTETS_TO_MPINT(input, &c, modLen); + /* If blinding, compute pre-image of ciphertext by multiplying by + ** blinding factor + */ + if (nssRSAUseBlinding) { + CHECK_SEC_OK(get_blinding_params(key, &n, modLen, &f, &g)); + /* c' = c*f mod n */ + CHECK_MPI_OK(mp_mulmod(&c, &f, &n, &c)); + } + /* Do the private key operation m = c**d mod n */ + if (key->prime1.len == 0 || + key->prime2.len == 0 || + key->exponent1.len == 0 || + key->exponent2.len == 0 || + key->coefficient.len == 0) { + CHECK_SEC_OK(rsa_PrivateKeyOpNoCRT(key, &m, &c, &n, modLen)); + } else if (check) { + CHECK_SEC_OK(rsa_PrivateKeyOpCRTCheckedPubKey(key, &m, &c)); + } else { + CHECK_SEC_OK(rsa_PrivateKeyOpCRTNoCheck(key, &m, &c)); + } + /* If blinding, compute post-image of plaintext by multiplying by + ** blinding factor + */ + if (nssRSAUseBlinding) { + /* m = m'*g mod n */ + CHECK_MPI_OK(mp_mulmod(&m, &g, &n, &m)); + } + err = mp_to_fixlen_octets(&m, output, modLen); + if (err >= 0) + err = MP_OKAY; +cleanup: + mp_clear(&n); + mp_clear(&c); + mp_clear(&m); + mp_clear(&f); + mp_clear(&g); + if (err) { + MP_TO_SEC_ERROR(err); + rv = SECFailure; + } + return rv; +} + +SECStatus +RSA_PrivateKeyOp(RSAPrivateKey *key, + unsigned char *output, + const unsigned char *input) +{ + return rsa_PrivateKeyOp(key, output, input, PR_FALSE); +} + +SECStatus +RSA_PrivateKeyOpDoubleChecked(RSAPrivateKey *key, + unsigned char *output, + const unsigned char *input) +{ + return rsa_PrivateKeyOp(key, output, input, PR_TRUE); +} + +SECStatus +RSA_PrivateKeyCheck(const RSAPrivateKey 
*key) +{ + mp_int p, q, n, psub1, qsub1, e, d, d_p, d_q, qInv, res; + mp_err err = MP_OKAY; + SECStatus rv = SECSuccess; + MP_DIGITS(&p) = 0; + MP_DIGITS(&q) = 0; + MP_DIGITS(&n) = 0; + MP_DIGITS(&psub1) = 0; + MP_DIGITS(&qsub1) = 0; + MP_DIGITS(&e) = 0; + MP_DIGITS(&d) = 0; + MP_DIGITS(&d_p) = 0; + MP_DIGITS(&d_q) = 0; + MP_DIGITS(&qInv) = 0; + MP_DIGITS(&res) = 0; + CHECK_MPI_OK(mp_init(&p)); + CHECK_MPI_OK(mp_init(&q)); + CHECK_MPI_OK(mp_init(&n)); + CHECK_MPI_OK(mp_init(&psub1)); + CHECK_MPI_OK(mp_init(&qsub1)); + CHECK_MPI_OK(mp_init(&e)); + CHECK_MPI_OK(mp_init(&d)); + CHECK_MPI_OK(mp_init(&d_p)); + CHECK_MPI_OK(mp_init(&d_q)); + CHECK_MPI_OK(mp_init(&qInv)); + CHECK_MPI_OK(mp_init(&res)); + + if (!key->modulus.data || !key->prime1.data || !key->prime2.data || + !key->publicExponent.data || !key->privateExponent.data || + !key->exponent1.data || !key->exponent2.data || + !key->coefficient.data) { + /* call RSA_PopulatePrivateKey first, if the application wishes to + * recover these parameters */ + err = MP_BADARG; + goto cleanup; + } + + SECITEM_TO_MPINT(key->modulus, &n); + SECITEM_TO_MPINT(key->prime1, &p); + SECITEM_TO_MPINT(key->prime2, &q); + SECITEM_TO_MPINT(key->publicExponent, &e); + SECITEM_TO_MPINT(key->privateExponent, &d); + SECITEM_TO_MPINT(key->exponent1, &d_p); + SECITEM_TO_MPINT(key->exponent2, &d_q); + SECITEM_TO_MPINT(key->coefficient, &qInv); + /* p and q must be distinct. 
*/ + if (mp_cmp(&p, &q) == 0) { + rv = SECFailure; + goto cleanup; + } +#define VERIFY_MPI_EQUAL(m1, m2) \ + if (mp_cmp(m1, m2) != 0) { \ + rv = SECFailure; \ + goto cleanup; \ + } +#define VERIFY_MPI_EQUAL_1(m) \ + if (mp_cmp_d(m, 1) != 0) { \ + rv = SECFailure; \ + goto cleanup; \ + } + /* n == p * q */ + CHECK_MPI_OK(mp_mul(&p, &q, &res)); + VERIFY_MPI_EQUAL(&res, &n); + /* gcd(e, p-1) == 1 */ + CHECK_MPI_OK(mp_sub_d(&p, 1, &psub1)); + CHECK_MPI_OK(mp_gcd(&e, &psub1, &res)); + VERIFY_MPI_EQUAL_1(&res); + /* gcd(e, q-1) == 1 */ + CHECK_MPI_OK(mp_sub_d(&q, 1, &qsub1)); + CHECK_MPI_OK(mp_gcd(&e, &qsub1, &res)); + VERIFY_MPI_EQUAL_1(&res); + /* d*e == 1 mod p-1 */ + CHECK_MPI_OK(mp_mulmod(&d, &e, &psub1, &res)); + VERIFY_MPI_EQUAL_1(&res); + /* d*e == 1 mod q-1 */ + CHECK_MPI_OK(mp_mulmod(&d, &e, &qsub1, &res)); + VERIFY_MPI_EQUAL_1(&res); + /* d_p == d mod p-1 */ + CHECK_MPI_OK(mp_mod(&d, &psub1, &res)); + VERIFY_MPI_EQUAL(&res, &d_p); + /* d_q == d mod q-1 */ + CHECK_MPI_OK(mp_mod(&d, &qsub1, &res)); + VERIFY_MPI_EQUAL(&res, &d_q); + /* q * q**-1 == 1 mod p */ + CHECK_MPI_OK(mp_mulmod(&q, &qInv, &p, &res)); + VERIFY_MPI_EQUAL_1(&res); + +cleanup: + mp_clear(&n); + mp_clear(&p); + mp_clear(&q); + mp_clear(&psub1); + mp_clear(&qsub1); + mp_clear(&e); + mp_clear(&d); + mp_clear(&d_p); + mp_clear(&d_q); + mp_clear(&qInv); + mp_clear(&res); + if (err) { + MP_TO_SEC_ERROR(err); + rv = SECFailure; + } + return rv; +} + +static SECStatus +RSA_Init(void) +{ + if (PR_CallOnce(&coBPInit, init_blinding_params_list) != PR_SUCCESS) { + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return SECFailure; + } + return SECSuccess; +} + +SECStatus +BL_Init(void) +{ + return RSA_Init(); +} + +/* cleanup at shutdown */ +void +RSA_Cleanup(void) +{ + blindingParams *bp = NULL; + if (!coBPInit.initialized) + return; + + while (!PR_CLIST_IS_EMPTY(&blindingParamsList.head)) { + RSABlindingParams *rsabp = + (RSABlindingParams *)PR_LIST_HEAD(&blindingParamsList.head); + 
PR_REMOVE_LINK(&rsabp->link); + /* clear parameters cache */ + while (rsabp->bp != NULL) { + bp = rsabp->bp; + rsabp->bp = rsabp->bp->next; + mp_clear(&bp->f); + mp_clear(&bp->g); + } + SECITEM_FreeItem(&rsabp->modulus, PR_FALSE); + PORT_Free(rsabp); + } + + if (blindingParamsList.cVar) { + PR_DestroyCondVar(blindingParamsList.cVar); + blindingParamsList.cVar = NULL; + } + + if (blindingParamsList.lock) { + SKIP_AFTER_FORK(PZ_DestroyLock(blindingParamsList.lock)); + blindingParamsList.lock = NULL; + } + + coBPInit.initialized = 0; + coBPInit.inProgress = 0; + coBPInit.status = 0; +} + +/* + * need a central place for this function to free up all the memory that + * free_bl may have allocated along the way. Currently only RSA does this, + * so I've put it here for now. + */ +void +BL_Cleanup(void) +{ + RSA_Cleanup(); +} + +PRBool bl_parentForkedAfterC_Initialize; + +/* + * Set fork flag so it can be tested in SKIP_AFTER_FORK on relevant platforms. + */ +void +BL_SetForkState(PRBool forked) +{ + bl_parentForkedAfterC_Initialize = forked; +} diff --git a/security/nss/lib/freebl/rsapkcs.c b/security/nss/lib/freebl/rsapkcs.c new file mode 100644 index 000000000..577fe1f61 --- /dev/null +++ b/security/nss/lib/freebl/rsapkcs.c @@ -0,0 +1,1385 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * RSA PKCS#1 v2.1 (RFC 3447) operations + */ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "secerr.h" + +#include "blapi.h" +#include "secitem.h" +#include "blapii.h" + +#define RSA_BLOCK_MIN_PAD_LEN 8 +#define RSA_BLOCK_FIRST_OCTET 0x00 +#define RSA_BLOCK_PRIVATE_PAD_OCTET 0xff +#define RSA_BLOCK_AFTER_PAD_OCTET 0x00 + +/* + * RSA block types + * + * The values of RSA_BlockPrivate and RSA_BlockPublic are fixed. 
+ * The value of RSA_BlockRaw isn't fixed by definition, but we are keeping + * the value that NSS has been using in the past. + */ +typedef enum { + RSA_BlockPrivate = 1, /* pad for a private-key operation */ + RSA_BlockPublic = 2, /* pad for a public-key operation */ + RSA_BlockRaw = 4 /* simply justify the block appropriately */ +} RSA_BlockType; + +/* Needed for RSA-PSS functions */ +static const unsigned char eightZeros[] = { 0, 0, 0, 0, 0, 0, 0, 0 }; + +/* Constant time comparison of a single byte. + * Returns 1 iff a == b, otherwise returns 0. + * Note: For ranges of bytes, use constantTimeCompare. + * + * How it works: after integer promotion, (a - b) | (b - a) is zero when + * a == b and otherwise has bit 7 set (one of the two differences is in + * 128..255 or is a negative value in -127..-1). The complement, truncated + * to unsigned char and shifted right by 7, is therefore 1 or 0, and the + * source contains no branch on the data. + */ +static unsigned char +constantTimeEQ8(unsigned char a, unsigned char b) +{ + unsigned char c = ~((a - b) | (b - a)); + c >>= 7; + return c; +} + +/* Constant time comparison of a range of bytes. + * Returns 1 iff len bytes of a are identical to len bytes of b, otherwise + * returns 0. + * + * Every byte is always examined: the XOR differences are OR-ed into one + * accumulator that is tested once at the end, so there is no early exit. + * With len == 0 the accumulator stays 0 and the result is 1. + */ +static unsigned char +constantTimeCompare(const unsigned char *a, + const unsigned char *b, + unsigned int len) +{ + unsigned char tmp = 0; + unsigned int i; + for (i = 0; i < len; ++i, ++a, ++b) + tmp |= *a ^ *b; + return constantTimeEQ8(0x00, tmp); +} + +/* Constant time conditional. + * Returns a if c is 1, or b if c is 0. The result is undefined if c is + * not 0 or 1. + * + * With c == 1, (c - 1) is 0 and its complement (all ones) selects a; with + * c == 0, (c - 1) wraps to all ones and selects b. + */ +static unsigned int +constantTimeCondition(unsigned int c, + unsigned int a, + unsigned int b) +{ + return (~(c - 1) & a) | ((c - 1) & b); +} + /* Returns the modulus length in bytes without a leading zero byte: + * modulus->len is reduced by 1 when modulus->data[0] is 0. + * data[0] is read unconditionally, so the item is assumed to be non-empty + * (NOTE(review): confirm that all callers guarantee this). + */ +static unsigned int +rsa_modulusLen(SECItem *modulus) +{ + unsigned char byteZero = modulus->data[0]; + unsigned int modLen = modulus->len - !byteZero; + return modLen; +} + +/* + * Format one block of data for public/private key encryption using + * the rules defined in PKCS #1. 
+ */ +static unsigned char * +rsa_FormatOneBlock(unsigned modulusLen, + RSA_BlockType blockType, + SECItem *data) +{ + unsigned char *block; + unsigned char *bp; + int padLen; + int i, j; + SECStatus rv; + + block = (unsigned char *)PORT_Alloc(modulusLen); + if (block == NULL) + return NULL; + + bp = block; + + /* + * All RSA blocks start with two octets: + * 0x00 || BlockType + */ + *bp++ = RSA_BLOCK_FIRST_OCTET; + *bp++ = (unsigned char)blockType; + + switch (blockType) { + + /* + * Blocks intended for private-key operation. + */ + case RSA_BlockPrivate: /* preferred method */ + /* + * 0x00 || BT || Pad || 0x00 || ActualData + * 1 1 padLen 1 data->len + * Pad is either all 0x00 or all 0xff bytes, depending on blockType. + */ + padLen = modulusLen - data->len - 3; + PORT_Assert(padLen >= RSA_BLOCK_MIN_PAD_LEN); + if (padLen < RSA_BLOCK_MIN_PAD_LEN) { + PORT_Free(block); + return NULL; + } + PORT_Memset(bp, RSA_BLOCK_PRIVATE_PAD_OCTET, padLen); + bp += padLen; + *bp++ = RSA_BLOCK_AFTER_PAD_OCTET; + PORT_Memcpy(bp, data->data, data->len); + break; + + /* + * Blocks intended for public-key operation. + */ + case RSA_BlockPublic: + /* + * 0x00 || BT || Pad || 0x00 || ActualData + * 1 1 padLen 1 data->len + * Pad is all non-zero random bytes. + * + * Build the block left to right. + * Fill the entire block from Pad to the end with random bytes. + * Use the bytes after Pad as a supply of extra random bytes from + * which to find replacements for the zero bytes in Pad. + * If we need more than that, refill the bytes after Pad with + * new random bytes as necessary. + */ + padLen = modulusLen - (data->len + 3); + PORT_Assert(padLen >= RSA_BLOCK_MIN_PAD_LEN); + if (padLen < RSA_BLOCK_MIN_PAD_LEN) { + PORT_Free(block); + return NULL; + } + j = modulusLen - 2; + rv = RNG_GenerateGlobalRandomBytes(bp, j); + if (rv == SECSuccess) { + for (i = 0; i < padLen;) { + unsigned char repl; + /* Pad with non-zero random data. 
*/ + if (bp[i] != RSA_BLOCK_AFTER_PAD_OCTET) { + ++i; + continue; + } + if (j <= padLen) { + rv = RNG_GenerateGlobalRandomBytes(bp + padLen, + modulusLen - (2 + padLen)); + if (rv != SECSuccess) + break; + j = modulusLen - 2; + } + do { + repl = bp[--j]; + } while (repl == RSA_BLOCK_AFTER_PAD_OCTET && j > padLen); + if (repl != RSA_BLOCK_AFTER_PAD_OCTET) { + bp[i++] = repl; + } + } + } + if (rv != SECSuccess) { + PORT_Free(block); + PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); + return NULL; + } + bp += padLen; + *bp++ = RSA_BLOCK_AFTER_PAD_OCTET; + PORT_Memcpy(bp, data->data, data->len); + break; + + default: + PORT_Assert(0); + PORT_Free(block); + return NULL; + } + + return block; +} + +/* + * Build a modulusLen-byte block from data according to blockType and hand + * it back in result; the buffer is allocated here and is released by the + * caller. Returns SECFailure, without handing a buffer to the caller, when + * the data does not fit, the block type is unknown or the allocation + * fails. + */ +static SECStatus +rsa_FormatBlock(SECItem *result, + unsigned modulusLen, + RSA_BlockType blockType, + SECItem *data) +{ + switch (blockType) { + case RSA_BlockPrivate: + case RSA_BlockPublic: + /* + * 0x00 || BT || Pad || 0x00 || ActualData + * + * The "3" below is the first octet + the second octet + the 0x00 + * octet that always comes just before the ActualData. + */ + PORT_Assert(data->len <= (modulusLen - (3 + RSA_BLOCK_MIN_PAD_LEN))); + + result->data = rsa_FormatOneBlock(modulusLen, blockType, data); + if (result->data == NULL) { + result->len = 0; + return SECFailure; + } + result->len = modulusLen; + + break; + + case RSA_BlockRaw: + /* + * Pad || ActualData + * Pad is zeros. The application is responsible for recovering + * the actual data. + */ + if (data->len > modulusLen) { + return SECFailure; + } + result->data = (unsigned char *)PORT_ZAlloc(modulusLen); + if (result->data == NULL) { + /* Never copy into a failed allocation. */ + result->len = 0; + return SECFailure; + } + result->len = modulusLen; + PORT_Memcpy(result->data + (modulusLen - data->len), + data->data, data->len); + break; + + default: + PORT_Assert(0); + result->data = NULL; + result->len = 0; + return SECFailure; + } + + return SECSuccess; +} + +/* + * Mask generation function MGF1 as defined in PKCS #1 v2.1 / RFC 3447. 
+ */ +static SECStatus +MGF1(HASH_HashType hashAlg, + unsigned char *mask, + unsigned int maskLen, + const unsigned char *mgfSeed, + unsigned int mgfSeedLen) +{ + unsigned int digestLen; + PRUint32 counter; + PRUint32 rounds; + unsigned char *tempHash; + unsigned char *temp; + const SECHashObject *hash; + void *hashContext; + unsigned char C[4]; + + hash = HASH_GetRawHashObject(hashAlg); + if (hash == NULL) + return SECFailure; + + hashContext = (*hash->create)(); + rounds = (maskLen + hash->length - 1) / hash->length; + for (counter = 0; counter < rounds; counter++) { + C[0] = (unsigned char)((counter >> 24) & 0xff); + C[1] = (unsigned char)((counter >> 16) & 0xff); + C[2] = (unsigned char)((counter >> 8) & 0xff); + C[3] = (unsigned char)(counter & 0xff); + + /* This could be optimized when the clone functions in + * rawhash.c are implemented. */ + (*hash->begin)(hashContext); + (*hash->update)(hashContext, mgfSeed, mgfSeedLen); + (*hash->update)(hashContext, C, sizeof C); + + tempHash = mask + counter * hash->length; + if (counter != (rounds - 1)) { + (*hash->end)(hashContext, tempHash, &digestLen, hash->length); + } else { /* we're in the last round and need to cut the hash */ + temp = (unsigned char *)PORT_Alloc(hash->length); + (*hash->end)(hashContext, temp, &digestLen, hash->length); + PORT_Memcpy(tempHash, temp, maskLen - counter * hash->length); + PORT_Free(temp); + } + } + (*hash->destroy)(hashContext, PR_TRUE); + + return SECSuccess; +} + +/* XXX Doesn't set error code */ +SECStatus +RSA_SignRaw(RSAPrivateKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *data, + unsigned int dataLen) +{ + SECStatus rv = SECSuccess; + unsigned int modulusLen = rsa_modulusLen(&key->modulus); + SECItem formatted; + SECItem unformatted; + + if (maxOutputLen < modulusLen) + return SECFailure; + + unformatted.len = dataLen; + unformatted.data = (unsigned char *)data; + formatted.data = NULL; + rv = 
rsa_FormatBlock(&formatted, modulusLen, RSA_BlockRaw, &unformatted); + if (rv != SECSuccess) + goto done; + + rv = RSA_PrivateKeyOpDoubleChecked(key, output, formatted.data); + *outputLen = modulusLen; + +done: + if (formatted.data != NULL) + PORT_ZFree(formatted.data, modulusLen); + return rv; +} + +/* XXX Doesn't set error code */ +SECStatus +RSA_CheckSignRaw(RSAPublicKey *key, + const unsigned char *sig, + unsigned int sigLen, + const unsigned char *hash, + unsigned int hashLen) +{ + SECStatus rv; + unsigned int modulusLen = rsa_modulusLen(&key->modulus); + unsigned char *buffer; + + if (sigLen != modulusLen) + goto failure; + if (hashLen > modulusLen) + goto failure; + + buffer = (unsigned char *)PORT_Alloc(modulusLen + 1); + if (!buffer) + goto failure; + + rv = RSA_PublicKeyOp(key, buffer, sig); + if (rv != SECSuccess) + goto loser; + + /* + * make sure we get the same results + */ + /* XXX(rsleevi): Constant time */ + /* NOTE: should we verify the leading zeros? */ + if (PORT_Memcmp(buffer + (modulusLen - hashLen), hash, hashLen) != 0) + goto loser; + + PORT_Free(buffer); + return SECSuccess; + +loser: + PORT_Free(buffer); +failure: + return SECFailure; +} + +/* XXX Doesn't set error code */ +SECStatus +RSA_CheckSignRecoverRaw(RSAPublicKey *key, + unsigned char *data, + unsigned int *dataLen, + unsigned int maxDataLen, + const unsigned char *sig, + unsigned int sigLen) +{ + SECStatus rv; + unsigned int modulusLen = rsa_modulusLen(&key->modulus); + + if (sigLen != modulusLen) + goto failure; + if (maxDataLen < modulusLen) + goto failure; + + rv = RSA_PublicKeyOp(key, data, sig); + if (rv != SECSuccess) + goto failure; + + *dataLen = modulusLen; + return SECSuccess; + +failure: + return SECFailure; +} + +/* XXX Doesn't set error code */ +SECStatus +RSA_EncryptRaw(RSAPublicKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen) +{ + SECStatus rv; + unsigned int modulusLen 
= rsa_modulusLen(&key->modulus); + SECItem formatted; + SECItem unformatted; + + formatted.data = NULL; + if (maxOutputLen < modulusLen) + goto failure; + + unformatted.len = inputLen; + unformatted.data = (unsigned char *)input; + formatted.data = NULL; + rv = rsa_FormatBlock(&formatted, modulusLen, RSA_BlockRaw, &unformatted); + if (rv != SECSuccess) + goto failure; + + rv = RSA_PublicKeyOp(key, output, formatted.data); + if (rv != SECSuccess) + goto failure; + + PORT_ZFree(formatted.data, modulusLen); + *outputLen = modulusLen; + return SECSuccess; + +failure: + if (formatted.data != NULL) + PORT_ZFree(formatted.data, modulusLen); + return SECFailure; +} + +/* XXX Doesn't set error code */ +SECStatus +RSA_DecryptRaw(RSAPrivateKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen) +{ + SECStatus rv; + unsigned int modulusLen = rsa_modulusLen(&key->modulus); + + if (modulusLen > maxOutputLen) + goto failure; + if (inputLen != modulusLen) + goto failure; + + rv = RSA_PrivateKeyOp(key, output, input); + if (rv != SECSuccess) + goto failure; + + *outputLen = modulusLen; + return SECSuccess; + +failure: + return SECFailure; +} + +/* + * Decodes an EME-OAEP encoded block, validating the encoding in constant + * time. + * Described in RFC 3447, section 7.1.2. + * input contains the encoded block, after decryption. + * label is the optional value L that was associated with the message. + * On success, the original message and message length will be stored in + * output and outputLen. 
+ */ +static SECStatus +eme_oaep_decode(unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen, + HASH_HashType hashAlg, + HASH_HashType maskHashAlg, + const unsigned char *label, + unsigned int labelLen) +{ + const SECHashObject *hash; + void *hashContext; + SECStatus rv = SECFailure; + unsigned char labelHash[HASH_LENGTH_MAX]; + unsigned int i; + unsigned int maskLen; + unsigned int paddingOffset; + unsigned char *mask = NULL; + unsigned char *tmpOutput = NULL; + unsigned char isGood; + unsigned char foundPaddingEnd; + + hash = HASH_GetRawHashObject(hashAlg); + if (hash == NULL) { + /* Unknown hash algorithm: nothing below can work without it. */ + PORT_SetError(SEC_ERROR_INVALID_ALGORITHM); + return SECFailure; + } + + /* 1.c */ + if (inputLen < (hash->length * 2) + 2) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + + /* Step 3.a - Generate lHash */ + hashContext = (*hash->create)(); + if (hashContext == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return SECFailure; + } + (*hash->begin)(hashContext); + if (labelLen > 0) + (*hash->update)(hashContext, label, labelLen); + (*hash->end)(hashContext, labelHash, &i, sizeof(labelHash)); + (*hash->destroy)(hashContext, PR_TRUE); + + tmpOutput = (unsigned char *)PORT_Alloc(inputLen); + if (tmpOutput == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + goto done; + } + + maskLen = inputLen - hash->length - 1; + mask = (unsigned char *)PORT_Alloc(maskLen); + if (mask == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + goto done; + } + + PORT_Memcpy(tmpOutput, input, inputLen); + + /* 3.c - Generate seedMask */ + if (MGF1(maskHashAlg, mask, hash->length, &tmpOutput[1 + hash->length], + inputLen - hash->length - 1) != SECSuccess) { + /* mask holds no valid data; rv is still SECFailure here */ + goto done; + } + /* 3.d - Unmask seed */ + for (i = 0; i < hash->length; ++i) + tmpOutput[1 + i] ^= mask[i]; + + /* 3.e - Generate dbMask */ + if (MGF1(maskHashAlg, mask, maskLen, &tmpOutput[1], + hash->length) != SECSuccess) { + goto done; + } + /* 3.f - Unmask DB */ + for (i = 0; i < maskLen; ++i) + tmpOutput[1 + hash->length + i] ^= mask[i]; + + /* 3.g - Compare Y, lHash, and PS in constant time + * Warning: This code is timing 
dependent and must not disclose which of + * these were invalid. + */ + paddingOffset = 0; + isGood = 1; + foundPaddingEnd = 0; + + /* Compare Y */ + isGood &= constantTimeEQ8(0x00, tmpOutput[0]); + + /* Compare lHash and lHash' */ + isGood &= constantTimeCompare(&labelHash[0], + &tmpOutput[1 + hash->length], + hash->length); + + /* Compare that the padding is zero or more zero octets, followed by a + * 0x01 octet */ + for (i = 1 + (hash->length * 2); i < inputLen; ++i) { + unsigned char isZero = constantTimeEQ8(0x00, tmpOutput[i]); + unsigned char isOne = constantTimeEQ8(0x01, tmpOutput[i]); + /* non-constant time equivalent: + * if (tmpOutput[i] == 0x01 && !foundPaddingEnd) + * paddingOffset = i; + */ + paddingOffset = constantTimeCondition(isOne & ~foundPaddingEnd, i, + paddingOffset); + /* non-constant time equivalent: + * if (tmpOutput[i] == 0x01) + * foundPaddingEnd = true; + * + * Note: This may yield false positives, as it will be set whenever + * a 0x01 byte is encountered. If there was bad padding (eg: + * 0x03 0x02 0x01), foundPaddingEnd will still be set to true, and + * paddingOffset will still be set to 2. + */ + foundPaddingEnd = constantTimeCondition(isOne, 1, foundPaddingEnd); + /* non-constant time equivalent: + * if (tmpOutput[i] != 0x00 && tmpOutput[i] != 0x01 && + * !foundPaddingEnd) { + * isGood = false; + * } + * + * Note: This may yield false positives, as a message (and padding) + * that is entirely zeros will result in isGood still being true. Thus + * it's necessary to check foundPaddingEnd is positive below. + */ + isGood = constantTimeCondition(~foundPaddingEnd & ~isZero, 0, isGood); + } + + /* While both isGood and foundPaddingEnd may have false positives, they + * cannot BOTH have false positives. If both are not true, then an invalid + * message was received. Note, this comparison must still be done in constant + * time so as not to leak either condition. 
+ */ + if (!(isGood & foundPaddingEnd)) { + PORT_SetError(SEC_ERROR_BAD_DATA); + goto done; + } + + /* End timing dependent code */ + + ++paddingOffset; /* Skip the 0x01 following the end of PS */ + + *outputLen = inputLen - paddingOffset; + if (*outputLen > maxOutputLen) { + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + goto done; + } + + if (*outputLen) + PORT_Memcpy(output, &tmpOutput[paddingOffset], *outputLen); + rv = SECSuccess; + +done: + if (mask) + PORT_ZFree(mask, maskLen); + if (tmpOutput) + PORT_ZFree(tmpOutput, inputLen); + return rv; +} + +/* + * Generate an EME-OAEP encoded block for encryption + * Described in RFC 3447, section 7.1.1 + * We use input instead of M for the message to be encrypted + * label is the optional value L to be associated with the message. + */ +static SECStatus +eme_oaep_encode(unsigned char *em, + unsigned int emLen, + const unsigned char *input, + unsigned int inputLen, + HASH_HashType hashAlg, + HASH_HashType maskHashAlg, + const unsigned char *label, + unsigned int labelLen, + const unsigned char *seed, + unsigned int seedLen) +{ + const SECHashObject *hash; + void *hashContext; + SECStatus rv; + unsigned char *mask; + unsigned int reservedLen; + unsigned int dbMaskLen; + unsigned int i; + + hash = HASH_GetRawHashObject(hashAlg); + if (hash == NULL) { + /* Unknown hash algorithm: hash->length is needed everywhere below. */ + PORT_SetError(SEC_ERROR_INVALID_ALGORITHM); + return SECFailure; + } + PORT_Assert(seed == NULL || seedLen == hash->length); + + /* Step 1.b */ + reservedLen = (2 * hash->length) + 2; + if (emLen < reservedLen || inputLen > (emLen - reservedLen)) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + + /* + * From RFC 3447, Section 7.1 + * +----------+---------+-------+ + * DB = | lHash | PS | M | + * +----------+---------+-------+ + * | + * +----------+ V + * | seed |--> MGF ---> xor + * +----------+ | + * | | + * +--+ V | + * |00| xor <----- MGF <-----| + * +--+ | | + * | | | + * V V V + * +--+----------+----------------------------+ + * EM = |00|maskedSeed| maskedDB | + * +--+----------+----------------------------+ + * + * We use mask to hold the result 
of the MGF functions, and all other + * values are generated in their final resting place. + */ + *em = 0x00; + + /* Step 2.a - Generate lHash */ + hashContext = (*hash->create)(); + if (hashContext == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return SECFailure; + } + (*hash->begin)(hashContext); + if (labelLen > 0) + (*hash->update)(hashContext, label, labelLen); + (*hash->end)(hashContext, &em[1 + hash->length], &i, hash->length); + (*hash->destroy)(hashContext, PR_TRUE); + + /* Step 2.b - Generate PS */ + if (emLen - reservedLen - inputLen > 0) { + PORT_Memset(em + 1 + (hash->length * 2), 0x00, + emLen - reservedLen - inputLen); + } + + /* Step 2.c. - Generate DB + * DB = lHash || PS || 0x01 || M + * Note that PS and lHash have already been placed into em at their + * appropriate offsets. This just copies M into place + */ + em[emLen - inputLen - 1] = 0x01; + if (inputLen) + PORT_Memcpy(em + emLen - inputLen, input, inputLen); + + if (seed == NULL) { + /* Step 2.d - Generate seed */ + rv = RNG_GenerateGlobalRandomBytes(em + 1, hash->length); + if (rv != SECSuccess) { + return rv; + } + } else { + /* For Known Answer Tests, copy the supplied seed. 
*/ + PORT_Memcpy(em + 1, seed, seedLen); + } + + /* Step 2.e - Generate dbMask*/ + dbMaskLen = emLen - hash->length - 1; + mask = (unsigned char *)PORT_Alloc(dbMaskLen); + if (mask == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return SECFailure; + } + if (MGF1(maskHashAlg, mask, dbMaskLen, em + 1, + hash->length) != SECSuccess) { + /* Do not XOR an unfilled mask into the block. */ + PORT_ZFree(mask, dbMaskLen); + return SECFailure; + } + /* Step 2.f - Compute maskedDB*/ + for (i = 0; i < dbMaskLen; ++i) + em[1 + hash->length + i] ^= mask[i]; + + /* Step 2.g - Generate seedMask */ + if (MGF1(maskHashAlg, mask, hash->length, &em[1 + hash->length], + dbMaskLen) != SECSuccess) { + PORT_ZFree(mask, dbMaskLen); + return SECFailure; + } + /* Step 2.h - Compute maskedSeed */ + for (i = 0; i < hash->length; ++i) + em[1 + i] ^= mask[i]; + + PORT_ZFree(mask, dbMaskLen); + return SECSuccess; +} + +/* + * RSA-OAEP encryption: OAEP-encode input into a modulus-sized block with + * eme_oaep_encode and apply the public-key operation to it. maxOutputLen + * must be at least the modulus length. seed may be NULL, in which case a + * random seed is generated; a supplied seed is meant for known answer + * tests. The encoded block contains the plaintext, so it is released with + * PORT_ZFree. + */ +SECStatus +RSA_EncryptOAEP(RSAPublicKey *key, + HASH_HashType hashAlg, + HASH_HashType maskHashAlg, + const unsigned char *label, + unsigned int labelLen, + const unsigned char *seed, + unsigned int seedLen, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen) +{ + SECStatus rv = SECFailure; + unsigned int modulusLen = rsa_modulusLen(&key->modulus); + unsigned char *oaepEncoded = NULL; + + if (maxOutputLen < modulusLen) { + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + + if ((hashAlg == HASH_AlgNULL) || (maskHashAlg == HASH_AlgNULL)) { + PORT_SetError(SEC_ERROR_INVALID_ALGORITHM); + return SECFailure; + } + + if ((labelLen == 0 && label != NULL) || + (labelLen > 0 && label == NULL)) { + PORT_SetError(SEC_ERROR_INVALID_ALGORITHM); + return SECFailure; + } + + oaepEncoded = (unsigned char *)PORT_Alloc(modulusLen); + if (oaepEncoded == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return SECFailure; + } + rv = eme_oaep_encode(oaepEncoded, modulusLen, input, inputLen, + hashAlg, maskHashAlg, label, labelLen, seed, seedLen); + if (rv != SECSuccess) + goto done; + + rv = RSA_PublicKeyOp(key, output, oaepEncoded); + if (rv != SECSuccess) + goto done; + *outputLen = modulusLen; + +done: + 
PORT_ZFree(oaepEncoded, modulusLen); + return rv; +} + +/* + * RSA-OAEP decryption: run the double-checked private-key operation on + * input, which must be exactly one modulus length long, then validate and + * strip the OAEP encoding with eme_oaep_decode. + */ +SECStatus +RSA_DecryptOAEP(RSAPrivateKey *key, + HASH_HashType hashAlg, + HASH_HashType maskHashAlg, + const unsigned char *label, + unsigned int labelLen, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen) +{ + SECStatus rv = SECFailure; + unsigned int modulusLen = rsa_modulusLen(&key->modulus); + unsigned char *oaepEncoded = NULL; + + if ((hashAlg == HASH_AlgNULL) || (maskHashAlg == HASH_AlgNULL)) { + PORT_SetError(SEC_ERROR_INVALID_ALGORITHM); + return SECFailure; + } + + if (inputLen != modulusLen) { + PORT_SetError(SEC_ERROR_INPUT_LEN); + return SECFailure; + } + + if ((labelLen == 0 && label != NULL) || + (labelLen > 0 && label == NULL)) { + PORT_SetError(SEC_ERROR_INVALID_ALGORITHM); + return SECFailure; + } + + oaepEncoded = (unsigned char *)PORT_Alloc(modulusLen); + if (oaepEncoded == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return SECFailure; + } + + rv = RSA_PrivateKeyOpDoubleChecked(key, oaepEncoded, input); + if (rv != SECSuccess) { + goto done; + } + rv = eme_oaep_decode(output, outputLen, maxOutputLen, oaepEncoded, + modulusLen, hashAlg, maskHashAlg, label, + labelLen); + +done: + if (oaepEncoded) + PORT_ZFree(oaepEncoded, modulusLen); + return rv; +} + +/* XXX Doesn't set error code */ +SECStatus +RSA_EncryptBlock(RSAPublicKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen) +{ + SECStatus rv; + unsigned int modulusLen = rsa_modulusLen(&key->modulus); + SECItem formatted; + SECItem unformatted; + + formatted.data = NULL; + if (maxOutputLen < modulusLen) + goto failure; + + unformatted.len = inputLen; + unformatted.data = (unsigned char *)input; + formatted.data = NULL; + rv = rsa_FormatBlock(&formatted, modulusLen, RSA_BlockPublic, + &unformatted); + if (rv != SECSuccess) + goto failure; + + rv = RSA_PublicKeyOp(key, output, 
formatted.data); + if (rv != SECSuccess) + goto failure; + + PORT_ZFree(formatted.data, modulusLen); + *outputLen = modulusLen; + return SECSuccess; + +failure: + if (formatted.data != NULL) + PORT_ZFree(formatted.data, modulusLen); + return SECFailure; +} + +/* XXX Doesn't set error code */ +SECStatus +RSA_DecryptBlock(RSAPrivateKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen) +{ + SECStatus rv; + unsigned int modulusLen = rsa_modulusLen(&key->modulus); + unsigned int i; + unsigned char *buffer; + + if (inputLen != modulusLen) + goto failure; + + buffer = (unsigned char *)PORT_Alloc(modulusLen + 1); + if (!buffer) + goto failure; + + rv = RSA_PrivateKeyOp(key, buffer, input); + if (rv != SECSuccess) + goto loser; + + /* XXX(rsleevi): Constant time */ + if (buffer[0] != RSA_BLOCK_FIRST_OCTET || + buffer[1] != (unsigned char)RSA_BlockPublic) { + goto loser; + } + *outputLen = 0; + for (i = 2; i < modulusLen; i++) { + if (buffer[i] == RSA_BLOCK_AFTER_PAD_OCTET) { + *outputLen = modulusLen - i - 1; + break; + } + } + if (*outputLen == 0) + goto loser; + if (*outputLen > maxOutputLen) + goto loser; + + PORT_Memcpy(output, buffer + modulusLen - *outputLen, *outputLen); + + PORT_Free(buffer); + return SECSuccess; + +loser: + PORT_Free(buffer); +failure: + return SECFailure; +} + +/* + * Encode a RSA-PSS signature. + * Described in RFC 3447, section 9.1.1. + * We use mHash instead of M as input. + * emBits from the RFC is just modBits - 1, see section 8.1.1. + * We only support MGF1 as the MGF. + * + * NOTE: this code assumes modBits is a multiple of 8. 
+ */ +static SECStatus +emsa_pss_encode(unsigned char *em, + unsigned int emLen, + const unsigned char *mHash, + HASH_HashType hashAlg, + HASH_HashType maskHashAlg, + const unsigned char *salt, + unsigned int saltLen) +{ + const SECHashObject *hash; + void *hash_context; + unsigned char *dbMask; + unsigned int dbMaskLen; + unsigned int i; + SECStatus rv; + + hash = HASH_GetRawHashObject(hashAlg); + dbMaskLen = emLen - hash->length - 1; + + /* Step 3 */ + if (emLen < hash->length + saltLen + 2) { + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + + /* Step 4 */ + if (salt == NULL) { + rv = RNG_GenerateGlobalRandomBytes(&em[dbMaskLen - saltLen], saltLen); + if (rv != SECSuccess) { + return rv; + } + } else { + PORT_Memcpy(&em[dbMaskLen - saltLen], salt, saltLen); + } + + /* Step 5 + 6 */ + /* Compute H and store it at its final location &em[dbMaskLen]. */ + hash_context = (*hash->create)(); + if (hash_context == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return SECFailure; + } + (*hash->begin)(hash_context); + (*hash->update)(hash_context, eightZeros, 8); + (*hash->update)(hash_context, mHash, hash->length); + (*hash->update)(hash_context, &em[dbMaskLen - saltLen], saltLen); + (*hash->end)(hash_context, &em[dbMaskLen], &i, hash->length); + (*hash->destroy)(hash_context, PR_TRUE); + + /* Step 7 + 8 */ + PORT_Memset(em, 0, dbMaskLen - saltLen - 1); + em[dbMaskLen - saltLen - 1] = 0x01; + + /* Step 9 */ + dbMask = (unsigned char *)PORT_Alloc(dbMaskLen); + if (dbMask == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return SECFailure; + } + MGF1(maskHashAlg, dbMask, dbMaskLen, &em[dbMaskLen], hash->length); + + /* Step 10 */ + for (i = 0; i < dbMaskLen; i++) + em[i] ^= dbMask[i]; + PORT_Free(dbMask); + + /* Step 11 */ + em[0] &= 0x7f; + + /* Step 12 */ + em[emLen - 1] = 0xbc; + + return SECSuccess; +} + +/* + * Verify a RSA-PSS signature. + * Described in RFC 3447, section 9.1.2. + * We use mHash instead of M as input. 
+ * emBits from the RFC is just modBits - 1, see section 8.1.2. + * We only support MGF1 as the MGF. + * + * NOTE: this code assumes modBits is a multiple of 8. + */ +static SECStatus +emsa_pss_verify(const unsigned char *mHash, + const unsigned char *em, + unsigned int emLen, + HASH_HashType hashAlg, + HASH_HashType maskHashAlg, + unsigned int saltLen) +{ + const SECHashObject *hash; + void *hash_context; + unsigned char *db; + unsigned char *H_; /* H' from the RFC */ + unsigned int i; + unsigned int dbMaskLen; + SECStatus rv; + + hash = HASH_GetRawHashObject(hashAlg); + dbMaskLen = emLen - hash->length - 1; + + /* Step 3 + 4 + 6 */ + if ((emLen < (hash->length + saltLen + 2)) || + (em[emLen - 1] != 0xbc) || + ((em[0] & 0x80) != 0)) { + PORT_SetError(SEC_ERROR_BAD_SIGNATURE); + return SECFailure; + } + + /* Step 7 */ + db = (unsigned char *)PORT_Alloc(dbMaskLen); + if (db == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return SECFailure; + } + /* &em[dbMaskLen] points to H, used as mgfSeed */ + MGF1(maskHashAlg, db, dbMaskLen, &em[dbMaskLen], hash->length); + + /* Step 8 */ + for (i = 0; i < dbMaskLen; i++) { + db[i] ^= em[i]; + } + + /* Step 9 */ + db[0] &= 0x7f; + + /* Step 10 */ + for (i = 0; i < (dbMaskLen - saltLen - 1); i++) { + if (db[i] != 0) { + PORT_Free(db); + PORT_SetError(SEC_ERROR_BAD_SIGNATURE); + return SECFailure; + } + } + if (db[dbMaskLen - saltLen - 1] != 0x01) { + PORT_Free(db); + PORT_SetError(SEC_ERROR_BAD_SIGNATURE); + return SECFailure; + } + + /* Step 12 + 13 */ + H_ = (unsigned char *)PORT_Alloc(hash->length); + if (H_ == NULL) { + PORT_Free(db); + PORT_SetError(SEC_ERROR_NO_MEMORY); + return SECFailure; + } + hash_context = (*hash->create)(); + if (hash_context == NULL) { + PORT_Free(db); + PORT_Free(H_); + PORT_SetError(SEC_ERROR_NO_MEMORY); + return SECFailure; + } + (*hash->begin)(hash_context); + (*hash->update)(hash_context, eightZeros, 8); + (*hash->update)(hash_context, mHash, hash->length); + (*hash->update)(hash_context, 
&db[dbMaskLen - saltLen], saltLen); + (*hash->end)(hash_context, H_, &i, hash->length); + (*hash->destroy)(hash_context, PR_TRUE); + + PORT_Free(db); + + /* Step 14 */ + if (PORT_Memcmp(H_, &em[dbMaskLen], hash->length) != 0) { + PORT_SetError(SEC_ERROR_BAD_SIGNATURE); + rv = SECFailure; + } else { + rv = SECSuccess; + } + + PORT_Free(H_); + return rv; +} + +SECStatus +RSA_SignPSS(RSAPrivateKey *key, + HASH_HashType hashAlg, + HASH_HashType maskHashAlg, + const unsigned char *salt, + unsigned int saltLength, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen) +{ + SECStatus rv = SECSuccess; + unsigned int modulusLen = rsa_modulusLen(&key->modulus); + unsigned char *pssEncoded = NULL; + + if (maxOutputLen < modulusLen) { + PORT_SetError(SEC_ERROR_OUTPUT_LEN); + return SECFailure; + } + + if ((hashAlg == HASH_AlgNULL) || (maskHashAlg == HASH_AlgNULL)) { + PORT_SetError(SEC_ERROR_INVALID_ALGORITHM); + return SECFailure; + } + + pssEncoded = (unsigned char *)PORT_Alloc(modulusLen); + if (pssEncoded == NULL) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return SECFailure; + } + rv = emsa_pss_encode(pssEncoded, modulusLen, input, hashAlg, + maskHashAlg, salt, saltLength); + if (rv != SECSuccess) + goto done; + + rv = RSA_PrivateKeyOpDoubleChecked(key, output, pssEncoded); + *outputLen = modulusLen; + +done: + PORT_Free(pssEncoded); + return rv; +} + +SECStatus +RSA_CheckSignPSS(RSAPublicKey *key, + HASH_HashType hashAlg, + HASH_HashType maskHashAlg, + unsigned int saltLength, + const unsigned char *sig, + unsigned int sigLen, + const unsigned char *hash, + unsigned int hashLen) +{ + SECStatus rv; + unsigned int modulusLen = rsa_modulusLen(&key->modulus); + unsigned char *buffer; + + if (sigLen != modulusLen) { + PORT_SetError(SEC_ERROR_BAD_SIGNATURE); + return SECFailure; + } + + if ((hashAlg == HASH_AlgNULL) || (maskHashAlg == HASH_AlgNULL)) { + PORT_SetError(SEC_ERROR_INVALID_ALGORITHM); + 
return SECFailure; + } + + buffer = (unsigned char *)PORT_Alloc(modulusLen); + if (!buffer) { + PORT_SetError(SEC_ERROR_NO_MEMORY); + return SECFailure; + } + + rv = RSA_PublicKeyOp(key, buffer, sig); + if (rv != SECSuccess) { + PORT_Free(buffer); + PORT_SetError(SEC_ERROR_BAD_SIGNATURE); + return SECFailure; + } + + rv = emsa_pss_verify(hash, buffer, modulusLen, hashAlg, + maskHashAlg, saltLength); + PORT_Free(buffer); + + return rv; +} + +/* XXX Doesn't set error code */ +SECStatus +RSA_Sign(RSAPrivateKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *input, + unsigned int inputLen) +{ + SECStatus rv = SECSuccess; + unsigned int modulusLen = rsa_modulusLen(&key->modulus); + SECItem formatted; + SECItem unformatted; + + if (maxOutputLen < modulusLen) + return SECFailure; + + unformatted.len = inputLen; + unformatted.data = (unsigned char *)input; + formatted.data = NULL; + rv = rsa_FormatBlock(&formatted, modulusLen, RSA_BlockPrivate, + &unformatted); + if (rv != SECSuccess) + goto done; + + rv = RSA_PrivateKeyOpDoubleChecked(key, output, formatted.data); + *outputLen = modulusLen; + + goto done; + +done: + if (formatted.data != NULL) + PORT_ZFree(formatted.data, modulusLen); + return rv; +} + +/* XXX Doesn't set error code */ +SECStatus +RSA_CheckSign(RSAPublicKey *key, + const unsigned char *sig, + unsigned int sigLen, + const unsigned char *data, + unsigned int dataLen) +{ + SECStatus rv; + unsigned int modulusLen = rsa_modulusLen(&key->modulus); + unsigned int i; + unsigned char *buffer; + + if (sigLen != modulusLen) + goto failure; + /* + * 0x00 || BT || Pad || 0x00 || ActualData + * + * The "3" below is the first octet + the second octet + the 0x00 + * octet that always comes just before the ActualData. 
+ */ + if (dataLen > modulusLen - (3 + RSA_BLOCK_MIN_PAD_LEN)) + goto failure; + + buffer = (unsigned char *)PORT_Alloc(modulusLen + 1); + if (!buffer) + goto failure; + + rv = RSA_PublicKeyOp(key, buffer, sig); + if (rv != SECSuccess) + goto loser; + + /* + * check the padding that was used + */ + if (buffer[0] != RSA_BLOCK_FIRST_OCTET || + buffer[1] != (unsigned char)RSA_BlockPrivate) { + goto loser; + } + for (i = 2; i < modulusLen - dataLen - 1; i++) { + if (buffer[i] != RSA_BLOCK_PRIVATE_PAD_OCTET) + goto loser; + } + if (buffer[i] != RSA_BLOCK_AFTER_PAD_OCTET) + goto loser; + + /* + * make sure we get the same results + */ + if (PORT_Memcmp(buffer + modulusLen - dataLen, data, dataLen) != 0) + goto loser; + + PORT_Free(buffer); + return SECSuccess; + +loser: + PORT_Free(buffer); +failure: + return SECFailure; +} + +/* XXX Doesn't set error code */ +SECStatus +RSA_CheckSignRecover(RSAPublicKey *key, + unsigned char *output, + unsigned int *outputLen, + unsigned int maxOutputLen, + const unsigned char *sig, + unsigned int sigLen) +{ + SECStatus rv; + unsigned int modulusLen = rsa_modulusLen(&key->modulus); + unsigned int i; + unsigned char *buffer; + + if (sigLen != modulusLen) + goto failure; + + buffer = (unsigned char *)PORT_Alloc(modulusLen + 1); + if (!buffer) + goto failure; + + rv = RSA_PublicKeyOp(key, buffer, sig); + if (rv != SECSuccess) + goto loser; + *outputLen = 0; + + /* + * check the padding that was used + */ + if (buffer[0] != RSA_BLOCK_FIRST_OCTET || + buffer[1] != (unsigned char)RSA_BlockPrivate) { + goto loser; + } + for (i = 2; i < modulusLen; i++) { + if (buffer[i] == RSA_BLOCK_AFTER_PAD_OCTET) { + *outputLen = modulusLen - i - 1; + break; + } + if (buffer[i] != RSA_BLOCK_PRIVATE_PAD_OCTET) + goto loser; + } + if (*outputLen == 0) + goto loser; + if (*outputLen > maxOutputLen) + goto loser; + + PORT_Memcpy(output, buffer + modulusLen - *outputLen, *outputLen); + + PORT_Free(buffer); + return SECSuccess; + +loser: + PORT_Free(buffer); 
+failure:
+    return SECFailure;
+}
diff --git a/security/nss/lib/freebl/secmpi.h b/security/nss/lib/freebl/secmpi.h
new file mode 100644
index 000000000..5e8fd1105
--- /dev/null
+++ b/security/nss/lib/freebl/secmpi.h
@@ -0,0 +1,54 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "mpi.h"
+/* Store func's result in the caller's rv; goto the caller's cleanup label unless it is SECSuccess. */
+#define CHECK_SEC_OK(func)         \
+    if (SECSuccess != (rv = func)) \
+    goto cleanup
+/* Store func's result in the caller's err; goto the caller's cleanup label when it is below MP_OKAY. */
+#define CHECK_MPI_OK(func)      \
+    if (MP_OKAY > (err = func)) \
+    goto cleanup
+/* Read len octets from oc into mp with mp_read_unsigned_octets(), checked by CHECK_MPI_OK. */
+#define OCTETS_TO_MPINT(oc, mp, len) \
+    CHECK_MPI_OK(mp_read_unsigned_octets((mp), oc, len))
+/* Same as OCTETS_TO_MPINT, taking the octets from the SECItem it (passed by value, not pointer). */
+#define SECITEM_TO_MPINT(it, mp) \
+    CHECK_MPI_OK(mp_read_unsigned_octets((mp), (it).data, (it).len))
+/* Allocate it (a SECItem pointer) from arena and fill it with mp's octets; sets err and jumps to cleanup on failure. */
+#define MPINT_TO_SECITEM(mp, it, arena)                         \
+    do {                                                        \
+        int mpintLen = mp_unsigned_octet_size(mp);              \
+        if (mpintLen <= 0) {                                    \
+            err = MP_RANGE;                                     \
+            goto cleanup;                                       \
+        }                                                       \
+        SECITEM_AllocItem(arena, (it), mpintLen);               \
+        if ((it)->data == NULL) {                               \
+            err = MP_MEM;                                       \
+            goto cleanup;                                       \
+        }                                                       \
+        err = mp_to_unsigned_octets(mp, (it)->data, (it)->len); \
+        if (err < 0)                                            \
+            goto cleanup;                                       \
+        else                                                    \
+            err = MP_OKAY;                                      \
+    } while (0)
+/* Map an MPI error code to PORT_SetError(); NOTE(review): expands to a bare switch, not a do/while(0) statement. */
+#define MP_TO_SEC_ERROR(err)                          \
+    switch (err) {                                    \
+        case MP_MEM:                                  \
+            PORT_SetError(SEC_ERROR_NO_MEMORY);       \
+            break;                                    \
+        case MP_RANGE:                                \
+            PORT_SetError(SEC_ERROR_BAD_DATA);        \
+            break;                                    \
+        case MP_BADARG:                               \
+            PORT_SetError(SEC_ERROR_INVALID_ARGS);    \
+            break;                                    \
+        default:                                      \
+            PORT_SetError(SEC_ERROR_LIBRARY_FAILURE); \
+            break;                                    \
+    }
diff --git a/security/nss/lib/freebl/secrng.h b/security/nss/lib/freebl/secrng.h
new file mode 100644
index 000000000..19eae4833
--- /dev/null
+++ b/security/nss/lib/freebl/secrng.h
@@ -0,0 +1,65 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/ + +#ifndef _SECRNG_H_ +#define _SECRNG_H_ +/* + * secrng.h - public data structures and prototypes for the secure random + * number generator + */ + +/******************************************/ +/* +** Random number generation. A cryptographically strong random number +** generator. +*/ + +#include "blapi.h" + +/* the number of bytes to read from the system random number generator */ +#define SYSTEM_RNG_SEED_COUNT 1024 + +SEC_BEGIN_PROTOS + +/* +** The following functions are provided by the security library +** but are differently implemented for the UNIX, Win, and OS/2 +** versions +*/ + +/* +** Get the "noisiest" information available on the system. +** The amount of data returned depends on the system implementation. +** It will not exceed maxbytes, but may be (much) less. +** Returns number of noise bytes copied into buf, or zero if error. +*/ +extern size_t RNG_GetNoise(void *buf, size_t maxbytes); + +/* +** RNG_SystemInfoForRNG should be called before any use of SSL. It +** gathers up the system specific information to help seed the +** state of the global random number generator. +*/ +extern void RNG_SystemInfoForRNG(void); + +/* +** Use the contents (and stat) of a file to help seed the +** global random number generator. +*/ +extern void RNG_FileForRNG(const char *filename); + +/* +** Get maxbytes bytes of random data from the system random number +** generator. +** Returns the number of bytes copied into buf -- maxbytes if success +** or zero if error. +** Errors: +** PR_NOT_IMPLEMENTED_ERROR There is no system RNG on the platform. +** SEC_ERROR_NEED_RANDOM The system RNG failed. 
+*/ +extern size_t RNG_SystemRNG(void *buf, size_t maxbytes); + +SEC_END_PROTOS + +#endif /* _SECRNG_H_ */ diff --git a/security/nss/lib/freebl/seed.c b/security/nss/lib/freebl/seed.c new file mode 100644 index 000000000..f198cce45 --- /dev/null +++ b/security/nss/lib/freebl/seed.c @@ -0,0 +1,641 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include +#include +#include +#include +#ifdef WIN32 +#include +#endif + +#include "seed.h" +#include "secerr.h" + +static const seed_word SS[4][256] = { + { 0x2989a1a8, 0x05858184, 0x16c6d2d4, 0x13c3d3d0, + 0x14445054, 0x1d0d111c, 0x2c8ca0ac, 0x25052124, + 0x1d4d515c, 0x03434340, 0x18081018, 0x1e0e121c, + 0x11415150, 0x3cccf0fc, 0x0acac2c8, 0x23436360, + 0x28082028, 0x04444044, 0x20002020, 0x1d8d919c, + 0x20c0e0e0, 0x22c2e2e0, 0x08c8c0c8, 0x17071314, + 0x2585a1a4, 0x0f8f838c, 0x03030300, 0x3b4b7378, + 0x3b8bb3b8, 0x13031310, 0x12c2d2d0, 0x2ecee2ec, + 0x30407070, 0x0c8c808c, 0x3f0f333c, 0x2888a0a8, + 0x32023230, 0x1dcdd1dc, 0x36c6f2f4, 0x34447074, + 0x2ccce0ec, 0x15859194, 0x0b0b0308, 0x17475354, + 0x1c4c505c, 0x1b4b5358, 0x3d8db1bc, 0x01010100, + 0x24042024, 0x1c0c101c, 0x33437370, 0x18889098, + 0x10001010, 0x0cccc0cc, 0x32c2f2f0, 0x19c9d1d8, + 0x2c0c202c, 0x27c7e3e4, 0x32427270, 0x03838380, + 0x1b8b9398, 0x11c1d1d0, 0x06868284, 0x09c9c1c8, + 0x20406060, 0x10405050, 0x2383a3a0, 0x2bcbe3e8, + 0x0d0d010c, 0x3686b2b4, 0x1e8e929c, 0x0f4f434c, + 0x3787b3b4, 0x1a4a5258, 0x06c6c2c4, 0x38487078, + 0x2686a2a4, 0x12021210, 0x2f8fa3ac, 0x15c5d1d4, + 0x21416160, 0x03c3c3c0, 0x3484b0b4, 0x01414140, + 0x12425250, 0x3d4d717c, 0x0d8d818c, 0x08080008, + 0x1f0f131c, 0x19899198, 0x00000000, 0x19091118, + 0x04040004, 0x13435350, 0x37c7f3f4, 0x21c1e1e0, + 0x3dcdf1fc, 0x36467274, 0x2f0f232c, 0x27072324, + 0x3080b0b0, 0x0b8b8388, 
0x0e0e020c, 0x2b8ba3a8, + 0x2282a2a0, 0x2e4e626c, 0x13839390, 0x0d4d414c, + 0x29496168, 0x3c4c707c, 0x09090108, 0x0a0a0208, + 0x3f8fb3bc, 0x2fcfe3ec, 0x33c3f3f0, 0x05c5c1c4, + 0x07878384, 0x14041014, 0x3ecef2fc, 0x24446064, + 0x1eced2dc, 0x2e0e222c, 0x0b4b4348, 0x1a0a1218, + 0x06060204, 0x21012120, 0x2b4b6368, 0x26466264, + 0x02020200, 0x35c5f1f4, 0x12829290, 0x0a8a8288, + 0x0c0c000c, 0x3383b3b0, 0x3e4e727c, 0x10c0d0d0, + 0x3a4a7278, 0x07474344, 0x16869294, 0x25c5e1e4, + 0x26062224, 0x00808080, 0x2d8da1ac, 0x1fcfd3dc, + 0x2181a1a0, 0x30003030, 0x37073334, 0x2e8ea2ac, + 0x36063234, 0x15051114, 0x22022220, 0x38083038, + 0x34c4f0f4, 0x2787a3a4, 0x05454144, 0x0c4c404c, + 0x01818180, 0x29c9e1e8, 0x04848084, 0x17879394, + 0x35053134, 0x0bcbc3c8, 0x0ecec2cc, 0x3c0c303c, + 0x31417170, 0x11011110, 0x07c7c3c4, 0x09898188, + 0x35457174, 0x3bcbf3f8, 0x1acad2d8, 0x38c8f0f8, + 0x14849094, 0x19495158, 0x02828280, 0x04c4c0c4, + 0x3fcff3fc, 0x09494148, 0x39093138, 0x27476364, + 0x00c0c0c0, 0x0fcfc3cc, 0x17c7d3d4, 0x3888b0b8, + 0x0f0f030c, 0x0e8e828c, 0x02424240, 0x23032320, + 0x11819190, 0x2c4c606c, 0x1bcbd3d8, 0x2484a0a4, + 0x34043034, 0x31c1f1f0, 0x08484048, 0x02c2c2c0, + 0x2f4f636c, 0x3d0d313c, 0x2d0d212c, 0x00404040, + 0x3e8eb2bc, 0x3e0e323c, 0x3c8cb0bc, 0x01c1c1c0, + 0x2a8aa2a8, 0x3a8ab2b8, 0x0e4e424c, 0x15455154, + 0x3b0b3338, 0x1cccd0dc, 0x28486068, 0x3f4f737c, + 0x1c8c909c, 0x18c8d0d8, 0x0a4a4248, 0x16465254, + 0x37477374, 0x2080a0a0, 0x2dcde1ec, 0x06464244, + 0x3585b1b4, 0x2b0b2328, 0x25456164, 0x3acaf2f8, + 0x23c3e3e0, 0x3989b1b8, 0x3181b1b0, 0x1f8f939c, + 0x1e4e525c, 0x39c9f1f8, 0x26c6e2e4, 0x3282b2b0, + 0x31013130, 0x2acae2e8, 0x2d4d616c, 0x1f4f535c, + 0x24c4e0e4, 0x30c0f0f0, 0x0dcdc1cc, 0x08888088, + 0x16061214, 0x3a0a3238, 0x18485058, 0x14c4d0d4, + 0x22426260, 0x29092128, 0x07070304, 0x33033330, + 0x28c8e0e8, 0x1b0b1318, 0x05050104, 0x39497178, + 0x10809090, 0x2a4a6268, 0x2a0a2228, 0x1a8a9298 }, + { 0x38380830, 0xe828c8e0, 0x2c2d0d21, 0xa42686a2, + 0xcc0fcfc3, 
0xdc1eced2, 0xb03383b3, 0xb83888b0, + 0xac2f8fa3, 0x60204060, 0x54154551, 0xc407c7c3, + 0x44044440, 0x6c2f4f63, 0x682b4b63, 0x581b4b53, + 0xc003c3c3, 0x60224262, 0x30330333, 0xb43585b1, + 0x28290921, 0xa02080a0, 0xe022c2e2, 0xa42787a3, + 0xd013c3d3, 0x90118191, 0x10110111, 0x04060602, + 0x1c1c0c10, 0xbc3c8cb0, 0x34360632, 0x480b4b43, + 0xec2fcfe3, 0x88088880, 0x6c2c4c60, 0xa82888a0, + 0x14170713, 0xc404c4c0, 0x14160612, 0xf434c4f0, + 0xc002c2c2, 0x44054541, 0xe021c1e1, 0xd416c6d2, + 0x3c3f0f33, 0x3c3d0d31, 0x8c0e8e82, 0x98188890, + 0x28280820, 0x4c0e4e42, 0xf436c6f2, 0x3c3e0e32, + 0xa42585a1, 0xf839c9f1, 0x0c0d0d01, 0xdc1fcfd3, + 0xd818c8d0, 0x282b0b23, 0x64264662, 0x783a4a72, + 0x24270723, 0x2c2f0f23, 0xf031c1f1, 0x70324272, + 0x40024242, 0xd414c4d0, 0x40014141, 0xc000c0c0, + 0x70334373, 0x64274763, 0xac2c8ca0, 0x880b8b83, + 0xf437c7f3, 0xac2d8da1, 0x80008080, 0x1c1f0f13, + 0xc80acac2, 0x2c2c0c20, 0xa82a8aa2, 0x34340430, + 0xd012c2d2, 0x080b0b03, 0xec2ecee2, 0xe829c9e1, + 0x5c1d4d51, 0x94148490, 0x18180810, 0xf838c8f0, + 0x54174753, 0xac2e8ea2, 0x08080800, 0xc405c5c1, + 0x10130313, 0xcc0dcdc1, 0x84068682, 0xb83989b1, + 0xfc3fcff3, 0x7c3d4d71, 0xc001c1c1, 0x30310131, + 0xf435c5f1, 0x880a8a82, 0x682a4a62, 0xb03181b1, + 0xd011c1d1, 0x20200020, 0xd417c7d3, 0x00020202, + 0x20220222, 0x04040400, 0x68284860, 0x70314171, + 0x04070703, 0xd81bcbd3, 0x9c1d8d91, 0x98198991, + 0x60214161, 0xbc3e8eb2, 0xe426c6e2, 0x58194951, + 0xdc1dcdd1, 0x50114151, 0x90108090, 0xdc1cccd0, + 0x981a8a92, 0xa02383a3, 0xa82b8ba3, 0xd010c0d0, + 0x80018181, 0x0c0f0f03, 0x44074743, 0x181a0a12, + 0xe023c3e3, 0xec2ccce0, 0x8c0d8d81, 0xbc3f8fb3, + 0x94168692, 0x783b4b73, 0x5c1c4c50, 0xa02282a2, + 0xa02181a1, 0x60234363, 0x20230323, 0x4c0d4d41, + 0xc808c8c0, 0x9c1e8e92, 0x9c1c8c90, 0x383a0a32, + 0x0c0c0c00, 0x2c2e0e22, 0xb83a8ab2, 0x6c2e4e62, + 0x9c1f8f93, 0x581a4a52, 0xf032c2f2, 0x90128292, + 0xf033c3f3, 0x48094941, 0x78384870, 0xcc0cccc0, + 0x14150511, 0xf83bcbf3, 0x70304070, 0x74354571, + 0x7c3f4f73, 
0x34350531, 0x10100010, 0x00030303, + 0x64244460, 0x6c2d4d61, 0xc406c6c2, 0x74344470, + 0xd415c5d1, 0xb43484b0, 0xe82acae2, 0x08090901, + 0x74364672, 0x18190911, 0xfc3ecef2, 0x40004040, + 0x10120212, 0xe020c0e0, 0xbc3d8db1, 0x04050501, + 0xf83acaf2, 0x00010101, 0xf030c0f0, 0x282a0a22, + 0x5c1e4e52, 0xa82989a1, 0x54164652, 0x40034343, + 0x84058581, 0x14140410, 0x88098981, 0x981b8b93, + 0xb03080b0, 0xe425c5e1, 0x48084840, 0x78394971, + 0x94178793, 0xfc3cccf0, 0x1c1e0e12, 0x80028282, + 0x20210121, 0x8c0c8c80, 0x181b0b13, 0x5c1f4f53, + 0x74374773, 0x54144450, 0xb03282b2, 0x1c1d0d11, + 0x24250521, 0x4c0f4f43, 0x00000000, 0x44064642, + 0xec2dcde1, 0x58184850, 0x50124252, 0xe82bcbe3, + 0x7c3e4e72, 0xd81acad2, 0xc809c9c1, 0xfc3dcdf1, + 0x30300030, 0x94158591, 0x64254561, 0x3c3c0c30, + 0xb43686b2, 0xe424c4e0, 0xb83b8bb3, 0x7c3c4c70, + 0x0c0e0e02, 0x50104050, 0x38390931, 0x24260622, + 0x30320232, 0x84048480, 0x68294961, 0x90138393, + 0x34370733, 0xe427c7e3, 0x24240420, 0xa42484a0, + 0xc80bcbc3, 0x50134353, 0x080a0a02, 0x84078783, + 0xd819c9d1, 0x4c0c4c40, 0x80038383, 0x8c0f8f83, + 0xcc0ecec2, 0x383b0b33, 0x480a4a42, 0xb43787b3 }, + { 0xa1a82989, 0x81840585, 0xd2d416c6, 0xd3d013c3, + 0x50541444, 0x111c1d0d, 0xa0ac2c8c, 0x21242505, + 0x515c1d4d, 0x43400343, 0x10181808, 0x121c1e0e, + 0x51501141, 0xf0fc3ccc, 0xc2c80aca, 0x63602343, + 0x20282808, 0x40440444, 0x20202000, 0x919c1d8d, + 0xe0e020c0, 0xe2e022c2, 0xc0c808c8, 0x13141707, + 0xa1a42585, 0x838c0f8f, 0x03000303, 0x73783b4b, + 0xb3b83b8b, 0x13101303, 0xd2d012c2, 0xe2ec2ece, + 0x70703040, 0x808c0c8c, 0x333c3f0f, 0xa0a82888, + 0x32303202, 0xd1dc1dcd, 0xf2f436c6, 0x70743444, + 0xe0ec2ccc, 0x91941585, 0x03080b0b, 0x53541747, + 0x505c1c4c, 0x53581b4b, 0xb1bc3d8d, 0x01000101, + 0x20242404, 0x101c1c0c, 0x73703343, 0x90981888, + 0x10101000, 0xc0cc0ccc, 0xf2f032c2, 0xd1d819c9, + 0x202c2c0c, 0xe3e427c7, 0x72703242, 0x83800383, + 0x93981b8b, 0xd1d011c1, 0x82840686, 0xc1c809c9, + 0x60602040, 0x50501040, 0xa3a02383, 0xe3e82bcb, + 
0x010c0d0d, 0xb2b43686, 0x929c1e8e, 0x434c0f4f, + 0xb3b43787, 0x52581a4a, 0xc2c406c6, 0x70783848, + 0xa2a42686, 0x12101202, 0xa3ac2f8f, 0xd1d415c5, + 0x61602141, 0xc3c003c3, 0xb0b43484, 0x41400141, + 0x52501242, 0x717c3d4d, 0x818c0d8d, 0x00080808, + 0x131c1f0f, 0x91981989, 0x00000000, 0x11181909, + 0x00040404, 0x53501343, 0xf3f437c7, 0xe1e021c1, + 0xf1fc3dcd, 0x72743646, 0x232c2f0f, 0x23242707, + 0xb0b03080, 0x83880b8b, 0x020c0e0e, 0xa3a82b8b, + 0xa2a02282, 0x626c2e4e, 0x93901383, 0x414c0d4d, + 0x61682949, 0x707c3c4c, 0x01080909, 0x02080a0a, + 0xb3bc3f8f, 0xe3ec2fcf, 0xf3f033c3, 0xc1c405c5, + 0x83840787, 0x10141404, 0xf2fc3ece, 0x60642444, + 0xd2dc1ece, 0x222c2e0e, 0x43480b4b, 0x12181a0a, + 0x02040606, 0x21202101, 0x63682b4b, 0x62642646, + 0x02000202, 0xf1f435c5, 0x92901282, 0x82880a8a, + 0x000c0c0c, 0xb3b03383, 0x727c3e4e, 0xd0d010c0, + 0x72783a4a, 0x43440747, 0x92941686, 0xe1e425c5, + 0x22242606, 0x80800080, 0xa1ac2d8d, 0xd3dc1fcf, + 0xa1a02181, 0x30303000, 0x33343707, 0xa2ac2e8e, + 0x32343606, 0x11141505, 0x22202202, 0x30383808, + 0xf0f434c4, 0xa3a42787, 0x41440545, 0x404c0c4c, + 0x81800181, 0xe1e829c9, 0x80840484, 0x93941787, + 0x31343505, 0xc3c80bcb, 0xc2cc0ece, 0x303c3c0c, + 0x71703141, 0x11101101, 0xc3c407c7, 0x81880989, + 0x71743545, 0xf3f83bcb, 0xd2d81aca, 0xf0f838c8, + 0x90941484, 0x51581949, 0x82800282, 0xc0c404c4, + 0xf3fc3fcf, 0x41480949, 0x31383909, 0x63642747, + 0xc0c000c0, 0xc3cc0fcf, 0xd3d417c7, 0xb0b83888, + 0x030c0f0f, 0x828c0e8e, 0x42400242, 0x23202303, + 0x91901181, 0x606c2c4c, 0xd3d81bcb, 0xa0a42484, + 0x30343404, 0xf1f031c1, 0x40480848, 0xc2c002c2, + 0x636c2f4f, 0x313c3d0d, 0x212c2d0d, 0x40400040, + 0xb2bc3e8e, 0x323c3e0e, 0xb0bc3c8c, 0xc1c001c1, + 0xa2a82a8a, 0xb2b83a8a, 0x424c0e4e, 0x51541545, + 0x33383b0b, 0xd0dc1ccc, 0x60682848, 0x737c3f4f, + 0x909c1c8c, 0xd0d818c8, 0x42480a4a, 0x52541646, + 0x73743747, 0xa0a02080, 0xe1ec2dcd, 0x42440646, + 0xb1b43585, 0x23282b0b, 0x61642545, 0xf2f83aca, + 0xe3e023c3, 0xb1b83989, 0xb1b03181, 0x939c1f8f, + 
0x525c1e4e, 0xf1f839c9, 0xe2e426c6, 0xb2b03282, + 0x31303101, 0xe2e82aca, 0x616c2d4d, 0x535c1f4f, + 0xe0e424c4, 0xf0f030c0, 0xc1cc0dcd, 0x80880888, + 0x12141606, 0x32383a0a, 0x50581848, 0xd0d414c4, + 0x62602242, 0x21282909, 0x03040707, 0x33303303, + 0xe0e828c8, 0x13181b0b, 0x01040505, 0x71783949, + 0x90901080, 0x62682a4a, 0x22282a0a, 0x92981a8a }, + { 0x08303838, 0xc8e0e828, 0x0d212c2d, 0x86a2a426, + 0xcfc3cc0f, 0xced2dc1e, 0x83b3b033, 0x88b0b838, + 0x8fa3ac2f, 0x40606020, 0x45515415, 0xc7c3c407, + 0x44404404, 0x4f636c2f, 0x4b63682b, 0x4b53581b, + 0xc3c3c003, 0x42626022, 0x03333033, 0x85b1b435, + 0x09212829, 0x80a0a020, 0xc2e2e022, 0x87a3a427, + 0xc3d3d013, 0x81919011, 0x01111011, 0x06020406, + 0x0c101c1c, 0x8cb0bc3c, 0x06323436, 0x4b43480b, + 0xcfe3ec2f, 0x88808808, 0x4c606c2c, 0x88a0a828, + 0x07131417, 0xc4c0c404, 0x06121416, 0xc4f0f434, + 0xc2c2c002, 0x45414405, 0xc1e1e021, 0xc6d2d416, + 0x0f333c3f, 0x0d313c3d, 0x8e828c0e, 0x88909818, + 0x08202828, 0x4e424c0e, 0xc6f2f436, 0x0e323c3e, + 0x85a1a425, 0xc9f1f839, 0x0d010c0d, 0xcfd3dc1f, + 0xc8d0d818, 0x0b23282b, 0x46626426, 0x4a72783a, + 0x07232427, 0x0f232c2f, 0xc1f1f031, 0x42727032, + 0x42424002, 0xc4d0d414, 0x41414001, 0xc0c0c000, + 0x43737033, 0x47636427, 0x8ca0ac2c, 0x8b83880b, + 0xc7f3f437, 0x8da1ac2d, 0x80808000, 0x0f131c1f, + 0xcac2c80a, 0x0c202c2c, 0x8aa2a82a, 0x04303434, + 0xc2d2d012, 0x0b03080b, 0xcee2ec2e, 0xc9e1e829, + 0x4d515c1d, 0x84909414, 0x08101818, 0xc8f0f838, + 0x47535417, 0x8ea2ac2e, 0x08000808, 0xc5c1c405, + 0x03131013, 0xcdc1cc0d, 0x86828406, 0x89b1b839, + 0xcff3fc3f, 0x4d717c3d, 0xc1c1c001, 0x01313031, + 0xc5f1f435, 0x8a82880a, 0x4a62682a, 0x81b1b031, + 0xc1d1d011, 0x00202020, 0xc7d3d417, 0x02020002, + 0x02222022, 0x04000404, 0x48606828, 0x41717031, + 0x07030407, 0xcbd3d81b, 0x8d919c1d, 0x89919819, + 0x41616021, 0x8eb2bc3e, 0xc6e2e426, 0x49515819, + 0xcdd1dc1d, 0x41515011, 0x80909010, 0xccd0dc1c, + 0x8a92981a, 0x83a3a023, 0x8ba3a82b, 0xc0d0d010, + 0x81818001, 0x0f030c0f, 0x47434407, 
0x0a12181a, + 0xc3e3e023, 0xcce0ec2c, 0x8d818c0d, 0x8fb3bc3f, + 0x86929416, 0x4b73783b, 0x4c505c1c, 0x82a2a022, + 0x81a1a021, 0x43636023, 0x03232023, 0x4d414c0d, + 0xc8c0c808, 0x8e929c1e, 0x8c909c1c, 0x0a32383a, + 0x0c000c0c, 0x0e222c2e, 0x8ab2b83a, 0x4e626c2e, + 0x8f939c1f, 0x4a52581a, 0xc2f2f032, 0x82929012, + 0xc3f3f033, 0x49414809, 0x48707838, 0xccc0cc0c, + 0x05111415, 0xcbf3f83b, 0x40707030, 0x45717435, + 0x4f737c3f, 0x05313435, 0x00101010, 0x03030003, + 0x44606424, 0x4d616c2d, 0xc6c2c406, 0x44707434, + 0xc5d1d415, 0x84b0b434, 0xcae2e82a, 0x09010809, + 0x46727436, 0x09111819, 0xcef2fc3e, 0x40404000, + 0x02121012, 0xc0e0e020, 0x8db1bc3d, 0x05010405, + 0xcaf2f83a, 0x01010001, 0xc0f0f030, 0x0a22282a, + 0x4e525c1e, 0x89a1a829, 0x46525416, 0x43434003, + 0x85818405, 0x04101414, 0x89818809, 0x8b93981b, + 0x80b0b030, 0xc5e1e425, 0x48404808, 0x49717839, + 0x87939417, 0xccf0fc3c, 0x0e121c1e, 0x82828002, + 0x01212021, 0x8c808c0c, 0x0b13181b, 0x4f535c1f, + 0x47737437, 0x44505414, 0x82b2b032, 0x0d111c1d, + 0x05212425, 0x4f434c0f, 0x00000000, 0x46424406, + 0xcde1ec2d, 0x48505818, 0x42525012, 0xcbe3e82b, + 0x4e727c3e, 0xcad2d81a, 0xc9c1c809, 0xcdf1fc3d, + 0x00303030, 0x85919415, 0x45616425, 0x0c303c3c, + 0x86b2b436, 0xc4e0e424, 0x8bb3b83b, 0x4c707c3c, + 0x0e020c0e, 0x40505010, 0x09313839, 0x06222426, + 0x02323032, 0x84808404, 0x49616829, 0x83939013, + 0x07333437, 0xc7e3e427, 0x04202424, 0x84a0a424, + 0xcbc3c80b, 0x43535013, 0x0a02080a, 0x87838407, + 0xc9d1d819, 0x4c404c0c, 0x83838003, 0x8f838c0f, + 0xcec2cc0e, 0x0b33383b, 0x4a42480a, 0x87b3b437 } +}; + +/* key schedule constants - golden ratio */ +#define KC0 0x9e3779b9 +#define KC1 0x3c6ef373 +#define KC2 0x78dde6e6 +#define KC3 0xf1bbcdcc +#define KC4 0xe3779b99 +#define KC5 0xc6ef3733 +#define KC6 0x8dde6e67 +#define KC7 0x1bbcdccf +#define KC8 0x3779b99e +#define KC9 0x6ef3733c +#define KC10 0xdde6e678 +#define KC11 0xbbcdccf1 +#define KC12 0x779b99e3 +#define KC13 0xef3733c6 +#define KC14 0xde6e678d +#define KC15 
0xbcdccf1b
+/*
+ * Build the key schedule ks from the raw key.
+ *
+ * The key is read as four seed_word values with char2word(). The
+ * schedule is then produced by sixteen KEYUPDATE_TEMP steps that target
+ * ks->data[0], [2], ... [30], separated by KEYSCHEDULE_UPDATE1 /
+ * KEYSCHEDULE_UPDATE0 steps that alternate and consume KC1..KC15 in
+ * order. Those macros are defined outside this part of the file.
+ * NOTE(review): each KEYUPDATE_TEMP presumably fills a pair of schedule
+ * words - confirm against the macro definition.
+ */
+void
+SEED_set_key(const unsigned char rawkey[SEED_KEY_LENGTH],
+             SEED_KEY_SCHEDULE *ks)
+{
+    seed_word K0, K1, K2, K3;
+    seed_word t0, t1;
+
+    char2word(rawkey, K0);
+    char2word(rawkey + 4, K1);
+    char2word(rawkey + 8, K2);
+    char2word(rawkey + 12, K3);
+
+    t0 = (K0 + K2 - KC0);
+    t1 = (K1 - K3 + KC0);
+    KEYUPDATE_TEMP(t0, t1, &ks->data[0]);
+    KEYSCHEDULE_UPDATE1(t0, t1, K0, K1, K2, K3, KC1);
+    KEYUPDATE_TEMP(t0, t1, &ks->data[2]);
+    KEYSCHEDULE_UPDATE0(t0, t1, K0, K1, K2, K3, KC2);
+    KEYUPDATE_TEMP(t0, t1, &ks->data[4]);
+    KEYSCHEDULE_UPDATE1(t0, t1, K0, K1, K2, K3, KC3);
+    KEYUPDATE_TEMP(t0, t1, &ks->data[6]);
+    KEYSCHEDULE_UPDATE0(t0, t1, K0, K1, K2, K3, KC4);
+    KEYUPDATE_TEMP(t0, t1, &ks->data[8]);
+    KEYSCHEDULE_UPDATE1(t0, t1, K0, K1, K2, K3, KC5);
+    KEYUPDATE_TEMP(t0, t1, &ks->data[10]);
+    KEYSCHEDULE_UPDATE0(t0, t1, K0, K1, K2, K3, KC6);
+    KEYUPDATE_TEMP(t0, t1, &ks->data[12]);
+    KEYSCHEDULE_UPDATE1(t0, t1, K0, K1, K2, K3, KC7);
+    KEYUPDATE_TEMP(t0, t1, &ks->data[14]);
+    KEYSCHEDULE_UPDATE0(t0, t1, K0, K1, K2, K3, KC8);
+    KEYUPDATE_TEMP(t0, t1, &ks->data[16]);
+    KEYSCHEDULE_UPDATE1(t0, t1, K0, K1, K2, K3, KC9);
+    KEYUPDATE_TEMP(t0, t1, &ks->data[18]);
+    KEYSCHEDULE_UPDATE0(t0, t1, K0, K1, K2, K3, KC10);
+    KEYUPDATE_TEMP(t0, t1, &ks->data[20]);
+    KEYSCHEDULE_UPDATE1(t0, t1, K0, K1, K2, K3, KC11);
+    KEYUPDATE_TEMP(t0, t1, &ks->data[22]);
+    KEYSCHEDULE_UPDATE0(t0, t1, K0, K1, K2, K3, KC12);
+    KEYUPDATE_TEMP(t0, t1, &ks->data[24]);
+    KEYSCHEDULE_UPDATE1(t0, t1, K0, K1, K2, K3, KC13);
+    KEYUPDATE_TEMP(t0, t1, &ks->data[26]);
+    KEYSCHEDULE_UPDATE0(t0, t1, K0, K1, K2, K3, KC14);
+    KEYUPDATE_TEMP(t0, t1, &ks->data[28]);
+    KEYSCHEDULE_UPDATE1(t0, t1, K0, K1, K2, K3, KC15);
+    KEYUPDATE_TEMP(t0, t1, &ks->data[30]);
+}
+/*
+ * Encrypt one block from s into d using key schedule ks.
+ *
+ * The block is read as four words (L0, L1, R0, R1). Sixteen E_SEED
+ * rounds follow, passing schedule offsets 0, 2, ... 30 and swapping the
+ * roles of the L and R pairs on every round. The words are written back
+ * in the order R0, R1, L0, L1.
+ */
+void
+SEED_encrypt(const unsigned char s[SEED_BLOCK_SIZE],
+             unsigned char d[SEED_BLOCK_SIZE],
+             const SEED_KEY_SCHEDULE *ks)
+{
+    seed_word L0, L1, R0, R1;
+    seed_word t0, t1;
+
+    char2word(s, L0);
+    char2word(s + 4, L1);
+    char2word(s + 8, R0);
+    char2word(s + 12, R1);
+
+    E_SEED(t0, t1, L0, L1, R0, R1, 0);
+    E_SEED(t0, t1, R0, R1, L0, L1, 2);
+    E_SEED(t0, t1, L0, L1, R0, R1, 4);
+    E_SEED(t0, t1, R0, R1, L0, L1, 6);
+    E_SEED(t0, t1, L0, L1, R0, R1, 8);
+    E_SEED(t0, t1, R0, R1, L0, L1, 10);
+    E_SEED(t0, t1, L0, L1, R0, R1, 12);
+    E_SEED(t0, t1, R0, R1, L0, L1, 14);
+    E_SEED(t0, t1, L0, L1, R0, R1, 16);
+    E_SEED(t0, t1, R0, R1, L0, L1, 18);
+    E_SEED(t0, t1, L0, L1, R0, R1, 20);
+    E_SEED(t0, t1, R0, R1, L0, L1, 22);
+    E_SEED(t0, t1, L0, L1, R0, R1, 24);
+    E_SEED(t0, t1, R0, R1, L0, L1, 26);
+    E_SEED(t0, t1, L0, L1, R0, R1, 28);
+    E_SEED(t0, t1, R0, R1, L0, L1, 30);
+
+    word2char(R0, d); /* output order is R0, R1, L0, L1 */
+    word2char(R1, d + 4);
+    word2char(L0, d + 8);
+    word2char(L1, d + 12);
+}
+/*
+ * Decrypt one block from s into d using key schedule ks.
+ *
+ * Identical in shape to SEED_encrypt, except that the sixteen E_SEED
+ * rounds pass the schedule offsets in reverse: 30, 28, ... 0.
+ */
+void
+SEED_decrypt(const unsigned char s[SEED_BLOCK_SIZE],
+             unsigned char d[SEED_BLOCK_SIZE],
+             const SEED_KEY_SCHEDULE *ks)
+{
+    seed_word L0, L1, R0, R1;
+    seed_word t0, t1;
+
+    char2word(s, L0);
+    char2word(s + 4, L1);
+    char2word(s + 8, R0);
+    char2word(s + 12, R1);
+
+    E_SEED(t0, t1, L0, L1, R0, R1, 30);
+    E_SEED(t0, t1, R0, R1, L0, L1, 28);
+    E_SEED(t0, t1, L0, L1, R0, R1, 26);
+    E_SEED(t0, t1, R0, R1, L0, L1, 24);
+    E_SEED(t0, t1, L0, L1, R0, R1, 22);
+    E_SEED(t0, t1, R0, R1, L0, L1, 20);
+    E_SEED(t0, t1, L0, L1, R0, R1, 18);
+    E_SEED(t0, t1, R0, R1, L0, L1, 16);
+    E_SEED(t0, t1, L0, L1, R0, R1, 14);
+    E_SEED(t0, t1, R0, R1, L0, L1, 12);
+    E_SEED(t0, t1, L0, L1, R0, R1, 10);
+    E_SEED(t0, t1, R0, R1, L0, L1, 8);
+    E_SEED(t0, t1, L0, L1, R0, R1, 6);
+    E_SEED(t0, t1, R0, R1, L0, L1, 4);
+    E_SEED(t0, t1, L0, L1, R0, R1, 2);
+    E_SEED(t0, t1, R0, R1, L0, L1, 0);
+
+    word2char(R0, d); /* output order is R0, R1, L0, L1 */
+    word2char(R1, d + 4);
+    word2char(L0, d + 8);
+    word2char(L1, d + 12);
+}
+/* Process a single block: SEED_encrypt when enc is non-zero, SEED_decrypt otherwise. */
+void
+SEED_ecb_encrypt(const unsigned char *in,
+                 unsigned char *out,
+                 const SEED_KEY_SCHEDULE *ks, int enc)
+{
+    if (enc) {
+        SEED_encrypt(in, out, ks);
+    } else {
+        SEED_decrypt(in, out, ks);
+    }
+}
+/*
+ * CBC-mode encryption (enc non-zero) or decryption of len bytes from in
+ * to out, chaining through ivec, which is updated on return.
+ *
+ * Three paths: encryption, decryption with distinct buffers, and
+ * decryption in place (in == out), which must save each ciphertext block
+ * in tmp before it is overwritten.
+ *
+ * A trailing partial block (len not a multiple of SEED_BLOCK_SIZE) is
+ * handled on every path, but each of them still touches a whole block:
+ *  - encryption writes SEED_BLOCK_SIZE bytes to out;
+ *  - both decryption paths read SEED_BLOCK_SIZE bytes from in.
+ * NOTE(review): confirm callers only pass whole blocks, or that the
+ * buffers are padded accordingly.
+ */
+void
+SEED_cbc_encrypt(const unsigned char *in, unsigned char *out,
+                 size_t len, const SEED_KEY_SCHEDULE *ks,
+                 unsigned char ivec[SEED_BLOCK_SIZE], int enc)
+{
+    size_t n;
+    unsigned char tmp[SEED_BLOCK_SIZE];
+    const unsigned char *iv = ivec;
+
+    if (enc) {
+        while (len >= SEED_BLOCK_SIZE) {
+            for (n = 0; n < SEED_BLOCK_SIZE; ++n)
+                out[n] = in[n] ^ iv[n];
+
+            SEED_encrypt(out, out, ks);
+            iv = out; /* the block just produced chains into the next one */
+            len -= SEED_BLOCK_SIZE;
+            in += SEED_BLOCK_SIZE;
+            out += SEED_BLOCK_SIZE;
+        }
+
+        if (len) {
+            for (n = 0; n < len; ++n)
+                out[n] = in[n] ^ iv[n];
+
+            for (n = len; n < SEED_BLOCK_SIZE; ++n)
+                out[n] = iv[n]; /* fill the rest of the block with iv bytes */
+
+            SEED_encrypt(out, out, ks);
+            iv = out;
+        }
+
+        memcpy(ivec, iv, SEED_BLOCK_SIZE);
+    } else if (in != out) {
+        while (len >= SEED_BLOCK_SIZE) {
+            SEED_decrypt(in, out, ks);
+
+            for (n = 0; n < SEED_BLOCK_SIZE; ++n)
+                out[n] ^= iv[n];
+
+            iv = in; /* input is untouched, so it can serve as the next iv */
+            len -= SEED_BLOCK_SIZE;
+            in += SEED_BLOCK_SIZE;
+            out += SEED_BLOCK_SIZE;
+        }
+
+        if (len) {
+            SEED_decrypt(in, tmp, ks);
+
+            for (n = 0; n < len; ++n)
+                out[n] = tmp[n] ^ iv[n];
+
+            iv = in;
+        }
+
+        memcpy(ivec, iv, SEED_BLOCK_SIZE);
+    } else {
+        while (len >= SEED_BLOCK_SIZE) {
+            memcpy(tmp, in, SEED_BLOCK_SIZE); /* keep the ciphertext for chaining */
+            SEED_decrypt(in, out, ks);
+
+            for (n = 0; n < SEED_BLOCK_SIZE; ++n)
+                out[n] ^= ivec[n];
+
+            memcpy(ivec, tmp, SEED_BLOCK_SIZE);
+            len -= SEED_BLOCK_SIZE;
+            in += SEED_BLOCK_SIZE;
+            out += SEED_BLOCK_SIZE;
+        }
+
+        if (len) {
+            memcpy(tmp, in, SEED_BLOCK_SIZE);
+            SEED_decrypt(tmp, tmp, ks);
+
+            for (n = 0; n < len; ++n)
+                out[n] = tmp[n] ^ ivec[n];
+
+            memcpy(ivec, tmp, SEED_BLOCK_SIZE); /* NOTE(review): tmp holds decrypted data here, unlike the in != out path which stores ciphertext */
+        }
+    }
+}
+/* Allocate a SEEDContext with PORT_ZNew; NOTE(review): presumably zero-filled, NULL on failure - confirm. */
+SEEDContext *
+SEED_AllocateContext(void)
+{
+    return PORT_ZNew(SEEDContext);
+}
+
+SECStatus
+SEED_InitContext(SEEDContext *cx, const unsigned char *key,
+                 unsigned int keylen, const unsigned char *iv,
+                 int mode, unsigned int encrypt, unsigned int unused)
+{
+    if (!cx) {
+        PORT_SetError(SEC_ERROR_INVALID_ARGS);
+        return SECFailure;
+    }
+
+    switch (mode) {
+        case NSS_SEED:
+            SEED_set_key(key, &cx->ks);
+            cx->mode = NSS_SEED;
+            cx->encrypt = encrypt;
+            break;
+
+        case
NSS_SEED_CBC: + memcpy(cx->iv, iv, 16); + SEED_set_key(key, &cx->ks); + cx->mode = NSS_SEED_CBC; + cx->encrypt = encrypt; + break; + + default: + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + return SECSuccess; +} + +SEEDContext * +SEED_CreateContext(const unsigned char *key, const unsigned char *iv, + int mode, PRBool encrypt) +{ + SEEDContext *cx = PORT_ZNew(SEEDContext); + SECStatus rv = SEED_InitContext(cx, key, SEED_KEY_LENGTH, iv, mode, + encrypt, 0); + + if (rv != SECSuccess) { + PORT_ZFree(cx, sizeof *cx); + cx = NULL; + } + + return cx; +} + +void +SEED_DestroyContext(SEEDContext *cx, PRBool freeit) +{ + if (cx) { + memset(cx, 0, sizeof *cx); + + if (freeit) + PORT_Free(cx); + } +} + +SECStatus +SEED_Encrypt(SEEDContext *cx, unsigned char *out, unsigned int *outLen, + unsigned int maxOutLen, const unsigned char *in, + unsigned int inLen) +{ + if (!cx) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + if (!cx->encrypt) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + switch (cx->mode) { + case NSS_SEED: + SEED_ecb_encrypt(in, out, &cx->ks, 1); + *outLen = inLen; + break; + + case NSS_SEED_CBC: + SEED_cbc_encrypt(in, out, inLen, &cx->ks, cx->iv, 1); + *outLen = inLen; + break; + + default: + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + return SECSuccess; +} + +SECStatus +SEED_Decrypt(SEEDContext *cx, unsigned char *out, unsigned int *outLen, + unsigned int maxOutLen, const unsigned char *in, + unsigned int inLen) +{ + if (!cx) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + if (cx->encrypt) { + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + } + + switch (cx->mode) { + case NSS_SEED: + SEED_ecb_encrypt(in, out, &cx->ks, 0); + *outLen = inLen; + break; + + case NSS_SEED_CBC: + SEED_cbc_encrypt(in, out, inLen, &cx->ks, cx->iv, 0); + *outLen = inLen; + break; + + default: + PORT_SetError(SEC_ERROR_INVALID_ARGS); + return SECFailure; + 
} + + return SECSuccess; +} diff --git a/security/nss/lib/freebl/seed.h b/security/nss/lib/freebl/seed.h new file mode 100644 index 000000000..f527165b7 --- /dev/null +++ b/security/nss/lib/freebl/seed.h @@ -0,0 +1,125 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef HEADER_SEED_H +#define HEADER_SEED_H + +#include +#include "blapi.h" + +#if !defined(NO_SYS_TYPES_H) +#include +#endif + +typedef PRUint32 seed_word; + +#define G_FUNC(v) \ + SS[0][((v)&0xff)] ^ \ + SS[1][((v) >> 8 & 0xff)] ^ \ + SS[2][((v) >> 16 & 0xff)] ^ \ + SS[3][((v) >> 24 & 0xff)] + +#define char2word(c, i) \ + (i) = ((((seed_word)((c)[0])) << 24) | \ + (((seed_word)((c)[1])) << 16) | \ + (((seed_word)((c)[2])) << 8) | \ + ((seed_word)((c)[3]))) + +#define word2char(l, c) \ + *((c) + 0) = (unsigned char)((l) >> 24); \ + *((c) + 1) = (unsigned char)((l) >> 16); \ + *((c) + 2) = (unsigned char)((l) >> 8); \ + *((c) + 3) = (unsigned char)((l)) + +#define KEYSCHEDULE_UPDATE0(T0, T1, K0, K1, K2, K3, KC) \ + (T0) = (K2); \ + (K2) = (((K2) << 8) ^ ((K3) >> 24)); \ + (K3) = (((K3) << 8) ^ ((T0) >> 24)); \ + (T0) = ((K0) + (K2) - (KC)); \ + (T1) = ((K1) + (KC) - (K3)) + +#define KEYSCHEDULE_UPDATE1(T0, T1, K0, K1, K2, K3, KC) \ + (T0) = (K0); \ + (K0) = (((K0) >> 8) ^ ((K1) << 24)); \ + (K1) = (((K1) >> 8) ^ ((T0) << 24)); \ + (T0) = ((K0) + (K2) - (KC)); \ + (T1) = ((K1) + (KC) - (K3)) + +#define KEYUPDATE_TEMP(T0, T1, K) \ + (K)[0] = G_FUNC((T0)); \ + (K)[1] = G_FUNC((T1)) + +#define XOR_SEEDBLOCK(DST, SRC) \ + (DST)[0] ^= (SRC)[0]; \ + (DST)[1] ^= (SRC)[1]; \ + (DST)[2] ^= (SRC)[2]; \ + (DST)[3] ^= (SRC)[3] + +#define MOV_SEEDBLOCK(DST, SRC) \ + (DST)[0] = (SRC)[0]; \ + (DST)[1] = (SRC)[1]; \ + (DST)[2] = (SRC)[2]; \ + (DST)[3] = (SRC)[3] + +#define CHAR2WORD(C, I) \ + char2word((C), (I)[0]); \ + char2word((C) + 4, (I)[1]); \ + 
char2word((C) + 8, (I)[2]); \ + char2word((C) + 12, (I)[3]) + +#define WORD2CHAR(I, C) \ + word2char((I)[0], (C)); \ + word2char((I)[1], (C + 4)); \ + word2char((I)[2], (C + 8)); \ + word2char((I)[3], (C + 12)) + +#define E_SEED(T0, T1, X1, X2, X3, X4, rbase) \ + (T0) = (X3) ^ (ks->data)[(rbase)]; \ + (T1) = (X4) ^ (ks->data)[(rbase) + 1]; \ + (T1) ^= (T0); \ + (T1) = G_FUNC(T1); \ + (T0) += (T1); \ + (T0) = G_FUNC(T0); \ + (T1) += (T0); \ + (T1) = G_FUNC(T1); \ + (T0) += (T1); \ + (X1) ^= (T0); \ + (X2) ^= (T1) + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct seed_key_st { + PRUint32 data[32]; +} SEED_KEY_SCHEDULE; + +struct SEEDContextStr { + unsigned char iv[SEED_BLOCK_SIZE]; + SEED_KEY_SCHEDULE ks; + int mode; + unsigned int encrypt; +}; + +void SEED_set_key(const unsigned char rawkey[SEED_KEY_LENGTH], + SEED_KEY_SCHEDULE *ks); + +void SEED_encrypt(const unsigned char s[SEED_BLOCK_SIZE], + unsigned char d[SEED_BLOCK_SIZE], + const SEED_KEY_SCHEDULE *ks); +void SEED_decrypt(const unsigned char s[SEED_BLOCK_SIZE], + unsigned char d[SEED_BLOCK_SIZE], + const SEED_KEY_SCHEDULE *ks); + +void SEED_ecb_encrypt(const unsigned char *in, unsigned char *out, + const SEED_KEY_SCHEDULE *ks, int enc); +void SEED_cbc_encrypt(const unsigned char *in, unsigned char *out, + size_t len, const SEED_KEY_SCHEDULE *ks, + unsigned char ivec[SEED_BLOCK_SIZE], int enc); + +#ifdef __cplusplus +} +#endif + +#endif /* HEADER_SEED_H */ diff --git a/security/nss/lib/freebl/sha-fast-amd64-sun.s b/security/nss/lib/freebl/sha-fast-amd64-sun.s new file mode 100644 index 000000000..6430469a4 --- /dev/null +++ b/security/nss/lib/freebl/sha-fast-amd64-sun.s @@ -0,0 +1,2151 @@ +/ This Source Code Form is subject to the terms of the Mozilla Public +/ License, v. 2.0. If a copy of the MPL was not distributed with this +/ file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ + .file "sha_fast.c" + .text + .align 16 +.globl SHA1_Begin + .type SHA1_Begin, @function +SHA1_Begin: +.LFB4: + movl $4023233417, %ecx + movl $2562383102, %edx + movl $3285377520, %eax + movq $0, 64(%rdi) + movq $1732584193, 72(%rdi) + movq %rcx, 80(%rdi) + movq %rdx, 88(%rdi) + movq $271733878, 96(%rdi) + movq %rax, 104(%rdi) + ret +.LFE4: + .size SHA1_Begin, .-SHA1_Begin + .align 16 + .type shaCompress, @function +shaCompress: +.LFB7: + pushq %r15 +.LCFI0: + pushq %r14 +.LCFI1: + pushq %r13 +.LCFI2: + pushq %r12 +.LCFI3: + movq -88(%rdi), %r12 + movq -80(%rdi), %r10 + movq -72(%rdi), %r13 + movq -64(%rdi), %r8 + pushq %rbx +.LCFI4: + movq -56(%rdi), %rcx + movl (%rsi), %eax + movl %r12d, %edx + movq %r13, %r9 + roll $5, %edx + movl 4(%rsi), %ebx + xorq %r8, %r9 +/APP + bswap %eax +/NO_APP + andq %r10, %r9 + mov %eax, %r15d + roll $30, %r10d + movq %r15, -48(%rdi) + xorq %r8, %r9 + movq -48(%rdi), %r14 + addq %r9, %rdx + movq %r10, %rax + movl %r12d, %r15d + addq %rcx, %rdx + xorq %r13, %rax + roll $30, %r15d + leaq 1518500249(%rdx,%r14), %rdx + andq %r12, %rax + movq %r15, %r12 +/APP + bswap %ebx +/NO_APP + movl %edx, %ecx + mov %ebx, %r11d + xorq %r13, %rax + movq %r11, -40(%rdi) + roll $5, %ecx + movq -40(%rdi), %r9 + addq %rax, %rcx + xorq %r10, %r12 + movl 8(%rsi), %r14d + addq %r8, %rcx + andq %rdx, %r12 + movl %edx, %r11d + leaq 1518500249(%rcx,%r9), %rcx + xorq %r10, %r12 + roll $30, %r11d +/APP + bswap %r14d +/NO_APP + movl %ecx, %r8d + mov %r14d, %ebx + movl 12(%rsi), %r9d + movq %rbx, -32(%rdi) + roll $5, %r8d + movq -32(%rdi), %rax + addq %r12, %r8 + movq %r11, %r12 + movl %ecx, %ebx + addq %r13, %r8 + xorq %r15, %r12 + roll $30, %ebx + leaq 1518500249(%r8,%rax), %r8 + andq %rcx, %r12 + movl 16(%rsi), %eax +/APP + bswap %r9d +/NO_APP + movl %r8d, %edx + mov %r9d, %r14d + xorq %r15, %r12 + movq %r14, -24(%rdi) + roll $5, %edx + movq -24(%rdi), %r13 + addq %r12, %rdx + movq %rbx, %r12 + movl %r8d, %r14d + addq %r10, %rdx + leaq 1518500249(%rdx,%r13), 
%rdx + movl 20(%rsi), %r13d +/APP + bswap %eax +/NO_APP + movl %edx, %ecx + mov %eax, %r9d + roll $5, %ecx + xorq %r11, %r12 + movq %r9, -16(%rdi) + andq %r8, %r12 + movq -16(%rdi), %r10 + roll $30, %r14d + xorq %r11, %r12 + movq %r14, %rax + movl %edx, %r9d + addq %r12, %rcx + xorq %rbx, %rax + roll $30, %r9d + addq %r15, %rcx + andq %rdx, %rax + leaq 1518500249(%rcx,%r10), %rcx + xorq %rbx, %rax + movl 24(%rsi), %r10d +/APP + bswap %r13d +/NO_APP + movl %ecx, %r8d + mov %r13d, %r15d + movq %r15, -8(%rdi) + roll $5, %r8d + movq -8(%rdi), %r12 + addq %rax, %r8 + movl %ecx, %r15d + addq %r11, %r8 + movq %r9, %r11 + roll $30, %r15d + leaq 1518500249(%r8,%r12), %r8 + xorq %r14, %r11 + movl 28(%rsi), %r12d +/APP + bswap %r10d +/NO_APP + andq %rcx, %r11 + mov %r10d, %r13d + movl %r8d, %edx + movq %r13, (%rdi) + xorq %r14, %r11 + movq (%rdi), %rax + roll $5, %edx + movq %r15, %r10 + movl %r8d, %r13d + addq %r11, %rdx + xorq %r9, %r10 + roll $30, %r13d + addq %rbx, %rdx + andq %r8, %r10 + leaq 1518500249(%rdx,%rax), %rdx + xorq %r9, %r10 + movl 32(%rsi), %eax +/APP + bswap %r12d +/NO_APP + movl %edx, %ecx + mov %r12d, %ebx + movq %rbx, 8(%rdi) + roll $5, %ecx + movq 8(%rdi), %r11 + addq %r10, %rcx + movq %r13, %r10 + movl %edx, %ebx + addq %r14, %rcx + leaq 1518500249(%rcx,%r11), %rcx +/APP + bswap %eax +/NO_APP + movl %ecx, %r8d + mov %eax, %r12d + roll $5, %r8d + xorq %r15, %r10 + movq %r12, 16(%rdi) + andq %rdx, %r10 + movq 16(%rdi), %r14 + roll $30, %ebx + xorq %r15, %r10 + movq %rbx, %rax + movl 36(%rsi), %r11d + addq %r10, %r8 + xorq %r13, %rax + movl %ecx, %r12d + addq %r9, %r8 + andq %rcx, %rax + roll $30, %r12d + leaq 1518500249(%r8,%r14), %r8 + xorq %r13, %rax + movl 40(%rsi), %r14d +/APP + bswap %r11d +/NO_APP + movl %r8d, %edx + mov %r11d, %r9d + movq %r12, %r11 + movq %r9, 24(%rdi) + roll $5, %edx + movq 24(%rdi), %r10 + addq %rax, %rdx + xorq %rbx, %r11 + movl %r8d, %r9d + addq %r15, %rdx + andq %r8, %r11 + roll $30, %r9d + leaq 1518500249(%rdx,%r10), %rdx + 
xorq %rbx, %r11 + movl 44(%rsi), %r10d +/APP + bswap %r14d +/NO_APP + movl %edx, %ecx + mov %r14d, %r15d + movq %r15, 32(%rdi) + roll $5, %ecx + movq 32(%rdi), %rax + addq %r11, %rcx + movq %r9, %r11 + movl %edx, %r15d + addq %r13, %rcx + xorq %r12, %r11 + roll $30, %r15d + leaq 1518500249(%rcx,%rax), %rcx + andq %rdx, %r11 + movl 48(%rsi), %eax +/APP + bswap %r10d +/NO_APP + movl %ecx, %r8d + mov %r10d, %r14d + xorq %r12, %r11 + movq %r14, 40(%rdi) + roll $5, %r8d + movq 40(%rdi), %r13 + addq %r11, %r8 + movq %r15, %r10 + movl %ecx, %r14d + addq %rbx, %r8 + xorq %r9, %r10 + leaq 1518500249(%r8,%r13), %r8 + movl 52(%rsi), %r13d +/APP + bswap %eax +/NO_APP + movl %r8d, %edx + mov %eax, %ebx + roll $5, %edx + andq %rcx, %r10 + movq %rbx, 48(%rdi) + xorq %r9, %r10 + movq 48(%rdi), %r11 + roll $30, %r14d + addq %r10, %rdx + movq %r14, %rax + movl %r8d, %ebx + addq %r12, %rdx + xorq %r15, %rax + roll $30, %ebx + leaq 1518500249(%rdx,%r11), %rdx + andq %r8, %rax + movl 56(%rsi), %r11d +/APP + bswap %r13d +/NO_APP + movl %edx, %ecx + mov %r13d, %r12d + xorq %r15, %rax + movq %r12, 56(%rdi) + roll $5, %ecx + movq 56(%rdi), %r10 + addq %rax, %rcx + movl %edx, %r12d + addq %r9, %rcx + movq %rbx, %r9 + roll $30, %r12d + leaq 1518500249(%rcx,%r10), %rcx + xorq %r14, %r9 + movl 60(%rsi), %r10d +/APP + bswap %r11d +/NO_APP + andq %rdx, %r9 + mov %r11d, %r13d + movl %ecx, %r8d + movq %r13, 64(%rdi) + xorq %r14, %r9 + movq 64(%rdi), %rax + roll $5, %r8d + movq %r12, %r11 + movl %ecx, %r13d + addq %r9, %r8 + xorq %rbx, %r11 + roll $30, %r13d + addq %r15, %r8 + andq %rcx, %r11 + leaq 1518500249(%r8,%rax), %r8 + xorq %rbx, %r11 +/APP + bswap %r10d +/NO_APP + movl %r8d, %esi + mov %r10d, %r15d + movq %r15, 72(%rdi) + roll $5, %esi + movq 72(%rdi), %r9 + movq 56(%rdi), %r10 + movq 16(%rdi), %rcx + addq %r11, %rsi + movq -32(%rdi), %rdx + addq %r14, %rsi + movq -48(%rdi), %rax + leaq 1518500249(%rsi,%r9), %r14 + movq %r13, %r11 + movl %r8d, %r15d + xorq %rcx, %r10 + xorq %rdx, %r10 + 
movl %r14d, %ecx + xorl %eax, %r10d + roll %r10d + roll $5, %ecx + xorq %r12, %r11 + andq %r8, %r11 + movq %r10, -48(%rdi) + movq -48(%rdi), %r9 + xorq %r12, %r11 + roll $30, %r15d + movl %r14d, %r10d + addq %r11, %rcx + movq 64(%rdi), %r11 + movq 24(%rdi), %rdx + addq %rbx, %rcx + movq -24(%rdi), %rbx + movq -40(%rdi), %rax + leaq 1518500249(%rcx,%r9), %rcx + movq %r15, %r8 + roll $30, %r10d + xorq %rdx, %r11 + xorq %r13, %r8 + xorq %rbx, %r11 + andq %r14, %r8 + movl %ecx, %r9d + xorl %eax, %r11d + xorq %r13, %r8 + roll $5, %r9d + roll %r11d + addq %r8, %r9 + movq %r10, %rax + movq %r11, -40(%rdi) + movq -40(%rdi), %rsi + addq %r12, %r9 + movq 72(%rdi), %rbx + movq 32(%rdi), %rdx + xorq %r15, %rax + movq -16(%rdi), %r14 + movq -32(%rdi), %r12 + andq %rcx, %rax + leaq 1518500249(%r9,%rsi), %r9 + xorq %r15, %rax + movl %ecx, %r11d + xorq %rdx, %rbx + roll $30, %r11d + xorq %r14, %rbx + movl %r9d, %esi + xorl %r12d, %ebx + roll $5, %esi + roll %ebx + addq %rax, %rsi + movq %rbx, -32(%rdi) + movq -32(%rdi), %r8 + addq %r13, %rsi + movq -48(%rdi), %r12 + movq 40(%rdi), %rdx + movq %r11, %r13 + movq -8(%rdi), %r14 + movq -24(%rdi), %rcx + movl %r9d, %ebx + leaq 1518500249(%rsi,%r8), %rsi + xorq %rdx, %r12 + xorq %r14, %r12 + movl %esi, %r8d + xorl %ecx, %r12d + roll %r12d + roll $5, %r8d + xorq %r10, %r13 + andq %r9, %r13 + movq %r12, -24(%rdi) + movq -24(%rdi), %rax + xorq %r10, %r13 + roll $30, %ebx + movl %esi, %r12d + addq %r13, %r8 + xorq %rbx, %rsi + roll $30, %r12d + addq %r15, %r8 + movq -40(%rdi), %r15 + movq 48(%rdi), %rdx + movq (%rdi), %r14 + movq -16(%rdi), %r9 + leaq 1518500249(%r8,%rax), %r13 + xorq %r11, %rsi + xorq %rdx, %r15 + movl %r13d, %ecx + xorq %r14, %r15 + roll $5, %ecx + xorl %r9d, %r15d + addq %rsi, %rcx + roll %r15d + addq %r10, %rcx + movq %r15, -16(%rdi) + movq -16(%rdi), %rsi + movl %r13d, %r15d + movq -32(%rdi), %r14 + movq 56(%rdi), %rax + xorq %r12, %r13 + movq 8(%rdi), %rdx + movq -8(%rdi), %r10 + xorq %rbx, %r13 + leaq 
1859775393(%rcx,%rsi), %r9 + roll $30, %r15d + xorq %rax, %r14 + xorq %rdx, %r14 + movl %r9d, %esi + xorl %r10d, %r14d + roll $5, %esi + roll %r14d + addq %r13, %rsi + movq %r14, -8(%rdi) + movq -8(%rdi), %r8 + addq %r11, %rsi + movq -24(%rdi), %r13 + movq 64(%rdi), %rax + movl %r9d, %r14d + movq 16(%rdi), %rdx + movq (%rdi), %r11 + xorq %r15, %r9 + leaq 1859775393(%rsi,%r8), %r10 + xorq %rax, %r13 + xorq %rdx, %r13 + movl %r10d, %r8d + xorl %r11d, %r13d + roll $5, %r8d + roll %r13d + xorq %r12, %r9 + roll $30, %r14d + addq %r9, %r8 + movq %r13, (%rdi) + movq (%rdi), %rcx + addq %rbx, %r8 + movq -16(%rdi), %rbx + movq 72(%rdi), %rax + movq 24(%rdi), %rdx + movq 8(%rdi), %r9 + movl %r10d, %r13d + leaq 1859775393(%r8,%rcx), %r11 + xorq %r14, %r10 + roll $30, %r13d + xorq %rax, %rbx + xorq %r15, %r10 + xorq %rdx, %rbx + movl %r11d, %ecx + xorl %r9d, %ebx + roll $5, %ecx + roll %ebx + addq %r10, %rcx + movq %rbx, 8(%rdi) + movq 8(%rdi), %rsi + addq %r12, %rcx + movq -8(%rdi), %r12 + movq -48(%rdi), %rax + movl %r11d, %ebx + movq 32(%rdi), %rdx + movq 16(%rdi), %r9 + xorq %r13, %r11 + leaq 1859775393(%rcx,%rsi), %r10 + xorq %r14, %r11 + roll $30, %ebx + xorq %rax, %r12 + xorq %rdx, %r12 + movl %r10d, %esi + xorl %r9d, %r12d + roll $5, %esi + roll %r12d + addq %r11, %rsi + movq %r12, 16(%rdi) + addq %r15, %rsi + movq 16(%rdi), %r8 + movq (%rdi), %r15 + movq -40(%rdi), %rax + movl %r10d, %r12d + movq 40(%rdi), %rdx + movq 24(%rdi), %r9 + xorq %rbx, %r10 + leaq 1859775393(%rsi,%r8), %r11 + xorq %r13, %r10 + xorq %rax, %r15 + xorq %rdx, %r15 + movl %r11d, %r8d + xorl %r9d, %r15d + roll $5, %r8d + roll %r15d + addq %r10, %r8 + movq %r15, 24(%rdi) + movq 24(%rdi), %rcx + addq %r14, %r8 + movq 8(%rdi), %r14 + movq -32(%rdi), %rax + roll $30, %r12d + movq 48(%rdi), %rdx + movq 32(%rdi), %r10 + movl %r11d, %r15d + leaq 1859775393(%r8,%rcx), %r9 + xorq %r12, %r11 + roll $30, %r15d + xorq %rax, %r14 + xorq %rbx, %r11 + xorq %rdx, %r14 + movl %r9d, %ecx + xorl %r10d, %r14d + roll 
$5, %ecx + roll %r14d + addq %r11, %rcx + movq %r14, 32(%rdi) + addq %r13, %rcx + movq 32(%rdi), %rsi + movq 16(%rdi), %r13 + movq -24(%rdi), %rax + movl %r9d, %r14d + movq 56(%rdi), %rdx + movq 40(%rdi), %r11 + xorq %r15, %r9 + leaq 1859775393(%rcx,%rsi), %r10 + xorq %r12, %r9 + roll $30, %r14d + xorq %rax, %r13 + xorq %rdx, %r13 + movl %r10d, %esi + xorl %r11d, %r13d + roll $5, %esi + roll %r13d + addq %r9, %rsi + movq %r13, 40(%rdi) + movq 40(%rdi), %r8 + addq %rbx, %rsi + movq 24(%rdi), %rbx + movq -16(%rdi), %rax + movl %r10d, %r13d + movq 64(%rdi), %rdx + movq 48(%rdi), %r9 + xorq %r14, %r10 + leaq 1859775393(%rsi,%r8), %r11 + xorq %r15, %r10 + roll $30, %r13d + xorq %rax, %rbx + xorq %rdx, %rbx + movl %r11d, %r8d + xorl %r9d, %ebx + roll $5, %r8d + roll %ebx + addq %r10, %r8 + movq %rbx, 48(%rdi) + addq %r12, %r8 + movq 48(%rdi), %rcx + movq 32(%rdi), %r12 + movq -8(%rdi), %rax + movl %r11d, %ebx + movq 72(%rdi), %rdx + movq 56(%rdi), %r9 + leaq 1859775393(%r8,%rcx), %r10 + xorq %rax, %r12 + xorq %rdx, %r12 + movl %r10d, %ecx + xorl %r9d, %r12d + xorq %r13, %r11 + roll $5, %ecx + xorq %r14, %r11 + roll %r12d + roll $30, %ebx + addq %r11, %rcx + movq %r12, 56(%rdi) + movq 56(%rdi), %rsi + addq %r15, %rcx + movq 40(%rdi), %r15 + movq (%rdi), %rax + movq -48(%rdi), %rdx + movq 64(%rdi), %r9 + movl %r10d, %r12d + leaq 1859775393(%rcx,%rsi), %r11 + xorq %rbx, %r10 + roll $30, %r12d + xorq %rax, %r15 + xorq %r13, %r10 + xorq %rdx, %r15 + movl %r11d, %esi + xorl %r9d, %r15d + roll $5, %esi + roll %r15d + addq %r10, %rsi + movq %r15, 64(%rdi) + movq 64(%rdi), %r8 + addq %r14, %rsi + movq 48(%rdi), %r14 + movq 8(%rdi), %rax + movl %r11d, %r15d + movq -40(%rdi), %rdx + movq 72(%rdi), %r10 + xorq %r12, %r11 + leaq 1859775393(%rsi,%r8), %r9 + xorq %rbx, %r11 + roll $30, %r15d + xorq %rax, %r14 + xorq %rdx, %r14 + movl %r9d, %r8d + xorl %r10d, %r14d + roll $5, %r8d + roll %r14d + addq %r11, %r8 + movq %r14, 72(%rdi) + addq %r13, %r8 + movq 72(%rdi), %rcx + movq 56(%rdi), 
%r13 + movq 16(%rdi), %rax + movl %r9d, %r14d + movq -32(%rdi), %rdx + movq -48(%rdi), %r11 + leaq 1859775393(%r8,%rcx), %r10 + xorq %rax, %r13 + xorq %rdx, %r13 + movl %r10d, %ecx + xorl %r11d, %r13d + roll $5, %ecx + roll %r13d + xorq %r15, %r9 + roll $30, %r14d + xorq %r12, %r9 + movq %r13, -48(%rdi) + movq -48(%rdi), %rsi + addq %r9, %rcx + movl %r10d, %r13d + xorq %r14, %r10 + addq %rbx, %rcx + movq 64(%rdi), %rbx + movq 24(%rdi), %rax + movq -24(%rdi), %rdx + leaq 1859775393(%rcx,%rsi), %r11 + movq -40(%rdi), %r9 + xorq %r15, %r10 + roll $30, %r13d + xorq %rax, %rbx + movl %r11d, %esi + xorq %rdx, %rbx + roll $5, %esi + xorl %r9d, %ebx + addq %r10, %rsi + roll %ebx + addq %r12, %rsi + movq %rbx, -40(%rdi) + movq -40(%rdi), %r8 + movl %r11d, %ebx + movq 72(%rdi), %r12 + movq 32(%rdi), %rax + xorq %r13, %r11 + movq -16(%rdi), %rdx + movq -32(%rdi), %r9 + xorq %r14, %r11 + leaq 1859775393(%rsi,%r8), %r10 + roll $30, %ebx + xorq %rax, %r12 + xorq %rdx, %r12 + movl %r10d, %r8d + xorl %r9d, %r12d + roll $5, %r8d + roll %r12d + addq %r11, %r8 + movq %r12, -32(%rdi) + movq -32(%rdi), %rcx + addq %r15, %r8 + movq -48(%rdi), %r15 + movq 40(%rdi), %rax + movl %r10d, %r12d + movq -8(%rdi), %rdx + movq -24(%rdi), %r9 + xorq %rbx, %r10 + leaq 1859775393(%r8,%rcx), %r11 + xorq %r13, %r10 + xorq %rax, %r15 + xorq %rdx, %r15 + movl %r11d, %ecx + xorl %r9d, %r15d + roll $5, %ecx + roll %r15d + addq %r10, %rcx + addq %r14, %rcx + movq %r15, -24(%rdi) + movq -24(%rdi), %rsi + movq -40(%rdi), %r14 + movq 48(%rdi), %rax + roll $30, %r12d + movq (%rdi), %rdx + movq -16(%rdi), %r10 + movl %r11d, %r15d + leaq 1859775393(%rcx,%rsi), %r9 + xorq %r12, %r11 + roll $30, %r15d + xorq %rax, %r14 + xorq %rbx, %r11 + xorq %rdx, %r14 + movl %r9d, %esi + xorl %r10d, %r14d + roll $5, %esi + roll %r14d + addq %r11, %rsi + movq %r14, -16(%rdi) + movq -16(%rdi), %r8 + addq %r13, %rsi + movq -32(%rdi), %r11 + movq 56(%rdi), %rax + movl %r9d, %r14d + movq 8(%rdi), %rdx + movq -8(%rdi), %r10 + xorq 
%r15, %r9 + leaq 1859775393(%rsi,%r8), %r13 + xorq %r12, %r9 + roll $30, %r14d + xorq %rax, %r11 + xorq %rdx, %r11 + movl %r13d, %r8d + xorl %r10d, %r11d + roll $5, %r8d + movl %r13d, %r10d + roll %r11d + addq %r9, %r8 + xorq %r14, %r13 + movq %r11, -8(%rdi) + addq %rbx, %r8 + movq -8(%rdi), %rbx + movq -24(%rdi), %r9 + movq 64(%rdi), %rax + xorq %r15, %r13 + movq 16(%rdi), %rdx + movq (%rdi), %rcx + leaq 1859775393(%r8,%rbx), %r11 + xorq %rax, %r9 + xorq %rdx, %r9 + movl %r11d, %ebx + xorl %ecx, %r9d + roll $5, %ebx + roll %r9d + addq %r13, %rbx + movq %r9, (%rdi) + movq (%rdi), %rsi + addq %r12, %rbx + movq -16(%rdi), %r12 + movq 72(%rdi), %r13 + movl %r11d, %r9d + leaq 1859775393(%rbx,%rsi), %rcx + movl %r10d, %ebx + movq 24(%rdi), %r10 + movq 8(%rdi), %rax + xorq %r13, %r12 + roll $30, %ebx + movl %ecx, %esi + xorq %r10, %r12 + xorq %rbx, %r11 + roll $5, %esi + xorl %eax, %r12d + xorq %r14, %r11 + roll $30, %r9d + roll %r12d + addq %r11, %rsi + movq %rcx, %rax + movq %r12, 8(%rdi) + movq 8(%rdi), %rdx + addq %r15, %rsi + movq -8(%rdi), %r11 + movq -48(%rdi), %r13 + movl %ecx, %r12d + movq 32(%rdi), %r10 + movq 16(%rdi), %r8 + orq %r9, %rcx + leaq 1859775393(%rsi,%rdx), %rsi + andq %rbx, %rcx + andq %r9, %rax + xorq %r13, %r11 + orq %rcx, %rax + roll $30, %r12d + xorq %r10, %r11 + movq %rsi, %r10 + xorl %r8d, %r11d + movl %esi, %r8d + andq %r12, %r10 + roll %r11d + roll $5, %r8d + movq %r11, 16(%rdi) + addq %rax, %r8 + movq 16(%rdi), %r15 + movq (%rdi), %r13 + movq -40(%rdi), %rdx + addq %r14, %r8 + movq 40(%rdi), %r14 + movq 24(%rdi), %rcx + movl %esi, %r11d + addq %r15, %r8 + movl $2400959708, %r15d + orq %r12, %rsi + xorq %rdx, %r13 + addq %r15, %r8 + andq %r9, %rsi + xorq %r14, %r13 + orq %rsi, %r10 + xorl %ecx, %r13d + movl %r8d, %ecx + roll %r13d + roll $5, %ecx + movq %r13, 24(%rdi) + addq %r10, %rcx + movq 24(%rdi), %rax + movq 8(%rdi), %r14 + movq -32(%rdi), %rdx + addq %rbx, %rcx + movq 48(%rdi), %rbx + movq 32(%rdi), %rsi + roll $30, %r11d + addq 
%rax, %rcx + movl %r8d, %r13d + movq %r8, %r10 + xorq %rdx, %r14 + addq %r15, %rcx + orq %r11, %r8 + xorq %rbx, %r14 + andq %r12, %r8 + andq %r11, %r10 + xorl %esi, %r14d + movl %ecx, %esi + orq %r8, %r10 + roll $5, %esi + roll %r14d + roll $30, %r13d + addq %r10, %rsi + movq %r14, 32(%rdi) + movq 32(%rdi), %rax + addq %r9, %rsi + movq 16(%rdi), %r9 + movq -24(%rdi), %rdx + movq 56(%rdi), %rbx + movq 40(%rdi), %r8 + movl %ecx, %r14d + addq %rax, %rsi + movq %rcx, %r10 + orq %r13, %rcx + xorq %rdx, %r9 + addq %r15, %rsi + andq %r11, %rcx + xorq %rbx, %r9 + andq %r13, %r10 + roll $30, %r14d + xorl %r8d, %r9d + movl %esi, %r8d + orq %rcx, %r10 + roll %r9d + roll $5, %r8d + movq %r9, 40(%rdi) + addq %r10, %r8 + movq 40(%rdi), %rax + movq 24(%rdi), %r10 + movq -16(%rdi), %rdx + addq %r12, %r8 + movq 64(%rdi), %rbx + movq 48(%rdi), %rcx + movl %esi, %r9d + addq %rax, %r8 + movq %rsi, %r12 + xorq %rdx, %r10 + addq %r15, %r8 + xorq %rbx, %r10 + orq %r14, %rsi + andq %r14, %r12 + andq %r13, %rsi + xorl %ecx, %r10d + movl %r8d, %ecx + orq %rsi, %r12 + roll %r10d + roll $5, %ecx + movq %r10, 48(%rdi) + addq %r12, %rcx + movq 48(%rdi), %rax + movq 32(%rdi), %r12 + movq -8(%rdi), %rdx + addq %r11, %rcx + movq 72(%rdi), %rbx + movq 56(%rdi), %rsi + roll $30, %r9d + addq %rax, %rcx + movl %r8d, %r10d + movq %r8, %r11 + xorq %rdx, %r12 + addq %r15, %rcx + orq %r9, %r8 + xorq %rbx, %r12 + andq %r14, %r8 + andq %r9, %r11 + xorl %esi, %r12d + movl %ecx, %esi + orq %r8, %r11 + roll %r12d + roll $5, %esi + roll $30, %r10d + movq %r12, 56(%rdi) + addq %r11, %rsi + movq 56(%rdi), %rax + movq 40(%rdi), %r11 + movq (%rdi), %rdx + addq %r13, %rsi + movq -48(%rdi), %rbx + movq 64(%rdi), %r8 + movq %rcx, %r13 + addq %rax, %rsi + andq %r10, %r13 + movl %ecx, %r12d + xorq %rdx, %r11 + addq %r15, %rsi + xorq %rbx, %r11 + xorl %r8d, %r11d + movl %esi, %r8d + roll %r11d + roll $5, %r8d + orq %r10, %rcx + andq %r9, %rcx + movq %r11, 64(%rdi) + movq 64(%rdi), %rax + orq %rcx, %r13 + roll $30, %r12d 
+ movl %esi, %r11d + addq %r13, %r8 + movq 48(%rdi), %r13 + movq 8(%rdi), %rdx + movq -40(%rdi), %rbx + addq %r14, %r8 + movq 72(%rdi), %rcx + addq %rax, %r8 + movq %rsi, %r14 + orq %r12, %rsi + xorq %rdx, %r13 + addq %r15, %r8 + andq %r10, %rsi + xorq %rbx, %r13 + andq %r12, %r14 + roll $30, %r11d + xorl %ecx, %r13d + movl %r8d, %ecx + orq %rsi, %r14 + roll %r13d + roll $5, %ecx + movq %r13, 72(%rdi) + addq %r14, %rcx + movq 72(%rdi), %rax + movq 56(%rdi), %r14 + movq 16(%rdi), %rdx + addq %r9, %rcx + movq -32(%rdi), %rbx + movq -48(%rdi), %rsi + movl %r8d, %r13d + addq %rax, %rcx + movq %r8, %r9 + orq %r11, %r8 + xorq %rdx, %r14 + addq %r15, %rcx + andq %r12, %r8 + xorq %rbx, %r14 + andq %r11, %r9 + xorl %esi, %r14d + movl %ecx, %esi + orq %r8, %r9 + roll $5, %esi + roll %r14d + addq %r9, %rsi + movq %r14, -48(%rdi) + movq -48(%rdi), %rax + addq %r10, %rsi + movq 64(%rdi), %r10 + movq 24(%rdi), %rdx + movq -24(%rdi), %rbx + movq -40(%rdi), %r8 + movl %ecx, %r14d + addq %rax, %rsi + roll $30, %r13d + movq %rcx, %r9 + xorq %rdx, %r10 + addq %r15, %rsi + orq %r13, %rcx + xorq %rbx, %r10 + andq %r11, %rcx + andq %r13, %r9 + xorl %r8d, %r10d + movl %esi, %r8d + orq %rcx, %r9 + roll $5, %r8d + roll %r10d + roll $30, %r14d + addq %r9, %r8 + movq %r10, -40(%rdi) + movq -40(%rdi), %rax + addq %r12, %r8 + movq 72(%rdi), %r12 + movq 32(%rdi), %rdx + movq -16(%rdi), %rbx + movq -32(%rdi), %rcx + movl %esi, %r10d + addq %rax, %r8 + movq %rsi, %r9 + orq %r14, %rsi + xorq %rdx, %r12 + addq %r15, %r8 + andq %r13, %rsi + xorq %rbx, %r12 + andq %r14, %r9 + roll $30, %r10d + xorl %ecx, %r12d + movl %r8d, %ecx + orq %rsi, %r9 + roll $5, %ecx + roll %r12d + addq %r9, %rcx + movq %r12, -32(%rdi) + movq -32(%rdi), %rax + addq %r11, %rcx + movq -48(%rdi), %r11 + movq 40(%rdi), %rdx + movq -8(%rdi), %rbx + movq -24(%rdi), %rsi + movl %r8d, %r12d + addq %rax, %rcx + movq %r8, %r9 + xorq %rdx, %r11 + addq %r15, %rcx + xorq %rbx, %r11 + xorl %esi, %r11d + orq %r10, %r8 + andq %r10, %r9 + 
andq %r14, %r8 + movl %ecx, %esi + roll %r11d + orq %r8, %r9 + roll $5, %esi + movq %r11, -24(%rdi) + addq %r9, %rsi + movq -24(%rdi), %rax + roll $30, %r12d + addq %r13, %rsi + movq -40(%rdi), %r13 + movq 48(%rdi), %rdx + movq (%rdi), %rbx + movq -16(%rdi), %r8 + movl %ecx, %r11d + addq %rax, %rsi + movq %rcx, %r9 + orq %r12, %rcx + xorq %rdx, %r13 + addq %r15, %rsi + andq %r10, %rcx + xorq %rbx, %r13 + andq %r12, %r9 + roll $30, %r11d + xorl %r8d, %r13d + movl %esi, %r8d + orq %rcx, %r9 + roll %r13d + roll $5, %r8d + movq %r13, -16(%rdi) + addq %r9, %r8 + movq -16(%rdi), %rax + movq -32(%rdi), %r9 + movq 56(%rdi), %rdx + addq %r14, %r8 + movq 8(%rdi), %rcx + movq -8(%rdi), %rbx + movl %esi, %r13d + addq %rax, %r8 + movq %rsi, %r14 + orq %r11, %rsi + xorq %rdx, %r9 + addq %r15, %r8 + andq %r11, %r14 + xorq %rcx, %r9 + xorl %ebx, %r9d + movl %r8d, %ebx + roll %r9d + roll $5, %ebx + andq %r12, %rsi + orq %rsi, %r14 + movq %r9, -8(%rdi) + movq -8(%rdi), %rax + addq %r14, %rbx + movq -24(%rdi), %r14 + movq 64(%rdi), %rdx + movq 16(%rdi), %rcx + addq %r10, %rbx + movq (%rdi), %rsi + roll $30, %r13d + addq %rax, %rbx + movl %r8d, %r9d + xorq %rdx, %r14 + addq %r15, %rbx + movq %r8, %r10 + xorq %rcx, %r14 + orq %r13, %r8 + andq %r13, %r10 + andq %r11, %r8 + xorl %esi, %r14d + movl %ebx, %esi + orq %r8, %r10 + roll $5, %esi + roll %r14d + addq %r10, %rsi + movq %r14, (%rdi) + movq (%rdi), %rax + addq %r12, %rsi + movq -16(%rdi), %r12 + movq 72(%rdi), %rdx + movq 24(%rdi), %rcx + movq 8(%rdi), %r8 + roll $30, %r9d + addq %rax, %rsi + movl %ebx, %r14d + movq %rbx, %r10 + xorq %rdx, %r12 + addq %r15, %rsi + orq %r9, %rbx + xorq %rcx, %r12 + andq %r13, %rbx + andq %r9, %r10 + xorl %r8d, %r12d + movl %esi, %r8d + orq %rbx, %r10 + roll %r12d + roll $5, %r8d + movq %r12, 8(%rdi) + movq 8(%rdi), %rax + addq %r10, %r8 + movq -8(%rdi), %rbx + movq -48(%rdi), %rdx + addq %r11, %r8 + movq 32(%rdi), %r11 + movq 16(%rdi), %rcx + movl %esi, %r12d + addq %rax, %r8 + movq %rsi, %r10 + 
addq %r15, %r8 + xorq %rdx, %rbx + roll $30, %r14d + xorq %r11, %rbx + orq %r14, %rsi + andq %r14, %r10 + xorl %ecx, %ebx + andq %r9, %rsi + movl %r8d, %ecx + roll %ebx + orq %rsi, %r10 + roll $5, %ecx + movq %rbx, 16(%rdi) + movq 16(%rdi), %rsi + addq %r10, %rcx + movq (%rdi), %r11 + movq -40(%rdi), %rax + addq %r13, %rcx + movq 40(%rdi), %rdx + movq 24(%rdi), %r13 + roll $30, %r12d + addq %rsi, %rcx + movl %r8d, %ebx + movq %r8, %r10 + xorq %rax, %r11 + addq %r15, %rcx + orq %r12, %r8 + xorq %rdx, %r11 + andq %r14, %r8 + andq %r12, %r10 + xorl %r13d, %r11d + movl %ecx, %r13d + orq %r8, %r10 + roll %r11d + roll $5, %r13d + roll $30, %ebx + movq %r11, 24(%rdi) + addq %r10, %r13 + movq 24(%rdi), %rsi + movq 8(%rdi), %r10 + movq -32(%rdi), %rax + addq %r9, %r13 + movq 48(%rdi), %rdx + movq 32(%rdi), %r8 + movl %ecx, %r11d + addq %rsi, %r13 + movq %rcx, %r9 + xorq %rax, %r10 + addq %r15, %r13 + xorq %rdx, %r10 + xorl %r8d, %r10d + movl %r13d, %r8d + roll %r10d + orq %rbx, %rcx + andq %rbx, %r9 + movq %r10, 32(%rdi) + andq %r12, %rcx + movl %r13d, %r10d + orq %rcx, %r9 + roll $5, %r10d + movq 32(%rdi), %rsi + addq %r9, %r10 + roll $30, %r11d + movq %r13, %rcx + addq %r14, %r10 + movq 16(%rdi), %r14 + movq -24(%rdi), %rax + movq 56(%rdi), %rdx + movq 40(%rdi), %r9 + addq %rsi, %r10 + addq %r15, %r10 + orq %r11, %r13 + andq %r11, %rcx + xorq %rax, %r14 + andq %rbx, %r13 + xorq %rdx, %r14 + orq %r13, %rcx + xorl %r9d, %r14d + movl %r10d, %r9d + roll %r14d + roll $5, %r9d + movq %r14, 40(%rdi) + movq 40(%rdi), %rsi + addq %rcx, %r9 + movq 24(%rdi), %r13 + addq %r12, %r9 + movq -16(%rdi), %r12 + movq 64(%rdi), %rax + movl %r10d, %r14d + addq %rsi, %r9 + movl %r8d, %esi + addq %r15, %r9 + movq 48(%rdi), %r15 + xorq %r12, %r13 + roll $30, %esi + xorq %rax, %r13 + xorq %rsi, %r10 + xorl %r15d, %r13d + movl %r9d, %r15d + xorq %r11, %r10 + roll $5, %r15d + roll %r13d + addq %r10, %r15 + movq %r13, 48(%rdi) + movq 48(%rdi), %r10 + addq %rbx, %r15 + movq 32(%rdi), %rbx + movq 
-8(%rdi), %r8 + movq 72(%rdi), %rdx + movq 56(%rdi), %rcx + roll $30, %r14d + addq %r10, %r15 + movl $3395469782, %r10d + movl %r9d, %r13d + xorq %r8, %rbx + addq %r10, %r15 + xorq %r14, %r9 + xorq %rdx, %rbx + xorq %rsi, %r9 + roll $30, %r13d + xorl %ecx, %ebx + movl %r15d, %ecx + roll %ebx + roll $5, %ecx + movq %rbx, 56(%rdi) + addq %r9, %rcx + movq 56(%rdi), %r12 + movq 40(%rdi), %r9 + movq (%rdi), %rax + addq %r11, %rcx + movq -48(%rdi), %r8 + movq 64(%rdi), %r11 + movl %r15d, %ebx + addq %r12, %rcx + xorq %r13, %r15 + roll $30, %ebx + xorq %rax, %r9 + addq %r10, %rcx + xorq %r14, %r15 + xorq %r8, %r9 + xorl %r11d, %r9d + movl %ecx, %r11d + roll %r9d + roll $5, %r11d + movq %r9, 64(%rdi) + addq %r15, %r11 + movq 64(%rdi), %rdx + movq 48(%rdi), %r15 + movq 8(%rdi), %r12 + addq %rsi, %r11 + movq -40(%rdi), %rax + movq 72(%rdi), %r8 + movl %ecx, %r9d + addq %rdx, %r11 + xorq %r12, %r15 + addq %r10, %r11 + xorq %rax, %r15 + xorl %r8d, %r15d + movl %r11d, %r8d + roll %r15d + roll $5, %r8d + xorq %rbx, %rcx + xorq %r13, %rcx + movq %r15, 72(%rdi) + movq 72(%rdi), %rsi + addq %rcx, %r8 + movq 56(%rdi), %r12 + movq 16(%rdi), %rcx + movq -32(%rdi), %rdx + addq %r14, %r8 + movq -48(%rdi), %r14 + addq %rsi, %r8 + roll $30, %r9d + movl %r11d, %r15d + xorq %rcx, %r12 + addq %r10, %r8 + xorq %r9, %r11 + xorq %rdx, %r12 + xorq %rbx, %r11 + roll $30, %r15d + xorl %r14d, %r12d + movl %r8d, %r14d + roll $5, %r14d + roll %r12d + addq %r11, %r14 + movq %r12, -48(%rdi) + movq -48(%rdi), %rax + addq %r13, %r14 + movq 64(%rdi), %r13 + movq 24(%rdi), %rsi + movq -24(%rdi), %rcx + movq -40(%rdi), %r11 + movl %r8d, %r12d + addq %rax, %r14 + xorq %r15, %r8 + roll $30, %r12d + xorq %rsi, %r13 + addq %r10, %r14 + xorq %r9, %r8 + xorq %rcx, %r13 + xorl %r11d, %r13d + movl %r14d, %r11d + roll $5, %r11d + roll %r13d + addq %r8, %r11 + movq %r13, -40(%rdi) + movq -40(%rdi), %rdx + addq %rbx, %r11 + movq 72(%rdi), %rbx + movq 32(%rdi), %rax + movq -16(%rdi), %rsi + movq -32(%rdi), %r8 + movl 
%r14d, %r13d + addq %rdx, %r11 + xorq %rax, %rbx + addq %r10, %r11 + xorq %rsi, %rbx + xorl %r8d, %ebx + xorq %r12, %r14 + movl %r11d, %r8d + xorq %r15, %r14 + roll %ebx + roll $5, %r8d + movq %rbx, -32(%rdi) + addq %r14, %r8 + movq -32(%rdi), %rcx + movq -48(%rdi), %r14 + movq 40(%rdi), %rdx + addq %r9, %r8 + movq -8(%rdi), %rax + movq -24(%rdi), %r9 + roll $30, %r13d + addq %rcx, %r8 + movl %r11d, %ebx + xorq %r13, %r11 + xorq %rdx, %r14 + addq %r10, %r8 + xorq %r12, %r11 + xorq %rax, %r14 + roll $30, %ebx + xorl %r9d, %r14d + movl %r8d, %r9d + roll $5, %r9d + roll %r14d + addq %r11, %r9 + movq %r14, -24(%rdi) + movq -24(%rdi), %rsi + addq %r15, %r9 + movq -40(%rdi), %r15 + movq 48(%rdi), %rcx + movq (%rdi), %rdx + movq -16(%rdi), %r11 + movl %r8d, %r14d + addq %rsi, %r9 + xorq %rbx, %r8 + xorq %rcx, %r15 + addq %r10, %r9 + xorq %r13, %r8 + xorq %rdx, %r15 + xorl %r11d, %r15d + movl %r9d, %r11d + roll %r15d + roll $5, %r11d + movq %r15, -16(%rdi) + addq %r8, %r11 + movq -16(%rdi), %rax + addq %r12, %r11 + movq -32(%rdi), %r12 + movq 56(%rdi), %rsi + movq 8(%rdi), %rcx + movq -8(%rdi), %r8 + movl %r9d, %r15d + addq %rax, %r11 + addq %r10, %r11 + roll $30, %r14d + xorq %rsi, %r12 + xorq %rcx, %r12 + xorq %r14, %r9 + roll $30, %r15d + xorl %r8d, %r12d + movl %r11d, %r8d + xorq %rbx, %r9 + roll $5, %r8d + roll %r12d + addq %r9, %r8 + movq %r12, -8(%rdi) + movq -8(%rdi), %rdx + addq %r13, %r8 + movq -24(%rdi), %r13 + movq 64(%rdi), %rax + movq 16(%rdi), %rsi + movq (%rdi), %rcx + movl %r11d, %r12d + addq %rdx, %r8 + xorq %r15, %r11 + roll $30, %r12d + xorq %rax, %r13 + addq %r10, %r8 + xorq %r14, %r11 + xorq %rsi, %r13 + xorl %ecx, %r13d + movl %r8d, %ecx + roll $5, %ecx + roll %r13d + addq %r11, %rcx + movq %r13, (%rdi) + movq (%rdi), %r9 + addq %rbx, %rcx + movq -16(%rdi), %rbx + movq 72(%rdi), %rdx + movq 24(%rdi), %rax + movq 8(%rdi), %rsi + movl %r8d, %r13d + addq %r9, %rcx + xorq %r12, %r8 + xorq %rdx, %rbx + addq %r10, %rcx + xorq %r15, %r8 + xorq %rax, %rbx + 
xorl %esi, %ebx + movl %ecx, %esi + roll $5, %esi + roll %ebx + addq %r8, %rsi + movq %rbx, 8(%rdi) + movq 8(%rdi), %r11 + addq %r14, %rsi + movq -8(%rdi), %r14 + movq -48(%rdi), %r9 + movq 32(%rdi), %rdx + movq 16(%rdi), %r8 + roll $30, %r13d + addq %r11, %rsi + movl %ecx, %ebx + xorq %r13, %rcx + xorq %r9, %r14 + addq %r10, %rsi + xorq %r12, %rcx + xorq %rdx, %r14 + roll $30, %ebx + xorl %r8d, %r14d + movl %esi, %r8d + roll $5, %r8d + roll %r14d + addq %rcx, %r8 + movq %r14, 16(%rdi) + movq 16(%rdi), %rax + addq %r15, %r8 + movq (%rdi), %r15 + movq -40(%rdi), %r11 + movq 40(%rdi), %r9 + movq 24(%rdi), %rcx + movl %esi, %r14d + addq %rax, %r8 + xorq %rbx, %rsi + roll $30, %r14d + xorq %r11, %r15 + addq %r10, %r8 + xorq %r13, %rsi + xorq %r9, %r15 + xorl %ecx, %r15d + movl %r8d, %ecx + roll %r15d + roll $5, %ecx + movq %r15, 24(%rdi) + addq %rsi, %rcx + movq 24(%rdi), %rdx + movq 8(%rdi), %r11 + movq -32(%rdi), %rax + addq %r12, %rcx + movq 48(%rdi), %r12 + movq 32(%rdi), %rsi + movl %r8d, %r15d + addq %rdx, %rcx + xorq %rax, %r11 + addq %r10, %rcx + xorq %r12, %r11 + xorl %esi, %r11d + movl %ecx, %esi + roll %r11d + movq %r11, 32(%rdi) + movl %ecx, %r11d + movq 32(%rdi), %r9 + roll $5, %r11d + xorq %r14, %r8 + movq 16(%rdi), %r12 + xorq %rbx, %r8 + movq -24(%rdi), %rdx + movq 56(%rdi), %rax + addq %r8, %r11 + movq 40(%rdi), %r8 + roll $30, %r15d + addq %r13, %r11 + xorq %r15, %rcx + addq %r9, %r11 + xorq %rdx, %r12 + xorq %r14, %rcx + addq %r10, %r11 + xorq %rax, %r12 + xorl %r8d, %r12d + movl %r11d, %r8d + roll $5, %r8d + roll %r12d + addq %rcx, %r8 + movq %r12, 40(%rdi) + movq 40(%rdi), %r13 + addq %rbx, %r8 + movq 24(%rdi), %rbx + movq -16(%rdi), %r9 + movq 64(%rdi), %rdx + movq 48(%rdi), %rcx + movl %r11d, %r12d + addq %r13, %r8 + movl %esi, %r13d + roll $30, %r12d + xorq %r9, %rbx + addq %r10, %r8 + roll $30, %r13d + xorq %rdx, %rbx + xorq %r13, %r11 + xorl %ecx, %ebx + movl %r8d, %ecx + xorq %r15, %r11 + roll %ebx + roll $5, %ecx + movq %rbx, 48(%rdi) + addq 
/ SHA1_Update
/
/ Register use, as read from the code below:
/   %rdi  context pointer   (copied to %r15)
/   %rsi  input pointer     (copied to %r14)
/   %edx  input length      (copied to %r13d)
/
/ The 64-bit field at offset 64 of the context is incremented by the input
/ length; its previous low six bits are used as the fill offset of a 64-byte
/ buffer located at the start of the context.  shaCompress is always called
/ with context+160 as its first argument.
/ NOTE(review): offset 64 is presumably the running byte count and offset
/ 160 the hash state -- confirm against the SHA1Context layout.
	.align 16
.globl SHA1_Update
	.type	SHA1_Update, @function
SHA1_Update:
.LFB5:
	pushq	%rbp
.LCFI5:
	movq	%rsp, %rbp
.LCFI6:
	/ Callee-saved registers are stored below %rbp before %rsp is lowered.
	movq	%r13, -24(%rbp)
.LCFI7:
	movq	%r14, -16(%rbp)
.LCFI8:
	movl	%edx, %r13d
	movq	%r15, -8(%rbp)
.LCFI9:
	movq	%rbx, -40(%rbp)
.LCFI10:
	movq	%rdi, %r15
	movq	%r12, -32(%rbp)
.LCFI11:
	subq	$48, %rsp
.LCFI12:
	/ Zero-length input: nothing to do.
	testl	%edx, %edx
	movq	%rsi, %r14
	je	.L243
	/ Add the length to the 64-bit field at 64(ctx); keep the old low
	/ six bits (buffer fill offset) in the local at -44(%rbp).
	movq	64(%rdi), %rdx
	mov	%r13d, %ecx
	leaq	(%rdx,%rcx), %rax
	movq	%rax, 64(%rdi)
	movl	%edx, %eax
	andl	$63, %eax
	movl	%eax, -44(%rbp)
	/ Non-zero offset: a partial block is pending, top it up first.
	jne	.L256
.L245:
	/ Fewer than 64 bytes left: skip the whole-block loop.
	cmpl	$63, %r13d
	jbe	.L253
	leaq	160(%r15), %rbx
	.align 16
.L250:
	/ Whole-block loop: shaCompress(ctx+160, input), 64 bytes per pass.
	movq	%r14, %rsi
	subl	$64, %r13d
	movq	%rbx, %rdi
	call	shaCompress
	addq	$64, %r14
	cmpl	$63, %r13d
	ja	.L250
.L253:
	/ Leftover bytes (if any) are copied to the start of the context by
	/ a tail call to memcpy(ctx, input, remaining).
	testl	%r13d, %r13d
	je	.L243
	mov	%r13d, %edx
	movq	%r14, %rsi
	movq	%r15, %rdi
	movq	-40(%rbp), %rbx
	movq	-32(%rbp), %r12
	movq	-24(%rbp), %r13
	movq	-16(%rbp), %r14
	movq	-8(%rbp), %r15
	leave
	jmp	memcpy@PLT
	.align 16
.L243:
	/ Common exit: restore callee-saved registers and return.
	movq	-40(%rbp), %rbx
	movq	-32(%rbp), %r12
	movq	-24(%rbp), %r13
	movq	-16(%rbp), %r14
	movq	-8(%rbp), %r15
	leave
	ret
.L256:
	/ %ebx = min(64 - offset, length); copy that many bytes to
	/ ctx + offset.  %rsi still holds the caller's input pointer here.
	movl	$64, %ebx
	mov	%eax, %edi
	subl	%eax, %ebx
	cmpl	%ebx, %r13d
	cmovb	%r13d, %ebx
	addq	%r15, %rdi
	mov	%ebx, %r12d
	subl	%ebx, %r13d
	movq	%r12, %rdx
	addq	%r12, %r14
	call	memcpy@PLT
	/ If offset + copied is a multiple of 64 the buffer is full:
	/ shaCompress(ctx+160, ctx).  Either way continue at .L245.
	addl	-44(%rbp), %ebx
	andl	$63, %ebx
	jne	.L245
	leaq	160(%r15), %rdi
	movq	%r15, %rsi
	call	shaCompress
	jmp	.L245
.LFE5:
	.size	SHA1_Update, .-SHA1_Update
/ SHA1_NewContext
/
/ Tail-calls PORT_Alloc_Util with 248 as its only argument; whatever that
/ call returns is returned to the caller unchanged.
/ NOTE(review): 248 is presumably sizeof(SHA1Context) -- the same constant
/ is used by the other context routines in this file.
	.align 16
.globl SHA1_NewContext
	.type	SHA1_NewContext, @function
SHA1_NewContext:
.LFB8:
	movl	$248, %edi
	jmp	PORT_Alloc_Util@PLT
.LFE8:
	.size	SHA1_NewContext, .-SHA1_NewContext
/ SHA1_DestroyContext
/
/   %rdi  context pointer (kept in %r12)
/   %esi  flag            (kept in %ebx)
/
/ Zero-fills 248 bytes at the context with memset, then, when the flag is
/ non-zero, tail-calls PORT_Free_Util on the context pointer.
	.align 16
.globl SHA1_DestroyContext
	.type	SHA1_DestroyContext, @function
SHA1_DestroyContext:
.LFB9:
	pushq	%rbp
.LCFI20:
	movl	$248, %edx
	movq	%rsp, %rbp
.LCFI21:
	movq	%rbx, -16(%rbp)
.LCFI22:
	movq	%r12, -8(%rbp)
.LCFI23:
	movl	%esi, %ebx
	subq	$16, %rsp
.LCFI24:
	/ memset(ctx, 0, 248)
	xorl	%esi, %esi
	movq	%rdi, %r12
	call	memset@PLT
	/ Flag clear: return without freeing.
	testl	%ebx, %ebx
	jne	.L268
	movq	(%rsp), %rbx
	movq	8(%rsp), %r12
	leave
	ret
	.align 16
.L268:
	/ Flag set: restore registers and tail-call PORT_Free_Util(ctx).
	movq	%r12, %rdi
	movq	(%rsp), %rbx
	movq	8(%rsp), %r12
	leave
	jmp	PORT_Free_Util@PLT
.LFE9:
	.size	SHA1_DestroyContext, .-SHA1_DestroyContext
/ SHA1_Hash
/
/   %rdi  first argument  (kept in %r12, forwarded unchanged)
/   %rsi  second argument (kept in %rbx, forwarded unchanged)
/
/ Calls strlen on the second argument and tail-calls SHA1_HashBuf with the
/ two original arguments plus the low 32 bits of that length in %edx.
	.align 16
.globl SHA1_Hash
	.type	SHA1_Hash, @function
SHA1_Hash:
.LFB11:
	pushq	%rbp
.LCFI32:
	movq	%rsp, %rbp
.LCFI33:
	movq	%rbx, -16(%rbp)
.LCFI34:
	movq	%r12, -8(%rbp)
.LCFI35:
	movq	%rsi, %rbx
	subq	$16, %rsp
.LCFI36:
	movq	%rdi, %r12
	movq	%rsi, %rdi
	call	strlen@PLT
	movq	%rbx, %rsi
	movq	%r12, %rdi
	movq	(%rsp), %rbx
	movq	8(%rsp), %r12
	leave
	/ Only %eax is forwarded, so the length is truncated to 32 bits.
	movl	%eax, %edx
	jmp	SHA1_HashBuf@PLT
.LFE11:
	.size	SHA1_Hash, .-SHA1_Hash
/ SHA1_FlattenSize
/
/ Returns the constant 248; the argument registers are not read.
	.align 16
.globl SHA1_FlattenSize
	.type	SHA1_FlattenSize, @function
SHA1_FlattenSize:
.LFB12:
	movl	$248, %eax
	ret
.LFE12:
	.size	SHA1_FlattenSize, .-SHA1_FlattenSize
/ SHA1_Flatten
/
/   %rdi  source (context)
/   %rsi  destination buffer
/
/ Swaps the two arguments into memcpy order, copies 248 bytes from the
/ first argument to the second, and returns 0.
	.align 16
.globl SHA1_Flatten
	.type	SHA1_Flatten, @function
SHA1_Flatten:
.LFB13:
	pushq	%rbp
.LCFI37:
	movq	%rsi, %rax
	movl	$248, %edx
	movq	%rdi, %rsi
	movq	%rax, %rdi
	movq	%rsp, %rbp
.LCFI38:
	call	memcpy@PLT
	leave
	xorl	%eax, %eax
	ret
.LFE13:
	.size	SHA1_Flatten, .-SHA1_Flatten
/ SHA1_Clone
/
/ Tail-calls memcpy with the caller's first two arguments untouched and a
/ length of 248, i.e. memcpy(arg0, arg1, 248).
	.align 16
.globl SHA1_Clone
	.type	SHA1_Clone, @function
SHA1_Clone:
.LFB15:
	movl	$248, %edx
	jmp	memcpy@PLT
.LFE15:
	.size	SHA1_Clone, .-SHA1_Clone
/ SHA1_TraceState
/
/ Does no tracing: it only tail-calls PORT_SetError_Util(-5992).
/ NOTE(review): -5992 is presumably PR_NOT_IMPLEMENTED_ERROR -- confirm
/ against the NSPR error table.
	.align 16
.globl SHA1_TraceState
	.type	SHA1_TraceState, @function
SHA1_TraceState:
.LFB16:
	movl	$-5992, %edi
	jmp	PORT_SetError_Util@PLT
.LFE16:
	.size	SHA1_TraceState, .-SHA1_TraceState
/ SHA1_EndRaw
/
/   %rdi  context pointer
/   %rsi  output buffer
/   %rdx  optional pointer that receives the output length (may be null)
/
/ Reads the five 8-byte slots at context offsets 72, 80, 88, 96 and 104,
/ byte-swaps the low 32 bits of each and stores them as five consecutive
/ 32-bit words (20 bytes) at the output buffer.  The context is not
/ modified.  When %rdx is non-null, 20 is stored through it.
	.align 16
.globl SHA1_EndRaw
	.type	SHA1_EndRaw, @function
SHA1_EndRaw:
.LFB50:
	movq	72(%rdi), %rax
/APP
	bswap %eax
/NO_APP
	movl	%eax, (%rsi)
	movq	80(%rdi), %rax
/APP
	bswap %eax
/NO_APP
	movl	%eax, 4(%rsi)
	movq	88(%rdi), %rax
/APP
	bswap %eax
/NO_APP
	movl	%eax, 8(%rsi)
	movq	96(%rdi), %rax
/APP
	bswap %eax
/NO_APP
	movl	%eax, 12(%rsi)
	movq	104(%rdi), %rax
/APP
	bswap %eax
/NO_APP
	/ The flags set by testq survive the following movl store.
	testq	%rdx, %rdx
	movl	%eax, 16(%rsi)
	je	.L14
	movl	$20, (%rdx)
.L14:
	rep
	ret
.LFE50:
	.size	SHA1_EndRaw, .-SHA1_EndRaw
/* ============= Common constants and defines ======================= */

/*
 * Shorthand for the members of the context named `ctx` in the enclosing
 * function: W is the 32-bit word view of the buffer, B its byte view and
 * H the hash state.  They only work where a variable called `ctx` exists.
 */
#define W (ctx->u.w)
#define B (ctx->u.b)
#define H (ctx->h)

/*
 * Bitwise helpers shared by every SHA-2 flavour in this file.
 *
 * Every argument and every expansion is fully parenthesized so that the
 * macros stay correct when handed a compound expression such as `a | b`
 * or `x & 7`.  Arguments may still be evaluated more than once, so do not
 * pass expressions with side effects.
 */
#define SHR(x, n) ((x) >> (n))                                  /* logical shift right */
#define SHL(x, n) ((x) << (n))                                  /* shift left          */
#define Ch(x, y, z) (((x) & (y)) ^ (~(x) & (z)))                /* "choose"            */
#define Maj(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))  /* "majority"          */
#define SHA_MIN(a, b) ((a) < (b) ? (a) : (b))                   /* smaller of a and b  */
/*
 * 32-bit byte-order helpers.
 *
 * SHA-2 is defined on big-endian words.  On little-endian hosts
 * SHA_HTONL(x) yields x with its four bytes reversed and BYTESWAP4(x)
 * reverses an lvalue in place; on big-endian hosts neither macro is
 * defined, and all users are guarded by IS_LITTLE_ENDIAN as well.
 * One implementation is chosen per compiler/CPU, from compiler intrinsic
 * down to a portable shift-and-mask fallback.
 */
#if defined(IS_LITTLE_ENDIAN)
#if (_MSC_VER >= 1300)
/* _byteswap_ulong is declared in <stdlib.h>. */
#include <stdlib.h>
#pragma intrinsic(_byteswap_ulong)
#define SHA_HTONL(x) _byteswap_ulong(x)
#elif defined(_MSC_VER) && defined(NSS_X86_OR_X64)
#ifndef FORCEINLINE
#if (_MSC_VER >= 1200)
#define FORCEINLINE __forceinline
#else
#define FORCEINLINE __inline
#endif
#endif
#define FASTCALL __fastcall

/* Older MSVC: inline assembly, the result is left in eax. */
static FORCEINLINE PRUint32 FASTCALL
swap4b(PRUint32 dwd)
{
    __asm {
        mov eax,dwd
        bswap eax
    }
}

#define SHA_HTONL(x) swap4b(x)

#elif defined(__GNUC__) && defined(NSS_X86_OR_X64)
/* GCC on x86/x86-64: a single bswap instruction. */
static __inline__ PRUint32
swap4b(PRUint32 value)
{
    __asm__("bswap %0"
            : "+r"(value));
    return (value);
}
#define SHA_HTONL(x) swap4b(x)

#elif defined(__GNUC__) && (defined(__thumb2__) ||      \
                            (!defined(__thumb__) &&     \
                             (defined(__ARM_ARCH_6__) ||   \
                              defined(__ARM_ARCH_6J__) ||  \
                              defined(__ARM_ARCH_6K__) ||  \
                              defined(__ARM_ARCH_6Z__) ||  \
                              defined(__ARM_ARCH_6ZK__) || \
                              defined(__ARM_ARCH_6T2__) || \
                              defined(__ARM_ARCH_7__) ||   \
                              defined(__ARM_ARCH_7A__) ||  \
                              defined(__ARM_ARCH_7R__))))
/* GCC on ARMv6+/Thumb-2: the rev instruction. */
static __inline__ PRUint32
swap4b(PRUint32 value)
{
    PRUint32 ret;
    __asm__("rev %0, %1"
            : "=r"(ret)
            : "r"(value));
    return ret;
}
#define SHA_HTONL(x) swap4b(x)

#else
#define SWAP4MASK 0x00FF00FF
/* Portable fallback: swap the 16-bit halves, then the bytes in each half. */
static PRUint32
swap4b(PRUint32 value)
{
    PRUint32 t1 = (value << 16) | (value >> 16);
    return ((t1 & SWAP4MASK) << 8) | ((t1 >> 8) & SWAP4MASK);
}
#define SHA_HTONL(x) swap4b(x)
#endif
/* Reverse the bytes of the lvalue x in place (x is evaluated twice). */
#define BYTESWAP4(x) ((x) = SHA_HTONL(x))
#endif /* defined(IS_LITTLE_ENDIAN) */

/*
 * 32-bit rotates.  n must satisfy 0 < n < 32 in the portable form (a
 * shift by the full width is undefined); every caller in this file uses
 * a constant in that range.  Arguments are parenthesized so a compound
 * expression such as `a ^ b` rotates as a whole and `sizeof` measures the
 * whole operand.
 */
#if defined(_MSC_VER)
#pragma intrinsic(_lrotr, _lrotl)
#define ROTR32(x, n) _lrotr(x, n)
#define ROTL32(x, n) _lrotl(x, n)
#else
#define ROTR32(x, n) (((x) >> (n)) | ((x) << ((8 * sizeof(x)) - (n))))
#define ROTL32(x, n) (((x) << (n)) | ((x) >> ((8 * sizeof(x)) - (n))))
#endif

/* Capital Sigma and lower case sigma functions */
#define S0(x) (ROTR32(x, 2) ^ ROTR32(x, 13) ^ ROTR32(x, 22))
#define S1(x) (ROTR32(x, 6) ^ ROTR32(x, 11) ^ ROTR32(x, 25))
#define s0(x) (ROTR32(x, 7) ^ ROTR32(x, 18) ^ SHR(x, 3))
#define s1(x) (ROTR32(x, 17) ^ ROTR32(x, 19) ^ SHR(x, 10))
+/* prepare the "message schedule" */ +#ifdef NOUNROLL256 + { + int t; + for (t = 16; t < 64; ++t) { + INITW(t); + } + } +#else + INITW(16); + INITW(17); + INITW(18); + INITW(19); + + INITW(20); + INITW(21); + INITW(22); + INITW(23); + INITW(24); + INITW(25); + INITW(26); + INITW(27); + INITW(28); + INITW(29); + + INITW(30); + INITW(31); + INITW(32); + INITW(33); + INITW(34); + INITW(35); + INITW(36); + INITW(37); + INITW(38); + INITW(39); + + INITW(40); + INITW(41); + INITW(42); + INITW(43); + INITW(44); + INITW(45); + INITW(46); + INITW(47); + INITW(48); + INITW(49); + + INITW(50); + INITW(51); + INITW(52); + INITW(53); + INITW(54); + INITW(55); + INITW(56); + INITW(57); + INITW(58); + INITW(59); + + INITW(60); + INITW(61); + INITW(62); + INITW(63); + +#endif +#undef INITW + } + { + PRUint32 a, b, c, d, e, f, g, h; + + a = H[0]; + b = H[1]; + c = H[2]; + d = H[3]; + e = H[4]; + f = H[5]; + g = H[6]; + h = H[7]; + +#define ROUND(n, a, b, c, d, e, f, g, h) \ + h += S1(e) + Ch(e, f, g) + K256[n] + W[n]; \ + d += h; \ + h += S0(a) + Maj(a, b, c); + +#ifdef NOUNROLL256 + { + int t; + for (t = 0; t < 64; t += 8) { + ROUND(t + 0, a, b, c, d, e, f, g, h) + ROUND(t + 1, h, a, b, c, d, e, f, g) + ROUND(t + 2, g, h, a, b, c, d, e, f) + ROUND(t + 3, f, g, h, a, b, c, d, e) + ROUND(t + 4, e, f, g, h, a, b, c, d) + ROUND(t + 5, d, e, f, g, h, a, b, c) + ROUND(t + 6, c, d, e, f, g, h, a, b) + ROUND(t + 7, b, c, d, e, f, g, h, a) + } + } +#else + ROUND(0, a, b, c, d, e, f, g, h) + ROUND(1, h, a, b, c, d, e, f, g) + ROUND(2, g, h, a, b, c, d, e, f) + ROUND(3, f, g, h, a, b, c, d, e) + ROUND(4, e, f, g, h, a, b, c, d) + ROUND(5, d, e, f, g, h, a, b, c) + ROUND(6, c, d, e, f, g, h, a, b) + ROUND(7, b, c, d, e, f, g, h, a) + + ROUND(8, a, b, c, d, e, f, g, h) + ROUND(9, h, a, b, c, d, e, f, g) + ROUND(10, g, h, a, b, c, d, e, f) + ROUND(11, f, g, h, a, b, c, d, e) + ROUND(12, e, f, g, h, a, b, c, d) + ROUND(13, d, e, f, g, h, a, b, c) + ROUND(14, c, d, e, f, g, h, a, b) + ROUND(15, 
b, c, d, e, f, g, h, a) + + ROUND(16, a, b, c, d, e, f, g, h) + ROUND(17, h, a, b, c, d, e, f, g) + ROUND(18, g, h, a, b, c, d, e, f) + ROUND(19, f, g, h, a, b, c, d, e) + ROUND(20, e, f, g, h, a, b, c, d) + ROUND(21, d, e, f, g, h, a, b, c) + ROUND(22, c, d, e, f, g, h, a, b) + ROUND(23, b, c, d, e, f, g, h, a) + + ROUND(24, a, b, c, d, e, f, g, h) + ROUND(25, h, a, b, c, d, e, f, g) + ROUND(26, g, h, a, b, c, d, e, f) + ROUND(27, f, g, h, a, b, c, d, e) + ROUND(28, e, f, g, h, a, b, c, d) + ROUND(29, d, e, f, g, h, a, b, c) + ROUND(30, c, d, e, f, g, h, a, b) + ROUND(31, b, c, d, e, f, g, h, a) + + ROUND(32, a, b, c, d, e, f, g, h) + ROUND(33, h, a, b, c, d, e, f, g) + ROUND(34, g, h, a, b, c, d, e, f) + ROUND(35, f, g, h, a, b, c, d, e) + ROUND(36, e, f, g, h, a, b, c, d) + ROUND(37, d, e, f, g, h, a, b, c) + ROUND(38, c, d, e, f, g, h, a, b) + ROUND(39, b, c, d, e, f, g, h, a) + + ROUND(40, a, b, c, d, e, f, g, h) + ROUND(41, h, a, b, c, d, e, f, g) + ROUND(42, g, h, a, b, c, d, e, f) + ROUND(43, f, g, h, a, b, c, d, e) + ROUND(44, e, f, g, h, a, b, c, d) + ROUND(45, d, e, f, g, h, a, b, c) + ROUND(46, c, d, e, f, g, h, a, b) + ROUND(47, b, c, d, e, f, g, h, a) + + ROUND(48, a, b, c, d, e, f, g, h) + ROUND(49, h, a, b, c, d, e, f, g) + ROUND(50, g, h, a, b, c, d, e, f) + ROUND(51, f, g, h, a, b, c, d, e) + ROUND(52, e, f, g, h, a, b, c, d) + ROUND(53, d, e, f, g, h, a, b, c) + ROUND(54, c, d, e, f, g, h, a, b) + ROUND(55, b, c, d, e, f, g, h, a) + + ROUND(56, a, b, c, d, e, f, g, h) + ROUND(57, h, a, b, c, d, e, f, g) + ROUND(58, g, h, a, b, c, d, e, f) + ROUND(59, f, g, h, a, b, c, d, e) + ROUND(60, e, f, g, h, a, b, c, d) + ROUND(61, d, e, f, g, h, a, b, c) + ROUND(62, c, d, e, f, g, h, a, b) + ROUND(63, b, c, d, e, f, g, h, a) +#endif + + H[0] += a; + H[1] += b; + H[2] += c; + H[3] += d; + H[4] += e; + H[5] += f; + H[6] += g; + H[7] += h; + } +#undef ROUND +} + +#undef s0 +#undef s1 +#undef S0 +#undef S1 + +void +SHA256_Update(SHA256Context *ctx, const 
unsigned char *input, + unsigned int inputLen) +{ + unsigned int inBuf = ctx->sizeLo & 0x3f; + if (!inputLen) + return; + + /* Add inputLen into the count of bytes processed, before processing */ + if ((ctx->sizeLo += inputLen) < inputLen) + ctx->sizeHi++; + + /* if data already in buffer, attemp to fill rest of buffer */ + if (inBuf) { + unsigned int todo = SHA256_BLOCK_LENGTH - inBuf; + if (inputLen < todo) + todo = inputLen; + memcpy(B + inBuf, input, todo); + input += todo; + inputLen -= todo; + if (inBuf + todo == SHA256_BLOCK_LENGTH) + SHA256_Compress(ctx); + } + + /* if enough data to fill one or more whole buffers, process them. */ + while (inputLen >= SHA256_BLOCK_LENGTH) { + memcpy(B, input, SHA256_BLOCK_LENGTH); + input += SHA256_BLOCK_LENGTH; + inputLen -= SHA256_BLOCK_LENGTH; + SHA256_Compress(ctx); + } + /* if data left over, fill it into buffer */ + if (inputLen) + memcpy(B, input, inputLen); +} + +void +SHA256_End(SHA256Context *ctx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen) +{ + unsigned int inBuf = ctx->sizeLo & 0x3f; + unsigned int padLen = (inBuf < 56) ? 
(56 - inBuf) : (56 + 64 - inBuf); + PRUint32 hi, lo; + + hi = (ctx->sizeHi << 3) | (ctx->sizeLo >> 29); + lo = (ctx->sizeLo << 3); + + SHA256_Update(ctx, pad, padLen); + +#if defined(IS_LITTLE_ENDIAN) + W[14] = SHA_HTONL(hi); + W[15] = SHA_HTONL(lo); +#else + W[14] = hi; + W[15] = lo; +#endif + SHA256_Compress(ctx); + +/* now output the answer */ +#if defined(IS_LITTLE_ENDIAN) + BYTESWAP4(H[0]); + BYTESWAP4(H[1]); + BYTESWAP4(H[2]); + BYTESWAP4(H[3]); + BYTESWAP4(H[4]); + BYTESWAP4(H[5]); + BYTESWAP4(H[6]); + BYTESWAP4(H[7]); +#endif + padLen = PR_MIN(SHA256_LENGTH, maxDigestLen); + memcpy(digest, H, padLen); + if (digestLen) + *digestLen = padLen; +} + +void +SHA256_EndRaw(SHA256Context *ctx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen) +{ + PRUint32 h[8]; + unsigned int len; + + memcpy(h, ctx->h, sizeof(h)); + +#if defined(IS_LITTLE_ENDIAN) + BYTESWAP4(h[0]); + BYTESWAP4(h[1]); + BYTESWAP4(h[2]); + BYTESWAP4(h[3]); + BYTESWAP4(h[4]); + BYTESWAP4(h[5]); + BYTESWAP4(h[6]); + BYTESWAP4(h[7]); +#endif + + len = PR_MIN(SHA256_LENGTH, maxDigestLen); + memcpy(digest, h, len); + if (digestLen) + *digestLen = len; +} + +SECStatus +SHA256_HashBuf(unsigned char *dest, const unsigned char *src, + PRUint32 src_length) +{ + SHA256Context ctx; + unsigned int outLen; + + SHA256_Begin(&ctx); + SHA256_Update(&ctx, src, src_length); + SHA256_End(&ctx, dest, &outLen, SHA256_LENGTH); + memset(&ctx, 0, sizeof ctx); + + return SECSuccess; +} + +SECStatus +SHA256_Hash(unsigned char *dest, const char *src) +{ + return SHA256_HashBuf(dest, (const unsigned char *)src, PORT_Strlen(src)); +} + +void +SHA256_TraceState(SHA256Context *ctx) +{ +} + +unsigned int +SHA256_FlattenSize(SHA256Context *ctx) +{ + return sizeof *ctx; +} + +SECStatus +SHA256_Flatten(SHA256Context *ctx, unsigned char *space) +{ + PORT_Memcpy(space, ctx, sizeof *ctx); + return SECSuccess; +} + +SHA256Context * +SHA256_Resurrect(unsigned char *space, void *arg) +{ + SHA256Context *ctx = 
SHA256_NewContext(); + if (ctx) + PORT_Memcpy(ctx, space, sizeof *ctx); + return ctx; +} + +void +SHA256_Clone(SHA256Context *dest, SHA256Context *src) +{ + memcpy(dest, src, sizeof *dest); +} + +/* ============= SHA224 implementation ================================== */ + +/* SHA-224 initial hash values */ +static const PRUint32 H224[8] = { + 0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939, + 0xffc00b31, 0x68581511, 0x64f98fa7, 0xbefa4fa4 +}; + +SHA224Context * +SHA224_NewContext(void) +{ + return SHA256_NewContext(); +} + +void +SHA224_DestroyContext(SHA224Context *ctx, PRBool freeit) +{ + SHA256_DestroyContext(ctx, freeit); +} + +void +SHA224_Begin(SHA224Context *ctx) +{ + memset(ctx, 0, sizeof *ctx); + memcpy(H, H224, sizeof H224); +} + +void +SHA224_Update(SHA224Context *ctx, const unsigned char *input, + unsigned int inputLen) +{ + SHA256_Update(ctx, input, inputLen); +} + +void +SHA224_End(SHA256Context *ctx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen) +{ + unsigned int maxLen = SHA_MIN(maxDigestLen, SHA224_LENGTH); + SHA256_End(ctx, digest, digestLen, maxLen); +} + +void +SHA224_EndRaw(SHA256Context *ctx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen) +{ + unsigned int maxLen = SHA_MIN(maxDigestLen, SHA224_LENGTH); + SHA256_EndRaw(ctx, digest, digestLen, maxLen); +} + +SECStatus +SHA224_HashBuf(unsigned char *dest, const unsigned char *src, + PRUint32 src_length) +{ + SHA256Context ctx; + unsigned int outLen; + + SHA224_Begin(&ctx); + SHA256_Update(&ctx, src, src_length); + SHA256_End(&ctx, dest, &outLen, SHA224_LENGTH); + memset(&ctx, 0, sizeof ctx); + + return SECSuccess; +} + +SECStatus +SHA224_Hash(unsigned char *dest, const char *src) +{ + return SHA224_HashBuf(dest, (const unsigned char *)src, PORT_Strlen(src)); +} + +void +SHA224_TraceState(SHA224Context *ctx) +{ +} + +unsigned int +SHA224_FlattenSize(SHA224Context *ctx) +{ + return SHA256_FlattenSize(ctx); +} + +SECStatus 
/* ======= SHA512 and SHA384 common constants and defines ================= */

/*
 * Common #defines for SHA512 and SHA384.
 *
 * Two configurations are supported:
 *   - HAVE_LONG_LONG: PRUint64 is a native 64-bit integer.
 *   - otherwise:      a 64-bit value is a pair of 32-bit members named
 *                     .hi and .lo, and ULLC() expands to a brace
 *                     initializer whose member order follows the host
 *                     byte order.
 *
 * ULLC(hi, lo) builds a 64-bit constant from two 8-hex-digit halves.
 * SHA_HTONLL(x) and BYTESWAP8(x) exist only on little-endian hosts and
 * convert between host and big-endian order.
 */
#if defined(HAVE_LONG_LONG)
#if defined(_MSC_VER)
#pragma intrinsic(_rotr64, _rotl64)
#define ROTR64(x, n) _rotr64(x, n)
#define ROTL64(x, n) _rotl64(x, n)
#else
/* n must satisfy 0 < n < 64; arguments are parenthesized for safety. */
#define ROTR64(x, n) (((x) >> (n)) | ((x) << (64 - (n))))
#define ROTL64(x, n) (((x) << (n)) | ((x) >> (64 - (n))))
#endif

#define S0(x) (ROTR64(x, 28) ^ ROTR64(x, 34) ^ ROTR64(x, 39))
#define S1(x) (ROTR64(x, 14) ^ ROTR64(x, 18) ^ ROTR64(x, 41))
#define s0(x) (ROTR64(x, 1) ^ ROTR64(x, 8) ^ SHR(x, 7))
#define s1(x) (ROTR64(x, 19) ^ ROTR64(x, 61) ^ SHR(x, 6))

#if PR_BYTES_PER_LONG == 8
#define ULLC(hi, lo) 0x##hi##lo##UL
#elif defined(_MSC_VER)
#define ULLC(hi, lo) 0x##hi##lo##ui64
#else
#define ULLC(hi, lo) 0x##hi##lo##ULL
#endif

#if defined(IS_LITTLE_ENDIAN)
#if defined(_MSC_VER)
#pragma intrinsic(_byteswap_uint64)
#define SHA_HTONLL(x) _byteswap_uint64(x)

#elif defined(__GNUC__) && (defined(__x86_64__) || defined(__x86_64))
/* GCC on x86-64: a single bswapq instruction. */
static __inline__ PRUint64
swap8b(PRUint64 value)
{
    __asm__("bswapq %0"
            : "+r"(value));
    return (value);
}
#define SHA_HTONLL(x) swap8b(x)

#else
#define SHA_MASK16 ULLC(0000FFFF, 0000FFFF)
#define SHA_MASK8 ULLC(00FF00FF, 00FF00FF)
/* Portable fallback: swap bytes, then 16-bit pairs, then 32-bit halves. */
static PRUint64
swap8b(PRUint64 x)
{
    PRUint64 t1 = x;
    t1 = ((t1 & SHA_MASK8) << 8) | ((t1 >> 8) & SHA_MASK8);
    t1 = ((t1 & SHA_MASK16) << 16) | ((t1 >> 16) & SHA_MASK16);
    return (t1 >> 32) | (t1 << 32);
}
#define SHA_HTONLL(x) swap8b(x)
#endif
/* Reverse the bytes of the lvalue x in place (x is evaluated twice). */
#define BYTESWAP8(x) ((x) = SHA_HTONLL(x))
#endif /* defined(IS_LITTLE_ENDIAN) */

#else /* no long long */

#if defined(IS_LITTLE_ENDIAN)
#define ULLC(hi, lo)         \
    {                        \
        0x##lo##U, 0x##hi##U \
    }
/*
 * Byte-reverse the two-word value x in place and yield it: each half is
 * byte-swapped, then the halves are exchanged with an XOR swap.  The three
 * XOR steps are separated by comma operators so that each one is fully
 * sequenced before the next; chaining them in a single compound
 * assignment would modify x.hi twice without sequencing, which is
 * undefined behaviour in C.
 */
#define SHA_HTONLL(x) (BYTESWAP4((x).lo), BYTESWAP4((x).hi), \
                       (x).hi ^= (x).lo, (x).lo ^= (x).hi,   \
                       (x).hi ^= (x).lo, (x))
/* Statement form of the same operation, using a temporary for the swap. */
#define BYTESWAP8(x)       \
    do {                   \
        PRUint32 tmp;      \
        BYTESWAP4((x).lo); \
        BYTESWAP4((x).hi); \
        tmp = (x).lo;      \
        (x).lo = (x).hi;   \
        (x).hi = tmp;      \
    } while (0)
#else
#define ULLC(hi, lo)         \
    {                        \
        0x##hi##U, 0x##lo##U \
    }
#endif

#endif
0xc67178f2e372532bUL, + 0xca273eceea26619cUL, 0xd186b8c721c0c207UL, + 0xeada7dd6cde0eb1eUL, 0xf57d4f7fee6ed178UL, + 0x06f067aa72176fbaUL, 0x0a637dc5a2c898a6UL, + 0x113f9804bef90daeUL, 0x1b710b35131c471bUL, + 0x28db77f523047d84UL, 0x32caab7b40c72493UL, + 0x3c9ebe0a15c9bebcUL, 0x431d67c49c100d4cUL, + 0x4cc5d4becb3e42b6UL, 0x597f299cfc657e2aUL, + 0x5fcb6fab3ad6faecUL, 0x6c44198c4a475817UL +#else + ULLC(428a2f98, d728ae22), ULLC(71374491, 23ef65cd), + ULLC(b5c0fbcf, ec4d3b2f), ULLC(e9b5dba5, 8189dbbc), + ULLC(3956c25b, f348b538), ULLC(59f111f1, b605d019), + ULLC(923f82a4, af194f9b), ULLC(ab1c5ed5, da6d8118), + ULLC(d807aa98, a3030242), ULLC(12835b01, 45706fbe), + ULLC(243185be, 4ee4b28c), ULLC(550c7dc3, d5ffb4e2), + ULLC(72be5d74, f27b896f), ULLC(80deb1fe, 3b1696b1), + ULLC(9bdc06a7, 25c71235), ULLC(c19bf174, cf692694), + ULLC(e49b69c1, 9ef14ad2), ULLC(efbe4786, 384f25e3), + ULLC(0fc19dc6, 8b8cd5b5), ULLC(240ca1cc, 77ac9c65), + ULLC(2de92c6f, 592b0275), ULLC(4a7484aa, 6ea6e483), + ULLC(5cb0a9dc, bd41fbd4), ULLC(76f988da, 831153b5), + ULLC(983e5152, ee66dfab), ULLC(a831c66d, 2db43210), + ULLC(b00327c8, 98fb213f), ULLC(bf597fc7, beef0ee4), + ULLC(c6e00bf3, 3da88fc2), ULLC(d5a79147, 930aa725), + ULLC(06ca6351, e003826f), ULLC(14292967, 0a0e6e70), + ULLC(27b70a85, 46d22ffc), ULLC(2e1b2138, 5c26c926), + ULLC(4d2c6dfc, 5ac42aed), ULLC(53380d13, 9d95b3df), + ULLC(650a7354, 8baf63de), ULLC(766a0abb, 3c77b2a8), + ULLC(81c2c92e, 47edaee6), ULLC(92722c85, 1482353b), + ULLC(a2bfe8a1, 4cf10364), ULLC(a81a664b, bc423001), + ULLC(c24b8b70, d0f89791), ULLC(c76c51a3, 0654be30), + ULLC(d192e819, d6ef5218), ULLC(d6990624, 5565a910), + ULLC(f40e3585, 5771202a), ULLC(106aa070, 32bbd1b8), + ULLC(19a4c116, b8d2d0c8), ULLC(1e376c08, 5141ab53), + ULLC(2748774c, df8eeb99), ULLC(34b0bcb5, e19b48a8), + ULLC(391c0cb3, c5c95a63), ULLC(4ed8aa4a, e3418acb), + ULLC(5b9cca4f, 7763e373), ULLC(682e6ff3, d6b2b8a3), + ULLC(748f82ee, 5defb2fc), ULLC(78a5636f, 43172f60), + ULLC(84c87814, a1f0ab72), 
ULLC(8cc70208, 1a6439ec), + ULLC(90befffa, 23631e28), ULLC(a4506ceb, de82bde9), + ULLC(bef9a3f7, b2c67915), ULLC(c67178f2, e372532b), + ULLC(ca273ece, ea26619c), ULLC(d186b8c7, 21c0c207), + ULLC(eada7dd6, cde0eb1e), ULLC(f57d4f7f, ee6ed178), + ULLC(06f067aa, 72176fba), ULLC(0a637dc5, a2c898a6), + ULLC(113f9804, bef90dae), ULLC(1b710b35, 131c471b), + ULLC(28db77f5, 23047d84), ULLC(32caab7b, 40c72493), + ULLC(3c9ebe0a, 15c9bebc), ULLC(431d67c4, 9c100d4c), + ULLC(4cc5d4be, cb3e42b6), ULLC(597f299c, fc657e2a), + ULLC(5fcb6fab, 3ad6faec), ULLC(6c44198c, 4a475817) +#endif +}; + +struct SHA512ContextStr { + union { + PRUint64 w[80]; /* message schedule, input buffer, plus 64 words */ + PRUint32 l[160]; + PRUint8 b[640]; + } u; + PRUint64 h[8]; /* 8 state variables */ + PRUint64 sizeLo; /* 64-bit count of hashed bytes. */ +}; + +/* =========== SHA512 implementation ===================================== */ + +/* SHA-512 initial hash values */ +static const PRUint64 H512[8] = { +#if PR_BYTES_PER_LONG == 8 + 0x6a09e667f3bcc908UL, 0xbb67ae8584caa73bUL, + 0x3c6ef372fe94f82bUL, 0xa54ff53a5f1d36f1UL, + 0x510e527fade682d1UL, 0x9b05688c2b3e6c1fUL, + 0x1f83d9abfb41bd6bUL, 0x5be0cd19137e2179UL +#else + ULLC(6a09e667, f3bcc908), ULLC(bb67ae85, 84caa73b), + ULLC(3c6ef372, fe94f82b), ULLC(a54ff53a, 5f1d36f1), + ULLC(510e527f, ade682d1), ULLC(9b05688c, 2b3e6c1f), + ULLC(1f83d9ab, fb41bd6b), ULLC(5be0cd19, 137e2179) +#endif +}; + +SHA512Context * +SHA512_NewContext(void) +{ + SHA512Context *ctx = PORT_New(SHA512Context); + return ctx; +} + +void +SHA512_DestroyContext(SHA512Context *ctx, PRBool freeit) +{ + memset(ctx, 0, sizeof *ctx); + if (freeit) { + PORT_Free(ctx); + } +} + +void +SHA512_Begin(SHA512Context *ctx) +{ + memset(ctx, 0, sizeof *ctx); + memcpy(H, H512, sizeof H512); +} + +#if defined(SHA512_TRACE) +#if defined(HAVE_LONG_LONG) +#define DUMP(n, a, d, e, h) printf(" t = %2d, %s = %016lx, %s = %016lx\n", \ + n, #e, d, #a, h); +#else +#define DUMP(n, a, d, e, h) printf(" t = 
%2d, %s = %08x%08x, %s = %08x%08x\n", \ + n, #e, d.hi, d.lo, #a, h.hi, h.lo); +#endif +#else +#define DUMP(n, a, d, e, h) +#endif + +#if defined(HAVE_LONG_LONG) + +#define ADDTO(x, y) y += x + +#define INITW(t) W[t] = (s1(W[t - 2]) + W[t - 7] + s0(W[t - 15]) + W[t - 16]) + +#define ROUND(n, a, b, c, d, e, f, g, h) \ + h += S1(e) + Ch(e, f, g) + K512[n] + W[n]; \ + d += h; \ + h += S0(a) + Maj(a, b, c); \ + DUMP(n, a, d, e, h) + +#else /* use only 32-bit variables, and don't unroll loops */ + +#undef NOUNROLL512 +#define NOUNROLL512 1 + +#define ADDTO(x, y) \ + y.lo += x.lo; \ + y.hi += x.hi + (x.lo > y.lo) + +#define ROTR64a(x, n, lo, hi) (x.lo >> n | x.hi << (32 - n)) +#define ROTR64A(x, n, lo, hi) (x.lo << (64 - n) | x.hi >> (n - 32)) +#define SHR64a(x, n, lo, hi) (x.lo >> n | x.hi << (32 - n)) + +/* Capital Sigma and lower case sigma functions */ +#define s0lo(x) (ROTR64a(x, 1, lo, hi) ^ ROTR64a(x, 8, lo, hi) ^ SHR64a(x, 7, lo, hi)) +#define s0hi(x) (ROTR64a(x, 1, hi, lo) ^ ROTR64a(x, 8, hi, lo) ^ (x.hi >> 7)) + +#define s1lo(x) (ROTR64a(x, 19, lo, hi) ^ ROTR64A(x, 61, lo, hi) ^ SHR64a(x, 6, lo, hi)) +#define s1hi(x) (ROTR64a(x, 19, hi, lo) ^ ROTR64A(x, 61, hi, lo) ^ (x.hi >> 6)) + +#define S0lo(x) (ROTR64a(x, 28, lo, hi) ^ ROTR64A(x, 34, lo, hi) ^ ROTR64A(x, 39, lo, hi)) +#define S0hi(x) (ROTR64a(x, 28, hi, lo) ^ ROTR64A(x, 34, hi, lo) ^ ROTR64A(x, 39, hi, lo)) + +#define S1lo(x) (ROTR64a(x, 14, lo, hi) ^ ROTR64a(x, 18, lo, hi) ^ ROTR64A(x, 41, lo, hi)) +#define S1hi(x) (ROTR64a(x, 14, hi, lo) ^ ROTR64a(x, 18, hi, lo) ^ ROTR64A(x, 41, hi, lo)) + +/* 32-bit versions of Ch and Maj */ +#define Chxx(x, y, z, lo) ((x.lo & y.lo) ^ (~x.lo & z.lo)) +#define Majx(x, y, z, lo) ((x.lo & y.lo) ^ (x.lo & z.lo) ^ (y.lo & z.lo)) + +#define INITW(t) \ + do { \ + PRUint32 lo, tm; \ + PRUint32 cy = 0; \ + lo = s1lo(W[t - 2]); \ + lo += (tm = W[t - 7].lo); \ + if (lo < tm) \ + cy++; \ + lo += (tm = s0lo(W[t - 15])); \ + if (lo < tm) \ + cy++; \ + lo += (tm = W[t - 16].lo); \ + if
(lo < tm) \ + cy++; \ + W[t].lo = lo; \ + W[t].hi = cy + s1hi(W[t - 2]) + W[t - 7].hi + s0hi(W[t - 15]) + W[t - 16].hi; \ + } while (0) + +#define ROUND(n, a, b, c, d, e, f, g, h) \ + { \ + PRUint32 lo, tm, cy; \ + lo = S1lo(e); \ + lo += (tm = Chxx(e, f, g, lo)); \ + cy = (lo < tm); \ + lo += (tm = K512[n].lo); \ + if (lo < tm) \ + cy++; \ + lo += (tm = W[n].lo); \ + if (lo < tm) \ + cy++; \ + h.lo += lo; \ + if (h.lo < lo) \ + cy++; \ + h.hi += cy + S1hi(e) + Chxx(e, f, g, hi) + K512[n].hi + W[n].hi; \ + d.lo += h.lo; \ + d.hi += h.hi + (d.lo < h.lo); \ + lo = S0lo(a); \ + lo += (tm = Majx(a, b, c, lo)); \ + cy = (lo < tm); \ + h.lo += lo; \ + if (h.lo < lo) \ + cy++; \ + h.hi += cy + S0hi(a) + Majx(a, b, c, hi); \ + DUMP(n, a, d, e, h) \ + } +#endif + +static void +SHA512_Compress(SHA512Context *ctx) +{ +#if defined(IS_LITTLE_ENDIAN) + { + BYTESWAP8(W[0]); + BYTESWAP8(W[1]); + BYTESWAP8(W[2]); + BYTESWAP8(W[3]); + BYTESWAP8(W[4]); + BYTESWAP8(W[5]); + BYTESWAP8(W[6]); + BYTESWAP8(W[7]); + BYTESWAP8(W[8]); + BYTESWAP8(W[9]); + BYTESWAP8(W[10]); + BYTESWAP8(W[11]); + BYTESWAP8(W[12]); + BYTESWAP8(W[13]); + BYTESWAP8(W[14]); + BYTESWAP8(W[15]); + } +#endif + + { +#ifdef NOUNROLL512 + { + /* prepare the "message schedule" */ + int t; + for (t = 16; t < 80; ++t) { + INITW(t); + } + } +#else + INITW(16); + INITW(17); + INITW(18); + INITW(19); + + INITW(20); + INITW(21); + INITW(22); + INITW(23); + INITW(24); + INITW(25); + INITW(26); + INITW(27); + INITW(28); + INITW(29); + + INITW(30); + INITW(31); + INITW(32); + INITW(33); + INITW(34); + INITW(35); + INITW(36); + INITW(37); + INITW(38); + INITW(39); + + INITW(40); + INITW(41); + INITW(42); + INITW(43); + INITW(44); + INITW(45); + INITW(46); + INITW(47); + INITW(48); + INITW(49); + + INITW(50); + INITW(51); + INITW(52); + INITW(53); + INITW(54); + INITW(55); + INITW(56); + INITW(57); + INITW(58); + INITW(59); + + INITW(60); + INITW(61); + INITW(62); + INITW(63); + INITW(64); + INITW(65); + INITW(66); + INITW(67); + 
INITW(68); + INITW(69); + + INITW(70); + INITW(71); + INITW(72); + INITW(73); + INITW(74); + INITW(75); + INITW(76); + INITW(77); + INITW(78); + INITW(79); +#endif + } +#ifdef SHA512_TRACE + { + int i; + for (i = 0; i < 80; ++i) { +#ifdef HAVE_LONG_LONG + printf("W[%2d] = %016lx\n", i, W[i]); +#else + printf("W[%2d] = %08x%08x\n", i, W[i].hi, W[i].lo); +#endif + } + } +#endif + { + PRUint64 a, b, c, d, e, f, g, h; + + a = H[0]; + b = H[1]; + c = H[2]; + d = H[3]; + e = H[4]; + f = H[5]; + g = H[6]; + h = H[7]; + +#ifdef NOUNROLL512 + { + int t; + for (t = 0; t < 80; t += 8) { + ROUND(t + 0, a, b, c, d, e, f, g, h) + ROUND(t + 1, h, a, b, c, d, e, f, g) + ROUND(t + 2, g, h, a, b, c, d, e, f) + ROUND(t + 3, f, g, h, a, b, c, d, e) + ROUND(t + 4, e, f, g, h, a, b, c, d) + ROUND(t + 5, d, e, f, g, h, a, b, c) + ROUND(t + 6, c, d, e, f, g, h, a, b) + ROUND(t + 7, b, c, d, e, f, g, h, a) + } + } +#else + ROUND(0, a, b, c, d, e, f, g, h) + ROUND(1, h, a, b, c, d, e, f, g) + ROUND(2, g, h, a, b, c, d, e, f) + ROUND(3, f, g, h, a, b, c, d, e) + ROUND(4, e, f, g, h, a, b, c, d) + ROUND(5, d, e, f, g, h, a, b, c) + ROUND(6, c, d, e, f, g, h, a, b) + ROUND(7, b, c, d, e, f, g, h, a) + + ROUND(8, a, b, c, d, e, f, g, h) + ROUND(9, h, a, b, c, d, e, f, g) + ROUND(10, g, h, a, b, c, d, e, f) + ROUND(11, f, g, h, a, b, c, d, e) + ROUND(12, e, f, g, h, a, b, c, d) + ROUND(13, d, e, f, g, h, a, b, c) + ROUND(14, c, d, e, f, g, h, a, b) + ROUND(15, b, c, d, e, f, g, h, a) + + ROUND(16, a, b, c, d, e, f, g, h) + ROUND(17, h, a, b, c, d, e, f, g) + ROUND(18, g, h, a, b, c, d, e, f) + ROUND(19, f, g, h, a, b, c, d, e) + ROUND(20, e, f, g, h, a, b, c, d) + ROUND(21, d, e, f, g, h, a, b, c) + ROUND(22, c, d, e, f, g, h, a, b) + ROUND(23, b, c, d, e, f, g, h, a) + + ROUND(24, a, b, c, d, e, f, g, h) + ROUND(25, h, a, b, c, d, e, f, g) + ROUND(26, g, h, a, b, c, d, e, f) + ROUND(27, f, g, h, a, b, c, d, e) + ROUND(28, e, f, g, h, a, b, c, d) + ROUND(29, d, e, f, g, h, a, b, c) + ROUND(30, 
c, d, e, f, g, h, a, b) + ROUND(31, b, c, d, e, f, g, h, a) + + ROUND(32, a, b, c, d, e, f, g, h) + ROUND(33, h, a, b, c, d, e, f, g) + ROUND(34, g, h, a, b, c, d, e, f) + ROUND(35, f, g, h, a, b, c, d, e) + ROUND(36, e, f, g, h, a, b, c, d) + ROUND(37, d, e, f, g, h, a, b, c) + ROUND(38, c, d, e, f, g, h, a, b) + ROUND(39, b, c, d, e, f, g, h, a) + + ROUND(40, a, b, c, d, e, f, g, h) + ROUND(41, h, a, b, c, d, e, f, g) + ROUND(42, g, h, a, b, c, d, e, f) + ROUND(43, f, g, h, a, b, c, d, e) + ROUND(44, e, f, g, h, a, b, c, d) + ROUND(45, d, e, f, g, h, a, b, c) + ROUND(46, c, d, e, f, g, h, a, b) + ROUND(47, b, c, d, e, f, g, h, a) + + ROUND(48, a, b, c, d, e, f, g, h) + ROUND(49, h, a, b, c, d, e, f, g) + ROUND(50, g, h, a, b, c, d, e, f) + ROUND(51, f, g, h, a, b, c, d, e) + ROUND(52, e, f, g, h, a, b, c, d) + ROUND(53, d, e, f, g, h, a, b, c) + ROUND(54, c, d, e, f, g, h, a, b) + ROUND(55, b, c, d, e, f, g, h, a) + + ROUND(56, a, b, c, d, e, f, g, h) + ROUND(57, h, a, b, c, d, e, f, g) + ROUND(58, g, h, a, b, c, d, e, f) + ROUND(59, f, g, h, a, b, c, d, e) + ROUND(60, e, f, g, h, a, b, c, d) + ROUND(61, d, e, f, g, h, a, b, c) + ROUND(62, c, d, e, f, g, h, a, b) + ROUND(63, b, c, d, e, f, g, h, a) + + ROUND(64, a, b, c, d, e, f, g, h) + ROUND(65, h, a, b, c, d, e, f, g) + ROUND(66, g, h, a, b, c, d, e, f) + ROUND(67, f, g, h, a, b, c, d, e) + ROUND(68, e, f, g, h, a, b, c, d) + ROUND(69, d, e, f, g, h, a, b, c) + ROUND(70, c, d, e, f, g, h, a, b) + ROUND(71, b, c, d, e, f, g, h, a) + + ROUND(72, a, b, c, d, e, f, g, h) + ROUND(73, h, a, b, c, d, e, f, g) + ROUND(74, g, h, a, b, c, d, e, f) + ROUND(75, f, g, h, a, b, c, d, e) + ROUND(76, e, f, g, h, a, b, c, d) + ROUND(77, d, e, f, g, h, a, b, c) + ROUND(78, c, d, e, f, g, h, a, b) + ROUND(79, b, c, d, e, f, g, h, a) +#endif + + ADDTO(a, H[0]); + ADDTO(b, H[1]); + ADDTO(c, H[2]); + ADDTO(d, H[3]); + ADDTO(e, H[4]); + ADDTO(f, H[5]); + ADDTO(g, H[6]); + ADDTO(h, H[7]); + } +} + +void +SHA512_Update(SHA512Context 
*ctx, const unsigned char *input, + unsigned int inputLen) +{ + unsigned int inBuf; + if (!inputLen) + return; + +#if defined(HAVE_LONG_LONG) + inBuf = (unsigned int)ctx->sizeLo & 0x7f; + /* Add inputLen into the count of bytes processed, before processing */ + ctx->sizeLo += inputLen; +#else + inBuf = (unsigned int)ctx->sizeLo.lo & 0x7f; + ctx->sizeLo.lo += inputLen; + if (ctx->sizeLo.lo < inputLen) + ctx->sizeLo.hi++; +#endif + + /* if data already in buffer, attempt to fill rest of buffer */ + if (inBuf) { + unsigned int todo = SHA512_BLOCK_LENGTH - inBuf; + if (inputLen < todo) + todo = inputLen; + memcpy(B + inBuf, input, todo); + input += todo; + inputLen -= todo; + if (inBuf + todo == SHA512_BLOCK_LENGTH) + SHA512_Compress(ctx); + } + + /* if enough data to fill one or more whole buffers, process them. */ + while (inputLen >= SHA512_BLOCK_LENGTH) { + memcpy(B, input, SHA512_BLOCK_LENGTH); + input += SHA512_BLOCK_LENGTH; + inputLen -= SHA512_BLOCK_LENGTH; + SHA512_Compress(ctx); + } + /* if data left over, fill it into buffer */ + if (inputLen) + memcpy(B, input, inputLen); +} + +void +SHA512_End(SHA512Context *ctx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen) +{ +#if defined(HAVE_LONG_LONG) + unsigned int inBuf = (unsigned int)ctx->sizeLo & 0x7f; +#else + unsigned int inBuf = (unsigned int)ctx->sizeLo.lo & 0x7f; +#endif + unsigned int padLen = (inBuf < 112) ?
(112 - inBuf) : (112 + 128 - inBuf); + PRUint64 lo; + LL_SHL(lo, ctx->sizeLo, 3); + + SHA512_Update(ctx, pad, padLen); + +#if defined(HAVE_LONG_LONG) + W[14] = 0; +#else + W[14].lo = 0; + W[14].hi = 0; +#endif + + W[15] = lo; +#if defined(IS_LITTLE_ENDIAN) + BYTESWAP8(W[15]); +#endif + SHA512_Compress(ctx); + +/* now output the answer */ +#if defined(IS_LITTLE_ENDIAN) + BYTESWAP8(H[0]); + BYTESWAP8(H[1]); + BYTESWAP8(H[2]); + BYTESWAP8(H[3]); + BYTESWAP8(H[4]); + BYTESWAP8(H[5]); + BYTESWAP8(H[6]); + BYTESWAP8(H[7]); +#endif + padLen = PR_MIN(SHA512_LENGTH, maxDigestLen); + memcpy(digest, H, padLen); + if (digestLen) + *digestLen = padLen; +} + +void +SHA512_EndRaw(SHA512Context *ctx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen) +{ + PRUint64 h[8]; + unsigned int len; + + memcpy(h, ctx->h, sizeof(h)); + +#if defined(IS_LITTLE_ENDIAN) + BYTESWAP8(h[0]); + BYTESWAP8(h[1]); + BYTESWAP8(h[2]); + BYTESWAP8(h[3]); + BYTESWAP8(h[4]); + BYTESWAP8(h[5]); + BYTESWAP8(h[6]); + BYTESWAP8(h[7]); +#endif + len = PR_MIN(SHA512_LENGTH, maxDigestLen); + memcpy(digest, h, len); + if (digestLen) + *digestLen = len; +} + +SECStatus +SHA512_HashBuf(unsigned char *dest, const unsigned char *src, + PRUint32 src_length) +{ + SHA512Context ctx; + unsigned int outLen; + + SHA512_Begin(&ctx); + SHA512_Update(&ctx, src, src_length); + SHA512_End(&ctx, dest, &outLen, SHA512_LENGTH); + memset(&ctx, 0, sizeof ctx); + + return SECSuccess; +} + +SECStatus +SHA512_Hash(unsigned char *dest, const char *src) +{ + return SHA512_HashBuf(dest, (const unsigned char *)src, PORT_Strlen(src)); +} + +void +SHA512_TraceState(SHA512Context *ctx) +{ +} + +unsigned int +SHA512_FlattenSize(SHA512Context *ctx) +{ + return sizeof *ctx; +} + +SECStatus +SHA512_Flatten(SHA512Context *ctx, unsigned char *space) +{ + PORT_Memcpy(space, ctx, sizeof *ctx); + return SECSuccess; +} + +SHA512Context * +SHA512_Resurrect(unsigned char *space, void *arg) +{ + SHA512Context *ctx = 
SHA512_NewContext(); + if (ctx) + PORT_Memcpy(ctx, space, sizeof *ctx); + return ctx; +} + +void +SHA512_Clone(SHA512Context *dest, SHA512Context *src) +{ + memcpy(dest, src, sizeof *dest); +} + +/* ======================================================================= */ +/* SHA384 uses a SHA512Context as the real context. +** The only differences between SHA384 and SHA512 are: +** a) the initialization values for the context, and +** b) the number of bytes of data produced as output. +*/ + +/* SHA-384 initial hash values */ +static const PRUint64 H384[8] = { +#if PR_BYTES_PER_LONG == 8 + 0xcbbb9d5dc1059ed8UL, 0x629a292a367cd507UL, + 0x9159015a3070dd17UL, 0x152fecd8f70e5939UL, + 0x67332667ffc00b31UL, 0x8eb44a8768581511UL, + 0xdb0c2e0d64f98fa7UL, 0x47b5481dbefa4fa4UL +#else + ULLC(cbbb9d5d, c1059ed8), ULLC(629a292a, 367cd507), + ULLC(9159015a, 3070dd17), ULLC(152fecd8, f70e5939), + ULLC(67332667, ffc00b31), ULLC(8eb44a87, 68581511), + ULLC(db0c2e0d, 64f98fa7), ULLC(47b5481d, befa4fa4) +#endif +}; + +SHA384Context * +SHA384_NewContext(void) +{ + return SHA512_NewContext(); +} + +void +SHA384_DestroyContext(SHA384Context *ctx, PRBool freeit) +{ + SHA512_DestroyContext(ctx, freeit); +} + +void +SHA384_Begin(SHA384Context *ctx) +{ + memset(ctx, 0, sizeof *ctx); + memcpy(H, H384, sizeof H384); +} + +void +SHA384_Update(SHA384Context *ctx, const unsigned char *input, + unsigned int inputLen) +{ + SHA512_Update(ctx, input, inputLen); +} + +void +SHA384_End(SHA384Context *ctx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen) +{ + unsigned int maxLen = SHA_MIN(maxDigestLen, SHA384_LENGTH); + SHA512_End(ctx, digest, digestLen, maxLen); +} + +void +SHA384_EndRaw(SHA384Context *ctx, unsigned char *digest, + unsigned int *digestLen, unsigned int maxDigestLen) +{ + unsigned int maxLen = SHA_MIN(maxDigestLen, SHA384_LENGTH); + SHA512_EndRaw(ctx, digest, digestLen, maxLen); +} + +SECStatus +SHA384_HashBuf(unsigned char *dest, const unsigned char *src, +
PRUint32 src_length) +{ + SHA512Context ctx; + unsigned int outLen; + + SHA384_Begin(&ctx); + SHA512_Update(&ctx, src, src_length); + SHA512_End(&ctx, dest, &outLen, SHA384_LENGTH); + memset(&ctx, 0, sizeof ctx); + + return SECSuccess; +} + +SECStatus +SHA384_Hash(unsigned char *dest, const char *src) +{ + return SHA384_HashBuf(dest, (const unsigned char *)src, PORT_Strlen(src)); +} + +void +SHA384_TraceState(SHA384Context *ctx) +{ +} + +unsigned int +SHA384_FlattenSize(SHA384Context *ctx) +{ + return sizeof(SHA384Context); +} + +SECStatus +SHA384_Flatten(SHA384Context *ctx, unsigned char *space) +{ + return SHA512_Flatten(ctx, space); +} + +SHA384Context * +SHA384_Resurrect(unsigned char *space, void *arg) +{ + return SHA512_Resurrect(space, arg); +} + +void +SHA384_Clone(SHA384Context *dest, SHA384Context *src) +{ + memcpy(dest, src, sizeof *dest); +} + +/* ======================================================================= */ +#ifdef SELFTEST +#include <stdio.h> + +static const char abc[] = { "abc" }; +static const char abcdbc[] = { + "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq" +}; +static const char abcdef[] = { + "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmn" + "hijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu" +}; + +void +dumpHash32(const unsigned char *buf, unsigned int bufLen) +{ + unsigned int i; + for (i = 0; i < bufLen; i += 4) { + printf(" %02x%02x%02x%02x", buf[i], buf[i + 1], buf[i + 2], buf[i + 3]); + } + printf("\n"); +} + +void +test256(void) +{ + unsigned char outBuf[SHA256_LENGTH]; + + printf("SHA256, input = %s\n", abc); + SHA256_Hash(outBuf, abc); + dumpHash32(outBuf, sizeof outBuf); + + printf("SHA256, input = %s\n", abcdbc); + SHA256_Hash(outBuf, abcdbc); + dumpHash32(outBuf, sizeof outBuf); +} + +void +test224(void) +{ + SHA224Context ctx; + unsigned char a1000times[1000]; + unsigned int outLen; + unsigned char outBuf[SHA224_LENGTH]; + int i; + + /* Test Vector 1 */ + printf("SHA224, input = %s\n", abc); +
SHA224_Hash(outBuf, abc); + dumpHash32(outBuf, sizeof outBuf); + + /* Test Vector 2 */ + printf("SHA224, input = %s\n", abcdbc); + SHA224_Hash(outBuf, abcdbc); + dumpHash32(outBuf, sizeof outBuf); + + /* Test Vector 3 */ + + /* to hash one million 'a's perform 1000 + * sha224 updates on a buffer with 1000 'a's + */ + memset(a1000times, 'a', 1000); + printf("SHA224, input = %s\n", "a one million times"); + SHA224_Begin(&ctx); + for (i = 0; i < 1000; i++) + SHA224_Update(&ctx, a1000times, 1000); + SHA224_End(&ctx, outBuf, &outLen, SHA224_LENGTH); + dumpHash32(outBuf, sizeof outBuf); +} + +void +dumpHash64(const unsigned char *buf, unsigned int bufLen) +{ + unsigned int i; + for (i = 0; i < bufLen; i += 8) { + if (i % 32 == 0) + printf("\n"); + printf(" %02x%02x%02x%02x%02x%02x%02x%02x", + buf[i], buf[i + 1], buf[i + 2], buf[i + 3], + buf[i + 4], buf[i + 5], buf[i + 6], buf[i + 7]); + } + printf("\n"); +} + +void +test512(void) +{ + unsigned char outBuf[SHA512_LENGTH]; + + printf("SHA512, input = %s\n", abc); + SHA512_Hash(outBuf, abc); + dumpHash64(outBuf, sizeof outBuf); + + printf("SHA512, input = %s\n", abcdef); + SHA512_Hash(outBuf, abcdef); + dumpHash64(outBuf, sizeof outBuf); +} + +void +time512(void) +{ + unsigned char outBuf[SHA512_LENGTH]; + + SHA512_Hash(outBuf, abc); + SHA512_Hash(outBuf, abcdef); +} + +void +test384(void) +{ + unsigned char outBuf[SHA384_LENGTH]; + + printf("SHA384, input = %s\n", abc); + SHA384_Hash(outBuf, abc); + dumpHash64(outBuf, sizeof outBuf); + + printf("SHA384, input = %s\n", abcdef); + SHA384_Hash(outBuf, abcdef); + dumpHash64(outBuf, sizeof outBuf); +} + +int +main(int argc, char *argv[], char *envp[]) +{ + int i = 1; + if (argc > 1) { + i = atoi(argv[1]); + } + if (i < 2) { + test224(); + test256(); + test384(); + test512(); + } else { + while (i-- > 0) { + time512(); + } + printf("done\n"); + } + return 0; +} + +void * +PORT_Alloc(size_t len) +{ + return malloc(len); +} +void +PORT_Free(void *ptr) +{ + free(ptr); +} +void 
+PORT_ZFree(void *ptr, size_t len) +{ + memset(ptr, 0, len); + free(ptr); +} +#endif diff --git a/security/nss/lib/freebl/sha_fast.c b/security/nss/lib/freebl/sha_fast.c new file mode 100644 index 000000000..52071f0c9 --- /dev/null +++ b/security/nss/lib/freebl/sha_fast.c @@ -0,0 +1,545 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include <memory.h> +#include "blapi.h" +#include "sha_fast.h" +#include "prerror.h" + +#ifdef TRACING_SSL +#include "ssl.h" +#include "ssltrace.h" +#endif + +static void shaCompress(volatile SHA_HW_t *X, const PRUint32 *datain); + +#define W u.w +#define B u.b + +#define SHA_F1(X, Y, Z) ((((Y) ^ (Z)) & (X)) ^ (Z)) +#define SHA_F2(X, Y, Z) ((X) ^ (Y) ^ (Z)) +#define SHA_F3(X, Y, Z) (((X) & (Y)) | ((Z) & ((X) | (Y)))) +#define SHA_F4(X, Y, Z) ((X) ^ (Y) ^ (Z)) + +#define SHA_MIX(n, a, b, c) XW(n) = SHA_ROTL(XW(a) ^ XW(b) ^ XW(c) ^ XW(n), 1) + +/* + * SHA: initialize context + */ +void +SHA1_Begin(SHA1Context *ctx) +{ + ctx->size = 0; + /* + * Initialize H with constants from FIPS180-1. + */ + ctx->H[0] = 0x67452301L; + ctx->H[1] = 0xefcdab89L; + ctx->H[2] = 0x98badcfeL; + ctx->H[3] = 0x10325476L; + ctx->H[4] = 0xc3d2e1f0L; +} + +/* Explanation of H array and index values: + * The context's H array is actually the concatenation of two arrays + * defined by SHA1, the H array of state variables (5 elements), + * and the W array of intermediate values, of which there are 16 elements. + * The W array starts at H[5], that is W[0] is H[5]. + * Although these values are defined as 32-bit values, we use 64-bit + * variables to hold them because the AMD64 stores 64 bit values in + * memory MUCH faster than it stores any smaller values.
+ * + * Rather than passing the context structure to shaCompress, we pass + * this combined array of H and W values. We do not pass the address + * of the first element of this array, but rather pass the address of an + * element in the middle of the array, element X. Presently X[0] is H[11]. + * So we pass the address of H[11] as the address of array X to shaCompress. + * Then shaCompress accesses the members of the array using positive AND + * negative indexes. + * + * Pictorially: (each element is 8 bytes) + * H | H0 H1 H2 H3 H4 W0 W1 W2 W3 W4 W5 W6 W7 W8 W9 Wa Wb Wc Wd We Wf | + * X |-11-10 -9 -8 -7 -6 -5 -4 -3 -2 -1 X0 X1 X2 X3 X4 X5 X6 X7 X8 X9 | + * + * The byte offset from X[0] to any member of H and W is always + * representable in a signed 8-bit value, which will be encoded + * as a single byte offset in the X86-64 instruction set. + * If we didn't pass the address of H[11], and instead passed the + * address of H[0], the offsets to elements H[16] and above would be + * greater than 127, not representable in a signed 8-bit value, and the + * x86-64 instruction set would encode every such offset as a 32-bit + * signed number in each instruction that accessed element H[16] or + * higher. This results in much bigger and slower code. + */ +#if !defined(SHA_PUT_W_IN_STACK) +#define H2X 11 /* X[0] is H[11], and H[0] is X[-11] */ +#define W2X 6 /* X[0] is W[6], and W[0] is X[-6] */ +#else +#define H2X 0 +#endif + +/* + * SHA: Add data to context. + */ +void +SHA1_Update(SHA1Context *ctx, const unsigned char *dataIn, unsigned int len) +{ + register unsigned int lenB; + register unsigned int togo; + + if (!len) + return; + + /* accumulate the byte count. 
*/ + lenB = (unsigned int)(ctx->size) & 63U; + + ctx->size += len; + + /* + * Read the data into W and process blocks as they get full + */ + if (lenB > 0) { + togo = 64U - lenB; + if (len < togo) + togo = len; + memcpy(ctx->B + lenB, dataIn, togo); + len -= togo; + dataIn += togo; + lenB = (lenB + togo) & 63U; + if (!lenB) { + shaCompress(&ctx->H[H2X], ctx->W); + } + } +#if !defined(HAVE_UNALIGNED_ACCESS) + if ((ptrdiff_t)dataIn % sizeof(PRUint32)) { + while (len >= 64U) { + memcpy(ctx->B, dataIn, 64); + len -= 64U; + shaCompress(&ctx->H[H2X], ctx->W); + dataIn += 64U; + } + } else +#endif + { + while (len >= 64U) { + len -= 64U; + shaCompress(&ctx->H[H2X], (PRUint32 *)dataIn); + dataIn += 64U; + } + } + if (len) { + memcpy(ctx->B, dataIn, len); + } +} + +/* + * SHA: Generate hash value from context + */ +void NO_SANITIZE_ALIGNMENT +SHA1_End(SHA1Context *ctx, unsigned char *hashout, + unsigned int *pDigestLen, unsigned int maxDigestLen) +{ + register PRUint64 size; + register PRUint32 lenB; + + static const unsigned char bulk_pad[64] = { 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; +#define tmp lenB + + PORT_Assert(maxDigestLen >= SHA1_LENGTH); + + /* + * Pad with a binary 1 (e.g. 0x80), then zeroes, then length in bits + */ + size = ctx->size; + + lenB = (PRUint32)size & 63; + SHA1_Update(ctx, bulk_pad, (((55 + 64) - lenB) & 63) + 1); + PORT_Assert(((PRUint32)ctx->size & 63) == 56); + /* Convert size from bytes to bits. 
*/ + size <<= 3; + ctx->W[14] = SHA_HTONL((PRUint32)(size >> 32)); + ctx->W[15] = SHA_HTONL((PRUint32)size); + shaCompress(&ctx->H[H2X], ctx->W); + + /* + * Output hash + */ + SHA_STORE_RESULT; + if (pDigestLen) { + *pDigestLen = SHA1_LENGTH; + } +#undef tmp +} + +void +SHA1_EndRaw(SHA1Context *ctx, unsigned char *hashout, + unsigned int *pDigestLen, unsigned int maxDigestLen) +{ +#if defined(SHA_NEED_TMP_VARIABLE) + register PRUint32 tmp; +#endif + PORT_Assert(maxDigestLen >= SHA1_LENGTH); + + SHA_STORE_RESULT; + if (pDigestLen) + *pDigestLen = SHA1_LENGTH; +} + +#undef B +/* + * SHA: Compression function, unrolled. + * + * Some operations in shaCompress are done as 5 groups of 16 operations. + * Others are done as 4 groups of 20 operations. + * The code below shows that structure. + * + * The functions that compute the new values of the 5 state variables + * A-E are done in 4 groups of 20 operations (or you may also think + * of them as being done in 16 groups of 5 operations). They are + * done by the SHA_RNDx macros below, in the right column. + * + * The functions that set the 16 values of the W array are done in + * 5 groups of 16 operations. The first group is done by the + * LOAD macros below, the latter 4 groups are done by SHA_MIX below, + * in the left column. + * + * gcc's optimizer observes that each member of the W array is assigned + * a value 5 times in this code. It reduces the number of store + * operations done to the W array in the context (that is, in the X array) + * by creating a W array on the stack, and storing the W values there for + * the first 4 groups of operations on W, and storing the values in the + * context's W array only in the fifth group. This is undesirable. + * It is MUCH bigger code than simply using the context's W array, because + * all the offsets to the W array in the stack are 32-bit signed offsets, + * and it is no faster than storing the values in the context's W array. 
+ * + * The original code for sha_fast.c prevented this creation of a separate + * W array in the stack by creating a W array of 80 members, each of + * whose elements is assigned only once. It also separated the computations + * of the W array values and the computations of the values for the 5 + * state variables into two separate passes, W's, then A-E's so that the + * second pass could be done all in registers (except for accessing the W + * array) on machines with fewer registers. The method is suboptimal + * for machines with enough registers to do it all in one pass, and it + * necessitates using many instructions with 32-bit offsets. + * + * This code eliminates the separate W array on the stack by a completely + * different means: by declaring the X array volatile. This prevents + * the optimizer from trying to reduce the use of the X array by the + * creation of a MORE expensive W array on the stack. The result is + * that all instructions use signed 8-bit offsets and not 32-bit offsets. + * + * The combination of this code and the -O3 optimizer flag on GCC 3.4.3 + * results in code that is 3 times faster than the previous NSS sha_fast + * code on AMD64. 
+ */ +static void NO_SANITIZE_ALIGNMENT +shaCompress(volatile SHA_HW_t *X, const PRUint32 *inbuf) +{ + register SHA_HW_t A, B, C, D, E; + +#if defined(SHA_NEED_TMP_VARIABLE) + register PRUint32 tmp; +#endif + +#if !defined(SHA_PUT_W_IN_STACK) +#define XH(n) X[n - H2X] +#define XW(n) X[n - W2X] +#else + SHA_HW_t w_0, w_1, w_2, w_3, w_4, w_5, w_6, w_7, + w_8, w_9, w_10, w_11, w_12, w_13, w_14, w_15; +#define XW(n) w_##n +#define XH(n) X[n] +#endif + +#define K0 0x5a827999L +#define K1 0x6ed9eba1L +#define K2 0x8f1bbcdcL +#define K3 0xca62c1d6L + +#define SHA_RND1(a, b, c, d, e, n) \ + a = SHA_ROTL(b, 5) + SHA_F1(c, d, e) + a + XW(n) + K0; \ + c = SHA_ROTL(c, 30) +#define SHA_RND2(a, b, c, d, e, n) \ + a = SHA_ROTL(b, 5) + SHA_F2(c, d, e) + a + XW(n) + K1; \ + c = SHA_ROTL(c, 30) +#define SHA_RND3(a, b, c, d, e, n) \ + a = SHA_ROTL(b, 5) + SHA_F3(c, d, e) + a + XW(n) + K2; \ + c = SHA_ROTL(c, 30) +#define SHA_RND4(a, b, c, d, e, n) \ + a = SHA_ROTL(b, 5) + SHA_F4(c, d, e) + a + XW(n) + K3; \ + c = SHA_ROTL(c, 30) + +#define LOAD(n) XW(n) = SHA_HTONL(inbuf[n]) + + A = XH(0); + B = XH(1); + C = XH(2); + D = XH(3); + E = XH(4); + + LOAD(0); + SHA_RND1(E, A, B, C, D, 0); + LOAD(1); + SHA_RND1(D, E, A, B, C, 1); + LOAD(2); + SHA_RND1(C, D, E, A, B, 2); + LOAD(3); + SHA_RND1(B, C, D, E, A, 3); + LOAD(4); + SHA_RND1(A, B, C, D, E, 4); + LOAD(5); + SHA_RND1(E, A, B, C, D, 5); + LOAD(6); + SHA_RND1(D, E, A, B, C, 6); + LOAD(7); + SHA_RND1(C, D, E, A, B, 7); + LOAD(8); + SHA_RND1(B, C, D, E, A, 8); + LOAD(9); + SHA_RND1(A, B, C, D, E, 9); + LOAD(10); + SHA_RND1(E, A, B, C, D, 10); + LOAD(11); + SHA_RND1(D, E, A, B, C, 11); + LOAD(12); + SHA_RND1(C, D, E, A, B, 12); + LOAD(13); + SHA_RND1(B, C, D, E, A, 13); + LOAD(14); + SHA_RND1(A, B, C, D, E, 14); + LOAD(15); + SHA_RND1(E, A, B, C, D, 15); + + SHA_MIX(0, 13, 8, 2); + SHA_RND1(D, E, A, B, C, 0); + SHA_MIX(1, 14, 9, 3); + SHA_RND1(C, D, E, A, B, 1); + SHA_MIX(2, 15, 10, 4); + SHA_RND1(B, C, D, E, A, 2); + SHA_MIX(3, 0, 11, 5); 
+ SHA_RND1(A, B, C, D, E, 3); + + SHA_MIX(4, 1, 12, 6); + SHA_RND2(E, A, B, C, D, 4); + SHA_MIX(5, 2, 13, 7); + SHA_RND2(D, E, A, B, C, 5); + SHA_MIX(6, 3, 14, 8); + SHA_RND2(C, D, E, A, B, 6); + SHA_MIX(7, 4, 15, 9); + SHA_RND2(B, C, D, E, A, 7); + SHA_MIX(8, 5, 0, 10); + SHA_RND2(A, B, C, D, E, 8); + SHA_MIX(9, 6, 1, 11); + SHA_RND2(E, A, B, C, D, 9); + SHA_MIX(10, 7, 2, 12); + SHA_RND2(D, E, A, B, C, 10); + SHA_MIX(11, 8, 3, 13); + SHA_RND2(C, D, E, A, B, 11); + SHA_MIX(12, 9, 4, 14); + SHA_RND2(B, C, D, E, A, 12); + SHA_MIX(13, 10, 5, 15); + SHA_RND2(A, B, C, D, E, 13); + SHA_MIX(14, 11, 6, 0); + SHA_RND2(E, A, B, C, D, 14); + SHA_MIX(15, 12, 7, 1); + SHA_RND2(D, E, A, B, C, 15); + + SHA_MIX(0, 13, 8, 2); + SHA_RND2(C, D, E, A, B, 0); + SHA_MIX(1, 14, 9, 3); + SHA_RND2(B, C, D, E, A, 1); + SHA_MIX(2, 15, 10, 4); + SHA_RND2(A, B, C, D, E, 2); + SHA_MIX(3, 0, 11, 5); + SHA_RND2(E, A, B, C, D, 3); + SHA_MIX(4, 1, 12, 6); + SHA_RND2(D, E, A, B, C, 4); + SHA_MIX(5, 2, 13, 7); + SHA_RND2(C, D, E, A, B, 5); + SHA_MIX(6, 3, 14, 8); + SHA_RND2(B, C, D, E, A, 6); + SHA_MIX(7, 4, 15, 9); + SHA_RND2(A, B, C, D, E, 7); + + SHA_MIX(8, 5, 0, 10); + SHA_RND3(E, A, B, C, D, 8); + SHA_MIX(9, 6, 1, 11); + SHA_RND3(D, E, A, B, C, 9); + SHA_MIX(10, 7, 2, 12); + SHA_RND3(C, D, E, A, B, 10); + SHA_MIX(11, 8, 3, 13); + SHA_RND3(B, C, D, E, A, 11); + SHA_MIX(12, 9, 4, 14); + SHA_RND3(A, B, C, D, E, 12); + SHA_MIX(13, 10, 5, 15); + SHA_RND3(E, A, B, C, D, 13); + SHA_MIX(14, 11, 6, 0); + SHA_RND3(D, E, A, B, C, 14); + SHA_MIX(15, 12, 7, 1); + SHA_RND3(C, D, E, A, B, 15); + + SHA_MIX(0, 13, 8, 2); + SHA_RND3(B, C, D, E, A, 0); + SHA_MIX(1, 14, 9, 3); + SHA_RND3(A, B, C, D, E, 1); + SHA_MIX(2, 15, 10, 4); + SHA_RND3(E, A, B, C, D, 2); + SHA_MIX(3, 0, 11, 5); + SHA_RND3(D, E, A, B, C, 3); + SHA_MIX(4, 1, 12, 6); + SHA_RND3(C, D, E, A, B, 4); + SHA_MIX(5, 2, 13, 7); + SHA_RND3(B, C, D, E, A, 5); + SHA_MIX(6, 3, 14, 8); + SHA_RND3(A, B, C, D, E, 6); + SHA_MIX(7, 4, 15, 9); + SHA_RND3(E, A, B, 
C, D, 7); + SHA_MIX(8, 5, 0, 10); + SHA_RND3(D, E, A, B, C, 8); + SHA_MIX(9, 6, 1, 11); + SHA_RND3(C, D, E, A, B, 9); + SHA_MIX(10, 7, 2, 12); + SHA_RND3(B, C, D, E, A, 10); + SHA_MIX(11, 8, 3, 13); + SHA_RND3(A, B, C, D, E, 11); + + SHA_MIX(12, 9, 4, 14); + SHA_RND4(E, A, B, C, D, 12); + SHA_MIX(13, 10, 5, 15); + SHA_RND4(D, E, A, B, C, 13); + SHA_MIX(14, 11, 6, 0); + SHA_RND4(C, D, E, A, B, 14); + SHA_MIX(15, 12, 7, 1); + SHA_RND4(B, C, D, E, A, 15); + + SHA_MIX(0, 13, 8, 2); + SHA_RND4(A, B, C, D, E, 0); + SHA_MIX(1, 14, 9, 3); + SHA_RND4(E, A, B, C, D, 1); + SHA_MIX(2, 15, 10, 4); + SHA_RND4(D, E, A, B, C, 2); + SHA_MIX(3, 0, 11, 5); + SHA_RND4(C, D, E, A, B, 3); + SHA_MIX(4, 1, 12, 6); + SHA_RND4(B, C, D, E, A, 4); + SHA_MIX(5, 2, 13, 7); + SHA_RND4(A, B, C, D, E, 5); + SHA_MIX(6, 3, 14, 8); + SHA_RND4(E, A, B, C, D, 6); + SHA_MIX(7, 4, 15, 9); + SHA_RND4(D, E, A, B, C, 7); + SHA_MIX(8, 5, 0, 10); + SHA_RND4(C, D, E, A, B, 8); + SHA_MIX(9, 6, 1, 11); + SHA_RND4(B, C, D, E, A, 9); + SHA_MIX(10, 7, 2, 12); + SHA_RND4(A, B, C, D, E, 10); + SHA_MIX(11, 8, 3, 13); + SHA_RND4(E, A, B, C, D, 11); + SHA_MIX(12, 9, 4, 14); + SHA_RND4(D, E, A, B, C, 12); + SHA_MIX(13, 10, 5, 15); + SHA_RND4(C, D, E, A, B, 13); + SHA_MIX(14, 11, 6, 0); + SHA_RND4(B, C, D, E, A, 14); + SHA_MIX(15, 12, 7, 1); + SHA_RND4(A, B, C, D, E, 15); + + XH(0) += A; + XH(1) += B; + XH(2) += C; + XH(3) += D; + XH(4) += E; +} + +/************************************************************************* +** Code below this line added to make SHA code support BLAPI interface +*/ + +SHA1Context * +SHA1_NewContext(void) +{ + SHA1Context *cx; + + /* no need to ZNew, SHA1_Begin will init the context */ + cx = PORT_New(SHA1Context); + return cx; +} + +/* Zero and free the context */ +void +SHA1_DestroyContext(SHA1Context *cx, PRBool freeit) +{ + memset(cx, 0, sizeof *cx); + if (freeit) { + PORT_Free(cx); + } +} + +SECStatus +SHA1_HashBuf(unsigned char *dest, const unsigned char *src, PRUint32 src_length) +{ + 
SHA1Context ctx; + unsigned int outLen; + + SHA1_Begin(&ctx); + SHA1_Update(&ctx, src, src_length); + SHA1_End(&ctx, dest, &outLen, SHA1_LENGTH); + memset(&ctx, 0, sizeof ctx); + return SECSuccess; +} + +/* Hash a null-terminated character string. */ +SECStatus +SHA1_Hash(unsigned char *dest, const char *src) +{ + return SHA1_HashBuf(dest, (const unsigned char *)src, PORT_Strlen(src)); +} + +/* + * need to support save/restore state in pkcs11. Stores all the info necessary + * for a structure into just a stream of bytes. + */ +unsigned int +SHA1_FlattenSize(SHA1Context *cx) +{ + return sizeof(SHA1Context); +} + +SECStatus +SHA1_Flatten(SHA1Context *cx, unsigned char *space) +{ + PORT_Memcpy(space, cx, sizeof(SHA1Context)); + return SECSuccess; +} + +SHA1Context * +SHA1_Resurrect(unsigned char *space, void *arg) +{ + SHA1Context *cx = SHA1_NewContext(); + if (cx == NULL) + return NULL; + + PORT_Memcpy(cx, space, sizeof(SHA1Context)); + return cx; +} + +void +SHA1_Clone(SHA1Context *dest, SHA1Context *src) +{ + memcpy(dest, src, sizeof *dest); +} + +void +SHA1_TraceState(SHA1Context *ctx) +{ + PORT_SetError(PR_NOT_IMPLEMENTED_ERROR); +} diff --git a/security/nss/lib/freebl/sha_fast.h b/security/nss/lib/freebl/sha_fast.h new file mode 100644 index 000000000..4f37d13d0 --- /dev/null +++ b/security/nss/lib/freebl/sha_fast.h @@ -0,0 +1,176 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifndef _SHA_FAST_H_ +#define _SHA_FAST_H_ + +#include "prlong.h" +#include "blapii.h" + +#define SHA1_INPUT_LEN 64 + +#if defined(IS_64) && !defined(__sparc) +typedef PRUint64 SHA_HW_t; +#define SHA1_USING_64_BIT 1 +#else +typedef PRUint32 SHA_HW_t; +#endif + +struct SHA1ContextStr { + union { + PRUint32 w[16]; /* input buffer */ + PRUint8 b[64]; + } u; + PRUint64 size; /* count of hashed bytes. 
*/ + SHA_HW_t H[22]; /* 5 state variables, 16 tmp values, 1 extra */ +}; + +#if defined(_MSC_VER) +#include +#if defined(IS_LITTLE_ENDIAN) +#if (_MSC_VER >= 1300) +#pragma intrinsic(_byteswap_ulong) +#define SHA_HTONL(x) _byteswap_ulong(x) +#elif defined(NSS_X86_OR_X64) +#ifndef FORCEINLINE +#if (_MSC_VER >= 1200) +#define FORCEINLINE __forceinline +#else +#define FORCEINLINE __inline +#endif /* _MSC_VER */ +#endif /* !defined FORCEINLINE */ +#define FASTCALL __fastcall + +static FORCEINLINE PRUint32 FASTCALL +swap4b(PRUint32 dwd) +{ + __asm { + mov eax,dwd + bswap eax + } +} + +#define SHA_HTONL(x) swap4b(x) +#endif /* NSS_X86_OR_X64 */ +#endif /* IS_LITTLE_ENDIAN */ + +#pragma intrinsic(_lrotr, _lrotl) +#define SHA_ROTL(x, n) _lrotl(x, n) +#define SHA_ROTL_IS_DEFINED 1 +#endif /* _MSC_VER */ + +#if defined(__GNUC__) +/* __x86_64__ and __x86_64 are defined by GCC on x86_64 CPUs */ +#if defined(SHA1_USING_64_BIT) +static __inline__ PRUint64 +SHA_ROTL(PRUint64 x, PRUint32 n) +{ + PRUint32 t = (PRUint32)x; + return ((t << n) | (t >> (32 - n))); +} +#else +static __inline__ PRUint32 +SHA_ROTL(PRUint32 t, PRUint32 n) +{ + return ((t << n) | (t >> (32 - n))); +} +#endif +#define SHA_ROTL_IS_DEFINED 1 + +#if defined(NSS_X86_OR_X64) +static __inline__ PRUint32 +swap4b(PRUint32 value) +{ + __asm__("bswap %0" + : "+r"(value)); + return (value); +} +#define SHA_HTONL(x) swap4b(x) + +#elif defined(__thumb2__) || \ + (!defined(__thumb__) && \ + (defined(__ARM_ARCH_6__) || \ + defined(__ARM_ARCH_6J__) || \ + defined(__ARM_ARCH_6K__) || \ + defined(__ARM_ARCH_6Z__) || \ + defined(__ARM_ARCH_6ZK__) || \ + defined(__ARM_ARCH_6T2__) || \ + defined(__ARM_ARCH_7__) || \ + defined(__ARM_ARCH_7A__) || \ + defined(__ARM_ARCH_7R__))) +static __inline__ PRUint32 +swap4b(PRUint32 value) +{ + PRUint32 ret; + __asm__("rev %0, %1" + : "=r"(ret) + : "r"(value)); + return ret; +} +#define SHA_HTONL(x) swap4b(x) + +#endif /* x86 family */ + +#endif /* __GNUC__ */ + +#if 
!defined(SHA_ROTL_IS_DEFINED) +#define SHA_NEED_TMP_VARIABLE 1 +#define SHA_ROTL(X, n) (tmp = (X), ((tmp) << (n)) | ((tmp) >> (32 - (n)))) +#endif + +#if !defined(SHA_HTONL) +#define SHA_MASK 0x00FF00FF +#if defined(IS_LITTLE_ENDIAN) +#undef SHA_NEED_TMP_VARIABLE +#define SHA_NEED_TMP_VARIABLE 1 +#define SHA_HTONL(x) (tmp = (x), tmp = (tmp << 16) | (tmp >> 16), \ + ((tmp & SHA_MASK) << 8) | ((tmp >> 8) & SHA_MASK)) +#else +#define SHA_HTONL(x) (x) +#endif +#endif + +#define SHA_BYTESWAP(x) x = SHA_HTONL(x) + +#define SHA_STORE(n) ((PRUint32*)hashout)[n] = SHA_HTONL(ctx->H[n]) +#if defined(HAVE_UNALIGNED_ACCESS) +#define SHA_STORE_RESULT \ + SHA_STORE(0); \ + SHA_STORE(1); \ + SHA_STORE(2); \ + SHA_STORE(3); \ + SHA_STORE(4); + +#elif defined(IS_LITTLE_ENDIAN) || defined(SHA1_USING_64_BIT) +#define SHA_STORE_RESULT \ + if (!((ptrdiff_t)hashout % sizeof(PRUint32))) { \ + SHA_STORE(0); \ + SHA_STORE(1); \ + SHA_STORE(2); \ + SHA_STORE(3); \ + SHA_STORE(4); \ + } else { \ + PRUint32 tmpbuf[5]; \ + tmpbuf[0] = SHA_HTONL(ctx->H[0]); \ + tmpbuf[1] = SHA_HTONL(ctx->H[1]); \ + tmpbuf[2] = SHA_HTONL(ctx->H[2]); \ + tmpbuf[3] = SHA_HTONL(ctx->H[3]); \ + tmpbuf[4] = SHA_HTONL(ctx->H[4]); \ + memcpy(hashout, tmpbuf, SHA1_LENGTH); \ + } + +#else +#define SHA_STORE_RESULT \ + if (!((ptrdiff_t)hashout % sizeof(PRUint32))) { \ + SHA_STORE(0); \ + SHA_STORE(1); \ + SHA_STORE(2); \ + SHA_STORE(3); \ + SHA_STORE(4); \ + } else { \ + memcpy(hashout, ctx->H, SHA1_LENGTH); \ + } +#endif + +#endif /* _SHA_FAST_H_ */ diff --git a/security/nss/lib/freebl/shsign.h b/security/nss/lib/freebl/shsign.h new file mode 100644 index 000000000..590c0e6b3 --- /dev/null +++ b/security/nss/lib/freebl/shsign.h @@ -0,0 +1,14 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. 
*/ + +#ifndef _SHSIGN_H_ +#define _SHSIGN_H_ + +#define SGN_SUFFIX ".chk" +#define NSS_SIGN_CHK_MAGIC1 0xf1 +#define NSS_SIGN_CHK_MAGIC2 0xc5 +#define NSS_SIGN_CHK_MAJOR_VERSION 0x01 +#define NSS_SIGN_CHK_MINOR_VERSION 0x02 + +#endif /* _SHSIGN_H_ */ diff --git a/security/nss/lib/freebl/shvfy.c b/security/nss/lib/freebl/shvfy.c new file mode 100644 index 000000000..af4a34fb0 --- /dev/null +++ b/security/nss/lib/freebl/shvfy.c @@ -0,0 +1,534 @@ + +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#ifdef FREEBL_NO_DEPEND +#include "stubs.h" +#endif + +#include "shsign.h" +#include "prlink.h" +#include "prio.h" +#include "blapi.h" +#include "seccomon.h" +#include "stdio.h" +#include "prmem.h" +#include "hasht.h" +#include "pqg.h" +#include "blapii.h" + +/* + * Most modern versions of Linux support a speed optimization scheme where an + * application called prelink modifies programs and shared libraries to quickly + * load if they fit into an already designed address space. In short, prelink + * scans the list of programs and libraries on your system, assigns them a + * predefined space in the address space, then provides the fixups to the + * library. + * + * The modification of the shared library is correctly detected by the freebl + * FIPS checksum scheme where we check a signed hash of the library against the + * library itself. + * + * The prelink command itself can reverse the process of modification and + * output the pristine shared library as it was before prelink made its + * changes. If FREEBL_USE_PRELINK is set, Freebl uses prelink to output the + * original copy of the shared library before prelink modified it. 
+ */ +#ifdef FREEBL_USE_PRELINK +#ifndef FREELB_PRELINK_COMMAND +#define FREEBL_PRELINK_COMMAND "/usr/sbin/prelink -u -o -" +#endif +#include "private/pprio.h" + +#include +#include +#include +#include +#include + +/* + * This function returns an NSPR PRFileDesc * which the caller can read to + * obtain the prestine value of the shared library, before any OS related + * changes to it (usually address fixups). + * + * If prelink is installed, this + * file descriptor is a pipe connecting the output of + * /usr/sbin/prelink -u -o - {Library} + * and *pid returns the process id of the prelink child. + * + * If prelink is not installed, it returns a normal readonly handle to the + * library itself and *pid is set to '0'. + */ +PRFileDesc * +bl_OpenUnPrelink(const char *shName, int *pid) +{ + char *command = strdup(FREEBL_PRELINK_COMMAND); + char *argString = NULL; + char **argv = NULL; + char *shNameArg = NULL; + char *cp; + pid_t child; + int argc = 0, argNext = 0; + struct stat statBuf; + int pipefd[2] = { -1, -1 }; + int ret; + + *pid = 0; + + /* make sure the prelink command exists first. 
If not, fall back to + * just reading the file */ + for (cp = command; *cp; cp++) { + if (*cp == ' ') { + *cp++ = 0; + argString = cp; + break; + } + } + memset(&statBuf, 0, sizeof(statBuf)); + /* stat the file, follow the link */ + ret = stat(command, &statBuf); + if (ret < 0) { + free(command); + return PR_Open(shName, PR_RDONLY, 0); + } + /* file exits, make sure it's an executable */ + if (!S_ISREG(statBuf.st_mode) || + ((statBuf.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH)) == 0)) { + free(command); + return PR_Open(shName, PR_RDONLY, 0); + } + + /* OK, the prelink command exists and looks correct, use it */ + /* build the arglist while we can still malloc */ + /* count the args if any */ + if (argString && *argString) { + /* argString may have leading spaces, strip them off*/ + for (cp = argString; *cp && *cp == ' '; cp++) + ; + argString = cp; + if (*cp) { + /* there is at least one arg.. */ + argc = 1; + } + + /* count the rest: Note there is no provision for escaped + * spaces here */ + for (cp = argString; *cp; cp++) { + if (*cp == ' ') { + while (*cp && *cp == ' ') + cp++; + if (*cp) + argc++; + } + } + } + + /* add the additional args: argv[0] (command), shName, NULL*/ + argc += 3; + argv = PORT_NewArray(char *, argc); + if (argv == NULL) { + goto loser; + } + + /* fill in the arglist */ + argv[argNext++] = command; + if (argString && *argString) { + argv[argNext++] = argString; + for (cp = argString; *cp; cp++) { + if (*cp == ' ') { + *cp++ = 0; + while (*cp && *cp == ' ') + cp++; + if (*cp) + argv[argNext++] = cp; + } + } + } + /* exec doesn't advertise taking const char **argv, do the paranoid + * copy */ + shNameArg = strdup(shName); + if (shNameArg == NULL) { + goto loser; + } + argv[argNext++] = shNameArg; + argv[argNext++] = 0; + + ret = pipe(pipefd); + if (ret < 0) { + goto loser; + } + + /* use vfork() so we don't trigger the pthread_at_fork() handlers */ + child = vfork(); + if (child < 0) + goto loser; + if (child == 0) { + /* set up the file 
descriptors */ + /* if we need to support BSD, this will need to be an open of + * /dev/null and dup2(nullFD, 0)*/ + close(0); + /* associate pipefd[1] with stdout */ + if (pipefd[1] != 1) + dup2(pipefd[1], 1); + close(2); + close(pipefd[0]); + /* should probably close the other file descriptors? */ + + execv(command, argv); + /* avoid at_exit() handlers */ + _exit(1); /* shouldn't reach here except on an error */ + } + close(pipefd[1]); + pipefd[1] = -1; + + /* this is safe because either vfork() as full fork() semantics, and thus + * already has it's own address space, or because vfork() has paused + * the parent util the exec or exit */ + free(command); + free(shNameArg); + PORT_Free(argv); + + *pid = child; + + return PR_ImportPipe(pipefd[0]); + +loser: + if (pipefd[0] != -1) { + close(pipefd[0]); + } + if (pipefd[1] != -1) { + close(pipefd[1]); + } + free(command); + free(shNameArg); + PORT_Free(argv); + + return NULL; +} + +/* + * bl_CloseUnPrelink - + * + * This closes the file descripter and reaps and children openned and crated by + * b;_OpenUnprelink. It's primary difference between it and just close is + * that it calls wait on the pid if one is supplied, preventing zombie children + * from hanging around. + */ +void +bl_CloseUnPrelink(PRFileDesc *file, int pid) +{ + /* close the file descriptor */ + PR_Close(file); + /* reap the child */ + if (pid) { + waitpid(pid, NULL, 0); + } +} +#endif + +/* #define DEBUG_SHVERIFY 1 */ + +static char * +mkCheckFileName(const char *libName) +{ + int ln_len = PORT_Strlen(libName); + char *output = PORT_Alloc(ln_len + sizeof(SGN_SUFFIX)); + int index = ln_len + 1 - sizeof("." SHLIB_SUFFIX); + + if ((index > 0) && + (PORT_Strncmp(&libName[index], + "." SHLIB_SUFFIX, sizeof("." 
/*
 * Decode a 4-byte big-endian (most significant byte first) integer.
 *
 * buf must point to at least 4 readable bytes.
 *
 * Each byte is widened to unsigned before shifting: shifting the promoted
 * (signed) int left by 24 when buf[0] >= 0x80 would move a 1 into the sign
 * bit, which is undefined behavior.  The assembled bit pattern is converted
 * to int only once, at the end.
 */
static int
decodeInt(unsigned char *buf)
{
    unsigned int value = ((unsigned int)buf[0] << 24) |
                         ((unsigned int)buf[1] << 16) |
                         ((unsigned int)buf[2] << 8) |
                         (unsigned int)buf[3];

    return (int)value;
}
does not verify */ + unsigned char buf[4096]; + unsigned char hashBuf[HASH_LENGTH_MAX]; + + PORT_Memset(&key, 0, sizeof(key)); + hash.data = hashBuf; + hash.len = sizeof(hashBuf); + + /* If our integrity check was never ran or failed, fail any other + * integrity checks to prevent any token going into FIPS mode. */ + if (!self && (BL_FIPSEntryOK(PR_FALSE) != SECSuccess)) { + return PR_FALSE; + } + + if (!shName) { + goto loser; + } + + /* figure out the name of our check file */ + checkName = mkCheckFileName(shName); + if (!checkName) { + goto loser; + } + + /* open the check File */ + checkFD = PR_Open(checkName, PR_RDONLY, 0); + if (checkFD == NULL) { +#ifdef DEBUG_SHVERIFY + fprintf(stderr, "Failed to open the check file %s: (%d, %d)\n", + checkName, (int)PR_GetError(), (int)PR_GetOSError()); +#endif /* DEBUG_SHVERIFY */ + goto loser; + } + + /* read and Verify the headerthe header */ + bytesRead = PR_Read(checkFD, buf, 12); + if (bytesRead != 12) { + goto loser; + } + if ((buf[0] != NSS_SIGN_CHK_MAGIC1) || (buf[1] != NSS_SIGN_CHK_MAGIC2)) { + goto loser; + } + if ((buf[2] != NSS_SIGN_CHK_MAJOR_VERSION) || + (buf[3] < NSS_SIGN_CHK_MINOR_VERSION)) { + goto loser; + } +#ifdef notdef + if (decodeInt(&buf[8]) != CKK_DSA) { + goto loser; + } +#endif + + /* seek past any future header extensions */ + offset = decodeInt(&buf[4]); + if (PR_Seek(checkFD, offset, PR_SEEK_SET) < 0) { + goto loser; + } + + /* read the key */ + rv = readItem(checkFD, &key.params.prime); + if (rv != SECSuccess) { + goto loser; + } + rv = readItem(checkFD, &key.params.subPrime); + if (rv != SECSuccess) { + goto loser; + } + rv = readItem(checkFD, &key.params.base); + if (rv != SECSuccess) { + goto loser; + } + rv = readItem(checkFD, &key.publicValue); + if (rv != SECSuccess) { + goto loser; + } + /* read the siganture */ + rv = readItem(checkFD, &signature); + if (rv != SECSuccess) { + goto loser; + } + + /* done with the check file */ + PR_Close(checkFD); + checkFD = NULL; + + hashObj = 
HASH_GetRawHashObject(PQG_GetHashType(&key.params)); + if (hashObj == NULL) { + goto loser; + } + +/* open our library file */ +#ifdef FREEBL_USE_PRELINK + shFD = bl_OpenUnPrelink(shName, &pid); +#else + shFD = PR_Open(shName, PR_RDONLY, 0); +#endif + if (shFD == NULL) { +#ifdef DEBUG_SHVERIFY + fprintf(stderr, "Failed to open the library file %s: (%d, %d)\n", + shName, (int)PR_GetError(), (int)PR_GetOSError()); +#endif /* DEBUG_SHVERIFY */ + goto loser; + } + + /* hash our library file with SHA1 */ + hashcx = hashObj->create(); + if (hashcx == NULL) { + goto loser; + } + hashObj->begin(hashcx); + + count = 0; + while ((bytesRead = PR_Read(shFD, buf, sizeof(buf))) > 0) { + hashObj->update(hashcx, buf, bytesRead); + count += bytesRead; + } +#ifdef FREEBL_USE_PRELINK + bl_CloseUnPrelink(shFD, pid); +#else + PR_Close(shFD); +#endif + shFD = NULL; + + hashObj->end(hashcx, hash.data, &hash.len, hash.len); + + /* verify the hash against the check file */ + if (DSA_VerifyDigest(&key, &signature, &hash) == SECSuccess) { + result = PR_TRUE; + } +#ifdef DEBUG_SHVERIFY + { + int i, j; + fprintf(stderr, "File %s: %d bytes\n", shName, count); + fprintf(stderr, " hash: %d bytes\n", hash.len); +#define STEP 10 + for (i = 0; i < hash.len; i += STEP) { + fprintf(stderr, " "); + for (j = 0; j < STEP && (i + j) < hash.len; j++) { + fprintf(stderr, " %02x", hash.data[i + j]); + } + fprintf(stderr, "\n"); + } + fprintf(stderr, " signature: %d bytes\n", signature.len); + for (i = 0; i < signature.len; i += STEP) { + fprintf(stderr, " "); + for (j = 0; j < STEP && (i + j) < signature.len; j++) { + fprintf(stderr, " %02x", signature.data[i + j]); + } + fprintf(stderr, "\n"); + } + fprintf(stderr, "Verified : %s\n", result ? 
"TRUE" : "FALSE"); + } +#endif /* DEBUG_SHVERIFY */ + +loser: + if (checkName != NULL) { + PORT_Free(checkName); + } + if (checkFD != NULL) { + PR_Close(checkFD); + } + if (shFD != NULL) { + PR_Close(shFD); + } + if (hashcx != NULL) { + if (hashObj) { + hashObj->destroy(hashcx, PR_TRUE); + } + } + if (signature.data != NULL) { + PORT_Free(signature.data); + } + if (key.params.prime.data != NULL) { + PORT_Free(key.params.prime.data); + } + if (key.params.subPrime.data != NULL) { + PORT_Free(key.params.subPrime.data); + } + if (key.params.base.data != NULL) { + PORT_Free(key.params.base.data); + } + if (key.publicValue.data != NULL) { + PORT_Free(key.publicValue.data); + } + + return result; +} + +PRBool +BLAPI_VerifySelf(const char *name) +{ + if (name == NULL) { + /* + * If name is NULL, freebl is statically linked into softoken. + * softoken will call BLAPI_SHVerify next to verify itself. + */ + return PR_TRUE; + } + return blapi_SHVerify(name, (PRFuncPtr)decodeInt, PR_TRUE); +} diff --git a/security/nss/lib/freebl/stubs.c b/security/nss/lib/freebl/stubs.c new file mode 100644 index 000000000..8e0784935 --- /dev/null +++ b/security/nss/lib/freebl/stubs.c @@ -0,0 +1,711 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +/* + * Allow freebl and softoken to be loaded without util or NSPR. + * + * These symbols are overridden once real NSPR, and libutil are attached. + */ +#define _GNU_SOURCE 1 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define FREEBL_NO_WEAK 1 + +#define WEAK __attribute__((weak)) + +#ifdef FREEBL_NO_WEAK + +/* + * This uses function pointers. 
+ * + * CONS: A separate function is needed to + * fill in the function pointers. + * + * PROS: it works on all platforms. + * it allows for dynamically finding nspr and libutil, even once + * softoken is loaded and running. (NOTE: this may be a problem if + * we switch between the stubs and real NSPR on the fly. NSPR will + * do bad things if passed an _FakeArena to free or allocate from). + */ +#define STUB_DECLARE(ret, fn, args) \ + typedef ret(*type_##fn) args; \ + static type_##fn ptr_##fn = NULL + +#define STUB_SAFE_CALL0(fn) \ + if (ptr_##fn) { \ + return ptr_##fn(); \ + } +#define STUB_SAFE_CALL1(fn, a1) \ + if (ptr_##fn) { \ + return ptr_##fn(a1); \ + } +#define STUB_SAFE_CALL2(fn, a1, a2) \ + if (ptr_##fn) { \ + return ptr_##fn(a1, a2); \ + } +#define STUB_SAFE_CALL3(fn, a1, a2, a3) \ + if (ptr_##fn) { \ + return ptr_##fn(a1, a2, a3); \ + } +#define STUB_SAFE_CALL4(fn, a1, a2, a3, a4) \ + if (ptr_##fn) { \ + return ptr_##fn(a1, a2, a3, a4); \ + } +#define STUB_SAFE_CALL6(fn, a1, a2, a3, a4, a5, a6) \ + if (ptr_##fn) { \ + return ptr_##fn(a1, a2, a3, a4, a5, a6); \ + } + +#define STUB_FETCH_FUNCTION(fn) \ + ptr_##fn = (type_##fn)dlsym(lib, #fn); \ + if (ptr_##fn == NULL) { \ + return SECFailure; \ + } + +#else +/* + * this uses the loader weak attribute. it works automatically, but once + * freebl is loaded, the symbols are 'fixed' (later loading of NSPR or + * libutil will not resolve these symbols). 
+ */ + +#define STUB_DECLARE(ret, fn, args) \ + WEAK extern ret fn args + +#define STUB_SAFE_CALL0(fn) \ + if (fn) { \ + return fn(); \ + } +#define STUB_SAFE_CALL1(fn, a1) \ + if (fn) { \ + return fn(a1); \ + } +#define STUB_SAFE_CALL2(fn, a1, a2) \ + if (fn) { \ + return fn(a1, a2); \ + } +#define STUB_SAFE_CALL3(fn, a1, a2, a3) \ + if (fn) { \ + return fn(a1, a2, a3); \ + } +#define STUB_SAFE_CALL4(fn, a1, a2, a3, a4) \ + if (fn) { \ + return fn(a1, a2, a3, a4); \ + } +#define STUB_SAFE_CALL6(fn, a1, a2, a3, a4, a5, a6) \ + if (fn) { \ + return fn(a1, a2, a3, a4, a5, a6); \ + } +#endif + +STUB_DECLARE(void *, PORT_Alloc_Util, (size_t len)); +STUB_DECLARE(void *, PORT_ArenaAlloc_Util, (PLArenaPool * arena, size_t size)); +STUB_DECLARE(void *, PORT_ArenaZAlloc_Util, (PLArenaPool * arena, size_t size)); +STUB_DECLARE(void, PORT_Free_Util, (void *ptr)); +STUB_DECLARE(void, PORT_FreeArena_Util, (PLArenaPool * arena, PRBool zero)); +STUB_DECLARE(int, PORT_GetError_Util, (void)); +STUB_DECLARE(PLArenaPool *, PORT_NewArena_Util, (unsigned long chunksize)); +STUB_DECLARE(void, PORT_SetError_Util, (int value)); +STUB_DECLARE(void *, PORT_ZAlloc_Util, (size_t len)); +STUB_DECLARE(void, PORT_ZFree_Util, (void *ptr, size_t len)); + +STUB_DECLARE(void, PR_Assert, (const char *s, const char *file, PRIntn ln)); +STUB_DECLARE(PRStatus, PR_Access, (const char *name, PRAccessHow how)); +STUB_DECLARE(PRStatus, PR_CallOnce, (PRCallOnceType * once, PRCallOnceFN func)); +STUB_DECLARE(PRStatus, PR_Close, (PRFileDesc * fd)); +STUB_DECLARE(void, PR_DestroyLock, (PRLock * lock)); +STUB_DECLARE(void, PR_DestroyCondVar, (PRCondVar * cvar)); +STUB_DECLARE(void, PR_Free, (void *ptr)); +STUB_DECLARE(char *, PR_GetLibraryFilePathname, (const char *name, + PRFuncPtr addr)); +STUB_DECLARE(PRFileDesc *, PR_ImportPipe, (PROsfd osfd)); +STUB_DECLARE(void, PR_Lock, (PRLock * lock)); +STUB_DECLARE(PRCondVar *, PR_NewCondVar, (PRLock * lock)); +STUB_DECLARE(PRLock *, PR_NewLock, (void)); 
+STUB_DECLARE(PRStatus, PR_NotifyCondVar, (PRCondVar * cvar)); +STUB_DECLARE(PRStatus, PR_NotifyAllCondVar, (PRCondVar * cvar)); +STUB_DECLARE(PRFileDesc *, PR_Open, (const char *name, PRIntn flags, + PRIntn mode)); +STUB_DECLARE(PRInt32, PR_Read, (PRFileDesc * fd, void *buf, PRInt32 amount)); +STUB_DECLARE(PROffset32, PR_Seek, (PRFileDesc * fd, PROffset32 offset, + PRSeekWhence whence)); +STUB_DECLARE(PRStatus, PR_Sleep, (PRIntervalTime ticks)); +STUB_DECLARE(PRStatus, PR_Unlock, (PRLock * lock)); +STUB_DECLARE(PRStatus, PR_WaitCondVar, (PRCondVar * cvar, + PRIntervalTime timeout)); +STUB_DECLARE(char *, PR_GetEnvSecure, (const char *)); + +STUB_DECLARE(SECItem *, SECITEM_AllocItem_Util, (PLArenaPool * arena, + SECItem *item, unsigned int len)); +STUB_DECLARE(SECComparison, SECITEM_CompareItem_Util, (const SECItem *a, + const SECItem *b)); +STUB_DECLARE(SECStatus, SECITEM_CopyItem_Util, (PLArenaPool * arena, + SECItem *to, const SECItem *from)); +STUB_DECLARE(void, SECITEM_FreeItem_Util, (SECItem * zap, PRBool freeit)); +STUB_DECLARE(void, SECITEM_ZfreeItem_Util, (SECItem * zap, PRBool freeit)); +STUB_DECLARE(SECOidTag, SECOID_FindOIDTag_Util, (const SECItem *oid)); +STUB_DECLARE(int, NSS_SecureMemcmp, (const void *a, const void *b, size_t n)); + +#define PORT_ZNew_stub(type) (type *)PORT_ZAlloc_stub(sizeof(type)) +#define PORT_New_stub(type) (type *)PORT_Alloc_stub(sizeof(type)) +#define PORT_ZNewArray_stub(type, num) \ + (type *)PORT_ZAlloc_stub(sizeof(type) * (num)) + +/* + * NOTE: in order to support hashing only the memory allocation stubs, + * the get library name stubs, and the file io stubs are needed (the latter + * two are for the library verification). The remaining stubs are simply to + * compile. Attempts to use the library for other operations without NSPR + * will most likely fail. 
+ */ + +/* memory */ +extern void * +PORT_Alloc_stub(size_t len) +{ + STUB_SAFE_CALL1(PORT_Alloc_Util, len); + return malloc(len); +} + +extern void +PORT_Free_stub(void *ptr) +{ + STUB_SAFE_CALL1(PORT_Free_Util, ptr); + return free(ptr); +} + +extern void * +PORT_ZAlloc_stub(size_t len) +{ + STUB_SAFE_CALL1(PORT_ZAlloc_Util, len); + void *ptr = malloc(len); + if (ptr) { + memset(ptr, 0, len); + } + return ptr; +} + +extern void +PORT_ZFree_stub(void *ptr, size_t len) +{ + STUB_SAFE_CALL2(PORT_ZFree_Util, ptr, len); + memset(ptr, 0, len); + return free(ptr); +} + +extern void +PR_Free_stub(void *ptr) +{ + STUB_SAFE_CALL1(PR_Free, ptr); + return free(ptr); +} + +/* + * arenas + * + */ +extern PLArenaPool * +PORT_NewArena_stub(unsigned long chunksize) +{ + STUB_SAFE_CALL1(PORT_NewArena_Util, chunksize); + abort(); + return NULL; +} + +extern void * +PORT_ArenaAlloc_stub(PLArenaPool *arena, size_t size) +{ + + STUB_SAFE_CALL2(PORT_ArenaZAlloc_Util, arena, size); + abort(); + return NULL; +} + +extern void * +PORT_ArenaZAlloc_stub(PLArenaPool *arena, size_t size) +{ + + STUB_SAFE_CALL2(PORT_ArenaZAlloc_Util, arena, size); + abort(); + return NULL; +} + +extern void +PORT_FreeArena_stub(PLArenaPool *arena, PRBool zero) +{ + + STUB_SAFE_CALL2(PORT_FreeArena_Util, arena, zero); + abort(); +} + +/* io */ +extern PRFileDesc * +PR_Open_stub(const char *name, PRIntn flags, PRIntn mode) +{ + int *lfd = NULL; + int fd; + int lflags = 0; + + STUB_SAFE_CALL3(PR_Open, name, flags, mode); + + if (flags & PR_RDWR) { + lflags = O_RDWR; + } else if (flags & PR_WRONLY) { + lflags = O_WRONLY; + } else { + lflags = O_RDONLY; + } + + if (flags & PR_EXCL) + lflags |= O_EXCL; + if (flags & PR_APPEND) + lflags |= O_APPEND; + if (flags & PR_TRUNCATE) + lflags |= O_TRUNC; + + fd = open(name, lflags, mode); + if (fd >= 0) { + lfd = PORT_New_stub(int); + if (lfd != NULL) { + *lfd = fd; + } else { + close(fd); + } + } + return (PRFileDesc *)lfd; +} + +extern PRFileDesc * 
+PR_ImportPipe_stub(PROsfd fd) +{ + int *lfd = NULL; + + STUB_SAFE_CALL1(PR_ImportPipe, fd); + + lfd = PORT_New_stub(int); + if (lfd != NULL) { + *lfd = fd; + } + return (PRFileDesc *)lfd; +} + +extern PRStatus +PR_Close_stub(PRFileDesc *fd) +{ + int *lfd; + STUB_SAFE_CALL1(PR_Close, fd); + + lfd = (int *)fd; + close(*lfd); + PORT_Free_stub(lfd); + + return PR_SUCCESS; +} + +extern PRInt32 +PR_Read_stub(PRFileDesc *fd, void *buf, PRInt32 amount) +{ + int *lfd; + STUB_SAFE_CALL3(PR_Read, fd, buf, amount); + + lfd = (int *)fd; + return read(*lfd, buf, amount); +} + +extern PROffset32 +PR_Seek_stub(PRFileDesc *fd, PROffset32 offset, PRSeekWhence whence) +{ + int *lfd; + int lwhence = SEEK_SET; + STUB_SAFE_CALL3(PR_Seek, fd, offset, whence); + lfd = (int *)fd; + switch (whence) { + case PR_SEEK_CUR: + lwhence = SEEK_CUR; + break; + case PR_SEEK_END: + lwhence = SEEK_END; + break; + case PR_SEEK_SET: + break; + } + + return lseek(*lfd, offset, lwhence); +} + +PRStatus +PR_Access_stub(const char *name, PRAccessHow how) +{ + int mode = F_OK; + int rv; + STUB_SAFE_CALL2(PR_Access, name, how); + switch (how) { + case PR_ACCESS_WRITE_OK: + mode = W_OK; + break; + case PR_ACCESS_READ_OK: + mode = R_OK; + break; + /* assume F_OK for all others */ + default: + break; + } + rv = access(name, mode); + if (rv == 0) { + return PR_SUCCESS; + } + return PR_FAILURE; +} + +/* + * library + */ +extern char * +PR_GetLibraryFilePathname_stub(const char *name, PRFuncPtr addr) +{ + Dl_info dli; + char *result; + + STUB_SAFE_CALL2(PR_GetLibraryFilePathname, name, addr); + + if (dladdr((void *)addr, &dli) == 0) { + return NULL; + } + result = PORT_Alloc_stub(strlen(dli.dli_fname) + 1); + if (result != NULL) { + strcpy(result, dli.dli_fname); + } + return result; +} + +#include + +/* errors */ +extern int +PORT_GetError_stub(void) +{ + STUB_SAFE_CALL0(PORT_GetError_Util); + return errno; +} + +extern void +PORT_SetError_stub(int value) +{ + STUB_SAFE_CALL1(PORT_SetError_Util, value); + errno = 
value; +} + +/* misc */ +extern void +PR_Assert_stub(const char *s, const char *file, PRIntn ln) +{ + STUB_SAFE_CALL3(PR_Assert, s, file, ln); + fprintf(stderr, "%s line %d: %s\n", file, ln, s); + abort(); +} + +/* time */ +extern PRStatus +PR_Sleep_stub(PRIntervalTime ticks) +{ + STUB_SAFE_CALL1(PR_Sleep, ticks); + usleep(ticks * 1000); + return PR_SUCCESS; +} + +/* locking */ +extern PRLock * +PR_NewLock_stub(void) +{ + STUB_SAFE_CALL0(PR_NewLock); + abort(); + return NULL; +} + +extern PRStatus +PR_Unlock_stub(PRLock *lock) +{ + STUB_SAFE_CALL1(PR_Unlock, lock); + abort(); + return PR_FAILURE; +} + +extern void +PR_Lock_stub(PRLock *lock) +{ + STUB_SAFE_CALL1(PR_Lock, lock); + abort(); + return; +} + +extern void +PR_DestroyLock_stub(PRLock *lock) +{ + STUB_SAFE_CALL1(PR_DestroyLock, lock); + abort(); + return; +} + +extern PRCondVar * +PR_NewCondVar_stub(PRLock *lock) +{ + STUB_SAFE_CALL1(PR_NewCondVar, lock); + abort(); + return NULL; +} + +extern PRStatus +PR_NotifyCondVar_stub(PRCondVar *cvar) +{ + STUB_SAFE_CALL1(PR_NotifyCondVar, cvar); + abort(); + return PR_FAILURE; +} + +extern PRStatus +PR_NotifyAllCondVar_stub(PRCondVar *cvar) +{ + STUB_SAFE_CALL1(PR_NotifyAllCondVar, cvar); + abort(); + return PR_FAILURE; +} + +extern PRStatus +PR_WaitCondVar_stub(PRCondVar *cvar, PRIntervalTime timeout) +{ + STUB_SAFE_CALL2(PR_WaitCondVar, cvar, timeout); + abort(); + return PR_FAILURE; +} + +extern char * +PR_GetEnvSecure_stub(const char *var) +{ + STUB_SAFE_CALL1(PR_GetEnvSecure, var); + abort(); + return NULL; +} + +extern void +PR_DestroyCondVar_stub(PRCondVar *cvar) +{ + STUB_SAFE_CALL1(PR_DestroyCondVar, cvar); + abort(); + return; +} + +/* + * NOTE: this presupposes GCC 4.1 + */ +extern PRStatus +PR_CallOnce_stub(PRCallOnceType *once, PRCallOnceFN func) +{ + STUB_SAFE_CALL2(PR_CallOnce, once, func); + abort(); + return PR_FAILURE; +} + +/* + * SECITEMS implement Item Utilities + */ +extern void +SECITEM_FreeItem_stub(SECItem *zap, PRBool freeit) +{ + 
STUB_SAFE_CALL2(SECITEM_FreeItem_Util, zap, freeit); + abort(); +} + +extern SECItem * +SECITEM_AllocItem_stub(PLArenaPool *arena, SECItem *item, unsigned int len) +{ + STUB_SAFE_CALL3(SECITEM_AllocItem_Util, arena, item, len); + abort(); + return NULL; +} + +extern SECComparison +SECITEM_CompareItem_stub(const SECItem *a, const SECItem *b) +{ + STUB_SAFE_CALL2(SECITEM_CompareItem_Util, a, b); + abort(); + return SECEqual; +} + +extern SECStatus +SECITEM_CopyItem_stub(PLArenaPool *arena, SECItem *to, const SECItem *from) +{ + STUB_SAFE_CALL3(SECITEM_CopyItem_Util, arena, to, from); + abort(); + return SECFailure; +} + +extern SECOidTag +SECOID_FindOIDTag_stub(const SECItem *oid) +{ + STUB_SAFE_CALL1(SECOID_FindOIDTag_Util, oid); + abort(); + return SEC_OID_UNKNOWN; +} + +extern void +SECITEM_ZfreeItem_stub(SECItem *zap, PRBool freeit) +{ + STUB_SAFE_CALL2(SECITEM_ZfreeItem_Util, zap, freeit); + abort(); +} + +extern int +NSS_SecureMemcmp_stub(const void *a, const void *b, size_t n) +{ + STUB_SAFE_CALL3(NSS_SecureMemcmp, a, b, n); + abort(); +} + +#ifdef FREEBL_NO_WEAK + +static const char *nsprLibName = SHLIB_PREFIX "nspr4." SHLIB_SUFFIX; +static const char *nssutilLibName = SHLIB_PREFIX "nssutil3." 
SHLIB_SUFFIX; + +static SECStatus +freebl_InitNSPR(void *lib) +{ + STUB_FETCH_FUNCTION(PR_Free); + STUB_FETCH_FUNCTION(PR_Open); + STUB_FETCH_FUNCTION(PR_ImportPipe); + STUB_FETCH_FUNCTION(PR_Close); + STUB_FETCH_FUNCTION(PR_Read); + STUB_FETCH_FUNCTION(PR_Seek); + STUB_FETCH_FUNCTION(PR_GetLibraryFilePathname); + STUB_FETCH_FUNCTION(PR_Assert); + STUB_FETCH_FUNCTION(PR_Access); + STUB_FETCH_FUNCTION(PR_Sleep); + STUB_FETCH_FUNCTION(PR_CallOnce); + STUB_FETCH_FUNCTION(PR_NewCondVar); + STUB_FETCH_FUNCTION(PR_NotifyCondVar); + STUB_FETCH_FUNCTION(PR_NotifyAllCondVar); + STUB_FETCH_FUNCTION(PR_WaitCondVar); + STUB_FETCH_FUNCTION(PR_DestroyCondVar); + STUB_FETCH_FUNCTION(PR_NewLock); + STUB_FETCH_FUNCTION(PR_Unlock); + STUB_FETCH_FUNCTION(PR_Lock); + STUB_FETCH_FUNCTION(PR_DestroyLock); + STUB_FETCH_FUNCTION(PR_GetEnvSecure); + return SECSuccess; +} + +static SECStatus +freebl_InitNSSUtil(void *lib) +{ + STUB_FETCH_FUNCTION(PORT_Alloc_Util); + STUB_FETCH_FUNCTION(PORT_Free_Util); + STUB_FETCH_FUNCTION(PORT_ZAlloc_Util); + STUB_FETCH_FUNCTION(PORT_ZFree_Util); + STUB_FETCH_FUNCTION(PORT_NewArena_Util); + STUB_FETCH_FUNCTION(PORT_ArenaAlloc_Util); + STUB_FETCH_FUNCTION(PORT_ArenaZAlloc_Util); + STUB_FETCH_FUNCTION(PORT_FreeArena_Util); + STUB_FETCH_FUNCTION(PORT_GetError_Util); + STUB_FETCH_FUNCTION(PORT_SetError_Util); + STUB_FETCH_FUNCTION(SECITEM_FreeItem_Util); + STUB_FETCH_FUNCTION(SECITEM_AllocItem_Util); + STUB_FETCH_FUNCTION(SECITEM_CompareItem_Util); + STUB_FETCH_FUNCTION(SECITEM_CopyItem_Util); + STUB_FETCH_FUNCTION(SECITEM_ZfreeItem_Util); + STUB_FETCH_FUNCTION(SECOID_FindOIDTag_Util); + STUB_FETCH_FUNCTION(NSS_SecureMemcmp); + return SECSuccess; +} + +/* + * fetch the library if it's loaded. 
For NSS it should already be loaded
+ */
+#define freebl_getLibrary(libName) \
+    dlopen(libName, RTLD_LAZY | RTLD_NOLOAD)
+
+/*
+ * Close a handle obtained from freebl_getLibrary(); a NULL handle is ignored.
+ * Wrapped in do/while (0) so the macro expands to exactly one statement and
+ * cannot capture an "else" that follows it.
+ */
+#define freebl_releaseLibrary(lib) \
+    do {                           \
+        if (lib)                   \
+            dlclose(lib);          \
+    } while (0)
+
+static void *FREEBLnsprGlobalLib = NULL;
+static void *FREEBLnssutilGlobalLib = NULL;
+
+/* destructor: close the handles adopted by FREEBL_InitStubs() */
+void __attribute((destructor)) FREEBL_unload(void)
+{
+    freebl_releaseLibrary(FREEBLnsprGlobalLib);
+    freebl_releaseLibrary(FREEBLnssutilGlobalLib);
+}
+#endif
+
+/*
+ * load the symbols from the real libraries if available.
+ *
+ * With FREEBL_NO_WEAK, NSPR and then NSSUTIL are looked up with RTLD_NOLOAD,
+ * so they are only picked up when they are already loaded; there is no
+ * option to force-load them here. If a library is not loaded, or its init
+ * routine (freebl_InitNSPR / freebl_InitNSSUtil) fails, SECFailure or that
+ * routine's status is returned. A library whose handle was adopted by an
+ * earlier call is not looked up again.
+ *
+ * Without FREEBL_NO_WEAK nothing is done and SECSuccess is returned.
+ */
+extern SECStatus
+FREEBL_InitStubs(void)
+{
+    SECStatus rv = SECSuccess;
+#ifdef FREEBL_NO_WEAK
+    void *nspr = NULL;
+    void *nssutil = NULL;
+
+    /* NSPR should be first */
+    if (!FREEBLnsprGlobalLib) {
+        nspr = freebl_getLibrary(nsprLibName);
+        if (!nspr) {
+            return SECFailure;
+        }
+        rv = freebl_InitNSPR(nspr);
+        if (rv != SECSuccess) {
+            freebl_releaseLibrary(nspr);
+            return rv;
+        }
+        FREEBLnsprGlobalLib = nspr; /* adopt */
+    }
+    /* now load NSSUTIL */
+    if (!FREEBLnssutilGlobalLib) {
+        nssutil = freebl_getLibrary(nssutilLibName);
+        if (!nssutil) {
+            return SECFailure;
+        }
+        rv = freebl_InitNSSUtil(nssutil);
+        if (rv != SECSuccess) {
+            freebl_releaseLibrary(nssutil);
+            return rv;
+        }
+        FREEBLnssutilGlobalLib = nssutil; /* adopt */
+    }
+#endif
+
+    return rv;
+}
diff --git a/security/nss/lib/freebl/stubs.h b/security/nss/lib/freebl/stubs.h
new file mode 100644
index 000000000..25ec394ec
--- /dev/null
+++ b/security/nss/lib/freebl/stubs.h
@@ -0,0 +1,66 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+/*
+ * Allow freebl and softoken to be loaded without util or NSPR.
+ *
+ * These symbols are overridden once real NSPR, and libutil are attached.
+ */
+
+/*
+ * Include guard. The macro tested here must be the one defined on the next
+ * line; otherwise a repeated inclusion reaches the "included too late" check
+ * below with _LIBUTIL_H_ already defined by this header itself.
+ */
+#ifndef _STUBS_H_
+#define _STUBS_H_ 1
+
+#ifdef _LIBUTIL_H_
+/* must be included before util */
+/*#error stubs.h included too late */
+#define MP_DIGITES(x) "stubs included too late"
+#endif
+
+/* hide libutil rename */
+#define _LIBUTIL_H_ 1
+
+#define PORT_Alloc PORT_Alloc_stub
+#define PORT_ArenaAlloc PORT_ArenaAlloc_stub
+#define PORT_ArenaZAlloc PORT_ArenaZAlloc_stub
+#define PORT_Free PORT_Free_stub
+#define PORT_FreeArena PORT_FreeArena_stub
+#define PORT_GetError PORT_GetError_stub
+#define PORT_NewArena PORT_NewArena_stub
+#define PORT_SetError PORT_SetError_stub
+#define PORT_ZAlloc PORT_ZAlloc_stub
+#define PORT_ZFree PORT_ZFree_stub
+
+#define SECITEM_AllocItem SECITEM_AllocItem_stub
+#define SECITEM_CompareItem SECITEM_CompareItem_stub
+#define SECITEM_CopyItem SECITEM_CopyItem_stub
+#define SECITEM_FreeItem SECITEM_FreeItem_stub
+#define SECITEM_ZfreeItem SECITEM_ZfreeItem_stub
+#define SECOID_FindOIDTag SECOID_FindOIDTag_stub
+#define NSS_SecureMemcmp NSS_SecureMemcmp_stub
+
+#define PR_Assert PR_Assert_stub
+#define PR_Access PR_Access_stub
+#define PR_CallOnce PR_CallOnce_stub
+#define PR_Close PR_Close_stub
+#define PR_DestroyCondVar PR_DestroyCondVar_stub
+#define PR_DestroyLock PR_DestroyLock_stub
+#define PR_Free PR_Free_stub
+#define PR_GetLibraryFilePathname PR_GetLibraryFilePathname_stub
+#define PR_ImportPipe PR_ImportPipe_stub
+#define PR_Lock PR_Lock_stub
+#define PR_NewCondVar PR_NewCondVar_stub
+#define PR_NewLock PR_NewLock_stub
+#define PR_NotifyCondVar PR_NotifyCondVar_stub
+#define PR_NotifyAllCondVar PR_NotifyAllCondVar_stub
+#define PR_Open PR_Open_stub
+#define PR_Read PR_Read_stub
+#define PR_Seek PR_Seek_stub
+#define PR_Sleep PR_Sleep_stub
+#define PR_Unlock PR_Unlock_stub
+#define PR_WaitCondVar PR_WaitCondVar_stub
+#define PR_GetEnvSecure PR_GetEnvSecure_stub
+
+extern int FREEBL_InitStubs(void);
+
+#endif /* _STUBS_H_ */
diff --git a/security/nss/lib/freebl/sysrand.c b/security/nss/lib/freebl/sysrand.c
new file mode 100644
index 
000000000..0128fa0ee
--- /dev/null
+++ b/security/nss/lib/freebl/sysrand.c
@@ -0,0 +1,49 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "seccomon.h"
+
+#ifndef XP_WIN
+static size_t rng_systemFromNoise(unsigned char *dest, size_t maxLen);
+#endif
+
+#if defined(XP_UNIX) || defined(XP_BEOS)
+#include "unix_rand.c"
+#endif
+#ifdef XP_WIN
+#include "win_rand.c"
+#endif
+#ifdef XP_OS2
+#include "os2_rand.c"
+#endif
+
+#ifndef XP_WIN
+/*
+ * Normal RNG_SystemRNG() isn't available, use the system noise to collect
+ * the required amount of entropy.
+ *
+ * Fills dest with up to maxLen bytes taken from repeated RNG_GetNoise()
+ * calls and returns the number of bytes written. That is maxLen unless
+ * RNG_GetNoise() returns 0, in which case collection stops early.
+ */
+static size_t
+rng_systemFromNoise(unsigned char *dest, size_t maxLen)
+{
+    size_t retBytes = maxLen;
+
+    while (maxLen) {
+        size_t nbytes = RNG_GetNoise(dest, maxLen);
+
+        PORT_Assert(nbytes != 0);
+        if (nbytes == 0) {
+            /* the assertion above may be compiled out; with no progress
+             * this loop would never terminate */
+            break;
+        }
+
+        dest += nbytes;
+        maxLen -= nbytes;
+
+        /* some hw op to try to introduce more entropy into the next
+         * RNG_GetNoise call */
+        rng_systemJitter();
+    }
+    /* maxLen now holds the bytes still missing (0 when fully satisfied) */
+    return retBytes - maxLen;
+}
+#endif
diff --git a/security/nss/lib/freebl/tlsprfalg.c b/security/nss/lib/freebl/tlsprfalg.c
new file mode 100644
index 000000000..1e5e67886
--- /dev/null
+++ b/security/nss/lib/freebl/tlsprfalg.c
@@ -0,0 +1,134 @@
+/* tlsprfalg.c - TLS Pseudo Random Function (PRF) implementation
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
+
+#ifdef FREEBL_NO_DEPEND
+#include "stubs.h"
+#endif
+
+#include "blapi.h"
+#include "hasht.h"
+#include "alghmac.h"
+
+#define PHASH_STATE_MAX_LEN HASH_LENGTH_MAX
+
+/* TLS P_hash function
+ *
+ * Fills result->data with result->len bytes of the stream
+ *   HMAC(secret, A(1) || label || seed) || HMAC(secret, A(2) || label || seed) ...
+ * where A(1) = HMAC(secret, label || seed), A(i) = HMAC(secret, A(i-1)) and
+ * the HMAC is built on the hash selected by hashType. A NULL label is
+ * treated as empty.
+ *
+ * Returns SECSuccess, or SECFailure when the HMAC context cannot be created
+ * or an HMAC_Finish call fails. The intermediate state and output buffers
+ * are zeroed before returning.
+ */
+SECStatus
+TLS_P_hash(HASH_HashType hashType, const SECItem *secret, const char *label,
+           SECItem *seed, SECItem *result, PRBool isFIPS)
+{
+    unsigned char state[PHASH_STATE_MAX_LEN];
+    unsigned char outbuf[PHASH_STATE_MAX_LEN];
+    unsigned int state_len = 0, label_len = 0, outbuf_len = 0, chunk_size;
+    unsigned int remaining;
+    unsigned char *res;
+    SECStatus status;
+    HMACContext *cx;
+    SECStatus rv = SECFailure;
+    const SECHashObject *hashObj = HASH_GetRawHashObject(hashType);
+
+    PORT_Assert((secret != NULL) && (secret->data != NULL || !secret->len));
+    PORT_Assert((seed != NULL) && (seed->data != NULL));
+    PORT_Assert((result != NULL) && (result->data != NULL));
+
+    remaining = result->len;
+    res = result->data;
+
+    if (label != NULL)
+        label_len = PORT_Strlen(label);
+
+    cx = HMAC_Create(hashObj, secret->data, secret->len, isFIPS);
+    if (cx == NULL)
+        goto loser;
+
+    /* initialize the state = A(1) = HMAC_hash(secret, seed) */
+    HMAC_Begin(cx);
+    /* skip an absent/empty label, exactly as the loop below does, so a
+     * NULL pointer is never handed to HMAC_Update */
+    if (label_len)
+        HMAC_Update(cx, (unsigned char *)label, label_len);
+    HMAC_Update(cx, seed->data, seed->len);
+    status = HMAC_Finish(cx, state, &state_len, sizeof(state));
+    if (status != SECSuccess)
+        goto loser;
+
+    /* generate a block at a time until we're done */
+    while (remaining > 0) {
+
+        HMAC_Begin(cx);
+        HMAC_Update(cx, state, state_len);
+        if (label_len)
+            HMAC_Update(cx, (unsigned char *)label, label_len);
+        HMAC_Update(cx, seed->data, seed->len);
+        status = HMAC_Finish(cx, outbuf, &outbuf_len, sizeof(outbuf));
+        if (status != SECSuccess)
+            goto loser;
+
+        /* Update the state = A(i) = HMAC_hash(secret, A(i-1)) */
+        HMAC_Begin(cx);
+        HMAC_Update(cx, state, state_len);
+        status = HMAC_Finish(cx, state, &state_len, sizeof(state));
+        if (status != SECSuccess)
+            goto loser;
+
+        /* the last block is truncated to what is still needed */
+        chunk_size = PR_MIN(outbuf_len, remaining);
+        PORT_Memcpy(res, outbuf, chunk_size);
+        res += chunk_size;
+        remaining -= chunk_size;
+    }
+
+    rv = SECSuccess;
+
+loser:
+    /* clear out state so it's not left on the stack */
+    if (cx)
+        HMAC_Destroy(cx, PR_TRUE);
+    PORT_Memset(state, 0, sizeof(state));
+    PORT_Memset(outbuf, 0, sizeof(outbuf));
+    return rv;
+}
+
+/*
+ * PRF built from two P_hash streams: result = P_MD5(S1) XOR P_SHA1(S2).
+ *
+ * S1 is the first and S2 the last ceil(secret->len / 2) bytes of secret, so
+ * the halves share one byte when the length is odd. result->len bytes are
+ * written to result->data. Returns SECFailure when the scratch buffer
+ * cannot be allocated or either TLS_P_hash call fails; the scratch buffer
+ * is released with PORT_ZFree on every path.
+ */
+SECStatus
+TLS_PRF(const SECItem *secret, const char *label, SECItem *seed,
+        SECItem *result, PRBool isFIPS)
+{
+    SECStatus rv = SECFailure, status;
+    unsigned int i;
+    SECItem tmp = { siBuffer, NULL, 0 };
+    SECItem S1;
+    SECItem S2;
+
+    PORT_Assert((secret != NULL) && (secret->data != NULL || !secret->len));
+    PORT_Assert((seed != NULL) && (seed->data != NULL));
+    PORT_Assert((result != NULL) && (result->data != NULL));
+
+    S1.type = siBuffer;
+    S1.len = (secret->len / 2) + (secret->len & 1);
+    S1.data = secret->data;
+
+    S2.type = siBuffer;
+    S2.len = S1.len;
+    S2.data = secret->data + (secret->len - S2.len);
+
+    tmp.data = (unsigned char *)PORT_Alloc(result->len);
+    if (tmp.data == NULL)
+        goto loser;
+    tmp.len = result->len;
+
+    status = TLS_P_hash(HASH_AlgMD5, &S1, label, seed, result, isFIPS);
+    if (status != SECSuccess)
+        goto loser;
+
+    status = TLS_P_hash(HASH_AlgSHA1, &S2, label, seed, &tmp, isFIPS);
+    if (status != SECSuccess)
+        goto loser;
+
+    for (i = 0; i < result->len; i++)
+        result->data[i] ^= tmp.data[i];
+
+    rv = SECSuccess;
+
+loser:
+    if (tmp.data != NULL)
+        PORT_ZFree(tmp.data, tmp.len);
+    return rv;
+}
diff --git a/security/nss/lib/freebl/unix_rand.c b/security/nss/lib/freebl/unix_rand.c
new file mode 100644
index 000000000..ea3b6af3d
--- /dev/null
+++ b/security/nss/lib/freebl/unix_rand.c
@@ -0,0 +1,1176 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "secrng.h" +#include "secerr.h" +#include "prerror.h" +#include "prthread.h" +#include "prprf.h" +#include "prenv.h" + +size_t RNG_FileUpdate(const char *fileName, size_t limit); + +/* + * When copying data to the buffer we want the least significant bytes + * from the input since those bits are changing the fastest. The address + * of least significant byte depends upon whether we are running on + * a big-endian or little-endian machine. + * + * Does this mean the least significant bytes are the most significant + * to us? :-) + */ + +static size_t +CopyLowBits(void *dst, size_t dstlen, void *src, size_t srclen) +{ + union endianness { + PRInt32 i; + char c[4]; + } u; + + if (srclen <= dstlen) { + memcpy(dst, src, srclen); + return srclen; + } + u.i = 0x01020304; + if (u.c[0] == 0x01) { + /* big-endian case */ + memcpy(dst, (char *)src + (srclen - dstlen), dstlen); + } else { + /* little-endian case */ + memcpy(dst, src, dstlen); + } + return dstlen; +} + +#ifdef SOLARIS + +#include + +static const PRUint32 entropy_buf_len = 4096; /* buffer up to 4 KB */ + +/* Buffer entropy data, and feed it to the RNG, entropy_buf_len bytes at a time. + * Returns error if RNG_RandomUpdate fails. Also increments *total_fed + * by the number of bytes successfully buffered. + */ +static SECStatus +BufferEntropy(char *inbuf, PRUint32 inlen, + char *entropy_buf, PRUint32 *entropy_buffered, + PRUint32 *total_fed) +{ + PRUint32 tocopy = 0; + PRUint32 avail = 0; + SECStatus rv = SECSuccess; + + while (inlen) { + avail = entropy_buf_len - *entropy_buffered; + if (!avail) { + /* Buffer is full, time to feed it to the RNG. 
*/ + rv = RNG_RandomUpdate(entropy_buf, entropy_buf_len); + if (SECSuccess != rv) { + break; + } + *entropy_buffered = 0; + avail = entropy_buf_len; + } + tocopy = PR_MIN(avail, inlen); + memcpy(entropy_buf + *entropy_buffered, inbuf, tocopy); + *entropy_buffered += tocopy; + inlen -= tocopy; + inbuf += tocopy; + *total_fed += tocopy; + } + return rv; +} + +/* Feed kernel statistics structures and ks_data field to the RNG. + * Returns status as well as the number of bytes successfully fed to the RNG. + */ +static SECStatus +RNG_kstat(PRUint32 *fed) +{ + kstat_ctl_t *kc = NULL; + kstat_t *ksp = NULL; + PRUint32 entropy_buffered = 0; + char *entropy_buf = NULL; + SECStatus rv = SECSuccess; + + PORT_Assert(fed); + if (!fed) { + return SECFailure; + } + *fed = 0; + + kc = kstat_open(); + PORT_Assert(kc); + if (!kc) { + return SECFailure; + } + entropy_buf = (char *)PORT_Alloc(entropy_buf_len); + PORT_Assert(entropy_buf); + if (entropy_buf) { + for (ksp = kc->kc_chain; ksp != NULL; ksp = ksp->ks_next) { + if (-1 == kstat_read(kc, ksp, NULL)) { + /* missing data from a single kstat shouldn't be fatal */ + continue; + } + rv = BufferEntropy((char *)ksp, sizeof(kstat_t), + entropy_buf, &entropy_buffered, + fed); + if (SECSuccess != rv) { + break; + } + + if (ksp->ks_data && ksp->ks_data_size > 0 && ksp->ks_ndata > 0) { + rv = BufferEntropy((char *)ksp->ks_data, ksp->ks_data_size, + entropy_buf, &entropy_buffered, + fed); + if (SECSuccess != rv) { + break; + } + } + } + if (SECSuccess == rv && entropy_buffered) { + /* Buffer is not empty, time to feed it to the RNG */ + rv = RNG_RandomUpdate(entropy_buf, entropy_buffered); + } + PORT_Free(entropy_buf); + } else { + rv = SECFailure; + } + if (kstat_close(kc)) { + PORT_Assert(0); + rv = SECFailure; + } + return rv; +} + +#endif + +#if defined(SCO) || defined(UNIXWARE) || defined(BSDI) || defined(FREEBSD) || defined(NETBSD) || defined(DARWIN) || defined(OPENBSD) || defined(NTO) || defined(__riscos__) +#include + +#define 
getdtablesize() sysconf(_SC_OPEN_MAX) + +static size_t +GetHighResClock(void *buf, size_t maxbytes) +{ + int ticks; + struct tms buffer; + + ticks = times(&buffer); + return CopyLowBits(buf, maxbytes, &ticks, sizeof(ticks)); +} + +static void +GiveSystemInfo(void) +{ + long si; + + /* + * Is this really necessary? Why not use rand48 or something? + */ + si = sysconf(_SC_CHILD_MAX); + RNG_RandomUpdate(&si, sizeof(si)); + + si = sysconf(_SC_STREAM_MAX); + RNG_RandomUpdate(&si, sizeof(si)); + + si = sysconf(_SC_OPEN_MAX); + RNG_RandomUpdate(&si, sizeof(si)); +} +#endif + +#if defined(__sun) +#if defined(__svr4) || defined(SVR4) +#include + +#define getdtablesize() sysconf(_SC_OPEN_MAX) + +static void +GiveSystemInfo(void) +{ + int rv; + char buf[2000]; + + rv = sysinfo(SI_MACHINE, buf, sizeof(buf)); + if (rv > 0) { + RNG_RandomUpdate(buf, rv); + } + rv = sysinfo(SI_RELEASE, buf, sizeof(buf)); + if (rv > 0) { + RNG_RandomUpdate(buf, rv); + } + rv = sysinfo(SI_HW_SERIAL, buf, sizeof(buf)); + if (rv > 0) { + RNG_RandomUpdate(buf, rv); + } +} + +static size_t +GetHighResClock(void *buf, size_t maxbytes) +{ + hrtime_t t; + t = gethrtime(); + if (t) { + return CopyLowBits(buf, maxbytes, &t, sizeof(t)); + } + return 0; +} +#else /* SunOS (Sun, but not SVR4) */ + +extern long sysconf(int name); + +static size_t +GetHighResClock(void *buf, size_t maxbytes) +{ + return 0; +} + +static void +GiveSystemInfo(void) +{ + long si; + + /* This is not very good */ + si = sysconf(_SC_CHILD_MAX); + RNG_RandomUpdate(&si, sizeof(si)); +} +#endif +#endif /* Sun */ + +#if defined(__hpux) +#include + +#define getdtablesize() sysconf(_SC_OPEN_MAX) + +#if defined(__ia64) +#include + +static size_t +GetHighResClock(void *buf, size_t maxbytes) +{ + PRUint64 t; + + t = _Asm_mov_from_ar(_AREG44); + return CopyLowBits(buf, maxbytes, &t, sizeof(t)); +} +#else +static size_t +GetHighResClock(void *buf, size_t maxbytes) +{ + extern int ret_cr16(); + int cr16val; + + cr16val = ret_cr16(); + return 
CopyLowBits(buf, maxbytes, &cr16val, sizeof(cr16val)); +} +#endif + +static void +GiveSystemInfo(void) +{ + long si; + + /* This is not very good */ + si = sysconf(_AES_OS_VERSION); + RNG_RandomUpdate(&si, sizeof(si)); + si = sysconf(_SC_CPU_VERSION); + RNG_RandomUpdate(&si, sizeof(si)); +} +#endif /* HPUX */ + +#if defined(OSF1) +#include +#include +#include +#include + +static void +GiveSystemInfo(void) +{ + char buf[BUFSIZ]; + int rv; + int off = 0; + + rv = sysinfo(SI_MACHINE, buf, sizeof(buf)); + if (rv > 0) { + RNG_RandomUpdate(buf, rv); + } + rv = sysinfo(SI_RELEASE, buf, sizeof(buf)); + if (rv > 0) { + RNG_RandomUpdate(buf, rv); + } + rv = sysinfo(SI_HW_SERIAL, buf, sizeof(buf)); + if (rv > 0) { + RNG_RandomUpdate(buf, rv); + } +} + +/* + * Use the "get the cycle counter" instruction on the alpha. + * The low 32 bits completely turn over in less than a minute. + * The high 32 bits are some non-counter gunk that changes sometimes. + */ +static size_t +GetHighResClock(void *buf, size_t maxbytes) +{ + unsigned long t; + + t = asm("rpcc %v0"); + return CopyLowBits(buf, maxbytes, &t, sizeof(t)); +} + +#endif /* Alpha */ + +#if defined(_IBMR2) +static size_t +GetHighResClock(void *buf, size_t maxbytes) +{ + return 0; +} + +static void +GiveSystemInfo(void) +{ + /* XXX haven't found any yet! 
*/
+}
+#endif /* IBM R2 */
+
+#if defined(LINUX)
+#include
+
+static size_t
+GetHighResClock(void *buf, size_t maxbytes)
+{
+    return 0;
+}
+
+static void
+GiveSystemInfo(void)
+{
+#ifndef NO_SYSINFO
+    struct sysinfo si;
+    if (sysinfo(&si) == 0) {
+        RNG_RandomUpdate(&si, sizeof(si));
+    }
+#endif
+}
+#endif /* LINUX */
+
+#if defined(NCR)
+
+#include
+#include
+
+#define getdtablesize() sysconf(_SC_OPEN_MAX)
+
+static size_t
+GetHighResClock(void *buf, size_t maxbytes)
+{
+    return 0;
+}
+
+static void
+GiveSystemInfo(void)
+{
+    int rv;
+    char buf[2000];
+
+    rv = sysinfo(SI_MACHINE, buf, sizeof(buf));
+    if (rv > 0) {
+        RNG_RandomUpdate(buf, rv);
+    }
+    rv = sysinfo(SI_RELEASE, buf, sizeof(buf));
+    if (rv > 0) {
+        RNG_RandomUpdate(buf, rv);
+    }
+    rv = sysinfo(SI_HW_SERIAL, buf, sizeof(buf));
+    if (rv > 0) {
+        RNG_RandomUpdate(buf, rv);
+    }
+}
+
+#endif /* NCR */
+
+#if defined(sgi)
+#include
+#undef PRIVATE
+#include
+#include
+#include
+#include
+#include
+#include
+
+static void
+GiveSystemInfo(void)
+{
+    int rv;
+    char buf[4096];
+
+    rv = syssgi(SGI_SYSID, &buf[0]);
+    if (rv > 0) {
+        RNG_RandomUpdate(buf, MAXSYSIDSIZE);
+    }
+#ifdef SGI_RDUBLK
+    rv = syssgi(SGI_RDUBLK, getpid(), &buf[0], sizeof(buf));
+    if (rv > 0) {
+        RNG_RandomUpdate(buf, sizeof(buf));
+    }
+#endif /* SGI_RDUBLK */
+    rv = syssgi(SGI_INVENT, SGI_INV_READ, buf, sizeof(buf));
+    if (rv > 0) {
+        RNG_RandomUpdate(buf, sizeof(buf));
+    }
+    rv = sysinfo(SI_MACHINE, buf, sizeof(buf));
+    if (rv > 0) {
+        RNG_RandomUpdate(buf, rv);
+    }
+    rv = sysinfo(SI_RELEASE, buf, sizeof(buf));
+    if (rv > 0) {
+        RNG_RandomUpdate(buf, rv);
+    }
+    rv = sysinfo(SI_HW_SERIAL, buf, sizeof(buf));
+    if (rv > 0) {
+        RNG_RandomUpdate(buf, rv);
+    }
+}
+
+/*
+ * Copy up to maxbuf low-order bytes of the memory-mapped cycle counter
+ * into buf and return how many bytes were copied. The counter is mapped
+ * on first use; 0 is returned when no counter is reported, when
+ * /dev/mmem cannot be opened or mapped, or once the setup attempts are
+ * used up.
+ */
+static size_t
+GetHighResClock(void *buf, size_t maxbuf)
+{
+    unsigned phys_addr, raddr, cycleval;
+    static volatile unsigned *iotimer_addr = NULL;
+    static int tries = 0;
+    static int cntr_size;
+    int mfd;
+    long s0[2];
+    struct timeval tv;
+
+#ifndef SGI_CYCLECNTR_SIZE
+#define SGI_CYCLECNTR_SIZE 165 /* Size user needs to use to read CC */
+#endif
+
+    if (iotimer_addr == NULL) {
+        if (tries++ > 1) {
+            /* Don't keep trying if it didn't work */
+            return 0;
+        }
+
+        /*
+        ** For SGI machines we can use the cycle counter, if it has one,
+        ** to generate some truly random numbers
+        */
+        phys_addr = syssgi(SGI_QUERY_CYCLECNTR, &cycleval);
+        if (phys_addr) {
+            int pgsz = getpagesize();
+            int pgoffmask = pgsz - 1;
+
+            raddr = phys_addr & ~pgoffmask;
+            mfd = open("/dev/mmem", O_RDONLY);
+            if (mfd < 0) {
+                return 0;
+            }
+            iotimer_addr = (unsigned *)
+                mmap(0, pgoffmask, PROT_READ, MAP_PRIVATE, mfd, (int)raddr);
+            if (iotimer_addr == (void *)-1) {
+                close(mfd);
+                iotimer_addr = NULL;
+                return 0;
+            }
+            iotimer_addr = (unsigned *)((__psint_t)iotimer_addr | (phys_addr & pgoffmask));
+            /*
+             * The file 'mfd' is purposefully not closed.
+             */
+            cntr_size = syssgi(SGI_CYCLECNTR_SIZE);
+            if (cntr_size < 0) {
+                struct utsname utsinfo;
+
+                /*
+                 * We must be executing on a 6.0 or earlier system, since the
+                 * SGI_CYCLECNTR_SIZE call is not supported.
+                 *
+                 * The only pre-6.1 platforms with 64-bit counters are
+                 * IP19 and IP21 (Challenge, PowerChallenge, Onyx).
+                 */
+                uname(&utsinfo);
+                if (!strncmp(utsinfo.machine, "IP19", 4) ||
+                    !strncmp(utsinfo.machine, "IP21", 4))
+                    cntr_size = 64;
+                else
+                    cntr_size = 32;
+            }
+            cntr_size /= 8; /* Convert from bits to bytes */
+        }
+    }
+
+    if (iotimer_addr == NULL) {
+        /* no cycle counter was reported, so there is nothing to read */
+        return 0;
+    }
+
+    s0[0] = *iotimer_addr;
+    if (cntr_size > 4)
+        s0[1] = *(iotimer_addr + 1);
+    /* CopyLowBits bounds the copy by maxbuf; buf is not written before it */
+    return CopyLowBits(buf, maxbuf, &s0, cntr_size);
+}
+#endif
+
+#if defined(sony)
+#include
+
+#define getdtablesize() sysconf(_SC_OPEN_MAX)
+
+static size_t
+GetHighResClock(void *buf, size_t maxbytes)
+{
+    return 0;
+}
+
+static void
+GiveSystemInfo(void)
+{
+    int rv;
+    char buf[2000];
+
+    rv = sysinfo(SI_MACHINE, buf, sizeof(buf));
+    if (rv > 0) {
+        RNG_RandomUpdate(buf, rv);
+    }
+    rv = sysinfo(SI_RELEASE, buf, sizeof(buf));
+    if (rv > 0) {
+        RNG_RandomUpdate(buf, rv);
+    }
+    rv = sysinfo(SI_HW_SERIAL, buf, sizeof(buf));
+    if (rv > 0) {
+        RNG_RandomUpdate(buf, rv);
+    }
+}
+#endif /* sony */
+
+#if defined(sinix)
+#include
+#include
+
+int gettimeofday(struct timeval *, struct timezone *);
+int gethostname(char *, int);
+
+#define getdtablesize() sysconf(_SC_OPEN_MAX)
+
+static size_t
+GetHighResClock(void *buf, size_t maxbytes)
+{
+    int ticks;
+    struct tms buffer;
+
+    ticks = times(&buffer);
+    return CopyLowBits(buf, maxbytes, &ticks, sizeof(ticks));
+}
+
+static void
+GiveSystemInfo(void)
+{
+    int rv;
+    char buf[2000];
+
+    rv = sysinfo(SI_MACHINE, buf, sizeof(buf));
+    if (rv > 0) {
+        RNG_RandomUpdate(buf, rv);
+    }
+    rv = sysinfo(SI_RELEASE, buf, sizeof(buf));
+    if (rv > 0) {
+        RNG_RandomUpdate(buf, rv);
+    }
+    rv = sysinfo(SI_HW_SERIAL, buf, sizeof(buf));
+    if (rv > 0) {
+        RNG_RandomUpdate(buf, rv);
+    }
+}
+#endif /* sinix */
+
+#ifdef BEOS
+#include
+
+static size_t
+GetHighResClock(void *buf, size_t maxbytes)
+{
+    bigtime_t bigtime; /* Actually a int64 */
+
+    bigtime = real_time_clock_usecs();
+    return CopyLowBits(buf, maxbytes, &bigtime, sizeof(bigtime));
+}
+
+/*
+ * Feed a few system_info counters to the RNG. The structure lives on the
+ * stack and is filled in by get_system_info(); nothing is fed when that
+ * call fails.
+ */
+static void
+GiveSystemInfo(void)
+{
+    system_info info;
+    PRInt32 val;
+
+    /* NOTE(review): B_OK is assumed to come from the same kit header that
+     * declares get_system_info() - confirm on a BeOS/Haiku build */
+    if (get_system_info(&info) == B_OK) {
+        val = info.boot_time;
+        RNG_RandomUpdate(&val, sizeof(val));
+        val = info.used_pages;
+        RNG_RandomUpdate(&val, sizeof(val));
+        val = info.used_ports;
+        RNG_RandomUpdate(&val, sizeof(val));
+        val = info.used_threads;
+        RNG_RandomUpdate(&val, sizeof(val));
+        val = info.used_teams;
+        RNG_RandomUpdate(&val, sizeof(val));
+    }
+}
+#endif /* BEOS */
+
+#if defined(nec_ews)
+#include
+
+#define getdtablesize() sysconf(_SC_OPEN_MAX)
+
+static size_t
+GetHighResClock(void *buf, size_t maxbytes)
+{
+    return 0;
+}
+
+static void
+GiveSystemInfo(void)
+{
+    int rv;
+    char buf[2000];
+
+    rv = sysinfo(SI_MACHINE, buf, sizeof(buf));
+    if (rv > 0) {
+        RNG_RandomUpdate(buf, rv);
+    }
+    rv = sysinfo(SI_RELEASE, buf, sizeof(buf));
+    if (rv > 0) {
+        RNG_RandomUpdate(buf, rv);
+    }
+    rv = sysinfo(SI_HW_SERIAL, buf, sizeof(buf));
+    if (rv > 0) {
+        RNG_RandomUpdate(buf, rv);
+    }
+}
+#endif /* nec_ews */
+
+/*
+ * Fill buf with at most maxbytes of timing noise: the platform high
+ * resolution clock first, then the low bytes of the current tv_usec and
+ * tv_sec. Returns the number of bytes written.
+ */
+size_t
+RNG_GetNoise(void *buf, size_t maxbytes)
+{
+    struct timeval tv;
+    int n = 0;
+    int c;
+
+    n = GetHighResClock(buf, maxbytes);
+    maxbytes -= n;
+
+    (void)gettimeofday(&tv, 0);
+    c = CopyLowBits((char *)buf + n, maxbytes, &tv.tv_usec, sizeof(tv.tv_usec));
+    n += c;
+    maxbytes -= c;
+    c = CopyLowBits((char *)buf + n, maxbytes, &tv.tv_sec, sizeof(tv.tv_sec));
+    n += c;
+    return n;
+}
+
+#define SAFE_POPEN_MAXARGS 10 /* must be at least 2 */
+
+/*
+ * safe_popen is static to this module and we know what arguments it is
+ * called with. Note that this version only supports a single open child
+ * process at any time.
+ */ +static pid_t safe_popen_pid; +static struct sigaction oldact; + +static FILE * +safe_popen(char *cmd) +{ + int p[2], fd, argc; + pid_t pid; + char *argv[SAFE_POPEN_MAXARGS + 1]; + FILE *fp; + static char blank[] = " \t"; + static struct sigaction newact; + + if (pipe(p) < 0) + return 0; + + fp = fdopen(p[0], "r"); + if (fp == 0) { + close(p[0]); + close(p[1]); + return 0; + } + + /* Setup signals so that SIGCHLD is ignored as we want to do waitpid */ + newact.sa_handler = SIG_DFL; + newact.sa_flags = 0; + sigfillset(&newact.sa_mask); + sigaction(SIGCHLD, &newact, &oldact); + + pid = fork(); + switch (pid) { + int ndesc; + + case -1: + fclose(fp); /* this closes p[0], the fd associated with fp */ + close(p[1]); + sigaction(SIGCHLD, &oldact, NULL); + return 0; + + case 0: + /* dup write-side of pipe to stderr and stdout */ + if (p[1] != 1) + dup2(p[1], 1); + if (p[1] != 2) + dup2(p[1], 2); + + /* + * close the other file descriptors, except stdin which we + * try reassociating with /dev/null, first (bug 174993) + */ + if (!freopen("/dev/null", "r", stdin)) + close(0); + ndesc = getdtablesize(); + for (fd = PR_MIN(65536, ndesc); --fd > 2; close(fd)) + ; + + /* clean up environment in the child process */ + putenv("PATH=/bin:/usr/bin:/sbin:/usr/sbin:/etc:/usr/etc"); + putenv("SHELL=/bin/sh"); + putenv("IFS= \t"); + + /* + * The caller may have passed us a string that is in text + * space. 
It may be illegal to modify the string + */ + cmd = strdup(cmd); + /* format argv */ + argv[0] = strtok(cmd, blank); + argc = 1; + while ((argv[argc] = strtok(0, blank)) != 0) { + if (++argc == SAFE_POPEN_MAXARGS) { + argv[argc] = 0; + break; + } + } + + /* and away we go */ + execvp(argv[0], argv); + exit(127); + break; + + default: + close(p[1]); + break; + } + + /* non-zero means there's a cmd running */ + safe_popen_pid = pid; + return fp; +} + +static int +safe_pclose(FILE *fp) +{ + pid_t pid; + int status = -1, rv; + + if ((pid = safe_popen_pid) == 0) + return -1; + safe_popen_pid = 0; + + fclose(fp); + + /* yield the processor so the child gets some time to exit normally */ + PR_Sleep(PR_INTERVAL_NO_WAIT); + + /* if the child hasn't exited, kill it -- we're done with its output */ + while ((rv = waitpid(pid, &status, WNOHANG)) == -1 && errno == EINTR) + ; + if (rv == 0) { + kill(pid, SIGKILL); + while ((rv = waitpid(pid, &status, 0)) == -1 && errno == EINTR) + ; + } + + /* Reset SIGCHLD signal hander before returning */ + sigaction(SIGCHLD, &oldact, NULL); + + return status; +} + +#ifdef DARWIN +#include +#if !TARGET_OS_IPHONE +#include +#endif +#endif + +/* Fork netstat to collect its output by default. Do not unset this unless + * another source of entropy is available + */ +#define DO_NETSTAT 1 + +void +RNG_SystemInfoForRNG(void) +{ + FILE *fp; + char buf[BUFSIZ]; + size_t bytes; + const char *const *cp; + char *randfile; +#ifdef DARWIN +#if TARGET_OS_IPHONE + /* iOS does not expose a way to access environ. 
*/ + char **environ = NULL; +#else + char **environ = *_NSGetEnviron(); +#endif +#else + extern char **environ; +#endif +#ifdef BEOS + static const char *const files[] = { + "/boot/var/swap", + "/boot/var/log/syslog", + "/boot/var/tmp", + "/boot/home/config/settings", + "/boot/home", + 0 + }; +#else + static const char *const files[] = { + "/etc/passwd", + "/etc/utmp", + "/tmp", + "/var/tmp", + "/usr/tmp", + 0 + }; +#endif + +#if defined(BSDI) + static char netstat_ni_cmd[] = "netstat -nis"; +#else + static char netstat_ni_cmd[] = "netstat -ni"; +#endif + + GiveSystemInfo(); + + bytes = RNG_GetNoise(buf, sizeof(buf)); + RNG_RandomUpdate(buf, bytes); + + /* + * Pass the C environment and the addresses of the pointers to the + * hash function. This makes the random number function depend on the + * execution environment of the user and on the platform the program + * is running on. + */ + if (environ != NULL) { + cp = (const char *const *)environ; + while (*cp) { + RNG_RandomUpdate(*cp, strlen(*cp)); + cp++; + } + RNG_RandomUpdate(environ, (char *)cp - (char *)environ); + } + + /* Give in system information */ + if (gethostname(buf, sizeof(buf)) == 0) { + RNG_RandomUpdate(buf, strlen(buf)); + } + GiveSystemInfo(); + + /* grab some data from system's PRNG before any other files. */ + bytes = RNG_FileUpdate("/dev/urandom", SYSTEM_RNG_SEED_COUNT); + + /* If the user points us to a random file, pass it through the rng */ + randfile = PR_GetEnvSecure("NSRANDFILE"); + if ((randfile != NULL) && (randfile[0] != '\0')) { + char *randCountString = PR_GetEnvSecure("NSRANDCOUNT"); + int randCount = randCountString ? atoi(randCountString) : 0; + if (randCount != 0) { + RNG_FileUpdate(randfile, randCount); + } else { + RNG_FileForRNG(randfile); + } + } + + /* pass other files through */ + for (cp = files; *cp; cp++) + RNG_FileForRNG(*cp); + +/* + * Bug 100447: On BSD/OS 4.2 and 4.3, we have problem calling safe_popen + * in a pthreads environment. 
Therefore, we call safe_popen last and on + * BSD/OS we do not call safe_popen when we succeeded in getting data + * from /dev/urandom. + * + * Bug 174993: On platforms providing /dev/urandom, don't fork netstat + * either, if data has been gathered successfully. + */ + +#if defined(BSDI) || defined(FREEBSD) || defined(NETBSD) || defined(OPENBSD) || defined(DARWIN) || defined(LINUX) || defined(HPUX) + if (bytes) + return; +#endif + +#ifdef SOLARIS + +/* + * On Solaris, NSS may be initialized automatically from libldap in + * applications that are unaware of the use of NSS. safe_popen forks, and + * sometimes creates issues with some applications' pthread_atfork handlers. + * We always have /dev/urandom on Solaris 9 and above as an entropy source, + * and for Solaris 8 we have the libkstat interface, so we don't need to + * fork netstat. + */ + +#undef DO_NETSTAT + if (!bytes) { + /* On Solaris 8, /dev/urandom isn't available, so we use libkstat. */ + PRUint32 kstat_bytes = 0; + if (SECSuccess != RNG_kstat(&kstat_bytes)) { + PORT_Assert(0); + } + bytes += kstat_bytes; + PORT_Assert(bytes); + } +#endif + +#ifdef DO_NETSTAT + fp = safe_popen(netstat_ni_cmd); + if (fp != NULL) { + while ((bytes = fread(buf, 1, sizeof(buf), fp)) > 0) + RNG_RandomUpdate(buf, bytes); + safe_pclose(fp); + } +#endif +} + +#define TOTAL_FILE_LIMIT 1000000 /* one million */ + +size_t +RNG_FileUpdate(const char *fileName, size_t limit) +{ + FILE *file; + int fd; + int bytes; + size_t fileBytes = 0; + struct stat stat_buf; + unsigned char buffer[BUFSIZ]; + static size_t totalFileBytes = 0; + + /* suppress valgrind warnings due to holes in struct stat */ + memset(&stat_buf, 0, sizeof(stat_buf)); + + if (stat((char *)fileName, &stat_buf) < 0) + return fileBytes; + RNG_RandomUpdate(&stat_buf, sizeof(stat_buf)); + + file = fopen(fileName, "r"); + if (file != NULL) { + /* Read from the underlying file descriptor directly to bypass stdio + * buffering and avoid reading more bytes than we need from + 
* /dev/urandom. NOTE: we can't use fread with unbuffered I/O because + * fread may return EOF in unbuffered I/O mode on Android. + * + * Moreover, we read into a buffer of size BUFSIZ, so buffered I/O + * has no performance advantage. */ + fd = fileno(file); + /* 'file' was just opened, so this should not fail. */ + PORT_Assert(fd != -1); + while (limit > fileBytes && fd != -1) { + bytes = PR_MIN(sizeof buffer, limit - fileBytes); + bytes = read(fd, buffer, bytes); + if (bytes <= 0) + break; + RNG_RandomUpdate(buffer, bytes); + fileBytes += bytes; + totalFileBytes += bytes; + /* after TOTAL_FILE_LIMIT has been reached, only read in first + ** buffer of data from each subsequent file. + */ + if (totalFileBytes > TOTAL_FILE_LIMIT) + break; + } + fclose(file); + } + /* + * Pass yet another snapshot of our highest resolution clock into + * the hash function. + */ + bytes = RNG_GetNoise(buffer, sizeof(buffer)); + RNG_RandomUpdate(buffer, bytes); + return fileBytes; +} + +void +RNG_FileForRNG(const char *fileName) +{ + RNG_FileUpdate(fileName, TOTAL_FILE_LIMIT); +} + +void +ReadSingleFile(const char *fileName) +{ + FILE *file; + unsigned char buffer[BUFSIZ]; + + file = fopen(fileName, "rb"); + if (file != NULL) { + while (fread(buffer, 1, sizeof(buffer), file) > 0) + ; + fclose(file); + } +} + +#define _POSIX_PTHREAD_SEMANTICS +#include + +PRBool +ReadFileOK(char *dir, char *file) +{ + struct stat stat_buf; + char filename[PATH_MAX]; + int count = snprintf(filename, sizeof filename, "%s/%s", dir, file); + + if (count <= 0) { + return PR_FALSE; /* name too long, can't read it anyway */ + } + + if (stat(filename, &stat_buf) < 0) + return PR_FALSE; /* can't stat, probably can't read it then as well */ + return S_ISREG(stat_buf.st_mode) ? PR_TRUE : PR_FALSE; +} + +/* + * read one file out of either /etc or the user's home directory. + * fileToRead tells which file to read. + * + * return 1 if it's time to reset the fileToRead (no more files to read). 
+ */ +static int +ReadOneFile(int fileToRead) +{ + char *dir = "/etc"; + DIR *fd = opendir(dir); + int resetCount = 0; + struct dirent *entry; +#if defined(__sun) + char firstName[256]; +#else + char firstName[NAME_MAX + 1]; +#endif + const char *name = NULL; + int i; + + if (fd == NULL) { + dir = PR_GetEnvSecure("HOME"); + if (dir) { + fd = opendir(dir); + } + } + if (fd == NULL) { + return 1; + } + + firstName[0] = '\0'; + for (i = 0; i <= fileToRead; i++) { + do { + /* readdir() isn't guaranteed to be thread safe on every platform; + * this code assumes the same directory isn't read concurrently. + * This usage is confirmed safe on Linux, see bug 1254334. */ + entry = readdir(fd); + } while (entry != NULL && !ReadFileOK(dir, &entry->d_name[0])); + if (entry == NULL) { + resetCount = 1; /* read to the end, start again at the beginning */ + if (firstName[0]) { + /* ran out of entries in the directory, use the first one */ + name = firstName; + } + break; + } + name = entry->d_name; + if (i == 0) { + /* copy the name of the first in case we run out of entries */ + PORT_Assert(PORT_Strlen(name) < sizeof(firstName)); + PORT_Strncpy(firstName, name, sizeof(firstName) - 1); + firstName[sizeof(firstName) - 1] = '\0'; + } + } + + if (name) { + char filename[PATH_MAX]; + int count = snprintf(filename, sizeof(filename), "%s/%s", dir, name); + if (count >= 1) { + ReadSingleFile(filename); + } + } + + closedir(fd); + return resetCount; +} + +/* + * do something to try to introduce more noise into the 'GetNoise' call + */ +static void +rng_systemJitter(void) +{ + static int fileToRead = 1; + + if (ReadOneFile(fileToRead)) { + fileToRead = 1; + } else { + fileToRead++; + } +} + +size_t +RNG_SystemRNG(void *dest, size_t maxLen) +{ + FILE *file; + int fd; + int bytes; + size_t fileBytes = 0; + unsigned char *buffer = dest; + + file = fopen("/dev/urandom", "r"); + if (file == NULL) { + return rng_systemFromNoise(dest, maxLen); + } + /* Read from the underlying file descriptor 
directly to bypass stdio + * buffering and avoid reading more bytes than we need from /dev/urandom. + * NOTE: we can't use fread with unbuffered I/O because fread may return + * EOF in unbuffered I/O mode on Android. + */ + fd = fileno(file); + /* 'file' was just opened, so this should not fail. */ + PORT_Assert(fd != -1); + while (maxLen > fileBytes && fd != -1) { + bytes = maxLen - fileBytes; + bytes = read(fd, buffer, bytes); + if (bytes <= 0) + break; + fileBytes += bytes; + buffer += bytes; + } + fclose(file); + if (fileBytes != maxLen) { + PORT_SetError(SEC_ERROR_NEED_RANDOM); /* system RNG failed */ + fileBytes = 0; + } + return fileBytes; +} diff --git a/security/nss/lib/freebl/win_rand.c b/security/nss/lib/freebl/win_rand.c new file mode 100644 index 000000000..b863776d2 --- /dev/null +++ b/security/nss/lib/freebl/win_rand.c @@ -0,0 +1,161 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "secrng.h" + +#ifdef XP_WIN +#include +#include + +static BOOL +CurrentClockTickTime(LPDWORD lpdwHigh, LPDWORD lpdwLow) +{ + LARGE_INTEGER liCount; + + if (!QueryPerformanceCounter(&liCount)) + return FALSE; + + *lpdwHigh = liCount.u.HighPart; + *lpdwLow = liCount.u.LowPart; + return TRUE; +} + +size_t +RNG_GetNoise(void *buf, size_t maxbuf) +{ + DWORD dwHigh, dwLow, dwVal; + int n = 0; + int nBytes; + time_t sTime; + + if (maxbuf <= 0) + return 0; + + CurrentClockTickTime(&dwHigh, &dwLow); + + // get the maximally changing bits first + nBytes = sizeof(dwLow) > maxbuf ? maxbuf : sizeof(dwLow); + memcpy((char *)buf, &dwLow, nBytes); + n += nBytes; + maxbuf -= nBytes; + + if (maxbuf <= 0) + return n; + + nBytes = sizeof(dwHigh) > maxbuf ? 
maxbuf : sizeof(dwHigh); + memcpy(((char *)buf) + n, &dwHigh, nBytes); + n += nBytes; + maxbuf -= nBytes; + + if (maxbuf <= 0) + return n; + + // get the number of milliseconds that have elapsed since Windows started + dwVal = GetTickCount(); + + nBytes = sizeof(dwVal) > maxbuf ? maxbuf : sizeof(dwVal); + memcpy(((char *)buf) + n, &dwVal, nBytes); + n += nBytes; + maxbuf -= nBytes; + + if (maxbuf <= 0) + return n; + + // get the time in seconds since midnight Jan 1, 1970 + time(&sTime); + nBytes = sizeof(sTime) > maxbuf ? maxbuf : sizeof(sTime); + memcpy(((char *)buf) + n, &sTime, nBytes); + n += nBytes; + + return n; +} + +void +RNG_SystemInfoForRNG(void) +{ + DWORD dwVal; + char buffer[256]; + int nBytes; + MEMORYSTATUS sMem; + HANDLE hVal; + DWORD dwSerialNum; + DWORD dwComponentLen; + DWORD dwSysFlags; + char volName[128]; + DWORD dwSectors, dwBytes, dwFreeClusters, dwNumClusters; + + nBytes = RNG_GetNoise(buffer, 20); // get up to 20 bytes + RNG_RandomUpdate(buffer, nBytes); + + sMem.dwLength = sizeof(sMem); + GlobalMemoryStatus(&sMem); // assorted memory stats + RNG_RandomUpdate(&sMem, sizeof(sMem)); + + dwVal = GetLogicalDrives(); + RNG_RandomUpdate(&dwVal, sizeof(dwVal)); // bitfields in bits 0-25 + + dwVal = sizeof(buffer); + if (GetComputerName(buffer, &dwVal)) + RNG_RandomUpdate(buffer, dwVal); + + hVal = GetCurrentProcess(); // 4 or 8 byte pseudo handle (a + // constant!) 
of current process + RNG_RandomUpdate(&hVal, sizeof(hVal)); + + dwVal = GetCurrentProcessId(); // process ID (4 bytes) + RNG_RandomUpdate(&dwVal, sizeof(dwVal)); + + dwVal = GetCurrentThreadId(); // thread ID (4 bytes) + RNG_RandomUpdate(&dwVal, sizeof(dwVal)); + + volName[0] = '\0'; + buffer[0] = '\0'; + GetVolumeInformation(NULL, + volName, + sizeof(volName), + &dwSerialNum, + &dwComponentLen, + &dwSysFlags, + buffer, + sizeof(buffer)); + + RNG_RandomUpdate(volName, strlen(volName)); + RNG_RandomUpdate(&dwSerialNum, sizeof(dwSerialNum)); + RNG_RandomUpdate(&dwComponentLen, sizeof(dwComponentLen)); + RNG_RandomUpdate(&dwSysFlags, sizeof(dwSysFlags)); + RNG_RandomUpdate(buffer, strlen(buffer)); + + if (GetDiskFreeSpace(NULL, &dwSectors, &dwBytes, &dwFreeClusters, + &dwNumClusters)) { + RNG_RandomUpdate(&dwSectors, sizeof(dwSectors)); + RNG_RandomUpdate(&dwBytes, sizeof(dwBytes)); + RNG_RandomUpdate(&dwFreeClusters, sizeof(dwFreeClusters)); + RNG_RandomUpdate(&dwNumClusters, sizeof(dwNumClusters)); + } + + nBytes = RNG_GetNoise(buffer, 20); // get up to 20 bytes + RNG_RandomUpdate(buffer, nBytes); +} + +/* + * The RtlGenRandom function is declared in , but the + * declaration is missing a calling convention specifier. So we + * declare it manually here. + */ +#define RtlGenRandom SystemFunction036 +DECLSPEC_IMPORT BOOLEAN WINAPI RtlGenRandom( + PVOID RandomBuffer, + ULONG RandomBufferLength); + +size_t +RNG_SystemRNG(void *dest, size_t maxLen) +{ + size_t bytes = 0; + + if (RtlGenRandom(dest, maxLen)) { + bytes = maxLen; + } + return bytes; +} +#endif /* is XP_WIN */ -- cgit v1.2.3